15 files changed, 2469 insertions, 842 deletions
diff --git a/contrib/llvm/lib/Transforms/InstCombine/InstCombine.h b/contrib/llvm/lib/Transforms/InstCombine/InstCombine.h
index 7467eca..1f6a3a5 100644
--- a/contrib/llvm/lib/Transforms/InstCombine/InstCombine.h
+++ b/contrib/llvm/lib/Transforms/InstCombine/InstCombine.h
@@ -11,12 +11,12 @@
 #define INSTCOMBINE_INSTCOMBINE_H
 
 #include "InstCombineWorklist.h"
-#include "llvm/IRBuilder.h"
-#include "llvm/IntrinsicInst.h"
-#include "llvm/Operator.h"
-#include "llvm/Pass.h"
 #include "llvm/Analysis/ValueTracking.h"
-#include "llvm/Support/InstVisitor.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/Operator.h"
+#include "llvm/InstVisitor.h"
+#include "llvm/Pass.h"
 #include "llvm/Support/TargetFolder.h"
 #include "llvm/Transforms/Utils/SimplifyLibCalls.h"
 
@@ -27,7 +27,7 @@ namespace llvm {
   class DbgDeclareInst;
   class MemIntrinsic;
   class MemSetInst;
-  
+
 /// SelectPatternFlavor - We can match a variety of different patterns for
 /// select operations.
 enum SelectPatternFlavor {
@@ -36,7 +36,7 @@ enum SelectPatternFlavor {
   SPF_SMAX, SPF_UMAX
   //SPF_ABS - TODO.
 };
-  
+
 /// getComplexity:  Assign a complexity or rank value to LLVM Values...
 ///   0 -> undef, 1 -> Const, 2 -> Other, 3 -> Arg, 3 -> Unary, 4 -> OtherInst
 static inline unsigned getComplexity(Value *V) {
@@ -51,23 +51,23 @@ static inline unsigned getComplexity(Value *V) {
   return isa<Constant>(V) ? (isa<UndefValue>(V) ? 0 : 1) : 2;
 }
 
-  
+
 /// InstCombineIRInserter - This is an IRBuilder insertion helper that works
 /// just like the normal insertion helper, but also adds any new instructions
 /// to the instcombine worklist.
-class LLVM_LIBRARY_VISIBILITY InstCombineIRInserter 
+class LLVM_LIBRARY_VISIBILITY InstCombineIRInserter
     : public IRBuilderDefaultInserter<true> {
   InstCombineWorklist &Worklist;
 public:
   InstCombineIRInserter(InstCombineWorklist &WL) : Worklist(WL) {}
-  
+
   void InsertHelper(Instruction *I, const Twine &Name,
                     BasicBlock *BB, BasicBlock::iterator InsertPt) const {
     IRBuilderDefaultInserter<true>::InsertHelper(I, Name, BB, InsertPt);
     Worklist.Add(I);
   }
 };
-  
+
 /// InstCombiner - The -instcombine pass.
 class LLVM_LIBRARY_VISIBILITY InstCombiner
                              : public FunctionPass,
@@ -76,6 +76,7 @@ class LLVM_LIBRARY_VISIBILITY InstCombiner
   TargetLibraryInfo *TLI;
   bool MadeIRChange;
   LibCallSimplifier *Simplifier;
+  bool MinimizeSize;
 public:
   /// Worklist - All of the instructions that need to be simplified.
   InstCombineWorklist Worklist;
@@ -84,15 +85,16 @@ public:
   /// instructions into the worklist when they are created.
   typedef IRBuilder<true, TargetFolder, InstCombineIRInserter> BuilderTy;
   BuilderTy *Builder;
-      
+
   static char ID; // Pass identification, replacement for typeid
   InstCombiner() : FunctionPass(ID), TD(0), Builder(0) {
+    MinimizeSize = false;
     initializeInstCombinerPass(*PassRegistry::getPassRegistry());
   }
 
 public:
   virtual bool runOnFunction(Function &F);
-  
+
   bool DoOneIteration(Function &F, unsigned ItNum);
 
   virtual void getAnalysisUsage(AnalysisUsage &AU) const;
@@ -114,6 +116,8 @@ public:
   Instruction *visitSub(BinaryOperator &I);
   Instruction *visitFSub(BinaryOperator &I);
   Instruction *visitMul(BinaryOperator &I);
+  Value *foldFMulConst(Instruction *FMulOrDiv, ConstantFP *C,
+                       Instruction *InsertBefore);
   Instruction *visitFMul(BinaryOperator &I);
   Instruction *visitURem(BinaryOperator &I);
   Instruction *visitSRem(BinaryOperator &I);
@@ -207,11 +211,11 @@ public:
 private:
   bool ShouldChangeType(Type *From, Type *To) const;
   Value *dyn_castNegVal(Value *V) const;
-  Value *dyn_castFNegVal(Value *V) const;
-  Type *FindElementAtOffset(Type *Ty, int64_t Offset, 
+  Value *dyn_castFNegVal(Value *V, bool NoSignedZero=false) const;
+  Type *FindElementAtOffset(Type *Ty, int64_t Offset,
                                   SmallVectorImpl<Value*> &NewIndices);
   Instruction *FoldOpIntoSelect(Instruction &Op, SelectInst *SI);
-                                 
+
   /// ShouldOptimizeCast - Return true if the cast from "V to Ty" actually
   /// results in any code being generated and is interesting to optimize out. If
   /// the cast can be eliminated by some other simple transformation, we prefer
@@ -243,7 +247,7 @@ public:
     return New;
   }
 
-  // InsertNewInstWith - same as InsertNewInstBefore, but also sets the 
+  // InsertNewInstWith - same as InsertNewInstBefore, but also sets the
   // debug loc.
   //
   Instruction *InsertNewInstWith(Instruction *New, Instruction &Old) {
@@ -259,10 +263,10 @@ public:
   //
   Instruction *ReplaceInstUsesWith(Instruction &I, Value *V) {
     Worklist.AddUsersToWorkList(I);   // Add all modified instrs to worklist.
-    
+
     // If we are replacing the instruction with itself, this must be in a
     // segment of unreachable code, so just clobber the instruction.
-    if (&I == V) 
+    if (&I == V)
       V = UndefValue::get(I.getType());
 
     DEBUG(errs() << "IC: Replacing " << I << "\n"
@@ -292,13 +296,13 @@ public:
     MadeIRChange = true;
     return 0;  // Don't do anything with FI
   }
-      
+
   void ComputeMaskedBits(Value *V, APInt &KnownZero,
                          APInt &KnownOne, unsigned Depth = 0) const {
     return llvm::ComputeMaskedBits(V, KnownZero, KnownOne, TD, Depth);
   }
-  
-  bool MaskedValueIsZero(Value *V, const APInt &Mask, 
+
+  bool MaskedValueIsZero(Value *V, const APInt &Mask,
                          unsigned Depth = 0) const {
     return llvm::MaskedValueIsZero(V, Mask, TD, Depth);
   }
@@ -321,21 +325,26 @@ private:
 
   /// SimplifyDemandedUseBits - Attempts to replace V with a simpler value
   /// based on the demanded bits.
-  Value *SimplifyDemandedUseBits(Value *V, APInt DemandedMask, 
+  Value *SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
                                  APInt& KnownZero, APInt& KnownOne,
                                  unsigned Depth);
-  bool SimplifyDemandedBits(Use &U, APInt DemandedMask, 
+  bool SimplifyDemandedBits(Use &U, APInt DemandedMask,
                             APInt& KnownZero, APInt& KnownOne,
                             unsigned Depth=0);
-      
+  /// Helper routine of SimplifyDemandedUseBits. It tries to simplify demanded
+  /// bit for "r1 = shr x, c1; r2 = shl r1, c2" instruction sequence.
+  Value *SimplifyShrShlDemandedBits(Instruction *Lsr, Instruction *Sftl,
+                                    APInt DemandedMask, APInt &KnownZero,
+                                    APInt &KnownOne);
+
   /// SimplifyDemandedInstructionBits - Inst is an integer instruction that
   /// SimplifyDemandedBits knows about.  See if the instruction has any
   /// properties that allow us to simplify its operands.
   bool SimplifyDemandedInstructionBits(Instruction &Inst);
-      
+
   Value *SimplifyDemandedVectorElts(Value *V, APInt DemandedElts,
                                     APInt& UndefElts, unsigned Depth = 0);
-    
+
   // FoldOpIntoPhi - Given a binary operator, cast instruction, or select
   // which has a PHI node as operand #0, see if we can fold the instruction
   // into the PHI (which is only possible if all operands to the PHI are
@@ -351,10 +360,10 @@ private:
   Instruction *FoldPHIArgGEPIntoPHI(PHINode &PN);
   Instruction *FoldPHIArgLoadIntoPHI(PHINode &PN);
 
-  
+
   Instruction *OptAndOp(Instruction *Op, ConstantInt *OpRHS,
                         ConstantInt *AndRHS, BinaryOperator &TheAnd);
-  
+
   Value *FoldLogicalPlusAnd(Value *LHS, Value *RHS, ConstantInt *Mask,
                             bool isSub, Instruction &I);
   Value *InsertRangeTest(Value *V, Constant *Lo, Constant *Hi,
@@ -373,8 +382,8 @@ private:
   Value *Descale(Value *Val, APInt Scale, bool &NoSignedWrap);
 };
 
-      
-  
+
+
 } // end namespace llvm.
 
 #endif
diff --git a/contrib/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp b/contrib/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp
index d8257e6..7595da0 100644
--- a/contrib/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp
+++ b/contrib/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp
@@ -13,16 +13,840 @@
 
 #include "InstCombine.h"
 #include "llvm/Analysis/InstructionSimplify.h"
-#include "llvm/DataLayout.h"
+#include "llvm/IR/DataLayout.h"
 #include "llvm/Support/GetElementPtrTypeIterator.h"
 #include "llvm/Support/PatternMatch.h"
 using namespace llvm;
 using namespace PatternMatch;
 
+namespace {
+
+  /// Class representing coefficient of floating-point addend.
+  /// This class needs to be highly efficient, which is especially true for
+  /// the constructor. As of I write this comment, the cost of the default
+  /// constructor is merely 4-byte-store-zero (Assuming compiler is able to 
+  /// perform write-merging).
+  /// 
+  class FAddendCoef {
+  public:
+    // The constructor has to initialize a APFloat, which is uncessary for
+    // most addends which have coefficient either 1 or -1. So, the constructor
+    // is expensive. In order to avoid the cost of the constructor, we should
+    // reuse some instances whenever possible. The pre-created instances
+    // FAddCombine::Add[0-5] embodies this idea.
+    //
+    FAddendCoef() : IsFp(false), BufHasFpVal(false), IntVal(0) {}
+    ~FAddendCoef();
+  
+    void set(short C) {
+      assert(!insaneIntVal(C) && "Insane coefficient");
+      IsFp = false; IntVal = C;
+    }
+  
+    void set(const APFloat& C);
+
+    void negate();
+  
+    bool isZero() const { return isInt() ? !IntVal : getFpVal().isZero(); }
+    Value *getValue(Type *) const;
+  
+    // If possible, don't define operator+/operator- etc because these
+    // operators inevitably call FAddendCoef's constructor which is not cheap.
+    void operator=(const FAddendCoef &A);
+    void operator+=(const FAddendCoef &A);
+    void operator-=(const FAddendCoef &A);
+    void operator*=(const FAddendCoef &S);
+  
+    bool isOne() const { return isInt() && IntVal == 1; }
+    bool isTwo() const { return isInt() && IntVal == 2; }
+    bool isMinusOne() const { return isInt() && IntVal == -1; }
+    bool isMinusTwo() const { return isInt() && IntVal == -2; }
+  
+  private:
+    bool insaneIntVal(int V) { return V > 4 || V < -4; }
+    APFloat *getFpValPtr(void)
+      { return reinterpret_cast<APFloat*>(&FpValBuf.buffer[0]); }
+    const APFloat *getFpValPtr(void) const
+      { return reinterpret_cast<const APFloat*>(&FpValBuf.buffer[0]); }
+
+    const APFloat &getFpVal(void) const {
+      assert(IsFp && BufHasFpVal && "Incorret state");
+      return *getFpValPtr();
+    }
+
+    APFloat &getFpVal(void)
+      { assert(IsFp && BufHasFpVal && "Incorret state"); return *getFpValPtr(); }
+  
+    bool isInt() const { return !IsFp; }
+
+    // If the coefficient is represented by an integer, promote it to a
+    // floating point. 
+    void convertToFpType(const fltSemantics &Sem);
+
+    // Construct an APFloat from a signed integer.
+    // TODO: We should get rid of this function when APFloat can be constructed
+    //       from an *SIGNED* integer. 
+    APFloat createAPFloatFromInt(const fltSemantics &Sem, int Val);
+  private:
+
+    bool IsFp;
+  
+    // True iff FpValBuf contains an instance of APFloat.
+    bool BufHasFpVal;
+  
+    // The integer coefficient of an individual addend is either 1 or -1,
+    // and we try to simplify at most 4 addends from neighboring at most
+    // two instructions. So the range of <IntVal> falls in [-4, 4]. APInt
+    // is overkill of this end.
+    short IntVal;
+
+    AlignedCharArrayUnion<APFloat> FpValBuf;
+  };
+  
+  /// FAddend is used to represent floating-point addend. An addend is
+  /// represented as <C, V>, where the V is a symbolic value, and C is a
+  /// constant coefficient. A constant addend is represented as <C, 0>.
+  ///
+  class FAddend {
+  public:
+    FAddend() { Val = 0; }
+  
+    Value *getSymVal (void) const { return Val; }
+    const FAddendCoef &getCoef(void) const { return Coeff; }
+  
+    bool isConstant() const { return Val == 0; }
+    bool isZero() const { return Coeff.isZero(); }
+
+    void set(short Coefficient, Value *V) { Coeff.set(Coefficient), Val = V; }
+    void set(const APFloat& Coefficient, Value *V)
+      { Coeff.set(Coefficient); Val = V; }
+    void set(const ConstantFP* Coefficient, Value *V)
+      { Coeff.set(Coefficient->getValueAPF()); Val = V; }
+  
+    void negate() { Coeff.negate(); }
+  
+    /// Drill down the U-D chain one step to find the definition of V, and
+    /// try to break the definition into one or two addends.
+    static unsigned drillValueDownOneStep(Value* V, FAddend &A0, FAddend &A1);
+  
+    /// Similar to FAddend::drillDownOneStep() except that the value being
+    /// splitted is the addend itself.
+    unsigned drillAddendDownOneStep(FAddend &Addend0, FAddend &Addend1) const;
+  
+    void operator+=(const FAddend &T) {
+      assert((Val == T.Val) && "Symbolic-values disagree");
+      Coeff += T.Coeff;
+    }
+
+  private:
+    void Scale(const FAddendCoef& ScaleAmt) { Coeff *= ScaleAmt; }
+  
+    // This addend has the value of "Coeff * Val".
+    Value *Val;
+    FAddendCoef Coeff;
+  };
+  
+  /// FAddCombine is the class for optimizing an unsafe fadd/fsub along
+  /// with its neighboring at most two instructions.
+  ///
+  class FAddCombine {
+  public:
+    FAddCombine(InstCombiner::BuilderTy *B) : Builder(B), Instr(0) {}
+    Value *simplify(Instruction *FAdd);
+  
+  private:
+    typedef SmallVector<const FAddend*, 4> AddendVect;
+  
+    Value *simplifyFAdd(AddendVect& V, unsigned InstrQuota);
+
+    Value *performFactorization(Instruction *I);
+
+    /// Convert given addend to a Value
+    Value *createAddendVal(const FAddend &A, bool& NeedNeg);
+    
+    /// Return the number of instructions needed to emit the N-ary addition.
+    unsigned calcInstrNumber(const AddendVect& Vect);
+    Value *createFSub(Value *Opnd0, Value *Opnd1);
+    Value *createFAdd(Value *Opnd0, Value *Opnd1);
+    Value *createFMul(Value *Opnd0, Value *Opnd1);
+    Value *createFDiv(Value *Opnd0, Value *Opnd1);
+    Value *createFNeg(Value *V);
+    Value *createNaryFAdd(const AddendVect& Opnds, unsigned InstrQuota);
+    void createInstPostProc(Instruction *NewInst);
+  
+    InstCombiner::BuilderTy *Builder;
+    Instruction *Instr;
+  
+  private:
+     // Debugging stuff are clustered here.
+    #ifndef NDEBUG
+      unsigned CreateInstrNum;
+      void initCreateInstNum() { CreateInstrNum = 0; }
+      void incCreateInstNum() { CreateInstrNum++; }
+    #else
+      void initCreateInstNum() {}
+      void incCreateInstNum() {}
+    #endif
+  };
+} 
+
+//===----------------------------------------------------------------------===//
+//
+// Implementation of
+//    {FAddendCoef, FAddend, FAddition, FAddCombine}.
+//
+//===----------------------------------------------------------------------===//
+FAddendCoef::~FAddendCoef() {
+  if (BufHasFpVal)
+    getFpValPtr()->~APFloat();
+}
+
+void FAddendCoef::set(const APFloat& C) {
+  APFloat *P = getFpValPtr();
+
+  if (isInt()) {
+    // As the buffer is meanless byte stream, we cannot call
+    // APFloat::operator=().
+    new(P) APFloat(C);
+  } else
+    *P = C;
+
+  IsFp = BufHasFpVal = true; 
+}
+
+void FAddendCoef::convertToFpType(const fltSemantics &Sem) {
+  if (!isInt())
+    return;
+
+  APFloat *P = getFpValPtr();
+  if (IntVal > 0)
+    new(P) APFloat(Sem, IntVal);
+  else {
+    new(P) APFloat(Sem, 0 - IntVal);
+    P->changeSign();
+  }
+  IsFp = BufHasFpVal = true; 
+}
+
+APFloat FAddendCoef::createAPFloatFromInt(const fltSemantics &Sem, int Val) {
+  if (Val >= 0)
+    return APFloat(Sem, Val);
+
+  APFloat T(Sem, 0 - Val);
+  T.changeSign();
+
+  return T;
+}
+
+void FAddendCoef::operator=(const FAddendCoef &That) {
+  if (That.isInt())
+    set(That.IntVal);
+  else
+    set(That.getFpVal());
+}
+
+void FAddendCoef::operator+=(const FAddendCoef &That) {
+  enum APFloat::roundingMode RndMode = APFloat::rmNearestTiesToEven;
+  if (isInt() == That.isInt()) {
+    if (isInt())
+      IntVal += That.IntVal;
+    else
+      getFpVal().add(That.getFpVal(), RndMode);
+    return;
+  }
+  
+  if (isInt()) {
+    const APFloat &T = That.getFpVal();
+    convertToFpType(T.getSemantics());
+    getFpVal().add(T, RndMode);
+    return;
+  }
+  
+  APFloat &T = getFpVal();
+  T.add(createAPFloatFromInt(T.getSemantics(), That.IntVal), RndMode);
+}
+
+void FAddendCoef::operator-=(const FAddendCoef &That) {
+  enum APFloat::roundingMode RndMode = APFloat::rmNearestTiesToEven;
+  if (isInt() == That.isInt()) {
+    if (isInt())
+      IntVal -= That.IntVal;
+    else
+      getFpVal().subtract(That.getFpVal(), RndMode);
+    return;
+  }
+  
+  if (isInt()) {
+    const APFloat &T = That.getFpVal();
+    convertToFpType(T.getSemantics());
+    getFpVal().subtract(T, RndMode);
+    return;
+  }
+
+  APFloat &T = getFpVal();
+  T.subtract(createAPFloatFromInt(T.getSemantics(), IntVal), RndMode);
+}
+
+void FAddendCoef::operator*=(const FAddendCoef &That) {
+  if (That.isOne())
+    return;
+
+  if (That.isMinusOne()) {
+    negate();
+    return;
+  }
+
+  if (isInt() && That.isInt()) {
+    int Res = IntVal * (int)That.IntVal;
+    assert(!insaneIntVal(Res) && "Insane int value");
+    IntVal = Res;
+    return;
+  }
+
+  const fltSemantics &Semantic = 
+    isInt() ? That.getFpVal().getSemantics() : getFpVal().getSemantics();
+
+  if (isInt())
+    convertToFpType(Semantic);
+  APFloat &F0 = getFpVal();
+
+  if (That.isInt())
+    F0.multiply(createAPFloatFromInt(Semantic, That.IntVal),
+                APFloat::rmNearestTiesToEven);
+  else
+    F0.multiply(That.getFpVal(), APFloat::rmNearestTiesToEven);
+
+  return;
+}
+
+void FAddendCoef::negate() {
+  if (isInt())
+    IntVal = 0 - IntVal;
+  else
+    getFpVal().changeSign();
+}
+
+Value *FAddendCoef::getValue(Type *Ty) const {
+  return isInt() ?
+    ConstantFP::get(Ty, float(IntVal)) :
+    ConstantFP::get(Ty->getContext(), getFpVal());
+}
+
+// The definition of <Val>     Addends
+// =========================================
+//  A + B                     <1, A>, <1,B>
+//  A - B                     <1, A>, <1,B>
+//  0 - B                     <-1, B>
+//  C * A,                    <C, A>
+//  A + C                     <1, A> <C, NULL> 
+//  0 +/- 0                   <0, NULL> (corner case)
+//
+// Legend: A and B are not constant, C is constant
+// 
+unsigned FAddend::drillValueDownOneStep
+  (Value *Val, FAddend &Addend0, FAddend &Addend1) {
+  Instruction *I = 0;
+  if (Val == 0 || !(I = dyn_cast<Instruction>(Val)))
+    return 0;
+
+  unsigned Opcode = I->getOpcode();
+
+  if (Opcode == Instruction::FAdd || Opcode == Instruction::FSub) {
+    ConstantFP *C0, *C1;
+    Value *Opnd0 = I->getOperand(0);
+    Value *Opnd1 = I->getOperand(1);
+    if ((C0 = dyn_cast<ConstantFP>(Opnd0)) && C0->isZero())
+      Opnd0 = 0;
+
+    if ((C1 = dyn_cast<ConstantFP>(Opnd1)) && C1->isZero())
+      Opnd1 = 0;
+
+    if (Opnd0) {
+      if (!C0)
+        Addend0.set(1, Opnd0);
+      else
+        Addend0.set(C0, 0);
+    }
+
+    if (Opnd1) {
+      FAddend &Addend = Opnd0 ? Addend1 : Addend0;
+      if (!C1)
+        Addend.set(1, Opnd1);
+      else
+        Addend.set(C1, 0);
+      if (Opcode == Instruction::FSub)
+        Addend.negate();
+    }
+
+    if (Opnd0 || Opnd1)
+      return Opnd0 && Opnd1 ? 2 : 1;
+
+    // Both operands are zero. Weird!
+    Addend0.set(APFloat(C0->getValueAPF().getSemantics()), 0);
+    return 1;
+  }
+
+  if (I->getOpcode() == Instruction::FMul) {
+    Value *V0 = I->getOperand(0);
+    Value *V1 = I->getOperand(1);
+    if (ConstantFP *C = dyn_cast<ConstantFP>(V0)) {
+      Addend0.set(C, V1);
+      return 1;
+    }
+
+    if (ConstantFP *C = dyn_cast<ConstantFP>(V1)) {
+      Addend0.set(C, V0);
+      return 1;
+    }
+  }
+
+  return 0;
+}
+
+// Try to break *this* addend into two addends. e.g. Suppose this addend is
+// <2.3, V>, and V = X + Y, by calling this function, we obtain two addends,
+// i.e. <2.3, X> and <2.3, Y>.
+//
+unsigned FAddend::drillAddendDownOneStep
+  (FAddend &Addend0, FAddend &Addend1) const {
+  if (isConstant())
+    return 0;
+
+  unsigned BreakNum = FAddend::drillValueDownOneStep(Val, Addend0, Addend1);
+  if (!BreakNum || Coeff.isOne()) 
+    return BreakNum;
+
+  Addend0.Scale(Coeff);
+
+  if (BreakNum == 2)
+    Addend1.Scale(Coeff);
+
+  return BreakNum;
+}
+
+// Try to perform following optimization on the input instruction I. Return the
+// simplified expression if was successful; otherwise, return 0.
+//
+//   Instruction "I" is                Simplified into
+// -------------------------------------------------------
+//   (x * y) +/- (x * z)               x * (y +/- z)
+//   (y / x) +/- (z / x)               (y +/- z) / x
+//
+Value *FAddCombine::performFactorization(Instruction *I) {
+  assert((I->getOpcode() == Instruction::FAdd ||
+          I->getOpcode() == Instruction::FSub) && "Expect add/sub");
+  
+  Instruction *I0 = dyn_cast<Instruction>(I->getOperand(0));
+  Instruction *I1 = dyn_cast<Instruction>(I->getOperand(1));
+  
+  if (!I0 || !I1 || I0->getOpcode() != I1->getOpcode())
+    return 0;
+
+  bool isMpy = false;
+  if (I0->getOpcode() == Instruction::FMul)
+    isMpy = true;
+  else if (I0->getOpcode() != Instruction::FDiv)
+    return 0;
+
+  Value *Opnd0_0 = I0->getOperand(0);
+  Value *Opnd0_1 = I0->getOperand(1);
+  Value *Opnd1_0 = I1->getOperand(0);
+  Value *Opnd1_1 = I1->getOperand(1);
+
+  //  Input Instr I       Factor   AddSub0  AddSub1 
+  //  ----------------------------------------------
+  // (x*y) +/- (x*z)        x        y         z
+  // (y/x) +/- (z/x)        x        y         z
+  //
+  Value *Factor = 0;
+  Value *AddSub0 = 0, *AddSub1 = 0;
+  
+  if (isMpy) {
+    if (Opnd0_0 == Opnd1_0 || Opnd0_0 == Opnd1_1)
+      Factor = Opnd0_0;
+    else if (Opnd0_1 == Opnd1_0 || Opnd0_1 == Opnd1_1)
+      Factor = Opnd0_1;
+
+    if (Factor) {
+      AddSub0 = (Factor == Opnd0_0) ? Opnd0_1 : Opnd0_0;
+      AddSub1 = (Factor == Opnd1_0) ? Opnd1_1 : Opnd1_0;
+    }
+  } else if (Opnd0_1 == Opnd1_1) {
+    Factor = Opnd0_1;
+    AddSub0 = Opnd0_0;
+    AddSub1 = Opnd1_0;
+  }
+
+  if (!Factor)
+    return 0;
+
+  // Create expression "NewAddSub = AddSub0 +/- AddsSub1"
+  Value *NewAddSub = (I->getOpcode() == Instruction::FAdd) ?
+                      createFAdd(AddSub0, AddSub1) :
+                      createFSub(AddSub0, AddSub1);
+  if (ConstantFP *CFP = dyn_cast<ConstantFP>(NewAddSub)) {
+    const APFloat &F = CFP->getValueAPF();
+    if (!F.isNormal() || F.isDenormal())
+      return 0;
+  }
+
+  if (isMpy)
+    return createFMul(Factor, NewAddSub);
+ 
+  return createFDiv(NewAddSub, Factor);
+}
+
+Value *FAddCombine::simplify(Instruction *I) {
+  assert(I->hasUnsafeAlgebra() && "Should be in unsafe mode");
+
+  // Currently we are not able to handle vector type.
+  if (I->getType()->isVectorTy())
+    return 0;
+
+  assert((I->getOpcode() == Instruction::FAdd ||
+          I->getOpcode() == Instruction::FSub) && "Expect add/sub");
+
+  // Save the instruction before calling other member-functions. 
+  Instr = I;
+
+  FAddend Opnd0, Opnd1, Opnd0_0, Opnd0_1, Opnd1_0, Opnd1_1;
+
+  unsigned OpndNum = FAddend::drillValueDownOneStep(I, Opnd0, Opnd1);
+
+  // Step 1: Expand the 1st addend into Opnd0_0 and Opnd0_1.
+  unsigned Opnd0_ExpNum = 0;
+  unsigned Opnd1_ExpNum = 0;
+
+  if (!Opnd0.isConstant()) 
+    Opnd0_ExpNum = Opnd0.drillAddendDownOneStep(Opnd0_0, Opnd0_1);
+
+  // Step 2: Expand the 2nd addend into Opnd1_0 and Opnd1_1.
+  if (OpndNum == 2 && !Opnd1.isConstant())
+    Opnd1_ExpNum = Opnd1.drillAddendDownOneStep(Opnd1_0, Opnd1_1);
+
+  // Step 3: Try to optimize Opnd0_0 + Opnd0_1 + Opnd1_0 + Opnd1_1
+  if (Opnd0_ExpNum && Opnd1_ExpNum) {
+    AddendVect AllOpnds;
+    AllOpnds.push_back(&Opnd0_0);
+    AllOpnds.push_back(&Opnd1_0);
+    if (Opnd0_ExpNum == 2)
+      AllOpnds.push_back(&Opnd0_1);
+    if (Opnd1_ExpNum == 2)
+      AllOpnds.push_back(&Opnd1_1);
+
+    // Compute instruction quota. We should save at least one instruction.
+    unsigned InstQuota = 0;
+
+    Value *V0 = I->getOperand(0);
+    Value *V1 = I->getOperand(1);
+    InstQuota = ((!isa<Constant>(V0) && V0->hasOneUse()) &&  
+                 (!isa<Constant>(V1) && V1->hasOneUse())) ? 2 : 1;
+
+    if (Value *R = simplifyFAdd(AllOpnds, InstQuota))
+      return R;
+  }
+
+  if (OpndNum != 2) {
+    // The input instruction is : "I=0.0 +/- V". If the "V" were able to be
+    // splitted into two addends, say "V = X - Y", the instruction would have
+    // been optimized into "I = Y - X" in the previous steps.
+    //
+    const FAddendCoef &CE = Opnd0.getCoef();
+    return CE.isOne() ? Opnd0.getSymVal() : 0;
+  }
+
+  // step 4: Try to optimize Opnd0 + Opnd1_0 [+ Opnd1_1]
+  if (Opnd1_ExpNum) {
+    AddendVect AllOpnds;
+    AllOpnds.push_back(&Opnd0);
+    AllOpnds.push_back(&Opnd1_0);
+    if (Opnd1_ExpNum == 2)
+      AllOpnds.push_back(&Opnd1_1);
+
+    if (Value *R = simplifyFAdd(AllOpnds, 1))
+      return R;
+  }
+
+  // step 5: Try to optimize Opnd1 + Opnd0_0 [+ Opnd0_1]
+  if (Opnd0_ExpNum) {
+    AddendVect AllOpnds;
+    AllOpnds.push_back(&Opnd1);
+    AllOpnds.push_back(&Opnd0_0);
+    if (Opnd0_ExpNum == 2)
+      AllOpnds.push_back(&Opnd0_1);
+
+    if (Value *R = simplifyFAdd(AllOpnds, 1))
+      return R;
+  }
+
+  // step 6: Try factorization as the last resort, 
+  return performFactorization(I);
+}
+
+Value *FAddCombine::simplifyFAdd(AddendVect& Addends, unsigned InstrQuota) {
+
+  unsigned AddendNum = Addends.size();
+  assert(AddendNum <= 4 && "Too many addends");
+
+  // For saving intermediate results; 
+  unsigned NextTmpIdx = 0;
+  FAddend TmpResult[3];
+
+  // Points to the constant addend of the resulting simplified expression.
+  // If the resulting expr has constant-addend, this constant-addend is
+  // desirable to reside at the top of the resulting expression tree. Placing
+  // constant close to supper-expr(s) will potentially reveal some optimization
+  // opportunities in super-expr(s).
+  //
+  const FAddend *ConstAdd = 0;
+
+  // Simplified addends are placed <SimpVect>.
+  AddendVect SimpVect;
+
+  // The outer loop works on one symbolic-value at a time. Suppose the input
+  // addends are : <a1, x>, <b1, y>, <a2, x>, <c1, z>, <b2, y>, ... 
+  // The symbolic-values will be processed in this order: x, y, z.
+  //
+  for (unsigned SymIdx = 0; SymIdx < AddendNum; SymIdx++) {
+
+    const FAddend *ThisAddend = Addends[SymIdx];
+    if (!ThisAddend) {
+      // This addend was processed before.
+      continue;
+    }
+
+    Value *Val = ThisAddend->getSymVal();
+    unsigned StartIdx = SimpVect.size();
+    SimpVect.push_back(ThisAddend);
+
+    // The inner loop collects addends sharing same symbolic-value, and these
+    // addends will be later on folded into a single addend. Following above
+    // example, if the symbolic value "y" is being processed, the inner loop
+    // will collect two addends "<b1,y>" and "<b2,Y>". These two addends will
+    // be later on folded into "<b1+b2, y>".
+    //
+    for (unsigned SameSymIdx = SymIdx + 1;
+         SameSymIdx < AddendNum; SameSymIdx++) {
+      const FAddend *T = Addends[SameSymIdx];
+      if (T && T->getSymVal() == Val) {
+        // Set null such that next iteration of the outer loop will not process
+        // this addend again.
+        Addends[SameSymIdx] = 0; 
+        SimpVect.push_back(T);
+      }
+    }
+
+    // If multiple addends share same symbolic value, fold them together.
+    if (StartIdx + 1 != SimpVect.size()) {
+      FAddend &R = TmpResult[NextTmpIdx ++];
+      R = *SimpVect[StartIdx];
+      for (unsigned Idx = StartIdx + 1; Idx < SimpVect.size(); Idx++)
+        R += *SimpVect[Idx];
+
+      // Pop all addends being folded and push the resulting folded addend.
+      SimpVect.resize(StartIdx); 
+      if (Val != 0) {
+        if (!R.isZero()) {
+          SimpVect.push_back(&R);
+        }
+      } else {
+        // Don't push constant addend at this time. It will be the last element
+        // of <SimpVect>.
+        ConstAdd = &R;
+      }
+    }
+  }
+
+  assert((NextTmpIdx <= sizeof(TmpResult)/sizeof(TmpResult[0]) + 1) && 
+         "out-of-bound access");
+
+  if (ConstAdd)
+    SimpVect.push_back(ConstAdd);
+
+  Value *Result;
+  if (!SimpVect.empty())
+    Result = createNaryFAdd(SimpVect, InstrQuota);
+  else {
+    // The addition is folded to 0.0.
+    Result = ConstantFP::get(Instr->getType(), 0.0);
+  }
+
+  return Result;
+}
+
+Value *FAddCombine::createNaryFAdd
+  (const AddendVect &Opnds, unsigned InstrQuota) {
+  assert(!Opnds.empty() && "Expect at least one addend");
+
+  // Step 1: Check if the # of instructions needed exceeds the quota.
+  // 
+  unsigned InstrNeeded = calcInstrNumber(Opnds);
+  if (InstrNeeded > InstrQuota)
+    return 0;
+
+  initCreateInstNum();
+
+  // step 2: Emit the N-ary addition.
+  // Note that at most three instructions are involved in Fadd-InstCombine: the
+  // addition in question, and at most two neighboring instructions.
+  // The resulting optimized addition should have at least one less instruction
+  // than the original addition expression tree. This implies that the resulting
+  // N-ary addition has at most two instructions, and we don't need to worry
+  // about tree-height when constructing the N-ary addition.
+
+  Value *LastVal = 0;
+  bool LastValNeedNeg = false;
+
+  // Iterate the addends, creating fadd/fsub using adjacent two addends.
+  for (AddendVect::const_iterator I = Opnds.begin(), E = Opnds.end();
+       I != E; I++) {
+    bool NeedNeg; 
+    Value *V = createAddendVal(**I, NeedNeg);
+    if (!LastVal) {
+      LastVal = V;
+      LastValNeedNeg = NeedNeg;
+      continue;
+    }
+
+    if (LastValNeedNeg == NeedNeg) {
+      LastVal = createFAdd(LastVal, V);
+      continue;
+    }
+
+    if (LastValNeedNeg)
+      LastVal = createFSub(V, LastVal);
+    else
+      LastVal = createFSub(LastVal, V);
+
+    LastValNeedNeg = false;
+  }
+
+  if (LastValNeedNeg) {
+    LastVal = createFNeg(LastVal);
+  }
+
+  #ifndef NDEBUG
+    assert(CreateInstrNum == InstrNeeded && 
+           "Inconsistent in instruction numbers");
+  #endif
+
+  return LastVal;
+}
+
+Value *FAddCombine::createFSub
+  (Value *Opnd0, Value *Opnd1) {
+  Value *V = Builder->CreateFSub(Opnd0, Opnd1);
+  if (Instruction *I = dyn_cast<Instruction>(V))
+    createInstPostProc(I);
+  return V;
+}
+
+Value *FAddCombine::createFNeg(Value *V) {
+  Value *Zero = cast<Value>(ConstantFP::get(V->getType(), 0.0));
+  return createFSub(Zero, V);
+}
+
+Value *FAddCombine::createFAdd
+  (Value *Opnd0, Value *Opnd1) {
+  Value *V = Builder->CreateFAdd(Opnd0, Opnd1);
+  if (Instruction *I = dyn_cast<Instruction>(V))
+    createInstPostProc(I);
+  return V;
+}
+
+Value *FAddCombine::createFMul(Value *Opnd0, Value *Opnd1) {
+  Value *V = Builder->CreateFMul(Opnd0, Opnd1);
+  if (Instruction *I = dyn_cast<Instruction>(V))
+    createInstPostProc(I);
+  return V;
+}
+
+Value *FAddCombine::createFDiv(Value *Opnd0, Value *Opnd1) {
+  Value *V = Builder->CreateFDiv(Opnd0, Opnd1);
+  if (Instruction *I = dyn_cast<Instruction>(V))
+    createInstPostProc(I);
+  return V;
+}
+
+void FAddCombine::createInstPostProc(Instruction *NewInstr) {
+  NewInstr->setDebugLoc(Instr->getDebugLoc());
+
+  // Keep track of the number of instruction created.
+  incCreateInstNum();
+
+  // Propagate fast-math flags
+  NewInstr->setFastMathFlags(Instr->getFastMathFlags());
+}
+
+// Return the number of instruction needed to emit the N-ary addition.
+// NOTE: Keep this function in sync with createAddendVal().
+unsigned FAddCombine::calcInstrNumber(const AddendVect &Opnds) {
+  unsigned OpndNum = Opnds.size();
+  unsigned InstrNeeded = OpndNum - 1;
+
+  // The number of addends in the form of "(-1)*x". 
+  unsigned NegOpndNum = 0; 
+
+  // Adjust the number of instructions needed to emit the N-ary add.
+  for (AddendVect::const_iterator I = Opnds.begin(), E = Opnds.end();
+       I != E; I++) {
+    const FAddend *Opnd = *I;
+    if (Opnd->isConstant())
+      continue;
+
+    const FAddendCoef &CE = Opnd->getCoef();
+    if (CE.isMinusOne() || CE.isMinusTwo())
+      NegOpndNum++;
+
+    // Let the addend be "c * x". If "c == +/-1", the value of the addend
+    // is immediately available; otherwise, it needs exactly one instruction
+    // to evaluate the value.
+    if (!CE.isMinusOne() && !CE.isOne())
+      InstrNeeded++;
+  }
+  if (NegOpndNum == OpndNum)
+    InstrNeeded++;
+  return InstrNeeded;
+}
+
+// Input Addend        Value           NeedNeg(output)
+// ================================================================
+// Constant C          C               false
+// <+/-1, V>           V               coefficient is -1
+// <2/-2, V>          "fadd V, V"      coefficient is -2
+// <C, V>             "fmul V, C"      false
+//
+// NOTE: Keep this function in sync with FAddCombine::calcInstrNumber.
+Value *FAddCombine::createAddendVal
+  (const FAddend &Opnd, bool &NeedNeg) {
+  const FAddendCoef &Coeff = Opnd.getCoef();
+
+  if (Opnd.isConstant()) {
+    NeedNeg = false;
+    return Coeff.getValue(Instr->getType());
+  }
+
+  Value *OpndVal = Opnd.getSymVal();
+
+  if (Coeff.isMinusOne() || Coeff.isOne()) {
+    NeedNeg = Coeff.isMinusOne();
+    return OpndVal;
+  }
+
+  if (Coeff.isTwo() || Coeff.isMinusTwo()) {
+    NeedNeg = Coeff.isMinusTwo();
+    return createFAdd(OpndVal, OpndVal);
+  }
+
+  NeedNeg = false;
+  return createFMul(OpndVal, Coeff.getValue(Instr->getType()));
+}
+
 /// AddOne - Add one to a ConstantInt.
 static Constant *AddOne(Constant *C) {
   return ConstantExpr::getAdd(C, ConstantInt::get(C->getType(), 1));
 }
+
 /// SubOne - Subtract one from a ConstantInt.
 static Constant *SubOne(ConstantInt *C) {
   return ConstantInt::get(C->getContext(), C->getValue()-1);
@@ -37,10 +861,10 @@ static Constant *SubOne(ConstantInt *C) {
 static inline Value *dyn_castFoldableMul(Value *V, ConstantInt *&CST) {
   if (!V->hasOneUse() || !V->getType()->isIntegerTy())
     return 0;
-  
+
   Instruction *I = dyn_cast<Instruction>(V);
   if (I == 0) return 0;
-  
+
   if (I->getOpcode() == Instruction::Mul)
     if ((CST = dyn_cast<ConstantInt>(I->getOperand(1))))
       return I->getOperand(0);
@@ -64,22 +888,22 @@ static inline Value *dyn_castFoldableMul(Value *V, ConstantInt *&CST) {
 bool InstCombiner::WillNotOverflowSignedAdd(Value *LHS, Value *RHS) {
   // There are different heuristics we can use for this.  Here are some simple
   // ones.
-  
-  // Add has the property that adding any two 2's complement numbers can only 
+
+  // Add has the property that adding any two 2's complement numbers can only
   // have one carry bit which can change a sign.  As such, if LHS and RHS each
   // have at least two sign bits, we know that the addition of the two values
   // will sign extend fine.
   if (ComputeNumSignBits(LHS) > 1 && ComputeNumSignBits(RHS) > 1)
     return true;
-  
-  
+
+
   // If one of the operands only has one non-zero bit, and if the other operand
   // has a known-zero bit in a more significant place than it (not including the
   // sign bit) the ripple may go up to and fill the zero, but won't change the
   // sign.  For example, (X & ~4) + 1.
-  
+
   // TODO: Implement.
-  
+
   return false;
 }
 
@@ -100,7 +924,7 @@ Instruction *InstCombiner::visitAdd(BinaryOperator &I) {
     const APInt &Val = CI->getValue();
     if (Val.isSignBit())
       return BinaryOperator::CreateXor(LHS, RHS);
-    
+
     // See if SimplifyDemandedBits can simplify this.  This handles stuff like
     // (X & 254)+1 -> (X&254)|1
     if (SimplifyDemandedInstructionBits(I))
@@ -110,7 +934,7 @@ Instruction *InstCombiner::visitAdd(BinaryOperator &I) {
     if (ZExtInst *ZI = dyn_cast<ZExtInst>(LHS))
       if (ZI->getSrcTy()->isIntegerTy(1))
         return SelectInst::Create(ZI->getOperand(0), AddOne(CI), CI);
-    
+
     Value *XorLHS = 0; ConstantInt *XorRHS = 0;
     if (match(LHS, m_Xor(m_Value(XorLHS), m_ConstantInt(XorRHS)))) {
       uint32_t TySizeBits = I.getType()->getScalarSizeInBits();
@@ -124,13 +948,13 @@ Instruction *InstCombiner::visitAdd(BinaryOperator &I) {
         else if (XorRHS->getValue().isPowerOf2())
           ExtendAmt = TySizeBits - XorRHS->getValue().logBase2() - 1;
       }
-      
+
       if (ExtendAmt) {
         APInt Mask = APInt::getHighBitsSet(TySizeBits, ExtendAmt);
         if (!MaskedValueIsZero(XorLHS, Mask))
           ExtendAmt = 0;
       }
-      
+
       if (ExtendAmt) {
         Constant *ShAmt = ConstantInt::get(I.getType(), ExtendAmt);
         Value *NewShl = Builder->CreateShl(XorLHS, ShAmt, "sext");
@@ -175,7 +999,7 @@ Instruction *InstCombiner::visitAdd(BinaryOperator &I) {
         Value *NewAdd = Builder->CreateAdd(LHSV, RHSV, "sum");
         return BinaryOperator::CreateNeg(NewAdd);
       }
-    
+
     return BinaryOperator::CreateSub(RHS, LHSV);
   }
 
@@ -209,7 +1033,7 @@ Instruction *InstCombiner::visitAdd(BinaryOperator &I) {
       APInt RHSKnownOne(IT->getBitWidth(), 0);
       APInt RHSKnownZero(IT->getBitWidth(), 0);
       ComputeMaskedBits(RHS, RHSKnownZero, RHSKnownOne);
-      
+
       // No bits in common -> bitwise or.
       if ((LHSKnownZero|RHSKnownZero).isAllOnesValue())
         return BinaryOperator::CreateOr(LHS, RHS);
@@ -251,7 +1075,7 @@ Instruction *InstCombiner::visitAdd(BinaryOperator &I) {
       // See if all bits from the first bit set in the Add RHS up are included
       // in the mask.  First, get the rightmost bit.
       const APInt &AddRHSV = CRHS->getValue();
-      
+
       // Form a mask of all bits from the lowest bit added through the top.
       APInt AddRHSHighBits(~((AddRHSV & -AddRHSV)-1));
 
@@ -289,7 +1113,7 @@ Instruction *InstCombiner::visitAdd(BinaryOperator &I) {
       if (match(FV, m_Zero()) && match(TV, m_Sub(m_Value(N), m_Specific(A))))
         // Fold the add into the true select value.
         return SelectInst::Create(SI->getCondition(), N, A);
-      
+
       if (match(TV, m_Zero()) && match(FV, m_Sub(m_Value(N), m_Specific(A))))
         // Fold the add into the false select value.
         return SelectInst::Create(SI->getCondition(), A, N);
@@ -301,18 +1125,18 @@ Instruction *InstCombiner::visitAdd(BinaryOperator &I) {
   if (SExtInst *LHSConv = dyn_cast<SExtInst>(LHS)) {
     // (add (sext x), cst) --> (sext (add x, cst'))
     if (ConstantInt *RHSC = dyn_cast<ConstantInt>(RHS)) {
-      Constant *CI = 
+      Constant *CI =
         ConstantExpr::getTrunc(RHSC, LHSConv->getOperand(0)->getType());
       if (LHSConv->hasOneUse() &&
           ConstantExpr::getSExt(CI, I.getType()) == RHSC &&
           WillNotOverflowSignedAdd(LHSConv->getOperand(0), CI)) {
         // Insert the new, smaller add.
-        Value *NewAdd = Builder->CreateNSWAdd(LHSConv->getOperand(0), 
+        Value *NewAdd = Builder->CreateNSWAdd(LHSConv->getOperand(0),
                                               CI, "addconv");
         return new SExtInst(NewAdd, I.getType());
       }
     }
-    
+
     // (add (sext x), (sext y)) --> (sext (add int x, y))
     if (SExtInst *RHSConv = dyn_cast<SExtInst>(RHS)) {
       // Only do this if x/y have the same type, if at last one of them has a
@@ -323,7 +1147,7 @@ Instruction *InstCombiner::visitAdd(BinaryOperator &I) {
           WillNotOverflowSignedAdd(LHSConv->getOperand(0),
                                    RHSConv->getOperand(0))) {
         // Insert the new integer add.
-        Value *NewAdd = Builder->CreateNSWAdd(LHSConv->getOperand(0), 
+        Value *NewAdd = Builder->CreateNSWAdd(LHSConv->getOperand(0),
                                              RHSConv->getOperand(0), "addconv");
         return new SExtInst(NewAdd, I.getType());
       }
@@ -351,18 +1175,12 @@ Instruction *InstCombiner::visitFAdd(BinaryOperator &I) {
   bool Changed = SimplifyAssociativeOrCommutative(I);
   Value *LHS = I.getOperand(0), *RHS = I.getOperand(1);
 
-  if (Constant *RHSC = dyn_cast<Constant>(RHS)) {
-    // X + 0 --> X
-    if (ConstantFP *CFP = dyn_cast<ConstantFP>(RHSC)) {
-      if (CFP->isExactlyValue(ConstantFP::getNegativeZero
-                              (I.getType())->getValueAPF()))
-        return ReplaceInstUsesWith(I, LHS);
-    }
+  if (Value *V = SimplifyFAddInst(LHS, RHS, I.getFastMathFlags(), TD))
+    return ReplaceInstUsesWith(I, V);
 
-    if (isa<PHINode>(LHS))
-      if (Instruction *NV = FoldOpIntoPhi(I))
-        return NV;
-  }
+  if (isa<Constant>(RHS) && isa<PHINode>(LHS))
+    if (Instruction *NV = FoldOpIntoPhi(I))
+      return NV;
 
   // -A + B  -->  B - A
   // -A + -B  -->  -(A + B)
@@ -374,11 +1192,6 @@ Instruction *InstCombiner::visitFAdd(BinaryOperator &I) {
     if (Value *V = dyn_castFNegVal(RHS))
       return BinaryOperator::CreateFSub(LHS, V);
 
-  // Check for X+0.0.  Simplify it to X if we know X is not -0.0.
-  if (ConstantFP *CFP = dyn_cast<ConstantFP>(RHS))
-    if (CFP->getValueAPF().isPosZero() && CannotBeNegativeZero(LHS))
-      return ReplaceInstUsesWith(I, LHS);
-
   // Check for (fadd double (sitofp x), y), see if we can merge this into an
   // integer add followed by a promotion.
   if (SIToFPInst *LHSConv = dyn_cast<SIToFPInst>(LHS)) {
@@ -388,7 +1201,7 @@ Instruction *InstCombiner::visitFAdd(BinaryOperator &I) {
     // requires a constant pool load, and generally allows the add to be better
     // instcombined.
     if (ConstantFP *CFP = dyn_cast<ConstantFP>(RHS)) {
-      Constant *CI = 
+      Constant *CI =
       ConstantExpr::getFPToSI(CFP, LHSConv->getOperand(0)->getType());
       if (LHSConv->hasOneUse() &&
           ConstantExpr::getSIToFP(CI, I.getType()) == CFP &&
@@ -399,7 +1212,7 @@ Instruction *InstCombiner::visitFAdd(BinaryOperator &I) {
         return new SIToFPInst(NewAdd, I.getType());
       }
     }
-    
+
     // (fadd double (sitofp x), (sitofp y)) --> (sitofp (add int x, y))
     if (SIToFPInst *RHSConv = dyn_cast<SIToFPInst>(RHS)) {
       // Only do this if x/y have the same type, if at last one of them has a
@@ -410,13 +1223,18 @@ Instruction *InstCombiner::visitFAdd(BinaryOperator &I) {
           WillNotOverflowSignedAdd(LHSConv->getOperand(0),
                                    RHSConv->getOperand(0))) {
         // Insert the new integer add.
-        Value *NewAdd = Builder->CreateNSWAdd(LHSConv->getOperand(0), 
+        Value *NewAdd = Builder->CreateNSWAdd(LHSConv->getOperand(0),
                                               RHSConv->getOperand(0),"addconv");
         return new SIToFPInst(NewAdd, I.getType());
       }
     }
   }
-  
+
+  if (I.hasUnsafeAlgebra()) {
+    if (Value *V = FAddCombine(Builder).simplify(&I))
+      return ReplaceInstUsesWith(I, V);
+  }
+
   return Changed ? &I : 0;
 }
 
@@ -428,7 +1246,7 @@ Instruction *InstCombiner::visitFAdd(BinaryOperator &I) {
 Value *InstCombiner::OptimizePointerDifference(Value *LHS, Value *RHS,
                                                Type *Ty) {
   assert(TD && "Must have target data info for this");
-  
+
   // If LHS is a gep based on RHS or RHS is a gep based on LHS, we can optimize
   // this.
   bool Swapped = false;
@@ -451,7 +1269,7 @@ Value *InstCombiner::OptimizePointerDifference(Value *LHS, Value *RHS,
       }
     }
   }
-  
+
   if (GEPOperator *RHSGEP = dyn_cast<GEPOperator>(RHS)) {
     // X - (gep X, ...)
     if (RHSGEP->getOperand(0) == LHS) {
@@ -467,16 +1285,16 @@ Value *InstCombiner::OptimizePointerDifference(Value *LHS, Value *RHS,
       }
     }
   }
-  
+
   // Avoid duplicating the arithmetic if GEP2 has non-constant indices and
   // multiple users.
   if (GEP1 == 0 ||
       (GEP2 != 0 && !GEP2->hasAllConstantIndices() && !GEP2->hasOneUse()))
     return 0;
-  
+
   // Emit the offset of the GEP and an intptr_t.
   Value *Result = EmitGEPOffset(GEP1);
-  
+
   // If we had a constant expression GEP on the other side offsetting the
   // pointer, subtract it from the offset we have.
   if (GEP2) {
@@ -517,7 +1335,7 @@ Instruction *InstCombiner::visitSub(BinaryOperator &I) {
   // Replace (-1 - A) with (~A).
   if (match(Op0, m_AllOnes()))
     return BinaryOperator::CreateNot(Op1);
-  
+
   if (ConstantInt *C = dyn_cast<ConstantInt>(Op0)) {
     // C - ~X == X + (1+C)
     Value *X = 0;
@@ -551,20 +1369,30 @@ Instruction *InstCombiner::visitSub(BinaryOperator &I) {
 
     if (SimplifyDemandedInstructionBits(I))
       return &I;
+
+    // Fold (sub 0, (zext bool to B)) --> (sext bool to B)
+    if (C->isZero() && match(Op1, m_ZExt(m_Value(X))))
+      if (X->getType()->isIntegerTy(1))
+        return CastInst::CreateSExtOrBitCast(X, Op1->getType());
+
+    // Fold (sub 0, (sext bool to B)) --> (zext bool to B)
+    if (C->isZero() && match(Op1, m_SExt(m_Value(X))))
+      if (X->getType()->isIntegerTy(1))
+        return CastInst::CreateZExtOrBitCast(X, Op1->getType());
   }
 
-  
+
   { Value *Y;
     // X-(X+Y) == -Y    X-(Y+X) == -Y
     if (match(Op1, m_Add(m_Specific(Op0), m_Value(Y))) ||
         match(Op1, m_Add(m_Value(Y), m_Specific(Op0))))
       return BinaryOperator::CreateNeg(Y);
-    
+
     // (X-Y)-X == -Y
     if (match(Op0, m_Sub(m_Specific(Op1), m_Value(Y))))
       return BinaryOperator::CreateNeg(Y);
   }
-  
+
   if (Op1->hasOneUse()) {
     Value *X = 0, *Y = 0, *Z = 0;
     Constant *C = 0;
@@ -581,7 +1409,7 @@ Instruction *InstCombiner::visitSub(BinaryOperator &I) {
         match(Op1, m_And(m_Specific(Op0), m_Value(Y))))
       return BinaryOperator::CreateAnd(Op0,
                                   Builder->CreateNot(Y, Y->getName() + ".not"));
-    
+
     // 0 - (X sdiv C)  -> (X sdiv -C)
     if (match(Op1, m_SDiv(m_Value(X), m_Constant(C))) &&
         match(Op0, m_Zero()))
@@ -604,14 +1432,14 @@ Instruction *InstCombiner::visitSub(BinaryOperator &I) {
       C = ConstantExpr::getSub(One, ConstantExpr::getShl(One, CI));
       return BinaryOperator::CreateMul(Op0, C);
     }
-    
+
     // X - A*-B -> X + A*B
     // X - -A*B -> X + A*B
     Value *A, *B;
     if (match(Op1, m_Mul(m_Value(A), m_Neg(m_Value(B)))) ||
         match(Op1, m_Mul(m_Neg(m_Value(A)), m_Value(B))))
       return BinaryOperator::CreateAdd(Op0, Builder->CreateMul(A, B));
-      
+
     // X - A*CI -> X + A*-CI
     // X - CI*A -> X + A*-CI
     if (match(Op1, m_Mul(m_Value(A), m_ConstantInt(CI))) ||
@@ -630,7 +1458,7 @@ Instruction *InstCombiner::visitSub(BinaryOperator &I) {
     if (X == dyn_castFoldableMul(Op1, C2))
       return BinaryOperator::CreateMul(X, ConstantExpr::getSub(C1, C2));
   }
-  
+
   // Optimize pointer differences into the same array into a size.  Consider:
   //  &A[10] - &A[0]: we should compile this to "10".
   if (TD) {
@@ -639,23 +1467,31 @@ Instruction *InstCombiner::visitSub(BinaryOperator &I) {
         match(Op1, m_PtrToInt(m_Value(RHSOp))))
       if (Value *Res = OptimizePointerDifference(LHSOp, RHSOp, I.getType()))
         return ReplaceInstUsesWith(I, Res);
-    
+
     // trunc(p)-trunc(q) -> trunc(p-q)
     if (match(Op0, m_Trunc(m_PtrToInt(m_Value(LHSOp)))) &&
         match(Op1, m_Trunc(m_PtrToInt(m_Value(RHSOp)))))
       if (Value *Res = OptimizePointerDifference(LHSOp, RHSOp, I.getType()))
         return ReplaceInstUsesWith(I, Res);
   }
-  
+
   return 0;
 }
 
 Instruction *InstCombiner::visitFSub(BinaryOperator &I) {
   Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
 
+  if (Value *V = SimplifyFSubInst(Op0, Op1, I.getFastMathFlags(), TD))
+    return ReplaceInstUsesWith(I, V);
+
   // If this is a 'B = x-(-A)', change to B = x+A...
   if (Value *V = dyn_castFNegVal(Op1))
     return BinaryOperator::CreateFAdd(Op0, V);
 
+  if (I.hasUnsafeAlgebra()) {
+    if (Value *V = FAddCombine(Builder).simplify(&I))
+      return ReplaceInstUsesWith(I, V);
+  }
+
   return 0;
 }
diff --git a/contrib/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp b/contrib/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
index 7d0af0d..990cbc3 100644
--- a/contrib/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
+++ b/contrib/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
@@ -12,18 +12,18 @@
 //===----------------------------------------------------------------------===//
 
 #include "InstCombine.h"
-#include "llvm/Intrinsics.h"
 #include "llvm/Analysis/InstructionSimplify.h"
-#include "llvm/Transforms/Utils/CmpInstAnalysis.h"
+#include "llvm/IR/Intrinsics.h"
 #include "llvm/Support/ConstantRange.h"
 #include "llvm/Support/PatternMatch.h"
+#include "llvm/Transforms/Utils/CmpInstAnalysis.h"
 using namespace llvm;
 using namespace PatternMatch;
 
 
 /// AddOne - Add one to a ConstantInt.
-static Constant *AddOne(Constant *C) {
-  return ConstantExpr::getAdd(C, ConstantInt::get(C->getType(), 1));
+static Constant *AddOne(ConstantInt *C) {
+  return ConstantInt::get(C->getContext(), C->getValue() + 1);
 }
 /// SubOne - Subtract one from a ConstantInt.
 static Constant *SubOne(ConstantInt *C) {
@@ -36,15 +36,15 @@ static inline bool isFreeToInvert(Value *V) {
   // ~(~(X)) -> X.
   if (BinaryOperator::isNot(V))
     return true;
-  
+
   // Constants can be considered to be not'ed values.
   if (isa<ConstantInt>(V))
     return true;
-  
+
   // Compares can be inverted if they have a single use.
   if (CmpInst *CI = dyn_cast<CmpInst>(V))
     return CI->hasOneUse();
-  
+
   return false;
 }
 
@@ -56,7 +56,7 @@ static inline Value *dyn_castNotVal(Value *V) {
     if (!isFreeToInvert(Operand))
       return Operand;
   }
-  
+
   // Constants can be considered to be not'ed values...
   if (ConstantInt *C = dyn_cast<ConstantInt>(V))
     return ConstantInt::get(C->getType(), ~C->getValue());
@@ -91,7 +91,7 @@ static unsigned getFCmpCode(FCmpInst::Predicate CC, bool &isOrdered) {
 }
 
 /// getNewICmpValue - This is the complement of getICmpCode, which turns an
-/// opcode and two operands into either a constant true or false, or a brand 
+/// opcode and two operands into either a constant true or false, or a brand
 /// new ICmp instruction. The sign is passed in to determine which kind
 /// of predicate to use in the new icmp instruction.
 static Value *getNewICmpValue(bool Sign, unsigned Code, Value *LHS, Value *RHS,
@@ -118,7 +118,7 @@ static Value *getFCmpValue(bool isordered, unsigned code,
   case 4: Pred = isordered ? FCmpInst::FCMP_OLT : FCmpInst::FCMP_ULT; break;
   case 5: Pred = isordered ? FCmpInst::FCMP_ONE : FCmpInst::FCMP_UNE; break;
   case 6: Pred = isordered ? FCmpInst::FCMP_OLE : FCmpInst::FCMP_ULE; break;
-  case 7: 
+  case 7:
     if (!isordered) return ConstantInt::getTrue(LHS->getContext());
     Pred = FCmpInst::FCMP_ORD; break;
   }
@@ -154,7 +154,7 @@ Instruction *InstCombiner::OptAndOp(Instruction *Op,
         Or->takeName(Op);
         return BinaryOperator::CreateAnd(Or, AndRHS);
       }
-      
+
       ConstantInt *TogetherCI = dyn_cast<ConstantInt>(Together);
       if (TogetherCI && !TogetherCI->isZero()){
         // (X | C1) & C2 --> (X & (C2^(C1&C2))) | C1
@@ -166,7 +166,7 @@ Instruction *InstCombiner::OptAndOp(Instruction *Op,
         return BinaryOperator::CreateOr(And, OpRHS);
       }
     }
-    
+
     break;
   case Instruction::Add:
     if (Op->hasOneUse()) {
@@ -215,7 +215,7 @@ Instruction *InstCombiner::OptAndOp(Instruction *Op,
     if (CI->getValue() == ShlMask)
       // Masking out bits that the shift already masks.
       return ReplaceInstUsesWith(TheAnd, Op);   // No need for the and.
-    
+
     if (CI != AndRHS) {                  // Reducing bits set in and.
       TheAnd.setOperand(1, CI);
       return &TheAnd;
@@ -236,7 +236,7 @@ Instruction *InstCombiner::OptAndOp(Instruction *Op,
     if (CI->getValue() == ShrMask)
       // Masking out bits that the shift already masks.
       return ReplaceInstUsesWith(TheAnd, Op);
-    
+
     if (CI != AndRHS) {
       TheAnd.setOperand(1, CI);  // Reduce bits set in and cst.
       return &TheAnd;
@@ -269,22 +269,22 @@ Instruction *InstCombiner::OptAndOp(Instruction *Op,
 
 /// InsertRangeTest - Emit a computation of: (V >= Lo && V < Hi) if Inside is
 /// true, otherwise (V < Lo || V >= Hi).  In practice, we emit the more efficient
-/// (V-Lo) <u Hi-Lo.  This method expects that Lo <= Hi. isSigned indicates
+/// (V-Lo) \<u Hi-Lo.  This method expects that Lo <= Hi. isSigned indicates
 /// whether to treat the V, Lo and HI as signed or not. IB is the location to
 /// insert new instructions.
 Value *InstCombiner::InsertRangeTest(Value *V, Constant *Lo, Constant *Hi,
                                      bool isSigned, bool Inside) {
-  assert(cast<ConstantInt>(ConstantExpr::getICmp((isSigned ? 
+  assert(cast<ConstantInt>(ConstantExpr::getICmp((isSigned ?
             ICmpInst::ICMP_SLE:ICmpInst::ICMP_ULE), Lo, Hi))->getZExtValue() &&
          "Lo is not <= Hi in range emission code!");
-    
+
   if (Inside) {
     if (Lo == Hi)  // Trivially false.
       return ConstantInt::getFalse(V->getContext());
 
     // V >= Min && V < Hi --> V < Hi
     if (cast<ConstantInt>(Lo)->isMinValue(isSigned)) {
-      ICmpInst::Predicate pred = (isSigned ? 
+      ICmpInst::Predicate pred = (isSigned ?
         ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT);
       return Builder->CreateICmp(pred, V, Hi);
     }
@@ -302,7 +302,7 @@ Value *InstCombiner::InsertRangeTest(Value *V, Constant *Lo, Constant *Hi,
   // V < Min || V >= Hi -> V > Hi-1
   Hi = SubOne(cast<ConstantInt>(Hi));
   if (cast<ConstantInt>(Lo)->isMinValue(isSigned)) {
-    ICmpInst::Predicate pred = (isSigned ? 
+    ICmpInst::Predicate pred = (isSigned ?
         ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT);
     return Builder->CreateICmp(pred, V, Hi);
   }
@@ -327,14 +327,14 @@ static bool isRunOfOnes(ConstantInt *Val, uint32_t &MB, uint32_t &ME) {
   // look for the first zero bit after the run of ones
   MB = BitWidth - ((V - 1) ^ V).countLeadingZeros();
   // look for the first non-zero bit
-  ME = V.getActiveBits(); 
+  ME = V.getActiveBits();
   return true;
 }
 
 /// FoldLogicalPlusAnd - This is part of an expression (LHS +/- RHS) & Mask,
 /// where isSub determines whether the operator is a sub.  If we can fold one of
 /// the following xforms:
-/// 
+///
 /// ((A & N) +/- B) & Mask -> (A +/- B) & Mask iff N&Mask == Mask
 /// ((A | N) +/- B) & Mask -> (A +/- B) & Mask iff N&Mask == 0
 /// ((A ^ N) +/- B) & Mask -> (A +/- B) & Mask iff N&Mask == 0
@@ -355,8 +355,8 @@ Value *InstCombiner::FoldLogicalPlusAnd(Value *LHS, Value *RHS,
   case Instruction::And:
     if (ConstantExpr::getAnd(N, Mask) == Mask) {
       // If the AndRHS is a power of two minus one (0+1+), this is simple.
-      if ((Mask->getValue().countLeadingZeros() + 
-           Mask->getValue().countPopulation()) == 
+      if ((Mask->getValue().countLeadingZeros() +
+           Mask->getValue().countPopulation()) ==
           Mask->getValue().getBitWidth())
         break;
 
@@ -375,33 +375,33 @@ Value *InstCombiner::FoldLogicalPlusAnd(Value *LHS, Value *RHS,
   case Instruction::Or:
   case Instruction::Xor:
     // If the AndRHS is a power of two minus one (0+1+), and N&Mask == 0
-    if ((Mask->getValue().countLeadingZeros() + 
+    if ((Mask->getValue().countLeadingZeros() +
          Mask->getValue().countPopulation()) == Mask->getValue().getBitWidth()
         && ConstantExpr::getAnd(N, Mask)->isNullValue())
       break;
     return 0;
   }
-  
+
   if (isSub)
     return Builder->CreateSub(LHSI->getOperand(0), RHS, "fold");
   return Builder->CreateAdd(LHSI->getOperand(0), RHS, "fold");
 }
 
 /// enum for classifying (icmp eq (A & B), C) and (icmp ne (A & B), C)
-/// One of A and B is considered the mask, the other the value. This is 
-/// described as the "AMask" or "BMask" part of the enum. If the enum 
+/// One of A and B is considered the mask, the other the value. This is
+/// described as the "AMask" or "BMask" part of the enum. If the enum
 /// contains only "Mask", then both A and B can be considered masks.
 /// If A is the mask, then it was proven, that (A & C) == C. This
 /// is trivial if C == A, or C == 0. If both A and C are constants, this
 /// proof is also easy.
 /// For the following explanations we assume that A is the mask.
-/// The part "AllOnes" declares, that the comparison is true only 
+/// The part "AllOnes" declares, that the comparison is true only
 /// if (A & B) == A, or all bits of A are set in B.
 ///   Example: (icmp eq (A & 3), 3) -> FoldMskICmp_AMask_AllOnes
-/// The part "AllZeroes" declares, that the comparison is true only 
+/// The part "AllZeroes" declares, that the comparison is true only
 /// if (A & B) == 0, or all bits of A are cleared in B.
 ///   Example: (icmp eq (A & 3), 0) -> FoldMskICmp_Mask_AllZeroes
-/// The part "Mixed" declares, that (A & B) == C and C might or might not 
+/// The part "Mixed" declares, that (A & B) == C and C might or might not
 /// contain any number of one bits and zero bits.
 ///   Example: (icmp eq (A & 3), 1) -> FoldMskICmp_AMask_Mixed
 /// The Part "Not" means, that in above descriptions "==" should be replaced
@@ -425,16 +425,16 @@ enum MaskedICmpType {
 
 /// return the set of pattern classes (from MaskedICmpType)
 /// that (icmp SCC (A & B), C) satisfies
-static unsigned getTypeOfMaskedICmp(Value* A, Value* B, Value* C, 
+static unsigned getTypeOfMaskedICmp(Value* A, Value* B, Value* C,
                                     ICmpInst::Predicate SCC)
 {
   ConstantInt *ACst = dyn_cast<ConstantInt>(A);
   ConstantInt *BCst = dyn_cast<ConstantInt>(B);
   ConstantInt *CCst = dyn_cast<ConstantInt>(C);
   bool icmp_eq = (SCC == ICmpInst::ICMP_EQ);
-  bool icmp_abit = (ACst != 0 && !ACst->isZero() && 
+  bool icmp_abit = (ACst != 0 && !ACst->isZero() &&
                     ACst->getValue().isPowerOf2());
-  bool icmp_bbit = (BCst != 0 && !BCst->isZero() && 
+  bool icmp_bbit = (BCst != 0 && !BCst->isZero() &&
                     BCst->getValue().isPowerOf2());
   unsigned result = 0;
   if (CCst != 0 && CCst->isZero()) {
@@ -449,12 +449,12 @@ static unsigned getTypeOfMaskedICmp(Value* A, Value* B, Value* C,
                           FoldMskICmp_BMask_NotMixed));
     if (icmp_abit)
       result |= (icmp_eq ? (FoldMskICmp_AMask_NotAllOnes |
-                            FoldMskICmp_AMask_NotMixed) 
+                            FoldMskICmp_AMask_NotMixed)
                          : (FoldMskICmp_AMask_AllOnes |
                             FoldMskICmp_AMask_Mixed));
     if (icmp_bbit)
       result |= (icmp_eq ? (FoldMskICmp_BMask_NotAllOnes |
-                            FoldMskICmp_BMask_NotMixed) 
+                            FoldMskICmp_BMask_NotMixed)
                          : (FoldMskICmp_BMask_AllOnes |
                             FoldMskICmp_BMask_Mixed));
     return result;
@@ -469,26 +469,23 @@ static unsigned getTypeOfMaskedICmp(Value* A, Value* B, Value* C,
                             FoldMskICmp_AMask_NotMixed)
                          : (FoldMskICmp_Mask_AllZeroes |
                             FoldMskICmp_AMask_Mixed));
-  }
-  else if (ACst != 0 && CCst != 0 &&
-        ConstantExpr::getAnd(ACst, CCst) == CCst) {
+  } else if (ACst != 0 && CCst != 0 &&
+             ConstantExpr::getAnd(ACst, CCst) == CCst) {
     result |= (icmp_eq ? FoldMskICmp_AMask_Mixed
                        : FoldMskICmp_AMask_NotMixed);
   }
-  if (B == C) 
-  {
+  if (B == C) {
     result |= (icmp_eq ? (FoldMskICmp_BMask_AllOnes |
                           FoldMskICmp_BMask_Mixed)
                        : (FoldMskICmp_BMask_NotAllOnes |
                           FoldMskICmp_BMask_NotMixed));
     if (icmp_bbit)
       result |= (icmp_eq ? (FoldMskICmp_Mask_NotAllZeroes |
-                            FoldMskICmp_BMask_NotMixed) 
+                            FoldMskICmp_BMask_NotMixed)
                          : (FoldMskICmp_Mask_AllZeroes |
                             FoldMskICmp_BMask_Mixed));
-  }
-  else if (BCst != 0 && CCst != 0 &&
-        ConstantExpr::getAnd(BCst, CCst) == CCst) {
+  } else if (BCst != 0 && CCst != 0 &&
+             ConstantExpr::getAnd(BCst, CCst) == CCst) {
     result |= (icmp_eq ? FoldMskICmp_BMask_Mixed
                        : FoldMskICmp_BMask_NotMixed);
   }
@@ -531,7 +528,7 @@ static bool decomposeBitTestICmp(const ICmpInst *I, ICmpInst::Predicate &Pred,
 /// handle (icmp(A & B) ==/!= C) &/| (icmp(A & D) ==/!= E)
 /// return the set of pattern classes (from MaskedICmpType)
 /// that both LHS and RHS satisfy
-static unsigned foldLogOpOfMaskedICmpsHelper(Value*& A, 
+static unsigned foldLogOpOfMaskedICmpsHelper(Value*& A,
                                              Value*& B, Value*& C,
                                              Value*& D, Value*& E,
                                              ICmpInst *LHS, ICmpInst *RHS,
@@ -542,10 +539,10 @@ static unsigned foldLogOpOfMaskedICmpsHelper(Value*& A,
   if (LHS->getOperand(0)->getType()->isVectorTy()) return 0;
 
   // Here comes the tricky part:
-  // LHS might be of the form L11 & L12 == X, X == L21 & L22, 
+  // LHS might be of the form L11 & L12 == X, X == L21 & L22,
   // and L11 & L12 == L21 & L22. The same goes for RHS.
   // Now we must find those components L** and R**, that are equal, so
-  // that we can extract the parameters A, B, C, D, and E for the canonical 
+  // that we can extract the parameters A, B, C, D, and E for the canonical
   // above.
   Value *L1 = LHS->getOperand(0);
   Value *L2 = LHS->getOperand(1);
@@ -610,14 +607,11 @@ static unsigned foldLogOpOfMaskedICmpsHelper(Value*& A,
 
   if (L11 == A) {
     B = L12; C = L2;
-  }
-  else if (L12 == A) {
+  } else if (L12 == A) {
     B = L11; C = L2;
-  }
-  else if (L21 == A) {
+  } else if (L21 == A) {
     B = L22; C = L1;
-  }
-  else if (L22 == A) {
+  } else if (L22 == A) {
     B = L21; C = L1;
   }
 
@@ -643,32 +637,32 @@ static Value* foldLogOpOfMaskedICmps(ICmpInst *LHS, ICmpInst *RHS,
     mask >>= 1; // treat "Not"-states as normal states
 
   if (mask & FoldMskICmp_Mask_AllZeroes) {
-    // (icmp eq (A & B), 0) & (icmp eq (A & D), 0) 
+    // (icmp eq (A & B), 0) & (icmp eq (A & D), 0)
     // -> (icmp eq (A & (B|D)), 0)
     Value* newOr = Builder->CreateOr(B, D);
     Value* newAnd = Builder->CreateAnd(A, newOr);
     // we can't use C as zero, because we might actually handle
-    //   (icmp ne (A & B), B) & (icmp ne (A & D), D) 
+    //   (icmp ne (A & B), B) & (icmp ne (A & D), D)
     // with B and D, having a single bit set
     Value* zero = Constant::getNullValue(A->getType());
     return Builder->CreateICmp(NEWCC, newAnd, zero);
   }
-  else if (mask & FoldMskICmp_BMask_AllOnes) {
-    // (icmp eq (A & B), B) & (icmp eq (A & D), D) 
+  if (mask & FoldMskICmp_BMask_AllOnes) {
+    // (icmp eq (A & B), B) & (icmp eq (A & D), D)
     // -> (icmp eq (A & (B|D)), (B|D))
     Value* newOr = Builder->CreateOr(B, D);
     Value* newAnd = Builder->CreateAnd(A, newOr);
     return Builder->CreateICmp(NEWCC, newAnd, newOr);
-  }     
-  else if (mask & FoldMskICmp_AMask_AllOnes) {
-    // (icmp eq (A & B), A) & (icmp eq (A & D), A) 
+  }
+  if (mask & FoldMskICmp_AMask_AllOnes) {
+    // (icmp eq (A & B), A) & (icmp eq (A & D), A)
     // -> (icmp eq (A & (B&D)), A)
     Value* newAnd1 = Builder->CreateAnd(B, D);
     Value* newAnd = Builder->CreateAnd(A, newAnd1);
     return Builder->CreateICmp(NEWCC, newAnd, A);
   }
-  else if (mask & FoldMskICmp_BMask_Mixed) {
-    // (icmp eq (A & B), C) & (icmp eq (A & D), E) 
+  if (mask & FoldMskICmp_BMask_Mixed) {
+    // (icmp eq (A & B), C) & (icmp eq (A & D), E)
     // We already know that B & C == C && D & E == E.
     // If we can prove that (B & D) & (C ^ E) == 0, that is, the bits of
     // C and E, which are shared by both the mask B and the mask D, don't
@@ -680,7 +674,7 @@ static Value* foldLogOpOfMaskedICmps(ICmpInst *LHS, ICmpInst *RHS,
     ConstantInt *DCst = dyn_cast<ConstantInt>(D);
     if (DCst == 0) return 0;
     // we can't simply use C and E, because we might actually handle
-    //   (icmp ne (A & B), B) & (icmp eq (A & D), D) 
+    //   (icmp ne (A & B), B) & (icmp eq (A & D), D)
     // with B and D, having a single bit set
 
     ConstantInt *CCst = dyn_cast<ConstantInt>(C);
@@ -727,13 +721,13 @@ Value *InstCombiner::FoldAndOfICmps(ICmpInst *LHS, ICmpInst *RHS) {
   // handle (roughly):  (icmp eq (A & B), C) & (icmp eq (A & D), E)
   if (Value *V = foldLogOpOfMaskedICmps(LHS, RHS, ICmpInst::ICMP_EQ, Builder))
     return V;
-  
+
   // This only handles icmp of constants: (icmp1 A, C1) & (icmp2 B, C2).
   Value *Val = LHS->getOperand(0), *Val2 = RHS->getOperand(0);
   ConstantInt *LHSCst = dyn_cast<ConstantInt>(LHS->getOperand(1));
   ConstantInt *RHSCst = dyn_cast<ConstantInt>(RHS->getOperand(1));
   if (LHSCst == 0 || RHSCst == 0) return 0;
-  
+
   if (LHSCst == RHSCst && LHSCC == RHSCC) {
     // (icmp ult A, C) & (icmp ult B, C) --> (icmp ult (A|B), C)
     // where C is a power of 2
@@ -742,7 +736,7 @@ Value *InstCombiner::FoldAndOfICmps(ICmpInst *LHS, ICmpInst *RHS) {
       Value *NewOr = Builder->CreateOr(Val, Val2);
       return Builder->CreateICmp(LHSCC, NewOr, LHSCst);
     }
-    
+
     // (icmp eq A, 0) & (icmp eq B, 0) --> (icmp eq (A|B), 0)
     if (LHSCC == ICmpInst::ICMP_EQ && LHSCst->isZero()) {
       Value *NewOr = Builder->CreateOr(Val, Val2);
@@ -759,14 +753,13 @@ Value *InstCombiner::FoldAndOfICmps(ICmpInst *LHS, ICmpInst *RHS) {
     ConstantInt *AndCst, *SmallCst = 0, *BigCst = 0;
 
     // (trunc x) == C1 & (and x, CA) == C2
+    // (and x, CA) == C2 & (trunc x) == C1
     if (match(Val2, m_Trunc(m_Value(V))) &&
         match(Val, m_And(m_Specific(V), m_ConstantInt(AndCst)))) {
       SmallCst = RHSCst;
       BigCst = LHSCst;
-    }
-    // (and x, CA) == C2 & (trunc x) == C1
-    else if (match(Val, m_Trunc(m_Value(V))) &&
-             match(Val2, m_And(m_Specific(V), m_ConstantInt(AndCst)))) {
+    } else if (match(Val, m_Trunc(m_Value(V))) &&
+               match(Val2, m_And(m_Specific(V), m_ConstantInt(AndCst)))) {
       SmallCst = LHSCst;
       BigCst = RHSCst;
     }
@@ -789,7 +782,7 @@ Value *InstCombiner::FoldAndOfICmps(ICmpInst *LHS, ICmpInst *RHS) {
   // From here on, we only handle:
   //    (icmp1 A, C1) & (icmp2 A, C2) --> something simpler.
   if (Val != Val2) return 0;
-  
+
   // ICMP_[US][GL]E X, CST is folded to ICMP_[US][GL]T elsewhere.
   if (LHSCC == ICmpInst::ICMP_UGE || LHSCC == ICmpInst::ICMP_ULE ||
       RHSCC == ICmpInst::ICMP_UGE || RHSCC == ICmpInst::ICMP_ULE ||
@@ -799,9 +792,9 @@ Value *InstCombiner::FoldAndOfICmps(ICmpInst *LHS, ICmpInst *RHS) {
 
   // Make a constant range that's the intersection of the two icmp ranges.
   // If the intersection is empty, we know that the result is false.
-  ConstantRange LHSRange = 
+  ConstantRange LHSRange =
     ConstantRange::makeICmpRegion(LHSCC, LHSCst->getValue());
-  ConstantRange RHSRange = 
+  ConstantRange RHSRange =
     ConstantRange::makeICmpRegion(RHSCC, RHSCst->getValue());
 
   if (LHSRange.intersectWith(RHSRange).isEmptySet())
@@ -810,16 +803,16 @@ Value *InstCombiner::FoldAndOfICmps(ICmpInst *LHS, ICmpInst *RHS) {
   // We can't fold (ugt x, C) & (sgt x, C2).
   if (!PredicatesFoldable(LHSCC, RHSCC))
     return 0;
-    
+
   // Ensure that the larger constant is on the RHS.
   bool ShouldSwap;
   if (CmpInst::isSigned(LHSCC) ||
-      (ICmpInst::isEquality(LHSCC) && 
+      (ICmpInst::isEquality(LHSCC) &&
        CmpInst::isSigned(RHSCC)))
     ShouldSwap = LHSCst->getValue().sgt(RHSCst->getValue());
   else
     ShouldSwap = LHSCst->getValue().ugt(RHSCst->getValue());
-    
+
   if (ShouldSwap) {
     std::swap(LHS, RHS);
     std::swap(LHSCst, RHSCst);
@@ -829,8 +822,8 @@ Value *InstCombiner::FoldAndOfICmps(ICmpInst *LHS, ICmpInst *RHS) {
   // At this point, we know we have two icmp instructions
   // comparing a value against two constants and and'ing the result
   // together.  Because of the above check, we know that we only have
-  // icmp eq, icmp ne, icmp [su]lt, and icmp [SU]gt here. We also know 
-  // (from the icmp folding check above), that the two constants 
+  // icmp eq, icmp ne, icmp [su]lt, and icmp [SU]gt here. We also know
+  // (from the icmp folding check above), that the two constants
   // are not equal and that the larger constant is on the RHS
   assert(LHSCst != RHSCst && "Compares not folded above?");
 
@@ -932,7 +925,7 @@ Value *InstCombiner::FoldAndOfICmps(ICmpInst *LHS, ICmpInst *RHS) {
     }
     break;
   }
- 
+
   return 0;
 }
 
@@ -951,7 +944,7 @@ Value *InstCombiner::FoldAndOfFCmps(FCmpInst *LHS, FCmpInst *RHS) {
           return ConstantInt::getFalse(LHS->getContext());
         return Builder->CreateFCmpORD(LHS->getOperand(0), RHS->getOperand(0));
       }
-    
+
     // Handle vector zeros.  This occurs because the canonical form of
     // "fcmp ord x,x" is "fcmp ord x, 0".
     if (isa<ConstantAggregateZero>(LHS->getOperand(1)) &&
@@ -959,18 +952,18 @@ Value *InstCombiner::FoldAndOfFCmps(FCmpInst *LHS, FCmpInst *RHS) {
       return Builder->CreateFCmpORD(LHS->getOperand(0), RHS->getOperand(0));
     return 0;
   }
-  
+
   Value *Op0LHS = LHS->getOperand(0), *Op0RHS = LHS->getOperand(1);
   Value *Op1LHS = RHS->getOperand(0), *Op1RHS = RHS->getOperand(1);
   FCmpInst::Predicate Op0CC = LHS->getPredicate(), Op1CC = RHS->getPredicate();
-  
-  
+
+
   if (Op0LHS == Op1RHS && Op0RHS == Op1LHS) {
     // Swap RHS operands to match LHS.
     Op1CC = FCmpInst::getSwappedPredicate(Op1CC);
     std::swap(Op1LHS, Op1RHS);
   }
-  
+
   if (Op0LHS == Op1LHS && Op0RHS == Op1RHS) {
     // Simplify (fcmp cc0 x, y) & (fcmp cc1 x, y).
     if (Op0CC == Op1CC)
@@ -981,7 +974,7 @@ Value *InstCombiner::FoldAndOfFCmps(FCmpInst *LHS, FCmpInst *RHS) {
       return RHS;
     if (Op1CC == FCmpInst::FCMP_TRUE)
       return LHS;
-    
+
     bool Op0Ordered;
     bool Op1Ordered;
     unsigned Op0Pred = getFCmpCode(Op0CC, Op0Ordered);
@@ -1001,7 +994,7 @@ Value *InstCombiner::FoldAndOfFCmps(FCmpInst *LHS, FCmpInst *RHS) {
         return LHS;
       if (Op0Ordered && (Op0Ordered == Op1Ordered))
         return RHS;
-      
+
       // uno && oeq -> uno && (ord && eq) -> false
       if (!Op0Ordered)
         return ConstantInt::get(CmpInst::makeCmpResultType(LHS->getType()), 0);
@@ -1025,10 +1018,10 @@ Instruction *InstCombiner::visitAnd(BinaryOperator &I) {
   if (Value *V = SimplifyUsingDistributiveLaws(I))
     return ReplaceInstUsesWith(I, V);
 
-  // See if we can simplify any instructions used by the instruction whose sole 
+  // See if we can simplify any instructions used by the instruction whose sole
   // purpose is to compute bits we don't care about.
   if (SimplifyDemandedInstructionBits(I))
-    return &I;  
+    return &I;
 
   if (ConstantInt *AndRHS = dyn_cast<ConstantInt>(Op1)) {
     const APInt &AndRHSMask = AndRHS->getValue();
@@ -1043,7 +1036,7 @@ Instruction *InstCombiner::visitAnd(BinaryOperator &I) {
       case Instruction::Or: {
         // If the mask is only needed on one incoming arm, push it up.
         if (!Op0I->hasOneUse()) break;
-          
+
         APInt NotAndRHS(~AndRHSMask);
         if (MaskedValueIsZero(Op0LHS, NotAndRHS)) {
           // Not masking anything out for the LHS, move to RHS.
@@ -1103,12 +1096,12 @@ Instruction *InstCombiner::visitAnd(BinaryOperator &I) {
         }
         break;
       }
-          
+
       if (ConstantInt *Op0CI = dyn_cast<ConstantInt>(Op0I->getOperand(1)))
         if (Instruction *Res = OptAndOp(Op0I, Op0CI, AndRHS, I))
           return Res;
     }
-    
+
     // If this is an integer truncation, and if the source is an 'and' with
     // immediate, transform it.  This frequently occurs for bitfield accesses.
     {
@@ -1116,7 +1109,7 @@ Instruction *InstCombiner::visitAnd(BinaryOperator &I) {
       if (match(Op0, m_Trunc(m_And(m_Value(X), m_ConstantInt(YC))))) {
         // Change: and (trunc (and X, YC) to T), C2
         // into  : and (trunc X to T), trunc(YC) & C2
-        // This will fold the two constants together, which may allow 
+        // This will fold the two constants together, which may allow
         // other simplifications.
         Value *NewCast = Builder->CreateTrunc(X, I.getType(), "and.shrunk");
         Constant *C3 = ConstantExpr::getTrunc(YC, I.getType());
@@ -1143,7 +1136,7 @@ Instruction *InstCombiner::visitAnd(BinaryOperator &I) {
                                       I.getName()+".demorgan");
         return BinaryOperator::CreateNot(Or);
       }
-  
+
   {
     Value *A = 0, *B = 0, *C = 0, *D = 0;
     // (A|B) & ~(A&B) -> A^B
@@ -1151,13 +1144,13 @@ Instruction *InstCombiner::visitAnd(BinaryOperator &I) {
         match(Op1, m_Not(m_And(m_Value(C), m_Value(D)))) &&
         ((A == C && B == D) || (A == D && B == C)))
       return BinaryOperator::CreateXor(A, B);
-    
+
     // ~(A&B) & (A|B) -> A^B
     if (match(Op1, m_Or(m_Value(A), m_Value(B))) &&
         match(Op0, m_Not(m_And(m_Value(C), m_Value(D)))) &&
         ((A == C && B == D) || (A == D && B == C)))
       return BinaryOperator::CreateXor(A, B);
-    
+
     // A&(A^B) => A & ~B
     {
       Value *tmpOp0 = Op0;
@@ -1193,19 +1186,19 @@ Instruction *InstCombiner::visitAnd(BinaryOperator &I) {
         match(Op1, m_Or(m_Value(A), m_Not(m_Specific(Op0)))))
       return BinaryOperator::CreateAnd(A, Op0);
   }
-  
+
   if (ICmpInst *RHS = dyn_cast<ICmpInst>(Op1))
     if (ICmpInst *LHS = dyn_cast<ICmpInst>(Op0))
       if (Value *Res = FoldAndOfICmps(LHS, RHS))
         return ReplaceInstUsesWith(I, Res);
-  
+
   // If and'ing two fcmp, try combine them into one.
   if (FCmpInst *LHS = dyn_cast<FCmpInst>(I.getOperand(0)))
     if (FCmpInst *RHS = dyn_cast<FCmpInst>(I.getOperand(1)))
       if (Value *Res = FoldAndOfFCmps(LHS, RHS))
         return ReplaceInstUsesWith(I, Res);
-  
-  
+
+
   // fold (and (cast A), (cast B)) -> (cast (and A, B))
   if (CastInst *Op0C = dyn_cast<CastInst>(Op0))
     if (CastInst *Op1C = dyn_cast<CastInst>(Op1)) {
@@ -1214,21 +1207,21 @@ Instruction *InstCombiner::visitAnd(BinaryOperator &I) {
           SrcTy == Op1C->getOperand(0)->getType() &&
           SrcTy->isIntOrIntVectorTy()) {
         Value *Op0COp = Op0C->getOperand(0), *Op1COp = Op1C->getOperand(0);
-        
+
         // Only do this if the casts both really cause code to be generated.
         if (ShouldOptimizeCast(Op0C->getOpcode(), Op0COp, I.getType()) &&
             ShouldOptimizeCast(Op1C->getOpcode(), Op1COp, I.getType())) {
           Value *NewOp = Builder->CreateAnd(Op0COp, Op1COp, I.getName());
           return CastInst::Create(Op0C->getOpcode(), NewOp, I.getType());
         }
-        
+
         // If this is and(cast(icmp), cast(icmp)), try to fold this even if the
         // cast is otherwise not optimizable.  This happens for vector sexts.
         if (ICmpInst *RHS = dyn_cast<ICmpInst>(Op1COp))
           if (ICmpInst *LHS = dyn_cast<ICmpInst>(Op0COp))
             if (Value *Res = FoldAndOfICmps(LHS, RHS))
               return CastInst::Create(Op0C->getOpcode(), Res, I.getType());
-        
+
         // If this is and(cast(fcmp), cast(fcmp)), try to fold this even if the
         // cast is otherwise not optimizable.  This happens for vector sexts.
         if (FCmpInst *RHS = dyn_cast<FCmpInst>(Op1COp))
@@ -1237,21 +1230,49 @@ Instruction *InstCombiner::visitAnd(BinaryOperator &I) {
               return CastInst::Create(Op0C->getOpcode(), Res, I.getType());
       }
     }
-    
+
   // (X >> Z) & (Y >> Z)  -> (X&Y) >> Z  for all shifts.
   if (BinaryOperator *SI1 = dyn_cast<BinaryOperator>(Op1)) {
     if (BinaryOperator *SI0 = dyn_cast<BinaryOperator>(Op0))
-      if (SI0->isShift() && SI0->getOpcode() == SI1->getOpcode() && 
+      if (SI0->isShift() && SI0->getOpcode() == SI1->getOpcode() &&
           SI0->getOperand(1) == SI1->getOperand(1) &&
           (SI0->hasOneUse() || SI1->hasOneUse())) {
         Value *NewOp =
           Builder->CreateAnd(SI0->getOperand(0), SI1->getOperand(0),
                              SI0->getName());
-        return BinaryOperator::Create(SI1->getOpcode(), NewOp, 
+        return BinaryOperator::Create(SI1->getOpcode(), NewOp,
                                       SI1->getOperand(1));
       }
   }
 
+  {
+    Value *X = 0;
+    bool OpsSwapped = false;
+    // Canonicalize SExt or Not to the LHS
+    if (match(Op1, m_SExt(m_Value())) ||
+        match(Op1, m_Not(m_Value()))) {
+      std::swap(Op0, Op1);
+      OpsSwapped = true;
+    }
+
+    // Fold (and (sext bool to A), B) --> (select bool, B, 0)
+    if (match(Op0, m_SExt(m_Value(X))) &&
+        X->getType()->getScalarType()->isIntegerTy(1)) {
+      Value *Zero = Constant::getNullValue(Op1->getType());
+      return SelectInst::Create(X, Op1, Zero);
+    }
+
+    // Fold (and ~(sext bool to A), B) --> (select bool, 0, B)
+    if (match(Op0, m_Not(m_SExt(m_Value(X)))) &&
+        X->getType()->getScalarType()->isIntegerTy(1)) {
+      Value *Zero = Constant::getNullValue(Op0->getType());
+      return SelectInst::Create(X, Zero, Op1);
+    }
+
+    if (OpsSwapped)
+      std::swap(Op0, Op1);
+  }
+
   return Changed ? &I : 0;
 }
 
@@ -1288,11 +1309,11 @@ static bool CollectBSwapParts(Value *V, int OverallLeftShift, uint32_t ByteMask,
              CollectBSwapParts(I->getOperand(1), OverallLeftShift, ByteMask,
                                ByteValues);
     }
-  
+
     // If this is a logical shift by a constant multiple of 8, recurse with
     // OverallLeftShift and ByteMask adjusted.
     if (I->isLogicalShift() && isa<ConstantInt>(I->getOperand(1))) {
-      unsigned ShAmt = 
+      unsigned ShAmt =
         cast<ConstantInt>(I->getOperand(1))->getLimitedValue(~0U);
       // Ensure the shift amount is defined and of a byte value.
       if ((ShAmt & 7) || (ShAmt > 8*ByteValues.size()))
@@ -1313,7 +1334,7 @@ static bool CollectBSwapParts(Value *V, int OverallLeftShift, uint32_t ByteMask,
       if (OverallLeftShift >= (int)ByteValues.size()) return true;
       if (OverallLeftShift <= -(int)ByteValues.size()) return true;
 
-      return CollectBSwapParts(I->getOperand(0), OverallLeftShift, ByteMask, 
+      return CollectBSwapParts(I->getOperand(0), OverallLeftShift, ByteMask,
                                ByteValues);
     }
 
@@ -1325,20 +1346,20 @@ static bool CollectBSwapParts(Value *V, int OverallLeftShift, uint32_t ByteMask,
       unsigned NumBytes = ByteValues.size();
       APInt Byte(I->getType()->getPrimitiveSizeInBits(), 255);
       const APInt &AndMask = cast<ConstantInt>(I->getOperand(1))->getValue();
-      
+
       for (unsigned i = 0; i != NumBytes; ++i, Byte <<= 8) {
         // If this byte is masked out by a later operation, we don't care what
         // the and mask is.
         if ((ByteMask & (1 << i)) == 0)
           continue;
-        
+
         // If the AndMask is all zeros for this byte, clear the bit.
         APInt MaskB = AndMask & Byte;
         if (MaskB == 0) {
           ByteMask &= ~(1U << i);
           continue;
         }
-        
+
         // If the AndMask is not all ones for this byte, it's not a bytezap.
         if (MaskB != Byte)
           return true;
@@ -1346,11 +1367,11 @@ static bool CollectBSwapParts(Value *V, int OverallLeftShift, uint32_t ByteMask,
         // Otherwise, this byte is kept.
       }
 
-      return CollectBSwapParts(I->getOperand(0), OverallLeftShift, ByteMask, 
+      return CollectBSwapParts(I->getOperand(0), OverallLeftShift, ByteMask,
                                ByteValues);
     }
   }
-  
+
   // Okay, we got to something that isn't a shift, 'or' or 'and'.  This must be
   // the input value to the bswap.  Some observations: 1) if more than one byte
   // is demanded from this input, then it could not be successfully assembled
@@ -1358,7 +1379,7 @@ static bool CollectBSwapParts(Value *V, int OverallLeftShift, uint32_t ByteMask,
   // their ultimate destination.
   if (!isPowerOf2_32(ByteMask)) return true;
   unsigned InputByteNo = CountTrailingZeros_32(ByteMask);
-  
+
   // 2) The input and ultimate destinations must line up: if byte 3 of an i32
   // is demanded, it needs to go into byte 0 of the result.  This means that the
   // byte needs to be shifted until it lands in the right byte bucket.  The
@@ -1368,7 +1389,7 @@ static bool CollectBSwapParts(Value *V, int OverallLeftShift, uint32_t ByteMask,
   unsigned DestByteNo = InputByteNo + OverallLeftShift;
   if (ByteValues.size()-1-DestByteNo != InputByteNo)
     return true;
-  
+
   // If the destination byte value is already defined, the values are or'd
   // together, which isn't a bswap (unless it's an or of the same bits).
   if (ByteValues[DestByteNo] && ByteValues[DestByteNo] != V)
@@ -1381,25 +1402,25 @@ static bool CollectBSwapParts(Value *V, int OverallLeftShift, uint32_t ByteMask,
 /// If so, insert the new bswap intrinsic and return it.
 Instruction *InstCombiner::MatchBSwap(BinaryOperator &I) {
   IntegerType *ITy = dyn_cast<IntegerType>(I.getType());
-  if (!ITy || ITy->getBitWidth() % 16 || 
+  if (!ITy || ITy->getBitWidth() % 16 ||
       // ByteMask only allows up to 32-byte values.
-      ITy->getBitWidth() > 32*8) 
+      ITy->getBitWidth() > 32*8)
     return 0;   // Can only bswap pairs of bytes.  Can't do vectors.
-  
+
   /// ByteValues - For each byte of the result, we keep track of which value
   /// defines each byte.
   SmallVector<Value*, 8> ByteValues;
   ByteValues.resize(ITy->getBitWidth()/8);
-    
+
   // Try to find all the pieces corresponding to the bswap.
   uint32_t ByteMask = ~0U >> (32-ByteValues.size());
   if (CollectBSwapParts(&I, 0, ByteMask, ByteValues))
     return 0;
-  
+
   // Check to see if all of the bytes come from the same value.
   Value *V = ByteValues[0];
   if (V == 0) return 0;  // Didn't find a byte?  Must be zero.
-  
+
   // Check to make sure that all of the bytes come from the same value.
   for (unsigned i = 1, e = ByteValues.size(); i != e; ++i)
     if (ByteValues[i] != V)
@@ -1425,7 +1446,7 @@ static Instruction *MatchSelectFromAndOr(Value *A, Value *B,
     return SelectInst::Create(Cond, C, B);
   if (match(D, m_SExt(m_Not(m_Specific(Cond)))))
     return SelectInst::Create(Cond, C, B);
-  
+
   // ((cond?-1:0)&C) | ((cond?0:-1)&D) -> cond ? C : D.
   if (match(B, m_Not(m_SExt(m_Specific(Cond)))))
     return SelectInst::Create(Cond, C, D);
@@ -1483,33 +1504,33 @@ Value *InstCombiner::FoldOrOfICmps(ICmpInst *LHS, ICmpInst *RHS) {
   // From here on, we only handle:
   //    (icmp1 A, C1) | (icmp2 A, C2) --> something simpler.
   if (Val != Val2) return 0;
-  
+
   // ICMP_[US][GL]E X, CST is folded to ICMP_[US][GL]T elsewhere.
   if (LHSCC == ICmpInst::ICMP_UGE || LHSCC == ICmpInst::ICMP_ULE ||
       RHSCC == ICmpInst::ICMP_UGE || RHSCC == ICmpInst::ICMP_ULE ||
       LHSCC == ICmpInst::ICMP_SGE || LHSCC == ICmpInst::ICMP_SLE ||
       RHSCC == ICmpInst::ICMP_SGE || RHSCC == ICmpInst::ICMP_SLE)
     return 0;
-  
+
   // We can't fold (ugt x, C) | (sgt x, C2).
   if (!PredicatesFoldable(LHSCC, RHSCC))
     return 0;
-  
+
   // Ensure that the larger constant is on the RHS.
   bool ShouldSwap;
   if (CmpInst::isSigned(LHSCC) ||
-      (ICmpInst::isEquality(LHSCC) && 
+      (ICmpInst::isEquality(LHSCC) &&
        CmpInst::isSigned(RHSCC)))
     ShouldSwap = LHSCst->getValue().sgt(RHSCst->getValue());
   else
     ShouldSwap = LHSCst->getValue().ugt(RHSCst->getValue());
-  
+
   if (ShouldSwap) {
     std::swap(LHS, RHS);
     std::swap(LHSCst, RHSCst);
     std::swap(LHSCC, RHSCC);
   }
-  
+
   // At this point, we know we have two icmp instructions
   // comparing a value against two constants and or'ing the result
   // together.  Because of the above check, we know that we only have
@@ -1531,6 +1552,20 @@ Value *InstCombiner::FoldOrOfICmps(ICmpInst *LHS, ICmpInst *RHS) {
         AddCST = ConstantExpr::getSub(AddOne(RHSCst), LHSCst);
         return Builder->CreateICmpULT(Add, AddCST);
       }
+
+      if (LHS->getOperand(0) == RHS->getOperand(0)) {
+        // if LHSCst and RHSCst differ only by one bit:
+        // (A == C1 || A == C2) -> (A & ~(C1 ^ C2)) == C1
+        assert(LHSCst->getValue().ule(LHSCst->getValue()));
+
+        APInt Xor = LHSCst->getValue() ^ RHSCst->getValue();
+        if (Xor.isPowerOf2()) {
+          Value *NegCst = Builder->getInt(~Xor);
+          Value *And = Builder->CreateAnd(LHS->getOperand(0), NegCst);
+          return Builder->CreateICmp(ICmpInst::ICMP_EQ, And, LHSCst);
+        }
+      }
+
       break;                         // (X == 13 | X == 15) -> no change
     case ICmpInst::ICMP_UGT:         // (X == 13 | X u> 14) -> no change
     case ICmpInst::ICMP_SGT:         // (X == 13 | X s> 14) -> no change
@@ -1632,7 +1667,7 @@ Value *InstCombiner::FoldOrOfICmps(ICmpInst *LHS, ICmpInst *RHS) {
 /// function.
 Value *InstCombiner::FoldOrOfFCmps(FCmpInst *LHS, FCmpInst *RHS) {
   if (LHS->getPredicate() == FCmpInst::FCMP_UNO &&
-      RHS->getPredicate() == FCmpInst::FCMP_UNO && 
+      RHS->getPredicate() == FCmpInst::FCMP_UNO &&
       LHS->getOperand(0)->getType() == RHS->getOperand(0)->getType()) {
     if (ConstantFP *LHSC = dyn_cast<ConstantFP>(LHS->getOperand(1)))
       if (ConstantFP *RHSC = dyn_cast<ConstantFP>(RHS->getOperand(1))) {
@@ -1640,25 +1675,25 @@ Value *InstCombiner::FoldOrOfFCmps(FCmpInst *LHS, FCmpInst *RHS) {
         // true.
         if (LHSC->getValueAPF().isNaN() || RHSC->getValueAPF().isNaN())
           return ConstantInt::getTrue(LHS->getContext());
-        
+
         // Otherwise, no need to compare the two constants, compare the
         // rest.
         return Builder->CreateFCmpUNO(LHS->getOperand(0), RHS->getOperand(0));
       }
-    
+
     // Handle vector zeros.  This occurs because the canonical form of
     // "fcmp uno x,x" is "fcmp uno x, 0".
     if (isa<ConstantAggregateZero>(LHS->getOperand(1)) &&
         isa<ConstantAggregateZero>(RHS->getOperand(1)))
       return Builder->CreateFCmpUNO(LHS->getOperand(0), RHS->getOperand(0));
-    
+
     return 0;
   }
-  
+
   Value *Op0LHS = LHS->getOperand(0), *Op0RHS = LHS->getOperand(1);
   Value *Op1LHS = RHS->getOperand(0), *Op1RHS = RHS->getOperand(1);
   FCmpInst::Predicate Op0CC = LHS->getPredicate(), Op1CC = RHS->getPredicate();
-  
+
   if (Op0LHS == Op1RHS && Op0RHS == Op1LHS) {
     // Swap RHS operands to match LHS.
     Op1CC = FCmpInst::getSwappedPredicate(Op1CC);
@@ -1692,7 +1727,7 @@ Value *InstCombiner::FoldOrOfFCmps(FCmpInst *LHS, FCmpInst *RHS) {
 ///     ((A | B) & C1) | (B & C2)
 ///
 /// into:
-/// 
+///
 ///     (A & C1) | B
 ///
 /// when the XOR of the two constants is "all ones" (-1).
@@ -1727,7 +1762,7 @@ Instruction *InstCombiner::visitOr(BinaryOperator &I) {
   if (Value *V = SimplifyUsingDistributiveLaws(I))
     return ReplaceInstUsesWith(I, V);
 
-  // See if we can simplify any instructions used by the instruction whose sole 
+  // See if we can simplify any instructions used by the instruction whose sole
   // purpose is to compute bits we don't care about.
   if (SimplifyDemandedInstructionBits(I))
     return &I;
@@ -1741,7 +1776,7 @@ Instruction *InstCombiner::visitOr(BinaryOperator &I) {
         Op0->hasOneUse()) {
       Value *Or = Builder->CreateOr(X, RHS);
       Or->takeName(Op0);
-      return BinaryOperator::CreateAnd(Or, 
+      return BinaryOperator::CreateAnd(Or,
                          ConstantInt::get(I.getContext(),
                                           RHS->getValue() | C1->getValue()));
     }
@@ -1778,7 +1813,7 @@ Instruction *InstCombiner::visitOr(BinaryOperator &I) {
     if (Instruction *BSwap = MatchBSwap(I))
       return BSwap;
   }
-  
+
   // (X^C)|Y -> (X|Y)^C iff Y&C == 0
   if (Op0->hasOneUse() &&
       match(Op0, m_Xor(m_Value(A), m_ConstantInt(C1))) &&
@@ -1827,7 +1862,7 @@ Instruction *InstCombiner::visitOr(BinaryOperator &I) {
             return ReplaceInstUsesWith(I, B);
         }
       }
-      
+
       if ((C1->getValue() & C2->getValue()) == 0) {
         // ((V | N) & C1) | (V & C2) --> (V|N) & (C1|C2)
         // iff (C1&C2) == 0 and (N&~C1) == 0
@@ -1844,7 +1879,7 @@ Instruction *InstCombiner::visitOr(BinaryOperator &I) {
           return BinaryOperator::CreateAnd(B,
                                ConstantInt::get(B->getContext(),
                                                 C1->getValue()|C2->getValue()));
-        
+
         // ((V|C3)&C1) | ((V|C4)&C2) --> (V|C3|C4)&(C1|C2)
         // iff (C1&C2) == 0 and (C3&~C1) == 0 and (C4&~C2) == 0.
         ConstantInt *C3 = 0, *C4 = 0;
@@ -1904,16 +1939,16 @@ Instruction *InstCombiner::visitOr(BinaryOperator &I) {
       if (Ret) return Ret;
     }
   }
-  
+
   // (X >> Z) | (Y >> Z)  -> (X|Y) >> Z  for all shifts.
   if (BinaryOperator *SI1 = dyn_cast<BinaryOperator>(Op1)) {
     if (BinaryOperator *SI0 = dyn_cast<BinaryOperator>(Op0))
-      if (SI0->isShift() && SI0->getOpcode() == SI1->getOpcode() && 
+      if (SI0->isShift() && SI0->getOpcode() == SI1->getOpcode() &&
           SI0->getOperand(1) == SI1->getOperand(1) &&
           (SI0->hasOneUse() || SI1->hasOneUse())) {
         Value *NewOp = Builder->CreateOr(SI0->getOperand(0), SI1->getOperand(0),
                                          SI0->getName());
-        return BinaryOperator::Create(SI1->getOpcode(), NewOp, 
+        return BinaryOperator::Create(SI1->getOpcode(), NewOp,
                                       SI1->getOperand(1));
       }
   }
@@ -1975,13 +2010,13 @@ Instruction *InstCombiner::visitOr(BinaryOperator &I) {
     if (ICmpInst *LHS = dyn_cast<ICmpInst>(I.getOperand(0)))
       if (Value *Res = FoldOrOfICmps(LHS, RHS))
         return ReplaceInstUsesWith(I, Res);
-    
+
   // (fcmp uno x, c) | (fcmp uno y, c)  -> (fcmp uno x, y)
   if (FCmpInst *LHS = dyn_cast<FCmpInst>(I.getOperand(0)))
     if (FCmpInst *RHS = dyn_cast<FCmpInst>(I.getOperand(1)))
       if (Value *Res = FoldOrOfFCmps(LHS, RHS))
         return ReplaceInstUsesWith(I, Res);
-  
+
   // fold (or (cast A), (cast B)) -> (cast (or A, B))
   if (CastInst *Op0C = dyn_cast<CastInst>(Op0)) {
     CastInst *Op1C = dyn_cast<CastInst>(Op1);
@@ -1999,14 +2034,14 @@ Instruction *InstCombiner::visitOr(BinaryOperator &I) {
           Value *NewOp = Builder->CreateOr(Op0COp, Op1COp, I.getName());
           return CastInst::Create(Op0C->getOpcode(), NewOp, I.getType());
         }
-        
+
         // If this is or(cast(icmp), cast(icmp)), try to fold this even if the
         // cast is otherwise not optimizable.  This happens for vector sexts.
         if (ICmpInst *RHS = dyn_cast<ICmpInst>(Op1COp))
           if (ICmpInst *LHS = dyn_cast<ICmpInst>(Op0COp))
             if (Value *Res = FoldOrOfICmps(LHS, RHS))
               return CastInst::Create(Op0C->getOpcode(), Res, I.getType());
-        
+
         // If this is or(cast(fcmp), cast(fcmp)), try to fold this even if the
         // cast is otherwise not optimizable.  This happens for vector sexts.
         if (FCmpInst *RHS = dyn_cast<FCmpInst>(Op1COp))
@@ -2035,7 +2070,21 @@ Instruction *InstCombiner::visitOr(BinaryOperator &I) {
     Inner->takeName(Op0);
     return BinaryOperator::CreateOr(Inner, C1);
   }
-  
+
+  // Change (or (bool?A:B),(bool?C:D)) --> (bool?(or A,C):(or B,D))
+  // Since this OR statement hasn't been optimized further yet, we hope
+  // that this transformation will allow the new ORs to be optimized.
+  {
+    Value *X = 0, *Y = 0;
+    if (Op0->hasOneUse() && Op1->hasOneUse() &&
+        match(Op0, m_Select(m_Value(X), m_Value(A), m_Value(B))) &&
+        match(Op1, m_Select(m_Value(Y), m_Value(C), m_Value(D))) && X == Y) {
+      Value *orTrue = Builder->CreateOr(A, C);
+      Value *orFalse = Builder->CreateOr(B, D);
+      return SelectInst::Create(X, orTrue, orFalse);
+    }
+  }
+
   return Changed ? &I : 0;
 }
 
@@ -2050,7 +2099,7 @@ Instruction *InstCombiner::visitXor(BinaryOperator &I) {
   if (Value *V = SimplifyUsingDistributiveLaws(I))
     return ReplaceInstUsesWith(I, V);
 
-  // See if we can simplify any instructions used by the instruction whose sole 
+  // See if we can simplify any instructions used by the instruction whose sole
   // purpose is to compute bits we don't care about.
   if (SimplifyDemandedInstructionBits(I))
     return &I;
@@ -2058,7 +2107,7 @@ Instruction *InstCombiner::visitXor(BinaryOperator &I) {
   // Is this a ~ operation?
   if (Value *NotOp = dyn_castNotVal(&I)) {
     if (BinaryOperator *Op0I = dyn_cast<BinaryOperator>(NotOp)) {
-      if (Op0I->getOpcode() == Instruction::And || 
+      if (Op0I->getOpcode() == Instruction::And ||
           Op0I->getOpcode() == Instruction::Or) {
         // ~(~X & Y) --> (X | ~Y) - De Morgan's Law
         // ~(~X | Y) === (X & ~Y) - De Morgan's Law
@@ -2072,10 +2121,10 @@ Instruction *InstCombiner::visitXor(BinaryOperator &I) {
             return BinaryOperator::CreateOr(Op0NotVal, NotY);
           return BinaryOperator::CreateAnd(Op0NotVal, NotY);
         }
-        
+
         // ~(X & Y) --> (~X | ~Y) - De Morgan's Law
         // ~(X | Y) === (~X & ~Y) - De Morgan's Law
-        if (isFreeToInvert(Op0I->getOperand(0)) && 
+        if (isFreeToInvert(Op0I->getOperand(0)) &&
             isFreeToInvert(Op0I->getOperand(1))) {
           Value *NotX =
             Builder->CreateNot(Op0I->getOperand(0), "notlhs");
@@ -2093,8 +2142,8 @@ Instruction *InstCombiner::visitXor(BinaryOperator &I) {
       }
     }
   }
-  
-  
+
+
   if (ConstantInt *RHS = dyn_cast<ConstantInt>(Op1)) {
     if (RHS->isOne() && Op0->hasOneUse())
       // xor (cmp A, B), true = not (cmp A, B) = !cmp A, B
@@ -2109,7 +2158,7 @@ Instruction *InstCombiner::visitXor(BinaryOperator &I) {
         if (CI->hasOneUse() && Op0C->hasOneUse()) {
           Instruction::CastOps Opcode = Op0C->getOpcode();
           if ((Opcode == Instruction::ZExt || Opcode == Instruction::SExt) &&
-              (RHS == ConstantExpr::getCast(Opcode, 
+              (RHS == ConstantExpr::getCast(Opcode,
                                            ConstantInt::getTrue(I.getContext()),
                                             Op0C->getDestTy()))) {
             CI->setPredicate(CI->getInversePredicate());
@@ -2128,7 +2177,7 @@ Instruction *InstCombiner::visitXor(BinaryOperator &I) {
                                       ConstantInt::get(I.getType(), 1));
           return BinaryOperator::CreateAdd(Op0I->getOperand(1), ConstantRHS);
         }
-          
+
       if (ConstantInt *Op0CI = dyn_cast<ConstantInt>(Op0I->getOperand(1))) {
         if (Op0I->getOpcode() == Instruction::Add) {
           // ~(X-c) --> (-c-1)-X
@@ -2152,13 +2201,34 @@ Instruction *InstCombiner::visitXor(BinaryOperator &I) {
             // Anything in both C1 and C2 is known to be zero, remove it from
             // NewRHS.
             Constant *CommonBits = ConstantExpr::getAnd(Op0CI, RHS);
-            NewRHS = ConstantExpr::getAnd(NewRHS, 
+            NewRHS = ConstantExpr::getAnd(NewRHS,
                                        ConstantExpr::getNot(CommonBits));
             Worklist.Add(Op0I);
             I.setOperand(0, Op0I->getOperand(0));
             I.setOperand(1, NewRHS);
             return &I;
           }
+        } else if (Op0I->getOpcode() == Instruction::LShr) {
+          // ((X^C1) >> C2) ^ C3 -> (X>>C2) ^ ((C1>>C2)^C3)
+          // E1 = "X ^ C1"
+          BinaryOperator *E1;
+          ConstantInt *C1;
+          if (Op0I->hasOneUse() &&
+              (E1 = dyn_cast<BinaryOperator>(Op0I->getOperand(0))) &&
+              E1->getOpcode() == Instruction::Xor &&
+              (C1 = dyn_cast<ConstantInt>(E1->getOperand(1)))) {
+            // fold (C1 >> C2) ^ C3
+            ConstantInt *C2 = Op0CI, *C3 = RHS;
+            APInt FoldConst = C1->getValue().lshr(C2->getValue());
+            FoldConst ^= C3->getValue();
+            // Prepare the two operands.
+            Value *Opnd0 = Builder->CreateLShr(E1->getOperand(0), C2);
+            Opnd0->takeName(Op0I);
+            cast<Instruction>(Opnd0)->setDebugLoc(I.getDebugLoc());
+            Value *FoldVal = ConstantInt::get(Opnd0->getType(), FoldConst);
+
+            return BinaryOperator::CreateXor(Opnd0, FoldVal);
+          }
         }
       }
     }
@@ -2184,7 +2254,7 @@ Instruction *InstCombiner::visitXor(BinaryOperator &I) {
         I.swapOperands();     // Simplified below.
         std::swap(Op0, Op1);
       }
-    } else if (match(Op1I, m_And(m_Value(A), m_Value(B))) && 
+    } else if (match(Op1I, m_And(m_Value(A), m_Value(B))) &&
                Op1I->hasOneUse()){
       if (A == Op0) {                                      // A^(A&B) -> A^(B&A)
         Op1I->swapOperands();
@@ -2196,7 +2266,7 @@ Instruction *InstCombiner::visitXor(BinaryOperator &I) {
       }
     }
   }
-  
+
   BinaryOperator *Op0I = dyn_cast<BinaryOperator>(Op0);
   if (Op0I) {
     Value *A, *B;
@@ -2206,7 +2276,7 @@ Instruction *InstCombiner::visitXor(BinaryOperator &I) {
         std::swap(A, B);
       if (B == Op1)                                  // (A|B)^B == A & ~B
         return BinaryOperator::CreateAnd(A, Builder->CreateNot(Op1));
-    } else if (match(Op0I, m_And(m_Value(A), m_Value(B))) && 
+    } else if (match(Op0I, m_And(m_Value(A), m_Value(B))) &&
                Op0I->hasOneUse()){
       if (A == Op1)                                        // (A&B)^A -> (B&A)^A
         std::swap(A, B);
@@ -2216,31 +2286,31 @@ Instruction *InstCombiner::visitXor(BinaryOperator &I) {
       }
     }
   }
-  
+
   // (X >> Z) ^ (Y >> Z)  -> (X^Y) >> Z  for all shifts.
-  if (Op0I && Op1I && Op0I->isShift() && 
-      Op0I->getOpcode() == Op1I->getOpcode() && 
+  if (Op0I && Op1I && Op0I->isShift() &&
+      Op0I->getOpcode() == Op1I->getOpcode() &&
       Op0I->getOperand(1) == Op1I->getOperand(1) &&
       (Op0I->hasOneUse() || Op1I->hasOneUse())) {
     Value *NewOp =
       Builder->CreateXor(Op0I->getOperand(0), Op1I->getOperand(0),
                          Op0I->getName());
-    return BinaryOperator::Create(Op1I->getOpcode(), NewOp, 
+    return BinaryOperator::Create(Op1I->getOpcode(), NewOp,
                                   Op1I->getOperand(1));
   }
-    
+
   if (Op0I && Op1I) {
     Value *A, *B, *C, *D;
     // (A & B)^(A | B) -> A ^ B
     if (match(Op0I, m_And(m_Value(A), m_Value(B))) &&
         match(Op1I, m_Or(m_Value(C), m_Value(D)))) {
-      if ((A == C && B == D) || (A == D && B == C)) 
+      if ((A == C && B == D) || (A == D && B == C))
         return BinaryOperator::CreateXor(A, B);
     }
     // (A | B)^(A & B) -> A ^ B
     if (match(Op0I, m_Or(m_Value(A), m_Value(B))) &&
         match(Op1I, m_And(m_Value(C), m_Value(D)))) {
-      if ((A == C && B == D) || (A == D && B == C)) 
+      if ((A == C && B == D) || (A == D && B == C))
         return BinaryOperator::CreateXor(A, B);
     }
   }
@@ -2257,7 +2327,7 @@ Instruction *InstCombiner::visitXor(BinaryOperator &I) {
           Value *Op0 = LHS->getOperand(0), *Op1 = LHS->getOperand(1);
           unsigned Code = getICmpCode(LHS) ^ getICmpCode(RHS);
           bool isSigned = LHS->isSigned() || RHS->isSigned();
-          return ReplaceInstUsesWith(I, 
+          return ReplaceInstUsesWith(I,
                                getNewICmpValue(isSigned, Code, Op0, Op1,
                                                Builder));
         }
@@ -2270,9 +2340,9 @@ Instruction *InstCombiner::visitXor(BinaryOperator &I) {
         Type *SrcTy = Op0C->getOperand(0)->getType();
         if (SrcTy == Op1C->getOperand(0)->getType() && SrcTy->isIntegerTy() &&
             // Only do this if the casts both really cause code to be generated.
-            ShouldOptimizeCast(Op0C->getOpcode(), Op0C->getOperand(0), 
+            ShouldOptimizeCast(Op0C->getOpcode(), Op0C->getOperand(0),
                                I.getType()) &&
-            ShouldOptimizeCast(Op1C->getOpcode(), Op1C->getOperand(0), 
+            ShouldOptimizeCast(Op1C->getOpcode(), Op1C->getOperand(0),
                                I.getType())) {
           Value *NewOp = Builder->CreateXor(Op0C->getOperand(0),
                                             Op1C->getOperand(0), I.getName());
diff --git a/contrib/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp b/contrib/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
index 48f2704..64cd1bd 100644
--- a/contrib/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
+++ b/contrib/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
@@ -12,12 +12,17 @@
 //===----------------------------------------------------------------------===//
 
 #include "InstCombine.h"
-#include "llvm/Support/CallSite.h"
-#include "llvm/DataLayout.h"
+#include "llvm/ADT/Statistic.h"
 #include "llvm/Analysis/MemoryBuiltins.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/Support/CallSite.h"
+#include "llvm/Support/PatternMatch.h"
 #include "llvm/Transforms/Utils/BuildLibCalls.h"
 #include "llvm/Transforms/Utils/Local.h"
 using namespace llvm;
+using namespace PatternMatch;
+
+STATISTIC(NumSimplified, "Number of library calls simplified");
 
 /// getPromotedType - Return the specified type promoted as it would be to pass
 /// though a va_arg area.
@@ -273,25 +278,25 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
       return ReplaceInstUsesWith(CI, ConstantInt::get(CI.getType(), Size));
     return 0;
   }
-  case Intrinsic::bswap:
+  case Intrinsic::bswap: {
+    Value *IIOperand = II->getArgOperand(0);
+    Value *X = 0;
+
     // bswap(bswap(x)) -> x
-    if (IntrinsicInst *Operand = dyn_cast<IntrinsicInst>(II->getArgOperand(0)))
-      if (Operand->getIntrinsicID() == Intrinsic::bswap)
-        return ReplaceInstUsesWith(CI, Operand->getArgOperand(0));
+    if (match(IIOperand, m_BSwap(m_Value(X))))
+        return ReplaceInstUsesWith(CI, X);
 
     // bswap(trunc(bswap(x))) -> trunc(lshr(x, c))
-    if (TruncInst *TI = dyn_cast<TruncInst>(II->getArgOperand(0))) {
-      if (IntrinsicInst *Operand = dyn_cast<IntrinsicInst>(TI->getOperand(0)))
-        if (Operand->getIntrinsicID() == Intrinsic::bswap) {
-          unsigned C = Operand->getType()->getPrimitiveSizeInBits() -
-                       TI->getType()->getPrimitiveSizeInBits();
-          Value *CV = ConstantInt::get(Operand->getType(), C);
-          Value *V = Builder->CreateLShr(Operand->getArgOperand(0), CV);
-          return new TruncInst(V, TI->getType());
-        }
+    if (match(IIOperand, m_Trunc(m_BSwap(m_Value(X))))) {
+      unsigned C = X->getType()->getPrimitiveSizeInBits() -
+        IIOperand->getType()->getPrimitiveSizeInBits();
+      Value *CV = ConstantInt::get(X->getType(), C);
+      Value *V = Builder->CreateLShr(X, CV);
+      return new TruncInst(V, IIOperand->getType());
     }
-
     break;
+  }
+
   case Intrinsic::powi:
     if (ConstantInt *Power = dyn_cast<ConstantInt>(II->getArgOperand(1))) {
       // powi(x, 0) -> 1.0
@@ -690,7 +695,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
         if (Splat->isOne()) {
           if (Zext)
             return CastInst::CreateZExtOrBitCast(Arg0, II->getType());
-          // else    
+          // else
           return CastInst::CreateSExtOrBitCast(Arg0, II->getType());
         }
       }
@@ -785,8 +790,10 @@ static bool isSafeToEliminateVarargsCast(const CallSite CS,
 Instruction *InstCombiner::tryOptimizeCall(CallInst *CI, const DataLayout *TD) {
   if (CI->getCalledFunction() == 0) return 0;
 
-  if (Value *With = Simplifier->optimizeCall(CI))
-    return ReplaceInstUsesWith(*CI, With);
+  if (Value *With = Simplifier->optimizeCall(CI)) {
+    ++NumSimplified;
+    return CI->use_empty() ? CI : ReplaceInstUsesWith(*CI, With);
+  }
 
   return 0;
 }
@@ -894,7 +901,7 @@ Instruction *InstCombiner::visitCallSite(CallSite CS) {
       new StoreInst(ConstantInt::getTrue(Callee->getContext()),
                 UndefValue::get(Type::getInt1PtrTy(Callee->getContext())),
                                   OldCall);
-      // If OldCall dues not return void then replaceAllUsesWith undef.
+      // If OldCall does not return void then replaceAllUsesWith undef.
       // This allows ValueHandlers and custom metadata to adjust itself.
       if (!OldCall->getType()->isVoidTy())
         ReplaceInstUsesWith(*OldCall, UndefValue::get(OldCall->getType()));
@@ -977,7 +984,7 @@ bool InstCombiner::transformConstExprCastCall(CallSite CS) {
   if (Callee == 0)
     return false;
   Instruction *Caller = CS.getInstruction();
-  const AttrListPtr &CallerPAL = CS.getAttributes();
+  const AttributeSet &CallerPAL = CS.getAttributes();
 
   // Okay, this is a cast from a function to a different type.  Unless doing so
   // would cause a type conversion of one of our arguments, change this call to
@@ -1007,8 +1014,11 @@ bool InstCombiner::transformConstExprCastCall(CallSite CS) {
       return false;   // Cannot transform this return value.
 
     if (!CallerPAL.isEmpty() && !Caller->use_empty()) {
-      AttrBuilder RAttrs = CallerPAL.getRetAttributes();
-      if (RAttrs.hasAttributes(Attributes::typeIncompatible(NewRetTy)))
+      AttrBuilder RAttrs(CallerPAL, AttributeSet::ReturnIndex);
+      if (RAttrs.
+          hasAttributes(AttributeFuncs::
+                        typeIncompatible(NewRetTy, AttributeSet::ReturnIndex),
+                        AttributeSet::ReturnIndex))
         return false;   // Attribute not compatible with transformed value.
     }
 
@@ -1037,14 +1047,16 @@ bool InstCombiner::transformConstExprCastCall(CallSite CS) {
     if (!CastInst::isCastable(ActTy, ParamTy))
       return false;   // Cannot transform this parameter value.
 
-    Attributes Attrs = CallerPAL.getParamAttributes(i + 1);
-    if (AttrBuilder(Attrs).
-          hasAttributes(Attributes::typeIncompatible(ParamTy)))
+    if (AttrBuilder(CallerPAL.getParamAttributes(i + 1), i + 1).
+          hasAttributes(AttributeFuncs::
+                        typeIncompatible(ParamTy, i + 1), i + 1))
       return false;   // Attribute not compatible with transformed value.
 
     // If the parameter is passed as a byval argument, then we have to have a
     // sized type and the sized type has to have the same size as the old type.
-    if (ParamTy != ActTy && Attrs.hasAttribute(Attributes::ByVal)) {
+    if (ParamTy != ActTy &&
+        CallerPAL.getParamAttributes(i + 1).hasAttribute(i + 1,
+                                                         Attribute::ByVal)) {
       PointerType *ParamPTy = dyn_cast<PointerType>(ParamTy);
       if (ParamPTy == 0 || !ParamPTy->getElementType()->isSized() || TD == 0)
         return false;
@@ -1093,10 +1105,13 @@ bool InstCombiner::transformConstExprCastCall(CallSite CS) {
     // won't be dropping them.  Check that these extra arguments have attributes
     // that are compatible with being a vararg call argument.
     for (unsigned i = CallerPAL.getNumSlots(); i; --i) {
-      if (CallerPAL.getSlot(i - 1).Index <= FT->getNumParams())
+      unsigned Index = CallerPAL.getSlotIndex(i - 1);
+      if (Index <= FT->getNumParams())
         break;
-      Attributes PAttrs = CallerPAL.getSlot(i - 1).Attrs;
-      if (PAttrs.hasIncompatibleWithVarArgsAttrs())
+
+      // Check if it has an attribute that's incompatible with varargs.
+      AttributeSet PAttrs = CallerPAL.getSlotAttributes(i - 1);
+      if (PAttrs.hasAttribute(Index, Attribute::StructRet))
         return false;
     }
 
@@ -1105,21 +1120,23 @@ bool InstCombiner::transformConstExprCastCall(CallSite CS) {
   // inserting cast instructions as necessary.
   std::vector<Value*> Args;
   Args.reserve(NumActualArgs);
-  SmallVector<AttributeWithIndex, 8> attrVec;
+  SmallVector<AttributeSet, 8> attrVec;
   attrVec.reserve(NumCommonArgs);
 
   // Get any return attributes.
-  AttrBuilder RAttrs = CallerPAL.getRetAttributes();
+  AttrBuilder RAttrs(CallerPAL, AttributeSet::ReturnIndex);
 
   // If the return value is not being used, the type may not be compatible
   // with the existing attributes.  Wipe out any problematic attributes.
-  RAttrs.removeAttributes(Attributes::typeIncompatible(NewRetTy));
+  RAttrs.
+    removeAttributes(AttributeFuncs::
+                     typeIncompatible(NewRetTy, AttributeSet::ReturnIndex),
+                     AttributeSet::ReturnIndex);
 
   // Add the new return attributes.
   if (RAttrs.hasAttributes())
-    attrVec.push_back(
-      AttributeWithIndex::get(AttrListPtr::ReturnIndex,
-                              Attributes::get(FT->getContext(), RAttrs)));
+    attrVec.push_back(AttributeSet::get(Caller->getContext(),
+                                        AttributeSet::ReturnIndex, RAttrs));
 
   AI = CS.arg_begin();
   for (unsigned i = 0; i != NumCommonArgs; ++i, ++AI) {
@@ -1133,9 +1150,10 @@ bool InstCombiner::transformConstExprCastCall(CallSite CS) {
     }
 
     // Add any parameter attributes.
-    Attributes PAttrs = CallerPAL.getParamAttributes(i + 1);
+    AttrBuilder PAttrs(CallerPAL.getParamAttributes(i + 1), i + 1);
     if (PAttrs.hasAttributes())
-      attrVec.push_back(AttributeWithIndex::get(i + 1, PAttrs));
+      attrVec.push_back(AttributeSet::get(Caller->getContext(), i + 1,
+                                          PAttrs));
   }
 
   // If the function takes more arguments than the call was taking, add them
@@ -1145,10 +1163,8 @@ bool InstCombiner::transformConstExprCastCall(CallSite CS) {
 
   // If we are removing arguments to the function, emit an obnoxious warning.
   if (FT->getNumParams() < NumActualArgs) {
-    if (!FT->isVarArg()) {
-      errs() << "WARNING: While resolving call to function '"
-             << Callee->getName() << "' arguments were dropped!\n";
-    } else {
+    // TODO: if (!FT->isVarArg()) this call may be unreachable. PR14722
+    if (FT->isVarArg()) {
       // Add all of the arguments in their promoted form to the arg list.
       for (unsigned i = FT->getNumParams(); i != NumActualArgs; ++i, ++AI) {
         Type *PTy = getPromotedType((*AI)->getType());
@@ -1162,23 +1178,23 @@ bool InstCombiner::transformConstExprCastCall(CallSite CS) {
         }
 
         // Add any parameter attributes.
-        Attributes PAttrs = CallerPAL.getParamAttributes(i + 1);
+        AttrBuilder PAttrs(CallerPAL.getParamAttributes(i + 1), i + 1);
         if (PAttrs.hasAttributes())
-          attrVec.push_back(AttributeWithIndex::get(i + 1, PAttrs));
+          attrVec.push_back(AttributeSet::get(FT->getContext(), i + 1,
+                                              PAttrs));
       }
     }
   }
 
-  Attributes FnAttrs = CallerPAL.getFnAttributes();
-  if (FnAttrs.hasAttributes())
-    attrVec.push_back(AttributeWithIndex::get(AttrListPtr::FunctionIndex,
-                                              FnAttrs));
+  AttributeSet FnAttrs = CallerPAL.getFnAttributes();
+  if (CallerPAL.hasAttributes(AttributeSet::FunctionIndex))
+    attrVec.push_back(AttributeSet::get(Callee->getContext(), FnAttrs));
 
   if (NewRetTy->isVoidTy())
     Caller->setName("");   // Void type should not have a name.
 
-  const AttrListPtr &NewCallerPAL = AttrListPtr::get(Callee->getContext(),
-                                                     attrVec);
+  const AttributeSet &NewCallerPAL = AttributeSet::get(Callee->getContext(),
+                                                       attrVec);
 
   Instruction *NC;
   if (InvokeInst *II = dyn_cast<InvokeInst>(Caller)) {
@@ -1238,13 +1254,12 @@ InstCombiner::transformCallThroughTrampoline(CallSite CS,
   Value *Callee = CS.getCalledValue();
   PointerType *PTy = cast<PointerType>(Callee->getType());
   FunctionType *FTy = cast<FunctionType>(PTy->getElementType());
-  const AttrListPtr &Attrs = CS.getAttributes();
+  const AttributeSet &Attrs = CS.getAttributes();
 
   // If the call already has the 'nest' attribute somewhere then give up -
   // otherwise 'nest' would occur twice after splicing in the chain.
-  for (unsigned I = 0, E = Attrs.getNumAttrs(); I != E; ++I)
-    if (Attrs.getAttributesAtIndex(I).hasAttribute(Attributes::Nest))
-      return 0;
+  if (Attrs.hasAttrSomewhere(Attribute::Nest))
+    return 0;
 
   assert(Tramp &&
          "transformCallThroughTrampoline called with incorrect CallSite.");
@@ -1253,16 +1268,16 @@ InstCombiner::transformCallThroughTrampoline(CallSite CS,
   PointerType *NestFPTy = cast<PointerType>(NestF->getType());
   FunctionType *NestFTy = cast<FunctionType>(NestFPTy->getElementType());
 
-  const AttrListPtr &NestAttrs = NestF->getAttributes();
+  const AttributeSet &NestAttrs = NestF->getAttributes();
   if (!NestAttrs.isEmpty()) {
     unsigned NestIdx = 1;
     Type *NestTy = 0;
-    Attributes NestAttr;
+    AttributeSet NestAttr;
 
     // Look for a parameter marked with the 'nest' attribute.
     for (FunctionType::param_iterator I = NestFTy->param_begin(),
          E = NestFTy->param_end(); I != E; ++NestIdx, ++I)
-      if (NestAttrs.getParamAttributes(NestIdx).hasAttribute(Attributes::Nest)){
+      if (NestAttrs.hasAttribute(NestIdx, Attribute::Nest)) {
         // Record the parameter type and any other attributes.
         NestTy = *I;
         NestAttr = NestAttrs.getParamAttributes(NestIdx);
@@ -1274,17 +1289,16 @@ InstCombiner::transformCallThroughTrampoline(CallSite CS,
       std::vector<Value*> NewArgs;
       NewArgs.reserve(unsigned(CS.arg_end()-CS.arg_begin())+1);
 
-      SmallVector<AttributeWithIndex, 8> NewAttrs;
+      SmallVector<AttributeSet, 8> NewAttrs;
       NewAttrs.reserve(Attrs.getNumSlots() + 1);
 
       // Insert the nest argument into the call argument list, which may
       // mean appending it.  Likewise for attributes.
 
       // Add any result attributes.
-      Attributes Attr = Attrs.getRetAttributes();
-      if (Attr.hasAttributes())
-        NewAttrs.push_back(AttributeWithIndex::get(AttrListPtr::ReturnIndex,
-                                                   Attr));
+      if (Attrs.hasAttributes(AttributeSet::ReturnIndex))
+        NewAttrs.push_back(AttributeSet::get(Caller->getContext(),
+                                             Attrs.getRetAttributes()));
 
       {
         unsigned Idx = 1;
@@ -1296,7 +1310,8 @@ InstCombiner::transformCallThroughTrampoline(CallSite CS,
             if (NestVal->getType() != NestTy)
               NestVal = Builder->CreateBitCast(NestVal, NestTy, "nest");
             NewArgs.push_back(NestVal);
-            NewAttrs.push_back(AttributeWithIndex::get(NestIdx, NestAttr));
+            NewAttrs.push_back(AttributeSet::get(Caller->getContext(),
+                                                 NestAttr));
           }
 
           if (I == E)
@@ -1304,20 +1319,21 @@ InstCombiner::transformCallThroughTrampoline(CallSite CS,
 
           // Add the original argument and attributes.
           NewArgs.push_back(*I);
-          Attr = Attrs.getParamAttributes(Idx);
-          if (Attr.hasAttributes())
-            NewAttrs.push_back
-              (AttributeWithIndex::get(Idx + (Idx >= NestIdx), Attr));
+          AttributeSet Attr = Attrs.getParamAttributes(Idx);
+          if (Attr.hasAttributes(Idx)) {
+            AttrBuilder B(Attr, Idx);
+            NewAttrs.push_back(AttributeSet::get(Caller->getContext(),
+                                                 Idx + (Idx >= NestIdx), B));
+          }
 
           ++Idx, ++I;
         } while (1);
       }
 
       // Add any function attributes.
-      Attr = Attrs.getFnAttributes();
-      if (Attr.hasAttributes())
-        NewAttrs.push_back(AttributeWithIndex::get(AttrListPtr::FunctionIndex,
-                                                   Attr));
+      if (Attrs.hasAttributes(AttributeSet::FunctionIndex))
+        NewAttrs.push_back(AttributeSet::get(FTy->getContext(),
+                                             Attrs.getFnAttributes()));
 
       // The trampoline may have been bitcast to a bogus type (FTy).
       // Handle this by synthesizing a new function type, equal to FTy
@@ -1356,7 +1372,7 @@ InstCombiner::transformCallThroughTrampoline(CallSite CS,
         NestF->getType() == PointerType::getUnqual(NewFTy) ?
         NestF : ConstantExpr::getBitCast(NestF,
                                          PointerType::getUnqual(NewFTy));
-      const AttrListPtr &NewPAL = AttrListPtr::get(FTy->getContext(), NewAttrs);
+      const AttributeSet &NewPAL = AttributeSet::get(FTy->getContext(), NewAttrs);
 
       Instruction *NewCaller;
       if (InvokeInst *II = dyn_cast<InvokeInst>(Caller)) {
diff --git a/contrib/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp b/contrib/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp
index bb59db8..2ee1278 100644
--- a/contrib/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp
+++ b/contrib/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp
@@ -13,9 +13,9 @@
 
 #include "InstCombine.h"
 #include "llvm/Analysis/ConstantFolding.h"
-#include "llvm/DataLayout.h"
-#include "llvm/Target/TargetLibraryInfo.h"
+#include "llvm/IR/DataLayout.h"
 #include "llvm/Support/PatternMatch.h"
+#include "llvm/Target/TargetLibraryInfo.h"
 using namespace llvm;
 using namespace PatternMatch;
 
@@ -30,7 +30,7 @@ static Value *DecomposeSimpleLinearExpr(Value *Val, unsigned &Scale,
     Scale  = 0;
     return ConstantInt::get(Val->getType(), 0);
   }
-  
+
   if (BinaryOperator *I = dyn_cast<BinaryOperator>(Val)) {
     // Cannot look past anything that might overflow.
     OverflowingBinaryOperator *OBI = dyn_cast<OverflowingBinaryOperator>(Val);
@@ -47,19 +47,19 @@ static Value *DecomposeSimpleLinearExpr(Value *Val, unsigned &Scale,
         Offset = 0;
         return I->getOperand(0);
       }
-      
+
       if (I->getOpcode() == Instruction::Mul) {
         // This value is scaled by 'RHS'.
         Scale = RHS->getZExtValue();
         Offset = 0;
         return I->getOperand(0);
       }
-      
+
       if (I->getOpcode() == Instruction::Add) {
-        // We have X+C.  Check to see if we really have (X*C2)+C1, 
+        // We have X+C.  Check to see if we really have (X*C2)+C1,
         // where C1 is divisible by C2.
         unsigned SubScale;
-        Value *SubVal = 
+        Value *SubVal =
           DecomposeSimpleLinearExpr(I->getOperand(0), SubScale, Offset);
         Offset += RHS->getZExtValue();
         Scale = SubScale;
@@ -82,7 +82,7 @@ Instruction *InstCombiner::PromoteCastOfAllocation(BitCastInst &CI,
   if (!TD) return 0;
 
   PointerType *PTy = cast<PointerType>(CI.getType());
-  
+
   BuilderTy AllocaBuilder(*Builder);
   AllocaBuilder.SetInsertPoint(AI.getParent(), &AI);
 
@@ -104,13 +104,19 @@ Instruction *InstCombiner::PromoteCastOfAllocation(BitCastInst &CI,
   uint64_t CastElTySize = TD->getTypeAllocSize(CastElTy);
   if (CastElTySize == 0 || AllocElTySize == 0) return 0;
 
+  // If the allocation has multiple uses, only promote it if we're not
+  // shrinking the amount of memory being allocated.
+  uint64_t AllocElTyStoreSize = TD->getTypeStoreSize(AllocElTy);
+  uint64_t CastElTyStoreSize = TD->getTypeStoreSize(CastElTy);
+  if (!AI.hasOneUse() && CastElTyStoreSize < AllocElTyStoreSize) return 0;
+
   // See if we can satisfy the modulus by pulling a scale out of the array
   // size argument.
   unsigned ArraySizeScale;
   uint64_t ArrayOffset;
   Value *NumElements = // See if the array size is a decomposable linear expr.
     DecomposeSimpleLinearExpr(AI.getOperand(0), ArraySizeScale, ArrayOffset);
- 
+
   // If we can now satisfy the modulus, by using a non-1 scale, we really can
   // do the xform.
   if ((AllocElTySize*ArraySizeScale) % CastElTySize != 0 ||
@@ -125,17 +131,17 @@ Instruction *InstCombiner::PromoteCastOfAllocation(BitCastInst &CI,
     // Insert before the alloca, not before the cast.
     Amt = AllocaBuilder.CreateMul(Amt, NumElements);
   }
-  
+
   if (uint64_t Offset = (AllocElTySize*ArrayOffset)/CastElTySize) {
     Value *Off = ConstantInt::get(AI.getArraySize()->getType(),
                                   Offset, true);
     Amt = AllocaBuilder.CreateAdd(Amt, Off);
   }
-  
+
   AllocaInst *New = AllocaBuilder.CreateAlloca(CastElTy, Amt);
   New->setAlignment(AI.getAlignment());
   New->takeName(&AI);
-  
+
   // If the allocation has multiple real uses, insert a cast and change all
   // things that used it to use the new cast.  This will also hack on CI, but it
   // will die soon.
@@ -148,10 +154,10 @@ Instruction *InstCombiner::PromoteCastOfAllocation(BitCastInst &CI,
   return ReplaceInstUsesWith(CI, New);
 }
 
-/// EvaluateInDifferentType - Given an expression that 
+/// EvaluateInDifferentType - Given an expression that
 /// CanEvaluateTruncated or CanEvaluateSExtd returns true for, actually
 /// insert the code to evaluate the expression.
-Value *InstCombiner::EvaluateInDifferentType(Value *V, Type *Ty, 
+Value *InstCombiner::EvaluateInDifferentType(Value *V, Type *Ty,
                                              bool isSigned) {
   if (Constant *C = dyn_cast<Constant>(V)) {
     C = ConstantExpr::getIntegerCast(C, Ty, isSigned /*Sext or ZExt*/);
@@ -181,7 +187,7 @@ Value *InstCombiner::EvaluateInDifferentType(Value *V, Type *Ty,
     Value *RHS = EvaluateInDifferentType(I->getOperand(1), Ty, isSigned);
     Res = BinaryOperator::Create((Instruction::BinaryOps)Opc, LHS, RHS);
     break;
-  }    
+  }
   case Instruction::Trunc:
   case Instruction::ZExt:
   case Instruction::SExt:
@@ -190,7 +196,7 @@ Value *InstCombiner::EvaluateInDifferentType(Value *V, Type *Ty,
     // new.
     if (I->getOperand(0)->getType() == Ty)
       return I->getOperand(0);
-    
+
     // Otherwise, must be the same type of cast, so just reinsert a new one.
     // This also handles the case of zext(trunc(x)) -> zext(x).
     Res = CastInst::CreateIntegerCast(I->getOperand(0), Ty,
@@ -212,11 +218,11 @@ Value *InstCombiner::EvaluateInDifferentType(Value *V, Type *Ty,
     Res = NPN;
     break;
   }
-  default: 
+  default:
     // TODO: Can handle more cases here.
     llvm_unreachable("Unreachable!");
   }
-  
+
   Res->takeName(I);
   return InsertNewInstWith(Res, *I);
 }
@@ -224,7 +230,7 @@ Value *InstCombiner::EvaluateInDifferentType(Value *V, Type *Ty,
 
 /// This function is a wrapper around CastInst::isEliminableCastPair. It
 /// simply extracts arguments and returns what that function returns.
-static Instruction::CastOps 
+static Instruction::CastOps
 isEliminableCastPair(
   const CastInst *CI, ///< The first cast instruction
   unsigned opcode,       ///< The opcode of the second cast instruction
@@ -253,7 +259,7 @@ isEliminableCastPair(
   if ((Res == Instruction::IntToPtr && SrcTy != DstIntPtrTy) ||
       (Res == Instruction::PtrToInt && DstTy != SrcIntPtrTy))
     Res = 0;
-  
+
   return Instruction::CastOps(Res);
 }
 
@@ -265,18 +271,18 @@ bool InstCombiner::ShouldOptimizeCast(Instruction::CastOps opc, const Value *V,
                                       Type *Ty) {
   // Noop casts and casts of constants should be eliminated trivially.
   if (V->getType() == Ty || isa<Constant>(V)) return false;
-  
+
   // If this is another cast that can be eliminated, we prefer to have it
   // eliminated.
   if (const CastInst *CI = dyn_cast<CastInst>(V))
     if (isEliminableCastPair(CI, opc, Ty, TD))
       return false;
-  
+
   // If this is a vector sext from a compare, then we don't want to break the
   // idiom where each element of the extended vector is either zero or all ones.
   if (opc == Instruction::SExt && isa<CmpInst>(V) && Ty->isVectorTy())
     return false;
-  
+
   return true;
 }
 
@@ -288,7 +294,7 @@ Instruction *InstCombiner::commonCastTransforms(CastInst &CI) {
   // Many cases of "cast of a cast" are eliminable. If it's eliminable we just
   // eliminate it now.
   if (CastInst *CSrc = dyn_cast<CastInst>(Src)) {   // A->B->C cast
-    if (Instruction::CastOps opc = 
+    if (Instruction::CastOps opc =
         isEliminableCastPair(CSrc, CI.getOpcode(), CI.getType(), TD)) {
       // The first cast (CSrc) is eliminable so we need to fix up or replace
       // the second cast (CI). CSrc will then have a good chance of being dead.
@@ -311,7 +317,7 @@ Instruction *InstCombiner::commonCastTransforms(CastInst &CI) {
       if (Instruction *NV = FoldOpIntoPhi(CI))
         return NV;
   }
-  
+
   return 0;
 }
 
@@ -330,15 +336,15 @@ static bool CanEvaluateTruncated(Value *V, Type *Ty) {
   // We can always evaluate constants in another type.
   if (isa<Constant>(V))
     return true;
-  
+
   Instruction *I = dyn_cast<Instruction>(V);
   if (!I) return false;
-  
+
   Type *OrigTy = V->getType();
-  
+
   // If this is an extension from the dest type, we can eliminate it, even if it
   // has multiple uses.
-  if ((isa<ZExtInst>(I) || isa<SExtInst>(I)) && 
+  if ((isa<ZExtInst>(I) || isa<SExtInst>(I)) &&
       I->getOperand(0)->getType() == Ty)
     return true;
 
@@ -423,29 +429,29 @@ static bool CanEvaluateTruncated(Value *V, Type *Ty) {
     // TODO: Can handle more cases here.
     break;
   }
-  
+
   return false;
 }
 
 Instruction *InstCombiner::visitTrunc(TruncInst &CI) {
   if (Instruction *Result = commonCastTransforms(CI))
     return Result;
-  
-  // See if we can simplify any instructions used by the input whose sole 
+
+  // See if we can simplify any instructions used by the input whose sole
   // purpose is to compute bits we don't care about.
   if (SimplifyDemandedInstructionBits(CI))
     return &CI;
-  
+
   Value *Src = CI.getOperand(0);
   Type *DestTy = CI.getType(), *SrcTy = Src->getType();
-  
+
   // Attempt to truncate the entire input expression tree to the destination
   // type.   Only do this if the dest type is a simple type, don't convert the
   // expression tree to something weird like i93 unless the source is also
   // strange.
   if ((DestTy->isVectorTy() || ShouldChangeType(SrcTy, DestTy)) &&
       CanEvaluateTruncated(Src, DestTy)) {
-      
+
     // If this cast is a truncate, evaluting in a different type always
     // eliminates the cast, so it is always a win.
     DEBUG(dbgs() << "ICE: EvaluateInDifferentType converting expression type"
@@ -462,7 +468,7 @@ Instruction *InstCombiner::visitTrunc(TruncInst &CI) {
     Value *Zero = Constant::getNullValue(Src->getType());
     return new ICmpInst(ICmpInst::ICMP_NE, Src, Zero);
   }
-  
+
   // Transform trunc(lshr (zext A), Cst) to eliminate one type conversion.
   Value *A = 0; ConstantInt *Cst = 0;
   if (Src->hasOneUse() &&
@@ -472,7 +478,7 @@ Instruction *InstCombiner::visitTrunc(TruncInst &CI) {
     // ASize < MidSize   and MidSize > ResultSize, but don't know the relation
     // between ASize and ResultSize.
     unsigned ASize = A->getType()->getPrimitiveSizeInBits();
-    
+
     // If the shift amount is larger than the size of A, then the result is
     // known to be zero because all the input bits got shifted out.
     if (Cst->getZExtValue() >= ASize)
@@ -485,7 +491,7 @@ Instruction *InstCombiner::visitTrunc(TruncInst &CI) {
     Shift->takeName(Src);
     return CastInst::CreateIntegerCast(Shift, CI.getType(), false);
   }
-  
+
   // Transform "trunc (and X, cst)" -> "and (trunc X), cst" so long as the dest
   // type isn't non-native.
   if (Src->hasOneUse() && isa<IntegerType>(Src->getType()) &&
@@ -508,7 +514,7 @@ Instruction *InstCombiner::transformZExtICmp(ICmpInst *ICI, Instruction &CI,
   // cast to integer to avoid the comparison.
   if (ConstantInt *Op1C = dyn_cast<ConstantInt>(ICI->getOperand(1))) {
     const APInt &Op1CV = Op1C->getValue();
-      
+
     // zext (x <s  0) to i32 --> x>>u31      true if signbit set.
     // zext (x >s -1) to i32 --> (x>>u31)^1  true if signbit clear.
     if ((ICI->getPredicate() == ICmpInst::ICMP_SLT && Op1CV == 0) ||
@@ -538,14 +544,14 @@ Instruction *InstCombiner::transformZExtICmp(ICmpInst *ICI, Instruction &CI,
     // zext (X != 0) to i32 --> X>>1     iff X has only the 2nd bit set.
     // zext (X != 1) to i32 --> X^1      iff X has only the low bit set.
     // zext (X != 2) to i32 --> (X>>1)^1 iff X has only the 2nd bit set.
-    if ((Op1CV == 0 || Op1CV.isPowerOf2()) && 
+    if ((Op1CV == 0 || Op1CV.isPowerOf2()) &&
         // This only works for EQ and NE
         ICI->isEquality()) {
       // If Op1C some other power of two, convert:
       uint32_t BitWidth = Op1C->getType()->getBitWidth();
       APInt KnownZero(BitWidth, 0), KnownOne(BitWidth, 0);
       ComputeMaskedBits(ICI->getOperand(0), KnownZero, KnownOne);
-        
+
       APInt KnownZeroMask(~KnownZero);
       if (KnownZeroMask.isPowerOf2()) { // Exactly 1 possible 1?
         if (!DoXform) return ICI;
@@ -559,7 +565,7 @@ Instruction *InstCombiner::transformZExtICmp(ICmpInst *ICI, Instruction &CI,
           Res = ConstantExpr::getZExt(Res, CI.getType());
           return ReplaceInstUsesWith(CI, Res);
         }
-          
+
         uint32_t ShiftAmt = KnownZeroMask.logBase2();
         Value *In = ICI->getOperand(0);
         if (ShiftAmt) {
@@ -568,12 +574,12 @@ Instruction *InstCombiner::transformZExtICmp(ICmpInst *ICI, Instruction &CI,
           In = Builder->CreateLShr(In, ConstantInt::get(In->getType(),ShiftAmt),
                                    In->getName()+".lobit");
         }
-          
+
         if ((Op1CV != 0) == isNE) { // Toggle the low bit.
           Constant *One = ConstantInt::get(In->getType(), 1);
           In = Builder->CreateXor(In, One);
         }
-          
+
         if (CI.getType() == In->getType())
           return ReplaceInstUsesWith(CI, In);
         return CastInst::CreateIntegerCast(In, CI.getType(), false/*ZExt*/);
@@ -646,19 +652,19 @@ static bool CanEvaluateZExtd(Value *V, Type *Ty, unsigned &BitsToClear) {
   BitsToClear = 0;
   if (isa<Constant>(V))
     return true;
-  
+
   Instruction *I = dyn_cast<Instruction>(V);
   if (!I) return false;
-  
+
   // If the input is a truncate from the destination type, we can trivially
   // eliminate it.
   if (isa<TruncInst>(I) && I->getOperand(0)->getType() == Ty)
     return true;
-  
+
   // We can't extend or shrink something that has multiple uses: doing so would
   // require duplicating the instruction in general, which isn't profitable.
   if (!I->hasOneUse()) return false;
-  
+
   unsigned Opc = I->getOpcode(), Tmp;
   switch (Opc) {
   case Instruction::ZExt:  // zext(zext(x)) -> zext(x).
@@ -678,7 +684,7 @@ static bool CanEvaluateZExtd(Value *V, Type *Ty, unsigned &BitsToClear) {
     // These can all be promoted if neither operand has 'bits to clear'.
     if (BitsToClear == 0 && Tmp == 0)
       return true;
-      
+
     // If the operation is an AND/OR/XOR and the bits to clear are zero in the
     // other side, BitsToClear is ok.
     if (Tmp == 0 &&
@@ -691,10 +697,10 @@ static bool CanEvaluateZExtd(Value *V, Type *Ty, unsigned &BitsToClear) {
                             APInt::getHighBitsSet(VSize, BitsToClear)))
         return true;
     }
-      
+
     // Otherwise, we don't know how to analyze this BitsToClear case yet.
     return false;
-      
+
   case Instruction::LShr:
     // We can promote lshr(x, cst) if we can promote x.  This requires the
     // ultimate 'and' to clear out the high zero bits we're clearing out though.
@@ -716,7 +722,7 @@ static bool CanEvaluateZExtd(Value *V, Type *Ty, unsigned &BitsToClear) {
         Tmp != BitsToClear)
       return false;
     return true;
-      
+
   case Instruction::PHI: {
     // We can change a phi if we can change all operands.  Note that we never
     // get into trouble with cyclic PHIs here because we only consider
@@ -739,48 +745,48 @@ static bool CanEvaluateZExtd(Value *V, Type *Ty, unsigned &BitsToClear) {
 }
 
 Instruction *InstCombiner::visitZExt(ZExtInst &CI) {
-  // If this zero extend is only used by a truncate, let the truncate by
+  // If this zero extend is only used by a truncate, let the truncate be
   // eliminated before we try to optimize this zext.
   if (CI.hasOneUse() && isa<TruncInst>(CI.use_back()))
     return 0;
-  
+
   // If one of the common conversion will work, do it.
   if (Instruction *Result = commonCastTransforms(CI))
     return Result;
 
-  // See if we can simplify any instructions used by the input whose sole 
+  // See if we can simplify any instructions used by the input whose sole
   // purpose is to compute bits we don't care about.
   if (SimplifyDemandedInstructionBits(CI))
     return &CI;
-  
+
   Value *Src = CI.getOperand(0);
   Type *SrcTy = Src->getType(), *DestTy = CI.getType();
-  
+
   // Attempt to extend the entire input expression tree to the destination
   // type.   Only do this if the dest type is a simple type, don't convert the
   // expression tree to something weird like i93 unless the source is also
   // strange.
   unsigned BitsToClear;
   if ((DestTy->isVectorTy() || ShouldChangeType(SrcTy, DestTy)) &&
-      CanEvaluateZExtd(Src, DestTy, BitsToClear)) { 
+      CanEvaluateZExtd(Src, DestTy, BitsToClear)) {
     assert(BitsToClear < SrcTy->getScalarSizeInBits() &&
            "Unreasonable BitsToClear");
-    
+
     // Okay, we can transform this!  Insert the new expression now.
     DEBUG(dbgs() << "ICE: EvaluateInDifferentType converting expression type"
           " to avoid zero extend: " << CI);
     Value *Res = EvaluateInDifferentType(Src, DestTy, false);
     assert(Res->getType() == DestTy);
-    
+
     uint32_t SrcBitsKept = SrcTy->getScalarSizeInBits()-BitsToClear;
     uint32_t DestBitSize = DestTy->getScalarSizeInBits();
-    
+
     // If the high bits are already filled with zeros, just replace this
     // cast with the result.
     if (MaskedValueIsZero(Res, APInt::getHighBitsSet(DestBitSize,
                                                      DestBitSize-SrcBitsKept)))
       return ReplaceInstUsesWith(CI, Res);
-    
+
     // We need to emit an AND to clear the high bits.
     Constant *C = ConstantInt::get(Res->getType(),
                                APInt::getLowBitsSet(DestBitSize, SrcBitsKept));
@@ -792,7 +798,7 @@ Instruction *InstCombiner::visitZExt(ZExtInst &CI) {
   // 'and' which will be much cheaper than the pair of casts.
   if (TruncInst *CSrc = dyn_cast<TruncInst>(Src)) {   // A->B->C cast
     // TODO: Subsume this into EvaluateInDifferentType.
-    
+
     // Get the sizes of the types involved.  We know that the intermediate type
     // will be smaller than A or C, but don't know the relation between A and C.
     Value *A = CSrc->getOperand(0);
@@ -809,7 +815,7 @@ Instruction *InstCombiner::visitZExt(ZExtInst &CI) {
       Value *And = Builder->CreateAnd(A, AndConst, CSrc->getName()+".mask");
       return new ZExtInst(And, CI.getType());
     }
-    
+
     if (SrcSize == DstSize) {
       APInt AndValue(APInt::getLowBitsSet(SrcSize, MidSize));
       return BinaryOperator::CreateAnd(A, ConstantInt::get(A->getType(),
@@ -818,7 +824,7 @@ Instruction *InstCombiner::visitZExt(ZExtInst &CI) {
     if (SrcSize > DstSize) {
       Value *Trunc = Builder->CreateTrunc(A, CI.getType());
       APInt AndValue(APInt::getLowBitsSet(DstSize, MidSize));
-      return BinaryOperator::CreateAnd(Trunc, 
+      return BinaryOperator::CreateAnd(Trunc,
                                        ConstantInt::get(Trunc->getType(),
                                                         AndValue));
     }
@@ -876,7 +882,7 @@ Instruction *InstCombiner::visitZExt(ZExtInst &CI) {
     Value *New = Builder->CreateZExt(X, CI.getType());
     return BinaryOperator::CreateXor(New, ConstantInt::get(CI.getType(), 1));
   }
-  
+
   return 0;
 }
 
@@ -989,14 +995,14 @@ static bool CanEvaluateSExtd(Value *V, Type *Ty) {
   // If this is a constant, it can be trivially promoted.
   if (isa<Constant>(V))
     return true;
-  
+
   Instruction *I = dyn_cast<Instruction>(V);
   if (!I) return false;
-  
+
   // If this is a truncate from the dest type, we can trivially eliminate it.
   if (isa<TruncInst>(I) && I->getOperand(0)->getType() == Ty)
     return true;
-  
+
   // We can't extend or shrink something that has multiple uses: doing so would
   // require duplicating the instruction in general, which isn't profitable.
   if (!I->hasOneUse()) return false;
@@ -1015,14 +1021,14 @@ static bool CanEvaluateSExtd(Value *V, Type *Ty) {
     // These operators can all arbitrarily be extended if their inputs can.
     return CanEvaluateSExtd(I->getOperand(0), Ty) &&
            CanEvaluateSExtd(I->getOperand(1), Ty);
-      
+
   //case Instruction::Shl:   TODO
   //case Instruction::LShr:  TODO
-      
+
   case Instruction::Select:
     return CanEvaluateSExtd(I->getOperand(1), Ty) &&
            CanEvaluateSExtd(I->getOperand(2), Ty);
-      
+
   case Instruction::PHI: {
     // We can change a phi if we can change all operands.  Note that we never
     // get into trouble with cyclic PHIs here because we only consider
@@ -1036,24 +1042,24 @@ static bool CanEvaluateSExtd(Value *V, Type *Ty) {
     // TODO: Can handle more cases here.
     break;
   }
-  
+
   return false;
 }
 
 Instruction *InstCombiner::visitSExt(SExtInst &CI) {
-  // If this sign extend is only used by a truncate, let the truncate by
-  // eliminated before we try to optimize this zext.
+  // If this sign extend is only used by a truncate, let the truncate be
+  // eliminated before we try to optimize this sext.
   if (CI.hasOneUse() && isa<TruncInst>(CI.use_back()))
     return 0;
-  
+
   if (Instruction *I = commonCastTransforms(CI))
     return I;
-  
-  // See if we can simplify any instructions used by the input whose sole 
+
+  // See if we can simplify any instructions used by the input whose sole
   // purpose is to compute bits we don't care about.
   if (SimplifyDemandedInstructionBits(CI))
     return &CI;
-  
+
   Value *Src = CI.getOperand(0);
   Type *SrcTy = Src->getType(), *DestTy = CI.getType();
 
@@ -1076,7 +1082,7 @@ Instruction *InstCombiner::visitSExt(SExtInst &CI) {
     // cast with the result.
     if (ComputeNumSignBits(Res) > DestBitSize - SrcBitSize)
       return ReplaceInstUsesWith(CI, Res);
-    
+
     // We need to emit a shl + ashr to do the sign extend.
     Value *ShAmt = ConstantInt::get(DestTy, DestBitSize-SrcBitSize);
     return BinaryOperator::CreateAShr(Builder->CreateShl(Res, ShAmt, "sext"),
@@ -1089,7 +1095,7 @@ Instruction *InstCombiner::visitSExt(SExtInst &CI) {
     if (TI->hasOneUse() && TI->getOperand(0)->getType() == DestTy) {
       uint32_t SrcBitSize = SrcTy->getScalarSizeInBits();
       uint32_t DestBitSize = DestTy->getScalarSizeInBits();
-      
+
       // We need to emit a shl + ashr to do the sign extend.
       Value *ShAmt = ConstantInt::get(DestTy, DestBitSize-SrcBitSize);
       Value *Res = Builder->CreateShl(TI->getOperand(0), ShAmt, "sext");
@@ -1125,7 +1131,7 @@ Instruction *InstCombiner::visitSExt(SExtInst &CI) {
     A = Builder->CreateShl(A, ShAmtV, CI.getName());
     return BinaryOperator::CreateAShr(A, ShAmtV);
   }
-  
+
   return 0;
 }
 
@@ -1147,7 +1153,7 @@ static Value *LookThroughFPExtensions(Value *V) {
   if (Instruction *I = dyn_cast<Instruction>(V))
     if (I->getOpcode() == Instruction::FPExt)
       return LookThroughFPExtensions(I->getOperand(0));
-  
+
   // If this value is a constant, return the constant in the smallest FP type
   // that can accurately represent it.  This allows us to turn
   // (float)((double)X+2.0) into x+2.0f.
@@ -1166,14 +1172,14 @@ static Value *LookThroughFPExtensions(Value *V) {
       return V;
     // Don't try to shrink to various long double types.
   }
-  
+
   return V;
 }
 
 Instruction *InstCombiner::visitFPTrunc(FPTruncInst &CI) {
   if (Instruction *I = commonCastTransforms(CI))
     return I;
-  
+
   // If we have fptrunc(fadd (fpextend x), (fpextend y)), where x and y are
   // smaller than the destination type, we can eliminate the truncate by doing
   // the add as the smaller type.  This applies to fadd/fsub/fmul/fdiv as well
@@ -1190,7 +1196,7 @@ Instruction *InstCombiner::visitFPTrunc(FPTruncInst &CI) {
       Type *SrcTy = OpI->getType();
       Value *LHSTrunc = LookThroughFPExtensions(OpI->getOperand(0));
       Value *RHSTrunc = LookThroughFPExtensions(OpI->getOperand(1));
-      if (LHSTrunc->getType() != SrcTy && 
+      if (LHSTrunc->getType() != SrcTy &&
           RHSTrunc->getType() != SrcTy) {
         unsigned DstSize = CI.getType()->getScalarSizeInBits();
         // If the source types were both smaller than the destination type of
@@ -1202,10 +1208,36 @@ Instruction *InstCombiner::visitFPTrunc(FPTruncInst &CI) {
           return BinaryOperator::Create(OpI->getOpcode(), LHSTrunc, RHSTrunc);
         }
       }
-      break;  
+      break;
+    }
+
+    // (fptrunc (fneg x)) -> (fneg (fptrunc x))
+    if (BinaryOperator::isFNeg(OpI)) {
+      Value *InnerTrunc = Builder->CreateFPTrunc(OpI->getOperand(1),
+                                                 CI.getType());
+      return BinaryOperator::CreateFNeg(InnerTrunc);
     }
   }
-  
+
+  IntrinsicInst *II = dyn_cast<IntrinsicInst>(CI.getOperand(0));
+  if (II) {
+    switch (II->getIntrinsicID()) {
+      default: break;
+      case Intrinsic::fabs: {
+        // (fptrunc (fabs x)) -> (fabs (fptrunc x))
+        Value *InnerTrunc = Builder->CreateFPTrunc(II->getArgOperand(0),
+                                                   CI.getType());
+        Type *IntrinsicType[] = { CI.getType() };
+        Function *Overload =
+          Intrinsic::getDeclaration(CI.getParent()->getParent()->getParent(),
+                                    II->getIntrinsicID(), IntrinsicType);
+
+        Value *Args[] = { InnerTrunc };
+        return CallInst::Create(Overload, Args, II->getName());
+      }
+    }
+  }
+
   // Fold (fptrunc (sqrt (fpext x))) -> (sqrtf x)
   CallInst *Call = dyn_cast<CallInst>(CI.getOperand(0));
   if (Call && Call->getCalledFunction() && TLI->has(LibFunc::sqrtf) &&
@@ -1220,7 +1252,7 @@ Instruction *InstCombiner::visitFPTrunc(FPTruncInst &CI) {
         Arg->getOperand(0)->getType()->isFloatTy()) {
       Function *Callee = Call->getCalledFunction();
       Module *M = CI.getParent()->getParent()->getParent();
-      Constant *SqrtfFunc = M->getOrInsertFunction("sqrtf", 
+      Constant *SqrtfFunc = M->getOrInsertFunction("sqrtf",
                                                    Callee->getAttributes(),
                                                    Builder->getFloatTy(),
                                                    Builder->getFloatTy(),
@@ -1228,15 +1260,15 @@ Instruction *InstCombiner::visitFPTrunc(FPTruncInst &CI) {
       CallInst *ret = CallInst::Create(SqrtfFunc, Arg->getOperand(0),
                                        "sqrtfcall");
       ret->setAttributes(Callee->getAttributes());
-      
-      
+
+
       // Remove the old Call.  With -fmath-errno, it won't get marked readnone.
       ReplaceInstUsesWith(*Call, UndefValue::get(Call->getType()));
       EraseInstFromFunction(*Call);
       return ret;
     }
   }
-  
+
   return 0;
 }
 
@@ -1254,7 +1286,7 @@ Instruction *InstCombiner::visitFPToUI(FPToUIInst &FI) {
   // This is safe if the intermediate type has enough bits in its mantissa to
   // accurately represent all values of X.  For example, do not do this with
   // i64->float->i64.  This is also safe for sitofp case, because any negative
-  // 'X' value would cause an undefined result for the fptoui. 
+  // 'X' value would cause an undefined result for the fptoui.
   if ((isa<UIToFPInst>(OpI) || isa<SIToFPInst>(OpI)) &&
       OpI->getOperand(0)->getType() == FI.getType() &&
       (int)FI.getType()->getScalarSizeInBits() < /*extra bit for sign */
@@ -1268,19 +1300,19 @@ Instruction *InstCombiner::visitFPToSI(FPToSIInst &FI) {
   Instruction *OpI = dyn_cast<Instruction>(FI.getOperand(0));
   if (OpI == 0)
     return commonCastTransforms(FI);
-  
+
   // fptosi(sitofp(X)) --> X
   // fptosi(uitofp(X)) --> X
   // This is safe if the intermediate type has enough bits in its mantissa to
   // accurately represent all values of X.  For example, do not do this with
   // i64->float->i64.  This is also safe for sitofp case, because any negative
-  // 'X' value would cause an undefined result for the fptoui. 
+  // 'X' value would cause an undefined result for the fptoui.
   if ((isa<UIToFPInst>(OpI) || isa<SIToFPInst>(OpI)) &&
       OpI->getOperand(0)->getType() == FI.getType() &&
       (int)FI.getType()->getScalarSizeInBits() <=
                     OpI->getType()->getFPMantissaWidth())
     return ReplaceInstUsesWith(FI, OpI->getOperand(0));
-  
+
   return commonCastTransforms(FI);
 }
 
@@ -1296,21 +1328,16 @@ Instruction *InstCombiner::visitIntToPtr(IntToPtrInst &CI) {
   // If the source integer type is not the intptr_t type for this target, do a
   // trunc or zext to the intptr_t type, then inttoptr of it.  This allows the
   // cast to be exposed to other transforms.
-  if (TD) {
-    if (CI.getOperand(0)->getType()->getScalarSizeInBits() >
-        TD->getPointerSizeInBits()) {
-      Value *P = Builder->CreateTrunc(CI.getOperand(0),
-                                      TD->getIntPtrType(CI.getContext()));
-      return new IntToPtrInst(P, CI.getType());
-    }
-    if (CI.getOperand(0)->getType()->getScalarSizeInBits() <
-        TD->getPointerSizeInBits()) {
-      Value *P = Builder->CreateZExt(CI.getOperand(0),
-                                     TD->getIntPtrType(CI.getContext()));
-      return new IntToPtrInst(P, CI.getType());
-    }
+  if (TD && CI.getOperand(0)->getType()->getScalarSizeInBits() !=
+      TD->getPointerSizeInBits()) {
+    Type *Ty = TD->getIntPtrType(CI.getContext());
+    if (CI.getType()->isVectorTy()) // Handle vectors of pointers.
+      Ty = VectorType::get(Ty, CI.getType()->getVectorNumElements());
+
+    Value *P = Builder->CreateZExtOrTrunc(CI.getOperand(0), Ty);
+    return new IntToPtrInst(P, CI.getType());
   }
-  
+
   if (Instruction *I = commonCastTransforms(CI))
     return I;
 
@@ -1320,34 +1347,32 @@ Instruction *InstCombiner::visitIntToPtr(IntToPtrInst &CI) {
 /// @brief Implement the transforms for cast of pointer (bitcast/ptrtoint)
 Instruction *InstCombiner::commonPointerCastTransforms(CastInst &CI) {
   Value *Src = CI.getOperand(0);
-  
+
   if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Src)) {
     // If casting the result of a getelementptr instruction with no offset, turn
     // this into a cast of the original pointer!
     if (GEP->hasAllZeroIndices()) {
       // Changing the cast operand is usually not a good idea but it is safe
-      // here because the pointer operand is being replaced with another 
+      // here because the pointer operand is being replaced with another
       // pointer operand so the opcode doesn't need to change.
       Worklist.Add(GEP);
       CI.setOperand(0, GEP->getOperand(0));
       return &CI;
     }
-    
+
     // If the GEP has a single use, and the base pointer is a bitcast, and the
     // GEP computes a constant offset, see if we can convert these three
     // instructions into fewer.  This typically happens with unions and other
     // non-type-safe code.
+    APInt Offset(TD ? TD->getPointerSizeInBits() : 1, 0);
     if (TD && GEP->hasOneUse() && isa<BitCastInst>(GEP->getOperand(0)) &&
-        GEP->hasAllConstantIndices()) {
-      SmallVector<Value*, 8> Ops(GEP->idx_begin(), GEP->idx_end());
-      int64_t Offset = TD->getIndexedOffset(GEP->getPointerOperandType(), Ops);
-
+        GEP->accumulateConstantOffset(*TD, Offset)) {
       // Get the base pointer input of the bitcast, and the type it points to.
       Value *OrigBase = cast<BitCastInst>(GEP->getOperand(0))->getOperand(0);
       Type *GEPIdxTy =
       cast<PointerType>(OrigBase->getType())->getElementType();
       SmallVector<Value*, 8> NewIndices;
-      if (FindElementAtOffset(GEPIdxTy, Offset, NewIndices)) {
+      if (FindElementAtOffset(GEPIdxTy, Offset.getSExtValue(), NewIndices)) {
         // If we were able to index down into an element, create the GEP
         // and bitcast the result.  This eliminates one bitcast, potentially
         // two.
@@ -1355,15 +1380,15 @@ Instruction *InstCombiner::commonPointerCastTransforms(CastInst &CI) {
         Builder->CreateInBoundsGEP(OrigBase, NewIndices) :
         Builder->CreateGEP(OrigBase, NewIndices);
         NGEP->takeName(GEP);
-        
+
         if (isa<BitCastInst>(CI))
           return new BitCastInst(NGEP, CI.getType());
         assert(isa<PtrToIntInst>(CI));
         return new PtrToIntInst(NGEP, CI.getType());
-      }      
+      }
     }
   }
-  
+
   return commonCastTransforms(CI);
 }
 
@@ -1371,19 +1396,15 @@ Instruction *InstCombiner::visitPtrToInt(PtrToIntInst &CI) {
   // If the destination integer type is not the intptr_t type for this target,
   // do a ptrtoint to intptr_t then do a trunc or zext.  This allows the cast
   // to be exposed to other transforms.
-  if (TD) {
-    if (CI.getType()->getScalarSizeInBits() < TD->getPointerSizeInBits()) {
-      Value *P = Builder->CreatePtrToInt(CI.getOperand(0),
-                                         TD->getIntPtrType(CI.getContext()));
-      return new TruncInst(P, CI.getType());
-    }
-    if (CI.getType()->getScalarSizeInBits() > TD->getPointerSizeInBits()) {
-      Value *P = Builder->CreatePtrToInt(CI.getOperand(0),
-                                         TD->getIntPtrType(CI.getContext()));
-      return new ZExtInst(P, CI.getType());
-    }
+  if (TD && CI.getType()->getScalarSizeInBits() != TD->getPointerSizeInBits()) {
+    Type *Ty = TD->getIntPtrType(CI.getContext());
+    if (CI.getType()->isVectorTy()) // Handle vectors of pointers.
+      Ty = VectorType::get(Ty, CI.getType()->getVectorNumElements());
+
+    Value *P = Builder->CreatePtrToInt(CI.getOperand(0), Ty);
+    return CastInst::CreateIntegerCast(P, CI.getType(), /*isSigned=*/false);
   }
-  
+
   return commonPointerCastTransforms(CI);
 }
 
@@ -1398,33 +1419,33 @@ static Instruction *OptimizeVectorResize(Value *InVal, VectorType *DestTy,
   // element size, or the input is a multiple of the output element size.
   // Convert the input type to have the same element type as the output.
   VectorType *SrcTy = cast<VectorType>(InVal->getType());
-  
+
   if (SrcTy->getElementType() != DestTy->getElementType()) {
     // The input types don't need to be identical, but for now they must be the
     // same size.  There is no specific reason we couldn't handle things like
     // <4 x i16> -> <4 x i32> by bitcasting to <2 x i32> but haven't gotten
-    // there yet. 
+    // there yet.
     if (SrcTy->getElementType()->getPrimitiveSizeInBits() !=
         DestTy->getElementType()->getPrimitiveSizeInBits())
       return 0;
-    
+
     SrcTy = VectorType::get(DestTy->getElementType(), SrcTy->getNumElements());
     InVal = IC.Builder->CreateBitCast(InVal, SrcTy);
   }
-  
+
   // Now that the element types match, get the shuffle mask and RHS of the
   // shuffle to use, which depends on whether we're increasing or decreasing the
   // size of the input.
   SmallVector<uint32_t, 16> ShuffleMask;
   Value *V2;
-  
+
   if (SrcTy->getNumElements() > DestTy->getNumElements()) {
     // If we're shrinking the number of elements, just shuffle in the low
     // elements from the input and use undef as the second shuffle input.
     V2 = UndefValue::get(SrcTy);
     for (unsigned i = 0, e = DestTy->getNumElements(); i != e; ++i)
       ShuffleMask.push_back(i);
-    
+
   } else {
     // If we're increasing the number of elements, shuffle in all of the
     // elements from InVal and fill the rest of the result elements with zeros
@@ -1438,7 +1459,7 @@ static Instruction *OptimizeVectorResize(Value *InVal, VectorType *DestTy,
     for (unsigned i = 0, e = DestTy->getNumElements()-SrcElts; i != e; ++i)
       ShuffleMask.push_back(SrcElts);
   }
-  
+
   return new ShuffleVectorInst(InVal, V2,
                                ConstantDataVector::get(V2->getContext(),
                                                        ShuffleMask));
@@ -1465,7 +1486,7 @@ static bool CollectInsertionElements(Value *V, unsigned ElementIndex,
                                      Type *VecEltTy) {
   // Undef values never contribute useful bits to the result.
   if (isa<UndefValue>(V)) return true;
-  
+
   // If we got down to a value of the right type, we win, try inserting into the
   // right element.
   if (V->getType() == VecEltTy) {
@@ -1473,15 +1494,15 @@ static bool CollectInsertionElements(Value *V, unsigned ElementIndex,
     if (Constant *C = dyn_cast<Constant>(V))
       if (C->isNullValue())
         return true;
-    
+
     // Fail if multiple elements are inserted into this slot.
     if (ElementIndex >= Elements.size() || Elements[ElementIndex] != 0)
       return false;
-    
+
     Elements[ElementIndex] = V;
     return true;
   }
-  
+
   if (Constant *C = dyn_cast<Constant>(V)) {
     // Figure out the # elements this provides, and bitcast it or slice it up
     // as required.
@@ -1492,7 +1513,7 @@ static bool CollectInsertionElements(Value *V, unsigned ElementIndex,
     if (NumElts == 1)
       return CollectInsertionElements(ConstantExpr::getBitCast(C, VecEltTy),
                                       ElementIndex, Elements, VecEltTy);
-    
+
     // Okay, this is a constant that covers multiple elements.  Slice it up into
     // pieces and insert each element-sized piece into the vector.
     if (!isa<IntegerType>(C->getType()))
@@ -1500,7 +1521,7 @@ static bool CollectInsertionElements(Value *V, unsigned ElementIndex,
                                        C->getType()->getPrimitiveSizeInBits()));
     unsigned ElementSize = VecEltTy->getPrimitiveSizeInBits();
     Type *ElementIntTy = IntegerType::get(C->getContext(), ElementSize);
-    
+
     for (unsigned i = 0; i != NumElts; ++i) {
       Constant *Piece = ConstantExpr::getLShr(C, ConstantInt::get(C->getType(),
                                                                i*ElementSize));
@@ -1510,23 +1531,23 @@ static bool CollectInsertionElements(Value *V, unsigned ElementIndex,
     }
     return true;
   }
-  
+
   if (!V->hasOneUse()) return false;
-  
+
   Instruction *I = dyn_cast<Instruction>(V);
   if (I == 0) return false;
   switch (I->getOpcode()) {
   default: return false; // Unhandled case.
   case Instruction::BitCast:
     return CollectInsertionElements(I->getOperand(0), ElementIndex,
-                                    Elements, VecEltTy);  
+                                    Elements, VecEltTy);
   case Instruction::ZExt:
     if (!isMultipleOfTypeSize(
                           I->getOperand(0)->getType()->getPrimitiveSizeInBits(),
                               VecEltTy))
       return false;
     return CollectInsertionElements(I->getOperand(0), ElementIndex,
-                                    Elements, VecEltTy);  
+                                    Elements, VecEltTy);
   case Instruction::Or:
     return CollectInsertionElements(I->getOperand(0), ElementIndex,
                                     Elements, VecEltTy) &&
@@ -1538,11 +1559,11 @@ static bool CollectInsertionElements(Value *V, unsigned ElementIndex,
     if (CI == 0) return false;
     if (!isMultipleOfTypeSize(CI->getZExtValue(), VecEltTy)) return false;
     unsigned IndexShift = getTypeSizeIndex(CI->getZExtValue(), VecEltTy);
-    
+
     return CollectInsertionElements(I->getOperand(0), ElementIndex+IndexShift,
                                     Elements, VecEltTy);
   }
-      
+
   }
 }
 
@@ -1577,11 +1598,11 @@ static Value *OptimizeIntegerToVectorInsertions(BitCastInst &CI,
   Value *Result = Constant::getNullValue(CI.getType());
   for (unsigned i = 0, e = Elements.size(); i != e; ++i) {
     if (Elements[i] == 0) continue;  // Unset element.
-    
+
     Result = IC.Builder->CreateInsertElement(Result, Elements[i],
                                              IC.Builder->getInt32(i));
   }
-  
+
   return Result;
 }
 
@@ -1589,6 +1610,9 @@ static Value *OptimizeIntegerToVectorInsertions(BitCastInst &CI,
 /// OptimizeIntToFloatBitCast - See if we can optimize an integer->float/double
 /// bitcast.  The various long double bitcasts can't get in here.
 static Instruction *OptimizeIntToFloatBitCast(BitCastInst &CI,InstCombiner &IC){
+  // We need to know the target byte order to perform this optimization.
+  if (!IC.getDataLayout()) return 0;
+
   Value *Src = CI.getOperand(0);
   Type *DestTy = CI.getType();
 
@@ -1609,11 +1633,14 @@ static Instruction *OptimizeIntToFloatBitCast(BitCastInst &CI,InstCombiner &IC){
                                 VecTy->getPrimitiveSizeInBits() / DestWidth);
         VecInput = IC.Builder->CreateBitCast(VecInput, VecTy);
       }
-    
-      return ExtractElementInst::Create(VecInput, IC.Builder->getInt32(0));
+
+      unsigned Elt = 0;
+      if (IC.getDataLayout()->isBigEndian())
+        Elt = VecTy->getPrimitiveSizeInBits() / DestWidth - 1;
+      return ExtractElementInst::Create(VecInput, IC.Builder->getInt32(Elt));
     }
   }
-  
+
   // bitcast(trunc(lshr(bitcast(somevector), cst))
   ConstantInt *ShAmt = 0;
   if (match(Src, m_Trunc(m_LShr(m_BitCast(m_Value(VecInput)),
@@ -1630,8 +1657,10 @@ static Instruction *OptimizeIntToFloatBitCast(BitCastInst &CI,InstCombiner &IC){
                                 VecTy->getPrimitiveSizeInBits() / DestWidth);
         VecInput = IC.Builder->CreateBitCast(VecInput, VecTy);
       }
-      
+
       unsigned Elt = ShAmt->getZExtValue() / DestWidth;
+      if (IC.getDataLayout()->isBigEndian())
+        Elt = VecTy->getPrimitiveSizeInBits() / DestWidth - 1 - Elt;
       return ExtractElementInst::Create(VecInput, IC.Builder->getInt32(Elt));
     }
   }
@@ -1654,12 +1683,12 @@ Instruction *InstCombiner::visitBitCast(BitCastInst &CI) {
     PointerType *SrcPTy = cast<PointerType>(SrcTy);
     Type *DstElTy = DstPTy->getElementType();
     Type *SrcElTy = SrcPTy->getElementType();
-    
+
     // If the address spaces don't match, don't eliminate the bitcast, which is
     // required for changing types.
     if (SrcPTy->getAddressSpace() != DstPTy->getAddressSpace())
       return 0;
-    
+
     // If we are casting a alloca to a pointer to a type of the same
     // size, rewrite the allocation instruction to allocate the "right" type.
     // There is no need to modify malloc calls because it is their bitcast that
@@ -1667,14 +1696,14 @@ Instruction *InstCombiner::visitBitCast(BitCastInst &CI) {
     if (AllocaInst *AI = dyn_cast<AllocaInst>(Src))
       if (Instruction *V = PromoteCastOfAllocation(CI, *AI))
         return V;
-    
+
     // If the source and destination are pointers, and this cast is equivalent
     // to a getelementptr X, 0, 0, 0...  turn it into the appropriate gep.
     // This can enhance SROA and other transforms that want type-safe pointers.
     Constant *ZeroUInt =
       Constant::getNullValue(Type::getInt32Ty(CI.getContext()));
     unsigned NumZeros = 0;
-    while (SrcElTy != DstElTy && 
+    while (SrcElTy != DstElTy &&
            isa<CompositeType>(SrcElTy) && !SrcElTy->isPointerTy() &&
            SrcElTy->getNumContainedTypes() /* not "{}" */) {
       SrcElTy = cast<CompositeType>(SrcElTy)->getTypeAtIndex(ZeroUInt);
@@ -1687,7 +1716,7 @@ Instruction *InstCombiner::visitBitCast(BitCastInst &CI) {
       return GetElementPtrInst::CreateInBounds(Src, Idxs);
     }
   }
-  
+
   // Try to optimize int -> float bitcasts.
   if ((DestTy->isFloatTy() || DestTy->isDoubleTy()) && isa<IntegerType>(SrcTy))
     if (Instruction *I = OptimizeIntToFloatBitCast(CI, *this))
@@ -1700,7 +1729,7 @@ Instruction *InstCombiner::visitBitCast(BitCastInst &CI) {
                      Constant::getNullValue(Type::getInt32Ty(CI.getContext())));
       // FIXME: Canonicalize bitcast(insertelement) -> insertelement(bitcast)
     }
-    
+
     if (isa<IntegerType>(SrcTy)) {
       // If this is a cast from an integer to vector, check to see if the input
       // is a trunc or zext of a bitcast from vector.  If so, we can replace all
@@ -1713,7 +1742,7 @@ Instruction *InstCombiner::visitBitCast(BitCastInst &CI) {
                                                cast<VectorType>(DestTy), *this))
               return I;
       }
-      
+
       // If the input is an 'or' instruction, we may be doing shifts and ors to
       // assemble the elements of the vector manually.  Try to rip the code out
       // and replace it with insertelements.
@@ -1723,18 +1752,29 @@ Instruction *InstCombiner::visitBitCast(BitCastInst &CI) {
   }
 
   if (VectorType *SrcVTy = dyn_cast<VectorType>(SrcTy)) {
-    if (SrcVTy->getNumElements() == 1 && !DestTy->isVectorTy()) {
-      Value *Elem = 
-        Builder->CreateExtractElement(Src,
-                   Constant::getNullValue(Type::getInt32Ty(CI.getContext())));
-      return CastInst::Create(Instruction::BitCast, Elem, DestTy);
+    if (SrcVTy->getNumElements() == 1) {
+      // If our destination is not a vector, then make this a straight
+      // scalar-scalar cast.
+      if (!DestTy->isVectorTy()) {
+        Value *Elem =
+          Builder->CreateExtractElement(Src,
+                     Constant::getNullValue(Type::getInt32Ty(CI.getContext())));
+        return CastInst::Create(Instruction::BitCast, Elem, DestTy);
+      }
+
+      // Otherwise, see if our source is an insert. If so, then use the scalar
+      // component directly.
+      if (InsertElementInst *IEI =
+            dyn_cast<InsertElementInst>(CI.getOperand(0)))
+        return CastInst::Create(Instruction::BitCast, IEI->getOperand(1),
+                                DestTy);
     }
   }
 
   if (ShuffleVectorInst *SVI = dyn_cast<ShuffleVectorInst>(Src)) {
     // Okay, we have (bitcast (shuffle ..)).  Check to see if this is
     // a bitcast to a vector with the same # elts.
-    if (SVI->hasOneUse() && DestTy->isVectorTy() && 
+    if (SVI->hasOneUse() && DestTy->isVectorTy() &&
         cast<VectorType>(DestTy)->getNumElements() ==
               SVI->getType()->getNumElements() &&
         SVI->getType()->getNumElements() ==
@@ -1743,9 +1783,9 @@ Instruction *InstCombiner::visitBitCast(BitCastInst &CI) {
       // If either of the operands is a cast from CI.getType(), then
       // evaluating the shuffle in the casted destination's type will allow
       // us to eliminate at least one cast.
-      if (((Tmp = dyn_cast<BitCastInst>(SVI->getOperand(0))) && 
+      if (((Tmp = dyn_cast<BitCastInst>(SVI->getOperand(0))) &&
            Tmp->getOperand(0)->getType() == DestTy) ||
-          ((Tmp = dyn_cast<BitCastInst>(SVI->getOperand(1))) && 
+          ((Tmp = dyn_cast<BitCastInst>(SVI->getOperand(1))) &&
            Tmp->getOperand(0)->getType() == DestTy)) {
         Value *LHS = Builder->CreateBitCast(SVI->getOperand(0), DestTy);
         Value *RHS = Builder->CreateBitCast(SVI->getOperand(1), DestTy);
@@ -1755,7 +1795,7 @@ Instruction *InstCombiner::visitBitCast(BitCastInst &CI) {
       }
     }
   }
-  
+
   if (SrcTy->isPointerTy())
     return commonPointerCastTransforms(CI);
   return commonCastTransforms(CI);
diff --git a/contrib/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp b/contrib/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
index 7c3f8fe..a96e754 100644
--- a/contrib/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
+++ b/contrib/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
@@ -12,15 +12,15 @@
 //===----------------------------------------------------------------------===//
 
 #include "InstCombine.h"
-#include "llvm/IntrinsicInst.h"
 #include "llvm/Analysis/ConstantFolding.h"
 #include "llvm/Analysis/InstructionSimplify.h"
 #include "llvm/Analysis/MemoryBuiltins.h"
-#include "llvm/DataLayout.h"
-#include "llvm/Target/TargetLibraryInfo.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/IntrinsicInst.h"
 #include "llvm/Support/ConstantRange.h"
 #include "llvm/Support/GetElementPtrTypeIterator.h"
 #include "llvm/Support/PatternMatch.h"
+#include "llvm/Target/TargetLibraryInfo.h"
 using namespace llvm;
 using namespace PatternMatch;
 
@@ -139,6 +139,31 @@ static bool isSignBitCheck(ICmpInst::Predicate pred, ConstantInt *RHS,
   }
 }
 
+/// Returns true if the exploded icmp can be expressed as a signed comparison
+/// to zero and updates the predicate accordingly.
+/// The signedness of the comparison is preserved.
+static bool isSignTest(ICmpInst::Predicate &pred, const ConstantInt *RHS) {
+  if (!ICmpInst::isSigned(pred))
+    return false;
+
+  if (RHS->isZero())
+    return ICmpInst::isRelational(pred);
+
+  if (RHS->isOne()) {
+    if (pred == ICmpInst::ICMP_SLT) {
+      pred = ICmpInst::ICMP_SLE;
+      return true;
+    }
+  } else if (RHS->isAllOnesValue()) {
+    if (pred == ICmpInst::ICMP_SGT) {
+      pred = ICmpInst::ICMP_SGE;
+      return true;
+    }
+  }
+
+  return false;
+}
+
 // isHighOnes - Return true if the constant is of the form 1+0+.
 // This is the same as lowones(~X).
 static bool isHighOnes(const ConstantInt *CI) {
@@ -443,20 +468,29 @@ FoldCmpLoadFromIndexedGlobal(GetElementPtrInst *GEP, GlobalVariable *GV,
   }
 
 
-  // If a 32-bit or 64-bit magic bitvector captures the entire comparison state
+  // If a magic bitvector captures the entire comparison state
   // of this load, replace it with computation that does:
   //   ((magic_cst >> i) & 1) != 0
-  if (ArrayElementCount <= 32 ||
-      (TD && ArrayElementCount <= 64 && TD->isLegalInteger(64))) {
-    Type *Ty;
-    if (ArrayElementCount <= 32)
+  {
+    Type *Ty = 0;
+
+    // Look for an appropriate type:
+    // - The type of Idx if the magic fits
+    // - The smallest fitting legal type if we have a DataLayout
+    // - Default to i32
+    if (ArrayElementCount <= Idx->getType()->getIntegerBitWidth())
+      Ty = Idx->getType();
+    else if (TD)
+      Ty = TD->getSmallestLegalIntType(Init->getContext(), ArrayElementCount);
+    else if (ArrayElementCount <= 32)
       Ty = Type::getInt32Ty(Init->getContext());
-    else
-      Ty = Type::getInt64Ty(Init->getContext());
-    Value *V = Builder->CreateIntCast(Idx, Ty, false);
-    V = Builder->CreateLShr(ConstantInt::get(Ty, MagicBitvector), V);
-    V = Builder->CreateAnd(ConstantInt::get(Ty, 1), V);
-    return new ICmpInst(ICmpInst::ICMP_NE, V, ConstantInt::get(Ty, 0));
+
+    if (Ty != 0) {
+      Value *V = Builder->CreateIntCast(Idx, Ty, false);
+      V = Builder->CreateLShr(ConstantInt::get(Ty, MagicBitvector), V);
+      V = Builder->CreateAnd(ConstantInt::get(Ty, 1), V);
+      return new ICmpInst(ICmpInst::ICMP_NE, V, ConstantInt::get(Ty, 0));
+    }
   }
 
   return 0;
@@ -1226,6 +1260,16 @@ Instruction *InstCombiner::visitICmpInstWithInstAndIntCst(ICmpInst &ICI,
         ICI.setOperand(0, NewAnd);
         return &ICI;
       }
+
+      // Replace ((X & AndCST) > RHSV) with ((X & AndCST) != 0), if any
+      // bit set in (X & AndCST) will produce a result greater than RHSV.
+      if (ICI.getPredicate() == ICmpInst::ICMP_UGT) {
+        unsigned NTZ = AndCST->getValue().countTrailingZeros();
+        if ((NTZ < AndCST->getBitWidth()) &&
+            APInt::getOneBitSet(AndCST->getBitWidth(), NTZ).ugt(RHSV))
+          return new ICmpInst(ICmpInst::ICMP_NE, LHSI,
+                              Constant::getNullValue(RHS->getType()));
+      }
     }
 
     // Try to optimize things like "A[i]&42 == 0" to index computations.
@@ -1263,6 +1307,23 @@ Instruction *InstCombiner::visitICmpInstWithInstAndIntCst(ICmpInst &ICI,
     break;
   }
 
+  case Instruction::Mul: {       // (icmp pred (mul X, Val), CI)
+    ConstantInt *Val = dyn_cast<ConstantInt>(LHSI->getOperand(1));
+    if (!Val) break;
+
+    // If this is a signed comparison to 0 and the mul is sign preserving,
+    // use the mul LHS operand instead.
+    ICmpInst::Predicate pred = ICI.getPredicate();
+    if (isSignTest(pred, RHS) && !Val->isZero() &&
+        cast<BinaryOperator>(LHSI)->hasNoSignedWrap())
+      return new ICmpInst(Val->isNegative() ?
+                          ICmpInst::getSwappedPredicate(pred) : pred,
+                          LHSI->getOperand(0),
+                          Constant::getNullValue(RHS->getType()));
+
+    break;
+  }
+
   case Instruction::Shl: {       // (icmp pred (shl X, ShAmt), CI)
     ConstantInt *ShAmt = dyn_cast<ConstantInt>(LHSI->getOperand(1));
     if (!ShAmt) break;
@@ -1294,6 +1355,12 @@ Instruction *InstCombiner::visitICmpInstWithInstAndIntCst(ICmpInst &ICI,
         return new ICmpInst(ICI.getPredicate(), LHSI->getOperand(0),
                             ConstantExpr::getLShr(RHS, ShAmt));
 
+      // If the shift is NSW and we compare to 0, then it is just shifting out
+      // sign bits, no need for an AND either.
+      if (cast<BinaryOperator>(LHSI)->hasNoSignedWrap() && RHSV == 0)
+        return new ICmpInst(ICI.getPredicate(), LHSI->getOperand(0),
+                            ConstantExpr::getLShr(RHS, ShAmt));
+
       if (LHSI->hasOneUse()) {
         // Otherwise strength reduce the shift into an and.
         uint32_t ShAmtVal = (uint32_t)ShAmt->getLimitedValue(TypeBits);
@@ -1308,6 +1375,15 @@ Instruction *InstCombiner::visitICmpInstWithInstAndIntCst(ICmpInst &ICI,
       }
     }
 
+    // If this is a signed comparison to 0 and the shift is sign preserving,
+    // use the shift LHS operand instead.
+    ICmpInst::Predicate pred = ICI.getPredicate();
+    if (isSignTest(pred, RHS) &&
+        cast<BinaryOperator>(LHSI)->hasNoSignedWrap())
+      return new ICmpInst(pred,
+                          LHSI->getOperand(0),
+                          Constant::getNullValue(RHS->getType()));
+
     // Otherwise, if this is a comparison of the sign bit, simplify to and/test.
     bool TrueIfSigned = false;
     if (LHSI->hasOneUse() &&
@@ -1321,6 +1397,26 @@ Instruction *InstCombiner::visitICmpInstWithInstAndIntCst(ICmpInst &ICI,
       return new ICmpInst(TrueIfSigned ? ICmpInst::ICMP_NE : ICmpInst::ICMP_EQ,
                           And, Constant::getNullValue(And->getType()));
     }
+
+    // Transform (icmp pred iM (shl iM %v, N), CI)
+    // -> (icmp pred i(M-N) (trunc %v iM to i(M-N)), (trunc (CI>>N))
+    // Transform the shl to a trunc if (trunc (CI>>N)) has no loss and M-N.
+    // This enables to get rid of the shift in favor of a trunc which can be
+    // free on the target. It has the additional benefit of comparing to a
+    // smaller constant, which will be target friendly.
+    unsigned Amt = ShAmt->getLimitedValue(TypeBits-1);
+    if (LHSI->hasOneUse() &&
+        Amt != 0 && RHSV.countTrailingZeros() >= Amt) {
+      Type *NTy = IntegerType::get(ICI.getContext(), TypeBits - Amt);
+      Constant *NCI = ConstantExpr::getTrunc(
+                        ConstantExpr::getAShr(RHS,
+                          ConstantInt::get(RHS->getType(), Amt)),
+                        NTy);
+      return new ICmpInst(ICI.getPredicate(),
+                          Builder->CreateTrunc(LHSI->getOperand(0), NTy),
+                          NCI);
+    }
+
     break;
   }
 
@@ -1502,6 +1598,19 @@ Instruction *InstCombiner::visitICmpInstWithInstAndIntCst(ICmpInst &ICI,
             return new ICmpInst(pred, X, NegX);
           }
         }
+        break;
+      case Instruction::Mul:
+        if (RHSV == 0 && BO->hasNoSignedWrap()) {
+          if (ConstantInt *BOC = dyn_cast<ConstantInt>(BO->getOperand(1))) {
+            // The trivial case (mul X, 0) is handled by InstSimplify
+            // General case : (mul X, C) != 0 iff X != 0
+            //                (mul X, C) == 0 iff X == 0
+            if (!BOC->isZero())
+              return new ICmpInst(ICI.getPredicate(), BO->getOperand(0),
+                                  Constant::getNullValue(RHS->getType()));
+          }
+        }
+        break;
       default: break;
       }
     } else if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(LHSI)) {
diff --git a/contrib/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp b/contrib/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
index 4d106fc..337cfe3 100644
--- a/contrib/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
+++ b/contrib/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
@@ -12,12 +12,12 @@
 //===----------------------------------------------------------------------===//
 
 #include "InstCombine.h"
-#include "llvm/IntrinsicInst.h"
+#include "llvm/ADT/Statistic.h"
 #include "llvm/Analysis/Loads.h"
-#include "llvm/DataLayout.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/IntrinsicInst.h"
 #include "llvm/Transforms/Utils/BasicBlockUtils.h"
 #include "llvm/Transforms/Utils/Local.h"
-#include "llvm/ADT/Statistic.h"
 using namespace llvm;
 
 STATISTIC(NumDeadStore,    "Number of dead stores eliminated");
@@ -150,26 +150,6 @@ isOnlyCopiedFromConstantGlobal(AllocaInst *AI,
   return 0;
 }
 
-/// getPointeeAlignment - Compute the minimum alignment of the value pointed
-/// to by the given pointer.
-static unsigned getPointeeAlignment(Value *V, const DataLayout &TD) {
-  if (ConstantExpr *CE = dyn_cast<ConstantExpr>(V))
-    if (CE->getOpcode() == Instruction::BitCast ||
-        (CE->getOpcode() == Instruction::GetElementPtr &&
-         cast<GEPOperator>(CE)->hasAllZeroIndices()))
-      return getPointeeAlignment(CE->getOperand(0), TD);
-
-  if (GlobalVariable *GV = dyn_cast<GlobalVariable>(V))
-    if (!GV->isDeclaration())
-      return TD.getPreferredAlignment(GV);
-
-  if (PointerType *PT = dyn_cast<PointerType>(V->getType()))
-    if (PT->getElementType()->isSized())
-      return TD.getABITypeAlignment(PT->getElementType());
-
-  return 0;
-}
-
 Instruction *InstCombiner::visitAllocaInst(AllocaInst &AI) {
   // Ensure that the alloca array size argument has type intptr_t, so that
   // any casting is exposed early.
@@ -265,7 +245,7 @@ Instruction *InstCombiner::visitAllocaInst(AllocaInst &AI) {
     }
   }
 
-  if (TD) {
+  if (AI.getAlignment()) {
     // Check to see if this allocation is only modified by a memcpy/memmove from
     // a constant global whose alignment is equal to or exceeds that of the
     // allocation.  If this is the case, we can change all users to use
@@ -274,7 +254,9 @@ Instruction *InstCombiner::visitAllocaInst(AllocaInst &AI) {
     // is only subsequently read.
     SmallVector<Instruction *, 4> ToDelete;
     if (MemTransferInst *Copy = isOnlyCopiedFromConstantGlobal(&AI, ToDelete)) {
-      if (AI.getAlignment() <= getPointeeAlignment(Copy->getSource(), *TD)) {
+      unsigned SourceAlign = getOrEnforceKnownAlignment(Copy->getSource(),
+                                                        AI.getAlignment(), TD);
+      if (AI.getAlignment() <= SourceAlign) {
         DEBUG(dbgs() << "Found alloca equal to global: " << AI << '\n');
         DEBUG(dbgs() << "  memcpy = " << *Copy << '\n');
         for (unsigned i = 0, e = ToDelete.size(); i != e; ++i)
@@ -820,6 +802,13 @@ bool InstCombiner::SimplifyStoreAtEndOfBlock(StoreInst &SI) {
   InsertNewInstBefore(NewSI, *BBI);
   NewSI->setDebugLoc(OtherStore->getDebugLoc()); 
 
+  // If the two stores had the same TBAA tag, preserve it.
+  if (MDNode *TBAATag = SI.getMetadata(LLVMContext::MD_tbaa))
+    if ((TBAATag = MDNode::getMostGenericTBAA(TBAATag,
+                               OtherStore->getMetadata(LLVMContext::MD_tbaa))))
+      NewSI->setMetadata(LLVMContext::MD_tbaa, TBAATag);
+
+  
   // Nuke the old stores.
   EraseInstFromFunction(SI);
   EraseInstFromFunction(*OtherStore);
diff --git a/contrib/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp b/contrib/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp
index cefe45e..173f2bf 100644
--- a/contrib/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp
+++ b/contrib/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp
@@ -13,8 +13,8 @@
 //===----------------------------------------------------------------------===//
 
 #include "InstCombine.h"
-#include "llvm/IntrinsicInst.h"
 #include "llvm/Analysis/InstructionSimplify.h"
+#include "llvm/IR/IntrinsicInst.h"
 #include "llvm/Support/PatternMatch.h"
 using namespace llvm;
 using namespace PatternMatch;
@@ -37,7 +37,7 @@ static Value *simplifyValueKnownNonZero(Value *V, InstCombiner &IC) {
   if (match(V, m_LShr(m_OneUse(m_Shl(m_Value(PowerOf2), m_Value(A))),
                       m_Value(B))) &&
       // The "1" can be any value known to be a power of 2.
-      isPowerOfTwo(PowerOf2, IC.getDataLayout())) {
+      isKnownToBeAPowerOfTwo(PowerOf2)) {
     A = IC.Builder->CreateSub(A, B);
     return IC.Builder->CreateShl(PowerOf2, A);
   }
@@ -45,8 +45,7 @@ static Value *simplifyValueKnownNonZero(Value *V, InstCombiner &IC) {
   // (PowerOfTwo >>u B) --> isExact since shifting out the result would make it
   // inexact.  Similarly for <<.
   if (BinaryOperator *I = dyn_cast<BinaryOperator>(V))
-    if (I->isLogicalShift() &&
-        isPowerOfTwo(I->getOperand(0), IC.getDataLayout())) {
+    if (I->isLogicalShift() && isKnownToBeAPowerOfTwo(I->getOperand(0))) {
       // We know that this is an exact/nuw shift and that the input is a
       // non-zero context as well.
       if (Value *V2 = simplifyValueKnownNonZero(I->getOperand(0), IC)) {
@@ -252,24 +251,136 @@ Instruction *InstCombiner::visitMul(BinaryOperator &I) {
   return Changed ? &I : 0;
 }
 
+//
+// Detect pattern:
+//
+// log2(Y*0.5)
+//
+// And check for corresponding fast math flags
+//
+
+static void detectLog2OfHalf(Value *&Op, Value *&Y, IntrinsicInst *&Log2) {
+
+   if (!Op->hasOneUse())
+     return;
+
+   IntrinsicInst *II = dyn_cast<IntrinsicInst>(Op);
+   if (!II)
+     return;
+   if (II->getIntrinsicID() != Intrinsic::log2 || !II->hasUnsafeAlgebra())
+     return;
+   Log2 = II;
+
+   Value *OpLog2Of = II->getArgOperand(0);
+   if (!OpLog2Of->hasOneUse())
+     return;
+
+   Instruction *I = dyn_cast<Instruction>(OpLog2Of);
+   if (!I)
+     return;
+   if (I->getOpcode() != Instruction::FMul || !I->hasUnsafeAlgebra())
+     return;
+              
+   ConstantFP *CFP = dyn_cast<ConstantFP>(I->getOperand(0));
+   if (CFP && CFP->isExactlyValue(0.5)) {
+     Y = I->getOperand(1);
+     return;
+   }
+   CFP = dyn_cast<ConstantFP>(I->getOperand(1));
+   if (CFP && CFP->isExactlyValue(0.5))
+     Y = I->getOperand(0);
+} 
+
+/// Helper function of InstCombiner::visitFMul(BinaryOperator(). It returns
+/// true iff the given value is FMul or FDiv with one and only one operand
+/// being a normal constant (i.e. not Zero/NaN/Infinity).
+static bool isFMulOrFDivWithConstant(Value *V) {
+  Instruction *I = dyn_cast<Instruction>(V);
+  if (!I || (I->getOpcode() != Instruction::FMul && 
+             I->getOpcode() != Instruction::FDiv))
+    return false;
+
+  ConstantFP *C0 = dyn_cast<ConstantFP>(I->getOperand(0));
+  ConstantFP *C1 = dyn_cast<ConstantFP>(I->getOperand(1));
+
+  if (C0 && C1)
+    return false;
+
+  return (C0 && C0->getValueAPF().isNormal()) ||
+         (C1 && C1->getValueAPF().isNormal());
+}
+
+static bool isNormalFp(const ConstantFP *C) {
+  const APFloat &Flt = C->getValueAPF();
+  return Flt.isNormal() && !Flt.isDenormal();
+}
+
+/// foldFMulConst() is a helper routine of InstCombiner::visitFMul().
+/// The input \p FMulOrDiv is a FMul/FDiv with one and only one operand
+/// being a constant (i.e. isFMulOrFDivWithConstant(FMulOrDiv) == true).
+/// This function is to simplify "FMulOrDiv * C" and returns the 
+/// resulting expression. Note that this function could return NULL in
+/// case the constants cannot be folded into a normal floating-point.
+/// 
+Value *InstCombiner::foldFMulConst(Instruction *FMulOrDiv, ConstantFP *C,
+                                   Instruction *InsertBefore) {
+  assert(isFMulOrFDivWithConstant(FMulOrDiv) && "V is invalid");
+
+  Value *Opnd0 = FMulOrDiv->getOperand(0);
+  Value *Opnd1 = FMulOrDiv->getOperand(1);
+
+  ConstantFP *C0 = dyn_cast<ConstantFP>(Opnd0);
+  ConstantFP *C1 = dyn_cast<ConstantFP>(Opnd1);
+
+  BinaryOperator *R = 0;
+
+  // (X * C0) * C => X * (C0*C)
+  if (FMulOrDiv->getOpcode() == Instruction::FMul) {
+    Constant *F = ConstantExpr::getFMul(C1 ? C1 : C0, C);
+    if (isNormalFp(cast<ConstantFP>(F)))
+      R = BinaryOperator::CreateFMul(C1 ? Opnd0 : Opnd1, F);
+  } else {
+    if (C0) {
+      // (C0 / X) * C => (C0 * C) / X
+      ConstantFP *F = cast<ConstantFP>(ConstantExpr::getFMul(C0, C));
+      if (isNormalFp(F))
+        R = BinaryOperator::CreateFDiv(F, Opnd1);
+    } else {
+      // (X / C1) * C => X * (C/C1) if C/C1 is not a denormal
+      ConstantFP *F = cast<ConstantFP>(ConstantExpr::getFDiv(C, C1));
+      if (isNormalFp(F)) {
+        R = BinaryOperator::CreateFMul(Opnd0, F);
+      } else {
+        // (X / C1) * C => X / (C1/C) 
+        Constant *F = ConstantExpr::getFDiv(C1, C);
+        if (isNormalFp(cast<ConstantFP>(F)))
+          R = BinaryOperator::CreateFDiv(Opnd0, F);
+      }
+    }
+  }
+
+  if (R) {
+    R->setHasUnsafeAlgebra(true);
+    InsertNewInstWith(R, *InsertBefore);
+  }
+
+  return R;
+}
+
 Instruction *InstCombiner::visitFMul(BinaryOperator &I) {
   bool Changed = SimplifyAssociativeOrCommutative(I);
   Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
 
-  // Simplify mul instructions with a constant RHS.
-  if (Constant *Op1C = dyn_cast<Constant>(Op1)) {
-    if (ConstantFP *Op1F = dyn_cast<ConstantFP>(Op1C)) {
-      // "In IEEE floating point, x*1 is not equivalent to x for nans.  However,
-      // ANSI says we can drop signals, so we can do this anyway." (from GCC)
-      if (Op1F->isExactlyValue(1.0))
-        return ReplaceInstUsesWith(I, Op0);  // Eliminate 'fmul double %X, 1.0'
-    } else if (ConstantDataVector *Op1V = dyn_cast<ConstantDataVector>(Op1C)) {
-      // As above, vector X*splat(1.0) -> X in all defined cases.
-      if (ConstantFP *F = dyn_cast_or_null<ConstantFP>(Op1V->getSplatValue()))
-        if (F->isExactlyValue(1.0))
-          return ReplaceInstUsesWith(I, Op0);
-    }
+  if (isa<Constant>(Op0))
+    std::swap(Op0, Op1);
+
+  if (Value *V = SimplifyFMulInst(Op0, Op1, I.getFastMathFlags(), TD))
+    return ReplaceInstUsesWith(I, V);
+
+  bool AllowReassociate = I.hasUnsafeAlgebra();
 
+  // Simplify mul instructions with a constant RHS.
+  if (isa<Constant>(Op1)) {
     // Try to fold constant mul into select arguments.
     if (SelectInst *SI = dyn_cast<SelectInst>(Op0))
       if (Instruction *R = FoldOpIntoSelect(I, SI))
@@ -278,11 +389,146 @@ Instruction *InstCombiner::visitFMul(BinaryOperator &I) {
     if (isa<PHINode>(Op0))
       if (Instruction *NV = FoldOpIntoPhi(I))
         return NV;
+
+    ConstantFP *C = dyn_cast<ConstantFP>(Op1);
+    if (C && AllowReassociate && C->getValueAPF().isNormal()) {
+      // Let MDC denote an expression in one of these forms:
+      // X * C, C/X, X/C, where C is a constant.
+      //
+      // Try to simplify "MDC * Constant"
+      if (isFMulOrFDivWithConstant(Op0)) {
+        Value *V = foldFMulConst(cast<Instruction>(Op0), C, &I);
+        if (V)
+          return ReplaceInstUsesWith(I, V);
+      }
+
+      // (MDC +/- C1) * C => (MDC * C) +/- (C1 * C)
+      Instruction *FAddSub = dyn_cast<Instruction>(Op0);
+      if (FAddSub &&
+          (FAddSub->getOpcode() == Instruction::FAdd ||
+           FAddSub->getOpcode() == Instruction::FSub)) {
+        Value *Opnd0 = FAddSub->getOperand(0);
+        Value *Opnd1 = FAddSub->getOperand(1);
+        ConstantFP *C0 = dyn_cast<ConstantFP>(Opnd0);
+        ConstantFP *C1 = dyn_cast<ConstantFP>(Opnd1);
+        bool Swap = false;
+        if (C0) {
+          std::swap(C0, C1);
+          std::swap(Opnd0, Opnd1);
+          Swap = true; 
+        }
+
+        if (C1 && C1->getValueAPF().isNormal() &&
+            isFMulOrFDivWithConstant(Opnd0)) {
+          Value *M1 = ConstantExpr::getFMul(C1, C);
+          Value *M0 = isNormalFp(cast<ConstantFP>(M1)) ? 
+                      foldFMulConst(cast<Instruction>(Opnd0), C, &I) :
+                      0;
+          if (M0 && M1) {
+            if (Swap && FAddSub->getOpcode() == Instruction::FSub)
+              std::swap(M0, M1);
+
+            Value *R = (FAddSub->getOpcode() == Instruction::FAdd) ?
+                        BinaryOperator::CreateFAdd(M0, M1) :
+                        BinaryOperator::CreateFSub(M0, M1);
+            Instruction *RI = cast<Instruction>(R);
+            RI->copyFastMathFlags(&I);
+            return RI;
+          }
+        }
+      }
+    }
+  }
+
+
+  // Under unsafe algebra do:
+  // X * log2(0.5*Y) = X*log2(Y) - X
+  if (I.hasUnsafeAlgebra()) {
+    Value *OpX = NULL;
+    Value *OpY = NULL;
+    IntrinsicInst *Log2;
+    detectLog2OfHalf(Op0, OpY, Log2);
+    if (OpY) {
+      OpX = Op1;
+    } else {
+      detectLog2OfHalf(Op1, OpY, Log2);
+      if (OpY) {
+        OpX = Op0;
+      }
+    }
+    // if pattern detected emit alternate sequence
+    if (OpX && OpY) {
+      Log2->setArgOperand(0, OpY);
+      Value *FMulVal = Builder->CreateFMul(OpX, Log2);
+      Instruction *FMul = cast<Instruction>(FMulVal);
+      FMul->copyFastMathFlags(Log2);
+      Instruction *FSub = BinaryOperator::CreateFSub(FMulVal, OpX);
+      FSub->copyFastMathFlags(Log2);
+      return FSub;
+    }
   }
 
-  if (Value *Op0v = dyn_castFNegVal(Op0))     // -X * -Y = X*Y
-    if (Value *Op1v = dyn_castFNegVal(Op1))
-      return BinaryOperator::CreateFMul(Op0v, Op1v);
+  // Handle symmetric situation in a 2-iteration loop
+  Value *Opnd0 = Op0;
+  Value *Opnd1 = Op1;
+  for (int i = 0; i < 2; i++) {
+    bool IgnoreZeroSign = I.hasNoSignedZeros();
+    if (BinaryOperator::isFNeg(Opnd0, IgnoreZeroSign)) {
+      Value *N0 = dyn_castFNegVal(Opnd0, IgnoreZeroSign);
+      Value *N1 = dyn_castFNegVal(Opnd1, IgnoreZeroSign);
+
+      // -X * -Y => X*Y
+      if (N1)
+        return BinaryOperator::CreateFMul(N0, N1);
+
+      if (Opnd0->hasOneUse()) {
+        // -X * Y => -(X*Y) (Promote negation as high as possible)
+        Value *T = Builder->CreateFMul(N0, Opnd1);
+        cast<Instruction>(T)->setDebugLoc(I.getDebugLoc());
+        Instruction *Neg = BinaryOperator::CreateFNeg(T);
+        if (I.getFastMathFlags().any()) {
+          cast<Instruction>(T)->copyFastMathFlags(&I);
+          Neg->copyFastMathFlags(&I);
+        }
+        return Neg;
+      }
+    }
+
+    // (X*Y) * X => (X*X) * Y where Y != X
+    //  The purpose is two-fold: 
+    //   1) to form a power expression (of X).
+    //   2) potentially shorten the critical path: After transformation, the
+    //  latency of the instruction Y is amortized by the expression of X*X,
+    //  and therefore Y is in a "less critical" position compared to what it
+    //  was before the transformation.
+    //
+    if (AllowReassociate) {
+      Value *Opnd0_0, *Opnd0_1;
+      if (Opnd0->hasOneUse() &&
+          match(Opnd0, m_FMul(m_Value(Opnd0_0), m_Value(Opnd0_1)))) {
+        Value *Y = 0;
+        if (Opnd0_0 == Opnd1 && Opnd0_1 != Opnd1)
+          Y = Opnd0_1;
+        else if (Opnd0_1 == Opnd1 && Opnd0_0 != Opnd1)
+          Y = Opnd0_0;
+
+        if (Y) {
+          Instruction *T = cast<Instruction>(Builder->CreateFMul(Opnd1, Opnd1));
+          T->copyFastMathFlags(&I);
+          T->setDebugLoc(I.getDebugLoc());
+
+          Instruction *R = BinaryOperator::CreateFMul(T, Y);
+          R->copyFastMathFlags(&I);
+          return R;
+        }
+      }
+    }
+
+    if (!isa<Constant>(Op1))
+      std::swap(Opnd0, Opnd1);
+    else
+      break;
+  }
 
   return Changed ? &I : 0;
 }
@@ -567,21 +813,140 @@ Instruction *InstCombiner::visitSDiv(BinaryOperator &I) {
   return 0;
 }
 
+/// CvtFDivConstToReciprocal tries to convert X/C into X*1/C if C not a special
+/// FP value and:
+///    1) 1/C is exact, or 
+///    2) reciprocal is allowed.
+/// If the convertion was successful, the simplified expression "X * 1/C" is
+/// returned; otherwise, NULL is returned.
+///
+static Instruction *CvtFDivConstToReciprocal(Value *Dividend,
+                                             ConstantFP *Divisor,
+                                             bool AllowReciprocal) {
+  const APFloat &FpVal = Divisor->getValueAPF();
+  APFloat Reciprocal(FpVal.getSemantics());
+  bool Cvt = FpVal.getExactInverse(&Reciprocal);
+    
+  if (!Cvt && AllowReciprocal && FpVal.isNormal()) {
+    Reciprocal = APFloat(FpVal.getSemantics(), 1.0f);
+    (void)Reciprocal.divide(FpVal, APFloat::rmNearestTiesToEven);
+    Cvt = !Reciprocal.isDenormal();
+  }
+
+  if (!Cvt)
+    return 0;
+
+  ConstantFP *R;
+  R = ConstantFP::get(Dividend->getType()->getContext(), Reciprocal);
+  return BinaryOperator::CreateFMul(Dividend, R);
+}
+
 Instruction *InstCombiner::visitFDiv(BinaryOperator &I) {
   Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
 
   if (Value *V = SimplifyFDivInst(Op0, Op1, TD))
     return ReplaceInstUsesWith(I, V);
 
+  bool AllowReassociate = I.hasUnsafeAlgebra();
+  bool AllowReciprocal = I.hasAllowReciprocal();
+
   if (ConstantFP *Op1C = dyn_cast<ConstantFP>(Op1)) {
-    const APFloat &Op1F = Op1C->getValueAPF();
-
-    // If the divisor has an exact multiplicative inverse we can turn the fdiv
-    // into a cheaper fmul.
-    APFloat Reciprocal(Op1F.getSemantics());
-    if (Op1F.getExactInverse(&Reciprocal)) {
-      ConstantFP *RFP = ConstantFP::get(Builder->getContext(), Reciprocal);
-      return BinaryOperator::CreateFMul(Op0, RFP);
+    if (AllowReassociate) {
+      ConstantFP *C1 = 0;
+      ConstantFP *C2 = Op1C;
+      Value *X;
+      Instruction *Res = 0;
+
+      if (match(Op0, m_FMul(m_Value(X), m_ConstantFP(C1)))) {
+        // (X*C1)/C2 => X * (C1/C2)
+        //
+        Constant *C = ConstantExpr::getFDiv(C1, C2);
+        const APFloat &F = cast<ConstantFP>(C)->getValueAPF();
+        if (F.isNormal() && !F.isDenormal())
+          Res = BinaryOperator::CreateFMul(X, C);
+      } else if (match(Op0, m_FDiv(m_Value(X), m_ConstantFP(C1)))) {
+        // (X/C1)/C2 => X /(C2*C1) [=> X * 1/(C2*C1) if reciprocal is allowed]
+        //
+        Constant *C = ConstantExpr::getFMul(C1, C2);
+        const APFloat &F = cast<ConstantFP>(C)->getValueAPF();
+        if (F.isNormal() && !F.isDenormal()) {
+          Res = CvtFDivConstToReciprocal(X, cast<ConstantFP>(C), 
+                                         AllowReciprocal);
+          if (!Res)
+            Res = BinaryOperator::CreateFDiv(X, C); 
+        }
+      }
+
+      if (Res) {
+        Res->setFastMathFlags(I.getFastMathFlags());
+        return Res;
+      }
+    }
+
+    // X / C => X * 1/C
+    if (Instruction *T = CvtFDivConstToReciprocal(Op0, Op1C, AllowReciprocal))
+      return T;
+
+    return 0;
+  }
+
+  if (AllowReassociate && isa<ConstantFP>(Op0)) {
+    ConstantFP *C1 = cast<ConstantFP>(Op0), *C2;
+    Constant *Fold = 0;
+    Value *X;
+    bool CreateDiv = true;
+
+    // C1 / (X*C2) => (C1/C2) / X
+    if (match(Op1, m_FMul(m_Value(X), m_ConstantFP(C2))))
+      Fold = ConstantExpr::getFDiv(C1, C2);
+    else if (match(Op1, m_FDiv(m_Value(X), m_ConstantFP(C2)))) {
+      // C1 / (X/C2) => (C1*C2) / X
+      Fold = ConstantExpr::getFMul(C1, C2);
+    } else if (match(Op1, m_FDiv(m_ConstantFP(C2), m_Value(X)))) {
+      // C1 / (C2/X) => (C1/C2) * X
+      Fold = ConstantExpr::getFDiv(C1, C2);
+      CreateDiv = false;
+    }
+
+    if (Fold) {
+      const APFloat &FoldC = cast<ConstantFP>(Fold)->getValueAPF();
+      if (FoldC.isNormal() && !FoldC.isDenormal()) {
+        Instruction *R = CreateDiv ? 
+                         BinaryOperator::CreateFDiv(Fold, X) :
+                         BinaryOperator::CreateFMul(X, Fold);
+        R->setFastMathFlags(I.getFastMathFlags());
+        return R;
+      }
+    }
+    return 0;
+  }
+
+  if (AllowReassociate) {
+    Value *X, *Y;
+    Value *NewInst = 0;
+    Instruction *SimpR = 0;
+
+    if (Op0->hasOneUse() && match(Op0, m_FDiv(m_Value(X), m_Value(Y)))) {
+      // (X/Y) / Z => X / (Y*Z)
+      //
+      if (!isa<ConstantFP>(Y) || !isa<ConstantFP>(Op1)) {
+        NewInst = Builder->CreateFMul(Y, Op1);
+        SimpR = BinaryOperator::CreateFDiv(X, NewInst);
+      }
+    } else if (Op1->hasOneUse() && match(Op1, m_FDiv(m_Value(X), m_Value(Y)))) {
+      // Z / (X/Y) => Z*Y / X
+      //
+      if (!isa<ConstantFP>(Y) || !isa<ConstantFP>(Op0)) {
+        NewInst = Builder->CreateFMul(Op0, Y);
+        SimpR = BinaryOperator::CreateFDiv(NewInst, X);
+      }
+    }
+
+    if (NewInst) {
+      if (Instruction *T = dyn_cast<Instruction>(NewInst))
+        T->setDebugLoc(I.getDebugLoc());
+      SimpR->setFastMathFlags(I.getFastMathFlags());
+      return SimpR;
     }
   }
 
diff --git a/contrib/llvm/lib/Transforms/InstCombine/InstCombinePHI.cpp b/contrib/llvm/lib/Transforms/InstCombine/InstCombinePHI.cpp
index de9c77e..b0a998c 100644
--- a/contrib/llvm/lib/Transforms/InstCombine/InstCombinePHI.cpp
+++ b/contrib/llvm/lib/Transforms/InstCombine/InstCombinePHI.cpp
@@ -12,10 +12,10 @@
 //===----------------------------------------------------------------------===//
 
 #include "InstCombine.h"
-#include "llvm/Analysis/InstructionSimplify.h"
-#include "llvm/DataLayout.h"
-#include "llvm/ADT/SmallPtrSet.h"
 #include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/Analysis/InstructionSimplify.h"
+#include "llvm/IR/DataLayout.h"
 using namespace llvm;
 
 /// FoldPHIArgBinOpIntoPHI - If we have something like phi [add (a,b), add(a,c)]
diff --git a/contrib/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp b/contrib/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp
index a2d4c88..121aa1f 100644
--- a/contrib/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp
+++ b/contrib/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp
@@ -12,9 +12,9 @@
 //===----------------------------------------------------------------------===//
 
 #include "InstCombine.h"
-#include "llvm/Support/PatternMatch.h"
 #include "llvm/Analysis/ConstantFolding.h"
 #include "llvm/Analysis/InstructionSimplify.h"
+#include "llvm/Support/PatternMatch.h"
 using namespace llvm;
 using namespace PatternMatch;
 
@@ -127,13 +127,14 @@ Instruction *InstCombiner::FoldSelectOpOp(SelectInst &SI, Instruction *TI,
     // If this is a non-volatile load or a cast from the same type,
     // merge.
     if (TI->isCast()) {
-      if (TI->getOperand(0)->getType() != FI->getOperand(0)->getType())
+      Type *FIOpndTy = FI->getOperand(0)->getType();
+      if (TI->getOperand(0)->getType() != FIOpndTy)
         return 0;
       // The select condition may be a vector. We may only change the operand
       // type if the vector width remains the same (and matches the condition).
       Type *CondTy = SI.getCondition()->getType();
-      if (CondTy->isVectorTy() && CondTy->getVectorNumElements() !=
-          FI->getOperand(0)->getType()->getVectorNumElements())
+      if (CondTy->isVectorTy() && (!FIOpndTy->isVectorTy() ||
+          CondTy->getVectorNumElements() != FIOpndTy->getVectorNumElements()))
         return 0;
     } else {
       return 0;  // unknown unary op.
diff --git a/contrib/llvm/lib/Transforms/InstCombine/InstCombineShifts.cpp b/contrib/llvm/lib/Transforms/InstCombine/InstCombineShifts.cpp
index 57021f1..8cf76e5 100644
--- a/contrib/llvm/lib/Transforms/InstCombine/InstCombineShifts.cpp
+++ b/contrib/llvm/lib/Transforms/InstCombine/InstCombineShifts.cpp
@@ -12,9 +12,9 @@
 //===----------------------------------------------------------------------===//
 
 #include "InstCombine.h"
-#include "llvm/IntrinsicInst.h"
 #include "llvm/Analysis/ConstantFolding.h"
 #include "llvm/Analysis/InstructionSimplify.h"
+#include "llvm/IR/IntrinsicInst.h"
 #include "llvm/Support/PatternMatch.h"
 using namespace llvm;
 using namespace PatternMatch;
@@ -49,7 +49,7 @@ Instruction *InstCombiner::commonShiftTransforms(BinaryOperator &I) {
     I.setOperand(1, Rem);
     return &I;
   }
-  
+
   return 0;
 }
 
@@ -70,10 +70,10 @@ static bool CanEvaluateShifted(Value *V, unsigned NumBits, bool isLeftShift,
   // We can always evaluate constants shifted.
   if (isa<Constant>(V))
     return true;
-  
+
   Instruction *I = dyn_cast<Instruction>(V);
   if (!I) return false;
-  
+
   // If this is the opposite shift, we can directly reuse the input of the shift
   // if the needed bits are already zero in the input.  This allows us to reuse
   // the value which means that we don't care if the shift has multiple uses.
@@ -95,14 +95,14 @@ static bool CanEvaluateShifted(Value *V, unsigned NumBits, bool isLeftShift,
         return CanEvaluateTruncated(I->getOperand(0), Ty);
       }
 #endif
-      
+
     }
   }
-  
+
   // We can't mutate something that has multiple uses: doing so would
   // require duplicating the instruction in general, which isn't profitable.
   if (!I->hasOneUse()) return false;
-  
+
   switch (I->getOpcode()) {
   default: return false;
   case Instruction::And:
@@ -111,7 +111,7 @@ static bool CanEvaluateShifted(Value *V, unsigned NumBits, bool isLeftShift,
     // Bitwise operators can all arbitrarily be arbitrarily evaluated shifted.
     return CanEvaluateShifted(I->getOperand(0), NumBits, isLeftShift, IC) &&
            CanEvaluateShifted(I->getOperand(1), NumBits, isLeftShift, IC);
-      
+
   case Instruction::Shl: {
     // We can often fold the shift into shifts-by-a-constant.
     CI = dyn_cast<ConstantInt>(I->getOperand(1));
@@ -119,10 +119,10 @@ static bool CanEvaluateShifted(Value *V, unsigned NumBits, bool isLeftShift,
 
     // We can always fold shl(c1)+shl(c2) -> shl(c1+c2).
     if (isLeftShift) return true;
-    
+
     // We can always turn shl(c)+shr(c) -> and(c2).
     if (CI->getValue() == NumBits) return true;
-      
+
     unsigned TypeWidth = I->getType()->getScalarSizeInBits();
 
     // We can turn shl(c1)+shr(c2) -> shl(c3)+and(c4), but it isn't
@@ -133,20 +133,20 @@ static bool CanEvaluateShifted(Value *V, unsigned NumBits, bool isLeftShift,
                        APInt::getLowBitsSet(TypeWidth, NumBits) << LowBits))
         return true;
     }
-      
+
     return false;
   }
   case Instruction::LShr: {
     // We can often fold the shift into shifts-by-a-constant.
     CI = dyn_cast<ConstantInt>(I->getOperand(1));
     if (CI == 0) return false;
-    
+
     // We can always fold lshr(c1)+lshr(c2) -> lshr(c1+c2).
     if (!isLeftShift) return true;
-    
+
     // We can always turn lshr(c)+shl(c) -> and(c2).
     if (CI->getValue() == NumBits) return true;
-      
+
     unsigned TypeWidth = I->getType()->getScalarSizeInBits();
 
     // We can always turn lshr(c1)+shl(c2) -> lshr(c3)+and(c4), but it isn't
@@ -157,7 +157,7 @@ static bool CanEvaluateShifted(Value *V, unsigned NumBits, bool isLeftShift,
                           APInt::getLowBitsSet(TypeWidth, NumBits) << LowBits))
         return true;
     }
-      
+
     return false;
   }
   case Instruction::Select: {
@@ -175,7 +175,7 @@ static bool CanEvaluateShifted(Value *V, unsigned NumBits, bool isLeftShift,
         return false;
     return true;
   }
-  }      
+  }
 }
 
 /// GetShiftedValue - When CanEvaluateShifted returned true for an expression,
@@ -194,7 +194,7 @@ static Value *GetShiftedValue(Value *V, unsigned NumBits, bool isLeftShift,
                                          IC.getTargetLibraryInfo());
     return V;
   }
-  
+
   Instruction *I = cast<Instruction>(V);
   IC.Worklist.Add(I);
 
@@ -207,7 +207,7 @@ static Value *GetShiftedValue(Value *V, unsigned NumBits, bool isLeftShift,
     I->setOperand(0, GetShiftedValue(I->getOperand(0), NumBits,isLeftShift,IC));
     I->setOperand(1, GetShiftedValue(I->getOperand(1), NumBits,isLeftShift,IC));
     return I;
-    
+
   case Instruction::Shl: {
     BinaryOperator *BO = cast<BinaryOperator>(I);
     unsigned TypeWidth = BO->getType()->getScalarSizeInBits();
@@ -227,7 +227,7 @@ static Value *GetShiftedValue(Value *V, unsigned NumBits, bool isLeftShift,
       BO->setHasNoSignedWrap(false);
       return I;
     }
-    
+
     // We turn shl(c)+lshr(c) -> and(c2) if the input doesn't already have
     // zeros.
     if (CI->getValue() == NumBits) {
@@ -240,7 +240,7 @@ static Value *GetShiftedValue(Value *V, unsigned NumBits, bool isLeftShift,
       }
       return V;
     }
-    
+
     // We turn shl(c1)+shr(c2) -> shl(c3)+and(c4), but only when we know that
     // the and won't be needed.
     assert(CI->getZExtValue() > NumBits);
@@ -255,19 +255,19 @@ static Value *GetShiftedValue(Value *V, unsigned NumBits, bool isLeftShift,
     unsigned TypeWidth = BO->getType()->getScalarSizeInBits();
     // We only accept shifts-by-a-constant in CanEvaluateShifted.
     ConstantInt *CI = cast<ConstantInt>(BO->getOperand(1));
-    
+
     // We can always fold lshr(c1)+lshr(c2) -> lshr(c1+c2).
     if (!isLeftShift) {
       // If this is oversized composite shift, then unsigned shifts get 0.
       unsigned NewShAmt = NumBits+CI->getZExtValue();
       if (NewShAmt >= TypeWidth)
         return Constant::getNullValue(BO->getType());
-      
+
       BO->setOperand(1, ConstantInt::get(BO->getType(), NewShAmt));
       BO->setIsExact(false);
       return I;
     }
-    
+
     // We turn lshr(c)+shl(c) -> and(c2) if the input doesn't already have
     // zeros.
     if (CI->getValue() == NumBits) {
@@ -280,7 +280,7 @@ static Value *GetShiftedValue(Value *V, unsigned NumBits, bool isLeftShift,
       }
       return V;
     }
-    
+
     // We turn lshr(c1)+shl(c2) -> lshr(c3)+and(c4), but only when we know that
     // the and won't be needed.
     assert(CI->getZExtValue() > NumBits);
@@ -289,7 +289,7 @@ static Value *GetShiftedValue(Value *V, unsigned NumBits, bool isLeftShift,
     BO->setIsExact(false);
     return BO;
   }
-    
+
   case Instruction::Select:
     I->setOperand(1, GetShiftedValue(I->getOperand(1), NumBits,isLeftShift,IC));
     I->setOperand(2, GetShiftedValue(I->getOperand(2), NumBits,isLeftShift,IC));
@@ -304,7 +304,7 @@ static Value *GetShiftedValue(Value *V, unsigned NumBits, bool isLeftShift,
                                               NumBits, isLeftShift, IC));
     return PN;
   }
-  }      
+  }
 }
 
 
@@ -312,24 +312,24 @@ static Value *GetShiftedValue(Value *V, unsigned NumBits, bool isLeftShift,
 Instruction *InstCombiner::FoldShiftByConstant(Value *Op0, ConstantInt *Op1,
                                                BinaryOperator &I) {
   bool isLeftShift = I.getOpcode() == Instruction::Shl;
-  
-  
+
+
   // See if we can propagate this shift into the input, this covers the trivial
   // cast of lshr(shl(x,c1),c2) as well as other more complex cases.
   if (I.getOpcode() != Instruction::AShr &&
       CanEvaluateShifted(Op0, Op1->getZExtValue(), isLeftShift, *this)) {
     DEBUG(dbgs() << "ICE: GetShiftedValue propagating shift through expression"
               " to eliminate shift:\n  IN: " << *Op0 << "\n  SH: " << I <<"\n");
-    
-    return ReplaceInstUsesWith(I, 
+
+    return ReplaceInstUsesWith(I,
                  GetShiftedValue(Op0, Op1->getZExtValue(), isLeftShift, *this));
   }
-  
-  
-  // See if we can simplify any instructions used by the instruction whose sole 
+
+
+  // See if we can simplify any instructions used by the instruction whose sole
   // purpose is to compute bits we don't care about.
   uint32_t TypeBits = Op0->getType()->getScalarSizeInBits();
-  
+
   // shl i32 X, 32 = 0 and srl i8 Y, 9 = 0, ... just don't eliminate
   // a signed shift.
   //
@@ -340,14 +340,14 @@ Instruction *InstCombiner::FoldShiftByConstant(Value *Op0, ConstantInt *Op1,
     I.setOperand(1, ConstantInt::get(I.getType(), TypeBits-1));
     return &I;
   }
-  
+
   // ((X*C1) << C2) == (X * (C1 << C2))
   if (BinaryOperator *BO = dyn_cast<BinaryOperator>(Op0))
     if (BO->getOpcode() == Instruction::Mul && isLeftShift)
       if (Constant *BOOp = dyn_cast<Constant>(BO->getOperand(1)))
         return BinaryOperator::CreateMul(BO->getOperand(0),
                                         ConstantExpr::getShl(BOOp, Op1));
-  
+
   // Try to fold constant and into select arguments.
   if (SelectInst *SI = dyn_cast<SelectInst>(Op0))
     if (Instruction *R = FoldOpIntoSelect(I, SI))
@@ -355,7 +355,7 @@ Instruction *InstCombiner::FoldShiftByConstant(Value *Op0, ConstantInt *Op1,
   if (isa<PHINode>(Op0))
     if (Instruction *NV = FoldOpIntoPhi(I))
       return NV;
-  
+
   // Fold shift2(trunc(shift1(x,c1)), c2) -> trunc(shift2(shift1(x,c1),c2))
   if (TruncInst *TI = dyn_cast<TruncInst>(Op0)) {
     Instruction *TrOp = dyn_cast<Instruction>(TI->getOperand(0));
@@ -364,7 +364,7 @@ Instruction *InstCombiner::FoldShiftByConstant(Value *Op0, ConstantInt *Op1,
     // require that the input operand is a shift-by-constant so that we have
     // confidence that the shifts will get folded together.  We could do this
     // xform in more cases, but it is unlikely to be profitable.
-    if (TrOp && I.isLogicalShift() && TrOp->isShift() && 
+    if (TrOp && I.isLogicalShift() && TrOp->isShift() &&
         isa<ConstantInt>(TrOp->getOperand(1))) {
       // Okay, we'll do this xform.  Make the shift of shift.
       Constant *ShAmt = ConstantExpr::getZExt(Op1, TrOp->getType());
@@ -378,7 +378,7 @@ Instruction *InstCombiner::FoldShiftByConstant(Value *Op0, ConstantInt *Op1,
       unsigned SrcSize = TrOp->getType()->getScalarSizeInBits();
       unsigned DstSize = TI->getType()->getScalarSizeInBits();
       APInt MaskV(APInt::getLowBitsSet(SrcSize, DstSize));
-      
+
       // The mask we constructed says what the trunc would do if occurring
       // between the shifts.  We want to know the effect *after* the second
       // shift.  We know that it is a logical shift by a constant, so adjust the
@@ -399,7 +399,7 @@ Instruction *InstCombiner::FoldShiftByConstant(Value *Op0, ConstantInt *Op1,
       return new TruncInst(And, I.getType());
     }
   }
-  
+
   if (Op0->hasOneUse()) {
     if (BinaryOperator *Op0BO = dyn_cast<BinaryOperator>(Op0)) {
       // Turn ((X >> C) + Y) << C  ->  (X + (Y << C)) & (~0 << C)
@@ -425,14 +425,13 @@ Instruction *InstCombiner::FoldShiftByConstant(Value *Op0, ConstantInt *Op1,
           return BinaryOperator::CreateAnd(X, ConstantInt::get(I.getContext(),
                      APInt::getHighBitsSet(TypeBits, TypeBits-Op1Val)));
         }
-        
+
         // Turn (Y + ((X >> C) & CC)) << C  ->  ((X & (CC << C)) + (Y << C))
         Value *Op0BOOp1 = Op0BO->getOperand(1);
         if (isLeftShift && Op0BOOp1->hasOneUse() &&
-            match(Op0BOOp1, 
-                  m_And(m_Shr(m_Value(V1), m_Specific(Op1)),
-                        m_ConstantInt(CC))) &&
-            cast<BinaryOperator>(Op0BOOp1)->getOperand(0)->hasOneUse()) {
+            match(Op0BOOp1,
+                  m_And(m_OneUse(m_Shr(m_Value(V1), m_Specific(Op1))),
+                        m_ConstantInt(CC)))) {
           Value *YS =   // (Y << C)
             Builder->CreateShl(Op0BO->getOperand(0), Op1,
                                          Op0BO->getName());
@@ -442,7 +441,7 @@ Instruction *InstCombiner::FoldShiftByConstant(Value *Op0, ConstantInt *Op1,
           return BinaryOperator::Create(Op0BO->getOpcode(), YS, XM);
         }
       }
-        
+
       // FALL THROUGH.
       case Instruction::Sub: {
         // Turn ((X >> C) + Y) << C  ->  (X + (Y << C)) & (~0 << C)
@@ -458,34 +457,32 @@ Instruction *InstCombiner::FoldShiftByConstant(Value *Op0, ConstantInt *Op1,
           return BinaryOperator::CreateAnd(X, ConstantInt::get(I.getContext(),
                      APInt::getHighBitsSet(TypeBits, TypeBits-Op1Val)));
         }
-        
+
         // Turn (((X >> C)&CC) + Y) << C  ->  (X + (Y << C)) & (CC << C)
         if (isLeftShift && Op0BO->getOperand(0)->hasOneUse() &&
             match(Op0BO->getOperand(0),
-                  m_And(m_Shr(m_Value(V1), m_Value(V2)),
-                        m_ConstantInt(CC))) && V2 == Op1 &&
-            cast<BinaryOperator>(Op0BO->getOperand(0))
-                ->getOperand(0)->hasOneUse()) {
+                  m_And(m_OneUse(m_Shr(m_Value(V1), m_Value(V2))),
+                        m_ConstantInt(CC))) && V2 == Op1) {
           Value *YS = // (Y << C)
             Builder->CreateShl(Op0BO->getOperand(1), Op1, Op0BO->getName());
           // X & (CC << C)
           Value *XM = Builder->CreateAnd(V1, ConstantExpr::getShl(CC, Op1),
                                          V1->getName()+".mask");
-          
+
           return BinaryOperator::Create(Op0BO->getOpcode(), XM, YS);
         }
-        
+
         break;
       }
       }
-      
-      
+
+
       // If the operand is an bitwise operator with a constant RHS, and the
       // shift is the only use, we can pull it out of the shift.
       if (ConstantInt *Op0C = dyn_cast<ConstantInt>(Op0BO->getOperand(1))) {
         bool isValid = true;     // Valid only for And, Or, Xor
         bool highBitSet = false; // Transform if high bit of constant set?
-        
+
         switch (Op0BO->getOpcode()) {
         default: isValid = false; break;   // Do not perform transform!
         case Instruction::Add:
@@ -499,7 +496,7 @@ Instruction *InstCombiner::FoldShiftByConstant(Value *Op0, ConstantInt *Op1,
           highBitSet = true;
           break;
         }
-        
+
         // If this is a signed shift right, and the high bit is modified
         // by the logical operation, do not perform the transformation.
         // The highBitSet boolean indicates the value of the high bit of
@@ -508,26 +505,26 @@ Instruction *InstCombiner::FoldShiftByConstant(Value *Op0, ConstantInt *Op1,
         //
         if (isValid && I.getOpcode() == Instruction::AShr)
           isValid = Op0C->getValue()[TypeBits-1] == highBitSet;
-        
+
         if (isValid) {
           Constant *NewRHS = ConstantExpr::get(I.getOpcode(), Op0C, Op1);
-          
+
           Value *NewShift =
             Builder->CreateBinOp(I.getOpcode(), Op0BO->getOperand(0), Op1);
           NewShift->takeName(Op0BO);
-          
+
           return BinaryOperator::Create(Op0BO->getOpcode(), NewShift,
                                         NewRHS);
         }
       }
     }
   }
-  
+
   // Find out if this is a shift of a shift by a constant.
   BinaryOperator *ShiftOp = dyn_cast<BinaryOperator>(Op0);
   if (ShiftOp && !ShiftOp->isShift())
     ShiftOp = 0;
-  
+
   if (ShiftOp && isa<ConstantInt>(ShiftOp->getOperand(1))) {
 
     // This is a constant shift of a constant shift. Be careful about hiding
@@ -548,9 +545,9 @@ Instruction *InstCombiner::FoldShiftByConstant(Value *Op0, ConstantInt *Op1,
     assert(ShiftAmt2 != 0 && "Should have been simplified earlier");
     if (ShiftAmt1 == 0) return 0;  // Will be simplified in the future.
     Value *X = ShiftOp->getOperand(0);
-    
+
     IntegerType *Ty = cast<IntegerType>(I.getType());
-    
+
     // Check for (X << c1) << c2  and  (X >> c1) >> c2
     if (I.getOpcode() == ShiftOp->getOpcode()) {
       uint32_t AmtSum = ShiftAmt1+ShiftAmt2;   // Fold into one big shift.
@@ -561,11 +558,11 @@ Instruction *InstCombiner::FoldShiftByConstant(Value *Op0, ConstantInt *Op1,
           return ReplaceInstUsesWith(I, Constant::getNullValue(I.getType()));
         AmtSum = TypeBits-1;  // Saturate to 31 for i32 ashr.
       }
-      
+
       return BinaryOperator::Create(I.getOpcode(), X,
                                     ConstantInt::get(Ty, AmtSum));
     }
-    
+
     if (ShiftAmt1 == ShiftAmt2) {
       // If we have ((X << C) >>u C), turn this into X & (-1 >>u C).
       if (I.getOpcode() == Instruction::LShr &&
@@ -605,7 +602,7 @@ Instruction *InstCombiner::FoldShiftByConstant(Value *Op0, ConstantInt *Op1,
           return NewLShr;
         }
         Value *Shift = Builder->CreateLShr(X, ShiftDiffCst);
-        
+
         APInt Mask(APInt::getLowBitsSet(TypeBits, TypeBits - ShiftAmt2));
         return BinaryOperator::CreateAnd(Shift,
                                          ConstantInt::get(I.getContext(),Mask));
@@ -653,12 +650,12 @@ Instruction *InstCombiner::FoldShiftByConstant(Value *Op0, ConstantInt *Op1,
           return NewShl;
         }
         Value *Shift = Builder->CreateShl(X, ShiftDiffCst);
-        
+
         APInt Mask(APInt::getLowBitsSet(TypeBits, TypeBits - ShiftAmt2));
         return BinaryOperator::CreateAnd(Shift,
                                          ConstantInt::get(I.getContext(),Mask));
       }
-      
+
       // We can't handle (X << C1) >>s C2, it shifts arbitrary bits in. However,
       // we can handle (X <<nsw C1) >>s C2 since it only shifts in sign bits.
       if (I.getOpcode() == Instruction::AShr &&
@@ -682,21 +679,21 @@ Instruction *InstCombiner::visitShl(BinaryOperator &I) {
                                  I.hasNoSignedWrap(), I.hasNoUnsignedWrap(),
                                  TD))
     return ReplaceInstUsesWith(I, V);
-  
+
   if (Instruction *V = commonShiftTransforms(I))
     return V;
-  
+
   if (ConstantInt *Op1C = dyn_cast<ConstantInt>(I.getOperand(1))) {
     unsigned ShAmt = Op1C->getZExtValue();
-    
+
     // If the shifted-out value is known-zero, then this is a NUW shift.
-    if (!I.hasNoUnsignedWrap() && 
+    if (!I.hasNoUnsignedWrap() &&
         MaskedValueIsZero(I.getOperand(0),
                           APInt::getHighBitsSet(Op1C->getBitWidth(), ShAmt))) {
           I.setHasNoUnsignedWrap();
           return &I;
         }
-    
+
     // If the shifted out value is all signbits, this is a NSW shift.
     if (!I.hasNoSignedWrap() &&
         ComputeNumSignBits(I.getOperand(0)) > ShAmt) {
@@ -712,7 +709,7 @@ Instruction *InstCombiner::visitShl(BinaryOperator &I) {
       match(I.getOperand(1), m_Constant(C2)))
     return BinaryOperator::CreateShl(ConstantExpr::getShl(C1, C2), A);
 
-  return 0;    
+  return 0;
 }
 
 Instruction *InstCombiner::visitLShr(BinaryOperator &I) {
@@ -722,9 +719,9 @@ Instruction *InstCombiner::visitLShr(BinaryOperator &I) {
 
   if (Instruction *R = commonShiftTransforms(I))
     return R;
-  
+
   Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
-  
+
   if (ConstantInt *Op1C = dyn_cast<ConstantInt>(Op1)) {
     unsigned ShAmt = Op1C->getZExtValue();
 
@@ -743,15 +740,15 @@ Instruction *InstCombiner::visitLShr(BinaryOperator &I) {
         return new ZExtInst(Cmp, II->getType());
       }
     }
-  
+
     // If the shifted-out value is known-zero, then this is an exact shift.
-    if (!I.isExact() && 
+    if (!I.isExact() &&
         MaskedValueIsZero(Op0,APInt::getLowBitsSet(Op1C->getBitWidth(),ShAmt))){
       I.setIsExact();
       return &I;
-    }    
+    }
   }
-  
+
   return 0;
 }
 
@@ -762,12 +759,12 @@ Instruction *InstCombiner::visitAShr(BinaryOperator &I) {
 
   if (Instruction *R = commonShiftTransforms(I))
     return R;
-  
+
   Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
 
   if (ConstantInt *Op1C = dyn_cast<ConstantInt>(Op1)) {
     unsigned ShAmt = Op1C->getZExtValue();
-    
+
     // If the input is a SHL by the same constant (ashr (shl X, C), C), then we
     // have a sign-extend idiom.
     Value *X;
@@ -791,23 +788,23 @@ Instruction *InstCombiner::visitAShr(BinaryOperator &I) {
     }
 
     // If the shifted-out value is known-zero, then this is an exact shift.
-    if (!I.isExact() && 
+    if (!I.isExact() &&
         MaskedValueIsZero(Op0,APInt::getLowBitsSet(Op1C->getBitWidth(),ShAmt))){
       I.setIsExact();
       return &I;
     }
-  }            
-  
+  }
+
   // See if we can turn a signed shr into an unsigned shr.
   if (MaskedValueIsZero(Op0,
                         APInt::getSignBit(I.getType()->getScalarSizeInBits())))
     return BinaryOperator::CreateLShr(Op0, Op1);
-  
+
   // Arithmetic shifting an all-sign-bit value is a no-op.
   unsigned NumSignBits = ComputeNumSignBits(Op0);
   if (NumSignBits == Op0->getType()->getScalarSizeInBits())
     return ReplaceInstUsesWith(I, Op0);
-  
+
   return 0;
 }
 
diff --git a/contrib/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp b/contrib/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
index 602b203..8add1ea 100644
--- a/contrib/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
+++ b/contrib/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
@@ -14,17 +14,18 @@
 
 
 #include "InstCombine.h"
-#include "llvm/DataLayout.h"
-#include "llvm/IntrinsicInst.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/Support/PatternMatch.h"
 
 using namespace llvm;
+using namespace llvm::PatternMatch;
 
-
-/// ShrinkDemandedConstant - Check to see if the specified operand of the 
+/// ShrinkDemandedConstant - Check to see if the specified operand of the
 /// specified instruction is a constant integer.  If so, check to see if there
 /// are any bits set in the constant that are not demanded.  If so, shrink the
 /// constant and return true.
-static bool ShrinkDemandedConstant(Instruction *I, unsigned OpNo, 
+static bool ShrinkDemandedConstant(Instruction *I, unsigned OpNo,
                                    APInt Demanded) {
   assert(I && "No instruction?");
   assert(OpNo < I->getNumOperands() && "Operand index too large");
@@ -53,8 +54,8 @@ bool InstCombiner::SimplifyDemandedInstructionBits(Instruction &Inst) {
   unsigned BitWidth = Inst.getType()->getScalarSizeInBits();
   APInt KnownZero(BitWidth, 0), KnownOne(BitWidth, 0);
   APInt DemandedMask(APInt::getAllOnesValue(BitWidth));
-  
-  Value *V = SimplifyDemandedUseBits(&Inst, DemandedMask, 
+
+  Value *V = SimplifyDemandedUseBits(&Inst, DemandedMask,
                                      KnownZero, KnownOne, 0);
   if (V == 0) return false;
   if (V == &Inst) return true;
@@ -65,7 +66,7 @@ bool InstCombiner::SimplifyDemandedInstructionBits(Instruction &Inst) {
 /// SimplifyDemandedBits - This form of SimplifyDemandedBits simplifies the
 /// specified instruction operand if possible, updating it in place.  It returns
 /// true if it made any change and false otherwise.
-bool InstCombiner::SimplifyDemandedBits(Use &U, APInt DemandedMask, 
+bool InstCombiner::SimplifyDemandedBits(Use &U, APInt DemandedMask,
                                         APInt &KnownZero, APInt &KnownOne,
                                         unsigned Depth) {
   Value *NewVal = SimplifyDemandedUseBits(U.get(), DemandedMask,
@@ -86,7 +87,7 @@ bool InstCombiner::SimplifyDemandedBits(Use &U, APInt DemandedMask,
 /// to be one in the expression.  KnownZero contains all the bits that are known
 /// to be zero in the expression. These are provided to potentially allow the
 /// caller (which might recursively be SimplifyDemandedBits itself) to simplify
-/// the expression. KnownOne and KnownZero always follow the invariant that 
+/// the expression. KnownOne and KnownZero always follow the invariant that
 /// KnownOne & KnownZero == 0. That is, a bit can't be both 1 and 0. Note that
 /// the bits in KnownOne and KnownZero may only be accurate for those bits set
 /// in DemandedMask. Note also that the bitwidth of V, DemandedMask, KnownZero
@@ -133,10 +134,10 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
       return 0;
     return UndefValue::get(VTy);
   }
-  
+
   if (Depth == 6)        // Limit search depth.
     return 0;
-  
+
   APInt LHSKnownZero(BitWidth, 0), LHSKnownOne(BitWidth, 0);
   APInt RHSKnownZero(BitWidth, 0), RHSKnownOne(BitWidth, 0);
 
@@ -158,61 +159,74 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
       // If either the LHS or the RHS are Zero, the result is zero.
       ComputeMaskedBits(I->getOperand(1), RHSKnownZero, RHSKnownOne, Depth+1);
       ComputeMaskedBits(I->getOperand(0), LHSKnownZero, LHSKnownOne, Depth+1);
-      
+
       // If all of the demanded bits are known 1 on one side, return the other.
       // These bits cannot contribute to the result of the 'and' in this
       // context.
-      if ((DemandedMask & ~LHSKnownZero & RHSKnownOne) == 
+      if ((DemandedMask & ~LHSKnownZero & RHSKnownOne) ==
           (DemandedMask & ~LHSKnownZero))
         return I->getOperand(0);
-      if ((DemandedMask & ~RHSKnownZero & LHSKnownOne) == 
+      if ((DemandedMask & ~RHSKnownZero & LHSKnownOne) ==
           (DemandedMask & ~RHSKnownZero))
         return I->getOperand(1);
-      
+
       // If all of the demanded bits in the inputs are known zeros, return zero.
       if ((DemandedMask & (RHSKnownZero|LHSKnownZero)) == DemandedMask)
         return Constant::getNullValue(VTy);
-      
+
     } else if (I->getOpcode() == Instruction::Or) {
       // We can simplify (X|Y) -> X or Y in the user's context if we know that
       // only bits from X or Y are demanded.
-      
+
       // If either the LHS or the RHS are One, the result is One.
       ComputeMaskedBits(I->getOperand(1), RHSKnownZero, RHSKnownOne, Depth+1);
       ComputeMaskedBits(I->getOperand(0), LHSKnownZero, LHSKnownOne, Depth+1);
-      
+
       // If all of the demanded bits are known zero on one side, return the
       // other.  These bits cannot contribute to the result of the 'or' in this
       // context.
-      if ((DemandedMask & ~LHSKnownOne & RHSKnownZero) == 
+      if ((DemandedMask & ~LHSKnownOne & RHSKnownZero) ==
           (DemandedMask & ~LHSKnownOne))
         return I->getOperand(0);
-      if ((DemandedMask & ~RHSKnownOne & LHSKnownZero) == 
+      if ((DemandedMask & ~RHSKnownOne & LHSKnownZero) ==
           (DemandedMask & ~RHSKnownOne))
         return I->getOperand(1);
-      
+
       // If all of the potentially set bits on one side are known to be set on
       // the other side, just use the 'other' side.
-      if ((DemandedMask & (~RHSKnownZero) & LHSKnownOne) == 
+      if ((DemandedMask & (~RHSKnownZero) & LHSKnownOne) ==
           (DemandedMask & (~RHSKnownZero)))
         return I->getOperand(0);
-      if ((DemandedMask & (~LHSKnownZero) & RHSKnownOne) == 
+      if ((DemandedMask & (~LHSKnownZero) & RHSKnownOne) ==
           (DemandedMask & (~LHSKnownZero)))
         return I->getOperand(1);
+    } else if (I->getOpcode() == Instruction::Xor) {
+      // We can simplify (X^Y) -> X or Y in the user's context if we know that
+      // only bits from X or Y are demanded.
+
+      ComputeMaskedBits(I->getOperand(1), RHSKnownZero, RHSKnownOne, Depth+1);
+      ComputeMaskedBits(I->getOperand(0), LHSKnownZero, LHSKnownOne, Depth+1);
+
+      // If all of the demanded bits are known zero on one side, return the
+      // other.
+      if ((DemandedMask & RHSKnownZero) == DemandedMask)
+        return I->getOperand(0);
+      if ((DemandedMask & LHSKnownZero) == DemandedMask)
+        return I->getOperand(1);
     }
-    
+
     // Compute the KnownZero/KnownOne bits to simplify things downstream.
     ComputeMaskedBits(I, KnownZero, KnownOne, Depth);
     return 0;
   }
-  
+
   // If this is the root being simplified, allow it to have multiple uses,
   // just set the DemandedMask to all bits so that we can try to simplify the
   // operands.  This allows visitTruncInst (for example) to simplify the
   // operand of a trunc without duplicating all the logic below.
   if (Depth == 0 && !V->hasOneUse())
     DemandedMask = APInt::getAllOnesValue(BitWidth);
-  
+
   switch (I->getOpcode()) {
   default:
     ComputeMaskedBits(I, KnownZero, KnownOne, Depth);
@@ -224,26 +238,26 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
         SimplifyDemandedBits(I->getOperandUse(0), DemandedMask & ~RHSKnownZero,
                              LHSKnownZero, LHSKnownOne, Depth+1))
       return I;
-    assert(!(RHSKnownZero & RHSKnownOne) && "Bits known to be one AND zero?"); 
-    assert(!(LHSKnownZero & LHSKnownOne) && "Bits known to be one AND zero?"); 
+    assert(!(RHSKnownZero & RHSKnownOne) && "Bits known to be one AND zero?");
+    assert(!(LHSKnownZero & LHSKnownOne) && "Bits known to be one AND zero?");
 
     // If all of the demanded bits are known 1 on one side, return the other.
     // These bits cannot contribute to the result of the 'and'.
-    if ((DemandedMask & ~LHSKnownZero & RHSKnownOne) == 
+    if ((DemandedMask & ~LHSKnownZero & RHSKnownOne) ==
         (DemandedMask & ~LHSKnownZero))
       return I->getOperand(0);
-    if ((DemandedMask & ~RHSKnownZero & LHSKnownOne) == 
+    if ((DemandedMask & ~RHSKnownZero & LHSKnownOne) ==
         (DemandedMask & ~RHSKnownZero))
       return I->getOperand(1);
-    
+
     // If all of the demanded bits in the inputs are known zeros, return zero.
     if ((DemandedMask & (RHSKnownZero|LHSKnownZero)) == DemandedMask)
       return Constant::getNullValue(VTy);
-      
+
     // If the RHS is a constant, see if we can simplify it.
     if (ShrinkDemandedConstant(I, 1, DemandedMask & ~LHSKnownZero))
       return I;
-      
+
     // Output known-1 bits are only known if set in both the LHS & RHS.
     KnownOne = RHSKnownOne & LHSKnownOne;
     // Output known-0 are known to be clear if zero in either the LHS | RHS.
@@ -251,36 +265,36 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
     break;
   case Instruction::Or:
     // If either the LHS or the RHS are One, the result is One.
-    if (SimplifyDemandedBits(I->getOperandUse(1), DemandedMask, 
+    if (SimplifyDemandedBits(I->getOperandUse(1), DemandedMask,
                              RHSKnownZero, RHSKnownOne, Depth+1) ||
-        SimplifyDemandedBits(I->getOperandUse(0), DemandedMask & ~RHSKnownOne, 
+        SimplifyDemandedBits(I->getOperandUse(0), DemandedMask & ~RHSKnownOne,
                              LHSKnownZero, LHSKnownOne, Depth+1))
       return I;
-    assert(!(RHSKnownZero & RHSKnownOne) && "Bits known to be one AND zero?"); 
-    assert(!(LHSKnownZero & LHSKnownOne) && "Bits known to be one AND zero?"); 
-    
+    assert(!(RHSKnownZero & RHSKnownOne) && "Bits known to be one AND zero?");
+    assert(!(LHSKnownZero & LHSKnownOne) && "Bits known to be one AND zero?");
+
     // If all of the demanded bits are known zero on one side, return the other.
     // These bits cannot contribute to the result of the 'or'.
-    if ((DemandedMask & ~LHSKnownOne & RHSKnownZero) == 
+    if ((DemandedMask & ~LHSKnownOne & RHSKnownZero) ==
         (DemandedMask & ~LHSKnownOne))
       return I->getOperand(0);
-    if ((DemandedMask & ~RHSKnownOne & LHSKnownZero) == 
+    if ((DemandedMask & ~RHSKnownOne & LHSKnownZero) ==
         (DemandedMask & ~RHSKnownOne))
       return I->getOperand(1);
 
     // If all of the potentially set bits on one side are known to be set on
     // the other side, just use the 'other' side.
-    if ((DemandedMask & (~RHSKnownZero) & LHSKnownOne) == 
+    if ((DemandedMask & (~RHSKnownZero) & LHSKnownOne) ==
         (DemandedMask & (~RHSKnownZero)))
       return I->getOperand(0);
-    if ((DemandedMask & (~LHSKnownZero) & RHSKnownOne) == 
+    if ((DemandedMask & (~LHSKnownZero) & RHSKnownOne) ==
         (DemandedMask & (~LHSKnownZero)))
       return I->getOperand(1);
-        
+
     // If the RHS is a constant, see if we can simplify it.
     if (ShrinkDemandedConstant(I, 1, DemandedMask))
       return I;
-          
+
     // Output known-0 bits are only known if clear in both the LHS & RHS.
     KnownZero = RHSKnownZero & LHSKnownZero;
     // Output known-1 are known to be set if set in either the LHS | RHS.
@@ -289,34 +303,34 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
   case Instruction::Xor: {
     if (SimplifyDemandedBits(I->getOperandUse(1), DemandedMask,
                              RHSKnownZero, RHSKnownOne, Depth+1) ||
-        SimplifyDemandedBits(I->getOperandUse(0), DemandedMask, 
+        SimplifyDemandedBits(I->getOperandUse(0), DemandedMask,
                              LHSKnownZero, LHSKnownOne, Depth+1))
       return I;
-    assert(!(RHSKnownZero & RHSKnownOne) && "Bits known to be one AND zero?"); 
-    assert(!(LHSKnownZero & LHSKnownOne) && "Bits known to be one AND zero?"); 
-    
+    assert(!(RHSKnownZero & RHSKnownOne) && "Bits known to be one AND zero?");
+    assert(!(LHSKnownZero & LHSKnownOne) && "Bits known to be one AND zero?");
+
     // If all of the demanded bits are known zero on one side, return the other.
     // These bits cannot contribute to the result of the 'xor'.
     if ((DemandedMask & RHSKnownZero) == DemandedMask)
       return I->getOperand(0);
     if ((DemandedMask & LHSKnownZero) == DemandedMask)
       return I->getOperand(1);
-    
+
     // If all of the demanded bits are known to be zero on one side or the
     // other, turn this into an *inclusive* or.
     //    e.g. (A & C1)^(B & C2) -> (A & C1)|(B & C2) iff C1&C2 == 0
     if ((DemandedMask & ~RHSKnownZero & ~LHSKnownZero) == 0) {
-      Instruction *Or = 
+      Instruction *Or =
         BinaryOperator::CreateOr(I->getOperand(0), I->getOperand(1),
                                  I->getName());
       return InsertNewInstWith(Or, *I);
     }
-    
+
     // If all of the demanded bits on one side are known, and all of the set
     // bits on that side are also known to be set on the other side, turn this
     // into an AND, as we know the bits will be cleared.
     //    e.g. (X | C1) ^ C2 --> (X | C1) & ~C2 iff (C1&C2) == C2
-    if ((DemandedMask & (RHSKnownZero|RHSKnownOne)) == DemandedMask) { 
+    if ((DemandedMask & (RHSKnownZero|RHSKnownOne)) == DemandedMask) {
       // all known
       if ((RHSKnownOne & LHSKnownOne) == RHSKnownOne) {
         Constant *AndC = Constant::getIntegerValue(VTy,
@@ -325,12 +339,12 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
         return InsertNewInstWith(And, *I);
       }
     }
-    
+
     // If the RHS is a constant, see if we can simplify it.
     // FIXME: for XOR, we prefer to force bits to 1 if they will make a -1.
     if (ShrinkDemandedConstant(I, 1, DemandedMask))
       return I;
-    
+
     // If our LHS is an 'and' and if it has one use, and if any of the bits we
     // are flipping are known to be set, then the xor is just resetting those
     // bits to zero.  We can just knock out bits from the 'and' and the 'xor',
@@ -343,12 +357,12 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
         ConstantInt *AndRHS = cast<ConstantInt>(LHSInst->getOperand(1));
         ConstantInt *XorRHS = cast<ConstantInt>(I->getOperand(1));
         APInt NewMask = ~(LHSKnownOne & RHSKnownOne & DemandedMask);
-        
+
         Constant *AndC =
           ConstantInt::get(I->getType(), NewMask & AndRHS->getValue());
         Instruction *NewAnd = BinaryOperator::CreateAnd(I->getOperand(0), AndC);
         InsertNewInstWith(NewAnd, *I);
-        
+
         Constant *XorC =
           ConstantInt::get(I->getType(), NewMask & XorRHS->getValue());
         Instruction *NewXor = BinaryOperator::CreateXor(NewAnd, XorC);
@@ -364,17 +378,17 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
   case Instruction::Select:
     if (SimplifyDemandedBits(I->getOperandUse(2), DemandedMask,
                              RHSKnownZero, RHSKnownOne, Depth+1) ||
-        SimplifyDemandedBits(I->getOperandUse(1), DemandedMask, 
+        SimplifyDemandedBits(I->getOperandUse(1), DemandedMask,
                              LHSKnownZero, LHSKnownOne, Depth+1))
       return I;
-    assert(!(RHSKnownZero & RHSKnownOne) && "Bits known to be one AND zero?"); 
-    assert(!(LHSKnownZero & LHSKnownOne) && "Bits known to be one AND zero?"); 
-    
+    assert(!(RHSKnownZero & RHSKnownOne) && "Bits known to be one AND zero?");
+    assert(!(LHSKnownZero & LHSKnownOne) && "Bits known to be one AND zero?");
+
     // If the operands are constants, see if we can simplify them.
     if (ShrinkDemandedConstant(I, 1, DemandedMask) ||
         ShrinkDemandedConstant(I, 2, DemandedMask))
       return I;
-    
+
     // Only known if known in both the LHS and RHS.
     KnownOne = RHSKnownOne & LHSKnownOne;
     KnownZero = RHSKnownZero & LHSKnownZero;
@@ -384,13 +398,13 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
     DemandedMask = DemandedMask.zext(truncBf);
     KnownZero = KnownZero.zext(truncBf);
     KnownOne = KnownOne.zext(truncBf);
-    if (SimplifyDemandedBits(I->getOperandUse(0), DemandedMask, 
+    if (SimplifyDemandedBits(I->getOperandUse(0), DemandedMask,
                              KnownZero, KnownOne, Depth+1))
       return I;
     DemandedMask = DemandedMask.trunc(BitWidth);
     KnownZero = KnownZero.trunc(BitWidth);
     KnownOne = KnownOne.trunc(BitWidth);
-    assert(!(KnownZero & KnownOne) && "Bits known to be one AND zero?"); 
+    assert(!(KnownZero & KnownOne) && "Bits known to be one AND zero?");
     break;
   }
   case Instruction::BitCast:
@@ -413,12 +427,12 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
     if (SimplifyDemandedBits(I->getOperandUse(0), DemandedMask,
                              KnownZero, KnownOne, Depth+1))
       return I;
-    assert(!(KnownZero & KnownOne) && "Bits known to be one AND zero?"); 
+    assert(!(KnownZero & KnownOne) && "Bits known to be one AND zero?");
     break;
   case Instruction::ZExt: {
     // Compute the bits in the result that are not present in the input.
     unsigned SrcBitWidth =I->getOperand(0)->getType()->getScalarSizeInBits();
-    
+
     DemandedMask = DemandedMask.trunc(SrcBitWidth);
     KnownZero = KnownZero.trunc(SrcBitWidth);
     KnownOne = KnownOne.trunc(SrcBitWidth);
@@ -428,7 +442,7 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
     DemandedMask = DemandedMask.zext(BitWidth);
     KnownZero = KnownZero.zext(BitWidth);
     KnownOne = KnownOne.zext(BitWidth);
-    assert(!(KnownZero & KnownOne) && "Bits known to be one AND zero?"); 
+    assert(!(KnownZero & KnownOne) && "Bits known to be one AND zero?");
     // The top bits are known to be zero.
     KnownZero |= APInt::getHighBitsSet(BitWidth, BitWidth - SrcBitWidth);
     break;
@@ -436,8 +450,8 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
   case Instruction::SExt: {
     // Compute the bits in the result that are not present in the input.
     unsigned SrcBitWidth =I->getOperand(0)->getType()->getScalarSizeInBits();
-    
-    APInt InputDemandedBits = DemandedMask & 
+
+    APInt InputDemandedBits = DemandedMask &
                               APInt::getLowBitsSet(BitWidth, SrcBitWidth);
 
     APInt NewBits(APInt::getHighBitsSet(BitWidth, BitWidth - SrcBitWidth));
@@ -445,7 +459,7 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
     // bit is demanded.
     if ((NewBits & DemandedMask) != 0)
       InputDemandedBits.setBit(SrcBitWidth-1);
-      
+
     InputDemandedBits = InputDemandedBits.trunc(SrcBitWidth);
     KnownZero = KnownZero.trunc(SrcBitWidth);
     KnownOne = KnownOne.trunc(SrcBitWidth);
@@ -455,8 +469,8 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
     InputDemandedBits = InputDemandedBits.zext(BitWidth);
     KnownZero = KnownZero.zext(BitWidth);
     KnownOne = KnownOne.zext(BitWidth);
-    assert(!(KnownZero & KnownOne) && "Bits known to be one AND zero?"); 
-      
+    assert(!(KnownZero & KnownOne) && "Bits known to be one AND zero?");
+
     // If the sign bit of the input is known set or clear, then we know the
     // top bits of the result.
 
@@ -476,7 +490,7 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
     // are not demanded, then the add doesn't demand them from its input
     // either.
     unsigned NLZ = DemandedMask.countLeadingZeros();
-      
+
     // If there is a constant on the RHS, there are a variety of xformations
     // we can do.
     if (ConstantInt *RHS = dyn_cast<ConstantInt>(I->getOperand(1))) {
@@ -484,13 +498,13 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
       // won't work if the RHS is zero.
       if (RHS->isZero())
         break;
-      
+
       // If the top bit of the output is demanded, demand everything from the
       // input.  Otherwise, we demand all the input bits except NLZ top bits.
       APInt InDemandedBits(APInt::getLowBitsSet(BitWidth, BitWidth - NLZ));
 
       // Find information about known zero/one bits in the input.
-      if (SimplifyDemandedBits(I->getOperandUse(0), InDemandedBits, 
+      if (SimplifyDemandedBits(I->getOperandUse(0), InDemandedBits,
                                LHSKnownZero, LHSKnownOne, Depth+1))
         return I;
 
@@ -498,11 +512,11 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
       // the constant.
       if (ShrinkDemandedConstant(I, 1, InDemandedBits))
         return I;
-      
+
       // Avoid excess work.
       if (LHSKnownZero == 0 && LHSKnownOne == 0)
         break;
-      
+
       // Turn it into OR if input bits are zero.
       if ((LHSKnownZero & RHS->getValue()) == RHS->getValue()) {
         Instruction *Or =
@@ -510,26 +524,26 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
                                    I->getName());
         return InsertNewInstWith(Or, *I);
       }
-      
+
       // We can say something about the output known-zero and known-one bits,
       // depending on potential carries from the input constant and the
       // unknowns.  For example if the LHS is known to have at most the 0x0F0F0
       // bits set and the RHS constant is 0x01001, then we know we have a known
       // one mask of 0x00001 and a known zero mask of 0xE0F0E.
-      
+
       // To compute this, we first compute the potential carry bits.  These are
       // the bits which may be modified.  I'm not aware of a better way to do
       // this scan.
       const APInt &RHSVal = RHS->getValue();
       APInt CarryBits((~LHSKnownZero + RHSVal) ^ (~LHSKnownZero ^ RHSVal));
-      
+
       // Now that we know which bits have carries, compute the known-1/0 sets.
-      
+
       // Bits are known one if they are known zero in one operand and one in the
       // other, and there is no input carry.
-      KnownOne = ((LHSKnownZero & RHSVal) | 
+      KnownOne = ((LHSKnownZero & RHSVal) |
                   (LHSKnownOne & ~RHSVal)) & ~CarryBits;
-      
+
       // Bits are known zero if they are known zero in both operands and there
       // is no input carry.
       KnownZero = LHSKnownZero & ~RHSVal & ~CarryBits;
@@ -580,17 +594,28 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
     break;
   case Instruction::Shl:
     if (ConstantInt *SA = dyn_cast<ConstantInt>(I->getOperand(1))) {
+      {
+        Value *VarX; ConstantInt *C1;
+        if (match(I->getOperand(0), m_Shr(m_Value(VarX), m_ConstantInt(C1)))) {
+          Instruction *Shr = cast<Instruction>(I->getOperand(0));
+          Value *R = SimplifyShrShlDemandedBits(Shr, I, DemandedMask,
+                                                KnownZero, KnownOne);
+          if (R)
+            return R;
+        }
+      }
+
       uint64_t ShiftAmt = SA->getLimitedValue(BitWidth-1);
       APInt DemandedMaskIn(DemandedMask.lshr(ShiftAmt));
-      
+
       // If the shift is NUW/NSW, then it does demand the high bits.
       ShlOperator *IOp = cast<ShlOperator>(I);
       if (IOp->hasNoSignedWrap())
         DemandedMaskIn |= APInt::getHighBitsSet(BitWidth, ShiftAmt+1);
       else if (IOp->hasNoUnsignedWrap())
         DemandedMaskIn |= APInt::getHighBitsSet(BitWidth, ShiftAmt);
-      
-      if (SimplifyDemandedBits(I->getOperandUse(0), DemandedMaskIn, 
+
+      if (SimplifyDemandedBits(I->getOperandUse(0), DemandedMaskIn,
                                KnownZero, KnownOne, Depth+1))
         return I;
       assert(!(KnownZero & KnownOne) && "Bits known to be one AND zero?");
@@ -605,15 +630,15 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
     // For a logical shift right
     if (ConstantInt *SA = dyn_cast<ConstantInt>(I->getOperand(1))) {
       uint64_t ShiftAmt = SA->getLimitedValue(BitWidth-1);
-      
+
       // Unsigned shift right.
       APInt DemandedMaskIn(DemandedMask.shl(ShiftAmt));
-      
+
       // If the shift is exact, then it does demand the low bits (and knows that
       // they are zero).
       if (cast<LShrOperator>(I)->isExact())
         DemandedMaskIn |= APInt::getLowBitsSet(BitWidth, ShiftAmt);
-      
+
       if (SimplifyDemandedBits(I->getOperandUse(0), DemandedMaskIn,
                                KnownZero, KnownOne, Depth+1))
         return I;
@@ -637,28 +662,28 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
       Instruction *NewVal = BinaryOperator::CreateLShr(
                         I->getOperand(0), I->getOperand(1), I->getName());
       return InsertNewInstWith(NewVal, *I);
-    }    
+    }
 
     // If the sign bit is the only bit demanded by this ashr, then there is no
     // need to do it, the shift doesn't change the high bit.
     if (DemandedMask.isSignBit())
       return I->getOperand(0);
-    
+
     if (ConstantInt *SA = dyn_cast<ConstantInt>(I->getOperand(1))) {
       uint32_t ShiftAmt = SA->getLimitedValue(BitWidth-1);
-      
+
       // Signed shift right.
       APInt DemandedMaskIn(DemandedMask.shl(ShiftAmt));
       // If any of the "high bits" are demanded, we should set the sign bit as
       // demanded.
       if (DemandedMask.countLeadingZeros() <= ShiftAmt)
         DemandedMaskIn.setBit(BitWidth-1);
-      
+
       // If the shift is exact, then it does demand the low bits (and knows that
       // they are zero).
       if (cast<AShrOperator>(I)->isExact())
         DemandedMaskIn |= APInt::getLowBitsSet(BitWidth, ShiftAmt);
-      
+
       if (SimplifyDemandedBits(I->getOperandUse(0), DemandedMaskIn,
                                KnownZero, KnownOne, Depth+1))
         return I;
@@ -667,15 +692,15 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
       APInt HighBits(APInt::getHighBitsSet(BitWidth, ShiftAmt));
       KnownZero = APIntOps::lshr(KnownZero, ShiftAmt);
       KnownOne  = APIntOps::lshr(KnownOne, ShiftAmt);
-        
+
       // Handle the sign bits.
       APInt SignBit(APInt::getSignBit(BitWidth));
       // Adjust to where it is now in the mask.
-      SignBit = APIntOps::lshr(SignBit, ShiftAmt);  
-        
+      SignBit = APIntOps::lshr(SignBit, ShiftAmt);
+
       // If the input sign bit is known to be zero, or if none of the top bits
       // are demanded, turn this into an unsigned shift right.
-      if (BitWidth <= ShiftAmt || KnownZero[BitWidth-ShiftAmt-1] || 
+      if (BitWidth <= ShiftAmt || KnownZero[BitWidth-ShiftAmt-1] ||
           (HighBits & ~DemandedMask) == HighBits) {
         // Perform the logical shift right.
         BinaryOperator *NewVal = BinaryOperator::CreateLShr(I->getOperand(0),
@@ -718,7 +743,7 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
         if (LHSKnownOne[BitWidth-1] && ((LHSKnownOne & LowBits) != 0))
           KnownOne |= ~LowBits;
 
-        assert(!(KnownZero & KnownOne) && "Bits known to be one AND zero?"); 
+        assert(!(KnownZero & KnownOne) && "Bits known to be one AND zero?");
       }
     }
 
@@ -756,7 +781,7 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
         // just shift the input byte into position to eliminate the bswap.
         unsigned NLZ = DemandedMask.countLeadingZeros();
         unsigned NTZ = DemandedMask.countTrailingZeros();
-          
+
         // Round NTZ down to the next byte.  If we have 11 trailing zeros, then
         // we need all the bits down to bit 8.  Likewise, round NLZ.  If we
         // have 14 leading zeros, round to 8.
@@ -766,7 +791,7 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
         if (BitWidth-NLZ-NTZ == 8) {
           unsigned ResultBit = NTZ;
           unsigned InputBit = BitWidth-NTZ-8;
-          
+
           // Replace this with either a left or right shift to get the byte into
           // the right place.
           Instruction *NewVal;
@@ -779,7 +804,7 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
           NewVal->takeName(I);
           return InsertNewInstWith(NewVal, *I);
         }
-          
+
         // TODO: Could compute known zero/one bits based on the input.
         break;
       }
@@ -792,7 +817,7 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
     ComputeMaskedBits(V, KnownZero, KnownOne, Depth);
     break;
   }
-  
+
   // If the client is only demanding bits that we know, return the known
   // constant.
   if ((DemandedMask & (KnownZero|KnownOne)) == DemandedMask)
@@ -800,6 +825,81 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
   return 0;
 }
 
+/// Helper routine of SimplifyDemandedUseBits. It tries to simplify
+/// "E1 = (X lsr C1) << C2", where the C1 and C2 are constant, into
+/// "E2 = X << (C2 - C1)" or "E2 = X >> (C1 - C2)", depending on the sign
+/// of "C2-C1".
+///
+/// Suppose E1 and E2 are generally different in bits S={bm, bm+1,
+/// ..., bn}, without considering the specific value X is holding.
+/// This transformation is legal iff one of following conditions is hold:
+///  1) All the bit in S are 0, in this case E1 == E2.
+///  2) We don't care those bits in S, per the input DemandedMask.
+///  3) Combination of 1) and 2). Some bits in S are 0, and we don't care the
+///     rest bits.
+///
+/// Currently we only test condition 2).
+///
+/// As with SimplifyDemandedUseBits, it returns NULL if the simplification was
+/// not successful.
+Value *InstCombiner::SimplifyShrShlDemandedBits(Instruction *Shr,
+  Instruction *Shl, APInt DemandedMask, APInt &KnownZero, APInt &KnownOne) {
+
+  unsigned ShlAmt = cast<ConstantInt>(Shl->getOperand(1))->getZExtValue();
+  unsigned ShrAmt = cast<ConstantInt>(Shr->getOperand(1))->getZExtValue();
+
+  KnownOne.clearAllBits();
+  KnownZero = APInt::getBitsSet(KnownZero.getBitWidth(), 0, ShlAmt-1);
+  KnownZero &= DemandedMask;
+
+  if (ShlAmt == 0 || ShrAmt == 0)
+    return 0;
+
+  Value *VarX = Shr->getOperand(0);
+  Type *Ty = VarX->getType();
+
+  APInt BitMask1(APInt::getAllOnesValue(Ty->getIntegerBitWidth()));
+  APInt BitMask2(APInt::getAllOnesValue(Ty->getIntegerBitWidth()));
+
+  bool isLshr = (Shr->getOpcode() == Instruction::LShr);
+  BitMask1 = isLshr ? (BitMask1.lshr(ShrAmt) << ShlAmt) :
+                      (BitMask1.ashr(ShrAmt) << ShlAmt);
+
+  if (ShrAmt <= ShlAmt) {
+    BitMask2 <<= (ShlAmt - ShrAmt);
+  } else {
+    BitMask2 = isLshr ? BitMask2.lshr(ShrAmt - ShlAmt):
+                        BitMask2.ashr(ShrAmt - ShlAmt);
+  }
+
+  // Check if condition-2 (see the comment to this function) is satified.
+  if ((BitMask1 & DemandedMask) == (BitMask2 & DemandedMask)) {
+    if (ShrAmt == ShlAmt)
+      return VarX;
+
+    if (!Shr->hasOneUse())
+      return 0;
+
+    BinaryOperator *New;
+    if (ShrAmt < ShlAmt) {
+      Constant *Amt = ConstantInt::get(VarX->getType(), ShlAmt - ShrAmt);
+      New = BinaryOperator::CreateShl(VarX, Amt);
+      BinaryOperator *Orig = cast<BinaryOperator>(Shl);
+      New->setHasNoSignedWrap(Orig->hasNoSignedWrap());
+      New->setHasNoUnsignedWrap(Orig->hasNoUnsignedWrap());
+    } else {
+      Constant *Amt = ConstantInt::get(VarX->getType(), ShrAmt - ShlAmt);
+      New = isLshr ? BinaryOperator::CreateLShr(VarX, Amt) :
+                     BinaryOperator::CreateAShr(VarX, Amt);
+      if (cast<BinaryOperator>(Shr)->isExact())
+        New->setIsExact(true);
+    }
+
+    return InsertNewInstWith(New, *Shl);
+  }
+
+  return 0;
+}
 
 /// SimplifyDemandedVectorElts - The specified value produces a vector with
 /// any number of elements. DemandedElts contains the set of elements that are
@@ -821,14 +921,14 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts,
     UndefElts = EltMask;
     return 0;
   }
-  
+
   if (DemandedElts == 0) { // If nothing is demanded, provide undef.
     UndefElts = EltMask;
     return UndefValue::get(V->getType());
   }
 
   UndefElts = 0;
-  
+
   // Handle ConstantAggregateZero, ConstantVector, ConstantDataSequential.
   if (Constant *C = dyn_cast<Constant>(V)) {
     // Check if this is identity. If so, return 0 since we are not simplifying
@@ -838,7 +938,7 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts,
 
     Type *EltTy = cast<VectorType>(V->getType())->getElementType();
     Constant *Undef = UndefValue::get(EltTy);
-    
+
     SmallVector<Constant*, 16> Elts;
     for (unsigned i = 0; i != VWidth; ++i) {
       if (!DemandedElts[i]) {   // If not demanded, set to undef.
@@ -846,10 +946,10 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts,
         UndefElts.setBit(i);
         continue;
       }
-      
+
       Constant *Elt = C->getAggregateElement(i);
       if (Elt == 0) return 0;
-      
+
       if (isa<UndefValue>(Elt)) {   // Already undef.
         Elts.push_back(Undef);
         UndefElts.setBit(i);
@@ -857,12 +957,12 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts,
         Elts.push_back(Elt);
       }
     }
-    
+
     // If we changed the constant, return it.
     Constant *NewCV = ConstantVector::get(Elts);
     return NewCV != C ? NewCV : 0;
   }
-  
+
   // Limit search depth.
   if (Depth == 10)
     return 0;
@@ -881,16 +981,16 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts,
     // Conservatively assume that all elements are needed.
     DemandedElts = EltMask;
   }
-  
+
   Instruction *I = dyn_cast<Instruction>(V);
   if (!I) return 0;        // Only analyze instructions.
-  
+
   bool MadeChange = false;
   APInt UndefElts2(VWidth, 0);
   Value *TmpV;
   switch (I->getOpcode()) {
   default: break;
-    
+
   case Instruction::InsertElement: {
     // If this is a variable index, we don't know which element it overwrites.
     // demand exactly the same input as we produce.
@@ -903,7 +1003,7 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts,
       if (TmpV) { I->setOperand(0, TmpV); MadeChange = true; }
       break;
     }
-    
+
     // If this is inserting an element that isn't demanded, remove this
     // insertelement.
     unsigned IdxNo = Idx->getZExtValue();
@@ -911,7 +1011,7 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts,
       Worklist.Add(I);
       return I->getOperand(0);
     }
-    
+
     // Otherwise, the element inserted overwrites whatever was there, so the
     // input demanded set is simpler than the output set.
     APInt DemandedElts2 = DemandedElts;
@@ -1007,7 +1107,7 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts,
     TmpV = SimplifyDemandedVectorElts(I->getOperand(2), RightDemanded,
                                       UndefElts2, Depth+1);
     if (TmpV) { I->setOperand(2, TmpV); MadeChange = true; }
-      
+
     // Output elements are undefined if both are undefined.
     UndefElts &= UndefElts2;
     break;
@@ -1028,7 +1128,7 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts,
     } else if (VWidth > InVWidth) {
       // Untested so far.
       break;
-      
+
       // If there are more elements in the result than there are in the source,
       // then an input element is live if any of the corresponding output
       // elements are live.
@@ -1040,7 +1140,7 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts,
     } else {
       // Untested so far.
       break;
-      
+
       // If there are more elements in the source than there are in the result,
       // then an input element is live if the corresponding output element is
       // live.
@@ -1049,7 +1149,7 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts,
         if (DemandedElts[InIdx/Ratio])
           InputDemandedElts.setBit(InIdx);
     }
-    
+
     // div/rem demand all inputs, because they don't want divide by zero.
     TmpV = SimplifyDemandedVectorElts(I->getOperand(0), InputDemandedElts,
                                       UndefElts2, Depth+1);
@@ -1057,7 +1157,7 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts,
       I->setOperand(0, TmpV);
       MadeChange = true;
     }
-    
+
     UndefElts = UndefElts2;
     if (VWidth > InVWidth) {
       llvm_unreachable("Unimp");
@@ -1092,7 +1192,7 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts,
     TmpV = SimplifyDemandedVectorElts(I->getOperand(1), DemandedElts,
                                       UndefElts2, Depth+1);
     if (TmpV) { I->setOperand(1, TmpV); MadeChange = true; }
-      
+
     // Output elements are undefined if both are undefined.  Consider things
     // like undef&0.  The result is known zero, not undef.
     UndefElts &= UndefElts2;
@@ -1103,13 +1203,13 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts,
                                       UndefElts, Depth+1);
     if (TmpV) { I->setOperand(0, TmpV); MadeChange = true; }
     break;
-    
+
   case Instruction::Call: {
     IntrinsicInst *II = dyn_cast<IntrinsicInst>(I);
     if (!II) break;
     switch (II->getIntrinsicID()) {
     default: break;
-      
+
     // Binary vector operations that work column-wise.  A dest element is a
     // function of the corresponding input elements from the two inputs.
     case Intrinsic::x86_sse_sub_ss:
@@ -1140,11 +1240,11 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts,
           Value *LHS = II->getArgOperand(0);
           Value *RHS = II->getArgOperand(1);
           // Extract the element as scalars.
-          LHS = InsertNewInstWith(ExtractElementInst::Create(LHS, 
+          LHS = InsertNewInstWith(ExtractElementInst::Create(LHS,
             ConstantInt::get(Type::getInt32Ty(I->getContext()), 0U)), *II);
           RHS = InsertNewInstWith(ExtractElementInst::Create(RHS,
             ConstantInt::get(Type::getInt32Ty(I->getContext()), 0U)), *II);
-          
+
           switch (II->getIntrinsicID()) {
           default: llvm_unreachable("Case stmts out of sync!");
           case Intrinsic::x86_sse_sub_ss:
@@ -1158,7 +1258,7 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts,
                                                          II->getName()), *II);
             break;
           }
-          
+
           Instruction *New =
             InsertElementInst::Create(
               UndefValue::get(II->getType()), TmpV,
@@ -1166,9 +1266,9 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts,
                                       II->getName());
           InsertNewInstWith(New, *II);
           return New;
-        }            
+        }
       }
-        
+
       // Output elements are undefined if both are undefined.  Consider things
       // like undef&0.  The result is known zero, not undef.
       UndefElts &= UndefElts2;
diff --git a/contrib/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp b/contrib/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
index dd7ea14..4f71db1 100644
--- a/contrib/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
+++ b/contrib/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
@@ -13,7 +13,9 @@
 //===----------------------------------------------------------------------===//
 
 #include "InstCombine.h"
+#include "llvm/Support/PatternMatch.h"
 using namespace llvm;
+using namespace PatternMatch;
 
 /// CheapToScalarize - Return true if the value is cheaper to scalarize than it
 /// is to leave as a vector operation.  isConstant indicates whether we're
@@ -92,6 +94,13 @@ static Value *FindScalarElement(Value *V, unsigned EltNo) {
     return FindScalarElement(SVI->getOperand(1), InEl - LHSWidth);
   }
 
+  // Extract a value from a vector add operation with a constant zero.
+  Value *Val = 0; Constant *Con = 0;
+  if (match(V, m_Add(m_Value(Val), m_Constant(Con)))) {
+    if (Con->getAggregateElement(EltNo)->isNullValue())
+      return FindScalarElement(Val, EltNo);
+  }
+
   // Otherwise, we don't know.
   return 0;
 }
@@ -295,12 +304,12 @@ static Value *CollectShuffleElements(Value *V, SmallVectorImpl<Constant*> &Mask,
     Mask.assign(NumElts, UndefValue::get(Type::getInt32Ty(V->getContext())));
     return V;
   }
-  
+
   if (isa<ConstantAggregateZero>(V)) {
     Mask.assign(NumElts, ConstantInt::get(Type::getInt32Ty(V->getContext()),0));
     return V;
   }
-  
+
   if (InsertElementInst *IEI = dyn_cast<InsertElementInst>(V)) {
     // If this is an insert of an extract from some other vector, include it.
     Value *VecOp    = IEI->getOperand(0);
@@ -595,12 +604,12 @@ Instruction *InstCombiner::visitShuffleVectorInst(ShuffleVectorInst &SVI) {
   // ShuffleVectorInst is equivalent to the original one.
   for (unsigned i = 0; i < VWidth; ++i) {
     int eltMask;
-    if (Mask[i] == -1) {
+    if (Mask[i] < 0) {
       // This element is an undef value.
       eltMask = -1;
     } else if (Mask[i] < (int)LHSWidth) {
       // This element is from left hand side vector operand.
-      // 
+      //
       // If LHS is going to be replaced (case 1, 2, or 4), calculate the
       // new mask value for the element.
       if (newLHS != LHS) {
@@ -609,8 +618,7 @@ Instruction *InstCombiner::visitShuffleVectorInst(ShuffleVectorInst &SVI) {
         // with a -1 mask value.
         if (eltMask >= (int)LHSOp0Width && isa<UndefValue>(LHSOp1))
           eltMask = -1;
-      }
-      else
+      } else
         eltMask = Mask[i];
     } else {
       // This element is from right hand side vector operand
@@ -630,8 +638,7 @@ Instruction *InstCombiner::visitShuffleVectorInst(ShuffleVectorInst &SVI) {
                  && "should have been check above");
           eltMask = -1;
         }
-      }
-      else
+      } else
         eltMask = Mask[i]-LHSWidth;
 
       // If LHS's width is changed, shift the mask value accordingly.
diff --git a/contrib/llvm/lib/Transforms/InstCombine/InstCombineWorklist.h b/contrib/llvm/lib/Transforms/InstCombine/InstCombineWorklist.h
index ea654ae..49efce5 100644
--- a/contrib/llvm/lib/Transforms/InstCombine/InstCombineWorklist.h
+++ b/contrib/llvm/lib/Transforms/InstCombine/InstCombineWorklist.h
@@ -11,28 +11,28 @@
 #define INSTCOMBINE_WORKLIST_H
 
 #define DEBUG_TYPE "instcombine"
-#include "llvm/Instruction.h"
-#include "llvm/Support/Debug.h"
-#include "llvm/Support/Compiler.h"
-#include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/IR/Instruction.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/Debug.h"
 #include "llvm/Support/raw_ostream.h"
 
 namespace llvm {
-  
+
 /// InstCombineWorklist - This is the worklist management logic for
 /// InstCombine.
 class LLVM_LIBRARY_VISIBILITY InstCombineWorklist {
   SmallVector<Instruction*, 256> Worklist;
   DenseMap<Instruction*, unsigned> WorklistMap;
-  
+
   void operator=(const InstCombineWorklist&RHS) LLVM_DELETED_FUNCTION;
   InstCombineWorklist(const InstCombineWorklist&) LLVM_DELETED_FUNCTION;
 public:
   InstCombineWorklist() {}
-  
+
   bool isEmpty() const { return Worklist.empty(); }
-  
+
   /// Add - Add the specified instruction to the worklist if it isn't already
   /// in it.
   void Add(Instruction *I) {
@@ -41,12 +41,12 @@ public:
       Worklist.push_back(I);
     }
   }
-  
+
   void AddValue(Value *V) {
     if (Instruction *I = dyn_cast<Instruction>(V))
       Add(I);
   }
-  
+
   /// AddInitialGroup - Add the specified batch of stuff in reverse order.
   /// which should only be done when the worklist is empty and when the group
   /// has no duplicates.
@@ -61,25 +61,25 @@ public:
       Worklist.push_back(I);
     }
   }
-  
+
   // Remove - remove I from the worklist if it exists.
   void Remove(Instruction *I) {
     DenseMap<Instruction*, unsigned>::iterator It = WorklistMap.find(I);
     if (It == WorklistMap.end()) return; // Not in worklist.
-    
+
     // Don't bother moving everything down, just null out the slot.
     Worklist[It->second] = 0;
-    
+
     WorklistMap.erase(It);
   }
-  
+
   Instruction *RemoveOne() {
     Instruction *I = Worklist.back();
     Worklist.pop_back();
     WorklistMap.erase(I);
     return I;
   }
-  
+
   /// AddUsersToWorkList - When an instruction is simplified, add all users of
   /// the instruction to the work lists because they might get more simplified
   /// now.
@@ -89,18 +89,18 @@ public:
          UI != UE; ++UI)
       Add(cast<Instruction>(*UI));
   }
-  
-  
+
+
   /// Zap - check that the worklist is empty and nuke the backing store for
   /// the map if it is large.
   void Zap() {
     assert(WorklistMap.empty() && "Worklist empty, but map not?");
-    
+
     // Do an explicit clear, this shrinks the map if needed.
     WorklistMap.clear();
   }
 };
-  
+
 } // end namespace llvm.
 
 #endif
diff --git a/contrib/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp b/contrib/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
index 9a46f25..c6115e3 100644
--- a/contrib/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
+++ b/contrib/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
@@ -36,22 +36,23 @@
 #define DEBUG_TYPE "instcombine"
 #include "llvm/Transforms/Scalar.h"
 #include "InstCombine.h"
-#include "llvm/IntrinsicInst.h"
+#include "llvm-c/Initialization.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/StringSwitch.h"
 #include "llvm/Analysis/ConstantFolding.h"
 #include "llvm/Analysis/InstructionSimplify.h"
 #include "llvm/Analysis/MemoryBuiltins.h"
-#include "llvm/DataLayout.h"
-#include "llvm/Target/TargetLibraryInfo.h"
-#include "llvm/Transforms/Utils/Local.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/IntrinsicInst.h"
 #include "llvm/Support/CFG.h"
+#include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/GetElementPtrTypeIterator.h"
 #include "llvm/Support/PatternMatch.h"
 #include "llvm/Support/ValueHandle.h"
-#include "llvm/ADT/SmallPtrSet.h"
-#include "llvm/ADT/Statistic.h"
-#include "llvm/ADT/StringSwitch.h"
-#include "llvm-c/Initialization.h"
+#include "llvm/Target/TargetLibraryInfo.h"
+#include "llvm/Transforms/Utils/Local.h"
 #include <algorithm>
 #include <climits>
 using namespace llvm;
@@ -65,6 +66,11 @@ STATISTIC(NumExpand,    "Number of expansions");
 STATISTIC(NumFactor   , "Number of factorizations");
 STATISTIC(NumReassoc  , "Number of reassociations");
 
+static cl::opt<bool> UnsafeFPShrink("enable-double-float-shrink", cl::Hidden,
+                                   cl::init(false),
+                                   cl::desc("Enable unsafe double to float "
+                                            "shrinking for math lib calls"));
+
 // Initialization Routines
 void llvm::initializeInstCombine(PassRegistry &Registry) {
   initializeInstCombinerPass(Registry);
@@ -156,6 +162,21 @@ static bool MaintainNoSignedWrap(BinaryOperator &I, Value *B, Value *C) {
   return !Overflow;
 }
 
+/// Conservatively clears subclassOptionalData after a reassociation or
+/// commutation. We preserve fast-math flags when applicable as they can be
+/// preserved.
+static void ClearSubclassDataAfterReassociation(BinaryOperator &I) {
+  FPMathOperator *FPMO = dyn_cast<FPMathOperator>(&I);
+  if (!FPMO) {
+    I.clearSubclassOptionalData();
+    return;
+  }
+
+  FastMathFlags FMF = I.getFastMathFlags();
+  I.clearSubclassOptionalData();
+  I.setFastMathFlags(FMF);
+}
+
 /// SimplifyAssociativeOrCommutative - This performs a few simplifications for
 /// operators which are associative or commutative:
 //
@@ -213,7 +234,7 @@ bool InstCombiner::SimplifyAssociativeOrCommutative(BinaryOperator &I) {
             I.clearSubclassOptionalData();
             I.setHasNoSignedWrap(true);
           } else {
-            I.clearSubclassOptionalData();
+            ClearSubclassDataAfterReassociation(I);
           }
 
           Changed = true;
@@ -235,7 +256,7 @@ bool InstCombiner::SimplifyAssociativeOrCommutative(BinaryOperator &I) {
           I.setOperand(1, C);
           // Conservatively clear the optional flags, since they may not be
           // preserved by the reassociation.
-          I.clearSubclassOptionalData();
+          ClearSubclassDataAfterReassociation(I);
           Changed = true;
           ++NumReassoc;
           continue;
@@ -257,7 +278,7 @@ bool InstCombiner::SimplifyAssociativeOrCommutative(BinaryOperator &I) {
           I.setOperand(1, B);
           // Conservatively clear the optional flags, since they may not be
           // preserved by the reassociation.
-          I.clearSubclassOptionalData();
+          ClearSubclassDataAfterReassociation(I);
           Changed = true;
           ++NumReassoc;
           continue;
@@ -277,7 +298,7 @@ bool InstCombiner::SimplifyAssociativeOrCommutative(BinaryOperator &I) {
           I.setOperand(1, V);
           // Conservatively clear the optional flags, since they may not be
           // preserved by the reassociation.
-          I.clearSubclassOptionalData();
+          ClearSubclassDataAfterReassociation(I);
           Changed = true;
           ++NumReassoc;
           continue;
@@ -304,7 +325,7 @@ bool InstCombiner::SimplifyAssociativeOrCommutative(BinaryOperator &I) {
         I.setOperand(1, Folded);
         // Conservatively clear the optional flags, since they may not be
         // preserved by the reassociation.
-        I.clearSubclassOptionalData();
+        ClearSubclassDataAfterReassociation(I);
 
         Changed = true;
         continue;
@@ -510,8 +531,8 @@ Value *InstCombiner::dyn_castNegVal(Value *V) const {
 // instruction if the LHS is a constant negative zero (which is the 'negate'
 // form).
 //
-Value *InstCombiner::dyn_castFNegVal(Value *V) const {
-  if (BinaryOperator::isFNeg(V))
+Value *InstCombiner::dyn_castFNegVal(Value *V, bool IgnoreZeroSign) const {
+  if (BinaryOperator::isFNeg(V, IgnoreZeroSign))
     return BinaryOperator::getFNegArgument(V);
 
   // Constants can be considered to be negated values if they can be folded.
@@ -1303,17 +1324,15 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {
   /// into a gep of the original struct.  This is important for SROA and alias
   /// analysis of unions.  If "A" is also a bitcast, wait for A/X to be merged.
   if (BitCastInst *BCI = dyn_cast<BitCastInst>(PtrOp)) {
+    APInt Offset(TD ? TD->getPointerSizeInBits() : 1, 0);
     if (TD &&
-        !isa<BitCastInst>(BCI->getOperand(0)) && GEP.hasAllConstantIndices() &&
+        !isa<BitCastInst>(BCI->getOperand(0)) &&
+        GEP.accumulateConstantOffset(*TD, Offset) &&
         StrippedPtrTy->getAddressSpace() == GEP.getPointerAddressSpace()) {
 
-      // Determine how much the GEP moves the pointer.
-      SmallVector<Value*, 8> Ops(GEP.idx_begin(), GEP.idx_end());
-      int64_t Offset = TD->getIndexedOffset(GEP.getPointerOperandType(), Ops);
-
       // If this GEP instruction doesn't move the pointer, just replace the GEP
       // with a bitcast of the real input to the dest type.
-      if (Offset == 0) {
+      if (!Offset) {
         // If the bitcast is of an allocation, and the allocation will be
         // converted to match the type of the cast, don't touch this.
         if (isa<AllocaInst>(BCI->getOperand(0)) ||
@@ -1337,7 +1356,7 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {
       SmallVector<Value*, 8> NewIndices;
       Type *InTy =
         cast<PointerType>(BCI->getOperand(0)->getType())->getElementType();
-      if (FindElementAtOffset(InTy, Offset, NewIndices)) {
+      if (FindElementAtOffset(InTy, Offset.getSExtValue(), NewIndices)) {
         Value *NGEP = GEP.isInBounds() ?
           Builder->CreateInBoundsGEP(BCI->getOperand(0), NewIndices) :
           Builder->CreateGEP(BCI->getOperand(0), NewIndices);
@@ -1471,6 +1490,62 @@ Instruction *InstCombiner::visitAllocSite(Instruction &MI) {
   return 0;
 }
 
+/// \brief Move the call to free before a NULL test.
+///
+/// Check if this free is accessed after its argument has been test
+/// against NULL (property 0).
+/// If yes, it is legal to move this call in its predecessor block.
+///
+/// The move is performed only if the block containing the call to free
+/// will be removed, i.e.:
+/// 1. it has only one predecessor P, and P has two successors
+/// 2. it contains the call and an unconditional branch
+/// 3. its successor is the same as its predecessor's successor
+///
+/// The profitability is out-of concern here and this function should
+/// be called only if the caller knows this transformation would be
+/// profitable (e.g., for code size).
+static Instruction *
+tryToMoveFreeBeforeNullTest(CallInst &FI) {
+  Value *Op = FI.getArgOperand(0);
+  BasicBlock *FreeInstrBB = FI.getParent();
+  BasicBlock *PredBB = FreeInstrBB->getSinglePredecessor();
+
+  // Validate part of constraint #1: Only one predecessor
+  // FIXME: We can extend the number of predecessor, but in that case, we
+  //        would duplicate the call to free in each predecessor and it may
+  //        not be profitable even for code size.
+  if (!PredBB)
+    return 0;
+
+  // Validate constraint #2: Does this block contains only the call to
+  //                         free and an unconditional branch?
+  // FIXME: We could check if we can speculate everything in the
+  //        predecessor block
+  if (FreeInstrBB->size() != 2)
+    return 0;
+  BasicBlock *SuccBB;
+  if (!match(FreeInstrBB->getTerminator(), m_UnconditionalBr(SuccBB)))
+    return 0;
+
+  // Validate the rest of constraint #1 by matching on the pred branch.
+  TerminatorInst *TI = PredBB->getTerminator();
+  BasicBlock *TrueBB, *FalseBB;
+  ICmpInst::Predicate Pred;
+  if (!match(TI, m_Br(m_ICmp(Pred, m_Specific(Op), m_Zero()), TrueBB, FalseBB)))
+    return 0;
+  if (Pred != ICmpInst::ICMP_EQ && Pred != ICmpInst::ICMP_NE)
+    return 0;
+
+  // Validate constraint #3: Ensure the null case just falls through.
+  if (SuccBB != (Pred == ICmpInst::ICMP_EQ ? TrueBB : FalseBB))
+    return 0;
+  assert(FreeInstrBB == (Pred == ICmpInst::ICMP_EQ ? FalseBB : TrueBB) &&
+         "Broken CFG: missing edge from predecessor to successor");
+
+  FI.moveBefore(TI);
+  return &FI;
+}
 
 
 Instruction *InstCombiner::visitFree(CallInst &FI) {
@@ -1489,6 +1564,16 @@ Instruction *InstCombiner::visitFree(CallInst &FI) {
   if (isa<ConstantPointerNull>(Op))
     return EraseInstFromFunction(FI);
 
+  // If we optimize for code size, try to move the call to free before the null
+  // test so that simplify cfg can remove the empty block and dead code
+  // elimination the branch. I.e., helps to turn something like:
+  // if (foo) free(foo);
+  // into
+  // free(foo);
+  if (MinimizeSize)
+    if (Instruction *I = tryToMoveFreeBeforeNullTest(FI))
+      return I;
+
   return 0;
 }
 
@@ -2374,7 +2459,7 @@ public:
   InstCombinerLibCallSimplifier(const DataLayout *TD,
                                 const TargetLibraryInfo *TLI,
                                 InstCombiner *IC)
-    : LibCallSimplifier(TD, TLI) {
+    : LibCallSimplifier(TD, TLI, UnsafeFPShrink) {
     this->IC = IC;
   }
 
@@ -2389,6 +2474,9 @@ public:
 bool InstCombiner::runOnFunction(Function &F) {
   TD = getAnalysisIfAvailable<DataLayout>();
   TLI = &getAnalysis<TargetLibraryInfo>();
+  // Minimizing size?
+  MinimizeSize = F.getAttributes().hasAttribute(AttributeSet::FunctionIndex,
+                                                Attribute::MinSize);
 
   /// Builder - This is an IRBuilder that automatically inserts new
   /// instructions into the worklist when they are created.