author | ed <ed@FreeBSD.org> | 2009-06-22 08:08:12 +0000
---|---|---
committer | ed <ed@FreeBSD.org> | 2009-06-22 08:08:12 +0000
commit | a4c19d68f13cf0a83bc0da53bd6d547fcaf635fe (patch) |
tree | 86c1bc482baa6c81fc70b8d715153bfa93377186 /lib |
parent | db89e312d968c258aba3c79c1c398f5fb19267a3 (diff) |
Update LLVM sources to r73879.
Diffstat (limited to 'lib')
161 files changed, 5683 insertions, 1930 deletions
diff --git a/lib/Analysis/ConstantFolding.cpp b/lib/Analysis/ConstantFolding.cpp index 261c635..5aa4d56 100644 --- a/lib/Analysis/ConstantFolding.cpp +++ b/lib/Analysis/ConstantFolding.cpp @@ -365,7 +365,7 @@ Constant *llvm::ConstantFoldInstOperands(unsigned Opcode, const Type *DestTy, if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Ops[0])) { if (TD && CE->getOpcode() == Instruction::IntToPtr) { Constant *Input = CE->getOperand(0); - unsigned InWidth = Input->getType()->getPrimitiveSizeInBits(); + unsigned InWidth = Input->getType()->getScalarSizeInBits(); if (TD->getPointerSizeInBits() < InWidth) { Constant *Mask = ConstantInt::get(APInt::getLowBitsSet(InWidth, @@ -384,7 +384,7 @@ Constant *llvm::ConstantFoldInstOperands(unsigned Opcode, const Type *DestTy, if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Ops[0])) { if (TD && TD->getPointerSizeInBits() <= - CE->getType()->getPrimitiveSizeInBits()) { + CE->getType()->getScalarSizeInBits()) { if (CE->getOpcode() == Instruction::PtrToInt) { Constant *Input = CE->getOperand(0); Constant *C = FoldBitCast(Input, DestTy, *TD); diff --git a/lib/Analysis/DebugInfo.cpp b/lib/Analysis/DebugInfo.cpp index 6bdb64c..adda5ee 100644 --- a/lib/Analysis/DebugInfo.cpp +++ b/lib/Analysis/DebugInfo.cpp @@ -352,7 +352,7 @@ Constant *DIFactory::GetStringConstant(const std::string &String) { const PointerType *DestTy = PointerType::getUnqual(Type::Int8Ty); - // If empty string then use a sbyte* null instead. + // If empty string then use a i8* null instead. if (String.empty()) return Slot = ConstantPointerNull::get(DestTy); diff --git a/lib/Analysis/IVUsers.cpp b/lib/Analysis/IVUsers.cpp index 7af9130..6a53a83 100644 --- a/lib/Analysis/IVUsers.cpp +++ b/lib/Analysis/IVUsers.cpp @@ -82,11 +82,8 @@ static bool containsAddRecFromDifferentLoop(SCEVHandle S, Loop *L) { /// outer loop of the current loop. static bool getSCEVStartAndStride(const SCEVHandle &SH, Loop *L, Loop *UseLoop, SCEVHandle &Start, SCEVHandle &Stride, - bool &isSigned, ScalarEvolution *SE, DominatorTree *DT) { SCEVHandle TheAddRec = Start; // Initialize to zero. - bool isSExt = false; - bool isZExt = false; // If the outer level is an AddExpr, the operands are all start values except // for a nested AddRecExpr. @@ -101,13 +98,6 @@ static bool getSCEVStartAndStride(const SCEVHandle &SH, Loop *L, Loop *UseLoop, } else { Start = SE->getAddExpr(Start, AE->getOperand(i)); } - - } else if (const SCEVZeroExtendExpr *Z = dyn_cast<SCEVZeroExtendExpr>(SH)) { - TheAddRec = Z->getOperand(); - isZExt = true; - } else if (const SCEVSignExtendExpr *S = dyn_cast<SCEVSignExtendExpr>(SH)) { - TheAddRec = S->getOperand(); - isSExt = true; } else if (isa<SCEVAddRecExpr>(SH)) { TheAddRec = SH; } else { @@ -120,9 +110,8 @@ static bool getSCEVStartAndStride(const SCEVHandle &SH, Loop *L, Loop *UseLoop, // Use getSCEVAtScope to attempt to simplify other loops out of // the picture. SCEVHandle AddRecStart = AddRec->getStart(); - SCEVHandle BetterAddRecStart = SE->getSCEVAtScope(AddRecStart, UseLoop); - if (!isa<SCEVCouldNotCompute>(BetterAddRecStart)) - AddRecStart = BetterAddRecStart; + AddRecStart = SE->getSCEVAtScope(AddRecStart, UseLoop); + SCEVHandle AddRecStride = AddRec->getStepRecurrence(*SE); // FIXME: If Start contains an SCEVAddRecExpr from a different loop, other // than an outer loop of the current loop, reject it. 
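The ConstantFolding.cpp hunks above switch getPrimitiveSizeInBits() to getScalarSizeInBits(), which reports the element width for vector types, before comparing against the pointer width. A minimal standalone sketch (plain C++, not the LLVM API; names here are illustrative) of the masking arithmetic that check guards:

```cpp
#include <cassert>
#include <cstdint>

// Standalone model of the fold guarded above: pushing an integer wider
// than a pointer through inttoptr keeps only the low PointerSizeInBits
// bits, so the folder masks them off explicitly (APInt::getLowBitsSet
// in the real code).
uint64_t maskToPointerWidth(uint64_t Input, unsigned PtrBits) {
  assert(PtrBits > 0 && PtrBits <= 64);
  uint64_t Mask = PtrBits == 64 ? ~0ULL : ((1ULL << PtrBits) - 1);
  return Input & Mask;
}

int main() {
  // An i64 constant squeezed through a 32-bit pointer keeps bits 0..31.
  assert(maskToPointerWidth(0x100000003ULL, 32) == 3);
  return 0;
}
```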
LSR has no concept of @@ -131,24 +120,20 @@ static bool getSCEVStartAndStride(const SCEVHandle &SH, Loop *L, Loop *UseLoop, if (containsAddRecFromDifferentLoop(AddRecStart, L)) return false; - if (isSExt || isZExt) - Start = SE->getTruncateExpr(Start, AddRec->getType()); - Start = SE->getAddExpr(Start, AddRecStart); - if (!isa<SCEVConstant>(AddRec->getStepRecurrence(*SE))) { - // If stride is an instruction, make sure it dominates the loop preheader. - // Otherwise we could end up with a use before def situation. + // If stride is an instruction, make sure it dominates the loop preheader. + // Otherwise we could end up with a use before def situation. + if (!isa<SCEVConstant>(AddRecStride)) { BasicBlock *Preheader = L->getLoopPreheader(); - if (!AddRec->getStepRecurrence(*SE)->dominates(Preheader, DT)) + if (!AddRecStride->dominates(Preheader, DT)) return false; DOUT << "[" << L->getHeader()->getName() << "] Variable stride: " << *AddRec << "\n"; } - Stride = AddRec->getStepRecurrence(*SE); - isSigned = isSExt; + Stride = AddRecStride; return true; } @@ -218,9 +203,8 @@ bool IVUsers::AddUsersIfInteresting(Instruction *I) { Loop *UseLoop = LI->getLoopFor(I->getParent()); SCEVHandle Start = SE->getIntegerSCEV(0, ISE->getType()); SCEVHandle Stride = Start; - bool isSigned = false; // Arbitrary initial value - pacifies compiler. - if (!getSCEVStartAndStride(ISE, L, UseLoop, Start, Stride, isSigned, SE, DT)) + if (!getSCEVStartAndStride(ISE, L, UseLoop, Start, Stride, SE, DT)) return false; // Non-reducible symbolic expression, bail out. SmallPtrSet<Instruction *, 4> UniqueUsers; @@ -271,11 +255,11 @@ bool IVUsers::AddUsersIfInteresting(Instruction *I) { // The value used will be incremented by the stride more than we are // expecting, so subtract this off. SCEVHandle NewStart = SE->getMinusSCEV(Start, Stride); - StrideUses->addUser(NewStart, User, I, isSigned); + StrideUses->addUser(NewStart, User, I); StrideUses->Users.back().setIsUseOfPostIncrementedValue(true); DOUT << " USING POSTINC SCEV, START=" << *NewStart<< "\n"; } else { - StrideUses->addUser(Start, User, I, isSigned); + StrideUses->addUser(Start, User, I); } } } @@ -312,7 +296,6 @@ bool IVUsers::runOnLoop(Loop *l, LPPassManager &LPM) { /// getReplacementExpr - Return a SCEV expression which computes the /// value of the OperandValToReplace of the given IVStrideUse. SCEVHandle IVUsers::getReplacementExpr(const IVStrideUse &U) const { - const Type *UseTy = U.getOperandValToReplace()->getType(); // Start with zero. SCEVHandle RetVal = SE->getIntegerSCEV(0, U.getParent()->Stride->getType()); // Create the basic add recurrence. @@ -326,17 +309,9 @@ SCEVHandle IVUsers::getReplacementExpr(const IVStrideUse &U) const { // Evaluate the expression out of the loop, if possible. if (!L->contains(U.getUser()->getParent())) { SCEVHandle ExitVal = SE->getSCEVAtScope(RetVal, L->getParentLoop()); - if (!isa<SCEVCouldNotCompute>(ExitVal) && ExitVal->isLoopInvariant(L)) + if (ExitVal->isLoopInvariant(L)) RetVal = ExitVal; } - // Promote the result to the type of the use. 
- if (SE->getTypeSizeInBits(RetVal->getType()) != - SE->getTypeSizeInBits(UseTy)) { - if (U.isSigned()) - RetVal = SE->getSignExtendExpr(RetVal, UseTy); - else - RetVal = SE->getZeroExtendExpr(RetVal, UseTy); - } return RetVal; } diff --git a/lib/Analysis/ScalarEvolution.cpp b/lib/Analysis/ScalarEvolution.cpp index 98ab6f4..68aa595 100644 --- a/lib/Analysis/ScalarEvolution.cpp +++ b/lib/Analysis/ScalarEvolution.cpp @@ -68,6 +68,7 @@ #include "llvm/Analysis/ConstantFolding.h" #include "llvm/Analysis/Dominators.h" #include "llvm/Analysis/LoopInfo.h" +#include "llvm/Analysis/ValueTracking.h" #include "llvm/Assembly/Writer.h" #include "llvm/Target/TargetData.h" #include "llvm/Support/CommandLine.h" @@ -132,7 +133,8 @@ bool SCEV::isOne() const { return false; } -SCEVCouldNotCompute::SCEVCouldNotCompute() : SCEV(scCouldNotCompute) {} +SCEVCouldNotCompute::SCEVCouldNotCompute(const ScalarEvolution* p) : + SCEV(scCouldNotCompute, p) {} SCEVCouldNotCompute::~SCEVCouldNotCompute() {} bool SCEVCouldNotCompute::isLoopInvariant(const Loop *L) const { @@ -178,7 +180,7 @@ SCEVConstant::~SCEVConstant() { SCEVHandle ScalarEvolution::getConstant(ConstantInt *V) { SCEVConstant *&R = (*SCEVConstants)[V]; - if (R == 0) R = new SCEVConstant(V); + if (R == 0) R = new SCEVConstant(V, this); return R; } @@ -186,6 +188,11 @@ SCEVHandle ScalarEvolution::getConstant(const APInt& Val) { return getConstant(ConstantInt::get(Val)); } +SCEVHandle +ScalarEvolution::getConstant(const Type *Ty, uint64_t V, bool isSigned) { + return getConstant(ConstantInt::get(cast<IntegerType>(Ty), V, isSigned)); +} + const Type *SCEVConstant::getType() const { return V->getType(); } void SCEVConstant::print(raw_ostream &OS) const { @@ -193,8 +200,9 @@ void SCEVConstant::print(raw_ostream &OS) const { } SCEVCastExpr::SCEVCastExpr(unsigned SCEVTy, - const SCEVHandle &op, const Type *ty) - : SCEV(SCEVTy), Op(op), Ty(ty) {} + const SCEVHandle &op, const Type *ty, + const ScalarEvolution* p) + : SCEV(SCEVTy, p), Op(op), Ty(ty) {} SCEVCastExpr::~SCEVCastExpr() {} @@ -208,8 +216,9 @@ bool SCEVCastExpr::dominates(BasicBlock *BB, DominatorTree *DT) const { static ManagedStatic<std::map<std::pair<const SCEV*, const Type*>, SCEVTruncateExpr*> > SCEVTruncates; -SCEVTruncateExpr::SCEVTruncateExpr(const SCEVHandle &op, const Type *ty) - : SCEVCastExpr(scTruncate, op, ty) { +SCEVTruncateExpr::SCEVTruncateExpr(const SCEVHandle &op, const Type *ty, + const ScalarEvolution* p) + : SCEVCastExpr(scTruncate, op, ty, p) { assert((Op->getType()->isInteger() || isa<PointerType>(Op->getType())) && (Ty->isInteger() || isa<PointerType>(Ty)) && "Cannot truncate non-integer value!"); @@ -229,8 +238,9 @@ void SCEVTruncateExpr::print(raw_ostream &OS) const { static ManagedStatic<std::map<std::pair<const SCEV*, const Type*>, SCEVZeroExtendExpr*> > SCEVZeroExtends; -SCEVZeroExtendExpr::SCEVZeroExtendExpr(const SCEVHandle &op, const Type *ty) - : SCEVCastExpr(scZeroExtend, op, ty) { +SCEVZeroExtendExpr::SCEVZeroExtendExpr(const SCEVHandle &op, const Type *ty, + const ScalarEvolution* p) + : SCEVCastExpr(scZeroExtend, op, ty, p) { assert((Op->getType()->isInteger() || isa<PointerType>(Op->getType())) && (Ty->isInteger() || isa<PointerType>(Ty)) && "Cannot zero extend non-integer value!"); @@ -250,8 +260,9 @@ void SCEVZeroExtendExpr::print(raw_ostream &OS) const { static ManagedStatic<std::map<std::pair<const SCEV*, const Type*>, SCEVSignExtendExpr*> > SCEVSignExtends; -SCEVSignExtendExpr::SCEVSignExtendExpr(const SCEVHandle &op, const Type *ty) - : 
SCEVCastExpr(scSignExtend, op, ty) { +SCEVSignExtendExpr::SCEVSignExtendExpr(const SCEVHandle &op, const Type *ty, + const ScalarEvolution* p) + : SCEVCastExpr(scSignExtend, op, ty, p) { assert((Op->getType()->isInteger() || isa<PointerType>(Op->getType())) && (Ty->isInteger() || isa<PointerType>(Ty)) && "Cannot sign extend non-integer value!"); @@ -293,7 +304,7 @@ replaceSymbolicValuesWithConcrete(const SCEVHandle &Sym, SCEVHandle H = getOperand(i)->replaceSymbolicValuesWithConcrete(Sym, Conc, SE); if (H != getOperand(i)) { - std::vector<SCEVHandle> NewOps; + SmallVector<SCEVHandle, 8> NewOps; NewOps.reserve(getNumOperands()); for (unsigned j = 0; j != i; ++j) NewOps.push_back(getOperand(j)); @@ -373,7 +384,7 @@ replaceSymbolicValuesWithConcrete(const SCEVHandle &Sym, SCEVHandle H = getOperand(i)->replaceSymbolicValuesWithConcrete(Sym, Conc, SE); if (H != getOperand(i)) { - std::vector<SCEVHandle> NewOps; + SmallVector<SCEVHandle, 8> NewOps; NewOps.reserve(getNumOperands()); for (unsigned j = 0; j != i; ++j) NewOps.push_back(getOperand(j)); @@ -504,9 +515,18 @@ namespace { return false; } - // Constant sorting doesn't matter since they'll be folded. - if (isa<SCEVConstant>(LHS)) - return false; + // Compare constant values. + if (const SCEVConstant *LC = dyn_cast<SCEVConstant>(LHS)) { + const SCEVConstant *RC = cast<SCEVConstant>(RHS); + return LC->getValue()->getValue().ult(RC->getValue()->getValue()); + } + + // Compare addrec loop depths. + if (const SCEVAddRecExpr *LA = dyn_cast<SCEVAddRecExpr>(LHS)) { + const SCEVAddRecExpr *RA = cast<SCEVAddRecExpr>(RHS); + if (LA->getLoop()->getLoopDepth() != RA->getLoop()->getLoopDepth()) + return LA->getLoop()->getLoopDepth() < RA->getLoop()->getLoopDepth(); + } // Lexicographically compare n-ary expressions. if (const SCEVNAryExpr *LC = dyn_cast<SCEVNAryExpr>(LHS)) { @@ -558,7 +578,7 @@ namespace { /// this to depend on where the addresses of various SCEV objects happened to /// land in memory. /// -static void GroupByComplexity(std::vector<SCEVHandle> &Ops, +static void GroupByComplexity(SmallVectorImpl<SCEVHandle> &Ops, LoopInfo *LI) { if (Ops.size() < 2) return; // Noop if (Ops.size() == 2) { @@ -763,17 +783,16 @@ SCEVHandle ScalarEvolution::getTruncateExpr(const SCEVHandle &Op, if (const SCEVZeroExtendExpr *SZ = dyn_cast<SCEVZeroExtendExpr>(Op)) return getTruncateOrZeroExtend(SZ->getOperand(), Ty); - // If the input value is a chrec scev made out of constants, truncate - // all of the constants. + // If the input value is a chrec scev, truncate the chrec's operands. 
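The SCEVComplexityCompare hunk above stops treating all constants as interchangeable: constants are now ordered by unsigned value and addrecs by loop depth, which gives GroupByComplexity a deterministic order. A toy model of that ordering (illustrative types, not LLVM's classes):

```cpp
#include <algorithm>
#include <cstdint>
#include <vector>

// Sort by expression kind first, then by unsigned constant value or by
// addrec loop depth within a kind, mirroring the comparator change.
struct Node {
  enum Kind { Constant, AddRec } kind;
  uint64_t key; // the constant's value, or the addrec's loop depth
};

static bool complexityLess(const Node &L, const Node &R) {
  if (L.kind != R.kind)
    return L.kind < R.kind;
  return L.key < R.key; // ult for constants; depth compare for addrecs
}

int main() {
  std::vector<Node> Ops = {{Node::AddRec, 2}, {Node::Constant, 7},
                           {Node::Constant, 3}, {Node::AddRec, 1}};
  std::stable_sort(Ops.begin(), Ops.end(), complexityLess);
  // Now: constant 3, constant 7, depth-1 addrec, depth-2 addrec.
  return 0;
}
```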
if (const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(Op)) { - std::vector<SCEVHandle> Operands; + SmallVector<SCEVHandle, 4> Operands; for (unsigned i = 0, e = AddRec->getNumOperands(); i != e; ++i) Operands.push_back(getTruncateExpr(AddRec->getOperand(i), Ty)); return getAddRecExpr(Operands, AddRec->getLoop()); } SCEVTruncateExpr *&Result = (*SCEVTruncates)[std::make_pair(Op, Ty)]; - if (Result == 0) Result = new SCEVTruncateExpr(Op, Ty); + if (Result == 0) Result = new SCEVTruncateExpr(Op, Ty, this); return Result; } @@ -861,7 +880,7 @@ SCEVHandle ScalarEvolution::getZeroExtendExpr(const SCEVHandle &Op, } SCEVZeroExtendExpr *&Result = (*SCEVZeroExtends)[std::make_pair(Op, Ty)]; - if (Result == 0) Result = new SCEVZeroExtendExpr(Op, Ty); + if (Result == 0) Result = new SCEVZeroExtendExpr(Op, Ty, this); return Result; } @@ -933,7 +952,7 @@ SCEVHandle ScalarEvolution::getSignExtendExpr(const SCEVHandle &Op, } SCEVSignExtendExpr *&Result = (*SCEVSignExtends)[std::make_pair(Op, Ty)]; - if (Result == 0) Result = new SCEVSignExtendExpr(Op, Ty); + if (Result == 0) Result = new SCEVSignExtendExpr(Op, Ty, this); return Result; } @@ -979,9 +998,105 @@ SCEVHandle ScalarEvolution::getAnyExtendExpr(const SCEVHandle &Op, return ZExt; } +/// CollectAddOperandsWithScales - Process the given Ops list, which is +/// a list of operands to be added under the given scale, update the given +/// map. This is a helper function for getAddRecExpr. As an example of +/// what it does, given a sequence of operands that would form an add +/// expression like this: +/// +/// m + n + 13 + (A * (o + p + (B * q + m + 29))) + r + (-1 * r) +/// +/// where A and B are constants, update the map with these values: +/// +/// (m, 1+A*B), (n, 1), (o, A), (p, A), (q, A*B), (r, 0) +/// +/// and add 13 + A*B*29 to AccumulatedConstant. +/// This will allow getAddRecExpr to produce this: +/// +/// 13+A*B*29 + n + (m * (1+A*B)) + ((o + p) * A) + (q * A*B) +/// +/// This form often exposes folding opportunities that are hidden in +/// the original operand list. +/// +/// Return true iff it appears that any interesting folding opportunities +/// may be exposed. This helps getAddRecExpr short-circuit extra work in +/// the common case where no interesting opportunities are present, and +/// is also used as a check to avoid infinite recursion. +/// +static bool +CollectAddOperandsWithScales(DenseMap<SCEVHandle, APInt> &M, + SmallVector<SCEVHandle, 8> &NewOps, + APInt &AccumulatedConstant, + const SmallVectorImpl<SCEVHandle> &Ops, + const APInt &Scale, + ScalarEvolution &SE) { + bool Interesting = false; + + // Iterate over the add operands. + for (unsigned i = 0, e = Ops.size(); i != e; ++i) { + const SCEVMulExpr *Mul = dyn_cast<SCEVMulExpr>(Ops[i]); + if (Mul && isa<SCEVConstant>(Mul->getOperand(0))) { + APInt NewScale = + Scale * cast<SCEVConstant>(Mul->getOperand(0))->getValue()->getValue(); + if (Mul->getNumOperands() == 2 && isa<SCEVAddExpr>(Mul->getOperand(1))) { + // A multiplication of a constant with another add; recurse. + Interesting |= + CollectAddOperandsWithScales(M, NewOps, AccumulatedConstant, + cast<SCEVAddExpr>(Mul->getOperand(1)) + ->getOperands(), + NewScale, SE); + } else { + // A multiplication of a constant with some other value. Update + // the map. 
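The getTruncateExpr hunk above now pushes a truncate into any chrec, not just all-constant ones. A quick numeric check, assuming ordinary two's-complement wraparound, of the identity it relies on:

```cpp
#include <cassert>
#include <cstdint>

// trunc({Start,+,Step}) == {trunc(Start),+,trunc(Step)}: truncation to
// N bits is reduction mod 2^N, which commutes with add and mul.
int main() {
  const uint32_t Start = 300, Step = 700;
  for (uint32_t i = 0; i < 100; ++i) {
    uint8_t Wide = static_cast<uint8_t>(Start + i * Step);
    uint8_t Narrow = static_cast<uint8_t>(
        static_cast<uint8_t>(Start) + i * static_cast<uint8_t>(Step));
    assert(Wide == Narrow);
  }
  return 0;
}
```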
+ SmallVector<SCEVHandle, 4> MulOps(Mul->op_begin()+1, Mul->op_end()); + SCEVHandle Key = SE.getMulExpr(MulOps); + std::pair<DenseMap<SCEVHandle, APInt>::iterator, bool> Pair = + M.insert(std::make_pair(Key, APInt())); + if (Pair.second) { + Pair.first->second = NewScale; + NewOps.push_back(Pair.first->first); + } else { + Pair.first->second += NewScale; + // The map already had an entry for this value, which may indicate + // a folding opportunity. + Interesting = true; + } + } + } else if (const SCEVConstant *C = dyn_cast<SCEVConstant>(Ops[i])) { + // Pull a buried constant out to the outside. + if (Scale != 1 || AccumulatedConstant != 0 || C->isZero()) + Interesting = true; + AccumulatedConstant += Scale * C->getValue()->getValue(); + } else { + // An ordinary operand. Update the map. + std::pair<DenseMap<SCEVHandle, APInt>::iterator, bool> Pair = + M.insert(std::make_pair(Ops[i], APInt())); + if (Pair.second) { + Pair.first->second = Scale; + NewOps.push_back(Pair.first->first); + } else { + Pair.first->second += Scale; + // The map already had an entry for this value, which may indicate + // a folding opportunity. + Interesting = true; + } + } + } + + return Interesting; +} + +namespace { + struct APIntCompare { + bool operator()(const APInt &LHS, const APInt &RHS) const { + return LHS.ult(RHS); + } + }; +} + /// getAddExpr - Get a canonical add expression, or something simpler if /// possible. -SCEVHandle ScalarEvolution::getAddExpr(std::vector<SCEVHandle> &Ops) { +SCEVHandle ScalarEvolution::getAddExpr(SmallVectorImpl<SCEVHandle> &Ops) { assert(!Ops.empty() && "Cannot get empty add!"); if (Ops.size() == 1) return Ops[0]; #ifndef NDEBUG @@ -1001,11 +1116,10 @@ SCEVHandle ScalarEvolution::getAddExpr(std::vector<SCEVHandle> &Ops) { assert(Idx < Ops.size()); while (const SCEVConstant *RHSC = dyn_cast<SCEVConstant>(Ops[Idx])) { // We found two constants, fold them together! - ConstantInt *Fold = ConstantInt::get(LHSC->getValue()->getValue() + - RHSC->getValue()->getValue()); - Ops[0] = getConstant(Fold); + Ops[0] = getConstant(LHSC->getValue()->getValue() + + RHSC->getValue()->getValue()); + if (Ops.size() == 2) return Ops[0]; Ops.erase(Ops.begin()+1); // Erase the folded element - if (Ops.size() == 1) return Ops[0]; LHSC = cast<SCEVConstant>(Ops[0]); } @@ -1043,7 +1157,7 @@ SCEVHandle ScalarEvolution::getAddExpr(std::vector<SCEVHandle> &Ops) { const SCEVTruncateExpr *Trunc = cast<SCEVTruncateExpr>(Ops[Idx]); const Type *DstType = Trunc->getType(); const Type *SrcType = Trunc->getOperand()->getType(); - std::vector<SCEVHandle> LargeOps; + SmallVector<SCEVHandle, 8> LargeOps; bool Ok = true; // Check all the operands to see if they can be represented in the // source type of the truncate. @@ -1059,7 +1173,7 @@ SCEVHandle ScalarEvolution::getAddExpr(std::vector<SCEVHandle> &Ops) { // is much more likely to be foldable here. LargeOps.push_back(getSignExtendExpr(C, SrcType)); } else if (const SCEVMulExpr *M = dyn_cast<SCEVMulExpr>(Ops[i])) { - std::vector<SCEVHandle> LargeMulOps; + SmallVector<SCEVHandle, 8> LargeMulOps; for (unsigned j = 0, f = M->getNumOperands(); j != f && Ok; ++j) { if (const SCEVTruncateExpr *T = dyn_cast<SCEVTruncateExpr>(M->getOperand(j))) { @@ -1120,6 +1234,38 @@ SCEVHandle ScalarEvolution::getAddExpr(std::vector<SCEVHandle> &Ops) { while (Idx < Ops.size() && Ops[Idx]->getSCEVType() < scMulExpr) ++Idx; + // Check to see if there are any folding opportunities present with + // operands multiplied by constant values. 
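CollectAddOperandsWithScales, introduced above, folds an add's operands into a value-to-scale map so that duplicated values (for instance r and -1*r) collapse. A toy model of that bookkeeping with plain containers (illustrative types; the real code walks SCEV nodes and accumulates APInt scales):

```cpp
#include <cassert>
#include <map>
#include <string>
#include <utility>
#include <vector>

int main() {
  // Each operand is scale * symbol; an empty symbol marks a constant.
  // Models m + n + 13 + r + (-1 * r) from the function comment above.
  using Term = std::pair<long, std::string>;
  std::vector<Term> Ops = {{1, "m"}, {1, "n"}, {13, ""}, {1, "r"}, {-1, "r"}};

  long AccumulatedConstant = 0;
  std::map<std::string, long> Scales;
  bool Interesting = false;
  for (const Term &T : Ops) {
    if (T.second.empty()) {
      AccumulatedConstant += T.first; // pull buried constants outside
      Interesting = true;
    } else {
      auto Ins = Scales.insert({T.second, T.first});
      if (!Ins.second) { // same value seen twice: fold the scales
        Ins.first->second += T.first;
        Interesting = true;
      }
    }
  }
  assert(Interesting && Scales["r"] == 0 && AccumulatedConstant == 13);
  return 0;
}
```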
+ if (Idx < Ops.size() && isa<SCEVMulExpr>(Ops[Idx])) { + uint64_t BitWidth = getTypeSizeInBits(Ty); + DenseMap<SCEVHandle, APInt> M; + SmallVector<SCEVHandle, 8> NewOps; + APInt AccumulatedConstant(BitWidth, 0); + if (CollectAddOperandsWithScales(M, NewOps, AccumulatedConstant, + Ops, APInt(BitWidth, 1), *this)) { + // Some interesting folding opportunity is present, so its worthwhile to + // re-generate the operands list. Group the operands by constant scale, + // to avoid multiplying by the same constant scale multiple times. + std::map<APInt, SmallVector<SCEVHandle, 4>, APIntCompare> MulOpLists; + for (SmallVector<SCEVHandle, 8>::iterator I = NewOps.begin(), + E = NewOps.end(); I != E; ++I) + MulOpLists[M.find(*I)->second].push_back(*I); + // Re-generate the operands list. + Ops.clear(); + if (AccumulatedConstant != 0) + Ops.push_back(getConstant(AccumulatedConstant)); + for (std::map<APInt, SmallVector<SCEVHandle, 4>, APIntCompare>::iterator I = + MulOpLists.begin(), E = MulOpLists.end(); I != E; ++I) + if (I->first != 0) + Ops.push_back(getMulExpr(getConstant(I->first), getAddExpr(I->second))); + if (Ops.empty()) + return getIntegerSCEV(0, Ty); + if (Ops.size() == 1) + return Ops[0]; + return getAddExpr(Ops); + } + } + // If we are adding something to a multiply expression, make sure the // something is not already an operand of the multiply. If so, merge it into // the multiply. @@ -1128,13 +1274,13 @@ SCEVHandle ScalarEvolution::getAddExpr(std::vector<SCEVHandle> &Ops) { for (unsigned MulOp = 0, e = Mul->getNumOperands(); MulOp != e; ++MulOp) { const SCEV *MulOpSCEV = Mul->getOperand(MulOp); for (unsigned AddOp = 0, e = Ops.size(); AddOp != e; ++AddOp) - if (MulOpSCEV == Ops[AddOp] && !isa<SCEVConstant>(MulOpSCEV)) { + if (MulOpSCEV == Ops[AddOp] && !isa<SCEVConstant>(Ops[AddOp])) { // Fold W + X + (X * Y * Z) --> W + (X * ((Y*Z)+1)) SCEVHandle InnerMul = Mul->getOperand(MulOp == 0); if (Mul->getNumOperands() != 2) { // If the multiply has more than two operands, we must get the // Y*Z term. - std::vector<SCEVHandle> MulOps(Mul->op_begin(), Mul->op_end()); + SmallVector<SCEVHandle, 4> MulOps(Mul->op_begin(), Mul->op_end()); MulOps.erase(MulOps.begin()+MulOp); InnerMul = getMulExpr(MulOps); } @@ -1166,13 +1312,13 @@ SCEVHandle ScalarEvolution::getAddExpr(std::vector<SCEVHandle> &Ops) { // Fold X + (A*B*C) + (A*D*E) --> X + (A*(B*C+D*E)) SCEVHandle InnerMul1 = Mul->getOperand(MulOp == 0); if (Mul->getNumOperands() != 2) { - std::vector<SCEVHandle> MulOps(Mul->op_begin(), Mul->op_end()); + SmallVector<SCEVHandle, 4> MulOps(Mul->op_begin(), Mul->op_end()); MulOps.erase(MulOps.begin()+MulOp); InnerMul1 = getMulExpr(MulOps); } SCEVHandle InnerMul2 = OtherMul->getOperand(OMulOp == 0); if (OtherMul->getNumOperands() != 2) { - std::vector<SCEVHandle> MulOps(OtherMul->op_begin(), + SmallVector<SCEVHandle, 4> MulOps(OtherMul->op_begin(), OtherMul->op_end()); MulOps.erase(MulOps.begin()+OMulOp); InnerMul2 = getMulExpr(MulOps); @@ -1199,7 +1345,7 @@ SCEVHandle ScalarEvolution::getAddExpr(std::vector<SCEVHandle> &Ops) { for (; Idx < Ops.size() && isa<SCEVAddRecExpr>(Ops[Idx]); ++Idx) { // Scan all of the other operands to this add and add them to the vector if // they are loop invariant w.r.t. the recurrence. 
- std::vector<SCEVHandle> LIOps; + SmallVector<SCEVHandle, 8> LIOps; const SCEVAddRecExpr *AddRec = cast<SCEVAddRecExpr>(Ops[Idx]); for (unsigned i = 0, e = Ops.size(); i != e; ++i) if (Ops[i]->isLoopInvariant(AddRec->getLoop())) { @@ -1213,7 +1359,8 @@ SCEVHandle ScalarEvolution::getAddExpr(std::vector<SCEVHandle> &Ops) { // NLI + LI + {Start,+,Step} --> NLI + {LI+Start,+,Step} LIOps.push_back(AddRec->getStart()); - std::vector<SCEVHandle> AddRecOps(AddRec->op_begin(), AddRec->op_end()); + SmallVector<SCEVHandle, 4> AddRecOps(AddRec->op_begin(), + AddRec->op_end()); AddRecOps[0] = getAddExpr(LIOps); SCEVHandle NewRec = getAddRecExpr(AddRecOps, AddRec->getLoop()); @@ -1238,7 +1385,7 @@ SCEVHandle ScalarEvolution::getAddExpr(std::vector<SCEVHandle> &Ops) { const SCEVAddRecExpr *OtherAddRec = cast<SCEVAddRecExpr>(Ops[OtherIdx]); if (AddRec->getLoop() == OtherAddRec->getLoop()) { // Other + {A,+,B} + {C,+,D} --> Other + {A+C,+,B+D} - std::vector<SCEVHandle> NewOps(AddRec->op_begin(), AddRec->op_end()); + SmallVector<SCEVHandle, 4> NewOps(AddRec->op_begin(), AddRec->op_end()); for (unsigned i = 0, e = OtherAddRec->getNumOperands(); i != e; ++i) { if (i >= NewOps.size()) { NewOps.insert(NewOps.end(), OtherAddRec->op_begin()+i, @@ -1267,14 +1414,14 @@ SCEVHandle ScalarEvolution::getAddExpr(std::vector<SCEVHandle> &Ops) { std::vector<const SCEV*> SCEVOps(Ops.begin(), Ops.end()); SCEVCommutativeExpr *&Result = (*SCEVCommExprs)[std::make_pair(scAddExpr, SCEVOps)]; - if (Result == 0) Result = new SCEVAddExpr(Ops); + if (Result == 0) Result = new SCEVAddExpr(Ops, this); return Result; } /// getMulExpr - Get a canonical multiply expression, or something simpler if /// possible. -SCEVHandle ScalarEvolution::getMulExpr(std::vector<SCEVHandle> &Ops) { +SCEVHandle ScalarEvolution::getMulExpr(SmallVectorImpl<SCEVHandle> &Ops) { assert(!Ops.empty() && "Cannot get empty mul!"); #ifndef NDEBUG for (unsigned i = 1, e = Ops.size(); i != e; ++i) @@ -1355,7 +1502,7 @@ SCEVHandle ScalarEvolution::getMulExpr(std::vector<SCEVHandle> &Ops) { for (; Idx < Ops.size() && isa<SCEVAddRecExpr>(Ops[Idx]); ++Idx) { // Scan all of the other operands to this mul and add them to the vector if // they are loop invariant w.r.t. the recurrence. - std::vector<SCEVHandle> LIOps; + SmallVector<SCEVHandle, 8> LIOps; const SCEVAddRecExpr *AddRec = cast<SCEVAddRecExpr>(Ops[Idx]); for (unsigned i = 0, e = Ops.size(); i != e; ++i) if (Ops[i]->isLoopInvariant(AddRec->getLoop())) { @@ -1367,7 +1514,7 @@ SCEVHandle ScalarEvolution::getMulExpr(std::vector<SCEVHandle> &Ops) { // If we found some loop invariants, fold them into the recurrence. 
if (!LIOps.empty()) { // NLI * LI * {Start,+,Step} --> NLI * {LI*Start,+,LI*Step} - std::vector<SCEVHandle> NewOps; + SmallVector<SCEVHandle, 4> NewOps; NewOps.reserve(AddRec->getNumOperands()); if (LIOps.size() == 1) { const SCEV *Scale = LIOps[0]; @@ -1375,7 +1522,7 @@ SCEVHandle ScalarEvolution::getMulExpr(std::vector<SCEVHandle> &Ops) { NewOps.push_back(getMulExpr(Scale, AddRec->getOperand(i))); } else { for (unsigned i = 0, e = AddRec->getNumOperands(); i != e; ++i) { - std::vector<SCEVHandle> MulOps(LIOps); + SmallVector<SCEVHandle, 4> MulOps(LIOps.begin(), LIOps.end()); MulOps.push_back(AddRec->getOperand(i)); NewOps.push_back(getMulExpr(MulOps)); } @@ -1433,7 +1580,7 @@ SCEVHandle ScalarEvolution::getMulExpr(std::vector<SCEVHandle> &Ops) { SCEVCommutativeExpr *&Result = (*SCEVCommExprs)[std::make_pair(scMulExpr, SCEVOps)]; if (Result == 0) - Result = new SCEVMulExpr(Ops); + Result = new SCEVMulExpr(Ops, this); return Result; } @@ -1473,14 +1620,14 @@ SCEVHandle ScalarEvolution::getUDivExpr(const SCEVHandle &LHS, getAddRecExpr(getZeroExtendExpr(AR->getStart(), ExtTy), getZeroExtendExpr(Step, ExtTy), AR->getLoop())) { - std::vector<SCEVHandle> Operands; + SmallVector<SCEVHandle, 4> Operands; for (unsigned i = 0, e = AR->getNumOperands(); i != e; ++i) Operands.push_back(getUDivExpr(AR->getOperand(i), RHS)); return getAddRecExpr(Operands, AR->getLoop()); } // (A*B)/C --> A*(B/C) if safe and B/C can be folded. if (const SCEVMulExpr *M = dyn_cast<SCEVMulExpr>(LHS)) { - std::vector<SCEVHandle> Operands; + SmallVector<SCEVHandle, 4> Operands; for (unsigned i = 0, e = M->getNumOperands(); i != e; ++i) Operands.push_back(getZeroExtendExpr(M->getOperand(i), ExtTy)); if (getZeroExtendExpr(M, ExtTy) == getMulExpr(Operands)) @@ -1489,7 +1636,9 @@ SCEVHandle ScalarEvolution::getUDivExpr(const SCEVHandle &LHS, SCEVHandle Op = M->getOperand(i); SCEVHandle Div = getUDivExpr(Op, RHSC); if (!isa<SCEVUDivExpr>(Div) && getMulExpr(Div, RHSC) == Op) { - Operands = M->getOperands(); + const SmallVectorImpl<SCEVHandle> &MOperands = M->getOperands(); + Operands = SmallVector<SCEVHandle, 4>(MOperands.begin(), + MOperands.end()); Operands[i] = Div; return getMulExpr(Operands); } @@ -1497,7 +1646,7 @@ SCEVHandle ScalarEvolution::getUDivExpr(const SCEVHandle &LHS, } // (A+B)/C --> (A/C + B/C) if safe and A/C and B/C can be folded. if (const SCEVAddRecExpr *A = dyn_cast<SCEVAddRecExpr>(LHS)) { - std::vector<SCEVHandle> Operands; + SmallVector<SCEVHandle, 4> Operands; for (unsigned i = 0, e = A->getNumOperands(); i != e; ++i) Operands.push_back(getZeroExtendExpr(A->getOperand(i), ExtTy)); if (getZeroExtendExpr(A, ExtTy) == getAddExpr(Operands)) { @@ -1522,7 +1671,7 @@ SCEVHandle ScalarEvolution::getUDivExpr(const SCEVHandle &LHS, } SCEVUDivExpr *&Result = (*SCEVUDivs)[std::make_pair(LHS, RHS)]; - if (Result == 0) Result = new SCEVUDivExpr(LHS, RHS); + if (Result == 0) Result = new SCEVUDivExpr(LHS, RHS, this); return Result; } @@ -1531,7 +1680,7 @@ SCEVHandle ScalarEvolution::getUDivExpr(const SCEVHandle &LHS, /// Simplify the expression as much as possible. 
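Both folds above rewrite loop-invariant operands into the start (and, for multiplication, the step) of an add recurrence. A numeric spot-check of the two identities, assuming unsigned wraparound semantics:

```cpp
#include <cassert>
#include <cstdint>

// LI + {Start,+,Step} == {LI+Start,+,Step}
// LI * {Start,+,Step} == {LI*Start,+,LI*Step}
int main() {
  const uint32_t LI = 5, Start = 7, Step = 3;
  for (uint32_t i = 0; i < 50; ++i) {
    uint32_t Rec = Start + i * Step; // value of {Start,+,Step} at iteration i
    assert(LI + Rec == (LI + Start) + i * Step);
    assert(LI * Rec == (LI * Start) + i * (LI * Step));
  }
  return 0;
}
```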
SCEVHandle ScalarEvolution::getAddRecExpr(const SCEVHandle &Start, const SCEVHandle &Step, const Loop *L) { - std::vector<SCEVHandle> Operands; + SmallVector<SCEVHandle, 4> Operands; Operands.push_back(Start); if (const SCEVAddRecExpr *StepChrec = dyn_cast<SCEVAddRecExpr>(Step)) if (StepChrec->getLoop() == L) { @@ -1546,7 +1695,7 @@ SCEVHandle ScalarEvolution::getAddRecExpr(const SCEVHandle &Start, /// getAddRecExpr - Get an add recurrence expression for the specified loop. /// Simplify the expression as much as possible. -SCEVHandle ScalarEvolution::getAddRecExpr(std::vector<SCEVHandle> &Operands, +SCEVHandle ScalarEvolution::getAddRecExpr(SmallVectorImpl<SCEVHandle> &Operands, const Loop *L) { if (Operands.size() == 1) return Operands[0]; #ifndef NDEBUG @@ -1565,8 +1714,8 @@ SCEVHandle ScalarEvolution::getAddRecExpr(std::vector<SCEVHandle> &Operands, if (const SCEVAddRecExpr *NestedAR = dyn_cast<SCEVAddRecExpr>(Operands[0])) { const Loop* NestedLoop = NestedAR->getLoop(); if (L->getLoopDepth() < NestedLoop->getLoopDepth()) { - std::vector<SCEVHandle> NestedOperands(NestedAR->op_begin(), - NestedAR->op_end()); + SmallVector<SCEVHandle, 4> NestedOperands(NestedAR->op_begin(), + NestedAR->op_end()); SCEVHandle NestedARHandle(NestedAR); Operands[0] = NestedAR->getStart(); NestedOperands[0] = getAddRecExpr(Operands, L); @@ -1576,19 +1725,20 @@ SCEVHandle ScalarEvolution::getAddRecExpr(std::vector<SCEVHandle> &Operands, std::vector<const SCEV*> SCEVOps(Operands.begin(), Operands.end()); SCEVAddRecExpr *&Result = (*SCEVAddRecExprs)[std::make_pair(L, SCEVOps)]; - if (Result == 0) Result = new SCEVAddRecExpr(Operands, L); + if (Result == 0) Result = new SCEVAddRecExpr(Operands, L, this); return Result; } SCEVHandle ScalarEvolution::getSMaxExpr(const SCEVHandle &LHS, const SCEVHandle &RHS) { - std::vector<SCEVHandle> Ops; + SmallVector<SCEVHandle, 2> Ops; Ops.push_back(LHS); Ops.push_back(RHS); return getSMaxExpr(Ops); } -SCEVHandle ScalarEvolution::getSMaxExpr(std::vector<SCEVHandle> Ops) { +SCEVHandle +ScalarEvolution::getSMaxExpr(SmallVectorImpl<SCEVHandle> &Ops) { assert(!Ops.empty() && "Cannot get empty smax!"); if (Ops.size() == 1) return Ops[0]; #ifndef NDEBUG @@ -1662,19 +1812,20 @@ SCEVHandle ScalarEvolution::getSMaxExpr(std::vector<SCEVHandle> Ops) { std::vector<const SCEV*> SCEVOps(Ops.begin(), Ops.end()); SCEVCommutativeExpr *&Result = (*SCEVCommExprs)[std::make_pair(scSMaxExpr, SCEVOps)]; - if (Result == 0) Result = new SCEVSMaxExpr(Ops); + if (Result == 0) Result = new SCEVSMaxExpr(Ops, this); return Result; } SCEVHandle ScalarEvolution::getUMaxExpr(const SCEVHandle &LHS, const SCEVHandle &RHS) { - std::vector<SCEVHandle> Ops; + SmallVector<SCEVHandle, 2> Ops; Ops.push_back(LHS); Ops.push_back(RHS); return getUMaxExpr(Ops); } -SCEVHandle ScalarEvolution::getUMaxExpr(std::vector<SCEVHandle> Ops) { +SCEVHandle +ScalarEvolution::getUMaxExpr(SmallVectorImpl<SCEVHandle> &Ops) { assert(!Ops.empty() && "Cannot get empty umax!"); if (Ops.size() == 1) return Ops[0]; #ifndef NDEBUG @@ -1748,17 +1899,29 @@ SCEVHandle ScalarEvolution::getUMaxExpr(std::vector<SCEVHandle> Ops) { std::vector<const SCEV*> SCEVOps(Ops.begin(), Ops.end()); SCEVCommutativeExpr *&Result = (*SCEVCommExprs)[std::make_pair(scUMaxExpr, SCEVOps)]; - if (Result == 0) Result = new SCEVUMaxExpr(Ops); + if (Result == 0) Result = new SCEVUMaxExpr(Ops, this); return Result; } +SCEVHandle ScalarEvolution::getSMinExpr(const SCEVHandle &LHS, + const SCEVHandle &RHS) { + // ~smax(~x, ~y) == smin(x, y). 
+ return getNotSCEV(getSMaxExpr(getNotSCEV(LHS), getNotSCEV(RHS))); +} + +SCEVHandle ScalarEvolution::getUMinExpr(const SCEVHandle &LHS, + const SCEVHandle &RHS) { + // ~umax(~x, ~y) == umin(x, y) + return getNotSCEV(getUMaxExpr(getNotSCEV(LHS), getNotSCEV(RHS))); +} + SCEVHandle ScalarEvolution::getUnknown(Value *V) { if (ConstantInt *CI = dyn_cast<ConstantInt>(V)) return getConstant(CI); if (isa<ConstantPointerNull>(V)) return getIntegerSCEV(0, V->getType()); SCEVUnknown *&Result = (*SCEVUnknowns)[V]; - if (Result == 0) Result = new SCEVUnknown(V); + if (Result == 0) Result = new SCEVUnknown(V, this); return Result; } @@ -1977,6 +2140,22 @@ ScalarEvolution::getTruncateOrNoop(const SCEVHandle &V, const Type *Ty) { return getTruncateExpr(V, Ty); } +/// getUMaxFromMismatchedTypes - Promote the operands to the wider of +/// the types using zero-extension, and then perform a umax operation +/// with them. +SCEVHandle ScalarEvolution::getUMaxFromMismatchedTypes(const SCEVHandle &LHS, + const SCEVHandle &RHS) { + SCEVHandle PromotedLHS = LHS; + SCEVHandle PromotedRHS = RHS; + + if (getTypeSizeInBits(LHS->getType()) > getTypeSizeInBits(RHS->getType())) + PromotedRHS = getZeroExtendExpr(RHS, LHS->getType()); + else + PromotedLHS = getNoopOrZeroExtend(LHS, RHS->getType()); + + return getUMaxExpr(PromotedLHS, PromotedRHS); +} + /// ReplaceSymbolicValueWithConcrete - This looks up the computed SCEV value for /// the specified instruction and replaces any references to the symbolic value /// SymName with the specified value. This is used during PHI resolution. @@ -2040,7 +2219,7 @@ SCEVHandle ScalarEvolution::createNodeForPHI(PHINode *PN) { if (FoundIndex != Add->getNumOperands()) { // Create an add with everything but the specified operand. - std::vector<SCEVHandle> Ops; + SmallVector<SCEVHandle, 8> Ops; for (unsigned i = 0, e = Add->getNumOperands(); i != e; ++i) if (i != FoundIndex) Ops.push_back(Add->getOperand(i)); @@ -2143,73 +2322,134 @@ SCEVHandle ScalarEvolution::createNodeForGEP(User *GEP) { /// guaranteed to end in (at every loop iteration). It is, at the same time, /// the minimum number of times S is divisible by 2. For example, given {4,+,8} /// it returns 2. If S is guaranteed to be 0, it returns the bitwidth of S. -static uint32_t GetMinTrailingZeros(SCEVHandle S, const ScalarEvolution &SE) { +uint32_t +ScalarEvolution::GetMinTrailingZeros(const SCEVHandle &S) { if (const SCEVConstant *C = dyn_cast<SCEVConstant>(S)) return C->getValue()->getValue().countTrailingZeros(); if (const SCEVTruncateExpr *T = dyn_cast<SCEVTruncateExpr>(S)) - return std::min(GetMinTrailingZeros(T->getOperand(), SE), - (uint32_t)SE.getTypeSizeInBits(T->getType())); + return std::min(GetMinTrailingZeros(T->getOperand()), + (uint32_t)getTypeSizeInBits(T->getType())); if (const SCEVZeroExtendExpr *E = dyn_cast<SCEVZeroExtendExpr>(S)) { - uint32_t OpRes = GetMinTrailingZeros(E->getOperand(), SE); - return OpRes == SE.getTypeSizeInBits(E->getOperand()->getType()) ? - SE.getTypeSizeInBits(E->getType()) : OpRes; + uint32_t OpRes = GetMinTrailingZeros(E->getOperand()); + return OpRes == getTypeSizeInBits(E->getOperand()->getType()) ? + getTypeSizeInBits(E->getType()) : OpRes; } if (const SCEVSignExtendExpr *E = dyn_cast<SCEVSignExtendExpr>(S)) { - uint32_t OpRes = GetMinTrailingZeros(E->getOperand(), SE); - return OpRes == SE.getTypeSizeInBits(E->getOperand()->getType()) ? 
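The new getSMinExpr/getUMinExpr lean on the complement trick noted in their comments. A standalone check that bitwise complement reverses both the signed and the unsigned order, so ~max(~x, ~y) == min(x, y) in both senses:

```cpp
#include <algorithm>
#include <cassert>
#include <cstdint>

// In two's complement, ~x == -x - 1 is strictly order-reversing, for
// the signed order and for the unsigned order alike.
int main() {
  const int32_t Vals[] = {-7, -1, 0, 1, 42, INT32_MIN, INT32_MAX};
  for (int32_t X : Vals)
    for (int32_t Y : Vals) {
      assert(~std::max(~X, ~Y) == std::min(X, Y)); // smin via smax
      uint32_t UX = static_cast<uint32_t>(X), UY = static_cast<uint32_t>(Y);
      assert(~std::max(~UX, ~UY) == std::min(UX, UY)); // umin via umax
    }
  return 0;
}
```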
- SE.getTypeSizeInBits(E->getType()) : OpRes; + uint32_t OpRes = GetMinTrailingZeros(E->getOperand()); + return OpRes == getTypeSizeInBits(E->getOperand()->getType()) ? + getTypeSizeInBits(E->getType()) : OpRes; } if (const SCEVAddExpr *A = dyn_cast<SCEVAddExpr>(S)) { // The result is the min of all operands results. - uint32_t MinOpRes = GetMinTrailingZeros(A->getOperand(0), SE); + uint32_t MinOpRes = GetMinTrailingZeros(A->getOperand(0)); for (unsigned i = 1, e = A->getNumOperands(); MinOpRes && i != e; ++i) - MinOpRes = std::min(MinOpRes, GetMinTrailingZeros(A->getOperand(i), SE)); + MinOpRes = std::min(MinOpRes, GetMinTrailingZeros(A->getOperand(i))); return MinOpRes; } if (const SCEVMulExpr *M = dyn_cast<SCEVMulExpr>(S)) { // The result is the sum of all operands results. - uint32_t SumOpRes = GetMinTrailingZeros(M->getOperand(0), SE); - uint32_t BitWidth = SE.getTypeSizeInBits(M->getType()); + uint32_t SumOpRes = GetMinTrailingZeros(M->getOperand(0)); + uint32_t BitWidth = getTypeSizeInBits(M->getType()); for (unsigned i = 1, e = M->getNumOperands(); SumOpRes != BitWidth && i != e; ++i) - SumOpRes = std::min(SumOpRes + GetMinTrailingZeros(M->getOperand(i), SE), + SumOpRes = std::min(SumOpRes + GetMinTrailingZeros(M->getOperand(i)), BitWidth); return SumOpRes; } if (const SCEVAddRecExpr *A = dyn_cast<SCEVAddRecExpr>(S)) { // The result is the min of all operands results. - uint32_t MinOpRes = GetMinTrailingZeros(A->getOperand(0), SE); + uint32_t MinOpRes = GetMinTrailingZeros(A->getOperand(0)); for (unsigned i = 1, e = A->getNumOperands(); MinOpRes && i != e; ++i) - MinOpRes = std::min(MinOpRes, GetMinTrailingZeros(A->getOperand(i), SE)); + MinOpRes = std::min(MinOpRes, GetMinTrailingZeros(A->getOperand(i))); return MinOpRes; } if (const SCEVSMaxExpr *M = dyn_cast<SCEVSMaxExpr>(S)) { // The result is the min of all operands results. - uint32_t MinOpRes = GetMinTrailingZeros(M->getOperand(0), SE); + uint32_t MinOpRes = GetMinTrailingZeros(M->getOperand(0)); for (unsigned i = 1, e = M->getNumOperands(); MinOpRes && i != e; ++i) - MinOpRes = std::min(MinOpRes, GetMinTrailingZeros(M->getOperand(i), SE)); + MinOpRes = std::min(MinOpRes, GetMinTrailingZeros(M->getOperand(i))); return MinOpRes; } if (const SCEVUMaxExpr *M = dyn_cast<SCEVUMaxExpr>(S)) { // The result is the min of all operands results. - uint32_t MinOpRes = GetMinTrailingZeros(M->getOperand(0), SE); + uint32_t MinOpRes = GetMinTrailingZeros(M->getOperand(0)); for (unsigned i = 1, e = M->getNumOperands(); MinOpRes && i != e; ++i) - MinOpRes = std::min(MinOpRes, GetMinTrailingZeros(M->getOperand(i), SE)); + MinOpRes = std::min(MinOpRes, GetMinTrailingZeros(M->getOperand(i))); return MinOpRes; } - // SCEVUDivExpr, SCEVUnknown + if (const SCEVUnknown *U = dyn_cast<SCEVUnknown>(S)) { + // For a SCEVUnknown, ask ValueTracking. + unsigned BitWidth = getTypeSizeInBits(U->getType()); + APInt Mask = APInt::getAllOnesValue(BitWidth); + APInt Zeros(BitWidth, 0), Ones(BitWidth, 0); + ComputeMaskedBits(U->getValue(), Mask, Zeros, Ones); + return Zeros.countTrailingOnes(); + } + + // SCEVUDivExpr return 0; } +uint32_t +ScalarEvolution::GetMinLeadingZeros(const SCEVHandle &S) { + // TODO: Handle other SCEV expression types here. + + if (const SCEVConstant *C = dyn_cast<SCEVConstant>(S)) + return C->getValue()->getValue().countLeadingZeros(); + + if (const SCEVZeroExtendExpr *C = dyn_cast<SCEVZeroExtendExpr>(S)) { + // A zero-extension cast adds zero bits. 
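GetMinTrailingZeros, now a member so the SCEVUnknown case can consult ValueTracking's ComputeMaskedBits, uses min-over-operands for adds and sum-of-operands (capped at the bit width) for muls. A numeric spot-check of those rules, including the {4,+,8} example from the function comment:

```cpp
#include <algorithm>
#include <cassert>
#include <cstdint>

// Count trailing zero bits; 32 for zero, matching APInt's convention.
static unsigned ctz32(uint32_t V) {
  if (V == 0) return 32;
  unsigned N = 0;
  while (!(V & 1)) { V >>= 1; ++N; }
  return N;
}

int main() {
  const uint32_t A = 48, B = 20; // ctz 4 and ctz 2
  assert(ctz32(A + B) >= std::min(ctz32(A), ctz32(B))); // add: take the min
  assert(ctz32(A * B) >= std::min(ctz32(A) + ctz32(B), 32u)); // mul: sum
  for (uint32_t i = 0; i < 64; ++i) // {4,+,8} is always divisible by 4
    assert(ctz32(4 + 8 * i) >= 2);
  return 0;
}
```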
+ return GetMinLeadingZeros(C->getOperand()) + + (getTypeSizeInBits(C->getType()) - + getTypeSizeInBits(C->getOperand()->getType())); + } + + if (const SCEVUnknown *U = dyn_cast<SCEVUnknown>(S)) { + // For a SCEVUnknown, ask ValueTracking. + unsigned BitWidth = getTypeSizeInBits(U->getType()); + APInt Mask = APInt::getAllOnesValue(BitWidth); + APInt Zeros(BitWidth, 0), Ones(BitWidth, 0); + ComputeMaskedBits(U->getValue(), Mask, Zeros, Ones, TD); + return Zeros.countLeadingOnes(); + } + + return 1; +} + +uint32_t +ScalarEvolution::GetMinSignBits(const SCEVHandle &S) { + // TODO: Handle other SCEV expression types here. + + if (const SCEVConstant *C = dyn_cast<SCEVConstant>(S)) { + const APInt &A = C->getValue()->getValue(); + return A.isNegative() ? A.countLeadingOnes() : + A.countLeadingZeros(); + } + + if (const SCEVSignExtendExpr *C = dyn_cast<SCEVSignExtendExpr>(S)) { + // A sign-extension cast adds sign bits. + return GetMinSignBits(C->getOperand()) + + (getTypeSizeInBits(C->getType()) - + getTypeSizeInBits(C->getOperand()->getType())); + } + + if (const SCEVUnknown *U = dyn_cast<SCEVUnknown>(S)) { + // For a SCEVUnknown, ask ValueTracking. + return ComputeNumSignBits(U->getValue(), TD); + } + + return 1; +} + /// createSCEV - We know that there is no SCEV for the specified value. /// Analyze the expression. /// @@ -2248,14 +2488,27 @@ SCEVHandle ScalarEvolution::createSCEV(Value *V) { if (CI->isAllOnesValue()) return getSCEV(U->getOperand(0)); const APInt &A = CI->getValue(); - unsigned Ones = A.countTrailingOnes(); - if (APIntOps::isMask(Ones, A)) + + // Instcombine's ShrinkDemandedConstant may strip bits out of + // constants, obscuring what would otherwise be a low-bits mask. + // Use ComputeMaskedBits to compute what ShrinkDemandedConstant + // knew about to reconstruct a low-bits mask value. + unsigned LZ = A.countLeadingZeros(); + unsigned BitWidth = A.getBitWidth(); + APInt AllOnes = APInt::getAllOnesValue(BitWidth); + APInt KnownZero(BitWidth, 0), KnownOne(BitWidth, 0); + ComputeMaskedBits(U->getOperand(0), AllOnes, KnownZero, KnownOne, TD); + + APInt EffectiveMask = APInt::getLowBitsSet(BitWidth, BitWidth - LZ); + + if (LZ != 0 && !((~A & ~KnownZero) & EffectiveMask)) return getZeroExtendExpr(getTruncateExpr(getSCEV(U->getOperand(0)), - IntegerType::get(Ones)), + IntegerType::get(BitWidth - LZ)), U->getType()); } break; + case Instruction::Or: // If the RHS of the Or is a constant, we may have something like: // X*4+1 which got turned into X*4|1. Handle this as an Add so loop @@ -2266,7 +2519,7 @@ SCEVHandle ScalarEvolution::createSCEV(Value *V) { if (ConstantInt *CI = dyn_cast<ConstantInt>(U->getOperand(1))) { SCEVHandle LHS = getSCEV(U->getOperand(0)); const APInt &CIVal = CI->getValue(); - if (GetMinTrailingZeros(LHS, *this) >= + if (GetMinTrailingZeros(LHS) >= (CIVal.getBitWidth() - CIVal.countLeadingZeros())) return getAddExpr(LHS, getSCEV(U->getOperand(1))); } @@ -2292,9 +2545,27 @@ SCEVHandle ScalarEvolution::createSCEV(Value *V) { if (BO->getOpcode() == Instruction::And && LCI->getValue() == CI->getValue()) if (const SCEVZeroExtendExpr *Z = - dyn_cast<SCEVZeroExtendExpr>(getSCEV(U->getOperand(0)))) - return getZeroExtendExpr(getNotSCEV(Z->getOperand()), - U->getType()); + dyn_cast<SCEVZeroExtendExpr>(getSCEV(U->getOperand(0)))) { + const Type *UTy = U->getType(); + SCEVHandle Z0 = Z->getOperand(); + const Type *Z0Ty = Z0->getType(); + unsigned Z0TySize = getTypeSizeInBits(Z0Ty); + + // If C is a low-bits mask, the zero extend is zerving to + // mask off the high bits. 
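The Instruction::And case above uses ComputeMaskedBits to recover a low-bits mask that instcombine may have shrunk, because an And with such a mask can be modeled as zext(trunc(X)), a form SCEV can reason about where a raw And cannot. A standalone check of that equivalence:

```cpp
#include <cassert>
#include <cstdint>

// X & ((1 << K) - 1) == zext(trunc(X) to iK), here with K = 8.
int main() {
  for (uint32_t X = 0; X < 100000; X += 37) {
    uint32_t Masked = X & 0xFF;
    uint32_t ZextTrunc = static_cast<uint32_t>(static_cast<uint8_t>(X));
    assert(Masked == ZextTrunc);
  }
  return 0;
}
```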
Complement the operand and + // re-apply the zext. + if (APIntOps::isMask(Z0TySize, CI->getValue())) + return getZeroExtendExpr(getNotSCEV(Z0), UTy); + + // If C is a single bit, it may be in the sign-bit position + // before the zero-extend. In this case, represent the xor + // using an add, which is equivalent, and re-apply the zext. + APInt Trunc = APInt(CI->getValue()).trunc(Z0TySize); + if (APInt(Trunc).zext(getTypeSizeInBits(UTy)) == CI->getValue() && + Trunc.isSignBit()) + return getZeroExtendExpr(getAddExpr(Z0, getConstant(Trunc)), + UTy); + } } break; @@ -2385,10 +2656,7 @@ SCEVHandle ScalarEvolution::createSCEV(Value *V) { if (LHS == U->getOperand(1) && RHS == U->getOperand(2)) return getSMaxExpr(getSCEV(LHS), getSCEV(RHS)); else if (LHS == U->getOperand(2) && RHS == U->getOperand(1)) - // ~smax(~x, ~y) == smin(x, y). - return getNotSCEV(getSMaxExpr( - getNotSCEV(getSCEV(LHS)), - getNotSCEV(getSCEV(RHS)))); + return getSMinExpr(getSCEV(LHS), getSCEV(RHS)); break; case ICmpInst::ICMP_ULT: case ICmpInst::ICMP_ULE: @@ -2399,9 +2667,25 @@ SCEVHandle ScalarEvolution::createSCEV(Value *V) { if (LHS == U->getOperand(1) && RHS == U->getOperand(2)) return getUMaxExpr(getSCEV(LHS), getSCEV(RHS)); else if (LHS == U->getOperand(2) && RHS == U->getOperand(1)) - // ~umax(~x, ~y) == umin(x, y) - return getNotSCEV(getUMaxExpr(getNotSCEV(getSCEV(LHS)), - getNotSCEV(getSCEV(RHS)))); + return getUMinExpr(getSCEV(LHS), getSCEV(RHS)); + break; + case ICmpInst::ICMP_NE: + // n != 0 ? n : 1 -> umax(n, 1) + if (LHS == U->getOperand(1) && + isa<ConstantInt>(U->getOperand(2)) && + cast<ConstantInt>(U->getOperand(2))->isOne() && + isa<ConstantInt>(RHS) && + cast<ConstantInt>(RHS)->isZero()) + return getUMaxExpr(getSCEV(LHS), getSCEV(U->getOperand(2))); + break; + case ICmpInst::ICMP_EQ: + // n == 0 ? 1 : n -> umax(n, 1) + if (LHS == U->getOperand(2) && + isa<ConstantInt>(U->getOperand(1)) && + cast<ConstantInt>(U->getOperand(1))->isOne() && + isa<ConstantInt>(RHS) && + cast<ConstantInt>(RHS)->isZero()) + return getUMaxExpr(getSCEV(LHS), getSCEV(U->getOperand(1))); break; default: break; @@ -2462,9 +2746,13 @@ ScalarEvolution::getBackedgeTakenInfo(const Loop *L) { // Update the value in the map. Pair.first->second = ItCount; - } else if (isa<PHINode>(L->getHeader()->begin())) { - // Only count loops that have phi nodes as not being computable. - ++NumTripCountsNotComputed; + } else { + if (ItCount.Max != CouldNotCompute) + // Update the value in the map. + Pair.first->second = ItCount; + if (isa<PHINode>(L->getHeader()->begin())) + // Only count loops that have phi nodes as not being computable. + ++NumTripCountsNotComputed; } // Now that we know more about the trip count for this loop, forget any @@ -2520,19 +2808,58 @@ void ScalarEvolution::forgetLoopPHIs(const Loop *L) { /// of the specified loop will execute. ScalarEvolution::BackedgeTakenInfo ScalarEvolution::ComputeBackedgeTakenCount(const Loop *L) { - // If the loop has a non-one exit block count, we can't analyze it. - BasicBlock *ExitBlock = L->getExitBlock(); - if (!ExitBlock) - return CouldNotCompute; + SmallVector<BasicBlock*, 8> ExitingBlocks; + L->getExitingBlocks(ExitingBlocks); + + // Examine all exits and pick the most conservative values. 
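The Xor rewrite above turns x ^ C into x + C when C is the sign bit of the narrow type, so the zero-extend can be reapplied. A brute-force check of that identity over all i8 values:

```cpp
#include <cassert>
#include <cstdint>

// Adding the top bit either sets it or carries out of the type, and in
// modular arithmetic the carry out is discarded, so xor and add agree.
int main() {
  const uint8_t SignBit = 0x80;
  for (unsigned X = 0; X < 256; ++X) {
    uint8_t V = static_cast<uint8_t>(X);
    assert(static_cast<uint8_t>(V ^ SignBit) ==
           static_cast<uint8_t>(V + SignBit));
  }
  return 0;
}
```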
+ SCEVHandle BECount = CouldNotCompute; + SCEVHandle MaxBECount = CouldNotCompute; + bool CouldNotComputeBECount = false; + bool CouldNotComputeMaxBECount = false; + for (unsigned i = 0, e = ExitingBlocks.size(); i != e; ++i) { + BackedgeTakenInfo NewBTI = + ComputeBackedgeTakenCountFromExit(L, ExitingBlocks[i]); + + if (NewBTI.Exact == CouldNotCompute) { + // We couldn't compute an exact value for this exit, so + // we don't be able to compute an exact value for the loop. + CouldNotComputeBECount = true; + BECount = CouldNotCompute; + } else if (!CouldNotComputeBECount) { + if (BECount == CouldNotCompute) + BECount = NewBTI.Exact; + else { + // TODO: More analysis could be done here. For example, a + // loop with a short-circuiting && operator has an exact count + // of the min of both sides. + CouldNotComputeBECount = true; + BECount = CouldNotCompute; + } + } + if (NewBTI.Max == CouldNotCompute) { + // We couldn't compute an maximum value for this exit, so + // we don't be able to compute an maximum value for the loop. + CouldNotComputeMaxBECount = true; + MaxBECount = CouldNotCompute; + } else if (!CouldNotComputeMaxBECount) { + if (MaxBECount == CouldNotCompute) + MaxBECount = NewBTI.Max; + else + MaxBECount = getUMaxFromMismatchedTypes(MaxBECount, NewBTI.Max); + } + } + + return BackedgeTakenInfo(BECount, MaxBECount); +} - // Okay, there is one exit block. Try to find the condition that causes the - // loop to be exited. - BasicBlock *ExitingBlock = L->getExitingBlock(); - if (!ExitingBlock) - return CouldNotCompute; // More than one block exiting! +/// ComputeBackedgeTakenCountFromExit - Compute the number of times the backedge +/// of the specified loop will execute if it exits via the specified block. +ScalarEvolution::BackedgeTakenInfo +ScalarEvolution::ComputeBackedgeTakenCountFromExit(const Loop *L, + BasicBlock *ExitingBlock) { - // Okay, we've computed the exiting block. See what condition causes us to - // exit. + // Okay, we've chosen an exiting block. See what condition causes us to + // exit at this block. // // FIXME: we should be able to handle switch instructions (with a single exit) BranchInst *ExitBr = dyn_cast<BranchInst>(ExitingBlock->getTerminator()); @@ -2547,23 +2874,154 @@ ScalarEvolution::ComputeBackedgeTakenCount(const Loop *L) { // Currently we check for this by checking to see if the Exit branch goes to // the loop header. If so, we know it will always execute the same number of // times as the loop. We also handle the case where the exit block *is* the - // loop header. This is common for un-rotated loops. More extensive analysis - // could be done to handle more cases here. + // loop header. This is common for un-rotated loops. + // + // If both of those tests fail, walk up the unique predecessor chain to the + // header, stopping if there is an edge that doesn't exit the loop. If the + // header is reached, the execution count of the branch will be equal to the + // trip count of the loop. + // + // More extensive analysis could be done to handle more cases here. + // if (ExitBr->getSuccessor(0) != L->getHeader() && ExitBr->getSuccessor(1) != L->getHeader() && - ExitBr->getParent() != L->getHeader()) - return CouldNotCompute; - - ICmpInst *ExitCond = dyn_cast<ICmpInst>(ExitBr->getCondition()); + ExitBr->getParent() != L->getHeader()) { + // The simple checks failed, try climbing the unique predecessor chain + // up to the header. 
+ bool Ok = false; + for (BasicBlock *BB = ExitBr->getParent(); BB; ) { + BasicBlock *Pred = BB->getUniquePredecessor(); + if (!Pred) + return CouldNotCompute; + TerminatorInst *PredTerm = Pred->getTerminator(); + for (unsigned i = 0, e = PredTerm->getNumSuccessors(); i != e; ++i) { + BasicBlock *PredSucc = PredTerm->getSuccessor(i); + if (PredSucc == BB) + continue; + // If the predecessor has a successor that isn't BB and isn't + // outside the loop, assume the worst. + if (L->contains(PredSucc)) + return CouldNotCompute; + } + if (Pred == L->getHeader()) { + Ok = true; + break; + } + BB = Pred; + } + if (!Ok) + return CouldNotCompute; + } + + // Procede to the next level to examine the exit condition expression. + return ComputeBackedgeTakenCountFromExitCond(L, ExitBr->getCondition(), + ExitBr->getSuccessor(0), + ExitBr->getSuccessor(1)); +} + +/// ComputeBackedgeTakenCountFromExitCond - Compute the number of times the +/// backedge of the specified loop will execute if its exit condition +/// were a conditional branch of ExitCond, TBB, and FBB. +ScalarEvolution::BackedgeTakenInfo +ScalarEvolution::ComputeBackedgeTakenCountFromExitCond(const Loop *L, + Value *ExitCond, + BasicBlock *TBB, + BasicBlock *FBB) { + // Check if the controlling expression for this loop is an and or or. In + // such cases, an exact backedge-taken count may be infeasible, but a + // maximum count may still be feasible. + if (BinaryOperator *BO = dyn_cast<BinaryOperator>(ExitCond)) { + if (BO->getOpcode() == Instruction::And) { + // Recurse on the operands of the and. + BackedgeTakenInfo BTI0 = + ComputeBackedgeTakenCountFromExitCond(L, BO->getOperand(0), TBB, FBB); + BackedgeTakenInfo BTI1 = + ComputeBackedgeTakenCountFromExitCond(L, BO->getOperand(1), TBB, FBB); + SCEVHandle BECount = CouldNotCompute; + SCEVHandle MaxBECount = CouldNotCompute; + if (L->contains(TBB)) { + // Both conditions must be true for the loop to continue executing. + // Choose the less conservative count. + // TODO: Take the minimum of the exact counts. + if (BTI0.Exact == BTI1.Exact) + BECount = BTI0.Exact; + // TODO: Take the minimum of the maximum counts. + if (BTI0.Max == CouldNotCompute) + MaxBECount = BTI1.Max; + else if (BTI1.Max == CouldNotCompute) + MaxBECount = BTI0.Max; + else if (const SCEVConstant *C0 = dyn_cast<SCEVConstant>(BTI0.Max)) + if (const SCEVConstant *C1 = dyn_cast<SCEVConstant>(BTI1.Max)) + MaxBECount = getConstant(APIntOps::umin(C0->getValue()->getValue(), + C1->getValue()->getValue())); + } else { + // Both conditions must be true for the loop to exit. + assert(L->contains(FBB) && "Loop block has no successor in loop!"); + if (BTI0.Exact != CouldNotCompute && BTI1.Exact != CouldNotCompute) + BECount = getUMaxFromMismatchedTypes(BTI0.Exact, BTI1.Exact); + if (BTI0.Max != CouldNotCompute && BTI1.Max != CouldNotCompute) + MaxBECount = getUMaxFromMismatchedTypes(BTI0.Max, BTI1.Max); + } + + return BackedgeTakenInfo(BECount, MaxBECount); + } + if (BO->getOpcode() == Instruction::Or) { + // Recurse on the operands of the or. + BackedgeTakenInfo BTI0 = + ComputeBackedgeTakenCountFromExitCond(L, BO->getOperand(0), TBB, FBB); + BackedgeTakenInfo BTI1 = + ComputeBackedgeTakenCountFromExitCond(L, BO->getOperand(1), TBB, FBB); + SCEVHandle BECount = CouldNotCompute; + SCEVHandle MaxBECount = CouldNotCompute; + if (L->contains(FBB)) { + // Both conditions must be false for the loop to continue executing. + // Choose the less conservative count. + // TODO: Take the minimum of the exact counts. 
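The And/Or handling above takes an exact count only when both subconditions agree; its TODOs note that the exact answer for a loop continuing while (a && b) is the minimum of the two counts. A small simulation of that case (hypothetical helper, not LLVM code):

```cpp
#include <algorithm>
#include <cassert>
#include <cstdint>

// Count backedges of: for (I = 0; I < N && I < M; ++I). With N, M >= 1
// the body runs min(N, M) times, so min(N, M) - 1 backedges are taken,
// which is the min-of-exact-counts the TODO above refers to.
static uint64_t backedgesTaken(uint64_t N, uint64_t M) {
  uint64_t Trips = 0;
  for (uint64_t I = 0; I < N && I < M; ++I)
    ++Trips;          // number of times the loop body executes
  return Trips - 1;   // backedges = trips - 1 (Trips >= 1 here)
}

int main() {
  for (uint64_t N = 1; N < 20; ++N)
    for (uint64_t M = 1; M < 20; ++M)
      assert(backedgesTaken(N, M) == std::min(N, M) - 1);
  return 0;
}
```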
+ if (BTI0.Exact == BTI1.Exact) + BECount = BTI0.Exact; + // TODO: Take the minimum of the maximum counts. + if (BTI0.Max == CouldNotCompute) + MaxBECount = BTI1.Max; + else if (BTI1.Max == CouldNotCompute) + MaxBECount = BTI0.Max; + else if (const SCEVConstant *C0 = dyn_cast<SCEVConstant>(BTI0.Max)) + if (const SCEVConstant *C1 = dyn_cast<SCEVConstant>(BTI1.Max)) + MaxBECount = getConstant(APIntOps::umin(C0->getValue()->getValue(), + C1->getValue()->getValue())); + } else { + // Both conditions must be false for the loop to exit. + assert(L->contains(TBB) && "Loop block has no successor in loop!"); + if (BTI0.Exact != CouldNotCompute && BTI1.Exact != CouldNotCompute) + BECount = getUMaxFromMismatchedTypes(BTI0.Exact, BTI1.Exact); + if (BTI0.Max != CouldNotCompute && BTI1.Max != CouldNotCompute) + MaxBECount = getUMaxFromMismatchedTypes(BTI0.Max, BTI1.Max); + } + + return BackedgeTakenInfo(BECount, MaxBECount); + } + } + + // With an icmp, it may be feasible to compute an exact backedge-taken count. + // Procede to the next level to examine the icmp. + if (ICmpInst *ExitCondICmp = dyn_cast<ICmpInst>(ExitCond)) + return ComputeBackedgeTakenCountFromExitCondICmp(L, ExitCondICmp, TBB, FBB); // If it's not an integer or pointer comparison then compute it the hard way. - if (ExitCond == 0) - return ComputeBackedgeTakenCountExhaustively(L, ExitBr->getCondition(), - ExitBr->getSuccessor(0) == ExitBlock); + return ComputeBackedgeTakenCountExhaustively(L, ExitCond, !L->contains(TBB)); +} + +/// ComputeBackedgeTakenCountFromExitCondICmp - Compute the number of times the +/// backedge of the specified loop will execute if its exit condition +/// were a conditional branch of the ICmpInst ExitCond, TBB, and FBB. +ScalarEvolution::BackedgeTakenInfo +ScalarEvolution::ComputeBackedgeTakenCountFromExitCondICmp(const Loop *L, + ICmpInst *ExitCond, + BasicBlock *TBB, + BasicBlock *FBB) { // If the condition was exit on true, convert the condition to exit on false ICmpInst::Predicate Cond; - if (ExitBr->getSuccessor(1) == ExitBlock) + if (!L->contains(FBB)) Cond = ExitCond->getPredicate(); else Cond = ExitCond->getInversePredicate(); @@ -2573,7 +3031,12 @@ ScalarEvolution::ComputeBackedgeTakenCount(const Loop *L) { if (Constant *RHS = dyn_cast<Constant>(ExitCond->getOperand(1))) { SCEVHandle ItCnt = ComputeLoadConstantCompareBackedgeTakenCount(LI, RHS, L, Cond); - if (!isa<SCEVCouldNotCompute>(ItCnt)) return ItCnt; + if (!isa<SCEVCouldNotCompute>(ItCnt)) { + unsigned BitWidth = getTypeSizeInBits(ItCnt->getType()); + return BackedgeTakenInfo(ItCnt, + isa<SCEVConstant>(ItCnt) ? ItCnt : + getConstant(APInt::getMaxValue(BitWidth)-1)); + } } SCEVHandle LHS = getSCEV(ExitCond->getOperand(0)); @@ -2651,8 +3114,7 @@ ScalarEvolution::ComputeBackedgeTakenCount(const Loop *L) { break; } return - ComputeBackedgeTakenCountExhaustively(L, ExitCond, - ExitBr->getSuccessor(0) == ExitBlock); + ComputeBackedgeTakenCountExhaustively(L, ExitCond, !L->contains(TBB)); } static ConstantInt * @@ -2750,7 +3212,7 @@ ComputeLoadConstantCompareBackedgeTakenCount(LoadInst *LI, Constant *RHS, unsigned MaxSteps = MaxBruteForceIterations; for (unsigned IterationNum = 0; IterationNum != MaxSteps; ++IterationNum) { ConstantInt *ItCst = - ConstantInt::get(IdxExpr->getType(), IterationNum); + ConstantInt::get(cast<IntegerType>(IdxExpr->getType()), IterationNum); ConstantInt *Val = EvaluateConstantChrecAtConstant(IdxExpr, ItCst, *this); // Form the GEP offset. 
@@ -2945,7 +3407,7 @@ ComputeBackedgeTakenCountExhaustively(const Loop *L, Value *Cond, bool ExitWhen) if (CondVal->getValue() == uint64_t(ExitWhen)) { ConstantEvolutionLoopExitValue[PN] = PHIVal; ++NumBruteForceTripCountsComputed; - return getConstant(ConstantInt::get(Type::Int32Ty, IterationNum)); + return getConstant(Type::Int32Ty, IterationNum); } // Compute the value of the PHI node for the next iteration. @@ -3074,7 +3536,7 @@ SCEVHandle ScalarEvolution::getSCEVAtScope(const SCEV *V, const Loop *L) { if (OpAtScope != Comm->getOperand(i)) { // Okay, at least one of these operands is loop variant but might be // foldable. Build a new instance of the folded commutative expression. - std::vector<SCEVHandle> NewOps(Comm->op_begin(), Comm->op_begin()+i); + SmallVector<SCEVHandle, 8> NewOps(Comm->op_begin(), Comm->op_begin()+i); NewOps.push_back(OpAtScope); for (++i; i != e; ++i) { @@ -3394,6 +3856,29 @@ ScalarEvolution::getPredecessorWithUniqueSuccessorForBB(BasicBlock *BB) { return 0; } +/// HasSameValue - SCEV structural equivalence is usually sufficient for +/// testing whether two expressions are equal, however for the purposes of +/// looking for a condition guarding a loop, it can be useful to be a little +/// more general, since a front-end may have replicated the controlling +/// expression. +/// +static bool HasSameValue(const SCEVHandle &A, const SCEVHandle &B) { + // Quick check to see if they are the same SCEV. + if (A == B) return true; + + // Otherwise, if they're both SCEVUnknown, it's possible that they hold + // two different instructions with the same value. Check for this case. + if (const SCEVUnknown *AU = dyn_cast<SCEVUnknown>(A)) + if (const SCEVUnknown *BU = dyn_cast<SCEVUnknown>(B)) + if (const Instruction *AI = dyn_cast<Instruction>(AU->getValue())) + if (const Instruction *BI = dyn_cast<Instruction>(BU->getValue())) + if (AI->isIdenticalTo(BI)) + return true; + + // Otherwise assume they may have a different value. + return false; +} + /// isLoopGuardedByCond - Test whether entry to the loop is protected by /// a conditional between LHS and RHS. This is used to help avoid max /// expressions in loop trip counts. @@ -3494,15 +3979,43 @@ bool ScalarEvolution::isLoopGuardedByCond(const Loop *L, SCEVHandle PreCondLHSSCEV = getSCEV(PreCondLHS); SCEVHandle PreCondRHSSCEV = getSCEV(PreCondRHS); - if ((LHS == PreCondLHSSCEV && RHS == PreCondRHSSCEV) || - (LHS == getNotSCEV(PreCondRHSSCEV) && - RHS == getNotSCEV(PreCondLHSSCEV))) + if ((HasSameValue(LHS, PreCondLHSSCEV) && + HasSameValue(RHS, PreCondRHSSCEV)) || + (HasSameValue(LHS, getNotSCEV(PreCondRHSSCEV)) && + HasSameValue(RHS, getNotSCEV(PreCondLHSSCEV)))) return true; } return false; } +/// getBECount - Subtract the end and start values and divide by the step, +/// rounding up, to get the number of times the backedge is executed. Return +/// CouldNotCompute if an intermediate computation overflows. +SCEVHandle ScalarEvolution::getBECount(const SCEVHandle &Start, + const SCEVHandle &End, + const SCEVHandle &Step) { + const Type *Ty = Start->getType(); + SCEVHandle NegOne = getIntegerSCEV(-1, Ty); + SCEVHandle Diff = getMinusSCEV(End, Start); + SCEVHandle RoundUp = getAddExpr(Step, NegOne); + + // Add an adjustment to the difference between End and Start so that + // the division will effectively round up. + SCEVHandle Add = getAddExpr(Diff, RoundUp); + + // Check Add for unsigned overflow. + // TODO: More sophisticated things could be done here. 
+ const Type *WideTy = IntegerType::get(getTypeSizeInBits(Ty) + 1);
+ SCEVHandle OperandExtendedAdd =
+ getAddExpr(getZeroExtendExpr(Diff, WideTy),
+ getZeroExtendExpr(RoundUp, WideTy));
+ if (getZeroExtendExpr(Add, WideTy) != OperandExtendedAdd)
+ return CouldNotCompute;
+
+ return getUDivExpr(Add, Step);
+}
+
 /// HowManyLessThans - Return the number of times a backedge containing the
 /// specified less-than comparison will execute. If not computable, return
 /// CouldNotCompute.
@@ -3520,7 +4033,6 @@ HowManyLessThans(const SCEV *LHS, const SCEV *RHS,
 // FORNOW: We only support unit strides.
 unsigned BitWidth = getTypeSizeInBits(AddRec->getType());
 SCEVHandle Step = AddRec->getStepRecurrence(*this);
- SCEVHandle NegOne = getIntegerSCEV(-1, AddRec->getType());

 // TODO: handle non-constant strides.
 const SCEVConstant *CStep = dyn_cast<SCEVConstant>(Step);
@@ -3575,22 +4087,20 @@ HowManyLessThans(const SCEV *LHS, const SCEV *RHS,
 : getUMaxExpr(RHS, Start);

 // Determine the maximum constant end value.
- SCEVHandle MaxEnd = isa<SCEVConstant>(End) ? End :
- getConstant(isSigned ? APInt::getSignedMaxValue(BitWidth) :
- APInt::getMaxValue(BitWidth));
+ SCEVHandle MaxEnd =
+ isa<SCEVConstant>(End) ? End :
+ getConstant(isSigned ? APInt::getSignedMaxValue(BitWidth)
+ .ashr(GetMinSignBits(End) - 1) :
+ APInt::getMaxValue(BitWidth)
+ .lshr(GetMinLeadingZeros(End)));

 // Finally, we subtract these two values and divide, rounding up, to get
 // the number of times the backedge is executed.
- SCEVHandle BECount = getUDivExpr(getAddExpr(getMinusSCEV(End, Start),
- getAddExpr(Step, NegOne)),
- Step);
+ SCEVHandle BECount = getBECount(Start, End, Step);

 // The maximum backedge count is similar, except using the minimum start
 // value and the maximum end value.
- SCEVHandle MaxBECount = getUDivExpr(getAddExpr(getMinusSCEV(MaxEnd,
- MinStart),
- getAddExpr(Step, NegOne)),
- Step);
+ SCEVHandle MaxBECount = getBECount(MinStart, MaxEnd, Step);

 return BackedgeTakenInfo(BECount, MaxBECount);
 }
@@ -3611,7 +4121,7 @@ SCEVHandle SCEVAddRecExpr::getNumIterationsInRange(ConstantRange Range,
 // If the start is a non-zero constant, shift the range to simplify things.
 if (const SCEVConstant *SC = dyn_cast<SCEVConstant>(getStart()))
 if (!SC->getValue()->isZero()) {
- std::vector<SCEVHandle> Operands(op_begin(), op_end());
+ SmallVector<SCEVHandle, 4> Operands(op_begin(), op_end());
 Operands[0] = SE.getIntegerSCEV(0, SC->getType());
 SCEVHandle Shifted = SE.getAddRecExpr(Operands, getLoop());
 if (const SCEVAddRecExpr *ShiftedAddRec =
@@ -3636,7 +4146,7 @@ SCEVHandle SCEVAddRecExpr::getNumIterationsInRange(ConstantRange Range,
 // iteration exits.
 unsigned BitWidth = SE.getTypeSizeInBits(getType());
 if (!Range.contains(APInt(BitWidth, 0)))
- return SE.getConstant(ConstantInt::get(getType(),0));
+ return SE.getIntegerSCEV(0, getType());

 if (isAffine()) {
 // If this is an affine expression then we have this situation:
@@ -3672,7 +4182,7 @@ SCEVHandle SCEVAddRecExpr::getNumIterationsInRange(ConstantRange Range,
 // quadratic equation to solve it. To do this, we must frame our problem in
 // terms of figuring out when zero is crossed, instead of when
 // Range.getUpper() is crossed.
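For reference, the overflow guard introduced in getBECount above is easiest to see in isolation: the backedge-taken count is the round-up division (End - Start + (Step - 1)) / Step, and the patch validates the intermediate addition by redoing it in a type one bit wider and comparing the two results. A minimal standalone C++ sketch of the same check with fixed-width integers follows; the function and its name are illustrative only, not part of the patch (the patch widens by exactly one bit, but any type wide enough to hold the exact sum works).

#include <cstdint>

// Round-up division (End - Start + (Step - 1)) / Step, mirroring the
// getBECount hunk above: redo the addition in a wider type and compare,
// so a wrapped intermediate sum is reported instead of silently used.
// Returns false where the real code returns CouldNotCompute.
static bool backedgeCount(uint32_t Start, uint32_t End, uint32_t Step,
                          uint32_t &Count) {
  uint32_t Diff = End - Start;
  uint32_t RoundUp = Step - 1;
  uint32_t Add = Diff + RoundUp;                      // may wrap
  uint64_t Wide = uint64_t(Diff) + uint64_t(RoundUp); // cannot wrap
  if (uint64_t(Add) != Wide)
    return false;                                     // overflow detected
  Count = Add / Step;
  return true;
}

For Start = 0, End = 10, Step = 3 this yields (10 + 2) / 3 = 4, while an End near UINT32_MAX combined with a large Step trips the widened comparison.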
- std::vector<SCEVHandle> NewOps(op_begin(), op_end());
+ SmallVector<SCEVHandle, 4> NewOps(op_begin(), op_end());
 NewOps[0] = SE.getNegativeSCEV(SE.getConstant(Range.getUpper()));
 SCEVHandle NewAddRec = SE.getAddRecExpr(NewOps, getLoop());
@@ -3783,7 +4293,7 @@ ScalarEvolution::SCEVCallbackVH::SCEVCallbackVH(Value *V, ScalarEvolution *se)
//===----------------------------------------------------------------------===//

ScalarEvolution::ScalarEvolution()
- : FunctionPass(&ID), CouldNotCompute(new SCEVCouldNotCompute()) {
+ : FunctionPass(&ID), CouldNotCompute(new SCEVCouldNotCompute(0)) {
}

bool ScalarEvolution::runOnFunction(Function &F) {
@@ -3847,11 +4357,18 @@ void ScalarEvolution::print(raw_ostream &OS, const Module* ) const {
 OS << " --> ";
 SCEVHandle SV = SE.getSCEV(&*I);
 SV->print(OS);
- OS << "\t\t";
- if (const Loop *L = LI->getLoopFor((*I).getParent())) {
- OS << "Exits: ";
- SCEVHandle ExitValue = SE.getSCEVAtScope(&*I, L->getParentLoop());
+ const Loop *L = LI->getLoopFor((*I).getParent());
+
+ SCEVHandle AtUse = SE.getSCEVAtScope(SV, L);
+ if (AtUse != SV) {
+ OS << " --> ";
+ AtUse->print(OS);
+ }
+
+ if (L) {
+ OS << "\t\t" "Exits: ";
+ SCEVHandle ExitValue = SE.getSCEVAtScope(SV, L->getParentLoop());
 if (!ExitValue->isLoopInvariant(L)) {
 OS << "<<Unknown>>";
 } else {
diff --git a/lib/Analysis/ScalarEvolutionExpander.cpp b/lib/Analysis/ScalarEvolutionExpander.cpp
index e1f8fa4..2a73c27 100644
--- a/lib/Analysis/ScalarEvolutionExpander.cpp
+++ b/lib/Analysis/ScalarEvolutionExpander.cpp
@@ -182,7 +182,8 @@ static bool FactorOutConstant(SCEVHandle &S,
 if (const SCEVMulExpr *M = dyn_cast<SCEVMulExpr>(S))
 if (const SCEVConstant *C = dyn_cast<SCEVConstant>(M->getOperand(0)))
 if (!C->getValue()->getValue().srem(Factor)) {
- std::vector<SCEVHandle> NewMulOps(M->getOperands());
+ const SmallVectorImpl<SCEVHandle> &MOperands = M->getOperands();
+ SmallVector<SCEVHandle, 4> NewMulOps(MOperands.begin(), MOperands.end());
 NewMulOps[0] =
 SE.getConstant(C->getValue()->getValue().sdiv(Factor));
 S = SE.getMulExpr(NewMulOps);
@@ -239,7 +240,7 @@ Value *SCEVExpander::expandAddToGEP(const SCEVHandle *op_begin,
 Value *V) {
 const Type *ElTy = PTy->getElementType();
 SmallVector<Value *, 4> GepIndices;
- std::vector<SCEVHandle> Ops(op_begin, op_end);
+ SmallVector<SCEVHandle, 8> Ops(op_begin, op_end);
 bool AnyNonZeroIndices = false;

 // Descend down the pointer's type and attempt to convert the other
@@ -250,8 +251,8 @@
 for (;;) {
 APInt ElSize = APInt(SE.getTypeSizeInBits(Ty),
 ElTy->isSized() ? SE.TD->getTypeAllocSize(ElTy) : 0);
- std::vector<SCEVHandle> NewOps;
- std::vector<SCEVHandle> ScaledOps;
+ SmallVector<SCEVHandle, 8> NewOps;
+ SmallVector<SCEVHandle, 8> ScaledOps;
 for (unsigned i = 0, e = Ops.size(); i != e; ++i) {
 // Split AddRecs up into parts as either of the parts may be usable
 // without the other.
@@ -297,9 +298,7 @@ Value *SCEVExpander::expandAddToGEP(const SCEVHandle *op_begin,
 GepIndices.push_back(ConstantInt::get(Type::Int32Ty, ElIdx));
 ElTy = STy->getTypeAtIndex(ElIdx);
 Ops[0] =
- SE.getConstant(ConstantInt::get(Ty,
- FullOffset -
- SL.getElementOffset(ElIdx)));
+ SE.getConstant(Ty, FullOffset - SL.getElementOffset(ElIdx));
 AnyNonZeroIndices = true;
 continue;
 }
@@ -365,7 +364,7 @@ Value *SCEVExpander::visitAddExpr(const SCEVAddExpr *S) {
 // comments on expandAddToGEP for details.
if (SE.TD) if (const PointerType *PTy = dyn_cast<PointerType>(V->getType())) { - const std::vector<SCEVHandle> &Ops = S->getOperands(); + const SmallVectorImpl<SCEVHandle> &Ops = S->getOperands(); return expandAddToGEP(&Ops[0], &Ops[Ops.size() - 1], PTy, Ty, V); } @@ -432,7 +431,7 @@ static void ExposePointerBase(SCEVHandle &Base, SCEVHandle &Rest, } if (const SCEVAddExpr *A = dyn_cast<SCEVAddExpr>(Base)) { Base = A->getOperand(A->getNumOperands()-1); - std::vector<SCEVHandle> NewAddOps(A->op_begin(), A->op_end()); + SmallVector<SCEVHandle, 8> NewAddOps(A->op_begin(), A->op_end()); NewAddOps.back() = Rest; Rest = SE.getAddExpr(NewAddOps); ExposePointerBase(Base, Rest, SE); @@ -473,7 +472,8 @@ Value *SCEVExpander::visitAddRecExpr(const SCEVAddRecExpr *S) { // {X,+,F} --> X + {0,+,F} if (!S->getStart()->isZero()) { - std::vector<SCEVHandle> NewOps(S->getOperands()); + const SmallVectorImpl<SCEVHandle> &SOperands = S->getOperands(); + SmallVector<SCEVHandle, 4> NewOps(SOperands.begin(), SOperands.end()); NewOps[0] = SE.getIntegerSCEV(0, Ty); SCEVHandle Rest = SE.getAddRecExpr(NewOps, L); diff --git a/lib/Analysis/ValueTracking.cpp b/lib/Analysis/ValueTracking.cpp index 45f97b8..17ffa2d 100644 --- a/lib/Analysis/ValueTracking.cpp +++ b/lib/Analysis/ValueTracking.cpp @@ -52,11 +52,12 @@ void llvm::ComputeMaskedBits(Value *V, const APInt &Mask, assert(V && "No Value?"); assert(Depth <= MaxDepth && "Limit Search Depth"); unsigned BitWidth = Mask.getBitWidth(); - assert((V->getType()->isInteger() || isa<PointerType>(V->getType())) && + assert((V->getType()->isIntOrIntVector() || isa<PointerType>(V->getType())) && "Not integer or pointer type!"); - assert((!TD || TD->getTypeSizeInBits(V->getType()) == BitWidth) && - (!isa<IntegerType>(V->getType()) || - V->getType()->getPrimitiveSizeInBits() == BitWidth) && + assert((!TD || + TD->getTypeSizeInBits(V->getType()->getScalarType()) == BitWidth) && + (!V->getType()->isIntOrIntVector() || + V->getType()->getScalarSizeInBits() == BitWidth) && KnownZero.getBitWidth() == BitWidth && KnownOne.getBitWidth() == BitWidth && "V, Mask, KnownOne and KnownZero should have same BitWidth"); @@ -67,12 +68,26 @@ void llvm::ComputeMaskedBits(Value *V, const APInt &Mask, KnownZero = ~KnownOne & Mask; return; } - // Null is all-zeros. - if (isa<ConstantPointerNull>(V)) { + // Null and aggregate-zero are all-zeros. + if (isa<ConstantPointerNull>(V) || + isa<ConstantAggregateZero>(V)) { KnownOne.clear(); KnownZero = Mask; return; } + // Handle a constant vector by taking the intersection of the known bits of + // each element. + if (ConstantVector *CV = dyn_cast<ConstantVector>(V)) { + KnownZero.set(); KnownOne.set(); + for (unsigned i = 0, e = CV->getNumOperands(); i != e; ++i) { + APInt KnownZero2(BitWidth, 0), KnownOne2(BitWidth, 0); + ComputeMaskedBits(CV->getOperand(i), Mask, KnownZero2, KnownOne2, + TD, Depth); + KnownZero &= KnownZero2; + KnownOne &= KnownOne2; + } + return; + } // The address of an aligned GlobalValue has trailing zeros. if (GlobalValue *GV = dyn_cast<GlobalValue>(V)) { unsigned Align = GV->getAlignment(); @@ -218,7 +233,7 @@ void llvm::ComputeMaskedBits(Value *V, const APInt &Mask, const Type *SrcTy = I->getOperand(0)->getType(); unsigned SrcBitWidth = TD ? 
TD->getTypeSizeInBits(SrcTy) : - SrcTy->getPrimitiveSizeInBits(); + SrcTy->getScalarSizeInBits(); APInt MaskIn(Mask); MaskIn.zextOrTrunc(SrcBitWidth); KnownZero.zextOrTrunc(SrcBitWidth); @@ -480,7 +495,7 @@ void llvm::ComputeMaskedBits(Value *V, const APInt &Mask, // Handle array index arithmetic. const Type *IndexedTy = GTI.getIndexedType(); if (!IndexedTy->isSized()) return; - unsigned GEPOpiBits = Index->getType()->getPrimitiveSizeInBits(); + unsigned GEPOpiBits = Index->getType()->getScalarSizeInBits(); uint64_t TypeSize = TD ? TD->getTypeAllocSize(IndexedTy) : 1; LocalMask = APInt::getAllOnesValue(GEPOpiBits); LocalKnownZero = LocalKnownOne = APInt(GEPOpiBits, 0); @@ -609,8 +624,8 @@ bool llvm::MaskedValueIsZero(Value *V, const APInt &Mask, /// 'Op' must have a scalar integer type. /// unsigned llvm::ComputeNumSignBits(Value *V, TargetData *TD, unsigned Depth) { - const IntegerType *Ty = cast<IntegerType>(V->getType()); - unsigned TyBits = Ty->getBitWidth(); + const Type *Ty = V->getType(); + unsigned TyBits = Ty->getScalarSizeInBits(); unsigned Tmp, Tmp2; unsigned FirstAnswer = 1; diff --git a/lib/AsmParser/LLLexer.cpp b/lib/AsmParser/LLLexer.cpp index c5190ef..b3f7cdb 100644 --- a/lib/AsmParser/LLLexer.cpp +++ b/lib/AsmParser/LLLexer.cpp @@ -526,6 +526,10 @@ lltok::Kind LLLexer::LexIdentifier() { KEYWORD(coldcc); KEYWORD(x86_stdcallcc); KEYWORD(x86_fastcallcc); + KEYWORD(arm_apcscc); + KEYWORD(arm_aapcscc); + KEYWORD(arm_aapcs_vfpcc); + KEYWORD(cc); KEYWORD(c); diff --git a/lib/AsmParser/LLParser.cpp b/lib/AsmParser/LLParser.cpp index 4863f3c..909370c 100644 --- a/lib/AsmParser/LLParser.cpp +++ b/lib/AsmParser/LLParser.cpp @@ -808,8 +808,11 @@ bool LLParser::ParseOptionalVisibility(unsigned &Res) { /// ::= 'coldcc' /// ::= 'x86_stdcallcc' /// ::= 'x86_fastcallcc' +/// ::= 'arm_apcscc' +/// ::= 'arm_aapcscc' +/// ::= 'arm_aapcs_vfpcc' /// ::= 'cc' UINT -/// +/// bool LLParser::ParseOptionalCallingConv(unsigned &CC) { switch (Lex.getKind()) { default: CC = CallingConv::C; return false; @@ -818,6 +821,9 @@ bool LLParser::ParseOptionalCallingConv(unsigned &CC) { case lltok::kw_coldcc: CC = CallingConv::Cold; break; case lltok::kw_x86_stdcallcc: CC = CallingConv::X86_StdCall; break; case lltok::kw_x86_fastcallcc: CC = CallingConv::X86_FastCall; break; + case lltok::kw_arm_apcscc: CC = CallingConv::ARM_APCS; break; + case lltok::kw_arm_aapcscc: CC = CallingConv::ARM_AAPCS; break; + case lltok::kw_arm_aapcs_vfpcc:CC = CallingConv::ARM_AAPCS_VFP; break; case lltok::kw_cc: Lex.Lex(); return ParseUInt32(CC); } Lex.Lex(); @@ -1743,7 +1749,7 @@ bool LLParser::ParseValID(ValID &ID) { Lex.Lex(); if (ParseToken(lltok::lparen, "expected '(' after constantexpr cast") || ParseGlobalTypeAndValue(SrcVal) || - ParseToken(lltok::kw_to, "expected 'to' int constantexpr cast") || + ParseToken(lltok::kw_to, "expected 'to' in constantexpr cast") || ParseType(DestTy) || ParseToken(lltok::rparen, "expected ')' at end of constantexpr cast")) return true; @@ -3145,7 +3151,7 @@ bool LLParser::ParseFree(Instruction *&Inst, PerFunctionState &PFS) { } /// ParseLoad -/// ::= 'volatile'? 'load' TypeAndValue (',' 'align' uint)? +/// ::= 'volatile'? 'load' TypeAndValue (',' 'align' i32)? bool LLParser::ParseLoad(Instruction *&Inst, PerFunctionState &PFS, bool isVolatile) { Value *Val; LocTy Loc; @@ -3163,7 +3169,7 @@ bool LLParser::ParseLoad(Instruction *&Inst, PerFunctionState &PFS, } /// ParseStore -/// ::= 'volatile'? 'store' TypeAndValue ',' TypeAndValue (',' 'align' uint)? +/// ::= 'volatile'? 
'store' TypeAndValue ',' TypeAndValue (',' 'align' i32)? bool LLParser::ParseStore(Instruction *&Inst, PerFunctionState &PFS, bool isVolatile) { Value *Val, *Ptr; LocTy Loc, PtrLoc; @@ -3186,7 +3192,7 @@ bool LLParser::ParseStore(Instruction *&Inst, PerFunctionState &PFS, } /// ParseGetResult -/// ::= 'getresult' TypeAndValue ',' uint +/// ::= 'getresult' TypeAndValue ',' i32 /// FIXME: Remove support for getresult in LLVM 3.0 bool LLParser::ParseGetResult(Instruction *&Inst, PerFunctionState &PFS) { Value *Val; LocTy ValLoc, EltLoc; diff --git a/lib/AsmParser/LLToken.h b/lib/AsmParser/LLToken.h index 9335d19..cff89f8 100644 --- a/lib/AsmParser/LLToken.h +++ b/lib/AsmParser/LLToken.h @@ -60,7 +60,9 @@ namespace lltok { kw_gc, kw_c, - kw_cc, kw_ccc, kw_fastcc, kw_coldcc, kw_x86_stdcallcc, kw_x86_fastcallcc, + kw_cc, kw_ccc, kw_fastcc, kw_coldcc, + kw_x86_stdcallcc, kw_x86_fastcallcc, + kw_arm_apcscc, kw_arm_aapcscc, kw_arm_aapcs_vfpcc, kw_signext, kw_zeroext, diff --git a/lib/Bitcode/Reader/BitcodeReader.cpp b/lib/Bitcode/Reader/BitcodeReader.cpp index 3b44f564..6b9606c 100644 --- a/lib/Bitcode/Reader/BitcodeReader.cpp +++ b/lib/Bitcode/Reader/BitcodeReader.cpp @@ -2040,14 +2040,13 @@ void BitcodeReader::dematerializeFunction(Function *F) { Module *BitcodeReader::materializeModule(std::string *ErrInfo) { - for (DenseMap<Function*, std::pair<uint64_t, unsigned> >::iterator I = - DeferredFunctionInfo.begin(), E = DeferredFunctionInfo.end(); I != E; - ++I) { - Function *F = I->first; + // Iterate over the module, deserializing any functions that are still on + // disk. + for (Module::iterator F = TheModule->begin(), E = TheModule->end(); + F != E; ++F) if (F->hasNotBeenReadFromBitcode() && materializeFunction(F, ErrInfo)) return 0; - } // Upgrade any intrinsic calls that slipped through (should not happen!) and // delete the old functions to clean up. We can't do this unless the entire @@ -2123,7 +2122,7 @@ Module *llvm::ParseBitcodeFile(MemoryBuffer *Buffer, std::string *ErrMsg){ // is run. if (M) M = R->releaseModule(ErrMsg); - + delete R; return M; } diff --git a/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/lib/CodeGen/AsmPrinter/AsmPrinter.cpp index 45462da..e931904 100644 --- a/lib/CodeGen/AsmPrinter/AsmPrinter.cpp +++ b/lib/CodeGen/AsmPrinter/AsmPrinter.cpp @@ -152,6 +152,9 @@ void AsmPrinter::getAnalysisUsage(AnalysisUsage &AU) const { bool AsmPrinter::doInitialization(Module &M) { Mang = new Mangler(M, TAI->getGlobalPrefix(), TAI->getPrivateGlobalPrefix()); + if (TAI->doesAllowQuotesInName()) + Mang->setUseQuotes(true); + GCModuleInfo *MI = getAnalysisIfAvailable<GCModuleInfo>(); assert(MI && "AsmPrinter didn't require GCModuleInfo?"); @@ -174,9 +177,17 @@ bool AsmPrinter::doInitialization(Module &M) { SwitchToDataSection(""); // Reset back to no section. 
- MachineModuleInfo *MMI = getAnalysisIfAvailable<MachineModuleInfo>(); - if (MMI) MMI->AnalyzeModule(M); - DW = getAnalysisIfAvailable<DwarfWriter>(); + if (TAI->doesSupportDebugInformation() + || TAI->doesSupportExceptionHandling()) { + MachineModuleInfo *MMI = getAnalysisIfAvailable<MachineModuleInfo>(); + if (MMI) { + MMI->AnalyzeModule(M); + DW = getAnalysisIfAvailable<DwarfWriter>(); + if (DW) + DW->BeginModule(&M, MMI, O, this, TAI); + } + } + return false; } @@ -347,8 +358,9 @@ void AsmPrinter::EmitJumpTableInfo(MachineJumpTableInfo *MJTI, const char* JumpTableDataSection = TAI->getJumpTableDataSection(); const Function *F = MF.getFunction(); unsigned SectionFlags = TAI->SectionFlagsForGlobal(F); + bool JTInDiffSection = false; if ((IsPic && !(LoweringInfo && LoweringInfo->usesGlobalOffsetTable())) || - !JumpTableDataSection || + !JumpTableDataSection || SectionFlags & SectionFlags::Linkonce) { // In PIC mode, we need to emit the jump table to the same section as the // function body itself, otherwise the label differences won't make sense. @@ -357,6 +369,7 @@ void AsmPrinter::EmitJumpTableInfo(MachineJumpTableInfo *MJTI, SwitchToSection(TAI->SectionForGlobal(F)); } else { SwitchToDataSection(JumpTableDataSection); + JTInDiffSection = true; } EmitAlignment(Log2_32(MJTI->getAlignment())); @@ -380,8 +393,10 @@ void AsmPrinter::EmitJumpTableInfo(MachineJumpTableInfo *MJTI, // before each jump table. The first label is never referenced, but tells // the assembler and linker the extents of the jump table object. The // second label is actually referenced by the code. - if (const char *JTLabelPrefix = TAI->getJumpTableSpecialLabelPrefix()) - O << JTLabelPrefix << "JTI" << getFunctionNumber() << '_' << i << ":\n"; + if (JTInDiffSection) { + if (const char *JTLabelPrefix = TAI->getJumpTableSpecialLabelPrefix()) + O << JTLabelPrefix << "JTI" << getFunctionNumber() << '_' << i << ":\n"; + } O << TAI->getPrivateGlobalPrefix() << "JTI" << getFunctionNumber() << '_' << i << ":\n"; @@ -502,7 +517,7 @@ const GlobalValue * AsmPrinter::findGlobalValue(const Constant *CV) { void AsmPrinter::EmitLLVMUsedList(Constant *List) { const char *Directive = TAI->getUsedDirective(); - // Should be an array of 'sbyte*'. + // Should be an array of 'i8*'. ConstantArray *InitList = dyn_cast<ConstantArray>(List); if (InitList == 0) return; diff --git a/lib/CodeGen/AsmPrinter/DwarfDebug.cpp b/lib/CodeGen/AsmPrinter/DwarfDebug.cpp index c773378..9d340e3 100644 --- a/lib/CodeGen/AsmPrinter/DwarfDebug.cpp +++ b/lib/CodeGen/AsmPrinter/DwarfDebug.cpp @@ -1757,6 +1757,9 @@ unsigned DwarfDebug::RecordInlinedFnStart(DISubprogram &SP, DICompileUnit CU, if (TimePassesIsEnabled) DebugTimer->startTimer(); + CompileUnit *Unit = MainCU; + if (!Unit) + Unit = &FindCompileUnit(SP.getCompileUnit()); GlobalVariable *GV = SP.getGV(); DenseMap<const GlobalVariable *, DbgScope *>::iterator II = AbstractInstanceRootMap.find(GV); @@ -1767,7 +1770,6 @@ unsigned DwarfDebug::RecordInlinedFnStart(DISubprogram &SP, DICompileUnit CU, DbgScope *Scope = new DbgScope(NULL, DIDescriptor(GV)); // Get the compile unit context. - CompileUnit *Unit = &FindCompileUnit(SP.getCompileUnit()); DIE *SPDie = Unit->getDieMapSlotFor(GV); if (!SPDie) SPDie = CreateSubprogramDIE(Unit, SP, false, true); @@ -1789,7 +1791,6 @@ unsigned DwarfDebug::RecordInlinedFnStart(DISubprogram &SP, DICompileUnit CU, // Create a concrete inlined instance for this inlined function. 
 DbgConcreteScope *ConcreteScope = new DbgConcreteScope(DIDescriptor(GV));
 DIE *ScopeDie = new DIE(dwarf::DW_TAG_inlined_subroutine);
- CompileUnit *Unit = &FindCompileUnit(SP.getCompileUnit());
 ScopeDie->setAbstractCompileUnit(Unit);

 DIE *Origin = Unit->getDieMapSlotFor(GV);
@@ -1850,7 +1851,14 @@ unsigned DwarfDebug::RecordInlinedFnEnd(DISubprogram &SP) {
 }

 SmallVector<DbgScope *, 8> &Scopes = I->second;
- assert(!Scopes.empty() && "We should have at least one debug scope!");
+ if (Scopes.empty()) {
+ // Returned ID is 0 if this is unbalanced "end of inlined
+ // scope". This could happen if the optimizer eats dbg intrinsics
+ // or "beginning of inlined scope" is not recognized due to
+ // missing location info. In such cases, ignore this region.end.
+ return 0;
+ }
+
 DbgScope *Scope = Scopes.back(); Scopes.pop_back();
 unsigned ID = MMI->NextLabelID();
 MMI->RecordUsedDbgLabel(ID);
@@ -1987,8 +1995,8 @@ void DwarfDebug::EmitInitial() {
 Asm->SwitchToDataSection(TAI->getDwarfARangesSection());
 EmitLabel("section_aranges", 0);

- if (TAI->doesSupportMacInfoSection()) {
- Asm->SwitchToDataSection(TAI->getDwarfMacInfoSection());
+ if (const char *LineInfoDirective = TAI->getDwarfMacroInfoSection()) {
+ Asm->SwitchToDataSection(LineInfoDirective);
 EmitLabel("section_macinfo", 0);
 }

@@ -2534,9 +2542,9 @@ void DwarfDebug::EmitDebugRanges() {
/// EmitDebugMacInfo - Emit visible names into a debug macinfo section.
///
void DwarfDebug::EmitDebugMacInfo() {
- if (TAI->doesSupportMacInfoSection()) {
+ if (const char *LineInfoDirective = TAI->getDwarfMacroInfoSection()) {
 // Start the dwarf macinfo section.
- Asm->SwitchToDataSection(TAI->getDwarfMacInfoSection());
+ Asm->SwitchToDataSection(LineInfoDirective);
 Asm->EOL();
 }
}
diff --git a/lib/CodeGen/AsmPrinter/DwarfPrinter.cpp b/lib/CodeGen/AsmPrinter/DwarfPrinter.cpp
index f7ca4f4..a1b97df 100644
--- a/lib/CodeGen/AsmPrinter/DwarfPrinter.cpp
+++ b/lib/CodeGen/AsmPrinter/DwarfPrinter.cpp
@@ -190,7 +190,7 @@ void Dwarf::EmitFrameMoves(const char *BaseLabel, unsigned BaseLabelID,
 Asm->EmitULEB128Bytes(Offset);
 Asm->EOL("Offset");
 } else {
- assert(0 && "Machine move no supported yet.");
+ assert(0 && "Machine move not supported yet.");
 }
 } else if (Src.isReg() &&
 Src.getReg() == MachineLocation::VirtualFP) {
@@ -200,7 +200,7 @@ void Dwarf::EmitFrameMoves(const char *BaseLabel, unsigned BaseLabelID,
 Asm->EmitULEB128Bytes(RI->getDwarfRegNum(Dst.getReg(), isEH));
 Asm->EOL("Register");
 } else {
- assert(0 && "Machine move no supported yet.");
+ assert(0 && "Machine move not supported yet.");
 }
 } else {
 unsigned Reg = RI->getDwarfRegNum(Src.getReg(), isEH);
diff --git a/lib/CodeGen/IfConversion.cpp b/lib/CodeGen/IfConversion.cpp
index 1d0887f..4d5c3c2 100644
--- a/lib/CodeGen/IfConversion.cpp
+++ b/lib/CodeGen/IfConversion.cpp
@@ -547,7 +547,11 @@ void IfConverter::ScanInstructions(BBInfo &BBI) {
 // fallthrough.
 if (!BBI.FalseBB)
 BBI.FalseBB = findFalseBlock(BBI.BB, BBI.TrueBB);
- assert(BBI.FalseBB && "Expected to find the fallthrough block!");
+ if (!BBI.FalseBB) {
+ // Malformed bcc? True and false blocks are the same?
+ BBI.IsUnpredicable = true;
+ return;
+ }
 }

 // Then scan all the instructions.
@@ -663,6 +667,13 @@ IfConverter::BBInfo &IfConverter::AnalyzeBlock(MachineBasicBlock *BB,
 return BBI;
 }

+ // Do not ifcvt if true and false fallthrough blocks are the same.
+ if (!BBI.FalseBB) { + BBI.IsBeingAnalyzed = false; + BBI.IsAnalyzed = true; + return BBI; + } + BBInfo &TrueBBI = AnalyzeBlock(BBI.TrueBB, Tokens); BBInfo &FalseBBI = AnalyzeBlock(BBI.FalseBB, Tokens); diff --git a/lib/CodeGen/LazyLiveness.cpp b/lib/CodeGen/LazyLiveness.cpp index 6fb35d2..a951c99 100644 --- a/lib/CodeGen/LazyLiveness.cpp +++ b/lib/CodeGen/LazyLiveness.cpp @@ -32,10 +32,12 @@ void LazyLiveness::computeBackedgeChain(MachineFunction& mf, calculated.set(preorder[MBB]); for (SparseBitVector<128>::iterator I = tmp.begin(); I != tmp.end(); ++I) { + assert(rev_preorder.size() > *I && "Unknown block!"); + MachineBasicBlock* SrcMBB = rev_preorder[*I]; - for (MachineBasicBlock::succ_iterator SI = SrcMBB->succ_begin(); - SI != SrcMBB->succ_end(); ++SI) { + for (MachineBasicBlock::succ_iterator SI = SrcMBB->succ_begin(), + SE = SrcMBB->succ_end(); SI != SE; ++SI) { MachineBasicBlock* TgtMBB = *SI; if (backedges.count(std::make_pair(SrcMBB, TgtMBB)) && @@ -44,7 +46,8 @@ void LazyLiveness::computeBackedgeChain(MachineFunction& mf, computeBackedgeChain(mf, TgtMBB); tv[MBB].set(preorder[TgtMBB]); - tv[MBB] |= tv[TgtMBB]; + SparseBitVector<128> right = tv[TgtMBB]; + tv[MBB] |= right; } } @@ -60,6 +63,12 @@ bool LazyLiveness::runOnMachineFunction(MachineFunction &mf) { backedge_target.clear(); calculated.clear(); preorder.clear(); + rev_preorder.clear(); + + rv.resize(mf.size()); + tv.resize(mf.size()); + preorder.resize(mf.size()); + rev_preorder.reserve(mf.size()); MRI = &mf.getRegInfo(); MachineDominatorTree& MDT = getAnalysis<MachineDominatorTree>(); @@ -106,8 +115,8 @@ bool LazyLiveness::runOnMachineFunction(MachineFunction &mf) { for (MachineBasicBlock::succ_iterator SI = (*POI)->succ_begin(), SE = (*POI)->succ_end(); SI != SE; ++SI) if (!backedges.count(std::make_pair(*POI, *SI)) && tv.count(*SI)) { - SparseBitVector<128>& PBV = tv[*POI]; - PBV = tv[*SI]; + SparseBitVector<128> right = tv[*SI]; + tv[*POI] |= right; } for (po_iterator<MachineBasicBlock*> POI = po_begin(&*mf.begin()), diff --git a/lib/CodeGen/LiveInterval.cpp b/lib/CodeGen/LiveInterval.cpp index 67120b8..cac9253 100644 --- a/lib/CodeGen/LiveInterval.cpp +++ b/lib/CodeGen/LiveInterval.cpp @@ -19,6 +19,7 @@ //===----------------------------------------------------------------------===// #include "llvm/CodeGen/LiveInterval.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/SmallSet.h" #include "llvm/ADT/STLExtras.h" @@ -305,9 +306,9 @@ void LiveInterval::removeRange(unsigned Start, unsigned End, VNInfo *VNI = valnos.back(); valnos.pop_back(); VNI->~VNInfo(); - } while (!valnos.empty() && valnos.back()->def == ~1U); + } while (!valnos.empty() && valnos.back()->isUnused()); } else { - ValNo->def = ~1U; + ValNo->setIsUnused(true); } } } @@ -353,9 +354,9 @@ void LiveInterval::removeValNo(VNInfo *ValNo) { VNInfo *VNI = valnos.back(); valnos.pop_back(); VNI->~VNInfo(); - } while (!valnos.empty() && valnos.back()->def == ~1U); + } while (!valnos.empty() && valnos.back()->isUnused()); } else { - ValNo->def = ~1U; + ValNo->setIsUnused(true); } } @@ -371,9 +372,8 @@ void LiveInterval::scaleNumbering(unsigned factor) { // Scale VNI info. 
for (vni_iterator VNI = vni_begin(), VNIE = vni_end(); VNI != VNIE; ++VNI) { VNInfo *vni = *VNI; - if (vni->def != ~0U && vni->def != ~1U) { - vni->def = InstrSlots::scale(vni->def, factor); - } + + vni->def = InstrSlots::scale(vni->def, factor); for (unsigned i = 0; i < vni->kills.size(); ++i) { if (vni->kills[i] != 0) @@ -421,13 +421,13 @@ VNInfo *LiveInterval::findDefinedVNInfo(unsigned DefIdxOrReg) const { return VNI; } - /// join - Join two live intervals (this, and other) together. This applies /// mappings to the value numbers in the LHS/RHS intervals as specified. If /// the intervals are not joinable, this aborts. void LiveInterval::join(LiveInterval &Other, const int *LHSValNoAssignments, const int *RHSValNoAssignments, - SmallVector<VNInfo*, 16> &NewVNInfo) { + SmallVector<VNInfo*, 16> &NewVNInfo, + MachineRegisterInfo *MRI) { // Determine if any of our live range values are mapped. This is uncommon, so // we want to avoid the interval scan if not. bool MustMapCurValNos = false; @@ -502,8 +502,18 @@ void LiveInterval::join(LiveInterval &Other, const int *LHSValNoAssignments, } weight += Other.weight; - if (Other.preference && !preference) - preference = Other.preference; + + // Update regalloc hint if currently there isn't one. + if (TargetRegisterInfo::isVirtualRegister(reg) && + TargetRegisterInfo::isVirtualRegister(Other.reg)) { + std::pair<unsigned, unsigned> Hint = MRI->getRegAllocationHint(reg); + if (Hint.first == 0 && Hint.second == 0) { + std::pair<unsigned, unsigned> OtherHint = + MRI->getRegAllocationHint(Other.reg); + if (OtherHint.first || OtherHint.second) + MRI->setRegAllocationHint(reg, OtherHint.first, OtherHint.second); + } + } } /// MergeRangesInAsValue - Merge all of the intervals in RHS into this live @@ -582,9 +592,9 @@ void LiveInterval::MergeValueInAsValue(const LiveInterval &RHS, VNInfo *VNI = valnos.back(); valnos.pop_back(); VNI->~VNInfo(); - } while (!valnos.empty() && valnos.back()->def == ~1U); + } while (!valnos.empty() && valnos.back()->isUnused()); } else { - V1->def = ~1U; + V1->setIsUnused(true); } } } @@ -611,7 +621,7 @@ void LiveInterval::MergeInClobberRanges(const LiveInterval &Clobbers, else if (UnusedValNo) ClobberValNo = UnusedValNo; else { - UnusedValNo = ClobberValNo = getNextValue(~0U, 0, VNInfoAllocator); + UnusedValNo = ClobberValNo = getNextValue(0, 0, false, VNInfoAllocator); ValNoMaps.insert(std::make_pair(I->valno, ClobberValNo)); } @@ -664,7 +674,7 @@ void LiveInterval::MergeInClobberRange(unsigned Start, unsigned End, BumpPtrAllocator &VNInfoAllocator) { // Find a value # to use for the clobber ranges. If there is already a value# // for unknown values, use it. 
- VNInfo *ClobberValNo = getNextValue(~0U, 0, VNInfoAllocator); + VNInfo *ClobberValNo = getNextValue(0, 0, false, VNInfoAllocator); iterator IP = begin(); IP = std::upper_bound(IP, end(), Start); @@ -747,24 +757,26 @@ VNInfo* LiveInterval::MergeValueNumberInto(VNInfo *V1, VNInfo *V2) { VNInfo *VNI = valnos.back(); valnos.pop_back(); VNI->~VNInfo(); - } while (valnos.back()->def == ~1U); + } while (valnos.back()->isUnused()); } else { - V1->def = ~1U; + V1->setIsUnused(true); } return V2; } void LiveInterval::Copy(const LiveInterval &RHS, + MachineRegisterInfo *MRI, BumpPtrAllocator &VNInfoAllocator) { ranges.clear(); valnos.clear(); - preference = RHS.preference; + std::pair<unsigned, unsigned> Hint = MRI->getRegAllocationHint(RHS.reg); + MRI->setRegAllocationHint(reg, Hint.first, Hint.second); + weight = RHS.weight; for (unsigned i = 0, e = RHS.getNumValNums(); i != e; ++i) { const VNInfo *VNI = RHS.getValNumInfo(i); - VNInfo *NewVNI = getNextValue(~0U, 0, VNInfoAllocator); - copyValNumInfo(NewVNI, VNI); + createValueCopy(VNI, VNInfoAllocator); } for (unsigned i = 0, e = RHS.ranges.size(); i != e; ++i) { const LiveRange &LR = RHS.ranges[i]; @@ -816,22 +828,22 @@ void LiveInterval::print(std::ostream &OS, const VNInfo *vni = *i; if (vnum) OS << " "; OS << vnum << "@"; - if (vni->def == ~1U) { + if (vni->isUnused()) { OS << "x"; } else { - if (vni->def == ~0U) + if (!vni->isDefAccurate()) OS << "?"; else OS << vni->def; unsigned ee = vni->kills.size(); - if (ee || vni->hasPHIKill) { + if (ee || vni->hasPHIKill()) { OS << "-("; for (unsigned j = 0; j != ee; ++j) { OS << vni->kills[j]; if (j != ee-1) OS << " "; } - if (vni->hasPHIKill) { + if (vni->hasPHIKill()) { if (ee) OS << " "; OS << "phi"; diff --git a/lib/CodeGen/LiveIntervalAnalysis.cpp b/lib/CodeGen/LiveIntervalAnalysis.cpp index cf0a648..d6931df 100644 --- a/lib/CodeGen/LiveIntervalAnalysis.cpp +++ b/lib/CodeGen/LiveIntervalAnalysis.cpp @@ -199,7 +199,7 @@ void LiveIntervals::computeNumbering() { // Remap the VNInfo def index, which works the same as the // start indices above. VN's with special sentinel defs // don't need to be remapped. - if (vni->def != ~0U && vni->def != ~1U) { + if (vni->isDefAccurate() && !vni->isUnused()) { unsigned index = vni->def / InstrSlots::NUM; unsigned offset = vni->def % InstrSlots::NUM; if (offset == InstrSlots::LOAD) { @@ -447,7 +447,7 @@ void LiveIntervals::handleVirtualRegisterDef(MachineBasicBlock *mbb, tii_->isMoveInstr(*mi, SrcReg, DstReg, SrcSubReg, DstSubReg)) CopyMI = mi; // Earlyclobbers move back one. - ValNo = interval.getNextValue(defIndex, CopyMI, VNInfoAllocator); + ValNo = interval.getNextValue(defIndex, CopyMI, true, VNInfoAllocator); assert(ValNo->id == 0 && "First value in interval is not 0?"); @@ -539,13 +539,15 @@ void LiveIntervals::handleVirtualRegisterDef(MachineBasicBlock *mbb, // The new value number (#1) is defined by the instruction we claimed // defined value #0. VNInfo *ValNo = interval.getNextValue(OldValNo->def, OldValNo->copy, + false, // update at * VNInfoAllocator); - + ValNo->setFlags(OldValNo->getFlags()); // * <- updating here + // Value#0 is now defined by the 2-addr instruction. OldValNo->def = RedefIndex; OldValNo->copy = 0; if (MO.isEarlyClobber()) - OldValNo->redefByEC = true; + OldValNo->setHasRedefByEC(true); // Add the new live interval which replaces the range for the input copy. 
LiveRange LR(DefIndex, RedefIndex, ValNo); @@ -577,12 +579,14 @@ void LiveIntervals::handleVirtualRegisterDef(MachineBasicBlock *mbb, DOUT << " Removing [" << Start << "," << End << "] from: "; interval.print(DOUT, tri_); DOUT << "\n"; interval.removeRange(Start, End); - VNI->hasPHIKill = true; + VNI->setHasPHIKill(true); DOUT << " RESULT: "; interval.print(DOUT, tri_); // Replace the interval with one of a NEW value number. Note that this // value number isn't actually defined by an instruction, weird huh? :) - LiveRange LR(Start, End, interval.getNextValue(~0, 0, VNInfoAllocator)); + LiveRange LR(Start, End, + interval.getNextValue(mbb->getNumber(), 0, false, VNInfoAllocator)); + LR.valno->setIsPHIDef(true); DOUT << " replace range with " << LR; interval.addRange(LR); interval.addKill(LR.valno, End); @@ -604,13 +608,13 @@ void LiveIntervals::handleVirtualRegisterDef(MachineBasicBlock *mbb, mi->getOpcode() == TargetInstrInfo::SUBREG_TO_REG || tii_->isMoveInstr(*mi, SrcReg, DstReg, SrcSubReg, DstSubReg)) CopyMI = mi; - ValNo = interval.getNextValue(defIndex, CopyMI, VNInfoAllocator); + ValNo = interval.getNextValue(defIndex, CopyMI, true, VNInfoAllocator); unsigned killIndex = getMBBEndIdx(mbb) + 1; LiveRange LR(defIndex, killIndex, ValNo); interval.addRange(LR); interval.addKill(ValNo, killIndex); - ValNo->hasPHIKill = true; + ValNo->setHasPHIKill(true); DOUT << " +" << LR; } } @@ -692,9 +696,9 @@ exit: LiveInterval::iterator OldLR = interval.FindLiveRangeContaining(start); bool Extend = OldLR != interval.end(); VNInfo *ValNo = Extend - ? OldLR->valno : interval.getNextValue(start, CopyMI, VNInfoAllocator); + ? OldLR->valno : interval.getNextValue(start, CopyMI, true, VNInfoAllocator); if (MO.isEarlyClobber() && Extend) - ValNo->redefByEC = true; + ValNo->setHasRedefByEC(true); LiveRange LR(start, end, ValNo); interval.addRange(LR); interval.addKill(LR.valno, end); @@ -750,7 +754,7 @@ void LiveIntervals::handleLiveInRegister(MachineBasicBlock *MBB, DOUT << " killed"; end = getUseIndex(baseIndex) + 1; SeenDefUse = true; - goto exit; + break; } else if (mi->modifiesRegister(interval.reg, tri_)) { // Another instruction redefines the register before it is ever read. // Then the register is essentially dead at the instruction that defines @@ -759,7 +763,7 @@ void LiveIntervals::handleLiveInRegister(MachineBasicBlock *MBB, DOUT << " dead"; end = getDefIndex(start) + 1; SeenDefUse = true; - goto exit; + break; } baseIndex += InstrSlots::NUM; @@ -771,7 +775,6 @@ void LiveIntervals::handleLiveInRegister(MachineBasicBlock *MBB, } } -exit: // Live-in register might not be used at all. if (!SeenDefUse) { if (isAlias) { @@ -783,7 +786,11 @@ exit: } } - LiveRange LR(start, end, interval.getNextValue(~0U, 0, VNInfoAllocator)); + VNInfo *vni = + interval.getNextValue(MBB->getNumber(), 0, false, VNInfoAllocator); + vni->setIsPHIDef(true); + LiveRange LR(start, end, vni); + interval.addRange(LR); interval.addKill(LR.valno, end); DOUT << " +" << LR << '\n'; @@ -896,7 +903,7 @@ LiveInterval* LiveIntervals::createInterval(unsigned reg) { /// managing the allocated memory. 
LiveInterval* LiveIntervals::dupInterval(LiveInterval *li) { LiveInterval *NewLI = createInterval(li->reg); - NewLI->Copy(*li, getVNInfoAllocator()); + NewLI->Copy(*li, mri_, getVNInfoAllocator()); return NewLI; } @@ -1099,13 +1106,12 @@ bool LiveIntervals::isReMaterializable(const LiveInterval &li, for (LiveInterval::const_vni_iterator i = li.vni_begin(), e = li.vni_end(); i != e; ++i) { const VNInfo *VNI = *i; - unsigned DefIdx = VNI->def; - if (DefIdx == ~1U) + if (VNI->isUnused()) continue; // Dead val#. // Is the def for the val# rematerializable? - if (DefIdx == ~0u) + if (!VNI->isDefAccurate()) return false; - MachineInstr *ReMatDefMI = getInstructionFromIndex(DefIdx); + MachineInstr *ReMatDefMI = getInstructionFromIndex(VNI->def); bool DefIsLoad = false; if (!ReMatDefMI || !isReMaterializable(li, VNI, ReMatDefMI, SpillIs, DefIsLoad)) @@ -1450,7 +1456,7 @@ rewriteInstructionForSpills(const LiveInterval &li, const VNInfo *VNI, if (HasUse) { if (CreatedNewVReg) { LiveRange LR(getLoadIndex(index), getUseIndex(index)+1, - nI.getNextValue(~0U, 0, VNInfoAllocator)); + nI.getNextValue(0, 0, false, VNInfoAllocator)); DOUT << " +" << LR; nI.addRange(LR); } else { @@ -1464,7 +1470,7 @@ rewriteInstructionForSpills(const LiveInterval &li, const VNInfo *VNI, } if (HasDef) { LiveRange LR(getDefIndex(index), getStoreIndex(index), - nI.getNextValue(~0U, 0, VNInfoAllocator)); + nI.getNextValue(0, 0, false, VNInfoAllocator)); DOUT << " +" << LR; nI.addRange(LR); } @@ -1840,14 +1846,14 @@ addIntervalsForSpillsFast(const LiveInterval &li, unsigned index = getInstructionIndex(MI); if (HasUse) { LiveRange LR(getLoadIndex(index), getUseIndex(index), - nI.getNextValue(~0U, 0, getVNInfoAllocator())); + nI.getNextValue(0, 0, false, getVNInfoAllocator())); DOUT << " +" << LR; nI.addRange(LR); vrm.addRestorePoint(NewVReg, MI); } if (HasDef) { LiveRange LR(getDefIndex(index), getStoreIndex(index), - nI.getNextValue(~0U, 0, getVNInfoAllocator())); + nI.getNextValue(0, 0, false, getVNInfoAllocator())); DOUT << " +" << LR; nI.addRange(LR); vrm.addSpillPoint(NewVReg, true, MI); @@ -1961,12 +1967,11 @@ addIntervalsForSpills(const LiveInterval &li, i != e; ++i) { const VNInfo *VNI = *i; unsigned VN = VNI->id; - unsigned DefIdx = VNI->def; - if (DefIdx == ~1U) + if (VNI->isUnused()) continue; // Dead val#. // Is the def for the val# rematerializable? - MachineInstr *ReMatDefMI = (DefIdx == ~0u) - ? 0 : getInstructionFromIndex(DefIdx); + MachineInstr *ReMatDefMI = VNI->isDefAccurate() + ? getInstructionFromIndex(VNI->def) : 0; bool dummy; if (ReMatDefMI && isReMaterializable(li, VNI, ReMatDefMI, SpillIs, dummy)) { // Remember how to remat the def of this val#. @@ -1977,7 +1982,7 @@ addIntervalsForSpills(const LiveInterval &li, ReMatDefs[VN] = Clone; bool CanDelete = true; - if (VNI->hasPHIKill) { + if (VNI->hasPHIKill()) { // A kill is a phi node, not all of its uses can be rematerialized. // It must not be deleted. 
 CanDelete = false;
@@ -2287,8 +2292,8 @@ LiveRange LiveIntervals::addLiveRangeToEndOfBlock(unsigned reg,
 LiveInterval& Interval = getOrCreateInterval(reg);
 VNInfo* VN = Interval.getNextValue(
 getInstructionIndex(startInst) + InstrSlots::DEF,
- startInst, getVNInfoAllocator());
- VN->hasPHIKill = true;
+ startInst, true, getVNInfoAllocator());
+ VN->setHasPHIKill(true);
 VN->kills.push_back(getMBBEndIdx(startInst->getParent()));
 LiveRange LR(getInstructionIndex(startInst) + InstrSlots::DEF,
 getMBBEndIdx(startInst->getParent()) + 1, VN);
diff --git a/lib/CodeGen/LiveVariables.cpp b/lib/CodeGen/LiveVariables.cpp
index 6228821..bd84508 100644
--- a/lib/CodeGen/LiveVariables.cpp
+++ b/lib/CodeGen/LiveVariables.cpp
@@ -359,12 +359,11 @@ bool LiveVariables::HandlePhysRegKill(unsigned Reg, MachineInstr *MI) {
 // That is, unless we are currently processing the last reference itself.
 LastRefOrPartRef->addRegisterDead(Reg, TRI, true);

- /* Partial uses. Mark register def dead and add implicit def of sub-registers which are used. FIXME: LiveIntervalAnalysis can't handle this yet! EAX<dead> = op AL<imp-def> That is, EAX def is dead but AL def extends pass it. Enable this after live interval analysis is fixed to improve codegen!
+ // Partial uses. Mark register def dead and add implicit def of
+ // sub-registers which are used.
+ // EAX<dead> = op AL<imp-def>
+ // That is, EAX def is dead but AL def extends past it.
+ // Enable this after live interval analysis is fixed to improve codegen!
 else if (!PhysRegUse[Reg]) {
 PhysRegDef[Reg]->addRegisterDead(Reg, TRI, true);
 for (const unsigned *SubRegs = TRI->getSubRegisters(Reg);
@@ -377,7 +376,7 @@ bool LiveVariables::HandlePhysRegKill(unsigned Reg, MachineInstr *MI) {
 PartUses.erase(*SS);
 }
 }
- } */
+ }
 else
 LastRefOrPartRef->addRegisterKilled(Reg, TRI, true);
 return true;
diff --git a/lib/CodeGen/MachineRegisterInfo.cpp b/lib/CodeGen/MachineRegisterInfo.cpp
index 4f5ab1f..544d83a 100644
--- a/lib/CodeGen/MachineRegisterInfo.cpp
+++ b/lib/CodeGen/MachineRegisterInfo.cpp
@@ -16,6 +16,7 @@ using namespace llvm;

MachineRegisterInfo::MachineRegisterInfo(const TargetRegisterInfo &TRI) {
 VRegInfo.reserve(256);
+ RegAllocHints.reserve(256);
 RegClass2VRegMap.resize(TRI.getNumRegClasses()+1); // RC ID starts at 1.
 UsedPhysRegs.resize(TRI.getNumRegs());
@@ -64,6 +65,7 @@ MachineRegisterInfo::createVirtualRegister(const TargetRegisterClass *RegClass){
 // Add a reg, but keep track of whether the vector reallocated or not.
 void *ArrayBase = VRegInfo.empty() ? 0 : &VRegInfo[0];
 VRegInfo.push_back(std::make_pair(RegClass, (MachineOperand*)0));
+ RegAllocHints.push_back(std::make_pair(0, 0));

 if (!((&VRegInfo[0] == ArrayBase || VRegInfo.size() == 1)))
 // The vector reallocated, handle this now.
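The MachineRegisterInfo hunk just above anchors the preference-to-hint migration that runs through the register-allocator files later in this diff: the old bare LiveInterval::preference register becomes a per-vreg (hint type, register) pair kept in RegAllocHints parallel to VRegInfo, and the register half of the pair may itself still be virtual. The sketch below shows how a consumer resolves such a hint, assembled from the calls visible in RALinScan::getFreePhysReg further down; the free function and its name are invented for illustration and assume this revision's in-tree LLVM headers.

// Illustrative helper, not part of the patch: resolve a register
// allocation hint to a physical register the way the linear-scan
// allocator does below. Hint.first is a target-defined hint type
// (0 = plain preference); Hint.second is the preferred register,
// which may still be virtual and must be mapped through VirtRegMap.
static unsigned resolveHintedPhysReg(MachineRegisterInfo *MRI,
                                     VirtRegMap *VRM, unsigned VirtReg) {
  std::pair<unsigned, unsigned> Hint = MRI->getRegAllocationHint(VirtReg);
  unsigned PhysReg = Hint.second;
  if (PhysReg && TargetRegisterInfo::isVirtualRegister(PhysReg) &&
      VRM->hasPhys(PhysReg))
    PhysReg = VRM->getPhys(PhysReg); // hinted vreg already has a phys reg
  return PhysReg;                    // 0 when no usable hint is recorded
}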
diff --git a/lib/CodeGen/PreAllocSplitting.cpp b/lib/CodeGen/PreAllocSplitting.cpp index 97d4728..ae60c86 100644 --- a/lib/CodeGen/PreAllocSplitting.cpp +++ b/lib/CodeGen/PreAllocSplitting.cpp @@ -343,7 +343,7 @@ int PreAllocSplitting::CreateSpillStackSlot(unsigned Reg, if (CurrSLI->hasAtLeastOneValue()) CurrSValNo = CurrSLI->getValNumInfo(0); else - CurrSValNo = CurrSLI->getNextValue(~0U, 0, LSs->getVNInfoAllocator()); + CurrSValNo = CurrSLI->getNextValue(0, 0, false, LSs->getVNInfoAllocator()); return SS; } @@ -637,8 +637,9 @@ PreAllocSplitting::PerformPHIConstructionFallBack(MachineBasicBlock::iterator Us if (Phis.count(MBB)) return Phis[MBB]; unsigned StartIndex = LIs->getMBBStartIdx(MBB); - VNInfo *RetVNI = Phis[MBB] = LI->getNextValue(~0U, /*FIXME*/ 0, - LIs->getVNInfoAllocator()); + VNInfo *RetVNI = Phis[MBB] = + LI->getNextValue(0, /*FIXME*/ 0, false, LIs->getVNInfoAllocator()); + if (!IsIntraBlock) LiveOut[MBB] = RetVNI; // If there are no uses or defs between our starting point and the @@ -654,7 +655,7 @@ PreAllocSplitting::PerformPHIConstructionFallBack(MachineBasicBlock::iterator Us IncomingVNs[*PI] = Incoming; } - if (MBB->pred_size() == 1 && !RetVNI->hasPHIKill) { + if (MBB->pred_size() == 1 && !RetVNI->hasPHIKill()) { VNInfo* OldVN = RetVNI; VNInfo* NewVN = IncomingVNs.begin()->second; VNInfo* MergedVN = LI->MergeValueNumberInto(OldVN, NewVN); @@ -678,7 +679,7 @@ PreAllocSplitting::PerformPHIConstructionFallBack(MachineBasicBlock::iterator Us // VNInfo to represent the joined value. for (DenseMap<MachineBasicBlock*, VNInfo*>::iterator I = IncomingVNs.begin(), E = IncomingVNs.end(); I != E; ++I) { - I->second->hasPHIKill = true; + I->second->setHasPHIKill(true); unsigned KillIndex = LIs->getMBBEndIdx(I->first); if (!LiveInterval::isKill(I->second, KillIndex)) LI->addKill(I->second, KillIndex); @@ -730,7 +731,9 @@ void PreAllocSplitting::ReconstructLiveInterval(LiveInterval* LI) { unsigned DefIdx = LIs->getInstructionIndex(&*DI); DefIdx = LiveIntervals::getDefIndex(DefIdx); - VNInfo* NewVN = LI->getNextValue(DefIdx, 0, Alloc); + assert(DI->getOpcode() != TargetInstrInfo::PHI && + "Following NewVN isPHIDef flag incorrect. Fix me!"); + VNInfo* NewVN = LI->getNextValue(DefIdx, 0, true, Alloc); // If the def is a move, set the copy field. unsigned SrcReg, DstReg, SrcSubIdx, DstSubIdx; @@ -793,7 +796,7 @@ void PreAllocSplitting::RenumberValno(VNInfo* VN) { // Bail out if we ever encounter a valno that has a PHI kill. We can't // renumber these. 
- if (OldVN->hasPHIKill) return;
+ if (OldVN->hasPHIKill()) return;

 VNsToCopy.push_back(OldVN);
@@ -823,9 +826,7 @@ void PreAllocSplitting::RenumberValno(VNInfo* VN) {
 VNInfo* OldVN = *OI;

 // Copy the valno over
- VNInfo* NewVN = NewLI.getNextValue(OldVN->def, OldVN->copy,
- LIs->getVNInfoAllocator());
- NewLI.copyValNumInfo(NewVN, OldVN);
+ VNInfo* NewVN = NewLI.createValueCopy(OldVN, LIs->getVNInfoAllocator());
 NewLI.MergeValueInAsValue(*CurrLI, OldVN, NewVN);

 // Remove the valno from the old interval
@@ -873,7 +874,7 @@ bool PreAllocSplitting::Rematerialize(unsigned vreg, VNInfo* ValNo,
 MachineBasicBlock::iterator KillPt = BarrierMBB->end();
 unsigned KillIdx = 0;
- if (ValNo->def == ~0U || DefMI->getParent() == BarrierMBB)
+ if (!ValNo->isDefAccurate() || DefMI->getParent() == BarrierMBB)
 KillPt = findSpillPoint(BarrierMBB, Barrier, NULL, RefsInMBB, KillIdx);
 else
 KillPt = findNextEmptySlot(DefMI->getParent(), DefMI, KillIdx);
@@ -942,7 +943,7 @@ MachineInstr* PreAllocSplitting::FoldSpill(unsigned vreg,
 if (CurrSLI->hasAtLeastOneValue())
 CurrSValNo = CurrSLI->getValNumInfo(0);
 else
- CurrSValNo = CurrSLI->getNextValue(~0U, 0, LSs->getVNInfoAllocator());
+ CurrSValNo = CurrSLI->getNextValue(0, 0, false, LSs->getVNInfoAllocator());
 }

 return FMI;
@@ -1032,13 +1033,13 @@ bool PreAllocSplitting::SplitRegLiveInterval(LiveInterval *LI) {
 CurrLI->FindLiveRangeContaining(LIs->getUseIndex(BarrierIdx));
 VNInfo *ValNo = LR->valno;

- if (ValNo->def == ~1U) {
+ if (ValNo->isUnused()) {
 // Defined by a dead def? How can this be?
 assert(0 && "Val# is defined by a dead def?");
 abort();
 }

- MachineInstr *DefMI = (ValNo->def != ~0U)
+ MachineInstr *DefMI = ValNo->isDefAccurate()
 ? LIs->getInstructionFromIndex(ValNo->def) : NULL;

 // If this would create a new join point, do not split.
@@ -1072,8 +1073,8 @@ bool PreAllocSplitting::SplitRegLiveInterval(LiveInterval *LI) {
 unsigned SpillIndex = 0;
 MachineInstr *SpillMI = NULL;
 int SS = -1;
- if (ValNo->def == ~0U) {
- // If it's defined by a phi, we must split just before the barrier.
+ if (!ValNo->isDefAccurate()) {
+ // If we don't know where the def is, we must split just before the barrier.
 if ((SpillMI = FoldSpill(LI->reg, RC, 0, Barrier,
 BarrierMBB, SS, RefsInMBB))) {
 SpillIndex = LIs->getInstructionIndex(SpillMI);
@@ -1254,17 +1255,16 @@ bool PreAllocSplitting::removeDeadSpills(SmallPtrSet<LiveInterval*, 8>& split) {
 // We don't currently try to handle definitions with PHI kills, because
 // it would involve processing more than one VNInfo at once.
- if (CurrVN->hasPHIKill) continue;
+ if (CurrVN->hasPHIKill()) continue;

 // We also don't try to handle the results of PHI joins, since there's
 // no defining instruction to analyze.
- unsigned DefIdx = CurrVN->def;
- if (DefIdx == ~0U || DefIdx == ~1U) continue;
+ if (!CurrVN->isDefAccurate() || CurrVN->isUnused()) continue;

 // We're only interested in eliminating cruft introduced by the splitter,
 // which is of the form load-use or load-use-store. First, check that the
 // definition is a load, and remember what stack slot we loaded it from.
- MachineInstr* DefMI = LIs->getInstructionFromIndex(DefIdx); + MachineInstr* DefMI = LIs->getInstructionFromIndex(CurrVN->def); int FrameIndex; if (!TII->isLoadFromStackSlot(DefMI, FrameIndex)) continue; @@ -1383,7 +1383,7 @@ bool PreAllocSplitting::createsNewJoin(LiveRange* LR, if (DefMBB == BarrierMBB) return false; - if (LR->valno->hasPHIKill) + if (LR->valno->hasPHIKill()) return false; unsigned MBBEnd = LIs->getMBBEndIdx(BarrierMBB); diff --git a/lib/CodeGen/RegAllocLinearScan.cpp b/lib/CodeGen/RegAllocLinearScan.cpp index 804fae5..41a42fd 100644 --- a/lib/CodeGen/RegAllocLinearScan.cpp +++ b/lib/CodeGen/RegAllocLinearScan.cpp @@ -281,7 +281,8 @@ namespace { /// getFreePhysReg - return a free physical register for this virtual /// register interval if we have one, otherwise return 0. unsigned getFreePhysReg(LiveInterval* cur); - unsigned getFreePhysReg(const TargetRegisterClass *RC, + unsigned getFreePhysReg(LiveInterval* cur, + const TargetRegisterClass *RC, unsigned MaxInactiveCount, SmallVector<unsigned, 256> &inactiveCounts, bool SkipDGRegs); @@ -352,11 +353,12 @@ void RALinScan::ComputeRelatedRegClasses() { /// different register classes or because the coalescer was overly /// conservative. unsigned RALinScan::attemptTrivialCoalescing(LiveInterval &cur, unsigned Reg) { - if ((cur.preference && cur.preference == Reg) || !cur.containsOneValue()) + unsigned Preference = vrm_->getRegAllocPref(cur.reg); + if ((Preference && Preference == Reg) || !cur.containsOneValue()) return Reg; VNInfo *vni = cur.begin()->valno; - if (!vni->def || vni->def == ~1U || vni->def == ~0U) + if (!vni->def || vni->isUnused() || !vni->isDefAccurate()) return Reg; MachineInstr *CopyMI = li_->getInstructionFromIndex(vni->def); unsigned SrcReg, DstReg, SrcSubReg, DstSubReg, PhysReg; @@ -584,7 +586,7 @@ void RALinScan::linearScan() // register allocator had to spill other registers in its register class. if (ls_->getNumIntervals() == 0) return; - if (!vrm_->FindUnusedRegisters(tri_, li_)) + if (!vrm_->FindUnusedRegisters(li_)) return; } @@ -743,7 +745,7 @@ static void addStackInterval(LiveInterval *cur, LiveStacks *ls_, if (SI.hasAtLeastOneValue()) VNI = SI.getValNumInfo(0); else - VNI = SI.getNextValue(~0U, 0, ls_->getVNInfoAllocator()); + VNI = SI.getNextValue(0, 0, false, ls_->getVNInfoAllocator()); LiveInterval &RI = li_->getInterval(cur->reg); // FIXME: This may be overly conservative. @@ -897,7 +899,7 @@ void RALinScan::assignRegOrStackSlotAtInterval(LiveInterval* cur) // This is an implicitly defined live interval, just assign any register. const TargetRegisterClass *RC = mri_->getRegClass(cur->reg); if (cur->empty()) { - unsigned physReg = cur->preference; + unsigned physReg = vrm_->getRegAllocPref(cur->reg); if (!physReg) physReg = *RC->allocation_order_begin(*mf_); DOUT << tri_->getName(physReg) << '\n'; @@ -917,9 +919,9 @@ void RALinScan::assignRegOrStackSlotAtInterval(LiveInterval* cur) // register class, then we should try to assign it the same register. // This can happen when the move is from a larger register class to a smaller // one, e.g. X86::mov32to32_. These move instructions are not coalescable. 
- if (!cur->preference && cur->hasAtLeastOneValue()) {
+ if (!vrm_->getRegAllocPref(cur->reg) && cur->hasAtLeastOneValue()) {
 VNInfo *vni = cur->begin()->valno;
- if (vni->def && vni->def != ~1U && vni->def != ~0U) {
+ if (vni->def && !vni->isUnused() && vni->isDefAccurate()) {
 MachineInstr *CopyMI = li_->getInstructionFromIndex(vni->def);
 unsigned SrcReg, DstReg, SrcSubReg, DstSubReg;
 if (CopyMI &&
@@ -935,7 +937,7 @@ void RALinScan::assignRegOrStackSlotAtInterval(LiveInterval* cur)
 if (DstSubReg)
 Reg = tri_->getMatchingSuperReg(Reg, DstSubReg, RC);
 if (Reg && allocatableRegs_[Reg] && RC->contains(Reg))
- cur->preference = Reg;
+ mri_->setRegAllocationHint(cur->reg, 0, Reg);
 }
 }
 }
@@ -1044,7 +1046,7 @@ void RALinScan::assignRegOrStackSlotAtInterval(LiveInterval* cur)
 if (LiveInterval *NextReloadLI = hasNextReloadInterval(cur)) {
 // "Downgrade" physReg to try to keep physReg from being allocated until
 // the next reload from the same SS is allocated.
- NextReloadLI->preference = physReg;
+ mri_->setRegAllocationHint(NextReloadLI->reg, 0, physReg);
 DowngradeRegister(cur, physReg);
 }
 return;
@@ -1071,7 +1073,7 @@ void RALinScan::assignRegOrStackSlotAtInterval(LiveInterval* cur)

 // Find a register to spill.
 float minWeight = HUGE_VALF;
- unsigned minReg = 0; /*cur->preference*/; // Try the pref register first.
+ unsigned minReg = 0;

 bool Found = false;
 std::vector<std::pair<unsigned,float> > RegsWeights;
@@ -1290,7 +1292,7 @@ void RALinScan::assignRegOrStackSlotAtInterval(LiveInterval* cur)
 // If the interval has a preference, it must be defined by a copy. Clear the
 // preference now since the source interval allocation may have been
 // undone as well.
- i->preference = 0;
+ mri_->setRegAllocationHint(i->reg, 0, 0);
 else {
 UpgradeRegister(ii->second);
 }
@@ -1346,15 +1348,23 @@ void RALinScan::assignRegOrStackSlotAtInterval(LiveInterval* cur)
 }
}

-unsigned RALinScan::getFreePhysReg(const TargetRegisterClass *RC,
+unsigned RALinScan::getFreePhysReg(LiveInterval* cur,
+ const TargetRegisterClass *RC,
 unsigned MaxInactiveCount,
 SmallVector<unsigned, 256> &inactiveCounts,
 bool SkipDGRegs) {
 unsigned FreeReg = 0;
 unsigned FreeRegInactiveCount = 0;

- TargetRegisterClass::iterator I = RC->allocation_order_begin(*mf_);
- TargetRegisterClass::iterator E = RC->allocation_order_end(*mf_);
+ std::pair<unsigned, unsigned> Hint = mri_->getRegAllocationHint(cur->reg);
+ // Resolve second part of the hint (if possible) given the current allocation.
+ unsigned physReg = Hint.second;
+ if (physReg &&
+ TargetRegisterInfo::isVirtualRegister(physReg) && vrm_->hasPhys(physReg))
+ physReg = vrm_->getPhys(physReg);
+
+ TargetRegisterClass::iterator I, E;
+ tie(I, E) = tri_->getAllocationOrder(RC, Hint.first, physReg, *mf_);
 assert(I != E && "No allocatable register in this register class!");

 // Scan for the first available register.
@@ -1377,7 +1387,7 @@ unsigned RALinScan::getFreePhysReg(const TargetRegisterClass *RC,
 // return this register.
 if (FreeReg == 0 || FreeRegInactiveCount == MaxInactiveCount)
 return FreeReg;
-
 // Continue scanning the registers, looking for the one with the highest
 // inactive count. Alkis found that this reduced register pressure very
 // slightly on X86 (in rev 1.94 of this file), though this should probably be
@@ -1428,20 +1438,21 @@ unsigned RALinScan::getFreePhysReg(LiveInterval *cur) {

 // If copy coalescer has assigned a "preferred" register, check if it's
 // available first.
- if (cur->preference) {
- DOUT << "(preferred: " << tri_->getName(cur->preference) << ") ";
- if (isRegAvail(cur->preference) &&
- RC->contains(cur->preference))
- return cur->preference;
+ unsigned Preference = vrm_->getRegAllocPref(cur->reg);
+ if (Preference) {
+ DOUT << "(preferred: " << tri_->getName(Preference) << ") ";
+ if (isRegAvail(Preference) &&
+ RC->contains(Preference))
+ return Preference;
 }

 if (!DowngradedRegs.empty()) {
- unsigned FreeReg = getFreePhysReg(RC, MaxInactiveCount, inactiveCounts,
+ unsigned FreeReg = getFreePhysReg(cur, RC, MaxInactiveCount, inactiveCounts,
 true);
 if (FreeReg)
 return FreeReg;
 }
- return getFreePhysReg(RC, MaxInactiveCount, inactiveCounts, false);
+ return getFreePhysReg(cur, RC, MaxInactiveCount, inactiveCounts, false);
}

FunctionPass* llvm::createLinearScanRegisterAllocator() {
diff --git a/lib/CodeGen/RegAllocPBQP.cpp b/lib/CodeGen/RegAllocPBQP.cpp
index 61450a7..89e2c59 100644
--- a/lib/CodeGen/RegAllocPBQP.cpp
+++ b/lib/CodeGen/RegAllocPBQP.cpp
@@ -651,7 +651,7 @@ void PBQPRegAlloc::addStackInterval(const LiveInterval *spilled,
 if (stackInterval.getNumValNums() != 0)
 vni = stackInterval.getValNumInfo(0);
 else
- vni = stackInterval.getNextValue(-0U, 0, lss->getVNInfoAllocator());
+ vni = stackInterval.getNextValue(0, 0, false, lss->getVNInfoAllocator());

 LiveInterval &rhsInterval = lis->getInterval(spilled->reg);
 stackInterval.MergeRangesInAsValue(rhsInterval, vni);
@@ -733,8 +733,7 @@ void PBQPRegAlloc::finalizeAlloc() const {
 itr != end; ++itr) {

 LiveInterval *li = *itr;
- unsigned physReg = li->preference;
-
+ unsigned physReg = vrm->getRegAllocPref(li->reg);
 if (physReg == 0) {
 const TargetRegisterClass *liRC = mri->getRegClass(li->reg);
 physReg = *liRC->allocation_order_begin(*mf);
diff --git a/lib/CodeGen/SelectionDAG/FastISel.cpp b/lib/CodeGen/SelectionDAG/FastISel.cpp
index 4a7dbeb..24fccf0 100644
--- a/lib/CodeGen/SelectionDAG/FastISel.cpp
+++ b/lib/CodeGen/SelectionDAG/FastISel.cpp
@@ -47,7 +47,6 @@
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/CodeGen/DebugLoc.h"
#include "llvm/CodeGen/DwarfWriter.h"
#include "llvm/Analysis/DebugInfo.h"
#include "llvm/Target/TargetData.h"
@@ -361,7 +360,7 @@ bool FastISel::SelectCall(User *I) {
 // Returned ID is 0 if this is unbalanced "end of inlined
 // scope". This could happen if the optimizer eats dbg intrinsics
 // or "beginning of inlined scope" is not recognized due to
- // missing location info. In such cases, do ignore this region.end.
+ // missing location info. In such cases, ignore this region.end.
 BuildMI(MBB, DL, II).addImm(ID);
 } else {
 const TargetInstrDesc &II = TII.get(TargetInstrInfo::DBG_LABEL);
diff --git a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
index f3c2833..1bb8090 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
@@ -2768,6 +2768,53 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node,
 ISD::SETULT : ISD::SETUGT));
 break;
 }
+ case ISD::UMULO:
+ case ISD::SMULO: {
+ MVT VT = Node->getValueType(0);
+ SDValue LHS = Node->getOperand(0);
+ SDValue RHS = Node->getOperand(1);
+ SDValue BottomHalf;
+ SDValue TopHalf;
+ static unsigned Ops[2][3] =
+ { { ISD::MULHU, ISD::UMUL_LOHI, ISD::ZERO_EXTEND },
+ { ISD::MULHS, ISD::SMUL_LOHI, ISD::SIGN_EXTEND }};
+ bool isSigned = Node->getOpcode() == ISD::SMULO;
+ if (TLI.isOperationLegalOrCustom(Ops[isSigned][0], VT)) {
+ BottomHalf = DAG.getNode(ISD::MUL, dl, VT, LHS, RHS);
+ TopHalf = DAG.getNode(Ops[isSigned][0], dl, VT, LHS, RHS);
+ } else if (TLI.isOperationLegalOrCustom(Ops[isSigned][1], VT)) {
+ BottomHalf = DAG.getNode(Ops[isSigned][1], dl, DAG.getVTList(VT, VT), LHS,
+ RHS);
+ TopHalf = BottomHalf.getValue(1);
+ } else if (TLI.isTypeLegal(MVT::getIntegerVT(VT.getSizeInBits() * 2))) {
+ MVT WideVT = MVT::getIntegerVT(VT.getSizeInBits() * 2);
+ LHS = DAG.getNode(Ops[isSigned][2], dl, WideVT, LHS);
+ RHS = DAG.getNode(Ops[isSigned][2], dl, WideVT, RHS);
+ Tmp1 = DAG.getNode(ISD::MUL, dl, WideVT, LHS, RHS);
+ BottomHalf = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, VT, Tmp1,
+ DAG.getIntPtrConstant(0));
+ TopHalf = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, VT, Tmp1,
+ DAG.getIntPtrConstant(1));
+ } else {
+ // FIXME: We should be able to fall back to a libcall with an illegal
+ // type in some cases.
+ // Also, we can fall back to a division in some cases, but that's a big
+ // performance hit in the general case.
+ assert(0 && "Don't know how to expand this operation yet!");
+ }
+ if (isSigned) {
+ Tmp1 = DAG.getConstant(VT.getSizeInBits() - 1, TLI.getShiftAmountTy());
+ Tmp1 = DAG.getNode(ISD::SRA, dl, VT, BottomHalf, Tmp1);
+ TopHalf = DAG.getSetCC(dl, TLI.getSetCCResultType(VT), TopHalf, Tmp1,
+ ISD::SETNE);
+ } else {
+ TopHalf = DAG.getSetCC(dl, TLI.getSetCCResultType(VT), TopHalf,
+ DAG.getConstant(0, VT), ISD::SETNE);
+ }
+ Results.push_back(BottomHalf);
+ Results.push_back(TopHalf);
+ break;
+ }
 case ISD::BUILD_PAIR: {
 MVT PairTy = Node->getValueType(0);
 Tmp1 = DAG.getNode(ISD::ZERO_EXTEND, dl, PairTy, Node->getOperand(0));
diff --git a/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp b/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp
index b7d7818..6e5adee 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp
@@ -95,14 +95,13 @@ void DAGTypeLegalizer::ExpandRes_BIT_CONVERT(SDNode *N, SDValue &Lo,
 if (InVT.isVector() && OutVT.isInteger()) {
 // Handle cases like i64 = BIT_CONVERT v1i64 on x86, where the operand
 // is legal but the result is not.
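The new ISD::UMULO/ISD::SMULO expansion above tries three strategies in order (a high-half multiply, a MUL_LOHI pair, or a multiply in a double-width type) and in each case flags overflow by inspecting the high half of the product. The last, widening strategy can be sketched in plain C++ as follows; the helper functions and their names are illustrative only, shown here on a 32-bit type widened to 64 bits, with the usual caveat that right-shifting a negative value is implementation-defined (though arithmetic on mainstream compilers).

#include <cstdint>

// Unsigned multiply-with-overflow: the product overflows exactly when
// the high half of the double-width result is nonzero (the SETNE
// against zero in the expansion above).
static bool umulo32(uint32_t a, uint32_t b, uint32_t &lo) {
  uint64_t p = uint64_t(a) * uint64_t(b);
  lo = uint32_t(p);
  return (p >> 32) != 0;
}

// Signed multiply-with-overflow: the product fits exactly when the high
// half equals the sign-extension of the low half, i.e. the low half
// shifted right arithmetically by 31 (the SRA + SETNE in the expansion).
static bool smulo32(int32_t a, int32_t b, int32_t &lo) {
  int64_t p = int64_t(a) * int64_t(b);
  lo = int32_t(p);
  return (p >> 32) != int64_t(lo >> 31);
}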
- MVT NVT = MVT::getVectorVT(TLI.getTypeToTransformTo(OutVT), 2); + MVT NVT = MVT::getVectorVT(NOutVT, 2); if (isTypeLegal(NVT)) { SDValue CastInOp = DAG.getNode(ISD::BIT_CONVERT, dl, NVT, InOp); - MVT EltNVT = NVT.getVectorElementType(); - Lo = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltNVT, CastInOp, + Lo = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, NOutVT, CastInOp, DAG.getIntPtrConstant(0)); - Hi = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltNVT, CastInOp, + Hi = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, NOutVT, CastInOp, DAG.getIntPtrConstant(1)); if (TLI.isBigEndian()) diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuild.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGBuild.cpp index 93750d6..48ebd0f 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGBuild.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuild.cpp @@ -5317,8 +5317,12 @@ void SelectionDAGLowering::visitInlineAsm(CallSite CS) { if ((OpFlag & 7) == 2 /*REGDEF*/ || (OpFlag & 7) == 6 /* EARLYCLOBBER REGDEF */) { // Add (OpFlag&0xffff)>>3 registers to MatchedRegs. - assert(!OpInfo.isIndirect && - "Don't know how to handle tied indirect register inputs yet!"); + if (OpInfo.isIndirect) { + cerr << "llvm: error: " + "Don't know how to handle tied indirect " + "register inputs yet!\n"; + exit(1); + } RegsForValue MatchedRegs; MatchedRegs.TLI = &TLI; MatchedRegs.ValueVTs.push_back(InOperandVal.getValueType()); diff --git a/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/lib/CodeGen/SelectionDAG/TargetLowering.cpp index ab4cd51..a771d46 100644 --- a/lib/CodeGen/SelectionDAG/TargetLowering.cpp +++ b/lib/CodeGen/SelectionDAG/TargetLowering.cpp @@ -171,6 +171,8 @@ static void InitLibcallNames(const char **Names) { Names[RTLIB::FPROUND_PPCF128_F32] = "__trunctfsf2"; Names[RTLIB::FPROUND_F80_F64] = "__truncxfdf2"; Names[RTLIB::FPROUND_PPCF128_F64] = "__trunctfdf2"; + Names[RTLIB::FPTOSINT_F32_I8] = "__fixsfi8"; + Names[RTLIB::FPTOSINT_F32_I16] = "__fixsfi16"; Names[RTLIB::FPTOSINT_F32_I32] = "__fixsfsi"; Names[RTLIB::FPTOSINT_F32_I64] = "__fixsfdi"; Names[RTLIB::FPTOSINT_F32_I128] = "__fixsfti"; @@ -183,6 +185,8 @@ static void InitLibcallNames(const char **Names) { Names[RTLIB::FPTOSINT_PPCF128_I32] = "__fixtfsi"; Names[RTLIB::FPTOSINT_PPCF128_I64] = "__fixtfdi"; Names[RTLIB::FPTOSINT_PPCF128_I128] = "__fixtfti"; + Names[RTLIB::FPTOUINT_F32_I8] = "__fixunssfi8"; + Names[RTLIB::FPTOUINT_F32_I16] = "__fixunssfi16"; Names[RTLIB::FPTOUINT_F32_I32] = "__fixunssfsi"; Names[RTLIB::FPTOUINT_F32_I64] = "__fixunssfdi"; Names[RTLIB::FPTOUINT_F32_I128] = "__fixunssfti"; @@ -271,6 +275,10 @@ RTLIB::Libcall RTLIB::getFPROUND(MVT OpVT, MVT RetVT) { /// UNKNOWN_LIBCALL if there is none. RTLIB::Libcall RTLIB::getFPTOSINT(MVT OpVT, MVT RetVT) { if (OpVT == MVT::f32) { + if (RetVT == MVT::i8) + return FPTOSINT_F32_I8; + if (RetVT == MVT::i16) + return FPTOSINT_F32_I16; if (RetVT == MVT::i32) return FPTOSINT_F32_I32; if (RetVT == MVT::i64) @@ -306,6 +314,10 @@ RTLIB::Libcall RTLIB::getFPTOSINT(MVT OpVT, MVT RetVT) { /// UNKNOWN_LIBCALL if there is none. RTLIB::Libcall RTLIB::getFPTOUINT(MVT OpVT, MVT RetVT) { if (OpVT == MVT::f32) { + if (RetVT == MVT::i8) + return FPTOUINT_F32_I8; + if (RetVT == MVT::i16) + return FPTOUINT_F32_I16; if (RetVT == MVT::i32) return FPTOUINT_F32_I32; if (RetVT == MVT::i64) @@ -2584,8 +2596,12 @@ bool TargetLowering::CheckTailCallReturnConstraints(CallSDNode *TheCall, // Check that operand of the RET node sources from the CALL node. The RET node // has at least two operands. Operand 0 holds the chain. Operand 1 holds the // value. 
+ // Also we need to check that there is no code in between the call and the + // return. Hence we also check that the incoming chain to the return sources + // from the outgoing chain of the call. if (NumOps > 1 && - IgnoreHarmlessInstructions(Ret.getOperand(1)) == SDValue(TheCall,0) + IgnoreHarmlessInstructions(Ret.getOperand(1)) == SDValue(TheCall,0) && + Ret.getOperand(0) == SDValue(TheCall, TheCall->getNumValues()-1)) return true; // void return: The RET node has the chain result value of the CALL node as // input. diff --git a/lib/CodeGen/SimpleRegisterCoalescing.cpp b/lib/CodeGen/SimpleRegisterCoalescing.cpp index 2bc234f..2034805 100644 --- a/lib/CodeGen/SimpleRegisterCoalescing.cpp +++ b/lib/CodeGen/SimpleRegisterCoalescing.cpp @@ -141,7 +141,7 @@ bool SimpleRegisterCoalescing::AdjustCopiesBackFrom(LiveInterval &IntA, // The live interval of ECX is represented as this: // %reg20,inf = [46,47:1)[174,230:0) 0@174-(230) 1@46-(47) // The coalescer has no idea there was a def in the middle of [174,230]. - if (AValNo->redefByEC) + if (AValNo->hasRedefByEC()) return false; // If AValNo is defined as a copy from IntB, we can potentially process this. @@ -203,7 +203,8 @@ bool SimpleRegisterCoalescing::AdjustCopiesBackFrom(LiveInterval &IntA, for (const unsigned *SR = tri_->getSubRegisters(IntB.reg); *SR; ++SR) { LiveInterval &SRLI = li_->getInterval(*SR); SRLI.addRange(LiveRange(FillerStart, FillerEnd, - SRLI.getNextValue(FillerStart, 0, li_->getVNInfoAllocator()))); + SRLI.getNextValue(FillerStart, 0, true, + li_->getVNInfoAllocator()))); } } @@ -304,8 +305,10 @@ bool SimpleRegisterCoalescing::RemoveCopyByCommutingDef(LiveInterval &IntA, assert(ALR != IntA.end() && "Live range not found!"); VNInfo *AValNo = ALR->valno; // If other defs can reach uses of this def, then it's not safe to perform - // the optimization. - if (AValNo->def == ~0U || AValNo->def == ~1U || AValNo->hasPHIKill) + // the optimization. FIXME: Do isPHIDef and isDefAccurate both need to be + // tested? + if (AValNo->isPHIDef() || !AValNo->isDefAccurate() || + AValNo->isUnused() || AValNo->hasPHIKill()) return false; MachineInstr *DefMI = li_->getInstructionFromIndex(AValNo->def); const TargetInstrDesc &TID = DefMI->getDesc(); @@ -351,7 +354,7 @@ bool SimpleRegisterCoalescing::RemoveCopyByCommutingDef(LiveInterval &IntA, unsigned OpIdx = NewMI->findRegisterUseOperandIdx(IntA.reg, false); NewMI->getOperand(OpIdx).setIsKill(); - bool BHasPHIKill = BValNo->hasPHIKill; + bool BHasPHIKill = BValNo->hasPHIKill(); SmallVector<VNInfo*, 4> BDeadValNos; SmallVector<unsigned, 4> BKills; std::map<unsigned, unsigned> BExtend; @@ -403,7 +406,7 @@ bool SimpleRegisterCoalescing::RemoveCopyByCommutingDef(LiveInterval &IntA, // extended to the end of the existing live range defined by the copy.
unsigned DefIdx = li_->getDefIndex(UseIdx); const LiveRange *DLR = IntB.getLiveRangeContaining(DefIdx); - BHasPHIKill |= DLR->valno->hasPHIKill; + BHasPHIKill |= DLR->valno->hasPHIKill(); assert(DLR->valno->def == DefIdx); BDeadValNos.push_back(DLR->valno); BExtend[DLR->start] = DLR->end; @@ -462,7 +465,7 @@ bool SimpleRegisterCoalescing::RemoveCopyByCommutingDef(LiveInterval &IntA, } } IntB.addKills(ValNo, BKills); - ValNo->hasPHIKill = BHasPHIKill; + ValNo->setHasPHIKill(BHasPHIKill); DOUT << " result = "; IntB.print(DOUT, tri_); DOUT << "\n"; @@ -578,8 +581,10 @@ bool SimpleRegisterCoalescing::ReMaterializeTrivialDef(LiveInterval &SrcInt, assert(SrcLR != SrcInt.end() && "Live range not found!"); VNInfo *ValNo = SrcLR->valno; // If other defs can reach uses of this def, then it's not safe to perform - // the optimization. - if (ValNo->def == ~0U || ValNo->def == ~1U || ValNo->hasPHIKill) + // the optimization. FIXME: Do isPHIDef and isDefAccurate both need to be + // tested? + if (ValNo->isPHIDef() || !ValNo->isDefAccurate() || + ValNo->isUnused() || ValNo->hasPHIKill()) return false; MachineInstr *DefMI = li_->getInstructionFromIndex(ValNo->def); const TargetInstrDesc &TID = DefMI->getDesc(); @@ -616,19 +621,17 @@ bool SimpleRegisterCoalescing::ReMaterializeTrivialDef(LiveInterval &SrcInt, } MachineBasicBlock::iterator MII = next(MachineBasicBlock::iterator(CopyMI)); - CopyMI->removeFromParent(); tii_->reMaterialize(*MBB, MII, DstReg, DefMI); MachineInstr *NewMI = prior(MII); if (checkForDeadDef) { - // PR4090 fix: Trim interval failed because there was no use of the - // source interval in this MBB. If the def is in this MBB too then we - // should mark it dead: - if (DefMI->getParent() == MBB) { - DefMI->addRegisterDead(SrcInt.reg, tri_); - SrcLR->end = SrcLR->start + 1; - } - + // PR4090 fix: Trim interval failed because there was no use of the + // source interval in this MBB. If the def is in this MBB too then we + // should mark it dead: + if (DefMI->getParent() == MBB) { + DefMI->addRegisterDead(SrcInt.reg, tri_); + SrcLR->end = SrcLR->start + 1; + } } // CopyMI may have implicit operands, transfer them over to the newly @@ -647,7 +650,7 @@ bool SimpleRegisterCoalescing::ReMaterializeTrivialDef(LiveInterval &SrcInt, } li_->ReplaceMachineInstrInMaps(CopyMI, NewMI); - MBB->getParent()->DeleteMachineInstr(CopyMI); + CopyMI->eraseFromParent(); ReMatCopies.insert(CopyMI); ReMatDefs.insert(DefMI); ++NumReMats; @@ -673,7 +676,7 @@ bool SimpleRegisterCoalescing::isBackEdgeCopy(MachineInstr *CopyMI, return false; unsigned KillIdx = li_->getMBBEndIdx(MBB) + 1; if (DstLR->valno->kills.size() == 1 && - DstLR->valno->kills[0] == KillIdx && DstLR->valno->hasPHIKill) + DstLR->valno->kills[0] == KillIdx && DstLR->valno->hasPHIKill()) return true; return false; } @@ -937,7 +940,7 @@ bool SimpleRegisterCoalescing::CanCoalesceWithImpDef(MachineInstr *CopyMI, LiveInterval::iterator LR = li.FindLiveRangeContaining(CopyIdx); if (LR == li.end()) return false; - if (LR->valno->hasPHIKill) + if (LR->valno->hasPHIKill()) return false; if (LR->valno->def != CopyIdx) return false; @@ -965,11 +968,11 @@ bool SimpleRegisterCoalescing::CanCoalesceWithImpDef(MachineInstr *CopyMI, } -/// RemoveCopiesFromValNo - The specified value# is defined by an implicit -/// def and it is being removed. Turn all copies from this value# into -/// identity copies so they will be removed. 
-void SimpleRegisterCoalescing::RemoveCopiesFromValNo(LiveInterval &li, - VNInfo *VNI) { +/// TurnCopiesFromValNoToImpDefs - The specified value# is defined by an +/// implicit_def and it is being removed. Turn all copies from this value# +/// into implicit_defs. +void SimpleRegisterCoalescing::TurnCopiesFromValNoToImpDefs(LiveInterval &li, + VNInfo *VNI) { SmallVector<MachineInstr*, 4> ImpDefs; MachineOperand *LastUse = NULL; unsigned LastUseIdx = li_->getUseIndex(VNI->def); @@ -979,9 +982,8 @@ void SimpleRegisterCoalescing::RemoveCopiesFromValNo(LiveInterval &li, MachineInstr *MI = &*RI; ++RI; if (MO->isDef()) { - if (MI->getOpcode() == TargetInstrInfo::IMPLICIT_DEF) { + if (MI->getOpcode() == TargetInstrInfo::IMPLICIT_DEF) ImpDefs.push_back(MI); - } continue; } if (JoinedCopies.count(MI)) @@ -994,13 +996,18 @@ void SimpleRegisterCoalescing::RemoveCopiesFromValNo(LiveInterval &li, unsigned SrcReg, DstReg, SrcSubIdx, DstSubIdx; if (tii_->isMoveInstr(*MI, SrcReg, DstReg, SrcSubIdx, DstSubIdx) && SrcReg == li.reg) { - // Each use MI may have multiple uses of this register. Change them all. - for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { - MachineOperand &MO = MI->getOperand(i); - if (MO.isReg() && MO.getReg() == li.reg) - MO.setReg(DstReg); - } - JoinedCopies.insert(MI); + // Change it to an implicit_def. + MI->setDesc(tii_->get(TargetInstrInfo::IMPLICIT_DEF)); + for (int i = MI->getNumOperands() - 1, e = 0; i > e; --i) + MI->RemoveOperand(i); + // It's no longer a copy, update the valno it defines. + unsigned DefIdx = li_->getDefIndex(UseIdx); + LiveInterval &DstInt = li_->getInterval(DstReg); + LiveInterval::iterator DLR = DstInt.FindLiveRangeContaining(DefIdx); + assert(DLR != DstInt.end() && "Live range not found!"); + assert(DLR->valno->copy == MI); + DLR->valno->copy = NULL; + ReMatCopies.insert(MI); } else if (UseIdx > LastUseIdx) { LastUseIdx = UseIdx; LastUse = MO; @@ -1265,6 +1272,17 @@ SimpleRegisterCoalescing::CanJoinInsertSubRegToPhysReg(unsigned DstReg, return true; } +/// getRegAllocPreference - Return register allocation preference register. +/// +static unsigned getRegAllocPreference(unsigned Reg, MachineFunction &MF, + MachineRegisterInfo *MRI, + const TargetRegisterInfo *TRI) { + if (TargetRegisterInfo::isPhysicalRegister(Reg)) + return 0; + std::pair<unsigned, unsigned> Hint = MRI->getRegAllocationHint(Reg); + return TRI->ResolveRegAllocHint(Hint.first, Hint.second, MF); +} + /// JoinCopy - Attempt to join intervals corresponding to SrcReg/DstReg, /// which are the src/dst of the copy instruction CopyMI. This returns true /// if the copy was successfully coalesced away. If it is not currently @@ -1566,7 +1584,7 @@ bool SimpleRegisterCoalescing::JoinCopy(CopyRec &TheCopy, bool &Again) { if (PhysJoinTweak) { if (SrcIsPhys) { if (!isWinToJoinVRWithSrcPhysReg(CopyMI, CopyMBB, DstInt, SrcInt)) { - DstInt.preference = SrcReg; + mri_->setRegAllocationHint(DstInt.reg, 0, SrcReg); ++numAborts; DOUT << "\tMay tie down a physical register, abort!\n"; Again = true; // May be possible to coalesce later. @@ -1574,7 +1592,7 @@ bool SimpleRegisterCoalescing::JoinCopy(CopyRec &TheCopy, bool &Again) { } } else { if (!isWinToJoinVRWithDstPhysReg(CopyMI, CopyMBB, DstInt, SrcInt)) { - SrcInt.preference = DstReg; + mri_->setRegAllocationHint(SrcInt.reg, 0, DstReg); ++numAborts; DOUT << "\tMay tie down a physical register, abort!\n"; Again = true; // May be possible to coalesce later. 
@@ -1598,7 +1616,7 @@ bool SimpleRegisterCoalescing::JoinCopy(CopyRec &TheCopy, bool &Again) { if (Length > Threshold && (((float)std::distance(mri_->use_begin(JoinVReg), mri_->use_end()) / Length) < Ratio)) { - JoinVInt.preference = JoinPReg; + mri_->setRegAllocationHint(JoinVInt.reg, 0, JoinPReg); ++numAborts; DOUT << "\tMay tie down a physical register, abort!\n"; Again = true; // May be possible to coalesce later. @@ -1669,9 +1687,9 @@ bool SimpleRegisterCoalescing::JoinCopy(CopyRec &TheCopy, bool &Again) { E = SavedLI->vni_end(); I != E; ++I) { const VNInfo *ValNo = *I; VNInfo *NewValNo = RealInt.getNextValue(ValNo->def, ValNo->copy, + false, // updated at * li_->getVNInfoAllocator()); - NewValNo->hasPHIKill = ValNo->hasPHIKill; - NewValNo->redefByEC = ValNo->redefByEC; + NewValNo->setFlags(ValNo->getFlags()); // * updated here. RealInt.addKills(NewValNo, ValNo->kills); RealInt.MergeValueInAsValue(*SavedLI, ValNo, NewValNo); } @@ -1691,7 +1709,7 @@ bool SimpleRegisterCoalescing::JoinCopy(CopyRec &TheCopy, bool &Again) { !SrcIsPhys && !DstIsPhys) { if ((isExtSubReg && !Swapped) || ((isInsSubReg || isSubRegToReg) && Swapped)) { - ResSrcInt->Copy(*ResDstInt, li_->getVNInfoAllocator()); + ResSrcInt->Copy(*ResDstInt, mri_, li_->getVNInfoAllocator()); std::swap(SrcReg, DstReg); std::swap(ResSrcInt, ResDstInt); } @@ -1710,7 +1728,8 @@ bool SimpleRegisterCoalescing::JoinCopy(CopyRec &TheCopy, bool &Again) { for (LiveInterval::const_vni_iterator i = ResSrcInt->vni_begin(), e = ResSrcInt->vni_end(); i != e; ++i) { const VNInfo *vni = *i; - if (!vni->def || vni->def == ~1U || vni->def == ~0U) + // FIXME: Do isPHIDef and isDefAccurate both need to be tested? + if (!vni->def || vni->isUnused() || vni->isPHIDef() || !vni->isDefAccurate()) continue; MachineInstr *CopyMI = li_->getInstructionFromIndex(vni->def); unsigned NewSrcReg, NewDstReg, NewSrcSubIdx, NewDstSubIdx; @@ -1747,6 +1766,9 @@ bool SimpleRegisterCoalescing::JoinCopy(CopyRec &TheCopy, bool &Again) { // being merged. li_->removeInterval(SrcReg); + // Update regalloc hint. + tri_->UpdateRegAllocHint(SrcReg, DstReg, *mf_); + // Manually deleted the live interval copy. if (SavedLI) { SavedLI->clear(); @@ -1762,7 +1784,7 @@ bool SimpleRegisterCoalescing::JoinCopy(CopyRec &TheCopy, bool &Again) { VNInfo *ImpVal = LR->valno; assert(ImpVal->def == CopyIdx); unsigned NextDef = LR->end; - RemoveCopiesFromValNo(*ResDstInt, ImpVal); + TurnCopiesFromValNoToImpDefs(*ResDstInt, ImpVal); ResDstInt->removeValNo(ImpVal); LR = ResDstInt->FindLiveRangeContaining(NextDef); if (LR != ResDstInt->end() && LR->valno->def == NextDef) { @@ -1778,11 +1800,12 @@ bool SimpleRegisterCoalescing::JoinCopy(CopyRec &TheCopy, bool &Again) { // If resulting interval has a preference that no longer fits because of subreg // coalescing, just clear the preference. - if (ResDstInt->preference && (isExtSubReg || isInsSubReg || isSubRegToReg) && + unsigned Preference = getRegAllocPreference(ResDstInt->reg, *mf_, mri_, tri_); + if (Preference && (isExtSubReg || isInsSubReg || isSubRegToReg) && TargetRegisterInfo::isVirtualRegister(ResDstInt->reg)) { const TargetRegisterClass *RC = mri_->getRegClass(ResDstInt->reg); - if (!RC->contains(ResDstInt->preference)) - ResDstInt->preference = 0; + if (!RC->contains(Preference)) + mri_->setRegAllocationHint(ResDstInt->reg, 0, 0); } DOUT << "\n\t\tJoined. 
Result = "; ResDstInt->print(DOUT, tri_); @@ -1856,7 +1879,8 @@ bool SimpleRegisterCoalescing::RangeIsDefinedByCopyFromReg(LiveInterval &li, unsigned SrcReg = li_->getVNInfoSourceReg(LR->valno); if (SrcReg == Reg) return true; - if (LR->valno->def == ~0U && + // FIXME: Do isPHIDef and isDefAccurate both need to be tested? + if ((LR->valno->isPHIDef() || !LR->valno->isDefAccurate()) && TargetRegisterInfo::isPhysicalRegister(li.reg) && *tri_->getSuperRegisters(li.reg)) { // It's a sub-register live interval, we may not have precise information. @@ -2025,12 +2049,20 @@ bool SimpleRegisterCoalescing::SimpleJoin(LiveInterval &LHS, LiveInterval &RHS){ // Okay, the final step is to loop over the RHS live intervals, adding them to // the LHS. - LHSValNo->hasPHIKill |= VNI->hasPHIKill; + if (VNI->hasPHIKill()) + LHSValNo->setHasPHIKill(true); LHS.addKills(LHSValNo, VNI->kills); LHS.MergeRangesInAsValue(RHS, LHSValNo); LHS.weight += RHS.weight; - if (RHS.preference && !LHS.preference) - LHS.preference = RHS.preference; + + // Update regalloc hint if both are virtual registers. + if (TargetRegisterInfo::isVirtualRegister(LHS.reg) && + TargetRegisterInfo::isVirtualRegister(RHS.reg)) { + std::pair<unsigned, unsigned> RHSPref = mri_->getRegAllocationHint(RHS.reg); + std::pair<unsigned, unsigned> LHSPref = mri_->getRegAllocationHint(LHS.reg); + if (RHSPref != LHSPref) + mri_->setRegAllocationHint(LHS.reg, RHSPref.first, RHSPref.second); + } // Update the liveintervals of sub-registers. if (TargetRegisterInfo::isPhysicalRegister(LHS.reg)) @@ -2185,7 +2217,7 @@ SimpleRegisterCoalescing::JoinIntervals(LiveInterval &LHS, LiveInterval &RHS, for (LiveInterval::vni_iterator i = LHS.vni_begin(), e = LHS.vni_end(); i != e; ++i) { VNInfo *VNI = *i; - if (VNI->def == ~1U || VNI->copy == 0) // Src not defined by a copy? + if (VNI->isUnused() || VNI->copy == 0) // Src not defined by a copy? continue; // DstReg is known to be a register in the LHS interval. If the src is @@ -2202,7 +2234,7 @@ SimpleRegisterCoalescing::JoinIntervals(LiveInterval &LHS, LiveInterval &RHS, for (LiveInterval::vni_iterator i = RHS.vni_begin(), e = RHS.vni_end(); i != e; ++i) { VNInfo *VNI = *i; - if (VNI->def == ~1U || VNI->copy == 0) // Src not defined by a copy? + if (VNI->isUnused() || VNI->copy == 0) // Src not defined by a copy? continue; // DstReg is known to be a register in the RHS interval. If the src is @@ -2222,7 +2254,7 @@ SimpleRegisterCoalescing::JoinIntervals(LiveInterval &LHS, LiveInterval &RHS, i != e; ++i) { VNInfo *VNI = *i; unsigned VN = VNI->id; - if (LHSValNoAssignments[VN] >= 0 || VNI->def == ~1U) + if (LHSValNoAssignments[VN] >= 0 || VNI->isUnused()) continue; ComputeUltimateVN(VNI, NewVNInfo, LHSValsDefinedFromRHS, RHSValsDefinedFromLHS, @@ -2232,7 +2264,7 @@ SimpleRegisterCoalescing::JoinIntervals(LiveInterval &LHS, LiveInterval &RHS, i != e; ++i) { VNInfo *VNI = *i; unsigned VN = VNI->id; - if (RHSValNoAssignments[VN] >= 0 || VNI->def == ~1U) + if (RHSValNoAssignments[VN] >= 0 || VNI->isUnused()) continue; // If this value number isn't a copy from the LHS, it's a new number. 
if (RHSValsDefinedFromLHS.find(VNI) == RHSValsDefinedFromLHS.end()) { @@ -2296,7 +2328,8 @@ SimpleRegisterCoalescing::JoinIntervals(LiveInterval &LHS, LiveInterval &RHS, VNInfo *VNI = I->first; unsigned LHSValID = LHSValNoAssignments[VNI->id]; LiveInterval::removeKill(NewVNInfo[LHSValID], VNI->def); - NewVNInfo[LHSValID]->hasPHIKill |= VNI->hasPHIKill; + if (VNI->hasPHIKill()) + NewVNInfo[LHSValID]->setHasPHIKill(true); RHS.addKills(NewVNInfo[LHSValID], VNI->kills); } @@ -2306,7 +2339,8 @@ SimpleRegisterCoalescing::JoinIntervals(LiveInterval &LHS, LiveInterval &RHS, VNInfo *VNI = I->first; unsigned RHSValID = RHSValNoAssignments[VNI->id]; LiveInterval::removeKill(NewVNInfo[RHSValID], VNI->def); - NewVNInfo[RHSValID]->hasPHIKill |= VNI->hasPHIKill; + if (VNI->hasPHIKill()) + NewVNInfo[RHSValID]->setHasPHIKill(true); LHS.addKills(NewVNInfo[RHSValID], VNI->kills); } @@ -2315,10 +2349,12 @@ SimpleRegisterCoalescing::JoinIntervals(LiveInterval &LHS, LiveInterval &RHS, if ((RHS.ranges.size() > LHS.ranges.size() && TargetRegisterInfo::isVirtualRegister(LHS.reg)) || TargetRegisterInfo::isPhysicalRegister(RHS.reg)) { - RHS.join(LHS, &RHSValNoAssignments[0], &LHSValNoAssignments[0], NewVNInfo); + RHS.join(LHS, &RHSValNoAssignments[0], &LHSValNoAssignments[0], NewVNInfo, + mri_); Swapped = true; } else { - LHS.join(RHS, &LHSValNoAssignments[0], &RHSValNoAssignments[0], NewVNInfo); + LHS.join(RHS, &LHSValNoAssignments[0], &RHSValNoAssignments[0], NewVNInfo, + mri_); Swapped = false; } return true; @@ -2620,6 +2656,11 @@ SimpleRegisterCoalescing::TurnCopyIntoImpDef(MachineBasicBlock::iterator &I, return false; LiveInterval &DstInt = li_->getInterval(DstReg); const LiveRange *DstLR = DstInt.getLiveRangeContaining(CopyIdx); + // If the valno extends beyond this basic block, then it's not safe to delete + // the val# or else livein information won't be correct. + MachineBasicBlock *EndMBB = li_->getMBBFromIndex(DstLR->end); + if (EndMBB != MBB) + return false; DstInt.removeValNo(DstLR->valno); CopyMI->setDesc(tii_->get(TargetInstrInfo::IMPLICIT_DEF)); for (int i = CopyMI->getNumOperands() - 1, e = 0; i > e; --i) @@ -2800,7 +2841,8 @@ bool SimpleRegisterCoalescing::runOnMachineFunction(MachineFunction &fn) { } // Slightly prefer live interval that has been assigned a preferred reg. - if (LI.preference) + std::pair<unsigned, unsigned> Hint = mri_->getRegAllocationHint(LI.reg); + if (Hint.first || Hint.second) LI.weight *= 1.01F; // Divide the weight of the interval by its size. This encourages diff --git a/lib/CodeGen/SimpleRegisterCoalescing.h b/lib/CodeGen/SimpleRegisterCoalescing.h index a495bfd..d2c5581 100644 --- a/lib/CodeGen/SimpleRegisterCoalescing.h +++ b/lib/CodeGen/SimpleRegisterCoalescing.h @@ -219,10 +219,10 @@ namespace llvm { bool CanCoalesceWithImpDef(MachineInstr *CopyMI, LiveInterval &li, LiveInterval &ImpLi) const; - /// RemoveCopiesFromValNo - The specified value# is defined by an implicit - /// def and it is being removed. Turn all copies from this value# into - /// identity copies so they will be removed. - void RemoveCopiesFromValNo(LiveInterval &li, VNInfo *VNI); + /// TurnCopiesFromValNoToImpDefs - The specified value# is defined by an + /// implicit_def and it is being removed. Turn all copies from this value# + /// into implicit_defs. + void TurnCopiesFromValNoToImpDefs(LiveInterval &li, VNInfo *VNI); /// isWinToJoinVRWithSrcPhysReg - Return true if it's worthwhile to join /// a virtual destination register with a physical source register.
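Most of the coalescer churn above is mechanical: VNInfo's public bools (hasPHIKill, redefByEC) and the ~0U/~1U def sentinels give way to flag accessors (isPHIDef(), isUnused(), isDefAccurate(), hasPHIKill(), setHasPHIKill()). A minimal sketch of the accessor shape this implies; the real definition lives in LiveInterval.h, so treat the layout here as an assumption:

// Sketch: per-value-number predicates packed into one flags word.
class VNInfoSketch {
  enum Flags {
    IS_PHI_DEF      = 1,
    IS_UNUSED       = 2,
    IS_DEF_ACCURATE = 4,
    HAS_PHI_KILL    = 8
  };
  unsigned flags;
public:
  VNInfoSketch() : flags(0) {}
  bool isPHIDef() const      { return (flags & IS_PHI_DEF) != 0; }
  bool isUnused() const      { return (flags & IS_UNUSED) != 0; }
  bool isDefAccurate() const { return (flags & IS_DEF_ACCURATE) != 0; }
  bool hasPHIKill() const    { return (flags & HAS_PHI_KILL) != 0; }
  void setHasPHIKill(bool b) {
    if (b) flags |= HAS_PHI_KILL;
    else   flags &= ~(unsigned)HAS_PHI_KILL;
  }
  // Bulk copy, as the JoinCopy hunk does with setFlags(getFlags()).
  unsigned getFlags() const  { return flags; }
  void setFlags(unsigned f)  { flags = f; }
};

One payoff visible above: NewValNo->setFlags(ValNo->getFlags()) replaces two field-by-field copies and will not silently miss flags added later.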
diff --git a/lib/CodeGen/Spiller.cpp b/lib/CodeGen/Spiller.cpp index ce63121..919a0ce 100644 --- a/lib/CodeGen/Spiller.cpp +++ b/lib/CodeGen/Spiller.cpp @@ -39,7 +39,8 @@ protected: VirtRegMap *vrm; /// Construct a spiller base. - SpillerBase(MachineFunction *mf, LiveIntervals *lis, LiveStacks *ls, VirtRegMap *vrm) : + SpillerBase(MachineFunction *mf, LiveIntervals *lis, LiveStacks *ls, + VirtRegMap *vrm) : mf(mf), lis(lis), ls(ls), vrm(vrm) { mfi = mf->getFrameInfo(); @@ -47,16 +48,24 @@ protected: tii = mf->getTarget().getInstrInfo(); } - /// Insert a store of the given vreg to the given stack slot immediately - /// after the given instruction. Returns the base index of the inserted - /// instruction. The caller is responsible for adding an appropriate - /// LiveInterval to the LiveIntervals analysis. - unsigned insertStoreFor(MachineInstr *mi, unsigned ss, - unsigned newVReg, - const TargetRegisterClass *trc) { - MachineBasicBlock::iterator nextInstItr(mi); - ++nextInstItr; + /// Ensures there is space before the given machine instruction, returns the + /// instruction's new number. + unsigned makeSpaceBefore(MachineInstr *mi) { + if (!lis->hasGapBeforeInstr(lis->getInstructionIndex(mi))) { + lis->scaleNumbering(2); + ls->scaleNumbering(2); + } + + unsigned miIdx = lis->getInstructionIndex(mi); + assert(lis->hasGapBeforeInstr(miIdx)); + + return miIdx; + } + + /// Ensure there is space after the given machine instruction, returns the + /// instruction's new number. + unsigned makeSpaceAfter(MachineInstr *mi) { if (!lis->hasGapAfterInstr(lis->getInstructionIndex(mi))) { lis->scaleNumbering(2); ls->scaleNumbering(2); @@ -66,7 +75,24 @@ protected: assert(lis->hasGapAfterInstr(miIdx)); - tii->storeRegToStackSlot(*mi->getParent(), nextInstItr, newVReg, + return miIdx; + } + + + /// Insert a store of the given vreg to the given stack slot immediately + /// after the given instruction. Returns the base index of the inserted + /// instruction. The caller is responsible for adding an appropriate + /// LiveInterval to the LiveIntervals analysis. + unsigned insertStoreFor(MachineInstr *mi, unsigned ss, + unsigned vreg, + const TargetRegisterClass *trc) { + + MachineBasicBlock::iterator nextInstItr(mi); + ++nextInstItr; + + unsigned miIdx = makeSpaceAfter(mi); + + tii->storeRegToStackSlot(*mi->getParent(), nextInstItr, vreg, true, ss, trc); MachineBasicBlock::iterator storeInstItr(mi); ++storeInstItr; @@ -81,25 +107,35 @@ protected: return storeInstIdx; } + void insertStoreOnInterval(LiveInterval *li, + MachineInstr *mi, unsigned ss, + unsigned vreg, + const TargetRegisterClass *trc) { + + unsigned storeInstIdx = insertStoreFor(mi, ss, vreg, trc); + unsigned start = lis->getDefIndex(lis->getInstructionIndex(mi)), + end = lis->getUseIndex(storeInstIdx); + + VNInfo *vni = + li->getNextValue(storeInstIdx, 0, true, lis->getVNInfoAllocator()); + vni->kills.push_back(storeInstIdx); + LiveRange lr(start, end, vni); + + li->addRange(lr); + } + /// Insert a load of the given vreg from the given stack slot immediately /// before the given instruction. Returns the base index of the inserted /// instruction. The caller is responsible for adding an appropriate /// LiveInterval to the LiveIntervals analysis.
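The makeSpaceBefore/makeSpaceAfter helpers above rest on a numbering trick: if no free index exists next to an instruction, every index in the function is doubled, which opens a gap between any two previously adjacent numbers. A toy illustration with plain integers (not the LiveIntervals API):

#include <cstdio>

int main() {
  // Instructions numbered consecutively: no slot free between 4 and 5.
  unsigned idx[3] = { 3, 4, 5 };
  // scaleNumbering(2) doubles everything: { 6, 8, 10 }. Indices 7 and 9 are
  // now free, so a load fits before the instruction at 8, a store after it.
  for (int i = 0; i != 3; ++i)
    idx[i] *= 2;
  std::printf("%u %u %u\n", idx[0], idx[1], idx[2]);
  return 0;
}

Doubling preserves relative order and creates a gap everywhere at once, which is why the LiveIntervals and LiveStacks numberings are scaled together.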
unsigned insertLoadFor(MachineInstr *mi, unsigned ss, - unsigned newVReg, + unsigned vreg, const TargetRegisterClass *trc) { MachineBasicBlock::iterator useInstItr(mi); - - if (!lis->hasGapBeforeInstr(lis->getInstructionIndex(mi))) { - lis->scaleNumbering(2); - ls->scaleNumbering(2); - } - - unsigned miIdx = lis->getInstructionIndex(mi); - - assert(lis->hasGapBeforeInstr(miIdx)); - - tii->loadRegFromStackSlot(*mi->getParent(), useInstItr, newVReg, ss, trc); + + unsigned miIdx = makeSpaceBefore(mi); + + tii->loadRegFromStackSlot(*mi->getParent(), useInstItr, vreg, ss, trc); MachineBasicBlock::iterator loadInstItr(mi); --loadInstItr; MachineInstr *loadInst = &*loadInstItr; @@ -113,6 +149,24 @@ protected: return loadInstIdx; } + void insertLoadOnInterval(LiveInterval *li, + MachineInstr *mi, unsigned ss, + unsigned vreg, + const TargetRegisterClass *trc) { + + unsigned loadInstIdx = insertLoadFor(mi, ss, vreg, trc); + unsigned start = lis->getDefIndex(loadInstIdx), + end = lis->getUseIndex(lis->getInstructionIndex(mi)); + + VNInfo *vni = + li->getNextValue(loadInstIdx, 0, true, lis->getVNInfoAllocator()); + vni->kills.push_back(lis->getInstructionIndex(mi)); + LiveRange lr(start, end, vni); + + li->addRange(lr); + } + + /// Add spill ranges for every use/def of the live interval, inserting loads /// immediately before each use, and stores after each def. No folding is @@ -173,35 +227,16 @@ protected: assert(hasUse || hasDef); if (hasUse) { - unsigned loadInstIdx = insertLoadFor(mi, ss, newVReg, trc); - unsigned start = lis->getDefIndex(loadInstIdx), - end = lis->getUseIndex(lis->getInstructionIndex(mi)); - - VNInfo *vni = - newLI->getNextValue(loadInstIdx, 0, lis->getVNInfoAllocator()); - vni->kills.push_back(lis->getInstructionIndex(mi)); - LiveRange lr(start, end, vni); - - newLI->addRange(lr); + insertLoadOnInterval(newLI, mi, ss, newVReg, trc); } if (hasDef) { - unsigned storeInstIdx = insertStoreFor(mi, ss, newVReg, trc); - unsigned start = lis->getDefIndex(lis->getInstructionIndex(mi)), - end = lis->getUseIndex(storeInstIdx); - - VNInfo *vni = - newLI->getNextValue(storeInstIdx, 0, lis->getVNInfoAllocator()); - vni->kills.push_back(storeInstIdx); - LiveRange lr(start, end, vni); - - newLI->addRange(lr); + insertStoreOnInterval(newLI, mi, ss, newVReg, trc); } added.push_back(newLI); } - return added; } @@ -212,13 +247,44 @@ protected: /// folding. class TrivialSpiller : public SpillerBase { public: - TrivialSpiller(MachineFunction *mf, LiveIntervals *lis, LiveStacks *ls, VirtRegMap *vrm) : + + TrivialSpiller(MachineFunction *mf, LiveIntervals *lis, LiveStacks *ls, + VirtRegMap *vrm) : SpillerBase(mf, lis, ls, vrm) {} std::vector<LiveInterval*> spill(LiveInterval *li) { return trivialSpillEverywhere(li); } + std::vector<LiveInterval*> intraBlockSplit(LiveInterval *li, VNInfo *valno) { + std::vector<LiveInterval*> spillIntervals; + MachineBasicBlock::iterator storeInsertPoint; + + if (valno->isDefAccurate()) { + // If we have an accurate def we can just grab an iterator to the instr + // after the def. + storeInsertPoint = + next(MachineBasicBlock::iterator(lis->getInstructionFromIndex(valno->def))); + } else { + // If the def info isn't accurate we check if this is a PHI def. + // If it is then def holds the index of the defining Basic Block, and we + // can use that to get an insertion point. + if (valno->isPHIDef()) { + + } else { + // We have no usable def info. We can't split this value sensibly. 
+ // FIXME: Need sensible feedback for "failure to split", an empty + // set of spill intervals could be reasonably returned from a + // split where both the store and load are folded. + return spillIntervals; + } + } + + + + return spillIntervals; + } + }; } diff --git a/lib/CodeGen/Spiller.h b/lib/CodeGen/Spiller.h index cad054d..9c3900d 100644 --- a/lib/CodeGen/Spiller.h +++ b/lib/CodeGen/Spiller.h @@ -13,11 +13,14 @@ #include <vector> namespace llvm { + class LiveInterval; class LiveIntervals; class LiveStacks; class MachineFunction; + class MachineInstr; class VirtRegMap; + class VNInfo; /// Spiller interface. /// @@ -26,7 +29,15 @@ namespace llvm { class Spiller { public: virtual ~Spiller() = 0; + + /// Spill the given live range. The method used will depend on the Spiller + /// implementation selected. virtual std::vector<LiveInterval*> spill(LiveInterval *li) = 0; + + /// Intra-block split. + virtual std::vector<LiveInterval*> intraBlockSplit(LiveInterval *li, + VNInfo *valno) = 0; + }; /// Create and return a spiller object, as specified on the command line. diff --git a/lib/CodeGen/StrongPHIElimination.cpp b/lib/CodeGen/StrongPHIElimination.cpp index a2c1255..ca99528 100644 --- a/lib/CodeGen/StrongPHIElimination.cpp +++ b/lib/CodeGen/StrongPHIElimination.cpp @@ -827,7 +827,7 @@ void StrongPHIElimination::InsertCopies(MachineDomTreeNode* MDTN, // Add a live range for the new vreg LiveInterval& Int = LI.getInterval(I->getOperand(i).getReg()); VNInfo* FirstVN = *Int.vni_begin(); - FirstVN->hasPHIKill = false; + FirstVN->setHasPHIKill(false); if (I->getOperand(i).isKill()) FirstVN->kills.push_back( LiveIntervals::getUseIndex(LI.getInstructionIndex(I))); @@ -886,10 +886,7 @@ bool StrongPHIElimination::mergeLiveIntervals(unsigned primary, VNInfo* OldVN = R.valno; VNInfo*& NewVN = VNMap[OldVN]; if (!NewVN) { - NewVN = LHS.getNextValue(OldVN->def, - OldVN->copy, - LI.getVNInfoAllocator()); - NewVN->kills = OldVN->kills; + NewVN = LHS.createValueCopy(OldVN, LI.getVNInfoAllocator()); } LiveRange LR (R.start, R.end, NewVN); @@ -987,7 +984,7 @@ bool StrongPHIElimination::runOnMachineFunction(MachineFunction &Fn) { LiveInterval& Int = LI.getOrCreateInterval(I->first); const LiveRange* LR = Int.getLiveRangeContaining(LI.getMBBEndIdx(SI->second)); - LR->valno->hasPHIKill = true; + LR->valno->setHasPHIKill(true); I->second.erase(SI->first); } @@ -1037,7 +1034,7 @@ bool StrongPHIElimination::runOnMachineFunction(MachineFunction &Fn) { // now has an unknown def. 
unsigned idx = LI.getDefIndex(LI.getInstructionIndex(PInstr)); const LiveRange* PLR = PI.getLiveRangeContaining(idx); - PLR->valno->def = ~0U; + PLR->valno->setIsPHIDef(true); LiveRange R (LI.getMBBStartIdx(PInstr->getParent()), PLR->start, PLR->valno); PI.addRange(R); diff --git a/lib/CodeGen/VirtRegMap.cpp b/lib/CodeGen/VirtRegMap.cpp index 29637b9..4d3417f 100644 --- a/lib/CodeGen/VirtRegMap.cpp +++ b/lib/CodeGen/VirtRegMap.cpp @@ -51,6 +51,7 @@ static RegisterPass<VirtRegMap> X("virtregmap", "Virtual Register Map"); bool VirtRegMap::runOnMachineFunction(MachineFunction &mf) { + MRI = &mf.getRegInfo(); TII = mf.getTarget().getInstrInfo(); TRI = mf.getTarget().getRegisterInfo(); MF = &mf; @@ -98,6 +99,18 @@ void VirtRegMap::grow() { ImplicitDefed.resize(LastVirtReg-TargetRegisterInfo::FirstVirtualRegister+1); } +unsigned VirtRegMap::getRegAllocPref(unsigned virtReg) { + std::pair<unsigned, unsigned> Hint = MRI->getRegAllocationHint(virtReg); + unsigned physReg = Hint.second; + if (physReg && + TargetRegisterInfo::isVirtualRegister(physReg) && hasPhys(physReg)) + physReg = getPhys(physReg); + if (Hint.first == 0) + return (physReg && TargetRegisterInfo::isPhysicalRegister(physReg)) + ? physReg : 0; + return TRI->ResolveRegAllocHint(Hint.first, physReg, *MF); +} + int VirtRegMap::assignVirt2StackSlot(unsigned virtReg) { assert(TargetRegisterInfo::isVirtualRegister(virtReg)); assert(Virt2StackSlotMap[virtReg] == NO_STACK_SLOT && @@ -213,8 +226,7 @@ void VirtRegMap::RemoveMachineInstrFromMaps(MachineInstr *MI) { /// FindUnusedRegisters - Gather a list of allocatable registers that /// have not been allocated to any virtual register. -bool VirtRegMap::FindUnusedRegisters(const TargetRegisterInfo *TRI, - LiveIntervals* LIs) { +bool VirtRegMap::FindUnusedRegisters(LiveIntervals* LIs) { unsigned NumRegs = TRI->getNumRegs(); UnusedRegs.reset(); UnusedRegs.resize(NumRegs); diff --git a/lib/CodeGen/VirtRegMap.h b/lib/CodeGen/VirtRegMap.h index 507557d..fe767b7 100644 --- a/lib/CodeGen/VirtRegMap.h +++ b/lib/CodeGen/VirtRegMap.h @@ -31,6 +31,7 @@ namespace llvm { class LiveIntervals; class MachineInstr; class MachineFunction; + class MachineRegisterInfo; class TargetInstrInfo; class TargetRegisterInfo; @@ -47,6 +48,7 @@ namespace llvm { std::pair<unsigned, ModRef> > MI2VirtMapTy; private: + MachineRegisterInfo *MRI; const TargetInstrInfo *TII; const TargetRegisterInfo *TRI; MachineFunction *MF; @@ -190,6 +192,9 @@ namespace llvm { grow(); } + /// @brief returns the register allocation preference. + unsigned getRegAllocPref(unsigned virtReg); + /// @brief records virtReg is a split live interval from SReg. void setIsSplitFromReg(unsigned virtReg, unsigned SReg) { Virt2SplitMap[virtReg] = SReg; @@ -445,8 +450,7 @@ namespace llvm { /// FindUnusedRegisters - Gather a list of allocatable registers that /// have not been allocated to any virtual register. - bool FindUnusedRegisters(const TargetRegisterInfo *TRI, - LiveIntervals* LIs); + bool FindUnusedRegisters(LiveIntervals* LIs); /// HasUnusedRegisters - Return true if there are any allocatable registers /// that have not been allocated to any virtual register. 
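VirtRegMap::getRegAllocPref above decodes the (type, register) hint pair kept by MachineRegisterInfo: a virtual-register hint is first followed to its assigned physical register, hint type 0 means a plain preference, and any other type is delegated to the target. A condensed sketch of that decision logic, with function-pointer parameters standing in for the real queries (an illustration, not the LLVM interface):

// Sketch of regalloc-hint resolution, mirroring getRegAllocPref above.
// hintType 0 == plain preference; nonzero types are target-specific and
// would go through TargetRegisterInfo::ResolveRegAllocHint instead.
unsigned resolvePref(unsigned hintType, unsigned hintReg,
                     bool (*isVirtual)(unsigned),    // isVirtualRegister
                     bool (*hasPhys)(unsigned),      // vreg already assigned?
                     unsigned (*getPhys)(unsigned))  // its physical register
{
  if (hintReg && isVirtual(hintReg) && hasPhys(hintReg))
    hintReg = getPhys(hintReg); // follow the virtual hint to its phys reg
  if (hintType == 0)
    return (hintReg && !isVirtual(hintReg)) ? hintReg : 0;
  return 0; // target-specific resolution omitted in this sketch
}

The same pair shows up throughout the coalescing changes above via setRegAllocationHint(reg, 0, preferredReg), i.e. a plain type-0 preference.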
diff --git a/lib/ExecutionEngine/ExecutionEngineBindings.cpp b/lib/ExecutionEngine/ExecutionEngineBindings.cpp index 83397a58..401a226 100644 --- a/lib/ExecutionEngine/ExecutionEngineBindings.cpp +++ b/lib/ExecutionEngine/ExecutionEngineBindings.cpp @@ -118,7 +118,7 @@ int LLVMCreateJITCompiler(LLVMExecutionEngineRef *OutJIT, char **OutError) { std::string Error; if (ExecutionEngine *JIT = - ExecutionEngine::createJIT(unwrap(MP), &Error, 0, + ExecutionEngine::create(unwrap(MP), false, &Error, (CodeGenOpt::Level)OptLevel)) { *OutJIT = wrap(JIT); return 0; diff --git a/lib/Support/CMakeLists.txt b/lib/Support/CMakeLists.txt index 7c8ce70..e7a76cc 100644 --- a/lib/Support/CMakeLists.txt +++ b/lib/Support/CMakeLists.txt @@ -19,6 +19,7 @@ add_llvm_library(LLVMSupport PrettyStackTrace.cpp SlowOperationInformer.cpp SmallPtrSet.cpp + SourceMgr.cpp Statistic.cpp Streams.cpp StringExtras.cpp diff --git a/lib/Support/ManagedStatic.cpp b/lib/Support/ManagedStatic.cpp index 6de6575..4e655a0 100644 --- a/lib/Support/ManagedStatic.cpp +++ b/lib/Support/ManagedStatic.cpp @@ -14,18 +14,15 @@ #include "llvm/Support/ManagedStatic.h" #include "llvm/Config/config.h" #include "llvm/System/Atomic.h" -#include "llvm/System/Mutex.h" #include <cassert> using namespace llvm; static const ManagedStaticBase *StaticList = 0; -static sys::Mutex* ManagedStaticMutex = 0; - void ManagedStaticBase::RegisterManagedStatic(void *(*Creator)(), void (*Deleter)(void*)) const { - if (ManagedStaticMutex) { - ManagedStaticMutex->acquire(); + if (llvm_is_multithreaded()) { + llvm_acquire_global_lock(); if (Ptr == 0) { void* tmp = Creator ? Creator() : 0; @@ -39,7 +36,7 @@ void ManagedStaticBase::RegisterManagedStatic(void *(*Creator)(), StaticList = this; } - ManagedStaticMutex->release(); + llvm_release_global_lock(); } else { assert(Ptr == 0 && DeleterFn == 0 && Next == 0 && "Partially initialized ManagedStatic!?"); @@ -68,24 +65,11 @@ void ManagedStaticBase::destroy() const { DeleterFn = 0; } -bool llvm::llvm_start_multithreaded() { -#if LLVM_MULTITHREADED - assert(ManagedStaticMutex == 0 && "Multithreaded LLVM already initialized!"); - ManagedStaticMutex = new sys::Mutex(true); - return true; -#else - return false; -#endif -} - /// llvm_shutdown - Deallocate and destroy all ManagedStatic variables. void llvm::llvm_shutdown() { while (StaticList) StaticList->destroy(); - if (ManagedStaticMutex) { - delete ManagedStaticMutex; - ManagedStaticMutex = 0; - } + if (llvm_is_multithreaded()) llvm_stop_multithreaded(); } diff --git a/lib/Support/SourceMgr.cpp b/lib/Support/SourceMgr.cpp new file mode 100644 index 0000000..d789f10 --- /dev/null +++ b/lib/Support/SourceMgr.cpp @@ -0,0 +1,127 @@ +//===- SourceMgr.cpp - Manager for Simple Source Buffers & Diagnostics ----===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the SourceMgr class. This class is used as a simple +// substrate for diagnostics, #include handling, and other low level things for +// simple parsers. 
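The ManagedStatic.cpp hunk above retires the file-local mutex in favor of the new global lock, but the guarded pattern is unchanged: check-under-lock lazy initialization. Stripped to its skeleton, with hypothetical acquire/release parameters standing in for llvm_acquire_global_lock/llvm_release_global_lock:

#include <cassert>
#include <cstddef>

static void *Ptr = NULL;

// Lazily create a singleton. Under threading, the check happens while the
// lock is held, so only one thread ever runs the creator.
void *lazyGet(void *(*creator)(), bool multithreaded,
              void (*acquire)(), void (*release)()) {
  if (multithreaded) {
    acquire();                        // llvm_acquire_global_lock() in LLVM
    if (Ptr == NULL)
      Ptr = creator ? creator() : NULL;
    release();                        // llvm_release_global_lock()
  } else {
    assert(Ptr == NULL && "Partially initialized static!?");
    Ptr = creator ? creator() : NULL;
  }
  return Ptr;
}

The real RegisterManagedStatic additionally links each instance into StaticList so that llvm_shutdown() can destroy everything in reverse registration order.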
+// +//===----------------------------------------------------------------------===// + +#include "llvm/Support/SourceMgr.h" +#include "llvm/Support/MemoryBuffer.h" +#include "llvm/Support/raw_ostream.h" +using namespace llvm; + +SourceMgr::~SourceMgr() { + while (!Buffers.empty()) { + delete Buffers.back().Buffer; + Buffers.pop_back(); + } +} + +/// AddIncludeFile - Search for a file with the specified name in the current +/// directory or in one of the IncludeDirs. If no file is found, this returns +/// ~0, otherwise it returns the buffer ID of the stacked file. +unsigned SourceMgr::AddIncludeFile(const std::string &Filename, + SMLoc IncludeLoc) { + + MemoryBuffer *NewBuf = MemoryBuffer::getFile(Filename.c_str()); + + // If the file didn't exist directly, see if it's in an include path. + for (unsigned i = 0, e = IncludeDirectories.size(); i != e && !NewBuf; ++i) { + std::string IncFile = IncludeDirectories[i] + "/" + Filename; + NewBuf = MemoryBuffer::getFile(IncFile.c_str()); + } + + if (NewBuf == 0) return ~0U; + + return AddNewSourceBuffer(NewBuf, IncludeLoc); +} + + +/// FindBufferContainingLoc - Return the ID of the buffer containing the +/// specified location, returning -1 if not found. +int SourceMgr::FindBufferContainingLoc(SMLoc Loc) const { + for (unsigned i = 0, e = Buffers.size(); i != e; ++i) + if (Loc.getPointer() >= Buffers[i].Buffer->getBufferStart() && + // Use <= here so that a pointer to the null at the end of the buffer + // is included as part of the buffer. + Loc.getPointer() <= Buffers[i].Buffer->getBufferEnd()) + return i; + return -1; +} + +/// FindLineNumber - Find the line number for the specified location in the +/// specified file. This is not a fast method. +unsigned SourceMgr::FindLineNumber(SMLoc Loc, int BufferID) const { + if (BufferID == -1) BufferID = FindBufferContainingLoc(Loc); + assert(BufferID != -1 && "Invalid Location!"); + + MemoryBuffer *Buff = getBufferInfo(BufferID).Buffer; + + // Count the number of \n's between the start of the file and the specified + // location. + unsigned LineNo = 1; + + const char *Ptr = Buff->getBufferStart(); + + for (; SMLoc::getFromPointer(Ptr) != Loc; ++Ptr) + if (*Ptr == '\n') ++LineNo; + return LineNo; +} + +void SourceMgr::PrintIncludeStack(SMLoc IncludeLoc) const { + if (IncludeLoc == SMLoc()) return; // Top of stack. + + int CurBuf = FindBufferContainingLoc(IncludeLoc); + assert(CurBuf != -1 && "Invalid or unspecified location!"); + + PrintIncludeStack(getBufferInfo(CurBuf).IncludeLoc); + + errs() << "Included from " + << getBufferInfo(CurBuf).Buffer->getBufferIdentifier() + << ":" << FindLineNumber(IncludeLoc, CurBuf) << ":\n"; +} + + +void SourceMgr::PrintMessage(SMLoc Loc, const std::string &Msg) const { + raw_ostream &OS = errs(); + + // First thing to do: find the current buffer containing the specified + // location. + int CurBuf = FindBufferContainingLoc(Loc); + assert(CurBuf != -1 && "Invalid or unspecified location!"); + + PrintIncludeStack(getBufferInfo(CurBuf).IncludeLoc); + + MemoryBuffer *CurMB = getBufferInfo(CurBuf).Buffer; + + + OS << "Parsing " << CurMB->getBufferIdentifier() << ":" + << FindLineNumber(Loc, CurBuf) << ": "; + + OS << Msg << "\n"; + + // Scan backward to find the start of the line. + const char *LineStart = Loc.getPointer(); + while (LineStart != CurMB->getBufferStart() && + LineStart[-1] != '\n' && LineStart[-1] != '\r') + --LineStart; + // Get the end of the line. 
+ const char *LineEnd = Loc.getPointer(); + while (LineEnd != CurMB->getBufferEnd() && + LineEnd[0] != '\n' && LineEnd[0] != '\r') + ++LineEnd; + // Print out the line. + OS << std::string(LineStart, LineEnd) << "\n"; + // Print out spaces before the caret. + for (const char *Pos = LineStart; Pos != Loc.getPointer(); ++Pos) + OS << (*Pos == '\t' ? '\t' : ' '); + OS << "^\n"; +} diff --git a/lib/Support/Triple.cpp b/lib/Support/Triple.cpp index e8cf69d..dd5c3d6 100644 --- a/lib/Support/Triple.cpp +++ b/lib/Support/Triple.cpp @@ -43,6 +43,7 @@ const char *Triple::getOSTypeName(OSType Kind) { switch (Kind) { case UnknownOS: return "unknown"; + case AuroraUX: return "auroraux"; case Darwin: return "darwin"; case DragonFly: return "dragonfly"; case FreeBSD: return "freebsd"; @@ -79,7 +80,9 @@ void Triple::Parse() const { Vendor = UnknownVendor; std::string OSName = getOSName(); - if (memcmp(&OSName[0], "darwin", 6) == 0) + if (memcmp(&OSName[0], "auroraux", 8) == 0) + OS = AuroraUX; + else if (memcmp(&OSName[0], "darwin", 6) == 0) OS = Darwin; else if (memcmp(&OSName[0], "dragonfly", 9) == 0) OS = DragonFly; diff --git a/lib/System/Atomic.cpp b/lib/System/Atomic.cpp index 2827d88..416f981 100644 --- a/lib/System/Atomic.cpp +++ b/lib/System/Atomic.cpp @@ -51,3 +51,31 @@ sys::cas_flag sys::CompareAndSwap(volatile sys::cas_flag* ptr, # error No compare-and-swap implementation for your platform! #endif } + +sys::cas_flag sys::AtomicIncrement(volatile sys::cas_flag* ptr) { +#if LLVM_MULTITHREADED==0 + ++(*ptr); + return *ptr; +#elif defined(__GNUC__) + return __sync_add_and_fetch(ptr, 1); +#elif defined(_MSC_VER) + return InterlockedIncrement(ptr); +#else +# error No atomic increment implementation for your platform! +#endif +} + +sys::cas_flag sys::AtomicDecrement(volatile sys::cas_flag* ptr) { +#if LLVM_MULTITHREADED==0 + --(*ptr); + return *ptr; +#elif defined(__GNUC__) + return __sync_sub_and_fetch(ptr, 1); +#elif defined(_MSC_VER) + return InterlockedDecrement(ptr); +#else +# error No atomic decrement implementation for your platform! 
+#endif +} + + diff --git a/lib/System/CMakeLists.txt b/lib/System/CMakeLists.txt index 5415dd6..a5a56e8 100644 --- a/lib/System/CMakeLists.txt +++ b/lib/System/CMakeLists.txt @@ -10,7 +10,9 @@ add_llvm_library(LLVMSystem Path.cpp Process.cpp Program.cpp + RWMutex.cpp Signals.cpp + Threading.cpp TimeValue.cpp ) diff --git a/lib/System/Mutex.cpp b/lib/System/Mutex.cpp index d95c25b..a5e9920 100644 --- a/lib/System/Mutex.cpp +++ b/lib/System/Mutex.cpp @@ -23,11 +23,11 @@ // Define all methods as no-ops if threading is explicitly disabled namespace llvm { using namespace sys; -Mutex::Mutex( bool recursive) { } -Mutex::~Mutex() { } -bool Mutex::acquire() { return true; } -bool Mutex::release() { return true; } -bool Mutex::tryacquire() { return true; } +MutexImpl::MutexImpl( bool recursive) { } +MutexImpl::~MutexImpl() { } +bool MutexImpl::acquire() { return true; } +bool MutexImpl::release() { return true; } +bool MutexImpl::tryacquire() { return true; } } #else @@ -55,7 +55,7 @@ using namespace sys; static const bool pthread_enabled = true; // Construct a Mutex using pthread calls -Mutex::Mutex( bool recursive) +MutexImpl::MutexImpl( bool recursive) : data_(0) { if (pthread_enabled) @@ -94,7 +94,7 @@ Mutex::Mutex( bool recursive) } // Destruct a Mutex -Mutex::~Mutex() +MutexImpl::~MutexImpl() { if (pthread_enabled) { @@ -106,7 +106,7 @@ Mutex::~Mutex() } bool -Mutex::acquire() +MutexImpl::acquire() { if (pthread_enabled) { @@ -120,7 +120,7 @@ Mutex::acquire() } bool -Mutex::release() +MutexImpl::release() { if (pthread_enabled) { @@ -134,7 +134,7 @@ Mutex::release() } bool -Mutex::tryacquire() +MutexImpl::tryacquire() { if (pthread_enabled) { diff --git a/lib/System/RWMutex.cpp b/lib/System/RWMutex.cpp new file mode 100644 index 0000000..15d98cb --- /dev/null +++ b/lib/System/RWMutex.cpp @@ -0,0 +1,175 @@ +//===- RWMutex.cpp - Reader/Writer Mutual Exclusion Lock --------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the llvm::sys::RWMutex class. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Config/config.h" +#include "llvm/System/RWMutex.h" +#include <cstring> + +//===----------------------------------------------------------------------===// +//=== WARNING: Implementation here must contain only TRULY operating system +//=== independent code. +//===----------------------------------------------------------------------===// + +#if !defined(ENABLE_THREADS) || ENABLE_THREADS == 0 +// Define all methods as no-ops if threading is explicitly disabled +namespace llvm { +using namespace sys; +RWMutexImpl::RWMutexImpl() { } +RWMutexImpl::~RWMutexImpl() { } +bool RWMutexImpl::reader_acquire() { return true; } +bool RWMutexImpl::reader_release() { return true; } +bool RWMutexImpl::writer_acquire() { return true; } +bool RWMutexImpl::writer_release() { return true; } +} +#else + +#if defined(HAVE_PTHREAD_H) && defined(HAVE_PTHREAD_RWLOCK_INIT) + +#include <cassert> +#include <pthread.h> +#include <stdlib.h> + +namespace llvm { +using namespace sys; + + +// This variable is useful for situations where the pthread library has been +// compiled with weak linkage for its interface symbols. This allows the +// threading support to be turned off by simply not linking against -lpthread. 
+// In that situation, the value of pthread_mutex_init will be 0 and +// consequently pthread_enabled will be false. In such situations, all the +// pthread operations become no-ops and the functions all return false. If +// pthread_rwlock_init does have an address, then rwlock support is enabled. +// Note: all LLVM tools will link against -lpthread if it's available since it +// is configured into the LIBS variable. +// Note: this line of code generates a warning if pthread_rwlock_init is not +// declared with weak linkage. It's safe to ignore the warning. +static const bool pthread_enabled = true; + +// Construct a RWMutex using pthread calls +RWMutexImpl::RWMutexImpl() + : data_(0) +{ + if (pthread_enabled) + { + // Declare the pthread_rwlock data structures + pthread_rwlock_t* rwlock = + static_cast<pthread_rwlock_t*>(malloc(sizeof(pthread_rwlock_t))); + +#ifdef __APPLE__ + // Workaround a bug/mis-feature in Darwin's pthread_rwlock_init. + bzero(rwlock, sizeof(pthread_rwlock_t)); +#endif + + pthread_rwlockattr_t attr; + + // Initialize the rwlock attributes + int errorcode = pthread_rwlockattr_init(&attr); + assert(errorcode == 0); + +#if !defined(__FreeBSD__) && !defined(__OpenBSD__) && !defined(__NetBSD__) && !defined(__DragonFly__) + // Make it a process local rwlock + errorcode = pthread_rwlockattr_setpshared(&attr, PTHREAD_PROCESS_PRIVATE); +#endif + + // Initialize the rwlock + errorcode = pthread_rwlock_init(rwlock, &attr); + assert(errorcode == 0); + + // Destroy the attributes + errorcode = pthread_rwlockattr_destroy(&attr); + assert(errorcode == 0); + + // Assign the data member + data_ = rwlock; + } +} + +// Destruct a RWMutex +RWMutexImpl::~RWMutexImpl() +{ + if (pthread_enabled) + { + pthread_rwlock_t* rwlock = static_cast<pthread_rwlock_t*>(data_); + assert(rwlock != 0); + pthread_rwlock_destroy(rwlock); + free(rwlock); + } +} + +bool +RWMutexImpl::reader_acquire() +{ + if (pthread_enabled) + { + pthread_rwlock_t* rwlock = static_cast<pthread_rwlock_t*>(data_); + assert(rwlock != 0); + + int errorcode = pthread_rwlock_rdlock(rwlock); + return errorcode == 0; + } + return false; +} + +bool +RWMutexImpl::reader_release() +{ + if (pthread_enabled) + { + pthread_rwlock_t* rwlock = static_cast<pthread_rwlock_t*>(data_); + assert(rwlock != 0); + + int errorcode = pthread_rwlock_unlock(rwlock); + return errorcode == 0; + } + return false; +} + +bool +RWMutexImpl::writer_acquire() +{ + if (pthread_enabled) + { + pthread_rwlock_t* rwlock = static_cast<pthread_rwlock_t*>(data_); + assert(rwlock != 0); + + int errorcode = pthread_rwlock_wrlock(rwlock); + return errorcode == 0; + } + return false; +} + +bool +RWMutexImpl::writer_release() +{ + if (pthread_enabled) + { + pthread_rwlock_t* rwlock = static_cast<pthread_rwlock_t*>(data_); + assert(rwlock != 0); + + int errorcode = pthread_rwlock_unlock(rwlock); + return errorcode == 0; + } + return false; +} + +} + +#elif defined(LLVM_ON_UNIX) +#include "Unix/RWMutex.inc" +#elif defined( LLVM_ON_WIN32) +#include "Win32/RWMutex.inc" +#else +#warning Neither LLVM_ON_UNIX nor LLVM_ON_WIN32 was set in System/RWMutex.cpp +#endif +#endif diff --git a/lib/System/Threading.cpp b/lib/System/Threading.cpp new file mode 100644 index 0000000..a2d7f82 --- /dev/null +++ b/lib/System/Threading.cpp @@ -0,0 +1,63 @@ +//===-- llvm/System/Threading.cpp- Control multithreading mode --*- C++ -*-==// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details.
+// +//===----------------------------------------------------------------------===// +// +// This file implements llvm_start_multithreaded() and friends. +// +//===----------------------------------------------------------------------===// + +#include "llvm/System/Threading.h" +#include "llvm/System/Atomic.h" +#include "llvm/System/Mutex.h" +#include <cassert> + +using namespace llvm; + +static bool multithreaded_mode = false; + +static sys::Mutex* global_lock = 0; + +bool llvm::llvm_start_multithreaded() { +#ifdef LLVM_MULTITHREADED + assert(!multithreaded_mode && "Already multithreaded!"); + multithreaded_mode = true; + global_lock = new sys::Mutex(true); + + // We fence here to ensure that all initialization is complete BEFORE we + // return from llvm_start_multithreaded(). + sys::MemoryFence(); + return true; +#else + return false; +#endif +} + +void llvm::llvm_stop_multithreaded() { +#ifdef LLVM_MULTITHREADED + assert(multithreaded_mode && "Not currently multithreaded!"); + + // We fence here to ensure that all threaded operations are complete BEFORE we + // return from llvm_stop_multithreaded(). + sys::MemoryFence(); + + multithreaded_mode = false; + delete global_lock; +#endif +} + +bool llvm::llvm_is_multithreaded() { + return multithreaded_mode; +} + +void llvm::llvm_acquire_global_lock() { + if (multithreaded_mode) global_lock->acquire(); +} + +void llvm::llvm_release_global_lock() { + if (multithreaded_mode) global_lock->release(); +} diff --git a/lib/System/Unix/Mutex.inc b/lib/System/Unix/Mutex.inc index 4a015a6..10e7ecb 100644 --- a/lib/System/Unix/Mutex.inc +++ b/lib/System/Unix/Mutex.inc @@ -20,28 +20,28 @@ namespace llvm { using namespace sys; -Mutex::Mutex( bool recursive) +MutexImpl::MutexImpl( bool recursive) { } -Mutex::~Mutex() +MutexImpl::~MutexImpl() { } bool -Mutex::acquire() +MutexImpl::acquire() { return true; } bool -Mutex::release() +MutexImpl::release() { return true; } bool -Mutex::tryacquire( void ) +MutexImpl::tryacquire( void ) { return true; } diff --git a/lib/System/Unix/Path.inc b/lib/System/Unix/Path.inc index d5edee1..1f73571 100644 --- a/lib/System/Unix/Path.inc +++ b/lib/System/Unix/Path.inc @@ -104,6 +104,14 @@ Path::isValid() const { } bool +Path::isAbsolute(const char *NameStart, unsigned NameLen) { + assert(NameStart); + if (NameLen == 0) + return false; + return NameStart[0] == '/'; +} + +bool Path::isAbsolute() const { if (path.empty()) return false; diff --git a/lib/System/Unix/RWMutex.inc b/lib/System/Unix/RWMutex.inc new file mode 100644 index 0000000..e83d41e --- /dev/null +++ b/lib/System/Unix/RWMutex.inc @@ -0,0 +1,43 @@ +//= llvm/System/Unix/RWMutex.inc - Unix Reader/Writer Mutual Exclusion Lock =// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the Unix specific (non-pthread) RWMutex class. +// +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +//=== WARNING: Implementation here must contain only generic UNIX code that +//=== is guaranteed to work on *all* UNIX variants.
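Threading.cpp above defines the lifecycle that ManagedStatic and the global lock now depend on. As the assertions imply, the intended pattern is a single start/stop pair driven from one thread; a usage sketch against the API introduced in this commit:

#include "llvm/System/Threading.h"

int main() {
  // Returns false when LLVM was built without LLVM_MULTITHREADED; the global
  // lock is then never created and the locking entry points are no-ops.
  if (llvm::llvm_start_multithreaded()) {
    // Safe to spawn threads here: ManagedStatic initialization is now
    // serialized through llvm_acquire_global_lock()/llvm_release_global_lock().

    // Tear down only after all other threads are done; this deletes the
    // global lock (and would assert if multithreading was never started).
    llvm::llvm_stop_multithreaded();
  }
  return 0;
}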
+//===----------------------------------------------------------------------===// + +namespace llvm { + +using namespace sys; + +RWMutexImpl::RWMutexImpl() { } + +RWMutexImpl::~RWMutexImpl() { } + +bool RWMutexImpl::reader_acquire() { + return true; +} + +bool RWMutexImpl::reader_release() { + return true; +} + +bool RWMutexImpl::writer_acquire() { + return true; +} + +bool RWMutexImpl::writer_release() { + return true; +} + +} diff --git a/lib/System/Unix/Unix.h b/lib/System/Unix/Unix.h index 452226f..c2c06dd 100644 --- a/lib/System/Unix/Unix.h +++ b/lib/System/Unix/Unix.h @@ -79,12 +79,19 @@ static inline bool MakeErrMsg( return true; char buffer[MAXPATHLEN]; buffer[0] = 0; + char* str = buffer; if (errnum == -1) errnum = errno; #ifdef HAVE_STRERROR_R // strerror_r is thread-safe. if (errnum) +# if defined(__GLIBC__) && defined(_GNU_SOURCE) + // glibc defines its own incompatible version of strerror_r + // which may not use the buffer supplied. + str = strerror_r(errnum,buffer,MAXPATHLEN-1); +# else strerror_r(errnum,buffer,MAXPATHLEN-1); +# endif #elif HAVE_STRERROR // Copy the thread un-safe result of strerror into // the buffer as fast as possible to minimize impact @@ -97,7 +104,7 @@ static inline bool MakeErrMsg( // but, oh well, just use a generic message sprintf(buffer, "Error #%d", errnum); #endif - *ErrMsg = prefix + ": " + buffer; + *ErrMsg = prefix + ": " + str; return true; } diff --git a/lib/System/Win32/Mutex.inc b/lib/System/Win32/Mutex.inc index 7c1723b..75f01fe 100644 --- a/lib/System/Win32/Mutex.inc +++ b/lib/System/Win32/Mutex.inc @@ -22,13 +22,13 @@ namespace llvm { using namespace sys; -Mutex::Mutex(bool /*recursive*/) +MutexImpl::MutexImpl(bool /*recursive*/) { data_ = new CRITICAL_SECTION; InitializeCriticalSection((LPCRITICAL_SECTION)data_); } -Mutex::~Mutex() +MutexImpl::~MutexImpl() { DeleteCriticalSection((LPCRITICAL_SECTION)data_); delete (LPCRITICAL_SECTION)data_; @@ -36,21 +36,21 @@ Mutex::~Mutex() } bool -Mutex::acquire() +MutexImpl::acquire() { EnterCriticalSection((LPCRITICAL_SECTION)data_); return true; } bool -Mutex::release() +MutexImpl::release() { LeaveCriticalSection((LPCRITICAL_SECTION)data_); return true; } bool -Mutex::tryacquire() +MutexImpl::tryacquire() { return TryEnterCriticalSection((LPCRITICAL_SECTION)data_); } diff --git a/lib/System/Win32/Path.inc b/lib/System/Win32/Path.inc index fbf8f66..683c94b 100644 --- a/lib/System/Win32/Path.inc +++ b/lib/System/Win32/Path.inc @@ -125,6 +125,20 @@ Path::isValid() const { return true; } +bool +Path::isAbsolute(const char *NameStart, unsigned NameLen) { + assert(NameStart); + switch (NameLen) { + case 0: + return false; + case 1: + case 2: + return NameStart[0] == '/'; + default: + return NameStart[0] == '/' || (NameStart[1] == ':' && NameStart[2] == '/'); + } +} + bool Path::isAbsolute() const { switch (path.length()) { @@ -234,7 +248,9 @@ Path::GetCurrentDirectory() { /// GetMainExecutable - Return the path to the main executable, given the /// value of argv[0] from program startup. Path Path::GetMainExecutable(const char *argv0, void *MainAddr) { - return Path(); + char pathname[MAX_PATH]; + DWORD ret = ::GetModuleFileNameA(NULL, pathname, MAX_PATH); + return ret != MAX_PATH ? 
Path(pathname) : Path(); } diff --git a/lib/System/Win32/RWMutex.inc b/lib/System/Win32/RWMutex.inc new file mode 100644 index 0000000..e269226 --- /dev/null +++ b/lib/System/Win32/RWMutex.inc @@ -0,0 +1,58 @@ +//= llvm/System/Win32/RWMutex.inc - Win32 Reader/Writer Mutual Exclusion Lock =// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the Win32 specific (non-pthread) RWMutex class. +// +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +//=== WARNING: Implementation here must contain only generic Win32 code that +//=== is guaranteed to work on *all* Win32 variants. +//===----------------------------------------------------------------------===// + +#include "Win32.h" + +// FIXME: Windows does not have reader-writer locks pre-Vista. If you want +// real reader-writer locks, you need a pthreads implementation for Windows. + +namespace llvm { +using namespace sys; + +RWMutexImpl::RWMutexImpl() { + data_ = calloc(1, sizeof(CRITICAL_SECTION)); + InitializeCriticalSection(static_cast<LPCRITICAL_SECTION>(data_)); +} + +RWMutexImpl::~RWMutexImpl() { + DeleteCriticalSection(static_cast<LPCRITICAL_SECTION>(data_)); + free(data_); +} + +bool RWMutexImpl::reader_acquire() { + EnterCriticalSection(static_cast<LPCRITICAL_SECTION>(data_)); + return true; +} + +bool RWMutexImpl::reader_release() { + LeaveCriticalSection(static_cast<LPCRITICAL_SECTION>(data_)); + return true; +} + +bool RWMutexImpl::writer_acquire() { + EnterCriticalSection(static_cast<LPCRITICAL_SECTION>(data_)); + return true; +} + +bool RWMutexImpl::writer_release() { + LeaveCriticalSection(static_cast<LPCRITICAL_SECTION>(data_)); + return true; +} + + +} diff --git a/lib/Target/ARM/ARM.td b/lib/Target/ARM/ARM.td index 594811d..9001e50 100644 --- a/lib/Target/ARM/ARM.td +++ b/lib/Target/ARM/ARM.td @@ -45,62 +45,72 @@ def FeatureThumb2 : SubtargetFeature<"thumb2", "ThumbMode", "Thumb2", // ARM Processors supported. // -class Proc<string Name, list<SubtargetFeature> Features> - : Processor<Name, NoItineraries, Features>; +include "ARMSchedule.td" + +class ProcNoItin<string Name, list<SubtargetFeature> Features> + : Processor<Name, GenericItineraries, Features>; // V4 Processors. -def : Proc<"generic", []>; -def : Proc<"arm8", []>; -def : Proc<"arm810", []>; -def : Proc<"strongarm", []>; -def : Proc<"strongarm110", []>; -def : Proc<"strongarm1100", []>; -def : Proc<"strongarm1110", []>; +def : ProcNoItin<"generic", []>; +def : ProcNoItin<"arm8", []>; +def : ProcNoItin<"arm810", []>; +def : ProcNoItin<"strongarm", []>; +def : ProcNoItin<"strongarm110", []>; +def : ProcNoItin<"strongarm1100", []>; +def : ProcNoItin<"strongarm1110", []>; // V4T Processors.
-def : Proc<"arm7tdmi", [ArchV4T]>; -def : Proc<"arm7tdmi-s", [ArchV4T]>; -def : Proc<"arm710t", [ArchV4T]>; -def : Proc<"arm720t", [ArchV4T]>; -def : Proc<"arm9", [ArchV4T]>; -def : Proc<"arm9tdmi", [ArchV4T]>; -def : Proc<"arm920", [ArchV4T]>; -def : Proc<"arm920t", [ArchV4T]>; -def : Proc<"arm922t", [ArchV4T]>; -def : Proc<"arm940t", [ArchV4T]>; -def : Proc<"ep9312", [ArchV4T]>; +def : ProcNoItin<"arm7tdmi", [ArchV4T]>; +def : ProcNoItin<"arm7tdmi-s", [ArchV4T]>; +def : ProcNoItin<"arm710t", [ArchV4T]>; +def : ProcNoItin<"arm720t", [ArchV4T]>; +def : ProcNoItin<"arm9", [ArchV4T]>; +def : ProcNoItin<"arm9tdmi", [ArchV4T]>; +def : ProcNoItin<"arm920", [ArchV4T]>; +def : ProcNoItin<"arm920t", [ArchV4T]>; +def : ProcNoItin<"arm922t", [ArchV4T]>; +def : ProcNoItin<"arm940t", [ArchV4T]>; +def : ProcNoItin<"ep9312", [ArchV4T]>; // V5T Processors. -def : Proc<"arm10tdmi", [ArchV5T]>; -def : Proc<"arm1020t", [ArchV5T]>; +def : ProcNoItin<"arm10tdmi", [ArchV5T]>; +def : ProcNoItin<"arm1020t", [ArchV5T]>; // V5TE Processors. -def : Proc<"arm9e", [ArchV5TE]>; -def : Proc<"arm926ej-s", [ArchV5TE]>; -def : Proc<"arm946e-s", [ArchV5TE]>; -def : Proc<"arm966e-s", [ArchV5TE]>; -def : Proc<"arm968e-s", [ArchV5TE]>; -def : Proc<"arm10e", [ArchV5TE]>; -def : Proc<"arm1020e", [ArchV5TE]>; -def : Proc<"arm1022e", [ArchV5TE]>; -def : Proc<"xscale", [ArchV5TE]>; -def : Proc<"iwmmxt", [ArchV5TE]>; +def : ProcNoItin<"arm9e", [ArchV5TE]>; +def : ProcNoItin<"arm926ej-s", [ArchV5TE]>; +def : ProcNoItin<"arm946e-s", [ArchV5TE]>; +def : ProcNoItin<"arm966e-s", [ArchV5TE]>; +def : ProcNoItin<"arm968e-s", [ArchV5TE]>; +def : ProcNoItin<"arm10e", [ArchV5TE]>; +def : ProcNoItin<"arm1020e", [ArchV5TE]>; +def : ProcNoItin<"arm1022e", [ArchV5TE]>; +def : ProcNoItin<"xscale", [ArchV5TE]>; +def : ProcNoItin<"iwmmxt", [ArchV5TE]>; // V6 Processors. -def : Proc<"arm1136j-s", [ArchV6]>; -def : Proc<"arm1136jf-s", [ArchV6, FeatureVFP2]>; -def : Proc<"arm1176jz-s", [ArchV6]>; -def : Proc<"arm1176jzf-s", [ArchV6, FeatureVFP2]>; -def : Proc<"mpcorenovfp", [ArchV6]>; -def : Proc<"mpcore", [ArchV6, FeatureVFP2]>; +def : Processor<"arm1136j-s", V6Itineraries, + [ArchV6]>; +def : Processor<"arm1136jf-s", V6Itineraries, + [ArchV6, FeatureVFP2]>; +def : Processor<"arm1176jz-s", V6Itineraries, + [ArchV6]>; +def : Processor<"arm1176jzf-s", V6Itineraries, + [ArchV6, FeatureVFP2]>; +def : Processor<"mpcorenovfp", V6Itineraries, + [ArchV6]>; +def : Processor<"mpcore", V6Itineraries, + [ArchV6, FeatureVFP2]>; // V6T2 Processors. -def : Proc<"arm1156t2-s", [ArchV6T2, FeatureThumb2]>; -def : Proc<"arm1156t2f-s", [ArchV6T2, FeatureThumb2, FeatureVFP2]>; +def : Processor<"arm1156t2-s", V6Itineraries, + [ArchV6T2, FeatureThumb2]>; +def : Processor<"arm1156t2f-s", V6Itineraries, + [ArchV6T2, FeatureThumb2, FeatureVFP2]>; // V7 Processors. 
-def : Proc<"cortex-a8", [ArchV7A, FeatureThumb2, FeatureNEON]>; -def : Proc<"cortex-a9", [ArchV7A, FeatureThumb2, FeatureNEON]>; +def : ProcNoItin<"cortex-a8", [ArchV7A, FeatureThumb2, FeatureNEON]>; +def : ProcNoItin<"cortex-a9", [ArchV7A, FeatureThumb2, FeatureNEON]>; //===----------------------------------------------------------------------===// // Register File Description diff --git a/lib/Target/ARM/ARMCallingConv.td b/lib/Target/ARM/ARMCallingConv.td index f126760..47151e6 100644 --- a/lib/Target/ARM/ARMCallingConv.td +++ b/lib/Target/ARM/ARMCallingConv.td @@ -17,11 +17,6 @@ class CCIfSubtarget<string F, CCAction A>: class CCIfAlign<string Align, CCAction A>: CCIf<!strconcat("ArgFlags.getOrigAlign() == ", Align), A>; -/// CCIfFloatABI - Match of the float ABI and the arg. ABIType may be "Hard" or -/// "Soft". -class CCIfFloatABI<string ABIType, CCAction A>: - CCIf<!strconcat("llvm::FloatABIType == llvm::FloatABI::", ABIType), A>; - //===----------------------------------------------------------------------===// // ARM APCS Calling Convention //===----------------------------------------------------------------------===// @@ -105,25 +100,3 @@ def RetCC_ARM_AAPCS_VFP : CallingConv<[ S9, S10, S11, S12, S13, S14, S15]>>, CCDelegateTo<RetCC_ARM_AAPCS_Common> ]>; - -//===----------------------------------------------------------------------===// -// ARM Calling Convention Dispatch -//===----------------------------------------------------------------------===// - -def CC_ARM : CallingConv<[ - CCIfSubtarget<"isAAPCS_ABI()", - CCIfSubtarget<"hasVFP2()", - CCIfFloatABI<"Hard", - CCDelegateTo<CC_ARM_AAPCS_VFP>>>>, - CCIfSubtarget<"isAAPCS_ABI()", CCDelegateTo<CC_ARM_AAPCS>>, - CCDelegateTo<CC_ARM_APCS> -]>; - -def RetCC_ARM : CallingConv<[ - CCIfSubtarget<"isAAPCS_ABI()", - CCIfSubtarget<"hasVFP2()", - CCIfFloatABI<"Hard", - CCDelegateTo<RetCC_ARM_AAPCS_VFP>>>>, - CCIfSubtarget<"isAAPCS_ABI()", CCDelegateTo<RetCC_ARM_AAPCS>>, - CCDelegateTo<RetCC_ARM_APCS> -]>; diff --git a/lib/Target/ARM/ARMCodeEmitter.cpp b/lib/Target/ARM/ARMCodeEmitter.cpp index 44fac12..f6629fe 100644 --- a/lib/Target/ARM/ARMCodeEmitter.cpp +++ b/lib/Target/ARM/ARMCodeEmitter.cpp @@ -900,6 +900,10 @@ void Emitter<CodeEmitter>::emitMiscLoadStoreInstruction(const MachineInstr &MI, // Set first operand Binary |= getMachineOpValue(MI, OpIdx++) << ARMII::RegRdShift; + // Skip LDRD and STRD's second operand. + if (TID.Opcode == ARM::LDRD || TID.Opcode == ARM::STRD) + ++OpIdx; + // Set second operand if (ImplicitRn) // Special handling for implicit use (e.g. PC). diff --git a/lib/Target/ARM/ARMISelDAGToDAG.cpp b/lib/Target/ARM/ARMISelDAGToDAG.cpp index ca3a9cb..1ed9e80 100644 --- a/lib/Target/ARM/ARMISelDAGToDAG.cpp +++ b/lib/Target/ARM/ARMISelDAGToDAG.cpp @@ -52,8 +52,13 @@ public: virtual const char *getPassName() const { return "ARM Instruction Selection"; - } - + } + + /// getI32Imm - Return a target constant with the specified value, of type i32. 
+ inline SDValue getI32Imm(unsigned Imm) { + return CurDAG->getTargetConstant(Imm, MVT::i32); + } + SDNode *Select(SDValue Op); virtual void InstructionSelect(); bool SelectAddrMode2(SDValue Op, SDValue N, SDValue &Base, @@ -84,6 +89,9 @@ public: bool SelectThumbAddrModeSP(SDValue Op, SDValue N, SDValue &Base, SDValue &OffImm); + bool SelectShifterOperand(SDValue Op, SDValue N, + SDValue &BaseReg, SDValue &Opc); + bool SelectShifterOperandReg(SDValue Op, SDValue N, SDValue &A, SDValue &B, SDValue &C); @@ -509,8 +517,30 @@ bool ARMDAGToDAGISel::SelectThumbAddrModeSP(SDValue Op, SDValue N, return false; } +bool ARMDAGToDAGISel::SelectShifterOperand(SDValue Op, + SDValue N, + SDValue &BaseReg, + SDValue &Opc) { + ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(N); + + // Don't match base register only case. That is matched to a separate + // lower complexity pattern with explicit register operand. + if (ShOpcVal == ARM_AM::no_shift) return false; + + BaseReg = N.getOperand(0); + unsigned ShImmVal = 0; + if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) + ShImmVal = RHS->getZExtValue() & 31; + else + return false; + + Opc = getI32Imm(ARM_AM::getSORegOpc(ShOpcVal, ShImmVal)); + + return true; +} + bool ARMDAGToDAGISel::SelectShifterOperandReg(SDValue Op, - SDValue N, + SDValue N, SDValue &BaseReg, SDValue &ShReg, SDValue &Opc) { @@ -549,6 +579,10 @@ SDNode *ARMDAGToDAGISel::Select(SDValue Op) { switch (N->getOpcode()) { default: break; case ISD::Constant: { + // ARMv6T2 and later should materialize imms via MOV / MOVT pair. + if (Subtarget->hasV6T2Ops() || Subtarget->hasThumb2()) + break; + unsigned Val = cast<ConstantSDNode>(N)->getZExtValue(); bool UseCP = true; if (Subtarget->isThumb()) diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp index ec8bd1f..2443625 100644 --- a/lib/Target/ARM/ARMISelLowering.cpp +++ b/lib/Target/ARM/ARMISelLowering.cpp @@ -292,6 +292,25 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM) setIfCvtBlockSizeLimit(Subtarget->isThumb() ? 0 : 10); setIfCvtDupBlockSizeLimit(Subtarget->isThumb() ? 0 : 2); + if (!Subtarget->isThumb()) { + // Use branch latency information to determine if-conversion limits. + // FIXME: If-converter should use instruction latency of the branch being + // eliminated to compute the threshold. For ARMv6, the branch "latency" + // varies depending on whether it's dynamically or statically predicted + // and on whether the destination is in the prefetch buffer. 
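The hunk that follows converts that latency estimate into concrete if-conversion thresholds. In isolation, the arithmetic looks like this (a standalone sketch, not the LLVM API; `latency` stands in for the itinerary query in the code below):

    // Map an estimated branch latency to if-conversion size limits.
    void ifcvtLimits(unsigned latency, unsigned &blockLimit, unsigned &dupLimit) {
      if (latency > 1) {
        blockLimit = latency - 1;   // blocks worth converting instead of branching
        if (latency > 2)
          dupLimit = latency - 2;   // blocks worth duplicating
      } else {
        blockLimit = 10;            // fall back to the static defaults
        dupLimit = 2;
      }
    }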
+ const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); + const InstrItineraryData &InstrItins = Subtarget->getInstrItineraryData(); + unsigned Latency = InstrItins.getLatency(TII->get(ARM::Bcc).getSchedClass()); + if (Latency > 1) { + setIfCvtBlockSizeLimit(Latency-1); + if (Latency > 2) + setIfCvtDupBlockSizeLimit(Latency-2); + } else { + setIfCvtBlockSizeLimit(10); + setIfCvtDupBlockSizeLimit(2); + } + } + maxStoresPerMemcpy = 1; //// temporary - rewrite interface to use type // Do not enable CodePlacementOpt for now: it currently runs after the // ARMConstantIslandPass and messes up branch relaxation and placement @@ -415,7 +434,7 @@ static bool CC_ARM_APCS_Custom_f64(unsigned &ValNo, MVT &ValVT, MVT &LocVT, ARM::NoRegister }; unsigned Reg = State.AllocateReg(HiRegList, LoRegList, 4); - if (Reg == 0) + if (Reg == 0) return false; // we didn't handle it unsigned i; @@ -487,6 +506,33 @@ static bool RetCC_ARM_AAPCS_Custom_f64(unsigned &ValNo, MVT &ValVT, MVT &LocVT, State); } +/// CCAssignFnForNode - Selects the correct CCAssignFn for the +/// given CallingConvention value. +CCAssignFn *ARMTargetLowering::CCAssignFnForNode(unsigned CC, + bool Return) const { + switch (CC) { + default: + assert(0 && "Unsupported calling convention"); + case CallingConv::C: + case CallingConv::Fast: + // Use target triple & subtarget features to do actual dispatch. + if (Subtarget->isAAPCS_ABI()) { + if (Subtarget->hasVFP2() && + FloatABIType == FloatABI::Hard) + return (Return ? RetCC_ARM_AAPCS_VFP: CC_ARM_AAPCS_VFP); + else + return (Return ? RetCC_ARM_AAPCS: CC_ARM_AAPCS); + } else + return (Return ? RetCC_ARM_APCS: CC_ARM_APCS); + case CallingConv::ARM_AAPCS_VFP: + return (Return ? RetCC_ARM_AAPCS_VFP: CC_ARM_AAPCS_VFP); + case CallingConv::ARM_AAPCS: + return (Return ? RetCC_ARM_AAPCS: CC_ARM_AAPCS); + case CallingConv::ARM_APCS: + return (Return ? RetCC_ARM_APCS: CC_ARM_APCS); + } +} + /// LowerCallResult - Lower the result values of an ISD::CALL into the /// appropriate copies out of appropriate physical registers. This assumes that /// Chain/InFlag are the input chain/flag to use, and that TheCall is the call @@ -501,7 +547,8 @@ LowerCallResult(SDValue Chain, SDValue InFlag, CallSDNode *TheCall, SmallVector<CCValAssign, 16> RVLocs; bool isVarArg = TheCall->isVarArg(); CCState CCInfo(CallingConv, isVarArg, getTargetMachine(), RVLocs); - CCInfo.AnalyzeCallResult(TheCall, RetCC_ARM); + CCInfo.AnalyzeCallResult(TheCall, + CCAssignFnForNode(CallingConv, /* Return*/ true)); SmallVector<SDValue, 8> ResultVals; @@ -586,8 +633,6 @@ SDValue ARMTargetLowering::LowerCALL(SDValue Op, SelectionDAG &DAG) { MVT RetVT = TheCall->getRetValType(0); SDValue Chain = TheCall->getChain(); unsigned CC = TheCall->getCallingConv(); - assert((CC == CallingConv::C || - CC == CallingConv::Fast) && "unknown calling convention"); bool isVarArg = TheCall->isVarArg(); SDValue Callee = TheCall->getCallee(); DebugLoc dl = TheCall->getDebugLoc(); @@ -595,7 +640,7 @@ SDValue ARMTargetLowering::LowerCALL(SDValue Op, SelectionDAG &DAG) { // Analyze operands of the call, assigning locations to each operand. SmallVector<CCValAssign, 16> ArgLocs; CCState CCInfo(CC, isVarArg, getTargetMachine(), ArgLocs); - CCInfo.AnalyzeCallOperands(TheCall, CC_ARM); + CCInfo.AnalyzeCallOperands(TheCall, CCAssignFnForNode(CC, /* Return*/ false)); // Get a count of how many bytes are to be pushed on the stack.
unsigned NumBytes = CCInfo.getNextStackOffset(); @@ -788,7 +833,7 @@ SDValue ARMTargetLowering::LowerRET(SDValue Op, SelectionDAG &DAG) { CCState CCInfo(CC, isVarArg, getTargetMachine(), RVLocs); // Analyze return values of ISD::RET. - CCInfo.AnalyzeReturn(Op.getNode(), RetCC_ARM); + CCInfo.AnalyzeReturn(Op.getNode(), CCAssignFnForNode(CC, /* Return */ true)); // If this is the first return lowered for this function, add // the regs to the liveout set for the function. @@ -1085,7 +1130,8 @@ ARMTargetLowering::LowerFORMAL_ARGUMENTS(SDValue Op, SelectionDAG &DAG) { // Assign locations to all of the incoming arguments. SmallVector<CCValAssign, 16> ArgLocs; CCState CCInfo(CC, isVarArg, getTargetMachine(), ArgLocs); - CCInfo.AnalyzeFormalArguments(Op.getNode(), CC_ARM); + CCInfo.AnalyzeFormalArguments(Op.getNode(), + CCAssignFnForNode(CC, /* Return*/ false)); SmallVector<SDValue, 16> ArgValues; @@ -1456,7 +1502,7 @@ SDValue ARMTargetLowering::LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) { MVT VT = Op.getValueType(); DebugLoc dl = Op.getDebugLoc(); // FIXME probably not meaningful unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue(); - unsigned FrameReg = (Subtarget->isThumb() || Subtarget->useThumbBacktraces()) + unsigned FrameReg = (Subtarget->isThumb() || Subtarget->isTargetDarwin()) ? ARM::R7 : ARM::R11; SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), dl, FrameReg, VT); while (Depth--) diff --git a/lib/Target/ARM/ARMISelLowering.h b/lib/Target/ARM/ARMISelLowering.h index 2dab2db..8f53e39 100644 --- a/lib/Target/ARM/ARMISelLowering.h +++ b/lib/Target/ARM/ARMISelLowering.h @@ -151,6 +151,7 @@ namespace llvm { /// unsigned ARMPCLabelIndex; + CCAssignFn *CCAssignFnForNode(unsigned CC, bool Return) const; SDValue LowerMemOpCallTo(CallSDNode *TheCall, SelectionDAG &DAG, const SDValue &StackPtr, const CCValAssign &VA, SDValue Chain, SDValue Arg, ISD::ArgFlagsTy Flags); diff --git a/lib/Target/ARM/ARMInstrInfo.cpp b/lib/Target/ARM/ARMInstrInfo.cpp index 4b0dbb5..d19fb8e 100644 --- a/lib/Target/ARM/ARMInstrInfo.cpp +++ b/lib/Target/ARM/ARMInstrInfo.cpp @@ -697,7 +697,6 @@ restoreCalleeSavedRegisters(MachineBasicBlock &MBB, bool isVarArg = AFI->getVarArgsRegSaveSize() > 0; MachineInstr *PopMI = MF.CreateMachineInstr(get(ARM::tPOP),MI->getDebugLoc()); - MBB.insert(MI, PopMI); for (unsigned i = CSI.size(); i != 0; --i) { unsigned Reg = CSI[i-1].getReg(); if (Reg == ARM::LR) { @@ -706,10 +705,15 @@ restoreCalleeSavedRegisters(MachineBasicBlock &MBB, continue; Reg = ARM::PC; PopMI->setDesc(get(ARM::tPOP_RET)); - MBB.erase(MI); + MI = MBB.erase(MI); } PopMI->addOperand(MachineOperand::CreateReg(Reg, true)); } + + // It's illegal to emit a pop instruction without operands. + if (PopMI->getNumOperands() > 0) + MBB.insert(MI, PopMI); + return true; } diff --git a/lib/Target/ARM/ARMInstrInfo.td b/lib/Target/ARM/ARMInstrInfo.td index cc9f1a5..4707e3b 100644 --- a/lib/Target/ARM/ARMInstrInfo.td +++ b/lib/Target/ARM/ARMInstrInfo.td @@ -90,12 +90,12 @@ def ARMeh_sjlj_setjmp: SDNode<"ARMISD::EH_SJLJ_SETJMP", SDT_ARMEH_SJLJ_Setjmp>; //===----------------------------------------------------------------------===// // ARM Instruction Predicate Definitions.
// -def HasV5T : Predicate<"Subtarget->hasV5TOps()">; -def HasV5TE : Predicate<"Subtarget->hasV5TEOps()">; -def HasV6 : Predicate<"Subtarget->hasV6Ops()">; -def IsThumb : Predicate<"Subtarget->isThumb()">; -def IsThumb2 : Predicate<"Subtarget->isThumb2()">; -def IsARM : Predicate<"!Subtarget->isThumb()">; +def HasV5T : Predicate<"Subtarget->hasV5TOps()">; +def HasV5TE : Predicate<"Subtarget->hasV5TEOps()">; +def HasV6 : Predicate<"Subtarget->hasV6Ops()">; +def IsThumb : Predicate<"Subtarget->isThumb()">; +def HasThumb2 : Predicate<"Subtarget->hasThumb2()">; +def IsARM : Predicate<"!Subtarget->isThumb()">; //===----------------------------------------------------------------------===// // ARM Flag Definitions. @@ -539,7 +539,7 @@ let isReturn = 1, isTerminator = 1 in LdStMulFrm, "ldm${p}${addr:submode} $addr, $dst1", []>; -let isCall = 1, +let isCall = 1, Itinerary = IIC_Br, Defs = [R0, R1, R2, R3, R12, LR, D0, D1, D2, D3, D4, D5, D6, D7, CPSR] in { def BL : ABXI<0b1011, (outs), (ins i32imm:$func, variable_ops), @@ -567,7 +567,7 @@ let isCall = 1, } } -let isBranch = 1, isTerminator = 1 in { +let isBranch = 1, isTerminator = 1, Itinerary = IIC_Br in { // B is "predicable" since it can be xformed into a Bcc. let isBarrier = 1 in { let isPredicable = 1 in @@ -647,9 +647,8 @@ def LDRSB : AI3ldsb<(outs GPR:$dst), (ins addrmode3:$addr), LdMiscFrm, let mayLoad = 1 in { // Load doubleword -def LDRD : AI3ldd<(outs GPR:$dst), (ins addrmode3:$addr), LdMiscFrm, - "ldr", "d $dst, $addr", - []>, Requires<[IsARM, HasV5T]>; +def LDRD : AI3ldd<(outs GPR:$dst1, GPR:$dst2), (ins addrmode3:$addr), LdMiscFrm, + "ldr", "d $dst1, $addr", []>, Requires<[IsARM, HasV5T]>; // Indexed loads def LDR_PRE : AI2ldwpr<(outs GPR:$dst, GPR:$base_wb), @@ -709,9 +708,8 @@ def STRB : AI2stb<(outs), (ins GPR:$src, addrmode2:$addr), StFrm, // Store doubleword let mayStore = 1 in -def STRD : AI3std<(outs), (ins GPR:$src, addrmode3:$addr), StMiscFrm, - "str", "d $src, $addr", - []>, Requires<[IsARM, HasV5T]>; +def STRD : AI3std<(outs), (ins GPR:$src1, GPR:$src2, addrmode3:$addr),StMiscFrm, + "str", "d $src1, $addr", []>, Requires<[IsARM, HasV5T]>; // Indexed stores def STR_PRE : AI2stwpr<(outs GPR:$base_wb), @@ -1387,6 +1385,12 @@ def : ARMV5TEPat<(add GPR:$acc, include "ARMInstrThumb.td" //===----------------------------------------------------------------------===// +// Thumb2 Support +// + +include "ARMInstrThumb2.td" + +//===----------------------------------------------------------------------===// // Floating Point Support // diff --git a/lib/Target/ARM/ARMInstrThumb.td b/lib/Target/ARM/ARMInstrThumb.td index 54232f6..9297f08 100644 --- a/lib/Target/ARM/ARMInstrThumb.td +++ b/lib/Target/ARM/ARMInstrThumb.td @@ -277,6 +277,7 @@ def tPUSH : TI<(outs), (ins reglist:$src1, variable_ops), // // Add with carry +let isCommutable = 1 in def tADC : TIt<(outs tGPR:$dst), (ins tGPR:$lhs, tGPR:$rhs), "adc $dst, $rhs", [(set tGPR:$dst, (adde tGPR:$lhs, tGPR:$rhs))]>; @@ -311,6 +312,7 @@ def tADDrSPi : TI<(outs tGPR:$dst), (ins GPR:$sp, i32imm:$rhs), def tADDspi : TIt<(outs GPR:$dst), (ins GPR:$lhs, i32imm:$rhs), "add $dst, $rhs * 4", []>; +let isCommutable = 1 in def tAND : TIt<(outs tGPR:$dst), (ins tGPR:$lhs, tGPR:$rhs), "and $dst, $rhs", [(set tGPR:$dst, (and tGPR:$lhs, tGPR:$rhs))]>; @@ -358,6 +360,7 @@ def tCMPNZr : TI<(outs), (ins tGPR:$lhs, tGPR:$rhs), // TODO: A7-37: CMP(3) - cmp hi regs +let isCommutable = 1 in def tEOR : TIt<(outs tGPR:$dst), (ins tGPR:$lhs, tGPR:$rhs), "eor $dst, $rhs", [(set tGPR:$dst, (xor tGPR:$lhs, 
tGPR:$rhs))]>; @@ -399,6 +402,7 @@ def tMOVhir2hir : TI<(outs GPR:$dst), (ins GPR:$src), "cpy $dst, $src\t@ hir2hir", []>; } // neverHasSideEffects +let isCommutable = 1 in def tMUL : TIt<(outs tGPR:$dst), (ins tGPR:$lhs, tGPR:$rhs), "mul $dst, $rhs", [(set tGPR:$dst, (mul tGPR:$lhs, tGPR:$rhs))]>; @@ -411,6 +415,7 @@ def tNEG : TI<(outs tGPR:$dst), (ins tGPR:$src), "neg $dst, $src", [(set tGPR:$dst, (ineg tGPR:$src))]>; +let isCommutable = 1 in def tORR : TIt<(outs tGPR:$dst), (ins tGPR:$lhs, tGPR:$rhs), "orr $dst, $rhs", [(set tGPR:$dst, (or tGPR:$lhs, tGPR:$rhs))]>; diff --git a/lib/Target/ARM/ARMInstrThumb2.td b/lib/Target/ARM/ARMInstrThumb2.td index 168fb45..07c71da 100644 --- a/lib/Target/ARM/ARMInstrThumb2.td +++ b/lib/Target/ARM/ARMInstrThumb2.td @@ -10,3 +10,199 @@ // This file describes the Thumb2 instruction set. // //===----------------------------------------------------------------------===// + +// Shifted operands. No register controlled shifts for Thumb2. +// Note: We do not support rrx shifted operands yet. +def t2_so_reg : Operand<i32>, // reg imm + ComplexPattern<i32, 2, "SelectShifterOperand", + [shl,srl,sra,rotr]> { + let PrintMethod = "printSOOperand"; + let MIOperandInfo = (ops GPR, i32imm); +} + +def LO16 : SDNodeXForm<imm, [{ + // Transformation function: get the low 16 bits of the immediate value. + return getI32Imm((unsigned short)N->getZExtValue()); +}]>; + +def HI16 : SDNodeXForm<imm, [{ + // Transformation function: shift the immediate value down into the low bits. + return getI32Imm((unsigned)N->getZExtValue() >> 16); +}]>; + +def imm16high : PatLeaf<(i32 imm), [{ + // Returns true if all bits out of the [31..16] range are 0. + return ((N->getZExtValue() & 0xFFFF0000ULL) == N->getZExtValue()); +}], HI16>; + +def imm16high0xffff : PatLeaf<(i32 imm), [{ + // Returns true if lo 16 bits are set and this is a 32-bit value. + return ((N->getZExtValue() & 0x0000FFFFULL) == 0xFFFFULL); +}], HI16>; + +def imm0_4095 : PatLeaf<(i32 imm), [{ + return (uint32_t)N->getZExtValue() < 4096; +}]>; + +def imm0_4095_neg : PatLeaf<(i32 imm), [{ + return (uint32_t)-N->getZExtValue() < 4096; +}], imm_neg_XFORM>; + +def imm0_65535 : PatLeaf<(i32 imm), [{ + return N->getZExtValue() < 65536; +}]>; + +// A6.3.2 Modified immediate constants in Thumb instructions (#<const>) +// FIXME: Move it to the addrmode matcher code. +def t2_so_imm : PatLeaf<(i32 imm), [{ + uint64_t v = N->getZExtValue(); + if (v == 0 || v > 0xffffffffUL) return false; + // variant1 - 0b0000x - 8-bit which could be zero (not supported for now) + + // variant2 - 0b00nnx - 8-bit repeated inside the 32-bit room + unsigned hi16 = (unsigned)(v >> 16); + unsigned lo16 = (unsigned)(v & 0xffffUL); + bool valid = (hi16 == lo16) && ( + (v & 0x00ff00ffUL) == 0 || // type 0001x + (v & 0xff00ff00UL) == 0 || // type 0010x + ((lo16 >> 8) == (lo16 & 0xff))); // type 0011x + if (valid) return true; + + // variant3 - 0b01000..0b11111 - 8-bit shifted inside the 32-bit room + unsigned shift = CountLeadingZeros_32(v); + uint64_t mask = (0xff000000ULL >> shift); + // If valid, it is type 01000 + shift + return ((shift < 24) && (v & mask) > 0) && ((v & (~mask)) == 0); +}]>; + + +//===----------------------------------------------------------------------===// +// Thumb-2 instructions to cover the functionality of the ARM instruction set. +// + +/// T2I_bin_irs - Defines a set of (op reg, {so_imm|reg|so_reg}) patterns for a /// binary operation that produces a value.
+multiclass T2I_bin_irs<string opc, PatFrag opnode> { + // shifted imm + def ri : PseudoInst<(outs GPR:$dst), (ins GPR:$lhs, i32imm:$rhs), + !strconcat(opc, " $dst, $lhs, $rhs"), + [(set GPR:$dst, (opnode GPR:$lhs, t2_so_imm:$rhs))]>, + Requires<[HasThumb2]>; + // register + def rr : PseudoInst<(outs GPR:$dst), (ins GPR:$lhs, GPR:$rhs), + !strconcat(opc, " $dst, $lhs, $rhs"), + [(set GPR:$dst, (opnode GPR:$lhs, GPR:$rhs))]>, + Requires<[HasThumb2]>; + // shifted register + def rs : PseudoInst<(outs GPR:$dst), (ins GPR:$lhs, t2_so_reg:$rhs), + !strconcat(opc, " $dst, $lhs, $rhs"), + [(set GPR:$dst, (opnode GPR:$lhs, t2_so_reg:$rhs))]>, + Requires<[HasThumb2]>; +} + +/// T2I_bin_s_irs - Similar to T2I_bin_irs except it sets the 's' bit so the +/// instruction modifies the CPSR register. +let Defs = [CPSR] in { +multiclass T2I_bin_s_irs<string opc, PatFrag opnode> { + // shifted imm + def ri : PseudoInst<(outs GPR:$dst), (ins GPR:$lhs, i32imm:$rhs), + !strconcat(opc, "s $dst, $lhs, $rhs"), + [(set GPR:$dst, (opnode GPR:$lhs, t2_so_imm:$rhs))]>, + Requires<[HasThumb2]>; + + // register + def rr : PseudoInst<(outs GPR:$dst), (ins GPR:$lhs, GPR:$rhs), + !strconcat(opc, "s $dst, $lhs, $rhs"), + [(set GPR:$dst, (opnode GPR:$lhs, GPR:$rhs))]>, + Requires<[HasThumb2]>; + + // shifted register + def rs : PseudoInst<(outs GPR:$dst), (ins GPR:$lhs, t2_so_reg:$rhs), + !strconcat(opc, "s $dst, $lhs, $rhs"), + [(set GPR:$dst, (opnode GPR:$lhs, t2_so_reg:$rhs))]>, + Requires<[HasThumb2]>; +} +} + +/// T2I_bin_c_irs - Similar to T2I_bin_irs except it uses the 's' bit. Also the +/// instruction can optionally set the CPSR register. +let Uses = [CPSR] in { +multiclass T2I_bin_c_irs<string opc, PatFrag opnode> { + // shifted imm + def ri : PseudoInst<(outs GPR:$dst), (ins GPR:$lhs, i32imm:$rhs, cc_out:$s), + !strconcat(opc, "${s} $dst, $lhs, $rhs"), + [(set GPR:$dst, (opnode GPR:$lhs, t2_so_imm:$rhs))]>, + Requires<[HasThumb2]>; + + // register + def rr : PseudoInst<(outs GPR:$dst), (ins GPR:$lhs, GPR:$rhs, cc_out:$s), + !strconcat(opc, "${s} $dst, $lhs, $rhs"), + [(set GPR:$dst, (opnode GPR:$lhs, GPR:$rhs))]>, + Requires<[HasThumb2]>; + + // shifted register + def rs : PseudoInst<(outs GPR:$dst), (ins GPR:$lhs, t2_so_reg:$rhs, cc_out:$s), + !strconcat(opc, "${s} $dst, $lhs, $rhs"), + [(set GPR:$dst, (opnode GPR:$lhs, t2_so_reg:$rhs))]>, + Requires<[HasThumb2]>; +} +} + +//===----------------------------------------------------------------------===// +// Arithmetic Instructions. +// + +//===----------------------------------------------------------------------===// +// Move Instructions. +// +def tMOVi16 : PseudoInst<(outs GPR:$dst), (ins i32imm:$src), + "movw $dst, $src", + [(set GPR:$dst, imm0_65535:$src)]>, + Requires<[HasThumb2]>; + +let isTwoAddress = 1 in +def tMOVTi16 : PseudoInst<(outs GPR:$dst), (ins GPR:$src, i32imm:$imm), + "movt $dst, $imm", + [(set GPR:$dst, (or (and GPR:$src, 0xffff), + imm16high:$imm))]>, + Requires<[HasThumb2]>; + +def : Pat<(and (or GPR:$src, imm16high:$imm1), imm16high0xffff:$imm2), + (tMOVTi16 GPR:$src, (HI16 imm16high:$imm1))>, + Requires<[HasThumb2]>; + +def : Pat<(i32 imm:$imm), + (tMOVTi16 (tMOVi16 (LO16 imm:$imm)),(HI16 imm:$imm))>, + Requires<[HasThumb2]>; + +//===----------------------------------------------------------------------===// +// Arithmetic Instructions. 
+// +defm t2ADD : T2I_bin_irs <"add", BinOpFrag<(add node:$LHS, node:$RHS)>>; +defm t2SUB : T2I_bin_irs <"sub", BinOpFrag<(sub node:$LHS, node:$RHS)>>; + +def tADDri12 : PseudoInst<(outs GPR:$dst), (ins GPR:$lhs, i32imm:$rhs), + "add $dst, $lhs, $rhs", + [(set GPR:$dst, (add GPR:$lhs, imm0_4095:$rhs))]>, + Requires<[HasThumb2]>; +def tSUBri12 : PseudoInst<(outs GPR:$dst), (ins GPR:$lhs, i32imm:$rhs), + "sub $dst, $lhs, $rhs", + [(set GPR:$dst, (add GPR:$lhs, imm0_4095_neg:$rhs))]>, + Requires<[HasThumb2]>; + +defm t2ADDS : T2I_bin_s_irs<"add", BinOpFrag<(addc node:$LHS, node:$RHS)>>; +defm t2SUBS : T2I_bin_s_irs<"sub", BinOpFrag<(subc node:$LHS, node:$RHS)>>; + +defm t2ADC : T2I_bin_c_irs<"adc", BinOpFrag<(adde node:$LHS, node:$RHS)>>; +defm t2SBC : T2I_bin_c_irs<"sbc", BinOpFrag<(sube node:$LHS, node:$RHS)>>; + + +def tMLS : PseudoInst<(outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$c), + "mls $dst, $a, $b, $c", + [(set GPR:$dst, (sub GPR:$c, (mul GPR:$a, GPR:$b)))]>, + Requires<[HasThumb2]>; + +def tORNrs : PseudoInst<(outs GPR:$dst), (ins GPR:$src1, t2_so_reg:$src2), + "orn $dst, $src1, $src2", + [(set GPR:$dst, (or GPR:$src1, (not t2_so_reg: $src2)))]>, + Requires<[HasThumb2]>; diff --git a/lib/Target/ARM/ARMLoadStoreOptimizer.cpp b/lib/Target/ARM/ARMLoadStoreOptimizer.cpp index 684ecb4..59cf125 100644 --- a/lib/Target/ARM/ARMLoadStoreOptimizer.cpp +++ b/lib/Target/ARM/ARMLoadStoreOptimizer.cpp @@ -17,19 +17,22 @@ #include "ARMAddressingModes.h" #include "ARMMachineFunctionInfo.h" #include "ARMRegisterInfo.h" +#include "llvm/DerivedTypes.h" #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/RegisterScavenging.h" -#include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/Target/TargetData.h" #include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetRegisterInfo.h" #include "llvm/Support/Compiler.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/SmallSet.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" using namespace llvm; @@ -39,6 +42,12 @@ STATISTIC(NumSTMGened , "Number of stm instructions generated"); STATISTIC(NumFLDMGened, "Number of fldm instructions generated"); STATISTIC(NumFSTMGened, "Number of fstm instructions generated"); STATISTIC(NumLdStMoved, "Number of load / store instructions moved"); +STATISTIC(NumLDRDFormed,"Number of ldrd created before allocation"); +STATISTIC(NumSTRDFormed,"Number of strd created before allocation"); +STATISTIC(NumLDRD2LDM, "Number of ldrd instructions turned back into ldm"); +STATISTIC(NumSTRD2STM, "Number of strd instructions turned back into stm"); +STATISTIC(NumLDRD2LDR, "Number of ldrd instructions turned back into ldr's"); +STATISTIC(NumSTRD2STR, "Number of strd instructions turned back into str's"); /// ARMAllocLoadStoreOpt - Post-register-allocation pass to combine /// load / store instructions to form ldm / stm instructions.
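The pass added below forms and splits LDRD/STRD pairs; to make the legality test concrete, here is a simplified standalone sketch of the check it applies (the names are illustrative stand-ins for the MachineInstr queries used in the code that follows, not LLVM API):

    // Returns true if two word loads at base+off0 and base+off1 that target
    // consecutive registers can be folded into a single LDRD.
    bool canFoldToLDRD(unsigned evenRegNum, unsigned oddRegNum,
                       int off0, int off1) {
      if ((evenRegNum & 1) != 0 || oddRegNum != evenRegNum + 1)
        return false;                      // LDRD needs an even/odd register pair
      if (off1 != off0 + 4)
        return false;                      // the two words must be adjacent
      return off0 > -256 && off0 < 256;    // addrmode3 immediates are 8 bits
    }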
@@ -82,6 +91,8 @@ namespace { SmallVector<MachineBasicBlock::iterator, 4> &Merges); void AdvanceRS(MachineBasicBlock &MBB, MemOpQueue &MemOps); + bool FixInvalidRegPairOp(MachineBasicBlock &MBB, + MachineBasicBlock::iterator &MBBI); bool LoadStoreMultipleOpti(MachineBasicBlock &MBB); bool MergeReturnIntoLDM(MachineBasicBlock &MBB); }; @@ -586,13 +597,19 @@ void ARMLoadStoreOpt::AdvanceRS(MachineBasicBlock &MBB, MemOpQueue &MemOps) { static int getMemoryOpOffset(const MachineInstr *MI) { int Opcode = MI->getOpcode(); bool isAM2 = Opcode == ARM::LDR || Opcode == ARM::STR; + bool isAM3 = Opcode == ARM::LDRD || Opcode == ARM::STRD; unsigned NumOperands = MI->getDesc().getNumOperands(); unsigned OffField = MI->getOperand(NumOperands-3).getImm(); int Offset = isAM2 - ? ARM_AM::getAM2Offset(OffField) : ARM_AM::getAM5Offset(OffField) * 4; + ? ARM_AM::getAM2Offset(OffField) + : (isAM3 ? ARM_AM::getAM3Offset(OffField) + : ARM_AM::getAM5Offset(OffField) * 4); if (isAM2) { if (ARM_AM::getAM2Op(OffField) == ARM_AM::sub) Offset = -Offset; + } else if (isAM3) { + if (ARM_AM::getAM3Op(OffField) == ARM_AM::sub) + Offset = -Offset; } else { if (ARM_AM::getAM5Op(OffField) == ARM_AM::sub) Offset = -Offset; @@ -600,6 +617,120 @@ static int getMemoryOpOffset(const MachineInstr *MI) { return Offset; } +static void InsertLDR_STR(MachineBasicBlock &MBB, + MachineBasicBlock::iterator &MBBI, + int OffImm, bool isDef, + DebugLoc dl, unsigned NewOpc, + unsigned Reg, bool RegDeadKill, + unsigned BaseReg, bool BaseKill, + unsigned OffReg, bool OffKill, + ARMCC::CondCodes Pred, unsigned PredReg, + const TargetInstrInfo *TII) { + unsigned Offset; + if (OffImm < 0) + Offset = ARM_AM::getAM2Opc(ARM_AM::sub, -OffImm, ARM_AM::no_shift); + else + Offset = ARM_AM::getAM2Opc(ARM_AM::add, OffImm, ARM_AM::no_shift); + if (isDef) + BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(NewOpc)) + .addReg(Reg, getDefRegState(true) | getDeadRegState(RegDeadKill)) + .addReg(BaseReg, getKillRegState(BaseKill)) + .addReg(OffReg, getKillRegState(OffKill)) + .addImm(Offset) + .addImm(Pred).addReg(PredReg); + else + BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(NewOpc)) + .addReg(Reg, getKillRegState(RegDeadKill)) + .addReg(BaseReg, getKillRegState(BaseKill)) + .addReg(OffReg, getKillRegState(OffKill)) + .addImm(Offset) + .addImm(Pred).addReg(PredReg); +} + +bool ARMLoadStoreOpt::FixInvalidRegPairOp(MachineBasicBlock &MBB, + MachineBasicBlock::iterator &MBBI) { + MachineInstr *MI = &*MBBI; + unsigned Opcode = MI->getOpcode(); + if (Opcode == ARM::LDRD || Opcode == ARM::STRD) { + unsigned EvenReg = MI->getOperand(0).getReg(); + unsigned OddReg = MI->getOperand(1).getReg(); + unsigned EvenRegNum = TRI->getDwarfRegNum(EvenReg, false); + unsigned OddRegNum = TRI->getDwarfRegNum(OddReg, false); + if ((EvenRegNum & 1) == 0 && (EvenRegNum + 1) == OddRegNum) + return false; + + bool isLd = Opcode == ARM::LDRD; + bool EvenDeadKill = isLd ? + MI->getOperand(0).isDead() : MI->getOperand(0).isKill(); + bool OddDeadKill = isLd ? + MI->getOperand(1).isDead() : MI->getOperand(1).isKill(); + const MachineOperand &BaseOp = MI->getOperand(2); + unsigned BaseReg = BaseOp.getReg(); + bool BaseKill = BaseOp.isKill(); + const MachineOperand &OffOp = MI->getOperand(3); + unsigned OffReg = OffOp.getReg(); + bool OffKill = OffOp.isKill(); + int OffImm = getMemoryOpOffset(MI); + unsigned PredReg = 0; + ARMCC::CondCodes Pred = getInstrPredicate(MI, PredReg); + + if (OddRegNum > EvenRegNum && OffReg == 0 && OffImm == 0) { + // Ascending register numbers and no offset. 
It's safe to change it to a + // ldm or stm. + unsigned NewOpc = (Opcode == ARM::LDRD) ? ARM::LDM : ARM::STM; + if (isLd) { + BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(NewOpc)) + .addReg(BaseReg, getKillRegState(BaseKill)) + .addImm(ARM_AM::getAM4ModeImm(ARM_AM::ia)) + .addImm(Pred).addReg(PredReg) + .addReg(EvenReg, getDefRegState(isLd) | getDeadRegState(EvenDeadKill)) + .addReg(OddReg, getDefRegState(isLd) | getDeadRegState(OddDeadKill)); + ++NumLDRD2LDM; + } else { + BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(NewOpc)) + .addReg(BaseReg, getKillRegState(BaseKill)) + .addImm(ARM_AM::getAM4ModeImm(ARM_AM::ia)) + .addImm(Pred).addReg(PredReg) + .addReg(EvenReg, getKillRegState(EvenDeadKill)) + .addReg(OddReg, getKillRegState(OddDeadKill)); + ++NumSTRD2STM; + } + } else { + // Split into two instructions. + unsigned NewOpc = (Opcode == ARM::LDRD) ? ARM::LDR : ARM::STR; + DebugLoc dl = MBBI->getDebugLoc(); + // If this is a load and base register is killed, it may have been + // re-defed by the load, make sure the first load does not clobber it. + if (isLd && + (BaseKill || OffKill) && + (TRI->regsOverlap(EvenReg, BaseReg) || + (OffReg && TRI->regsOverlap(EvenReg, OffReg)))) { + assert(!TRI->regsOverlap(OddReg, BaseReg) && + (!OffReg || !TRI->regsOverlap(OddReg, OffReg))); + InsertLDR_STR(MBB, MBBI, OffImm+4, isLd, dl, NewOpc, OddReg, OddDeadKill, + BaseReg, false, OffReg, false, Pred, PredReg, TII); + InsertLDR_STR(MBB, MBBI, OffImm, isLd, dl, NewOpc, EvenReg, EvenDeadKill, + BaseReg, BaseKill, OffReg, OffKill, Pred, PredReg, TII); + } else { + InsertLDR_STR(MBB, MBBI, OffImm, isLd, dl, NewOpc, + EvenReg, EvenDeadKill, BaseReg, false, OffReg, false, + Pred, PredReg, TII); + InsertLDR_STR(MBB, MBBI, OffImm+4, isLd, dl, NewOpc, + OddReg, OddDeadKill, BaseReg, BaseKill, OffReg, OffKill, + Pred, PredReg, TII); + } + if (isLd) + ++NumLDRD2LDR; + else + ++NumSTRD2STR; + } + + MBBI = prior(MBBI); + MBB.erase(MI); + } + return false; +} + /// LoadStoreMultipleOpti - An optimization pass to turn multiple LDR / STR /// ops of the same base and incrementing offset into LDM / STM ops. 
bool ARMLoadStoreOpt::LoadStoreMultipleOpti(MachineBasicBlock &MBB) { @@ -617,6 +748,9 @@ bool ARMLoadStoreOpt::LoadStoreMultipleOpti(MachineBasicBlock &MBB) { RS->enterBasicBlock(&MBB); MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end(); while (MBBI != E) { + if (FixInvalidRegPairOp(MBB, MBBI)) + continue; + bool Advance = false; bool TryMerge = false; bool Clobber = false; @@ -817,8 +951,10 @@ namespace { static char ID; ARMPreAllocLoadStoreOpt() : MachineFunctionPass(&ID) {} + const TargetData *TD; const TargetInstrInfo *TII; const TargetRegisterInfo *TRI; + const ARMSubtarget *STI; MachineRegisterInfo *MRI; virtual bool runOnMachineFunction(MachineFunction &Fn); @@ -828,6 +964,11 @@ namespace { } private: + bool CanFormLdStDWord(MachineInstr *Op0, MachineInstr *Op1, DebugLoc &dl, + unsigned &NewOpc, unsigned &EvenReg, + unsigned &OddReg, unsigned &BaseReg, + unsigned &OffReg, unsigned &Offset, + unsigned &PredReg, ARMCC::CondCodes &Pred); bool RescheduleOps(MachineBasicBlock *MBB, SmallVector<MachineInstr*, 4> &Ops, unsigned Base, bool isLd, @@ -838,8 +979,10 @@ namespace { } bool ARMPreAllocLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) { + TD = Fn.getTarget().getTargetData(); TII = Fn.getTarget().getInstrInfo(); TRI = Fn.getTarget().getRegisterInfo(); + STI = &Fn.getTarget().getSubtarget<ARMSubtarget>(); MRI = &Fn.getRegInfo(); bool Modified = false; @@ -850,15 +993,19 @@ bool ARMPreAllocLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) { return Modified; } -static bool IsSafeToMove(bool isLd, unsigned Base, - MachineBasicBlock::iterator I, - MachineBasicBlock::iterator E, - SmallPtrSet<MachineInstr*, 4> MoveOps, - const TargetRegisterInfo *TRI) { +static bool IsSafeAndProfitableToMove(bool isLd, unsigned Base, + MachineBasicBlock::iterator I, + MachineBasicBlock::iterator E, + SmallPtrSet<MachineInstr*, 4> &MemOps, + SmallSet<unsigned, 4> &MemRegs, + const TargetRegisterInfo *TRI) { // Are there stores / loads / calls between them? // FIXME: This is overly conservative. We should make use of alias information // some day. + SmallSet<unsigned, 4> AddedRegPressure; while (++I != E) { + if (MemOps.count(&*I)) + continue; const TargetInstrDesc &TID = I->getDesc(); if (TID.isCall() || TID.isTerminator() || TID.hasUnmodeledSideEffects()) return false; @@ -871,15 +1018,76 @@ static bool IsSafeToMove(bool isLd, unsigned Base, // str r1, [r0] // strh r5, [r0] // str r4, [r0, #+4] - if (TID.mayStore() && !MoveOps.count(&*I)) + if (TID.mayStore()) return false; } for (unsigned j = 0, NumOps = I->getNumOperands(); j != NumOps; ++j) { MachineOperand &MO = I->getOperand(j); - if (MO.isReg() && MO.isDef() && TRI->regsOverlap(MO.getReg(), Base)) + if (!MO.isReg()) + continue; + unsigned Reg = MO.getReg(); + if (MO.isDef() && TRI->regsOverlap(Reg, Base)) return false; + if (Reg != Base && !MemRegs.count(Reg)) + AddedRegPressure.insert(Reg); } } + + // Estimate register pressure increase due to the transformation. + if (MemRegs.size() <= 4) + // OK if we are moving a small number of instructions.
+ return true; + return AddedRegPressure.size() <= MemRegs.size() * 2; +} + +bool +ARMPreAllocLoadStoreOpt::CanFormLdStDWord(MachineInstr *Op0, MachineInstr *Op1, + DebugLoc &dl, + unsigned &NewOpc, unsigned &EvenReg, + unsigned &OddReg, unsigned &BaseReg, + unsigned &OffReg, unsigned &Offset, + unsigned &PredReg, + ARMCC::CondCodes &Pred) { + // FIXME: FLDS / FSTS -> FLDD / FSTD + unsigned Opcode = Op0->getOpcode(); + if (Opcode == ARM::LDR) + NewOpc = ARM::LDRD; + else if (Opcode == ARM::STR) + NewOpc = ARM::STRD; + else + return false; + + // Make sure the base address satisfies i64 ld / st alignment requirement. + if (!Op0->hasOneMemOperand() || + !Op0->memoperands_begin()->getValue() || + Op0->memoperands_begin()->isVolatile()) + return false; + + unsigned Align = Op0->memoperands_begin()->getAlignment(); + unsigned ReqAlign = STI->hasV6Ops() + ? TD->getPrefTypeAlignment(Type::Int64Ty) : 8; // Pre-v6 needs 8-byte align + if (Align < ReqAlign) + return false; + + // Then make sure the immediate offset fits. + int OffImm = getMemoryOpOffset(Op0); + ARM_AM::AddrOpc AddSub = ARM_AM::add; + if (OffImm < 0) { + AddSub = ARM_AM::sub; + OffImm = -OffImm; + } + if (OffImm >= 256) // 8 bits + return false; + Offset = ARM_AM::getAM3Opc(AddSub, OffImm); + + EvenReg = Op0->getOperand(0).getReg(); + OddReg = Op1->getOperand(0).getReg(); + if (EvenReg == OddReg) + return false; + BaseReg = Op0->getOperand(1).getReg(); + OffReg = Op0->getOperand(2).getReg(); + Pred = getInstrPredicate(Op0, PredReg); + dl = Op0->getDebugLoc(); return true; } @@ -902,6 +1110,7 @@ bool ARMPreAllocLoadStoreOpt::RescheduleOps(MachineBasicBlock *MBB, MachineInstr *FirstOp = 0; MachineInstr *LastOp = 0; int LastOffset = 0; + unsigned LastOpcode = 0; unsigned LastBytes = 0; unsigned NumMove = 0; for (int i = Ops.size() - 1; i >= 0; --i) { @@ -916,6 +1125,10 @@ bool ARMPreAllocLoadStoreOpt::RescheduleOps(MachineBasicBlock *MBB, LastOp = Op; } + unsigned Opcode = Op->getOpcode(); + if (LastOpcode && Opcode != LastOpcode) + break; + int Offset = getMemoryOpOffset(Op); unsigned Bytes = getLSMultipleTransferSize(Op); if (LastBytes) { @@ -924,34 +1137,80 @@ bool ARMPreAllocLoadStoreOpt::RescheduleOps(MachineBasicBlock *MBB, } LastOffset = Offset; LastBytes = Bytes; - if (++NumMove == 4) + LastOpcode = Opcode; + if (++NumMove == 8) // FIXME: Tune break; } if (NumMove <= 1) Ops.pop_back(); else { - SmallPtrSet<MachineInstr*, 4> MoveOps; - for (int i = NumMove-1; i >= 0; --i) - MoveOps.insert(Ops[i]); + SmallPtrSet<MachineInstr*, 4> MemOps; + SmallSet<unsigned, 4> MemRegs; + for (int i = NumMove-1; i >= 0; --i) { + MemOps.insert(Ops[i]); + MemRegs.insert(Ops[i]->getOperand(0).getReg()); + } // Be conservative, if the instructions are too far apart, don't // move them. We want to limit the increase of register pressure. - bool DoMove = (LastLoc - FirstLoc) < NumMove*4; + bool DoMove = (LastLoc - FirstLoc) <= NumMove*4; // FIXME: Tune this. if (DoMove) - DoMove = IsSafeToMove(isLd, Base, FirstOp, LastOp, MoveOps, TRI); + DoMove = IsSafeAndProfitableToMove(isLd, Base, FirstOp, LastOp, + MemOps, MemRegs, TRI); if (!DoMove) { for (unsigned i = 0; i != NumMove; ++i) Ops.pop_back(); } else { // This is the new location for the loads / stores. MachineBasicBlock::iterator InsertPos = isLd ? 
FirstOp : LastOp; - while (InsertPos != MBB->end() && MoveOps.count(InsertPos)) + while (InsertPos != MBB->end() && MemOps.count(InsertPos)) ++InsertPos; - for (unsigned i = 0; i != NumMove; ++i) { - MachineInstr *Op = Ops.back(); + + // If we are moving a pair of loads / stores, see if it makes sense + // to try to allocate a pair of registers that can form register pairs. + MachineInstr *Op0 = Ops.back(); + MachineInstr *Op1 = Ops[Ops.size()-2]; + unsigned EvenReg = 0, OddReg = 0; + unsigned BaseReg = 0, OffReg = 0, PredReg = 0; + ARMCC::CondCodes Pred = ARMCC::AL; + unsigned NewOpc = 0; + unsigned Offset = 0; + DebugLoc dl; + if (NumMove == 2 && CanFormLdStDWord(Op0, Op1, dl, NewOpc, + EvenReg, OddReg, BaseReg, OffReg, + Offset, PredReg, Pred)) { + Ops.pop_back(); Ops.pop_back(); - MBB->splice(InsertPos, MBB, Op); + + // Form the pair instruction. + if (isLd) { + BuildMI(*MBB, InsertPos, dl, TII->get(NewOpc)) + .addReg(EvenReg, RegState::Define) + .addReg(OddReg, RegState::Define) + .addReg(BaseReg).addReg(0).addImm(Offset) + .addImm(Pred).addReg(PredReg); + ++NumLDRDFormed; + } else { + BuildMI(*MBB, InsertPos, dl, TII->get(NewOpc)) + .addReg(EvenReg) + .addReg(OddReg) + .addReg(BaseReg).addReg(0).addImm(Offset) + .addImm(Pred).addReg(PredReg); + ++NumSTRDFormed; + } + MBB->erase(Op0); + MBB->erase(Op1); + + // Add register allocation hints to form register pairs. + MRI->setRegAllocationHint(EvenReg, ARMRI::RegPairEven, OddReg); + MRI->setRegAllocationHint(OddReg, ARMRI::RegPairOdd, EvenReg); + } else { + for (unsigned i = 0; i != NumMove; ++i) { + MachineInstr *Op = Ops.back(); + Ops.pop_back(); + MBB->splice(InsertPos, MBB, Op); + } } NumLdStMoved += NumMove; @@ -1039,7 +1298,8 @@ ARMPreAllocLoadStoreOpt::RescheduleLoadStoreInstrs(MachineBasicBlock *MBB) { } if (StopHere) { - // Found a duplicate (a base+offset combination that's seen earlier). Backtrack. + // Found a duplicate (a base+offset combination that's seen earlier). + // Backtrack. --Loc; break; } diff --git a/lib/Target/ARM/ARMRegisterInfo.cpp b/lib/Target/ARM/ARMRegisterInfo.cpp index 199858f..bbc1300 100644 --- a/lib/Target/ARM/ARMRegisterInfo.cpp +++ b/lib/Target/ARM/ARMRegisterInfo.cpp @@ -159,7 +159,7 @@ ARMRegisterInfo::ARMRegisterInfo(const TargetInstrInfo &tii, const ARMSubtarget &sti) : ARMGenRegisterInfo(ARM::ADJCALLSTACKDOWN, ARM::ADJCALLSTACKUP), TII(tii), STI(sti), - FramePtr((STI.useThumbBacktraces() || STI.isThumb()) ? ARM::R7 : ARM::R11) { + FramePtr((STI.isTargetDarwin() || STI.isThumb()) ? ARM::R7 : ARM::R11) { } static inline @@ -194,10 +194,6 @@ void ARMRegisterInfo::emitLoadConstPool(MachineBasicBlock &MBB, .addReg(0).addImm(0).addImm(Pred).addReg(PredReg); } -const TargetRegisterClass *ARMRegisterInfo::getPointerRegClass() const { - return &ARM::GPRRegClass; -} - /// isLowRegister - Returns true if the register is low register r0-r7. /// bool ARMRegisterInfo::isLowRegister(unsigned Reg) const { @@ -304,6 +300,191 @@ ARMRegisterInfo::isReservedReg(const MachineFunction &MF, unsigned Reg) const { return false; } +const TargetRegisterClass *ARMRegisterInfo::getPointerRegClass() const { + return &ARM::GPRRegClass; +} + +/// getAllocationOrder - Returns the register allocation order for a specified +/// register class in the form of a pair of TargetRegisterClass iterators. 
+std::pair<TargetRegisterClass::iterator,TargetRegisterClass::iterator> +ARMRegisterInfo::getAllocationOrder(const TargetRegisterClass *RC, + unsigned HintType, unsigned HintReg, + const MachineFunction &MF) const { + // Alternative register allocation orders when favoring even / odd registers + // of register pairs. + + // No FP, R9 is available. + static const unsigned GPREven1[] = { + ARM::R0, ARM::R2, ARM::R4, ARM::R6, ARM::R8, ARM::R10, + ARM::R1, ARM::R3, ARM::R12,ARM::LR, ARM::R5, ARM::R7, + ARM::R9, ARM::R11 + }; + static const unsigned GPROdd1[] = { + ARM::R1, ARM::R3, ARM::R5, ARM::R7, ARM::R9, ARM::R11, + ARM::R0, ARM::R2, ARM::R12,ARM::LR, ARM::R4, ARM::R6, + ARM::R8, ARM::R10 + }; + + // FP is R7, R9 is available. + static const unsigned GPREven2[] = { + ARM::R0, ARM::R2, ARM::R4, ARM::R8, ARM::R10, + ARM::R1, ARM::R3, ARM::R12,ARM::LR, ARM::R5, ARM::R6, + ARM::R9, ARM::R11 + }; + static const unsigned GPROdd2[] = { + ARM::R1, ARM::R3, ARM::R5, ARM::R9, ARM::R11, + ARM::R0, ARM::R2, ARM::R12,ARM::LR, ARM::R4, ARM::R6, + ARM::R8, ARM::R10 + }; + + // FP is R11, R9 is available. + static const unsigned GPREven3[] = { + ARM::R0, ARM::R2, ARM::R4, ARM::R6, ARM::R8, + ARM::R1, ARM::R3, ARM::R10,ARM::R12,ARM::LR, ARM::R5, ARM::R7, + ARM::R9 + }; + static const unsigned GPROdd3[] = { + ARM::R1, ARM::R3, ARM::R5, ARM::R6, ARM::R9, + ARM::R0, ARM::R2, ARM::R10,ARM::R12,ARM::LR, ARM::R4, ARM::R7, + ARM::R8 + }; + + // No FP, R9 is not available. + static const unsigned GPREven4[] = { + ARM::R0, ARM::R2, ARM::R4, ARM::R6, ARM::R10, + ARM::R1, ARM::R3, ARM::R12,ARM::LR, ARM::R5, ARM::R7, ARM::R8, + ARM::R11 + }; + static const unsigned GPROdd4[] = { + ARM::R1, ARM::R3, ARM::R5, ARM::R7, ARM::R11, + ARM::R0, ARM::R2, ARM::R12,ARM::LR, ARM::R4, ARM::R6, ARM::R8, + ARM::R10 + }; + + // FP is R7, R9 is not available. + static const unsigned GPREven5[] = { + ARM::R0, ARM::R2, ARM::R4, ARM::R10, + ARM::R1, ARM::R3, ARM::R12,ARM::LR, ARM::R5, ARM::R6, ARM::R8, + ARM::R11 + }; + static const unsigned GPROdd5[] = { + ARM::R1, ARM::R3, ARM::R5, ARM::R11, + ARM::R0, ARM::R2, ARM::R12,ARM::LR, ARM::R4, ARM::R6, ARM::R8, + ARM::R10 + }; + + // FP is R11, R9 is not available. + static const unsigned GPREven6[] = { + ARM::R0, ARM::R2, ARM::R4, ARM::R6, + ARM::R1, ARM::R3, ARM::R10,ARM::R12,ARM::LR, ARM::R5, ARM::R7, ARM::R8 + }; + static const unsigned GPROdd6[] = { + ARM::R1, ARM::R3, ARM::R5, ARM::R7, + ARM::R0, ARM::R2, ARM::R10,ARM::R12,ARM::LR, ARM::R4, ARM::R6, ARM::R8 + }; + + + if (HintType == ARMRI::RegPairEven) { + if (isPhysicalRegister(HintReg) && getRegisterPairEven(HintReg, MF) == 0) + // It's no longer possible to fulfill this hint. Return the default + // allocation order. 
+ return std::make_pair(RC->allocation_order_begin(MF), + RC->allocation_order_end(MF)); + + if (!STI.isTargetDarwin() && !hasFP(MF)) { + if (!STI.isR9Reserved()) + return std::make_pair(GPREven1, + GPREven1 + (sizeof(GPREven1)/sizeof(unsigned))); + else + return std::make_pair(GPREven4, + GPREven4 + (sizeof(GPREven4)/sizeof(unsigned))); + } else if (FramePtr == ARM::R7) { + if (!STI.isR9Reserved()) + return std::make_pair(GPREven2, + GPREven2 + (sizeof(GPREven2)/sizeof(unsigned))); + else + return std::make_pair(GPREven5, + GPREven5 + (sizeof(GPREven5)/sizeof(unsigned))); + } else { // FramePtr == ARM::R11 + if (!STI.isR9Reserved()) + return std::make_pair(GPREven3, + GPREven3 + (sizeof(GPREven3)/sizeof(unsigned))); + else + return std::make_pair(GPREven6, + GPREven6 + (sizeof(GPREven6)/sizeof(unsigned))); + } + } else if (HintType == ARMRI::RegPairOdd) { + if (isPhysicalRegister(HintReg) && getRegisterPairOdd(HintReg, MF) == 0) + // It's no longer possible to fulfill this hint. Return the default + // allocation order. + return std::make_pair(RC->allocation_order_begin(MF), + RC->allocation_order_end(MF)); + + if (!STI.isTargetDarwin() && !hasFP(MF)) { + if (!STI.isR9Reserved()) + return std::make_pair(GPROdd1, + GPROdd1 + (sizeof(GPROdd1)/sizeof(unsigned))); + else + return std::make_pair(GPROdd4, + GPROdd4 + (sizeof(GPROdd4)/sizeof(unsigned))); + } else if (FramePtr == ARM::R7) { + if (!STI.isR9Reserved()) + return std::make_pair(GPROdd2, + GPROdd2 + (sizeof(GPROdd2)/sizeof(unsigned))); + else + return std::make_pair(GPROdd5, + GPROdd5 + (sizeof(GPROdd5)/sizeof(unsigned))); + } else { // FramePtr == ARM::R11 + if (!STI.isR9Reserved()) + return std::make_pair(GPROdd3, + GPROdd3 + (sizeof(GPROdd3)/sizeof(unsigned))); + else + return std::make_pair(GPROdd6, + GPROdd6 + (sizeof(GPROdd6)/sizeof(unsigned))); + } + } + return std::make_pair(RC->allocation_order_begin(MF), + RC->allocation_order_end(MF)); +} + +/// ResolveRegAllocHint - Resolves the specified register allocation hint +/// to a physical register. Returns the physical register if it is successful. +unsigned +ARMRegisterInfo::ResolveRegAllocHint(unsigned Type, unsigned Reg, + const MachineFunction &MF) const { + if (Reg == 0 || !isPhysicalRegister(Reg)) + return 0; + if (Type == 0) + return Reg; + else if (Type == (unsigned)ARMRI::RegPairOdd) + // Odd register. + return getRegisterPairOdd(Reg, MF); + else if (Type == (unsigned)ARMRI::RegPairEven) + // Even register. + return getRegisterPairEven(Reg, MF); + return 0; +} + +void +ARMRegisterInfo::UpdateRegAllocHint(unsigned Reg, unsigned NewReg, + MachineFunction &MF) const { + MachineRegisterInfo *MRI = &MF.getRegInfo(); + std::pair<unsigned, unsigned> Hint = MRI->getRegAllocationHint(Reg); + if ((Hint.first == (unsigned)ARMRI::RegPairOdd || + Hint.first == (unsigned)ARMRI::RegPairEven) && + Hint.second && TargetRegisterInfo::isVirtualRegister(Hint.second)) { + // If 'Reg' is one of the even / odd register pair and it's now changed + // (e.g. coalesced) into a different register. The other register of the + // pair allocation hint must be updated to reflect the relationship + // change. + unsigned OtherReg = Hint.second; + Hint = MRI->getRegAllocationHint(OtherReg); + if (Hint.second == Reg) + // Make sure the pair has not already divorced. 
+ MRI->setRegAllocationHint(OtherReg, Hint.first, NewReg); + } +} + bool ARMRegisterInfo::requiresRegisterScavenging(const MachineFunction &MF) const { const ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); @@ -1506,9 +1687,8 @@ unsigned ARMRegisterInfo::getRARegister() const { unsigned ARMRegisterInfo::getFrameRegister(MachineFunction &MF) const { if (STI.isTargetDarwin() || hasFP(MF)) - return (STI.useThumbBacktraces() || STI.isThumb()) ? ARM::R7 : ARM::R11; - else - return ARM::SP; + return FramePtr; + return ARM::SP; } unsigned ARMRegisterInfo::getEHExceptionRegister() const { @@ -1525,4 +1705,152 @@ int ARMRegisterInfo::getDwarfRegNum(unsigned RegNum, bool isEH) const { return ARMGenRegisterInfo::getDwarfRegNumFull(RegNum, 0); } +unsigned ARMRegisterInfo::getRegisterPairEven(unsigned Reg, + const MachineFunction &MF) const { + switch (Reg) { + default: break; + // Return 0 if either register of the pair is a special register. + // So no R12, etc. + case ARM::R1: + return ARM::R0; + case ARM::R3: + // FIXME! + return STI.isThumb() ? 0 : ARM::R2; + case ARM::R5: + return ARM::R4; + case ARM::R7: + return isReservedReg(MF, ARM::R7) ? 0 : ARM::R6; + case ARM::R9: + return isReservedReg(MF, ARM::R9) ? 0 :ARM::R8; + case ARM::R11: + return isReservedReg(MF, ARM::R11) ? 0 : ARM::R10; + + case ARM::S1: + return ARM::S0; + case ARM::S3: + return ARM::S2; + case ARM::S5: + return ARM::S4; + case ARM::S7: + return ARM::S6; + case ARM::S9: + return ARM::S8; + case ARM::S11: + return ARM::S10; + case ARM::S13: + return ARM::S12; + case ARM::S15: + return ARM::S14; + case ARM::S17: + return ARM::S16; + case ARM::S19: + return ARM::S18; + case ARM::S21: + return ARM::S20; + case ARM::S23: + return ARM::S22; + case ARM::S25: + return ARM::S24; + case ARM::S27: + return ARM::S26; + case ARM::S29: + return ARM::S28; + case ARM::S31: + return ARM::S30; + + case ARM::D1: + return ARM::D0; + case ARM::D3: + return ARM::D2; + case ARM::D5: + return ARM::D4; + case ARM::D7: + return ARM::D6; + case ARM::D9: + return ARM::D8; + case ARM::D11: + return ARM::D10; + case ARM::D13: + return ARM::D12; + case ARM::D15: + return ARM::D14; + } + + return 0; +} + +unsigned ARMRegisterInfo::getRegisterPairOdd(unsigned Reg, + const MachineFunction &MF) const { + switch (Reg) { + default: break; + // Return 0 if either register of the pair is a special register. + // So no R12, etc. + case ARM::R0: + return ARM::R1; + case ARM::R2: + // FIXME! + return STI.isThumb() ? 0 : ARM::R3; + case ARM::R4: + return ARM::R5; + case ARM::R6: + return isReservedReg(MF, ARM::R7) ? 0 : ARM::R7; + case ARM::R8: + return isReservedReg(MF, ARM::R9) ? 0 :ARM::R9; + case ARM::R10: + return isReservedReg(MF, ARM::R11) ? 
0 : ARM::R11; + + case ARM::S0: + return ARM::S1; + case ARM::S2: + return ARM::S3; + case ARM::S4: + return ARM::S5; + case ARM::S6: + return ARM::S7; + case ARM::S8: + return ARM::S9; + case ARM::S10: + return ARM::S11; + case ARM::S12: + return ARM::S13; + case ARM::S14: + return ARM::S15; + case ARM::S16: + return ARM::S17; + case ARM::S18: + return ARM::S19; + case ARM::S20: + return ARM::S21; + case ARM::S22: + return ARM::S23; + case ARM::S24: + return ARM::S25; + case ARM::S26: + return ARM::S27; + case ARM::S28: + return ARM::S29; + case ARM::S30: + return ARM::S31; + + case ARM::D0: + return ARM::D1; + case ARM::D2: + return ARM::D3; + case ARM::D4: + return ARM::D5; + case ARM::D6: + return ARM::D7; + case ARM::D8: + return ARM::D9; + case ARM::D10: + return ARM::D11; + case ARM::D12: + return ARM::D13; + case ARM::D14: + return ARM::D15; + } + + return 0; +} + #include "ARMGenRegisterInfo.inc" diff --git a/lib/Target/ARM/ARMRegisterInfo.h b/lib/Target/ARM/ARMRegisterInfo.h index e1d9efb..e8f4fd8 100644 --- a/lib/Target/ARM/ARMRegisterInfo.h +++ b/lib/Target/ARM/ARMRegisterInfo.h @@ -22,12 +22,17 @@ namespace llvm { class TargetInstrInfo; class Type; +/// Register allocation hints. +namespace ARMRI { + enum { + RegPairOdd = 1, + RegPairEven = 2 + }; +} + struct ARMRegisterInfo : public ARMGenRegisterInfo { const TargetInstrInfo &TII; const ARMSubtarget &STI; -private: - /// FramePtr - ARM physical register used as frame ptr. - unsigned FramePtr; public: ARMRegisterInfo(const TargetInstrInfo &tii, const ARMSubtarget &STI); @@ -49,10 +54,6 @@ public: /// if the register is a single precision VFP register. static unsigned getRegisterNumbering(unsigned RegEnum, bool &isSPVFP); - /// getPointerRegClass - Return the register class to use to hold pointers. - /// This is used for addressing modes. - const TargetRegisterClass *getPointerRegClass() const; - /// Code Generation virtual methods... const TargetRegisterClass * getPhysicalRegisterRegClass(unsigned Reg, MVT VT = MVT::Other) const; @@ -65,6 +66,19 @@ public: bool isReservedReg(const MachineFunction &MF, unsigned Reg) const; + const TargetRegisterClass *getPointerRegClass() const; + + std::pair<TargetRegisterClass::iterator,TargetRegisterClass::iterator> + getAllocationOrder(const TargetRegisterClass *RC, + unsigned HintType, unsigned HintReg, + const MachineFunction &MF) const; + + unsigned ResolveRegAllocHint(unsigned Type, unsigned Reg, + const MachineFunction &MF) const; + + void UpdateRegAllocHint(unsigned Reg, unsigned NewReg, + MachineFunction &MF) const; + bool requiresRegisterScavenging(const MachineFunction &MF) const; bool hasFP(const MachineFunction &MF) const; @@ -95,6 +109,15 @@ public: int getDwarfRegNum(unsigned RegNum, bool isEH) const; bool isLowRegister(unsigned Reg) const; + +private: + /// FramePtr - ARM physical register used as frame ptr. 
+ unsigned FramePtr; + + unsigned getRegisterPairEven(unsigned Reg, const MachineFunction &MF) const; + + unsigned getRegisterPairOdd(unsigned Reg, const MachineFunction &MF) const; + }; } // end namespace llvm diff --git a/lib/Target/ARM/ARMRegisterInfo.td b/lib/Target/ARM/ARMRegisterInfo.td index ebe7d58..d864079 100644 --- a/lib/Target/ARM/ARMRegisterInfo.td +++ b/lib/Target/ARM/ARMRegisterInfo.td @@ -134,7 +134,7 @@ def GPR : RegisterClass<"ARM", [i32], 32, [R0, R1, R2, R3, R4, R5, R6, GPRClass::allocation_order_begin(const MachineFunction &MF) const { const TargetMachine &TM = MF.getTarget(); const ARMSubtarget &Subtarget = TM.getSubtarget<ARMSubtarget>(); - if (Subtarget.useThumbBacktraces()) { + if (Subtarget.isTargetDarwin()) { if (Subtarget.isR9Reserved()) return ARM_GPR_AO_4; else @@ -154,7 +154,7 @@ def GPR : RegisterClass<"ARM", [i32], 32, [R0, R1, R2, R3, R4, R5, R6, const ARMSubtarget &Subtarget = TM.getSubtarget<ARMSubtarget>(); GPRClass::iterator I; - if (Subtarget.useThumbBacktraces()) { + if (Subtarget.isTargetDarwin()) { if (Subtarget.isR9Reserved()) { I = ARM_GPR_AO_4 + (sizeof(ARM_GPR_AO_4)/sizeof(unsigned)); } else { diff --git a/lib/Target/ARM/ARMSchedule.td b/lib/Target/ARM/ARMSchedule.td new file mode 100644 index 0000000..75fa707 --- /dev/null +++ b/lib/Target/ARM/ARMSchedule.td @@ -0,0 +1,35 @@ +//===- ARMSchedule.td - ARM Scheduling Definitions ---------*- tablegen -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// Functional units across ARM processors +// +def FU_iALU : FuncUnit; // Integer alu unit +def FU_iLdSt : FuncUnit; // Integer load / store unit +def FU_FpALU : FuncUnit; // FP alu unit +def FU_FpLdSt : FuncUnit; // FP load / store unit +def FU_Br : FuncUnit; // Branch unit + +//===----------------------------------------------------------------------===// +// Instruction Itinerary classes used for ARM +// +def IIC_iALU : InstrItinClass; +def IIC_iLoad : InstrItinClass; +def IIC_iStore : InstrItinClass; +def IIC_fpALU : InstrItinClass; +def IIC_fpLoad : InstrItinClass; +def IIC_fpStore : InstrItinClass; +def IIC_Br : InstrItinClass; + +//===----------------------------------------------------------------------===// +// Processor instruction itineraries. + +def GenericItineraries : ProcessorItineraries<[]>; + +include "ARMScheduleV6.td" diff --git a/lib/Target/ARM/ARMScheduleV6.td b/lib/Target/ARM/ARMScheduleV6.td new file mode 100644 index 0000000..596a57f --- /dev/null +++ b/lib/Target/ARM/ARMScheduleV6.td @@ -0,0 +1,22 @@ +//===- ARMSchedule.td - ARM v6 Scheduling Definitions ------*- tablegen -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines the itinerary class data for the ARM v6 processors. 
+// +//===----------------------------------------------------------------------===// + +def V6Itineraries : ProcessorItineraries<[ + InstrItinData<IIC_iALU , [InstrStage<1, [FU_iALU]>]>, + InstrItinData<IIC_iLoad , [InstrStage<2, [FU_iLdSt]>]>, + InstrItinData<IIC_iStore , [InstrStage<1, [FU_iLdSt]>]>, + InstrItinData<IIC_fpALU , [InstrStage<6, [FU_FpALU]>]>, + InstrItinData<IIC_fpLoad , [InstrStage<2, [FU_FpLdSt]>]>, + InstrItinData<IIC_fpStore , [InstrStage<1, [FU_FpLdSt]>]>, + InstrItinData<IIC_Br , [InstrStage<3, [FU_Br]>]> +]>; diff --git a/lib/Target/ARM/ARMSubtarget.cpp b/lib/Target/ARM/ARMSubtarget.cpp index a978380..7ac7b49 100644 --- a/lib/Target/ARM/ARMSubtarget.cpp +++ b/lib/Target/ARM/ARMSubtarget.cpp @@ -24,7 +24,6 @@ ARMSubtarget::ARMSubtarget(const Module &M, const std::string &FS, , ARMFPUType(None) , IsThumb(isThumb) , ThumbMode(Thumb1) - , UseThumbBacktraces(false) , IsR9Reserved(false) , stackAlignment(4) , CPUString("generic") @@ -83,8 +82,6 @@ ARMSubtarget::ARMSubtarget(const Module &M, const std::string &FS, if (isAAPCS_ABI()) stackAlignment = 8; - if (isTargetDarwin()) { - UseThumbBacktraces = true; + if (isTargetDarwin()) IsR9Reserved = true; - } } diff --git a/lib/Target/ARM/ARMSubtarget.h b/lib/Target/ARM/ARMSubtarget.h index 0704055..c3cc7ff 100644 --- a/lib/Target/ARM/ARMSubtarget.h +++ b/lib/Target/ARM/ARMSubtarget.h @@ -14,6 +14,7 @@ #ifndef ARMSUBTARGET_H #define ARMSUBTARGET_H +#include "llvm/Target/TargetInstrItineraries.h" #include "llvm/Target/TargetSubtarget.h" #include <string> @@ -48,9 +49,6 @@ protected: /// ThumbMode - Indicates supported Thumb version. ThumbTypeEnum ThumbMode; - /// UseThumbBacktraces - True if we use thumb style backtraces. - bool UseThumbBacktraces; - /// IsR9Reserved - True if R9 is a not available as general purpose register. bool IsR9Reserved; @@ -61,6 +59,9 @@ protected: /// CPUString - String name of used CPU. std::string CPUString; + /// Selected instruction itineraries (one entry per itinerary class.) + InstrItineraryData InstrItins; + public: enum { isELF, isDarwin @@ -106,14 +107,17 @@ protected: bool isAAPCS_ABI() const { return TargetABI == ARM_ABI_AAPCS; } bool isThumb() const { return IsThumb; } - bool isThumb1() const { return IsThumb && (ThumbMode == Thumb1); } - bool isThumb2() const { return IsThumb && (ThumbMode >= Thumb2); } + bool isThumb1Only() const { return IsThumb && (ThumbMode == Thumb1); } + bool hasThumb2() const { return IsThumb && (ThumbMode >= Thumb2); } - bool useThumbBacktraces() const { return UseThumbBacktraces; } bool isR9Reserved() const { return IsR9Reserved; } const std::string & getCPUString() const { return CPUString; } + /// getInstrItins - Return the instruction itineraies based on subtarget + /// selection. + const InstrItineraryData &getInstrItineraryData() const { return InstrItins; } + /// getStackAlignment - Returns the minimum alignment known to hold of the /// stack frame on entry to the function and which must be maintained by every /// function for this subtarget. 
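The ARMRegisterInfo hunks above introduce target register-allocation hints: a virtual register that should land in one half of an even/odd pair is tagged ARMRI::RegPairEven or ARMRI::RegPairOdd, and the allocator calls back through ResolveRegAllocHint / getRegisterPairEven / getRegisterPairOdd to turn the hint, plus the partner's assignment, into a concrete register (reserved registers such as R7, R9 and R11 yield 0, meaning "no usable partner"). Below is a minimal standalone sketch of that resolution step, with plain integers 0..11 standing in for ARM::R0..R11 and none of the real LLVM interfaces; presumably the point of the pairing is that paired load/store forms want adjacent registers.

#include <cstdio>

// Hint kinds, mirroring ARMRI::RegPairOdd / ARMRI::RegPairEven above.
enum HintType { NoHint = 0, RegPairOdd = 1, RegPairEven = 2 };

// Registers modeled as plain numbers 0..11 standing in for ARM::R0..R11.
// Return 0xFF where the diff returns 0 ("no usable partner").
static unsigned pairEven(unsigned Reg) {  // R1 -> R0, R3 -> R2, ...
  return (Reg <= 11 && Reg % 2 == 1) ? Reg - 1 : 0xFF;
}
static unsigned pairOdd(unsigned Reg) {   // R0 -> R1, R2 -> R3, ...
  return (Reg <= 10 && Reg % 2 == 0) ? Reg + 1 : 0xFF;
}

// Shape of ResolveRegAllocHint: combine the hint type carried by one vreg
// with the physical register already handed to its partner.
static unsigned resolveHint(HintType Type, unsigned PartnerPhysReg) {
  switch (Type) {
  case RegPairEven: return pairEven(PartnerPhysReg);
  case RegPairOdd:  return pairOdd(PartnerPhysReg);
  default:          return 0xFF;
  }
}

int main() {
  // The partner vreg landed in R4; our RegPairOdd hint resolves to R5.
  unsigned R = resolveHint(RegPairOdd, 4);
  if (R != 0xFF) std::printf("hint resolved to R%u\n", R);
  return 0;
}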
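The new ARMSchedule.td / ARMScheduleV6.td files, plus the InstrItineraryData plumbing through ARMSubtarget and ARMTargetMachine, give the scheduler per-class timing: each IIC_* itinerary class maps to a stage on one of the FU_* functional units. A rough standalone model of that lookup follows, with plain C++ data in place of the TableGen-generated tables; the cycle counts are the ones from V6Itineraries above.

#include <cstdio>

// Functional units, as in ARMSchedule.td (FU_iALU, FU_iLdSt, ...).
enum FuncUnit { IntALU, IntLdSt, FpALU, FpLdSt, Branch };

// One pipeline stage: an instruction occupies Unit for Cycles cycles.
struct Stage { unsigned Cycles; FuncUnit Unit; };

// Itinerary classes, as in the IIC_* definitions.
enum ItinClass { iALU, iLoad, iStore, fpALU, fpLoad, fpStore, Br, NumClasses };

// The V6Itineraries table as data: one single-stage entry per class.
static const Stage V6Itins[NumClasses] = {
  {1, IntALU},   // IIC_iALU
  {2, IntLdSt},  // IIC_iLoad
  {1, IntLdSt},  // IIC_iStore
  {6, FpALU},    // IIC_fpALU
  {2, FpLdSt},   // IIC_fpLoad
  {1, FpLdSt},   // IIC_fpStore
  {3, Branch},   // IIC_Br
};

// A scheduler costs an instruction by summing its stage cycles; with one
// stage per class this collapses to a table lookup.
static unsigned latency(ItinClass IC) { return V6Itins[IC].Cycles; }

int main() {
  std::printf("i-load: %u cycles, fp-alu: %u cycles\n",
              latency(iLoad), latency(fpALU));
  return 0;
}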
diff --git a/lib/Target/ARM/ARMTargetAsmInfo.cpp b/lib/Target/ARM/ARMTargetAsmInfo.cpp index 4107dcc..42b8eae 100644 --- a/lib/Target/ARM/ARMTargetAsmInfo.cpp +++ b/lib/Target/ARM/ARMTargetAsmInfo.cpp @@ -17,80 +17,42 @@ #include <cctype> using namespace llvm; - const char *const llvm::arm_asm_table[] = { - "{r0}", "r0", - "{r1}", "r1", - "{r2}", "r2", - "{r3}", "r3", - "{r4}", "r4", - "{r5}", "r5", - "{r6}", "r6", - "{r7}", "r7", - "{r8}", "r8", - "{r9}", "r9", - "{r10}", "r10", - "{r11}", "r11", - "{r12}", "r12", - "{r13}", "r13", - "{r14}", "r14", - "{lr}", "lr", - "{sp}", "sp", - "{ip}", "ip", - "{fp}", "fp", - "{sl}", "sl", - "{memory}", "memory", - "{cc}", "cc", - 0,0}; + "{r0}", "r0", + "{r1}", "r1", + "{r2}", "r2", + "{r3}", "r3", + "{r4}", "r4", + "{r5}", "r5", + "{r6}", "r6", + "{r7}", "r7", + "{r8}", "r8", + "{r9}", "r9", + "{r10}", "r10", + "{r11}", "r11", + "{r12}", "r12", + "{r13}", "r13", + "{r14}", "r14", + "{lr}", "lr", + "{sp}", "sp", + "{ip}", "ip", + "{fp}", "fp", + "{sl}", "sl", + "{memory}", "memory", + "{cc}", "cc", + 0,0 +}; ARMDarwinTargetAsmInfo::ARMDarwinTargetAsmInfo(const ARMTargetMachine &TM): ARMTargetAsmInfo<DarwinTargetAsmInfo>(TM) { Subtarget = &TM.getSubtarget<ARMSubtarget>(); - GlobalPrefix = "_"; - PrivateGlobalPrefix = "L"; - LessPrivateGlobalPrefix = "l"; - StringConstantPrefix = "\1LC"; - BSSSection = 0; // no BSS section ZeroDirective = "\t.space\t"; ZeroFillDirective = "\t.zerofill\t"; // Uses .zerofill SetDirective = "\t.set\t"; - WeakRefDirective = "\t.weak_reference\t"; - WeakDefDirective = "\t.weak_definition "; - HiddenDirective = "\t.private_extern\t"; ProtectedDirective = NULL; - JumpTableDataSection = ".const"; - CStringSection = "\t.cstring"; HasDotTypeDotSizeDirective = false; - HasSingleParameterDotFile = false; - NeedsIndirectEncoding = true; - if (TM.getRelocationModel() == Reloc::Static) { - StaticCtorsSection = ".constructor"; - StaticDtorsSection = ".destructor"; - } else { - StaticCtorsSection = ".mod_init_func"; - StaticDtorsSection = ".mod_term_func"; - } - - // In non-PIC modes, emit a special label before jump tables so that the - // linker can perform more accurate dead code stripping. - if (TM.getRelocationModel() != Reloc::PIC_) { - // Emit a local label that is preserved until the linker runs. 
- JumpTableSpecialLabelPrefix = "l"; - } - - NeedsSet = true; - DwarfAbbrevSection = ".section __DWARF,__debug_abbrev,regular,debug"; - DwarfInfoSection = ".section __DWARF,__debug_info,regular,debug"; - DwarfLineSection = ".section __DWARF,__debug_line,regular,debug"; - DwarfFrameSection = ".section __DWARF,__debug_frame,regular,debug"; - DwarfPubNamesSection = ".section __DWARF,__debug_pubnames,regular,debug"; - DwarfPubTypesSection = ".section __DWARF,__debug_pubtypes,regular,debug"; - DwarfStrSection = ".section __DWARF,__debug_str,regular,debug"; - DwarfLocSection = ".section __DWARF,__debug_loc,regular,debug"; - DwarfARangesSection = ".section __DWARF,__debug_aranges,regular,debug"; - DwarfRangesSection = ".section __DWARF,__debug_ranges,regular,debug"; - DwarfMacInfoSection = ".section __DWARF,__debug_macinfo,regular,debug"; + SupportsDebugInformation = true; } ARMELFTargetAsmInfo::ARMELFTargetAsmInfo(const ARMTargetMachine &TM): @@ -115,7 +77,7 @@ ARMELFTargetAsmInfo::ARMELFTargetAsmInfo(const ARMTargetMachine &TM): DwarfLocSection = "\t.section\t.debug_loc,\"\",%progbits"; DwarfARangesSection = "\t.section\t.debug_aranges,\"\",%progbits"; DwarfRangesSection = "\t.section\t.debug_ranges,\"\",%progbits"; - DwarfMacInfoSection = "\t.section\t.debug_macinfo,\"\",%progbits"; + DwarfMacroInfoSection = "\t.section\t.debug_macinfo,\"\",%progbits"; if (Subtarget->isAAPCS_ABI()) { StaticCtorsSection = "\t.section .init_array,\"aw\",%init_array"; @@ -124,6 +86,7 @@ ARMELFTargetAsmInfo::ARMELFTargetAsmInfo(const ARMTargetMachine &TM): StaticCtorsSection = "\t.section .ctors,\"aw\",%progbits"; StaticDtorsSection = "\t.section .dtors,\"aw\",%progbits"; } + SupportsDebugInformation = true; } /// Count the number of comma-separated arguments. diff --git a/lib/Target/ARM/ARMTargetAsmInfo.h b/lib/Target/ARM/ARMTargetAsmInfo.h index 9e6f856..683692f 100644 --- a/lib/Target/ARM/ARMTargetAsmInfo.h +++ b/lib/Target/ARM/ARMTargetAsmInfo.h @@ -26,8 +26,7 @@ namespace llvm { template <class BaseTAI> struct ARMTargetAsmInfo : public BaseTAI { - explicit ARMTargetAsmInfo(const ARMTargetMachine &TM): - BaseTAI(TM) { + explicit ARMTargetAsmInfo(const ARMTargetMachine &TM) : BaseTAI(TM) { BaseTAI::AsmTransCBE = arm_asm_table; BaseTAI::AlignmentIsInBytes = false; diff --git a/lib/Target/ARM/ARMTargetMachine.cpp b/lib/Target/ARM/ARMTargetMachine.cpp index 7033907..8006b9b 100644 --- a/lib/Target/ARM/ARMTargetMachine.cpp +++ b/lib/Target/ARM/ARMTargetMachine.cpp @@ -23,9 +23,6 @@ #include "llvm/Target/TargetOptions.h" using namespace llvm; -static cl::opt<bool> -EnablePreLdStOpti("arm-pre-alloc-loadstore-opti", cl::Hidden, - cl::desc("Enable pre-regalloc load store optimization pass")); static cl::opt<bool> DisableLdStOpti("disable-arm-loadstore-opti", cl::Hidden, cl::desc("Disable load store optimization pass")); static cl::opt<bool> DisableIfConversion("disable-arm-if-conversion",cl::Hidden, @@ -42,6 +39,11 @@ int ARMTargetMachineModule = 0; static RegisterTarget<ARMTargetMachine> X("arm", "ARM"); static RegisterTarget<ThumbTargetMachine> Y("thumb", "Thumb"); +// Force static initialization when called from llvm/InitializeAllTargets.h +namespace llvm { + void InitializeARMTarget() { } +} + // No assembler printer by default ARMTargetMachine::AsmPrinterCtorFn ARMTargetMachine::AsmPrinterCtor = 0; @@ -97,7 +99,8 @@ ARMTargetMachine::ARMTargetMachine(const Module &M, const std::string &FS, InstrInfo(Subtarget), FrameInfo(Subtarget), JITInfo(), - TLInfo(*this) { + TLInfo(*this), + 
InstrItins(Subtarget.getInstrItineraryData()) { DefRelocModel = getRelocationModel(); } @@ -149,8 +152,6 @@ bool ARMTargetMachine::addInstSelector(PassManagerBase &PM, bool ARMTargetMachine::addPreRegAlloc(PassManagerBase &PM, CodeGenOpt::Level OptLevel) { - if (!EnablePreLdStOpti) - return false; // FIXME: temporarily disabling load / store optimization pass for Thumb mode. if (OptLevel != CodeGenOpt::None && !DisableLdStOpti && !Subtarget.isThumb()) PM.add(createARMLoadStoreOptimizationPass(true)); diff --git a/lib/Target/ARM/ARMTargetMachine.h b/lib/Target/ARM/ARMTargetMachine.h index 7192c1b..c4c8e6c 100644 --- a/lib/Target/ARM/ARMTargetMachine.h +++ b/lib/Target/ARM/ARMTargetMachine.h @@ -28,13 +28,14 @@ namespace llvm { class Module; class ARMTargetMachine : public LLVMTargetMachine { - ARMSubtarget Subtarget; - const TargetData DataLayout; // Calculates type size & alignment - ARMInstrInfo InstrInfo; - ARMFrameInfo FrameInfo; - ARMJITInfo JITInfo; - ARMTargetLowering TLInfo; - Reloc::Model DefRelocModel; // Reloc model before it's overridden. + ARMSubtarget Subtarget; + const TargetData DataLayout; // Calculates type size & alignment + ARMInstrInfo InstrInfo; + ARMFrameInfo FrameInfo; + ARMJITInfo JITInfo; + ARMTargetLowering TLInfo; + InstrItineraryData InstrItins; + Reloc::Model DefRelocModel; // Reloc model before it's overridden. protected: // To avoid having target depend on the asmprinter stuff libraries, asmprinter @@ -59,6 +60,9 @@ public: virtual ARMTargetLowering *getTargetLowering() const { return const_cast<ARMTargetLowering*>(&TLInfo); } + virtual const InstrItineraryData getInstrItineraryData() const { + return InstrItins; + } static void registerAsmPrinter(AsmPrinterCtorFn F) { AsmPrinterCtor = F; diff --git a/lib/Target/ARM/AsmPrinter/ARMAsmPrinter.cpp b/lib/Target/ARM/AsmPrinter/ARMAsmPrinter.cpp index d908cf4..948a100 100644 --- a/lib/Target/ARM/AsmPrinter/ARMAsmPrinter.cpp +++ b/lib/Target/ARM/AsmPrinter/ARMAsmPrinter.cpp @@ -45,7 +45,6 @@ STATISTIC(EmittedInsts, "Number of machine instrs printed"); namespace { class VISIBILITY_HIDDEN ARMAsmPrinter : public AsmPrinter { DwarfWriter *DW; - MachineModuleInfo *MMI; /// Subtarget - Keep a pointer to the ARMSubtarget around so that we can /// make the right decision when printing asm code for different targets. @@ -84,7 +83,7 @@ namespace { explicit ARMAsmPrinter(raw_ostream &O, TargetMachine &TM, const TargetAsmInfo *T, CodeGenOpt::Level OL, bool V) - : AsmPrinter(O, TM, T, OL, V), DW(0), MMI(NULL), AFI(NULL), MCP(NULL), + : AsmPrinter(O, TM, T, OL, V), DW(0), AFI(NULL), MCP(NULL), InCPMode(false) { Subtarget = &TM.getSubtarget<ARMSubtarget>(); } @@ -97,6 +96,7 @@ namespace { const char *Modifier = 0); void printSOImmOperand(const MachineInstr *MI, int opNum); void printSOImm2PartOperand(const MachineInstr *MI, int opNum); + void printSOOperand(const MachineInstr *MI, int OpNum); void printSORegOperand(const MachineInstr *MI, int opNum); void printAddrMode2Operand(const MachineInstr *MI, int OpNo); void printAddrMode2OffsetOperand(const MachineInstr *MI, int OpNo); @@ -396,6 +396,28 @@ void ARMAsmPrinter::printSOImm2PartOperand(const MachineInstr *MI, int OpNum) { printSOImm(O, ARM_AM::getSOImmVal(V2), VerboseAsm, TAI); } +// Constant shifts so_reg is a 3-operand unit corresponding to register forms of +// the A5.1 "Addressing Mode 1 - Data-processing operands" forms. This +// includes: +// REG 0 - e.g. R5 +// REG IMM, SH_OPC - e.g. 
R5, LSL #3 +void ARMAsmPrinter::printSOOperand(const MachineInstr *MI, int OpNum) { + const MachineOperand &MO1 = MI->getOperand(OpNum); + const MachineOperand &MO2 = MI->getOperand(OpNum+1); + + unsigned Reg = MO1.getReg(); + assert(TargetRegisterInfo::isPhysicalRegister(Reg)); + O << TM.getRegisterInfo()->getAsmName(Reg); + + // Print the shift opc. + O << ", " + << ARM_AM::getShiftOpcStr(ARM_AM::getSORegShOp(MO2.getImm())) + << " "; + + assert(MO2.isImm() && "Not a valid t2_so_reg value!"); + O << "#" << ARM_AM::getSORegOffset(MO2.getImm()); +} + // so_reg is a 4-operand unit corresponding to register forms of the A5.1 // "Addressing Mode 1 - Data-processing operands" forms. This includes: // REG 0 0 - e.g. R5 @@ -805,17 +827,11 @@ void ARMAsmPrinter::printMachineInstruction(const MachineInstr *MI) { bool ARMAsmPrinter::doInitialization(Module &M) { bool Result = AsmPrinter::doInitialization(M); - - // Emit initial debug information. - MMI = getAnalysisIfAvailable<MachineModuleInfo>(); - assert(MMI); DW = getAnalysisIfAvailable<DwarfWriter>(); - assert(DW && "Dwarf Writer is not available"); - DW->BeginModule(&M, MMI, O, this, TAI); - // Darwin wants symbols to be quoted if they have complex names. - if (Subtarget->isTargetDarwin()) - Mang->setUseQuotes(true); + // Thumb-2 instructions are supported only in unified assembler syntax mode. + if (Subtarget->hasThumb2()) + O << "\t.syntax unified\n"; // Emit ARM Build Attributes if (Subtarget->isTargetELF()) { @@ -1115,3 +1131,9 @@ namespace { } } Registrator; } + +// Force static initialization when called from +// llvm/InitializeAllAsmPrinters.h +namespace llvm { + void InitializeARMAsmPrinter() { } +} diff --git a/lib/Target/Alpha/AlphaISelLowering.cpp b/lib/Target/Alpha/AlphaISelLowering.cpp index 7ed8ef6..1be1713 100644 --- a/lib/Target/Alpha/AlphaISelLowering.cpp +++ b/lib/Target/Alpha/AlphaISelLowering.cpp @@ -39,7 +39,7 @@ static unsigned AddLiveIn(MachineFunction &MF, unsigned PReg, AlphaTargetLowering::AlphaTargetLowering(TargetMachine &TM) : TargetLowering(TM) { // Set up the TargetLowering object. - //I am having problems with shr n ubyte 1 + //I am having problems with shr n i8 1 setShiftAmountType(MVT::i64); setBooleanContents(ZeroOrOneBooleanContent); diff --git a/lib/Target/Alpha/AlphaTargetMachine.cpp b/lib/Target/Alpha/AlphaTargetMachine.cpp index 4c83054..cdd4fa4 100644 --- a/lib/Target/Alpha/AlphaTargetMachine.cpp +++ b/lib/Target/Alpha/AlphaTargetMachine.cpp @@ -21,16 +21,17 @@ using namespace llvm; -/// AlphaTargetMachineModule - Note that this is used on hosts that cannot link -/// in a library unless there are references into the library. In particular, -/// it seems that it is not possible to get things to work on Win32 without -/// this. Though it is unused, do not remove it. 
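The comment deleted here (and again for IA64 and CellSPU below) documents the old linkage hack: an unused extern int that other files referenced purely to keep the target's object file from being dropped by the linker on Win32. The hunks that follow replace it with two cooperating pieces: empty InitializeXTarget() / InitializeXAsmPrinter() anchor functions that llvm/InitializeAllTargets.h and InitializeAllAsmPrinters.h can call, and a file-local static Registrator whose constructor hands the asm-printer factory to the target machine, so the target library no longer links against its printer. A self-contained sketch of the pattern under placeholder names; the real factory signature takes raw_ostream, TargetMachine, CodeGenOpt::Level and a verbose flag.

#include <cassert>
#include <cstdio>

// Placeholder for FunctionPass; the factory type mirrors AsmPrinterCtorFn.
struct Pass { const char *Name; };
typedef Pass *(*AsmPrinterCtorFn)();

struct DemoTargetMachine {
  // Stays null unless an asm-printer object file is linked in.
  static AsmPrinterCtorFn AsmPrinterCtor;
  static void registerAsmPrinter(AsmPrinterCtorFn F) { AsmPrinterCtor = F; }

  bool addAssemblyEmitter() {
    assert(AsmPrinterCtor && "AsmPrinter was not linked in");
    if (AsmPrinterCtor)
      std::printf("added pass: %s\n", AsmPrinterCtor()->Name);
    return false;
  }
};
AsmPrinterCtorFn DemoTargetMachine::AsmPrinterCtor = 0;

// ---- conceptually in the separate asm-printer library ----
static Pass *createDemoPrinterPass() {
  static Pass P = { "demo-printer" };
  return &P;
}

namespace {
  // Static initializer runs before main(), like the Registrator structs.
  struct Register {
    Register() { DemoTargetMachine::registerAsmPrinter(createDemoPrinterPass); }
  } Registrator;
}

// Empty link anchor: any file that calls this pulls in this object file,
// and with it the Registrator above.
void InitializeDemoAsmPrinter() {}

int main() {
  InitializeDemoAsmPrinter();
  DemoTargetMachine TM;
  TM.addAssemblyEmitter();
  return 0;
}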
-extern "C" int AlphaTargetMachineModule; -int AlphaTargetMachineModule = 0; - // Register the targets static RegisterTarget<AlphaTargetMachine> X("alpha", "Alpha [experimental]"); +// No assembler printer by default +AlphaTargetMachine::AsmPrinterCtorFn AlphaTargetMachine::AsmPrinterCtor = 0; + +// Force static initialization when called from llvm/InitializeAllTargets.h +namespace llvm { + void InitializeAlphaTarget() { } +} + const TargetAsmInfo *AlphaTargetMachine::createTargetAsmInfo() const { return new AlphaTargetAsmInfo(*this); } @@ -92,23 +93,32 @@ bool AlphaTargetMachine::addAssemblyEmitter(PassManagerBase &PM, bool Verbose, raw_ostream &Out) { PM.add(createAlphaLLRPPass(*this)); - PM.add(createAlphaCodePrinterPass(Out, *this, OptLevel, Verbose)); + // Output assembly language. + assert(AsmPrinterCtor && "AsmPrinter was not linked in"); + if (AsmPrinterCtor) + PM.add(AsmPrinterCtor(Out, *this, OptLevel, Verbose)); return false; } bool AlphaTargetMachine::addCodeEmitter(PassManagerBase &PM, CodeGenOpt::Level OptLevel, bool DumpAsm, MachineCodeEmitter &MCE) { PM.add(createAlphaCodeEmitterPass(*this, MCE)); - if (DumpAsm) - PM.add(createAlphaCodePrinterPass(errs(), *this, OptLevel, true)); + if (DumpAsm) { + assert(AsmPrinterCtor && "AsmPrinter was not linked in"); + if (AsmPrinterCtor) + PM.add(AsmPrinterCtor(errs(), *this, OptLevel, true)); + } return false; } bool AlphaTargetMachine::addCodeEmitter(PassManagerBase &PM, CodeGenOpt::Level OptLevel, bool DumpAsm, JITCodeEmitter &JCE) { PM.add(createAlphaJITCodeEmitterPass(*this, JCE)); - if (DumpAsm) - PM.add(createAlphaCodePrinterPass(errs(), *this, OptLevel, true)); + if (DumpAsm) { + assert(AsmPrinterCtor && "AsmPrinter was not linked in"); + if (AsmPrinterCtor) + PM.add(AsmPrinterCtor(errs(), *this, OptLevel, true)); + } return false; } bool AlphaTargetMachine::addSimpleCodeEmitter(PassManagerBase &PM, diff --git a/lib/Target/Alpha/AlphaTargetMachine.h b/lib/Target/Alpha/AlphaTargetMachine.h index 51224e8..946ca55 100644 --- a/lib/Target/Alpha/AlphaTargetMachine.h +++ b/lib/Target/Alpha/AlphaTargetMachine.h @@ -33,10 +33,18 @@ class AlphaTargetMachine : public LLVMTargetMachine { AlphaJITInfo JITInfo; AlphaSubtarget Subtarget; AlphaTargetLowering TLInfo; - + protected: virtual const TargetAsmInfo *createTargetAsmInfo() const; - + + // To avoid having target depend on the asmprinter stuff libraries, asmprinter + // set this functions to ctor pointer at startup time if they are linked in. 
+ typedef FunctionPass *(*AsmPrinterCtorFn)(raw_ostream &o, + TargetMachine &tm, + CodeGenOpt::Level OptLevel, + bool verbose); + static AsmPrinterCtorFn AsmPrinterCtor; + public: AlphaTargetMachine(const Module &M, const std::string &FS); @@ -46,7 +54,7 @@ public: virtual const AlphaRegisterInfo *getRegisterInfo() const { return &InstrInfo.getRegisterInfo(); } - virtual AlphaTargetLowering* getTargetLowering() const { + virtual AlphaTargetLowering* getTargetLowering() const { return const_cast<AlphaTargetLowering*>(&TLInfo); } virtual const TargetData *getTargetData() const { return &DataLayout; } @@ -56,7 +64,7 @@ public: static unsigned getJITMatchQuality(); static unsigned getModuleMatchQuality(const Module &M); - + // Pass Pipeline Configuration virtual bool addInstSelector(PassManagerBase &PM, CodeGenOpt::Level OptLevel); virtual bool addPreEmitPass(PassManagerBase &PM, CodeGenOpt::Level OptLevel); @@ -75,6 +83,10 @@ public: CodeGenOpt::Level OptLevel, bool DumpAsm, JITCodeEmitter &JCE); + + static void registerAsmPrinter(AsmPrinterCtorFn F) { + AsmPrinterCtor = F; + } }; } // end namespace llvm diff --git a/lib/Target/Alpha/AsmPrinter/AlphaAsmPrinter.cpp b/lib/Target/Alpha/AsmPrinter/AlphaAsmPrinter.cpp index 74b48ee6..7b73bb3 100644 --- a/lib/Target/Alpha/AsmPrinter/AlphaAsmPrinter.cpp +++ b/lib/Target/Alpha/AsmPrinter/AlphaAsmPrinter.cpp @@ -303,3 +303,17 @@ bool AlphaAsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI, O << ")"; return false; } + +// Force static initialization when called from +// llvm/InitializeAllAsmPrinters.h +namespace llvm { + void InitializeAlphaAsmPrinter() { } +} + +namespace { + static struct Register { + Register() { + AlphaTargetMachine::registerAsmPrinter(createAlphaCodePrinterPass); + } + } Registrator; +} diff --git a/lib/Target/CBackend/CBackend.cpp b/lib/Target/CBackend/CBackend.cpp index 5814d27..c3554f6 100644 --- a/lib/Target/CBackend/CBackend.cpp +++ b/lib/Target/CBackend/CBackend.cpp @@ -59,6 +59,11 @@ int CBackendTargetMachineModule = 0; // Register the target. static RegisterTarget<CTargetMachine> X("c", "C backend"); +// Force static initialization when called from llvm/InitializeAllTargets.h +namespace llvm { + void InitializeCBackendTarget() { } +} + namespace { /// CBackendNameAllUsedStructsAndMergeFunctions - This pass inserts names for /// any unnamed structure types that are used by the program, and merges @@ -1449,6 +1454,17 @@ std::string CWriter::GetValueName(const Value *Operand) { /// writeInstComputationInline - Emit the computation for the specified /// instruction inline, with no destination provided. void CWriter::writeInstComputationInline(Instruction &I) { + // We can't currently support integer types other than 1, 8, 16, 32, 64. + // Validate this. + const Type *Ty = I.getType(); + if (Ty->isInteger() && (Ty!=Type::Int1Ty && Ty!=Type::Int8Ty && + Ty!=Type::Int16Ty && Ty!=Type::Int32Ty && Ty!=Type::Int64Ty)) { + cerr << "The C backend does not currently support integer " + << "types of widths other than 1, 8, 16, 32, 64.\n"; + cerr << "This is being tracked as PR 4158.\n"; + abort(); + } + // If this is a non-trivial bool computation, make sure to truncate down to // a 1 bit value. This is important because we want "add i1 x, y" to return // "0" when x and y are true, not "2" for example. 
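The CBackend hunk just above chooses to fail loudly rather than miscompile: C has no native type for, say, i37, so writeInstComputationInline now rejects any integer width outside 1/8/16/32/64 and points the user at PR 4158. Here is the same guard shape reduced to bare integers, with the width check standing in for the llvm::Type inspection.

#include <cstdio>
#include <cstdlib>

// Mirrors the writeInstComputationInline check: only widths that map onto
// native C integer types are accepted.
static void checkIntWidth(unsigned Bits) {
  if (Bits != 1 && Bits != 8 && Bits != 16 && Bits != 32 && Bits != 64) {
    std::fprintf(stderr, "unsupported integer width i%u "
                         "(the C backend handles 1/8/16/32/64 only)\n", Bits);
    std::abort();
  }
}

int main() {
  checkIntWidth(32);  // accepted
  checkIntWidth(37);  // rejected: aborts, like the real pass
  return 0;
}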
diff --git a/lib/Target/CellSPU/AsmPrinter/SPUAsmPrinter.cpp b/lib/Target/CellSPU/AsmPrinter/SPUAsmPrinter.cpp index da1bf07..26a8ece 100644 --- a/lib/Target/CellSPU/AsmPrinter/SPUAsmPrinter.cpp +++ b/lib/Target/CellSPU/AsmPrinter/SPUAsmPrinter.cpp @@ -287,12 +287,11 @@ namespace { /// LinuxAsmPrinter - SPU assembly printer, customized for Linux class VISIBILITY_HIDDEN LinuxAsmPrinter : public SPUAsmPrinter { DwarfWriter *DW; - MachineModuleInfo *MMI; public: explicit LinuxAsmPrinter(raw_ostream &O, SPUTargetMachine &TM, const TargetAsmInfo *T, CodeGenOpt::Level F, bool V) - : SPUAsmPrinter(O, TM, T, F, V), DW(0), MMI(0) {} + : SPUAsmPrinter(O, TM, T, F, V), DW(0) {} virtual const char *getPassName() const { return "STI CBEA SPU Assembly Printer"; @@ -490,12 +489,8 @@ LinuxAsmPrinter::runOnMachineFunction(MachineFunction &MF) bool LinuxAsmPrinter::doInitialization(Module &M) { bool Result = AsmPrinter::doInitialization(M); - SwitchToTextSection("\t.text"); - // Emit initial debug information. DW = getAnalysisIfAvailable<DwarfWriter>(); - assert(DW && "Dwarf Writer is not available"); - MMI = getAnalysisIfAvailable<MachineModuleInfo>(); - DW->BeginModule(&M, MMI, O, this, TAI); + SwitchToTextSection("\t.text"); return Result; } @@ -621,3 +616,17 @@ FunctionPass *llvm::createSPUAsmPrinterPass(raw_ostream &o, bool verbose) { return new LinuxAsmPrinter(o, tm, tm.getTargetAsmInfo(), OptLevel, verbose); } + +// Force static initialization when called from +// llvm/InitializeAllAsmPrinters.h +namespace llvm { + void InitializeCellSPUAsmPrinter() { } +} + +namespace { + static struct Register { + Register() { + SPUTargetMachine::registerAsmPrinter(createSPUAsmPrinterPass); + } + } Registrator; +} diff --git a/lib/Target/CellSPU/SPUISelLowering.cpp b/lib/Target/CellSPU/SPUISelLowering.cpp index 864a914..b286443 100644 --- a/lib/Target/CellSPU/SPUISelLowering.cpp +++ b/lib/Target/CellSPU/SPUISelLowering.cpp @@ -249,6 +249,25 @@ SPUTargetLowering::SPUTargetLowering(SPUTargetMachine &TM) setOperationAction(ISD::MUL, MVT::i32, Legal); setOperationAction(ISD::MUL, MVT::i64, Legal); + // Expand double-width multiplication + // FIXME: It would probably be reasonable to support some of these operations + setOperationAction(ISD::UMUL_LOHI, MVT::i8, Expand); + setOperationAction(ISD::SMUL_LOHI, MVT::i8, Expand); + setOperationAction(ISD::MULHU, MVT::i8, Expand); + setOperationAction(ISD::MULHS, MVT::i8, Expand); + setOperationAction(ISD::UMUL_LOHI, MVT::i16, Expand); + setOperationAction(ISD::SMUL_LOHI, MVT::i16, Expand); + setOperationAction(ISD::MULHU, MVT::i16, Expand); + setOperationAction(ISD::MULHS, MVT::i16, Expand); + setOperationAction(ISD::UMUL_LOHI, MVT::i32, Expand); + setOperationAction(ISD::SMUL_LOHI, MVT::i32, Expand); + setOperationAction(ISD::MULHU, MVT::i32, Expand); + setOperationAction(ISD::MULHS, MVT::i32, Expand); + setOperationAction(ISD::UMUL_LOHI, MVT::i64, Expand); + setOperationAction(ISD::SMUL_LOHI, MVT::i64, Expand); + setOperationAction(ISD::MULHU, MVT::i64, Expand); + setOperationAction(ISD::MULHS, MVT::i64, Expand); + // Need to custom handle (some) common i8, i64 math ops setOperationAction(ISD::ADD, MVT::i8, Custom); setOperationAction(ISD::ADD, MVT::i64, Legal); diff --git a/lib/Target/CellSPU/SPUTargetAsmInfo.cpp b/lib/Target/CellSPU/SPUTargetAsmInfo.cpp index ff88ed8..2868ff7 100644 --- a/lib/Target/CellSPU/SPUTargetAsmInfo.cpp +++ b/lib/Target/CellSPU/SPUTargetAsmInfo.cpp @@ -41,7 +41,6 @@ SPULinuxTargetAsmInfo::SPULinuxTargetAsmInfo(const SPUTargetMachine &TM) : 
SupportsDebugInformation = true; NeedsSet = true; - SupportsMacInfoSection = false; DwarfAbbrevSection = "\t.section .debug_abbrev,\"\",@progbits"; DwarfInfoSection = "\t.section .debug_info,\"\",@progbits"; DwarfLineSection = "\t.section .debug_line,\"\",@progbits"; @@ -52,7 +51,7 @@ SPULinuxTargetAsmInfo::SPULinuxTargetAsmInfo(const SPUTargetMachine &TM) : DwarfLocSection = "\t.section .debug_loc,\"\",@progbits"; DwarfARangesSection = "\t.section .debug_aranges,\"\",@progbits"; DwarfRangesSection = "\t.section .debug_ranges,\"\",@progbits"; - DwarfMacInfoSection = "\t.section .debug_macinfo,\"\",progbits"; + DwarfMacroInfoSection = 0; // macro info not supported. // Exception handling is not supported on CellSPU (think about it: you only // have 256K for code+data. Would you support exception handling?) diff --git a/lib/Target/CellSPU/SPUTargetMachine.cpp b/lib/Target/CellSPU/SPUTargetMachine.cpp index 7fa9022..c675ebb 100644 --- a/lib/Target/CellSPU/SPUTargetMachine.cpp +++ b/lib/Target/CellSPU/SPUTargetMachine.cpp @@ -23,20 +23,20 @@ using namespace llvm; -/// CellSPUTargetMachineModule - Note that this is used on hosts that -/// cannot link in a library unless there are references into the -/// library. In particular, it seems that it is not possible to get -/// things to work on Win32 without this. Though it is unused, do not -/// remove it. -extern "C" int CellSPUTargetMachineModule; -int CellSPUTargetMachineModule = 0; - namespace { // Register the targets RegisterTarget<SPUTargetMachine> CELLSPU("cellspu", "STI CBEA Cell SPU [experimental]"); } +// No assembler printer by default +SPUTargetMachine::AsmPrinterCtorFn SPUTargetMachine::AsmPrinterCtor = 0; + +// Force static initialization when called from llvm/InitializeAllTargets.h +namespace llvm { + void InitializeCellSPUTarget() { } +} + const std::pair<unsigned, int> * SPUFrameInfo::getCalleeSaveSpillSlots(unsigned &NumEntries) const { NumEntries = 1; @@ -93,6 +93,9 @@ bool SPUTargetMachine::addAssemblyEmitter(PassManagerBase &PM, CodeGenOpt::Level OptLevel, bool Verbose, raw_ostream &Out) { - PM.add(createSPUAsmPrinterPass(Out, *this, OptLevel, Verbose)); + // Output assembly language. + assert(AsmPrinterCtor && "AsmPrinter was not linked in"); + if (AsmPrinterCtor) + PM.add(AsmPrinterCtor(Out, *this, OptLevel, Verbose)); return false; } diff --git a/lib/Target/CellSPU/SPUTargetMachine.h b/lib/Target/CellSPU/SPUTargetMachine.h index cd39203..d8fe300 100644 --- a/lib/Target/CellSPU/SPUTargetMachine.h +++ b/lib/Target/CellSPU/SPUTargetMachine.h @@ -35,10 +35,18 @@ class SPUTargetMachine : public LLVMTargetMachine { SPUFrameInfo FrameInfo; SPUTargetLowering TLInfo; InstrItineraryData InstrItins; - + protected: virtual const TargetAsmInfo *createTargetAsmInfo() const; - + + // To avoid having target depend on the asmprinter stuff libraries, asmprinter + // set this functions to ctor pointer at startup time if they are linked in. 
+ typedef FunctionPass *(*AsmPrinterCtorFn)(raw_ostream &o, + SPUTargetMachine &tm, + CodeGenOpt::Level OptLevel, + bool verbose); + static AsmPrinterCtorFn AsmPrinterCtor; + public: SPUTargetMachine(const Module &M, const std::string &FS); @@ -78,7 +86,7 @@ public: return &DataLayout; } - virtual const InstrItineraryData getInstrItineraryData() const { + virtual const InstrItineraryData getInstrItineraryData() const { return InstrItins; } @@ -88,6 +96,10 @@ public: virtual bool addAssemblyEmitter(PassManagerBase &PM, CodeGenOpt::Level OptLevel, bool Verbose, raw_ostream &Out); + + static void registerAsmPrinter(AsmPrinterCtorFn F) { + AsmPrinterCtor = F; + } }; } // end namespace llvm diff --git a/lib/Target/CppBackend/CPPBackend.cpp b/lib/Target/CppBackend/CPPBackend.cpp index 04a6829..1feea96 100644 --- a/lib/Target/CppBackend/CPPBackend.cpp +++ b/lib/Target/CppBackend/CPPBackend.cpp @@ -82,6 +82,11 @@ int CppBackendTargetMachineModule = 0; // Register the target. static RegisterTarget<CPPTargetMachine> X("cpp", "C++ backend"); +// Force static initialization when called from llvm/InitializeAllTargets.h +namespace llvm { + void InitializeCppBackendTarget() { } +} + namespace { typedef std::vector<const Type*> TypeList; typedef std::map<const Type*,std::string> TypeMap; diff --git a/lib/Target/DarwinTargetAsmInfo.cpp b/lib/Target/DarwinTargetAsmInfo.cpp index 05d2351..d7d675a 100644 --- a/lib/Target/DarwinTargetAsmInfo.cpp +++ b/lib/Target/DarwinTargetAsmInfo.cpp @@ -50,6 +50,53 @@ DarwinTargetAsmInfo::DarwinTargetAsmInfo(const TargetMachine &TM) ConstDataSection = getUnnamedSection(".const_data", SectionFlags::None); DataCoalSection = getNamedSection("\t__DATA,__datacoal_nt,coalesced", SectionFlags::Writeable); + + + // Common settings for all Darwin targets. + // Syntax: + GlobalPrefix = "_"; + PrivateGlobalPrefix = "L"; + LessPrivateGlobalPrefix = "l"; // Marker for some ObjC metadata + StringConstantPrefix = "\1LC"; + NeedsSet = true; + NeedsIndirectEncoding = true; + AllowQuotesInName = true; + HasSingleParameterDotFile = false; + + // In non-PIC modes, emit a special label before jump tables so that the + // linker can perform more accurate dead code stripping. We do not check the + // relocation model here since it can be overridden later. 
+ JumpTableSpecialLabelPrefix = "l"; + + // Directives: + WeakDefDirective = "\t.weak_definition "; + WeakRefDirective = "\t.weak_reference "; + HiddenDirective = "\t.private_extern "; + + // Sections: + CStringSection = "\t.cstring"; + JumpTableDataSection = "\t.const\n"; + BSSSection = 0; + + if (TM.getRelocationModel() == Reloc::Static) { + StaticCtorsSection = ".constructor"; + StaticDtorsSection = ".destructor"; + } else { + StaticCtorsSection = ".mod_init_func"; + StaticDtorsSection = ".mod_term_func"; + } + + DwarfAbbrevSection = ".section __DWARF,__debug_abbrev,regular,debug"; + DwarfInfoSection = ".section __DWARF,__debug_info,regular,debug"; + DwarfLineSection = ".section __DWARF,__debug_line,regular,debug"; + DwarfFrameSection = ".section __DWARF,__debug_frame,regular,debug"; + DwarfPubNamesSection = ".section __DWARF,__debug_pubnames,regular,debug"; + DwarfPubTypesSection = ".section __DWARF,__debug_pubtypes,regular,debug"; + DwarfStrSection = ".section __DWARF,__debug_str,regular,debug"; + DwarfLocSection = ".section __DWARF,__debug_loc,regular,debug"; + DwarfARangesSection = ".section __DWARF,__debug_aranges,regular,debug"; + DwarfRangesSection = ".section __DWARF,__debug_ranges,regular,debug"; + DwarfMacroInfoSection = ".section __DWARF,__debug_macinfo,regular,debug"; } /// emitUsedDirectiveFor - On Darwin, internally linked data beginning with diff --git a/lib/Target/IA64/AsmPrinter/IA64AsmPrinter.cpp b/lib/Target/IA64/AsmPrinter/IA64AsmPrinter.cpp index fc54e23..662c667 100644 --- a/lib/Target/IA64/AsmPrinter/IA64AsmPrinter.cpp +++ b/lib/Target/IA64/AsmPrinter/IA64AsmPrinter.cpp @@ -374,3 +374,18 @@ FunctionPass *llvm::createIA64CodePrinterPass(raw_ostream &o, bool verbose) { return new IA64AsmPrinter(o, tm, tm.getTargetAsmInfo(), OptLevel, verbose); } + +namespace { + static struct Register { + Register() { + IA64TargetMachine::registerAsmPrinter(createIA64CodePrinterPass); + } + } Registrator; +} + + +// Force static initialization when called from +// llvm/InitializeAllAsmPrinters.h +namespace llvm { + void InitializeIA64AsmPrinter() { } +} diff --git a/lib/Target/IA64/IA64ISelLowering.cpp b/lib/Target/IA64/IA64ISelLowering.cpp index 34a0686..c545b9c 100644 --- a/lib/Target/IA64/IA64ISelLowering.cpp +++ b/lib/Target/IA64/IA64ISelLowering.cpp @@ -107,6 +107,10 @@ IA64TargetLowering::IA64TargetLowering(TargetMachine &TM) // VASTART needs to be custom lowered to use the VarArgsFrameIndex setOperationAction(ISD::VAARG , MVT::Other, Custom); setOperationAction(ISD::VASTART , MVT::Other, Custom); + + // FIXME: These should be legal + setOperationAction(ISD::BIT_CONVERT, MVT::i64, Expand); + setOperationAction(ISD::BIT_CONVERT, MVT::f64, Expand); // Use the default implementation. setOperationAction(ISD::VACOPY , MVT::Other, Expand); diff --git a/lib/Target/IA64/IA64TargetMachine.cpp b/lib/Target/IA64/IA64TargetMachine.cpp index 878a00a..0b93ee5 100644 --- a/lib/Target/IA64/IA64TargetMachine.cpp +++ b/lib/Target/IA64/IA64TargetMachine.cpp @@ -19,16 +19,18 @@ #include "llvm/Target/TargetMachineRegistry.h" using namespace llvm; -/// IA64TargetMachineModule - Note that this is used on hosts that cannot link -/// in a library unless there are references into the library. In particular, -/// it seems that it is not possible to get things to work on Win32 without -/// this. Though it is unused, do not remove it. 
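This DarwinTargetAsmInfo hunk is the other half of the ARMTargetAsmInfo deletions earlier in the patch: the prefixes, directives, sections and DWARF section names that every Darwin target shares move into the common base constructor, leaving each target's AsmInfo with only its deltas (for ARM: .space, .zerofill, and the lack of .type/.size). A small model of that base-plus-delta shape, with an abbreviated, illustrative field list rather than the real TargetAsmInfo members:

#include <cstdio>
#include <string>

// Settings shared by every Darwin target live in the base constructor.
struct DarwinTargetAsmInfoModel {
  std::string GlobalPrefix;
  std::string PrivateGlobalPrefix;
  std::string WeakRefDirective;
  std::string JumpTableDataSection;

  DarwinTargetAsmInfoModel()
      : GlobalPrefix("_"), PrivateGlobalPrefix("L"),
        WeakRefDirective("\t.weak_reference "),
        JumpTableDataSection("\t.const\n") {}
};

// A target subclass keeps only what differs, as the ARM ctor now does.
struct ARMDarwinAsmInfoModel : DarwinTargetAsmInfoModel {
  std::string ZeroDirective;
  ARMDarwinAsmInfoModel() : ZeroDirective("\t.space\t") {}
};

int main() {
  ARMDarwinAsmInfoModel AI;
  std::printf("global prefix: %s\n", AI.GlobalPrefix.c_str());
  return 0;
}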
-extern "C" int IA64TargetMachineModule; -int IA64TargetMachineModule = 0; - -static RegisterTarget<IA64TargetMachine> X("ia64", +// Register the target +static RegisterTarget<IA64TargetMachine> X("ia64", "IA-64 (Itanium) [experimental]"); +// No assembler printer by default +IA64TargetMachine::AsmPrinterCtorFn IA64TargetMachine::AsmPrinterCtor = 0; + +// Force static initialization when called from llvm/InitializeAllTargets.h +namespace llvm { + void InitializeIA64Target() { } +} + const TargetAsmInfo *IA64TargetMachine::createTargetAsmInfo() const { return new IA64TargetAsmInfo(*this); } @@ -88,7 +90,10 @@ bool IA64TargetMachine::addAssemblyEmitter(PassManagerBase &PM, CodeGenOpt::Level OptLevel, bool Verbose, raw_ostream &Out) { - PM.add(createIA64CodePrinterPass(Out, *this, OptLevel, Verbose)); + // Output assembly language. + assert(AsmPrinterCtor && "AsmPrinter was not linked in"); + if (AsmPrinterCtor) + PM.add(AsmPrinterCtor(Out, *this, OptLevel, Verbose)); return false; } diff --git a/lib/Target/IA64/IA64TargetMachine.h b/lib/Target/IA64/IA64TargetMachine.h index 29d625c..a64da9f 100644 --- a/lib/Target/IA64/IA64TargetMachine.h +++ b/lib/Target/IA64/IA64TargetMachine.h @@ -30,24 +30,32 @@ class IA64TargetMachine : public LLVMTargetMachine { TargetFrameInfo FrameInfo; //IA64JITInfo JITInfo; IA64TargetLowering TLInfo; - + protected: virtual const TargetAsmInfo *createTargetAsmInfo() const; + // To avoid having target depend on the asmprinter stuff libraries, asmprinter + // set this functions to ctor pointer at startup time if they are linked in. + typedef FunctionPass *(*AsmPrinterCtorFn)(raw_ostream &o, + IA64TargetMachine &tm, + CodeGenOpt::Level OptLevel, + bool verbose); + static AsmPrinterCtorFn AsmPrinterCtor; + public: IA64TargetMachine(const Module &M, const std::string &FS); virtual const IA64InstrInfo *getInstrInfo() const { return &InstrInfo; } virtual const TargetFrameInfo *getFrameInfo() const { return &FrameInfo; } virtual const IA64Subtarget *getSubtargetImpl() const { return &Subtarget; } - virtual IA64TargetLowering *getTargetLowering() const { + virtual IA64TargetLowering *getTargetLowering() const { return const_cast<IA64TargetLowering*>(&TLInfo); } virtual const IA64RegisterInfo *getRegisterInfo() const { return &InstrInfo.getRegisterInfo(); } virtual const TargetData *getTargetData() const { return &DataLayout; } - + static unsigned getModuleMatchQuality(const Module &M); // Pass Pipeline Configuration @@ -56,6 +64,10 @@ public: virtual bool addAssemblyEmitter(PassManagerBase &PM, CodeGenOpt::Level OptLevel, bool Verbose, raw_ostream &Out); + + static void registerAsmPrinter(AsmPrinterCtorFn F) { + AsmPrinterCtor = F; + } }; } // End llvm namespace diff --git a/lib/Target/MSIL/MSILWriter.cpp b/lib/Target/MSIL/MSILWriter.cpp index 37e5b1e..0aff14f 100644 --- a/lib/Target/MSIL/MSILWriter.cpp +++ b/lib/Target/MSIL/MSILWriter.cpp @@ -55,6 +55,11 @@ int MSILTargetMachineModule = 0; static RegisterTarget<MSILTarget> X("msil", "MSIL backend"); +// Force static initialization when called from llvm/InitializeAllTargets.h +namespace llvm { + void InitializeMSILTarget() { } +} + bool MSILModule::runOnModule(Module &M) { ModulePtr = &M; TD = &getAnalysis<TargetData>(); diff --git a/lib/Target/MSP430/MSP430TargetMachine.cpp b/lib/Target/MSP430/MSP430TargetMachine.cpp index 7886946..0f5244d 100644 --- a/lib/Target/MSP430/MSP430TargetMachine.cpp +++ b/lib/Target/MSP430/MSP430TargetMachine.cpp @@ -35,6 +35,11 @@ int MSP430TargetMachineModule = 0; static 
RegisterTarget<MSP430TargetMachine> X("msp430", "MSP430 [experimental]"); +// Force static initialization when called from llvm/InitializeAllTargets.h +namespace llvm { + void InitializeMSP430Target() { } +} + MSP430TargetMachine::MSP430TargetMachine(const Module &M, const std::string &FS) : Subtarget(*this, M, FS), diff --git a/lib/Target/Mips/AsmPrinter/MipsAsmPrinter.cpp b/lib/Target/Mips/AsmPrinter/MipsAsmPrinter.cpp index dfb6238..077ec96 100644 --- a/lib/Target/Mips/AsmPrinter/MipsAsmPrinter.cpp +++ b/lib/Target/Mips/AsmPrinter/MipsAsmPrinter.cpp @@ -578,3 +578,17 @@ doFinalization(Module &M) return AsmPrinter::doFinalization(M); } + +namespace { + static struct Register { + Register() { + MipsTargetMachine::registerAsmPrinter(createMipsCodePrinterPass); + } + } Registrator; +} + +// Force static initialization when called from +// llvm/InitializeAllAsmPrinters.h +namespace llvm { + void InitializeMipsAsmPrinter() { } +} diff --git a/lib/Target/Mips/MipsISelLowering.cpp b/lib/Target/Mips/MipsISelLowering.cpp index 4517cfc..42afceb 100644 --- a/lib/Target/Mips/MipsISelLowering.cpp +++ b/lib/Target/Mips/MipsISelLowering.cpp @@ -95,6 +95,7 @@ MipsTargetLowering(MipsTargetMachine &TM): TargetLowering(TM) setOperationAction(ISD::JumpTable, MVT::i32, Custom); setOperationAction(ISD::ConstantPool, MVT::i32, Custom); setOperationAction(ISD::SELECT, MVT::f32, Custom); + setOperationAction(ISD::SELECT, MVT::f64, Custom); setOperationAction(ISD::SELECT, MVT::i32, Custom); setOperationAction(ISD::SETCC, MVT::f32, Custom); setOperationAction(ISD::SETCC, MVT::f64, Custom); @@ -122,6 +123,7 @@ MipsTargetLowering(MipsTargetMachine &TM): TargetLowering(TM) setOperationAction(ISD::SRA_PARTS, MVT::i32, Expand); setOperationAction(ISD::SRL_PARTS, MVT::i32, Expand); setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand); + setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand); // We don't have line number support yet. setOperationAction(ISD::DBG_STOPPOINT, MVT::Other, Expand); diff --git a/lib/Target/Mips/MipsTargetMachine.cpp b/lib/Target/Mips/MipsTargetMachine.cpp index ef524e3..83b9b62 100644 --- a/lib/Target/Mips/MipsTargetMachine.cpp +++ b/lib/Target/Mips/MipsTargetMachine.cpp @@ -31,6 +31,14 @@ int MipsTargetMachineModule = 0; static RegisterTarget<MipsTargetMachine> X("mips", "Mips"); static RegisterTarget<MipselTargetMachine> Y("mipsel", "Mipsel"); +MipsTargetMachine::AsmPrinterCtorFn MipsTargetMachine::AsmPrinterCtor = 0; + + +// Force static initialization when called from llvm/InitializeAllTargets.h +namespace llvm { + void InitializeMipsTarget() { } +} + const TargetAsmInfo *MipsTargetMachine:: createTargetAsmInfo() const { @@ -125,9 +133,9 @@ addPreEmitPass(PassManagerBase &PM, CodeGenOpt::Level OptLevel) // true if AssemblyEmitter is supported bool MipsTargetMachine:: addAssemblyEmitter(PassManagerBase &PM, CodeGenOpt::Level OptLevel, - bool Verbose, raw_ostream &Out) -{ + bool Verbose, raw_ostream &Out) { // Output assembly language. 
- PM.add(createMipsCodePrinterPass(Out, *this, OptLevel, Verbose)); + assert(AsmPrinterCtor && "AsmPrinter was not linked in"); + PM.add(AsmPrinterCtor(Out, *this, OptLevel, Verbose)); return false; } diff --git a/lib/Target/Mips/MipsTargetMachine.h b/lib/Target/Mips/MipsTargetMachine.h index a9e1df2..85fafad 100644 --- a/lib/Target/Mips/MipsTargetMachine.h +++ b/lib/Target/Mips/MipsTargetMachine.h @@ -33,10 +33,23 @@ namespace llvm { protected: virtual const TargetAsmInfo *createTargetAsmInfo() const; - + protected: + // To avoid having target depend on the asmprinter stuff libraries, + // asmprinter set this functions to ctor pointer at startup time if they are + // linked in. + typedef FunctionPass *(*AsmPrinterCtorFn)(raw_ostream &o, + MipsTargetMachine &tm, + CodeGenOpt::Level OptLevel, + bool verbose); + static AsmPrinterCtorFn AsmPrinterCtor; + public: MipsTargetMachine(const Module &M, const std::string &FS, bool isLittle); + static void registerAsmPrinter(AsmPrinterCtorFn F) { + AsmPrinterCtor = F; + } + virtual const MipsInstrInfo *getInstrInfo() const { return &InstrInfo; } virtual const TargetFrameInfo *getFrameInfo() const diff --git a/lib/Target/PIC16/PIC16AsmPrinter.cpp b/lib/Target/PIC16/PIC16AsmPrinter.cpp index f9a8801..ca1089b 100644 --- a/lib/Target/PIC16/PIC16AsmPrinter.cpp +++ b/lib/Target/PIC16/PIC16AsmPrinter.cpp @@ -48,27 +48,10 @@ bool PIC16AsmPrinter::runOnMachineFunction(MachineFunction &MF) { const Function *F = MF.getFunction(); CurrentFnName = Mang->getValueName(F); - // Iterate over the first basic block instructions to find if it has a - // DebugLoc. If so emit .file directive. Instructions such as movlw do not - // have valid DebugLoc, so need to iterate over instructions. - MachineFunction::const_iterator I = MF.begin(); - for (MachineBasicBlock::const_iterator MBBI = I->begin(), E = I->end(); - MBBI != E; MBBI++) { - const DebugLoc DLoc = MBBI->getDebugLoc(); - if (!DLoc.isUnknown()) { - GlobalVariable *CU = MF.getDebugLocTuple(DLoc).CompileUnit; - unsigned line = MF.getDebugLocTuple(DLoc).Line; - DbgInfo.EmitFileDirective(CU); - DbgInfo.SetFunctBeginLine(line); - break; - } - } - // Emit the function frame (args and temps). EmitFunctionFrame(MF); - // Emit function begin debug directive. - DbgInfo.EmitFunctBeginDI(F); + DbgInfo.BeginFunction(MF); // Emit the autos section of function. EmitAutos(CurrentFnName); @@ -89,9 +72,7 @@ bool PIC16AsmPrinter::runOnMachineFunction(MachineFunction &MF) { // Emit function start label. O << CurrentFnName << ":\n"; - // For emitting line directives, we need to keep track of the current - // source line. When it changes then only emit the line directive. - unsigned CurLine = 0; + DebugLoc CurDL; O << "\n"; // Print out code for the function. for (MachineFunction::const_iterator I = MF.begin(), E = MF.end(); @@ -109,12 +90,9 @@ bool PIC16AsmPrinter::runOnMachineFunction(MachineFunction &MF) { // Emit the line directive if source line changed. const DebugLoc DL = II->getDebugLoc(); - if (!DL.isUnknown()) { - unsigned line = MF.getDebugLocTuple(DL).Line; - if (line != CurLine) { - O << "\t.line " << line << "\n"; - CurLine = line; - } + if (!DL.isUnknown() && DL != CurDL) { + DbgInfo.ChangeDebugLoc(MF, DL); + CurDL = DL; } // Print the assembly for the instruction. @@ -123,7 +101,7 @@ bool PIC16AsmPrinter::runOnMachineFunction(MachineFunction &MF) { } // Emit function end debug directives. - DbgInfo.EmitFunctEndDI(F, CurLine); + DbgInfo.EndFunction(MF); return false; // we didn't modify anything. 
} @@ -226,7 +204,7 @@ bool PIC16AsmPrinter::doInitialization (Module &M) { I->setSection(TAI->SectionForGlobal(I)->getName()); } - DbgInfo.Init(M); + DbgInfo.BeginModule(M); EmitFunctionDecls(M); EmitUndefinedVars(M); EmitDefinedVars(M); @@ -313,8 +291,7 @@ void PIC16AsmPrinter::EmitRomData (Module &M) bool PIC16AsmPrinter::doFinalization(Module &M) { printLibcallDecls(); EmitRemainingAutos(); - DbgInfo.EmitVarDebugInfo(M); - DbgInfo.EmitEOF(); + DbgInfo.EndModule(M); O << "\n\t" << "END\n"; bool Result = AsmPrinter::doFinalization(M); return Result; diff --git a/lib/Target/PIC16/PIC16AsmPrinter.h b/lib/Target/PIC16/PIC16AsmPrinter.h index 8bdcf72..3ec5659 100644 --- a/lib/Target/PIC16/PIC16AsmPrinter.h +++ b/lib/Target/PIC16/PIC16AsmPrinter.h @@ -32,7 +32,7 @@ namespace llvm { explicit PIC16AsmPrinter(raw_ostream &O, PIC16TargetMachine &TM, const TargetAsmInfo *T, CodeGenOpt::Level OL, bool V) - : AsmPrinter(O, TM, T, OL, V), DbgInfo(O,T) { + : AsmPrinter(O, TM, T, OL, V), DbgInfo(O, T) { PTLI = TM.getTargetLowering(); PTAI = static_cast<const PIC16TargetAsmInfo *> (T); } diff --git a/lib/Target/PIC16/PIC16DebugInfo.cpp b/lib/Target/PIC16/PIC16DebugInfo.cpp index d7ebea7..27551cd 100644 --- a/lib/Target/PIC16/PIC16DebugInfo.cpp +++ b/lib/Target/PIC16/PIC16DebugInfo.cpp @@ -14,91 +14,23 @@ #include "PIC16.h" #include "PIC16DebugInfo.h" #include "llvm/GlobalVariable.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/Support/DebugLoc.h" #include "llvm/Support/raw_ostream.h" using namespace llvm; -void PIC16DbgInfo::PopulateDebugInfo(DIType Ty, unsigned short &TypeNo, - bool &HasAux, int Aux[], - std::string &TypeName) { - if (Ty.isBasicType(Ty.getTag())) { - std::string Name = ""; - Ty.getName(Name); - unsigned short BaseTy = GetTypeDebugNumber(Name); - TypeNo = TypeNo << PIC16Dbg::S_BASIC; - TypeNo = TypeNo | (0xffff & BaseTy); - } - else if (Ty.isDerivedType(Ty.getTag())) { - switch(Ty.getTag()) - { - case dwarf::DW_TAG_pointer_type: - TypeNo = TypeNo << PIC16Dbg::S_DERIVED; - TypeNo = TypeNo | PIC16Dbg::DT_PTR; - break; - default: - TypeNo = TypeNo << PIC16Dbg::S_DERIVED; - } - DIType BaseType = DIDerivedType(Ty.getGV()).getTypeDerivedFrom(); - PopulateDebugInfo(BaseType, TypeNo, HasAux, Aux, TypeName); - } - else if (Ty.isCompositeType(Ty.getTag())) { - switch (Ty.getTag()) { - case dwarf::DW_TAG_array_type: { - DICompositeType CTy = DICompositeType(Ty.getGV()); - DIArray Elements = CTy.getTypeArray(); - unsigned short size = 1; - unsigned short Dimension[4]={0,0,0,0}; - for (unsigned i = 0, N = Elements.getNumElements(); i < N; ++i) { - DIDescriptor Element = Elements.getElement(i); - if (Element.getTag() == dwarf::DW_TAG_subrange_type) { - TypeNo = TypeNo << PIC16Dbg::S_DERIVED; - TypeNo = TypeNo | PIC16Dbg::DT_ARY; - DISubrange SubRange = DISubrange(Element.getGV()); - Dimension[i] = SubRange.getHi() - SubRange.getLo() + 1; - // Each dimension is represented by 2 bytes starting at byte 9. - Aux[8+i*2+0] = Dimension[i]; - Aux[8+i*2+1] = Dimension[i] >> 8; - size = size * Dimension[i]; - } - } - HasAux = true; - // In auxillary entry for array, 7th and 8th byte represent array size. 
- Aux[6] = size & 0xff; - Aux[7] = size >> 8; - DIType BaseType = CTy.getTypeDerivedFrom(); - PopulateDebugInfo(BaseType, TypeNo, HasAux, Aux, TypeName); - - break; - } - case dwarf:: DW_TAG_union_type: - case dwarf::DW_TAG_structure_type: { - DICompositeType CTy = DICompositeType(Ty.getGV()); - TypeNo = TypeNo << PIC16Dbg::S_BASIC; - if (Ty.getTag() == dwarf::DW_TAG_structure_type) - TypeNo = TypeNo | PIC16Dbg::T_STRUCT; - else - TypeNo = TypeNo | PIC16Dbg::T_UNION; - CTy.getName(TypeName); - // UniqueSuffix is .number where number is obtained from - // llvm.dbg.composite<number>. - std::string UniqueSuffix = "." + Ty.getGV()->getName().substr(18); - TypeName += UniqueSuffix; - unsigned short size = CTy.getSizeInBits()/8; - // 7th and 8th byte represent size. - HasAux = true; - Aux[6] = size & 0xff; - Aux[7] = size >> 8; - break; - } - case dwarf::DW_TAG_enumeration_type: { - TypeNo = TypeNo << PIC16Dbg::S_BASIC; - TypeNo = TypeNo | PIC16Dbg::T_ENUM; - break; - } - default: - TypeNo = TypeNo << PIC16Dbg::S_DERIVED; - } - } +/// PopulateDebugInfo - Populate the TypeNo, Aux[] and TagName from Ty. +/// +void PIC16DbgInfo::PopulateDebugInfo (DIType Ty, unsigned short &TypeNo, + bool &HasAux, int Aux[], + std::string &TagName) { + if (Ty.isBasicType(Ty.getTag())) + PopulateBasicTypeInfo (Ty, TypeNo); + else if (Ty.isDerivedType(Ty.getTag())) + PopulateDerivedTypeInfo (Ty, TypeNo, HasAux, Aux, TagName); + else if (Ty.isCompositeType(Ty.getTag())) + PopulateCompositeTypeInfo (Ty, TypeNo, HasAux, Aux, TagName); else { TypeNo = PIC16Dbg::T_NULL; HasAux = false; @@ -106,7 +38,127 @@ void PIC16DbgInfo::PopulateDebugInfo(DIType Ty, unsigned short &TypeNo, return; } +/// PopulateBasicTypeInfo- Populate TypeNo for basic type from Ty. +/// +void PIC16DbgInfo::PopulateBasicTypeInfo (DIType Ty, unsigned short &TypeNo) { + std::string Name = ""; + Ty.getName(Name); + unsigned short BaseTy = GetTypeDebugNumber(Name); + TypeNo = TypeNo << PIC16Dbg::S_BASIC; + TypeNo = TypeNo | (0xffff & BaseTy); +} + +/// PopulateDerivedTypeInfo - Populate TypeNo, Aux[], TagName for derived type +/// from Ty. Derived types are mostly pointers. +/// +void PIC16DbgInfo::PopulateDerivedTypeInfo (DIType Ty, unsigned short &TypeNo, + bool &HasAux, int Aux[], + std::string &TagName) { + + switch(Ty.getTag()) + { + case dwarf::DW_TAG_pointer_type: + TypeNo = TypeNo << PIC16Dbg::S_DERIVED; + TypeNo = TypeNo | PIC16Dbg::DT_PTR; + break; + default: + TypeNo = TypeNo << PIC16Dbg::S_DERIVED; + } + + // We also need to encode the the information about the base type of + // pointer in TypeNo. + DIType BaseType = DIDerivedType(Ty.getGV()).getTypeDerivedFrom(); + PopulateDebugInfo(BaseType, TypeNo, HasAux, Aux, TagName); +} + +/// PopulateArrayTypeInfo - Populate TypeNo, Aux[] for array from Ty. +void PIC16DbgInfo::PopulateArrayTypeInfo (DIType Ty, unsigned short &TypeNo, + bool &HasAux, int Aux[], + std::string &TagName) { + DICompositeType CTy = DICompositeType(Ty.getGV()); + DIArray Elements = CTy.getTypeArray(); + unsigned short size = 1; + unsigned short Dimension[4]={0,0,0,0}; + for (unsigned i = 0, N = Elements.getNumElements(); i < N; ++i) { + DIDescriptor Element = Elements.getElement(i); + if (Element.getTag() == dwarf::DW_TAG_subrange_type) { + TypeNo = TypeNo << PIC16Dbg::S_DERIVED; + TypeNo = TypeNo | PIC16Dbg::DT_ARY; + DISubrange SubRange = DISubrange(Element.getGV()); + Dimension[i] = SubRange.getHi() - SubRange.getLo() + 1; + // Each dimension is represented by 2 bytes starting at byte 9. 
+ Aux[8+i*2+0] = Dimension[i]; + Aux[8+i*2+1] = Dimension[i] >> 8; + size = size * Dimension[i]; + } + } + HasAux = true; + // In auxillary entry for array, 7th and 8th byte represent array size. + Aux[6] = size & 0xff; + Aux[7] = size >> 8; + DIType BaseType = CTy.getTypeDerivedFrom(); + PopulateDebugInfo(BaseType, TypeNo, HasAux, Aux, TagName); +} + +/// PopulateStructOrUnionTypeInfo - Populate TypeNo, Aux[] , TagName for +/// structure or union. +/// +void PIC16DbgInfo::PopulateStructOrUnionTypeInfo (DIType Ty, + unsigned short &TypeNo, + bool &HasAux, int Aux[], + std::string &TagName) { + DICompositeType CTy = DICompositeType(Ty.getGV()); + TypeNo = TypeNo << PIC16Dbg::S_BASIC; + if (Ty.getTag() == dwarf::DW_TAG_structure_type) + TypeNo = TypeNo | PIC16Dbg::T_STRUCT; + else + TypeNo = TypeNo | PIC16Dbg::T_UNION; + CTy.getName(TagName); + // UniqueSuffix is .number where number is obtained from + // llvm.dbg.composite<number>. + std::string UniqueSuffix = "." + Ty.getGV()->getName().substr(18); + TagName += UniqueSuffix; + unsigned short size = CTy.getSizeInBits()/8; + // 7th and 8th byte represent size. + HasAux = true; + Aux[6] = size & 0xff; + Aux[7] = size >> 8; +} + +/// PopulateEnumTypeInfo - Populate TypeNo for enum from Ty. +void PIC16DbgInfo::PopulateEnumTypeInfo (DIType Ty, unsigned short &TypeNo) { + TypeNo = TypeNo << PIC16Dbg::S_BASIC; + TypeNo = TypeNo | PIC16Dbg::T_ENUM; +} + +/// PopulateCompositeTypeInfo - Populate TypeNo, Aux[] and TagName for +/// composite types from Ty. +/// +void PIC16DbgInfo::PopulateCompositeTypeInfo (DIType Ty, unsigned short &TypeNo, + bool &HasAux, int Aux[], + std::string &TagName) { + switch (Ty.getTag()) { + case dwarf::DW_TAG_array_type: { + PopulateArrayTypeInfo (Ty, TypeNo, HasAux, Aux, TagName); + break; + } + case dwarf:: DW_TAG_union_type: + case dwarf::DW_TAG_structure_type: { + PopulateStructOrUnionTypeInfo (Ty, TypeNo, HasAux, Aux, TagName); + break; + } + case dwarf::DW_TAG_enumeration_type: { + PopulateEnumTypeInfo (Ty, TypeNo); + break; + } + default: + TypeNo = TypeNo << PIC16Dbg::S_DERIVED; + } +} + +/// GetTypeDebugNumber - Get debug type number for given type. +/// unsigned PIC16DbgInfo::GetTypeDebugNumber(std::string &type) { if (type == "char") return PIC16Dbg::T_CHAR; @@ -127,8 +179,10 @@ unsigned PIC16DbgInfo::GetTypeDebugNumber(std::string &type) { else return 0; } - -short PIC16DbgInfo::getClass(DIGlobalVariable DIGV) { + +/// GetStorageClass - Get storage class for give debug variable. +/// +short PIC16DbgInfo::getStorageClass(DIGlobalVariable DIGV) { short ClassNo; if (PAN::isLocalName(DIGV.getGlobal()->getName())) { // Generating C_AUTO here fails due to error in linker. Change it once @@ -142,12 +196,126 @@ short PIC16DbgInfo::getClass(DIGlobalVariable DIGV) { return ClassNo; } -void PIC16DbgInfo::Init(Module &M) { - // Do all debug related initializations here. - EmitFileDirective(M); +/// BeginModule - Emit necessary debug info to start a Module and do other +/// required initializations. +void PIC16DbgInfo::BeginModule(Module &M) { + // Emit file directive for module. + GlobalVariable *CU = M.getNamedGlobal("llvm.dbg.compile_unit"); + if (CU) { + EmitDebugDirectives = true; + SwitchToCU(CU); + } + + // Emit debug info for decls of composite types. EmitCompositeTypeDecls(M); } +/// Helper to find first valid debug loc for a function. 
+/// +static const DebugLoc GetDebugLocForFunction(const MachineFunction &MF) { + DebugLoc DL; + for (MachineFunction::const_iterator I = MF.begin(), E = MF.end(); + I != E; ++I) { + for (MachineBasicBlock::const_iterator II = I->begin(), E = I->end(); + II != E; ++II) { + DL = II->getDebugLoc(); + if (!DL.isUnknown()) + return DL; + } + } + return DL; +} + +/// BeginFunction - Emit necessary debug info to start a function. +/// +void PIC16DbgInfo::BeginFunction(const MachineFunction &MF) { + if (! EmitDebugDirectives) return; + + // Retrieve the first valid debug Loc and process it. + const DebugLoc &DL = GetDebugLocForFunction(MF); + ChangeDebugLoc(MF, DL, true); + + EmitFunctBeginDI(MF.getFunction()); + + // Set current line to 0 so that the .line directive is generated after .bf. + CurLine = 0; +} + +/// ChangeDebugLoc - Take necessary steps when DebugLoc changes. +/// CurFile and CurLine may change as a result of this. +/// +void PIC16DbgInfo::ChangeDebugLoc(const MachineFunction &MF, + const DebugLoc &DL, bool IsInBeginFunction) { + if (! EmitDebugDirectives) return; + assert (! DL.isUnknown() && "can't change to invalid debug loc"); + + GlobalVariable *CU = MF.getDebugLocTuple(DL).CompileUnit; + unsigned line = MF.getDebugLocTuple(DL).Line; + + SwitchToCU(CU); + SwitchToLine(line, IsInBeginFunction); +} + +/// SwitchToLine - Emit line directive for a new line. +/// +void PIC16DbgInfo::SwitchToLine(unsigned Line, bool IsInBeginFunction) { + if (CurLine == Line) return; + if (!IsInBeginFunction) O << "\n\t.line " << Line << "\n"; + CurLine = Line; +} + +/// EndFunction - Emit .ef for end of function. +/// +void PIC16DbgInfo::EndFunction(const MachineFunction &MF) { + if (! EmitDebugDirectives) return; + EmitFunctEndDI(MF.getFunction(), CurLine); +} + +/// EndModule - Emit .eof for end of module. +/// +void PIC16DbgInfo::EndModule(Module &M) { + if (! EmitDebugDirectives) return; + EmitVarDebugInfo(M); + if (CurFile != "") O << "\n\t.eof"; +} + +/// EmitCompositeTypeElements - Emit debug information for members of a +/// composite type. +/// +void PIC16DbgInfo::EmitCompositeTypeElements (DICompositeType CTy, + std::string UniqueSuffix) { + unsigned long Value = 0; + DIArray Elements = CTy.getTypeArray(); + for (unsigned i = 0, N = Elements.getNumElements(); i < N; i++) { + DIDescriptor Element = Elements.getElement(i); + unsigned short TypeNo = 0; + bool HasAux = false; + int ElementAux[PIC16Dbg::AuxSize] = { 0 }; + std::string TagName = ""; + std::string ElementName; + GlobalVariable *GV = Element.getGV(); + DIDerivedType DITy(GV); + DITy.getName(ElementName); + unsigned short ElementSize = DITy.getSizeInBits()/8; + // Get mangled name for this structure/union element. + std::string MangMemName = ElementName + UniqueSuffix; + PopulateDebugInfo(DITy, TypeNo, HasAux, ElementAux, TagName); + short Class; + if( CTy.getTag() == dwarf::DW_TAG_union_type) + Class = PIC16Dbg::C_MOU; + else if (CTy.getTag() == dwarf::DW_TAG_structure_type) + Class = PIC16Dbg::C_MOS; + EmitSymbol(MangMemName, Class, TypeNo, Value); + if (CTy.getTag() == dwarf::DW_TAG_structure_type) + Value += ElementSize; + if (HasAux) + EmitAuxEntry(MangMemName, ElementAux, PIC16Dbg::AuxSize, TagName); + } +} + +/// EmitCompositeTypeDecls - Emit composite type declarations like structure +/// and union declarations.
+/// void PIC16DbgInfo::EmitCompositeTypeDecls(Module &M) { for(iplist<GlobalVariable>::iterator I = M.getGlobalList().begin(), E = M.getGlobalList().end(); I != E; I++) { @@ -178,33 +346,10 @@ void PIC16DbgInfo::EmitCompositeTypeDecls(Module &M) { // Emit auxiliary debug information for structure/union tag. EmitAuxEntry(MangledCTyName, Aux, PIC16Dbg::AuxSize); - unsigned long Value = 0; - DIArray Elements = CTy.getTypeArray(); - for (unsigned i = 0, N = Elements.getNumElements(); i < N; i++) { - DIDescriptor Element = Elements.getElement(i); - unsigned short TypeNo = 0; - bool HasAux = false; - int ElementAux[PIC16Dbg::AuxSize] = { 0 }; - std::string TypeName = ""; - std::string ElementName; - GlobalVariable *GV = Element.getGV(); - DIDerivedType DITy(GV); - DITy.getName(ElementName); - unsigned short ElementSize = DITy.getSizeInBits()/8; - // Get mangleddd name for this structure/union element. - std::string MangMemName = ElementName + UniqueSuffix; - PopulateDebugInfo(DITy, TypeNo, HasAux, ElementAux, TypeName); - short Class; - if( CTy.getTag() == dwarf::DW_TAG_union_type) - Class = PIC16Dbg::C_MOU; - else if (CTy.getTag() == dwarf::DW_TAG_structure_type) - Class = PIC16Dbg::C_MOS; - EmitSymbol(MangMemName, Class, TypeNo, Value); - if (CTy.getTag() == dwarf::DW_TAG_structure_type) - Value += ElementSize; - if (HasAux) - EmitAuxEntry(MangMemName, ElementAux, PIC16Dbg::AuxSize, TypeName); - } + + // Emit members. + EmitCompositeTypeElements (CTy, UniqueSuffix); + // Emit mangled Symbol for end of structure/union. std::string EOSSymbol = ".eos" + UniqueSuffix; EmitSymbol(EOSSymbol, PIC16Dbg::C_EOS); @@ -214,6 +359,8 @@ void PIC16DbgInfo::EmitCompositeTypeDecls(Module &M) { } } +/// EmitFunctBeginDI - Emit .bf for function. +/// void PIC16DbgInfo::EmitFunctBeginDI(const Function *F) { std::string FunctName = F->getName(); if (EmitDebugDirectives) { @@ -221,16 +368,20 @@ void PIC16DbgInfo::EmitFunctBeginDI(const Function *F) { std::string BlockBeginSym = ".bb." + FunctName; int BFAux[PIC16Dbg::AuxSize] = {0}; - BFAux[4] = FunctBeginLine; - BFAux[5] = FunctBeginLine >> 8; + BFAux[4] = CurLine; + BFAux[5] = CurLine >> 8; + // Emit debug directives for beginning of function. EmitSymbol(FunctBeginSym, PIC16Dbg::C_FCN); EmitAuxEntry(FunctBeginSym, BFAux, PIC16Dbg::AuxSize); + EmitSymbol(BlockBeginSym, PIC16Dbg::C_BLOCK); EmitAuxEntry(BlockBeginSym, BFAux, PIC16Dbg::AuxSize); } } +/// EmitFunctEndDI - Emit .ef for function end. +/// void PIC16DbgInfo::EmitFunctEndDI(const Function *F, unsigned Line) { std::string FunctName = F->getName(); if (EmitDebugDirectives) { @@ -241,8 +392,8 @@ void PIC16DbgInfo::EmitFunctEndDI(const Function *F, unsigned Line) { EmitSymbol(BlockEndSym, PIC16Dbg::C_BLOCK); int EFAux[PIC16Dbg::AuxSize] = {0}; // 5th and 6th byte stand for line number. - EFAux[4] = Line; - EFAux[5] = Line >> 8; + EFAux[4] = CurLine; + EFAux[5] = CurLine >> 8; EmitAuxEntry(BlockEndSym, EFAux, PIC16Dbg::AuxSize); EmitSymbol(FunctEndSym, PIC16Dbg::C_FCN); EmitAuxEntry(FunctEndSym, EFAux, PIC16Dbg::AuxSize); @@ -251,15 +402,18 @@ void PIC16DbgInfo::EmitFunctEndDI(const Function *F, unsigned Line) { /// EmitAuxEntry - Emit Auxiliary debug information. 
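// (Editor's note) Aux entries are emitted as a flat comma-separated byte
// list, so every multi-byte field is split little-endian by hand: the .bf
// and .ef entries above put the line number's low byte at index 4 and its
// high byte at index 5, just as the type entries put the size at indices 6
// and 7. The recurring idiom, with masking shown for clarity:
//
//   Aux[4] = Line & 0xff;         // low byte
//   Aux[5] = (Line >> 8) & 0xff;  // high byte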
///
-void PIC16DbgInfo::EmitAuxEntry(const std::string VarName, int Aux[], int num,
-                                std::string tag) {
+void PIC16DbgInfo::EmitAuxEntry(const std::string VarName, int Aux[], int Num,
+                                std::string TagName) {
   O << "\n\t.dim " << VarName << ", 1" ;
-  if (tag != "")
-    O << ", " << tag;
-  for (int i = 0; i<num; i++)
+  // TagName is emitted in case of structure/union objects.
+  if (TagName != "")
+    O << ", " << TagName;
+  for (int i = 0; i<Num; i++)
     O << "," << Aux[i];
 }
 
+/// EmitSymbol - Emit .def for a symbol. Value is the offset for the member.
+///
 void PIC16DbgInfo::EmitSymbol(std::string Name, short Class, unsigned short
                               Type, unsigned long Value) {
   O << "\n\t" << ".def "<< Name << ", type = " << Type << ", class = "
@@ -268,6 +422,8 @@ void PIC16DbgInfo::EmitSymbol(std::string Name, short Class, unsigned short
     O << ", value = " << Value;
 }
 
+/// EmitVarDebugInfo - Emit debug information for all variables.
+///
 void PIC16DbgInfo::EmitVarDebugInfo(Module &M) {
   GlobalVariable *Root = M.getGlobalVariable("llvm.dbg.global_variables");
   if (!Root)
     return;
@@ -283,47 +439,45 @@ void PIC16DbgInfo::EmitVarDebugInfo(Module &M) {
       unsigned short TypeNo = 0;
       bool HasAux = false;
       int Aux[PIC16Dbg::AuxSize] = { 0 };
-      std::string TypeName = "";
+      std::string TagName = "";
       std::string VarName = TAI->getGlobalPrefix()+DIGV.getGlobal()->getName();
-      PopulateDebugInfo(Ty, TypeNo, HasAux, Aux, TypeName);
+      PopulateDebugInfo(Ty, TypeNo, HasAux, Aux, TagName);
       // Emit debug info only if type information is available.
       if (TypeNo != PIC16Dbg::T_NULL) {
         O << "\n\t.type " << VarName << ", " << TypeNo;
-        short ClassNo = getClass(DIGV);
+        short ClassNo = getStorageClass(DIGV);
         O << "\n\t.class " << VarName << ", " << ClassNo;
         if (HasAux)
-          EmitAuxEntry(VarName, Aux, PIC16Dbg::AuxSize, TypeName);
+          EmitAuxEntry(VarName, Aux, PIC16Dbg::AuxSize, TagName);
       }
     }
   }
   O << "\n";
 }
 
-void PIC16DbgInfo::EmitFileDirective(Module &M) {
-  GlobalVariable *CU = M.getNamedGlobal("llvm.dbg.compile_unit");
-  if (CU) {
-    EmitDebugDirectives = true;
-    EmitFileDirective(CU, false);
-  }
-}
+/// SwitchToCU - Switch to a new compilation unit.
+///
+void PIC16DbgInfo::SwitchToCU(GlobalVariable *CU) {
+  // Get the file path from CU.
+  DICompileUnit cu(CU);
+  std::string DirName, FileName;
+  std::string FilePath = cu.getDirectory(DirName) + "/" +
+                         cu.getFilename(FileName);
 
-void PIC16DbgInfo::EmitFileDirective(GlobalVariable *CU, bool EmitEof) {
-  std::string Dir, FN;
-  DICompileUnit DIUnit(CU);
-  std::string File = DIUnit.getDirectory(Dir) + "/" + DIUnit.getFilename(FN);
-  if ( File != CurFile ) {
-    if (EmitEof)
-      EmitEOF();
-    O << "\n\t.file\t\"" << File << "\"\n" ;
-    CurFile = File;
-  }
+  // Nothing to do if the source file is still the same.
+  if ( FilePath == CurFile ) return;
+
+  // Else, close the current one and start a new one.
+  if (CurFile != "") O << "\n\t.eof";
+  O << "\n\t.file\t\"" << FilePath << "\"\n" ;
+  CurFile = FilePath;
+  CurLine = 0;
 }
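// (Editor's note) SwitchToCU above brackets each source file with
// .file/.eof, so a module drawing on two compile units produces a directive
// sequence like the following (file names illustrative):
//
//   .file  "a.c"
//   ...debug directives for a.c...
//   .eof
//   .file  "b.c"
//   ...
//
// CurLine is also reset to 0 so the first .line in the new file always fires.
+/// EmitEOF - Emit .eof for end of file.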
+/// void PIC16DbgInfo::EmitEOF() { if (CurFile != "") O << "\n\t.EOF"; } -void PIC16DbgInfo::SetFunctBeginLine(unsigned line) { - FunctBeginLine = line; -} diff --git a/lib/Target/PIC16/PIC16DebugInfo.h b/lib/Target/PIC16/PIC16DebugInfo.h index 9d50380..d126d85 100644 --- a/lib/Target/PIC16/PIC16DebugInfo.h +++ b/lib/Target/PIC16/PIC16DebugInfo.h @@ -20,6 +20,8 @@ #include <map> namespace llvm { + class MachineFunction; + class DebugLoc; namespace PIC16Dbg { enum VarType { T_NULL, @@ -94,33 +96,64 @@ namespace llvm { raw_ostream &O; const TargetAsmInfo *TAI; std::string CurFile; + unsigned CurLine; + // EmitDebugDirectives is set if debug information is available. Default // value for it is false. bool EmitDebugDirectives; - unsigned FunctBeginLine; + public: PIC16DbgInfo(raw_ostream &o, const TargetAsmInfo *T) : O(o), TAI(T) { - CurFile = ""; + CurFile = ""; + CurLine = 0; EmitDebugDirectives = false; } - void PopulateDebugInfo(DIType Ty, unsigned short &TypeNo, bool &HasAux, + + void BeginModule (Module &M); + void BeginFunction (const MachineFunction &MF); + void ChangeDebugLoc (const MachineFunction &MF, const DebugLoc &DL, + bool IsInBeginFunction = false); + void EndFunction (const MachineFunction &MF); + void EndModule (Module &M); + + + private: + void SwitchToCU (GlobalVariable *CU); + void SwitchToLine (unsigned Line, bool IsInBeginFunction = false); + + void PopulateDebugInfo (DIType Ty, unsigned short &TypeNo, bool &HasAux, int Aux[], std::string &TypeName); - unsigned GetTypeDebugNumber(std::string &type); - short getClass(DIGlobalVariable DIGV); + void PopulateBasicTypeInfo (DIType Ty, unsigned short &TypeNo); + void PopulateDerivedTypeInfo (DIType Ty, unsigned short &TypeNo, + bool &HasAux, int Aux[], + std::string &TypeName); + + void PopulateCompositeTypeInfo (DIType Ty, unsigned short &TypeNo, + bool &HasAux, int Aux[], + std::string &TypeName); + void PopulateArrayTypeInfo (DIType Ty, unsigned short &TypeNo, + bool &HasAux, int Aux[], + std::string &TypeName); + + void PopulateStructOrUnionTypeInfo (DIType Ty, unsigned short &TypeNo, + bool &HasAux, int Aux[], + std::string &TypeName); + void PopulateEnumTypeInfo (DIType Ty, unsigned short &TypeNo); + + unsigned GetTypeDebugNumber(std::string &Type); + short getStorageClass(DIGlobalVariable DIGV); void EmitFunctBeginDI(const Function *F); - void Init(Module &M); void EmitCompositeTypeDecls(Module &M); + void EmitCompositeTypeElements (DICompositeType CTy, + std::string UniqueSuffix); void EmitFunctEndDI(const Function *F, unsigned Line); void EmitAuxEntry(const std::string VarName, int Aux[], - int num = PIC16Dbg::AuxSize, std::string tag = ""); + int num = PIC16Dbg::AuxSize, std::string TagName = ""); inline void EmitSymbol(std::string Name, short Class, unsigned short Type = PIC16Dbg::T_NULL, unsigned long Value = 0); void EmitVarDebugInfo(Module &M); - void EmitFileDirective(Module &M); - void EmitFileDirective(GlobalVariable *CU, bool EmitEof = true); void EmitEOF(); - void SetFunctBeginLine(unsigned line); }; } // end namespace llvm; #endif diff --git a/lib/Target/PIC16/PIC16ISelLowering.cpp b/lib/Target/PIC16/PIC16ISelLowering.cpp index ba465f3..f113a48 100644 --- a/lib/Target/PIC16/PIC16ISelLowering.cpp +++ b/lib/Target/PIC16/PIC16ISelLowering.cpp @@ -31,42 +31,72 @@ static const char *getIntrinsicName(unsigned opcode) { std::string Basename; switch(opcode) { default: assert (0 && "do not know intrinsic name"); + // Arithmetic Right shift for integer types. 
   case PIC16ISD::SRA_I8: Basename = "sra.i8"; break;
   case RTLIB::SRA_I16: Basename = "sra.i16"; break;
   case RTLIB::SRA_I32: Basename = "sra.i32"; break;
 
+  // Left shift for integer types.
   case PIC16ISD::SLL_I8: Basename = "sll.i8"; break;
   case RTLIB::SHL_I16: Basename = "sll.i16"; break;
   case RTLIB::SHL_I32: Basename = "sll.i32"; break;
 
+  // Logical right shift for integer types.
   case PIC16ISD::SRL_I8: Basename = "srl.i8"; break;
   case RTLIB::SRL_I16: Basename = "srl.i16"; break;
   case RTLIB::SRL_I32: Basename = "srl.i32"; break;
 
+  // Multiply for integer types.
   case PIC16ISD::MUL_I8: Basename = "mul.i8"; break;
   case RTLIB::MUL_I16: Basename = "mul.i16"; break;
   case RTLIB::MUL_I32: Basename = "mul.i32"; break;
 
+  // Signed division for integers.
   case RTLIB::SDIV_I16: Basename = "sdiv.i16"; break;
   case RTLIB::SDIV_I32: Basename = "sdiv.i32"; break;
+
+  // Unsigned division for integers.
   case RTLIB::UDIV_I16: Basename = "udiv.i16"; break;
   case RTLIB::UDIV_I32: Basename = "udiv.i32"; break;
 
+  // Signed remainder for integers.
   case RTLIB::SREM_I16: Basename = "srem.i16"; break;
   case RTLIB::SREM_I32: Basename = "srem.i32"; break;
+
+  // Unsigned remainder for integers.
   case RTLIB::UREM_I16: Basename = "urem.i16"; break;
   case RTLIB::UREM_I32: Basename = "urem.i32"; break;
 
-  case RTLIB::FPTOSINT_F32_I32:
-    Basename = "f32_to_si32"; break;
-  case RTLIB::SINTTOFP_I32_F32:
-    Basename = "si32_to_f32"; break;
+  //////////////////////
+  // LIBCALLS FOR FLOATS
+  //////////////////////
+
+  // Float to signed integers.
+  case RTLIB::FPTOSINT_F32_I8: Basename = "f32_to_si32"; break;
+  case RTLIB::FPTOSINT_F32_I16: Basename = "f32_to_si32"; break;
+  case RTLIB::FPTOSINT_F32_I32: Basename = "f32_to_si32"; break;
+
+  // Signed integers to float. char and int are first sign extended to i32
+  // before being converted to float, so an I8_F32 or I16_F32 isn't required.
+  case RTLIB::SINTTOFP_I32_F32: Basename = "si32_to_f32"; break;
+
+  // Float to unsigned conversions.
+  // Signed conversion can be used for unsigned conversion as well.
+  // In signed and unsigned versions only the interpretation of the
+  // MSB is different. Bit representation remains the same.
+  case RTLIB::FPTOUINT_F32_I8: Basename = "f32_to_si32"; break;
+  case RTLIB::FPTOUINT_F32_I16: Basename = "f32_to_si32"; break;
+  case RTLIB::FPTOUINT_F32_I32: Basename = "f32_to_si32"; break;
+
+  // Unsigned to float conversions. char and int are first zero extended
+  // before being converted to float.
+  case RTLIB::UINTTOFP_I32_F32: Basename = "ui32_to_f32"; break;
 
+  // Floating point add, sub, mul, div.
   case RTLIB::ADD_F32: Basename = "add.f32"; break;
   case RTLIB::SUB_F32: Basename = "sub.f32"; break;
   case RTLIB::MUL_F32: Basename = "mul.f32"; break;
   case RTLIB::DIV_F32: Basename = "div.f32"; break;
-
   }
 
   std::string prefix = PAN::getTagName(PAN::PREFIX_SYMBOL);
@@ -83,7 +113,7 @@ static const char *getIntrinsicName(unsigned opcode) {
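// (Editor's note) All six float-to-int entries above deliberately resolve to
// the single "f32_to_si32" routine: the value is produced as an i32 and, per
// the comment on the MSB, only the caller's interpretation of the bits
// differs between the signed and unsigned cases. A hypothetical helper (not
// in the tree) that captures the mapping:
//
//   static bool usesSharedF32ToI32Routine(unsigned LC) {
//     switch (LC) {
//     case RTLIB::FPTOSINT_F32_I8:  case RTLIB::FPTOUINT_F32_I8:
//     case RTLIB::FPTOSINT_F32_I16: case RTLIB::FPTOUINT_F32_I16:
//     case RTLIB::FPTOSINT_F32_I32: case RTLIB::FPTOUINT_F32_I32:
//       return true;  // all resolve to "f32_to_si32"
//     default:
//       return false;
//     }
//   }

// PIC16TargetLowering Constructor.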
PIC16TargetLowering::PIC16TargetLowering(PIC16TargetMachine &TM) : TargetLowering(TM), TmpSize(0) { - + Subtarget = &TM.getSubtarget<PIC16Subtarget>(); addRegisterClass(MVT::i8, PIC16::GPRRegisterClass); @@ -114,6 +144,7 @@ PIC16TargetLowering::PIC16TargetLowering(PIC16TargetMachine &TM) // Signed division lib call names setLibcallName(RTLIB::SDIV_I16, getIntrinsicName(RTLIB::SDIV_I16)); setLibcallName(RTLIB::SDIV_I32, getIntrinsicName(RTLIB::SDIV_I32)); + // Unsigned division lib call names setLibcallName(RTLIB::UDIV_I16, getIntrinsicName(RTLIB::UDIV_I16)); setLibcallName(RTLIB::UDIV_I32, getIntrinsicName(RTLIB::UDIV_I32)); @@ -121,15 +152,36 @@ PIC16TargetLowering::PIC16TargetLowering(PIC16TargetMachine &TM) // Signed remainder lib call names setLibcallName(RTLIB::SREM_I16, getIntrinsicName(RTLIB::SREM_I16)); setLibcallName(RTLIB::SREM_I32, getIntrinsicName(RTLIB::SREM_I32)); + // Unsigned remainder lib call names setLibcallName(RTLIB::UREM_I16, getIntrinsicName(RTLIB::UREM_I16)); setLibcallName(RTLIB::UREM_I32, getIntrinsicName(RTLIB::UREM_I32)); - // Floating point operations + // Floating point to signed int conversions. + setLibcallName(RTLIB::FPTOSINT_F32_I8, + getIntrinsicName(RTLIB::FPTOSINT_F32_I8)); + setLibcallName(RTLIB::FPTOSINT_F32_I16, + getIntrinsicName(RTLIB::FPTOSINT_F32_I16)); setLibcallName(RTLIB::FPTOSINT_F32_I32, getIntrinsicName(RTLIB::FPTOSINT_F32_I32)); + + // Signed int to floats. setLibcallName(RTLIB::SINTTOFP_I32_F32, getIntrinsicName(RTLIB::SINTTOFP_I32_F32)); + + // Floating points to unsigned ints. + setLibcallName(RTLIB::FPTOUINT_F32_I8, + getIntrinsicName(RTLIB::FPTOUINT_F32_I8)); + setLibcallName(RTLIB::FPTOUINT_F32_I16, + getIntrinsicName(RTLIB::FPTOUINT_F32_I16)); + setLibcallName(RTLIB::FPTOUINT_F32_I32, + getIntrinsicName(RTLIB::FPTOUINT_F32_I32)); + + // Unsigned int to floats. + setLibcallName(RTLIB::UINTTOFP_I32_F32, + getIntrinsicName(RTLIB::UINTTOFP_I32_F32)); + + // Floating point add, sub, mul ,div. setLibcallName(RTLIB::ADD_F32, getIntrinsicName(RTLIB::ADD_F32)); setLibcallName(RTLIB::SUB_F32, getIntrinsicName(RTLIB::SUB_F32)); setLibcallName(RTLIB::MUL_F32, getIntrinsicName(RTLIB::MUL_F32)); diff --git a/lib/Target/PIC16/PIC16TargetMachine.cpp b/lib/Target/PIC16/PIC16TargetMachine.cpp index bda6326..d4f46a4 100644 --- a/lib/Target/PIC16/PIC16TargetMachine.cpp +++ b/lib/Target/PIC16/PIC16TargetMachine.cpp @@ -37,6 +37,11 @@ X("pic16", "PIC16 14-bit [experimental]."); static RegisterTarget<CooperTargetMachine> Y("cooper", "PIC16 Cooper [experimental]."); +// Force static initialization when called from llvm/InitializeAllTargets.h +namespace llvm { + void InitializePIC16Target() { } +} + // PIC16TargetMachine - Traditional PIC16 Machine. PIC16TargetMachine::PIC16TargetMachine(const Module &M, const std::string &FS, bool Cooper) diff --git a/lib/Target/PowerPC/AsmPrinter/PPCAsmPrinter.cpp b/lib/Target/PowerPC/AsmPrinter/PPCAsmPrinter.cpp index 7723982..c7bfb6d 100644 --- a/lib/Target/PowerPC/AsmPrinter/PPCAsmPrinter.cpp +++ b/lib/Target/PowerPC/AsmPrinter/PPCAsmPrinter.cpp @@ -646,19 +646,10 @@ bool PPCLinuxAsmPrinter::runOnMachineFunction(MachineFunction &MF) { bool PPCLinuxAsmPrinter::doInitialization(Module &M) { bool Result = AsmPrinter::doInitialization(M); - - // Emit initial debug information. 
+ DW = getAnalysisIfAvailable<DwarfWriter>(); MMI = getAnalysisIfAvailable<MachineModuleInfo>(); assert(MMI); - DW = getAnalysisIfAvailable<DwarfWriter>(); - assert(DW && "DwarfWriter is not available"); - DW->BeginModule(&M, MMI, O, this, TAI); - - // GNU as handles section names wrapped in quotes - Mang->setUseQuotes(true); - SwitchToSection(TAI->getTextSection()); - return Result; } @@ -875,18 +866,9 @@ bool PPCDarwinAsmPrinter::doInitialization(Module &M) { O << "\t.machine " << CPUDirectives[Directive] << '\n'; bool Result = AsmPrinter::doInitialization(M); - - // Emit initial debug information. - // We need this for Personality functions. - // AsmPrinter::doInitialization should have done this analysis. + DW = getAnalysisIfAvailable<DwarfWriter>(); MMI = getAnalysisIfAvailable<MachineModuleInfo>(); assert(MMI); - DW = getAnalysisIfAvailable<DwarfWriter>(); - assert(DW && "DwarfWriter is not available"); - DW->BeginModule(&M, MMI, O, this, TAI); - - // Darwin wants symbols to be quoted if they have complex names. - Mang->setUseQuotes(true); // Prime text sections so they are adjacent. This reduces the likelihood a // large data or debug section causes a branch to exceed 16M limit. @@ -1202,3 +1184,9 @@ namespace { extern "C" int PowerPCAsmPrinterForceLink; int PowerPCAsmPrinterForceLink = 0; + +// Force static initialization when called from +// llvm/InitializeAllAsmPrinters.h +namespace llvm { + void InitializePowerPCAsmPrinter() { } +} diff --git a/lib/Target/PowerPC/PPCTargetAsmInfo.cpp b/lib/Target/PowerPC/PPCTargetAsmInfo.cpp index c69e591..ebffd69 100644 --- a/lib/Target/PowerPC/PPCTargetAsmInfo.cpp +++ b/lib/Target/PowerPC/PPCTargetAsmInfo.cpp @@ -19,59 +19,19 @@ using namespace llvm; using namespace llvm::dwarf; -PPCDarwinTargetAsmInfo::PPCDarwinTargetAsmInfo(const PPCTargetMachine &TM): +PPCDarwinTargetAsmInfo::PPCDarwinTargetAsmInfo(const PPCTargetMachine &TM) : PPCTargetAsmInfo<DarwinTargetAsmInfo>(TM) { PCSymbol = "."; CommentString = ";"; - GlobalPrefix = "_"; - PrivateGlobalPrefix = "L"; - LessPrivateGlobalPrefix = "l"; - StringConstantPrefix = "\1LC"; ConstantPoolSection = "\t.const\t"; - JumpTableDataSection = ".const"; - CStringSection = "\t.cstring"; - if (TM.getRelocationModel() == Reloc::Static) { - StaticCtorsSection = ".constructor"; - StaticDtorsSection = ".destructor"; - } else { - StaticCtorsSection = ".mod_init_func"; - StaticDtorsSection = ".mod_term_func"; - } - HasSingleParameterDotFile = false; - SwitchToSectionDirective = "\t.section "; UsedDirective = "\t.no_dead_strip\t"; - WeakDefDirective = "\t.weak_definition "; - WeakRefDirective = "\t.weak_reference "; - HiddenDirective = "\t.private_extern "; SupportsExceptionHandling = true; - NeedsIndirectEncoding = true; - NeedsSet = true; - BSSSection = 0; DwarfEHFrameSection = - ".section __TEXT,__eh_frame,coalesced,no_toc+strip_static_syms+live_support"; + ".section __TEXT,__eh_frame,coalesced,no_toc+strip_static_syms+live_support"; DwarfExceptionSection = ".section __DATA,__gcc_except_tab"; GlobalEHDirective = "\t.globl\t"; SupportsWeakOmittedEHFrame = false; - - DwarfAbbrevSection = ".section __DWARF,__debug_abbrev,regular,debug"; - DwarfInfoSection = ".section __DWARF,__debug_info,regular,debug"; - DwarfLineSection = ".section __DWARF,__debug_line,regular,debug"; - DwarfFrameSection = ".section __DWARF,__debug_frame,regular,debug"; - DwarfPubNamesSection = ".section __DWARF,__debug_pubnames,regular,debug"; - DwarfPubTypesSection = ".section __DWARF,__debug_pubtypes,regular,debug"; - DwarfStrSection = 
".section __DWARF,__debug_str,regular,debug"; - DwarfLocSection = ".section __DWARF,__debug_loc,regular,debug"; - DwarfARangesSection = ".section __DWARF,__debug_aranges,regular,debug"; - DwarfRangesSection = ".section __DWARF,__debug_ranges,regular,debug"; - DwarfMacInfoSection = ".section __DWARF,__debug_macinfo,regular,debug"; - - // In non-PIC modes, emit a special label before jump tables so that the - // linker can perform more accurate dead code stripping. - if (TM.getRelocationModel() != Reloc::PIC_) { - // Emit a local label that is preserved until the linker runs. - JumpTableSpecialLabelPrefix = "l"; - } } /// PreferredEHDataFormat - This hook allows the target to select data @@ -131,7 +91,7 @@ PPCLinuxTargetAsmInfo::PPCLinuxTargetAsmInfo(const PPCTargetMachine &TM) : DwarfLocSection = "\t.section\t.debug_loc,\"\",@progbits"; DwarfARangesSection = "\t.section\t.debug_aranges,\"\",@progbits"; DwarfRangesSection = "\t.section\t.debug_ranges,\"\",@progbits"; - DwarfMacInfoSection = "\t.section\t.debug_macinfo,\"\",@progbits"; + DwarfMacroInfoSection = "\t.section\t.debug_macinfo,\"\",@progbits"; PCSymbol = "."; diff --git a/lib/Target/PowerPC/PPCTargetMachine.cpp b/lib/Target/PowerPC/PPCTargetMachine.cpp index ef3f0fc..3e89885 100644 --- a/lib/Target/PowerPC/PPCTargetMachine.cpp +++ b/lib/Target/PowerPC/PPCTargetMachine.cpp @@ -35,6 +35,11 @@ X("ppc32", "PowerPC 32"); static RegisterTarget<PPC64TargetMachine> Y("ppc64", "PowerPC 64"); +// Force static initialization when called from llvm/InitializeAllTargets.h +namespace llvm { + void InitializePowerPCTarget() { } +} + // No assembler printer by default PPCTargetMachine::AsmPrinterCtorFn PPCTargetMachine::AsmPrinterCtor = 0; diff --git a/lib/Target/Sparc/AsmPrinter/SparcAsmPrinter.cpp b/lib/Target/Sparc/AsmPrinter/SparcAsmPrinter.cpp index 61707f5..6a2fdca 100644 --- a/lib/Target/Sparc/AsmPrinter/SparcAsmPrinter.cpp +++ b/lib/Target/Sparc/AsmPrinter/SparcAsmPrinter.cpp @@ -15,6 +15,7 @@ #define DEBUG_TYPE "asm-printer" #include "Sparc.h" #include "SparcInstrInfo.h" +#include "SparcTargetMachine.h" #include "llvm/Constants.h" #include "llvm/DerivedTypes.h" #include "llvm/Module.h" @@ -24,8 +25,6 @@ #include "llvm/CodeGen/MachineConstantPool.h" #include "llvm/CodeGen/MachineInstr.h" #include "llvm/Target/TargetAsmInfo.h" -#include "llvm/Target/TargetData.h" -#include "llvm/Target/TargetMachine.h" #include "llvm/Support/Mangler.h" #include "llvm/Support/raw_ostream.h" #include "llvm/ADT/Statistic.h" @@ -88,6 +87,7 @@ FunctionPass *llvm::createSparcCodePrinterPass(raw_ostream &o, return new SparcAsmPrinter(o, tm, tm.getTargetAsmInfo(), OptLevel, verbose); } + /// runOnMachineFunction - This uses the printInstruction() /// method to print assembly for each instruction. 
///
@@ -353,3 +353,17 @@ bool SparcAsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI,
   return false;
 }
+
+namespace {
+  static struct Register {
+    Register() {
+      SparcTargetMachine::registerAsmPrinter(createSparcCodePrinterPass);
+    }
+  } Registrator;
+}
+
+// Force static initialization when called from
+// llvm/InitializeAllAsmPrinters.h
+namespace llvm {
+  void InitializeSparcAsmPrinter() { }
+}
diff --git a/lib/Target/Sparc/SparcTargetMachine.cpp b/lib/Target/Sparc/SparcTargetMachine.cpp
index eda0309..fd0f124 100644
--- a/lib/Target/Sparc/SparcTargetMachine.cpp
+++ b/lib/Target/Sparc/SparcTargetMachine.cpp
@@ -18,17 +18,18 @@
 #include "llvm/Target/TargetMachineRegistry.h"
 using namespace llvm;
 
-/// SparcTargetMachineModule - Note that this is used on hosts that
-/// cannot link in a library unless there are references into the
-/// library. In particular, it seems that it is not possible to get
-/// things to work on Win32 without this. Though it is unused, do not
-/// remove it.
-extern "C" int SparcTargetMachineModule;
-int SparcTargetMachineModule = 0;
-
 // Register the target.
 static RegisterTarget<SparcTargetMachine> X("sparc", "SPARC");
 
+// No assembler printer by default
+SparcTargetMachine::AsmPrinterCtorFn SparcTargetMachine::AsmPrinterCtor = 0;
+
+
+// Force static initialization when called from llvm/InitializeAllTargets.h
+namespace llvm {
+  void InitializeSparcTarget() { }
+}
+
 const TargetAsmInfo *SparcTargetMachine::createTargetAsmInfo() const {
   // FIXME: Handle Solaris subtarget someday :)
   return new SparcELFTargetAsmInfo(*this);
@@ -89,6 +90,8 @@ bool SparcTargetMachine::addAssemblyEmitter(PassManagerBase &PM,
                                             bool Verbose, raw_ostream &Out) {
   // Output assembly language.
-  PM.add(createSparcCodePrinterPass(Out, *this, OptLevel, Verbose));
+  assert(AsmPrinterCtor && "AsmPrinter was not linked in");
+  if (AsmPrinterCtor)
+    PM.add(AsmPrinterCtor(Out, *this, OptLevel, Verbose));
 
   return false;
 }
diff --git a/lib/Target/Sparc/SparcTargetMachine.h b/lib/Target/Sparc/SparcTargetMachine.h
index 40b44f2..8afcc73 100644
--- a/lib/Target/Sparc/SparcTargetMachine.h
+++ b/lib/Target/Sparc/SparcTargetMachine.h
@@ -35,6 +35,14 @@ class SparcTargetMachine : public LLVMTargetMachine {
 protected:
   virtual const TargetAsmInfo *createTargetAsmInfo() const;
 
+  // To avoid having the target depend on the asmprinter libraries, the
+  // asmprinters set this function pointer to their ctor at startup time
+  // if they are linked in.
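  // (Editor's note) This pointer is one half of a two-part registration
  // handshake. The file-static constructor in SparcAsmPrinter.cpp, shown
  // earlier in this patch, runs at load time and does
  //
  //   SparcTargetMachine::registerAsmPrinter(createSparcCodePrinterPass);
  //
  // while the empty InitializeSparcTarget()/InitializeSparcAsmPrinter()
  // anchors give llvm/InitializeAllTargets.h and
  // llvm/InitializeAllAsmPrinters.h something to call, forcing the linker to
  // keep these object files, and with them the static constructors, in the
  // final image.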
+ typedef FunctionPass *(*AsmPrinterCtorFn)(raw_ostream &o, + TargetMachine &tm, + CodeGenOpt::Level OptLevel, + bool verbose); + static AsmPrinterCtorFn AsmPrinterCtor; + public: SparcTargetMachine(const Module &M, const std::string &FS); @@ -56,6 +64,10 @@ public: virtual bool addAssemblyEmitter(PassManagerBase &PM, CodeGenOpt::Level OptLevel, bool Verbose, raw_ostream &Out); + + static void registerAsmPrinter(AsmPrinterCtorFn F) { + AsmPrinterCtor = F; + } }; } // end namespace llvm diff --git a/lib/Target/TargetAsmInfo.cpp b/lib/Target/TargetAsmInfo.cpp index 6a2de6f..3f5f1bd 100644 --- a/lib/Target/TargetAsmInfo.cpp +++ b/lib/Target/TargetAsmInfo.cpp @@ -24,10 +24,10 @@ #include "llvm/Support/Dwarf.h" #include <cctype> #include <cstring> - using namespace llvm; -void TargetAsmInfo::fillDefaultValues() { +TargetAsmInfo::TargetAsmInfo(const TargetMachine &tm) +: TM(tm) { BSSSection = "\t.bss"; BSSSection_ = 0; ReadOnlySection = 0; @@ -58,6 +58,7 @@ void TargetAsmInfo::fillDefaultValues() { InlineAsmEnd = "#NO_APP"; AssemblerDialect = 0; StringConstantPrefix = ".str"; + AllowQuotesInName = false; ZeroDirective = "\t.zero\t"; ZeroDirectiveSuffix = 0; AsciiDirective = "\t.ascii\t"; @@ -102,7 +103,6 @@ void TargetAsmInfo::fillDefaultValues() { SupportsExceptionHandling = false; DwarfRequiresFrameSection = true; DwarfUsesInlineInfoSection = false; - SupportsMacInfoSection = true; NonLocalEHFrameLabel = false; GlobalEHDirective = 0; SupportsWeakOmittedEHFrame = true; @@ -118,7 +118,7 @@ void TargetAsmInfo::fillDefaultValues() { DwarfLocSection = ".debug_loc"; DwarfARangesSection = ".debug_aranges"; DwarfRangesSection = ".debug_ranges"; - DwarfMacInfoSection = ".debug_macinfo"; + DwarfMacroInfoSection = ".debug_macinfo"; DwarfEHFrameSection = ".eh_frame"; DwarfExceptionSection = ".gcc_except_table"; AsmTransCBE = 0; @@ -126,11 +126,6 @@ void TargetAsmInfo::fillDefaultValues() { DataSection = getUnnamedSection("\t.data", SectionFlags::Writeable); } -TargetAsmInfo::TargetAsmInfo(const TargetMachine &tm) - : TM(tm) { - fillDefaultValues(); -} - TargetAsmInfo::~TargetAsmInfo() { } diff --git a/lib/Target/X86/AsmPrinter/CMakeLists.txt b/lib/Target/X86/AsmPrinter/CMakeLists.txt index dbd03d8..368bcaa 100644 --- a/lib/Target/X86/AsmPrinter/CMakeLists.txt +++ b/lib/Target/X86/AsmPrinter/CMakeLists.txt @@ -2,6 +2,7 @@ include_directories( ${CMAKE_CURRENT_BINARY_DIR}/.. 
${CMAKE_CURRENT_SOURCE_DIR}/ add_partially_linked_object(LLVMX86AsmPrinter X86ATTAsmPrinter.cpp + X86ATTInstPrinter.cpp X86AsmPrinter.cpp X86IntelAsmPrinter.cpp ) diff --git a/lib/Target/X86/AsmPrinter/X86ATTAsmPrinter.cpp b/lib/Target/X86/AsmPrinter/X86ATTAsmPrinter.cpp index 8afe2ea..60ed4f0 100644 --- a/lib/Target/X86/AsmPrinter/X86ATTAsmPrinter.cpp +++ b/lib/Target/X86/AsmPrinter/X86ATTAsmPrinter.cpp @@ -26,8 +26,10 @@ #include "llvm/Type.h" #include "llvm/ADT/Statistic.h" #include "llvm/ADT/StringExtras.h" +#include "llvm/MC/MCInst.h" #include "llvm/CodeGen/DwarfWriter.h" #include "llvm/CodeGen/MachineJumpTableInfo.h" +#include "llvm/Support/CommandLine.h" #include "llvm/Support/Mangler.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetAsmInfo.h" @@ -36,6 +38,9 @@ using namespace llvm; STATISTIC(EmittedInsts, "Number of machine instrs printed"); +static cl::opt<bool> NewAsmPrinter("experimental-asm-printer", + cl::Hidden); + static std::string getPICLabelString(unsigned FnNum, const TargetAsmInfo *TAI, const X86Subtarget* Subtarget) { @@ -266,7 +271,7 @@ bool X86ATTAsmPrinter::runOnMachineFunction(MachineFunction &MF) { O << "\t.size\t" << CurrentFnName << ", .-" << CurrentFnName << '\n'; // Emit post-function debug information. - if (TAI->doesSupportDebugInformation()) + if (TAI->doesSupportDebugInformation() || TAI->doesSupportExceptionHandling()) DW->EndFunction(&MF); // Print out jump tables referenced by the function. @@ -291,6 +296,136 @@ static inline bool shouldPrintStub(TargetMachine &TM, const X86Subtarget* ST) { return ST->isPICStyleStub() && TM.getRelocationModel() != Reloc::Static; } +/// print_pcrel_imm - This is used to print an immediate value that ends up +/// being encoded as a pc-relative value. These print slightly differently, for +/// example, a $ is not emitted. +void X86ATTAsmPrinter::print_pcrel_imm(const MachineInstr *MI, unsigned OpNo) { + const MachineOperand &MO = MI->getOperand(OpNo); + switch (MO.getType()) { + default: assert(0 && "Unknown pcrel immediate operand"); + case MachineOperand::MO_Immediate: + O << MO.getImm(); + return; + case MachineOperand::MO_MachineBasicBlock: + printBasicBlockLabel(MO.getMBB(), false, false, VerboseAsm); + return; + + case MachineOperand::MO_GlobalAddress: { + const GlobalValue *GV = MO.getGlobal(); + std::string Name = Mang->getValueName(GV); + decorateName(Name, GV); + + bool needCloseParen = false; + if (Name[0] == '$') { + // The name begins with a dollar-sign. In order to avoid having it look + // like an integer immediate to the assembler, enclose it in parens. + O << '('; + needCloseParen = true; + } + + if (shouldPrintStub(TM, Subtarget)) { + // Link-once, declaration, or Weakly-linked global variables need + // non-lazily-resolved stubs + if (GV->isDeclaration() || GV->isWeakForLinker()) { + // Dynamically-resolved functions need a stub for the function. + if (isa<Function>(GV)) { + // Function stubs are no longer needed for Mac OS X 10.5 and up. + if (Subtarget->isTargetDarwin() && Subtarget->getDarwinVers() >= 9) { + O << Name; + } else { + FnStubs.insert(Name); + printSuffixedName(Name, "$stub"); + } + } else if (GV->hasHiddenVisibility()) { + if (!GV->isDeclaration() && !GV->hasCommonLinkage()) + // Definition is not definitely in the current translation unit. 
+ O << Name; + else { + HiddenGVStubs.insert(Name); + printSuffixedName(Name, "$non_lazy_ptr"); + } + } else { + GVStubs.insert(Name); + printSuffixedName(Name, "$non_lazy_ptr"); + } + } else { + if (GV->hasDLLImportLinkage()) + O << "__imp_"; + O << Name; + } + } else { + if (GV->hasDLLImportLinkage()) { + O << "__imp_"; + } + O << Name; + + if (shouldPrintPLT(TM, Subtarget)) { + // Assemble call via PLT for externally visible symbols + if (!GV->hasHiddenVisibility() && !GV->hasProtectedVisibility() && + !GV->hasLocalLinkage()) + O << "@PLT"; + } + if (Subtarget->isTargetCygMing() && GV->isDeclaration()) + // Save function name for later type emission + FnStubs.insert(Name); + } + + if (GV->hasExternalWeakLinkage()) + ExtWeakSymbols.insert(GV); + + printOffset(MO.getOffset()); + + if (needCloseParen) + O << ')'; + return; + } + + case MachineOperand::MO_ExternalSymbol: { + bool needCloseParen = false; + std::string Name(TAI->getGlobalPrefix()); + Name += MO.getSymbolName(); + // Print function stub suffix unless it's Mac OS X 10.5 and up. + if (shouldPrintStub(TM, Subtarget) && + !(Subtarget->isTargetDarwin() && Subtarget->getDarwinVers() >= 9)) { + FnStubs.insert(Name); + printSuffixedName(Name, "$stub"); + return; + } + + if (Name[0] == '$') { + // The name begins with a dollar-sign. In order to avoid having it look + // like an integer immediate to the assembler, enclose it in parens. + O << '('; + needCloseParen = true; + } + + O << Name; + + if (shouldPrintPLT(TM, Subtarget)) { + std::string GOTName(TAI->getGlobalPrefix()); + GOTName+="_GLOBAL_OFFSET_TABLE_"; + if (Name == GOTName) + // HACK! Emit extra offset to PC during printing GOT offset to + // compensate for the size of popl instruction. The resulting code + // should look like: + // call .piclabel + // piclabel: + // popl %some_register + // addl $_GLOBAL_ADDRESS_TABLE_ + [.-piclabel], %some_register + O << " + [.-" + << getPICLabelString(getFunctionNumber(), TAI, Subtarget) << ']'; + + O << "@PLT"; + } + + if (needCloseParen) + O << ')'; + + return; + } + } +} + void X86ATTAsmPrinter::printOperand(const MachineInstr *MI, unsigned OpNo, const char *Modifier, bool NotRIPRel) { const MachineOperand &MO = MI->getOperand(OpNo); @@ -312,14 +447,10 @@ void X86ATTAsmPrinter::printOperand(const MachineInstr *MI, unsigned OpNo, case MachineOperand::MO_Immediate: if (!Modifier || (strcmp(Modifier, "debug") && - strcmp(Modifier, "mem") && - strcmp(Modifier, "call"))) + strcmp(Modifier, "mem"))) O << '$'; O << MO.getImm(); return; - case MachineOperand::MO_MachineBasicBlock: - printBasicBlockLabel(MO.getMBB(), false, false, VerboseAsm); - return; case MachineOperand::MO_JumpTableIndex: { bool isMemOp = Modifier && !strcmp(Modifier, "mem"); if (!isMemOp) O << '$'; @@ -359,8 +490,7 @@ void X86ATTAsmPrinter::printOperand(const MachineInstr *MI, unsigned OpNo, return; } case MachineOperand::MO_GlobalAddress: { - bool isCallOp = Modifier && !strcmp(Modifier, "call"); - bool isMemOp = Modifier && !strcmp(Modifier, "mem"); + bool isMemOp = Modifier && !strcmp(Modifier, "mem"); bool needCloseParen = false; const GlobalValue *GV = MO.getGlobal(); @@ -369,7 +499,7 @@ void X86ATTAsmPrinter::printOperand(const MachineInstr *MI, unsigned OpNo, // If GV is an alias then use the aliasee for determining // thread-localness. 
if (const GlobalAlias *GA = dyn_cast<GlobalAlias>(GV)) - GVar = dyn_cast_or_null<GlobalVariable>(GA->resolveAliasedGlobal(false)); + GVar =dyn_cast_or_null<GlobalVariable>(GA->resolveAliasedGlobal(false)); } bool isThreadLocal = GVar && GVar->isThreadLocal(); @@ -377,7 +507,7 @@ void X86ATTAsmPrinter::printOperand(const MachineInstr *MI, unsigned OpNo, std::string Name = Mang->getValueName(GV); decorateName(Name, GV); - if (!isMemOp && !isCallOp) + if (!isMemOp) O << '$'; else if (Name[0] == '$') { // The name begins with a dollar-sign. In order to avoid having it look @@ -391,15 +521,7 @@ void X86ATTAsmPrinter::printOperand(const MachineInstr *MI, unsigned OpNo, // non-lazily-resolved stubs if (GV->isDeclaration() || GV->isWeakForLinker()) { // Dynamically-resolved functions need a stub for the function. - if (isCallOp && isa<Function>(GV)) { - // Function stubs are no longer needed for Mac OS X 10.5 and up. - if (Subtarget->isTargetDarwin() && Subtarget->getDarwinVers() >= 9) { - O << Name; - } else { - FnStubs.insert(Name); - printSuffixedName(Name, "$stub"); - } - } else if (GV->hasHiddenVisibility()) { + if (GV->hasHiddenVisibility()) { if (!GV->isDeclaration() && !GV->hasCommonLinkage()) // Definition is not definitely in the current translation unit. O << Name; @@ -417,25 +539,12 @@ void X86ATTAsmPrinter::printOperand(const MachineInstr *MI, unsigned OpNo, O << Name; } - if (!isCallOp && TM.getRelocationModel() == Reloc::PIC_) + if (TM.getRelocationModel() == Reloc::PIC_) O << '-' << getPICLabelString(getFunctionNumber(), TAI, Subtarget); } else { - if (GV->hasDLLImportLinkage()) { + if (GV->hasDLLImportLinkage()) O << "__imp_"; - } O << Name; - - if (isCallOp) { - if (shouldPrintPLT(TM, Subtarget)) { - // Assemble call via PLT for externally visible symbols - if (!GV->hasHiddenVisibility() && !GV->hasProtectedVisibility() && - !GV->hasLocalLinkage()) - O << "@PLT"; - } - if (Subtarget->isTargetCygMing() && GV->isDeclaration()) - // Save function name for later type emission - FnStubs.insert(Name); - } } if (GV->hasExternalWeakLinkage()) @@ -443,6 +552,10 @@ void X86ATTAsmPrinter::printOperand(const MachineInstr *MI, unsigned OpNo, printOffset(MO.getOffset()); + if (needCloseParen) + O << ')'; + + bool isRIPRelative = false; if (isThreadLocal) { TLSModel::Model model = getTLSModel(GVar, TM.getRelocationModel()); switch (model) { @@ -456,7 +569,8 @@ void X86ATTAsmPrinter::printOperand(const MachineInstr *MI, unsigned OpNo, case TLSModel::InitialExec: if (Subtarget->is64Bit()) { assert (!NotRIPRel); - O << "@GOTTPOFF(%rip)"; + O << "@GOTTPOFF"; + isRIPRelative = true; } else { O << "@INDNTPOFF"; } @@ -476,43 +590,33 @@ void X86ATTAsmPrinter::printOperand(const MachineInstr *MI, unsigned OpNo, O << "@GOT"; else O << "@GOTOFF"; - } else if (Subtarget->isPICStyleRIPRel() && !NotRIPRel) { + } else if (Subtarget->isPICStyleRIPRel() && + !NotRIPRel) { if (TM.getRelocationModel() != Reloc::Static) { if (Subtarget->GVRequiresExtraLoad(GV, TM, false)) O << "@GOTPCREL"; - - if (needCloseParen) { - needCloseParen = false; - O << ')'; - } } - - // Use rip when possible to reduce code size, except when - // index or base register are also part of the address. e.g. - // foo(%rip)(%rcx,%rax,4) is not legal - O << "(%rip)"; + + isRIPRelative = true; } } - if (needCloseParen) - O << ')'; - + // Use rip when possible to reduce code size, except when + // index or base register are also part of the address. e.g. + // foo(%rip)(%rcx,%rax,4) is not legal. 
+ if (isRIPRelative) + O << "(%rip)"; + return; } case MachineOperand::MO_ExternalSymbol: { - bool isCallOp = Modifier && !strcmp(Modifier, "call"); bool isMemOp = Modifier && !strcmp(Modifier, "mem"); bool needCloseParen = false; std::string Name(TAI->getGlobalPrefix()); Name += MO.getSymbolName(); + // Print function stub suffix unless it's Mac OS X 10.5 and up. - if (isCallOp && shouldPrintStub(TM, Subtarget) && - !(Subtarget->isTargetDarwin() && Subtarget->getDarwinVers() >= 9)) { - FnStubs.insert(Name); - printSuffixedName(Name, "$stub"); - return; - } - if (!isMemOp && !isCallOp) + if (!isMemOp) O << '$'; else if (Name[0] == '$') { // The name begins with a dollar-sign. In order to avoid having it look @@ -536,17 +640,13 @@ void X86ATTAsmPrinter::printOperand(const MachineInstr *MI, unsigned OpNo, // addl $_GLOBAL_ADDRESS_TABLE_ + [.-piclabel], %some_register O << " + [.-" << getPICLabelString(getFunctionNumber(), TAI, Subtarget) << ']'; - - if (isCallOp) - O << "@PLT"; } if (needCloseParen) O << ')'; - if (!isCallOp && Subtarget->isPICStyleRIPRel()) + if (Subtarget->isPICStyleRIPRel()) O << "(%rip)"; - return; } default: @@ -673,8 +773,7 @@ void X86ATTAsmPrinter::printPICJumpTableEntry(const MachineJumpTableInfo *MJTI, printBasicBlockLabel(MBB, false, false, false); } -bool X86ATTAsmPrinter::printAsmMRegister(const MachineOperand &MO, - const char Mode) { +bool X86ATTAsmPrinter::printAsmMRegister(const MachineOperand &MO, char Mode) { unsigned Reg = MO.getReg(); switch (Mode) { default: return true; // Unknown mode. @@ -758,38 +857,85 @@ bool X86ATTAsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI, return false; } +static void lower_lea64_32mem(MCInst *MI, unsigned OpNo) { + // Convert registers in the addr mode according to subreg64. + for (unsigned i = 0; i != 4; ++i) { + if (!MI->getOperand(i).isReg()) continue; + + unsigned Reg = MI->getOperand(i).getReg(); + if (Reg == 0) continue; + + MI->getOperand(i).setReg(getX86SubSuperRegister(Reg, MVT::i64)); + } +} + /// printMachineInstruction -- Print out a single X86 LLVM instruction MI in /// AT&T syntax to the current output stream. /// void X86ATTAsmPrinter::printMachineInstruction(const MachineInstr *MI) { ++EmittedInsts; + if (NewAsmPrinter) { + if (MI->getOpcode() == TargetInstrInfo::INLINEASM) { + O << "\t"; + printInlineAsm(MI); + return; + } else if (MI->isLabel()) { + printLabel(MI); + return; + } else if (MI->getOpcode() == TargetInstrInfo::DECLARE) { + printDeclare(MI); + return; + } else if (MI->getOpcode() == TargetInstrInfo::IMPLICIT_DEF) { + printImplicitDef(MI); + return; + } + + O << "NEW: "; + MCInst TmpInst; + + TmpInst.setOpcode(MI->getOpcode()); + + for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { + const MachineOperand &MO = MI->getOperand(i); + + MCOperand MCOp; + if (MO.isReg()) { + MCOp.MakeReg(MO.getReg()); + } else if (MO.isImm()) { + MCOp.MakeImm(MO.getImm()); + } else if (MO.isMBB()) { + MCOp.MakeMBBLabel(getFunctionNumber(), MO.getMBB()->getNumber()); + } else { + assert(0 && "Unimp"); + } + + TmpInst.addOperand(MCOp); + } + + switch (TmpInst.getOpcode()) { + case X86::LEA64_32r: + // Handle the 'subreg rewriting' for the lea64_32mem operand. + lower_lea64_32mem(&TmpInst, 1); + break; + } + + // FIXME: Convert TmpInst. + printInstruction(&TmpInst); + O << "OLD: "; + } + // Call the autogenerated instruction printer routines. 
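  // (Editor's note) The LEA64_32r fixup above rewrites each address-mode
  // register to its 64-bit super-register via getX86SubSuperRegister, so an
  // address like the following (illustrative, not from a real run)
  //
  //   8(%eax,%ecx,2)   becomes   8(%rax,%rcx,2)
  //
  // while the scale and displacement operands pass through untouched.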
printInstruction(MI); } /// doInitialization bool X86ATTAsmPrinter::doInitialization(Module &M) { - - bool Result = AsmPrinter::doInitialization(M); - - if (TAI->doesSupportDebugInformation()) { - // Let PassManager know we need debug information and relay - // the MachineModuleInfo address on to DwarfWriter. - // AsmPrinter::doInitialization did this analysis. + if (TAI->doesSupportDebugInformation() || TAI->doesSupportExceptionHandling()) MMI = getAnalysisIfAvailable<MachineModuleInfo>(); - DW = getAnalysisIfAvailable<DwarfWriter>(); - DW->BeginModule(&M, MMI, O, this, TAI); - } - - // Darwin wants symbols to be quoted if they have complex names. - if (Subtarget->isTargetDarwin()) - Mang->setUseQuotes(true); - - return Result; + return AsmPrinter::doInitialization(M); } - void X86ATTAsmPrinter::printModuleLevelGV(const GlobalVariable* GVar) { const TargetData *TD = TM.getTargetData(); @@ -1040,8 +1186,8 @@ bool X86ATTAsmPrinter::doFinalization(Module &M) { } // Emit final debug information. - DwarfWriter *DW = getAnalysisIfAvailable<DwarfWriter>(); - DW->EndModule(); + if (TAI->doesSupportDebugInformation() || TAI->doesSupportExceptionHandling()) + DW->EndModule(); // Funny Darwin hack: This flag tells the linker that no global symbols // contain code that falls through to other global symbols (e.g. the obvious @@ -1060,12 +1206,12 @@ bool X86ATTAsmPrinter::doFinalization(Module &M) { } // Emit final debug information. - DwarfWriter *DW = getAnalysisIfAvailable<DwarfWriter>(); - DW->EndModule(); + if (TAI->doesSupportDebugInformation() || TAI->doesSupportExceptionHandling()) + DW->EndModule(); } else if (Subtarget->isTargetELF()) { // Emit final debug information. - DwarfWriter *DW = getAnalysisIfAvailable<DwarfWriter>(); - DW->EndModule(); + if (TAI->doesSupportDebugInformation() || TAI->doesSupportExceptionHandling()) + DW->EndModule(); } return AsmPrinter::doFinalization(M); diff --git a/lib/Target/X86/AsmPrinter/X86ATTAsmPrinter.h b/lib/Target/X86/AsmPrinter/X86ATTAsmPrinter.h index 5b40e73..68a6bc8 100644 --- a/lib/Target/X86/AsmPrinter/X86ATTAsmPrinter.h +++ b/lib/Target/X86/AsmPrinter/X86ATTAsmPrinter.h @@ -27,16 +27,16 @@ namespace llvm { class MachineJumpTableInfo; +class MCInst; class VISIBILITY_HIDDEN X86ATTAsmPrinter : public AsmPrinter { - DwarfWriter *DW; MachineModuleInfo *MMI; const X86Subtarget *Subtarget; public: explicit X86ATTAsmPrinter(raw_ostream &O, X86TargetMachine &TM, const TargetAsmInfo *T, CodeGenOpt::Level OL, bool V) - : AsmPrinter(O, TM, T, OL, V), DW(0), MMI(0) { + : AsmPrinter(O, TM, T, OL, V), MMI(0) { Subtarget = &TM.getSubtarget<X86Subtarget>(); } @@ -63,10 +63,62 @@ class VISIBILITY_HIDDEN X86ATTAsmPrinter : public AsmPrinter { /// machine instruction was sufficiently described to print it, otherwise it /// returns false. bool printInstruction(const MachineInstr *MI); + + + // New MCInst printing stuff. 
+ bool printInstruction(const MCInst *MI); + + void printOperand(const MCInst *MI, unsigned OpNo, + const char *Modifier = 0, bool NotRIPRel = false); + void printMemReference(const MCInst *MI, unsigned Op); + void printLeaMemReference(const MCInst *MI, unsigned Op); + void printSSECC(const MCInst *MI, unsigned Op); + void printPICLabel(const MCInst *MI, unsigned Op); + void print_pcrel_imm(const MCInst *MI, unsigned OpNo); + + void printi8mem(const MCInst *MI, unsigned OpNo) { + printMemReference(MI, OpNo); + } + void printi16mem(const MCInst *MI, unsigned OpNo) { + printMemReference(MI, OpNo); + } + void printi32mem(const MCInst *MI, unsigned OpNo) { + printMemReference(MI, OpNo); + } + void printi64mem(const MCInst *MI, unsigned OpNo) { + printMemReference(MI, OpNo); + } + void printi128mem(const MCInst *MI, unsigned OpNo) { + printMemReference(MI, OpNo); + } + void printf32mem(const MCInst *MI, unsigned OpNo) { + printMemReference(MI, OpNo); + } + void printf64mem(const MCInst *MI, unsigned OpNo) { + printMemReference(MI, OpNo); + } + void printf80mem(const MCInst *MI, unsigned OpNo) { + printMemReference(MI, OpNo); + } + void printf128mem(const MCInst *MI, unsigned OpNo) { + printMemReference(MI, OpNo); + } + void printlea32mem(const MCInst *MI, unsigned OpNo) { + printLeaMemReference(MI, OpNo); + } + void printlea64mem(const MCInst *MI, unsigned OpNo) { + printLeaMemReference(MI, OpNo); + } + void printlea64_32mem(const MCInst *MI, unsigned OpNo) { + printLeaMemReference(MI, OpNo); + } + + // These methods are used by the tablegen'erated instruction printer. void printOperand(const MachineInstr *MI, unsigned OpNo, const char *Modifier = 0, bool NotRIPRel = false); + void print_pcrel_imm(const MachineInstr *MI, unsigned OpNo); void printi8mem(const MachineInstr *MI, unsigned OpNo) { printMemReference(MI, OpNo); } @@ -104,7 +156,7 @@ class VISIBILITY_HIDDEN X86ATTAsmPrinter : public AsmPrinter { printLeaMemReference(MI, OpNo, "subreg64"); } - bool printAsmMRegister(const MachineOperand &MO, const char Mode); + bool printAsmMRegister(const MachineOperand &MO, char Mode); bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNo, unsigned AsmVariant, const char *ExtraCode); bool PrintAsmMemoryOperand(const MachineInstr *MI, unsigned OpNo, diff --git a/lib/Target/X86/AsmPrinter/X86ATTInstPrinter.cpp b/lib/Target/X86/AsmPrinter/X86ATTInstPrinter.cpp new file mode 100644 index 0000000..9d50edc --- /dev/null +++ b/lib/Target/X86/AsmPrinter/X86ATTInstPrinter.cpp @@ -0,0 +1,143 @@ +//===-- X86ATTInstPrinter.cpp - AT&T assembly instruction printing --------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file includes code for rendering MCInst instances as AT&T-style +// assembly. +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "asm-printer" +#include "llvm/MC/MCInst.h" +#include "X86ATTAsmPrinter.h" +#include "llvm/Target/TargetAsmInfo.h" +#include "llvm/Support/raw_ostream.h" +using namespace llvm; + +// Include the auto-generated portion of the assembly writer. 
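// (Editor's note) The #define below is a deliberate trick: X86GenAsmWriter.inc
// is generated in terms of the name "MachineInstr", so temporarily rebinding
// that name to MCInst compiles a second printInstruction(const MCInst *)
// overload from the very same tablegen output. Roughly, the generated
//
//   bool X86ATTAsmPrinter::printInstruction(const MachineInstr *MI) { ... }
//
// preprocesses into
//
//   bool X86ATTAsmPrinter::printInstruction(const MCInst *MI) { ... }
//
// which is why the name is #undef'd again immediately after the include.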
+#define MachineInstr MCInst +#define NO_ASM_WRITER_BOILERPLATE +#include "X86GenAsmWriter.inc" +#undef MachineInstr + +void X86ATTAsmPrinter::printSSECC(const MCInst *MI, unsigned Op) { + switch (MI->getOperand(Op).getImm()) { + default: assert(0 && "Invalid ssecc argument!"); + case 0: O << "eq"; break; + case 1: O << "lt"; break; + case 2: O << "le"; break; + case 3: O << "unord"; break; + case 4: O << "neq"; break; + case 5: O << "nlt"; break; + case 6: O << "nle"; break; + case 7: O << "ord"; break; + } +} + + +void X86ATTAsmPrinter::printPICLabel(const MCInst *MI, unsigned Op) { + assert(0 && + "This is only used for MOVPC32r, should lower before asm printing!"); +} + + +/// print_pcrel_imm - This is used to print an immediate value that ends up +/// being encoded as a pc-relative value. These print slightly differently, for +/// example, a $ is not emitted. +void X86ATTAsmPrinter::print_pcrel_imm(const MCInst *MI, unsigned OpNo) { + const MCOperand &Op = MI->getOperand(OpNo); + + if (Op.isImm()) + O << Op.getImm(); + else if (Op.isMBBLabel()) + // FIXME: Keep in sync with printBasicBlockLabel. printBasicBlockLabel + // should eventually call into this code, not the other way around. + O << TAI->getPrivateGlobalPrefix() << "BB" << Op.getMBBLabelFunction() + << '_' << Op.getMBBLabelBlock(); + else + assert(0 && "Unknown pcrel immediate operand"); +} + + +void X86ATTAsmPrinter::printOperand(const MCInst *MI, unsigned OpNo, + const char *Modifier, bool NotRIPRel) { + assert(Modifier == 0 && "Modifiers should not be used"); + + const MCOperand &Op = MI->getOperand(OpNo); + if (Op.isReg()) { + O << '%'; + unsigned Reg = Op.getReg(); +#if 0 + if (Modifier && strncmp(Modifier, "subreg", strlen("subreg")) == 0) { + MVT VT = (strcmp(Modifier+6,"64") == 0) ? + MVT::i64 : ((strcmp(Modifier+6, "32") == 0) ? MVT::i32 : + ((strcmp(Modifier+6,"16") == 0) ? MVT::i16 : MVT::i8)); + Reg = getX86SubSuperRegister(Reg, VT); + } +#endif + O << TRI->getAsmName(Reg); + return; + } else if (Op.isImm()) { + //if (!Modifier || (strcmp(Modifier, "debug") && strcmp(Modifier, "mem"))) + O << '$'; + O << Op.getImm(); + return; + } + + O << "<<UNKNOWN OPERAND KIND>>"; +} + +void X86ATTAsmPrinter::printLeaMemReference(const MCInst *MI, unsigned Op) { + bool NotRIPRel = false; + + const MCOperand &BaseReg = MI->getOperand(Op); + const MCOperand &IndexReg = MI->getOperand(Op+2); + const MCOperand &DispSpec = MI->getOperand(Op+3); + + NotRIPRel |= IndexReg.getReg() || BaseReg.getReg(); + if (DispSpec.isImm()) { + int64_t DispVal = DispSpec.getImm(); + if (DispVal || (!IndexReg.getReg() && !BaseReg.getReg())) + O << DispVal; + } else { + abort(); + //assert(DispSpec.isGlobal() || DispSpec.isCPI() || + // DispSpec.isJTI() || DispSpec.isSymbol()); + //printOperand(MI, Op+3, "mem", NotRIPRel); + } + + if (IndexReg.getReg() || BaseReg.getReg()) { + // There are cases where we can end up with ESP/RSP in the indexreg slot. + // If this happens, swap the base/index register to support assemblers that + // don't work when the index is *SP. + // FIXME: REMOVE THIS. 
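  // (Editor's note) The assert just below encodes a real ISA limit: in the
  // x86 SIB byte, the index field value 100 (the ESP/RSP encoding) means
  // "no index", so *SP can never be scaled or used as an index register.
  // The base/index swap the preceding comment describes, e.g.
  //
  //   (%rsp,%rax,1)  -->  (%rax,%rsp,1)
  //
  // is only valid when the scale is 1, which is presumably why the FIXME
  // wants this workaround removed.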
+ assert(IndexReg.getReg() != X86::ESP && IndexReg.getReg() != X86::RSP); + + O << '('; + if (BaseReg.getReg()) + printOperand(MI, Op); + + if (IndexReg.getReg()) { + O << ','; + printOperand(MI, Op+2); + unsigned ScaleVal = MI->getOperand(Op+1).getImm(); + if (ScaleVal != 1) + O << ',' << ScaleVal; + } + O << ')'; + } +} + +void X86ATTAsmPrinter::printMemReference(const MCInst *MI, unsigned Op) { + const MCOperand &Segment = MI->getOperand(Op+4); + if (Segment.getReg()) { + printOperand(MI, Op+4); + O << ':'; + } + printLeaMemReference(MI, Op); +} diff --git a/lib/Target/X86/AsmPrinter/X86AsmPrinter.cpp b/lib/Target/X86/AsmPrinter/X86AsmPrinter.cpp index c874849..a39203b 100644 --- a/lib/Target/X86/AsmPrinter/X86AsmPrinter.cpp +++ b/lib/Target/X86/AsmPrinter/X86AsmPrinter.cpp @@ -29,13 +29,11 @@ FunctionPass *llvm::createX86CodePrinterPass(raw_ostream &o, bool verbose) { const X86Subtarget *Subtarget = &tm.getSubtarget<X86Subtarget>(); - if (Subtarget->isFlavorIntel()) { + if (Subtarget->isFlavorIntel()) return new X86IntelAsmPrinter(o, tm, tm.getTargetAsmInfo(), OptLevel, verbose); - } else { - return new X86ATTAsmPrinter(o, tm, tm.getTargetAsmInfo(), - OptLevel, verbose); - } + return new X86ATTAsmPrinter(o, tm, tm.getTargetAsmInfo(), + OptLevel, verbose); } namespace { @@ -48,3 +46,9 @@ namespace { extern "C" int X86AsmPrinterForceLink; int X86AsmPrinterForceLink = 0; + +// Force static initialization when called from +// llvm/InitializeAllAsmPrinters.h +namespace llvm { + void InitializeX86AsmPrinter() { } +} diff --git a/lib/Target/X86/AsmPrinter/X86IntelAsmPrinter.cpp b/lib/Target/X86/AsmPrinter/X86IntelAsmPrinter.cpp index 6599349..ceae7be 100644 --- a/lib/Target/X86/AsmPrinter/X86IntelAsmPrinter.cpp +++ b/lib/Target/X86/AsmPrinter/X86IntelAsmPrinter.cpp @@ -223,9 +223,6 @@ void X86IntelAsmPrinter::printOp(const MachineOperand &MO, case MachineOperand::MO_Immediate: O << MO.getImm(); return; - case MachineOperand::MO_MachineBasicBlock: - printBasicBlockLabel(MO.getMBB()); - return; case MachineOperand::MO_JumpTableIndex: { bool isMemOp = Modifier && !strcmp(Modifier, "mem"); if (!isMemOp) O << "OFFSET "; @@ -243,14 +240,13 @@ void X86IntelAsmPrinter::printOp(const MachineOperand &MO, return; } case MachineOperand::MO_GlobalAddress: { - bool isCallOp = Modifier && !strcmp(Modifier, "call"); bool isMemOp = Modifier && !strcmp(Modifier, "mem"); GlobalValue *GV = MO.getGlobal(); std::string Name = Mang->getValueName(GV); decorateName(Name, GV); - if (!isMemOp && !isCallOp) O << "OFFSET "; + if (!isMemOp) O << "OFFSET "; if (GV->hasDLLImportLinkage()) { // FIXME: This should be fixed with full support of stdcall & fastcall // CC's @@ -261,8 +257,6 @@ void X86IntelAsmPrinter::printOp(const MachineOperand &MO, return; } case MachineOperand::MO_ExternalSymbol: { - bool isCallOp = Modifier && !strcmp(Modifier, "call"); - if (!isCallOp) O << "OFFSET "; O << TAI->getGlobalPrefix() << MO.getSymbolName(); return; } @@ -271,6 +265,39 @@ void X86IntelAsmPrinter::printOp(const MachineOperand &MO, } } +void X86IntelAsmPrinter::print_pcrel_imm(const MachineInstr *MI, unsigned OpNo){ + const MachineOperand &MO = MI->getOperand(OpNo); + switch (MO.getType()) { + default: assert(0 && "Unknown pcrel immediate operand"); + case MachineOperand::MO_Immediate: + O << MO.getImm(); + return; + case MachineOperand::MO_MachineBasicBlock: + printBasicBlockLabel(MO.getMBB()); + return; + + case MachineOperand::MO_GlobalAddress: { + GlobalValue *GV = MO.getGlobal(); + std::string Name = Mang->getValueName(GV); + 
      decorateName(Name, GV);
+
+      if (GV->hasDLLImportLinkage()) {
+        // FIXME: This should be fixed with full support of stdcall & fastcall
+        // CC's.
+        O << "__imp_";
+      }
+      O << Name;
+      printOffset(MO.getOffset());
+      return;
+    }
+
+  case MachineOperand::MO_ExternalSymbol:
+    O << TAI->getGlobalPrefix() << MO.getSymbolName();
+    return;
+  }
+}
+
+
 void X86IntelAsmPrinter::printLeaMemReference(const MachineInstr *MI,
                                               unsigned Op,
                                               const char *Modifier) {
@@ -339,8 +366,8 @@ void X86IntelAsmPrinter::printPICJumpTableSetLabel(unsigned uid,
 }
 
 void X86IntelAsmPrinter::printPICLabel(const MachineInstr *MI, unsigned Op) {
-  O << "\"L" << getFunctionNumber() << "$pb\"\n";
-  O << "\"L" << getFunctionNumber() << "$pb\":";
+  O << "L" << getFunctionNumber() << "$pb\n";
+  O << "L" << getFunctionNumber() << "$pb:";
 }
 
 bool X86IntelAsmPrinter::printAsmMRegister(const MachineOperand &MO,
@@ -362,7 +389,7 @@ bool X86IntelAsmPrinter::printAsmMRegister(const MachineOperand &MO,
     break;
   }
 
-  O << '%' << TRI->getName(Reg);
+  O << TRI->getName(Reg);
   return false;
 }
 
@@ -414,7 +441,7 @@ bool X86IntelAsmPrinter::doInitialization(Module &M) {
 
   Mang->markCharUnacceptable('.');
 
-  O << "\t.686\n\t.model flat\n\n";
+  O << "\t.686\n\t.MMX\n\t.XMM\n\t.model flat\n\n";
 
   // Emit declarations for external functions.
   for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I)
@@ -422,7 +449,7 @@ bool X86IntelAsmPrinter::doInitialization(Module &M) {
       std::string Name = Mang->getValueName(I);
       decorateName(Name, I);
 
-      O << "\textern " ;
+      O << "\tEXTERN " ;
       if (I->hasDLLImportLinkage()) {
         O << "__imp_";
       }
@@ -436,7 +463,7 @@ bool X86IntelAsmPrinter::doInitialization(Module &M) {
     if (I->isDeclaration()) {
       std::string Name = Mang->getValueName(I);
 
-      O << "\textern " ;
+      O << "\tEXTERN " ;
       if (I->hasDLLImportLinkage()) {
         O << "__imp_";
       }
@@ -471,14 +498,14 @@ bool X86IntelAsmPrinter::doFinalization(Module &M) {
     case GlobalValue::WeakAnyLinkage:
     case GlobalValue::WeakODRLinkage:
       SwitchToDataSection("");
-      O << name << "?\tsegment common 'COMMON'\n";
+      O << name << "?\tSEGMENT PARA common 'COMMON'\n";
       bCustomSegment = true;
       // FIXME: the default alignment is 16 bytes, but 1, 2, 4, and 256
       // are also available.
       break;
     case GlobalValue::AppendingLinkage:
       SwitchToDataSection("");
-      O << name << "?\tsegment public 'DATA'\n";
+      O << name << "?\tSEGMENT PARA public 'DATA'\n";
       bCustomSegment = true;
       // FIXME: the default alignment is 16 bytes, but 1, 2, 4, and 256
       // are also available.
diff --git a/lib/Target/X86/AsmPrinter/X86IntelAsmPrinter.h b/lib/Target/X86/AsmPrinter/X86IntelAsmPrinter.h
index 9520d98..04f2595 100644
--- a/lib/Target/X86/AsmPrinter/X86IntelAsmPrinter.h
+++ b/lib/Target/X86/AsmPrinter/X86IntelAsmPrinter.h
@@ -52,6 +52,9 @@ struct VISIBILITY_HIDDEN X86IntelAsmPrinter : public AsmPrinter {
       printOp(MO, Modifier);
     }
   }
+
+  void print_pcrel_imm(const MachineInstr *MI, unsigned OpNo);
+
   void printi8mem(const MachineInstr *MI, unsigned OpNo) {
     O << "BYTE PTR ";
diff --git a/lib/Target/X86/README.txt b/lib/Target/X86/README.txt
index 3796aac..4464878 100644
--- a/lib/Target/X86/README.txt
+++ b/lib/Target/X86/README.txt
@@ -1858,8 +1858,23 @@ Ideal output:
 	setne	%al
 	ret
 
-We could do this transformation in instcombine, but it's only clearly
-beneficial on platforms with a test instruction.
+This should definitely be done in instcombine, canonicalizing the range
+condition into a != condition.
We get this IR: + +define i32 @a(i32 %x) nounwind readnone { +entry: + %0 = and i32 %x, 127 ; <i32> [#uses=1] + %1 = icmp ugt i32 %0, 31 ; <i1> [#uses=1] + %2 = zext i1 %1 to i32 ; <i32> [#uses=1] + ret i32 %2 +} + +Instcombine prefers to strength reduce relational comparisons to equality +comparisons when possible; this should be another case of that. This could +be handled pretty easily in InstCombiner::visitICmpInstWithInstAndIntCst, but it +looks like InstCombiner::visitICmpInstWithInstAndIntCst really should be +redesigned to use ComputeMaskedBits and friends. + //===---------------------------------------------------------------------===// Testcase: @@ -1880,20 +1895,40 @@ Ideal output: Testcase: int x(int a) { return (a & 0x80) ? 0x100 : 0; } +int y(int a) { return (a & 0x80) *2; } -Current output: +Current: testl $128, 4(%esp) setne %al movzbl %al, %eax shll $8, %eax ret -Ideal output: +Better: movl 4(%esp), %eax addl %eax, %eax andl $256, %eax ret -We generally want to fold shifted tests of a single bit into a shift+and on x86. +This is another general instcombine transformation that is profitable on all +targets. In LLVM IR, these functions look like this: + +define i32 @x(i32 %a) nounwind readnone { +entry: + %0 = and i32 %a, 128 + %1 = icmp eq i32 %0, 0 + %iftmp.0.0 = select i1 %1, i32 0, i32 256 + ret i32 %iftmp.0.0 +} + +define i32 @y(i32 %a) nounwind readnone { +entry: + %0 = shl i32 %a, 1 + %1 = and i32 %0, 256 + ret i32 %1 +} + +Replacing an icmp+select with a shift should always be considered profitable in +instcombine. //===---------------------------------------------------------------------===// diff --git a/lib/Target/X86/X86FloatingPoint.cpp b/lib/Target/X86/X86FloatingPoint.cpp index 0f2fbcc..ed4eb44 100644 --- a/lib/Target/X86/X86FloatingPoint.cpp +++ b/lib/Target/X86/X86FloatingPoint.cpp @@ -991,8 +991,13 @@ void FPS::handleSpecialFP(MachineBasicBlock::iterator &I) { case X86::FpSET_ST0_32: case X86::FpSET_ST0_64: case X86::FpSET_ST0_80: - assert((StackTop == 1 || StackTop == 2) - && "Stack should have one or two element on it to return!"); + // FpSET_ST0_80 is generated by copyRegToReg for both function return + // and inline assembly with the "st" constraint. In the latter case, + // it is possible for FP0 to be alive after this instruction. + if (!MI->killsRegister(X86::FP0)) { + // Duplicate ST0 + duplicateToTop(0, 0, I); + } --StackTop; // "Forget" we have something on the top of stack! break; case X86::FpSET_ST1_32: diff --git a/lib/Target/X86/X86ISelDAGToDAG.cpp b/lib/Target/X86/X86ISelDAGToDAG.cpp index b003efd..9cedafc 100644 --- a/lib/Target/X86/X86ISelDAGToDAG.cpp +++ b/lib/Target/X86/X86ISelDAGToDAG.cpp @@ -167,6 +167,8 @@ namespace { SDValue &Segment); bool SelectLEAAddr(SDValue Op, SDValue N, SDValue &Base, SDValue &Scale, SDValue &Index, SDValue &Disp); + bool SelectTLSADDRAddr(SDValue Op, SDValue N, SDValue &Base, + SDValue &Scale, SDValue &Index, SDValue &Disp); bool SelectScalarSSELoad(SDValue Op, SDValue Pred, SDValue N, SDValue &Base, SDValue &Scale, SDValue &Index, SDValue &Disp, @@ -1293,6 +1295,32 @@ bool X86DAGToDAGISel::SelectLEAAddr(SDValue Op, SDValue N, return false; } +/// SelectTLSADDRAddr - This is only run on TargetGlobalTLSAddress nodes.
+bool X86DAGToDAGISel::SelectTLSADDRAddr(SDValue Op, SDValue N, SDValue &Base, + SDValue &Scale, SDValue &Index, + SDValue &Disp) { + assert(Op.getOpcode() == X86ISD::TLSADDR); + assert(N.getOpcode() == ISD::TargetGlobalTLSAddress); + const GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(N); + + X86ISelAddressMode AM; + AM.GV = GA->getGlobal(); + AM.Disp += GA->getOffset(); + AM.Base.Reg = CurDAG->getRegister(0, N.getValueType()); + + if (N.getValueType() == MVT::i32) { + AM.Scale = 1; + AM.IndexReg = CurDAG->getRegister(X86::EBX, MVT::i32); + } else { + AM.IndexReg = CurDAG->getRegister(0, MVT::i64); + } + + SDValue Segment; + getAddressOperands(AM, Base, Scale, Index, Disp, Segment); + return true; +} + + bool X86DAGToDAGISel::TryFoldLoad(SDValue P, SDValue N, SDValue &Base, SDValue &Scale, SDValue &Index, SDValue &Disp, diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index 36e3ab2..8d0ea66 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -788,8 +788,6 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM) setOperationAction(ISD::USUBO, MVT::i64, Custom); setOperationAction(ISD::SMULO, MVT::i32, Custom); setOperationAction(ISD::SMULO, MVT::i64, Custom); - setOperationAction(ISD::UMULO, MVT::i32, Custom); - setOperationAction(ISD::UMULO, MVT::i64, Custom); if (!Subtarget->is64Bit()) { // These libcalls are not available in 32-bit. @@ -4439,9 +4437,8 @@ static SDValue LowerToTLSExecModel(GlobalAddressSDNode *GA, SelectionDAG &DAG, // emit "addl x@ntpoff,%eax" (local exec) or "addl x@indntpoff,%eax" (initial // exec) - SDValue TGA = DAG.getTargetGlobalAddress(GA->getGlobal(), - GA->getValueType(0), - GA->getOffset()); + SDValue TGA = DAG.getTargetGlobalAddress(GA->getGlobal(), GA->getValueType(0), + GA->getOffset()); SDValue Offset = DAG.getNode(X86ISD::Wrapper, dl, PtrVT, TGA); if (model == TLSModel::InitialExec) @@ -8474,6 +8471,14 @@ void X86TargetLowering::LowerAsmOperandForConstraint(SDValue Op, } } return; + case 'K': + if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op)) { + if ((int8_t)C->getSExtValue() == C->getSExtValue()) { + Result = DAG.getTargetConstant(C->getZExtValue(), Op.getValueType()); + break; + } + } + return; case 'N': if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op)) { if (C->getZExtValue() <= 255) { diff --git a/lib/Target/X86/X86Instr64bit.td b/lib/Target/X86/X86Instr64bit.td index dc15e4a..063913f 100644 --- a/lib/Target/X86/X86Instr64bit.td +++ b/lib/Target/X86/X86Instr64bit.td @@ -19,6 +19,14 @@ // 64-bits but only 32 bits are significant. def i64i32imm : Operand<i64>; + +// 64-bits but only 32 bits are significant, and those bits are treated as being +// pc relative. +def i64i32imm_pcrel : Operand<i64> { + let PrintMethod = "print_pcrel_imm"; +} + + // 64-bits but only 8 bits are significant. def i64i8imm : Operand<i64>; @@ -29,6 +37,7 @@ def lea64mem : Operand<i64> { def lea64_32mem : Operand<i32> { let PrintMethod = "printlea64_32mem"; + let AsmOperandLowerMethod = "lower_lea64_32mem"; let MIOperandInfo = (ops GR32, i8imm, GR32, i32imm); } @@ -39,6 +48,9 @@ def lea64addr : ComplexPattern<i64, 4, "SelectLEAAddr", [add, mul, X86mul_imm, shl, or, frameindex, X86Wrapper], []>; +def tls64addr : ComplexPattern<i64, 4, "SelectTLSADDRAddr", + [tglobaltlsaddr], []>; + //===----------------------------------------------------------------------===// // Pattern fragments. 
// @@ -113,9 +125,9 @@ let isCall = 1 in // NOTE: this pattern doesn't match "X86call imm", because we do not know // that the offset between an arbitrary immediate and the call will fit in // the 32-bit pcrel field that we have. - def CALL64pcrel32 : I<0xE8, RawFrm, - (outs), (ins i64i32imm:$dst, variable_ops), - "call\t${dst:call}", []>, + def CALL64pcrel32 : Ii32<0xE8, RawFrm, + (outs), (ins i64i32imm_pcrel:$dst, variable_ops), + "call\t$dst", []>, Requires<[In64BitMode]>; def CALL64r : I<0xFF, MRM2r, (outs), (ins GR64:$dst, variable_ops), "call\t{*}$dst", [(X86call GR64:$dst)]>; @@ -177,6 +189,15 @@ def PUSH64r : I<0x50, AddRegFrm, (outs), (ins GR64:$reg), "push{q}\t$reg", []>; } +let Defs = [RSP], Uses = [RSP], neverHasSideEffects = 1, mayStore = 1 in { +def PUSH64i8 : Ii8<0x6a, RawFrm, (outs), (ins i8imm:$imm), + "push{q}\t$imm", []>; +def PUSH64i16 : Ii16<0x68, RawFrm, (outs), (ins i16imm:$imm), + "push{q}\t$imm", []>; +def PUSH64i32 : Ii32<0x68, RawFrm, (outs), (ins i32imm:$imm), + "push{q}\t$imm", []>; +} + let Defs = [RSP, EFLAGS], Uses = [RSP], mayLoad = 1 in def POPFQ : I<0x9D, RawFrm, (outs), (ins), "popf", []>, REX_W; let Defs = [RSP], Uses = [RSP, EFLAGS], mayStore = 1 in @@ -1312,13 +1333,13 @@ let Defs = [RAX, RCX, RDX, RSI, RDI, R8, R9, R10, R11, XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7, XMM8, XMM9, XMM10, XMM11, XMM12, XMM13, XMM14, XMM15, EFLAGS], Uses = [RSP] in -def TLS_addr64 : I<0, Pseudo, (outs), (ins i64imm:$sym), +def TLS_addr64 : I<0, Pseudo, (outs), (ins lea64mem:$sym), ".byte\t0x66; " - "leaq\t${sym:mem}(%rip), %rdi; " + "leaq\t$sym(%rip), %rdi; " ".word\t0x6666; " "rex64; " "call\t__tls_get_addr@PLT", - [(X86tlsaddr tglobaltlsaddr:$sym)]>, + [(X86tlsaddr tls64addr:$sym)]>, Requires<[In64BitMode]>; let AddedComplexity = 5 in diff --git a/lib/Target/X86/X86InstrInfo.td b/lib/Target/X86/X86InstrInfo.td index 50ae417..2d8f55f 100644 --- a/lib/Target/X86/X86InstrInfo.td +++ b/lib/Target/X86/X86InstrInfo.td @@ -163,6 +163,11 @@ def X86mul_imm : SDNode<"X86ISD::MUL_IMM", SDTIntBinOp>; // X86 Operand Definitions. // +def i32imm_pcrel : Operand<i32> { + let PrintMethod = "print_pcrel_imm"; +} + + // *mem - Operand definitions for the funky X86 addressing mode operands. // class X86MemOperand<string printMethod> : Operand<iPTR> { @@ -206,8 +211,10 @@ def i16i8imm : Operand<i16>; // 32-bits but only 8 bits are significant. def i32i8imm : Operand<i32>; -// Branch targets have OtherVT type. -def brtarget : Operand<OtherVT>; +// Branch targets have OtherVT type and print as pc-relative values. +def brtarget : Operand<OtherVT> { + let PrintMethod = "print_pcrel_imm"; +} //===----------------------------------------------------------------------===// // X86 Complex Pattern Definitions. @@ -217,6 +224,8 @@ def brtarget : Operand<OtherVT>; def addr : ComplexPattern<iPTR, 5, "SelectAddr", [], []>; def lea32addr : ComplexPattern<i32, 4, "SelectLEAAddr", [add, sub, mul, shl, or, frameindex], []>; +def tls32addr : ComplexPattern<i32, 4, "SelectTLSADDRAddr", + [tglobaltlsaddr], []>; //===----------------------------------------------------------------------===// // X86 Instruction Predicate Definitions. 
@@ -561,8 +570,9 @@ let isCall = 1 in XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7, XMM8, XMM9, XMM10, XMM11, XMM12, XMM13, XMM14, XMM15, EFLAGS], Uses = [ESP] in { - def CALLpcrel32 : Ii32<0xE8, RawFrm, (outs), (ins i32imm:$dst,variable_ops), - "call\t${dst:call}", []>; + def CALLpcrel32 : Ii32<0xE8, RawFrm, + (outs), (ins i32imm_pcrel:$dst,variable_ops), + "call\t$dst", []>; def CALL32r : I<0xFF, MRM2r, (outs), (ins GR32:$dst, variable_ops), "call\t{*}$dst", [(X86call GR32:$dst)]>; def CALL32m : I<0xFF, MRM2m, (outs), (ins i32mem:$dst, variable_ops), @@ -587,7 +597,7 @@ def TCRETURNri : I<0, Pseudo, (outs), (ins GR32:$dst, i32imm:$offset, variable_o let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1 in - def TAILJMPd : IBr<0xE9, (ins i32imm:$dst), "jmp\t${dst:call} # TAILCALL", + def TAILJMPd : IBr<0xE9, (ins i32imm_pcrel:$dst), "jmp\t$dst # TAILCALL", []>; let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1 in def TAILJMPr : I<0xFF, MRM4r, (outs), (ins GR32:$dst), "jmp{l}\t{*}$dst # TAILCALL", @@ -611,6 +621,15 @@ let mayStore = 1 in def PUSH32r : I<0x50, AddRegFrm, (outs), (ins GR32:$reg), "push{l}\t$reg",[]>; } +let Defs = [ESP], Uses = [ESP], neverHasSideEffects = 1, mayStore = 1 in { +def PUSH32i8 : Ii8<0x6a, RawFrm, (outs), (ins i8imm:$imm), + "push{l}\t$imm", []>; +def PUSH32i16 : Ii16<0x68, RawFrm, (outs), (ins i16imm:$imm), + "push{l}\t$imm", []>; +def PUSH32i32 : Ii32<0x68, RawFrm, (outs), (ins i32imm:$imm), + "push{l}\t$imm", []>; +} + let Defs = [ESP, EFLAGS], Uses = [ESP], mayLoad = 1, neverHasSideEffects=1 in def POPFD : I<0x9D, RawFrm, (outs), (ins), "popf", []>; let Defs = [ESP], Uses = [ESP, EFLAGS], mayStore = 1, neverHasSideEffects=1 in @@ -1726,13 +1745,13 @@ let isTwoAddress = 0 in { let Defs = [EFLAGS] in { let Uses = [CL] in { def SHL8rCL : I<0xD2, MRM4r, (outs GR8 :$dst), (ins GR8 :$src), - "shl{b}\t{%cl, $dst|$dst, %CL}", + "shl{b}\t{%cl, $dst|$dst, CL}", [(set GR8:$dst, (shl GR8:$src, CL))]>; def SHL16rCL : I<0xD3, MRM4r, (outs GR16:$dst), (ins GR16:$src), - "shl{w}\t{%cl, $dst|$dst, %CL}", + "shl{w}\t{%cl, $dst|$dst, CL}", [(set GR16:$dst, (shl GR16:$src, CL))]>, OpSize; def SHL32rCL : I<0xD3, MRM4r, (outs GR32:$dst), (ins GR32:$src), - "shl{l}\t{%cl, $dst|$dst, %CL}", + "shl{l}\t{%cl, $dst|$dst, CL}", [(set GR32:$dst, (shl GR32:$src, CL))]>; } // Uses = [CL] @@ -1753,13 +1772,13 @@ def SHL32ri : Ii8<0xC1, MRM4r, (outs GR32:$dst), (ins GR32:$src1, i8imm:$src2), let isTwoAddress = 0 in { let Uses = [CL] in { def SHL8mCL : I<0xD2, MRM4m, (outs), (ins i8mem :$dst), - "shl{b}\t{%cl, $dst|$dst, %CL}", + "shl{b}\t{%cl, $dst|$dst, CL}", [(store (shl (loadi8 addr:$dst), CL), addr:$dst)]>; def SHL16mCL : I<0xD3, MRM4m, (outs), (ins i16mem:$dst), - "shl{w}\t{%cl, $dst|$dst, %CL}", + "shl{w}\t{%cl, $dst|$dst, CL}", [(store (shl (loadi16 addr:$dst), CL), addr:$dst)]>, OpSize; def SHL32mCL : I<0xD3, MRM4m, (outs), (ins i32mem:$dst), - "shl{l}\t{%cl, $dst|$dst, %CL}", + "shl{l}\t{%cl, $dst|$dst, CL}", [(store (shl (loadi32 addr:$dst), CL), addr:$dst)]>; } def SHL8mi : Ii8<0xC0, MRM4m, (outs), (ins i8mem :$dst, i8imm:$src), @@ -1788,13 +1807,13 @@ let isTwoAddress = 0 in { let Uses = [CL] in { def SHR8rCL : I<0xD2, MRM5r, (outs GR8 :$dst), (ins GR8 :$src), - "shr{b}\t{%cl, $dst|$dst, %CL}", + "shr{b}\t{%cl, $dst|$dst, CL}", [(set GR8:$dst, (srl GR8:$src, CL))]>; def SHR16rCL : I<0xD3, MRM5r, (outs GR16:$dst), (ins GR16:$src), - "shr{w}\t{%cl, $dst|$dst, %CL}", + "shr{w}\t{%cl, $dst|$dst, CL}", [(set GR16:$dst, (srl GR16:$src, CL))]>, OpSize; def 
SHR32rCL : I<0xD3, MRM5r, (outs GR32:$dst), (ins GR32:$src), - "shr{l}\t{%cl, $dst|$dst, %CL}", + "shr{l}\t{%cl, $dst|$dst, CL}", [(set GR32:$dst, (srl GR32:$src, CL))]>; } @@ -1822,14 +1841,14 @@ def SHR32r1 : I<0xD1, MRM5r, (outs GR32:$dst), (ins GR32:$src1), let isTwoAddress = 0 in { let Uses = [CL] in { def SHR8mCL : I<0xD2, MRM5m, (outs), (ins i8mem :$dst), - "shr{b}\t{%cl, $dst|$dst, %CL}", + "shr{b}\t{%cl, $dst|$dst, CL}", [(store (srl (loadi8 addr:$dst), CL), addr:$dst)]>; def SHR16mCL : I<0xD3, MRM5m, (outs), (ins i16mem:$dst), - "shr{w}\t{%cl, $dst|$dst, %CL}", + "shr{w}\t{%cl, $dst|$dst, CL}", [(store (srl (loadi16 addr:$dst), CL), addr:$dst)]>, OpSize; def SHR32mCL : I<0xD3, MRM5m, (outs), (ins i32mem:$dst), - "shr{l}\t{%cl, $dst|$dst, %CL}", + "shr{l}\t{%cl, $dst|$dst, CL}", [(store (srl (loadi32 addr:$dst), CL), addr:$dst)]>; } def SHR8mi : Ii8<0xC0, MRM5m, (outs), (ins i8mem :$dst, i8imm:$src), @@ -1857,13 +1876,13 @@ let isTwoAddress = 0 in { let Uses = [CL] in { def SAR8rCL : I<0xD2, MRM7r, (outs GR8 :$dst), (ins GR8 :$src), - "sar{b}\t{%cl, $dst|$dst, %CL}", + "sar{b}\t{%cl, $dst|$dst, CL}", [(set GR8:$dst, (sra GR8:$src, CL))]>; def SAR16rCL : I<0xD3, MRM7r, (outs GR16:$dst), (ins GR16:$src), - "sar{w}\t{%cl, $dst|$dst, %CL}", + "sar{w}\t{%cl, $dst|$dst, CL}", [(set GR16:$dst, (sra GR16:$src, CL))]>, OpSize; def SAR32rCL : I<0xD3, MRM7r, (outs GR32:$dst), (ins GR32:$src), - "sar{l}\t{%cl, $dst|$dst, %CL}", + "sar{l}\t{%cl, $dst|$dst, CL}", [(set GR32:$dst, (sra GR32:$src, CL))]>; } @@ -1892,13 +1911,13 @@ def SAR32r1 : I<0xD1, MRM7r, (outs GR32:$dst), (ins GR32:$src1), let isTwoAddress = 0 in { let Uses = [CL] in { def SAR8mCL : I<0xD2, MRM7m, (outs), (ins i8mem :$dst), - "sar{b}\t{%cl, $dst|$dst, %CL}", + "sar{b}\t{%cl, $dst|$dst, CL}", [(store (sra (loadi8 addr:$dst), CL), addr:$dst)]>; def SAR16mCL : I<0xD3, MRM7m, (outs), (ins i16mem:$dst), - "sar{w}\t{%cl, $dst|$dst, %CL}", + "sar{w}\t{%cl, $dst|$dst, CL}", [(store (sra (loadi16 addr:$dst), CL), addr:$dst)]>, OpSize; def SAR32mCL : I<0xD3, MRM7m, (outs), (ins i32mem:$dst), - "sar{l}\t{%cl, $dst|$dst, %CL}", + "sar{l}\t{%cl, $dst|$dst, CL}", [(store (sra (loadi32 addr:$dst), CL), addr:$dst)]>; } def SAR8mi : Ii8<0xC0, MRM7m, (outs), (ins i8mem :$dst, i8imm:$src), @@ -1929,13 +1948,13 @@ let isTwoAddress = 0 in { // FIXME: provide shorter instructions when imm8 == 1 let Uses = [CL] in { def ROL8rCL : I<0xD2, MRM0r, (outs GR8 :$dst), (ins GR8 :$src), - "rol{b}\t{%cl, $dst|$dst, %CL}", + "rol{b}\t{%cl, $dst|$dst, CL}", [(set GR8:$dst, (rotl GR8:$src, CL))]>; def ROL16rCL : I<0xD3, MRM0r, (outs GR16:$dst), (ins GR16:$src), - "rol{w}\t{%cl, $dst|$dst, %CL}", + "rol{w}\t{%cl, $dst|$dst, CL}", [(set GR16:$dst, (rotl GR16:$src, CL))]>, OpSize; def ROL32rCL : I<0xD3, MRM0r, (outs GR32:$dst), (ins GR32:$src), - "rol{l}\t{%cl, $dst|$dst, %CL}", + "rol{l}\t{%cl, $dst|$dst, CL}", [(set GR32:$dst, (rotl GR32:$src, CL))]>; } @@ -1963,13 +1982,13 @@ def ROL32r1 : I<0xD1, MRM0r, (outs GR32:$dst), (ins GR32:$src1), let isTwoAddress = 0 in { let Uses = [CL] in { def ROL8mCL : I<0xD2, MRM0m, (outs), (ins i8mem :$dst), - "rol{b}\t{%cl, $dst|$dst, %CL}", + "rol{b}\t{%cl, $dst|$dst, CL}", [(store (rotl (loadi8 addr:$dst), CL), addr:$dst)]>; def ROL16mCL : I<0xD3, MRM0m, (outs), (ins i16mem:$dst), - "rol{w}\t{%cl, $dst|$dst, %CL}", + "rol{w}\t{%cl, $dst|$dst, CL}", [(store (rotl (loadi16 addr:$dst), CL), addr:$dst)]>, OpSize; def ROL32mCL : I<0xD3, MRM0m, (outs), (ins i32mem:$dst), - "rol{l}\t{%cl, $dst|$dst, %CL}", + "rol{l}\t{%cl, 
$dst|$dst, CL}", [(store (rotl (loadi32 addr:$dst), CL), addr:$dst)]>; } def ROL8mi : Ii8<0xC0, MRM0m, (outs), (ins i8mem :$dst, i8imm:$src), @@ -1998,13 +2017,13 @@ let isTwoAddress = 0 in { let Uses = [CL] in { def ROR8rCL : I<0xD2, MRM1r, (outs GR8 :$dst), (ins GR8 :$src), - "ror{b}\t{%cl, $dst|$dst, %CL}", + "ror{b}\t{%cl, $dst|$dst, CL}", [(set GR8:$dst, (rotr GR8:$src, CL))]>; def ROR16rCL : I<0xD3, MRM1r, (outs GR16:$dst), (ins GR16:$src), - "ror{w}\t{%cl, $dst|$dst, %CL}", + "ror{w}\t{%cl, $dst|$dst, CL}", [(set GR16:$dst, (rotr GR16:$src, CL))]>, OpSize; def ROR32rCL : I<0xD3, MRM1r, (outs GR32:$dst), (ins GR32:$src), - "ror{l}\t{%cl, $dst|$dst, %CL}", + "ror{l}\t{%cl, $dst|$dst, CL}", [(set GR32:$dst, (rotr GR32:$src, CL))]>; } @@ -2032,13 +2051,13 @@ def ROR32r1 : I<0xD1, MRM1r, (outs GR32:$dst), (ins GR32:$src1), let isTwoAddress = 0 in { let Uses = [CL] in { def ROR8mCL : I<0xD2, MRM1m, (outs), (ins i8mem :$dst), - "ror{b}\t{%cl, $dst|$dst, %CL}", + "ror{b}\t{%cl, $dst|$dst, CL}", [(store (rotr (loadi8 addr:$dst), CL), addr:$dst)]>; def ROR16mCL : I<0xD3, MRM1m, (outs), (ins i16mem:$dst), - "ror{w}\t{%cl, $dst|$dst, %CL}", + "ror{w}\t{%cl, $dst|$dst, CL}", [(store (rotr (loadi16 addr:$dst), CL), addr:$dst)]>, OpSize; def ROR32mCL : I<0xD3, MRM1m, (outs), (ins i32mem:$dst), - "ror{l}\t{%cl, $dst|$dst, %CL}", + "ror{l}\t{%cl, $dst|$dst, CL}", [(store (rotr (loadi32 addr:$dst), CL), addr:$dst)]>; } def ROR8mi : Ii8<0xC0, MRM1m, (outs), (ins i8mem :$dst, i8imm:$src), @@ -2070,17 +2089,17 @@ let isTwoAddress = 0 in { // Double shift instructions (generalizations of rotate) let Uses = [CL] in { def SHLD32rrCL : I<0xA5, MRMDestReg, (outs GR32:$dst), (ins GR32:$src1, GR32:$src2), - "shld{l}\t{%cl, $src2, $dst|$dst, $src2, %CL}", + "shld{l}\t{%cl, $src2, $dst|$dst, $src2, CL}", [(set GR32:$dst, (X86shld GR32:$src1, GR32:$src2, CL))]>, TB; def SHRD32rrCL : I<0xAD, MRMDestReg, (outs GR32:$dst), (ins GR32:$src1, GR32:$src2), - "shrd{l}\t{%cl, $src2, $dst|$dst, $src2, %CL}", + "shrd{l}\t{%cl, $src2, $dst|$dst, $src2, CL}", [(set GR32:$dst, (X86shrd GR32:$src1, GR32:$src2, CL))]>, TB; def SHLD16rrCL : I<0xA5, MRMDestReg, (outs GR16:$dst), (ins GR16:$src1, GR16:$src2), - "shld{w}\t{%cl, $src2, $dst|$dst, $src2, %CL}", + "shld{w}\t{%cl, $src2, $dst|$dst, $src2, CL}", [(set GR16:$dst, (X86shld GR16:$src1, GR16:$src2, CL))]>, TB, OpSize; def SHRD16rrCL : I<0xAD, MRMDestReg, (outs GR16:$dst), (ins GR16:$src1, GR16:$src2), - "shrd{w}\t{%cl, $src2, $dst|$dst, $src2, %CL}", + "shrd{w}\t{%cl, $src2, $dst|$dst, $src2, CL}", [(set GR16:$dst, (X86shrd GR16:$src1, GR16:$src2, CL))]>, TB, OpSize; } @@ -2115,11 +2134,11 @@ def SHRD16rri8 : Ii8<0xAC, MRMDestReg, let isTwoAddress = 0 in { let Uses = [CL] in { def SHLD32mrCL : I<0xA5, MRMDestMem, (outs), (ins i32mem:$dst, GR32:$src2), - "shld{l}\t{%cl, $src2, $dst|$dst, $src2, %CL}", + "shld{l}\t{%cl, $src2, $dst|$dst, $src2, CL}", [(store (X86shld (loadi32 addr:$dst), GR32:$src2, CL), addr:$dst)]>, TB; def SHRD32mrCL : I<0xAD, MRMDestMem, (outs), (ins i32mem:$dst, GR32:$src2), - "shrd{l}\t{%cl, $src2, $dst|$dst, $src2, %CL}", + "shrd{l}\t{%cl, $src2, $dst|$dst, $src2, CL}", [(store (X86shrd (loadi32 addr:$dst), GR32:$src2, CL), addr:$dst)]>, TB; } @@ -2138,11 +2157,11 @@ let isTwoAddress = 0 in { let Uses = [CL] in { def SHLD16mrCL : I<0xA5, MRMDestMem, (outs), (ins i16mem:$dst, GR16:$src2), - "shld{w}\t{%cl, $src2, $dst|$dst, $src2, %CL}", + "shld{w}\t{%cl, $src2, $dst|$dst, $src2, CL}", [(store (X86shld (loadi16 addr:$dst), GR16:$src2, CL), addr:$dst)]>, 
TB, OpSize; def SHRD16mrCL : I<0xAD, MRMDestMem, (outs), (ins i16mem:$dst, GR16:$src2), - "shrd{w}\t{%cl, $src2, $dst|$dst, $src2, %CL}", + "shrd{w}\t{%cl, $src2, $dst|$dst, $src2, CL}", [(store (X86shrd (loadi16 addr:$dst), GR16:$src2, CL), addr:$dst)]>, TB, OpSize; } @@ -3095,11 +3114,11 @@ let Defs = [EAX, ECX, EDX, FP0, FP1, FP2, FP3, FP4, FP5, FP6, ST0, MM0, MM1, MM2, MM3, MM4, MM5, MM6, MM7, XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7, XMM8, XMM9, XMM10, XMM11, XMM12, XMM13, XMM14, XMM15, EFLAGS], - Uses = [ESP, EBX] in -def TLS_addr32 : I<0, Pseudo, (outs), (ins i32imm:$sym), - "leal\t${sym:mem}(,%ebx,1), %eax; " + Uses = [ESP] in +def TLS_addr32 : I<0, Pseudo, (outs), (ins lea32mem:$sym), + "leal\t$sym, %eax; " "call\t___tls_get_addr@PLT", - [(X86tlsaddr tglobaltlsaddr:$sym)]>, + [(X86tlsaddr tls32addr:$sym)]>, Requires<[In32BitMode]>; let AddedComplexity = 5 in diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td index b44c7a6..5d6ef36 100644 --- a/lib/Target/X86/X86InstrSSE.td +++ b/lib/Target/X86/X86InstrSSE.td @@ -3027,6 +3027,12 @@ def : Pat<(v4i32 (movlp VR128:$src1, VR128:$src2)), (MOVLPDrr VR128:$src1, VR128:$src2)>, Requires<[HasSSE2]>; } +// vector_shuffle v1, v2 <4, 5, 2, 3> using SHUFPSrri (we prefer movsd, but +// fall back to this for SSE1) +def : Pat<(v4f32 (movlp:$src3 VR128:$src1, (v4f32 VR128:$src2))), + (SHUFPSrri VR128:$src2, VR128:$src1, + (SHUFFLE_get_shuf_imm VR128:$src3))>, Requires<[HasSSE1]>; + // Set lowest element and zero upper elements. let AddedComplexity = 15 in def : Pat<(v2f64 (movl immAllZerosV_bc, VR128:$src)), diff --git a/lib/Target/X86/X86RegisterInfo.cpp b/lib/Target/X86/X86RegisterInfo.cpp index 6c0074e..a2f319f 100644 --- a/lib/Target/X86/X86RegisterInfo.cpp +++ b/lib/Target/X86/X86RegisterInfo.cpp @@ -662,6 +662,10 @@ void X86RegisterInfo::emitFrameMoves(MachineFunction &MF, TargetFrameInfo::StackGrowsUp ? TD->getPointerSize() : -TD->getPointerSize()); + MachineLocation FPDst(hasFP(MF) ? FramePtr : StackPtr); + MachineLocation FPSrc(MachineLocation::VirtualFP); + Moves.push_back(MachineMove(ReadyLabelId, FPDst, FPSrc)); + if (StackSize) { // Show update of SP. if (hasFP(MF)) { @@ -676,7 +680,7 @@ void X86RegisterInfo::emitFrameMoves(MachineFunction &MF, Moves.push_back(MachineMove(FrameLabelId, SPDst, SPSrc)); } } else { - //FIXME: Verify & implement for FP + // FIXME: Verify & implement for FP MachineLocation SPDst(StackPtr); MachineLocation SPSrc(StackPtr, stackGrowth); Moves.push_back(MachineMove(FrameLabelId, SPDst, SPSrc)); @@ -711,10 +715,6 @@ void X86RegisterInfo::emitFrameMoves(MachineFunction &MF, MachineLocation FPSrc(FramePtr); Moves.push_back(MachineMove(ReadyLabelId, FPDst, FPSrc)); } - - MachineLocation FPDst(hasFP(MF) ? FramePtr : StackPtr); - MachineLocation FPSrc(MachineLocation::VirtualFP); - Moves.push_back(MachineMove(ReadyLabelId, FPDst, FPSrc)); } @@ -729,8 +729,7 @@ void X86RegisterInfo::emitPrologue(MachineFunction &MF) const { bool needsFrameMoves = (MMI && MMI->hasDebugInfo()) || !Fn->doesNotThrow() || UnwindTablesMandatory; - DebugLoc DL = (MBBI != MBB.end() ? MBBI->getDebugLoc() : - DebugLoc::getUnknownLoc()); + DebugLoc DL; // Prepare for frame info. unsigned FrameLabelId = 0; @@ -822,13 +821,6 @@ void X86RegisterInfo::emitPrologue(MachineFunction &MF) const { NumBytes = StackSize - X86FI->getCalleeSavedFrameSize(); } - unsigned ReadyLabelId = 0; - if (needsFrameMoves) { - // Mark effective beginning of when frame pointer is ready. 
- ReadyLabelId = MMI->NextLabelID(); - BuildMI(MBB, MBBI, DL, TII.get(X86::DBG_LABEL)).addImm(ReadyLabelId); - } - // Skip the callee-saved push instructions. while (MBBI != MBB.end() && (MBBI->getOpcode() == X86::PUSH32r || @@ -891,8 +883,13 @@ void X86RegisterInfo::emitPrologue(MachineFunction &MF) const { emitSPUpdate(MBB, MBBI, StackPtr, -(int64_t)NumBytes, Is64Bit, TII); } - if (needsFrameMoves) + if (needsFrameMoves) { + unsigned ReadyLabelId = 0; + // Mark effective beginning of when frame pointer is ready. + ReadyLabelId = MMI->NextLabelID(); + BuildMI(MBB, MBBI, DL, TII.get(X86::DBG_LABEL)).addImm(ReadyLabelId); emitFrameMoves(MF, FrameLabelId, ReadyLabelId); + } } void X86RegisterInfo::emitEpilogue(MachineFunction &MF, diff --git a/lib/Target/X86/X86Subtarget.h b/lib/Target/X86/X86Subtarget.h index 46476f2..694b0eb 100644 --- a/lib/Target/X86/X86Subtarget.h +++ b/lib/Target/X86/X86Subtarget.h @@ -173,7 +173,7 @@ public: bool isPICStyleGOT() const { return PICStyle == PICStyles::GOT; } bool isPICStyleStub() const { return PICStyle == PICStyles::Stub; } bool isPICStyleRIPRel() const { return PICStyle == PICStyles::RIPRel; } - bool isPICStyleWinPIC() const { return PICStyle == PICStyles:: WinPIC; } + bool isPICStyleWinPIC() const { return PICStyle == PICStyles::WinPIC; } /// getDarwinVers - Return the darwin version number, 8 = tiger, 9 = leopard. unsigned getDarwinVers() const { return DarwinVers; } diff --git a/lib/Target/X86/X86TargetAsmInfo.cpp b/lib/Target/X86/X86TargetAsmInfo.cpp index 5dda5f4..f49ca15 100644 --- a/lib/Target/X86/X86TargetAsmInfo.cpp +++ b/lib/Target/X86/X86TargetAsmInfo.cpp @@ -44,40 +44,25 @@ X86DarwinTargetAsmInfo::X86DarwinTargetAsmInfo(const X86TargetMachine &TM): AlignmentIsInBytes = false; TextAlignFillValue = 0x90; - GlobalPrefix = "_"; + + if (!is64Bit) Data64bitsDirective = 0; // we can't emit a 64-bit unit ZeroDirective = "\t.space\t"; // ".space N" emits N zeros. - PrivateGlobalPrefix = "L"; // Marker for constant pool idxs - LessPrivateGlobalPrefix = "l"; // Marker for some ObjC metadata - BSSSection = 0; // no BSS section. ZeroFillDirective = "\t.zerofill\t"; // Uses .zerofill if (TM.getRelocationModel() != Reloc::Static) ConstantPoolSection = "\t.const_data"; else ConstantPoolSection = "\t.const\n"; - JumpTableDataSection = "\t.const\n"; - CStringSection = "\t.cstring"; - // FIXME: Why don't always use this section? - if (is64Bit) { + // FIXME: Why don't we always use this section? + if (is64Bit) SixteenByteConstantSection = getUnnamedSection("\t.literal16\n", SectionFlags::Mergeable); - } LCOMMDirective = "\t.lcomm\t"; - SwitchToSectionDirective = "\t.section "; - StringConstantPrefix = "\1LC"; // Leopard and above support aligned common symbols. 
COMMDirectiveTakesAlignment = (Subtarget->getDarwinVers() >= 9); HasDotTypeDotSizeDirective = false; - HasSingleParameterDotFile = false; NonLocalEHFrameLabel = true; - if (TM.getRelocationModel() == Reloc::Static) { - StaticCtorsSection = ".constructor"; - StaticDtorsSection = ".destructor"; - } else { - StaticCtorsSection = ".mod_init_func"; - StaticDtorsSection = ".mod_term_func"; - } if (is64Bit) { PersonalityPrefix = ""; PersonalitySuffix = "+4@GOTPCREL"; @@ -85,40 +70,18 @@ X86DarwinTargetAsmInfo::X86DarwinTargetAsmInfo(const X86TargetMachine &TM): PersonalityPrefix = "L"; PersonalitySuffix = "$non_lazy_ptr"; } - NeedsIndirectEncoding = true; InlineAsmStart = "## InlineAsm Start"; InlineAsmEnd = "## InlineAsm End"; CommentString = "##"; SetDirective = "\t.set"; PCSymbol = "."; UsedDirective = "\t.no_dead_strip\t"; - WeakDefDirective = "\t.weak_definition "; - WeakRefDirective = "\t.weak_reference "; - HiddenDirective = "\t.private_extern "; ProtectedDirective = "\t.globl\t"; - // In non-PIC modes, emit a special label before jump tables so that the - // linker can perform more accurate dead code stripping. - if (TM.getRelocationModel() != Reloc::PIC_) { - // Emit a local label that is preserved until the linker runs. - JumpTableSpecialLabelPrefix = "l"; - } - SupportsDebugInformation = true; - NeedsSet = true; - DwarfAbbrevSection = ".section __DWARF,__debug_abbrev,regular,debug"; - DwarfInfoSection = ".section __DWARF,__debug_info,regular,debug"; - DwarfLineSection = ".section __DWARF,__debug_line,regular,debug"; - DwarfFrameSection = ".section __DWARF,__debug_frame,regular,debug"; - DwarfPubNamesSection = ".section __DWARF,__debug_pubnames,regular,debug"; - DwarfPubTypesSection = ".section __DWARF,__debug_pubtypes,regular,debug"; + DwarfDebugInlineSection = ".section __DWARF,__debug_inlined,regular,debug"; DwarfUsesInlineInfoSection = true; - DwarfStrSection = ".section __DWARF,__debug_str,regular,debug"; - DwarfLocSection = ".section __DWARF,__debug_loc,regular,debug"; - DwarfARangesSection = ".section __DWARF,__debug_aranges,regular,debug"; - DwarfRangesSection = ".section __DWARF,__debug_ranges,regular,debug"; - DwarfMacInfoSection = ".section __DWARF,__debug_macinfo,regular,debug"; // Exceptions handling SupportsExceptionHandling = true; @@ -176,7 +139,7 @@ X86ELFTargetAsmInfo::X86ELFTargetAsmInfo(const X86TargetMachine &TM): DwarfLocSection = "\t.section\t.debug_loc,\"\",@progbits"; DwarfARangesSection = "\t.section\t.debug_aranges,\"\",@progbits"; DwarfRangesSection = "\t.section\t.debug_ranges,\"\",@progbits"; - DwarfMacInfoSection = "\t.section\t.debug_macinfo,\"\",@progbits"; + DwarfMacroInfoSection = "\t.section\t.debug_macinfo,\"\",@progbits"; // Exceptions handling SupportsExceptionHandling = true; @@ -259,7 +222,7 @@ X86COFFTargetAsmInfo::X86COFFTargetAsmInfo(const X86TargetMachine &TM): DwarfLocSection = "\t.section\t.debug_loc,\"dr\""; DwarfARangesSection = "\t.section\t.debug_aranges,\"dr\""; DwarfRangesSection = "\t.section\t.debug_ranges,\"dr\""; - DwarfMacInfoSection = "\t.section\t.debug_macinfo,\"dr\""; + DwarfMacroInfoSection = "\t.section\t.debug_macinfo,\"dr\""; } unsigned @@ -340,8 +303,11 @@ X86WinTargetAsmInfo::X86WinTargetAsmInfo(const X86TargetMachine &TM): GlobalPrefix = "_"; CommentString = ";"; + InlineAsmStart = "; InlineAsm Start"; + InlineAsmEnd = "; InlineAsm End"; + PrivateGlobalPrefix = "$"; - AlignDirective = "\talign\t"; + AlignDirective = "\tALIGN\t"; ZeroDirective = "\tdb\t"; ZeroDirectiveSuffix = " dup(0)"; AsciiDirective = "\tdb\t"; @@ 
-353,13 +319,15 @@ X86WinTargetAsmInfo::X86WinTargetAsmInfo(const X86TargetMachine &TM): HasDotTypeDotSizeDirective = false; HasSingleParameterDotFile = false; + AlignmentIsInBytes = true; + TextSection = getUnnamedSection("_text", SectionFlags::Code); DataSection = getUnnamedSection("_data", SectionFlags::Writeable); JumpTableDataSection = NULL; SwitchToSectionDirective = ""; - TextSectionStartSuffix = "\tsegment 'CODE'"; - DataSectionStartSuffix = "\tsegment 'DATA'"; + TextSectionStartSuffix = "\tSEGMENT PARA 'CODE'"; + DataSectionStartSuffix = "\tSEGMENT PARA 'DATA'"; SectionEndDirectiveSuffix = "\tends\n"; } diff --git a/lib/Target/X86/X86TargetMachine.cpp b/lib/Target/X86/X86TargetMachine.cpp index dfb055f..53c46c3 100644 --- a/lib/Target/X86/X86TargetMachine.cpp +++ b/lib/Target/X86/X86TargetMachine.cpp @@ -36,6 +36,11 @@ X("x86", "32-bit X86: Pentium-Pro and above"); static RegisterTarget<X86_64TargetMachine> Y("x86-64", "64-bit X86: EM64T and AMD64"); +// Force static initialization when called from llvm/InitializeAllTargets.h +namespace llvm { + void InitializeX86Target() { } +} + // No assembler printer by default X86TargetMachine::AsmPrinterCtorFn X86TargetMachine::AsmPrinterCtor = 0; diff --git a/lib/Target/XCore/XCoreAsmPrinter.cpp b/lib/Target/XCore/XCoreAsmPrinter.cpp index c9a6d8a..ed4c101 100644 --- a/lib/Target/XCore/XCoreAsmPrinter.cpp +++ b/lib/Target/XCore/XCoreAsmPrinter.cpp @@ -428,6 +428,7 @@ void XCoreAsmPrinter::printMachineInstruction(const MachineInstr *MI) { bool XCoreAsmPrinter::doInitialization(Module &M) { bool Result = AsmPrinter::doInitialization(M); + DW = getAnalysisIfAvailable<DwarfWriter>(); if (!FileDirective.empty()) { emitFileDirective(FileDirective); @@ -449,11 +450,6 @@ bool XCoreAsmPrinter::doInitialization(Module &M) { } } - // Emit initial debug information. 
- DW = getAnalysisIfAvailable<DwarfWriter>(); - assert(DW && "Dwarf Writer is not available"); - DW->BeginModule(&M, getAnalysisIfAvailable<MachineModuleInfo>(), - O, this, TAI); return Result; } diff --git a/lib/Target/XCore/XCoreTargetAsmInfo.cpp b/lib/Target/XCore/XCoreTargetAsmInfo.cpp index 5513762..59ad624 100644 --- a/lib/Target/XCore/XCoreTargetAsmInfo.cpp +++ b/lib/Target/XCore/XCoreTargetAsmInfo.cpp @@ -24,6 +24,7 @@ using namespace llvm; XCoreTargetAsmInfo::XCoreTargetAsmInfo(const XCoreTargetMachine &TM) : ELFTargetAsmInfo(TM), Subtarget(TM.getSubtargetImpl()) { + SupportsDebugInformation = true; TextSection = getUnnamedSection("\t.text", SectionFlags::Code); DataSection = getNamedSection("\t.dp.data", SectionFlags::Writeable | SectionFlags::Small); @@ -64,7 +65,7 @@ XCoreTargetAsmInfo::XCoreTargetAsmInfo(const XCoreTargetMachine &TM) DwarfLocSection = "\t.section\t.debug_loc,\"\",@progbits"; DwarfARangesSection = "\t.section\t.debug_aranges,\"\",@progbits"; DwarfRangesSection = "\t.section\t.debug_ranges,\"\",@progbits"; - DwarfMacInfoSection = "\t.section\t.debug_macinfo,\"\",@progbits"; + DwarfMacroInfoSection = "\t.section\t.debug_macinfo,\"\",@progbits"; } const Section* diff --git a/lib/Target/XCore/XCoreTargetMachine.cpp b/lib/Target/XCore/XCoreTargetMachine.cpp index 5437c57..cfd3cd3 100644 --- a/lib/Target/XCore/XCoreTargetMachine.cpp +++ b/lib/Target/XCore/XCoreTargetMachine.cpp @@ -31,6 +31,11 @@ namespace { RegisterTarget<XCoreTargetMachine> X("xcore", "XCore"); } +// Force static initialization when called from llvm/InitializeAllTargets.h +namespace llvm { + void InitializeXCoreTarget() { } +} + const TargetAsmInfo *XCoreTargetMachine::createTargetAsmInfo() const { return new XCoreTargetAsmInfo(*this); } diff --git a/lib/Transforms/IPO/CMakeLists.txt b/lib/Transforms/IPO/CMakeLists.txt index 4b85e13..1438b48 100644 --- a/lib/Transforms/IPO/CMakeLists.txt +++ b/lib/Transforms/IPO/CMakeLists.txt @@ -16,6 +16,7 @@ add_llvm_library(LLVMipo LoopExtractor.cpp LowerSetJmp.cpp MergeFunctions.cpp + PartialInlining.cpp PartialSpecialization.cpp PruneEH.cpp RaiseAllocations.cpp diff --git a/lib/Transforms/IPO/GlobalOpt.cpp b/lib/Transforms/IPO/GlobalOpt.cpp index 9a1b294..cbf3a1d 100644 --- a/lib/Transforms/IPO/GlobalOpt.cpp +++ b/lib/Transforms/IPO/GlobalOpt.cpp @@ -1667,11 +1667,14 @@ bool GlobalOpt::ProcessInternalGlobal(GlobalVariable *GV, // // NOTE: It doesn't make sense to promote non single-value types since we // are just replacing static memory to stack memory. + // + // If the global is in a different address space, don't bring it to the stack.
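// Editor's illustration (hypothetical, not from this commit): the guarded
// localization rewrites something like
//     static int G;  int main() { G = 1; return G; }
// into, roughly,
//     int main() { int G_local = 0; G_local = 1; return G_local; }
// An addrspace(N) global has no stack counterpart, which is why the
// getAddressSpace() == 0 test is added below.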
if (!GS.HasMultipleAccessingFunctions && GS.AccessingFunction && !GS.HasNonInstructionUser && GV->getType()->getElementType()->isSingleValueType() && GS.AccessingFunction->getName() == "main" && - GS.AccessingFunction->hasExternalLinkage()) { + GS.AccessingFunction->hasExternalLinkage() && + GV->getType()->getAddressSpace() == 0) { DOUT << "LOCALIZING GLOBAL: " << *GV; Instruction* FirstI = GS.AccessingFunction->getEntryBlock().begin(); const Type* ElemTy = GV->getType()->getElementType(); diff --git a/lib/Transforms/IPO/PartialInlining.cpp b/lib/Transforms/IPO/PartialInlining.cpp index b3a25540..0b975ae 100644 --- a/lib/Transforms/IPO/PartialInlining.cpp +++ b/lib/Transforms/IPO/PartialInlining.cpp @@ -20,10 +20,13 @@ #include "llvm/Analysis/Dominators.h" #include "llvm/Transforms/Utils/Cloning.h" #include "llvm/Transforms/Utils/FunctionUtils.h" +#include "llvm/ADT/Statistic.h" #include "llvm/Support/Compiler.h" #include "llvm/Support/CFG.h" using namespace llvm; +STATISTIC(NumPartialInlined, "Number of functions partially inlined"); + namespace { struct VISIBILITY_HIDDEN PartialInliner : public ModulePass { virtual void getAnalysisUsage(AnalysisUsage &AU) const { } @@ -132,6 +135,8 @@ Function* PartialInliner::unswitchFunction(Function* F) { duplicateFunction->replaceAllUsesWith(F); duplicateFunction->eraseFromParent(); + ++NumPartialInlined; + return extractedFunction; } diff --git a/lib/Transforms/IPO/RaiseAllocations.cpp b/lib/Transforms/IPO/RaiseAllocations.cpp index a81bbdb..8c97b5d 100644 --- a/lib/Transforms/IPO/RaiseAllocations.cpp +++ b/lib/Transforms/IPO/RaiseAllocations.cpp @@ -82,14 +82,14 @@ void RaiseAllocations::doInitialization(Module &M) { // Check to see if we got the expected malloc if (TyWeHave != Malloc1Type) { - // Check to see if the prototype is wrong, giving us sbyte*(uint) * malloc + // Check to see if the prototype is wrong, giving us i8*(i32) * malloc // This handles the common declaration of: 'void *malloc(unsigned);' const FunctionType *Malloc2Type = FunctionType::get(PointerType::getUnqual(Type::Int8Ty), std::vector<const Type*>(1, Type::Int32Ty), false); if (TyWeHave != Malloc2Type) { // Check to see if the prototype is missing, giving us - // sbyte*(...) 
* malloc // This handles the common declaration of: 'void *malloc();' const FunctionType *Malloc3Type = FunctionType::get(PointerType::getUnqual(Type::Int8Ty), diff --git a/lib/Transforms/Instrumentation/RSProfiling.cpp b/lib/Transforms/Instrumentation/RSProfiling.cpp index c6cf4df..b110f4e 100644 --- a/lib/Transforms/Instrumentation/RSProfiling.cpp +++ b/lib/Transforms/Instrumentation/RSProfiling.cpp @@ -108,9 +108,9 @@ namespace { class VISIBILITY_HIDDEN GlobalRandomCounter : public Chooser { GlobalVariable* Counter; Value* ResetValue; - const Type* T; + const IntegerType* T; public: - GlobalRandomCounter(Module& M, const Type* t, uint64_t resetval); + GlobalRandomCounter(Module& M, const IntegerType* t, uint64_t resetval); virtual ~GlobalRandomCounter(); virtual void PrepFunction(Function* F); virtual void ProcessChoicePoint(BasicBlock* bb); @@ -121,9 +121,9 @@ namespace { GlobalVariable* Counter; Value* ResetValue; AllocaInst* AI; - const Type* T; + const IntegerType* T; public: - GlobalRandomCounterOpt(Module& M, const Type* t, uint64_t resetval); + GlobalRandomCounterOpt(Module& M, const IntegerType* t, uint64_t resetval); virtual ~GlobalRandomCounterOpt(); virtual void PrepFunction(Function* F); virtual void ProcessChoicePoint(BasicBlock* bb); @@ -193,7 +193,7 @@ static void getBackEdges(Function& F, T& BackEdges); // Methods of choosing when to profile /////////////////////////////////////// -GlobalRandomCounter::GlobalRandomCounter(Module& M, const Type* t, +GlobalRandomCounter::GlobalRandomCounter(Module& M, const IntegerType* t, uint64_t resetval) : T(t) { ConstantInt* Init = ConstantInt::get(T, resetval); ResetValue = Init; @@ -229,7 +229,7 @@ void GlobalRandomCounter::ProcessChoicePoint(BasicBlock* bb) { ReplacePhiPred(oldnext, bb, resetblock); } -GlobalRandomCounterOpt::GlobalRandomCounterOpt(Module& M, const Type* t, +GlobalRandomCounterOpt::GlobalRandomCounterOpt(Module& M, const IntegerType* t, uint64_t resetval) : AI(0), T(t) { ConstantInt* Init = ConstantInt::get(T, resetval); diff --git a/lib/Transforms/Scalar/CodeGenPrepare.cpp b/lib/Transforms/Scalar/CodeGenPrepare.cpp index 42978e7..e9bee64 100644 --- a/lib/Transforms/Scalar/CodeGenPrepare.cpp +++ b/lib/Transforms/Scalar/CodeGenPrepare.cpp @@ -401,8 +401,8 @@ static void SplitEdgeNicely(TerminatorInst *TI, unsigned SuccNum, /// OptimizeNoopCopyExpression - If the specified cast instruction is a noop -/// copy (e.g. it's casting from one pointer type to another, int->uint, or -/// int->sbyte on PPC), sink it into user blocks to reduce the number of virtual +/// copy (e.g. it's casting from one pointer type to another, i32->i8 on PPC), +/// sink it into user blocks to reduce the number of virtual /// registers that must be created and coalesced. /// /// Return true if any changes are made. 
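The GVN change below guards load PRE with isSafeToLoadUnconditionally. A hedged C++ rendering of the hazard, mirroring the comment added in the patch (function and variable names are ours):

    int read4(int *p) {
      int *q = p + 4;        // address arithmetic alone is safe even if p is null
      if (p == 0) return 0;
      return *q;             // hoisting this load above the null test could trap
    }

When the loaded value is pushed through a block with multiple successors, the load may become reachable on paths where the pointer was never tested, so the pass now conservatively requires the load to be safe to execute unconditionally.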
diff --git a/lib/Transforms/Scalar/GVN.cpp b/lib/Transforms/Scalar/GVN.cpp index 673d38b..f4a9898 100644 --- a/lib/Transforms/Scalar/GVN.cpp +++ b/lib/Transforms/Scalar/GVN.cpp @@ -37,6 +37,7 @@ #include "llvm/Support/Compiler.h" #include "llvm/Support/Debug.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" +#include "llvm/Transforms/Utils/Local.h" #include <cstdio> using namespace llvm; @@ -48,7 +49,7 @@ STATISTIC(NumPRELoad, "Number of loads PRE'd"); static cl::opt<bool> EnablePRE("enable-pre", cl::init(true), cl::Hidden); -cl::opt<bool> EnableLoadPRE("enable-load-pre", cl::init(true)); +static cl::opt<bool> EnableLoadPRE("enable-load-pre", cl::init(true)); //===----------------------------------------------------------------------===// // ValueTable Class @@ -952,8 +953,14 @@ bool GVN::processNonLocalLoad(LoadInst *LI, // If we had a phi translation failure, we'll have a single entry which is a // clobber in the current block. Reject this early. - if (Deps.size() == 1 && Deps[0].second.isClobber()) + if (Deps.size() == 1 && Deps[0].second.isClobber()) { + DEBUG( + DOUT << "GVN: non-local load "; + WriteAsOperand(*DOUT.stream(), LI); + DOUT << " is clobbered by " << *Deps[0].second.getInst(); + ); return false; + } // Filter out useless results (non-locals, etc). Keep track of the blocks // where we have a value available in repl, also keep track of whether we see @@ -1069,6 +1076,7 @@ bool GVN::processNonLocalLoad(LoadInst *LI, BasicBlock *TmpBB = LoadBB; bool isSinglePred = false; + bool allSingleSucc = true; while (TmpBB->getSinglePredecessor()) { isSinglePred = true; TmpBB = TmpBB->getSinglePredecessor(); @@ -1078,6 +1086,8 @@ bool GVN::processNonLocalLoad(LoadInst *LI, return false; if (Blockers.count(TmpBB)) return false; + if (TmpBB->getTerminator()->getNumSuccessors() != 1) + allSingleSucc = false; } assert(TmpBB); @@ -1154,7 +1164,20 @@ bool GVN::processNonLocalLoad(LoadInst *LI, << UnavailablePred->getName() << "': " << *LI); return false; } - + + // Make sure it is valid to move this load here. We have to watch out for: + // @1 = getelementptr (i8* p, ... + // test p and branch if == 0 + // load @1 + // It is valid to have the getelementptr before the test, even if p can be 0, + // as getelementptr only does address arithmetic. + // If we are not pushing the value through any multiple-successor blocks + // we do not have this case. Otherwise, check that the load is safe to + // put anywhere; this can be improved, but should be conservatively safe. + if (!allSingleSucc && + !isSafeToLoadUnconditionally(LoadPtr, UnavailablePred->getTerminator())) + return false; + // Okay, we can eliminate this load by inserting a reload in the predecessor // and using PHI construction to get the value in the other predecessors, do // it. diff --git a/lib/Transforms/Scalar/IndVarSimplify.cpp b/lib/Transforms/Scalar/IndVarSimplify.cpp index 38b1198..326fb38 100644 --- a/lib/Transforms/Scalar/IndVarSimplify.cpp +++ b/lib/Transforms/Scalar/IndVarSimplify.cpp @@ -17,7 +17,10 @@ // which starts at zero and steps by one. // 2. The canonical induction variable is guaranteed to be the first PHI node // in the loop header block. -// 3. Any pointer arithmetic recurrences are raised to use array subscripts. +// 3. The canonical induction variable is guaranteed to be in a wide enough +// type so that IV expressions need not be (directly) zero-extended or +// sign-extended. +// 4. Any pointer arithmetic recurrences are raised to use array subscripts. 
// // If the trip count of a loop is computable, this pass also makes the following // changes: @@ -296,11 +299,11 @@ void IndVarSimplify::RewriteLoopExitValues(Loop *L, // If this instruction is dead now, delete it. RecursivelyDeleteTriviallyDeadInstructions(Inst); - // See if this is a single-entry LCSSA PHI node. If so, we can (and - // have to) remove - // the PHI entirely. This is safe, because the NewVal won't be variant + // If we're inserting code into the exit block rather than the + // preheader, we can (and have to) remove the PHI entirely. + // This is safe, because the NewVal won't be variant // in the loop, so we don't need an LCSSA phi node anymore. - if (NumPreds == 1) { + if (ExitBlocks.size() == 1) { PN->replaceAllUsesWith(ExitVal); RecursivelyDeleteTriviallyDeadInstructions(PN); break; diff --git a/lib/Transforms/Scalar/InstructionCombining.cpp b/lib/Transforms/Scalar/InstructionCombining.cpp index 5465e4a..5bd17e0 100644 --- a/lib/Transforms/Scalar/InstructionCombining.cpp +++ b/lib/Transforms/Scalar/InstructionCombining.cpp @@ -390,7 +390,7 @@ namespace { Value *EvaluateInDifferentType(Value *V, const Type *Ty, bool isSigned); - bool CanEvaluateInDifferentType(Value *V, const IntegerType *Ty, + bool CanEvaluateInDifferentType(Value *V, const Type *Ty, unsigned CastOpc, int &NumCastsRemoved); unsigned GetOrEnforceKnownAlignment(Value *V, unsigned PrefAlign = 0); @@ -654,30 +654,12 @@ static unsigned getOpcode(const Value *V) { } /// AddOne - Add one to a ConstantInt -static ConstantInt *AddOne(ConstantInt *C) { - APInt Val(C->getValue()); - return ConstantInt::get(++Val); +static Constant *AddOne(Constant *C) { + return ConstantExpr::getAdd(C, ConstantInt::get(C->getType(), 1)); } /// SubOne - Subtract one from a ConstantInt -static ConstantInt *SubOne(ConstantInt *C) { - APInt Val(C->getValue()); - return ConstantInt::get(--Val); -} -/// Add - Add two ConstantInts together -static ConstantInt *Add(ConstantInt *C1, ConstantInt *C2) { - return ConstantInt::get(C1->getValue() + C2->getValue()); -} -/// And - Bitwise AND two ConstantInts together -static ConstantInt *And(ConstantInt *C1, ConstantInt *C2) { - return ConstantInt::get(C1->getValue() & C2->getValue()); -} -/// Subtract - Subtract one ConstantInt from another -static ConstantInt *Subtract(ConstantInt *C1, ConstantInt *C2) { - return ConstantInt::get(C1->getValue() - C2->getValue()); -} -/// Multiply - Multiply two ConstantInts together -static ConstantInt *Multiply(ConstantInt *C1, ConstantInt *C2) { - return ConstantInt::get(C1->getValue() * C2->getValue()); +static Constant *SubOne(ConstantInt *C) { + return ConstantExpr::getSub(C, ConstantInt::get(C->getType(), 1)); } /// MultiplyOverflows - True if the multiply can not be expressed in an int /// this size. @@ -774,7 +756,7 @@ static void ComputeUnsignedMinMaxValuesFromKnownBits(const APInt &KnownZero, /// SimplifyDemandedBits knows about. See if the instruction has any /// properties that allow us to simplify its operands. 
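/// (Editor's note, illustrative and not from the commit: with the switch to
/// getScalarSizeInBits() below, BitWidth is 8 for an i8 instruction and also
/// 8 for a <4 x i8> one, so the demanded-bits machinery applies to vectors
/// as well as scalars.)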
bool InstCombiner::SimplifyDemandedInstructionBits(Instruction &Inst) { - unsigned BitWidth = cast<IntegerType>(Inst.getType())->getBitWidth(); + unsigned BitWidth = Inst.getType()->getScalarSizeInBits(); APInt KnownZero(BitWidth, 0), KnownOne(BitWidth, 0); APInt DemandedMask(APInt::getAllOnesValue(BitWidth)); @@ -830,13 +812,13 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask, const Type *VTy = V->getType(); assert((TD || !isa<PointerType>(VTy)) && "SimplifyDemandedBits needs to know bit widths!"); - assert((!TD || TD->getTypeSizeInBits(VTy) == BitWidth) && - (!isa<IntegerType>(VTy) || - VTy->getPrimitiveSizeInBits() == BitWidth) && + assert((!TD || TD->getTypeSizeInBits(VTy->getScalarType()) == BitWidth) && + (!VTy->isIntOrIntVector() || + VTy->getScalarSizeInBits() == BitWidth) && KnownZero.getBitWidth() == BitWidth && KnownOne.getBitWidth() == BitWidth && - "Value *V, DemandedMask, KnownZero and KnownOne \ - must have same BitWidth"); + "Value *V, DemandedMask, KnownZero and KnownOne " + "must have same BitWidth"); if (ConstantInt *CI = dyn_cast<ConstantInt>(V)) { // We know all of the bits for a constant! KnownOne = CI->getValue() & DemandedMask; @@ -1089,7 +1071,7 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask, RHSKnownZero &= LHSKnownZero; break; case Instruction::Trunc: { - unsigned truncBf = I->getOperand(0)->getType()->getPrimitiveSizeInBits(); + unsigned truncBf = I->getOperand(0)->getType()->getScalarSizeInBits(); DemandedMask.zext(truncBf); RHSKnownZero.zext(truncBf); RHSKnownOne.zext(truncBf); @@ -1112,7 +1094,7 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask, break; case Instruction::ZExt: { // Compute the bits in the result that are not present in the input. - unsigned SrcBitWidth =I->getOperand(0)->getType()->getPrimitiveSizeInBits(); + unsigned SrcBitWidth =I->getOperand(0)->getType()->getScalarSizeInBits(); DemandedMask.trunc(SrcBitWidth); RHSKnownZero.trunc(SrcBitWidth); @@ -1130,7 +1112,7 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask, } case Instruction::SExt: { // Compute the bits in the result that are not present in the input. - unsigned SrcBitWidth =I->getOperand(0)->getType()->getPrimitiveSizeInBits(); + unsigned SrcBitWidth =I->getOperand(0)->getType()->getScalarSizeInBits(); APInt InputDemandedBits = DemandedMask & APInt::getLowBitsSet(BitWidth, SrcBitWidth); @@ -1354,7 +1336,7 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask, if (ConstantInt *Rem = dyn_cast<ConstantInt>(I->getOperand(1))) { APInt RA = Rem->getValue().abs(); if (RA.isPowerOf2()) { - if (DemandedMask.ule(RA)) // srem won't affect demanded bits + if (DemandedMask.ult(RA)) // srem won't affect demanded bits return I->getOperand(0); APInt LowBits = RA - 1; @@ -2087,7 +2069,7 @@ Instruction *InstCombiner::visitAdd(BinaryOperator &I) { // See if SimplifyDemandedBits can simplify this. 
This handles stuff like // (X & 254)+1 -> (X&254)|1 - if (!isa<VectorType>(I.getType()) && SimplifyDemandedInstructionBits(I)) + if (SimplifyDemandedInstructionBits(I)) return &I; // zext(i1) - 1 -> select i1, 0, -1 @@ -2107,7 +2089,7 @@ Instruction *InstCombiner::visitAdd(BinaryOperator &I) { Value *XorLHS = 0; if (isa<ConstantInt>(RHSC) && match(LHS, m_Xor(m_Value(XorLHS), m_ConstantInt(XorRHS)))) { - uint32_t TySizeBits = I.getType()->getPrimitiveSizeInBits(); + uint32_t TySizeBits = I.getType()->getScalarSizeInBits(); const APInt& RHSVal = cast<ConstantInt>(RHSC)->getValue(); uint32_t Size = TySizeBits / 2; @@ -2197,7 +2179,7 @@ Instruction *InstCombiner::visitAdd(BinaryOperator &I) { // X*C1 + X*C2 --> X * (C1+C2) ConstantInt *C1; if (X == dyn_castFoldableMul(RHS, C1)) - return BinaryOperator::CreateMul(X, Add(C1, C2)); + return BinaryOperator::CreateMul(X, ConstantExpr::getAdd(C1, C2)); } // X + X*C --> X * (C+1) @@ -2262,7 +2244,7 @@ Instruction *InstCombiner::visitAdd(BinaryOperator &I) { // (X & FF00) + xx00 -> (X+xx00) & FF00 if (LHS->hasOneUse() && match(LHS, m_And(m_Value(X), m_ConstantInt(C2)))) { - Constant *Anded = And(CRHS, C2); + Constant *Anded = ConstantExpr::getAnd(CRHS, C2); if (Anded == CRHS) { // See if all bits from the first bit set in the Add RHS up are included // in the mask. First, get the rightmost bit. @@ -2290,7 +2272,7 @@ Instruction *InstCombiner::visitAdd(BinaryOperator &I) { } // add (cast *A to intptrtype) B -> - // cast (GEP (cast *A to sbyte*) B) --> intptrtype + // cast (GEP (cast *A to i8*) B) --> intptrtype { CastInst *CI = dyn_cast<CastInst>(LHS); Value *Other = RHS; @@ -2299,7 +2281,7 @@ Instruction *InstCombiner::visitAdd(BinaryOperator &I) { Other = LHS; } if (CI && CI->getType()->isSized() && - (CI->getType()->getPrimitiveSizeInBits() == + (CI->getType()->getScalarSizeInBits() == TD->getIntPtrType()->getPrimitiveSizeInBits()) && isa<PointerType>(CI->getOperand(0)->getType())) { unsigned AS = @@ -2523,7 +2505,7 @@ Instruction *InstCombiner::visitSub(BinaryOperator &I) { else if (ConstantInt *CI1 = dyn_cast<ConstantInt>(I.getOperand(0))) { if (ConstantInt *CI2 = dyn_cast<ConstantInt>(Op1I->getOperand(1))) // C1-(X+C2) --> (C1-C2)-X - return BinaryOperator::CreateSub(Subtract(CI1, CI2), + return BinaryOperator::CreateSub(ConstantExpr::getSub(CI1, CI2), Op1I->getOperand(0)); } } @@ -2564,7 +2546,8 @@ Instruction *InstCombiner::visitSub(BinaryOperator &I) { // X - X*C --> X * (1-C) ConstantInt *C2 = 0; if (dyn_castFoldableMul(Op1I, C2) == Op0) { - Constant *CP1 = Subtract(ConstantInt::get(I.getType(), 1), C2); + Constant *CP1 = ConstantExpr::getSub(ConstantInt::get(I.getType(), 1), + C2); return BinaryOperator::CreateMul(Op0, CP1); } } @@ -2589,7 +2572,7 @@ Instruction *InstCombiner::visitSub(BinaryOperator &I) { ConstantInt *C2; // X*C1 - X*C2 -> X * (C1-C2) if (X == dyn_castFoldableMul(Op1, C2)) - return BinaryOperator::CreateMul(X, Subtract(C1, C2)); + return BinaryOperator::CreateMul(X, ConstantExpr::getSub(C1, C2)); } return 0; } @@ -2950,12 +2933,12 @@ Instruction *InstCombiner::commonIDivTransforms(BinaryOperator &I) { // (sdiv X, X) --> 1 (udiv X, X) --> 1 if (Op0 == Op1) { if (const VectorType *Ty = dyn_cast<VectorType>(I.getType())) { - ConstantInt *CI = ConstantInt::get(Ty->getElementType(), 1); + Constant *CI = ConstantInt::get(Ty->getElementType(), 1); std::vector<Constant*> Elts(Ty->getNumElements(), CI); return ReplaceInstUsesWith(I, ConstantVector::get(Elts)); } - ConstantInt *CI = ConstantInt::get(I.getType(), 1); + Constant *CI = 
ConstantInt::get(I.getType(), 1); return ReplaceInstUsesWith(I, CI); } @@ -2980,7 +2963,7 @@ Instruction *InstCombiner::commonIDivTransforms(BinaryOperator &I) { return ReplaceInstUsesWith(I, Constant::getNullValue(I.getType())); else return BinaryOperator::Create(I.getOpcode(), LHS->getOperand(0), - Multiply(RHS, LHSRHS)); + ConstantExpr::getMul(RHS, LHSRHS)); } if (!RHS->isZero()) { // avoid X udiv 0 @@ -3513,7 +3496,7 @@ Instruction *InstCombiner::OptAndOp(Instruction *Op, Value *X = Op->getOperand(0); Constant *Together = 0; if (!Op->isShift()) - Together = And(AndRHS, OpRHS); + Together = ConstantExpr::getAnd(AndRHS, OpRHS); switch (Op->getOpcode()) { case Instruction::Xor: @@ -3724,7 +3707,7 @@ Value *InstCombiner::FoldLogicalPlusAnd(Value *LHS, Value *RHS, switch (LHSI->getOpcode()) { default: return 0; case Instruction::And: - if (And(N, Mask) == Mask) { + if (ConstantExpr::getAnd(N, Mask) == Mask) { // If the AndRHS is a power of two minus one (0+1+), this is simple. if ((Mask->getValue().countLeadingZeros() + Mask->getValue().countPopulation()) == @@ -3748,7 +3731,7 @@ Value *InstCombiner::FoldLogicalPlusAnd(Value *LHS, Value *RHS, // If the AndRHS is a power of two minus one (0+1+), and N&Mask == 0 if ((Mask->getValue().countLeadingZeros() + Mask->getValue().countPopulation()) == Mask->getValue().getBitWidth() - && And(N, Mask)->isZero()) + && ConstantExpr::getAnd(N, Mask)->isNullValue()) break; return 0; } @@ -3946,10 +3929,9 @@ Instruction *InstCombiner::visitAnd(BinaryOperator &I) { // See if we can simplify any instructions used by the instruction whose sole // purpose is to compute bits we don't care about. - if (!isa<VectorType>(I.getType())) { - if (SimplifyDemandedInstructionBits(I)) - return &I; - } else { + if (SimplifyDemandedInstructionBits(I)) + return &I; + if (isa<VectorType>(I.getType())) { if (ConstantVector *CP = dyn_cast<ConstantVector>(Op1)) { if (CP->isAllOnesValue()) // X & <-1,-1> -> X return ReplaceInstUsesWith(I, I.getOperand(0)); @@ -3957,7 +3939,7 @@ Instruction *InstCombiner::visitAnd(BinaryOperator &I) { return ReplaceInstUsesWith(I, Op1); // X & <0,0> -> <0,0> } } - + if (ConstantInt *AndRHS = dyn_cast<ConstantInt>(Op1)) { const APInt& AndRHSMask = AndRHS->getValue(); APInt NotAndRHS(~AndRHSMask); @@ -4510,7 +4492,7 @@ Instruction *InstCombiner::FoldOrOfICmps(Instruction &I, Instruction *Add = BinaryOperator::CreateAdd(Val, AddCST, Val->getName()+".off"); InsertNewInstBefore(Add, I); - AddCST = Subtract(AddOne(RHSCst), LHSCst); + AddCST = ConstantExpr::getSub(AddOne(RHSCst), LHSCst); return new ICmpInst(ICmpInst::ICMP_ULT, Add, AddCST); } break; // (X == 13 | X == 15) -> no change @@ -4653,18 +4635,17 @@ Instruction *InstCombiner::visitOr(BinaryOperator &I) { // See if we can simplify any instructions used by the instruction whose sole // purpose is to compute bits we don't care about. 
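// Editor's note (illustrative, not from the commit): demanded-bits
// simplification now runs on vector-typed instructions too, using the
// element width, while constant-vector identities such as X | <0,0> -> X
// are still matched explicitly below.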
- if (!isa<VectorType>(I.getType())) { - if (SimplifyDemandedInstructionBits(I)) - return &I; - } else if (isa<ConstantAggregateZero>(Op1)) { - return ReplaceInstUsesWith(I, Op0); // X | <0,0> -> X - } else if (ConstantVector *CP = dyn_cast<ConstantVector>(Op1)) { - if (CP->isAllOnesValue()) // X | <-1,-1> -> <-1,-1> - return ReplaceInstUsesWith(I, I.getOperand(1)); + if (SimplifyDemandedInstructionBits(I)) + return &I; + if (isa<VectorType>(I.getType())) { + if (isa<ConstantAggregateZero>(Op1)) { + return ReplaceInstUsesWith(I, Op0); // X | <0,0> -> X + } else if (ConstantVector *CP = dyn_cast<ConstantVector>(Op1)) { + if (CP->isAllOnesValue()) // X | <-1,-1> -> <-1,-1> + return ReplaceInstUsesWith(I, I.getOperand(1)); + } } - - // or X, -1 == -1 if (ConstantInt *RHS = dyn_cast<ConstantInt>(Op1)) { ConstantInt *C1 = 0; Value *X = 0; @@ -4991,12 +4972,11 @@ Instruction *InstCombiner::visitXor(BinaryOperator &I) { // See if we can simplify any instructions used by the instruction whose sole // purpose is to compute bits we don't care about. - if (!isa<VectorType>(I.getType())) { - if (SimplifyDemandedInstructionBits(I)) - return &I; - } else if (isa<ConstantAggregateZero>(Op1)) { - return ReplaceInstUsesWith(I, Op0); // X ^ <0,0> -> X - } + if (SimplifyDemandedInstructionBits(I)) + return &I; + if (isa<VectorType>(I.getType())) + if (isa<ConstantAggregateZero>(Op1)) + return ReplaceInstUsesWith(I, Op0); // X ^ <0,0> -> X // Is this a ~ operation? if (Value *NotOp = dyn_castNotVal(&I)) { @@ -5083,7 +5063,7 @@ Instruction *InstCombiner::visitXor(BinaryOperator &I) { Constant *NewRHS = ConstantExpr::getOr(Op0CI, RHS); // Anything in both C1 and C2 is known to be zero, remove it from // NewRHS. - Constant *CommonBits = And(Op0CI, RHS); + Constant *CommonBits = ConstantExpr::getAnd(Op0CI, RHS); NewRHS = ConstantExpr::getAnd(NewRHS, ConstantExpr::getNot(CommonBits)); AddToWorkList(Op0I); @@ -5247,12 +5227,13 @@ Instruction *InstCombiner::visitXor(BinaryOperator &I) { return Changed ? &I : 0; } -/// AddWithOverflow - Compute Result = In1+In2, returning true if the result -/// overflowed for this type. -static bool AddWithOverflow(ConstantInt *&Result, ConstantInt *In1, - ConstantInt *In2, bool IsSigned = false) { - Result = cast<ConstantInt>(Add(In1, In2)); +static ConstantInt *ExtractElement(Constant *V, Constant *Idx) { + return cast<ConstantInt>(ConstantExpr::getExtractElement(V, Idx)); +} +static bool HasAddOverflow(ConstantInt *Result, + ConstantInt *In1, ConstantInt *In2, + bool IsSigned) { if (IsSigned) if (In2->getValue().isNegative()) return Result->getValue().sgt(In1->getValue()); @@ -5262,12 +5243,32 @@ static bool AddWithOverflow(ConstantInt *&Result, ConstantInt *In1, return Result->getValue().ult(In1->getValue()); } -/// SubWithOverflow - Compute Result = In1-In2, returning true if the result +/// AddWithOverflow - Compute Result = In1+In2, returning true if the result /// overflowed for this type. 
-static bool SubWithOverflow(ConstantInt *&Result, ConstantInt *In1, - ConstantInt *In2, bool IsSigned = false) { - Result = cast<ConstantInt>(Subtract(In1, In2)); +static bool AddWithOverflow(Constant *&Result, Constant *In1, + Constant *In2, bool IsSigned = false) { + Result = ConstantExpr::getAdd(In1, In2); + + if (const VectorType *VTy = dyn_cast<VectorType>(In1->getType())) { + for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i) { + Constant *Idx = ConstantInt::get(Type::Int32Ty, i); + if (HasAddOverflow(ExtractElement(Result, Idx), + ExtractElement(In1, Idx), + ExtractElement(In2, Idx), + IsSigned)) + return true; + } + return false; + } + + return HasAddOverflow(cast<ConstantInt>(Result), + cast<ConstantInt>(In1), cast<ConstantInt>(In2), + IsSigned); +} +static bool HasSubOverflow(ConstantInt *Result, + ConstantInt *In1, ConstantInt *In2, + bool IsSigned) { if (IsSigned) if (In2->getValue().isNegative()) return Result->getValue().slt(In1->getValue()); @@ -5277,6 +5278,29 @@ static bool SubWithOverflow(ConstantInt *&Result, ConstantInt *In1, return Result->getValue().ugt(In1->getValue()); } +/// SubWithOverflow - Compute Result = In1-In2, returning true if the result +/// overflowed for this type. +static bool SubWithOverflow(Constant *&Result, Constant *In1, + Constant *In2, bool IsSigned = false) { + Result = ConstantExpr::getSub(In1, In2); + + if (const VectorType *VTy = dyn_cast<VectorType>(In1->getType())) { + for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i) { + Constant *Idx = ConstantInt::get(Type::Int32Ty, i); + if (HasSubOverflow(ExtractElement(Result, Idx), + ExtractElement(In1, Idx), + ExtractElement(In2, Idx), + IsSigned)) + return true; + } + return false; + } + + return HasSubOverflow(cast<ConstantInt>(Result), + cast<ConstantInt>(In1), cast<ConstantInt>(In2), + IsSigned); +} + /// EmitGEPOffset - Given a getelementptr instruction/constantexpr, emit the /// code necessary to compute the offset from the base pointer (without adding /// in the base pointer). Return the result as a signed integer of intptr size. @@ -5589,7 +5613,7 @@ Instruction *InstCombiner::FoldFCmp_IntToFP_Cst(FCmpInst &I, // Check to see that the input is converted from an integer type that is small // enough that preserves all bits. TODO: check here for "known" sign bits. // This would allow us to handle (fptosi (x >>s 62) to float) if x is i64 f.e. - unsigned InputSize = LHSI->getOperand(0)->getType()->getPrimitiveSizeInBits(); + unsigned InputSize = LHSI->getOperand(0)->getType()->getScalarSizeInBits(); // If this is a uitofp instruction, we need an extra bit to hold the sign. bool LHSUnsigned = isa<UIToFPInst>(LHSI); @@ -5644,7 +5668,7 @@ Instruction *InstCombiner::FoldFCmp_IntToFP_Cst(FCmpInst &I, // See if the FP constant is too large for the integer. For example, // comparing an i8 to 300.0. - unsigned IntWidth = IntTy->getPrimitiveSizeInBits(); + unsigned IntWidth = IntTy->getScalarSizeInBits(); if (!LHSUnsigned) { // If the RHS value is > SignedMax, fold the comparison. 
This handles +INF @@ -5943,9 +5967,9 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) { unsigned BitWidth = 0; if (TD) - BitWidth = TD->getTypeSizeInBits(Ty); - else if (isa<IntegerType>(Ty)) - BitWidth = Ty->getPrimitiveSizeInBits(); + BitWidth = TD->getTypeSizeInBits(Ty->getScalarType()); + else if (Ty->isIntOrIntVector()) + BitWidth = Ty->getScalarSizeInBits(); bool isSignBit = false; @@ -6459,7 +6483,7 @@ Instruction *InstCombiner::FoldICmpDivCst(ICmpInst &ICI, BinaryOperator *DivI, // of form X/C1=C2. We solve for X by multiplying C1 (DivRHS) and // C2 (CI). By solving for X we can turn this into a range check // instead of computing a divide. - ConstantInt *Prod = Multiply(CmpRHS, DivRHS); + Constant *Prod = ConstantExpr::getMul(CmpRHS, DivRHS); // Determine if the product overflows by seeing if the product is // not equal to the divide. Make sure we do the same kind of divide @@ -6478,7 +6502,7 @@ Instruction *InstCombiner::FoldICmpDivCst(ICmpInst &ICI, BinaryOperator *DivI, // overflow variable is set to 0 if it's corresponding bound variable is valid // -1 if overflowed off the bottom end, or +1 if overflowed off the top end. int LoOverflow = 0, HiOverflow = 0; - ConstantInt *LoBound = 0, *HiBound = 0; + Constant *LoBound = 0, *HiBound = 0; if (!DivIsSigned) { // udiv // e.g. X/5 op 3 --> [15, 20) @@ -6966,7 +6990,7 @@ Instruction *InstCombiner::visitICmpInstWithInstAndIntCst(ICmpInst &ICI, if (ConstantInt *BOp1C = dyn_cast<ConstantInt>(BO->getOperand(1))) { if (BO->hasOneUse()) return new ICmpInst(ICI.getPredicate(), BO->getOperand(0), - Subtract(RHS, BOp1C)); + ConstantExpr::getSub(RHS, BOp1C)); } else if (RHSV == 0) { // Replace ((add A, B) != 0) with (A != -B) if A or B is // efficiently invertible, or if the add has just this one use. @@ -7133,10 +7157,10 @@ Instruction *InstCombiner::visitICmpInstWithCastAndCast(ICmpInst &ICI) { if (Res2 == CI) { // Make sure that sign of the Cmp and the sign of the Cast are the same. // For example, we might have: - // %A = sext short %X to uint - // %B = icmp ugt uint %A, 1330 + // %A = sext i16 %X to i32 + // %B = icmp ugt i32 %A, 1330 // It is incorrect to transform this into - // %B = icmp ugt short %X, 1330 + // %B = icmp ugt i16 %X, 1330 // because %A may have negative value. // // However, we allow this when the compare is EQ/NE, because they are @@ -7210,18 +7234,16 @@ Instruction *InstCombiner::visitAShr(BinaryOperator &I) { if (ConstantInt *CSI = dyn_cast<ConstantInt>(Op0)) if (CSI->isAllOnesValue()) return ReplaceInstUsesWith(I, CSI); - + // See if we can turn a signed shr into an unsigned shr. - if (!isa<VectorType>(I.getType())) { - if (MaskedValueIsZero(Op0, - APInt::getSignBit(I.getType()->getPrimitiveSizeInBits()))) - return BinaryOperator::CreateLShr(Op0, I.getOperand(1)); - - // Arithmetic shifting an all-sign-bit value is a no-op. - unsigned NumSignBits = ComputeNumSignBits(Op0); - if (NumSignBits == Op0->getType()->getPrimitiveSizeInBits()) - return ReplaceInstUsesWith(I, Op0); - } + if (MaskedValueIsZero(Op0, + APInt::getSignBit(I.getType()->getScalarSizeInBits()))) + return BinaryOperator::CreateLShr(Op0, I.getOperand(1)); + + // Arithmetic shifting an all-sign-bit value is a no-op. + unsigned NumSignBits = ComputeNumSignBits(Op0); + if (NumSignBits == Op0->getType()->getScalarSizeInBits()) + return ReplaceInstUsesWith(I, Op0); return 0; } @@ -7250,7 +7272,7 @@ Instruction *InstCombiner::commonShiftTransforms(BinaryOperator &I) { } // See if we can fold away this shift. 
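The FoldICmpDivCst change above keeps the same range-check strategy while letting the bounds be vector constants. The underlying arithmetic for the unsigned scalar case: X /u 5 == 3 holds exactly for X in [15, 20), so the divide can be answered with one subtraction and one unsigned compare. A quick exhaustive check over small inputs (plain C++ stand-in):

#include <cstdint>
#include <cstdio>

int main() {
  const uint32_t C1 = 5, C2 = 3;
  const uint32_t Lo = C1 * C2, Hi = C1 * (C2 + 1);   // [15, 20)
  for (uint32_t X = 0; X != 200; ++X) {
    bool byDivide = (X / C1 == C2);
    bool byRange  = (X - Lo) < (Hi - Lo);            // single unsigned compare
    if (byDivide != byRange) { printf("mismatch at %u\n", X); return 1; }
  }
  printf("range check matches the divide\n");
}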
- if (!isa<VectorType>(I.getType()) && SimplifyDemandedInstructionBits(I)) + if (SimplifyDemandedInstructionBits(I)) return &I; // Try to fold constant and into select arguments. @@ -7271,10 +7293,10 @@ Instruction *InstCombiner::FoldShiftByConstant(Value *Op0, ConstantInt *Op1, // See if we can simplify any instructions used by the instruction whose sole // purpose is to compute bits we don't care about. - uint32_t TypeBits = Op0->getType()->getPrimitiveSizeInBits(); + uint32_t TypeBits = Op0->getType()->getScalarSizeInBits(); - // shl uint X, 32 = 0 and shr ubyte Y, 9 = 0, ... just don't eliminate shr - // of a signed value. + // shl i32 X, 32 = 0 and srl i8 Y, 9 = 0, ... just don't eliminate + // a signed shift. // if (Op1->uge(TypeBits)) { if (I.getOpcode() != Instruction::AShr) @@ -7320,8 +7342,8 @@ Instruction *InstCombiner::FoldShiftByConstant(Value *Op0, ConstantInt *Op1, // part of the register be zeros. Emulate this by inserting an AND to // clear the top bits as needed. This 'and' will usually be zapped by // other xforms later if dead. - unsigned SrcSize = TrOp->getType()->getPrimitiveSizeInBits(); - unsigned DstSize = TI->getType()->getPrimitiveSizeInBits(); + unsigned SrcSize = TrOp->getType()->getScalarSizeInBits(); + unsigned DstSize = TI->getType()->getScalarSizeInBits(); APInt MaskV(APInt::getLowBitsSet(SrcSize, DstSize)); // The mask we constructed says what the trunc would do if occurring @@ -7729,7 +7751,8 @@ Instruction *InstCombiner::PromoteCastOfAllocation(BitCastInst &CI, // If the allocation size is constant, form a constant mul expression Amt = ConstantInt::get(Type::Int32Ty, Scale); if (isa<ConstantInt>(NumElements)) - Amt = Multiply(cast<ConstantInt>(NumElements), cast<ConstantInt>(Amt)); + Amt = ConstantExpr::getMul(cast<ConstantInt>(NumElements), + cast<ConstantInt>(Amt)); // otherwise multiply the amount and the number of elements else { Instruction *Tmp = BinaryOperator::CreateMul(Amt, NumElements, "tmp"); @@ -7788,17 +7811,17 @@ Instruction *InstCombiner::PromoteCastOfAllocation(BitCastInst &CI, /// If CastOpc is a sext or zext, we are asking if the low bits of the value can /// bit computed in a larger type, which is then and'd or sext_in_reg'd to get /// the final result. -bool InstCombiner::CanEvaluateInDifferentType(Value *V, const IntegerType *Ty, +bool InstCombiner::CanEvaluateInDifferentType(Value *V, const Type *Ty, unsigned CastOpc, int &NumCastsRemoved){ // We can always evaluate constants in another type. - if (isa<ConstantInt>(V)) + if (isa<Constant>(V)) return true; Instruction *I = dyn_cast<Instruction>(V); if (!I) return false; - const IntegerType *OrigTy = cast<IntegerType>(V->getType()); + const Type *OrigTy = V->getType(); // If this is an extension or truncate, we can often eliminate it. if (isa<TruncInst>(I) || isa<ZExtInst>(I) || isa<SExtInst>(I)) { @@ -7836,8 +7859,8 @@ bool InstCombiner::CanEvaluateInDifferentType(Value *V, const IntegerType *Ty, // If we are truncating the result of this SHL, and if it's a shift of a // constant amount, we can always perform a SHL in a smaller type. 
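The reworded comment above is worth spelling out: a shift amount equal to or exceeding the bit width lets shl and lshr fold to zero, but ashr must be left alone, because a negative value keeps producing sign bits no matter how far it is shifted. A C++ model (plain C++ cannot express the oversized shift directly, since it is undefined behavior at the source level, so the model checks the amount first):

#include <cstdint>
#include <cstdio>

static uint32_t lshrModel(uint32_t x, unsigned amt) {
  return amt >= 32 ? 0u : x >> amt;   // lshr folds to 0 once amt >= width
}
static int32_t ashrModel(int32_t x, unsigned amt) {
  if (amt >= 32) amt = 31;            // ashr saturates to all sign bits
  return x >> amt;  // assumes arithmetic >> on signed values (common ABIs)
}

int main() {
  printf("%u %d %d\n", lshrModel(0xdeadbeefu, 33),
         ashrModel(-1, 40), ashrModel(5, 40));   // 0 -1 0
}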
if (ConstantInt *CI = dyn_cast<ConstantInt>(I->getOperand(1))) { - uint32_t BitWidth = Ty->getBitWidth(); - if (BitWidth < OrigTy->getBitWidth() && + uint32_t BitWidth = Ty->getScalarSizeInBits(); + if (BitWidth < OrigTy->getScalarSizeInBits() && CI->getLimitedValue(BitWidth) < BitWidth) return CanEvaluateInDifferentType(I->getOperand(0), Ty, CastOpc, NumCastsRemoved); @@ -7848,8 +7871,8 @@ bool InstCombiner::CanEvaluateInDifferentType(Value *V, const IntegerType *Ty, // lshr iff we know that the bits we would otherwise be shifting in are // already zeros. if (ConstantInt *CI = dyn_cast<ConstantInt>(I->getOperand(1))) { - uint32_t OrigBitWidth = OrigTy->getBitWidth(); - uint32_t BitWidth = Ty->getBitWidth(); + uint32_t OrigBitWidth = OrigTy->getScalarSizeInBits(); + uint32_t BitWidth = Ty->getScalarSizeInBits(); if (BitWidth < OrigBitWidth && MaskedValueIsZero(I->getOperand(0), APInt::getHighBitsSet(OrigBitWidth, OrigBitWidth-BitWidth)) && @@ -8131,8 +8154,8 @@ Instruction *InstCombiner::commonIntCastTransforms(CastInst &CI) { Value *Src = CI.getOperand(0); const Type *SrcTy = Src->getType(); const Type *DestTy = CI.getType(); - uint32_t SrcBitSize = SrcTy->getPrimitiveSizeInBits(); - uint32_t DestBitSize = DestTy->getPrimitiveSizeInBits(); + uint32_t SrcBitSize = SrcTy->getScalarSizeInBits(); + uint32_t DestBitSize = DestTy->getScalarSizeInBits(); // See if we can simplify any instructions used by the LHS whose sole // purpose is to compute bits we don't care about. @@ -8151,8 +8174,9 @@ Instruction *InstCombiner::commonIntCastTransforms(CastInst &CI) { // Only do this if the dest type is a simple type, don't convert the // expression tree to something weird like i93 unless the source is also // strange. - (isSafeIntegerType(DestTy) || !isSafeIntegerType(SrcI->getType())) && - CanEvaluateInDifferentType(SrcI, cast<IntegerType>(DestTy), + (isSafeIntegerType(DestTy->getScalarType()) || + !isSafeIntegerType(SrcI->getType()->getScalarType())) && + CanEvaluateInDifferentType(SrcI, DestTy, CI.getOpcode(), NumCastsRemoved)) { // If this cast is a truncate, evaluting in a different type always // eliminates the cast, so it is always a win. If this is a zero-extension, @@ -8350,17 +8374,18 @@ Instruction *InstCombiner::visitTrunc(TruncInst &CI) { Value *Src = CI.getOperand(0); const Type *Ty = CI.getType(); - uint32_t DestBitWidth = Ty->getPrimitiveSizeInBits(); - uint32_t SrcBitWidth = cast<IntegerType>(Src->getType())->getBitWidth(); + uint32_t DestBitWidth = Ty->getScalarSizeInBits(); + uint32_t SrcBitWidth = Src->getType()->getScalarSizeInBits(); // Canonicalize trunc x to i1 -> (icmp ne (and x, 1), 0) - if (DestBitWidth == 1) { + if (DestBitWidth == 1 && + isa<VectorType>(Ty) == isa<VectorType>(Src->getType())) { Constant *One = ConstantInt::get(Src->getType(), 1); Src = InsertNewInstBefore(BinaryOperator::CreateAnd(Src, One, "tmp"), CI); Value *Zero = Constant::getNullValue(Src->getType()); return new ICmpInst(ICmpInst::ICMP_NE, Src, Zero); } - + // Optimize trunc(lshr(), c) to pull the shift through the truncate. 
ConstantInt *ShAmtV = 0; Value *ShiftOp = 0; @@ -8403,7 +8428,7 @@ Instruction *InstCombiner::transformZExtICmp(ICmpInst *ICI, Instruction &CI, Value *In = ICI->getOperand(0); Value *Sh = ConstantInt::get(In->getType(), - In->getType()->getPrimitiveSizeInBits()-1); + In->getType()->getScalarSizeInBits()-1); In = InsertNewInstBefore(BinaryOperator::CreateLShr(In, Sh, In->getName()+".lobit"), CI); @@ -8494,28 +8519,30 @@ Instruction *InstCombiner::visitZExt(ZExtInst &CI) { // Get the sizes of the types involved. We know that the intermediate type // will be smaller than A or C, but don't know the relation between A and C. Value *A = CSrc->getOperand(0); - unsigned SrcSize = A->getType()->getPrimitiveSizeInBits(); - unsigned MidSize = CSrc->getType()->getPrimitiveSizeInBits(); - unsigned DstSize = CI.getType()->getPrimitiveSizeInBits(); + unsigned SrcSize = A->getType()->getScalarSizeInBits(); + unsigned MidSize = CSrc->getType()->getScalarSizeInBits(); + unsigned DstSize = CI.getType()->getScalarSizeInBits(); // If we're actually extending zero bits, then if // SrcSize < DstSize: zext(a & mask) // SrcSize == DstSize: a & mask // SrcSize > DstSize: trunc(a) & mask if (SrcSize < DstSize) { APInt AndValue(APInt::getLowBitsSet(SrcSize, MidSize)); - Constant *AndConst = ConstantInt::get(AndValue); + Constant *AndConst = ConstantInt::get(A->getType(), AndValue); Instruction *And = BinaryOperator::CreateAnd(A, AndConst, CSrc->getName()+".mask"); InsertNewInstBefore(And, CI); return new ZExtInst(And, CI.getType()); } else if (SrcSize == DstSize) { APInt AndValue(APInt::getLowBitsSet(SrcSize, MidSize)); - return BinaryOperator::CreateAnd(A, ConstantInt::get(AndValue)); + return BinaryOperator::CreateAnd(A, ConstantInt::get(A->getType(), + AndValue)); } else if (SrcSize > DstSize) { Instruction *Trunc = new TruncInst(A, CI.getType(), "tmp"); InsertNewInstBefore(Trunc, CI); APInt AndValue(APInt::getLowBitsSet(DstSize, MidSize)); - return BinaryOperator::CreateAnd(Trunc, ConstantInt::get(AndValue)); + return BinaryOperator::CreateAnd(Trunc, ConstantInt::get(Trunc->getType(), + AndValue)); } } @@ -8537,6 +8564,33 @@ Instruction *InstCombiner::visitZExt(ZExtInst &CI) { } } + // zext(trunc(t) & C) -> (t & zext(C)). + if (SrcI && SrcI->getOpcode() == Instruction::And && SrcI->hasOneUse()) + if (ConstantInt *C = dyn_cast<ConstantInt>(SrcI->getOperand(1))) + if (TruncInst *TI = dyn_cast<TruncInst>(SrcI->getOperand(0))) { + Value *TI0 = TI->getOperand(0); + if (TI0->getType() == CI.getType()) + return + BinaryOperator::CreateAnd(TI0, + ConstantExpr::getZExt(C, CI.getType())); + } + + // zext((trunc(t) & C) ^ C) -> ((t & zext(C)) ^ zext(C)). + if (SrcI && SrcI->getOpcode() == Instruction::Xor && SrcI->hasOneUse()) + if (ConstantInt *C = dyn_cast<ConstantInt>(SrcI->getOperand(1))) + if (BinaryOperator *And = dyn_cast<BinaryOperator>(SrcI->getOperand(0))) + if (And->getOpcode() == Instruction::And && And->hasOneUse() && + And->getOperand(1) == C) + if (TruncInst *TI = dyn_cast<TruncInst>(And->getOperand(0))) { + Value *TI0 = TI->getOperand(0); + if (TI0->getType() == CI.getType()) { + Constant *ZC = ConstantExpr::getZExt(C, CI.getType()); + Instruction *NewAnd = BinaryOperator::CreateAnd(TI0, ZC, "tmp"); + InsertNewInstBefore(NewAnd, *And); + return BinaryOperator::CreateXor(NewAnd, ZC); + } + } + return 0; } @@ -8556,9 +8610,9 @@ Instruction *InstCombiner::visitSExt(SExtInst &CI) { // eliminate the trunc/sext pair. 
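The new zext(trunc(t) & C) --> t & zext(C) fold above rests on a simple identity: masking after a truncate keeps only bits that the widened mask would have kept anyway. A concrete check at the i64/i32 widths (values are illustrative):

#include <cstdint>
#include <cstdio>

int main() {
  uint64_t t = 0x123456789abcdef0ull;
  uint32_t C = 0x00ff00ffu;
  uint64_t a = (uint64_t)((uint32_t)t & C);   // zext(trunc(t) & C)
  uint64_t b = t & (uint64_t)C;               // t & zext(C)
  printf("%llx %llx %s\n", (unsigned long long)a, (unsigned long long)b,
         a == b ? "equal" : "BUG");
}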
if (getOpcode(Src) == Instruction::Trunc) { Value *Op = cast<User>(Src)->getOperand(0); - unsigned OpBits = cast<IntegerType>(Op->getType())->getBitWidth(); - unsigned MidBits = cast<IntegerType>(Src->getType())->getBitWidth(); - unsigned DestBits = cast<IntegerType>(CI.getType())->getBitWidth(); + unsigned OpBits = Op->getType()->getScalarSizeInBits(); + unsigned MidBits = Src->getType()->getScalarSizeInBits(); + unsigned DestBits = CI.getType()->getScalarSizeInBits(); unsigned NumSignBits = ComputeNumSignBits(Op); if (OpBits == DestBits) { @@ -8599,8 +8653,8 @@ Instruction *InstCombiner::visitSExt(SExtInst &CI) { BA == CA && isa<TruncInst>(A)) { Value *I = cast<TruncInst>(A)->getOperand(0); if (I->getType() == CI.getType()) { - unsigned MidSize = Src->getType()->getPrimitiveSizeInBits(); - unsigned SrcDstSize = CI.getType()->getPrimitiveSizeInBits(); + unsigned MidSize = Src->getType()->getScalarSizeInBits(); + unsigned SrcDstSize = CI.getType()->getScalarSizeInBits(); unsigned ShAmt = CA->getZExtValue()+SrcDstSize-MidSize; Constant *ShAmtV = ConstantInt::get(CI.getType(), ShAmt); I = InsertNewInstBefore(BinaryOperator::CreateShl(I, ShAmtV, @@ -8671,11 +8725,11 @@ Instruction *InstCombiner::visitFPTrunc(FPTruncInst &CI) { Value *RHSTrunc = LookThroughFPExtensions(OpI->getOperand(1)); if (LHSTrunc->getType() != SrcTy && RHSTrunc->getType() != SrcTy) { - unsigned DstSize = CI.getType()->getPrimitiveSizeInBits(); + unsigned DstSize = CI.getType()->getScalarSizeInBits(); // If the source types were both smaller than the destination type of // the cast, do this xform. - if (LHSTrunc->getType()->getPrimitiveSizeInBits() <= DstSize && - RHSTrunc->getType()->getPrimitiveSizeInBits() <= DstSize) { + if (LHSTrunc->getType()->getScalarSizeInBits() <= DstSize && + RHSTrunc->getType()->getScalarSizeInBits() <= DstSize) { LHSTrunc = InsertCastBefore(Instruction::FPExt, LHSTrunc, CI.getType(), CI); RHSTrunc = InsertCastBefore(Instruction::FPExt, RHSTrunc, @@ -8706,7 +8760,7 @@ Instruction *InstCombiner::visitFPToUI(FPToUIInst &FI) { // 'X' value would cause an undefined result for the fptoui. if ((isa<UIToFPInst>(OpI) || isa<SIToFPInst>(OpI)) && OpI->getOperand(0)->getType() == FI.getType() && - (int)FI.getType()->getPrimitiveSizeInBits() < /*extra bit for sign */ + (int)FI.getType()->getScalarSizeInBits() < /*extra bit for sign */ OpI->getType()->getFPMantissaWidth()) return ReplaceInstUsesWith(FI, OpI->getOperand(0)); @@ -8726,7 +8780,7 @@ Instruction *InstCombiner::visitFPToSI(FPToSIInst &FI) { // 'X' value would cause an undefined result for the fptoui. if ((isa<UIToFPInst>(OpI) || isa<SIToFPInst>(OpI)) && OpI->getOperand(0)->getType() == FI.getType() && - (int)FI.getType()->getPrimitiveSizeInBits() <= + (int)FI.getType()->getScalarSizeInBits() <= OpI->getType()->getFPMantissaWidth()) return ReplaceInstUsesWith(FI, OpI->getOperand(0)); @@ -8747,7 +8801,7 @@ Instruction *InstCombiner::visitPtrToInt(PtrToIntInst &CI) { // trunc to be exposed to other transforms. Don't do this for extending // ptrtoint's, because we don't know if the target sign or zero extends its // pointers. - if (CI.getType()->getPrimitiveSizeInBits() < TD->getPointerSizeInBits()) { + if (CI.getType()->getScalarSizeInBits() < TD->getPointerSizeInBits()) { Value *P = InsertNewInstBefore(new PtrToIntInst(CI.getOperand(0), TD->getIntPtrType(), "tmp"), CI); @@ -8763,7 +8817,7 @@ Instruction *InstCombiner::visitIntToPtr(IntToPtrInst &CI) { // allows the trunc to be exposed to other transforms. 
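The width test rewritten above (now via getScalarSizeInBits) guards the fptoui(uitofp X) and fptosi(sitofp X) eliminations: the round trip is only an identity when the integer is no wider than what the FP mantissa represents exactly, about 24 effective bits for float and 53 for double. A demonstration of the failure just past the float limit:

#include <cstdint>
#include <cstdio>

int main() {
  int32_t fits = (1 << 24) - 1;   // representable exactly in float
  int32_t over = (1 << 24) + 1;   // 16777217 is not
  printf("%d -> %d\n", fits, (int32_t)(float)fits);   // unchanged
  printf("%d -> %d\n", over, (int32_t)(float)over);   // rounds to 16777216
}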
Don't do this for // extending inttoptr's, because we don't know if the target sign or zero // extends to pointers. - if (CI.getOperand(0)->getType()->getPrimitiveSizeInBits() > + if (CI.getOperand(0)->getType()->getScalarSizeInBits() > TD->getPointerSizeInBits()) { Value *P = InsertNewInstBefore(new TruncInst(CI.getOperand(0), TD->getIntPtrType(), @@ -9194,7 +9248,7 @@ Instruction *InstCombiner::visitSelectInstWithICmp(SelectInst &SI, (Pred == ICmpInst::ICMP_SGT && Op1CV.isAllOnesValue())) { Value *In = ICI->getOperand(0); Value *Sh = ConstantInt::get(In->getType(), - In->getType()->getPrimitiveSizeInBits()-1); + In->getType()->getScalarSizeInBits()-1); In = InsertNewInstBefore(BinaryOperator::CreateAShr(In, Sh, In->getName()+".lobit"), *ICI); @@ -9316,7 +9370,7 @@ Instruction *InstCombiner::visitSelectInst(SelectInst &SI) { // The comparison constant and the result are not neccessarily the // same width. Make an all-ones value by inserting a AShr. Value *X = IC->getOperand(0); - uint32_t Bits = X->getType()->getPrimitiveSizeInBits(); + uint32_t Bits = X->getType()->getScalarSizeInBits(); Constant *ShAmt = ConstantInt::get(X->getType(), Bits-1); Instruction *SRA = BinaryOperator::Create(Instruction::AShr, X, ShAmt, "ones"); @@ -10850,8 +10904,8 @@ Instruction *InstCombiner::visitPHINode(PHINode &PN) { static Value *InsertCastToIntPtrTy(Value *V, const Type *DTy, Instruction *InsertPoint, InstCombiner *IC) { - unsigned PtrSize = DTy->getPrimitiveSizeInBits(); - unsigned VTySize = V->getType()->getPrimitiveSizeInBits(); + unsigned PtrSize = DTy->getScalarSizeInBits(); + unsigned VTySize = V->getType()->getScalarSizeInBits(); // We must cast correctly to the pointer type. Ensure that we // sign extend the integer value if it is smaller as this is // used for address computation. @@ -10892,7 +10946,7 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) { const Type *SrcTy = CI->getOperand(0)->getType(); // We can eliminate a cast from i32 to i64 iff the target // is a 32-bit pointer target. - if (SrcTy->getPrimitiveSizeInBits() >= TD->getPointerSizeInBits()) { + if (SrcTy->getScalarSizeInBits() >= TD->getPointerSizeInBits()) { MadeChange = true; *i = CI->getOperand(0); } @@ -11105,7 +11159,7 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) { ConstantInt *Scale = 0; if (ArrayEltSize == 1) { NewIdx = GEP.getOperand(1); - Scale = ConstantInt::get(NewIdx->getType(), 1); + Scale = ConstantInt::get(cast<IntegerType>(NewIdx->getType()), 1); } else if (ConstantInt *CI = dyn_cast<ConstantInt>(GEP.getOperand(1))) { NewIdx = ConstantInt::get(CI->getType(), 1); Scale = CI; @@ -11114,7 +11168,8 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) { isa<ConstantInt>(Inst->getOperand(1))) { ConstantInt *ShAmt = cast<ConstantInt>(Inst->getOperand(1)); uint32_t ShAmtVal = ShAmt->getLimitedValue(64); - Scale = ConstantInt::get(Inst->getType(), 1ULL << ShAmtVal); + Scale = ConstantInt::get(cast<IntegerType>(Inst->getType()), + 1ULL << ShAmtVal); NewIdx = Inst->getOperand(0); } else if (Inst->getOpcode() == Instruction::Mul && isa<ConstantInt>(Inst->getOperand(1))) { @@ -11390,45 +11445,6 @@ static Instruction *InstCombineLoadCast(InstCombiner &IC, LoadInst &LI, return 0; } -/// isSafeToLoadUnconditionally - Return true if we know that executing a load -/// from this value cannot trap. 
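The ".lobit" construction above turns a sign test into data flow: shifting right by width-1 isolates the sign bit, and with an arithmetic shift it yields an all-ones/all-zeros mask, which is what visitSelectInstWithICmp wants in place of a select on (X < 0). C++ illustration (assumes arithmetic >> for signed values, as on common ABIs):

#include <cstdint>
#include <cstdio>

int main() {
  int32_t xs[5] = {-7, -1, 0, 1, 42};
  for (int i = 0; i != 5; ++i) {
    int32_t x = xs[i];
    int32_t mask = x >> 31;            // -1 if negative, else 0 (ashr)
    uint32_t bit = (uint32_t)x >> 31;  //  1 if negative, else 0 (lshr)
    printf("%d -> mask %d, lobit %u\n", x, mask, bit);
  }
}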
If it is not obviously safe to load from the -/// specified pointer, we do a quick local scan of the basic block containing -/// ScanFrom, to determine if the address is already accessed. -static bool isSafeToLoadUnconditionally(Value *V, Instruction *ScanFrom) { - // If it is an alloca it is always safe to load from. - if (isa<AllocaInst>(V)) return true; - - // If it is a global variable it is mostly safe to load from. - if (const GlobalValue *GV = dyn_cast<GlobalVariable>(V)) - // Don't try to evaluate aliases. External weak GV can be null. - return !isa<GlobalAlias>(GV) && !GV->hasExternalWeakLinkage(); - - // Otherwise, be a little bit agressive by scanning the local block where we - // want to check to see if the pointer is already being loaded or stored - // from/to. If so, the previous load or store would have already trapped, - // so there is no harm doing an extra load (also, CSE will later eliminate - // the load entirely). - BasicBlock::iterator BBI = ScanFrom, E = ScanFrom->getParent()->begin(); - - while (BBI != E) { - --BBI; - - // If we see a free or a call (which might do a free) the pointer could be - // marked invalid. - if (isa<FreeInst>(BBI) || - (isa<CallInst>(BBI) && !isa<DbgInfoIntrinsic>(BBI))) - return false; - - if (LoadInst *LI = dyn_cast<LoadInst>(BBI)) { - if (LI->getOperand(0) == V) return true; - } else if (StoreInst *SI = dyn_cast<StoreInst>(BBI)) { - if (SI->getOperand(1) == V) return true; - } - - } - return false; -} - Instruction *InstCombiner::visitLoadInst(LoadInst &LI) { Value *Op = LI.getOperand(0); diff --git a/lib/Transforms/Scalar/JumpThreading.cpp b/lib/Transforms/Scalar/JumpThreading.cpp index c0ca2df..5a70fc3 100644 --- a/lib/Transforms/Scalar/JumpThreading.cpp +++ b/lib/Transforms/Scalar/JumpThreading.cpp @@ -76,7 +76,7 @@ namespace { bool ProcessBlock(BasicBlock *BB); bool ThreadEdge(BasicBlock *BB, BasicBlock *PredBB, BasicBlock *SuccBB, unsigned JumpThreadCost); - BasicBlock *FactorCommonPHIPreds(PHINode *PN, Constant *CstVal); + BasicBlock *FactorCommonPHIPreds(PHINode *PN, Value *Val); bool ProcessBranchOnDuplicateCond(BasicBlock *PredBB, BasicBlock *DestBB); bool ProcessSwitchOnDuplicateCond(BasicBlock *PredBB, BasicBlock *DestBB); @@ -163,10 +163,10 @@ void JumpThreading::FindLoopHeaders(Function &F) { /// This is important for things like "phi i1 [true, true, false, true, x]" /// where we only need to clone the block for the true blocks once. /// -BasicBlock *JumpThreading::FactorCommonPHIPreds(PHINode *PN, Constant *CstVal) { +BasicBlock *JumpThreading::FactorCommonPHIPreds(PHINode *PN, Value *Val) { SmallVector<BasicBlock*, 16> CommonPreds; for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) - if (PN->getIncomingValue(i) == CstVal) + if (PN->getIncomingValue(i) == Val) CommonPreds.push_back(PN->getIncomingBlock(i)); if (CommonPreds.size() == 1) @@ -324,10 +324,6 @@ bool JumpThreading::ProcessBlock(BasicBlock *BB) { } } - // If there is only a single predecessor of this block, nothing to fold. - if (BB->getSinglePredecessor()) - return false; - // All the rest of our checks depend on the condition being an instruction. if (CondInst == 0) return false; @@ -346,13 +342,36 @@ bool JumpThreading::ProcessBlock(BasicBlock *BB) { CondInst->getOpcode() == Instruction::And)) return true; - // If we have "br (phi != 42)" and the phi node has any constant values as - // operands, we can thread through this block. 
- if (CmpInst *CondCmp = dyn_cast<CmpInst>(CondInst)) - if (isa<PHINode>(CondCmp->getOperand(0)) && - isa<Constant>(CondCmp->getOperand(1)) && - ProcessBranchOnCompare(CondCmp, BB)) - return true; + if (CmpInst *CondCmp = dyn_cast<CmpInst>(CondInst)) { + if (isa<PHINode>(CondCmp->getOperand(0))) { + // If we have "br (phi != 42)" and the phi node has any constant values + // as operands, we can thread through this block. + // + // If we have "br (cmp phi, x)" and the phi node contains x such that the + // comparison uniquely identifies the branch target, we can thread + // through this block. + + if (ProcessBranchOnCompare(CondCmp, BB)) + return true; + } + + // If we have a comparison, loop over the predecessors to see if there is + // a condition with the same value. + pred_iterator PI = pred_begin(BB), E = pred_end(BB); + for (; PI != E; ++PI) + if (BranchInst *PBI = dyn_cast<BranchInst>((*PI)->getTerminator())) + if (PBI->isConditional() && *PI != BB) { + if (CmpInst *CI = dyn_cast<CmpInst>(PBI->getCondition())) { + if (CI->getOperand(0) == CondCmp->getOperand(0) && + CI->getOperand(1) == CondCmp->getOperand(1) && + CI->getPredicate() == CondCmp->getPredicate()) { + // TODO: Could handle things like (x != 4) --> (x == 17) + if (ProcessBranchOnDuplicateCond(*PI, BB)) + return true; + } + } + } + } // Check for some cases that are worth simplifying. Right now we want to look // for loads that are used by a switch or by the condition for the branch. If @@ -770,12 +789,30 @@ bool JumpThreading::ProcessBranchOnLogical(Value *V, BasicBlock *BB, return ThreadEdge(BB, PredBB, SuccBB, JumpThreadCost); } +/// GetResultOfComparison - Given an icmp/fcmp predicate and the left and right +/// hand sides of the compare instruction, try to determine the result. If the +/// result can not be determined, a null pointer is returned. +static Constant *GetResultOfComparison(CmpInst::Predicate pred, + Value *LHS, Value *RHS) { + if (Constant *CLHS = dyn_cast<Constant>(LHS)) + if (Constant *CRHS = dyn_cast<Constant>(RHS)) + return ConstantExpr::getCompare(pred, CLHS, CRHS); + + if (LHS == RHS) + if (isa<IntegerType>(LHS->getType()) || isa<PointerType>(LHS->getType())) + return ICmpInst::isTrueWhenEqual(pred) ? + ConstantInt::getTrue() : ConstantInt::getFalse(); + + return 0; +} + /// ProcessBranchOnCompare - We found a branch on a comparison between a phi -/// node and a constant. If the PHI node contains any constants as inputs, we -/// can fold the compare for that edge and thread through it. +/// node and a value. If we can identify when the comparison is true between +/// the phi inputs and the value, we can fold the compare for that edge and +/// thread through it. bool JumpThreading::ProcessBranchOnCompare(CmpInst *Cmp, BasicBlock *BB) { PHINode *PN = cast<PHINode>(Cmp->getOperand(0)); - Constant *RHS = cast<Constant>(Cmp->getOperand(1)); + Value *RHS = Cmp->getOperand(1); // If the phi isn't in the current block, an incoming edge to this block // doesn't control the destination. @@ -784,18 +821,17 @@ bool JumpThreading::ProcessBranchOnCompare(CmpInst *Cmp, BasicBlock *BB) { // We can do this simplification if any comparisons fold to true or false. // See if any do. 
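GetResultOfComparison above only takes the LHS == RHS shortcut for integer and pointer types, and that restriction is load-bearing: for floating point, x == x is false when x is NaN, so the answer cannot be read off the predicate alone. A quick demonstration:

#include <cmath>
#include <cstdio>

int main() {
  double n = std::nan("");
  printf("nan == nan: %d\n", n == n);   // 0 -- folding to 'true' would be wrong
  int i = 5;
  printf("i == i:     %d\n", i == i);   // 1 -- always, safe to fold
}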
- Constant *PredCst = 0; + Value *PredVal = 0; bool TrueDirection = false; for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) { - PredCst = dyn_cast<Constant>(PN->getIncomingValue(i)); - if (PredCst == 0) continue; + PredVal = PN->getIncomingValue(i); + + Constant *Res = GetResultOfComparison(Cmp->getPredicate(), PredVal, RHS); + if (!Res) { + PredVal = 0; + continue; + } - Constant *Res; - if (ICmpInst *ICI = dyn_cast<ICmpInst>(Cmp)) - Res = ConstantExpr::getICmp(ICI->getPredicate(), PredCst, RHS); - else - Res = ConstantExpr::getFCmp(cast<FCmpInst>(Cmp)->getPredicate(), - PredCst, RHS); // If this folded to a constant expr, we can't do anything. if (ConstantInt *ResC = dyn_cast<ConstantInt>(Res)) { TrueDirection = ResC->getZExtValue(); @@ -808,11 +844,11 @@ bool JumpThreading::ProcessBranchOnCompare(CmpInst *Cmp, BasicBlock *BB) { } // Otherwise, we can't fold this input. - PredCst = 0; + PredVal = 0; } // If no match, bail out. - if (PredCst == 0) + if (PredVal == 0) return false; // See if the cost of duplicating this block is low enough. @@ -825,7 +861,7 @@ bool JumpThreading::ProcessBranchOnCompare(CmpInst *Cmp, BasicBlock *BB) { // If so, we can actually do this threading. Merge any common predecessors // that will act the same. - BasicBlock *PredBB = FactorCommonPHIPreds(PN, PredCst); + BasicBlock *PredBB = FactorCommonPHIPreds(PN, PredVal); // Next, get our successor. BasicBlock *SuccBB = BB->getTerminator()->getSuccessor(!TrueDirection); diff --git a/lib/Transforms/Scalar/LoopIndexSplit.cpp b/lib/Transforms/Scalar/LoopIndexSplit.cpp index 9c78596..6f7a7f8 100644 --- a/lib/Transforms/Scalar/LoopIndexSplit.cpp +++ b/lib/Transforms/Scalar/LoopIndexSplit.cpp @@ -290,13 +290,13 @@ static bool isUsedOutsideLoop(Value *V, Loop *L) { // Return V+1 static Value *getPlusOne(Value *V, bool Sign, Instruction *InsertPt) { - ConstantInt *One = ConstantInt::get(V->getType(), 1, Sign); + Constant *One = ConstantInt::get(V->getType(), 1, Sign); return BinaryOperator::CreateAdd(V, One, "lsp", InsertPt); } // Return V-1 static Value *getMinusOne(Value *V, bool Sign, Instruction *InsertPt) { - ConstantInt *One = ConstantInt::get(V->getType(), 1, Sign); + Constant *One = ConstantInt::get(V->getType(), 1, Sign); return BinaryOperator::CreateSub(V, One, "lsp", InsertPt); } diff --git a/lib/Transforms/Scalar/LoopStrengthReduce.cpp b/lib/Transforms/Scalar/LoopStrengthReduce.cpp index 944f409..7579748 100644 --- a/lib/Transforms/Scalar/LoopStrengthReduce.cpp +++ b/lib/Transforms/Scalar/LoopStrengthReduce.cpp @@ -143,10 +143,10 @@ namespace { /// inside the loop then try to eliminate the cast opeation. void OptimizeShadowIV(Loop *L); - /// OptimizeSMax - Rewrite the loop's terminating condition - /// if it uses an smax computation. - ICmpInst *OptimizeSMax(Loop *L, ICmpInst *Cond, - IVStrideUse* &CondUse); + /// OptimizeMax - Rewrite the loop's terminating condition + /// if it uses a max computation. + ICmpInst *OptimizeMax(Loop *L, ICmpInst *Cond, + IVStrideUse* &CondUse); bool FindIVUserForCond(ICmpInst *Cond, IVStrideUse *&CondUse, const SCEVHandle *&CondStride); @@ -336,13 +336,6 @@ namespace { /// EmittedBase. Value *OperandValToReplace; - /// isSigned - The stride (and thus also the Base) of this use may be in - /// a narrower type than the use itself (OperandValToReplace->getType()). - /// When this is the case, the isSigned field indicates whether the - /// IV expression should be signed-extended instead of zero-extended to - /// fit the type of the use. 
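The generalized ProcessBranchOnCompare above threads branches whose compare pits a phi against a value that also appears among the phi's inputs. A source-level shape of such a block (hypothetical function, for illustration only):

#include <cstdio>

static int f(int a, bool p) {
  int x = p ? a : 42;   // becomes phi [a, %then], [42, %else]
  if (x != a)           // on the %then edge this is a != a: always false,
    return 1;           // so that edge can be threaded past the branch
  return 0;
}

int main() { printf("%d %d\n", f(3, true), f(3, false)); }   // 0 1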
- bool isSigned; - /// Imm - The immediate value that should be added to the base immediately /// before Inst, because it will be folded into the imm field of the /// instruction. This is also sometimes used for loop-variant values that @@ -363,7 +356,6 @@ namespace { BasedUser(IVStrideUse &IVSU, ScalarEvolution *se) : SE(se), Base(IVSU.getOffset()), Inst(IVSU.getUser()), OperandValToReplace(IVSU.getOperandValToReplace()), - isSigned(IVSU.isSigned()), Imm(SE->getIntegerSCEV(0, Base->getType())), isUseOfPostIncrementedValue(IVSU.isUseOfPostIncrementedValue()) {} @@ -428,11 +420,6 @@ Value *BasedUser::InsertCodeForBaseAtPosition(const SCEVHandle &NewBase, NewValSCEV = SE->getAddExpr(NewValSCEV, Imm); } - if (isSigned) - NewValSCEV = SE->getTruncateOrSignExtend(NewValSCEV, Ty); - else - NewValSCEV = SE->getTruncateOrZeroExtend(NewValSCEV, Ty); - return Rewriter.expandCodeFor(NewValSCEV, Ty, IP); } @@ -592,7 +579,7 @@ static void MoveLoopVariantsToImmediateField(SCEVHandle &Val, SCEVHandle &Imm, if (Val->isLoopInvariant(L)) return; // Nothing to do. if (const SCEVAddExpr *SAE = dyn_cast<SCEVAddExpr>(Val)) { - std::vector<SCEVHandle> NewOps; + SmallVector<SCEVHandle, 4> NewOps; NewOps.reserve(SAE->getNumOperands()); for (unsigned i = 0; i != SAE->getNumOperands(); ++i) @@ -613,7 +600,7 @@ static void MoveLoopVariantsToImmediateField(SCEVHandle &Val, SCEVHandle &Imm, SCEVHandle Start = SARE->getStart(); MoveLoopVariantsToImmediateField(Start, Imm, L, SE); - std::vector<SCEVHandle> Ops(SARE->op_begin(), SARE->op_end()); + SmallVector<SCEVHandle, 4> Ops(SARE->op_begin(), SARE->op_end()); Ops[0] = Start; Val = SE->getAddRecExpr(Ops, SARE->getLoop()); } else { @@ -633,7 +620,7 @@ static void MoveImmediateValues(const TargetLowering *TLI, bool isAddress, Loop *L, ScalarEvolution *SE) { if (const SCEVAddExpr *SAE = dyn_cast<SCEVAddExpr>(Val)) { - std::vector<SCEVHandle> NewOps; + SmallVector<SCEVHandle, 4> NewOps; NewOps.reserve(SAE->getNumOperands()); for (unsigned i = 0; i != SAE->getNumOperands(); ++i) { @@ -660,7 +647,7 @@ static void MoveImmediateValues(const TargetLowering *TLI, MoveImmediateValues(TLI, AccessTy, Start, Imm, isAddress, L, SE); if (Start != SARE->getStart()) { - std::vector<SCEVHandle> Ops(SARE->op_begin(), SARE->op_end()); + SmallVector<SCEVHandle, 4> Ops(SARE->op_begin(), SARE->op_end()); Ops[0] = Start; Val = SE->getAddRecExpr(Ops, SARE->getLoop()); } @@ -717,7 +704,7 @@ static void MoveImmediateValues(const TargetLowering *TLI, /// SeparateSubExprs - Decompose Expr into all of the subexpressions that are /// added together. This is used to reassociate common addition subexprs /// together for maximal sharing when rewriting bases. -static void SeparateSubExprs(std::vector<SCEVHandle> &SubExprs, +static void SeparateSubExprs(SmallVector<SCEVHandle, 16> &SubExprs, SCEVHandle Expr, ScalarEvolution *SE) { if (const SCEVAddExpr *AE = dyn_cast<SCEVAddExpr>(Expr)) { @@ -729,7 +716,7 @@ static void SeparateSubExprs(std::vector<SCEVHandle> &SubExprs, SubExprs.push_back(Expr); } else { // Compute the addrec with zero as its base. - std::vector<SCEVHandle> Ops(SARE->op_begin(), SARE->op_end()); + SmallVector<SCEVHandle, 4> Ops(SARE->op_begin(), SARE->op_end()); Ops[0] = Zero; // Start with zero base. SubExprs.push_back(SE->getAddRecExpr(Ops, SARE->getLoop())); @@ -783,9 +770,9 @@ RemoveCommonExpressionsFromUseBases(std::vector<BasedUser> &Uses, // UniqueSubExprs - Keep track of all of the subexpressions we see in the // order we see them. 
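The container swaps in these LoopStrengthReduce hunks (std::vector<SCEVHandle> to SmallVector) trade heap allocation for inline storage: SmallVector keeps its first N elements inside the object itself, which pays off for the short operand lists that dominate SCEV expressions. A minimal usage sketch, assuming LLVM's ADT headers are on the include path:

#include "llvm/ADT/SmallVector.h"
#include <cstdio>

int main() {
  llvm::SmallVector<int, 4> Ops;   // first 4 elements live inline, no malloc
  for (int i = 0; i != 3; ++i)
    Ops.push_back(i);              // stays within the inline buffer
  printf("%u elements, no heap allocation yet\n", (unsigned)Ops.size());
}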
- std::vector<SCEVHandle> UniqueSubExprs; + SmallVector<SCEVHandle, 16> UniqueSubExprs; - std::vector<SCEVHandle> SubExprs; + SmallVector<SCEVHandle, 16> SubExprs; unsigned NumUsesInsideLoop = 0; for (unsigned i = 0; i != NumUses; ++i) { // If the user is outside the loop, just ignore it for base computation. @@ -1129,11 +1116,11 @@ static bool isNonConstantNegative(const SCEVHandle &Expr) { return SC->getValue()->getValue().isNegative(); } -// CollectIVUsers - Transform our list of users and offsets to a bit more -// complex table. In this new vector, each 'BasedUser' contains 'Base', the base -// of the strided accesses, as well as the old information from Uses. We -// progressively move information from the Base field to the Imm field, until -// we eventually have the full access expression to rewrite the use. +/// CollectIVUsers - Transform our list of users and offsets to a bit more +/// complex table. In this new vector, each 'BasedUser' contains 'Base', the base +/// of the strided accesses, as well as the old information from Uses. We +/// progressively move information from the Base field to the Imm field, until +/// we eventually have the full access expression to rewrite the use. SCEVHandle LoopStrengthReduce::CollectIVUsers(const SCEVHandle &Stride, IVUsersOfOneStride &Uses, Loop *L, @@ -2008,15 +1995,15 @@ ICmpInst *LoopStrengthReduce::ChangeCompareStride(Loop *L, ICmpInst *Cond, if (!isa<PointerType>(NewCmpTy)) NewCmpRHS = ConstantInt::get(NewCmpTy, NewCmpVal); else { - ConstantInt *CI = ConstantInt::get(NewCmpIntTy, NewCmpVal); + Constant *CI = ConstantInt::get(NewCmpIntTy, NewCmpVal); NewCmpRHS = ConstantExpr::getIntToPtr(CI, NewCmpTy); } NewOffset = TyBits == NewTyBits ? SE->getMulExpr(CondUse->getOffset(), - SE->getConstant(ConstantInt::get(CmpTy, Scale))) - : SE->getConstant(ConstantInt::get(NewCmpIntTy, + SE->getConstant(CmpTy, Scale)) + : SE->getConstant(NewCmpIntTy, cast<SCEVConstant>(CondUse->getOffset())->getValue() - ->getSExtValue()*Scale)); + ->getSExtValue()*Scale); break; } } @@ -2047,7 +2034,7 @@ ICmpInst *LoopStrengthReduce::ChangeCompareStride(Loop *L, ICmpInst *Cond, OldCond->replaceAllUsesWith(Cond); OldCond->eraseFromParent(); - IU->IVUsesByStride[*NewStride]->addUser(NewOffset, Cond, NewCmpLHS, false); + IU->IVUsesByStride[*NewStride]->addUser(NewOffset, Cond, NewCmpLHS); CondUse = &IU->IVUsesByStride[*NewStride]->Users.back(); CondStride = NewStride; ++NumEliminated; @@ -2057,8 +2044,8 @@ ICmpInst *LoopStrengthReduce::ChangeCompareStride(Loop *L, ICmpInst *Cond, return Cond; } -/// OptimizeSMax - Rewrite the loop's terminating condition if it uses -/// an smax computation. +/// OptimizeMax - Rewrite the loop's terminating condition if it uses +/// a max computation. /// /// This is a narrow solution to a specific, but acute, problem. For loops /// like this: @@ -2068,10 +2055,10 @@ ICmpInst *LoopStrengthReduce::ChangeCompareStride(Loop *L, ICmpInst *Cond, /// p[i] = 0.0; /// } while (++i < n); /// -/// where the comparison is signed, the trip count isn't just 'n', because -/// 'n' could be negative. And unfortunately this can come up even for loops -/// where the user didn't use a C do-while loop. For example, seemingly -/// well-behaved top-test loops will commonly be lowered like this: +/// the trip count isn't just 'n', because 'n' might not be positive. And +/// unfortunately this can come up even for loops where the user didn't use +/// a C do-while loop. 
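The rewritten comment block above describes this loop shape; here it is in plain C++. Because the body is guaranteed to run once, the do-while's trip count is max(n, 1) rather than n, which is exactly where ScalarEvolution's smax (or umax, for unsigned compares) expression comes from:

#include <cstdio>

static int trips(int n) {
  int i = 0, count = 0;
  do { ++count; } while (++i < n);   // body runs even when n <= 0
  return count;
}

int main() {
  printf("%d %d %d\n", trips(-3), trips(0), trips(5));   // 1 1 5
}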
For example, seemingly well-behaved top-test loops +/// will commonly be lowered like this: // /// if (n > 0) { /// i = 0; @@ -2084,14 +2071,14 @@ ICmpInst *LoopStrengthReduce::ChangeCompareStride(Loop *L, ICmpInst *Cond, /// test in such a way that indvars can't find it. /// /// When indvars can't find the if test in loops like this, it creates a -/// signed-max expression, which allows it to give the loop a canonical +/// max expression, which allows it to give the loop a canonical /// induction variable: /// /// i = 0; -/// smax = n < 1 ? 1 : n; +/// max = n < 1 ? 1 : n; /// do { /// p[i] = 0.0; -/// } while (++i != smax); +/// } while (++i != max); /// /// Canonical induction variables are necessary because the loop passes /// are designed around them. The most obvious example of this is the @@ -2107,8 +2094,8 @@ ICmpInst *LoopStrengthReduce::ChangeCompareStride(Loop *L, ICmpInst *Cond, /// rewriting their conditions from ICMP_NE back to ICMP_SLT, and deleting /// the instructions for the maximum computation. /// -ICmpInst *LoopStrengthReduce::OptimizeSMax(Loop *L, ICmpInst *Cond, - IVStrideUse* &CondUse) { +ICmpInst *LoopStrengthReduce::OptimizeMax(Loop *L, ICmpInst *Cond, + IVStrideUse* &CondUse) { // Check that the loop matches the pattern we're looking for. if (Cond->getPredicate() != CmpInst::ICMP_EQ && Cond->getPredicate() != CmpInst::ICMP_NE) @@ -2126,12 +2113,19 @@ ICmpInst *LoopStrengthReduce::OptimizeSMax(Loop *L, ICmpInst *Cond, SCEVHandle IterationCount = SE->getAddExpr(BackedgeTakenCount, One); // Check for a max calculation that matches the pattern. - const SCEVSMaxExpr *SMax = dyn_cast<SCEVSMaxExpr>(IterationCount); - if (!SMax || SMax != SE->getSCEV(Sel)) return Cond; + if (!isa<SCEVSMaxExpr>(IterationCount) && !isa<SCEVUMaxExpr>(IterationCount)) + return Cond; + const SCEVNAryExpr *Max = cast<SCEVNAryExpr>(IterationCount); + if (Max != SE->getSCEV(Sel)) return Cond; + + // To handle a max with more than two operands, this optimization would + // require additional checking and setup. + if (Max->getNumOperands() != 2) + return Cond; - SCEVHandle SMaxLHS = SMax->getOperand(0); - SCEVHandle SMaxRHS = SMax->getOperand(1); - if (!SMaxLHS || SMaxLHS != One) return Cond; + SCEVHandle MaxLHS = Max->getOperand(0); + SCEVHandle MaxRHS = Max->getOperand(1); + if (!MaxLHS || MaxLHS != One) return Cond; // Check the relevant induction variable for conformance to // the pattern. @@ -2148,19 +2142,23 @@ ICmpInst *LoopStrengthReduce::OptimizeSMax(Loop *L, ICmpInst *Cond, // Check the right operand of the select, and remember it, as it will // be used in the new comparison instruction. Value *NewRHS = 0; - if (SE->getSCEV(Sel->getOperand(1)) == SMaxRHS) + if (SE->getSCEV(Sel->getOperand(1)) == MaxRHS) NewRHS = Sel->getOperand(1); - else if (SE->getSCEV(Sel->getOperand(2)) == SMaxRHS) + else if (SE->getSCEV(Sel->getOperand(2)) == MaxRHS) NewRHS = Sel->getOperand(2); if (!NewRHS) return Cond; + // Determine the new comparison opcode. It may be signed or unsigned, + // and the original comparison may be either equality or inequality. + CmpInst::Predicate Pred = + isa<SCEVSMaxExpr>(Max) ? CmpInst::ICMP_SLT : CmpInst::ICMP_ULT; + if (Cond->getPredicate() == CmpInst::ICMP_EQ) + Pred = CmpInst::getInversePredicate(Pred); + // Ok, everything looks ok to change the condition into an SLT or SGE and // delete the max calculation. ICmpInst *NewCond = - new ICmpInst(Cond->getPredicate() == CmpInst::ICMP_NE ? 
- CmpInst::ICMP_SLT : - CmpInst::ICMP_SGE, - Cond->getOperand(0), NewRHS, "scmp", Cond); + new ICmpInst(Pred, Cond->getOperand(0), NewRHS, "scmp", Cond); // Delete the max calculation instructions. Cond->replaceAllUsesWith(NewCond); @@ -2242,7 +2240,7 @@ void LoopStrengthReduce::OptimizeShadowIV(Loop *L) { ConstantInt *Init = dyn_cast<ConstantInt>(PH->getIncomingValue(Entry)); if (!Init) continue; - ConstantFP *NewInit = ConstantFP::get(DestTy, Init->getZExtValue()); + Constant *NewInit = ConstantFP::get(DestTy, Init->getZExtValue()); BinaryOperator *Incr = dyn_cast<BinaryOperator>(PH->getIncomingValue(Latch)); @@ -2266,7 +2264,7 @@ void LoopStrengthReduce::OptimizeShadowIV(Loop *L) { PHINode *NewPH = PHINode::Create(DestTy, "IV.S.", PH); /* create new increment. '++d' in above example. */ - ConstantFP *CFP = ConstantFP::get(DestTy, C->getZExtValue()); + Constant *CFP = ConstantFP::get(DestTy, C->getZExtValue()); BinaryOperator *NewIncr = BinaryOperator::Create(Incr->getOpcode() == Instruction::Add ? Instruction::FAdd : Instruction::FSub, @@ -2284,9 +2282,9 @@ void LoopStrengthReduce::OptimizeShadowIV(Loop *L) { } } -// OptimizeIndvars - Now that IVUsesByStride is set up with all of the indvar -// uses in the loop, look to see if we can eliminate some, in favor of using -// common indvars for the different uses. +/// OptimizeIndvars - Now that IVUsesByStride is set up with all of the indvar +/// uses in the loop, look to see if we can eliminate some, in favor of using +/// common indvars for the different uses. void LoopStrengthReduce::OptimizeIndvars(Loop *L) { // TODO: implement optzns here. @@ -2301,11 +2299,11 @@ void LoopStrengthReduce::OptimizeLoopTermCond(Loop *L) { // induction variable, to allow coalescing the live ranges for the IV into // one register value. BasicBlock *LatchBlock = L->getLoopLatch(); - BasicBlock *ExitBlock = L->getExitingBlock(); - if (!ExitBlock) + BasicBlock *ExitingBlock = L->getExitingBlock(); + if (!ExitingBlock) // Multiple exits, just look at the exit in the latch block if there is one. - ExitBlock = LatchBlock; - BranchInst *TermBr = dyn_cast<BranchInst>(ExitBlock->getTerminator()); + ExitingBlock = LatchBlock; + BranchInst *TermBr = dyn_cast<BranchInst>(ExitingBlock->getTerminator()); if (!TermBr) return; if (TermBr->isUnconditional() || !isa<ICmpInst>(TermBr->getCondition())) @@ -2318,7 +2316,7 @@ void LoopStrengthReduce::OptimizeLoopTermCond(Loop *L) { if (!FindIVUserForCond(Cond, CondUse, CondStride)) return; // setcc doesn't use the IV. - if (ExitBlock != LatchBlock) { + if (ExitingBlock != LatchBlock) { if (!Cond->hasOneUse()) // See below, we don't want the condition to be cloned. return; @@ -2373,14 +2371,14 @@ void LoopStrengthReduce::OptimizeLoopTermCond(Loop *L) { StrideNoReuse.insert(*CondStride); } - // If the trip count is computed in terms of an smax (due to ScalarEvolution + // If the trip count is computed in terms of a max (due to ScalarEvolution // being unable to find a sufficient guard, for example), change the loop - // comparison to use SLT instead of NE. - Cond = OptimizeSMax(L, Cond, CondUse); + // comparison to use SLT or ULT instead of NE. + Cond = OptimizeMax(L, Cond, CondUse); // If possible, change stride and operands of the compare instruction to // eliminate one stride. 
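The predicate logic the rewritten exit condition above ends up with: a signed max pairs with ICMP_SLT, an unsigned max with ICMP_ULT, and an original ICMP_EQ exit test flips the choice to its inverse. A compact stand-in table (plain enums in place of CmpInst predicates):

#include <cstdio>

enum Pred { SLT, ULT, SGE, UGE };

static Pred pick(bool isSignedMax, bool condWasEQ) {
  Pred P = isSignedMax ? SLT : ULT;
  if (condWasEQ)                   // getInversePredicate: SLT->SGE, ULT->UGE
    P = (P == SLT) ? SGE : UGE;
  return P;
}

int main() {
  printf("%d %d %d %d\n", pick(true, false), pick(false, false),
         pick(true, true), pick(false, true));   // 0 1 2 3
}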
- if (ExitBlock == LatchBlock) + if (ExitingBlock == LatchBlock) Cond = ChangeCompareStride(L, Cond, CondUse, CondStride); // It's possible for the setcc instruction to be anywhere in the loop, and @@ -2397,8 +2395,7 @@ void LoopStrengthReduce::OptimizeLoopTermCond(Loop *L) { // Clone the IVUse, as the old use still exists! IU->IVUsesByStride[*CondStride]->addUser(CondUse->getOffset(), Cond, - CondUse->getOperandValToReplace(), - false); + CondUse->getOperandValToReplace()); CondUse = &IU->IVUsesByStride[*CondStride]->Users.back(); } } @@ -2413,9 +2410,9 @@ void LoopStrengthReduce::OptimizeLoopTermCond(Loop *L) { ++NumLoopCond; } -// OptimizeLoopCountIV - If, after all sharing of IVs, the IV used for deciding -// when to exit the loop is used only for that purpose, try to rearrange things -// so it counts down to a test against zero. +/// OptimizeLoopCountIV - If, after all sharing of IVs, the IV used for deciding +/// when to exit the loop is used only for that purpose, try to rearrange things +/// so it counts down to a test against zero. void LoopStrengthReduce::OptimizeLoopCountIV(Loop *L) { // If the number of times the loop is executed isn't computable, give up. @@ -2506,7 +2503,7 @@ void LoopStrengthReduce::OptimizeLoopCountIV(Loop *L) { Value *startVal = phi->getIncomingValue(inBlock); Value *endVal = Cond->getOperand(1); // FIXME check for case where both are constant - ConstantInt* Zero = ConstantInt::get(Cond->getOperand(1)->getType(), 0); + Constant* Zero = ConstantInt::get(Cond->getOperand(1)->getType(), 0); BinaryOperator *NewStartVal = BinaryOperator::Create(Instruction::Sub, endVal, startVal, "tmp", PreInsertPt); diff --git a/lib/Transforms/Scalar/ScalarReplAggregates.cpp b/lib/Transforms/Scalar/ScalarReplAggregates.cpp index 7143c7b..d89790c 100644 --- a/lib/Transforms/Scalar/ScalarReplAggregates.cpp +++ b/lib/Transforms/Scalar/ScalarReplAggregates.cpp @@ -820,10 +820,8 @@ void SROA::RewriteMemIntrinUserOfAlloca(MemIntrinsic *MI, Instruction *BCInst, StoreVal = Constant::getNullValue(EltTy); // 0.0, null, 0, <0,0> } else { // If EltTy is a vector type, get the element type. - const Type *ValTy = EltTy; - if (const VectorType *VTy = dyn_cast<VectorType>(ValTy)) - ValTy = VTy->getElementType(); - + const Type *ValTy = EltTy->getScalarType(); + // Construct an integer with the right value. unsigned EltSize = TD->getTypeSizeInBits(ValTy); APInt OneVal(EltSize, CI->getZExtValue()); diff --git a/lib/Transforms/Scalar/SimplifyLibCalls.cpp b/lib/Transforms/Scalar/SimplifyLibCalls.cpp index 59989c9..bbcb792 100644 --- a/lib/Transforms/Scalar/SimplifyLibCalls.cpp +++ b/lib/Transforms/Scalar/SimplifyLibCalls.cpp @@ -135,7 +135,11 @@ Value *LibCallOptimization::EmitStrLen(Value *Ptr, IRBuilder<> &B) { TD->getIntPtrType(), PointerType::getUnqual(Type::Int8Ty), NULL); - return B.CreateCall(StrLen, CastToCStr(Ptr, B), "strlen"); + CallInst *CI = B.CreateCall(StrLen, CastToCStr(Ptr, B), "strlen"); + if (const Function *F = dyn_cast<Function>(StrLen->stripPointerCasts())) + CI->setCallingConv(F->getCallingConv()); + + return CI; } /// EmitMemCpy - Emit a call to the memcpy function to the builder. 
This always @@ -164,7 +168,12 @@ Value *LibCallOptimization::EmitMemChr(Value *Ptr, Value *Val, PointerType::getUnqual(Type::Int8Ty), Type::Int32Ty, TD->getIntPtrType(), NULL); - return B.CreateCall3(MemChr, CastToCStr(Ptr, B), Val, Len, "memchr"); + CallInst *CI = B.CreateCall3(MemChr, CastToCStr(Ptr, B), Val, Len, "memchr"); + + if (const Function *F = dyn_cast<Function>(MemChr->stripPointerCasts())) + CI->setCallingConv(F->getCallingConv()); + + return CI; } /// EmitMemCmp - Emit a call to the memcmp function. @@ -182,8 +191,13 @@ Value *LibCallOptimization::EmitMemCmp(Value *Ptr1, Value *Ptr2, PointerType::getUnqual(Type::Int8Ty), PointerType::getUnqual(Type::Int8Ty), TD->getIntPtrType(), NULL); - return B.CreateCall3(MemCmp, CastToCStr(Ptr1, B), CastToCStr(Ptr2, B), - Len, "memcmp"); + CallInst *CI = B.CreateCall3(MemCmp, CastToCStr(Ptr1, B), CastToCStr(Ptr2, B), + Len, "memcmp"); + + if (const Function *F = dyn_cast<Function>(MemCmp->stripPointerCasts())) + CI->setCallingConv(F->getCallingConv()); + + return CI; } /// EmitMemSet - Emit a call to the memset function @@ -217,20 +231,30 @@ Value *LibCallOptimization::EmitUnaryFloatFnCall(Value *Op, const char *Name, NameBuffer[NameLen+1] = 0; Name = NameBuffer; } - + Module *M = Caller->getParent(); - Value *Callee = M->getOrInsertFunction(Name, Op->getType(), + Value *Callee = M->getOrInsertFunction(Name, Op->getType(), Op->getType(), NULL); - return B.CreateCall(Callee, Op, Name); + CallInst *CI = B.CreateCall(Callee, Op, Name); + + if (const Function *F = dyn_cast<Function>(Callee->stripPointerCasts())) + CI->setCallingConv(F->getCallingConv()); + + return CI; } /// EmitPutChar - Emit a call to the putchar function. This assumes that Char /// is an integer. void LibCallOptimization::EmitPutChar(Value *Char, IRBuilder<> &B) { Module *M = Caller->getParent(); - Value *F = M->getOrInsertFunction("putchar", Type::Int32Ty, - Type::Int32Ty, NULL); - B.CreateCall(F, B.CreateIntCast(Char, Type::Int32Ty, "chari"), "putchar"); + Value *PutChar = M->getOrInsertFunction("putchar", Type::Int32Ty, + Type::Int32Ty, NULL); + CallInst *CI = B.CreateCall(PutChar, + B.CreateIntCast(Char, Type::Int32Ty, "chari"), + "putchar"); + + if (const Function *F = dyn_cast<Function>(PutChar->stripPointerCasts())) + CI->setCallingConv(F->getCallingConv()); } /// EmitPutS - Emit a call to the puts function. This assumes that Str is @@ -241,10 +265,14 @@ void LibCallOptimization::EmitPutS(Value *Str, IRBuilder<> &B) { AWI[0] = AttributeWithIndex::get(1, Attribute::NoCapture); AWI[1] = AttributeWithIndex::get(~0u, Attribute::NoUnwind); - Value *F = M->getOrInsertFunction("puts", AttrListPtr::get(AWI, 2), - Type::Int32Ty, - PointerType::getUnqual(Type::Int8Ty), NULL); - B.CreateCall(F, CastToCStr(Str, B), "puts"); + Value *PutS = M->getOrInsertFunction("puts", AttrListPtr::get(AWI, 2), + Type::Int32Ty, + PointerType::getUnqual(Type::Int8Ty), + NULL); + CallInst *CI = B.CreateCall(PutS, CastToCStr(Str, B), "puts"); + if (const Function *F = dyn_cast<Function>(PutS->stripPointerCasts())) + CI->setCallingConv(F->getCallingConv()); + } /// EmitFPutC - Emit a call to the fputc function. 
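Every emitter in these SimplifyLibCalls hunks repeats one pattern: getOrInsertFunction can hand back a bitcast constant rather than a Function when a declaration with a conflicting prototype already exists, so the code looks through pointer casts before copying the callee's calling convention onto the new call site. The pattern isolated, using only API calls visible in the diff (a sketch, assuming this tree's header locations):

#include "llvm/Function.h"
#include "llvm/Instructions.h"
#include "llvm/Support/IRBuilder.h"

static llvm::CallInst *EmitCallMatchingCC(llvm::Value *Callee, llvm::Value *Arg,
                                          llvm::IRBuilder<> &B) {
  llvm::CallInst *CI = B.CreateCall(Callee, Arg, "tmp");
  if (const llvm::Function *F =
          llvm::dyn_cast<llvm::Function>(Callee->stripPointerCasts()))
    CI->setCallingConv(F->getCallingConv());  // keep call site and callee in sync
  return CI;
}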
This assumes that Char is @@ -258,12 +286,14 @@ void LibCallOptimization::EmitFPutC(Value *Char, Value *File, IRBuilder<> &B) { if (isa<PointerType>(File->getType())) F = M->getOrInsertFunction("fputc", AttrListPtr::get(AWI, 2), Type::Int32Ty, Type::Int32Ty, File->getType(), NULL); - else F = M->getOrInsertFunction("fputc", Type::Int32Ty, Type::Int32Ty, File->getType(), NULL); Char = B.CreateIntCast(Char, Type::Int32Ty, "chari"); - B.CreateCall2(F, Char, File, "fputc"); + CallInst *CI = B.CreateCall2(F, Char, File, "fputc"); + + if (const Function *Fn = dyn_cast<Function>(F->stripPointerCasts())) + CI->setCallingConv(Fn->getCallingConv()); } /// EmitFPutS - Emit a call to the puts function. Str is required to be a @@ -283,7 +313,10 @@ void LibCallOptimization::EmitFPutS(Value *Str, Value *File, IRBuilder<> &B) { F = M->getOrInsertFunction("fputs", Type::Int32Ty, PointerType::getUnqual(Type::Int8Ty), File->getType(), NULL); - B.CreateCall2(F, CastToCStr(Str, B), File, "fputs"); + CallInst *CI = B.CreateCall2(F, CastToCStr(Str, B), File, "fputs"); + + if (const Function *Fn = dyn_cast<Function>(F->stripPointerCasts())) + CI->setCallingConv(Fn->getCallingConv()); } /// EmitFWrite - Emit a call to the fwrite function. This assumes that Ptr is @@ -307,8 +340,11 @@ void LibCallOptimization::EmitFWrite(Value *Ptr, Value *Size, Value *File, PointerType::getUnqual(Type::Int8Ty), TD->getIntPtrType(), TD->getIntPtrType(), File->getType(), NULL); - B.CreateCall4(F, CastToCStr(Ptr, B), Size, - ConstantInt::get(TD->getIntPtrType(), 1), File); + CallInst *CI = B.CreateCall4(F, CastToCStr(Ptr, B), Size, + ConstantInt::get(TD->getIntPtrType(), 1), File); + + if (const Function *Fn = dyn_cast<Function>(F->stripPointerCasts())) + CI->setCallingConv(Fn->getCallingConv()); } //===----------------------------------------------------------------------===// @@ -673,12 +709,10 @@ struct VISIBILITY_HIDDEN StrCmpOpt : public LibCallOptimization { // strcmp(P, "x") -> memcmp(P, "x", 2) uint64_t Len1 = GetStringLength(Str1P); uint64_t Len2 = GetStringLength(Str2P); - if (Len1 || Len2) { - // Choose the smallest Len excluding 0 which means 'unknown'. 
- if (!Len1 || (Len2 && Len2 < Len1)) - Len1 = Len2; + if (Len1 && Len2) { return EmitMemCmp(Str1P, Str2P, - ConstantInt::get(TD->getIntPtrType(), Len1), B); + ConstantInt::get(TD->getIntPtrType(), + std::min(Len1, Len2)), B); } return 0; @@ -1039,7 +1073,7 @@ struct VISIBILITY_HIDDEN Exp2Opt : public LibCallOptimization { if (OpC->getOperand(0)->getType()->getPrimitiveSizeInBits() < 32) LdExpArg = B.CreateZExt(OpC->getOperand(0), Type::Int32Ty, "tmp"); } - + if (LdExpArg) { const char *Name; if (Op->getType() == Type::FloatTy) @@ -1056,12 +1090,15 @@ struct VISIBILITY_HIDDEN Exp2Opt : public LibCallOptimization { Module *M = Caller->getParent(); Value *Callee = M->getOrInsertFunction(Name, Op->getType(), Op->getType(), Type::Int32Ty,NULL); - return B.CreateCall2(Callee, One, LdExpArg); + CallInst *CI = B.CreateCall2(Callee, One, LdExpArg); + if (const Function *F = dyn_cast<Function>(Callee->stripPointerCasts())) + CI->setCallingConv(F->getCallingConv()); + + return CI; } return 0; } }; - //===---------------------------------------===// // Double -> Float Shrinking Optimizations for Unary Functions like 'floor' @@ -1072,7 +1109,7 @@ struct VISIBILITY_HIDDEN UnaryDoubleFPOpt : public LibCallOptimization { if (FT->getNumParams() != 1 || FT->getReturnType() != Type::DoubleTy || FT->getParamType(0) != Type::DoubleTy) return 0; - + // If this is something like 'floor((double)floatval)', convert to floorf. FPExtInst *Cast = dyn_cast<FPExtInst>(CI->getOperand(1)); if (Cast == 0 || Cast->getOperand(0)->getType() != Type::FloatTy) diff --git a/lib/Transforms/Scalar/TailRecursionElimination.cpp b/lib/Transforms/Scalar/TailRecursionElimination.cpp index 682d069..34ee57c 100644 --- a/lib/Transforms/Scalar/TailRecursionElimination.cpp +++ b/lib/Transforms/Scalar/TailRecursionElimination.cpp @@ -52,6 +52,7 @@ #define DEBUG_TYPE "tailcallelim" #include "llvm/Transforms/Scalar.h" +#include "llvm/Transforms/Utils/Local.h" #include "llvm/Constants.h" #include "llvm/DerivedTypes.h" #include "llvm/Function.h" @@ -201,8 +202,21 @@ bool TailCallElim::runOnFunction(Function &F) { bool TailCallElim::CanMoveAboveCall(Instruction *I, CallInst *CI) { // FIXME: We can move load/store/call/free instructions above the call if the // call does not mod/ref the memory location being processed. - if (I->mayHaveSideEffects() || isa<LoadInst>(I)) + if (I->mayHaveSideEffects()) // This also handles volatile loads. return false; + + if (LoadInst* L = dyn_cast<LoadInst>(I)) { + // Loads may always be moved above calls without side effects. + if (CI->mayHaveSideEffects()) { + // Non-volatile loads may be moved above a call with side effects if it + // does not write to memory and the load provably won't trap. + // FIXME: Writes to memory only matter if they may alias the pointer + // being loaded from. + if (CI->mayWriteToMemory() || + !isSafeToLoadUnconditionally(L->getPointerOperand(), L)) + return false; + } + } // Otherwise, if this is a side-effect free instruction, check to make sure // that it does not use the return value of the call. 
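The StrCmpOpt tightening above now requires both string lengths to be known before rewriting strcmp as memcmp. The reason: memcmp compares exactly n bytes, so n must stay inside both arguments; with both lengths known (NUL included), min(Len1, Len2) bytes always covers the shorter string's terminator, and the two functions agree. A spot check of the safe case:

#include <algorithm>
#include <cstring>
#include <cstdio>

int main() {
  const char a[] = "ab", b[] = "abcd";          // lengths 3 and 5, NULs included
  size_t n = std::min(sizeof(a), sizeof(b));    // min(Len1, Len2) == 3
  int s = strcmp(a, b), m = memcmp(a, b, n);
  printf("%d %d -> %s\n", s < 0, m < 0,
         (s < 0) == (m < 0) ? "agree" : "BUG"); // both negative: agree
}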
If it doesn't use the diff --git a/lib/Transforms/Utils/Local.cpp b/lib/Transforms/Utils/Local.cpp index 94483b8..c7fff54 100644 --- a/lib/Transforms/Utils/Local.cpp +++ b/lib/Transforms/Utils/Local.cpp @@ -14,6 +14,7 @@ #include "llvm/Transforms/Utils/Local.h" #include "llvm/Constants.h" +#include "llvm/GlobalAlias.h" #include "llvm/GlobalVariable.h" #include "llvm/DerivedTypes.h" #include "llvm/Instructions.h" @@ -28,6 +29,50 @@ using namespace llvm; //===----------------------------------------------------------------------===// +// Local analysis. +// + +/// isSafeToLoadUnconditionally - Return true if we know that executing a load +/// from this value cannot trap. If it is not obviously safe to load from the +/// specified pointer, we do a quick local scan of the basic block containing +/// ScanFrom, to determine if the address is already accessed. +bool llvm::isSafeToLoadUnconditionally(Value *V, Instruction *ScanFrom) { + // If it is an alloca it is always safe to load from. + if (isa<AllocaInst>(V)) return true; + + // If it is a global variable it is mostly safe to load from. + if (const GlobalValue *GV = dyn_cast<GlobalVariable>(V)) + // Don't try to evaluate aliases. External weak GV can be null. + return !isa<GlobalAlias>(GV) && !GV->hasExternalWeakLinkage(); + + // Otherwise, be a little bit agressive by scanning the local block where we + // want to check to see if the pointer is already being loaded or stored + // from/to. If so, the previous load or store would have already trapped, + // so there is no harm doing an extra load (also, CSE will later eliminate + // the load entirely). + BasicBlock::iterator BBI = ScanFrom, E = ScanFrom->getParent()->begin(); + + while (BBI != E) { + --BBI; + + // If we see a free or a call which may write to memory (i.e. which might do + // a free) the pointer could be marked invalid. + if (isa<FreeInst>(BBI) || + (isa<CallInst>(BBI) && BBI->mayWriteToMemory() && + !isa<DbgInfoIntrinsic>(BBI))) + return false; + + if (LoadInst *LI = dyn_cast<LoadInst>(BBI)) { + if (LI->getOperand(0) == V) return true; + } else if (StoreInst *SI = dyn_cast<StoreInst>(BBI)) { + if (SI->getOperand(1) == V) return true; + } + } + return false; +} + + +//===----------------------------------------------------------------------===// // Local constant propagation. // diff --git a/lib/Transforms/Utils/LowerAllocations.cpp b/lib/Transforms/Utils/LowerAllocations.cpp index 3249895..9af47f5 100644 --- a/lib/Transforms/Utils/LowerAllocations.cpp +++ b/lib/Transforms/Utils/LowerAllocations.cpp @@ -112,7 +112,7 @@ bool LowerAllocations::runOnBasicBlock(BasicBlock &BB) { if (MallocInst *MI = dyn_cast<MallocInst>(I)) { const Type *AllocTy = MI->getType()->getElementType(); - // malloc(type) becomes sbyte *malloc(size) + // malloc(type) becomes i8 *malloc(size) Value *MallocArg; if (LowerMallocArgToInteger) MallocArg = ConstantInt::get(Type::Int64Ty, diff --git a/lib/Transforms/Utils/SimplifyCFG.cpp b/lib/Transforms/Utils/SimplifyCFG.cpp index bcc6b81..ee0f6a6 100644 --- a/lib/Transforms/Utils/SimplifyCFG.cpp +++ b/lib/Transforms/Utils/SimplifyCFG.cpp @@ -859,6 +859,26 @@ static bool FoldValueComparisonIntoPredecessors(TerminatorInst *TI) { return Changed; } +// isSafeToHoistInvoke - If we would need to insert a select that uses the +// value of this invoke (comments in HoistThenElseCodeToIf explain why we +// would need to do this), we can't hoist the invoke, as there is nowhere +// to put the select in this case. 
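CanMoveAboveCall above now delegates the trap-safety question to isSafeToLoadUnconditionally, whose Local.cpp definition also appears above: scan backwards through the block for an earlier access to the same pointer, bailing out on anything that may have freed it. A toy rendering of that scan over a flat instruction list:

    #include <iostream>
    #include <string>
    #include <vector>

    // Toy instruction stream: each entry loads/stores through a named
    // pointer, or is a call that may write to memory (and so may free it).
    struct Inst {
        enum Kind { Load, Store, MayWriteCall, Other } K;
        std::string Ptr;   // pointer operand, for Load/Store only
    };

    // Scan backwards from ScanFrom within one block: if the same pointer was
    // already loaded or stored, a trapping address would have trapped there,
    // so an extra unconditional load is safe.  A call that may write (or
    // free) in between invalidates that conclusion.
    bool isSafeToLoadUnconditionally(const std::vector<Inst> &BB,
                                     std::size_t ScanFrom,
                                     const std::string &Ptr) {
        for (std::size_t i = ScanFrom; i-- > 0; ) {
            const Inst &I = BB[i];
            if (I.K == Inst::MayWriteCall)
                return false;                  // pointer may have been freed
            if ((I.K == Inst::Load || I.K == Inst::Store) && I.Ptr == Ptr)
                return true;                   // already dereferenced earlier
        }
        return false;                          // nothing proves safety
    }

    int main() {
        std::vector<Inst> BB = { { Inst::Store, "p" }, { Inst::Other, "" } };
        std::cout << isSafeToLoadUnconditionally(BB, 2, "p") << '\n';  // 1
    }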
+static bool isSafeToHoistInvoke(BasicBlock *BB1, BasicBlock *BB2, + Instruction *I1, Instruction *I2) { + for (succ_iterator SI = succ_begin(BB1), E = succ_end(BB1); SI != E; ++SI) { + PHINode *PN; + for (BasicBlock::iterator BBI = SI->begin(); + (PN = dyn_cast<PHINode>(BBI)); ++BBI) { + Value *BB1V = PN->getIncomingValueForBlock(BB1); + Value *BB2V = PN->getIncomingValueForBlock(BB2); + if (BB1V != BB2V && (BB1V==I1 || BB2V==I2)) { + return false; + } + } + } + return true; +} + /// HoistThenElseCodeToIf - Given a conditional branch that goes to BB1 and /// BB2, hoist any common code in the two blocks up into the branch block. The /// caller of this function guarantees that BI's block dominates BB1 and BB2. @@ -879,8 +899,9 @@ static bool HoistThenElseCodeToIf(BranchInst *BI) { I1 = BB1_Itr++; while (isa<DbgInfoIntrinsic>(I2)) I2 = BB2_Itr++; - if (I1->getOpcode() != I2->getOpcode() || isa<PHINode>(I1) || - isa<InvokeInst>(I1) || !I1->isIdenticalTo(I2)) + if (I1->getOpcode() != I2->getOpcode() || isa<PHINode>(I1) || + !I1->isIdenticalTo(I2) || + (isa<InvokeInst>(I1) && !isSafeToHoistInvoke(BB1, BB2, I1, I2))) return false; // If we get here, we can hoist at least one instruction. @@ -911,6 +932,10 @@ static bool HoistThenElseCodeToIf(BranchInst *BI) { return true; HoistTerminator: + // It may not be possible to hoist an invoke. + if (isa<InvokeInst>(I1) && !isSafeToHoistInvoke(BB1, BB2, I1, I2)) + return true; + // Okay, it is safe to hoist the terminator. Instruction *NT = I1->clone(); BIParent->getInstList().insert(BI, NT); diff --git a/lib/VMCore/AsmWriter.cpp b/lib/VMCore/AsmWriter.cpp index 6b369b6..73b1ed6 100644 --- a/lib/VMCore/AsmWriter.cpp +++ b/lib/VMCore/AsmWriter.cpp @@ -1384,7 +1384,10 @@ void AssemblyWriter::printFunction(const Function *F) { case CallingConv::Fast: Out << "fastcc "; break; case CallingConv::Cold: Out << "coldcc "; break; case CallingConv::X86_StdCall: Out << "x86_stdcallcc "; break; - case CallingConv::X86_FastCall: Out << "x86_fastcallcc "; break; + case CallingConv::X86_FastCall: Out << "x86_fastcallcc "; break; + case CallingConv::ARM_APCS: Out << "arm_apcscc "; break; + case CallingConv::ARM_AAPCS: Out << "arm_aapcscc "; break; + case CallingConv::ARM_AAPCS_VFP:Out << "arm_aapcs_vfpcc "; break; default: Out << "cc" << F->getCallingConv() << " "; break; } @@ -1640,7 +1643,10 @@ void AssemblyWriter::printInstruction(const Instruction &I) { case CallingConv::Fast: Out << " fastcc"; break; case CallingConv::Cold: Out << " coldcc"; break; case CallingConv::X86_StdCall: Out << " x86_stdcallcc"; break; - case CallingConv::X86_FastCall: Out << " x86_fastcallcc"; break; + case CallingConv::X86_FastCall: Out << " x86_fastcallcc"; break; + case CallingConv::ARM_APCS: Out << " arm_apcscc "; break; + case CallingConv::ARM_AAPCS: Out << " arm_aapcscc "; break; + case CallingConv::ARM_AAPCS_VFP:Out << " arm_aapcs_vfpcc "; break; default: Out << " cc" << CI->getCallingConv(); break; } @@ -1688,6 +1694,9 @@ void AssemblyWriter::printInstruction(const Instruction &I) { case CallingConv::Cold: Out << " coldcc"; break; case CallingConv::X86_StdCall: Out << " x86_stdcallcc"; break; case CallingConv::X86_FastCall: Out << " x86_fastcallcc"; break; + case CallingConv::ARM_APCS: Out << " arm_apcscc "; break; + case CallingConv::ARM_AAPCS: Out << " arm_aapcscc "; break; + case CallingConv::ARM_AAPCS_VFP:Out << " arm_aapcs_vfpcc "; break; default: Out << " cc" << II->getCallingConv(); break; } diff --git a/lib/VMCore/ConstantFold.cpp b/lib/VMCore/ConstantFold.cpp index 
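The logic of isSafeToHoistInvoke, shown above and rechecked at the HoistTerminator label: merging two differing PHI inputs normally costs a select at the join, and once an invoke is hoisted there is no block left in which to place that select. A toy version of the check:

    #include <iostream>

    // One successor PHI, modelled as the pair of values it receives from the
    // two arms being merged.
    struct Phi { const void *FromBB1, *FromBB2; };

    // Bail out if any PHI with differing incoming values actually consumes
    // the result of either invoke being hoisted.
    bool isSafeToHoistInvoke(const Phi *Phis, unsigned NumPhis,
                             const void *I1, const void *I2) {
        for (unsigned i = 0; i != NumPhis; ++i)
            if (Phis[i].FromBB1 != Phis[i].FromBB2 &&
                (Phis[i].FromBB1 == I1 || Phis[i].FromBB2 == I2))
                return false;
        return true;
    }

    int main() {
        int I1, I2, Other;
        Phi P[] = { { &I1, &Other } };  // PHI consumes the first invoke's value
        std::cout << isSafeToHoistInvoke(P, 1, &I1, &I2) << '\n';  // 0
    }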
1d293cc..3aab0cc 100644 --- a/lib/VMCore/ConstantFold.cpp +++ b/lib/VMCore/ConstantFold.cpp @@ -208,6 +208,22 @@ Constant *llvm::ConstantFoldCastInstruction(unsigned opc, const Constant *V, } } + // If the cast operand is a constant vector, perform the cast by + // operating on each element. In the cast of bitcasts, the element + // count may be mismatched; don't attempt to handle that here. + if (const ConstantVector *CV = dyn_cast<ConstantVector>(V)) + if (isa<VectorType>(DestTy) && + cast<VectorType>(DestTy)->getNumElements() == + CV->getType()->getNumElements()) { + std::vector<Constant*> res; + const VectorType *DestVecTy = cast<VectorType>(DestTy); + const Type *DstEltTy = DestVecTy->getElementType(); + for (unsigned i = 0, e = CV->getType()->getNumElements(); i != e; ++i) + res.push_back(ConstantExpr::getCast(opc, + CV->getOperand(i), DstEltTy)); + return ConstantVector::get(DestVecTy, res); + } + // We actually have to do a cast now. Perform the cast according to the // opcode specified. switch (opc) { @@ -237,14 +253,6 @@ Constant *llvm::ConstantFoldCastInstruction(unsigned opc, const Constant *V, APInt Val(DestBitWidth, 2, x); return ConstantInt::get(Val); } - if (const ConstantVector *CV = dyn_cast<ConstantVector>(V)) { - std::vector<Constant*> res; - const VectorType *DestVecTy = cast<VectorType>(DestTy); - const Type *DstEltTy = DestVecTy->getElementType(); - for (unsigned i = 0, e = CV->getType()->getNumElements(); i != e; ++i) - res.push_back(ConstantExpr::getCast(opc, CV->getOperand(i), DstEltTy)); - return ConstantVector::get(DestVecTy, res); - } return 0; // Can't fold. case Instruction::IntToPtr: //always treated as unsigned if (V->isNullValue()) // Is it an integral null value? @@ -266,14 +274,6 @@ Constant *llvm::ConstantFoldCastInstruction(unsigned opc, const Constant *V, APFloat::rmNearestTiesToEven); return ConstantFP::get(apf); } - if (const ConstantVector *CV = dyn_cast<ConstantVector>(V)) { - std::vector<Constant*> res; - const VectorType *DestVecTy = cast<VectorType>(DestTy); - const Type *DstEltTy = DestVecTy->getElementType(); - for (unsigned i = 0, e = CV->getType()->getNumElements(); i != e; ++i) - res.push_back(ConstantExpr::getCast(opc, CV->getOperand(i), DstEltTy)); - return ConstantVector::get(DestVecTy, res); - } return 0; case Instruction::ZExt: if (const ConstantInt *CI = dyn_cast<ConstantInt>(V)) { @@ -629,7 +629,7 @@ Constant *llvm::ConstantFoldBinaryInstruction(unsigned Opcode, } } - // Handle simplifications of the RHS when a constant int. + // Handle simplifications when the RHS is a constant int. 
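The ConstantFold.cpp hunk above hoists per-element folding of constant-vector casts ahead of the opcode switch, with one guard: lane counts must match, since a bitcast such as <2 x i32> to <4 x i16> cannot be folded lane by lane. The shape of the loop, as a small generic sketch:

    #include <iostream>
    #include <vector>

    // Fold a cast of a constant vector by casting each element, mirroring
    // the hoisted block above (ConstantExpr::getCast per element, then
    // rebuild the vector).
    template <typename From, typename To, typename CastFn>
    std::vector<To> foldVectorCast(const std::vector<From> &CV, CastFn Cast) {
        std::vector<To> Res;
        Res.reserve(CV.size());
        for (const From &Elt : CV)
            Res.push_back(Cast(Elt));
        return Res;
    }

    int main() {
        std::vector<double> V = { 1.9, -2.5 };
        auto R = foldVectorCast<double, long long>(
            V, [](double d) { return (long long)d; });  // fptosi per lane
        std::cout << R[0] << ' ' << R[1] << '\n';        // 1 -2
    }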
if (const ConstantInt *CI2 = dyn_cast<ConstantInt>(C2)) { switch (Opcode) { case Instruction::Add: @@ -773,6 +773,20 @@ Constant *llvm::ConstantFoldBinaryInstruction(unsigned Opcode, } } } + + switch (Opcode) { + case Instruction::SDiv: + case Instruction::UDiv: + case Instruction::URem: + case Instruction::SRem: + case Instruction::LShr: + case Instruction::AShr: + case Instruction::Shl: + if (CI1->equalsInt(0)) return const_cast<Constant*>(C1); + break; + default: + break; + } } else if (const ConstantFP *CFP1 = dyn_cast<ConstantFP>(C1)) { if (const ConstantFP *CFP2 = dyn_cast<ConstantFP>(C2)) { APFloat C1V = CFP1->getValueAPF(); diff --git a/lib/VMCore/Constants.cpp b/lib/VMCore/Constants.cpp index 69c503d..c164a3b 100644 --- a/lib/VMCore/Constants.cpp +++ b/lib/VMCore/Constants.cpp @@ -25,6 +25,9 @@ #include "llvm/Support/Debug.h" #include "llvm/Support/ManagedStatic.h" #include "llvm/Support/MathExtras.h" +#include "llvm/System/Mutex.h" +#include "llvm/System/RWMutex.h" +#include "llvm/System/Threading.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/SmallVector.h" #include <algorithm> @@ -35,6 +38,9 @@ using namespace llvm; // Constant Class //===----------------------------------------------------------------------===// +// Becomes a no-op when multithreading is disabled. +ManagedStatic<sys::SmartRWMutex<true> > ConstantsLock; + void Constant::destroyConstantImpl() { // When a Constant is destroyed, there may be lingering // references to the constant by other constants in the constant pool. These @@ -269,9 +275,20 @@ typedef DenseMap<DenseMapAPIntKeyInfo::KeyTy, ConstantInt*, DenseMapAPIntKeyInfo> IntMapTy; static ManagedStatic<IntMapTy> IntConstants; -ConstantInt *ConstantInt::get(const Type *Ty, uint64_t V, bool isSigned) { - const IntegerType *ITy = cast<IntegerType>(Ty); - return get(APInt(ITy->getBitWidth(), V, isSigned)); +ConstantInt *ConstantInt::get(const IntegerType *Ty, + uint64_t V, bool isSigned) { + return get(APInt(Ty->getBitWidth(), V, isSigned)); +} + +Constant *ConstantInt::get(const Type *Ty, uint64_t V, bool isSigned) { + Constant *C = get(cast<IntegerType>(Ty->getScalarType()), V, isSigned); + + // For vectors, broadcast the value. + if (const VectorType *VTy = dyn_cast<VectorType>(Ty)) + return + ConstantVector::get(std::vector<Constant *>(VTy->getNumElements(), C)); + + return C; } // Get a ConstantInt from an APInt. Note that the value stored in the DenseMap @@ -284,12 +301,35 @@ ConstantInt *ConstantInt::get(const APInt& V) { const IntegerType *ITy = IntegerType::get(V.getBitWidth()); // get an existing value or the insertion position DenseMapAPIntKeyInfo::KeyTy Key(V, ITy); + + ConstantsLock->reader_acquire(); ConstantInt *&Slot = (*IntConstants)[Key]; - // if it exists, return it. - if (Slot) + ConstantsLock->reader_release(); + + if (!Slot) { + sys::SmartScopedWriter<true> Writer(&*ConstantsLock); + ConstantInt *&NewSlot = (*IntConstants)[Key]; + if (!Slot) { + NewSlot = new ConstantInt(ITy, V); + } + + return NewSlot; + } else { return Slot; - // otherwise create a new one, insert it, and return it. - return Slot = new ConstantInt(ITy, V); + } +} + +Constant *ConstantInt::get(const Type *Ty, const APInt &V) { + ConstantInt *C = ConstantInt::get(V); + assert(C->getType() == Ty->getScalarType() && + "ConstantInt type doesn't match the type implied by its value!"); + + // For vectors, broadcast the value. 
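Several of the Constants.cpp changes above and below share one new behavior: the get() entry points now accept a vector type and splat the scalar constant across its lanes. A sketch with a toy type standing in for llvm::Type:

    #include <iostream>
    #include <vector>

    // Toy type: VectorElts == 0 means scalar, otherwise a vector with that
    // many lanes.  The constant is modelled as the list of its lanes.
    struct Type { unsigned VectorElts; };

    // get(Ty, V): build the scalar constant for Ty's scalar type, then
    // broadcast it when Ty is a vector type, as the new ConstantInt::get and
    // ConstantFP::get overloads do.
    std::vector<long> getConstant(const Type &Ty, long V) {
        if (Ty.VectorElts == 0)
            return std::vector<long>(1, V);              // plain scalar
        return std::vector<long>(Ty.VectorElts, V);      // splat across lanes
    }

    int main() {
        const Type V4 = { 4 };
        for (long L : getConstant(V4, 7))
            std::cout << L << ' ';                       // 7 7 7 7
        std::cout << '\n';
    }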
+ if (const VectorType *VTy = dyn_cast<VectorType>(Ty)) + return + ConstantVector::get(std::vector<Constant *>(VTy->getNumElements(), C)); + + return C; } //===----------------------------------------------------------------------===// @@ -368,34 +408,54 @@ static ManagedStatic<FPMapTy> FPConstants; ConstantFP *ConstantFP::get(const APFloat &V) { DenseMapAPFloatKeyInfo::KeyTy Key(V); - ConstantFP *&Slot = (*FPConstants)[Key]; - if (Slot) return Slot; - const Type *Ty; - if (&V.getSemantics() == &APFloat::IEEEsingle) - Ty = Type::FloatTy; - else if (&V.getSemantics() == &APFloat::IEEEdouble) - Ty = Type::DoubleTy; - else if (&V.getSemantics() == &APFloat::x87DoubleExtended) - Ty = Type::X86_FP80Ty; - else if (&V.getSemantics() == &APFloat::IEEEquad) - Ty = Type::FP128Ty; - else { - assert(&V.getSemantics() == &APFloat::PPCDoubleDouble&&"Unknown FP format"); - Ty = Type::PPC_FP128Ty; + ConstantsLock->reader_acquire(); + ConstantFP *&Slot = (*FPConstants)[Key]; + ConstantsLock->reader_release(); + + if (!Slot) { + sys::SmartScopedWriter<true> Writer(&*ConstantsLock); + ConstantFP *&NewSlot = (*FPConstants)[Key]; + if (!NewSlot) { + const Type *Ty; + if (&V.getSemantics() == &APFloat::IEEEsingle) + Ty = Type::FloatTy; + else if (&V.getSemantics() == &APFloat::IEEEdouble) + Ty = Type::DoubleTy; + else if (&V.getSemantics() == &APFloat::x87DoubleExtended) + Ty = Type::X86_FP80Ty; + else if (&V.getSemantics() == &APFloat::IEEEquad) + Ty = Type::FP128Ty; + else { + assert(&V.getSemantics() == &APFloat::PPCDoubleDouble && + "Unknown FP format"); + Ty = Type::PPC_FP128Ty; + } + NewSlot = new ConstantFP(Ty, V); + } + + return NewSlot; } - return Slot = new ConstantFP(Ty, V); + return Slot; } /// get() - This returns a constant fp for the specified value in the /// specified type. This should only be used for simple constant values like /// 2.0/1.0 etc, that are known-valid both as double and as the target format. -ConstantFP *ConstantFP::get(const Type *Ty, double V) { +Constant *ConstantFP::get(const Type *Ty, double V) { APFloat FV(V); bool ignored; - FV.convert(*TypeToFloatSemantics(Ty), APFloat::rmNearestTiesToEven, &ignored); - return get(FV); + FV.convert(*TypeToFloatSemantics(Ty->getScalarType()), + APFloat::rmNearestTiesToEven, &ignored); + Constant *C = get(FV); + + // For vectors, broadcast the value. + if (const VectorType *VTy = dyn_cast<VectorType>(Ty)) + return + ConstantVector::get(std::vector<Constant *>(VTy->getNumElements(), C)); + + return C; } //===----------------------------------------------------------------------===// @@ -1093,8 +1153,13 @@ namespace llvm { /// AbstractTypeMap - Map for abstract type constants. /// AbstractTypeMapTy AbstractTypeMap; + + /// ValueMapLock - Mutex for this map. + sys::SmartMutex<true> ValueMapLock; public: + // NOTE: This function is not locked. It is the caller's responsibility + // to enforce proper synchronization. typename MapTy::iterator map_end() { return Map.end(); } /// InsertOrGetItem - Return an iterator for the specified element. @@ -1102,6 +1167,8 @@ namespace llvm { /// entry and Exists=true. If not, the iterator points to the newly /// inserted entry and returns Exists=false. Newly inserted entries have /// I->second == 0, and should be filled in. + /// NOTE: This function is not locked. It is the caller's responsibility + // to enforce proper synchronization. 
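The locking discipline being introduced here, and reused below for MDString, MDNode, and the Type.cpp maps, is a read-mostly double check: probe under a reader lock, and on a miss take the writer lock and probe again before creating, since another thread may have filled the entry between the two acquisitions. A sketch with C++17's std::shared_mutex standing in for sys::SmartRWMutex<true>. One wrinkle worth noting: the recheck must test the slot looked up afresh under the writer lock; the ConstantInt::get hunk above tests the stale Slot where NewSlot looks intended.

    #include <iostream>
    #include <map>
    #include <mutex>
    #include <shared_mutex>

    // Double-checked uniquing table.  Readers are cheap on the hit path;
    // entries live for the process lifetime, as in a uniquing table.
    class UniqueInts {
        std::map<int, int*> Map;
        std::shared_mutex Lock;
    public:
        int *get(int Key) {
            {
                std::shared_lock<std::shared_mutex> Reader(Lock);
                auto It = Map.find(Key);
                if (It != Map.end()) return It->second;   // fast path
            }
            std::unique_lock<std::shared_mutex> Writer(Lock);
            int *&Slot = Map[Key];        // re-lookup under the writer lock
            if (!Slot)                    // recheck the fresh slot, not a stale one
                Slot = new int(Key);
            return Slot;
        }
    };

    int main() {
        UniqueInts U;
        std::cout << (U.get(42) == U.get(42)) << '\n';    // 1: uniqued
    }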
typename MapTy::iterator InsertOrGetItem(std::pair<MapKey, Constant *> &InsertVal, bool &Exists) { @@ -1131,19 +1198,10 @@ private: } return I; } -public: - /// getOrCreate - Return the specified constant from the map, creating it if - /// necessary. - ConstantClass *getOrCreate(const TypeClass *Ty, const ValType &V) { - MapKey Lookup(Ty, V); - typename MapTy::iterator I = Map.find(Lookup); - // Is it in the map? - if (I != Map.end()) - return static_cast<ConstantClass *>(I->second); - - // If no preexisting value, create one now... - ConstantClass *Result = + ConstantClass* Create(const TypeClass *Ty, const ValType &V, + typename MapTy::iterator I) { + ConstantClass* Result = ConstantCreator<ConstantClass,TypeClass,ValType>::create(Ty, V); assert(Result->getType() == Ty && "Type specified is not correct!"); @@ -1151,11 +1209,12 @@ public: if (HasLargeKey) // Remember the reverse mapping if needed. InverseMap.insert(std::make_pair(Result, I)); - - // If the type of the constant is abstract, make sure that an entry exists - // for it in the AbstractTypeMap. + + // If the type of the constant is abstract, make sure that an entry + // exists for it in the AbstractTypeMap. if (Ty->isAbstract()) { - typename AbstractTypeMapTy::iterator TI = AbstractTypeMap.find(Ty); + typename AbstractTypeMapTy::iterator TI = + AbstractTypeMap.find(Ty); if (TI == AbstractTypeMap.end()) { // Add ourselves to the ATU list of the type. @@ -1164,10 +1223,33 @@ public: AbstractTypeMap.insert(TI, std::make_pair(Ty, I)); } } + + return Result; + } +public: + + /// getOrCreate - Return the specified constant from the map, creating it if + /// necessary. + ConstantClass *getOrCreate(const TypeClass *Ty, const ValType &V) { + sys::SmartScopedLock<true> Lock(&ValueMapLock); + MapKey Lookup(Ty, V); + ConstantClass* Result = 0; + + typename MapTy::iterator I = Map.find(Lookup); + // Is it in the map? + if (I != Map.end()) + Result = static_cast<ConstantClass *>(I->second); + + if (!Result) { + // If no preexisting value, create one now... + Result = Create(Ty, V, I); + } + return Result; } void remove(ConstantClass *CP) { + sys::SmartScopedLock<true> Lock(&ValueMapLock); typename MapTy::iterator I = FindExistingElement(CP); assert(I != Map.end() && "Constant not found in constant table!"); assert(I->second == CP && "Didn't find correct element?"); @@ -1221,6 +1303,8 @@ public: /// MoveConstantToNewSlot - If we are about to change C to be the element /// specified by I, update our internal data structures to reflect this /// fact. + /// NOTE: This function is not locked. It is the responsibility of the + /// caller to enforce proper synchronization if using this method. void MoveConstantToNewSlot(ConstantClass *C, typename MapTy::iterator I) { // First, remove the old location of the specified constant in the map. typename MapTy::iterator OldI = FindExistingElement(C); @@ -1250,6 +1334,7 @@ public: } void refineAbstractType(const DerivedType *OldTy, const Type *NewTy) { + sys::SmartScopedLock<true> Lock(&ValueMapLock); typename AbstractTypeMapTy::iterator I = AbstractTypeMap.find(cast<Type>(OldTy)); @@ -1314,12 +1399,15 @@ static char getValType(ConstantAggregateZero *CPZ) { return 0; } ConstantAggregateZero *ConstantAggregateZero::get(const Type *Ty) { assert((isa<StructType>(Ty) || isa<ArrayType>(Ty) || isa<VectorType>(Ty)) && "Cannot create an aggregate zero of non-aggregate type!"); + + // Implicitly locked. 
return AggZeroConstants->getOrCreate(Ty, 0); } /// destroyConstant - Remove the constant from the constant table... /// void ConstantAggregateZero::destroyConstant() { + // Implicitly locked. AggZeroConstants->remove(this); destroyConstantImpl(); } @@ -1359,18 +1447,24 @@ Constant *ConstantArray::get(const ArrayType *Ty, // If this is an all-zero array, return a ConstantAggregateZero object if (!V.empty()) { Constant *C = V[0]; - if (!C->isNullValue()) + if (!C->isNullValue()) { + // Implicitly locked. return ArrayConstants->getOrCreate(Ty, V); + } for (unsigned i = 1, e = V.size(); i != e; ++i) - if (V[i] != C) + if (V[i] != C) { + // Implicitly locked. return ArrayConstants->getOrCreate(Ty, V); + } } + return ConstantAggregateZero::get(Ty); } /// destroyConstant - Remove the constant from the constant table... /// void ConstantArray::destroyConstant() { + // Implicitly locked. ArrayConstants->remove(this); destroyConstantImpl(); } @@ -1482,6 +1576,7 @@ Constant *ConstantStruct::get(const StructType *Ty, // Create a ConstantAggregateZero value if all elements are zeros... for (unsigned i = 0, e = V.size(); i != e; ++i) if (!V[i]->isNullValue()) + // Implicitly locked. return StructConstants->getOrCreate(Ty, V); return ConstantAggregateZero::get(Ty); @@ -1498,6 +1593,7 @@ Constant *ConstantStruct::get(const std::vector<Constant*> &V, bool packed) { // destroyConstant - Remove the constant from the constant table... // void ConstantStruct::destroyConstant() { + // Implicitly locked. StructConstants->remove(this); destroyConstantImpl(); } @@ -1552,6 +1648,8 @@ Constant *ConstantVector::get(const VectorType *Ty, return ConstantAggregateZero::get(Ty); if (isUndef) return UndefValue::get(Ty); + + // Implicitly locked. return VectorConstants->getOrCreate(Ty, V); } @@ -1563,6 +1661,7 @@ Constant *ConstantVector::get(const std::vector<Constant*> &V) { // destroyConstant - Remove the constant from the constant table... // void ConstantVector::destroyConstant() { + // Implicitly locked. VectorConstants->remove(this); destroyConstantImpl(); } @@ -1627,12 +1726,14 @@ static char getValType(ConstantPointerNull *) { ConstantPointerNull *ConstantPointerNull::get(const PointerType *Ty) { + // Implicitly locked. return NullPtrConstants->getOrCreate(Ty, 0); } // destroyConstant - Remove the constant from the constant table... // void ConstantPointerNull::destroyConstant() { + // Implicitly locked. NullPtrConstants->remove(this); destroyConstantImpl(); } @@ -1670,12 +1771,14 @@ static char getValType(UndefValue *) { UndefValue *UndefValue::get(const Type *Ty) { + // Implicitly locked. return UndefValueConstants->getOrCreate(Ty, 0); } // destroyConstant - Remove the constant from the constant table. // void UndefValue::destroyConstant() { + // Implicitly locked. 
UndefValueConstants->remove(this); destroyConstantImpl(); } @@ -1690,15 +1793,18 @@ MDString::MDString(const char *begin, const char *end) static ManagedStatic<StringMap<MDString*> > MDStringCache; MDString *MDString::get(const char *StrBegin, const char *StrEnd) { - StringMapEntry<MDString *> &Entry = MDStringCache->GetOrCreateValue(StrBegin, - StrEnd); + sys::SmartScopedWriter<true> Writer(&*ConstantsLock); + StringMapEntry<MDString *> &Entry = MDStringCache->GetOrCreateValue( + StrBegin, StrEnd); MDString *&S = Entry.getValue(); if (!S) S = new MDString(Entry.getKeyData(), Entry.getKeyData() + Entry.getKeyLength()); + return S; } void MDString::destroyConstant() { + sys::SmartScopedWriter<true> Writer(&*ConstantsLock); MDStringCache->erase(MDStringCache->find(StrBegin, StrEnd)); destroyConstantImpl(); } @@ -1724,18 +1830,27 @@ MDNode *MDNode::get(Value*const* Vals, unsigned NumVals) { for (unsigned i = 0; i != NumVals; ++i) ID.AddPointer(Vals[i]); + ConstantsLock->reader_acquire(); void *InsertPoint; - if (MDNode *N = MDNodeSet->FindNodeOrInsertPos(ID, InsertPoint)) - return N; - - // InsertPoint will have been set by the FindNodeOrInsertPos call. - MDNode *N = new(0) MDNode(Vals, NumVals); - MDNodeSet->InsertNode(N, InsertPoint); + MDNode *N = MDNodeSet->FindNodeOrInsertPos(ID, InsertPoint); + ConstantsLock->reader_release(); + + if (!N) { + sys::SmartScopedWriter<true> Writer(&*ConstantsLock); + N = MDNodeSet->FindNodeOrInsertPos(ID, InsertPoint); + if (!N) { + // InsertPoint will have been set by the FindNodeOrInsertPos call. + N = new(0) MDNode(Vals, NumVals); + MDNodeSet->InsertNode(N, InsertPoint); + } + } return N; } void MDNode::destroyConstant() { + sys::SmartScopedWriter<true> Writer(&*ConstantsLock); MDNodeSet->RemoveNode(this); + destroyConstantImpl(); } @@ -1902,6 +2017,8 @@ static inline Constant *getFoldedCast( // Look up the constant in the table first to ensure uniqueness std::vector<Constant*> argVec(1, C); ExprMapKeyType Key(opc, argVec); + + // Implicitly locked. 
return ExprConstants->getOrCreate(Ty, Key); } @@ -1932,19 +2049,19 @@ Constant *ConstantExpr::getCast(unsigned oc, Constant *C, const Type *Ty) { } Constant *ConstantExpr::getZExtOrBitCast(Constant *C, const Type *Ty) { - if (C->getType()->getPrimitiveSizeInBits() == Ty->getPrimitiveSizeInBits()) + if (C->getType()->getScalarSizeInBits() == Ty->getScalarSizeInBits()) return getCast(Instruction::BitCast, C, Ty); return getCast(Instruction::ZExt, C, Ty); } Constant *ConstantExpr::getSExtOrBitCast(Constant *C, const Type *Ty) { - if (C->getType()->getPrimitiveSizeInBits() == Ty->getPrimitiveSizeInBits()) + if (C->getType()->getScalarSizeInBits() == Ty->getScalarSizeInBits()) return getCast(Instruction::BitCast, C, Ty); return getCast(Instruction::SExt, C, Ty); } Constant *ConstantExpr::getTruncOrBitCast(Constant *C, const Type *Ty) { - if (C->getType()->getPrimitiveSizeInBits() == Ty->getPrimitiveSizeInBits()) + if (C->getType()->getScalarSizeInBits() == Ty->getScalarSizeInBits()) return getCast(Instruction::BitCast, C, Ty); return getCast(Instruction::Trunc, C, Ty); } @@ -1960,9 +2077,10 @@ Constant *ConstantExpr::getPointerCast(Constant *S, const Type *Ty) { Constant *ConstantExpr::getIntegerCast(Constant *C, const Type *Ty, bool isSigned) { - assert(C->getType()->isInteger() && Ty->isInteger() && "Invalid cast"); - unsigned SrcBits = C->getType()->getPrimitiveSizeInBits(); - unsigned DstBits = Ty->getPrimitiveSizeInBits(); + assert(C->getType()->isIntOrIntVector() && + Ty->isIntOrIntVector() && "Invalid cast"); + unsigned SrcBits = C->getType()->getScalarSizeInBits(); + unsigned DstBits = Ty->getScalarSizeInBits(); Instruction::CastOps opcode = (SrcBits == DstBits ? Instruction::BitCast : (SrcBits > DstBits ? Instruction::Trunc : @@ -1971,10 +2089,10 @@ Constant *ConstantExpr::getIntegerCast(Constant *C, const Type *Ty, } Constant *ConstantExpr::getFPCast(Constant *C, const Type *Ty) { - assert(C->getType()->isFloatingPoint() && Ty->isFloatingPoint() && + assert(C->getType()->isFPOrFPVector() && Ty->isFPOrFPVector() && "Invalid cast"); - unsigned SrcBits = C->getType()->getPrimitiveSizeInBits(); - unsigned DstBits = Ty->getPrimitiveSizeInBits(); + unsigned SrcBits = C->getType()->getScalarSizeInBits(); + unsigned DstBits = Ty->getScalarSizeInBits(); if (SrcBits == DstBits) return C; // Avoid a useless cast Instruction::CastOps opcode = @@ -1983,42 +2101,67 @@ Constant *ConstantExpr::getFPCast(Constant *C, const Type *Ty) { } Constant *ConstantExpr::getTrunc(Constant *C, const Type *Ty) { - assert(C->getType()->isInteger() && "Trunc operand must be integer"); - assert(Ty->isInteger() && "Trunc produces only integral"); - assert(C->getType()->getPrimitiveSizeInBits() > Ty->getPrimitiveSizeInBits()&& +#ifndef NDEBUG + bool fromVec = C->getType()->getTypeID() == Type::VectorTyID; + bool toVec = Ty->getTypeID() == Type::VectorTyID; +#endif + assert((fromVec == toVec) && "Cannot convert from scalar to/from vector"); + assert(C->getType()->isIntOrIntVector() && "Trunc operand must be integer"); + assert(Ty->isIntOrIntVector() && "Trunc produces only integral"); + assert(C->getType()->getScalarSizeInBits() > Ty->getScalarSizeInBits()&& "SrcTy must be larger than DestTy for Trunc!"); return getFoldedCast(Instruction::Trunc, C, Ty); } Constant *ConstantExpr::getSExt(Constant *C, const Type *Ty) { - assert(C->getType()->isInteger() && "SEXt operand must be integral"); - assert(Ty->isInteger() && "SExt produces only integer"); - assert(C->getType()->getPrimitiveSizeInBits() < 
Ty->getPrimitiveSizeInBits()&& +#ifndef NDEBUG + bool fromVec = C->getType()->getTypeID() == Type::VectorTyID; + bool toVec = Ty->getTypeID() == Type::VectorTyID; +#endif + assert((fromVec == toVec) && "Cannot convert from scalar to/from vector"); + assert(C->getType()->isIntOrIntVector() && "SExt operand must be integral"); + assert(Ty->isIntOrIntVector() && "SExt produces only integer"); + assert(C->getType()->getScalarSizeInBits() < Ty->getScalarSizeInBits()&& "SrcTy must be smaller than DestTy for SExt!"); return getFoldedCast(Instruction::SExt, C, Ty); } Constant *ConstantExpr::getZExt(Constant *C, const Type *Ty) { - assert(C->getType()->isInteger() && "ZEXt operand must be integral"); - assert(Ty->isInteger() && "ZExt produces only integer"); - assert(C->getType()->getPrimitiveSizeInBits() < Ty->getPrimitiveSizeInBits()&& +#ifndef NDEBUG + bool fromVec = C->getType()->getTypeID() == Type::VectorTyID; + bool toVec = Ty->getTypeID() == Type::VectorTyID; +#endif + assert((fromVec == toVec) && "Cannot convert from scalar to/from vector"); + assert(C->getType()->isIntOrIntVector() && "ZEXt operand must be integral"); + assert(Ty->isIntOrIntVector() && "ZExt produces only integer"); + assert(C->getType()->getScalarSizeInBits() < Ty->getScalarSizeInBits()&& "SrcTy must be smaller than DestTy for ZExt!"); return getFoldedCast(Instruction::ZExt, C, Ty); } Constant *ConstantExpr::getFPTrunc(Constant *C, const Type *Ty) { - assert(C->getType()->isFloatingPoint() && Ty->isFloatingPoint() && - C->getType()->getPrimitiveSizeInBits() > Ty->getPrimitiveSizeInBits()&& +#ifndef NDEBUG + bool fromVec = C->getType()->getTypeID() == Type::VectorTyID; + bool toVec = Ty->getTypeID() == Type::VectorTyID; +#endif + assert((fromVec == toVec) && "Cannot convert from scalar to/from vector"); + assert(C->getType()->isFPOrFPVector() && Ty->isFPOrFPVector() && + C->getType()->getScalarSizeInBits() > Ty->getScalarSizeInBits()&& "This is an illegal floating point truncation!"); return getFoldedCast(Instruction::FPTrunc, C, Ty); } Constant *ConstantExpr::getFPExtend(Constant *C, const Type *Ty) { - assert(C->getType()->isFloatingPoint() && Ty->isFloatingPoint() && - C->getType()->getPrimitiveSizeInBits() < Ty->getPrimitiveSizeInBits()&& +#ifndef NDEBUG + bool fromVec = C->getType()->getTypeID() == Type::VectorTyID; + bool toVec = Ty->getTypeID() == Type::VectorTyID; +#endif + assert((fromVec == toVec) && "Cannot convert from scalar to/from vector"); + assert(C->getType()->isFPOrFPVector() && Ty->isFPOrFPVector() && + C->getType()->getScalarSizeInBits() < Ty->getScalarSizeInBits()&& "This is an illegal floating point extension!"); return getFoldedCast(Instruction::FPExt, C, Ty); } @@ -2136,6 +2279,8 @@ Constant *ConstantExpr::getTy(const Type *ReqTy, unsigned Opcode, std::vector<Constant*> argVec(1, C1); argVec.push_back(C2); ExprMapKeyType Key(Opcode, argVec); + + // Implicitly locked. 
return ExprConstants->getOrCreate(ReqTy, Key); } @@ -2188,34 +2333,30 @@ Constant *ConstantExpr::get(unsigned Opcode, Constant *C1, Constant *C2) { case Instruction::UDiv: case Instruction::SDiv: assert(C1->getType() == C2->getType() && "Op types should be identical!"); - assert((C1->getType()->isInteger() || (isa<VectorType>(C1->getType()) && - cast<VectorType>(C1->getType())->getElementType()->isInteger())) && + assert(C1->getType()->isIntOrIntVector() && "Tried to create an arithmetic operation on a non-arithmetic type!"); break; case Instruction::FDiv: assert(C1->getType() == C2->getType() && "Op types should be identical!"); - assert((C1->getType()->isFloatingPoint() || (isa<VectorType>(C1->getType()) - && cast<VectorType>(C1->getType())->getElementType()->isFloatingPoint())) - && "Tried to create an arithmetic operation on a non-arithmetic type!"); + assert(C1->getType()->isFPOrFPVector() && + "Tried to create an arithmetic operation on a non-arithmetic type!"); break; case Instruction::URem: case Instruction::SRem: assert(C1->getType() == C2->getType() && "Op types should be identical!"); - assert((C1->getType()->isInteger() || (isa<VectorType>(C1->getType()) && - cast<VectorType>(C1->getType())->getElementType()->isInteger())) && + assert(C1->getType()->isIntOrIntVector() && "Tried to create an arithmetic operation on a non-arithmetic type!"); break; case Instruction::FRem: assert(C1->getType() == C2->getType() && "Op types should be identical!"); - assert((C1->getType()->isFloatingPoint() || (isa<VectorType>(C1->getType()) - && cast<VectorType>(C1->getType())->getElementType()->isFloatingPoint())) - && "Tried to create an arithmetic operation on a non-arithmetic type!"); + assert(C1->getType()->isFPOrFPVector() && + "Tried to create an arithmetic operation on a non-arithmetic type!"); break; case Instruction::And: case Instruction::Or: case Instruction::Xor: assert(C1->getType() == C2->getType() && "Op types should be identical!"); - assert((C1->getType()->isInteger() || isa<VectorType>(C1->getType())) && + assert(C1->getType()->isIntOrIntVector() && "Tried to create a logical operation on a non-integral type!"); break; case Instruction::Shl: @@ -2251,6 +2392,8 @@ Constant *ConstantExpr::getSelectTy(const Type *ReqTy, Constant *C, argVec[1] = V1; argVec[2] = V2; ExprMapKeyType Key(Instruction::Select, argVec); + + // Implicitly locked. return ExprConstants->getOrCreate(ReqTy, Key); } @@ -2274,6 +2417,8 @@ Constant *ConstantExpr::getGetElementPtrTy(const Type *ReqTy, Constant *C, for (unsigned i = 0; i != NumIdx; ++i) ArgVec.push_back(cast<Constant>(Idxs[i])); const ExprMapKeyType Key(Instruction::GetElementPtr, ArgVec); + + // Implicitly locked. return ExprConstants->getOrCreate(ReqTy, Key); } @@ -2308,6 +2453,8 @@ ConstantExpr::getICmp(unsigned short pred, Constant* LHS, Constant* RHS) { ArgVec.push_back(RHS); // Get the key type with both the opcode and predicate const ExprMapKeyType Key(Instruction::ICmp, ArgVec, pred); + + // Implicitly locked. return ExprConstants->getOrCreate(Type::Int1Ty, Key); } @@ -2325,6 +2472,8 @@ ConstantExpr::getFCmp(unsigned short pred, Constant* LHS, Constant* RHS) { ArgVec.push_back(RHS); // Get the key type with both the opcode and predicate const ExprMapKeyType Key(Instruction::FCmp, ArgVec, pred); + + // Implicitly locked. 
return ExprConstants->getOrCreate(Type::Int1Ty, Key); } @@ -2370,6 +2519,8 @@ ConstantExpr::getVICmp(unsigned short pred, Constant* LHS, Constant* RHS) { ArgVec.push_back(RHS); // Get the key type with both the opcode and predicate const ExprMapKeyType Key(Instruction::VICmp, ArgVec, pred); + + // Implicitly locked. return ExprConstants->getOrCreate(LHS->getType(), Key); } @@ -2417,6 +2568,8 @@ ConstantExpr::getVFCmp(unsigned short pred, Constant* LHS, Constant* RHS) { ArgVec.push_back(RHS); // Get the key type with both the opcode and predicate const ExprMapKeyType Key(Instruction::VFCmp, ArgVec, pred); + + // Implicitly locked. return ExprConstants->getOrCreate(ResultTy, Key); } @@ -2428,6 +2581,8 @@ Constant *ConstantExpr::getExtractElementTy(const Type *ReqTy, Constant *Val, std::vector<Constant*> ArgVec(1, Val); ArgVec.push_back(Idx); const ExprMapKeyType Key(Instruction::ExtractElement,ArgVec); + + // Implicitly locked. return ExprConstants->getOrCreate(ReqTy, Key); } @@ -2449,6 +2604,8 @@ Constant *ConstantExpr::getInsertElementTy(const Type *ReqTy, Constant *Val, ArgVec.push_back(Elt); ArgVec.push_back(Idx); const ExprMapKeyType Key(Instruction::InsertElement,ArgVec); + + // Implicitly locked. return ExprConstants->getOrCreate(ReqTy, Key); } @@ -2472,6 +2629,8 @@ Constant *ConstantExpr::getShuffleVectorTy(const Type *ReqTy, Constant *V1, ArgVec.push_back(V2); ArgVec.push_back(Mask); const ExprMapKeyType Key(Instruction::ShuffleVector,ArgVec); + + // Implicitly locked. return ExprConstants->getOrCreate(ReqTy, Key); } @@ -2555,6 +2714,7 @@ Constant *ConstantExpr::getZeroValueForNegationExpr(const Type *Ty) { // destroyConstant - Remove the constant from the constant table... // void ConstantExpr::destroyConstant() { + // Implicitly locked. ExprConstants->remove(this); destroyConstantImpl(); } @@ -2619,6 +2779,7 @@ void ConstantArray::replaceUsesOfWithOnConstant(Value *From, Value *To, Replacement = ConstantAggregateZero::get(getType()); } else { // Check to see if we have this array type already. + sys::SmartScopedWriter<true> Writer(&*ConstantsLock); bool Exists; ArrayConstantsTy::MapTy::iterator I = ArrayConstants->InsertOrGetItem(Lookup, Exists); @@ -2694,6 +2855,7 @@ void ConstantStruct::replaceUsesOfWithOnConstant(Value *From, Value *To, Replacement = ConstantAggregateZero::get(getType()); } else { // Check to see if we have this array type already. + sys::SmartScopedWriter<true> Writer(&*ConstantsLock); bool Exists; StructConstantsTy::MapTy::iterator I = StructConstants->InsertOrGetItem(Lookup, Exists); diff --git a/lib/VMCore/Function.cpp b/lib/VMCore/Function.cpp index 54bd895..0450566 100644 --- a/lib/VMCore/Function.cpp +++ b/lib/VMCore/Function.cpp @@ -16,7 +16,10 @@ #include "llvm/IntrinsicInst.h" #include "llvm/CodeGen/ValueTypes.h" #include "llvm/Support/LeakDetector.h" +#include "llvm/Support/ManagedStatic.h" #include "llvm/Support/StringPool.h" +#include "llvm/System/RWMutex.h" +#include "llvm/System/Threading.h" #include "SymbolTableListTraitsImpl.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/StringExtras.h" @@ -230,17 +233,21 @@ void Function::removeAttribute(unsigned i, Attributes attr) { // use GC. 
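The Function.cpp hunks just below guard the lazily-created GC-name side table with the same reader/writer split: queries take the shared lock, mutations the exclusive one. A self-contained sketch, with std::shared_mutex and a plain std::map standing in for sys::SmartRWMutex and the DenseMap/StringPool pair:

    #include <cassert>
    #include <iostream>
    #include <map>
    #include <mutex>
    #include <shared_mutex>
    #include <string>

    class GCTable {
        std::map<const void*, std::string> Names;
        mutable std::shared_mutex Lock;
    public:
        bool hasGC(const void *F) const {
            std::shared_lock<std::shared_mutex> Reader(Lock);
            return Names.count(F) != 0;
        }
        std::string getGC(const void *F) const {
            std::shared_lock<std::shared_mutex> Reader(Lock);
            auto It = Names.find(F);
            assert(It != Names.end() && "Function has no collector");
            return It->second;
        }
        void setGC(const void *F, std::string S) {
            std::unique_lock<std::shared_mutex> Writer(Lock);
            Names[F] = std::move(S);
        }
        void clearGC(const void *F) {
            std::unique_lock<std::shared_mutex> Writer(Lock);
            Names.erase(F);
        }
    };

    int main() {
        GCTable T; int F;
        T.setGC(&F, "shadow-stack");
        std::cout << T.hasGC(&F) << ' ' << T.getGC(&F) << '\n';
    }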
static DenseMap<const Function*,PooledStringPtr> *GCNames; static StringPool *GCNamePool; +static ManagedStatic<sys::SmartRWMutex<true> > GCLock; bool Function::hasGC() const { + sys::SmartScopedReader<true> Reader(&*GCLock); return GCNames && GCNames->count(this); } const char *Function::getGC() const { assert(hasGC() && "Function has no collector"); + sys::SmartScopedReader<true> Reader(&*GCLock); return *(*GCNames)[this]; } void Function::setGC(const char *Str) { + sys::SmartScopedWriter<true> Writer(&*GCLock); if (!GCNamePool) GCNamePool = new StringPool(); if (!GCNames) @@ -249,6 +256,7 @@ void Function::setGC(const char *Str) { } void Function::clearGC() { + sys::SmartScopedWriter<true> Writer(&*GCLock); if (GCNames) { GCNames->erase(this); if (GCNames->empty()) { diff --git a/lib/VMCore/Instructions.cpp b/lib/VMCore/Instructions.cpp index 4c228fe..6a6424d 100644 --- a/lib/VMCore/Instructions.cpp +++ b/lib/VMCore/Instructions.cpp @@ -1310,7 +1310,7 @@ bool InsertElementInst::isValidOperands(const Value *Vec, const Value *Elt, return false;// Second operand of insertelement must be vector element type. if (Index->getType() != Type::Int32Ty) - return false; // Third operand of insertelement must be uint. + return false; // Third operand of insertelement must be i32. return true; } @@ -1576,9 +1576,8 @@ void BinaryOperator::init(BinaryOps iType) { case FDiv: assert(getType() == LHS->getType() && "Arithmetic operation should return same type as operands!"); - assert((getType()->isFloatingPoint() || (isa<VectorType>(getType()) && - cast<VectorType>(getType())->getElementType()->isFloatingPoint())) - && "Incorrect operand type (not floating point) for FDIV"); + assert(getType()->isFPOrFPVector() && + "Incorrect operand type (not floating point) for FDIV"); break; case URem: case SRem: @@ -1591,9 +1590,8 @@ void BinaryOperator::init(BinaryOps iType) { case FRem: assert(getType() == LHS->getType() && "Arithmetic operation should return same type as operands!"); - assert((getType()->isFloatingPoint() || (isa<VectorType>(getType()) && - cast<VectorType>(getType())->getElementType()->isFloatingPoint())) - && "Incorrect operand type (not floating point) for FREM"); + assert(getType()->isFPOrFPVector() && + "Incorrect operand type (not floating point) for FREM"); break; case Shl: case LShr: @@ -1837,11 +1835,11 @@ bool CastInst::isNoopCast(const Type *IntPtrTy) const { case Instruction::BitCast: return true; // BitCast never modifies bits. case Instruction::PtrToInt: - return IntPtrTy->getPrimitiveSizeInBits() == - getType()->getPrimitiveSizeInBits(); + return IntPtrTy->getScalarSizeInBits() == + getType()->getScalarSizeInBits(); case Instruction::IntToPtr: - return IntPtrTy->getPrimitiveSizeInBits() == - getOperand(0)->getType()->getPrimitiveSizeInBits(); + return IntPtrTy->getScalarSizeInBits() == + getOperand(0)->getType()->getScalarSizeInBits(); } } @@ -1880,8 +1878,8 @@ unsigned CastInst::isEliminableCastPair( // BITCONVERT = FirstClass n/a FirstClass n/a // // NOTE: some transforms are safe, but we consider them to be non-profitable. - // For example, we could merge "fptoui double to uint" + "zext uint to ulong", - // into "fptoui double to ulong", but this loses information about the range + // For example, we could merge "fptoui double to i32" + "zext i32 to i64", + // into "fptoui double to i64", but this loses information about the range // of the produced value (we no longer know the top-part is all zeros). 
// Further this conversion is often much more expensive for typical hardware, // and causes issues when building libgcc. We disallow fptosi+sext for the @@ -1946,8 +1944,8 @@ unsigned CastInst::isEliminableCastPair( return 0; case 7: { // ptrtoint, inttoptr -> bitcast (ptr -> ptr) if int size is >= ptr size - unsigned PtrSize = IntPtrTy->getPrimitiveSizeInBits(); - unsigned MidSize = MidTy->getPrimitiveSizeInBits(); + unsigned PtrSize = IntPtrTy->getScalarSizeInBits(); + unsigned MidSize = MidTy->getScalarSizeInBits(); if (MidSize >= PtrSize) return Instruction::BitCast; return 0; @@ -1956,8 +1954,8 @@ unsigned CastInst::isEliminableCastPair( // ext, trunc -> bitcast, if the SrcTy and DstTy are same size // ext, trunc -> ext, if sizeof(SrcTy) < sizeof(DstTy) // ext, trunc -> trunc, if sizeof(SrcTy) > sizeof(DstTy) - unsigned SrcSize = SrcTy->getPrimitiveSizeInBits(); - unsigned DstSize = DstTy->getPrimitiveSizeInBits(); + unsigned SrcSize = SrcTy->getScalarSizeInBits(); + unsigned DstSize = DstTy->getScalarSizeInBits(); if (SrcSize == DstSize) return Instruction::BitCast; else if (SrcSize < DstSize) @@ -1985,9 +1983,9 @@ unsigned CastInst::isEliminableCastPair( return 0; case 13: { // inttoptr, ptrtoint -> bitcast if SrcSize<=PtrSize and SrcSize==DstSize - unsigned PtrSize = IntPtrTy->getPrimitiveSizeInBits(); - unsigned SrcSize = SrcTy->getPrimitiveSizeInBits(); - unsigned DstSize = DstTy->getPrimitiveSizeInBits(); + unsigned PtrSize = IntPtrTy->getScalarSizeInBits(); + unsigned SrcSize = SrcTy->getScalarSizeInBits(); + unsigned DstSize = DstTy->getScalarSizeInBits(); if (SrcSize <= PtrSize && SrcSize == DstSize) return Instruction::BitCast; return 0; @@ -2051,7 +2049,7 @@ CastInst *CastInst::Create(Instruction::CastOps op, Value *S, const Type *Ty, CastInst *CastInst::CreateZExtOrBitCast(Value *S, const Type *Ty, const std::string &Name, Instruction *InsertBefore) { - if (S->getType()->getPrimitiveSizeInBits() == Ty->getPrimitiveSizeInBits()) + if (S->getType()->getScalarSizeInBits() == Ty->getScalarSizeInBits()) return Create(Instruction::BitCast, S, Ty, Name, InsertBefore); return Create(Instruction::ZExt, S, Ty, Name, InsertBefore); } @@ -2059,7 +2057,7 @@ CastInst *CastInst::CreateZExtOrBitCast(Value *S, const Type *Ty, CastInst *CastInst::CreateZExtOrBitCast(Value *S, const Type *Ty, const std::string &Name, BasicBlock *InsertAtEnd) { - if (S->getType()->getPrimitiveSizeInBits() == Ty->getPrimitiveSizeInBits()) + if (S->getType()->getScalarSizeInBits() == Ty->getScalarSizeInBits()) return Create(Instruction::BitCast, S, Ty, Name, InsertAtEnd); return Create(Instruction::ZExt, S, Ty, Name, InsertAtEnd); } @@ -2067,7 +2065,7 @@ CastInst *CastInst::CreateZExtOrBitCast(Value *S, const Type *Ty, CastInst *CastInst::CreateSExtOrBitCast(Value *S, const Type *Ty, const std::string &Name, Instruction *InsertBefore) { - if (S->getType()->getPrimitiveSizeInBits() == Ty->getPrimitiveSizeInBits()) + if (S->getType()->getScalarSizeInBits() == Ty->getScalarSizeInBits()) return Create(Instruction::BitCast, S, Ty, Name, InsertBefore); return Create(Instruction::SExt, S, Ty, Name, InsertBefore); } @@ -2075,7 +2073,7 @@ CastInst *CastInst::CreateSExtOrBitCast(Value *S, const Type *Ty, CastInst *CastInst::CreateSExtOrBitCast(Value *S, const Type *Ty, const std::string &Name, BasicBlock *InsertAtEnd) { - if (S->getType()->getPrimitiveSizeInBits() == Ty->getPrimitiveSizeInBits()) + if (S->getType()->getScalarSizeInBits() == Ty->getScalarSizeInBits()) return Create(Instruction::BitCast, S, Ty, Name, 
InsertAtEnd); return Create(Instruction::SExt, S, Ty, Name, InsertAtEnd); } @@ -2083,7 +2081,7 @@ CastInst *CastInst::CreateSExtOrBitCast(Value *S, const Type *Ty, CastInst *CastInst::CreateTruncOrBitCast(Value *S, const Type *Ty, const std::string &Name, Instruction *InsertBefore) { - if (S->getType()->getPrimitiveSizeInBits() == Ty->getPrimitiveSizeInBits()) + if (S->getType()->getScalarSizeInBits() == Ty->getScalarSizeInBits()) return Create(Instruction::BitCast, S, Ty, Name, InsertBefore); return Create(Instruction::Trunc, S, Ty, Name, InsertBefore); } @@ -2091,7 +2089,7 @@ CastInst *CastInst::CreateTruncOrBitCast(Value *S, const Type *Ty, CastInst *CastInst::CreateTruncOrBitCast(Value *S, const Type *Ty, const std::string &Name, BasicBlock *InsertAtEnd) { - if (S->getType()->getPrimitiveSizeInBits() == Ty->getPrimitiveSizeInBits()) + if (S->getType()->getScalarSizeInBits() == Ty->getScalarSizeInBits()) return Create(Instruction::BitCast, S, Ty, Name, InsertAtEnd); return Create(Instruction::Trunc, S, Ty, Name, InsertAtEnd); } @@ -2125,8 +2123,8 @@ CastInst *CastInst::CreateIntegerCast(Value *C, const Type *Ty, bool isSigned, const std::string &Name, Instruction *InsertBefore) { assert(C->getType()->isInteger() && Ty->isInteger() && "Invalid cast"); - unsigned SrcBits = C->getType()->getPrimitiveSizeInBits(); - unsigned DstBits = Ty->getPrimitiveSizeInBits(); + unsigned SrcBits = C->getType()->getScalarSizeInBits(); + unsigned DstBits = Ty->getScalarSizeInBits(); Instruction::CastOps opcode = (SrcBits == DstBits ? Instruction::BitCast : (SrcBits > DstBits ? Instruction::Trunc : @@ -2137,9 +2135,10 @@ CastInst *CastInst::CreateIntegerCast(Value *C, const Type *Ty, CastInst *CastInst::CreateIntegerCast(Value *C, const Type *Ty, bool isSigned, const std::string &Name, BasicBlock *InsertAtEnd) { - assert(C->getType()->isInteger() && Ty->isInteger() && "Invalid cast"); - unsigned SrcBits = C->getType()->getPrimitiveSizeInBits(); - unsigned DstBits = Ty->getPrimitiveSizeInBits(); + assert(C->getType()->isIntOrIntVector() && Ty->isIntOrIntVector() && + "Invalid cast"); + unsigned SrcBits = C->getType()->getScalarSizeInBits(); + unsigned DstBits = Ty->getScalarSizeInBits(); Instruction::CastOps opcode = (SrcBits == DstBits ? Instruction::BitCast : (SrcBits > DstBits ? Instruction::Trunc : @@ -2150,10 +2149,10 @@ CastInst *CastInst::CreateIntegerCast(Value *C, const Type *Ty, CastInst *CastInst::CreateFPCast(Value *C, const Type *Ty, const std::string &Name, Instruction *InsertBefore) { - assert(C->getType()->isFloatingPoint() && Ty->isFloatingPoint() && + assert(C->getType()->isFPOrFPVector() && Ty->isFPOrFPVector() && "Invalid cast"); - unsigned SrcBits = C->getType()->getPrimitiveSizeInBits(); - unsigned DstBits = Ty->getPrimitiveSizeInBits(); + unsigned SrcBits = C->getType()->getScalarSizeInBits(); + unsigned DstBits = Ty->getScalarSizeInBits(); Instruction::CastOps opcode = (SrcBits == DstBits ? Instruction::BitCast : (SrcBits > DstBits ? 
Instruction::FPTrunc : Instruction::FPExt)); @@ -2163,10 +2162,10 @@ CastInst *CastInst::CreateFPCast(Value *C, const Type *Ty, CastInst *CastInst::CreateFPCast(Value *C, const Type *Ty, const std::string &Name, BasicBlock *InsertAtEnd) { - assert(C->getType()->isFloatingPoint() && Ty->isFloatingPoint() && + assert(C->getType()->isFPOrFPVector() && Ty->isFPOrFPVector() && "Invalid cast"); - unsigned SrcBits = C->getType()->getPrimitiveSizeInBits(); - unsigned DstBits = Ty->getPrimitiveSizeInBits(); + unsigned SrcBits = C->getType()->getScalarSizeInBits(); + unsigned DstBits = Ty->getScalarSizeInBits(); Instruction::CastOps opcode = (SrcBits == DstBits ? Instruction::BitCast : (SrcBits > DstBits ? Instruction::FPTrunc : Instruction::FPExt)); @@ -2183,8 +2182,8 @@ bool CastInst::isCastable(const Type *SrcTy, const Type *DestTy) { return true; // Get the bit sizes, we'll need these - unsigned SrcBits = SrcTy->getPrimitiveSizeInBits(); // 0 for ptr/vector - unsigned DestBits = DestTy->getPrimitiveSizeInBits(); // 0 for ptr/vector + unsigned SrcBits = SrcTy->getScalarSizeInBits(); // 0 for ptr + unsigned DestBits = DestTy->getScalarSizeInBits(); // 0 for ptr // Run through the possibilities ... if (DestTy->isInteger()) { // Casting to integral @@ -2242,8 +2241,8 @@ CastInst::getCastOpcode( const Value *Src, bool SrcIsSigned, const Type *DestTy, bool DestIsSigned) { // Get the bit sizes, we'll need these const Type *SrcTy = Src->getType(); - unsigned SrcBits = SrcTy->getPrimitiveSizeInBits(); // 0 for ptr/vector - unsigned DestBits = DestTy->getPrimitiveSizeInBits(); // 0 for ptr/vector + unsigned SrcBits = SrcTy->getScalarSizeInBits(); // 0 for ptr + unsigned DestBits = DestTy->getScalarSizeInBits(); // 0 for ptr assert(SrcTy->isFirstClassType() && DestTy->isFirstClassType() && "Only first class types are castable!"); @@ -2344,8 +2343,8 @@ CastInst::castIsValid(Instruction::CastOps op, Value *S, const Type *DstTy) { return false; // Get the size of the types in bits, we'll need this later - unsigned SrcBitSize = SrcTy->getPrimitiveSizeInBits(); - unsigned DstBitSize = DstTy->getPrimitiveSizeInBits(); + unsigned SrcBitSize = SrcTy->getScalarSizeInBits(); + unsigned DstBitSize = DstTy->getScalarSizeInBits(); // Switch on the opcode provided switch (op) { @@ -2400,7 +2399,7 @@ CastInst::castIsValid(Instruction::CastOps op, Value *S, const Type *DstTy) { // Now we know we're not dealing with a pointer/non-pointer mismatch. In all // these cases, the cast is okay if the source and destination bit widths // are identical. 
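All of the Create*OrBitCast and CreateIntegerCast/CreateFPCast rewrites above reduce to one width rule, now phrased in scalar bits so the same comparison works for i32 and for <4 x i32>: equal widths degenerate to a bitcast, narrowing truncates, widening extends with the requested signedness. The integer case, distilled:

    #include <iostream>

    enum CastOp { BitCast, Trunc, ZExt, SExt };

    // Pick the integer cast opcode purely from the scalar bit widths.
    CastOp getIntegerCastOp(unsigned SrcBits, unsigned DstBits, bool IsSigned) {
        if (SrcBits == DstBits) return BitCast;
        if (SrcBits > DstBits)  return Trunc;
        return IsSigned ? SExt : ZExt;
    }

    int main() {
        std::cout << (getIntegerCastOp(32, 64, false) == ZExt)  << '\n'   // 1
                  << (getIntegerCastOp(64, 32, true)  == Trunc) << '\n';  // 1
    }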
- return SrcBitSize == DstBitSize; + return SrcTy->getPrimitiveSizeInBits() == DstTy->getPrimitiveSizeInBits(); } } diff --git a/lib/VMCore/LeakDetector.cpp b/lib/VMCore/LeakDetector.cpp index 1bf9171..b5926bc 100644 --- a/lib/VMCore/LeakDetector.cpp +++ b/lib/VMCore/LeakDetector.cpp @@ -14,7 +14,10 @@ #include "llvm/Support/LeakDetector.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/Support/Compiler.h" +#include "llvm/Support/ManagedStatic.h" #include "llvm/Support/Streams.h" +#include "llvm/System/RWMutex.h" +#include "llvm/System/Threading.h" #include "llvm/Value.h" using namespace llvm; @@ -29,16 +32,29 @@ namespace { static void print(const Value* P) { cerr << *P; } }; + ManagedStatic<sys::SmartRWMutex<true> > LeakDetectorLock; + template <typename T> struct VISIBILITY_HIDDEN LeakDetectorImpl { - explicit LeakDetectorImpl(const char* const name) : Cache(0), Name(name) { } + explicit LeakDetectorImpl(const char* const name = "") : + Cache(0), Name(name) { } + void clear() { + Cache = 0; + Ts.clear(); + } + + void setName(const char* n) { + Name = n; + } + // Because the most common usage pattern, by far, is to add a // garbage object, then remove it immediately, we optimize this // case. When an object is added, it is not added to the set // immediately, it is added to the CachedValue Value. If it is // immediately removed, no set search need be performed. void addGarbage(const T* o) { + sys::SmartScopedWriter<true> Writer(&*LeakDetectorLock); if (Cache) { assert(Ts.count(Cache) == 0 && "Object already in set!"); Ts.insert(Cache); @@ -47,6 +63,7 @@ namespace { } void removeGarbage(const T* o) { + sys::SmartScopedWriter<true> Writer(&*LeakDetectorLock); if (o == Cache) Cache = 0; // Cache hit else @@ -56,6 +73,7 @@ namespace { bool hasGarbage(const std::string& Message) { addGarbage(0); // Flush the Cache + sys::SmartScopedReader<true> Reader(&*LeakDetectorLock); assert(Cache == 0 && "No value should be cached anymore!"); if (!Ts.empty()) { @@ -70,58 +88,48 @@ namespace { return true; } + return false; } private: SmallPtrSet<const T*, 8> Ts; const T* Cache; - const char* const Name; + const char* Name; }; - static LeakDetectorImpl<void> *Objects; - static LeakDetectorImpl<Value> *LLVMObjects; - - static LeakDetectorImpl<void> &getObjects() { - if (Objects == 0) - Objects = new LeakDetectorImpl<void>("GENERIC"); - return *Objects; - } - - static LeakDetectorImpl<Value> &getLLVMObjects() { - if (LLVMObjects == 0) - LLVMObjects = new LeakDetectorImpl<Value>("LLVM"); - return *LLVMObjects; - } + static ManagedStatic<LeakDetectorImpl<void> > Objects; + static ManagedStatic<LeakDetectorImpl<Value> > LLVMObjects; static void clearGarbage() { - delete Objects; - delete LLVMObjects; - Objects = 0; - LLVMObjects = 0; + Objects->clear(); + LLVMObjects->clear(); } } void LeakDetector::addGarbageObjectImpl(void *Object) { - getObjects().addGarbage(Object); + Objects->addGarbage(Object); } void LeakDetector::addGarbageObjectImpl(const Value *Object) { - getLLVMObjects().addGarbage(Object); + LLVMObjects->addGarbage(Object); } void LeakDetector::removeGarbageObjectImpl(void *Object) { - getObjects().removeGarbage(Object); + Objects->removeGarbage(Object); } void LeakDetector::removeGarbageObjectImpl(const Value *Object) { - getLLVMObjects().removeGarbage(Object); + LLVMObjects->removeGarbage(Object); } void LeakDetector::checkForGarbageImpl(const std::string &Message) { + Objects->setName("GENERIC"); + LLVMObjects->setName("LLVM"); + // use non-short-circuit version so that both checks are 
performed - if (getObjects().hasGarbage(Message) | - getLLVMObjects().hasGarbage(Message)) + if (Objects->hasGarbage(Message) | + LLVMObjects->hasGarbage(Message)) cerr << "\nThis is probably because you removed an object, but didn't " << "delete it. Please check your code for memory leaks.\n"; diff --git a/lib/VMCore/Mangler.cpp b/lib/VMCore/Mangler.cpp index 0bd190a..1a68b89 100644 --- a/lib/VMCore/Mangler.cpp +++ b/lib/VMCore/Mangler.cpp @@ -14,6 +14,7 @@ #include "llvm/Support/Mangler.h" #include "llvm/DerivedTypes.h" #include "llvm/Module.h" +#include "llvm/System/Atomic.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/StringExtras.h" #include "llvm/ADT/StringMap.h" @@ -164,8 +165,12 @@ std::string Mangler::getValueName(const GlobalValue *GV, const char * Suffix) { } else if (!GV->hasName()) { // Must mangle the global into a unique ID. unsigned TypeUniqueID = getTypeID(GV->getType()); - static unsigned GlobalID = 0; - Name = "__unnamed_" + utostr(TypeUniqueID) + "_" + utostr(GlobalID++); + static uint32_t GlobalID = 0; + + unsigned OldID = GlobalID; + sys::AtomicIncrement(&GlobalID); + + Name = "__unnamed_" + utostr(TypeUniqueID) + "_" + utostr(OldID); } else { if (GV->hasPrivateLinkage()) Name = makeNameProper(GV->getName() + Suffix, Prefix, PrivatePrefix); diff --git a/lib/VMCore/Pass.cpp b/lib/VMCore/Pass.cpp index 6db5d7e..e943e31 100644 --- a/lib/VMCore/Pass.cpp +++ b/lib/VMCore/Pass.cpp @@ -19,6 +19,8 @@ #include "llvm/ModuleProvider.h" #include "llvm/ADT/STLExtras.h" #include "llvm/Support/ManagedStatic.h" +#include "llvm/System/Atomic.h" +#include "llvm/System/Threading.h" #include <algorithm> #include <map> #include <set> @@ -192,8 +194,26 @@ static std::vector<PassRegistrationListener*> *Listeners = 0; // ressurection after llvm_shutdown is run. static PassRegistrar *getPassRegistrar() { static PassRegistrar *PassRegistrarObj = 0; - if (!PassRegistrarObj) + + // Use double-checked locking to safely initialize the registrar when + // we're running in multithreaded mode. + PassRegistrar* tmp = PassRegistrarObj; + if (llvm_is_multithreaded()) { + sys::MemoryFence(); + if (!tmp) { + llvm_acquire_global_lock(); + tmp = PassRegistrarObj; + if (!tmp) { + tmp = new PassRegistrar(); + sys::MemoryFence(); + PassRegistrarObj = tmp; + } + llvm_release_global_lock(); + } + } else if (!tmp) { PassRegistrarObj = new PassRegistrar(); + } + return PassRegistrarObj; } diff --git a/lib/VMCore/PassManager.cpp b/lib/VMCore/PassManager.cpp index 4799915..86cf10e 100644 --- a/lib/VMCore/PassManager.cpp +++ b/lib/VMCore/PassManager.cpp @@ -20,6 +20,8 @@ #include "llvm/Support/Streams.h" #include "llvm/Support/ManagedStatic.h" #include "llvm/Support/raw_ostream.h" +#include "llvm/System/Mutex.h" +#include "llvm/System/Threading.h" #include "llvm/Analysis/Dominators.h" #include "llvm-c/Core.h" #include <algorithm> @@ -355,6 +357,9 @@ namespace { /// amount of time each pass takes to execute. This only happens when /// -time-passes is enabled on the command line. 
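The Mangler hunk above swaps a bare GlobalID++ for sys::AtomicIncrement. A sketch with std::atomic: a single fetch_add both returns the old value and bumps the counter in one atomic step, whereas reading OldID and then incrementing separately, as the hunk appears to do, still leaves a window where two threads read the same OldID.

    #include <atomic>
    #include <cstdint>
    #include <iostream>
    #include <string>

    static std::atomic<std::uint32_t> GlobalID(0);

    // Unique names for unnamed globals; no two threads can mint the same ID.
    std::string nextUnnamedName(unsigned TypeUniqueID) {
        std::uint32_t OldID = GlobalID.fetch_add(1);
        return "__unnamed_" + std::to_string(TypeUniqueID) + "_" +
               std::to_string(OldID);
    }

    int main() {
        std::cout << nextUnnamedName(3) << '\n'   // __unnamed_3_0
                  << nextUnnamedName(3) << '\n';  // __unnamed_3_1
    }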
/// + +static ManagedStatic<sys::SmartMutex<true> > TimingInfoMutex; + class VISIBILITY_HIDDEN TimingInfo { std::map<Pass*, Timer> TimingData; TimerGroup TG; @@ -379,15 +384,18 @@ public: if (dynamic_cast<PMDataManager *>(P)) return; + sys::SmartScopedLock<true> Lock(&*TimingInfoMutex); std::map<Pass*, Timer>::iterator I = TimingData.find(P); if (I == TimingData.end()) I=TimingData.insert(std::make_pair(P, Timer(P->getPassName(), TG))).first; I->second.startTimer(); } + void passEnded(Pass *P) { if (dynamic_cast<PMDataManager *>(P)) return; + sys::SmartScopedLock<true> Lock(&*TimingInfoMutex); std::map<Pass*, Timer>::iterator I = TimingData.find(P); assert(I != TimingData.end() && "passStarted/passEnded not nested right!"); I->second.stopTimer(); diff --git a/lib/VMCore/Type.cpp b/lib/VMCore/Type.cpp index a1e6c42..5df7f12 100644 --- a/lib/VMCore/Type.cpp +++ b/lib/VMCore/Type.cpp @@ -23,6 +23,9 @@ #include "llvm/Support/ManagedStatic.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/raw_ostream.h" +#include "llvm/System/Mutex.h" +#include "llvm/System/RWMutex.h" +#include "llvm/System/Threading.h" #include <algorithm> #include <cstdarg> using namespace llvm; @@ -40,6 +43,14 @@ AbstractTypeUser::~AbstractTypeUser() {} // Type Class Implementation //===----------------------------------------------------------------------===// +// Reader/writer lock used for guarding access to the type maps. +static ManagedStatic<sys::SmartRWMutex<true> > TypeMapLock; + +// Recursive lock used for guarding access to AbstractTypeUsers. +// NOTE: The true template parameter means this will no-op when we're not in +// multithreaded mode. +static ManagedStatic<sys::SmartMutex<true> > AbstractTypeUsersLock; + // Concrete/Abstract TypeDescriptions - We lazily calculate type descriptions // for types as they are needed. Because resolution of types must invalidate // all of the abstract type descriptions, we keep them in a seperate map to make @@ -112,6 +123,14 @@ const Type *Type::getVAArgsPromotedType() const { return this; } +/// getScalarType - If this is a vector type, return the element type, +/// otherwise return this. +const Type *Type::getScalarType() const { + if (const VectorType *VTy = dyn_cast<VectorType>(this)) + return VTy->getElementType(); + return this; +} + /// isIntOrIntVector - Return true if this is an integer type or a vector of /// integer types. /// @@ -174,6 +193,28 @@ unsigned Type::getPrimitiveSizeInBits() const { } } +/// getScalarSizeInBits - If this is a vector type, return the +/// getPrimitiveSizeInBits value for the element type. Otherwise return the +/// getPrimitiveSizeInBits value for this type. +unsigned Type::getScalarSizeInBits() const { + return getScalarType()->getPrimitiveSizeInBits(); +} + +/// getFPMantissaWidth - Return the width of the mantissa of this type. This +/// is only valid on floating point types. If the FP type does not +/// have a stable mantissa (e.g. ppc long double), this method returns -1. +int Type::getFPMantissaWidth() const { + if (const VectorType *VTy = dyn_cast<VectorType>(this)) + return VTy->getElementType()->getFPMantissaWidth(); + assert(isFloatingPoint() && "Not a floating point type!"); + if (ID == FloatTyID) return 24; + if (ID == DoubleTyID) return 53; + if (ID == X86_FP80TyID) return 64; + if (ID == FP128TyID) return 113; + assert(ID == PPC_FP128TyID && "unknown fp type"); + return -1; +} + /// isSizedDerivedType - Derived types like structures and arrays are sized /// iff all of the members of the type are sized as well. 
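getScalarType and getScalarSizeInBits, added in the Type.cpp hunk above, are the helpers behind the commit-wide getPrimitiveSizeInBits replacements: a vector type defers to its element type, a scalar answers for itself. In miniature:

    #include <iostream>

    // Toy model; Bits is the primitive size of a scalar, Elt the element
    // type when this is a vector (lane count omitted, it doesn't matter here).
    struct Ty {
        unsigned Bits;
        const Ty *Elt;

        const Ty *getScalarType() const { return Elt ? Elt : this; }
        unsigned getScalarSizeInBits() const { return getScalarType()->Bits; }
    };

    int main() {
        const Ty I32   = { 32, nullptr };
        const Ty V4I32 = { 0, &I32 };       // stands in for <4 x i32>
        std::cout << I32.getScalarSizeInBits() << ' '
                  << V4I32.getScalarSizeInBits() << '\n';   // 32 32
    }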
/// their size is relatively uncommon, move this operation out of line.
@@ -414,8 +455,29 @@ void DerivedType::dropAllTypeUses() {
   if (NumContainedTys != 0) {
     // The type must stay abstract.  To do this, we insert a pointer to a type
     // that will never get resolved, thus will always be abstract.
-    static Type *AlwaysOpaqueTy = OpaqueType::get();
-    static PATypeHolder Holder(AlwaysOpaqueTy);
+    static Type *AlwaysOpaqueTy = 0;
+    static PATypeHolder* Holder = 0;
+    Type *tmp = AlwaysOpaqueTy;
+    if (llvm_is_multithreaded()) {
+      sys::MemoryFence();
+      if (!tmp) {
+        llvm_acquire_global_lock();
+        tmp = AlwaysOpaqueTy;
+        if (!tmp) {
+          tmp = OpaqueType::get();
+          PATypeHolder* tmp2 = new PATypeHolder(tmp);
+          sys::MemoryFence();
+          AlwaysOpaqueTy = tmp;
+          Holder = tmp2;
+        }
+
+        llvm_release_global_lock();
+      }
+    } else {
+      AlwaysOpaqueTy = OpaqueType::get();
+      Holder = new PATypeHolder(AlwaysOpaqueTy);
+    }
+
     ContainedTys[0] = AlwaysOpaqueTy;
 
     // Change the rest of the types to be Int32Ty's. It doesn't matter what we
@@ -818,7 +880,7 @@ public:
       // We already have this type in the table.  Get rid of the newly refined
       // type.
       TypeClass *NewTy = cast<TypeClass>((Type*)I->second.get());
-      Ty->refineAbstractTypeTo(NewTy);
+      Ty->unlockedRefineAbstractTypeTo(NewTy);
       return;
     }
   } else {
@@ -854,7 +916,7 @@ public:
       }
       TypesByHash.erase(Entry);
     }
-    Ty->refineAbstractTypeTo(NewTy);
+    Ty->unlockedRefineAbstractTypeTo(NewTy);
     return;
   }
 }
@@ -938,15 +1000,30 @@ const IntegerType *IntegerType::get(unsigned NumBits) {
   default:
     break;
   }
-
+
   IntegerValType IVT(NumBits);
-  IntegerType *ITy = IntegerTypes->get(IVT);
-  if (ITy) return ITy;                 // Found a match, return it!
-
-  // Value not found.  Derive a new type!
-  ITy = new IntegerType(NumBits);
-  IntegerTypes->add(IVT, ITy);
-
+  IntegerType *ITy = 0;
+
+  // First, see if the type is already in the table, for which
+  // a reader lock suffices.
+  TypeMapLock->reader_acquire();
+  ITy = IntegerTypes->get(IVT);
+  TypeMapLock->reader_release();
+
+  if (!ITy) {
+    // OK, not in the table, get a writer lock.
+    sys::SmartScopedWriter<true> Writer(&*TypeMapLock);
+    ITy = IntegerTypes->get(IVT);
+
+    // We need to _recheck_ the table in case someone
+    // put it in between when we released the reader lock
+    // and when we gained the writer lock!
+    if (!ITy) {
+      // Value not found.  Derive a new type!
+      ITy = new IntegerType(NumBits);
+      IntegerTypes->add(IVT, ITy);
+    }
+  }
 #ifdef DEBUG_MERGE_TYPES
   DOUT << "Derived new type: " << *ITy << "\n";
 #endif
@@ -1010,14 +1087,26 @@ FunctionType *FunctionType::get(const Type *ReturnType,
                                 const std::vector<const Type*> &Params,
                                 bool isVarArg) {
   FunctionValType VT(ReturnType, Params, isVarArg);
-  FunctionType *FT = FunctionTypes->get(VT);
-  if (FT)
-    return FT;
-
-  FT = (FunctionType*) operator new(sizeof(FunctionType) +
-                                sizeof(PATypeHandle)*(Params.size()+1));
-  new (FT) FunctionType(ReturnType, Params, isVarArg);
-  FunctionTypes->add(VT, FT);
+  FunctionType *FT = 0;
+
+  TypeMapLock->reader_acquire();
+  FT = FunctionTypes->get(VT);
+  TypeMapLock->reader_release();
+
+  if (!FT) {
+    sys::SmartScopedWriter<true> Writer(&*TypeMapLock);
+
+    // Have to check again here, because it might have
+    // been inserted between when we released the reader
+    // lock and when we acquired the writer lock.
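getPassRegistrar above and dropAllTypeUses here share one double-checked locking shape: an unlocked read, then, only if that read saw null, a second read under the global lock before constructing and publishing the object, with memory fences ordering the publication. In C++11 terms the fences map onto acquire/release atomics; a sketch under that assumption (Registry is a placeholder type, not an LLVM class):

    #include <atomic>
    #include <mutex>

    struct Registry { /* ... */ };

    static std::atomic<Registry*> GlobalRegistry{nullptr};
    static std::mutex InitLock;

    Registry *getRegistry() {
      Registry *Tmp = GlobalRegistry.load(std::memory_order_acquire);
      if (!Tmp) {                           // first check: no lock taken
        std::lock_guard<std::mutex> Guard(InitLock);
        Tmp = GlobalRegistry.load(std::memory_order_relaxed);
        if (!Tmp) {                         // second check: under the lock
          Tmp = new Registry();
          // Release-store publishes the fully constructed object, the
          // role the second sys::MemoryFence plays in the patch.
          GlobalRegistry.store(Tmp, std::memory_order_release);
        }
      }
      return Tmp;
    }

The second check is the essential part: two threads can both fail the unlocked test, but only the first to take the lock constructs the object; the other sees the published pointer and backs off.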
+    FT = FunctionTypes->get(VT);
+    if (!FT) {
+      FT = (FunctionType*) operator new(sizeof(FunctionType) +
+                                sizeof(PATypeHandle)*(Params.size()+1));
+      new (FT) FunctionType(ReturnType, Params, isVarArg);
+      FunctionTypes->add(VT, FT);
+    }
+  }
 
 #ifdef DEBUG_MERGE_TYPES
   DOUT << "Derived new type: " << FT << "\n";
 #endif
@@ -1049,20 +1138,30 @@ public:
   }
 };
 }
-static ManagedStatic<TypeMap<ArrayValType, ArrayType> > ArrayTypes;
+static ManagedStatic<TypeMap<ArrayValType, ArrayType> > ArrayTypes;
 
 ArrayType *ArrayType::get(const Type *ElementType, uint64_t NumElements) {
   assert(ElementType && "Can't get array of <null> types!");
   assert(isValidElementType(ElementType) && "Invalid type for array element!");
 
   ArrayValType AVT(ElementType, NumElements);
-  ArrayType *AT = ArrayTypes->get(AVT);
-  if (AT) return AT;           // Found a match, return it!
-
-  // Value not found.  Derive a new type!
-  ArrayTypes->add(AVT, AT = new ArrayType(ElementType, NumElements));
-
+  ArrayType *AT = 0;
+
+  TypeMapLock->reader_acquire();
+  AT = ArrayTypes->get(AVT);
+  TypeMapLock->reader_release();
+
+  if (!AT) {
+    sys::SmartScopedWriter<true> Writer(&*TypeMapLock);
+
+    // Recheck.  Might have changed between release and acquire.
+    AT = ArrayTypes->get(AVT);
+    if (!AT) {
+      // Value not found.  Derive a new type!
+      ArrayTypes->add(AVT, AT = new ArrayType(ElementType, NumElements));
+    }
+  }
 #ifdef DEBUG_MERGE_TYPES
   DOUT << "Derived new type: " << *AT << "\n";
 #endif
@@ -1106,19 +1205,27 @@ public:
   }
 };
 }
-static ManagedStatic<TypeMap<VectorValType, VectorType> > VectorTypes;
+static ManagedStatic<TypeMap<VectorValType, VectorType> > VectorTypes;
 
 VectorType *VectorType::get(const Type *ElementType, unsigned NumElements) {
   assert(ElementType && "Can't get vector of <null> types!");
 
   VectorValType PVT(ElementType, NumElements);
-  VectorType *PT = VectorTypes->get(PVT);
-  if (PT) return PT;  // Found a match, return it!
-
-  // Value not found.  Derive a new type!
-  VectorTypes->add(PVT, PT = new VectorType(ElementType, NumElements));
-
+  VectorType *PT = 0;
+
+  TypeMapLock->reader_acquire();
+  PT = VectorTypes->get(PVT);
+  TypeMapLock->reader_release();
+
+  if (!PT) {
+    sys::SmartScopedWriter<true> Writer(&*TypeMapLock);
+    PT = VectorTypes->get(PVT);
+    // Recheck.  Might have changed between release and acquire.
+    if (!PT) {
+      VectorTypes->add(PVT, PT = new VectorType(ElementType, NumElements));
+    }
+  }
 #ifdef DEBUG_MERGE_TYPES
   DOUT << "Derived new type: " << *PT << "\n";
 #endif
@@ -1173,15 +1280,24 @@ static ManagedStatic<TypeMap<StructValType, StructType> > StructTypes;
 
 StructType *StructType::get(const std::vector<const Type*> &ETypes,
                             bool isPacked) {
   StructValType STV(ETypes, isPacked);
-  StructType *ST = StructTypes->get(STV);
-  if (ST) return ST;
-
-  // Value not found.  Derive a new type!
-  ST = (StructType*) operator new(sizeof(StructType) +
-                                  sizeof(PATypeHandle) * ETypes.size());
-  new (ST) StructType(ETypes, isPacked);
-  StructTypes->add(STV, ST);
-
+  StructType *ST = 0;
+
+  TypeMapLock->reader_acquire();
+  ST = StructTypes->get(STV);
+  TypeMapLock->reader_release();
+
+  if (!ST) {
+    sys::SmartScopedWriter<true> Writer(&*TypeMapLock);
+    ST = StructTypes->get(STV);
+    // Recheck.  Might have changed between release and acquire.
+    if (!ST) {
+      // Value not found.  Derive a new type!
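IntegerType, FunctionType, ArrayType, and VectorType above (and StructType and PointerType below) all follow one uniquing-table pattern: an optimistic lookup under the shared reader lock and, on a miss, a second lookup under the exclusive writer lock before inserting, because another thread may have created the entry in the unlocked window between the two locks. A generic sketch of the pattern, assuming C++17's std::shared_mutex in place of sys::SmartRWMutex:

    #include <map>
    #include <shared_mutex>

    template <typename Key, typename T>
    class UniquingTable {
      std::map<Key, T*> Entries;
      std::shared_mutex Lock; // stands in for TypeMapLock
    public:
      T *getOrCreate(const Key &K) {
        {
          // Fast path: many readers can hold the shared lock at once.
          std::shared_lock<std::shared_mutex> Reader(Lock);
          auto I = Entries.find(K);
          if (I != Entries.end())
            return I->second;
        }
        // Slow path: exclusive lock, then recheck, since another thread
        // may have inserted K after we dropped the reader lock.
        std::unique_lock<std::shared_mutex> Writer(Lock);
        auto I = Entries.find(K);
        if (I != Entries.end())
          return I->second;
        T *New = new T(K); // assumes T is constructible from Key
        Entries.emplace(K, New);
        return New;
      }
    };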
+      ST = (StructType*) operator new(sizeof(StructType) +
+                                      sizeof(PATypeHandle) * ETypes.size());
+      new (ST) StructType(ETypes, isPacked);
+      StructTypes->add(STV, ST);
+    }
+  }
 #ifdef DEBUG_MERGE_TYPES
   DOUT << "Derived new type: " << *ST << "\n";
 #endif
@@ -1249,12 +1365,21 @@ PointerType *PointerType::get(const Type *ValueType, unsigned AddressSpace) {
   assert(isValidElementType(ValueType) && "Invalid type for pointer element!");
 
   PointerValType PVT(ValueType, AddressSpace);
-  PointerType *PT = PointerTypes->get(PVT);
-  if (PT) return PT;
-
-  // Value not found.  Derive a new type!
-  PointerTypes->add(PVT, PT = new PointerType(ValueType, AddressSpace));
-
+  PointerType *PT = 0;
+
+  TypeMapLock->reader_acquire();
+  PT = PointerTypes->get(PVT);
+  TypeMapLock->reader_release();
+
+  if (!PT) {
+    sys::SmartScopedWriter<true> Writer(&*TypeMapLock);
+    PT = PointerTypes->get(PVT);
+    // Recheck.  Might have changed between release and acquire.
+    if (!PT) {
+      // Value not found.  Derive a new type!
+      PointerTypes->add(PVT, PT = new PointerType(ValueType, AddressSpace));
+    }
+  }
 #ifdef DEBUG_MERGE_TYPES
   DOUT << "Derived new type: " << *PT << "\n";
 #endif
@@ -1281,12 +1406,24 @@ bool PointerType::isValidElementType(const Type *ElemTy) {
 // Derived Type Refinement Functions
 //===----------------------------------------------------------------------===//
 
+// addAbstractTypeUser - Notify an abstract type that there is a new user of
+// it.  This function is called primarily by the PATypeHandle class.
+void Type::addAbstractTypeUser(AbstractTypeUser *U) const {
+  assert(isAbstract() && "addAbstractTypeUser: Current type not abstract!");
+  AbstractTypeUsersLock->acquire();
+  AbstractTypeUsers.push_back(U);
+  AbstractTypeUsersLock->release();
+}
+
+
 // removeAbstractTypeUser - Notify an abstract type that a user of the class
 // no longer has a handle to the type.  This function is called primarily by
 // the PATypeHandle class.  When there are no users of the abstract type, it
 // is annihilated, because there is no way to get a reference to it ever again.
 //
 void Type::removeAbstractTypeUser(AbstractTypeUser *U) const {
+  AbstractTypeUsersLock->acquire();
+
   // Search from back to front because we will notify users from back to
   // front.  Also, it is likely that there will be a stack like behavior to
   // users that register and unregister users.
@@ -1310,16 +1447,20 @@ void Type::removeAbstractTypeUser(AbstractTypeUser *U) const {
   DOUT << "DELETEing unused abstract type: <" << *this
        << ">[" << (void*)this << "]" << "\n";
 #endif
-      this->destroy();
+
+      this->destroy();
     }
+
+  AbstractTypeUsersLock->release();
 }
 
-// refineAbstractTypeTo - This function is used when it is discovered that
-// the 'this' abstract type is actually equivalent to the NewType specified.
-// This causes all users of 'this' to switch to reference the more concrete type
-// NewType and for 'this' to be deleted.
+// unlockedRefineAbstractTypeTo - This function is used when it is discovered
+// that the 'this' abstract type is actually equivalent to the NewType
+// specified.  This causes all users of 'this' to switch to reference the more
+// concrete type NewType and for 'this' to be deleted.  Only used for internal
+// callers.
 //
-void DerivedType::refineAbstractTypeTo(const Type *NewType) {
+void DerivedType::unlockedRefineAbstractTypeTo(const Type *NewType) {
   assert(isAbstract() && "refineAbstractTypeTo: Current type is not abstract!");
   assert(this != NewType && "Can't refine to myself!");
   assert(ForwardType == 0 && "This type has already been refined!");
@@ -1338,8 +1479,7 @@ void DerivedType::refineAbstractTypeTo(const Type *NewType) {
   // refined, that we will not continue using a dead reference...
   //
   PATypeHolder NewTy(NewType);
-
   // Any PATypeHolders referring to this type will now automatically forward to
   // the type we are resolved to.
   ForwardType = NewType;
   if (NewType->isAbstract())
@@ -1362,6 +1502,7 @@ void DerivedType::refineAbstractTypeTo(const Type *NewType) {
   // will not cause users to drop off of the use list.  If we resolve to ourself
   // we succeed!
   //
+  AbstractTypeUsersLock->acquire();
   while (!AbstractTypeUsers.empty() && NewTy != this) {
     AbstractTypeUser *User = AbstractTypeUsers.back();
 
@@ -1377,6 +1518,7 @@ void DerivedType::refineAbstractTypeTo(const Type *NewType) {
     assert(AbstractTypeUsers.size() != OldSize &&
            "AbsTyUser did not remove self from user list!");
   }
+  AbstractTypeUsersLock->release();
 
   // If we were successful removing all users from the type, 'this' will be
   // deleted when the last PATypeHolder is destroyed or updated from this type.
@@ -1384,6 +1526,16 @@ void DerivedType::refineAbstractTypeTo(const Type *NewType) {
   // destroyed.
 }
 
+// refineAbstractTypeTo - This function is used by external callers to notify
+// us that this abstract type is equivalent to another type.
+//
+void DerivedType::refineAbstractTypeTo(const Type *NewType) {
+  // All recursive calls will go through unlockedRefineAbstractTypeTo,
+  // to avoid deadlock problems.
+  sys::SmartScopedWriter<true> Writer(&*TypeMapLock);
+  unlockedRefineAbstractTypeTo(NewType);
+}
+
 // notifyUsesThatTypeBecameConcrete - Notify AbstractTypeUsers of this type that
 // the current type has transitioned from being abstract to being concrete.
 //
@@ -1392,6 +1544,7 @@ void DerivedType::notifyUsesThatTypeBecameConcrete() {
   DOUT << "typeIsREFINED type: " << (void*)this << " " << *this << "\n";
 #endif
 
+  AbstractTypeUsersLock->acquire();
   unsigned OldSize = AbstractTypeUsers.size(); OldSize=OldSize;
   while (!AbstractTypeUsers.empty()) {
     AbstractTypeUser *ATU = AbstractTypeUsers.back();
@@ -1400,6 +1553,7 @@ void DerivedType::notifyUsesThatTypeBecameConcrete() {
     assert(AbstractTypeUsers.size() < OldSize-- &&
            "AbstractTypeUser did not remove itself from the use list!");
   }
+  AbstractTypeUsersLock->release();
 }
 
 // refineAbstractType - Called when a contained type is found to be more
diff --git a/lib/VMCore/TypeSymbolTable.cpp b/lib/VMCore/TypeSymbolTable.cpp
index 475d719..5ae60e2 100644
--- a/lib/VMCore/TypeSymbolTable.cpp
+++ b/lib/VMCore/TypeSymbolTable.cpp
@@ -14,13 +14,18 @@
 #include "llvm/TypeSymbolTable.h"
 #include "llvm/DerivedTypes.h"
 #include "llvm/ADT/StringExtras.h"
+#include "llvm/Support/ManagedStatic.h"
 #include "llvm/Support/Streams.h"
+#include "llvm/System/RWMutex.h"
+#include "llvm/System/Threading.h"
 #include <algorithm>
 using namespace llvm;
 
 #define DEBUG_SYMBOL_TABLE 0
 #define DEBUG_ABSTYPE 0
 
+static ManagedStatic<sys::SmartRWMutex<true> > TypeSymbolTableLock;
+
 TypeSymbolTable::~TypeSymbolTable() {
   // Drop all abstract type references in the type plane...
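The refineAbstractTypeTo split is the standard cure for self-deadlock on a non-recursive lock: the public entry point takes the lock exactly once and delegates to an unlocked worker, while internal callers that already hold the lock (the TypeMap code above) call the worker directly. Schematically (types and bodies here are placeholders):

    #include <mutex>

    class TypeResolver {
      std::mutex Lock; // plays the role of TypeMapLock
      // Invariant: Lock is already held by the caller.  May recurse freely.
      void refineUnlocked(int NewTy) {
        /* ...mutate shared tables; may call refineUnlocked again... */
      }
    public:
      // Entry point for external callers: acquire once, then do the work.
      void refine(int NewTy) {
        std::lock_guard<std::mutex> Guard(Lock);
        refineUnlocked(NewTy);
      }
    };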
   for (iterator TI = tmap.begin(), TE = tmap.end(); TI != TE; ++TI) {
@@ -31,6 +36,9 @@ TypeSymbolTable::~TypeSymbolTable() {
 
 std::string TypeSymbolTable::getUniqueName(const std::string &BaseName) const {
   std::string TryName = BaseName;
+
+  sys::SmartScopedReader<true> Reader(&*TypeSymbolTableLock);
+
   const_iterator End = tmap.end();
 
   // See if the name exists
@@ -41,16 +49,20 @@ std::string TypeSymbolTable::getUniqueName(const std::string &BaseName) const {
 
 // lookup a type by name - returns null on failure
 Type* TypeSymbolTable::lookup(const std::string& Name) const {
+  sys::SmartScopedReader<true> Reader(&*TypeSymbolTableLock);
+
   const_iterator TI = tmap.find(Name);
+  Type* result = 0;
   if (TI != tmap.end())
-    return const_cast<Type*>(TI->second);
-  return 0;
+    result = const_cast<Type*>(TI->second);
+  return result;
 }
 
 // remove - Remove a type from the symbol table...
 Type* TypeSymbolTable::remove(iterator Entry) {
+  TypeSymbolTableLock->writer_acquire();
+
   assert(Entry != tmap.end() && "Invalid entry to remove!");
-
   const Type* Result = Entry->second;
 
 #if DEBUG_SYMBOL_TABLE
@@ -59,6 +71,8 @@ Type* TypeSymbolTable::remove(iterator Entry) {
 #endif
 
   tmap.erase(Entry);
+
+  TypeSymbolTableLock->writer_release();
 
   // If we are removing an abstract type, remove the symbol table from its use
   // list...
@@ -79,6 +93,8 @@ void TypeSymbolTable::insert(const std::string& Name, const Type* T) {
   assert(T && "Can't insert null type into symbol table!");
 
+  TypeSymbolTableLock->writer_acquire();
+
   if (tmap.insert(make_pair(Name, T)).second) {
     // Type inserted fine with no conflict.
 
@@ -103,6 +119,8 @@ void TypeSymbolTable::insert(const std::string& Name, const Type* T) {
     // Insert the tmap entry
     tmap.insert(make_pair(UniqueName, T));
   }
+
+  TypeSymbolTableLock->writer_release();
 
   // If we are adding an abstract type, add the symbol table to its use list.
   if (T->isAbstract()) {
@@ -116,7 +134,8 @@ void TypeSymbolTable::insert(const std::string& Name, const Type* T) {
 // This function is called when one of the types in the type plane is refined
 void TypeSymbolTable::refineAbstractType(const DerivedType *OldType,
                                          const Type *NewType) {
-
+  sys::SmartScopedReader<true> Reader(&*TypeSymbolTableLock);
+
   // Loop over all of the types in the symbol table, replacing any references
   // to OldType with references to NewType.  Note that there may be multiple
   // occurrences, and although we only need to remove one at a time, it's
@@ -146,6 +165,7 @@ void TypeSymbolTable::typeBecameConcrete(const DerivedType *AbsTy) {
   // Loop over all of the types in the symbol table, dropping any abstract
   // type user entries for AbsTy which occur because there are names for the
   // type.
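getUniqueName holds the shared lock across its entire probing loop, so the set of taken names cannot change mid-search and the returned name is consistent with one snapshot of the table. A sketch of that shape; the suffix scheme is illustrative rather than TypeSymbolTable's exact algorithm:

    #include <map>
    #include <shared_mutex>
    #include <string>

    class SymbolTable {
      std::map<std::string, int> Map;
      mutable std::shared_mutex Lock; // stands in for TypeSymbolTableLock
    public:
      std::string getUniqueName(const std::string &Base) const {
        // One reader lock for the whole loop keeps the snapshot stable.
        std::shared_lock<std::shared_mutex> Reader(Lock);
        std::string Try = Base;
        unsigned Counter = 2;
        while (Map.count(Try)) // probe until an unused name is found
          Try = Base + std::to_string(Counter++);
        return Try;
      }
    };

Note that the guarantee is only as strong as the snapshot: unless the caller also synchronizes the subsequent insert, another thread can claim the returned name first.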
+  sys::SmartScopedReader<true> Reader(&*TypeSymbolTableLock);
   for (iterator TI = begin(), TE = end(); TI != TE; ++TI)
     if (TI->second == const_cast<Type*>(static_cast<const Type*>(AbsTy)))
       AbsTy->removeAbstractTypeUser(this);
@@ -159,6 +179,7 @@ static void DumpTypes(const std::pair<const std::string, const Type*>& T ) {
 
 void TypeSymbolTable::dump() const {
   cerr << "TypeSymbolPlane: ";
+  sys::SmartScopedReader<true> Reader(&*TypeSymbolTableLock);
   for_each(tmap.begin(), tmap.end(), DumpTypes);
 }
 
diff --git a/lib/VMCore/Value.cpp b/lib/VMCore/Value.cpp
index 3af161f..c952b78 100644
--- a/lib/VMCore/Value.cpp
+++ b/lib/VMCore/Value.cpp
@@ -22,6 +22,8 @@
 #include "llvm/Support/LeakDetector.h"
 #include "llvm/Support/ManagedStatic.h"
 #include "llvm/Support/ValueHandle.h"
+#include "llvm/System/RWMutex.h"
+#include "llvm/System/Threading.h"
 #include "llvm/ADT/DenseMap.h"
 #include <algorithm>
 using namespace llvm;
@@ -405,6 +407,7 @@ Value *Value::DoPHITranslation(const BasicBlock *CurBB,
 /// not a value has an entry in this map.
 typedef DenseMap<Value*, ValueHandleBase*> ValueHandlesTy;
 static ManagedStatic<ValueHandlesTy> ValueHandles;
+static ManagedStatic<sys::SmartRWMutex<true> > ValueHandlesLock;
 
 /// AddToExistingUseList - Add this ValueHandle to the use list for VP, where
 /// List is known to point into the existing use list.
@@ -427,9 +430,11 @@ void ValueHandleBase::AddToUseList() {
   if (VP->HasValueHandle) {
     // If this value already has a ValueHandle, then it must be in the
    // ValueHandles map already.
+    sys::SmartScopedReader<true> Reader(&*ValueHandlesLock);
     ValueHandleBase *&Entry = (*ValueHandles)[VP];
     assert(Entry != 0 && "Value doesn't have any handles?");
-    return AddToExistingUseList(&Entry);
+    AddToExistingUseList(&Entry);
+    return;
   }
 
   // Ok, it doesn't have any handles yet, so we must insert it into the
   // DenseMap.  However, doing this insertion could cause the DenseMap to
   // reallocate itself, which would invalidate all of the PrevP pointers that
   // point into the old table.  Handle this by checking for reallocation and
   // updating the stale pointers only if needed.
+  sys::SmartScopedWriter<true> Writer(&*ValueHandlesLock);
   ValueHandlesTy &Handles = *ValueHandles;
   const void *OldBucketPtr = Handles.getPointerIntoBucketsArray();
 
@@ -448,8 +454,9 @@ void ValueHandleBase::AddToUseList() {
   // If reallocation didn't happen or if this was the first insertion, don't
   // walk the table.
   if (Handles.isPointerIntoBucketsArray(OldBucketPtr) ||
-      Handles.size() == 1)
+      Handles.size() == 1) {
     return;
+  }
 
   // Okay, reallocation did happen.  Fix the Prev Pointers.
   for (ValueHandlesTy::iterator I = Handles.begin(), E = Handles.end();
@@ -477,6 +484,7 @@ void ValueHandleBase::RemoveFromUseList() {
   // If the Next pointer was null, then it is possible that this was the last
   // ValueHandle watching VP.  If so, delete its entry from the ValueHandles
   // map.
+  sys::SmartScopedWriter<true> Writer(&*ValueHandlesLock);
   ValueHandlesTy &Handles = *ValueHandles;
   if (Handles.isPointerIntoBucketsArray(PrevPtr)) {
     Handles.erase(VP);
@@ -490,7 +498,9 @@ void ValueHandleBase::ValueIsDeleted(Value *V) {
 
   // Get the linked list base, which is guaranteed to exist since the
   // HasValueHandle flag is set.
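Beyond racing threads, AddToUseList guards against a second hazard: inserting into the DenseMap can reallocate its buckets, invalidating every stored pointer into the old table, so the code saves a pointer into the bucket array and rewalks the map only when that pointer no longer points into it. The same detect-then-fix-up idea, sketched with std::vector, whose growth likewise invalidates raw pointers into its storage (names are illustrative):

    #include <cstddef>
    #include <vector>

    // Precondition: StalePtr points at an element of Buf.
    void insertAndFixup(std::vector<int> &Buf, int Value, int *&StalePtr) {
      const int *OldData = Buf.data();        // pointer into current storage
      std::size_t Index = StalePtr - OldData; // remember position, not address
      Buf.push_back(Value);
      if (Buf.data() != OldData)              // growth reallocated: fix up
        StalePtr = Buf.data() + Index;
    }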
+  ValueHandlesLock->reader_acquire();
   ValueHandleBase *Entry = (*ValueHandles)[V];
+  ValueHandlesLock->reader_release();
   assert(Entry && "Value bit set but no entries exist");
 
   while (Entry) {
@@ -528,7 +538,9 @@ void ValueHandleBase::ValueIsRAUWd(Value *Old, Value *New) {
 
   // Get the linked list base, which is guaranteed to exist since the
   // HasValueHandle flag is set.
+  ValueHandlesLock->reader_acquire();
   ValueHandleBase *Entry = (*ValueHandles)[Old];
+  ValueHandlesLock->reader_release();
   assert(Entry && "Value bit set but no entries exist");
 
   while (Entry) {
diff --git a/lib/VMCore/Verifier.cpp b/lib/VMCore/Verifier.cpp
index e9f2acd..10816e6 100644
--- a/lib/VMCore/Verifier.cpp
+++ b/lib/VMCore/Verifier.cpp
@@ -745,8 +745,8 @@ void Verifier::visitTruncInst(TruncInst &I) {
   const Type *DestTy = I.getType();
 
   // Get the size of the types in bits, we'll need this later
-  unsigned SrcBitSize = SrcTy->getPrimitiveSizeInBits();
-  unsigned DestBitSize = DestTy->getPrimitiveSizeInBits();
+  unsigned SrcBitSize = SrcTy->getScalarSizeInBits();
+  unsigned DestBitSize = DestTy->getScalarSizeInBits();
 
   Assert1(SrcTy->isIntOrIntVector(), "Trunc only operates on integer", &I);
   Assert1(DestTy->isIntOrIntVector(), "Trunc only produces integer", &I);
@@ -767,8 +767,8 @@ void Verifier::visitZExtInst(ZExtInst &I) {
   Assert1(DestTy->isIntOrIntVector(), "ZExt only produces an integer", &I);
   Assert1(isa<VectorType>(SrcTy) == isa<VectorType>(DestTy),
           "zext source and destination must both be a vector or neither", &I);
-  unsigned SrcBitSize = SrcTy->getPrimitiveSizeInBits();
-  unsigned DestBitSize = DestTy->getPrimitiveSizeInBits();
+  unsigned SrcBitSize = SrcTy->getScalarSizeInBits();
+  unsigned DestBitSize = DestTy->getScalarSizeInBits();
 
   Assert1(SrcBitSize < DestBitSize,"Type too small for ZExt", &I);
 
@@ -781,8 +781,8 @@ void Verifier::visitSExtInst(SExtInst &I) {
   const Type *DestTy = I.getType();
 
   // Get the size of the types in bits, we'll need this later
-  unsigned SrcBitSize = SrcTy->getPrimitiveSizeInBits();
-  unsigned DestBitSize = DestTy->getPrimitiveSizeInBits();
+  unsigned SrcBitSize = SrcTy->getScalarSizeInBits();
+  unsigned DestBitSize = DestTy->getScalarSizeInBits();
 
   Assert1(SrcTy->isIntOrIntVector(), "SExt only operates on integer", &I);
   Assert1(DestTy->isIntOrIntVector(), "SExt only produces an integer", &I);
@@ -798,8 +798,8 @@ void Verifier::visitFPTruncInst(FPTruncInst &I) {
   const Type *SrcTy = I.getOperand(0)->getType();
   const Type *DestTy = I.getType();
   // Get the size of the types in bits, we'll need this later
-  unsigned SrcBitSize = SrcTy->getPrimitiveSizeInBits();
-  unsigned DestBitSize = DestTy->getPrimitiveSizeInBits();
+  unsigned SrcBitSize = SrcTy->getScalarSizeInBits();
+  unsigned DestBitSize = DestTy->getScalarSizeInBits();
 
   Assert1(SrcTy->isFPOrFPVector(),"FPTrunc only operates on FP", &I);
   Assert1(DestTy->isFPOrFPVector(),"FPTrunc only produces an FP", &I);
@@ -816,8 +816,8 @@ void Verifier::visitFPExtInst(FPExtInst &I) {
   const Type *DestTy = I.getType();
 
   // Get the size of the types in bits, we'll need this later
-  unsigned SrcBitSize = SrcTy->getPrimitiveSizeInBits();
-  unsigned DestBitSize = DestTy->getPrimitiveSizeInBits();
+  unsigned SrcBitSize = SrcTy->getScalarSizeInBits();
+  unsigned DestBitSize = DestTy->getScalarSizeInBits();
 
   Assert1(SrcTy->isFPOrFPVector(),"FPExt only operates on FP", &I);
   Assert1(DestTy->isFPOrFPVector(),"FPExt only produces an FP", &I);
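The Verifier's switch from getPrimitiveSizeInBits to getScalarSizeInBits is what makes these cast checks correct for vectors: a vector's primitive size is the width of the whole vector, while the trunc/ext rules constrain element widths. For example, <4 x i32> to <4 x i16> is a legal trunc because 32 > 16 per element, independent of the 128-bit and 64-bit whole-vector widths. A toy model of the distinction, with plain structs standing in for LLVM's Type hierarchy:

    #include <cassert>

    struct Ty {
      unsigned ElemBits; // width of one element (the scalar width)
      unsigned NumElems; // 1 for scalar types
      unsigned primitiveSizeInBits() const { return ElemBits * NumElems; }
      unsigned scalarSizeInBits() const { return ElemBits; }
    };

    bool isValidTrunc(const Ty &Src, const Ty &Dst) {
      // Element counts must match; element width must strictly shrink.
      return Src.NumElems == Dst.NumElems &&
             Src.scalarSizeInBits() > Dst.scalarSizeInBits();
    }

    int main() {
      Ty V4i32{32, 4}, V4i16{16, 4};
      assert(isValidTrunc(V4i32, V4i16));         // 32 > 16 per element
      assert(V4i32.primitiveSizeInBits() == 128); // whole-vector width
      return 0;
    }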