author    rdivacky <rdivacky@FreeBSD.org>  2009-12-01 11:07:05 +0000
committer rdivacky <rdivacky@FreeBSD.org>  2009-12-01 11:07:05 +0000
commit    e7908924d847e63b02bc82bfaa1709ab9c774dcd (patch)
tree      ffe0478472eaa0686f11cb02c6df7d257b8719b0 /lib
parent    bf68f1ea49e39c4194f339ddd4421b0c3a31988b (diff)
Update LLVM to r90226.
Diffstat (limited to 'lib')
-rw-r--r--  lib/Analysis/AliasAnalysis.cpp | 17
-rw-r--r--  lib/Analysis/AliasDebugger.cpp | 5
-rw-r--r--  lib/Analysis/AliasSetTracker.cpp | 6
-rw-r--r--  lib/Analysis/BasicAliasAnalysis.cpp | 894
-rw-r--r--  lib/Analysis/CaptureTracking.cpp | 27
-rw-r--r--  lib/Analysis/ConstantFolding.cpp | 14
-rw-r--r--  lib/Analysis/DebugInfo.cpp | 139
-rw-r--r--  lib/Analysis/DomPrinter.cpp | 51
-rw-r--r--  lib/Analysis/IPA/Andersens.cpp | 27
-rw-r--r--  lib/Analysis/IPA/GlobalsModRef.cpp | 1
-rw-r--r--  lib/Analysis/IVUsers.cpp | 8
-rw-r--r--  lib/Analysis/InstructionSimplify.cpp | 73
-rw-r--r--  lib/Analysis/LoopInfo.cpp | 5
-rw-r--r--  lib/Analysis/MemoryDependenceAnalysis.cpp | 464
-rw-r--r--  lib/Analysis/ScalarEvolution.cpp | 4
-rw-r--r--  lib/Analysis/ValueTracking.cpp | 229
-rw-r--r--  lib/AsmParser/LLParser.cpp | 4
-rw-r--r--  lib/CodeGen/AggressiveAntiDepBreaker.cpp | 241
-rw-r--r--  lib/CodeGen/AggressiveAntiDepBreaker.h | 22
-rw-r--r--  lib/CodeGen/AntiDepBreaker.h | 17
-rw-r--r--  lib/CodeGen/AsmPrinter/AsmPrinter.cpp | 13
-rw-r--r--  lib/CodeGen/AsmPrinter/DIE.cpp | 102
-rw-r--r--  lib/CodeGen/AsmPrinter/DIE.h | 76
-rw-r--r--  lib/CodeGen/AsmPrinter/DwarfDebug.cpp | 1271
-rw-r--r--  lib/CodeGen/AsmPrinter/DwarfDebug.h | 292
-rw-r--r--  lib/CodeGen/AsmPrinter/DwarfException.cpp | 35
-rw-r--r--  lib/CodeGen/AsmPrinter/DwarfWriter.cpp | 16
-rw-r--r--  lib/CodeGen/BranchFolding.cpp | 228
-rw-r--r--  lib/CodeGen/BranchFolding.h | 8
-rw-r--r--  lib/CodeGen/CMakeLists.txt | 1
-rw-r--r--  lib/CodeGen/CriticalAntiDepBreaker.cpp | 1
-rw-r--r--  lib/CodeGen/CriticalAntiDepBreaker.h | 9
-rw-r--r--  lib/CodeGen/DwarfEHPrepare.cpp | 2
-rw-r--r--  lib/CodeGen/IfConversion.cpp | 2
-rw-r--r--  lib/CodeGen/LLVMTargetMachine.cpp | 23
-rw-r--r--  lib/CodeGen/LatencyPriorityQueue.cpp | 12
-rw-r--r--  lib/CodeGen/LiveIntervalAnalysis.cpp | 11
-rw-r--r--  lib/CodeGen/LiveVariables.cpp | 96
-rw-r--r--  lib/CodeGen/MachineBasicBlock.cpp | 62
-rw-r--r--  lib/CodeGen/MachineFunction.cpp | 12
-rw-r--r--  lib/CodeGen/MachineInstr.cpp | 9
-rw-r--r--  lib/CodeGen/MachineLICM.cpp | 54
-rw-r--r--  lib/CodeGen/MachineModuleInfo.cpp | 72
-rw-r--r--  lib/CodeGen/MachineVerifier.cpp | 2
-rw-r--r--  lib/CodeGen/PHIElimination.cpp | 34
-rw-r--r--  lib/CodeGen/PHIElimination.h | 6
-rw-r--r--  lib/CodeGen/PostRASchedulerList.cpp | 145
-rw-r--r--  lib/CodeGen/ProcessImplicitDefs.cpp | 67
-rw-r--r--  lib/CodeGen/RegAllocLinearScan.cpp | 74
-rw-r--r--  lib/CodeGen/ScheduleDAG.cpp | 20
-rw-r--r--  lib/CodeGen/ScheduleDAGPrinter.cpp | 10
-rw-r--r--  lib/CodeGen/SelectionDAG/CMakeLists.txt | 3
-rw-r--r--  lib/CodeGen/SelectionDAG/FastISel.cpp | 3
-rw-r--r--  lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp | 355
-rw-r--r--  lib/CodeGen/SelectionDAG/FunctionLoweringInfo.h | 151
-rw-r--r--  lib/CodeGen/SelectionDAG/InstrEmitter.cpp | 3
-rw-r--r--  lib/CodeGen/SelectionDAG/LegalizeDAG.cpp | 48
-rw-r--r--  lib/CodeGen/SelectionDAG/LegalizeTypes.cpp | 80
-rw-r--r--  lib/CodeGen/SelectionDAG/LegalizeTypes.h | 2
-rw-r--r--  lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp | 57
-rw-r--r--  lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp | 6
-rw-r--r--  lib/CodeGen/SelectionDAG/SelectionDAG.cpp | 114
-rw-r--r--  lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp (renamed from lib/CodeGen/SelectionDAG/SelectionDAGBuild.cpp) | 541
-rw-r--r--  lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h (renamed from lib/CodeGen/SelectionDAG/SelectionDAGBuild.h) | 118
-rw-r--r--  lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp | 238
-rw-r--r--  lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp | 34
-rw-r--r--  lib/CodeGen/SelectionDAG/TargetLowering.cpp | 5
-rw-r--r--  lib/CodeGen/SimpleRegisterCoalescing.cpp | 18
-rw-r--r--  lib/CodeGen/Spiller.cpp | 19
-rw-r--r--  lib/CodeGen/Spiller.h | 3
-rw-r--r--  lib/CodeGen/TailDuplication.cpp | 249
-rw-r--r--  lib/CodeGen/VirtRegRewriter.cpp | 2
-rw-r--r--  lib/CompilerDriver/CompilationGraph.cpp | 4
-rw-r--r--  lib/ExecutionEngine/JIT/JITEmitter.cpp | 162
-rw-r--r--  lib/Support/CommandLine.cpp | 108
-rw-r--r--  lib/Support/SourceMgr.cpp | 35
-rw-r--r--  lib/Support/StringRef.cpp | 10
-rw-r--r--  lib/Support/Triple.cpp | 5
-rw-r--r--  lib/System/CMakeLists.txt | 2
-rw-r--r--  lib/System/Host.cpp | 3
-rw-r--r--  lib/System/Unix/Path.inc | 24
-rw-r--r--  lib/System/Win32/Path.inc | 7
-rw-r--r--  lib/Target/ARM/ARMBaseInstrInfo.cpp | 58
-rw-r--r--  lib/Target/ARM/ARMBaseInstrInfo.h | 21
-rw-r--r--  lib/Target/ARM/ARMBaseRegisterInfo.cpp | 88
-rw-r--r--  lib/Target/ARM/ARMBaseRegisterInfo.h | 3
-rw-r--r--  lib/Target/ARM/ARMCodeEmitter.cpp | 1
-rw-r--r--  lib/Target/ARM/ARMConstantIslandPass.cpp | 42
-rw-r--r--  lib/Target/ARM/ARMExpandPseudoInsts.cpp | 33
-rw-r--r--  lib/Target/ARM/ARMISelDAGToDAG.cpp | 393
-rw-r--r--  lib/Target/ARM/ARMISelLowering.cpp | 20
-rw-r--r--  lib/Target/ARM/ARMInstrFormats.td | 115
-rw-r--r--  lib/Target/ARM/ARMInstrInfo.td | 67
-rw-r--r--  lib/Target/ARM/ARMInstrNEON.td | 2046
-rw-r--r--  lib/Target/ARM/ARMInstrThumb.td | 7
-rw-r--r--  lib/Target/ARM/ARMInstrThumb2.td | 57
-rw-r--r--  lib/Target/ARM/ARMInstrVFP.td | 6
-rw-r--r--  lib/Target/ARM/ARMJITInfo.cpp | 51
-rw-r--r--  lib/Target/ARM/ARMJITInfo.h | 4
-rw-r--r--  lib/Target/ARM/ARMScheduleV7.td | 26
-rw-r--r--  lib/Target/ARM/ARMSubtarget.cpp | 7
-rw-r--r--  lib/Target/ARM/ARMSubtarget.h | 13
-rw-r--r--  lib/Target/ARM/AsmPrinter/ARMAsmPrinter.cpp | 25
-rw-r--r--  lib/Target/ARM/NEONMoveFix.cpp | 4
-rw-r--r--  lib/Target/ARM/Thumb2SizeReduction.cpp | 8
-rw-r--r--  lib/Target/Alpha/AlphaISelLowering.cpp | 4
-rw-r--r--  lib/Target/Alpha/AlphaJITInfo.cpp | 14
-rw-r--r--  lib/Target/Alpha/AlphaJITInfo.h | 1
-rw-r--r--  lib/Target/Blackfin/BlackfinISelLowering.cpp | 4
-rw-r--r--  lib/Target/Blackfin/BlackfinRegisterInfo.td | 2
-rw-r--r--  lib/Target/CellSPU/SPUISelLowering.cpp | 4
-rw-r--r--  lib/Target/CellSPU/SPUInstrInfo.td | 8
-rw-r--r--  lib/Target/MSP430/MSP430ISelDAGToDAG.cpp | 4
-rw-r--r--  lib/Target/MSP430/MSP430ISelLowering.cpp | 10
-rw-r--r--  lib/Target/MSP430/MSP430InstrInfo.td | 31
-rw-r--r--  lib/Target/MSP430/MSP430MCAsmInfo.cpp | 1
-rw-r--r--  lib/Target/Mips/MipsISelDAGToDAG.cpp | 25
-rw-r--r--  lib/Target/Mips/MipsISelLowering.cpp | 20
-rw-r--r--  lib/Target/Mips/MipsInstrInfo.cpp | 64
-rw-r--r--  lib/Target/Mips/MipsRegisterInfo.cpp | 7
-rw-r--r--  lib/Target/PIC16/PIC16DebugInfo.cpp | 6
-rw-r--r--  lib/Target/PowerPC/PPCFrameInfo.h | 20
-rw-r--r--  lib/Target/PowerPC/PPCISelDAGToDAG.cpp | 16
-rw-r--r--  lib/Target/PowerPC/PPCISelLowering.cpp | 26
-rw-r--r--  lib/Target/PowerPC/PPCInstrInfo.td | 9
-rw-r--r--  lib/Target/PowerPC/PPCJITInfo.cpp | 26
-rw-r--r--  lib/Target/PowerPC/PPCJITInfo.h | 1
-rw-r--r--  lib/Target/PowerPC/PPCRegisterInfo.cpp | 57
-rw-r--r--  lib/Target/README.txt | 162
-rw-r--r--  lib/Target/Sparc/SparcISelLowering.cpp | 6
-rw-r--r--  lib/Target/TargetLoweringObjectFile.cpp | 4
-rw-r--r--  lib/Target/X86/AsmPrinter/X86MCInstLower.cpp | 17
-rw-r--r--  lib/Target/X86/AsmPrinter/X86MCInstLower.h | 1
-rw-r--r--  lib/Target/X86/Disassembler/CMakeLists.txt | 6
-rw-r--r--  lib/Target/X86/Disassembler/Makefile | 16
-rw-r--r--  lib/Target/X86/Disassembler/X86Disassembler.cpp | 29
-rw-r--r--  lib/Target/X86/Makefile | 2
-rw-r--r--  lib/Target/X86/X86CodeEmitter.cpp | 1
-rw-r--r--  lib/Target/X86/X86ISelDAGToDAG.cpp | 8
-rw-r--r--  lib/Target/X86/X86ISelLowering.cpp | 49
-rw-r--r--  lib/Target/X86/X86Instr64bit.td | 2
-rw-r--r--  lib/Target/X86/X86InstrInfo.cpp | 1
-rw-r--r--  lib/Target/X86/X86InstrInfo.td | 11
-rw-r--r--  lib/Target/X86/X86InstrSSE.td | 16
-rw-r--r--  lib/Target/X86/X86JITInfo.cpp | 66
-rw-r--r--  lib/Target/X86/X86JITInfo.h | 12
-rw-r--r--  lib/Target/X86/X86RegisterInfo.cpp | 2
-rw-r--r--  lib/Target/X86/X86Subtarget.cpp | 15
-rw-r--r--  lib/Target/X86/X86Subtarget.h | 5
-rw-r--r--  lib/Target/XCore/XCoreISelLowering.cpp | 6
-rw-r--r--  lib/Transforms/IPO/FunctionAttrs.cpp | 4
-rw-r--r--  lib/Transforms/IPO/GlobalOpt.cpp | 9
-rw-r--r--  lib/Transforms/IPO/IPConstantPropagation.cpp | 7
-rw-r--r--  lib/Transforms/Scalar/GVN.cpp | 52
-rw-r--r--  lib/Transforms/Scalar/InstructionCombining.cpp | 259
-rw-r--r--  lib/Transforms/Scalar/JumpThreading.cpp | 12
-rw-r--r--  lib/Transforms/Scalar/LICM.cpp | 16
-rw-r--r--  lib/Transforms/Scalar/SCCP.cpp | 12
-rw-r--r--  lib/Transforms/Scalar/ScalarReplAggregates.cpp | 44
-rw-r--r--  lib/Transforms/Scalar/SimplifyCFGPass.cpp | 12
-rw-r--r--  lib/Transforms/Scalar/SimplifyLibCalls.cpp | 179
-rw-r--r--  lib/Transforms/Utils/LoopSimplify.cpp | 7
-rw-r--r--  lib/Transforms/Utils/Mem2Reg.cpp | 2
-rw-r--r--  lib/Transforms/Utils/PromoteMemoryToRegister.cpp | 12
-rw-r--r--  lib/VMCore/Core.cpp | 6
-rw-r--r--  lib/VMCore/Metadata.cpp | 41
-rw-r--r--  lib/VMCore/PassManager.cpp | 6
-rw-r--r--  lib/VMCore/Verifier.cpp | 3
168 files changed, 6617 insertions, 5380 deletions
diff --git a/lib/Analysis/AliasAnalysis.cpp b/lib/Analysis/AliasAnalysis.cpp
index 0234965..dee9b53 100644
--- a/lib/Analysis/AliasAnalysis.cpp
+++ b/lib/Analysis/AliasAnalysis.cpp
@@ -49,21 +49,11 @@ AliasAnalysis::alias(const Value *V1, unsigned V1Size,
return AA->alias(V1, V1Size, V2, V2Size);
}
-void AliasAnalysis::getMustAliases(Value *P, std::vector<Value*> &RetVals) {
- assert(AA && "AA didn't call InitializeAliasAnalysis in its run method!");
- return AA->getMustAliases(P, RetVals);
-}
-
bool AliasAnalysis::pointsToConstantMemory(const Value *P) {
assert(AA && "AA didn't call InitializeAliasAnalysis in its run method!");
return AA->pointsToConstantMemory(P);
}
-bool AliasAnalysis::hasNoModRefInfoForCalls() const {
- assert(AA && "AA didn't call InitializeAliasAnalysis in its run method!");
- return AA->hasNoModRefInfoForCalls();
-}
-
void AliasAnalysis::deleteValue(Value *V) {
assert(AA && "AA didn't call InitializeAliasAnalysis in its run method!");
AA->deleteValue(V);
@@ -137,17 +127,18 @@ AliasAnalysis::getModRefBehavior(Function *F,
AliasAnalysis::ModRefResult
AliasAnalysis::getModRefInfo(CallSite CS, Value *P, unsigned Size) {
- ModRefResult Mask = ModRef;
ModRefBehavior MRB = getModRefBehavior(CS);
if (MRB == DoesNotAccessMemory)
return NoModRef;
- else if (MRB == OnlyReadsMemory)
+
+ ModRefResult Mask = ModRef;
+ if (MRB == OnlyReadsMemory)
Mask = Ref;
else if (MRB == AliasAnalysis::AccessesArguments) {
bool doesAlias = false;
for (CallSite::arg_iterator AI = CS.arg_begin(), AE = CS.arg_end();
AI != AE; ++AI)
- if (alias(*AI, ~0U, P, Size) != NoAlias) {
+ if (!isNoAlias(*AI, ~0U, P, Size)) {
doesAlias = true;
break;
}
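
The getModRefInfo hunk above defers building the ModRef mask until after the DoesNotAccessMemory early exit, and switches the per-argument test to isNoAlias. A rough standalone sketch of that narrowing logic, with invented names and a toy alias model in place of LLVM's API:

```cpp
#include <cstdio>

enum ModRefResult   { NoModRef = 0, Ref = 1, Mod = 2, ModRef = 3 };
enum ModRefBehavior { DoesNotAccessMemory, OnlyReadsMemory, AccessesArguments };

// Toy stand-in for isNoAlias(Arg, ~0U, P, Size): distinct ids never alias.
static bool cannotAlias(int arg, int queriedPtr) { return arg != queriedPtr; }

static ModRefResult getModRefInfo(ModRefBehavior MRB, const int *args,
                                  int numArgs, int queriedPtr) {
  if (MRB == DoesNotAccessMemory)
    return NoModRef;                 // early exit, as in the patched code

  ModRefResult Mask = ModRef;        // start conservative, then narrow
  if (MRB == OnlyReadsMemory)
    Mask = Ref;                      // the callee may read but never writes
  else if (MRB == AccessesArguments) {
    bool doesAlias = false;
    for (int i = 0; i != numArgs; ++i)
      if (!cannotAlias(args[i], queriedPtr)) {  // mirrors !isNoAlias(...)
        doesAlias = true;
        break;
      }
    if (!doesAlias)
      return NoModRef;               // only touches its args; none alias P
  }
  return Mask;
}

int main() {
  int args[] = { 1, 2 };
  std::printf("%d\n", getModRefInfo(AccessesArguments, args, 2, 3)); // 0
  std::printf("%d\n", getModRefInfo(OnlyReadsMemory,   args, 2, 1)); // 1
}
```

The sketch stops at the mask; the real function continues with further checks that this hunk's context doesn't show.
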
diff --git a/lib/Analysis/AliasDebugger.cpp b/lib/Analysis/AliasDebugger.cpp
index cf4727f..6868e3f 100644
--- a/lib/Analysis/AliasDebugger.cpp
+++ b/lib/Analysis/AliasDebugger.cpp
@@ -90,11 +90,6 @@ namespace {
return AliasAnalysis::getModRefInfo(CS1,CS2);
}
- void getMustAliases(Value *P, std::vector<Value*> &RetVals) {
- assert(Vals.find(P) != Vals.end() && "Never seen value in AA before");
- return AliasAnalysis::getMustAliases(P, RetVals);
- }
-
bool pointsToConstantMemory(const Value *P) {
assert(Vals.find(P) != Vals.end() && "Never seen value in AA before");
return AliasAnalysis::pointsToConstantMemory(P);
diff --git a/lib/Analysis/AliasSetTracker.cpp b/lib/Analysis/AliasSetTracker.cpp
index c037c8d..6634600 100644
--- a/lib/Analysis/AliasSetTracker.cpp
+++ b/lib/Analysis/AliasSetTracker.cpp
@@ -153,9 +153,6 @@ bool AliasSet::aliasesPointer(const Value *Ptr, unsigned Size,
// Check the call sites list and invoke list...
if (!CallSites.empty()) {
- if (AA.hasNoModRefInfoForCalls())
- return true;
-
for (unsigned i = 0, e = CallSites.size(); i != e; ++i)
if (AA.getModRefInfo(CallSites[i], const_cast<Value*>(Ptr), Size)
!= AliasAnalysis::NoModRef)
@@ -169,9 +166,6 @@ bool AliasSet::aliasesCallSite(CallSite CS, AliasAnalysis &AA) const {
if (AA.doesNotAccessMemory(CS))
return false;
- if (AA.hasNoModRefInfoForCalls())
- return true;
-
for (unsigned i = 0, e = CallSites.size(); i != e; ++i)
if (AA.getModRefInfo(CallSites[i], CS) != AliasAnalysis::NoModRef ||
AA.getModRefInfo(CS, CallSites[i]) != AliasAnalysis::NoModRef)
diff --git a/lib/Analysis/BasicAliasAnalysis.cpp b/lib/Analysis/BasicAliasAnalysis.cpp
index b8d69f4..b2983c7 100644
--- a/lib/Analysis/BasicAliasAnalysis.cpp
+++ b/lib/Analysis/BasicAliasAnalysis.cpp
@@ -14,8 +14,6 @@
//===----------------------------------------------------------------------===//
#include "llvm/Analysis/AliasAnalysis.h"
-#include "llvm/Analysis/CaptureTracking.h"
-#include "llvm/Analysis/MemoryBuiltins.h"
#include "llvm/Analysis/Passes.h"
#include "llvm/Constants.h"
#include "llvm/DerivedTypes.h"
@@ -25,12 +23,13 @@
#include "llvm/IntrinsicInst.h"
#include "llvm/Operator.h"
#include "llvm/Pass.h"
+#include "llvm/Analysis/CaptureTracking.h"
+#include "llvm/Analysis/MemoryBuiltins.h"
+#include "llvm/Analysis/ValueTracking.h"
#include "llvm/Target/TargetData.h"
-#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
-#include "llvm/ADT/STLExtras.h"
#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/GetElementPtrTypeIterator.h"
#include <algorithm>
using namespace llvm;
@@ -38,26 +37,6 @@ using namespace llvm;
// Useful predicates
//===----------------------------------------------------------------------===//
-static const Value *GetGEPOperands(const Value *V,
- SmallVector<Value*, 16> &GEPOps) {
- assert(GEPOps.empty() && "Expect empty list to populate!");
- GEPOps.insert(GEPOps.end(), cast<User>(V)->op_begin()+1,
- cast<User>(V)->op_end());
-
- // Accumulate all of the chained indexes into the operand array
- V = cast<User>(V)->getOperand(0);
-
- while (const GEPOperator *G = dyn_cast<GEPOperator>(V)) {
- if (!isa<Constant>(GEPOps[0]) || isa<GlobalValue>(GEPOps[0]) ||
- !cast<Constant>(GEPOps[0])->isNullValue())
- break; // Don't handle folding arbitrary pointer offsets yet...
- GEPOps.erase(GEPOps.begin()); // Drop the zero index
- GEPOps.insert(GEPOps.begin(), G->op_begin()+1, G->op_end());
- V = G->getOperand(0);
- }
- return V;
-}
-
/// isKnownNonNull - Return true if we know that the specified value is never
/// null.
static bool isKnownNonNull(const Value *V) {
@@ -79,7 +58,12 @@ static bool isKnownNonNull(const Value *V) {
static bool isNonEscapingLocalObject(const Value *V) {
// If this is a local allocation, check to see if it escapes.
if (isa<AllocaInst>(V) || isNoAliasCall(V))
- return !PointerMayBeCaptured(V, false);
+ // Set StoreCaptures to True so that we can assume in our callers that the
+ // pointer is not the result of a load instruction. Currently
+ // PointerMayBeCaptured doesn't have any special analysis for the
+ // StoreCaptures=false case; if it did, our callers could be refined to be
+ // more precise.
+ return !PointerMayBeCaptured(V, false, /*StoreCaptures=*/true);
// If this is an argument that corresponds to a byval or noalias argument,
// then it has not escaped before entering the function. Check if it escapes
@@ -89,7 +73,7 @@ static bool isNonEscapingLocalObject(const Value *V) {
// Don't bother analyzing arguments already known not to escape.
if (A->hasNoCaptureAttr())
return true;
- return !PointerMayBeCaptured(V, false);
+ return !PointerMayBeCaptured(V, false, /*StoreCaptures=*/true);
}
return false;
}
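
Both call sites above now pass StoreCaptures=true, so callers may assume a non-escaping local object can never be reached through a load. A toy, self-contained model of the capture walk this relies on (the Use/Kind types are invented; the real PointerMayBeCaptured appears in the CaptureTracking.cpp diff further down, which also refines the comparison case):

```cpp
#include <cstddef>
#include <cstdio>
#include <vector>

// Each use of the pointer, reduced to what matters for escape analysis.
struct Use {
  enum Kind { Call, StoreOfPointer, LoadThrough, Compare } kind;
  bool nocaptureArg;  // for Call: is the pointer only passed 'nocapture'?
};

static bool pointerMayBeCaptured(const std::vector<Use> &uses,
                                 bool storeCaptures) {
  for (size_t i = 0; i != uses.size(); ++i) {
    switch (uses[i].kind) {
    case Use::Call:
      if (!uses[i].nocaptureArg)
        return true;       // the callee might stash the pointer somewhere
      break;
    case Use::StoreOfPointer:
      if (storeCaptures)
        return true;       // conservatively treat any store as an escape
      break;               // (a finer analysis could inspect the target)
    case Use::Compare:
      return true;         // comparisons can leak bits of the address
    case Use::LoadThrough:
      break;               // reading through the pointer captures nothing
    }
  }
  return false;
}

int main() {
  std::vector<Use> uses;
  Use call = { Use::Call, true };
  Use load = { Use::LoadThrough, false };
  uses.push_back(call);
  uses.push_back(load);
  std::printf("%s\n", pointerMayBeCaptured(uses, true) ? "maybe" : "no"); // no
}
```
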
@@ -159,7 +143,6 @@ namespace {
llvm_unreachable("This method may not be called on this function!");
}
- virtual void getMustAliases(Value *P, std::vector<Value*> &RetVals) { }
virtual bool pointsToConstantMemory(const Value *P) { return false; }
virtual ModRefResult getModRefInfo(CallSite CS, Value *P, unsigned Size) {
return ModRef;
@@ -167,7 +150,6 @@ namespace {
virtual ModRefResult getModRefInfo(CallSite CS1, CallSite CS2) {
return ModRef;
}
- virtual bool hasNoModRefInfoForCalls() const { return true; }
virtual void deleteValue(Value *V) {}
virtual void copyValue(Value *From, Value *To) {}
@@ -206,10 +188,6 @@ namespace {
ModRefResult getModRefInfo(CallSite CS, Value *P, unsigned Size);
ModRefResult getModRefInfo(CallSite CS1, CallSite CS2);
- /// hasNoModRefInfoForCalls - We can provide mod/ref information against
- /// non-escaping allocations.
- virtual bool hasNoModRefInfoForCalls() const { return false; }
-
/// pointsToConstantMemory - Chase pointers until we find a (constant
/// global) or not.
bool pointsToConstantMemory(const Value *P);
@@ -218,13 +196,14 @@ namespace {
// VisitedPHIs - Track PHI nodes visited by an aliasCheck() call.
SmallPtrSet<const Value*, 16> VisitedPHIs;
- // aliasGEP - Provide a bunch of ad-hoc rules to disambiguate a GEP instruction
- // against another.
- AliasResult aliasGEP(const Value *V1, unsigned V1Size,
- const Value *V2, unsigned V2Size);
+ // aliasGEP - Provide a bunch of ad-hoc rules to disambiguate a GEP
+ // instruction against another.
+ AliasResult aliasGEP(const GEPOperator *V1, unsigned V1Size,
+ const Value *V2, unsigned V2Size,
+ const Value *UnderlyingV1, const Value *UnderlyingV2);
- // aliasPHI - Provide a bunch of ad-hoc rules to disambiguate a PHI instruction
- // against another.
+ // aliasPHI - Provide a bunch of ad-hoc rules to disambiguate a PHI
+ // instruction against another.
AliasResult aliasPHI(const PHINode *PN, unsigned PNSize,
const Value *V2, unsigned V2Size);
@@ -234,15 +213,6 @@ namespace {
AliasResult aliasCheck(const Value *V1, unsigned V1Size,
const Value *V2, unsigned V2Size);
-
- // CheckGEPInstructions - Check two GEP instructions with known
- // must-aliasing base pointers. This checks to see if the index expressions
- // preclude the pointers from aliasing...
- AliasResult
- CheckGEPInstructions(const Type* BasePtr1Ty,
- Value **GEP1Ops, unsigned NumGEP1Ops, unsigned G1Size,
- const Type *BasePtr2Ty,
- Value **GEP2Ops, unsigned NumGEP2Ops, unsigned G2Size);
};
} // End of anonymous namespace
@@ -264,107 +234,124 @@ ImmutablePass *llvm::createBasicAliasAnalysisPass() {
bool BasicAliasAnalysis::pointsToConstantMemory(const Value *P) {
if (const GlobalVariable *GV =
dyn_cast<GlobalVariable>(P->getUnderlyingObject()))
+ // Note: this doesn't require GV to be "ODR" because it isn't legal for a
+ // global to be marked constant in some modules and non-constant in others.
+ // GV may even be a declaration, not a definition.
return GV->isConstant();
return false;
}
-// getModRefInfo - Check to see if the specified callsite can clobber the
-// specified memory object. Since we only look at local properties of this
-// function, we really can't say much about this query. We do, however, use
-// simple "address taken" analysis on local objects.
-//
+/// getModRefInfo - Check to see if the specified callsite can clobber the
+/// specified memory object. Since we only look at local properties of this
+/// function, we really can't say much about this query. We do, however, use
+/// simple "address taken" analysis on local objects.
AliasAnalysis::ModRefResult
BasicAliasAnalysis::getModRefInfo(CallSite CS, Value *P, unsigned Size) {
- if (!isa<Constant>(P)) {
- const Value *Object = P->getUnderlyingObject();
-
- // If this is a tail call and P points to a stack location, we know that
- // the tail call cannot access or modify the local stack.
- // We cannot exclude byval arguments here; these belong to the caller of
- // the current function not to the current function, and a tail callee
- // may reference them.
- if (isa<AllocaInst>(Object))
- if (CallInst *CI = dyn_cast<CallInst>(CS.getInstruction()))
- if (CI->isTailCall())
- return NoModRef;
-
- // If the pointer is to a locally allocated object that does not escape,
- // then the call can not mod/ref the pointer unless the call takes the
- // argument without capturing it.
- if (isNonEscapingLocalObject(Object) && CS.getInstruction() != Object) {
- bool passedAsArg = false;
- // TODO: Eventually only check 'nocapture' arguments.
- for (CallSite::arg_iterator CI = CS.arg_begin(), CE = CS.arg_end();
- CI != CE; ++CI)
- if (isa<PointerType>((*CI)->getType()) &&
- alias(cast<Value>(CI), ~0U, P, ~0U) != NoAlias)
- passedAsArg = true;
-
- if (!passedAsArg)
+ const Value *Object = P->getUnderlyingObject();
+
+ // If this is a tail call and P points to a stack location, we know that
+ // the tail call cannot access or modify the local stack.
+ // We cannot exclude byval arguments here; these belong to the caller of
+ // the current function not to the current function, and a tail callee
+ // may reference them.
+ if (isa<AllocaInst>(Object))
+ if (CallInst *CI = dyn_cast<CallInst>(CS.getInstruction()))
+ if (CI->isTailCall())
return NoModRef;
- }
-
- if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(CS.getInstruction())) {
- switch (II->getIntrinsicID()) {
- default: break;
- case Intrinsic::memcpy:
- case Intrinsic::memmove: {
- unsigned Len = ~0U;
- if (ConstantInt *LenCI = dyn_cast<ConstantInt>(II->getOperand(3)))
- Len = LenCI->getZExtValue();
- Value *Dest = II->getOperand(1);
- Value *Src = II->getOperand(2);
- if (alias(Dest, Len, P, Size) == NoAlias) {
- if (alias(Src, Len, P, Size) == NoAlias)
- return NoModRef;
- return Ref;
- }
- }
- break;
- case Intrinsic::memset:
- if (ConstantInt *LenCI = dyn_cast<ConstantInt>(II->getOperand(3))) {
- unsigned Len = LenCI->getZExtValue();
- Value *Dest = II->getOperand(1);
- if (alias(Dest, Len, P, Size) == NoAlias)
- return NoModRef;
- }
- break;
- case Intrinsic::atomic_cmp_swap:
- case Intrinsic::atomic_swap:
- case Intrinsic::atomic_load_add:
- case Intrinsic::atomic_load_sub:
- case Intrinsic::atomic_load_and:
- case Intrinsic::atomic_load_nand:
- case Intrinsic::atomic_load_or:
- case Intrinsic::atomic_load_xor:
- case Intrinsic::atomic_load_max:
- case Intrinsic::atomic_load_min:
- case Intrinsic::atomic_load_umax:
- case Intrinsic::atomic_load_umin:
- if (TD) {
- Value *Op1 = II->getOperand(1);
- unsigned Op1Size = TD->getTypeStoreSize(Op1->getType());
- if (alias(Op1, Op1Size, P, Size) == NoAlias)
- return NoModRef;
- }
+
+ // If the pointer is to a locally allocated object that does not escape,
+ // then the call can not mod/ref the pointer unless the call takes the pointer
+ // as an argument, and itself doesn't capture it.
+ if (!isa<Constant>(Object) && CS.getInstruction() != Object &&
+ isNonEscapingLocalObject(Object)) {
+ bool PassedAsArg = false;
+ unsigned ArgNo = 0;
+ for (CallSite::arg_iterator CI = CS.arg_begin(), CE = CS.arg_end();
+ CI != CE; ++CI, ++ArgNo) {
+ // Only look at the no-capture pointer arguments.
+ if (!isa<PointerType>((*CI)->getType()) ||
+ !CS.paramHasAttr(ArgNo+1, Attribute::NoCapture))
+ continue;
+
+ // If this is a no-capture pointer argument, see if we can tell that it
+ // is impossible to alias the pointer we're checking. If not, we have to
+ // assume that the call could touch the pointer, even though it doesn't
+ // escape.
+ if (!isNoAlias(cast<Value>(CI), ~0U, P, ~0U)) {
+ PassedAsArg = true;
break;
- case Intrinsic::lifetime_start:
- case Intrinsic::lifetime_end:
- case Intrinsic::invariant_start: {
- unsigned PtrSize = cast<ConstantInt>(II->getOperand(1))->getZExtValue();
- if (alias(II->getOperand(2), PtrSize, P, Size) == NoAlias)
- return NoModRef;
- }
- break;
- case Intrinsic::invariant_end: {
- unsigned PtrSize = cast<ConstantInt>(II->getOperand(2))->getZExtValue();
- if (alias(II->getOperand(3), PtrSize, P, Size) == NoAlias)
- return NoModRef;
- }
- break;
}
}
+
+ if (!PassedAsArg)
+ return NoModRef;
+ }
+
+ // Finally, handle specific knowledge of intrinsics.
+ IntrinsicInst *II = dyn_cast<IntrinsicInst>(CS.getInstruction());
+ if (II == 0)
+ return AliasAnalysis::getModRefInfo(CS, P, Size);
+
+ switch (II->getIntrinsicID()) {
+ default: break;
+ case Intrinsic::memcpy:
+ case Intrinsic::memmove: {
+ unsigned Len = ~0U;
+ if (ConstantInt *LenCI = dyn_cast<ConstantInt>(II->getOperand(3)))
+ Len = LenCI->getZExtValue();
+ Value *Dest = II->getOperand(1);
+ Value *Src = II->getOperand(2);
+ if (isNoAlias(Dest, Len, P, Size)) {
+ if (isNoAlias(Src, Len, P, Size))
+ return NoModRef;
+ return Ref;
+ }
+ break;
+ }
+ case Intrinsic::memset:
+ // Since memset is 'accesses arguments' only, the AliasAnalysis base class
+ // will handle it for the variable length case.
+ if (ConstantInt *LenCI = dyn_cast<ConstantInt>(II->getOperand(3))) {
+ unsigned Len = LenCI->getZExtValue();
+ Value *Dest = II->getOperand(1);
+ if (isNoAlias(Dest, Len, P, Size))
+ return NoModRef;
+ }
+ break;
+ case Intrinsic::atomic_cmp_swap:
+ case Intrinsic::atomic_swap:
+ case Intrinsic::atomic_load_add:
+ case Intrinsic::atomic_load_sub:
+ case Intrinsic::atomic_load_and:
+ case Intrinsic::atomic_load_nand:
+ case Intrinsic::atomic_load_or:
+ case Intrinsic::atomic_load_xor:
+ case Intrinsic::atomic_load_max:
+ case Intrinsic::atomic_load_min:
+ case Intrinsic::atomic_load_umax:
+ case Intrinsic::atomic_load_umin:
+ if (TD) {
+ Value *Op1 = II->getOperand(1);
+ unsigned Op1Size = TD->getTypeStoreSize(Op1->getType());
+ if (isNoAlias(Op1, Op1Size, P, Size))
+ return NoModRef;
+ }
+ break;
+ case Intrinsic::lifetime_start:
+ case Intrinsic::lifetime_end:
+ case Intrinsic::invariant_start: {
+ unsigned PtrSize = cast<ConstantInt>(II->getOperand(1))->getZExtValue();
+ if (isNoAlias(II->getOperand(2), PtrSize, P, Size))
+ return NoModRef;
+ break;
+ }
+ case Intrinsic::invariant_end: {
+ unsigned PtrSize = cast<ConstantInt>(II->getOperand(2))->getZExtValue();
+ if (isNoAlias(II->getOperand(3), PtrSize, P, Size))
+ return NoModRef;
+ break;
+ }
}
// The AliasAnalysis base class has some smarts, let's use them.
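
Most of the intrinsic cases in the rewritten getModRefInfo share one shape: bound the intrinsic's footprint, then ask whether the queried location can overlap it. A self-contained sketch of the memcpy/memmove case using a toy byte-range model (this isNoAlias is a stand-in, not LLVM's):

```cpp
#include <cstdio>

enum ModRefResult { NoModRef, Ref, Mod, ModRef };

struct Loc { long begin, size; };  // byte range standing in for (Value*, Size)

// Toy isNoAlias: disjoint byte ranges can never alias.
static bool isNoAlias(Loc a, Loc b) {
  return a.begin + a.size <= b.begin || b.begin + b.size <= a.begin;
}

// If P cannot be the destination, the call can at most read P; if P cannot
// be the source either, the call cannot touch P at all.
static ModRefResult memcpyModRef(Loc dest, Loc src, Loc p) {
  if (isNoAlias(dest, p))
    return isNoAlias(src, p) ? NoModRef : Ref;
  return ModRef;
}

int main() {
  Loc dest = { 0, 16 }, src = { 32, 16 };
  Loc far = { 64, 8 }, overlapsSrc = { 40, 4 };
  std::printf("%d\n", memcpyModRef(dest, src, far));         // 0 (NoModRef)
  std::printf("%d\n", memcpyModRef(dest, src, overlapsSrc)); // 1 (Ref)
}
```
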
@@ -389,141 +376,157 @@ BasicAliasAnalysis::getModRefInfo(CallSite CS1, CallSite CS2) {
return NoAA::getModRefInfo(CS1, CS2);
}
-// aliasGEP - Provide a bunch of ad-hoc rules to disambiguate a GEP instruction
-// against another.
-//
+/// GetIndiceDifference - Dest and Src are the variable indices from two
+/// decomposed GetElementPtr instructions GEP1 and GEP2 which have common base
+/// pointers. Subtract the GEP2 indices from GEP1 to find the symbolic
+/// difference between the two pointers.
+static void GetIndiceDifference(
+ SmallVectorImpl<std::pair<const Value*, int64_t> > &Dest,
+ const SmallVectorImpl<std::pair<const Value*, int64_t> > &Src) {
+ if (Src.empty()) return;
+
+ for (unsigned i = 0, e = Src.size(); i != e; ++i) {
+ const Value *V = Src[i].first;
+ int64_t Scale = Src[i].second;
+
+ // Find V in Dest. This is N^2, but pointer indices almost never have more
+ // than a few variable indexes.
+ for (unsigned j = 0, e = Dest.size(); j != e; ++j) {
+ if (Dest[j].first != V) continue;
+
+ // If we found it, subtract off Scale V's from the entry in Dest. If it
+ // goes to zero, remove the entry.
+ if (Dest[j].second != Scale)
+ Dest[j].second -= Scale;
+ else
+ Dest.erase(Dest.begin()+j);
+ Scale = 0;
+ break;
+ }
+
+ // If we didn't consume this entry, add it to the end of the Dest list.
+ if (Scale)
+ Dest.push_back(std::make_pair(V, -Scale));
+ }
+}
+
+/// aliasGEP - Provide a bunch of ad-hoc rules to disambiguate a GEP instruction
+/// against another pointer. We know that V1 is a GEP, but we don't know
+/// anything about V2. UnderlyingV1 is GEP1->getUnderlyingObject(),
+/// UnderlyingV2 is the same for V2.
+///
AliasAnalysis::AliasResult
-BasicAliasAnalysis::aliasGEP(const Value *V1, unsigned V1Size,
- const Value *V2, unsigned V2Size) {
+BasicAliasAnalysis::aliasGEP(const GEPOperator *GEP1, unsigned V1Size,
+ const Value *V2, unsigned V2Size,
+ const Value *UnderlyingV1,
+ const Value *UnderlyingV2) {
+ int64_t GEP1BaseOffset;
+ SmallVector<std::pair<const Value*, int64_t>, 4> GEP1VariableIndices;
+
// If we have two gep instructions with must-alias'ing base pointers, figure
// out if the indexes to the GEP tell us anything about the derived pointer.
- // Note that we also handle chains of getelementptr instructions as well as
- // constant expression getelementptrs here.
- //
- if (isa<GEPOperator>(V1) && isa<GEPOperator>(V2)) {
- const User *GEP1 = cast<User>(V1);
- const User *GEP2 = cast<User>(V2);
+ if (const GEPOperator *GEP2 = dyn_cast<GEPOperator>(V2)) {
+ // Do the base pointers alias?
+ AliasResult BaseAlias = aliasCheck(UnderlyingV1, ~0U, UnderlyingV2, ~0U);
- // If V1 and V2 are identical GEPs, just recurse down on both of them.
- // This allows us to analyze things like:
- // P = gep A, 0, i, 1
- // Q = gep B, 0, i, 1
- // by just analyzing A and B. This is even safe for variable indices.
- if (GEP1->getType() == GEP2->getType() &&
- GEP1->getNumOperands() == GEP2->getNumOperands() &&
- GEP1->getOperand(0)->getType() == GEP2->getOperand(0)->getType() &&
- // All operands are the same, ignoring the base.
- std::equal(GEP1->op_begin()+1, GEP1->op_end(), GEP2->op_begin()+1))
- return aliasCheck(GEP1->getOperand(0), V1Size,
- GEP2->getOperand(0), V2Size);
+ // If we get a No or May, then return it immediately, no amount of analysis
+ // will improve this situation.
+ if (BaseAlias != MustAlias) return BaseAlias;
- // Drill down into the first non-gep value, to test for must-aliasing of
- // the base pointers.
- while (isa<GEPOperator>(GEP1->getOperand(0)) &&
- GEP1->getOperand(1) ==
- Constant::getNullValue(GEP1->getOperand(1)->getType()))
- GEP1 = cast<User>(GEP1->getOperand(0));
- const Value *BasePtr1 = GEP1->getOperand(0);
-
- while (isa<GEPOperator>(GEP2->getOperand(0)) &&
- GEP2->getOperand(1) ==
- Constant::getNullValue(GEP2->getOperand(1)->getType()))
- GEP2 = cast<User>(GEP2->getOperand(0));
- const Value *BasePtr2 = GEP2->getOperand(0);
-
- // Do the base pointers alias?
- AliasResult BaseAlias = aliasCheck(BasePtr1, ~0U, BasePtr2, ~0U);
- if (BaseAlias == NoAlias) return NoAlias;
- if (BaseAlias == MustAlias) {
- // If the base pointers alias each other exactly, check to see if we can
- // figure out anything about the resultant pointers, to try to prove
- // non-aliasing.
-
- // Collect all of the chained GEP operands together into one simple place
- SmallVector<Value*, 16> GEP1Ops, GEP2Ops;
- BasePtr1 = GetGEPOperands(V1, GEP1Ops);
- BasePtr2 = GetGEPOperands(V2, GEP2Ops);
-
- // If GetGEPOperands were able to fold to the same must-aliased pointer,
- // do the comparison.
- if (BasePtr1 == BasePtr2) {
- AliasResult GAlias =
- CheckGEPInstructions(BasePtr1->getType(),
- &GEP1Ops[0], GEP1Ops.size(), V1Size,
- BasePtr2->getType(),
- &GEP2Ops[0], GEP2Ops.size(), V2Size);
- if (GAlias != MayAlias)
- return GAlias;
- }
+ // Otherwise, we have a MustAlias. Since the base pointers alias each other
+ // exactly, see if the computed offset from the common pointer tells us
+ // about the relation of the resulting pointer.
+ const Value *GEP1BasePtr =
+ DecomposeGEPExpression(GEP1, GEP1BaseOffset, GEP1VariableIndices, TD);
+
+ int64_t GEP2BaseOffset;
+ SmallVector<std::pair<const Value*, int64_t>, 4> GEP2VariableIndices;
+ const Value *GEP2BasePtr =
+ DecomposeGEPExpression(GEP2, GEP2BaseOffset, GEP2VariableIndices, TD);
+
+ // If DecomposeGEPExpression isn't able to look all the way through the
+ // addressing operation, we must not have TD and this is too complex for us
+ // to handle without it.
+ if (GEP1BasePtr != UnderlyingV1 || GEP2BasePtr != UnderlyingV2) {
+ assert(TD == 0 &&
+ "DecomposeGEPExpression and getUnderlyingObject disagree!");
+ return MayAlias;
}
- }
+
+ // Subtract the GEP2 pointer from the GEP1 pointer to find out their
+ // symbolic difference.
+ GEP1BaseOffset -= GEP2BaseOffset;
+ GetIndiceDifference(GEP1VariableIndices, GEP2VariableIndices);
+
+ } else {
+ // Check to see if these two pointers are related by the getelementptr
+ // instruction. If one pointer is a GEP with a non-zero index of the other
+ // pointer, we know they cannot alias.
- // Check to see if these two pointers are related by a getelementptr
- // instruction. If one pointer is a GEP with a non-zero index of the other
- // pointer, we know they cannot alias.
- //
- if (V1Size == ~0U || V2Size == ~0U)
- return MayAlias;
+ // If both accesses are unknown size, we can't do anything useful here.
+ if (V1Size == ~0U && V2Size == ~0U)
+ return MayAlias;
- SmallVector<Value*, 16> GEPOperands;
- const Value *BasePtr = GetGEPOperands(V1, GEPOperands);
-
- AliasResult R = aliasCheck(BasePtr, ~0U, V2, V2Size);
- if (R != MustAlias)
- // If V2 may alias GEP base pointer, conservatively returns MayAlias.
- // If V2 is known not to alias GEP base pointer, then the two values
- // cannot alias per GEP semantics: "A pointer value formed from a
- // getelementptr instruction is associated with the addresses associated
- // with the first operand of the getelementptr".
- return R;
-
- // If there is at least one non-zero constant index, we know they cannot
- // alias.
- bool ConstantFound = false;
- bool AllZerosFound = true;
- for (unsigned i = 0, e = GEPOperands.size(); i != e; ++i)
- if (const Constant *C = dyn_cast<Constant>(GEPOperands[i])) {
- if (!C->isNullValue()) {
- ConstantFound = true;
- AllZerosFound = false;
- break;
- }
- } else {
- AllZerosFound = false;
+ AliasResult R = aliasCheck(UnderlyingV1, ~0U, V2, V2Size);
+ if (R != MustAlias)
+ // If V2 may alias GEP base pointer, conservatively returns MayAlias.
+ // If V2 is known not to alias GEP base pointer, then the two values
+ // cannot alias per GEP semantics: "A pointer value formed from a
+ // getelementptr instruction is associated with the addresses associated
+ // with the first operand of the getelementptr".
+ return R;
+
+ const Value *GEP1BasePtr =
+ DecomposeGEPExpression(GEP1, GEP1BaseOffset, GEP1VariableIndices, TD);
+
+ // If DecomposeGEPExpression isn't able to look all the way through the
+ // addressing operation, we must not have TD and this is too complex for us
+ // to handle without it.
+ if (GEP1BasePtr != UnderlyingV1) {
+ assert(TD == 0 &&
+ "DecomposeGEPExpression and getUnderlyingObject disagree!");
+ return MayAlias;
}
-
- // If we have getelementptr <ptr>, 0, 0, 0, 0, ... and V2 must aliases
- // the ptr, the end result is a must alias also.
- if (AllZerosFound)
+ }
+
+ // In the two GEP case, if there is no difference in the offsets of the
+ // computed pointers, the resultant pointers are a must alias. This
+ // happens when we have two lexically identical GEPs (for example).
+ //
+ // In the other case, if we have getelementptr <ptr>, 0, 0, 0, 0, ... and V2
+ // must aliases the GEP, the end result is a must alias also.
+ if (GEP1BaseOffset == 0 && GEP1VariableIndices.empty())
return MustAlias;
- if (ConstantFound) {
- if (V2Size <= 1 && V1Size <= 1) // Just pointer check?
+ // If we have a known constant offset, see if this offset is larger than the
+ // access size being queried. If so, and if no variable indices can remove
+ // pieces of this constant, then we know we have a no-alias. For example,
+ // &A[100] != &A.
+
+ // In order to handle cases like &A[100][i] where i is an out of range
+ // subscript, we have to ignore all constant offset pieces that are a multiple
+ // of a scaled index. Do this by removing constant offsets that are a
+ // multiple of any of our variable indices. This allows us to transform
+ // things like &A[i][1] because i has a stride of (e.g.) 8 bytes but the 1
+ // provides an offset of 4 bytes (assuming a <= 4 byte access).
+ for (unsigned i = 0, e = GEP1VariableIndices.size();
+ i != e && GEP1BaseOffset;++i)
+ if (int64_t RemovedOffset = GEP1BaseOffset/GEP1VariableIndices[i].second)
+ GEP1BaseOffset -= RemovedOffset*GEP1VariableIndices[i].second;
+
+ // If our known offset is bigger than the access size, we know we don't have
+ // an alias.
+ if (GEP1BaseOffset) {
+ if (GEP1BaseOffset >= (int64_t)V2Size ||
+ GEP1BaseOffset <= -(int64_t)V1Size)
return NoAlias;
-
- // Otherwise we have to check to see that the distance is more than
- // the size of the argument... build an index vector that is equal to
- // the arguments provided, except substitute 0's for any variable
- // indexes we find...
- if (TD &&
- cast<PointerType>(BasePtr->getType())->getElementType()->isSized()) {
- for (unsigned i = 0; i != GEPOperands.size(); ++i)
- if (!isa<ConstantInt>(GEPOperands[i]))
- GEPOperands[i] = Constant::getNullValue(GEPOperands[i]->getType());
- int64_t Offset = TD->getIndexedOffset(BasePtr->getType(),
- &GEPOperands[0],
- GEPOperands.size());
-
- if (Offset >= (int64_t)V2Size || Offset <= -(int64_t)V1Size)
- return NoAlias;
- }
}
-
+
return MayAlias;
}
-// aliasSelect - Provide a bunch of ad-hoc rules to disambiguate a Select instruction
-// against another.
+/// aliasSelect - Provide a bunch of ad-hoc rules to disambiguate a Select
+/// instruction against another.
AliasAnalysis::AliasResult
BasicAliasAnalysis::aliasSelect(const SelectInst *SI, unsigned SISize,
const Value *V2, unsigned V2Size) {
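
GetIndiceDifference above cancels matching (value, scale) terms so that only the symbolic difference between two decomposed GEPs survives. A compilable rendition that uses std::string keys in place of llvm::Value* so it runs standalone:

```cpp
#include <cstddef>
#include <cstdint>
#include <cstdio>
#include <string>
#include <utility>
#include <vector>

typedef std::vector<std::pair<std::string, int64_t> > IndexList;

static void getIndexDifference(IndexList &dest, const IndexList &src) {
  for (size_t i = 0; i != src.size(); ++i) {
    const std::string &v = src[i].first;
    int64_t scale = src[i].second;

    // Quadratic search is fine: GEPs rarely have many variable indices.
    bool consumed = false;
    for (size_t j = 0; j != dest.size(); ++j) {
      if (dest[j].first != v) continue;
      if (dest[j].second != scale)
        dest[j].second -= scale;      // partial cancellation
      else
        dest.erase(dest.begin() + j); // exact cancellation: drop the term
      consumed = true;
      break;
    }
    if (!consumed && scale != 0)
      dest.push_back(std::make_pair(v, -scale)); // term only on the src side
  }
}

int main() {
  // For int A[N][2]: &A[i][1] decomposes to {i*8} plus constant 4, and
  // &A[i][0] to {i*8}; the variable terms cancel and only the constant
  // offset (handled separately via GEP1BaseOffset) differs.
  IndexList gep1, gep2;
  gep1.push_back(std::make_pair("i", 8));
  gep2.push_back(std::make_pair("i", 8));
  getIndexDifference(gep1, gep2);
  std::printf("%u variable terms remain\n", (unsigned)gep1.size()); // 0
}
```
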
@@ -683,22 +686,31 @@ BasicAliasAnalysis::aliasCheck(const Value *V1, unsigned V1Size,
(V2Size != ~0U && isObjectSmallerThan(O1, V2Size, *TD)))
return NoAlias;
- // If one pointer is the result of a call/invoke and the other is a
+ // If one pointer is the result of a call/invoke or load and the other is a
// non-escaping local object, then we know the object couldn't escape to a
- // point where the call could return it.
- if ((isa<CallInst>(O1) || isa<InvokeInst>(O1)) &&
- isNonEscapingLocalObject(O2) && O1 != O2)
- return NoAlias;
- if ((isa<CallInst>(O2) || isa<InvokeInst>(O2)) &&
- isNonEscapingLocalObject(O1) && O1 != O2)
- return NoAlias;
+ // point where the call could return it. The load case works because
+ // isNonEscapingLocalObject considers all stores to be escapes (it
+ // passes true for the StoreCaptures argument to PointerMayBeCaptured).
+ if (O1 != O2) {
+ if ((isa<CallInst>(O1) || isa<InvokeInst>(O1) || isa<LoadInst>(O1) ||
+ isa<Argument>(O1)) &&
+ isNonEscapingLocalObject(O2))
+ return NoAlias;
+ if ((isa<CallInst>(O2) || isa<InvokeInst>(O2) || isa<LoadInst>(O2) ||
+ isa<Argument>(O2)) &&
+ isNonEscapingLocalObject(O1))
+ return NoAlias;
+ }
+ // FIXME: This isn't aggressively handling alias(GEP, PHI) for example: if the
+ // GEP can't simplify, we don't even look at the PHI cases.
if (!isa<GEPOperator>(V1) && isa<GEPOperator>(V2)) {
std::swap(V1, V2);
std::swap(V1Size, V2Size);
+ std::swap(O1, O2);
}
- if (isa<GEPOperator>(V1))
- return aliasGEP(V1, V1Size, V2, V2Size);
+ if (const GEPOperator *GV1 = dyn_cast<GEPOperator>(V1))
+ return aliasGEP(GV1, V1Size, V2, V2Size, O1, O2);
if (isa<PHINode>(V2) && !isa<PHINode>(V1)) {
std::swap(V1, V2);
@@ -717,351 +729,5 @@ BasicAliasAnalysis::aliasCheck(const Value *V1, unsigned V1Size,
return MayAlias;
}
-// This function is used to determine if the indices of two GEP instructions are
-// equal. V1 and V2 are the indices.
-static bool IndexOperandsEqual(Value *V1, Value *V2) {
- if (V1->getType() == V2->getType())
- return V1 == V2;
- if (Constant *C1 = dyn_cast<Constant>(V1))
- if (Constant *C2 = dyn_cast<Constant>(V2)) {
- // Sign extend the constants to long types, if necessary
- if (C1->getType() != Type::getInt64Ty(C1->getContext()))
- C1 = ConstantExpr::getSExt(C1, Type::getInt64Ty(C1->getContext()));
- if (C2->getType() != Type::getInt64Ty(C1->getContext()))
- C2 = ConstantExpr::getSExt(C2, Type::getInt64Ty(C1->getContext()));
- return C1 == C2;
- }
- return false;
-}
-
-/// CheckGEPInstructions - Check two GEP instructions with known must-aliasing
-/// base pointers. This checks to see if the index expressions preclude the
-/// pointers from aliasing...
-AliasAnalysis::AliasResult
-BasicAliasAnalysis::CheckGEPInstructions(
- const Type* BasePtr1Ty, Value **GEP1Ops, unsigned NumGEP1Ops, unsigned G1S,
- const Type *BasePtr2Ty, Value **GEP2Ops, unsigned NumGEP2Ops, unsigned G2S) {
- // We currently can't handle the case when the base pointers have different
- // primitive types. Since this is uncommon anyway, we are happy being
- // extremely conservative.
- if (BasePtr1Ty != BasePtr2Ty)
- return MayAlias;
-
- const PointerType *GEPPointerTy = cast<PointerType>(BasePtr1Ty);
-
- // Find the (possibly empty) initial sequence of equal values... which are not
- // necessarily constants.
- unsigned NumGEP1Operands = NumGEP1Ops, NumGEP2Operands = NumGEP2Ops;
- unsigned MinOperands = std::min(NumGEP1Operands, NumGEP2Operands);
- unsigned MaxOperands = std::max(NumGEP1Operands, NumGEP2Operands);
- unsigned UnequalOper = 0;
- while (UnequalOper != MinOperands &&
- IndexOperandsEqual(GEP1Ops[UnequalOper], GEP2Ops[UnequalOper])) {
- // Advance through the type as we go...
- ++UnequalOper;
- if (const CompositeType *CT = dyn_cast<CompositeType>(BasePtr1Ty))
- BasePtr1Ty = CT->getTypeAtIndex(GEP1Ops[UnequalOper-1]);
- else {
- // If all operands equal each other, then the derived pointers must
- // alias each other...
- BasePtr1Ty = 0;
- assert(UnequalOper == NumGEP1Operands && UnequalOper == NumGEP2Operands &&
- "Ran out of type nesting, but not out of operands?");
- return MustAlias;
- }
- }
-
- // If we have seen all constant operands, and run out of indexes on one of the
- // getelementptrs, check to see if the tail of the leftover one is all zeros.
- // If so, return mustalias.
- if (UnequalOper == MinOperands) {
- if (NumGEP1Ops < NumGEP2Ops) {
- std::swap(GEP1Ops, GEP2Ops);
- std::swap(NumGEP1Ops, NumGEP2Ops);
- }
-
- bool AllAreZeros = true;
- for (unsigned i = UnequalOper; i != MaxOperands; ++i)
- if (!isa<Constant>(GEP1Ops[i]) ||
- !cast<Constant>(GEP1Ops[i])->isNullValue()) {
- AllAreZeros = false;
- break;
- }
- if (AllAreZeros) return MustAlias;
- }
-
-
- // So now we know that the indexes derived from the base pointers,
- // which are known to alias, are different. We can still determine a
- // no-alias result if there are differing constant pairs in the index
- // chain. For example:
- // A[i][0] != A[j][1] iff (&A[0][1]-&A[0][0] >= std::max(G1S, G2S))
- //
- // We have to be careful here about array accesses. In particular, consider:
- // A[1][0] vs A[0][i]
- // In this case, we don't *know* that the array will be accessed in bounds:
- // the index could even be negative. Because of this, we have to
- // conservatively *give up* and return may alias. We disregard differing
- // array subscripts that are followed by a variable index without going
- // through a struct.
- //
- unsigned SizeMax = std::max(G1S, G2S);
- if (SizeMax == ~0U) return MayAlias; // Avoid frivolous work.
-
- // Scan for the first operand that is constant and unequal in the
- // two getelementptrs...
- unsigned FirstConstantOper = UnequalOper;
- for (; FirstConstantOper != MinOperands; ++FirstConstantOper) {
- const Value *G1Oper = GEP1Ops[FirstConstantOper];
- const Value *G2Oper = GEP2Ops[FirstConstantOper];
-
- if (G1Oper != G2Oper) // Found non-equal constant indexes...
- if (Constant *G1OC = dyn_cast<ConstantInt>(const_cast<Value*>(G1Oper)))
- if (Constant *G2OC = dyn_cast<ConstantInt>(const_cast<Value*>(G2Oper))){
- if (G1OC->getType() != G2OC->getType()) {
- // Sign extend both operands to long.
- const Type *Int64Ty = Type::getInt64Ty(G1OC->getContext());
- if (G1OC->getType() != Int64Ty)
- G1OC = ConstantExpr::getSExt(G1OC, Int64Ty);
- if (G2OC->getType() != Int64Ty)
- G2OC = ConstantExpr::getSExt(G2OC, Int64Ty);
- GEP1Ops[FirstConstantOper] = G1OC;
- GEP2Ops[FirstConstantOper] = G2OC;
- }
-
- if (G1OC != G2OC) {
- // Handle the "be careful" case above: if this is an array/vector
- // subscript, scan for a subsequent variable array index.
- if (const SequentialType *STy =
- dyn_cast<SequentialType>(BasePtr1Ty)) {
- const Type *NextTy = STy;
- bool isBadCase = false;
-
- for (unsigned Idx = FirstConstantOper;
- Idx != MinOperands && isa<SequentialType>(NextTy); ++Idx) {
- const Value *V1 = GEP1Ops[Idx], *V2 = GEP2Ops[Idx];
- if (!isa<Constant>(V1) || !isa<Constant>(V2)) {
- isBadCase = true;
- break;
- }
- // If the array is indexed beyond the bounds of the static type
- // at this level, it will also fall into the "be careful" case.
- // It would theoretically be possible to analyze these cases,
- // but for now just be conservatively correct.
- if (const ArrayType *ATy = dyn_cast<ArrayType>(STy))
- if (cast<ConstantInt>(G1OC)->getZExtValue() >=
- ATy->getNumElements() ||
- cast<ConstantInt>(G2OC)->getZExtValue() >=
- ATy->getNumElements()) {
- isBadCase = true;
- break;
- }
- if (const VectorType *VTy = dyn_cast<VectorType>(STy))
- if (cast<ConstantInt>(G1OC)->getZExtValue() >=
- VTy->getNumElements() ||
- cast<ConstantInt>(G2OC)->getZExtValue() >=
- VTy->getNumElements()) {
- isBadCase = true;
- break;
- }
- STy = cast<SequentialType>(NextTy);
- NextTy = cast<SequentialType>(NextTy)->getElementType();
- }
-
- if (isBadCase) G1OC = 0;
- }
-
- // Make sure they are comparable (ie, not constant expressions), and
- // make sure the GEP with the smaller leading constant is GEP1.
- if (G1OC) {
- Constant *Compare = ConstantExpr::getICmp(ICmpInst::ICMP_SGT,
- G1OC, G2OC);
- if (ConstantInt *CV = dyn_cast<ConstantInt>(Compare)) {
- if (CV->getZExtValue()) { // If they are comparable and G2 > G1
- std::swap(GEP1Ops, GEP2Ops); // Make GEP1 < GEP2
- std::swap(NumGEP1Ops, NumGEP2Ops);
- }
- break;
- }
- }
- }
- }
- BasePtr1Ty = cast<CompositeType>(BasePtr1Ty)->getTypeAtIndex(G1Oper);
- }
-
- // No shared constant operands, and we ran out of common operands. At this
- // point, the GEP instructions have run through all of their operands, and we
- // haven't found evidence that there are any deltas between the GEP's.
- // However, one GEP may have more operands than the other. If this is the
- // case, there may still be hope. Check this now.
- if (FirstConstantOper == MinOperands) {
- // Without TargetData, we won't know what the offsets are.
- if (!TD)
- return MayAlias;
-
- // Make GEP1Ops be the longer one if there is a longer one.
- if (NumGEP1Ops < NumGEP2Ops) {
- std::swap(GEP1Ops, GEP2Ops);
- std::swap(NumGEP1Ops, NumGEP2Ops);
- }
-
- // Is there anything to check?
- if (NumGEP1Ops > MinOperands) {
- for (unsigned i = FirstConstantOper; i != MaxOperands; ++i)
- if (isa<ConstantInt>(GEP1Ops[i]) &&
- !cast<ConstantInt>(GEP1Ops[i])->isZero()) {
- // Yup, there's a constant in the tail. Set all variables to
- // constants in the GEP instruction to make it suitable for
- // TargetData::getIndexedOffset.
- for (i = 0; i != MaxOperands; ++i)
- if (!isa<ConstantInt>(GEP1Ops[i]))
- GEP1Ops[i] = Constant::getNullValue(GEP1Ops[i]->getType());
- // Okay, now get the offset. This is the relative offset for the full
- // instruction.
- int64_t Offset1 = TD->getIndexedOffset(GEPPointerTy, GEP1Ops,
- NumGEP1Ops);
-
- // Now check without any constants at the end.
- int64_t Offset2 = TD->getIndexedOffset(GEPPointerTy, GEP1Ops,
- MinOperands);
-
- // Make sure we compare the absolute difference.
- if (Offset1 > Offset2)
- std::swap(Offset1, Offset2);
-
- // If the tail provided a bit enough offset, return noalias!
- if ((uint64_t)(Offset2-Offset1) >= SizeMax)
- return NoAlias;
- // Otherwise break - we don't look for another constant in the tail.
- break;
- }
- }
-
- // Couldn't find anything useful.
- return MayAlias;
- }
-
- // If there are non-equal constants arguments, then we can figure
- // out a minimum known delta between the two index expressions... at
- // this point we know that the first constant index of GEP1 is less
- // than the first constant index of GEP2.
-
- // Advance BasePtr[12]Ty over this first differing constant operand.
- BasePtr2Ty = cast<CompositeType>(BasePtr1Ty)->
- getTypeAtIndex(GEP2Ops[FirstConstantOper]);
- BasePtr1Ty = cast<CompositeType>(BasePtr1Ty)->
- getTypeAtIndex(GEP1Ops[FirstConstantOper]);
-
- // We are going to be using TargetData::getIndexedOffset to determine the
- // offset that each of the GEP's is reaching. To do this, we have to convert
- // all variable references to constant references. To do this, we convert the
- // initial sequence of array subscripts into constant zeros to start with.
- const Type *ZeroIdxTy = GEPPointerTy;
- for (unsigned i = 0; i != FirstConstantOper; ++i) {
- if (!isa<StructType>(ZeroIdxTy))
- GEP1Ops[i] = GEP2Ops[i] =
- Constant::getNullValue(Type::getInt32Ty(ZeroIdxTy->getContext()));
-
- if (const CompositeType *CT = dyn_cast<CompositeType>(ZeroIdxTy))
- ZeroIdxTy = CT->getTypeAtIndex(GEP1Ops[i]);
- }
-
- // We know that GEP1Ops[FirstConstantOper] & GEP2Ops[FirstConstantOper] are ok
-
- // Loop over the rest of the operands...
- for (unsigned i = FirstConstantOper+1; i != MaxOperands; ++i) {
- const Value *Op1 = i < NumGEP1Ops ? GEP1Ops[i] : 0;
- const Value *Op2 = i < NumGEP2Ops ? GEP2Ops[i] : 0;
- // If they are equal, use a zero index...
- if (Op1 == Op2 && BasePtr1Ty == BasePtr2Ty) {
- if (!isa<ConstantInt>(Op1))
- GEP1Ops[i] = GEP2Ops[i] = Constant::getNullValue(Op1->getType());
- // Otherwise, just keep the constants we have.
- } else {
- if (Op1) {
- if (const ConstantInt *Op1C = dyn_cast<ConstantInt>(Op1)) {
- // If this is an array index, make sure the array element is in range.
- if (const ArrayType *AT = dyn_cast<ArrayType>(BasePtr1Ty)) {
- if (Op1C->getZExtValue() >= AT->getNumElements())
- return MayAlias; // Be conservative with out-of-range accesses
- } else if (const VectorType *VT = dyn_cast<VectorType>(BasePtr1Ty)) {
- if (Op1C->getZExtValue() >= VT->getNumElements())
- return MayAlias; // Be conservative with out-of-range accesses
- }
-
- } else {
- // GEP1 is known to produce a value less than GEP2. To be
- // conservatively correct, we must assume the largest possible
- // constant is used in this position. This cannot be the initial
- // index to the GEP instructions (because we know we have at least one
- // element before this one with the different constant arguments), so
- // we know that the current index must be into either a struct or
- // array. Because we know it's not constant, this cannot be a
- // structure index. Because of this, we can calculate the maximum
- // value possible.
- //
- if (const ArrayType *AT = dyn_cast<ArrayType>(BasePtr1Ty))
- GEP1Ops[i] =
- ConstantInt::get(Type::getInt64Ty(AT->getContext()),
- AT->getNumElements()-1);
- else if (const VectorType *VT = dyn_cast<VectorType>(BasePtr1Ty))
- GEP1Ops[i] =
- ConstantInt::get(Type::getInt64Ty(VT->getContext()),
- VT->getNumElements()-1);
- }
- }
-
- if (Op2) {
- if (const ConstantInt *Op2C = dyn_cast<ConstantInt>(Op2)) {
- // If this is an array index, make sure the array element is in range.
- if (const ArrayType *AT = dyn_cast<ArrayType>(BasePtr2Ty)) {
- if (Op2C->getZExtValue() >= AT->getNumElements())
- return MayAlias; // Be conservative with out-of-range accesses
- } else if (const VectorType *VT = dyn_cast<VectorType>(BasePtr2Ty)) {
- if (Op2C->getZExtValue() >= VT->getNumElements())
- return MayAlias; // Be conservative with out-of-range accesses
- }
- } else { // Conservatively assume the minimum value for this index
- GEP2Ops[i] = Constant::getNullValue(Op2->getType());
- }
- }
- }
-
- if (BasePtr1Ty && Op1) {
- if (const CompositeType *CT = dyn_cast<CompositeType>(BasePtr1Ty))
- BasePtr1Ty = CT->getTypeAtIndex(GEP1Ops[i]);
- else
- BasePtr1Ty = 0;
- }
-
- if (BasePtr2Ty && Op2) {
- if (const CompositeType *CT = dyn_cast<CompositeType>(BasePtr2Ty))
- BasePtr2Ty = CT->getTypeAtIndex(GEP2Ops[i]);
- else
- BasePtr2Ty = 0;
- }
- }
-
- if (TD && GEPPointerTy->getElementType()->isSized()) {
- int64_t Offset1 =
- TD->getIndexedOffset(GEPPointerTy, GEP1Ops, NumGEP1Ops);
- int64_t Offset2 =
- TD->getIndexedOffset(GEPPointerTy, GEP2Ops, NumGEP2Ops);
- assert(Offset1 != Offset2 &&
- "There is at least one different constant here!");
-
- // Make sure we compare the absolute difference.
- if (Offset1 > Offset2)
- std::swap(Offset1, Offset2);
-
- if ((uint64_t)(Offset2-Offset1) >= SizeMax) {
- //cerr << "Determined that these two GEP's don't alias ["
- // << SizeMax << " bytes]: \n" << *GEP1 << *GEP2;
- return NoAlias;
- }
- }
- return MayAlias;
-}
-
-// Make sure that anything that uses AliasAnalysis pulls in this file...
+// Make sure that anything that uses AliasAnalysis pulls in this file.
DEFINING_FILE_FOR(BasicAliasAnalysis)
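
The closing logic of the new aliasGEP first discards any part of the constant offset that a variable index could cancel, then compares the residue against the access sizes (the &A[100] != &A case from the comments). A standalone arithmetic sketch of that test, not the LLVM code itself:

```cpp
#include <cstddef>
#include <cstdint>
#include <cstdio>
#include <vector>

static bool offsetsProveNoAlias(int64_t baseOffset,
                                const std::vector<int64_t> &strides,
                                int64_t v1Size, int64_t v2Size) {
  // Strip the largest multiple of each variable stride from the offset,
  // mirroring GEP1BaseOffset -= RemovedOffset*Scale in the patch; this is
  // what keeps &A[100][i] (out-of-range i) from being misjudged.
  for (size_t i = 0; i != strides.size() && baseOffset; ++i)
    if (int64_t removed = baseOffset / strides[i])
      baseOffset -= removed * strides[i];

  // If the residual offset steps past either access, the bytes can't overlap.
  return baseOffset != 0 &&
         (baseOffset >= v2Size || baseOffset <= -v1Size);
}

int main() {
  std::vector<int64_t> none, stride8;
  stride8.push_back(8);
  // &A[100] vs &A with 4-byte accesses and no variable indices: no alias.
  std::printf("%d\n", (int)offsetsProveNoAlias(400, none,    4, 4)); // 1
  // &A[i][1] vs &A[i][0]: the stride-8 index can't cancel the 4-byte residue.
  std::printf("%d\n", (int)offsetsProveNoAlias(4,   stride8, 4, 4)); // 1
  // Identical pointers: a zero offset proves nothing.
  std::printf("%d\n", (int)offsetsProveNoAlias(0,   none,    4, 4)); // 0
}
```
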
diff --git a/lib/Analysis/CaptureTracking.cpp b/lib/Analysis/CaptureTracking.cpp
index f615881..a276c64 100644
--- a/lib/Analysis/CaptureTracking.cpp
+++ b/lib/Analysis/CaptureTracking.cpp
@@ -19,6 +19,7 @@
#include "llvm/Analysis/CaptureTracking.h"
#include "llvm/Instructions.h"
#include "llvm/Value.h"
+#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/Support/CallSite.h"
@@ -28,8 +29,11 @@ using namespace llvm;
/// by the enclosing function (which is required to exist). This routine can
/// be expensive, so consider caching the results. The boolean ReturnCaptures
/// specifies whether returning the value (or part of it) from the function
+/// counts as capturing it or not. The boolean StoreCaptures specifies whether
+/// storing the value (or part of it) into memory anywhere automatically
/// counts as capturing it or not.
-bool llvm::PointerMayBeCaptured(const Value *V, bool ReturnCaptures) {
+bool llvm::PointerMayBeCaptured(const Value *V,
+ bool ReturnCaptures, bool StoreCaptures) {
assert(isa<PointerType>(V->getType()) && "Capture is for pointers only!");
SmallVector<Use*, 16> Worklist;
SmallSet<Use*, 16> Visited;
@@ -53,8 +57,7 @@ bool llvm::PointerMayBeCaptured(const Value *V, bool ReturnCaptures) {
// Not captured if the callee is readonly, doesn't return a copy through
// its return value and doesn't unwind (a readonly function can leak bits
// by throwing an exception or not depending on the input value).
- if (CS.onlyReadsMemory() && CS.doesNotThrow() &&
- I->getType() == Type::getVoidTy(V->getContext()))
+ if (CS.onlyReadsMemory() && CS.doesNotThrow() && I->getType()->isVoidTy())
break;
// Not captured if only passed via 'nocapture' arguments. Note that
@@ -82,7 +85,11 @@ bool llvm::PointerMayBeCaptured(const Value *V, bool ReturnCaptures) {
break;
case Instruction::Store:
if (V == I->getOperand(0))
- // Stored the pointer - it may be captured.
+ // Stored the pointer - conservatively assume it may be captured.
+ // TODO: If StoreCaptures is not true, we could do Fancy analysis
+ // to determine whether this store is not actually an escape point.
+ // In that case, BasicAliasAnalysis should be updated as well to
+ // take advantage of this.
return true;
// Storing to the pointee does not cause the pointer to be captured.
break;
@@ -98,6 +105,18 @@ bool llvm::PointerMayBeCaptured(const Value *V, bool ReturnCaptures) {
Worklist.push_back(U);
}
break;
+ case Instruction::ICmp:
+ // Don't count comparisons of a no-alias return value against null as
+ // captures. This allows us to ignore comparisons of malloc results
+ // with null, for example.
+ if (isNoAliasCall(V->stripPointerCasts()))
+ if (ConstantPointerNull *CPN =
+ dyn_cast<ConstantPointerNull>(I->getOperand(1)))
+ if (CPN->getType()->getAddressSpace() == 0)
+ break;
+ // Otherwise, be conservative. There are crazy ways to capture pointers
+ // using comparisons.
+ return true;
default:
// Something else - be conservative and say it is captured.
return true;
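
The new ICmp case makes a null check of a noalias call's result (e.g. testing malloc's return value) not count as a capture, while every other comparison stays conservative. A toy model of just that decision (the types here are invented for illustration):

```cpp
#include <cstdio>

struct CmpUse {
  bool otherOperandIsNull;  // is the other icmp operand a null constant?
  unsigned addressSpace;    // address space of that null constant
};

// Null checks of a noalias call result in address space 0 can't leak the
// pointer's value; anything else might (comparing against another pointer
// reveals ordering information, for instance).
static bool icmpCaptures(bool valueIsNoAliasCall, CmpUse u) {
  if (valueIsNoAliasCall && u.otherOperandIsNull && u.addressSpace == 0)
    return false;
  return true;  // be conservative: there are crazy ways to capture pointers
}

int main() {
  CmpUse nullCmp = { true, 0 }, ptrCmp = { false, 0 };
  std::printf("%d\n", (int)icmpCaptures(true,  nullCmp)); // 0: not a capture
  std::printf("%d\n", (int)icmpCaptures(true,  ptrCmp));  // 1
  std::printf("%d\n", (int)icmpCaptures(false, nullCmp)); // 1
}
```
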
diff --git a/lib/Analysis/ConstantFolding.cpp b/lib/Analysis/ConstantFolding.cpp
index 1cdadbf..96f738e 100644
--- a/lib/Analysis/ConstantFolding.cpp
+++ b/lib/Analysis/ConstantFolding.cpp
@@ -564,6 +564,7 @@ static Constant *SymbolicallyEvaluateGEP(Constant *const *Ops, unsigned NumOps,
// we eliminate over-indexing of the notional static type array bounds.
// This makes it easy to determine if the getelementptr is "inbounds".
// Also, this helps GlobalOpt do SROA on GlobalVariables.
+ Ptr = cast<Constant>(Ptr->stripPointerCasts());
const Type *Ty = Ptr->getType();
SmallVector<Constant*, 32> NewIdxs;
do {
@@ -671,8 +672,13 @@ Constant *llvm::ConstantFoldInstruction(Instruction *I, const TargetData *TD) {
Constant *llvm::ConstantFoldConstantExpression(ConstantExpr *CE,
const TargetData *TD) {
SmallVector<Constant*, 8> Ops;
- for (User::op_iterator i = CE->op_begin(), e = CE->op_end(); i != e; ++i)
- Ops.push_back(cast<Constant>(*i));
+ for (User::op_iterator i = CE->op_begin(), e = CE->op_end(); i != e; ++i) {
+ Constant *NewC = cast<Constant>(*i);
+ // Recursively fold the ConstantExpr's operands.
+ if (ConstantExpr *NewCE = dyn_cast<ConstantExpr>(NewC))
+ NewC = ConstantFoldConstantExpression(NewCE, TD);
+ Ops.push_back(NewC);
+ }
if (CE->isCompare())
return ConstantFoldCompareInstOperands(CE->getPredicate(), Ops[0], Ops[1],
@@ -687,6 +693,10 @@ Constant *llvm::ConstantFoldConstantExpression(ConstantExpr *CE,
/// attempting to fold instructions like loads and stores, which have no
/// constant expression form.
///
+/// TODO: This function neither utilizes nor preserves nsw/nuw/inbounds/etc
+/// information, due to only being passed an opcode and operands. Constant
+/// folding using this function strips this information.
+///
Constant *llvm::ConstantFoldInstOperands(unsigned Opcode, const Type *DestTy,
Constant* const* Ops, unsigned NumOps,
const TargetData *TD) {
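
The ConstantFoldConstantExpression hunk now folds each operand that is itself a constant expression before folding the outer one, so nested expressions collapse bottom-up. A standalone analogue on a tiny invented expression tree:

```cpp
#include <cstdio>

// Leaf when op == 0 (value holds the constant); otherwise an addition node.
struct Expr {
  char op;
  long value;
  const Expr *lhs, *rhs;
};

static long fold(const Expr *e) {
  if (e->op == 0)
    return e->value;
  // Fold the operands first, mirroring the recursion the hunk adds: a nested
  // expression becomes a plain constant before the outer fold runs.
  return fold(e->lhs) + fold(e->rhs);
}

int main() {
  Expr two   = { 0, 2, 0, 0 }, three = { 0, 3, 0, 0 };
  Expr inner = { '+', 0, &two, &three };  // (2 + 3)
  Expr outer = { '+', 0, &inner, &two };  // (2 + 3) + 2
  std::printf("%ld\n", fold(&outer));     // 7
}
```
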
diff --git a/lib/Analysis/DebugInfo.cpp b/lib/Analysis/DebugInfo.cpp
index 8f62245..41d803c 100644
--- a/lib/Analysis/DebugInfo.cpp
+++ b/lib/Analysis/DebugInfo.cpp
@@ -78,19 +78,16 @@ DIDescriptor::DIDescriptor(MDNode *N, unsigned RequiredTag) {
}
}
-const char *
+StringRef
DIDescriptor::getStringField(unsigned Elt) const {
if (DbgNode == 0)
- return NULL;
+ return StringRef();
if (Elt < DbgNode->getNumElements())
- if (MDString *MDS = dyn_cast_or_null<MDString>(DbgNode->getElement(Elt))) {
- if (MDS->getLength() == 0)
- return NULL;
- return MDS->getString().data();
- }
+ if (MDString *MDS = dyn_cast_or_null<MDString>(DbgNode->getElement(Elt)))
+ return MDS->getString();
- return NULL;
+ return StringRef();
}
uint64_t DIDescriptor::getUInt64Field(unsigned Elt) const {
@@ -310,8 +307,8 @@ void DIDerivedType::replaceAllUsesWith(DIDescriptor &D) {
bool DICompileUnit::Verify() const {
if (isNull())
return false;
- const char *N = getFilename();
- if (!N)
+ StringRef N = getFilename();
+ if (N.empty())
return false;
// It is possible that the directory and producer strings are empty.
return true;
@@ -366,7 +363,7 @@ bool DIGlobalVariable::Verify() const {
if (isNull())
return false;
- if (!getDisplayName())
+ if (getDisplayName().empty())
return false;
if (getContext().isNull())
@@ -426,15 +423,15 @@ uint64_t DIDerivedType::getOriginalTypeSize() const {
/// information for the function F.
bool DISubprogram::describes(const Function *F) {
assert (F && "Invalid function");
- const char *Name = getLinkageName();
- if (!Name)
+ StringRef Name = getLinkageName();
+ if (Name.empty())
Name = getName();
- if (strcmp(F->getName().data(), Name) == 0)
+ if (F->getName() == Name)
return true;
return false;
}
-const char *DIScope::getFilename() const {
+StringRef DIScope::getFilename() const {
if (isLexicalBlock())
return DILexicalBlock(DbgNode).getFilename();
else if (isSubprogram())
@@ -443,10 +440,10 @@ const char *DIScope::getFilename() const {
return DICompileUnit(DbgNode).getFilename();
else
assert (0 && "Invalid DIScope!");
- return NULL;
+ return StringRef();
}
-const char *DIScope::getDirectory() const {
+StringRef DIScope::getDirectory() const {
if (isLexicalBlock())
return DILexicalBlock(DbgNode).getDirectory();
else if (isSubprogram())
@@ -455,7 +452,7 @@ const char *DIScope::getDirectory() const {
return DICompileUnit(DbgNode).getDirectory();
else
assert (0 && "Invalid DIScope!");
- return NULL;
+ return StringRef();
}
//===----------------------------------------------------------------------===//
@@ -481,7 +478,8 @@ void DICompileUnit::dump() const {
void DIType::dump() const {
if (isNull()) return;
- if (const char *Res = getName())
+ StringRef Res = getName();
+ if (!Res.empty())
errs() << " [" << Res << "] ";
unsigned Tag = getTag();
@@ -538,7 +536,8 @@ void DICompositeType::dump() const {
/// dump - Print global.
void DIGlobal::dump() const {
- if (const char *Res = getName())
+ StringRef Res = getName();
+ if (!Res.empty())
errs() << " [" << Res << "] ";
unsigned Tag = getTag();
@@ -562,7 +561,8 @@ void DIGlobal::dump() const {
/// dump - Print subprogram.
void DISubprogram::dump() const {
- if (const char *Res = getName())
+ StringRef Res = getName();
+ if (!Res.empty())
errs() << " [" << Res << "] ";
unsigned Tag = getTag();
@@ -590,7 +590,8 @@ void DIGlobalVariable::dump() const {
/// dump - Print variable.
void DIVariable::dump() const {
- if (const char *Res = getName())
+ StringRef Res = getName();
+ if (!Res.empty())
errs() << " [" << Res << "] ";
getCompileUnit().dump();
@@ -651,12 +652,12 @@ DISubrange DIFactory::GetOrCreateSubrange(int64_t Lo, int64_t Hi) {
/// CreateCompileUnit - Create a new descriptor for the specified compile
/// unit. Note that this does not unique compile units within the module.
DICompileUnit DIFactory::CreateCompileUnit(unsigned LangID,
- const char * Filename,
- const char * Directory,
- const char * Producer,
+ StringRef Filename,
+ StringRef Directory,
+ StringRef Producer,
bool isMain,
bool isOptimized,
- const char *Flags,
+ StringRef Flags,
unsigned RunTimeVer) {
Value *Elts[] = {
GetTagConstant(dwarf::DW_TAG_compile_unit),
@@ -675,7 +676,7 @@ DICompileUnit DIFactory::CreateCompileUnit(unsigned LangID,
}
/// CreateEnumerator - Create a single enumerator value.
-DIEnumerator DIFactory::CreateEnumerator(const char * Name, uint64_t Val){
+DIEnumerator DIFactory::CreateEnumerator(StringRef Name, uint64_t Val){
Value *Elts[] = {
GetTagConstant(dwarf::DW_TAG_enumerator),
MDString::get(VMContext, Name),
@@ -687,7 +688,7 @@ DIEnumerator DIFactory::CreateEnumerator(const char * Name, uint64_t Val){
/// CreateBasicType - Create a basic type like int, float, etc.
DIBasicType DIFactory::CreateBasicType(DIDescriptor Context,
- const char * Name,
+ StringRef Name,
DICompileUnit CompileUnit,
unsigned LineNumber,
uint64_t SizeInBits,
@@ -712,7 +713,7 @@ DIBasicType DIFactory::CreateBasicType(DIDescriptor Context,
/// CreateBasicType - Create a basic type like int, float, etc.
DIBasicType DIFactory::CreateBasicTypeEx(DIDescriptor Context,
- const char * Name,
+ StringRef Name,
DICompileUnit CompileUnit,
unsigned LineNumber,
Constant *SizeInBits,
@@ -739,7 +740,7 @@ DIBasicType DIFactory::CreateBasicTypeEx(DIDescriptor Context,
/// pointer, typedef, etc.
DIDerivedType DIFactory::CreateDerivedType(unsigned Tag,
DIDescriptor Context,
- const char * Name,
+ StringRef Name,
DICompileUnit CompileUnit,
unsigned LineNumber,
uint64_t SizeInBits,
@@ -767,7 +768,7 @@ DIDerivedType DIFactory::CreateDerivedType(unsigned Tag,
/// pointer, typedef, etc.
DIDerivedType DIFactory::CreateDerivedTypeEx(unsigned Tag,
DIDescriptor Context,
- const char * Name,
+ StringRef Name,
DICompileUnit CompileUnit,
unsigned LineNumber,
Constant *SizeInBits,
@@ -794,7 +795,7 @@ DIDerivedType DIFactory::CreateDerivedTypeEx(unsigned Tag,
/// CreateCompositeType - Create a composite type like array, struct, etc.
DICompositeType DIFactory::CreateCompositeType(unsigned Tag,
DIDescriptor Context,
- const char * Name,
+ StringRef Name,
DICompileUnit CompileUnit,
unsigned LineNumber,
uint64_t SizeInBits,
@@ -826,7 +827,7 @@ DICompositeType DIFactory::CreateCompositeType(unsigned Tag,
/// CreateCompositeType - Create a composite type like array, struct, etc.
DICompositeType DIFactory::CreateCompositeTypeEx(unsigned Tag,
DIDescriptor Context,
- const char * Name,
+ StringRef Name,
DICompileUnit CompileUnit,
unsigned LineNumber,
Constant *SizeInBits,
@@ -859,9 +860,9 @@ DICompositeType DIFactory::CreateCompositeTypeEx(unsigned Tag,
/// See comments in DISubprogram for descriptions of these fields. This
/// method does not unique the generated descriptors.
DISubprogram DIFactory::CreateSubprogram(DIDescriptor Context,
- const char * Name,
- const char * DisplayName,
- const char * LinkageName,
+ StringRef Name,
+ StringRef DisplayName,
+ StringRef LinkageName,
DICompileUnit CompileUnit,
unsigned LineNo, DIType Type,
bool isLocalToUnit,
@@ -885,9 +886,9 @@ DISubprogram DIFactory::CreateSubprogram(DIDescriptor Context,
/// CreateGlobalVariable - Create a new descriptor for the specified global.
DIGlobalVariable
-DIFactory::CreateGlobalVariable(DIDescriptor Context, const char * Name,
- const char * DisplayName,
- const char * LinkageName,
+DIFactory::CreateGlobalVariable(DIDescriptor Context, StringRef Name,
+ StringRef DisplayName,
+ StringRef LinkageName,
DICompileUnit CompileUnit,
unsigned LineNo, DIType Type,bool isLocalToUnit,
bool isDefinition, llvm::GlobalVariable *Val) {
@@ -919,7 +920,7 @@ DIFactory::CreateGlobalVariable(DIDescriptor Context, const char * Name,
/// CreateVariable - Create a new descriptor for the specified variable.
DIVariable DIFactory::CreateVariable(unsigned Tag, DIDescriptor Context,
- const char * Name,
+ StringRef Name,
DICompileUnit CompileUnit, unsigned LineNo,
DIType Type) {
Value *Elts[] = {
@@ -976,6 +977,17 @@ DILocation DIFactory::CreateLocation(unsigned LineNo, unsigned ColumnNo,
return DILocation(MDNode::get(VMContext, &Elts[0], 4));
}
+/// CreateLocation - Creates a debug info location.
+DILocation DIFactory::CreateLocation(unsigned LineNo, unsigned ColumnNo,
+ DIScope S, MDNode *OrigLoc) {
+ Value *Elts[] = {
+ ConstantInt::get(Type::getInt32Ty(VMContext), LineNo),
+ ConstantInt::get(Type::getInt32Ty(VMContext), ColumnNo),
+ S.getNode(),
+ OrigLoc
+ };
+ return DILocation(MDNode::get(VMContext, &Elts[0], 4));
+}
//===----------------------------------------------------------------------===//
// DIFactory: Routines for inserting code into a function
@@ -1263,7 +1275,8 @@ bool getLocationInfo(const Value *V, std::string &DisplayName,
if (!DIGV) return false;
DIGlobalVariable Var(cast<MDNode>(DIGV));
- if (const char *D = Var.getDisplayName())
+ StringRef D = Var.getDisplayName();
+ if (!D.empty())
DisplayName = D;
LineNo = Var.getLineNumber();
Unit = Var.getCompileUnit();
@@ -1273,18 +1286,22 @@ bool getLocationInfo(const Value *V, std::string &DisplayName,
if (!DDI) return false;
DIVariable Var(cast<MDNode>(DDI->getVariable()));
- if (const char *D = Var.getName())
+ StringRef D = Var.getName();
+ if (!D.empty())
DisplayName = D;
LineNo = Var.getLineNumber();
Unit = Var.getCompileUnit();
TypeD = Var.getType();
}
- if (const char *T = TypeD.getName())
+ StringRef T = TypeD.getName();
+ if (!T.empty())
Type = T;
- if (const char *F = Unit.getFilename())
+ StringRef F = Unit.getFilename();
+ if (!F.empty())
File = F;
- if (const char *D = Unit.getDirectory())
+ StringRef D = Unit.getDirectory();
+ if (!D.empty())
Dir = D;
return true;
}
@@ -1398,4 +1415,36 @@ bool getLocationInfo(const Value *V, std::string &DisplayName,
return DebugLoc::get(Id);
}
+
+ /// getDISubprogram - Find subprogram that is enclosing this scope.
+ DISubprogram getDISubprogram(MDNode *Scope) {
+ DIDescriptor D(Scope);
+ if (D.isNull())
+ return DISubprogram();
+
+ if (D.isCompileUnit())
+ return DISubprogram();
+
+ if (D.isSubprogram())
+ return DISubprogram(Scope);
+
+ if (D.isLexicalBlock())
+ return getDISubprogram(DILexicalBlock(Scope).getContext().getNode());
+
+ return DISubprogram();
+ }
+
+ /// getDICompositeType - Find underlying composite type.
+ DICompositeType getDICompositeType(DIType T) {
+ if (T.isNull())
+ return DICompositeType();
+
+ if (T.isCompositeType())
+ return DICompositeType(T.getNode());
+
+ if (T.isDerivedType())
+ return getDICompositeType(DIDerivedType(T.getNode()).getTypeDerivedFrom());
+
+ return DICompositeType();
+ }
}
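
Aside (not part of the patch): the const char* to StringRef migration above replaces "NULL means no name" with "empty means no name". Below is a stand-in sketch of the relevant slice of a StringRef-like type, with invented names; the real llvm::StringRef differs in detail.

#include <cstring>

// Illustrative (pointer, length) string reference; not the LLVM class.
class StringRefSketch {
  const char *Data;
  std::size_t Length;
public:
  StringRefSketch() : Data(nullptr), Length(0) {}          // "no string"
  StringRefSketch(const char *S)
      : Data(S), Length(S ? std::strlen(S) : 0) {}
  bool empty() const { return Length == 0; }               // replaces !Ptr
  bool operator==(const StringRefSketch &RHS) const {
    return Length == RHS.Length &&
           (Length == 0 || std::memcmp(Data, RHS.Data, Length) == 0);
  }
};

This is why the patch can turn strcmp(F->getName().data(), Name) == 0 into a plain F->getName() == Name, and every "if (!N)" into "if (N.empty())".
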
diff --git a/lib/Analysis/DomPrinter.cpp b/lib/Analysis/DomPrinter.cpp
index f1b44d0..32b8994 100644
--- a/lib/Analysis/DomPrinter.cpp
+++ b/lib/Analysis/DomPrinter.cpp
@@ -30,46 +30,55 @@ using namespace llvm;
namespace llvm {
template<>
struct DOTGraphTraits<DomTreeNode*> : public DefaultDOTGraphTraits {
- static std::string getNodeLabel(DomTreeNode *Node, DomTreeNode *Graph,
- bool ShortNames) {
+
+ DOTGraphTraits (bool isSimple=false)
+ : DefaultDOTGraphTraits(isSimple) {}
+
+ std::string getNodeLabel(DomTreeNode *Node, DomTreeNode *Graph) {
BasicBlock *BB = Node->getBlock();
if (!BB)
return "Post dominance root node";
- return DOTGraphTraits<const Function*>::getNodeLabel(BB, BB->getParent(),
- ShortNames);
+
+ if (isSimple())
+ return DOTGraphTraits<const Function*>
+ ::getSimpleNodeLabel(BB, BB->getParent());
+ else
+ return DOTGraphTraits<const Function*>
+ ::getCompleteNodeLabel(BB, BB->getParent());
}
};
template<>
struct DOTGraphTraits<DominatorTree*> : public DOTGraphTraits<DomTreeNode*> {
+ DOTGraphTraits (bool isSimple=false)
+ : DOTGraphTraits<DomTreeNode*>(isSimple) {}
+
static std::string getGraphName(DominatorTree *DT) {
return "Dominator tree";
}
- static std::string getNodeLabel(DomTreeNode *Node,
- DominatorTree *G,
- bool ShortNames) {
- return DOTGraphTraits<DomTreeNode*>::getNodeLabel(Node, G->getRootNode(),
- ShortNames);
+ std::string getNodeLabel(DomTreeNode *Node, DominatorTree *G) {
+ return DOTGraphTraits<DomTreeNode*>::getNodeLabel(Node, G->getRootNode());
}
};
template<>
struct DOTGraphTraits<PostDominatorTree*>
: public DOTGraphTraits<DomTreeNode*> {
+
+ DOTGraphTraits (bool isSimple=false)
+ : DOTGraphTraits<DomTreeNode*>(isSimple) {}
+
static std::string getGraphName(PostDominatorTree *DT) {
return "Post dominator tree";
}
- static std::string getNodeLabel(DomTreeNode *Node,
- PostDominatorTree *G,
- bool ShortNames) {
- return DOTGraphTraits<DomTreeNode*>::getNodeLabel(Node,
- G->getRootNode(),
- ShortNames);
+
+ std::string getNodeLabel(DomTreeNode *Node, PostDominatorTree *G) {
+ return DOTGraphTraits<DomTreeNode*>::getNodeLabel(Node, G->getRootNode());
}
};
}
@@ -85,9 +94,11 @@ struct GenericGraphViewer : public FunctionPass {
virtual bool runOnFunction(Function &F) {
Analysis *Graph;
-
+ std::string Title, GraphName;
Graph = &getAnalysis<Analysis>();
- ViewGraph(Graph, Name, OnlyBBS);
+ GraphName = DOTGraphTraits<Analysis*>::getGraphName(Graph);
+ Title = GraphName + " for '" + F.getNameStr() + "' function";
+ ViewGraph(Graph, Name, OnlyBBS, Title);
return false;
}
@@ -163,8 +174,12 @@ struct GenericGraphPrinter : public FunctionPass {
raw_fd_ostream File(Filename.c_str(), ErrorInfo);
Graph = &getAnalysis<Analysis>();
+ std::string Title, GraphName;
+ GraphName = DOTGraphTraits<Analysis*>::getGraphName(Graph);
+ Title = GraphName + " for '" + F.getNameStr() + "' function";
+
if (ErrorInfo.empty())
- WriteGraph(File, Graph, OnlyBBS);
+ WriteGraph(File, Graph, OnlyBBS, Name, Title);
else
errs() << " error opening file for writing!";
errs() << "\n";
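
Aside (not part of the patch): the traits rework above turns the static getNodeLabel(Node, Graph, ShortNames) hook into an instance method, with the former ShortNames flag stored on the traits object. A simplified sketch of that shape, using toy types rather than the real templated traits:

#include <string>

struct DefaultTraitsSketch {
  explicit DefaultTraitsSketch(bool IsSimple = false) : Simple(IsSimple) {}
  bool isSimple() const { return Simple; }
private:
  bool Simple;
};

struct DomTraitsSketch : DefaultTraitsSketch {
  explicit DomTraitsSketch(bool IsSimple = false)
      : DefaultTraitsSketch(IsSimple) {}
  // The flag travels with the object, so derived traits no longer thread a
  // ShortNames parameter through every label call.
  std::string getNodeLabel(const std::string &FullLabel) const {
    return isSimple() ? FullLabel.substr(0, 10) : FullLabel;
  }
};
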
diff --git a/lib/Analysis/IPA/Andersens.cpp b/lib/Analysis/IPA/Andersens.cpp
index 40a8cd5..e12db81 100644
--- a/lib/Analysis/IPA/Andersens.cpp
+++ b/lib/Analysis/IPA/Andersens.cpp
@@ -484,7 +484,6 @@ namespace {
const Value *V2, unsigned V2Size);
virtual ModRefResult getModRefInfo(CallSite CS, Value *P, unsigned Size);
virtual ModRefResult getModRefInfo(CallSite CS1, CallSite CS2);
- void getMustAliases(Value *P, std::vector<Value*> &RetVals);
bool pointsToConstantMemory(const Value *P);
virtual void deleteValue(Value *V) {
@@ -680,32 +679,6 @@ Andersens::getModRefInfo(CallSite CS1, CallSite CS2) {
return AliasAnalysis::getModRefInfo(CS1,CS2);
}
-/// getMustAlias - We can provide must alias information if we know that a
-/// pointer can only point to a specific function or the null pointer.
-/// Unfortunately we cannot determine must-alias information for global
-/// variables or any other memory memory objects because we do not track whether
-/// a pointer points to the beginning of an object or a field of it.
-void Andersens::getMustAliases(Value *P, std::vector<Value*> &RetVals) {
- Node *N = &GraphNodes[FindNode(getNode(P))];
- if (N->PointsTo->count() == 1) {
- Node *Pointee = &GraphNodes[N->PointsTo->find_first()];
- // If a function is the only object in the points-to set, then it must be
- // the destination. Note that we can't handle global variables here,
- // because we don't know if the pointer is actually pointing to a field of
- // the global or to the beginning of it.
- if (Value *V = Pointee->getValue()) {
- if (Function *F = dyn_cast<Function>(V))
- RetVals.push_back(F);
- } else {
- // If the object in the points-to set is the null object, then the null
- // pointer is a must alias.
- if (Pointee == &GraphNodes[NullObject])
- RetVals.push_back(Constant::getNullValue(P->getType()));
- }
- }
- AliasAnalysis::getMustAliases(P, RetVals);
-}
-
/// pointsToConstantMemory - If we can determine that this pointer only points
/// to constant memory, return true. In practice, this means that if the
/// pointer can only point to constant globals, functions, or the null pointer,
diff --git a/lib/Analysis/IPA/GlobalsModRef.cpp b/lib/Analysis/IPA/GlobalsModRef.cpp
index ddd6ff9..a979a99 100644
--- a/lib/Analysis/IPA/GlobalsModRef.cpp
+++ b/lib/Analysis/IPA/GlobalsModRef.cpp
@@ -111,7 +111,6 @@ namespace {
ModRefResult getModRefInfo(CallSite CS1, CallSite CS2) {
return AliasAnalysis::getModRefInfo(CS1,CS2);
}
- bool hasNoModRefInfoForCalls() const { return false; }
/// getModRefBehavior - Return the behavior of the specified function if
/// called from the specified call site. The call site may be null in which
diff --git a/lib/Analysis/IVUsers.cpp b/lib/Analysis/IVUsers.cpp
index efe40e4..37747b6 100644
--- a/lib/Analysis/IVUsers.cpp
+++ b/lib/Analysis/IVUsers.cpp
@@ -24,7 +24,6 @@
#include "llvm/ADT/STLExtras.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/Support/CommandLine.h"
#include <algorithm>
using namespace llvm;
@@ -32,10 +31,6 @@ char IVUsers::ID = 0;
static RegisterPass<IVUsers>
X("iv-users", "Induction Variable Users", false, true);
-static cl::opt<bool>
-SimplifyIVUsers("simplify-iv-users", cl::Hidden, cl::init(false),
- cl::desc("Restrict IV Users to loop-invariant strides"));
-
Pass *llvm::createIVUsersPass() {
return new IVUsers();
}
@@ -214,8 +209,7 @@ bool IVUsers::AddUsersIfInteresting(Instruction *I) {
return false; // Non-reducible symbolic expression, bail out.
// Keep things simple. Don't touch loop-variant strides.
- if (SimplifyIVUsers && !Stride->isLoopInvariant(L)
- && L->contains(I->getParent()))
+ if (!Stride->isLoopInvariant(L) && L->contains(I->getParent()))
return false;
SmallPtrSet<Instruction *, 4> UniqueUsers;
diff --git a/lib/Analysis/InstructionSimplify.cpp b/lib/Analysis/InstructionSimplify.cpp
index f9953e3..b53ac13 100644
--- a/lib/Analysis/InstructionSimplify.cpp
+++ b/lib/Analysis/InstructionSimplify.cpp
@@ -21,13 +21,41 @@
using namespace llvm;
using namespace llvm::PatternMatch;
-/// SimplifyAndInst - Given operands for an And, see if we can
+/// SimplifyAddInst - Given operands for an Add, see if we can
/// fold the result. If not, this returns null.
-Value *llvm::SimplifyAndInst(Value *Op0, Value *Op1,
+Value *llvm::SimplifyAddInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW,
const TargetData *TD) {
if (Constant *CLHS = dyn_cast<Constant>(Op0)) {
if (Constant *CRHS = dyn_cast<Constant>(Op1)) {
Constant *Ops[] = { CLHS, CRHS };
+ return ConstantFoldInstOperands(Instruction::Add, CLHS->getType(),
+ Ops, 2, TD);
+ }
+
+ // Canonicalize the constant to the RHS.
+ std::swap(Op0, Op1);
+ }
+
+ if (Constant *Op1C = dyn_cast<Constant>(Op1)) {
+ // X + undef -> undef
+ if (isa<UndefValue>(Op1C))
+ return Op1C;
+
+ // X + 0 --> X
+ if (Op1C->isNullValue())
+ return Op0;
+ }
+
+ // FIXME: Could pull several more out of instcombine.
+ return 0;
+}
+
+/// SimplifyAndInst - Given operands for an And, see if we can
+/// fold the result. If not, this returns null.
+Value *llvm::SimplifyAndInst(Value *Op0, Value *Op1, const TargetData *TD) {
+ if (Constant *CLHS = dyn_cast<Constant>(Op0)) {
+ if (Constant *CRHS = dyn_cast<Constant>(Op1)) {
+ Constant *Ops[] = { CLHS, CRHS };
return ConstantFoldInstOperands(Instruction::And, CLHS->getType(),
Ops, 2, TD);
}
@@ -83,8 +111,7 @@ Value *llvm::SimplifyAndInst(Value *Op0, Value *Op1,
/// SimplifyOrInst - Given operands for an Or, see if we can
/// fold the result. If not, this returns null.
-Value *llvm::SimplifyOrInst(Value *Op0, Value *Op1,
- const TargetData *TD) {
+Value *llvm::SimplifyOrInst(Value *Op0, Value *Op1, const TargetData *TD) {
if (Constant *CLHS = dyn_cast<Constant>(Op0)) {
if (Constant *CRHS = dyn_cast<Constant>(Op1)) {
Constant *Ops[] = { CLHS, CRHS };
@@ -142,8 +169,6 @@ Value *llvm::SimplifyOrInst(Value *Op0, Value *Op1,
}
-
-
static const Type *GetCompareTy(Value *Op) {
return CmpInst::makeCmpResultType(Op->getType());
}
@@ -264,6 +289,34 @@ Value *llvm::SimplifyFCmpInst(unsigned Predicate, Value *LHS, Value *RHS,
return 0;
}
+/// SimplifyGEPInst - Given operands for a GetElementPtrInst, see if we can
+/// fold the result. If not, this returns null.
+Value *llvm::SimplifyGEPInst(Value *const *Ops, unsigned NumOps,
+ const TargetData *TD) {
+ // getelementptr P -> P.
+ if (NumOps == 1)
+ return Ops[0];
+
+ // TODO.
+ //if (isa<UndefValue>(Ops[0]))
+ // return UndefValue::get(GEP.getType());
+
+ // getelementptr P, 0 -> P.
+ if (NumOps == 2)
+ if (ConstantInt *C = dyn_cast<ConstantInt>(Ops[1]))
+ if (C->isZero())
+ return Ops[0];
+
+ // Check to see if this is constant foldable.
+ for (unsigned i = 0; i != NumOps; ++i)
+ if (!isa<Constant>(Ops[i]))
+ return 0;
+
+ return ConstantExpr::getGetElementPtr(cast<Constant>(Ops[0]),
+ (Constant *const*)Ops+1, NumOps-1);
+}
+
+
//=== Helper functions for higher up the class hierarchy.
/// SimplifyBinOp - Given operands for a BinaryOperator, see if we can
@@ -299,6 +352,10 @@ Value *llvm::SimplifyInstruction(Instruction *I, const TargetData *TD) {
switch (I->getOpcode()) {
default:
return ConstantFoldInstruction(I, TD);
+ case Instruction::Add:
+ return SimplifyAddInst(I->getOperand(0), I->getOperand(1),
+ cast<BinaryOperator>(I)->hasNoSignedWrap(),
+ cast<BinaryOperator>(I)->hasNoUnsignedWrap(), TD);
case Instruction::And:
return SimplifyAndInst(I->getOperand(0), I->getOperand(1), TD);
case Instruction::Or:
@@ -309,6 +366,10 @@ Value *llvm::SimplifyInstruction(Instruction *I, const TargetData *TD) {
case Instruction::FCmp:
return SimplifyFCmpInst(cast<FCmpInst>(I)->getPredicate(),
I->getOperand(0), I->getOperand(1), TD);
+ case Instruction::GetElementPtr: {
+ SmallVector<Value*, 8> Ops(I->op_begin(), I->op_end());
+ return SimplifyGEPInst(&Ops[0], Ops.size(), TD);
+ }
}
}
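
Aside (not part of the patch): the two identities in SimplifyGEPInst are easy to check against a toy model. The types and names below are invented; the real code operates on llvm::Value*.

#include <cassert>
#include <vector>

struct Val { bool IsZeroConst; };   // toy stand-in for llvm::Value

// Returns what the GEP folds to, or nullptr when no identity applies:
//   getelementptr P     -> P
//   getelementptr P, 0  -> P
Val *simplifyGEPSketch(const std::vector<Val *> &Ops) {
  if (Ops.size() == 1)
    return Ops[0];                           // no indices at all
  if (Ops.size() == 2 && Ops[1]->IsZeroConst)
    return Ops[0];                           // a single zero index is a no-op
  return nullptr;                            // defer to constant folding
}

int main() {
  Val P = {false}, Zero = {true};
  std::vector<Val *> NoIdx = {&P}, ZeroIdx = {&P, &Zero};
  assert(simplifyGEPSketch(NoIdx) == &P);
  assert(simplifyGEPSketch(ZeroIdx) == &P);
  return 0;
}
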
diff --git a/lib/Analysis/LoopInfo.cpp b/lib/Analysis/LoopInfo.cpp
index 1c614b0..4de756c 100644
--- a/lib/Analysis/LoopInfo.cpp
+++ b/lib/Analysis/LoopInfo.cpp
@@ -243,6 +243,11 @@ unsigned Loop::getSmallConstantTripMultiple() const {
case BinaryOperator::Mul:
Result = dyn_cast<ConstantInt>(BO->getOperand(1));
break;
+ case BinaryOperator::Shl:
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(BO->getOperand(1)))
+ if (CI->getValue().getActiveBits() <= 5)
+ return 1u << CI->getZExtValue();
+ break;
default:
break;
}
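
Aside (not part of the patch): the new Shl case says that a trip count of the form (x << C) is always divisible by 2^C, and the getActiveBits() <= 5 guard keeps C at most 31 so the 32-bit shift 1u << C stays well defined. A standalone rendering with an invented helper name:

#include <cstdint>

// Sketch of the rule above, not LLVM API. Returns 1 (the conservative
// default) when the shift amount is too wide to reason about.
unsigned shlTripMultipleSketch(uint64_t ShiftAmt) {
  if (ShiftAmt > 31)        // mirrors getActiveBits() <= 5
    return 1;
  return 1u << ShiftAmt;    // 2^C divides every value of (x << C)
}
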
diff --git a/lib/Analysis/MemoryDependenceAnalysis.cpp b/lib/Analysis/MemoryDependenceAnalysis.cpp
index 0ec0e74..ae6f970 100644
--- a/lib/Analysis/MemoryDependenceAnalysis.cpp
+++ b/lib/Analysis/MemoryDependenceAnalysis.cpp
@@ -20,6 +20,8 @@
#include "llvm/IntrinsicInst.h"
#include "llvm/Function.h"
#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Analysis/Dominators.h"
+#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/MemoryBuiltins.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/STLExtras.h"
@@ -117,10 +119,6 @@ getCallSiteDependencyFrom(CallSite CS, bool isReadOnlyCall,
Pointer = Inst->getOperand(1);
// calls to free() erase the entire structure
PointerSize = ~0ULL;
- } else if (isFreeCall(Inst)) {
- Pointer = Inst->getOperand(0);
- // calls to free() erase the entire structure
- PointerSize = ~0ULL;
} else if (isa<CallInst>(Inst) || isa<InvokeInst>(Inst)) {
// Debug intrinsics don't cause dependences.
if (isa<DbgInfoIntrinsic>(Inst)) continue;
@@ -174,7 +172,7 @@ MemDepResult MemoryDependenceAnalysis::
getPointerDependencyFrom(Value *MemPtr, uint64_t MemSize, bool isLoad,
BasicBlock::iterator ScanIt, BasicBlock *BB) {
- Value* invariantTag = 0;
+ Value *invariantTag = 0;
// Walk backwards through the basic block, looking for dependencies.
while (ScanIt != BB->begin()) {
@@ -185,12 +183,12 @@ getPointerDependencyFrom(Value *MemPtr, uint64_t MemSize, bool isLoad,
if (invariantTag == Inst) {
invariantTag = 0;
continue;
- } else if (IntrinsicInst* II = dyn_cast<IntrinsicInst>(Inst)) {
+ } else if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(Inst)) {
// If we pass an invariant-end marker, then we've just entered an
// invariant region and can start ignoring dependencies.
if (II->getIntrinsicID() == Intrinsic::invariant_end) {
uint64_t invariantSize = ~0ULL;
- if (ConstantInt* CI = dyn_cast<ConstantInt>(II->getOperand(2)))
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(II->getOperand(2)))
invariantSize = CI->getZExtValue();
AliasAnalysis::AliasResult R =
@@ -203,9 +201,9 @@ getPointerDependencyFrom(Value *MemPtr, uint64_t MemSize, bool isLoad,
// If we reach a lifetime begin or end marker, then the query ends here
// because the value is undefined.
} else if (II->getIntrinsicID() == Intrinsic::lifetime_start ||
- II->getIntrinsicID() == Intrinsic::lifetime_end) {
+ II->getIntrinsicID() == Intrinsic::lifetime_end) {
uint64_t invariantSize = ~0ULL;
- if (ConstantInt* CI = dyn_cast<ConstantInt>(II->getOperand(1)))
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(II->getOperand(1)))
invariantSize = CI->getZExtValue();
AliasAnalysis::AliasResult R =
@@ -371,20 +369,41 @@ MemDepResult MemoryDependenceAnalysis::getDependency(Instruction *QueryInst) {
// calls to free() erase the entire structure, not just a field.
MemSize = ~0UL;
} else if (isa<CallInst>(QueryInst) || isa<InvokeInst>(QueryInst)) {
- CallSite QueryCS = CallSite::get(QueryInst);
- bool isReadOnly = AA->onlyReadsMemory(QueryCS);
- LocalCache = getCallSiteDependencyFrom(QueryCS, isReadOnly, ScanPos,
- QueryParent);
+ int IntrinsicID = 0; // Intrinsic IDs start at 1.
+ if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(QueryInst))
+ IntrinsicID = II->getIntrinsicID();
+
+ switch (IntrinsicID) {
+ case Intrinsic::lifetime_start:
+ case Intrinsic::lifetime_end:
+ case Intrinsic::invariant_start:
+ MemPtr = QueryInst->getOperand(2);
+ MemSize = cast<ConstantInt>(QueryInst->getOperand(1))->getZExtValue();
+ break;
+ case Intrinsic::invariant_end:
+ MemPtr = QueryInst->getOperand(3);
+ MemSize = cast<ConstantInt>(QueryInst->getOperand(2))->getZExtValue();
+ break;
+ default:
+ CallSite QueryCS = CallSite::get(QueryInst);
+ bool isReadOnly = AA->onlyReadsMemory(QueryCS);
+ LocalCache = getCallSiteDependencyFrom(QueryCS, isReadOnly, ScanPos,
+ QueryParent);
+ }
} else {
// Non-memory instruction.
LocalCache = MemDepResult::getClobber(--BasicBlock::iterator(ScanPos));
}
// If we need to do a pointer scan, make it happen.
- if (MemPtr)
- LocalCache = getPointerDependencyFrom(MemPtr, MemSize,
- isa<LoadInst>(QueryInst),
- ScanPos, QueryParent);
+ if (MemPtr) {
+ bool isLoad = !QueryInst->mayWriteToMemory();
+ if (IntrinsicInst *II = dyn_cast<MemoryUseIntrinsic>(QueryInst)) {
+ isLoad |= II->getIntrinsicID() == Intrinsic::lifetime_end;
+ }
+ LocalCache = getPointerDependencyFrom(MemPtr, MemSize, isLoad, ScanPos,
+ QueryParent);
+ }
// Remember the result!
if (Instruction *I = LocalCache.getInst())
@@ -688,6 +707,274 @@ SortNonLocalDepInfoCache(MemoryDependenceAnalysis::NonLocalDepInfo &Cache,
}
}
+/// isPHITranslatable - Return true if the specified computation is derived from
+/// a PHI node in the current block and if it is simple enough for us to handle.
+static bool isPHITranslatable(Instruction *Inst) {
+ if (isa<PHINode>(Inst))
+ return true;
+
+ // We can handle bitcast of a PHI, but the PHI needs to be in the same block
+ // as the bitcast.
+ if (BitCastInst *BC = dyn_cast<BitCastInst>(Inst)) {
+ Instruction *OpI = dyn_cast<Instruction>(BC->getOperand(0));
+ if (OpI == 0 || OpI->getParent() != Inst->getParent())
+ return true;
+ return isPHITranslatable(OpI);
+ }
+
+ // We can translate a GEP if all of its operands defined in this block are phi
+ // translatable.
+ if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Inst)) {
+ for (unsigned i = 0, e = GEP->getNumOperands(); i != e; ++i) {
+ Instruction *OpI = dyn_cast<Instruction>(GEP->getOperand(i));
+ if (OpI == 0 || OpI->getParent() != Inst->getParent())
+ continue;
+
+ if (!isPHITranslatable(OpI))
+ return false;
+ }
+ return true;
+ }
+
+ if (Inst->getOpcode() == Instruction::Add &&
+ isa<ConstantInt>(Inst->getOperand(1))) {
+ Instruction *OpI = dyn_cast<Instruction>(Inst->getOperand(0));
+ if (OpI == 0 || OpI->getParent() != Inst->getParent())
+ return true;
+ return isPHITranslatable(OpI);
+ }
+
+ // cerr << "MEMDEP: Could not PHI translate: " << *Pointer;
+ // if (isa<BitCastInst>(PtrInst) || isa<GetElementPtrInst>(PtrInst))
+ // cerr << "OP:\t\t\t\t" << *PtrInst->getOperand(0);
+
+ return false;
+}
+
+/// GetPHITranslatedValue - Given a computation that satisfied the
+/// isPHITranslatable predicate, see if we can translate the computation into
+/// the specified predecessor block. If so, return that value.
+Value *MemoryDependenceAnalysis::
+GetPHITranslatedValue(Value *InVal, BasicBlock *CurBB, BasicBlock *Pred,
+ const TargetData *TD) const {
+ // If the input value is not an instruction, or if it is not defined in CurBB,
+ // then we don't need to phi translate it.
+ Instruction *Inst = dyn_cast<Instruction>(InVal);
+ if (Inst == 0 || Inst->getParent() != CurBB)
+ return InVal;
+
+ if (PHINode *PN = dyn_cast<PHINode>(Inst))
+ return PN->getIncomingValueForBlock(Pred);
+
+ // Handle bitcast of PHI.
+ if (BitCastInst *BC = dyn_cast<BitCastInst>(Inst)) {
+ // PHI translate the input operand.
+ Value *PHIIn = GetPHITranslatedValue(BC->getOperand(0), CurBB, Pred, TD);
+ if (PHIIn == 0) return 0;
+
+ // Constants are trivial to phi translate.
+ if (Constant *C = dyn_cast<Constant>(PHIIn))
+ return ConstantExpr::getBitCast(C, BC->getType());
+
+ // Otherwise we have to see if a bitcasted version of the incoming pointer
+ // is available. If so, we can use it, otherwise we have to fail.
+ for (Value::use_iterator UI = PHIIn->use_begin(), E = PHIIn->use_end();
+ UI != E; ++UI) {
+ if (BitCastInst *BCI = dyn_cast<BitCastInst>(*UI))
+ if (BCI->getType() == BC->getType())
+ return BCI;
+ }
+ return 0;
+ }
+
+ // Handle getelementptr with at least one PHI translatable operand.
+ if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Inst)) {
+ SmallVector<Value*, 8> GEPOps;
+ BasicBlock *CurBB = GEP->getParent();
+ for (unsigned i = 0, e = GEP->getNumOperands(); i != e; ++i) {
+ Value *GEPOp = GEP->getOperand(i);
+ // No PHI translation is needed for operands whose values are live-in to
+ // the predecessor block.
+ if (!isa<Instruction>(GEPOp) ||
+ cast<Instruction>(GEPOp)->getParent() != CurBB) {
+ GEPOps.push_back(GEPOp);
+ continue;
+ }
+
+ // If the operand is a phi node, do phi translation.
+ Value *InOp = GetPHITranslatedValue(GEPOp, CurBB, Pred, TD);
+ if (InOp == 0) return 0;
+
+ GEPOps.push_back(InOp);
+ }
+
+ // Simplify the GEP to handle 'gep x, 0' -> x etc.
+ if (Value *V = SimplifyGEPInst(&GEPOps[0], GEPOps.size(), TD))
+ return V;
+
+ // Scan to see if we have this GEP available.
+ Value *APHIOp = GEPOps[0];
+ for (Value::use_iterator UI = APHIOp->use_begin(), E = APHIOp->use_end();
+ UI != E; ++UI) {
+ if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(*UI))
+ if (GEPI->getType() == GEP->getType() &&
+ GEPI->getNumOperands() == GEPOps.size() &&
+ GEPI->getParent()->getParent() == CurBB->getParent()) {
+ bool Mismatch = false;
+ for (unsigned i = 0, e = GEPOps.size(); i != e; ++i)
+ if (GEPI->getOperand(i) != GEPOps[i]) {
+ Mismatch = true;
+ break;
+ }
+ if (!Mismatch)
+ return GEPI;
+ }
+ }
+ return 0;
+ }
+
+ // Handle add with a constant RHS.
+ if (Inst->getOpcode() == Instruction::Add &&
+ isa<ConstantInt>(Inst->getOperand(1))) {
+ // PHI translate the LHS.
+ Value *LHS;
+ Constant *RHS = cast<ConstantInt>(Inst->getOperand(1));
+ Instruction *OpI = dyn_cast<Instruction>(Inst->getOperand(0));
+ bool isNSW = cast<BinaryOperator>(Inst)->hasNoSignedWrap();
+ bool isNUW = cast<BinaryOperator>(Inst)->hasNoUnsignedWrap();
+
+ if (OpI == 0 || OpI->getParent() != Inst->getParent())
+ LHS = Inst->getOperand(0);
+ else {
+ LHS = GetPHITranslatedValue(Inst->getOperand(0), CurBB, Pred, TD);
+ if (LHS == 0)
+ return 0;
+ }
+
+ // If the PHI translated LHS is an add of a constant, fold the immediates.
+ if (BinaryOperator *BOp = dyn_cast<BinaryOperator>(LHS))
+ if (BOp->getOpcode() == Instruction::Add)
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(BOp->getOperand(1))) {
+ LHS = BOp->getOperand(0);
+ RHS = ConstantExpr::getAdd(RHS, CI);
+ isNSW = isNUW = false;
+ }
+
+ // See if the add simplifies away.
+ if (Value *Res = SimplifyAddInst(LHS, RHS, isNSW, isNUW, TD))
+ return Res;
+
+ // Otherwise, see if we have this add available somewhere.
+ for (Value::use_iterator UI = LHS->use_begin(), E = LHS->use_end();
+ UI != E; ++UI) {
+ if (BinaryOperator *BO = dyn_cast<BinaryOperator>(*UI))
+ if (BO->getOperand(0) == LHS && BO->getOperand(1) == RHS &&
+ BO->getParent()->getParent() == CurBB->getParent())
+ return BO;
+ }
+
+ return 0;
+ }
+
+ return 0;
+}
+
+/// GetAvailablePHITranslatedValue - Return the value computed by
+/// GetPHITranslatedValue if it dominates PredBB, otherwise return null.
+Value *MemoryDependenceAnalysis::
+GetAvailablePHITranslatedValue(Value *V,
+ BasicBlock *CurBB, BasicBlock *PredBB,
+ const TargetData *TD,
+ const DominatorTree &DT) const {
+ // See if PHI translation succeeds.
+ V = GetPHITranslatedValue(V, CurBB, PredBB, TD);
+ if (V == 0) return 0;
+
+ // Make sure the value is live in the predecessor.
+ if (Instruction *Inst = dyn_cast_or_null<Instruction>(V))
+ if (!DT.dominates(Inst->getParent(), PredBB))
+ return 0;
+ return V;
+}
+
+
+/// InsertPHITranslatedPointer - Insert a computation of the PHI translated
+/// version of 'V' for the edge PredBB->CurBB into the end of the PredBB
+/// block. All newly created instructions are added to the NewInsts list.
+///
+Value *MemoryDependenceAnalysis::
+InsertPHITranslatedPointer(Value *InVal, BasicBlock *CurBB,
+ BasicBlock *PredBB, const TargetData *TD,
+ const DominatorTree &DT,
+ SmallVectorImpl<Instruction*> &NewInsts) const {
+ // See if we have a version of this value already available and dominating
+ // PredBB. If so, there is no need to insert a new copy.
+ if (Value *Res = GetAvailablePHITranslatedValue(InVal, CurBB, PredBB, TD, DT))
+ return Res;
+
+ // If we don't have an available version of this value, it must be an
+ // instruction.
+ Instruction *Inst = cast<Instruction>(InVal);
+
+ // Handle bitcast of PHI translatable value.
+ if (BitCastInst *BC = dyn_cast<BitCastInst>(Inst)) {
+ Value *OpVal = InsertPHITranslatedPointer(BC->getOperand(0),
+ CurBB, PredBB, TD, DT, NewInsts);
+ if (OpVal == 0) return 0;
+
+ // Otherwise insert a bitcast at the end of PredBB.
+ BitCastInst *New = new BitCastInst(OpVal, InVal->getType(),
+ InVal->getName()+".phi.trans.insert",
+ PredBB->getTerminator());
+ NewInsts.push_back(New);
+ return New;
+ }
+
+ // Handle getelementptr with at least one PHI operand.
+ if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Inst)) {
+ SmallVector<Value*, 8> GEPOps;
+ BasicBlock *CurBB = GEP->getParent();
+ for (unsigned i = 0, e = GEP->getNumOperands(); i != e; ++i) {
+ Value *OpVal = InsertPHITranslatedPointer(GEP->getOperand(i),
+ CurBB, PredBB, TD, DT, NewInsts);
+ if (OpVal == 0) return 0;
+ GEPOps.push_back(OpVal);
+ }
+
+ GetElementPtrInst *Result =
+ GetElementPtrInst::Create(GEPOps[0], GEPOps.begin()+1, GEPOps.end(),
+ InVal->getName()+".phi.trans.insert",
+ PredBB->getTerminator());
+ Result->setIsInBounds(GEP->isInBounds());
+ NewInsts.push_back(Result);
+ return Result;
+ }
+
+#if 0
+ // FIXME: This code works, but it is unclear that we actually want to insert
+ // a big chain of computation in order to make a value available in a block.
+ // This needs to be evaluated carefully to consider its cost trade offs.
+
+ // Handle add with a constant RHS.
+ if (Inst->getOpcode() == Instruction::Add &&
+ isa<ConstantInt>(Inst->getOperand(1))) {
+ // PHI translate the LHS.
+ Value *OpVal = InsertPHITranslatedPointer(Inst->getOperand(0),
+ CurBB, PredBB, TD, DT, NewInsts);
+ if (OpVal == 0) return 0;
+
+ BinaryOperator *Res = BinaryOperator::CreateAdd(OpVal, Inst->getOperand(1),
+ InVal->getName()+".phi.trans.insert",
+ PredBB->getTerminator());
+ Res->setHasNoSignedWrap(cast<BinaryOperator>(Inst)->hasNoSignedWrap());
+ Res->setHasNoUnsignedWrap(cast<BinaryOperator>(Inst)->hasNoUnsignedWrap());
+ NewInsts.push_back(Res);
+ return Res;
+ }
+#endif
+
+ return 0;
+}
/// getNonLocalPointerDepFromBB - Perform a dependency query based on
/// pointer/pointeesize starting at the end of StartBB. Add any clobber/def
@@ -831,66 +1118,107 @@ getNonLocalPointerDepFromBB(Value *Pointer, uint64_t PointeeSize,
NumSortedEntries = Cache->size();
}
- // If this is directly a PHI node, just use the incoming values for each
- // pred as the phi translated version.
- if (PHINode *PtrPHI = dyn_cast<PHINode>(PtrInst)) {
- Cache = 0;
-
- for (BasicBlock **PI = PredCache->GetPreds(BB); *PI; ++PI) {
- BasicBlock *Pred = *PI;
- Value *PredPtr = PtrPHI->getIncomingValueForBlock(Pred);
-
- // Check to see if we have already visited this pred block with another
- // pointer. If so, we can't do this lookup. This failure can occur
- // with PHI translation when a critical edge exists and the PHI node in
- // the successor translates to a pointer value different than the
- // pointer the block was first analyzed with.
- std::pair<DenseMap<BasicBlock*,Value*>::iterator, bool>
- InsertRes = Visited.insert(std::make_pair(Pred, PredPtr));
+ // If this is a computation derived from a PHI node, use the suitably
+ // translated incoming values for each pred as the phi translated version.
+ if (!isPHITranslatable(PtrInst))
+ goto PredTranslationFailure;
- if (!InsertRes.second) {
- // If the predecessor was visited with PredPtr, then we already did
- // the analysis and can ignore it.
- if (InsertRes.first->second == PredPtr)
- continue;
-
- // Otherwise, the block was previously analyzed with a different
- // pointer. We can't represent the result of this case, so we just
- // treat this as a phi translation failure.
- goto PredTranslationFailure;
- }
+ Cache = 0;
+
+ for (BasicBlock **PI = PredCache->GetPreds(BB); *PI; ++PI) {
+ BasicBlock *Pred = *PI;
+ // Get the PHI translated pointer in this predecessor. This can fail and
+ // return null if not translatable.
+ Value *PredPtr = GetPHITranslatedValue(PtrInst, BB, Pred, TD);
+
+ // Check to see if we have already visited this pred block with another
+ // pointer. If so, we can't do this lookup. This failure can occur
+ // with PHI translation when a critical edge exists and the PHI node in
+ // the successor translates to a pointer value different than the
+ // pointer the block was first analyzed with.
+ std::pair<DenseMap<BasicBlock*,Value*>::iterator, bool>
+ InsertRes = Visited.insert(std::make_pair(Pred, PredPtr));
- // FIXME: it is entirely possible that PHI translating will end up with
- // the same value. Consider PHI translating something like:
- // X = phi [x, bb1], [y, bb2]. PHI translating for bb1 doesn't *need*
- // to recurse here, pedantically speaking.
+ if (!InsertRes.second) {
+ // If the predecessor was visited with PredPtr, then we already did
+ // the analysis and can ignore it.
+ if (InsertRes.first->second == PredPtr)
+ continue;
- // If we have a problem phi translating, fall through to the code below
- // to handle the failure condition.
- if (getNonLocalPointerDepFromBB(PredPtr, PointeeSize, isLoad, Pred,
- Result, Visited))
- goto PredTranslationFailure;
+ // Otherwise, the block was previously analyzed with a different
+ // pointer. We can't represent the result of this case, so we just
+ // treat this as a phi translation failure.
+ goto PredTranslationFailure;
}
- // Refresh the CacheInfo/Cache pointer so that it isn't invalidated.
- CacheInfo = &NonLocalPointerDeps[CacheKey];
- Cache = &CacheInfo->second;
- NumSortedEntries = Cache->size();
+ // If PHI translation was unable to find an available pointer in this
+ // predecessor, then we have to assume that the pointer is clobbered in
+ // that predecessor. We can still do PRE of the load, which would insert
+ // a computation of the pointer in this predecessor.
+ if (PredPtr == 0) {
+ // Add the entry to the Result list.
+ NonLocalDepEntry Entry(Pred,
+ MemDepResult::getClobber(Pred->getTerminator()));
+ Result.push_back(Entry);
+
+ // Add it to the cache for this CacheKey so that subsequent queries get
+ // this result.
+ Cache = &NonLocalPointerDeps[CacheKey].second;
+ MemoryDependenceAnalysis::NonLocalDepInfo::iterator It =
+ std::upper_bound(Cache->begin(), Cache->end(), Entry);
+
+ if (It != Cache->begin() && prior(It)->first == Pred)
+ --It;
+
+ if (It == Cache->end() || It->first != Pred) {
+ Cache->insert(It, Entry);
+ // Add it to the reverse map.
+ ReverseNonLocalPtrDeps[Pred->getTerminator()].insert(CacheKey);
+ } else if (!It->second.isDirty()) {
+ // noop
+ } else if (It->second.getInst() == Pred->getTerminator()) {
+ // Same instruction, clear the dirty marker.
+ It->second = Entry.second;
+ } else if (It->second.getInst() == 0) {
+ // Dirty, with no instruction, just add this.
+ It->second = Entry.second;
+ ReverseNonLocalPtrDeps[Pred->getTerminator()].insert(CacheKey);
+ } else {
+ // Otherwise, dirty with a different instruction.
+ RemoveFromReverseMap(ReverseNonLocalPtrDeps, It->second.getInst(),
+ CacheKey);
+ It->second = Entry.second;
+ ReverseNonLocalPtrDeps[Pred->getTerminator()].insert(CacheKey);
+ }
+ Cache = 0;
+ continue;
+ }
+
+ // FIXME: it is entirely possible that PHI translating will end up with
+ // the same value. Consider PHI translating something like:
+ // X = phi [x, bb1], [y, bb2]. PHI translating for bb1 doesn't *need*
+ // to recurse here, pedantically speaking.
- // Since we did phi translation, the "Cache" set won't contain all of the
- // results for the query. This is ok (we can still use it to accelerate
- // specific block queries) but we can't do the fastpath "return all
- // results from the set" Clear out the indicator for this.
- CacheInfo->first = BBSkipFirstBlockPair();
- SkipFirstBlock = false;
- continue;
+ // If we have a problem phi translating, fall through to the code below
+ // to handle the failure condition.
+ if (getNonLocalPointerDepFromBB(PredPtr, PointeeSize, isLoad, Pred,
+ Result, Visited))
+ goto PredTranslationFailure;
}
- // TODO: BITCAST, GEP.
+ // Refresh the CacheInfo/Cache pointer so that it isn't invalidated.
+ CacheInfo = &NonLocalPointerDeps[CacheKey];
+ Cache = &CacheInfo->second;
+ NumSortedEntries = Cache->size();
- // cerr << "MEMDEP: Could not PHI translate: " << *Pointer;
- // if (isa<BitCastInst>(PtrInst) || isa<GetElementPtrInst>(PtrInst))
- // cerr << "OP:\t\t\t\t" << *PtrInst->getOperand(0);
+ // Since we did phi translation, the "Cache" set won't contain all of the
+ // results for the query. This is ok (we can still use it to accelerate
+ // specific block queries) but we can't do the fastpath "return all
+ // results from the set" Clear out the indicator for this.
+ CacheInfo->first = BBSkipFirstBlockPair();
+ SkipFirstBlock = false;
+ continue;
+
PredTranslationFailure:
if (Cache == 0) {
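
Aside (not part of the patch): one step of GetPHITranslatedValue worth spelling out is the immediate folding in the add case: when the translated LHS is itself an add of a constant, the two constants are combined and the wrap flags dropped, because the fused add may wrap even if neither original add did. A toy rendering with invented types; the real code works on BinaryOperators and ConstantInts:

#include <cstdint>

// (X + C1) + C2  ==>  X + (C1 + C2), clearing nsw/nuw on the result.
struct AddOfConst {
  int64_t Imm;          // the constant addend C1
  bool NSW, NUW;        // no-signed-wrap / no-unsigned-wrap flags
};

AddOfConst foldImmediates(AddOfConst LHS, int64_t C2) {
  LHS.Imm += C2;              // RHS = ConstantExpr::getAdd(RHS, CI)
  LHS.NSW = LHS.NUW = false;  // isNSW = isNUW = false, as in the patch
  return LHS;
}
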
diff --git a/lib/Analysis/ScalarEvolution.cpp b/lib/Analysis/ScalarEvolution.cpp
index ea4af40..c6835ef 100644
--- a/lib/Analysis/ScalarEvolution.cpp
+++ b/lib/Analysis/ScalarEvolution.cpp
@@ -3644,7 +3644,7 @@ EvaluateConstantChrecAtConstant(const SCEVAddRecExpr *AddRec, ConstantInt *C,
/// the addressed element of the initializer or null if the index expression is
/// invalid.
static Constant *
-GetAddressedElementFromGlobal(LLVMContext &Context, GlobalVariable *GV,
+GetAddressedElementFromGlobal(GlobalVariable *GV,
const std::vector<ConstantInt*> &Indices) {
Constant *Init = GV->getInitializer();
for (unsigned i = 0, e = Indices.size(); i != e; ++i) {
@@ -3732,7 +3732,7 @@ ScalarEvolution::ComputeLoadConstantCompareBackedgeTakenCount(
// Form the GEP offset.
Indexes[VarIdxNum] = Val;
- Constant *Result = GetAddressedElementFromGlobal(getContext(), GV, Indexes);
+ Constant *Result = GetAddressedElementFromGlobal(GV, Indexes);
if (Result == 0) break; // Cannot compute!
// Evaluate the condition for this iteration.
diff --git a/lib/Analysis/ValueTracking.cpp b/lib/Analysis/ValueTracking.cpp
index b0e6900..31d3ccc 100644
--- a/lib/Analysis/ValueTracking.cpp
+++ b/lib/Analysis/ValueTracking.cpp
@@ -325,7 +325,7 @@ void llvm::ComputeMaskedBits(Value *V, const APInt &Mask,
APInt Mask2(Mask.shl(ShiftAmt));
ComputeMaskedBits(I->getOperand(0), Mask2, KnownZero,KnownOne, TD,
Depth+1);
- assert((KnownZero & KnownOne) == 0&&"Bits known to be one AND zero?");
+ assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
KnownZero = APIntOps::lshr(KnownZero, ShiftAmt);
KnownOne = APIntOps::lshr(KnownOne, ShiftAmt);
// high bits known zero.
@@ -343,7 +343,7 @@ void llvm::ComputeMaskedBits(Value *V, const APInt &Mask,
APInt Mask2(Mask.shl(ShiftAmt));
ComputeMaskedBits(I->getOperand(0), Mask2, KnownZero, KnownOne, TD,
Depth+1);
- assert((KnownZero & KnownOne) == 0&&"Bits known to be one AND zero?");
+ assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
KnownZero = APIntOps::lshr(KnownZero, ShiftAmt);
KnownOne = APIntOps::lshr(KnownOne, ShiftAmt);
@@ -380,7 +380,7 @@ void llvm::ComputeMaskedBits(Value *V, const APInt &Mask,
}
// fall through
case Instruction::Add: {
- // If one of the operands has trailing zeros, than the bits that the
+ // If one of the operands has trailing zeros, then the bits that the
// other operand has in those bit positions will be preserved in the
// result. For an add, this works with either operand. For a subtract,
// this only works if the known zeros are in the right operand.
@@ -436,7 +436,7 @@ void llvm::ComputeMaskedBits(Value *V, const APInt &Mask,
KnownZero |= KnownZero2 & Mask;
- assert((KnownZero & KnownOne) == 0&&"Bits known to be one AND zero?");
+ assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
}
}
break;
@@ -449,7 +449,7 @@ void llvm::ComputeMaskedBits(Value *V, const APInt &Mask,
KnownZero |= ~LowBits & Mask;
ComputeMaskedBits(I->getOperand(0), Mask2, KnownZero, KnownOne, TD,
Depth+1);
- assert((KnownZero & KnownOne) == 0&&"Bits known to be one AND zero?");
+ assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
break;
}
}
@@ -833,14 +833,12 @@ bool llvm::ComputeMultiple(Value *V, unsigned Base, Value *&Multiple,
switch (I->getOpcode()) {
default: break;
- case Instruction::SExt: {
+ case Instruction::SExt:
if (!LookThroughSExt) return false;
// otherwise fall through to ZExt
- }
- case Instruction::ZExt: {
+ case Instruction::ZExt:
return ComputeMultiple(I->getOperand(0), Base, Multiple,
LookThroughSExt, Depth+1);
- }
case Instruction::Shl:
case Instruction::Mul: {
Value *Op0 = I->getOperand(0);
@@ -950,6 +948,195 @@ bool llvm::CannotBeNegativeZero(const Value *V, unsigned Depth) {
return false;
}
+
+/// GetLinearExpression - Analyze the specified value as a linear expression:
+/// "A*V + B", where A and B are constant integers. Return the scale and offset
+/// values as APInts and return V as a Value*. The incoming Value is known to
+/// have IntegerType. Note that this looks through extends, so the high bits
+/// may not be represented in the result.
+static Value *GetLinearExpression(Value *V, APInt &Scale, APInt &Offset,
+ const TargetData *TD, unsigned Depth) {
+ assert(isa<IntegerType>(V->getType()) && "Not an integer value");
+
+ // Limit our recursion depth.
+ if (Depth == 6) {
+ Scale = 1;
+ Offset = 0;
+ return V;
+ }
+
+ if (BinaryOperator *BOp = dyn_cast<BinaryOperator>(V)) {
+ if (ConstantInt *RHSC = dyn_cast<ConstantInt>(BOp->getOperand(1))) {
+ switch (BOp->getOpcode()) {
+ default: break;
+ case Instruction::Or:
+ // X|C == X+C if all the bits in C are unset in X. Otherwise we can't
+ // analyze it.
+ if (!MaskedValueIsZero(BOp->getOperand(0), RHSC->getValue(), TD))
+ break;
+ // FALL THROUGH.
+ case Instruction::Add:
+ V = GetLinearExpression(BOp->getOperand(0), Scale, Offset, TD, Depth+1);
+ Offset += RHSC->getValue();
+ return V;
+ case Instruction::Mul:
+ V = GetLinearExpression(BOp->getOperand(0), Scale, Offset, TD, Depth+1);
+ Offset *= RHSC->getValue();
+ Scale *= RHSC->getValue();
+ return V;
+ case Instruction::Shl:
+ V = GetLinearExpression(BOp->getOperand(0), Scale, Offset, TD, Depth+1);
+ Offset <<= RHSC->getValue().getLimitedValue();
+ Scale <<= RHSC->getValue().getLimitedValue();
+ return V;
+ }
+ }
+ }
+
+ // Since clients don't care about the high bits of the value, just scales and
+ // offsets, we can look through extensions.
+ if (isa<SExtInst>(V) || isa<ZExtInst>(V)) {
+ Value *CastOp = cast<CastInst>(V)->getOperand(0);
+ unsigned OldWidth = Scale.getBitWidth();
+ unsigned SmallWidth = CastOp->getType()->getPrimitiveSizeInBits();
+ Scale.trunc(SmallWidth);
+ Offset.trunc(SmallWidth);
+ Value *Result = GetLinearExpression(CastOp, Scale, Offset, TD, Depth+1);
+ Scale.zext(OldWidth);
+ Offset.zext(OldWidth);
+ return Result;
+ }
+
+ Scale = 1;
+ Offset = 0;
+ return V;
+}
+
+/// DecomposeGEPExpression - If V is a symbolic pointer expression, decompose it
+/// into a base pointer with a constant offset and a number of scaled symbolic
+/// offsets.
+///
+/// The scaled symbolic offsets (represented by pairs of a Value* and a scale in
+/// the VarIndices vector) are Value*'s that are known to be scaled by the
+/// specified amount, but which may have other unrepresented high bits. As such,
+/// the gep cannot necessarily be reconstructed from its decomposed form.
+///
+/// When TargetData is around, this function is capable of analyzing everything
+/// that Value::getUnderlyingObject() can look through. When not, it just looks
+/// through pointer casts.
+///
+const Value *llvm::DecomposeGEPExpression(const Value *V, int64_t &BaseOffs,
+ SmallVectorImpl<std::pair<const Value*, int64_t> > &VarIndices,
+ const TargetData *TD) {
+ // Limit recursion depth to limit compile time in crazy cases.
+ unsigned MaxLookup = 6;
+
+ BaseOffs = 0;
+ do {
+ // See if this is a bitcast or GEP.
+ const Operator *Op = dyn_cast<Operator>(V);
+ if (Op == 0) {
+ // The only non-operator case we can handle are GlobalAliases.
+ if (const GlobalAlias *GA = dyn_cast<GlobalAlias>(V)) {
+ if (!GA->mayBeOverridden()) {
+ V = GA->getAliasee();
+ continue;
+ }
+ }
+ return V;
+ }
+
+ if (Op->getOpcode() == Instruction::BitCast) {
+ V = Op->getOperand(0);
+ continue;
+ }
+
+ const GEPOperator *GEPOp = dyn_cast<GEPOperator>(Op);
+ if (GEPOp == 0)
+ return V;
+
+ // Don't attempt to analyze GEPs over unsized objects.
+ if (!cast<PointerType>(GEPOp->getOperand(0)->getType())
+ ->getElementType()->isSized())
+ return V;
+
+ // If we are lacking TargetData information, we can't compute the offsets of
+ // elements computed by GEPs. However, we can handle bitcast equivalent
+ // GEPs.
+ if (!TD) {
+ if (!GEPOp->hasAllZeroIndices())
+ return V;
+ V = GEPOp->getOperand(0);
+ continue;
+ }
+
+ // Walk the indices of the GEP, accumulating them into BaseOffs/VarIndices.
+ gep_type_iterator GTI = gep_type_begin(GEPOp);
+ for (User::const_op_iterator I = GEPOp->op_begin()+1,
+ E = GEPOp->op_end(); I != E; ++I) {
+ Value *Index = *I;
+ // Compute the (potentially symbolic) offset in bytes for this index.
+ if (const StructType *STy = dyn_cast<StructType>(*GTI++)) {
+ // For a struct, add the member offset.
+ unsigned FieldNo = cast<ConstantInt>(Index)->getZExtValue();
+ if (FieldNo == 0) continue;
+
+ BaseOffs += TD->getStructLayout(STy)->getElementOffset(FieldNo);
+ continue;
+ }
+
+ // For an array/pointer, add the element offset, explicitly scaled.
+ if (ConstantInt *CIdx = dyn_cast<ConstantInt>(Index)) {
+ if (CIdx->isZero()) continue;
+ BaseOffs += TD->getTypeAllocSize(*GTI)*CIdx->getSExtValue();
+ continue;
+ }
+
+ uint64_t Scale = TD->getTypeAllocSize(*GTI);
+
+ // Use GetLinearExpression to decompose the index into a C1*V+C2 form.
+ unsigned Width = cast<IntegerType>(Index->getType())->getBitWidth();
+ APInt IndexScale(Width, 0), IndexOffset(Width, 0);
+ Index = GetLinearExpression(Index, IndexScale, IndexOffset, TD, 0);
+
+ // The GEP index scale ("Scale") scales C1*V+C2, yielding (C1*V+C2)*Scale.
+ // This gives us an aggregate computation of (C1*Scale)*V + C2*Scale.
+ BaseOffs += IndexOffset.getZExtValue()*Scale;
+ Scale *= IndexScale.getZExtValue();
+
+
+ // If we already had an occurrence of this index variable, merge this
+ // scale into it. For example, we want to handle:
+ // A[x][x] -> x*16 + x*4 -> x*20
+ // This also ensures that 'x' only appears in the index list once.
+ for (unsigned i = 0, e = VarIndices.size(); i != e; ++i) {
+ if (VarIndices[i].first == Index) {
+ Scale += VarIndices[i].second;
+ VarIndices.erase(VarIndices.begin()+i);
+ break;
+ }
+ }
+
+ // Make sure that we have a scale that makes sense for this target's
+ // pointer size.
+ if (unsigned ShiftBits = 64-TD->getPointerSizeInBits()) {
+ Scale <<= ShiftBits;
+ Scale >>= ShiftBits;
+ }
+
+ if (Scale)
+ VarIndices.push_back(std::make_pair(Index, Scale));
+ }
+
+ // Analyze the base pointer next.
+ V = GEPOp->getOperand(0);
+ } while (--MaxLookup);
+
+ // If the chain of expressions is too deep, just return early.
+ return V;
+}
+
+
// This is the recursive version of BuildSubAggregate. It takes a few different
// arguments. Idxs is the index within the nested struct From that we are
// looking at now (which is of type IndexedType). IdxSkip is the number of
@@ -959,7 +1146,6 @@ bool llvm::CannotBeNegativeZero(const Value *V, unsigned Depth) {
static Value *BuildSubAggregate(Value *From, Value* To, const Type *IndexedType,
SmallVector<unsigned, 10> &Idxs,
unsigned IdxSkip,
- LLVMContext &Context,
Instruction *InsertBefore) {
const llvm::StructType *STy = llvm::dyn_cast<llvm::StructType>(IndexedType);
if (STy) {
@@ -971,7 +1157,7 @@ static Value *BuildSubAggregate(Value *From, Value* To, const Type *IndexedType,
Idxs.push_back(i);
Value *PrevTo = To;
To = BuildSubAggregate(From, To, STy->getElementType(i), Idxs, IdxSkip,
- Context, InsertBefore);
+ InsertBefore);
Idxs.pop_back();
if (!To) {
// Couldn't find any inserted value for this index? Cleanup
@@ -994,7 +1180,7 @@ static Value *BuildSubAggregate(Value *From, Value* To, const Type *IndexedType,
// we might be able to find the complete struct somewhere.
// Find the value that is at that particular spot
- Value *V = FindInsertedValue(From, Idxs.begin(), Idxs.end(), Context);
+ Value *V = FindInsertedValue(From, Idxs.begin(), Idxs.end());
if (!V)
return NULL;
@@ -1017,7 +1203,7 @@ static Value *BuildSubAggregate(Value *From, Value* To, const Type *IndexedType,
//
// All inserted insertvalue instructions are inserted before InsertBefore
static Value *BuildSubAggregate(Value *From, const unsigned *idx_begin,
- const unsigned *idx_end, LLVMContext &Context,
+ const unsigned *idx_end,
Instruction *InsertBefore) {
assert(InsertBefore && "Must have someplace to insert!");
const Type *IndexedType = ExtractValueInst::getIndexedType(From->getType(),
@@ -1027,8 +1213,7 @@ static Value *BuildSubAggregate(Value *From, const unsigned *idx_begin,
SmallVector<unsigned, 10> Idxs(idx_begin, idx_end);
unsigned IdxSkip = Idxs.size();
- return BuildSubAggregate(From, To, IndexedType, Idxs, IdxSkip,
- Context, InsertBefore);
+ return BuildSubAggregate(From, To, IndexedType, Idxs, IdxSkip, InsertBefore);
}
/// FindInsertedValue - Given an aggregate and a sequence of indices, see if
@@ -1038,8 +1223,7 @@ static Value *BuildSubAggregate(Value *From, const unsigned *idx_begin,
/// If InsertBefore is not null, this function will duplicate (modified)
/// insertvalues when a part of a nested struct is extracted.
Value *llvm::FindInsertedValue(Value *V, const unsigned *idx_begin,
- const unsigned *idx_end, LLVMContext &Context,
- Instruction *InsertBefore) {
+ const unsigned *idx_end, Instruction *InsertBefore) {
// Nothing to index? Just return V then (this is useful at the end of our
// recursion)
if (idx_begin == idx_end)
@@ -1063,7 +1247,7 @@ Value *llvm::FindInsertedValue(Value *V, const unsigned *idx_begin,
if (isa<ConstantArray>(C) || isa<ConstantStruct>(C))
// Recursively process this constant
return FindInsertedValue(C->getOperand(*idx_begin), idx_begin + 1,
- idx_end, Context, InsertBefore);
+ idx_end, InsertBefore);
} else if (InsertValueInst *I = dyn_cast<InsertValueInst>(V)) {
// Loop the indices for the insertvalue instruction in parallel with the
// requested indices
@@ -1082,8 +1266,7 @@ Value *llvm::FindInsertedValue(Value *V, const unsigned *idx_begin,
// %C = insertvalue {i32, i32 } %A, i32 11, 1
// which allows the unused 0,0 element from the nested struct to be
// removed.
- return BuildSubAggregate(V, idx_begin, req_idx,
- Context, InsertBefore);
+ return BuildSubAggregate(V, idx_begin, req_idx, InsertBefore);
else
// We can't handle this without inserting insertvalues
return 0;
@@ -1094,13 +1277,13 @@ Value *llvm::FindInsertedValue(Value *V, const unsigned *idx_begin,
// looking for, then.
if (*req_idx != *i)
return FindInsertedValue(I->getAggregateOperand(), idx_begin, idx_end,
- Context, InsertBefore);
+ InsertBefore);
}
// If we end up here, the indices of the insertvalue match with those
// requested (though possibly only partially). Now we recursively look at
// the inserted value, passing any remaining indices.
return FindInsertedValue(I->getInsertedValueOperand(), req_idx, idx_end,
- Context, InsertBefore);
+ InsertBefore);
} else if (ExtractValueInst *I = dyn_cast<ExtractValueInst>(V)) {
// If we're extracting a value from an aggregrate that was extracted from
// something else, we can extract from that something else directly instead.
@@ -1124,7 +1307,7 @@ Value *llvm::FindInsertedValue(Value *V, const unsigned *idx_begin,
&& "Number of indices added not correct?");
return FindInsertedValue(I->getAggregateOperand(), Idxs.begin(), Idxs.end(),
- Context, InsertBefore);
+ InsertBefore);
}
// Otherwise, we don't know (such as, extracting from a function return value
// or load instruction)
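
Aside (not part of the patch): GetLinearExpression's recursion can be reproduced over a toy expression tree. The Node type and function name below are invented; the real code walks BinaryOperators with APInt scale/offset and also treats Or as Add when the bits are disjoint.

#include <cstdint>

// Toy node: either a Leaf, or (Op0 <op> C) for a constant C.
struct Node {
  enum Kind { Leaf, Add, Mul, Shl } K;
  const Node *Op0;
  int64_t C;
};

// Returns the leaf L such that V == Scale * L + Offset, looking through
// +C, *C and <<C with the same depth cap as the patch.
const Node *linearizeSketch(const Node *V, int64_t &Scale, int64_t &Offset,
                            unsigned Depth = 0) {
  if (V->K == Node::Leaf || Depth == 6) {
    Scale = 1;
    Offset = 0;
    return V;
  }
  const Node *L = linearizeSketch(V->Op0, Scale, Offset, Depth + 1);
  switch (V->K) {
  case Node::Add: Offset += V->C; break;                  // S*L + (O + C)
  case Node::Mul: Scale *= V->C; Offset *= V->C; break;   // (S*C)*L + (O*C)
  case Node::Shl: Scale <<= V->C; Offset <<= V->C; break; // scale by 2^C
  default: break;
  }
  return L;
}

DecomposeGEPExpression then multiplies the returned scale by the GEP element size and merges repeated index variables, which is how A[x][x] collapses to a single x*20 term in the comment above.
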
diff --git a/lib/AsmParser/LLParser.cpp b/lib/AsmParser/LLParser.cpp
index 26b6a09..a92dbf8 100644
--- a/lib/AsmParser/LLParser.cpp
+++ b/lib/AsmParser/LLParser.cpp
@@ -2701,6 +2701,10 @@ bool LLParser::ParseFunctionHeader(Function *&Fn, bool isDefine) {
// Add all of the arguments we parsed to the function.
Function::arg_iterator ArgIt = Fn->arg_begin();
for (unsigned i = 0, e = ArgList.size(); i != e; ++i, ++ArgIt) {
+ // If we run out of arguments in the Function prototype, exit early.
+ // FIXME: REMOVE THIS IN LLVM 3.0, this is just for the mismatch case above.
+ if (ArgIt == Fn->arg_end()) break;
+
// If the argument has a name, insert it into the argument symbol table.
if (ArgList[i].Name.empty()) continue;
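
The guard matters when ArgList holds more entries than the Function prototype
has parameters; without it, ++ArgIt in the loop header would advance past
arg_end(). The shape of the pattern, with the container names as stand-ins:

    // Sketch: walk two sequences whose lengths may disagree.
    for (unsigned i = 0, e = Parsed.size(); i != e; ++i, ++It) {
      if (It == End) break;  // prototype exhausted; stop before dereferencing
      // ... use Parsed[i] and *It ...
    }
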
diff --git a/lib/CodeGen/AggressiveAntiDepBreaker.cpp b/lib/CodeGen/AggressiveAntiDepBreaker.cpp
index c37c793..8e3f8e7 100644
--- a/lib/CodeGen/AggressiveAntiDepBreaker.cpp
+++ b/lib/CodeGen/AggressiveAntiDepBreaker.cpp
@@ -28,10 +28,15 @@
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
+// If DebugDiv > 0, then only break antideps with (ID % DebugDiv) == DebugMod
static cl::opt<int>
-AntiDepTrials("agg-antidep-trials",
- cl::desc("Maximum number of anti-dependency breaking passes"),
- cl::init(1), cl::Hidden);
+DebugDiv("agg-antidep-debugdiv",
+ cl::desc("Debug control for aggressive anti-dep breaker"),
+ cl::init(0), cl::Hidden);
+static cl::opt<int>
+DebugMod("agg-antidep-debugmod",
+ cl::desc("Debug control for aggressive anti-dep breaker"),
+ cl::init(0), cl::Hidden);
AggressiveAntiDepState::AggressiveAntiDepState(MachineBasicBlock *BB) :
GroupNodes(TargetRegisterInfo::FirstVirtualRegister, 0) {
@@ -108,7 +113,7 @@ AggressiveAntiDepBreaker(MachineFunction& MFi,
MRI(MF.getRegInfo()),
TRI(MF.getTarget().getRegisterInfo()),
AllocatableSet(TRI->getAllocatableSet(MF)),
- State(NULL), SavedState(NULL) {
+ State(NULL) {
/* Collect a bitset of all registers that are only broken if they
are on the critical path. */
for (unsigned i = 0, e = CriticalPathRCs.size(); i < e; ++i) {
@@ -128,13 +133,6 @@ AggressiveAntiDepBreaker(MachineFunction& MFi,
AggressiveAntiDepBreaker::~AggressiveAntiDepBreaker() {
delete State;
- delete SavedState;
-}
-
-unsigned AggressiveAntiDepBreaker::GetMaxTrials() {
- if (AntiDepTrials <= 0)
- return 1;
- return AntiDepTrials;
}
void AggressiveAntiDepBreaker::StartBlock(MachineBasicBlock *BB) {
@@ -206,8 +204,6 @@ void AggressiveAntiDepBreaker::StartBlock(MachineBasicBlock *BB) {
void AggressiveAntiDepBreaker::FinishBlock() {
delete State;
State = NULL;
- delete SavedState;
- SavedState = NULL;
}
void AggressiveAntiDepBreaker::Observe(MachineInstr *MI, unsigned Count,
@@ -241,10 +237,6 @@ void AggressiveAntiDepBreaker::Observe(MachineInstr *MI, unsigned Count,
}
}
DEBUG(errs() << '\n');
-
- // We're starting a new schedule region so forget any saved state.
- delete SavedState;
- SavedState = NULL;
}
bool AggressiveAntiDepBreaker::IsImplicitDefUse(MachineInstr *MI,
@@ -283,27 +275,20 @@ void AggressiveAntiDepBreaker::GetPassthruRegs(MachineInstr *MI,
}
}
-/// AntiDepEdges - Return in Edges the anti- and output-
-/// dependencies on Regs in SU that we want to consider for breaking.
-static void AntiDepEdges(SUnit *SU,
- const AntiDepBreaker::AntiDepRegVector& Regs,
- std::vector<SDep*>& Edges) {
- AntiDepBreaker::AntiDepRegSet RegSet;
- for (unsigned i = 0, e = Regs.size(); i < e; ++i)
- RegSet.insert(Regs[i]);
-
+/// AntiDepEdges - Return in Edges the anti- and output- dependencies
+/// in SU that we want to consider for breaking.
+static void AntiDepEdges(SUnit *SU, std::vector<SDep*>& Edges) {
+ SmallSet<unsigned, 4> RegSet;
for (SUnit::pred_iterator P = SU->Preds.begin(), PE = SU->Preds.end();
P != PE; ++P) {
if ((P->getKind() == SDep::Anti) || (P->getKind() == SDep::Output)) {
unsigned Reg = P->getReg();
- if (RegSet.count(Reg) != 0) {
+ if (RegSet.count(Reg) == 0) {
Edges.push_back(&*P);
- RegSet.erase(Reg);
+ RegSet.insert(Reg);
}
}
}
-
- assert(RegSet.empty() && "Expected all antidep registers to be found");
}
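
The rewritten AntiDepEdges keeps at most one anti/output edge per register:
once a register is renamed, every other edge on that register is broken with
it. A self-contained illustration of the seen-filter, with std::set standing
in for llvm::SmallSet and Edge standing in for SDep:

    #include <cstddef>
    #include <set>
    #include <vector>
    struct Edge { unsigned Reg; bool AntiOrOutput; };
    static void keepOnePerReg(const std::vector<Edge> &Preds,
                              std::vector<const Edge*> &Out) {
      std::set<unsigned> Seen;
      for (std::size_t i = 0; i != Preds.size(); ++i)
        if (Preds[i].AntiOrOutput && Seen.insert(Preds[i].Reg).second)
          Out.push_back(&Preds[i]);  // first edge on this register wins
    }
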
/// CriticalPathStep - Return the next SUnit after SU on the bottom-up
@@ -332,7 +317,8 @@ static SUnit *CriticalPathStep(SUnit *SU) {
}
void AggressiveAntiDepBreaker::HandleLastUse(unsigned Reg, unsigned KillIdx,
- const char *tag) {
+ const char *tag, const char *header,
+ const char *footer) {
unsigned *KillIndices = State->GetKillIndices();
unsigned *DefIndices = State->GetDefIndices();
std::multimap<unsigned, AggressiveAntiDepState::RegisterReference>&
@@ -343,6 +329,8 @@ void AggressiveAntiDepBreaker::HandleLastUse(unsigned Reg, unsigned KillIdx,
DefIndices[Reg] = ~0u;
RegRefs.erase(Reg);
State->LeaveGroup(Reg);
+ DEBUG(if (header != NULL) {
+ errs() << header << TRI->getName(Reg); header = NULL; });
DEBUG(errs() << "->g" << State->GetGroup(Reg) << tag);
}
// Repeat for subregisters.
@@ -354,10 +342,14 @@ void AggressiveAntiDepBreaker::HandleLastUse(unsigned Reg, unsigned KillIdx,
DefIndices[SubregReg] = ~0u;
RegRefs.erase(SubregReg);
State->LeaveGroup(SubregReg);
+ DEBUG(if (header != NULL) {
+ errs() << header << TRI->getName(Reg); header = NULL; });
DEBUG(errs() << " " << TRI->getName(SubregReg) << "->g" <<
State->GetGroup(SubregReg) << tag);
}
}
+
+ DEBUG(if ((header == NULL) && (footer != NULL)) errs() << footer);
}
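
The new header/footer parameters implement print-header-once framing: the
header is emitted before the first register that actually leaves its group,
and the footer only if the header fired. The protocol in isolation, as a
standalone sketch:

    #include <cstdio>
    static void emitGroup(const unsigned *Regs, unsigned N,
                          const char *header, const char *footer) {
      for (unsigned i = 0; i != N; ++i) {
        if (header) { std::printf("%s", header); header = 0; }
        std::printf(" r%u", Regs[i]);
      }
      if (!header && footer) std::printf("%s", footer);  // only if header fired
    }
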
void AggressiveAntiDepBreaker::PrescanInstruction(MachineInstr *MI, unsigned Count,
@@ -377,9 +369,7 @@ void AggressiveAntiDepBreaker::PrescanInstruction(MachineInstr *MI, unsigned Cou
unsigned Reg = MO.getReg();
if (Reg == 0) continue;
- DEBUG(errs() << "\tDead Def: " << TRI->getName(Reg));
- HandleLastUse(Reg, Count + 1, "");
- DEBUG(errs() << '\n');
+ HandleLastUse(Reg, Count + 1, "", "\tDead Def: ", "\n");
}
DEBUG(errs() << "\tDef Groups:");
@@ -427,15 +417,17 @@ void AggressiveAntiDepBreaker::PrescanInstruction(MachineInstr *MI, unsigned Cou
if (!MO.isReg() || !MO.isDef()) continue;
unsigned Reg = MO.getReg();
if (Reg == 0) continue;
- // Ignore passthru registers for liveness...
- if (PassthruRegs.count(Reg) != 0) continue;
+ // Ignore KILLs and passthru registers for liveness...
+ if ((MI->getOpcode() == TargetInstrInfo::KILL) ||
+ (PassthruRegs.count(Reg) != 0))
+ continue;
- // Update def for Reg and subregs.
+ // Update def for Reg and aliases.
DefIndices[Reg] = Count;
- for (const unsigned *Subreg = TRI->getSubRegisters(Reg);
- *Subreg; ++Subreg) {
- unsigned SubregReg = *Subreg;
- DefIndices[SubregReg] = Count;
+ for (const unsigned *Alias = TRI->getAliasSet(Reg);
+ *Alias; ++Alias) {
+ unsigned AliasReg = *Alias;
+ DefIndices[AliasReg] = Count;
}
}
}
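
Switching from getSubRegisters to getAliasSet widens the def update to
super-registers as well: a def of a register clobbers everything overlapping
it in either direction. Using x86 names purely as an illustration:

    // getSubRegisters(EAX) yields roughly: AX, AH, AL       (contained in EAX)
    // getAliasSet(EAX) yields roughly:     AX, AH, AL, RAX  (anything overlapping)
    for (const unsigned *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias)
      DefIndices[*Alias] = Count;  // as in the hunk above
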
@@ -589,72 +581,108 @@ bool AggressiveAntiDepBreaker::FindSuitableFreeRegisters(
return false;
}
- // FIXME: for now just handle single register in group case...
- if (Regs.size() > 1) {
- DEBUG(errs() << "\tMultiple rename registers in group\n");
- return false;
+#ifndef NDEBUG
+ // If DebugDiv > 0, then only rename when (renamecnt % DebugDiv) == DebugMod
+ if (DebugDiv > 0) {
+ static int renamecnt = 0;
+ if (renamecnt++ % DebugDiv != DebugMod)
+ return false;
+
+ errs() << "*** Performing rename " << TRI->getName(SuperReg) <<
+ " for debug ***\n";
}
+#endif
// Check each possible rename register for SuperReg in round-robin
// order. If that register is available, and the corresponding
// registers are available for the other group subregisters, then we
// can use those registers to rename.
- BitVector SuperBV = RenameRegisterMap[SuperReg];
const TargetRegisterClass *SuperRC =
TRI->getPhysicalRegisterRegClass(SuperReg, MVT::Other);
const TargetRegisterClass::iterator RB = SuperRC->allocation_order_begin(MF);
const TargetRegisterClass::iterator RE = SuperRC->allocation_order_end(MF);
if (RB == RE) {
- DEBUG(errs() << "\tEmpty Regclass!!\n");
+ DEBUG(errs() << "\tEmpty Super Regclass!!\n");
return false;
}
+ DEBUG(errs() << "\tFind Registers:");
+
if (RenameOrder.count(SuperRC) == 0)
RenameOrder.insert(RenameOrderType::value_type(SuperRC, RE));
- DEBUG(errs() << "\tFind Register:");
-
const TargetRegisterClass::iterator OrigR = RenameOrder[SuperRC];
const TargetRegisterClass::iterator EndR = ((OrigR == RE) ? RB : OrigR);
TargetRegisterClass::iterator R = OrigR;
do {
if (R == RB) R = RE;
--R;
- const unsigned Reg = *R;
+ const unsigned NewSuperReg = *R;
// Don't replace a register with itself.
- if (Reg == SuperReg) continue;
-
- DEBUG(errs() << " " << TRI->getName(Reg));
+ if (NewSuperReg == SuperReg) continue;
- // If Reg is dead and Reg's most recent def is not before
- // SuperRegs's kill, it's safe to replace SuperReg with Reg. We
- // must also check all subregisters of Reg.
- if (State->IsLive(Reg) || (KillIndices[SuperReg] > DefIndices[Reg])) {
- DEBUG(errs() << "(live)");
- continue;
- } else {
- bool found = false;
- for (const unsigned *Subreg = TRI->getSubRegisters(Reg);
- *Subreg; ++Subreg) {
- unsigned SubregReg = *Subreg;
- if (State->IsLive(SubregReg) || (KillIndices[SuperReg] > DefIndices[SubregReg])) {
- DEBUG(errs() << "(subreg " << TRI->getName(SubregReg) << " live)");
- found = true;
- break;
+ DEBUG(errs() << " [" << TRI->getName(NewSuperReg) << ':');
+ RenameMap.clear();
+
+ // For each referenced group register (which must be a SuperReg or
+ // a subregister of SuperReg), find the corresponding subregister
+ // of NewSuperReg and make sure it is free to be renamed.
+ for (unsigned i = 0, e = Regs.size(); i != e; ++i) {
+ unsigned Reg = Regs[i];
+ unsigned NewReg = 0;
+ if (Reg == SuperReg) {
+ NewReg = NewSuperReg;
+ } else {
+ unsigned NewSubRegIdx = TRI->getSubRegIndex(SuperReg, Reg);
+ if (NewSubRegIdx != 0)
+ NewReg = TRI->getSubReg(NewSuperReg, NewSubRegIdx);
+ }
+
+ DEBUG(errs() << " " << TRI->getName(NewReg));
+
+ // Check if Reg can be renamed to NewReg.
+ BitVector BV = RenameRegisterMap[Reg];
+ if (!BV.test(NewReg)) {
+ DEBUG(errs() << "(no rename)");
+ goto next_super_reg;
+ }
+
+ // If NewReg is dead and NewReg's most recent def is not before
+ // Reg's kill, it's safe to replace Reg with NewReg. We
+ // must also check all aliases of NewReg, because we can't define a
+ // register when any sub or super is already live.
+ if (State->IsLive(NewReg) || (KillIndices[Reg] > DefIndices[NewReg])) {
+ DEBUG(errs() << "(live)");
+ goto next_super_reg;
+ } else {
+ bool found = false;
+ for (const unsigned *Alias = TRI->getAliasSet(NewReg);
+ *Alias; ++Alias) {
+ unsigned AliasReg = *Alias;
+ if (State->IsLive(AliasReg) || (KillIndices[Reg] > DefIndices[AliasReg])) {
+ DEBUG(errs() << "(alias " << TRI->getName(AliasReg) << " live)");
+ found = true;
+ break;
+ }
}
+ if (found)
+ goto next_super_reg;
}
- if (found)
- continue;
+
+ // Record that 'Reg' can be renamed to 'NewReg'.
+ RenameMap.insert(std::pair<unsigned, unsigned>(Reg, NewReg));
}
- if (Reg != 0) {
- DEBUG(errs() << '\n');
- RenameOrder.erase(SuperRC);
- RenameOrder.insert(RenameOrderType::value_type(SuperRC, R));
- RenameMap.insert(std::pair<unsigned, unsigned>(SuperReg, Reg));
- return true;
- }
+ // If we fall out here, then every register in the group can be
+ // renamed, as recorded in RenameMap.
+ RenameOrder.erase(SuperRC);
+ RenameOrder.insert(RenameOrderType::value_type(SuperRC, R));
+ DEBUG(errs() << "]\n");
+ return true;
+
+ next_super_reg:
+ DEBUG(errs() << ']');
} while (R != EndR);
DEBUG(errs() << '\n');
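
The core of the group-rename loop is mapping each group register onto the
corresponding sub-register of the candidate super-register. Factored out as a
hypothetical helper (the TRI calls are the ones used in the hunk above; the
helper itself is not part of the patch):

    static unsigned mapToNewSuper(const TargetRegisterInfo *TRI,
                                  unsigned SuperReg, unsigned NewSuperReg,
                                  unsigned Reg) {
      if (Reg == SuperReg) return NewSuperReg;
      unsigned Idx = TRI->getSubRegIndex(SuperReg, Reg); // 0 if not a sub-reg
      return Idx ? TRI->getSubReg(NewSuperReg, Idx) : 0;
    }
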
@@ -668,7 +696,6 @@ bool AggressiveAntiDepBreaker::FindSuitableFreeRegisters(
///
unsigned AggressiveAntiDepBreaker::BreakAntiDependencies(
std::vector<SUnit>& SUnits,
- CandidateMap& Candidates,
MachineBasicBlock::iterator& Begin,
MachineBasicBlock::iterator& End,
unsigned InsertPosIndex) {
@@ -681,16 +708,6 @@ unsigned AggressiveAntiDepBreaker::BreakAntiDependencies(
// so just duck out immediately if the block is empty.
if (SUnits.empty()) return 0;
- // Manage saved state to enable multiple passes...
- if (AntiDepTrials > 1) {
- if (SavedState == NULL) {
- SavedState = new AggressiveAntiDepState(*State);
- } else {
- delete State;
- State = new AggressiveAntiDepState(*SavedState);
- }
- }
-
// For each regclass the next register to use for renaming.
RenameOrderType RenameOrder;
@@ -719,21 +736,14 @@ unsigned AggressiveAntiDepBreaker::BreakAntiDependencies(
CriticalPathMI = CriticalPathSU->getInstr();
}
- // Even if there are no anti-dependencies we still need to go
- // through the instructions to update Def, Kills, etc.
#ifndef NDEBUG
- if (Candidates.empty()) {
- DEBUG(errs() << "\n===== No anti-dependency candidates\n");
- } else {
- DEBUG(errs() << "\n===== Attempting to break " << Candidates.size() <<
- " anti-dependencies\n");
- DEBUG(errs() << "Available regs:");
- for (unsigned Reg = 0; Reg < TRI->getNumRegs(); ++Reg) {
- if (!State->IsLive(Reg))
- DEBUG(errs() << " " << TRI->getName(Reg));
- }
- DEBUG(errs() << '\n');
+ DEBUG(errs() << "\n===== Aggressive anti-dependency breaking\n");
+ DEBUG(errs() << "Available regs:");
+ for (unsigned Reg = 0; Reg < TRI->getNumRegs(); ++Reg) {
+ if (!State->IsLive(Reg))
+ DEBUG(errs() << " " << TRI->getName(Reg));
}
+ DEBUG(errs() << '\n');
#endif
// Attempt to break anti-dependence edges. Walk the instructions
@@ -754,14 +764,11 @@ unsigned AggressiveAntiDepBreaker::BreakAntiDependencies(
// Process the defs in MI...
PrescanInstruction(MI, Count, PassthruRegs);
- // The the dependence edges that represent anti- and output-
+ // The dependence edges that represent anti- and output-
// dependencies that are candidates for breaking.
std::vector<SDep*> Edges;
SUnit *PathSU = MISUnitMap[MI];
- AntiDepBreaker::CandidateMap::iterator
- citer = Candidates.find(PathSU);
- if (citer != Candidates.end())
- AntiDepEdges(PathSU, citer->second, Edges);
+ AntiDepEdges(PathSU, Edges);
// If MI is not on the critical path, then we don't rename
// registers in the CriticalPathSet.
@@ -817,12 +824,32 @@ unsigned AggressiveAntiDepBreaker::BreakAntiDependencies(
// anti-dependency since those edges would prevent such
// units from being scheduled past each other
// regardless.
+ //
+ // Also, if there are dependencies on other SUnits with the
+ // same register as the anti-dependency, don't attempt to
+ // break it.
+ for (SUnit::pred_iterator P = PathSU->Preds.begin(),
+ PE = PathSU->Preds.end(); P != PE; ++P) {
+ if (P->getSUnit() == NextSU ?
+ (P->getKind() != SDep::Anti || P->getReg() != AntiDepReg) :
+ (P->getKind() == SDep::Data && P->getReg() == AntiDepReg)) {
+ AntiDepReg = 0;
+ break;
+ }
+ }
for (SUnit::pred_iterator P = PathSU->Preds.begin(),
PE = PathSU->Preds.end(); P != PE; ++P) {
- if ((P->getSUnit() == NextSU) && (P->getKind() != SDep::Anti)) {
+ if ((P->getSUnit() == NextSU) && (P->getKind() != SDep::Anti) &&
+ (P->getKind() != SDep::Output)) {
DEBUG(errs() << " (real dependency)\n");
AntiDepReg = 0;
break;
+ } else if ((P->getSUnit() != NextSU) &&
+ (P->getKind() == SDep::Data) &&
+ (P->getReg() == AntiDepReg)) {
+ DEBUG(errs() << " (other dependency)\n");
+ AntiDepReg = 0;
+ break;
}
}
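
The ternary inside the first loop's condition is dense; spelled out, the two
arms are (an equivalent form, names taken from the hunk):

    bool GivesUp;
    if (P->getSUnit() == NextSU)
      // Edge to NextSU: anything but the anti-dep on AntiDepReg itself.
      GivesUp = !(P->getKind() == SDep::Anti && P->getReg() == AntiDepReg);
    else
      // Edge to some other SUnit: a data dependence on the same register.
      GivesUp = (P->getKind() == SDep::Data && P->getReg() == AntiDepReg);
    if (GivesUp) { AntiDepReg = 0; break; }
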
diff --git a/lib/CodeGen/AggressiveAntiDepBreaker.h b/lib/CodeGen/AggressiveAntiDepBreaker.h
index e5c9a7b..8154d2d 100644
--- a/lib/CodeGen/AggressiveAntiDepBreaker.h
+++ b/lib/CodeGen/AggressiveAntiDepBreaker.h
@@ -27,12 +27,11 @@
#include "llvm/Target/TargetRegisterInfo.h"
#include "llvm/ADT/BitVector.h"
#include "llvm/ADT/SmallSet.h"
+#include <map>
namespace llvm {
/// Class AggressiveAntiDepState
- /// Contains all the state necessary for anti-dep breaking. We place
- /// into a separate class so be can conveniently save/restore it to
- /// enable multi-pass anti-dep breaking.
+ /// Contains all the state necessary for anti-dep breaking.
class AggressiveAntiDepState {
public:
/// RegisterReference - Information about a register reference
@@ -126,23 +125,11 @@ namespace llvm {
/// registers.
AggressiveAntiDepState *State;
- /// SavedState - The state for the start of an anti-dep
- /// region. Used to restore the state at the beginning of each
- /// pass
- AggressiveAntiDepState *SavedState;
-
public:
AggressiveAntiDepBreaker(MachineFunction& MFi,
TargetSubtarget::RegClassVector& CriticalPathRCs);
~AggressiveAntiDepBreaker();
- /// GetMaxTrials - As anti-dependencies are broken, additional
- /// dependencies may be exposed, so multiple passes are required.
- unsigned GetMaxTrials();
-
- /// NeedCandidates - Candidates required.
- bool NeedCandidates() { return true; }
-
/// Start - Initialize anti-dep breaking for a new basic block.
void StartBlock(MachineBasicBlock *BB);
@@ -150,7 +137,6 @@ namespace llvm {
/// of the ScheduleDAG and break them by renaming registers.
///
unsigned BreakAntiDependencies(std::vector<SUnit>& SUnits,
- CandidateMap& Candidates,
MachineBasicBlock::iterator& Begin,
MachineBasicBlock::iterator& End,
unsigned InsertPosIndex);
@@ -175,7 +161,9 @@ namespace llvm {
/// return that register and all subregisters.
void GetPassthruRegs(MachineInstr *MI, std::set<unsigned>& PassthruRegs);
- void HandleLastUse(unsigned Reg, unsigned KillIdx, const char *tag);
+ void HandleLastUse(unsigned Reg, unsigned KillIdx, const char *tag,
+ const char *header =NULL, const char *footer =NULL);
+
void PrescanInstruction(MachineInstr *MI, unsigned Count,
std::set<unsigned>& PassthruRegs);
void ScanInstruction(MachineInstr *MI, unsigned Count);
diff --git a/lib/CodeGen/AntiDepBreaker.h b/lib/CodeGen/AntiDepBreaker.h
index b614f68..3ee30c6 100644
--- a/lib/CodeGen/AntiDepBreaker.h
+++ b/lib/CodeGen/AntiDepBreaker.h
@@ -21,9 +21,7 @@
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/ScheduleDAG.h"
#include "llvm/Target/TargetRegisterInfo.h"
-#include "llvm/ADT/SmallSet.h"
-#include "llvm/ADT/SmallVector.h"
-#include <map>
+#include <vector>
namespace llvm {
@@ -32,20 +30,8 @@ namespace llvm {
/// anti-dependencies.
class AntiDepBreaker {
public:
- typedef SmallSet<unsigned, 4> AntiDepRegSet;
- typedef SmallVector<unsigned, 4> AntiDepRegVector;
- typedef std::map<SUnit *, AntiDepRegVector> CandidateMap;
-
virtual ~AntiDepBreaker();
- /// GetMaxTrials - Return the maximum number of anti-dependence
- /// breaking attempts that will be made for a block.
- virtual unsigned GetMaxTrials() =0;
-
- /// NeedCandidates - Return true if the schedule must provide
- /// candidates with BreakAntiDependencies().
- virtual bool NeedCandidates() =0;
-
/// Start - Initialize anti-dep breaking for a new basic block.
virtual void StartBlock(MachineBasicBlock *BB) =0;
@@ -54,7 +40,6 @@ public:
/// the number of anti-dependencies broken.
///
virtual unsigned BreakAntiDependencies(std::vector<SUnit>& SUnits,
- CandidateMap& Candidates,
MachineBasicBlock::iterator& Begin,
MachineBasicBlock::iterator& End,
unsigned InsertPosIndex) =0;
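
With GetMaxTrials and NeedCandidates gone, a scheduler-side driver reduces to
one unconditional call per region; roughly (the caller and counter names are
assumptions, the signature is the one declared above):

    if (AntiDepBreak != NULL) {
      unsigned Broken =
        AntiDepBreak->BreakAntiDependencies(SUnits, Begin, End, InsertPosIndex);
      NumFixedAnti += Broken;  // hypothetical statistic
    }
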
diff --git a/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
index 08e0eae..993cdbf 100644
--- a/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
+++ b/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
@@ -728,7 +728,7 @@ static void printStringChar(formatted_raw_ostream &O, unsigned char C) {
/// EmitString - Emit a string with quotes and a null terminator.
/// Special characters are emitted properly.
/// \literal (Eg. '\t') \endliteral
-void AsmPrinter::EmitString(const std::string &String) const {
+void AsmPrinter::EmitString(const StringRef String) const {
EmitString(String.data(), String.size());
}
@@ -1630,12 +1630,14 @@ bool AsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI, unsigned OpNo,
return true;
}
-MCSymbol *AsmPrinter::GetBlockAddressSymbol(const BlockAddress *BA) const {
- return GetBlockAddressSymbol(BA->getFunction(), BA->getBasicBlock());
+MCSymbol *AsmPrinter::GetBlockAddressSymbol(const BlockAddress *BA,
+ const char *Suffix) const {
+ return GetBlockAddressSymbol(BA->getFunction(), BA->getBasicBlock(), Suffix);
}
MCSymbol *AsmPrinter::GetBlockAddressSymbol(const Function *F,
- const BasicBlock *BB) const {
+ const BasicBlock *BB,
+ const char *Suffix) const {
assert(BB->hasName() &&
"Address of anonymous basic block not supported yet!");
@@ -1647,7 +1649,8 @@ MCSymbol *AsmPrinter::GetBlockAddressSymbol(const Function *F,
SmallString<60> Name;
raw_svector_ostream(Name) << MAI->getPrivateGlobalPrefix() << "BA"
<< FuncName.size() << '_' << FuncName << '_'
- << Mang->makeNameProper(BB->getName());
+ << Mang->makeNameProper(BB->getName())
+ << Suffix;
return OutContext.GetOrCreateSymbol(Name.str());
}
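
For illustration only: with F = "foo", BB = "bb", Suffix = "_0", and a private
global prefix of ".L" (target-dependent), the symbol comes out as:

    // .LBA3_foo_bb_0
    //     ^-- FuncName.size(), here 3 for "foo"

Presumably the declarations give Suffix an empty-string default so existing
callers are unchanged; the parameter exists so one block address can produce
several distinct related symbols.
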
diff --git a/lib/CodeGen/AsmPrinter/DIE.cpp b/lib/CodeGen/AsmPrinter/DIE.cpp
index ecf0007..0e93b98 100644
--- a/lib/CodeGen/AsmPrinter/DIE.cpp
+++ b/lib/CodeGen/AsmPrinter/DIE.cpp
@@ -105,26 +105,14 @@ DIE::~DIE() {
delete Children[i];
}
-/// AddSiblingOffset - Add a sibling offset field to the front of the DIE.
+/// addSiblingOffset - Add a sibling offset field to the front of the DIE.
///
-void DIE::AddSiblingOffset() {
+void DIE::addSiblingOffset() {
DIEInteger *DI = new DIEInteger(0);
Values.insert(Values.begin(), DI);
Abbrev.AddFirstAttribute(dwarf::DW_AT_sibling, dwarf::DW_FORM_ref4);
}
-/// Profile - Used to gather unique data for the value folding set.
-///
-void DIE::Profile(FoldingSetNodeID &ID) {
- Abbrev.Profile(ID);
-
- for (unsigned i = 0, N = Children.size(); i < N; ++i)
- ID.AddPointer(Children[i]);
-
- for (unsigned j = 0, M = Values.size(); j < M; ++j)
- ID.AddPointer(Values[j]);
-}
-
#ifndef NDEBUG
void DIE::print(raw_ostream &O, unsigned IncIndent) {
IndentCount += IncIndent;
@@ -231,16 +219,6 @@ unsigned DIEInteger::SizeOf(const TargetData *TD, unsigned Form) const {
return 0;
}
-/// Profile - Used to gather unique data for the value folding set.
-///
-void DIEInteger::Profile(FoldingSetNodeID &ID, unsigned Int) {
- ID.AddInteger(isInteger);
- ID.AddInteger(Int);
-}
-void DIEInteger::Profile(FoldingSetNodeID &ID) {
- Profile(ID, Integer);
-}
-
#ifndef NDEBUG
void DIEInteger::print(raw_ostream &O) {
O << "Int: " << (int64_t)Integer
@@ -258,16 +236,6 @@ void DIEString::EmitValue(Dwarf *D, unsigned Form) const {
D->getAsm()->EmitString(Str);
}
-/// Profile - Used to gather unique data for the value folding set.
-///
-void DIEString::Profile(FoldingSetNodeID &ID, const std::string &Str) {
- ID.AddInteger(isString);
- ID.AddString(Str);
-}
-void DIEString::Profile(FoldingSetNodeID &ID) {
- Profile(ID, Str);
-}
-
#ifndef NDEBUG
void DIEString::print(raw_ostream &O) {
O << "Str: \"" << Str << "\"";
@@ -292,16 +260,6 @@ unsigned DIEDwarfLabel::SizeOf(const TargetData *TD, unsigned Form) const {
return TD->getPointerSize();
}
-/// Profile - Used to gather unique data for the value folding set.
-///
-void DIEDwarfLabel::Profile(FoldingSetNodeID &ID, const DWLabel &Label) {
- ID.AddInteger(isLabel);
- Label.Profile(ID);
-}
-void DIEDwarfLabel::Profile(FoldingSetNodeID &ID) {
- Profile(ID, Label);
-}
-
#ifndef NDEBUG
void DIEDwarfLabel::print(raw_ostream &O) {
O << "Lbl: ";
@@ -327,16 +285,6 @@ unsigned DIEObjectLabel::SizeOf(const TargetData *TD, unsigned Form) const {
return TD->getPointerSize();
}
-/// Profile - Used to gather unique data for the value folding set.
-///
-void DIEObjectLabel::Profile(FoldingSetNodeID &ID, const std::string &Label) {
- ID.AddInteger(isAsIsLabel);
- ID.AddString(Label);
-}
-void DIEObjectLabel::Profile(FoldingSetNodeID &ID) {
- Profile(ID, Label.c_str());
-}
-
#ifndef NDEBUG
void DIEObjectLabel::print(raw_ostream &O) {
O << "Obj: " << Label;
@@ -363,20 +311,6 @@ unsigned DIESectionOffset::SizeOf(const TargetData *TD, unsigned Form) const {
return TD->getPointerSize();
}
-/// Profile - Used to gather unique data for the value folding set.
-///
-void DIESectionOffset::Profile(FoldingSetNodeID &ID, const DWLabel &Label,
- const DWLabel &Section) {
- ID.AddInteger(isSectionOffset);
- Label.Profile(ID);
- Section.Profile(ID);
- // IsEH and UseSet are specific to the Label/Section that we will emit the
- // offset for; so Label/Section are enough for uniqueness.
-}
-void DIESectionOffset::Profile(FoldingSetNodeID &ID) {
- Profile(ID, Label, Section);
-}
-
#ifndef NDEBUG
void DIESectionOffset::print(raw_ostream &O) {
O << "Off: ";
@@ -405,18 +339,6 @@ unsigned DIEDelta::SizeOf(const TargetData *TD, unsigned Form) const {
return TD->getPointerSize();
}
-/// Profile - Used to gather unique data for the value folding set.
-///
-void DIEDelta::Profile(FoldingSetNodeID &ID, const DWLabel &LabelHi,
- const DWLabel &LabelLo) {
- ID.AddInteger(isDelta);
- LabelHi.Profile(ID);
- LabelLo.Profile(ID);
-}
-void DIEDelta::Profile(FoldingSetNodeID &ID) {
- Profile(ID, LabelHi, LabelLo);
-}
-
#ifndef NDEBUG
void DIEDelta::print(raw_ostream &O) {
O << "Del: ";
@@ -436,21 +358,6 @@ void DIEEntry::EmitValue(Dwarf *D, unsigned Form) const {
D->getAsm()->EmitInt32(Entry->getOffset());
}
-/// Profile - Used to gather unique data for the value folding set.
-///
-void DIEEntry::Profile(FoldingSetNodeID &ID, DIE *Entry) {
- ID.AddInteger(isEntry);
- ID.AddPointer(Entry);
-}
-void DIEEntry::Profile(FoldingSetNodeID &ID) {
- ID.AddInteger(isEntry);
-
- if (Entry)
- ID.AddPointer(Entry);
- else
- ID.AddPointer(this);
-}
-
#ifndef NDEBUG
void DIEEntry::print(raw_ostream &O) {
O << format("Die: 0x%lx", (long)(intptr_t)Entry);
@@ -505,11 +412,6 @@ unsigned DIEBlock::SizeOf(const TargetData *TD, unsigned Form) const {
return 0;
}
-void DIEBlock::Profile(FoldingSetNodeID &ID) {
- ID.AddInteger(isBlock);
- DIE::Profile(ID);
-}
-
#ifndef NDEBUG
void DIEBlock::print(raw_ostream &O) {
O << "Blk: ";
diff --git a/lib/CodeGen/AsmPrinter/DIE.h b/lib/CodeGen/AsmPrinter/DIE.h
index 3e50a15..dc6a70a 100644
--- a/lib/CodeGen/AsmPrinter/DIE.h
+++ b/lib/CodeGen/AsmPrinter/DIE.h
@@ -113,7 +113,7 @@ namespace llvm {
class CompileUnit;
class DIEValue;
- class DIE : public FoldingSetNode {
+ class DIE {
protected:
/// Abbrev - Buffer for constructing abbreviation.
///
@@ -161,38 +161,28 @@ namespace llvm {
void setSize(unsigned S) { Size = S; }
void setAbstractCompileUnit(CompileUnit *CU) { AbstractCU = CU; }
- /// AddValue - Add a value and attributes to a DIE.
+ /// addValue - Add a value and attributes to a DIE.
///
- void AddValue(unsigned Attribute, unsigned Form, DIEValue *Value) {
+ void addValue(unsigned Attribute, unsigned Form, DIEValue *Value) {
Abbrev.AddAttribute(Attribute, Form);
Values.push_back(Value);
}
/// SiblingOffset - Return the offset of the debug information entry's
/// sibling.
- unsigned SiblingOffset() const { return Offset + Size; }
+ unsigned getSiblingOffset() const { return Offset + Size; }
- /// AddSiblingOffset - Add a sibling offset field to the front of the DIE.
+ /// addSiblingOffset - Add a sibling offset field to the front of the DIE.
///
- void AddSiblingOffset();
+ void addSiblingOffset();
- /// AddChild - Add a child to the DIE.
+ /// addChild - Add a child to the DIE.
///
- void AddChild(DIE *Child) {
+ void addChild(DIE *Child) {
Abbrev.setChildrenFlag(dwarf::DW_CHILDREN_yes);
Children.push_back(Child);
}
- /// Detach - Detaches objects connected to it after copying.
- ///
- void Detach() {
- Children.clear();
- }
-
- /// Profile - Used to gather unique data for the value folding set.
- ///
- void Profile(FoldingSetNodeID &ID) ;
-
#ifndef NDEBUG
void print(raw_ostream &O, unsigned IncIndent = 0);
void dump();
@@ -202,7 +192,7 @@ namespace llvm {
//===--------------------------------------------------------------------===//
/// DIEValue - A debug information entry value.
///
- class DIEValue : public FoldingSetNode {
+ class DIEValue {
public:
enum {
isInteger,
@@ -233,10 +223,6 @@ namespace llvm {
///
virtual unsigned SizeOf(const TargetData *TD, unsigned Form) const = 0;
- /// Profile - Used to gather unique data for the value folding set.
- ///
- virtual void Profile(FoldingSetNodeID &ID) = 0;
-
// Implement isa/cast/dyncast.
static bool classof(const DIEValue *) { return true; }
@@ -277,10 +263,6 @@ namespace llvm {
///
virtual unsigned SizeOf(const TargetData *TD, unsigned Form) const;
- /// Profile - Used to gather unique data for the value folding set.
- ///
- static void Profile(FoldingSetNodeID &ID, unsigned Int);
- virtual void Profile(FoldingSetNodeID &ID);
// Implement isa/cast/dyncast.
static bool classof(const DIEInteger *) { return true; }
@@ -295,9 +277,9 @@ namespace llvm {
/// DIEString - A string value DIE.
///
class DIEString : public DIEValue {
- const std::string Str;
+ const StringRef Str;
public:
- explicit DIEString(const std::string &S) : DIEValue(isString), Str(S) {}
+ explicit DIEString(const StringRef S) : DIEValue(isString), Str(S) {}
/// EmitValue - Emit string value.
///
@@ -309,11 +291,6 @@ namespace llvm {
return Str.size() + sizeof(char); // sizeof('\0');
}
- /// Profile - Used to gather unique data for the value folding set.
- ///
- static void Profile(FoldingSetNodeID &ID, const std::string &Str);
- virtual void Profile(FoldingSetNodeID &ID);
-
// Implement isa/cast/dyncast.
static bool classof(const DIEString *) { return true; }
static bool classof(const DIEValue *S) { return S->getType() == isString; }
@@ -339,11 +316,6 @@ namespace llvm {
///
virtual unsigned SizeOf(const TargetData *TD, unsigned Form) const;
- /// Profile - Used to gather unique data for the value folding set.
- ///
- static void Profile(FoldingSetNodeID &ID, const DWLabel &Label);
- virtual void Profile(FoldingSetNodeID &ID);
-
// Implement isa/cast/dyncast.
static bool classof(const DIEDwarfLabel *) { return true; }
static bool classof(const DIEValue *L) { return L->getType() == isLabel; }
@@ -370,11 +342,6 @@ namespace llvm {
///
virtual unsigned SizeOf(const TargetData *TD, unsigned Form) const;
- /// Profile - Used to gather unique data for the value folding set.
- ///
- static void Profile(FoldingSetNodeID &ID, const std::string &Label);
- virtual void Profile(FoldingSetNodeID &ID);
-
// Implement isa/cast/dyncast.
static bool classof(const DIEObjectLabel *) { return true; }
static bool classof(const DIEValue *L) {
@@ -408,12 +375,6 @@ namespace llvm {
///
virtual unsigned SizeOf(const TargetData *TD, unsigned Form) const;
- /// Profile - Used to gather unique data for the value folding set.
- ///
- static void Profile(FoldingSetNodeID &ID, const DWLabel &Label,
- const DWLabel &Section);
- virtual void Profile(FoldingSetNodeID &ID);
-
// Implement isa/cast/dyncast.
static bool classof(const DIESectionOffset *) { return true; }
static bool classof(const DIEValue *D) {
@@ -443,12 +404,6 @@ namespace llvm {
///
virtual unsigned SizeOf(const TargetData *TD, unsigned Form) const;
- /// Profile - Used to gather unique data for the value folding set.
- ///
- static void Profile(FoldingSetNodeID &ID, const DWLabel &LabelHi,
- const DWLabel &LabelLo);
- virtual void Profile(FoldingSetNodeID &ID);
-
// Implement isa/cast/dyncast.
static bool classof(const DIEDelta *) { return true; }
static bool classof(const DIEValue *D) { return D->getType() == isDelta; }
@@ -480,11 +435,6 @@ namespace llvm {
return sizeof(int32_t);
}
- /// Profile - Used to gather unique data for the value folding set.
- ///
- static void Profile(FoldingSetNodeID &ID, DIE *Entry);
- virtual void Profile(FoldingSetNodeID &ID);
-
// Implement isa/cast/dyncast.
static bool classof(const DIEEntry *) { return true; }
static bool classof(const DIEValue *E) { return E->getType() == isEntry; }
@@ -525,10 +475,6 @@ namespace llvm {
///
virtual unsigned SizeOf(const TargetData *TD, unsigned Form) const;
- /// Profile - Used to gather unique data for the value folding set.
- ///
- virtual void Profile(FoldingSetNodeID &ID);
-
// Implement isa/cast/dyncast.
static bool classof(const DIEBlock *) { return true; }
static bool classof(const DIEValue *E) { return E->getType() == isBlock; }
diff --git a/lib/CodeGen/AsmPrinter/DwarfDebug.cpp b/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
index c62c435..c2e1e05 100644
--- a/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
+++ b/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
@@ -39,9 +39,7 @@ static TimerGroup &getDwarfTimerGroup() {
/// Configuration values for initial hash set sizes (log2).
///
-static const unsigned InitDiesSetSize = 9; // log2(512)
static const unsigned InitAbbreviationsSetSize = 9; // log2(512)
-static const unsigned InitValuesSetSize = 9; // log2(512)
namespace llvm {
@@ -55,70 +53,89 @@ class CompileUnit {
/// Die - Compile unit debug information entry.
///
- DIE *Die;
+ DIE *CUDie;
+
+ /// IndexTyDie - An anonymous type for index type.
+ DIE *IndexTyDie;
/// GVToDieMap - Tracks the mapping of unit level debug information
/// variables to debug information entries.
/// FIXME : Rename GVToDieMap -> NodeToDieMap
- std::map<MDNode *, DIE *> GVToDieMap;
+ ValueMap<MDNode *, DIE *> GVToDieMap;
/// GVToDIEEntryMap - Tracks the mapping of unit level debug information
/// descriptors to debug information entries using a DIEEntry proxy.
/// FIXME : Rename
- std::map<MDNode *, DIEEntry *> GVToDIEEntryMap;
+ ValueMap<MDNode *, DIEEntry *> GVToDIEEntryMap;
/// Globals - A map of globally visible named entities for this unit.
///
StringMap<DIE*> Globals;
- /// DiesSet - Used to uniquely define dies within the compile unit.
+ /// GlobalTypes - A map of globally visible types for this unit.
///
- FoldingSet<DIE> DiesSet;
+ StringMap<DIE*> GlobalTypes;
+
public:
CompileUnit(unsigned I, DIE *D)
- : ID(I), Die(D), DiesSet(InitDiesSetSize) {}
- ~CompileUnit() { delete Die; }
+ : ID(I), CUDie(D), IndexTyDie(0) {}
+ ~CompileUnit() { delete CUDie; delete IndexTyDie; }
// Accessors.
- unsigned getID() const { return ID; }
- DIE* getDie() const { return Die; }
- StringMap<DIE*> &getGlobals() { return Globals; }
+ unsigned getID() const { return ID; }
+ DIE* getCUDie() const { return CUDie; }
+ const StringMap<DIE*> &getGlobals() const { return Globals; }
+ const StringMap<DIE*> &getGlobalTypes() const { return GlobalTypes; }
/// hasContent - Return true if this compile unit has something to write out.
///
- bool hasContent() const { return !Die->getChildren().empty(); }
+ bool hasContent() const { return !CUDie->getChildren().empty(); }
- /// AddGlobal - Add a new global entity to the compile unit.
+ /// addGlobal - Add a new global entity to the compile unit.
///
- void AddGlobal(const std::string &Name, DIE *Die) { Globals[Name] = Die; }
+ void addGlobal(const std::string &Name, DIE *Die) { Globals[Name] = Die; }
+
+ /// addGlobalType - Add a new global type to the compile unit.
+ ///
+ void addGlobalType(const std::string &Name, DIE *Die) {
+ GlobalTypes[Name] = Die;
+ }
- /// getDieMapSlotFor - Returns the debug information entry map slot for the
+ /// getDIE - Returns the debug information entry for the
/// specified debug variable.
- DIE *&getDieMapSlotFor(MDNode *N) { return GVToDieMap[N]; }
+ DIE *getDIE(MDNode *N) { return GVToDieMap.lookup(N); }
- /// getDIEEntrySlotFor - Returns the debug information entry proxy slot for
- /// the specified debug variable.
- DIEEntry *&getDIEEntrySlotFor(MDNode *N) {
- return GVToDIEEntryMap[N];
+ /// insertDIE - Insert DIE into the map.
+ void insertDIE(MDNode *N, DIE *D) {
+ GVToDieMap.insert(std::make_pair(N, D));
}
- /// AddDie - Adds or interns the DIE to the compile unit.
+ /// getDIEEntry - Returns the debug information entry for the specified
+ /// debug variable.
+ DIEEntry *getDIEEntry(MDNode *N) { return GVToDIEEntryMap.lookup(N); }
+
+ /// insertDIEEntry - Insert debug information entry into the map.
+ void insertDIEEntry(MDNode *N, DIEEntry *E) {
+ GVToDIEEntryMap.insert(std::make_pair(N, E));
+ }
+
+ /// addDie - Adds the DIE to the compile unit.
///
- DIE *AddDie(DIE &Buffer) {
- FoldingSetNodeID ID;
- Buffer.Profile(ID);
- void *Where;
- DIE *Die = DiesSet.FindNodeOrInsertPos(ID, Where);
-
- if (!Die) {
- Die = new DIE(Buffer);
- DiesSet.InsertNode(Die, Where);
- this->Die->AddChild(Die);
- Buffer.Detach();
- }
+ void addDie(DIE *Buffer) {
+ this->CUDie->addChild(Buffer);
+ }
+
+ // getIndexTyDie - Get an anonymous type for index type.
+ DIE *getIndexTyDie() {
+ return IndexTyDie;
+ }
- return Die;
+ // setIndexTyDie - Set D as the anonymous index type, which can be reused
+ // later.
+ void setIndexTyDie(DIE *D) {
+ IndexTyDie = D;
}
+
};
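
Callers move from the old reference-returning slot accessors to an explicit
lookup-then-insert idiom; a sketch (N is an MDNode*, the tag is chosen purely
for illustration):

    DIE *D = CU->getDIE(N);                 // NULL if nothing mapped yet
    if (!D) {
      D = new DIE(dwarf::DW_TAG_variable);
      CU->insertDIE(N, D);
    }
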
//===----------------------------------------------------------------------===//
@@ -147,7 +164,7 @@ public:
///
class DbgScope {
DbgScope *Parent; // Parent to this scope.
- DIDescriptor Desc; // Debug info descriptor for scope.
+ DIDescriptor Desc; // Debug info descriptor for scope.
WeakVH InlinedAtLocation; // Location at which scope is inlined.
bool AbstractScope; // Abstract Scope
unsigned StartLabelID; // Label ID of the beginning of scope.
@@ -162,7 +179,7 @@ class DbgScope {
public:
DbgScope(DbgScope *P, DIDescriptor D, MDNode *I = 0)
: Parent(P), Desc(D), InlinedAtLocation(I), AbstractScope(false),
- StartLabelID(0), EndLabelID(0),
+ StartLabelID(0), EndLabelID(0),
LastInsn(0), FirstInsn(0), IndentLevel(0) {}
virtual ~DbgScope();
@@ -170,7 +187,7 @@ public:
DbgScope *getParent() const { return Parent; }
void setParent(DbgScope *P) { Parent = P; }
DIDescriptor getDesc() const { return Desc; }
- MDNode *getInlinedAt() const {
+ MDNode *getInlinedAt() const {
return dyn_cast_or_null<MDNode>(InlinedAtLocation);
}
MDNode *getScopeNode() const { return Desc.getNode(); }
@@ -187,26 +204,26 @@ public:
bool isAbstractScope() const { return AbstractScope; }
const MachineInstr *getFirstInsn() { return FirstInsn; }
- /// AddScope - Add a scope to the scope.
+ /// addScope - Add a scope to the scope.
///
- void AddScope(DbgScope *S) { Scopes.push_back(S); }
+ void addScope(DbgScope *S) { Scopes.push_back(S); }
- /// AddVariable - Add a variable to the scope.
+ /// addVariable - Add a variable to the scope.
///
- void AddVariable(DbgVariable *V) { Variables.push_back(V); }
+ void addVariable(DbgVariable *V) { Variables.push_back(V); }
- void FixInstructionMarkers() {
+ void fixInstructionMarkers() {
assert (getFirstInsn() && "First instruction is missing!");
if (getLastInsn())
return;
-
+
// If a scope does not have an instruction to mark an end then use
// the end of last child scope.
SmallVector<DbgScope *, 4> &Scopes = getScopes();
assert (!Scopes.empty() && "Inner most scope does not have last insn!");
DbgScope *L = Scopes.back();
if (!L->getLastInsn())
- L->FixInstructionMarkers();
+ L->fixInstructionMarkers();
setLastInsn(L->getLastInsn());
}
@@ -236,21 +253,6 @@ void DbgScope::dump() const {
}
#endif
-//===----------------------------------------------------------------------===//
-/// DbgConcreteScope - This class is used to track a scope that holds concrete
-/// instance information.
-///
-class DbgConcreteScope : public DbgScope {
- CompileUnit *Unit;
- DIE *Die; // Debug info for this concrete scope.
-public:
- DbgConcreteScope(DIDescriptor D) : DbgScope(NULL, D) {}
-
- // Accessors.
- DIE *getDie() const { return Die; }
- void setDie(DIE *D) { Die = D; }
-};
-
DbgScope::~DbgScope() {
for (unsigned i = 0, N = Scopes.size(); i < N; ++i)
delete Scopes[i];
@@ -263,7 +265,7 @@ DbgScope::~DbgScope() {
DwarfDebug::DwarfDebug(raw_ostream &OS, AsmPrinter *A, const MCAsmInfo *T)
: Dwarf(OS, A, T, "dbg"), ModuleCU(0),
AbbreviationsSet(InitAbbreviationsSetSize), Abbreviations(),
- ValuesSet(InitValuesSetSize), Values(), StringPool(),
+ DIEValues(), StringPool(),
SectionSourceLines(), didInitial(false), shouldEmit(false),
CurrentFnDbgScope(0), DebugTimer(0) {
if (TimePassesIsEnabled)
@@ -271,15 +273,15 @@ DwarfDebug::DwarfDebug(raw_ostream &OS, AsmPrinter *A, const MCAsmInfo *T)
getDwarfTimerGroup());
}
DwarfDebug::~DwarfDebug() {
- for (unsigned j = 0, M = Values.size(); j < M; ++j)
- delete Values[j];
+ for (unsigned j = 0, M = DIEValues.size(); j < M; ++j)
+ delete DIEValues[j];
delete DebugTimer;
}
-/// AssignAbbrevNumber - Define a unique number for the abbreviation.
+/// assignAbbrevNumber - Define a unique number for the abbreviation.
///
-void DwarfDebug::AssignAbbrevNumber(DIEAbbrev &Abbrev) {
+void DwarfDebug::assignAbbrevNumber(DIEAbbrev &Abbrev) {
// Profile the node so that we can make it unique.
FoldingSetNodeID ID;
Abbrev.Profile(ID);
@@ -300,224 +302,120 @@ void DwarfDebug::AssignAbbrevNumber(DIEAbbrev &Abbrev) {
}
}
-/// CreateDIEEntry - Creates a new DIEEntry to be a proxy for a debug
+/// createDIEEntry - Creates a new DIEEntry to be a proxy for a debug
/// information entry.
-DIEEntry *DwarfDebug::CreateDIEEntry(DIE *Entry) {
- DIEEntry *Value;
-
- if (Entry) {
- FoldingSetNodeID ID;
- DIEEntry::Profile(ID, Entry);
- void *Where;
- Value = static_cast<DIEEntry *>(ValuesSet.FindNodeOrInsertPos(ID, Where));
-
- if (Value) return Value;
-
- Value = new DIEEntry(Entry);
- ValuesSet.InsertNode(Value, Where);
- } else {
- Value = new DIEEntry(Entry);
- }
-
- Values.push_back(Value);
+DIEEntry *DwarfDebug::createDIEEntry(DIE *Entry) {
+ DIEEntry *Value = new DIEEntry(Entry);
+ DIEValues.push_back(Value);
return Value;
}
-/// SetDIEEntry - Set a DIEEntry once the debug information entry is defined.
-///
-void DwarfDebug::SetDIEEntry(DIEEntry *Value, DIE *Entry) {
- Value->setEntry(Entry);
-
- // Add to values set if not already there. If it is, we merely have a
- // duplicate in the values list (no harm.)
- ValuesSet.GetOrInsertNode(Value);
-}
-
-/// AddUInt - Add an unsigned integer attribute data and value.
+/// addUInt - Add an unsigned integer attribute data and value.
///
-void DwarfDebug::AddUInt(DIE *Die, unsigned Attribute,
+void DwarfDebug::addUInt(DIE *Die, unsigned Attribute,
unsigned Form, uint64_t Integer) {
if (!Form) Form = DIEInteger::BestForm(false, Integer);
-
- FoldingSetNodeID ID;
- DIEInteger::Profile(ID, Integer);
- void *Where;
- DIEValue *Value = ValuesSet.FindNodeOrInsertPos(ID, Where);
-
- if (!Value) {
- Value = new DIEInteger(Integer);
- ValuesSet.InsertNode(Value, Where);
- Values.push_back(Value);
- }
-
- Die->AddValue(Attribute, Form, Value);
+ DIEValue *Value = new DIEInteger(Integer);
+ DIEValues.push_back(Value);
+ Die->addValue(Attribute, Form, Value);
}
-/// AddSInt - Add an signed integer attribute data and value.
+/// addSInt - Add a signed integer attribute data and value.
///
-void DwarfDebug::AddSInt(DIE *Die, unsigned Attribute,
+void DwarfDebug::addSInt(DIE *Die, unsigned Attribute,
unsigned Form, int64_t Integer) {
if (!Form) Form = DIEInteger::BestForm(true, Integer);
-
- FoldingSetNodeID ID;
- DIEInteger::Profile(ID, (uint64_t)Integer);
- void *Where;
- DIEValue *Value = ValuesSet.FindNodeOrInsertPos(ID, Where);
-
- if (!Value) {
- Value = new DIEInteger(Integer);
- ValuesSet.InsertNode(Value, Where);
- Values.push_back(Value);
- }
-
- Die->AddValue(Attribute, Form, Value);
+ DIEValue *Value = new DIEInteger(Integer);
+ DIEValues.push_back(Value);
+ Die->addValue(Attribute, Form, Value);
}
-/// AddString - Add a string attribute data and value.
+/// addString - Add a string attribute data and value.
///
-void DwarfDebug::AddString(DIE *Die, unsigned Attribute, unsigned Form,
- const std::string &String) {
- FoldingSetNodeID ID;
- DIEString::Profile(ID, String);
- void *Where;
- DIEValue *Value = ValuesSet.FindNodeOrInsertPos(ID, Where);
-
- if (!Value) {
- Value = new DIEString(String);
- ValuesSet.InsertNode(Value, Where);
- Values.push_back(Value);
- }
-
- Die->AddValue(Attribute, Form, Value);
+void DwarfDebug::addString(DIE *Die, unsigned Attribute, unsigned Form,
+ const StringRef String) {
+ DIEValue *Value = new DIEString(String);
+ DIEValues.push_back(Value);
+ Die->addValue(Attribute, Form, Value);
}
-/// AddLabel - Add a Dwarf label attribute data and value.
+/// addLabel - Add a Dwarf label attribute data and value.
///
-void DwarfDebug::AddLabel(DIE *Die, unsigned Attribute, unsigned Form,
+void DwarfDebug::addLabel(DIE *Die, unsigned Attribute, unsigned Form,
const DWLabel &Label) {
- FoldingSetNodeID ID;
- DIEDwarfLabel::Profile(ID, Label);
- void *Where;
- DIEValue *Value = ValuesSet.FindNodeOrInsertPos(ID, Where);
-
- if (!Value) {
- Value = new DIEDwarfLabel(Label);
- ValuesSet.InsertNode(Value, Where);
- Values.push_back(Value);
- }
-
- Die->AddValue(Attribute, Form, Value);
+ DIEValue *Value = new DIEDwarfLabel(Label);
+ DIEValues.push_back(Value);
+ Die->addValue(Attribute, Form, Value);
}
-/// AddObjectLabel - Add an non-Dwarf label attribute data and value.
+/// addObjectLabel - Add a non-Dwarf label attribute data and value.
///
-void DwarfDebug::AddObjectLabel(DIE *Die, unsigned Attribute, unsigned Form,
+void DwarfDebug::addObjectLabel(DIE *Die, unsigned Attribute, unsigned Form,
const std::string &Label) {
- FoldingSetNodeID ID;
- DIEObjectLabel::Profile(ID, Label);
- void *Where;
- DIEValue *Value = ValuesSet.FindNodeOrInsertPos(ID, Where);
-
- if (!Value) {
- Value = new DIEObjectLabel(Label);
- ValuesSet.InsertNode(Value, Where);
- Values.push_back(Value);
- }
-
- Die->AddValue(Attribute, Form, Value);
+ DIEValue *Value = new DIEObjectLabel(Label);
+ DIEValues.push_back(Value);
+ Die->addValue(Attribute, Form, Value);
}
-/// AddSectionOffset - Add a section offset label attribute data and value.
+/// addSectionOffset - Add a section offset label attribute data and value.
///
-void DwarfDebug::AddSectionOffset(DIE *Die, unsigned Attribute, unsigned Form,
+void DwarfDebug::addSectionOffset(DIE *Die, unsigned Attribute, unsigned Form,
const DWLabel &Label, const DWLabel &Section,
bool isEH, bool useSet) {
- FoldingSetNodeID ID;
- DIESectionOffset::Profile(ID, Label, Section);
- void *Where;
- DIEValue *Value = ValuesSet.FindNodeOrInsertPos(ID, Where);
-
- if (!Value) {
- Value = new DIESectionOffset(Label, Section, isEH, useSet);
- ValuesSet.InsertNode(Value, Where);
- Values.push_back(Value);
- }
-
- Die->AddValue(Attribute, Form, Value);
+ DIEValue *Value = new DIESectionOffset(Label, Section, isEH, useSet);
+ DIEValues.push_back(Value);
+ Die->addValue(Attribute, Form, Value);
}
-/// AddDelta - Add a label delta attribute data and value.
+/// addDelta - Add a label delta attribute data and value.
///
-void DwarfDebug::AddDelta(DIE *Die, unsigned Attribute, unsigned Form,
+void DwarfDebug::addDelta(DIE *Die, unsigned Attribute, unsigned Form,
const DWLabel &Hi, const DWLabel &Lo) {
- FoldingSetNodeID ID;
- DIEDelta::Profile(ID, Hi, Lo);
- void *Where;
- DIEValue *Value = ValuesSet.FindNodeOrInsertPos(ID, Where);
-
- if (!Value) {
- Value = new DIEDelta(Hi, Lo);
- ValuesSet.InsertNode(Value, Where);
- Values.push_back(Value);
- }
-
- Die->AddValue(Attribute, Form, Value);
+ DIEValue *Value = new DIEDelta(Hi, Lo);
+ DIEValues.push_back(Value);
+ Die->addValue(Attribute, Form, Value);
}
-/// AddBlock - Add block data.
+/// addBlock - Add block data.
///
-void DwarfDebug::AddBlock(DIE *Die, unsigned Attribute, unsigned Form,
+void DwarfDebug::addBlock(DIE *Die, unsigned Attribute, unsigned Form,
DIEBlock *Block) {
Block->ComputeSize(TD);
- FoldingSetNodeID ID;
- Block->Profile(ID);
- void *Where;
- DIEValue *Value = ValuesSet.FindNodeOrInsertPos(ID, Where);
-
- if (!Value) {
- Value = Block;
- ValuesSet.InsertNode(Value, Where);
- Values.push_back(Value);
- } else {
- // Already exists, reuse the previous one.
- delete Block;
- Block = cast<DIEBlock>(Value);
- }
-
- Die->AddValue(Attribute, Block->BestForm(), Value);
+ DIEValues.push_back(Block);
+ Die->addValue(Attribute, Block->BestForm(), Block);
}
-/// AddSourceLine - Add location information to specified debug information
+/// addSourceLine - Add location information to specified debug information
/// entry.
-void DwarfDebug::AddSourceLine(DIE *Die, const DIVariable *V) {
+void DwarfDebug::addSourceLine(DIE *Die, const DIVariable *V) {
// If there is no compile unit specified, don't add a line #.
if (V->getCompileUnit().isNull())
return;
unsigned Line = V->getLineNumber();
- unsigned FileID = FindCompileUnit(V->getCompileUnit()).getID();
+ unsigned FileID = findCompileUnit(V->getCompileUnit()).getID();
assert(FileID && "Invalid file id");
- AddUInt(Die, dwarf::DW_AT_decl_file, 0, FileID);
- AddUInt(Die, dwarf::DW_AT_decl_line, 0, Line);
+ addUInt(Die, dwarf::DW_AT_decl_file, 0, FileID);
+ addUInt(Die, dwarf::DW_AT_decl_line, 0, Line);
}
-/// AddSourceLine - Add location information to specified debug information
+/// addSourceLine - Add location information to specified debug information
/// entry.
-void DwarfDebug::AddSourceLine(DIE *Die, const DIGlobal *G) {
+void DwarfDebug::addSourceLine(DIE *Die, const DIGlobal *G) {
// If there is no compile unit specified, don't add a line #.
if (G->getCompileUnit().isNull())
return;
unsigned Line = G->getLineNumber();
- unsigned FileID = FindCompileUnit(G->getCompileUnit()).getID();
+ unsigned FileID = findCompileUnit(G->getCompileUnit()).getID();
assert(FileID && "Invalid file id");
- AddUInt(Die, dwarf::DW_AT_decl_file, 0, FileID);
- AddUInt(Die, dwarf::DW_AT_decl_line, 0, Line);
+ addUInt(Die, dwarf::DW_AT_decl_file, 0, FileID);
+ addUInt(Die, dwarf::DW_AT_decl_line, 0, Line);
}
-/// AddSourceLine - Add location information to specified debug information
+/// addSourceLine - Add location information to specified debug information
/// entry.
-void DwarfDebug::AddSourceLine(DIE *Die, const DISubprogram *SP) {
+void DwarfDebug::addSourceLine(DIE *Die, const DISubprogram *SP) {
// If there is no compile unit specified, don't add a line #.
if (SP->getCompileUnit().isNull())
return;
@@ -527,25 +425,25 @@ void DwarfDebug::AddSourceLine(DIE *Die, const DISubprogram *SP) {
unsigned Line = SP->getLineNumber();
- unsigned FileID = FindCompileUnit(SP->getCompileUnit()).getID();
+ unsigned FileID = findCompileUnit(SP->getCompileUnit()).getID();
assert(FileID && "Invalid file id");
- AddUInt(Die, dwarf::DW_AT_decl_file, 0, FileID);
- AddUInt(Die, dwarf::DW_AT_decl_line, 0, Line);
+ addUInt(Die, dwarf::DW_AT_decl_file, 0, FileID);
+ addUInt(Die, dwarf::DW_AT_decl_line, 0, Line);
}
-/// AddSourceLine - Add location information to specified debug information
+/// addSourceLine - Add location information to specified debug information
/// entry.
-void DwarfDebug::AddSourceLine(DIE *Die, const DIType *Ty) {
+void DwarfDebug::addSourceLine(DIE *Die, const DIType *Ty) {
// If there is no compile unit specified, don't add a line #.
DICompileUnit CU = Ty->getCompileUnit();
if (CU.isNull())
return;
unsigned Line = Ty->getLineNumber();
- unsigned FileID = FindCompileUnit(CU).getID();
+ unsigned FileID = findCompileUnit(CU).getID();
assert(FileID && "Invalid file id");
- AddUInt(Die, dwarf::DW_AT_decl_file, 0, FileID);
- AddUInt(Die, dwarf::DW_AT_decl_line, 0, Line);
+ addUInt(Die, dwarf::DW_AT_decl_file, 0, FileID);
+ addUInt(Die, dwarf::DW_AT_decl_line, 0, Line);
}
/* Byref variables, in Blocks, are declared by the programmer as
@@ -571,12 +469,12 @@ void DwarfDebug::AddSourceLine(DIE *Die, const DIType *Ty) {
side, the Debug Information Entry for the variable VarName needs to
have a DW_AT_location that tells the debugger how to unwind through
the pointers and __Block_byref_x_VarName struct to find the actual
- value of the variable. The function AddBlockByrefType does this. */
+ value of the variable. The function addBlockByrefType does this. */
/// Find the type the programmer originally declared the variable to be
/// and return that type.
///
-DIType DwarfDebug::GetBlockByrefType(DIType Ty, std::string Name) {
+DIType DwarfDebug::getBlockByrefType(DIType Ty, std::string Name) {
DIType subType = Ty;
unsigned tag = Ty.getTag();
@@ -596,19 +494,19 @@ DIType DwarfDebug::GetBlockByrefType(DIType Ty, std::string Name) {
for (unsigned i = 0, N = Elements.getNumElements(); i < N; ++i) {
DIDescriptor Element = Elements.getElement(i);
DIDerivedType DT = DIDerivedType(Element.getNode());
- if (strcmp(Name.c_str(), DT.getName()) == 0)
+ if (Name == DT.getName())
return (DT.getTypeDerivedFrom());
}
return Ty;
}
-/// AddComplexAddress - Start with the address based on the location provided,
+/// addComplexAddress - Start with the address based on the location provided,
/// and generate the DWARF information necessary to find the actual variable
/// given the extra address information encoded in the DIVariable, starting from
/// the starting location. Add the DWARF information to the die.
///
-void DwarfDebug::AddComplexAddress(DbgVariable *&DV, DIE *Die,
+void DwarfDebug::addComplexAddress(DbgVariable *&DV, DIE *Die,
unsigned Attribute,
const MachineLocation &Location) {
const DIVariable &VD = DV->getVariable();
@@ -621,36 +519,36 @@ void DwarfDebug::AddComplexAddress(DbgVariable *&DV, DIE *Die,
if (Location.isReg()) {
if (Reg < 32) {
- AddUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_reg0 + Reg);
+ addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_reg0 + Reg);
} else {
Reg = Reg - dwarf::DW_OP_reg0;
- AddUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_breg0 + Reg);
- AddUInt(Block, 0, dwarf::DW_FORM_udata, Reg);
+ addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_breg0 + Reg);
+ addUInt(Block, 0, dwarf::DW_FORM_udata, Reg);
}
} else {
if (Reg < 32)
- AddUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_breg0 + Reg);
+ addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_breg0 + Reg);
else {
- AddUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_bregx);
- AddUInt(Block, 0, dwarf::DW_FORM_udata, Reg);
+ addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_bregx);
+ addUInt(Block, 0, dwarf::DW_FORM_udata, Reg);
}
- AddUInt(Block, 0, dwarf::DW_FORM_sdata, Location.getOffset());
+ addUInt(Block, 0, dwarf::DW_FORM_sdata, Location.getOffset());
}
for (unsigned i = 0, N = VD.getNumAddrElements(); i < N; ++i) {
uint64_t Element = VD.getAddrElement(i);
if (Element == DIFactory::OpPlus) {
- AddUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_plus_uconst);
- AddUInt(Block, 0, dwarf::DW_FORM_udata, VD.getAddrElement(++i));
+ addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_plus_uconst);
+ addUInt(Block, 0, dwarf::DW_FORM_udata, VD.getAddrElement(++i));
} else if (Element == DIFactory::OpDeref) {
- AddUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_deref);
+ addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_deref);
} else llvm_unreachable("unknown DIFactory Opcode");
}
// Now attach the location information to the DIE.
- AddBlock(Die, Attribute, 0, Block);
+ addBlock(Die, Attribute, 0, Block);
}
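
The block built above encodes a standard DWARF location expression; its shape,
in bytes (illustrative, not exhaustive):

    // DW_OP_reg0+Reg                              value lives in a low register
    // DW_OP_breg0+Reg, SLEB128(offset)            value lives at [Reg + offset]
    // DW_OP_bregx, ULEB128(Reg), SLEB128(offset)  same, for register numbers >= 32
    // ... then DW_OP_plus_uconst / DW_OP_deref pairs from the DIVariable.
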
/* Byref variables, in Blocks, are declared by the programmer as "SomeType
@@ -662,7 +560,7 @@ void DwarfDebug::AddComplexAddress(DbgVariable *&DV, DIE *Die,
However, as far as the original *programmer* is concerned, the variable
should still have type 'SomeType', as originally declared.
- The function GetBlockByrefType dives into the __Block_byref_x_VarName
+ The function getBlockByrefType dives into the __Block_byref_x_VarName
struct to find the original type of the variable, which is then assigned to
the variable's Debug Information Entry as its real type. So far, so good.
However now the debugger will expect the variable VarName to have the type
@@ -707,13 +605,13 @@ void DwarfDebug::AddComplexAddress(DbgVariable *&DV, DIE *Die,
That is what this function does. */
-/// AddBlockByrefAddress - Start with the address based on the location
+/// addBlockByrefAddress - Start with the address based on the location
/// provided, and generate the DWARF information necessary to find the
/// actual Block variable (navigating the Block struct) based on the
/// starting location. Add the DWARF information to the die. For
/// more information, read large comment just above here.
///
-void DwarfDebug::AddBlockByrefAddress(DbgVariable *&DV, DIE *Die,
+void DwarfDebug::addBlockByrefAddress(DbgVariable *&DV, DIE *Die,
unsigned Attribute,
const MachineLocation &Location) {
const DIVariable &VD = DV->getVariable();
@@ -722,7 +620,7 @@ void DwarfDebug::AddBlockByrefAddress(DbgVariable *&DV, DIE *Die,
unsigned Tag = Ty.getTag();
bool isPointer = false;
- const char *varName = VD.getName();
+ StringRef varName = VD.getName();
if (Tag == dwarf::DW_TAG_pointer_type) {
DIDerivedType DTy = DIDerivedType(Ty.getNode());
@@ -742,10 +640,10 @@ void DwarfDebug::AddBlockByrefAddress(DbgVariable *&DV, DIE *Die,
for (unsigned i = 0, N = Fields.getNumElements(); i < N; ++i) {
DIDescriptor Element = Fields.getElement(i);
DIDerivedType DT = DIDerivedType(Element.getNode());
- const char *fieldName = DT.getName();
- if (strcmp(fieldName, "__forwarding") == 0)
+ StringRef fieldName = DT.getName();
+ if (fieldName == "__forwarding")
forwardingField = Element;
- else if (strcmp(fieldName, varName) == 0)
+ else if (fieldName == varName)
varField = Element;
}
@@ -766,148 +664,144 @@ void DwarfDebug::AddBlockByrefAddress(DbgVariable *&DV, DIE *Die,
if (Location.isReg()) {
if (Reg < 32)
- AddUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_reg0 + Reg);
+ addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_reg0 + Reg);
else {
Reg = Reg - dwarf::DW_OP_reg0;
- AddUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_breg0 + Reg);
- AddUInt(Block, 0, dwarf::DW_FORM_udata, Reg);
+ addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_breg0 + Reg);
+ addUInt(Block, 0, dwarf::DW_FORM_udata, Reg);
}
} else {
if (Reg < 32)
- AddUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_breg0 + Reg);
+ addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_breg0 + Reg);
else {
- AddUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_bregx);
- AddUInt(Block, 0, dwarf::DW_FORM_udata, Reg);
+ addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_bregx);
+ addUInt(Block, 0, dwarf::DW_FORM_udata, Reg);
}
- AddUInt(Block, 0, dwarf::DW_FORM_sdata, Location.getOffset());
+ addUInt(Block, 0, dwarf::DW_FORM_sdata, Location.getOffset());
}
// If we started with a pointer to the __Block_byref... struct, then
// the first thing we need to do is dereference the pointer (DW_OP_deref).
if (isPointer)
- AddUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_deref);
+ addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_deref);
// Next add the offset for the '__forwarding' field:
// DW_OP_plus_uconst ForwardingFieldOffset. Note there's no point in
// adding the offset if it's 0.
if (forwardingFieldOffset > 0) {
- AddUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_plus_uconst);
- AddUInt(Block, 0, dwarf::DW_FORM_udata, forwardingFieldOffset);
+ addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_plus_uconst);
+ addUInt(Block, 0, dwarf::DW_FORM_udata, forwardingFieldOffset);
}
// Now dereference the __forwarding field to get to the real __Block_byref
// struct: DW_OP_deref.
- AddUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_deref);
+ addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_deref);
// Now that we've got the real __Block_byref... struct, add the offset
// for the variable's field to get to the location of the actual variable:
// DW_OP_plus_uconst varFieldOffset. Again, don't add if it's 0.
if (varFieldOffset > 0) {
- AddUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_plus_uconst);
- AddUInt(Block, 0, dwarf::DW_FORM_udata, varFieldOffset);
+ addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_plus_uconst);
+ addUInt(Block, 0, dwarf::DW_FORM_udata, varFieldOffset);
}
// Now attach the location information to the DIE.
- AddBlock(Die, Attribute, 0, Block);
+ addBlock(Die, Attribute, 0, Block);
}
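Put together, the expression assembled above reads like the byte sequence below. This is a hypothetical example, not patch output: it assumes the byref struct sits at frame register + 16, __forwarding at offset 8 inside it, and the variable's field at offset 24. Opcode values are from the DWARF standard.

#include <cstdint>

const uint8_t DW_OP_breg6       = 0x76; // rbp-relative on x86-64 (example reg)
const uint8_t DW_OP_deref       = 0x06;
const uint8_t DW_OP_plus_uconst = 0x23;

const uint8_t ByrefLocExpr[] = {
  DW_OP_breg6, 16,       // push the address of the __Block_byref struct
  DW_OP_plus_uconst, 8,  // step to the __forwarding field
  DW_OP_deref,           // follow it to the live copy of the struct
  DW_OP_plus_uconst, 24  // step to the variable's own field
};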
-/// AddAddress - Add an address attribute to a die based on the location
+/// addAddress - Add an address attribute to a die based on the location
/// provided.
-void DwarfDebug::AddAddress(DIE *Die, unsigned Attribute,
+void DwarfDebug::addAddress(DIE *Die, unsigned Attribute,
const MachineLocation &Location) {
unsigned Reg = RI->getDwarfRegNum(Location.getReg(), false);
DIEBlock *Block = new DIEBlock();
if (Location.isReg()) {
if (Reg < 32) {
- AddUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_reg0 + Reg);
+ addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_reg0 + Reg);
} else {
- AddUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_regx);
- AddUInt(Block, 0, dwarf::DW_FORM_udata, Reg);
+ addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_regx);
+ addUInt(Block, 0, dwarf::DW_FORM_udata, Reg);
}
} else {
if (Reg < 32) {
- AddUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_breg0 + Reg);
+ addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_breg0 + Reg);
} else {
- AddUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_bregx);
- AddUInt(Block, 0, dwarf::DW_FORM_udata, Reg);
+ addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_bregx);
+ addUInt(Block, 0, dwarf::DW_FORM_udata, Reg);
}
- AddUInt(Block, 0, dwarf::DW_FORM_sdata, Location.getOffset());
+ addUInt(Block, 0, dwarf::DW_FORM_sdata, Location.getOffset());
}
- AddBlock(Die, Attribute, 0, Block);
+ addBlock(Die, Attribute, 0, Block);
}
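The register-number cutoff above is the usual DWARF size optimization: registers 0-31 have dedicated one-byte opcodes, anything larger needs the extended form with a ULEB128 operand. A standalone restatement of the same selection, for illustration only:

#include <cstdint>
#include <vector>

std::vector<uint8_t> encodeRegLocation(unsigned Reg, bool InRegister) {
  const uint8_t DW_OP_reg0 = 0x50, DW_OP_breg0 = 0x70;
  const uint8_t DW_OP_regx = 0x90, DW_OP_bregx = 0x92;
  std::vector<uint8_t> Expr;
  if (Reg < 32) {
    Expr.push_back((InRegister ? DW_OP_reg0 : DW_OP_breg0) + Reg);
  } else {
    Expr.push_back(InRegister ? DW_OP_regx : DW_OP_bregx);
    Expr.push_back(static_cast<uint8_t>(Reg)); // ULEB128; one byte suffices
                                               // while Reg < 128
  }
  // For the in-memory case, addAddress then appends the signed frame offset
  // (DW_FORM_sdata) after this prefix.
  return Expr;
}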
-/// AddType - Add a new type attribute to the specified entity.
-void DwarfDebug::AddType(CompileUnit *DW_Unit, DIE *Entity, DIType Ty) {
+/// addType - Add a new type attribute to the specified entity.
+void DwarfDebug::addType(CompileUnit *DW_Unit, DIE *Entity, DIType Ty) {
if (Ty.isNull())
return;
// Check for pre-existence.
- DIEEntry *&Slot = DW_Unit->getDIEEntrySlotFor(Ty.getNode());
+ DIEEntry *Entry = DW_Unit->getDIEEntry(Ty.getNode());
// If it exists then use the existing value.
- if (Slot) {
- Entity->AddValue(dwarf::DW_AT_type, dwarf::DW_FORM_ref4, Slot);
+ if (Entry) {
+ Entity->addValue(dwarf::DW_AT_type, dwarf::DW_FORM_ref4, Entry);
return;
}
// Set up proxy.
- Slot = CreateDIEEntry();
+ Entry = createDIEEntry();
+ DW_Unit->insertDIEEntry(Ty.getNode(), Entry);
// Construct type.
- DIE Buffer(dwarf::DW_TAG_base_type);
+ DIE *Buffer = new DIE(dwarf::DW_TAG_base_type);
if (Ty.isBasicType())
- ConstructTypeDIE(DW_Unit, Buffer, DIBasicType(Ty.getNode()));
+ constructTypeDIE(DW_Unit, *Buffer, DIBasicType(Ty.getNode()));
else if (Ty.isCompositeType())
- ConstructTypeDIE(DW_Unit, Buffer, DICompositeType(Ty.getNode()));
+ constructTypeDIE(DW_Unit, *Buffer, DICompositeType(Ty.getNode()));
else {
assert(Ty.isDerivedType() && "Unknown kind of DIType");
- ConstructTypeDIE(DW_Unit, Buffer, DIDerivedType(Ty.getNode()));
+ constructTypeDIE(DW_Unit, *Buffer, DIDerivedType(Ty.getNode()));
}
// Add debug information entry to entity and appropriate context.
DIE *Die = NULL;
DIDescriptor Context = Ty.getContext();
if (!Context.isNull())
- Die = DW_Unit->getDieMapSlotFor(Context.getNode());
+ Die = DW_Unit->getDIE(Context.getNode());
- if (Die) {
- DIE *Child = new DIE(Buffer);
- Die->AddChild(Child);
- Buffer.Detach();
- SetDIEEntry(Slot, Child);
- } else {
- Die = DW_Unit->AddDie(Buffer);
- SetDIEEntry(Slot, Die);
- }
-
- Entity->AddValue(dwarf::DW_AT_type, dwarf::DW_FORM_ref4, Slot);
+ if (Die)
+ Die->addChild(Buffer);
+ else
+ DW_Unit->addDie(Buffer);
+ Entry->setEntry(Buffer);
+ Entity->addValue(dwarf::DW_AT_type, dwarf::DW_FORM_ref4, Entry);
}
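The Entry handling above is memoization with a forward-declared proxy: the entry is registered in the compile unit's map before the type DIE is built, so recursive type graphs terminate. A minimal model of the idea, with made-up class names rather than the real CompileUnit API:

#include <map>

struct DIENode;                       // stand-in for a constructed type DIE
struct DIEEntryProxy { DIENode *Target; DIEEntryProxy() : Target(0) {} };

std::map<const void *, DIEEntryProxy *> EntryCache;

DIEEntryProxy *getOrCreateEntry(const void *TypeNode) {
  DIEEntryProxy *&E = EntryCache[TypeNode];
  if (!E)
    E = new DIEEntryProxy(); // registered before construction, so a type
                             // that refers to itself finds this proxy and
                             // the recursion bottoms out
  return E;
}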
-/// ConstructTypeDIE - Construct basic type die from DIBasicType.
-void DwarfDebug::ConstructTypeDIE(CompileUnit *DW_Unit, DIE &Buffer,
+/// constructTypeDIE - Construct basic type die from DIBasicType.
+void DwarfDebug::constructTypeDIE(CompileUnit *DW_Unit, DIE &Buffer,
DIBasicType BTy) {
// Get core information.
- const char *Name = BTy.getName();
+ StringRef Name = BTy.getName();
Buffer.setTag(dwarf::DW_TAG_base_type);
- AddUInt(&Buffer, dwarf::DW_AT_encoding, dwarf::DW_FORM_data1,
+ addUInt(&Buffer, dwarf::DW_AT_encoding, dwarf::DW_FORM_data1,
BTy.getEncoding());
// Add name if not anonymous or intermediate type.
- if (Name)
- AddString(&Buffer, dwarf::DW_AT_name, dwarf::DW_FORM_string, Name);
+ if (!Name.empty())
+ addString(&Buffer, dwarf::DW_AT_name, dwarf::DW_FORM_string, Name);
uint64_t Size = BTy.getSizeInBits() >> 3;
- AddUInt(&Buffer, dwarf::DW_AT_byte_size, 0, Size);
+ addUInt(&Buffer, dwarf::DW_AT_byte_size, 0, Size);
}
-/// ConstructTypeDIE - Construct derived type die from DIDerivedType.
-void DwarfDebug::ConstructTypeDIE(CompileUnit *DW_Unit, DIE &Buffer,
+/// constructTypeDIE - Construct derived type die from DIDerivedType.
+void DwarfDebug::constructTypeDIE(CompileUnit *DW_Unit, DIE &Buffer,
DIDerivedType DTy) {
// Get core information.
- const char *Name = DTy.getName();
+ StringRef Name = DTy.getName();
uint64_t Size = DTy.getSizeInBits() >> 3;
unsigned Tag = DTy.getTag();
@@ -918,26 +812,26 @@ void DwarfDebug::ConstructTypeDIE(CompileUnit *DW_Unit, DIE &Buffer,
// Map to main type, void will not have a type.
DIType FromTy = DTy.getTypeDerivedFrom();
- AddType(DW_Unit, &Buffer, FromTy);
+ addType(DW_Unit, &Buffer, FromTy);
// Add name if not anonymous or intermediate type.
- if (Name && Tag != dwarf::DW_TAG_pointer_type)
- AddString(&Buffer, dwarf::DW_AT_name, dwarf::DW_FORM_string, Name);
+ if (!Name.empty())
+ addString(&Buffer, dwarf::DW_AT_name, dwarf::DW_FORM_string, Name);
// Add size if non-zero (derived types might be zero-sized.)
if (Size)
- AddUInt(&Buffer, dwarf::DW_AT_byte_size, 0, Size);
+ addUInt(&Buffer, dwarf::DW_AT_byte_size, 0, Size);
// Add source line info if available and TyDesc is not a forward declaration.
if (!DTy.isForwardDecl())
- AddSourceLine(&Buffer, &DTy);
+ addSourceLine(&Buffer, &DTy);
}
-/// ConstructTypeDIE - Construct type DIE from DICompositeType.
-void DwarfDebug::ConstructTypeDIE(CompileUnit *DW_Unit, DIE &Buffer,
+/// constructTypeDIE - Construct type DIE from DICompositeType.
+void DwarfDebug::constructTypeDIE(CompileUnit *DW_Unit, DIE &Buffer,
DICompositeType CTy) {
// Get core information.
- const char *Name = CTy.getName();
+ StringRef Name = CTy.getName();
uint64_t Size = CTy.getSizeInBits() >> 3;
unsigned Tag = CTy.getTag();
@@ -946,7 +840,7 @@ void DwarfDebug::ConstructTypeDIE(CompileUnit *DW_Unit, DIE &Buffer,
switch (Tag) {
case dwarf::DW_TAG_vector_type:
case dwarf::DW_TAG_array_type:
- ConstructArrayTypeDIE(DW_Unit, Buffer, &CTy);
+ constructArrayTypeDIE(DW_Unit, Buffer, &CTy);
break;
case dwarf::DW_TAG_enumeration_type: {
DIArray Elements = CTy.getTypeArray();
@@ -956,8 +850,8 @@ void DwarfDebug::ConstructTypeDIE(CompileUnit *DW_Unit, DIE &Buffer,
DIE *ElemDie = NULL;
DIEnumerator Enum(Elements.getElement(i).getNode());
if (!Enum.isNull()) {
- ElemDie = ConstructEnumTypeDIE(DW_Unit, &Enum);
- Buffer.AddChild(ElemDie);
+ ElemDie = constructEnumTypeDIE(DW_Unit, &Enum);
+ Buffer.addChild(ElemDie);
}
}
}
@@ -966,17 +860,17 @@ void DwarfDebug::ConstructTypeDIE(CompileUnit *DW_Unit, DIE &Buffer,
// Add return type.
DIArray Elements = CTy.getTypeArray();
DIDescriptor RTy = Elements.getElement(0);
- AddType(DW_Unit, &Buffer, DIType(RTy.getNode()));
+ addType(DW_Unit, &Buffer, DIType(RTy.getNode()));
// Add prototype flag.
- AddUInt(&Buffer, dwarf::DW_AT_prototyped, dwarf::DW_FORM_flag, 1);
+ addUInt(&Buffer, dwarf::DW_AT_prototyped, dwarf::DW_FORM_flag, 1);
// Add arguments.
for (unsigned i = 1, N = Elements.getNumElements(); i < N; ++i) {
DIE *Arg = new DIE(dwarf::DW_TAG_formal_parameter);
DIDescriptor Ty = Elements.getElement(i);
- AddType(DW_Unit, Arg, DIType(Ty.getNode()));
- Buffer.AddChild(Arg);
+ addType(DW_Unit, Arg, DIType(Ty.getNode()));
+ Buffer.addChild(Arg);
}
}
break;
@@ -997,20 +891,20 @@ void DwarfDebug::ConstructTypeDIE(CompileUnit *DW_Unit, DIE &Buffer,
continue;
DIE *ElemDie = NULL;
if (Element.getTag() == dwarf::DW_TAG_subprogram)
- ElemDie = CreateSubprogramDIE(DW_Unit,
+ ElemDie = createSubprogramDIE(DW_Unit,
DISubprogram(Element.getNode()));
else
- ElemDie = CreateMemberDIE(DW_Unit,
+ ElemDie = createMemberDIE(DW_Unit,
DIDerivedType(Element.getNode()));
- Buffer.AddChild(ElemDie);
+ Buffer.addChild(ElemDie);
}
if (CTy.isAppleBlockExtension())
- AddUInt(&Buffer, dwarf::DW_AT_APPLE_block, dwarf::DW_FORM_flag, 1);
+ addUInt(&Buffer, dwarf::DW_AT_APPLE_block, dwarf::DW_FORM_flag, 1);
unsigned RLang = CTy.getRunTimeLang();
if (RLang)
- AddUInt(&Buffer, dwarf::DW_AT_APPLE_runtime_class,
+ addUInt(&Buffer, dwarf::DW_AT_APPLE_runtime_class,
dwarf::DW_FORM_data1, RLang);
break;
}
@@ -1019,136 +913,143 @@ void DwarfDebug::ConstructTypeDIE(CompileUnit *DW_Unit, DIE &Buffer,
}
// Add name if not anonymous or intermediate type.
- if (Name)
- AddString(&Buffer, dwarf::DW_AT_name, dwarf::DW_FORM_string, Name);
+ if (!Name.empty())
+ addString(&Buffer, dwarf::DW_AT_name, dwarf::DW_FORM_string, Name);
if (Tag == dwarf::DW_TAG_enumeration_type ||
Tag == dwarf::DW_TAG_structure_type || Tag == dwarf::DW_TAG_union_type) {
// Add size if non-zero (derived types might be zero-sized.)
if (Size)
- AddUInt(&Buffer, dwarf::DW_AT_byte_size, 0, Size);
+ addUInt(&Buffer, dwarf::DW_AT_byte_size, 0, Size);
else {
// If it is a forward declaration, mark it as such; otherwise add a zero size.
if (CTy.isForwardDecl())
- AddUInt(&Buffer, dwarf::DW_AT_declaration, dwarf::DW_FORM_flag, 1);
+ addUInt(&Buffer, dwarf::DW_AT_declaration, dwarf::DW_FORM_flag, 1);
else
- AddUInt(&Buffer, dwarf::DW_AT_byte_size, 0, 0);
+ addUInt(&Buffer, dwarf::DW_AT_byte_size, 0, 0);
}
// Add source line info if available.
if (!CTy.isForwardDecl())
- AddSourceLine(&Buffer, &CTy);
+ addSourceLine(&Buffer, &CTy);
}
}
-/// ConstructSubrangeDIE - Construct subrange DIE from DISubrange.
-void DwarfDebug::ConstructSubrangeDIE(DIE &Buffer, DISubrange SR, DIE *IndexTy){
+/// constructSubrangeDIE - Construct subrange DIE from DISubrange.
+void DwarfDebug::constructSubrangeDIE(DIE &Buffer, DISubrange SR, DIE *IndexTy){
int64_t L = SR.getLo();
int64_t H = SR.getHi();
DIE *DW_Subrange = new DIE(dwarf::DW_TAG_subrange_type);
- AddDIEEntry(DW_Subrange, dwarf::DW_AT_type, dwarf::DW_FORM_ref4, IndexTy);
+ addDIEEntry(DW_Subrange, dwarf::DW_AT_type, dwarf::DW_FORM_ref4, IndexTy);
if (L)
- AddSInt(DW_Subrange, dwarf::DW_AT_lower_bound, 0, L);
+ addSInt(DW_Subrange, dwarf::DW_AT_lower_bound, 0, L);
if (H)
- AddSInt(DW_Subrange, dwarf::DW_AT_upper_bound, 0, H);
+ addSInt(DW_Subrange, dwarf::DW_AT_upper_bound, 0, H);
- Buffer.AddChild(DW_Subrange);
+ Buffer.addChild(DW_Subrange);
}
-/// ConstructArrayTypeDIE - Construct array type DIE from DICompositeType.
-void DwarfDebug::ConstructArrayTypeDIE(CompileUnit *DW_Unit, DIE &Buffer,
+/// constructArrayTypeDIE - Construct array type DIE from DICompositeType.
+void DwarfDebug::constructArrayTypeDIE(CompileUnit *DW_Unit, DIE &Buffer,
DICompositeType *CTy) {
Buffer.setTag(dwarf::DW_TAG_array_type);
if (CTy->getTag() == dwarf::DW_TAG_vector_type)
- AddUInt(&Buffer, dwarf::DW_AT_GNU_vector, dwarf::DW_FORM_flag, 1);
+ addUInt(&Buffer, dwarf::DW_AT_GNU_vector, dwarf::DW_FORM_flag, 1);
// Emit derived type.
- AddType(DW_Unit, &Buffer, CTy->getTypeDerivedFrom());
+ addType(DW_Unit, &Buffer, CTy->getTypeDerivedFrom());
DIArray Elements = CTy->getTypeArray();
- // Construct an anonymous type for index type.
- DIE IdxBuffer(dwarf::DW_TAG_base_type);
- AddUInt(&IdxBuffer, dwarf::DW_AT_byte_size, 0, sizeof(int32_t));
- AddUInt(&IdxBuffer, dwarf::DW_AT_encoding, dwarf::DW_FORM_data1,
- dwarf::DW_ATE_signed);
- DIE *IndexTy = DW_Unit->AddDie(IdxBuffer);
+ // Get an anonymous type for index type.
+ DIE *IdxTy = DW_Unit->getIndexTyDie();
+ if (!IdxTy) {
+ // Construct an anonymous type for index type.
+ IdxTy = new DIE(dwarf::DW_TAG_base_type);
+ addUInt(IdxTy, dwarf::DW_AT_byte_size, 0, sizeof(int32_t));
+ addUInt(IdxTy, dwarf::DW_AT_encoding, dwarf::DW_FORM_data1,
+ dwarf::DW_ATE_signed);
+ DW_Unit->addDie(IdxTy);
+ DW_Unit->setIndexTyDie(IdxTy);
+ }
// Add subranges to array type.
for (unsigned i = 0, N = Elements.getNumElements(); i < N; ++i) {
DIDescriptor Element = Elements.getElement(i);
if (Element.getTag() == dwarf::DW_TAG_subrange_type)
- ConstructSubrangeDIE(Buffer, DISubrange(Element.getNode()), IndexTy);
+ constructSubrangeDIE(Buffer, DISubrange(Element.getNode()), IdxTy);
}
}
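The caching added here means every array in the compile unit now shares one anonymous 32-bit signed index type instead of minting a fresh one per array. For a declaration like int A[10], the resulting DIE shape is roughly the following (an illustration, not emitted text):

// DW_TAG_array_type
//   DW_AT_type          -> int
//   DW_TAG_subrange_type
//     DW_AT_type        -> shared anonymous DW_TAG_base_type
//                          (byte_size 4, DW_ATE_signed)
//     DW_AT_upper_bound = 9  // lower bound 0 omitted, matching the
//                            // "if (L)" guard in constructSubrangeDIE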
-/// ConstructEnumTypeDIE - Construct enum type DIE from DIEnumerator.
-DIE *DwarfDebug::ConstructEnumTypeDIE(CompileUnit *DW_Unit, DIEnumerator *ETy) {
+/// constructEnumTypeDIE - Construct enum type DIE from DIEnumerator.
+DIE *DwarfDebug::constructEnumTypeDIE(CompileUnit *DW_Unit, DIEnumerator *ETy) {
DIE *Enumerator = new DIE(dwarf::DW_TAG_enumerator);
- const char *Name = ETy->getName();
- AddString(Enumerator, dwarf::DW_AT_name, dwarf::DW_FORM_string, Name);
+ StringRef Name = ETy->getName();
+ addString(Enumerator, dwarf::DW_AT_name, dwarf::DW_FORM_string, Name);
int64_t Value = ETy->getEnumValue();
- AddSInt(Enumerator, dwarf::DW_AT_const_value, dwarf::DW_FORM_sdata, Value);
+ addSInt(Enumerator, dwarf::DW_AT_const_value, dwarf::DW_FORM_sdata, Value);
return Enumerator;
}
-/// CreateGlobalVariableDIE - Create new DIE using GV.
-DIE *DwarfDebug::CreateGlobalVariableDIE(CompileUnit *DW_Unit,
+/// createGlobalVariableDIE - Create new DIE using GV.
+DIE *DwarfDebug::createGlobalVariableDIE(CompileUnit *DW_Unit,
const DIGlobalVariable &GV) {
- // If the global variable was optmized out then no need to create debug info entry.
+ // If the global variable was optimized out then there is no need to create
+ // a debug info entry.
if (!GV.getGlobal()) return NULL;
- if (!GV.getDisplayName()) return NULL;
+ if (GV.getDisplayName().empty()) return NULL;
DIE *GVDie = new DIE(dwarf::DW_TAG_variable);
- AddString(GVDie, dwarf::DW_AT_name, dwarf::DW_FORM_string,
+ addString(GVDie, dwarf::DW_AT_name, dwarf::DW_FORM_string,
GV.getDisplayName());
- const char *LinkageName = GV.getLinkageName();
- if (LinkageName) {
+ StringRef LinkageName = GV.getLinkageName();
+ if (!LinkageName.empty()) {
// Skip special LLVM prefix that is used to inform the asm printer to not
// emit usual symbol prefix before the symbol name. This happens for
// Objective-C symbol names and symbols whose names are replaced using GCC's
// __asm__ attribute.
if (LinkageName[0] == 1)
- LinkageName = &LinkageName[1];
- AddString(GVDie, dwarf::DW_AT_MIPS_linkage_name, dwarf::DW_FORM_string,
+ LinkageName = LinkageName.substr(1);
+ addString(GVDie, dwarf::DW_AT_MIPS_linkage_name, dwarf::DW_FORM_string,
LinkageName);
}
- AddType(DW_Unit, GVDie, GV.getType());
+ addType(DW_Unit, GVDie, GV.getType());
if (!GV.isLocalToUnit())
- AddUInt(GVDie, dwarf::DW_AT_external, dwarf::DW_FORM_flag, 1);
- AddSourceLine(GVDie, &GV);
+ addUInt(GVDie, dwarf::DW_AT_external, dwarf::DW_FORM_flag, 1);
+ addSourceLine(GVDie, &GV);
// Add address.
DIEBlock *Block = new DIEBlock();
- AddUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_addr);
- AddObjectLabel(Block, 0, dwarf::DW_FORM_udata,
+ addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_addr);
+ addObjectLabel(Block, 0, dwarf::DW_FORM_udata,
Asm->Mang->getMangledName(GV.getGlobal()));
- AddBlock(GVDie, dwarf::DW_AT_location, 0, Block);
+ addBlock(GVDie, dwarf::DW_AT_location, 0, Block);
return GVDie;
}
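The address block built just above is the simplest DWARF location expression there is: one DW_OP_addr opcode followed by a pointer-sized address that the mangled-symbol label resolves to at link time. A sketch, assuming a 64-bit target, with zero bytes standing in for the relocated address:

#include <cstdint>

const uint8_t DW_OP_addr = 0x03;

const uint8_t GlobalLocExpr[] = {
  DW_OP_addr,
  0, 0, 0, 0, 0, 0, 0, 0 // 8-byte address, filled in by the relocation
};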
-/// CreateMemberDIE - Create new member DIE.
-DIE *DwarfDebug::CreateMemberDIE(CompileUnit *DW_Unit, const DIDerivedType &DT){
+/// createMemberDIE - Create new member DIE.
+DIE *DwarfDebug::createMemberDIE(CompileUnit *DW_Unit, const DIDerivedType &DT){
DIE *MemberDie = new DIE(DT.getTag());
- if (const char *Name = DT.getName())
- AddString(MemberDie, dwarf::DW_AT_name, dwarf::DW_FORM_string, Name);
-
- AddType(DW_Unit, MemberDie, DT.getTypeDerivedFrom());
+ StringRef Name = DT.getName();
+ if (!Name.empty())
+ addString(MemberDie, dwarf::DW_AT_name, dwarf::DW_FORM_string, Name);
+
+ addType(DW_Unit, MemberDie, DT.getTypeDerivedFrom());
- AddSourceLine(MemberDie, &DT);
+ addSourceLine(MemberDie, &DT);
DIEBlock *MemLocationDie = new DIEBlock();
- AddUInt(MemLocationDie, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_plus_uconst);
+ addUInt(MemLocationDie, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_plus_uconst);
uint64_t Size = DT.getSizeInBits();
uint64_t FieldSize = DT.getOriginalTypeSize();
if (Size != FieldSize) {
// Handle bitfield.
- AddUInt(MemberDie, dwarf::DW_AT_byte_size, 0, DT.getOriginalTypeSize()>>3);
- AddUInt(MemberDie, dwarf::DW_AT_bit_size, 0, DT.getSizeInBits());
+ addUInt(MemberDie, dwarf::DW_AT_byte_size, 0, DT.getOriginalTypeSize()>>3);
+ addUInt(MemberDie, dwarf::DW_AT_bit_size, 0, DT.getSizeInBits());
uint64_t Offset = DT.getOffsetInBits();
uint64_t FieldOffset = Offset;
@@ -1159,49 +1060,48 @@ DIE *DwarfDebug::CreateMemberDIE(CompileUnit *DW_Unit, const DIDerivedType &DT){
// Maybe we need to work from the other end.
if (TD->isLittleEndian()) Offset = FieldSize - (Offset + Size);
- AddUInt(MemberDie, dwarf::DW_AT_bit_offset, 0, Offset);
+ addUInt(MemberDie, dwarf::DW_AT_bit_offset, 0, Offset);
// Here DW_AT_data_member_location points to the anonymous
// field that includes this bit field.
- AddUInt(MemLocationDie, 0, dwarf::DW_FORM_udata, FieldOffset >> 3);
+ addUInt(MemLocationDie, 0, dwarf::DW_FORM_udata, FieldOffset >> 3);
} else
// This is not a bitfield.
- AddUInt(MemLocationDie, 0, dwarf::DW_FORM_udata, DT.getOffsetInBits() >> 3);
+ addUInt(MemLocationDie, 0, dwarf::DW_FORM_udata, DT.getOffsetInBits() >> 3);
- AddBlock(MemberDie, dwarf::DW_AT_data_member_location, 0, MemLocationDie);
+ addBlock(MemberDie, dwarf::DW_AT_data_member_location, 0, MemLocationDie);
if (DT.isProtected())
- AddUInt(MemberDie, dwarf::DW_AT_accessibility, 0,
+ addUInt(MemberDie, dwarf::DW_AT_accessibility, 0,
dwarf::DW_ACCESS_protected);
else if (DT.isPrivate())
- AddUInt(MemberDie, dwarf::DW_AT_accessibility, 0,
+ addUInt(MemberDie, dwarf::DW_AT_accessibility, 0,
dwarf::DW_ACCESS_private);
return MemberDie;
}
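DW_AT_bit_offset counts from the most significant bit of the storage unit, while front-ends report offsets from the least significant bit, hence the little-endian mirror above. A worked instance with made-up values:

#include <cassert>

int main() {
  unsigned FieldSize = 32; // bits in the anonymous storage unit
  unsigned Size = 5;       // bits in this bitfield
  unsigned Offset = 3;     // LSB-relative offset from the front-end
  unsigned BitOffset = FieldSize - (Offset + Size); // MSB-relative mirror
  assert(BitOffset == 24); // value emitted as DW_AT_bit_offset
  return 0;
}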
-/// CreateSubprogramDIE - Create new DIE using SP.
-DIE *DwarfDebug::CreateSubprogramDIE(CompileUnit *DW_Unit,
+/// createSubprogramDIE - Create new DIE using SP.
+DIE *DwarfDebug::createSubprogramDIE(CompileUnit *DW_Unit,
const DISubprogram &SP,
bool IsConstructor,
bool IsInlined) {
DIE *SPDie = new DIE(dwarf::DW_TAG_subprogram);
+ addString(SPDie, dwarf::DW_AT_name, dwarf::DW_FORM_string, SP.getName());
- const char * Name = SP.getName();
- AddString(SPDie, dwarf::DW_AT_name, dwarf::DW_FORM_string, Name);
-
- const char *LinkageName = SP.getLinkageName();
- if (LinkageName) {
- // Skip special LLVM prefix that is used to inform the asm printer to not emit
- // usual symbol prefix before the symbol name. This happens for Objective-C
- // symbol names and symbol whose name is replaced using GCC's __asm__ attribute.
+ StringRef LinkageName = SP.getLinkageName();
+ if (!LinkageName.empty()) {
+ // Skip special LLVM prefix that is used to inform the asm printer to not
+ // emit usual symbol prefix before the symbol name. This happens for
+ // Objective-C symbol names and symbols whose names are replaced using GCC's
+ // __asm__ attribute.
if (LinkageName[0] == 1)
- LinkageName = &LinkageName[1];
- AddString(SPDie, dwarf::DW_AT_MIPS_linkage_name, dwarf::DW_FORM_string,
+ LinkageName = LinkageName.substr(1);
+ addString(SPDie, dwarf::DW_AT_MIPS_linkage_name, dwarf::DW_FORM_string,
LinkageName);
}
- AddSourceLine(SPDie, &SP);
+ addSourceLine(SPDie, &SP);
DICompositeType SPTy = SP.getType();
DIArray Args = SPTy.getTypeArray();
@@ -1210,53 +1110,52 @@ DIE *DwarfDebug::CreateSubprogramDIE(CompileUnit *DW_Unit,
unsigned Lang = SP.getCompileUnit().getLanguage();
if (Lang == dwarf::DW_LANG_C99 || Lang == dwarf::DW_LANG_C89 ||
Lang == dwarf::DW_LANG_ObjC)
- AddUInt(SPDie, dwarf::DW_AT_prototyped, dwarf::DW_FORM_flag, 1);
+ addUInt(SPDie, dwarf::DW_AT_prototyped, dwarf::DW_FORM_flag, 1);
// Add Return Type.
unsigned SPTag = SPTy.getTag();
if (!IsConstructor) {
if (Args.isNull() || SPTag != dwarf::DW_TAG_subroutine_type)
- AddType(DW_Unit, SPDie, SPTy);
+ addType(DW_Unit, SPDie, SPTy);
else
- AddType(DW_Unit, SPDie, DIType(Args.getElement(0).getNode()));
+ addType(DW_Unit, SPDie, DIType(Args.getElement(0).getNode()));
}
if (!SP.isDefinition()) {
- AddUInt(SPDie, dwarf::DW_AT_declaration, dwarf::DW_FORM_flag, 1);
+ addUInt(SPDie, dwarf::DW_AT_declaration, dwarf::DW_FORM_flag, 1);
// Add arguments. Do not add arguments for subprogram definition. They will
// be handled through RecordVariable.
if (SPTag == dwarf::DW_TAG_subroutine_type)
for (unsigned i = 1, N = Args.getNumElements(); i < N; ++i) {
DIE *Arg = new DIE(dwarf::DW_TAG_formal_parameter);
- AddType(DW_Unit, Arg, DIType(Args.getElement(i).getNode()));
- AddUInt(Arg, dwarf::DW_AT_artificial, dwarf::DW_FORM_flag, 1); // ??
- SPDie->AddChild(Arg);
+ addType(DW_Unit, Arg, DIType(Args.getElement(i).getNode()));
+ addUInt(Arg, dwarf::DW_AT_artificial, dwarf::DW_FORM_flag, 1); // ??
+ SPDie->addChild(Arg);
}
}
// DW_TAG_inlined_subroutine may refer to this DIE.
- DIE *&Slot = DW_Unit->getDieMapSlotFor(SP.getNode());
- Slot = SPDie;
+ DW_Unit->insertDIE(SP.getNode(), SPDie);
return SPDie;
}
-/// FindCompileUnit - Get the compile unit for the given descriptor.
+/// findCompileUnit - Get the compile unit for the given descriptor.
///
-CompileUnit &DwarfDebug::FindCompileUnit(DICompileUnit Unit) const {
+CompileUnit &DwarfDebug::findCompileUnit(DICompileUnit Unit) const {
DenseMap<Value *, CompileUnit *>::const_iterator I =
CompileUnitMap.find(Unit.getNode());
assert(I != CompileUnitMap.end() && "Missing compile unit.");
return *I->second;
}
-/// CreateDbgScopeVariable - Create a new scope variable.
+/// createDbgScopeVariable - Create a new scope variable.
///
-DIE *DwarfDebug::CreateDbgScopeVariable(DbgVariable *DV, CompileUnit *Unit) {
+DIE *DwarfDebug::createDbgScopeVariable(DbgVariable *DV, CompileUnit *Unit) {
// Get the descriptor.
const DIVariable &VD = DV->getVariable();
- const char *Name = VD.getName();
- if (!Name)
+ StringRef Name = VD.getName();
+ if (Name.empty())
return NULL;
// Translate tag to proper Dwarf tag. The result variable is dropped for
@@ -1276,33 +1175,34 @@ DIE *DwarfDebug::CreateDbgScopeVariable(DbgVariable *DV, CompileUnit *Unit) {
// Define variable debug information entry.
DIE *VariableDie = new DIE(Tag);
- AddString(VariableDie, dwarf::DW_AT_name, dwarf::DW_FORM_string, Name);
+ addString(VariableDie, dwarf::DW_AT_name, dwarf::DW_FORM_string, Name);
// Add source line info if available.
- AddSourceLine(VariableDie, &VD);
+ addSourceLine(VariableDie, &VD);
// Add variable type.
- // FIXME: isBlockByrefVariable should be reformulated in terms of complex
+ // FIXME: isBlockByrefVariable should be reformulated in terms of complex
// addresses instead.
if (VD.isBlockByrefVariable())
- AddType(Unit, VariableDie, GetBlockByrefType(VD.getType(), Name));
+ addType(Unit, VariableDie, getBlockByrefType(VD.getType(), Name));
else
- AddType(Unit, VariableDie, VD.getType());
+ addType(Unit, VariableDie, VD.getType());
// Add variable address.
// Variables for abstract instances of inlined functions don't get a
// location.
MachineLocation Location;
- Location.set(RI->getFrameRegister(*MF),
- RI->getFrameIndexOffset(*MF, DV->getFrameIndex()));
-
-
+ unsigned FrameReg;
+ int Offset = RI->getFrameIndexReference(*MF, DV->getFrameIndex(), FrameReg);
+ Location.set(FrameReg, Offset);
+
+
if (VD.hasComplexAddress())
- AddComplexAddress(DV, VariableDie, dwarf::DW_AT_location, Location);
+ addComplexAddress(DV, VariableDie, dwarf::DW_AT_location, Location);
else if (VD.isBlockByrefVariable())
- AddBlockByrefAddress(DV, VariableDie, dwarf::DW_AT_location, Location);
+ addBlockByrefAddress(DV, VariableDie, dwarf::DW_AT_location, Location);
else
- AddAddress(VariableDie, dwarf::DW_AT_location, Location);
+ addAddress(VariableDie, dwarf::DW_AT_location, Location);
return VariableDie;
}
@@ -1329,17 +1229,17 @@ DbgScope *DwarfDebug::getUpdatedDbgScope(MDNode *N, const MachineInstr *MI,
DbgScope *Parent = NULL;
if (GetConcreteScope) {
DILocation IL(InlinedAt);
- Parent = getUpdatedDbgScope(IL.getScope().getNode(), MI,
+ Parent = getUpdatedDbgScope(IL.getScope().getNode(), MI,
IL.getOrigLocation().getNode());
assert (Parent && "Unable to find Parent scope!");
NScope->setParent(Parent);
- Parent->AddScope(NScope);
+ Parent->addScope(NScope);
} else if (DIDescriptor(N).isLexicalBlock()) {
DILexicalBlock DB(N);
if (!DB.getContext().isNull()) {
Parent = getUpdatedDbgScope(DB.getContext().getNode(), MI, InlinedAt);
NScope->setParent(Parent);
- Parent->AddScope(NScope);
+ Parent->addScope(NScope);
}
}
@@ -1365,7 +1265,7 @@ DbgScope *DwarfDebug::getOrCreateAbstractScope(MDNode *N) {
DbgScope *AScope = AbstractScopes.lookup(N);
if (AScope)
return AScope;
-
+
DbgScope *Parent = NULL;
DIDescriptor Scope(N);
@@ -1379,7 +1279,7 @@ DbgScope *DwarfDebug::getOrCreateAbstractScope(MDNode *N) {
AScope = new DbgScope(Parent, DIDescriptor(N), NULL);
if (Parent)
- Parent->AddScope(AScope);
+ Parent->addScope(AScope);
AScope->setAbstractScope();
AbstractScopes[N] = AScope;
if (DIDescriptor(N).isSubprogram())
@@ -1387,54 +1287,43 @@ DbgScope *DwarfDebug::getOrCreateAbstractScope(MDNode *N) {
return AScope;
}
-static DISubprogram getDISubprogram(MDNode *N) {
-
- DIDescriptor D(N);
- if (D.isNull())
- return DISubprogram();
-
- if (D.isCompileUnit())
- return DISubprogram();
-
- if (D.isSubprogram())
- return DISubprogram(N);
-
- if (D.isLexicalBlock())
- return getDISubprogram(DILexicalBlock(N).getContext().getNode());
-
- llvm_unreachable("Unexpected Descriptor!");
-}
-
-DIE *DwarfDebug::UpdateSubprogramScopeDIE(MDNode *SPNode) {
+/// updateSubprogramScopeDIE - Find DIE for the given subprogram and
+/// attach appropriate DW_AT_low_pc and DW_AT_high_pc attributes.
+/// If there are global variables in this scope then create and insert
+/// DIEs for these variables.
+DIE *DwarfDebug::updateSubprogramScopeDIE(MDNode *SPNode) {
- DIE *SPDie = ModuleCU->getDieMapSlotFor(SPNode);
+ DIE *SPDie = ModuleCU->getDIE(SPNode);
assert (SPDie && "Unable to find subprogram DIE!");
- AddLabel(SPDie, dwarf::DW_AT_low_pc, dwarf::DW_FORM_addr,
+ addLabel(SPDie, dwarf::DW_AT_low_pc, dwarf::DW_FORM_addr,
DWLabel("func_begin", SubprogramCount));
- AddLabel(SPDie, dwarf::DW_AT_high_pc, dwarf::DW_FORM_addr,
+ addLabel(SPDie, dwarf::DW_AT_high_pc, dwarf::DW_FORM_addr,
DWLabel("func_end", SubprogramCount));
MachineLocation Location(RI->getFrameRegister(*MF));
- AddAddress(SPDie, dwarf::DW_AT_frame_base, Location);
-
+ addAddress(SPDie, dwarf::DW_AT_frame_base, Location);
+
if (!DISubprogram(SPNode).isLocalToUnit())
- AddUInt(SPDie, dwarf::DW_AT_external, dwarf::DW_FORM_flag, 1);
+ addUInt(SPDie, dwarf::DW_AT_external, dwarf::DW_FORM_flag, 1);
// If there are global variables at this scope then add their dies.
- for (SmallVector<WeakVH, 4>::iterator SGI = ScopedGVs.begin(),
+ for (SmallVector<WeakVH, 4>::iterator SGI = ScopedGVs.begin(),
SGE = ScopedGVs.end(); SGI != SGE; ++SGI) {
MDNode *N = dyn_cast_or_null<MDNode>(*SGI);
if (!N) continue;
DIGlobalVariable GV(N);
if (GV.getContext().getNode() == SPNode) {
- DIE *ScopedGVDie = CreateGlobalVariableDIE(ModuleCU, GV);
+ DIE *ScopedGVDie = createGlobalVariableDIE(ModuleCU, GV);
if (ScopedGVDie)
- SPDie->AddChild(ScopedGVDie);
+ SPDie->addChild(ScopedGVDie);
}
}
+
return SPDie;
}
-DIE *DwarfDebug::ConstructLexicalScopeDIE(DbgScope *Scope) {
+/// constructLexicalScope - Construct new DW_TAG_lexical_block
+/// for this scope and attach DW_AT_low_pc/DW_AT_high_pc labels.
+DIE *DwarfDebug::constructLexicalScopeDIE(DbgScope *Scope) {
unsigned StartID = MMI->MappedLabel(Scope->getStartLabelID());
unsigned EndID = MMI->MappedLabel(Scope->getEndLabelID());
@@ -1446,13 +1335,13 @@ DIE *DwarfDebug::ConstructLexicalScopeDIE(DbgScope *Scope) {
if (Scope->isAbstractScope())
return ScopeDIE;
- AddLabel(ScopeDIE, dwarf::DW_AT_low_pc, dwarf::DW_FORM_addr,
- StartID ?
- DWLabel("label", StartID)
+ addLabel(ScopeDIE, dwarf::DW_AT_low_pc, dwarf::DW_FORM_addr,
+ StartID ?
+ DWLabel("label", StartID)
: DWLabel("func_begin", SubprogramCount));
- AddLabel(ScopeDIE, dwarf::DW_AT_high_pc, dwarf::DW_FORM_addr,
- EndID ?
- DWLabel("label", EndID)
+ addLabel(ScopeDIE, dwarf::DW_AT_high_pc, dwarf::DW_FORM_addr,
+ EndID ?
+ DWLabel("label", EndID)
: DWLabel("func_end", SubprogramCount));
@@ -1460,7 +1349,10 @@ DIE *DwarfDebug::ConstructLexicalScopeDIE(DbgScope *Scope) {
return ScopeDIE;
}
-DIE *DwarfDebug::ConstructInlinedScopeDIE(DbgScope *Scope) {
+/// constructInlinedScopeDIE - This scope represents inlined body of
+/// a function. Construct DIE to represent this concrete inlined copy
+/// of the function.
+DIE *DwarfDebug::constructInlinedScopeDIE(DbgScope *Scope) {
unsigned StartID = MMI->MappedLabel(Scope->getStartLabelID());
unsigned EndID = MMI->MappedLabel(Scope->getEndLabelID());
assert (StartID && "Invalid starting label for an inlined scope!");
@@ -1475,14 +1367,14 @@ DIE *DwarfDebug::ConstructInlinedScopeDIE(DbgScope *Scope) {
DIE *ScopeDIE = new DIE(dwarf::DW_TAG_inlined_subroutine);
DISubprogram InlinedSP = getDISubprogram(DS.getNode());
- DIE *&OriginDIE = ModuleCU->getDieMapSlotFor(InlinedSP.getNode());
+ DIE *OriginDIE = ModuleCU->getDIE(InlinedSP.getNode());
assert (OriginDIE && "Unable to find Origin DIE!");
- AddDIEEntry(ScopeDIE, dwarf::DW_AT_abstract_origin,
+ addDIEEntry(ScopeDIE, dwarf::DW_AT_abstract_origin,
dwarf::DW_FORM_ref4, OriginDIE);
- AddLabel(ScopeDIE, dwarf::DW_AT_low_pc, dwarf::DW_FORM_addr,
+ addLabel(ScopeDIE, dwarf::DW_AT_low_pc, dwarf::DW_FORM_addr,
DWLabel("label", StartID));
- AddLabel(ScopeDIE, dwarf::DW_AT_high_pc, dwarf::DW_FORM_addr,
+ addLabel(ScopeDIE, dwarf::DW_AT_high_pc, dwarf::DW_FORM_addr,
DWLabel("label", EndID));
InlinedSubprogramDIEs.insert(OriginDIE);
@@ -1492,7 +1384,8 @@ DIE *DwarfDebug::ConstructInlinedScopeDIE(DbgScope *Scope) {
I = InlineInfo.find(InlinedSP.getNode());
if (I == InlineInfo.end()) {
- InlineInfo[InlinedSP.getNode()].push_back(std::make_pair(StartID, ScopeDIE));
+ InlineInfo[InlinedSP.getNode()].push_back(std::make_pair(StartID,
+ ScopeDIE));
InlinedSPNodes.push_back(InlinedSP.getNode());
} else
I->second.push_back(std::make_pair(StartID, ScopeDIE));
@@ -1500,18 +1393,20 @@ DIE *DwarfDebug::ConstructInlinedScopeDIE(DbgScope *Scope) {
StringPool.insert(InlinedSP.getName());
StringPool.insert(InlinedSP.getLinkageName());
DILocation DL(Scope->getInlinedAt());
- AddUInt(ScopeDIE, dwarf::DW_AT_call_file, 0, ModuleCU->getID());
- AddUInt(ScopeDIE, dwarf::DW_AT_call_line, 0, DL.getLineNumber());
+ addUInt(ScopeDIE, dwarf::DW_AT_call_file, 0, ModuleCU->getID());
+ addUInt(ScopeDIE, dwarf::DW_AT_call_line, 0, DL.getLineNumber());
return ScopeDIE;
}
-DIE *DwarfDebug::ConstructVariableDIE(DbgVariable *DV,
+
+/// constructVariableDIE - Construct a DIE for the given DbgVariable.
+DIE *DwarfDebug::constructVariableDIE(DbgVariable *DV,
DbgScope *Scope, CompileUnit *Unit) {
// Get the descriptor.
const DIVariable &VD = DV->getVariable();
- const char *Name = VD.getName();
- if (!Name)
+ StringRef Name = VD.getName();
+ if (Name.empty())
return NULL;
// Translate tag to proper Dwarf tag. The result variable is dropped for
@@ -1536,50 +1431,74 @@ DIE *DwarfDebug::ConstructVariableDIE(DbgVariable *DV,
DIE *AbsDIE = NULL;
if (DbgVariable *AV = DV->getAbstractVariable())
AbsDIE = AV->getDIE();
-
+
if (AbsDIE) {
DIScope DS(Scope->getScopeNode());
DISubprogram InlinedSP = getDISubprogram(DS.getNode());
- DIE *&OriginSPDIE = ModuleCU->getDieMapSlotFor(InlinedSP.getNode());
+ DIE *OriginSPDIE = ModuleCU->getDIE(InlinedSP.getNode());
(void) OriginSPDIE;
assert (OriginSPDIE && "Unable to find Origin DIE for the SP!");
DIE *AbsDIE = DV->getAbstractVariable()->getDIE();
assert (AbsDIE && "Unable to find Origin DIE for the Variable!");
- AddDIEEntry(VariableDie, dwarf::DW_AT_abstract_origin,
+ addDIEEntry(VariableDie, dwarf::DW_AT_abstract_origin,
dwarf::DW_FORM_ref4, AbsDIE);
}
else {
- AddString(VariableDie, dwarf::DW_AT_name, dwarf::DW_FORM_string, Name);
- AddSourceLine(VariableDie, &VD);
+ addString(VariableDie, dwarf::DW_AT_name, dwarf::DW_FORM_string, Name);
+ addSourceLine(VariableDie, &VD);
// Add variable type.
- // FIXME: isBlockByrefVariable should be reformulated in terms of complex
+ // FIXME: isBlockByrefVariable should be reformulated in terms of complex
// addresses instead.
if (VD.isBlockByrefVariable())
- AddType(Unit, VariableDie, GetBlockByrefType(VD.getType(), Name));
+ addType(Unit, VariableDie, getBlockByrefType(VD.getType(), Name));
else
- AddType(Unit, VariableDie, VD.getType());
+ addType(Unit, VariableDie, VD.getType());
}
// Add variable address.
if (!Scope->isAbstractScope()) {
MachineLocation Location;
- Location.set(RI->getFrameRegister(*MF),
- RI->getFrameIndexOffset(*MF, DV->getFrameIndex()));
-
-
+ unsigned FrameReg;
+ int Offset = RI->getFrameIndexReference(*MF, DV->getFrameIndex(), FrameReg);
+ Location.set(FrameReg, Offset);
+
if (VD.hasComplexAddress())
- AddComplexAddress(DV, VariableDie, dwarf::DW_AT_location, Location);
+ addComplexAddress(DV, VariableDie, dwarf::DW_AT_location, Location);
else if (VD.isBlockByrefVariable())
- AddBlockByrefAddress(DV, VariableDie, dwarf::DW_AT_location, Location);
+ addBlockByrefAddress(DV, VariableDie, dwarf::DW_AT_location, Location);
else
- AddAddress(VariableDie, dwarf::DW_AT_location, Location);
+ addAddress(VariableDie, dwarf::DW_AT_location, Location);
}
DV->setDIE(VariableDie);
return VariableDie;
}
-DIE *DwarfDebug::ConstructScopeDIE(DbgScope *Scope) {
+
+void DwarfDebug::addPubTypes(DISubprogram SP) {
+ DICompositeType SPTy = SP.getType();
+ unsigned SPTag = SPTy.getTag();
+ if (SPTag != dwarf::DW_TAG_subroutine_type)
+ return;
+
+ DIArray Args = SPTy.getTypeArray();
+ if (Args.isNull())
+ return;
+
+ for (unsigned i = 0, e = Args.getNumElements(); i != e; ++i) {
+ DIType ATy(Args.getElement(i).getNode());
+ if (ATy.isNull())
+ continue;
+ DICompositeType CATy = getDICompositeType(ATy);
+ if (!CATy.isNull() && !CATy.getName().empty()) {
+ if (DIEEntry *Entry = ModuleCU->getDIEEntry(CATy.getNode()))
+ ModuleCU->addGlobalType(CATy.getName(), Entry->getEntry());
+ }
+ }
+}
+
+/// constructScopeDIE - Construct a DIE for this scope.
+DIE *DwarfDebug::constructScopeDIE(DbgScope *Scope) {
if (!Scope)
return NULL;
DIScope DS(Scope->getScopeNode());
@@ -1588,43 +1507,46 @@ DIE *DwarfDebug::ConstructScopeDIE(DbgScope *Scope) {
DIE *ScopeDIE = NULL;
if (Scope->getInlinedAt())
- ScopeDIE = ConstructInlinedScopeDIE(Scope);
+ ScopeDIE = constructInlinedScopeDIE(Scope);
else if (DS.isSubprogram()) {
if (Scope->isAbstractScope())
- ScopeDIE = ModuleCU->getDieMapSlotFor(DS.getNode());
+ ScopeDIE = ModuleCU->getDIE(DS.getNode());
else
- ScopeDIE = UpdateSubprogramScopeDIE(DS.getNode());
+ ScopeDIE = updateSubprogramScopeDIE(DS.getNode());
}
else {
- ScopeDIE = ConstructLexicalScopeDIE(Scope);
+ ScopeDIE = constructLexicalScopeDIE(Scope);
if (!ScopeDIE) return NULL;
}
// Add variables to scope.
SmallVector<DbgVariable *, 8> &Variables = Scope->getVariables();
for (unsigned i = 0, N = Variables.size(); i < N; ++i) {
- DIE *VariableDIE = ConstructVariableDIE(Variables[i], Scope, ModuleCU);
- if (VariableDIE)
- ScopeDIE->AddChild(VariableDIE);
+ DIE *VariableDIE = constructVariableDIE(Variables[i], Scope, ModuleCU);
+ if (VariableDIE)
+ ScopeDIE->addChild(VariableDIE);
}
// Add nested scopes.
SmallVector<DbgScope *, 4> &Scopes = Scope->getScopes();
for (unsigned j = 0, M = Scopes.size(); j < M; ++j) {
// Define the Scope debug information entry.
- DIE *NestedDIE = ConstructScopeDIE(Scopes[j]);
- if (NestedDIE)
- ScopeDIE->AddChild(NestedDIE);
+ DIE *NestedDIE = constructScopeDIE(Scopes[j]);
+ if (NestedDIE)
+ ScopeDIE->addChild(NestedDIE);
}
- return ScopeDIE;
+
+ if (DS.isSubprogram())
+ addPubTypes(DISubprogram(DS.getNode()));
+
+ return ScopeDIE;
}
/// GetOrCreateSourceID - Look up the source id with the given directory and
/// source file names. If none currently exists, create a new id and insert it
/// in the SourceIds map. This can update DirectoryNames and SourceFileNames
/// maps as well.
-unsigned DwarfDebug::GetOrCreateSourceID(const char *DirName,
- const char *FileName) {
+unsigned DwarfDebug::GetOrCreateSourceID(StringRef DirName, StringRef FileName) {
unsigned DId;
StringMap<unsigned>::iterator DI = DirectoryIdMap.find(DirName);
if (DI != DirectoryIdMap.end()) {
@@ -1657,33 +1579,34 @@ unsigned DwarfDebug::GetOrCreateSourceID(const char *DirName,
return SrcId;
}
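Most of the body is elided by the hunk, but the visible lookup shows the scheme: directory names and file names are interned separately, and the (directory, file) pair yields the source id. A self-contained model of that interning, an assumption about the elided code rather than a copy of it:

#include <map>
#include <string>
#include <utility>

unsigned internId(std::map<std::string, unsigned> &M, const std::string &K) {
  std::map<std::string, unsigned>::iterator I = M.find(K);
  if (I != M.end())
    return I->second;                   // reuse the existing id
  unsigned Id = (unsigned)M.size() + 1; // ids start at 1, like DWARF file numbers
  M.insert(std::make_pair(K, Id));
  return Id;
}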
-void DwarfDebug::ConstructCompileUnit(MDNode *N) {
+void DwarfDebug::constructCompileUnit(MDNode *N) {
DICompileUnit DIUnit(N);
- const char *FN = DIUnit.getFilename();
- const char *Dir = DIUnit.getDirectory();
+ StringRef FN = DIUnit.getFilename();
+ StringRef Dir = DIUnit.getDirectory();
unsigned ID = GetOrCreateSourceID(Dir, FN);
DIE *Die = new DIE(dwarf::DW_TAG_compile_unit);
- AddSectionOffset(Die, dwarf::DW_AT_stmt_list, dwarf::DW_FORM_data4,
+ addSectionOffset(Die, dwarf::DW_AT_stmt_list, dwarf::DW_FORM_data4,
DWLabel("section_line", 0), DWLabel("section_line", 0),
false);
- AddString(Die, dwarf::DW_AT_producer, dwarf::DW_FORM_string,
+ addString(Die, dwarf::DW_AT_producer, dwarf::DW_FORM_string,
DIUnit.getProducer());
- AddUInt(Die, dwarf::DW_AT_language, dwarf::DW_FORM_data1,
+ addUInt(Die, dwarf::DW_AT_language, dwarf::DW_FORM_data1,
DIUnit.getLanguage());
- AddString(Die, dwarf::DW_AT_name, dwarf::DW_FORM_string, FN);
+ addString(Die, dwarf::DW_AT_name, dwarf::DW_FORM_string, FN);
- if (Dir)
- AddString(Die, dwarf::DW_AT_comp_dir, dwarf::DW_FORM_string, Dir);
+ if (!Dir.empty())
+ addString(Die, dwarf::DW_AT_comp_dir, dwarf::DW_FORM_string, Dir);
if (DIUnit.isOptimized())
- AddUInt(Die, dwarf::DW_AT_APPLE_optimized, dwarf::DW_FORM_flag, 1);
+ addUInt(Die, dwarf::DW_AT_APPLE_optimized, dwarf::DW_FORM_flag, 1);
- if (const char *Flags = DIUnit.getFlags())
- AddString(Die, dwarf::DW_AT_APPLE_flags, dwarf::DW_FORM_string, Flags);
+ StringRef Flags = DIUnit.getFlags();
+ if (!Flags.empty())
+ addString(Die, dwarf::DW_AT_APPLE_flags, dwarf::DW_FORM_string, Flags);
unsigned RVer = DIUnit.getRunTimeVersion();
if (RVer)
- AddUInt(Die, dwarf::DW_AT_APPLE_major_runtime_vers,
+ addUInt(Die, dwarf::DW_AT_APPLE_major_runtime_vers,
dwarf::DW_FORM_data1, RVer);
CompileUnit *Unit = new CompileUnit(ID, Die);
@@ -1697,7 +1620,7 @@ void DwarfDebug::ConstructCompileUnit(MDNode *N) {
CompileUnits.push_back(Unit);
}
-void DwarfDebug::ConstructGlobalVariableDIE(MDNode *N) {
+void DwarfDebug::constructGlobalVariableDIE(MDNode *N) {
DIGlobalVariable DI_GV(N);
// If debug information is malformed then ignore it.
@@ -1705,29 +1628,34 @@ void DwarfDebug::ConstructGlobalVariableDIE(MDNode *N) {
return;
// Check for pre-existence.
- DIE *&Slot = ModuleCU->getDieMapSlotFor(DI_GV.getNode());
- if (Slot)
+ if (ModuleCU->getDIE(DI_GV.getNode()))
return;
- DIE *VariableDie = CreateGlobalVariableDIE(ModuleCU, DI_GV);
+ DIE *VariableDie = createGlobalVariableDIE(ModuleCU, DI_GV);
// Add to map.
- Slot = VariableDie;
+ ModuleCU->insertDIE(N, VariableDie);
// Add to context owner.
- ModuleCU->getDie()->AddChild(VariableDie);
+ ModuleCU->getCUDie()->addChild(VariableDie);
// Expose as global. FIXME - need to check external flag.
- ModuleCU->AddGlobal(DI_GV.getName(), VariableDie);
+ ModuleCU->addGlobal(DI_GV.getName(), VariableDie);
+
+ DIType GTy = DI_GV.getType();
+ if (GTy.isCompositeType() && !GTy.getName().empty()) {
+ DIEEntry *Entry = ModuleCU->getDIEEntry(GTy.getNode());
+ assert (Entry && "Missing global type!");
+ ModuleCU->addGlobalType(GTy.getName(), Entry->getEntry());
+ }
return;
}
-void DwarfDebug::ConstructSubprogram(MDNode *N) {
+void DwarfDebug::constructSubprogramDIE(MDNode *N) {
DISubprogram SP(N);
// Check for pre-existence.
- DIE *&Slot = ModuleCU->getDieMapSlotFor(N);
- if (Slot)
+ if (ModuleCU->getDIE(N))
return;
if (!SP.isDefinition())
@@ -1735,23 +1663,24 @@ void DwarfDebug::ConstructSubprogram(MDNode *N) {
// class type.
return;
- DIE *SubprogramDie = CreateSubprogramDIE(ModuleCU, SP);
+ DIE *SubprogramDie = createSubprogramDIE(ModuleCU, SP);
// Add to map.
- Slot = SubprogramDie;
+ ModuleCU->insertDIE(N, SubprogramDie);
// Add to context owner.
- ModuleCU->getDie()->AddChild(SubprogramDie);
+ ModuleCU->getCUDie()->addChild(SubprogramDie);
// Expose as global.
- ModuleCU->AddGlobal(SP.getName(), SubprogramDie);
+ ModuleCU->addGlobal(SP.getName(), SubprogramDie);
+
return;
}
-/// BeginModule - Emit all Dwarf sections that should come prior to the
+/// beginModule - Emit all Dwarf sections that should come prior to the
/// content. Create global DIEs and emit initial debug info sections.
/// This is invoked by the target AsmPrinter.
-void DwarfDebug::BeginModule(Module *M, MachineModuleInfo *mmi) {
+void DwarfDebug::beginModule(Module *M, MachineModuleInfo *mmi) {
this->M = M;
if (TimePassesIsEnabled)
@@ -1766,7 +1695,7 @@ void DwarfDebug::BeginModule(Module *M, MachineModuleInfo *mmi) {
// Create all the compile unit DIEs.
for (DebugInfoFinder::iterator I = DbgFinder.compile_unit_begin(),
E = DbgFinder.compile_unit_end(); I != E; ++I)
- ConstructCompileUnit(*I);
+ constructCompileUnit(*I);
if (CompileUnits.empty()) {
if (TimePassesIsEnabled)
@@ -1787,13 +1716,13 @@ void DwarfDebug::BeginModule(Module *M, MachineModuleInfo *mmi) {
if (GV.getContext().getNode() != GV.getCompileUnit().getNode())
ScopedGVs.push_back(*I);
else
- ConstructGlobalVariableDIE(*I);
+ constructGlobalVariableDIE(*I);
}
// Create DIEs for each subprogram.
for (DebugInfoFinder::iterator I = DbgFinder.subprogram_begin(),
E = DbgFinder.subprogram_end(); I != E; ++I)
- ConstructSubprogram(*I);
+ constructSubprogramDIE(*I);
MMI = mmi;
shouldEmit = true;
@@ -1819,15 +1748,15 @@ void DwarfDebug::BeginModule(Module *M, MachineModuleInfo *mmi) {
}
// Emit initial sections
- EmitInitial();
+ emitInitial();
if (TimePassesIsEnabled)
DebugTimer->stopTimer();
}
-/// EndModule - Emit all Dwarf sections that should come after the content.
+/// endModule - Emit all Dwarf sections that should come after the content.
///
-void DwarfDebug::EndModule() {
+void DwarfDebug::endModule() {
if (!ModuleCU)
return;
@@ -1838,7 +1767,7 @@ void DwarfDebug::EndModule() {
for (SmallPtrSet<DIE *, 4>::iterator AI = InlinedSubprogramDIEs.begin(),
AE = InlinedSubprogramDIEs.end(); AI != AE; ++AI) {
DIE *ISP = *AI;
- AddUInt(ISP, dwarf::DW_AT_inline, 0, dwarf::DW_INL_inlined);
+ addUInt(ISP, dwarf::DW_AT_inline, 0, dwarf::DW_INL_inlined);
}
// Standard sections final addresses.
@@ -1854,52 +1783,56 @@ void DwarfDebug::EndModule() {
}
// Emit common frame information.
- EmitCommonDebugFrame();
+ emitCommonDebugFrame();
// Emit function debug frame information
for (std::vector<FunctionDebugFrameInfo>::iterator I = DebugFrames.begin(),
E = DebugFrames.end(); I != E; ++I)
- EmitFunctionDebugFrame(*I);
+ emitFunctionDebugFrame(*I);
// Compute DIE offsets and sizes.
- SizeAndOffsets();
+ computeSizeAndOffsets();
// Emit all the DIEs into a debug info section
- EmitDebugInfo();
+ emitDebugInfo();
// Corresponding abbreviations into an abbrev section.
- EmitAbbreviations();
+ emitAbbreviations();
// Emit source line correspondence into a debug line section.
- EmitDebugLines();
+ emitDebugLines();
// Emit info into a debug pubnames section.
- EmitDebugPubNames();
+ emitDebugPubNames();
+
+ // Emit info into a debug pubtypes section.
+ emitDebugPubTypes();
// Emit info into a debug str section.
- EmitDebugStr();
+ emitDebugStr();
// Emit info into a debug loc section.
- EmitDebugLoc();
+ emitDebugLoc();
// Emit info into a debug aranges section.
EmitDebugARanges();
// Emit info into a debug ranges section.
- EmitDebugRanges();
+ emitDebugRanges();
// Emit info into a debug macinfo section.
- EmitDebugMacInfo();
+ emitDebugMacInfo();
// Emit inline info.
- EmitDebugInlineInfo();
+ emitDebugInlineInfo();
if (TimePassesIsEnabled)
DebugTimer->stopTimer();
}
/// findAbstractVariable - Find abstract variable, if any, associated with Var.
-DbgVariable *DwarfDebug::findAbstractVariable(DIVariable &Var, unsigned FrameIdx,
+DbgVariable *DwarfDebug::findAbstractVariable(DIVariable &Var,
+ unsigned FrameIdx,
DILocation &ScopeLoc) {
DbgVariable *AbsDbgVariable = AbstractVariables.lookup(Var.getNode());
@@ -1911,13 +1844,13 @@ DbgVariable *DwarfDebug::findAbstractVariable(DIVariable &Var, unsigned FrameIdx
return NULL;
AbsDbgVariable = new DbgVariable(Var, FrameIdx);
- Scope->AddVariable(AbsDbgVariable);
+ Scope->addVariable(AbsDbgVariable);
AbstractVariables[Var.getNode()] = AbsDbgVariable;
return AbsDbgVariable;
}
-/// CollectVariableInfo - Populate DbgScope entries with variables' info.
-void DwarfDebug::CollectVariableInfo() {
+/// collectVariableInfo - Populate DbgScope entries with variables' info.
+void DwarfDebug::collectVariableInfo() {
if (!MMI) return;
MachineModuleInfo::VariableDbgInfoMapTy &VMap = MMI->getVariableDbgInfo();
@@ -1933,31 +1866,32 @@ void DwarfDebug::CollectVariableInfo() {
DbgScope *Scope =
ConcreteScopes.lookup(ScopeLoc.getOrigLocation().getNode());
if (!Scope)
- Scope = DbgScopeMap.lookup(ScopeLoc.getScope().getNode());
+ Scope = DbgScopeMap.lookup(ScopeLoc.getScope().getNode());
// If variable scope is not found then skip this variable.
if (!Scope)
continue;
DbgVariable *RegVar = new DbgVariable(DV, VP.first);
- Scope->AddVariable(RegVar);
- if (DbgVariable *AbsDbgVariable = findAbstractVariable(DV, VP.first, ScopeLoc))
+ Scope->addVariable(RegVar);
+ if (DbgVariable *AbsDbgVariable = findAbstractVariable(DV, VP.first,
+ ScopeLoc))
RegVar->setAbstractVariable(AbsDbgVariable);
}
}
-/// BeginScope - Process beginning of a scope starting at Label.
-void DwarfDebug::BeginScope(const MachineInstr *MI, unsigned Label) {
+/// beginScope - Process beginning of a scope starting at Label.
+void DwarfDebug::beginScope(const MachineInstr *MI, unsigned Label) {
InsnToDbgScopeMapTy::iterator I = DbgScopeBeginMap.find(MI);
if (I == DbgScopeBeginMap.end())
return;
- ScopeVector &SD = DbgScopeBeginMap[MI];
+ ScopeVector &SD = I->second;
for (ScopeVector::iterator SDI = SD.begin(), SDE = SD.end();
- SDI != SDE; ++SDI)
+ SDI != SDE; ++SDI)
(*SDI)->setStartLabelID(Label);
}
-/// EndScope - Process end of a scope.
-void DwarfDebug::EndScope(const MachineInstr *MI) {
+/// endScope - Process end of a scope.
+void DwarfDebug::endScope(const MachineInstr *MI) {
InsnToDbgScopeMapTy::iterator I = DbgScopeEndMap.find(MI);
if (I == DbgScopeEndMap.end())
return;
@@ -1967,7 +1901,7 @@ void DwarfDebug::EndScope(const MachineInstr *MI) {
SmallVector<DbgScope *, 2> &SD = I->second;
for (SmallVector<DbgScope *, 2>::iterator SDI = SD.begin(), SDE = SD.end();
- SDI != SDE; ++SDI)
+ SDI != SDE; ++SDI)
(*SDI)->setEndLabelID(Label);
return;
}
@@ -1981,7 +1915,7 @@ void DwarfDebug::createDbgScope(MDNode *Scope, MDNode *InlinedAt) {
return;
WScope = new DbgScope(NULL, DIDescriptor(Scope), NULL);
DbgScopeMap.insert(std::make_pair(Scope, WScope));
- if (DIDescriptor(Scope).isLexicalBlock())
+ if (DIDescriptor(Scope).isLexicalBlock())
createDbgScope(DILexicalBlock(Scope).getContext().getNode(), NULL);
return;
}
@@ -1996,9 +1930,9 @@ void DwarfDebug::createDbgScope(MDNode *Scope, MDNode *InlinedAt) {
createDbgScope(DL.getScope().getNode(), DL.getOrigLocation().getNode());
}
-/// ExtractScopeInformation - Scan machine instructions in this function
+/// extractScopeInformation - Scan machine instructions in this function
/// and collect DbgScopes. Return true if at least one scope was found.
-bool DwarfDebug::ExtractScopeInformation(MachineFunction *MF) {
+bool DwarfDebug::extractScopeInformation(MachineFunction *MF) {
// If scope information was extracted using .dbg intrinsics then there is no
// need to extract this information by scanning each instruction.
if (!DbgScopeMap.empty())
@@ -2015,7 +1949,7 @@ bool DwarfDebug::ExtractScopeInformation(MachineFunction *MF) {
DebugLocTuple DLT = MF->getDebugLocTuple(DL);
if (!DLT.Scope) continue;
// There is no need to create another DIE for the compile unit. For all
- // other scopes, create one DbgScope now. This will be translated
+ // other scopes, create one DbgScope now. This will be translated
// into a scope DIE at the end.
if (DIDescriptor(DLT.Scope).isCompileUnit()) continue;
createDbgScope(DLT.Scope, DLT.InlinedAtLoc);
@@ -2034,7 +1968,7 @@ bool DwarfDebug::ExtractScopeInformation(MachineFunction *MF) {
DebugLocTuple DLT = MF->getDebugLocTuple(DL);
if (!DLT.Scope) continue;
// There is no need to create another DIE for the compile unit. For all
- // other scopes, create one DbgScope now. This will be translated
+ // other scopes, create one DbgScope now. This will be translated
// into a scope DIE at the end.
if (DIDescriptor(DLT.Scope).isCompileUnit()) continue;
DbgScope *Scope = getUpdatedDbgScope(DLT.Scope, MInsn, DLT.InlinedAtLoc);
@@ -2049,7 +1983,7 @@ bool DwarfDebug::ExtractScopeInformation(MachineFunction *MF) {
if (DI->second->isAbstractScope())
continue;
assert (DI->second->getFirstInsn() && "Invalid first instruction!");
- DI->second->FixInstructionMarkers();
+ DI->second->fixInstructionMarkers();
assert (DI->second->getLastInsn() && "Invalid last instruction!");
}
@@ -2083,9 +2017,9 @@ bool DwarfDebug::ExtractScopeInformation(MachineFunction *MF) {
return !DbgScopeMap.empty();
}
-/// BeginFunction - Gather pre-function debug information. Assumes being
+/// beginFunction - Gather pre-function debug information. Assumes being
/// emitted immediately after the function entry point.
-void DwarfDebug::BeginFunction(MachineFunction *MF) {
+void DwarfDebug::beginFunction(MachineFunction *MF) {
this->MF = MF;
if (!ShouldEmitDwarfDebug()) return;
@@ -2093,9 +2027,10 @@ void DwarfDebug::BeginFunction(MachineFunction *MF) {
if (TimePassesIsEnabled)
DebugTimer->startTimer();
- if (!ExtractScopeInformation(MF))
+ if (!extractScopeInformation(MF))
return;
- CollectVariableInfo();
+
+ collectVariableInfo();
// Begin accumulating function debug information.
MMI->BeginFunction(MF);
@@ -2111,9 +2046,9 @@ void DwarfDebug::BeginFunction(MachineFunction *MF) {
unsigned LabelID = 0;
DISubprogram SP = getDISubprogram(DLT.Scope);
if (!SP.isNull())
- LabelID = RecordSourceLine(SP.getLineNumber(), 0, DLT.Scope);
+ LabelID = recordSourceLine(SP.getLineNumber(), 0, DLT.Scope);
else
- LabelID = RecordSourceLine(DLT.Line, DLT.Col, DLT.Scope);
+ LabelID = recordSourceLine(DLT.Line, DLT.Col, DLT.Scope);
Asm->printLabel(LabelID);
O << '\n';
}
@@ -2121,9 +2056,9 @@ void DwarfDebug::BeginFunction(MachineFunction *MF) {
DebugTimer->stopTimer();
}
-/// EndFunction - Gather and emit post-function debug information.
+/// endFunction - Gather and emit post-function debug information.
///
-void DwarfDebug::EndFunction(MachineFunction *MF) {
+void DwarfDebug::endFunction(MachineFunction *MF) {
if (!ShouldEmitDwarfDebug()) return;
if (TimePassesIsEnabled)
@@ -2148,10 +2083,10 @@ void DwarfDebug::EndFunction(MachineFunction *MF) {
// Construct abstract scopes.
for (SmallVector<DbgScope *, 4>::iterator AI = AbstractScopesList.begin(),
- AE = AbstractScopesList.end(); AI != AE; ++AI)
- ConstructScopeDIE(*AI);
+ AE = AbstractScopesList.end(); AI != AE; ++AI)
+ constructScopeDIE(*AI);
- ConstructScopeDIE(CurrentFnDbgScope);
+ constructScopeDIE(CurrentFnDbgScope);
DebugFrames.push_back(FunctionDebugFrameInfo(SubprogramCount,
MMI->getFrameMoves()));
@@ -2172,10 +2107,10 @@ void DwarfDebug::EndFunction(MachineFunction *MF) {
DebugTimer->stopTimer();
}
-/// RecordSourceLine - Records location information and associates it with a
+/// recordSourceLine - Records location information and associates it with a
/// label. Returns a unique label ID used to generate a label and provide
/// correspondence to the source line list.
-unsigned DwarfDebug::RecordSourceLine(unsigned Line, unsigned Col,
+unsigned DwarfDebug::recordSourceLine(unsigned Line, unsigned Col,
MDNode *S) {
if (!MMI)
return 0;
@@ -2183,8 +2118,8 @@ unsigned DwarfDebug::RecordSourceLine(unsigned Line, unsigned Col,
if (TimePassesIsEnabled)
DebugTimer->startTimer();
- const char *Dir = NULL;
- const char *Fn = NULL;
+ StringRef Dir;
+ StringRef Fn;
DIDescriptor Scope(S);
if (Scope.isCompileUnit()) {
@@ -2234,17 +2169,18 @@ unsigned DwarfDebug::getOrCreateSourceID(const std::string &DirName,
// Emit Methods
//===----------------------------------------------------------------------===//
-/// SizeAndOffsetDie - Compute the size and offset of a DIE.
+/// computeSizeAndOffset - Compute the size and offset of a DIE.
///
-unsigned DwarfDebug::SizeAndOffsetDie(DIE *Die, unsigned Offset, bool Last) {
+unsigned
+DwarfDebug::computeSizeAndOffset(DIE *Die, unsigned Offset, bool Last) {
// Get the children.
const std::vector<DIE *> &Children = Die->getChildren();
// If not last sibling and has children then add sibling offset attribute.
- if (!Last && !Children.empty()) Die->AddSiblingOffset();
+ if (!Last && !Children.empty()) Die->addSiblingOffset();
// Record the abbreviation.
- AssignAbbrevNumber(Die->getAbbrev());
+ assignAbbrevNumber(Die->getAbbrev());
// Get the abbreviation for this DIE.
unsigned AbbrevNumber = Die->getAbbrevNumber();
@@ -2270,7 +2206,7 @@ unsigned DwarfDebug::SizeAndOffsetDie(DIE *Die, unsigned Offset, bool Last) {
"Children flag not set");
for (unsigned j = 0, M = Children.size(); j < M; ++j)
- Offset = SizeAndOffsetDie(Children[j], Offset, (j + 1) == M);
+ Offset = computeSizeAndOffset(Children[j], Offset, (j + 1) == M);
// End of children marker.
Offset += sizeof(int8_t);
@@ -2280,9 +2216,9 @@ unsigned DwarfDebug::SizeAndOffsetDie(DIE *Die, unsigned Offset, bool Last) {
return Offset;
}
-/// SizeAndOffsets - Compute the size and offset of all the DIEs.
+/// computeSizeAndOffsets - Compute the size and offset of all the DIEs.
///
-void DwarfDebug::SizeAndOffsets() {
+void DwarfDebug::computeSizeAndOffsets() {
// Compute size of compile unit header.
static unsigned Offset =
sizeof(int32_t) + // Length of Compilation Unit Info
@@ -2290,13 +2226,13 @@ void DwarfDebug::SizeAndOffsets() {
sizeof(int32_t) + // Offset Into Abbrev. Section
sizeof(int8_t); // Pointer Size (in bytes)
- SizeAndOffsetDie(ModuleCU->getDie(), Offset, true);
+ computeSizeAndOffset(ModuleCU->getCUDie(), Offset, true);
CompileUnitOffsets[ModuleCU] = 0;
}
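The initial offset computed above, spelled out: a DWARF32 compile unit header is 11 bytes, so the first DIE of the unit lands at offset 11 within its .debug_info contribution. The 2-byte version field falls in the part elided by the hunk; its presence here is an inference from the DWARF header layout.

#include <cassert>
#include <cstdint>

int main() {
  unsigned Offset = sizeof(int32_t)  // unit length
                  + sizeof(int16_t)  // DWARF version (elided in the hunk above)
                  + sizeof(int32_t)  // offset into .debug_abbrev
                  + sizeof(int8_t);  // address size
  assert(Offset == 11); // first DIE of the CU starts here
  return 0;
}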
-/// EmitInitial - Emit initial Dwarf declarations. This is necessary for cc
+/// emitInitial - Emit initial Dwarf declarations. This is necessary for cc
/// tools to recognize the object file contains Dwarf information.
-void DwarfDebug::EmitInitial() {
+void DwarfDebug::emitInitial() {
// Check to see if we already emitted initial headers.
if (didInitial) return;
didInitial = true;
@@ -2327,6 +2263,8 @@ void DwarfDebug::EmitInitial() {
EmitLabel("section_loc", 0);
Asm->OutStreamer.SwitchSection(TLOF.getDwarfPubNamesSection());
EmitLabel("section_pubnames", 0);
+ Asm->OutStreamer.SwitchSection(TLOF.getDwarfPubTypesSection());
+ EmitLabel("section_pubtypes", 0);
Asm->OutStreamer.SwitchSection(TLOF.getDwarfStrSection());
EmitLabel("section_str", 0);
Asm->OutStreamer.SwitchSection(TLOF.getDwarfRangesSection());
@@ -2338,9 +2276,9 @@ void DwarfDebug::EmitInitial() {
EmitLabel("data_begin", 0);
}
-/// EmitDIE - Recusively Emits a debug information entry.
+/// emitDIE - Recursively emits a debug information entry.
///
-void DwarfDebug::EmitDIE(DIE *Die) {
+void DwarfDebug::emitDIE(DIE *Die) {
// Get the abbreviation for this DIE.
unsigned AbbrevNumber = Die->getAbbrevNumber();
const DIEAbbrev *Abbrev = Abbreviations[AbbrevNumber - 1];
@@ -2370,7 +2308,7 @@ void DwarfDebug::EmitDIE(DIE *Die) {
switch (Attr) {
case dwarf::DW_AT_sibling:
- Asm->EmitInt32(Die->SiblingOffset());
+ Asm->EmitInt32(Die->getSiblingOffset());
break;
case dwarf::DW_AT_abstract_origin: {
DIEEntry *E = cast<DIEEntry>(Values[i]);
@@ -2393,16 +2331,16 @@ void DwarfDebug::EmitDIE(DIE *Die) {
const std::vector<DIE *> &Children = Die->getChildren();
for (unsigned j = 0, M = Children.size(); j < M; ++j)
- EmitDIE(Children[j]);
+ emitDIE(Children[j]);
Asm->EmitInt8(0); Asm->EOL("End Of Children Mark");
}
}
-/// EmitDebugInfo / EmitDebugInfoPerCU - Emit the debug info section.
+/// emitDebugInfo / emitDebugInfoPerCU - Emit the debug info section.
///
-void DwarfDebug::EmitDebugInfoPerCU(CompileUnit *Unit) {
- DIE *Die = Unit->getDie();
+void DwarfDebug::emitDebugInfoPerCU(CompileUnit *Unit) {
+ DIE *Die = Unit->getCUDie();
// Emit the compile units header.
EmitLabel("info_begin", Unit->getID());
@@ -2420,7 +2358,7 @@ void DwarfDebug::EmitDebugInfoPerCU(CompileUnit *Unit) {
Asm->EOL("Offset Into Abbrev. Section");
Asm->EmitInt8(TD->getPointerSize()); Asm->EOL("Address Size (in bytes)");
- EmitDIE(Die);
+ emitDIE(Die);
// FIXME - extra padding for gdb bug.
Asm->EmitInt8(0); Asm->EOL("Extra Pad For GDB");
Asm->EmitInt8(0); Asm->EOL("Extra Pad For GDB");
@@ -2431,17 +2369,17 @@ void DwarfDebug::EmitDebugInfoPerCU(CompileUnit *Unit) {
Asm->EOL();
}
-void DwarfDebug::EmitDebugInfo() {
+void DwarfDebug::emitDebugInfo() {
// Start debug info section.
Asm->OutStreamer.SwitchSection(
Asm->getObjFileLowering().getDwarfInfoSection());
- EmitDebugInfoPerCU(ModuleCU);
+ emitDebugInfoPerCU(ModuleCU);
}
-/// EmitAbbreviations - Emit the abbreviation section.
+/// emitAbbreviations - Emit the abbreviation section.
///
-void DwarfDebug::EmitAbbreviations() const {
+void DwarfDebug::emitAbbreviations() const {
// Check to see if it is worth the effort.
if (!Abbreviations.empty()) {
// Start the debug abbrev section.
@@ -2473,10 +2411,10 @@ void DwarfDebug::EmitAbbreviations() const {
}
}
-/// EmitEndOfLineMatrix - Emit the last address of the section and the end of
+/// emitEndOfLineMatrix - Emit the last address of the section and the end of
/// the line matrix.
///
-void DwarfDebug::EmitEndOfLineMatrix(unsigned SectionEnd) {
+void DwarfDebug::emitEndOfLineMatrix(unsigned SectionEnd) {
// Define last address of section.
Asm->EmitInt8(0); Asm->EOL("Extended Op");
Asm->EmitInt8(TD->getPointerSize() + 1); Asm->EOL("Op size");
@@ -2489,9 +2427,9 @@ void DwarfDebug::EmitEndOfLineMatrix(unsigned SectionEnd) {
Asm->EmitInt8(1); Asm->EOL();
}
-/// EmitDebugLines - Emit source line information.
+/// emitDebugLines - Emit source line information.
///
-void DwarfDebug::EmitDebugLines() {
+void DwarfDebug::emitDebugLines() {
// If the target is using .loc/.file, the assembler will be emitting the
// .debug_line table automatically.
if (MAI->hasDotLocAndDotFile())
@@ -2640,22 +2578,22 @@ void DwarfDebug::EmitDebugLines() {
}
}
- EmitEndOfLineMatrix(j + 1);
+ emitEndOfLineMatrix(j + 1);
}
if (SecSrcLinesSize == 0)
// Because we're emitting a debug_line section, we still need a line
// table. The linker and friends expect it to exist. If there's nothing to
// put into it, emit an empty table.
- EmitEndOfLineMatrix(1);
+ emitEndOfLineMatrix(1);
EmitLabel("line_end", 0);
Asm->EOL();
}
-/// EmitCommonDebugFrame - Emit common frame info into a debug frame section.
+/// emitCommonDebugFrame - Emit common frame info into a debug frame section.
///
-void DwarfDebug::EmitCommonDebugFrame() {
+void DwarfDebug::emitCommonDebugFrame() {
if (!MAI->doesDwarfRequireFrameSection())
return;
@@ -2698,10 +2636,10 @@ void DwarfDebug::EmitCommonDebugFrame() {
Asm->EOL();
}
-/// EmitFunctionDebugFrame - Emit per function frame info into a debug frame
+/// emitFunctionDebugFrame - Emit per function frame info into a debug frame
/// section.
void
-DwarfDebug::EmitFunctionDebugFrame(const FunctionDebugFrameInfo&DebugFrameInfo){
+DwarfDebug::emitFunctionDebugFrame(const FunctionDebugFrameInfo&DebugFrameInfo){
if (!MAI->doesDwarfRequireFrameSection())
return;
@@ -2734,7 +2672,7 @@ DwarfDebug::EmitFunctionDebugFrame(const FunctionDebugFrameInfo&DebugFrameInfo){
Asm->EOL();
}
-void DwarfDebug::EmitDebugPubNamesPerCU(CompileUnit *Unit) {
+void DwarfDebug::emitDebugPubNamesPerCU(CompileUnit *Unit) {
EmitDifference("pubnames_end", Unit->getID(),
"pubnames_begin", Unit->getID(), true);
Asm->EOL("Length of Public Names Info");
@@ -2751,7 +2689,7 @@ void DwarfDebug::EmitDebugPubNamesPerCU(CompileUnit *Unit) {
true);
Asm->EOL("Compilation Unit Length");
- StringMap<DIE*> &Globals = Unit->getGlobals();
+ const StringMap<DIE*> &Globals = Unit->getGlobals();
for (StringMap<DIE*>::const_iterator
GI = Globals.begin(), GE = Globals.end(); GI != GE; ++GI) {
const char *Name = GI->getKeyData();
@@ -2767,19 +2705,55 @@ void DwarfDebug::EmitDebugPubNamesPerCU(CompileUnit *Unit) {
Asm->EOL();
}
-/// EmitDebugPubNames - Emit visible names into a debug pubnames section.
+/// emitDebugPubNames - Emit visible names into a debug pubnames section.
///
-void DwarfDebug::EmitDebugPubNames() {
+void DwarfDebug::emitDebugPubNames() {
// Start the dwarf pubnames section.
Asm->OutStreamer.SwitchSection(
Asm->getObjFileLowering().getDwarfPubNamesSection());
- EmitDebugPubNamesPerCU(ModuleCU);
+ emitDebugPubNamesPerCU(ModuleCU);
+}
+
+void DwarfDebug::emitDebugPubTypes() {
+ // Start the dwarf pubtypes section.
+ Asm->OutStreamer.SwitchSection(
+ Asm->getObjFileLowering().getDwarfPubTypesSection());
+ EmitDifference("pubtypes_end", ModuleCU->getID(),
+ "pubtypes_begin", ModuleCU->getID(), true);
+ Asm->EOL("Length of Public Types Info");
+
+ EmitLabel("pubtypes_begin", ModuleCU->getID());
+
+ Asm->EmitInt16(dwarf::DWARF_VERSION); Asm->EOL("DWARF Version");
+
+ EmitSectionOffset("info_begin", "section_info",
+ ModuleCU->getID(), 0, true, false);
+ Asm->EOL("Offset of Compilation ModuleCU Info");
+
+ EmitDifference("info_end", ModuleCU->getID(), "info_begin", ModuleCU->getID(),
+ true);
+ Asm->EOL("Compilation ModuleCU Length");
+
+ const StringMap<DIE*> &Globals = ModuleCU->getGlobalTypes();
+ for (StringMap<DIE*>::const_iterator
+ GI = Globals.begin(), GE = Globals.end(); GI != GE; ++GI) {
+ const char *Name = GI->getKeyData();
+ DIE *Entity = GI->second;
+
+ Asm->EmitInt32(Entity->getOffset()); Asm->EOL("DIE offset");
+ Asm->EmitString(Name, strlen(Name)); Asm->EOL("External Name");
+ }
+
+ Asm->EmitInt32(0); Asm->EOL("End Mark");
+ EmitLabel("pubtypes_end", ModuleCU->getID());
+
+ Asm->EOL();
}
-/// EmitDebugStr - Emit visible names into a debug str section.
+/// emitDebugStr - Emit visible names into a debug str section.
///
-void DwarfDebug::EmitDebugStr() {
+void DwarfDebug::emitDebugStr() {
// Check to see if it is worth the effort.
if (!StringPool.empty()) {
// Start the dwarf str section.
@@ -2801,9 +2775,9 @@ void DwarfDebug::EmitDebugStr() {
}
}
-/// EmitDebugLoc - Emit visible names into a debug loc section.
+/// emitDebugLoc - Emit visible names into a debug loc section.
///
-void DwarfDebug::EmitDebugLoc() {
+void DwarfDebug::emitDebugLoc() {
// Start the dwarf loc section.
Asm->OutStreamer.SwitchSection(
Asm->getObjFileLowering().getDwarfLocSection());
@@ -2847,18 +2821,18 @@ void DwarfDebug::EmitDebugARanges() {
Asm->EOL();
}
-/// EmitDebugRanges - Emit visible names into a debug ranges section.
+/// emitDebugRanges - Emit visible names into a debug ranges section.
///
-void DwarfDebug::EmitDebugRanges() {
+void DwarfDebug::emitDebugRanges() {
// Start the dwarf ranges section.
Asm->OutStreamer.SwitchSection(
Asm->getObjFileLowering().getDwarfRangesSection());
Asm->EOL();
}
-/// EmitDebugMacInfo - Emit visible names into a debug macinfo section.
+/// emitDebugMacInfo - Emit visible names into a debug macinfo section.
///
-void DwarfDebug::EmitDebugMacInfo() {
+void DwarfDebug::emitDebugMacInfo() {
if (const MCSection *LineInfo =
Asm->getObjFileLowering().getDwarfMacroInfoSection()) {
// Start the dwarf macinfo section.
@@ -2867,7 +2841,7 @@ void DwarfDebug::EmitDebugMacInfo() {
}
}
-/// EmitDebugInlineInfo - Emit inline info using following format.
+/// emitDebugInlineInfo - Emit inline info using the following format.
/// Section Header:
/// 1. length of section
/// 2. Dwarf version number
@@ -2885,7 +2859,7 @@ void DwarfDebug::EmitDebugMacInfo() {
/// inlined instance; the die_offset points to the inlined_subroutine die in the
/// __debug_info section, and the low_pc is the starting address for the
/// inlining instance.
-void DwarfDebug::EmitDebugInlineInfo() {
+void DwarfDebug::emitDebugInlineInfo() {
if (!MAI->doesDwarfUsesInlineInfoSection())
return;
@@ -2906,17 +2880,18 @@ void DwarfDebug::EmitDebugInlineInfo() {
for (SmallVector<MDNode *, 4>::iterator I = InlinedSPNodes.begin(),
E = InlinedSPNodes.end(); I != E; ++I) {
-
+
// for (ValueMap<MDNode *, SmallVector<InlineInfoLabels, 4> >::iterator
// I = InlineInfo.begin(), E = InlineInfo.end(); I != E; ++I) {
MDNode *Node = *I;
- ValueMap<MDNode *, SmallVector<InlineInfoLabels, 4> >::iterator II = InlineInfo.find(Node);
+ ValueMap<MDNode *, SmallVector<InlineInfoLabels, 4> >::iterator II
+ = InlineInfo.find(Node);
SmallVector<InlineInfoLabels, 4> &Labels = II->second;
DISubprogram SP(Node);
- const char *LName = SP.getLinkageName();
- const char *Name = SP.getName();
+ StringRef LName = SP.getLinkageName();
+ StringRef Name = SP.getName();
- if (!LName)
+ if (LName.empty())
Asm->EmitString(Name);
else {
// Skip special LLVM prefix that is used to inform the asm printer to not
@@ -2924,14 +2899,14 @@ void DwarfDebug::EmitDebugInlineInfo() {
// Objective-C symbol names and symbol whose name is replaced using GCC's
// __asm__ attribute.
if (LName[0] == 1)
- LName = &LName[1];
+ LName = LName.substr(1);
// Asm->EmitString(LName);
EmitSectionOffset("string", "section_str",
StringPool.idFor(LName), false, true);
}
Asm->EOL("MIPS linkage name");
-// Asm->EmitString(Name);
+// Asm->EmitString(Name);
EmitSectionOffset("string", "section_str",
StringPool.idFor(Name), false, true);
Asm->EOL("Function name");
diff --git a/lib/CodeGen/AsmPrinter/DwarfDebug.h b/lib/CodeGen/AsmPrinter/DwarfDebug.h
index 646de8f..679d9b9 100644
--- a/lib/CodeGen/AsmPrinter/DwarfDebug.h
+++ b/lib/CodeGen/AsmPrinter/DwarfDebug.h
@@ -106,13 +106,9 @@ class DwarfDebug : public Dwarf {
/// Lines - List of source line correspondences.
std::vector<SrcLineInfo> Lines;
- /// ValuesSet - Used to uniquely define values.
+ /// DIEValues - A list of all the unique values in use.
///
- FoldingSet<DIEValue> ValuesSet;
-
- /// Values - A list of all the unique values in use.
- ///
- std::vector<DIEValue *> Values;
+ std::vector<DIEValue *> DIEValues;
/// StringPool - A UniqueVector of strings used by indirect references.
///
@@ -229,137 +225,135 @@ class DwarfDebug : public Dwarf {
return SourceIds.size();
}
- /// AssignAbbrevNumber - Define a unique number for the abbreviation.
+ /// assignAbbrevNumber - Define a unique number for the abbreviation.
///
- void AssignAbbrevNumber(DIEAbbrev &Abbrev);
+ void assignAbbrevNumber(DIEAbbrev &Abbrev);
- /// CreateDIEEntry - Creates a new DIEEntry to be a proxy for a debug
+ /// createDIEEntry - Creates a new DIEEntry to be a proxy for a debug
/// information entry.
- DIEEntry *CreateDIEEntry(DIE *Entry = NULL);
-
- /// SetDIEEntry - Set a DIEEntry once the debug information entry is defined.
- ///
- void SetDIEEntry(DIEEntry *Value, DIE *Entry);
+ DIEEntry *createDIEEntry(DIE *Entry = NULL);
- /// AddUInt - Add an unsigned integer attribute data and value.
+ /// addUInt - Add an unsigned integer attribute data and value.
///
- void AddUInt(DIE *Die, unsigned Attribute, unsigned Form, uint64_t Integer);
+ void addUInt(DIE *Die, unsigned Attribute, unsigned Form, uint64_t Integer);
- /// AddSInt - Add an signed integer attribute data and value.
+ /// addSInt - Add a signed integer attribute data and value.
///
- void AddSInt(DIE *Die, unsigned Attribute, unsigned Form, int64_t Integer);
+ void addSInt(DIE *Die, unsigned Attribute, unsigned Form, int64_t Integer);
- /// AddString - Add a string attribute data and value.
+ /// addString - Add a string attribute data and value.
///
- void AddString(DIE *Die, unsigned Attribute, unsigned Form,
- const std::string &String);
+ void addString(DIE *Die, unsigned Attribute, unsigned Form,
+ const StringRef Str);
- /// AddLabel - Add a Dwarf label attribute data and value.
+ /// addLabel - Add a Dwarf label attribute data and value.
///
- void AddLabel(DIE *Die, unsigned Attribute, unsigned Form,
+ void addLabel(DIE *Die, unsigned Attribute, unsigned Form,
const DWLabel &Label);
- /// AddObjectLabel - Add an non-Dwarf label attribute data and value.
+ /// addObjectLabel - Add a non-Dwarf label attribute data and value.
///
- void AddObjectLabel(DIE *Die, unsigned Attribute, unsigned Form,
+ void addObjectLabel(DIE *Die, unsigned Attribute, unsigned Form,
const std::string &Label);
- /// AddSectionOffset - Add a section offset label attribute data and value.
+ /// addSectionOffset - Add a section offset label attribute data and value.
///
- void AddSectionOffset(DIE *Die, unsigned Attribute, unsigned Form,
+ void addSectionOffset(DIE *Die, unsigned Attribute, unsigned Form,
const DWLabel &Label, const DWLabel &Section,
bool isEH = false, bool useSet = true);
- /// AddDelta - Add a label delta attribute data and value.
+ /// addDelta - Add a label delta attribute data and value.
///
- void AddDelta(DIE *Die, unsigned Attribute, unsigned Form,
+ void addDelta(DIE *Die, unsigned Attribute, unsigned Form,
const DWLabel &Hi, const DWLabel &Lo);
- /// AddDIEEntry - Add a DIE attribute data and value.
+ /// addDIEEntry - Add a DIE attribute data and value.
///
- void AddDIEEntry(DIE *Die, unsigned Attribute, unsigned Form, DIE *Entry) {
- Die->AddValue(Attribute, Form, CreateDIEEntry(Entry));
+ void addDIEEntry(DIE *Die, unsigned Attribute, unsigned Form, DIE *Entry) {
+ Die->addValue(Attribute, Form, createDIEEntry(Entry));
}
- /// AddBlock - Add block data.
+ /// addBlock - Add block data.
///
- void AddBlock(DIE *Die, unsigned Attribute, unsigned Form, DIEBlock *Block);
+ void addBlock(DIE *Die, unsigned Attribute, unsigned Form, DIEBlock *Block);
- /// AddSourceLine - Add location information to specified debug information
+ /// addSourceLine - Add location information to specified debug information
/// entry.
- void AddSourceLine(DIE *Die, const DIVariable *V);
- void AddSourceLine(DIE *Die, const DIGlobal *G);
- void AddSourceLine(DIE *Die, const DISubprogram *SP);
- void AddSourceLine(DIE *Die, const DIType *Ty);
+ void addSourceLine(DIE *Die, const DIVariable *V);
+ void addSourceLine(DIE *Die, const DIGlobal *G);
+ void addSourceLine(DIE *Die, const DISubprogram *SP);
+ void addSourceLine(DIE *Die, const DIType *Ty);
- /// AddAddress - Add an address attribute to a die based on the location
+ /// addAddress - Add an address attribute to a die based on the location
/// provided.
- void AddAddress(DIE *Die, unsigned Attribute,
+ void addAddress(DIE *Die, unsigned Attribute,
const MachineLocation &Location);
- /// AddComplexAddress - Start with the address based on the location provided,
+ /// addComplexAddress - Start with the address based on the location provided,
/// and generate the DWARF information necessary to find the actual variable
/// (navigating the extra location information encoded in the type) based on
/// the starting location. Add the DWARF information to the die.
///
- void AddComplexAddress(DbgVariable *&DV, DIE *Die, unsigned Attribute,
+ void addComplexAddress(DbgVariable *&DV, DIE *Die, unsigned Attribute,
const MachineLocation &Location);
- // FIXME: Should be reformulated in terms of AddComplexAddress.
- /// AddBlockByrefAddress - Start with the address based on the location
+ // FIXME: Should be reformulated in terms of addComplexAddress.
+ /// addBlockByrefAddress - Start with the address based on the location
/// provided, and generate the DWARF information necessary to find the
/// actual Block variable (navigating the Block struct) based on the
/// starting location. Add the DWARF information to the die. Obsolete,
- /// please use AddComplexAddress instead.
+ /// please use addComplexAddress instead.
///
- void AddBlockByrefAddress(DbgVariable *&DV, DIE *Die, unsigned Attribute,
+ void addBlockByrefAddress(DbgVariable *&DV, DIE *Die, unsigned Attribute,
const MachineLocation &Location);
- /// AddType - Add a new type attribute to the specified entity.
- void AddType(CompileUnit *DW_Unit, DIE *Entity, DIType Ty);
+ /// addType - Add a new type attribute to the specified entity.
+ void addType(CompileUnit *DW_Unit, DIE *Entity, DIType Ty);
+
+ void addPubTypes(DISubprogram SP);
- /// ConstructTypeDIE - Construct basic type die from DIBasicType.
- void ConstructTypeDIE(CompileUnit *DW_Unit, DIE &Buffer,
+ /// constructTypeDIE - Construct basic type die from DIBasicType.
+ void constructTypeDIE(CompileUnit *DW_Unit, DIE &Buffer,
DIBasicType BTy);
- /// ConstructTypeDIE - Construct derived type die from DIDerivedType.
- void ConstructTypeDIE(CompileUnit *DW_Unit, DIE &Buffer,
+ /// constructTypeDIE - Construct derived type die from DIDerivedType.
+ void constructTypeDIE(CompileUnit *DW_Unit, DIE &Buffer,
DIDerivedType DTy);
- /// ConstructTypeDIE - Construct type DIE from DICompositeType.
- void ConstructTypeDIE(CompileUnit *DW_Unit, DIE &Buffer,
+ /// constructTypeDIE - Construct type DIE from DICompositeType.
+ void constructTypeDIE(CompileUnit *DW_Unit, DIE &Buffer,
DICompositeType CTy);
- /// ConstructSubrangeDIE - Construct subrange DIE from DISubrange.
- void ConstructSubrangeDIE(DIE &Buffer, DISubrange SR, DIE *IndexTy);
+ /// constructSubrangeDIE - Construct subrange DIE from DISubrange.
+ void constructSubrangeDIE(DIE &Buffer, DISubrange SR, DIE *IndexTy);
- /// ConstructArrayTypeDIE - Construct array type DIE from DICompositeType.
- void ConstructArrayTypeDIE(CompileUnit *DW_Unit, DIE &Buffer,
+ /// constructArrayTypeDIE - Construct array type DIE from DICompositeType.
+ void constructArrayTypeDIE(CompileUnit *DW_Unit, DIE &Buffer,
DICompositeType *CTy);
- /// ConstructEnumTypeDIE - Construct enum type DIE from DIEnumerator.
- DIE *ConstructEnumTypeDIE(CompileUnit *DW_Unit, DIEnumerator *ETy);
+ /// constructEnumTypeDIE - Construct enum type DIE from DIEnumerator.
+ DIE *constructEnumTypeDIE(CompileUnit *DW_Unit, DIEnumerator *ETy);
- /// CreateGlobalVariableDIE - Create new DIE using GV.
- DIE *CreateGlobalVariableDIE(CompileUnit *DW_Unit,
+ /// createGlobalVariableDIE - Create new DIE using GV.
+ DIE *createGlobalVariableDIE(CompileUnit *DW_Unit,
const DIGlobalVariable &GV);
- /// CreateMemberDIE - Create new member DIE.
- DIE *CreateMemberDIE(CompileUnit *DW_Unit, const DIDerivedType &DT);
+ /// createMemberDIE - Create new member DIE.
+ DIE *createMemberDIE(CompileUnit *DW_Unit, const DIDerivedType &DT);
- /// CreateSubprogramDIE - Create new DIE using SP.
- DIE *CreateSubprogramDIE(CompileUnit *DW_Unit,
+ /// createSubprogramDIE - Create new DIE using SP.
+ DIE *createSubprogramDIE(CompileUnit *DW_Unit,
const DISubprogram &SP,
bool IsConstructor = false,
bool IsInlined = false);
- /// FindCompileUnit - Get the compile unit for the given descriptor.
+ /// findCompileUnit - Get the compile unit for the given descriptor.
///
- CompileUnit &FindCompileUnit(DICompileUnit Unit) const;
+ CompileUnit &findCompileUnit(DICompileUnit Unit) const;
- /// CreateDbgScopeVariable - Create a new scope variable.
+ /// createDbgScopeVariable - Create a new scope variable.
///
- DIE *CreateDbgScopeVariable(DbgVariable *DV, CompileUnit *Unit);
+ DIE *createDbgScopeVariable(DbgVariable *DV, CompileUnit *Unit);
/// getUpdatedDbgScope - Find or create the DbgScope associated with
/// the instruction. Initialize scope and update scope hierarchy.
@@ -374,88 +368,101 @@ class DwarfDebug : public Dwarf {
DbgVariable *findAbstractVariable(DIVariable &Var, unsigned FrameIdx,
DILocation &Loc);
- DIE *UpdateSubprogramScopeDIE(MDNode *SPNode);
- DIE *ConstructLexicalScopeDIE(DbgScope *Scope);
- DIE *ConstructScopeDIE(DbgScope *Scope);
- DIE *ConstructInlinedScopeDIE(DbgScope *Scope);
- DIE *ConstructVariableDIE(DbgVariable *DV, DbgScope *S, CompileUnit *Unit);
+ /// updateSubprogramScopeDIE - Find DIE for the given subprogram and
+ /// attach appropriate DW_AT_low_pc and DW_AT_high_pc attributes.
+ /// If there are global variables in this scope then create and insert
+ /// DIEs for these variables.
+ DIE *updateSubprogramScopeDIE(MDNode *SPNode);
- /// ConstructDbgScope - Construct the components of a scope.
- ///
- void ConstructDbgScope(DbgScope *ParentScope,
- unsigned ParentStartID, unsigned ParentEndID,
- DIE *ParentDie, CompileUnit *Unit);
+ /// constructLexicalScope - Construct new DW_TAG_lexical_block
+ /// for this scope and attach DW_AT_low_pc/DW_AT_high_pc labels.
+ DIE *constructLexicalScopeDIE(DbgScope *Scope);
- /// EmitInitial - Emit initial Dwarf declarations. This is necessary for cc
+ /// constructInlinedScopeDIE - This scope represents inlined body of
+ /// a function. Construct DIE to represent this concrete inlined copy
+ /// of the function.
+ DIE *constructInlinedScopeDIE(DbgScope *Scope);
+
+ /// constructVariableDIE - Construct a DIE for the given DbgVariable.
+ DIE *constructVariableDIE(DbgVariable *DV, DbgScope *S, CompileUnit *Unit);
+
+ /// constructScopeDIE - Construct a DIE for this scope.
+ DIE *constructScopeDIE(DbgScope *Scope);
+
+ /// emitInitial - Emit initial Dwarf declarations. This is necessary for cc
/// tools to recognize the object file contains Dwarf information.
- void EmitInitial();
+ void emitInitial();
- /// EmitDIE - Recusively Emits a debug information entry.
+ /// emitDIE - Recursively emits a debug information entry.
///
- void EmitDIE(DIE *Die);
+ void emitDIE(DIE *Die);
- /// SizeAndOffsetDie - Compute the size and offset of a DIE.
+ /// computeSizeAndOffset - Compute the size and offset of a DIE.
///
- unsigned SizeAndOffsetDie(DIE *Die, unsigned Offset, bool Last);
+ unsigned computeSizeAndOffset(DIE *Die, unsigned Offset, bool Last);
- /// SizeAndOffsets - Compute the size and offset of all the DIEs.
+ /// computeSizeAndOffsets - Compute the size and offset of all the DIEs.
///
- void SizeAndOffsets();
+ void computeSizeAndOffsets();
- /// EmitDebugInfo / EmitDebugInfoPerCU - Emit the debug info section.
+ /// emitDebugInfo / emitDebugInfoPerCU - Emit the debug info section.
///
- void EmitDebugInfoPerCU(CompileUnit *Unit);
+ void emitDebugInfoPerCU(CompileUnit *Unit);
- void EmitDebugInfo();
+ void emitDebugInfo();
- /// EmitAbbreviations - Emit the abbreviation section.
+ /// emitAbbreviations - Emit the abbreviation section.
///
- void EmitAbbreviations() const;
+ void emitAbbreviations() const;
- /// EmitEndOfLineMatrix - Emit the last address of the section and the end of
+ /// emitEndOfLineMatrix - Emit the last address of the section and the end of
/// the line matrix.
///
- void EmitEndOfLineMatrix(unsigned SectionEnd);
+ void emitEndOfLineMatrix(unsigned SectionEnd);
- /// EmitDebugLines - Emit source line information.
+ /// emitDebugLines - Emit source line information.
///
- void EmitDebugLines();
+ void emitDebugLines();
- /// EmitCommonDebugFrame - Emit common frame info into a debug frame section.
+ /// emitCommonDebugFrame - Emit common frame info into a debug frame section.
///
- void EmitCommonDebugFrame();
+ void emitCommonDebugFrame();
- /// EmitFunctionDebugFrame - Emit per function frame info into a debug frame
+ /// emitFunctionDebugFrame - Emit per function frame info into a debug frame
/// section.
- void EmitFunctionDebugFrame(const FunctionDebugFrameInfo &DebugFrameInfo);
+ void emitFunctionDebugFrame(const FunctionDebugFrameInfo &DebugFrameInfo);
- void EmitDebugPubNamesPerCU(CompileUnit *Unit);
+ void emitDebugPubNamesPerCU(CompileUnit *Unit);
- /// EmitDebugPubNames - Emit visible names into a debug pubnames section.
+ /// emitDebugPubNames - Emit visible names into a debug pubnames section.
///
- void EmitDebugPubNames();
+ void emitDebugPubNames();
- /// EmitDebugStr - Emit visible names into a debug str section.
+ /// emitDebugPubTypes - Emit visible types into a debug pubtypes section.
///
- void EmitDebugStr();
+ void emitDebugPubTypes();
- /// EmitDebugLoc - Emit visible names into a debug loc section.
+ /// emitDebugStr - Emit visible names into a debug str section.
///
- void EmitDebugLoc();
+ void emitDebugStr();
+
+ /// emitDebugLoc - Emit visible names into a debug loc section.
+ ///
+ void emitDebugLoc();
/// EmitDebugARanges - Emit visible names into a debug aranges section.
///
void EmitDebugARanges();
- /// EmitDebugRanges - Emit visible names into a debug ranges section.
+ /// emitDebugRanges - Emit visible names into a debug ranges section.
///
- void EmitDebugRanges();
+ void emitDebugRanges();
- /// EmitDebugMacInfo - Emit visible names into a debug macinfo section.
+ /// emitDebugMacInfo - Emit visible names into a debug macinfo section.
///
- void EmitDebugMacInfo();
+ void emitDebugMacInfo();
- /// EmitDebugInlineInfo - Emit inline info using following format.
+ /// emitDebugInlineInfo - Emit inline info using the following format.
/// Section Header:
/// 1. length of section
/// 2. Dwarf version number
@@ -473,26 +480,25 @@ class DwarfDebug : public Dwarf {
/// inlined instance; the die_offset points to the inlined_subroutine die in
/// the __debug_info section, and the low_pc is the starting address for the
/// inlining instance.
- void EmitDebugInlineInfo();
+ void emitDebugInlineInfo();
/// GetOrCreateSourceID - Look up the source id with the given directory and
/// source file names. If none currently exists, create a new id and insert it
/// in the SourceIds map. This can update DirectoryNames and SourceFileNames maps
/// as well.
- unsigned GetOrCreateSourceID(const char *DirName,
- const char *FileName);
+ unsigned GetOrCreateSourceID(StringRef DirName, StringRef FileName);
- void ConstructCompileUnit(MDNode *N);
+ void constructCompileUnit(MDNode *N);
- void ConstructGlobalVariableDIE(MDNode *N);
+ void constructGlobalVariableDIE(MDNode *N);
- void ConstructSubprogram(MDNode *N);
+ void constructSubprogramDIE(MDNode *N);
// FIXME: This should go away in favor of complex addresses.
/// Find the type the programmer originally declared the variable to be
/// and return that type. Obsolete, use GetComplexAddrType instead.
///
- DIType GetBlockByrefType(DIType Ty, std::string Name);
+ DIType getBlockByrefType(DIType Ty, std::string Name);
public:
//===--------------------------------------------------------------------===//
@@ -505,30 +511,30 @@ public:
/// be emitted.
bool ShouldEmitDwarfDebug() const { return shouldEmit; }
- /// BeginModule - Emit all Dwarf sections that should come prior to the
+ /// beginModule - Emit all Dwarf sections that should come prior to the
/// content.
- void BeginModule(Module *M, MachineModuleInfo *MMI);
+ void beginModule(Module *M, MachineModuleInfo *MMI);
- /// EndModule - Emit all Dwarf sections that should come after the content.
+ /// endModule - Emit all Dwarf sections that should come after the content.
///
- void EndModule();
+ void endModule();
- /// BeginFunction - Gather pre-function debug information. Assumes being
+ /// beginFunction - Gather pre-function debug information. Assumes being
/// emitted immediately after the function entry point.
- void BeginFunction(MachineFunction *MF);
+ void beginFunction(MachineFunction *MF);
- /// EndFunction - Gather and emit post-function debug information.
+ /// endFunction - Gather and emit post-function debug information.
///
- void EndFunction(MachineFunction *MF);
+ void endFunction(MachineFunction *MF);
- /// RecordSourceLine - Records location information and associates it with a
+ /// recordSourceLine - Records location information and associates it with a
/// label. Returns a unique label ID used to generate a label and provide
/// correspondence to the source line list.
- unsigned RecordSourceLine(unsigned Line, unsigned Col, MDNode *Scope);
+ unsigned recordSourceLine(unsigned Line, unsigned Col, MDNode *Scope);
- /// getRecordSourceLineCount - Return the number of source lines in the debug
+ /// getSourceLineCount - Return the number of source lines in the debug
/// info.
- unsigned getRecordSourceLineCount() const {
+ unsigned getSourceLineCount() const {
return Lines.size();
}
@@ -540,22 +546,18 @@ public:
unsigned getOrCreateSourceID(const std::string &DirName,
const std::string &FileName);
- /// ExtractScopeInformation - Scan machine instructions in this function
+ /// extractScopeInformation - Scan machine instructions in this function
/// and collect DbgScopes. Returns true if at least one scope was found.
- bool ExtractScopeInformation(MachineFunction *MF);
-
- /// CollectVariableInfo - Populate DbgScope entries with variables' info.
- void CollectVariableInfo();
+ bool extractScopeInformation(MachineFunction *MF);
- /// SetDbgScopeEndLabels - Update DbgScope end labels for the scopes that
- /// end with this machine instruction.
- void SetDbgScopeEndLabels(const MachineInstr *MI, unsigned Label);
+ /// collectVariableInfo - Populate DbgScope entries with variables' info.
+ void collectVariableInfo();
- /// BeginScope - Process beginning of a scope starting at Label.
- void BeginScope(const MachineInstr *MI, unsigned Label);
+ /// beginScope - Process beginning of a scope starting at Label.
+ void beginScope(const MachineInstr *MI, unsigned Label);
- /// EndScope - Prcess end of a scope.
- void EndScope(const MachineInstr *MI);
+ /// endScope - Process the end of a scope.
+ void endScope(const MachineInstr *MI);
};
} // End of namespace llvm
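To make the renamed helper surface concrete, here is a hypothetical fragment that could run inside a DwarfDebug member to describe a variable DIE. The signatures are the ones declared above; the dwarf:: constants come from llvm/Support/Dwarf.h, and IntTyDie is a stand-in for a previously built type DIE.

    DIE *VarDie = new DIE(dwarf::DW_TAG_variable);
    addString(VarDie, dwarf::DW_AT_name, dwarf::DW_FORM_string, "counter");
    addUInt(VarDie, dwarf::DW_AT_decl_line, 0, 42);  // Form 0: writer picks a form
    addDIEEntry(VarDie, dwarf::DW_AT_type, dwarf::DW_FORM_ref4, IntTyDie);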
diff --git a/lib/CodeGen/AsmPrinter/DwarfException.cpp b/lib/CodeGen/AsmPrinter/DwarfException.cpp
index fcdcfd3..1c8b8f4 100644
--- a/lib/CodeGen/AsmPrinter/DwarfException.cpp
+++ b/lib/CodeGen/AsmPrinter/DwarfException.cpp
@@ -727,8 +727,7 @@ void DwarfException::EmitExceptionTable() {
// somewhere. This predicate should be moved to a shared location that is
// in target-independent code.
//
- if ((LSDASection->getKind().isWriteable() &&
- !LSDASection->getKind().isReadOnlyWithRel()) ||
+ if (LSDASection->getKind().isWriteable() ||
Asm->TM.getRelocationModel() == Reloc::Static)
TTypeFormat = dwarf::DW_EH_PE_absptr;
else
@@ -918,36 +917,14 @@ void DwarfException::EmitExceptionTable() {
}
// Emit the Catch TypeInfos.
- const TargetLoweringObjectFile &TLOF = Asm->getObjFileLowering();
- unsigned Index = 1;
-
for (std::vector<GlobalVariable *>::const_reverse_iterator
I = TypeInfos.rbegin(), E = TypeInfos.rend(); I != E; ++I) {
- const GlobalVariable *TI = *I;
-
- if (TI) {
- if (!LSDASection->getKind().isReadOnlyWithRel() &&
- (TTypeFormat == dwarf::DW_EH_PE_absptr ||
- TI->getLinkage() == GlobalValue::InternalLinkage)) {
- // Print out the unadorned name of the type info.
- PrintRelDirective();
- O << Asm->Mang->getMangledName(TI);
- } else {
- bool IsTypeInfoIndirect = false, IsTypeInfoPCRel = false;
- const MCExpr *TypeInfoRef =
- TLOF.getSymbolForDwarfGlobalReference(TI, Asm->Mang, Asm->MMI,
- IsTypeInfoIndirect,
- IsTypeInfoPCRel);
-
- if (!IsTypeInfoPCRel)
- TypeInfoRef = CreateLabelDiff(TypeInfoRef, "typeinforef_addr",
- Index++);
-
- O << MAI->getData32bitsDirective();
- TypeInfoRef->print(O, MAI);
- }
+ const GlobalVariable *GV = *I;
+ PrintRelDirective();
+
+ if (GV) {
+ O << Asm->Mang->getMangledName(GV);
} else {
- PrintRelDirective();
O << "0x0";
}
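The first DwarfException hunk above reduces the encoding choice to: a writable LSDA section or a static relocation model means absolute pointers. As a free-standing predicate it reads as below; the fallback value is an assumption on my part (the customary indirect, pc-relative, 4-byte encoding), since the else branch lies outside the quoted context.

    static unsigned pickTTypeFormat(bool LSDAIsWritable, bool IsStaticReloc) {
      if (LSDAIsWritable || IsStaticReloc)
        return dwarf::DW_EH_PE_absptr;
      // Assumed fallback; not shown in the hunk above.
      return dwarf::DW_EH_PE_indirect | dwarf::DW_EH_PE_pcrel |
             dwarf::DW_EH_PE_sdata4;
    }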
diff --git a/lib/CodeGen/AsmPrinter/DwarfWriter.cpp b/lib/CodeGen/AsmPrinter/DwarfWriter.cpp
index 63ae653..dd8d88a 100644
--- a/lib/CodeGen/AsmPrinter/DwarfWriter.cpp
+++ b/lib/CodeGen/AsmPrinter/DwarfWriter.cpp
@@ -43,14 +43,14 @@ void DwarfWriter::BeginModule(Module *M,
DE = new DwarfException(OS, A, T);
DD = new DwarfDebug(OS, A, T);
DE->BeginModule(M, MMI);
- DD->BeginModule(M, MMI);
+ DD->beginModule(M, MMI);
}
/// EndModule - Emit all Dwarf sections that should come after the content.
///
void DwarfWriter::EndModule() {
DE->EndModule();
- DD->EndModule();
+ DD->endModule();
delete DD; DD = 0;
delete DE; DE = 0;
}
@@ -59,13 +59,13 @@ void DwarfWriter::EndModule() {
/// emitted immediately after the function entry point.
void DwarfWriter::BeginFunction(MachineFunction *MF) {
DE->BeginFunction(MF);
- DD->BeginFunction(MF);
+ DD->beginFunction(MF);
}
/// EndFunction - Gather and emit post-function debug information.
///
void DwarfWriter::EndFunction(MachineFunction *MF) {
- DD->EndFunction(MF);
+ DD->endFunction(MF);
DE->EndFunction();
if (MachineModuleInfo *MMI = DD->getMMI() ? DD->getMMI() : DE->getMMI())
@@ -78,12 +78,12 @@ void DwarfWriter::EndFunction(MachineFunction *MF) {
/// correspondence to the source line list.
unsigned DwarfWriter::RecordSourceLine(unsigned Line, unsigned Col,
MDNode *Scope) {
- return DD->RecordSourceLine(Line, Col, Scope);
+ return DD->recordSourceLine(Line, Col, Scope);
}
/// getRecordSourceLineCount - Count source lines.
unsigned DwarfWriter::getRecordSourceLineCount() {
- return DD->getRecordSourceLineCount();
+ return DD->getSourceLineCount();
}
/// ShouldEmitDwarfDebug - Returns true if Dwarf debugging declarations should
@@ -93,8 +93,8 @@ bool DwarfWriter::ShouldEmitDwarfDebug() const {
}
void DwarfWriter::BeginScope(const MachineInstr *MI, unsigned L) {
- DD->BeginScope(MI, L);
+ DD->beginScope(MI, L);
}
void DwarfWriter::EndScope(const MachineInstr *MI) {
- DD->EndScope(MI);
+ DD->endScope(MI);
}
diff --git a/lib/CodeGen/BranchFolding.cpp b/lib/CodeGen/BranchFolding.cpp
index f807e8f..8a62eb2 100644
--- a/lib/CodeGen/BranchFolding.cpp
+++ b/lib/CodeGen/BranchFolding.cpp
@@ -41,8 +41,6 @@ using namespace llvm;
STATISTIC(NumDeadBlocks, "Number of dead blocks removed");
STATISTIC(NumBranchOpts, "Number of branches optimized");
STATISTIC(NumTailMerge , "Number of block tails merged");
-STATISTIC(NumTailDups , "Number of tail duplicated blocks");
-STATISTIC(NumInstrDups , "Additional instructions due to tail duplication");
static cl::opt<cl::boolOrDefault> FlagEnableTailMerge("enable-tail-merge",
cl::init(cl::BOU_UNSET), cl::Hidden);
@@ -205,16 +203,6 @@ bool BranchFolder::OptimizeFunction(MachineFunction &MF,
MadeChange |= MadeChangeThisIteration;
}
- // Do tail duplication after tail merging is done. Otherwise it is
- // tough to avoid situations where tail duplication and tail merging undo
- // each other's transformations ad infinitum.
- MadeChangeThisIteration = true;
- while (MadeChangeThisIteration) {
- MadeChangeThisIteration = false;
- MadeChangeThisIteration |= TailDuplicateBlocks(MF);
- MadeChange |= MadeChangeThisIteration;
- }
-
// See if any jump tables have become mergable or dead as the code generator
// did its thing.
MachineJumpTableInfo *JTI = MF.getJumpTableInfo();
@@ -918,71 +906,6 @@ bool BranchFolder::OptimizeBranches(MachineFunction &MF) {
}
-/// CanFallThrough - Return true if the specified block (with the specified
-/// branch condition) can implicitly transfer control to the block after it by
-/// falling off the end of it. This should return false if it can reach the
-/// block after it, but it uses an explicit branch to do so (e.g. a table jump).
-///
-/// True is a conservative answer.
-///
-bool BranchFolder::CanFallThrough(MachineBasicBlock *CurBB,
- bool BranchUnAnalyzable,
- MachineBasicBlock *TBB,
- MachineBasicBlock *FBB,
- const SmallVectorImpl<MachineOperand> &Cond) {
- MachineFunction::iterator Fallthrough = CurBB;
- ++Fallthrough;
- // If FallthroughBlock is off the end of the function, it can't fall through.
- if (Fallthrough == CurBB->getParent()->end())
- return false;
-
- // If FallthroughBlock isn't a successor of CurBB, no fallthrough is possible.
- if (!CurBB->isSuccessor(Fallthrough))
- return false;
-
- // If we couldn't analyze the branch, examine the last instruction.
- // If the block doesn't end in a known control barrier, assume fallthrough
- // is possible. The isPredicable check is needed because this code can be
- // called during IfConversion, where an instruction which is normally a
- // Barrier is predicated and thus no longer an actual control barrier. This
- // is over-conservative though, because if an instruction isn't actually
- // predicated we could still treat it like a barrier.
- if (BranchUnAnalyzable)
- return CurBB->empty() || !CurBB->back().getDesc().isBarrier() ||
- CurBB->back().getDesc().isPredicable();
-
- // If there is no branch, control always falls through.
- if (TBB == 0) return true;
-
- // If there is some explicit branch to the fallthrough block, it can obviously
- // reach, even though the branch should get folded to fall through implicitly.
- if (MachineFunction::iterator(TBB) == Fallthrough ||
- MachineFunction::iterator(FBB) == Fallthrough)
- return true;
-
- // If it's an unconditional branch to some block not the fall through, it
- // doesn't fall through.
- if (Cond.empty()) return false;
-
- // Otherwise, if it is conditional and has no explicit false block, it falls
- // through.
- return FBB == 0;
-}
-
-/// CanFallThrough - Return true if the specified can implicitly transfer
-/// control to the block after it by falling off the end of it. This should
-/// return false if it can reach the block after it, but it uses an explicit
-/// branch to do so (e.g. a table jump).
-///
-/// True is a conservative answer.
-///
-bool BranchFolder::CanFallThrough(MachineBasicBlock *CurBB) {
- MachineBasicBlock *TBB = 0, *FBB = 0;
- SmallVector<MachineOperand, 4> Cond;
- bool CurUnAnalyzable = TII->AnalyzeBranch(*CurBB, TBB, FBB, Cond, true);
- return CanFallThrough(CurBB, CurUnAnalyzable, TBB, FBB, Cond);
-}
-
/// IsBetterFallthrough - Return true if it would be clearly better to
/// fall-through to MBB1 than to fall through into MBB2. This has to return
/// a strict ordering, returning true for both (MBB1,MBB2) and (MBB2,MBB1) will
@@ -1005,143 +928,6 @@ static bool IsBetterFallthrough(MachineBasicBlock *MBB1,
return MBB2I->getDesc().isCall() && !MBB1I->getDesc().isCall();
}
-/// TailDuplicateBlocks - Look for small blocks that are unconditionally
-/// branched to and do not fall through. Tail-duplicate their instructions
-/// into their predecessors to eliminate (dynamic) branches.
-bool BranchFolder::TailDuplicateBlocks(MachineFunction &MF) {
- bool MadeChange = false;
-
- for (MachineFunction::iterator I = ++MF.begin(), E = MF.end(); I != E; ) {
- MachineBasicBlock *MBB = I++;
-
- // Only duplicate blocks that end with unconditional branches.
- if (CanFallThrough(MBB))
- continue;
-
- MadeChange |= TailDuplicate(MBB, MF);
-
- // If it is dead, remove it.
- if (MBB->pred_empty()) {
- NumInstrDups -= MBB->size();
- RemoveDeadBlock(MBB);
- MadeChange = true;
- ++NumDeadBlocks;
- }
- }
- return MadeChange;
-}
-
-/// TailDuplicate - If it is profitable, duplicate TailBB's contents in each
-/// of its predecessors.
-bool BranchFolder::TailDuplicate(MachineBasicBlock *TailBB,
- MachineFunction &MF) {
- // Don't try to tail-duplicate single-block loops.
- if (TailBB->isSuccessor(TailBB))
- return false;
-
- // Set the limit on the number of instructions to duplicate, with a default
- // of one less than the tail-merge threshold. When optimizing for size,
- // duplicate only one, because one branch instruction can be eliminated to
- // compensate for the duplication.
- unsigned MaxDuplicateCount =
- MF.getFunction()->hasFnAttr(Attribute::OptimizeForSize) ?
- 1 : TII->TailDuplicationLimit(*TailBB, TailMergeSize - 1);
-
- // Check the instructions in the block to determine whether tail-duplication
- // is invalid or unlikely to be profitable.
- unsigned i = 0;
- bool HasCall = false;
- for (MachineBasicBlock::iterator I = TailBB->begin();
- I != TailBB->end(); ++I, ++i) {
- // Non-duplicable things shouldn't be tail-duplicated.
- if (I->getDesc().isNotDuplicable()) return false;
- // Don't duplicate more than the threshold.
- if (i == MaxDuplicateCount) return false;
- // Remember if we saw a call.
- if (I->getDesc().isCall()) HasCall = true;
- }
- // Heuristically, don't tail-duplicate calls if it would expand code size,
- // as it's less likely to be worth the extra cost.
- if (i > 1 && HasCall)
- return false;
-
- // Iterate through all the unique predecessors and tail-duplicate this
- // block into them, if possible. Copying the list ahead of time also
- // avoids trouble with the predecessor list reallocating.
- bool Changed = false;
- SmallSetVector<MachineBasicBlock *, 8> Preds(TailBB->pred_begin(),
- TailBB->pred_end());
- for (SmallSetVector<MachineBasicBlock *, 8>::iterator PI = Preds.begin(),
- PE = Preds.end(); PI != PE; ++PI) {
- MachineBasicBlock *PredBB = *PI;
-
- assert(TailBB != PredBB &&
- "Single-block loop should have been rejected earlier!");
- if (PredBB->succ_size() > 1) continue;
-
- MachineBasicBlock *PredTBB, *PredFBB;
- SmallVector<MachineOperand, 4> PredCond;
- if (TII->AnalyzeBranch(*PredBB, PredTBB, PredFBB, PredCond, true))
- continue;
- if (!PredCond.empty())
- continue;
- // EH edges are ignored by AnalyzeBranch.
- if (PredBB->succ_size() != 1)
- continue;
- // Don't duplicate into a fall-through predecessor (at least for now).
- if (PredBB->isLayoutSuccessor(TailBB) && CanFallThrough(PredBB))
- continue;
-
- DEBUG(errs() << "\nTail-duplicating into PredBB: " << *PredBB
- << "From Succ: " << *TailBB);
-
- // Remove PredBB's unconditional branch.
- TII->RemoveBranch(*PredBB);
- // Clone the contents of TailBB into PredBB.
- for (MachineBasicBlock::iterator I = TailBB->begin(), E = TailBB->end();
- I != E; ++I) {
- MachineInstr *NewMI = MF.CloneMachineInstr(I);
- PredBB->insert(PredBB->end(), NewMI);
- }
- NumInstrDups += TailBB->size() - 1; // subtract one for removed branch
-
- // Update the CFG.
- PredBB->removeSuccessor(PredBB->succ_begin());
- assert(PredBB->succ_empty() &&
- "TailDuplicate called on block with multiple successors!");
- for (MachineBasicBlock::succ_iterator I = TailBB->succ_begin(),
- E = TailBB->succ_end(); I != E; ++I)
- PredBB->addSuccessor(*I);
-
- Changed = true;
- ++NumTailDups;
- }
-
- // If TailBB was duplicated into all its predecessors except for the prior
- // block, which falls through unconditionally, move the contents of this
- // block into the prior block.
- MachineBasicBlock &PrevBB = *prior(MachineFunction::iterator(TailBB));
- MachineBasicBlock *PriorTBB = 0, *PriorFBB = 0;
- SmallVector<MachineOperand, 4> PriorCond;
- bool PriorUnAnalyzable =
- TII->AnalyzeBranch(PrevBB, PriorTBB, PriorFBB, PriorCond, true);
- // This has to check PrevBB->succ_size() because EH edges are ignored by
- // AnalyzeBranch.
- if (!PriorUnAnalyzable && PriorCond.empty() && !PriorTBB &&
- TailBB->pred_size() == 1 && PrevBB.succ_size() == 1 &&
- !TailBB->hasAddressTaken()) {
- DEBUG(errs() << "\nMerging into block: " << PrevBB
- << "From MBB: " << *TailBB);
- PrevBB.splice(PrevBB.end(), TailBB, TailBB->begin(), TailBB->end());
- PrevBB.removeSuccessor(PrevBB.succ_begin());;
- assert(PrevBB.succ_empty());
- PrevBB.transferSuccessors(TailBB);
- Changed = true;
- }
-
- return Changed;
-}
-
/// OptimizeBlock - Analyze and optimize control flow related to the specified
/// block. This is never called on the entry block.
bool BranchFolder::OptimizeBlock(MachineBasicBlock *MBB) {
@@ -1266,7 +1052,7 @@ ReoptimizeBlock:
// the assert condition out of the loop body.
if (MBB->succ_empty() && !PriorCond.empty() && PriorFBB == 0 &&
MachineFunction::iterator(PriorTBB) == FallThrough &&
- !CanFallThrough(MBB)) {
+ !MBB->canFallThrough()) {
bool DoTransform = true;
// We have to be careful that the succs of PredBB aren't both no-successor
@@ -1290,7 +1076,7 @@ ReoptimizeBlock:
// In this case, we could actually be moving the return block *into* a
// loop!
if (DoTransform && !MBB->succ_empty() &&
- (!CanFallThrough(PriorTBB) || PriorTBB->empty()))
+ (!PriorTBB->canFallThrough() || PriorTBB->empty()))
DoTransform = false;
@@ -1422,13 +1208,11 @@ ReoptimizeBlock:
// If the prior block doesn't fall through into this block, and if this
// block doesn't fall through into some other block, see if we can find a
// place to move this block where a fall-through will happen.
- if (!CanFallThrough(&PrevBB, PriorUnAnalyzable,
- PriorTBB, PriorFBB, PriorCond)) {
+ if (!PrevBB.canFallThrough()) {
// Now we know that there was no fall-through into this block, check to
// see if it has a fall-through into its successor.
- bool CurFallsThru = CanFallThrough(MBB, CurUnAnalyzable, CurTBB, CurFBB,
- CurCond);
+ bool CurFallsThru = MBB->canFallThrough();
if (!MBB->isLandingPad()) {
// Check all the predecessors of this block. If one of them has no fall
@@ -1440,7 +1224,7 @@ ReoptimizeBlock:
MachineFunction::iterator PredFallthrough = PredBB; ++PredFallthrough;
MachineBasicBlock *PredTBB, *PredFBB;
SmallVector<MachineOperand, 4> PredCond;
- if (PredBB != MBB && !CanFallThrough(PredBB) &&
+ if (PredBB != MBB && !PredBB->canFallThrough() &&
!TII->AnalyzeBranch(*PredBB, PredTBB, PredFBB, PredCond, true)
&& (!CurFallsThru || !CurTBB || !CurFBB)
&& (!CurFallsThru || MBB->getNumber() >= PredBB->getNumber())) {
@@ -1479,7 +1263,7 @@ ReoptimizeBlock:
// and if the successor isn't an EH destination, we can arrange for the
// fallthrough to happen.
if (SuccBB != MBB && &*SuccPrev != MBB &&
- !CanFallThrough(SuccPrev) && !CurUnAnalyzable &&
+ !SuccPrev->canFallThrough() && !CurUnAnalyzable &&
!SuccBB->isLandingPad()) {
MBB->moveBefore(SuccBB);
MadeChange = true;
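For reference, the two deleted CanFallThrough helpers now live behind MachineBasicBlock::canFallThrough(). A condensed sketch of the combined logic, mirroring the removed code above (minus the predication carve-out) and assuming the surrounding file's includes:

    static bool canFallThroughSketch(MachineBasicBlock *MBB,
                                     const TargetInstrInfo *TII) {
      MachineFunction::iterator Fallthrough = MBB;
      ++Fallthrough;
      if (Fallthrough == MBB->getParent()->end())
        return false;                     // last block in the function
      if (!MBB->isSuccessor(Fallthrough))
        return false;                     // next block is not a successor
      MachineBasicBlock *TBB = 0, *FBB = 0;
      SmallVector<MachineOperand, 4> Cond;
      if (TII->AnalyzeBranch(*MBB, TBB, FBB, Cond, true))
        // Unanalyzable: assume fallthrough unless the block ends in a barrier.
        return MBB->empty() || !MBB->back().getDesc().isBarrier();
      if (TBB == 0) return true;          // no terminator branch at all
      if (MachineFunction::iterator(TBB) == Fallthrough ||
          MachineFunction::iterator(FBB) == Fallthrough)
        return true;                      // explicit branch to the next block
      if (Cond.empty()) return false;     // unconditional branch elsewhere
      return FBB == 0;                    // conditional with no false block
    }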
diff --git a/lib/CodeGen/BranchFolding.h b/lib/CodeGen/BranchFolding.h
index 4920755..b087395 100644
--- a/lib/CodeGen/BranchFolding.h
+++ b/lib/CodeGen/BranchFolding.h
@@ -105,18 +105,10 @@ namespace llvm {
unsigned CreateCommonTailOnlyBlock(MachineBasicBlock *&PredBB,
unsigned maxCommonTailLength);
- bool TailDuplicateBlocks(MachineFunction &MF);
- bool TailDuplicate(MachineBasicBlock *TailBB, MachineFunction &MF);
-
bool OptimizeBranches(MachineFunction &MF);
bool OptimizeBlock(MachineBasicBlock *MBB);
void RemoveDeadBlock(MachineBasicBlock *MBB);
bool OptimizeImpDefsBlock(MachineBasicBlock *MBB);
-
- bool CanFallThrough(MachineBasicBlock *CurBB);
- bool CanFallThrough(MachineBasicBlock *CurBB, bool BranchUnAnalyzable,
- MachineBasicBlock *TBB, MachineBasicBlock *FBB,
- const SmallVectorImpl<MachineOperand> &Cond);
};
}
diff --git a/lib/CodeGen/CMakeLists.txt b/lib/CodeGen/CMakeLists.txt
index 9583edc..6f86614 100644
--- a/lib/CodeGen/CMakeLists.txt
+++ b/lib/CodeGen/CMakeLists.txt
@@ -63,6 +63,7 @@ add_llvm_library(LLVMCodeGen
StackProtector.cpp
StackSlotColoring.cpp
StrongPHIElimination.cpp
+ TailDuplication.cpp
TargetInstrInfoImpl.cpp
TwoAddressInstructionPass.cpp
UnreachableBlockElim.cpp
diff --git a/lib/CodeGen/CriticalAntiDepBreaker.cpp b/lib/CodeGen/CriticalAntiDepBreaker.cpp
index 984e013..1b39fec 100644
--- a/lib/CodeGen/CriticalAntiDepBreaker.cpp
+++ b/lib/CodeGen/CriticalAntiDepBreaker.cpp
@@ -316,7 +316,6 @@ CriticalAntiDepBreaker::findSuitableFreeRegister(unsigned AntiDepReg,
unsigned CriticalAntiDepBreaker::
BreakAntiDependencies(std::vector<SUnit>& SUnits,
- CandidateMap& Candidates,
MachineBasicBlock::iterator& Begin,
MachineBasicBlock::iterator& End,
unsigned InsertPosIndex) {
diff --git a/lib/CodeGen/CriticalAntiDepBreaker.h b/lib/CodeGen/CriticalAntiDepBreaker.h
index 5664d85..496888d 100644
--- a/lib/CodeGen/CriticalAntiDepBreaker.h
+++ b/lib/CodeGen/CriticalAntiDepBreaker.h
@@ -25,6 +25,7 @@
#include "llvm/Target/TargetRegisterInfo.h"
#include "llvm/ADT/BitVector.h"
#include "llvm/ADT/SmallSet.h"
+#include <map>
namespace llvm {
class CriticalAntiDepBreaker : public AntiDepBreaker {
@@ -64,13 +65,6 @@ namespace llvm {
CriticalAntiDepBreaker(MachineFunction& MFi);
~CriticalAntiDepBreaker();
- /// GetMaxTrials - Critical path anti-dependence breaking requires
- /// only a single pass
- unsigned GetMaxTrials() { return 1; }
-
- /// NeedCandidates - Candidates not needed.
- bool NeedCandidates() { return false; }
-
/// Start - Initialize anti-dep breaking for a new basic block.
void StartBlock(MachineBasicBlock *BB);
@@ -78,7 +72,6 @@ namespace llvm {
/// of the ScheduleDAG and break them by renaming registers.
///
unsigned BreakAntiDependencies(std::vector<SUnit>& SUnits,
- CandidateMap& Candidates,
MachineBasicBlock::iterator& Begin,
MachineBasicBlock::iterator& End,
unsigned InsertPosIndex);
diff --git a/lib/CodeGen/DwarfEHPrepare.cpp b/lib/CodeGen/DwarfEHPrepare.cpp
index 7fc62a9..9b516ed 100644
--- a/lib/CodeGen/DwarfEHPrepare.cpp
+++ b/lib/CodeGen/DwarfEHPrepare.cpp
@@ -332,7 +332,7 @@ bool DwarfEHPrepare::PromoteStackTemporaries() {
if (ExceptionValueVar && DT && DF && isAllocaPromotable(ExceptionValueVar)) {
// Turn the exception temporary into registers and phi nodes if possible.
std::vector<AllocaInst*> Allocas(1, ExceptionValueVar);
- PromoteMemToReg(Allocas, *DT, *DF, ExceptionValueVar->getContext());
+ PromoteMemToReg(Allocas, *DT, *DF);
return true;
}
return false;
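Since the call above just lost its LLVMContext parameter, here is a minimal sketch of the post-change calling convention; AI, DT, and DF are stand-ins for a promotable alloca and the pass's DominatorTree/DominanceFrontier results.

    std::vector<AllocaInst*> Allocas;
    Allocas.push_back(AI);               // AI must satisfy isAllocaPromotable()
    PromoteMemToReg(Allocas, DT, DF);    // rewrite its loads/stores into SSA values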
diff --git a/lib/CodeGen/IfConversion.cpp b/lib/CodeGen/IfConversion.cpp
index 45f08b1..c23d707 100644
--- a/lib/CodeGen/IfConversion.cpp
+++ b/lib/CodeGen/IfConversion.cpp
@@ -608,7 +608,7 @@ void IfConverter::ScanInstructions(BBInfo &BBI) {
if (TII->DefinesPredicate(I, PredDefs))
BBI.ClobbersPred = true;
- if (!TID.isPredicable()) {
+ if (!TII->isPredicable(I)) {
BBI.IsUnpredicable = true;
return;
}
diff --git a/lib/CodeGen/LLVMTargetMachine.cpp b/lib/CodeGen/LLVMTargetMachine.cpp
index 0db459b..242cba5 100644
--- a/lib/CodeGen/LLVMTargetMachine.cpp
+++ b/lib/CodeGen/LLVMTargetMachine.cpp
@@ -35,6 +35,8 @@ static cl::opt<bool> DisablePostRA("disable-post-ra", cl::Hidden,
cl::desc("Disable Post Regalloc"));
static cl::opt<bool> DisableBranchFold("disable-branch-fold", cl::Hidden,
cl::desc("Disable branch folding"));
+static cl::opt<bool> DisableTailDuplicate("disable-tail-duplicate", cl::Hidden,
+ cl::desc("Disable tail duplication"));
static cl::opt<bool> DisableCodePlace("disable-code-place", cl::Hidden,
cl::desc("Disable code placement"));
static cl::opt<bool> DisableSSC("disable-ssc", cl::Hidden,
@@ -66,6 +68,11 @@ static cl::opt<cl::boolOrDefault>
EnableFastISelOption("fast-isel", cl::Hidden,
cl::desc("Enable the \"fast\" instruction selector"));
+// Enable or disable an experimental optimization to split GEPs
+// and run a special GVN pass which does not examine loads, in
+// an effort to factor out redundancy implicit in complex GEPs.
+static cl::opt<bool> EnableSplitGEPGVN("split-gep-gvn", cl::Hidden,
+ cl::desc("Split GEPs and run no-load GVN"));
LLVMTargetMachine::LLVMTargetMachine(const Target &T,
const std::string &TargetTriple)
@@ -223,6 +230,12 @@ bool LLVMTargetMachine::addCommonCodeGenPasses(PassManagerBase &PM,
CodeGenOpt::Level OptLevel) {
// Standard LLVM-Level Passes.
+ // Optionally, run split-GEPs and no-load GVN.
+ if (EnableSplitGEPGVN) {
+ PM.add(createGEPSplitterPass());
+ PM.add(createGVNPass(/*NoPRE=*/false, /*NoLoads=*/true));
+ }
+
// Run loop strength reduction before anything else.
if (OptLevel != CodeGenOpt::None && !DisableLSR) {
PM.add(createLoopStrengthReducePass(getTargetLowering()));
@@ -333,15 +346,17 @@ bool LLVMTargetMachine::addCommonCodeGenPasses(PassManagerBase &PM,
printAndVerify(PM, "After BranchFolding");
}
+ // Tail duplication.
+ if (OptLevel != CodeGenOpt::None && !DisableTailDuplicate) {
+ PM.add(createTailDuplicatePass());
+ printAndVerify(PM, "After TailDuplicate");
+ }
+
PM.add(createGCMachineCodeAnalysisPass());
if (PrintGCInfo)
PM.add(createGCInfoPrinter(errs()));
- // Fold redundant debug labels.
- PM.add(createDebugLabelFoldingPass());
- printAndVerify(PM, "After DebugLabelFolding");
-
if (OptLevel != CodeGenOpt::None && !DisableCodePlace) {
PM.add(createCodePlacementOptPass());
printAndVerify(PM, "After CodePlacementOpt");
diff --git a/lib/CodeGen/LatencyPriorityQueue.cpp b/lib/CodeGen/LatencyPriorityQueue.cpp
index 23dce4a..f1bd573 100644
--- a/lib/CodeGen/LatencyPriorityQueue.cpp
+++ b/lib/CodeGen/LatencyPriorityQueue.cpp
@@ -55,10 +55,6 @@ SUnit *LatencyPriorityQueue::getSingleUnscheduledPred(SUnit *SU) {
SUnit *OnlyAvailablePred = 0;
for (SUnit::const_pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
I != E; ++I) {
- if (IgnoreAntiDep &&
- ((I->getKind() == SDep::Anti) || (I->getKind() == SDep::Output)))
- continue;
-
SUnit &Pred = *I->getSUnit();
if (!Pred.isScheduled) {
// We found an available, but not scheduled, predecessor. If it's the
@@ -78,10 +74,6 @@ void LatencyPriorityQueue::push_impl(SUnit *SU) {
unsigned NumNodesBlocking = 0;
for (SUnit::const_succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
I != E; ++I) {
- if (IgnoreAntiDep &&
- ((I->getKind() == SDep::Anti) || (I->getKind() == SDep::Output)))
- continue;
-
if (getSingleUnscheduledPred(I->getSUnit()) == SU)
++NumNodesBlocking;
}
@@ -98,10 +90,6 @@ void LatencyPriorityQueue::push_impl(SUnit *SU) {
void LatencyPriorityQueue::ScheduledNode(SUnit *SU) {
for (SUnit::const_succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
I != E; ++I) {
- if (IgnoreAntiDep &&
- ((I->getKind() == SDep::Anti) || (I->getKind() == SDep::Output)))
- continue;
-
AdjustPriorityOfUnscheduledPreds(I->getSUnit());
}
}
diff --git a/lib/CodeGen/LiveIntervalAnalysis.cpp b/lib/CodeGen/LiveIntervalAnalysis.cpp
index bbfc82b..24adf36 100644
--- a/lib/CodeGen/LiveIntervalAnalysis.cpp
+++ b/lib/CodeGen/LiveIntervalAnalysis.cpp
@@ -136,7 +136,8 @@ void LiveIntervals::printInstrs(raw_ostream &OS) const {
for (MachineFunction::iterator mbbi = mf_->begin(), mbbe = mf_->end();
mbbi != mbbe; ++mbbi) {
- OS << ((Value*)mbbi->getBasicBlock())->getName() << ":\n";
+ OS << "BB#" << mbbi->getNumber()
+ << ":\t\t# derived from " << mbbi->getName() << "\n";
for (MachineBasicBlock::iterator mii = mbbi->begin(),
mie = mbbi->end(); mii != mie; ++mii) {
OS << getInstructionIndex(mii) << '\t' << *mii;
@@ -658,7 +659,7 @@ void LiveIntervals::computeIntervals() {
MachineBasicBlock *MBB = MBBI;
// Track the index of the current machine instr.
SlotIndex MIIndex = getMBBStartIdx(MBB);
- DEBUG(errs() << ((Value*)MBB->getBasicBlock())->getName() << ":\n");
+ DEBUG(errs() << MBB->getName() << ":\n");
MachineBasicBlock::iterator MI = MBB->begin(), miEnd = MBB->end();
@@ -1094,6 +1095,12 @@ rewriteInstructionForSpills(const LiveInterval &li, const VNInfo *VNI,
NewVReg = mri_->createVirtualRegister(rc);
vrm.grow();
CreatedNewVReg = true;
+
+ // The new virtual register should get the same allocation hints as the
+ // old one.
+ std::pair<unsigned, unsigned> Hint = mri_->getRegAllocationHint(Reg);
+ if (Hint.first || Hint.second)
+ mri_->setRegAllocationHint(NewVReg, Hint.first, Hint.second);
}
if (!TryFold)
diff --git a/lib/CodeGen/LiveVariables.cpp b/lib/CodeGen/LiveVariables.cpp
index 16a79bb..68f80ac 100644
--- a/lib/CodeGen/LiveVariables.cpp
+++ b/lib/CodeGen/LiveVariables.cpp
@@ -279,6 +279,43 @@ void LiveVariables::HandlePhysRegUse(unsigned Reg, MachineInstr *MI) {
PhysRegUse[SubReg] = MI;
}
+/// FindLastRefOrPartRef - Return the last reference or partial reference of
+/// the specified register.
+MachineInstr *LiveVariables::FindLastRefOrPartRef(unsigned Reg) {
+ MachineInstr *LastDef = PhysRegDef[Reg];
+ MachineInstr *LastUse = PhysRegUse[Reg];
+ if (!LastDef && !LastUse)
+ return 0;
+
+ MachineInstr *LastRefOrPartRef = LastUse ? LastUse : LastDef;
+ unsigned LastRefOrPartRefDist = DistanceMap[LastRefOrPartRef];
+ MachineInstr *LastPartDef = 0;
+ unsigned LastPartDefDist = 0;
+ for (const unsigned *SubRegs = TRI->getSubRegisters(Reg);
+ unsigned SubReg = *SubRegs; ++SubRegs) {
+ MachineInstr *Def = PhysRegDef[SubReg];
+ if (Def && Def != LastDef) {
+ // There was a def of this sub-register in between. This is a partial
+ // def, keep track of the last one.
+ unsigned Dist = DistanceMap[Def];
+ if (Dist > LastPartDefDist) {
+ LastPartDefDist = Dist;
+ LastPartDef = Def;
+ }
+ continue;
+ }
+ if (MachineInstr *Use = PhysRegUse[SubReg]) {
+ unsigned Dist = DistanceMap[Use];
+ if (Dist > LastRefOrPartRefDist) {
+ LastRefOrPartRefDist = Dist;
+ LastRefOrPartRef = Use;
+ }
+ }
+ }
+
+ return LastRefOrPartRef;
+}
+
bool LiveVariables::HandlePhysRegKill(unsigned Reg, MachineInstr *MI) {
MachineInstr *LastDef = PhysRegDef[Reg];
MachineInstr *LastUse = PhysRegUse[Reg];
@@ -373,7 +410,16 @@ bool LiveVariables::HandlePhysRegKill(unsigned Reg, MachineInstr *MI) {
if (NeedDef)
PhysRegDef[Reg]->addOperand(MachineOperand::CreateReg(SubReg,
true/*IsDef*/, true/*IsImp*/));
- LastRefOrPartRef->addRegisterKilled(SubReg, TRI, true);
+ MachineInstr *LastSubRef = FindLastRefOrPartRef(SubReg);
+ if (LastSubRef)
+ LastSubRef->addRegisterKilled(SubReg, TRI, true);
+ else {
+ LastRefOrPartRef->addRegisterKilled(SubReg, TRI, true);
+ PhysRegUse[SubReg] = LastRefOrPartRef;
+ for (const unsigned *SSRegs = TRI->getSubRegisters(SubReg);
+ unsigned SSReg = *SSRegs; ++SSRegs)
+ PhysRegUse[SSReg] = LastRefOrPartRef;
+ }
for (const unsigned *SS = TRI->getSubRegisters(SubReg); *SS; ++SS)
PartUses.erase(*SS);
}
@@ -656,35 +702,45 @@ void LiveVariables::analyzePHINodes(const MachineFunction& Fn) {
.push_back(BBI->getOperand(i).getReg());
}
+bool LiveVariables::VarInfo::isLiveIn(const MachineBasicBlock &MBB,
+ unsigned Reg,
+ MachineRegisterInfo &MRI) {
+ unsigned Num = MBB.getNumber();
+
+ // Reg is live-through.
+ if (AliveBlocks.test(Num))
+ return true;
+
+ // Registers defined in MBB cannot be live in.
+ const MachineInstr *Def = MRI.getVRegDef(Reg);
+ if (Def && Def->getParent() == &MBB)
+ return false;
+
+ // Reg was not defined in MBB, was it killed here?
+ return findKill(&MBB);
+}
+
/// addNewBlock - Add a new basic block BB as an empty successor to DomBB. All
/// variables that are live out of DomBB will be marked as passing live through
/// BB.
void LiveVariables::addNewBlock(MachineBasicBlock *BB,
- MachineBasicBlock *DomBB) {
+ MachineBasicBlock *DomBB,
+ MachineBasicBlock *SuccBB) {
const unsigned NumNew = BB->getNumber();
- const unsigned NumDom = DomBB->getNumber();
+
+ // All registers used by PHI nodes in SuccBB must be live through BB.
+ for (MachineBasicBlock::const_iterator BBI = SuccBB->begin(),
+ BBE = SuccBB->end();
+ BBI != BBE && BBI->getOpcode() == TargetInstrInfo::PHI; ++BBI)
+ for (unsigned i = 1, e = BBI->getNumOperands(); i != e; i += 2)
+ if (BBI->getOperand(i+1).getMBB() == BB)
+ getVarInfo(BBI->getOperand(i).getReg()).AliveBlocks.set(NumNew);
// Update info for all live variables
for (unsigned Reg = TargetRegisterInfo::FirstVirtualRegister,
E = MRI->getLastVirtReg()+1; Reg != E; ++Reg) {
VarInfo &VI = getVarInfo(Reg);
-
- // Anything live through DomBB is also live through BB.
- if (VI.AliveBlocks.test(NumDom)) {
+ if (!VI.AliveBlocks.test(NumNew) && VI.isLiveIn(*SuccBB, Reg, *MRI))
VI.AliveBlocks.set(NumNew);
- continue;
- }
-
- // Variables not defined in DomBB cannot be live out.
- const MachineInstr *Def = MRI->getVRegDef(Reg);
- if (!Def || Def->getParent() != DomBB)
- continue;
-
- // Killed by DomBB?
- if (VI.findKill(DomBB))
- continue;
-
- // This register is defined in DomBB and live out
- VI.AliveBlocks.set(NumNew);
}
}
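
Why addNewBlock now needs SuccBB: a register that only feeds a PHI in the successor is not live-in to it in the usual sense, so the old DomBB-based scan would miss it. An illustrative trace (virtual register numbers hypothetical):

  // A:  %reg1024 = ...
  //     JMP N                                 ; edge A->B split, N inserted
  // N:  (empty, falls through)
  // B:  %reg1026 = PHI %reg1024, <BB#N>, ...  ; PHI source, not a live-in
  // %reg1024 must still survive N, which the new PHI scan over SuccBB
  // guarantees by setting AliveBlocks for N.
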
diff --git a/lib/CodeGen/MachineBasicBlock.cpp b/lib/CodeGen/MachineBasicBlock.cpp
index cd52825..e55e369 100644
--- a/lib/CodeGen/MachineBasicBlock.cpp
+++ b/lib/CodeGen/MachineBasicBlock.cpp
@@ -172,6 +172,13 @@ static inline void OutputReg(raw_ostream &os, unsigned RegNo,
os << " %reg" << RegNo;
}
+StringRef MachineBasicBlock::getName() const {
+ if (const BasicBlock *LBB = getBasicBlock())
+ return LBB->getName();
+ else
+ return "(null)";
+}
+
void MachineBasicBlock::print(raw_ostream &OS) const {
const MachineFunction *MF = getParent();
if (!MF) {
@@ -272,8 +279,9 @@ void MachineBasicBlock::updateTerminator() {
// successors is its layout successor, rewrite it to a fallthrough
// conditional branch.
if (isLayoutSuccessor(TBB)) {
+ if (TII->ReverseBranchCondition(Cond))
+ return;
TII->RemoveBranch(*this);
- TII->ReverseBranchCondition(Cond);
TII->InsertBranch(*this, FBB, 0, Cond);
} else if (isLayoutSuccessor(FBB)) {
TII->RemoveBranch(*this);
@@ -285,8 +293,13 @@ void MachineBasicBlock::updateTerminator() {
MachineBasicBlock *MBBB = *next(succ_begin());
if (MBBA == TBB) std::swap(MBBB, MBBA);
if (isLayoutSuccessor(TBB)) {
+ if (TII->ReverseBranchCondition(Cond)) {
+        // We can't reverse the condition, so add an unconditional branch.
+ Cond.clear();
+ TII->InsertBranch(*this, MBBA, 0, Cond);
+ return;
+ }
TII->RemoveBranch(*this);
- TII->ReverseBranchCondition(Cond);
TII->InsertBranch(*this, MBBA, 0, Cond);
} else if (!isLayoutSuccessor(MBBA)) {
TII->RemoveBranch(*this);
@@ -349,6 +362,51 @@ bool MachineBasicBlock::isLayoutSuccessor(const MachineBasicBlock *MBB) const {
return next(I) == MachineFunction::const_iterator(MBB);
}
+bool MachineBasicBlock::canFallThrough() {
+ MachineBasicBlock *TBB = 0, *FBB = 0;
+ SmallVector<MachineOperand, 4> Cond;
+ const TargetInstrInfo *TII = getParent()->getTarget().getInstrInfo();
+ bool BranchUnAnalyzable = TII->AnalyzeBranch(*this, TBB, FBB, Cond, true);
+
+ MachineFunction::iterator Fallthrough = this;
+ ++Fallthrough;
+ // If FallthroughBlock is off the end of the function, it can't fall through.
+ if (Fallthrough == getParent()->end())
+ return false;
+
+ // If FallthroughBlock isn't a successor, no fallthrough is possible.
+ if (!isSuccessor(Fallthrough))
+ return false;
+
+ // If we couldn't analyze the branch, examine the last instruction.
+ // If the block doesn't end in a known control barrier, assume fallthrough
+ // is possible. The isPredicable check is needed because this code can be
+ // called during IfConversion, where an instruction which is normally a
+ // Barrier is predicated and thus no longer an actual control barrier. This
+ // is over-conservative though, because if an instruction isn't actually
+  // is overly conservative, though, because if an instruction isn't actually
+ if (BranchUnAnalyzable)
+ return empty() || !back().getDesc().isBarrier() ||
+ back().getDesc().isPredicable();
+
+ // If there is no branch, control always falls through.
+ if (TBB == 0) return true;
+
+  // If there is some explicit branch to the fallthrough block, control can
+  // obviously reach it, even though the branch should get folded into the
+  // implicit fallthrough.
+ if (MachineFunction::iterator(TBB) == Fallthrough ||
+ MachineFunction::iterator(FBB) == Fallthrough)
+ return true;
+
+ // If it's an unconditional branch to some block not the fall through, it
+ // doesn't fall through.
+ if (Cond.empty()) return false;
+
+ // Otherwise, if it is conditional and has no explicit false block, it falls
+ // through.
+ return FBB == 0;
+}
+
/// removeFromParent - This method unlinks 'this' from the containing function,
/// and returns it, but does not delete it.
MachineBasicBlock *MachineBasicBlock::removeFromParent() {
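
A hypothetical caller of the new canFallThrough(), e.g. a layout pass weighing whether it is free to move a block (a sketch, not code from this patch):

  MachineFunction::iterator Next = MBB; ++Next;
  if (Next != MF->end() && MBB->canFallThrough()) {
    // MBB currently reaches *Next implicitly; moving either block means an
    // explicit branch must be inserted, so account for that cost first.
  }
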
diff --git a/lib/CodeGen/MachineFunction.cpp b/lib/CodeGen/MachineFunction.cpp
index 81d1301..d20f446 100644
--- a/lib/CodeGen/MachineFunction.cpp
+++ b/lib/CodeGen/MachineFunction.cpp
@@ -359,14 +359,16 @@ void MachineFunction::print(raw_ostream &OS) const {
namespace llvm {
template<>
struct DOTGraphTraits<const MachineFunction*> : public DefaultDOTGraphTraits {
+
+ DOTGraphTraits (bool isSimple=false) : DefaultDOTGraphTraits(isSimple) {}
+
static std::string getGraphName(const MachineFunction *F) {
return "CFG for '" + F->getFunction()->getNameStr() + "' function";
}
- static std::string getNodeLabel(const MachineBasicBlock *Node,
- const MachineFunction *Graph,
- bool ShortNames) {
- if (ShortNames && Node->getBasicBlock() &&
+ std::string getNodeLabel(const MachineBasicBlock *Node,
+ const MachineFunction *Graph) {
+ if (isSimple () && Node->getBasicBlock() &&
!Node->getBasicBlock()->getName().empty())
return Node->getBasicBlock()->getNameStr() + ":";
@@ -374,7 +376,7 @@ namespace llvm {
{
raw_string_ostream OSS(OutStr);
- if (ShortNames)
+ if (isSimple())
OSS << Node->getNumber() << ':';
else
Node->print(OSS);
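
The DOTGraphTraits change above reflects an API migration: the static ShortNames argument is gone, and specializations query the per-instance isSimple() flag inherited from DefaultDOTGraphTraits. A minimal sketch of a specialization under the new scheme (MyGraph, MyNode, and the label helpers are hypothetical):

  template<>
  struct DOTGraphTraits<MyGraph*> : public DefaultDOTGraphTraits {
    DOTGraphTraits(bool isSimple = false) : DefaultDOTGraphTraits(isSimple) {}
    std::string getNodeLabel(const MyNode *Node, const MyGraph *G) {
      // Short labels for "simple" views, full dumps otherwise.
      return isSimple() ? getShortLabel(Node) : getFullLabel(Node);
    }
  };
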
diff --git a/lib/CodeGen/MachineInstr.cpp b/lib/CodeGen/MachineInstr.cpp
index b250faa..f73a5a3 100644
--- a/lib/CodeGen/MachineInstr.cpp
+++ b/lib/CodeGen/MachineInstr.cpp
@@ -1148,10 +1148,11 @@ void MachineInstr::print(raw_ostream &OS, const TargetMachine *TM) const {
// TODO: print InlinedAtLoc information
DebugLocTuple DLT = MF->getDebugLocTuple(debugLoc);
- DICompileUnit CU(DLT.Scope);
- if (!CU.isNull())
- OS << " dbg:" << CU.getDirectory() << '/' << CU.getFilename() << ":"
- << DLT.Line << ":" << DLT.Col;
+ DIScope Scope(DLT.Scope);
+ OS << " dbg:";
+ if (!Scope.isNull())
+ OS << Scope.getDirectory() << ':' << Scope.getFilename() << ':';
+ OS << DLT.Line << ":" << DLT.Col;
}
OS << "\n";
diff --git a/lib/CodeGen/MachineLICM.cpp b/lib/CodeGen/MachineLICM.cpp
index 33b6b82..66de535 100644
--- a/lib/CodeGen/MachineLICM.cpp
+++ b/lib/CodeGen/MachineLICM.cpp
@@ -107,6 +107,10 @@ namespace {
///
void HoistRegion(MachineDomTreeNode *N);
+ /// isLoadFromConstantMemory - Return true if the given instruction is a
+ /// load from constant memory.
+ bool isLoadFromConstantMemory(MachineInstr *MI);
+
/// ExtractHoistableLoad - Unfold a load from the given machineinstr if
/// the load itself could be hoisted. Return the unfolded and hoistable
/// load, or null if the load couldn't be unfolded or if it wouldn't
@@ -338,6 +342,24 @@ static bool HasPHIUses(unsigned Reg, MachineRegisterInfo *RegInfo) {
return false;
}
+/// isLoadFromConstantMemory - Return true if the given instruction is a
+/// load from constant memory. Machine LICM will hoist these even if they are
+/// not re-materializable.
+bool MachineLICM::isLoadFromConstantMemory(MachineInstr *MI) {
+ if (!MI->getDesc().mayLoad()) return false;
+ if (!MI->hasOneMemOperand()) return false;
+ MachineMemOperand *MMO = *MI->memoperands_begin();
+ if (MMO->isVolatile()) return false;
+ if (!MMO->getValue()) return false;
+ const PseudoSourceValue *PSV = dyn_cast<PseudoSourceValue>(MMO->getValue());
+ if (PSV) {
+ MachineFunction &MF = *MI->getParent()->getParent();
+ return PSV->isConstant(MF.getFrameInfo());
+ } else {
+ return AA->pointsToConstantMemory(MMO->getValue());
+ }
+}
+
/// IsProfitableToHoist - Return true if it is potentially profitable to hoist
/// the given loop invariant.
bool MachineLICM::IsProfitableToHoist(MachineInstr &MI) {
@@ -347,8 +369,15 @@ bool MachineLICM::IsProfitableToHoist(MachineInstr &MI) {
  // FIXME: For now, only hoist re-materializable instructions. LICM will
// increase register pressure. We want to make sure it doesn't increase
// spilling.
- if (!TII->isTriviallyReMaterializable(&MI, AA))
- return false;
+  // Also hoist loads from constant memory, e.g. loads from stubs or the GOT.
+  // Hoisting these tends to help performance when register pressure is low.
+  // The trade-off is that under high register pressure it may cause a spill,
+  // which ends up adding a store in the loop preheader; but the reload is no
+  // more expensive than the original load. A side benefit is that these
+  // loads are frequently CSE'ed.
+ if (!TII->isTriviallyReMaterializable(&MI, AA)) {
+ if (!isLoadFromConstantMemory(&MI))
+ return false;
+ }
// If result(s) of this instruction is used by PHIs, then don't hoist it.
// The presence of joins makes it difficult for current register allocator
@@ -368,18 +397,9 @@ MachineInstr *MachineLICM::ExtractHoistableLoad(MachineInstr *MI) {
// If not, we may be able to unfold a load and hoist that.
// First test whether the instruction is loading from an amenable
// memory location.
- if (!MI->getDesc().mayLoad()) return 0;
- if (!MI->hasOneMemOperand()) return 0;
- MachineMemOperand *MMO = *MI->memoperands_begin();
- if (MMO->isVolatile()) return 0;
- MachineFunction &MF = *MI->getParent()->getParent();
- if (!MMO->getValue()) return 0;
- if (const PseudoSourceValue *PSV =
- dyn_cast<PseudoSourceValue>(MMO->getValue())) {
- if (!PSV->isConstant(MF.getFrameInfo())) return 0;
- } else {
- if (!AA->pointsToConstantMemory(MMO->getValue())) return 0;
- }
+ if (!isLoadFromConstantMemory(MI))
+ return 0;
+
// Next determine the register class for a temporary register.
unsigned LoadRegIndex;
unsigned NewOpc =
@@ -393,6 +413,8 @@ MachineInstr *MachineLICM::ExtractHoistableLoad(MachineInstr *MI) {
const TargetRegisterClass *RC = TID.OpInfo[LoadRegIndex].getRegClass(TRI);
// Ok, we're unfolding. Create a temporary register and do the unfold.
unsigned Reg = RegInfo->createVirtualRegister(RC);
+
+ MachineFunction &MF = *MI->getParent()->getParent();
SmallVector<MachineInstr *, 2> NewMIs;
bool Success =
TII->unfoldMemoryOperand(MF, MI, Reg,
@@ -487,10 +509,10 @@ void MachineLICM::Hoist(MachineInstr *MI) {
errs() << "Hoisting " << *MI;
if (CurPreheader->getBasicBlock())
errs() << " to MachineBasicBlock "
- << CurPreheader->getBasicBlock()->getName();
+ << CurPreheader->getName();
if (MI->getParent()->getBasicBlock())
errs() << " from MachineBasicBlock "
- << MI->getParent()->getBasicBlock()->getName();
+ << MI->getParent()->getName();
errs() << "\n";
});
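
For intuition about the loads this targets: in PIC code, the address of a global is itself fetched from a GOT slot that never changes at run time, so the address load is loop-invariant even when it is not trivially rematerializable. An illustrative before/after (pseudo machine code, hypothetical operands):

  // loop:       %r = load GOT[g]        ; constant-memory load, every trip
  //             %v = load [%r]
  // becomes
  // preheader:  %r = load GOT[g]        ; hoisted once
  // loop:       %v = load [%r]          ; only the value load stays inside
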
diff --git a/lib/CodeGen/MachineModuleInfo.cpp b/lib/CodeGen/MachineModuleInfo.cpp
index 4b067a0..ed5bb5e 100644
--- a/lib/CodeGen/MachineModuleInfo.cpp
+++ b/lib/CodeGen/MachineModuleInfo.cpp
@@ -293,75 +293,3 @@ unsigned MachineModuleInfo::getPersonalityIndex() const {
return 0;
}
-//===----------------------------------------------------------------------===//
-/// DebugLabelFolding pass - This pass prunes out redundant labels. This allows
-/// a info consumer to determine if the range of two labels is empty, by seeing
-/// if the labels map to the same reduced label.
-
-namespace llvm {
-
-struct DebugLabelFolder : public MachineFunctionPass {
- static char ID;
- DebugLabelFolder() : MachineFunctionPass(&ID) {}
-
- virtual void getAnalysisUsage(AnalysisUsage &AU) const {
- AU.setPreservesCFG();
- AU.addPreservedID(MachineLoopInfoID);
- AU.addPreservedID(MachineDominatorsID);
- MachineFunctionPass::getAnalysisUsage(AU);
- }
-
- virtual bool runOnMachineFunction(MachineFunction &MF);
- virtual const char *getPassName() const { return "Label Folder"; }
-};
-
-char DebugLabelFolder::ID = 0;
-
-bool DebugLabelFolder::runOnMachineFunction(MachineFunction &MF) {
- // Get machine module info.
- MachineModuleInfo *MMI = getAnalysisIfAvailable<MachineModuleInfo>();
- if (!MMI) return false;
-
- // Track if change is made.
- bool MadeChange = false;
- // No prior label to begin.
- unsigned PriorLabel = 0;
-
- // Iterate through basic blocks.
- for (MachineFunction::iterator BB = MF.begin(), E = MF.end();
- BB != E; ++BB) {
- // Iterate through instructions.
- for (MachineBasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ) {
- // Is it a label.
- if (I->isDebugLabel() && !MMI->isDbgLabelUsed(I->getOperand(0).getImm())){
- // The label ID # is always operand #0, an immediate.
- unsigned NextLabel = I->getOperand(0).getImm();
-
- // If there was an immediate prior label.
- if (PriorLabel) {
- // Remap the current label to prior label.
- MMI->RemapLabel(NextLabel, PriorLabel);
- // Delete the current label.
- I = BB->erase(I);
- // Indicate a change has been made.
- MadeChange = true;
- continue;
- } else {
- // Start a new round.
- PriorLabel = NextLabel;
- }
- } else {
- // No consecutive labels.
- PriorLabel = 0;
- }
-
- ++I;
- }
- }
-
- return MadeChange;
-}
-
-FunctionPass *createDebugLabelFoldingPass() { return new DebugLabelFolder(); }
-
-}
diff --git a/lib/CodeGen/MachineVerifier.cpp b/lib/CodeGen/MachineVerifier.cpp
index a1c74c0..d9f4c99 100644
--- a/lib/CodeGen/MachineVerifier.cpp
+++ b/lib/CodeGen/MachineVerifier.cpp
@@ -305,7 +305,7 @@ void MachineVerifier::report(const char *msg, const MachineFunction *MF) {
void MachineVerifier::report(const char *msg, const MachineBasicBlock *MBB) {
assert(MBB);
report(msg, MBB->getParent());
- *OS << "- basic block: " << MBB->getBasicBlock()->getNameStr()
+ *OS << "- basic block: " << MBB->getName()
<< " " << (void*)MBB
<< " (BB#" << MBB->getNumber() << ")\n";
}
diff --git a/lib/CodeGen/PHIElimination.cpp b/lib/CodeGen/PHIElimination.cpp
index b3802ed..2e30cc6 100644
--- a/lib/CodeGen/PHIElimination.cpp
+++ b/lib/CodeGen/PHIElimination.cpp
@@ -353,7 +353,7 @@ bool llvm::PHIElimination::SplitPHIEdges(MachineFunction &MF,
// We break edges when registers are live out from the predecessor block
// (not considering PHI nodes). If the register is live in to this block
// anyway, we would gain nothing from splitting.
- if (isLiveOut(Reg, *PreMBB, LV) && !isLiveIn(Reg, MBB, LV))
+ if (!LV.isLiveIn(Reg, MBB) && isLiveOut(Reg, *PreMBB, LV))
SplitCriticalEdge(PreMBB, &MBB);
}
}
@@ -406,22 +406,6 @@ bool llvm::PHIElimination::isLiveOut(unsigned Reg, const MachineBasicBlock &MBB,
return false;
}
-bool llvm::PHIElimination::isLiveIn(unsigned Reg, const MachineBasicBlock &MBB,
- LiveVariables &LV) {
- LiveVariables::VarInfo &VI = LV.getVarInfo(Reg);
-
- if (VI.AliveBlocks.test(MBB.getNumber()))
- return true;
-
- // defined in MBB?
- const MachineInstr *Def = MRI->getVRegDef(Reg);
- if (Def && Def->getParent() == &MBB)
- return false;
-
- // killed in MBB?
- return VI.findKill(&MBB);
-}
-
MachineBasicBlock *PHIElimination::SplitCriticalEdge(MachineBasicBlock *A,
MachineBasicBlock *B) {
assert(A && B && "Missing MBB end point");
@@ -439,21 +423,21 @@ MachineBasicBlock *PHIElimination::SplitCriticalEdge(MachineBasicBlock *A,
++NumSplits;
MachineBasicBlock *NMBB = MF->CreateMachineBasicBlock();
- MF->push_back(NMBB);
+ MF->insert(next(MachineFunction::iterator(A)), NMBB);
DEBUG(errs() << "PHIElimination splitting critical edge:"
" BB#" << A->getNumber()
<< " -- BB#" << NMBB->getNumber()
<< " -- BB#" << B->getNumber() << '\n');
A->ReplaceUsesOfBlockWith(B, NMBB);
- // If A may fall through to B, we may have to insert a branch.
- if (A->isLayoutSuccessor(B))
- A->updateTerminator();
+ A->updateTerminator();
- // Insert unconditional "jump B" instruction in NMBB.
+ // Insert unconditional "jump B" instruction in NMBB if necessary.
NMBB->addSuccessor(B);
- Cond.clear();
- MF->getTarget().getInstrInfo()->InsertBranch(*NMBB, B, NULL, Cond);
+ if (!NMBB->isLayoutSuccessor(B)) {
+ Cond.clear();
+ MF->getTarget().getInstrInfo()->InsertBranch(*NMBB, B, NULL, Cond);
+ }
// Fix PHI nodes in B so they refer to NMBB instead of A
for (MachineBasicBlock::iterator i = B->begin(), e = B->end();
@@ -463,7 +447,7 @@ MachineBasicBlock *PHIElimination::SplitCriticalEdge(MachineBasicBlock *A,
i->getOperand(ni+1).setMBB(NMBB);
if (LiveVariables *LV=getAnalysisIfAvailable<LiveVariables>())
- LV->addNewBlock(NMBB, A);
+ LV->addNewBlock(NMBB, A, B);
if (MachineDominatorTree *MDT=getAnalysisIfAvailable<MachineDominatorTree>())
MDT->addNewBlock(NMBB, A);
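
The layout change here is the point: inserting NMBB immediately after A, rather than pushing it to the end of the function, usually makes the new block's branch unnecessary. Schematically:

  // Before split:  ... A  X ...  B ...        (critical edge A->B)
  // Old placement: ... A  X ...  B ... NMBB   (NMBB always needs "JMP B")
  // New placement: ... A  NMBB  X ... B ...   (A falls through into NMBB;
  //                                            "JMP B" only if B isn't next)
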
diff --git a/lib/CodeGen/PHIElimination.h b/lib/CodeGen/PHIElimination.h
index f8c9fe7..f5872cb 100644
--- a/lib/CodeGen/PHIElimination.h
+++ b/lib/CodeGen/PHIElimination.h
@@ -99,12 +99,6 @@ namespace llvm {
bool isLiveOut(unsigned Reg, const MachineBasicBlock &MBB,
LiveVariables &LV);
- /// isLiveIn - Determine if Reg is live in to MBB, not considering PHI
- /// source registers. This means that Reg is either killed by MBB or passes
- /// through it.
- bool isLiveIn(unsigned Reg, const MachineBasicBlock &MBB,
- LiveVariables &LV);
-
/// SplitCriticalEdge - Split a critical edge from A to B by
/// inserting a new MBB. Update branches in A and PHI instructions
/// in B. Return the new block.
diff --git a/lib/CodeGen/PostRASchedulerList.cpp b/lib/CodeGen/PostRASchedulerList.cpp
index 5f1f1f3..9101fce2 100644
--- a/lib/CodeGen/PostRASchedulerList.cpp
+++ b/lib/CodeGen/PostRASchedulerList.cpp
@@ -175,11 +175,10 @@ namespace {
void FixupKills(MachineBasicBlock *MBB);
private:
- void ReleaseSucc(SUnit *SU, SDep *SuccEdge, bool IgnoreAntiDep);
- void ReleaseSuccessors(SUnit *SU, bool IgnoreAntiDep);
- void ScheduleNodeTopDown(SUnit *SU, unsigned CurCycle, bool IgnoreAntiDep);
- void ListScheduleTopDown(
- AntiDepBreaker::CandidateMap *AntiDepCandidates);
+ void ReleaseSucc(SUnit *SU, SDep *SuccEdge);
+ void ReleaseSuccessors(SUnit *SU);
+ void ScheduleNodeTopDown(SUnit *SU, unsigned CurCycle);
+ void ListScheduleTopDown();
void StartBlockForKills(MachineBasicBlock *BB);
// ToggleKillFlag - Toggle a register operand kill flag. Other
@@ -322,50 +321,24 @@ void SchedulePostRATDList::Schedule() {
BuildSchedGraph(AA);
if (AntiDepBreak != NULL) {
- AntiDepBreaker::CandidateMap AntiDepCandidates;
- const bool NeedCandidates = AntiDepBreak->NeedCandidates();
+ unsigned Broken =
+ AntiDepBreak->BreakAntiDependencies(SUnits, Begin, InsertPos,
+ InsertPosIndex);
- for (unsigned i = 0, Trials = AntiDepBreak->GetMaxTrials();
- i < Trials; ++i) {
- DEBUG(errs() << "\n********** Break Anti-Deps, Trial " <<
- i << " **********\n");
-
- // If candidates are required, then schedule forward ignoring
- // anti-dependencies to collect the candidate operands for
- // anti-dependence breaking. The candidates will be the def
- // operands for the anti-dependencies that if broken would allow
- // an improved schedule
- if (NeedCandidates) {
- DEBUG(for (unsigned su = 0, e = SUnits.size(); su != e; ++su)
- SUnits[su].dumpAll(this));
-
- AntiDepCandidates.clear();
- AvailableQueue.initNodes(SUnits);
- ListScheduleTopDown(&AntiDepCandidates);
- AvailableQueue.releaseState();
- }
-
- unsigned Broken =
- AntiDepBreak->BreakAntiDependencies(SUnits, AntiDepCandidates,
- Begin, InsertPos, InsertPosIndex);
-
+ if (Broken != 0) {
// We made changes. Update the dependency graph.
// Theoretically we could update the graph in place:
// When a live range is changed to use a different register, remove
// the def's anti-dependence *and* output-dependence edges due to
// that register, and add new anti-dependence and output-dependence
// edges based on the next live range of the register.
- if ((Broken != 0) || NeedCandidates) {
- SUnits.clear();
- Sequence.clear();
- EntrySU = SUnit();
- ExitSU = SUnit();
- BuildSchedGraph(AA);
- }
-
+ SUnits.clear();
+ Sequence.clear();
+ EntrySU = SUnit();
+ ExitSU = SUnit();
+ BuildSchedGraph(AA);
+
NumFixedAnti += Broken;
- if (Broken == 0)
- break;
}
}
@@ -374,7 +347,7 @@ void SchedulePostRATDList::Schedule() {
SUnits[su].dumpAll(this));
AvailableQueue.initNodes(SUnits);
- ListScheduleTopDown(NULL);
+ ListScheduleTopDown();
AvailableQueue.releaseState();
}
@@ -573,8 +546,7 @@ void SchedulePostRATDList::FixupKills(MachineBasicBlock *MBB) {
/// ReleaseSucc - Decrement the NumPredsLeft count of a successor. Add it to
/// the PendingQueue if the count reaches zero. Also update its cycle bound.
-void SchedulePostRATDList::ReleaseSucc(SUnit *SU, SDep *SuccEdge,
- bool IgnoreAntiDep) {
+void SchedulePostRATDList::ReleaseSucc(SUnit *SU, SDep *SuccEdge) {
SUnit *SuccSU = SuccEdge->getSUnit();
#ifndef NDEBUG
@@ -590,8 +562,7 @@ void SchedulePostRATDList::ReleaseSucc(SUnit *SU, SDep *SuccEdge,
// Compute how many cycles it will be before this actually becomes
// available. This is the max of the start time of all predecessors plus
// their latencies.
- SuccSU->setDepthToAtLeast(SU->getDepth(IgnoreAntiDep) +
- SuccEdge->getLatency(), IgnoreAntiDep);
+ SuccSU->setDepthToAtLeast(SU->getDepth() + SuccEdge->getLatency());
// If all the node's predecessors are scheduled, this node is ready
// to be scheduled. Ignore the special ExitSU node.
@@ -600,40 +571,34 @@ void SchedulePostRATDList::ReleaseSucc(SUnit *SU, SDep *SuccEdge,
}
/// ReleaseSuccessors - Call ReleaseSucc on each of SU's successors.
-void SchedulePostRATDList::ReleaseSuccessors(SUnit *SU, bool IgnoreAntiDep) {
+void SchedulePostRATDList::ReleaseSuccessors(SUnit *SU) {
for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
I != E; ++I) {
- if (IgnoreAntiDep &&
- ((I->getKind() == SDep::Anti) || (I->getKind() == SDep::Output)))
- continue;
- ReleaseSucc(SU, &*I, IgnoreAntiDep);
+ ReleaseSucc(SU, &*I);
}
}
/// ScheduleNodeTopDown - Add the node to the schedule. Decrement the pending
/// count of its successors. If a successor pending count is zero, add it to
/// the Available queue.
-void SchedulePostRATDList::ScheduleNodeTopDown(SUnit *SU, unsigned CurCycle,
- bool IgnoreAntiDep) {
+void SchedulePostRATDList::ScheduleNodeTopDown(SUnit *SU, unsigned CurCycle) {
DEBUG(errs() << "*** Scheduling [" << CurCycle << "]: ");
DEBUG(SU->dump(this));
Sequence.push_back(SU);
- assert(CurCycle >= SU->getDepth(IgnoreAntiDep) &&
+ assert(CurCycle >= SU->getDepth() &&
"Node scheduled above its depth!");
- SU->setDepthToAtLeast(CurCycle, IgnoreAntiDep);
+ SU->setDepthToAtLeast(CurCycle);
- ReleaseSuccessors(SU, IgnoreAntiDep);
+ ReleaseSuccessors(SU);
SU->isScheduled = true;
AvailableQueue.ScheduledNode(SU);
}
/// ListScheduleTopDown - The main loop of list scheduling for top-down
/// schedulers.
-void SchedulePostRATDList::ListScheduleTopDown(
- AntiDepBreaker::CandidateMap *AntiDepCandidates) {
+void SchedulePostRATDList::ListScheduleTopDown() {
unsigned CurCycle = 0;
- const bool IgnoreAntiDep = (AntiDepCandidates != NULL);
// We're scheduling top-down but we're visiting the regions in
// bottom-up order, so we don't know the hazards at the start of a
@@ -641,33 +606,13 @@ void SchedulePostRATDList::ListScheduleTopDown(
// blocks are a single region).
HazardRec->Reset();
- // If ignoring anti-dependencies, the Schedule DAG still has Anti
- // dep edges, but we ignore them for scheduling purposes
- AvailableQueue.setIgnoreAntiDep(IgnoreAntiDep);
-
// Release any successors of the special Entry node.
- ReleaseSuccessors(&EntrySU, IgnoreAntiDep);
+ ReleaseSuccessors(&EntrySU);
- // Add all leaves to Available queue. If ignoring antideps we also
- // adjust the predecessor count for each node to not include antidep
- // edges.
+ // Add all leaves to Available queue.
for (unsigned i = 0, e = SUnits.size(); i != e; ++i) {
// It is available if it has no predecessors.
bool available = SUnits[i].Preds.empty();
- // If we are ignoring anti-dependencies then a node that has only
- // anti-dep predecessors is available.
- if (!available && IgnoreAntiDep) {
- available = true;
- for (SUnit::const_pred_iterator I = SUnits[i].Preds.begin(),
- E = SUnits[i].Preds.end(); I != E; ++I) {
- if ((I->getKind() != SDep::Anti) && (I->getKind() != SDep::Output)) {
- available = false;
- } else {
- SUnits[i].NumPredsLeft -= 1;
- }
- }
- }
-
if (available) {
AvailableQueue.push(&SUnits[i]);
SUnits[i].isAvailable = true;
@@ -687,21 +632,21 @@ void SchedulePostRATDList::ListScheduleTopDown(
// so, add them to the available queue.
unsigned MinDepth = ~0u;
for (unsigned i = 0, e = PendingQueue.size(); i != e; ++i) {
- if (PendingQueue[i]->getDepth(IgnoreAntiDep) <= CurCycle) {
+ if (PendingQueue[i]->getDepth() <= CurCycle) {
AvailableQueue.push(PendingQueue[i]);
PendingQueue[i]->isAvailable = true;
PendingQueue[i] = PendingQueue.back();
PendingQueue.pop_back();
--i; --e;
- } else if (PendingQueue[i]->getDepth(IgnoreAntiDep) < MinDepth)
- MinDepth = PendingQueue[i]->getDepth(IgnoreAntiDep);
+ } else if (PendingQueue[i]->getDepth() < MinDepth)
+ MinDepth = PendingQueue[i]->getDepth();
}
DEBUG(errs() << "\n*** Examining Available\n";
LatencyPriorityQueue q = AvailableQueue;
while (!q.empty()) {
SUnit *su = q.pop();
- errs() << "Height " << su->getHeight(IgnoreAntiDep) << ": ";
+ errs() << "Height " << su->getHeight() << ": ";
su->dump(this);
});
@@ -731,30 +676,8 @@ void SchedulePostRATDList::ListScheduleTopDown(
// If we found a node to schedule...
if (FoundSUnit) {
- // If we are ignoring anti-dependencies and the SUnit we are
- // scheduling has an antidep predecessor that has not been
- // scheduled, then we will need to break that antidep if we want
- // to get this schedule when not ignoring anti-dependencies.
- if (IgnoreAntiDep) {
- AntiDepBreaker::AntiDepRegVector AntiDepRegs;
- for (SUnit::const_pred_iterator I = FoundSUnit->Preds.begin(),
- E = FoundSUnit->Preds.end(); I != E; ++I) {
- if (((I->getKind() == SDep::Anti) ||
- (I->getKind() == SDep::Output)) &&
- !I->getSUnit()->isScheduled)
- AntiDepRegs.push_back(I->getReg());
- }
-
- if (AntiDepRegs.size() > 0) {
- DEBUG(errs() << "*** AntiDep Candidate: ");
- DEBUG(FoundSUnit->dump(this));
- AntiDepCandidates->insert(
- AntiDepBreaker::CandidateMap::value_type(FoundSUnit, AntiDepRegs));
- }
- }
-
// ... schedule the node...
- ScheduleNodeTopDown(FoundSUnit, CurCycle, IgnoreAntiDep);
+ ScheduleNodeTopDown(FoundSUnit, CurCycle);
HazardRec->EmitInstruction(FoundSUnit);
CycleHasInsts = true;
@@ -775,8 +698,7 @@ void SchedulePostRATDList::ListScheduleTopDown(
// just advance the current cycle and try again.
DEBUG(errs() << "*** Stall in cycle " << CurCycle << '\n');
HazardRec->AdvanceCycle();
- if (!IgnoreAntiDep)
- ++NumStalls;
+ ++NumStalls;
} else {
// Otherwise, we have no instructions to issue and we have instructions
// that will fault if we don't do this right. This is the case for
@@ -784,8 +706,7 @@ void SchedulePostRATDList::ListScheduleTopDown(
DEBUG(errs() << "*** Emitting noop in cycle " << CurCycle << '\n');
HazardRec->EmitNoop();
Sequence.push_back(0); // NULL here means noop
- if (!IgnoreAntiDep)
- ++NumNoops;
+ ++NumNoops;
}
++CurCycle;
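
With the candidate-map machinery gone, the anti-dependency interaction reduces to one break-then-rebuild step. A condensed sketch of the resulting Schedule() flow, using only names visible in this patch:

  BuildSchedGraph(AA);
  if (AntiDepBreak != NULL) {
    unsigned Broken =
      AntiDepBreak->BreakAntiDependencies(SUnits, Begin, InsertPos,
                                          InsertPosIndex);
    if (Broken != 0) {
      // Renaming registers invalidated anti/output edges, so rebuild.
      SUnits.clear();
      Sequence.clear();
      EntrySU = SUnit();
      ExitSU = SUnit();
      BuildSchedGraph(AA);
      NumFixedAnti += Broken;
    }
  }
  AvailableQueue.initNodes(SUnits);
  ListScheduleTopDown();   // single pass; no IgnoreAntiDep mode anymore
  AvailableQueue.releaseState();
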
diff --git a/lib/CodeGen/ProcessImplicitDefs.cpp b/lib/CodeGen/ProcessImplicitDefs.cpp
index 455964b..c9a33d8 100644
--- a/lib/CodeGen/ProcessImplicitDefs.cpp
+++ b/lib/CodeGen/ProcessImplicitDefs.cpp
@@ -75,10 +75,11 @@ bool ProcessImplicitDefs::runOnMachineFunction(MachineFunction &fn) {
SmallSet<unsigned, 8> ImpDefRegs;
SmallVector<MachineInstr*, 8> ImpDefMIs;
- MachineBasicBlock *Entry = fn.begin();
+ SmallVector<MachineInstr*, 4> RUses;
SmallPtrSet<MachineBasicBlock*,16> Visited;
SmallPtrSet<MachineInstr*, 8> ModInsts;
+ MachineBasicBlock *Entry = fn.begin();
for (df_ext_iterator<MachineBasicBlock*, SmallPtrSet<MachineBasicBlock*,16> >
DFI = df_ext_begin(Entry, Visited), E = df_ext_end(Entry, Visited);
DFI != E; ++DFI) {
@@ -182,53 +183,87 @@ bool ProcessImplicitDefs::runOnMachineFunction(MachineFunction &fn) {
// is not an implicit_def, do not insert implicit_def's before the
// uses.
bool Skip = false;
+ SmallVector<MachineInstr*, 4> DeadImpDefs;
for (MachineRegisterInfo::def_iterator DI = mri_->def_begin(Reg),
DE = mri_->def_end(); DI != DE; ++DI) {
- if (DI->getOpcode() != TargetInstrInfo::IMPLICIT_DEF) {
+ MachineInstr *DeadImpDef = &*DI;
+ if (DeadImpDef->getOpcode() != TargetInstrInfo::IMPLICIT_DEF) {
Skip = true;
break;
}
+ DeadImpDefs.push_back(DeadImpDef);
}
if (Skip)
continue;
// The only implicit_def which we want to keep are those that are live
// out of its block.
- MI->eraseFromParent();
+ for (unsigned j = 0, ee = DeadImpDefs.size(); j != ee; ++j)
+ DeadImpDefs[j]->eraseFromParent();
Changed = true;
+ // Process each use instruction once.
for (MachineRegisterInfo::use_iterator UI = mri_->use_begin(Reg),
- UE = mri_->use_end(); UI != UE; ) {
- MachineOperand &RMO = UI.getOperand();
+ UE = mri_->use_end(); UI != UE; ++UI) {
MachineInstr *RMI = &*UI;
- ++UI;
- if (ModInsts.count(RMI))
- continue;
MachineBasicBlock *RMBB = RMI->getParent();
if (RMBB == MBB)
continue;
+ if (ModInsts.insert(RMI))
+ RUses.push_back(RMI);
+ }
+
+ for (unsigned i = 0, e = RUses.size(); i != e; ++i) {
+ MachineInstr *RMI = RUses[i];
// Turn a copy use into an implicit_def.
unsigned SrcReg, DstReg, SrcSubReg, DstSubReg;
if (tii_->isMoveInstr(*RMI, SrcReg, DstReg, SrcSubReg, DstSubReg) &&
Reg == SrcReg) {
- if (RMO.isKill()) {
+ RMI->setDesc(tii_->get(TargetInstrInfo::IMPLICIT_DEF));
+
+ bool isKill = false;
+ SmallVector<unsigned, 4> Ops;
+ for (unsigned j = 0, ee = RMI->getNumOperands(); j != ee; ++j) {
+ MachineOperand &RRMO = RMI->getOperand(j);
+ if (RRMO.isReg() && RRMO.getReg() == Reg) {
+ Ops.push_back(j);
+ if (RRMO.isKill())
+ isKill = true;
+ }
+ }
+      // Leave the other operands alone.
+ for (unsigned j = 0, ee = Ops.size(); j != ee; ++j) {
+ unsigned OpIdx = Ops[j];
+ RMI->RemoveOperand(OpIdx-j);
+ }
+
+ // Update LiveVariables varinfo if the instruction is a kill.
+ if (isKill) {
LiveVariables::VarInfo& vi = lv_->getVarInfo(Reg);
vi.removeKill(RMI);
}
- RMI->setDesc(tii_->get(TargetInstrInfo::IMPLICIT_DEF));
- for (int j = RMI->getNumOperands() - 1, ee = 0; j > ee; --j)
- RMI->RemoveOperand(j);
- ModInsts.insert(RMI);
continue;
}
+ // Replace Reg with a new vreg that's marked implicit.
const TargetRegisterClass* RC = mri_->getRegClass(Reg);
unsigned NewVReg = mri_->createVirtualRegister(RC);
- RMO.setReg(NewVReg);
- RMO.setIsUndef();
- RMO.setIsKill();
+ bool isKill = true;
+ for (unsigned j = 0, ee = RMI->getNumOperands(); j != ee; ++j) {
+ MachineOperand &RRMO = RMI->getOperand(j);
+ if (RRMO.isReg() && RRMO.getReg() == Reg) {
+ RRMO.setReg(NewVReg);
+ RRMO.setIsUndef();
+ if (isKill) {
+ // Only the first operand of NewVReg is marked kill.
+ RRMO.setIsKill();
+ isKill = false;
+ }
+ }
+ }
}
+ RUses.clear();
}
ModInsts.clear();
ImpDefRegs.clear();
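
An illustrative before/after for the rewrite logic above (virtual register numbers hypothetical):

  // Before:                             After:
  //   %reg1024 = IMPLICIT_DEF             (all dead IMPLICIT_DEFs erased)
  //   %reg1025 = MOV %reg1024<kill>       %reg1025 = IMPLICIT_DEF
  //   ... = ADD %reg1024, 1               ... = ADD %reg1026<undef,kill>, 1
  // Copy uses of the undefined value become IMPLICIT_DEFs themselves; every
  // other out-of-block use gets a fresh undef vreg, and only its first
  // operand occurrence is marked kill.
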
diff --git a/lib/CodeGen/RegAllocLinearScan.cpp b/lib/CodeGen/RegAllocLinearScan.cpp
index fff50da..4ff5129 100644
--- a/lib/CodeGen/RegAllocLinearScan.cpp
+++ b/lib/CodeGen/RegAllocLinearScan.cpp
@@ -64,9 +64,31 @@ linearscanRegAlloc("linearscan", "linear scan register allocator",
createLinearScanRegisterAllocator);
namespace {
+ // When we allocate a register, add it to a fixed-size queue of
+ // registers to skip in subsequent allocations. This trades a small
+ // amount of register pressure and increased spills for flexibility in
+ // the post-pass scheduler.
+ //
+  // Note that the number of registers used for reloading spills will be
+  // one greater than the value of this option.
+  //
+  // One big limitation of this is that it doesn't differentiate between
+  // different register classes. So on x86-64, if there is xmm register
+  // pressure, it can cause fewer GPRs to be held in the queue.
+ static cl::opt<unsigned>
+ NumRecentlyUsedRegs("linearscan-skip-count",
+ cl::desc("Number of registers for linearscan to remember to skip."),
+ cl::init(0),
+ cl::Hidden);
+
struct RALinScan : public MachineFunctionPass {
static char ID;
- RALinScan() : MachineFunctionPass(&ID) {}
+ RALinScan() : MachineFunctionPass(&ID) {
+ // Initialize the queue to record recently-used registers.
+ if (NumRecentlyUsedRegs > 0)
+ RecentRegs.resize(NumRecentlyUsedRegs, 0);
+ RecentNext = RecentRegs.begin();
+ }
typedef std::pair<LiveInterval*, LiveInterval::iterator> IntervalPtr;
typedef SmallVector<IntervalPtr, 32> IntervalPtrs;
@@ -132,6 +154,20 @@ namespace {
std::auto_ptr<Spiller> spiller_;
+ // The queue of recently-used registers.
+ SmallVector<unsigned, 4> RecentRegs;
+ SmallVector<unsigned, 4>::iterator RecentNext;
+
+ // Record that we just picked this register.
+ void recordRecentlyUsed(unsigned reg) {
+ assert(reg != 0 && "Recently used register is NOREG!");
+ if (!RecentRegs.empty()) {
+ *RecentNext++ = reg;
+ if (RecentNext == RecentRegs.end())
+ RecentNext = RecentRegs.begin();
+ }
+ }
+
public:
virtual const char* getPassName() const {
return "Linear Scan Register Allocator";
@@ -161,6 +197,12 @@ namespace {
/// runOnMachineFunction - register allocate the whole function
bool runOnMachineFunction(MachineFunction&);
+ // Determine if we skip this register due to its being recently used.
+ bool isRecentlyUsed(unsigned reg) const {
+ return std::find(RecentRegs.begin(), RecentRegs.end(), reg) !=
+ RecentRegs.end();
+ }
+
private:
/// linearScan - the linear scan algorithm
void linearScan();
@@ -436,7 +478,7 @@ bool RALinScan::runOnMachineFunction(MachineFunction &fn) {
vrm_ = &getAnalysis<VirtRegMap>();
if (!rewriter_.get()) rewriter_.reset(createVirtRegRewriter());
- spiller_.reset(createSpiller(mf_, li_, ls_, loopInfo, vrm_));
+ spiller_.reset(createSpiller(mf_, li_, loopInfo, vrm_));
initIntervalSets();
@@ -833,9 +875,15 @@ void RALinScan::findIntervalsToSpill(LiveInterval *cur,
namespace {
struct WeightCompare {
+ private:
+ const RALinScan &Allocator;
+
+ public:
+    WeightCompare(const RALinScan &Alloc) : Allocator(Alloc) {}
+
typedef std::pair<unsigned, float> RegWeightPair;
bool operator()(const RegWeightPair &LHS, const RegWeightPair &RHS) const {
- return LHS.second < RHS.second;
+      // Sort recently-used registers after everything else; folding the
+      // isRecentlyUsed test into the '<' with '&&' would violate the strict
+      // weak ordering std::sort requires.
+      bool LRecent = Allocator.isRecentlyUsed(LHS.first);
+      bool RRecent = Allocator.isRecentlyUsed(RHS.first);
+      if (LRecent != RRecent)
+        return RRecent;
+      return LHS.second < RHS.second;
}
};
}
@@ -1079,7 +1127,8 @@ void RALinScan::assignRegOrStackSlotAtInterval(LiveInterval* cur) {
e = RC->allocation_order_end(*mf_); i != e; ++i) {
unsigned reg = *i;
float regWeight = SpillWeights[reg];
- if (minWeight > regWeight)
+ // Skip recently allocated registers.
+ if (minWeight > regWeight && !isRecentlyUsed(reg))
Found = true;
RegsWeights.push_back(std::make_pair(reg, regWeight));
}
@@ -1097,7 +1146,7 @@ void RALinScan::assignRegOrStackSlotAtInterval(LiveInterval* cur) {
}
// Sort all potential spill candidates by weight.
- std::sort(RegsWeights.begin(), RegsWeights.end(), WeightCompare());
+ std::sort(RegsWeights.begin(), RegsWeights.end(), WeightCompare(*this));
minReg = RegsWeights[0].first;
minWeight = RegsWeights[0].second;
if (minWeight == HUGE_VALF) {
@@ -1360,7 +1409,8 @@ unsigned RALinScan::getFreePhysReg(LiveInterval* cur,
// Ignore "downgraded" registers.
if (SkipDGRegs && DowngradedRegs.count(Reg))
continue;
- if (isRegAvail(Reg)) {
+ // Skip recently allocated registers.
+ if (isRegAvail(Reg) && !isRecentlyUsed(Reg)) {
FreeReg = Reg;
if (FreeReg < inactiveCounts.size())
FreeRegInactiveCount = inactiveCounts[FreeReg];
@@ -1372,9 +1422,12 @@ unsigned RALinScan::getFreePhysReg(LiveInterval* cur,
// If there are no free regs, or if this reg has the max inactive count,
// return this register.
- if (FreeReg == 0 || FreeRegInactiveCount == MaxInactiveCount)
+ if (FreeReg == 0 || FreeRegInactiveCount == MaxInactiveCount) {
+ // Remember what register we picked so we can skip it next time.
+ if (FreeReg != 0) recordRecentlyUsed(FreeReg);
return FreeReg;
-
+ }
+
// Continue scanning the registers, looking for the one with the highest
// inactive count. Alkis found that this reduced register pressure very
// slightly on X86 (in rev 1.94 of this file), though this should probably be
@@ -1385,7 +1438,7 @@ unsigned RALinScan::getFreePhysReg(LiveInterval* cur,
if (SkipDGRegs && DowngradedRegs.count(Reg))
continue;
if (isRegAvail(Reg) && Reg < inactiveCounts.size() &&
- FreeRegInactiveCount < inactiveCounts[Reg]) {
+ FreeRegInactiveCount < inactiveCounts[Reg] && !isRecentlyUsed(Reg)) {
FreeReg = Reg;
FreeRegInactiveCount = inactiveCounts[Reg];
if (FreeRegInactiveCount == MaxInactiveCount)
@@ -1393,6 +1446,9 @@ unsigned RALinScan::getFreePhysReg(LiveInterval* cur,
}
}
+ // Remember what register we picked so we can skip it next time.
+ recordRecentlyUsed(FreeReg);
+
return FreeReg;
}
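
The feature is off by default (-linearscan-skip-count defaults to 0, leaving the queue empty). Assuming the linear scan allocator is in use, it can be exercised from llc like:

  llc -regalloc=linearscan -linearscan-skip-count=2 foo.bc -o foo.s

with the caveats the comment above spells out: slightly higher register pressure and possible extra spills in exchange for more post-RA scheduling freedom.
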
diff --git a/lib/CodeGen/ScheduleDAG.cpp b/lib/CodeGen/ScheduleDAG.cpp
index 6b27db2..71693d2 100644
--- a/lib/CodeGen/ScheduleDAG.cpp
+++ b/lib/CodeGen/ScheduleDAG.cpp
@@ -183,8 +183,8 @@ void SUnit::setHeightDirty() {
/// setDepthToAtLeast - Update this node's successors to reflect the
/// fact that this node's depth just increased.
///
-void SUnit::setDepthToAtLeast(unsigned NewDepth, bool IgnoreAntiDep) {
- if (NewDepth <= getDepth(IgnoreAntiDep))
+void SUnit::setDepthToAtLeast(unsigned NewDepth) {
+ if (NewDepth <= getDepth())
return;
setDepthDirty();
Depth = NewDepth;
@@ -194,8 +194,8 @@ void SUnit::setDepthToAtLeast(unsigned NewDepth, bool IgnoreAntiDep) {
/// setHeightToAtLeast - Update this node's predecessors to reflect the
/// fact that this node's height just increased.
///
-void SUnit::setHeightToAtLeast(unsigned NewHeight, bool IgnoreAntiDep) {
- if (NewHeight <= getHeight(IgnoreAntiDep))
+void SUnit::setHeightToAtLeast(unsigned NewHeight) {
+ if (NewHeight <= getHeight())
return;
setHeightDirty();
Height = NewHeight;
@@ -204,7 +204,7 @@ void SUnit::setHeightToAtLeast(unsigned NewHeight, bool IgnoreAntiDep) {
/// ComputeDepth - Calculate the maximal path from the node to the exit.
///
-void SUnit::ComputeDepth(bool IgnoreAntiDep) {
+void SUnit::ComputeDepth() {
SmallVector<SUnit*, 8> WorkList;
WorkList.push_back(this);
do {
@@ -214,10 +214,6 @@ void SUnit::ComputeDepth(bool IgnoreAntiDep) {
unsigned MaxPredDepth = 0;
for (SUnit::const_pred_iterator I = Cur->Preds.begin(),
E = Cur->Preds.end(); I != E; ++I) {
- if (IgnoreAntiDep &&
- ((I->getKind() == SDep::Anti) || (I->getKind() == SDep::Output)))
- continue;
-
SUnit *PredSU = I->getSUnit();
if (PredSU->isDepthCurrent)
MaxPredDepth = std::max(MaxPredDepth,
@@ -241,7 +237,7 @@ void SUnit::ComputeDepth(bool IgnoreAntiDep) {
/// ComputeHeight - Calculate the maximal path from the node to the entry.
///
-void SUnit::ComputeHeight(bool IgnoreAntiDep) {
+void SUnit::ComputeHeight() {
SmallVector<SUnit*, 8> WorkList;
WorkList.push_back(this);
do {
@@ -251,10 +247,6 @@ void SUnit::ComputeHeight(bool IgnoreAntiDep) {
unsigned MaxSuccHeight = 0;
for (SUnit::const_succ_iterator I = Cur->Succs.begin(),
E = Cur->Succs.end(); I != E; ++I) {
- if (IgnoreAntiDep &&
- ((I->getKind() == SDep::Anti) || (I->getKind() == SDep::Output)))
- continue;
-
SUnit *SuccSU = I->getSUnit();
if (SuccSU->isHeightCurrent)
MaxSuccHeight = std::max(MaxSuccHeight,
diff --git a/lib/CodeGen/ScheduleDAGPrinter.cpp b/lib/CodeGen/ScheduleDAGPrinter.cpp
index 4851d49..027f615 100644
--- a/lib/CodeGen/ScheduleDAGPrinter.cpp
+++ b/lib/CodeGen/ScheduleDAGPrinter.cpp
@@ -32,6 +32,9 @@ using namespace llvm;
namespace llvm {
template<>
struct DOTGraphTraits<ScheduleDAG*> : public DefaultDOTGraphTraits {
+
+ DOTGraphTraits (bool isSimple=false) : DefaultDOTGraphTraits(isSimple) {}
+
static std::string getGraphName(const ScheduleDAG *G) {
return G->MF.getFunction()->getName();
}
@@ -57,9 +60,7 @@ namespace llvm {
}
- static std::string getNodeLabel(const SUnit *Node,
- const ScheduleDAG *Graph,
- bool ShortNames);
+ std::string getNodeLabel(const SUnit *Node, const ScheduleDAG *Graph);
static std::string getNodeAttributes(const SUnit *N,
const ScheduleDAG *Graph) {
return "shape=Mrecord";
@@ -73,8 +74,7 @@ namespace llvm {
}
std::string DOTGraphTraits<ScheduleDAG*>::getNodeLabel(const SUnit *SU,
- const ScheduleDAG *G,
- bool ShortNames) {
+ const ScheduleDAG *G) {
return G->getGraphNodeLabel(SU);
}
diff --git a/lib/CodeGen/SelectionDAG/CMakeLists.txt b/lib/CodeGen/SelectionDAG/CMakeLists.txt
index c766859..80c7d7c 100644
--- a/lib/CodeGen/SelectionDAG/CMakeLists.txt
+++ b/lib/CodeGen/SelectionDAG/CMakeLists.txt
@@ -2,6 +2,7 @@ add_llvm_library(LLVMSelectionDAG
CallingConvLower.cpp
DAGCombiner.cpp
FastISel.cpp
+ FunctionLoweringInfo.cpp
InstrEmitter.cpp
LegalizeDAG.cpp
LegalizeFloatTypes.cpp
@@ -15,7 +16,7 @@ add_llvm_library(LLVMSelectionDAG
ScheduleDAGRRList.cpp
ScheduleDAGSDNodes.cpp
SelectionDAG.cpp
- SelectionDAGBuild.cpp
+ SelectionDAGBuilder.cpp
SelectionDAGISel.cpp
SelectionDAGPrinter.cpp
TargetLowering.cpp
diff --git a/lib/CodeGen/SelectionDAG/FastISel.cpp b/lib/CodeGen/SelectionDAG/FastISel.cpp
index 7dbc136..5eb9ca1 100644
--- a/lib/CodeGen/SelectionDAG/FastISel.cpp
+++ b/lib/CodeGen/SelectionDAG/FastISel.cpp
@@ -54,7 +54,8 @@
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetLowering.h"
#include "llvm/Target/TargetMachine.h"
-#include "SelectionDAGBuild.h"
+#include "SelectionDAGBuilder.h"
+#include "FunctionLoweringInfo.h"
using namespace llvm;
unsigned FastISel::getRegForValue(Value *V) {
diff --git a/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp b/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp
new file mode 100644
index 0000000..e3b25c2
--- /dev/null
+++ b/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp
@@ -0,0 +1,355 @@
+//===-- FunctionLoweringInfo.cpp ------------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This implements routines for translating functions from LLVM IR into
+// Machine IR.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "function-lowering-info"
+#include "FunctionLoweringInfo.h"
+#include "llvm/CallingConv.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Function.h"
+#include "llvm/Instructions.h"
+#include "llvm/IntrinsicInst.h"
+#include "llvm/LLVMContext.h"
+#include "llvm/Module.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/Analysis/DebugInfo.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetFrameInfo.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetIntrinsicInfo.h"
+#include "llvm/Target/TargetLowering.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/Support/raw_ostream.h"
+#include <algorithm>
+using namespace llvm;
+
+/// ComputeLinearIndex - Given an LLVM IR aggregate type and a sequence
+/// of insertvalue or extractvalue indices that identify a member, return
+/// the linearized index of the start of the member.
+///
+unsigned llvm::ComputeLinearIndex(const TargetLowering &TLI, const Type *Ty,
+ const unsigned *Indices,
+ const unsigned *IndicesEnd,
+ unsigned CurIndex) {
+ // Base case: We're done.
+ if (Indices && Indices == IndicesEnd)
+ return CurIndex;
+
+ // Given a struct type, recursively traverse the elements.
+ if (const StructType *STy = dyn_cast<StructType>(Ty)) {
+ for (StructType::element_iterator EB = STy->element_begin(),
+ EI = EB,
+ EE = STy->element_end();
+ EI != EE; ++EI) {
+ if (Indices && *Indices == unsigned(EI - EB))
+ return ComputeLinearIndex(TLI, *EI, Indices+1, IndicesEnd, CurIndex);
+ CurIndex = ComputeLinearIndex(TLI, *EI, 0, 0, CurIndex);
+ }
+ return CurIndex;
+ }
+ // Given an array type, recursively traverse the elements.
+ else if (const ArrayType *ATy = dyn_cast<ArrayType>(Ty)) {
+ const Type *EltTy = ATy->getElementType();
+ for (unsigned i = 0, e = ATy->getNumElements(); i != e; ++i) {
+ if (Indices && *Indices == i)
+ return ComputeLinearIndex(TLI, EltTy, Indices+1, IndicesEnd, CurIndex);
+ CurIndex = ComputeLinearIndex(TLI, EltTy, 0, 0, CurIndex);
+ }
+ return CurIndex;
+ }
+ // We haven't found the type we're looking for, so keep searching.
+ return CurIndex + 1;
+}
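
A worked example of the traversal (types illustrative): for the aggregate type { i32, { float, double } } and insertvalue/extractvalue indices {1, 1}:

  // Ty = { i32, { float, double } }, Indices = [1, 1], CurIndex = 0
  //   element 0 (i32): not selected, flattens to one slot -> CurIndex = 1
  //   element 1: selected -> recurse into { float, double } with [1]
  //     element 0 (float): not selected -> CurIndex = 2
  //     element 1 (double): selected, indices exhausted -> return 2
  // The member starts at linearized index 2, i.e. after the i32 and float.
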
+
+/// ComputeValueVTs - Given an LLVM IR type, compute a sequence of
+/// EVTs that represent all the individual underlying
+/// non-aggregate types that comprise it.
+///
+/// If Offsets is non-null, it points to a vector to be filled in
+/// with the in-memory offsets of each of the individual values.
+///
+void llvm::ComputeValueVTs(const TargetLowering &TLI, const Type *Ty,
+ SmallVectorImpl<EVT> &ValueVTs,
+ SmallVectorImpl<uint64_t> *Offsets,
+ uint64_t StartingOffset) {
+ // Given a struct type, recursively traverse the elements.
+ if (const StructType *STy = dyn_cast<StructType>(Ty)) {
+ const StructLayout *SL = TLI.getTargetData()->getStructLayout(STy);
+ for (StructType::element_iterator EB = STy->element_begin(),
+ EI = EB,
+ EE = STy->element_end();
+ EI != EE; ++EI)
+ ComputeValueVTs(TLI, *EI, ValueVTs, Offsets,
+ StartingOffset + SL->getElementOffset(EI - EB));
+ return;
+ }
+ // Given an array type, recursively traverse the elements.
+ if (const ArrayType *ATy = dyn_cast<ArrayType>(Ty)) {
+ const Type *EltTy = ATy->getElementType();
+ uint64_t EltSize = TLI.getTargetData()->getTypeAllocSize(EltTy);
+ for (unsigned i = 0, e = ATy->getNumElements(); i != e; ++i)
+ ComputeValueVTs(TLI, EltTy, ValueVTs, Offsets,
+ StartingOffset + i * EltSize);
+ return;
+ }
+ // Interpret void as zero return values.
+ if (Ty == Type::getVoidTy(Ty->getContext()))
+ return;
+ // Base case: we can get an EVT for this LLVM IR type.
+ ValueVTs.push_back(TLI.getValueType(Ty));
+ if (Offsets)
+ Offsets->push_back(StartingOffset);
+}
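
For example (a sketch assuming a target where i32 and float are 4 bytes and this struct has no padding):

  // Ty       = { i32, [2 x float] }
  // ValueVTs = { i32, f32, f32 }
  // Offsets  = { 0, 4, 8 }        (only if a non-null Offsets was passed)
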
+
+/// isUsedOutsideOfDefiningBlock - Return true if this instruction is used by
+/// PHI nodes or outside of the basic block that defines it, or used by a
+/// switch or atomic instruction, which may expand to multiple basic blocks.
+static bool isUsedOutsideOfDefiningBlock(Instruction *I) {
+ if (isa<PHINode>(I)) return true;
+ BasicBlock *BB = I->getParent();
+ for (Value::use_iterator UI = I->use_begin(), E = I->use_end(); UI != E; ++UI)
+ if (cast<Instruction>(*UI)->getParent() != BB || isa<PHINode>(*UI))
+ return true;
+ return false;
+}
+
+/// isOnlyUsedInEntryBlock - If the specified argument is only used in the
+/// entry block, return true. A use by a switch does not count, since the
+/// switch may expand into multiple basic blocks.
+static bool isOnlyUsedInEntryBlock(Argument *A, bool EnableFastISel) {
+ // With FastISel active, we may be splitting blocks, so force creation
+ // of virtual registers for all non-dead arguments.
+ // Don't force virtual registers for byval arguments though, because
+ // fast-isel can't handle those in all cases.
+ if (EnableFastISel && !A->hasByValAttr())
+ return A->use_empty();
+
+ BasicBlock *Entry = A->getParent()->begin();
+ for (Value::use_iterator UI = A->use_begin(), E = A->use_end(); UI != E; ++UI)
+ if (cast<Instruction>(*UI)->getParent() != Entry || isa<SwitchInst>(*UI))
+ return false; // Use not in entry block.
+ return true;
+}
+
+FunctionLoweringInfo::FunctionLoweringInfo(TargetLowering &tli)
+ : TLI(tli) {
+}
+
+void FunctionLoweringInfo::set(Function &fn, MachineFunction &mf,
+ bool EnableFastISel) {
+ Fn = &fn;
+ MF = &mf;
+ RegInfo = &MF->getRegInfo();
+
+ // Create a vreg for each argument register that is not dead and is used
+ // outside of the entry block for the function.
+ for (Function::arg_iterator AI = Fn->arg_begin(), E = Fn->arg_end();
+ AI != E; ++AI)
+ if (!isOnlyUsedInEntryBlock(AI, EnableFastISel))
+ InitializeRegForValue(AI);
+
+ // Initialize the mapping of values to registers. This is only set up for
+ // instruction values that are used outside of the block that defines
+ // them.
+ Function::iterator BB = Fn->begin(), EB = Fn->end();
+ for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I)
+ if (AllocaInst *AI = dyn_cast<AllocaInst>(I))
+ if (ConstantInt *CUI = dyn_cast<ConstantInt>(AI->getArraySize())) {
+ const Type *Ty = AI->getAllocatedType();
+ uint64_t TySize = TLI.getTargetData()->getTypeAllocSize(Ty);
+ unsigned Align =
+ std::max((unsigned)TLI.getTargetData()->getPrefTypeAlignment(Ty),
+ AI->getAlignment());
+
+ TySize *= CUI->getZExtValue(); // Get total allocated size.
+ if (TySize == 0) TySize = 1; // Don't create zero-sized stack objects.
+ StaticAllocaMap[AI] =
+ MF->getFrameInfo()->CreateStackObject(TySize, Align, false);
+ }
+
+ for (; BB != EB; ++BB)
+ for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I)
+ if (!I->use_empty() && isUsedOutsideOfDefiningBlock(I))
+ if (!isa<AllocaInst>(I) ||
+ !StaticAllocaMap.count(cast<AllocaInst>(I)))
+ InitializeRegForValue(I);
+
+ // Create an initial MachineBasicBlock for each LLVM BasicBlock in F. This
+ // also creates the initial PHI MachineInstrs, though none of the input
+ // operands are populated.
+ for (BB = Fn->begin(), EB = Fn->end(); BB != EB; ++BB) {
+ MachineBasicBlock *MBB = mf.CreateMachineBasicBlock(BB);
+ MBBMap[BB] = MBB;
+ MF->push_back(MBB);
+
+ // Transfer the address-taken flag. This is necessary because there could
+ // be multiple MachineBasicBlocks corresponding to one BasicBlock, and only
+ // the first one should be marked.
+ if (BB->hasAddressTaken())
+ MBB->setHasAddressTaken();
+
+ // Create Machine PHI nodes for LLVM PHI nodes, lowering them as
+ // appropriate.
+ PHINode *PN;
+ DebugLoc DL;
+ for (BasicBlock::iterator
+ I = BB->begin(), E = BB->end(); I != E; ++I) {
+
+ PN = dyn_cast<PHINode>(I);
+ if (!PN || PN->use_empty()) continue;
+
+ unsigned PHIReg = ValueMap[PN];
+ assert(PHIReg && "PHI node does not have an assigned virtual register!");
+
+ SmallVector<EVT, 4> ValueVTs;
+ ComputeValueVTs(TLI, PN->getType(), ValueVTs);
+ for (unsigned vti = 0, vte = ValueVTs.size(); vti != vte; ++vti) {
+ EVT VT = ValueVTs[vti];
+ unsigned NumRegisters = TLI.getNumRegisters(Fn->getContext(), VT);
+ const TargetInstrInfo *TII = MF->getTarget().getInstrInfo();
+ for (unsigned i = 0; i != NumRegisters; ++i)
+ BuildMI(MBB, DL, TII->get(TargetInstrInfo::PHI), PHIReg + i);
+ PHIReg += NumRegisters;
+ }
+ }
+ }
+}
+
+/// clear - Clear out all the function-specific state. This returns this
+/// FunctionLoweringInfo to an empty state, ready to be used for a
+/// different function.
+void FunctionLoweringInfo::clear() {
+ MBBMap.clear();
+ ValueMap.clear();
+ StaticAllocaMap.clear();
+#ifndef NDEBUG
+ CatchInfoLost.clear();
+ CatchInfoFound.clear();
+#endif
+ LiveOutRegInfo.clear();
+}
+
+unsigned FunctionLoweringInfo::MakeReg(EVT VT) {
+ return RegInfo->createVirtualRegister(TLI.getRegClassFor(VT));
+}
+
+/// CreateRegForValue - Allocate the appropriate number of virtual registers of
+/// the correctly promoted or expanded types. Assign these registers
+/// consecutive vreg numbers and return the first assigned number.
+///
+/// In the case that the given value has struct or array type, this function
+/// will assign registers for each member or element.
+///
+unsigned FunctionLoweringInfo::CreateRegForValue(const Value *V) {
+ SmallVector<EVT, 4> ValueVTs;
+ ComputeValueVTs(TLI, V->getType(), ValueVTs);
+
+ unsigned FirstReg = 0;
+ for (unsigned Value = 0, e = ValueVTs.size(); Value != e; ++Value) {
+ EVT ValueVT = ValueVTs[Value];
+ EVT RegisterVT = TLI.getRegisterType(V->getContext(), ValueVT);
+
+ unsigned NumRegs = TLI.getNumRegisters(V->getContext(), ValueVT);
+ for (unsigned i = 0; i != NumRegs; ++i) {
+ unsigned R = MakeReg(RegisterVT);
+ if (!FirstReg) FirstReg = R;
+ }
+ }
+ return FirstReg;
+}
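
For instance, on a hypothetical 32-bit target where i64 is expanded into two i32 registers, a value of type { i64, i32 } receives three consecutive vregs:

  // ValueVTs = { i64, i32 }
  //   i64 -> RegisterVT i32, NumRegs = 2 -> vregs R, R+1
  //   i32 -> RegisterVT i32, NumRegs = 1 -> vreg  R+2
  // CreateRegForValue returns R, the first register of the block.
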
+
+/// ExtractTypeInfo - Returns the type info, possibly bitcast, encoded in V.
+GlobalVariable *llvm::ExtractTypeInfo(Value *V) {
+ V = V->stripPointerCasts();
+ GlobalVariable *GV = dyn_cast<GlobalVariable>(V);
+ assert ((GV || isa<ConstantPointerNull>(V)) &&
+ "TypeInfo must be a global variable or NULL");
+ return GV;
+}
+
+/// AddCatchInfo - Extract the personality and type infos from an eh.selector
+/// call, and add them to the specified machine basic block.
+void llvm::AddCatchInfo(CallInst &I, MachineModuleInfo *MMI,
+ MachineBasicBlock *MBB) {
+ // Inform the MachineModuleInfo of the personality for this landing pad.
+ ConstantExpr *CE = cast<ConstantExpr>(I.getOperand(2));
+ assert(CE->getOpcode() == Instruction::BitCast &&
+ isa<Function>(CE->getOperand(0)) &&
+ "Personality should be a function");
+ MMI->addPersonality(MBB, cast<Function>(CE->getOperand(0)));
+
+ // Gather all the type infos for this landing pad and pass them along to
+ // MachineModuleInfo.
+ std::vector<GlobalVariable *> TyInfo;
+ unsigned N = I.getNumOperands();
+
+ for (unsigned i = N - 1; i > 2; --i) {
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(I.getOperand(i))) {
+ unsigned FilterLength = CI->getZExtValue();
+ unsigned FirstCatch = i + FilterLength + !FilterLength;
+ assert (FirstCatch <= N && "Invalid filter length");
+
+ if (FirstCatch < N) {
+ TyInfo.reserve(N - FirstCatch);
+ for (unsigned j = FirstCatch; j < N; ++j)
+ TyInfo.push_back(ExtractTypeInfo(I.getOperand(j)));
+ MMI->addCatchTypeInfo(MBB, TyInfo);
+ TyInfo.clear();
+ }
+
+ if (!FilterLength) {
+ // Cleanup.
+ MMI->addCleanup(MBB);
+ } else {
+ // Filter.
+ TyInfo.reserve(FilterLength - 1);
+ for (unsigned j = i + 1; j < FirstCatch; ++j)
+ TyInfo.push_back(ExtractTypeInfo(I.getOperand(j)));
+ MMI->addFilterTypeInfo(MBB, TyInfo);
+ TyInfo.clear();
+ }
+
+ N = i;
+ }
+ }
+
+ if (N > 3) {
+ TyInfo.reserve(N - 3);
+ for (unsigned j = 3; j < N; ++j)
+ TyInfo.push_back(ExtractTypeInfo(I.getOperand(j)));
+ MMI->addCatchTypeInfo(MBB, TyInfo);
+ }
+}
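
To see how the backwards scan decodes the operand list, an illustrative eh.selector call (operand 0 is the callee; type infos and names hypothetical):

  //   call i32 @llvm.eh.selector(
  //     i8* %exn,                                     ; operand 1
  //     i8* bitcast (... @__gxx_personality_v0 ...),  ; operand 2
  //     i8* bitcast (i8** @_ZTIi to i8*),             ; operand 3: catch
  //     i32 0)                                        ; operand 4: cleanup
  // i = 4: ConstantInt 0 -> FilterLength = 0, FirstCatch = 5 = N, so no
  //        catches are recorded here; !FilterLength means addCleanup. N = 4.
  // i = 3: not a ConstantInt, loop ends. N = 4 > 3, so @_ZTIi is passed to
  //        addCatchTypeInfo as the landing pad's single catch clause.
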
+
+void llvm::CopyCatchInfo(BasicBlock *SrcBB, BasicBlock *DestBB,
+ MachineModuleInfo *MMI, FunctionLoweringInfo &FLI) {
+ for (BasicBlock::iterator I = SrcBB->begin(), E = --SrcBB->end(); I != E; ++I)
+ if (EHSelectorInst *EHSel = dyn_cast<EHSelectorInst>(I)) {
+ // Apply the catch info to DestBB.
+ AddCatchInfo(*EHSel, MMI, FLI.MBBMap[DestBB]);
+#ifndef NDEBUG
+ if (!FLI.MBBMap[SrcBB]->isLandingPad())
+ FLI.CatchInfoFound.insert(EHSel);
+#endif
+ }
+}
diff --git a/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.h b/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.h
new file mode 100644
index 0000000..d851e64
--- /dev/null
+++ b/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.h
@@ -0,0 +1,151 @@
+//===-- FunctionLoweringInfo.h - Lower functions from LLVM IR to CodeGen --===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This implements routines for translating functions from LLVM IR into
+// Machine IR.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef FUNCTIONLOWERINGINFO_H
+#define FUNCTIONLOWERINGINFO_H
+
+#include "llvm/ADT/APInt.h"
+#include "llvm/ADT/DenseMap.h"
+#ifndef NDEBUG
+#include "llvm/ADT/SmallSet.h"
+#endif
+#include "llvm/CodeGen/ValueTypes.h"
+#include <vector>
+
+namespace llvm {
+
+class AllocaInst;
+class BasicBlock;
+class CallInst;
+class Function;
+class GlobalVariable;
+class Instruction;
+class MachineBasicBlock;
+class MachineFunction;
+class MachineModuleInfo;
+class MachineRegisterInfo;
+class TargetLowering;
+class Value;
+
+//===--------------------------------------------------------------------===//
+/// FunctionLoweringInfo - This contains information that is global to a
+/// function and is used when lowering a region of the function.
+///
+class FunctionLoweringInfo {
+public:
+ TargetLowering &TLI;
+ Function *Fn;
+ MachineFunction *MF;
+ MachineRegisterInfo *RegInfo;
+
+ /// CanLowerReturn - true iff the function's return value can be lowered to
+ /// registers.
+ bool CanLowerReturn;
+
+ /// DemoteRegister - if CanLowerReturn is false, DemoteRegister is a vreg
+ /// allocated to hold a pointer to the hidden sret parameter.
+ unsigned DemoteRegister;
+
+ explicit FunctionLoweringInfo(TargetLowering &TLI);
+
+ /// set - Initialize this FunctionLoweringInfo with the given Function
+ /// and its associated MachineFunction.
+ ///
+ void set(Function &Fn, MachineFunction &MF, bool EnableFastISel);
+
+ /// MBBMap - A mapping from LLVM basic blocks to their machine code entry.
+ DenseMap<const BasicBlock*, MachineBasicBlock *> MBBMap;
+
+ /// ValueMap - Since we emit code for the function a basic block at a time,
+ /// we must remember which virtual registers hold cross-basic-block values.
+ DenseMap<const Value*, unsigned> ValueMap;
+
+ /// StaticAllocaMap - Keep track of frame indices for fixed sized allocas in
+ /// the entry block. This allows the allocas to be efficiently referenced
+ /// anywhere in the function.
+ DenseMap<const AllocaInst*, int> StaticAllocaMap;
+
+#ifndef NDEBUG
+ SmallSet<Instruction*, 8> CatchInfoLost;
+ SmallSet<Instruction*, 8> CatchInfoFound;
+#endif
+
+ unsigned MakeReg(EVT VT);
+
+ /// isExportedInst - Return true if the specified value is an instruction
+ /// exported from its block.
+ bool isExportedInst(const Value *V) {
+ return ValueMap.count(V);
+ }
+
+ unsigned CreateRegForValue(const Value *V);
+
+ unsigned InitializeRegForValue(const Value *V) {
+ unsigned &R = ValueMap[V];
+ assert(R == 0 && "Already initialized this value register!");
+ return R = CreateRegForValue(V);
+ }
+
+ struct LiveOutInfo {
+ unsigned NumSignBits;
+ APInt KnownOne, KnownZero;
+ LiveOutInfo() : NumSignBits(0), KnownOne(1, 0), KnownZero(1, 0) {}
+ };
+
+ /// LiveOutRegInfo - Information about live out vregs, indexed by their
+ /// register number offset by 'FirstVirtualRegister'.
+ std::vector<LiveOutInfo> LiveOutRegInfo;
+
+ /// clear - Clear out all the function-specific state. This returns this
+ /// FunctionLoweringInfo to an empty state, ready to be used for a
+ /// different function.
+ void clear();
+};
+
+/// ComputeLinearIndex - Given an LLVM IR aggregate type and a sequence
+/// of insertvalue or extractvalue indices that identify a member, return
+/// the linearized index of the start of the member.
+///
+unsigned ComputeLinearIndex(const TargetLowering &TLI, const Type *Ty,
+ const unsigned *Indices,
+ const unsigned *IndicesEnd,
+ unsigned CurIndex = 0);
+
+/// ComputeValueVTs - Given an LLVM IR type, compute a sequence of
+/// EVTs that represent all the individual underlying
+/// non-aggregate types that comprise it.
+///
+/// If Offsets is non-null, it points to a vector to be filled in
+/// with the in-memory offsets of each of the individual values.
+///
+void ComputeValueVTs(const TargetLowering &TLI, const Type *Ty,
+ SmallVectorImpl<EVT> &ValueVTs,
+ SmallVectorImpl<uint64_t> *Offsets = 0,
+ uint64_t StartingOffset = 0);
+
+/// ExtractTypeInfo - Returns the type info, possibly bitcast, encoded in V.
+GlobalVariable *ExtractTypeInfo(Value *V);
+
+/// AddCatchInfo - Extract the personality and type infos from an eh.selector
+/// call, and add them to the specified machine basic block.
+void AddCatchInfo(CallInst &I, MachineModuleInfo *MMI, MachineBasicBlock *MBB);
+
+/// CopyCatchInfo - Copy catch information from SrcBB to DestBB.
+void CopyCatchInfo(BasicBlock *SrcBB, BasicBlock *DestBB,
+ MachineModuleInfo *MMI, FunctionLoweringInfo &FLI);
+
+} // end namespace llvm
+
+#endif
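
A toy model of the ValueMap discipline declared above: each cross-block value is assigned a consecutive run of virtual registers exactly once, and isExportedInst is a plain map lookup. std::map stands in for DenseMap and void* for Value*; this sketches the contract, not the LLVM types:

  #include <cassert>
  #include <cstdio>
  #include <map>

  struct Lowering {
    std::map<const void *, unsigned> ValueMap;
    unsigned NextVReg = 1;                      // vreg 0 means "not assigned"

    unsigned CreateRegForValue(const void *, unsigned NumRegs) {
      unsigned First = NextVReg;                // consecutive vreg numbers
      NextVReg += NumRegs;
      return First;
    }

    unsigned InitializeRegForValue(const void *V, unsigned NumRegs) {
      unsigned &R = ValueMap[V];
      assert(R == 0 && "Already initialized this value register!");
      return R = CreateRegForValue(V, NumRegs);
    }

    bool isExportedInst(const void *V) { return ValueMap.count(V) != 0; }
  };

  int main() {
    int A, B;
    Lowering FLI;
    unsigned RA = FLI.InitializeRegForValue(&A, 2); // e.g. i64 on 32-bit
    std::printf("A -> vregs [%u,%u), exported=%d\n", RA, RA + 2,
                FLI.isExportedInst(&A));
    std::printf("B exported=%d\n", FLI.isExportedInst(&B));
  }
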
diff --git a/lib/CodeGen/SelectionDAG/InstrEmitter.cpp b/lib/CodeGen/SelectionDAG/InstrEmitter.cpp
index 52b0832..669d414 100644
--- a/lib/CodeGen/SelectionDAG/InstrEmitter.cpp
+++ b/lib/CodeGen/SelectionDAG/InstrEmitter.cpp
@@ -350,7 +350,8 @@ void InstrEmitter::AddOperand(MachineInstr *MI, SDValue Op,
MI->addOperand(MachineOperand::CreateES(ES->getSymbol(),
ES->getTargetFlags()));
} else if (BlockAddressSDNode *BA = dyn_cast<BlockAddressSDNode>(Op)) {
- MI->addOperand(MachineOperand::CreateBA(BA->getBlockAddress()));
+ MI->addOperand(MachineOperand::CreateBA(BA->getBlockAddress(),
+ BA->getTargetFlags()));
} else {
assert(Op.getValueType() != MVT::Other &&
Op.getValueType() != MVT::Flag &&
diff --git a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
index 4f0a229..273dbf0 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
@@ -158,7 +158,6 @@ private:
SDValue EmitStackConvert(SDValue SrcOp, EVT SlotVT, EVT DestVT, DebugLoc dl);
SDValue ExpandBUILD_VECTOR(SDNode *Node);
SDValue ExpandSCALAR_TO_VECTOR(SDNode *Node);
- SDValue ExpandDBG_STOPPOINT(SDNode *Node);
void ExpandDYNAMIC_STACKALLOC(SDNode *Node,
SmallVectorImpl<SDValue> &Results);
SDValue ExpandFCOPYSIGN(SDNode *Node);
@@ -1517,6 +1516,7 @@ SDValue SelectionDAGLegalize::ExpandVectorBuildThroughStack(SDNode* Node) {
// Create the stack frame object.
EVT VT = Node->getValueType(0);
EVT OpVT = Node->getOperand(0).getValueType();
+ EVT EltVT = VT.getVectorElementType();
DebugLoc dl = Node->getDebugLoc();
SDValue FIPtr = DAG.CreateStackTemporary(VT);
int FI = cast<FrameIndexSDNode>(FIPtr.getNode())->getIndex();
@@ -1524,7 +1524,7 @@ SDValue SelectionDAGLegalize::ExpandVectorBuildThroughStack(SDNode* Node) {
// Emit a store of each element to the stack slot.
SmallVector<SDValue, 8> Stores;
- unsigned TypeByteSize = OpVT.getSizeInBits() / 8;
+ unsigned TypeByteSize = EltVT.getSizeInBits() / 8;
// Store (in the right endianness) the elements to memory.
for (unsigned i = 0, e = Node->getNumOperands(); i != e; ++i) {
// Ignore undef elements.
@@ -1535,8 +1535,13 @@ SDValue SelectionDAGLegalize::ExpandVectorBuildThroughStack(SDNode* Node) {
SDValue Idx = DAG.getConstant(Offset, FIPtr.getValueType());
Idx = DAG.getNode(ISD::ADD, dl, FIPtr.getValueType(), FIPtr, Idx);
- Stores.push_back(DAG.getStore(DAG.getEntryNode(), dl, Node->getOperand(i),
- Idx, SV, Offset));
+ // If EltVT is smaller than OpVT, only store the bits necessary.
+ if (EltVT.bitsLT(OpVT))
+ Stores.push_back(DAG.getTruncStore(DAG.getEntryNode(), dl,
+ Node->getOperand(i), Idx, SV, Offset, EltVT));
+ else
+ Stores.push_back(DAG.getStore(DAG.getEntryNode(), dl,
+ Node->getOperand(i), Idx, SV, Offset));
}
SDValue StoreChain;
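
The fix above matters when operands have been promoted past the element type (say a v4i8 BUILD_VECTOR whose i8 operands are carried as i32): slot offsets must step by the element size, and each store must truncate back down. A back-of-the-envelope check of that arithmetic, with made-up widths:

  #include <cstdio>

  int main() {
    // Hypothetical BUILD_VECTOR v4i8 whose operands were promoted to i32.
    unsigned EltBits = 8, OpBits = 32, NumElts = 4;

    unsigned TypeByteSize = EltBits / 8;   // step by the *element* size...
    unsigned WrongStep    = OpBits / 8;    // ...not the promoted operand size

    for (unsigned i = 0; i != NumElts; ++i)
      std::printf("elt %u: offset %u (offset %u would overrun the slot)\n",
                  i, i * TypeByteSize, i * WrongStep);

    // Since EltBits < OpBits, each store must be truncating: only the low
    // EltBits of the operand are written (the getTruncStore case above).
    std::printf("truncating stores needed: %s\n",
                EltBits < OpBits ? "yes" : "no");
  }
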
@@ -1590,37 +1595,6 @@ SDValue SelectionDAGLegalize::ExpandFCOPYSIGN(SDNode* Node) {
AbsVal);
}
-SDValue SelectionDAGLegalize::ExpandDBG_STOPPOINT(SDNode* Node) {
- DebugLoc dl = Node->getDebugLoc();
- DwarfWriter *DW = DAG.getDwarfWriter();
- bool useDEBUG_LOC = TLI.isOperationLegalOrCustom(ISD::DEBUG_LOC,
- MVT::Other);
- bool useLABEL = TLI.isOperationLegalOrCustom(ISD::DBG_LABEL, MVT::Other);
-
- const DbgStopPointSDNode *DSP = cast<DbgStopPointSDNode>(Node);
- MDNode *CU_Node = DSP->getCompileUnit();
- if (DW && (useDEBUG_LOC || useLABEL)) {
-
- unsigned Line = DSP->getLine();
- unsigned Col = DSP->getColumn();
-
- if (OptLevel == CodeGenOpt::None) {
- // A bit self-referential to have DebugLoc on Debug_Loc nodes, but it
- // won't hurt anything.
- if (useDEBUG_LOC) {
- return DAG.getNode(ISD::DEBUG_LOC, dl, MVT::Other, Node->getOperand(0),
- DAG.getConstant(Line, MVT::i32),
- DAG.getConstant(Col, MVT::i32),
- DAG.getSrcValue(CU_Node));
- } else {
- unsigned ID = DW->RecordSourceLine(Line, Col, CU_Node);
- return DAG.getLabel(ISD::DBG_LABEL, dl, Node->getOperand(0), ID);
- }
- }
- }
- return Node->getOperand(0);
-}
-
void SelectionDAGLegalize::ExpandDYNAMIC_STACKALLOC(SDNode* Node,
SmallVectorImpl<SDValue> &Results) {
unsigned SPReg = TLI.getStackPointerRegisterToSaveRestore();
@@ -2269,16 +2243,12 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node,
Results.push_back(DAG.getConstant(1, Node->getValueType(0)));
break;
case ISD::EH_RETURN:
- case ISD::DBG_LABEL:
case ISD::EH_LABEL:
case ISD::PREFETCH:
case ISD::MEMBARRIER:
case ISD::VAEND:
Results.push_back(Node->getOperand(0));
break;
- case ISD::DBG_STOPPOINT:
- Results.push_back(ExpandDBG_STOPPOINT(Node));
- break;
case ISD::DYNAMIC_STACKALLOC:
ExpandDYNAMIC_STACKALLOC(Node, Results);
break;
diff --git a/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp
index c4bd552..003cea7 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp
@@ -64,8 +64,12 @@ void DAGTypeLegalizer::PerformExpensiveChecks() {
// The final node obtained by mapping by ReplacedValues is not marked NewNode.
// Note that ReplacedValues should be applied iteratively.
- // Note that the ReplacedValues map may also map deleted nodes. By iterating
- // over the DAG we only consider non-deleted nodes.
+ // Note that the ReplacedValues map may also map deleted nodes (by iterating
+ // over the DAG we never dereference deleted nodes). This means that it may
+ // also map nodes marked NewNode if the deallocated memory was reallocated as
+ // another node, and that new node was not seen by the LegalizeTypes machinery
+ // (for example because it was created but not used). In general, we cannot
+ // distinguish between new nodes and deleted nodes.
SmallVector<SDNode*, 16> NewNodes;
for (SelectionDAG::allnodes_iterator I = DAG.allnodes_begin(),
E = DAG.allnodes_end(); I != E; ++I) {
@@ -114,7 +118,11 @@ void DAGTypeLegalizer::PerformExpensiveChecks() {
Mapped |= 128;
if (I->getNodeId() != Processed) {
- if (Mapped != 0) {
+ // Since we allow ReplacedValues to map deleted nodes, it may map nodes
+ // marked NewNode too, since a deleted node may have been reallocated as
+ // another node that has not been seen by the LegalizeTypes machinery.
+ if ((I->getNodeId() == NewNode && Mapped > 1) ||
+ (I->getNodeId() != NewNode && Mapped != 0)) {
errs() << "Unprocessed value in a map!";
Failed = true;
}
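
The permissiveness added above exists because a "deleted" key can alias a brand-new node: once a node is freed, the allocator may hand its address out again. A minimal demonstration of the underlying effect in standard C++ (no LLVM; whether the address is actually reused is up to the allocator, which is exactly why the check cannot be strict):

  #include <cstdio>
  #include <set>

  struct Node { int id; };

  int main() {
    std::set<const Node *> ReplacedKeys;   // stands in for ReplacedValues' keys

    Node *Old = new Node{1};
    ReplacedKeys.insert(Old);              // map the node...
    delete Old;                            // ...then it is deleted

    Node *Fresh = new Node{2};             // a new, unrelated node
    if (ReplacedKeys.count(Fresh))
      std::printf("stale key aliases the new node (address reused)\n");
    else
      std::printf("no reuse this time; it still cannot be ruled out\n");
    delete Fresh;
  }
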
@@ -320,16 +328,12 @@ ScanOperands:
continue;
// The node morphed - this is equivalent to legalizing by replacing every
- // value of N with the corresponding value of M. So do that now. However
- // there is no need to remember the replacement - morphing will make sure
- // it is never used non-trivially.
+ // value of N with the corresponding value of M. So do that now.
assert(N->getNumValues() == M->getNumValues() &&
"Node morphing changed the number of results!");
for (unsigned i = 0, e = N->getNumValues(); i != e; ++i)
- // Replacing the value takes care of remapping the new value. Do the
- // replacement without recording it in ReplacedValues. This does not
- // expunge From but that is fine - it is not really a new node.
- ReplaceValueWithHelper(SDValue(N, i), SDValue(M, i));
+ // Replacing the value takes care of remapping the new value.
+ ReplaceValueWith(SDValue(N, i), SDValue(M, i));
assert(N->getNodeId() == NewNode && "Unexpected node state!");
// The node continues to live on as part of the NewNode fungus that
// grows on top of the useful nodes. Nothing more needs to be done
@@ -666,14 +670,14 @@ namespace {
}
-/// ReplaceValueWithHelper - Internal helper for ReplaceValueWith. Updates the
-/// DAG causing any uses of From to use To instead, but without expunging From
-/// or recording the replacement in ReplacedValues. Do not call directly unless
-/// you really know what you are doing!
-void DAGTypeLegalizer::ReplaceValueWithHelper(SDValue From, SDValue To) {
+/// ReplaceValueWith - The specified value was legalized to the specified other
+/// value. Update the DAG and NodeIds replacing any uses of From to use To
+/// instead.
+void DAGTypeLegalizer::ReplaceValueWith(SDValue From, SDValue To) {
assert(From.getNode() != To.getNode() && "Potential legalization loop!");
// If expansion produced new nodes, make sure they are properly marked.
+ ExpungeNode(From.getNode());
AnalyzeNewValue(To); // Expunges To.
// Anything that used the old node should now use the new one. Note that this
@@ -682,6 +686,10 @@ void DAGTypeLegalizer::ReplaceValueWithHelper(SDValue From, SDValue To) {
NodeUpdateListener NUL(*this, NodesToAnalyze);
DAG.ReplaceAllUsesOfValueWith(From, To, &NUL);
+ // The old node may still be present in a map like ExpandedIntegers or
+ // PromotedIntegers. Inform maps about the replacement.
+ ReplacedValues[From] = To;
+
// Process the list of nodes that need to be reanalyzed.
while (!NodesToAnalyze.empty()) {
SDNode *N = NodesToAnalyze.back();
@@ -712,25 +720,6 @@ void DAGTypeLegalizer::ReplaceValueWithHelper(SDValue From, SDValue To) {
}
}
-/// ReplaceValueWith - The specified value was legalized to the specified other
-/// value. Update the DAG and NodeIds replacing any uses of From to use To
-/// instead.
-void DAGTypeLegalizer::ReplaceValueWith(SDValue From, SDValue To) {
- assert(From.getNode()->getNodeId() == ReadyToProcess &&
- "Only the node being processed may be remapped!");
-
- // If expansion produced new nodes, make sure they are properly marked.
- ExpungeNode(From.getNode());
- AnalyzeNewValue(To); // Expunges To.
-
- // The old node may still be present in a map like ExpandedIntegers or
- // PromotedIntegers. Inform maps about the replacement.
- ReplacedValues[From] = To;
-
- // Do the replacement.
- ReplaceValueWithHelper(From, To);
-}
-
void DAGTypeLegalizer::SetPromotedInteger(SDValue Op, SDValue Result) {
assert(Result.getValueType() == TLI.getTypeToTransformTo(*DAG.getContext(), Op.getValueType()) &&
"Invalid type for promoted integer");
@@ -918,6 +907,29 @@ bool DAGTypeLegalizer::CustomLowerNode(SDNode *N, EVT VT, bool LegalizeResult) {
return true;
}
+
+/// CustomWidenLowerNode - Widen the node's results with custom code provided
+/// by the target and return "true", or do nothing and return "false".
+bool DAGTypeLegalizer::CustomWidenLowerNode(SDNode *N, EVT VT) {
+ // See if the target wants to custom lower this node.
+ if (TLI.getOperationAction(N->getOpcode(), VT) != TargetLowering::Custom)
+ return false;
+
+ SmallVector<SDValue, 8> Results;
+ TLI.ReplaceNodeResults(N, Results, DAG);
+
+ if (Results.empty())
+ // The target didn't want to custom widen lower its result after all.
+ return false;
+
+ // Update the widening map.
+ assert(Results.size() == N->getNumValues() &&
+ "Custom lowering returned the wrong number of results!");
+ for (unsigned i = 0, e = Results.size(); i != e; ++i)
+ SetWidenedVector(SDValue(N, i), Results[i]);
+ return true;
+}
+
/// GetSplitDestVTs - Compute the VTs needed for the low/hi parts of a type
/// which is split into two not necessarily identical pieces.
void DAGTypeLegalizer::GetSplitDestVTs(EVT InVT, EVT &LoVT, EVT &HiVT) {
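
Seen from a target's side, the new hook's contract is: when getOperationAction reports Custom for the widened type, ReplaceNodeResults either pushes nothing (declining) or pushes exactly one replacement per result. A sketch of that handshake with toy types — the class and function names below are illustrative, not the TargetLowering API:

  #include <cstdio>
  #include <vector>

  struct Node { unsigned NumValues; };

  struct Target {
    // A target that declines returns without pushing anything.
    virtual void ReplaceNodeResults(Node &N, std::vector<int> &Results) = 0;
    virtual ~Target() = default;
  };

  struct WideningTarget : Target {
    void ReplaceNodeResults(Node &N, std::vector<int> &Results) override {
      for (unsigned i = 0; i != N.NumValues; ++i)
        Results.push_back(100 + i);      // one replacement per result
    }
  };

  static bool CustomWidenLowerNode(Target &T, Node &N) {
    // (the real code first checks getOperationAction(...) == Custom)
    std::vector<int> Results;
    T.ReplaceNodeResults(N, Results);
    if (Results.empty())
      return false;                      // target declined after all
    // The legalizer insists on a full set of results (an assert above).
    return Results.size() == N.NumValues;
  }

  int main() {
    Node N{2};
    WideningTarget T;
    std::printf("custom widened: %d\n", CustomWidenLowerNode(T, N));
  }
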
diff --git a/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/lib/CodeGen/SelectionDAG/LegalizeTypes.h
index e1b7022..2ee9f8a 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeTypes.h
+++ b/lib/CodeGen/SelectionDAG/LegalizeTypes.h
@@ -188,6 +188,7 @@ private:
SDValue BitConvertVectorToIntegerVector(SDValue Op);
SDValue CreateStackStoreLoad(SDValue Op, EVT DestVT);
bool CustomLowerNode(SDNode *N, EVT VT, bool LegalizeResult);
+ bool CustomWidenLowerNode(SDNode *N, EVT VT);
SDValue GetVectorElementPointer(SDValue VecPtr, EVT EltVT, SDValue Index);
SDValue JoinIntegers(SDValue Lo, SDValue Hi);
SDValue LibCallify(RTLIB::Libcall LC, SDNode *N, bool isSigned);
@@ -196,7 +197,6 @@ private:
DebugLoc dl);
SDValue PromoteTargetBoolean(SDValue Bool, EVT VT);
void ReplaceValueWith(SDValue From, SDValue To);
- void ReplaceValueWithHelper(SDValue From, SDValue To);
void SplitInteger(SDValue Op, SDValue &Lo, SDValue &Hi);
void SplitInteger(SDValue Op, EVT LoVT, EVT HiVT,
SDValue &Lo, SDValue &Hi);
diff --git a/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp b/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
index ca19430..785c2ad 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
@@ -54,9 +54,6 @@ class VectorLegalizer {
SDValue LegalizeOp(SDValue Op);
// Assuming the node is legal, "legalize" the results
SDValue TranslateLegalizeResults(SDValue Op, SDValue Result);
- // Implements unrolling a generic vector operation, i.e. turning it into
- // scalar operations.
- SDValue UnrollVectorOp(SDValue Op);
// Implements unrolling a VSETCC.
SDValue UnrollVSETCC(SDValue Op);
// Implements expansion for FNEG; falls back to UnrollVectorOp if FSUB
@@ -211,7 +208,7 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) {
else if (Node->getOpcode() == ISD::VSETCC)
Result = UnrollVSETCC(Op);
else
- Result = UnrollVectorOp(Op);
+ Result = DAG.UnrollVectorOp(Op.getNode());
break;
}
@@ -256,7 +253,7 @@ SDValue VectorLegalizer::ExpandFNEG(SDValue Op) {
return DAG.getNode(ISD::FSUB, Op.getDebugLoc(), Op.getValueType(),
Zero, Op.getOperand(0));
}
- return UnrollVectorOp(Op);
+ return DAG.UnrollVectorOp(Op.getNode());
}
SDValue VectorLegalizer::UnrollVSETCC(SDValue Op) {
@@ -282,56 +279,6 @@ SDValue VectorLegalizer::UnrollVSETCC(SDValue Op) {
return DAG.getNode(ISD::BUILD_VECTOR, dl, VT, &Ops[0], NumElems);
}
-/// UnrollVectorOp - We know that the given vector has a legal type, however
-/// the operation it performs is not legal, and the target has requested that
-/// the operation be expanded. "Unroll" the vector, splitting out the scalars
-/// and operating on each element individually.
-SDValue VectorLegalizer::UnrollVectorOp(SDValue Op) {
- EVT VT = Op.getValueType();
- assert(Op.getNode()->getNumValues() == 1 &&
- "Can't unroll a vector with multiple results!");
- unsigned NE = VT.getVectorNumElements();
- EVT EltVT = VT.getVectorElementType();
- DebugLoc dl = Op.getDebugLoc();
-
- SmallVector<SDValue, 8> Scalars;
- SmallVector<SDValue, 4> Operands(Op.getNumOperands());
- for (unsigned i = 0; i != NE; ++i) {
- for (unsigned j = 0; j != Op.getNumOperands(); ++j) {
- SDValue Operand = Op.getOperand(j);
- EVT OperandVT = Operand.getValueType();
- if (OperandVT.isVector()) {
- // A vector operand; extract a single element.
- EVT OperandEltVT = OperandVT.getVectorElementType();
- Operands[j] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl,
- OperandEltVT,
- Operand,
- DAG.getConstant(i, MVT::i32));
- } else {
- // A scalar operand; just use it as is.
- Operands[j] = Operand;
- }
- }
-
- switch (Op.getOpcode()) {
- default:
- Scalars.push_back(DAG.getNode(Op.getOpcode(), dl, EltVT,
- &Operands[0], Operands.size()));
- break;
- case ISD::SHL:
- case ISD::SRA:
- case ISD::SRL:
- case ISD::ROTL:
- case ISD::ROTR:
- Scalars.push_back(DAG.getNode(Op.getOpcode(), dl, EltVT, Operands[0],
- DAG.getShiftAmountOperand(Operands[1])));
- break;
- }
- }
-
- return DAG.getNode(ISD::BUILD_VECTOR, dl, VT, &Scalars[0], Scalars.size());
-}
-
}
bool SelectionDAG::LegalizeVectors() {
diff --git a/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
index 75e1239..023324b 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
@@ -1118,8 +1118,12 @@ void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) {
DEBUG(errs() << "Widen node result " << ResNo << ": ";
N->dump(&DAG);
errs() << "\n");
- SDValue Res = SDValue();
+ // See if the target wants to custom widen this node.
+ if (CustomWidenLowerNode(N, N->getValueType(ResNo)))
+ return;
+
+ SDValue Res = SDValue();
switch (N->getOpcode()) {
default:
#ifndef NDEBUG
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index 4530ffc..c38c79b 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -200,19 +200,6 @@ bool ISD::isScalarToVector(const SDNode *N) {
return true;
}
-
-/// isDebugLabel - Return true if the specified node represents a debug
-/// label (i.e. ISD::DBG_LABEL or TargetInstrInfo::DBG_LABEL node).
-bool ISD::isDebugLabel(const SDNode *N) {
- SDValue Zero;
- if (N->getOpcode() == ISD::DBG_LABEL)
- return true;
- if (N->isMachineOpcode() &&
- N->getMachineOpcode() == TargetInstrInfo::DBG_LABEL)
- return true;
- return false;
-}
-
/// getSetCCSwappedOperands - Return the operation corresponding to (Y op X)
/// when given the operation for (X op Y).
ISD::CondCode ISD::getSetCCSwappedOperands(ISD::CondCode Operation) {
@@ -393,13 +380,7 @@ static void AddNodeIDCustom(FoldingSetNodeID &ID, const SDNode *N) {
case ISD::Register:
ID.AddInteger(cast<RegisterSDNode>(N)->getReg());
break;
- case ISD::DBG_STOPPOINT: {
- const DbgStopPointSDNode *DSP = cast<DbgStopPointSDNode>(N);
- ID.AddInteger(DSP->getLine());
- ID.AddInteger(DSP->getColumn());
- ID.AddPointer(DSP->getCompileUnit());
- break;
- }
+
case ISD::SRCVALUE:
ID.AddPointer(cast<SrcValueSDNode>(N)->getValue());
break;
@@ -462,7 +443,8 @@ static void AddNodeIDCustom(FoldingSetNodeID &ID, const SDNode *N) {
}
case ISD::TargetBlockAddress:
case ISD::BlockAddress: {
- ID.AddPointer(cast<BlockAddressSDNode>(N));
+ ID.AddPointer(cast<BlockAddressSDNode>(N)->getBlockAddress());
+ ID.AddInteger(cast<BlockAddressSDNode>(N)->getTargetFlags());
break;
}
} // end switch (N->getOpcode())
@@ -508,8 +490,6 @@ static bool doNotCSE(SDNode *N) {
switch (N->getOpcode()) {
default: break;
case ISD::HANDLENODE:
- case ISD::DBG_LABEL:
- case ISD::DBG_STOPPOINT:
case ISD::EH_LABEL:
return true; // Never CSE these nodes.
}
@@ -1296,16 +1276,6 @@ SDValue SelectionDAG::getRegister(unsigned RegNo, EVT VT) {
return SDValue(N, 0);
}
-SDValue SelectionDAG::getDbgStopPoint(DebugLoc DL, SDValue Root,
- unsigned Line, unsigned Col,
- MDNode *CU) {
- SDNode *N = NodeAllocator.Allocate<DbgStopPointSDNode>();
- new (N) DbgStopPointSDNode(Root, Line, Col, CU);
- N->setDebugLoc(DL);
- AllNodes.push_back(N);
- return SDValue(N, 0);
-}
-
SDValue SelectionDAG::getLabel(unsigned Opcode, DebugLoc dl,
SDValue Root,
unsigned LabelID) {
@@ -1323,18 +1293,20 @@ SDValue SelectionDAG::getLabel(unsigned Opcode, DebugLoc dl,
return SDValue(N, 0);
}
-SDValue SelectionDAG::getBlockAddress(BlockAddress *BA, DebugLoc DL,
- bool isTarget) {
+SDValue SelectionDAG::getBlockAddress(BlockAddress *BA, EVT VT,
+ bool isTarget,
+ unsigned char TargetFlags) {
unsigned Opc = isTarget ? ISD::TargetBlockAddress : ISD::BlockAddress;
FoldingSetNodeID ID;
- AddNodeIDNode(ID, Opc, getVTList(TLI.getPointerTy()), 0, 0);
+ AddNodeIDNode(ID, Opc, getVTList(VT), 0, 0);
ID.AddPointer(BA);
+ ID.AddInteger(TargetFlags);
void *IP = 0;
if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP))
return SDValue(E, 0);
SDNode *N = NodeAllocator.Allocate<BlockAddressSDNode>();
- new (N) BlockAddressSDNode(Opc, DL, TLI.getPointerTy(), BA);
+ new (N) BlockAddressSDNode(Opc, VT, BA, TargetFlags);
CSEMap.InsertNode(N, IP);
AllNodes.push_back(N);
return SDValue(N, 0);
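
Both this hunk and the AddNodeIDCustom change earlier enforce the same invariant: target flags are part of a block-address node's identity, so they must be part of its CSE key, or a lookup for one flag value would return a node built with another. A toy uniquing table keyed the same way (a std::map of pairs standing in for the folding set; the sketch leaks its nodes, as sketches do):

  #include <cstdio>
  #include <map>
  #include <utility>

  struct BlockAddressNode { const void *BA; unsigned char TargetFlags; };

  int main() {
    // Key = (block address, target flags): both participate, as in
    // AddNodeIDCustom and getBlockAddress above.
    std::map<std::pair<const void *, unsigned>, BlockAddressNode *> CSEMap;
    int Block;                              // stands in for a BlockAddress

    auto get = [&](const void *BA, unsigned char TF) {
      auto Key = std::make_pair(BA, (unsigned)TF);
      auto It = CSEMap.find(Key);
      if (It != CSEMap.end())
        return It->second;                  // CSE hit
      return CSEMap[Key] = new BlockAddressNode{BA, TF};
    };

    BlockAddressNode *A = get(&Block, 0);
    BlockAddressNode *B = get(&Block, 1);   // a PIC-style flag, say
    BlockAddressNode *C = get(&Block, 0);
    std::printf("A==C: %d (same key), A==B: %d (flags differ)\n",
                A == C, A == B);
  }
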
@@ -5452,7 +5424,6 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const {
case ISD::UNDEF: return "undef";
case ISD::MERGE_VALUES: return "merge_values";
case ISD::INLINEASM: return "inlineasm";
- case ISD::DBG_LABEL: return "dbg_label";
case ISD::EH_LABEL: return "eh_label";
case ISD::HANDLENODE: return "handlenode";
@@ -5586,10 +5557,6 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const {
case ISD::CTTZ: return "cttz";
case ISD::CTLZ: return "ctlz";
- // Debug info
- case ISD::DBG_STOPPOINT: return "dbg_stoppoint";
- case ISD::DEBUG_LOC: return "debug_loc";
-
// Trampolines
case ISD::TRAMPOLINE: return "trampoline";
@@ -5810,6 +5777,8 @@ void SDNode::print_details(raw_ostream &OS, const SelectionDAG *G) const {
OS << ", ";
WriteAsOperand(OS, BA->getBlockAddress()->getBasicBlock(), false);
OS << ">";
+ if (unsigned TF = BA->getTargetFlags())
+ OS << " [TF=" << TF << ']';
}
}
@@ -5838,6 +5807,66 @@ static void DumpNodes(const SDNode *N, unsigned indent, const SelectionDAG *G) {
N->dump(G);
}
+SDValue SelectionDAG::UnrollVectorOp(SDNode *N, unsigned ResNE) {
+ assert(N->getNumValues() == 1 &&
+ "Can't unroll a vector with multiple results!");
+
+ EVT VT = N->getValueType(0);
+ unsigned NE = VT.getVectorNumElements();
+ EVT EltVT = VT.getVectorElementType();
+ DebugLoc dl = N->getDebugLoc();
+
+ SmallVector<SDValue, 8> Scalars;
+ SmallVector<SDValue, 4> Operands(N->getNumOperands());
+
+ // If ResNE is 0, fully unroll the vector op.
+ if (ResNE == 0)
+ ResNE = NE;
+ else if (NE > ResNE)
+ NE = ResNE;
+
+ unsigned i;
+ for (i = 0; i != NE; ++i) {
+ for (unsigned j = 0; j != N->getNumOperands(); ++j) {
+ SDValue Operand = N->getOperand(j);
+ EVT OperandVT = Operand.getValueType();
+ if (OperandVT.isVector()) {
+ // A vector operand; extract a single element.
+ EVT OperandEltVT = OperandVT.getVectorElementType();
+ Operands[j] = getNode(ISD::EXTRACT_VECTOR_ELT, dl,
+ OperandEltVT,
+ Operand,
+ getConstant(i, MVT::i32));
+ } else {
+ // A scalar operand; just use it as is.
+ Operands[j] = Operand;
+ }
+ }
+
+ switch (N->getOpcode()) {
+ default:
+ Scalars.push_back(getNode(N->getOpcode(), dl, EltVT,
+ &Operands[0], Operands.size()));
+ break;
+ case ISD::SHL:
+ case ISD::SRA:
+ case ISD::SRL:
+ case ISD::ROTL:
+ case ISD::ROTR:
+ Scalars.push_back(getNode(N->getOpcode(), dl, EltVT, Operands[0],
+ getShiftAmountOperand(Operands[1])));
+ break;
+ }
+ }
+
+ for (; i < ResNE; ++i)
+ Scalars.push_back(getUNDEF(EltVT));
+
+ return getNode(ISD::BUILD_VECTOR, dl,
+ EVT::getVectorVT(*getContext(), EltVT, ResNE),
+ &Scalars[0], Scalars.size());
+}
+
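
The new ResNE parameter makes the helper usable by the widening legalizer: ResNE == 0 keeps the old fully-unrolled behavior, a smaller ResNE truncates the unroll, and a larger one pads the tail with undef elements. The index bookkeeping in isolation, with plain arrays in place of vectors and -1 in place of getUNDEF:

  #include <cstdio>
  #include <vector>

  // Scalarize 'In' into ResNE scalars; -1 plays the role of getUNDEF.
  static std::vector<int> unroll(const std::vector<int> &In, unsigned ResNE) {
    unsigned NE = In.size();
    if (ResNE == 0) ResNE = NE;        // 0 means "fully unroll"
    else if (NE > ResNE) NE = ResNE;   // never read past the requested width

    std::vector<int> Scalars;
    unsigned i;
    for (i = 0; i != NE; ++i)
      Scalars.push_back(In[i] * 2);    // some per-element operation
    for (; i < ResNE; ++i)
      Scalars.push_back(-1);           // pad the tail with undef
    return Scalars;
  }

  int main() {
    std::vector<int> V = {1, 2, 3};
    for (unsigned ResNE : {0u, 2u, 5u}) {
      std::printf("ResNE=%u:", ResNE);
      for (int S : unroll(V, ResNE)) std::printf(" %d", S);
      std::printf("\n");
    }
  }
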
void SelectionDAG::dump() const {
errs() << "SelectionDAG has " << AllNodes.size() << " nodes:";
@@ -5993,3 +6022,4 @@ bool ShuffleVectorSDNode::isSplatMask(const int *Mask, EVT VT) {
return false;
return true;
}
+
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuild.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
index 90fd95e..57d8903 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAGBuild.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -1,4 +1,4 @@
-//===-- SelectionDAGBuild.cpp - Selection-DAG building --------------------===//
+//===-- SelectionDAGBuilder.cpp - Selection-DAG building ------------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -12,12 +12,12 @@
//===----------------------------------------------------------------------===//
#define DEBUG_TYPE "isel"
-#include "SelectionDAGBuild.h"
+#include "SelectionDAGBuilder.h"
+#include "FunctionLoweringInfo.h"
#include "llvm/ADT/BitVector.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Constants.h"
-#include "llvm/Constants.h"
#include "llvm/CallingConv.h"
#include "llvm/DerivedTypes.h"
#include "llvm/Function.h"
@@ -68,85 +68,7 @@ LimitFPPrecision("limit-float-precision",
cl::location(LimitFloatPrecision),
cl::init(0));
-/// ComputeLinearIndex - Given an LLVM IR aggregate type and a sequence
-/// of insertvalue or extractvalue indices that identify a member, return
-/// the linearized index of the start of the member.
-///
-static unsigned ComputeLinearIndex(const TargetLowering &TLI, const Type *Ty,
- const unsigned *Indices,
- const unsigned *IndicesEnd,
- unsigned CurIndex = 0) {
- // Base case: We're done.
- if (Indices && Indices == IndicesEnd)
- return CurIndex;
-
- // Given a struct type, recursively traverse the elements.
- if (const StructType *STy = dyn_cast<StructType>(Ty)) {
- for (StructType::element_iterator EB = STy->element_begin(),
- EI = EB,
- EE = STy->element_end();
- EI != EE; ++EI) {
- if (Indices && *Indices == unsigned(EI - EB))
- return ComputeLinearIndex(TLI, *EI, Indices+1, IndicesEnd, CurIndex);
- CurIndex = ComputeLinearIndex(TLI, *EI, 0, 0, CurIndex);
- }
- return CurIndex;
- }
- // Given an array type, recursively traverse the elements.
- else if (const ArrayType *ATy = dyn_cast<ArrayType>(Ty)) {
- const Type *EltTy = ATy->getElementType();
- for (unsigned i = 0, e = ATy->getNumElements(); i != e; ++i) {
- if (Indices && *Indices == i)
- return ComputeLinearIndex(TLI, EltTy, Indices+1, IndicesEnd, CurIndex);
- CurIndex = ComputeLinearIndex(TLI, EltTy, 0, 0, CurIndex);
- }
- return CurIndex;
- }
- // We haven't found the type we're looking for, so keep searching.
- return CurIndex + 1;
-}
-
-/// ComputeValueVTs - Given an LLVM IR type, compute a sequence of
-/// EVTs that represent all the individual underlying
-/// non-aggregate types that comprise it.
-///
-/// If Offsets is non-null, it points to a vector to be filled in
-/// with the in-memory offsets of each of the individual values.
-///
-static void ComputeValueVTs(const TargetLowering &TLI, const Type *Ty,
- SmallVectorImpl<EVT> &ValueVTs,
- SmallVectorImpl<uint64_t> *Offsets = 0,
- uint64_t StartingOffset = 0) {
- // Given a struct type, recursively traverse the elements.
- if (const StructType *STy = dyn_cast<StructType>(Ty)) {
- const StructLayout *SL = TLI.getTargetData()->getStructLayout(STy);
- for (StructType::element_iterator EB = STy->element_begin(),
- EI = EB,
- EE = STy->element_end();
- EI != EE; ++EI)
- ComputeValueVTs(TLI, *EI, ValueVTs, Offsets,
- StartingOffset + SL->getElementOffset(EI - EB));
- return;
- }
- // Given an array type, recursively traverse the elements.
- if (const ArrayType *ATy = dyn_cast<ArrayType>(Ty)) {
- const Type *EltTy = ATy->getElementType();
- uint64_t EltSize = TLI.getTargetData()->getTypeAllocSize(EltTy);
- for (unsigned i = 0, e = ATy->getNumElements(); i != e; ++i)
- ComputeValueVTs(TLI, EltTy, ValueVTs, Offsets,
- StartingOffset + i * EltSize);
- return;
- }
- // Interpret void as zero return values.
- if (Ty == Type::getVoidTy(Ty->getContext()))
- return;
- // Base case: we can get an EVT for this LLVM IR type.
- ValueVTs.push_back(TLI.getValueType(Ty));
- if (Offsets)
- Offsets->push_back(StartingOffset);
-}
-
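
These two helpers move to FunctionLoweringInfo.h (declared earlier in this patch) rather than staying file-local. ComputeLinearIndex's contract is easiest to see concretely: for {i32, {i32, i32}, [2 x i32]} the scalar leaves linearize to 0..4, and a member's index is the count of leaves before it. A standalone re-derivation over a toy type model (not the LLVM Type hierarchy; C++17 for the self-referential vector):

  #include <cstdio>
  #include <vector>

  // Toy aggregate: a leaf (Elems empty) or a struct/array of members.
  struct Ty { std::vector<Ty> Elems; };

  // Count the scalar leaves of a type: each leaf takes one linear slot.
  static unsigned countLeaves(const Ty &T) {
    if (T.Elems.empty()) return 1;
    unsigned N = 0;
    for (const Ty &E : T.Elems) N += countLeaves(E);
    return N;
  }

  // Linear index of the member named by a path of indices, as in
  // ComputeLinearIndex: skip the leaves of every earlier sibling.
  static unsigned linearIndex(const Ty &T, const std::vector<unsigned> &Path,
                              unsigned Depth = 0, unsigned Cur = 0) {
    if (Depth == Path.size()) return Cur;
    for (unsigned i = 0; i != Path[Depth]; ++i)
      Cur += countLeaves(T.Elems[i]);
    return linearIndex(T.Elems[Path[Depth]], Path, Depth + 1, Cur);
  }

  int main() {
    Ty I32{};                                    // scalar leaf
    Ty S{{I32, Ty{{I32, I32}}, Ty{{I32, I32}}}}; // {i32,{i32,i32},[2 x i32]}
    std::printf("index of S[1][1] = %u\n", linearIndex(S, {1, 1})); // 2
    std::printf("index of S[2][0] = %u\n", linearIndex(S, {2, 0})); // 3
  }
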
-namespace llvm {
+namespace {
/// RegsForValue - This struct represents the registers (physical or virtual)
/// that a particular set of values is assigned, and the type information about
/// the value. The most common situation is to represent one value at a time,
@@ -156,7 +78,7 @@ namespace llvm {
/// have legal types, so each value may require one or more registers of some
/// legal type.
///
- struct VISIBILITY_HIDDEN RegsForValue {
+ struct RegsForValue {
/// TLI - The TargetLowering object.
///
const TargetLowering *TLI;
@@ -241,150 +163,6 @@ namespace llvm {
};
}
-/// isUsedOutsideOfDefiningBlock - Return true if this instruction is used by
-/// PHI nodes or outside of the basic block that defines it, or used by a
-/// switch or atomic instruction, which may expand to multiple basic blocks.
-static bool isUsedOutsideOfDefiningBlock(Instruction *I) {
- if (isa<PHINode>(I)) return true;
- BasicBlock *BB = I->getParent();
- for (Value::use_iterator UI = I->use_begin(), E = I->use_end(); UI != E; ++UI)
- if (cast<Instruction>(*UI)->getParent() != BB || isa<PHINode>(*UI))
- return true;
- return false;
-}
-
-/// isOnlyUsedInEntryBlock - If the specified argument is only used in the
-/// entry block, return true. This includes arguments used by switches, since
-/// the switch may expand into multiple basic blocks.
-static bool isOnlyUsedInEntryBlock(Argument *A, bool EnableFastISel) {
- // With FastISel active, we may be splitting blocks, so force creation
- // of virtual registers for all non-dead arguments.
- // Don't force virtual registers for byval arguments though, because
- // fast-isel can't handle those in all cases.
- if (EnableFastISel && !A->hasByValAttr())
- return A->use_empty();
-
- BasicBlock *Entry = A->getParent()->begin();
- for (Value::use_iterator UI = A->use_begin(), E = A->use_end(); UI != E; ++UI)
- if (cast<Instruction>(*UI)->getParent() != Entry || isa<SwitchInst>(*UI))
- return false; // Use not in entry block.
- return true;
-}
-
-FunctionLoweringInfo::FunctionLoweringInfo(TargetLowering &tli)
- : TLI(tli) {
-}
-
-void FunctionLoweringInfo::set(Function &fn, MachineFunction &mf,
- SelectionDAG &DAG,
- bool EnableFastISel) {
- Fn = &fn;
- MF = &mf;
- RegInfo = &MF->getRegInfo();
-
- // Create a vreg for each argument register that is not dead and is used
- // outside of the entry block for the function.
- for (Function::arg_iterator AI = Fn->arg_begin(), E = Fn->arg_end();
- AI != E; ++AI)
- if (!isOnlyUsedInEntryBlock(AI, EnableFastISel))
- InitializeRegForValue(AI);
-
- // Initialize the mapping of values to registers. This is only set up for
- // instruction values that are used outside of the block that defines
- // them.
- Function::iterator BB = Fn->begin(), EB = Fn->end();
- for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I)
- if (AllocaInst *AI = dyn_cast<AllocaInst>(I))
- if (ConstantInt *CUI = dyn_cast<ConstantInt>(AI->getArraySize())) {
- const Type *Ty = AI->getAllocatedType();
- uint64_t TySize = TLI.getTargetData()->getTypeAllocSize(Ty);
- unsigned Align =
- std::max((unsigned)TLI.getTargetData()->getPrefTypeAlignment(Ty),
- AI->getAlignment());
-
- TySize *= CUI->getZExtValue(); // Get total allocated size.
- if (TySize == 0) TySize = 1; // Don't create zero-sized stack objects.
- StaticAllocaMap[AI] =
- MF->getFrameInfo()->CreateStackObject(TySize, Align, false);
- }
-
- for (; BB != EB; ++BB)
- for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I)
- if (!I->use_empty() && isUsedOutsideOfDefiningBlock(I))
- if (!isa<AllocaInst>(I) ||
- !StaticAllocaMap.count(cast<AllocaInst>(I)))
- InitializeRegForValue(I);
-
- // Create an initial MachineBasicBlock for each LLVM BasicBlock in F. This
- // also creates the initial PHI MachineInstrs, though none of the input
- // operands are populated.
- for (BB = Fn->begin(), EB = Fn->end(); BB != EB; ++BB) {
- MachineBasicBlock *MBB = mf.CreateMachineBasicBlock(BB);
- MBBMap[BB] = MBB;
- MF->push_back(MBB);
-
- // Transfer the address-taken flag. This is necessary because there could
- // be multiple MachineBasicBlocks corresponding to one BasicBlock, and only
- // the first one should be marked.
- if (BB->hasAddressTaken())
- MBB->setHasAddressTaken();
-
- // Create Machine PHI nodes for LLVM PHI nodes, lowering them as
- // appropriate.
- PHINode *PN;
- DebugLoc DL;
- for (BasicBlock::iterator
- I = BB->begin(), E = BB->end(); I != E; ++I) {
-
- PN = dyn_cast<PHINode>(I);
- if (!PN || PN->use_empty()) continue;
-
- unsigned PHIReg = ValueMap[PN];
- assert(PHIReg && "PHI node does not have an assigned virtual register!");
-
- SmallVector<EVT, 4> ValueVTs;
- ComputeValueVTs(TLI, PN->getType(), ValueVTs);
- for (unsigned vti = 0, vte = ValueVTs.size(); vti != vte; ++vti) {
- EVT VT = ValueVTs[vti];
- unsigned NumRegisters = TLI.getNumRegisters(*DAG.getContext(), VT);
- const TargetInstrInfo *TII = MF->getTarget().getInstrInfo();
- for (unsigned i = 0; i != NumRegisters; ++i)
- BuildMI(MBB, DL, TII->get(TargetInstrInfo::PHI), PHIReg + i);
- PHIReg += NumRegisters;
- }
- }
- }
-}
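
The static-alloca path in the removed set() (which moves with the rest of FunctionLoweringInfo) sizes each fixed entry-block alloca up front: total size is the element size times the constant count, clamped to one byte so zero-sized allocas still get distinct slots, and alignment is the max of the type's preferred alignment and any explicit one. That arithmetic by itself, with made-up numbers:

  #include <algorithm>
  #include <cstdint>
  #include <cstdio>

  // Size/alignment computation for a fixed-size entry-block alloca, as in
  // the removed FunctionLoweringInfo::set (all values here are made up).
  static void stackObject(uint64_t TySize, unsigned PrefAlign,
                          unsigned ExplicitAlign, uint64_t Count) {
    unsigned Align = std::max(PrefAlign, ExplicitAlign);
    TySize *= Count;                 // total allocated size
    if (TySize == 0) TySize = 1;     // don't create zero-sized stack objects
    std::printf("size=%llu align=%u\n", (unsigned long long)TySize, Align);
  }

  int main() {
    stackObject(4, 4, 16, 8);  // alloca [8 x i32], align 16 -> size 32
    stackObject(0, 1, 1, 1);   // a zero-sized type still gets one byte
  }
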
-
-unsigned FunctionLoweringInfo::MakeReg(EVT VT) {
- return RegInfo->createVirtualRegister(TLI.getRegClassFor(VT));
-}
-
-/// CreateRegForValue - Allocate the appropriate number of virtual registers of
-/// the correctly promoted or expanded types. Assign these registers
-/// consecutive vreg numbers and return the first assigned number.
-///
-/// In the case that the given value has struct or array type, this function
-/// will assign registers for each member or element.
-///
-unsigned FunctionLoweringInfo::CreateRegForValue(const Value *V) {
- SmallVector<EVT, 4> ValueVTs;
- ComputeValueVTs(TLI, V->getType(), ValueVTs);
-
- unsigned FirstReg = 0;
- for (unsigned Value = 0, e = ValueVTs.size(); Value != e; ++Value) {
- EVT ValueVT = ValueVTs[Value];
- EVT RegisterVT = TLI.getRegisterType(V->getContext(), ValueVT);
-
- unsigned NumRegs = TLI.getNumRegisters(V->getContext(), ValueVT);
- for (unsigned i = 0; i != NumRegs; ++i) {
- unsigned R = MakeReg(RegisterVT);
- if (!FirstReg) FirstReg = R;
- }
- }
- return FirstReg;
-}
-
/// getCopyFromParts - Create a value that contains the specified legal parts
/// combined into the value they represent. If the parts combine to a type
/// larger then ValueVT then AssertOp can be used to specify whether the extra
@@ -723,19 +501,19 @@ static void getCopyToParts(SelectionDAG &DAG, DebugLoc dl, SDValue Val,
}
-void SelectionDAGLowering::init(GCFunctionInfo *gfi, AliasAnalysis &aa) {
+void SelectionDAGBuilder::init(GCFunctionInfo *gfi, AliasAnalysis &aa) {
AA = &aa;
GFI = gfi;
TD = DAG.getTarget().getTargetData();
}
/// clear - Clear out the current SelectionDAG and the associated
-/// state and prepare this SelectionDAGLowering object to be used
+/// state and prepare this SelectionDAGBuilder object to be used
/// for a new block. This doesn't clear out information about
/// additional blocks that are needed to complete switch lowering
/// or PHI node updating; that information is cleared out as it is
/// consumed.
-void SelectionDAGLowering::clear() {
+void SelectionDAGBuilder::clear() {
NodeMap.clear();
PendingLoads.clear();
PendingExports.clear();
@@ -750,7 +528,7 @@ void SelectionDAGLowering::clear() {
/// a store or any other node that may need to be ordered after any
/// prior load instructions.
///
-SDValue SelectionDAGLowering::getRoot() {
+SDValue SelectionDAGBuilder::getRoot() {
if (PendingLoads.empty())
return DAG.getRoot();
@@ -773,7 +551,7 @@ SDValue SelectionDAGLowering::getRoot() {
/// PendingLoad items, flush all the PendingExports items. It is necessary
/// to do this before emitting a terminator instruction.
///
-SDValue SelectionDAGLowering::getControlRoot() {
+SDValue SelectionDAGBuilder::getControlRoot() {
SDValue Root = DAG.getRoot();
if (PendingExports.empty())
@@ -800,11 +578,11 @@ SDValue SelectionDAGLowering::getControlRoot() {
return Root;
}
-void SelectionDAGLowering::visit(Instruction &I) {
+void SelectionDAGBuilder::visit(Instruction &I) {
visit(I.getOpcode(), I);
}
-void SelectionDAGLowering::visit(unsigned Opcode, User &I) {
+void SelectionDAGBuilder::visit(unsigned Opcode, User &I) {
// Note: this doesn't use InstVisitor, because it has to work with
// ConstantExpr's in addition to instructions.
switch (Opcode) {
@@ -816,7 +594,7 @@ void SelectionDAGLowering::visit(unsigned Opcode, User &I) {
}
}
-SDValue SelectionDAGLowering::getValue(const Value *V) {
+SDValue SelectionDAGBuilder::getValue(const Value *V) {
SDValue &N = NodeMap[V];
if (N.getNode()) return N;
@@ -884,7 +662,7 @@ SDValue SelectionDAGLowering::getValue(const Value *V) {
}
if (BlockAddress *BA = dyn_cast<BlockAddress>(C))
- return DAG.getBlockAddress(BA, getCurDebugLoc());
+ return DAG.getBlockAddress(BA, VT);
const VectorType *VecTy = cast<VectorType>(V->getType());
unsigned NumElements = VecTy->getNumElements();
@@ -981,7 +759,7 @@ static void getReturnInfo(const Type* ReturnType,
}
}
-void SelectionDAGLowering::visitRet(ReturnInst &I) {
+void SelectionDAGBuilder::visitRet(ReturnInst &I) {
SDValue Chain = getControlRoot();
SmallVector<ISD::OutputArg, 8> Outs;
FunctionLoweringInfo &FLI = DAG.getFunctionLoweringInfo();
@@ -1086,7 +864,7 @@ void SelectionDAGLowering::visitRet(ReturnInst &I) {
/// CopyToExportRegsIfNeeded - If the given value has virtual registers
/// created for it, emit nodes to copy the value into the virtual
/// registers.
-void SelectionDAGLowering::CopyToExportRegsIfNeeded(Value *V) {
+void SelectionDAGBuilder::CopyToExportRegsIfNeeded(Value *V) {
if (!V->use_empty()) {
DenseMap<const Value *, unsigned>::iterator VMI = FuncInfo.ValueMap.find(V);
if (VMI != FuncInfo.ValueMap.end())
@@ -1097,7 +875,7 @@ void SelectionDAGLowering::CopyToExportRegsIfNeeded(Value *V) {
/// ExportFromCurrentBlock - If this condition isn't known to be exported from
/// the current basic block, add it to ValueMap now so that we'll get a
/// CopyTo/FromReg.
-void SelectionDAGLowering::ExportFromCurrentBlock(Value *V) {
+void SelectionDAGBuilder::ExportFromCurrentBlock(Value *V) {
// No need to export constants.
if (!isa<Instruction>(V) && !isa<Argument>(V)) return;
@@ -1108,8 +886,8 @@ void SelectionDAGLowering::ExportFromCurrentBlock(Value *V) {
CopyValueToVirtualRegister(V, Reg);
}
-bool SelectionDAGLowering::isExportableFromCurrentBlock(Value *V,
- const BasicBlock *FromBB) {
+bool SelectionDAGBuilder::isExportableFromCurrentBlock(Value *V,
+ const BasicBlock *FromBB) {
// The operands of the setcc have to be in this block. We don't know
// how to export them from some other block.
if (Instruction *VI = dyn_cast<Instruction>(V)) {
@@ -1201,10 +979,10 @@ static ISD::CondCode getICmpCondCode(ICmpInst::Predicate Pred) {
/// AND operator tree.
///
void
-SelectionDAGLowering::EmitBranchForMergedCondition(Value *Cond,
- MachineBasicBlock *TBB,
- MachineBasicBlock *FBB,
- MachineBasicBlock *CurBB) {
+SelectionDAGBuilder::EmitBranchForMergedCondition(Value *Cond,
+ MachineBasicBlock *TBB,
+ MachineBasicBlock *FBB,
+ MachineBasicBlock *CurBB) {
const BasicBlock *BB = CurBB->getBasicBlock();
// If the leaf of the tree is a comparison, merge the condition into
@@ -1240,11 +1018,11 @@ SelectionDAGLowering::EmitBranchForMergedCondition(Value *Cond,
}
/// FindMergedConditions - If Cond is an expression like
-void SelectionDAGLowering::FindMergedConditions(Value *Cond,
- MachineBasicBlock *TBB,
- MachineBasicBlock *FBB,
- MachineBasicBlock *CurBB,
- unsigned Opc) {
+void SelectionDAGBuilder::FindMergedConditions(Value *Cond,
+ MachineBasicBlock *TBB,
+ MachineBasicBlock *FBB,
+ MachineBasicBlock *CurBB,
+ unsigned Opc) {
// If this node is not part of the or/and tree, emit it as a branch.
Instruction *BOp = dyn_cast<Instruction>(Cond);
if (!BOp || !(isa<BinaryOperator>(BOp) || isa<CmpInst>(BOp)) ||
@@ -1299,7 +1077,7 @@ void SelectionDAGLowering::FindMergedConditions(Value *Cond,
/// If we should emit this as a bunch of and/or'd together conditions, return
/// false.
bool
-SelectionDAGLowering::ShouldEmitAsBranches(const std::vector<CaseBlock> &Cases){
+SelectionDAGBuilder::ShouldEmitAsBranches(const std::vector<CaseBlock> &Cases){
if (Cases.size() != 2) return true;
// If this is two comparisons of the same values or'd or and'd together, they
@@ -1314,7 +1092,7 @@ SelectionDAGLowering::ShouldEmitAsBranches(const std::vector<CaseBlock> &Cases){
return true;
}
-void SelectionDAGLowering::visitBr(BranchInst &I) {
+void SelectionDAGBuilder::visitBr(BranchInst &I) {
// Update machine-CFG edges.
MachineBasicBlock *Succ0MBB = FuncInfo.MBBMap[I.getSuccessor(0)];
@@ -1398,7 +1176,7 @@ void SelectionDAGLowering::visitBr(BranchInst &I) {
/// visitSwitchCase - Emits the necessary code to represent a single node in
/// the binary search tree resulting from lowering a switch instruction.
-void SelectionDAGLowering::visitSwitchCase(CaseBlock &CB) {
+void SelectionDAGBuilder::visitSwitchCase(CaseBlock &CB) {
SDValue Cond;
SDValue CondLHS = getValue(CB.CmpLHS);
DebugLoc dl = getCurDebugLoc();
@@ -1476,7 +1254,7 @@ void SelectionDAGLowering::visitSwitchCase(CaseBlock &CB) {
}
/// visitJumpTable - Emit JumpTable node in the current MBB
-void SelectionDAGLowering::visitJumpTable(JumpTable &JT) {
+void SelectionDAGBuilder::visitJumpTable(JumpTable &JT) {
// Emit the code for the jump table
assert(JT.Reg != -1U && "Should lower JT Header first!");
EVT PTy = TLI.getPointerTy();
@@ -1490,8 +1268,8 @@ void SelectionDAGLowering::visitJumpTable(JumpTable &JT) {
/// visitJumpTableHeader - This function emits the necessary code to produce an
/// index into the JumpTable from the switch case value.
-void SelectionDAGLowering::visitJumpTableHeader(JumpTable &JT,
- JumpTableHeader &JTH) {
+void SelectionDAGBuilder::visitJumpTableHeader(JumpTable &JT,
+ JumpTableHeader &JTH) {
// Subtract the lowest switch case value from the value being switched on and
// conditional branch to default mbb if the result is greater than the
// difference between smallest and largest cases.
@@ -1540,7 +1318,7 @@ void SelectionDAGLowering::visitJumpTableHeader(JumpTable &JT,
/// visitBitTestHeader - This function emits the necessary code to produce a
/// value suitable for "bit tests".
-void SelectionDAGLowering::visitBitTestHeader(BitTestBlock &B) {
+void SelectionDAGBuilder::visitBitTestHeader(BitTestBlock &B) {
// Subtract the minimum value
SDValue SwitchOp = getValue(B.SValue);
EVT VT = SwitchOp.getValueType();
@@ -1583,9 +1361,9 @@ void SelectionDAGLowering::visitBitTestHeader(BitTestBlock &B) {
}
/// visitBitTestCase - this function produces one "bit test"
-void SelectionDAGLowering::visitBitTestCase(MachineBasicBlock* NextMBB,
- unsigned Reg,
- BitTestCase &B) {
+void SelectionDAGBuilder::visitBitTestCase(MachineBasicBlock* NextMBB,
+ unsigned Reg,
+ BitTestCase &B) {
// Make desired shift
SDValue ShiftOp = DAG.getCopyFromReg(getControlRoot(), getCurDebugLoc(), Reg,
TLI.getPointerTy());
@@ -1624,7 +1402,7 @@ void SelectionDAGLowering::visitBitTestCase(MachineBasicBlock* NextMBB,
DAG.getBasicBlock(NextMBB)));
}
-void SelectionDAGLowering::visitInvoke(InvokeInst &I) {
+void SelectionDAGBuilder::visitInvoke(InvokeInst &I) {
// Retrieve successors.
MachineBasicBlock *Return = FuncInfo.MBBMap[I.getSuccessor(0)];
MachineBasicBlock *LandingPad = FuncInfo.MBBMap[I.getSuccessor(1)];
@@ -1649,15 +1427,15 @@ void SelectionDAGLowering::visitInvoke(InvokeInst &I) {
DAG.getBasicBlock(Return)));
}
-void SelectionDAGLowering::visitUnwind(UnwindInst &I) {
+void SelectionDAGBuilder::visitUnwind(UnwindInst &I) {
}
/// handleSmallSwitchRange - Emit a series of specific tests (suitable for
/// small case ranges).
-bool SelectionDAGLowering::handleSmallSwitchRange(CaseRec& CR,
- CaseRecVector& WorkList,
- Value* SV,
- MachineBasicBlock* Default) {
+bool SelectionDAGBuilder::handleSmallSwitchRange(CaseRec& CR,
+ CaseRecVector& WorkList,
+ Value* SV,
+ MachineBasicBlock* Default) {
Case& BackCase = *(CR.Range.second-1);
// Size is the number of Cases represented by this range.
@@ -1751,10 +1529,10 @@ static APInt ComputeRange(const APInt &First, const APInt &Last) {
}
/// handleJTSwitchCase - Emit jumptable for current switch case range
-bool SelectionDAGLowering::handleJTSwitchCase(CaseRec& CR,
- CaseRecVector& WorkList,
- Value* SV,
- MachineBasicBlock* Default) {
+bool SelectionDAGBuilder::handleJTSwitchCase(CaseRec& CR,
+ CaseRecVector& WorkList,
+ Value* SV,
+ MachineBasicBlock* Default) {
Case& FrontCase = *CR.Range.first;
Case& BackCase = *(CR.Range.second-1);
@@ -1845,10 +1623,10 @@ bool SelectionDAGLowering::handleJTSwitchCase(CaseRec& CR,
/// handleBTSplitSwitchCase - emit comparison and split binary search tree into
/// 2 subtrees.
-bool SelectionDAGLowering::handleBTSplitSwitchCase(CaseRec& CR,
- CaseRecVector& WorkList,
- Value* SV,
- MachineBasicBlock* Default) {
+bool SelectionDAGBuilder::handleBTSplitSwitchCase(CaseRec& CR,
+ CaseRecVector& WorkList,
+ Value* SV,
+ MachineBasicBlock* Default) {
// Get the MachineFunction which holds the current MBB. This is used when
// inserting any additional MBBs necessary to represent the switch.
MachineFunction *CurMF = FuncInfo.MF;
@@ -1973,10 +1751,10 @@ bool SelectionDAGLowering::handleBTSplitSwitchCase(CaseRec& CR,
/// handleBitTestsSwitchCase - if the current case range has few destinations
/// and spans less than the machine word bitwidth, encode it into a series
/// of masks and emit bit tests with these masks.
-bool SelectionDAGLowering::handleBitTestsSwitchCase(CaseRec& CR,
- CaseRecVector& WorkList,
- Value* SV,
- MachineBasicBlock* Default){
+bool SelectionDAGBuilder::handleBitTestsSwitchCase(CaseRec& CR,
+ CaseRecVector& WorkList,
+ Value* SV,
+ MachineBasicBlock* Default){
EVT PTy = TLI.getPointerTy();
unsigned IntPtrBits = PTy.getSizeInBits();
@@ -2104,8 +1882,8 @@ bool SelectionDAGLowering::handleBitTestsSwitchCase(CaseRec& CR,
/// Clusterify - Transform a simple list of Cases into a list of CaseRanges.
-size_t SelectionDAGLowering::Clusterify(CaseVector& Cases,
- const SwitchInst& SI) {
+size_t SelectionDAGBuilder::Clusterify(CaseVector& Cases,
+ const SwitchInst& SI) {
size_t numCmps = 0;
// Start with "simple" cases
@@ -2146,7 +1924,7 @@ size_t SelectionDAGLowering::Clusterify(CaseVector& Cases,
return numCmps;
}
-void SelectionDAGLowering::visitSwitch(SwitchInst &SI) {
+void SelectionDAGBuilder::visitSwitch(SwitchInst &SI) {
// Figure out which block is immediately after the current one.
MachineBasicBlock *NextBlock = 0;
@@ -2209,7 +1987,7 @@ void SelectionDAGLowering::visitSwitch(SwitchInst &SI) {
}
}
-void SelectionDAGLowering::visitIndirectBr(IndirectBrInst &I) {
+void SelectionDAGBuilder::visitIndirectBr(IndirectBrInst &I) {
// Update machine-CFG edges.
for (unsigned i = 0, e = I.getNumSuccessors(); i != e; ++i)
CurMBB->addSuccessor(FuncInfo.MBBMap[I.getSuccessor(i)]);
@@ -2220,7 +1998,7 @@ void SelectionDAGLowering::visitIndirectBr(IndirectBrInst &I) {
}
-void SelectionDAGLowering::visitFSub(User &I) {
+void SelectionDAGBuilder::visitFSub(User &I) {
// -0.0 - X --> fneg
const Type *Ty = I.getType();
if (isa<VectorType>(Ty)) {
@@ -2249,7 +2027,7 @@ void SelectionDAGLowering::visitFSub(User &I) {
visitBinary(I, ISD::FSUB);
}
-void SelectionDAGLowering::visitBinary(User &I, unsigned OpCode) {
+void SelectionDAGBuilder::visitBinary(User &I, unsigned OpCode) {
SDValue Op1 = getValue(I.getOperand(0));
SDValue Op2 = getValue(I.getOperand(1));
@@ -2257,7 +2035,7 @@ void SelectionDAGLowering::visitBinary(User &I, unsigned OpCode) {
Op1.getValueType(), Op1, Op2));
}
-void SelectionDAGLowering::visitShift(User &I, unsigned Opcode) {
+void SelectionDAGBuilder::visitShift(User &I, unsigned Opcode) {
SDValue Op1 = getValue(I.getOperand(0));
SDValue Op2 = getValue(I.getOperand(1));
if (!isa<VectorType>(I.getType()) &&
@@ -2291,7 +2069,7 @@ void SelectionDAGLowering::visitShift(User &I, unsigned Opcode) {
Op1.getValueType(), Op1, Op2));
}
-void SelectionDAGLowering::visitICmp(User &I) {
+void SelectionDAGBuilder::visitICmp(User &I) {
ICmpInst::Predicate predicate = ICmpInst::BAD_ICMP_PREDICATE;
if (ICmpInst *IC = dyn_cast<ICmpInst>(&I))
predicate = IC->getPredicate();
@@ -2305,7 +2083,7 @@ void SelectionDAGLowering::visitICmp(User &I) {
setValue(&I, DAG.getSetCC(getCurDebugLoc(), DestVT, Op1, Op2, Opcode));
}
-void SelectionDAGLowering::visitFCmp(User &I) {
+void SelectionDAGBuilder::visitFCmp(User &I) {
FCmpInst::Predicate predicate = FCmpInst::BAD_FCMP_PREDICATE;
if (FCmpInst *FC = dyn_cast<FCmpInst>(&I))
predicate = FC->getPredicate();
@@ -2318,7 +2096,7 @@ void SelectionDAGLowering::visitFCmp(User &I) {
setValue(&I, DAG.getSetCC(getCurDebugLoc(), DestVT, Op1, Op2, Condition));
}
-void SelectionDAGLowering::visitSelect(User &I) {
+void SelectionDAGBuilder::visitSelect(User &I) {
SmallVector<EVT, 4> ValueVTs;
ComputeValueVTs(TLI, I.getType(), ValueVTs);
unsigned NumValues = ValueVTs.size();
@@ -2341,14 +2119,14 @@ void SelectionDAGLowering::visitSelect(User &I) {
}
-void SelectionDAGLowering::visitTrunc(User &I) {
+void SelectionDAGBuilder::visitTrunc(User &I) {
// TruncInst cannot be a no-op cast because sizeof(src) > sizeof(dest).
SDValue N = getValue(I.getOperand(0));
EVT DestVT = TLI.getValueType(I.getType());
setValue(&I, DAG.getNode(ISD::TRUNCATE, getCurDebugLoc(), DestVT, N));
}
-void SelectionDAGLowering::visitZExt(User &I) {
+void SelectionDAGBuilder::visitZExt(User &I) {
// ZExt cannot be a no-op cast because sizeof(src) < sizeof(dest).
// ZExt also can't be a cast to bool for the same reason. So, nothing much to do.
SDValue N = getValue(I.getOperand(0));
@@ -2356,7 +2134,7 @@ void SelectionDAGLowering::visitZExt(User &I) {
setValue(&I, DAG.getNode(ISD::ZERO_EXTEND, getCurDebugLoc(), DestVT, N));
}
-void SelectionDAGLowering::visitSExt(User &I) {
+void SelectionDAGBuilder::visitSExt(User &I) {
// SExt cannot be a no-op cast because sizeof(src) < sizeof(dest).
// SExt also can't be a cast to bool for the same reason. So, nothing much to do.
SDValue N = getValue(I.getOperand(0));
@@ -2364,7 +2142,7 @@ void SelectionDAGLowering::visitSExt(User &I) {
setValue(&I, DAG.getNode(ISD::SIGN_EXTEND, getCurDebugLoc(), DestVT, N));
}
-void SelectionDAGLowering::visitFPTrunc(User &I) {
+void SelectionDAGBuilder::visitFPTrunc(User &I) {
// FPTrunc is never a no-op cast, no need to check
SDValue N = getValue(I.getOperand(0));
EVT DestVT = TLI.getValueType(I.getType());
@@ -2372,42 +2150,42 @@ void SelectionDAGLowering::visitFPTrunc(User &I) {
DestVT, N, DAG.getIntPtrConstant(0)));
}
-void SelectionDAGLowering::visitFPExt(User &I){
+void SelectionDAGBuilder::visitFPExt(User &I){
// FPExt is never a no-op cast, no need to check
SDValue N = getValue(I.getOperand(0));
EVT DestVT = TLI.getValueType(I.getType());
setValue(&I, DAG.getNode(ISD::FP_EXTEND, getCurDebugLoc(), DestVT, N));
}
-void SelectionDAGLowering::visitFPToUI(User &I) {
+void SelectionDAGBuilder::visitFPToUI(User &I) {
// FPToUI is never a no-op cast, no need to check
SDValue N = getValue(I.getOperand(0));
EVT DestVT = TLI.getValueType(I.getType());
setValue(&I, DAG.getNode(ISD::FP_TO_UINT, getCurDebugLoc(), DestVT, N));
}
-void SelectionDAGLowering::visitFPToSI(User &I) {
+void SelectionDAGBuilder::visitFPToSI(User &I) {
// FPToSI is never a no-op cast, no need to check
SDValue N = getValue(I.getOperand(0));
EVT DestVT = TLI.getValueType(I.getType());
setValue(&I, DAG.getNode(ISD::FP_TO_SINT, getCurDebugLoc(), DestVT, N));
}
-void SelectionDAGLowering::visitUIToFP(User &I) {
+void SelectionDAGBuilder::visitUIToFP(User &I) {
// UIToFP is never a no-op cast, no need to check
SDValue N = getValue(I.getOperand(0));
EVT DestVT = TLI.getValueType(I.getType());
setValue(&I, DAG.getNode(ISD::UINT_TO_FP, getCurDebugLoc(), DestVT, N));
}
-void SelectionDAGLowering::visitSIToFP(User &I){
+void SelectionDAGBuilder::visitSIToFP(User &I){
// SIToFP is never a no-op cast, no need to check
SDValue N = getValue(I.getOperand(0));
EVT DestVT = TLI.getValueType(I.getType());
setValue(&I, DAG.getNode(ISD::SINT_TO_FP, getCurDebugLoc(), DestVT, N));
}
-void SelectionDAGLowering::visitPtrToInt(User &I) {
+void SelectionDAGBuilder::visitPtrToInt(User &I) {
// What to do depends on the size of the integer and the size of the pointer.
// We can either truncate, zero extend, or no-op, accordingly.
SDValue N = getValue(I.getOperand(0));
@@ -2417,7 +2195,7 @@ void SelectionDAGLowering::visitPtrToInt(User &I) {
setValue(&I, Result);
}
-void SelectionDAGLowering::visitIntToPtr(User &I) {
+void SelectionDAGBuilder::visitIntToPtr(User &I) {
// What to do depends on the size of the integer and the size of the pointer.
// We can either truncate, zero extend, or no-op, accordingly.
SDValue N = getValue(I.getOperand(0));
@@ -2426,7 +2204,7 @@ void SelectionDAGLowering::visitIntToPtr(User &I) {
setValue(&I, DAG.getZExtOrTrunc(N, getCurDebugLoc(), DestVT));
}
-void SelectionDAGLowering::visitBitCast(User &I) {
+void SelectionDAGBuilder::visitBitCast(User &I) {
SDValue N = getValue(I.getOperand(0));
EVT DestVT = TLI.getValueType(I.getType());
@@ -2439,7 +2217,7 @@ void SelectionDAGLowering::visitBitCast(User &I) {
setValue(&I, N); // noop cast.
}
-void SelectionDAGLowering::visitInsertElement(User &I) {
+void SelectionDAGBuilder::visitInsertElement(User &I) {
SDValue InVec = getValue(I.getOperand(0));
SDValue InVal = getValue(I.getOperand(1));
SDValue InIdx = DAG.getNode(ISD::ZERO_EXTEND, getCurDebugLoc(),
@@ -2451,7 +2229,7 @@ void SelectionDAGLowering::visitInsertElement(User &I) {
InVec, InVal, InIdx));
}
-void SelectionDAGLowering::visitExtractElement(User &I) {
+void SelectionDAGBuilder::visitExtractElement(User &I) {
SDValue InVec = getValue(I.getOperand(0));
SDValue InIdx = DAG.getNode(ISD::ZERO_EXTEND, getCurDebugLoc(),
TLI.getPointerTy(),
@@ -2471,7 +2249,7 @@ static bool SequentialMask(SmallVectorImpl<int> &Mask, unsigned SIndx) {
return true;
}
-void SelectionDAGLowering::visitShuffleVector(User &I) {
+void SelectionDAGBuilder::visitShuffleVector(User &I) {
SmallVector<int, 8> Mask;
SDValue Src1 = getValue(I.getOperand(0));
SDValue Src2 = getValue(I.getOperand(1));
@@ -2645,7 +2423,7 @@ void SelectionDAGLowering::visitShuffleVector(User &I) {
VT, &Ops[0], Ops.size()));
}
-void SelectionDAGLowering::visitInsertValue(InsertValueInst &I) {
+void SelectionDAGBuilder::visitInsertValue(InsertValueInst &I) {
const Value *Op0 = I.getOperand(0);
const Value *Op1 = I.getOperand(1);
const Type *AggTy = I.getType();
@@ -2686,7 +2464,7 @@ void SelectionDAGLowering::visitInsertValue(InsertValueInst &I) {
&Values[0], NumAggValues));
}
-void SelectionDAGLowering::visitExtractValue(ExtractValueInst &I) {
+void SelectionDAGBuilder::visitExtractValue(ExtractValueInst &I) {
const Value *Op0 = I.getOperand(0);
const Type *AggTy = Op0->getType();
const Type *ValTy = I.getType();
@@ -2715,7 +2493,7 @@ void SelectionDAGLowering::visitExtractValue(ExtractValueInst &I) {
}
-void SelectionDAGLowering::visitGetElementPtr(User &I) {
+void SelectionDAGBuilder::visitGetElementPtr(User &I) {
SDValue N = getValue(I.getOperand(0));
const Type *Ty = I.getOperand(0)->getType();
@@ -2784,7 +2562,7 @@ void SelectionDAGLowering::visitGetElementPtr(User &I) {
setValue(&I, N);
}
-void SelectionDAGLowering::visitAlloca(AllocaInst &I) {
+void SelectionDAGBuilder::visitAlloca(AllocaInst &I) {
// If this is a fixed sized alloca in the entry block of the function,
// allocate it statically on the stack.
if (FuncInfo.StaticAllocaMap.count(&I))
@@ -2837,7 +2615,7 @@ void SelectionDAGLowering::visitAlloca(AllocaInst &I) {
FuncInfo.MF->getFrameInfo()->CreateVariableSizedObject();
}
-void SelectionDAGLowering::visitLoad(LoadInst &I) {
+void SelectionDAGBuilder::visitLoad(LoadInst &I) {
const Value *SV = I.getOperand(0);
SDValue Ptr = getValue(SV);
@@ -2895,7 +2673,7 @@ void SelectionDAGLowering::visitLoad(LoadInst &I) {
}
-void SelectionDAGLowering::visitStore(StoreInst &I) {
+void SelectionDAGBuilder::visitStore(StoreInst &I) {
Value *SrcV = I.getOperand(0);
Value *PtrV = I.getOperand(1);
@@ -2931,8 +2709,8 @@ void SelectionDAGLowering::visitStore(StoreInst &I) {
/// visitTargetIntrinsic - Lower a call of a target intrinsic to an INTRINSIC
/// node.
-void SelectionDAGLowering::visitTargetIntrinsic(CallInst &I,
- unsigned Intrinsic) {
+void SelectionDAGBuilder::visitTargetIntrinsic(CallInst &I,
+ unsigned Intrinsic) {
bool HasChain = !I.doesNotAccessMemory();
bool OnlyLoad = HasChain && I.onlyReadsMemory();
@@ -3012,73 +2790,6 @@ void SelectionDAGLowering::visitTargetIntrinsic(CallInst &I,
}
}
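The body of visitTargetIntrinsic is elided by the hunk; hedged as a sketch of the usual scheme rather than the exact code, HasChain and the result type pick among the three ISD intrinsic node kinds (read-only intrinsics still take a chain, but read the root without pinning it, so unrelated loads stay reorderable):

    static unsigned intrinsicNodeKind(CallInst &I, bool HasChain) {
      if (!HasChain)
        return ISD::INTRINSIC_WO_CHAIN;    // touches no memory at all
      if (I.getType()->getTypeID() == Type::VoidTyID)
        return ISD::INTRINSIC_VOID;        // chained, produces no value
      return ISD::INTRINSIC_W_CHAIN;       // chained and produces a value
    }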
-/// ExtractTypeInfo - Returns the type info, possibly bitcast, encoded in V.
-static GlobalVariable *ExtractTypeInfo(Value *V) {
- V = V->stripPointerCasts();
- GlobalVariable *GV = dyn_cast<GlobalVariable>(V);
- assert ((GV || isa<ConstantPointerNull>(V)) &&
- "TypeInfo must be a global variable or NULL");
- return GV;
-}
-
-namespace llvm {
-
-/// AddCatchInfo - Extract the personality and type infos from an eh.selector
-/// call, and add them to the specified machine basic block.
-void AddCatchInfo(CallInst &I, MachineModuleInfo *MMI,
- MachineBasicBlock *MBB) {
- // Inform the MachineModuleInfo of the personality for this landing pad.
- ConstantExpr *CE = cast<ConstantExpr>(I.getOperand(2));
- assert(CE->getOpcode() == Instruction::BitCast &&
- isa<Function>(CE->getOperand(0)) &&
- "Personality should be a function");
- MMI->addPersonality(MBB, cast<Function>(CE->getOperand(0)));
-
- // Gather all the type infos for this landing pad and pass them along to
- // MachineModuleInfo.
- std::vector<GlobalVariable *> TyInfo;
- unsigned N = I.getNumOperands();
-
- for (unsigned i = N - 1; i > 2; --i) {
- if (ConstantInt *CI = dyn_cast<ConstantInt>(I.getOperand(i))) {
- unsigned FilterLength = CI->getZExtValue();
- unsigned FirstCatch = i + FilterLength + !FilterLength;
- assert (FirstCatch <= N && "Invalid filter length");
-
- if (FirstCatch < N) {
- TyInfo.reserve(N - FirstCatch);
- for (unsigned j = FirstCatch; j < N; ++j)
- TyInfo.push_back(ExtractTypeInfo(I.getOperand(j)));
- MMI->addCatchTypeInfo(MBB, TyInfo);
- TyInfo.clear();
- }
-
- if (!FilterLength) {
- // Cleanup.
- MMI->addCleanup(MBB);
- } else {
- // Filter.
- TyInfo.reserve(FilterLength - 1);
- for (unsigned j = i + 1; j < FirstCatch; ++j)
- TyInfo.push_back(ExtractTypeInfo(I.getOperand(j)));
- MMI->addFilterTypeInfo(MBB, TyInfo);
- TyInfo.clear();
- }
-
- N = i;
- }
- }
-
- if (N > 3) {
- TyInfo.reserve(N - 3);
- for (unsigned j = 3; j < N; ++j)
- TyInfo.push_back(ExtractTypeInfo(I.getOperand(j)));
- MMI->addCatchTypeInfo(MBB, TyInfo);
- }
-}
-
-}
-
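The removed AddCatchInfo is relocated by this update rather than deleted, and its backward scan is easiest to follow on a worked example. The operand list below is hypothetical (operand 0 is the callee in this era's CallInst layout):

    // ops: [0]=callee  [1]=%exn  [2]=personality
    //      [3]=@A  [4]=@B  [5]=i32 2  [6]=@C  [7]=i32 0        (N = 8)
    //
    // i = 7: i32 0 -> FilterLength = 0, FirstCatch = 8 == N, so no catch
    //        group follows; a cleanup is recorded; N becomes 7
    // i = 5: i32 2 -> FirstCatch = 7 == N, again no catch group; the filter
    //        typeinfos are ops[6..7) = {@C}; N becomes 5
    // loop ends at i = 3; N = 5 > 3, so ops[3..5) = {@A, @B} are registered
    // as ordinary catch typeinfos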
/// GetSignificand - Get the significand and build it into a floating-point
/// number with exponent of 1:
///
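The formula line of this comment is cut off by the hunk boundary; the underlying IEEE-754 trick, sketched on plain ints (the real routine builds the same AND/OR out of i32 DAG nodes):

    #include <cstring>   // std::memcpy

    static float significandSketch(float x) {
      int bits;
      std::memcpy(&bits, &x, sizeof bits);
      bits &= 0x007fffff;       // keep only the 23 mantissa bits
      bits |= 0x3f800000;       // force sign 0 and biased exponent 127
      std::memcpy(&x, &bits, sizeof bits);
      return x;                 // always lands in [1.0, 2.0)
    }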
@@ -3121,7 +2832,7 @@ getF32Constant(SelectionDAG &DAG, unsigned Flt) {
/// visitIntrinsicCall: I is a call instruction
/// Op is the associated NodeType for I
const char *
-SelectionDAGLowering::implVisitBinaryAtomic(CallInst& I, ISD::NodeType Op) {
+SelectionDAGBuilder::implVisitBinaryAtomic(CallInst& I, ISD::NodeType Op) {
SDValue Root = getRoot();
SDValue L =
DAG.getAtomic(Op, getCurDebugLoc(),
@@ -3137,7 +2848,7 @@ SelectionDAGLowering::implVisitBinaryAtomic(CallInst& I, ISD::NodeType Op) {
// implVisitAluOverflow - Lower arithmetic overflow intrinsics.
const char *
-SelectionDAGLowering::implVisitAluOverflow(CallInst &I, ISD::NodeType Op) {
+SelectionDAGBuilder::implVisitAluOverflow(CallInst &I, ISD::NodeType Op) {
SDValue Op1 = getValue(I.getOperand(1));
SDValue Op2 = getValue(I.getOperand(2));
@@ -3151,7 +2862,7 @@ SelectionDAGLowering::implVisitAluOverflow(CallInst &I, ISD::NodeType Op) {
/// visitExp - Lower an exp intrinsic. Handles the special sequences for
/// limited-precision mode.
void
-SelectionDAGLowering::visitExp(CallInst &I) {
+SelectionDAGBuilder::visitExp(CallInst &I) {
SDValue result;
DebugLoc dl = getCurDebugLoc();
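The bulk of visitExp sits between these hunks, but the limited-precision scheme it and the log/exp2/pow visitors below share has one shape: split the scaled input into integer and fractional parts, approximate the fractional factor with a low-degree polynomial, and apply the integer part directly to the exponent field. A plain-C++ model of that dataflow; the coefficients are illustrative, not copied from the lowering:

    #include <cstring>   // std::memcpy

    static float expSketch(float x) {
      float y = x * 1.44269504f;        // e^x == 2^(x * log2 e)
      int   n = (int)y;                 // integer part, applied via exponent
      float f = y - (float)n;           // fractional remainder
      float p = 0.999892986f +          // degree-3 approximation of 2^f
                f * (0.696457318f +
                f * (0.224338339f + f * 0.0792043434f));
      int bits;
      std::memcpy(&bits, &p, sizeof bits);
      bits += n << 23;                  // multiply by 2^n in the exponent
      std::memcpy(&p, &bits, sizeof bits);
      return p;
    }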
@@ -3277,7 +2988,7 @@ SelectionDAGLowering::visitExp(CallInst &I) {
/// visitLog - Lower a log intrinsic. Handles the special sequences for
/// limited-precision mode.
void
-SelectionDAGLowering::visitLog(CallInst &I) {
+SelectionDAGBuilder::visitLog(CallInst &I) {
SDValue result;
DebugLoc dl = getCurDebugLoc();
@@ -3387,7 +3098,7 @@ SelectionDAGLowering::visitLog(CallInst &I) {
/// visitLog2 - Lower a log2 intrinsic. Handles the special sequences for
/// limited-precision mode.
void
-SelectionDAGLowering::visitLog2(CallInst &I) {
+SelectionDAGBuilder::visitLog2(CallInst &I) {
SDValue result;
DebugLoc dl = getCurDebugLoc();
@@ -3496,7 +3207,7 @@ SelectionDAGLowering::visitLog2(CallInst &I) {
/// visitLog10 - Lower a log10 intrinsic. Handles the special sequences for
/// limited-precision mode.
void
-SelectionDAGLowering::visitLog10(CallInst &I) {
+SelectionDAGBuilder::visitLog10(CallInst &I) {
SDValue result;
DebugLoc dl = getCurDebugLoc();
@@ -3598,7 +3309,7 @@ SelectionDAGLowering::visitLog10(CallInst &I) {
/// visitExp2 - Lower an exp2 intrinsic. Handles the special sequences for
/// limited-precision mode.
void
-SelectionDAGLowering::visitExp2(CallInst &I) {
+SelectionDAGBuilder::visitExp2(CallInst &I) {
SDValue result;
DebugLoc dl = getCurDebugLoc();
@@ -3712,7 +3423,7 @@ SelectionDAGLowering::visitExp2(CallInst &I) {
/// visitPow - Lower a pow intrinsic. Handles the special sequences for
/// limited-precision mode with x == 10.0f.
void
-SelectionDAGLowering::visitPow(CallInst &I) {
+SelectionDAGBuilder::visitPow(CallInst &I) {
SDValue result;
Value *Val = I.getOperand(1);
DebugLoc dl = getCurDebugLoc();
@@ -3847,7 +3558,7 @@ SelectionDAGLowering::visitPow(CallInst &I) {
/// we want to emit this as a call to a named external function, return the name;
/// otherwise, lower it and return null.
const char *
-SelectionDAGLowering::visitIntrinsicCall(CallInst &I, unsigned Intrinsic) {
+SelectionDAGBuilder::visitIntrinsicCall(CallInst &I, unsigned Intrinsic) {
DebugLoc dl = getCurDebugLoc();
switch (Intrinsic) {
default:
@@ -4412,9 +4123,9 @@ isInTailCallPosition(const Instruction *I, Attributes CalleeRetAttr,
return true;
}
-void SelectionDAGLowering::LowerCallTo(CallSite CS, SDValue Callee,
- bool isTailCall,
- MachineBasicBlock *LandingPad) {
+void SelectionDAGBuilder::LowerCallTo(CallSite CS, SDValue Callee,
+ bool isTailCall,
+ MachineBasicBlock *LandingPad) {
const PointerType *PT = cast<PointerType>(CS.getCalledValue()->getType());
const FunctionType *FTy = cast<FunctionType>(PT->getElementType());
const Type *RetTy = FTy->getReturnType();
@@ -4561,7 +4272,7 @@ void SelectionDAGLowering::LowerCallTo(CallSite CS, SDValue Callee,
}
-void SelectionDAGLowering::visitCall(CallInst &I) {
+void SelectionDAGBuilder::visitCall(CallInst &I) {
const char *RenameFn = 0;
if (Function *F = I.getCalledFunction()) {
if (F->isDeclaration()) {
@@ -4956,7 +4667,7 @@ private:
/// OpInfo describes the operand.
/// Input and OutputRegs are the set of already allocated physical registers.
///
-void SelectionDAGLowering::
+void SelectionDAGBuilder::
GetRegistersForValue(SDISelAsmOperandInfo &OpInfo,
std::set<unsigned> &OutputRegs,
std::set<unsigned> &InputRegs) {
@@ -5150,7 +4861,7 @@ hasInlineAsmMemConstraint(std::vector<InlineAsm::ConstraintInfo> &CInfos,
/// visitInlineAsm - Handle a call to an InlineAsm object.
///
-void SelectionDAGLowering::visitInlineAsm(CallSite CS) {
+void SelectionDAGBuilder::visitInlineAsm(CallSite CS) {
InlineAsm *IA = cast<InlineAsm>(CS.getCalledValue());
/// ConstraintOperands - Information about all of the constraints.
@@ -5572,14 +5283,14 @@ void SelectionDAGLowering::visitInlineAsm(CallSite CS) {
DAG.setRoot(Chain);
}
-void SelectionDAGLowering::visitVAStart(CallInst &I) {
+void SelectionDAGBuilder::visitVAStart(CallInst &I) {
DAG.setRoot(DAG.getNode(ISD::VASTART, getCurDebugLoc(),
MVT::Other, getRoot(),
getValue(I.getOperand(1)),
DAG.getSrcValue(I.getOperand(1))));
}
-void SelectionDAGLowering::visitVAArg(VAArgInst &I) {
+void SelectionDAGBuilder::visitVAArg(VAArgInst &I) {
SDValue V = DAG.getVAArg(TLI.getValueType(I.getType()), getCurDebugLoc(),
getRoot(), getValue(I.getOperand(0)),
DAG.getSrcValue(I.getOperand(0)));
@@ -5587,14 +5298,14 @@ void SelectionDAGLowering::visitVAArg(VAArgInst &I) {
DAG.setRoot(V.getValue(1));
}
-void SelectionDAGLowering::visitVAEnd(CallInst &I) {
+void SelectionDAGBuilder::visitVAEnd(CallInst &I) {
DAG.setRoot(DAG.getNode(ISD::VAEND, getCurDebugLoc(),
MVT::Other, getRoot(),
getValue(I.getOperand(1)),
DAG.getSrcValue(I.getOperand(1))));
}
-void SelectionDAGLowering::visitVACopy(CallInst &I) {
+void SelectionDAGBuilder::visitVACopy(CallInst &I) {
DAG.setRoot(DAG.getNode(ISD::VACOPY, getCurDebugLoc(),
MVT::Other, getRoot(),
getValue(I.getOperand(1)),
@@ -5787,7 +5498,7 @@ SDValue TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) {
}
-void SelectionDAGLowering::CopyValueToVirtualRegister(Value *V, unsigned Reg) {
+void SelectionDAGBuilder::CopyValueToVirtualRegister(Value *V, unsigned Reg) {
SDValue Op = getValue(V);
assert((Op.getOpcode() != ISD::CopyFromReg ||
cast<RegisterSDNode>(Op.getOperand(1))->getReg() != Reg) &&
@@ -5805,9 +5516,9 @@ void SelectionDAGLowering::CopyValueToVirtualRegister(Value *V, unsigned Reg) {
void SelectionDAGISel::LowerArguments(BasicBlock *LLVMBB) {
// If this is the entry block, emit arguments.
Function &F = *LLVMBB->getParent();
- SelectionDAG &DAG = SDL->DAG;
+ SelectionDAG &DAG = SDB->DAG;
SDValue OldRoot = DAG.getRoot();
- DebugLoc dl = SDL->getCurDebugLoc();
+ DebugLoc dl = SDB->getCurDebugLoc();
const TargetData *TD = TLI.getTargetData();
SmallVector<ISD::InputArg, 16> Ins;
@@ -5923,11 +5634,11 @@ void SelectionDAGISel::LowerArguments(BasicBlock *LLVMBB) {
SDValue ArgValue = getCopyFromParts(DAG, dl, &InVals[0], 1, RegVT,
VT, AssertOp);
- MachineFunction& MF = SDL->DAG.getMachineFunction();
+ MachineFunction& MF = SDB->DAG.getMachineFunction();
MachineRegisterInfo& RegInfo = MF.getRegInfo();
unsigned SRetReg = RegInfo.createVirtualRegister(TLI.getRegClassFor(RegVT));
FLI.DemoteRegister = SRetReg;
- NewRoot = SDL->DAG.getCopyToReg(NewRoot, SDL->getCurDebugLoc(), SRetReg, ArgValue);
+ NewRoot = SDB->DAG.getCopyToReg(NewRoot, SDB->getCurDebugLoc(), SRetReg, ArgValue);
DAG.setRoot(NewRoot);
// i indexes lowered arguments. Bump it past the hidden sret argument.
@@ -5958,18 +5669,18 @@ void SelectionDAGISel::LowerArguments(BasicBlock *LLVMBB) {
i += NumParts;
}
if (!I->use_empty()) {
- SDL->setValue(I, DAG.getMergeValues(&ArgValues[0], NumValues,
- SDL->getCurDebugLoc()));
+ SDB->setValue(I, DAG.getMergeValues(&ArgValues[0], NumValues,
+ SDB->getCurDebugLoc()));
// If this argument is live outside of the entry block, insert a copy from
// wherever we got it to the vreg that other BBs will reference it as.
- SDL->CopyToExportRegsIfNeeded(I);
+ SDB->CopyToExportRegsIfNeeded(I);
}
}
assert(i == InVals.size() && "Argument register count mismatch!");
// Finally, if the target has anything special to do, allow it to do so.
// FIXME: this should insert code into the DAG!
- EmitFunctionEntryCode(F, SDL->DAG.getMachineFunction());
+ EmitFunctionEntryCode(F, SDB->DAG.getMachineFunction());
}
/// Handle PHI nodes in successor blocks. Emit code into the SelectionDAG to
@@ -6011,10 +5722,10 @@ SelectionDAGISel::HandlePHINodesInSuccessorBlocks(BasicBlock *LLVMBB) {
Value *PHIOp = PN->getIncomingValueForBlock(LLVMBB);
if (Constant *C = dyn_cast<Constant>(PHIOp)) {
- unsigned &RegOut = SDL->ConstantsOut[C];
+ unsigned &RegOut = SDB->ConstantsOut[C];
if (RegOut == 0) {
RegOut = FuncInfo->CreateRegForValue(C);
- SDL->CopyValueToVirtualRegister(C, RegOut);
+ SDB->CopyValueToVirtualRegister(C, RegOut);
}
Reg = RegOut;
} else {
@@ -6024,7 +5735,7 @@ SelectionDAGISel::HandlePHINodesInSuccessorBlocks(BasicBlock *LLVMBB) {
FuncInfo->StaticAllocaMap.count(cast<AllocaInst>(PHIOp)) &&
"Didn't codegen value into a register!??");
Reg = FuncInfo->CreateRegForValue(PHIOp);
- SDL->CopyValueToVirtualRegister(PHIOp, Reg);
+ SDB->CopyValueToVirtualRegister(PHIOp, Reg);
}
}
@@ -6036,12 +5747,12 @@ SelectionDAGISel::HandlePHINodesInSuccessorBlocks(BasicBlock *LLVMBB) {
EVT VT = ValueVTs[vti];
unsigned NumRegisters = TLI.getNumRegisters(*CurDAG->getContext(), VT);
for (unsigned i = 0, e = NumRegisters; i != e; ++i)
- SDL->PHINodesToUpdate.push_back(std::make_pair(MBBI++, Reg+i));
+ SDB->PHINodesToUpdate.push_back(std::make_pair(MBBI++, Reg+i));
Reg += NumRegisters;
}
}
}
- SDL->ConstantsOut.clear();
+ SDB->ConstantsOut.clear();
}
/// This is the Fast-ISel version of HandlePHINodesInSuccessorBlocks. It only
@@ -6054,7 +5765,7 @@ SelectionDAGISel::HandlePHINodesInSuccessorBlocksFast(BasicBlock *LLVMBB,
TerminatorInst *TI = LLVMBB->getTerminator();
SmallPtrSet<MachineBasicBlock *, 4> SuccsHandled;
- unsigned OrigNumPHINodesToUpdate = SDL->PHINodesToUpdate.size();
+ unsigned OrigNumPHINodesToUpdate = SDB->PHINodesToUpdate.size();
// Check successor nodes' PHI nodes that expect a constant to be available
// from this block.
@@ -6090,7 +5801,7 @@ SelectionDAGISel::HandlePHINodesInSuccessorBlocksFast(BasicBlock *LLVMBB,
if (VT == MVT::i1)
VT = TLI.getTypeToTransformTo(*CurDAG->getContext(), VT);
else {
- SDL->PHINodesToUpdate.resize(OrigNumPHINodesToUpdate);
+ SDB->PHINodesToUpdate.resize(OrigNumPHINodesToUpdate);
return false;
}
}
@@ -6099,10 +5810,10 @@ SelectionDAGISel::HandlePHINodesInSuccessorBlocksFast(BasicBlock *LLVMBB,
unsigned Reg = F->getRegForValue(PHIOp);
if (Reg == 0) {
- SDL->PHINodesToUpdate.resize(OrigNumPHINodesToUpdate);
+ SDB->PHINodesToUpdate.resize(OrigNumPHINodesToUpdate);
return false;
}
- SDL->PHINodesToUpdate.push_back(std::make_pair(MBBI++, Reg));
+ SDB->PHINodesToUpdate.push_back(std::make_pair(MBBI++, Reg));
}
}
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuild.h b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
index 10f256c..244f9b5 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAGBuild.h
+++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
@@ -1,4 +1,4 @@
-//===-- SelectionDAGBuild.h - Selection-DAG building ----------------------===//
+//===-- SelectionDAGBuilder.h - Selection-DAG building --------------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -11,8 +11,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef SELECTIONDAGBUILD_H
-#define SELECTIONDAGBUILD_H
+#ifndef SELECTIONDAGBUILDER_H
+#define SELECTIONDAGBUILDER_H
#include "llvm/Constants.h"
#include "llvm/CodeGen/SelectionDAG.h"
@@ -25,7 +25,6 @@
#include "llvm/CodeGen/ValueTypes.h"
#include "llvm/Support/CallSite.h"
#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Target/TargetMachine.h"
#include <vector>
#include <set>
@@ -45,6 +44,7 @@ class FPToSIInst;
class FPToUIInst;
class FPTruncInst;
class Function;
+class FunctionLoweringInfo;
class GetElementPtrInst;
class GCFunctionInfo;
class ICmpInst;
@@ -58,7 +58,6 @@ class LoadInst;
class MachineBasicBlock;
class MachineFunction;
class MachineInstr;
-class MachineModuleInfo;
class MachineRegisterInfo;
class PHINode;
class PtrToIntInst;
@@ -79,98 +78,12 @@ class UnwindInst;
class VAArgInst;
class ZExtInst;
-//===--------------------------------------------------------------------===//
-/// FunctionLoweringInfo - This contains information that is global to a
-/// function that is used when lowering a region of the function.
-///
-class FunctionLoweringInfo {
-public:
- TargetLowering &TLI;
- Function *Fn;
- MachineFunction *MF;
- MachineRegisterInfo *RegInfo;
-
- /// CanLowerReturn - true iff the function's return value can be lowered to
- /// registers.
- bool CanLowerReturn;
-
- /// DemoteRegister - if CanLowerReturn is false, DemoteRegister is a vreg
- /// allocated to hold a pointer to the hidden sret parameter.
- unsigned DemoteRegister;
-
- explicit FunctionLoweringInfo(TargetLowering &TLI);
-
- /// set - Initialize this FunctionLoweringInfo with the given Function
- /// and its associated MachineFunction.
- ///
- void set(Function &Fn, MachineFunction &MF, SelectionDAG &DAG,
- bool EnableFastISel);
-
- /// MBBMap - A mapping from LLVM basic blocks to their machine code entry.
- DenseMap<const BasicBlock*, MachineBasicBlock *> MBBMap;
-
- /// ValueMap - Since we emit code for the function a basic block at a time,
- /// we must remember which virtual registers hold the values for
- /// cross-basic-block values.
- DenseMap<const Value*, unsigned> ValueMap;
-
- /// StaticAllocaMap - Keep track of frame indices for fixed sized allocas in
- /// the entry block. This allows the allocas to be efficiently referenced
- /// anywhere in the function.
- DenseMap<const AllocaInst*, int> StaticAllocaMap;
-
-#ifndef NDEBUG
- SmallSet<Instruction*, 8> CatchInfoLost;
- SmallSet<Instruction*, 8> CatchInfoFound;
-#endif
-
- unsigned MakeReg(EVT VT);
-
- /// isExportedInst - Return true if the specified value is an instruction
- /// exported from its block.
- bool isExportedInst(const Value *V) {
- return ValueMap.count(V);
- }
-
- unsigned CreateRegForValue(const Value *V);
-
- unsigned InitializeRegForValue(const Value *V) {
- unsigned &R = ValueMap[V];
- assert(R == 0 && "Already initialized this value register!");
- return R = CreateRegForValue(V);
- }
-
- struct LiveOutInfo {
- unsigned NumSignBits;
- APInt KnownOne, KnownZero;
- LiveOutInfo() : NumSignBits(0), KnownOne(1, 0), KnownZero(1, 0) {}
- };
-
- /// LiveOutRegInfo - Information about live out vregs, indexed by their
- /// register number offset by 'FirstVirtualRegister'.
- std::vector<LiveOutInfo> LiveOutRegInfo;
-
- /// clear - Clear out all the function-specific state. This returns this
- /// FunctionLoweringInfo to an empty state, ready to be used for a
- /// different function.
- void clear() {
- MBBMap.clear();
- ValueMap.clear();
- StaticAllocaMap.clear();
-#ifndef NDEBUG
- CatchInfoLost.clear();
- CatchInfoFound.clear();
-#endif
- LiveOutRegInfo.clear();
- }
-};
-
//===----------------------------------------------------------------------===//
-/// SelectionDAGLowering - This is the common target-independent lowering
+/// SelectionDAGBuilder - This is the common target-independent lowering
/// implementation that is parameterized by a TargetLowering object.
/// Also, targets can overload any lowering method.
///
-class SelectionDAGLowering {
+class SelectionDAGBuilder {
MachineBasicBlock *CurMBB;
/// CurDebugLoc - current file + line number. Changes as we build the DAG.
@@ -260,9 +173,9 @@ class SelectionDAGLowering {
size_t Clusterify(CaseVector& Cases, const SwitchInst &SI);
- /// CaseBlock - This structure is used to communicate between SDLowering and
- /// SDISel for the code generation of additional basic blocks needed by multi-
- /// case switch statements.
+ /// CaseBlock - This structure is used to communicate between
+ /// SelectionDAGBuilder and SDISel for the code generation of additional basic
+ /// blocks needed by multi-case switch statements.
struct CaseBlock {
CaseBlock(ISD::CondCode cc, Value *cmplhs, Value *cmprhs, Value *cmpmiddle,
MachineBasicBlock *truebb, MachineBasicBlock *falsebb,
@@ -384,9 +297,9 @@ public:
LLVMContext *Context;
- SelectionDAGLowering(SelectionDAG &dag, TargetLowering &tli,
- FunctionLoweringInfo &funcinfo,
- CodeGenOpt::Level ol)
+ SelectionDAGBuilder(SelectionDAG &dag, TargetLowering &tli,
+ FunctionLoweringInfo &funcinfo,
+ CodeGenOpt::Level ol)
: CurDebugLoc(DebugLoc::getUnknownLoc()),
TLI(tli), DAG(dag), FuncInfo(funcinfo), OptLevel(ol),
HasTailCall(false),
@@ -396,7 +309,7 @@ public:
void init(GCFunctionInfo *gfi, AliasAnalysis &aa);
/// clear - Clear out the current SelectionDAG and the associated
- /// state and prepare this SelectionDAGLowering object to be used
+ /// state and prepare this SelectionDAGBuilder object to be used
/// for a new block. This doesn't clear out information about
/// additional blocks that are needed to complete switch lowering
/// or PHI node updating; that information is cleared out as it is
@@ -569,11 +482,6 @@ private:
const char *implVisitAluOverflow(CallInst &I, ISD::NodeType Op);
};
-/// AddCatchInfo - Extract the personality and type infos from an eh.selector
-/// call, and add them to the specified machine basic block.
-void AddCatchInfo(CallInst &I, MachineModuleInfo *MMI,
- MachineBasicBlock *MBB);
-
} // end namespace llvm
#endif
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
index ab5f21e..c39437f 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
@@ -13,7 +13,8 @@
#define DEBUG_TYPE "isel"
#include "ScheduleDAGSDNodes.h"
-#include "SelectionDAGBuild.h"
+#include "SelectionDAGBuilder.h"
+#include "FunctionLoweringInfo.h"
#include "llvm/CodeGen/SelectionDAGISel.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/DebugInfo.h"
@@ -279,14 +280,14 @@ SelectionDAGISel::SelectionDAGISel(TargetMachine &tm, CodeGenOpt::Level OL) :
MachineFunctionPass(&ID), TM(tm), TLI(*tm.getTargetLowering()),
FuncInfo(new FunctionLoweringInfo(TLI)),
CurDAG(new SelectionDAG(TLI, *FuncInfo)),
- SDL(new SelectionDAGLowering(*CurDAG, TLI, *FuncInfo, OL)),
+ SDB(new SelectionDAGBuilder(*CurDAG, TLI, *FuncInfo, OL)),
GFI(),
OptLevel(OL),
DAGSize(0)
{}
SelectionDAGISel::~SelectionDAGISel() {
- delete SDL;
+ delete SDB;
delete CurDAG;
delete FuncInfo;
}
@@ -331,8 +332,8 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) {
MachineModuleInfo *MMI = getAnalysisIfAvailable<MachineModuleInfo>();
DwarfWriter *DW = getAnalysisIfAvailable<DwarfWriter>();
CurDAG->init(*MF, MMI, DW);
- FuncInfo->set(Fn, *MF, *CurDAG, EnableFastISel);
- SDL->init(GFI, *AA);
+ FuncInfo->set(Fn, *MF, EnableFastISel);
+ SDB->init(GFI, *AA);
for (Function::iterator I = Fn.begin(), E = Fn.end(); I != E; ++I)
if (InvokeInst *Invoke = dyn_cast<InvokeInst>(I->getTerminator()))
@@ -361,29 +362,17 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) {
return true;
}
-static void copyCatchInfo(BasicBlock *SrcBB, BasicBlock *DestBB,
- MachineModuleInfo *MMI, FunctionLoweringInfo &FLI) {
- for (BasicBlock::iterator I = SrcBB->begin(), E = --SrcBB->end(); I != E; ++I)
- if (EHSelectorInst *EHSel = dyn_cast<EHSelectorInst>(I)) {
- // Apply the catch info to DestBB.
- AddCatchInfo(*EHSel, MMI, FLI.MBBMap[DestBB]);
-#ifndef NDEBUG
- if (!FLI.MBBMap[SrcBB]->isLandingPad())
- FLI.CatchInfoFound.insert(EHSel);
-#endif
- }
-}
-
void SelectionDAGISel::SelectBasicBlock(BasicBlock *LLVMBB,
BasicBlock::iterator Begin,
- BasicBlock::iterator End) {
- SDL->setCurrentBasicBlock(BB);
+ BasicBlock::iterator End,
+ bool &HadTailCall) {
+ SDB->setCurrentBasicBlock(BB);
MetadataContext &TheMetadata = LLVMBB->getParent()->getContext().getMetadata();
unsigned MDDbgKind = TheMetadata.getMDKind("dbg");
// Lower all of the non-terminator instructions. If a call is emitted
// as a tail call, cease emitting nodes for this block.
- for (BasicBlock::iterator I = Begin; I != End && !SDL->HasTailCall; ++I) {
+ for (BasicBlock::iterator I = Begin; I != End && !SDB->HasTailCall; ++I) {
if (MDDbgKind) {
// Update DebugLoc if debug information is attached to this
// instruction.
@@ -391,37 +380,38 @@ void SelectionDAGISel::SelectBasicBlock(BasicBlock *LLVMBB,
if (MDNode *Dbg = TheMetadata.getMD(MDDbgKind, I)) {
DILocation DILoc(Dbg);
DebugLoc Loc = ExtractDebugLocation(DILoc, MF->getDebugLocInfo());
- SDL->setCurDebugLoc(Loc);
+ SDB->setCurDebugLoc(Loc);
if (MF->getDefaultDebugLoc().isUnknown())
MF->setDefaultDebugLoc(Loc);
}
}
if (!isa<TerminatorInst>(I))
- SDL->visit(*I);
+ SDB->visit(*I);
}
- if (!SDL->HasTailCall) {
+ if (!SDB->HasTailCall) {
// Ensure that all instructions which are used outside of their defining
// blocks are available as virtual registers. Invoke is handled elsewhere.
for (BasicBlock::iterator I = Begin; I != End; ++I)
if (!isa<PHINode>(I) && !isa<InvokeInst>(I))
- SDL->CopyToExportRegsIfNeeded(I);
+ SDB->CopyToExportRegsIfNeeded(I);
// Handle PHI nodes in successor blocks.
if (End == LLVMBB->end()) {
HandlePHINodesInSuccessorBlocks(LLVMBB);
// Lower the terminator after the copies are emitted.
- SDL->visit(*LLVMBB->getTerminator());
+ SDB->visit(*LLVMBB->getTerminator());
}
}
// Make sure the root of the DAG is up-to-date.
- CurDAG->setRoot(SDL->getControlRoot());
+ CurDAG->setRoot(SDB->getControlRoot());
// Final step, emit the lowered DAG as machine code.
CodeGenAndEmitDAG();
- SDL->clear();
+ HadTailCall = SDB->HasTailCall;
+ SDB->clear();
}
void SelectionDAGISel::ComputeLiveOutVRegInfo() {
@@ -629,9 +619,9 @@ void SelectionDAGISel::CodeGenAndEmitDAG() {
// inserted into.
if (TimePassesIsEnabled) {
NamedRegionTimer T("Instruction Creation", GroupName);
- BB = Scheduler->EmitSchedule(&SDL->EdgeMapping);
+ BB = Scheduler->EmitSchedule(&SDB->EdgeMapping);
} else {
- BB = Scheduler->EmitSchedule(&SDL->EdgeMapping);
+ BB = Scheduler->EmitSchedule(&SDB->EdgeMapping);
}
// Free the scheduler state.
@@ -701,7 +691,7 @@ void SelectionDAGISel::SelectAllBasicBlocks(Function &Fn,
unsigned LabelID = MMI->addLandingPad(BB);
const TargetInstrDesc &II = TII.get(TargetInstrInfo::EH_LABEL);
- BuildMI(BB, SDL->getCurDebugLoc(), II).addImm(LabelID);
+ BuildMI(BB, SDB->getCurDebugLoc(), II).addImm(LabelID);
// Mark exception register as live in.
unsigned Reg = TLI.getExceptionAddressRegister();
@@ -732,7 +722,7 @@ void SelectionDAGISel::SelectAllBasicBlocks(Function &Fn,
if (I == E)
// No catch info found - try to extract some from the successor.
- copyCatchInfo(Br->getSuccessor(0), LLVMBB, MMI, *FuncInfo);
+ CopyCatchInfo(Br->getSuccessor(0), LLVMBB, MMI, *FuncInfo);
}
}
@@ -741,9 +731,9 @@ void SelectionDAGISel::SelectAllBasicBlocks(Function &Fn,
// Emit code for any incoming arguments. This must happen before
// beginning FastISel on the entry block.
if (LLVMBB == &Fn.getEntryBlock()) {
- CurDAG->setRoot(SDL->getControlRoot());
+ CurDAG->setRoot(SDB->getControlRoot());
CodeGenAndEmitDAG();
- SDL->clear();
+ SDB->clear();
}
FastIS->startNewBlock(BB);
// Do FastISel on as many instructions as possible.
@@ -796,8 +786,17 @@ void SelectionDAGISel::SelectAllBasicBlocks(Function &Fn,
R = FuncInfo->CreateRegForValue(BI);
}
- SDL->setCurDebugLoc(FastIS->getCurDebugLoc());
- SelectBasicBlock(LLVMBB, BI, next(BI));
+ SDB->setCurDebugLoc(FastIS->getCurDebugLoc());
+
+ bool HadTailCall = false;
+ SelectBasicBlock(LLVMBB, BI, next(BI), HadTailCall);
+
+ // If the call was emitted as a tail call, we're done with the block.
+ if (HadTailCall) {
+ BI = End;
+ break;
+ }
+
// If the instruction was codegen'd with multiple blocks,
// inform the FastISel object where to resume inserting.
FastIS->setCurrentBlock(BB);
@@ -826,8 +825,9 @@ void SelectionDAGISel::SelectAllBasicBlocks(Function &Fn,
if (BI != End) {
// If FastISel is run and it has known DebugLoc then use it.
if (FastIS && !FastIS->getCurDebugLoc().isUnknown())
- SDL->setCurDebugLoc(FastIS->getCurDebugLoc());
- SelectBasicBlock(LLVMBB, BI, End);
+ SDB->setCurDebugLoc(FastIS->getCurDebugLoc());
+ bool HadTailCall;
+ SelectBasicBlock(LLVMBB, BI, End, HadTailCall);
}
FinishBasicBlock();
@@ -843,150 +843,150 @@ SelectionDAGISel::FinishBasicBlock() {
DEBUG(BB->dump());
DEBUG(errs() << "Total amount of phi nodes to update: "
- << SDL->PHINodesToUpdate.size() << "\n");
- DEBUG(for (unsigned i = 0, e = SDL->PHINodesToUpdate.size(); i != e; ++i)
+ << SDB->PHINodesToUpdate.size() << "\n");
+ DEBUG(for (unsigned i = 0, e = SDB->PHINodesToUpdate.size(); i != e; ++i)
errs() << "Node " << i << " : ("
- << SDL->PHINodesToUpdate[i].first
- << ", " << SDL->PHINodesToUpdate[i].second << ")\n");
+ << SDB->PHINodesToUpdate[i].first
+ << ", " << SDB->PHINodesToUpdate[i].second << ")\n");
// Next, now that we know what the last MBB the LLVM BB expanded is, update
// PHI nodes in successors.
- if (SDL->SwitchCases.empty() &&
- SDL->JTCases.empty() &&
- SDL->BitTestCases.empty()) {
- for (unsigned i = 0, e = SDL->PHINodesToUpdate.size(); i != e; ++i) {
- MachineInstr *PHI = SDL->PHINodesToUpdate[i].first;
+ if (SDB->SwitchCases.empty() &&
+ SDB->JTCases.empty() &&
+ SDB->BitTestCases.empty()) {
+ for (unsigned i = 0, e = SDB->PHINodesToUpdate.size(); i != e; ++i) {
+ MachineInstr *PHI = SDB->PHINodesToUpdate[i].first;
assert(PHI->getOpcode() == TargetInstrInfo::PHI &&
"This is not a machine PHI node that we are updating!");
- PHI->addOperand(MachineOperand::CreateReg(SDL->PHINodesToUpdate[i].second,
+ PHI->addOperand(MachineOperand::CreateReg(SDB->PHINodesToUpdate[i].second,
false));
PHI->addOperand(MachineOperand::CreateMBB(BB));
}
- SDL->PHINodesToUpdate.clear();
+ SDB->PHINodesToUpdate.clear();
return;
}
- for (unsigned i = 0, e = SDL->BitTestCases.size(); i != e; ++i) {
+ for (unsigned i = 0, e = SDB->BitTestCases.size(); i != e; ++i) {
// Lower header first, if it wasn't already lowered
- if (!SDL->BitTestCases[i].Emitted) {
+ if (!SDB->BitTestCases[i].Emitted) {
// Set the current basic block to the mbb we wish to insert the code into
- BB = SDL->BitTestCases[i].Parent;
- SDL->setCurrentBasicBlock(BB);
+ BB = SDB->BitTestCases[i].Parent;
+ SDB->setCurrentBasicBlock(BB);
// Emit the code
- SDL->visitBitTestHeader(SDL->BitTestCases[i]);
- CurDAG->setRoot(SDL->getRoot());
+ SDB->visitBitTestHeader(SDB->BitTestCases[i]);
+ CurDAG->setRoot(SDB->getRoot());
CodeGenAndEmitDAG();
- SDL->clear();
+ SDB->clear();
}
- for (unsigned j = 0, ej = SDL->BitTestCases[i].Cases.size(); j != ej; ++j) {
+ for (unsigned j = 0, ej = SDB->BitTestCases[i].Cases.size(); j != ej; ++j) {
// Set the current basic block to the mbb we wish to insert the code into
- BB = SDL->BitTestCases[i].Cases[j].ThisBB;
- SDL->setCurrentBasicBlock(BB);
+ BB = SDB->BitTestCases[i].Cases[j].ThisBB;
+ SDB->setCurrentBasicBlock(BB);
// Emit the code
if (j+1 != ej)
- SDL->visitBitTestCase(SDL->BitTestCases[i].Cases[j+1].ThisBB,
- SDL->BitTestCases[i].Reg,
- SDL->BitTestCases[i].Cases[j]);
+ SDB->visitBitTestCase(SDB->BitTestCases[i].Cases[j+1].ThisBB,
+ SDB->BitTestCases[i].Reg,
+ SDB->BitTestCases[i].Cases[j]);
else
- SDL->visitBitTestCase(SDL->BitTestCases[i].Default,
- SDL->BitTestCases[i].Reg,
- SDL->BitTestCases[i].Cases[j]);
+ SDB->visitBitTestCase(SDB->BitTestCases[i].Default,
+ SDB->BitTestCases[i].Reg,
+ SDB->BitTestCases[i].Cases[j]);
- CurDAG->setRoot(SDL->getRoot());
+ CurDAG->setRoot(SDB->getRoot());
CodeGenAndEmitDAG();
- SDL->clear();
+ SDB->clear();
}
// Update PHI Nodes
- for (unsigned pi = 0, pe = SDL->PHINodesToUpdate.size(); pi != pe; ++pi) {
- MachineInstr *PHI = SDL->PHINodesToUpdate[pi].first;
+ for (unsigned pi = 0, pe = SDB->PHINodesToUpdate.size(); pi != pe; ++pi) {
+ MachineInstr *PHI = SDB->PHINodesToUpdate[pi].first;
MachineBasicBlock *PHIBB = PHI->getParent();
assert(PHI->getOpcode() == TargetInstrInfo::PHI &&
"This is not a machine PHI node that we are updating!");
// This is "default" BB. We have two jumps to it. From "header" BB and
// from last "case" BB.
- if (PHIBB == SDL->BitTestCases[i].Default) {
- PHI->addOperand(MachineOperand::CreateReg(SDL->PHINodesToUpdate[pi].second,
+ if (PHIBB == SDB->BitTestCases[i].Default) {
+ PHI->addOperand(MachineOperand::CreateReg(SDB->PHINodesToUpdate[pi].second,
false));
- PHI->addOperand(MachineOperand::CreateMBB(SDL->BitTestCases[i].Parent));
- PHI->addOperand(MachineOperand::CreateReg(SDL->PHINodesToUpdate[pi].second,
+ PHI->addOperand(MachineOperand::CreateMBB(SDB->BitTestCases[i].Parent));
+ PHI->addOperand(MachineOperand::CreateReg(SDB->PHINodesToUpdate[pi].second,
false));
- PHI->addOperand(MachineOperand::CreateMBB(SDL->BitTestCases[i].Cases.
+ PHI->addOperand(MachineOperand::CreateMBB(SDB->BitTestCases[i].Cases.
back().ThisBB));
}
// One of "cases" BB.
- for (unsigned j = 0, ej = SDL->BitTestCases[i].Cases.size();
+ for (unsigned j = 0, ej = SDB->BitTestCases[i].Cases.size();
j != ej; ++j) {
- MachineBasicBlock* cBB = SDL->BitTestCases[i].Cases[j].ThisBB;
+ MachineBasicBlock* cBB = SDB->BitTestCases[i].Cases[j].ThisBB;
if (cBB->succ_end() !=
std::find(cBB->succ_begin(),cBB->succ_end(), PHIBB)) {
- PHI->addOperand(MachineOperand::CreateReg(SDL->PHINodesToUpdate[pi].second,
+ PHI->addOperand(MachineOperand::CreateReg(SDB->PHINodesToUpdate[pi].second,
false));
PHI->addOperand(MachineOperand::CreateMBB(cBB));
}
}
}
}
- SDL->BitTestCases.clear();
+ SDB->BitTestCases.clear();
// If the JumpTable record is filled in, then we need to emit a jump table.
// Updating the PHI nodes is tricky in this case, since we need to determine
// whether the PHI is a successor of the range check MBB or the jump table MBB
- for (unsigned i = 0, e = SDL->JTCases.size(); i != e; ++i) {
+ for (unsigned i = 0, e = SDB->JTCases.size(); i != e; ++i) {
// Lower header first, if it wasn't already lowered
- if (!SDL->JTCases[i].first.Emitted) {
+ if (!SDB->JTCases[i].first.Emitted) {
// Set the current basic block to the mbb we wish to insert the code into
- BB = SDL->JTCases[i].first.HeaderBB;
- SDL->setCurrentBasicBlock(BB);
+ BB = SDB->JTCases[i].first.HeaderBB;
+ SDB->setCurrentBasicBlock(BB);
// Emit the code
- SDL->visitJumpTableHeader(SDL->JTCases[i].second, SDL->JTCases[i].first);
- CurDAG->setRoot(SDL->getRoot());
+ SDB->visitJumpTableHeader(SDB->JTCases[i].second, SDB->JTCases[i].first);
+ CurDAG->setRoot(SDB->getRoot());
CodeGenAndEmitDAG();
- SDL->clear();
+ SDB->clear();
}
// Set the current basic block to the mbb we wish to insert the code into
- BB = SDL->JTCases[i].second.MBB;
- SDL->setCurrentBasicBlock(BB);
+ BB = SDB->JTCases[i].second.MBB;
+ SDB->setCurrentBasicBlock(BB);
// Emit the code
- SDL->visitJumpTable(SDL->JTCases[i].second);
- CurDAG->setRoot(SDL->getRoot());
+ SDB->visitJumpTable(SDB->JTCases[i].second);
+ CurDAG->setRoot(SDB->getRoot());
CodeGenAndEmitDAG();
- SDL->clear();
+ SDB->clear();
// Update PHI Nodes
- for (unsigned pi = 0, pe = SDL->PHINodesToUpdate.size(); pi != pe; ++pi) {
- MachineInstr *PHI = SDL->PHINodesToUpdate[pi].first;
+ for (unsigned pi = 0, pe = SDB->PHINodesToUpdate.size(); pi != pe; ++pi) {
+ MachineInstr *PHI = SDB->PHINodesToUpdate[pi].first;
MachineBasicBlock *PHIBB = PHI->getParent();
assert(PHI->getOpcode() == TargetInstrInfo::PHI &&
"This is not a machine PHI node that we are updating!");
// "default" BB. We can go there only from header BB.
- if (PHIBB == SDL->JTCases[i].second.Default) {
+ if (PHIBB == SDB->JTCases[i].second.Default) {
PHI->addOperand
- (MachineOperand::CreateReg(SDL->PHINodesToUpdate[pi].second, false));
+ (MachineOperand::CreateReg(SDB->PHINodesToUpdate[pi].second, false));
PHI->addOperand
- (MachineOperand::CreateMBB(SDL->JTCases[i].first.HeaderBB));
+ (MachineOperand::CreateMBB(SDB->JTCases[i].first.HeaderBB));
}
// JT BB. Just iterate over successors here
if (BB->succ_end() != std::find(BB->succ_begin(),BB->succ_end(), PHIBB)) {
PHI->addOperand
- (MachineOperand::CreateReg(SDL->PHINodesToUpdate[pi].second, false));
+ (MachineOperand::CreateReg(SDB->PHINodesToUpdate[pi].second, false));
PHI->addOperand(MachineOperand::CreateMBB(BB));
}
}
}
- SDL->JTCases.clear();
+ SDB->JTCases.clear();
// If the switch block involved a branch to one of the actual successors, we
// need to update PHI nodes in that block.
- for (unsigned i = 0, e = SDL->PHINodesToUpdate.size(); i != e; ++i) {
- MachineInstr *PHI = SDL->PHINodesToUpdate[i].first;
+ for (unsigned i = 0, e = SDB->PHINodesToUpdate.size(); i != e; ++i) {
+ MachineInstr *PHI = SDB->PHINodesToUpdate[i].first;
assert(PHI->getOpcode() == TargetInstrInfo::PHI &&
"This is not a machine PHI node that we are updating!");
if (BB->isSuccessor(PHI->getParent())) {
- PHI->addOperand(MachineOperand::CreateReg(SDL->PHINodesToUpdate[i].second,
+ PHI->addOperand(MachineOperand::CreateReg(SDB->PHINodesToUpdate[i].second,
false));
PHI->addOperand(MachineOperand::CreateMBB(BB));
}
@@ -994,36 +994,36 @@ SelectionDAGISel::FinishBasicBlock() {
// If we generated any switch lowering information, build and codegen any
// additional DAGs necessary.
- for (unsigned i = 0, e = SDL->SwitchCases.size(); i != e; ++i) {
+ for (unsigned i = 0, e = SDB->SwitchCases.size(); i != e; ++i) {
// Set the current basic block to the mbb we wish to insert the code into
- MachineBasicBlock *ThisBB = BB = SDL->SwitchCases[i].ThisBB;
- SDL->setCurrentBasicBlock(BB);
+ MachineBasicBlock *ThisBB = BB = SDB->SwitchCases[i].ThisBB;
+ SDB->setCurrentBasicBlock(BB);
// Emit the code
- SDL->visitSwitchCase(SDL->SwitchCases[i]);
- CurDAG->setRoot(SDL->getRoot());
+ SDB->visitSwitchCase(SDB->SwitchCases[i]);
+ CurDAG->setRoot(SDB->getRoot());
CodeGenAndEmitDAG();
// Handle any PHI nodes in successors of this chunk, as if we were coming
// from the original BB before switch expansion. Note that PHI nodes can
// occur multiple times in PHINodesToUpdate. We have to be very careful to
// handle them the right number of times.
- while ((BB = SDL->SwitchCases[i].TrueBB)) { // Handle LHS and RHS.
+ while ((BB = SDB->SwitchCases[i].TrueBB)) { // Handle LHS and RHS.
// If new BB's are created during scheduling, the edges may have been
// updated. That is, the edge from ThisBB to BB may have been split and
// BB's predecessor is now another block.
DenseMap<MachineBasicBlock*, MachineBasicBlock*>::iterator EI =
- SDL->EdgeMapping.find(BB);
- if (EI != SDL->EdgeMapping.end())
+ SDB->EdgeMapping.find(BB);
+ if (EI != SDB->EdgeMapping.end())
ThisBB = EI->second;
for (MachineBasicBlock::iterator Phi = BB->begin();
Phi != BB->end() && Phi->getOpcode() == TargetInstrInfo::PHI; ++Phi){
// This value for this PHI node is recorded in PHINodesToUpdate, get it.
for (unsigned pn = 0; ; ++pn) {
- assert(pn != SDL->PHINodesToUpdate.size() &&
+ assert(pn != SDB->PHINodesToUpdate.size() &&
"Didn't find PHI entry!");
- if (SDL->PHINodesToUpdate[pn].first == Phi) {
- Phi->addOperand(MachineOperand::CreateReg(SDL->PHINodesToUpdate[pn].
+ if (SDB->PHINodesToUpdate[pn].first == Phi) {
+ Phi->addOperand(MachineOperand::CreateReg(SDB->PHINodesToUpdate[pn].
second, false));
Phi->addOperand(MachineOperand::CreateMBB(ThisBB));
break;
@@ -1032,19 +1032,19 @@ SelectionDAGISel::FinishBasicBlock() {
}
// Don't process RHS if same block as LHS.
- if (BB == SDL->SwitchCases[i].FalseBB)
- SDL->SwitchCases[i].FalseBB = 0;
+ if (BB == SDB->SwitchCases[i].FalseBB)
+ SDB->SwitchCases[i].FalseBB = 0;
// If we haven't handled the RHS, do so now. Otherwise, we're done.
- SDL->SwitchCases[i].TrueBB = SDL->SwitchCases[i].FalseBB;
- SDL->SwitchCases[i].FalseBB = 0;
+ SDB->SwitchCases[i].TrueBB = SDB->SwitchCases[i].FalseBB;
+ SDB->SwitchCases[i].FalseBB = 0;
}
- assert(SDL->SwitchCases[i].TrueBB == 0 && SDL->SwitchCases[i].FalseBB == 0);
- SDL->clear();
+ assert(SDB->SwitchCases[i].TrueBB == 0 && SDB->SwitchCases[i].FalseBB == 0);
+ SDB->clear();
}
- SDL->SwitchCases.clear();
+ SDB->SwitchCases.clear();
- SDL->PHINodesToUpdate.clear();
+ SDB->PHINodesToUpdate.clear();
}
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp
index ccc5e3c..c5adc50 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp
@@ -35,6 +35,9 @@ using namespace llvm;
namespace llvm {
template<>
struct DOTGraphTraits<SelectionDAG*> : public DefaultDOTGraphTraits {
+
+ DOTGraphTraits (bool isSimple=false) : DefaultDOTGraphTraits(isSimple) {}
+
static bool hasEdgeDestLabels() {
return true;
}
@@ -48,8 +51,8 @@ namespace llvm {
}
/// edgeTargetsEdgeSource - This method returns true if this outgoing edge
- /// should actually target another edge source, not a node. If this method is
- /// implemented, getEdgeTarget should be implemented.
+ /// should actually target another edge source, not a node. If this method
+ /// is implemented, getEdgeTarget should be implemented.
template<typename EdgeIter>
static bool edgeTargetsEdgeSource(const void *Node, EdgeIter I) {
return true;
@@ -93,9 +96,16 @@ namespace llvm {
}
- static std::string getNodeLabel(const SDNode *Node,
- const SelectionDAG *Graph,
- bool ShortNames);
+ static std::string getSimpleNodeLabel(const SDNode *Node,
+ const SelectionDAG *G) {
+ std::string Result = Node->getOperationName(G);
+ {
+ raw_string_ostream OS(Result);
+ Node->print_details(OS, G);
+ }
+ return Result;
+ }
+ std::string getNodeLabel(const SDNode *Node, const SelectionDAG *Graph);
static std::string getNodeAttributes(const SDNode *N,
const SelectionDAG *Graph) {
#ifndef NDEBUG
@@ -121,14 +131,8 @@ namespace llvm {
}
std::string DOTGraphTraits<SelectionDAG*>::getNodeLabel(const SDNode *Node,
- const SelectionDAG *G,
- bool ShortNames) {
- std::string Result = Node->getOperationName(G);
- {
- raw_string_ostream OS(Result);
- Node->print_details(OS, G);
- }
- return Result;
+ const SelectionDAG *G) {
+ return DOTGraphTraits<SelectionDAG*>::getSimpleNodeLabel (Node, G);
}
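In miniature, the interface this file is migrating to, on a hypothetical MyGraph/MyNode pair and assuming DefaultDOTGraphTraits exposes its simple-mode flag through isSimple(): label queries become instance methods, and one flag captured at construction replaces the old per-call ShortNames parameter.

    template<>
    struct DOTGraphTraits<MyGraph*> : public DefaultDOTGraphTraits {
      DOTGraphTraits(bool isSimple = false)
        : DefaultDOTGraphTraits(isSimple) {}
      std::string getNodeLabel(const MyNode *N, const MyGraph *G) {
        // one traits object now serves both modes
        return isSimple() ? N->shortName() : N->fullName();
      }
    };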
@@ -269,8 +273,8 @@ std::string ScheduleDAGSDNodes::getGraphNodeLabel(const SUnit *SU) const {
for (SDNode *N = SU->getNode(); N; N = N->getFlaggedNode())
FlaggedNodes.push_back(N);
while (!FlaggedNodes.empty()) {
- O << DOTGraphTraits<SelectionDAG*>::getNodeLabel(FlaggedNodes.back(),
- DAG, false);
+ O << DOTGraphTraits<SelectionDAG*>
+ ::getSimpleNodeLabel(FlaggedNodes.back(), DAG);
FlaggedNodes.pop_back();
if (!FlaggedNodes.empty())
O << "\n ";
diff --git a/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/lib/CodeGen/SelectionDAG/TargetLowering.cpp
index 2ca52a4..68bc2d6 100644
--- a/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -532,11 +532,6 @@ TargetLowering::TargetLowering(TargetMachine &tm,TargetLoweringObjectFile *tlof)
InitLibcallNames(LibcallRoutineNames);
InitCmpLibcallCCs(CmpLibcallCCs);
InitLibcallCallingConvs(LibcallCallingConvs);
-
- // Tell Legalize whether the assembler supports DEBUG_LOC.
- const MCAsmInfo *TASM = TM.getMCAsmInfo();
- if (!TASM || !TASM->hasDotLocAndDotFile())
- setOperationAction(ISD::DEBUG_LOC, MVT::Other, Expand);
}
TargetLowering::~TargetLowering() {
diff --git a/lib/CodeGen/SimpleRegisterCoalescing.cpp b/lib/CodeGen/SimpleRegisterCoalescing.cpp
index 3909c56..5876371 100644
--- a/lib/CodeGen/SimpleRegisterCoalescing.cpp
+++ b/lib/CodeGen/SimpleRegisterCoalescing.cpp
@@ -2371,16 +2371,26 @@ namespace {
struct DepthMBBCompare {
typedef std::pair<unsigned, MachineBasicBlock*> DepthMBBPair;
bool operator()(const DepthMBBPair &LHS, const DepthMBBPair &RHS) const {
- if (LHS.first > RHS.first) return true; // Deeper loops first
- return LHS.first == RHS.first &&
- LHS.second->getNumber() < RHS.second->getNumber();
+ // Deeper loops first
+ if (LHS.first != RHS.first)
+ return LHS.first > RHS.first;
+
+ // Prefer blocks that are more connected in the CFG. This takes care of
+ // the most difficult copies first while intervals are short.
+ unsigned cl = LHS.second->pred_size() + LHS.second->succ_size();
+ unsigned cr = RHS.second->pred_size() + RHS.second->succ_size();
+ if (cl != cr)
+ return cl > cr;
+
+ // As a last resort, sort by block number.
+ return LHS.second->getNumber() < RHS.second->getNumber();
}
};
}
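Roughly how a coalescer driver would consume the reworked comparator (a sketch, assuming <algorithm> and <vector> are included and a MachineLoopInfo is in scope):

    static void coalesceByDepthSketch(MachineFunction &MF,
                                      const MachineLoopInfo *LoopInfo) {
      std::vector<DepthMBBCompare::DepthMBBPair> MBBs;
      for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ++I)
        MBBs.push_back(std::make_pair(LoopInfo->getLoopDepth(&*I), &*I));
      // Deepest, best-connected blocks come first under DepthMBBCompare.
      std::sort(MBBs.begin(), MBBs.end(), DepthMBBCompare());
      // ...then visit MBBs in order, coalescing copies block by block...
    }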
void SimpleRegisterCoalescing::CopyCoalesceInMBB(MachineBasicBlock *MBB,
std::vector<CopyRec> &TryAgain) {
- DEBUG(errs() << ((Value*)MBB->getBasicBlock())->getName() << ":\n");
+ DEBUG(errs() << MBB->getName() << ":\n");
std::vector<CopyRec> VirtCopies;
std::vector<CopyRec> PhysCopies;
diff --git a/lib/CodeGen/Spiller.cpp b/lib/CodeGen/Spiller.cpp
index 20c4a28..237d0b5 100644
--- a/lib/CodeGen/Spiller.cpp
+++ b/lib/CodeGen/Spiller.cpp
@@ -12,7 +12,6 @@
#include "Spiller.h"
#include "VirtRegMap.h"
#include "llvm/CodeGen/LiveIntervalAnalysis.h"
-#include "llvm/CodeGen/LiveStackAnalysis.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
@@ -47,16 +46,14 @@ protected:
MachineFunction *mf;
LiveIntervals *lis;
- LiveStacks *ls;
MachineFrameInfo *mfi;
MachineRegisterInfo *mri;
const TargetInstrInfo *tii;
VirtRegMap *vrm;
/// Construct a spiller base.
- SpillerBase(MachineFunction *mf, LiveIntervals *lis, LiveStacks *ls,
- VirtRegMap *vrm) :
- mf(mf), lis(lis), ls(ls), vrm(vrm)
+ SpillerBase(MachineFunction *mf, LiveIntervals *lis, VirtRegMap *vrm)
+ : mf(mf), lis(lis), vrm(vrm)
{
mfi = mf->getFrameInfo();
mri = &mf->getRegInfo();
@@ -169,9 +166,8 @@ protected:
class TrivialSpiller : public SpillerBase {
public:
- TrivialSpiller(MachineFunction *mf, LiveIntervals *lis, LiveStacks *ls,
- VirtRegMap *vrm)
- : SpillerBase(mf, lis, ls, vrm) {}
+ TrivialSpiller(MachineFunction *mf, LiveIntervals *lis, VirtRegMap *vrm)
+ : SpillerBase(mf, lis, vrm) {}
std::vector<LiveInterval*> spill(LiveInterval *li,
SmallVectorImpl<LiveInterval*> &spillIs) {
@@ -188,7 +184,7 @@ private:
const MachineLoopInfo *loopInfo;
VirtRegMap *vrm;
public:
- StandardSpiller(MachineFunction *mf, LiveIntervals *lis, LiveStacks *ls,
+ StandardSpiller(MachineFunction *mf, LiveIntervals *lis,
const MachineLoopInfo *loopInfo, VirtRegMap *vrm)
: lis(lis), loopInfo(loopInfo), vrm(vrm) {}
@@ -203,12 +199,11 @@ public:
}
llvm::Spiller* llvm::createSpiller(MachineFunction *mf, LiveIntervals *lis,
- LiveStacks *ls,
const MachineLoopInfo *loopInfo,
VirtRegMap *vrm) {
switch (spillerOpt) {
- case trivial: return new TrivialSpiller(mf, lis, ls, vrm); break;
- case standard: return new StandardSpiller(mf, lis, ls, loopInfo, vrm); break;
+ case trivial: return new TrivialSpiller(mf, lis, vrm); break;
+ case standard: return new StandardSpiller(mf, lis, loopInfo, vrm); break;
default: llvm_unreachable("Unreachable!"); break;
}
}
diff --git a/lib/CodeGen/Spiller.h b/lib/CodeGen/Spiller.h
index 7ec8e6d..c6bd985 100644
--- a/lib/CodeGen/Spiller.h
+++ b/lib/CodeGen/Spiller.h
@@ -41,8 +41,7 @@ namespace llvm {
/// Create and return a spiller object, as specified on the command line.
Spiller* createSpiller(MachineFunction *mf, LiveIntervals *li,
- LiveStacks *ls, const MachineLoopInfo *loopInfo,
- VirtRegMap *vrm);
+ const MachineLoopInfo *loopInfo, VirtRegMap *vrm);
}
#endif
diff --git a/lib/CodeGen/TailDuplication.cpp b/lib/CodeGen/TailDuplication.cpp
new file mode 100644
index 0000000..9c0b596
--- /dev/null
+++ b/lib/CodeGen/TailDuplication.cpp
@@ -0,0 +1,249 @@
+//===-- TailDuplication.cpp - Duplicate blocks into predecessors' tails ---===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass duplicates basic blocks ending in unconditional branches into
+// the tails of their predecessors.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "tailduplication"
+#include "llvm/Function.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/SetVector.h"
+#include "llvm/ADT/Statistic.h"
+using namespace llvm;
+
+STATISTIC(NumTailDups , "Number of tail duplicated blocks");
+STATISTIC(NumInstrDups , "Additional instructions due to tail duplication");
+STATISTIC(NumDeadBlocks, "Number of dead blocks removed");
+
+// Heuristic for tail duplication.
+static cl::opt<unsigned>
+TailDuplicateSize("tail-dup-size",
+ cl::desc("Maximum instructions to consider tail duplicating"),
+ cl::init(2), cl::Hidden);
+
+namespace {
+ /// TailDuplicatePass - Perform tail duplication.
+ class TailDuplicatePass : public MachineFunctionPass {
+ const TargetInstrInfo *TII;
+ MachineModuleInfo *MMI;
+
+ public:
+ static char ID;
+ explicit TailDuplicatePass() : MachineFunctionPass(&ID) {}
+
+ virtual bool runOnMachineFunction(MachineFunction &MF);
+ virtual const char *getPassName() const { return "Tail Duplication"; }
+
+ private:
+ bool TailDuplicateBlocks(MachineFunction &MF);
+ bool TailDuplicate(MachineBasicBlock *TailBB, MachineFunction &MF);
+ void RemoveDeadBlock(MachineBasicBlock *MBB);
+ };
+
+ char TailDuplicatePass::ID = 0;
+}
+
+FunctionPass *llvm::createTailDuplicatePass() {
+ return new TailDuplicatePass();
+}
+
+bool TailDuplicatePass::runOnMachineFunction(MachineFunction &MF) {
+ TII = MF.getTarget().getInstrInfo();
+ MMI = getAnalysisIfAvailable<MachineModuleInfo>();
+
+ bool MadeChange = false;
+ bool MadeChangeThisIteration = true;
+ while (MadeChangeThisIteration) {
+ MadeChangeThisIteration = false;
+ MadeChangeThisIteration |= TailDuplicateBlocks(MF);
+ MadeChange |= MadeChangeThisIteration;
+ }
+
+ return MadeChange;
+}
+
+/// TailDuplicateBlocks - Look for small blocks that are unconditionally
+/// branched to and do not fall through. Tail-duplicate their instructions
+/// into their predecessors to eliminate (dynamic) branches.
+bool TailDuplicatePass::TailDuplicateBlocks(MachineFunction &MF) {
+ bool MadeChange = false;
+
+ for (MachineFunction::iterator I = ++MF.begin(), E = MF.end(); I != E; ) {
+ MachineBasicBlock *MBB = I++;
+
+ // Only duplicate blocks that end with unconditional branches.
+ if (MBB->canFallThrough())
+ continue;
+
+ MadeChange |= TailDuplicate(MBB, MF);
+
+ // If it is dead, remove it.
+ if (MBB->pred_empty()) {
+ NumInstrDups -= MBB->size();
+ RemoveDeadBlock(MBB);
+ MadeChange = true;
+ ++NumDeadBlocks;
+ }
+ }
+ return MadeChange;
+}
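The payoff on a hypothetical CFG, before and after one pass over a small tail block:

    // before:                        after:
    //   BB1: ... jmp BB3               BB1: ... <BB3 body> jmp BB4
    //   BB2: ... jmp BB3               BB2: ... <BB3 body> jmp BB4
    //   BB3: <body> jmp BB4            BB3: dead, removed below
    //
    // Each predecessor grows by |BB3| - 1 instructions but executes one
    // dynamic branch fewer every time it runs.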
+
+/// TailDuplicate - If it is profitable, duplicate TailBB's contents in each
+/// of its predecessors.
+bool TailDuplicatePass::TailDuplicate(MachineBasicBlock *TailBB,
+ MachineFunction &MF) {
+ // Don't try to tail-duplicate single-block loops.
+ if (TailBB->isSuccessor(TailBB))
+ return false;
+
+ // Set the limit on the number of instructions to duplicate, with a default
+ // of one less than the tail-merge threshold. When optimizing for size,
+ // duplicate only one, because one branch instruction can be eliminated to
+ // compensate for the duplication.
+ unsigned MaxDuplicateCount;
+ if (!TailBB->empty() && TailBB->back().getDesc().isIndirectBranch())
+ // If the target has hardware branch prediction that can handle indirect
+ // branches, duplicating them can often make them predictable when there
+ // are common paths through the code. The limit needs to be high enough
+ // to allow undoing the effects of tail merging.
+ MaxDuplicateCount = 20;
+ else if (MF.getFunction()->hasFnAttr(Attribute::OptimizeForSize))
+ MaxDuplicateCount = 1;
+ else
+ MaxDuplicateCount = TailDuplicateSize;
+
+ // Check the instructions in the block to determine whether tail-duplication
+ // is invalid or unlikely to be profitable.
+ unsigned i = 0;
+ bool HasCall = false;
+ for (MachineBasicBlock::iterator I = TailBB->begin();
+ I != TailBB->end(); ++I, ++i) {
+ // Non-duplicable things shouldn't be tail-duplicated.
+ if (I->getDesc().isNotDuplicable()) return false;
+ // Don't duplicate more than the threshold.
+ if (i == MaxDuplicateCount) return false;
+ // Remember if we saw a call.
+ if (I->getDesc().isCall()) HasCall = true;
+ }
+ // Heuristically, don't tail-duplicate calls if it would expand code size,
+ // as it's less likely to be worth the extra cost.
+ if (i > 1 && HasCall)
+ return false;
+
+ // Iterate through all the unique predecessors and tail-duplicate this
+ // block into them, if possible. Copying the list ahead of time also
+ // avoids trouble with the predecessor list reallocating.
+ bool Changed = false;
+ SmallSetVector<MachineBasicBlock *, 8> Preds(TailBB->pred_begin(),
+ TailBB->pred_end());
+ for (SmallSetVector<MachineBasicBlock *, 8>::iterator PI = Preds.begin(),
+ PE = Preds.end(); PI != PE; ++PI) {
+ MachineBasicBlock *PredBB = *PI;
+
+ assert(TailBB != PredBB &&
+ "Single-block loop should have been rejected earlier!");
+ if (PredBB->succ_size() > 1) continue;
+
+ MachineBasicBlock *PredTBB, *PredFBB;
+ SmallVector<MachineOperand, 4> PredCond;
+ if (TII->AnalyzeBranch(*PredBB, PredTBB, PredFBB, PredCond, true))
+ continue;
+ if (!PredCond.empty())
+ continue;
+ // EH edges are ignored by AnalyzeBranch.
+ if (PredBB->succ_size() != 1)
+ continue;
+ // Don't duplicate into a fall-through predecessor (at least for now).
+ if (PredBB->isLayoutSuccessor(TailBB) && PredBB->canFallThrough())
+ continue;
+
+ DEBUG(errs() << "\nTail-duplicating into PredBB: " << *PredBB
+ << "From Succ: " << *TailBB);
+
+ // Remove PredBB's unconditional branch.
+ TII->RemoveBranch(*PredBB);
+ // Clone the contents of TailBB into PredBB.
+ for (MachineBasicBlock::iterator I = TailBB->begin(), E = TailBB->end();
+ I != E; ++I) {
+ MachineInstr *NewMI = MF.CloneMachineInstr(I);
+ PredBB->insert(PredBB->end(), NewMI);
+ }
+ NumInstrDups += TailBB->size() - 1; // subtract one for removed branch
+
+ // Update the CFG.
+ PredBB->removeSuccessor(PredBB->succ_begin());
+ assert(PredBB->succ_empty() &&
+ "TailDuplicate called on block with multiple successors!");
+ for (MachineBasicBlock::succ_iterator I = TailBB->succ_begin(),
+ E = TailBB->succ_end(); I != E; ++I)
+ PredBB->addSuccessor(*I);
+
+ Changed = true;
+ ++NumTailDups;
+ }
+
+ // If TailBB was duplicated into all its predecessors except for the prior
+ // block, which falls through unconditionally, move the contents of this
+ // block into the prior block.
+ MachineBasicBlock &PrevBB = *prior(MachineFunction::iterator(TailBB));
+ MachineBasicBlock *PriorTBB = 0, *PriorFBB = 0;
+ SmallVector<MachineOperand, 4> PriorCond;
+ bool PriorUnAnalyzable =
+ TII->AnalyzeBranch(PrevBB, PriorTBB, PriorFBB, PriorCond, true);
+ // This has to check PrevBB->succ_size() because EH edges are ignored by
+ // AnalyzeBranch.
+ if (!PriorUnAnalyzable && PriorCond.empty() && !PriorTBB &&
+ TailBB->pred_size() == 1 && PrevBB.succ_size() == 1 &&
+ !TailBB->hasAddressTaken()) {
+ DEBUG(errs() << "\nMerging into block: " << PrevBB
+ << "From MBB: " << *TailBB);
+ PrevBB.splice(PrevBB.end(), TailBB, TailBB->begin(), TailBB->end());
+ PrevBB.removeSuccessor(PrevBB.succ_begin());
+ assert(PrevBB.succ_empty());
+ PrevBB.transferSuccessors(TailBB);
+ Changed = true;
+ }
+
+ return Changed;
+}
+
+/// RemoveDeadBlock - Remove the specified dead machine basic block from the
+/// function, updating the CFG.
+void TailDuplicatePass::RemoveDeadBlock(MachineBasicBlock *MBB) {
+ assert(MBB->pred_empty() && "MBB must be dead!");
+ DEBUG(errs() << "\nRemoving MBB: " << *MBB);
+
+ // Remove all successors.
+ while (!MBB->succ_empty())
+ MBB->removeSuccessor(MBB->succ_end()-1);
+
+ // If there are any labels in the basic block, unregister them from
+ // MachineModuleInfo.
+ if (MMI && !MBB->empty()) {
+ for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end();
+ I != E; ++I) {
+ if (I->isLabel())
+ // The label ID # is always operand #0, an immediate.
+ MMI->InvalidateLabel(I->getOperand(0).getImm());
+ }
+ }
+
+ // Remove the block.
+ MBB->eraseFromParent();
+}
+
diff --git a/lib/CodeGen/VirtRegRewriter.cpp b/lib/CodeGen/VirtRegRewriter.cpp
index c836286..10c8066 100644
--- a/lib/CodeGen/VirtRegRewriter.cpp
+++ b/lib/CodeGen/VirtRegRewriter.cpp
@@ -1600,7 +1600,7 @@ private:
std::vector<MachineOperand*> &KillOps) {
DEBUG(errs() << "\n**** Local spiller rewriting MBB '"
- << MBB.getBasicBlock()->getName() << "':\n");
+ << MBB.getName() << "':\n");
MachineFunction &MF = *MBB.getParent();
diff --git a/lib/CompilerDriver/CompilationGraph.cpp b/lib/CompilerDriver/CompilationGraph.cpp
index bb0eb7b..3e6e050 100644
--- a/lib/CompilerDriver/CompilationGraph.cpp
+++ b/lib/CompilerDriver/CompilationGraph.cpp
@@ -471,10 +471,10 @@ namespace llvm {
struct DOTGraphTraits<llvmc::CompilationGraph*>
: public DefaultDOTGraphTraits
{
+ DOTGraphTraits (bool isSimple=false) : DefaultDOTGraphTraits(isSimple) {}
template<typename GraphType>
- static std::string getNodeLabel(const Node* N, const GraphType&,
- bool ShortNames)
+ static std::string getNodeLabel(const Node* N, const GraphType&)
{
if (N->ToolPtr)
if (N->ToolPtr->IsJoin())
diff --git a/lib/ExecutionEngine/JIT/JITEmitter.cpp b/lib/ExecutionEngine/JIT/JITEmitter.cpp
index 5f195ee..bbac762 100644
--- a/lib/ExecutionEngine/JIT/JITEmitter.cpp
+++ b/lib/ExecutionEngine/JIT/JITEmitter.cpp
@@ -83,15 +83,15 @@ namespace {
class JITResolverState {
public:
typedef ValueMap<Function*, void*, NoRAUWValueMapConfig<Function*> >
- FunctionToStubMapTy;
+ FunctionToLazyStubMapTy;
typedef std::map<void*, AssertingVH<Function> > CallSiteToFunctionMapTy;
typedef ValueMap<Function *, SmallPtrSet<void*, 1>,
CallSiteValueMapConfig> FunctionToCallSitesMapTy;
typedef std::map<AssertingVH<GlobalValue>, void*> GlobalToIndirectSymMapTy;
private:
- /// FunctionToStubMap - Keep track of the stub created for a particular
- /// function so that we can reuse them if necessary.
- FunctionToStubMapTy FunctionToStubMap;
+ /// FunctionToLazyStubMap - Keep track of the lazy stub created for a
+ /// particular function so that we can reuse them if necessary.
+ FunctionToLazyStubMapTy FunctionToLazyStubMap;
/// CallSiteToFunctionMap - Keep track of the function that each lazy call
/// site corresponds to, and vice versa.
@@ -103,12 +103,13 @@ namespace {
GlobalToIndirectSymMapTy GlobalToIndirectSymMap;
public:
- JITResolverState() : FunctionToStubMap(this),
+ JITResolverState() : FunctionToLazyStubMap(this),
FunctionToCallSitesMap(this) {}
- FunctionToStubMapTy& getFunctionToStubMap(const MutexGuard& locked) {
+ FunctionToLazyStubMapTy& getFunctionToLazyStubMap(
+ const MutexGuard& locked) {
assert(locked.holds(TheJIT->lock));
- return FunctionToStubMap;
+ return FunctionToLazyStubMap;
}
GlobalToIndirectSymMapTy& getGlobalToIndirectSymMap(const MutexGuard& locked) {
@@ -154,11 +155,11 @@ namespace {
Function *const F = C2F_I->second;
#ifndef NDEBUG
- void *RealStub = FunctionToStubMap.lookup(F);
+ void *RealStub = FunctionToLazyStubMap.lookup(F);
assert(RealStub == Stub &&
"Call-site that wasn't a stub pass in to EraseStub");
#endif
- FunctionToStubMap.erase(F);
+ FunctionToLazyStubMap.erase(F);
CallSiteToFunctionMap.erase(C2F_I);
// Remove the stub from the function->call-sites map, and remove the whole
@@ -196,7 +197,7 @@ namespace {
/// JITResolver - Keep track of, and resolve, call sites for functions that
/// have not yet been compiled.
class JITResolver {
- typedef JITResolverState::FunctionToStubMapTy FunctionToStubMapTy;
+ typedef JITResolverState::FunctionToLazyStubMapTy FunctionToLazyStubMapTy;
typedef JITResolverState::CallSiteToFunctionMapTy CallSiteToFunctionMapTy;
typedef JITResolverState::GlobalToIndirectSymMapTy GlobalToIndirectSymMapTy;
@@ -206,8 +207,11 @@ namespace {
JITResolverState state;
- /// ExternalFnToStubMap - This is the equivalent of FunctionToStubMap for
- /// external functions.
+ /// ExternalFnToStubMap - This is the equivalent of FunctionToLazyStubMap
+ /// for external functions. TODO: Of course, external functions don't need
+ /// a lazy stub. It's actually here to make it more likely that far calls
+ /// succeed, but no single stub can guarantee that. I'll remove this in a
+ /// subsequent checkin when I actually fix far calls.
std::map<void*, void*> ExternalFnToStubMap;
/// revGOTMap - map addresses to indexes in the GOT
@@ -230,14 +234,13 @@ namespace {
TheJITResolver = 0;
}
- /// getFunctionStubIfAvailable - This returns a pointer to a function stub
- /// if it has already been created.
- void *getFunctionStubIfAvailable(Function *F);
+ /// getLazyFunctionStubIfAvailable - This returns a pointer to a function's
+ /// lazy-compilation stub if it has already been created.
+ void *getLazyFunctionStubIfAvailable(Function *F);
- /// getFunctionStub - This returns a pointer to a function stub, creating
- /// one on demand as needed. If empty is true, create a function stub
- /// pointing at address 0, to be filled in later.
- void *getFunctionStub(Function *F);
+ /// getLazyFunctionStub - This returns a pointer to a function's
+ /// lazy-compilation stub, creating one on demand as needed.
+ void *getLazyFunctionStub(Function *F);
/// getExternalFunctionStub - Return a stub for the function at the
/// specified address, created lazily on demand.
@@ -268,10 +271,6 @@ namespace {
class JITEmitter : public JITCodeEmitter {
JITMemoryManager *MemMgr;
- // When outputting a function stub in the context of some other function, we
- // save BufferBegin/BufferEnd/CurBufferPtr here.
- uint8_t *SavedBufferBegin, *SavedBufferEnd, *SavedCurBufferPtr;
-
// When reattempting to JIT a function after running out of space, we store
// the estimated size of the function we're trying to JIT here, so we can
// ask the memory manager for at least this much space. When we
@@ -397,11 +396,11 @@ namespace {
void initJumpTableInfo(MachineJumpTableInfo *MJTI);
void emitJumpTableInfo(MachineJumpTableInfo *MJTI);
- virtual void startGVStub(const GlobalValue* GV, unsigned StubSize,
- unsigned Alignment = 1);
- virtual void startGVStub(const GlobalValue* GV, void *Buffer,
+ virtual void startGVStub(BufferState &BS, const GlobalValue* GV,
+ unsigned StubSize, unsigned Alignment = 1);
+ virtual void startGVStub(BufferState &BS, void *Buffer,
unsigned StubSize);
- virtual void* finishGVStub(const GlobalValue *GV);
+ virtual void* finishGVStub(BufferState &BS);
/// allocateSpace - Reserves space in the current block if any, or
/// allocate a new one of the given size.
@@ -489,22 +488,22 @@ void CallSiteValueMapConfig::onDelete(JITResolverState *JRS, Function *F) {
JRS->EraseAllCallSitesPrelocked(F);
}
-/// getFunctionStubIfAvailable - This returns a pointer to a function stub
+/// getLazyFunctionStubIfAvailable - This returns a pointer to a function stub
/// if it has already been created.
-void *JITResolver::getFunctionStubIfAvailable(Function *F) {
+void *JITResolver::getLazyFunctionStubIfAvailable(Function *F) {
MutexGuard locked(TheJIT->lock);
// If we already have a stub for this function, recycle it.
- return state.getFunctionToStubMap(locked).lookup(F);
+ return state.getFunctionToLazyStubMap(locked).lookup(F);
}
/// getFunctionStub - This returns a pointer to a function stub, creating
/// one on demand as needed.
-void *JITResolver::getFunctionStub(Function *F) {
+void *JITResolver::getLazyFunctionStub(Function *F) {
MutexGuard locked(TheJIT->lock);
- // If we already have a stub for this function, recycle it.
- void *&Stub = state.getFunctionToStubMap(locked)[F];
+ // If we already have a lazy stub for this function, recycle it.
+ void *&Stub = state.getFunctionToLazyStubMap(locked)[F];
if (Stub) return Stub;
// Call the lazy resolver function if we are JIT'ing lazily. Otherwise we
@@ -522,9 +521,13 @@ void *JITResolver::getFunctionStub(Function *F) {
if (!Actual) return 0;
}
+ MachineCodeEmitter::BufferState BS;
+ TargetJITInfo::StubLayout SL = TheJIT->getJITInfo().getStubLayout();
+ JE.startGVStub(BS, F, SL.Size, SL.Alignment);
// Codegen a new stub, calling the lazy resolver or the actual address of the
// external function, if it was resolved.
Stub = TheJIT->getJITInfo().emitFunctionStub(F, Actual, JE);
+ JE.finishGVStub(BS);
if (Actual != (void*)(intptr_t)LazyResolverFn) {
// If we are getting the stub for an external function, we really want the
@@ -533,7 +536,7 @@ void *JITResolver::getFunctionStub(Function *F) {
TheJIT->updateGlobalMapping(F, Stub);
}
- DEBUG(errs() << "JIT: Stub emitted at [" << Stub << "] for function '"
+ DEBUG(errs() << "JIT: Lazy stub emitted at [" << Stub << "] for function '"
<< F->getName() << "'\n");
// Finally, keep track of the stub-to-Function mapping so that the
@@ -576,7 +579,11 @@ void *JITResolver::getExternalFunctionStub(void *FnAddr) {
void *&Stub = ExternalFnToStubMap[FnAddr];
if (Stub) return Stub;
+ MachineCodeEmitter::BufferState BS;
+ TargetJITInfo::StubLayout SL = TheJIT->getJITInfo().getStubLayout();
+ JE.startGVStub(BS, 0, SL.Size, SL.Alignment);
Stub = TheJIT->getJITInfo().emitFunctionStub(0, FnAddr, JE);
+ JE.finishGVStub(BS);
DEBUG(errs() << "JIT: Stub emitted at [" << Stub
<< "] for external function at '" << FnAddr << "'\n");
@@ -598,10 +605,10 @@ void JITResolver::getRelocatableGVs(SmallVectorImpl<GlobalValue*> &GVs,
SmallVectorImpl<void*> &Ptrs) {
MutexGuard locked(TheJIT->lock);
- const FunctionToStubMapTy &FM = state.getFunctionToStubMap(locked);
+ const FunctionToLazyStubMapTy &FM = state.getFunctionToLazyStubMap(locked);
GlobalToIndirectSymMapTy &GM = state.getGlobalToIndirectSymMap(locked);
- for (FunctionToStubMapTy::const_iterator i = FM.begin(), e = FM.end();
+ for (FunctionToLazyStubMapTy::const_iterator i = FM.begin(), e = FM.end();
i != e; ++i){
Function *F = i->first;
if (F->isDeclaration() && F->hasExternalLinkage()) {
@@ -727,32 +734,37 @@ void *JITEmitter::getPointerToGlobal(GlobalValue *V, void *Reference,
// If we have already compiled the function, return a pointer to its body.
Function *F = cast<Function>(V);
- void *FnStub = Resolver.getFunctionStubIfAvailable(F);
+ void *FnStub = Resolver.getLazyFunctionStubIfAvailable(F);
if (FnStub) {
- // Return the function stub if it's already created. We do this first
- // so that we're returning the same address for the function as any
- // previous call.
+ // Return the function stub if it's already created. We do this first so
+ // that we're returning the same address for the function as any previous
+ // call. TODO: Yes, this is wrong. The lazy stub isn't guaranteed to be
+ // close enough to call.
AddStubToCurrentFunction(FnStub);
return FnStub;
}
- // Otherwise if we have code, go ahead and return that.
- void *ResultPtr = TheJIT->getPointerToGlobalIfAvailable(F);
- if (ResultPtr) return ResultPtr;
+ // If we know the target can handle arbitrary-distance calls, try to
+ // return a direct pointer.
+ if (!MayNeedFarStub) {
+ // If we have code, go ahead and return that.
+ void *ResultPtr = TheJIT->getPointerToGlobalIfAvailable(F);
+ if (ResultPtr) return ResultPtr;
- // If this is an external function pointer, we can force the JIT to
- // 'compile' it, which really just adds it to the map.
- if (F->isDeclaration() && !F->hasNotBeenReadFromBitcode() &&
- !MayNeedFarStub)
- return TheJIT->getPointerToFunction(F);
+ // If this is an external function pointer, we can force the JIT to
+ // 'compile' it, which really just adds it to the map.
+ if (F->isDeclaration() && !F->hasNotBeenReadFromBitcode())
+ return TheJIT->getPointerToFunction(F);
+ }
- // Otherwise, we have to emit a stub.
- void *StubAddr = Resolver.getFunctionStub(F);
+ // Otherwise, we may need to emit a stub, and, conservatively, we
+ // always do so.
+ void *StubAddr = Resolver.getLazyFunctionStub(F);
// Add the stub to the current function's list of referenced stubs, so we can
// deallocate them if the current function is ever freed. It's possible to
- // return null from getFunctionStub in the case of a weak extern that fails
- // to resolve.
+ // return null from getLazyFunctionStub in the case of a weak extern that
+ // fails to resolve.
if (StubAddr)
AddStubToCurrentFunction(StubAddr);
@@ -1203,9 +1215,8 @@ bool JITEmitter::finishFunction(MachineFunction &F) {
if (DwarfExceptionHandling || JITEmitDebugInfo) {
uintptr_t ActualSize = 0;
- SavedBufferBegin = BufferBegin;
- SavedBufferEnd = BufferEnd;
- SavedCurBufferPtr = CurBufferPtr;
+ BufferState BS;
+ SaveStateTo(BS);
if (MemMgr->NeedsExactSize()) {
ActualSize = DE->GetDwarfTableSizeInBytes(F, *this, FnStart, FnEnd);
@@ -1221,9 +1232,7 @@ bool JITEmitter::finishFunction(MachineFunction &F) {
MemMgr->endExceptionTable(F.getFunction(), BufferBegin, CurBufferPtr,
FrameRegister);
uint8_t *EhEnd = CurBufferPtr;
- BufferBegin = SavedBufferBegin;
- BufferEnd = SavedBufferEnd;
- CurBufferPtr = SavedCurBufferPtr;
+ RestoreStateFrom(BS);
if (DwarfExceptionHandling) {
TheJIT->RegisterTable(FrameRegister);
@@ -1429,32 +1438,27 @@ void JITEmitter::emitJumpTableInfo(MachineJumpTableInfo *MJTI) {
}
}
-void JITEmitter::startGVStub(const GlobalValue* GV, unsigned StubSize,
- unsigned Alignment) {
- SavedBufferBegin = BufferBegin;
- SavedBufferEnd = BufferEnd;
- SavedCurBufferPtr = CurBufferPtr;
+void JITEmitter::startGVStub(BufferState &BS, const GlobalValue* GV,
+ unsigned StubSize, unsigned Alignment) {
+ SaveStateTo(BS);
BufferBegin = CurBufferPtr = MemMgr->allocateStub(GV, StubSize, Alignment);
BufferEnd = BufferBegin+StubSize+1;
}
-void JITEmitter::startGVStub(const GlobalValue* GV, void *Buffer,
- unsigned StubSize) {
- SavedBufferBegin = BufferBegin;
- SavedBufferEnd = BufferEnd;
- SavedCurBufferPtr = CurBufferPtr;
+void JITEmitter::startGVStub(BufferState &BS, void *Buffer, unsigned StubSize) {
+ SaveStateTo(BS);
BufferBegin = CurBufferPtr = (uint8_t *)Buffer;
BufferEnd = BufferBegin+StubSize+1;
}
-void *JITEmitter::finishGVStub(const GlobalValue* GV) {
+void *JITEmitter::finishGVStub(BufferState &BS) {
+ assert(CurBufferPtr != BufferEnd && "Stub overflowed allocated space.");
NumBytes += getCurrentPCOffset();
- std::swap(SavedBufferBegin, BufferBegin);
- BufferEnd = SavedBufferEnd;
- CurBufferPtr = SavedCurBufferPtr;
- return SavedBufferBegin;
+ void *Result = BufferBegin;
+ RestoreStateFrom(BS);
+ return Result;
}
// getConstantPoolEntryAddress - Return the address of the 'ConstantNum' entry
@@ -1530,19 +1534,23 @@ void *JIT::getPointerToFunctionOrStub(Function *F) {
// Get a stub if the target supports it.
assert(isa<JITEmitter>(JCE) && "Unexpected MCE?");
JITEmitter *JE = cast<JITEmitter>(getCodeEmitter());
- return JE->getJITResolver().getFunctionStub(F);
+ return JE->getJITResolver().getLazyFunctionStub(F);
}
void JIT::updateFunctionStub(Function *F) {
// Get the empty stub we generated earlier.
assert(isa<JITEmitter>(JCE) && "Unexpected MCE?");
JITEmitter *JE = cast<JITEmitter>(getCodeEmitter());
- void *Stub = JE->getJITResolver().getFunctionStub(F);
+ void *Stub = JE->getJITResolver().getLazyFunctionStub(F);
+ void *Addr = getPointerToGlobalIfAvailable(F);
// Tell the target jit info to rewrite the stub at the specified address,
// rather than creating a new one.
- void *Addr = getPointerToGlobalIfAvailable(F);
- getJITInfo().emitFunctionStubAtAddr(F, Addr, Stub, *getCodeEmitter());
+ MachineCodeEmitter::BufferState BS;
+ TargetJITInfo::StubLayout layout = getJITInfo().getStubLayout();
+ JE->startGVStub(BS, Stub, layout.Size);
+ getJITInfo().emitFunctionStub(F, Addr, *getCodeEmitter());
+ JE->finishGVStub(BS);
}
/// freeMachineCodeForFunction - release machine code memory for given Function.
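
The startGVStub/finishGVStub changes above all revolve around one pattern: snapshot the emitter's three buffer pointers, retarget them at freshly allocated stub memory, emit, then restore the snapshot and hand back the stub address. A stand-alone sketch of that pattern (Emitter, startStub and emitByte are hypothetical stand-ins, not the JITEmitter API):

    #include <cassert>
    #include <cstdint>
    #include <cstdio>

    // Minimal emitter whose output buffer can be temporarily retargeted.
    struct Emitter {
      uint8_t *BufferBegin, *BufferEnd, *CurBufferPtr;

      // Snapshot of the three buffer pointers, as in BufferState above.
      struct BufferState { uint8_t *Begin, *End, *Cur; };

      void saveStateTo(BufferState &BS) {
        BS.Begin = BufferBegin; BS.End = BufferEnd; BS.Cur = CurBufferPtr;
      }
      void restoreStateFrom(const BufferState &BS) {
        BufferBegin = BS.Begin; BufferEnd = BS.End; CurBufferPtr = BS.Cur;
      }

      // Mirrors startGVStub/finishGVStub: the caller-owned BufferState
      // keeps the outer buffer alive while the stub is being emitted.
      void startStub(BufferState &BS, uint8_t *StubMem, unsigned StubSize) {
        saveStateTo(BS);
        BufferBegin = CurBufferPtr = StubMem;
        BufferEnd = StubMem + StubSize;
      }
      void *finishStub(const BufferState &BS) {
        assert(CurBufferPtr <= BufferEnd && "stub overflowed its allocation");
        void *Result = BufferBegin;
        restoreStateFrom(BS);
        return Result;
      }

      void emitByte(uint8_t B) {
        if (CurBufferPtr != BufferEnd) *CurBufferPtr++ = B;
      }
    };

    int main() {
      uint8_t Func[16] = {0}, Stub[4] = {0};
      Emitter E;
      E.BufferBegin = E.CurBufferPtr = Func;
      E.BufferEnd = Func + sizeof(Func);
      E.emitByte(0x90);                     // emitting the "function"

      Emitter::BufferState BS;
      E.startStub(BS, Stub, sizeof(Stub));  // divert output to the stub
      E.emitByte(0xE9);
      void *StubAddr = E.finishStub(BS);    // back to the function buffer

      assert(StubAddr == Stub && E.CurBufferPtr == Func + 1);
      std::printf("stub at %p\n", StubAddr);
      return 0;
    }
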
diff --git a/lib/Support/CommandLine.cpp b/lib/Support/CommandLine.cpp
index 59340d4..9cf9c89 100644
--- a/lib/Support/CommandLine.cpp
+++ b/lib/Support/CommandLine.cpp
@@ -39,6 +39,7 @@ using namespace cl;
//===----------------------------------------------------------------------===//
// Template instantiations and anchors.
//
+namespace llvm { namespace cl {
TEMPLATE_INSTANTIATION(class basic_parser<bool>);
TEMPLATE_INSTANTIATION(class basic_parser<boolOrDefault>);
TEMPLATE_INSTANTIATION(class basic_parser<int>);
@@ -53,6 +54,7 @@ TEMPLATE_INSTANTIATION(class opt<int>);
TEMPLATE_INSTANTIATION(class opt<std::string>);
TEMPLATE_INSTANTIATION(class opt<char>);
TEMPLATE_INSTANTIATION(class opt<bool>);
+} } // end namespace llvm::cl
void Option::anchor() {}
void basic_parser_impl::anchor() {}
@@ -156,9 +158,9 @@ static Option *LookupOption(StringRef &Arg, StringRef &Value,
const StringMap<Option*> &OptionsMap) {
// Reject all dashes.
if (Arg.empty()) return 0;
-
+
size_t EqualPos = Arg.find('=');
-
+
// If we have an equals sign, remember the value.
if (EqualPos == StringRef::npos) {
// Look up the option.
@@ -171,13 +173,43 @@ static Option *LookupOption(StringRef &Arg, StringRef &Value,
StringMap<Option*>::const_iterator I =
OptionsMap.find(Arg.substr(0, EqualPos));
if (I == OptionsMap.end()) return 0;
-
+
Value = Arg.substr(EqualPos+1);
Arg = Arg.substr(0, EqualPos);
return I->second;
}
+/// CommaSeparateAndAddOccurence - A wrapper around Handler->addOccurrence() that
+/// does special handling of cl::CommaSeparated options.
+static bool CommaSeparateAndAddOccurence(Option *Handler, unsigned pos,
+ StringRef ArgName,
+ StringRef Value, bool MultiArg = false)
+{
+ // Check to see if this option accepts a comma separated list of values. If
+ // it does, we have to split up the value into multiple values.
+ if (Handler->getMiscFlags() & CommaSeparated) {
+ StringRef Val(Value);
+ StringRef::size_type Pos = Val.find(',');
+
+ while (Pos != StringRef::npos) {
+ // Process the portion before the comma.
+ if (Handler->addOccurrence(pos, ArgName, Val.substr(0, Pos), MultiArg))
+ return true;
+ // Erase the portion before the comma, AND the comma.
+ Val = Val.substr(Pos+1);
+ Value = Value.substr(Pos+1); // Advance the original value as well.
+ // Check for another comma.
+ Pos = Val.find(',');
+ }
+ Value = Val;
+ }
+
+ if (Handler->addOccurrence(pos, ArgName, Value, MultiArg))
+ return true;
+
+ return false;
+}
/// ProvideOption - For Value, this differentiates between an empty value ("")
/// and a null value (StringRef()). The latter is accepted for arguments that
@@ -209,7 +241,7 @@ static inline bool ProvideOption(Option *Handler, StringRef ArgName,
break;
case ValueOptional:
break;
-
+
default:
errs() << ProgramName
<< ": Bad ValueMask flag! CommandLine usage error:"
@@ -219,13 +251,13 @@ static inline bool ProvideOption(Option *Handler, StringRef ArgName,
// If this isn't a multi-arg option, just run the handler.
if (NumAdditionalVals == 0)
- return Handler->addOccurrence(i, ArgName, Value);
+ return CommaSeparateAndAddOccurence(Handler, i, ArgName, Value);
// If it is, run the handle several times.
bool MultiArg = false;
if (Value.data()) {
- if (Handler->addOccurrence(i, ArgName, Value, MultiArg))
+ if (CommaSeparateAndAddOccurence(Handler, i, ArgName, Value, MultiArg))
return true;
--NumAdditionalVals;
MultiArg = true;
@@ -235,8 +267,8 @@ static inline bool ProvideOption(Option *Handler, StringRef ArgName,
if (i+1 >= argc)
return Handler->error("not enough values!");
Value = argv[++i];
-
- if (Handler->addOccurrence(i, ArgName, Value, MultiArg))
+
+ if (CommaSeparateAndAddOccurence(Handler, i, ArgName, Value, MultiArg))
return true;
MultiArg = true;
--NumAdditionalVals;
@@ -298,7 +330,7 @@ static Option *HandlePrefixedOrGroupedOption(StringRef &Arg, StringRef &Value,
size_t Length = 0;
Option *PGOpt = getOptionPred(Arg, Length, isPrefixedOrGrouping, OptionsMap);
if (PGOpt == 0) return 0;
-
+
// If the option is a prefixed option, then the value is simply the
// rest of the name... so fall through to later processing, by
// setting up the argument name flags and value fields.
@@ -308,16 +340,16 @@ static Option *HandlePrefixedOrGroupedOption(StringRef &Arg, StringRef &Value,
assert(OptionsMap.count(Arg) && OptionsMap.find(Arg)->second == PGOpt);
return PGOpt;
}
-
+
// This must be a grouped option... handle them now. Grouping options can't
// have values.
assert(isGrouping(PGOpt) && "Broken getOptionPred!");
-
+
do {
// Move current arg name out of Arg into OneArgName.
StringRef OneArgName = Arg.substr(0, Length);
Arg = Arg.substr(Length);
-
+
// Because ValueRequired is an invalid flag for grouped arguments,
// we don't need to pass argc/argv in.
assert(PGOpt->getValueExpectedFlag() != cl::ValueRequired &&
@@ -325,11 +357,11 @@ static Option *HandlePrefixedOrGroupedOption(StringRef &Arg, StringRef &Value,
int Dummy;
ErrorParsing |= ProvideOption(PGOpt, OneArgName,
StringRef(), 0, 0, Dummy);
-
+
// Get the next grouping option.
PGOpt = getOptionPred(Arg, Length, isGrouping, OptionsMap);
} while (PGOpt && Length != Arg.size());
-
+
// Return the last option with Arg cut down to just the last one.
return PGOpt;
}
@@ -366,17 +398,17 @@ static void ParseCStringVector(std::vector<char *> &OutputVector,
WorkStr = WorkStr.substr(Pos);
continue;
}
-
+
// Find position of first delimiter.
size_t Pos = WorkStr.find_first_of(Delims);
if (Pos == StringRef::npos) Pos = WorkStr.size();
-
+
// Everything from 0 to Pos is the next word to copy.
char *NewStr = (char*)malloc(Pos+1);
memcpy(NewStr, WorkStr.data(), Pos);
NewStr[Pos] = 0;
OutputVector.push_back(NewStr);
-
+
WorkStr = WorkStr.substr(Pos);
}
}
@@ -563,7 +595,7 @@ void cl::ParseCommandLineOptions(int argc, char **argv,
ProvidePositionalOption(ActivePositionalArg, argv[i], i);
continue; // We are done!
}
-
+
if (!PositionalOpts.empty()) {
PositionalVals.push_back(std::make_pair(argv[i],i));
@@ -593,7 +625,7 @@ void cl::ParseCommandLineOptions(int argc, char **argv,
// Eat leading dashes.
while (!ArgName.empty() && ArgName[0] == '-')
ArgName = ArgName.substr(1);
-
+
Handler = LookupOption(ArgName, Value, Opts);
if (!Handler || Handler->getFormattingFlag() != cl::Positional) {
ProvidePositionalOption(ActivePositionalArg, argv[i], i);
@@ -605,7 +637,7 @@ void cl::ParseCommandLineOptions(int argc, char **argv,
// Eat leading dashes.
while (!ArgName.empty() && ArgName[0] == '-')
ArgName = ArgName.substr(1);
-
+
Handler = LookupOption(ArgName, Value, Opts);
// Check to see if this "option" is really a prefixed or grouped argument.
@@ -627,26 +659,6 @@ void cl::ParseCommandLineOptions(int argc, char **argv,
continue;
}
- // Check to see if this option accepts a comma separated list of values. If
- // it does, we have to split up the value into multiple values.
- if (Handler->getMiscFlags() & CommaSeparated) {
- StringRef Val(Value);
- StringRef::size_type Pos = Val.find(',');
-
- while (Pos != StringRef::npos) {
- // Process the portion before the comma.
- ErrorParsing |= ProvideOption(Handler, ArgName, Val.substr(0, Pos),
- argc, argv, i);
- // Erase the portion before the comma, AND the comma.
- Val = Val.substr(Pos+1);
- Value.substr(Pos+1); // Increment the original value pointer as well.
-
- // Check for another comma.
- Pos = Val.find(',');
- }
- Value = Val;
- }
-
// If this is a named positional argument, just remember that it is the
// active one...
if (Handler->getFormattingFlag() == cl::Positional)
@@ -881,7 +893,7 @@ bool parser<bool>::parse(Option &O, StringRef ArgName,
Value = true;
return false;
}
-
+
if (Arg == "false" || Arg == "FALSE" || Arg == "False" || Arg == "0") {
Value = false;
return false;
@@ -903,7 +915,7 @@ bool parser<boolOrDefault>::parse(Option &O, StringRef ArgName,
Value = BOU_FALSE;
return false;
}
-
+
return O.error("'" + Arg +
"' is invalid value for boolean argument! Try 0 or 1");
}
@@ -1020,7 +1032,7 @@ void generic_parser_base::printOptionInfo(const Option &O,
static int OptNameCompare(const void *LHS, const void *RHS) {
typedef std::pair<const char *, Option*> pair_ty;
-
+
return strcmp(((pair_ty*)LHS)->first, ((pair_ty*)RHS)->first);
}
@@ -1054,11 +1066,11 @@ public:
// Ignore really-hidden options.
if (I->second->getOptionHiddenFlag() == ReallyHidden)
continue;
-
+
// Unless showhidden is set, ignore hidden flags.
if (I->second->getOptionHiddenFlag() == Hidden && !ShowHidden)
continue;
-
+
// If we've already seen this option, don't add it to the list again.
if (!OptionSet.insert(I->second))
continue;
@@ -1066,7 +1078,7 @@ public:
Opts.push_back(std::pair<const char *, Option*>(I->getKey().data(),
I->second));
}
-
+
// Sort the options list alphabetically.
qsort(Opts.data(), Opts.size(), sizeof(Opts[0]), OptNameCompare);
@@ -1164,7 +1176,7 @@ public:
std::vector<std::pair<const char *, const Target*> > Targets;
size_t Width = 0;
- for (TargetRegistry::iterator it = TargetRegistry::begin(),
+ for (TargetRegistry::iterator it = TargetRegistry::begin(),
ie = TargetRegistry::end(); it != ie; ++it) {
Targets.push_back(std::make_pair(it->getName(), &*it));
Width = std::max(Width, strlen(Targets.back().first));
@@ -1183,7 +1195,7 @@ public:
}
void operator=(bool OptionWasSpecified) {
if (!OptionWasSpecified) return;
-
+
if (OverrideVersionPrinter == 0) {
print();
exit(1);
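
The new CommaSeparateAndAddOccurence helper turns one cl::CommaSeparated value into several occurrences by repeatedly splitting at the first comma. The splitting itself reduces to the following sketch (std::string instead of StringRef; addOccurrence here is a stand-in for the option handler):

    #include <iostream>
    #include <string>

    // Stand-in for Option::addOccurrence(): just record the piece.
    static void addOccurrence(const std::string &Val) {
      std::cout << "occurrence: " << Val << '\n';
    }

    // Split a comma-separated value and report each piece as its own
    // occurrence, mirroring CommaSeparateAndAddOccurence above.
    static void commaSeparate(std::string Value) {
      std::string::size_type Pos = Value.find(',');
      while (Pos != std::string::npos) {
        addOccurrence(Value.substr(0, Pos)); // piece before the comma
        Value = Value.substr(Pos + 1);       // drop the piece and the comma
        Pos = Value.find(',');
      }
      addOccurrence(Value);                  // the final (or only) piece
    }

    int main() {
      commaSeparate("a,b,c"); // prints three occurrences: a, b, c
      return 0;
    }
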
diff --git a/lib/Support/SourceMgr.cpp b/lib/Support/SourceMgr.cpp
index 4b93f7f..7dd42f4 100644
--- a/lib/Support/SourceMgr.cpp
+++ b/lib/Support/SourceMgr.cpp
@@ -136,7 +136,7 @@ void SourceMgr::PrintIncludeStack(SMLoc IncludeLoc, raw_ostream &OS) const {
/// @param Type - If non-null, the kind of message (e.g., "error") which is
/// prefixed to the message.
SMDiagnostic SourceMgr::GetMessage(SMLoc Loc, const std::string &Msg,
- const char *Type) const {
+ const char *Type, bool ShowLine) const {
// First thing to do: find the current buffer containing the specified
// location.
@@ -144,18 +144,22 @@ SMDiagnostic SourceMgr::GetMessage(SMLoc Loc, const std::string &Msg,
assert(CurBuf != -1 && "Invalid or unspecified location!");
MemoryBuffer *CurMB = getBufferInfo(CurBuf).Buffer;
-
-
+
// Scan backward to find the start of the line.
const char *LineStart = Loc.getPointer();
- while (LineStart != CurMB->getBufferStart() &&
+ while (LineStart != CurMB->getBufferStart() &&
LineStart[-1] != '\n' && LineStart[-1] != '\r')
--LineStart;
- // Get the end of the line.
- const char *LineEnd = Loc.getPointer();
- while (LineEnd != CurMB->getBufferEnd() &&
- LineEnd[0] != '\n' && LineEnd[0] != '\r')
- ++LineEnd;
+
+ std::string LineStr;
+ if (ShowLine) {
+ // Get the end of the line.
+ const char *LineEnd = Loc.getPointer();
+ while (LineEnd != CurMB->getBufferEnd() &&
+ LineEnd[0] != '\n' && LineEnd[0] != '\r')
+ ++LineEnd;
+ LineStr = std::string(LineStart, LineEnd);
+ }
std::string PrintedMsg;
if (Type) {
@@ -163,22 +167,21 @@ SMDiagnostic SourceMgr::GetMessage(SMLoc Loc, const std::string &Msg,
PrintedMsg += ": ";
}
PrintedMsg += Msg;
-
- // Print out the line.
+
return SMDiagnostic(CurMB->getBufferIdentifier(), FindLineNumber(Loc, CurBuf),
Loc.getPointer()-LineStart, PrintedMsg,
- std::string(LineStart, LineEnd));
+ LineStr, ShowLine);
}
void SourceMgr::PrintMessage(SMLoc Loc, const std::string &Msg,
- const char *Type) const {
+ const char *Type, bool ShowLine) const {
raw_ostream &OS = errs();
int CurBuf = FindBufferContainingLoc(Loc);
assert(CurBuf != -1 && "Invalid or unspecified location!");
PrintIncludeStack(getBufferInfo(CurBuf).IncludeLoc, OS);
- GetMessage(Loc, Msg, Type).Print(0, OS);
+ GetMessage(Loc, Msg, Type, ShowLine).Print(0, OS);
}
//===----------------------------------------------------------------------===//
@@ -201,8 +204,8 @@ void SMDiagnostic::Print(const char *ProgName, raw_ostream &S) {
}
S << ": " << Message << '\n';
-
- if (LineNo != -1 && ColumnNo != -1) {
+
+ if (LineNo != -1 && ColumnNo != -1 && ShowLine) {
S << LineContents << '\n';
// Print out spaces/tabs before the caret.
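
With the ShowLine flag threaded through, SMDiagnostic::Print emits the source line and caret only on request. The rendering it gates reduces to this sketch (printDiagnostic is a hypothetical stand-in; tabs are copied through so the caret stays aligned, as in the real printer):

    #include <iostream>
    #include <string>

    static void printDiagnostic(const std::string &Msg,
                                const std::string &LineContents,
                                int ColumnNo, bool ShowLine) {
      std::cout << Msg << '\n';
      if (!ShowLine)
        return;
      // Echo the offending line, then spaces/tabs up to the column and '^'.
      std::cout << LineContents << '\n';
      for (int i = 0; i != ColumnNo; ++i)
        std::cout << (LineContents[i] == '\t' ? '\t' : ' ');
      std::cout << "^\n";
    }

    int main() {
      printDiagnostic("error: expected ';'", "int x = 1", 9, true);
      printDiagnostic("error: expected ';'", "", -1, false); // line suppressed
      return 0;
    }
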
diff --git a/lib/Support/StringRef.cpp b/lib/Support/StringRef.cpp
index 51e1100..2d023e4 100644
--- a/lib/Support/StringRef.cpp
+++ b/lib/Support/StringRef.cpp
@@ -23,7 +23,7 @@ static char ascii_tolower(char x) {
/// compare_lower - Compare strings, ignoring case.
int StringRef::compare_lower(StringRef RHS) const {
- for (size_t I = 0, E = std::min(Length, RHS.Length); I != E; ++I) {
+ for (size_t I = 0, E = min(Length, RHS.Length); I != E; ++I) {
char LHC = ascii_tolower(Data[I]);
char RHC = ascii_tolower(RHS.Data[I]);
if (LHC != RHC)
@@ -48,7 +48,7 @@ size_t StringRef::find(StringRef Str, size_t From) const {
size_t N = Str.size();
if (N > Length)
return npos;
- for (size_t e = Length - N + 1, i = std::min(From, e); i != e; ++i)
+ for (size_t e = Length - N + 1, i = min(From, e); i != e; ++i)
if (substr(i, N).equals(Str))
return i;
return npos;
@@ -76,7 +76,7 @@ size_t StringRef::rfind(StringRef Str) const {
/// Note: O(size() * Chars.size())
StringRef::size_type StringRef::find_first_of(StringRef Chars,
size_t From) const {
- for (size_type i = std::min(From, Length), e = Length; i != e; ++i)
+ for (size_type i = min(From, Length), e = Length; i != e; ++i)
if (Chars.find(Data[i]) != npos)
return i;
return npos;
@@ -85,7 +85,7 @@ StringRef::size_type StringRef::find_first_of(StringRef Chars,
/// find_first_not_of - Find the first character in the string that is not
/// \arg C or npos if not found.
StringRef::size_type StringRef::find_first_not_of(char C, size_t From) const {
- for (size_type i = std::min(From, Length), e = Length; i != e; ++i)
+ for (size_type i = min(From, Length), e = Length; i != e; ++i)
if (Data[i] != C)
return i;
return npos;
@@ -97,7 +97,7 @@ StringRef::size_type StringRef::find_first_not_of(char C, size_t From) const {
/// Note: O(size() * Chars.size())
StringRef::size_type StringRef::find_first_not_of(StringRef Chars,
size_t From) const {
- for (size_type i = std::min(From, Length), e = Length; i != e; ++i)
+ for (size_type i = min(From, Length), e = Length; i != e; ++i)
if (Chars.find(Data[i]) == npos)
return i;
return npos;
diff --git a/lib/Support/Triple.cpp b/lib/Support/Triple.cpp
index 840fb98..2fec094 100644
--- a/lib/Support/Triple.cpp
+++ b/lib/Support/Triple.cpp
@@ -90,6 +90,7 @@ const char *Triple::getOSTypeName(OSType Kind) {
case DragonFly: return "dragonfly";
case FreeBSD: return "freebsd";
case Linux: return "linux";
+ case Lv2: return "lv2";
case MinGW32: return "mingw32";
case MinGW64: return "mingw64";
case NetBSD: return "netbsd";
@@ -227,7 +228,7 @@ void Triple::Parse() const {
Arch = pic16;
else if (ArchName == "powerpc")
Arch = ppc;
- else if (ArchName == "powerpc64")
+ else if ((ArchName == "powerpc64") || (ArchName == "ppu"))
Arch = ppc64;
else if (ArchName == "arm" ||
ArchName.startswith("armv") ||
@@ -293,6 +294,8 @@ void Triple::Parse() const {
OS = FreeBSD;
else if (OSName.startswith("linux"))
OS = Linux;
+ else if (OSName.startswith("lv2"))
+ OS = Lv2;
else if (OSName.startswith("mingw32"))
OS = MinGW32;
else if (OSName.startswith("mingw64"))
diff --git a/lib/System/CMakeLists.txt b/lib/System/CMakeLists.txt
index 2945e33..a56a1f7 100644
--- a/lib/System/CMakeLists.txt
+++ b/lib/System/CMakeLists.txt
@@ -42,5 +42,5 @@ add_llvm_library(LLVMSystem
)
if( BUILD_SHARED_LIBS AND NOT WIN32 )
- target_link_libraries(LLVMSystem dl)
+ target_link_libraries(LLVMSystem ${CMAKE_DL_LIBS})
endif()
diff --git a/lib/System/Host.cpp b/lib/System/Host.cpp
index 37591a5..e112698 100644
--- a/lib/System/Host.cpp
+++ b/lib/System/Host.cpp
@@ -22,6 +22,9 @@
#ifdef LLVM_ON_WIN32
#include "Win32/Host.inc"
#endif
+#ifdef _MSC_VER
+#include <intrin.h>
+#endif
//===----------------------------------------------------------------------===//
//
diff --git a/lib/System/Unix/Path.inc b/lib/System/Unix/Path.inc
index 89285b4..ff1497a 100644
--- a/lib/System/Unix/Path.inc
+++ b/lib/System/Unix/Path.inc
@@ -335,7 +335,7 @@ getprogpath(char ret[PATH_MAX], const char *bin)
free(pv);
return (NULL);
}
-#endif
+#endif // __FreeBSD__
/// GetMainExecutable - Return the path to the main executable, given the
/// value of argv[0] from program startup.
@@ -348,7 +348,8 @@ Path Path::GetMainExecutable(const char *argv0, void *MainAddr) {
uint32_t size = sizeof(exe_path);
if (_NSGetExecutablePath(exe_path, &size) == 0) {
char link_path[MAXPATHLEN];
- return Path(std::string(realpath(exe_path, link_path)));
+ if (realpath(exe_path, link_path))
+ return Path(std::string(link_path));
}
#elif defined(__FreeBSD__)
char exe_path[PATH_MAX];
@@ -370,7 +371,8 @@ Path Path::GetMainExecutable(const char *argv0, void *MainAddr) {
// If the filename is a symlink, we need to resolve and return the location of
// the actual executable.
char link_path[MAXPATHLEN];
- return Path(std::string(realpath(DLInfo.dli_fname, link_path)));
+ if (realpath(DLInfo.dli_fname, link_path))
+ return Path(std::string(link_path));
#endif
return Path();
}
@@ -454,6 +456,20 @@ Path::canWrite() const {
}
bool
+Path::isRegularFile() const {
+ // Get the status so we can determine if it's a file or directory
+ struct stat buf;
+
+ if (0 != stat(path.c_str(), &buf))
+ return false;
+
+ if (S_ISREG(buf.st_mode))
+ return true;
+
+ return false;
+}
+
+bool
Path::canExecute() const {
if (0 != access(path.c_str(), R_OK | X_OK ))
return false;
@@ -723,7 +739,7 @@ Path::createTemporaryFileOnDisk(bool reuse_current, std::string* ErrMsg) {
bool
Path::eraseFromDisk(bool remove_contents, std::string *ErrStr) const {
- // Get the status so we can determin if its a file or directory
+ // Get the status so we can determine if its a file or directory
struct stat buf;
if (0 != stat(path.c_str(), &buf)) {
MakeErrMsg(ErrStr, path + ": can't get status of file");
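
The new Path::isRegularFile is the classic stat(2)-plus-S_ISREG test; a stand-alone POSIX version of the same check (it won't build as-is on Windows, where the patch answers via isDirectory() instead):

    #include <sys/stat.h>
    #include <cstdio>

    // stat() the path and test the regular-file bit; anything we cannot
    // stat is reported as "not a regular file", as in Path::isRegularFile.
    static bool isRegularFile(const char *path) {
      struct stat buf;
      if (stat(path, &buf) != 0)
        return false;
      return S_ISREG(buf.st_mode);
    }

    int main(int argc, char **argv) {
      const char *p = argc > 1 ? argv[1] : "/etc/passwd";
      std::printf("%s: %s\n", p,
                  isRegularFile(p) ? "regular file" : "not a regular file");
      return 0;
    }
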
diff --git a/lib/System/Win32/Path.inc b/lib/System/Win32/Path.inc
index 573369e..634fbc7 100644
--- a/lib/System/Win32/Path.inc
+++ b/lib/System/Win32/Path.inc
@@ -357,6 +357,13 @@ Path::canExecute() const {
return attr != INVALID_FILE_ATTRIBUTES;
}
+bool
+Path::isRegularFile() const {
+ if (isDirectory())
+ return false;
+ return true;
+}
+
std::string
Path::getLast() const {
// Find the last slash
diff --git a/lib/Target/ARM/ARMBaseInstrInfo.cpp b/lib/Target/ARM/ARMBaseInstrInfo.cpp
index b50b609..c95d4c8 100644
--- a/lib/Target/ARM/ARMBaseInstrInfo.cpp
+++ b/lib/Target/ARM/ARMBaseInstrInfo.cpp
@@ -402,6 +402,21 @@ bool ARMBaseInstrInfo::DefinesPredicate(MachineInstr *MI,
return Found;
}
+/// isPredicable - Return true if the specified instruction can be predicated.
+/// By default, this returns true for every instruction with a
+/// PredicateOperand.
+bool ARMBaseInstrInfo::isPredicable(MachineInstr *MI) const {
+ const TargetInstrDesc &TID = MI->getDesc();
+ if (!TID.isPredicable())
+ return false;
+
+ if ((TID.TSFlags & ARMII::DomainMask) == ARMII::DomainNEON) {
+ ARMFunctionInfo *AFI =
+ MI->getParent()->getParent()->getInfo<ARMFunctionInfo>();
+ return AFI->isThumb2Function();
+ }
+ return true;
+}
/// FIXME: Works around a gcc miscompilation with -fstrict-aliasing
static unsigned getNumJTEntries(const std::vector<MachineJumpTableEntry> &JT,
@@ -647,11 +662,13 @@ ARMBaseInstrInfo::copyRegToReg(MachineBasicBlock &MBB,
SrcRC == ARM::DPR_VFP2RegisterClass ||
SrcRC == ARM::DPR_8RegisterClass) {
// Always use neon reg-reg move if source or dest is NEON-only regclass.
- BuildMI(MBB, I, DL, get(ARM::VMOVDneon), DestReg).addReg(SrcReg);
+ AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VMOVDneon),
+ DestReg).addReg(SrcReg));
} else if (DestRC == ARM::QPRRegisterClass ||
DestRC == ARM::QPR_VFP2RegisterClass ||
DestRC == ARM::QPR_8RegisterClass) {
- BuildMI(MBB, I, DL, get(ARM::VMOVQ), DestReg).addReg(SrcReg);
+ AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VMOVQ),
+ DestReg).addReg(SrcReg));
} else {
return false;
}
@@ -695,13 +712,14 @@ storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
// FIXME: Neon instructions should support predicates
if (Align >= 16
&& (getRegisterInfo().needsStackRealignment(MF))) {
- BuildMI(MBB, I, DL, get(ARM::VST1q64))
- .addFrameIndex(FI).addImm(0).addImm(0).addImm(128).addMemOperand(MMO)
- .addReg(SrcReg, getKillRegState(isKill));
+ AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VST1q64))
+ .addFrameIndex(FI).addImm(0).addImm(0).addImm(128)
+ .addMemOperand(MMO)
+ .addReg(SrcReg, getKillRegState(isKill)));
} else {
- BuildMI(MBB, I, DL, get(ARM::VSTRQ)).
- addReg(SrcReg, getKillRegState(isKill))
- .addFrameIndex(FI).addImm(0).addMemOperand(MMO);
+ AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VSTRQ)).
+ addReg(SrcReg, getKillRegState(isKill))
+ .addFrameIndex(FI).addImm(0).addMemOperand(MMO));
}
}
}
@@ -740,11 +758,12 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
// FIXME: Neon instructions should support predicates
if (Align >= 16
&& (getRegisterInfo().needsStackRealignment(MF))) {
- BuildMI(MBB, I, DL, get(ARM::VLD1q64), DestReg)
- .addFrameIndex(FI).addImm(0).addImm(0).addImm(128).addMemOperand(MMO);
+ AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VLD1q64), DestReg)
+ .addFrameIndex(FI).addImm(0).addImm(0).addImm(128)
+ .addMemOperand(MMO));
} else {
- BuildMI(MBB, I, DL, get(ARM::VLDRQ), DestReg).addFrameIndex(FI).addImm(0).
- addMemOperand(MMO);
+ AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VLDRQ), DestReg)
+ .addFrameIndex(FI).addImm(0).addMemOperand(MMO));
}
}
}
@@ -978,7 +997,10 @@ bool ARMBaseInstrInfo::isIdentical(const MachineInstr *MI0,
const MachineInstr *MI1,
const MachineRegisterInfo *MRI) const {
int Opcode = MI0->getOpcode();
- if (Opcode == ARM::t2LDRpci_pic || Opcode == ARM::tLDRpci_pic) {
+ if (Opcode == ARM::t2LDRpci ||
+ Opcode == ARM::t2LDRpci_pic ||
+ Opcode == ARM::tLDRpci ||
+ Opcode == ARM::tLDRpci_pic) {
if (MI1->getOpcode() != Opcode)
return false;
if (MI0->getNumOperands() != MI1->getNumOperands())
@@ -1005,16 +1027,6 @@ bool ARMBaseInstrInfo::isIdentical(const MachineInstr *MI0,
return TargetInstrInfoImpl::isIdentical(MI0, MI1, MRI);
}
-unsigned ARMBaseInstrInfo::TailDuplicationLimit(const MachineBasicBlock &MBB,
- unsigned DefaultLimit) const {
- // If the target processor can predict indirect branches, it is highly
- // desirable to duplicate them, since it can often make them predictable.
- if (!MBB.empty() && isIndirectBranchOpcode(MBB.back().getOpcode()) &&
- getSubtarget().hasBranchTargetBuffer())
- return DefaultLimit + 2;
- return DefaultLimit;
-}
-
/// getInstrPredicate - If instruction is predicated, returns its predicate
/// condition, otherwise returns AL. It also returns the condition code
/// register by reference.
diff --git a/lib/Target/ARM/ARMBaseInstrInfo.h b/lib/Target/ARM/ARMBaseInstrInfo.h
index 73e854f..282e30c 100644
--- a/lib/Target/ARM/ARMBaseInstrInfo.h
+++ b/lib/Target/ARM/ARMBaseInstrInfo.h
@@ -162,6 +162,22 @@ namespace ARMII {
I_BitShift = 25,
CondShift = 28
};
+
+ /// Target Operand Flag enum.
+ enum TOF {
+ //===------------------------------------------------------------------===//
+ // ARM Specific MachineOperand flags.
+
+ MO_NO_FLAG,
+
+ /// MO_LO16 - On a symbol operand, this represents a relocation containing
+ /// the lower 16 bits of the address. Used only with the movw instruction.
+ MO_LO16,
+
+ /// MO_HI16 - On a symbol operand, this represents a relocation containing
+ /// the upper 16 bits of the address. Used only with the movt instruction.
+ MO_HI16
+ };
}
class ARMBaseInstrInfo : public TargetInstrInfoImpl {
@@ -220,6 +236,8 @@ public:
virtual bool DefinesPredicate(MachineInstr *MI,
std::vector<MachineOperand> &Pred) const;
+ virtual bool isPredicable(MachineInstr *MI) const;
+
/// GetInstSize - Returns the size of the specified MachineInstr.
///
virtual unsigned GetInstSizeInBytes(const MachineInstr* MI) const;
@@ -272,9 +290,6 @@ public:
virtual bool isIdentical(const MachineInstr *MI, const MachineInstr *Other,
const MachineRegisterInfo *MRI) const;
-
- virtual unsigned TailDuplicationLimit(const MachineBasicBlock &MBB,
- unsigned DefaultLimit) const;
};
static inline
diff --git a/lib/Target/ARM/ARMBaseRegisterInfo.cpp b/lib/Target/ARM/ARMBaseRegisterInfo.cpp
index 19762ee..653328d 100644
--- a/lib/Target/ARM/ARMBaseRegisterInfo.cpp
+++ b/lib/Target/ARM/ARMBaseRegisterInfo.cpp
@@ -799,6 +799,54 @@ ARMBaseRegisterInfo::getFrameRegister(const MachineFunction &MF) const {
return ARM::SP;
}
+int
+ARMBaseRegisterInfo::getFrameIndexReference(MachineFunction &MF, int FI,
+ unsigned &FrameReg) const {
+ const MachineFrameInfo *MFI = MF.getFrameInfo();
+ ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
+ int Offset = MFI->getObjectOffset(FI) + MFI->getStackSize();
+ bool isFixed = MFI->isFixedObjectIndex(FI);
+
+ FrameReg = ARM::SP;
+ if (AFI->isGPRCalleeSavedArea1Frame(FI))
+ Offset -= AFI->getGPRCalleeSavedArea1Offset();
+ else if (AFI->isGPRCalleeSavedArea2Frame(FI))
+ Offset -= AFI->getGPRCalleeSavedArea2Offset();
+ else if (AFI->isDPRCalleeSavedAreaFrame(FI))
+ Offset -= AFI->getDPRCalleeSavedAreaOffset();
+ else if (needsStackRealignment(MF)) {
+ // When dynamically realigning the stack, use the frame pointer for
+ // parameters, and the stack pointer for locals.
+ assert (hasFP(MF) && "dynamic stack realignment without a FP!");
+ if (isFixed) {
+ FrameReg = getFrameRegister(MF);
+ Offset -= AFI->getFramePtrSpillOffset();
+ }
+ } else if (hasFP(MF) && AFI->hasStackFrame()) {
+ if (isFixed || MFI->hasVarSizedObjects()) {
+ // Use frame pointer to reference fixed objects unless this is a
+ // frameless function.
+ FrameReg = getFrameRegister(MF);
+ Offset -= AFI->getFramePtrSpillOffset();
+ } else if (AFI->isThumb2Function()) {
+ // In Thumb2 mode, the negative offset is very limited.
+ int FPOffset = Offset - AFI->getFramePtrSpillOffset();
+ if (FPOffset >= -255 && FPOffset < 0) {
+ FrameReg = getFrameRegister(MF);
+ Offset = FPOffset;
+ }
+ }
+ }
+ return Offset;
+}
+
+
+int
+ARMBaseRegisterInfo::getFrameIndexOffset(MachineFunction &MF, int FI) const {
+ unsigned FrameReg;
+ return getFrameIndexReference(MF, FI, FrameReg);
+}
+
unsigned ARMBaseRegisterInfo::getEHExceptionRegister() const {
llvm_unreachable("What is the exception register");
return 0;
@@ -1115,45 +1163,13 @@ ARMBaseRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
assert(i < MI.getNumOperands() && "Instr doesn't have FrameIndex operand!");
}
- unsigned FrameReg = ARM::SP;
int FrameIndex = MI.getOperand(i).getIndex();
int Offset = MFI->getObjectOffset(FrameIndex) + MFI->getStackSize() + SPAdj;
- bool isFixed = MFI->isFixedObjectIndex(FrameIndex);
+ unsigned FrameReg;
- // When doing dynamic stack realignment, all of these need to change(?)
- if (AFI->isGPRCalleeSavedArea1Frame(FrameIndex))
- Offset -= AFI->getGPRCalleeSavedArea1Offset();
- else if (AFI->isGPRCalleeSavedArea2Frame(FrameIndex))
- Offset -= AFI->getGPRCalleeSavedArea2Offset();
- else if (AFI->isDPRCalleeSavedAreaFrame(FrameIndex))
- Offset -= AFI->getDPRCalleeSavedAreaOffset();
- else if (needsStackRealignment(MF)) {
- // When dynamically realigning the stack, use the frame pointer for
- // parameters, and the stack pointer for locals.
- assert (hasFP(MF) && "dynamic stack realignment without a FP!");
- if (isFixed) {
- FrameReg = getFrameRegister(MF);
- Offset -= AFI->getFramePtrSpillOffset();
- // When referencing from the frame pointer, stack pointer adjustments
- // don't matter.
- SPAdj = 0;
- }
- } else if (hasFP(MF) && AFI->hasStackFrame()) {
- assert(SPAdj == 0 && "Unexpected stack offset!");
- if (isFixed || MFI->hasVarSizedObjects()) {
- // Use frame pointer to reference fixed objects unless this is a
- // frameless function.
- FrameReg = getFrameRegister(MF);
- Offset -= AFI->getFramePtrSpillOffset();
- } else if (AFI->isThumb2Function()) {
- // In Thumb2 mode, the negative offset is very limited.
- int FPOffset = Offset - AFI->getFramePtrSpillOffset();
- if (FPOffset >= -255 && FPOffset < 0) {
- FrameReg = getFrameRegister(MF);
- Offset = FPOffset;
- }
- }
- }
+ Offset = getFrameIndexReference(MF, FrameIndex, FrameReg);
+ if (FrameReg != ARM::SP)
+ SPAdj = 0;
// Modify MI as necessary to handle as much of 'Offset' as possible
bool Done = false;
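
The new getFrameIndexReference centralizes the base-register choice that eliminateFrameIndex used to open-code. Ignoring the callee-saved-area and variable-sized-object cases, the decision tree reduces to the sketch below (plain bools stand in for the MachineFunction queries):

    #include <cstdio>

    enum BaseReg { SP, FP };

    // Realigned stacks address fixed objects off FP and locals off SP;
    // otherwise a frame pointer (when present) anchors fixed objects, and
    // Thumb2 may also use FP when the offset fits its small negative range.
    static BaseReg pickBase(bool NeedsRealignment, bool HasFP, bool IsFixed,
                            bool IsThumb2, int FPOffset) {
      if (NeedsRealignment)
        return IsFixed ? FP : SP;
      if (HasFP && IsFixed)
        return FP;
      if (HasFP && IsThumb2 && FPOffset >= -255 && FPOffset < 0)
        return FP;
      return SP;
    }

    int main() {
      std::printf("%d\n", pickBase(true,  true,  true,  false, 0));  // 1 (FP)
      std::printf("%d\n", pickBase(false, true,  false, true, -8));  // 1 (FP)
      std::printf("%d\n", pickBase(false, false, false, false, 0));  // 0 (SP)
      return 0;
    }
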
diff --git a/lib/Target/ARM/ARMBaseRegisterInfo.h b/lib/Target/ARM/ARMBaseRegisterInfo.h
index 4b267b0..2788d07 100644
--- a/lib/Target/ARM/ARMBaseRegisterInfo.h
+++ b/lib/Target/ARM/ARMBaseRegisterInfo.h
@@ -106,6 +106,9 @@ public:
// Debug information queries.
unsigned getRARegister() const;
unsigned getFrameRegister(const MachineFunction &MF) const;
+ int getFrameIndexReference(MachineFunction &MF, int FI,
+ unsigned &FrameReg) const;
+ int getFrameIndexOffset(MachineFunction &MF, int FI) const;
// Exception handling queries.
unsigned getEHExceptionRegister() const;
diff --git a/lib/Target/ARM/ARMCodeEmitter.cpp b/lib/Target/ARM/ARMCodeEmitter.cpp
index 766acff..17e7d44 100644
--- a/lib/Target/ARM/ARMCodeEmitter.cpp
+++ b/lib/Target/ARM/ARMCodeEmitter.cpp
@@ -613,7 +613,6 @@ void Emitter<CodeEmitter>::emitPseudoInstruction(const MachineInstr &MI) {
break;
case TargetInstrInfo::IMPLICIT_DEF:
case TargetInstrInfo::KILL:
- case ARM::DWARF_LOC:
// Do nothing.
break;
case ARM::CONSTPOOL_ENTRY:
diff --git a/lib/Target/ARM/ARMConstantIslandPass.cpp b/lib/Target/ARM/ARMConstantIslandPass.cpp
index d22c43a..e59a315 100644
--- a/lib/Target/ARM/ARMConstantIslandPass.cpp
+++ b/lib/Target/ARM/ARMConstantIslandPass.cpp
@@ -162,6 +162,9 @@ namespace {
/// the branch fix up pass.
bool HasFarJump;
+ /// HasInlineAsm - True if the function contains inline assembly.
+ bool HasInlineAsm;
+
const TargetInstrInfo *TII;
const ARMSubtarget *STI;
ARMFunctionInfo *AFI;
@@ -236,10 +239,19 @@ void ARMConstantIslands::verify(MachineFunction &MF) {
if (!MBB->empty() &&
MBB->begin()->getOpcode() == ARM::CONSTPOOL_ENTRY) {
unsigned MBBId = MBB->getNumber();
- assert((BBOffsets[MBBId]%4 == 0 && BBSizes[MBBId]%4 == 0) ||
+ assert(HasInlineAsm ||
+ (BBOffsets[MBBId]%4 == 0 && BBSizes[MBBId]%4 == 0) ||
(BBOffsets[MBBId]%4 != 0 && BBSizes[MBBId]%4 != 0));
}
}
+ for (unsigned i = 0, e = CPUsers.size(); i != e; ++i) {
+ CPUser &U = CPUsers[i];
+ unsigned UserOffset = GetOffsetOf(U.MI) + (isThumb ? 4 : 8);
+ unsigned CPEOffset = GetOffsetOf(U.CPEMI);
+ unsigned Disp = UserOffset < CPEOffset ? CPEOffset - UserOffset :
+ UserOffset - CPEOffset;
+ assert(Disp <= U.MaxDisp && "Constant pool entry out of range!");
+ }
#endif
}
@@ -269,6 +281,7 @@ bool ARMConstantIslands::runOnMachineFunction(MachineFunction &MF) {
isThumb2 = AFI->isThumb2Function();
HasFarJump = false;
+ HasInlineAsm = false;
// Renumber all of the machine basic blocks in the function, guaranteeing that
// the numbers agree with the position of the block in the function.
@@ -452,6 +465,19 @@ void ARMConstantIslands::JumpTableFunctionScan(MachineFunction &MF) {
/// and finding all of the constant pool users.
void ARMConstantIslands::InitialFunctionScan(MachineFunction &MF,
const std::vector<MachineInstr*> &CPEMIs) {
+ // First thing, see if the function has any inline assembly in it. If so,
+ // we have to be conservative about alignment assumptions, as we don't
+ // know for sure the size of any instructions in the inline assembly.
+ for (MachineFunction::iterator MBBI = MF.begin(), E = MF.end();
+ MBBI != E; ++MBBI) {
+ MachineBasicBlock &MBB = *MBBI;
+ for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end();
+ I != E; ++I)
+ if (I->getOpcode() == ARM::INLINEASM)
+ HasInlineAsm = true;
+ }
+
+ // Now go back through the instructions and build up our data structures
unsigned Offset = 0;
for (MachineFunction::iterator MBBI = MF.begin(), E = MF.end();
MBBI != E; ++MBBI) {
@@ -481,7 +507,7 @@ void ARMConstantIslands::InitialFunctionScan(MachineFunction &MF,
// A Thumb1 table jump may involve padding; for the offsets to
// be right, functions containing these must be 4-byte aligned.
AFI->setAlign(2U);
- if ((Offset+MBBSize)%4 != 0)
+ if ((Offset+MBBSize)%4 != 0 || HasInlineAsm)
// FIXME: Add a pseudo ALIGN instruction instead.
MBBSize += 2; // padding
continue; // Does not get an entry in ImmBranches
@@ -550,7 +576,7 @@ void ARMConstantIslands::InitialFunctionScan(MachineFunction &MF,
case ARM::LEApcrel:
// This takes a SoImm, which is 8 bit immediate rotated. We'll
// pretend the maximum offset is 255 * 4. Since each instruction
- // 4 byte wide, this is always correct. We'llc heck for other
+ // is 4 bytes wide, this is always correct. We'll check for other
// displacements that fits in a SoImm as well.
Bits = 8;
Scale = 4;
@@ -609,7 +635,7 @@ void ARMConstantIslands::InitialFunctionScan(MachineFunction &MF,
if (isThumb &&
!MBB.empty() &&
MBB.begin()->getOpcode() == ARM::CONSTPOOL_ENTRY &&
- (Offset%4) != 0)
+ ((Offset%4) != 0 || HasInlineAsm))
MBBSize += 2;
BBSizes.push_back(MBBSize);
@@ -633,7 +659,7 @@ unsigned ARMConstantIslands::GetOffsetOf(MachineInstr *MI) const {
// alignment padding, and compensate if so.
if (isThumb &&
MI->getOpcode() == ARM::CONSTPOOL_ENTRY &&
- Offset%4 != 0)
+ (Offset%4 != 0 || HasInlineAsm))
Offset += 2;
// Sum instructions before MI in MBB.
@@ -829,7 +855,7 @@ bool ARMConstantIslands::CPEIsInRange(MachineInstr *MI, unsigned UserOffset,
MachineInstr *CPEMI, unsigned MaxDisp,
bool NegOk, bool DoDump) {
unsigned CPEOffset = GetOffsetOf(CPEMI);
- assert(CPEOffset%4 == 0 && "Misaligned CPE");
+ assert((CPEOffset%4 == 0 || HasInlineAsm) && "Misaligned CPE");
if (DoDump) {
DEBUG(errs() << "User of CPE#" << CPEMI->getOperand(0).getImm()
@@ -870,7 +896,7 @@ void ARMConstantIslands::AdjustBBOffsetsAfter(MachineBasicBlock *BB,
if (!isThumb)
continue;
MachineBasicBlock *MBB = MBBI;
- if (!MBB->empty()) {
+ if (!MBB->empty() && !HasInlineAsm) {
// Constant pool entries require padding.
if (MBB->begin()->getOpcode() == ARM::CONSTPOOL_ENTRY) {
unsigned OldOffset = BBOffsets[i] - delta;
@@ -1226,7 +1252,7 @@ bool ARMConstantIslands::HandleConstantPoolUser(MachineFunction &MF,
BBOffsets[NewIsland->getNumber()] = BBOffsets[NewMBB->getNumber()];
// Compensate for .align 2 in thumb mode.
- if (isThumb && BBOffsets[NewIsland->getNumber()]%4 != 0)
+ if (isThumb && (BBOffsets[NewIsland->getNumber()]%4 != 0 || HasInlineAsm))
Size += 2;
// Increase the size of the island block to account for the new entry.
BBSizes[NewIsland->getNumber()] += Size;
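
Every HasInlineAsm check added here applies one rule: Thumb constant-pool padding is normally budgeted only when the running offset is misaligned, but once inline assembly makes instruction sizes unknowable the pass must assume the padding halfword may be needed. As arithmetic:

    #include <cstdio>

    // Bytes of padding to budget before a Thumb constant pool entry.
    // With inline asm in the function the computed offset cannot be
    // trusted, so pad conservatively, exactly as the pass now does.
    static unsigned paddingBytes(unsigned Offset, bool HasInlineAsm) {
      if (Offset % 4 != 0 || HasInlineAsm)
        return 2;
      return 0;
    }

    int main() {
      std::printf("%u %u %u\n",
                  paddingBytes(6, false),  // 2: misaligned
                  paddingBytes(8, false),  // 0: already 4-byte aligned
                  paddingBytes(8, true));  // 2: inline asm, be conservative
      return 0;
    }
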
diff --git a/lib/Target/ARM/ARMExpandPseudoInsts.cpp b/lib/Target/ARM/ARMExpandPseudoInsts.cpp
index 4d0f899..c929c54 100644
--- a/lib/Target/ARM/ARMExpandPseudoInsts.cpp
+++ b/lib/Target/ARM/ARMExpandPseudoInsts.cpp
@@ -75,17 +75,30 @@ bool ARMExpandPseudo::ExpandMBB(MachineBasicBlock &MBB) {
}
case ARM::t2MOVi32imm: {
unsigned DstReg = MI.getOperand(0).getReg();
- unsigned Imm = MI.getOperand(1).getImm();
- unsigned Lo16 = Imm & 0xffff;
- unsigned Hi16 = (Imm >> 16) & 0xffff;
if (!MI.getOperand(0).isDead()) {
- AddDefaultPred(BuildMI(MBB, MBBI, MI.getDebugLoc(),
- TII->get(ARM::t2MOVi16), DstReg)
- .addImm(Lo16));
- AddDefaultPred(BuildMI(MBB, MBBI, MI.getDebugLoc(),
- TII->get(ARM::t2MOVTi16))
- .addReg(DstReg, getDefRegState(true))
- .addReg(DstReg).addImm(Hi16));
+ const MachineOperand &MO = MI.getOperand(1);
+ MachineInstrBuilder LO16, HI16;
+
+ LO16 = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM::t2MOVi16),
+ DstReg);
+ HI16 = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM::t2MOVTi16))
+ .addReg(DstReg, getDefRegState(true)).addReg(DstReg);
+
+ if (MO.isImm()) {
+ unsigned Imm = MO.getImm();
+ unsigned Lo16 = Imm & 0xffff;
+ unsigned Hi16 = (Imm >> 16) & 0xffff;
+ LO16 = LO16.addImm(Lo16);
+ HI16 = HI16.addImm(Hi16);
+ } else {
+ GlobalValue *GV = MO.getGlobal();
+ unsigned TF = MO.getTargetFlags();
+ LO16 = LO16.addGlobalAddress(GV, MO.getOffset(), TF | ARMII::MO_LO16);
+ HI16 = HI16.addGlobalAddress(GV, MO.getOffset(), TF | ARMII::MO_HI16);
+ // FIXME: What about memoperands?
+ }
+ AddDefaultPred(LO16);
+ AddDefaultPred(HI16);
}
MI.eraseFromParent();
Modified = true;
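
The reworked t2MOVi32imm expansion still materializes a 32-bit value as MOVW (low half) followed by MOVT (high half); for the immediate form the split is plain bit arithmetic:

    #include <cassert>
    #include <cstdio>

    int main() {
      unsigned Imm = 0xDEADBEEF;
      unsigned Lo16 = Imm & 0xffff;         // MOVW operand: 0xBEEF
      unsigned Hi16 = (Imm >> 16) & 0xffff; // MOVT operand: 0xDEAD

      // MOVW zero-extends the low half; MOVT then inserts the high half
      // without touching the bottom 16 bits.
      unsigned Reg = Lo16;                  // after t2MOVi16
      Reg = (Hi16 << 16) | (Reg & 0xffff);  // after t2MOVTi16
      assert(Reg == Imm);
      std::printf("movw 0x%04X, movt 0x%04X\n", Lo16, Hi16);
      return 0;
    }
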
diff --git a/lib/Target/ARM/ARMISelDAGToDAG.cpp b/lib/Target/ARM/ARMISelDAGToDAG.cpp
index 696a8e1..d63f3e6 100644
--- a/lib/Target/ARM/ARMISelDAGToDAG.cpp
+++ b/lib/Target/ARM/ARMISelDAGToDAG.cpp
@@ -149,6 +149,21 @@ private:
/// SelectV6T2BitfieldExtractOp - Select SBFX/UBFX instructions for ARM.
SDNode *SelectV6T2BitfieldExtractOp(SDValue Op, unsigned Opc);
+ /// SelectCMOVOp - Select CMOV instructions for ARM.
+ SDNode *SelectCMOVOp(SDValue Op);
+ SDNode *SelectT2CMOVShiftOp(SDValue Op, SDValue FalseVal, SDValue TrueVal,
+ ARMCC::CondCodes CCVal, SDValue CCR,
+ SDValue InFlag);
+ SDNode *SelectARMCMOVShiftOp(SDValue Op, SDValue FalseVal, SDValue TrueVal,
+ ARMCC::CondCodes CCVal, SDValue CCR,
+ SDValue InFlag);
+ SDNode *SelectT2CMOVSoImmOp(SDValue Op, SDValue FalseVal, SDValue TrueVal,
+ ARMCC::CondCodes CCVal, SDValue CCR,
+ SDValue InFlag);
+ SDNode *SelectARMCMOVSoImmOp(SDValue Op, SDValue FalseVal, SDValue TrueVal,
+ ARMCC::CondCodes CCVal, SDValue CCR,
+ SDValue InFlag);
+
/// SelectInlineAsmMemoryOperand - Implement addressing mode selection for
/// inline asm expressions.
virtual bool SelectInlineAsmMemoryOperand(const SDValue &Op,
@@ -246,7 +261,9 @@ bool ARMDAGToDAGISel::SelectAddrMode2(SDValue Op, SDValue N,
if (N.getOpcode() == ISD::FrameIndex) {
int FI = cast<FrameIndexSDNode>(N)->getIndex();
Base = CurDAG->getTargetFrameIndex(FI, TLI.getPointerTy());
- } else if (N.getOpcode() == ARMISD::Wrapper) {
+ } else if (N.getOpcode() == ARMISD::Wrapper &&
+ !(Subtarget->useMovt() &&
+ N.getOperand(0).getOpcode() == ISD::TargetGlobalAddress)) {
Base = N.getOperand(0);
}
Offset = CurDAG->getRegister(0, MVT::i32);
@@ -448,7 +465,9 @@ bool ARMDAGToDAGISel::SelectAddrMode5(SDValue Op, SDValue N,
if (N.getOpcode() == ISD::FrameIndex) {
int FI = cast<FrameIndexSDNode>(N)->getIndex();
Base = CurDAG->getTargetFrameIndex(FI, TLI.getPointerTy());
- } else if (N.getOpcode() == ARMISD::Wrapper) {
+ } else if (N.getOpcode() == ARMISD::Wrapper &&
+ !(Subtarget->useMovt() &&
+ N.getOperand(0).getOpcode() == ISD::TargetGlobalAddress)) {
Base = N.getOperand(0);
}
Offset = CurDAG->getTargetConstant(ARM_AM::getAM5Opc(ARM_AM::add, 0),
@@ -543,7 +562,13 @@ ARMDAGToDAGISel::SelectThumbAddrModeRI5(SDValue Op, SDValue N,
}
if (N.getOpcode() != ISD::ADD) {
- Base = (N.getOpcode() == ARMISD::Wrapper) ? N.getOperand(0) : N;
+ if (N.getOpcode() == ARMISD::Wrapper &&
+ !(Subtarget->useMovt() &&
+ N.getOperand(0).getOpcode() == ISD::TargetGlobalAddress)) {
+ Base = N.getOperand(0);
+ } else
+ Base = N;
+
Offset = CurDAG->getRegister(0, MVT::i32);
OffImm = CurDAG->getTargetConstant(0, MVT::i32);
return true;
@@ -666,7 +691,9 @@ bool ARMDAGToDAGISel::SelectT2AddrModeImm12(SDValue Op, SDValue N,
Base = CurDAG->getTargetFrameIndex(FI, TLI.getPointerTy());
OffImm = CurDAG->getTargetConstant(0, MVT::i32);
return true;
- } else if (N.getOpcode() == ARMISD::Wrapper) {
+ } else if (N.getOpcode() == ARMISD::Wrapper &&
+ !(Subtarget->useMovt() &&
+ N.getOperand(0).getOpcode() == ISD::TargetGlobalAddress)) {
Base = N.getOperand(0);
if (Base.getOpcode() == ISD::TargetConstantPool)
return false; // We want to select t2LDRpci instead.
@@ -1034,12 +1061,15 @@ SDNode *ARMDAGToDAGISel::SelectVLD(SDValue Op, unsigned NumVecs,
case MVT::v4i32: OpcodeIndex = 2; break;
}
+ SDValue Pred = CurDAG->getTargetConstant(14, MVT::i32);
+ SDValue PredReg = CurDAG->getRegister(0, MVT::i32);
if (is64BitVector) {
unsigned Opc = DOpcodes[OpcodeIndex];
- const SDValue Ops[] = { MemAddr, MemUpdate, MemOpc, Align, Chain };
+ const SDValue Ops[] = { MemAddr, MemUpdate, MemOpc, Align,
+ Pred, PredReg, Chain };
std::vector<EVT> ResTys(NumVecs, VT);
ResTys.push_back(MVT::Other);
- return CurDAG->getMachineNode(Opc, dl, ResTys, Ops, 5);
+ return CurDAG->getMachineNode(Opc, dl, ResTys, Ops, 7);
}
EVT RegVT = GetNEONSubregVT(VT);
@@ -1047,10 +1077,11 @@ SDNode *ARMDAGToDAGISel::SelectVLD(SDValue Op, unsigned NumVecs,
// Quad registers are directly supported for VLD2,
// loading 2 pairs of D regs.
unsigned Opc = QOpcodes0[OpcodeIndex];
- const SDValue Ops[] = { MemAddr, MemUpdate, MemOpc, Align, Chain };
+ const SDValue Ops[] = { MemAddr, MemUpdate, MemOpc, Align,
+ Pred, PredReg, Chain };
std::vector<EVT> ResTys(4, VT);
ResTys.push_back(MVT::Other);
- SDNode *VLd = CurDAG->getMachineNode(Opc, dl, ResTys, Ops, 5);
+ SDNode *VLd = CurDAG->getMachineNode(Opc, dl, ResTys, Ops, 7);
Chain = SDValue(VLd, 4);
// Combine the even and odd subregs to produce the result.
@@ -1071,15 +1102,16 @@ SDNode *ARMDAGToDAGISel::SelectVLD(SDValue Op, unsigned NumVecs,
// Load the even subregs.
unsigned Opc = QOpcodes0[OpcodeIndex];
- const SDValue OpsA[] = { MemAddr, MemUpdate, MemOpc, Align, Chain };
- SDNode *VLdA = CurDAG->getMachineNode(Opc, dl, ResTys, OpsA, 5);
+ const SDValue OpsA[] = { MemAddr, MemUpdate, MemOpc, Align,
+ Pred, PredReg, Chain };
+ SDNode *VLdA = CurDAG->getMachineNode(Opc, dl, ResTys, OpsA, 7);
Chain = SDValue(VLdA, NumVecs+1);
// Load the odd subregs.
Opc = QOpcodes1[OpcodeIndex];
const SDValue OpsB[] = { SDValue(VLdA, NumVecs), MemUpdate, MemOpc,
- Align, Chain };
- SDNode *VLdB = CurDAG->getMachineNode(Opc, dl, ResTys, OpsB, 5);
+ Align, Pred, PredReg, Chain };
+ SDNode *VLdB = CurDAG->getMachineNode(Opc, dl, ResTys, OpsB, 7);
Chain = SDValue(VLdB, NumVecs+1);
// Combine the even and odd subregs to produce the result.
@@ -1123,6 +1155,9 @@ SDNode *ARMDAGToDAGISel::SelectVST(SDValue Op, unsigned NumVecs,
case MVT::v4i32: OpcodeIndex = 2; break;
}
+ SDValue Pred = CurDAG->getTargetConstant(14, MVT::i32);
+ SDValue PredReg = CurDAG->getRegister(0, MVT::i32);
+
SmallVector<SDValue, 8> Ops;
Ops.push_back(MemAddr);
Ops.push_back(MemUpdate);
@@ -1133,8 +1168,10 @@ SDNode *ARMDAGToDAGISel::SelectVST(SDValue Op, unsigned NumVecs,
unsigned Opc = DOpcodes[OpcodeIndex];
for (unsigned Vec = 0; Vec < NumVecs; ++Vec)
Ops.push_back(N->getOperand(Vec+3));
+ Ops.push_back(Pred);
+ Ops.push_back(PredReg);
Ops.push_back(Chain);
- return CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops.data(), NumVecs+5);
+ return CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops.data(), NumVecs+7);
}
EVT RegVT = GetNEONSubregVT(VT);
@@ -1148,8 +1185,10 @@ SDNode *ARMDAGToDAGISel::SelectVST(SDValue Op, unsigned NumVecs,
Ops.push_back(CurDAG->getTargetExtractSubreg(ARM::DSUBREG_1, dl, RegVT,
N->getOperand(Vec+3)));
}
+ Ops.push_back(Pred);
+ Ops.push_back(PredReg);
Ops.push_back(Chain);
- return CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops.data(), 9);
+ return CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops.data(), 11);
}
// Otherwise, quad registers are stored with two separate instructions,
@@ -1162,10 +1201,12 @@ SDNode *ARMDAGToDAGISel::SelectVST(SDValue Op, unsigned NumVecs,
for (unsigned Vec = 0; Vec < NumVecs; ++Vec)
Ops.push_back(CurDAG->getTargetExtractSubreg(ARM::DSUBREG_0, dl, RegVT,
N->getOperand(Vec+3)));
+ Ops.push_back(Pred);
+ Ops.push_back(PredReg);
Ops.push_back(Chain);
unsigned Opc = QOpcodes0[OpcodeIndex];
SDNode *VStA = CurDAG->getMachineNode(Opc, dl, MemAddr.getValueType(),
- MVT::Other, Ops.data(), NumVecs+5);
+ MVT::Other, Ops.data(), NumVecs+7);
Chain = SDValue(VStA, 1);
// Store the odd subregs.
@@ -1173,10 +1214,12 @@ SDNode *ARMDAGToDAGISel::SelectVST(SDValue Op, unsigned NumVecs,
for (unsigned Vec = 0; Vec < NumVecs; ++Vec)
Ops[Vec+4] = CurDAG->getTargetExtractSubreg(ARM::DSUBREG_1, dl, RegVT,
N->getOperand(Vec+3));
- Ops[NumVecs+4] = Chain;
+ Ops[NumVecs+4] = Pred;
+ Ops[NumVecs+5] = PredReg;
+ Ops[NumVecs+6] = Chain;
Opc = QOpcodes1[OpcodeIndex];
SDNode *VStB = CurDAG->getMachineNode(Opc, dl, MemAddr.getValueType(),
- MVT::Other, Ops.data(), NumVecs+5);
+ MVT::Other, Ops.data(), NumVecs+7);
Chain = SDValue(VStB, 1);
ReplaceUses(SDValue(N, 0), Chain);
return NULL;
@@ -1224,6 +1267,9 @@ SDNode *ARMDAGToDAGISel::SelectVLDSTLane(SDValue Op, bool IsLoad,
case MVT::v4i32: OpcodeIndex = 1; break;
}
+ SDValue Pred = CurDAG->getTargetConstant(14, MVT::i32);
+ SDValue PredReg = CurDAG->getRegister(0, MVT::i32);
+
SmallVector<SDValue, 9> Ops;
Ops.push_back(MemAddr);
Ops.push_back(MemUpdate);
@@ -1249,15 +1295,17 @@ SDNode *ARMDAGToDAGISel::SelectVLDSTLane(SDValue Op, bool IsLoad,
N->getOperand(Vec+3)));
}
Ops.push_back(getI32Imm(Lane));
+ Ops.push_back(Pred);
+ Ops.push_back(PredReg);
Ops.push_back(Chain);
if (!IsLoad)
- return CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops.data(), NumVecs+5);
+ return CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops.data(), NumVecs+7);
std::vector<EVT> ResTys(NumVecs, RegVT);
ResTys.push_back(MVT::Other);
SDNode *VLdLn =
- CurDAG->getMachineNode(Opc, dl, ResTys, Ops.data(), NumVecs+5);
+ CurDAG->getMachineNode(Opc, dl, ResTys, Ops.data(), NumVecs+7);
// For a 64-bit vector load to D registers, nothing more needs to be done.
if (is64BitVector)
return VLdLn;
@@ -1282,7 +1330,7 @@ SDNode *ARMDAGToDAGISel::SelectV6T2BitfieldExtractOp(SDValue Op,
return NULL;
unsigned Shl_imm = 0;
- if (isOpcWithIntImmediate(Op.getOperand(0).getNode(), ISD::SHL, Shl_imm)){
+ if (isOpcWithIntImmediate(Op.getOperand(0).getNode(), ISD::SHL, Shl_imm)) {
assert(Shl_imm > 0 && Shl_imm < 32 && "bad amount in shift node!");
unsigned Srl_imm = 0;
if (isInt32Immediate(Op.getOperand(1), Srl_imm)) {
@@ -1302,6 +1350,173 @@ SDNode *ARMDAGToDAGISel::SelectV6T2BitfieldExtractOp(SDValue Op,
return NULL;
}
+SDNode *ARMDAGToDAGISel::
+SelectT2CMOVShiftOp(SDValue Op, SDValue FalseVal, SDValue TrueVal,
+ ARMCC::CondCodes CCVal, SDValue CCR, SDValue InFlag) {
+ SDValue CPTmp0;
+ SDValue CPTmp1;
+ if (SelectT2ShifterOperandReg(Op, TrueVal, CPTmp0, CPTmp1)) {
+ unsigned SOVal = cast<ConstantSDNode>(CPTmp1)->getZExtValue();
+ unsigned SOShOp = ARM_AM::getSORegShOp(SOVal);
+ unsigned Opc = 0;
+ switch (SOShOp) {
+ case ARM_AM::lsl: Opc = ARM::t2MOVCClsl; break;
+ case ARM_AM::lsr: Opc = ARM::t2MOVCClsr; break;
+ case ARM_AM::asr: Opc = ARM::t2MOVCCasr; break;
+ case ARM_AM::ror: Opc = ARM::t2MOVCCror; break;
+ default:
+ llvm_unreachable("Unknown so_reg opcode!");
+ break;
+ }
+ SDValue SOShImm =
+ CurDAG->getTargetConstant(ARM_AM::getSORegOffset(SOVal), MVT::i32);
+ SDValue CC = CurDAG->getTargetConstant(CCVal, MVT::i32);
+ SDValue Ops[] = { FalseVal, CPTmp0, SOShImm, CC, CCR, InFlag };
+ return CurDAG->SelectNodeTo(Op.getNode(), Opc, MVT::i32, Ops, 6);
+ }
+ return 0;
+}
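// Illustration, not part of the patch (assumes ARM_AM's packed so_reg
// encoding: shift kind in the low three bits, shift amount above them).
// This is how SOVal above decomposes into the opcode tag that selects
// t2MOVCClsl/t2MOVCClsr/t2MOVCCasr/t2MOVCCror and the SOShImm immediate.
static unsigned soRegShOp(unsigned SOVal)   { return SOVal & 7;  } // shift kind
static unsigned soRegOffset(unsigned SOVal) { return SOVal >> 3; } // amount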
+
+SDNode *ARMDAGToDAGISel::
+SelectARMCMOVShiftOp(SDValue Op, SDValue FalseVal, SDValue TrueVal,
+ ARMCC::CondCodes CCVal, SDValue CCR, SDValue InFlag) {
+ SDValue CPTmp0;
+ SDValue CPTmp1;
+ SDValue CPTmp2;
+ if (SelectShifterOperandReg(Op, TrueVal, CPTmp0, CPTmp1, CPTmp2)) {
+ SDValue CC = CurDAG->getTargetConstant(CCVal, MVT::i32);
+ SDValue Ops[] = { FalseVal, CPTmp0, CPTmp1, CPTmp2, CC, CCR, InFlag };
+ return CurDAG->SelectNodeTo(Op.getNode(), ARM::MOVCCs, MVT::i32, Ops, 7);
+ }
+ return 0;
+}
+
+SDNode *ARMDAGToDAGISel::
+SelectT2CMOVSoImmOp(SDValue Op, SDValue FalseVal, SDValue TrueVal,
+ ARMCC::CondCodes CCVal, SDValue CCR, SDValue InFlag) {
+ ConstantSDNode *T = dyn_cast<ConstantSDNode>(TrueVal);
+ if (!T)
+ return 0;
+
+ if (Predicate_t2_so_imm(TrueVal.getNode())) {
+ SDValue True = CurDAG->getTargetConstant(T->getZExtValue(), MVT::i32);
+ SDValue CC = CurDAG->getTargetConstant(CCVal, MVT::i32);
+ SDValue Ops[] = { FalseVal, True, CC, CCR, InFlag };
+ return CurDAG->SelectNodeTo(Op.getNode(),
+ ARM::t2MOVCCi, MVT::i32, Ops, 5);
+ }
+ return 0;
+}
+
+SDNode *ARMDAGToDAGISel::
+SelectARMCMOVSoImmOp(SDValue Op, SDValue FalseVal, SDValue TrueVal,
+ ARMCC::CondCodes CCVal, SDValue CCR, SDValue InFlag) {
+ ConstantSDNode *T = dyn_cast<ConstantSDNode>(TrueVal);
+ if (!T)
+ return 0;
+
+ if (Predicate_so_imm(TrueVal.getNode())) {
+ SDValue True = CurDAG->getTargetConstant(T->getZExtValue(), MVT::i32);
+ SDValue CC = CurDAG->getTargetConstant(CCVal, MVT::i32);
+ SDValue Ops[] = { FalseVal, True, CC, CCR, InFlag };
+ return CurDAG->SelectNodeTo(Op.getNode(),
+ ARM::MOVCCi, MVT::i32, Ops, 5);
+ }
+ return 0;
+}
+
+SDNode *ARMDAGToDAGISel::SelectCMOVOp(SDValue Op) {
+ EVT VT = Op.getValueType();
+ SDValue FalseVal = Op.getOperand(0);
+ SDValue TrueVal = Op.getOperand(1);
+ SDValue CC = Op.getOperand(2);
+ SDValue CCR = Op.getOperand(3);
+ SDValue InFlag = Op.getOperand(4);
+ assert(CC.getOpcode() == ISD::Constant);
+ assert(CCR.getOpcode() == ISD::Register);
+ ARMCC::CondCodes CCVal =
+ (ARMCC::CondCodes)cast<ConstantSDNode>(CC)->getZExtValue();
+
+ if (!Subtarget->isThumb1Only() && VT == MVT::i32) {
+ // Pattern: (ARMcmov:i32 GPR:i32:$false, so_reg:i32:$true, (imm:i32):$cc)
+ // Emits: (MOVCCs:i32 GPR:i32:$false, so_reg:i32:$true, (imm:i32):$cc)
+ // Pattern complexity = 18 cost = 1 size = 0
+ SDValue CPTmp0;
+ SDValue CPTmp1;
+ SDValue CPTmp2;
+ if (Subtarget->isThumb()) {
+ SDNode *Res = SelectT2CMOVShiftOp(Op, FalseVal, TrueVal,
+ CCVal, CCR, InFlag);
+ if (!Res)
+ Res = SelectT2CMOVShiftOp(Op, TrueVal, FalseVal,
+ ARMCC::getOppositeCondition(CCVal), CCR, InFlag);
+ if (Res)
+ return Res;
+ } else {
+ SDNode *Res = SelectARMCMOVShiftOp(Op, FalseVal, TrueVal,
+ CCVal, CCR, InFlag);
+ if (!Res)
+ Res = SelectARMCMOVShiftOp(Op, TrueVal, FalseVal,
+ ARMCC::getOppositeCondition(CCVal), CCR, InFlag);
+ if (Res)
+ return Res;
+ }
+
+ // Pattern: (ARMcmov:i32 GPR:i32:$false,
+ // (imm:i32)<<P:Predicate_so_imm>>:$true,
+ // (imm:i32):$cc)
+ // Emits: (MOVCCi:i32 GPR:i32:$false,
+ // (so_imm:i32 (imm:i32):$true), (imm:i32):$cc)
+ // Pattern complexity = 10 cost = 1 size = 0
+ if (Subtarget->isThumb()) {
+ SDNode *Res = SelectT2CMOVSoImmOp(Op, FalseVal, TrueVal,
+ CCVal, CCR, InFlag);
+ if (!Res)
+ Res = SelectT2CMOVSoImmOp(Op, TrueVal, FalseVal,
+ ARMCC::getOppositeCondition(CCVal), CCR, InFlag);
+ if (Res)
+ return Res;
+ } else {
+ SDNode *Res = SelectARMCMOVSoImmOp(Op, FalseVal, TrueVal,
+ CCVal, CCR, InFlag);
+ if (!Res)
+ Res = SelectARMCMOVSoImmOp(Op, TrueVal, FalseVal,
+ ARMCC::getOppositeCondition(CCVal), CCR, InFlag);
+ if (Res)
+ return Res;
+ }
+ }
+
+ // Pattern: (ARMcmov:i32 GPR:i32:$false, GPR:i32:$true, (imm:i32):$cc)
+ // Emits: (MOVCCr:i32 GPR:i32:$false, GPR:i32:$true, (imm:i32):$cc)
+ // Pattern complexity = 6 cost = 1 size = 0
+ //
+ // Pattern: (ARMcmov:i32 GPR:i32:$false, GPR:i32:$true, (imm:i32):$cc)
+ // Emits: (tMOVCCr:i32 GPR:i32:$false, GPR:i32:$true, (imm:i32):$cc)
+ // Pattern complexity = 6 cost = 11 size = 0
+ //
+ // Also FCPYScc and FCPYDcc.
+ SDValue Tmp2 = CurDAG->getTargetConstant(CCVal, MVT::i32);
+ SDValue Ops[] = { FalseVal, TrueVal, Tmp2, CCR, InFlag };
+ unsigned Opc = 0;
+ switch (VT.getSimpleVT().SimpleTy) {
+ default: assert(false && "Illegal conditional move type!");
+ break;
+ case MVT::i32:
+ Opc = Subtarget->isThumb()
+ ? (Subtarget->hasThumb2() ? ARM::t2MOVCCr : ARM::tMOVCCr_pseudo)
+ : ARM::MOVCCr;
+ break;
+ case MVT::f32:
+ Opc = ARM::VMOVScc;
+ break;
+ case MVT::f64:
+ Opc = ARM::VMOVDcc;
+ break;
+ }
+ return CurDAG->SelectNodeTo(Op.getNode(), Opc, VT, Ops, 5);
+}
+
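// Illustration, not part of the patch: why each Select*CMOV* helper above
// is retried with the operands swapped under the opposite condition code.
// A conditional move picks the same value either way:
//   cmov(cc, t, f) == cmov(!cc, f, t)   for all inputs,
// e.g. "r = LT ? x : y" is the same move as "r = GE ? y : x".
static int cmov(bool cc, int t, int f) { return cc ? t : f; }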
SDNode *ARMDAGToDAGISel::Select(SDValue Op) {
SDNode *N = Op.getNode();
DebugLoc dl = N->getDebugLoc();
@@ -1337,7 +1552,7 @@ SDNode *ARMDAGToDAGISel::Select(SDValue Op) {
SDNode *ResNode;
if (Subtarget->isThumb1Only()) {
- SDValue Pred = CurDAG->getTargetConstant(0xEULL, MVT::i32);
+ SDValue Pred = CurDAG->getTargetConstant(14, MVT::i32);
SDValue PredReg = CurDAG->getRegister(0, MVT::i32);
SDValue Ops[] = { CPIdx, Pred, PredReg, CurDAG->getEntryNode() };
ResNode = CurDAG->getMachineNode(ARM::tLDRcp, dl, MVT::i32, MVT::Other,
@@ -1549,122 +1764,8 @@ SDNode *ARMDAGToDAGISel::Select(SDValue Op) {
SDValue(Chain.getNode(), Chain.getResNo()));
return NULL;
}
- case ARMISD::CMOV: {
- EVT VT = Op.getValueType();
- SDValue N0 = Op.getOperand(0);
- SDValue N1 = Op.getOperand(1);
- SDValue N2 = Op.getOperand(2);
- SDValue N3 = Op.getOperand(3);
- SDValue InFlag = Op.getOperand(4);
- assert(N2.getOpcode() == ISD::Constant);
- assert(N3.getOpcode() == ISD::Register);
-
- if (!Subtarget->isThumb1Only() && VT == MVT::i32) {
- // Pattern: (ARMcmov:i32 GPR:i32:$false, so_reg:i32:$true, (imm:i32):$cc)
- // Emits: (MOVCCs:i32 GPR:i32:$false, so_reg:i32:$true, (imm:i32):$cc)
- // Pattern complexity = 18 cost = 1 size = 0
- SDValue CPTmp0;
- SDValue CPTmp1;
- SDValue CPTmp2;
- if (Subtarget->isThumb()) {
- if (SelectT2ShifterOperandReg(Op, N1, CPTmp0, CPTmp1)) {
- unsigned SOVal = cast<ConstantSDNode>(CPTmp1)->getZExtValue();
- unsigned SOShOp = ARM_AM::getSORegShOp(SOVal);
- unsigned Opc = 0;
- switch (SOShOp) {
- case ARM_AM::lsl: Opc = ARM::t2MOVCClsl; break;
- case ARM_AM::lsr: Opc = ARM::t2MOVCClsr; break;
- case ARM_AM::asr: Opc = ARM::t2MOVCCasr; break;
- case ARM_AM::ror: Opc = ARM::t2MOVCCror; break;
- default:
- llvm_unreachable("Unknown so_reg opcode!");
- break;
- }
- SDValue SOShImm =
- CurDAG->getTargetConstant(ARM_AM::getSORegOffset(SOVal), MVT::i32);
- SDValue Tmp2 = CurDAG->getTargetConstant(((unsigned)
- cast<ConstantSDNode>(N2)->getZExtValue()),
- MVT::i32);
- SDValue Ops[] = { N0, CPTmp0, SOShImm, Tmp2, N3, InFlag };
- return CurDAG->SelectNodeTo(Op.getNode(), Opc, MVT::i32,Ops, 6);
- }
- } else {
- if (SelectShifterOperandReg(Op, N1, CPTmp0, CPTmp1, CPTmp2)) {
- SDValue Tmp2 = CurDAG->getTargetConstant(((unsigned)
- cast<ConstantSDNode>(N2)->getZExtValue()),
- MVT::i32);
- SDValue Ops[] = { N0, CPTmp0, CPTmp1, CPTmp2, Tmp2, N3, InFlag };
- return CurDAG->SelectNodeTo(Op.getNode(),
- ARM::MOVCCs, MVT::i32, Ops, 7);
- }
- }
-
- // Pattern: (ARMcmov:i32 GPR:i32:$false,
- // (imm:i32)<<P:Predicate_so_imm>>:$true,
- // (imm:i32):$cc)
- // Emits: (MOVCCi:i32 GPR:i32:$false,
- // (so_imm:i32 (imm:i32):$true), (imm:i32):$cc)
- // Pattern complexity = 10 cost = 1 size = 0
- if (N3.getOpcode() == ISD::Constant) {
- if (Subtarget->isThumb()) {
- if (Predicate_t2_so_imm(N3.getNode())) {
- SDValue Tmp1 = CurDAG->getTargetConstant(((unsigned)
- cast<ConstantSDNode>(N1)->getZExtValue()),
- MVT::i32);
- SDValue Tmp2 = CurDAG->getTargetConstant(((unsigned)
- cast<ConstantSDNode>(N2)->getZExtValue()),
- MVT::i32);
- SDValue Ops[] = { N0, Tmp1, Tmp2, N3, InFlag };
- return CurDAG->SelectNodeTo(Op.getNode(),
- ARM::t2MOVCCi, MVT::i32, Ops, 5);
- }
- } else {
- if (Predicate_so_imm(N3.getNode())) {
- SDValue Tmp1 = CurDAG->getTargetConstant(((unsigned)
- cast<ConstantSDNode>(N1)->getZExtValue()),
- MVT::i32);
- SDValue Tmp2 = CurDAG->getTargetConstant(((unsigned)
- cast<ConstantSDNode>(N2)->getZExtValue()),
- MVT::i32);
- SDValue Ops[] = { N0, Tmp1, Tmp2, N3, InFlag };
- return CurDAG->SelectNodeTo(Op.getNode(),
- ARM::MOVCCi, MVT::i32, Ops, 5);
- }
- }
- }
- }
-
- // Pattern: (ARMcmov:i32 GPR:i32:$false, GPR:i32:$true, (imm:i32):$cc)
- // Emits: (MOVCCr:i32 GPR:i32:$false, GPR:i32:$true, (imm:i32):$cc)
- // Pattern complexity = 6 cost = 1 size = 0
- //
- // Pattern: (ARMcmov:i32 GPR:i32:$false, GPR:i32:$true, (imm:i32):$cc)
- // Emits: (tMOVCCr:i32 GPR:i32:$false, GPR:i32:$true, (imm:i32):$cc)
- // Pattern complexity = 6 cost = 11 size = 0
- //
- // Also FCPYScc and FCPYDcc.
- SDValue Tmp2 = CurDAG->getTargetConstant(((unsigned)
- cast<ConstantSDNode>(N2)->getZExtValue()),
- MVT::i32);
- SDValue Ops[] = { N0, N1, Tmp2, N3, InFlag };
- unsigned Opc = 0;
- switch (VT.getSimpleVT().SimpleTy) {
- default: assert(false && "Illegal conditional move type!");
- break;
- case MVT::i32:
- Opc = Subtarget->isThumb()
- ? (Subtarget->hasThumb2() ? ARM::t2MOVCCr : ARM::tMOVCCr_pseudo)
- : ARM::MOVCCr;
- break;
- case MVT::f32:
- Opc = ARM::VMOVScc;
- break;
- case MVT::f64:
- Opc = ARM::VMOVDcc;
- break;
- }
- return CurDAG->SelectNodeTo(Op.getNode(), Opc, VT, Ops, 5);
- }
+ case ARMISD::CMOV:
+ return SelectCMOVOp(Op);
case ARMISD::CNEG: {
EVT VT = Op.getValueType();
SDValue N0 = Op.getOperand(0);
@@ -1707,8 +1808,10 @@ SDNode *ARMDAGToDAGISel::Select(SDValue Op) {
case MVT::v4f32:
case MVT::v4i32: Opc = ARM::VZIPq32; break;
}
- return CurDAG->getMachineNode(Opc, dl, VT, VT,
- N->getOperand(0), N->getOperand(1));
+ SDValue Pred = CurDAG->getTargetConstant(14, MVT::i32);
+ SDValue PredReg = CurDAG->getRegister(0, MVT::i32);
+ SDValue Ops[] = { N->getOperand(0), N->getOperand(1), Pred, PredReg };
+ return CurDAG->getMachineNode(Opc, dl, VT, VT, Ops, 4);
}
case ARMISD::VUZP: {
unsigned Opc = 0;
@@ -1724,8 +1827,10 @@ SDNode *ARMDAGToDAGISel::Select(SDValue Op) {
case MVT::v4f32:
case MVT::v4i32: Opc = ARM::VUZPq32; break;
}
- return CurDAG->getMachineNode(Opc, dl, VT, VT,
- N->getOperand(0), N->getOperand(1));
+ SDValue Pred = CurDAG->getTargetConstant(14, MVT::i32);
+ SDValue PredReg = CurDAG->getRegister(0, MVT::i32);
+ SDValue Ops[] = { N->getOperand(0), N->getOperand(1), Pred, PredReg };
+ return CurDAG->getMachineNode(Opc, dl, VT, VT, Ops, 4);
}
case ARMISD::VTRN: {
unsigned Opc = 0;
@@ -1741,8 +1846,10 @@ SDNode *ARMDAGToDAGISel::Select(SDValue Op) {
case MVT::v4f32:
case MVT::v4i32: Opc = ARM::VTRNq32; break;
}
- return CurDAG->getMachineNode(Opc, dl, VT, VT,
- N->getOperand(0), N->getOperand(1));
+ SDValue Pred = CurDAG->getTargetConstant(14, MVT::i32);
+ SDValue PredReg = CurDAG->getRegister(0, MVT::i32);
+ SDValue Ops[] = { N->getOperand(0), N->getOperand(1), Pred, PredReg };
+ return CurDAG->getMachineNode(Opc, dl, VT, VT, Ops, 4);
}
case ISD::INTRINSIC_VOID:
diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp
index c3af8e6..c839fc6 100644
--- a/lib/Target/ARM/ARMISelLowering.cpp
+++ b/lib/Target/ARM/ARMISelLowering.cpp
@@ -39,6 +39,7 @@
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/Target/TargetOptions.h"
#include "llvm/ADT/VectorExtras.h"
+#include "llvm/Support/CommandLine.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
#include <sstream>
@@ -355,10 +356,6 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM)
setOperationAction(ISD::SDIVREM, MVT::i32, Expand);
setOperationAction(ISD::UDIVREM, MVT::i32, Expand);
- // Support label based line numbers.
- setOperationAction(ISD::DBG_STOPPOINT, MVT::Other, Expand);
- setOperationAction(ISD::DEBUG_LOC, MVT::Other, Expand);
-
setOperationAction(ISD::GlobalAddress, MVT::i32, Custom);
setOperationAction(ISD::ConstantPool, MVT::i32, Custom);
setOperationAction(ISD::GLOBAL_OFFSET_TABLE, MVT::i32, Custom);
@@ -1360,10 +1357,17 @@ SDValue ARMTargetLowering::LowerGlobalAddressELF(SDValue Op,
PseudoSourceValue::getGOT(), 0);
return Result;
} else {
- SDValue CPAddr = DAG.getTargetConstantPool(GV, PtrVT, 4);
- CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
- return DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), CPAddr,
- PseudoSourceValue::getConstantPool(), 0);
+ // If we have T2 ops, we can materialize the address directly via a
+ // movt/movw pair. This is always cheaper.
+ if (Subtarget->useMovt()) {
+ return DAG.getNode(ARMISD::Wrapper, dl, PtrVT,
+ DAG.getTargetGlobalAddress(GV, PtrVT));
+ } else {
+ SDValue CPAddr = DAG.getTargetConstantPool(GV, PtrVT, 4);
+ CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
+ return DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), CPAddr,
+ PseudoSourceValue::getConstantPool(), 0);
+ }
}
}
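#include <cstdint>
// Illustration, not part of the patch: with useMovt() the 32-bit global
// address is rebuilt inline instead of being loaded from the constant
// pool; movw sets the low half (a :lower16: fixup) and movt the high
// half (:upper16:), so the split is simply:
static uint32_t movwImm(uint32_t Addr) { return Addr & 0xFFFF; } // low 16
static uint32_t movtImm(uint32_t Addr) { return Addr >> 16; }    // high 16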
diff --git a/lib/Target/ARM/ARMInstrFormats.td b/lib/Target/ARM/ARMInstrFormats.td
index 83b5cb4..e76e93c 100644
--- a/lib/Target/ARM/ARMInstrFormats.td
+++ b/lib/Target/ARM/ARMInstrFormats.td
@@ -1217,27 +1217,45 @@ class AVConv5I<bits<8> opcod1, bits<4> opcod2, dag oops, dag iops,
//
class NeonI<dag oops, dag iops, AddrMode am, IndexMode im, InstrItinClass itin,
- string asm, string cstr, list<dag> pattern>
+ string opc, string dt, string asm, string cstr, list<dag> pattern>
: InstARM<am, Size4Bytes, im, NEONFrm, NeonDomain, cstr, itin> {
let OutOperandList = oops;
- let InOperandList = iops;
- let AsmString = asm;
+ let InOperandList = !con(iops, (ops pred:$p));
+ let AsmString = !strconcat(
+ !strconcat(!strconcat(opc, "${p}"), !strconcat(".", dt)),
+ !strconcat("\t", asm));
let Pattern = pattern;
list<Predicate> Predicates = [HasNEON];
}
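#include <string>
// Illustration, not part of the patch: what the !strconcat chain above
// assembles. For opc "vld1", dt "8", asm "\{$dst\}, $addr" the AsmString
// becomes "vld1${p}.8\t\{$dst\}, $addr"; the printer expands ${p} to ""
// for AL or to a condition suffix such as "eq".
static std::string neonAsmString(const std::string &Opc, const std::string &Dt,
                                 const std::string &Asm) {
  return Opc + "${p}" + "." + Dt + "\t" + Asm;
}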
-class NI<dag oops, dag iops, InstrItinClass itin, string asm, list<dag> pattern>
- : NeonI<oops, iops, AddrModeNone, IndexModeNone, itin, asm, "", pattern> {
+// Same as NeonI except it does not have a "data type" specifier.
+class NeonXI<dag oops, dag iops, AddrMode am, IndexMode im, InstrItinClass itin,
+ string opc, string asm, string cstr, list<dag> pattern>
+ : InstARM<am, Size4Bytes, im, NEONFrm, NeonDomain, cstr, itin> {
+ let OutOperandList = oops;
+ let InOperandList = !con(iops, (ops pred:$p));
+ let AsmString = !strconcat(!strconcat(opc, "${p}"), !strconcat("\t", asm));
+ let Pattern = pattern;
+ list<Predicate> Predicates = [HasNEON];
}
-class NI4<dag oops, dag iops, InstrItinClass itin, string asm, list<dag> pattern>
- : NeonI<oops, iops, AddrMode4, IndexModeNone, itin, asm, "", pattern> {
+class NI<dag oops, dag iops, InstrItinClass itin, string opc, string asm,
+ list<dag> pattern>
+ : NeonXI<oops, iops, AddrModeNone, IndexModeNone, itin, opc, asm, "",
+ pattern> {
+}
+
+class NI4<dag oops, dag iops, InstrItinClass itin, string opc,
+ string asm, list<dag> pattern>
+ : NeonXI<oops, iops, AddrMode4, IndexModeNone, itin, opc, asm, "",
+ pattern> {
}
class NLdSt<bit op23, bits<2> op21_20, bits<4> op11_8, bits<4> op7_4,
dag oops, dag iops, InstrItinClass itin,
- string asm, string cstr, list<dag> pattern>
- : NeonI<oops, iops, AddrMode6, IndexModeNone, itin, asm, cstr, pattern> {
+ string opc, string dt, string asm, string cstr, list<dag> pattern>
+ : NeonI<oops, iops, AddrMode6, IndexModeNone, itin, opc, dt, asm, cstr,
+ pattern> {
let Inst{31-24} = 0b11110100;
let Inst{23} = op23;
let Inst{21-20} = op21_20;
@@ -1246,8 +1264,16 @@ class NLdSt<bit op23, bits<2> op21_20, bits<4> op11_8, bits<4> op7_4,
}
class NDataI<dag oops, dag iops, InstrItinClass itin,
- string asm, string cstr, list<dag> pattern>
- : NeonI<oops, iops, AddrModeNone, IndexModeNone, itin, asm, cstr, pattern> {
+ string opc, string dt, string asm, string cstr, list<dag> pattern>
+ : NeonI<oops, iops, AddrModeNone, IndexModeNone, itin, opc, dt, asm,
+ cstr, pattern> {
+ let Inst{31-25} = 0b1111001;
+}
+
+class NDataXI<dag oops, dag iops, InstrItinClass itin,
+ string opc, string asm, string cstr, list<dag> pattern>
+ : NeonXI<oops, iops, AddrModeNone, IndexModeNone, itin, opc, asm,
+ cstr, pattern> {
let Inst{31-25} = 0b1111001;
}
@@ -1255,8 +1281,8 @@ class NDataI<dag oops, dag iops, InstrItinClass itin,
class N1ModImm<bit op23, bits<3> op21_19, bits<4> op11_8, bit op7, bit op6,
bit op5, bit op4,
dag oops, dag iops, InstrItinClass itin,
- string asm, string cstr, list<dag> pattern>
- : NDataI<oops, iops, itin, asm, cstr, pattern> {
+ string opc, string dt, string asm, string cstr, list<dag> pattern>
+ : NDataI<oops, iops, itin, opc, dt, asm, cstr, pattern> {
let Inst{23} = op23;
let Inst{21-19} = op21_19;
let Inst{11-8} = op11_8;
@@ -1270,8 +1296,8 @@ class N1ModImm<bit op23, bits<3> op21_19, bits<4> op11_8, bit op7, bit op6,
class N2V<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18, bits<2> op17_16,
bits<5> op11_7, bit op6, bit op4,
dag oops, dag iops, InstrItinClass itin,
- string asm, string cstr, list<dag> pattern>
- : NDataI<oops, iops, itin, asm, cstr, pattern> {
+ string opc, string dt, string asm, string cstr, list<dag> pattern>
+ : NDataI<oops, iops, itin, opc, dt, asm, cstr, pattern> {
let Inst{24-23} = op24_23;
let Inst{21-20} = op21_20;
let Inst{19-18} = op19_18;
@@ -1281,14 +1307,16 @@ class N2V<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18, bits<2> op17_16,
let Inst{4} = op4;
}
-// NEON Vector Duplicate (scalar).
-// Inst{19-16} is specified by subclasses.
-class N2VDup<bits<2> op24_23, bits<2> op21_20, bits<5> op11_7, bit op6, bit op4,
- dag oops, dag iops, InstrItinClass itin,
- string asm, string cstr, list<dag> pattern>
- : NDataI<oops, iops, itin, asm, cstr, pattern> {
+// Same as N2V except it doesn't have a datatype suffix.
+class N2VX<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18, bits<2> op17_16,
+ bits<5> op11_7, bit op6, bit op4,
+ dag oops, dag iops, InstrItinClass itin,
+ string opc, string asm, string cstr, list<dag> pattern>
+ : NDataXI<oops, iops, itin, opc, asm, cstr, pattern> {
let Inst{24-23} = op24_23;
let Inst{21-20} = op21_20;
+ let Inst{19-18} = op19_18;
+ let Inst{17-16} = op17_16;
let Inst{11-7} = op11_7;
let Inst{6} = op6;
let Inst{4} = op4;
@@ -1297,8 +1325,8 @@ class N2VDup<bits<2> op24_23, bits<2> op21_20, bits<5> op11_7, bit op6, bit op4,
// NEON 2 vector register with immediate.
class N2VImm<bit op24, bit op23, bits<4> op11_8, bit op7, bit op6, bit op4,
dag oops, dag iops, InstrItinClass itin,
- string asm, string cstr, list<dag> pattern>
- : NDataI<oops, iops, itin, asm, cstr, pattern> {
+ string opc, string dt, string asm, string cstr, list<dag> pattern>
+ : NDataI<oops, iops, itin, opc, dt, asm, cstr, pattern> {
let Inst{24} = op24;
let Inst{23} = op23;
let Inst{11-8} = op11_8;
@@ -1310,8 +1338,8 @@ class N2VImm<bit op24, bit op23, bits<4> op11_8, bit op7, bit op6, bit op4,
// NEON 3 vector register format.
class N3V<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op6, bit op4,
dag oops, dag iops, InstrItinClass itin,
- string asm, string cstr, list<dag> pattern>
- : NDataI<oops, iops, itin, asm, cstr, pattern> {
+ string opc, string dt, string asm, string cstr, list<dag> pattern>
+ : NDataI<oops, iops, itin, opc, dt, asm, cstr, pattern> {
let Inst{24} = op24;
let Inst{23} = op23;
let Inst{21-20} = op21_20;
@@ -1320,16 +1348,15 @@ class N3V<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op6, bit op4,
let Inst{4} = op4;
}
-// NEON 3 vector register with immediate. This is only used for VEXT where
-// op11_8 represents the starting byte index of the extracted result in the
-// concatenation of the operands and is left unspecified.
-class N3VImm<bit op24, bit op23, bits<2> op21_20, bit op6, bit op4,
- dag oops, dag iops, InstrItinClass itin,
- string asm, string cstr, list<dag> pattern>
- : NDataI<oops, iops, itin, asm, cstr, pattern> {
+// Same as N3V except it doesn't have a data type suffix.
+class N3VX<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op6, bit op4,
+ dag oops, dag iops, InstrItinClass itin,
+ string opc, string asm, string cstr, list<dag> pattern>
+ : NDataXI<oops, iops, itin, opc, asm, cstr, pattern> {
let Inst{24} = op24;
let Inst{23} = op23;
let Inst{21-20} = op21_20;
+ let Inst{11-8} = op11_8;
let Inst{6} = op6;
let Inst{4} = op4;
}
@@ -1337,29 +1364,37 @@ class N3VImm<bit op24, bit op23, bits<2> op21_20, bit op6, bit op4,
// NEON VMOVs between scalar and core registers.
class NVLaneOp<bits<8> opcod1, bits<4> opcod2, bits<2> opcod3,
dag oops, dag iops, Format f, InstrItinClass itin,
- string opc, string asm, list<dag> pattern>
- : AI<oops, iops, f, itin, opc, asm, pattern> {
+ string opc, string dt, string asm, list<dag> pattern>
+ : InstARM<AddrModeNone, Size4Bytes, IndexModeNone, f, GenericDomain,
+ "", itin> {
let Inst{27-20} = opcod1;
let Inst{11-8} = opcod2;
let Inst{6-5} = opcod3;
let Inst{4} = 1;
+
+ let OutOperandList = oops;
+ let InOperandList = !con(iops, (ops pred:$p));
+ let AsmString = !strconcat(
+ !strconcat(!strconcat(opc, "${p}"), !strconcat(".", dt)),
+ !strconcat("\t", asm));
+ let Pattern = pattern;
list<Predicate> Predicates = [HasNEON];
}
class NVGetLane<bits<8> opcod1, bits<4> opcod2, bits<2> opcod3,
dag oops, dag iops, InstrItinClass itin,
- string opc, string asm, list<dag> pattern>
+ string opc, string dt, string asm, list<dag> pattern>
: NVLaneOp<opcod1, opcod2, opcod3, oops, iops, NEONGetLnFrm, itin,
- opc, asm, pattern>;
+ opc, dt, asm, pattern>;
class NVSetLane<bits<8> opcod1, bits<4> opcod2, bits<2> opcod3,
dag oops, dag iops, InstrItinClass itin,
- string opc, string asm, list<dag> pattern>
+ string opc, string dt, string asm, list<dag> pattern>
: NVLaneOp<opcod1, opcod2, opcod3, oops, iops, NEONSetLnFrm, itin,
- opc, asm, pattern>;
+ opc, dt, asm, pattern>;
class NVDup<bits<8> opcod1, bits<4> opcod2, bits<2> opcod3,
dag oops, dag iops, InstrItinClass itin,
- string opc, string asm, list<dag> pattern>
+ string opc, string dt, string asm, list<dag> pattern>
: NVLaneOp<opcod1, opcod2, opcod3, oops, iops, NEONDupFrm, itin,
- opc, asm, pattern>;
+ opc, dt, asm, pattern>;
// NEONFPPat - Same as Pat<>, but requires that the compiler be using NEON
// for single-precision FP.
diff --git a/lib/Target/ARM/ARMInstrInfo.td b/lib/Target/ARM/ARMInstrInfo.td
index 79bde29..7516d3c 100644
--- a/lib/Target/ARM/ARMInstrInfo.td
+++ b/lib/Target/ARM/ARMInstrInfo.td
@@ -116,6 +116,10 @@ def IsNotDarwin : Predicate<"!Subtarget->isTargetDarwin()">;
def CarryDefIsUnused : Predicate<"!N.getNode()->hasAnyUseOfValue(1)">;
def CarryDefIsUsed : Predicate<"N.getNode()->hasAnyUseOfValue(1)">;
+// FIXME: Eventually this will be just "hasV6T2Ops".
+def UseMovt : Predicate<"Subtarget->useMovt()">;
+def DontUseMovt : Predicate<"!Subtarget->useMovt()">;
+
//===----------------------------------------------------------------------===//
// ARM Flag Definitions.
@@ -204,7 +208,7 @@ def hi16 : SDNodeXForm<imm, [{
def lo16AllZero : PatLeaf<(i32 imm), [{
// Returns true if all low 16-bits are 0.
return (((uint32_t)N->getZExtValue()) & 0xFFFFUL) == 0;
- }], hi16>;
+}], hi16>;
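#include <cstdint>
// Illustration, not part of the patch: what the lo16AllZero leaf and its
// hi16 transform (re-indented above) compute. The leaf admits constants
// whose low 16 bits are zero; the transform hands MOVT the high half.
static bool lo16AllZero(uint32_t V) { return (V & 0xFFFF) == 0; }
static uint32_t hi16(uint32_t V)    { return V >> 16; }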
/// imm0_65535 predicate - True if the 32-bit immediate is in the range
/// [0,65535].
@@ -284,6 +288,22 @@ def so_imm2part_2 : SDNodeXForm<imm, [{
return CurDAG->getTargetConstant(V, MVT::i32);
}]>;
+def so_neg_imm2part : Operand<i32>, PatLeaf<(imm), [{
+ return ARM_AM::isSOImmTwoPartVal(-(int)N->getZExtValue());
+ }]> {
+ let PrintMethod = "printSOImm2PartOperand";
+}
+
+def so_neg_imm2part_1 : SDNodeXForm<imm, [{
+ unsigned V = ARM_AM::getSOImmTwoPartFirst(-(int)N->getZExtValue());
+ return CurDAG->getTargetConstant(V, MVT::i32);
+}]>;
+
+def so_neg_imm2part_2 : SDNodeXForm<imm, [{
+ unsigned V = ARM_AM::getSOImmTwoPartSecond(-(int)N->getZExtValue());
+ return CurDAG->getTargetConstant(V, MVT::i32);
+}]>;
+
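#include <cstdint>
// Illustration, not part of the patch (a sketch of the ARM encoding rule,
// not ARM_AM's actual code): an so_imm is an 8-bit value rotated right by
// an even amount, and isSOImmTwoPartVal asks whether a constant splits
// into two such pieces. so_neg_imm2part applies the test to -N, so that
// an add of a negative constant can be selected as two SUBri, as the
// rewritten pattern further below does.
static uint32_t rotl32(uint32_t V, unsigned N) {
  return N == 0 ? V : (V << N) | (V >> (32 - N));
}
static bool isSOImm(uint32_t V) { // encodable as imm8 ROR (2*n)?
  for (unsigned Rot = 0; Rot != 32; Rot += 2)
    if (rotl32(V, Rot) <= 0xFF) // rotating left undoes the ROR
      return true;
  return false;
}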
/// imm0_31 predicate - True if the 32-bit immediate is in the range [0,31].
def imm0_31 : Operand<i32>, PatLeaf<(imm), [{
return (int32_t)N->getZExtValue() < 32;
@@ -568,12 +588,6 @@ PseudoInst<(outs), (ins i32imm:$amt, pred:$p), NoItinerary,
[(ARMcallseq_start timm:$amt)]>;
}
-def DWARF_LOC :
-PseudoInst<(outs), (ins i32imm:$line, i32imm:$col, i32imm:$file), NoItinerary,
- ".loc $file, $line, $col",
- [(dwarf_loc (i32 imm:$line), (i32 imm:$col), (i32 imm:$file))]>;
-
-
// Address computation and loads and stores in PIC mode.
let isNotDuplicable = 1 in {
def PICADD : AXI1<0b0100, (outs GPR:$dst), (ins GPR:$a, pclabel:$cp, pred:$p),
@@ -581,25 +595,24 @@ def PICADD : AXI1<0b0100, (outs GPR:$dst), (ins GPR:$a, pclabel:$cp, pred:$p),
[(set GPR:$dst, (ARMpic_add GPR:$a, imm:$cp))]>;
let AddedComplexity = 10 in {
-let canFoldAsLoad = 1 in
def PICLDR : AXI2ldw<(outs GPR:$dst), (ins addrmodepc:$addr, pred:$p),
Pseudo, IIC_iLoadr, "\n${addr:label}:\n\tldr$p\t$dst, $addr",
[(set GPR:$dst, (load addrmodepc:$addr))]>;
def PICLDRH : AXI3ldh<(outs GPR:$dst), (ins addrmodepc:$addr, pred:$p),
- Pseudo, IIC_iLoadr, "\n${addr:label}:\n\tldr${p}h\t$dst, $addr",
+ Pseudo, IIC_iLoadr, "\n${addr:label}:\n\tldrh${p}\t$dst, $addr",
[(set GPR:$dst, (zextloadi16 addrmodepc:$addr))]>;
def PICLDRB : AXI2ldb<(outs GPR:$dst), (ins addrmodepc:$addr, pred:$p),
- Pseudo, IIC_iLoadr, "\n${addr:label}:\n\tldr${p}b\t$dst, $addr",
+ Pseudo, IIC_iLoadr, "\n${addr:label}:\n\tldrb${p}\t$dst, $addr",
[(set GPR:$dst, (zextloadi8 addrmodepc:$addr))]>;
def PICLDRSH : AXI3ldsh<(outs GPR:$dst), (ins addrmodepc:$addr, pred:$p),
- Pseudo, IIC_iLoadr, "\n${addr:label}:\n\tldr${p}sh\t$dst, $addr",
+ Pseudo, IIC_iLoadr, "\n${addr:label}:\n\tldrsh${p}\t$dst, $addr",
[(set GPR:$dst, (sextloadi16 addrmodepc:$addr))]>;
def PICLDRSB : AXI3ldsb<(outs GPR:$dst), (ins addrmodepc:$addr, pred:$p),
- Pseudo, IIC_iLoadr, "\n${addr:label}:\n\tldr${p}sb\t$dst, $addr",
+ Pseudo, IIC_iLoadr, "\n${addr:label}:\n\tldrsb${p}\t$dst, $addr",
[(set GPR:$dst, (sextloadi8 addrmodepc:$addr))]>;
}
let AddedComplexity = 10 in {
@@ -801,13 +814,14 @@ let isBranch = 1, isTerminator = 1 in {
//
// Load
-let canFoldAsLoad = 1, isReMaterializable = 1 in
+let canFoldAsLoad = 1, isReMaterializable = 1, mayHaveSideEffects = 1 in
def LDR : AI2ldw<(outs GPR:$dst), (ins addrmode2:$addr), LdFrm, IIC_iLoadr,
"ldr", "\t$dst, $addr",
[(set GPR:$dst, (load addrmode2:$addr))]>;
// Special LDR for loads from non-pc-relative constpools.
-let canFoldAsLoad = 1, mayLoad = 1, isReMaterializable = 1 in
+let canFoldAsLoad = 1, mayLoad = 1, isReMaterializable = 1,
+ mayHaveSideEffects = 1 in
def LDRcp : AI2ldw<(outs GPR:$dst), (ins addrmode2:$addr), LdFrm, IIC_iLoadr,
"ldr", "\t$dst, $addr", []>;
@@ -992,7 +1006,7 @@ def MOVi16 : AI1<0b1000, (outs GPR:$dst), (ins i32imm:$src),
let Constraints = "$src = $dst" in
def MOVTi16 : AI1<0b1010, (outs GPR:$dst), (ins GPR:$src, i32imm:$imm),
DPFrm, IIC_iMOVi,
- "movt", "\t$dst, $imm",
+ "movt", "\t$dst, $imm",
[(set GPR:$dst,
(or (and GPR:$src, 0xffff),
lo16AllZero:$imm))]>, UnaryDP,
@@ -1593,12 +1607,6 @@ let Defs =
// Non-Instruction Patterns
//
-// ConstantPool, GlobalAddress, and JumpTable
-def : ARMPat<(ARMWrapper tglobaladdr :$dst), (LEApcrel tglobaladdr :$dst)>;
-def : ARMPat<(ARMWrapper tconstpool :$dst), (LEApcrel tconstpool :$dst)>;
-def : ARMPat<(ARMWrapperJT tjumptable:$dst, imm:$id),
- (LEApcrelJT tjumptable:$dst, imm:$id)>;
-
// Large immediate handling.
// Two piece so_imms.
@@ -1618,9 +1626,9 @@ def : ARMPat<(xor GPR:$LHS, so_imm2part:$RHS),
def : ARMPat<(add GPR:$LHS, so_imm2part:$RHS),
(ADDri (ADDri GPR:$LHS, (so_imm2part_1 imm:$RHS)),
(so_imm2part_2 imm:$RHS))>;
-def : ARMPat<(sub GPR:$LHS, so_imm2part:$RHS),
- (SUBri (SUBri GPR:$LHS, (so_imm2part_1 imm:$RHS)),
- (so_imm2part_2 imm:$RHS))>;
+def : ARMPat<(add GPR:$LHS, so_neg_imm2part:$RHS),
+ (SUBri (SUBri GPR:$LHS, (so_neg_imm2part_1 imm:$RHS)),
+ (so_neg_imm2part_2 imm:$RHS))>;
// 32-bit immediate using movw + movt.
// This is a single pseudo instruction, the benefit is that it can be remat'd
@@ -1628,10 +1636,19 @@ def : ARMPat<(sub GPR:$LHS, so_imm2part:$RHS),
// FIXME: Remove this when we can do generalized remat.
let isReMaterializable = 1 in
def MOVi32imm : AI1x2<(outs GPR:$dst), (ins i32imm:$src), Pseudo, IIC_iMOVi,
- "movw", "\t$dst, ${src:lo16}\n\tmovt${p} $dst, ${src:hi16}",
+ "movw", "\t$dst, ${src:lo16}\n\tmovt${p}\t$dst, ${src:hi16}",
[(set GPR:$dst, (i32 imm:$src))]>,
Requires<[IsARM, HasV6T2]>;
+// ConstantPool, GlobalAddress, and JumpTable
+def : ARMPat<(ARMWrapper tglobaladdr :$dst), (LEApcrel tglobaladdr :$dst)>,
+ Requires<[IsARM, DontUseMovt]>;
+def : ARMPat<(ARMWrapper tconstpool :$dst), (LEApcrel tconstpool :$dst)>;
+def : ARMPat<(ARMWrapper tglobaladdr :$dst), (MOVi32imm tglobaladdr :$dst)>,
+ Requires<[IsARM, UseMovt]>;
+def : ARMPat<(ARMWrapperJT tjumptable:$dst, imm:$id),
+ (LEApcrelJT tjumptable:$dst, imm:$id)>;
+
// TODO: add,sub,and, 3-instr forms?
diff --git a/lib/Target/ARM/ARMInstrNEON.td b/lib/Target/ARM/ARMInstrNEON.td
index e1353b7..3166931 100644
--- a/lib/Target/ARM/ARMInstrNEON.td
+++ b/lib/Target/ARM/ARMInstrNEON.td
@@ -124,7 +124,7 @@ let mayLoad = 1, hasExtraDefRegAllocReq = 1 in {
def VLDMD : NI<(outs),
(ins addrmode_neonldstm:$addr, reglist:$dst1, variable_ops),
IIC_fpLoadm,
- "vldm${addr:submode} ${addr:base}, $dst1",
+ "vldm", "${addr:submode} ${addr:base}, $dst1",
[]> {
let Inst{27-25} = 0b110;
let Inst{20} = 1;
@@ -134,7 +134,7 @@ def VLDMD : NI<(outs),
def VLDMS : NI<(outs),
(ins addrmode_neonldstm:$addr, reglist:$dst1, variable_ops),
IIC_fpLoadm,
- "vldm${addr:submode} ${addr:base}, $dst1",
+ "vldm", "${addr:submode} ${addr:base}, $dst1",
[]> {
let Inst{27-25} = 0b110;
let Inst{20} = 1;
@@ -146,7 +146,7 @@ def VLDMS : NI<(outs),
// Use vldmia to load a Q register as a D register pair.
def VLDRQ : NI4<(outs QPR:$dst), (ins addrmode4:$addr),
IIC_fpLoadm,
- "vldmia\t$addr, ${dst:dregpair}",
+ "vldmia", "$addr, ${dst:dregpair}",
[(set QPR:$dst, (v2f64 (load addrmode4:$addr)))]> {
let Inst{27-25} = 0b110;
let Inst{24} = 0; // P bit
@@ -158,7 +158,7 @@ def VLDRQ : NI4<(outs QPR:$dst), (ins addrmode4:$addr),
// Use vstmia to store a Q register as a D register pair.
def VSTRQ : NI4<(outs), (ins QPR:$src, addrmode4:$addr),
IIC_fpStorem,
- "vstmia\t$addr, ${src:dregpair}",
+ "vstmia", "$addr, ${src:dregpair}",
[(store (v2f64 QPR:$src), addrmode4:$addr)]> {
let Inst{27-25} = 0b110;
let Inst{24} = 0; // P bit
@@ -168,178 +168,221 @@ def VSTRQ : NI4<(outs), (ins QPR:$src, addrmode4:$addr),
}
// VLD1 : Vector Load (multiple single elements)
-class VLD1D<bits<4> op7_4, string OpcodeStr, ValueType Ty, Intrinsic IntOp>
+class VLD1D<bits<4> op7_4, string OpcodeStr, string Dt,
+ ValueType Ty, Intrinsic IntOp>
: NLdSt<0,0b10,0b0111,op7_4, (outs DPR:$dst), (ins addrmode6:$addr), IIC_VLD1,
- !strconcat(OpcodeStr, "\t\\{$dst\\}, $addr"), "",
+ OpcodeStr, Dt, "\\{$dst\\}, $addr", "",
[(set DPR:$dst, (Ty (IntOp addrmode6:$addr)))]>;
-class VLD1Q<bits<4> op7_4, string OpcodeStr, ValueType Ty, Intrinsic IntOp>
+class VLD1Q<bits<4> op7_4, string OpcodeStr, string Dt,
+ ValueType Ty, Intrinsic IntOp>
: NLdSt<0,0b10,0b1010,op7_4, (outs QPR:$dst), (ins addrmode6:$addr), IIC_VLD1,
- !strconcat(OpcodeStr, "\t${dst:dregpair}, $addr"), "",
+ OpcodeStr, Dt, "${dst:dregpair}, $addr", "",
[(set QPR:$dst, (Ty (IntOp addrmode6:$addr)))]>;
-def VLD1d8 : VLD1D<0b0000, "vld1.8", v8i8, int_arm_neon_vld1>;
-def VLD1d16 : VLD1D<0b0100, "vld1.16", v4i16, int_arm_neon_vld1>;
-def VLD1d32 : VLD1D<0b1000, "vld1.32", v2i32, int_arm_neon_vld1>;
-def VLD1df : VLD1D<0b1000, "vld1.32", v2f32, int_arm_neon_vld1>;
-def VLD1d64 : VLD1D<0b1100, "vld1.64", v1i64, int_arm_neon_vld1>;
+def VLD1d8 : VLD1D<0b0000, "vld1", "8", v8i8, int_arm_neon_vld1>;
+def VLD1d16 : VLD1D<0b0100, "vld1", "16", v4i16, int_arm_neon_vld1>;
+def VLD1d32 : VLD1D<0b1000, "vld1", "32", v2i32, int_arm_neon_vld1>;
+def VLD1df : VLD1D<0b1000, "vld1", "32", v2f32, int_arm_neon_vld1>;
+def VLD1d64 : VLD1D<0b1100, "vld1", "64", v1i64, int_arm_neon_vld1>;
-def VLD1q8 : VLD1Q<0b0000, "vld1.8", v16i8, int_arm_neon_vld1>;
-def VLD1q16 : VLD1Q<0b0100, "vld1.16", v8i16, int_arm_neon_vld1>;
-def VLD1q32 : VLD1Q<0b1000, "vld1.32", v4i32, int_arm_neon_vld1>;
-def VLD1qf : VLD1Q<0b1000, "vld1.32", v4f32, int_arm_neon_vld1>;
-def VLD1q64 : VLD1Q<0b1100, "vld1.64", v2i64, int_arm_neon_vld1>;
+def VLD1q8 : VLD1Q<0b0000, "vld1", "8", v16i8, int_arm_neon_vld1>;
+def VLD1q16 : VLD1Q<0b0100, "vld1", "16", v8i16, int_arm_neon_vld1>;
+def VLD1q32 : VLD1Q<0b1000, "vld1", "32", v4i32, int_arm_neon_vld1>;
+def VLD1qf : VLD1Q<0b1000, "vld1", "32", v4f32, int_arm_neon_vld1>;
+def VLD1q64 : VLD1Q<0b1100, "vld1", "64", v2i64, int_arm_neon_vld1>;
let mayLoad = 1, hasExtraDefRegAllocReq = 1 in {
// VLD2 : Vector Load (multiple 2-element structures)
-class VLD2D<bits<4> op7_4, string OpcodeStr>
+class VLD2D<bits<4> op7_4, string OpcodeStr, string Dt>
: NLdSt<0,0b10,0b1000,op7_4, (outs DPR:$dst1, DPR:$dst2),
(ins addrmode6:$addr), IIC_VLD2,
- !strconcat(OpcodeStr, "\t\\{$dst1,$dst2\\}, $addr"), "", []>;
-class VLD2Q<bits<4> op7_4, string OpcodeStr>
+ OpcodeStr, Dt, "\\{$dst1,$dst2\\}, $addr", "", []>;
+class VLD2Q<bits<4> op7_4, string OpcodeStr, string Dt>
: NLdSt<0,0b10,0b0011,op7_4,
(outs DPR:$dst1, DPR:$dst2, DPR:$dst3, DPR:$dst4),
(ins addrmode6:$addr), IIC_VLD2,
- !strconcat(OpcodeStr, "\t\\{$dst1,$dst2,$dst3,$dst4\\}, $addr"),
+ OpcodeStr, Dt, "\\{$dst1,$dst2,$dst3,$dst4\\}, $addr",
"", []>;
-def VLD2d8 : VLD2D<0b0000, "vld2.8">;
-def VLD2d16 : VLD2D<0b0100, "vld2.16">;
-def VLD2d32 : VLD2D<0b1000, "vld2.32">;
+def VLD2d8 : VLD2D<0b0000, "vld2", "8">;
+def VLD2d16 : VLD2D<0b0100, "vld2", "16">;
+def VLD2d32 : VLD2D<0b1000, "vld2", "32">;
def VLD2d64 : NLdSt<0,0b10,0b1010,0b1100, (outs DPR:$dst1, DPR:$dst2),
(ins addrmode6:$addr), IIC_VLD1,
- "vld1.64\t\\{$dst1,$dst2\\}, $addr", "", []>;
+ "vld1", "64", "\\{$dst1,$dst2\\}, $addr", "", []>;
-def VLD2q8 : VLD2Q<0b0000, "vld2.8">;
-def VLD2q16 : VLD2Q<0b0100, "vld2.16">;
-def VLD2q32 : VLD2Q<0b1000, "vld2.32">;
+def VLD2q8 : VLD2Q<0b0000, "vld2", "8">;
+def VLD2q16 : VLD2Q<0b0100, "vld2", "16">;
+def VLD2q32 : VLD2Q<0b1000, "vld2", "32">;
// VLD3 : Vector Load (multiple 3-element structures)
-class VLD3D<bits<4> op7_4, string OpcodeStr>
+class VLD3D<bits<4> op7_4, string OpcodeStr, string Dt>
: NLdSt<0,0b10,0b0100,op7_4, (outs DPR:$dst1, DPR:$dst2, DPR:$dst3),
(ins addrmode6:$addr), IIC_VLD3,
- !strconcat(OpcodeStr, "\t\\{$dst1,$dst2,$dst3\\}, $addr"), "", []>;
-class VLD3WB<bits<4> op7_4, string OpcodeStr>
+ OpcodeStr, Dt, "\\{$dst1,$dst2,$dst3\\}, $addr", "", []>;
+class VLD3WB<bits<4> op7_4, string OpcodeStr, string Dt>
: NLdSt<0,0b10,0b0101,op7_4, (outs DPR:$dst1, DPR:$dst2, DPR:$dst3, GPR:$wb),
(ins addrmode6:$addr), IIC_VLD3,
- !strconcat(OpcodeStr, "\t\\{$dst1,$dst2,$dst3\\}, $addr"),
+ OpcodeStr, Dt, "\\{$dst1,$dst2,$dst3\\}, $addr",
"$addr.addr = $wb", []>;
-def VLD3d8 : VLD3D<0b0000, "vld3.8">;
-def VLD3d16 : VLD3D<0b0100, "vld3.16">;
-def VLD3d32 : VLD3D<0b1000, "vld3.32">;
+def VLD3d8 : VLD3D<0b0000, "vld3", "8">;
+def VLD3d16 : VLD3D<0b0100, "vld3", "16">;
+def VLD3d32 : VLD3D<0b1000, "vld3", "32">;
def VLD3d64 : NLdSt<0,0b10,0b0110,0b1100,
(outs DPR:$dst1, DPR:$dst2, DPR:$dst3),
(ins addrmode6:$addr), IIC_VLD1,
- "vld1.64\t\\{$dst1,$dst2,$dst3\\}, $addr", "", []>;
+ "vld1", "64", "\\{$dst1,$dst2,$dst3\\}, $addr", "", []>;
// vld3 to double-spaced even registers.
-def VLD3q8a : VLD3WB<0b0000, "vld3.8">;
-def VLD3q16a : VLD3WB<0b0100, "vld3.16">;
-def VLD3q32a : VLD3WB<0b1000, "vld3.32">;
+def VLD3q8a : VLD3WB<0b0000, "vld3", "8">;
+def VLD3q16a : VLD3WB<0b0100, "vld3", "16">;
+def VLD3q32a : VLD3WB<0b1000, "vld3", "32">;
// vld3 to double-spaced odd registers.
-def VLD3q8b : VLD3WB<0b0000, "vld3.8">;
-def VLD3q16b : VLD3WB<0b0100, "vld3.16">;
-def VLD3q32b : VLD3WB<0b1000, "vld3.32">;
+def VLD3q8b : VLD3WB<0b0000, "vld3", "8">;
+def VLD3q16b : VLD3WB<0b0100, "vld3", "16">;
+def VLD3q32b : VLD3WB<0b1000, "vld3", "32">;
// VLD4 : Vector Load (multiple 4-element structures)
-class VLD4D<bits<4> op7_4, string OpcodeStr>
+class VLD4D<bits<4> op7_4, string OpcodeStr, string Dt>
: NLdSt<0,0b10,0b0000,op7_4,
(outs DPR:$dst1, DPR:$dst2, DPR:$dst3, DPR:$dst4),
(ins addrmode6:$addr), IIC_VLD4,
- !strconcat(OpcodeStr, "\t\\{$dst1,$dst2,$dst3,$dst4\\}, $addr"),
+ OpcodeStr, Dt, "\\{$dst1,$dst2,$dst3,$dst4\\}, $addr",
"", []>;
-class VLD4WB<bits<4> op7_4, string OpcodeStr>
+class VLD4WB<bits<4> op7_4, string OpcodeStr, string Dt>
: NLdSt<0,0b10,0b0001,op7_4,
(outs DPR:$dst1, DPR:$dst2, DPR:$dst3, DPR:$dst4, GPR:$wb),
(ins addrmode6:$addr), IIC_VLD4,
- !strconcat(OpcodeStr, "\t\\{$dst1,$dst2,$dst3,$dst4\\}, $addr"),
+ OpcodeStr, Dt, "\\{$dst1,$dst2,$dst3,$dst4\\}, $addr",
"$addr.addr = $wb", []>;
-def VLD4d8 : VLD4D<0b0000, "vld4.8">;
-def VLD4d16 : VLD4D<0b0100, "vld4.16">;
-def VLD4d32 : VLD4D<0b1000, "vld4.32">;
+def VLD4d8 : VLD4D<0b0000, "vld4", "8">;
+def VLD4d16 : VLD4D<0b0100, "vld4", "16">;
+def VLD4d32 : VLD4D<0b1000, "vld4", "32">;
def VLD4d64 : NLdSt<0,0b10,0b0010,0b1100,
(outs DPR:$dst1, DPR:$dst2, DPR:$dst3, DPR:$dst4),
(ins addrmode6:$addr), IIC_VLD1,
- "vld1.64\t\\{$dst1,$dst2,$dst3,$dst4\\}, $addr", "", []>;
+ "vld1", "64", "\\{$dst1,$dst2,$dst3,$dst4\\}, $addr", "", []>;
// vld4 to double-spaced even registers.
-def VLD4q8a : VLD4WB<0b0000, "vld4.8">;
-def VLD4q16a : VLD4WB<0b0100, "vld4.16">;
-def VLD4q32a : VLD4WB<0b1000, "vld4.32">;
+def VLD4q8a : VLD4WB<0b0000, "vld4", "8">;
+def VLD4q16a : VLD4WB<0b0100, "vld4", "16">;
+def VLD4q32a : VLD4WB<0b1000, "vld4", "32">;
// vld4 to double-spaced odd registers.
-def VLD4q8b : VLD4WB<0b0000, "vld4.8">;
-def VLD4q16b : VLD4WB<0b0100, "vld4.16">;
-def VLD4q32b : VLD4WB<0b1000, "vld4.32">;
+def VLD4q8b : VLD4WB<0b0000, "vld4", "8">;
+def VLD4q16b : VLD4WB<0b0100, "vld4", "16">;
+def VLD4q32b : VLD4WB<0b1000, "vld4", "32">;
// VLD1LN : Vector Load (single element to one lane)
// FIXME: Not yet implemented.
// VLD2LN : Vector Load (single 2-element structure to one lane)
-class VLD2LN<bits<4> op11_8, string OpcodeStr>
- : NLdSt<1,0b10,op11_8,0b0000, (outs DPR:$dst1, DPR:$dst2),
- (ins addrmode6:$addr, DPR:$src1, DPR:$src2, nohash_imm:$lane),
- IIC_VLD2,
- !strconcat(OpcodeStr, "\t\\{$dst1[$lane],$dst2[$lane]\\}, $addr"),
- "$src1 = $dst1, $src2 = $dst2", []>;
-
-def VLD2LNd8 : VLD2LN<0b0001, "vld2.8">;
-def VLD2LNd16 : VLD2LN<0b0101, "vld2.16">;
-def VLD2LNd32 : VLD2LN<0b1001, "vld2.32">;
+class VLD2LN<bits<4> op11_8, string OpcodeStr, string Dt>
+ : NLdSt<1,0b10,op11_8,{?,?,?,?}, (outs DPR:$dst1, DPR:$dst2),
+ (ins addrmode6:$addr, DPR:$src1, DPR:$src2, nohash_imm:$lane),
+ IIC_VLD2,
+ OpcodeStr, Dt, "\\{$dst1[$lane],$dst2[$lane]\\}, $addr",
+ "$src1 = $dst1, $src2 = $dst2", []>;
+
+// vld2 to single-spaced registers.
+def VLD2LNd8 : VLD2LN<0b0001, "vld2", "8">;
+def VLD2LNd16 : VLD2LN<0b0101, "vld2", "16"> {
+ let Inst{5} = 0;
+}
+def VLD2LNd32 : VLD2LN<0b1001, "vld2", "32"> {
+ let Inst{6} = 0;
+}
// vld2 to double-spaced even registers.
-def VLD2LNq16a: VLD2LN<0b0101, "vld2.16">;
-def VLD2LNq32a: VLD2LN<0b1001, "vld2.32">;
+def VLD2LNq16a: VLD2LN<0b0101, "vld2", "16"> {
+ let Inst{5} = 1;
+}
+def VLD2LNq32a: VLD2LN<0b1001, "vld2", "32"> {
+ let Inst{6} = 1;
+}
// vld2 to double-spaced odd registers.
-def VLD2LNq16b: VLD2LN<0b0101, "vld2.16">;
-def VLD2LNq32b: VLD2LN<0b1001, "vld2.32">;
+def VLD2LNq16b: VLD2LN<0b0101, "vld2", "16"> {
+ let Inst{5} = 1;
+}
+def VLD2LNq32b: VLD2LN<0b1001, "vld2", "32"> {
+ let Inst{6} = 1;
+}
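// Illustration, not part of the patch: the new let-assignments give each
// vld2 lane-load variant a distinct encoding where all of them previously
// shared op7_4 = 0b0000. Reading the definitions above: bit 5 separates
// single- from double-spaced registers for 16-bit elements, and bit 6
// does the same for 32-bit elements.
static unsigned spacingBit(unsigned EltBits) { // assumption: summary only
  return EltBits == 16 ? 5 : 6;
}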
// VLD3LN : Vector Load (single 3-element structure to one lane)
-class VLD3LN<bits<4> op11_8, string OpcodeStr>
- : NLdSt<1,0b10,op11_8,0b0000, (outs DPR:$dst1, DPR:$dst2, DPR:$dst3),
- (ins addrmode6:$addr, DPR:$src1, DPR:$src2, DPR:$src3,
- nohash_imm:$lane), IIC_VLD3,
- !strconcat(OpcodeStr,
- "\t\\{$dst1[$lane],$dst2[$lane],$dst3[$lane]\\}, $addr"),
- "$src1 = $dst1, $src2 = $dst2, $src3 = $dst3", []>;
-
-def VLD3LNd8 : VLD3LN<0b0010, "vld3.8">;
-def VLD3LNd16 : VLD3LN<0b0110, "vld3.16">;
-def VLD3LNd32 : VLD3LN<0b1010, "vld3.32">;
+class VLD3LN<bits<4> op11_8, string OpcodeStr, string Dt>
+ : NLdSt<1,0b10,op11_8,{?,?,?,?}, (outs DPR:$dst1, DPR:$dst2, DPR:$dst3),
+ (ins addrmode6:$addr, DPR:$src1, DPR:$src2, DPR:$src3,
+ nohash_imm:$lane), IIC_VLD3,
+ OpcodeStr, Dt,
+ "\\{$dst1[$lane],$dst2[$lane],$dst3[$lane]\\}, $addr",
+ "$src1 = $dst1, $src2 = $dst2, $src3 = $dst3", []>;
+
+// vld3 to single-spaced registers.
+def VLD3LNd8 : VLD3LN<0b0010, "vld3", "8"> {
+ let Inst{4} = 0;
+}
+def VLD3LNd16 : VLD3LN<0b0110, "vld3", "16"> {
+ let Inst{5-4} = 0b00;
+}
+def VLD3LNd32 : VLD3LN<0b1010, "vld3", "32"> {
+ let Inst{6-4} = 0b000;
+}
// vld3 to double-spaced even registers.
-def VLD3LNq16a: VLD3LN<0b0110, "vld3.16">;
-def VLD3LNq32a: VLD3LN<0b1010, "vld3.32">;
+def VLD3LNq16a: VLD3LN<0b0110, "vld3", "16"> {
+ let Inst{5-4} = 0b10;
+}
+def VLD3LNq32a: VLD3LN<0b1010, "vld3", "32"> {
+ let Inst{6-4} = 0b100;
+}
// vld3 to double-spaced odd registers.
-def VLD3LNq16b: VLD3LN<0b0110, "vld3.16">;
-def VLD3LNq32b: VLD3LN<0b1010, "vld3.32">;
+def VLD3LNq16b: VLD3LN<0b0110, "vld3", "16"> {
+ let Inst{5-4} = 0b10;
+}
+def VLD3LNq32b: VLD3LN<0b1010, "vld3", "32"> {
+ let Inst{6-4} = 0b100;
+}
// VLD4LN : Vector Load (single 4-element structure to one lane)
-class VLD4LN<bits<4> op11_8, string OpcodeStr>
- : NLdSt<1,0b10,op11_8,0b0000,
- (outs DPR:$dst1, DPR:$dst2, DPR:$dst3, DPR:$dst4),
- (ins addrmode6:$addr, DPR:$src1, DPR:$src2, DPR:$src3, DPR:$src4,
- nohash_imm:$lane), IIC_VLD4,
- !strconcat(OpcodeStr,
- "\t\\{$dst1[$lane],$dst2[$lane],$dst3[$lane],$dst4[$lane]\\}, $addr"),
- "$src1 = $dst1, $src2 = $dst2, $src3 = $dst3, $src4 = $dst4", []>;
-
-def VLD4LNd8 : VLD4LN<0b0011, "vld4.8">;
-def VLD4LNd16 : VLD4LN<0b0111, "vld4.16">;
-def VLD4LNd32 : VLD4LN<0b1011, "vld4.32">;
+class VLD4LN<bits<4> op11_8, string OpcodeStr, string Dt>
+ : NLdSt<1,0b10,op11_8,{?,?,?,?},
+ (outs DPR:$dst1, DPR:$dst2, DPR:$dst3, DPR:$dst4),
+ (ins addrmode6:$addr, DPR:$src1, DPR:$src2, DPR:$src3, DPR:$src4,
+ nohash_imm:$lane), IIC_VLD4,
+ OpcodeStr, Dt,
+ "\\{$dst1[$lane],$dst2[$lane],$dst3[$lane],$dst4[$lane]\\}, $addr",
+ "$src1 = $dst1, $src2 = $dst2, $src3 = $dst3, $src4 = $dst4", []>;
+
+// vld4 to single-spaced registers.
+def VLD4LNd8 : VLD4LN<0b0011, "vld4", "8">;
+def VLD4LNd16 : VLD4LN<0b0111, "vld4", "16"> {
+ let Inst{5} = 0;
+}
+def VLD4LNd32 : VLD4LN<0b1011, "vld4", "32"> {
+ let Inst{6} = 0;
+}
// vld4 to double-spaced even registers.
-def VLD4LNq16a: VLD4LN<0b0111, "vld4.16">;
-def VLD4LNq32a: VLD4LN<0b1011, "vld4.32">;
+def VLD4LNq16a: VLD4LN<0b0111, "vld4", "16"> {
+ let Inst{5} = 1;
+}
+def VLD4LNq32a: VLD4LN<0b1011, "vld4", "32"> {
+ let Inst{6} = 1;
+}
// vld4 to double-spaced odd registers.
-def VLD4LNq16b: VLD4LN<0b0111, "vld4.16">;
-def VLD4LNq32b: VLD4LN<0b1011, "vld4.32">;
+def VLD4LNq16b: VLD4LN<0b0111, "vld4", "16"> {
+ let Inst{5} = 1;
+}
+def VLD4LNq32b: VLD4LN<0b1011, "vld4", "32"> {
+ let Inst{6} = 1;
+}
// VLD1DUP : Vector Load (single element to all lanes)
// VLD2DUP : Vector Load (single 2-element structure to all lanes)
@@ -349,178 +392,221 @@ def VLD4LNq32b: VLD4LN<0b1011, "vld4.32">;
} // mayLoad = 1, hasExtraDefRegAllocReq = 1
// VST1 : Vector Store (multiple single elements)
-class VST1D<bits<4> op7_4, string OpcodeStr, ValueType Ty, Intrinsic IntOp>
+class VST1D<bits<4> op7_4, string OpcodeStr, string Dt,
+ ValueType Ty, Intrinsic IntOp>
: NLdSt<0,0b00,0b0111,op7_4, (outs), (ins addrmode6:$addr, DPR:$src), IIC_VST,
- !strconcat(OpcodeStr, "\t\\{$src\\}, $addr"), "",
+ OpcodeStr, Dt, "\\{$src\\}, $addr", "",
[(IntOp addrmode6:$addr, (Ty DPR:$src))]>;
-class VST1Q<bits<4> op7_4, string OpcodeStr, ValueType Ty, Intrinsic IntOp>
+class VST1Q<bits<4> op7_4, string OpcodeStr, string Dt,
+ ValueType Ty, Intrinsic IntOp>
: NLdSt<0,0b00,0b1010,op7_4, (outs), (ins addrmode6:$addr, QPR:$src), IIC_VST,
- !strconcat(OpcodeStr, "\t${src:dregpair}, $addr"), "",
+ OpcodeStr, Dt, "${src:dregpair}, $addr", "",
[(IntOp addrmode6:$addr, (Ty QPR:$src))]>;
let hasExtraSrcRegAllocReq = 1 in {
-def VST1d8 : VST1D<0b0000, "vst1.8", v8i8, int_arm_neon_vst1>;
-def VST1d16 : VST1D<0b0100, "vst1.16", v4i16, int_arm_neon_vst1>;
-def VST1d32 : VST1D<0b1000, "vst1.32", v2i32, int_arm_neon_vst1>;
-def VST1df : VST1D<0b1000, "vst1.32", v2f32, int_arm_neon_vst1>;
-def VST1d64 : VST1D<0b1100, "vst1.64", v1i64, int_arm_neon_vst1>;
-
-def VST1q8 : VST1Q<0b0000, "vst1.8", v16i8, int_arm_neon_vst1>;
-def VST1q16 : VST1Q<0b0100, "vst1.16", v8i16, int_arm_neon_vst1>;
-def VST1q32 : VST1Q<0b1000, "vst1.32", v4i32, int_arm_neon_vst1>;
-def VST1qf : VST1Q<0b1000, "vst1.32", v4f32, int_arm_neon_vst1>;
-def VST1q64 : VST1Q<0b1100, "vst1.64", v2i64, int_arm_neon_vst1>;
+def VST1d8 : VST1D<0b0000, "vst1", "8", v8i8, int_arm_neon_vst1>;
+def VST1d16 : VST1D<0b0100, "vst1", "16", v4i16, int_arm_neon_vst1>;
+def VST1d32 : VST1D<0b1000, "vst1", "32", v2i32, int_arm_neon_vst1>;
+def VST1df : VST1D<0b1000, "vst1", "32", v2f32, int_arm_neon_vst1>;
+def VST1d64 : VST1D<0b1100, "vst1", "64", v1i64, int_arm_neon_vst1>;
+
+def VST1q8 : VST1Q<0b0000, "vst1", "8", v16i8, int_arm_neon_vst1>;
+def VST1q16 : VST1Q<0b0100, "vst1", "16", v8i16, int_arm_neon_vst1>;
+def VST1q32 : VST1Q<0b1000, "vst1", "32", v4i32, int_arm_neon_vst1>;
+def VST1qf : VST1Q<0b1000, "vst1", "32", v4f32, int_arm_neon_vst1>;
+def VST1q64 : VST1Q<0b1100, "vst1", "64", v2i64, int_arm_neon_vst1>;
} // hasExtraSrcRegAllocReq
let mayStore = 1, hasExtraSrcRegAllocReq = 1 in {
// VST2 : Vector Store (multiple 2-element structures)
-class VST2D<bits<4> op7_4, string OpcodeStr>
+class VST2D<bits<4> op7_4, string OpcodeStr, string Dt>
: NLdSt<0,0b00,0b1000,op7_4, (outs),
(ins addrmode6:$addr, DPR:$src1, DPR:$src2), IIC_VST,
- !strconcat(OpcodeStr, "\t\\{$src1,$src2\\}, $addr"), "", []>;
-class VST2Q<bits<4> op7_4, string OpcodeStr>
+ OpcodeStr, Dt, "\\{$src1,$src2\\}, $addr", "", []>;
+class VST2Q<bits<4> op7_4, string OpcodeStr, string Dt>
: NLdSt<0,0b00,0b0011,op7_4, (outs),
(ins addrmode6:$addr, DPR:$src1, DPR:$src2, DPR:$src3, DPR:$src4),
IIC_VST,
- !strconcat(OpcodeStr, "\t\\{$src1,$src2,$src3,$src4\\}, $addr"),
+ OpcodeStr, Dt, "\\{$src1,$src2,$src3,$src4\\}, $addr",
"", []>;
-def VST2d8 : VST2D<0b0000, "vst2.8">;
-def VST2d16 : VST2D<0b0100, "vst2.16">;
-def VST2d32 : VST2D<0b1000, "vst2.32">;
+def VST2d8 : VST2D<0b0000, "vst2", "8">;
+def VST2d16 : VST2D<0b0100, "vst2", "16">;
+def VST2d32 : VST2D<0b1000, "vst2", "32">;
def VST2d64 : NLdSt<0,0b00,0b1010,0b1100, (outs),
(ins addrmode6:$addr, DPR:$src1, DPR:$src2), IIC_VST,
- "vst1.64\t\\{$src1,$src2\\}, $addr", "", []>;
+ "vst1", "64", "\\{$src1,$src2\\}, $addr", "", []>;
-def VST2q8 : VST2Q<0b0000, "vst2.8">;
-def VST2q16 : VST2Q<0b0100, "vst2.16">;
-def VST2q32 : VST2Q<0b1000, "vst2.32">;
+def VST2q8 : VST2Q<0b0000, "vst2", "8">;
+def VST2q16 : VST2Q<0b0100, "vst2", "16">;
+def VST2q32 : VST2Q<0b1000, "vst2", "32">;
// VST3 : Vector Store (multiple 3-element structures)
-class VST3D<bits<4> op7_4, string OpcodeStr>
+class VST3D<bits<4> op7_4, string OpcodeStr, string Dt>
: NLdSt<0,0b00,0b0100,op7_4, (outs),
(ins addrmode6:$addr, DPR:$src1, DPR:$src2, DPR:$src3), IIC_VST,
- !strconcat(OpcodeStr, "\t\\{$src1,$src2,$src3\\}, $addr"), "", []>;
-class VST3WB<bits<4> op7_4, string OpcodeStr>
+ OpcodeStr, Dt, "\\{$src1,$src2,$src3\\}, $addr", "", []>;
+class VST3WB<bits<4> op7_4, string OpcodeStr, string Dt>
: NLdSt<0,0b00,0b0101,op7_4, (outs GPR:$wb),
(ins addrmode6:$addr, DPR:$src1, DPR:$src2, DPR:$src3), IIC_VST,
- !strconcat(OpcodeStr, "\t\\{$src1,$src2,$src3\\}, $addr"),
+ OpcodeStr, Dt, "\\{$src1,$src2,$src3\\}, $addr",
"$addr.addr = $wb", []>;
-def VST3d8 : VST3D<0b0000, "vst3.8">;
-def VST3d16 : VST3D<0b0100, "vst3.16">;
-def VST3d32 : VST3D<0b1000, "vst3.32">;
+def VST3d8 : VST3D<0b0000, "vst3", "8">;
+def VST3d16 : VST3D<0b0100, "vst3", "16">;
+def VST3d32 : VST3D<0b1000, "vst3", "32">;
def VST3d64 : NLdSt<0,0b00,0b0110,0b1100, (outs),
(ins addrmode6:$addr, DPR:$src1, DPR:$src2, DPR:$src3),
IIC_VST,
- "vst1.64\t\\{$src1,$src2,$src3\\}, $addr", "", []>;
+ "vst1", "64", "\\{$src1,$src2,$src3\\}, $addr", "", []>;
// vst3 to double-spaced even registers.
-def VST3q8a : VST3WB<0b0000, "vst3.8">;
-def VST3q16a : VST3WB<0b0100, "vst3.16">;
-def VST3q32a : VST3WB<0b1000, "vst3.32">;
+def VST3q8a : VST3WB<0b0000, "vst3", "8">;
+def VST3q16a : VST3WB<0b0100, "vst3", "16">;
+def VST3q32a : VST3WB<0b1000, "vst3", "32">;
// vst3 to double-spaced odd registers.
-def VST3q8b : VST3WB<0b0000, "vst3.8">;
-def VST3q16b : VST3WB<0b0100, "vst3.16">;
-def VST3q32b : VST3WB<0b1000, "vst3.32">;
+def VST3q8b : VST3WB<0b0000, "vst3", "8">;
+def VST3q16b : VST3WB<0b0100, "vst3", "16">;
+def VST3q32b : VST3WB<0b1000, "vst3", "32">;
// VST4 : Vector Store (multiple 4-element structures)
-class VST4D<bits<4> op7_4, string OpcodeStr>
+class VST4D<bits<4> op7_4, string OpcodeStr, string Dt>
: NLdSt<0,0b00,0b0000,op7_4, (outs),
(ins addrmode6:$addr, DPR:$src1, DPR:$src2, DPR:$src3, DPR:$src4),
IIC_VST,
- !strconcat(OpcodeStr, "\t\\{$src1,$src2,$src3,$src4\\}, $addr"),
+ OpcodeStr, Dt, "\\{$src1,$src2,$src3,$src4\\}, $addr",
"", []>;
-class VST4WB<bits<4> op7_4, string OpcodeStr>
+class VST4WB<bits<4> op7_4, string OpcodeStr, string Dt>
: NLdSt<0,0b00,0b0001,op7_4, (outs GPR:$wb),
(ins addrmode6:$addr, DPR:$src1, DPR:$src2, DPR:$src3, DPR:$src4),
IIC_VST,
- !strconcat(OpcodeStr, "\t\\{$src1,$src2,$src3,$src4\\}, $addr"),
+ OpcodeStr, Dt, "\\{$src1,$src2,$src3,$src4\\}, $addr",
"$addr.addr = $wb", []>;
-def VST4d8 : VST4D<0b0000, "vst4.8">;
-def VST4d16 : VST4D<0b0100, "vst4.16">;
-def VST4d32 : VST4D<0b1000, "vst4.32">;
+def VST4d8 : VST4D<0b0000, "vst4", "8">;
+def VST4d16 : VST4D<0b0100, "vst4", "16">;
+def VST4d32 : VST4D<0b1000, "vst4", "32">;
def VST4d64 : NLdSt<0,0b00,0b0010,0b1100, (outs),
(ins addrmode6:$addr, DPR:$src1, DPR:$src2, DPR:$src3,
DPR:$src4), IIC_VST,
- "vst1.64\t\\{$src1,$src2,$src3,$src4\\}, $addr", "", []>;
+ "vst1", "64", "\\{$src1,$src2,$src3,$src4\\}, $addr", "", []>;
// vst4 to double-spaced even registers.
-def VST4q8a : VST4WB<0b0000, "vst4.8">;
-def VST4q16a : VST4WB<0b0100, "vst4.16">;
-def VST4q32a : VST4WB<0b1000, "vst4.32">;
+def VST4q8a : VST4WB<0b0000, "vst4", "8">;
+def VST4q16a : VST4WB<0b0100, "vst4", "16">;
+def VST4q32a : VST4WB<0b1000, "vst4", "32">;
// vst4 to double-spaced odd registers.
-def VST4q8b : VST4WB<0b0000, "vst4.8">;
-def VST4q16b : VST4WB<0b0100, "vst4.16">;
-def VST4q32b : VST4WB<0b1000, "vst4.32">;
+def VST4q8b : VST4WB<0b0000, "vst4", "8">;
+def VST4q16b : VST4WB<0b0100, "vst4", "16">;
+def VST4q32b : VST4WB<0b1000, "vst4", "32">;
// VST1LN : Vector Store (single element from one lane)
// FIXME: Not yet implemented.
// VST2LN : Vector Store (single 2-element structure from one lane)
-class VST2LN<bits<4> op11_8, string OpcodeStr>
- : NLdSt<1,0b00,op11_8,0b0000, (outs),
- (ins addrmode6:$addr, DPR:$src1, DPR:$src2, nohash_imm:$lane),
- IIC_VST,
- !strconcat(OpcodeStr, "\t\\{$src1[$lane],$src2[$lane]\\}, $addr"),
- "", []>;
-
-def VST2LNd8 : VST2LN<0b0001, "vst2.8">;
-def VST2LNd16 : VST2LN<0b0101, "vst2.16">;
-def VST2LNd32 : VST2LN<0b1001, "vst2.32">;
+class VST2LN<bits<4> op11_8, string OpcodeStr, string Dt>
+ : NLdSt<1,0b00,op11_8,{?,?,?,?}, (outs),
+ (ins addrmode6:$addr, DPR:$src1, DPR:$src2, nohash_imm:$lane),
+ IIC_VST,
+ OpcodeStr, Dt, "\\{$src1[$lane],$src2[$lane]\\}, $addr",
+ "", []>;
+
+// vst2 to single-spaced registers.
+def VST2LNd8 : VST2LN<0b0001, "vst2", "8">;
+def VST2LNd16 : VST2LN<0b0101, "vst2", "16"> {
+ let Inst{5} = 0;
+}
+def VST2LNd32 : VST2LN<0b1001, "vst2", "32"> {
+ let Inst{6} = 0;
+}
// vst2 to double-spaced even registers.
-def VST2LNq16a: VST2LN<0b0101, "vst2.16">;
-def VST2LNq32a: VST2LN<0b1001, "vst2.32">;
+def VST2LNq16a: VST2LN<0b0101, "vst2", "16"> {
+ let Inst{5} = 1;
+}
+def VST2LNq32a: VST2LN<0b1001, "vst2", "32"> {
+ let Inst{6} = 1;
+}
// vst2 to double-spaced odd registers.
-def VST2LNq16b: VST2LN<0b0101, "vst2.16">;
-def VST2LNq32b: VST2LN<0b1001, "vst2.32">;
+def VST2LNq16b: VST2LN<0b0101, "vst2", "16"> {
+ let Inst{5} = 1;
+}
+def VST2LNq32b: VST2LN<0b1001, "vst2", "32"> {
+ let Inst{6} = 1;
+}
// VST3LN : Vector Store (single 3-element structure from one lane)
-class VST3LN<bits<4> op11_8, string OpcodeStr>
- : NLdSt<1,0b00,op11_8,0b0000, (outs),
- (ins addrmode6:$addr, DPR:$src1, DPR:$src2, DPR:$src3,
- nohash_imm:$lane), IIC_VST,
- !strconcat(OpcodeStr,
- "\t\\{$src1[$lane],$src2[$lane],$src3[$lane]\\}, $addr"), "", []>;
-
-def VST3LNd8 : VST3LN<0b0010, "vst3.8">;
-def VST3LNd16 : VST3LN<0b0110, "vst3.16">;
-def VST3LNd32 : VST3LN<0b1010, "vst3.32">;
+class VST3LN<bits<4> op11_8, string OpcodeStr, string Dt>
+ : NLdSt<1,0b00,op11_8,{?,?,?,?}, (outs),
+ (ins addrmode6:$addr, DPR:$src1, DPR:$src2, DPR:$src3,
+ nohash_imm:$lane), IIC_VST,
+ OpcodeStr, Dt,
+ "\\{$src1[$lane],$src2[$lane],$src3[$lane]\\}, $addr", "", []>;
+
+// vst3 to single-spaced registers.
+def VST3LNd8 : VST3LN<0b0010, "vst3", "8"> {
+ let Inst{4} = 0;
+}
+def VST3LNd16 : VST3LN<0b0110, "vst3", "16"> {
+ let Inst{5-4} = 0b00;
+}
+def VST3LNd32 : VST3LN<0b1010, "vst3", "32"> {
+ let Inst{6-4} = 0b000;
+}
// vst3 to double-spaced even registers.
-def VST3LNq16a: VST3LN<0b0110, "vst3.16">;
-def VST3LNq32a: VST3LN<0b1010, "vst3.32">;
+def VST3LNq16a: VST3LN<0b0110, "vst3", "16"> {
+ let Inst{5-4} = 0b10;
+}
+def VST3LNq32a: VST3LN<0b1010, "vst3", "32"> {
+ let Inst{6-4} = 0b100;
+}
// vst3 to double-spaced odd registers.
-def VST3LNq16b: VST3LN<0b0110, "vst3.16">;
-def VST3LNq32b: VST3LN<0b1010, "vst3.32">;
+def VST3LNq16b: VST3LN<0b0110, "vst3", "16"> {
+ let Inst{5-4} = 0b10;
+}
+def VST3LNq32b: VST3LN<0b1010, "vst3", "32"> {
+ let Inst{6-4} = 0b100;
+}
// VST4LN : Vector Store (single 4-element structure from one lane)
-class VST4LN<bits<4> op11_8, string OpcodeStr>
- : NLdSt<1,0b00,op11_8,0b0000, (outs),
- (ins addrmode6:$addr, DPR:$src1, DPR:$src2, DPR:$src3, DPR:$src4,
- nohash_imm:$lane), IIC_VST,
- !strconcat(OpcodeStr,
- "\t\\{$src1[$lane],$src2[$lane],$src3[$lane],$src4[$lane]\\}, $addr"),
- "", []>;
-
-def VST4LNd8 : VST4LN<0b0011, "vst4.8">;
-def VST4LNd16 : VST4LN<0b0111, "vst4.16">;
-def VST4LNd32 : VST4LN<0b1011, "vst4.32">;
+class VST4LN<bits<4> op11_8, string OpcodeStr, string Dt>
+ : NLdSt<1,0b00,op11_8,{?,?,?,?}, (outs),
+ (ins addrmode6:$addr, DPR:$src1, DPR:$src2, DPR:$src3, DPR:$src4,
+ nohash_imm:$lane), IIC_VST,
+ OpcodeStr, Dt,
+ "\\{$src1[$lane],$src2[$lane],$src3[$lane],$src4[$lane]\\}, $addr",
+ "", []>;
+
+// vst4 to single-spaced registers.
+def VST4LNd8 : VST4LN<0b0011, "vst4", "8">;
+def VST4LNd16 : VST4LN<0b0111, "vst4", "16"> {
+ let Inst{5} = 0;
+}
+def VST4LNd32 : VST4LN<0b1011, "vst4", "32"> {
+ let Inst{6} = 0;
+}
// vst4 to double-spaced even registers.
-def VST4LNq16a: VST4LN<0b0111, "vst4.16">;
-def VST4LNq32a: VST4LN<0b1011, "vst4.32">;
+def VST4LNq16a: VST4LN<0b0111, "vst4", "16"> {
+ let Inst{5} = 1;
+}
+def VST4LNq32a: VST4LN<0b1011, "vst4", "32"> {
+ let Inst{6} = 1;
+}
// vst4 to double-spaced odd registers.
-def VST4LNq16b: VST4LN<0b0111, "vst4.16">;
-def VST4LNq32b: VST4LN<0b1011, "vst4.32">;
+def VST4LNq16b: VST4LN<0b0111, "vst4", "16"> {
+ let Inst{5} = 1;
+}
+def VST4LNq32b: VST4LN<0b1011, "vst4", "32"> {
+ let Inst{6} = 1;
+}
} // mayStore = 1, hasExtraSrcRegAllocReq = 1
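// For example, with the split OpcodeStr/Dt parameters a definition such as
//   def VST2LNd16 : VST2LN<0b0101, "vst2", "16">;
// still assembles as "vst2.16 {$src1[$lane],$src2[$lane]}, $addr": the ".16"
// suffix is now composed from Dt by the NLdSt base class (presumably via
// something like !strconcat(OpcodeStr, ".", Dt)) rather than being baked
// into the mnemonic string at each def site.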
@@ -570,25 +656,25 @@ def SubReg_i32_lane : SDNodeXForm<imm, [{
// Basic 2-register operations, both double- and quad-register.
class N2VD<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
- bits<2> op17_16, bits<5> op11_7, bit op4, string OpcodeStr,
+ bits<2> op17_16, bits<5> op11_7, bit op4, string OpcodeStr,string Dt,
ValueType ResTy, ValueType OpTy, SDNode OpNode>
: N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 0, op4, (outs DPR:$dst),
- (ins DPR:$src), IIC_VUNAD, !strconcat(OpcodeStr, "\t$dst, $src"), "",
+ (ins DPR:$src), IIC_VUNAD, OpcodeStr, Dt, "$dst, $src", "",
[(set DPR:$dst, (ResTy (OpNode (OpTy DPR:$src))))]>;
class N2VQ<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
- bits<2> op17_16, bits<5> op11_7, bit op4, string OpcodeStr,
+ bits<2> op17_16, bits<5> op11_7, bit op4, string OpcodeStr,string Dt,
ValueType ResTy, ValueType OpTy, SDNode OpNode>
: N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 1, op4, (outs QPR:$dst),
- (ins QPR:$src), IIC_VUNAQ, !strconcat(OpcodeStr, "\t$dst, $src"), "",
+ (ins QPR:$src), IIC_VUNAQ, OpcodeStr, Dt, "$dst, $src", "",
[(set QPR:$dst, (ResTy (OpNode (OpTy QPR:$src))))]>;
// Basic 2-register operations, scalar single-precision.
class N2VDs<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
- bits<2> op17_16, bits<5> op11_7, bit op4, string OpcodeStr,
+ bits<2> op17_16, bits<5> op11_7, bit op4, string OpcodeStr,string Dt,
ValueType ResTy, ValueType OpTy, SDNode OpNode>
: N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 0, op4,
(outs DPR_VFP2:$dst), (ins DPR_VFP2:$src),
- IIC_VUNAD, !strconcat(OpcodeStr, "\t$dst, $src"), "", []>;
+ IIC_VUNAD, OpcodeStr, Dt, "$dst, $src", "", []>;
class N2VDsPat<SDNode OpNode, ValueType ResTy, ValueType OpTy, NeonI Inst>
: NEONFPPat<(ResTy (OpNode SPR:$a)),
@@ -599,27 +685,27 @@ class N2VDsPat<SDNode OpNode, ValueType ResTy, ValueType OpTy, NeonI Inst>
// Basic 2-register intrinsics, both double- and quad-register.
class N2VDInt<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
bits<2> op17_16, bits<5> op11_7, bit op4,
- InstrItinClass itin, string OpcodeStr,
+ InstrItinClass itin, string OpcodeStr, string Dt,
ValueType ResTy, ValueType OpTy, Intrinsic IntOp>
: N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 0, op4, (outs DPR:$dst),
- (ins DPR:$src), itin, !strconcat(OpcodeStr, "\t$dst, $src"), "",
+ (ins DPR:$src), itin, OpcodeStr, Dt, "$dst, $src", "",
[(set DPR:$dst, (ResTy (IntOp (OpTy DPR:$src))))]>;
class N2VQInt<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
bits<2> op17_16, bits<5> op11_7, bit op4,
- InstrItinClass itin, string OpcodeStr,
+ InstrItinClass itin, string OpcodeStr, string Dt,
ValueType ResTy, ValueType OpTy, Intrinsic IntOp>
: N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 1, op4, (outs QPR:$dst),
- (ins QPR:$src), itin, !strconcat(OpcodeStr, "\t$dst, $src"), "",
+ (ins QPR:$src), itin, OpcodeStr, Dt, "$dst, $src", "",
[(set QPR:$dst, (ResTy (IntOp (OpTy QPR:$src))))]>;
// Basic 2-register intrinsics, scalar single-precision
class N2VDInts<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
bits<2> op17_16, bits<5> op11_7, bit op4,
- InstrItinClass itin, string OpcodeStr,
+ InstrItinClass itin, string OpcodeStr, string Dt,
ValueType ResTy, ValueType OpTy, Intrinsic IntOp>
: N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 0, op4,
(outs DPR_VFP2:$dst), (ins DPR_VFP2:$src), itin,
- !strconcat(OpcodeStr, "\t$dst, $src"), "", []>;
+ OpcodeStr, Dt, "$dst, $src", "", []>;
class N2VDIntsPat<SDNode OpNode, NeonI Inst>
: NEONFPPat<(f32 (OpNode SPR:$a)),
@@ -630,49 +716,62 @@ class N2VDIntsPat<SDNode OpNode, NeonI Inst>
// Narrow 2-register intrinsics.
class N2VNInt<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
bits<2> op17_16, bits<5> op11_7, bit op6, bit op4,
- InstrItinClass itin, string OpcodeStr,
+ InstrItinClass itin, string OpcodeStr, string Dt,
ValueType TyD, ValueType TyQ, Intrinsic IntOp>
: N2V<op24_23, op21_20, op19_18, op17_16, op11_7, op6, op4, (outs DPR:$dst),
- (ins QPR:$src), itin, !strconcat(OpcodeStr, "\t$dst, $src"), "",
+ (ins QPR:$src), itin, OpcodeStr, Dt, "$dst, $src", "",
[(set DPR:$dst, (TyD (IntOp (TyQ QPR:$src))))]>;
// Long 2-register intrinsics (currently only used for VMOVL).
class N2VLInt<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
bits<2> op17_16, bits<5> op11_7, bit op6, bit op4,
- InstrItinClass itin, string OpcodeStr,
+ InstrItinClass itin, string OpcodeStr, string Dt,
ValueType TyQ, ValueType TyD, Intrinsic IntOp>
: N2V<op24_23, op21_20, op19_18, op17_16, op11_7, op6, op4, (outs QPR:$dst),
- (ins DPR:$src), itin, !strconcat(OpcodeStr, "\t$dst, $src"), "",
+ (ins DPR:$src), itin, OpcodeStr, Dt, "$dst, $src", "",
[(set QPR:$dst, (TyQ (IntOp (TyD DPR:$src))))]>;
// 2-register shuffles (VTRN/VZIP/VUZP), both double- and quad-register.
-class N2VDShuffle<bits<2> op19_18, bits<5> op11_7, string OpcodeStr>
+class N2VDShuffle<bits<2> op19_18, bits<5> op11_7, string OpcodeStr, string Dt>
: N2V<0b11, 0b11, op19_18, 0b10, op11_7, 0, 0, (outs DPR:$dst1, DPR:$dst2),
(ins DPR:$src1, DPR:$src2), IIC_VPERMD,
- !strconcat(OpcodeStr, "\t$dst1, $dst2"),
+ OpcodeStr, Dt, "$dst1, $dst2",
"$src1 = $dst1, $src2 = $dst2", []>;
class N2VQShuffle<bits<2> op19_18, bits<5> op11_7,
- InstrItinClass itin, string OpcodeStr>
+ InstrItinClass itin, string OpcodeStr, string Dt>
: N2V<0b11, 0b11, op19_18, 0b10, op11_7, 1, 0, (outs QPR:$dst1, QPR:$dst2),
(ins QPR:$src1, QPR:$src2), itin,
- !strconcat(OpcodeStr, "\t$dst1, $dst2"),
+ OpcodeStr, Dt, "$dst1, $dst2",
"$src1 = $dst1, $src2 = $dst2", []>;
// Basic 3-register operations, both double- and quad-register.
class N3VD<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
- InstrItinClass itin, string OpcodeStr, ValueType ResTy, ValueType OpTy,
+ InstrItinClass itin, string OpcodeStr, string Dt,
+ ValueType ResTy, ValueType OpTy,
SDNode OpNode, bit Commutable>
: N3V<op24, op23, op21_20, op11_8, 0, op4,
(outs DPR:$dst), (ins DPR:$src1, DPR:$src2), itin,
- !strconcat(OpcodeStr, "\t$dst, $src1, $src2"), "",
+ OpcodeStr, Dt, "$dst, $src1, $src2", "",
+ [(set DPR:$dst, (ResTy (OpNode (OpTy DPR:$src1), (OpTy DPR:$src2))))]> {
+ let isCommutable = Commutable;
+}
+// Same as N3VD but no data type.
+class N3VDX<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
+ InstrItinClass itin, string OpcodeStr,
+ ValueType ResTy, ValueType OpTy,
+ SDNode OpNode, bit Commutable>
+ : N3VX<op24, op23, op21_20, op11_8, 0, op4,
+ (outs DPR:$dst), (ins DPR:$src1, DPR:$src2), itin,
+ OpcodeStr, "$dst, $src1, $src2", "",
[(set DPR:$dst, (ResTy (OpNode (OpTy DPR:$src1), (OpTy DPR:$src2))))]> {
let isCommutable = Commutable;
}
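// N3VDX (and the quad-register N3VQX below) serve mnemonics that carry no
// ".<dt>" suffix at all, e.g. the bitwise ops; a sketch (bits illustrative):
//   def VANDd : N3VDX<0, 0, 0b00, 0b0001, 1, IIC_VBINiD, "vand",
//                     v2i32, v2i32, and, 1>;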
class N3VDSL<bits<2> op21_20, bits<4> op11_8,
- InstrItinClass itin, string OpcodeStr, ValueType Ty, SDNode ShOp>
+ InstrItinClass itin, string OpcodeStr, string Dt,
+ ValueType Ty, SDNode ShOp>
: N3V<0, 1, op21_20, op11_8, 1, 0,
(outs DPR:$dst), (ins DPR:$src1, DPR_VFP2:$src2, nohash_imm:$lane),
- itin, !strconcat(OpcodeStr, "\t$dst, $src1, $src2[$lane]"), "",
+ itin, OpcodeStr, Dt, "$dst, $src1, $src2[$lane]", "",
[(set (Ty DPR:$dst),
(Ty (ShOp (Ty DPR:$src1),
(Ty (NEONvduplane (Ty DPR_VFP2:$src2),
@@ -680,11 +779,11 @@ class N3VDSL<bits<2> op21_20, bits<4> op11_8,
let isCommutable = 0;
}
class N3VDSL16<bits<2> op21_20, bits<4> op11_8,
- string OpcodeStr, ValueType Ty, SDNode ShOp>
+ string OpcodeStr, string Dt, ValueType Ty, SDNode ShOp>
: N3V<0, 1, op21_20, op11_8, 1, 0,
(outs DPR:$dst), (ins DPR:$src1, DPR_8:$src2, nohash_imm:$lane),
IIC_VMULi16D,
- !strconcat(OpcodeStr, "\t$dst, $src1, $src2[$lane]"), "",
+ OpcodeStr, Dt, "$dst, $src1, $src2[$lane]", "",
[(set (Ty DPR:$dst),
(Ty (ShOp (Ty DPR:$src1),
(Ty (NEONvduplane (Ty DPR_8:$src2),
@@ -693,20 +792,31 @@ class N3VDSL16<bits<2> op21_20, bits<4> op11_8,
}
class N3VQ<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
- InstrItinClass itin, string OpcodeStr, ValueType ResTy, ValueType OpTy,
+ InstrItinClass itin, string OpcodeStr, string Dt,
+ ValueType ResTy, ValueType OpTy,
SDNode OpNode, bit Commutable>
: N3V<op24, op23, op21_20, op11_8, 1, op4,
(outs QPR:$dst), (ins QPR:$src1, QPR:$src2), itin,
- !strconcat(OpcodeStr, "\t$dst, $src1, $src2"), "",
+ OpcodeStr, Dt, "$dst, $src1, $src2", "",
+ [(set QPR:$dst, (ResTy (OpNode (OpTy QPR:$src1), (OpTy QPR:$src2))))]> {
+ let isCommutable = Commutable;
+}
+class N3VQX<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
+ InstrItinClass itin, string OpcodeStr,
+ ValueType ResTy, ValueType OpTy,
+ SDNode OpNode, bit Commutable>
+ : N3VX<op24, op23, op21_20, op11_8, 1, op4,
+ (outs QPR:$dst), (ins QPR:$src1, QPR:$src2), itin,
+ OpcodeStr, "$dst, $src1, $src2", "",
[(set QPR:$dst, (ResTy (OpNode (OpTy QPR:$src1), (OpTy QPR:$src2))))]> {
let isCommutable = Commutable;
}
class N3VQSL<bits<2> op21_20, bits<4> op11_8,
- InstrItinClass itin, string OpcodeStr,
+ InstrItinClass itin, string OpcodeStr, string Dt,
ValueType ResTy, ValueType OpTy, SDNode ShOp>
: N3V<1, 1, op21_20, op11_8, 1, 0,
(outs QPR:$dst), (ins QPR:$src1, DPR_VFP2:$src2, nohash_imm:$lane),
- itin, !strconcat(OpcodeStr, "\t$dst, $src1, $src2[$lane]"), "",
+ itin, OpcodeStr, Dt, "$dst, $src1, $src2[$lane]", "",
[(set (ResTy QPR:$dst),
(ResTy (ShOp (ResTy QPR:$src1),
(ResTy (NEONvduplane (OpTy DPR_VFP2:$src2),
@@ -714,11 +824,12 @@ class N3VQSL<bits<2> op21_20, bits<4> op11_8,
let isCommutable = 0;
}
class N3VQSL16<bits<2> op21_20, bits<4> op11_8,
- string OpcodeStr, ValueType ResTy, ValueType OpTy, SDNode ShOp>
+ string OpcodeStr, string Dt,
+ ValueType ResTy, ValueType OpTy, SDNode ShOp>
: N3V<1, 1, op21_20, op11_8, 1, 0,
(outs QPR:$dst), (ins QPR:$src1, DPR_8:$src2, nohash_imm:$lane),
IIC_VMULi16Q,
- !strconcat(OpcodeStr, "\t$dst, $src1, $src2[$lane]"), "",
+ OpcodeStr, Dt, "$dst, $src1, $src2[$lane]", "",
[(set (ResTy QPR:$dst),
(ResTy (ShOp (ResTy QPR:$src1),
(ResTy (NEONvduplane (OpTy DPR_8:$src2),
@@ -728,11 +839,11 @@ class N3VQSL16<bits<2> op21_20, bits<4> op11_8,
// Basic 3-register operations, scalar single-precision
class N3VDs<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
- string OpcodeStr, ValueType ResTy, ValueType OpTy,
+ string OpcodeStr, string Dt, ValueType ResTy, ValueType OpTy,
SDNode OpNode, bit Commutable>
: N3V<op24, op23, op21_20, op11_8, 0, op4,
(outs DPR_VFP2:$dst), (ins DPR_VFP2:$src1, DPR_VFP2:$src2), IIC_VBIND,
- !strconcat(OpcodeStr, "\t$dst, $src1, $src2"), "", []> {
+ OpcodeStr, Dt, "$dst, $src1, $src2", "", []> {
let isCommutable = Commutable;
}
class N3VDsPat<SDNode OpNode, NeonI Inst>
@@ -744,19 +855,20 @@ class N3VDsPat<SDNode OpNode, NeonI Inst>
// Basic 3-register intrinsics, both double- and quad-register.
class N3VDInt<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
- InstrItinClass itin, string OpcodeStr, ValueType ResTy, ValueType OpTy,
+ InstrItinClass itin, string OpcodeStr, string Dt,
+ ValueType ResTy, ValueType OpTy,
Intrinsic IntOp, bit Commutable>
: N3V<op24, op23, op21_20, op11_8, 0, op4,
(outs DPR:$dst), (ins DPR:$src1, DPR:$src2), itin,
- !strconcat(OpcodeStr, "\t$dst, $src1, $src2"), "",
+ OpcodeStr, Dt, "$dst, $src1, $src2", "",
[(set DPR:$dst, (ResTy (IntOp (OpTy DPR:$src1), (OpTy DPR:$src2))))]> {
let isCommutable = Commutable;
}
class N3VDIntSL<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
- string OpcodeStr, ValueType Ty, Intrinsic IntOp>
+ string OpcodeStr, string Dt, ValueType Ty, Intrinsic IntOp>
: N3V<0, 1, op21_20, op11_8, 1, 0,
(outs DPR:$dst), (ins DPR:$src1, DPR_VFP2:$src2, nohash_imm:$lane),
- itin, !strconcat(OpcodeStr, "\t$dst, $src1, $src2[$lane]"), "",
+ itin, OpcodeStr, Dt, "$dst, $src1, $src2[$lane]", "",
[(set (Ty DPR:$dst),
(Ty (IntOp (Ty DPR:$src1),
(Ty (NEONvduplane (Ty DPR_VFP2:$src2),
@@ -764,10 +876,10 @@ class N3VDIntSL<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
let isCommutable = 0;
}
class N3VDIntSL16<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
- string OpcodeStr, ValueType Ty, Intrinsic IntOp>
+ string OpcodeStr, string Dt, ValueType Ty, Intrinsic IntOp>
: N3V<0, 1, op21_20, op11_8, 1, 0,
(outs DPR:$dst), (ins DPR:$src1, DPR_8:$src2, nohash_imm:$lane),
- itin, !strconcat(OpcodeStr, "\t$dst, $src1, $src2[$lane]"), "",
+ itin, OpcodeStr, Dt, "$dst, $src1, $src2[$lane]", "",
[(set (Ty DPR:$dst),
(Ty (IntOp (Ty DPR:$src1),
(Ty (NEONvduplane (Ty DPR_8:$src2),
@@ -776,19 +888,21 @@ class N3VDIntSL16<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
}
class N3VQInt<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
- InstrItinClass itin, string OpcodeStr, ValueType ResTy, ValueType OpTy,
+ InstrItinClass itin, string OpcodeStr, string Dt,
+ ValueType ResTy, ValueType OpTy,
Intrinsic IntOp, bit Commutable>
: N3V<op24, op23, op21_20, op11_8, 1, op4,
(outs QPR:$dst), (ins QPR:$src1, QPR:$src2), itin,
- !strconcat(OpcodeStr, "\t$dst, $src1, $src2"), "",
+ OpcodeStr, Dt, "$dst, $src1, $src2", "",
[(set QPR:$dst, (ResTy (IntOp (OpTy QPR:$src1), (OpTy QPR:$src2))))]> {
let isCommutable = Commutable;
}
class N3VQIntSL<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
- string OpcodeStr, ValueType ResTy, ValueType OpTy, Intrinsic IntOp>
+ string OpcodeStr, string Dt,
+ ValueType ResTy, ValueType OpTy, Intrinsic IntOp>
: N3V<1, 1, op21_20, op11_8, 1, 0,
(outs QPR:$dst), (ins QPR:$src1, DPR_VFP2:$src2, nohash_imm:$lane),
- itin, !strconcat(OpcodeStr, "\t$dst, $src1, $src2[$lane]"), "",
+ itin, OpcodeStr, Dt, "$dst, $src1, $src2[$lane]", "",
[(set (ResTy QPR:$dst),
(ResTy (IntOp (ResTy QPR:$src1),
(ResTy (NEONvduplane (OpTy DPR_VFP2:$src2),
@@ -796,10 +910,11 @@ class N3VQIntSL<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
let isCommutable = 0;
}
class N3VQIntSL16<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
- string OpcodeStr, ValueType ResTy, ValueType OpTy, Intrinsic IntOp>
+ string OpcodeStr, string Dt,
+ ValueType ResTy, ValueType OpTy, Intrinsic IntOp>
: N3V<1, 1, op21_20, op11_8, 1, 0,
(outs QPR:$dst), (ins QPR:$src1, DPR_8:$src2, nohash_imm:$lane),
- itin, !strconcat(OpcodeStr, "\t$dst, $src1, $src2[$lane]"), "",
+ itin, OpcodeStr, Dt, "$dst, $src1, $src2[$lane]", "",
[(set (ResTy QPR:$dst),
(ResTy (IntOp (ResTy QPR:$src1),
(ResTy (NEONvduplane (OpTy DPR_8:$src2),
@@ -809,30 +924,32 @@ class N3VQIntSL16<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
// Multiply-Add/Sub operations, both double- and quad-register.
class N3VDMulOp<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
- InstrItinClass itin, string OpcodeStr,
+ InstrItinClass itin, string OpcodeStr, string Dt,
ValueType Ty, SDNode MulOp, SDNode OpNode>
: N3V<op24, op23, op21_20, op11_8, 0, op4,
(outs DPR:$dst), (ins DPR:$src1, DPR:$src2, DPR:$src3), itin,
- !strconcat(OpcodeStr, "\t$dst, $src2, $src3"), "$src1 = $dst",
+ OpcodeStr, Dt, "$dst, $src2, $src3", "$src1 = $dst",
[(set DPR:$dst, (Ty (OpNode DPR:$src1,
(Ty (MulOp DPR:$src2, DPR:$src3)))))]>;
class N3VDMulOpSL<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
- string OpcodeStr, ValueType Ty, SDNode MulOp, SDNode ShOp>
+ string OpcodeStr, string Dt,
+ ValueType Ty, SDNode MulOp, SDNode ShOp>
: N3V<0, 1, op21_20, op11_8, 1, 0,
(outs DPR:$dst),
(ins DPR:$src1, DPR:$src2, DPR_VFP2:$src3, nohash_imm:$lane), itin,
- !strconcat(OpcodeStr, "\t$dst, $src2, $src3[$lane]"), "$src1 = $dst",
+ OpcodeStr, Dt, "$dst, $src2, $src3[$lane]", "$src1 = $dst",
[(set (Ty DPR:$dst),
(Ty (ShOp (Ty DPR:$src1),
(Ty (MulOp DPR:$src2,
(Ty (NEONvduplane (Ty DPR_VFP2:$src3),
imm:$lane)))))))]>;
class N3VDMulOpSL16<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
- string OpcodeStr, ValueType Ty, SDNode MulOp, SDNode ShOp>
+ string OpcodeStr, string Dt,
+ ValueType Ty, SDNode MulOp, SDNode ShOp>
: N3V<0, 1, op21_20, op11_8, 1, 0,
(outs DPR:$dst),
(ins DPR:$src1, DPR:$src2, DPR_8:$src3, nohash_imm:$lane), itin,
- !strconcat(OpcodeStr, "\t$dst, $src2, $src3[$lane]"), "$src1 = $dst",
+ OpcodeStr, Dt, "$dst, $src2, $src3[$lane]", "$src1 = $dst",
[(set (Ty DPR:$dst),
(Ty (ShOp (Ty DPR:$src1),
(Ty (MulOp DPR:$src2,
@@ -840,32 +957,33 @@ class N3VDMulOpSL16<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
imm:$lane)))))))]>;
class N3VQMulOp<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
- InstrItinClass itin, string OpcodeStr, ValueType Ty,
+ InstrItinClass itin, string OpcodeStr, string Dt, ValueType Ty,
SDNode MulOp, SDNode OpNode>
: N3V<op24, op23, op21_20, op11_8, 1, op4,
(outs QPR:$dst), (ins QPR:$src1, QPR:$src2, QPR:$src3), itin,
- !strconcat(OpcodeStr, "\t$dst, $src2, $src3"), "$src1 = $dst",
+ OpcodeStr, Dt, "$dst, $src2, $src3", "$src1 = $dst",
[(set QPR:$dst, (Ty (OpNode QPR:$src1,
(Ty (MulOp QPR:$src2, QPR:$src3)))))]>;
class N3VQMulOpSL<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
- string OpcodeStr, ValueType ResTy, ValueType OpTy,
+ string OpcodeStr, string Dt, ValueType ResTy, ValueType OpTy,
SDNode MulOp, SDNode ShOp>
: N3V<1, 1, op21_20, op11_8, 1, 0,
(outs QPR:$dst),
(ins QPR:$src1, QPR:$src2, DPR_VFP2:$src3, nohash_imm:$lane), itin,
- !strconcat(OpcodeStr, "\t$dst, $src2, $src3[$lane]"), "$src1 = $dst",
+ OpcodeStr, Dt, "$dst, $src2, $src3[$lane]", "$src1 = $dst",
[(set (ResTy QPR:$dst),
(ResTy (ShOp (ResTy QPR:$src1),
(ResTy (MulOp QPR:$src2,
(ResTy (NEONvduplane (OpTy DPR_VFP2:$src3),
imm:$lane)))))))]>;
class N3VQMulOpSL16<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
- string OpcodeStr, ValueType ResTy, ValueType OpTy,
+ string OpcodeStr, string Dt,
+ ValueType ResTy, ValueType OpTy,
SDNode MulOp, SDNode ShOp>
: N3V<1, 1, op21_20, op11_8, 1, 0,
(outs QPR:$dst),
(ins QPR:$src1, QPR:$src2, DPR_8:$src3, nohash_imm:$lane), itin,
- !strconcat(OpcodeStr, "\t$dst, $src2, $src3[$lane]"), "$src1 = $dst",
+ OpcodeStr, Dt, "$dst, $src2, $src3[$lane]", "$src1 = $dst",
[(set (ResTy QPR:$dst),
(ResTy (ShOp (ResTy QPR:$src1),
(ResTy (MulOp QPR:$src2,
@@ -874,12 +992,12 @@ class N3VQMulOpSL16<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
// Multiply-Add/Sub operations, scalar single-precision
class N3VDMulOps<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
- InstrItinClass itin, string OpcodeStr,
+ InstrItinClass itin, string OpcodeStr, string Dt,
ValueType Ty, SDNode MulOp, SDNode OpNode>
: N3V<op24, op23, op21_20, op11_8, 0, op4,
(outs DPR_VFP2:$dst),
(ins DPR_VFP2:$src1, DPR_VFP2:$src2, DPR_VFP2:$src3), itin,
- !strconcat(OpcodeStr, "\t$dst, $src2, $src3"), "$src1 = $dst", []>;
+ OpcodeStr, Dt, "$dst, $src2, $src3", "$src1 = $dst", []>;
class N3VDMulOpsPat<SDNode MulNode, SDNode OpNode, NeonI Inst>
: NEONFPPat<(f32 (OpNode SPR:$acc, (f32 (MulNode SPR:$a, SPR:$b)))),
@@ -892,50 +1010,51 @@ class N3VDMulOpsPat<SDNode MulNode, SDNode OpNode, NeonI Inst>
// Neon 3-argument intrinsics, both double- and quad-register.
// The destination register is also used as the first source operand register.
class N3VDInt3<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
- InstrItinClass itin, string OpcodeStr,
+ InstrItinClass itin, string OpcodeStr, string Dt,
ValueType ResTy, ValueType OpTy, Intrinsic IntOp>
: N3V<op24, op23, op21_20, op11_8, 0, op4,
(outs DPR:$dst), (ins DPR:$src1, DPR:$src2, DPR:$src3), itin,
- !strconcat(OpcodeStr, "\t$dst, $src2, $src3"), "$src1 = $dst",
+ OpcodeStr, Dt, "$dst, $src2, $src3", "$src1 = $dst",
[(set DPR:$dst, (ResTy (IntOp (OpTy DPR:$src1),
(OpTy DPR:$src2), (OpTy DPR:$src3))))]>;
class N3VQInt3<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
- InstrItinClass itin, string OpcodeStr,
+ InstrItinClass itin, string OpcodeStr, string Dt,
ValueType ResTy, ValueType OpTy, Intrinsic IntOp>
: N3V<op24, op23, op21_20, op11_8, 1, op4,
(outs QPR:$dst), (ins QPR:$src1, QPR:$src2, QPR:$src3), itin,
- !strconcat(OpcodeStr, "\t$dst, $src2, $src3"), "$src1 = $dst",
+ OpcodeStr, Dt, "$dst, $src2, $src3", "$src1 = $dst",
[(set QPR:$dst, (ResTy (IntOp (OpTy QPR:$src1),
(OpTy QPR:$src2), (OpTy QPR:$src3))))]>;
// Neon Long 3-argument intrinsic. The destination register is
// a quad-register and is also used as the first source operand register.
class N3VLInt3<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
- InstrItinClass itin, string OpcodeStr,
+ InstrItinClass itin, string OpcodeStr, string Dt,
ValueType TyQ, ValueType TyD, Intrinsic IntOp>
: N3V<op24, op23, op21_20, op11_8, 0, op4,
(outs QPR:$dst), (ins QPR:$src1, DPR:$src2, DPR:$src3), itin,
- !strconcat(OpcodeStr, "\t$dst, $src2, $src3"), "$src1 = $dst",
+ OpcodeStr, Dt, "$dst, $src2, $src3", "$src1 = $dst",
[(set QPR:$dst,
(TyQ (IntOp (TyQ QPR:$src1), (TyD DPR:$src2), (TyD DPR:$src3))))]>;
class N3VLInt3SL<bit op24, bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
- string OpcodeStr, ValueType ResTy, ValueType OpTy, Intrinsic IntOp>
+ string OpcodeStr, string Dt,
+ ValueType ResTy, ValueType OpTy, Intrinsic IntOp>
: N3V<op24, 1, op21_20, op11_8, 1, 0,
(outs QPR:$dst),
(ins QPR:$src1, DPR:$src2, DPR_VFP2:$src3, nohash_imm:$lane), itin,
- !strconcat(OpcodeStr, "\t$dst, $src2, $src3[$lane]"), "$src1 = $dst",
+ OpcodeStr, Dt, "$dst, $src2, $src3[$lane]", "$src1 = $dst",
[(set (ResTy QPR:$dst),
(ResTy (IntOp (ResTy QPR:$src1),
(OpTy DPR:$src2),
(OpTy (NEONvduplane (OpTy DPR_VFP2:$src3),
imm:$lane)))))]>;
class N3VLInt3SL16<bit op24, bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
- string OpcodeStr, ValueType ResTy, ValueType OpTy,
+ string OpcodeStr, string Dt, ValueType ResTy, ValueType OpTy,
Intrinsic IntOp>
: N3V<op24, 1, op21_20, op11_8, 1, 0,
(outs QPR:$dst),
(ins QPR:$src1, DPR:$src2, DPR_8:$src3, nohash_imm:$lane), itin,
- !strconcat(OpcodeStr, "\t$dst, $src2, $src3[$lane]"), "$src1 = $dst",
+ OpcodeStr, Dt, "$dst, $src2, $src3[$lane]", "$src1 = $dst",
[(set (ResTy QPR:$dst),
(ResTy (IntOp (ResTy QPR:$src1),
(OpTy DPR:$src2),
@@ -945,40 +1064,41 @@ class N3VLInt3SL16<bit op24, bits<2> op21_20, bits<4> op11_8, InstrItinClass iti
// Narrowing 3-register intrinsics.
class N3VNInt<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
- string OpcodeStr, ValueType TyD, ValueType TyQ,
+ string OpcodeStr, string Dt, ValueType TyD, ValueType TyQ,
Intrinsic IntOp, bit Commutable>
: N3V<op24, op23, op21_20, op11_8, 0, op4,
(outs DPR:$dst), (ins QPR:$src1, QPR:$src2), IIC_VBINi4D,
- !strconcat(OpcodeStr, "\t$dst, $src1, $src2"), "",
+ OpcodeStr, Dt, "$dst, $src1, $src2", "",
[(set DPR:$dst, (TyD (IntOp (TyQ QPR:$src1), (TyQ QPR:$src2))))]> {
let isCommutable = Commutable;
}
// Long 3-register intrinsics.
class N3VLInt<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
- InstrItinClass itin, string OpcodeStr, ValueType TyQ, ValueType TyD,
- Intrinsic IntOp, bit Commutable>
+ InstrItinClass itin, string OpcodeStr, string Dt,
+ ValueType TyQ, ValueType TyD, Intrinsic IntOp, bit Commutable>
: N3V<op24, op23, op21_20, op11_8, 0, op4,
(outs QPR:$dst), (ins DPR:$src1, DPR:$src2), itin,
- !strconcat(OpcodeStr, "\t$dst, $src1, $src2"), "",
+ OpcodeStr, Dt, "$dst, $src1, $src2", "",
[(set QPR:$dst, (TyQ (IntOp (TyD DPR:$src1), (TyD DPR:$src2))))]> {
let isCommutable = Commutable;
}
class N3VLIntSL<bit op24, bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
- string OpcodeStr, ValueType ResTy, ValueType OpTy, Intrinsic IntOp>
+ string OpcodeStr, string Dt,
+ ValueType ResTy, ValueType OpTy, Intrinsic IntOp>
: N3V<op24, 1, op21_20, op11_8, 1, 0,
(outs QPR:$dst), (ins DPR:$src1, DPR_VFP2:$src2, nohash_imm:$lane),
- itin, !strconcat(OpcodeStr, "\t$dst, $src1, $src2[$lane]"), "",
+ itin, OpcodeStr, Dt, "$dst, $src1, $src2[$lane]", "",
[(set (ResTy QPR:$dst),
(ResTy (IntOp (OpTy DPR:$src1),
(OpTy (NEONvduplane (OpTy DPR_VFP2:$src2),
imm:$lane)))))]>;
class N3VLIntSL16<bit op24, bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
- string OpcodeStr, ValueType ResTy, ValueType OpTy,
+ string OpcodeStr, string Dt, ValueType ResTy, ValueType OpTy,
Intrinsic IntOp>
: N3V<op24, 1, op21_20, op11_8, 1, 0,
(outs QPR:$dst), (ins DPR:$src1, DPR_8:$src2, nohash_imm:$lane),
- itin, !strconcat(OpcodeStr, "\t$dst, $src1, $src2[$lane]"), "",
+ itin, OpcodeStr, Dt, "$dst, $src1, $src2[$lane]", "",
[(set (ResTy QPR:$dst),
(ResTy (IntOp (OpTy DPR:$src1),
(OpTy (NEONvduplane (OpTy DPR_8:$src2),
@@ -986,128 +1106,135 @@ class N3VLIntSL16<bit op24, bits<2> op21_20, bits<4> op11_8, InstrItinClass itin
// Wide 3-register intrinsics.
class N3VWInt<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
- string OpcodeStr, ValueType TyQ, ValueType TyD,
+ string OpcodeStr, string Dt, ValueType TyQ, ValueType TyD,
Intrinsic IntOp, bit Commutable>
: N3V<op24, op23, op21_20, op11_8, 0, op4,
(outs QPR:$dst), (ins QPR:$src1, DPR:$src2), IIC_VSUBiD,
- !strconcat(OpcodeStr, "\t$dst, $src1, $src2"), "",
+ OpcodeStr, Dt, "$dst, $src1, $src2", "",
[(set QPR:$dst, (TyQ (IntOp (TyQ QPR:$src1), (TyD DPR:$src2))))]> {
let isCommutable = Commutable;
}
// Pairwise long 2-register intrinsics, both double- and quad-register.
class N2VDPLInt<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
- bits<2> op17_16, bits<5> op11_7, bit op4, string OpcodeStr,
+ bits<2> op17_16, bits<5> op11_7, bit op4,
+ string OpcodeStr, string Dt,
ValueType ResTy, ValueType OpTy, Intrinsic IntOp>
: N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 0, op4, (outs DPR:$dst),
- (ins DPR:$src), IIC_VSHLiD, !strconcat(OpcodeStr, "\t$dst, $src"), "",
+ (ins DPR:$src), IIC_VSHLiD, OpcodeStr, Dt, "$dst, $src", "",
[(set DPR:$dst, (ResTy (IntOp (OpTy DPR:$src))))]>;
class N2VQPLInt<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
- bits<2> op17_16, bits<5> op11_7, bit op4, string OpcodeStr,
+ bits<2> op17_16, bits<5> op11_7, bit op4,
+ string OpcodeStr, string Dt,
ValueType ResTy, ValueType OpTy, Intrinsic IntOp>
: N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 1, op4, (outs QPR:$dst),
- (ins QPR:$src), IIC_VSHLiD, !strconcat(OpcodeStr, "\t$dst, $src"), "",
+ (ins QPR:$src), IIC_VSHLiD, OpcodeStr, Dt, "$dst, $src", "",
[(set QPR:$dst, (ResTy (IntOp (OpTy QPR:$src))))]>;
// Pairwise long 2-register accumulate intrinsics,
// both double- and quad-register.
// The destination register is also used as the first source operand register.
class N2VDPLInt2<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
- bits<2> op17_16, bits<5> op11_7, bit op4, string OpcodeStr,
+ bits<2> op17_16, bits<5> op11_7, bit op4,
+ string OpcodeStr, string Dt,
ValueType ResTy, ValueType OpTy, Intrinsic IntOp>
: N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 0, op4,
(outs DPR:$dst), (ins DPR:$src1, DPR:$src2), IIC_VPALiD,
- !strconcat(OpcodeStr, "\t$dst, $src2"), "$src1 = $dst",
+ OpcodeStr, Dt, "$dst, $src2", "$src1 = $dst",
[(set DPR:$dst, (ResTy (IntOp (ResTy DPR:$src1), (OpTy DPR:$src2))))]>;
class N2VQPLInt2<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
- bits<2> op17_16, bits<5> op11_7, bit op4, string OpcodeStr,
+ bits<2> op17_16, bits<5> op11_7, bit op4,
+ string OpcodeStr, string Dt,
ValueType ResTy, ValueType OpTy, Intrinsic IntOp>
: N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 1, op4,
(outs QPR:$dst), (ins QPR:$src1, QPR:$src2), IIC_VPALiQ,
- !strconcat(OpcodeStr, "\t$dst, $src2"), "$src1 = $dst",
+ OpcodeStr, Dt, "$dst, $src2", "$src1 = $dst",
[(set QPR:$dst, (ResTy (IntOp (ResTy QPR:$src1), (OpTy QPR:$src2))))]>;
// Shift by immediate,
// both double- and quad-register.
class N2VDSh<bit op24, bit op23, bits<4> op11_8, bit op7, bit op4,
- InstrItinClass itin, string OpcodeStr, ValueType Ty, SDNode OpNode>
+ InstrItinClass itin, string OpcodeStr, string Dt,
+ ValueType Ty, SDNode OpNode>
: N2VImm<op24, op23, op11_8, op7, 0, op4,
(outs DPR:$dst), (ins DPR:$src, i32imm:$SIMM), itin,
- !strconcat(OpcodeStr, "\t$dst, $src, $SIMM"), "",
+ OpcodeStr, Dt, "$dst, $src, $SIMM", "",
[(set DPR:$dst, (Ty (OpNode (Ty DPR:$src), (i32 imm:$SIMM))))]>;
class N2VQSh<bit op24, bit op23, bits<4> op11_8, bit op7, bit op4,
- InstrItinClass itin, string OpcodeStr, ValueType Ty, SDNode OpNode>
+ InstrItinClass itin, string OpcodeStr, string Dt,
+ ValueType Ty, SDNode OpNode>
: N2VImm<op24, op23, op11_8, op7, 1, op4,
(outs QPR:$dst), (ins QPR:$src, i32imm:$SIMM), itin,
- !strconcat(OpcodeStr, "\t$dst, $src, $SIMM"), "",
+ OpcodeStr, Dt, "$dst, $src, $SIMM", "",
[(set QPR:$dst, (Ty (OpNode (Ty QPR:$src), (i32 imm:$SIMM))))]>;
// Long shift by immediate.
class N2VLSh<bit op24, bit op23, bits<4> op11_8, bit op7, bit op6, bit op4,
- string OpcodeStr, ValueType ResTy, ValueType OpTy, SDNode OpNode>
+ string OpcodeStr, string Dt,
+ ValueType ResTy, ValueType OpTy, SDNode OpNode>
: N2VImm<op24, op23, op11_8, op7, op6, op4,
(outs QPR:$dst), (ins DPR:$src, i32imm:$SIMM), IIC_VSHLiD,
- !strconcat(OpcodeStr, "\t$dst, $src, $SIMM"), "",
+ OpcodeStr, Dt, "$dst, $src, $SIMM", "",
[(set QPR:$dst, (ResTy (OpNode (OpTy DPR:$src),
(i32 imm:$SIMM))))]>;
// Narrow shift by immediate.
class N2VNSh<bit op24, bit op23, bits<4> op11_8, bit op7, bit op6, bit op4,
- InstrItinClass itin, string OpcodeStr,
+ InstrItinClass itin, string OpcodeStr, string Dt,
ValueType ResTy, ValueType OpTy, SDNode OpNode>
: N2VImm<op24, op23, op11_8, op7, op6, op4,
(outs DPR:$dst), (ins QPR:$src, i32imm:$SIMM), itin,
- !strconcat(OpcodeStr, "\t$dst, $src, $SIMM"), "",
+ OpcodeStr, Dt, "$dst, $src, $SIMM", "",
[(set DPR:$dst, (ResTy (OpNode (OpTy QPR:$src),
(i32 imm:$SIMM))))]>;
// Shift right by immediate and accumulate,
// both double- and quad-register.
class N2VDShAdd<bit op24, bit op23, bits<4> op11_8, bit op7, bit op4,
- string OpcodeStr, ValueType Ty, SDNode ShOp>
+ string OpcodeStr, string Dt, ValueType Ty, SDNode ShOp>
: N2VImm<op24, op23, op11_8, op7, 0, op4, (outs DPR:$dst),
(ins DPR:$src1, DPR:$src2, i32imm:$SIMM), IIC_VPALiD,
- !strconcat(OpcodeStr, "\t$dst, $src2, $SIMM"), "$src1 = $dst",
+ OpcodeStr, Dt, "$dst, $src2, $SIMM", "$src1 = $dst",
[(set DPR:$dst, (Ty (add DPR:$src1,
(Ty (ShOp DPR:$src2, (i32 imm:$SIMM))))))]>;
class N2VQShAdd<bit op24, bit op23, bits<4> op11_8, bit op7, bit op4,
- string OpcodeStr, ValueType Ty, SDNode ShOp>
+ string OpcodeStr, string Dt, ValueType Ty, SDNode ShOp>
: N2VImm<op24, op23, op11_8, op7, 1, op4, (outs QPR:$dst),
(ins QPR:$src1, QPR:$src2, i32imm:$SIMM), IIC_VPALiD,
- !strconcat(OpcodeStr, "\t$dst, $src2, $SIMM"), "$src1 = $dst",
+ OpcodeStr, Dt, "$dst, $src2, $SIMM", "$src1 = $dst",
[(set QPR:$dst, (Ty (add QPR:$src1,
(Ty (ShOp QPR:$src2, (i32 imm:$SIMM))))))]>;
// Shift by immediate and insert,
// both double- and quad-register.
class N2VDShIns<bit op24, bit op23, bits<4> op11_8, bit op7, bit op4,
- string OpcodeStr, ValueType Ty, SDNode ShOp>
+ string OpcodeStr, string Dt, ValueType Ty, SDNode ShOp>
: N2VImm<op24, op23, op11_8, op7, 0, op4, (outs DPR:$dst),
(ins DPR:$src1, DPR:$src2, i32imm:$SIMM), IIC_VSHLiD,
- !strconcat(OpcodeStr, "\t$dst, $src2, $SIMM"), "$src1 = $dst",
+ OpcodeStr, Dt, "$dst, $src2, $SIMM", "$src1 = $dst",
[(set DPR:$dst, (Ty (ShOp DPR:$src1, DPR:$src2, (i32 imm:$SIMM))))]>;
class N2VQShIns<bit op24, bit op23, bits<4> op11_8, bit op7, bit op4,
- string OpcodeStr, ValueType Ty, SDNode ShOp>
+ string OpcodeStr, string Dt, ValueType Ty, SDNode ShOp>
: N2VImm<op24, op23, op11_8, op7, 1, op4, (outs QPR:$dst),
(ins QPR:$src1, QPR:$src2, i32imm:$SIMM), IIC_VSHLiQ,
- !strconcat(OpcodeStr, "\t$dst, $src2, $SIMM"), "$src1 = $dst",
+ OpcodeStr, Dt, "$dst, $src2, $SIMM", "$src1 = $dst",
[(set QPR:$dst, (Ty (ShOp QPR:$src1, QPR:$src2, (i32 imm:$SIMM))))]>;
// Convert, with fractional bits immediate,
// both double- and quad-register.
class N2VCvtD<bit op24, bit op23, bits<4> op11_8, bit op7, bit op4,
- string OpcodeStr, ValueType ResTy, ValueType OpTy,
+ string OpcodeStr, string Dt, ValueType ResTy, ValueType OpTy,
Intrinsic IntOp>
: N2VImm<op24, op23, op11_8, op7, 0, op4,
(outs DPR:$dst), (ins DPR:$src, i32imm:$SIMM), IIC_VUNAD,
- !strconcat(OpcodeStr, "\t$dst, $src, $SIMM"), "",
+ OpcodeStr, Dt, "$dst, $src, $SIMM", "",
[(set DPR:$dst, (ResTy (IntOp (OpTy DPR:$src), (i32 imm:$SIMM))))]>;
class N2VCvtQ<bit op24, bit op23, bits<4> op11_8, bit op7, bit op4,
- string OpcodeStr, ValueType ResTy, ValueType OpTy,
+ string OpcodeStr, string Dt, ValueType ResTy, ValueType OpTy,
Intrinsic IntOp>
: N2VImm<op24, op23, op11_8, op7, 1, op4,
(outs QPR:$dst), (ins QPR:$src, i32imm:$SIMM), IIC_VUNAQ,
- !strconcat(OpcodeStr, "\t$dst, $src, $SIMM"), "",
+ OpcodeStr, Dt, "$dst, $src, $SIMM", "",
[(set QPR:$dst, (ResTy (IntOp (OpTy QPR:$src), (i32 imm:$SIMM))))]>;
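// Note that Dt need not be a bare element width; for these fixed-point
// conversions it would carry a full type pair, roughly (bits illustrative):
//   def VCVTf2xsd : N2VCvtD<0, 1, 0b1111, 0, 1, "vcvt", "s32.f32",
//                           v2i32, v2f32, int_arm_neon_vcvtfp2fxs>;
// printing as "vcvt.s32.f32 $dst, $src, $SIMM".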
//===----------------------------------------------------------------------===//
@@ -1126,41 +1253,55 @@ class N2VCvtQ<bit op24, bit op23, bits<4> op11_8, bit op7, bit op4,
multiclass N3V_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
InstrItinClass itinD16, InstrItinClass itinD32,
InstrItinClass itinQ16, InstrItinClass itinQ32,
- string OpcodeStr, SDNode OpNode, bit Commutable = 0> {
+ string OpcodeStr, string Dt,
+ SDNode OpNode, bit Commutable = 0> {
// 64-bit vector types.
def v8i8 : N3VD<op24, op23, 0b00, op11_8, op4, itinD16,
- !strconcat(OpcodeStr, "8"), v8i8, v8i8, OpNode, Commutable>;
+ OpcodeStr, !strconcat(Dt, "8"),
+ v8i8, v8i8, OpNode, Commutable>;
def v4i16 : N3VD<op24, op23, 0b01, op11_8, op4, itinD16,
- !strconcat(OpcodeStr, "16"), v4i16, v4i16, OpNode, Commutable>;
+ OpcodeStr, !strconcat(Dt, "16"),
+ v4i16, v4i16, OpNode, Commutable>;
def v2i32 : N3VD<op24, op23, 0b10, op11_8, op4, itinD32,
- !strconcat(OpcodeStr, "32"), v2i32, v2i32, OpNode, Commutable>;
+ OpcodeStr, !strconcat(Dt, "32"),
+ v2i32, v2i32, OpNode, Commutable>;
// 128-bit vector types.
def v16i8 : N3VQ<op24, op23, 0b00, op11_8, op4, itinQ16,
- !strconcat(OpcodeStr, "8"), v16i8, v16i8, OpNode, Commutable>;
+ OpcodeStr, !strconcat(Dt, "8"),
+ v16i8, v16i8, OpNode, Commutable>;
def v8i16 : N3VQ<op24, op23, 0b01, op11_8, op4, itinQ16,
- !strconcat(OpcodeStr, "16"), v8i16, v8i16, OpNode, Commutable>;
+ OpcodeStr, !strconcat(Dt, "16"),
+ v8i16, v8i16, OpNode, Commutable>;
def v4i32 : N3VQ<op24, op23, 0b10, op11_8, op4, itinQ32,
- !strconcat(OpcodeStr, "32"), v4i32, v4i32, OpNode, Commutable>;
+ OpcodeStr, !strconcat(Dt, "32"),
+ v4i32, v4i32, OpNode, Commutable>;
}
-multiclass N3VSL_HS<bits<4> op11_8, string OpcodeStr, SDNode ShOp> {
- def v4i16 : N3VDSL16<0b01, op11_8, !strconcat(OpcodeStr, "16"), v4i16, ShOp>;
- def v2i32 : N3VDSL<0b10, op11_8, IIC_VMULi32D, !strconcat(OpcodeStr, "32"), v2i32, ShOp>;
- def v8i16 : N3VQSL16<0b01, op11_8, !strconcat(OpcodeStr, "16"), v8i16, v4i16, ShOp>;
- def v4i32 : N3VQSL<0b10, op11_8, IIC_VMULi32Q, !strconcat(OpcodeStr, "32"), v4i32, v2i32, ShOp>;
+multiclass N3VSL_HS<bits<4> op11_8, string OpcodeStr, string Dt, SDNode ShOp> {
+ def v4i16 : N3VDSL16<0b01, op11_8, OpcodeStr, !strconcat(Dt, "16"),
+ v4i16, ShOp>;
+ def v2i32 : N3VDSL<0b10, op11_8, IIC_VMULi32D, OpcodeStr, !strconcat(Dt,"32"),
+ v2i32, ShOp>;
+ def v8i16 : N3VQSL16<0b01, op11_8, OpcodeStr, !strconcat(Dt, "16"),
+ v8i16, v4i16, ShOp>;
+ def v4i32 : N3VQSL<0b10, op11_8, IIC_VMULi32Q, OpcodeStr, !strconcat(Dt,"32"),
+ v4i32, v2i32, ShOp>;
}
// ....then also with element size of 64 bits:
multiclass N3V_QHSD<bit op24, bit op23, bits<4> op11_8, bit op4,
InstrItinClass itinD, InstrItinClass itinQ,
- string OpcodeStr, SDNode OpNode, bit Commutable = 0>
+ string OpcodeStr, string Dt,
+ SDNode OpNode, bit Commutable = 0>
: N3V_QHS<op24, op23, op11_8, op4, itinD, itinD, itinQ, itinQ,
- OpcodeStr, OpNode, Commutable> {
+ OpcodeStr, Dt, OpNode, Commutable> {
def v1i64 : N3VD<op24, op23, 0b11, op11_8, op4, itinD,
- !strconcat(OpcodeStr, "64"), v1i64, v1i64, OpNode, Commutable>;
+ OpcodeStr, !strconcat(Dt, "64"),
+ v1i64, v1i64, OpNode, Commutable>;
def v2i64 : N3VQ<op24, op23, 0b11, op11_8, op4, itinQ,
- !strconcat(OpcodeStr, "64"), v2i64, v2i64, OpNode, Commutable>;
+ OpcodeStr, !strconcat(Dt, "64"),
+ v2i64, v2i64, OpNode, Commutable>;
}
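// Usage sketch: a client such as VADD now passes only the type prefix and
// lets the multiclass append the element width (bits illustrative):
//   defm VADD : N3V_QHSD<0, 0, 0b1000, 0, IIC_VBINiD, IIC_VBINiQ,
//                        "vadd", "i", add, 1>;
// yielding VADDv8i8 ("vadd.i8") through VADDv2i64 ("vadd.i64").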
@@ -1168,27 +1309,30 @@ multiclass N3V_QHSD<bit op24, bit op23, bits<4> op11_8, bit op4,
// source operand element sizes of 16, 32 and 64 bits:
multiclass N2VNInt_HSD<bits<2> op24_23, bits<2> op21_20, bits<2> op17_16,
bits<5> op11_7, bit op6, bit op4,
- InstrItinClass itin, string OpcodeStr,
+ InstrItinClass itin, string OpcodeStr, string Dt,
Intrinsic IntOp> {
def v8i8 : N2VNInt<op24_23, op21_20, 0b00, op17_16, op11_7, op6, op4,
- itin, !strconcat(OpcodeStr, "16"), v8i8, v8i16, IntOp>;
+ itin, OpcodeStr, !strconcat(Dt, "16"),
+ v8i8, v8i16, IntOp>;
def v4i16 : N2VNInt<op24_23, op21_20, 0b01, op17_16, op11_7, op6, op4,
- itin, !strconcat(OpcodeStr, "32"), v4i16, v4i32, IntOp>;
+ itin, OpcodeStr, !strconcat(Dt, "32"),
+ v4i16, v4i32, IntOp>;
def v2i32 : N2VNInt<op24_23, op21_20, 0b10, op17_16, op11_7, op6, op4,
- itin, !strconcat(OpcodeStr, "64"), v2i32, v2i64, IntOp>;
+ itin, OpcodeStr, !strconcat(Dt, "64"),
+ v2i32, v2i64, IntOp>;
}
// Neon Lengthening 2-register vector intrinsic (currently specific to VMOVL).
// source operand element sizes of 8, 16 and 32 bits:
multiclass N2VLInt_QHS<bits<2> op24_23, bits<5> op11_7, bit op6, bit op4,
- string OpcodeStr, Intrinsic IntOp> {
+ string OpcodeStr, string Dt, Intrinsic IntOp> {
def v8i16 : N2VLInt<op24_23, 0b00, 0b10, 0b00, op11_7, op6, op4, IIC_VQUNAiD,
- !strconcat(OpcodeStr, "8"), v8i16, v8i8, IntOp>;
+ OpcodeStr, !strconcat(Dt, "8"), v8i16, v8i8, IntOp>;
def v4i32 : N2VLInt<op24_23, 0b01, 0b00, 0b00, op11_7, op6, op4, IIC_VQUNAiD,
- !strconcat(OpcodeStr, "16"), v4i32, v4i16, IntOp>;
+ OpcodeStr, !strconcat(Dt, "16"), v4i32, v4i16, IntOp>;
def v2i64 : N2VLInt<op24_23, 0b10, 0b00, 0b00, op11_7, op6, op4, IIC_VQUNAiD,
- !strconcat(OpcodeStr, "32"), v2i64, v2i32, IntOp>;
+ OpcodeStr, !strconcat(Dt, "32"), v2i64, v2i32, IntOp>;
}
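// Per the comment above, the only expected client is VMOVL, roughly
// (bits illustrative):
//   defm VMOVLs : N2VLInt_QHS<0b01, 0b10100, 0, 1, "vmovl", "s",
//                             int_arm_neon_vmovls>;
// giving vmovl.s8/.s16/.s32, with a "u"-suffixed twin for unsigned.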
@@ -1198,66 +1342,85 @@ multiclass N2VLInt_QHS<bits<2> op24_23, bits<5> op11_7, bit op6, bit op4,
multiclass N3VInt_HS<bit op24, bit op23, bits<4> op11_8, bit op4,
InstrItinClass itinD16, InstrItinClass itinD32,
InstrItinClass itinQ16, InstrItinClass itinQ32,
- string OpcodeStr, Intrinsic IntOp, bit Commutable = 0> {
+ string OpcodeStr, string Dt,
+ Intrinsic IntOp, bit Commutable = 0> {
// 64-bit vector types.
- def v4i16 : N3VDInt<op24, op23, 0b01, op11_8, op4, itinD16, !strconcat(OpcodeStr,"16"),
+ def v4i16 : N3VDInt<op24, op23, 0b01, op11_8, op4, itinD16,
+ OpcodeStr, !strconcat(Dt, "16"),
v4i16, v4i16, IntOp, Commutable>;
- def v2i32 : N3VDInt<op24, op23, 0b10, op11_8, op4, itinD32, !strconcat(OpcodeStr,"32"),
+ def v2i32 : N3VDInt<op24, op23, 0b10, op11_8, op4, itinD32,
+ OpcodeStr, !strconcat(Dt, "32"),
v2i32, v2i32, IntOp, Commutable>;
// 128-bit vector types.
- def v8i16 : N3VQInt<op24, op23, 0b01, op11_8, op4, itinQ16, !strconcat(OpcodeStr,"16"),
+ def v8i16 : N3VQInt<op24, op23, 0b01, op11_8, op4, itinQ16,
+ OpcodeStr, !strconcat(Dt, "16"),
v8i16, v8i16, IntOp, Commutable>;
- def v4i32 : N3VQInt<op24, op23, 0b10, op11_8, op4, itinQ32, !strconcat(OpcodeStr,"32"),
+ def v4i32 : N3VQInt<op24, op23, 0b10, op11_8, op4, itinQ32,
+ OpcodeStr, !strconcat(Dt, "32"),
v4i32, v4i32, IntOp, Commutable>;
}
multiclass N3VIntSL_HS<bits<4> op11_8,
InstrItinClass itinD16, InstrItinClass itinD32,
InstrItinClass itinQ16, InstrItinClass itinQ32,
- string OpcodeStr, Intrinsic IntOp> {
- def v4i16 : N3VDIntSL16<0b01, op11_8, itinD16, !strconcat(OpcodeStr, "16"), v4i16, IntOp>;
- def v2i32 : N3VDIntSL<0b10, op11_8, itinD32, !strconcat(OpcodeStr, "32"), v2i32, IntOp>;
- def v8i16 : N3VQIntSL16<0b01, op11_8, itinQ16, !strconcat(OpcodeStr, "16"), v8i16, v4i16, IntOp>;
- def v4i32 : N3VQIntSL<0b10, op11_8, itinQ32, !strconcat(OpcodeStr, "32"), v4i32, v2i32, IntOp>;
+ string OpcodeStr, string Dt, Intrinsic IntOp> {
+ def v4i16 : N3VDIntSL16<0b01, op11_8, itinD16,
+ OpcodeStr, !strconcat(Dt, "16"), v4i16, IntOp>;
+ def v2i32 : N3VDIntSL<0b10, op11_8, itinD32,
+ OpcodeStr, !strconcat(Dt, "32"), v2i32, IntOp>;
+ def v8i16 : N3VQIntSL16<0b01, op11_8, itinQ16,
+ OpcodeStr, !strconcat(Dt, "16"), v8i16, v4i16, IntOp>;
+ def v4i32 : N3VQIntSL<0b10, op11_8, itinQ32,
+ OpcodeStr, !strconcat(Dt, "32"), v4i32, v2i32, IntOp>;
}
// ....then also with element size of 8 bits:
multiclass N3VInt_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
InstrItinClass itinD16, InstrItinClass itinD32,
InstrItinClass itinQ16, InstrItinClass itinQ32,
- string OpcodeStr, Intrinsic IntOp, bit Commutable = 0>
+ string OpcodeStr, string Dt,
+ Intrinsic IntOp, bit Commutable = 0>
: N3VInt_HS<op24, op23, op11_8, op4, itinD16, itinD32, itinQ16, itinQ32,
- OpcodeStr, IntOp, Commutable> {
+ OpcodeStr, Dt, IntOp, Commutable> {
def v8i8 : N3VDInt<op24, op23, 0b00, op11_8, op4, itinD16,
- !strconcat(OpcodeStr, "8"), v8i8, v8i8, IntOp, Commutable>;
+ OpcodeStr, !strconcat(Dt, "8"),
+ v8i8, v8i8, IntOp, Commutable>;
def v16i8 : N3VQInt<op24, op23, 0b00, op11_8, op4, itinQ16,
- !strconcat(OpcodeStr, "8"), v16i8, v16i8, IntOp, Commutable>;
+ OpcodeStr, !strconcat(Dt, "8"),
+ v16i8, v16i8, IntOp, Commutable>;
}
// ....then also with element size of 64 bits:
multiclass N3VInt_QHSD<bit op24, bit op23, bits<4> op11_8, bit op4,
InstrItinClass itinD16, InstrItinClass itinD32,
InstrItinClass itinQ16, InstrItinClass itinQ32,
- string OpcodeStr, Intrinsic IntOp, bit Commutable = 0>
+ string OpcodeStr, string Dt,
+ Intrinsic IntOp, bit Commutable = 0>
: N3VInt_QHS<op24, op23, op11_8, op4, itinD16, itinD32, itinQ16, itinQ32,
- OpcodeStr, IntOp, Commutable> {
+ OpcodeStr, Dt, IntOp, Commutable> {
def v1i64 : N3VDInt<op24, op23, 0b11, op11_8, op4, itinD32,
- !strconcat(OpcodeStr,"64"), v1i64, v1i64, IntOp, Commutable>;
+ OpcodeStr, !strconcat(Dt, "64"),
+ v1i64, v1i64, IntOp, Commutable>;
def v2i64 : N3VQInt<op24, op23, 0b11, op11_8, op4, itinQ32,
- !strconcat(OpcodeStr,"64"), v2i64, v2i64, IntOp, Commutable>;
+ OpcodeStr, !strconcat(Dt, "64"),
+ v2i64, v2i64, IntOp, Commutable>;
}
// Neon Narrowing 3-register vector intrinsics,
// source operand element sizes of 16, 32 and 64 bits:
multiclass N3VNInt_HSD<bit op24, bit op23, bits<4> op11_8, bit op4,
- string OpcodeStr, Intrinsic IntOp, bit Commutable = 0> {
- def v8i8 : N3VNInt<op24, op23, 0b00, op11_8, op4, !strconcat(OpcodeStr,"16"),
+ string OpcodeStr, string Dt,
+ Intrinsic IntOp, bit Commutable = 0> {
+ def v8i8 : N3VNInt<op24, op23, 0b00, op11_8, op4,
+ OpcodeStr, !strconcat(Dt, "16"),
v8i8, v8i16, IntOp, Commutable>;
- def v4i16 : N3VNInt<op24, op23, 0b01, op11_8, op4, !strconcat(OpcodeStr,"32"),
+ def v4i16 : N3VNInt<op24, op23, 0b01, op11_8, op4,
+ OpcodeStr, !strconcat(Dt, "32"),
v4i16, v4i32, IntOp, Commutable>;
- def v2i32 : N3VNInt<op24, op23, 0b10, op11_8, op4, !strconcat(OpcodeStr,"64"),
+ def v2i32 : N3VNInt<op24, op23, 0b10, op11_8, op4,
+ OpcodeStr, !strconcat(Dt, "64"),
v2i32, v2i64, IntOp, Commutable>;
}
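// Note the Dt suffix names the source width here (16/32/64) even though the
// results narrow to 8/16/32 bits, matching the ARM convention for
// vaddhn/vsubhn-style mnemonics; e.g. (bits illustrative):
//   defm VADDHN : N3VNInt_HSD<0, 1, 0b0100, 0, "vaddhn", "i",
//                             int_arm_neon_vaddhn, 1>;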
@@ -1266,41 +1429,50 @@ multiclass N3VNInt_HSD<bit op24, bit op23, bits<4> op11_8, bit op4,
// First with only element sizes of 16 and 32 bits:
multiclass N3VLInt_HS<bit op24, bit op23, bits<4> op11_8, bit op4,
- InstrItinClass itin, string OpcodeStr,
+ InstrItinClass itin, string OpcodeStr, string Dt,
Intrinsic IntOp, bit Commutable = 0> {
def v4i32 : N3VLInt<op24, op23, 0b01, op11_8, op4, itin,
- !strconcat(OpcodeStr,"16"), v4i32, v4i16, IntOp, Commutable>;
+ OpcodeStr, !strconcat(Dt, "16"),
+ v4i32, v4i16, IntOp, Commutable>;
def v2i64 : N3VLInt<op24, op23, 0b10, op11_8, op4, itin,
- !strconcat(OpcodeStr,"32"), v2i64, v2i32, IntOp, Commutable>;
+ OpcodeStr, !strconcat(Dt, "32"),
+ v2i64, v2i32, IntOp, Commutable>;
}
multiclass N3VLIntSL_HS<bit op24, bits<4> op11_8,
- InstrItinClass itin, string OpcodeStr, Intrinsic IntOp> {
+ InstrItinClass itin, string OpcodeStr, string Dt,
+ Intrinsic IntOp> {
def v4i16 : N3VLIntSL16<op24, 0b01, op11_8, itin,
- !strconcat(OpcodeStr, "16"), v4i32, v4i16, IntOp>;
+ OpcodeStr, !strconcat(Dt, "16"), v4i32, v4i16, IntOp>;
def v2i32 : N3VLIntSL<op24, 0b10, op11_8, itin,
- !strconcat(OpcodeStr, "32"), v2i64, v2i32, IntOp>;
+ OpcodeStr, !strconcat(Dt, "32"), v2i64, v2i32, IntOp>;
}
// ....then also with element size of 8 bits:
multiclass N3VLInt_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
- InstrItinClass itin, string OpcodeStr,
+ InstrItinClass itin, string OpcodeStr, string Dt,
Intrinsic IntOp, bit Commutable = 0>
- : N3VLInt_HS<op24, op23, op11_8, op4, itin, OpcodeStr, IntOp, Commutable> {
+ : N3VLInt_HS<op24, op23, op11_8, op4, itin, OpcodeStr, Dt,
+ IntOp, Commutable> {
def v8i16 : N3VLInt<op24, op23, 0b00, op11_8, op4, itin,
- !strconcat(OpcodeStr, "8"), v8i16, v8i8, IntOp, Commutable>;
+ OpcodeStr, !strconcat(Dt, "8"),
+ v8i16, v8i8, IntOp, Commutable>;
}
// Neon Wide 3-register vector intrinsics,
// source operand element sizes of 8, 16 and 32 bits:
multiclass N3VWInt_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
- string OpcodeStr, Intrinsic IntOp, bit Commutable = 0> {
- def v8i16 : N3VWInt<op24, op23, 0b00, op11_8, op4, !strconcat(OpcodeStr, "8"),
+ string OpcodeStr, string Dt,
+ Intrinsic IntOp, bit Commutable = 0> {
+ def v8i16 : N3VWInt<op24, op23, 0b00, op11_8, op4,
+ OpcodeStr, !strconcat(Dt, "8"),
v8i16, v8i8, IntOp, Commutable>;
- def v4i32 : N3VWInt<op24, op23, 0b01, op11_8, op4, !strconcat(OpcodeStr,"16"),
+ def v4i32 : N3VWInt<op24, op23, 0b01, op11_8, op4,
+ OpcodeStr, !strconcat(Dt, "16"),
v4i32, v4i16, IntOp, Commutable>;
- def v2i64 : N3VWInt<op24, op23, 0b10, op11_8, op4, !strconcat(OpcodeStr,"32"),
+ def v2i64 : N3VWInt<op24, op23, 0b10, op11_8, op4,
+ OpcodeStr, !strconcat(Dt, "32"),
v2i64, v2i32, IntOp, Commutable>;
}
@@ -1310,57 +1482,57 @@ multiclass N3VWInt_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
multiclass N3VMulOp_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
InstrItinClass itinD16, InstrItinClass itinD32,
InstrItinClass itinQ16, InstrItinClass itinQ32,
- string OpcodeStr, SDNode OpNode> {
+ string OpcodeStr, string Dt, SDNode OpNode> {
// 64-bit vector types.
def v8i8 : N3VDMulOp<op24, op23, 0b00, op11_8, op4, itinD16,
- !strconcat(OpcodeStr, "8"), v8i8, mul, OpNode>;
+ OpcodeStr, !strconcat(Dt, "8"), v8i8, mul, OpNode>;
def v4i16 : N3VDMulOp<op24, op23, 0b01, op11_8, op4, itinD16,
- !strconcat(OpcodeStr, "16"), v4i16, mul, OpNode>;
+ OpcodeStr, !strconcat(Dt, "16"), v4i16, mul, OpNode>;
def v2i32 : N3VDMulOp<op24, op23, 0b10, op11_8, op4, itinD32,
- !strconcat(OpcodeStr, "32"), v2i32, mul, OpNode>;
+ OpcodeStr, !strconcat(Dt, "32"), v2i32, mul, OpNode>;
// 128-bit vector types.
def v16i8 : N3VQMulOp<op24, op23, 0b00, op11_8, op4, itinQ16,
- !strconcat(OpcodeStr, "8"), v16i8, mul, OpNode>;
+ OpcodeStr, !strconcat(Dt, "8"), v16i8, mul, OpNode>;
def v8i16 : N3VQMulOp<op24, op23, 0b01, op11_8, op4, itinQ16,
- !strconcat(OpcodeStr, "16"), v8i16, mul, OpNode>;
+ OpcodeStr, !strconcat(Dt, "16"), v8i16, mul, OpNode>;
def v4i32 : N3VQMulOp<op24, op23, 0b10, op11_8, op4, itinQ32,
- !strconcat(OpcodeStr, "32"), v4i32, mul, OpNode>;
+ OpcodeStr, !strconcat(Dt, "32"), v4i32, mul, OpNode>;
}
multiclass N3VMulOpSL_HS<bits<4> op11_8,
InstrItinClass itinD16, InstrItinClass itinD32,
InstrItinClass itinQ16, InstrItinClass itinQ32,
- string OpcodeStr, SDNode ShOp> {
+ string OpcodeStr, string Dt, SDNode ShOp> {
def v4i16 : N3VDMulOpSL16<0b01, op11_8, itinD16,
- !strconcat(OpcodeStr, "16"), v4i16, mul, ShOp>;
+ OpcodeStr, !strconcat(Dt, "16"), v4i16, mul, ShOp>;
def v2i32 : N3VDMulOpSL<0b10, op11_8, itinD32,
- !strconcat(OpcodeStr, "32"), v2i32, mul, ShOp>;
+ OpcodeStr, !strconcat(Dt, "32"), v2i32, mul, ShOp>;
def v8i16 : N3VQMulOpSL16<0b01, op11_8, itinQ16,
- !strconcat(OpcodeStr, "16"), v8i16, v4i16, mul, ShOp>;
+ OpcodeStr, !strconcat(Dt, "16"), v8i16, v4i16, mul, ShOp>;
def v4i32 : N3VQMulOpSL<0b10, op11_8, itinQ32,
- !strconcat(OpcodeStr, "32"), v4i32, v2i32, mul, ShOp>;
+ OpcodeStr, !strconcat(Dt, "32"), v4i32, v2i32, mul, ShOp>;
}
// Neon 3-argument intrinsics,
// element sizes of 8, 16 and 32 bits:
multiclass N3VInt3_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
- string OpcodeStr, Intrinsic IntOp> {
+ string OpcodeStr, string Dt, Intrinsic IntOp> {
// 64-bit vector types.
def v8i8 : N3VDInt3<op24, op23, 0b00, op11_8, op4, IIC_VMACi16D,
- !strconcat(OpcodeStr, "8"), v8i8, v8i8, IntOp>;
+ OpcodeStr, !strconcat(Dt, "8"), v8i8, v8i8, IntOp>;
def v4i16 : N3VDInt3<op24, op23, 0b01, op11_8, op4, IIC_VMACi16D,
- !strconcat(OpcodeStr, "16"), v4i16, v4i16, IntOp>;
+ OpcodeStr, !strconcat(Dt, "16"), v4i16, v4i16, IntOp>;
def v2i32 : N3VDInt3<op24, op23, 0b10, op11_8, op4, IIC_VMACi32D,
- !strconcat(OpcodeStr, "32"), v2i32, v2i32, IntOp>;
+ OpcodeStr, !strconcat(Dt, "32"), v2i32, v2i32, IntOp>;
// 128-bit vector types.
def v16i8 : N3VQInt3<op24, op23, 0b00, op11_8, op4, IIC_VMACi16Q,
- !strconcat(OpcodeStr, "8"), v16i8, v16i8, IntOp>;
+ OpcodeStr, !strconcat(Dt, "8"), v16i8, v16i8, IntOp>;
def v8i16 : N3VQInt3<op24, op23, 0b01, op11_8, op4, IIC_VMACi16Q,
- !strconcat(OpcodeStr, "16"), v8i16, v8i16, IntOp>;
+ OpcodeStr, !strconcat(Dt, "16"), v8i16, v8i16, IntOp>;
def v4i32 : N3VQInt3<op24, op23, 0b10, op11_8, op4, IIC_VMACi32Q,
- !strconcat(OpcodeStr, "32"), v4i32, v4i32, IntOp>;
+ OpcodeStr, !strconcat(Dt, "32"), v4i32, v4i32, IntOp>;
}
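// The "$src1 = $dst" tie in N3VDInt3/N3VQInt3 makes these read-modify-write
// accumulators; a typical client would be (bits illustrative):
//   defm VABAs : N3VInt3_QHS<0, 0, 0b0111, 1, "vaba", "s",
//                            int_arm_neon_vabas>;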
@@ -1368,27 +1540,27 @@ multiclass N3VInt3_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
// First with only element sizes of 16 and 32 bits:
multiclass N3VLInt3_HS<bit op24, bit op23, bits<4> op11_8, bit op4,
- string OpcodeStr, Intrinsic IntOp> {
+ string OpcodeStr, string Dt, Intrinsic IntOp> {
def v4i32 : N3VLInt3<op24, op23, 0b01, op11_8, op4, IIC_VMACi16D,
- !strconcat(OpcodeStr, "16"), v4i32, v4i16, IntOp>;
+ OpcodeStr, !strconcat(Dt, "16"), v4i32, v4i16, IntOp>;
def v2i64 : N3VLInt3<op24, op23, 0b10, op11_8, op4, IIC_VMACi16D,
- !strconcat(OpcodeStr, "32"), v2i64, v2i32, IntOp>;
+ OpcodeStr, !strconcat(Dt, "32"), v2i64, v2i32, IntOp>;
}
multiclass N3VLInt3SL_HS<bit op24, bits<4> op11_8,
- string OpcodeStr, Intrinsic IntOp> {
+ string OpcodeStr, string Dt, Intrinsic IntOp> {
def v4i16 : N3VLInt3SL16<op24, 0b01, op11_8, IIC_VMACi16D,
- !strconcat(OpcodeStr, "16"), v4i32, v4i16, IntOp>;
+ OpcodeStr, !strconcat(Dt,"16"), v4i32, v4i16, IntOp>;
def v2i32 : N3VLInt3SL<op24, 0b10, op11_8, IIC_VMACi32D,
- !strconcat(OpcodeStr, "32"), v2i64, v2i32, IntOp>;
+ OpcodeStr, !strconcat(Dt, "32"), v2i64, v2i32, IntOp>;
}
// ....then also with element size of 8 bits:
multiclass N3VLInt3_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
- string OpcodeStr, Intrinsic IntOp>
- : N3VLInt3_HS<op24, op23, op11_8, op4, OpcodeStr, IntOp> {
+ string OpcodeStr, string Dt, Intrinsic IntOp>
+ : N3VLInt3_HS<op24, op23, op11_8, op4, OpcodeStr, Dt, IntOp> {
def v8i16 : N3VLInt3<op24, op23, 0b00, op11_8, op4, IIC_VMACi16D,
- !strconcat(OpcodeStr, "8"), v8i16, v8i8, IntOp>;
+ OpcodeStr, !strconcat(Dt, "8"), v8i16, v8i8, IntOp>;
}
@@ -1397,22 +1569,22 @@ multiclass N3VLInt3_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
multiclass N2VInt_QHS<bits<2> op24_23, bits<2> op21_20, bits<2> op17_16,
bits<5> op11_7, bit op4,
InstrItinClass itinD, InstrItinClass itinQ,
- string OpcodeStr, Intrinsic IntOp> {
+ string OpcodeStr, string Dt, Intrinsic IntOp> {
// 64-bit vector types.
def v8i8 : N2VDInt<op24_23, op21_20, 0b00, op17_16, op11_7, op4,
- itinD, !strconcat(OpcodeStr, "8"), v8i8, v8i8, IntOp>;
+ itinD, OpcodeStr, !strconcat(Dt, "8"), v8i8, v8i8, IntOp>;
def v4i16 : N2VDInt<op24_23, op21_20, 0b01, op17_16, op11_7, op4,
- itinD, !strconcat(OpcodeStr, "16"), v4i16, v4i16, IntOp>;
+ itinD, OpcodeStr, !strconcat(Dt, "16"), v4i16, v4i16, IntOp>;
def v2i32 : N2VDInt<op24_23, op21_20, 0b10, op17_16, op11_7, op4,
- itinD, !strconcat(OpcodeStr, "32"), v2i32, v2i32, IntOp>;
+ itinD, OpcodeStr, !strconcat(Dt, "32"), v2i32, v2i32, IntOp>;
// 128-bit vector types.
def v16i8 : N2VQInt<op24_23, op21_20, 0b00, op17_16, op11_7, op4,
- itinQ, !strconcat(OpcodeStr, "8"), v16i8, v16i8, IntOp>;
+ itinQ, OpcodeStr, !strconcat(Dt, "8"), v16i8, v16i8, IntOp>;
def v8i16 : N2VQInt<op24_23, op21_20, 0b01, op17_16, op11_7, op4,
- itinQ, !strconcat(OpcodeStr, "16"), v8i16, v8i16, IntOp>;
+ itinQ, OpcodeStr, !strconcat(Dt, "16"), v8i16, v8i16, IntOp>;
def v4i32 : N2VQInt<op24_23, op21_20, 0b10, op17_16, op11_7, op4,
- itinQ, !strconcat(OpcodeStr, "32"), v4i32, v4i32, IntOp>;
+ itinQ, OpcodeStr, !strconcat(Dt, "32"), v4i32, v4i32, IntOp>;
}
@@ -1420,22 +1592,22 @@ multiclass N2VInt_QHS<bits<2> op24_23, bits<2> op21_20, bits<2> op17_16,
// element sizes of 8, 16 and 32 bits:
multiclass N2VPLInt_QHS<bits<2> op24_23, bits<2> op21_20, bits<2> op17_16,
bits<5> op11_7, bit op4,
- string OpcodeStr, Intrinsic IntOp> {
+ string OpcodeStr, string Dt, Intrinsic IntOp> {
// 64-bit vector types.
def v8i8 : N2VDPLInt<op24_23, op21_20, 0b00, op17_16, op11_7, op4,
- !strconcat(OpcodeStr, "8"), v4i16, v8i8, IntOp>;
+ OpcodeStr, !strconcat(Dt, "8"), v4i16, v8i8, IntOp>;
def v4i16 : N2VDPLInt<op24_23, op21_20, 0b01, op17_16, op11_7, op4,
- !strconcat(OpcodeStr, "16"), v2i32, v4i16, IntOp>;
+ OpcodeStr, !strconcat(Dt, "16"), v2i32, v4i16, IntOp>;
def v2i32 : N2VDPLInt<op24_23, op21_20, 0b10, op17_16, op11_7, op4,
- !strconcat(OpcodeStr, "32"), v1i64, v2i32, IntOp>;
+ OpcodeStr, !strconcat(Dt, "32"), v1i64, v2i32, IntOp>;
// 128-bit vector types.
def v16i8 : N2VQPLInt<op24_23, op21_20, 0b00, op17_16, op11_7, op4,
- !strconcat(OpcodeStr, "8"), v8i16, v16i8, IntOp>;
+ OpcodeStr, !strconcat(Dt, "8"), v8i16, v16i8, IntOp>;
def v8i16 : N2VQPLInt<op24_23, op21_20, 0b01, op17_16, op11_7, op4,
- !strconcat(OpcodeStr, "16"), v4i32, v8i16, IntOp>;
+ OpcodeStr, !strconcat(Dt, "16"), v4i32, v8i16, IntOp>;
def v4i32 : N2VQPLInt<op24_23, op21_20, 0b10, op17_16, op11_7, op4,
- !strconcat(OpcodeStr, "32"), v2i64, v4i32, IntOp>;
+ OpcodeStr, !strconcat(Dt, "32"), v2i64, v4i32, IntOp>;
}
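// These back the pairwise-long VPADDL (and, via N2VPLInt2_QHS below, the
// accumulating VPADAL); roughly (bits illustrative):
//   defm VPADDLs : N2VPLInt_QHS<0b11, 0b11, 0b00, 0b00100, 0,
//                               "vpaddl", "s", int_arm_neon_vpaddls>;
// with the pattern types encoding the widening (e.g. v4i16 result from v8i8).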
@@ -1443,61 +1615,62 @@ multiclass N2VPLInt_QHS<bits<2> op24_23, bits<2> op21_20, bits<2> op17_16,
// element sizes of 8, 16 and 32 bits:
multiclass N2VPLInt2_QHS<bits<2> op24_23, bits<2> op21_20, bits<2> op17_16,
bits<5> op11_7, bit op4,
- string OpcodeStr, Intrinsic IntOp> {
+ string OpcodeStr, string Dt, Intrinsic IntOp> {
// 64-bit vector types.
def v8i8 : N2VDPLInt2<op24_23, op21_20, 0b00, op17_16, op11_7, op4,
- !strconcat(OpcodeStr, "8"), v4i16, v8i8, IntOp>;
+ OpcodeStr, !strconcat(Dt, "8"), v4i16, v8i8, IntOp>;
def v4i16 : N2VDPLInt2<op24_23, op21_20, 0b01, op17_16, op11_7, op4,
- !strconcat(OpcodeStr, "16"), v2i32, v4i16, IntOp>;
+ OpcodeStr, !strconcat(Dt, "16"), v2i32, v4i16, IntOp>;
def v2i32 : N2VDPLInt2<op24_23, op21_20, 0b10, op17_16, op11_7, op4,
- !strconcat(OpcodeStr, "32"), v1i64, v2i32, IntOp>;
+ OpcodeStr, !strconcat(Dt, "32"), v1i64, v2i32, IntOp>;
// 128-bit vector types.
def v16i8 : N2VQPLInt2<op24_23, op21_20, 0b00, op17_16, op11_7, op4,
- !strconcat(OpcodeStr, "8"), v8i16, v16i8, IntOp>;
+ OpcodeStr, !strconcat(Dt, "8"), v8i16, v16i8, IntOp>;
def v8i16 : N2VQPLInt2<op24_23, op21_20, 0b01, op17_16, op11_7, op4,
- !strconcat(OpcodeStr, "16"), v4i32, v8i16, IntOp>;
+ OpcodeStr, !strconcat(Dt, "16"), v4i32, v8i16, IntOp>;
def v4i32 : N2VQPLInt2<op24_23, op21_20, 0b10, op17_16, op11_7, op4,
- !strconcat(OpcodeStr, "32"), v2i64, v4i32, IntOp>;
+ OpcodeStr, !strconcat(Dt, "32"), v2i64, v4i32, IntOp>;
}
// Neon 2-register vector shift by immediate,
// element sizes of 8, 16, 32 and 64 bits:
multiclass N2VSh_QHSD<bit op24, bit op23, bits<4> op11_8, bit op4,
- InstrItinClass itin, string OpcodeStr, SDNode OpNode> {
+ InstrItinClass itin, string OpcodeStr, string Dt,
+ SDNode OpNode> {
// 64-bit vector types.
def v8i8 : N2VDSh<op24, op23, op11_8, 0, op4, itin,
- !strconcat(OpcodeStr, "8"), v8i8, OpNode> {
+ OpcodeStr, !strconcat(Dt, "8"), v8i8, OpNode> {
let Inst{21-19} = 0b001; // imm6 = 001xxx
}
def v4i16 : N2VDSh<op24, op23, op11_8, 0, op4, itin,
- !strconcat(OpcodeStr, "16"), v4i16, OpNode> {
+ OpcodeStr, !strconcat(Dt, "16"), v4i16, OpNode> {
let Inst{21-20} = 0b01; // imm6 = 01xxxx
}
def v2i32 : N2VDSh<op24, op23, op11_8, 0, op4, itin,
- !strconcat(OpcodeStr, "32"), v2i32, OpNode> {
+ OpcodeStr, !strconcat(Dt, "32"), v2i32, OpNode> {
let Inst{21} = 0b1; // imm6 = 1xxxxx
}
def v1i64 : N2VDSh<op24, op23, op11_8, 1, op4, itin,
- !strconcat(OpcodeStr, "64"), v1i64, OpNode>;
+ OpcodeStr, !strconcat(Dt, "64"), v1i64, OpNode>;
// imm6 = xxxxxx
// 128-bit vector types.
def v16i8 : N2VQSh<op24, op23, op11_8, 0, op4, itin,
- !strconcat(OpcodeStr, "8"), v16i8, OpNode> {
+ OpcodeStr, !strconcat(Dt, "8"), v16i8, OpNode> {
let Inst{21-19} = 0b001; // imm6 = 001xxx
}
def v8i16 : N2VQSh<op24, op23, op11_8, 0, op4, itin,
- !strconcat(OpcodeStr, "16"), v8i16, OpNode> {
+ OpcodeStr, !strconcat(Dt, "16"), v8i16, OpNode> {
let Inst{21-20} = 0b01; // imm6 = 01xxxx
}
def v4i32 : N2VQSh<op24, op23, op11_8, 0, op4, itin,
- !strconcat(OpcodeStr, "32"), v4i32, OpNode> {
+ OpcodeStr, !strconcat(Dt, "32"), v4i32, OpNode> {
let Inst{21} = 0b1; // imm6 = 1xxxxx
}
def v2i64 : N2VQSh<op24, op23, op11_8, 1, op4, itin,
- !strconcat(OpcodeStr, "64"), v2i64, OpNode>;
+ OpcodeStr, !strconcat(Dt, "64"), v2i64, OpNode>;
// imm6 = xxxxxx
}
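For these shift-by-immediate forms the element size is carried in the leading bits of imm6 (Inst{21-16}), which the let overrides above pin per type; the remaining free bits hold the shift amount. A worked expansion sketch, assuming TableGen's usual defm name prefixing, for the later instantiation defm VSHRs : N2VSh_QHSD<..., "vshr", "s", NEONvshrs>:

  // Hypothetical expansion (illustration, not patch output):
  //   VSHRsv8i8  : "vshr.s8"   imm6 = 001xxx  (3 free bits)
  //   VSHRsv4i16 : "vshr.s16"  imm6 = 01xxxx  (4 free bits)
  //   VSHRsv2i32 : "vshr.s32"  imm6 = 1xxxxx  (5 free bits)
  //   VSHRsv1i64 : "vshr.s64"  imm6 = xxxxxx  (6 free bits)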
@@ -1505,39 +1678,39 @@ multiclass N2VSh_QHSD<bit op24, bit op23, bits<4> op11_8, bit op4,
// Neon Shift-Accumulate vector operations,
// element sizes of 8, 16, 32 and 64 bits:
multiclass N2VShAdd_QHSD<bit op24, bit op23, bits<4> op11_8, bit op4,
- string OpcodeStr, SDNode ShOp> {
+ string OpcodeStr, string Dt, SDNode ShOp> {
// 64-bit vector types.
def v8i8 : N2VDShAdd<op24, op23, op11_8, 0, op4,
- !strconcat(OpcodeStr, "8"), v8i8, ShOp> {
+ OpcodeStr, !strconcat(Dt, "8"), v8i8, ShOp> {
let Inst{21-19} = 0b001; // imm6 = 001xxx
}
def v4i16 : N2VDShAdd<op24, op23, op11_8, 0, op4,
- !strconcat(OpcodeStr, "16"), v4i16, ShOp> {
+ OpcodeStr, !strconcat(Dt, "16"), v4i16, ShOp> {
let Inst{21-20} = 0b01; // imm6 = 01xxxx
}
def v2i32 : N2VDShAdd<op24, op23, op11_8, 0, op4,
- !strconcat(OpcodeStr, "32"), v2i32, ShOp> {
+ OpcodeStr, !strconcat(Dt, "32"), v2i32, ShOp> {
let Inst{21} = 0b1; // imm6 = 1xxxxx
}
def v1i64 : N2VDShAdd<op24, op23, op11_8, 1, op4,
- !strconcat(OpcodeStr, "64"), v1i64, ShOp>;
+ OpcodeStr, !strconcat(Dt, "64"), v1i64, ShOp>;
// imm6 = xxxxxx
// 128-bit vector types.
def v16i8 : N2VQShAdd<op24, op23, op11_8, 0, op4,
- !strconcat(OpcodeStr, "8"), v16i8, ShOp> {
+ OpcodeStr, !strconcat(Dt, "8"), v16i8, ShOp> {
let Inst{21-19} = 0b001; // imm6 = 001xxx
}
def v8i16 : N2VQShAdd<op24, op23, op11_8, 0, op4,
- !strconcat(OpcodeStr, "16"), v8i16, ShOp> {
+ OpcodeStr, !strconcat(Dt, "16"), v8i16, ShOp> {
let Inst{21-20} = 0b01; // imm6 = 01xxxx
}
def v4i32 : N2VQShAdd<op24, op23, op11_8, 0, op4,
- !strconcat(OpcodeStr, "32"), v4i32, ShOp> {
+ OpcodeStr, !strconcat(Dt, "32"), v4i32, ShOp> {
let Inst{21} = 0b1; // imm6 = 1xxxxx
}
def v2i64 : N2VQShAdd<op24, op23, op11_8, 1, op4,
- !strconcat(OpcodeStr, "64"), v2i64, ShOp>;
+ OpcodeStr, !strconcat(Dt, "64"), v2i64, ShOp>;
// imm6 = xxxxxx
}
@@ -1548,53 +1721,53 @@ multiclass N2VShIns_QHSD<bit op24, bit op23, bits<4> op11_8, bit op4,
string OpcodeStr, SDNode ShOp> {
// 64-bit vector types.
def v8i8 : N2VDShIns<op24, op23, op11_8, 0, op4,
- !strconcat(OpcodeStr, "8"), v8i8, ShOp> {
+ OpcodeStr, "8", v8i8, ShOp> {
let Inst{21-19} = 0b001; // imm6 = 001xxx
}
def v4i16 : N2VDShIns<op24, op23, op11_8, 0, op4,
- !strconcat(OpcodeStr, "16"), v4i16, ShOp> {
+ OpcodeStr, "16", v4i16, ShOp> {
let Inst{21-20} = 0b01; // imm6 = 01xxxx
}
def v2i32 : N2VDShIns<op24, op23, op11_8, 0, op4,
- !strconcat(OpcodeStr, "32"), v2i32, ShOp> {
+ OpcodeStr, "32", v2i32, ShOp> {
let Inst{21} = 0b1; // imm6 = 1xxxxx
}
def v1i64 : N2VDShIns<op24, op23, op11_8, 1, op4,
- !strconcat(OpcodeStr, "64"), v1i64, ShOp>;
+ OpcodeStr, "64", v1i64, ShOp>;
// imm6 = xxxxxx
// 128-bit vector types.
def v16i8 : N2VQShIns<op24, op23, op11_8, 0, op4,
- !strconcat(OpcodeStr, "8"), v16i8, ShOp> {
+ OpcodeStr, "8", v16i8, ShOp> {
let Inst{21-19} = 0b001; // imm6 = 001xxx
}
def v8i16 : N2VQShIns<op24, op23, op11_8, 0, op4,
- !strconcat(OpcodeStr, "16"), v8i16, ShOp> {
+ OpcodeStr, "16", v8i16, ShOp> {
let Inst{21-20} = 0b01; // imm6 = 01xxxx
}
def v4i32 : N2VQShIns<op24, op23, op11_8, 0, op4,
- !strconcat(OpcodeStr, "32"), v4i32, ShOp> {
+ OpcodeStr, "32", v4i32, ShOp> {
let Inst{21} = 0b1; // imm6 = 1xxxxx
}
def v2i64 : N2VQShIns<op24, op23, op11_8, 1, op4,
- !strconcat(OpcodeStr, "64"), v2i64, ShOp>;
+ OpcodeStr, "64", v2i64, ShOp>;
// imm6 = xxxxxx
}
// Neon Shift Long operations,
// element sizes of 8, 16, 32 bits:
multiclass N2VLSh_QHS<bit op24, bit op23, bits<4> op11_8, bit op7, bit op6,
- bit op4, string OpcodeStr, SDNode OpNode> {
+ bit op4, string OpcodeStr, string Dt, SDNode OpNode> {
def v8i16 : N2VLSh<op24, op23, op11_8, op7, op6, op4,
- !strconcat(OpcodeStr, "8"), v8i16, v8i8, OpNode> {
+ OpcodeStr, !strconcat(Dt, "8"), v8i16, v8i8, OpNode> {
let Inst{21-19} = 0b001; // imm6 = 001xxx
}
def v4i32 : N2VLSh<op24, op23, op11_8, op7, op6, op4,
- !strconcat(OpcodeStr, "16"), v4i32, v4i16, OpNode> {
+ OpcodeStr, !strconcat(Dt, "16"), v4i32, v4i16, OpNode> {
let Inst{21-20} = 0b01; // imm6 = 01xxxx
}
def v2i64 : N2VLSh<op24, op23, op11_8, op7, op6, op4,
- !strconcat(OpcodeStr, "32"), v2i64, v2i32, OpNode> {
+ OpcodeStr, !strconcat(Dt, "32"), v2i64, v2i32, OpNode> {
let Inst{21} = 0b1; // imm6 = 1xxxxx
}
}
@@ -1602,18 +1775,18 @@ multiclass N2VLSh_QHS<bit op24, bit op23, bits<4> op11_8, bit op7, bit op6,
// Neon Shift Narrow operations,
// element sizes of 16, 32, 64 bits:
multiclass N2VNSh_HSD<bit op24, bit op23, bits<4> op11_8, bit op7, bit op6,
- bit op4, InstrItinClass itin, string OpcodeStr,
+ bit op4, InstrItinClass itin, string OpcodeStr, string Dt,
SDNode OpNode> {
def v8i8 : N2VNSh<op24, op23, op11_8, op7, op6, op4, itin,
- !strconcat(OpcodeStr, "16"), v8i8, v8i16, OpNode> {
+ OpcodeStr, !strconcat(Dt, "16"), v8i8, v8i16, OpNode> {
let Inst{21-19} = 0b001; // imm6 = 001xxx
}
def v4i16 : N2VNSh<op24, op23, op11_8, op7, op6, op4, itin,
- !strconcat(OpcodeStr, "32"), v4i16, v4i32, OpNode> {
+ OpcodeStr, !strconcat(Dt, "32"), v4i16, v4i32, OpNode> {
let Inst{21-20} = 0b01; // imm6 = 01xxxx
}
def v2i32 : N2VNSh<op24, op23, op11_8, op7, op6, op4, itin,
- !strconcat(OpcodeStr, "64"), v2i32, v2i64, OpNode> {
+ OpcodeStr, !strconcat(Dt, "64"), v2i32, v2i64, OpNode> {
let Inst{21} = 0b1; // imm6 = 1xxxxx
}
}
@@ -1625,49 +1798,58 @@ multiclass N2VNSh_HSD<bit op24, bit op23, bits<4> op11_8, bit op7, bit op6,
// Vector Add Operations.
// VADD : Vector Add (integer and floating-point)
-defm VADD : N3V_QHSD<0, 0, 0b1000, 0, IIC_VBINiD, IIC_VBINiQ, "vadd.i", add, 1>;
-def VADDfd : N3VD<0, 0, 0b00, 0b1101, 0, IIC_VBIND, "vadd.f32", v2f32, v2f32, fadd, 1>;
-def VADDfq : N3VQ<0, 0, 0b00, 0b1101, 0, IIC_VBINQ, "vadd.f32", v4f32, v4f32, fadd, 1>;
+defm VADD : N3V_QHSD<0, 0, 0b1000, 0, IIC_VBINiD, IIC_VBINiQ, "vadd", "i",
+ add, 1>;
+def VADDfd : N3VD<0, 0, 0b00, 0b1101, 0, IIC_VBIND, "vadd", "f32",
+ v2f32, v2f32, fadd, 1>;
+def VADDfq : N3VQ<0, 0, 0b00, 0b1101, 0, IIC_VBINQ, "vadd", "f32",
+ v4f32, v4f32, fadd, 1>;
// VADDL : Vector Add Long (Q = D + D)
-defm VADDLs : N3VLInt_QHS<0,1,0b0000,0, IIC_VSHLiD, "vaddl.s", int_arm_neon_vaddls, 1>;
-defm VADDLu : N3VLInt_QHS<1,1,0b0000,0, IIC_VSHLiD, "vaddl.u", int_arm_neon_vaddlu, 1>;
+defm VADDLs : N3VLInt_QHS<0,1,0b0000,0, IIC_VSHLiD, "vaddl", "s",
+ int_arm_neon_vaddls, 1>;
+defm VADDLu : N3VLInt_QHS<1,1,0b0000,0, IIC_VSHLiD, "vaddl", "u",
+ int_arm_neon_vaddlu, 1>;
// VADDW : Vector Add Wide (Q = Q + D)
-defm VADDWs : N3VWInt_QHS<0,1,0b0001,0, "vaddw.s", int_arm_neon_vaddws, 0>;
-defm VADDWu : N3VWInt_QHS<1,1,0b0001,0, "vaddw.u", int_arm_neon_vaddwu, 0>;
+defm VADDWs : N3VWInt_QHS<0,1,0b0001,0, "vaddw", "s", int_arm_neon_vaddws, 0>;
+defm VADDWu : N3VWInt_QHS<1,1,0b0001,0, "vaddw", "u", int_arm_neon_vaddwu, 0>;
// VHADD : Vector Halving Add
defm VHADDs : N3VInt_QHS<0,0,0b0000,0, IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q,
- IIC_VBINi4Q, "vhadd.s", int_arm_neon_vhadds, 1>;
+ IIC_VBINi4Q, "vhadd", "s", int_arm_neon_vhadds, 1>;
defm VHADDu : N3VInt_QHS<1,0,0b0000,0, IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q,
- IIC_VBINi4Q, "vhadd.u", int_arm_neon_vhaddu, 1>;
+ IIC_VBINi4Q, "vhadd", "u", int_arm_neon_vhaddu, 1>;
// VRHADD : Vector Rounding Halving Add
defm VRHADDs : N3VInt_QHS<0,0,0b0001,0, IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q,
- IIC_VBINi4Q, "vrhadd.s", int_arm_neon_vrhadds, 1>;
+ IIC_VBINi4Q, "vrhadd", "s", int_arm_neon_vrhadds, 1>;
defm VRHADDu : N3VInt_QHS<1,0,0b0001,0, IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q,
- IIC_VBINi4Q, "vrhadd.u", int_arm_neon_vrhaddu, 1>;
+ IIC_VBINi4Q, "vrhadd", "u", int_arm_neon_vrhaddu, 1>;
// VQADD : Vector Saturating Add
defm VQADDs : N3VInt_QHSD<0,0,0b0000,1, IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q,
- IIC_VBINi4Q, "vqadd.s", int_arm_neon_vqadds, 1>;
+ IIC_VBINi4Q, "vqadd", "s", int_arm_neon_vqadds, 1>;
defm VQADDu : N3VInt_QHSD<1,0,0b0000,1, IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q,
- IIC_VBINi4Q, "vqadd.u", int_arm_neon_vqaddu, 1>;
+ IIC_VBINi4Q, "vqadd", "u", int_arm_neon_vqaddu, 1>;
// VADDHN : Vector Add and Narrow Returning High Half (D = Q + Q)
-defm VADDHN : N3VNInt_HSD<0,1,0b0100,0, "vaddhn.i", int_arm_neon_vaddhn, 1>;
+defm VADDHN : N3VNInt_HSD<0,1,0b0100,0, "vaddhn", "i",
+ int_arm_neon_vaddhn, 1>;
// VRADDHN : Vector Rounding Add and Narrow Returning High Half (D = Q + Q)
-defm VRADDHN : N3VNInt_HSD<1,1,0b0100,0, "vraddhn.i", int_arm_neon_vraddhn, 1>;
+defm VRADDHN : N3VNInt_HSD<1,1,0b0100,0, "vraddhn", "i",
+ int_arm_neon_vraddhn, 1>;
// Vector Multiply Operations.
// VMUL : Vector Multiply (integer, polynomial and floating-point)
-defm VMUL : N3V_QHS<0, 0, 0b1001, 1, IIC_VMULi16D, IIC_VMULi32D, IIC_VMULi16Q,
- IIC_VMULi32Q, "vmul.i", mul, 1>;
-def VMULpd : N3VDInt<1, 0, 0b00, 0b1001, 1, IIC_VMULi16D, "vmul.p8", v8i8, v8i8,
- int_arm_neon_vmulp, 1>;
-def VMULpq : N3VQInt<1, 0, 0b00, 0b1001, 1, IIC_VMULi16Q, "vmul.p8", v16i8, v16i8,
- int_arm_neon_vmulp, 1>;
-def VMULfd : N3VD<1, 0, 0b00, 0b1101, 1, IIC_VBIND, "vmul.f32", v2f32, v2f32, fmul, 1>;
-def VMULfq : N3VQ<1, 0, 0b00, 0b1101, 1, IIC_VBINQ, "vmul.f32", v4f32, v4f32, fmul, 1>;
-defm VMULsl : N3VSL_HS<0b1000, "vmul.i", mul>;
-def VMULslfd : N3VDSL<0b10, 0b1001, IIC_VBIND, "vmul.f32", v2f32, fmul>;
-def VMULslfq : N3VQSL<0b10, 0b1001, IIC_VBINQ, "vmul.f32", v4f32, v2f32, fmul>;
+defm VMUL : N3V_QHS<0, 0, 0b1001, 1, IIC_VMULi16D, IIC_VMULi32D,
+ IIC_VMULi16Q, IIC_VMULi32Q, "vmul", "i", mul, 1>;
+def VMULpd : N3VDInt<1, 0, 0b00, 0b1001, 1, IIC_VMULi16D, "vmul", "p8",
+ v8i8, v8i8, int_arm_neon_vmulp, 1>;
+def VMULpq : N3VQInt<1, 0, 0b00, 0b1001, 1, IIC_VMULi16Q, "vmul", "p8",
+ v16i8, v16i8, int_arm_neon_vmulp, 1>;
+def VMULfd : N3VD<1, 0, 0b00, 0b1101, 1, IIC_VBIND, "vmul", "f32",
+ v2f32, v2f32, fmul, 1>;
+def VMULfq : N3VQ<1, 0, 0b00, 0b1101, 1, IIC_VBINQ, "vmul", "f32",
+ v4f32, v4f32, fmul, 1>;
+defm VMULsl : N3VSL_HS<0b1000, "vmul", "i", mul>;
+def VMULslfd : N3VDSL<0b10, 0b1001, IIC_VBIND, "vmul", "f32", v2f32, fmul>;
+def VMULslfq : N3VQSL<0b10, 0b1001, IIC_VBINQ, "vmul", "f32", v4f32, v2f32, fmul>;
def : Pat<(v8i16 (mul (v8i16 QPR:$src1),
(v8i16 (NEONvduplane (v8i16 QPR:$src2), imm:$lane)))),
(v8i16 (VMULslv8i16 (v8i16 QPR:$src1),
@@ -1690,66 +1872,80 @@ def : Pat<(v4f32 (fmul (v4f32 QPR:$src1),
// VQDMULH : Vector Saturating Doubling Multiply Returning High Half
defm VQDMULH : N3VInt_HS<0, 0, 0b1011, 0, IIC_VMULi16D, IIC_VMULi32D,
IIC_VMULi16Q, IIC_VMULi32Q,
- "vqdmulh.s", int_arm_neon_vqdmulh, 1>;
+ "vqdmulh", "s", int_arm_neon_vqdmulh, 1>;
defm VQDMULHsl: N3VIntSL_HS<0b1100, IIC_VMULi16D, IIC_VMULi32D,
IIC_VMULi16Q, IIC_VMULi32Q,
- "vqdmulh.s", int_arm_neon_vqdmulh>;
+ "vqdmulh", "s", int_arm_neon_vqdmulh>;
def : Pat<(v8i16 (int_arm_neon_vqdmulh (v8i16 QPR:$src1),
- (v8i16 (NEONvduplane (v8i16 QPR:$src2), imm:$lane)))),
+ (v8i16 (NEONvduplane (v8i16 QPR:$src2),
+ imm:$lane)))),
(v8i16 (VQDMULHslv8i16 (v8i16 QPR:$src1),
(v4i16 (EXTRACT_SUBREG QPR:$src2,
- (DSubReg_i16_reg imm:$lane))),
+ (DSubReg_i16_reg imm:$lane))),
(SubReg_i16_lane imm:$lane)))>;
def : Pat<(v4i32 (int_arm_neon_vqdmulh (v4i32 QPR:$src1),
- (v4i32 (NEONvduplane (v4i32 QPR:$src2), imm:$lane)))),
+ (v4i32 (NEONvduplane (v4i32 QPR:$src2),
+ imm:$lane)))),
(v4i32 (VQDMULHslv4i32 (v4i32 QPR:$src1),
(v2i32 (EXTRACT_SUBREG QPR:$src2,
- (DSubReg_i32_reg imm:$lane))),
+ (DSubReg_i32_reg imm:$lane))),
(SubReg_i32_lane imm:$lane)))>;
// VQRDMULH : Vector Rounding Saturating Doubling Multiply Returning High Half
defm VQRDMULH : N3VInt_HS<1, 0, 0b1011, 0, IIC_VMULi16D, IIC_VMULi32D,
IIC_VMULi16Q, IIC_VMULi32Q,
- "vqrdmulh.s", int_arm_neon_vqrdmulh, 1>;
+ "vqrdmulh", "s", int_arm_neon_vqrdmulh, 1>;
defm VQRDMULHsl : N3VIntSL_HS<0b1101, IIC_VMULi16D, IIC_VMULi32D,
IIC_VMULi16Q, IIC_VMULi32Q,
- "vqrdmulh.s", int_arm_neon_vqrdmulh>;
+ "vqrdmulh", "s", int_arm_neon_vqrdmulh>;
def : Pat<(v8i16 (int_arm_neon_vqrdmulh (v8i16 QPR:$src1),
- (v8i16 (NEONvduplane (v8i16 QPR:$src2), imm:$lane)))),
+ (v8i16 (NEONvduplane (v8i16 QPR:$src2),
+ imm:$lane)))),
(v8i16 (VQRDMULHslv8i16 (v8i16 QPR:$src1),
(v4i16 (EXTRACT_SUBREG QPR:$src2,
(DSubReg_i16_reg imm:$lane))),
(SubReg_i16_lane imm:$lane)))>;
def : Pat<(v4i32 (int_arm_neon_vqrdmulh (v4i32 QPR:$src1),
- (v4i32 (NEONvduplane (v4i32 QPR:$src2), imm:$lane)))),
+ (v4i32 (NEONvduplane (v4i32 QPR:$src2),
+ imm:$lane)))),
(v4i32 (VQRDMULHslv4i32 (v4i32 QPR:$src1),
(v2i32 (EXTRACT_SUBREG QPR:$src2,
- (DSubReg_i32_reg imm:$lane))),
+ (DSubReg_i32_reg imm:$lane))),
(SubReg_i32_lane imm:$lane)))>;
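The Pat rewrites above fold a multiply by a broadcast Q-register lane into the scalar ("sl") form, which reads the lane straight out of one D half of the source register. The only subtle step is the index split that DSubReg_i32_reg and SubReg_i32_lane compute; a worked example for v4i32 (illustration only):

  // v4i32, lane 3:
  //   D half     = 3 / 2 = 1   (DSubReg_i32_reg -> the odd D register)
  //   lane in D  = 3 % 2 = 1   (SubReg_i32_lane)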
// VMULL : Vector Multiply Long (integer and polynomial) (Q = D * D)
-defm VMULLs : N3VLInt_QHS<0,1,0b1100,0, IIC_VMULi16D, "vmull.s", int_arm_neon_vmulls, 1>;
-defm VMULLu : N3VLInt_QHS<1,1,0b1100,0, IIC_VMULi16D, "vmull.u", int_arm_neon_vmullu, 1>;
-def VMULLp : N3VLInt<0, 1, 0b00, 0b1110, 0, IIC_VMULi16D, "vmull.p8", v8i16, v8i8,
- int_arm_neon_vmullp, 1>;
-defm VMULLsls : N3VLIntSL_HS<0, 0b1010, IIC_VMULi16D, "vmull.s", int_arm_neon_vmulls>;
-defm VMULLslu : N3VLIntSL_HS<1, 0b1010, IIC_VMULi16D, "vmull.u", int_arm_neon_vmullu>;
+defm VMULLs : N3VLInt_QHS<0,1,0b1100,0, IIC_VMULi16D, "vmull", "s",
+ int_arm_neon_vmulls, 1>;
+defm VMULLu : N3VLInt_QHS<1,1,0b1100,0, IIC_VMULi16D, "vmull", "u",
+ int_arm_neon_vmullu, 1>;
+def VMULLp : N3VLInt<0, 1, 0b00, 0b1110, 0, IIC_VMULi16D, "vmull", "p8",
+ v8i16, v8i8, int_arm_neon_vmullp, 1>;
+defm VMULLsls : N3VLIntSL_HS<0, 0b1010, IIC_VMULi16D, "vmull", "s",
+ int_arm_neon_vmulls>;
+defm VMULLslu : N3VLIntSL_HS<1, 0b1010, IIC_VMULi16D, "vmull", "u",
+ int_arm_neon_vmullu>;
// VQDMULL : Vector Saturating Doubling Multiply Long (Q = D * D)
-defm VQDMULL : N3VLInt_HS<0,1,0b1101,0, IIC_VMULi16D, "vqdmull.s", int_arm_neon_vqdmull, 1>;
-defm VQDMULLsl: N3VLIntSL_HS<0, 0b1011, IIC_VMULi16D, "vqdmull.s", int_arm_neon_vqdmull>;
+defm VQDMULL : N3VLInt_HS<0,1,0b1101,0, IIC_VMULi16D, "vqdmull", "s",
+ int_arm_neon_vqdmull, 1>;
+defm VQDMULLsl: N3VLIntSL_HS<0, 0b1011, IIC_VMULi16D, "vqdmull", "s",
+ int_arm_neon_vqdmull>;
// Vector Multiply-Accumulate and Multiply-Subtract Operations.
// VMLA : Vector Multiply Accumulate (integer and floating-point)
defm VMLA : N3VMulOp_QHS<0, 0, 0b1001, 0, IIC_VMACi16D, IIC_VMACi32D,
- IIC_VMACi16Q, IIC_VMACi32Q, "vmla.i", add>;
-def VMLAfd : N3VDMulOp<0, 0, 0b00, 0b1101, 1, IIC_VMACD, "vmla.f32", v2f32, fmul, fadd>;
-def VMLAfq : N3VQMulOp<0, 0, 0b00, 0b1101, 1, IIC_VMACQ, "vmla.f32", v4f32, fmul, fadd>;
+ IIC_VMACi16Q, IIC_VMACi32Q, "vmla", "i", add>;
+def VMLAfd : N3VDMulOp<0, 0, 0b00, 0b1101, 1, IIC_VMACD, "vmla", "f32",
+ v2f32, fmul, fadd>;
+def VMLAfq : N3VQMulOp<0, 0, 0b00, 0b1101, 1, IIC_VMACQ, "vmla", "f32",
+ v4f32, fmul, fadd>;
defm VMLAsl : N3VMulOpSL_HS<0b0000, IIC_VMACi16D, IIC_VMACi32D,
- IIC_VMACi16Q, IIC_VMACi32Q, "vmla.i", add>;
-def VMLAslfd : N3VDMulOpSL<0b10, 0b0001, IIC_VMACD, "vmla.f32", v2f32, fmul, fadd>;
-def VMLAslfq : N3VQMulOpSL<0b10, 0b0001, IIC_VMACQ, "vmla.f32", v4f32, v2f32, fmul, fadd>;
+ IIC_VMACi16Q, IIC_VMACi32Q, "vmla", "i", add>;
+def VMLAslfd : N3VDMulOpSL<0b10, 0b0001, IIC_VMACD, "vmla", "f32",
+ v2f32, fmul, fadd>;
+def VMLAslfq : N3VQMulOpSL<0b10, 0b0001, IIC_VMACQ, "vmla", "f32",
+ v4f32, v2f32, fmul, fadd>;
def : Pat<(v8i16 (add (v8i16 QPR:$src1),
(mul (v8i16 QPR:$src2),
@@ -1766,7 +1962,7 @@ def : Pat<(v4i32 (add (v4i32 QPR:$src1),
(v4i32 (VMLAslv4i32 (v4i32 QPR:$src1),
(v4i32 QPR:$src2),
(v2i32 (EXTRACT_SUBREG QPR:$src3,
- (DSubReg_i32_reg imm:$lane))),
+ (DSubReg_i32_reg imm:$lane))),
(SubReg_i32_lane imm:$lane)))>;
def : Pat<(v4f32 (fadd (v4f32 QPR:$src1),
@@ -1779,25 +1975,30 @@ def : Pat<(v4f32 (fadd (v4f32 QPR:$src1),
(SubReg_i32_lane imm:$lane)))>;
// VMLAL : Vector Multiply Accumulate Long (Q += D * D)
-defm VMLALs : N3VLInt3_QHS<0,1,0b1000,0, "vmlal.s", int_arm_neon_vmlals>;
-defm VMLALu : N3VLInt3_QHS<1,1,0b1000,0, "vmlal.u", int_arm_neon_vmlalu>;
+defm VMLALs : N3VLInt3_QHS<0,1,0b1000,0, "vmlal", "s", int_arm_neon_vmlals>;
+defm VMLALu : N3VLInt3_QHS<1,1,0b1000,0, "vmlal", "u", int_arm_neon_vmlalu>;
-defm VMLALsls : N3VLInt3SL_HS<0, 0b0010, "vmlal.s", int_arm_neon_vmlals>;
-defm VMLALslu : N3VLInt3SL_HS<1, 0b0010, "vmlal.u", int_arm_neon_vmlalu>;
+defm VMLALsls : N3VLInt3SL_HS<0, 0b0010, "vmlal", "s", int_arm_neon_vmlals>;
+defm VMLALslu : N3VLInt3SL_HS<1, 0b0010, "vmlal", "u", int_arm_neon_vmlalu>;
// VQDMLAL : Vector Saturating Doubling Multiply Accumulate Long (Q += D * D)
-defm VQDMLAL : N3VLInt3_HS<0, 1, 0b1001, 0, "vqdmlal.s", int_arm_neon_vqdmlal>;
-defm VQDMLALsl: N3VLInt3SL_HS<0, 0b0011, "vqdmlal.s", int_arm_neon_vqdmlal>;
+defm VQDMLAL : N3VLInt3_HS<0, 1, 0b1001, 0, "vqdmlal", "s",
+ int_arm_neon_vqdmlal>;
+defm VQDMLALsl: N3VLInt3SL_HS<0, 0b0011, "vqdmlal", "s", int_arm_neon_vqdmlal>;
// VMLS : Vector Multiply Subtract (integer and floating-point)
defm VMLS : N3VMulOp_QHS<1, 0, 0b1001, 0, IIC_VMACi16D, IIC_VMACi32D,
- IIC_VMACi16Q, IIC_VMACi32Q, "vmls.i", sub>;
-def VMLSfd : N3VDMulOp<0, 0, 0b10, 0b1101, 1, IIC_VMACD, "vmls.f32", v2f32, fmul, fsub>;
-def VMLSfq : N3VQMulOp<0, 0, 0b10, 0b1101, 1, IIC_VMACQ, "vmls.f32", v4f32, fmul, fsub>;
+ IIC_VMACi16Q, IIC_VMACi32Q, "vmls", "i", sub>;
+def VMLSfd : N3VDMulOp<0, 0, 0b10, 0b1101, 1, IIC_VMACD, "vmls", "f32",
+ v2f32, fmul, fsub>;
+def VMLSfq : N3VQMulOp<0, 0, 0b10, 0b1101, 1, IIC_VMACQ, "vmls", "f32",
+ v4f32, fmul, fsub>;
defm VMLSsl : N3VMulOpSL_HS<0b0100, IIC_VMACi16D, IIC_VMACi32D,
- IIC_VMACi16Q, IIC_VMACi32Q, "vmls.i", sub>;
-def VMLSslfd : N3VDMulOpSL<0b10, 0b0101, IIC_VMACD, "vmls.f32", v2f32, fmul, fsub>;
-def VMLSslfq : N3VQMulOpSL<0b10, 0b0101, IIC_VMACQ, "vmls.f32", v4f32, v2f32, fmul, fsub>;
+ IIC_VMACi16Q, IIC_VMACi32Q, "vmls", "i", sub>;
+def VMLSslfd : N3VDMulOpSL<0b10, 0b0101, IIC_VMACD, "vmls", "f32",
+ v2f32, fmul, fsub>;
+def VMLSslfq : N3VQMulOpSL<0b10, 0b0101, IIC_VMACQ, "vmls", "f32",
+ v4f32, v2f32, fmul, fsub>;
def : Pat<(v8i16 (sub (v8i16 QPR:$src1),
(mul (v8i16 QPR:$src2),
@@ -1810,7 +2011,7 @@ def : Pat<(v8i16 (sub (v8i16 QPR:$src1),
def : Pat<(v4i32 (sub (v4i32 QPR:$src1),
(mul (v4i32 QPR:$src2),
- (v4i32 (NEONvduplane (v4i32 QPR:$src3), imm:$lane))))),
+ (v4i32 (NEONvduplane (v4i32 QPR:$src3), imm:$lane))))),
(v4i32 (VMLSslv4i32 (v4i32 QPR:$src1),
(v4i32 QPR:$src2),
(v2i32 (EXTRACT_SUBREG QPR:$src3,
@@ -1819,7 +2020,7 @@ def : Pat<(v4i32 (sub (v4i32 QPR:$src1),
def : Pat<(v4f32 (fsub (v4f32 QPR:$src1),
(fmul (v4f32 QPR:$src2),
- (v4f32 (NEONvduplane (v4f32 QPR:$src3), imm:$lane))))),
+ (v4f32 (NEONvduplane (v4f32 QPR:$src3), imm:$lane))))),
(v4f32 (VMLSslfq (v4f32 QPR:$src1),
(v4f32 QPR:$src2),
(v2f32 (EXTRACT_SUBREG QPR:$src3,
@@ -1827,146 +2028,170 @@ def : Pat<(v4f32 (fsub (v4f32 QPR:$src1),
(SubReg_i32_lane imm:$lane)))>;
// VMLSL : Vector Multiply Subtract Long (Q -= D * D)
-defm VMLSLs : N3VLInt3_QHS<0,1,0b1010,0, "vmlsl.s", int_arm_neon_vmlsls>;
-defm VMLSLu : N3VLInt3_QHS<1,1,0b1010,0, "vmlsl.u", int_arm_neon_vmlslu>;
+defm VMLSLs : N3VLInt3_QHS<0,1,0b1010,0, "vmlsl", "s", int_arm_neon_vmlsls>;
+defm VMLSLu : N3VLInt3_QHS<1,1,0b1010,0, "vmlsl", "u", int_arm_neon_vmlslu>;
-defm VMLSLsls : N3VLInt3SL_HS<0, 0b0110, "vmlsl.s", int_arm_neon_vmlsls>;
-defm VMLSLslu : N3VLInt3SL_HS<1, 0b0110, "vmlsl.u", int_arm_neon_vmlslu>;
+defm VMLSLsls : N3VLInt3SL_HS<0, 0b0110, "vmlsl", "s", int_arm_neon_vmlsls>;
+defm VMLSLslu : N3VLInt3SL_HS<1, 0b0110, "vmlsl", "u", int_arm_neon_vmlslu>;
// VQDMLSL : Vector Saturating Doubling Multiply Subtract Long (Q -= D * D)
-defm VQDMLSL : N3VLInt3_HS<0, 1, 0b1011, 0, "vqdmlsl.s", int_arm_neon_vqdmlsl>;
-defm VQDMLSLsl: N3VLInt3SL_HS<0, 0b111, "vqdmlsl.s", int_arm_neon_vqdmlsl>;
+defm VQDMLSL : N3VLInt3_HS<0, 1, 0b1011, 0, "vqdmlsl", "s",
+ int_arm_neon_vqdmlsl>;
+defm VQDMLSLsl: N3VLInt3SL_HS<0, 0b111, "vqdmlsl", "s", int_arm_neon_vqdmlsl>;
// Vector Subtract Operations.
// VSUB : Vector Subtract (integer and floating-point)
-defm VSUB : N3V_QHSD<1, 0, 0b1000, 0, IIC_VSUBiD, IIC_VSUBiQ, "vsub.i", sub, 0>;
-def VSUBfd : N3VD<0, 0, 0b10, 0b1101, 0, IIC_VBIND, "vsub.f32", v2f32, v2f32, fsub, 0>;
-def VSUBfq : N3VQ<0, 0, 0b10, 0b1101, 0, IIC_VBINQ, "vsub.f32", v4f32, v4f32, fsub, 0>;
+defm VSUB : N3V_QHSD<1, 0, 0b1000, 0, IIC_VSUBiD, IIC_VSUBiQ,
+ "vsub", "i", sub, 0>;
+def VSUBfd : N3VD<0, 0, 0b10, 0b1101, 0, IIC_VBIND, "vsub", "f32",
+ v2f32, v2f32, fsub, 0>;
+def VSUBfq : N3VQ<0, 0, 0b10, 0b1101, 0, IIC_VBINQ, "vsub", "f32",
+ v4f32, v4f32, fsub, 0>;
// VSUBL : Vector Subtract Long (Q = D - D)
-defm VSUBLs : N3VLInt_QHS<0,1,0b0010,0, IIC_VSHLiD, "vsubl.s", int_arm_neon_vsubls, 1>;
-defm VSUBLu : N3VLInt_QHS<1,1,0b0010,0, IIC_VSHLiD, "vsubl.u", int_arm_neon_vsublu, 1>;
+defm VSUBLs : N3VLInt_QHS<0,1,0b0010,0, IIC_VSHLiD, "vsubl", "s",
+ int_arm_neon_vsubls, 1>;
+defm VSUBLu : N3VLInt_QHS<1,1,0b0010,0, IIC_VSHLiD, "vsubl", "u",
+ int_arm_neon_vsublu, 1>;
// VSUBW : Vector Subtract Wide (Q = Q - D)
-defm VSUBWs : N3VWInt_QHS<0,1,0b0011,0, "vsubw.s", int_arm_neon_vsubws, 0>;
-defm VSUBWu : N3VWInt_QHS<1,1,0b0011,0, "vsubw.u", int_arm_neon_vsubwu, 0>;
+defm VSUBWs : N3VWInt_QHS<0,1,0b0011,0, "vsubw", "s", int_arm_neon_vsubws, 0>;
+defm VSUBWu : N3VWInt_QHS<1,1,0b0011,0, "vsubw", "u", int_arm_neon_vsubwu, 0>;
// VHSUB : Vector Halving Subtract
-defm VHSUBs : N3VInt_QHS<0, 0, 0b0010, 0, IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q,
- IIC_VBINi4Q, "vhsub.s", int_arm_neon_vhsubs, 0>;
-defm VHSUBu : N3VInt_QHS<1, 0, 0b0010, 0, IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q,
- IIC_VBINi4Q, "vhsub.u", int_arm_neon_vhsubu, 0>;
+defm VHSUBs : N3VInt_QHS<0, 0, 0b0010, 0, IIC_VBINi4D, IIC_VBINi4D,
+ IIC_VBINi4Q, IIC_VBINi4Q,
+ "vhsub", "s", int_arm_neon_vhsubs, 0>;
+defm VHSUBu : N3VInt_QHS<1, 0, 0b0010, 0, IIC_VBINi4D, IIC_VBINi4D,
+ IIC_VBINi4Q, IIC_VBINi4Q,
+ "vhsub", "u", int_arm_neon_vhsubu, 0>;
// VQSUB : Vector Saturating Subtract
-defm VQSUBs : N3VInt_QHSD<0, 0, 0b0010, 1, IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q,
- IIC_VBINi4Q, "vqsub.s", int_arm_neon_vqsubs, 0>;
-defm VQSUBu : N3VInt_QHSD<1, 0, 0b0010, 1, IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q,
- IIC_VBINi4Q, "vqsub.u", int_arm_neon_vqsubu, 0>;
+defm VQSUBs : N3VInt_QHSD<0, 0, 0b0010, 1, IIC_VBINi4D, IIC_VBINi4D,
+ IIC_VBINi4Q, IIC_VBINi4Q,
+ "vqsub", "s", int_arm_neon_vqsubs, 0>;
+defm VQSUBu : N3VInt_QHSD<1, 0, 0b0010, 1, IIC_VBINi4D, IIC_VBINi4D,
+ IIC_VBINi4Q, IIC_VBINi4Q,
+ "vqsub", "u", int_arm_neon_vqsubu, 0>;
// VSUBHN : Vector Subtract and Narrow Returning High Half (D = Q - Q)
-defm VSUBHN : N3VNInt_HSD<0,1,0b0110,0, "vsubhn.i", int_arm_neon_vsubhn, 0>;
+defm VSUBHN : N3VNInt_HSD<0,1,0b0110,0, "vsubhn", "i",
+ int_arm_neon_vsubhn, 0>;
// VRSUBHN : Vector Rounding Subtract and Narrow Returning High Half (D=Q-Q)
-defm VRSUBHN : N3VNInt_HSD<1,1,0b0110,0, "vrsubhn.i", int_arm_neon_vrsubhn, 0>;
+defm VRSUBHN : N3VNInt_HSD<1,1,0b0110,0, "vrsubhn", "i",
+ int_arm_neon_vrsubhn, 0>;
// Vector Comparisons.
// VCEQ : Vector Compare Equal
defm VCEQ : N3V_QHS<1, 0, 0b1000, 1, IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q,
- IIC_VBINi4Q, "vceq.i", NEONvceq, 1>;
-def VCEQfd : N3VD<0,0,0b00,0b1110,0, IIC_VBIND, "vceq.f32", v2i32, v2f32, NEONvceq, 1>;
-def VCEQfq : N3VQ<0,0,0b00,0b1110,0, IIC_VBINQ, "vceq.f32", v4i32, v4f32, NEONvceq, 1>;
+ IIC_VBINi4Q, "vceq", "i", NEONvceq, 1>;
+def VCEQfd : N3VD<0,0,0b00,0b1110,0, IIC_VBIND, "vceq", "f32", v2i32, v2f32,
+ NEONvceq, 1>;
+def VCEQfq : N3VQ<0,0,0b00,0b1110,0, IIC_VBINQ, "vceq", "f32", v4i32, v4f32,
+ NEONvceq, 1>;
// VCGE : Vector Compare Greater Than or Equal
defm VCGEs : N3V_QHS<0, 0, 0b0011, 1, IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q,
- IIC_VBINi4Q, "vcge.s", NEONvcge, 0>;
+ IIC_VBINi4Q, "vcge", "s", NEONvcge, 0>;
defm VCGEu : N3V_QHS<1, 0, 0b0011, 1, IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q,
- IIC_VBINi4Q, "vcge.u", NEONvcgeu, 0>;
-def VCGEfd : N3VD<1,0,0b00,0b1110,0, IIC_VBIND, "vcge.f32", v2i32, v2f32, NEONvcge, 0>;
-def VCGEfq : N3VQ<1,0,0b00,0b1110,0, IIC_VBINQ, "vcge.f32", v4i32, v4f32, NEONvcge, 0>;
+ IIC_VBINi4Q, "vcge", "u", NEONvcgeu, 0>;
+def VCGEfd : N3VD<1,0,0b00,0b1110,0, IIC_VBIND, "vcge", "f32",
+ v2i32, v2f32, NEONvcge, 0>;
+def VCGEfq : N3VQ<1,0,0b00,0b1110,0, IIC_VBINQ, "vcge", "f32", v4i32, v4f32,
+ NEONvcge, 0>;
// VCGT : Vector Compare Greater Than
defm VCGTs : N3V_QHS<0, 0, 0b0011, 0, IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q,
- IIC_VBINi4Q, "vcgt.s", NEONvcgt, 0>;
+ IIC_VBINi4Q, "vcgt", "s", NEONvcgt, 0>;
defm VCGTu : N3V_QHS<1, 0, 0b0011, 0, IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q,
- IIC_VBINi4Q, "vcgt.u", NEONvcgtu, 0>;
-def VCGTfd : N3VD<1,0,0b10,0b1110,0, IIC_VBIND, "vcgt.f32", v2i32, v2f32, NEONvcgt, 0>;
-def VCGTfq : N3VQ<1,0,0b10,0b1110,0, IIC_VBINQ, "vcgt.f32", v4i32, v4f32, NEONvcgt, 0>;
+ IIC_VBINi4Q, "vcgt", "u", NEONvcgtu, 0>;
+def VCGTfd : N3VD<1,0,0b10,0b1110,0, IIC_VBIND, "vcgt", "f32", v2i32, v2f32,
+ NEONvcgt, 0>;
+def VCGTfq : N3VQ<1,0,0b10,0b1110,0, IIC_VBINQ, "vcgt", "f32", v4i32, v4f32,
+ NEONvcgt, 0>;
// VACGE : Vector Absolute Compare Greater Than or Equal (aka VCAGE)
-def VACGEd : N3VDInt<1, 0, 0b00, 0b1110, 1, IIC_VBIND, "vacge.f32", v2i32, v2f32,
- int_arm_neon_vacged, 0>;
-def VACGEq : N3VQInt<1, 0, 0b00, 0b1110, 1, IIC_VBINQ, "vacge.f32", v4i32, v4f32,
- int_arm_neon_vacgeq, 0>;
+def VACGEd : N3VDInt<1, 0, 0b00, 0b1110, 1, IIC_VBIND, "vacge", "f32",
+ v2i32, v2f32, int_arm_neon_vacged, 0>;
+def VACGEq : N3VQInt<1, 0, 0b00, 0b1110, 1, IIC_VBINQ, "vacge", "f32",
+ v4i32, v4f32, int_arm_neon_vacgeq, 0>;
// VACGT : Vector Absolute Compare Greater Than (aka VCAGT)
-def VACGTd : N3VDInt<1, 0, 0b10, 0b1110, 1, IIC_VBIND, "vacgt.f32", v2i32, v2f32,
- int_arm_neon_vacgtd, 0>;
-def VACGTq : N3VQInt<1, 0, 0b10, 0b1110, 1, IIC_VBINQ, "vacgt.f32", v4i32, v4f32,
- int_arm_neon_vacgtq, 0>;
+def VACGTd : N3VDInt<1, 0, 0b10, 0b1110, 1, IIC_VBIND, "vacgt", "f32",
+ v2i32, v2f32, int_arm_neon_vacgtd, 0>;
+def VACGTq : N3VQInt<1, 0, 0b10, 0b1110, 1, IIC_VBINQ, "vacgt", "f32",
+ v4i32, v4f32, int_arm_neon_vacgtq, 0>;
// VTST : Vector Test Bits
defm VTST : N3V_QHS<0, 0, 0b1000, 1, IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q,
- IIC_VBINi4Q, "vtst.i", NEONvtst, 1>;
+ IIC_VBINi4Q, "vtst", "i", NEONvtst, 1>;
// Vector Bitwise Operations.
// VAND : Vector Bitwise AND
-def VANDd : N3VD<0, 0, 0b00, 0b0001, 1, IIC_VBINiD, "vand", v2i32, v2i32, and, 1>;
-def VANDq : N3VQ<0, 0, 0b00, 0b0001, 1, IIC_VBINiQ, "vand", v4i32, v4i32, and, 1>;
+def VANDd : N3VDX<0, 0, 0b00, 0b0001, 1, IIC_VBINiD, "vand",
+ v2i32, v2i32, and, 1>;
+def VANDq : N3VQX<0, 0, 0b00, 0b0001, 1, IIC_VBINiQ, "vand",
+ v4i32, v4i32, and, 1>;
// VEOR : Vector Bitwise Exclusive OR
-def VEORd : N3VD<1, 0, 0b00, 0b0001, 1, IIC_VBINiD, "veor", v2i32, v2i32, xor, 1>;
-def VEORq : N3VQ<1, 0, 0b00, 0b0001, 1, IIC_VBINiQ, "veor", v4i32, v4i32, xor, 1>;
+def VEORd : N3VDX<1, 0, 0b00, 0b0001, 1, IIC_VBINiD, "veor",
+ v2i32, v2i32, xor, 1>;
+def VEORq : N3VQX<1, 0, 0b00, 0b0001, 1, IIC_VBINiQ, "veor",
+ v4i32, v4i32, xor, 1>;
// VORR : Vector Bitwise OR
-def VORRd : N3VD<0, 0, 0b10, 0b0001, 1, IIC_VBINiD, "vorr", v2i32, v2i32, or, 1>;
-def VORRq : N3VQ<0, 0, 0b10, 0b0001, 1, IIC_VBINiQ, "vorr", v4i32, v4i32, or, 1>;
+def VORRd : N3VDX<0, 0, 0b10, 0b0001, 1, IIC_VBINiD, "vorr",
+ v2i32, v2i32, or, 1>;
+def VORRq : N3VQX<0, 0, 0b10, 0b0001, 1, IIC_VBINiQ, "vorr",
+ v4i32, v4i32, or, 1>;
// VBIC : Vector Bitwise Bit Clear (AND NOT)
-def VBICd : N3V<0, 0, 0b01, 0b0001, 0, 1, (outs DPR:$dst),
+def VBICd : N3VX<0, 0, 0b01, 0b0001, 0, 1, (outs DPR:$dst),
(ins DPR:$src1, DPR:$src2), IIC_VBINiD,
- "vbic\t$dst, $src1, $src2", "",
+ "vbic", "$dst, $src1, $src2", "",
[(set DPR:$dst, (v2i32 (and DPR:$src1,
(vnot_conv DPR:$src2))))]>;
-def VBICq : N3V<0, 0, 0b01, 0b0001, 1, 1, (outs QPR:$dst),
+def VBICq : N3VX<0, 0, 0b01, 0b0001, 1, 1, (outs QPR:$dst),
(ins QPR:$src1, QPR:$src2), IIC_VBINiQ,
- "vbic\t$dst, $src1, $src2", "",
+ "vbic", "$dst, $src1, $src2", "",
[(set QPR:$dst, (v4i32 (and QPR:$src1,
(vnot_conv QPR:$src2))))]>;
// VORN : Vector Bitwise OR NOT
-def VORNd : N3V<0, 0, 0b11, 0b0001, 0, 1, (outs DPR:$dst),
+def VORNd : N3VX<0, 0, 0b11, 0b0001, 0, 1, (outs DPR:$dst),
(ins DPR:$src1, DPR:$src2), IIC_VBINiD,
- "vorn\t$dst, $src1, $src2", "",
+ "vorn", "$dst, $src1, $src2", "",
[(set DPR:$dst, (v2i32 (or DPR:$src1,
(vnot_conv DPR:$src2))))]>;
-def VORNq : N3V<0, 0, 0b11, 0b0001, 1, 1, (outs QPR:$dst),
+def VORNq : N3VX<0, 0, 0b11, 0b0001, 1, 1, (outs QPR:$dst),
(ins QPR:$src1, QPR:$src2), IIC_VBINiQ,
- "vorn\t$dst, $src1, $src2", "",
+ "vorn", "$dst, $src1, $src2", "",
[(set QPR:$dst, (v4i32 (or QPR:$src1,
(vnot_conv QPR:$src2))))]>;
// VMVN : Vector Bitwise NOT
-def VMVNd : N2V<0b11, 0b11, 0b00, 0b00, 0b01011, 0, 0,
+def VMVNd : N2VX<0b11, 0b11, 0b00, 0b00, 0b01011, 0, 0,
(outs DPR:$dst), (ins DPR:$src), IIC_VSHLiD,
- "vmvn\t$dst, $src", "",
+ "vmvn", "$dst, $src", "",
[(set DPR:$dst, (v2i32 (vnot DPR:$src)))]>;
-def VMVNq : N2V<0b11, 0b11, 0b00, 0b00, 0b01011, 1, 0,
+def VMVNq : N2VX<0b11, 0b11, 0b00, 0b00, 0b01011, 1, 0,
(outs QPR:$dst), (ins QPR:$src), IIC_VSHLiD,
- "vmvn\t$dst, $src", "",
+ "vmvn", "$dst, $src", "",
[(set QPR:$dst, (v4i32 (vnot QPR:$src)))]>;
def : Pat<(v2i32 (vnot_conv DPR:$src)), (VMVNd DPR:$src)>;
def : Pat<(v4i32 (vnot_conv QPR:$src)), (VMVNq QPR:$src)>;
// VBSL : Vector Bitwise Select
-def VBSLd : N3V<1, 0, 0b01, 0b0001, 0, 1, (outs DPR:$dst),
+def VBSLd : N3VX<1, 0, 0b01, 0b0001, 0, 1, (outs DPR:$dst),
(ins DPR:$src1, DPR:$src2, DPR:$src3), IIC_VCNTiD,
- "vbsl\t$dst, $src2, $src3", "$src1 = $dst",
+ "vbsl", "$dst, $src2, $src3", "$src1 = $dst",
[(set DPR:$dst,
(v2i32 (or (and DPR:$src2, DPR:$src1),
(and DPR:$src3, (vnot_conv DPR:$src1)))))]>;
-def VBSLq : N3V<1, 0, 0b01, 0b0001, 1, 1, (outs QPR:$dst),
+def VBSLq : N3VX<1, 0, 0b01, 0b0001, 1, 1, (outs QPR:$dst),
(ins QPR:$src1, QPR:$src2, QPR:$src3), IIC_VCNTiQ,
- "vbsl\t$dst, $src2, $src3", "$src1 = $dst",
+ "vbsl", "$dst, $src2, $src3", "$src1 = $dst",
[(set QPR:$dst,
(v4i32 (or (and QPR:$src2, QPR:$src1),
(and QPR:$src3, (vnot_conv QPR:$src1)))))]>;
// VBIF : Vector Bitwise Insert if False
-// like VBSL but with: "vbif\t$dst, $src3, $src1", "$src2 = $dst",
+// like VBSL but with: "vbif $dst, $src3, $src1", "$src2 = $dst",
// VBIT : Vector Bitwise Insert if True
-// like VBSL but with: "vbit\t$dst, $src2, $src1", "$src3 = $dst",
+// like VBSL but with: "vbit $dst, $src2, $src1", "$src3 = $dst",
// These are not yet implemented. The TwoAddress pass will not go looking
// for equivalent operations with different register constraints; it just
// inserts copies.
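For reference, a VBIF following the recipe in the comment above would presumably mirror VBSLd with the operand roles rotated. A hypothetical sketch, not part of this patch; the op21_20 value (0b11) is an assumption, and the pattern is deliberately left empty:

  def VBIFd : N3VX<1, 0, 0b11, 0b0001, 0, 1, (outs DPR:$dst),
                   (ins DPR:$src1, DPR:$src2, DPR:$src3), IIC_VCNTiD,
                   "vbif", "$dst, $src3, $src1", "$src2 = $dst",
                   []>;  // no pattern; see the TwoAddress note above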
@@ -1974,259 +2199,270 @@ def VBSLq : N3V<1, 0, 0b01, 0b0001, 1, 1, (outs QPR:$dst),
// Vector Absolute Differences.
// VABD : Vector Absolute Difference
-defm VABDs : N3VInt_QHS<0, 0, 0b0111, 0, IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q,
- IIC_VBINi4Q, "vabd.s", int_arm_neon_vabds, 0>;
-defm VABDu : N3VInt_QHS<1, 0, 0b0111, 0, IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q,
- IIC_VBINi4Q, "vabd.u", int_arm_neon_vabdu, 0>;
-def VABDfd : N3VDInt<1, 0, 0b10, 0b1101, 0, IIC_VBIND, "vabd.f32", v2f32, v2f32,
- int_arm_neon_vabds, 0>;
-def VABDfq : N3VQInt<1, 0, 0b10, 0b1101, 0, IIC_VBINQ, "vabd.f32", v4f32, v4f32,
- int_arm_neon_vabds, 0>;
+defm VABDs : N3VInt_QHS<0, 0, 0b0111, 0, IIC_VBINi4D, IIC_VBINi4D,
+ IIC_VBINi4Q, IIC_VBINi4Q,
+ "vabd", "s", int_arm_neon_vabds, 0>;
+defm VABDu : N3VInt_QHS<1, 0, 0b0111, 0, IIC_VBINi4D, IIC_VBINi4D,
+ IIC_VBINi4Q, IIC_VBINi4Q,
+ "vabd", "u", int_arm_neon_vabdu, 0>;
+def VABDfd : N3VDInt<1, 0, 0b10, 0b1101, 0, IIC_VBIND,
+ "vabd", "f32", v2f32, v2f32, int_arm_neon_vabds, 0>;
+def VABDfq : N3VQInt<1, 0, 0b10, 0b1101, 0, IIC_VBINQ,
+ "vabd", "f32", v4f32, v4f32, int_arm_neon_vabds, 0>;
// VABDL : Vector Absolute Difference Long (Q = | D - D |)
-defm VABDLs : N3VLInt_QHS<0,1,0b0111,0, IIC_VBINi4Q, "vabdl.s", int_arm_neon_vabdls, 0>;
-defm VABDLu : N3VLInt_QHS<1,1,0b0111,0, IIC_VBINi4Q, "vabdl.u", int_arm_neon_vabdlu, 0>;
+defm VABDLs : N3VLInt_QHS<0,1,0b0111,0, IIC_VBINi4Q,
+ "vabdl", "s", int_arm_neon_vabdls, 0>;
+defm VABDLu : N3VLInt_QHS<1,1,0b0111,0, IIC_VBINi4Q,
+ "vabdl", "u", int_arm_neon_vabdlu, 0>;
// VABA : Vector Absolute Difference and Accumulate
-defm VABAs : N3VInt3_QHS<0,0,0b0111,1, "vaba.s", int_arm_neon_vabas>;
-defm VABAu : N3VInt3_QHS<1,0,0b0111,1, "vaba.u", int_arm_neon_vabau>;
+defm VABAs : N3VInt3_QHS<0,0,0b0111,1, "vaba", "s", int_arm_neon_vabas>;
+defm VABAu : N3VInt3_QHS<1,0,0b0111,1, "vaba", "u", int_arm_neon_vabau>;
// VABAL : Vector Absolute Difference and Accumulate Long (Q += | D - D |)
-defm VABALs : N3VLInt3_QHS<0,1,0b0101,0, "vabal.s", int_arm_neon_vabals>;
-defm VABALu : N3VLInt3_QHS<1,1,0b0101,0, "vabal.u", int_arm_neon_vabalu>;
+defm VABALs : N3VLInt3_QHS<0,1,0b0101,0, "vabal", "s", int_arm_neon_vabals>;
+defm VABALu : N3VLInt3_QHS<1,1,0b0101,0, "vabal", "u", int_arm_neon_vabalu>;
// Vector Maximum and Minimum.
// VMAX : Vector Maximum
defm VMAXs : N3VInt_QHS<0, 0, 0b0110, 0, IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q,
- IIC_VBINi4Q, "vmax.s", int_arm_neon_vmaxs, 1>;
+ IIC_VBINi4Q, "vmax", "s", int_arm_neon_vmaxs, 1>;
defm VMAXu : N3VInt_QHS<1, 0, 0b0110, 0, IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q,
- IIC_VBINi4Q, "vmax.u", int_arm_neon_vmaxu, 1>;
-def VMAXfd : N3VDInt<0, 0, 0b00, 0b1111, 0, IIC_VBIND, "vmax.f32", v2f32, v2f32,
- int_arm_neon_vmaxs, 1>;
-def VMAXfq : N3VQInt<0, 0, 0b00, 0b1111, 0, IIC_VBINQ, "vmax.f32", v4f32, v4f32,
- int_arm_neon_vmaxs, 1>;
+ IIC_VBINi4Q, "vmax", "u", int_arm_neon_vmaxu, 1>;
+def VMAXfd : N3VDInt<0, 0, 0b00, 0b1111, 0, IIC_VBIND, "vmax", "f32",
+ v2f32, v2f32, int_arm_neon_vmaxs, 1>;
+def VMAXfq : N3VQInt<0, 0, 0b00, 0b1111, 0, IIC_VBINQ, "vmax", "f32",
+ v4f32, v4f32, int_arm_neon_vmaxs, 1>;
// VMIN : Vector Minimum
defm VMINs : N3VInt_QHS<0, 0, 0b0110, 1, IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q,
- IIC_VBINi4Q, "vmin.s", int_arm_neon_vmins, 1>;
+ IIC_VBINi4Q, "vmin", "s", int_arm_neon_vmins, 1>;
defm VMINu : N3VInt_QHS<1, 0, 0b0110, 1, IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q,
- IIC_VBINi4Q, "vmin.u", int_arm_neon_vminu, 1>;
-def VMINfd : N3VDInt<0, 0, 0b10, 0b1111, 0, IIC_VBIND, "vmin.f32", v2f32, v2f32,
- int_arm_neon_vmins, 1>;
-def VMINfq : N3VQInt<0, 0, 0b10, 0b1111, 0, IIC_VBINQ, "vmin.f32", v4f32, v4f32,
- int_arm_neon_vmins, 1>;
+ IIC_VBINi4Q, "vmin", "u", int_arm_neon_vminu, 1>;
+def VMINfd : N3VDInt<0, 0, 0b10, 0b1111, 0, IIC_VBIND, "vmin", "f32",
+ v2f32, v2f32, int_arm_neon_vmins, 1>;
+def VMINfq : N3VQInt<0, 0, 0b10, 0b1111, 0, IIC_VBINQ, "vmin", "f32",
+ v4f32, v4f32, int_arm_neon_vmins, 1>;
// Vector Pairwise Operations.
// VPADD : Vector Pairwise Add
-def VPADDi8 : N3VDInt<0, 0, 0b00, 0b1011, 1, IIC_VBINiD, "vpadd.i8", v8i8, v8i8,
- int_arm_neon_vpadd, 0>;
-def VPADDi16 : N3VDInt<0, 0, 0b01, 0b1011, 1, IIC_VBINiD, "vpadd.i16", v4i16, v4i16,
- int_arm_neon_vpadd, 0>;
-def VPADDi32 : N3VDInt<0, 0, 0b10, 0b1011, 1, IIC_VBINiD, "vpadd.i32", v2i32, v2i32,
- int_arm_neon_vpadd, 0>;
-def VPADDf : N3VDInt<1, 0, 0b00, 0b1101, 0, IIC_VBIND, "vpadd.f32", v2f32, v2f32,
- int_arm_neon_vpadd, 0>;
+def VPADDi8 : N3VDInt<0, 0, 0b00, 0b1011, 1, IIC_VBINiD, "vpadd", "i8",
+ v8i8, v8i8, int_arm_neon_vpadd, 0>;
+def VPADDi16 : N3VDInt<0, 0, 0b01, 0b1011, 1, IIC_VBINiD, "vpadd", "i16",
+ v4i16, v4i16, int_arm_neon_vpadd, 0>;
+def VPADDi32 : N3VDInt<0, 0, 0b10, 0b1011, 1, IIC_VBINiD, "vpadd", "i32",
+ v2i32, v2i32, int_arm_neon_vpadd, 0>;
+def VPADDf : N3VDInt<1, 0, 0b00, 0b1101, 0, IIC_VBIND, "vpadd", "f32",
+ v2f32, v2f32, int_arm_neon_vpadd, 0>;
// VPADDL : Vector Pairwise Add Long
-defm VPADDLs : N2VPLInt_QHS<0b11, 0b11, 0b00, 0b00100, 0, "vpaddl.s",
+defm VPADDLs : N2VPLInt_QHS<0b11, 0b11, 0b00, 0b00100, 0, "vpaddl", "s",
int_arm_neon_vpaddls>;
-defm VPADDLu : N2VPLInt_QHS<0b11, 0b11, 0b00, 0b00101, 0, "vpaddl.u",
+defm VPADDLu : N2VPLInt_QHS<0b11, 0b11, 0b00, 0b00101, 0, "vpaddl", "u",
int_arm_neon_vpaddlu>;
// VPADAL : Vector Pairwise Add and Accumulate Long
-defm VPADALs : N2VPLInt2_QHS<0b11, 0b11, 0b00, 0b01100, 0, "vpadal.s",
+defm VPADALs : N2VPLInt2_QHS<0b11, 0b11, 0b00, 0b01100, 0, "vpadal", "s",
int_arm_neon_vpadals>;
-defm VPADALu : N2VPLInt2_QHS<0b11, 0b11, 0b00, 0b01101, 0, "vpadal.u",
+defm VPADALu : N2VPLInt2_QHS<0b11, 0b11, 0b00, 0b01101, 0, "vpadal", "u",
int_arm_neon_vpadalu>;
// VPMAX : Vector Pairwise Maximum
-def VPMAXs8 : N3VDInt<0, 0, 0b00, 0b1010, 0, IIC_VBINi4D, "vpmax.s8", v8i8, v8i8,
- int_arm_neon_vpmaxs, 0>;
-def VPMAXs16 : N3VDInt<0, 0, 0b01, 0b1010, 0, IIC_VBINi4D, "vpmax.s16", v4i16, v4i16,
- int_arm_neon_vpmaxs, 0>;
-def VPMAXs32 : N3VDInt<0, 0, 0b10, 0b1010, 0, IIC_VBINi4D, "vpmax.s32", v2i32, v2i32,
- int_arm_neon_vpmaxs, 0>;
-def VPMAXu8 : N3VDInt<1, 0, 0b00, 0b1010, 0, IIC_VBINi4D, "vpmax.u8", v8i8, v8i8,
- int_arm_neon_vpmaxu, 0>;
-def VPMAXu16 : N3VDInt<1, 0, 0b01, 0b1010, 0, IIC_VBINi4D, "vpmax.u16", v4i16, v4i16,
- int_arm_neon_vpmaxu, 0>;
-def VPMAXu32 : N3VDInt<1, 0, 0b10, 0b1010, 0, IIC_VBINi4D, "vpmax.u32", v2i32, v2i32,
- int_arm_neon_vpmaxu, 0>;
-def VPMAXf : N3VDInt<1, 0, 0b00, 0b1111, 0, IIC_VBINi4D, "vpmax.f32", v2f32, v2f32,
- int_arm_neon_vpmaxs, 0>;
+def VPMAXs8 : N3VDInt<0, 0, 0b00, 0b1010, 0, IIC_VBINi4D, "vpmax", "s8",
+ v8i8, v8i8, int_arm_neon_vpmaxs, 0>;
+def VPMAXs16 : N3VDInt<0, 0, 0b01, 0b1010, 0, IIC_VBINi4D, "vpmax", "s16",
+ v4i16, v4i16, int_arm_neon_vpmaxs, 0>;
+def VPMAXs32 : N3VDInt<0, 0, 0b10, 0b1010, 0, IIC_VBINi4D, "vpmax", "s32",
+ v2i32, v2i32, int_arm_neon_vpmaxs, 0>;
+def VPMAXu8 : N3VDInt<1, 0, 0b00, 0b1010, 0, IIC_VBINi4D, "vpmax", "u8",
+ v8i8, v8i8, int_arm_neon_vpmaxu, 0>;
+def VPMAXu16 : N3VDInt<1, 0, 0b01, 0b1010, 0, IIC_VBINi4D, "vpmax", "u16",
+ v4i16, v4i16, int_arm_neon_vpmaxu, 0>;
+def VPMAXu32 : N3VDInt<1, 0, 0b10, 0b1010, 0, IIC_VBINi4D, "vpmax", "u32",
+ v2i32, v2i32, int_arm_neon_vpmaxu, 0>;
+def VPMAXf : N3VDInt<1, 0, 0b00, 0b1111, 0, IIC_VBINi4D, "vpmax", "f32",
+ v2f32, v2f32, int_arm_neon_vpmaxs, 0>;
// VPMIN : Vector Pairwise Minimum
-def VPMINs8 : N3VDInt<0, 0, 0b00, 0b1010, 1, IIC_VBINi4D, "vpmin.s8", v8i8, v8i8,
- int_arm_neon_vpmins, 0>;
-def VPMINs16 : N3VDInt<0, 0, 0b01, 0b1010, 1, IIC_VBINi4D, "vpmin.s16", v4i16, v4i16,
- int_arm_neon_vpmins, 0>;
-def VPMINs32 : N3VDInt<0, 0, 0b10, 0b1010, 1, IIC_VBINi4D, "vpmin.s32", v2i32, v2i32,
- int_arm_neon_vpmins, 0>;
-def VPMINu8 : N3VDInt<1, 0, 0b00, 0b1010, 1, IIC_VBINi4D, "vpmin.u8", v8i8, v8i8,
- int_arm_neon_vpminu, 0>;
-def VPMINu16 : N3VDInt<1, 0, 0b01, 0b1010, 1, IIC_VBINi4D, "vpmin.u16", v4i16, v4i16,
- int_arm_neon_vpminu, 0>;
-def VPMINu32 : N3VDInt<1, 0, 0b10, 0b1010, 1, IIC_VBINi4D, "vpmin.u32", v2i32, v2i32,
- int_arm_neon_vpminu, 0>;
-def VPMINf : N3VDInt<1, 0, 0b10, 0b1111, 0, IIC_VBINi4D, "vpmin.f32", v2f32, v2f32,
- int_arm_neon_vpmins, 0>;
+def VPMINs8 : N3VDInt<0, 0, 0b00, 0b1010, 1, IIC_VBINi4D, "vpmin", "s8",
+ v8i8, v8i8, int_arm_neon_vpmins, 0>;
+def VPMINs16 : N3VDInt<0, 0, 0b01, 0b1010, 1, IIC_VBINi4D, "vpmin", "s16",
+ v4i16, v4i16, int_arm_neon_vpmins, 0>;
+def VPMINs32 : N3VDInt<0, 0, 0b10, 0b1010, 1, IIC_VBINi4D, "vpmin", "s32",
+ v2i32, v2i32, int_arm_neon_vpmins, 0>;
+def VPMINu8 : N3VDInt<1, 0, 0b00, 0b1010, 1, IIC_VBINi4D, "vpmin", "u8",
+ v8i8, v8i8, int_arm_neon_vpminu, 0>;
+def VPMINu16 : N3VDInt<1, 0, 0b01, 0b1010, 1, IIC_VBINi4D, "vpmin", "u16",
+ v4i16, v4i16, int_arm_neon_vpminu, 0>;
+def VPMINu32 : N3VDInt<1, 0, 0b10, 0b1010, 1, IIC_VBINi4D, "vpmin", "u32",
+ v2i32, v2i32, int_arm_neon_vpminu, 0>;
+def VPMINf : N3VDInt<1, 0, 0b10, 0b1111, 0, IIC_VBINi4D, "vpmin", "f32",
+ v2f32, v2f32, int_arm_neon_vpmins, 0>;
// Vector Reciprocal and Reciprocal Square Root Estimate and Step.
// VRECPE : Vector Reciprocal Estimate
def VRECPEd : N2VDInt<0b11, 0b11, 0b10, 0b11, 0b01000, 0,
- IIC_VUNAD, "vrecpe.u32",
+ IIC_VUNAD, "vrecpe", "u32",
v2i32, v2i32, int_arm_neon_vrecpe>;
def VRECPEq : N2VQInt<0b11, 0b11, 0b10, 0b11, 0b01000, 0,
- IIC_VUNAQ, "vrecpe.u32",
+ IIC_VUNAQ, "vrecpe", "u32",
v4i32, v4i32, int_arm_neon_vrecpe>;
def VRECPEfd : N2VDInt<0b11, 0b11, 0b10, 0b11, 0b01010, 0,
- IIC_VUNAD, "vrecpe.f32",
+ IIC_VUNAD, "vrecpe", "f32",
v2f32, v2f32, int_arm_neon_vrecpe>;
def VRECPEfq : N2VQInt<0b11, 0b11, 0b10, 0b11, 0b01010, 0,
- IIC_VUNAQ, "vrecpe.f32",
+ IIC_VUNAQ, "vrecpe", "f32",
v4f32, v4f32, int_arm_neon_vrecpe>;
// VRECPS : Vector Reciprocal Step
-def VRECPSfd : N3VDInt<0, 0, 0b00, 0b1111, 1, IIC_VRECSD, "vrecps.f32", v2f32, v2f32,
- int_arm_neon_vrecps, 1>;
-def VRECPSfq : N3VQInt<0, 0, 0b00, 0b1111, 1, IIC_VRECSQ, "vrecps.f32", v4f32, v4f32,
- int_arm_neon_vrecps, 1>;
+def VRECPSfd : N3VDInt<0, 0, 0b00, 0b1111, 1,
+ IIC_VRECSD, "vrecps", "f32",
+ v2f32, v2f32, int_arm_neon_vrecps, 1>;
+def VRECPSfq : N3VQInt<0, 0, 0b00, 0b1111, 1,
+ IIC_VRECSQ, "vrecps", "f32",
+ v4f32, v4f32, int_arm_neon_vrecps, 1>;
// VRSQRTE : Vector Reciprocal Square Root Estimate
def VRSQRTEd : N2VDInt<0b11, 0b11, 0b10, 0b11, 0b01001, 0,
- IIC_VUNAD, "vrsqrte.u32",
+ IIC_VUNAD, "vrsqrte", "u32",
v2i32, v2i32, int_arm_neon_vrsqrte>;
def VRSQRTEq : N2VQInt<0b11, 0b11, 0b10, 0b11, 0b01001, 0,
- IIC_VUNAQ, "vrsqrte.u32",
+ IIC_VUNAQ, "vrsqrte", "u32",
v4i32, v4i32, int_arm_neon_vrsqrte>;
def VRSQRTEfd : N2VDInt<0b11, 0b11, 0b10, 0b11, 0b01011, 0,
- IIC_VUNAD, "vrsqrte.f32",
+ IIC_VUNAD, "vrsqrte", "f32",
v2f32, v2f32, int_arm_neon_vrsqrte>;
def VRSQRTEfq : N2VQInt<0b11, 0b11, 0b10, 0b11, 0b01011, 0,
- IIC_VUNAQ, "vrsqrte.f32",
+ IIC_VUNAQ, "vrsqrte", "f32",
v4f32, v4f32, int_arm_neon_vrsqrte>;
// VRSQRTS : Vector Reciprocal Square Root Step
-def VRSQRTSfd : N3VDInt<0, 0, 0b10, 0b1111, 1, IIC_VRECSD, "vrsqrts.f32", v2f32, v2f32,
- int_arm_neon_vrsqrts, 1>;
-def VRSQRTSfq : N3VQInt<0, 0, 0b10, 0b1111, 1, IIC_VRECSQ, "vrsqrts.f32", v4f32, v4f32,
- int_arm_neon_vrsqrts, 1>;
+def VRSQRTSfd : N3VDInt<0, 0, 0b10, 0b1111, 1,
+ IIC_VRECSD, "vrsqrts", "f32",
+ v2f32, v2f32, int_arm_neon_vrsqrts, 1>;
+def VRSQRTSfq : N3VQInt<0, 0, 0b10, 0b1111, 1,
+ IIC_VRECSQ, "vrsqrts", "f32",
+ v4f32, v4f32, int_arm_neon_vrsqrts, 1>;
// Vector Shifts.
// VSHL : Vector Shift
defm VSHLs : N3VInt_QHSD<0, 0, 0b0100, 0, IIC_VSHLiD, IIC_VSHLiD, IIC_VSHLiQ,
- IIC_VSHLiQ, "vshl.s", int_arm_neon_vshifts, 0>;
+ IIC_VSHLiQ, "vshl", "s", int_arm_neon_vshifts, 0>;
defm VSHLu : N3VInt_QHSD<1, 0, 0b0100, 0, IIC_VSHLiD, IIC_VSHLiD, IIC_VSHLiQ,
- IIC_VSHLiQ, "vshl.u", int_arm_neon_vshiftu, 0>;
+ IIC_VSHLiQ, "vshl", "u", int_arm_neon_vshiftu, 0>;
// VSHL : Vector Shift Left (Immediate)
-defm VSHLi : N2VSh_QHSD<0, 1, 0b0101, 1, IIC_VSHLiD, "vshl.i", NEONvshl>;
+defm VSHLi : N2VSh_QHSD<0, 1, 0b0101, 1, IIC_VSHLiD, "vshl", "i", NEONvshl>;
// VSHR : Vector Shift Right (Immediate)
-defm VSHRs : N2VSh_QHSD<0, 1, 0b0000, 1, IIC_VSHLiD, "vshr.s", NEONvshrs>;
-defm VSHRu : N2VSh_QHSD<1, 1, 0b0000, 1, IIC_VSHLiD, "vshr.u", NEONvshru>;
+defm VSHRs : N2VSh_QHSD<0, 1, 0b0000, 1, IIC_VSHLiD, "vshr", "s", NEONvshrs>;
+defm VSHRu : N2VSh_QHSD<1, 1, 0b0000, 1, IIC_VSHLiD, "vshr", "u", NEONvshru>;
// VSHLL : Vector Shift Left Long
-defm VSHLLs : N2VLSh_QHS<0, 1, 0b1010, 0, 0, 1, "vshll.s", NEONvshlls>;
-defm VSHLLu : N2VLSh_QHS<1, 1, 0b1010, 0, 0, 1, "vshll.u", NEONvshllu>;
+defm VSHLLs : N2VLSh_QHS<0, 1, 0b1010, 0, 0, 1, "vshll", "s", NEONvshlls>;
+defm VSHLLu : N2VLSh_QHS<1, 1, 0b1010, 0, 0, 1, "vshll", "u", NEONvshllu>;
// VSHLL : Vector Shift Left Long (with maximum shift count)
class N2VLShMax<bit op24, bit op23, bits<6> op21_16, bits<4> op11_8, bit op7,
- bit op6, bit op4, string OpcodeStr, ValueType ResTy,
+ bit op6, bit op4, string OpcodeStr, string Dt, ValueType ResTy,
ValueType OpTy, SDNode OpNode>
- : N2VLSh<op24, op23, op11_8, op7, op6, op4, OpcodeStr, ResTy, OpTy, OpNode> {
+ : N2VLSh<op24, op23, op11_8, op7, op6, op4, OpcodeStr, Dt,
+ ResTy, OpTy, OpNode> {
let Inst{21-16} = op21_16;
}
-def VSHLLi8 : N2VLShMax<1, 1, 0b110010, 0b0011, 0, 0, 0, "vshll.i8",
+def VSHLLi8 : N2VLShMax<1, 1, 0b110010, 0b0011, 0, 0, 0, "vshll", "i8",
v8i16, v8i8, NEONvshlli>;
-def VSHLLi16 : N2VLShMax<1, 1, 0b110110, 0b0011, 0, 0, 0, "vshll.i16",
+def VSHLLi16 : N2VLShMax<1, 1, 0b110110, 0b0011, 0, 0, 0, "vshll", "i16",
v4i32, v4i16, NEONvshlli>;
-def VSHLLi32 : N2VLShMax<1, 1, 0b111010, 0b0011, 0, 0, 0, "vshll.i32",
+def VSHLLi32 : N2VLShMax<1, 1, 0b111010, 0b0011, 0, 0, 0, "vshll", "i32",
v2i64, v2i32, NEONvshlli>;
// VSHRN : Vector Shift Right and Narrow
-defm VSHRN : N2VNSh_HSD<0,1,0b1000,0,0,1, IIC_VSHLiD, "vshrn.i", NEONvshrn>;
+defm VSHRN : N2VNSh_HSD<0,1,0b1000,0,0,1, IIC_VSHLiD, "vshrn", "i", NEONvshrn>;
// VRSHL : Vector Rounding Shift
defm VRSHLs : N3VInt_QHSD<0,0,0b0101,0, IIC_VSHLi4D, IIC_VSHLi4D, IIC_VSHLi4Q,
- IIC_VSHLi4Q, "vrshl.s", int_arm_neon_vrshifts, 0>;
+ IIC_VSHLi4Q, "vrshl", "s", int_arm_neon_vrshifts, 0>;
defm VRSHLu : N3VInt_QHSD<1,0,0b0101,0, IIC_VSHLi4D, IIC_VSHLi4D, IIC_VSHLi4Q,
- IIC_VSHLi4Q, "vrshl.u", int_arm_neon_vrshiftu, 0>;
+ IIC_VSHLi4Q, "vrshl", "u", int_arm_neon_vrshiftu, 0>;
// VRSHR : Vector Rounding Shift Right
-defm VRSHRs : N2VSh_QHSD<0, 1, 0b0010, 1, IIC_VSHLi4D, "vrshr.s", NEONvrshrs>;
-defm VRSHRu : N2VSh_QHSD<1, 1, 0b0010, 1, IIC_VSHLi4D, "vrshr.u", NEONvrshru>;
+defm VRSHRs : N2VSh_QHSD<0, 1, 0b0010, 1, IIC_VSHLi4D, "vrshr", "s", NEONvrshrs>;
+defm VRSHRu : N2VSh_QHSD<1, 1, 0b0010, 1, IIC_VSHLi4D, "vrshr", "u", NEONvrshru>;
// VRSHRN : Vector Rounding Shift Right and Narrow
-defm VRSHRN : N2VNSh_HSD<0, 1, 0b1000, 0, 1, 1, IIC_VSHLi4D, "vrshrn.i",
+defm VRSHRN : N2VNSh_HSD<0, 1, 0b1000, 0, 1, 1, IIC_VSHLi4D, "vrshrn", "i",
NEONvrshrn>;
// VQSHL : Vector Saturating Shift
defm VQSHLs : N3VInt_QHSD<0,0,0b0100,1, IIC_VSHLi4D, IIC_VSHLi4D, IIC_VSHLi4Q,
- IIC_VSHLi4Q, "vqshl.s", int_arm_neon_vqshifts, 0>;
+ IIC_VSHLi4Q, "vqshl", "s", int_arm_neon_vqshifts, 0>;
defm VQSHLu : N3VInt_QHSD<1,0,0b0100,1, IIC_VSHLi4D, IIC_VSHLi4D, IIC_VSHLi4Q,
- IIC_VSHLi4Q, "vqshl.u", int_arm_neon_vqshiftu, 0>;
+ IIC_VSHLi4Q, "vqshl", "u", int_arm_neon_vqshiftu, 0>;
// VQSHL : Vector Saturating Shift Left (Immediate)
-defm VQSHLsi : N2VSh_QHSD<0, 1, 0b0111, 1, IIC_VSHLi4D, "vqshl.s", NEONvqshls>;
-defm VQSHLui : N2VSh_QHSD<1, 1, 0b0111, 1, IIC_VSHLi4D, "vqshl.u", NEONvqshlu>;
+defm VQSHLsi : N2VSh_QHSD<0, 1, 0b0111, 1, IIC_VSHLi4D, "vqshl", "s", NEONvqshls>;
+defm VQSHLui : N2VSh_QHSD<1, 1, 0b0111, 1, IIC_VSHLi4D, "vqshl", "u", NEONvqshlu>;
// VQSHLU : Vector Saturating Shift Left (Immediate, Unsigned)
-defm VQSHLsu : N2VSh_QHSD<1, 1, 0b0110, 1, IIC_VSHLi4D, "vqshlu.s", NEONvqshlsu>;
+defm VQSHLsu : N2VSh_QHSD<1, 1, 0b0110, 1, IIC_VSHLi4D, "vqshlu", "s", NEONvqshlsu>;
// VQSHRN : Vector Saturating Shift Right and Narrow
-defm VQSHRNs : N2VNSh_HSD<0, 1, 0b1001, 0, 0, 1, IIC_VSHLi4D, "vqshrn.s",
+defm VQSHRNs : N2VNSh_HSD<0, 1, 0b1001, 0, 0, 1, IIC_VSHLi4D, "vqshrn", "s",
NEONvqshrns>;
-defm VQSHRNu : N2VNSh_HSD<1, 1, 0b1001, 0, 0, 1, IIC_VSHLi4D, "vqshrn.u",
+defm VQSHRNu : N2VNSh_HSD<1, 1, 0b1001, 0, 0, 1, IIC_VSHLi4D, "vqshrn", "u",
NEONvqshrnu>;
// VQSHRUN : Vector Saturating Shift Right and Narrow (Unsigned)
-defm VQSHRUN : N2VNSh_HSD<1, 1, 0b1000, 0, 0, 1, IIC_VSHLi4D, "vqshrun.s",
+defm VQSHRUN : N2VNSh_HSD<1, 1, 0b1000, 0, 0, 1, IIC_VSHLi4D, "vqshrun", "s",
NEONvqshrnsu>;
// VQRSHL : Vector Saturating Rounding Shift
defm VQRSHLs : N3VInt_QHSD<0, 0, 0b0101, 1, IIC_VSHLi4D, IIC_VSHLi4D, IIC_VSHLi4Q,
- IIC_VSHLi4Q, "vqrshl.s", int_arm_neon_vqrshifts, 0>;
+ IIC_VSHLi4Q, "vqrshl", "s",
+ int_arm_neon_vqrshifts, 0>;
defm VQRSHLu : N3VInt_QHSD<1, 0, 0b0101, 1, IIC_VSHLi4D, IIC_VSHLi4D, IIC_VSHLi4Q,
- IIC_VSHLi4Q, "vqrshl.u", int_arm_neon_vqrshiftu, 0>;
+ IIC_VSHLi4Q, "vqrshl", "u",
+ int_arm_neon_vqrshiftu, 0>;
// VQRSHRN : Vector Saturating Rounding Shift Right and Narrow
-defm VQRSHRNs : N2VNSh_HSD<0, 1, 0b1001, 0, 1, 1, IIC_VSHLi4D, "vqrshrn.s",
+defm VQRSHRNs : N2VNSh_HSD<0, 1, 0b1001, 0, 1, 1, IIC_VSHLi4D, "vqrshrn", "s",
NEONvqrshrns>;
-defm VQRSHRNu : N2VNSh_HSD<1, 1, 0b1001, 0, 1, 1, IIC_VSHLi4D, "vqrshrn.u",
+defm VQRSHRNu : N2VNSh_HSD<1, 1, 0b1001, 0, 1, 1, IIC_VSHLi4D, "vqrshrn", "u",
NEONvqrshrnu>;
// VQRSHRUN : Vector Saturating Rounding Shift Right and Narrow (Unsigned)
-defm VQRSHRUN : N2VNSh_HSD<1, 1, 0b1000, 0, 1, 1, IIC_VSHLi4D, "vqrshrun.s",
+defm VQRSHRUN : N2VNSh_HSD<1, 1, 0b1000, 0, 1, 1, IIC_VSHLi4D, "vqrshrun", "s",
NEONvqrshrnsu>;
// VSRA : Vector Shift Right and Accumulate
-defm VSRAs : N2VShAdd_QHSD<0, 1, 0b0001, 1, "vsra.s", NEONvshrs>;
-defm VSRAu : N2VShAdd_QHSD<1, 1, 0b0001, 1, "vsra.u", NEONvshru>;
+defm VSRAs : N2VShAdd_QHSD<0, 1, 0b0001, 1, "vsra", "s", NEONvshrs>;
+defm VSRAu : N2VShAdd_QHSD<1, 1, 0b0001, 1, "vsra", "u", NEONvshru>;
// VRSRA : Vector Rounding Shift Right and Accumulate
-defm VRSRAs : N2VShAdd_QHSD<0, 1, 0b0011, 1, "vrsra.s", NEONvrshrs>;
-defm VRSRAu : N2VShAdd_QHSD<1, 1, 0b0011, 1, "vrsra.u", NEONvrshru>;
+defm VRSRAs : N2VShAdd_QHSD<0, 1, 0b0011, 1, "vrsra", "s", NEONvrshrs>;
+defm VRSRAu : N2VShAdd_QHSD<1, 1, 0b0011, 1, "vrsra", "u", NEONvrshru>;
// VSLI : Vector Shift Left and Insert
-defm VSLI : N2VShIns_QHSD<1, 1, 0b0101, 1, "vsli.", NEONvsli>;
+defm VSLI : N2VShIns_QHSD<1, 1, 0b0101, 1, "vsli", NEONvsli>;
// VSRI : Vector Shift Right and Insert
-defm VSRI : N2VShIns_QHSD<1, 1, 0b0100, 1, "vsri.", NEONvsri>;
+defm VSRI : N2VShIns_QHSD<1, 1, 0b0100, 1, "vsri", NEONvsri>;
// Vector Absolute and Saturating Absolute.
// VABS : Vector Absolute Value
defm VABS : N2VInt_QHS<0b11, 0b11, 0b01, 0b00110, 0,
- IIC_VUNAiD, IIC_VUNAiQ, "vabs.s",
+ IIC_VUNAiD, IIC_VUNAiQ, "vabs", "s",
int_arm_neon_vabs>;
def VABSfd : N2VDInt<0b11, 0b11, 0b10, 0b01, 0b01110, 0,
- IIC_VUNAD, "vabs.f32",
+ IIC_VUNAD, "vabs", "f32",
v2f32, v2f32, int_arm_neon_vabs>;
def VABSfq : N2VQInt<0b11, 0b11, 0b10, 0b01, 0b01110, 0,
- IIC_VUNAQ, "vabs.f32",
+ IIC_VUNAQ, "vabs", "f32",
v4f32, v4f32, int_arm_neon_vabs>;
// VQABS : Vector Saturating Absolute Value
defm VQABS : N2VInt_QHS<0b11, 0b11, 0b00, 0b01110, 0,
- IIC_VQUNAiD, IIC_VQUNAiQ, "vqabs.s",
+ IIC_VQUNAiD, IIC_VQUNAiQ, "vqabs", "s",
int_arm_neon_vqabs>;
// Vector Negate.
@@ -2234,31 +2470,31 @@ defm VQABS : N2VInt_QHS<0b11, 0b11, 0b00, 0b01110, 0,
def vneg : PatFrag<(ops node:$in), (sub immAllZerosV, node:$in)>;
def vneg_conv : PatFrag<(ops node:$in), (sub immAllZerosV_bc, node:$in)>;
-class VNEGD<bits<2> size, string OpcodeStr, ValueType Ty>
+class VNEGD<bits<2> size, string OpcodeStr, string Dt, ValueType Ty>
: N2V<0b11, 0b11, size, 0b01, 0b00111, 0, 0, (outs DPR:$dst), (ins DPR:$src),
- IIC_VSHLiD, !strconcat(OpcodeStr, "\t$dst, $src"), "",
+ IIC_VSHLiD, OpcodeStr, Dt, "$dst, $src", "",
[(set DPR:$dst, (Ty (vneg DPR:$src)))]>;
-class VNEGQ<bits<2> size, string OpcodeStr, ValueType Ty>
+class VNEGQ<bits<2> size, string OpcodeStr, string Dt, ValueType Ty>
: N2V<0b11, 0b11, size, 0b01, 0b00111, 1, 0, (outs QPR:$dst), (ins QPR:$src),
- IIC_VSHLiD, !strconcat(OpcodeStr, "\t$dst, $src"), "",
+ IIC_VSHLiD, OpcodeStr, Dt, "$dst, $src", "",
[(set QPR:$dst, (Ty (vneg QPR:$src)))]>;
// VNEG : Vector Negate
-def VNEGs8d : VNEGD<0b00, "vneg.s8", v8i8>;
-def VNEGs16d : VNEGD<0b01, "vneg.s16", v4i16>;
-def VNEGs32d : VNEGD<0b10, "vneg.s32", v2i32>;
-def VNEGs8q : VNEGQ<0b00, "vneg.s8", v16i8>;
-def VNEGs16q : VNEGQ<0b01, "vneg.s16", v8i16>;
-def VNEGs32q : VNEGQ<0b10, "vneg.s32", v4i32>;
+def VNEGs8d : VNEGD<0b00, "vneg", "s8", v8i8>;
+def VNEGs16d : VNEGD<0b01, "vneg", "s16", v4i16>;
+def VNEGs32d : VNEGD<0b10, "vneg", "s32", v2i32>;
+def VNEGs8q : VNEGQ<0b00, "vneg", "s8", v16i8>;
+def VNEGs16q : VNEGQ<0b01, "vneg", "s16", v8i16>;
+def VNEGs32q : VNEGQ<0b10, "vneg", "s32", v4i32>;
// VNEG : Vector Negate (floating-point)
def VNEGf32d : N2V<0b11, 0b11, 0b10, 0b01, 0b01111, 0, 0,
(outs DPR:$dst), (ins DPR:$src), IIC_VUNAD,
- "vneg.f32\t$dst, $src", "",
+ "vneg", "f32", "$dst, $src", "",
[(set DPR:$dst, (v2f32 (fneg DPR:$src)))]>;
def VNEGf32q : N2V<0b11, 0b11, 0b10, 0b01, 0b01111, 1, 0,
(outs QPR:$dst), (ins QPR:$src), IIC_VUNAQ,
- "vneg.f32\t$dst, $src", "",
+ "vneg", "f32", "$dst, $src", "",
[(set QPR:$dst, (v4f32 (fneg QPR:$src)))]>;
def : Pat<(v8i8 (vneg_conv DPR:$src)), (VNEGs8d DPR:$src)>;
@@ -2270,35 +2506,35 @@ def : Pat<(v4i32 (vneg_conv QPR:$src)), (VNEGs32q QPR:$src)>;
// VQNEG : Vector Saturating Negate
defm VQNEG : N2VInt_QHS<0b11, 0b11, 0b00, 0b01111, 0,
- IIC_VQUNAiD, IIC_VQUNAiQ, "vqneg.s",
+ IIC_VQUNAiD, IIC_VQUNAiQ, "vqneg", "s",
int_arm_neon_vqneg>;
// Vector Bit Counting Operations.
// VCLS : Vector Count Leading Sign Bits
defm VCLS : N2VInt_QHS<0b11, 0b11, 0b00, 0b01000, 0,
- IIC_VCNTiD, IIC_VCNTiQ, "vcls.s",
+ IIC_VCNTiD, IIC_VCNTiQ, "vcls", "s",
int_arm_neon_vcls>;
// VCLZ : Vector Count Leading Zeros
defm VCLZ : N2VInt_QHS<0b11, 0b11, 0b00, 0b01001, 0,
- IIC_VCNTiD, IIC_VCNTiQ, "vclz.i",
+ IIC_VCNTiD, IIC_VCNTiQ, "vclz", "i",
int_arm_neon_vclz>;
// VCNT : Vector Count One Bits
def VCNTd : N2VDInt<0b11, 0b11, 0b00, 0b00, 0b01010, 0,
- IIC_VCNTiD, "vcnt.8",
+ IIC_VCNTiD, "vcnt", "8",
v8i8, v8i8, int_arm_neon_vcnt>;
def VCNTq : N2VQInt<0b11, 0b11, 0b00, 0b00, 0b01010, 0,
- IIC_VCNTiQ, "vcnt.8",
+ IIC_VCNTiQ, "vcnt", "8",
v16i8, v16i8, int_arm_neon_vcnt>;
// Vector Move Operations.
// VMOV : Vector Move (Register)
-def VMOVDneon: N3V<0, 0, 0b10, 0b0001, 0, 1, (outs DPR:$dst), (ins DPR:$src),
- IIC_VMOVD, "vmov\t$dst, $src", "", []>;
-def VMOVQ : N3V<0, 0, 0b10, 0b0001, 1, 1, (outs QPR:$dst), (ins QPR:$src),
- IIC_VMOVD, "vmov\t$dst, $src", "", []>;
+def VMOVDneon: N3VX<0, 0, 0b10, 0b0001, 0, 1, (outs DPR:$dst), (ins DPR:$src),
+ IIC_VMOVD, "vmov", "$dst, $src", "", []>;
+def VMOVQ : N3VX<0, 0, 0b10, 0b0001, 1, 1, (outs QPR:$dst), (ins QPR:$src),
+ IIC_VMOVD, "vmov", "$dst, $src", "", []>;
// VMOV : Vector Move (Immediate)
@@ -2339,65 +2575,65 @@ def vmovImm64 : PatLeaf<(build_vector), [{
def VMOVv8i8 : N1ModImm<1, 0b000, 0b1110, 0, 0, 0, 1, (outs DPR:$dst),
(ins h8imm:$SIMM), IIC_VMOVImm,
- "vmov.i8\t$dst, $SIMM", "",
+ "vmov", "i8", "$dst, $SIMM", "",
[(set DPR:$dst, (v8i8 vmovImm8:$SIMM))]>;
def VMOVv16i8 : N1ModImm<1, 0b000, 0b1110, 0, 1, 0, 1, (outs QPR:$dst),
(ins h8imm:$SIMM), IIC_VMOVImm,
- "vmov.i8\t$dst, $SIMM", "",
+ "vmov", "i8", "$dst, $SIMM", "",
[(set QPR:$dst, (v16i8 vmovImm8:$SIMM))]>;
-def VMOVv4i16 : N1ModImm<1, 0b000, 0b1000, 0, 0, 0, 1, (outs DPR:$dst),
+def VMOVv4i16 : N1ModImm<1, 0b000, {1,0,?,?}, 0, 0, {?}, 1, (outs DPR:$dst),
(ins h16imm:$SIMM), IIC_VMOVImm,
- "vmov.i16\t$dst, $SIMM", "",
+ "vmov", "i16", "$dst, $SIMM", "",
[(set DPR:$dst, (v4i16 vmovImm16:$SIMM))]>;
-def VMOVv8i16 : N1ModImm<1, 0b000, 0b1000, 0, 1, 0, 1, (outs QPR:$dst),
+def VMOVv8i16 : N1ModImm<1, 0b000, {1,0,?,?}, 0, 1, {?}, 1, (outs QPR:$dst),
(ins h16imm:$SIMM), IIC_VMOVImm,
- "vmov.i16\t$dst, $SIMM", "",
+ "vmov", "i16", "$dst, $SIMM", "",
[(set QPR:$dst, (v8i16 vmovImm16:$SIMM))]>;
-def VMOVv2i32 : N1ModImm<1, 0b000, 0b0000, 0, 0, 0, 1, (outs DPR:$dst),
+def VMOVv2i32 : N1ModImm<1, 0b000, {?,?,?,?}, 0, 0, {?}, 1, (outs DPR:$dst),
(ins h32imm:$SIMM), IIC_VMOVImm,
- "vmov.i32\t$dst, $SIMM", "",
+ "vmov", "i32", "$dst, $SIMM", "",
[(set DPR:$dst, (v2i32 vmovImm32:$SIMM))]>;
-def VMOVv4i32 : N1ModImm<1, 0b000, 0b0000, 0, 1, 0, 1, (outs QPR:$dst),
+def VMOVv4i32 : N1ModImm<1, 0b000, {?,?,?,?}, 0, 1, {?}, 1, (outs QPR:$dst),
(ins h32imm:$SIMM), IIC_VMOVImm,
- "vmov.i32\t$dst, $SIMM", "",
+ "vmov", "i32", "$dst, $SIMM", "",
[(set QPR:$dst, (v4i32 vmovImm32:$SIMM))]>;
def VMOVv1i64 : N1ModImm<1, 0b000, 0b1110, 0, 0, 1, 1, (outs DPR:$dst),
(ins h64imm:$SIMM), IIC_VMOVImm,
- "vmov.i64\t$dst, $SIMM", "",
+ "vmov", "i64", "$dst, $SIMM", "",
[(set DPR:$dst, (v1i64 vmovImm64:$SIMM))]>;
def VMOVv2i64 : N1ModImm<1, 0b000, 0b1110, 0, 1, 1, 1, (outs QPR:$dst),
(ins h64imm:$SIMM), IIC_VMOVImm,
- "vmov.i64\t$dst, $SIMM", "",
+ "vmov", "i64", "$dst, $SIMM", "",
[(set QPR:$dst, (v2i64 vmovImm64:$SIMM))]>;
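The VMOV-immediate hunk also loosens the encodings: fully specified opcode fields such as 0b1000 become bit patterns with unset bits, e.g. {1,0,?,?}, where '?' leaves an Inst bit undefined rather than forcing it to zero. Presumably this lets one record stand for the whole family of immediate encodings of that element size; read straight off the new lines:

  // VMOVv4i16 / VMOVv8i16 : field = 10??  (i16 immediate family)
  // VMOVv2i32 / VMOVv4i32 : field = ????  (all i32 immediate forms open)
  // VMOVv8i8, VMOVv1i64, VMOVv2i64 keep fully specified fields (0b1110).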
// VMOV : Vector Get Lane (move scalar to ARM core register)
-def VGETLNs8 : NVGetLane<0b11100101, 0b1011, 0b00,
+def VGETLNs8 : NVGetLane<{1,1,1,0,0,1,?,1}, 0b1011, {?,?},
(outs GPR:$dst), (ins DPR:$src, nohash_imm:$lane),
- IIC_VMOVSI, "vmov", ".s8\t$dst, $src[$lane]",
+ IIC_VMOVSI, "vmov", "s8", "$dst, $src[$lane]",
[(set GPR:$dst, (NEONvgetlanes (v8i8 DPR:$src),
imm:$lane))]>;
-def VGETLNs16 : NVGetLane<0b11100001, 0b1011, 0b01,
+def VGETLNs16 : NVGetLane<{1,1,1,0,0,0,?,1}, 0b1011, {?,1},
(outs GPR:$dst), (ins DPR:$src, nohash_imm:$lane),
- IIC_VMOVSI, "vmov", ".s16\t$dst, $src[$lane]",
+ IIC_VMOVSI, "vmov", "s16", "$dst, $src[$lane]",
[(set GPR:$dst, (NEONvgetlanes (v4i16 DPR:$src),
imm:$lane))]>;
-def VGETLNu8 : NVGetLane<0b11101101, 0b1011, 0b00,
+def VGETLNu8 : NVGetLane<{1,1,1,0,1,1,?,1}, 0b1011, {?,?},
(outs GPR:$dst), (ins DPR:$src, nohash_imm:$lane),
- IIC_VMOVSI, "vmov", ".u8\t$dst, $src[$lane]",
+ IIC_VMOVSI, "vmov", "u8", "$dst, $src[$lane]",
[(set GPR:$dst, (NEONvgetlaneu (v8i8 DPR:$src),
imm:$lane))]>;
-def VGETLNu16 : NVGetLane<0b11101001, 0b1011, 0b01,
+def VGETLNu16 : NVGetLane<{1,1,1,0,1,0,?,1}, 0b1011, {?,1},
(outs GPR:$dst), (ins DPR:$src, nohash_imm:$lane),
- IIC_VMOVSI, "vmov", ".u16\t$dst, $src[$lane]",
+ IIC_VMOVSI, "vmov", "u16", "$dst, $src[$lane]",
[(set GPR:$dst, (NEONvgetlaneu (v4i16 DPR:$src),
imm:$lane))]>;
-def VGETLNi32 : NVGetLane<0b11100001, 0b1011, 0b00,
+def VGETLNi32 : NVGetLane<{1,1,1,0,0,0,?,1}, 0b1011, 0b00,
(outs GPR:$dst), (ins DPR:$src, nohash_imm:$lane),
- IIC_VMOVSI, "vmov", ".32\t$dst, $src[$lane]",
+ IIC_VMOVSI, "vmov", "32", "$dst, $src[$lane]",
[(set GPR:$dst, (extractelt (v2i32 DPR:$src),
imm:$lane))]>;
// def VGETLNf32: see FMRDH and FMRDL in ARMInstrVFP.td
@@ -2436,19 +2672,19 @@ def : Pat<(extractelt (v2f64 QPR:$src1), imm:$src2),
// VMOV : Vector Set Lane (move ARM core register to scalar)
let Constraints = "$src1 = $dst" in {
-def VSETLNi8 : NVSetLane<0b11100100, 0b1011, 0b00, (outs DPR:$dst),
+def VSETLNi8 : NVSetLane<{1,1,1,0,0,1,?,0}, 0b1011, {?,?}, (outs DPR:$dst),
(ins DPR:$src1, GPR:$src2, nohash_imm:$lane),
- IIC_VMOVISL, "vmov", ".8\t$dst[$lane], $src2",
+ IIC_VMOVISL, "vmov", "8", "$dst[$lane], $src2",
[(set DPR:$dst, (vector_insert (v8i8 DPR:$src1),
GPR:$src2, imm:$lane))]>;
-def VSETLNi16 : NVSetLane<0b11100000, 0b1011, 0b01, (outs DPR:$dst),
+def VSETLNi16 : NVSetLane<{1,1,1,0,0,0,?,0}, 0b1011, {?,1}, (outs DPR:$dst),
(ins DPR:$src1, GPR:$src2, nohash_imm:$lane),
- IIC_VMOVISL, "vmov", ".16\t$dst[$lane], $src2",
+ IIC_VMOVISL, "vmov", "16", "$dst[$lane], $src2",
[(set DPR:$dst, (vector_insert (v4i16 DPR:$src1),
GPR:$src2, imm:$lane))]>;
-def VSETLNi32 : NVSetLane<0b11100000, 0b1011, 0b00, (outs DPR:$dst),
+def VSETLNi32 : NVSetLane<{1,1,1,0,0,0,?,0}, 0b1011, 0b00, (outs DPR:$dst),
(ins DPR:$src1, GPR:$src2, nohash_imm:$lane),
- IIC_VMOVISL, "vmov", ".32\t$dst[$lane], $src2",
+ IIC_VMOVISL, "vmov", "32", "$dst[$lane], $src2",
[(set DPR:$dst, (insertelt (v2i32 DPR:$src1),
GPR:$src2, imm:$lane))]>;
}
@@ -2512,55 +2748,57 @@ def : Pat<(v4i32 (scalar_to_vector GPR:$src)),
// VDUP : Vector Duplicate (from ARM core register to all elements)
-class VDUPD<bits<8> opcod1, bits<2> opcod3, string asmSize, ValueType Ty>
+class VDUPD<bits<8> opcod1, bits<2> opcod3, string Dt, ValueType Ty>
: NVDup<opcod1, 0b1011, opcod3, (outs DPR:$dst), (ins GPR:$src),
- IIC_VMOVIS, "vdup", !strconcat(asmSize, "\t$dst, $src"),
+ IIC_VMOVIS, "vdup", Dt, "$dst, $src",
[(set DPR:$dst, (Ty (NEONvdup (i32 GPR:$src))))]>;
-class VDUPQ<bits<8> opcod1, bits<2> opcod3, string asmSize, ValueType Ty>
+class VDUPQ<bits<8> opcod1, bits<2> opcod3, string Dt, ValueType Ty>
: NVDup<opcod1, 0b1011, opcod3, (outs QPR:$dst), (ins GPR:$src),
- IIC_VMOVIS, "vdup", !strconcat(asmSize, "\t$dst, $src"),
+ IIC_VMOVIS, "vdup", Dt, "$dst, $src",
[(set QPR:$dst, (Ty (NEONvdup (i32 GPR:$src))))]>;
-def VDUP8d : VDUPD<0b11101100, 0b00, ".8", v8i8>;
-def VDUP16d : VDUPD<0b11101000, 0b01, ".16", v4i16>;
-def VDUP32d : VDUPD<0b11101000, 0b00, ".32", v2i32>;
-def VDUP8q : VDUPQ<0b11101110, 0b00, ".8", v16i8>;
-def VDUP16q : VDUPQ<0b11101010, 0b01, ".16", v8i16>;
-def VDUP32q : VDUPQ<0b11101010, 0b00, ".32", v4i32>;
+def VDUP8d : VDUPD<0b11101100, 0b00, "8", v8i8>;
+def VDUP16d : VDUPD<0b11101000, 0b01, "16", v4i16>;
+def VDUP32d : VDUPD<0b11101000, 0b00, "32", v2i32>;
+def VDUP8q : VDUPQ<0b11101110, 0b00, "8", v16i8>;
+def VDUP16q : VDUPQ<0b11101010, 0b01, "16", v8i16>;
+def VDUP32q : VDUPQ<0b11101010, 0b00, "32", v4i32>;
def VDUPfd : NVDup<0b11101000, 0b1011, 0b00, (outs DPR:$dst), (ins GPR:$src),
- IIC_VMOVIS, "vdup", ".32\t$dst, $src",
+ IIC_VMOVIS, "vdup", "32", "$dst, $src",
[(set DPR:$dst, (v2f32 (NEONvdup
(f32 (bitconvert GPR:$src)))))]>;
def VDUPfq : NVDup<0b11101010, 0b1011, 0b00, (outs QPR:$dst), (ins GPR:$src),
- IIC_VMOVIS, "vdup", ".32\t$dst, $src",
+ IIC_VMOVIS, "vdup", "32", "$dst, $src",
[(set QPR:$dst, (v4f32 (NEONvdup
(f32 (bitconvert GPR:$src)))))]>;
// VDUP : Vector Duplicate Lane (from scalar to all elements)
-class VDUPLND<string OpcodeStr, ValueType Ty>
- : N2VDup<0b11, 0b11, 0b11000, 0, 0,
+class VDUPLND<bits<2> op19_18, bits<2> op17_16,
+ string OpcodeStr, string Dt, ValueType Ty>
+ : N2V<0b11, 0b11, op19_18, op17_16, 0b11000, 0, 0,
(outs DPR:$dst), (ins DPR:$src, nohash_imm:$lane), IIC_VMOVD,
- !strconcat(OpcodeStr, "\t$dst, $src[$lane]"), "",
+ OpcodeStr, Dt, "$dst, $src[$lane]", "",
[(set DPR:$dst, (Ty (NEONvduplane (Ty DPR:$src), imm:$lane)))]>;
-class VDUPLNQ<string OpcodeStr, ValueType ResTy, ValueType OpTy>
- : N2VDup<0b11, 0b11, 0b11000, 1, 0,
+class VDUPLNQ<bits<2> op19_18, bits<2> op17_16, string OpcodeStr, string Dt,
+ ValueType ResTy, ValueType OpTy>
+ : N2V<0b11, 0b11, op19_18, op17_16, 0b11000, 1, 0,
(outs QPR:$dst), (ins DPR:$src, nohash_imm:$lane), IIC_VMOVD,
- !strconcat(OpcodeStr, "\t$dst, $src[$lane]"), "",
+ OpcodeStr, Dt, "$dst, $src[$lane]", "",
[(set QPR:$dst, (ResTy (NEONvduplane (OpTy DPR:$src), imm:$lane)))]>;
// Inst{19-16} is partially specified depending on the element size.
-def VDUPLN8d : VDUPLND<"vdup.8", v8i8> { let Inst{16} = 1; }
-def VDUPLN16d : VDUPLND<"vdup.16", v4i16> { let Inst{17-16} = 0b10; }
-def VDUPLN32d : VDUPLND<"vdup.32", v2i32> { let Inst{18-16} = 0b100; }
-def VDUPLNfd : VDUPLND<"vdup.32", v2f32> { let Inst{18-16} = 0b100; }
-def VDUPLN8q : VDUPLNQ<"vdup.8", v16i8, v8i8> { let Inst{16} = 1; }
-def VDUPLN16q : VDUPLNQ<"vdup.16", v8i16, v4i16> { let Inst{17-16} = 0b10; }
-def VDUPLN32q : VDUPLNQ<"vdup.32", v4i32, v2i32> { let Inst{18-16} = 0b100; }
-def VDUPLNfq : VDUPLNQ<"vdup.32", v4f32, v2f32> { let Inst{18-16} = 0b100; }
+def VDUPLN8d : VDUPLND<{?,?}, {?,1}, "vdup", "8", v8i8>;
+def VDUPLN16d : VDUPLND<{?,?}, {1,0}, "vdup", "16", v4i16>;
+def VDUPLN32d : VDUPLND<{?,1}, {0,0}, "vdup", "32", v2i32>;
+def VDUPLNfd : VDUPLND<{?,1}, {0,0}, "vdup", "32", v2f32>;
+def VDUPLN8q : VDUPLNQ<{?,?}, {?,1}, "vdup", "8", v16i8, v8i8>;
+def VDUPLN16q : VDUPLNQ<{?,?}, {1,0}, "vdup", "16", v8i16, v4i16>;
+def VDUPLN32q : VDUPLNQ<{?,1}, {0,0}, "vdup", "32", v4i32, v2i32>;
+def VDUPLNfq : VDUPLNQ<{?,1}, {0,0}, "vdup", "32", v4f32, v2f32>;
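The partially-specified bit groups above encode the same Inst{19-16} layout that the deleted `let` clauses spelled out per element size; a hedged summary, inferred from the replaced lines (the `?` bits carry the lane index):

  // Illustrative only, reconstructed from the old `let Inst{...}` clauses:
  //   size  8: Inst{19-17} = lane (0-7), Inst{16}    = 1      -> {?,?},{?,1}
  //   size 16: Inst{19-18} = lane (0-3), Inst{17-16} = 0b10   -> {?,?},{1,0}
  //   size 32: Inst{19}    = lane (0-1), Inst{18-16} = 0b100  -> {?,1},{0,0}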
def : Pat<(v16i8 (NEONvduplane (v16i8 QPR:$src), imm:$lane)),
(v16i8 (VDUPLN8q (v8i8 (EXTRACT_SUBREG QPR:$src,
@@ -2579,19 +2817,15 @@ def : Pat<(v4f32 (NEONvduplane (v4f32 QPR:$src), imm:$lane)),
(DSubReg_i32_reg imm:$lane))),
(SubReg_i32_lane imm:$lane)))>;
-def VDUPfdf : N2VDup<0b11, 0b11, 0b11000, 0, 0,
- (outs DPR:$dst), (ins SPR:$src),
- IIC_VMOVD, "vdup.32\t$dst, ${src:lane}", "",
- [(set DPR:$dst, (v2f32 (NEONvdup (f32 SPR:$src))))]> {
- let Inst{18-16} = 0b100;
-}
+def VDUPfdf : N2V<0b11, 0b11, {?,1}, {0,0}, 0b11000, 0, 0,
+ (outs DPR:$dst), (ins SPR:$src),
+ IIC_VMOVD, "vdup", "32", "$dst, ${src:lane}", "",
+ [(set DPR:$dst, (v2f32 (NEONvdup (f32 SPR:$src))))]>;
-def VDUPfqf : N2VDup<0b11, 0b11, 0b11000, 1, 0,
- (outs QPR:$dst), (ins SPR:$src),
- IIC_VMOVD, "vdup.32\t$dst, ${src:lane}", "",
- [(set QPR:$dst, (v4f32 (NEONvdup (f32 SPR:$src))))]> {
- let Inst{18-16} = 0b100;
-}
+def VDUPfqf : N2V<0b11, 0b11, {?,1}, {0,0}, 0b11000, 1, 0,
+ (outs QPR:$dst), (ins SPR:$src),
+ IIC_VMOVD, "vdup", "32", "$dst, ${src:lane}", "",
+ [(set QPR:$dst, (v4f32 (NEONvdup (f32 SPR:$src))))]>;
def : Pat<(v2i64 (NEONvduplane (v2i64 QPR:$src), imm:$lane)),
(INSERT_SUBREG QPR:$src,
@@ -2603,176 +2837,178 @@ def : Pat<(v2f64 (NEONvduplane (v2f64 QPR:$src), imm:$lane)),
(DSubReg_f64_other_reg imm:$lane))>;
// VMOVN : Vector Narrowing Move
-defm VMOVN : N2VNInt_HSD<0b11,0b11,0b10,0b00100,0,0, IIC_VMOVD, "vmovn.i",
- int_arm_neon_vmovn>;
+defm VMOVN : N2VNInt_HSD<0b11,0b11,0b10,0b00100,0,0, IIC_VMOVD,
+ "vmovn", "i", int_arm_neon_vmovn>;
// VQMOVN : Vector Saturating Narrowing Move
-defm VQMOVNs : N2VNInt_HSD<0b11,0b11,0b10,0b00101,0,0, IIC_VQUNAiD, "vqmovn.s",
- int_arm_neon_vqmovns>;
-defm VQMOVNu : N2VNInt_HSD<0b11,0b11,0b10,0b00101,1,0, IIC_VQUNAiD, "vqmovn.u",
- int_arm_neon_vqmovnu>;
-defm VQMOVNsu : N2VNInt_HSD<0b11,0b11,0b10,0b00100,1,0, IIC_VQUNAiD, "vqmovun.s",
- int_arm_neon_vqmovnsu>;
+defm VQMOVNs : N2VNInt_HSD<0b11,0b11,0b10,0b00101,0,0, IIC_VQUNAiD,
+ "vqmovn", "s", int_arm_neon_vqmovns>;
+defm VQMOVNu : N2VNInt_HSD<0b11,0b11,0b10,0b00101,1,0, IIC_VQUNAiD,
+ "vqmovn", "u", int_arm_neon_vqmovnu>;
+defm VQMOVNsu : N2VNInt_HSD<0b11,0b11,0b10,0b00100,1,0, IIC_VQUNAiD,
+ "vqmovun", "s", int_arm_neon_vqmovnsu>;
// VMOVL : Vector Lengthening Move
-defm VMOVLs : N2VLInt_QHS<0b01,0b10100,0,1, "vmovl.s", int_arm_neon_vmovls>;
-defm VMOVLu : N2VLInt_QHS<0b11,0b10100,0,1, "vmovl.u", int_arm_neon_vmovlu>;
+defm VMOVLs : N2VLInt_QHS<0b01,0b10100,0,1, "vmovl", "s",
+ int_arm_neon_vmovls>;
+defm VMOVLu : N2VLInt_QHS<0b11,0b10100,0,1, "vmovl", "u",
+ int_arm_neon_vmovlu>;
// Vector Conversions.
// VCVT : Vector Convert Between Floating-Point and Integers
-def VCVTf2sd : N2VD<0b11, 0b11, 0b10, 0b11, 0b01110, 0, "vcvt.s32.f32",
+def VCVTf2sd : N2VD<0b11, 0b11, 0b10, 0b11, 0b01110, 0, "vcvt", "s32.f32",
v2i32, v2f32, fp_to_sint>;
-def VCVTf2ud : N2VD<0b11, 0b11, 0b10, 0b11, 0b01111, 0, "vcvt.u32.f32",
+def VCVTf2ud : N2VD<0b11, 0b11, 0b10, 0b11, 0b01111, 0, "vcvt", "u32.f32",
v2i32, v2f32, fp_to_uint>;
-def VCVTs2fd : N2VD<0b11, 0b11, 0b10, 0b11, 0b01100, 0, "vcvt.f32.s32",
+def VCVTs2fd : N2VD<0b11, 0b11, 0b10, 0b11, 0b01100, 0, "vcvt", "f32.s32",
v2f32, v2i32, sint_to_fp>;
-def VCVTu2fd : N2VD<0b11, 0b11, 0b10, 0b11, 0b01101, 0, "vcvt.f32.u32",
+def VCVTu2fd : N2VD<0b11, 0b11, 0b10, 0b11, 0b01101, 0, "vcvt", "f32.u32",
v2f32, v2i32, uint_to_fp>;
-def VCVTf2sq : N2VQ<0b11, 0b11, 0b10, 0b11, 0b01110, 0, "vcvt.s32.f32",
+def VCVTf2sq : N2VQ<0b11, 0b11, 0b10, 0b11, 0b01110, 0, "vcvt", "s32.f32",
v4i32, v4f32, fp_to_sint>;
-def VCVTf2uq : N2VQ<0b11, 0b11, 0b10, 0b11, 0b01111, 0, "vcvt.u32.f32",
+def VCVTf2uq : N2VQ<0b11, 0b11, 0b10, 0b11, 0b01111, 0, "vcvt", "u32.f32",
v4i32, v4f32, fp_to_uint>;
-def VCVTs2fq : N2VQ<0b11, 0b11, 0b10, 0b11, 0b01100, 0, "vcvt.f32.s32",
+def VCVTs2fq : N2VQ<0b11, 0b11, 0b10, 0b11, 0b01100, 0, "vcvt", "f32.s32",
v4f32, v4i32, sint_to_fp>;
-def VCVTu2fq : N2VQ<0b11, 0b11, 0b10, 0b11, 0b01101, 0, "vcvt.f32.u32",
+def VCVTu2fq : N2VQ<0b11, 0b11, 0b10, 0b11, 0b01101, 0, "vcvt", "f32.u32",
v4f32, v4i32, uint_to_fp>;
// VCVT : Vector Convert Between Floating-Point and Fixed-Point.
-def VCVTf2xsd : N2VCvtD<0, 1, 0b1111, 0, 1, "vcvt.s32.f32",
+def VCVTf2xsd : N2VCvtD<0, 1, 0b1111, 0, 1, "vcvt", "s32.f32",
v2i32, v2f32, int_arm_neon_vcvtfp2fxs>;
-def VCVTf2xud : N2VCvtD<1, 1, 0b1111, 0, 1, "vcvt.u32.f32",
+def VCVTf2xud : N2VCvtD<1, 1, 0b1111, 0, 1, "vcvt", "u32.f32",
v2i32, v2f32, int_arm_neon_vcvtfp2fxu>;
-def VCVTxs2fd : N2VCvtD<0, 1, 0b1110, 0, 1, "vcvt.f32.s32",
+def VCVTxs2fd : N2VCvtD<0, 1, 0b1110, 0, 1, "vcvt", "f32.s32",
v2f32, v2i32, int_arm_neon_vcvtfxs2fp>;
-def VCVTxu2fd : N2VCvtD<1, 1, 0b1110, 0, 1, "vcvt.f32.u32",
+def VCVTxu2fd : N2VCvtD<1, 1, 0b1110, 0, 1, "vcvt", "f32.u32",
v2f32, v2i32, int_arm_neon_vcvtfxu2fp>;
-def VCVTf2xsq : N2VCvtQ<0, 1, 0b1111, 0, 1, "vcvt.s32.f32",
+def VCVTf2xsq : N2VCvtQ<0, 1, 0b1111, 0, 1, "vcvt", "s32.f32",
v4i32, v4f32, int_arm_neon_vcvtfp2fxs>;
-def VCVTf2xuq : N2VCvtQ<1, 1, 0b1111, 0, 1, "vcvt.u32.f32",
+def VCVTf2xuq : N2VCvtQ<1, 1, 0b1111, 0, 1, "vcvt", "u32.f32",
v4i32, v4f32, int_arm_neon_vcvtfp2fxu>;
-def VCVTxs2fq : N2VCvtQ<0, 1, 0b1110, 0, 1, "vcvt.f32.s32",
+def VCVTxs2fq : N2VCvtQ<0, 1, 0b1110, 0, 1, "vcvt", "f32.s32",
v4f32, v4i32, int_arm_neon_vcvtfxs2fp>;
-def VCVTxu2fq : N2VCvtQ<1, 1, 0b1110, 0, 1, "vcvt.f32.u32",
+def VCVTxu2fq : N2VCvtQ<1, 1, 0b1110, 0, 1, "vcvt", "f32.u32",
v4f32, v4i32, int_arm_neon_vcvtfxu2fp>;
// Vector Reverse.
// VREV64 : Vector Reverse elements within 64-bit doublewords
-class VREV64D<bits<2> op19_18, string OpcodeStr, ValueType Ty>
+class VREV64D<bits<2> op19_18, string OpcodeStr, string Dt, ValueType Ty>
: N2V<0b11, 0b11, op19_18, 0b00, 0b00000, 0, 0, (outs DPR:$dst),
(ins DPR:$src), IIC_VMOVD,
- !strconcat(OpcodeStr, "\t$dst, $src"), "",
+ OpcodeStr, Dt, "$dst, $src", "",
[(set DPR:$dst, (Ty (NEONvrev64 (Ty DPR:$src))))]>;
-class VREV64Q<bits<2> op19_18, string OpcodeStr, ValueType Ty>
+class VREV64Q<bits<2> op19_18, string OpcodeStr, string Dt, ValueType Ty>
: N2V<0b11, 0b11, op19_18, 0b00, 0b00000, 1, 0, (outs QPR:$dst),
(ins QPR:$src), IIC_VMOVD,
- !strconcat(OpcodeStr, "\t$dst, $src"), "",
+ OpcodeStr, Dt, "$dst, $src", "",
[(set QPR:$dst, (Ty (NEONvrev64 (Ty QPR:$src))))]>;
-def VREV64d8 : VREV64D<0b00, "vrev64.8", v8i8>;
-def VREV64d16 : VREV64D<0b01, "vrev64.16", v4i16>;
-def VREV64d32 : VREV64D<0b10, "vrev64.32", v2i32>;
-def VREV64df : VREV64D<0b10, "vrev64.32", v2f32>;
+def VREV64d8 : VREV64D<0b00, "vrev64", "8", v8i8>;
+def VREV64d16 : VREV64D<0b01, "vrev64", "16", v4i16>;
+def VREV64d32 : VREV64D<0b10, "vrev64", "32", v2i32>;
+def VREV64df : VREV64D<0b10, "vrev64", "32", v2f32>;
-def VREV64q8 : VREV64Q<0b00, "vrev64.8", v16i8>;
-def VREV64q16 : VREV64Q<0b01, "vrev64.16", v8i16>;
-def VREV64q32 : VREV64Q<0b10, "vrev64.32", v4i32>;
-def VREV64qf : VREV64Q<0b10, "vrev64.32", v4f32>;
+def VREV64q8 : VREV64Q<0b00, "vrev64", "8", v16i8>;
+def VREV64q16 : VREV64Q<0b01, "vrev64", "16", v8i16>;
+def VREV64q32 : VREV64Q<0b10, "vrev64", "32", v4i32>;
+def VREV64qf : VREV64Q<0b10, "vrev64", "32", v4f32>;
// VREV32 : Vector Reverse elements within 32-bit words
-class VREV32D<bits<2> op19_18, string OpcodeStr, ValueType Ty>
+class VREV32D<bits<2> op19_18, string OpcodeStr, string Dt, ValueType Ty>
: N2V<0b11, 0b11, op19_18, 0b00, 0b00001, 0, 0, (outs DPR:$dst),
(ins DPR:$src), IIC_VMOVD,
- !strconcat(OpcodeStr, "\t$dst, $src"), "",
+ OpcodeStr, Dt, "$dst, $src", "",
[(set DPR:$dst, (Ty (NEONvrev32 (Ty DPR:$src))))]>;
-class VREV32Q<bits<2> op19_18, string OpcodeStr, ValueType Ty>
+class VREV32Q<bits<2> op19_18, string OpcodeStr, string Dt, ValueType Ty>
: N2V<0b11, 0b11, op19_18, 0b00, 0b00001, 1, 0, (outs QPR:$dst),
(ins QPR:$src), IIC_VMOVD,
- !strconcat(OpcodeStr, "\t$dst, $src"), "",
+ OpcodeStr, Dt, "$dst, $src", "",
[(set QPR:$dst, (Ty (NEONvrev32 (Ty QPR:$src))))]>;
-def VREV32d8 : VREV32D<0b00, "vrev32.8", v8i8>;
-def VREV32d16 : VREV32D<0b01, "vrev32.16", v4i16>;
+def VREV32d8 : VREV32D<0b00, "vrev32", "8", v8i8>;
+def VREV32d16 : VREV32D<0b01, "vrev32", "16", v4i16>;
-def VREV32q8 : VREV32Q<0b00, "vrev32.8", v16i8>;
-def VREV32q16 : VREV32Q<0b01, "vrev32.16", v8i16>;
+def VREV32q8 : VREV32Q<0b00, "vrev32", "8", v16i8>;
+def VREV32q16 : VREV32Q<0b01, "vrev32", "16", v8i16>;
// VREV16 : Vector Reverse elements within 16-bit halfwords
-class VREV16D<bits<2> op19_18, string OpcodeStr, ValueType Ty>
+class VREV16D<bits<2> op19_18, string OpcodeStr, string Dt, ValueType Ty>
: N2V<0b11, 0b11, op19_18, 0b00, 0b00010, 0, 0, (outs DPR:$dst),
(ins DPR:$src), IIC_VMOVD,
- !strconcat(OpcodeStr, "\t$dst, $src"), "",
+ OpcodeStr, Dt, "$dst, $src", "",
[(set DPR:$dst, (Ty (NEONvrev16 (Ty DPR:$src))))]>;
-class VREV16Q<bits<2> op19_18, string OpcodeStr, ValueType Ty>
+class VREV16Q<bits<2> op19_18, string OpcodeStr, string Dt, ValueType Ty>
: N2V<0b11, 0b11, op19_18, 0b00, 0b00010, 1, 0, (outs QPR:$dst),
(ins QPR:$src), IIC_VMOVD,
- !strconcat(OpcodeStr, "\t$dst, $src"), "",
+ OpcodeStr, Dt, "$dst, $src", "",
[(set QPR:$dst, (Ty (NEONvrev16 (Ty QPR:$src))))]>;
-def VREV16d8 : VREV16D<0b00, "vrev16.8", v8i8>;
-def VREV16q8 : VREV16Q<0b00, "vrev16.8", v16i8>;
+def VREV16d8 : VREV16D<0b00, "vrev16", "8", v8i8>;
+def VREV16q8 : VREV16Q<0b00, "vrev16", "8", v16i8>;
// Other Vector Shuffles.
// VEXT : Vector Extract
-class VEXTd<string OpcodeStr, ValueType Ty>
- : N3VImm<0,1,0b11,0,0, (outs DPR:$dst),
- (ins DPR:$lhs, DPR:$rhs, i32imm:$index), IIC_VEXTD,
- !strconcat(OpcodeStr, "\t$dst, $lhs, $rhs, $index"), "",
- [(set DPR:$dst, (Ty (NEONvext (Ty DPR:$lhs),
- (Ty DPR:$rhs), imm:$index)))]>;
-
-class VEXTq<string OpcodeStr, ValueType Ty>
- : N3VImm<0,1,0b11,1,0, (outs QPR:$dst),
- (ins QPR:$lhs, QPR:$rhs, i32imm:$index), IIC_VEXTQ,
- !strconcat(OpcodeStr, "\t$dst, $lhs, $rhs, $index"), "",
- [(set QPR:$dst, (Ty (NEONvext (Ty QPR:$lhs),
- (Ty QPR:$rhs), imm:$index)))]>;
-
-def VEXTd8 : VEXTd<"vext.8", v8i8>;
-def VEXTd16 : VEXTd<"vext.16", v4i16>;
-def VEXTd32 : VEXTd<"vext.32", v2i32>;
-def VEXTdf : VEXTd<"vext.32", v2f32>;
-
-def VEXTq8 : VEXTq<"vext.8", v16i8>;
-def VEXTq16 : VEXTq<"vext.16", v8i16>;
-def VEXTq32 : VEXTq<"vext.32", v4i32>;
-def VEXTqf : VEXTq<"vext.32", v4f32>;
+class VEXTd<string OpcodeStr, string Dt, ValueType Ty>
+ : N3V<0,1,0b11,{?,?,?,?},0,0, (outs DPR:$dst),
+ (ins DPR:$lhs, DPR:$rhs, i32imm:$index), IIC_VEXTD,
+ OpcodeStr, Dt, "$dst, $lhs, $rhs, $index", "",
+ [(set DPR:$dst, (Ty (NEONvext (Ty DPR:$lhs),
+ (Ty DPR:$rhs), imm:$index)))]>;
+
+class VEXTq<string OpcodeStr, string Dt, ValueType Ty>
+ : N3V<0,1,0b11,{?,?,?,?},1,0, (outs QPR:$dst),
+ (ins QPR:$lhs, QPR:$rhs, i32imm:$index), IIC_VEXTQ,
+ OpcodeStr, Dt, "$dst, $lhs, $rhs, $index", "",
+ [(set QPR:$dst, (Ty (NEONvext (Ty QPR:$lhs),
+ (Ty QPR:$rhs), imm:$index)))]>;
+
+def VEXTd8 : VEXTd<"vext", "8", v8i8>;
+def VEXTd16 : VEXTd<"vext", "16", v4i16>;
+def VEXTd32 : VEXTd<"vext", "32", v2i32>;
+def VEXTdf : VEXTd<"vext", "32", v2f32>;
+
+def VEXTq8 : VEXTq<"vext", "8", v16i8>;
+def VEXTq16 : VEXTq<"vext", "16", v8i16>;
+def VEXTq32 : VEXTq<"vext", "32", v4i32>;
+def VEXTqf : VEXTq<"vext", "32", v4f32>;
// VTRN : Vector Transpose
-def VTRNd8 : N2VDShuffle<0b00, 0b00001, "vtrn.8">;
-def VTRNd16 : N2VDShuffle<0b01, 0b00001, "vtrn.16">;
-def VTRNd32 : N2VDShuffle<0b10, 0b00001, "vtrn.32">;
+def VTRNd8 : N2VDShuffle<0b00, 0b00001, "vtrn", "8">;
+def VTRNd16 : N2VDShuffle<0b01, 0b00001, "vtrn", "16">;
+def VTRNd32 : N2VDShuffle<0b10, 0b00001, "vtrn", "32">;
-def VTRNq8 : N2VQShuffle<0b00, 0b00001, IIC_VPERMQ, "vtrn.8">;
-def VTRNq16 : N2VQShuffle<0b01, 0b00001, IIC_VPERMQ, "vtrn.16">;
-def VTRNq32 : N2VQShuffle<0b10, 0b00001, IIC_VPERMQ, "vtrn.32">;
+def VTRNq8 : N2VQShuffle<0b00, 0b00001, IIC_VPERMQ, "vtrn", "8">;
+def VTRNq16 : N2VQShuffle<0b01, 0b00001, IIC_VPERMQ, "vtrn", "16">;
+def VTRNq32 : N2VQShuffle<0b10, 0b00001, IIC_VPERMQ, "vtrn", "32">;
// VUZP : Vector Unzip (Deinterleave)
-def VUZPd8 : N2VDShuffle<0b00, 0b00010, "vuzp.8">;
-def VUZPd16 : N2VDShuffle<0b01, 0b00010, "vuzp.16">;
-def VUZPd32 : N2VDShuffle<0b10, 0b00010, "vuzp.32">;
+def VUZPd8 : N2VDShuffle<0b00, 0b00010, "vuzp", "8">;
+def VUZPd16 : N2VDShuffle<0b01, 0b00010, "vuzp", "16">;
+def VUZPd32 : N2VDShuffle<0b10, 0b00010, "vuzp", "32">;
-def VUZPq8 : N2VQShuffle<0b00, 0b00010, IIC_VPERMQ3, "vuzp.8">;
-def VUZPq16 : N2VQShuffle<0b01, 0b00010, IIC_VPERMQ3, "vuzp.16">;
-def VUZPq32 : N2VQShuffle<0b10, 0b00010, IIC_VPERMQ3, "vuzp.32">;
+def VUZPq8 : N2VQShuffle<0b00, 0b00010, IIC_VPERMQ3, "vuzp", "8">;
+def VUZPq16 : N2VQShuffle<0b01, 0b00010, IIC_VPERMQ3, "vuzp", "16">;
+def VUZPq32 : N2VQShuffle<0b10, 0b00010, IIC_VPERMQ3, "vuzp", "32">;
// VZIP : Vector Zip (Interleave)
-def VZIPd8 : N2VDShuffle<0b00, 0b00011, "vzip.8">;
-def VZIPd16 : N2VDShuffle<0b01, 0b00011, "vzip.16">;
-def VZIPd32 : N2VDShuffle<0b10, 0b00011, "vzip.32">;
+def VZIPd8 : N2VDShuffle<0b00, 0b00011, "vzip", "8">;
+def VZIPd16 : N2VDShuffle<0b01, 0b00011, "vzip", "16">;
+def VZIPd32 : N2VDShuffle<0b10, 0b00011, "vzip", "32">;
-def VZIPq8 : N2VQShuffle<0b00, 0b00011, IIC_VPERMQ3, "vzip.8">;
-def VZIPq16 : N2VQShuffle<0b01, 0b00011, IIC_VPERMQ3, "vzip.16">;
-def VZIPq32 : N2VQShuffle<0b10, 0b00011, IIC_VPERMQ3, "vzip.32">;
+def VZIPq8 : N2VQShuffle<0b00, 0b00011, IIC_VPERMQ3, "vzip", "8">;
+def VZIPq16 : N2VQShuffle<0b01, 0b00011, IIC_VPERMQ3, "vzip", "16">;
+def VZIPq32 : N2VQShuffle<0b10, 0b00011, IIC_VPERMQ3, "vzip", "32">;
// Vector Table Lookup and Table Extension.
@@ -2780,25 +3016,25 @@ def VZIPq32 : N2VQShuffle<0b10, 0b00011, IIC_VPERMQ3, "vzip.32">;
def VTBL1
: N3V<1,1,0b11,0b1000,0,0, (outs DPR:$dst),
(ins DPR:$tbl1, DPR:$src), IIC_VTB1,
- "vtbl.8\t$dst, \\{$tbl1\\}, $src", "",
+ "vtbl", "8", "$dst, \\{$tbl1\\}, $src", "",
[(set DPR:$dst, (v8i8 (int_arm_neon_vtbl1 DPR:$tbl1, DPR:$src)))]>;
let hasExtraSrcRegAllocReq = 1 in {
def VTBL2
: N3V<1,1,0b11,0b1001,0,0, (outs DPR:$dst),
(ins DPR:$tbl1, DPR:$tbl2, DPR:$src), IIC_VTB2,
- "vtbl.8\t$dst, \\{$tbl1,$tbl2\\}, $src", "",
+ "vtbl", "8", "$dst, \\{$tbl1,$tbl2\\}, $src", "",
[(set DPR:$dst, (v8i8 (int_arm_neon_vtbl2
DPR:$tbl1, DPR:$tbl2, DPR:$src)))]>;
def VTBL3
: N3V<1,1,0b11,0b1010,0,0, (outs DPR:$dst),
(ins DPR:$tbl1, DPR:$tbl2, DPR:$tbl3, DPR:$src), IIC_VTB3,
- "vtbl.8\t$dst, \\{$tbl1,$tbl2,$tbl3\\}, $src", "",
+ "vtbl", "8", "$dst, \\{$tbl1,$tbl2,$tbl3\\}, $src", "",
[(set DPR:$dst, (v8i8 (int_arm_neon_vtbl3
DPR:$tbl1, DPR:$tbl2, DPR:$tbl3, DPR:$src)))]>;
def VTBL4
: N3V<1,1,0b11,0b1011,0,0, (outs DPR:$dst),
(ins DPR:$tbl1, DPR:$tbl2, DPR:$tbl3, DPR:$tbl4, DPR:$src), IIC_VTB4,
- "vtbl.8\t$dst, \\{$tbl1,$tbl2,$tbl3,$tbl4\\}, $src", "",
+ "vtbl", "8", "$dst, \\{$tbl1,$tbl2,$tbl3,$tbl4\\}, $src", "",
[(set DPR:$dst, (v8i8 (int_arm_neon_vtbl4 DPR:$tbl1, DPR:$tbl2,
DPR:$tbl3, DPR:$tbl4, DPR:$src)))]>;
} // hasExtraSrcRegAllocReq = 1
@@ -2807,26 +3043,26 @@ def VTBL4
def VTBX1
: N3V<1,1,0b11,0b1000,1,0, (outs DPR:$dst),
(ins DPR:$orig, DPR:$tbl1, DPR:$src), IIC_VTBX1,
- "vtbx.8\t$dst, \\{$tbl1\\}, $src", "$orig = $dst",
+ "vtbx", "8", "$dst, \\{$tbl1\\}, $src", "$orig = $dst",
[(set DPR:$dst, (v8i8 (int_arm_neon_vtbx1
DPR:$orig, DPR:$tbl1, DPR:$src)))]>;
let hasExtraSrcRegAllocReq = 1 in {
def VTBX2
: N3V<1,1,0b11,0b1001,1,0, (outs DPR:$dst),
(ins DPR:$orig, DPR:$tbl1, DPR:$tbl2, DPR:$src), IIC_VTBX2,
- "vtbx.8\t$dst, \\{$tbl1,$tbl2\\}, $src", "$orig = $dst",
+ "vtbx", "8", "$dst, \\{$tbl1,$tbl2\\}, $src", "$orig = $dst",
[(set DPR:$dst, (v8i8 (int_arm_neon_vtbx2
DPR:$orig, DPR:$tbl1, DPR:$tbl2, DPR:$src)))]>;
def VTBX3
: N3V<1,1,0b11,0b1010,1,0, (outs DPR:$dst),
(ins DPR:$orig, DPR:$tbl1, DPR:$tbl2, DPR:$tbl3, DPR:$src), IIC_VTBX3,
- "vtbx.8\t$dst, \\{$tbl1,$tbl2,$tbl3\\}, $src", "$orig = $dst",
+ "vtbx", "8", "$dst, \\{$tbl1,$tbl2,$tbl3\\}, $src", "$orig = $dst",
[(set DPR:$dst, (v8i8 (int_arm_neon_vtbx3 DPR:$orig, DPR:$tbl1,
DPR:$tbl2, DPR:$tbl3, DPR:$src)))]>;
def VTBX4
: N3V<1,1,0b11,0b1011,1,0, (outs DPR:$dst), (ins DPR:$orig, DPR:$tbl1,
DPR:$tbl2, DPR:$tbl3, DPR:$tbl4, DPR:$src), IIC_VTBX4,
- "vtbx.8\t$dst, \\{$tbl1,$tbl2,$tbl3,$tbl4\\}, $src", "$orig = $dst",
+ "vtbx", "8", "$dst, \\{$tbl1,$tbl2,$tbl3,$tbl4\\}, $src", "$orig = $dst",
[(set DPR:$dst, (v8i8 (int_arm_neon_vtbx4 DPR:$orig, DPR:$tbl1,
DPR:$tbl2, DPR:$tbl3, DPR:$tbl4, DPR:$src)))]>;
} // hasExtraSrcRegAllocReq = 1
@@ -2840,17 +3076,17 @@ def VTBX4
// Vector Add Operations used for single-precision FP
let neverHasSideEffects = 1 in
-def VADDfd_sfp : N3VDs<0, 0, 0b00, 0b1101, 0, "vadd.f32", v2f32, v2f32, fadd,1>;
+def VADDfd_sfp : N3VDs<0, 0, 0b00, 0b1101, 0, "vadd", "f32", v2f32, v2f32, fadd,1>;
def : N3VDsPat<fadd, VADDfd_sfp>;
// Vector Sub Operations used for single-precision FP
let neverHasSideEffects = 1 in
-def VSUBfd_sfp : N3VDs<0, 0, 0b10, 0b1101, 0, "vsub.f32", v2f32, v2f32, fsub,0>;
+def VSUBfd_sfp : N3VDs<0, 0, 0b10, 0b1101, 0, "vsub", "f32", v2f32, v2f32, fsub,0>;
def : N3VDsPat<fsub, VSUBfd_sfp>;
// Vector Multiply Operations used for single-precision FP
let neverHasSideEffects = 1 in
-def VMULfd_sfp : N3VDs<1, 0, 0b00, 0b1101, 1, "vmul.f32", v2f32, v2f32, fmul,1>;
+def VMULfd_sfp : N3VDs<1, 0, 0b00, 0b1101, 1, "vmul", "f32", v2f32, v2f32, fmul,1>;
def : N3VDsPat<fmul, VMULfd_sfp>;
// Vector Multiply-Accumulate/Subtract used for single-precision FP
@@ -2858,17 +3094,17 @@ def : N3VDsPat<fmul, VMULfd_sfp>;
// we want to avoid them for now. e.g., alternating vmla/vadd instructions.
//let neverHasSideEffects = 1 in
-//def VMLAfd_sfp : N3VDMulOps<0, 0, 0b00, 0b1101, 1, IIC_VMACD, "vmla.f32", v2f32,fmul,fadd>;
+//def VMLAfd_sfp : N3VDMulOps<0, 0, 0b00, 0b1101, 1, IIC_VMACD, "vmla", "f32", v2f32,fmul,fadd>;
//def : N3VDMulOpsPat<fmul, fadd, VMLAfd_sfp>;
//let neverHasSideEffects = 1 in
-//def VMLSfd_sfp : N3VDMulOps<0, 0, 0b10, 0b1101, 1, IIC_VMACD, "vmls.f32", v2f32,fmul,fsub>;
+//def VMLSfd_sfp : N3VDMulOps<0, 0, 0b10, 0b1101, 1, IIC_VMACD, "vmls", "f32", v2f32,fmul,fsub>;
//def : N3VDMulOpsPat<fmul, fsub, VMLSfd_sfp>;
// Vector Absolute used for single-precision FP
let neverHasSideEffects = 1 in
def VABSfd_sfp : N2VDInts<0b11, 0b11, 0b10, 0b01, 0b01110, 0,
- IIC_VUNAD, "vabs.f32",
+ IIC_VUNAD, "vabs", "f32",
v2f32, v2f32, int_arm_neon_vabs>;
def : N2VDIntsPat<fabs, VABSfd_sfp>;
@@ -2876,27 +3112,27 @@ def : N2VDIntsPat<fabs, VABSfd_sfp>;
let neverHasSideEffects = 1 in
def VNEGf32d_sfp : N2V<0b11, 0b11, 0b10, 0b01, 0b01111, 0, 0,
(outs DPR_VFP2:$dst), (ins DPR_VFP2:$src), IIC_VUNAD,
- "vneg.f32\t$dst, $src", "", []>;
+ "vneg", "f32", "$dst, $src", "", []>;
def : N2VDIntsPat<fneg, VNEGf32d_sfp>;
// Vector Convert between single-precision FP and integer
let neverHasSideEffects = 1 in
-def VCVTf2sd_sfp : N2VDs<0b11, 0b11, 0b10, 0b11, 0b01110, 0, "vcvt.s32.f32",
+def VCVTf2sd_sfp : N2VDs<0b11, 0b11, 0b10, 0b11, 0b01110, 0, "vcvt", "s32.f32",
v2i32, v2f32, fp_to_sint>;
def : N2VDsPat<arm_ftosi, f32, v2f32, VCVTf2sd_sfp>;
let neverHasSideEffects = 1 in
-def VCVTf2ud_sfp : N2VDs<0b11, 0b11, 0b10, 0b11, 0b01111, 0, "vcvt.u32.f32",
+def VCVTf2ud_sfp : N2VDs<0b11, 0b11, 0b10, 0b11, 0b01111, 0, "vcvt", "u32.f32",
v2i32, v2f32, fp_to_uint>;
def : N2VDsPat<arm_ftoui, f32, v2f32, VCVTf2ud_sfp>;
let neverHasSideEffects = 1 in
-def VCVTs2fd_sfp : N2VDs<0b11, 0b11, 0b10, 0b11, 0b01100, 0, "vcvt.f32.s32",
+def VCVTs2fd_sfp : N2VDs<0b11, 0b11, 0b10, 0b11, 0b01100, 0, "vcvt", "f32.s32",
v2f32, v2i32, sint_to_fp>;
def : N2VDsPat<arm_sitof, f32, v2i32, VCVTs2fd_sfp>;
let neverHasSideEffects = 1 in
-def VCVTu2fd_sfp : N2VDs<0b11, 0b11, 0b10, 0b11, 0b01101, 0, "vcvt.f32.u32",
+def VCVTu2fd_sfp : N2VDs<0b11, 0b11, 0b10, 0b11, 0b01101, 0, "vcvt", "f32.u32",
v2f32, v2i32, uint_to_fp>;
def : N2VDsPat<arm_uitof, f32, v2i32, VCVTu2fd_sfp>;
diff --git a/lib/Target/ARM/ARMInstrThumb.td b/lib/Target/ARM/ARMInstrThumb.td
index d1831d1..b5956a3 100644
--- a/lib/Target/ARM/ARMInstrThumb.td
+++ b/lib/Target/ARM/ARMInstrThumb.td
@@ -296,7 +296,7 @@ let isBranch = 1, isTerminator = 1 in {
// Load Store Instructions.
//
-let canFoldAsLoad = 1 in
+let canFoldAsLoad = 1, isReMaterializable = 1, mayHaveSideEffects = 1 in
def tLDR : T1pI4<(outs tGPR:$dst), (ins t_addrmode_s4:$addr), IIC_iLoadr,
"ldr", "\t$dst, $addr",
[(set tGPR:$dst, (load t_addrmode_s4:$addr))]>;
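A hedged note on why mayHaveSideEffects is set together with isReMaterializable on these loads (my reading; the patch itself does not say): the flag keeps the generic remat test from accepting the load outright and defers the decision to the target hook, roughly:

  // Hypothetical sketch of the check this relies on (names assumed):
  //   if (MI->getDesc().hasUnmodeledSideEffects())
  //     return TII->isReallyTriviallyReMaterializable(MI);
  // The ARM hook can then approve only provably invariant loads, such as
  // the constant-pool loads (tLDRpci) marked below.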
@@ -332,13 +332,14 @@ def tRestore : T1pIs<(outs tGPR:$dst), (ins t_addrmode_sp:$addr), IIC_iLoadi,
// Load tconstpool
// FIXME: Use ldr.n to work around a Darwin assembler bug.
-let canFoldAsLoad = 1 in
+let canFoldAsLoad = 1, isReMaterializable = 1, mayHaveSideEffects = 1 in
def tLDRpci : T1pIs<(outs tGPR:$dst), (ins i32imm:$addr), IIC_iLoadi,
"ldr", ".n\t$dst, $addr",
[(set tGPR:$dst, (load (ARMWrapper tconstpool:$addr)))]>;
// Special LDR for loads from non-pc-relative constpools.
-let canFoldAsLoad = 1, mayLoad = 1, isReMaterializable = 1 in
+let canFoldAsLoad = 1, mayLoad = 1, isReMaterializable = 1,
+ mayHaveSideEffects = 1 in
def tLDRcp : T1pIs<(outs tGPR:$dst), (ins i32imm:$addr), IIC_iLoadi,
"ldr", "\t$dst, $addr", []>;
diff --git a/lib/Target/ARM/ARMInstrThumb2.td b/lib/Target/ARM/ARMInstrThumb2.td
index 1bb9bfd..9489815 100644
--- a/lib/Target/ARM/ARMInstrThumb2.td
+++ b/lib/Target/ARM/ARMInstrThumb2.td
@@ -49,8 +49,8 @@ def t2_so_imm_neg_XFORM : SDNodeXForm<imm, [{
// 8-bit immediate rotated by an arbitrary number of bits, or an 8-bit
// immediate splatted into multiple bytes of the word. t2_so_imm values are
// represented in the imm field in the same 12-bit form that they are encoded
-// into t2_so_imm instructions: the 8-bit immediate is the least significant bits
-// [bits 0-7], the 4-bit shift/splat amount is the next 4 bits [bits 8-11].
+// into t2_so_imm instructions: the 8-bit immediate is the least significant
+// bits [bits 0-7], the 4-bit shift/splat amount is the next 4 bits [bits 8-11].
def t2_so_imm : Operand<i32>,
PatLeaf<(imm), [{
return ARM_AM::getT2SOImmVal((uint32_t)N->getZExtValue()) != -1;
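A small illustration of this predicate, under the encoding the comment above describes (example values chosen for illustration, not taken from the patch):

  // Illustrative only; assumes ARM_AM::getT2SOImmVal matches the comment.
  uint32_t A = 0x00FF00FF;                 // 8-bit value splatted into
  assert(ARM_AM::getT2SOImmVal(A) != -1);  // alternating bytes: encodable
  uint32_t B = 0x12345678;                 // arbitrary 32-bit value: not a
  assert(ARM_AM::getT2SOImmVal(B) == -1);  // single modified immediate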
@@ -88,6 +88,21 @@ def t2_so_imm2part_2 : SDNodeXForm<imm, [{
return CurDAG->getTargetConstant(V, MVT::i32);
}]>;
+def t2_so_neg_imm2part : Operand<i32>, PatLeaf<(imm), [{
+ return ARM_AM::isT2SOImmTwoPartVal(-(int)N->getZExtValue());
+ }]> {
+}
+
+def t2_so_neg_imm2part_1 : SDNodeXForm<imm, [{
+ unsigned V = ARM_AM::getT2SOImmTwoPartFirst(-(int)N->getZExtValue());
+ return CurDAG->getTargetConstant(V, MVT::i32);
+}]>;
+
+def t2_so_neg_imm2part_2 : SDNodeXForm<imm, [{
+ unsigned V = ARM_AM::getT2SOImmTwoPartSecond(-(int)N->getZExtValue());
+ return CurDAG->getTargetConstant(V, MVT::i32);
+}]>;
+
/// imm1_31 predicate - True if the 32-bit immediate is in the range [1,31].
def imm1_31 : PatLeaf<(i32 imm), [{
return (int32_t)N->getZExtValue() >= 1 && (int32_t)N->getZExtValue() < 32;
@@ -252,9 +267,9 @@ multiclass T2I_bin_ii12rs<string opc, PatFrag opnode, bit Commutable = 0> {
[(set GPR:$dst, (opnode GPR:$lhs, t2_so_reg:$rhs))]>;
}
-/// T2I_adde_sube_irs - Defines a set of (op reg, {so_imm|r|so_reg}) patterns for a
-/// binary operation that produces a value and use and define the carry bit.
-/// It's not predicable.
+/// T2I_adde_sube_irs - Defines a set of (op reg, {so_imm|r|so_reg}) patterns
+/// for a binary operation that produces a value and uses and defines the
+/// carry bit. It's not predicable.
let Uses = [CPSR] in {
multiclass T2I_adde_sube_irs<string opc, PatFrag opnode, bit Commutable = 0> {
// shifted imm
@@ -471,7 +486,7 @@ def t2SUBrSPs_ : PseudoInst<(outs GPR:$dst), (ins GPR:$sp, t2_so_reg:$rhs),
//
// Load
-let canFoldAsLoad = 1 in
+let canFoldAsLoad = 1, isReMaterializable = 1, mayHaveSideEffects = 1 in
defm t2LDR : T2I_ld<"ldr", UnOpFrag<(load node:$Src)>>;
// Loads with zero extension
@@ -615,7 +630,7 @@ def t2STR_POST : T2Iidxldst<(outs GPR:$base_wb),
AddrModeT2_i8, IndexModePost, IIC_iStoreiu,
"str", "\t$src, [$base], $offset", "$base = $base_wb",
[(set GPR:$base_wb,
- (post_store GPR:$src, GPR:$base, t2am_imm8_offset:$offset))]>;
+ (post_store GPR:$src, GPR:$base, t2am_imm8_offset:$offset))]>;
def t2STRH_PRE : T2Iidxldst<(outs GPR:$base_wb),
(ins GPR:$src, GPR:$base, t2am_imm8_offset:$offset),
@@ -718,9 +733,9 @@ def : T2Pat<(and (srl GPR:$Src, (i32 8)), 0xFF00FF),
(t2UXTB16r_rot GPR:$Src, 8)>;
defm t2UXTAB : T2I_bin_rrot<"uxtab",
- BinOpFrag<(add node:$LHS, (and node:$RHS, 0x00FF))>>;
+ BinOpFrag<(add node:$LHS, (and node:$RHS, 0x00FF))>>;
defm t2UXTAH : T2I_bin_rrot<"uxtah",
- BinOpFrag<(add node:$LHS, (and node:$RHS, 0xFFFF))>>;
+ BinOpFrag<(add node:$LHS, (and node:$RHS, 0xFFFF))>>;
}
//===----------------------------------------------------------------------===//
@@ -1162,15 +1177,9 @@ def : T2Pat<(xor GPR:$LHS, t2_so_imm2part:$RHS),
def : T2Pat<(add GPR:$LHS, t2_so_imm2part:$RHS),
(t2ADDri (t2ADDri GPR:$LHS, (t2_so_imm2part_1 imm:$RHS)),
(t2_so_imm2part_2 imm:$RHS))>;
-def : T2Pat<(sub GPR:$LHS, t2_so_imm2part:$RHS),
- (t2SUBri (t2SUBri GPR:$LHS, (t2_so_imm2part_1 imm:$RHS)),
- (t2_so_imm2part_2 imm:$RHS))>;
-
-// ConstantPool, GlobalAddress, and JumpTable
-def : T2Pat<(ARMWrapper tglobaladdr :$dst), (t2LEApcrel tglobaladdr :$dst)>;
-def : T2Pat<(ARMWrapper tconstpool :$dst), (t2LEApcrel tconstpool :$dst)>;
-def : T2Pat<(ARMWrapperJT tjumptable:$dst, imm:$id),
- (t2LEApcrelJT tjumptable:$dst, imm:$id)>;
+def : T2Pat<(add GPR:$LHS, t2_so_neg_imm2part:$RHS),
+ (t2SUBri (t2SUBri GPR:$LHS, (t2_so_neg_imm2part_1 imm:$RHS)),
+ (t2_so_neg_imm2part_2 imm:$RHS))>;
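The dropped `sub` pattern and this new `add`-with-negative-immediate pattern cover the same constants; a hedged worked example:

  // For (add r, #-257): -(-257) = 257 = 0x101 is not a single Thumb-2
  // modified immediate, but is a valid two-part value (split assumed):
  //   ARM_AM::getT2SOImmTwoPartFirst(0x101)  -> 0x100
  //   ARM_AM::getT2SOImmTwoPartSecond(0x101) -> 0x001
  // so the pattern emits:  sub.w r, r, #256  ;  sub.w r, r, #1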
// 32-bit immediate using movw + movt.
// This is a single pseudo instruction to make it re-materializable. Remove
@@ -1180,10 +1189,20 @@ def t2MOVi32imm : T2Ix2<(outs GPR:$dst), (ins i32imm:$src), IIC_iMOVi,
"movw", "\t$dst, ${src:lo16}\n\tmovt${p}\t$dst, ${src:hi16}",
[(set GPR:$dst, (i32 imm:$src))]>;
+// ConstantPool, GlobalAddress, and JumpTable
+def : T2Pat<(ARMWrapper tglobaladdr :$dst), (t2LEApcrel tglobaladdr :$dst)>,
+ Requires<[IsThumb2, DontUseMovt]>;
+def : T2Pat<(ARMWrapper tconstpool :$dst), (t2LEApcrel tconstpool :$dst)>;
+def : T2Pat<(ARMWrapper tglobaladdr :$dst), (t2MOVi32imm tglobaladdr :$dst)>,
+ Requires<[IsThumb2, UseMovt]>;
+
+def : T2Pat<(ARMWrapperJT tjumptable:$dst, imm:$id),
+ (t2LEApcrelJT tjumptable:$dst, imm:$id)>;
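The Requires<> predicates split global-address materialization by subtarget; a hedged sketch of the two outcomes for (ARMWrapper tglobaladdr:_g):

  //   UseMovt (v6T2+ with -arm-use-movt, now on by default):
  //     movw r0, :lower16:_g     // one t2MOVi32imm pseudo, kept as a
  //     movt r0, :upper16:_g     // single re-materializable definition
  //   DontUseMovt:
  //     t2LEApcrel -- form the address pc-relatively, typically through
  //     a constant-pool entry.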
+
// Pseudo instruction that combines ldr from constpool and add pc. This should
// be expanded into two instructions late to allow if-conversion and
// scheduling.
-let isReMaterializable = 1 in
+let canFoldAsLoad = 1, isReMaterializable = 1, mayHaveSideEffects = 1 in
def t2LDRpci_pic : PseudoInst<(outs GPR:$dst), (ins i32imm:$addr, pclabel:$cp),
NoItinerary, "@ ldr.w\t$dst, $addr\n$cp:\n\tadd\t$dst, pc",
[(set GPR:$dst, (ARMpic_add (load (ARMWrapper tconstpool:$addr)),
diff --git a/lib/Target/ARM/ARMInstrVFP.td b/lib/Target/ARM/ARMInstrVFP.td
index ba341f4..5bfe89d 100644
--- a/lib/Target/ARM/ARMInstrVFP.td
+++ b/lib/Target/ARM/ARMInstrVFP.td
@@ -54,7 +54,7 @@ def vfp_f64imm : Operand<f64>,
// Load / store Instructions.
//
-let canFoldAsLoad = 1 in {
+let canFoldAsLoad = 1, isReMaterializable = 1, mayHaveSideEffects = 1 in {
def VLDRD : ADI5<0b1101, 0b01, (outs DPR:$dst), (ins addrmode5:$addr),
IIC_fpLoad64, "vldr", ".64\t$dst, $addr",
[(set DPR:$dst, (load addrmode5:$addr))]>;
@@ -437,7 +437,7 @@ def FMSTAT : VFPAI<(outs), (ins), VFPMiscFrm, IIC_fpSTAT, "vmrs",
let isReMaterializable = 1 in {
def FCONSTD : VFPAI<(outs DPR:$dst), (ins vfp_f64imm:$imm),
VFPMiscFrm, IIC_VMOVImm,
- "fconstd", "\t$dst, $imm",
+ "vmov", ".f64\t$dst, $imm",
[(set DPR:$dst, vfp_f64imm:$imm)]>, Requires<[HasVFP3]> {
let Inst{27-23} = 0b11101;
let Inst{21-20} = 0b11;
@@ -448,7 +448,7 @@ def FCONSTD : VFPAI<(outs DPR:$dst), (ins vfp_f64imm:$imm),
def FCONSTS : VFPAI<(outs SPR:$dst), (ins vfp_f32imm:$imm),
VFPMiscFrm, IIC_VMOVImm,
- "fconsts", "\t$dst, $imm",
+ "vmov", ".f32\t$dst, $imm",
[(set SPR:$dst, vfp_f32imm:$imm)]>, Requires<[HasVFP3]> {
let Inst{27-23} = 0b11101;
let Inst{21-20} = 0b11;
diff --git a/lib/Target/ARM/ARMJITInfo.cpp b/lib/Target/ARM/ARMJITInfo.cpp
index 24990e6..aa50cfd 100644
--- a/lib/Target/ARM/ARMJITInfo.cpp
+++ b/lib/Target/ARM/ARMJITInfo.cpp
@@ -139,7 +139,8 @@ ARMJITInfo::getLazyResolverFunction(JITCompilerFn F) {
void *ARMJITInfo::emitGlobalValueIndirectSym(const GlobalValue *GV, void *Ptr,
JITCodeEmitter &JCE) {
- JCE.startGVStub(GV, 4, 4);
+ MachineCodeEmitter::BufferState BS;
+ JCE.startGVStub(BS, GV, 4, 4);
intptr_t Addr = (intptr_t)JCE.getCurrentPCValue();
if (!sys::Memory::setRangeWritable((void*)Addr, 4)) {
llvm_unreachable("ERROR: Unable to mark indirect symbol writable");
@@ -148,19 +149,27 @@ void *ARMJITInfo::emitGlobalValueIndirectSym(const GlobalValue *GV, void *Ptr,
if (!sys::Memory::setRangeExecutable((void*)Addr, 4)) {
llvm_unreachable("ERROR: Unable to mark indirect symbol executable");
}
- void *PtrAddr = JCE.finishGVStub(GV);
+ void *PtrAddr = JCE.finishGVStub(BS);
addIndirectSymAddr(Ptr, (intptr_t)PtrAddr);
return PtrAddr;
}
+TargetJITInfo::StubLayout ARMJITInfo::getStubLayout() {
+ // The stub contains up to 3 4-byte instructions, aligned at 4 bytes, and a
+ // 4-byte address. See emitFunctionStub for details.
+ StubLayout Result = {16, 4};
+ return Result;
+}
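The {16, 4} figure tallies with the three stub variants emitFunctionStub emits below:

  // PIC stub:      3 instructions x 4 + 4-byte offset word   = 16 bytes
  // non-PIC stub:  1 instruction  x 4 + 4-byte address word  =  8 bytes
  // callback stub: 4 words (save LR, ..., ldr pc, callback)  = 16 bytes
  // The maximum is 16 bytes at 4-byte alignment, hence StubLayout{16, 4}.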
+
void *ARMJITInfo::emitFunctionStub(const Function* F, void *Fn,
JITCodeEmitter &JCE) {
+ void *Addr;
// If this is just a call to an external function, emit a branch instead of a
// call. The code is the same except for one bit of the last instruction.
if (Fn != (void*)(intptr_t)ARMCompilationCallback) {
// Branch to the corresponding function addr.
if (IsPIC) {
- // The stub is 8-byte size and 4-aligned.
+ // The stub is 16 bytes long and 4-byte aligned.
intptr_t LazyPtr = getIndirectSymAddr(Fn);
if (!LazyPtr) {
// In PIC mode, the function stub is loading a lazy-ptr.
@@ -172,30 +181,30 @@ void *ARMJITInfo::emitFunctionStub(const Function* F, void *Fn,
errs() << "JIT: Stub emitted at [" << LazyPtr
<< "] for external function at '" << Fn << "'\n");
}
- JCE.startGVStub(F, 16, 4);
- intptr_t Addr = (intptr_t)JCE.getCurrentPCValue();
- if (!sys::Memory::setRangeWritable((void*)Addr, 16)) {
+ JCE.emitAlignment(4);
+ Addr = (void*)JCE.getCurrentPCValue();
+ if (!sys::Memory::setRangeWritable(Addr, 16)) {
llvm_unreachable("ERROR: Unable to mark stub writable");
}
- JCE.emitWordLE(0xe59fc004); // ldr pc, [pc, #+4]
+ JCE.emitWordLE(0xe59fc004); // ldr ip, [pc, #+4]
JCE.emitWordLE(0xe08fc00c); // L_func$scv: add ip, pc, ip
JCE.emitWordLE(0xe59cf000); // ldr pc, [ip]
- JCE.emitWordLE(LazyPtr - (Addr+4+8)); // func - (L_func$scv+8)
- sys::Memory::InvalidateInstructionCache((void*)Addr, 16);
- if (!sys::Memory::setRangeExecutable((void*)Addr, 16)) {
+ JCE.emitWordLE(LazyPtr - (intptr_t(Addr)+4+8)); // func - (L_func$scv+8)
+ sys::Memory::InvalidateInstructionCache(Addr, 16);
+ if (!sys::Memory::setRangeExecutable(Addr, 16)) {
llvm_unreachable("ERROR: Unable to mark stub executable");
}
} else {
// The stub is 8 bytes long and 4-byte aligned.
- JCE.startGVStub(F, 8, 4);
- intptr_t Addr = (intptr_t)JCE.getCurrentPCValue();
- if (!sys::Memory::setRangeWritable((void*)Addr, 8)) {
+ JCE.emitAlignment(4);
+ Addr = (void*)JCE.getCurrentPCValue();
+ if (!sys::Memory::setRangeWritable(Addr, 8)) {
llvm_unreachable("ERROR: Unable to mark stub writable");
}
JCE.emitWordLE(0xe51ff004); // ldr pc, [pc, #-4]
JCE.emitWordLE((intptr_t)Fn); // addr of function
- sys::Memory::InvalidateInstructionCache((void*)Addr, 8);
- if (!sys::Memory::setRangeExecutable((void*)Addr, 8)) {
+ sys::Memory::InvalidateInstructionCache(Addr, 8);
+ if (!sys::Memory::setRangeExecutable(Addr, 8)) {
llvm_unreachable("ERROR: Unable to mark stub executable");
}
}
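The `LazyPtr - (Addr+4+8)` literal is worth a worked check, since ARM reads PC as the current instruction's address plus 8:

  // Word layout of the PIC stub (Addr = stub start):
  //   Addr+0 : ldr ip, [pc, #+4]     ; pc = Addr+8, loads word at Addr+12
  //   Addr+4 : add ip, pc, ip        ; L_func$scv, pc = Addr+12
  //   Addr+8 : ldr pc, [ip]
  //   Addr+12: LazyPtr - (Addr+4+8)  ; the stored displacement
  // The add computes (Addr+12) + (LazyPtr - (Addr+12)) = LazyPtr exactly.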
@@ -207,9 +216,9 @@ void *ARMJITInfo::emitFunctionStub(const Function* F, void *Fn,
//
// Branch and link to the compilation callback.
// The stub is 16 bytes long and 4-byte aligned.
- JCE.startGVStub(F, 16, 4);
- intptr_t Addr = (intptr_t)JCE.getCurrentPCValue();
- if (!sys::Memory::setRangeWritable((void*)Addr, 16)) {
+ JCE.emitAlignment(4);
+ Addr = (void*)JCE.getCurrentPCValue();
+ if (!sys::Memory::setRangeWritable(Addr, 16)) {
llvm_unreachable("ERROR: Unable to mark stub writable");
}
// Save LR so the callback can determine which stub called it.
@@ -222,13 +231,13 @@ void *ARMJITInfo::emitFunctionStub(const Function* F, void *Fn,
JCE.emitWordLE(0xe51ff004); // ldr pc, [pc, #-4]
// The address of the compilation callback.
JCE.emitWordLE((intptr_t)ARMCompilationCallback);
- sys::Memory::InvalidateInstructionCache((void*)Addr, 16);
- if (!sys::Memory::setRangeExecutable((void*)Addr, 16)) {
+ sys::Memory::InvalidateInstructionCache(Addr, 16);
+ if (!sys::Memory::setRangeExecutable(Addr, 16)) {
llvm_unreachable("ERROR: Unable to mark stub executable");
}
}
- return JCE.finishGVStub(F);
+ return Addr;
}
intptr_t ARMJITInfo::resolveRelocDestAddr(MachineRelocation *MR) const {
diff --git a/lib/Target/ARM/ARMJITInfo.h b/lib/Target/ARM/ARMJITInfo.h
index 7dfeed8..ff332b7 100644
--- a/lib/Target/ARM/ARMJITInfo.h
+++ b/lib/Target/ARM/ARMJITInfo.h
@@ -61,6 +61,10 @@ namespace llvm {
virtual void *emitGlobalValueIndirectSym(const GlobalValue* GV, void *ptr,
JITCodeEmitter &JCE);
+ // getStubLayout - Returns the size and alignment of the largest call stub
+ // on ARM.
+ virtual StubLayout getStubLayout();
+
/// emitFunctionStub - Use the specified JITCodeEmitter object to emit a
/// small native function that simply calls the function at the specified
/// address.
diff --git a/lib/Target/ARM/ARMScheduleV7.td b/lib/Target/ARM/ARMScheduleV7.td
index 427645c..bbbf413 100644
--- a/lib/Target/ARM/ARMScheduleV7.td
+++ b/lib/Target/ARM/ARMScheduleV7.td
@@ -180,7 +180,7 @@ def CortexA8Itineraries : ProcessorItineraries<[
// Double-precision FP Unary
InstrItinData<IIC_fpUNA64 , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
InstrStage<4, [FU_NPipe], 0>,
- InstrStage<4, [FU_NLSPipe]>]>,
+ InstrStage<4, [FU_NLSPipe]>], [4, 1]>,
//
// Single-precision FP Compare
InstrItinData<IIC_fpCMP32 , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
@@ -189,17 +189,17 @@ def CortexA8Itineraries : ProcessorItineraries<[
// Double-precision FP Compare
InstrItinData<IIC_fpCMP64 , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
InstrStage<4, [FU_NPipe], 0>,
- InstrStage<4, [FU_NLSPipe]>]>,
+ InstrStage<4, [FU_NLSPipe]>], [4, 1]>,
//
// Single to Double FP Convert
InstrItinData<IIC_fpCVTSD , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
InstrStage<7, [FU_NPipe], 0>,
- InstrStage<7, [FU_NLSPipe]>]>,
+ InstrStage<7, [FU_NLSPipe]>], [7, 1]>,
//
// Double to Single FP Convert
InstrItinData<IIC_fpCVTDS , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
InstrStage<5, [FU_NPipe], 0>,
- InstrStage<5, [FU_NLSPipe]>]>,
+ InstrStage<5, [FU_NLSPipe]>], [5, 1]>,
//
// Single-Precision FP to Integer Convert
InstrItinData<IIC_fpCVTSI , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
@@ -208,7 +208,7 @@ def CortexA8Itineraries : ProcessorItineraries<[
// Double-Precision FP to Integer Convert
InstrItinData<IIC_fpCVTDI , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
InstrStage<8, [FU_NPipe], 0>,
- InstrStage<8, [FU_NLSPipe]>]>,
+ InstrStage<8, [FU_NLSPipe]>], [8, 1]>,
//
// Integer to Single-Precision FP Convert
InstrItinData<IIC_fpCVTIS , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
@@ -217,7 +217,7 @@ def CortexA8Itineraries : ProcessorItineraries<[
// Integer to Double-Precision FP Convert
InstrItinData<IIC_fpCVTID , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
InstrStage<8, [FU_NPipe], 0>,
- InstrStage<8, [FU_NLSPipe]>]>,
+ InstrStage<8, [FU_NLSPipe]>], [8, 1]>,
//
// Single-precision FP ALU
InstrItinData<IIC_fpALU32 , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
@@ -226,7 +226,7 @@ def CortexA8Itineraries : ProcessorItineraries<[
// Double-precision FP ALU
InstrItinData<IIC_fpALU64 , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
InstrStage<9, [FU_NPipe], 0>,
- InstrStage<9, [FU_NLSPipe]>]>,
+ InstrStage<9, [FU_NLSPipe]>], [9, 1, 1]>,
//
// Single-precision FP Multiply
InstrItinData<IIC_fpMUL32 , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
@@ -235,7 +235,7 @@ def CortexA8Itineraries : ProcessorItineraries<[
// Double-precision FP Multiply
InstrItinData<IIC_fpMUL64 , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
InstrStage<11, [FU_NPipe], 0>,
- InstrStage<11, [FU_NLSPipe]>]>,
+ InstrStage<11, [FU_NLSPipe]>], [11, 1, 1]>,
//
// Single-precision FP MAC
InstrItinData<IIC_fpMAC32 , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
@@ -244,27 +244,27 @@ def CortexA8Itineraries : ProcessorItineraries<[
// Double-precision FP MAC
InstrItinData<IIC_fpMAC64 , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
InstrStage<19, [FU_NPipe], 0>,
- InstrStage<19, [FU_NLSPipe]>]>,
+ InstrStage<19, [FU_NLSPipe]>], [19, 2, 1, 1]>,
//
// Single-precision FP DIV
InstrItinData<IIC_fpDIV32 , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
InstrStage<20, [FU_NPipe], 0>,
- InstrStage<20, [FU_NLSPipe]>]>,
+ InstrStage<20, [FU_NLSPipe]>], [20, 1, 1]>,
//
// Double-precision FP DIV
InstrItinData<IIC_fpDIV64 , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
InstrStage<29, [FU_NPipe], 0>,
- InstrStage<29, [FU_NLSPipe]>]>,
+ InstrStage<29, [FU_NLSPipe]>], [29, 1, 1]>,
//
// Single-precision FP SQRT
InstrItinData<IIC_fpSQRT32, [InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
InstrStage<19, [FU_NPipe], 0>,
- InstrStage<19, [FU_NLSPipe]>]>,
+ InstrStage<19, [FU_NLSPipe]>], [19, 1]>,
//
// Double-precision FP SQRT
InstrItinData<IIC_fpSQRT64, [InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
InstrStage<29, [FU_NPipe], 0>,
- InstrStage<29, [FU_NLSPipe]>]>,
+ InstrStage<29, [FU_NLSPipe]>], [29, 1]>,
//
// Single-precision FP Load
// use FU_Issue to enforce the 1 load/store per cycle limit
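The bracketed lists added throughout this file are per-operand cycle counts. As a hedged reading of the itinerary machinery (not spelled out in the patch), the first entry is the cycle at which the defined operand becomes available and the remaining entries are the cycles at which the sources are read:

  // e.g. IIC_fpUNA64 ... [4, 1]:
  //   operand 0 (the result) : available at cycle 4
  //   operand 1 (the source) : read at cycle 1
  // A dependent double-precision FP op therefore stalls until cycle 4,
  // instead of the scheduler assuming one default latency per instruction.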
diff --git a/lib/Target/ARM/ARMSubtarget.cpp b/lib/Target/ARM/ARMSubtarget.cpp
index 432ed78..71f3883 100644
--- a/lib/Target/ARM/ARMSubtarget.cpp
+++ b/lib/Target/ARM/ARMSubtarget.cpp
@@ -27,6 +27,10 @@ UseNEONFP("arm-use-neon-fp",
cl::desc("Use NEON for single-precision FP"),
cl::init(false), cl::Hidden);
+static cl::opt<bool>
+UseMOVT("arm-use-movt",
+ cl::init(true), cl::Hidden);
+
ARMSubtarget::ARMSubtarget(const std::string &TT, const std::string &FS,
bool isT)
: ARMArchVersion(V4T)
@@ -36,6 +40,7 @@ ARMSubtarget::ARMSubtarget(const std::string &TT, const std::string &FS,
, ThumbMode(Thumb1)
, PostRAScheduler(false)
, IsR9Reserved(ReserveR9)
+ , UseMovt(UseMOVT)
, stackAlignment(4)
, CPUString("generic")
, TargetType(isELF) // Default to ELF unless otherwise specified.
@@ -109,8 +114,6 @@ ARMSubtarget::ARMSubtarget(const std::string &TT, const std::string &FS,
if (UseNEONFP.getPosition() == 0)
UseNEONForSinglePrecisionFP = true;
}
- HasBranchTargetBuffer = (CPUString == "cortex-a8" ||
- CPUString == "cortex-a9");
}
/// GVIsIndirectSymbol - true if the GV will be accessed via an indirect symbol.
diff --git a/lib/Target/ARM/ARMSubtarget.h b/lib/Target/ARM/ARMSubtarget.h
index 3d0e01e..3f06b7b 100644
--- a/lib/Target/ARM/ARMSubtarget.h
+++ b/lib/Target/ARM/ARMSubtarget.h
@@ -50,9 +50,6 @@ protected:
/// determine if NEON should actually be used.
bool UseNEONForSinglePrecisionFP;
- /// HasBranchTargetBuffer - True if processor can predict indirect branches.
- bool HasBranchTargetBuffer;
-
/// IsThumb - True if we are in thumb mode, false if in ARM mode.
bool IsThumb;
@@ -65,6 +62,10 @@ protected:
/// IsR9Reserved - True if R9 is not available as a general purpose register.
bool IsR9Reserved;
+ /// UseMovt - True if MOVT / MOVW pairs are used for materialization of 32-bit
+ /// imms (including global addresses).
+ bool UseMovt;
+
/// stackAlignment - The minimum alignment known to hold for the stack frame on
/// entry to the function and which must be maintained by every function.
unsigned stackAlignment;
@@ -126,12 +127,12 @@ protected:
bool isThumb2() const { return IsThumb && (ThumbMode == Thumb2); }
bool hasThumb2() const { return ThumbMode >= Thumb2; }
- bool hasBranchTargetBuffer() const { return HasBranchTargetBuffer; }
-
bool isR9Reserved() const { return IsR9Reserved; }
+ bool useMovt() const { return UseMovt && hasV6T2Ops(); }
+
const std::string & getCPUString() const { return CPUString; }
-
+
/// enablePostRAScheduler - True at 'More' optimization.
bool enablePostRAScheduler(CodeGenOpt::Level OptLevel,
TargetSubtarget::AntiDepBreakMode& Mode,
diff --git a/lib/Target/ARM/AsmPrinter/ARMAsmPrinter.cpp b/lib/Target/ARM/AsmPrinter/ARMAsmPrinter.cpp
index dd4a240..692bb19 100644
--- a/lib/Target/ARM/AsmPrinter/ARMAsmPrinter.cpp
+++ b/lib/Target/ARM/AsmPrinter/ARMAsmPrinter.cpp
@@ -330,6 +330,8 @@ bool ARMAsmPrinter::runOnMachineFunction(MachineFunction &MF) {
void ARMAsmPrinter::printOperand(const MachineInstr *MI, int OpNum,
const char *Modifier) {
const MachineOperand &MO = MI->getOperand(OpNum);
+ unsigned TF = MO.getTargetFlags();
+
switch (MO.getType()) {
default:
assert(0 && "<unknown operand type>");
@@ -356,12 +358,12 @@ void ARMAsmPrinter::printOperand(const MachineInstr *MI, int OpNum,
case MachineOperand::MO_Immediate: {
int64_t Imm = MO.getImm();
O << '#';
- if (Modifier) {
- if (strcmp(Modifier, "lo16") == 0)
- O << ":lower16:";
- else if (strcmp(Modifier, "hi16") == 0)
- O << ":upper16:";
- }
+ if ((Modifier && strcmp(Modifier, "lo16") == 0) ||
+ (TF & ARMII::MO_LO16))
+ O << ":lower16:";
+ else if ((Modifier && strcmp(Modifier, "hi16") == 0) ||
+ (TF & ARMII::MO_HI16))
+ O << ":upper16:";
O << Imm;
break;
}
@@ -371,6 +373,13 @@ void ARMAsmPrinter::printOperand(const MachineInstr *MI, int OpNum,
case MachineOperand::MO_GlobalAddress: {
bool isCallOp = Modifier && !strcmp(Modifier, "call");
GlobalValue *GV = MO.getGlobal();
+
+ if ((Modifier && strcmp(Modifier, "lo16") == 0) ||
+ (TF & ARMII::MO_LO16))
+ O << ":lower16:";
+ else if ((Modifier && strcmp(Modifier, "hi16") == 0) ||
+ (TF & ARMII::MO_HI16))
+ O << ":upper16:";
O << Mang->getMangledName(GV);
printOffset(MO.getOffset());
@@ -998,7 +1007,7 @@ void ARMAsmPrinter::printNoHashImmediate(const MachineInstr *MI, int OpNum) {
void ARMAsmPrinter::printVFPf32ImmOperand(const MachineInstr *MI, int OpNum) {
const ConstantFP *FP = MI->getOperand(OpNum).getFPImm();
- O << '#' << ARM::getVFPf32Imm(FP->getValueAPF());
+ O << '#' << FP->getValueAPF().convertToFloat();
if (VerboseAsm) {
O.PadToColumn(MAI->getCommentColumn());
O << MAI->getCommentString() << ' ';
@@ -1008,7 +1017,7 @@ void ARMAsmPrinter::printVFPf32ImmOperand(const MachineInstr *MI, int OpNum) {
void ARMAsmPrinter::printVFPf64ImmOperand(const MachineInstr *MI, int OpNum) {
const ConstantFP *FP = MI->getOperand(OpNum).getFPImm();
- O << '#' << ARM::getVFPf64Imm(FP->getValueAPF());
+ O << '#' << FP->getValueAPF().convertToDouble();
if (VerboseAsm) {
O.PadToColumn(MAI->getCommentColumn());
O << MAI->getCommentString() << ' ';
diff --git a/lib/Target/ARM/NEONMoveFix.cpp b/lib/Target/ARM/NEONMoveFix.cpp
index 7d767ec..50abcf4 100644
--- a/lib/Target/ARM/NEONMoveFix.cpp
+++ b/lib/Target/ARM/NEONMoveFix.cpp
@@ -81,8 +81,8 @@ bool NEONMoveFixPass::InsertMoves(MachineBasicBlock &MBB) {
// afterwards
// - The imp-defs / imp-uses are superregs only, we don't care about
// them.
- BuildMI(MBB, *MI, MI->getDebugLoc(),
- TII->get(ARM::VMOVDneon), DestReg).addReg(SrcReg);
+ AddDefaultPred(BuildMI(MBB, *MI, MI->getDebugLoc(),
+ TII->get(ARM::VMOVDneon), DestReg).addReg(SrcReg));
MBB.erase(MI);
MachineBasicBlock::iterator I = prior(NextMII);
MI = &*I;
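Context for this change, hedged: with the NEON format rework above, VMOVDneon carries predicate operands, so a bare BuildMI would leave the instruction short of operands.

  // AddDefaultPred appends the 'always execute' predicate, roughly:
  //   MIB.addImm((int64_t)ARMCC::AL).addReg(0);
  // supplying the two predicate operands the predicable VMOVDneon
  // definition now expects.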
diff --git a/lib/Target/ARM/Thumb2SizeReduction.cpp b/lib/Target/ARM/Thumb2SizeReduction.cpp
index ad1739c..b2fd7b3 100644
--- a/lib/Target/ARM/Thumb2SizeReduction.cpp
+++ b/lib/Target/ARM/Thumb2SizeReduction.cpp
@@ -78,7 +78,7 @@ namespace {
{ ARM::t2LSRri, ARM::tLSRri, 0, 5, 0, 1, 0, 0,0, 0 },
{ ARM::t2LSRrr, 0, ARM::tLSRrr, 0, 0, 0, 1, 0,0, 0 },
{ ARM::t2MOVi, ARM::tMOVi8, 0, 8, 0, 1, 0, 0,0, 0 },
- { ARM::t2MOVi16,ARM::tMOVi8, 0, 8, 0, 1, 0, 0,0, 0 },
+ { ARM::t2MOVi16,ARM::tMOVi8, 0, 8, 0, 1, 0, 0,0, 1 },
// FIXME: Do we need the 16-bit 'S' variant?
{ ARM::t2MOVr,ARM::tMOVgpr2gpr,0, 0, 0, 0, 0, 1,0, 0 },
{ ARM::t2MOVCCr,0, ARM::tMOVCCr, 0, 0, 0, 0, 0,1, 0 },
@@ -413,6 +413,12 @@ Thumb2SizeReduce::ReduceSpecial(MachineBasicBlock &MBB, MachineInstr *MI,
if (MI->getOperand(2).getImm() == 0)
return ReduceToNarrow(MBB, MI, Entry, LiveCPSR);
break;
+ case ARM::t2MOVi16:
+ // Can convert only 'pure' immediate operands, not immediates obtained as
+ // globals' addresses.
+ if (MI->getOperand(1).isImm())
+ return ReduceToNarrow(MBB, MI, Entry, LiveCPSR);
+ break;
}
return false;
}
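A hedged illustration of what the new t2MOVi16 case accepts and rejects:

  // movw r0, #42          ; plain MO_Immediate, fits in 8 bits -> can be
  //                       ;   narrowed to the 16-bit "movs r0, #42"
  // movw r0, :lower16:_g  ; operand is a global address, isImm() is false
  //                       ;   -> stays 32-bit (value unknown until link)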
diff --git a/lib/Target/Alpha/AlphaISelLowering.cpp b/lib/Target/Alpha/AlphaISelLowering.cpp
index 9217522..b5579f4 100644
--- a/lib/Target/Alpha/AlphaISelLowering.cpp
+++ b/lib/Target/Alpha/AlphaISelLowering.cpp
@@ -127,10 +127,6 @@ AlphaTargetLowering::AlphaTargetLowering(TargetMachine &TM)
setOperationAction(ISD::BIT_CONVERT, MVT::f32, Promote);
- // We don't have line number support yet.
- setOperationAction(ISD::DBG_STOPPOINT, MVT::Other, Expand);
- setOperationAction(ISD::DEBUG_LOC, MVT::Other, Expand);
- setOperationAction(ISD::DBG_LABEL, MVT::Other, Expand);
setOperationAction(ISD::EH_LABEL, MVT::Other, Expand);
// Not implemented yet.
diff --git a/lib/Target/Alpha/AlphaJITInfo.cpp b/lib/Target/Alpha/AlphaJITInfo.cpp
index d328135..b3b711e 100644
--- a/lib/Target/Alpha/AlphaJITInfo.cpp
+++ b/lib/Target/Alpha/AlphaJITInfo.cpp
@@ -190,17 +190,27 @@ extern "C" {
#endif
}
+TargetJITInfo::StubLayout AlphaJITInfo::getStubLayout() {
+ // The stub contains 19 4-byte instructions, aligned at 4 bytes:
+ // R0 = R27
+ // 8 x "R27 <<= 8; R27 |= 8-bits-of-Target" == 16 instructions
+ // JMP R27
+ // Magic number so the compilation callback can recognize the stub.
+ StubLayout Result = {19 * 4, 4};
+ return Result;
+}
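The 19-word figure checks out against the comment:

  // 1 (R0 = R27) + 8 x 2 (shift/or in 8 bits of the target each)
  // + 1 (jmp R27) + 1 (magic word) = 19 words x 4 bytes = 76 bytes,
  // 4-byte aligned, hence StubLayout{19 * 4, 4}.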
+
void *AlphaJITInfo::emitFunctionStub(const Function* F, void *Fn,
JITCodeEmitter &JCE) {
+ MachineCodeEmitter::BufferState BS;
//assert(Fn == AlphaCompilationCallback && "Where are you going?\n");
//Do things in a stupid slow way!
- JCE.startGVStub(F, 19*4);
void* Addr = (void*)(intptr_t)JCE.getCurrentPCValue();
for (int x = 0; x < 19; ++ x)
JCE.emitWordLE(0);
EmitBranchToAt(Addr, Fn);
DEBUG(errs() << "Emitting Stub to " << Fn << " at [" << Addr << "]\n");
- return JCE.finishGVStub(F);
+ return Addr;
}
TargetJITInfo::LazyResolverFn
diff --git a/lib/Target/Alpha/AlphaJITInfo.h b/lib/Target/Alpha/AlphaJITInfo.h
index ecb467f..bd358a4 100644
--- a/lib/Target/Alpha/AlphaJITInfo.h
+++ b/lib/Target/Alpha/AlphaJITInfo.h
@@ -31,6 +31,7 @@ namespace llvm {
explicit AlphaJITInfo(TargetMachine &tm) : TM(tm)
{ useGOT = true; }
+ virtual StubLayout getStubLayout();
virtual void *emitFunctionStub(const Function* F, void *Fn,
JITCodeEmitter &JCE);
virtual LazyResolverFn getLazyResolverFunction(JITCompilerFn);
diff --git a/lib/Target/Blackfin/BlackfinISelLowering.cpp b/lib/Target/Blackfin/BlackfinISelLowering.cpp
index c5c96f8..ad2510a 100644
--- a/lib/Target/Blackfin/BlackfinISelLowering.cpp
+++ b/lib/Target/Blackfin/BlackfinISelLowering.cpp
@@ -114,10 +114,6 @@ BlackfinTargetLowering::BlackfinTargetLowering(TargetMachine &TM)
// READCYCLECOUNTER needs special type legalization.
setOperationAction(ISD::READCYCLECOUNTER, MVT::i64, Custom);
- // We don't have line number support yet.
- setOperationAction(ISD::DBG_STOPPOINT, MVT::Other, Expand);
- setOperationAction(ISD::DEBUG_LOC, MVT::Other, Expand);
- setOperationAction(ISD::DBG_LABEL, MVT::Other, Expand);
setOperationAction(ISD::EH_LABEL, MVT::Other, Expand);
// Use the default implementation.
diff --git a/lib/Target/Blackfin/BlackfinRegisterInfo.td b/lib/Target/Blackfin/BlackfinRegisterInfo.td
index 642d10f..d396cc8 100644
--- a/lib/Target/Blackfin/BlackfinRegisterInfo.td
+++ b/lib/Target/Blackfin/BlackfinRegisterInfo.td
@@ -44,7 +44,7 @@ class Ra<bits<3> num, string n, list<Register> subs> : BlackfinReg<n> {
let Num = num;
}
-// Ywo halves of 32-bit register
+// Two halves of 32-bit register
multiclass Rss<bits<3> group, bits<3> num, string n> {
def H : Rs<group, num, 1, !strconcat(n, ".h")>;
def L : Rs<group, num, 0, !strconcat(n, ".l")>;
diff --git a/lib/Target/CellSPU/SPUISelLowering.cpp b/lib/Target/CellSPU/SPUISelLowering.cpp
index 4dd82a6..23e192e 100644
--- a/lib/Target/CellSPU/SPUISelLowering.cpp
+++ b/lib/Target/CellSPU/SPUISelLowering.cpp
@@ -387,10 +387,6 @@ SPUTargetLowering::SPUTargetLowering(SPUTargetMachine &TM)
// We cannot sextinreg(i1). Expand to shifts.
setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
- // Support label based line numbers.
- setOperationAction(ISD::DBG_STOPPOINT, MVT::Other, Expand);
- setOperationAction(ISD::DEBUG_LOC, MVT::Other, Expand);
-
// We want to legalize GlobalAddress and ConstantPool nodes into the
// appropriate instructions to materialize the address.
for (unsigned sctype = (unsigned) MVT::i8; sctype < (unsigned) MVT::f128;
diff --git a/lib/Target/CellSPU/SPUInstrInfo.td b/lib/Target/CellSPU/SPUInstrInfo.td
index d3b575a..f24ffd2 100644
--- a/lib/Target/CellSPU/SPUInstrInfo.td
+++ b/lib/Target/CellSPU/SPUInstrInfo.td
@@ -31,14 +31,6 @@ let hasCtrlDep = 1, Defs = [R1], Uses = [R1] in {
}
//===----------------------------------------------------------------------===//
-// DWARF debugging Pseudo Instructions
-//===----------------------------------------------------------------------===//
-
-def DWARF_LOC : Pseudo<(outs), (ins i32imm:$line, i32imm:$col, i32imm:$file),
- ".loc $file, $line, $col",
- [(dwarf_loc (i32 imm:$line), (i32 imm:$col), (i32 imm:$file))]>;
-
-//===----------------------------------------------------------------------===//
// Loads:
// NB: The ordering is actually important, since the instruction selection
// will try each of the instructions in sequence, i.e., the D-form first with
diff --git a/lib/Target/MSP430/MSP430ISelDAGToDAG.cpp b/lib/Target/MSP430/MSP430ISelDAGToDAG.cpp
index c0084be..beccb2c 100644
--- a/lib/Target/MSP430/MSP430ISelDAGToDAG.cpp
+++ b/lib/Target/MSP430/MSP430ISelDAGToDAG.cpp
@@ -312,8 +312,8 @@ bool MSP430DAGToDAGISel::SelectAddr(SDValue Op, SDValue N,
else if (AM.JT != -1)
Disp = CurDAG->getTargetJumpTable(AM.JT, MVT::i16, 0/*AM.SymbolFlags*/);
else if (AM.BlockAddr)
- Disp = CurDAG->getBlockAddress(AM.BlockAddr, DebugLoc()/*MVT::i32*/,
- true /*AM.SymbolFlags*/);
+ Disp = CurDAG->getBlockAddress(AM.BlockAddr, MVT::i32,
+ true, 0/*AM.SymbolFlags*/);
else
Disp = CurDAG->getTargetConstant(AM.Disp, MVT::i16);
diff --git a/lib/Target/MSP430/MSP430ISelLowering.cpp b/lib/Target/MSP430/MSP430ISelLowering.cpp
index 5a925f5..29cc370 100644
--- a/lib/Target/MSP430/MSP430ISelLowering.cpp
+++ b/lib/Target/MSP430/MSP430ISelLowering.cpp
@@ -162,7 +162,7 @@ SDValue MSP430TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) {
/// getFunctionAlignment - Return the Log2 alignment of this function.
unsigned MSP430TargetLowering::getFunctionAlignment(const Function *F) const {
- return F->hasFnAttr(Attribute::OptimizeForSize) ? 1 : 4;
+ return F->hasFnAttr(Attribute::OptimizeForSize) ? 1 : 2;
}
//===----------------------------------------------------------------------===//
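Since getFunctionAlignment returns a log2 value, the fix above drops the
default alignment from 16 bytes to 4. A minimal C++ helper (hypothetical,
not part of the patch) makes the encoding explicit:

  // Decode a log2 alignment into bytes: 1 -> 2 (optimize-for-size),
  // 2 -> 4 (the corrected default), 4 -> 16 (the old, wrong default).
  unsigned alignmentInBytes(unsigned Log2Align) {
    return 1u << Log2Align;
  }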
@@ -594,9 +594,17 @@ static SDValue EmitCMP(SDValue &LHS, SDValue &RHS, SDValue &TargetCC,
default: llvm_unreachable("Invalid integer condition!");
case ISD::SETEQ:
TCC = MSP430CC::COND_E; // aka COND_Z
+ // Minor optimization: if RHS is a constant, swap the operands so the
+ // constant can be folded into the comparison.
+ if (RHS.getOpcode() == ISD::Constant)
+ std::swap(LHS, RHS);
break;
case ISD::SETNE:
TCC = MSP430CC::COND_NE; // aka COND_NZ
+ // Minor optimization: if RHS is a constant, swap the operands so the
+ // constant can be folded into the comparison.
+ if (RHS.getOpcode() == ISD::Constant)
+ std::swap(LHS, RHS);
break;
case ISD::SETULE:
std::swap(LHS, RHS); // FALLTHROUGH
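The swaps added for SETEQ and SETNE above are a small canonicalization; a
standalone sketch of the idea (plain C++ with a hypothetical helper, not
the LLVM API):

  #include <utility>

  // For commutative predicates the operand order is free, so move a
  // constant operand to the left; a later pattern match can then fold
  // it into the compare instruction as an immediate.
  template <typename SDV>
  void canonicalizeCmp(SDV &LHS, SDV &RHS, bool RHSIsConstant) {
    if (RHSIsConstant)
      std::swap(LHS, RHS);
  }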
diff --git a/lib/Target/MSP430/MSP430InstrInfo.td b/lib/Target/MSP430/MSP430InstrInfo.td
index c3bbfe8..7a26f6c 100644
--- a/lib/Target/MSP430/MSP430InstrInfo.td
+++ b/lib/Target/MSP430/MSP430InstrInfo.td
@@ -823,37 +823,6 @@ def CMP16mr : Pseudo<(outs), (ins memsrc:$src1, GR16:$src2),
"cmp.w\t{$src1, $src2}",
[(MSP430cmp (load addr:$src1), GR16:$src2), (implicit SRW)]>;
-def CMP8mi0 : Pseudo<(outs), (ins memsrc:$src1),
- "cmp.b\t{$src1, #0}",
- [(MSP430cmp (load addr:$src1), (i8 0)), (implicit SRW)]>;
-def CMP16mi0: Pseudo<(outs), (ins memsrc:$src1),
- "cmp.w\t{$src1, #0}",
- [(MSP430cmp (load addr:$src1), (i16 0)), (implicit SRW)]>;
-def CMP8mi1 : Pseudo<(outs), (ins memsrc:$src1),
- "cmp.b\t{$src1, #1}",
- [(MSP430cmp (load addr:$src1), (i8 1)), (implicit SRW)]>;
-def CMP16mi1: Pseudo<(outs), (ins memsrc:$src1),
- "cmp.w\t{$src1, #1}",
- [(MSP430cmp (load addr:$src1), (i16 1)), (implicit SRW)]>;
-def CMP8mi2 : Pseudo<(outs), (ins memsrc:$src1),
- "cmp.b\t{$src1, #2}",
- [(MSP430cmp (load addr:$src1), (i8 2)), (implicit SRW)]>;
-def CMP16mi2: Pseudo<(outs), (ins memsrc:$src1),
- "cmp.w\t{$src1, #2}",
- [(MSP430cmp (load addr:$src1), (i16 2)), (implicit SRW)]>;
-def CMP8mi4 : Pseudo<(outs), (ins memsrc:$src1),
- "cmp.b\t{$src1, #4}",
- [(MSP430cmp (load addr:$src1), (i8 4)), (implicit SRW)]>;
-def CMP16mi4: Pseudo<(outs), (ins memsrc:$src1),
- "cmp.w\t{$src1, #4}",
- [(MSP430cmp (load addr:$src1), (i16 4)), (implicit SRW)]>;
-def CMP8mi8 : Pseudo<(outs), (ins memsrc:$src1),
- "cmp.b\t{$src1, #8}",
- [(MSP430cmp (load addr:$src1), (i8 8)), (implicit SRW)]>;
-def CMP16mi8: Pseudo<(outs), (ins memsrc:$src1),
- "cmp.w\t{$src1, #8}",
- [(MSP430cmp (load addr:$src1), (i16 8)), (implicit SRW)]>;
-
} // Defs = [SRW]
//===----------------------------------------------------------------------===//
diff --git a/lib/Target/MSP430/MSP430MCAsmInfo.cpp b/lib/Target/MSP430/MSP430MCAsmInfo.cpp
index 4e3a8d0..516eacb 100644
--- a/lib/Target/MSP430/MSP430MCAsmInfo.cpp
+++ b/lib/Target/MSP430/MSP430MCAsmInfo.cpp
@@ -19,6 +19,7 @@ MSP430MCAsmInfo::MSP430MCAsmInfo(const Target &T, const StringRef &TT) {
WeakRefDirective ="\t.weak\t";
SetDirective = "\t.set\t";
PCSymbol=".";
+ CommentString = ";";
AlignmentIsInBytes = false;
AllowNameToStartWithDigit = true;
diff --git a/lib/Target/Mips/MipsISelDAGToDAG.cpp b/lib/Target/Mips/MipsISelDAGToDAG.cpp
index 2990ba9..ede111d 100644
--- a/lib/Target/Mips/MipsISelDAGToDAG.cpp
+++ b/lib/Target/Mips/MipsISelDAGToDAG.cpp
@@ -144,6 +144,7 @@ SelectAddr(SDValue Op, SDValue Addr, SDValue &Offset, SDValue &Base)
// on PIC code Load GA
if (TM.getRelocationModel() == Reloc::PIC_) {
if ((Addr.getOpcode() == ISD::TargetGlobalAddress) ||
+ (Addr.getOpcode() == ISD::TargetConstantPool) ||
(Addr.getOpcode() == ISD::TargetJumpTable)){
Base = CurDAG->getRegister(Mips::GP, MVT::i32);
Offset = Addr;
@@ -174,23 +175,21 @@ SelectAddr(SDValue Op, SDValue Addr, SDValue &Offset, SDValue &Base)
}
// When loading from constant pools, load the lower address part in
- // the instruction itself. Instead of:
+ // the instruction itself. For example, instead of:
// lui $2, %hi($CPI1_0)
// addiu $2, $2, %lo($CPI1_0)
// lwc1 $f0, 0($2)
// Generate:
// lui $2, %hi($CPI1_0)
// lwc1 $f0, %lo($CPI1_0)($2)
- if (Addr.getOperand(0).getOpcode() == MipsISD::Hi &&
+ if ((Addr.getOperand(0).getOpcode() == MipsISD::Hi ||
+ Addr.getOperand(0).getOpcode() == ISD::LOAD) &&
Addr.getOperand(1).getOpcode() == MipsISD::Lo) {
SDValue LoVal = Addr.getOperand(1);
- if (ConstantPoolSDNode *CP = dyn_cast<ConstantPoolSDNode>(
- LoVal.getOperand(0))) {
- if (!CP->getOffset()) {
- Base = Addr.getOperand(0);
- Offset = LoVal.getOperand(0);
- return true;
- }
+ if (dyn_cast<ConstantPoolSDNode>(LoVal.getOperand(0))) {
+ Base = Addr.getOperand(0);
+ Offset = LoVal.getOperand(0);
+ return true;
}
}
}
@@ -235,6 +234,10 @@ SDNode *MipsDAGToDAGISel::SelectLoadFp64(SDValue N) {
else
return NULL;
+ // Choose the offsets depending on the endianness
+ if (TM.getTargetData()->isBigEndian())
+ std::swap(Offset0, Offset1);
+
// Instead of:
// ldc $f0, X($3)
// Generate:
@@ -296,6 +299,10 @@ SDNode *MipsDAGToDAGISel::SelectStoreFp64(SDValue N) {
else
return NULL;
+ // Choose the offsets depending on the endianness
+ if (TM.getTargetData()->isBigEndian())
+ std::swap(Offset0, Offset1);
+
// Instead of:
// sdc $f0, X($3)
// Generate:
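The endianness swap added to both SelectLoadFp64 and SelectStoreFp64 above
follows from the word layout of a 64-bit value split into 32-bit halves; a
minimal sketch (plain C++, hypothetical helper):

  #include <utility>

  // A 64-bit FP value accessed as two 32-bit words keeps its low word
  // at offset 0 on little-endian targets; big-endian targets place the
  // high word first, so the two offsets are swapped.
  void pickHalfOffsets(bool IsBigEndian, unsigned &Offset0, unsigned &Offset1) {
    Offset0 = 0;
    Offset1 = 4;
    if (IsBigEndian)
      std::swap(Offset0, Offset1);
  }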
diff --git a/lib/Target/Mips/MipsISelLowering.cpp b/lib/Target/Mips/MipsISelLowering.cpp
index c9a43b4..ced8b93 100644
--- a/lib/Target/Mips/MipsISelLowering.cpp
+++ b/lib/Target/Mips/MipsISelLowering.cpp
@@ -132,10 +132,6 @@ MipsTargetLowering(MipsTargetMachine &TM)
setOperationAction(ISD::FLOG10, MVT::f32, Expand);
setOperationAction(ISD::FEXP, MVT::f32, Expand);
- // We don't have line number support yet.
- setOperationAction(ISD::DBG_STOPPOINT, MVT::Other, Expand);
- setOperationAction(ISD::DEBUG_LOC, MVT::Other, Expand);
- setOperationAction(ISD::DBG_LABEL, MVT::Other, Expand);
setOperationAction(ISD::EH_LABEL, MVT::Other, Expand);
// Use the default for now
@@ -567,8 +563,6 @@ LowerConstantPool(SDValue Op, SelectionDAG &DAG)
SDValue ResNode;
ConstantPoolSDNode *N = cast<ConstantPoolSDNode>(Op);
Constant *C = N->getConstVal();
- SDValue CP = DAG.getTargetConstantPool(C, MVT::i32, N->getAlignment(),
- N->getOffset(), MipsII::MO_ABS_HILO);
// FIXME there isn't actually debug info here
DebugLoc dl = Op.getDebugLoc();
@@ -581,11 +575,21 @@ LowerConstantPool(SDValue Op, SelectionDAG &DAG)
// SDValue GPRelNode = DAG.getNode(MipsISD::GPRel, MVT::i32, CP);
// SDValue GOT = DAG.getGLOBAL_OFFSET_TABLE(MVT::i32);
// ResNode = DAG.getNode(ISD::ADD, MVT::i32, GOT, GPRelNode);
- //} else { // %hi/%lo relocation
+
+ if (getTargetMachine().getRelocationModel() != Reloc::PIC_) {
+ SDValue CP = DAG.getTargetConstantPool(C, MVT::i32, N->getAlignment(),
+ N->getOffset(), MipsII::MO_ABS_HILO);
SDValue HiPart = DAG.getNode(MipsISD::Hi, dl, MVT::i32, CP);
SDValue Lo = DAG.getNode(MipsISD::Lo, dl, MVT::i32, CP);
ResNode = DAG.getNode(ISD::ADD, dl, MVT::i32, HiPart, Lo);
- //}
+ } else {
+ SDValue CP = DAG.getTargetConstantPool(C, MVT::i32, N->getAlignment(),
+ N->getOffset(), MipsII::MO_GOT);
+ SDValue Load = DAG.getLoad(MVT::i32, dl, DAG.getEntryNode(),
+ CP, NULL, 0);
+ SDValue Lo = DAG.getNode(MipsISD::Lo, dl, MVT::i32, CP);
+ ResNode = DAG.getNode(ISD::ADD, dl, MVT::i32, Load, Lo);
+ }
return ResNode;
}
diff --git a/lib/Target/Mips/MipsInstrInfo.cpp b/lib/Target/Mips/MipsInstrInfo.cpp
index af64c9f..6d8e160 100644
--- a/lib/Target/Mips/MipsInstrInfo.cpp
+++ b/lib/Target/Mips/MipsInstrInfo.cpp
@@ -200,22 +200,33 @@ void MipsInstrInfo::
storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
unsigned SrcReg, bool isKill, int FI,
const TargetRegisterClass *RC) const {
- unsigned Opc;
-
DebugLoc DL = DebugLoc::getUnknownLoc();
if (I != MBB.end()) DL = I->getDebugLoc();
if (RC == Mips::CPURegsRegisterClass)
- Opc = Mips::SW;
+ BuildMI(MBB, I, DL, get(Mips::SW)).addReg(SrcReg, getKillRegState(isKill))
+ .addImm(0).addFrameIndex(FI);
else if (RC == Mips::FGR32RegisterClass)
- Opc = Mips::SWC1;
- else {
- assert(RC == Mips::AFGR64RegisterClass);
- Opc = Mips::SDC1;
- }
-
- BuildMI(MBB, I, DL, get(Opc)).addReg(SrcReg, getKillRegState(isKill))
+ BuildMI(MBB, I, DL, get(Mips::SWC1)).addReg(SrcReg, getKillRegState(isKill))
.addImm(0).addFrameIndex(FI);
+ else if (RC == Mips::AFGR64RegisterClass) {
+ if (!TM.getSubtarget<MipsSubtarget>().isMips1()) {
+ BuildMI(MBB, I, DL, get(Mips::SDC1))
+ .addReg(SrcReg, getKillRegState(isKill))
+ .addImm(0).addFrameIndex(FI);
+ } else {
+ const TargetRegisterInfo *TRI =
+ MBB.getParent()->getTarget().getRegisterInfo();
+ const unsigned *SubSet = TRI->getSubRegisters(SrcReg);
+ BuildMI(MBB, I, DL, get(Mips::SWC1))
+ .addReg(SubSet[0], getKillRegState(isKill))
+ .addImm(0).addFrameIndex(FI);
+ BuildMI(MBB, I, DL, get(Mips::SWC1))
+ .addReg(SubSet[1], getKillRegState(isKill))
+ .addImm(4).addFrameIndex(FI);
+ }
+ } else
+ llvm_unreachable("Register class not handled!");
}
void MipsInstrInfo::
@@ -223,19 +234,27 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
unsigned DestReg, int FI,
const TargetRegisterClass *RC) const
{
- unsigned Opc;
- if (RC == Mips::CPURegsRegisterClass)
- Opc = Mips::LW;
- else if (RC == Mips::FGR32RegisterClass)
- Opc = Mips::LWC1;
- else {
- assert(RC == Mips::AFGR64RegisterClass);
- Opc = Mips::LDC1;
- }
-
DebugLoc DL = DebugLoc::getUnknownLoc();
if (I != MBB.end()) DL = I->getDebugLoc();
- BuildMI(MBB, I, DL, get(Opc), DestReg).addImm(0).addFrameIndex(FI);
+
+ if (RC == Mips::CPURegsRegisterClass)
+ BuildMI(MBB, I, DL, get(Mips::LW), DestReg).addImm(0).addFrameIndex(FI);
+ else if (RC == Mips::FGR32RegisterClass)
+ BuildMI(MBB, I, DL, get(Mips::LWC1), DestReg).addImm(0).addFrameIndex(FI);
+ else if (RC == Mips::AFGR64RegisterClass) {
+ if (!TM.getSubtarget<MipsSubtarget>().isMips1()) {
+ BuildMI(MBB, I, DL, get(Mips::LDC1), DestReg).addImm(0).addFrameIndex(FI);
+ } else {
+ const TargetRegisterInfo *TRI =
+ MBB.getParent()->getTarget().getRegisterInfo();
+ const unsigned *SubSet = TRI->getSubRegisters(DestReg);
+ BuildMI(MBB, I, DL, get(Mips::LWC1), SubSet[0])
+ .addImm(0).addFrameIndex(FI);
+ BuildMI(MBB, I, DL, get(Mips::LWC1), SubSet[1])
+ .addImm(4).addFrameIndex(FI);
+ }
+ } else
+ llvm_unreachable("Register class not handled!");
}
MachineInstr *MipsInstrInfo::
@@ -278,11 +297,14 @@ foldMemoryOperandImpl(MachineFunction &MF,
const TargetRegisterClass
*RC = RI.getRegClass(MI->getOperand(0).getReg());
unsigned StoreOpc, LoadOpc;
+ bool IsMips1 = TM.getSubtarget<MipsSubtarget>().isMips1();
if (RC == Mips::FGR32RegisterClass) {
LoadOpc = Mips::LWC1; StoreOpc = Mips::SWC1;
} else {
assert(RC == Mips::AFGR64RegisterClass);
+ // Mips1 doesn't have ldc/sdc instructions.
+ if (IsMips1) break;
LoadOpc = Mips::LDC1; StoreOpc = Mips::SDC1;
}
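The MIPS-I paths above split a 64-bit FP spill and reload into two 32-bit
accesses; the slot layout they rely on, as a plain C++ sketch (not the
LLVM API):

  #include <cstdint>

  // Spill a 64-bit FP register as its two 32-bit subregisters, at
  // offsets 0 and 4 of the frame slot, matching the SWC1 pairs above.
  void spill64AsTwoWords(const uint32_t Sub[2], uint32_t *Slot) {
    Slot[0] = Sub[0];  // subregister 0 -> offset 0
    Slot[1] = Sub[1];  // subregister 1 -> offset 4
  }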
diff --git a/lib/Target/Mips/MipsRegisterInfo.cpp b/lib/Target/Mips/MipsRegisterInfo.cpp
index ad326db..cae4181 100644
--- a/lib/Target/Mips/MipsRegisterInfo.cpp
+++ b/lib/Target/Mips/MipsRegisterInfo.cpp
@@ -107,8 +107,7 @@ getCalleeSavedRegs(const MachineFunction *MF) const
static const unsigned BitMode32CalleeSavedRegs[] = {
Mips::S0, Mips::S1, Mips::S2, Mips::S3,
Mips::S4, Mips::S5, Mips::S6, Mips::S7,
- Mips::F20, Mips::F22, Mips::F24, Mips::F26, Mips::F28, Mips::F30,
- Mips::D10, Mips::D11, Mips::D12, Mips::D13, Mips::D14, Mips::D15,0
+ Mips::F20, Mips::F22, Mips::F24, Mips::F26, Mips::F28, Mips::F30, 0
};
if (Subtarget.isSingleFloat())
@@ -136,9 +135,7 @@ MipsRegisterInfo::getCalleeSavedRegClasses(const MachineFunction *MF) const
&Mips::CPURegsRegClass, &Mips::CPURegsRegClass, &Mips::CPURegsRegClass,
&Mips::CPURegsRegClass, &Mips::CPURegsRegClass,
&Mips::FGR32RegClass, &Mips::FGR32RegClass, &Mips::FGR32RegClass,
- &Mips::FGR32RegClass, &Mips::FGR32RegClass, &Mips::FGR32RegClass,
- &Mips::AFGR64RegClass, &Mips::AFGR64RegClass, &Mips::AFGR64RegClass,
- &Mips::AFGR64RegClass, &Mips::AFGR64RegClass, &Mips::AFGR64RegClass, 0
+ &Mips::FGR32RegClass, &Mips::FGR32RegClass, &Mips::FGR32RegClass, 0
};
if (Subtarget.isSingleFloat())
diff --git a/lib/Target/PIC16/PIC16DebugInfo.cpp b/lib/Target/PIC16/PIC16DebugInfo.cpp
index 0ed44d2..6e0e3ce 100644
--- a/lib/Target/PIC16/PIC16DebugInfo.cpp
+++ b/lib/Target/PIC16/PIC16DebugInfo.cpp
@@ -306,10 +306,9 @@ void PIC16DbgInfo::EmitCompositeTypeElements (DICompositeType CTy,
int ElementAux[PIC16Dbg::AuxSize] = { 0 };
std::string TagName = "";
DIDerivedType DITy(Element.getNode());
- const char *ElementName = DITy.getName();
unsigned short ElementSize = DITy.getSizeInBits()/8;
// Get the mangled name for this structure/union element.
- std::string MangMemName = ElementName + SuffixNo;
+ std::string MangMemName = DITy.getName().str() + SuffixNo;
PopulateDebugInfo(DITy, TypeNo, HasAux, ElementAux, TagName);
short Class = 0;
if( CTy.getTag() == dwarf::DW_TAG_union_type)
@@ -337,12 +336,11 @@ void PIC16DbgInfo::EmitCompositeTypeDecls(Module &M) {
continue;
if (CTy.getTag() == dwarf::DW_TAG_union_type ||
CTy.getTag() == dwarf::DW_TAG_structure_type ) {
- const char *Name = CTy.getName();
// Get the number after llvm.dbg.composite and make UniqueSuffix from
// it.
std::string DIVar = CTy.getNode()->getNameStr();
std::string UniqueSuffix = "." + DIVar.substr(18);
- std::string MangledCTyName = Name + UniqueSuffix;
+ std::string MangledCTyName = CTy.getName().str() + UniqueSuffix;
unsigned short size = CTy.getSizeInBits()/8;
int Aux[PIC16Dbg::AuxSize] = {0};
// 7th and 8th byte represent size of structure/union.
diff --git a/lib/Target/PowerPC/PPCFrameInfo.h b/lib/Target/PowerPC/PPCFrameInfo.h
index 65f113e..73d30bf 100644
--- a/lib/Target/PowerPC/PPCFrameInfo.h
+++ b/lib/Target/PowerPC/PPCFrameInfo.h
@@ -42,11 +42,12 @@ public:
/// frame pointer.
static unsigned getFramePointerSaveOffset(bool isPPC64, bool isDarwinABI) {
// For the Darwin ABI:
- // Use the TOC save slot in the PowerPC linkage area for saving the frame
- // pointer (if needed.) LLVM does not generate code that uses the TOC (R2
- // is treated as a caller saved register.)
+ // We cannot use the TOC save slot (offset +20) in the PowerPC linkage area
+ // for saving the frame pointer (if needed.) While the published ABI has
+ // not used this slot since at least MacOSX 10.2, there is older code
+ // around that does use it, and that needs to continue to work.
if (isDarwinABI)
- return isPPC64 ? 40 : 20;
+ return isPPC64 ? -8U : -4U;
// SVR4 ABI: First slot in the general register save area.
return -4U;
@@ -90,6 +91,17 @@ public:
// With the SVR4 ABI, callee-saved registers have fixed offsets on the stack.
const SpillSlot *
getCalleeSavedSpillSlots(unsigned &NumEntries) const {
+ if (TM.getSubtarget<PPCSubtarget>().isDarwinABI()) {
+ NumEntries = 1;
+ if (TM.getSubtarget<PPCSubtarget>().isPPC64()) {
+ static const SpillSlot darwin64Offsets = {PPC::X31, -8};
+ return &darwin64Offsets;
+ } else {
+ static const SpillSlot darwinOffsets = {PPC::R31, -4};
+ return &darwinOffsets;
+ }
+ }
+
// Early exit if not using the SVR4 ABI.
if (!TM.getSubtarget<PPCSubtarget>().isSVR4ABI()) {
NumEntries = 0;
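A hedged restatement of the Darwin change above in plain C++, mirroring the
new getFramePointerSaveOffset (the helper itself is not part of the patch):

  // Darwin now saves the frame pointer at a negative, pointer-sized
  // offset instead of reusing the linkage area's TOC slot.
  int framePointerSaveOffset(bool IsPPC64, bool IsDarwinABI) {
    if (IsDarwinABI)
      return IsPPC64 ? -8 : -4;
    return -4;  // SVR4: first slot in the general register save area
  }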
diff --git a/lib/Target/PowerPC/PPCISelDAGToDAG.cpp b/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
index fb9a240..e7334b5 100644
--- a/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
+++ b/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
@@ -86,7 +86,7 @@ namespace {
/// isRotateAndMask - Returns true if Mask and Shift can be folded into a
/// rotate and mask opcode and mask operation.
- static bool isRotateAndMask(SDNode *N, unsigned Mask, bool IsShiftMask,
+ static bool isRotateAndMask(SDNode *N, unsigned Mask, bool isShiftMask,
unsigned &SH, unsigned &MB, unsigned &ME);
/// getGlobalBaseReg - insert code into the entry mbb to materialize the PIC
@@ -358,7 +358,7 @@ bool PPCDAGToDAGISel::isRunOfOnes(unsigned Val, unsigned &MB, unsigned &ME) {
}
bool PPCDAGToDAGISel::isRotateAndMask(SDNode *N, unsigned Mask,
- bool IsShiftMask, unsigned &SH,
+ bool isShiftMask, unsigned &SH,
unsigned &MB, unsigned &ME) {
// Don't even go down this path for i64, since different logic will be
// necessary for rldicl/rldicr/rldimi.
@@ -374,12 +374,12 @@ bool PPCDAGToDAGISel::isRotateAndMask(SDNode *N, unsigned Mask,
if (Opcode == ISD::SHL) {
// apply shift left to mask if it comes first
- if (IsShiftMask) Mask = Mask << Shift;
+ if (isShiftMask) Mask = Mask << Shift;
// determine which bits are made indeterminate by the shift
Indeterminant = ~(0xFFFFFFFFu << Shift);
} else if (Opcode == ISD::SRL) {
// apply shift right to mask if it comes first
- if (IsShiftMask) Mask = Mask >> Shift;
+ if (isShiftMask) Mask = Mask >> Shift;
// determine which bits are made indeterminate by the shift
Indeterminant = ~(0xFFFFFFFFu >> Shift);
// adjust for the left rotate
@@ -443,8 +443,7 @@ SDNode *PPCDAGToDAGISel::SelectBitfieldInsert(SDNode *N) {
unsigned MB, ME;
if (InsertMask && isRunOfOnes(InsertMask, MB, ME)) {
- SDValue Tmp1, Tmp2, Tmp3;
- bool DisjointMask = (TargetMask ^ InsertMask) == 0xFFFFFFFF;
+ SDValue Tmp1, Tmp2;
if ((Op1Opc == ISD::SHL || Op1Opc == ISD::SRL) &&
isInt32Immediate(Op1.getOperand(1), Value)) {
@@ -461,10 +460,9 @@ SDNode *PPCDAGToDAGISel::SelectBitfieldInsert(SDNode *N) {
Op1 = Op1.getOperand(0);
}
}
-
- Tmp3 = (Op0Opc == ISD::AND && DisjointMask) ? Op0.getOperand(0) : Op0;
+
SH &= 31;
- SDValue Ops[] = { Tmp3, Op1, getI32Imm(SH), getI32Imm(MB),
+ SDValue Ops[] = { Op0, Op1, getI32Imm(SH), getI32Imm(MB),
getI32Imm(ME) };
return CurDAG->getMachineNode(PPC::RLWIMI, dl, MVT::i32, Ops, 5);
}
diff --git a/lib/Target/PowerPC/PPCISelLowering.cpp b/lib/Target/PowerPC/PPCISelLowering.cpp
index 099fcb5..30a7861 100644
--- a/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -182,10 +182,6 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM)
// We cannot sextinreg(i1). Expand to shifts.
setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
- // Support label based line numbers.
- setOperationAction(ISD::DBG_STOPPOINT, MVT::Other, Expand);
- setOperationAction(ISD::DEBUG_LOC, MVT::Other, Expand);
-
setOperationAction(ISD::EXCEPTIONADDR, MVT::i64, Expand);
setOperationAction(ISD::EHSELECTION, MVT::i64, Expand);
setOperationAction(ISD::EXCEPTIONADDR, MVT::i32, Expand);
@@ -1174,7 +1170,7 @@ SDValue PPCTargetLowering::LowerBlockAddress(SDValue Op, SelectionDAG &DAG) {
DebugLoc DL = Op.getDebugLoc();
BlockAddress *BA = cast<BlockAddressSDNode>(Op)->getBlockAddress();
- SDValue TgtBA = DAG.getBlockAddress(BA, DL, /*isTarget=*/true);
+ SDValue TgtBA = DAG.getBlockAddress(BA, PtrVT, /*isTarget=*/true);
SDValue Zero = DAG.getConstant(0, PtrVT);
SDValue Hi = DAG.getNode(PPCISD::Hi, DL, PtrVT, TgtBA, Zero);
SDValue Lo = DAG.getNode(PPCISD::Lo, DL, PtrVT, TgtBA, Zero);
@@ -2177,10 +2173,10 @@ CalculateParameterAndLinkageAreaSize(SelectionDAG &DAG,
/// CalculateTailCallSPDiff - Get the amount the stack pointer has to be
/// adjusted to accommodate the arguments for the tailcall.
-static int CalculateTailCallSPDiff(SelectionDAG& DAG, bool IsTailCall,
+static int CalculateTailCallSPDiff(SelectionDAG& DAG, bool isTailCall,
unsigned ParamSize) {
- if (!IsTailCall) return 0;
+ if (!isTailCall) return 0;
PPCFunctionInfo *FI = DAG.getMachineFunction().getInfo<PPCFunctionInfo>();
unsigned CallerMinReservedArea = FI->getMinReservedArea();
@@ -3190,8 +3186,8 @@ SDValue PPCTargetLowering::LowerSTACKRESTORE(SDValue Op, SelectionDAG &DAG,
EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
// Construct the stack pointer operand.
- bool IsPPC64 = Subtarget.isPPC64();
- unsigned SP = IsPPC64 ? PPC::X1 : PPC::R1;
+ bool isPPC64 = Subtarget.isPPC64();
+ unsigned SP = isPPC64 ? PPC::X1 : PPC::R1;
SDValue StackPtr = DAG.getRegister(SP, PtrVT);
// Get the operands for the STACKRESTORE.
@@ -3213,7 +3209,7 @@ SDValue PPCTargetLowering::LowerSTACKRESTORE(SDValue Op, SelectionDAG &DAG,
SDValue
PPCTargetLowering::getReturnAddrFrameIndex(SelectionDAG & DAG) const {
MachineFunction &MF = DAG.getMachineFunction();
- bool IsPPC64 = PPCSubTarget.isPPC64();
+ bool isPPC64 = PPCSubTarget.isPPC64();
bool isDarwinABI = PPCSubTarget.isDarwinABI();
EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
@@ -3225,9 +3221,9 @@ PPCTargetLowering::getReturnAddrFrameIndex(SelectionDAG & DAG) const {
// If the frame pointer save index hasn't been defined yet.
if (!RASI) {
// Find out the fixed offset of the frame pointer save area.
- int LROffset = PPCFrameInfo::getReturnSaveOffset(IsPPC64, isDarwinABI);
+ int LROffset = PPCFrameInfo::getReturnSaveOffset(isPPC64, isDarwinABI);
// Allocate the frame index for frame pointer save area.
- RASI = MF.getFrameInfo()->CreateFixedObject(IsPPC64? 8 : 4, LROffset,
+ RASI = MF.getFrameInfo()->CreateFixedObject(isPPC64? 8 : 4, LROffset,
true, false);
// Save the result.
FI->setReturnAddrSaveIndex(RASI);
@@ -3238,7 +3234,7 @@ PPCTargetLowering::getReturnAddrFrameIndex(SelectionDAG & DAG) const {
SDValue
PPCTargetLowering::getFramePointerFrameIndex(SelectionDAG & DAG) const {
MachineFunction &MF = DAG.getMachineFunction();
- bool IsPPC64 = PPCSubTarget.isPPC64();
+ bool isPPC64 = PPCSubTarget.isPPC64();
bool isDarwinABI = PPCSubTarget.isDarwinABI();
EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
@@ -3250,11 +3246,11 @@ PPCTargetLowering::getFramePointerFrameIndex(SelectionDAG & DAG) const {
// If the frame pointer save index hasn't been defined yet.
if (!FPSI) {
// Find out the fixed offset of the frame pointer save area.
- int FPOffset = PPCFrameInfo::getFramePointerSaveOffset(IsPPC64,
+ int FPOffset = PPCFrameInfo::getFramePointerSaveOffset(isPPC64,
isDarwinABI);
// Allocate the frame index for frame pointer save area.
- FPSI = MF.getFrameInfo()->CreateFixedObject(IsPPC64? 8 : 4, FPOffset,
+ FPSI = MF.getFrameInfo()->CreateFixedObject(isPPC64? 8 : 4, FPOffset,
true, false);
// Save the result.
FI->setFramePointerSaveIndex(FPSI);
diff --git a/lib/Target/PowerPC/PPCInstrInfo.td b/lib/Target/PowerPC/PPCInstrInfo.td
index f5c095a..2b3f80d 100644
--- a/lib/Target/PowerPC/PPCInstrInfo.td
+++ b/lib/Target/PowerPC/PPCInstrInfo.td
@@ -1358,15 +1358,6 @@ def RLWNM : MForm_2<23,
//===----------------------------------------------------------------------===//
-// DWARF Pseudo Instructions
-//
-
-def DWARF_LOC : Pseudo<(outs), (ins i32imm:$line, i32imm:$col, i32imm:$file),
- "${:comment} .loc $file, $line, $col",
- [(dwarf_loc (i32 imm:$line), (i32 imm:$col),
- (i32 imm:$file))]>;
-
-//===----------------------------------------------------------------------===//
// PowerPC Instruction Patterns
//
diff --git a/lib/Target/PowerPC/PPCJITInfo.cpp b/lib/Target/PowerPC/PPCJITInfo.cpp
index ef25d92..c679bcd 100644
--- a/lib/Target/PowerPC/PPCJITInfo.cpp
+++ b/lib/Target/PowerPC/PPCJITInfo.cpp
@@ -323,6 +323,15 @@ PPCJITInfo::getLazyResolverFunction(JITCompilerFn Fn) {
return is64Bit ? PPC64CompilationCallback : PPC32CompilationCallback;
}
+TargetJITInfo::StubLayout PPCJITInfo::getStubLayout() {
+ // The stub contains up to 10 4-byte instructions, aligned at 4 bytes: 3
+ // instructions to save the caller's address if this is a lazy-compilation
+ // stub, plus a 1-, 4-, or 7-instruction sequence to load an arbitrary address
+ // into a register and jump through it.
+ StubLayout Result = {10*4, 4};
+ return Result;
+}
+
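How a JIT client would consume the StubLayout returned above, assuming the
struct carries Size and Alignment fields as the {10*4, 4} initializer
suggests (the helper name is hypothetical):

  #include "llvm/CodeGen/JITCodeEmitter.h"
  #include "llvm/Target/TargetJITInfo.h"
  using namespace llvm;

  // Align first, then take the current PC as the stub's start; the
  // caller may then emit up to SL.Size bytes at the returned address.
  static void *reserveStub(JITCodeEmitter &JCE,
                           const TargetJITInfo::StubLayout &SL) {
    JCE.emitAlignment(SL.Alignment);
    return (void*)JCE.getCurrentPCValue();
  }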
#if (defined(__POWERPC__) || defined (__ppc__) || defined(_POWER)) && \
defined(__APPLE__)
extern "C" void sys_icache_invalidate(const void *Addr, size_t len);
@@ -330,12 +339,12 @@ extern "C" void sys_icache_invalidate(const void *Addr, size_t len);
void *PPCJITInfo::emitFunctionStub(const Function* F, void *Fn,
JITCodeEmitter &JCE) {
+ MachineCodeEmitter::BufferState BS;
// If this is just a call to an external function, emit a branch instead of a
// call. The code is the same except for one bit of the last instruction.
if (Fn != (void*)(intptr_t)PPC32CompilationCallback &&
Fn != (void*)(intptr_t)PPC64CompilationCallback) {
- JCE.startGVStub(F, 7*4);
- intptr_t Addr = (intptr_t)JCE.getCurrentPCValue();
+ void *Addr = (void*)JCE.getCurrentPCValue();
JCE.emitWordBE(0);
JCE.emitWordBE(0);
JCE.emitWordBE(0);
@@ -343,13 +352,12 @@ void *PPCJITInfo::emitFunctionStub(const Function* F, void *Fn,
JCE.emitWordBE(0);
JCE.emitWordBE(0);
JCE.emitWordBE(0);
- EmitBranchToAt(Addr, (intptr_t)Fn, false, is64Bit);
- sys::Memory::InvalidateInstructionCache((void*)Addr, 7*4);
- return JCE.finishGVStub(F);
+ EmitBranchToAt((intptr_t)Addr, (intptr_t)Fn, false, is64Bit);
+ sys::Memory::InvalidateInstructionCache(Addr, 7*4);
+ return Addr;
}
- JCE.startGVStub(F, 10*4);
- intptr_t Addr = (intptr_t)JCE.getCurrentPCValue();
+ void *Addr = (void*)JCE.getCurrentPCValue();
if (is64Bit) {
JCE.emitWordBE(0xf821ffb1); // stdu r1,-80(r1)
JCE.emitWordBE(0x7d6802a6); // mflr r11
@@ -372,8 +380,8 @@ void *PPCJITInfo::emitFunctionStub(const Function* F, void *Fn,
JCE.emitWordBE(0);
JCE.emitWordBE(0);
EmitBranchToAt(BranchAddr, (intptr_t)Fn, true, is64Bit);
- sys::Memory::InvalidateInstructionCache((void*)Addr, 10*4);
- return JCE.finishGVStub(F);
+ sys::Memory::InvalidateInstructionCache(Addr, 10*4);
+ return Addr;
}
diff --git a/lib/Target/PowerPC/PPCJITInfo.h b/lib/Target/PowerPC/PPCJITInfo.h
index 2e25b29..47ead59 100644
--- a/lib/Target/PowerPC/PPCJITInfo.h
+++ b/lib/Target/PowerPC/PPCJITInfo.h
@@ -30,6 +30,7 @@ namespace llvm {
is64Bit = tmIs64Bit;
}
+ virtual StubLayout getStubLayout();
virtual void *emitFunctionStub(const Function* F, void *Fn,
JITCodeEmitter &JCE);
virtual LazyResolverFn getLazyResolverFunction(JITCompilerFn);
diff --git a/lib/Target/PowerPC/PPCRegisterInfo.cpp b/lib/Target/PowerPC/PPCRegisterInfo.cpp
index e65e644..0c3c8eb 100644
--- a/lib/Target/PowerPC/PPCRegisterInfo.cpp
+++ b/lib/Target/PowerPC/PPCRegisterInfo.cpp
@@ -1032,18 +1032,17 @@ PPCRegisterInfo::processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
// Save R31 if necessary
int FPSI = FI->getFramePointerSaveIndex();
- bool IsPPC64 = Subtarget.isPPC64();
- bool IsSVR4ABI = Subtarget.isSVR4ABI();
+ bool isPPC64 = Subtarget.isPPC64();
bool isDarwinABI = Subtarget.isDarwinABI();
MachineFrameInfo *MFI = MF.getFrameInfo();
// If the frame pointer save index hasn't been defined yet.
- if (!FPSI && needsFP(MF) && IsSVR4ABI) {
+ if (!FPSI && needsFP(MF)) {
// Find out the fixed offset of the frame pointer save area.
- int FPOffset = PPCFrameInfo::getFramePointerSaveOffset(IsPPC64,
+ int FPOffset = PPCFrameInfo::getFramePointerSaveOffset(isPPC64,
isDarwinABI);
// Allocate the frame index for frame pointer save area.
- FPSI = MF.getFrameInfo()->CreateFixedObject(IsPPC64? 8 : 4, FPOffset,
+ FPSI = MF.getFrameInfo()->CreateFixedObject(isPPC64? 8 : 4, FPOffset,
true, false);
// Save the result.
FI->setFramePointerSaveIndex(FPSI);
@@ -1067,7 +1066,7 @@ PPCRegisterInfo::processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
if (needsFP(MF) || spillsCR(MF)) {
const TargetRegisterClass *GPRC = &PPC::GPRCRegClass;
const TargetRegisterClass *G8RC = &PPC::G8RCRegClass;
- const TargetRegisterClass *RC = IsPPC64 ? G8RC : GPRC;
+ const TargetRegisterClass *RC = isPPC64 ? G8RC : GPRC;
RS->setScavengingFrameIndex(MFI->CreateStackObject(RC->getSize(),
RC->getAlignment(),
false));
@@ -1297,7 +1296,7 @@ PPCRegisterInfo::emitPrologue(MachineFunction &MF) const {
int NegFrameSize = -FrameSize;
// Get processor type.
- bool IsPPC64 = Subtarget.isPPC64();
+ bool isPPC64 = Subtarget.isPPC64();
// Get operating system
bool isDarwinABI = Subtarget.isDarwinABI();
// Check if the link register (LR) must be saved.
@@ -1306,7 +1305,7 @@ PPCRegisterInfo::emitPrologue(MachineFunction &MF) const {
// Do we have a frame pointer for this function?
bool HasFP = hasFP(MF) && FrameSize;
- int LROffset = PPCFrameInfo::getReturnSaveOffset(IsPPC64, isDarwinABI);
+ int LROffset = PPCFrameInfo::getReturnSaveOffset(isPPC64, isDarwinABI);
int FPOffset = 0;
if (HasFP) {
@@ -1316,11 +1315,11 @@ PPCRegisterInfo::emitPrologue(MachineFunction &MF) const {
assert(FPIndex && "No Frame Pointer Save Slot!");
FPOffset = FFI->getObjectOffset(FPIndex);
} else {
- FPOffset = PPCFrameInfo::getFramePointerSaveOffset(IsPPC64, isDarwinABI);
+ FPOffset = PPCFrameInfo::getFramePointerSaveOffset(isPPC64, isDarwinABI);
}
}
- if (IsPPC64) {
+ if (isPPC64) {
if (MustSaveLR)
BuildMI(MBB, MBBI, dl, TII.get(PPC::MFLR8), PPC::X0);
@@ -1361,7 +1360,7 @@ PPCRegisterInfo::emitPrologue(MachineFunction &MF) const {
// Adjust stack pointer: r1 += NegFrameSize.
// If there is a preferred stack alignment, align R1 now
- if (!IsPPC64) {
+ if (!isPPC64) {
// PPC32.
if (ALIGN_STACK && MaxAlign > TargetAlign) {
assert(isPowerOf2_32(MaxAlign)&&isInt16(MaxAlign)&&"Invalid alignment!");
@@ -1444,19 +1443,19 @@ PPCRegisterInfo::emitPrologue(MachineFunction &MF) const {
MachineLocation SPSrc(MachineLocation::VirtualFP, NegFrameSize);
Moves.push_back(MachineMove(FrameLabelId, SPDst, SPSrc));
} else {
- MachineLocation SP(IsPPC64 ? PPC::X31 : PPC::R31);
+ MachineLocation SP(isPPC64 ? PPC::X31 : PPC::R31);
Moves.push_back(MachineMove(FrameLabelId, SP, SP));
}
if (HasFP) {
MachineLocation FPDst(MachineLocation::VirtualFP, FPOffset);
- MachineLocation FPSrc(IsPPC64 ? PPC::X31 : PPC::R31);
+ MachineLocation FPSrc(isPPC64 ? PPC::X31 : PPC::R31);
Moves.push_back(MachineMove(FrameLabelId, FPDst, FPSrc));
}
if (MustSaveLR) {
MachineLocation LRDst(MachineLocation::VirtualFP, LROffset);
- MachineLocation LRSrc(IsPPC64 ? PPC::LR8 : PPC::LR);
+ MachineLocation LRSrc(isPPC64 ? PPC::LR8 : PPC::LR);
Moves.push_back(MachineMove(FrameLabelId, LRDst, LRSrc));
}
}
@@ -1465,7 +1464,7 @@ PPCRegisterInfo::emitPrologue(MachineFunction &MF) const {
// If there is a frame pointer, copy R1 into R31
if (HasFP) {
- if (!IsPPC64) {
+ if (!isPPC64) {
BuildMI(MBB, MBBI, dl, TII.get(PPC::OR), PPC::R31)
.addReg(PPC::R1)
.addReg(PPC::R1);
@@ -1481,8 +1480,8 @@ PPCRegisterInfo::emitPrologue(MachineFunction &MF) const {
// Mark effective beginning of when frame pointer is ready.
BuildMI(MBB, MBBI, dl, TII.get(PPC::DBG_LABEL)).addImm(ReadyLabelId);
- MachineLocation FPDst(HasFP ? (IsPPC64 ? PPC::X31 : PPC::R31) :
- (IsPPC64 ? PPC::X1 : PPC::R1));
+ MachineLocation FPDst(HasFP ? (isPPC64 ? PPC::X31 : PPC::R31) :
+ (isPPC64 ? PPC::X1 : PPC::R1));
MachineLocation FPSrc(MachineLocation::VirtualFP);
Moves.push_back(MachineMove(ReadyLabelId, FPDst, FPSrc));
}
@@ -1528,7 +1527,7 @@ void PPCRegisterInfo::emitEpilogue(MachineFunction &MF,
int FrameSize = MFI->getStackSize();
// Get processor type.
- bool IsPPC64 = Subtarget.isPPC64();
+ bool isPPC64 = Subtarget.isPPC64();
// Get operating system
bool isDarwinABI = Subtarget.isDarwinABI();
// Check if the link register (LR) has been saved.
@@ -1537,7 +1536,7 @@ void PPCRegisterInfo::emitEpilogue(MachineFunction &MF,
// Do we have a frame pointer for this function?
bool HasFP = hasFP(MF) && FrameSize;
- int LROffset = PPCFrameInfo::getReturnSaveOffset(IsPPC64, isDarwinABI);
+ int LROffset = PPCFrameInfo::getReturnSaveOffset(isPPC64, isDarwinABI);
int FPOffset = 0;
if (HasFP) {
@@ -1547,7 +1546,7 @@ void PPCRegisterInfo::emitEpilogue(MachineFunction &MF,
assert(FPIndex && "No Frame Pointer Save Slot!");
FPOffset = FFI->getObjectOffset(FPIndex);
} else {
- FPOffset = PPCFrameInfo::getFramePointerSaveOffset(IsPPC64, isDarwinABI);
+ FPOffset = PPCFrameInfo::getFramePointerSaveOffset(isPPC64, isDarwinABI);
}
}
@@ -1575,7 +1574,7 @@ void PPCRegisterInfo::emitEpilogue(MachineFunction &MF,
if (FrameSize) {
// The loaded (or persistent) stack pointer value is offset by the 'stwu'
// on entry to the function. Add this offset back now.
- if (!IsPPC64) {
+ if (!isPPC64) {
// If this function contained a fastcc call and PerformTailCallOpt is
// enabled (=> hasFastCall()==true) the fastcc call might contain a tail
// call which invalidates the stack pointer value in SP(0). So we use the
@@ -1629,7 +1628,7 @@ void PPCRegisterInfo::emitEpilogue(MachineFunction &MF,
}
}
- if (IsPPC64) {
+ if (isPPC64) {
if (MustSaveLR)
BuildMI(MBB, MBBI, dl, TII.get(PPC::LD), PPC::X0)
.addImm(LROffset/4).addReg(PPC::X1);
@@ -1659,13 +1658,13 @@ void PPCRegisterInfo::emitEpilogue(MachineFunction &MF,
MF.getFunction()->getCallingConv() == CallingConv::Fast) {
PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
unsigned CallerAllocatedAmt = FI->getMinReservedArea();
- unsigned StackReg = IsPPC64 ? PPC::X1 : PPC::R1;
- unsigned FPReg = IsPPC64 ? PPC::X31 : PPC::R31;
- unsigned TmpReg = IsPPC64 ? PPC::X0 : PPC::R0;
- unsigned ADDIInstr = IsPPC64 ? PPC::ADDI8 : PPC::ADDI;
- unsigned ADDInstr = IsPPC64 ? PPC::ADD8 : PPC::ADD4;
- unsigned LISInstr = IsPPC64 ? PPC::LIS8 : PPC::LIS;
- unsigned ORIInstr = IsPPC64 ? PPC::ORI8 : PPC::ORI;
+ unsigned StackReg = isPPC64 ? PPC::X1 : PPC::R1;
+ unsigned FPReg = isPPC64 ? PPC::X31 : PPC::R31;
+ unsigned TmpReg = isPPC64 ? PPC::X0 : PPC::R0;
+ unsigned ADDIInstr = isPPC64 ? PPC::ADDI8 : PPC::ADDI;
+ unsigned ADDInstr = isPPC64 ? PPC::ADD8 : PPC::ADD4;
+ unsigned LISInstr = isPPC64 ? PPC::LIS8 : PPC::LIS;
+ unsigned ORIInstr = isPPC64 ? PPC::ORI8 : PPC::ORI;
if (CallerAllocatedAmt && isInt16(CallerAllocatedAmt)) {
BuildMI(MBB, MBBI, dl, TII.get(ADDIInstr), StackReg)
diff --git a/lib/Target/README.txt b/lib/Target/README.txt
index aad621f..2d8a687 100644
--- a/lib/Target/README.txt
+++ b/lib/Target/README.txt
@@ -2,6 +2,29 @@ Target Independent Opportunities:
//===---------------------------------------------------------------------===//
+Dead argument elimination should be enhanced to handle cases when an argument is
+dead to an externally visible function. Though the argument can't be removed
+from the externally visible function, the caller doesn't need to pass it in.
+For example in this testcase:
+
+ void foo(int X) __attribute__((noinline));
+ void foo(int X) { sideeffect(); }
+ void bar(int A) { foo(A+1); }
+
+We compile bar to:
+
+define void @bar(i32 %A) nounwind ssp {
+ %0 = add nsw i32 %A, 1 ; <i32> [#uses=1]
+ tail call void @foo(i32 %0) nounwind noinline ssp
+ ret void
+}
+
+The add is dead; we could pass in 'i32 undef' instead. This occurs for C++
+templates etc, which usually have linkonce_odr/weak_odr linkage, not internal
+linkage.
+
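+A hedged source-level sketch (not from the testcase) of what the caller
+could become once the argument is known dead; 0 stands in for the IR-level
+'i32 undef':
+
+  void foo(int X) __attribute__((noinline));
+  void bar(int A) { foo(0); /* foo never reads X; any value works */ }
+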
+//===---------------------------------------------------------------------===//
+
With the recent changes to make the implicit def/use set explicit in
machineinstrs, we should change the target descriptions for 'call' instructions
so that the .td files don't list all the call-clobbered registers as implicit
@@ -220,7 +243,7 @@ so cool to turn it into something like:
... which would only do one 32-bit XOR per loop iteration instead of two.
It would also be nice to recognize the reg->size doesn't alias reg->node[i], but
-alas.
+this requires TBAA.
//===---------------------------------------------------------------------===//
@@ -280,6 +303,9 @@ unsigned int popcount(unsigned int input) {
return count;
}
+This is a form of idiom recognition for loops, the same thing that could be
+useful for recognizing memset/memcpy.
+
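+What the recognizer could produce, sketched in C++ (the builtin is a
+GCC/Clang extension; at the IR level this is the llvm.ctpop intrinsic):
+
+  unsigned popcount(unsigned input) { return __builtin_popcount(input); }
+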
//===---------------------------------------------------------------------===//
These should turn into single 16-bit (unaligned?) loads on little/big endian
@@ -343,7 +369,7 @@ PHI Slicing could be extended to do this.
//===---------------------------------------------------------------------===//
-LSR should know what GPR types a target has. This code:
+LSR should know what GPR types a target has from TargetData. This code:
volatile short X, Y; // globals
@@ -369,7 +395,6 @@ LBB1_2:
LSR should reuse the "+" IV for the exit test.
-
//===---------------------------------------------------------------------===//
Tail call elim should be more aggressive, checking to see if the call is
@@ -441,25 +466,6 @@ entry:
//===---------------------------------------------------------------------===//
-"basicaa" should know how to look through "or" instructions that act like add
-instructions. For example in this code, the x*4+1 is turned into x*4 | 1, and
-basicaa can't analyze the array subscript, leading to duplicated loads in the
-generated code:
-
-void test(int X, int Y, int a[]) {
-int i;
- for (i=2; i<1000; i+=4) {
- a[i+0] = a[i-1+0]*a[i-2+0];
- a[i+1] = a[i-1+1]*a[i-2+1];
- a[i+2] = a[i-1+2]*a[i-2+2];
- a[i+3] = a[i-1+3]*a[i-2+3];
- }
-}
-
-BasicAA also doesn't do this for add. It needs to know that &A[i+1] != &A[i].
-
-//===---------------------------------------------------------------------===//
-
We should investigate an instruction sinking pass. Consider this silly
example in pic mode:
@@ -1110,6 +1116,8 @@ later.
//===---------------------------------------------------------------------===//
+[STORE SINKING]
+
Store sinking: This code:
void f (int n, int *cond, int *res) {
@@ -1165,6 +1173,8 @@ This is GCC PR38204.
//===---------------------------------------------------------------------===//
+[STORE SINKING]
+
GCC PR37810 is an interesting case where we should sink load/store reload
into the if block and outside the loop, so we don't reload/store it on the
non-call path.
@@ -1192,7 +1202,7 @@ we don't sink the store. We need partially dead store sinking.
//===---------------------------------------------------------------------===//
-[PHI TRANSLATE GEPs]
+[LOAD PRE CRIT EDGE SPLITTING]
GCC PR37166: Sinking of loads prevents SROA'ing the "g" struct on the stack
leading to excess stack traffic. This could be handled by GVN with some crazy
@@ -1209,99 +1219,59 @@ bb3: ; preds = %bb1, %bb2, %bb
%10 = getelementptr %struct.f* %c_addr.0, i32 0, i32 0
%11 = load i32* %10, align 4
-%11 is fully redundant, an in BB2 it should have the value %8.
+%11 is partially redundant, and in BB2 it should have the value %8.
+
+GCC PR33344 and PR35287 are similar cases.
-GCC PR33344 is a similar case.
//===---------------------------------------------------------------------===//
-[PHI TRANSLATE INDEXED GEPs] PR5313
+[LOAD PRE]
-Load redundancy elimination for simple loop. This loop:
+There are many load PRE testcases in testsuite/gcc.dg/tree-ssa/loadpre* in the
+GCC testsuite; the ones we don't get yet (checked through loadpre25) are:
-void append_text(const char* text,unsigned char * const io) {
- while(*text)
- *io=*text++;
-}
+[CRIT EDGE BREAKING]
+loadpre3.c predcom-4.c
-Compiles to have a fully redundant load in the loop (%2):
+[PRE OF READONLY CALL]
+loadpre5.c
-define void @append_text(i8* nocapture %text, i8* nocapture %io) nounwind {
-entry:
- %0 = load i8* %text, align 1 ; <i8> [#uses=1]
- %1 = icmp eq i8 %0, 0 ; <i1> [#uses=1]
- br i1 %1, label %return, label %bb
-
-bb: ; preds = %bb, %entry
- %indvar = phi i32 [ 0, %entry ], [ %tmp, %bb ] ; <i32> [#uses=2]
- %text_addr.04 = getelementptr i8* %text, i32 %indvar ; <i8*> [#uses=1]
- %2 = load i8* %text_addr.04, align 1 ; <i8> [#uses=1]
- store i8 %2, i8* %io, align 1
- %tmp = add i32 %indvar, 1 ; <i32> [#uses=2]
- %scevgep = getelementptr i8* %text, i32 %tmp ; <i8*> [#uses=1]
- %3 = load i8* %scevgep, align 1 ; <i8> [#uses=1]
- %4 = icmp eq i8 %3, 0 ; <i1> [#uses=1]
- br i1 %4, label %return, label %bb
-
-return: ; preds = %bb, %entry
- ret void
-}
+[TURN SELECT INTO BRANCH]
+loadpre14.c loadpre15.c
-//===---------------------------------------------------------------------===//
+actually a conditional increment: loadpre18.c loadpre19.c
-There are many load PRE testcases in testsuite/gcc.dg/tree-ssa/loadpre* in the
-GCC testsuite. There are many pre testcases as ssa-pre-*.c
//===---------------------------------------------------------------------===//
-There are some interesting cases in testsuite/gcc.dg/tree-ssa/pred-comm* in the
-GCC testsuite. For example, predcom-1.c is:
-
- for (i = 2; i < 1000; i++)
- fib[i] = (fib[i-1] + fib[i - 2]) & 0xffff;
-
-which compiles into:
-
-bb1: ; preds = %bb1, %bb1.thread
- %indvar = phi i32 [ 0, %bb1.thread ], [ %0, %bb1 ]
- %i.0.reg2mem.0 = add i32 %indvar, 2
- %0 = add i32 %indvar, 1 ; <i32> [#uses=3]
- %1 = getelementptr [1000 x i32]* @fib, i32 0, i32 %0
- %2 = load i32* %1, align 4 ; <i32> [#uses=1]
- %3 = getelementptr [1000 x i32]* @fib, i32 0, i32 %indvar
- %4 = load i32* %3, align 4 ; <i32> [#uses=1]
- %5 = add i32 %4, %2 ; <i32> [#uses=1]
- %6 = and i32 %5, 65535 ; <i32> [#uses=1]
- %7 = getelementptr [1000 x i32]* @fib, i32 0, i32 %i.0.reg2mem.0
- store i32 %6, i32* %7, align 4
- %exitcond = icmp eq i32 %0, 998 ; <i1> [#uses=1]
- br i1 %exitcond, label %return, label %bb1
+[SCALAR PRE]
+There are many PRE testcases in testsuite/gcc.dg/tree-ssa/ssa-pre-*.c in the
+GCC testsuite.
-This is basically:
- LOAD fib[i+1]
- LOAD fib[i]
- STORE fib[i+2]
+//===---------------------------------------------------------------------===//
-instead of handling this as a loop or other xform, all we'd need to do is teach
-load PRE to phi translate the %0 add (i+1) into the predecessor as (i'+1+1) =
-(i'+2) (where i' is the previous iteration of i). This would find the store
-which feeds it.
+There are some interesting cases in testsuite/gcc.dg/tree-ssa/pred-comm* in the
+GCC testsuite. For example, we get the first example in predcom-1.c, but
+miss the second one:
-predcom-2.c is apparently the same as predcom-1.c
-predcom-3.c is very similar but needs loads feeding each other instead of
-store->load.
-predcom-4.c seems the same as the rest.
+unsigned fib[1000];
+unsigned avg[1000];
+__attribute__ ((noinline))
+void count_averages(int n) {
+ int i;
+ for (i = 1; i < n; i++)
+ avg[i] = (((unsigned long) fib[i - 1] + fib[i] + fib[i + 1]) / 3) & 0xffff;
+}
-//===---------------------------------------------------------------------===//
+which compiles into two loads instead of one in the loop.
-Other simple load PRE cases:
-http://gcc.gnu.org/bugzilla/show_bug.cgi?id=35287 [LPRE crit edge splitting]
+predcom-2.c is the same as predcom-1.c
-http://gcc.gnu.org/bugzilla/show_bug.cgi?id=34677 (licm does this, LPRE crit edge)
- llvm-gcc t2.c -S -o - -O0 -emit-llvm | llvm-as | opt -mem2reg -simplifycfg -gvn | llvm-dis
+predcom-3.c is very similar but needs loads feeding each other instead of
+store->load.
-http://gcc.gnu.org/bugzilla/show_bug.cgi?id=16799 [BITCAST PHI TRANS]
//===---------------------------------------------------------------------===//
@@ -1334,7 +1304,7 @@ Interesting missed case because of control flow flattening (should be 2 loads):
http://gcc.gnu.org/bugzilla/show_bug.cgi?id=26629
With: llvm-gcc t2.c -S -o - -O0 -emit-llvm | llvm-as |
opt -mem2reg -gvn -instcombine | llvm-dis
-we miss it because we need 1) GEP PHI TRAN, 2) CRIT EDGE 3) MULTIPLE DIFFERENT
+we miss it because we need 1) CRIT EDGE 2) MULTIPLE DIFFERENT
VALS PRODUCED BY ONE BLOCK OVER DIFFERENT PATHS
//===---------------------------------------------------------------------===//
diff --git a/lib/Target/Sparc/SparcISelLowering.cpp b/lib/Target/Sparc/SparcISelLowering.cpp
index 133f828..1b3ca3e 100644
--- a/lib/Target/Sparc/SparcISelLowering.cpp
+++ b/lib/Target/Sparc/SparcISelLowering.cpp
@@ -644,10 +644,6 @@ SparcTargetLowering::SparcTargetLowering(TargetMachine &TM)
setOperationAction(ISD::UMUL_LOHI, MVT::i32, Expand);
setOperationAction(ISD::SMUL_LOHI, MVT::i32, Expand);
- // We don't have line number support yet.
- setOperationAction(ISD::DBG_STOPPOINT, MVT::Other, Expand);
- setOperationAction(ISD::DEBUG_LOC, MVT::Other, Expand);
- setOperationAction(ISD::DBG_LABEL, MVT::Other, Expand);
setOperationAction(ISD::EH_LABEL, MVT::Other, Expand);
// VASTART needs to be custom lowered to use the VarArgsFrameIndex.
@@ -663,8 +659,6 @@ SparcTargetLowering::SparcTargetLowering(TargetMachine &TM)
setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32 , Custom);
// No debug info support yet.
- setOperationAction(ISD::DBG_STOPPOINT, MVT::Other, Expand);
- setOperationAction(ISD::DBG_LABEL, MVT::Other, Expand);
setOperationAction(ISD::EH_LABEL, MVT::Other, Expand);
setStackPointerRegisterToSaveRestore(SP::O6);
diff --git a/lib/Target/TargetLoweringObjectFile.cpp b/lib/Target/TargetLoweringObjectFile.cpp
index 6fdbc92..f887523 100644
--- a/lib/Target/TargetLoweringObjectFile.cpp
+++ b/lib/Target/TargetLoweringObjectFile.cpp
@@ -783,8 +783,8 @@ void TargetLoweringObjectFileMachO::Initialize(MCContext &Ctx,
}
// Exception Handling.
- LSDASection = getMachOSection("__TEXT", "__gcc_except_tab", 0,
- SectionKind::getReadOnlyWithRel());
+ LSDASection = getMachOSection("__DATA", "__gcc_except_tab", 0,
+ SectionKind::getDataRel());
EHFrameSection =
getMachOSection("__TEXT", "__eh_frame",
MCSectionMachO::S_COALESCED |
diff --git a/lib/Target/X86/AsmPrinter/X86MCInstLower.cpp b/lib/Target/X86/AsmPrinter/X86MCInstLower.cpp
index be9f4b2..38c0c28 100644
--- a/lib/Target/X86/AsmPrinter/X86MCInstLower.cpp
+++ b/lib/Target/X86/AsmPrinter/X86MCInstLower.cpp
@@ -43,7 +43,6 @@ MCSymbol *X86MCInstLower::GetPICBaseSymbol() const {
Twine(AsmPrinter.getFunctionNumber())+"$pb");
}
-
/// LowerGlobalAddressOperand - Lower an MO_GlobalAddress operand to an
/// MCOperand.
MCSymbol *X86MCInstLower::
@@ -231,6 +230,19 @@ GetConstantPoolIndexSymbol(const MachineOperand &MO) const {
return Ctx.GetOrCreateSymbol(Name.str());
}
+MCSymbol *X86MCInstLower::
+GetBlockAddressSymbol(const MachineOperand &MO) const {
+ const char *Suffix = "";
+ switch (MO.getTargetFlags()) {
+ default: llvm_unreachable("Unknown target flag on BA operand");
+ case X86II::MO_NO_FLAG: break; // No flag.
+ case X86II::MO_PIC_BASE_OFFSET: break; // Doesn't modify symbol name.
+ case X86II::MO_GOTOFF: Suffix = "@GOTOFF"; break;
+ }
+
+ return AsmPrinter.GetBlockAddressSymbol(MO.getBlockAddress(), Suffix);
+}
+
MCOperand X86MCInstLower::LowerSymbolOperand(const MachineOperand &MO,
MCSymbol *Sym) const {
// FIXME: We would like an efficient form for this, so we don't have to do a
@@ -331,8 +343,7 @@ void X86MCInstLower::Lower(const MachineInstr *MI, MCInst &OutMI) const {
MCOp = LowerSymbolOperand(MO, GetConstantPoolIndexSymbol(MO));
break;
case MachineOperand::MO_BlockAddress:
- MCOp = LowerSymbolOperand(MO, AsmPrinter.GetBlockAddressSymbol(
- MO.getBlockAddress()));
+ MCOp = LowerSymbolOperand(MO, GetBlockAddressSymbol(MO));
break;
}
diff --git a/lib/Target/X86/AsmPrinter/X86MCInstLower.h b/lib/Target/X86/AsmPrinter/X86MCInstLower.h
index fa25b90..94f8bfc 100644
--- a/lib/Target/X86/AsmPrinter/X86MCInstLower.h
+++ b/lib/Target/X86/AsmPrinter/X86MCInstLower.h
@@ -43,6 +43,7 @@ public:
MCSymbol *GetExternalSymbolSymbol(const MachineOperand &MO) const;
MCSymbol *GetJumpTableSymbol(const MachineOperand &MO) const;
MCSymbol *GetConstantPoolIndexSymbol(const MachineOperand &MO) const;
+ MCSymbol *GetBlockAddressSymbol(const MachineOperand &MO) const;
MCOperand LowerSymbolOperand(const MachineOperand &MO, MCSymbol *Sym) const;
private:
diff --git a/lib/Target/X86/Disassembler/CMakeLists.txt b/lib/Target/X86/Disassembler/CMakeLists.txt
new file mode 100644
index 0000000..b329e89
--- /dev/null
+++ b/lib/Target/X86/Disassembler/CMakeLists.txt
@@ -0,0 +1,6 @@
+include_directories( ${CMAKE_CURRENT_BINARY_DIR}/.. ${CMAKE_CURRENT_SOURCE_DIR}/.. )
+
+add_llvm_library(LLVMX86Disassembler
+ X86Disassembler.cpp
+ )
+add_dependencies(LLVMX86Disassembler X86CodeGenTable_gen)
diff --git a/lib/Target/X86/Disassembler/Makefile b/lib/Target/X86/Disassembler/Makefile
new file mode 100644
index 0000000..b289647
--- /dev/null
+++ b/lib/Target/X86/Disassembler/Makefile
@@ -0,0 +1,16 @@
+##===- lib/Target/X86/Disassembler/Makefile ----------------*- Makefile -*-===##
+#
+# The LLVM Compiler Infrastructure
+#
+# This file is distributed under the University of Illinois Open Source
+# License. See LICENSE.TXT for details.
+#
+##===----------------------------------------------------------------------===##
+
+LEVEL = ../../../..
+LIBRARYNAME = LLVMX86Disassembler
+
+# Hack: we need to include the 'main' x86 target directory to grab private headers
+CPPFLAGS = -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/..
+
+include $(LEVEL)/Makefile.common
diff --git a/lib/Target/X86/Disassembler/X86Disassembler.cpp b/lib/Target/X86/Disassembler/X86Disassembler.cpp
new file mode 100644
index 0000000..2ebbc9b
--- /dev/null
+++ b/lib/Target/X86/Disassembler/X86Disassembler.cpp
@@ -0,0 +1,29 @@
+//===- X86Disassembler.cpp - Disassembler for x86 and x86_64 ----*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/MC/MCDisassembler.h"
+#include "llvm/Target/TargetRegistry.h"
+#include "X86.h"
+using namespace llvm;
+
+static const MCDisassembler *createX86_32Disassembler(const Target &T) {
+ return 0;
+}
+
+static const MCDisassembler *createX86_64Disassembler(const Target &T) {
+ return 0;
+}
+
+extern "C" void LLVMInitializeX86Disassembler() {
+ // Register the disassembler.
+ TargetRegistry::RegisterMCDisassembler(TheX86_32Target,
+ createX86_32Disassembler);
+ TargetRegistry::RegisterMCDisassembler(TheX86_64Target,
+ createX86_64Disassembler);
+}
diff --git a/lib/Target/X86/Makefile b/lib/Target/X86/Makefile
index 220831d..b311a6e 100644
--- a/lib/Target/X86/Makefile
+++ b/lib/Target/X86/Makefile
@@ -18,6 +18,6 @@ BUILT_SOURCES = X86GenRegisterInfo.h.inc X86GenRegisterNames.inc \
X86GenFastISel.inc \
X86GenCallingConv.inc X86GenSubtarget.inc
-DIRS = AsmPrinter AsmParser TargetInfo
+DIRS = AsmPrinter AsmParser Disassembler TargetInfo
include $(LEVEL)/Makefile.common
diff --git a/lib/Target/X86/X86CodeEmitter.cpp b/lib/Target/X86/X86CodeEmitter.cpp
index 4497931..4892e17 100644
--- a/lib/Target/X86/X86CodeEmitter.cpp
+++ b/lib/Target/X86/X86CodeEmitter.cpp
@@ -595,7 +595,6 @@ void Emitter<CodeEmitter>::emitInstruction(const MachineInstr &MI,
break;
case TargetInstrInfo::IMPLICIT_DEF:
case TargetInstrInfo::KILL:
- case X86::DWARF_LOC:
case X86::FP_REG_KILL:
break;
case X86::MOVPC32r: {
diff --git a/lib/Target/X86/X86ISelDAGToDAG.cpp b/lib/Target/X86/X86ISelDAGToDAG.cpp
index 6a3577a..a9a78be 100644
--- a/lib/Target/X86/X86ISelDAGToDAG.cpp
+++ b/lib/Target/X86/X86ISelDAGToDAG.cpp
@@ -252,8 +252,8 @@ namespace {
else if (AM.JT != -1)
Disp = CurDAG->getTargetJumpTable(AM.JT, MVT::i32, AM.SymbolFlags);
else if (AM.BlockAddr)
- Disp = CurDAG->getBlockAddress(AM.BlockAddr, DebugLoc()/*MVT::i32*/,
- true /*AM.SymbolFlags*/);
+ Disp = CurDAG->getBlockAddress(AM.BlockAddr, MVT::i32,
+ true, AM.SymbolFlags);
else
Disp = CurDAG->getTargetConstant(AM.Disp, MVT::i32);
@@ -777,7 +777,7 @@ bool X86DAGToDAGISel::MatchWrapper(SDValue N, X86ISelAddressMode &AM) {
AM.SymbolFlags = J->getTargetFlags();
} else {
AM.BlockAddr = cast<BlockAddressSDNode>(N0)->getBlockAddress();
- //AM.SymbolFlags = cast<BlockAddressSDNode>(N0)->getTargetFlags();
+ AM.SymbolFlags = cast<BlockAddressSDNode>(N0)->getTargetFlags();
}
if (N.getOpcode() == X86ISD::WrapperRIP)
@@ -808,7 +808,7 @@ bool X86DAGToDAGISel::MatchWrapper(SDValue N, X86ISelAddressMode &AM) {
AM.SymbolFlags = J->getTargetFlags();
} else {
AM.BlockAddr = cast<BlockAddressSDNode>(N0)->getBlockAddress();
- //AM.SymbolFlags = cast<BlockAddressSDNode>(N0)->getTargetFlags();
+ AM.SymbolFlags = cast<BlockAddressSDNode>(N0)->getTargetFlags();
}
return false;
}
diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp
index 6018cf5..d80b8ec 100644
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -373,13 +373,10 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
setOperationAction(ISD::ATOMIC_SWAP, MVT::i64, Custom);
}
- // Use the default ISD::DBG_STOPPOINT.
- setOperationAction(ISD::DBG_STOPPOINT, MVT::Other, Expand);
// FIXME - use subtarget debug flags
if (!Subtarget->isTargetDarwin() &&
!Subtarget->isTargetELF() &&
!Subtarget->isTargetCygMing()) {
- setOperationAction(ISD::DBG_LABEL, MVT::Other, Expand);
setOperationAction(ISD::EH_LABEL, MVT::Other, Expand);
}
@@ -978,6 +975,19 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
computeRegisterProperties();
+ // Divide and remainder operations have no vector equivalent and can
+ // trap. Do a custom widening for these operations in which we never
+ // generate more divides/remainder than the original vector width.
+ for (unsigned VT = (unsigned)MVT::FIRST_VECTOR_VALUETYPE;
+ VT <= (unsigned)MVT::LAST_VECTOR_VALUETYPE; ++VT) {
+ if (!isTypeLegal((MVT::SimpleValueType)VT)) {
+ setOperationAction(ISD::SDIV, (MVT::SimpleValueType) VT, Custom);
+ setOperationAction(ISD::UDIV, (MVT::SimpleValueType) VT, Custom);
+ setOperationAction(ISD::SREM, (MVT::SimpleValueType) VT, Custom);
+ setOperationAction(ISD::UREM, (MVT::SimpleValueType) VT, Custom);
+ }
+ }
+
// FIXME: These should be based on subtarget info. Plus, the values should
// be smaller when we are in optimizing for size mode.
maxStoresPerMemset = 16; // For @llvm.memset -> sequence of stores
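The trap-safety argument above, restated as a scalar sketch (plain C++; the
DAG-level implementation goes through UnrollVectorOp instead):

  // Scalarize a vector divide lane by lane so exactly the original
  // number of divides executes; padding lanes introduced by widening
  // never divide, so they cannot raise a divide-by-zero trap.
  void divideLanes(const int *A, const int *B, int *R, unsigned NumLanes) {
    for (unsigned i = 0; i != NumLanes; ++i)
      R[i] = A[i] / B[i];
  }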
@@ -4722,18 +4732,27 @@ X86TargetLowering::LowerExternalSymbol(SDValue Op, SelectionDAG &DAG) {
SDValue
X86TargetLowering::LowerBlockAddress(SDValue Op, SelectionDAG &DAG) {
- unsigned WrapperKind = X86ISD::Wrapper;
+ // Create the TargetBlockAddress node.
+ unsigned char OpFlags =
+ Subtarget->ClassifyBlockAddressReference();
CodeModel::Model M = getTargetMachine().getCodeModel();
+ BlockAddress *BA = cast<BlockAddressSDNode>(Op)->getBlockAddress();
+ DebugLoc dl = Op.getDebugLoc();
+ SDValue Result = DAG.getBlockAddress(BA, getPointerTy(),
+ /*isTarget=*/true, OpFlags);
+
if (Subtarget->isPICStyleRIPRel() &&
(M == CodeModel::Small || M == CodeModel::Kernel))
- WrapperKind = X86ISD::WrapperRIP;
-
- DebugLoc DL = Op.getDebugLoc();
-
- BlockAddress *BA = cast<BlockAddressSDNode>(Op)->getBlockAddress();
- SDValue Result = DAG.getBlockAddress(BA, DL, /*isTarget=*/true);
+ Result = DAG.getNode(X86ISD::WrapperRIP, dl, getPointerTy(), Result);
+ else
+ Result = DAG.getNode(X86ISD::Wrapper, dl, getPointerTy(), Result);
- Result = DAG.getNode(WrapperKind, DL, getPointerTy(), Result);
+ // With PIC, the address is actually $g + Offset.
+ if (isGlobalRelativeToPICBase(OpFlags)) {
+ Result = DAG.getNode(ISD::ADD, dl, getPointerTy(),
+ DAG.getNode(X86ISD::GlobalBaseReg, dl, getPointerTy()),
+ Result);
+ }
return Result;
}
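The PIC arithmetic added above reduces to a base-plus-displacement
computation; a plain C++ sketch with hypothetical names (not SelectionDAG):

  #include <cstddef>
  #include <cstdint>

  // With PIC, the materialized address is the PIC base register plus
  // the symbol's displacement from that base ("$g + Offset").
  std::uintptr_t picAddress(std::uintptr_t GlobalBase, std::ptrdiff_t Offset) {
    return GlobalBase + Offset;
  }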
@@ -7164,6 +7183,14 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N,
Results.push_back(edx.getValue(1));
return;
}
+ case ISD::SDIV:
+ case ISD::UDIV:
+ case ISD::SREM:
+ case ISD::UREM: {
+ EVT WidenVT = getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ Results.push_back(DAG.UnrollVectorOp(N, WidenVT.getVectorNumElements()));
+ return;
+ }
case ISD::ATOMIC_CMP_SWAP: {
EVT T = N->getValueType(0);
assert (T == MVT::i64 && "Only know how to expand i64 Cmp and Swap");
diff --git a/lib/Target/X86/X86Instr64bit.td b/lib/Target/X86/X86Instr64bit.td
index a01534b..b5fa862 100644
--- a/lib/Target/X86/X86Instr64bit.td
+++ b/lib/Target/X86/X86Instr64bit.td
@@ -1663,7 +1663,7 @@ def : Pat<(X86tcret GR64:$dst, imm:$off),
(TCRETURNri64 GR64:$dst, imm:$off)>;
def : Pat<(X86tcret (i64 tglobaladdr:$dst), imm:$off),
- (TCRETURNdi64 texternalsym:$dst, imm:$off)>;
+ (TCRETURNdi64 tglobaladdr:$dst, imm:$off)>;
def : Pat<(X86tcret (i64 texternalsym:$dst), imm:$off),
(TCRETURNdi64 texternalsym:$dst, imm:$off)>;
diff --git a/lib/Target/X86/X86InstrInfo.cpp b/lib/Target/X86/X86InstrInfo.cpp
index 1ddceb1..a37013d 100644
--- a/lib/Target/X86/X86InstrInfo.cpp
+++ b/lib/Target/X86/X86InstrInfo.cpp
@@ -3133,7 +3133,6 @@ static unsigned GetInstSizeWithDesc(const MachineInstr &MI,
break;
case TargetInstrInfo::IMPLICIT_DEF:
case TargetInstrInfo::KILL:
- case X86::DWARF_LOC:
case X86::FP_REG_KILL:
break;
case X86::MOVPC32r: {
diff --git a/lib/Target/X86/X86InstrInfo.td b/lib/Target/X86/X86InstrInfo.td
index a79f262..90ef1f4 100644
--- a/lib/Target/X86/X86InstrInfo.td
+++ b/lib/Target/X86/X86InstrInfo.td
@@ -718,7 +718,6 @@ def TCRETURNri : I<0, Pseudo, (outs), (ins GR32:$dst, i32imm:$offset, variable_o
[]>;
let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1 in
-
def TAILJMPd : IBr<0xE9, (ins i32imm_pcrel:$dst), "jmp\t$dst # TAILCALL",
[]>;
let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1 in
@@ -3506,16 +3505,6 @@ def FS_MOV32rm : I<0x8B, MRMSrcMem, (outs GR32:$dst), (ins i32mem:$src),
[(set GR32:$dst, (fsload addr:$src))]>, SegFS;
//===----------------------------------------------------------------------===//
-// DWARF Pseudo Instructions
-//
-
-def DWARF_LOC : I<0, Pseudo, (outs),
- (ins i32imm:$line, i32imm:$col, i32imm:$file),
- ".loc\t$file $line $col",
- [(dwarf_loc (i32 imm:$line), (i32 imm:$col),
- (i32 imm:$file))]>;
-
-//===----------------------------------------------------------------------===//
// EH Pseudo Instructions
//
let isTerminator = 1, isReturn = 1, isBarrier = 1,
diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td
index ee63d56..dfdd4ce 100644
--- a/lib/Target/X86/X86InstrSSE.td
+++ b/lib/Target/X86/X86InstrSSE.td
@@ -2820,40 +2820,40 @@ defm PSIGND : SS3I_binop_rm_int_32<0x0A, "psignd",
let Constraints = "$src1 = $dst" in {
def PALIGNR64rr : SS3AI<0x0F, MRMSrcReg, (outs VR64:$dst),
- (ins VR64:$src1, VR64:$src2, i16imm:$src3),
+ (ins VR64:$src1, VR64:$src2, i8imm:$src3),
"palignr\t{$src3, $src2, $dst|$dst, $src2, $src3}",
[]>;
def PALIGNR64rm : SS3AI<0x0F, MRMSrcMem, (outs VR64:$dst),
- (ins VR64:$src1, i64mem:$src2, i16imm:$src3),
+ (ins VR64:$src1, i64mem:$src2, i8imm:$src3),
"palignr\t{$src3, $src2, $dst|$dst, $src2, $src3}",
[]>;
def PALIGNR128rr : SS3AI<0x0F, MRMSrcReg, (outs VR128:$dst),
- (ins VR128:$src1, VR128:$src2, i32imm:$src3),
+ (ins VR128:$src1, VR128:$src2, i8imm:$src3),
"palignr\t{$src3, $src2, $dst|$dst, $src2, $src3}",
[]>, OpSize;
def PALIGNR128rm : SS3AI<0x0F, MRMSrcMem, (outs VR128:$dst),
- (ins VR128:$src1, i128mem:$src2, i32imm:$src3),
+ (ins VR128:$src1, i128mem:$src2, i8imm:$src3),
"palignr\t{$src3, $src2, $dst|$dst, $src2, $src3}",
[]>, OpSize;
}
// palignr patterns.
-def : Pat<(int_x86_ssse3_palign_r VR64:$src1, VR64:$src2, (i16 imm:$src3)),
+def : Pat<(int_x86_ssse3_palign_r VR64:$src1, VR64:$src2, (i8 imm:$src3)),
(PALIGNR64rr VR64:$src1, VR64:$src2, (BYTE_imm imm:$src3))>,
Requires<[HasSSSE3]>;
def : Pat<(int_x86_ssse3_palign_r VR64:$src1,
(memop64 addr:$src2),
- (i16 imm:$src3)),
+ (i8 imm:$src3)),
(PALIGNR64rm VR64:$src1, addr:$src2, (BYTE_imm imm:$src3))>,
Requires<[HasSSSE3]>;
-def : Pat<(int_x86_ssse3_palign_r_128 VR128:$src1, VR128:$src2, (i32 imm:$src3)),
+def : Pat<(int_x86_ssse3_palign_r_128 VR128:$src1, VR128:$src2, (i8 imm:$src3)),
(PALIGNR128rr VR128:$src1, VR128:$src2, (BYTE_imm imm:$src3))>,
Requires<[HasSSSE3]>;
def : Pat<(int_x86_ssse3_palign_r_128 VR128:$src1,
(memopv2i64 addr:$src2),
- (i32 imm:$src3)),
+ (i8 imm:$src3)),
(PALIGNR128rm VR128:$src1, addr:$src2, (BYTE_imm imm:$src3))>,
Requires<[HasSSSE3]>;
diff --git a/lib/Target/X86/X86JITInfo.cpp b/lib/Target/X86/X86JITInfo.cpp
index 0792bdd..ce06f0f 100644
--- a/lib/Target/X86/X86JITInfo.cpp
+++ b/lib/Target/X86/X86JITInfo.cpp
@@ -426,83 +426,77 @@ X86JITInfo::X86JITInfo(X86TargetMachine &tm) : TM(tm) {
void *X86JITInfo::emitGlobalValueIndirectSym(const GlobalValue* GV, void *ptr,
JITCodeEmitter &JCE) {
+ MachineCodeEmitter::BufferState BS;
#if defined (X86_64_JIT)
- JCE.startGVStub(GV, 8, 8);
+ JCE.startGVStub(BS, GV, 8, 8);
JCE.emitWordLE((unsigned)(intptr_t)ptr);
JCE.emitWordLE((unsigned)(((intptr_t)ptr) >> 32));
#else
- JCE.startGVStub(GV, 4, 4);
+ JCE.startGVStub(BS, GV, 4, 4);
JCE.emitWordLE((intptr_t)ptr);
#endif
- return JCE.finishGVStub(GV);
+ return JCE.finishGVStub(BS);
}
-void *X86JITInfo::emitFunctionStub(const Function* F, void *Fn,
+TargetJITInfo::StubLayout X86JITInfo::getStubLayout() {
+ // The 64-bit stub contains:
+ // movabs r10 <- 8-byte-target-address # 10 bytes
+ // call|jmp *r10 # 3 bytes
+ // The 32-bit stub contains a 5-byte call|jmp.
+ // If the stub is a call to the compilation callback, an extra byte is added
+ // to mark it as a stub.
+ StubLayout Result = {14, 4};
+ return Result;
+}
+
+void *X86JITInfo::emitFunctionStub(const Function* F, void *Target,
JITCodeEmitter &JCE) {
+ MachineCodeEmitter::BufferState BS;
// Note, we cast to intptr_t here to silence a -pedantic warning that
// complains about casting a function pointer to a normal pointer.
#if defined (X86_32_JIT) && !defined (_MSC_VER)
- bool NotCC = (Fn != (void*)(intptr_t)X86CompilationCallback &&
- Fn != (void*)(intptr_t)X86CompilationCallback_SSE);
+ bool NotCC = (Target != (void*)(intptr_t)X86CompilationCallback &&
+ Target != (void*)(intptr_t)X86CompilationCallback_SSE);
#else
- bool NotCC = Fn != (void*)(intptr_t)X86CompilationCallback;
+ bool NotCC = Target != (void*)(intptr_t)X86CompilationCallback;
#endif
+ JCE.emitAlignment(4);
+ void *Result = (void*)JCE.getCurrentPCValue();
if (NotCC) {
#if defined (X86_64_JIT)
- JCE.startGVStub(F, 13, 4);
JCE.emitByte(0x49); // REX prefix
JCE.emitByte(0xB8+2); // movabsq r10
- JCE.emitWordLE((unsigned)(intptr_t)Fn);
- JCE.emitWordLE((unsigned)(((intptr_t)Fn) >> 32));
+ JCE.emitWordLE((unsigned)(intptr_t)Target);
+ JCE.emitWordLE((unsigned)(((intptr_t)Target) >> 32));
JCE.emitByte(0x41); // REX prefix
JCE.emitByte(0xFF); // jmpq *r10
JCE.emitByte(2 | (4 << 3) | (3 << 6));
#else
- JCE.startGVStub(F, 5, 4);
JCE.emitByte(0xE9);
- JCE.emitWordLE((intptr_t)Fn-JCE.getCurrentPCValue()-4);
+ JCE.emitWordLE((intptr_t)Target-JCE.getCurrentPCValue()-4);
#endif
- return JCE.finishGVStub(F);
+ return Result;
}
#if defined (X86_64_JIT)
- JCE.startGVStub(F, 14, 4);
JCE.emitByte(0x49); // REX prefix
JCE.emitByte(0xB8+2); // movabsq r10
- JCE.emitWordLE((unsigned)(intptr_t)Fn);
- JCE.emitWordLE((unsigned)(((intptr_t)Fn) >> 32));
+ JCE.emitWordLE((unsigned)(intptr_t)Target);
+ JCE.emitWordLE((unsigned)(((intptr_t)Target) >> 32));
JCE.emitByte(0x41); // REX prefix
JCE.emitByte(0xFF); // callq *r10
JCE.emitByte(2 | (2 << 3) | (3 << 6));
#else
- JCE.startGVStub(F, 6, 4);
JCE.emitByte(0xE8); // Call with 32 bit pc-rel destination...
- JCE.emitWordLE((intptr_t)Fn-JCE.getCurrentPCValue()-4);
+ JCE.emitWordLE((intptr_t)Target-JCE.getCurrentPCValue()-4);
#endif
// This used to use 0xCD, but that value is used by JITMemoryManager to
// initialize the buffer with garbage, which means it may follow a
// noreturn function call, confusing X86CompilationCallback2. PR 4929.
JCE.emitByte(0xCE); // Interrupt - Just a marker identifying the stub!
- return JCE.finishGVStub(F);
-}
-
-void X86JITInfo::emitFunctionStubAtAddr(const Function* F, void *Fn, void *Stub,
- JITCodeEmitter &JCE) {
- // Note, we cast to intptr_t here to silence a -pedantic warning that
- // complains about casting a function pointer to a normal pointer.
- JCE.startGVStub(F, Stub, 5);
- JCE.emitByte(0xE9);
-#if defined (X86_64_JIT) && !defined (NDEBUG)
- // Yes, we need both of these casts, or some broken versions of GCC (4.2.4)
- // get the signed-ness of the expression wrong. Go figure.
- intptr_t Displacement = (intptr_t)Fn - (intptr_t)JCE.getCurrentPCValue() - 5;
- assert(((Displacement << 32) >> 32) == Displacement
- && "PIC displacement does not fit in displacement field!");
-#endif
- JCE.emitWordLE((intptr_t)Fn-JCE.getCurrentPCValue()-4);
- JCE.finishGVStub(F);
+ return Result;
}
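
The StubLayout of {14, 4} returned above falls directly out of the byte-level shape of the 64-bit stub: a 10-byte movabs into r10, a 3-byte indirect call, and the 1-byte 0xCE marker. A hedged sketch that assembles those same bytes into a plain buffer (a model of the emitter calls, not the JITCodeEmitter API):

#include <cstdint>
#include <vector>

// Model of the 64-bit call stub emitted above:
//   movabsq $target, %r10   # 10 bytes
//   callq   *%r10           # 3 bytes
//   .byte   0xCE            # 1-byte stub marker
std::vector<uint8_t> emit_stub64(uint64_t target) {
  std::vector<uint8_t> s;
  s.push_back(0x49);                      // REX.W+B prefix
  s.push_back(0xB8 + 2);                  // movabsq imm64, %r10
  for (int i = 0; i < 8; ++i)
    s.push_back(uint8_t(target >> (8 * i)));
  s.push_back(0x41);                      // REX.B prefix
  s.push_back(0xFF);                      // opcode group 5
  s.push_back(2 | (2 << 3) | (3 << 6));   // ModRM: callq *%r10
  s.push_back(0xCE);                      // marker identifying the stub
  return s;                               // size() == 14, per StubLayout
}
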
/// getPICJumpTableEntry - Returns the value of the jumptable entry for the
diff --git a/lib/Target/X86/X86JITInfo.h b/lib/Target/X86/X86JITInfo.h
index c381433..238420c 100644
--- a/lib/Target/X86/X86JITInfo.h
+++ b/lib/Target/X86/X86JITInfo.h
@@ -43,18 +43,16 @@ namespace llvm {
virtual void *emitGlobalValueIndirectSym(const GlobalValue* GV, void *ptr,
JITCodeEmitter &JCE);
+ // getStubLayout - Returns the size and alignment of the largest call stub
+ // on X86.
+ virtual StubLayout getStubLayout();
+
/// emitFunctionStub - Use the specified JITCodeEmitter object to emit a
/// small native function that simply calls the function at the specified
/// address.
- virtual void *emitFunctionStub(const Function* F, void *Fn,
+ virtual void *emitFunctionStub(const Function* F, void *Target,
JITCodeEmitter &JCE);
- /// emitFunctionStubAtAddr - Use the specified JITCodeEmitter object to
- /// emit a small native function that simply calls Fn. Emit the stub into
- /// the supplied buffer.
- virtual void emitFunctionStubAtAddr(const Function* F, void *Fn,
- void *Buffer, JITCodeEmitter &JCE);
-
/// getPICJumpTableEntry - Returns the value of the jumptable entry for the
/// specific basic block.
virtual uintptr_t getPICJumpTableEntry(uintptr_t BB, uintptr_t JTBase);
diff --git a/lib/Target/X86/X86RegisterInfo.cpp b/lib/Target/X86/X86RegisterInfo.cpp
index f577fcf..33852bd 100644
--- a/lib/Target/X86/X86RegisterInfo.cpp
+++ b/lib/Target/X86/X86RegisterInfo.cpp
@@ -1262,7 +1262,7 @@ void X86RegisterInfo::emitEpilogue(MachineFunction &MF,
else if (RetOpcode== X86::TCRETURNri64)
BuildMI(MBB, MBBI, DL, TII.get(X86::TAILJMPr64), JumpTarget.getReg());
else
- BuildMI(MBB, MBBI, DL, TII.get(X86::TAILJMPr), JumpTarget.getReg());
+ BuildMI(MBB, MBBI, DL, TII.get(X86::TAILJMPr), JumpTarget.getReg());
// Delete the pseudo instruction TCRETURN.
MBB.erase(MBBI);
diff --git a/lib/Target/X86/X86Subtarget.cpp b/lib/Target/X86/X86Subtarget.cpp
index b901c14..661f560 100644
--- a/lib/Target/X86/X86Subtarget.cpp
+++ b/lib/Target/X86/X86Subtarget.cpp
@@ -28,6 +28,21 @@ using namespace llvm;
#include <intrin.h>
#endif
+/// ClassifyBlockAddressReference - Classify a blockaddress reference for the
+/// current subtarget according to how we should reference it in a non-pcrel
+/// context.
+unsigned char X86Subtarget::
+ClassifyBlockAddressReference() const {
+ if (isPICStyleGOT()) // 32-bit ELF targets.
+ return X86II::MO_GOTOFF;
+
+ if (isPICStyleStubPIC()) // Darwin/32 in PIC mode.
+ return X86II::MO_PIC_BASE_OFFSET;
+
+ // Direct static reference to label.
+ return X86II::MO_NO_FLAG;
+}
+
/// ClassifyGlobalReference - Classify a global variable reference for the
/// current subtarget according to how we should reference it in a non-pcrel
/// context.
diff --git a/lib/Target/X86/X86Subtarget.h b/lib/Target/X86/X86Subtarget.h
index 23f2841..fb457dd 100644
--- a/lib/Target/X86/X86Subtarget.h
+++ b/lib/Target/X86/X86Subtarget.h
@@ -199,6 +199,11 @@ public:
unsigned char ClassifyGlobalReference(const GlobalValue *GV,
const TargetMachine &TM)const;
+ /// ClassifyBlockAddressReference - Classify a blockaddress reference for the
+ /// current subtarget according to how we should reference it in a non-pcrel
+ /// context.
+ unsigned char ClassifyBlockAddressReference() const;
+
/// IsLegalToCallImmediateAddr - Return true if the subtarget allows calls
/// to immediate address.
bool IsLegalToCallImmediateAddr(const TargetMachine &TM) const;
diff --git a/lib/Target/XCore/XCoreISelLowering.cpp b/lib/Target/XCore/XCoreISelLowering.cpp
index 00dcce6..f310456 100644
--- a/lib/Target/XCore/XCoreISelLowering.cpp
+++ b/lib/Target/XCore/XCoreISelLowering.cpp
@@ -142,10 +142,6 @@ XCoreTargetLowering::XCoreTargetLowering(XCoreTargetMachine &XTM)
setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand);
setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Expand);
- // Debug
- setOperationAction(ISD::DBG_STOPPOINT, MVT::Other, Expand);
- setOperationAction(ISD::DEBUG_LOC, MVT::Other, Expand);
-
maxStoresPerMemset = 4;
maxStoresPerMemmove = maxStoresPerMemcpy = 2;
@@ -295,7 +291,7 @@ LowerBlockAddress(SDValue Op, SelectionDAG &DAG)
DebugLoc DL = Op.getDebugLoc();
BlockAddress *BA = cast<BlockAddressSDNode>(Op)->getBlockAddress();
- SDValue Result = DAG.getBlockAddress(BA, DL, /*isTarget=*/true);
+ SDValue Result = DAG.getBlockAddress(BA, getPointerTy(), /*isTarget=*/true);
return DAG.getNode(XCoreISD::PCRelativeWrapper, DL, getPointerTy(), Result);
}
diff --git a/lib/Transforms/IPO/FunctionAttrs.cpp b/lib/Transforms/IPO/FunctionAttrs.cpp
index b3a832f..a16d335 100644
--- a/lib/Transforms/IPO/FunctionAttrs.cpp
+++ b/lib/Transforms/IPO/FunctionAttrs.cpp
@@ -212,7 +212,7 @@ bool FunctionAttrs::AddNoCaptureAttrs(const std::vector<CallGraphNode *> &SCC) {
for (Function::arg_iterator A = F->arg_begin(), E = F->arg_end(); A!=E; ++A)
if (isa<PointerType>(A->getType()) && !A->hasNoCaptureAttr() &&
- !PointerMayBeCaptured(A, true)) {
+ !PointerMayBeCaptured(A, true, /*StoreCaptures=*/false)) {
A->addAttr(Attribute::NoCapture);
++NumNoCapture;
Changed = true;
@@ -280,7 +280,7 @@ bool FunctionAttrs::IsFunctionMallocLike(Function *F,
return false; // Did not come from an allocation.
}
- if (PointerMayBeCaptured(RetVal, false))
+ if (PointerMayBeCaptured(RetVal, false, /*StoreCaptures=*/false))
return false;
}
diff --git a/lib/Transforms/IPO/GlobalOpt.cpp b/lib/Transforms/IPO/GlobalOpt.cpp
index 442f2fb..4635d0e 100644
--- a/lib/Transforms/IPO/GlobalOpt.cpp
+++ b/lib/Transforms/IPO/GlobalOpt.cpp
@@ -1898,6 +1898,15 @@ bool GlobalOpt::OptimizeGlobalVars(Module &M) {
// Global variables without names cannot be referenced outside this module.
if (!GV->hasName() && !GV->isDeclaration())
GV->setLinkage(GlobalValue::InternalLinkage);
+ // Simplify the initializer.
+ if (GV->hasInitializer())
+ if (ConstantExpr *CE = dyn_cast<ConstantExpr>(GV->getInitializer())) {
+ TargetData *TD = getAnalysisIfAvailable<TargetData>();
+ Constant *New = ConstantFoldConstantExpression(CE, TD);
+ if (New && New != CE)
+ GV->setInitializer(New);
+ }
+ // Do more involved optimizations if the global is internal.
if (!GV->isConstant() && GV->hasLocalLinkage() &&
GV->hasInitializer())
Changed |= ProcessInternalGlobal(GV, GVI);
diff --git a/lib/Transforms/IPO/IPConstantPropagation.cpp b/lib/Transforms/IPO/IPConstantPropagation.cpp
index 023e642..df2456f 100644
--- a/lib/Transforms/IPO/IPConstantPropagation.cpp
+++ b/lib/Transforms/IPO/IPConstantPropagation.cpp
@@ -19,7 +19,6 @@
#include "llvm/Transforms/IPO.h"
#include "llvm/Constants.h"
#include "llvm/Instructions.h"
-#include "llvm/LLVMContext.h"
#include "llvm/Module.h"
#include "llvm/Pass.h"
#include "llvm/Analysis/ValueTracking.h"
@@ -155,7 +154,7 @@ bool IPCP::PropagateConstantsIntoArguments(Function &F) {
// callers will be updated to use the value they pass in directly instead of
// using the return value.
bool IPCP::PropagateConstantReturn(Function &F) {
- if (F.getReturnType() == Type::getVoidTy(F.getContext()))
+ if (F.getReturnType()->isVoidTy())
return false; // No return value.
// If this function could be overridden later in the link stage, we can't
@@ -163,8 +162,6 @@ bool IPCP::PropagateConstantReturn(Function &F) {
if (F.mayBeOverridden())
return false;
- LLVMContext &Context = F.getContext();
-
// Check to see if this function returns a constant.
SmallVector<Value *,4> RetVals;
const StructType *STy = dyn_cast<StructType>(F.getReturnType());
@@ -188,7 +185,7 @@ bool IPCP::PropagateConstantReturn(Function &F) {
if (!STy)
V = RI->getOperand(i);
else
- V = FindInsertedValue(RI->getOperand(0), i, Context);
+ V = FindInsertedValue(RI->getOperand(0), i);
if (V) {
// Ignore undefs, we can change them into anything
diff --git a/lib/Transforms/Scalar/GVN.cpp b/lib/Transforms/Scalar/GVN.cpp
index a8f39c1..6f1c32c 100644
--- a/lib/Transforms/Scalar/GVN.cpp
+++ b/lib/Transforms/Scalar/GVN.cpp
@@ -1425,26 +1425,40 @@ bool GVN::processNonLocalLoad(LoadInst *LI,
assert(UnavailablePred != 0 &&
"Fully available value should be eliminated above!");
- // If the loaded pointer is PHI node defined in this block, do PHI translation
- // to get its value in the predecessor.
- Value *LoadPtr = LI->getOperand(0)->DoPHITranslation(LoadBB, UnavailablePred);
-
- // Make sure the value is live in the predecessor. If it was defined by a
- // non-PHI instruction in this block, we don't know how to recompute it above.
- if (Instruction *LPInst = dyn_cast<Instruction>(LoadPtr))
- if (!DT->dominates(LPInst->getParent(), UnavailablePred)) {
- DEBUG(errs() << "COULDN'T PRE LOAD BECAUSE PTR IS UNAVAILABLE IN PRED: "
- << *LPInst << '\n' << *LI << "\n");
- return false;
- }
-
// We don't currently handle critical edges :(
if (UnavailablePred->getTerminator()->getNumSuccessors() != 1) {
DEBUG(errs() << "COULD NOT PRE LOAD BECAUSE OF CRITICAL EDGE '"
<< UnavailablePred->getName() << "': " << *LI << '\n');
return false;
}
-
+
+ // Do PHI translation to get its value in the predecessor if necessary. The
+ // returned pointer (if non-null) is guaranteed to dominate UnavailablePred.
+ //
+ // FIXME: This may insert a computation, but we don't tell scalar GVN
+ // optimization stuff about it. How do we do this?
+ SmallVector<Instruction*, 8> NewInsts;
+ Value *LoadPtr = 0;
+
+ // If all preds have a single successor, then we know it is safe to insert the
+ // load on the pred (?!?), so we can insert code to materialize the pointer if
+ // it is not available.
+ if (allSingleSucc) {
+ LoadPtr = MD->InsertPHITranslatedPointer(LI->getOperand(0), LoadBB,
+ UnavailablePred, TD, *DT,NewInsts);
+ } else {
+ LoadPtr = MD->GetAvailablePHITranslatedValue(LI->getOperand(0), LoadBB,
+ UnavailablePred, TD, *DT);
+ }
+
+ // If we couldn't find or insert a computation of this phi translated value,
+ // we fail PRE.
+ if (LoadPtr == 0) {
+ DEBUG(errs() << "COULDN'T INSERT PHI TRANSLATED VALUE OF: "
+ << *LI->getOperand(0) << "\n");
+ return false;
+ }
+
// Make sure it is valid to move this load here. We have to watch out for:
// @1 = getelementptr (i8* p, ...
// test p and branch if == 0
@@ -1455,14 +1469,20 @@ bool GVN::processNonLocalLoad(LoadInst *LI,
// we do not have this case. Otherwise, check that the load is safe to
// put anywhere; this can be improved, but should be conservatively safe.
if (!allSingleSucc &&
- !isSafeToLoadUnconditionally(LoadPtr, UnavailablePred->getTerminator()))
+ // FIXME: REEVALUATE THIS.
+ !isSafeToLoadUnconditionally(LoadPtr, UnavailablePred->getTerminator())) {
+ assert(NewInsts.empty() && "Should not have inserted instructions");
return false;
+ }
// Okay, we can eliminate this load by inserting a reload in the predecessor
// and using PHI construction to get the value in the other predecessors, do
// it.
DEBUG(errs() << "GVN REMOVING PRE LOAD: " << *LI << '\n');
-
+ DEBUG(if (!NewInsts.empty())
+ errs() << "INSERTED " << NewInsts.size() << " INSTS: "
+ << *NewInsts.back() << '\n');
+
Value *NewLoad = new LoadInst(LoadPtr, LI->getName()+".pre", false,
LI->getAlignment(),
UnavailablePred->getTerminator());
diff --git a/lib/Transforms/Scalar/InstructionCombining.cpp b/lib/Transforms/Scalar/InstructionCombining.cpp
index 1c48366..d12ad81 100644
--- a/lib/Transforms/Scalar/InstructionCombining.cpp
+++ b/lib/Transforms/Scalar/InstructionCombining.cpp
@@ -2163,8 +2163,8 @@ bool InstCombiner::WillNotOverflowSignedAdd(Value *LHS, Value *RHS) {
// Add has the property that adding any two 2's complement numbers can only
// have one carry bit which can change a sign. As such, if LHS and RHS each
- // have at least two sign bits, we know that the addition of the two values will
- // sign extend fine.
+ // have at least two sign bits, we know that the addition of the two values
+ // will sign extend fine.
if (ComputeNumSignBits(LHS) > 1 && ComputeNumSignBits(RHS) > 1)
return true;
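
The sign-bit argument above can be checked directly: operands with at least two identical leading bits lie in [-2^30, 2^30-1], so their mathematical sum still fits in 32 bits. A small self-contained check (numSignBits below is an illustrative stand-in for ComputeNumSignBits, not the real helper):

#include <cassert>
#include <cstdint>

// Stand-in for ComputeNumSignBits: leading bits equal to the sign bit,
// counting the sign bit itself.
unsigned numSignBits(uint32_t v) {
  uint32_t sign = (v >> 31) & 1;
  unsigned n = 1;
  for (int i = 30; i >= 0 && ((v >> i) & 1) == sign; --i)
    ++n;
  return n;
}

int main() {
  int32_t a = 0x3FFFFFFF, b = 0x3FFFFFFF;   // each has two sign bits
  assert(numSignBits((uint32_t)a) >= 2 && numSignBits((uint32_t)b) >= 2);
  int64_t wide = (int64_t)a + (int64_t)b;
  assert(wide == (int32_t)wide);            // the i32 add cannot overflow
}
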
@@ -2184,15 +2184,12 @@ Instruction *InstCombiner::visitAdd(BinaryOperator &I) {
bool Changed = SimplifyCommutative(I);
Value *LHS = I.getOperand(0), *RHS = I.getOperand(1);
- if (Constant *RHSC = dyn_cast<Constant>(RHS)) {
- // X + undef -> undef
- if (isa<UndefValue>(RHS))
- return ReplaceInstUsesWith(I, RHS);
-
- // X + 0 --> X
- if (RHSC->isNullValue())
- return ReplaceInstUsesWith(I, LHS);
+ if (Value *V = SimplifyAddInst(LHS, RHS, I.hasNoSignedWrap(),
+ I.hasNoUnsignedWrap(), TD))
+ return ReplaceInstUsesWith(I, V);
+
+ if (Constant *RHSC = dyn_cast<Constant>(RHS)) {
if (ConstantInt *CI = dyn_cast<ConstantInt>(RHSC)) {
// X + (signbit) --> X ^ signbit
const APInt& Val = CI->getValue();
@@ -4070,6 +4067,21 @@ Value *InstCombiner::FoldLogicalPlusAnd(Value *LHS, Value *RHS,
/// FoldAndOfICmps - Fold (icmp)&(icmp) if possible.
Instruction *InstCombiner::FoldAndOfICmps(Instruction &I,
ICmpInst *LHS, ICmpInst *RHS) {
+ // (icmp eq A, null) & (icmp eq B, null) -->
+ // (icmp eq (ptrtoint(A)|ptrtoint(B)), 0)
+ if (TD &&
+ LHS->getPredicate() == ICmpInst::ICMP_EQ &&
+ RHS->getPredicate() == ICmpInst::ICMP_EQ &&
+ isa<ConstantPointerNull>(LHS->getOperand(1)) &&
+ isa<ConstantPointerNull>(RHS->getOperand(1))) {
+ const Type *IntPtrTy = TD->getIntPtrType(I.getContext());
+ Value *A = Builder->CreatePtrToInt(LHS->getOperand(0), IntPtrTy);
+ Value *B = Builder->CreatePtrToInt(RHS->getOperand(0), IntPtrTy);
+ Value *NewOr = Builder->CreateOr(A, B);
+ return new ICmpInst(ICmpInst::ICMP_EQ, NewOr,
+ Constant::getNullValue(IntPtrTy));
+ }
+
Value *Val, *Val2;
ConstantInt *LHSCst, *RHSCst;
ICmpInst::Predicate LHSCC, RHSCC;
@@ -4081,12 +4093,20 @@ Instruction *InstCombiner::FoldAndOfICmps(Instruction &I,
m_ConstantInt(RHSCst))))
return 0;
- // (icmp ult A, C) & (icmp ult B, C) --> (icmp ult (A|B), C)
- // where C is a power of 2
- if (LHSCst == RHSCst && LHSCC == RHSCC && LHSCC == ICmpInst::ICMP_ULT &&
- LHSCst->getValue().isPowerOf2()) {
- Value *NewOr = Builder->CreateOr(Val, Val2);
- return new ICmpInst(LHSCC, NewOr, LHSCst);
+ if (LHSCst == RHSCst && LHSCC == RHSCC) {
+ // (icmp ult A, C) & (icmp ult B, C) --> (icmp ult (A|B), C)
+ // where C is a power of 2
+ if (LHSCC == ICmpInst::ICMP_ULT &&
+ LHSCst->getValue().isPowerOf2()) {
+ Value *NewOr = Builder->CreateOr(Val, Val2);
+ return new ICmpInst(LHSCC, NewOr, LHSCst);
+ }
+
+ // (icmp eq A, 0) & (icmp eq B, 0) --> (icmp eq (A|B), 0)
+ if (LHSCC == ICmpInst::ICMP_EQ && LHSCst->isZero()) {
+ Value *NewOr = Builder->CreateOr(Val, Val2);
+ return new ICmpInst(LHSCC, NewOr, LHSCst);
+ }
}
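
Both folds in this hunk, and the null-pointer variant added above it, rest on one bit trick: two values compare equal to zero (or below a power of two) exactly when their bitwise OR does. A quick exhaustive check over 8-bit values (standalone, not InstCombine code):

#include <cassert>
#include <cstdint>

int main() {
  const uint32_t C = 16;   // any power of 2 works here
  for (uint32_t a = 0; a < 256; ++a)
    for (uint32_t b = 0; b < 256; ++b) {
      // (a == 0) & (b == 0)  <=>  ((a | b) == 0)
      assert(((a == 0) && (b == 0)) == ((a | b) == 0));
      // (a <u C) & (b <u C)  <=>  ((a | b) <u C), valid since C is a
      // power of 2: neither operand may set a bit at or above log2(C).
      assert(((a < C) && (b < C)) == ((a | b) < C));
    }
  return 0;
}
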
// From here on, we only handle:
@@ -4322,7 +4342,6 @@ Instruction *InstCombiner::visitAnd(BinaryOperator &I) {
if (Value *V = SimplifyAndInst(Op0, Op1, TD))
return ReplaceInstUsesWith(I, V);
-
// See if we can simplify any instructions used by the instruction whose sole
// purpose is to compute bits we don't care about.
@@ -4743,16 +4762,37 @@ static Instruction *MatchSelectFromAndOr(Value *A, Value *B,
/// FoldOrOfICmps - Fold (icmp)|(icmp) if possible.
Instruction *InstCombiner::FoldOrOfICmps(Instruction &I,
ICmpInst *LHS, ICmpInst *RHS) {
+ // (icmp ne A, null) | (icmp ne B, null) -->
+ // (icmp ne (ptrtoint(A)|ptrtoint(B)), 0)
+ if (TD &&
+ LHS->getPredicate() == ICmpInst::ICMP_NE &&
+ RHS->getPredicate() == ICmpInst::ICMP_NE &&
+ isa<ConstantPointerNull>(LHS->getOperand(1)) &&
+ isa<ConstantPointerNull>(RHS->getOperand(1))) {
+ const Type *IntPtrTy = TD->getIntPtrType(I.getContext());
+ Value *A = Builder->CreatePtrToInt(LHS->getOperand(0), IntPtrTy);
+ Value *B = Builder->CreatePtrToInt(RHS->getOperand(0), IntPtrTy);
+ Value *NewOr = Builder->CreateOr(A, B);
+ return new ICmpInst(ICmpInst::ICMP_NE, NewOr,
+ Constant::getNullValue(IntPtrTy));
+ }
+
Value *Val, *Val2;
ConstantInt *LHSCst, *RHSCst;
ICmpInst::Predicate LHSCC, RHSCC;
// This only handles icmp of constants: (icmp1 A, C1) | (icmp2 B, C2).
- if (!match(LHS, m_ICmp(LHSCC, m_Value(Val),
- m_ConstantInt(LHSCst))) ||
- !match(RHS, m_ICmp(RHSCC, m_Value(Val2),
- m_ConstantInt(RHSCst))))
+ if (!match(LHS, m_ICmp(LHSCC, m_Value(Val), m_ConstantInt(LHSCst))) ||
+ !match(RHS, m_ICmp(RHSCC, m_Value(Val2), m_ConstantInt(RHSCst))))
return 0;
+
+
+ // (icmp ne A, 0) | (icmp ne B, 0) --> (icmp ne (A|B), 0)
+ if (LHSCst == RHSCst && LHSCC == RHSCC &&
+ LHSCC == ICmpInst::ICMP_NE && LHSCst->isZero()) {
+ Value *NewOr = Builder->CreateOr(Val, Val2);
+ return new ICmpInst(LHSCC, NewOr, LHSCst);
+ }
// From here on, we only handle:
// (icmp1 A, C1) | (icmp2 A, C2) --> something simpler.
@@ -8539,6 +8579,36 @@ Instruction *InstCombiner::transformZExtICmp(ICmpInst *ICI, Instruction &CI,
}
}
+ // icmp ne A, B is equal to xor A, B when A and B only really have one bit.
+ // It is also profitable to transform icmp eq into not(xor(A, B)) because that
+ // may lead to additional simplifications.
+ if (ICI->isEquality() && CI.getType() == ICI->getOperand(0)->getType()) {
+ if (const IntegerType *ITy = dyn_cast<IntegerType>(CI.getType())) {
+ uint32_t BitWidth = ITy->getBitWidth();
+ if (BitWidth > 1) {
+ Value *LHS = ICI->getOperand(0);
+ Value *RHS = ICI->getOperand(1);
+
+ APInt KnownZeroLHS(BitWidth, 0), KnownOneLHS(BitWidth, 0);
+ APInt KnownZeroRHS(BitWidth, 0), KnownOneRHS(BitWidth, 0);
+ APInt TypeMask(APInt::getHighBitsSet(BitWidth, BitWidth-1));
+ ComputeMaskedBits(LHS, TypeMask, KnownZeroLHS, KnownOneLHS);
+ ComputeMaskedBits(RHS, TypeMask, KnownZeroRHS, KnownOneRHS);
+
+ if (KnownZeroLHS.countLeadingOnes() == BitWidth-1 &&
+ KnownZeroRHS.countLeadingOnes() == BitWidth-1) {
+ if (!DoXform) return ICI;
+
+ Value *Xor = Builder->CreateXor(LHS, RHS);
+ if (ICI->getPredicate() == ICmpInst::ICMP_EQ)
+ Xor = Builder->CreateXor(Xor, ConstantInt::get(ITy, 1));
+ Xor->takeName(ICI);
+ return ReplaceInstUsesWith(CI, Xor);
+ }
+ }
+ }
+ }
+
return 0;
}
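
The zext(icmp) transform above holds because, for values known to occupy a single bit, inequality is exactly xor and equality is xor followed by flipping that bit. Verified over all four 0/1 pairs:

#include <cassert>
#include <cstdint>

int main() {
  for (uint32_t a = 0; a <= 1; ++a)
    for (uint32_t b = 0; b <= 1; ++b) {
      assert((uint32_t)(a != b) == (a ^ b));        // icmp ne -> xor
      assert((uint32_t)(a == b) == ((a ^ b) ^ 1));  // icmp eq -> not(xor)
    }
}
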
@@ -9842,6 +9912,126 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
if (Operand->getIntrinsicID() == Intrinsic::bswap)
return ReplaceInstUsesWith(CI, Operand->getOperand(1));
break;
+ case Intrinsic::uadd_with_overflow: {
+ Value *LHS = II->getOperand(1), *RHS = II->getOperand(2);
+ const IntegerType *IT = cast<IntegerType>(II->getOperand(1)->getType());
+ uint32_t BitWidth = IT->getBitWidth();
+ APInt Mask = APInt::getSignBit(BitWidth);
+ APInt LHSKnownZero(BitWidth, 0);
+ APInt LHSKnownOne(BitWidth, 0);
+ ComputeMaskedBits(LHS, Mask, LHSKnownZero, LHSKnownOne);
+ bool LHSKnownNegative = LHSKnownOne[BitWidth - 1];
+ bool LHSKnownPositive = LHSKnownZero[BitWidth - 1];
+
+ if (LHSKnownNegative || LHSKnownPositive) {
+ APInt RHSKnownZero(BitWidth, 0);
+ APInt RHSKnownOne(BitWidth, 0);
+ ComputeMaskedBits(RHS, Mask, RHSKnownZero, RHSKnownOne);
+ bool RHSKnownNegative = RHSKnownOne[BitWidth - 1];
+ bool RHSKnownPositive = RHSKnownZero[BitWidth - 1];
+ if (LHSKnownNegative && RHSKnownNegative) {
+ // The sign bit is set in both cases: this MUST overflow.
+ // Create a simple add instruction, and insert it into the struct.
+ Instruction *Add = BinaryOperator::CreateAdd(LHS, RHS, "", &CI);
+ Worklist.Add(Add);
+ Constant *V[] = {
+ UndefValue::get(LHS->getType()), ConstantInt::getTrue(*Context)
+ };
+ Constant *Struct = ConstantStruct::get(*Context, V, 2, false);
+ return InsertValueInst::Create(Struct, Add, 0);
+ }
+
+ if (LHSKnownPositive && RHSKnownPositive) {
+ // The sign bit is clear in both cases: this CANNOT overflow.
+ // Create a simple add instruction, and insert it into the struct.
+ Instruction *Add = BinaryOperator::CreateNUWAdd(LHS, RHS, "", &CI);
+ Worklist.Add(Add);
+ Constant *V[] = {
+ UndefValue::get(LHS->getType()), ConstantInt::getFalse(*Context)
+ };
+ Constant *Struct = ConstantStruct::get(*Context, V, 2, false);
+ return InsertValueInst::Create(Struct, Add, 0);
+ }
+ }
+ }
+ // FALL THROUGH uadd into sadd
+ case Intrinsic::sadd_with_overflow:
+ // Canonicalize constants into the RHS.
+ if (isa<Constant>(II->getOperand(1)) &&
+ !isa<Constant>(II->getOperand(2))) {
+ Value *LHS = II->getOperand(1);
+ II->setOperand(1, II->getOperand(2));
+ II->setOperand(2, LHS);
+ return II;
+ }
+
+ // X + undef -> undef
+ if (isa<UndefValue>(II->getOperand(2)))
+ return ReplaceInstUsesWith(CI, UndefValue::get(II->getType()));
+
+ if (ConstantInt *RHS = dyn_cast<ConstantInt>(II->getOperand(2))) {
+ // X + 0 -> {X, false}
+ if (RHS->isZero()) {
+ Constant *V[] = {
+ UndefValue::get(II->getOperand(0)->getType()),
+ ConstantInt::getFalse(*Context)
+ };
+ Constant *Struct = ConstantStruct::get(*Context, V, 2, false);
+ return InsertValueInst::Create(Struct, II->getOperand(1), 0);
+ }
+ }
+ break;
+ case Intrinsic::usub_with_overflow:
+ case Intrinsic::ssub_with_overflow:
+ // undef - X -> undef
+ // X - undef -> undef
+ if (isa<UndefValue>(II->getOperand(1)) ||
+ isa<UndefValue>(II->getOperand(2)))
+ return ReplaceInstUsesWith(CI, UndefValue::get(II->getType()));
+
+ if (ConstantInt *RHS = dyn_cast<ConstantInt>(II->getOperand(2))) {
+ // X - 0 -> {X, false}
+ if (RHS->isZero()) {
+ Constant *V[] = {
+ UndefValue::get(II->getOperand(1)->getType()),
+ ConstantInt::getFalse(*Context)
+ };
+ Constant *Struct = ConstantStruct::get(*Context, V, 2, false);
+ return InsertValueInst::Create(Struct, II->getOperand(1), 0);
+ }
+ }
+ break;
+ case Intrinsic::umul_with_overflow:
+ case Intrinsic::smul_with_overflow:
+ // Canonicalize constants into the RHS.
+ if (isa<Constant>(II->getOperand(1)) &&
+ !isa<Constant>(II->getOperand(2))) {
+ Value *LHS = II->getOperand(1);
+ II->setOperand(1, II->getOperand(2));
+ II->setOperand(2, LHS);
+ return II;
+ }
+
+ // X * undef -> undef
+ if (isa<UndefValue>(II->getOperand(2)))
+ return ReplaceInstUsesWith(CI, UndefValue::get(II->getType()));
+
+ if (ConstantInt *RHSI = dyn_cast<ConstantInt>(II->getOperand(2))) {
+ // X*0 -> {0, false}
+ if (RHSI->isZero())
+ return ReplaceInstUsesWith(CI, Constant::getNullValue(II->getType()));
+
+ // X * 1 -> {X, false}
+ if (RHSI->equalsInt(1)) {
+ Constant *V[] = {
+ UndefValue::get(II->getOperand(1)->getType()),
+ ConstantInt::getFalse(*Context)
+ };
+ Constant *Struct = ConstantStruct::get(*Context, V, 2, false);
+ return InsertValueInst::Create(Struct, II->getOperand(1), 0);
+ }
+ }
+ break;
case Intrinsic::ppc_altivec_lvx:
case Intrinsic::ppc_altivec_lvxl:
case Intrinsic::x86_sse_loadu_ps:
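
The uadd.with.overflow reasoning added above is easy to validate with ordinary unsigned arithmetic: when both operands have the top bit set the add always wraps, and when both have it clear it never does. A standalone check using uint32_t in place of the intrinsic:

#include <cassert>
#include <cstdint>

// True iff a + b wraps in 32 bits.
bool uaddOverflows(uint32_t a, uint32_t b) {
  return a + b < a;   // standard wrap test for unsigned add
}

int main() {
  const uint32_t SignBit = 0x80000000u;
  // Both sign bits set: each operand is >= 2^31, so the sum is >= 2^32
  // and MUST overflow.
  assert(uaddOverflows(SignBit | 1, SignBit | 2));
  // Both sign bits clear: each operand is <= 2^31 - 1, so the sum is at
  // most 2^32 - 2 and CANNOT overflow.
  assert(!uaddOverflows(SignBit - 1, SignBit - 1));
}
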
@@ -11282,21 +11472,16 @@ Instruction *InstCombiner::visitPHINode(PHINode &PN) {
}
Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {
+ SmallVector<Value*, 8> Ops(GEP.op_begin(), GEP.op_end());
+
+ if (Value *V = SimplifyGEPInst(&Ops[0], Ops.size(), TD))
+ return ReplaceInstUsesWith(GEP, V);
+
Value *PtrOp = GEP.getOperand(0);
- // Eliminate 'getelementptr %P, i32 0' and 'getelementptr %P', they are noops.
- if (GEP.getNumOperands() == 1)
- return ReplaceInstUsesWith(GEP, PtrOp);
if (isa<UndefValue>(GEP.getOperand(0)))
return ReplaceInstUsesWith(GEP, UndefValue::get(GEP.getType()));
- bool HasZeroPointerIndex = false;
- if (Constant *C = dyn_cast<Constant>(GEP.getOperand(1)))
- HasZeroPointerIndex = C->isNullValue();
-
- if (GEP.getNumOperands() == 2 && HasZeroPointerIndex)
- return ReplaceInstUsesWith(GEP, PtrOp);
-
// Eliminate unneeded casts for indices.
if (TD) {
bool MadeChange = false;
@@ -11401,6 +11586,10 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {
return 0;
}
+ bool HasZeroPointerIndex = false;
+ if (ConstantInt *C = dyn_cast<ConstantInt>(GEP.getOperand(1)))
+ HasZeroPointerIndex = C->isZero();
+
// Transform: GEP (bitcast [10 x i8]* X to [0 x i8]*), i32 0, ...
// into : GEP [10 x i8]* X, i32 0, ...
//
@@ -11952,12 +12141,6 @@ Instruction *InstCombiner::visitStoreInst(StoreInst &SI) {
Value *Val = SI.getOperand(0);
Value *Ptr = SI.getOperand(1);
- if (isa<UndefValue>(Ptr)) { // store X, undef -> noop (even if volatile)
- EraseInstFromFunction(SI);
- ++NumCombined;
- return 0;
- }
-
// If the RHS is an alloca with a single use, zapify the store, making the
// alloca dead.
// If the RHS is an alloca with a two uses, the other one being a
@@ -12920,7 +13103,7 @@ Instruction *InstCombiner::visitShuffleVectorInst(ShuffleVectorInst &SVI) {
if (LHSMask.size() == Mask.size()) {
std::vector<unsigned> NewMask;
for (unsigned i = 0, e = Mask.size(); i != e; ++i)
- if (Mask[i] >= 2*e)
+ if (Mask[i] >= e)
NewMask.push_back(2*e);
else
NewMask.push_back(LHSMask[Mask[i]]);
diff --git a/lib/Transforms/Scalar/JumpThreading.cpp b/lib/Transforms/Scalar/JumpThreading.cpp
index 5864113..1b93f34 100644
--- a/lib/Transforms/Scalar/JumpThreading.cpp
+++ b/lib/Transforms/Scalar/JumpThreading.cpp
@@ -158,12 +158,18 @@ bool JumpThreading::runOnFunction(Function &F) {
if (BBI->isTerminator()) {
// Since TryToSimplifyUncondBranchFromEmptyBlock may delete the
// block, we have to make sure it isn't in the LoopHeaders set. We
- // reinsert afterward in the rare case when the block isn't deleted.
+ // reinsert afterward if needed.
bool ErasedFromLoopHeaders = LoopHeaders.erase(BB);
+ BasicBlock *Succ = BI->getSuccessor(0);
- if (TryToSimplifyUncondBranchFromEmptyBlock(BB))
+ if (TryToSimplifyUncondBranchFromEmptyBlock(BB)) {
Changed = true;
- else if (ErasedFromLoopHeaders)
+ // If we deleted BB and BB was the header of a loop, then the
+ // successor is now the header of the loop.
+ BB = Succ;
+ }
+
+ if (ErasedFromLoopHeaders)
LoopHeaders.insert(BB);
}
}
diff --git a/lib/Transforms/Scalar/LICM.cpp b/lib/Transforms/Scalar/LICM.cpp
index 104c873..5511387 100644
--- a/lib/Transforms/Scalar/LICM.cpp
+++ b/lib/Transforms/Scalar/LICM.cpp
@@ -63,15 +63,6 @@ static cl::opt<bool>
DisablePromotion("disable-licm-promotion", cl::Hidden,
cl::desc("Disable memory promotion in LICM pass"));
-// This feature is currently disabled by default because CodeGen is not yet
-// capable of rematerializing these constants in PIC mode, so it can lead to
-// degraded performance. Compile test/CodeGen/X86/remat-constant.ll with
-// -relocation-model=pic to see an example of this.
-static cl::opt<bool>
-EnableLICMConstantMotion("enable-licm-constant-variables", cl::Hidden,
- cl::desc("Enable hoisting/sinking of constant "
- "global variables"));
-
namespace {
struct LICM : public LoopPass {
static char ID; // Pass identification, replacement for typeid
@@ -383,8 +374,7 @@ bool LICM::canSinkOrHoistInst(Instruction &I) {
// Loads from constant memory are always safe to move, even if they end up
// in the same alias set as something that ends up being modified.
- if (EnableLICMConstantMotion &&
- AA->pointsToConstantMemory(LI->getOperand(0)))
+ if (AA->pointsToConstantMemory(LI->getOperand(0)))
return true;
// Don't hoist loads which have may-aliased stores in loop.
@@ -603,7 +593,7 @@ void LICM::sink(Instruction &I) {
if (AI) {
std::vector<AllocaInst*> Allocas;
Allocas.push_back(AI);
- PromoteMemToReg(Allocas, *DT, *DF, AI->getContext(), CurAST);
+ PromoteMemToReg(Allocas, *DT, *DF, CurAST);
}
}
}
@@ -779,7 +769,7 @@ void LICM::PromoteValuesInLoop() {
PromotedAllocas.reserve(PromotedValues.size());
for (unsigned i = 0, e = PromotedValues.size(); i != e; ++i)
PromotedAllocas.push_back(PromotedValues[i].first);
- PromoteMemToReg(PromotedAllocas, *DT, *DF, Preheader->getContext(), CurAST);
+ PromoteMemToReg(PromotedAllocas, *DT, *DF, CurAST);
}
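
The earlier hunk in this file makes hoisting of loads from constant memory unconditional: nothing inside the loop can store to such memory, so the load is loop-invariant by construction. A before/after illustration in plain C++ (the names are invented for the example):

#include <cstdio>

const int Scale = 7;   // constant memory: no store in the loop can alias it

int sumScaled(const int *v, int n) {
  int scale = Scale;   // hoisted load: legal because Scale is constant memory
  int sum = 0;
  for (int i = 0; i < n; ++i)
    sum += v[i] * scale;   // previously: Scale reloaded on every trip
  return sum;
}

int main() {
  int v[] = {1, 2, 3};
  std::printf("%d\n", sumScaled(v, 3));   // prints 42
}
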
/// FindPromotableValuesInLoop - Check the current loop for stores to definite
diff --git a/lib/Transforms/Scalar/SCCP.cpp b/lib/Transforms/Scalar/SCCP.cpp
index c202a2c..d8c59b1 100644
--- a/lib/Transforms/Scalar/SCCP.cpp
+++ b/lib/Transforms/Scalar/SCCP.cpp
@@ -1869,8 +1869,16 @@ bool IPSCCP::runOnModule(Module &M) {
for (unsigned i = 0, e = BlocksToErase.size(); i != e; ++i) {
// If there are any PHI nodes in this successor, drop entries for BB now.
BasicBlock *DeadBB = BlocksToErase[i];
- while (!DeadBB->use_empty()) {
- Instruction *I = cast<Instruction>(DeadBB->use_back());
+ for (Value::use_iterator UI = DeadBB->use_begin(), UE = DeadBB->use_end();
+ UI != UE; ) {
+ // Grab the user and then increment the iterator early, as the user
+ // will be deleted. Step past all adjacent uses from the same user.
+ Instruction *I = dyn_cast<Instruction>(*UI);
+ do { ++UI; } while (UI != UE && *UI == I);
+
+ // Ignore blockaddress users; BasicBlock's dtor will handle them.
+ if (!I) continue;
+
bool Folded = ConstantFoldTerminator(I->getParent());
if (!Folded) {
// The constant folder may not have been able to fold the terminator
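
The rewritten loop above follows the standard discipline for walking a use list whose elements may be deleted out from under the iterator: advance past everything the mutation can touch first, then mutate. The same pattern with a std::list (illustrative, not LLVM's use_iterator):

#include <cassert>
#include <list>

int main() {
  std::list<int> uses = {1, 2, 2, 3};
  for (auto it = uses.begin(), end = uses.end(); it != end; ) {
    int user = *it;
    // Step past all adjacent entries for the same user before mutating,
    // mirroring the do/while in the SCCP loop above.
    do { ++it; } while (it != end && *it == user);
    if (user == 2)
      uses.remove(user);   // invalidates iterators to the removed nodes only
  }
  assert(uses == (std::list<int>{1, 3}));
}
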
diff --git a/lib/Transforms/Scalar/ScalarReplAggregates.cpp b/lib/Transforms/Scalar/ScalarReplAggregates.cpp
index 2e3b694..ae6ad74 100644
--- a/lib/Transforms/Scalar/ScalarReplAggregates.cpp
+++ b/lib/Transforms/Scalar/ScalarReplAggregates.cpp
@@ -192,7 +192,7 @@ bool SROA::performPromotion(Function &F) {
if (Allocas.empty()) break;
- PromoteMemToReg(Allocas, DT, DF, F.getContext());
+ PromoteMemToReg(Allocas, DT, DF);
NumPromoted += Allocas.size();
Changed = true;
}
@@ -469,15 +469,41 @@ void SROA::isSafeElementUse(Value *Ptr, bool isFirstElt, AllocaInst *AI,
case Instruction::GetElementPtr: {
GetElementPtrInst *GEP = cast<GetElementPtrInst>(User);
bool AreAllZeroIndices = isFirstElt;
- if (GEP->getNumOperands() > 1) {
- if (!isa<ConstantInt>(GEP->getOperand(1)) ||
- !cast<ConstantInt>(GEP->getOperand(1))->isZero())
- // Using pointer arithmetic to navigate the array.
- return MarkUnsafe(Info);
-
- if (AreAllZeroIndices)
- AreAllZeroIndices = GEP->hasAllZeroIndices();
+ if (GEP->getNumOperands() > 1 &&
+ (!isa<ConstantInt>(GEP->getOperand(1)) ||
+ !cast<ConstantInt>(GEP->getOperand(1))->isZero()))
+ // Using pointer arithmetic to navigate the array.
+ return MarkUnsafe(Info);
+
+ // Verify that any array subscripts are in range.
+ for (gep_type_iterator GEPIt = gep_type_begin(GEP),
+ E = gep_type_end(GEP); GEPIt != E; ++GEPIt) {
+ // Ignore struct elements, no extra checking needed for these.
+ if (isa<StructType>(*GEPIt))
+ continue;
+
+ // This GEP indexes an array. Verify that this is an in-range
+ // constant integer. Specifically, consider A[0][i]. We cannot know that
+ // the user isn't doing invalid things like allowing i to index an
+ // out-of-range subscript that accesses A[1]. Because of this, we have
+ // to reject SROA of any accesses into structs where any of the
+ // components are variables.
+ ConstantInt *IdxVal = dyn_cast<ConstantInt>(GEPIt.getOperand());
+ if (!IdxVal) return MarkUnsafe(Info);
+
+ // Are all indices still zero?
+ AreAllZeroIndices &= IdxVal->isZero();
+
+ if (const ArrayType *AT = dyn_cast<ArrayType>(*GEPIt)) {
+ if (IdxVal->getZExtValue() >= AT->getNumElements())
+ return MarkUnsafe(Info);
+ } else if (const VectorType *VT = dyn_cast<VectorType>(*GEPIt)) {
+ if (IdxVal->getZExtValue() >= VT->getNumElements())
+ return MarkUnsafe(Info);
+ }
}
+
+
isSafeElementUse(GEP, AreAllZeroIndices, AI, Info);
if (Info.isUnsafe) return;
break;
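
The new in-range check above exists because adjacent array elements are contiguous: an out-of-range or variable subscript at one level of a GEP can silently address a sibling element, which is fatal once SROA has split the aggregate into independent scalars. A two-line demonstration of the aliasing it guards against (the array shape is invented):

#include <cassert>

int main() {
  int A[2][4] = {{0, 1, 2, 3}, {4, 5, 6, 7}};
  // An out-of-range subscript like A[0][5] would address the bytes of
  // A[1][1]; this is exactly why SROA must reject variable or
  // out-of-range indices before replacing A with separate scalars.
  assert((char *)A + 5 * sizeof(int) == (char *)&A[1][1]);
}
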
diff --git a/lib/Transforms/Scalar/SimplifyCFGPass.cpp b/lib/Transforms/Scalar/SimplifyCFGPass.cpp
index 6a81480..e905952 100644
--- a/lib/Transforms/Scalar/SimplifyCFGPass.cpp
+++ b/lib/Transforms/Scalar/SimplifyCFGPass.cpp
@@ -26,7 +26,6 @@
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Constants.h"
#include "llvm/Instructions.h"
-#include "llvm/LLVMContext.h"
#include "llvm/Module.h"
#include "llvm/Attributes.h"
#include "llvm/Support/CFG.h"
@@ -57,7 +56,7 @@ FunctionPass *llvm::createCFGSimplificationPass() {
/// ChangeToUnreachable - Insert an unreachable instruction before the specified
/// instruction, making it and the rest of the code in the block dead.
-static void ChangeToUnreachable(Instruction *I, LLVMContext &Context) {
+static void ChangeToUnreachable(Instruction *I) {
BasicBlock *BB = I->getParent();
// Loop over all of the successors, removing BB's entry from any PHI
// nodes.
@@ -95,8 +94,7 @@ static void ChangeToCall(InvokeInst *II) {
}
static bool MarkAliveBlocks(BasicBlock *BB,
- SmallPtrSet<BasicBlock*, 128> &Reachable,
- LLVMContext &Context) {
+ SmallPtrSet<BasicBlock*, 128> &Reachable) {
SmallVector<BasicBlock*, 128> Worklist;
Worklist.push_back(BB);
@@ -119,7 +117,7 @@ static bool MarkAliveBlocks(BasicBlock *BB,
// though.
++BBI;
if (!isa<UnreachableInst>(BBI)) {
- ChangeToUnreachable(BBI, Context);
+ ChangeToUnreachable(BBI);
Changed = true;
}
break;
@@ -135,7 +133,7 @@ static bool MarkAliveBlocks(BasicBlock *BB,
if (isa<UndefValue>(Ptr) ||
(isa<ConstantPointerNull>(Ptr) &&
SI->getPointerAddressSpace() == 0)) {
- ChangeToUnreachable(SI, Context);
+ ChangeToUnreachable(SI);
Changed = true;
break;
}
@@ -161,7 +159,7 @@ static bool MarkAliveBlocks(BasicBlock *BB,
/// otherwise.
static bool RemoveUnreachableBlocksFromFn(Function &F) {
SmallPtrSet<BasicBlock*, 128> Reachable;
- bool Changed = MarkAliveBlocks(F.begin(), Reachable, F.getContext());
+ bool Changed = MarkAliveBlocks(F.begin(), Reachable);
// If there are unreachable blocks in the CFG...
if (Reachable.size() == F.size())
diff --git a/lib/Transforms/Scalar/SimplifyLibCalls.cpp b/lib/Transforms/Scalar/SimplifyLibCalls.cpp
index 611505e..f9b929c 100644
--- a/lib/Transforms/Scalar/SimplifyLibCalls.cpp
+++ b/lib/Transforms/Scalar/SimplifyLibCalls.cpp
@@ -81,6 +81,11 @@ public:
Value *EmitMemCpy(Value *Dst, Value *Src, Value *Len,
unsigned Align, IRBuilder<> &B);
+ /// EmitMemMove - Emit a call to the memmove function to the builder. This
+ /// always expects that the size has type 'intptr_t' and Dst/Src are pointers.
+ Value *EmitMemMove(Value *Dst, Value *Src, Value *Len,
+ unsigned Align, IRBuilder<> &B);
+
/// EmitMemChr - Emit a call to the memchr function. This assumes that Ptr is
/// a pointer, Val is an i32 value, and Len is an 'intptr_t' value.
Value *EmitMemChr(Value *Ptr, Value *Val, Value *Len, IRBuilder<> &B);
@@ -160,6 +165,21 @@ Value *LibCallOptimization::EmitMemCpy(Value *Dst, Value *Src, Value *Len,
ConstantInt::get(Type::getInt32Ty(*Context), Align));
}
+/// EmitMemMove - Emit a call to the memmove function to the builder. This
+/// always expects that the size has type 'intptr_t' and Dst/Src are pointers.
+Value *LibCallOptimization::EmitMemMove(Value *Dst, Value *Src, Value *Len,
+ unsigned Align, IRBuilder<> &B) {
+ Module *M = Caller->getParent();
+ Intrinsic::ID IID = Intrinsic::memmove;
+ const Type *Tys[1];
+ Tys[0] = TD->getIntPtrType(*Context);
+ Value *MemMove = Intrinsic::getDeclaration(M, IID, Tys, 1);
+ Value *D = CastToCStr(Dst, B);
+ Value *S = CastToCStr(Src, B);
+ Value *A = ConstantInt::get(Type::getInt32Ty(*Context), Align);
+ return B.CreateCall4(MemMove, D, S, Len, A);
+}
+
/// EmitMemChr - Emit a call to the memchr function. This assumes that Ptr is
/// a pointer, Val is an i32 value, and Len is an 'intptr_t' value.
Value *LibCallOptimization::EmitMemChr(Value *Ptr, Value *Val,
@@ -512,27 +532,6 @@ static bool IsOnlyUsedInZeroEqualityComparison(Value *V) {
}
//===----------------------------------------------------------------------===//
-// Miscellaneous LibCall/Intrinsic Optimizations
-//===----------------------------------------------------------------------===//
-
-namespace {
-struct SizeOpt : public LibCallOptimization {
- virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
- // TODO: We can do more with this, but delaying to here should be no change
- // in behavior.
- ConstantInt *Const = dyn_cast<ConstantInt>(CI->getOperand(2));
-
- if (!Const) return 0;
-
- if (Const->getZExtValue() < 2)
- return Constant::getAllOnesValue(Const->getType());
- else
- return ConstantInt::get(Const->getType(), 0);
- }
-};
-}
-
-//===----------------------------------------------------------------------===//
// String and Memory LibCall Optimizations
//===----------------------------------------------------------------------===//
@@ -1010,16 +1009,7 @@ struct MemMoveOpt : public LibCallOptimization {
return 0;
// memmove(x, y, n) -> llvm.memmove(x, y, n, 1)
- Module *M = Caller->getParent();
- Intrinsic::ID IID = Intrinsic::memmove;
- const Type *Tys[1];
- Tys[0] = TD->getIntPtrType(*Context);
- Value *MemMove = Intrinsic::getDeclaration(M, IID, Tys, 1);
- Value *Dst = CastToCStr(CI->getOperand(1), B);
- Value *Src = CastToCStr(CI->getOperand(2), B);
- Value *Size = CI->getOperand(3);
- Value *Align = ConstantInt::get(Type::getInt32Ty(*Context), 1);
- B.CreateCall4(MemMove, Dst, Src, Size, Align);
+ EmitMemMove(CI->getOperand(1), CI->getOperand(2), CI->getOperand(3), 1, B);
return CI->getOperand(1);
}
};
@@ -1048,6 +1038,118 @@ struct MemSetOpt : public LibCallOptimization {
};
//===----------------------------------------------------------------------===//
+// Object Size Checking Optimizations
+//===----------------------------------------------------------------------===//
+
+//===---------------------------------------===//
+// 'object size'
+namespace {
+struct SizeOpt : public LibCallOptimization {
+ virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
+ // TODO: We can do more with this, but delaying to here should be no change
+ // in behavior.
+ ConstantInt *Const = dyn_cast<ConstantInt>(CI->getOperand(2));
+
+ if (!Const) return 0;
+
+ const Type *Ty = Callee->getFunctionType()->getReturnType();
+
+ if (Const->getZExtValue() < 2)
+ return Constant::getAllOnesValue(Ty);
+ else
+ return ConstantInt::get(Ty, 0);
+ }
+};
+}
+
+//===---------------------------------------===//
+// 'memcpy_chk' Optimizations
+
+struct MemCpyChkOpt : public LibCallOptimization {
+ virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
+ // These optimizations require TargetData.
+ if (!TD) return 0;
+
+ const FunctionType *FT = Callee->getFunctionType();
+ if (FT->getNumParams() != 4 || FT->getReturnType() != FT->getParamType(0) ||
+ !isa<PointerType>(FT->getParamType(0)) ||
+ !isa<PointerType>(FT->getParamType(1)) ||
+ !isa<IntegerType>(FT->getParamType(3)) ||
+ FT->getParamType(2) != TD->getIntPtrType(*Context))
+ return 0;
+
+ ConstantInt *SizeCI = dyn_cast<ConstantInt>(CI->getOperand(4));
+ if (!SizeCI)
+ return 0;
+ if (SizeCI->isAllOnesValue()) {
+ EmitMemCpy(CI->getOperand(1), CI->getOperand(2), CI->getOperand(3), 1, B);
+ return CI->getOperand(1);
+ }
+
+ return 0;
+ }
+};
+
+//===---------------------------------------===//
+// 'memset_chk' Optimizations
+
+struct MemSetChkOpt : public LibCallOptimization {
+ virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
+ // These optimizations require TargetData.
+ if (!TD) return 0;
+
+ const FunctionType *FT = Callee->getFunctionType();
+ if (FT->getNumParams() != 4 || FT->getReturnType() != FT->getParamType(0) ||
+ !isa<PointerType>(FT->getParamType(0)) ||
+ !isa<IntegerType>(FT->getParamType(1)) ||
+ !isa<IntegerType>(FT->getParamType(3)) ||
+ FT->getParamType(2) != TD->getIntPtrType(*Context))
+ return 0;
+
+ ConstantInt *SizeCI = dyn_cast<ConstantInt>(CI->getOperand(4));
+ if (!SizeCI)
+ return 0;
+ if (SizeCI->isAllOnesValue()) {
+ Value *Val = B.CreateIntCast(CI->getOperand(2), Type::getInt8Ty(*Context),
+ false);
+ EmitMemSet(CI->getOperand(1), Val, CI->getOperand(3), B);
+ return CI->getOperand(1);
+ }
+
+ return 0;
+ }
+};
+
+//===---------------------------------------===//
+// 'memmove_chk' Optimizations
+
+struct MemMoveChkOpt : public LibCallOptimization {
+ virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
+ // These optimizations require TargetData.
+ if (!TD) return 0;
+
+ const FunctionType *FT = Callee->getFunctionType();
+ if (FT->getNumParams() != 4 || FT->getReturnType() != FT->getParamType(0) ||
+ !isa<PointerType>(FT->getParamType(0)) ||
+ !isa<PointerType>(FT->getParamType(1)) ||
+ !isa<IntegerType>(FT->getParamType(3)) ||
+ FT->getParamType(2) != TD->getIntPtrType(*Context))
+ return 0;
+
+ ConstantInt *SizeCI = dyn_cast<ConstantInt>(CI->getOperand(4));
+ if (!SizeCI)
+ return 0;
+ if (SizeCI->isAllOnesValue()) {
+ EmitMemMove(CI->getOperand(1), CI->getOperand(2), CI->getOperand(3),
+ 1, B);
+ return CI->getOperand(1);
+ }
+
+ return 0;
+ }
+};
+
+//===----------------------------------------------------------------------===//
// Math Library Optimizations
//===----------------------------------------------------------------------===//
@@ -1356,7 +1458,7 @@ struct PrintFOpt : public LibCallOptimization {
if (FormatStr == "%c" && CI->getNumOperands() > 2 &&
isa<IntegerType>(CI->getOperand(2)->getType())) {
Value *Res = EmitPutChar(CI->getOperand(2), B);
-
+
if (CI->use_empty()) return CI;
return B.CreateIntCast(Res, CI->getType(), true);
}
@@ -1586,7 +1688,10 @@ namespace {
// Formatting and IO Optimizations
SPrintFOpt SPrintF; PrintFOpt PrintF;
FWriteOpt FWrite; FPutsOpt FPuts; FPrintFOpt FPrintF;
+
+ // Object Size Checking
SizeOpt ObjectSize;
+ MemCpyChkOpt MemCpyChk; MemSetChkOpt MemSetChk; MemMoveChkOpt MemMoveChk;
bool Modified; // This is only used by doInitialization.
public:
@@ -1692,9 +1797,13 @@ void SimplifyLibCalls::InitOptimizations() {
Optimizations["fwrite"] = &FWrite;
Optimizations["fputs"] = &FPuts;
Optimizations["fprintf"] = &FPrintF;
-
- // Miscellaneous
- Optimizations["llvm.objectsize"] = &ObjectSize;
+
+ // Object Size Checking
+ Optimizations["llvm.objectsize.i32"] = &ObjectSize;
+ Optimizations["llvm.objectsize.i64"] = &ObjectSize;
+ Optimizations["__memcpy_chk"] = &MemCpyChk;
+ Optimizations["__memset_chk"] = &MemSetChk;
+ Optimizations["__memmove_chk"] = &MemMoveChk;
}
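
All three _chk handlers registered above key off the same fact: a compile-time object size of -1 (all-ones) means "unknown", so the runtime bounds check is a no-op and the call can be folded to the plain primitive. A toy model of that contract under stated assumptions (abort-on-overflow approximates typical libc behavior; this is not the real __memcpy_chk):

#include <cstdlib>
#include <cstring>
#include <cstdio>

// Toy __memcpy_chk: 'objsize' of (size_t)-1 means the destination size
// is unknown, so the check vanishes -- the case the optimization above
// rewrites into a plain memcpy.
void *memcpy_chk_model(void *dst, const void *src, std::size_t n,
                       std::size_t objsize) {
  if (objsize != (std::size_t)-1 && n > objsize)
    std::abort();                    // detected buffer overflow
  return std::memcpy(dst, src, n);   // size unknown or in range
}

int main() {
  char buf[8];
  memcpy_chk_model(buf, "hi", 3, sizeof buf);        // checked, in range
  memcpy_chk_model(buf, "hi", 3, (std::size_t)-1);   // unknown: plain memcpy
  std::puts(buf);
}
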
diff --git a/lib/Transforms/Utils/LoopSimplify.cpp b/lib/Transforms/Utils/LoopSimplify.cpp
index 44a2c1f..690972d 100644
--- a/lib/Transforms/Utils/LoopSimplify.cpp
+++ b/lib/Transforms/Utils/LoopSimplify.cpp
@@ -477,8 +477,13 @@ Loop *LoopSimplify::SeparateNestedLoop(Loop *L, LPPassManager &LPM) {
SmallVector<BasicBlock*, 8> OuterLoopPreds;
for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
if (PN->getIncomingValue(i) != PN ||
- !L->contains(PN->getIncomingBlock(i)))
+ !L->contains(PN->getIncomingBlock(i))) {
+ // We can't split indirectbr edges.
+ if (isa<IndirectBrInst>(PN->getIncomingBlock(i)->getTerminator()))
+ return 0;
+
OuterLoopPreds.push_back(PN->getIncomingBlock(i));
+ }
BasicBlock *Header = L->getHeader();
BasicBlock *NewBB = SplitBlockPredecessors(Header, &OuterLoopPreds[0],
diff --git a/lib/Transforms/Utils/Mem2Reg.cpp b/lib/Transforms/Utils/Mem2Reg.cpp
index 9416604..99203b6 100644
--- a/lib/Transforms/Utils/Mem2Reg.cpp
+++ b/lib/Transforms/Utils/Mem2Reg.cpp
@@ -73,7 +73,7 @@ bool PromotePass::runOnFunction(Function &F) {
if (Allocas.empty()) break;
- PromoteMemToReg(Allocas, DT, DF, F.getContext());
+ PromoteMemToReg(Allocas, DT, DF);
NumPromoted += Allocas.size();
Changed = true;
}
diff --git a/lib/Transforms/Utils/PromoteMemoryToRegister.cpp b/lib/Transforms/Utils/PromoteMemoryToRegister.cpp
index de6ad1d..e25f9e2 100644
--- a/lib/Transforms/Utils/PromoteMemoryToRegister.cpp
+++ b/lib/Transforms/Utils/PromoteMemoryToRegister.cpp
@@ -23,7 +23,6 @@
#include "llvm/Function.h"
#include "llvm/Instructions.h"
#include "llvm/IntrinsicInst.h"
-#include "llvm/LLVMContext.h"
#include "llvm/Analysis/Dominators.h"
#include "llvm/Analysis/AliasSetTracker.h"
#include "llvm/ADT/DenseMap.h"
@@ -180,8 +179,6 @@ namespace {
///
AliasSetTracker *AST;
- LLVMContext &Context;
-
/// AllocaLookup - Reverse mapping of Allocas.
///
std::map<AllocaInst*, unsigned> AllocaLookup;
@@ -212,9 +209,8 @@ namespace {
DenseMap<const BasicBlock*, unsigned> BBNumPreds;
public:
PromoteMem2Reg(const std::vector<AllocaInst*> &A, DominatorTree &dt,
- DominanceFrontier &df, AliasSetTracker *ast,
- LLVMContext &C)
- : Allocas(A), DT(dt), DF(df), AST(ast), Context(C) {}
+ DominanceFrontier &df, AliasSetTracker *ast)
+ : Allocas(A), DT(dt), DF(df), AST(ast) {}
void run();
@@ -1003,9 +999,9 @@ NextIteration:
///
void llvm::PromoteMemToReg(const std::vector<AllocaInst*> &Allocas,
DominatorTree &DT, DominanceFrontier &DF,
- LLVMContext &Context, AliasSetTracker *AST) {
+ AliasSetTracker *AST) {
// If there is nothing to do, bail out...
if (Allocas.empty()) return;
- PromoteMem2Reg(Allocas, DT, DF, AST, Context).run();
+ PromoteMem2Reg(Allocas, DT, DF, AST).run();
}
diff --git a/lib/VMCore/Core.cpp b/lib/VMCore/Core.cpp
index 78cd4dc..449e967 100644
--- a/lib/VMCore/Core.cpp
+++ b/lib/VMCore/Core.cpp
@@ -1860,9 +1860,9 @@ LLVMValueRef LLVMBuildPointerCast(LLVMBuilderRef B, LLVMValueRef Val,
}
LLVMValueRef LLVMBuildIntCast(LLVMBuilderRef B, LLVMValueRef Val,
- LLVMTypeRef DestTy, int isSigned,
- const char *Name) {
- return wrap(unwrap(B)->CreateIntCast(unwrap(Val), unwrap(DestTy), isSigned, Name));
+ LLVMTypeRef DestTy, const char *Name) {
+ return wrap(unwrap(B)->CreateIntCast(unwrap(Val), unwrap(DestTy),
+ /*isSigned*/true, Name));
}
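
The C-binding change above pins isSigned=true inside the wrapper instead of exposing the flag. The general shape, reduced to a standalone sketch with invented names (not the LLVM-C API):

#include <cstdint>
#include <cstdio>

// The C++ side takes an explicit signedness flag...
int64_t intCast(int32_t v, bool isSigned) {
  return isSigned ? (int64_t)v : (int64_t)(uint32_t)v;
}

// ...while the C-compatible binding pins the flag, as LLVMBuildIntCast
// now pins isSigned=true; callers wanting zero-extension need a separate
// entry point rather than a parameter.
extern "C" int64_t MyBuildIntCast(int32_t v) {
  return intCast(v, /*isSigned=*/true);
}

int main() {
  std::printf("%lld\n", (long long)MyBuildIntCast(-1));   // prints -1
}
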
LLVMValueRef LLVMBuildFPCast(LLVMBuilderRef B, LLVMValueRef Val,
diff --git a/lib/VMCore/Metadata.cpp b/lib/VMCore/Metadata.cpp
index 24e715b..b80b6bf 100644
--- a/lib/VMCore/Metadata.cpp
+++ b/lib/VMCore/Metadata.cpp
@@ -33,10 +33,8 @@ MDString *MDString::get(LLVMContext &Context, StringRef Str) {
StringMapEntry<MDString *> &Entry =
pImpl->MDStringCache.GetOrCreateValue(Str);
MDString *&S = Entry.getValue();
- if (S) return S;
-
- return S =
- new MDString(Context, Entry.getKey());
+ if (!S) S = new MDString(Context, Entry.getKey());
+ return S;
}
MDString *MDString::get(LLVMContext &Context, const char *Str) {
@@ -44,10 +42,8 @@ MDString *MDString::get(LLVMContext &Context, const char *Str) {
StringMapEntry<MDString *> &Entry =
pImpl->MDStringCache.GetOrCreateValue(Str ? StringRef(Str) : StringRef());
MDString *&S = Entry.getValue();
- if (S) return S;
-
- return S =
- new MDString(Context, Entry.getKey());
+ if (!S) S = new MDString(Context, Entry.getKey());
+ return S;
}
//===----------------------------------------------------------------------===//
@@ -74,28 +70,19 @@ MDNode *MDNode::get(LLVMContext &Context, Value*const* Vals, unsigned NumVals) {
ID.AddPointer(Vals[i]);
void *InsertPoint;
- MDNode *N;
- {
- N = pImpl->MDNodeSet.FindNodeOrInsertPos(ID, InsertPoint);
- }
- if (N) return N;
-
- N = pImpl->MDNodeSet.FindNodeOrInsertPos(ID, InsertPoint);
+ MDNode *N = pImpl->MDNodeSet.FindNodeOrInsertPos(ID, InsertPoint);
if (!N) {
// InsertPoint will have been set by the FindNodeOrInsertPos call.
N = new MDNode(Context, Vals, NumVals);
pImpl->MDNodeSet.InsertNode(N, InsertPoint);
}
-
return N;
}
/// ~MDNode - Destroy MDNode.
MDNode::~MDNode() {
- {
- LLVMContextImpl *pImpl = getType()->getContext().pImpl;
- pImpl->MDNodeSet.RemoveNode(this);
- }
+ LLVMContextImpl *pImpl = getType()->getContext().pImpl;
+ pImpl->MDNodeSet.RemoveNode(this);
delete [] Node;
Node = NULL;
}
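
Both MDString::get variants now share the canonical get-or-create shape, and MDNode::get above is the folding-set version of the same idea: look the slot up once, construct only if it is empty, and always hand back the cached pointer so equal keys yield one uniqued object. The idiom with a std::unordered_map as a stand-in for the MDStringCache:

#include <cassert>
#include <memory>
#include <string>
#include <unordered_map>

struct Str { std::string value; };

std::unordered_map<std::string, std::unique_ptr<Str>> cache;

// Get-or-create: one lookup, construct only when the slot is empty, so
// equal keys always return the same uniqued object.
Str *getUniqued(const std::string &key) {
  std::unique_ptr<Str> &slot = cache[key];
  if (!slot)
    slot.reset(new Str{key});
  return slot.get();
}

int main() {
  assert(getUniqued("md") == getUniqued("md"));   // same node both times
}
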
@@ -231,7 +218,7 @@ public:
/// addMD - Attach the metadata of given kind to an Instruction.
void addMD(unsigned Kind, MDNode *Node, Instruction *Inst);
- /// removeMD - Remove metadata of given kind attached with an instuction.
+ /// removeMD - Remove metadata of given kind attached with an instruction.
void removeMD(unsigned Kind, Instruction *Inst);
/// removeAllMetadata - Remove all metadata attached with an instruction.
@@ -241,7 +228,7 @@ public:
/// the same metadata to In2.
void copyMD(Instruction *In1, Instruction *In2);
- /// getHandlerNames - Populate client supplied smallvector using custome
+ /// getHandlerNames - Populate client-supplied smallvector using custom
/// metadata name and ID.
void getHandlerNames(SmallVectorImpl<std::pair<unsigned, StringRef> >&) const;
@@ -302,7 +289,7 @@ void MetadataContextImpl::addMD(unsigned MDKind, MDNode *Node,
Info.push_back(std::make_pair(MDKind, Node));
}
-/// removeMD - Remove metadata of given kind attached with an instuction.
+/// removeMD - Remove metadata of given kind attached with an instruction.
void MetadataContextImpl::removeMD(unsigned Kind, Instruction *Inst) {
MDStoreTy::iterator I = MetadataStore.find(Inst);
if (I == MetadataStore.end())
@@ -317,7 +304,7 @@ void MetadataContextImpl::removeMD(unsigned Kind, Instruction *Inst) {
}
}
}
-
+
/// removeAllMetadata - Remove all metadata attached with an instruction.
void MetadataContextImpl::removeAllMetadata(Instruction *Inst) {
MetadataStore.erase(Inst);
@@ -454,12 +441,12 @@ getMDs(const Instruction *Inst,
void MetadataContext::addMD(unsigned Kind, MDNode *Node, Instruction *Inst) {
pImpl->addMD(Kind, Node, Inst);
}
-
-/// removeMD - Remove metadata of given kind attached with an instuction.
+
+/// removeMD - Remove metadata of given kind attached with an instruction.
void MetadataContext::removeMD(unsigned Kind, Instruction *Inst) {
pImpl->removeMD(Kind, Inst);
}
-
+
/// removeAllMetadata - Remove all metadata attached with an instruction.
void MetadataContext::removeAllMetadata(Instruction *Inst) {
pImpl->removeAllMetadata(Inst);
diff --git a/lib/VMCore/PassManager.cpp b/lib/VMCore/PassManager.cpp
index d3d61f5..ae418a0 100644
--- a/lib/VMCore/PassManager.cpp
+++ b/lib/VMCore/PassManager.cpp
@@ -1231,6 +1231,9 @@ bool FunctionPassManager::doFinalization() {
bool FunctionPassManagerImpl::doInitialization(Module &M) {
bool Changed = false;
+ dumpArguments();
+ dumpPasses();
+
for (unsigned Index = 0; Index < getNumContainedManagers(); ++Index)
Changed |= getContainedManager(Index)->doInitialization(M);
@@ -1274,9 +1277,6 @@ bool FunctionPassManagerImpl::run(Function &F) {
bool Changed = false;
TimingInfo::createTheTimeInfo();
- dumpArguments();
- dumpPasses();
-
initializeAllAnalysisInfo();
for (unsigned Index = 0; Index < getNumContainedManagers(); ++Index)
Changed |= getContainedManager(Index)->runOnFunction(F);
diff --git a/lib/VMCore/Verifier.cpp b/lib/VMCore/Verifier.cpp
index 7ab7b15..7aa86b7 100644
--- a/lib/VMCore/Verifier.cpp
+++ b/lib/VMCore/Verifier.cpp
@@ -1475,6 +1475,9 @@ void Verifier::visitInstruction(Instruction &I) {
void Verifier::VerifyType(const Type *Ty) {
if (!Types.insert(Ty)) return;
+ Assert1(&Mod->getContext() == &Ty->getContext(),
+ "Type context does not match Module context!", Ty);
+
switch (Ty->getTypeID()) {
case Type::FunctionTyID: {
const FunctionType *FTy = cast<FunctionType>(Ty);