Diffstat (limited to 'contrib/llvm/lib')
464 files changed, 33984 insertions, 12166 deletions
diff --git a/contrib/llvm/lib/Analysis/Analysis.cpp b/contrib/llvm/lib/Analysis/Analysis.cpp index 66e416cd..349c417 100644 --- a/contrib/llvm/lib/Analysis/Analysis.cpp +++ b/contrib/llvm/lib/Analysis/Analysis.cpp @@ -11,6 +11,8 @@ #include "llvm-c/Initialization.h" #include "llvm/Analysis/Verifier.h" #include "llvm/InitializePasses.h" +#include "llvm/IR/Module.h" +#include "llvm/PassRegistry.h" #include <cstring> using namespace llvm; diff --git a/contrib/llvm/lib/Analysis/BasicAliasAnalysis.cpp b/contrib/llvm/lib/Analysis/BasicAliasAnalysis.cpp index ae6da1a..f8509dd 100644 --- a/contrib/llvm/lib/Analysis/BasicAliasAnalysis.cpp +++ b/contrib/llvm/lib/Analysis/BasicAliasAnalysis.cpp @@ -88,7 +88,7 @@ static uint64_t getObjectSize(const Value *V, const DataLayout &TD, const TargetLibraryInfo &TLI, bool RoundToAlign = false) { uint64_t Size; - if (getUnderlyingObjectSize(V, Size, &TD, &TLI, RoundToAlign)) + if (getObjectSize(V, Size, &TD, &TLI, RoundToAlign)) return Size; return AliasAnalysis::UnknownSize; } @@ -98,6 +98,35 @@ static uint64_t getObjectSize(const Value *V, const DataLayout &TD, static bool isObjectSmallerThan(const Value *V, uint64_t Size, const DataLayout &TD, const TargetLibraryInfo &TLI) { + // Note that the meanings of the "object" are slightly different in the + // following contexts: + // c1: llvm::getObjectSize() + // c2: llvm.objectsize() intrinsic + // c3: isObjectSmallerThan() + // c1 and c2 share the same meaning; however, the meaning of "object" in c3 + // refers to the "entire object". + // + // Consider this example: + // char *p = (char*)malloc(100) + // char *q = p+80; + // + // In the context of c1 and c2, the "object" pointed by q refers to the + // stretch of memory of q[0:19]. So, getObjectSize(q) should return 20. + // + // However, in the context of c3, the "object" refers to the chunk of memory + // being allocated. So, the "object" has 100 bytes, and q points to the middle + // the "object". In case q is passed to isObjectSmallerThan() as the 1st + // parameter, before the llvm::getObjectSize() is called to get the size of + // entire object, we should: + // - either rewind the pointer q to the base-address of the object in + // question (in this case rewind to p), or + // - just give up. It is up to caller to make sure the pointer is pointing + // to the base address the object. + // + // We go for 2nd option for simplicity. + if (!isIdentifiedObject(V)) + return false; + // This function needs to use the aligned object size because we allow // reads a bit past the end given sufficient alignment. 
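The BasicAliasAnalysis hunk above drops getUnderlyingObjectSize() and instead has isObjectSmallerThan() give up unless the pointer is an identified object, because the "remaining bytes from here" answer for an interior pointer is not the size of the allocation. A minimal sketch of that distinction in plain C++ rather than LLVM's API; the 100-byte allocation and the p+80 interior pointer are taken from the comment in the hunk, everything else is illustrative only:

    #include <cstdio>
    #include <cstdlib>

    int main() {
      char *p = static_cast<char *>(std::malloc(100)); // the "entire object": 100 bytes
      char *q = p + 80;                                // interior pointer into the same allocation

      // An llvm.objectsize-style query on q answers "how many bytes are addressable
      // from q onward", i.e. 20, while isObjectSmallerThan() needs the size of the
      // whole allocation (100). Rather than rewinding q back to p, the patch simply
      // refuses to answer for pointers that are not identified objects.
      std::printf("bytes from q to end: %zu\n", static_cast<size_t>(100 - (q - p)));

      std::free(p);
      return 0;
    }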
uint64_t ObjectSize = getObjectSize(V, TD, TLI, /*RoundToAlign*/true); diff --git a/contrib/llvm/lib/Analysis/ConstantFolding.cpp b/contrib/llvm/lib/Analysis/ConstantFolding.cpp index 09d7608..bc0dffc 100644 --- a/contrib/llvm/lib/Analysis/ConstantFolding.cpp +++ b/contrib/llvm/lib/Analysis/ConstantFolding.cpp @@ -17,6 +17,7 @@ //===----------------------------------------------------------------------===// #include "llvm/Analysis/ConstantFolding.h" +#include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringMap.h" #include "llvm/Analysis/ValueTracking.h" @@ -550,7 +551,7 @@ static Constant *SymbolicallyEvaluateBinop(unsigned Opc, Constant *Op0, if (Opc == Instruction::And && DL) { - unsigned BitWidth = DL->getTypeSizeInBits(Op0->getType()); + unsigned BitWidth = DL->getTypeSizeInBits(Op0->getType()->getScalarType()); APInt KnownZero0(BitWidth, 0), KnownOne0(BitWidth, 0); APInt KnownZero1(BitWidth, 0), KnownOne1(BitWidth, 0); ComputeMaskedBits(Op0, KnownZero0, KnownOne0, DL); @@ -880,19 +881,20 @@ Constant *llvm::ConstantFoldInstruction(Instruction *I, return ConstantFoldInstOperands(I->getOpcode(), I->getType(), Ops, TD, TLI); } -/// ConstantFoldConstantExpression - Attempt to fold the constant expression -/// using the specified DataLayout. If successful, the constant result is -/// result is returned, if not, null is returned. -Constant *llvm::ConstantFoldConstantExpression(const ConstantExpr *CE, - const DataLayout *TD, - const TargetLibraryInfo *TLI) { - SmallVector<Constant*, 8> Ops; - for (User::const_op_iterator i = CE->op_begin(), e = CE->op_end(); - i != e; ++i) { +static Constant * +ConstantFoldConstantExpressionImpl(const ConstantExpr *CE, const DataLayout *TD, + const TargetLibraryInfo *TLI, + SmallPtrSet<ConstantExpr *, 4> &FoldedOps) { + SmallVector<Constant *, 8> Ops; + for (User::const_op_iterator i = CE->op_begin(), e = CE->op_end(); i != e; + ++i) { Constant *NewC = cast<Constant>(*i); - // Recursively fold the ConstantExpr's operands. - if (ConstantExpr *NewCE = dyn_cast<ConstantExpr>(NewC)) - NewC = ConstantFoldConstantExpression(NewCE, TD, TLI); + // Recursively fold the ConstantExpr's operands. If we have already folded + // a ConstantExpr, we don't have to process it again. + if (ConstantExpr *NewCE = dyn_cast<ConstantExpr>(NewC)) { + if (FoldedOps.insert(NewCE)) + NewC = ConstantFoldConstantExpressionImpl(NewCE, TD, TLI, FoldedOps); + } Ops.push_back(NewC); } @@ -902,6 +904,16 @@ Constant *llvm::ConstantFoldConstantExpression(const ConstantExpr *CE, return ConstantFoldInstOperands(CE->getOpcode(), CE->getType(), Ops, TD, TLI); } +/// ConstantFoldConstantExpression - Attempt to fold the constant expression +/// using the specified DataLayout. If successful, the constant result is +/// result is returned, if not, null is returned. +Constant *llvm::ConstantFoldConstantExpression(const ConstantExpr *CE, + const DataLayout *TD, + const TargetLibraryInfo *TLI) { + SmallPtrSet<ConstantExpr *, 4> FoldedOps; + return ConstantFoldConstantExpressionImpl(CE, TD, TLI, FoldedOps); +} + /// ConstantFoldInstOperands - Attempt to constant fold an instruction with the /// specified opcode and operands. If successful, the constant result is /// returned, if not, null is returned. 
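The ConstantFolding change splits the recursion into ConstantFoldConstantExpressionImpl() and threads a SmallPtrSet of already-folded operands through it, so a constant expression shared by many operands is folded once instead of once per use. A standalone sketch of that memoization, with a hypothetical Expr node in place of llvm::ConstantExpr and std::unordered_set in place of SmallPtrSet:

    #include <cstdio>
    #include <unordered_set>
    #include <vector>

    // Hypothetical expression node standing in for llvm::ConstantExpr; real constant
    // expressions are uniqued, so one node can appear as an operand of many others.
    struct Expr {
      std::vector<Expr *> Ops;
      int FoldCount = 0;   // how many times foldImpl() actually processed this node
    };

    // Mirrors ConstantFoldConstantExpressionImpl(): the FoldedOps set ensures each
    // shared subexpression is folded at most once instead of once per use.
    static void foldImpl(Expr *E, std::unordered_set<Expr *> &FoldedOps) {
      for (Expr *Op : E->Ops)
        if (FoldedOps.insert(Op).second)   // first visit only, like SmallPtrSet::insert
          foldImpl(Op, FoldedOps);
      ++E->FoldCount;
    }

    int main() {
      Expr Leaf, A, Root;
      A.Ops = {&Leaf, &Leaf};          // A uses the shared leaf twice
      Root.Ops = {&A, &A, &Leaf};      // the root uses A twice and the leaf again

      std::unordered_set<Expr *> FoldedOps;
      foldImpl(&Root, FoldedOps);
      std::printf("A folded %d time(s), Leaf folded %d time(s)\n",
                  A.FoldCount, Leaf.FoldCount);   // both print 1
      return 0;
    }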
Note that this function can fail when diff --git a/contrib/llvm/lib/Analysis/IPA/IPA.cpp b/contrib/llvm/lib/Analysis/IPA/IPA.cpp index aa5164e..1c1816d 100644 --- a/contrib/llvm/lib/Analysis/IPA/IPA.cpp +++ b/contrib/llvm/lib/Analysis/IPA/IPA.cpp @@ -12,6 +12,7 @@ //===----------------------------------------------------------------------===// #include "llvm/InitializePasses.h" +#include "llvm/PassRegistry.h" #include "llvm-c/Initialization.h" using namespace llvm; diff --git a/contrib/llvm/lib/Analysis/InstructionSimplify.cpp b/contrib/llvm/lib/Analysis/InstructionSimplify.cpp index 4a3c74e..bf77451 100644 --- a/contrib/llvm/lib/Analysis/InstructionSimplify.cpp +++ b/contrib/llvm/lib/Analysis/InstructionSimplify.cpp @@ -1711,7 +1711,7 @@ static Value *ExtractEquivalentCondition(Value *V, CmpInst::Predicate Pred, // subobject at its beginning) or function, both are pointers to one past the // last element of the same array object, or one is a pointer to one past the // end of one array object and the other is a pointer to the start of a -// different array object that happens to immediately follow the first array +// different array object that happens to immediately follow the first array // object in the address space.) // // C11's version is more restrictive, however there's no reason why an argument diff --git a/contrib/llvm/lib/Analysis/MemoryBuiltins.cpp b/contrib/llvm/lib/Analysis/MemoryBuiltins.cpp index d490d54..9c0d8ac 100644 --- a/contrib/llvm/lib/Analysis/MemoryBuiltins.cpp +++ b/contrib/llvm/lib/Analysis/MemoryBuiltins.cpp @@ -364,26 +364,6 @@ bool llvm::getObjectSize(const Value *Ptr, uint64_t &Size, const DataLayout *TD, return true; } -/// \brief Compute the size of the underlying object pointed by Ptr. Returns -/// true and the object size in Size if successful, and false otherwise. -/// If RoundToAlign is true, then Size is rounded up to the aligment of allocas, -/// byval arguments, and global variables. -bool llvm::getUnderlyingObjectSize(const Value *Ptr, uint64_t &Size, - const DataLayout *TD, - const TargetLibraryInfo *TLI, - bool RoundToAlign) { - if (!TD) - return false; - - ObjectSizeOffsetVisitor Visitor(TD, TLI, Ptr->getContext(), RoundToAlign); - SizeOffsetType Data = Visitor.compute(const_cast<Value*>(Ptr)); - if (!Visitor.knownSize(Data)) - return false; - - Size = Data.first.getZExtValue(); - return true; -} - STATISTIC(ObjectVisitorArgument, "Number of arguments with unsolved size and offset"); @@ -409,23 +389,16 @@ ObjectSizeOffsetVisitor::ObjectSizeOffsetVisitor(const DataLayout *TD, SizeOffsetType ObjectSizeOffsetVisitor::compute(Value *V) { V = V->stripPointerCasts(); + if (Instruction *I = dyn_cast<Instruction>(V)) { + // If we have already seen this instruction, bail out. Cycles can happen in + // unreachable code after constant propagation. + if (!SeenInsts.insert(I)) + return unknown(); - if (isa<Instruction>(V) || isa<GEPOperator>(V)) { - // Return cached value or insert unknown in cache if size of V was not - // computed yet in order to avoid recursions in PHis. 
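ObjectSizeOffsetVisitor::compute() above now records visited instructions in SeenInsts and returns unknown() on a repeat visit, because def-use chains in unreachable code can form cycles after constant propagation and the old cache-based recursion has been removed. A toy sketch of the same guard, with a hypothetical Node type standing in for an LLVM Value:

    #include <cstdio>
    #include <unordered_set>

    // Toy stand-in for a Value whose size we try to derive from its (single) input;
    // in unreachable IR such def-use chains can form cycles after constant propagation.
    struct Node {
      Node *Input = nullptr;
      int Size = -1;   // -1 means "derive the size from Input"
    };

    // Mirrors ObjectSizeOffsetVisitor::compute(): bail out with "unknown" the moment
    // a node is seen twice instead of recursing forever.
    static int computeSize(Node *N, std::unordered_set<Node *> &Seen) {
      if (!Seen.insert(N).second)
        return -1;                       // already visited: cycle, give up
      if (N->Size >= 0)
        return N->Size;                  // known size
      return N->Input ? computeSize(N->Input, Seen) : -1;
    }

    int main() {
      Node A, B;
      A.Input = &B;
      B.Input = &A;                      // artificial cycle, as in unreachable code

      std::unordered_set<Node *> Seen;
      std::printf("size = %d (unknown)\n", computeSize(&A, Seen));
      return 0;
    }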
- std::pair<CacheMapTy::iterator, bool> CacheVal = - CacheMap.insert(std::make_pair(V, unknown())); - if (!CacheVal.second) - return CacheVal.first->second; - - SizeOffsetType Result; if (GEPOperator *GEP = dyn_cast<GEPOperator>(V)) - Result = visitGEPOperator(*GEP); - else - Result = visit(cast<Instruction>(*V)); - return CacheMap[V] = Result; + return visitGEPOperator(*GEP); + return visit(*I); } - if (Argument *A = dyn_cast<Argument>(V)) return visitArgument(*A); if (ConstantPointerNull *P = dyn_cast<ConstantPointerNull>(V)) @@ -439,6 +412,8 @@ SizeOffsetType ObjectSizeOffsetVisitor::compute(Value *V) { if (ConstantExpr *CE = dyn_cast<ConstantExpr>(V)) { if (CE->getOpcode() == Instruction::IntToPtr) return unknown(); // clueless + if (CE->getOpcode() == Instruction::GetElementPtr) + return visitGEPOperator(cast<GEPOperator>(*CE)); } DEBUG(dbgs() << "ObjectSizeOffsetVisitor::compute() unhandled value: " << *V @@ -572,21 +547,9 @@ SizeOffsetType ObjectSizeOffsetVisitor::visitLoadInst(LoadInst&) { return unknown(); } -SizeOffsetType ObjectSizeOffsetVisitor::visitPHINode(PHINode &PHI) { - if (PHI.getNumIncomingValues() == 0) - return unknown(); - - SizeOffsetType Ret = compute(PHI.getIncomingValue(0)); - if (!bothKnown(Ret)) - return unknown(); - - // Verify that all PHI incoming pointers have the same size and offset. - for (unsigned i = 1, e = PHI.getNumIncomingValues(); i != e; ++i) { - SizeOffsetType EdgeData = compute(PHI.getIncomingValue(i)); - if (!bothKnown(EdgeData) || EdgeData != Ret) - return unknown(); - } - return Ret; +SizeOffsetType ObjectSizeOffsetVisitor::visitPHINode(PHINode&) { + // too complex to analyze statically. + return unknown(); } SizeOffsetType ObjectSizeOffsetVisitor::visitSelectInst(SelectInst &I) { diff --git a/contrib/llvm/lib/Analysis/MemoryDependenceAnalysis.cpp b/contrib/llvm/lib/Analysis/MemoryDependenceAnalysis.cpp index 2240e9d..c0009cb 100644 --- a/contrib/llvm/lib/Analysis/MemoryDependenceAnalysis.cpp +++ b/contrib/llvm/lib/Analysis/MemoryDependenceAnalysis.cpp @@ -47,9 +47,7 @@ STATISTIC(NumCacheCompleteNonLocalPtr, "Number of block queries that were completely cached"); // Limit for the number of instructions to scan in a block. -// FIXME: Figure out what a sane value is for this. -// (500 is relatively insane.) -static const int BlockScanLimit = 500; +static const int BlockScanLimit = 100; char MemoryDependenceAnalysis::ID = 0; @@ -913,7 +911,6 @@ getNonLocalPointerDepFromBB(const PHITransAddr &Pointer, SmallVectorImpl<NonLocalDepResult> &Result, DenseMap<BasicBlock*, Value*> &Visited, bool SkipFirstBlock) { - // Look up the cached info for Pointer. ValueIsLoadPair CacheKey(Pointer.getAddr(), isLoad); @@ -1001,8 +998,17 @@ getNonLocalPointerDepFromBB(const PHITransAddr &Pointer, for (NonLocalDepInfo::iterator I = Cache->begin(), E = Cache->end(); I != E; ++I) { Visited.insert(std::make_pair(I->getBB(), Addr)); - if (!I->getResult().isNonLocal() && DT->isReachableFromEntry(I->getBB())) + if (I->getResult().isNonLocal()) { + continue; + } + + if (!DT) { + Result.push_back(NonLocalDepResult(I->getBB(), + MemDepResult::getUnknown(), + Addr)); + } else if (DT->isReachableFromEntry(I->getBB())) { Result.push_back(NonLocalDepResult(I->getBB(), I->getResult(), Addr)); + } } ++NumCacheCompleteNonLocalPtr; return false; @@ -1047,9 +1053,16 @@ getNonLocalPointerDepFromBB(const PHITransAddr &Pointer, NumSortedEntries); // If we got a Def or Clobber, add this to the list of results. 
- if (!Dep.isNonLocal() && DT->isReachableFromEntry(BB)) { - Result.push_back(NonLocalDepResult(BB, Dep, Pointer.getAddr())); - continue; + if (!Dep.isNonLocal()) { + if (!DT) { + Result.push_back(NonLocalDepResult(BB, + MemDepResult::getUnknown(), + Pointer.getAddr())); + continue; + } else if (DT->isReachableFromEntry(BB)) { + Result.push_back(NonLocalDepResult(BB, Dep, Pointer.getAddr())); + continue; + } } } diff --git a/contrib/llvm/lib/Analysis/RegionInfo.cpp b/contrib/llvm/lib/Analysis/RegionInfo.cpp index fad5074..8577025 100644 --- a/contrib/llvm/lib/Analysis/RegionInfo.cpp +++ b/contrib/llvm/lib/Analysis/RegionInfo.cpp @@ -79,10 +79,43 @@ void Region::replaceExit(BasicBlock *BB) { exit = BB; } +void Region::replaceEntryRecursive(BasicBlock *NewEntry) { + std::vector<Region *> RegionQueue; + BasicBlock *OldEntry = getEntry(); + + RegionQueue.push_back(this); + while (!RegionQueue.empty()) { + Region *R = RegionQueue.back(); + RegionQueue.pop_back(); + + R->replaceEntry(NewEntry); + for (Region::const_iterator RI = R->begin(), RE = R->end(); RI != RE; ++RI) + if ((*RI)->getEntry() == OldEntry) + RegionQueue.push_back(*RI); + } +} + +void Region::replaceExitRecursive(BasicBlock *NewExit) { + std::vector<Region *> RegionQueue; + BasicBlock *OldExit = getExit(); + + RegionQueue.push_back(this); + while (!RegionQueue.empty()) { + Region *R = RegionQueue.back(); + RegionQueue.pop_back(); + + R->replaceExit(NewExit); + for (Region::const_iterator RI = R->begin(), RE = R->end(); RI != RE; ++RI) + if ((*RI)->getExit() == OldExit) + RegionQueue.push_back(*RI); + } +} + bool Region::contains(const BasicBlock *B) const { BasicBlock *BB = const_cast<BasicBlock*>(B); - assert(DT->getNode(BB) && "BB not part of the dominance tree"); + if (!DT->getNode(BB)) + return false; BasicBlock *entry = getEntry(), *exit = getExit(); diff --git a/contrib/llvm/lib/Analysis/ScalarEvolution.cpp b/contrib/llvm/lib/Analysis/ScalarEvolution.cpp index 6ea915f..f876748 100644 --- a/contrib/llvm/lib/Analysis/ScalarEvolution.cpp +++ b/contrib/llvm/lib/Analysis/ScalarEvolution.cpp @@ -3937,10 +3937,19 @@ const SCEV *ScalarEvolution::createSCEV(Value *V) { /// before taking the branch. For loops with multiple exits, it may not be the /// number times that the loop header executes because the loop may exit /// prematurely via another branch. +/// +/// FIXME: We conservatively call getBackedgeTakenCount(L) instead of +/// getExitCount(L, ExitingBlock) to compute a safe trip count considering all +/// loop exits. getExitCount() may return an exact count for this branch +/// assuming no-signed-wrap. The number of well-defined iterations may actually +/// be higher than this trip count if this exit test is skipped and the loop +/// exits via a different branch. Ideally, getExitCount() would know whether it +/// depends on a NSW assumption, and we would only fall back to a conservative +/// trip count in that case. unsigned ScalarEvolution:: -getSmallConstantTripCount(Loop *L, BasicBlock *ExitingBlock) { +getSmallConstantTripCount(Loop *L, BasicBlock */*ExitingBlock*/) { const SCEVConstant *ExitCount = - dyn_cast<SCEVConstant>(getExitCount(L, ExitingBlock)); + dyn_cast<SCEVConstant>(getBackedgeTakenCount(L)); if (!ExitCount) return 0; @@ -3967,8 +3976,8 @@ getSmallConstantTripCount(Loop *L, BasicBlock *ExitingBlock) { /// As explained in the comments for getSmallConstantTripCount, this assumes /// that control exits the loop via ExitingBlock. 
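The new Region::replaceEntryRecursive() and replaceExitRecursive() above walk child regions with an explicit worklist and rewrite every descendant that shared the old entry or exit block. A simplified, self-contained version of the entry case; Block and the Region layout here are stand-ins for the real llvm::Region interface:

    #include <cstdio>
    #include <string>
    #include <vector>

    struct Block { std::string Name; };

    // Simplified stand-in for llvm::Region: nested regions can share an entry block
    // with their parent, so replacing the parent's entry must also update every
    // child region that starts at the same block.
    struct Region {
      Block *Entry;
      std::vector<Region *> Children;
    };

    // Mirrors Region::replaceEntryRecursive(): a worklist walk that rewrites the
    // entry of this region and of every descendant whose entry was the old block.
    static void replaceEntryRecursive(Region *Top, Block *NewEntry) {
      Block *OldEntry = Top->Entry;
      std::vector<Region *> Worklist{Top};
      while (!Worklist.empty()) {
        Region *R = Worklist.back();
        Worklist.pop_back();
        R->Entry = NewEntry;
        for (Region *Child : R->Children)
          if (Child->Entry == OldEntry)
            Worklist.push_back(Child);
      }
    }

    int main() {
      Block Old{"old.entry"}, New{"new.entry"}, Other{"other"};
      Region Child1{&Old, {}}, Child2{&Other, {}};
      Region Parent{&Old, {&Child1, &Child2}};

      replaceEntryRecursive(&Parent, &New);
      std::printf("parent=%s child1=%s child2=%s\n", Parent.Entry->Name.c_str(),
                  Child1.Entry->Name.c_str(), Child2.Entry->Name.c_str());
      // parent=new.entry child1=new.entry child2=other
      return 0;
    }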
unsigned ScalarEvolution:: -getSmallConstantTripMultiple(Loop *L, BasicBlock *ExitingBlock) { - const SCEV *ExitCount = getExitCount(L, ExitingBlock); +getSmallConstantTripMultiple(Loop *L, BasicBlock */*ExitingBlock*/) { + const SCEV *ExitCount = getBackedgeTakenCount(L); if (ExitCount == getCouldNotCompute()) return 1; @@ -3997,7 +4006,7 @@ getSmallConstantTripMultiple(Loop *L, BasicBlock *ExitingBlock) { } // getExitCount - Get the expression for the number of loop iterations for which -// this loop is guaranteed not to exit via ExitintBlock. Otherwise return +// this loop is guaranteed not to exit via ExitingBlock. Otherwise return // SCEVCouldNotCompute. const SCEV *ScalarEvolution::getExitCount(Loop *L, BasicBlock *ExitingBlock) { return getBackedgeTakenInfo(L).getExact(ExitingBlock, this); @@ -4382,26 +4391,36 @@ ScalarEvolution::ComputeExitLimit(const Loop *L, BasicBlock *ExitingBlock) { // Proceed to the next level to examine the exit condition expression. return ComputeExitLimitFromCond(L, ExitBr->getCondition(), ExitBr->getSuccessor(0), - ExitBr->getSuccessor(1)); + ExitBr->getSuccessor(1), + /*IsSubExpr=*/false); } /// ComputeExitLimitFromCond - Compute the number of times the /// backedge of the specified loop will execute if its exit condition /// were a conditional branch of ExitCond, TBB, and FBB. +/// +/// @param IsSubExpr is true if ExitCond does not directly control the exit +/// branch. In this case, we cannot assume that the loop only exits when the +/// condition is true and cannot infer that failing to meet the condition prior +/// to integer wraparound results in undefined behavior. ScalarEvolution::ExitLimit ScalarEvolution::ComputeExitLimitFromCond(const Loop *L, Value *ExitCond, BasicBlock *TBB, - BasicBlock *FBB) { + BasicBlock *FBB, + bool IsSubExpr) { // Check if the controlling expression for this loop is an And or Or. if (BinaryOperator *BO = dyn_cast<BinaryOperator>(ExitCond)) { if (BO->getOpcode() == Instruction::And) { // Recurse on the operands of the and. - ExitLimit EL0 = ComputeExitLimitFromCond(L, BO->getOperand(0), TBB, FBB); - ExitLimit EL1 = ComputeExitLimitFromCond(L, BO->getOperand(1), TBB, FBB); + bool EitherMayExit = L->contains(TBB); + ExitLimit EL0 = ComputeExitLimitFromCond(L, BO->getOperand(0), TBB, FBB, + IsSubExpr || EitherMayExit); + ExitLimit EL1 = ComputeExitLimitFromCond(L, BO->getOperand(1), TBB, FBB, + IsSubExpr || EitherMayExit); const SCEV *BECount = getCouldNotCompute(); const SCEV *MaxBECount = getCouldNotCompute(); - if (L->contains(TBB)) { + if (EitherMayExit) { // Both conditions must be true for the loop to continue executing. // Choose the less conservative count. if (EL0.Exact == getCouldNotCompute() || @@ -4429,11 +4448,14 @@ ScalarEvolution::ComputeExitLimitFromCond(const Loop *L, } if (BO->getOpcode() == Instruction::Or) { // Recurse on the operands of the or. - ExitLimit EL0 = ComputeExitLimitFromCond(L, BO->getOperand(0), TBB, FBB); - ExitLimit EL1 = ComputeExitLimitFromCond(L, BO->getOperand(1), TBB, FBB); + bool EitherMayExit = L->contains(FBB); + ExitLimit EL0 = ComputeExitLimitFromCond(L, BO->getOperand(0), TBB, FBB, + IsSubExpr || EitherMayExit); + ExitLimit EL1 = ComputeExitLimitFromCond(L, BO->getOperand(1), TBB, FBB, + IsSubExpr || EitherMayExit); const SCEV *BECount = getCouldNotCompute(); const SCEV *MaxBECount = getCouldNotCompute(); - if (L->contains(FBB)) { + if (EitherMayExit) { // Both conditions must be false for the loop to continue executing. // Choose the less conservative count. 
if (EL0.Exact == getCouldNotCompute() || @@ -4464,7 +4486,7 @@ ScalarEvolution::ComputeExitLimitFromCond(const Loop *L, // With an icmp, it may be feasible to compute an exact backedge-taken count. // Proceed to the next level to examine the icmp. if (ICmpInst *ExitCondICmp = dyn_cast<ICmpInst>(ExitCond)) - return ComputeExitLimitFromICmp(L, ExitCondICmp, TBB, FBB); + return ComputeExitLimitFromICmp(L, ExitCondICmp, TBB, FBB, IsSubExpr); // Check for a constant condition. These are normally stripped out by // SimplifyCFG, but ScalarEvolution may be used by a pass which wishes to @@ -4490,7 +4512,8 @@ ScalarEvolution::ExitLimit ScalarEvolution::ComputeExitLimitFromICmp(const Loop *L, ICmpInst *ExitCond, BasicBlock *TBB, - BasicBlock *FBB) { + BasicBlock *FBB, + bool IsSubExpr) { // If the condition was exit on true, convert the condition to exit on false ICmpInst::Predicate Cond; @@ -4542,7 +4565,7 @@ ScalarEvolution::ComputeExitLimitFromICmp(const Loop *L, switch (Cond) { case ICmpInst::ICMP_NE: { // while (X != Y) // Convert to: while (X-Y != 0) - ExitLimit EL = HowFarToZero(getMinusSCEV(LHS, RHS), L); + ExitLimit EL = HowFarToZero(getMinusSCEV(LHS, RHS), L, IsSubExpr); if (EL.hasAnyInfo()) return EL; break; } @@ -4553,24 +4576,24 @@ ScalarEvolution::ComputeExitLimitFromICmp(const Loop *L, break; } case ICmpInst::ICMP_SLT: { - ExitLimit EL = HowManyLessThans(LHS, RHS, L, true); + ExitLimit EL = HowManyLessThans(LHS, RHS, L, true, IsSubExpr); if (EL.hasAnyInfo()) return EL; break; } case ICmpInst::ICMP_SGT: { ExitLimit EL = HowManyLessThans(getNotSCEV(LHS), - getNotSCEV(RHS), L, true); + getNotSCEV(RHS), L, true, IsSubExpr); if (EL.hasAnyInfo()) return EL; break; } case ICmpInst::ICMP_ULT: { - ExitLimit EL = HowManyLessThans(LHS, RHS, L, false); + ExitLimit EL = HowManyLessThans(LHS, RHS, L, false, IsSubExpr); if (EL.hasAnyInfo()) return EL; break; } case ICmpInst::ICMP_UGT: { ExitLimit EL = HowManyLessThans(getNotSCEV(LHS), - getNotSCEV(RHS), L, false); + getNotSCEV(RHS), L, false, IsSubExpr); if (EL.hasAnyInfo()) return EL; break; } @@ -5439,7 +5462,7 @@ SolveQuadraticEquation(const SCEVAddRecExpr *AddRec, ScalarEvolution &SE) { /// effectively V != 0. We know and take advantage of the fact that this /// expression only being used in a comparison by zero context. ScalarEvolution::ExitLimit -ScalarEvolution::HowFarToZero(const SCEV *V, const Loop *L) { +ScalarEvolution::HowFarToZero(const SCEV *V, const Loop *L, bool IsSubExpr) { // If the value is a constant if (const SCEVConstant *C = dyn_cast<SCEVConstant>(V)) { // If the value is already zero, the branch will execute zero times. @@ -5537,19 +5560,20 @@ ScalarEvolution::HowFarToZero(const SCEV *V, const Loop *L) { } // If the recurrence is known not to wraparound, unsigned divide computes the - // back edge count. We know that the value will either become zero (and thus - // the loop terminates), that the loop will terminate through some other exit - // condition first, or that the loop has undefined behavior. This means - // we can't "miss" the exit value, even with nonunit stride. + // back edge count. (Ideally we would have an "isexact" bit for udiv). We know + // that the value will either become zero (and thus the loop terminates), that + // the loop will terminate through some other exit condition first, or that + // the loop has undefined behavior. This means we can't "miss" the exit + // value, even with nonunit stride. // - // FIXME: Prove that loops always exhibits *acceptable* undefined - // behavior. 
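The new IsSubExpr flag records that the condition being analyzed is only one operand of an and/or, so ScalarEvolution must not assume the loop leaves exactly when that operand flips; another operand may take the loop out first. A small self-contained loop showing the kind of situation the flag guards against (the constants are made up):

    #include <cstdio>

    int main() {
      unsigned Trips = 0;
      // The first operand "i != 11" can never fire on its own: i only takes even
      // values, so i == 11 is unreachable, and a count derived from that test alone
      // (via the no-wrap udiv in HowFarToZero) would be wrong. The loop still
      // terminates, because the second operand "i < 20" exits after 10 iterations.
      for (unsigned i = 0; i != 11 && i < 20; i += 2)
        ++Trips;
      std::printf("executed %u times\n", Trips);   // 10
      return 0;
    }

If the i != 11 test were treated as the sole controlling exit, the stride-2 no-wrap reasoning would conclude that the loop either hits 11 or has undefined behavior, which is false here; hence the conservative fallback whenever IsSubExpr is set.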
Loops must exhibit defined behavior until a wrapped value is - // actually used. So the trip count computed by udiv could be smaller than the - // number of well-defined iterations. - if (AddRec->getNoWrapFlags(SCEV::FlagNW)) { - // FIXME: We really want an "isexact" bit for udiv. + // This is only valid for expressions that directly compute the loop exit. It + // is invalid for subexpressions in which the loop may exit through this + // branch even if this subexpression is false. In that case, the trip count + // computed by this udiv could be smaller than the number of well-defined + // iterations. + if (!IsSubExpr && AddRec->getNoWrapFlags(SCEV::FlagNW)) return getUDivExpr(Distance, CountDown ? getNegativeSCEV(Step) : Step); - } + // Then, try to solve the above equation provided that Start is constant. if (const SCEVConstant *StartC = dyn_cast<SCEVConstant>(Start)) return SolveLinEquationWithOverflow(StepC->getValue()->getValue(), @@ -6315,9 +6339,14 @@ const SCEV *ScalarEvolution::getBECount(const SCEV *Start, /// HowManyLessThans - Return the number of times a backedge containing the /// specified less-than comparison will execute. If not computable, return /// CouldNotCompute. +/// +/// @param IsSubExpr is true when the LHS < RHS condition does not directly +/// control the branch. In this case, we can only compute an iteration count for +/// a subexpression that cannot overflow before evaluating true. ScalarEvolution::ExitLimit ScalarEvolution::HowManyLessThans(const SCEV *LHS, const SCEV *RHS, - const Loop *L, bool isSigned) { + const Loop *L, bool isSigned, + bool IsSubExpr) { // Only handle: "ADDREC < LoopInvariant". if (!isLoopInvariant(RHS, L)) return getCouldNotCompute(); @@ -6326,10 +6355,12 @@ ScalarEvolution::HowManyLessThans(const SCEV *LHS, const SCEV *RHS, return getCouldNotCompute(); // Check to see if we have a flag which makes analysis easy. - bool NoWrap = isSigned ? - AddRec->getNoWrapFlags((SCEV::NoWrapFlags)(SCEV::FlagNSW | SCEV::FlagNW)) : - AddRec->getNoWrapFlags((SCEV::NoWrapFlags)(SCEV::FlagNUW | SCEV::FlagNW)); - + bool NoWrap = false; + if (!IsSubExpr) { + NoWrap = AddRec->getNoWrapFlags( + (SCEV::NoWrapFlags)(((isSigned ? SCEV::FlagNSW : SCEV::FlagNUW)) + | SCEV::FlagNW)); + } if (AddRec->isAffine()) { unsigned BitWidth = getTypeSizeInBits(AddRec->getType()); const SCEV *Step = AddRec->getStepRecurrence(*this); diff --git a/contrib/llvm/lib/Analysis/TypeBasedAliasAnalysis.cpp b/contrib/llvm/lib/Analysis/TypeBasedAliasAnalysis.cpp index 68e43b2..bbf3c3a 100644 --- a/contrib/llvm/lib/Analysis/TypeBasedAliasAnalysis.cpp +++ b/contrib/llvm/lib/Analysis/TypeBasedAliasAnalysis.cpp @@ -71,6 +71,7 @@ using namespace llvm; // achieved by stripping the !tbaa tags from IR, but this option is sometimes // more convenient. static cl::opt<bool> EnableTBAA("enable-tbaa", cl::init(true)); +static cl::opt<bool> EnableStructPathTBAA("struct-path-tbaa", cl::init(false)); namespace { /// TBAANode - This is a simple wrapper around an MDNode which provides a @@ -109,6 +110,97 @@ namespace { return CI->getValue()[0]; } }; + + /// This is a simple wrapper around an MDNode which provides a + /// higher-level interface by hiding the details of how alias analysis + /// information is encoded in its operands. + class TBAAStructTagNode { + /// This node should be created with createTBAAStructTagNode. + const MDNode *Node; + + public: + TBAAStructTagNode() : Node(0) {} + explicit TBAAStructTagNode(const MDNode *N) : Node(N) {} + + /// Get the MDNode for this TBAAStructTagNode. 
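When the comparison does directly control the exit and the recurrence carries a no-wrap flag, HowFarToZero can still divide the distance to the exit value by the stride, even for a non-unit stride; that is the case the !IsSubExpr test preserves. A worked instance with made-up numbers:

    #include <cstdio>

    int main() {
      // i starts at 100 and steps by -4; the exit test "i != 0" directly controls
      // the loop, so with a no-wrap recurrence the backedge-taken count is simply
      // distance / stride = 100 / 4 = 25, even though the stride is not 1.
      unsigned BackedgeTaken = 0;
      for (unsigned i = 100; i != 0; i -= 4)
        ++BackedgeTaken;   // the body runs, and the backedge is taken, 25 times
      std::printf("backedge taken %u times (100 / 4 = %u)\n",
                  BackedgeTaken, 100u / 4u);
      return 0;
    }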
+ const MDNode *getNode() const { return Node; } + + const MDNode *getBaseType() const { + return dyn_cast_or_null<MDNode>(Node->getOperand(0)); + } + const MDNode *getAccessType() const { + return dyn_cast_or_null<MDNode>(Node->getOperand(1)); + } + uint64_t getOffset() const { + return cast<ConstantInt>(Node->getOperand(2))->getZExtValue(); + } + /// TypeIsImmutable - Test if this TBAAStructTagNode represents a type for + /// objects which are not modified (by any means) in the context where this + /// AliasAnalysis is relevant. + bool TypeIsImmutable() const { + if (Node->getNumOperands() < 4) + return false; + ConstantInt *CI = dyn_cast<ConstantInt>(Node->getOperand(3)); + if (!CI) + return false; + return CI->getValue()[0]; + } + }; + + /// This is a simple wrapper around an MDNode which provides a + /// higher-level interface by hiding the details of how alias analysis + /// information is encoded in its operands. + class TBAAStructTypeNode { + /// This node should be created with createTBAAStructTypeNode. + const MDNode *Node; + + public: + TBAAStructTypeNode() : Node(0) {} + explicit TBAAStructTypeNode(const MDNode *N) : Node(N) {} + + /// Get the MDNode for this TBAAStructTypeNode. + const MDNode *getNode() const { return Node; } + + /// Get this TBAAStructTypeNode's field in the type DAG with + /// given offset. Update the offset to be relative to the field type. + TBAAStructTypeNode getParent(uint64_t &Offset) const { + // Parent can be omitted for the root node. + if (Node->getNumOperands() < 2) + return TBAAStructTypeNode(); + + // Special handling for a scalar type node. + if (Node->getNumOperands() <= 3) { + MDNode *P = dyn_cast_or_null<MDNode>(Node->getOperand(1)); + if (!P) + return TBAAStructTypeNode(); + return TBAAStructTypeNode(P); + } + + // Assume the offsets are in order. We return the previous field if + // the current offset is bigger than the given offset. + unsigned TheIdx = 0; + for (unsigned Idx = 1; Idx < Node->getNumOperands(); Idx += 2) { + uint64_t Cur = cast<ConstantInt>(Node->getOperand(Idx + 1))-> + getZExtValue(); + if (Cur > Offset) { + assert(Idx >= 3 && + "TBAAStructTypeNode::getParent should have an offset match!"); + TheIdx = Idx - 2; + break; + } + } + // Move along the last field. + if (TheIdx == 0) + TheIdx = Node->getNumOperands() - 2; + uint64_t Cur = cast<ConstantInt>(Node->getOperand(TheIdx + 1))-> + getZExtValue(); + Offset -= Cur; + MDNode *P = dyn_cast_or_null<MDNode>(Node->getOperand(TheIdx)); + if (!P) + return TBAAStructTypeNode(); + return TBAAStructTypeNode(P); + } + }; } namespace { @@ -137,6 +229,7 @@ namespace { } bool Aliases(const MDNode *A, const MDNode *B) const; + bool PathAliases(const MDNode *A, const MDNode *B) const; private: virtual void getAnalysisUsage(AnalysisUsage &AU) const; @@ -171,6 +264,9 @@ TypeBasedAliasAnalysis::getAnalysisUsage(AnalysisUsage &AU) const { bool TypeBasedAliasAnalysis::Aliases(const MDNode *A, const MDNode *B) const { + if (EnableStructPathTBAA) + return PathAliases(A, B); + // Keep track of the root node for A and B. TBAANode RootA, RootB; @@ -209,6 +305,67 @@ TypeBasedAliasAnalysis::Aliases(const MDNode *A, return false; } +/// Test whether the struct-path tag represented by A may alias the +/// struct-path tag represented by B. +bool +TypeBasedAliasAnalysis::PathAliases(const MDNode *A, + const MDNode *B) const { + // Keep track of the root node for A and B. 
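The new TBAAStructTypeNode::getParent() above walks one level of the struct-path type DAG: given an offset into a type, it selects the field whose range contains that offset and rebases the offset relative to the field's own type. A rough stand-in using an explicit field list instead of MDNode operands; TypeNode and stepIntoField are hypothetical names, not LLVM API:

    #include <cstdio>
    #include <utility>
    #include <vector>

    // Toy struct-path type node: each field is a (type, start offset) pair, with
    // offsets sorted in increasing order, mirroring the metadata layout decoded by
    // the TBAAStructTypeNode wrapper.
    struct TypeNode {
      const char *Name;
      std::vector<std::pair<TypeNode *, unsigned>> Fields;
    };

    // Mirrors TBAAStructTypeNode::getParent(): pick the last field whose start is
    // <= Offset and rebase Offset so it is relative to that field's type.
    static TypeNode *stepIntoField(TypeNode *T, unsigned &Offset) {
      if (T->Fields.empty())
        return nullptr;                          // scalar / leaf type: nowhere to go
      size_t Idx = 0;
      while (Idx + 1 < T->Fields.size() && T->Fields[Idx + 1].second <= Offset)
        ++Idx;
      Offset -= T->Fields[Idx].second;
      return T->Fields[Idx].first;
    }

    int main() {
      TypeNode Int{"int", {}};
      TypeNode Inner{"Inner", {{&Int, 0}, {&Int, 4}}};     // struct Inner { int a, b; };
      TypeNode Outer{"Outer", {{&Int, 0}, {&Inner, 4}}};   // struct Outer { int x; Inner y; };

      unsigned Offset = 8;                                  // access to Outer::y.b
      for (TypeNode *T = &Outer; T; T = stepIntoField(T, Offset))
        std::printf("%s (remaining offset %u)\n", T->Name, Offset);
      // Outer (8) -> Inner (4) -> int (0)
      return 0;
    }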
+ TBAAStructTypeNode RootA, RootB; + TBAAStructTagNode TagA(A), TagB(B); + + // TODO: We need to check if AccessType of TagA encloses AccessType of + // TagB to support aggregate AccessType. If yes, return true. + + // Start from the base type of A, follow the edge with the correct offset in + // the type DAG and adjust the offset until we reach the base type of B or + // until we reach the Root node. + // Compare the adjusted offset once we have the same base. + + // Climb the type DAG from base type of A to see if we reach base type of B. + const MDNode *BaseA = TagA.getBaseType(); + const MDNode *BaseB = TagB.getBaseType(); + uint64_t OffsetA = TagA.getOffset(), OffsetB = TagB.getOffset(); + for (TBAAStructTypeNode T(BaseA); ; ) { + if (T.getNode() == BaseB) + // Base type of A encloses base type of B, check if the offsets match. + return OffsetA == OffsetB; + + RootA = T; + // Follow the edge with the correct offset, OffsetA will be adjusted to + // be relative to the field type. + T = T.getParent(OffsetA); + if (!T.getNode()) + break; + } + + // Reset OffsetA and climb the type DAG from base type of B to see if we reach + // base type of A. + OffsetA = TagA.getOffset(); + for (TBAAStructTypeNode T(BaseB); ; ) { + if (T.getNode() == BaseA) + // Base type of B encloses base type of A, check if the offsets match. + return OffsetA == OffsetB; + + RootB = T; + // Follow the edge with the correct offset, OffsetB will be adjusted to + // be relative to the field type. + T = T.getParent(OffsetB); + if (!T.getNode()) + break; + } + + // Neither node is an ancestor of the other. + + // If they have different roots, they're part of different potentially + // unrelated type systems, so we must be conservative. + if (RootA.getNode() != RootB.getNode()) + return true; + + // If they have the same root, then we've proved there's no alias. + return false; +} + AliasAnalysis::AliasResult TypeBasedAliasAnalysis::alias(const Location &LocA, const Location &LocB) { @@ -240,7 +397,8 @@ bool TypeBasedAliasAnalysis::pointsToConstantMemory(const Location &Loc, // If this is an "immutable" type, we can assume the pointer is pointing // to constant memory. - if (TBAANode(M).TypeIsImmutable()) + if ((!EnableStructPathTBAA && TBAANode(M).TypeIsImmutable()) || + (EnableStructPathTBAA && TBAAStructTagNode(M).TypeIsImmutable())) return true; return AliasAnalysis::pointsToConstantMemory(Loc, OrLocal); @@ -256,7 +414,8 @@ TypeBasedAliasAnalysis::getModRefBehavior(ImmutableCallSite CS) { // If this is an "immutable" type, we can assume the call doesn't write // to memory. if (const MDNode *M = CS.getInstruction()->getMetadata(LLVMContext::MD_tbaa)) - if (TBAANode(M).TypeIsImmutable()) + if ((!EnableStructPathTBAA && TBAANode(M).TypeIsImmutable()) || + (EnableStructPathTBAA && TBAAStructTagNode(M).TypeIsImmutable())) Min = OnlyReadsMemory; return ModRefBehavior(AliasAnalysis::getModRefBehavior(CS) & Min); @@ -298,3 +457,55 @@ TypeBasedAliasAnalysis::getModRefInfo(ImmutableCallSite CS1, return AliasAnalysis::getModRefInfo(CS1, CS2); } + +MDNode *MDNode::getMostGenericTBAA(MDNode *A, MDNode *B) { + if (!A || !B) + return NULL; + + if (A == B) + return A; + + // For struct-path aware TBAA, we use the access type of the tag. + if (EnableStructPathTBAA) { + A = cast_or_null<MDNode>(A->getOperand(1)); + if (!A) return 0; + B = cast_or_null<MDNode>(B->getOperand(1)); + if (!B) return 0; + } + + SmallVector<MDNode *, 4> PathA; + MDNode *T = A; + while (T) { + PathA.push_back(T); + T = T->getNumOperands() >= 2 ? 
cast_or_null<MDNode>(T->getOperand(1)) : 0; + } + + SmallVector<MDNode *, 4> PathB; + T = B; + while (T) { + PathB.push_back(T); + T = T->getNumOperands() >= 2 ? cast_or_null<MDNode>(T->getOperand(1)) : 0; + } + + int IA = PathA.size() - 1; + int IB = PathB.size() - 1; + + MDNode *Ret = 0; + while (IA >= 0 && IB >=0) { + if (PathA[IA] == PathB[IB]) + Ret = PathA[IA]; + else + break; + --IA; + --IB; + } + if (!EnableStructPathTBAA) + return Ret; + + if (!Ret) + return 0; + // We need to convert from a type node to a tag node. + Type *Int64 = IntegerType::get(A->getContext(), 64); + Value *Ops[3] = { Ret, Ret, ConstantInt::get(Int64, 0) }; + return MDNode::get(A->getContext(), Ops); +} diff --git a/contrib/llvm/lib/AsmParser/LLLexer.cpp b/contrib/llvm/lib/AsmParser/LLLexer.cpp index f46383b..e7a9f2a 100644 --- a/contrib/llvm/lib/AsmParser/LLLexer.cpp +++ b/contrib/llvm/lib/AsmParser/LLLexer.cpp @@ -582,6 +582,7 @@ lltok::Kind LLLexer::LexIdentifier() { KEYWORD(optsize); KEYWORD(readnone); KEYWORD(readonly); + KEYWORD(returned); KEYWORD(returns_twice); KEYWORD(signext); KEYWORD(sret); diff --git a/contrib/llvm/lib/AsmParser/LLParser.cpp b/contrib/llvm/lib/AsmParser/LLParser.cpp index c8da1f8..62d8070d 100644 --- a/contrib/llvm/lib/AsmParser/LLParser.cpp +++ b/contrib/llvm/lib/AsmParser/LLParser.cpp @@ -528,7 +528,7 @@ bool LLParser::ParseMDNodeID(MDNode *&Result) { if (Result) return false; // Otherwise, create MDNode forward reference. - MDNode *FwdNode = MDNode::getTemporary(Context, ArrayRef<Value*>()); + MDNode *FwdNode = MDNode::getTemporary(Context, None); ForwardRefMDNodes[MID] = std::make_pair(FwdNode, Lex.getLoc()); if (NumberedMetadata.size() <= MID) @@ -878,8 +878,9 @@ bool LLParser::ParseFnAttributeValuePairs(AttrBuilder &B, // Target-independent attributes: case lltok::kw_align: { - // As a hack, we allow "align 2" on functions as a synonym for "alignstack - // 2". + // As a hack, we allow function alignment to be initially parsed as an + // attribute on a function declaration/definition or added to an attribute + // group and later moved to the alignment field. 
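MDNode::getMostGenericTBAA() above collects the parent chain of each tag and walks the two paths from the root downward; the deepest node the paths share is the most generic type both accesses agree on, and differing roots mean no common type at all. A simplified sketch with a toy node type (under struct-path TBAA the real function additionally rewraps the resulting type node as a tag with offset 0):

    #include <cstdio>
    #include <string>
    #include <vector>

    // Toy TBAA-style node: each node names a type and points at its parent in the
    // type tree (the root has no parent). Stand-in for the MDNode chains walked by
    // MDNode::getMostGenericTBAA().
    struct TBAANode {
      std::string Name;
      TBAANode *Parent;
    };

    static std::vector<TBAANode *> pathToRoot(TBAANode *N) {
      std::vector<TBAANode *> Path;
      for (; N; N = N->Parent)
        Path.push_back(N);
      return Path;
    }

    // Compare the two paths from the root ends downward; the last shared node is
    // the most generic common type, and null means the roots differ.
    static TBAANode *mostGeneric(TBAANode *A, TBAANode *B) {
      std::vector<TBAANode *> PA = pathToRoot(A), PB = pathToRoot(B);
      TBAANode *Common = nullptr;
      int IA = PA.size() - 1, IB = PB.size() - 1;
      while (IA >= 0 && IB >= 0 && PA[IA] == PB[IB]) {
        Common = PA[IA];
        --IA;
        --IB;
      }
      return Common;
    }

    int main() {
      TBAANode Root{"omnipotent char", nullptr};
      TBAANode AnyPtr{"any pointer", &Root};
      TBAANode IntPtr{"int*", &AnyPtr};
      TBAANode Int{"int", &Root};

      TBAANode *M = mostGeneric(&IntPtr, &Int);
      std::printf("most generic: %s\n", M ? M->Name.c_str() : "none"); // omnipotent char
      return 0;
    }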
unsigned Alignment; if (inAttrGrp) { Lex.Lex(); @@ -943,6 +944,7 @@ bool LLParser::ParseFnAttributeValuePairs(AttrBuilder &B, case lltok::kw_nest: case lltok::kw_noalias: case lltok::kw_nocapture: + case lltok::kw_returned: case lltok::kw_sret: HaveError |= Error(Lex.getLoc(), @@ -1155,21 +1157,35 @@ bool LLParser::ParseOptionalParamAttrs(AttrBuilder &B) { case lltok::kw_nest: B.addAttribute(Attribute::Nest); break; case lltok::kw_noalias: B.addAttribute(Attribute::NoAlias); break; case lltok::kw_nocapture: B.addAttribute(Attribute::NoCapture); break; + case lltok::kw_returned: B.addAttribute(Attribute::Returned); break; case lltok::kw_signext: B.addAttribute(Attribute::SExt); break; case lltok::kw_sret: B.addAttribute(Attribute::StructRet); break; case lltok::kw_zeroext: B.addAttribute(Attribute::ZExt); break; - case lltok::kw_alignstack: case lltok::kw_nounwind: - case lltok::kw_alwaysinline: case lltok::kw_optsize: - case lltok::kw_inlinehint: case lltok::kw_readnone: - case lltok::kw_minsize: case lltok::kw_readonly: - case lltok::kw_naked: case lltok::kw_returns_twice: - case lltok::kw_nobuiltin: case lltok::kw_sanitize_address: - case lltok::kw_noimplicitfloat: case lltok::kw_sanitize_memory: - case lltok::kw_noinline: case lltok::kw_sanitize_thread: - case lltok::kw_nonlazybind: case lltok::kw_ssp: - case lltok::kw_noredzone: case lltok::kw_sspreq: - case lltok::kw_noreturn: case lltok::kw_uwtable: + case lltok::kw_alignstack: + case lltok::kw_alwaysinline: + case lltok::kw_inlinehint: + case lltok::kw_minsize: + case lltok::kw_naked: + case lltok::kw_nobuiltin: + case lltok::kw_noduplicate: + case lltok::kw_noimplicitfloat: + case lltok::kw_noinline: + case lltok::kw_nonlazybind: + case lltok::kw_noredzone: + case lltok::kw_noreturn: + case lltok::kw_nounwind: + case lltok::kw_optsize: + case lltok::kw_readnone: + case lltok::kw_readonly: + case lltok::kw_returns_twice: + case lltok::kw_sanitize_address: + case lltok::kw_sanitize_memory: + case lltok::kw_sanitize_thread: + case lltok::kw_ssp: + case lltok::kw_sspreq: + case lltok::kw_sspstrong: + case lltok::kw_uwtable: HaveError |= Error(Lex.getLoc(), "invalid use of function-only attribute"); break; } @@ -1195,24 +1211,39 @@ bool LLParser::ParseOptionalReturnAttrs(AttrBuilder &B) { case lltok::kw_zeroext: B.addAttribute(Attribute::ZExt); break; // Error handling. 
- case lltok::kw_sret: case lltok::kw_nocapture: - case lltok::kw_byval: case lltok::kw_nest: + case lltok::kw_align: + case lltok::kw_byval: + case lltok::kw_nest: + case lltok::kw_nocapture: + case lltok::kw_returned: + case lltok::kw_sret: HaveError |= Error(Lex.getLoc(), "invalid use of parameter-only attribute"); break; - case lltok::kw_align: case lltok::kw_noreturn: - case lltok::kw_alignstack: case lltok::kw_nounwind: - case lltok::kw_alwaysinline: case lltok::kw_optsize: - case lltok::kw_inlinehint: case lltok::kw_readnone: - case lltok::kw_minsize: case lltok::kw_readonly: - case lltok::kw_naked: case lltok::kw_returns_twice: - case lltok::kw_nobuiltin: case lltok::kw_sanitize_address: - case lltok::kw_noduplicate: case lltok::kw_sanitize_memory: - case lltok::kw_noimplicitfloat: case lltok::kw_sanitize_thread: - case lltok::kw_noinline: case lltok::kw_ssp: - case lltok::kw_nonlazybind: case lltok::kw_sspreq: - case lltok::kw_noredzone: case lltok::kw_sspstrong: - case lltok::kw_uwtable: + case lltok::kw_alignstack: + case lltok::kw_alwaysinline: + case lltok::kw_inlinehint: + case lltok::kw_minsize: + case lltok::kw_naked: + case lltok::kw_nobuiltin: + case lltok::kw_noduplicate: + case lltok::kw_noimplicitfloat: + case lltok::kw_noinline: + case lltok::kw_nonlazybind: + case lltok::kw_noredzone: + case lltok::kw_noreturn: + case lltok::kw_nounwind: + case lltok::kw_optsize: + case lltok::kw_readnone: + case lltok::kw_readonly: + case lltok::kw_returns_twice: + case lltok::kw_sanitize_address: + case lltok::kw_sanitize_memory: + case lltok::kw_sanitize_thread: + case lltok::kw_ssp: + case lltok::kw_sspreq: + case lltok::kw_sspstrong: + case lltok::kw_uwtable: HaveError |= Error(Lex.getLoc(), "invalid use of function-only attribute"); break; } @@ -4232,7 +4263,9 @@ int LLParser::ParseGetElementPtr(Instruction *&Inst, PerFunctionState &PFS) { if (ParseTypeAndValue(Ptr, Loc, PFS)) return true; - if (!Ptr->getType()->getScalarType()->isPointerTy()) + Type *BaseType = Ptr->getType(); + PointerType *BasePointerType = dyn_cast<PointerType>(BaseType->getScalarType()); + if (!BasePointerType) return Error(Loc, "base of getelementptr must be a pointer"); SmallVector<Value*, 16> Indices; @@ -4257,7 +4290,10 @@ int LLParser::ParseGetElementPtr(Instruction *&Inst, PerFunctionState &PFS) { Indices.push_back(Val); } - if (!GetElementPtrInst::getIndexedType(Ptr->getType(), Indices)) + if (!Indices.empty() && !BasePointerType->getElementType()->isSized()) + return Error(Loc, "base element of getelementptr must be sized"); + + if (!GetElementPtrInst::getIndexedType(BaseType, Indices)) return Error(Loc, "invalid getelementptr indices"); Inst = GetElementPtrInst::Create(Ptr, Indices); if (InBounds) diff --git a/contrib/llvm/lib/AsmParser/LLToken.h b/contrib/llvm/lib/AsmParser/LLToken.h index cd25ba3..3bf54fa 100644 --- a/contrib/llvm/lib/AsmParser/LLToken.h +++ b/contrib/llvm/lib/AsmParser/LLToken.h @@ -114,6 +114,7 @@ namespace lltok { kw_optsize, kw_readnone, kw_readonly, + kw_returned, kw_returns_twice, kw_signext, kw_ssp, diff --git a/contrib/llvm/lib/Bitcode/Reader/BitReader.cpp b/contrib/llvm/lib/Bitcode/Reader/BitReader.cpp index 5cd6c55..23630e5 100644 --- a/contrib/llvm/lib/Bitcode/Reader/BitReader.cpp +++ b/contrib/llvm/lib/Bitcode/Reader/BitReader.cpp @@ -10,6 +10,7 @@ #include "llvm-c/BitReader.h" #include "llvm/Bitcode/ReaderWriter.h" #include "llvm/IR/LLVMContext.h" +#include "llvm/IR/Module.h" #include "llvm/Support/MemoryBuffer.h" #include <cstring> #include <string> diff --git 
a/contrib/llvm/lib/Bitcode/Reader/BitcodeReader.cpp b/contrib/llvm/lib/Bitcode/Reader/BitcodeReader.cpp index f348843..e6ff4b4 100644 --- a/contrib/llvm/lib/Bitcode/Reader/BitcodeReader.cpp +++ b/contrib/llvm/lib/Bitcode/Reader/BitcodeReader.cpp @@ -405,7 +405,7 @@ Value *BitcodeReaderMDValueList::getValueFwdRef(unsigned Idx) { } // Create and return a placeholder, which will later be RAUW'd. - Value *V = MDNode::getTemporary(Context, ArrayRef<Value*>()); + Value *V = MDNode::getTemporary(Context, None); MDValuePtrs[Idx] = V; return V; } diff --git a/contrib/llvm/lib/Bitcode/Writer/BitWriter.cpp b/contrib/llvm/lib/Bitcode/Writer/BitWriter.cpp index 9f51c35..985208c 100644 --- a/contrib/llvm/lib/Bitcode/Writer/BitWriter.cpp +++ b/contrib/llvm/lib/Bitcode/Writer/BitWriter.cpp @@ -9,6 +9,7 @@ #include "llvm-c/BitWriter.h" #include "llvm/Bitcode/ReaderWriter.h" +#include "llvm/IR/Module.h" #include "llvm/Support/raw_ostream.h" using namespace llvm; diff --git a/contrib/llvm/lib/CodeGen/Analysis.cpp b/contrib/llvm/lib/CodeGen/Analysis.cpp index dd7282c..4731af5 100644 --- a/contrib/llvm/lib/CodeGen/Analysis.cpp +++ b/contrib/llvm/lib/CodeGen/Analysis.cpp @@ -201,62 +201,161 @@ ISD::CondCode llvm::getICmpCondCode(ICmpInst::Predicate Pred) { } } +static bool isNoopBitcast(Type *T1, Type *T2, + const TargetLowering& TLI) { + return T1 == T2 || (T1->isPointerTy() && T2->isPointerTy()) || + (isa<VectorType>(T1) && isa<VectorType>(T2) && + TLI.isTypeLegal(EVT::getEVT(T1)) && TLI.isTypeLegal(EVT::getEVT(T2))); +} -/// getNoopInput - If V is a noop (i.e., lowers to no machine code), look -/// through it (and any transitive noop operands to it) and return its input -/// value. This is used to determine if a tail call can be formed. -/// -static const Value *getNoopInput(const Value *V, const TargetLowering &TLI) { - // If V is not an instruction, it can't be looked through. - const Instruction *I = dyn_cast<Instruction>(V); - if (I == 0 || !I->hasOneUse() || I->getNumOperands() == 0) return V; - - Value *Op = I->getOperand(0); +/// sameNoopInput - Return true if V1 == V2, else if either V1 or V2 is a noop +/// (i.e., lowers to no machine code), look through it (and any transitive noop +/// operands to it) and check if it has the same noop input value. This is +/// used to determine if a tail call can be formed. +static bool sameNoopInput(const Value *V1, const Value *V2, + SmallVectorImpl<unsigned> &Els1, + SmallVectorImpl<unsigned> &Els2, + const TargetLowering &TLI) { + using std::swap; + bool swapParity = false; + bool equalEls = Els1 == Els2; + while (true) { + if ((equalEls && V1 == V2) || isa<UndefValue>(V1) || isa<UndefValue>(V2)) { + if (swapParity) + // Revert to original Els1 and Els2 to avoid confusing recursive calls + swap(Els1, Els2); + return true; + } - // Look through truly no-op truncates. - if (isa<TruncInst>(I) && - TLI.isTruncateFree(I->getOperand(0)->getType(), I->getType())) - return getNoopInput(I->getOperand(0), TLI); - - // Look through truly no-op bitcasts. - if (isa<BitCastInst>(I)) { - // No type change at all. - if (Op->getType() == I->getType()) - return getNoopInput(Op, TLI); + // Try to look through V1; if V1 is not an instruction, it can't be looked + // through. + const Instruction *I = dyn_cast<Instruction>(V1); + const Value *NoopInput = 0; + if (I != 0 && I->getNumOperands() > 0) { + Value *Op = I->getOperand(0); + if (isa<TruncInst>(I)) { + // Look through truly no-op truncates. 
+ if (TLI.isTruncateFree(Op->getType(), I->getType())) + NoopInput = Op; + } else if (isa<BitCastInst>(I)) { + // Look through truly no-op bitcasts. + if (isNoopBitcast(Op->getType(), I->getType(), TLI)) + NoopInput = Op; + } else if (isa<GetElementPtrInst>(I)) { + // Look through getelementptr + if (cast<GetElementPtrInst>(I)->hasAllZeroIndices()) + NoopInput = Op; + } else if (isa<IntToPtrInst>(I)) { + // Look through inttoptr. + // Make sure this isn't a truncating or extending cast. We could + // support this eventually, but don't bother for now. + if (!isa<VectorType>(I->getType()) && + TLI.getPointerTy().getSizeInBits() == + cast<IntegerType>(Op->getType())->getBitWidth()) + NoopInput = Op; + } else if (isa<PtrToIntInst>(I)) { + // Look through ptrtoint. + // Make sure this isn't a truncating or extending cast. We could + // support this eventually, but don't bother for now. + if (!isa<VectorType>(I->getType()) && + TLI.getPointerTy().getSizeInBits() == + cast<IntegerType>(I->getType())->getBitWidth()) + NoopInput = Op; + } else if (isa<CallInst>(I)) { + // Look through call + for (User::const_op_iterator i = I->op_begin(), + // Skip Callee + e = I->op_end() - 1; + i != e; ++i) { + unsigned attrInd = i - I->op_begin() + 1; + if (cast<CallInst>(I)->paramHasAttr(attrInd, Attribute::Returned) && + isNoopBitcast((*i)->getType(), I->getType(), TLI)) { + NoopInput = *i; + break; + } + } + } else if (isa<InvokeInst>(I)) { + // Look through invoke + for (User::const_op_iterator i = I->op_begin(), + // Skip BB, BB, Callee + e = I->op_end() - 3; + i != e; ++i) { + unsigned attrInd = i - I->op_begin() + 1; + if (cast<InvokeInst>(I)->paramHasAttr(attrInd, Attribute::Returned) && + isNoopBitcast((*i)->getType(), I->getType(), TLI)) { + NoopInput = *i; + break; + } + } + } + } - // Pointer to pointer cast. - if (Op->getType()->isPointerTy() && I->getType()->isPointerTy()) - return getNoopInput(Op, TLI); - - if (isa<VectorType>(Op->getType()) && isa<VectorType>(I->getType()) && - TLI.isTypeLegal(EVT::getEVT(Op->getType())) && - TLI.isTypeLegal(EVT::getEVT(I->getType()))) - return getNoopInput(Op, TLI); - } - - // Look through inttoptr. - if (isa<IntToPtrInst>(I) && !isa<VectorType>(I->getType())) { - // Make sure this isn't a truncating or extending cast. We could support - // this eventually, but don't bother for now. - if (TLI.getPointerTy().getSizeInBits() == - cast<IntegerType>(Op->getType())->getBitWidth()) - return getNoopInput(Op, TLI); - } + if (NoopInput) { + V1 = NoopInput; + continue; + } - // Look through ptrtoint. - if (isa<PtrToIntInst>(I) && !isa<VectorType>(I->getType())) { - // Make sure this isn't a truncating or extending cast. We could support - // this eventually, but don't bother for now. 
- if (TLI.getPointerTy().getSizeInBits() == - cast<IntegerType>(I->getType())->getBitWidth()) - return getNoopInput(Op, TLI); + // If we already swapped, avoid infinite loop + if (swapParity) + break; + + // Otherwise, swap V1<->V2, Els1<->Els2 + swap(V1, V2); + swap(Els1, Els2); + swapParity = !swapParity; } + for (unsigned n = 0; n < 2; ++n) { + if (isa<InsertValueInst>(V1)) { + if (isa<StructType>(V1->getType())) { + // Look through insertvalue + unsigned i, e; + for (i = 0, e = cast<StructType>(V1->getType())->getNumElements(); + i != e; ++i) { + const Value *InScalar = FindInsertedValue(const_cast<Value*>(V1), i); + if (InScalar == 0) + break; + Els1.push_back(i); + if (!sameNoopInput(InScalar, V2, Els1, Els2, TLI)) { + Els1.pop_back(); + break; + } + Els1.pop_back(); + } + if (i == e) { + if (swapParity) + swap(Els1, Els2); + return true; + } + } + } else if (!Els1.empty() && isa<ExtractValueInst>(V1)) { + const ExtractValueInst *EVI = cast<ExtractValueInst>(V1); + unsigned i = Els1.back(); + // If the scalar value being inserted is an extractvalue of the right + // index from the call, then everything is good. + if (isa<StructType>(EVI->getOperand(0)->getType()) && + EVI->getNumIndices() == 1 && EVI->getIndices()[0] == i) { + // Look through extractvalue + Els1.pop_back(); + if (sameNoopInput(EVI->getOperand(0), V2, Els1, Els2, TLI)) { + Els1.push_back(i); + if (swapParity) + swap(Els1, Els2); + return true; + } + Els1.push_back(i); + } + } - // Otherwise it's not something we can look through. - return V; -} + swap(V1, V2); + swap(Els1, Els2); + swapParity = !swapParity; + } + if (swapParity) + swap(Els1, Els2); + return false; +} /// Test if the given instruction is in a position to be optimized /// with a tail-call. This roughly means that it's in a block with @@ -264,7 +363,8 @@ static const Value *getNoopInput(const Value *V, const TargetLowering &TLI) { /// between it and the return. /// /// This function only tests target-independent requirements. -bool llvm::isInTailCallPosition(ImmutableCallSite CS,const TargetLowering &TLI){ +bool llvm::isInTailCallPosition(ImmutableCallSite CS, + const TargetLowering &TLI) { const Instruction *I = CS.getInstruction(); const BasicBlock *ExitBB = I->getParent(); const TerminatorInst *Term = ExitBB->getTerminator(); @@ -322,28 +422,7 @@ bool llvm::isInTailCallPosition(ImmutableCallSite CS,const TargetLowering &TLI){ CallerAttrs.hasAttribute(AttributeSet::ReturnIndex, Attribute::SExt)) return false; - // Otherwise, make sure the unmodified return value of I is the return value. - // We handle two cases: multiple return values + scalars. - Value *RetVal = Ret->getOperand(0); - if (!isa<InsertValueInst>(RetVal) || !isa<StructType>(RetVal->getType())) - // Handle scalars first. - return getNoopInput(Ret->getOperand(0), TLI) == I; - - // If this is an aggregate return, look through the insert/extract values and - // see if each is transparent. - for (unsigned i = 0, e =cast<StructType>(RetVal->getType())->getNumElements(); - i != e; ++i) { - const Value *InScalar = FindInsertedValue(RetVal, i); - if (InScalar == 0) return false; - InScalar = getNoopInput(InScalar, TLI); - - // If the scalar value being inserted is an extractvalue of the right index - // from the call, then everything is good. 
- const ExtractValueInst *EVI = dyn_cast<ExtractValueInst>(InScalar); - if (EVI == 0 || EVI->getOperand(0) != I || EVI->getNumIndices() != 1 || - EVI->getIndices()[0] != i) - return false; - } - - return true; + // Otherwise, make sure the return value and I have the same value + SmallVector<unsigned, 4> Els1, Els2; + return sameNoopInput(Ret->getOperand(0), I, Els1, Els2, TLI); } diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp index d4a745d..84162ac 100644 --- a/contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp +++ b/contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp @@ -133,9 +133,13 @@ const DataLayout &AsmPrinter::getDataLayout() const { return *TM.getDataLayout(); } +StringRef AsmPrinter::getTargetTriple() const { + return TM.getTargetTriple(); +} + /// getCurrentSection() - Return the current section we are emitting to. const MCSection *AsmPrinter::getCurrentSection() const { - return OutStreamer.getCurrentSection(); + return OutStreamer.getCurrentSection().first; } @@ -813,7 +817,7 @@ void AsmPrinter::EmitDwarfRegOp(const MachineLocation &MLoc) const { // caller might be in the middle of an dwarf expression. We should // probably assert that Reg >= 0 once debug info generation is more mature. - if (int Offset = MLoc.getOffset()) { + if (MLoc.isIndirect()) { if (Reg < 32) { OutStreamer.AddComment( dwarf::OperationEncodingString(dwarf::DW_OP_breg0 + Reg)); @@ -824,7 +828,7 @@ void AsmPrinter::EmitDwarfRegOp(const MachineLocation &MLoc) const { OutStreamer.AddComment(Twine(Reg)); EmitULEB128(Reg); } - EmitSLEB128(Offset); + EmitSLEB128(MLoc.getOffset()); } else { if (Reg < 32) { OutStreamer.AddComment( @@ -1213,7 +1217,7 @@ void AsmPrinter::EmitJumpTableEntry(const MachineJumpTableInfo *MJTI, bool AsmPrinter::EmitSpecialLLVMGlobal(const GlobalVariable *GV) { if (GV->getName() == "llvm.used") { if (MAI->hasNoDeadStrip()) // No need to emit this at all. - EmitLLVMUsedList(GV->getInitializer()); + EmitLLVMUsedList(cast<ConstantArray>(GV->getInitializer())); return true; } @@ -1256,11 +1260,8 @@ bool AsmPrinter::EmitSpecialLLVMGlobal(const GlobalVariable *GV) { /// EmitLLVMUsedList - For targets that define a MAI::UsedDirective, mark each /// global in the specified llvm.used list for which emitUsedDirectiveFor /// is true, as being used with this directive. -void AsmPrinter::EmitLLVMUsedList(const Constant *List) { +void AsmPrinter::EmitLLVMUsedList(const ConstantArray *InitList) { // Should be an array of 'i8*'. - const ConstantArray *InitList = dyn_cast<ConstantArray>(List); - if (InitList == 0) return; - for (unsigned i = 0, e = InitList->getNumOperands(); i != e; ++i) { const GlobalValue *GV = dyn_cast<GlobalValue>(InitList->getOperand(i)->stripPointerCasts()); diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp index 156acac..31e42d4 100644 --- a/contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp +++ b/contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp @@ -141,7 +141,7 @@ void AsmPrinter::EmitTTypeReference(const GlobalValue *GV, void AsmPrinter::EmitSectionOffset(const MCSymbol *Label, const MCSymbol *SectionLabel) const { // On COFF targets, we have to emit the special .secrel32 directive. 
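The rewritten tail-call check above replaces the one-directional getNoopInput() walk with sameNoopInput(), which peels no-op wrappers (free truncates, no-op bitcasts, all-zero GEPs, and calls forwarding a 'returned' argument) from either side, swapping the two values when one side is exhausted. A structural sketch of that two-sided peeling with a toy value type; the real function also tracks insertvalue/extractvalue element paths, which is omitted here:

    #include <cstdio>
    #include <utility>

    // Toy value node: Kind tells whether the node is a "real" value or a wrapper
    // that lowers to no machine code (think no-op bitcast, all-zero GEP, or a call
    // whose argument carries the new 'returned' attribute).
    struct Val {
      enum { Real, Noop } Kind;
      Val *Input;   // for Noop: the value it forwards
    };

    // Mirrors the shape of sameNoopInput(): peel wrappers off V1, and once V1 can't
    // be peeled any further, swap V1 and V2 and peel the other side, so the
    // comparison works no matter which side carries the wrappers.
    static bool sameNoopInput(const Val *V1, const Val *V2) {
      bool SwappedOnce = false;
      while (true) {
        if (V1 == V2)
          return true;
        if (V1->Kind == Val::Noop) {   // look through the wrapper
          V1 = V1->Input;
          continue;
        }
        if (SwappedOnce)
          return false;                // both sides fully peeled and still different
        std::swap(V1, V2);
        SwappedOnce = true;
      }
    }

    int main() {
      Val Call{Val::Real, nullptr};    // the call instruction itself
      Val Cast{Val::Noop, &Call};      // no-op bitcast of the call result
      Val Other{Val::Real, nullptr};

      std::printf("tail-call ok: %d\n", sameNoopInput(&Cast, &Call));  // 1
      std::printf("tail-call ok: %d\n", sameNoopInput(&Other, &Call)); // 0
      return 0;
    }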
- if (MAI->getDwarfSectionOffsetDirective()) { + if (MAI->needsDwarfSectionOffsetDirective()) { OutStreamer.EmitCOFFSecRel32(Label); return; } diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DIE.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/DIE.cpp index 57e0acd..673867a 100644 --- a/contrib/llvm/lib/CodeGen/AsmPrinter/DIE.cpp +++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DIE.cpp @@ -112,8 +112,9 @@ DIE::~DIE() { delete Children[i]; } -/// Climb up the parent chain to get the compile unit DIE this DIE belongs to. -DIE *DIE::getCompileUnit() const{ +/// Climb up the parent chain to get the compile unit DIE to which this DIE +/// belongs. +DIE *DIE::getCompileUnit() const { DIE *p = getParent(); while (p) { if (p->getTag() == dwarf::DW_TAG_compile_unit) @@ -124,8 +125,7 @@ DIE *DIE::getCompileUnit() const{ } #ifndef NDEBUG -void DIE::print(raw_ostream &O, unsigned IncIndent) { - IndentCount += IncIndent; +void DIE::print(raw_ostream &O, unsigned IndentCount) const { const std::string Indent(IndentCount, ' '); bool isBlock = Abbrev.getTag() == 0; @@ -164,11 +164,10 @@ void DIE::print(raw_ostream &O, unsigned IncIndent) { IndentCount -= 2; for (unsigned j = 0, M = Children.size(); j < M; ++j) { - Children[j]->print(O, 4); + Children[j]->print(O, IndentCount+4); } if (!isBlock) O << "\n"; - IndentCount -= IncIndent; } void DIE::dump() { diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DIE.h b/contrib/llvm/lib/CodeGen/AsmPrinter/DIE.h index c332aa2..3c06001 100644 --- a/contrib/llvm/lib/CodeGen/AsmPrinter/DIE.h +++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DIE.h @@ -139,8 +139,7 @@ namespace llvm { mutable unsigned IndentCount; public: explicit DIE(unsigned Tag) - : Offset(0), Size(0), Abbrev(Tag, dwarf::DW_CHILDREN_no), Parent(0), - IndentCount(0) {} + : Offset(0), Size(0), Abbrev(Tag, dwarf::DW_CHILDREN_no), Parent(0) {} virtual ~DIE(); // Accessors. @@ -179,7 +178,7 @@ namespace llvm { } #ifndef NDEBUG - void print(raw_ostream &O, unsigned IncIndent = 0); + void print(raw_ostream &O, unsigned IndentCount = 0) const; void dump(); #endif }; diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp index f9b6f94..89abcff 100644 --- a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp +++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp @@ -685,7 +685,7 @@ bool CompileUnit::addConstantValue(DIE *Die, const APInt &Val, return true; } -/// addTemplateParams - Add template parameters in buffer. +/// addTemplateParams - Add template parameters into buffer. void CompileUnit::addTemplateParams(DIE &Buffer, DIArray TParams) { // Add template parameters. for (unsigned i = 0, e = TParams.getNumElements(); i != e; ++i) { @@ -707,7 +707,7 @@ DIE *CompileUnit::getOrCreateContextDIE(DIDescriptor Context) { return getOrCreateNameSpace(DINameSpace(Context)); else if (Context.isSubprogram()) return getOrCreateSubprogramDIE(DISubprogram(Context)); - else + else return getDIE(Context); } diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h index 2b180c6..8f08c63 100644 --- a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h +++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h @@ -94,9 +94,6 @@ class CompileUnit { /// DWARF version doesn't handle the language, return -1. int64_t getDefaultLowerBound() const; - /// getOrCreateContextDIE - Get context owner's DIE. 
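DIE::print() above now takes the indentation as a parameter and becomes const, instead of bumping a mutable IndentCount member, so recursive calls cannot interfere with one another. A minimal sketch of the same pattern on a hypothetical node type:

    #include <cstdio>
    #include <vector>

    // Passing the indentation down as a parameter keeps the printer const and each
    // recursive call independent, rather than threading state through a mutable
    // member as the old DIE::print() did.
    struct Node {
      const char *Tag;
      std::vector<Node *> Children;

      void print(int Indent = 0) const {
        std::printf("%*s%s\n", Indent, "", Tag);   // indent, then the tag name
        for (const Node *C : Children)
          C->print(Indent + 4);                    // each child indents by four more
      }
    };

    int main() {
      Node Leaf{"DW_TAG_variable", {}};
      Node Sub{"DW_TAG_subprogram", {&Leaf}};
      Node CU{"DW_TAG_compile_unit", {&Sub}};
      CU.print();
      return 0;
    }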
- DIE *getOrCreateContextDIE(DIDescriptor Context); - public: CompileUnit(unsigned UID, unsigned L, DIE *D, AsmPrinter *A, DwarfDebug *DW, DwarfUnits *); @@ -372,6 +369,9 @@ public: /// createStaticMemberDIE - Create new static data member DIE. DIE *createStaticMemberDIE(DIDerivedType DT); + /// getOrCreateContextDIE - Get context owner's DIE. + DIE *getOrCreateContextDIE(DIDescriptor Context); + private: // DIEValueAllocator - All DIEValues are allocated through this allocator. diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp index 11eb983..1e706cc 100644 --- a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp +++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp @@ -94,6 +94,12 @@ static cl::opt<DefaultOnOff> SplitDwarf("split-dwarf", cl::Hidden, namespace { const char *DWARFGroupName = "DWARF Emission"; const char *DbgTimerName = "DWARF Debug Writer"; + + struct CompareFirst { + template <typename T> bool operator()(const T &lhs, const T &rhs) const { + return lhs.first < rhs.first; + } + }; } // end anonymous namespace //===----------------------------------------------------------------------===// @@ -170,12 +176,13 @@ DwarfDebug::DwarfDebug(AsmPrinter *A, Module *M) DwarfInfoSectionSym = DwarfAbbrevSectionSym = 0; DwarfStrSectionSym = TextSectionSym = 0; DwarfDebugRangeSectionSym = DwarfDebugLocSectionSym = DwarfLineSectionSym = 0; + DwarfAddrSectionSym = 0; DwarfAbbrevDWOSectionSym = DwarfStrDWOSectionSym = 0; FunctionBeginSym = FunctionEndSym = 0; // Turn on accelerator tables and older gdb compatibility // for Darwin. - bool IsDarwin = Triple(M->getTargetTriple()).isOSDarwin(); + bool IsDarwin = Triple(A->getTargetTriple()).isOSDarwin(); if (DarwinGDBCompat == Default) { if (IsDarwin) IsDarwinGDBCompat = true; @@ -596,9 +603,16 @@ DIE *DwarfDebug::constructScopeDIE(CompileUnit *TheCU, LexicalScope *Scope) { } else { // There is no need to emit empty lexical block DIE. - if (Children.empty()) + std::pair<ImportedEntityMap::const_iterator, + ImportedEntityMap::const_iterator> Range = std::equal_range( + ScopesWithImportedEntities.begin(), ScopesWithImportedEntities.end(), + std::pair<const MDNode *, const MDNode *>(DS, (const MDNode*)0), + CompareFirst()); + if (Children.empty() && Range.first == Range.second) return NULL; ScopeDIE = constructLexicalScopeDIE(TheCU, Scope); + for (ImportedEntityMap::const_iterator i = Range.first; i != Range.second; ++i) + constructImportedModuleDIE(TheCU, i->second, ScopeDIE); } if (!ScopeDIE) return NULL; @@ -643,7 +657,7 @@ unsigned DwarfDebug::getOrCreateSourceID(StringRef FileName, // We look up the CUID/file/dir by concatenating them with a zero byte. SmallString<128> NamePair; - NamePair += CUID; + NamePair += utostr(CUID); NamePair += '\0'; NamePair += DirName; NamePair += '\0'; // Zero bytes are not allowed in paths. @@ -681,9 +695,12 @@ CompileUnit *DwarfDebug::constructCompileUnit(const MDNode *N) { NewCU->addUInt(Die, dwarf::DW_AT_language, dwarf::DW_FORM_data2, DIUnit.getLanguage()); NewCU->addString(Die, dwarf::DW_AT_name, FN); + // 2.17.1 requires that we use DW_AT_low_pc for a single entry point - // into an entity. We're using 0 (or a NULL label) for this. - NewCU->addLabelAddress(Die, dwarf::DW_AT_low_pc, NULL); + // into an entity. We're using 0 (or a NULL label) for this. For + // split dwarf it's in the skeleton CU so omit it here. 
+ if (!useSplitDwarf()) + NewCU->addLabelAddress(Die, dwarf::DW_AT_low_pc, NULL); // Define start line table label for each Compile Unit. MCSymbol *LineTableStartSym = Asm->GetTempSymbol("line_table_start", @@ -691,21 +708,32 @@ CompileUnit *DwarfDebug::constructCompileUnit(const MDNode *N) { Asm->OutStreamer.getContext().setMCLineTableSymbol(LineTableStartSym, NewCU->getUniqueID()); + // Use a single line table if we are using .loc and generating assembly. + bool UseTheFirstCU = + (Asm->TM.hasMCUseLoc() && + Asm->OutStreamer.getKind() == MCStreamer::SK_AsmStreamer) || + (NewCU->getUniqueID() == 0); + // DW_AT_stmt_list is a offset of line number information for this - // compile unit in debug_line section. + // compile unit in debug_line section. For split dwarf this is + // left in the skeleton CU and so not included. // The line table entries are not always emitted in assembly, so it // is not okay to use line_table_start here. - if (Asm->MAI->doesDwarfUseRelocationsAcrossSections()) - NewCU->addLabel(Die, dwarf::DW_AT_stmt_list, dwarf::DW_FORM_data4, - NewCU->getUniqueID() == 0 ? - Asm->GetTempSymbol("section_line") : LineTableStartSym); - else if (NewCU->getUniqueID() == 0) - NewCU->addUInt(Die, dwarf::DW_AT_stmt_list, dwarf::DW_FORM_data4, 0); - else - NewCU->addDelta(Die, dwarf::DW_AT_stmt_list, dwarf::DW_FORM_data4, - LineTableStartSym, DwarfLineSectionSym); + if (!useSplitDwarf()) { + if (Asm->MAI->doesDwarfUseRelocationsAcrossSections()) + NewCU->addLabel(Die, dwarf::DW_AT_stmt_list, dwarf::DW_FORM_data4, + UseTheFirstCU ? + Asm->GetTempSymbol("section_line") : LineTableStartSym); + else if (UseTheFirstCU) + NewCU->addUInt(Die, dwarf::DW_AT_stmt_list, dwarf::DW_FORM_data4, 0); + else + NewCU->addDelta(Die, dwarf::DW_AT_stmt_list, dwarf::DW_FORM_data4, + LineTableStartSym, DwarfLineSectionSym); + } - if (!CompilationDir.empty()) + // If we're using split dwarf the compilation dir is going to be in the + // skeleton CU and so we don't need to duplicate it here. 
+ if (!useSplitDwarf() && !CompilationDir.empty()) NewCU->addString(Die, dwarf::DW_AT_comp_dir, CompilationDir); if (DIUnit.isOptimized()) NewCU->addFlag(Die, dwarf::DW_AT_APPLE_optimized); @@ -754,6 +782,41 @@ void DwarfDebug::constructSubprogramDIE(CompileUnit *TheCU, TheCU->addGlobalName(SP.getName(), SubprogramDie); } +void DwarfDebug::constructImportedModuleDIE(CompileUnit *TheCU, + const MDNode *N) { + DIImportedModule Module(N); + if (!Module.Verify()) + return; + if (DIE *D = TheCU->getOrCreateContextDIE(Module.getContext())) + constructImportedModuleDIE(TheCU, Module, D); +} + +void DwarfDebug::constructImportedModuleDIE(CompileUnit *TheCU, const MDNode *N, + DIE *Context) { + DIImportedModule Module(N); + if (!Module.Verify()) + return; + return constructImportedModuleDIE(TheCU, Module, Context); +} + +void DwarfDebug::constructImportedModuleDIE(CompileUnit *TheCU, + const DIImportedModule &Module, + DIE *Context) { + assert(Module.Verify() && + "Use one of the MDNode * overloads to handle invalid metadata"); + assert(Context && "Should always have a context for an imported_module"); + DIE *IMDie = new DIE(dwarf::DW_TAG_imported_module); + TheCU->insertDIE(Module, IMDie); + DIE *NSDie = TheCU->getOrCreateNameSpace(Module.getNameSpace()); + unsigned FileID = getOrCreateSourceID(Module.getContext().getFilename(), + Module.getContext().getDirectory(), + TheCU->getUniqueID()); + TheCU->addUInt(IMDie, dwarf::DW_AT_decl_file, 0, FileID); + TheCU->addUInt(IMDie, dwarf::DW_AT_decl_line, 0, Module.getLineNumber()); + TheCU->addDIEEntry(IMDie, dwarf::DW_AT_import, dwarf::DW_FORM_ref4, NSDie); + Context->addChild(IMDie); +} + // Emit all Dwarf sections that should come prior to the content. Create // global DIEs and emit initial debug info sections. This is invoked by // the target AsmPrinter. @@ -775,6 +838,13 @@ void DwarfDebug::beginModule() { for (unsigned i = 0, e = CU_Nodes->getNumOperands(); i != e; ++i) { DICompileUnit CUNode(CU_Nodes->getOperand(i)); CompileUnit *CU = constructCompileUnit(CUNode); + DIArray ImportedModules = CUNode.getImportedModules(); + for (unsigned i = 0, e = ImportedModules.getNumElements(); i != e; ++i) + ScopesWithImportedEntities.push_back(std::make_pair( + DIImportedModule(ImportedModules.getElement(i)).getContext(), + ImportedModules.getElement(i))); + std::sort(ScopesWithImportedEntities.begin(), + ScopesWithImportedEntities.end(), CompareFirst()); DIArray GVs = CUNode.getGlobalVariables(); for (unsigned i = 0, e = GVs.getNumElements(); i != e; ++i) CU->createGlobalVariableDIE(GVs.getElement(i)); @@ -787,11 +857,16 @@ void DwarfDebug::beginModule() { DIArray RetainedTypes = CUNode.getRetainedTypes(); for (unsigned i = 0, e = RetainedTypes.getNumElements(); i != e; ++i) CU->getOrCreateTypeDIE(RetainedTypes.getElement(i)); + // Emit imported_modules last so that the relevant context is already + // available. + for (unsigned i = 0, e = ImportedModules.getNumElements(); i != e; ++i) + constructImportedModuleDIE(CU, ImportedModules.getElement(i)); // If we're splitting the dwarf out now that we've got the entire // CU then construct a skeleton CU based upon it. if (useSplitDwarf()) { - // This should be a unique identifier when we want to build .dwp files. - CU->addUInt(CU->getCUDie(), dwarf::DW_AT_GNU_dwo_id, dwarf::DW_FORM_data8, 0); + // This should be a unique identifier when we want to build .dwp files. + CU->addUInt(CU->getCUDie(), dwarf::DW_AT_GNU_dwo_id, + dwarf::DW_FORM_data8, 0); // Now construct the skeleton CU associated. 
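ScopesWithImportedEntities above is kept as a plain vector of (scope, entity) pairs, sorted once by scope so that constructScopeDIE can later pull out every entity for a given scope with std::equal_range. A small stand-alone sketch of that sorted-vector-as-multimap idiom (generic int keys, not the DwarfDebug classes):

#include <algorithm>
#include <cstdio>
#include <utility>
#include <vector>

struct CompareFirst {
  template <typename T> bool operator()(const T &lhs, const T &rhs) const {
    return lhs.first < rhs.first;
  }
};

int main() {
  // Pretend keys are scope ids and values are imported-entity ids.
  std::vector<std::pair<int, int>> scopes = {{3, 30}, {1, 10}, {3, 31}, {2, 20}};
  std::sort(scopes.begin(), scopes.end(), CompareFirst());

  // All entities attached to scope 3, found in O(log n) plus output size.
  auto range = std::equal_range(scopes.begin(), scopes.end(),
                                std::make_pair(3, 0), CompareFirst());
  for (auto it = range.first; it != range.second; ++it)
    std::printf("scope %d -> entity %d\n", it->first, it->second);
  return 0;
}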
constructSkeletonCU(CUNode); } @@ -1099,7 +1174,13 @@ static DotDebugLocEntry getDebugLocEntry(AsmPrinter *Asm, } if (MI->getOperand(0).isReg() && MI->getOperand(1).isImm()) { MachineLocation MLoc; - MLoc.set(MI->getOperand(0).getReg(), MI->getOperand(1).getImm()); + // TODO: Currently an offset of 0 in a DBG_VALUE means + // we need to generate a direct register value. + // There is no way to specify an indirect value with offset 0. + if (MI->getOperand(1).getImm() == 0) + MLoc.set(MI->getOperand(0).getReg()); + else + MLoc.set(MI->getOperand(0).getReg(), MI->getOperand(1).getImm()); return DotDebugLocEntry(FLabel, SLabel, MLoc, Var); } if (MI->getOperand(0).isImm()) @@ -1366,7 +1447,12 @@ void DwarfDebug::beginFunction(const MachineFunction *MF) { LexicalScope *FnScope = LScopes.getCurrentFunctionScope(); CompileUnit *TheCU = SPMap.lookup(FnScope->getScopeNode()); assert(TheCU && "Unable to find compile unit!"); - Asm->OutStreamer.getContext().setDwarfCompileUnitID(TheCU->getUniqueID()); + if (Asm->TM.hasMCUseLoc() && + Asm->OutStreamer.getKind() == MCStreamer::SK_AsmStreamer) + // Use a single line table if we are using .loc and generating assembly. + Asm->OutStreamer.getContext().setDwarfCompileUnitID(0); + else + Asm->OutStreamer.getContext().setDwarfCompileUnitID(TheCU->getUniqueID()); FunctionBeginSym = Asm->GetTempSymbol("func_begin", Asm->getFunctionNumber()); @@ -1768,9 +1854,12 @@ void DwarfDebug::emitSectionLabels() { emitSectionSym(Asm, TLOF.getDwarfPubTypesSection()); DwarfStrSectionSym = emitSectionSym(Asm, TLOF.getDwarfStrSection(), "info_string"); - if (useSplitDwarf()) + if (useSplitDwarf()) { DwarfStrDWOSectionSym = emitSectionSym(Asm, TLOF.getDwarfStrDWOSection(), "skel_string"); + DwarfAddrSectionSym = + emitSectionSym(Asm, TLOF.getDwarfAddrSection(), "addr_sec"); + } DwarfDebugRangeSectionSym = emitSectionSym(Asm, TLOF.getDwarfRangesSection(), "debug_range"); @@ -2538,9 +2627,14 @@ CompileUnit *DwarfDebug::constructSkeletonCU(const MDNode *N) { // This should be a unique identifier when we want to build .dwp files. NewCU->addUInt(Die, dwarf::DW_AT_GNU_dwo_id, dwarf::DW_FORM_data8, 0); - // FIXME: The addr base should be relative for each compile unit, however, - // this one is going to be 0 anyhow. - NewCU->addUInt(Die, dwarf::DW_AT_GNU_addr_base, dwarf::DW_FORM_sec_offset, 0); + // Relocate to the beginning of the addr_base section, else 0 for the + // beginning of the one for this compile unit. + if (Asm->MAI->doesDwarfUseRelocationsAcrossSections()) + NewCU->addLabel(Die, dwarf::DW_AT_GNU_addr_base, dwarf::DW_FORM_sec_offset, + DwarfAddrSectionSym); + else + NewCU->addUInt(Die, dwarf::DW_AT_GNU_addr_base, + dwarf::DW_FORM_sec_offset, 0); // 2.17.1 requires that we use DW_AT_low_pc for a single entry point // into an entity. We're using 0, or a NULL label for this. @@ -2548,6 +2642,7 @@ CompileUnit *DwarfDebug::constructSkeletonCU(const MDNode *N) { // DW_AT_stmt_list is a offset of line number information for this // compile unit in debug_line section. + // FIXME: Should handle multiple compile units. 
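The getDebugLocEntry change above encodes the current DBG_VALUE convention: a (register, 0) operand pair is read as the value living directly in the register, while any non-zero immediate means an indirect [reg + offset] location, so an indirect location at offset 0 cannot be expressed. A tiny sketch of that decoding rule (plain structs and a hypothetical helper, not MachineLocation):

#include <cstdint>

struct DecodedLoc {
  unsigned Reg;
  int64_t Offset;
  bool Indirect;
};

// Mirror of the TODO above: offset 0 == direct register value, anything
// else == memory at Reg + Offset.
static DecodedLoc decodeDbgValue(unsigned reg, int64_t imm) {
  if (imm == 0)
    return {reg, 0, /*Indirect=*/false};
  return {reg, imm, /*Indirect=*/true};
}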
if (Asm->MAI->doesDwarfUseRelocationsAcrossSections()) NewCU->addLabel(Die, dwarf::DW_AT_stmt_list, dwarf::DW_FORM_sec_offset, DwarfLineSectionSym); diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.h b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.h index 81e345e..24f758d 100644 --- a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.h +++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.h @@ -392,7 +392,7 @@ class DwarfDebug { // section offsets and are created by EmitSectionLabels. MCSymbol *DwarfInfoSectionSym, *DwarfAbbrevSectionSym; MCSymbol *DwarfStrSectionSym, *TextSectionSym, *DwarfDebugRangeSectionSym; - MCSymbol *DwarfDebugLocSectionSym, *DwarfLineSectionSym; + MCSymbol *DwarfDebugLocSectionSym, *DwarfLineSectionSym, *DwarfAddrSectionSym; MCSymbol *FunctionBeginSym, *FunctionEndSym; MCSymbol *DwarfAbbrevDWOSectionSym, *DwarfStrDWOSectionSym; @@ -433,6 +433,10 @@ class DwarfDebug { // Holder for the skeleton information. DwarfUnits SkeletonHolder; + typedef SmallVector<std::pair<const MDNode *, const MDNode *>, 32> + ImportedEntityMap; + ImportedEntityMap ScopesWithImportedEntities; + private: void addScopeVariable(LexicalScope *LS, DbgVariable *Var); @@ -555,6 +559,18 @@ private: /// \brief Construct subprogram DIE. void constructSubprogramDIE(CompileUnit *TheCU, const MDNode *N); + /// \brief Construct import_module DIE. + void constructImportedModuleDIE(CompileUnit *TheCU, const MDNode *N); + + /// \brief Construct import_module DIE. + void constructImportedModuleDIE(CompileUnit *TheCU, const MDNode *N, + DIE *Context); + + /// \brief Construct import_module DIE. + void constructImportedModuleDIE(CompileUnit *TheCU, + const DIImportedModule &Module, + DIE *Context); + /// \brief Register a source line with debug info. Returns the unique /// label that was emitted and which provides correspondence to the /// source line list. diff --git a/contrib/llvm/lib/CodeGen/BasicTargetTransformInfo.cpp b/contrib/llvm/lib/CodeGen/BasicTargetTransformInfo.cpp index 012ff8a..4a99184 100644 --- a/contrib/llvm/lib/CodeGen/BasicTargetTransformInfo.cpp +++ b/contrib/llvm/lib/CodeGen/BasicTargetTransformInfo.cpp @@ -204,20 +204,25 @@ unsigned BasicTTI::getArithmeticInstrCost(unsigned Opcode, Type *Ty, std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(Ty); + bool IsFloat = Ty->getScalarType()->isFloatingPointTy(); + // Assume that floating point arithmetic operations cost twice as much as + // integer operations. + unsigned OpCost = (IsFloat ? 2 : 1); + if (TLI->isOperationLegalOrPromote(ISD, LT.second)) { // The operation is legal. Assume it costs 1. - // If the type is split to multiple registers, assume that thre is some + // If the type is split to multiple registers, assume that there is some // overhead to this. // TODO: Once we have extract/insert subvector cost we need to use them. if (LT.first > 1) - return LT.first * 2; - return LT.first * 1; + return LT.first * 2 * OpCost; + return LT.first * 1 * OpCost; } if (!TLI->isOperationExpand(ISD, LT.second)) { // If the operation is custom lowered then assume // thare the code is twice as expensive. - return LT.first * 2; + return LT.first * 2 * OpCost; } // Else, assume that we need to scalarize this op. @@ -230,7 +235,7 @@ unsigned BasicTTI::getArithmeticInstrCost(unsigned Opcode, Type *Ty, } // We don't know anything about this scalar instruction. 
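The getArithmeticInstrCost change in this hunk folds a floating-point multiplier into each existing case: OpCost is 2 for FP and 1 for integer, a legal type costs LT.first * OpCost (doubled when the type splits into several registers), and a custom-lowered operation is assumed to cost twice a legal one. A stand-alone restatement of that formula with a worked example (hypothetical helper, not the TTI interface; scalarization of vector types is left out of the sketch):

#include <cassert>

// numParts: how many legal registers the type legalizes to (LT.first above).
static unsigned arithmeticCost(unsigned numParts, bool isFloat,
                               bool legalOrPromote, bool customLowered) {
  unsigned opCost = isFloat ? 2 : 1;            // FP assumed 2x integer
  if (legalOrPromote)
    return numParts > 1 ? numParts * 2 * opCost // split types pay overhead
                        : numParts * opCost;
  if (customLowered)
    return numParts * 2 * opCost;               // custom lowering assumed 2x
  return opCost;                                // unknown scalar instruction
}

int main() {
  // e.g. an fadd whose vector type splits into two legal halves:
  assert(arithmeticCost(2, /*isFloat=*/true, /*legalOrPromote=*/true, false) == 8);
  // a plain legal integer add:
  assert(arithmeticCost(1, /*isFloat=*/false, /*legalOrPromote=*/true, false) == 1);
  return 0;
}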
- return 1; + return OpCost; } unsigned BasicTTI::getShuffleCost(ShuffleKind Kind, Type *Tp, int Index, diff --git a/contrib/llvm/lib/CodeGen/CalcSpillWeights.cpp b/contrib/llvm/lib/CodeGen/CalcSpillWeights.cpp index dee339a..38ae17d 100644 --- a/contrib/llvm/lib/CodeGen/CalcSpillWeights.cpp +++ b/contrib/llvm/lib/CodeGen/CalcSpillWeights.cpp @@ -117,7 +117,7 @@ void VirtRegAuxInfo::CalculateWeightAndHint(LiveInterval &li) { float totalWeight = 0; SmallPtrSet<MachineInstr*, 8> visited; - // Find the best physreg hist and the best virtreg hint. + // Find the best physreg hint and the best virtreg hint. float bestPhys = 0, bestVirt = 0; unsigned hintPhys = 0, hintVirt = 0; diff --git a/contrib/llvm/lib/CodeGen/CallingConvLower.cpp b/contrib/llvm/lib/CodeGen/CallingConvLower.cpp index f1d4ace..75f4b96 100644 --- a/contrib/llvm/lib/CodeGen/CallingConvLower.cpp +++ b/contrib/llvm/lib/CodeGen/CallingConvLower.cpp @@ -32,7 +32,7 @@ CCState::CCState(CallingConv::ID CC, bool isVarArg, MachineFunction &mf, // No stack is used. StackOffset = 0; - clearFirstByValReg(); + clearByValRegsInfo(); UsedRegs.resize((TRI.getNumRegs()+31)/32); } diff --git a/contrib/llvm/lib/CodeGen/CodeGen.cpp b/contrib/llvm/lib/CodeGen/CodeGen.cpp index 35ec68d..c641991 100644 --- a/contrib/llvm/lib/CodeGen/CodeGen.cpp +++ b/contrib/llvm/lib/CodeGen/CodeGen.cpp @@ -13,6 +13,7 @@ //===----------------------------------------------------------------------===// #include "llvm/InitializePasses.h" +#include "llvm/PassRegistry.h" #include "llvm-c/Initialization.h" using namespace llvm; diff --git a/contrib/llvm/lib/CodeGen/IfConversion.cpp b/contrib/llvm/lib/CodeGen/IfConversion.cpp index 9958d7d..8264d6d 100644 --- a/contrib/llvm/lib/CodeGen/IfConversion.cpp +++ b/contrib/llvm/lib/CodeGen/IfConversion.cpp @@ -1039,6 +1039,10 @@ bool IfConverter::IfConvertSimple(BBInfo &BBI, IfcvtKind Kind) { return false; } + if (CvtBBI->BB->hasAddressTaken()) + // Conservatively abort if-conversion if BB's address is taken. + return false; + if (Kind == ICSimpleFalse) if (TII->ReverseBranchCondition(Cond)) llvm_unreachable("Unable to reverse branch condition!"); @@ -1054,6 +1058,10 @@ bool IfConverter::IfConvertSimple(BBInfo &BBI, IfcvtKind Kind) { // Copy instructions in the true block, predicate them, and add them to // the entry block. CopyAndPredicateBlock(BBI, *CvtBBI, Cond, Redefs); + + // RemoveExtraEdges won't work if the block has an unanalyzable branch, so + // explicitly remove CvtBBI as a successor. + BBI.BB->removeSuccessor(CvtBBI->BB); } else { PredicateBlock(*CvtBBI, CvtBBI->BB->end(), Cond, Redefs); @@ -1112,6 +1120,10 @@ bool IfConverter::IfConvertTriangle(BBInfo &BBI, IfcvtKind Kind) { return false; } + if (CvtBBI->BB->hasAddressTaken()) + // Conservatively abort if-conversion if BB's address is taken. + return false; + if (Kind == ICTriangleFalse || Kind == ICTriangleFRev) if (TII->ReverseBranchCondition(Cond)) llvm_unreachable("Unable to reverse branch condition!"); @@ -1146,6 +1158,10 @@ bool IfConverter::IfConvertTriangle(BBInfo &BBI, IfcvtKind Kind) { // Copy instructions in the true block, predicate them, and add them to // the entry block. CopyAndPredicateBlock(BBI, *CvtBBI, Cond, Redefs, true); + + // RemoveExtraEdges won't work if the block has an unanalyzable branch, so + // explicitly remove CvtBBI as a successor. + BBI.BB->removeSuccessor(CvtBBI->BB); } else { // Predicate the 'true' block after removing its branch. 
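The if-conversion changes above add the same conservative guard in several places: a block whose address is taken (for example, the target of an indirect branch through a blockaddress) may still be reached through that address, so it must not be predicated away or merged out of existence. Schematically, with plain structs rather than the IfConverter types:

// Sketch of the merge-safety test added above.
struct BlockInfo {
  bool AddressTaken;
  bool HasFallThrough;
  unsigned NumPreds;
};

// A successor can only be folded into its single predecessor when nothing
// else depends on it staying put: one predecessor, it does not fall through
// to another block, and, after this patch, its address has not been taken.
static bool canMergeInto(const BlockInfo &succ) {
  return succ.NumPreds == 1 && !succ.HasFallThrough && !succ.AddressTaken;
}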
CvtBBI->NonPredSize -= TII->RemoveBranch(*CvtBBI->BB); @@ -1176,7 +1192,8 @@ bool IfConverter::IfConvertTriangle(BBInfo &BBI, IfcvtKind Kind) { // block. By not merging them, we make it possible to iteratively // ifcvt the blocks. if (!HasEarlyExit && - NextBBI->BB->pred_size() == 1 && !NextBBI->HasFallThrough) { + NextBBI->BB->pred_size() == 1 && !NextBBI->HasFallThrough && + !NextBBI->BB->hasAddressTaken()) { MergeBlocks(BBI, *NextBBI); FalseBBDead = true; } else { @@ -1226,6 +1243,10 @@ bool IfConverter::IfConvertDiamond(BBInfo &BBI, IfcvtKind Kind, return false; } + if (TrueBBI.BB->hasAddressTaken() || FalseBBI.BB->hasAddressTaken()) + // Conservatively abort if-conversion if either BB has its address taken. + return false; + // Put the predicated instructions from the 'true' block before the // instructions from the 'false' block, unless the true block would clobber // the predicate, in which case, do the opposite. @@ -1374,7 +1395,8 @@ bool IfConverter::IfConvertDiamond(BBInfo &BBI, IfcvtKind Kind, // tail, add a unconditional branch to it. if (TailBB) { BBInfo &TailBBI = BBAnalysis[TailBB->getNumber()]; - bool CanMergeTail = !TailBBI.HasFallThrough; + bool CanMergeTail = !TailBBI.HasFallThrough && + !TailBBI.BB->hasAddressTaken(); // There may still be a fall-through edge from BBI1 or BBI2 to TailBB; // check if there are any other predecessors besides those. unsigned NumPreds = TailBB->pred_size(); @@ -1543,6 +1565,9 @@ void IfConverter::CopyAndPredicateBlock(BBInfo &ToBBI, BBInfo &FromBBI, /// i.e., when FromBBI's branch is being moved, add those successor edges to /// ToBBI. void IfConverter::MergeBlocks(BBInfo &ToBBI, BBInfo &FromBBI, bool AddEdges) { + assert(!FromBBI.BB->hasAddressTaken() && + "Removing a BB whose address is taken!"); + ToBBI.BB->splice(ToBBI.BB->end(), FromBBI.BB, FromBBI.BB->begin(), FromBBI.BB->end()); diff --git a/contrib/llvm/lib/CodeGen/InlineSpiller.cpp b/contrib/llvm/lib/CodeGen/InlineSpiller.cpp index c6d1a18..35295fe 100644 --- a/contrib/llvm/lib/CodeGen/InlineSpiller.cpp +++ b/contrib/llvm/lib/CodeGen/InlineSpiller.cpp @@ -955,18 +955,21 @@ void InlineSpiller::reMaterializeAll() { Edit->eliminateDeadDefs(DeadDefs, RegsToSpill); // Get rid of deleted and empty intervals. 
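The loop rewritten just below replaces repeated RegsToSpill.erase() calls (each an O(n) shuffle) with a single in-place compaction: surviving registers are copied forward past a write cursor and the tail is erased once at the end. The generic idiom, for reference (equivalent in spirit to erase/remove_if):

#include <cstddef>
#include <vector>

// Keep only the elements satisfying `keep`, preserving order, with one
// erase at the end instead of one per removed element.
template <typename T, typename Pred>
void compactInPlace(std::vector<T> &v, Pred keep) {
  std::size_t resultPos = 0;
  for (std::size_t i = 0, e = v.size(); i != e; ++i)
    if (keep(v[i]))
      v[resultPos++] = v[i];
  v.erase(v.begin() + resultPos, v.end());
}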
- for (unsigned i = RegsToSpill.size(); i != 0; --i) { - unsigned Reg = RegsToSpill[i-1]; - if (!LIS.hasInterval(Reg)) { - RegsToSpill.erase(RegsToSpill.begin() + (i - 1)); + unsigned ResultPos = 0; + for (unsigned i = 0, e = RegsToSpill.size(); i != e; ++i) { + unsigned Reg = RegsToSpill[i]; + if (!LIS.hasInterval(Reg)) continue; - } + LiveInterval &LI = LIS.getInterval(Reg); - if (!LI.empty()) + if (LI.empty()) { + Edit->eraseVirtReg(Reg); continue; - Edit->eraseVirtReg(Reg); - RegsToSpill.erase(RegsToSpill.begin() + (i - 1)); + } + + RegsToSpill[ResultPos++] = Reg; } + RegsToSpill.erase(RegsToSpill.begin() + ResultPos, RegsToSpill.end()); DEBUG(dbgs() << RegsToSpill.size() << " registers to spill after remat.\n"); } diff --git a/contrib/llvm/lib/CodeGen/IntrinsicLowering.cpp b/contrib/llvm/lib/CodeGen/IntrinsicLowering.cpp index 07f0ccf..d894f66 100644 --- a/contrib/llvm/lib/CodeGen/IntrinsicLowering.cpp +++ b/contrib/llvm/lib/CodeGen/IntrinsicLowering.cpp @@ -453,6 +453,12 @@ void IntrinsicLowering::LowerIntrinsicCall(CallInst *CI) { CI->replaceAllUsesWith(ConstantInt::get(CI->getType(), 1)); break; + case Intrinsic::annotation: + case Intrinsic::ptr_annotation: + // Just drop the annotation, but forward the value + CI->replaceAllUsesWith(CI->getOperand(0)); + break; + case Intrinsic::var_annotation: break; // Strip out annotate intrinsic diff --git a/contrib/llvm/lib/CodeGen/LocalStackSlotAllocation.cpp b/contrib/llvm/lib/CodeGen/LocalStackSlotAllocation.cpp index 352ef94..26a1176 100644 --- a/contrib/llvm/lib/CodeGen/LocalStackSlotAllocation.cpp +++ b/contrib/llvm/lib/CodeGen/LocalStackSlotAllocation.cpp @@ -46,13 +46,16 @@ namespace { class FrameRef { MachineBasicBlock::iterator MI; // Instr referencing the frame int64_t LocalOffset; // Local offset of the frame idx referenced + int FrameIdx; // The frame index public: - FrameRef(MachineBasicBlock::iterator I, int64_t Offset) : - MI(I), LocalOffset(Offset) {} + FrameRef(MachineBasicBlock::iterator I, int64_t Offset, int Idx) : + MI(I), LocalOffset(Offset), FrameIdx(Idx) {} bool operator<(const FrameRef &RHS) const { return LocalOffset < RHS.LocalOffset; } - MachineBasicBlock::iterator getMachineInstr() { return MI; } + MachineBasicBlock::iterator getMachineInstr() const { return MI; } + int64_t getLocalOffset() const { return LocalOffset; } + int getFrameIndex() const { return FrameIdx; } }; class LocalStackSlotPass: public MachineFunctionPass { @@ -194,22 +197,15 @@ void LocalStackSlotPass::calculateFrameObjectOffsets(MachineFunction &Fn) { } static inline bool -lookupCandidateBaseReg(const SmallVector<std::pair<unsigned, int64_t>, 8> &Regs, - std::pair<unsigned, int64_t> &RegOffset, +lookupCandidateBaseReg(int64_t BaseOffset, int64_t FrameSizeAdjust, int64_t LocalFrameOffset, const MachineInstr *MI, const TargetRegisterInfo *TRI) { - unsigned e = Regs.size(); - for (unsigned i = 0; i < e; ++i) { - RegOffset = Regs[i]; - // Check if the relative offset from the where the base register references - // to the target address is in range for the instruction. - int64_t Offset = FrameSizeAdjust + LocalFrameOffset - RegOffset.second; - if (TRI->isFrameOffsetLegal(MI, Offset)) - return true; - } - return false; + // Check if the relative offset from the where the base register references + // to the target address is in range for the instruction. 
+ int64_t Offset = FrameSizeAdjust + LocalFrameOffset - BaseOffset; + return TRI->isFrameOffsetLegal(MI, Offset); } bool LocalStackSlotPass::insertFrameReferenceRegisters(MachineFunction &Fn) { @@ -233,9 +229,6 @@ bool LocalStackSlotPass::insertFrameReferenceRegisters(MachineFunction &Fn) { // choose the first one). SmallVector<FrameRef, 64> FrameReferenceInsns; - // A base register definition is a register + offset pair. - SmallVector<std::pair<unsigned, int64_t>, 8> BaseRegisters; - for (MachineFunction::iterator BB = Fn.begin(), E = Fn.end(); BB != E; ++BB) { for (MachineBasicBlock::iterator I = BB->begin(); I != BB->end(); ++I) { MachineInstr *MI = I; @@ -258,8 +251,12 @@ bool LocalStackSlotPass::insertFrameReferenceRegisters(MachineFunction &Fn) { // Don't try this with values not in the local block. if (!MFI->isObjectPreAllocated(MI->getOperand(i).getIndex())) break; + int Idx = MI->getOperand(i).getIndex(); + int64_t LocalOffset = LocalOffsets[Idx]; + if (!TRI->needsFrameBaseReg(MI, LocalOffset)) + break; FrameReferenceInsns. - push_back(FrameRef(MI, LocalOffsets[MI->getOperand(i).getIndex()])); + push_back(FrameRef(MI, LocalOffset, Idx)); break; } } @@ -271,86 +268,106 @@ bool LocalStackSlotPass::insertFrameReferenceRegisters(MachineFunction &Fn) { MachineBasicBlock *Entry = Fn.begin(); + unsigned BaseReg = 0; + int64_t BaseOffset = 0; + // Loop through the frame references and allocate for them as necessary. for (int ref = 0, e = FrameReferenceInsns.size(); ref < e ; ++ref) { - MachineBasicBlock::iterator I = - FrameReferenceInsns[ref].getMachineInstr(); + FrameRef &FR = FrameReferenceInsns[ref]; + MachineBasicBlock::iterator I = FR.getMachineInstr(); MachineInstr *MI = I; - for (unsigned idx = 0, e = MI->getNumOperands(); idx != e; ++idx) { - // Consider replacing all frame index operands that reference - // an object allocated in the local block. - if (MI->getOperand(idx).isFI()) { - int FrameIdx = MI->getOperand(idx).getIndex(); - - assert(MFI->isObjectPreAllocated(FrameIdx) && - "Only pre-allocated locals expected!"); - - DEBUG(dbgs() << "Considering: " << *MI); - if (TRI->needsFrameBaseReg(MI, LocalOffsets[FrameIdx])) { - unsigned BaseReg = 0; - int64_t Offset = 0; - int64_t FrameSizeAdjust = - StackGrowsDown ? MFI->getLocalFrameSize() : 0; - - DEBUG(dbgs() << " Replacing FI in: " << *MI); - - // If we have a suitable base register available, use it; otherwise - // create a new one. Note that any offset encoded in the - // instruction itself will be taken into account by the target, - // so we don't have to adjust for it here when reusing a base - // register. - std::pair<unsigned, int64_t> RegOffset; - if (lookupCandidateBaseReg(BaseRegisters, RegOffset, - FrameSizeAdjust, - LocalOffsets[FrameIdx], - MI, TRI)) { - DEBUG(dbgs() << " Reusing base register " << - RegOffset.first << "\n"); - // We found a register to reuse. - BaseReg = RegOffset.first; - Offset = FrameSizeAdjust + LocalOffsets[FrameIdx] - - RegOffset.second; - } else { - // No previously defined register was in range, so create a - // new one. - int64_t InstrOffset = TRI->getFrameIndexInstrOffset(MI, idx); - const MachineFunction *MF = MI->getParent()->getParent(); - const TargetRegisterClass *RC = TRI->getPointerRegClass(*MF); - BaseReg = Fn.getRegInfo().createVirtualRegister(RC); - - DEBUG(dbgs() << " Materializing base register " << BaseReg << - " at frame local offset " << - LocalOffsets[FrameIdx] + InstrOffset << "\n"); - - // Tell the target to insert the instruction to initialize - // the base register. 
- // MachineBasicBlock::iterator InsertionPt = Entry->begin(); - TRI->materializeFrameBaseRegister(Entry, BaseReg, FrameIdx, - InstrOffset); - - // The base register already includes any offset specified - // by the instruction, so account for that so it doesn't get - // applied twice. - Offset = -InstrOffset; - - int64_t BaseOffset = FrameSizeAdjust + LocalOffsets[FrameIdx] + - InstrOffset; - BaseRegisters.push_back( - std::pair<unsigned, int64_t>(BaseReg, BaseOffset)); - ++NumBaseRegisters; - UsedBaseReg = true; - } - assert(BaseReg != 0 && "Unable to allocate virtual base register!"); - - // Modify the instruction to use the new base register rather - // than the frame index operand. - TRI->resolveFrameIndex(I, BaseReg, Offset); - DEBUG(dbgs() << "Resolved: " << *MI); - - ++NumReplacements; - } + int64_t LocalOffset = FR.getLocalOffset(); + int FrameIdx = FR.getFrameIndex(); + assert(MFI->isObjectPreAllocated(FrameIdx) && + "Only pre-allocated locals expected!"); + + DEBUG(dbgs() << "Considering: " << *MI); + + unsigned idx = 0; + for (unsigned f = MI->getNumOperands(); idx != f; ++idx) { + if (!MI->getOperand(idx).isFI()) + continue; + + if (FrameIdx == I->getOperand(idx).getIndex()) + break; + } + + assert(idx < MI->getNumOperands() && "Cannot find FI operand"); + + int64_t Offset = 0; + int64_t FrameSizeAdjust = StackGrowsDown ? MFI->getLocalFrameSize() : 0; + + DEBUG(dbgs() << " Replacing FI in: " << *MI); + + // If we have a suitable base register available, use it; otherwise + // create a new one. Note that any offset encoded in the + // instruction itself will be taken into account by the target, + // so we don't have to adjust for it here when reusing a base + // register. + if (UsedBaseReg && lookupCandidateBaseReg(BaseOffset, FrameSizeAdjust, + LocalOffset, MI, TRI)) { + DEBUG(dbgs() << " Reusing base register " << BaseReg << "\n"); + // We found a register to reuse. + Offset = FrameSizeAdjust + LocalOffset - BaseOffset; + } else { + // No previously defined register was in range, so create a // new one. + + int64_t InstrOffset = TRI->getFrameIndexInstrOffset(MI, idx); + + int64_t PrevBaseOffset = BaseOffset; + BaseOffset = FrameSizeAdjust + LocalOffset + InstrOffset; + + // We'd like to avoid creating single-use virtual base registers. + // Because the FrameRefs are in sorted order, and we've already + // processed all FrameRefs before this one, just check whether or not + // the next FrameRef will be able to reuse this new register. If not, + // then don't bother creating it. + bool CanReuse = false; + for (int refn = ref + 1; refn < e; ++refn) { + FrameRef &FRN = FrameReferenceInsns[refn]; + MachineBasicBlock::iterator J = FRN.getMachineInstr(); + MachineInstr *MIN = J; + + CanReuse = lookupCandidateBaseReg(BaseOffset, FrameSizeAdjust, + FRN.getLocalOffset(), MIN, TRI); + break; } + + if (!CanReuse) { + BaseOffset = PrevBaseOffset; + continue; + } + + const MachineFunction *MF = MI->getParent()->getParent(); + const TargetRegisterClass *RC = TRI->getPointerRegClass(*MF); + BaseReg = Fn.getRegInfo().createVirtualRegister(RC); + + DEBUG(dbgs() << " Materializing base register " << BaseReg << + " at frame local offset " << LocalOffset + InstrOffset << "\n"); + + // Tell the target to insert the instruction to initialize + // the base register. 
+ // MachineBasicBlock::iterator InsertionPt = Entry->begin(); + TRI->materializeFrameBaseRegister(Entry, BaseReg, FrameIdx, + InstrOffset); + + // The base register already includes any offset specified + // by the instruction, so account for that so it doesn't get + // applied twice. + Offset = -InstrOffset; + + ++NumBaseRegisters; + UsedBaseReg = true; } + assert(BaseReg != 0 && "Unable to allocate virtual base register!"); + + // Modify the instruction to use the new base register rather + // than the frame index operand. + TRI->resolveFrameIndex(I, BaseReg, Offset); + DEBUG(dbgs() << "Resolved: " << *MI); + + ++NumReplacements; } + return UsedBaseReg; } diff --git a/contrib/llvm/lib/CodeGen/MachineBasicBlock.cpp b/contrib/llvm/lib/CodeGen/MachineBasicBlock.cpp index 898e165..78e9950 100644 --- a/contrib/llvm/lib/CodeGen/MachineBasicBlock.cpp +++ b/contrib/llvm/lib/CodeGen/MachineBasicBlock.cpp @@ -37,7 +37,7 @@ using namespace llvm; MachineBasicBlock::MachineBasicBlock(MachineFunction &mf, const BasicBlock *bb) : BB(bb), Number(-1), xParent(&mf), Alignment(0), IsLandingPad(false), - AddressTaken(false) { + AddressTaken(false), CachedMCSymbol(NULL) { Insts.Parent = this; } @@ -48,12 +48,16 @@ MachineBasicBlock::~MachineBasicBlock() { /// getSymbol - Return the MCSymbol for this basic block. /// MCSymbol *MachineBasicBlock::getSymbol() const { - const MachineFunction *MF = getParent(); - MCContext &Ctx = MF->getContext(); - const char *Prefix = Ctx.getAsmInfo().getPrivateGlobalPrefix(); - return Ctx.GetOrCreateSymbol(Twine(Prefix) + "BB" + - Twine(MF->getFunctionNumber()) + "_" + - Twine(getNumber())); + if (!CachedMCSymbol) { + const MachineFunction *MF = getParent(); + MCContext &Ctx = MF->getContext(); + const char *Prefix = Ctx.getAsmInfo().getPrivateGlobalPrefix(); + CachedMCSymbol = Ctx.GetOrCreateSymbol(Twine(Prefix) + "BB" + + Twine(MF->getFunctionNumber()) + + "_" + Twine(getNumber())); + } + + return CachedMCSymbol; } diff --git a/contrib/llvm/lib/CodeGen/MachineBlockPlacement.cpp b/contrib/llvm/lib/CodeGen/MachineBlockPlacement.cpp index cd948e2..bfba503 100644 --- a/contrib/llvm/lib/CodeGen/MachineBlockPlacement.cpp +++ b/contrib/llvm/lib/CodeGen/MachineBlockPlacement.cpp @@ -39,6 +39,7 @@ #include "llvm/CodeGen/MachineLoopInfo.h" #include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/Support/Allocator.h" +#include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetLowering.h" @@ -52,6 +53,11 @@ STATISTIC(CondBranchTakenFreq, STATISTIC(UncondBranchTakenFreq, "Potential frequency of taking unconditional branches"); +static cl::opt<unsigned> AlignAllBlock("align-all-blocks", + cl::desc("Force the alignment of all " + "blocks in the function."), + cl::init(0), cl::Hidden); + namespace { class BlockChain; /// \brief Type for our function-wide basic block -> block chain mapping. @@ -1088,6 +1094,12 @@ bool MachineBlockPlacement::runOnMachineFunction(MachineFunction &F) { BlockToChain.clear(); ChainAllocator.DestroyAll(); + if (AlignAllBlock) + // Align all of the blocks in the function to a specific alignment. + for (MachineFunction::iterator FI = F.begin(), FE = F.end(); + FI != FE; ++FI) + FI->setAlignment(AlignAllBlock); + // We always return true as we have no way to track whether the final order // differs from the original order. 
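The MachineBasicBlock::getSymbol() change a little further up memoizes the symbol instead of rebuilding the name and re-querying the MCContext on every call; the cached pointer is simply initialized lazily on first use. The shape of that pattern, with a plain string standing in for the MCSymbol (names here are illustrative only):

#include <string>

class BlockLabel {
  mutable std::string Cached; // stands in for the cached MCSymbol pointer
  unsigned FnNum;
  int BlockNum;
public:
  BlockLabel(unsigned fn, int bb) : FnNum(fn), BlockNum(bb) {}

  // Build the label once, reuse it afterwards (the real code checks a null
  // pointer; an empty string plays that role here).
  const std::string &name() const {
    if (Cached.empty())
      Cached = ".LBB" + std::to_string(FnNum) + "_" + std::to_string(BlockNum);
    return Cached;
  }
};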
return true; diff --git a/contrib/llvm/lib/CodeGen/MachineModuleInfo.cpp b/contrib/llvm/lib/CodeGen/MachineModuleInfo.cpp index 0ea9ae0..8af9d05 100644 --- a/contrib/llvm/lib/CodeGen/MachineModuleInfo.cpp +++ b/contrib/llvm/lib/CodeGen/MachineModuleInfo.cpp @@ -326,8 +326,7 @@ void MachineModuleInfo::AnalyzeModule(const Module &M) { if (!GV || !GV->hasInitializer()) return; // Should be an array of 'i8*'. - const ConstantArray *InitList = dyn_cast<ConstantArray>(GV->getInitializer()); - if (InitList == 0) return; + const ConstantArray *InitList = cast<ConstantArray>(GV->getInitializer()); for (unsigned i = 0, e = InitList->getNumOperands(); i != e; ++i) if (const Function *F = diff --git a/contrib/llvm/lib/CodeGen/MachineRegisterInfo.cpp b/contrib/llvm/lib/CodeGen/MachineRegisterInfo.cpp index 1af00e8..68372f6 100644 --- a/contrib/llvm/lib/CodeGen/MachineRegisterInfo.cpp +++ b/contrib/llvm/lib/CodeGen/MachineRegisterInfo.cpp @@ -15,6 +15,8 @@ #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetMachine.h" +#include "llvm/Support/raw_os_ostream.h" + using namespace llvm; MachineRegisterInfo::MachineRegisterInfo(const TargetRegisterInfo &TRI) @@ -106,13 +108,59 @@ MachineRegisterInfo::createVirtualRegister(const TargetRegisterClass *RegClass){ /// clearVirtRegs - Remove all virtual registers (after physreg assignment). void MachineRegisterInfo::clearVirtRegs() { #ifndef NDEBUG - for (unsigned i = 0, e = getNumVirtRegs(); i != e; ++i) - assert(VRegInfo[TargetRegisterInfo::index2VirtReg(i)].second == 0 && - "Vreg use list non-empty still?"); + for (unsigned i = 0, e = getNumVirtRegs(); i != e; ++i) { + unsigned Reg = TargetRegisterInfo::index2VirtReg(i); + if (!VRegInfo[Reg].second) + continue; + verifyUseList(Reg); + llvm_unreachable("Remaining virtual register operands"); + } #endif VRegInfo.clear(); } +void MachineRegisterInfo::verifyUseList(unsigned Reg) const { +#ifndef NDEBUG + bool Valid = true; + for (reg_iterator I = reg_begin(Reg), E = reg_end(); I != E; ++I) { + MachineOperand *MO = &I.getOperand(); + MachineInstr *MI = MO->getParent(); + if (!MI) { + errs() << PrintReg(Reg, TRI) << " use list MachineOperand " << MO + << " has no parent instruction.\n"; + Valid = false; + } + MachineOperand *MO0 = &MI->getOperand(0); + unsigned NumOps = MI->getNumOperands(); + if (!(MO >= MO0 && MO < MO0+NumOps)) { + errs() << PrintReg(Reg, TRI) << " use list MachineOperand " << MO + << " doesn't belong to parent MI: " << *MI; + Valid = false; + } + if (!MO->isReg()) { + errs() << PrintReg(Reg, TRI) << " MachineOperand " << MO << ": " << *MO + << " is not a register\n"; + Valid = false; + } + if (MO->getReg() != Reg) { + errs() << PrintReg(Reg, TRI) << " use-list MachineOperand " << MO << ": " + << *MO << " is the wrong register\n"; + Valid = false; + } + } + assert(Valid && "Invalid use list"); +#endif +} + +void MachineRegisterInfo::verifyUseLists() const { +#ifndef NDEBUG + for (unsigned i = 0, e = getNumVirtRegs(); i != e; ++i) + verifyUseList(TargetRegisterInfo::index2VirtReg(i)); + for (unsigned i = 1, e = TRI->getNumRegs(); i != e; ++i) + verifyUseList(i); +#endif +} + /// Add MO to the linked list of operands for its register. 
void MachineRegisterInfo::addRegOperandToUseList(MachineOperand *MO) { assert(!MO->isOnRegUseList() && "Already on list"); diff --git a/contrib/llvm/lib/CodeGen/MachineScheduler.cpp b/contrib/llvm/lib/CodeGen/MachineScheduler.cpp index 5bd2349..fff6b2b 100644 --- a/contrib/llvm/lib/CodeGen/MachineScheduler.cpp +++ b/contrib/llvm/lib/CodeGen/MachineScheduler.cpp @@ -51,7 +51,11 @@ static cl::opt<unsigned> MISchedCutoff("misched-cutoff", cl::Hidden, static bool ViewMISchedDAGs = false; #endif // NDEBUG -// Experimental heuristics +// FIXME: remove this flag after initial testing. It should always be a good +// thing. +static cl::opt<bool> EnableCopyConstrain("misched-vcopy", cl::Hidden, + cl::desc("Constrain vreg copies."), cl::init(true)); + static cl::opt<bool> EnableLoadCluster("misched-cluster", cl::Hidden, cl::desc("Enable load clustering."), cl::init(true)); @@ -323,6 +327,10 @@ ScheduleDAGMI::~ScheduleDAGMI() { delete SchedImpl; } +bool ScheduleDAGMI::canAddEdge(SUnit *SuccSU, SUnit *PredSU) { + return SuccSU == &ExitSU || !Topo.IsReachable(PredSU, SuccSU); +} + bool ScheduleDAGMI::addEdge(SUnit *SuccSU, const SDep &PredDep) { if (SuccSU != &ExitSU) { // Do not use WillCreateCycle, it assumes SD scheduling. @@ -404,6 +412,8 @@ void ScheduleDAGMI::releasePredecessors(SUnit *SU) { } } +/// This is normally called from the main scheduler loop but may also be invoked +/// by the scheduling strategy to perform additional code motion. void ScheduleDAGMI::moveInstruction(MachineInstr *MI, MachineBasicBlock::iterator InsertPos) { // Advance RegionBegin if the first instruction moves down. @@ -505,6 +515,14 @@ updateScheduledPressure(const std::vector<unsigned> &NewMaxPressure) { if ((int)NewMaxPressure[ID] > MaxUnits) MaxUnits = NewMaxPressure[ID]; } + DEBUG( + for (unsigned i = 0, e = NewMaxPressure.size(); i < e; ++i) { + unsigned Limit = TRI->getRegPressureSetLimit(i); + if (NewMaxPressure[i] > Limit ) { + dbgs() << " " << TRI->getRegPressureSetName(i) << ": " + << NewMaxPressure[i] << " > " << Limit << "\n"; + } + }); } /// schedule - Called back from MachineScheduler::runOnMachineFunction @@ -905,6 +923,184 @@ void MacroFusion::apply(ScheduleDAGMI *DAG) { } //===----------------------------------------------------------------------===// +// CopyConstrain - DAG post-processing to encourage copy elimination. +//===----------------------------------------------------------------------===// + +namespace { +/// \brief Post-process the DAG to create weak edges from all uses of a copy to +/// the one use that defines the copy's source vreg, most likely an induction +/// variable increment. +class CopyConstrain : public ScheduleDAGMutation { + // Transient state. + SlotIndex RegionBeginIdx; + // RegionEndIdx is the slot index of the last non-debug instruction in the + // scheduling region. So we may have RegionBeginIdx == RegionEndIdx. + SlotIndex RegionEndIdx; +public: + CopyConstrain(const TargetInstrInfo *, const TargetRegisterInfo *) {} + + virtual void apply(ScheduleDAGMI *DAG); + +protected: + void constrainLocalCopy(SUnit *CopySU, ScheduleDAGMI *DAG); +}; +} // anonymous + +/// constrainLocalCopy handles two possibilities: +/// 1) Local src: +/// I0: = dst +/// I1: src = ... +/// I2: = dst +/// I3: dst = src (copy) +/// (create pred->succ edges I0->I1, I2->I1) +/// +/// 2) Local copy: +/// I0: dst = src (copy) +/// I1: = dst +/// I2: src = ... 
+/// I3: = dst +/// (create pred->succ edges I1->I2, I3->I2) +/// +/// Although the MachineScheduler is currently constrained to single blocks, +/// this algorithm should handle extended blocks. An EBB is a set of +/// contiguously numbered blocks such that the previous block in the EBB is +/// always the single predecessor. +void CopyConstrain::constrainLocalCopy(SUnit *CopySU, ScheduleDAGMI *DAG) { + LiveIntervals *LIS = DAG->getLIS(); + MachineInstr *Copy = CopySU->getInstr(); + + // Check for pure vreg copies. + unsigned SrcReg = Copy->getOperand(1).getReg(); + if (!TargetRegisterInfo::isVirtualRegister(SrcReg)) + return; + + unsigned DstReg = Copy->getOperand(0).getReg(); + if (!TargetRegisterInfo::isVirtualRegister(DstReg)) + return; + + // Check if either the dest or source is local. If it's live across a back + // edge, it's not local. Note that if both vregs are live across the back + // edge, we cannot successfully contrain the copy without cyclic scheduling. + unsigned LocalReg = DstReg; + unsigned GlobalReg = SrcReg; + LiveInterval *LocalLI = &LIS->getInterval(LocalReg); + if (!LocalLI->isLocal(RegionBeginIdx, RegionEndIdx)) { + LocalReg = SrcReg; + GlobalReg = DstReg; + LocalLI = &LIS->getInterval(LocalReg); + if (!LocalLI->isLocal(RegionBeginIdx, RegionEndIdx)) + return; + } + LiveInterval *GlobalLI = &LIS->getInterval(GlobalReg); + + // Find the global segment after the start of the local LI. + LiveInterval::iterator GlobalSegment = GlobalLI->find(LocalLI->beginIndex()); + // If GlobalLI does not overlap LocalLI->start, then a copy directly feeds a + // local live range. We could create edges from other global uses to the local + // start, but the coalescer should have already eliminated these cases, so + // don't bother dealing with it. + if (GlobalSegment == GlobalLI->end()) + return; + + // If GlobalSegment is killed at the LocalLI->start, the call to find() + // returned the next global segment. But if GlobalSegment overlaps with + // LocalLI->start, then advance to the next segement. If a hole in GlobalLI + // exists in LocalLI's vicinity, GlobalSegment will be the end of the hole. + if (GlobalSegment->contains(LocalLI->beginIndex())) + ++GlobalSegment; + + if (GlobalSegment == GlobalLI->end()) + return; + + // Check if GlobalLI contains a hole in the vicinity of LocalLI. + if (GlobalSegment != GlobalLI->begin()) { + // Two address defs have no hole. + if (SlotIndex::isSameInstr(llvm::prior(GlobalSegment)->end, + GlobalSegment->start)) { + return; + } + // If GlobalLI has a prior segment, it must be live into the EBB. Otherwise + // it would be a disconnected component in the live range. + assert(llvm::prior(GlobalSegment)->start < LocalLI->beginIndex() && + "Disconnected LRG within the scheduling region."); + } + MachineInstr *GlobalDef = LIS->getInstructionFromIndex(GlobalSegment->start); + if (!GlobalDef) + return; + + SUnit *GlobalSU = DAG->getSUnit(GlobalDef); + if (!GlobalSU) + return; + + // GlobalDef is the bottom of the GlobalLI hole. Open the hole by + // constraining the uses of the last local def to precede GlobalDef. 
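The local/global distinction used above hinges on whether a vreg's live range is confined to the current scheduling region: a range that is live across the region boundary (typically around a loop back edge) is treated as global, and the copy is only constrained when at least one side is local. A very rough sketch of that containment test on plain index ranges (not the LiveInterval API):

struct IndexRange {
  unsigned Begin, End; // half-open [Begin, End)
};

// A live range is "local" to the region if it starts and ends inside it.
static bool isLocalTo(const IndexRange &li, const IndexRange &region) {
  return li.Begin >= region.Begin && li.End <= region.End;
}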
+ SmallVector<SUnit*,8> LocalUses; + const VNInfo *LastLocalVN = LocalLI->getVNInfoBefore(LocalLI->endIndex()); + MachineInstr *LastLocalDef = LIS->getInstructionFromIndex(LastLocalVN->def); + SUnit *LastLocalSU = DAG->getSUnit(LastLocalDef); + for (SUnit::const_succ_iterator + I = LastLocalSU->Succs.begin(), E = LastLocalSU->Succs.end(); + I != E; ++I) { + if (I->getKind() != SDep::Data || I->getReg() != LocalReg) + continue; + if (I->getSUnit() == GlobalSU) + continue; + if (!DAG->canAddEdge(GlobalSU, I->getSUnit())) + return; + LocalUses.push_back(I->getSUnit()); + } + // Open the top of the GlobalLI hole by constraining any earlier global uses + // to precede the start of LocalLI. + SmallVector<SUnit*,8> GlobalUses; + MachineInstr *FirstLocalDef = + LIS->getInstructionFromIndex(LocalLI->beginIndex()); + SUnit *FirstLocalSU = DAG->getSUnit(FirstLocalDef); + for (SUnit::const_pred_iterator + I = GlobalSU->Preds.begin(), E = GlobalSU->Preds.end(); I != E; ++I) { + if (I->getKind() != SDep::Anti || I->getReg() != GlobalReg) + continue; + if (I->getSUnit() == FirstLocalSU) + continue; + if (!DAG->canAddEdge(FirstLocalSU, I->getSUnit())) + return; + GlobalUses.push_back(I->getSUnit()); + } + DEBUG(dbgs() << "Constraining copy SU(" << CopySU->NodeNum << ")\n"); + // Add the weak edges. + for (SmallVectorImpl<SUnit*>::const_iterator + I = LocalUses.begin(), E = LocalUses.end(); I != E; ++I) { + DEBUG(dbgs() << " Local use SU(" << (*I)->NodeNum << ") -> SU(" + << GlobalSU->NodeNum << ")\n"); + DAG->addEdge(GlobalSU, SDep(*I, SDep::Weak)); + } + for (SmallVectorImpl<SUnit*>::const_iterator + I = GlobalUses.begin(), E = GlobalUses.end(); I != E; ++I) { + DEBUG(dbgs() << " Global use SU(" << (*I)->NodeNum << ") -> SU(" + << FirstLocalSU->NodeNum << ")\n"); + DAG->addEdge(FirstLocalSU, SDep(*I, SDep::Weak)); + } +} + +/// \brief Callback from DAG postProcessing to create weak edges to encourage +/// copy elimination. +void CopyConstrain::apply(ScheduleDAGMI *DAG) { + MachineBasicBlock::iterator FirstPos = nextIfDebug(DAG->begin(), DAG->end()); + if (FirstPos == DAG->end()) + return; + RegionBeginIdx = DAG->getLIS()->getInstructionIndex(&*FirstPos); + RegionEndIdx = DAG->getLIS()->getInstructionIndex( + &*priorNonDebug(DAG->end(), DAG->begin())); + + for (unsigned Idx = 0, End = DAG->SUnits.size(); Idx != End; ++Idx) { + SUnit *SU = &DAG->SUnits[Idx]; + if (!SU->getInstr()->isCopy()) + continue; + + constrainLocalCopy(SU, DAG); + } +} + +//===----------------------------------------------------------------------===// // ConvergingScheduler - Implementation of the standard MachineSchedStrategy. //===----------------------------------------------------------------------===// @@ -916,7 +1112,7 @@ public: /// Represent the type of SchedCandidate found within a single queue. /// pickNodeBidirectional depends on these listed by decreasing priority. 
enum CandReason { - NoCand, SingleExcess, SingleCritical, Cluster, + NoCand, PhysRegCopy, SingleExcess, SingleCritical, Cluster, Weak, ResourceReduce, ResourceDemand, BotHeightReduce, BotPathReduce, TopDepthReduce, TopPathReduce, SingleMax, MultiPressure, NextDefUse, NodeOrder}; @@ -1191,6 +1387,8 @@ protected: const RegPressureTracker &RPTracker, SchedCandidate &Candidate); + void reschedulePhysRegCopies(SUnit *SU, bool isTop); + #ifndef NDEBUG void traceCandidate(const SchedCandidate &Cand); #endif @@ -1339,6 +1537,8 @@ void ConvergingScheduler::SchedBoundary::setLatencyPolicy(CandPolicy &Policy) { for (ReadyQueue::iterator I = Available.begin(), E = Available.end(); I != E; ++I) { unsigned L = getUnscheduledLatency(*I); + DEBUG(dbgs() << " " << Available.getName() + << " RemLatency SU(" << (*I)->NodeNum << ") " << L << '\n'); if (L > RemLatency) RemLatency = L; } @@ -1349,10 +1549,13 @@ void ConvergingScheduler::SchedBoundary::setLatencyPolicy(CandPolicy &Policy) { RemLatency = L; } unsigned CriticalPathLimit = Rem->CriticalPath + SchedModel->getILPWindow(); + DEBUG(dbgs() << " " << Available.getName() + << " ExpectedLatency " << ExpectedLatency + << " CP Limit " << CriticalPathLimit << '\n'); if (RemLatency + ExpectedLatency >= CriticalPathLimit && RemLatency > Rem->getMaxRemainingCount(SchedModel)) { Policy.ReduceLatency = true; - DEBUG(dbgs() << "Increase ILP: " << Available.getName() << '\n'); + DEBUG(dbgs() << " Increase ILP: " << Available.getName() << '\n'); } } @@ -1569,7 +1772,8 @@ void ConvergingScheduler::balanceZones( if ((int)(Rem->getMaxRemainingCount(SchedModel) - RemainingCritCount) > (int)SchedModel->getLatencyFactor()) { CriticalCand.Policy.ReduceResIdx = CriticalZone.CritResIdx; - DEBUG(dbgs() << "Balance " << CriticalZone.Available.getName() << " reduce " + DEBUG(dbgs() << " Balance " << CriticalZone.Available.getName() + << " reduce " << SchedModel->getProcResource(CriticalZone.CritResIdx)->Name << '\n'); } @@ -1580,7 +1784,8 @@ void ConvergingScheduler::balanceZones( if ((int)(OppositeZone.ExpectedCount - OppositeCount) > (int)SchedModel->getLatencyFactor()) { OppositeCand.Policy.DemandResIdx = CriticalZone.CritResIdx; - DEBUG(dbgs() << "Balance " << OppositeZone.Available.getName() << " demand " + DEBUG(dbgs() << " Balance " << OppositeZone.Available.getName() + << " demand " << SchedModel->getProcResource(OppositeZone.CritResIdx)->Name << '\n'); } @@ -1604,7 +1809,7 @@ void ConvergingScheduler::checkResourceLimits( if (Top.CritResIdx != Rem.CritResIdx) { TopCand.Policy.ReduceResIdx = Top.CritResIdx; BotCand.Policy.ReduceResIdx = Bot.CritResIdx; - DEBUG(dbgs() << "Reduce scheduled " + DEBUG(dbgs() << " Reduce scheduled " << SchedModel->getProcResource(Top.CritResIdx)->Name << '\n'); } return; @@ -1621,7 +1826,7 @@ void ConvergingScheduler::checkResourceLimits( && (Rem.CriticalPath > Top.CurrCycle + Bot.CurrCycle)) { TopCand.Policy.ReduceLatency = true; BotCand.Policy.ReduceLatency = true; - DEBUG(dbgs() << "Reduce scheduled latency " << Top.ExpectedLatency + DEBUG(dbgs() << " Reduce scheduled latency " << Top.ExpectedLatency << " + " << Bot.ExpectedLatency << '\n'); } return; @@ -1696,6 +1901,34 @@ static unsigned getWeakLeft(const SUnit *SU, bool isTop) { return (isTop) ? SU->WeakPredsLeft : SU->WeakSuccsLeft; } +/// Minimize physical register live ranges. Regalloc wants them adjacent to +/// their physreg def/use. +/// +/// FIXME: This is an unnecessary check on the critical path. Most are root/leaf +/// copies which can be prescheduled. The rest (e.g. 
x86 MUL) could be bundled +/// with the operation that produces or consumes the physreg. We'll do this when +/// regalloc has support for parallel copies. +static int biasPhysRegCopy(const SUnit *SU, bool isTop) { + const MachineInstr *MI = SU->getInstr(); + if (!MI->isCopy()) + return 0; + + unsigned ScheduledOper = isTop ? 1 : 0; + unsigned UnscheduledOper = isTop ? 0 : 1; + // If we have already scheduled the physreg produce/consumer, immediately + // schedule the copy. + if (TargetRegisterInfo::isPhysicalRegister( + MI->getOperand(ScheduledOper).getReg())) + return 1; + // If the physreg is at the boundary, defer it. Otherwise schedule it + // immediately to free the dependent. We can hoist the copy later. + bool AtBoundary = isTop ? !SU->NumSuccsLeft : !SU->NumPredsLeft; + if (TargetRegisterInfo::isPhysicalRegister( + MI->getOperand(UnscheduledOper).getReg())) + return AtBoundary ? -1 : 1; + return 0; +} + /// Apply a set of heursitics to a new candidate. Heuristics are currently /// hierarchical. This may be more efficient than a graduated cost model because /// we don't need to evaluate all aspects of the model for each node in the @@ -1723,6 +1956,12 @@ void ConvergingScheduler::tryCandidate(SchedCandidate &Cand, TryCand.Reason = NodeOrder; return; } + + if (tryGreater(biasPhysRegCopy(TryCand.SU, Zone.isTop()), + biasPhysRegCopy(Cand.SU, Zone.isTop()), + TryCand, Cand, PhysRegCopy)) + return; + // Avoid exceeding the target's limit. if (tryLess(TryCand.RPDelta.Excess.UnitIncrease, Cand.RPDelta.Excess.UnitIncrease, TryCand, Cand, SingleExcess)) @@ -1749,12 +1988,16 @@ void ConvergingScheduler::tryCandidate(SchedCandidate &Cand, if (tryGreater(TryCand.SU == NextClusterSU, Cand.SU == NextClusterSU, TryCand, Cand, Cluster)) return; - // Currently, weak edges are for clustering, so we hard-code that reason. - // However, deferring the current TryCand will not change Cand's reason. + + // Weak edges are for clustering and other constraints. + // + // Deferring TryCand here does not change Cand's reason. This is good in the + // sense that a bad candidate shouldn't affect a previous candidate's + // goodness, but bad in that it is assymetric and depends on queue order. CandReason OrigReason = Cand.Reason; if (tryLess(getWeakLeft(TryCand.SU, Zone.isTop()), getWeakLeft(Cand.SU, Zone.isTop()), - TryCand, Cand, Cluster)) { + TryCand, Cand, Weak)) { Cand.Reason = OrigReason; return; } @@ -1825,20 +2068,20 @@ static bool compareRPDelta(const RegPressureDelta &LHS, // Avoid increasing the max critical pressure in the scheduled region. if (LHS.Excess.UnitIncrease != RHS.Excess.UnitIncrease) { - DEBUG(dbgs() << "RP excess top - bot: " + DEBUG(dbgs() << " RP excess top - bot: " << (LHS.Excess.UnitIncrease - RHS.Excess.UnitIncrease) << '\n'); return LHS.Excess.UnitIncrease < RHS.Excess.UnitIncrease; } // Avoid increasing the max critical pressure in the scheduled region. if (LHS.CriticalMax.UnitIncrease != RHS.CriticalMax.UnitIncrease) { - DEBUG(dbgs() << "RP critical top - bot: " + DEBUG(dbgs() << " RP critical top - bot: " << (LHS.CriticalMax.UnitIncrease - RHS.CriticalMax.UnitIncrease) << '\n'); return LHS.CriticalMax.UnitIncrease < RHS.CriticalMax.UnitIncrease; } // Avoid increasing the max pressure of the entire region. 
if (LHS.CurrentMax.UnitIncrease != RHS.CurrentMax.UnitIncrease) { - DEBUG(dbgs() << "RP current top - bot: " + DEBUG(dbgs() << " RP current top - bot: " << (LHS.CurrentMax.UnitIncrease - RHS.CurrentMax.UnitIncrease) << '\n'); return LHS.CurrentMax.UnitIncrease < RHS.CurrentMax.UnitIncrease; @@ -1851,9 +2094,11 @@ const char *ConvergingScheduler::getReasonStr( ConvergingScheduler::CandReason Reason) { switch (Reason) { case NoCand: return "NOCAND "; + case PhysRegCopy: return "PREG-COPY"; case SingleExcess: return "REG-EXCESS"; case SingleCritical: return "REG-CRIT "; case Cluster: return "CLUSTER "; + case Weak: return "WEAK "; case SingleMax: return "REG-MAX "; case MultiPressure: return "REG-MULTI "; case ResourceReduce: return "RES-REDUCE"; @@ -1953,8 +2198,7 @@ void ConvergingScheduler::pickNodeFromQueue(SchedBoundary &Zone, static void tracePick(const ConvergingScheduler::SchedCandidate &Cand, bool IsTop) { - DEBUG(dbgs() << "Pick " << (IsTop ? "Top" : "Bot") - << " SU(" << Cand.SU->NodeNum << ") " + DEBUG(dbgs() << "Pick " << (IsTop ? "Top " : "Bot ") << ConvergingScheduler::getReasonStr(Cand.Reason) << '\n'); } @@ -1964,10 +2208,12 @@ SUnit *ConvergingScheduler::pickNodeBidirectional(bool &IsTopNode) { // efficient, but also provides the best heuristics for CriticalPSets. if (SUnit *SU = Bot.pickOnlyChoice()) { IsTopNode = false; + DEBUG(dbgs() << "Pick Top NOCAND\n"); return SU; } if (SUnit *SU = Top.pickOnlyChoice()) { IsTopNode = true; + DEBUG(dbgs() << "Pick Bot NOCAND\n"); return SU; } CandPolicy NoPolicy; @@ -2065,21 +2311,53 @@ SUnit *ConvergingScheduler::pickNode(bool &IsTopNode) { if (SU->isBottomReady()) Bot.removeReady(SU); - DEBUG(dbgs() << "Scheduling " << *SU->getInstr()); + DEBUG(dbgs() << "Scheduling SU(" << SU->NodeNum << ") " << *SU->getInstr()); return SU; } +void ConvergingScheduler::reschedulePhysRegCopies(SUnit *SU, bool isTop) { + + MachineBasicBlock::iterator InsertPos = SU->getInstr(); + if (!isTop) + ++InsertPos; + SmallVectorImpl<SDep> &Deps = isTop ? SU->Preds : SU->Succs; + + // Find already scheduled copies with a single physreg dependence and move + // them just above the scheduled instruction. + for (SmallVectorImpl<SDep>::iterator I = Deps.begin(), E = Deps.end(); + I != E; ++I) { + if (I->getKind() != SDep::Data || !TRI->isPhysicalRegister(I->getReg())) + continue; + SUnit *DepSU = I->getSUnit(); + if (isTop ? DepSU->Succs.size() > 1 : DepSU->Preds.size() > 1) + continue; + MachineInstr *Copy = DepSU->getInstr(); + if (!Copy->isCopy()) + continue; + DEBUG(dbgs() << " Rescheduling physreg copy "; + I->getSUnit()->dump(DAG)); + DAG->moveInstruction(Copy, InsertPos); + } +} + /// Update the scheduler's state after scheduling a node. This is the same node /// that was just returned by pickNode(). However, ScheduleDAGMI needs to update /// it's state based on the current cycle before MachineSchedStrategy does. +/// +/// FIXME: Eventually, we may bundle physreg copies rather than rescheduling +/// them here. See comments in biasPhysRegCopy. 
void ConvergingScheduler::schedNode(SUnit *SU, bool IsTopNode) { if (IsTopNode) { SU->TopReadyCycle = Top.CurrCycle; Top.bumpNode(SU); + if (SU->hasPhysRegUses) + reschedulePhysRegCopies(SU, true); } else { SU->BotReadyCycle = Bot.CurrCycle; Bot.bumpNode(SU); + if (SU->hasPhysRegDefs) + reschedulePhysRegCopies(SU, false); } } @@ -2090,6 +2368,12 @@ static ScheduleDAGInstrs *createConvergingSched(MachineSchedContext *C) { "-misched-topdown incompatible with -misched-bottomup"); ScheduleDAGMI *DAG = new ScheduleDAGMI(C, new ConvergingScheduler()); // Register DAG post-processors. + // + // FIXME: extend the mutation API to allow earlier mutations to instantiate + // data and pass it to later mutations. Have a single mutation that gathers + // the interesting nodes in one pass. + if (EnableCopyConstrain) + DAG->addMutation(new CopyConstrain(DAG->TII, DAG->TRI)); if (EnableLoadCluster) DAG->addMutation(new LoadClusterMutation(DAG->TII, DAG->TRI)); if (EnableMacroFusion) @@ -2179,12 +2463,12 @@ public: SUnit *SU = ReadyQ.back(); ReadyQ.pop_back(); IsTopNode = false; - DEBUG(dbgs() << "*** Scheduling " << "SU(" << SU->NodeNum << "): " - << *SU->getInstr() + DEBUG(dbgs() << "Pick node " << "SU(" << SU->NodeNum << ") " << " ILP: " << DAG->getDFSResult()->getILP(SU) << " Tree: " << DAG->getDFSResult()->getSubtreeID(SU) << " @" << DAG->getDFSResult()->getSubtreeLevel( - DAG->getDFSResult()->getSubtreeID(SU)) << '\n'); + DAG->getDFSResult()->getSubtreeID(SU)) << '\n' + << "Scheduling " << *SU->getInstr()); return SU; } diff --git a/contrib/llvm/lib/CodeGen/MachineTraceMetrics.cpp b/contrib/llvm/lib/CodeGen/MachineTraceMetrics.cpp index 49d8c4e..00f702c 100644 --- a/contrib/llvm/lib/CodeGen/MachineTraceMetrics.cpp +++ b/contrib/llvm/lib/CodeGen/MachineTraceMetrics.cpp @@ -1200,8 +1200,10 @@ unsigned MachineTraceMetrics::Trace::getResourceDepth(bool Bottom) const { return std::max(Instrs, PRMax); } + unsigned MachineTraceMetrics::Trace:: -getResourceLength(ArrayRef<const MachineBasicBlock*> Extrablocks) const { +getResourceLength(ArrayRef<const MachineBasicBlock*> Extrablocks, + ArrayRef<const MCSchedClassDesc*> ExtraInstrs) const { // Add up resources above and below the center block. ArrayRef<unsigned> PRDepths = TE.getProcResourceDepths(getBlockNum()); ArrayRef<unsigned> PRHeights = TE.getProcResourceHeights(getBlockNum()); @@ -1210,6 +1212,18 @@ getResourceLength(ArrayRef<const MachineBasicBlock*> Extrablocks) const { unsigned PRCycles = PRDepths[K] + PRHeights[K]; for (unsigned I = 0; I != Extrablocks.size(); ++I) PRCycles += TE.MTM.getProcResourceCycles(Extrablocks[I]->getNumber())[K]; + for (unsigned I = 0; I != ExtraInstrs.size(); ++I) { + const MCSchedClassDesc* SC = ExtraInstrs[I]; + if (!SC->isValid()) + continue; + for (TargetSchedModel::ProcResIter + PI = TE.MTM.SchedModel.getWriteProcResBegin(SC), + PE = TE.MTM.SchedModel.getWriteProcResEnd(SC); PI != PE; ++PI) { + if (PI->ProcResourceIdx != K) + continue; + PRCycles += (PI->Cycles * TE.MTM.SchedModel.getResourceFactor(K)); + } + } PRMax = std::max(PRMax, PRCycles); } // Convert to cycle count. 
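// --- Illustrative sketch (standalone, simplified, made-up types) ---
// getResourceLength above takes, for every processor resource kind, the
// cycles already charged above and below the centre block, adds the cycles
// the hypothetical extra blocks/instructions would consume (scaled by that
// resource's factor), and keeps the maximum over all kinds; the real code
// then converts the result to a cycle count. A toy version of the
// accumulation:
#include <algorithm>
#include <vector>

namespace sketch {
struct ExtraUse { unsigned ResourceIdx; unsigned Cycles; };

unsigned resourceLength(const std::vector<unsigned> &Depths,   // per resource
                        const std::vector<unsigned> &Heights,  // per resource
                        const std::vector<ExtraUse> &Extra,    // extra instrs
                        const std::vector<unsigned> &Factor) { // per resource
  unsigned Max = 0;
  for (unsigned K = 0, E = Depths.size(); K != E; ++K) {
    unsigned Cycles = Depths[K] + Heights[K];
    for (const ExtraUse &U : Extra)
      if (U.ResourceIdx == K)
        Cycles += U.Cycles * Factor[K]; // scale to the common resource unit
    Max = std::max(Max, Cycles);
  }
  return Max;
}
} // namespace sketch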
diff --git a/contrib/llvm/lib/CodeGen/MachineVerifier.cpp b/contrib/llvm/lib/CodeGen/MachineVerifier.cpp index 4b12300..037043f 100644 --- a/contrib/llvm/lib/CodeGen/MachineVerifier.cpp +++ b/contrib/llvm/lib/CodeGen/MachineVerifier.cpp @@ -472,6 +472,9 @@ void MachineVerifier::visitMachineFunctionBefore() { if (MInfo.Succs.size() != I->succ_size()) report("MBB has duplicate entries in its successor list.", I); } + + // Check that the register use lists are sane. + MRI->verifyUseLists(); } // Does iterator point to a and b as the first two elements? diff --git a/contrib/llvm/lib/CodeGen/Passes.cpp b/contrib/llvm/lib/CodeGen/Passes.cpp index 1af65c8..bfbc062 100644 --- a/contrib/llvm/lib/CodeGen/Passes.cpp +++ b/contrib/llvm/lib/CodeGen/Passes.cpp @@ -93,9 +93,10 @@ static cl::opt<bool> EarlyLiveIntervals("early-live-intervals", cl::Hidden, /// simple binary flags that either suppress the pass or do nothing. /// i.e. -disable-mypass=false has no effect. /// These should be converted to boolOrDefault in order to use applyOverride. -static AnalysisID applyDisable(AnalysisID PassID, bool Override) { +static IdentifyingPassPtr applyDisable(IdentifyingPassPtr PassID, + bool Override) { if (Override) - return 0; + return IdentifyingPassPtr(); return PassID; } @@ -103,19 +104,20 @@ static AnalysisID applyDisable(AnalysisID PassID, bool Override) { /// flags with ternary conditions. TargetID is passed through by default. The /// pass is suppressed when the option is false. When the option is true, the /// StandardID is selected if the target provides no default. -static AnalysisID applyOverride(AnalysisID TargetID, cl::boolOrDefault Override, - AnalysisID StandardID) { +static IdentifyingPassPtr applyOverride(IdentifyingPassPtr TargetID, + cl::boolOrDefault Override, + AnalysisID StandardID) { switch (Override) { case cl::BOU_UNSET: return TargetID; case cl::BOU_TRUE: - if (TargetID) + if (TargetID.isValid()) return TargetID; if (StandardID == 0) report_fatal_error("Target cannot enable pass"); return StandardID; case cl::BOU_FALSE: - return 0; + return IdentifyingPassPtr(); } llvm_unreachable("Invalid command line option state"); } @@ -132,7 +134,8 @@ static AnalysisID applyOverride(AnalysisID TargetID, cl::boolOrDefault Override, /// StandardID may be a pseudo ID. In that case TargetID is the name of the real /// pass to run. This allows multiple options to control a single pass depending /// on where in the pipeline that pass is added. -static AnalysisID overridePass(AnalysisID StandardID, AnalysisID TargetID) { +static IdentifyingPassPtr overridePass(AnalysisID StandardID, + IdentifyingPassPtr TargetID) { if (StandardID == &PostRASchedulerID) return applyDisable(TargetID, DisablePostRA); @@ -200,11 +203,11 @@ public: // user interface. For example, a target may disable a standard pass by // default by substituting a pass ID of zero, and the user may still enable // that standard pass with an explicit command line option. - DenseMap<AnalysisID,AnalysisID> TargetPasses; + DenseMap<AnalysisID,IdentifyingPassPtr> TargetPasses; /// Store the pairs of <AnalysisID, AnalysisID> of which the second pass /// is inserted after each instance of the first one. - SmallVector<std::pair<AnalysisID, AnalysisID>, 4> InsertedPasses; + SmallVector<std::pair<AnalysisID, IdentifyingPassPtr>, 4> InsertedPasses; }; } // namespace llvm @@ -239,9 +242,13 @@ TargetPassConfig::TargetPassConfig(TargetMachine *tm, PassManagerBase &pm) /// Insert InsertedPassID pass after TargetPassID. 
void TargetPassConfig::insertPass(AnalysisID TargetPassID, - AnalysisID InsertedPassID) { - assert(TargetPassID != InsertedPassID && "Insert a pass after itself!"); - std::pair<AnalysisID, AnalysisID> P(TargetPassID, InsertedPassID); + IdentifyingPassPtr InsertedPassID) { + assert(((!InsertedPassID.isInstance() && + TargetPassID != InsertedPassID.getID()) || + (InsertedPassID.isInstance() && + TargetPassID != InsertedPassID.getInstance()->getPassID())) && + "Insert a pass after itself!"); + std::pair<AnalysisID, IdentifyingPassPtr> P(TargetPassID, InsertedPassID); Impl->InsertedPasses.push_back(P); } @@ -265,12 +272,12 @@ void TargetPassConfig::setOpt(bool &Opt, bool Val) { } void TargetPassConfig::substitutePass(AnalysisID StandardID, - AnalysisID TargetID) { + IdentifyingPassPtr TargetID) { Impl->TargetPasses[StandardID] = TargetID; } -AnalysisID TargetPassConfig::getPassSubstitution(AnalysisID ID) const { - DenseMap<AnalysisID, AnalysisID>::const_iterator +IdentifyingPassPtr TargetPassConfig::getPassSubstitution(AnalysisID ID) const { + DenseMap<AnalysisID, IdentifyingPassPtr>::const_iterator I = Impl->TargetPasses.find(ID); if (I == Impl->TargetPasses.end()) return ID; @@ -303,24 +310,39 @@ void TargetPassConfig::addPass(Pass *P) { /// Add a CodeGen pass at this point in the pipeline after checking for target /// and command line overrides. +/// +/// addPass cannot return a pointer to the pass instance because is internal the +/// PassManager and the instance we create here may already be freed. AnalysisID TargetPassConfig::addPass(AnalysisID PassID) { - AnalysisID TargetID = getPassSubstitution(PassID); - AnalysisID FinalID = overridePass(PassID, TargetID); - if (FinalID == 0) - return FinalID; - - Pass *P = Pass::createPass(FinalID); - if (!P) - llvm_unreachable("Pass ID not registered"); - addPass(P); + IdentifyingPassPtr TargetID = getPassSubstitution(PassID); + IdentifyingPassPtr FinalPtr = overridePass(PassID, TargetID); + if (!FinalPtr.isValid()) + return 0; + + Pass *P; + if (FinalPtr.isInstance()) + P = FinalPtr.getInstance(); + else { + P = Pass::createPass(FinalPtr.getID()); + if (!P) + llvm_unreachable("Pass ID not registered"); + } + AnalysisID FinalID = P->getPassID(); + addPass(P); // Ends the lifetime of P. + // Add the passes after the pass P if there is any. - for (SmallVector<std::pair<AnalysisID, AnalysisID>, 4>::iterator + for (SmallVector<std::pair<AnalysisID, IdentifyingPassPtr>, 4>::iterator I = Impl->InsertedPasses.begin(), E = Impl->InsertedPasses.end(); I != E; ++I) { if ((*I).first == PassID) { - assert((*I).second && "Illegal Pass ID!"); - Pass *NP = Pass::createPass((*I).second); - assert(NP && "Pass ID not registered"); + assert((*I).second.isValid() && "Illegal Pass ID!"); + Pass *NP; + if ((*I).second.isInstance()) + NP = (*I).second.getInstance(); + else { + NP = Pass::createPass((*I).second.getID()); + assert(NP && "Pass ID not registered"); + } addPass(NP); } } @@ -687,14 +709,6 @@ void TargetPassConfig::addOptimizedRegAlloc(FunctionPass *RegAllocPass) { addPass(&VirtRegRewriterID); printAndVerify("After Virtual Register Rewriter"); - // FinalizeRegAlloc is convenient until MachineInstrBundles is more mature, - // but eventually, all users of it should probably be moved to addPostRA and - // it can go away. Currently, it's the intended place for targets to run - // FinalizeMachineBundles, because passes other than MachineScheduling an - // RegAlloc itself may not be aware of bundles. 
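// --- Illustrative sketch (standalone, minimal stand-in, not the real
// IdentifyingPassPtr declaration) --- The IdentifyingPassPtr used above acts
// as a small discriminated union: it holds either a pass ID (to be
// instantiated later via Pass::createPass) or an already-constructed pass
// instance. A minimal stand-in showing the isValid/isInstance/getID/
// getInstance protocol the code above relies on:
#include <cassert>

namespace sketch {
class Pass;                      // opaque
typedef const void *AnalysisID;  // pass identity by address, as in LLVM

class PassIdOrInstance {
  AnalysisID ID;
  Pass *P;
public:
  PassIdOrInstance() : ID(nullptr), P(nullptr) {}            // "no pass"
  PassIdOrInstance(AnalysisID Id) : ID(Id), P(nullptr) {}
  PassIdOrInstance(Pass *Instance) : ID(nullptr), P(Instance) {}

  bool isValid() const { return ID || P; }
  bool isInstance() const { return P != nullptr; }
  AnalysisID getID() const { assert(!isInstance()); return ID; }
  Pass *getInstance() const { assert(isInstance()); return P; }
};
} // namespace sketch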
- if (addFinalizeRegAlloc()) - printAndVerify("After RegAlloc finalization"); - // Perform stack slot coloring and post-ra machine LICM. // // FIXME: Re-enable coloring with register when it's capable of adding diff --git a/contrib/llvm/lib/CodeGen/PrologEpilogInserter.cpp b/contrib/llvm/lib/CodeGen/PrologEpilogInserter.cpp index e5872df..959dd7d 100644 --- a/contrib/llvm/lib/CodeGen/PrologEpilogInserter.cpp +++ b/contrib/llvm/lib/CodeGen/PrologEpilogInserter.cpp @@ -824,6 +824,12 @@ void PEI::scavengeFrameVirtualRegs(MachineFunction &Fn) { // The instruction stream may change in the loop, so check BB->end() // directly. for (MachineBasicBlock::iterator I = BB->begin(); I != BB->end(); ) { + // We might end up here again with a NULL iterator if we scavenged a + // register for which we inserted spill code for definition by what was + // originally the first instruction in BB. + if (I == MachineBasicBlock::iterator(NULL)) + I = BB->begin(); + MachineInstr *MI = I; MachineBasicBlock::iterator J = llvm::next(I); MachineBasicBlock::iterator P = I == BB->begin() ? @@ -883,8 +889,6 @@ void PEI::scavengeFrameVirtualRegs(MachineFunction &Fn) { "The register scavenger has an unexpected position"); I = P; RS->unprocess(P); - - // RS->skipTo(I == BB->begin() ? NULL : llvm::prior(I)); } else ++I; } diff --git a/contrib/llvm/lib/CodeGen/RegAllocBasic.cpp b/contrib/llvm/lib/CodeGen/RegAllocBasic.cpp index 0b6dc68..7fcfe9e 100644 --- a/contrib/llvm/lib/CodeGen/RegAllocBasic.cpp +++ b/contrib/llvm/lib/CodeGen/RegAllocBasic.cpp @@ -63,7 +63,7 @@ class RABasic : public MachineFunctionPass, public RegAllocBase MachineFunction *MF; // state - std::auto_ptr<Spiller> SpillerInstance; + OwningPtr<Spiller> SpillerInstance; std::priority_queue<LiveInterval*, std::vector<LiveInterval*>, CompSpillWeight> Queue; diff --git a/contrib/llvm/lib/CodeGen/RegAllocGreedy.cpp b/contrib/llvm/lib/CodeGen/RegAllocGreedy.cpp index 6d84176..9eed1fc 100644 --- a/contrib/llvm/lib/CodeGen/RegAllocGreedy.cpp +++ b/contrib/llvm/lib/CodeGen/RegAllocGreedy.cpp @@ -78,7 +78,7 @@ class RAGreedy : public MachineFunctionPass, LiveDebugVariables *DebugVars; // state - std::auto_ptr<Spiller> SpillerInstance; + OwningPtr<Spiller> SpillerInstance; std::priority_queue<std::pair<unsigned, unsigned> > Queue; unsigned NextCascade; @@ -166,8 +166,8 @@ class RAGreedy : public MachineFunctionPass, }; // splitting state. - std::auto_ptr<SplitAnalysis> SA; - std::auto_ptr<SplitEditor> SE; + OwningPtr<SplitAnalysis> SA; + OwningPtr<SplitEditor> SE; /// Cached per-block interference maps InterferenceCache IntfCache; diff --git a/contrib/llvm/lib/CodeGen/RegAllocPBQP.cpp b/contrib/llvm/lib/CodeGen/RegAllocPBQP.cpp index 607edac..15a88e2 100644 --- a/contrib/llvm/lib/CodeGen/RegAllocPBQP.cpp +++ b/contrib/llvm/lib/CodeGen/RegAllocPBQP.cpp @@ -34,6 +34,7 @@ #include "llvm/CodeGen/RegAllocPBQP.h" #include "RegisterCoalescer.h" #include "Spiller.h" +#include "llvm/ADT/OwningPtr.h" #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/CodeGen/CalcSpillWeights.h" #include "llvm/CodeGen/LiveIntervalAnalysis.h" @@ -89,8 +90,8 @@ public: static char ID; /// Construct a PBQP register allocator. 
- RegAllocPBQP(std::auto_ptr<PBQPBuilder> b, char *cPassID=0) - : MachineFunctionPass(ID), builder(b), customPassID(cPassID) { + RegAllocPBQP(OwningPtr<PBQPBuilder> &b, char *cPassID=0) + : MachineFunctionPass(ID), builder(b.take()), customPassID(cPassID) { initializeSlotIndexesPass(*PassRegistry::getPassRegistry()); initializeLiveIntervalsPass(*PassRegistry::getPassRegistry()); initializeCalculateSpillWeightsPass(*PassRegistry::getPassRegistry()); @@ -121,7 +122,7 @@ private: typedef std::set<unsigned> RegSet; - std::auto_ptr<PBQPBuilder> builder; + OwningPtr<PBQPBuilder> builder; char *customPassID; @@ -132,7 +133,7 @@ private: const MachineLoopInfo *loopInfo; MachineRegisterInfo *mri; - std::auto_ptr<Spiller> spiller; + OwningPtr<Spiller> spiller; LiveIntervals *lis; LiveStacks *lss; VirtRegMap *vrm; @@ -186,16 +187,15 @@ unsigned PBQPRAProblem::getPRegForOption(unsigned vreg, unsigned option) const { return allowedSet[option - 1]; } -std::auto_ptr<PBQPRAProblem> PBQPBuilder::build(MachineFunction *mf, - const LiveIntervals *lis, - const MachineLoopInfo *loopInfo, - const RegSet &vregs) { +PBQPRAProblem *PBQPBuilder::build(MachineFunction *mf, const LiveIntervals *lis, + const MachineLoopInfo *loopInfo, + const RegSet &vregs) { LiveIntervals *LIS = const_cast<LiveIntervals*>(lis); MachineRegisterInfo *mri = &mf->getRegInfo(); const TargetRegisterInfo *tri = mf->getTarget().getRegisterInfo(); - std::auto_ptr<PBQPRAProblem> p(new PBQPRAProblem()); + OwningPtr<PBQPRAProblem> p(new PBQPRAProblem()); PBQP::Graph &g = p->getGraph(); RegSet pregs; @@ -282,7 +282,7 @@ std::auto_ptr<PBQPRAProblem> PBQPBuilder::build(MachineFunction *mf, } } - return p; + return p.take(); } void PBQPBuilder::addSpillCosts(PBQP::Vector &costVec, @@ -311,13 +311,12 @@ void PBQPBuilder::addInterferenceCosts( } } -std::auto_ptr<PBQPRAProblem> PBQPBuilderWithCoalescing::build( - MachineFunction *mf, +PBQPRAProblem *PBQPBuilderWithCoalescing::build(MachineFunction *mf, const LiveIntervals *lis, const MachineLoopInfo *loopInfo, const RegSet &vregs) { - std::auto_ptr<PBQPRAProblem> p = PBQPBuilder::build(mf, lis, loopInfo, vregs); + OwningPtr<PBQPRAProblem> p(PBQPBuilder::build(mf, lis, loopInfo, vregs)); PBQP::Graph &g = p->getGraph(); const TargetMachine &tm = mf->getTarget(); @@ -391,7 +390,7 @@ std::auto_ptr<PBQPRAProblem> PBQPBuilderWithCoalescing::build( } } - return p; + return p.take(); } void PBQPBuilderWithCoalescing::addPhysRegCoalesce(PBQP::Vector &costVec, @@ -584,8 +583,8 @@ bool RegAllocPBQP::runOnMachineFunction(MachineFunction &MF) { while (!pbqpAllocComplete) { DEBUG(dbgs() << " PBQP Regalloc round " << round << ":\n"); - std::auto_ptr<PBQPRAProblem> problem = - builder->build(mf, lis, loopInfo, vregsToAlloc); + OwningPtr<PBQPRAProblem> problem( + builder->build(mf, lis, loopInfo, vregsToAlloc)); #ifndef NDEBUG if (pbqpDumpGraphs) { @@ -621,18 +620,18 @@ bool RegAllocPBQP::runOnMachineFunction(MachineFunction &MF) { } FunctionPass* llvm::createPBQPRegisterAllocator( - std::auto_ptr<PBQPBuilder> builder, + OwningPtr<PBQPBuilder> &builder, char *customPassID) { return new RegAllocPBQP(builder, customPassID); } FunctionPass* llvm::createDefaultPBQPRegisterAllocator() { - if (pbqpCoalescing) { - return createPBQPRegisterAllocator( - std::auto_ptr<PBQPBuilder>(new PBQPBuilderWithCoalescing())); - } // else - return createPBQPRegisterAllocator( - std::auto_ptr<PBQPBuilder>(new PBQPBuilder())); + OwningPtr<PBQPBuilder> Builder; + if (pbqpCoalescing) + Builder.reset(new PBQPBuilderWithCoalescing()); + else + 
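// --- Illustrative sketch (standalone, minimal holder, not LLVM's OwningPtr
// header) --- The auto_ptr -> OwningPtr migration above keeps single-owner
// semantics but makes every transfer explicit: the producer hands the raw
// pointer back with take(), and the consumer adopts it with reset() or by
// constructing a new owner.
#include <cassert>

namespace sketch {
template <class T> class Owning {
  T *Ptr;
  Owning(const Owning &);            // non-copyable
  Owning &operator=(const Owning &);
public:
  explicit Owning(T *P = 0) : Ptr(P) {}
  ~Owning() { delete Ptr; }
  void reset(T *P = 0) { if (Ptr != P) { delete Ptr; Ptr = P; } }
  T *take() { T *Tmp = Ptr; Ptr = 0; return Tmp; }   // release ownership
  T *operator->() const { assert(Ptr); return Ptr; }
};

struct Problem { int Rounds; };

// Producer returns a raw pointer; the caller immediately re-owns it,
// mirroring "OwningPtr<PBQPRAProblem> problem(builder->build(...))" above.
Problem *buildProblem() { return new Problem(); }

void run() {
  Owning<Problem> P(buildProblem());
  P->Rounds = 1;            // use through the owner
  Problem *Raw = P.take();  // explicit transfer out, as b.take() above
  delete Raw;               // the new owner is now responsible for deletion
}
} // namespace sketch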
Builder.reset(new PBQPBuilder()); + return createPBQPRegisterAllocator(Builder); } #undef DEBUG_TYPE diff --git a/contrib/llvm/lib/CodeGen/RegisterScavenging.cpp b/contrib/llvm/lib/CodeGen/RegisterScavenging.cpp index 07ace7a..f82ccbe 100644 --- a/contrib/llvm/lib/CodeGen/RegisterScavenging.cpp +++ b/contrib/llvm/lib/CodeGen/RegisterScavenging.cpp @@ -154,14 +154,13 @@ void RegScavenger::unprocess() { assert(Tracking && "Cannot unprocess because we're not tracking"); MachineInstr *MI = MBBI; - if (MI->isDebugValue()) - return; - - determineKillsAndDefs(); + if (!MI->isDebugValue()) { + determineKillsAndDefs(); - // Commit the changes. - setUsed(KillRegs); - setUnused(DefRegs); + // Commit the changes. + setUsed(KillRegs); + setUnused(DefRegs); + } if (MBBI == MBB->begin()) { MBBI = MachineBasicBlock::iterator(NULL); diff --git a/contrib/llvm/lib/CodeGen/ScheduleDAGInstrs.cpp b/contrib/llvm/lib/CodeGen/ScheduleDAGInstrs.cpp index 71e7a21..e4da6a4 100644 --- a/contrib/llvm/lib/CodeGen/ScheduleDAGInstrs.cpp +++ b/contrib/llvm/lib/CodeGen/ScheduleDAGInstrs.cpp @@ -262,6 +262,9 @@ void ScheduleDAGInstrs::addPhysRegDataDeps(SUnit *SU, unsigned OperIdx) { if (UseOp < 0) Dep = SDep(SU, SDep::Artificial); else { + // Set the hasPhysRegDefs only for physreg defs that have a use within + // the scheduling region. + SU->hasPhysRegDefs = true; Dep = SDep(SU, SDep::Data, *Alias); RegUse = UseSU->getInstr(); Dep.setMinLatency( @@ -318,6 +321,7 @@ void ScheduleDAGInstrs::addPhysRegDeps(SUnit *SU, unsigned OperIdx) { } if (!MO.isDef()) { + SU->hasPhysRegUses = true; // Either insert a new Reg2SUnits entry with an empty SUnits list, or // retrieve the existing SUnits list for this register's uses. // Push this SUnit on the use list. diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index eb16095..2e09ec0 100644 --- a/contrib/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -205,6 +205,7 @@ namespace { SDValue visitCTTZ_ZERO_UNDEF(SDNode *N); SDValue visitCTPOP(SDNode *N); SDValue visitSELECT(SDNode *N); + SDValue visitVSELECT(SDNode *N); SDValue visitSELECT_CC(SDNode *N); SDValue visitSETCC(SDNode *N); SDValue visitSIGN_EXTEND(SDNode *N); @@ -243,7 +244,6 @@ namespace { SDValue visitCONCAT_VECTORS(SDNode *N); SDValue visitEXTRACT_SUBVECTOR(SDNode *N); SDValue visitVECTOR_SHUFFLE(SDNode *N); - SDValue visitMEMBARRIER(SDNode *N); SDValue XformToShuffleWithZero(SDNode *N); SDValue ReassociateOps(unsigned Opc, DebugLoc DL, SDValue LHS, SDValue RHS); @@ -1127,6 +1127,7 @@ SDValue DAGCombiner::visit(SDNode *N) { case ISD::CTTZ_ZERO_UNDEF: return visitCTTZ_ZERO_UNDEF(N); case ISD::CTPOP: return visitCTPOP(N); case ISD::SELECT: return visitSELECT(N); + case ISD::VSELECT: return visitVSELECT(N); case ISD::SELECT_CC: return visitSELECT_CC(N); case ISD::SETCC: return visitSETCC(N); case ISD::SIGN_EXTEND: return visitSIGN_EXTEND(N); @@ -1165,7 +1166,6 @@ SDValue DAGCombiner::visit(SDNode *N) { case ISD::CONCAT_VECTORS: return visitCONCAT_VECTORS(N); case ISD::EXTRACT_SUBVECTOR: return visitEXTRACT_SUBVECTOR(N); case ISD::VECTOR_SHUFFLE: return visitVECTOR_SHUFFLE(N); - case ISD::MEMBARRIER: return visitMEMBARRIER(N); } return SDValue(); } @@ -4164,6 +4164,46 @@ SDValue DAGCombiner::visitSELECT(SDNode *N) { return SDValue(); } +SDValue DAGCombiner::visitVSELECT(SDNode *N) { + SDValue N0 = N->getOperand(0); + SDValue N1 = N->getOperand(1); + SDValue N2 = N->getOperand(2); + DebugLoc DL = 
N->getDebugLoc(); + + // Canonicalize integer abs. + // vselect (setg[te] X, 0), X, -X -> + // vselect (setgt X, -1), X, -X -> + // vselect (setl[te] X, 0), -X, X -> + // Y = sra (X, size(X)-1); xor (add (X, Y), Y) + if (N0.getOpcode() == ISD::SETCC) { + SDValue LHS = N0.getOperand(0), RHS = N0.getOperand(1); + ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get(); + bool isAbs = false; + bool RHSIsAllZeros = ISD::isBuildVectorAllZeros(RHS.getNode()); + + if (((RHSIsAllZeros && (CC == ISD::SETGT || CC == ISD::SETGE)) || + (ISD::isBuildVectorAllOnes(RHS.getNode()) && CC == ISD::SETGT)) && + N1 == LHS && N2.getOpcode() == ISD::SUB && N1 == N2.getOperand(1)) + isAbs = ISD::isBuildVectorAllZeros(N2.getOperand(0).getNode()); + else if ((RHSIsAllZeros && (CC == ISD::SETLT || CC == ISD::SETLE)) && + N2 == LHS && N1.getOpcode() == ISD::SUB && N2 == N1.getOperand(1)) + isAbs = ISD::isBuildVectorAllZeros(N1.getOperand(0).getNode()); + + if (isAbs) { + EVT VT = LHS.getValueType(); + SDValue Shift = DAG.getNode( + ISD::SRA, DL, VT, LHS, + DAG.getConstant(VT.getScalarType().getSizeInBits() - 1, VT)); + SDValue Add = DAG.getNode(ISD::ADD, DL, VT, LHS, Shift); + AddToWorkList(Shift.getNode()); + AddToWorkList(Add.getNode()); + return DAG.getNode(ISD::XOR, DL, VT, Add, Shift); + } + } + + return SDValue(); +} + SDValue DAGCombiner::visitSELECT_CC(SDNode *N) { SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); @@ -4453,7 +4493,9 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) { if (N0.getOpcode() == ISD::SETCC) { // sext(setcc) -> sext_in_reg(vsetcc) for vectors. // Only do this before legalize for now. - if (VT.isVector() && !LegalOperations) { + if (VT.isVector() && !LegalOperations && + TLI.getBooleanContents(true) == + TargetLowering::ZeroOrNegativeOneBooleanContent) { EVT N0VT = N0.getOperand(0).getValueType(); // On some architectures (such as SSE/NEON/etc) the SETCC result type is // of the same size as the compared operands. Only optimize sext(setcc()) @@ -7110,25 +7152,40 @@ bool DAGCombiner::CombineToPreIndexedLoadStore(SDNode *N) { assert(OtherUses[i]->getOperand(!OffsetIdx).getNode() == BasePtr.getNode() && "Expected BasePtr operand"); - APInt OV = - cast<ConstantSDNode>(Offset)->getAPIntValue(); - if (AM == ISD::PRE_DEC) - OV = -OV; + // We need to replace ptr0 in the following expression: + // x0 * offset0 + y0 * ptr0 = t0 + // knowing that + // x1 * offset1 + y1 * ptr0 = t1 (the indexed load/store) + // + // where x0, x1, y0 and y1 in {-1, 1} are given by the types of the + // indexed load/store and the expresion that needs to be re-written. + // + // Therefore, we have: + // t0 = (x0 * offset0 - x1 * y0 * y1 *offset1) + (y0 * y1) * t1 ConstantSDNode *CN = cast<ConstantSDNode>(OtherUses[i]->getOperand(OffsetIdx)); - APInt CNV = CN->getAPIntValue(); - if (OtherUses[i]->getOpcode() == ISD::SUB && OffsetIdx == 1) - CNV += OV; - else - CNV -= OV; + int X0, X1, Y0, Y1; + APInt Offset0 = CN->getAPIntValue(); + APInt Offset1 = cast<ConstantSDNode>(Offset)->getAPIntValue(); - SDValue NewOp1 = Result.getValue(isLoad ? 1 : 0); - SDValue NewOp2 = DAG.getConstant(CNV, CN->getValueType(0)); - if (OffsetIdx == 0) - std::swap(NewOp1, NewOp2); + X0 = (OtherUses[i]->getOpcode() == ISD::SUB && OffsetIdx == 1) ? -1 : 1; + Y0 = (OtherUses[i]->getOpcode() == ISD::SUB && OffsetIdx == 0) ? -1 : 1; + X1 = (AM == ISD::PRE_DEC && !Swapped) ? -1 : 1; + Y1 = (AM == ISD::PRE_DEC && Swapped) ? -1 : 1; - SDValue NewUse = DAG.getNode(OtherUses[i]->getOpcode(), + unsigned Opcode = (Y0 * Y1 < 0) ? 
ISD::SUB : ISD::ADD; + + APInt CNV = Offset0; + if (X0 < 0) CNV = -CNV; + if (X1 * Y0 * Y1 < 0) CNV = CNV + Offset1; + else CNV = CNV - Offset1; + + // We can now generate the new expression. + SDValue NewOp1 = DAG.getConstant(CNV, CN->getValueType(0)); + SDValue NewOp2 = Result.getValue(isLoad ? 1 : 0); + + SDValue NewUse = DAG.getNode(Opcode, OtherUses[i]->getDebugLoc(), OtherUses[i]->getValueType(0), NewOp1, NewOp2); DAG.ReplaceAllUsesOfValueWith(SDValue(OtherUses[i], 0), NewUse); @@ -9065,6 +9122,51 @@ SDValue DAGCombiner::visitCONCAT_VECTORS(SDNode *N) { if (ISD::allOperandsUndef(N)) return DAG.getUNDEF(N->getValueType(0)); + // Type legalization of vectors and DAG canonicalization of SHUFFLE_VECTOR + // nodes often generate nop CONCAT_VECTOR nodes. + // Scan the CONCAT_VECTOR operands and look for a CONCAT operations that + // place the incoming vectors at the exact same location. + SDValue SingleSource = SDValue(); + unsigned PartNumElem = N->getOperand(0).getValueType().getVectorNumElements(); + + for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) { + SDValue Op = N->getOperand(i); + + if (Op.getOpcode() == ISD::UNDEF) + continue; + + // Check if this is the identity extract: + if (Op.getOpcode() != ISD::EXTRACT_SUBVECTOR) + return SDValue(); + + // Find the single incoming vector for the extract_subvector. + if (SingleSource.getNode()) { + if (Op.getOperand(0) != SingleSource) + return SDValue(); + } else { + SingleSource = Op.getOperand(0); + + // Check the source type is the same as the type of the result. + // If not, this concat may extend the vector, so we can not + // optimize it away. + if (SingleSource.getValueType() != N->getValueType(0)) + return SDValue(); + } + + unsigned IdentityIndex = i * PartNumElem; + ConstantSDNode *CS = dyn_cast<ConstantSDNode>(Op.getOperand(1)); + // The extract index must be constant. + if (!CS) + return SDValue(); + + // Check that we are reading from the identity index. + if (CS->getZExtValue() != IdentityIndex) + return SDValue(); + } + + if (SingleSource.getNode()) + return SingleSource; + return SDValue(); } @@ -9125,6 +9227,44 @@ SDValue DAGCombiner::visitEXTRACT_SUBVECTOR(SDNode* N) { return SDValue(); } +// Tries to turn a shuffle of two CONCAT_VECTORS into a single concat. +static SDValue partitionShuffleOfConcats(SDNode *N, SelectionDAG &DAG) { + EVT VT = N->getValueType(0); + unsigned NumElts = VT.getVectorNumElements(); + + SDValue N0 = N->getOperand(0); + SDValue N1 = N->getOperand(1); + ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N); + + SmallVector<SDValue, 4> Ops; + EVT ConcatVT = N0.getOperand(0).getValueType(); + unsigned NumElemsPerConcat = ConcatVT.getVectorNumElements(); + unsigned NumConcats = NumElts / NumElemsPerConcat; + + // Look at every vector that's inserted. We're looking for exact + // subvector-sized copies from a concatenated vector + for (unsigned I = 0; I != NumConcats; ++I) { + // Make sure we're dealing with a copy. 
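// --- Illustrative sketch (standalone predicate over plain integers, a
// simplification of the mask test above) --- partitionShuffleOfConcats only
// fires when every chunk of the shuffle mask (one chunk per concatenated
// subvector) selects a whole, aligned, consecutive subvector of the inputs.
#include <vector>

namespace sketch {
// Shuffle masks use -1 for undef lanes; this sketch treats undef as a miss.
bool isConcatOfWholeSubvectors(const std::vector<int> &Mask,
                               unsigned ElemsPerConcat) {
  if (ElemsPerConcat == 0 || Mask.size() % ElemsPerConcat != 0)
    return false;
  for (unsigned Begin = 0; Begin < Mask.size(); Begin += ElemsPerConcat) {
    // The chunk must start on a subvector boundary of the source...
    if (Mask[Begin] < 0 || Mask[Begin] % (int)ElemsPerConcat != 0)
      return false;
    // ...and read the following source elements consecutively.
    for (unsigned J = 1; J != ElemsPerConcat; ++J)
      if (Mask[Begin + J] != Mask[Begin] + (int)J)
        return false;
  }
  return true;
}
} // namespace sketch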
+ unsigned Begin = I * NumElemsPerConcat; + if (SVN->getMaskElt(Begin) % NumElemsPerConcat != 0) + return SDValue(); + + for (unsigned J = 1; J != NumElemsPerConcat; ++J) { + if (SVN->getMaskElt(Begin + J - 1) + 1 != SVN->getMaskElt(Begin + J)) + return SDValue(); + } + + unsigned FirstElt = SVN->getMaskElt(Begin) / NumElemsPerConcat; + if (FirstElt < N0.getNumOperands()) + Ops.push_back(N0.getOperand(FirstElt)); + else + Ops.push_back(N1.getOperand(FirstElt - N0.getNumOperands())); + } + + return DAG.getNode(ISD::CONCAT_VECTORS, N->getDebugLoc(), VT, Ops.data(), + Ops.size()); +} + SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) { EVT VT = N->getValueType(0); unsigned NumElts = VT.getVectorNumElements(); @@ -9226,6 +9366,17 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) { } } + if (N0.getOpcode() == ISD::CONCAT_VECTORS && + Level < AfterLegalizeVectorOps && + (N1.getOpcode() == ISD::UNDEF || + (N1.getOpcode() == ISD::CONCAT_VECTORS && + N0.getOperand(0).getValueType() == N1.getOperand(0).getValueType()))) { + SDValue V = partitionShuffleOfConcats(N, DAG); + + if (V.getNode()) + return V; + } + // If this shuffle node is simply a swizzle of another shuffle node, // and it reverses the swizzle of the previous shuffle then we can // optimize shuffle(shuffle(x, undef), undef) -> x. @@ -9262,59 +9413,6 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) { return SDValue(); } -SDValue DAGCombiner::visitMEMBARRIER(SDNode* N) { - if (!TLI.getShouldFoldAtomicFences()) - return SDValue(); - - SDValue atomic = N->getOperand(0); - switch (atomic.getOpcode()) { - case ISD::ATOMIC_CMP_SWAP: - case ISD::ATOMIC_SWAP: - case ISD::ATOMIC_LOAD_ADD: - case ISD::ATOMIC_LOAD_SUB: - case ISD::ATOMIC_LOAD_AND: - case ISD::ATOMIC_LOAD_OR: - case ISD::ATOMIC_LOAD_XOR: - case ISD::ATOMIC_LOAD_NAND: - case ISD::ATOMIC_LOAD_MIN: - case ISD::ATOMIC_LOAD_MAX: - case ISD::ATOMIC_LOAD_UMIN: - case ISD::ATOMIC_LOAD_UMAX: - break; - default: - return SDValue(); - } - - SDValue fence = atomic.getOperand(0); - if (fence.getOpcode() != ISD::MEMBARRIER) - return SDValue(); - - switch (atomic.getOpcode()) { - case ISD::ATOMIC_CMP_SWAP: - return SDValue(DAG.UpdateNodeOperands(atomic.getNode(), - fence.getOperand(0), - atomic.getOperand(1), atomic.getOperand(2), - atomic.getOperand(3)), atomic.getResNo()); - case ISD::ATOMIC_SWAP: - case ISD::ATOMIC_LOAD_ADD: - case ISD::ATOMIC_LOAD_SUB: - case ISD::ATOMIC_LOAD_AND: - case ISD::ATOMIC_LOAD_OR: - case ISD::ATOMIC_LOAD_XOR: - case ISD::ATOMIC_LOAD_NAND: - case ISD::ATOMIC_LOAD_MIN: - case ISD::ATOMIC_LOAD_MAX: - case ISD::ATOMIC_LOAD_UMIN: - case ISD::ATOMIC_LOAD_UMAX: - return SDValue(DAG.UpdateNodeOperands(atomic.getNode(), - fence.getOperand(0), - atomic.getOperand(1), atomic.getOperand(2)), - atomic.getResNo()); - default: - return SDValue(); - } -} - /// XformToShuffleWithZero - Returns a vector_shuffle if it able to transform /// an AND to a vector_shuffle with the destination vector and a zero vector. /// e.g. AND V, <0xffffffff, 0, 0xffffffff, 0>. 
==> diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp index 9ac738e..288499a 100644 --- a/contrib/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp @@ -1505,3 +1505,61 @@ bool FastISel::HandlePHINodesInSuccessorBlocks(const BasicBlock *LLVMBB) { return true; } + +bool FastISel::tryToFoldLoad(const LoadInst *LI, const Instruction *FoldInst) { + assert(LI->hasOneUse() && + "tryToFoldLoad expected a LoadInst with a single use"); + // We know that the load has a single use, but don't know what it is. If it + // isn't one of the folded instructions, then we can't succeed here. Handle + // this by scanning the single-use users of the load until we get to FoldInst. + unsigned MaxUsers = 6; // Don't scan down huge single-use chains of instrs. + + const Instruction *TheUser = LI->use_back(); + while (TheUser != FoldInst && // Scan up until we find FoldInst. + // Stay in the right block. + TheUser->getParent() == FoldInst->getParent() && + --MaxUsers) { // Don't scan too far. + // If there are multiple or no uses of this instruction, then bail out. + if (!TheUser->hasOneUse()) + return false; + + TheUser = TheUser->use_back(); + } + + // If we didn't find the fold instruction, then we failed to collapse the + // sequence. + if (TheUser != FoldInst) + return false; + + // Don't try to fold volatile loads. Target has to deal with alignment + // constraints. + if (LI->isVolatile()) + return false; + + // Figure out which vreg this is going into. If there is no assigned vreg yet + // then there actually was no reference to it. Perhaps the load is referenced + // by a dead instruction. + unsigned LoadReg = getRegForValue(LI); + if (LoadReg == 0) + return false; + + // We can't fold if this vreg has no uses or more than one use. Multiple uses + // may mean that the instruction got lowered to multiple MIs, or the use of + // the loaded value ended up being multiple operands of the result. + if (!MRI.hasOneUse(LoadReg)) + return false; + + MachineRegisterInfo::reg_iterator RI = MRI.reg_begin(LoadReg); + MachineInstr *User = &*RI; + + // Set the insertion point properly. Folding the load can cause generation of + // other random instructions (like sign extends) for addressing modes; make + // sure they get inserted in a logical place before the new instruction. + FuncInfo.InsertPt = User; + FuncInfo.MBB = User->getParent(); + + // Ask the target to try folding the load. + return tryToFoldLoadIntoMI(User, RI.getOperandNo(), LI); +} + + diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp index 51cc254..2a1d8c2 100644 --- a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp @@ -2759,8 +2759,7 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) { Results.push_back(DAG.getConstant(0, MVT::i32)); Results.push_back(Node->getOperand(0)); break; - case ISD::ATOMIC_FENCE: - case ISD::MEMBARRIER: { + case ISD::ATOMIC_FENCE: { // If the target didn't lower this, lower it to '__sync_synchronize()' call // FIXME: handle "fence singlethread" more efficiently. 
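// --- Illustrative sketch (standalone, toy data structure, not the IR use
// lists) --- tryToFoldLoad above walks the chain of single-use instructions
// from the load towards the candidate fold target, giving up if the chain
// leaves the block, fans out, or grows too long. The same bounded walk over
// a toy "node with one user" structure:
namespace sketch {
struct Node {
  Node *OnlyUser;  // null if the node has zero or multiple users
  int Block;       // which basic block the node lives in
};

// True if following single-use links from From reaches Target within Limit
// hops without leaving Target's block.
bool reachesViaSingleUses(const Node *From, const Node *Target,
                          unsigned Limit = 6) {
  const Node *Cur = From->OnlyUser;  // the load's one (assumed) user
  while (Cur && Cur != Target && Cur->Block == Target->Block && --Limit)
    Cur = Cur->OnlyUser;             // keep following unique users
  return Cur == Target;
}
} // namespace sketch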
TargetLowering::ArgListTy Args; diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp index d19c13b..cd2f060 100644 --- a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp @@ -777,7 +777,6 @@ bool DAGTypeLegalizer::PromoteIntegerOperand(SDNode *N, unsigned OpNo) { Res = PromoteIntOp_CONVERT_RNDSAT(N); break; case ISD::INSERT_VECTOR_ELT: Res = PromoteIntOp_INSERT_VECTOR_ELT(N, OpNo);break; - case ISD::MEMBARRIER: Res = PromoteIntOp_MEMBARRIER(N); break; case ISD::SCALAR_TO_VECTOR: Res = PromoteIntOp_SCALAR_TO_VECTOR(N); break; case ISD::VSELECT: @@ -961,17 +960,6 @@ SDValue DAGTypeLegalizer::PromoteIntOp_INSERT_VECTOR_ELT(SDNode *N, N->getOperand(1), Idx), 0); } -SDValue DAGTypeLegalizer::PromoteIntOp_MEMBARRIER(SDNode *N) { - SDValue NewOps[6]; - DebugLoc dl = N->getDebugLoc(); - NewOps[0] = N->getOperand(0); - for (unsigned i = 1; i < array_lengthof(NewOps); ++i) { - SDValue Flag = GetPromotedInteger(N->getOperand(i)); - NewOps[i] = DAG.getZeroExtendInReg(Flag, dl, MVT::i1); - } - return SDValue(DAG.UpdateNodeOperands(N, NewOps, array_lengthof(NewOps)), 0); -} - SDValue DAGTypeLegalizer::PromoteIntOp_SCALAR_TO_VECTOR(SDNode *N) { // Integer SCALAR_TO_VECTOR operands are implicitly truncated, so just promote // the operand in place. diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h index 54ea926..1c4274a 100644 --- a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h @@ -270,7 +270,6 @@ private: SDValue PromoteIntOp_EXTRACT_ELEMENT(SDNode *N); SDValue PromoteIntOp_EXTRACT_VECTOR_ELT(SDNode *N); SDValue PromoteIntOp_CONCAT_VECTORS(SDNode *N); - SDValue PromoteIntOp_MEMBARRIER(SDNode *N); SDValue PromoteIntOp_SCALAR_TO_VECTOR(SDNode *N); SDValue PromoteIntOp_SELECT(SDNode *N, unsigned OpNo); SDValue PromoteIntOp_SELECT_CC(SDNode *N, unsigned OpNo); @@ -582,6 +581,7 @@ private: SDValue SplitVecOp_EXTRACT_VECTOR_ELT(SDNode *N); SDValue SplitVecOp_STORE(StoreSDNode *N, unsigned OpNo); SDValue SplitVecOp_CONCAT_VECTORS(SDNode *N); + SDValue SplitVecOp_TRUNCATE(SDNode *N); SDValue SplitVecOp_VSETCC(SDNode *N); SDValue SplitVecOp_FP_ROUND(SDNode *N); diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp index 5ec8535..04c6bfd 100644 --- a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp @@ -1046,6 +1046,7 @@ bool DAGTypeLegalizer::SplitVectorOperand(SDNode *N, unsigned OpNo) { case ISD::EXTRACT_SUBVECTOR: Res = SplitVecOp_EXTRACT_SUBVECTOR(N); break; case ISD::EXTRACT_VECTOR_ELT:Res = SplitVecOp_EXTRACT_VECTOR_ELT(N); break; case ISD::CONCAT_VECTORS: Res = SplitVecOp_CONCAT_VECTORS(N); break; + case ISD::TRUNCATE: Res = SplitVecOp_TRUNCATE(N); break; case ISD::FP_ROUND: Res = SplitVecOp_FP_ROUND(N); break; case ISD::STORE: Res = SplitVecOp_STORE(cast<StoreSDNode>(N), OpNo); @@ -1062,7 +1063,6 @@ bool DAGTypeLegalizer::SplitVectorOperand(SDNode *N, unsigned OpNo) { case ISD::SINT_TO_FP: case ISD::UINT_TO_FP: case ISD::FTRUNC: - case ISD::TRUNCATE: case ISD::SIGN_EXTEND: case ISD::ZERO_EXTEND: case ISD::ANY_EXTEND: @@ -1272,8 +1272,8 @@ SDValue DAGTypeLegalizer::SplitVecOp_STORE(StoreSDNode *N, unsigned OpNo) { SDValue 
DAGTypeLegalizer::SplitVecOp_CONCAT_VECTORS(SDNode *N) { DebugLoc DL = N->getDebugLoc(); - // The input operands all must have the same type, and we know the result the - // result type is valid. Convert this to a buildvector which extracts all the + // The input operands all must have the same type, and we know the result + // type is valid. Convert this to a buildvector which extracts all the // input elements. // TODO: If the input elements are power-two vectors, we could convert this to // a new CONCAT_VECTORS node with elements that are half-wide. @@ -1293,6 +1293,66 @@ SDValue DAGTypeLegalizer::SplitVecOp_CONCAT_VECTORS(SDNode *N) { &Elts[0], Elts.size()); } +SDValue DAGTypeLegalizer::SplitVecOp_TRUNCATE(SDNode *N) { + // The result type is legal, but the input type is illegal. If splitting + // ends up with the result type of each half still being legal, just + // do that. If, however, that would result in an illegal result type, + // we can try to get more clever with power-two vectors. Specifically, + // split the input type, but also widen the result element size, then + // concatenate the halves and truncate again. For example, consider a target + // where v8i8 is legal and v8i32 is not (ARM, which doesn't have 256-bit + // vectors). To perform a "%res = v8i8 trunc v8i32 %in" we do: + // %inlo = v4i32 extract_subvector %in, 0 + // %inhi = v4i32 extract_subvector %in, 4 + // %lo16 = v4i16 trunc v4i32 %inlo + // %hi16 = v4i16 trunc v4i32 %inhi + // %in16 = v8i16 concat_vectors v4i16 %lo16, v4i16 %hi16 + // %res = v8i8 trunc v8i16 %in16 + // + // Without this transform, the original truncate would end up being + // scalarized, which is pretty much always a last resort. + SDValue InVec = N->getOperand(0); + EVT InVT = InVec->getValueType(0); + EVT OutVT = N->getValueType(0); + unsigned NumElements = OutVT.getVectorNumElements(); + // Widening should have already made sure this is a power-two vector + // if we're trying to split it at all. assert() that's true, just in case. + assert(!(NumElements & 1) && "Splitting vector, but not in half!"); + + unsigned InElementSize = InVT.getVectorElementType().getSizeInBits(); + unsigned OutElementSize = OutVT.getVectorElementType().getSizeInBits(); + + // If the input elements are only 1/2 the width of the result elements, + // just use the normal splitting. Our trick only work if there's room + // to split more than once. + if (InElementSize <= OutElementSize * 2) + return SplitVecOp_UnaryOp(N); + DebugLoc DL = N->getDebugLoc(); + + // Extract the halves of the input via extract_subvector. + EVT SplitVT = EVT::getVectorVT(*DAG.getContext(), + InVT.getVectorElementType(), NumElements/2); + SDValue InLoVec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SplitVT, InVec, + DAG.getIntPtrConstant(0)); + SDValue InHiVec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SplitVT, InVec, + DAG.getIntPtrConstant(NumElements/2)); + // Truncate them to 1/2 the element size. + EVT HalfElementVT = EVT::getIntegerVT(*DAG.getContext(), InElementSize/2); + EVT HalfVT = EVT::getVectorVT(*DAG.getContext(), HalfElementVT, + NumElements/2); + SDValue HalfLo = DAG.getNode(ISD::TRUNCATE, DL, HalfVT, InLoVec); + SDValue HalfHi = DAG.getNode(ISD::TRUNCATE, DL, HalfVT, InHiVec); + // Concatenate them to get the full intermediate truncation result. 
+ EVT InterVT = EVT::getVectorVT(*DAG.getContext(), HalfElementVT, NumElements); + SDValue InterVec = DAG.getNode(ISD::CONCAT_VECTORS, DL, InterVT, HalfLo, + HalfHi); + // Now finish up by truncating all the way down to the original result + // type. This should normally be something that ends up being legal directly, + // but in theory if a target has very wide vectors and an annoyingly + // restricted set of legal types, this split can chain to build things up. + return DAG.getNode(ISD::TRUNCATE, DL, OutVT, InterVec); +} + SDValue DAGTypeLegalizer::SplitVecOp_VSETCC(SDNode *N) { assert(N->getValueType(0).isVector() && N->getOperand(0).getValueType().isVector() && diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp index 6424431..15235c8 100644 --- a/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -2785,7 +2785,7 @@ SDValue SelectionDAG::FoldConstantArithmetic(unsigned Opcode, EVT VT, } // Handle the scalar case first. - if (Outputs.size() == 1) + if (Scalar1 && Scalar2) return Outputs.back(); // Otherwise build a big vector out of the scalar elements we generated. @@ -5252,14 +5252,14 @@ SDNode *SelectionDAG::MorphNodeTo(SDNode *N, unsigned Opc, MachineSDNode * SelectionDAG::getMachineNode(unsigned Opcode, DebugLoc dl, EVT VT) { SDVTList VTs = getVTList(VT); - return getMachineNode(Opcode, dl, VTs, 0, 0); + return getMachineNode(Opcode, dl, VTs, None); } MachineSDNode * SelectionDAG::getMachineNode(unsigned Opcode, DebugLoc dl, EVT VT, SDValue Op1) { SDVTList VTs = getVTList(VT); SDValue Ops[] = { Op1 }; - return getMachineNode(Opcode, dl, VTs, Ops, array_lengthof(Ops)); + return getMachineNode(Opcode, dl, VTs, Ops); } MachineSDNode * @@ -5267,7 +5267,7 @@ SelectionDAG::getMachineNode(unsigned Opcode, DebugLoc dl, EVT VT, SDValue Op1, SDValue Op2) { SDVTList VTs = getVTList(VT); SDValue Ops[] = { Op1, Op2 }; - return getMachineNode(Opcode, dl, VTs, Ops, array_lengthof(Ops)); + return getMachineNode(Opcode, dl, VTs, Ops); } MachineSDNode * @@ -5275,20 +5275,20 @@ SelectionDAG::getMachineNode(unsigned Opcode, DebugLoc dl, EVT VT, SDValue Op1, SDValue Op2, SDValue Op3) { SDVTList VTs = getVTList(VT); SDValue Ops[] = { Op1, Op2, Op3 }; - return getMachineNode(Opcode, dl, VTs, Ops, array_lengthof(Ops)); + return getMachineNode(Opcode, dl, VTs, Ops); } MachineSDNode * SelectionDAG::getMachineNode(unsigned Opcode, DebugLoc dl, EVT VT, - const SDValue *Ops, unsigned NumOps) { + ArrayRef<SDValue> Ops) { SDVTList VTs = getVTList(VT); - return getMachineNode(Opcode, dl, VTs, Ops, NumOps); + return getMachineNode(Opcode, dl, VTs, Ops); } MachineSDNode * SelectionDAG::getMachineNode(unsigned Opcode, DebugLoc dl, EVT VT1, EVT VT2) { SDVTList VTs = getVTList(VT1, VT2); - return getMachineNode(Opcode, dl, VTs, 0, 0); + return getMachineNode(Opcode, dl, VTs, None); } MachineSDNode * @@ -5296,7 +5296,7 @@ SelectionDAG::getMachineNode(unsigned Opcode, DebugLoc dl, EVT VT1, EVT VT2, SDValue Op1) { SDVTList VTs = getVTList(VT1, VT2); SDValue Ops[] = { Op1 }; - return getMachineNode(Opcode, dl, VTs, Ops, array_lengthof(Ops)); + return getMachineNode(Opcode, dl, VTs, Ops); } MachineSDNode * @@ -5304,7 +5304,7 @@ SelectionDAG::getMachineNode(unsigned Opcode, DebugLoc dl, EVT VT1, EVT VT2, SDValue Op1, SDValue Op2) { SDVTList VTs = getVTList(VT1, VT2); SDValue Ops[] = { Op1, Op2 }; - return getMachineNode(Opcode, dl, VTs, Ops, array_lengthof(Ops)); + return 
getMachineNode(Opcode, dl, VTs, Ops); } MachineSDNode * @@ -5313,15 +5313,15 @@ SelectionDAG::getMachineNode(unsigned Opcode, DebugLoc dl, SDValue Op2, SDValue Op3) { SDVTList VTs = getVTList(VT1, VT2); SDValue Ops[] = { Op1, Op2, Op3 }; - return getMachineNode(Opcode, dl, VTs, Ops, array_lengthof(Ops)); + return getMachineNode(Opcode, dl, VTs, Ops); } MachineSDNode * SelectionDAG::getMachineNode(unsigned Opcode, DebugLoc dl, EVT VT1, EVT VT2, - const SDValue *Ops, unsigned NumOps) { + ArrayRef<SDValue> Ops) { SDVTList VTs = getVTList(VT1, VT2); - return getMachineNode(Opcode, dl, VTs, Ops, NumOps); + return getMachineNode(Opcode, dl, VTs, Ops); } MachineSDNode * @@ -5330,7 +5330,7 @@ SelectionDAG::getMachineNode(unsigned Opcode, DebugLoc dl, SDValue Op1, SDValue Op2) { SDVTList VTs = getVTList(VT1, VT2, VT3); SDValue Ops[] = { Op1, Op2 }; - return getMachineNode(Opcode, dl, VTs, Ops, array_lengthof(Ops)); + return getMachineNode(Opcode, dl, VTs, Ops); } MachineSDNode * @@ -5339,39 +5339,41 @@ SelectionDAG::getMachineNode(unsigned Opcode, DebugLoc dl, SDValue Op1, SDValue Op2, SDValue Op3) { SDVTList VTs = getVTList(VT1, VT2, VT3); SDValue Ops[] = { Op1, Op2, Op3 }; - return getMachineNode(Opcode, dl, VTs, Ops, array_lengthof(Ops)); + return getMachineNode(Opcode, dl, VTs, Ops); } MachineSDNode * SelectionDAG::getMachineNode(unsigned Opcode, DebugLoc dl, EVT VT1, EVT VT2, EVT VT3, - const SDValue *Ops, unsigned NumOps) { + ArrayRef<SDValue> Ops) { SDVTList VTs = getVTList(VT1, VT2, VT3); - return getMachineNode(Opcode, dl, VTs, Ops, NumOps); + return getMachineNode(Opcode, dl, VTs, Ops); } MachineSDNode * SelectionDAG::getMachineNode(unsigned Opcode, DebugLoc dl, EVT VT1, EVT VT2, EVT VT3, EVT VT4, - const SDValue *Ops, unsigned NumOps) { + ArrayRef<SDValue> Ops) { SDVTList VTs = getVTList(VT1, VT2, VT3, VT4); - return getMachineNode(Opcode, dl, VTs, Ops, NumOps); + return getMachineNode(Opcode, dl, VTs, Ops); } MachineSDNode * SelectionDAG::getMachineNode(unsigned Opcode, DebugLoc dl, ArrayRef<EVT> ResultTys, - const SDValue *Ops, unsigned NumOps) { + ArrayRef<SDValue> Ops) { SDVTList VTs = getVTList(&ResultTys[0], ResultTys.size()); - return getMachineNode(Opcode, dl, VTs, Ops, NumOps); + return getMachineNode(Opcode, dl, VTs, Ops); } MachineSDNode * SelectionDAG::getMachineNode(unsigned Opcode, DebugLoc DL, SDVTList VTs, - const SDValue *Ops, unsigned NumOps) { + ArrayRef<SDValue> OpsArray) { bool DoCSE = VTs.VTs[VTs.NumVTs-1] != MVT::Glue; MachineSDNode *N; void *IP = 0; + const SDValue *Ops = OpsArray.data(); + unsigned NumOps = OpsArray.size(); if (DoCSE) { FoldingSetNodeID ID; diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp index ce40cd6..67db211 100644 --- a/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -314,7 +314,7 @@ static SDValue getCopyFromPartsVector(SelectionDAG &DAG, DebugLoc DL, } else { Ctx.emitError(ErrMsg); } - report_fatal_error("Cannot handle scalar-to-vector conversion!"); + return DAG.getUNDEF(ValueVT); } if (ValueVT.getVectorNumElements() == 1 && @@ -5034,6 +5034,11 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { setValue(&I, Res); return 0; } + case Intrinsic::annotation: + case Intrinsic::ptr_annotation: + // Drop the intrinsic, but forward the value + setValue(&I, getValue(I.getOperand(0))); + return 0; case Intrinsic::var_annotation: // Discard 
annotate attributes return 0; @@ -5232,6 +5237,7 @@ void SelectionDAGBuilder::LowerCallTo(ImmutableCallSite CS, SDValue Callee, Entry.isSRet = true; Entry.isNest = false; Entry.isByVal = false; + Entry.isReturned = false; Entry.Alignment = Align; Args.push_back(Entry); RetTy = Type::getVoidTy(FTy->getContext()); @@ -5249,13 +5255,14 @@ void SelectionDAGBuilder::LowerCallTo(ImmutableCallSite CS, SDValue Callee, Entry.Node = ArgNode; Entry.Ty = V->getType(); unsigned attrInd = i - CS.arg_begin() + 1; - Entry.isSExt = CS.paramHasAttr(attrInd, Attribute::SExt); - Entry.isZExt = CS.paramHasAttr(attrInd, Attribute::ZExt); - Entry.isInReg = CS.paramHasAttr(attrInd, Attribute::InReg); - Entry.isSRet = CS.paramHasAttr(attrInd, Attribute::StructRet); - Entry.isNest = CS.paramHasAttr(attrInd, Attribute::Nest); - Entry.isByVal = CS.paramHasAttr(attrInd, Attribute::ByVal); - Entry.Alignment = CS.getParamAlignment(attrInd); + Entry.isSExt = CS.paramHasAttr(attrInd, Attribute::SExt); + Entry.isZExt = CS.paramHasAttr(attrInd, Attribute::ZExt); + Entry.isInReg = CS.paramHasAttr(attrInd, Attribute::InReg); + Entry.isSRet = CS.paramHasAttr(attrInd, Attribute::StructRet); + Entry.isNest = CS.paramHasAttr(attrInd, Attribute::Nest); + Entry.isByVal = CS.paramHasAttr(attrInd, Attribute::ByVal); + Entry.isReturned = CS.paramHasAttr(attrInd, Attribute::Returned); + Entry.Alignment = CS.getParamAlignment(attrInd); Args.push_back(Entry); } @@ -6169,10 +6176,17 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { MatchedRegs.RegVTs.push_back(RegVT); MachineRegisterInfo &RegInfo = DAG.getMachineFunction().getRegInfo(); for (unsigned i = 0, e = InlineAsm::getNumOperandRegisters(OpFlag); - i != e; ++i) - MatchedRegs.Regs.push_back - (RegInfo.createVirtualRegister(TLI.getRegClassFor(RegVT))); - + i != e; ++i) { + if (const TargetRegisterClass *RC = TLI.getRegClassFor(RegVT)) + MatchedRegs.Regs.push_back(RegInfo.createVirtualRegister(RC)); + else { + LLVMContext &Ctx = *DAG.getContext(); + Ctx.emitError(CS.getInstruction(), "inline asm error: This value" + " type register class is not natively supported!"); + report_fatal_error("inline asm error: This value type register " + "class is not natively supported!"); + } + } // Use the produced MatchedRegs object to MatchedRegs.getCopyToRegs(InOperandVal, DAG, getCurDebugLoc(), Chain, &Flag, CS.getInstruction()); @@ -6389,6 +6403,28 @@ void SelectionDAGBuilder::visitVACopy(const CallInst &I) { /// migrated to using LowerCall, this hook should be integrated into SDISel. std::pair<SDValue, SDValue> TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const { + // Handle the incoming return values from the call. + CLI.Ins.clear(); + SmallVector<EVT, 4> RetTys; + ComputeValueVTs(*this, CLI.RetTy, RetTys); + for (unsigned I = 0, E = RetTys.size(); I != E; ++I) { + EVT VT = RetTys[I]; + MVT RegisterVT = getRegisterType(CLI.RetTy->getContext(), VT); + unsigned NumRegs = getNumRegisters(CLI.RetTy->getContext(), VT); + for (unsigned i = 0; i != NumRegs; ++i) { + ISD::InputArg MyFlags; + MyFlags.VT = RegisterVT; + MyFlags.Used = CLI.IsReturnValueUsed; + if (CLI.RetSExt) + MyFlags.Flags.setSExt(); + if (CLI.RetZExt) + MyFlags.Flags.setZExt(); + if (CLI.IsInReg) + MyFlags.Flags.setInReg(); + CLI.Ins.push_back(MyFlags); + } + } + // Handle all of the outgoing arguments. 
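// --- Illustrative sketch (standalone, simplified; the real code asks the
// target via getRegisterType/getNumRegisters rather than rounding up) ---
// The block moved above builds one ISD::InputArg per register the return
// value will occupy: the return type is decomposed into legal value types,
// and each of those may in turn need several registers, each tagged with the
// sext/zext/inreg flags of the call.
#include <vector>

namespace sketch {
struct RetPiece {
  unsigned RegisterBits;   // width of the register used for this piece
  bool SExt, ZExt, InReg;  // how the piece is extended / passed
};

// PartBits: widths of the legal parts of the return type.
// RegisterBits: width of the register type assumed for each part.
std::vector<RetPiece> expandReturn(const std::vector<unsigned> &PartBits,
                                   unsigned RegisterBits,
                                   bool RetSExt, bool RetZExt, bool InReg) {
  std::vector<RetPiece> Pieces;
  for (unsigned Bits : PartBits) {
    unsigned NumRegs = (Bits + RegisterBits - 1) / RegisterBits; // round up
    for (unsigned i = 0; i != NumRegs; ++i)
      Pieces.push_back({RegisterBits, RetSExt, RetZExt, InReg});
  }
  return Pieces;
}
} // namespace sketch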
CLI.Outs.clear(); CLI.OutVals.clear(); @@ -6442,6 +6478,26 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const { else if (Args[i].isZExt) ExtendKind = ISD::ZERO_EXTEND; + // Conservatively only handle 'returned' on non-vectors for now + if (Args[i].isReturned && !Op.getValueType().isVector()) { + assert(CLI.RetTy == Args[i].Ty && RetTys.size() == NumValues && + "unexpected use of 'returned'"); + // Before passing 'returned' to the target lowering code, ensure that + // either the register MVT and the actual EVT are the same size or that + // the return value and argument are extended in the same way; in these + // cases it's safe to pass the argument register value unchanged as the + // return register value (although it's at the target's option whether + // to do so) + // TODO: allow code generation to take advantage of partially preserved + // registers rather than clobbering the entire register when the + // parameter extension method is not compatible with the return + // extension method + if ((NumParts * PartVT.getSizeInBits() == VT.getSizeInBits()) || + (ExtendKind != ISD::ANY_EXTEND && + CLI.RetSExt == Args[i].isSExt && CLI.RetZExt == Args[i].isZExt)) + Flags.setReturned(); + } + getCopyToParts(CLI.DAG, CLI.DL, Op, &Parts[0], NumParts, PartVT, CLI.CS ? CLI.CS->getInstruction() : 0, ExtendKind); @@ -6461,28 +6517,6 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const { } } - // Handle the incoming return values from the call. - CLI.Ins.clear(); - SmallVector<EVT, 4> RetTys; - ComputeValueVTs(*this, CLI.RetTy, RetTys); - for (unsigned I = 0, E = RetTys.size(); I != E; ++I) { - EVT VT = RetTys[I]; - MVT RegisterVT = getRegisterType(CLI.RetTy->getContext(), VT); - unsigned NumRegs = getNumRegisters(CLI.RetTy->getContext(), VT); - for (unsigned i = 0; i != NumRegs; ++i) { - ISD::InputArg MyFlags; - MyFlags.VT = RegisterVT; - MyFlags.Used = CLI.IsReturnValueUsed; - if (CLI.RetSExt) - MyFlags.Flags.setSExt(); - if (CLI.RetZExt) - MyFlags.Flags.setZExt(); - if (CLI.IsInReg) - MyFlags.Flags.setInReg(); - CLI.Ins.push_back(MyFlags); - } - } - SmallVector<SDValue, 4> InVals; CLI.Chain = LowerCall(CLI, InVals); diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp index 3b5823b..47b0391 100644 --- a/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp @@ -54,7 +54,6 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const { case ISD::DELETED_NODE: return "<<Deleted Node!>>"; #endif case ISD::PREFETCH: return "Prefetch"; - case ISD::MEMBARRIER: return "MemBarrier"; case ISD::ATOMIC_FENCE: return "AtomicFence"; case ISD::ATOMIC_CMP_SWAP: return "AtomicCmpSwap"; case ISD::ATOMIC_SWAP: return "AtomicSwap"; diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp index eeea9e4..e21f26e 100644 --- a/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp @@ -63,12 +63,16 @@ STATISTIC(NumFastIselSuccess, "Number of instructions fast isel selected"); STATISTIC(NumFastIselBlocks, "Number of blocks selected entirely by fast isel"); STATISTIC(NumDAGBlocks, "Number of blocks selected using DAG"); STATISTIC(NumDAGIselRetries,"Number of times dag isel has to try another path"); +STATISTIC(NumEntryBlocks, "Number of entry blocks encountered"); 
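// --- Illustrative sketch (standalone predicate, a restatement of the
// condition above with plain integers) --- The 'returned' handling only tags
// the argument when reusing the argument register as the return register
// cannot change the observable value: either no extra bits are introduced
// (the parts exactly cover the value), or the argument and return value are
// extended the same, non-arbitrary way.
namespace sketch {
struct ExtInfo { bool SExt; bool ZExt; };

bool safeToMarkReturned(unsigned NumParts, unsigned PartBits,
                        unsigned ValueBits, bool ArgAnyExtended,
                        ExtInfo Arg, ExtInfo Ret) {
  const bool NoExtraBits = NumParts * PartBits == ValueBits;
  const bool SameExtension =
      !ArgAnyExtended && Arg.SExt == Ret.SExt && Arg.ZExt == Ret.ZExt;
  return NoExtraBits || SameExtension;
}
} // namespace sketch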
+STATISTIC(NumFastIselFailLowerArguments, + "Number of entry blocks where fast isel failed to lower arguments"); #ifndef NDEBUG static cl::opt<bool> EnableFastISelVerbose2("fast-isel-verbose2", cl::Hidden, cl::desc("Enable extra verbose messages in the \"fast\" " "instruction selector")); + // Terminators STATISTIC(NumFastIselFailRet,"Fast isel fails on Ret"); STATISTIC(NumFastIselFailBr,"Fast isel fails on Br"); @@ -742,7 +746,7 @@ public: } // end anonymous namespace void SelectionDAGISel::DoInstructionSelection() { - DEBUG(errs() << "===== Instruction selection begins: BB#" + DEBUG(dbgs() << "===== Instruction selection begins: BB#" << FuncInfo->MBB->getNumber() << " '" << FuncInfo->MBB->getName() << "'\n"); @@ -801,7 +805,7 @@ void SelectionDAGISel::DoInstructionSelection() { CurDAG->setRoot(Dummy.getValue()); } - DEBUG(errs() << "===== Instruction selection ends:\n"); + DEBUG(dbgs() << "===== Instruction selection ends:\n"); PostprocessISelDAG(); } @@ -831,84 +835,6 @@ void SelectionDAGISel::PrepareEHLandingPad() { if (Reg) MBB->addLiveIn(Reg); } -/// TryToFoldFastISelLoad - We're checking to see if we can fold the specified -/// load into the specified FoldInst. Note that we could have a sequence where -/// multiple LLVM IR instructions are folded into the same machineinstr. For -/// example we could have: -/// A: x = load i32 *P -/// B: y = icmp A, 42 -/// C: br y, ... -/// -/// In this scenario, LI is "A", and FoldInst is "C". We know about "B" (and -/// any other folded instructions) because it is between A and C. -/// -/// If we succeed in folding the load into the operation, return true. -/// -bool SelectionDAGISel::TryToFoldFastISelLoad(const LoadInst *LI, - const Instruction *FoldInst, - FastISel *FastIS) { - // We know that the load has a single use, but don't know what it is. If it - // isn't one of the folded instructions, then we can't succeed here. Handle - // this by scanning the single-use users of the load until we get to FoldInst. - unsigned MaxUsers = 6; // Don't scan down huge single-use chains of instrs. - - const Instruction *TheUser = LI->use_back(); - while (TheUser != FoldInst && // Scan up until we find FoldInst. - // Stay in the right block. - TheUser->getParent() == FoldInst->getParent() && - --MaxUsers) { // Don't scan too far. - // If there are multiple or no uses of this instruction, then bail out. - if (!TheUser->hasOneUse()) - return false; - - TheUser = TheUser->use_back(); - } - - // If we didn't find the fold instruction, then we failed to collapse the - // sequence. - if (TheUser != FoldInst) - return false; - - // Don't try to fold volatile loads. Target has to deal with alignment - // constraints. - if (LI->isVolatile()) return false; - - // Figure out which vreg this is going into. If there is no assigned vreg yet - // then there actually was no reference to it. Perhaps the load is referenced - // by a dead instruction. - unsigned LoadReg = FastIS->getRegForValue(LI); - if (LoadReg == 0) - return false; - - // Check to see what the uses of this vreg are. If it has no uses, or more - // than one use (at the machine instr level) then we can't fold it. - MachineRegisterInfo::reg_iterator RI = RegInfo->reg_begin(LoadReg); - if (RI == RegInfo->reg_end()) - return false; - - // See if there is exactly one use of the vreg. If there are multiple uses, - // then the instruction got lowered to multiple machine instructions or the - // use of the loaded value ended up being multiple operands of the result, in - // either case, we can't fold this. 
- MachineRegisterInfo::reg_iterator PostRI = RI; ++PostRI; - if (PostRI != RegInfo->reg_end()) - return false; - - assert(RI.getOperand().isUse() && - "The only use of the vreg must be a use, we haven't emitted the def!"); - - MachineInstr *User = &*RI; - - // Set the insertion point properly. Folding the load can cause generation of - // other random instructions (like sign extends) for addressing modes, make - // sure they get inserted in a logical place before the new instruction. - FuncInfo->InsertPt = User; - FuncInfo->MBB = User->getParent(); - - // Ask the target to try folding the load. - return FastIS->TryToFoldLoad(User, RI.getOperandNo(), LI); -} - /// isFoldedOrDeadInstruction - Return true if the specified instruction is /// side-effect free and is either dead or folded into a generated instruction. /// Return false if it needs to be emitted. @@ -1054,9 +980,12 @@ void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) { // Emit code for any incoming arguments. This must happen before // beginning FastISel on the entry block. if (LLVMBB == &Fn.getEntryBlock()) { + ++NumEntryBlocks; + // Lower any arguments needed in this block if this is the entry block. if (!FastIS->LowerArguments()) { // Fast isel failed to lower these arguments + ++NumFastIselFailLowerArguments; if (EnableFastISelAbortArgs) llvm_unreachable("FastISel didn't lower all arguments"); @@ -1106,7 +1035,7 @@ void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) { } if (BeforeInst != Inst && isa<LoadInst>(BeforeInst) && BeforeInst->hasOneUse() && - TryToFoldFastISelLoad(cast<LoadInst>(BeforeInst), Inst, FastIS)) { + FastIS->tryToFoldLoad(cast<LoadInst>(BeforeInst), Inst)) { // If we succeeded, don't re-select the load. BI = llvm::next(BasicBlock::const_iterator(BeforeInst)); --NumFastIselRemaining; @@ -1178,8 +1107,10 @@ void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) { FastIS->recomputeInsertPt(); } else { // Lower any arguments needed in this block if this is the entry block. - if (LLVMBB == &Fn.getEntryBlock()) + if (LLVMBB == &Fn.getEntryBlock()) { + ++NumEntryBlocks; LowerArguments(Fn); + } } if (Begin != BI) @@ -1771,7 +1702,7 @@ UpdateChainsAndGlue(SDNode *NodeToMatch, SDValue InputChain, if (!NowDeadNodes.empty()) CurDAG->RemoveDeadNodes(NowDeadNodes); - DEBUG(errs() << "ISEL: Match complete!\n"); + DEBUG(dbgs() << "ISEL: Match complete!\n"); } enum ChainResult { @@ -2276,9 +2207,9 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable, SmallVector<SDNode*, 3> ChainNodesMatched; SmallVector<SDNode*, 3> GlueResultNodesMatched; - DEBUG(errs() << "ISEL: Starting pattern match on root node: "; + DEBUG(dbgs() << "ISEL: Starting pattern match on root node: "; NodeToMatch->dump(CurDAG); - errs() << '\n'); + dbgs() << '\n'); // Determine where to start the interpreter. Normally we start at opcode #0, // but if the state machine starts with an OPC_SwitchOpcode, then we @@ -2290,7 +2221,7 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable, // Already computed the OpcodeOffset table, just index into it. 
if (N.getOpcode() < OpcodeOffset.size()) MatcherIndex = OpcodeOffset[N.getOpcode()]; - DEBUG(errs() << " Initial Opcode index to " << MatcherIndex << "\n"); + DEBUG(dbgs() << " Initial Opcode index to " << MatcherIndex << "\n"); } else if (MatcherTable[0] == OPC_SwitchOpcode) { // Otherwise, the table isn't computed, but the state machine does start @@ -2357,7 +2288,7 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable, if (!Result) break; - DEBUG(errs() << " Skipped scope entry (due to false predicate) at " + DEBUG(dbgs() << " Skipped scope entry (due to false predicate) at " << "index " << MatcherIndexOfPredicate << ", continuing at " << FailIndex << "\n"); ++NumDAGIselRetries; @@ -2487,7 +2418,7 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable, if (CaseSize == 0) break; // Otherwise, execute the case we found. - DEBUG(errs() << " OpcodeSwitch from " << SwitchStart + DEBUG(dbgs() << " OpcodeSwitch from " << SwitchStart << " to " << MatcherIndex << "\n"); continue; } @@ -2519,7 +2450,7 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable, if (CaseSize == 0) break; // Otherwise, execute the case we found. - DEBUG(errs() << " TypeSwitch[" << EVT(CurNodeVT).getEVTString() + DEBUG(dbgs() << " TypeSwitch[" << EVT(CurNodeVT).getEVTString() << "] from " << SwitchStart << " to " << MatcherIndex<<'\n'); continue; } @@ -2787,7 +2718,7 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable, // If this is a normal EmitNode command, just create the new node and // add the results to the RecordedNodes list. Res = CurDAG->getMachineNode(TargetOpc, NodeToMatch->getDebugLoc(), - VTList, Ops.data(), Ops.size()); + VTList, Ops); // Add all the non-glue/non-chain results to the RecordedNodes list. for (unsigned i = 0, e = VTs.size(); i != e; ++i) { @@ -2863,9 +2794,9 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable, ->setMemRefs(MemRefs, MemRefs + NumMemRefs); } - DEBUG(errs() << " " + DEBUG(dbgs() << " " << (Opcode == OPC_MorphNodeTo ? "Morphed" : "Created") - << " node: "; Res->dump(CurDAG); errs() << "\n"); + << " node: "; Res->dump(CurDAG); dbgs() << "\n"); // If this was a MorphNodeTo then we're completely done! if (Opcode == OPC_MorphNodeTo) { @@ -2940,7 +2871,7 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable, // If the code reached this point, then the match failed. See if there is // another child to try in the current 'Scope', otherwise pop it until we // find a case to check. - DEBUG(errs() << " Match failed at index " << CurrentOpcodeIndex << "\n"); + DEBUG(dbgs() << " Match failed at index " << CurrentOpcodeIndex << "\n"); ++NumDAGIselRetries; while (1) { if (MatchScopes.empty()) { @@ -2960,7 +2891,7 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable, MatchedMemRefs.resize(LastScope.NumMatchedMemRefs); MatcherIndex = LastScope.FailIndex; - DEBUG(errs() << " Continuing at " << MatcherIndex << "\n"); + DEBUG(dbgs() << " Continuing at " << MatcherIndex << "\n"); InputChain = LastScope.InputChain; InputGlue = LastScope.InputGlue; diff --git a/contrib/llvm/lib/CodeGen/ShrinkWrapping.cpp b/contrib/llvm/lib/CodeGen/ShrinkWrapping.cpp index 9ab4918..2feea59 100644 --- a/contrib/llvm/lib/CodeGen/ShrinkWrapping.cpp +++ b/contrib/llvm/lib/CodeGen/ShrinkWrapping.cpp @@ -70,14 +70,14 @@ ShrinkWrapFunc("shrink-wrap-func", cl::Hidden, // Debugging level for shrink wrapping. 
enum ShrinkWrapDebugLevel { - None, BasicInfo, Iterations, Details + Disabled, BasicInfo, Iterations, Details }; static cl::opt<enum ShrinkWrapDebugLevel> ShrinkWrapDebugging("shrink-wrap-dbg", cl::Hidden, cl::desc("Print shrink wrapping debugging information"), cl::values( - clEnumVal(None , "disable debug output"), + clEnumVal(Disabled , "disable debug output"), clEnumVal(BasicInfo , "print basic DF sets"), clEnumVal(Iterations, "print SR sets for each iteration"), clEnumVal(Details , "print all DF sets"), diff --git a/contrib/llvm/lib/CodeGen/TargetLoweringBase.cpp b/contrib/llvm/lib/CodeGen/TargetLoweringBase.cpp index f42bdbd..8074d16 100644 --- a/contrib/llvm/lib/CodeGen/TargetLoweringBase.cpp +++ b/contrib/llvm/lib/CodeGen/TargetLoweringBase.cpp @@ -620,12 +620,55 @@ static void InitCmpLibcallCCs(ISD::CondCode *CCs) { TargetLoweringBase::TargetLoweringBase(const TargetMachine &tm, const TargetLoweringObjectFile *tlof) : TM(tm), TD(TM.getDataLayout()), TLOF(*tlof) { + initActions(); + + // Perform these initializations only once. + IsLittleEndian = TD->isLittleEndian(); + PointerTy = MVT::getIntegerVT(8*TD->getPointerSize(0)); + MaxStoresPerMemset = MaxStoresPerMemcpy = MaxStoresPerMemmove = 8; + MaxStoresPerMemsetOptSize = MaxStoresPerMemcpyOptSize + = MaxStoresPerMemmoveOptSize = 4; + UseUnderscoreSetJmp = false; + UseUnderscoreLongJmp = false; + SelectIsExpensive = false; + IntDivIsCheap = false; + Pow2DivIsCheap = false; + JumpIsExpensive = false; + PredictableSelectIsExpensive = false; + StackPointerRegisterToSaveRestore = 0; + ExceptionPointerRegister = 0; + ExceptionSelectorRegister = 0; + BooleanContents = UndefinedBooleanContent; + BooleanVectorContents = UndefinedBooleanContent; + SchedPreferenceInfo = Sched::ILP; + JumpBufSize = 0; + JumpBufAlignment = 0; + MinFunctionAlignment = 0; + PrefFunctionAlignment = 0; + PrefLoopAlignment = 0; + MinStackArgumentAlignment = 1; + InsertFencesForAtomic = false; + SupportJumpTables = true; + MinimumJumpTableEntries = 4; + + InitLibcallNames(LibcallRoutineNames, TM); + InitCmpLibcallCCs(CmpLibcallCCs); + InitLibcallCallingConvs(LibcallCallingConvs); +} + +TargetLoweringBase::~TargetLoweringBase() { + delete &TLOF; +} + +void TargetLoweringBase::initActions() { // All operations default to being supported. memset(OpActions, 0, sizeof(OpActions)); memset(LoadExtActions, 0, sizeof(LoadExtActions)); memset(TruncStoreActions, 0, sizeof(TruncStoreActions)); memset(IndexedModeActions, 0, sizeof(IndexedModeActions)); memset(CondCodeActions, 0, sizeof(CondCodeActions)); + memset(RegClassForVT, 0,MVT::LAST_VALUETYPE*sizeof(TargetRegisterClass*)); + memset(TargetDAGCombineArray, 0, array_lengthof(TargetDAGCombineArray)); // Set default actions for various operations. for (unsigned VT = 0; VT != (unsigned)MVT::LAST_VALUETYPE; ++VT) { @@ -702,45 +745,6 @@ TargetLoweringBase::TargetLoweringBase(const TargetMachine &tm, // here is to inform DAG Legalizer to replace DEBUGTRAP with TRAP. 
// setOperationAction(ISD::DEBUGTRAP, MVT::Other, Expand); - - IsLittleEndian = TD->isLittleEndian(); - PointerTy = MVT::getIntegerVT(8*TD->getPointerSize(0)); - memset(RegClassForVT, 0,MVT::LAST_VALUETYPE*sizeof(TargetRegisterClass*)); - memset(TargetDAGCombineArray, 0, array_lengthof(TargetDAGCombineArray)); - MaxStoresPerMemset = MaxStoresPerMemcpy = MaxStoresPerMemmove = 8; - MaxStoresPerMemsetOptSize = MaxStoresPerMemcpyOptSize - = MaxStoresPerMemmoveOptSize = 4; - UseUnderscoreSetJmp = false; - UseUnderscoreLongJmp = false; - SelectIsExpensive = false; - IntDivIsCheap = false; - Pow2DivIsCheap = false; - JumpIsExpensive = false; - PredictableSelectIsExpensive = false; - StackPointerRegisterToSaveRestore = 0; - ExceptionPointerRegister = 0; - ExceptionSelectorRegister = 0; - BooleanContents = UndefinedBooleanContent; - BooleanVectorContents = UndefinedBooleanContent; - SchedPreferenceInfo = Sched::ILP; - JumpBufSize = 0; - JumpBufAlignment = 0; - MinFunctionAlignment = 0; - PrefFunctionAlignment = 0; - PrefLoopAlignment = 0; - MinStackArgumentAlignment = 1; - ShouldFoldAtomicFences = false; - InsertFencesForAtomic = false; - SupportJumpTables = true; - MinimumJumpTableEntries = 4; - - InitLibcallNames(LibcallRoutineNames, TM); - InitCmpLibcallCCs(CmpLibcallCCs); - InitLibcallCallingConvs(LibcallCallingConvs); -} - -TargetLoweringBase::~TargetLoweringBase() { - delete &TLOF; } MVT TargetLoweringBase::getScalarShiftAmountTy(EVT LHSTy) const { diff --git a/contrib/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp b/contrib/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp index 3bdca4c..7e7359a 100644 --- a/contrib/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp +++ b/contrib/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp @@ -523,11 +523,6 @@ getExplicitSectionGlobal(const GlobalValue *GV, SectionKind Kind, const MCSection *TargetLoweringObjectFileMachO:: SelectSectionForGlobal(const GlobalValue *GV, SectionKind Kind, Mangler *Mang, const TargetMachine &TM) const { - - // Handle thread local data. - if (Kind.isThreadBSS()) return TLSBSSSection; - if (Kind.isThreadData()) return TLSDataSection; - if (Kind.isText()) return GV->isWeakForLinker() ? TextCoalSection : TextSection; @@ -580,6 +575,10 @@ SelectSectionForGlobal(const GlobalValue *GV, SectionKind Kind, if (Kind.isBSSLocal()) return DataBSSSection; + // Handle thread local data. + if (Kind.isThreadBSS()) return TLSBSSSection; + if (Kind.isThreadData()) return TLSDataSection; + // Otherwise, just drop the variable in the normal data section. return DataSection; } @@ -782,3 +781,49 @@ SelectSectionForGlobal(const GlobalValue *GV, SectionKind Kind, return getDataSection(); } +void TargetLoweringObjectFileCOFF:: +emitModuleFlags(MCStreamer &Streamer, + ArrayRef<Module::ModuleFlagEntry> ModuleFlags, + Mangler *Mang, const TargetMachine &TM) const { + MDNode *LinkerOptions = 0; + + // Look for the "Linker Options" flag, since it's the only one we support. + for (ArrayRef<Module::ModuleFlagEntry>::iterator + i = ModuleFlags.begin(), e = ModuleFlags.end(); i != e; ++i) { + const Module::ModuleFlagEntry &MFE = *i; + StringRef Key = MFE.Key->getString(); + Value *Val = MFE.Val; + if (Key == "Linker Options") { + LinkerOptions = cast<MDNode>(Val); + break; + } + } + if (!LinkerOptions) + return; + + // Emit the linker options to the linker .drectve section. According to the + // spec, this section is a space-separated string containing flags for linker. 
+ const MCSection *Sec = getDrectveSection(); + Streamer.SwitchSection(Sec); + for (unsigned i = 0, e = LinkerOptions->getNumOperands(); i != e; ++i) { + MDNode *MDOptions = cast<MDNode>(LinkerOptions->getOperand(i)); + for (unsigned ii = 0, ie = MDOptions->getNumOperands(); ii != ie; ++ii) { + MDString *MDOption = cast<MDString>(MDOptions->getOperand(ii)); + StringRef Op = MDOption->getString(); + // Lead with a space for consistency with our dllexport implementation. + std::string Escaped(" "); + if (Op.find(" ") != StringRef::npos) { + // The PE-COFF spec says args with spaces must be quoted. It doesn't say + // how to escape quotes, but it probably uses this algorithm: + // http://msdn.microsoft.com/en-us/library/17w5ykft(v=vs.85).aspx + // FIXME: Reuse escaping code from Support/Windows/Program.inc + Escaped.push_back('\"'); + Escaped.append(Op); + Escaped.push_back('\"'); + } else { + Escaped.append(Op); + } + Streamer.EmitBytes(Escaped); + } + } +} diff --git a/contrib/llvm/lib/CodeGen/TargetOptionsImpl.cpp b/contrib/llvm/lib/CodeGen/TargetOptionsImpl.cpp index 0f59d01..435a5e7 100644 --- a/contrib/llvm/lib/CodeGen/TargetOptionsImpl.cpp +++ b/contrib/llvm/lib/CodeGen/TargetOptionsImpl.cpp @@ -50,3 +50,29 @@ StringRef TargetOptions::getTrapFunctionName() const { return TrapFuncName; } +bool TargetOptions::operator==(const TargetOptions &TO) { +#define ARE_EQUAL(X) X == TO.X + return + ARE_EQUAL(UnsafeFPMath) && + ARE_EQUAL(NoInfsFPMath) && + ARE_EQUAL(NoNaNsFPMath) && + ARE_EQUAL(HonorSignDependentRoundingFPMathOption) && + ARE_EQUAL(UseSoftFloat) && + ARE_EQUAL(NoZerosInBSS) && + ARE_EQUAL(JITExceptionHandling) && + ARE_EQUAL(JITEmitDebugInfo) && + ARE_EQUAL(JITEmitDebugInfoToDisk) && + ARE_EQUAL(GuaranteedTailCallOpt) && + ARE_EQUAL(DisableTailCalls) && + ARE_EQUAL(StackAlignmentOverride) && + ARE_EQUAL(RealignStack) && + ARE_EQUAL(SSPBufferSize) && + ARE_EQUAL(EnableFastISel) && + ARE_EQUAL(PositionIndependentExecutable) && + ARE_EQUAL(EnableSegmentedStacks) && + ARE_EQUAL(UseInitArray) && + ARE_EQUAL(TrapFuncName) && + ARE_EQUAL(FloatABIType) && + ARE_EQUAL(AllowFPOpFusion); +#undef ARE_EQUAL +} diff --git a/contrib/llvm/lib/CodeGen/TargetSchedule.cpp b/contrib/llvm/lib/CodeGen/TargetSchedule.cpp index 783bfa1..1bf14db 100644 --- a/contrib/llvm/lib/CodeGen/TargetSchedule.cpp +++ b/contrib/llvm/lib/CodeGen/TargetSchedule.cpp @@ -128,6 +128,8 @@ resolveSchedClass(const MachineInstr *MI) const { // Get the definition's scheduling class descriptor from this machine model. 
unsigned SchedClass = MI->getDesc().getSchedClass(); const MCSchedClassDesc *SCDesc = SchedModel.getSchedClassDesc(SchedClass); + if (!SCDesc->isValid()) + return SCDesc; #ifndef NDEBUG unsigned NIter = 0; diff --git a/contrib/llvm/lib/CodeGen/TwoAddressInstructionPass.cpp b/contrib/llvm/lib/CodeGen/TwoAddressInstructionPass.cpp index e6dfe10..7ca2bee 100644 --- a/contrib/llvm/lib/CodeGen/TwoAddressInstructionPass.cpp +++ b/contrib/llvm/lib/CodeGen/TwoAddressInstructionPass.cpp @@ -43,6 +43,7 @@ #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/IR/Function.h" #include "llvm/MC/MCInstrItineraries.h" +#include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Target/TargetInstrInfo.h" @@ -58,6 +59,12 @@ STATISTIC(Num3AddrSunk, "Number of 3-address instructions sunk"); STATISTIC(NumReSchedUps, "Number of instructions re-scheduled up"); STATISTIC(NumReSchedDowns, "Number of instructions re-scheduled down"); +// Temporary flag to disable rescheduling. +static cl::opt<bool> +EnableRescheduling("twoaddr-reschedule", + cl::desc("Coalesce copies by rescheduling (default=true)"), + cl::init(true), cl::Hidden); + namespace { class TwoAddressInstructionPass : public MachineFunctionPass { MachineFunction *MF; @@ -426,10 +433,7 @@ static bool isKilled(MachineInstr &MI, unsigned Reg, /// isTwoAddrUse - Return true if the specified MI uses the specified register /// as a two-address use. If so, return the destination register by reference. static bool isTwoAddrUse(MachineInstr &MI, unsigned Reg, unsigned &DstReg) { - const MCInstrDesc &MCID = MI.getDesc(); - unsigned NumOps = MI.isInlineAsm() - ? MI.getNumOperands() : MCID.getNumOperands(); - for (unsigned i = 0; i != NumOps; ++i) { + for (unsigned i = 0, NumOps = MI.getNumOperands(); i != NumOps; ++i) { const MachineOperand &MO = MI.getOperand(i); if (!MO.isReg() || !MO.isUse() || MO.getReg() != Reg) continue; @@ -1144,7 +1148,7 @@ tryInstructionTransform(MachineBasicBlock::iterator &mi, // If there is one more use of regB later in the same MBB, consider // re-schedule this MI below it. - if (rescheduleMIBelowKill(mi, nmi, regB)) { + if (EnableRescheduling && rescheduleMIBelowKill(mi, nmi, regB)) { ++NumReSchedDowns; return true; } @@ -1163,7 +1167,7 @@ tryInstructionTransform(MachineBasicBlock::iterator &mi, // If there is one more use of regB later in the same MBB, consider // re-schedule it before this MI if it's legal. 
- if (rescheduleKillAboveMI(mi, nmi, regB)) { + if (EnableRescheduling && rescheduleKillAboveMI(mi, nmi, regB)) { ++NumReSchedUps; return true; } diff --git a/contrib/llvm/lib/DebugInfo/DWARFCompileUnit.cpp b/contrib/llvm/lib/DebugInfo/DWARFCompileUnit.cpp index e3e4ccd..4f0eed4 100644 --- a/contrib/llvm/lib/DebugInfo/DWARFCompileUnit.cpp +++ b/contrib/llvm/lib/DebugInfo/DWARFCompileUnit.cpp @@ -9,7 +9,7 @@ #include "DWARFCompileUnit.h" #include "DWARFContext.h" -#include "DWARFFormValue.h" +#include "llvm/DebugInfo/DWARFFormValue.h" #include "llvm/Support/Dwarf.h" #include "llvm/Support/Format.h" #include "llvm/Support/raw_ostream.h" @@ -165,7 +165,7 @@ size_t DWARFCompileUnit::extractDIEsIfNeeded(bool cu_die_only) { // we were told to parse const uint8_t *fixed_form_sizes = - DWARFFormValue::getFixedFormSizesForAddressSize(getAddressByteSize()); + DWARFFormValue::getFixedFormSizes(getAddressByteSize(), getVersion()); while (offset < next_cu_offset && die.extractFast(this, fixed_form_sizes, &offset)) { diff --git a/contrib/llvm/lib/DebugInfo/DWARFContext.cpp b/contrib/llvm/lib/DebugInfo/DWARFContext.cpp index 9e19310..9f52133 100644 --- a/contrib/llvm/lib/DebugInfo/DWARFContext.cpp +++ b/contrib/llvm/lib/DebugInfo/DWARFContext.cpp @@ -9,6 +9,9 @@ #include "DWARFContext.h" #include "llvm/ADT/SmallString.h" +#include "llvm/ADT/StringSwitch.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/Support/Compression.h" #include "llvm/Support/Dwarf.h" #include "llvm/Support/Format.h" #include "llvm/Support/Path.h" @@ -107,36 +110,43 @@ void DWARFContext::dump(raw_ostream &OS, DIDumpType DumpType) { } if (DumpType == DIDT_All || DumpType == DIDT_AbbrevDwo) { - OS << "\n.debug_abbrev.dwo contents:\n"; - getDebugAbbrevDWO()->dump(OS); + const DWARFDebugAbbrev *D = getDebugAbbrevDWO(); + if (D) { + OS << "\n.debug_abbrev.dwo contents:\n"; + getDebugAbbrevDWO()->dump(OS); + } } - if (DumpType == DIDT_All || DumpType == DIDT_InfoDwo) { - OS << "\n.debug_info.dwo contents:\n"; - for (unsigned i = 0, e = getNumDWOCompileUnits(); i != e; ++i) - getDWOCompileUnitAtIndex(i)->dump(OS); - } + if (DumpType == DIDT_All || DumpType == DIDT_InfoDwo) + if (getNumDWOCompileUnits()) { + OS << "\n.debug_info.dwo contents:\n"; + for (unsigned i = 0, e = getNumDWOCompileUnits(); i != e; ++i) + getDWOCompileUnitAtIndex(i)->dump(OS); + } - if (DumpType == DIDT_All || DumpType == DIDT_StrDwo) { - OS << "\n.debug_str.dwo contents:\n"; - DataExtractor strDWOData(getStringDWOSection(), isLittleEndian(), 0); - offset = 0; - uint32_t strDWOOffset = 0; - while (const char *s = strDWOData.getCStr(&offset)) { - OS << format("0x%8.8x: \"%s\"\n", strDWOOffset, s); - strDWOOffset = offset; + if (DumpType == DIDT_All || DumpType == DIDT_StrDwo) + if (!getStringDWOSection().empty()) { + OS << "\n.debug_str.dwo contents:\n"; + DataExtractor strDWOData(getStringDWOSection(), isLittleEndian(), 0); + offset = 0; + uint32_t strDWOOffset = 0; + while (const char *s = strDWOData.getCStr(&offset)) { + OS << format("0x%8.8x: \"%s\"\n", strDWOOffset, s); + strDWOOffset = offset; + } } - } - if (DumpType == DIDT_All || DumpType == DIDT_StrOffsetsDwo) { - OS << "\n.debug_str_offsets.dwo contents:\n"; - DataExtractor strOffsetExt(getStringOffsetDWOSection(), isLittleEndian(), 0); - offset = 0; - while (offset < getStringOffsetDWOSection().size()) { - OS << format("0x%8.8x: ", offset); - OS << format("%8.8x\n", strOffsetExt.getU32(&offset)); + if (DumpType == DIDT_All || DumpType == DIDT_StrOffsetsDwo) + if (!getStringOffsetDWOSection().empty()) { + 
OS << "\n.debug_str_offsets.dwo contents:\n"; + DataExtractor strOffsetExt(getStringOffsetDWOSection(), isLittleEndian(), 0); + offset = 0; + uint64_t size = getStringOffsetDWOSection().size(); + while (offset < size) { + OS << format("0x%8.8x: ", offset); + OS << format("%8.8x\n", strOffsetExt.getU32(&offset)); + } } - } } const DWARFDebugAbbrev *DWARFContext::getDebugAbbrev() { @@ -482,6 +492,22 @@ DIInliningInfo DWARFContext::getInliningInfoForAddress(uint64_t Address, return InliningInfo; } +static bool consumeCompressedDebugSectionHeader(StringRef &data, + uint64_t &OriginalSize) { + // Consume "ZLIB" prefix. + if (!data.startswith("ZLIB")) + return false; + data = data.substr(4); + // Consume uncompressed section size (big-endian 8 bytes). + DataExtractor extractor(data, false, 8); + uint32_t Offset = 0; + OriginalSize = extractor.getU64(&Offset); + if (Offset == 0) + return false; + data = data.substr(Offset); + return true; +} + DWARFContextInMemory::DWARFContextInMemory(object::ObjectFile *Obj) : IsLittleEndian(Obj->isLittleEndian()), AddressSize(Obj->getBytesInAddress()) { @@ -495,49 +521,55 @@ DWARFContextInMemory::DWARFContextInMemory(object::ObjectFile *Obj) : i->getContents(data); name = name.substr(name.find_first_not_of("._")); // Skip . and _ prefixes. - if (name == "debug_info") - InfoSection = data; - else if (name == "debug_abbrev") - AbbrevSection = data; - else if (name == "debug_line") - LineSection = data; - else if (name == "debug_aranges") - ARangeSection = data; - else if (name == "debug_frame") - DebugFrameSection = data; - else if (name == "debug_str") - StringSection = data; - else if (name == "debug_ranges") { + + // Check if debug info section is compressed with zlib. + if (name.startswith("zdebug_")) { + uint64_t OriginalSize; + if (!zlib::isAvailable() || + !consumeCompressedDebugSectionHeader(data, OriginalSize)) + continue; + OwningPtr<MemoryBuffer> UncompressedSection; + if (zlib::uncompress(data, UncompressedSection, OriginalSize) != + zlib::StatusOK) + continue; + // Make data point to uncompressed section contents and save its contents. + name = name.substr(1); + data = UncompressedSection->getBuffer(); + UncompressedSections.push_back(UncompressedSection.take()); + } + + StringRef *Section = StringSwitch<StringRef*>(name) + .Case("debug_info", &InfoSection) + .Case("debug_abbrev", &AbbrevSection) + .Case("debug_line", &LineSection) + .Case("debug_aranges", &ARangeSection) + .Case("debug_frame", &DebugFrameSection) + .Case("debug_str", &StringSection) + .Case("debug_ranges", &RangeSection) + .Case("debug_pubnames", &PubNamesSection) + .Case("debug_info.dwo", &InfoDWOSection) + .Case("debug_abbrev.dwo", &AbbrevDWOSection) + .Case("debug_str.dwo", &StringDWOSection) + .Case("debug_str_offsets.dwo", &StringOffsetDWOSection) + .Case("debug_addr", &AddrSection) + // Any more debug info sections go here. + .Default(0); + if (!Section) + continue; + *Section = data; + if (name == "debug_ranges") { // FIXME: Use the other dwo range section when we emit it. RangeDWOSection = data; - RangeSection = data; } - else if (name == "debug_pubnames") - PubNamesSection = data; - else if (name == "debug_info.dwo") - InfoDWOSection = data; - else if (name == "debug_abbrev.dwo") - AbbrevDWOSection = data; - else if (name == "debug_str.dwo") - StringDWOSection = data; - else if (name == "debug_str_offsets.dwo") - StringOffsetDWOSection = data; - else if (name == "debug_addr") - AddrSection = data; - // Any more debug info sections go here. 
- else - continue; // TODO: Add support for relocations in other sections as needed. // Record relocations for the debug_info and debug_line sections. - RelocAddrMap *Map; - if (name == "debug_info") - Map = &InfoRelocMap; - else if (name == "debug_info.dwo") - Map = &InfoDWORelocMap; - else if (name == "debug_line") - Map = &LineRelocMap; - else + RelocAddrMap *Map = StringSwitch<RelocAddrMap*>(name) + .Case("debug_info", &InfoRelocMap) + .Case("debug_info.dwo", &InfoDWORelocMap) + .Case("debug_line", &LineRelocMap) + .Default(0); + if (!Map) continue; if (i->begin_relocations() != i->end_relocations()) { @@ -547,7 +579,7 @@ DWARFContextInMemory::DWARFContextInMemory(object::ObjectFile *Obj) : reloc_e = i->end_relocations(); reloc_i != reloc_e; reloc_i.increment(ec)) { uint64_t Address; - reloc_i->getAddress(Address); + reloc_i->getOffset(Address); uint64_t Type; reloc_i->getType(Type); uint64_t SymAddr = 0; @@ -593,4 +625,8 @@ DWARFContextInMemory::DWARFContextInMemory(object::ObjectFile *Obj) : } } +DWARFContextInMemory::~DWARFContextInMemory() { + DeleteContainerPointers(UncompressedSections); +} + void DWARFContextInMemory::anchor() { } diff --git a/contrib/llvm/lib/DebugInfo/DWARFContext.h b/contrib/llvm/lib/DebugInfo/DWARFContext.h index 37b2729..78c18e6 100644 --- a/contrib/llvm/lib/DebugInfo/DWARFContext.h +++ b/contrib/llvm/lib/DebugInfo/DWARFContext.h @@ -161,8 +161,11 @@ class DWARFContextInMemory : public DWARFContext { StringRef RangeDWOSection; StringRef AddrSection; + SmallVector<MemoryBuffer*, 4> UncompressedSections; + public: DWARFContextInMemory(object::ObjectFile *); + ~DWARFContextInMemory(); virtual bool isLittleEndian() const { return IsLittleEndian; } virtual uint8_t getAddressSize() const { return AddressSize; } virtual const RelocAddrMap &infoRelocMap() const { return InfoRelocMap; } diff --git a/contrib/llvm/lib/DebugInfo/DWARFDebugInfoEntry.cpp b/contrib/llvm/lib/DebugInfo/DWARFDebugInfoEntry.cpp index 02b15d6..10be7b4 100644 --- a/contrib/llvm/lib/DebugInfo/DWARFDebugInfoEntry.cpp +++ b/contrib/llvm/lib/DebugInfo/DWARFDebugInfoEntry.cpp @@ -11,7 +11,7 @@ #include "DWARFCompileUnit.h" #include "DWARFContext.h" #include "DWARFDebugAbbrev.h" -#include "DWARFFormValue.h" +#include "llvm/DebugInfo/DWARFFormValue.h" #include "llvm/Support/Debug.h" #include "llvm/Support/Dwarf.h" #include "llvm/Support/Format.h" @@ -94,279 +94,87 @@ void DWARFDebugInfoEntryMinimal::dumpAttribute(raw_ostream &OS, OS << ")\n"; } -bool DWARFDebugInfoEntryMinimal::extractFast(const DWARFCompileUnit *cu, - const uint8_t *fixed_form_sizes, - uint32_t *offset_ptr) { - Offset = *offset_ptr; - - DataExtractor debug_info_data = cu->getDebugInfoExtractor(); - uint64_t abbrCode = debug_info_data.getULEB128(offset_ptr); - - assert(fixed_form_sizes); // For best performance this should be specified! - - if (abbrCode) { - uint32_t offset = *offset_ptr; - - AbbrevDecl = cu->getAbbreviations()->getAbbreviationDeclaration(abbrCode); - - // Skip all data in the .debug_info for the attributes - const uint32_t numAttributes = AbbrevDecl->getNumAttributes(); - uint32_t i; - uint16_t form; - for (i=0; i<numAttributes; ++i) { - - form = AbbrevDecl->getFormByIndex(i); - - // FIXME: Currently we're checking if this is less than the last - // entry in the fixed_form_sizes table, but this should be changed - // to use dynamic dispatch. - const uint8_t fixed_skip_size = (form < DW_FORM_ref_sig8) ? 
- fixed_form_sizes[form] : 0; - if (fixed_skip_size) - offset += fixed_skip_size; - else { - bool form_is_indirect = false; - do { - form_is_indirect = false; - uint32_t form_size = 0; - switch (form) { - // Blocks if inlined data that have a length field and the data bytes - // inlined in the .debug_info. - case DW_FORM_exprloc: - case DW_FORM_block: - form_size = debug_info_data.getULEB128(&offset); - break; - case DW_FORM_block1: - form_size = debug_info_data.getU8(&offset); - break; - case DW_FORM_block2: - form_size = debug_info_data.getU16(&offset); - break; - case DW_FORM_block4: - form_size = debug_info_data.getU32(&offset); - break; - - // Inlined NULL terminated C-strings - case DW_FORM_string: - debug_info_data.getCStr(&offset); - break; - - // Compile unit address sized values - case DW_FORM_addr: - case DW_FORM_ref_addr: - form_size = cu->getAddressByteSize(); - break; - - // 0 sized form. - case DW_FORM_flag_present: - form_size = 0; - break; - - // 1 byte values - case DW_FORM_data1: - case DW_FORM_flag: - case DW_FORM_ref1: - form_size = 1; - break; - - // 2 byte values - case DW_FORM_data2: - case DW_FORM_ref2: - form_size = 2; - break; - - // 4 byte values - case DW_FORM_strp: - case DW_FORM_data4: - case DW_FORM_ref4: - form_size = 4; - break; - - // 8 byte values - case DW_FORM_data8: - case DW_FORM_ref8: - case DW_FORM_ref_sig8: - form_size = 8; - break; - - // signed or unsigned LEB 128 values - case DW_FORM_sdata: - case DW_FORM_udata: - case DW_FORM_ref_udata: - case DW_FORM_GNU_str_index: - case DW_FORM_GNU_addr_index: - debug_info_data.getULEB128(&offset); - break; - - case DW_FORM_indirect: - form_is_indirect = true; - form = debug_info_data.getULEB128(&offset); - break; - - // FIXME: 64-bit for DWARF64 - case DW_FORM_sec_offset: - debug_info_data.getU32(offset_ptr); - break; - - default: - *offset_ptr = Offset; - return false; - } - offset += form_size; - } while (form_is_indirect); - } - } - *offset_ptr = offset; - return true; - } else { +bool DWARFDebugInfoEntryMinimal::extractFast(const DWARFCompileUnit *CU, + const uint8_t *FixedFormSizes, + uint32_t *OffsetPtr) { + Offset = *OffsetPtr; + DataExtractor DebugInfoData = CU->getDebugInfoExtractor(); + uint64_t AbbrCode = DebugInfoData.getULEB128(OffsetPtr); + if (0 == AbbrCode) { + // NULL debug tag entry. AbbrevDecl = NULL; - return true; // NULL debug tag entry + return true; + } + AbbrevDecl = CU->getAbbreviations()->getAbbreviationDeclaration(AbbrCode); + assert(AbbrevDecl); + assert(FixedFormSizes); // For best performance this should be specified! + + // Skip all data in the .debug_info for the attributes + for (uint32_t i = 0, n = AbbrevDecl->getNumAttributes(); i < n; ++i) { + uint16_t Form = AbbrevDecl->getFormByIndex(i); + + // FIXME: Currently we're checking if this is less than the last + // entry in the fixed_form_sizes table, but this should be changed + // to use dynamic dispatch. + uint8_t FixedFormSize = + (Form < DW_FORM_ref_sig8) ? FixedFormSizes[Form] : 0; + if (FixedFormSize) + *OffsetPtr += FixedFormSize; + else if (!DWARFFormValue::skipValue(Form, DebugInfoData, OffsetPtr, + CU)) { + // Restore the original offset. 
+ *OffsetPtr = Offset; + return false; + } } + return true; } bool -DWARFDebugInfoEntryMinimal::extract(const DWARFCompileUnit *cu, - uint32_t *offset_ptr) { - DataExtractor debug_info_data = cu->getDebugInfoExtractor(); - const uint32_t cu_end_offset = cu->getNextCompileUnitOffset(); - const uint8_t cu_addr_size = cu->getAddressByteSize(); - uint32_t offset = *offset_ptr; - if ((offset < cu_end_offset) && debug_info_data.isValidOffset(offset)) { - Offset = offset; - - uint64_t abbrCode = debug_info_data.getULEB128(&offset); - - if (abbrCode) { - AbbrevDecl = cu->getAbbreviations()->getAbbreviationDeclaration(abbrCode); - - if (AbbrevDecl) { - uint16_t tag = AbbrevDecl->getTag(); - - bool isCompileUnitTag = tag == DW_TAG_compile_unit; - if(cu && isCompileUnitTag) - const_cast<DWARFCompileUnit*>(cu)->setBaseAddress(0); - - // Skip all data in the .debug_info for the attributes - const uint32_t numAttributes = AbbrevDecl->getNumAttributes(); - for (uint32_t i = 0; i != numAttributes; ++i) { - uint16_t attr = AbbrevDecl->getAttrByIndex(i); - uint16_t form = AbbrevDecl->getFormByIndex(i); - - if (isCompileUnitTag && - ((attr == DW_AT_entry_pc) || (attr == DW_AT_low_pc))) { - DWARFFormValue form_value(form); - if (form_value.extractValue(debug_info_data, &offset, cu)) { - if (attr == DW_AT_low_pc || attr == DW_AT_entry_pc) - const_cast<DWARFCompileUnit*>(cu) - ->setBaseAddress(form_value.getUnsigned()); - } - } else { - bool form_is_indirect = false; - do { - form_is_indirect = false; - register uint32_t form_size = 0; - switch (form) { - // Blocks if inlined data that have a length field and the data - // bytes // inlined in the .debug_info - case DW_FORM_exprloc: - case DW_FORM_block: - form_size = debug_info_data.getULEB128(&offset); - break; - case DW_FORM_block1: - form_size = debug_info_data.getU8(&offset); - break; - case DW_FORM_block2: - form_size = debug_info_data.getU16(&offset); - break; - case DW_FORM_block4: - form_size = debug_info_data.getU32(&offset); - break; - - // Inlined NULL terminated C-strings - case DW_FORM_string: - debug_info_data.getCStr(&offset); - break; - - // Compile unit address sized values - case DW_FORM_addr: - case DW_FORM_ref_addr: - form_size = cu_addr_size; - break; - - // 0 byte value - case DW_FORM_flag_present: - form_size = 0; - break; - - // 1 byte values - case DW_FORM_data1: - case DW_FORM_flag: - case DW_FORM_ref1: - form_size = 1; - break; - - // 2 byte values - case DW_FORM_data2: - case DW_FORM_ref2: - form_size = 2; - break; - - // 4 byte values - case DW_FORM_strp: - form_size = 4; - break; - - case DW_FORM_data4: - case DW_FORM_ref4: - form_size = 4; - break; - - // 8 byte values - case DW_FORM_data8: - case DW_FORM_ref8: - case DW_FORM_ref_sig8: - form_size = 8; - break; - - // signed or unsigned LEB 128 values - case DW_FORM_sdata: - case DW_FORM_udata: - case DW_FORM_ref_udata: - case DW_FORM_GNU_str_index: - case DW_FORM_GNU_addr_index: - debug_info_data.getULEB128(&offset); - break; - - case DW_FORM_indirect: - form = debug_info_data.getULEB128(&offset); - form_is_indirect = true; - break; - - // FIXME: 64-bit for DWARF64. 
- case DW_FORM_sec_offset: - debug_info_data.getU32(offset_ptr); - break; - - default: - *offset_ptr = offset; - return false; - } - - offset += form_size; - } while (form_is_indirect); - } - } - *offset_ptr = offset; - return true; +DWARFDebugInfoEntryMinimal::extract(const DWARFCompileUnit *CU, + uint32_t *OffsetPtr) { + DataExtractor DebugInfoData = CU->getDebugInfoExtractor(); + const uint32_t CUEndOffset = CU->getNextCompileUnitOffset(); + Offset = *OffsetPtr; + if ((Offset >= CUEndOffset) || !DebugInfoData.isValidOffset(Offset)) + return false; + uint64_t AbbrCode = DebugInfoData.getULEB128(OffsetPtr); + if (0 == AbbrCode) { + // NULL debug tag entry. + AbbrevDecl = NULL; + return true; + } + AbbrevDecl = CU->getAbbreviations()->getAbbreviationDeclaration(AbbrCode); + if (0 == AbbrevDecl) { + // Restore the original offset. + *OffsetPtr = Offset; + return false; + } + bool IsCompileUnitTag = (AbbrevDecl->getTag() == DW_TAG_compile_unit); + if (IsCompileUnitTag) + const_cast<DWARFCompileUnit*>(CU)->setBaseAddress(0); + + // Skip all data in the .debug_info for the attributes + for (uint32_t i = 0, n = AbbrevDecl->getNumAttributes(); i < n; ++i) { + uint16_t Attr = AbbrevDecl->getAttrByIndex(i); + uint16_t Form = AbbrevDecl->getFormByIndex(i); + + if (IsCompileUnitTag && + ((Attr == DW_AT_entry_pc) || (Attr == DW_AT_low_pc))) { + DWARFFormValue FormValue(Form); + if (FormValue.extractValue(DebugInfoData, OffsetPtr, CU)) { + if (Attr == DW_AT_low_pc || Attr == DW_AT_entry_pc) + const_cast<DWARFCompileUnit*>(CU) + ->setBaseAddress(FormValue.getUnsigned()); } - } else { - AbbrevDecl = NULL; - *offset_ptr = offset; - return true; // NULL debug tag entry + } else if (!DWARFFormValue::skipValue(Form, DebugInfoData, OffsetPtr, + CU)) { + // Restore the original offset. + *OffsetPtr = Offset; + return false; } } - - return false; + return true; } bool DWARFDebugInfoEntryMinimal::isSubprogramDIE() const { diff --git a/contrib/llvm/lib/DebugInfo/DWARFDebugInfoEntry.h b/contrib/llvm/lib/DebugInfo/DWARFDebugInfoEntry.h index 9c1b2be..9003591 100644 --- a/contrib/llvm/lib/DebugInfo/DWARFDebugInfoEntry.h +++ b/contrib/llvm/lib/DebugInfo/DWARFDebugInfoEntry.h @@ -45,12 +45,17 @@ public: uint32_t *offset_ptr, uint16_t attr, uint16_t form, unsigned indent = 0) const; - bool extractFast(const DWARFCompileUnit *cu, const uint8_t *fixed_form_sizes, - uint32_t *offset_ptr); + /// Extracts a debug info entry, which is a child of a given compile unit, + /// starting at a given offset. If DIE can't be extracted, returns false and + /// doesn't change OffsetPtr. + bool extractFast(const DWARFCompileUnit *CU, const uint8_t *FixedFormSizes, + uint32_t *OffsetPtr); /// Extract a debug info entry for a given compile unit from the /// .debug_info and .debug_abbrev data starting at the given offset. - bool extract(const DWARFCompileUnit *cu, uint32_t *offset_ptr); + /// If compile unit can't be parsed, returns false and doesn't change + /// OffsetPtr. + bool extract(const DWARFCompileUnit *CU, uint32_t *OffsetPtr); uint32_t getTag() const { return AbbrevDecl ? 
AbbrevDecl->getTag() : 0; } bool isNULL() const { return AbbrevDecl == 0; } diff --git a/contrib/llvm/lib/DebugInfo/DWARFFormValue.cpp b/contrib/llvm/lib/DebugInfo/DWARFFormValue.cpp index 9f807aa..c5583f9 100644 --- a/contrib/llvm/lib/DebugInfo/DWARFFormValue.cpp +++ b/contrib/llvm/lib/DebugInfo/DWARFFormValue.cpp @@ -7,7 +7,7 @@ // //===----------------------------------------------------------------------===// -#include "DWARFFormValue.h" +#include "llvm/DebugInfo/DWARFFormValue.h" #include "DWARFCompileUnit.h" #include "DWARFContext.h" #include "llvm/Support/Debug.h" @@ -18,39 +18,16 @@ using namespace llvm; using namespace dwarf; -static const uint8_t form_sizes_addr4[] = { - 0, // 0x00 unused - 4, // 0x01 DW_FORM_addr - 0, // 0x02 unused - 0, // 0x03 DW_FORM_block2 - 0, // 0x04 DW_FORM_block4 - 2, // 0x05 DW_FORM_data2 - 4, // 0x06 DW_FORM_data4 - 8, // 0x07 DW_FORM_data8 - 0, // 0x08 DW_FORM_string - 0, // 0x09 DW_FORM_block - 0, // 0x0a DW_FORM_block1 - 1, // 0x0b DW_FORM_data1 - 1, // 0x0c DW_FORM_flag - 0, // 0x0d DW_FORM_sdata - 4, // 0x0e DW_FORM_strp - 0, // 0x0f DW_FORM_udata - 4, // 0x10 DW_FORM_ref_addr - 1, // 0x11 DW_FORM_ref1 - 2, // 0x12 DW_FORM_ref2 - 4, // 0x13 DW_FORM_ref4 - 8, // 0x14 DW_FORM_ref8 - 0, // 0x15 DW_FORM_ref_udata - 0, // 0x16 DW_FORM_indirect - 4, // 0x17 DW_FORM_sec_offset - 0, // 0x18 DW_FORM_exprloc - 0, // 0x19 DW_FORM_flag_present - 8, // 0x20 DW_FORM_ref_sig8 +namespace { +template <uint8_t AddrSize, uint8_t RefAddrSize> struct FixedFormSizes { + static const uint8_t sizes[]; }; +} -static const uint8_t form_sizes_addr8[] = { +template <uint8_t AddrSize, uint8_t RefAddrSize> +const uint8_t FixedFormSizes<AddrSize, RefAddrSize>::sizes[] = { 0, // 0x00 unused - 8, // 0x01 DW_FORM_addr + AddrSize, // 0x01 DW_FORM_addr 0, // 0x02 unused 0, // 0x03 DW_FORM_block2 0, // 0x04 DW_FORM_block4 @@ -65,7 +42,7 @@ static const uint8_t form_sizes_addr8[] = { 0, // 0x0d DW_FORM_sdata 4, // 0x0e DW_FORM_strp 0, // 0x0f DW_FORM_udata - 8, // 0x10 DW_FORM_ref_addr + RefAddrSize, // 0x10 DW_FORM_ref_addr 1, // 0x11 DW_FORM_ref1 2, // 0x12 DW_FORM_ref2 4, // 0x13 DW_FORM_ref4 @@ -78,13 +55,23 @@ static const uint8_t form_sizes_addr8[] = { 8, // 0x20 DW_FORM_ref_sig8 }; +static uint8_t getRefAddrSize(uint8_t AddrSize, uint16_t Version) { + // FIXME: Support DWARF64. + return (Version == 2) ? AddrSize : 4; +} + const uint8_t * -DWARFFormValue::getFixedFormSizesForAddressSize(uint8_t addr_size) { - switch (addr_size) { - case 4: return form_sizes_addr4; - case 8: return form_sizes_addr8; - } - return NULL; +DWARFFormValue::getFixedFormSizes(uint8_t AddrSize, uint16_t Version) { + uint8_t RefAddrSize = getRefAddrSize(AddrSize, Version); + if (AddrSize == 4 && RefAddrSize == 4) + return FixedFormSizes<4, 4>::sizes; + if (AddrSize == 4 && RefAddrSize == 8) + return FixedFormSizes<4, 8>::sizes; + if (AddrSize == 8 && RefAddrSize == 4) + return FixedFormSizes<8, 4>::sizes; + if (AddrSize == 8 && RefAddrSize == 8) + return FixedFormSizes<8, 8>::sizes; + return 0; } bool @@ -100,14 +87,16 @@ DWARFFormValue::extractValue(DataExtractor data, uint32_t *offset_ptr, switch (Form) { case DW_FORM_addr: case DW_FORM_ref_addr: { - RelocAddrMap::const_iterator AI - = cu->getRelocMap()->find(*offset_ptr); + uint16_t AddrSize = + (Form == DW_FORM_addr) + ? 
cu->getAddressByteSize() + : getRefAddrSize(cu->getAddressByteSize(), cu->getVersion()); + RelocAddrMap::const_iterator AI = cu->getRelocMap()->find(*offset_ptr); if (AI != cu->getRelocMap()->end()) { const std::pair<uint8_t, int64_t> &R = AI->second; - Value.uval = data.getUnsigned(offset_ptr, cu->getAddressByteSize()) + - R.second; + Value.uval = data.getUnsigned(offset_ptr, AddrSize) + R.second; } else - Value.uval = data.getUnsigned(offset_ptr, cu->getAddressByteSize()); + Value.uval = data.getUnsigned(offset_ptr, AddrSize); break; } case DW_FORM_exprloc: @@ -172,10 +161,17 @@ DWARFFormValue::extractValue(DataExtractor data, uint32_t *offset_ptr, Form = data.getULEB128(offset_ptr); indirect = true; break; - case DW_FORM_sec_offset: + case DW_FORM_sec_offset: { // FIXME: This is 64-bit for DWARF64. - Value.uval = data.getU32(offset_ptr); + RelocAddrMap::const_iterator AI + = cu->getRelocMap()->find(*offset_ptr); + if (AI != cu->getRelocMap()->end()) { + const std::pair<uint8_t, int64_t> &R = AI->second; + Value.uval = data.getU32(offset_ptr) + R.second; + } else + Value.uval = data.getU32(offset_ptr); break; + } case DW_FORM_flag_present: Value.uval = 1; break; @@ -216,7 +212,6 @@ DWARFFormValue::skipValue(uint16_t form, DataExtractor debug_info_data, uint32_t *offset_ptr, const DWARFCompileUnit *cu) { bool indirect = false; do { - indirect = false; switch (form) { // Blocks if inlined data that have a length field and the data bytes // inlined in the .debug_info @@ -249,9 +244,11 @@ DWARFFormValue::skipValue(uint16_t form, DataExtractor debug_info_data, // Compile unit address sized values case DW_FORM_addr: - case DW_FORM_ref_addr: *offset_ptr += cu->getAddressByteSize(); return true; + case DW_FORM_ref_addr: + *offset_ptr += getRefAddrSize(cu->getAddressByteSize(), cu->getVersion()); + return true; // 0 byte values - implied from the form. case DW_FORM_flag_present: diff --git a/contrib/llvm/lib/DebugInfo/DWARFFormValue.h b/contrib/llvm/lib/DebugInfo/DWARFFormValue.h deleted file mode 100644 index b863001..0000000 --- a/contrib/llvm/lib/DebugInfo/DWARFFormValue.h +++ /dev/null @@ -1,82 +0,0 @@ -//===-- DWARFFormValue.h ----------------------------------------*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// - -#ifndef LLVM_DEBUGINFO_DWARFFORMVALUE_H -#define LLVM_DEBUGINFO_DWARFFORMVALUE_H - -#include "llvm/Support/DataExtractor.h" - -namespace llvm { - -class DWARFCompileUnit; -class raw_ostream; - -class DWARFFormValue { -public: - struct ValueType { - ValueType() : data(NULL) { - uval = 0; - } - - union { - uint64_t uval; - int64_t sval; - const char* cstr; - }; - const uint8_t* data; - }; - - enum { - eValueTypeInvalid = 0, - eValueTypeUnsigned, - eValueTypeSigned, - eValueTypeCStr, - eValueTypeBlock - }; - -private: - uint16_t Form; // Form for this value. - ValueType Value; // Contains all data for the form. 
- -public: - DWARFFormValue(uint16_t form = 0) : Form(form) {} - uint16_t getForm() const { return Form; } - const ValueType& value() const { return Value; } - void dump(raw_ostream &OS, const DWARFCompileUnit* cu) const; - bool extractValue(DataExtractor data, uint32_t *offset_ptr, - const DWARFCompileUnit *cu); - bool isInlinedCStr() const { - return Value.data != NULL && Value.data == (const uint8_t*)Value.cstr; - } - const uint8_t *BlockData() const; - uint64_t getReference(const DWARFCompileUnit* cu) const; - - /// Resolve any compile unit specific references so that we don't need - /// the compile unit at a later time in order to work with the form - /// value. - bool resolveCompileUnitReferences(const DWARFCompileUnit* cu); - uint64_t getUnsigned() const { return Value.uval; } - int64_t getSigned() const { return Value.sval; } - const char *getAsCString(const DataExtractor *debug_str_data_ptr) const; - const char *getIndirectCString(const DataExtractor *, - const DataExtractor *) const; - uint64_t getIndirectAddress(const DataExtractor *, - const DWARFCompileUnit *) const; - bool skipValue(DataExtractor debug_info_data, uint32_t *offset_ptr, - const DWARFCompileUnit *cu) const; - static bool skipValue(uint16_t form, DataExtractor debug_info_data, - uint32_t *offset_ptr, const DWARFCompileUnit *cu); - static bool isBlockForm(uint16_t form); - static bool isDataForm(uint16_t form); - static const uint8_t *getFixedFormSizesForAddressSize(uint8_t addr_size); -}; - -} - -#endif diff --git a/contrib/llvm/lib/ExecutionEngine/ExecutionEngine.cpp b/contrib/llvm/lib/ExecutionEngine/ExecutionEngine.cpp index 906a3a3..e43ba4f 100644 --- a/contrib/llvm/lib/ExecutionEngine/ExecutionEngine.cpp +++ b/contrib/llvm/lib/ExecutionEngine/ExecutionEngine.cpp @@ -948,7 +948,7 @@ static void StoreIntToMemory(const APInt &IntVal, uint8_t *Dst, assert((IntVal.getBitWidth()+7)/8 >= StoreBytes && "Integer too small!"); const uint8_t *Src = (const uint8_t *)IntVal.getRawData(); - if (sys::isLittleEndianHost()) { + if (sys::IsLittleEndianHost) { // Little-endian host - the source is ordered from LSB to MSB. Order the // destination from LSB to MSB: Do a straight copy. memcpy(Dst, Src, StoreBytes); @@ -1009,7 +1009,7 @@ void ExecutionEngine::StoreValueToMemory(const GenericValue &Val, break; } - if (sys::isLittleEndianHost() != getDataLayout()->isLittleEndian()) + if (sys::IsLittleEndianHost != getDataLayout()->isLittleEndian()) // Host and target are different endian - reverse the stored bytes. std::reverse((uint8_t*)Ptr, StoreBytes + (uint8_t*)Ptr); } @@ -1021,7 +1021,7 @@ static void LoadIntFromMemory(APInt &IntVal, uint8_t *Src, unsigned LoadBytes) { uint8_t *Dst = reinterpret_cast<uint8_t *>( const_cast<uint64_t *>(IntVal.getRawData())); - if (sys::isLittleEndianHost()) + if (sys::IsLittleEndianHost) // Little-endian host - the destination must be ordered from LSB to MSB. // The source is ordered from LSB to MSB: Do a straight copy. 
memcpy(Dst, Src, LoadBytes); diff --git a/contrib/llvm/lib/ExecutionEngine/ExecutionEngineBindings.cpp b/contrib/llvm/lib/ExecutionEngine/ExecutionEngineBindings.cpp index f4e8246..f9b08a0 100644 --- a/contrib/llvm/lib/ExecutionEngine/ExecutionEngineBindings.cpp +++ b/contrib/llvm/lib/ExecutionEngine/ExecutionEngineBindings.cpp @@ -15,11 +15,33 @@ #include "llvm-c/ExecutionEngine.h" #include "llvm/ExecutionEngine/ExecutionEngine.h" #include "llvm/ExecutionEngine/GenericValue.h" +#include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/Module.h" #include "llvm/Support/ErrorHandling.h" #include <cstring> using namespace llvm; +// Wrapping the C bindings types. +DEFINE_SIMPLE_CONVERSION_FUNCTIONS(GenericValue, LLVMGenericValueRef) + +inline DataLayout *unwrap(LLVMTargetDataRef P) { + return reinterpret_cast<DataLayout*>(P); +} + +inline LLVMTargetDataRef wrap(const DataLayout *P) { + return reinterpret_cast<LLVMTargetDataRef>(const_cast<DataLayout*>(P)); +} + +inline TargetLibraryInfo *unwrap(LLVMTargetLibraryInfoRef P) { + return reinterpret_cast<TargetLibraryInfo*>(P); +} + +inline LLVMTargetLibraryInfoRef wrap(const TargetLibraryInfo *P) { + TargetLibraryInfo *X = const_cast<TargetLibraryInfo*>(P); + return reinterpret_cast<LLVMTargetLibraryInfoRef>(X); +} + /*===-- Operations on generic values --------------------------------------===*/ LLVMGenericValueRef LLVMCreateGenericValueOfInt(LLVMTypeRef Ty, @@ -132,6 +154,59 @@ LLVMBool LLVMCreateJITCompilerForModule(LLVMExecutionEngineRef *OutJIT, return 1; } +void LLVMInitializeMCJITCompilerOptions(LLVMMCJITCompilerOptions *PassedOptions, + size_t SizeOfPassedOptions) { + LLVMMCJITCompilerOptions options; + options.OptLevel = 0; + options.CodeModel = LLVMCodeModelJITDefault; + options.NoFramePointerElim = false; + options.EnableFastISel = false; + + memcpy(PassedOptions, &options, + std::min(sizeof(options), SizeOfPassedOptions)); +} + +LLVMBool LLVMCreateMCJITCompilerForModule( + LLVMExecutionEngineRef *OutJIT, LLVMModuleRef M, + LLVMMCJITCompilerOptions *PassedOptions, size_t SizeOfPassedOptions, + char **OutError) { + LLVMMCJITCompilerOptions options; + // If the user passed a larger sized options struct, then they were compiled + // against a newer LLVM. Tell them that something is wrong. + if (SizeOfPassedOptions > sizeof(options)) { + *OutError = strdup( + "Refusing to use options struct that is larger than my own; assuming " + "LLVM library mismatch."); + return 1; + } + + // Defend against the user having an old version of the API by ensuring that + // any fields they didn't see are cleared. We must defend against fields being + // set to the bitwise equivalent of zero, and assume that this means "do the + // default" as if that option hadn't been available. 
+ LLVMInitializeMCJITCompilerOptions(&options, sizeof(options)); + memcpy(&options, PassedOptions, SizeOfPassedOptions); + + TargetOptions targetOptions; + targetOptions.NoFramePointerElim = options.NoFramePointerElim; + targetOptions.EnableFastISel = options.EnableFastISel; + + std::string Error; + EngineBuilder builder(unwrap(M)); + builder.setEngineKind(EngineKind::JIT) + .setErrorStr(&Error) + .setUseMCJIT(true) + .setOptLevel((CodeGenOpt::Level)options.OptLevel) + .setCodeModel(unwrap(options.CodeModel)) + .setTargetOptions(targetOptions); + if (ExecutionEngine *JIT = builder.create()) { + *OutJIT = wrap(JIT); + return 0; + } + *OutError = strdup(Error.c_str()); + return 1; +} + LLVMBool LLVMCreateExecutionEngine(LLVMExecutionEngineRef *OutEE, LLVMModuleProviderRef MP, char **OutError) { @@ -176,6 +251,8 @@ void LLVMRunStaticDestructors(LLVMExecutionEngineRef EE) { int LLVMRunFunctionAsMain(LLVMExecutionEngineRef EE, LLVMValueRef F, unsigned ArgC, const char * const *ArgV, const char * const *EnvP) { + unwrap(EE)->finalizeObject(); + std::vector<std::string> ArgVec; for (unsigned I = 0; I != ArgC; ++I) ArgVec.push_back(ArgV[I]); @@ -186,6 +263,8 @@ int LLVMRunFunctionAsMain(LLVMExecutionEngineRef EE, LLVMValueRef F, LLVMGenericValueRef LLVMRunFunction(LLVMExecutionEngineRef EE, LLVMValueRef F, unsigned NumArgs, LLVMGenericValueRef *Args) { + unwrap(EE)->finalizeObject(); + std::vector<GenericValue> ArgVec; ArgVec.reserve(NumArgs); for (unsigned I = 0; I != NumArgs; ++I) @@ -234,7 +313,8 @@ LLVMBool LLVMFindFunction(LLVMExecutionEngineRef EE, const char *Name, return 1; } -void *LLVMRecompileAndRelinkFunction(LLVMExecutionEngineRef EE, LLVMValueRef Fn) { +void *LLVMRecompileAndRelinkFunction(LLVMExecutionEngineRef EE, + LLVMValueRef Fn) { return unwrap(EE)->recompileAndRelinkFunction(unwrap<Function>(Fn)); } @@ -248,5 +328,7 @@ void LLVMAddGlobalMapping(LLVMExecutionEngineRef EE, LLVMValueRef Global, } void *LLVMGetPointerToGlobal(LLVMExecutionEngineRef EE, LLVMValueRef Global) { + unwrap(EE)->finalizeObject(); + return unwrap(EE)->getPointerToGlobal(unwrap<GlobalValue>(Global)); } diff --git a/contrib/llvm/lib/ExecutionEngine/Interpreter/Execution.cpp b/contrib/llvm/lib/ExecutionEngine/Interpreter/Execution.cpp index 526c04e..b95a9e8 100644 --- a/contrib/llvm/lib/ExecutionEngine/Interpreter/Execution.cpp +++ b/contrib/llvm/lib/ExecutionEngine/Interpreter/Execution.cpp @@ -114,6 +114,15 @@ static void executeFRemInst(GenericValue &Dest, GenericValue Src1, Dest.IntVal = APInt(1,Src1.IntVal.OP(Src2.IntVal)); \ break; +#define IMPLEMENT_VECTOR_INTEGER_ICMP(OP, TY) \ + case Type::VectorTyID: { \ + assert(Src1.AggregateVal.size() == Src2.AggregateVal.size()); \ + Dest.AggregateVal.resize( Src1.AggregateVal.size() ); \ + for( uint32_t _i=0;_i<Src1.AggregateVal.size();_i++) \ + Dest.AggregateVal[_i].IntVal = APInt(1, \ + Src1.AggregateVal[_i].IntVal.OP(Src2.AggregateVal[_i].IntVal));\ + } break; + // Handle pointers specially because they must be compared with only as much // width as the host has. 
We _do not_ want to be comparing 64 bit values when // running on a 32-bit target, otherwise the upper 32 bits might mess up @@ -129,6 +138,7 @@ static GenericValue executeICMP_EQ(GenericValue Src1, GenericValue Src2, GenericValue Dest; switch (Ty->getTypeID()) { IMPLEMENT_INTEGER_ICMP(eq,Ty); + IMPLEMENT_VECTOR_INTEGER_ICMP(eq,Ty); IMPLEMENT_POINTER_ICMP(==); default: dbgs() << "Unhandled type for ICMP_EQ predicate: " << *Ty << "\n"; @@ -142,6 +152,7 @@ static GenericValue executeICMP_NE(GenericValue Src1, GenericValue Src2, GenericValue Dest; switch (Ty->getTypeID()) { IMPLEMENT_INTEGER_ICMP(ne,Ty); + IMPLEMENT_VECTOR_INTEGER_ICMP(ne,Ty); IMPLEMENT_POINTER_ICMP(!=); default: dbgs() << "Unhandled type for ICMP_NE predicate: " << *Ty << "\n"; @@ -155,6 +166,7 @@ static GenericValue executeICMP_ULT(GenericValue Src1, GenericValue Src2, GenericValue Dest; switch (Ty->getTypeID()) { IMPLEMENT_INTEGER_ICMP(ult,Ty); + IMPLEMENT_VECTOR_INTEGER_ICMP(ult,Ty); IMPLEMENT_POINTER_ICMP(<); default: dbgs() << "Unhandled type for ICMP_ULT predicate: " << *Ty << "\n"; @@ -168,6 +180,7 @@ static GenericValue executeICMP_SLT(GenericValue Src1, GenericValue Src2, GenericValue Dest; switch (Ty->getTypeID()) { IMPLEMENT_INTEGER_ICMP(slt,Ty); + IMPLEMENT_VECTOR_INTEGER_ICMP(slt,Ty); IMPLEMENT_POINTER_ICMP(<); default: dbgs() << "Unhandled type for ICMP_SLT predicate: " << *Ty << "\n"; @@ -181,6 +194,7 @@ static GenericValue executeICMP_UGT(GenericValue Src1, GenericValue Src2, GenericValue Dest; switch (Ty->getTypeID()) { IMPLEMENT_INTEGER_ICMP(ugt,Ty); + IMPLEMENT_VECTOR_INTEGER_ICMP(ugt,Ty); IMPLEMENT_POINTER_ICMP(>); default: dbgs() << "Unhandled type for ICMP_UGT predicate: " << *Ty << "\n"; @@ -194,6 +208,7 @@ static GenericValue executeICMP_SGT(GenericValue Src1, GenericValue Src2, GenericValue Dest; switch (Ty->getTypeID()) { IMPLEMENT_INTEGER_ICMP(sgt,Ty); + IMPLEMENT_VECTOR_INTEGER_ICMP(sgt,Ty); IMPLEMENT_POINTER_ICMP(>); default: dbgs() << "Unhandled type for ICMP_SGT predicate: " << *Ty << "\n"; @@ -207,6 +222,7 @@ static GenericValue executeICMP_ULE(GenericValue Src1, GenericValue Src2, GenericValue Dest; switch (Ty->getTypeID()) { IMPLEMENT_INTEGER_ICMP(ule,Ty); + IMPLEMENT_VECTOR_INTEGER_ICMP(ule,Ty); IMPLEMENT_POINTER_ICMP(<=); default: dbgs() << "Unhandled type for ICMP_ULE predicate: " << *Ty << "\n"; @@ -220,6 +236,7 @@ static GenericValue executeICMP_SLE(GenericValue Src1, GenericValue Src2, GenericValue Dest; switch (Ty->getTypeID()) { IMPLEMENT_INTEGER_ICMP(sle,Ty); + IMPLEMENT_VECTOR_INTEGER_ICMP(sle,Ty); IMPLEMENT_POINTER_ICMP(<=); default: dbgs() << "Unhandled type for ICMP_SLE predicate: " << *Ty << "\n"; @@ -233,6 +250,7 @@ static GenericValue executeICMP_UGE(GenericValue Src1, GenericValue Src2, GenericValue Dest; switch (Ty->getTypeID()) { IMPLEMENT_INTEGER_ICMP(uge,Ty); + IMPLEMENT_VECTOR_INTEGER_ICMP(uge,Ty); IMPLEMENT_POINTER_ICMP(>=); default: dbgs() << "Unhandled type for ICMP_UGE predicate: " << *Ty << "\n"; @@ -246,6 +264,7 @@ static GenericValue executeICMP_SGE(GenericValue Src1, GenericValue Src2, GenericValue Dest; switch (Ty->getTypeID()) { IMPLEMENT_INTEGER_ICMP(sge,Ty); + IMPLEMENT_VECTOR_INTEGER_ICMP(sge,Ty); IMPLEMENT_POINTER_ICMP(>=); default: dbgs() << "Unhandled type for ICMP_SGE predicate: " << *Ty << "\n"; @@ -285,12 +304,29 @@ void Interpreter::visitICmpInst(ICmpInst &I) { Dest.IntVal = APInt(1,Src1.TY##Val OP Src2.TY##Val); \ break +#define IMPLEMENT_VECTOR_FCMP_T(OP, TY) \ + assert(Src1.AggregateVal.size() == Src2.AggregateVal.size()); \ + 
Dest.AggregateVal.resize( Src1.AggregateVal.size() ); \ + for( uint32_t _i=0;_i<Src1.AggregateVal.size();_i++) \ + Dest.AggregateVal[_i].IntVal = APInt(1, \ + Src1.AggregateVal[_i].TY##Val OP Src2.AggregateVal[_i].TY##Val);\ + break; + +#define IMPLEMENT_VECTOR_FCMP(OP) \ + case Type::VectorTyID: \ + if(dyn_cast<VectorType>(Ty)->getElementType()->isFloatTy()) { \ + IMPLEMENT_VECTOR_FCMP_T(OP, Float); \ + } else { \ + IMPLEMENT_VECTOR_FCMP_T(OP, Double); \ + } + static GenericValue executeFCMP_OEQ(GenericValue Src1, GenericValue Src2, Type *Ty) { GenericValue Dest; switch (Ty->getTypeID()) { IMPLEMENT_FCMP(==, Float); IMPLEMENT_FCMP(==, Double); + IMPLEMENT_VECTOR_FCMP(==); default: dbgs() << "Unhandled type for FCmp EQ instruction: " << *Ty << "\n"; llvm_unreachable(0); @@ -298,17 +334,65 @@ static GenericValue executeFCMP_OEQ(GenericValue Src1, GenericValue Src2, return Dest; } +#define IMPLEMENT_SCALAR_NANS(TY, X,Y) \ + if (TY->isFloatTy()) { \ + if (X.FloatVal != X.FloatVal || Y.FloatVal != Y.FloatVal) { \ + Dest.IntVal = APInt(1,false); \ + return Dest; \ + } \ + } else { \ + if (X.DoubleVal != X.DoubleVal || Y.DoubleVal != Y.DoubleVal) { \ + Dest.IntVal = APInt(1,false); \ + return Dest; \ + } \ + } + +#define MASK_VECTOR_NANS_T(X,Y, TZ, FLAG) \ + assert(X.AggregateVal.size() == Y.AggregateVal.size()); \ + Dest.AggregateVal.resize( X.AggregateVal.size() ); \ + for( uint32_t _i=0;_i<X.AggregateVal.size();_i++) { \ + if (X.AggregateVal[_i].TZ##Val != X.AggregateVal[_i].TZ##Val || \ + Y.AggregateVal[_i].TZ##Val != Y.AggregateVal[_i].TZ##Val) \ + Dest.AggregateVal[_i].IntVal = APInt(1,FLAG); \ + else { \ + Dest.AggregateVal[_i].IntVal = APInt(1,!FLAG); \ + } \ + } + +#define MASK_VECTOR_NANS(TY, X,Y, FLAG) \ + if (TY->isVectorTy()) { \ + if (dyn_cast<VectorType>(TY)->getElementType()->isFloatTy()) { \ + MASK_VECTOR_NANS_T(X, Y, Float, FLAG) \ + } else { \ + MASK_VECTOR_NANS_T(X, Y, Double, FLAG) \ + } \ + } \ + + + static GenericValue executeFCMP_ONE(GenericValue Src1, GenericValue Src2, - Type *Ty) { + Type *Ty) +{ GenericValue Dest; + // if input is scalar value and Src1 or Src2 is NaN return false + IMPLEMENT_SCALAR_NANS(Ty, Src1, Src2) + // if vector input detect NaNs and fill mask + MASK_VECTOR_NANS(Ty, Src1, Src2, false) + GenericValue DestMask = Dest; switch (Ty->getTypeID()) { IMPLEMENT_FCMP(!=, Float); IMPLEMENT_FCMP(!=, Double); - - default: - dbgs() << "Unhandled type for FCmp NE instruction: " << *Ty << "\n"; - llvm_unreachable(0); + IMPLEMENT_VECTOR_FCMP(!=); + default: + dbgs() << "Unhandled type for FCmp NE instruction: " << *Ty << "\n"; + llvm_unreachable(0); } + // in vector case mask out NaN elements + if (Ty->isVectorTy()) + for( size_t _i=0; _i<Src1.AggregateVal.size(); _i++) + if (DestMask.AggregateVal[_i].IntVal == false) + Dest.AggregateVal[_i].IntVal = APInt(1,false); + return Dest; } @@ -318,6 +402,7 @@ static GenericValue executeFCMP_OLE(GenericValue Src1, GenericValue Src2, switch (Ty->getTypeID()) { IMPLEMENT_FCMP(<=, Float); IMPLEMENT_FCMP(<=, Double); + IMPLEMENT_VECTOR_FCMP(<=); default: dbgs() << "Unhandled type for FCmp LE instruction: " << *Ty << "\n"; llvm_unreachable(0); @@ -331,6 +416,7 @@ static GenericValue executeFCMP_OGE(GenericValue Src1, GenericValue Src2, switch (Ty->getTypeID()) { IMPLEMENT_FCMP(>=, Float); IMPLEMENT_FCMP(>=, Double); + IMPLEMENT_VECTOR_FCMP(>=); default: dbgs() << "Unhandled type for FCmp GE instruction: " << *Ty << "\n"; llvm_unreachable(0); @@ -344,6 +430,7 @@ static GenericValue executeFCMP_OLT(GenericValue Src1, 
GenericValue Src2, switch (Ty->getTypeID()) { IMPLEMENT_FCMP(<, Float); IMPLEMENT_FCMP(<, Double); + IMPLEMENT_VECTOR_FCMP(<); default: dbgs() << "Unhandled type for FCmp LT instruction: " << *Ty << "\n"; llvm_unreachable(0); @@ -357,6 +444,7 @@ static GenericValue executeFCMP_OGT(GenericValue Src1, GenericValue Src2, switch (Ty->getTypeID()) { IMPLEMENT_FCMP(>, Float); IMPLEMENT_FCMP(>, Double); + IMPLEMENT_VECTOR_FCMP(>); default: dbgs() << "Unhandled type for FCmp GT instruction: " << *Ty << "\n"; llvm_unreachable(0); @@ -375,18 +463,32 @@ static GenericValue executeFCMP_OGT(GenericValue Src1, GenericValue Src2, return Dest; \ } +#define IMPLEMENT_VECTOR_UNORDERED(TY, X,Y, _FUNC) \ + if (TY->isVectorTy()) { \ + GenericValue DestMask = Dest; \ + Dest = _FUNC(Src1, Src2, Ty); \ + for( size_t _i=0; _i<Src1.AggregateVal.size(); _i++) \ + if (DestMask.AggregateVal[_i].IntVal == true) \ + Dest.AggregateVal[_i].IntVal = APInt(1,true); \ + return Dest; \ + } static GenericValue executeFCMP_UEQ(GenericValue Src1, GenericValue Src2, Type *Ty) { GenericValue Dest; IMPLEMENT_UNORDERED(Ty, Src1, Src2) + MASK_VECTOR_NANS(Ty, Src1, Src2, true) + IMPLEMENT_VECTOR_UNORDERED(Ty, Src1, Src2, executeFCMP_OEQ) return executeFCMP_OEQ(Src1, Src2, Ty); + } static GenericValue executeFCMP_UNE(GenericValue Src1, GenericValue Src2, Type *Ty) { GenericValue Dest; IMPLEMENT_UNORDERED(Ty, Src1, Src2) + MASK_VECTOR_NANS(Ty, Src1, Src2, true) + IMPLEMENT_VECTOR_UNORDERED(Ty, Src1, Src2, executeFCMP_ONE) return executeFCMP_ONE(Src1, Src2, Ty); } @@ -394,6 +496,8 @@ static GenericValue executeFCMP_ULE(GenericValue Src1, GenericValue Src2, Type *Ty) { GenericValue Dest; IMPLEMENT_UNORDERED(Ty, Src1, Src2) + MASK_VECTOR_NANS(Ty, Src1, Src2, true) + IMPLEMENT_VECTOR_UNORDERED(Ty, Src1, Src2, executeFCMP_OLE) return executeFCMP_OLE(Src1, Src2, Ty); } @@ -401,6 +505,8 @@ static GenericValue executeFCMP_UGE(GenericValue Src1, GenericValue Src2, Type *Ty) { GenericValue Dest; IMPLEMENT_UNORDERED(Ty, Src1, Src2) + MASK_VECTOR_NANS(Ty, Src1, Src2, true) + IMPLEMENT_VECTOR_UNORDERED(Ty, Src1, Src2, executeFCMP_OGE) return executeFCMP_OGE(Src1, Src2, Ty); } @@ -408,6 +514,8 @@ static GenericValue executeFCMP_ULT(GenericValue Src1, GenericValue Src2, Type *Ty) { GenericValue Dest; IMPLEMENT_UNORDERED(Ty, Src1, Src2) + MASK_VECTOR_NANS(Ty, Src1, Src2, true) + IMPLEMENT_VECTOR_UNORDERED(Ty, Src1, Src2, executeFCMP_OLT) return executeFCMP_OLT(Src1, Src2, Ty); } @@ -415,33 +523,88 @@ static GenericValue executeFCMP_UGT(GenericValue Src1, GenericValue Src2, Type *Ty) { GenericValue Dest; IMPLEMENT_UNORDERED(Ty, Src1, Src2) + MASK_VECTOR_NANS(Ty, Src1, Src2, true) + IMPLEMENT_VECTOR_UNORDERED(Ty, Src1, Src2, executeFCMP_OGT) return executeFCMP_OGT(Src1, Src2, Ty); } static GenericValue executeFCMP_ORD(GenericValue Src1, GenericValue Src2, Type *Ty) { GenericValue Dest; - if (Ty->isFloatTy()) + if(Ty->isVectorTy()) { + assert(Src1.AggregateVal.size() == Src2.AggregateVal.size()); + Dest.AggregateVal.resize( Src1.AggregateVal.size() ); + if(dyn_cast<VectorType>(Ty)->getElementType()->isFloatTy()) { + for( size_t _i=0;_i<Src1.AggregateVal.size();_i++) + Dest.AggregateVal[_i].IntVal = APInt(1, + ( (Src1.AggregateVal[_i].FloatVal == + Src1.AggregateVal[_i].FloatVal) && + (Src2.AggregateVal[_i].FloatVal == + Src2.AggregateVal[_i].FloatVal))); + } else { + for( size_t _i=0;_i<Src1.AggregateVal.size();_i++) + Dest.AggregateVal[_i].IntVal = APInt(1, + ( (Src1.AggregateVal[_i].DoubleVal == + Src1.AggregateVal[_i].DoubleVal) && + 
(Src2.AggregateVal[_i].DoubleVal == + Src2.AggregateVal[_i].DoubleVal))); + } + } else if (Ty->isFloatTy()) Dest.IntVal = APInt(1,(Src1.FloatVal == Src1.FloatVal && Src2.FloatVal == Src2.FloatVal)); - else + else { Dest.IntVal = APInt(1,(Src1.DoubleVal == Src1.DoubleVal && Src2.DoubleVal == Src2.DoubleVal)); + } return Dest; } static GenericValue executeFCMP_UNO(GenericValue Src1, GenericValue Src2, Type *Ty) { GenericValue Dest; - if (Ty->isFloatTy()) + if(Ty->isVectorTy()) { + assert(Src1.AggregateVal.size() == Src2.AggregateVal.size()); + Dest.AggregateVal.resize( Src1.AggregateVal.size() ); + if(dyn_cast<VectorType>(Ty)->getElementType()->isFloatTy()) { + for( size_t _i=0;_i<Src1.AggregateVal.size();_i++) + Dest.AggregateVal[_i].IntVal = APInt(1, + ( (Src1.AggregateVal[_i].FloatVal != + Src1.AggregateVal[_i].FloatVal) || + (Src2.AggregateVal[_i].FloatVal != + Src2.AggregateVal[_i].FloatVal))); + } else { + for( size_t _i=0;_i<Src1.AggregateVal.size();_i++) + Dest.AggregateVal[_i].IntVal = APInt(1, + ( (Src1.AggregateVal[_i].DoubleVal != + Src1.AggregateVal[_i].DoubleVal) || + (Src2.AggregateVal[_i].DoubleVal != + Src2.AggregateVal[_i].DoubleVal))); + } + } else if (Ty->isFloatTy()) Dest.IntVal = APInt(1,(Src1.FloatVal != Src1.FloatVal || Src2.FloatVal != Src2.FloatVal)); - else + else { Dest.IntVal = APInt(1,(Src1.DoubleVal != Src1.DoubleVal || Src2.DoubleVal != Src2.DoubleVal)); + } return Dest; } +static GenericValue executeFCMP_BOOL(GenericValue Src1, GenericValue Src2, + const Type *Ty, const bool val) { + GenericValue Dest; + if(Ty->isVectorTy()) { + assert(Src1.AggregateVal.size() == Src2.AggregateVal.size()); + Dest.AggregateVal.resize( Src1.AggregateVal.size() ); + for( size_t _i=0; _i<Src1.AggregateVal.size(); _i++) + Dest.AggregateVal[_i].IntVal = APInt(1,val); + } else { + Dest.IntVal = APInt(1, val); + } + + return Dest; +} + void Interpreter::visitFCmpInst(FCmpInst &I) { ExecutionContext &SF = ECStack.back(); Type *Ty = I.getOperand(0)->getType(); @@ -450,8 +613,14 @@ void Interpreter::visitFCmpInst(FCmpInst &I) { GenericValue R; // Result switch (I.getPredicate()) { - case FCmpInst::FCMP_FALSE: R.IntVal = APInt(1,false); break; - case FCmpInst::FCMP_TRUE: R.IntVal = APInt(1,true); break; + default: + dbgs() << "Don't know how to handle this FCmp predicate!\n-->" << I; + llvm_unreachable(0); + break; + case FCmpInst::FCMP_FALSE: R = executeFCMP_BOOL(Src1, Src2, Ty, false); + break; + case FCmpInst::FCMP_TRUE: R = executeFCMP_BOOL(Src1, Src2, Ty, true); + break; case FCmpInst::FCMP_ORD: R = executeFCMP_ORD(Src1, Src2, Ty); break; case FCmpInst::FCMP_UNO: R = executeFCMP_UNO(Src1, Src2, Ty); break; case FCmpInst::FCMP_UEQ: R = executeFCMP_UEQ(Src1, Src2, Ty); break; @@ -466,9 +635,6 @@ void Interpreter::visitFCmpInst(FCmpInst &I) { case FCmpInst::FCMP_OLE: R = executeFCMP_OLE(Src1, Src2, Ty); break; case FCmpInst::FCMP_UGE: R = executeFCMP_UGE(Src1, Src2, Ty); break; case FCmpInst::FCMP_OGE: R = executeFCMP_OGE(Src1, Src2, Ty); break; - default: - dbgs() << "Don't know how to handle this FCmp predicate!\n-->" << I; - llvm_unreachable(0); } SetValue(&I, R, SF); @@ -502,16 +668,8 @@ static GenericValue executeCmpInst(unsigned predicate, GenericValue Src1, case FCmpInst::FCMP_ULE: return executeFCMP_ULE(Src1, Src2, Ty); case FCmpInst::FCMP_OGE: return executeFCMP_OGE(Src1, Src2, Ty); case FCmpInst::FCMP_UGE: return executeFCMP_UGE(Src1, Src2, Ty); - case FCmpInst::FCMP_FALSE: { - GenericValue Result; - Result.IntVal = APInt(1, false); - return Result; - } - case 
FCmpInst::FCMP_TRUE: { - GenericValue Result; - Result.IntVal = APInt(1, true); - return Result; - } + case FCmpInst::FCMP_FALSE: return executeFCMP_BOOL(Src1, Src2, Ty, false); + case FCmpInst::FCMP_TRUE: return executeFCMP_BOOL(Src1, Src2, Ty, true); default: dbgs() << "Unhandled Cmp predicate\n"; llvm_unreachable(0); @@ -525,27 +683,105 @@ void Interpreter::visitBinaryOperator(BinaryOperator &I) { GenericValue Src2 = getOperandValue(I.getOperand(1), SF); GenericValue R; // Result - switch (I.getOpcode()) { - case Instruction::Add: R.IntVal = Src1.IntVal + Src2.IntVal; break; - case Instruction::Sub: R.IntVal = Src1.IntVal - Src2.IntVal; break; - case Instruction::Mul: R.IntVal = Src1.IntVal * Src2.IntVal; break; - case Instruction::FAdd: executeFAddInst(R, Src1, Src2, Ty); break; - case Instruction::FSub: executeFSubInst(R, Src1, Src2, Ty); break; - case Instruction::FMul: executeFMulInst(R, Src1, Src2, Ty); break; - case Instruction::FDiv: executeFDivInst(R, Src1, Src2, Ty); break; - case Instruction::FRem: executeFRemInst(R, Src1, Src2, Ty); break; - case Instruction::UDiv: R.IntVal = Src1.IntVal.udiv(Src2.IntVal); break; - case Instruction::SDiv: R.IntVal = Src1.IntVal.sdiv(Src2.IntVal); break; - case Instruction::URem: R.IntVal = Src1.IntVal.urem(Src2.IntVal); break; - case Instruction::SRem: R.IntVal = Src1.IntVal.srem(Src2.IntVal); break; - case Instruction::And: R.IntVal = Src1.IntVal & Src2.IntVal; break; - case Instruction::Or: R.IntVal = Src1.IntVal | Src2.IntVal; break; - case Instruction::Xor: R.IntVal = Src1.IntVal ^ Src2.IntVal; break; - default: - dbgs() << "Don't know how to handle this binary operator!\n-->" << I; - llvm_unreachable(0); + // First process vector operation + if (Ty->isVectorTy()) { + assert(Src1.AggregateVal.size() == Src2.AggregateVal.size()); + R.AggregateVal.resize(Src1.AggregateVal.size()); + + // Macros to execute binary operation 'OP' over integer vectors +#define INTEGER_VECTOR_OPERATION(OP) \ + for (unsigned i = 0; i < R.AggregateVal.size(); ++i) \ + R.AggregateVal[i].IntVal = \ + Src1.AggregateVal[i].IntVal OP Src2.AggregateVal[i].IntVal; + + // Additional macros to execute binary operations udiv/sdiv/urem/srem since + // they have different notation. 
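(As an aside, a one-element illustration of the "different notation" mentioned here; this is a sketch only, with APInt values shown at an arbitrary 32-bit width, and is not part of the patch:)

    APInt A(32, 7), B(32, 3);
    APInt Sum  = A + B;      // operator form, what INTEGER_VECTOR_OPERATION uses
    APInt Quot = A.udiv(B);  // member-call form, what INTEGER_VECTOR_FUNCTION (just below) uses
    APInt Rem  = A.srem(B);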
+#define INTEGER_VECTOR_FUNCTION(OP) \ + for (unsigned i = 0; i < R.AggregateVal.size(); ++i) \ + R.AggregateVal[i].IntVal = \ + Src1.AggregateVal[i].IntVal.OP(Src2.AggregateVal[i].IntVal); + + // Macros to execute binary operation 'OP' over floating point type TY + // (float or double) vectors +#define FLOAT_VECTOR_FUNCTION(OP, TY) \ + for (unsigned i = 0; i < R.AggregateVal.size(); ++i) \ + R.AggregateVal[i].TY = \ + Src1.AggregateVal[i].TY OP Src2.AggregateVal[i].TY; + + // Macros to choose appropriate TY: float or double and run operation + // execution +#define FLOAT_VECTOR_OP(OP) { \ + if (dyn_cast<VectorType>(Ty)->getElementType()->isFloatTy()) \ + FLOAT_VECTOR_FUNCTION(OP, FloatVal) \ + else { \ + if (dyn_cast<VectorType>(Ty)->getElementType()->isDoubleTy()) \ + FLOAT_VECTOR_FUNCTION(OP, DoubleVal) \ + else { \ + dbgs() << "Unhandled type for OP instruction: " << *Ty << "\n"; \ + llvm_unreachable(0); \ + } \ + } \ +} + + switch(I.getOpcode()){ + default: + dbgs() << "Don't know how to handle this binary operator!\n-->" << I; + llvm_unreachable(0); + break; + case Instruction::Add: INTEGER_VECTOR_OPERATION(+) break; + case Instruction::Sub: INTEGER_VECTOR_OPERATION(-) break; + case Instruction::Mul: INTEGER_VECTOR_OPERATION(*) break; + case Instruction::UDiv: INTEGER_VECTOR_FUNCTION(udiv) break; + case Instruction::SDiv: INTEGER_VECTOR_FUNCTION(sdiv) break; + case Instruction::URem: INTEGER_VECTOR_FUNCTION(urem) break; + case Instruction::SRem: INTEGER_VECTOR_FUNCTION(srem) break; + case Instruction::And: INTEGER_VECTOR_OPERATION(&) break; + case Instruction::Or: INTEGER_VECTOR_OPERATION(|) break; + case Instruction::Xor: INTEGER_VECTOR_OPERATION(^) break; + case Instruction::FAdd: FLOAT_VECTOR_OP(+) break; + case Instruction::FSub: FLOAT_VECTOR_OP(-) break; + case Instruction::FMul: FLOAT_VECTOR_OP(*) break; + case Instruction::FDiv: FLOAT_VECTOR_OP(/) break; + case Instruction::FRem: + if (dyn_cast<VectorType>(Ty)->getElementType()->isFloatTy()) + for (unsigned i = 0; i < R.AggregateVal.size(); ++i) + R.AggregateVal[i].FloatVal = + fmod(Src1.AggregateVal[i].FloatVal, Src2.AggregateVal[i].FloatVal); + else { + if (dyn_cast<VectorType>(Ty)->getElementType()->isDoubleTy()) + for (unsigned i = 0; i < R.AggregateVal.size(); ++i) + R.AggregateVal[i].DoubleVal = + fmod(Src1.AggregateVal[i].DoubleVal, Src2.AggregateVal[i].DoubleVal); + else { + dbgs() << "Unhandled type for Rem instruction: " << *Ty << "\n"; + llvm_unreachable(0); + } + } + break; + } + } else { + switch (I.getOpcode()) { + default: + dbgs() << "Don't know how to handle this binary operator!\n-->" << I; + llvm_unreachable(0); + break; + case Instruction::Add: R.IntVal = Src1.IntVal + Src2.IntVal; break; + case Instruction::Sub: R.IntVal = Src1.IntVal - Src2.IntVal; break; + case Instruction::Mul: R.IntVal = Src1.IntVal * Src2.IntVal; break; + case Instruction::FAdd: executeFAddInst(R, Src1, Src2, Ty); break; + case Instruction::FSub: executeFSubInst(R, Src1, Src2, Ty); break; + case Instruction::FMul: executeFMulInst(R, Src1, Src2, Ty); break; + case Instruction::FDiv: executeFDivInst(R, Src1, Src2, Ty); break; + case Instruction::FRem: executeFRemInst(R, Src1, Src2, Ty); break; + case Instruction::UDiv: R.IntVal = Src1.IntVal.udiv(Src2.IntVal); break; + case Instruction::SDiv: R.IntVal = Src1.IntVal.sdiv(Src2.IntVal); break; + case Instruction::URem: R.IntVal = Src1.IntVal.urem(Src2.IntVal); break; + case Instruction::SRem: R.IntVal = Src1.IntVal.srem(Src2.IntVal); break; + case Instruction::And: R.IntVal = Src1.IntVal & 
Src2.IntVal; break; + case Instruction::Or: R.IntVal = Src1.IntVal | Src2.IntVal; break; + case Instruction::Xor: R.IntVal = Src1.IntVal ^ Src2.IntVal; break; + } } - SetValue(&I, R, SF); } diff --git a/contrib/llvm/lib/ExecutionEngine/MCJIT/MCJIT.cpp b/contrib/llvm/lib/ExecutionEngine/MCJIT/MCJIT.cpp index fee10e1..38aa547 100644 --- a/contrib/llvm/lib/ExecutionEngine/MCJIT/MCJIT.cpp +++ b/contrib/llvm/lib/ExecutionEngine/MCJIT/MCJIT.cpp @@ -14,6 +14,7 @@ #include "llvm/ExecutionEngine/MCJIT.h" #include "llvm/ExecutionEngine/ObjectBuffer.h" #include "llvm/ExecutionEngine/ObjectImage.h" +#include "llvm/ExecutionEngine/SectionMemoryManager.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/DerivedTypes.h" #include "llvm/IR/Function.h" @@ -46,13 +47,14 @@ ExecutionEngine *MCJIT::createJIT(Module *M, // FIXME: Don't do this here. sys::DynamicLibrary::LoadLibraryPermanently(0, NULL); - return new MCJIT(M, TM, JMM, GVsWithCode); + return new MCJIT(M, TM, JMM ? JMM : new SectionMemoryManager(), GVsWithCode); } MCJIT::MCJIT(Module *m, TargetMachine *tm, RTDyldMemoryManager *MM, bool AllocateGVsWithCode) - : ExecutionEngine(m), TM(tm), Ctx(0), MemMgr(MM), Dyld(MM), - isCompiled(false), M(m) { + : ExecutionEngine(m), TM(tm), Ctx(0), + MemMgr(MM ? MM : new SectionMemoryManager()), Dyld(MemMgr), + IsLoaded(false), M(m), ObjCache(0) { setDataLayout(TM->getDataLayout()); } @@ -64,7 +66,11 @@ MCJIT::~MCJIT() { delete TM; } -void MCJIT::emitObject(Module *m) { +void MCJIT::setObjectCache(ObjectCache* NewCache) { + ObjCache = NewCache; +} + +ObjectBufferStream* MCJIT::emitObject(Module *m) { /// Currently, MCJIT only supports a single module and the module passed to /// this function call is expected to be the contained module. The module /// is passed as a parameter here to prepare for multiple module support in @@ -77,30 +83,66 @@ void MCJIT::emitObject(Module *m) { // FIXME: Track compilation state on a per-module basis when multiple modules // are supported. // Re-compilation is not supported - if (isCompiled) - return; + assert(!IsLoaded); PassManager PM; PM.add(new DataLayout(*TM->getDataLayout())); // The RuntimeDyld will take ownership of this shortly - OwningPtr<ObjectBufferStream> Buffer(new ObjectBufferStream()); + OwningPtr<ObjectBufferStream> CompiledObject(new ObjectBufferStream()); // Turn the machine code intermediate representation into bytes in memory // that may be executed. - if (TM->addPassesToEmitMC(PM, Ctx, Buffer->getOStream(), false)) { + if (TM->addPassesToEmitMC(PM, Ctx, CompiledObject->getOStream(), false)) { report_fatal_error("Target does not support MC emission!"); } // Initialize passes. PM.run(*m); // Flush the output buffer to get the generated code into memory - Buffer->flush(); + CompiledObject->flush(); + + // If we have an object cache, tell it about the new object. + // Note that we're using the compiled image, not the loaded image (as below). + if (ObjCache) { + // MemoryBuffer is a thin wrapper around the actual memory, so it's OK + // to create a temporary object here and delete it after the call. + OwningPtr<MemoryBuffer> MB(CompiledObject->getMemBuffer()); + ObjCache->notifyObjectCompiled(m, MB.get()); + } + + return CompiledObject.take(); +} + +void MCJIT::loadObject(Module *M) { + + // Get a thread lock to make sure we aren't trying to load multiple times + MutexGuard locked(lock); + + // FIXME: Track compilation state on a per-module basis when multiple modules + // are supported. 
+ // Re-compilation is not supported + if (IsLoaded) + return; + + OwningPtr<ObjectBuffer> ObjectToLoad; + // Try to load the pre-compiled object from cache if possible + if (0 != ObjCache) { + OwningPtr<MemoryBuffer> PreCompiledObject(ObjCache->getObjectCopy(M)); + if (0 != PreCompiledObject.get()) + ObjectToLoad.reset(new ObjectBuffer(PreCompiledObject.take())); + } + + // If the cache did not contain a suitable object, compile the object + if (!ObjectToLoad) { + ObjectToLoad.reset(emitObject(M)); + assert(ObjectToLoad.get() && "Compilation did not produce an object."); + } // Load the object into the dynamic linker. // handing off ownership of the buffer - LoadedObject.reset(Dyld.loadObject(Buffer.take())); + LoadedObject.reset(Dyld.loadObject(ObjectToLoad.take())); if (!LoadedObject) report_fatal_error(Dyld.getErrorString()); @@ -113,7 +155,7 @@ void MCJIT::emitObject(Module *m) { NotifyObjectEmitted(*LoadedObject); // FIXME: Add support for per-module compilation state - isCompiled = true; + IsLoaded = true; } // FIXME: Add a parameter to identify which object is being finalized when @@ -122,19 +164,18 @@ void MCJIT::emitObject(Module *m) { // protection in the interface. void MCJIT::finalizeObject() { // If the module hasn't been compiled, just do that. - if (!isCompiled) { - // If the call to Dyld.resolveRelocations() is removed from emitObject() + if (!IsLoaded) { + // If the call to Dyld.resolveRelocations() is removed from loadObject() // we'll need to do that here. - emitObject(M); - - // Set page permissions. - MemMgr->applyPermissions(); - - return; + loadObject(M); + } else { + // Resolve any relocations. + Dyld.resolveRelocations(); } - // Resolve any relocations. - Dyld.resolveRelocations(); + StringRef EHData = Dyld.getEHFrameSection(); + if (!EHData.empty()) + MemMgr->registerEHFrames(EHData); // Set page permissions. MemMgr->applyPermissions(); @@ -151,8 +192,8 @@ void *MCJIT::getPointerToFunction(Function *F) { // dies. // FIXME: Add support for per-module compilation state - if (!isCompiled) - emitObject(M); + if (!IsLoaded) + loadObject(M); if (F->isDeclaration() || F->hasAvailableExternallyLinkage()) { bool AbortOnFailure = !F->hasExternalWeakLinkage(); @@ -284,8 +325,8 @@ GenericValue MCJIT::runFunction(Function *F, void *MCJIT::getPointerToNamedFunction(const std::string &Name, bool AbortOnFailure) { // FIXME: Add support for per-module compilation state - if (!isCompiled) - emitObject(M); + if (!IsLoaded) + loadObject(M); if (!isSymbolSearchingDisabled() && MemMgr) { void *ptr = MemMgr->getPointerToNamedFunction(Name, false); diff --git a/contrib/llvm/lib/ExecutionEngine/MCJIT/MCJIT.h b/contrib/llvm/lib/ExecutionEngine/MCJIT/MCJIT.h index 283a8e5..8c4bf6e 100644 --- a/contrib/llvm/lib/ExecutionEngine/MCJIT/MCJIT.h +++ b/contrib/llvm/lib/ExecutionEngine/MCJIT/MCJIT.h @@ -12,6 +12,7 @@ #include "llvm/ADT/SmallVector.h" #include "llvm/ExecutionEngine/ExecutionEngine.h" +#include "llvm/ExecutionEngine/ObjectCache.h" #include "llvm/ExecutionEngine/RuntimeDyld.h" #include "llvm/PassManager.h" @@ -34,16 +35,23 @@ class MCJIT : public ExecutionEngine { SmallVector<JITEventListener*, 2> EventListeners; // FIXME: Add support for multiple modules - bool isCompiled; + bool IsLoaded; Module *M; OwningPtr<ObjectImage> LoadedObject; + // An optional ObjectCache to be notified of compiled objects and used to + // perform lookup of pre-compiled code to avoid re-compilation. 
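(As an aside, a minimal sketch of a client-side cache that could be handed to setObjectCache(); it assumes only the notifyObjectCompiled()/getObjectCopy() calls visible in this patch plus MemoryBuffer::getMemBufferCopy(), and the SimpleObjectCache name and its single-buffer policy are illustrative, not part of the change:)

    class SimpleObjectCache : public ObjectCache {
      OwningPtr<MemoryBuffer> Cached;   // the last object MCJIT emitted
    public:
      virtual void notifyObjectCompiled(const Module *M, const MemoryBuffer *Obj) {
        // Keep a private copy so a later engine instance can skip codegen.
        Cached.reset(MemoryBuffer::getMemBufferCopy(Obj->getBuffer(),
                                                    Obj->getBufferIdentifier()));
      }
      virtual MemoryBuffer *getObjectCopy(const Module *M) {
        if (!Cached.get())
          return 0;
        // MCJIT takes ownership of the returned buffer, so hand back a fresh copy.
        return MemoryBuffer::getMemBufferCopy(Cached->getBuffer(),
                                              Cached->getBufferIdentifier());
      }
    };
    // Typical use, also a sketch: SimpleObjectCache Cache; EE->setObjectCache(&Cache);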
+ ObjectCache *ObjCache; + public: ~MCJIT(); /// @name ExecutionEngine interface implementation /// @{ + /// Sets the object manager that MCJIT should use to avoid compilation. + virtual void setObjectCache(ObjectCache *manager); + virtual void finalizeObject(); virtual void *getPointerToBasicBlock(BasicBlock *BB); @@ -102,7 +110,9 @@ protected: /// this function call is expected to be the contained module. The module /// is passed as a parameter here to prepare for multiple module support in /// the future. - void emitObject(Module *M); + ObjectBufferStream* emitObject(Module *M); + + void loadObject(Module *M); void NotifyObjectEmitted(const ObjectImage& Obj); void NotifyFreeingObject(const ObjectImage& Obj); diff --git a/contrib/llvm/lib/ExecutionEngine/MCJIT/SectionMemoryManager.cpp b/contrib/llvm/lib/ExecutionEngine/MCJIT/SectionMemoryManager.cpp index fa35acd..bac77ce 100644 --- a/contrib/llvm/lib/ExecutionEngine/MCJIT/SectionMemoryManager.cpp +++ b/contrib/llvm/lib/ExecutionEngine/MCJIT/SectionMemoryManager.cpp @@ -138,9 +138,46 @@ bool SectionMemoryManager::applyPermissions(std::string *ErrMsg) // Read-write data memory already has the correct permissions + // Some platforms with separate data cache and instruction cache require + // explicit cache flush, otherwise JIT code manipulations (like resolved + // relocations) will get to the data cache but not to the instruction cache. + invalidateInstructionCache(); + return false; } +// Determine whether we can register EH tables. +#if (defined(__GNUC__) && !defined(__ARM_EABI__) && \ + !defined(__USING_SJLJ_EXCEPTIONS__)) +#define HAVE_EHTABLE_SUPPORT 1 +#else +#define HAVE_EHTABLE_SUPPORT 0 +#endif + +#if HAVE_EHTABLE_SUPPORT +extern "C" void __register_frame(void*); + +static const char *processFDE(const char *Entry) { + const char *P = Entry; + uint32_t Length = *((uint32_t*)P); + P += 4; + uint32_t Offset = *((uint32_t*)P); + if (Offset != 0) + __register_frame((void*)Entry); + return P + Length; +} +#endif + +void SectionMemoryManager::registerEHFrames(StringRef SectionData) { +#if HAVE_EHTABLE_SUPPORT + const char *P = SectionData.data(); + const char *End = SectionData.data() + SectionData.size(); + do { + P = processFDE(P); + } while(P != End); +#endif +} + error_code SectionMemoryManager::applyMemoryGroupPermissions(MemoryGroup &MemGroup, unsigned Permissions) { diff --git a/contrib/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyld.cpp b/contrib/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyld.cpp index 409b25f..a08b508 100644 --- a/contrib/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyld.cpp +++ b/contrib/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyld.cpp @@ -25,10 +25,15 @@ using namespace llvm::object; // Empty out-of-line virtual destructor as the key function. RTDyldMemoryManager::~RTDyldMemoryManager() {} +void RTDyldMemoryManager::registerEHFrames(StringRef SectionData) {} RuntimeDyldImpl::~RuntimeDyldImpl() {} namespace llvm { +StringRef RuntimeDyldImpl::getEHFrameSection() { + return StringRef(); +} + // Resolve the relocations for all symbols we currently know about. void RuntimeDyldImpl::resolveRelocations() { // First, resolve relocations associated with external symbols. @@ -96,7 +101,8 @@ ObjectImage *RuntimeDyldImpl::loadObject(ObjectBuffer *InputBuffer) { bool isCommon = flags & SymbolRef::SF_Common; if (isCommon) { // Add the common symbols to a list. We'll allocate them all below. 
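(A purely illustrative mirror of the registerEHFrames()/processFDE() walk added above: each record starts with a 32-bit length followed by a 32-bit CIE pointer, and only records with a non-zero CIE pointer, i.e. FDEs, are handed to __register_frame(). The helper below assumes that same layout and merely counts them:)

    static unsigned countFDEs(StringRef SectionData) {
      const char *P = SectionData.data();
      const char *End = P + SectionData.size();
      unsigned NumFDEs = 0;
      while (P != End) {
        uint32_t Length = *reinterpret_cast<const uint32_t *>(P);
        uint32_t CIEPointer = *reinterpret_cast<const uint32_t *>(P + 4);
        if (CIEPointer != 0)   // zero marks a CIE; anything else is an FDE
          ++NumFDEs;
        P += 4 + Length;       // the length field does not count itself
      }
      return NumFDEs;
    }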
- uint64_t Align = getCommonSymbolAlignment(*i); + uint32_t Align; + Check(i->getAlignment(Align)); uint64_t Size = 0; Check(i->getSize(Size)); CommonSize += Size + Align; @@ -154,18 +160,8 @@ ObjectImage *RuntimeDyldImpl::loadObject(ObjectBuffer *InputBuffer) { isFirstRelocation = false; } - ObjRelocationInfo RI; - RI.SectionID = SectionID; - Check(i->getAdditionalInfo(RI.AdditionalInfo)); - Check(i->getOffset(RI.Offset)); - Check(i->getSymbol(RI.Symbol)); - Check(i->getType(RI.Type)); - - DEBUG(dbgs() << "\t\tAddend: " << RI.AdditionalInfo - << " Offset: " << format("%p", (uintptr_t)RI.Offset) - << " Type: " << (uint32_t)(RI.Type & 0xffffffffL) - << "\n"); - processRelocationRef(RI, *obj, LocalSections, LocalSymbols, Stubs); + processRelocationRef(SectionID, *i, *obj, LocalSections, LocalSymbols, + Stubs); } } @@ -183,7 +179,7 @@ void RuntimeDyldImpl::emitCommonSymbols(ObjectImage &Obj, if (!Addr) report_fatal_error("Unable to allocate memory for common symbols!"); uint64_t Offset = 0; - Sections.push_back(SectionEntry(StringRef(), Addr, TotalSize, TotalSize, 0)); + Sections.push_back(SectionEntry(StringRef(), Addr, TotalSize, 0)); memset(Addr, 0, TotalSize); DEBUG(dbgs() << "emitCommonSection SectionID: " << SectionID @@ -243,6 +239,12 @@ unsigned RuntimeDyldImpl::emitSection(ObjectImage &Obj, Check(Section.isReadOnlyData(IsReadOnly)); Check(Section.getSize(DataSize)); Check(Section.getName(Name)); + if (StubSize > 0) { + unsigned StubAlignment = getStubAlignment(); + unsigned EndAlignment = (DataSize | Alignment) & -(DataSize | Alignment); + if (StubAlignment > EndAlignment) + StubBufSize += StubAlignment - EndAlignment; + } unsigned Allocate; unsigned SectionID = Sections.size(); @@ -295,8 +297,7 @@ unsigned RuntimeDyldImpl::emitSection(ObjectImage &Obj, << "\n"); } - Sections.push_back(SectionEntry(Name, Addr, Allocate, DataSize, - (uintptr_t)pData)); + Sections.push_back(SectionEntry(Name, Addr, DataSize, (uintptr_t)pData)); return SectionID; } @@ -339,7 +340,25 @@ void RuntimeDyldImpl::addRelocationForSymbol(const RelocationEntry &RE, } uint8_t *RuntimeDyldImpl::createStubFunction(uint8_t *Addr) { - if (Arch == Triple::arm) { + if (Arch == Triple::aarch64) { + // This stub has to be able to access the full address space, + // since symbol lookup won't necessarily find a handy, in-range, + // PLT stub for functions which could be anywhere. + uint32_t *StubAddr = (uint32_t*)Addr; + + // Stub can use ip0 (== x16) to calculate address + *StubAddr = 0xd2e00010; // movz ip0, #:abs_g3:<addr> + StubAddr++; + *StubAddr = 0xf2c00010; // movk ip0, #:abs_g2_nc:<addr> + StubAddr++; + *StubAddr = 0xf2a00010; // movk ip0, #:abs_g1_nc:<addr> + StubAddr++; + *StubAddr = 0xf2800010; // movk ip0, #:abs_g0_nc:<addr> + StubAddr++; + *StubAddr = 0xd61f0200; // br ip0 + + return Addr; + } else if (Arch == Triple::arm) { // TODO: There is only ARM far stub now. We should add the Thumb stub, // and stubs for branches Thumb - ARM and ARM - Thumb. 
uint32_t *StubAddr = (uint32_t*)Addr; @@ -380,6 +399,13 @@ uint8_t *RuntimeDyldImpl::createStubFunction(uint8_t *Addr) { writeInt32BE(Addr+40, 0x4E800420); // bctr return Addr; + } else if (Arch == Triple::systemz) { + writeInt16BE(Addr, 0xC418); // lgrl %r1,.+8 + writeInt16BE(Addr+2, 0x0000); + writeInt16BE(Addr+4, 0x0004); + writeInt16BE(Addr+6, 0x07F1); // brc 15,%r1 + // 8-byte address stored at Addr + 8 + return Addr; } return Addr; } @@ -401,26 +427,14 @@ void RuntimeDyldImpl::reassignSectionAddress(unsigned SectionID, Sections[SectionID].LoadAddress = Addr; } -void RuntimeDyldImpl::resolveRelocationEntry(const RelocationEntry &RE, - uint64_t Value) { - // Ignore relocations for sections that were not loaded - if (Sections[RE.SectionID].Address != 0) { - DEBUG(dbgs() << "\tSectionID: " << RE.SectionID - << " + " << RE.Offset << " (" - << format("%p", Sections[RE.SectionID].Address + RE.Offset) << ")" - << " RelType: " << RE.RelType - << " Addend: " << RE.Addend - << "\n"); - - resolveRelocation(Sections[RE.SectionID], RE.Offset, - Value, RE.RelType, RE.Addend); - } -} - void RuntimeDyldImpl::resolveRelocationList(const RelocationList &Relocs, uint64_t Value) { for (unsigned i = 0, e = Relocs.size(); i != e; ++i) { - resolveRelocationEntry(Relocs[i], Value); + const RelocationEntry &RE = Relocs[i]; + // Ignore relocations for sections that were not loaded + if (Sections[RE.SectionID].Address == 0) + continue; + resolveRelocation(RE, Value); } } @@ -534,4 +548,8 @@ StringRef RuntimeDyld::getErrorString() { return Dyld->getErrorString(); } +StringRef RuntimeDyld::getEHFrameSection() { + return Dyld->getEHFrameSection(); +} + } // end namespace llvm diff --git a/contrib/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp b/contrib/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp index b8537b1..d4d84d3 100644 --- a/contrib/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp +++ b/contrib/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp @@ -41,7 +41,7 @@ error_code check(error_code Err) { template<class ELFT> class DyldELFObject : public ELFObjectFile<ELFT> { - LLVM_ELF_IMPORT_TYPES(ELFT) + LLVM_ELF_IMPORT_TYPES_ELFT(ELFT) typedef Elf_Shdr_Impl<ELFT> Elf_Shdr; typedef Elf_Sym_Impl<ELFT> Elf_Sym; @@ -151,6 +151,14 @@ void DyldELFObject<ELFT>::updateSymbolAddress(const SymbolRef &SymRef, namespace llvm { +StringRef RuntimeDyldELF::getEHFrameSection() { + for (int i = 0, e = Sections.size(); i != e; ++i) { + if (Sections[i].Name == ".eh_frame") + return StringRef((const char*)Sections[i].Address, Sections[i].Size); + } + return StringRef(); +} + ObjectImage *RuntimeDyldELF::createObjectImage(ObjectBuffer *Buffer) { if (Buffer->getBufferSize() < ELF::EI_NIDENT) llvm_unreachable("Unexpected ELF object size"); @@ -269,6 +277,85 @@ void RuntimeDyldELF::resolveX86Relocation(const SectionEntry &Section, } } +void RuntimeDyldELF::resolveAArch64Relocation(const SectionEntry &Section, + uint64_t Offset, + uint64_t Value, + uint32_t Type, + int64_t Addend) { + uint32_t *TargetPtr = reinterpret_cast<uint32_t*>(Section.Address + Offset); + uint64_t FinalAddress = Section.LoadAddress + Offset; + + DEBUG(dbgs() << "resolveAArch64Relocation, LocalAddress: 0x" + << format("%llx", Section.Address + Offset) + << " FinalAddress: 0x" << format("%llx",FinalAddress) + << " Value: 0x" << format("%llx",Value) + << " Type: 0x" << format("%x",Type) + << " Addend: 0x" << format("%llx",Addend) + << "\n"); + + switch (Type) { + default: + llvm_unreachable("Relocation type not implemented yet!"); + 
break; + case ELF::R_AARCH64_ABS64: { + uint64_t *TargetPtr = reinterpret_cast<uint64_t*>(Section.Address + Offset); + *TargetPtr = Value + Addend; + break; + } + case ELF::R_AARCH64_PREL32: { // test-shift.ll (.eh_frame) + uint64_t Result = Value + Addend - FinalAddress; + assert(static_cast<int64_t>(Result) >= INT32_MIN && + static_cast<int64_t>(Result) <= UINT32_MAX); + *TargetPtr = static_cast<uint32_t>(Result & 0xffffffffU); + break; + } + case ELF::R_AARCH64_CALL26: // fallthrough + case ELF::R_AARCH64_JUMP26: { + // Operation: S+A-P. Set Call or B immediate value to bits fff_fffc of the + // calculation. + uint64_t BranchImm = Value + Addend - FinalAddress; + + // "Check that -2^27 <= result < 2^27". + assert(-(1LL << 27) <= static_cast<int64_t>(BranchImm) && + static_cast<int64_t>(BranchImm) < (1LL << 27)); + // Immediate goes in bits 25:0 of B and BL. + *TargetPtr |= static_cast<uint32_t>(BranchImm & 0xffffffcU) >> 2; + break; + } + case ELF::R_AARCH64_MOVW_UABS_G3: { + uint64_t Result = Value + Addend; + // Immediate goes in bits 20:5 of MOVZ/MOVK instruction + *TargetPtr |= Result >> (48 - 5); + // Shift is "lsl #48", in bits 22:21 + *TargetPtr |= 3 << 21; + break; + } + case ELF::R_AARCH64_MOVW_UABS_G2_NC: { + uint64_t Result = Value + Addend; + // Immediate goes in bits 20:5 of MOVZ/MOVK instruction + *TargetPtr |= ((Result & 0xffff00000000ULL) >> (32 - 5)); + // Shift is "lsl #32", in bits 22:21 + *TargetPtr |= 2 << 21; + break; + } + case ELF::R_AARCH64_MOVW_UABS_G1_NC: { + uint64_t Result = Value + Addend; + // Immediate goes in bits 20:5 of MOVZ/MOVK instruction + *TargetPtr |= ((Result & 0xffff0000U) >> (16 - 5)); + // Shift is "lsl #16", in bits 22:21 + *TargetPtr |= 1 << 21; + break; + } + case ELF::R_AARCH64_MOVW_UABS_G0_NC: { + uint64_t Result = Value + Addend; + // Immediate goes in bits 20:5 of MOVZ/MOVK instruction + *TargetPtr |= ((Result & 0xffffU) << 5); + // Shift is "lsl #0", in bits 22:21. No action needed. 
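(As an aside, these MOVW_UABS cases are what the 20-byte AArch64 stub added above, movz plus three movk plus br, relies on: each relocation drops one 16-bit slice of the target address into an instruction whose imm16 field starts out zero. A sketch of that encoding, assuming only the bit positions stated in the comments:)

    // imm16 occupies bits 20:5; the shift selector occupies bits 22:21
    // (0 = lsl #0, 1 = lsl #16, 2 = lsl #32, 3 = lsl #48).
    static uint32_t patchMovWide(uint32_t Insn, uint64_t Target, unsigned Chunk) {
      uint16_t Imm = uint16_t((Target >> (16 * Chunk)) & 0xffff); // one slice
      Insn |= uint32_t(Imm) << 5;
      Insn |= uint32_t(Chunk & 0x3) << 21;
      return Insn;
    }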
+ break; + } + } +} + void RuntimeDyldELF::resolveARMRelocation(const SectionEntry &Section, uint64_t Offset, uint32_t Value, @@ -541,6 +628,11 @@ void RuntimeDyldELF::resolvePPC64Relocation(const SectionEntry &Section, llvm_unreachable("Relocation R_PPC64_REL32 overflow"); writeInt32BE(LocalAddress, delta); } break; + case ELF::R_PPC64_REL64: { + uint64_t FinalAddress = (Section.LoadAddress + Offset); + uint64_t Delta = Value - FinalAddress + Addend; + writeInt64BE(LocalAddress, Delta); + } break; case ELF::R_PPC64_ADDR64 : writeInt64BE(LocalAddress, Value + Addend); break; @@ -560,6 +652,48 @@ void RuntimeDyldELF::resolvePPC64Relocation(const SectionEntry &Section, } } +void RuntimeDyldELF::resolveSystemZRelocation(const SectionEntry &Section, + uint64_t Offset, + uint64_t Value, + uint32_t Type, + int64_t Addend) { + uint8_t *LocalAddress = Section.Address + Offset; + switch (Type) { + default: + llvm_unreachable("Relocation type not implemented yet!"); + break; + case ELF::R_390_PC16DBL: + case ELF::R_390_PLT16DBL: { + int64_t Delta = (Value + Addend) - (Section.LoadAddress + Offset); + assert(int16_t(Delta / 2) * 2 == Delta && "R_390_PC16DBL overflow"); + writeInt16BE(LocalAddress, Delta / 2); + break; + } + case ELF::R_390_PC32DBL: + case ELF::R_390_PLT32DBL: { + int64_t Delta = (Value + Addend) - (Section.LoadAddress + Offset); + assert(int32_t(Delta / 2) * 2 == Delta && "R_390_PC32DBL overflow"); + writeInt32BE(LocalAddress, Delta / 2); + break; + } + case ELF::R_390_PC32: { + int64_t Delta = (Value + Addend) - (Section.LoadAddress + Offset); + assert(int32_t(Delta) == Delta && "R_390_PC32 overflow"); + writeInt32BE(LocalAddress, Delta); + break; + } + case ELF::R_390_64: + writeInt64BE(LocalAddress, Value + Addend); + break; + } +} + +void RuntimeDyldELF::resolveRelocation(const RelocationEntry &RE, + uint64_t Value) { + const SectionEntry &Section = Sections[RE.SectionID]; + return resolveRelocation(Section, RE.Offset, Value, RE.RelType, RE.Addend); +} + void RuntimeDyldELF::resolveRelocation(const SectionEntry &Section, uint64_t Offset, uint64_t Value, @@ -574,6 +708,9 @@ void RuntimeDyldELF::resolveRelocation(const SectionEntry &Section, (uint32_t)(Value & 0xffffffffL), Type, (uint32_t)(Addend & 0xffffffffL)); break; + case Triple::aarch64: + resolveAArch64Relocation(Section, Offset, Value, Type, Addend); + break; case Triple::arm: // Fall through. 
case Triple::thumb: resolveARMRelocation(Section, Offset, @@ -589,19 +726,25 @@ void RuntimeDyldELF::resolveRelocation(const SectionEntry &Section, case Triple::ppc64: resolvePPC64Relocation(Section, Offset, Value, Type, Addend); break; + case Triple::systemz: + resolveSystemZRelocation(Section, Offset, Value, Type, Addend); + break; default: llvm_unreachable("Unsupported CPU type!"); } } -void RuntimeDyldELF::processRelocationRef(const ObjRelocationInfo &Rel, +void RuntimeDyldELF::processRelocationRef(unsigned SectionID, + RelocationRef RelI, ObjectImage &Obj, ObjSectionToIDMap &ObjSectionToID, const SymbolTableMap &Symbols, StubMap &Stubs) { - - uint32_t RelType = (uint32_t)(Rel.Type & 0xffffffffL); - intptr_t Addend = (intptr_t)Rel.AdditionalInfo; - const SymbolRef &Symbol = Rel.Symbol; + uint64_t RelType; + Check(RelI.getType(RelType)); + int64_t Addend; + Check(RelI.getAdditionalInfo(Addend)); + SymbolRef Symbol; + Check(RelI.getSymbol(Symbol)); // Obtain the symbol name which is referenced in the relocation StringRef TargetName; @@ -617,14 +760,14 @@ void RuntimeDyldELF::processRelocationRef(const ObjRelocationInfo &Rel, Symbol.getType(SymType); if (lsi != Symbols.end()) { Value.SectionID = lsi->second.first; - Value.Addend = lsi->second.second; + Value.Addend = lsi->second.second + Addend; } else { // Search for the symbol in the global symbol table SymbolTableMap::const_iterator gsi = GlobalSymbolTable.find(TargetName.data()); if (gsi != GlobalSymbolTable.end()) { Value.SectionID = gsi->second.first; - Value.Addend = gsi->second.second; + Value.Addend = gsi->second.second + Addend; } else { switch (SymType) { case SymbolRef::ST_Debug: { @@ -657,21 +800,73 @@ void RuntimeDyldELF::processRelocationRef(const ObjRelocationInfo &Rel, } } } - DEBUG(dbgs() << "\t\tRel.SectionID: " << Rel.SectionID - << " Rel.Offset: " << Rel.Offset + uint64_t Offset; + Check(RelI.getOffset(Offset)); + + DEBUG(dbgs() << "\t\tSectionID: " << SectionID + << " Offset: " << Offset << "\n"); - if (Arch == Triple::arm && + if (Arch == Triple::aarch64 && + (RelType == ELF::R_AARCH64_CALL26 || + RelType == ELF::R_AARCH64_JUMP26)) { + // This is an AArch64 branch relocation, need to use a stub function. + DEBUG(dbgs() << "\t\tThis is an AArch64 branch relocation."); + SectionEntry &Section = Sections[SectionID]; + + // Look for an existing stub. + StubMap::const_iterator i = Stubs.find(Value); + if (i != Stubs.end()) { + resolveRelocation(Section, Offset, + (uint64_t)Section.Address + i->second, RelType, 0); + DEBUG(dbgs() << " Stub function found\n"); + } else { + // Create a new stub function. 
+ DEBUG(dbgs() << " Create a new stub function\n"); + Stubs[Value] = Section.StubOffset; + uint8_t *StubTargetAddr = createStubFunction(Section.Address + + Section.StubOffset); + + RelocationEntry REmovz_g3(SectionID, + StubTargetAddr - Section.Address, + ELF::R_AARCH64_MOVW_UABS_G3, Value.Addend); + RelocationEntry REmovk_g2(SectionID, + StubTargetAddr - Section.Address + 4, + ELF::R_AARCH64_MOVW_UABS_G2_NC, Value.Addend); + RelocationEntry REmovk_g1(SectionID, + StubTargetAddr - Section.Address + 8, + ELF::R_AARCH64_MOVW_UABS_G1_NC, Value.Addend); + RelocationEntry REmovk_g0(SectionID, + StubTargetAddr - Section.Address + 12, + ELF::R_AARCH64_MOVW_UABS_G0_NC, Value.Addend); + + if (Value.SymbolName) { + addRelocationForSymbol(REmovz_g3, Value.SymbolName); + addRelocationForSymbol(REmovk_g2, Value.SymbolName); + addRelocationForSymbol(REmovk_g1, Value.SymbolName); + addRelocationForSymbol(REmovk_g0, Value.SymbolName); + } else { + addRelocationForSection(REmovz_g3, Value.SectionID); + addRelocationForSection(REmovk_g2, Value.SectionID); + addRelocationForSection(REmovk_g1, Value.SectionID); + addRelocationForSection(REmovk_g0, Value.SectionID); + } + resolveRelocation(Section, Offset, + (uint64_t)Section.Address + Section.StubOffset, + RelType, 0); + Section.StubOffset += getMaxStubSize(); + } + } else if (Arch == Triple::arm && (RelType == ELF::R_ARM_PC24 || RelType == ELF::R_ARM_CALL || RelType == ELF::R_ARM_JUMP24)) { // This is an ARM branch relocation, need to use a stub function. DEBUG(dbgs() << "\t\tThis is an ARM branch relocation."); - SectionEntry &Section = Sections[Rel.SectionID]; + SectionEntry &Section = Sections[SectionID]; // Look for an existing stub. StubMap::const_iterator i = Stubs.find(Value); if (i != Stubs.end()) { - resolveRelocation(Section, Rel.Offset, + resolveRelocation(Section, Offset, (uint64_t)Section.Address + i->second, RelType, 0); DEBUG(dbgs() << " Stub function found\n"); } else { @@ -680,14 +875,14 @@ void RuntimeDyldELF::processRelocationRef(const ObjRelocationInfo &Rel, Stubs[Value] = Section.StubOffset; uint8_t *StubTargetAddr = createStubFunction(Section.Address + Section.StubOffset); - RelocationEntry RE(Rel.SectionID, StubTargetAddr - Section.Address, + RelocationEntry RE(SectionID, StubTargetAddr - Section.Address, ELF::R_ARM_ABS32, Value.Addend); if (Value.SymbolName) addRelocationForSymbol(RE, Value.SymbolName); else addRelocationForSection(RE, Value.SectionID); - resolveRelocation(Section, Rel.Offset, + resolveRelocation(Section, Offset, (uint64_t)Section.Address + Section.StubOffset, RelType, 0); Section.StubOffset += getMaxStubSize(); @@ -696,8 +891,8 @@ void RuntimeDyldELF::processRelocationRef(const ObjRelocationInfo &Rel, RelType == ELF::R_MIPS_26) { // This is an Mips branch relocation, need to use a stub function. DEBUG(dbgs() << "\t\tThis is a Mips branch relocation."); - SectionEntry &Section = Sections[Rel.SectionID]; - uint8_t *Target = Section.Address + Rel.Offset; + SectionEntry &Section = Sections[SectionID]; + uint8_t *Target = Section.Address + Offset; uint32_t *TargetAddress = (uint32_t *)Target; // Extract the addend from the instruction. @@ -708,7 +903,7 @@ void RuntimeDyldELF::processRelocationRef(const ObjRelocationInfo &Rel, // Look up for existing stub. 
StubMap::const_iterator i = Stubs.find(Value); if (i != Stubs.end()) { - resolveRelocation(Section, Rel.Offset, + resolveRelocation(Section, Offset, (uint64_t)Section.Address + i->second, RelType, 0); DEBUG(dbgs() << " Stub function found\n"); } else { @@ -719,10 +914,10 @@ void RuntimeDyldELF::processRelocationRef(const ObjRelocationInfo &Rel, Section.StubOffset); // Creating Hi and Lo relocations for the filled stub instructions. - RelocationEntry REHi(Rel.SectionID, + RelocationEntry REHi(SectionID, StubTargetAddr - Section.Address, ELF::R_MIPS_HI16, Value.Addend); - RelocationEntry RELo(Rel.SectionID, + RelocationEntry RELo(SectionID, StubTargetAddr - Section.Address + 4, ELF::R_MIPS_LO16, Value.Addend); @@ -734,7 +929,7 @@ void RuntimeDyldELF::processRelocationRef(const ObjRelocationInfo &Rel, addRelocationForSection(RELo, Value.SectionID); } - resolveRelocation(Section, Rel.Offset, + resolveRelocation(Section, Offset, (uint64_t)Section.Address + Section.StubOffset, RelType, 0); Section.StubOffset += getMaxStubSize(); @@ -744,8 +939,8 @@ void RuntimeDyldELF::processRelocationRef(const ObjRelocationInfo &Rel, // A PPC branch relocation will need a stub function if the target is // an external symbol (Symbol::ST_Unknown) or if the target address // is not within the signed 24-bits branch address. - SectionEntry &Section = Sections[Rel.SectionID]; - uint8_t *Target = Section.Address + Rel.Offset; + SectionEntry &Section = Sections[SectionID]; + uint8_t *Target = Section.Address + Offset; bool RangeOverflow = false; if (SymType != SymbolRef::ST_Unknown) { // A function call may points to the .opd entry, so the final symbol value @@ -755,7 +950,7 @@ void RuntimeDyldELF::processRelocationRef(const ObjRelocationInfo &Rel, int32_t delta = static_cast<int32_t>(Target - RelocTarget); // If it is within 24-bits branch range, just set the branch target if (SignExtend32<24>(delta) == delta) { - RelocationEntry RE(Rel.SectionID, Rel.Offset, RelType, Value.Addend); + RelocationEntry RE(SectionID, Offset, RelType, Value.Addend); if (Value.SymbolName) addRelocationForSymbol(RE, Value.SymbolName); else @@ -770,7 +965,7 @@ void RuntimeDyldELF::processRelocationRef(const ObjRelocationInfo &Rel, StubMap::const_iterator i = Stubs.find(Value); if (i != Stubs.end()) { // Symbol function stub already created, just relocate to it - resolveRelocation(Section, Rel.Offset, + resolveRelocation(Section, Offset, (uint64_t)Section.Address + i->second, RelType, 0); DEBUG(dbgs() << " Stub function found\n"); } else { @@ -779,21 +974,21 @@ void RuntimeDyldELF::processRelocationRef(const ObjRelocationInfo &Rel, Stubs[Value] = Section.StubOffset; uint8_t *StubTargetAddr = createStubFunction(Section.Address + Section.StubOffset); - RelocationEntry RE(Rel.SectionID, StubTargetAddr - Section.Address, + RelocationEntry RE(SectionID, StubTargetAddr - Section.Address, ELF::R_PPC64_ADDR64, Value.Addend); // Generates the 64-bits address loads as exemplified in section // 4.5.1 in PPC64 ELF ABI. 
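(For reference on the four relocation entries created just below, a sketch of how the PPC64 ELF ABI's HIGHEST/HIGHER/HI/LO halves carve up a 64-bit address; the Target value here is only a placeholder:)

    uint64_t Target = 0; // final symbol address, filled in at resolution time
    uint16_t Highest = uint16_t(Target >> 48); // R_PPC64_ADDR16_HIGHEST, bits 63:48
    uint16_t Higher  = uint16_t(Target >> 32); // R_PPC64_ADDR16_HIGHER,  bits 47:32
    uint16_t Hi      = uint16_t(Target >> 16); // R_PPC64_ADDR16_HI,      bits 31:16
    uint16_t Lo      = uint16_t(Target);       // R_PPC64_ADDR16_LO,      bits 15:0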
- RelocationEntry REhst(Rel.SectionID, + RelocationEntry REhst(SectionID, StubTargetAddr - Section.Address + 2, ELF::R_PPC64_ADDR16_HIGHEST, Value.Addend); - RelocationEntry REhr(Rel.SectionID, + RelocationEntry REhr(SectionID, StubTargetAddr - Section.Address + 6, ELF::R_PPC64_ADDR16_HIGHER, Value.Addend); - RelocationEntry REh(Rel.SectionID, + RelocationEntry REh(SectionID, StubTargetAddr - Section.Address + 14, ELF::R_PPC64_ADDR16_HI, Value.Addend); - RelocationEntry REl(Rel.SectionID, + RelocationEntry REl(SectionID, StubTargetAddr - Section.Address + 18, ELF::R_PPC64_ADDR16_LO, Value.Addend); @@ -809,7 +1004,7 @@ void RuntimeDyldELF::processRelocationRef(const ObjRelocationInfo &Rel, addRelocationForSection(REl, Value.SectionID); } - resolveRelocation(Section, Rel.Offset, + resolveRelocation(Section, Offset, (uint64_t)Section.Address + Section.StubOffset, RelType, 0); if (SymType == SymbolRef::ST_Unknown) @@ -819,7 +1014,7 @@ void RuntimeDyldELF::processRelocationRef(const ObjRelocationInfo &Rel, } } } else { - RelocationEntry RE(Rel.SectionID, Rel.Offset, RelType, Value.Addend); + RelocationEntry RE(SectionID, Offset, RelType, Value.Addend); // Extra check to avoid relocation againt empty symbols (usually // the R_PPC64_TOC). if (Value.SymbolName && !TargetName.empty()) @@ -827,8 +1022,55 @@ void RuntimeDyldELF::processRelocationRef(const ObjRelocationInfo &Rel, else addRelocationForSection(RE, Value.SectionID); } + } else if (Arch == Triple::systemz && + (RelType == ELF::R_390_PLT32DBL || + RelType == ELF::R_390_GOTENT)) { + // Create function stubs for both PLT and GOT references, regardless of + // whether the GOT reference is to data or code. The stub contains the + // full address of the symbol, as needed by GOT references, and the + // executable part only adds an overhead of 8 bytes. + // + // We could try to conserve space by allocating the code and data + // parts of the stub separately. However, as things stand, we allocate + // a stub for every relocation, so using a GOT in JIT code should be + // no less space efficient than using an explicit constant pool. + DEBUG(dbgs() << "\t\tThis is a SystemZ indirect relocation."); + SectionEntry &Section = Sections[SectionID]; + + // Look for an existing stub. + StubMap::const_iterator i = Stubs.find(Value); + uintptr_t StubAddress; + if (i != Stubs.end()) { + StubAddress = uintptr_t(Section.Address) + i->second; + DEBUG(dbgs() << " Stub function found\n"); + } else { + // Create a new stub function. 
+ DEBUG(dbgs() << " Create a new stub function\n"); + + uintptr_t BaseAddress = uintptr_t(Section.Address); + uintptr_t StubAlignment = getStubAlignment(); + StubAddress = (BaseAddress + Section.StubOffset + + StubAlignment - 1) & -StubAlignment; + unsigned StubOffset = StubAddress - BaseAddress; + + Stubs[Value] = StubOffset; + createStubFunction((uint8_t *)StubAddress); + RelocationEntry RE(SectionID, StubOffset + 8, + ELF::R_390_64, Value.Addend - Addend); + if (Value.SymbolName) + addRelocationForSymbol(RE, Value.SymbolName); + else + addRelocationForSection(RE, Value.SectionID); + Section.StubOffset = StubOffset + getMaxStubSize(); + } + + if (RelType == ELF::R_390_GOTENT) + resolveRelocation(Section, Offset, StubAddress + 8, + ELF::R_390_PC32DBL, Addend); + else + resolveRelocation(Section, Offset, StubAddress, RelType, Addend); } else { - RelocationEntry RE(Rel.SectionID, Rel.Offset, RelType, Value.Addend); + RelocationEntry RE(SectionID, Offset, RelType, Value.Addend); if (Value.SymbolName) addRelocationForSymbol(RE, Value.SymbolName); else @@ -836,13 +1078,6 @@ void RuntimeDyldELF::processRelocationRef(const ObjRelocationInfo &Rel, } } -unsigned RuntimeDyldELF::getCommonSymbolAlignment(const SymbolRef &Sym) { - // In ELF, the value of an SHN_COMMON symbol is its alignment requirement. - uint64_t Align; - Check(Sym.getValue(Align)); - return Align; -} - bool RuntimeDyldELF::isCompatibleFormat(const ObjectBuffer *Buffer) const { if (Buffer->getBufferSize() < strlen(ELF::ElfMagic)) return false; diff --git a/contrib/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.h b/contrib/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.h index 07e704b..794c7ec 100644 --- a/contrib/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.h +++ b/contrib/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.h @@ -31,7 +31,12 @@ namespace { } // end anonymous namespace class RuntimeDyldELF : public RuntimeDyldImpl { -protected: + void resolveRelocation(const SectionEntry &Section, + uint64_t Offset, + uint64_t Value, + uint32_t Type, + int64_t Addend); + void resolveX86_64Relocation(const SectionEntry &Section, uint64_t Offset, uint64_t Value, @@ -44,6 +49,12 @@ protected: uint32_t Type, int32_t Addend); + void resolveAArch64Relocation(const SectionEntry &Section, + uint64_t Offset, + uint64_t Value, + uint32_t Type, + int64_t Addend); + void resolveARMRelocation(const SectionEntry &Section, uint64_t Offset, uint32_t Value, @@ -62,21 +73,11 @@ protected: uint32_t Type, int64_t Addend); - virtual void resolveRelocation(const SectionEntry &Section, - uint64_t Offset, - uint64_t Value, - uint32_t Type, - int64_t Addend); - - virtual void processRelocationRef(const ObjRelocationInfo &Rel, - ObjectImage &Obj, - ObjSectionToIDMap &ObjSectionToID, - const SymbolTableMap &Symbols, - StubMap &Stubs); - - unsigned getCommonSymbolAlignment(const SymbolRef &Sym); - - virtual ObjectImage *createObjectImage(ObjectBuffer *InputBuffer); + void resolveSystemZRelocation(const SectionEntry &Section, + uint64_t Offset, + uint64_t Value, + uint32_t Type, + int64_t Addend); uint64_t findPPC64TOC() const; void findOPDEntrySection(ObjectImage &Obj, @@ -84,12 +85,19 @@ protected: RelocationValueRef &Rel); public: - RuntimeDyldELF(RTDyldMemoryManager *mm) - : RuntimeDyldImpl(mm) {} + RuntimeDyldELF(RTDyldMemoryManager *mm) : RuntimeDyldImpl(mm) {} + virtual void resolveRelocation(const RelocationEntry &RE, uint64_t Value); + virtual void processRelocationRef(unsigned SectionID, + RelocationRef RelI, + ObjectImage &Obj, + 
ObjSectionToIDMap &ObjSectionToID, + const SymbolTableMap &Symbols, + StubMap &Stubs); + virtual bool isCompatibleFormat(const ObjectBuffer *Buffer) const; + virtual ObjectImage *createObjectImage(ObjectBuffer *InputBuffer); + virtual StringRef getEHFrameSection(); virtual ~RuntimeDyldELF(); - - bool isCompatibleFormat(const ObjectBuffer *Buffer) const; }; } // end namespace llvm diff --git a/contrib/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldImpl.h b/contrib/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldImpl.h index f100994..383ffab 100644 --- a/contrib/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldImpl.h +++ b/contrib/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldImpl.h @@ -49,7 +49,7 @@ public: /// Address - address in the linker's memory where the section resides. uint8_t *Address; - /// Size - section size. + /// Size - section size. Doesn't include the stubs. size_t Size; /// LoadAddress - the address of the section in the target process's memory. @@ -67,9 +67,9 @@ public: uintptr_t ObjAddress; SectionEntry(StringRef name, uint8_t *address, size_t size, - uintptr_t stubOffset, uintptr_t objAddress) + uintptr_t objAddress) : Name(name), Address(address), Size(size), LoadAddress((uintptr_t)address), - StubOffset(stubOffset), ObjAddress(objAddress) {} + StubOffset(size), ObjAddress(objAddress) {} }; /// RelocationEntry - used to represent relocations internally in the dynamic @@ -89,20 +89,20 @@ public: /// used to make a relocation section relative instead of symbol relative. intptr_t Addend; + /// True if this is a PCRel relocation (MachO specific). + bool IsPCRel; + + /// The size of this relocation (MachO specific). + unsigned Size; + RelocationEntry(unsigned id, uint64_t offset, uint32_t type, int64_t addend) - : SectionID(id), Offset(offset), RelType(type), Addend(addend) {} -}; + : SectionID(id), Offset(offset), RelType(type), Addend(addend), + IsPCRel(false), Size(0) {} -/// ObjRelocationInfo - relocation information as read from the object file. -/// Used to pass around data taken from object::RelocationRef, together with -/// the section to which the relocation points (represented by a SectionID). -class ObjRelocationInfo { -public: - unsigned SectionID; - uint64_t Offset; - SymbolRef Symbol; - uint64_t Type; - int64_t AdditionalInfo; + RelocationEntry(unsigned id, uint64_t offset, uint32_t type, int64_t addend, + bool IsPCRel, unsigned Size) + : SectionID(id), Offset(offset), RelType(type), Addend(addend), + IsPCRel(IsPCRel), Size(Size) {} }; class RelocationValueRef { @@ -166,16 +166,29 @@ protected: Triple::ArchType Arch; inline unsigned getMaxStubSize() { + if (Arch == Triple::aarch64) + return 20; // movz; movk; movk; movk; br if (Arch == Triple::arm || Arch == Triple::thumb) return 8; // 32-bit instruction and 32-bit address else if (Arch == Triple::mipsel || Arch == Triple::mips) return 16; else if (Arch == Triple::ppc64) return 44; + else if (Arch == Triple::x86_64) + return 8; // GOT + else if (Arch == Triple::systemz) + return 16; else return 0; } + inline unsigned getStubAlignment() { + if (Arch == Triple::systemz) + return 8; + else + return 1; + } + bool HasError; std::string ErrorStr; @@ -194,22 +207,15 @@ protected: return (uint8_t*)Sections[SectionID].Address; } - // Subclasses can override this method to get the alignment requirement of - // a common symbol. Returns no alignment requirement if not implemented. 
- virtual unsigned getCommonSymbolAlignment(const SymbolRef &Sym) { - return 0; - } - - void writeInt16BE(uint8_t *Addr, uint16_t Value) { - if (sys::isLittleEndianHost()) + if (sys::IsLittleEndianHost) Value = sys::SwapByteOrder(Value); *Addr = (Value >> 8) & 0xFF; *(Addr+1) = Value & 0xFF; } void writeInt32BE(uint8_t *Addr, uint32_t Value) { - if (sys::isLittleEndianHost()) + if (sys::IsLittleEndianHost) Value = sys::SwapByteOrder(Value); *Addr = (Value >> 24) & 0xFF; *(Addr+1) = (Value >> 16) & 0xFF; @@ -218,7 +224,7 @@ protected: } void writeInt64BE(uint8_t *Addr, uint64_t Value) { - if (sys::isLittleEndianHost()) + if (sys::IsLittleEndianHost) Value = sys::SwapByteOrder(Value); *Addr = (Value >> 56) & 0xFF; *(Addr+1) = (Value >> 48) & 0xFF; @@ -269,24 +275,16 @@ protected: /// \brief Resolves relocations from Relocs list with address from Value. void resolveRelocationList(const RelocationList &Relocs, uint64_t Value); - void resolveRelocationEntry(const RelocationEntry &RE, uint64_t Value); /// \brief A object file specific relocation resolver - /// \param Section The section where the relocation is being applied - /// \param Offset The offset into the section for this relocation + /// \param RE The relocation to be resolved /// \param Value Target symbol address to apply the relocation action - /// \param Type object file specific relocation type - /// \param Addend A constant addend used to compute the value to be stored - /// into the relocatable field - virtual void resolveRelocation(const SectionEntry &Section, - uint64_t Offset, - uint64_t Value, - uint32_t Type, - int64_t Addend) = 0; + virtual void resolveRelocation(const RelocationEntry &RE, uint64_t Value) = 0; /// \brief Parses the object file relocation and stores it to Relocations /// or SymbolRelocations (this depends on the object file type). 
- virtual void processRelocationRef(const ObjRelocationInfo &Rel, + virtual void processRelocationRef(unsigned SectionID, + RelocationRef RelI, ObjectImage &Obj, ObjSectionToIDMap &ObjSectionToID, const SymbolTableMap &Symbols, @@ -336,6 +334,8 @@ public: StringRef getErrorString() { return ErrorStr; } virtual bool isCompatibleFormat(const ObjectBuffer *Buffer) const = 0; + + virtual StringRef getEHFrameSection(); }; } // end namespace llvm diff --git a/contrib/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldMachO.cpp b/contrib/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldMachO.cpp index bcc3df1..01a3fd9 100644 --- a/contrib/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldMachO.cpp +++ b/contrib/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldMachO.cpp @@ -21,16 +21,87 @@ using namespace llvm::object; namespace llvm { +static unsigned char *processFDE(unsigned char *P, intptr_t DeltaForText, intptr_t DeltaForEH) { + uint32_t Length = *((uint32_t*)P); + P += 4; + unsigned char *Ret = P + Length; + uint32_t Offset = *((uint32_t*)P); + if (Offset == 0) // is a CIE + return Ret; + + P += 4; + intptr_t FDELocation = *((intptr_t*)P); + intptr_t NewLocation = FDELocation - DeltaForText; + *((intptr_t*)P) = NewLocation; + P += sizeof(intptr_t); + + // Skip the FDE address range + P += sizeof(intptr_t); + + uint8_t Augmentationsize = *P; + P += 1; + if (Augmentationsize != 0) { + intptr_t LSDA = *((intptr_t*)P); + intptr_t NewLSDA = LSDA - DeltaForEH; + *((intptr_t*)P) = NewLSDA; + } + + return Ret; +} + +static intptr_t computeDelta(SectionEntry *A, SectionEntry *B) { + intptr_t ObjDistance = A->ObjAddress - B->ObjAddress; + intptr_t MemDistance = A->LoadAddress - B->LoadAddress; + return ObjDistance - MemDistance; +} + +StringRef RuntimeDyldMachO::getEHFrameSection() { + SectionEntry *Text = NULL; + SectionEntry *EHFrame = NULL; + SectionEntry *ExceptTab = NULL; + for (int i = 0, e = Sections.size(); i != e; ++i) { + if (Sections[i].Name == "__eh_frame") + EHFrame = &Sections[i]; + else if (Sections[i].Name == "__text") + Text = &Sections[i]; + else if (Sections[i].Name == "__gcc_except_tab") + ExceptTab = &Sections[i]; + } + if (Text == NULL || EHFrame == NULL) + return StringRef(); + + intptr_t DeltaForText = computeDelta(Text, EHFrame); + intptr_t DeltaForEH = 0; + if (ExceptTab) + DeltaForEH = computeDelta(ExceptTab, EHFrame); + + unsigned char *P = EHFrame->Address; + unsigned char *End = P + EHFrame->Size; + do { + P = processFDE(P, DeltaForText, DeltaForEH); + } while(P != End); + + return StringRef((char*)EHFrame->Address, EHFrame->Size); +} + +void RuntimeDyldMachO::resolveRelocation(const RelocationEntry &RE, + uint64_t Value) { + const SectionEntry &Section = Sections[RE.SectionID]; + return resolveRelocation(Section, RE.Offset, Value, RE.RelType, RE.Addend, + RE.IsPCRel, RE.Size); +} + void RuntimeDyldMachO::resolveRelocation(const SectionEntry &Section, uint64_t Offset, uint64_t Value, uint32_t Type, - int64_t Addend) { + int64_t Addend, + bool isPCRel, + unsigned LogSize) { uint8_t *LocalAddress = Section.Address + Offset; uint64_t FinalAddress = Section.LoadAddress + Offset; - bool isPCRel = (Type >> 24) & 1; - unsigned MachoType = (Type >> 28) & 0xf; - unsigned Size = 1 << ((Type >> 25) & 3); + unsigned MachoType = Type; + unsigned Size = 1 << LogSize; DEBUG(dbgs() << "resolveRelocation LocalAddress: " << format("%p", LocalAddress) @@ -205,89 +276,111 @@ bool RuntimeDyldMachO::resolveARMRelocation(uint8_t *LocalAddress, return false; } -void 
RuntimeDyldMachO::processRelocationRef(const ObjRelocationInfo &Rel, +void RuntimeDyldMachO::processRelocationRef(unsigned SectionID, + RelocationRef RelI, ObjectImage &Obj, ObjSectionToIDMap &ObjSectionToID, const SymbolTableMap &Symbols, StubMap &Stubs) { + const ObjectFile *OF = Obj.getObjectFile(); + const MachOObjectFile *MachO = static_cast<const MachOObjectFile*>(OF); + macho::RelocationEntry RE = MachO->getRelocation(RelI.getRawDataRefImpl()); - uint32_t RelType = (uint32_t) (Rel.Type & 0xffffffffL); + uint32_t RelType = MachO->getAnyRelocationType(RE); RelocationValueRef Value; - SectionEntry &Section = Sections[Rel.SectionID]; + SectionEntry &Section = Sections[SectionID]; + + bool isExtern = MachO->getPlainRelocationExternal(RE); + bool IsPCRel = MachO->getAnyRelocationPCRel(RE); + unsigned Size = MachO->getAnyRelocationLength(RE); + uint64_t Offset; + RelI.getOffset(Offset); + uint8_t *LocalAddress = Section.Address + Offset; + unsigned NumBytes = 1 << Size; + uint64_t Addend = 0; + memcpy(&Addend, LocalAddress, NumBytes); - bool isExtern = (RelType >> 27) & 1; if (isExtern) { // Obtain the symbol name which is referenced in the relocation + SymbolRef Symbol; + RelI.getSymbol(Symbol); StringRef TargetName; - const SymbolRef &Symbol = Rel.Symbol; Symbol.getName(TargetName); // First search for the symbol in the local symbol table SymbolTableMap::const_iterator lsi = Symbols.find(TargetName.data()); if (lsi != Symbols.end()) { Value.SectionID = lsi->second.first; - Value.Addend = lsi->second.second; + Value.Addend = lsi->second.second + Addend; } else { // Search for the symbol in the global symbol table SymbolTableMap::const_iterator gsi = GlobalSymbolTable.find(TargetName.data()); if (gsi != GlobalSymbolTable.end()) { Value.SectionID = gsi->second.first; - Value.Addend = gsi->second.second; - } else + Value.Addend = gsi->second.second + Addend; + } else { Value.SymbolName = TargetName.data(); + Value.Addend = Addend; + } } } else { - error_code err; - uint8_t sectionIndex = static_cast<uint8_t>(RelType & 0xFF); - section_iterator si = Obj.begin_sections(), - se = Obj.end_sections(); - for (uint8_t i = 1; i < sectionIndex; i++) { - error_code err; - si.increment(err); - if (si == se) - break; - } - assert(si != se && "No section containing relocation!"); - Value.SectionID = findOrEmitSection(Obj, *si, true, ObjSectionToID); - Value.Addend = 0; - // FIXME: The size and type of the relocation determines if we can - // encode an Addend in the target location itself, and if so, how many - // bytes we should read in order to get it. We don't yet support doing - // that, and just assuming it's sizeof(intptr_t) is blatantly wrong. - //Value.Addend = *(const intptr_t *)Target; - if (Value.Addend) { - // The MachO addend is an offset from the current section. 
We need it - // to be an offset from the destination section - Value.Addend += Section.ObjAddress - Sections[Value.SectionID].ObjAddress; - } + SectionRef Sec = MachO->getRelocationSection(RE); + Value.SectionID = findOrEmitSection(Obj, Sec, true, ObjSectionToID); + uint64_t Addr; + Sec.getAddress(Addr); + Value.Addend = Addend - Addr; } - if (Arch == Triple::arm && (RelType & 0xf) == macho::RIT_ARM_Branch24Bit) { + if (Arch == Triple::x86_64 && RelType == macho::RIT_X86_64_GOT) { + assert(IsPCRel); + assert(Size == 2); + StubMap::const_iterator i = Stubs.find(Value); + uint8_t *Addr; + if (i != Stubs.end()) { + Addr = Section.Address + i->second; + } else { + Stubs[Value] = Section.StubOffset; + uint8_t *GOTEntry = Section.Address + Section.StubOffset; + RelocationEntry RE(SectionID, Section.StubOffset, + macho::RIT_X86_64_Unsigned, Value.Addend - 4, false, + 3); + if (Value.SymbolName) + addRelocationForSymbol(RE, Value.SymbolName); + else + addRelocationForSection(RE, Value.SectionID); + Section.StubOffset += 8; + Addr = GOTEntry; + } + resolveRelocation(Section, Offset, (uint64_t)Addr, + macho::RIT_X86_64_Unsigned, 4, true, 2); + } else if (Arch == Triple::arm && + (RelType & 0xf) == macho::RIT_ARM_Branch24Bit) { // This is an ARM branch relocation, need to use a stub function. // Look up for existing stub. StubMap::const_iterator i = Stubs.find(Value); if (i != Stubs.end()) - resolveRelocation(Section, Rel.Offset, + resolveRelocation(Section, Offset, (uint64_t)Section.Address + i->second, - RelType, 0); + RelType, 0, IsPCRel, Size); else { // Create a new stub function. Stubs[Value] = Section.StubOffset; uint8_t *StubTargetAddr = createStubFunction(Section.Address + Section.StubOffset); - RelocationEntry RE(Rel.SectionID, StubTargetAddr - Section.Address, + RelocationEntry RE(SectionID, StubTargetAddr - Section.Address, macho::RIT_Vanilla, Value.Addend); if (Value.SymbolName) addRelocationForSymbol(RE, Value.SymbolName); else addRelocationForSection(RE, Value.SectionID); - resolveRelocation(Section, Rel.Offset, + resolveRelocation(Section, Offset, (uint64_t)Section.Address + Section.StubOffset, - RelType, 0); + RelType, 0, IsPCRel, Size); Section.StubOffset += getMaxStubSize(); } } else { - RelocationEntry RE(Rel.SectionID, Rel.Offset, RelType, Value.Addend); + RelocationEntry RE(SectionID, Offset, RelType, Value.Addend, + IsPCRel, Size); if (Value.SymbolName) addRelocationForSymbol(RE, Value.SymbolName); else diff --git a/contrib/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldMachO.h b/contrib/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldMachO.h index 62d8487..df8d3bb 100644 --- a/contrib/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldMachO.h +++ b/contrib/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldMachO.h @@ -16,7 +16,7 @@ #include "RuntimeDyldImpl.h" #include "llvm/ADT/IndexedMap.h" -#include "llvm/Object/MachOObject.h" +#include "llvm/Object/MachO.h" #include "llvm/Support/Format.h" using namespace llvm; @@ -25,7 +25,6 @@ using namespace llvm::object; namespace llvm { class RuntimeDyldMachO : public RuntimeDyldImpl { -protected: bool resolveI386Relocation(uint8_t *LocalAddress, uint64_t FinalAddress, uint64_t Value, @@ -48,22 +47,25 @@ protected: unsigned Size, int64_t Addend); - virtual void processRelocationRef(const ObjRelocationInfo &Rel, + void resolveRelocation(const SectionEntry &Section, + uint64_t Offset, + uint64_t Value, + uint32_t Type, + int64_t Addend, + bool isPCRel, + unsigned Size); +public: + RuntimeDyldMachO(RTDyldMemoryManager *mm) : 
RuntimeDyldImpl(mm) {} + + virtual void resolveRelocation(const RelocationEntry &RE, uint64_t Value); + virtual void processRelocationRef(unsigned SectionID, + RelocationRef RelI, ObjectImage &Obj, ObjSectionToIDMap &ObjSectionToID, const SymbolTableMap &Symbols, StubMap &Stubs); - -public: - virtual void resolveRelocation(const SectionEntry &Section, - uint64_t Offset, - uint64_t Value, - uint32_t Type, - int64_t Addend); - - RuntimeDyldMachO(RTDyldMemoryManager *mm) : RuntimeDyldImpl(mm) {} - - bool isCompatibleFormat(const ObjectBuffer *Buffer) const; + virtual bool isCompatibleFormat(const ObjectBuffer *Buffer) const; + virtual StringRef getEHFrameSection(); }; } // end namespace llvm diff --git a/contrib/llvm/lib/IR/AsmWriter.cpp b/contrib/llvm/lib/IR/AsmWriter.cpp index fb591a8..7761127d 100644 --- a/contrib/llvm/lib/IR/AsmWriter.cpp +++ b/contrib/llvm/lib/IR/AsmWriter.cpp @@ -1605,6 +1605,29 @@ void AssemblyWriter::printFunction(const Function *F) { if (F->isMaterializable()) Out << "; Materializable\n"; + const AttributeSet &Attrs = F->getAttributes(); + if (Attrs.hasAttributes(AttributeSet::FunctionIndex)) { + AttributeSet AS = Attrs.getFnAttributes(); + std::string AttrStr; + + unsigned Idx = 0; + for (unsigned E = AS.getNumSlots(); Idx != E; ++Idx) + if (AS.getSlotIndex(Idx) == AttributeSet::FunctionIndex) + break; + + for (AttributeSet::iterator I = AS.begin(Idx), E = AS.end(Idx); + I != E; ++I) { + Attribute Attr = *I; + if (!Attr.isStringAttribute()) { + if (!AttrStr.empty()) AttrStr += ' '; + AttrStr += Attr.getAsString(); + } + } + + if (!AttrStr.empty()) + Out << "; Function Attrs: " << AttrStr << '\n'; + } + if (F->isDeclaration()) Out << "declare "; else @@ -1620,7 +1643,6 @@ void AssemblyWriter::printFunction(const Function *F) { } FunctionType *FT = F->getFunctionType(); - const AttributeSet &Attrs = F->getAttributes(); if (Attrs.hasAttributes(AttributeSet::ReturnIndex)) Out << Attrs.getAsString(AttributeSet::ReturnIndex) << ' '; TypePrinter.print(F->getReturnType(), Out); @@ -1761,10 +1783,8 @@ void AssemblyWriter::printBasicBlock(const BasicBlock *BB) { /// which slot it occupies. /// void AssemblyWriter::printInfoComment(const Value &V) { - if (AnnotationWriter) { + if (AnnotationWriter) AnnotationWriter->printInfoComment(V, Out); - return; - } } // This member is called for each Instruction in a function.. diff --git a/contrib/llvm/lib/IR/AttributeImpl.h b/contrib/llvm/lib/IR/AttributeImpl.h index ad2670d..0b6228b 100644 --- a/contrib/llvm/lib/IR/AttributeImpl.h +++ b/contrib/llvm/lib/IR/AttributeImpl.h @@ -228,7 +228,7 @@ public: /// is the index of the return, parameter, or function object that the /// attributes are applied to, not the index into the AttrNodes list where the /// attributes reside. 
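The slot/index distinction documented in the comment above is easy to trip over: slots are dense positions 0..getNumSlots()-1 into the internal AttrNodes list, while getSlotIndex() reports what the attributes stored at that slot apply to. A minimal sketch against the interfaces visible in this patch (dumpSlots is a hypothetical helper, not part of the change):

#include "llvm/IR/Attributes.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;

// Hypothetical helper: walk an AttributeSet slot by slot and report which
// return/parameter/function index each slot's attributes are attached to.
static void dumpSlots(AttributeSet AS) {
  for (unsigned Slot = 0, E = AS.getNumSlots(); Slot != E; ++Slot) {
    unsigned Index = AS.getSlotIndex(Slot); // 0 = return value, 1..N = params,
                                            // AttributeSet::FunctionIndex = fn
    errs() << "slot " << Slot << " -> index " << Index << ": "
           << AS.getAsString(Index) << "\n";
  }
}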
- uint64_t getSlotIndex(unsigned Slot) const { + unsigned getSlotIndex(unsigned Slot) const { return AttrNodes[Slot].first; } @@ -248,15 +248,15 @@ public: typedef AttributeSetNode::iterator iterator; typedef AttributeSetNode::const_iterator const_iterator; - iterator begin(unsigned Idx) - { return AttrNodes[Idx].second->begin(); } - iterator end(unsigned Idx) - { return AttrNodes[Idx].second->end(); } + iterator begin(unsigned Slot) + { return AttrNodes[Slot].second->begin(); } + iterator end(unsigned Slot) + { return AttrNodes[Slot].second->end(); } - const_iterator begin(unsigned Idx) const - { return AttrNodes[Idx].second->begin(); } - const_iterator end(unsigned Idx) const - { return AttrNodes[Idx].second->end(); } + const_iterator begin(unsigned Slot) const + { return AttrNodes[Slot].second->begin(); } + const_iterator end(unsigned Slot) const + { return AttrNodes[Slot].second->end(); } void Profile(FoldingSetNodeID &ID) const { Profile(ID, AttrNodes); @@ -270,7 +270,7 @@ public: } // FIXME: This atrocity is temporary. - uint64_t Raw(uint64_t Index) const; + uint64_t Raw(unsigned Index) const; }; } // end llvm namespace diff --git a/contrib/llvm/lib/IR/Attributes.cpp b/contrib/llvm/lib/IR/Attributes.cpp index 2d82891..4fe6f9d 100644 --- a/contrib/llvm/lib/IR/Attributes.cpp +++ b/contrib/llvm/lib/IR/Attributes.cpp @@ -195,6 +195,8 @@ std::string Attribute::getAsString(bool InAttrGrp) const { return "readnone"; if (hasAttribute(Attribute::ReadOnly)) return "readonly"; + if (hasAttribute(Attribute::Returned)) + return "returned"; if (hasAttribute(Attribute::ReturnsTwice)) return "returns_twice"; if (hasAttribute(Attribute::SExt)) @@ -393,6 +395,7 @@ uint64_t AttributeImpl::getAttrMask(Attribute::AttrKind Val) { case Attribute::SanitizeThread: return 1ULL << 36; case Attribute::SanitizeMemory: return 1ULL << 37; case Attribute::NoBuiltin: return 1ULL << 38; + case Attribute::Returned: return 1ULL << 39; } llvm_unreachable("Unsupported attribute type"); } @@ -481,11 +484,12 @@ unsigned AttributeSetNode::getStackAlignment() const { } std::string AttributeSetNode::getAsString(bool InAttrGrp) const { - std::string Str = ""; + std::string Str; for (SmallVectorImpl<Attribute>::const_iterator I = AttrList.begin(), - E = AttrList.end(); I != E; ) { + E = AttrList.end(); I != E; ++I) { + if (I != AttrList.begin()) + Str += ' '; Str += I->getAsString(InAttrGrp); - if (++I != E) Str += " "; } return Str; } @@ -494,7 +498,7 @@ std::string AttributeSetNode::getAsString(bool InAttrGrp) const { // AttributeSetImpl Definition //===----------------------------------------------------------------------===// -uint64_t AttributeSetImpl::Raw(uint64_t Index) const { +uint64_t AttributeSetImpl::Raw(unsigned Index) const { for (unsigned I = 0, E = getNumAttributes(); I != E; ++I) { if (getSlotIndex(I) != Index) continue; const AttributeSetNode *ASN = AttrNodes[I].second; @@ -592,7 +596,7 @@ AttributeSet AttributeSet::get(LLVMContext &C, return getImpl(C, Attrs); } -AttributeSet AttributeSet::get(LLVMContext &C, unsigned Idx, AttrBuilder &B) { +AttributeSet AttributeSet::get(LLVMContext &C, unsigned Index, AttrBuilder &B) { if (!B.hasAttributes()) return AttributeSet(); @@ -604,29 +608,29 @@ AttributeSet AttributeSet::get(LLVMContext &C, unsigned Idx, AttrBuilder &B) { continue; if (Kind == Attribute::Alignment) - Attrs.push_back(std::make_pair(Idx, Attribute:: + Attrs.push_back(std::make_pair(Index, Attribute:: getWithAlignment(C, B.getAlignment()))); else if (Kind == Attribute::StackAlignment) - 
Attrs.push_back(std::make_pair(Idx, Attribute:: + Attrs.push_back(std::make_pair(Index, Attribute:: getWithStackAlignment(C, B.getStackAlignment()))); else - Attrs.push_back(std::make_pair(Idx, Attribute::get(C, Kind))); + Attrs.push_back(std::make_pair(Index, Attribute::get(C, Kind))); } // Add target-dependent (string) attributes. for (AttrBuilder::td_iterator I = B.td_begin(), E = B.td_end(); I != E; ++I) - Attrs.push_back(std::make_pair(Idx, Attribute::get(C, I->first,I->second))); + Attrs.push_back(std::make_pair(Index, Attribute::get(C, I->first,I->second))); return get(C, Attrs); } -AttributeSet AttributeSet::get(LLVMContext &C, unsigned Idx, +AttributeSet AttributeSet::get(LLVMContext &C, unsigned Index, ArrayRef<Attribute::AttrKind> Kind) { SmallVector<std::pair<unsigned, Attribute>, 8> Attrs; for (ArrayRef<Attribute::AttrKind>::iterator I = Kind.begin(), E = Kind.end(); I != E; ++I) - Attrs.push_back(std::make_pair(Idx, Attribute::get(C, *I))); + Attrs.push_back(std::make_pair(Index, Attribute::get(C, *I))); return get(C, Attrs); } @@ -643,20 +647,20 @@ AttributeSet AttributeSet::get(LLVMContext &C, ArrayRef<AttributeSet> Attrs) { return getImpl(C, AttrNodeVec); } -AttributeSet AttributeSet::addAttribute(LLVMContext &C, unsigned Idx, +AttributeSet AttributeSet::addAttribute(LLVMContext &C, unsigned Index, Attribute::AttrKind Attr) const { - if (hasAttribute(Idx, Attr)) return *this; - return addAttributes(C, Idx, AttributeSet::get(C, Idx, Attr)); + if (hasAttribute(Index, Attr)) return *this; + return addAttributes(C, Index, AttributeSet::get(C, Index, Attr)); } -AttributeSet AttributeSet::addAttribute(LLVMContext &C, unsigned Idx, +AttributeSet AttributeSet::addAttribute(LLVMContext &C, unsigned Index, StringRef Kind) const { llvm::AttrBuilder B; B.addAttribute(Kind); - return addAttributes(C, Idx, AttributeSet::get(C, Idx, B)); + return addAttributes(C, Index, AttributeSet::get(C, Index, B)); } -AttributeSet AttributeSet::addAttributes(LLVMContext &C, unsigned Idx, +AttributeSet AttributeSet::addAttributes(LLVMContext &C, unsigned Index, AttributeSet Attrs) const { if (!pImpl) return Attrs; if (!Attrs.pImpl) return *this; @@ -664,8 +668,8 @@ AttributeSet AttributeSet::addAttributes(LLVMContext &C, unsigned Idx, #ifndef NDEBUG // FIXME it is not obvious how this should work for alignment. For now, say // we can't change a known alignment. - unsigned OldAlign = getParamAlignment(Idx); - unsigned NewAlign = Attrs.getParamAlignment(Idx); + unsigned OldAlign = getParamAlignment(Index); + unsigned NewAlign = Attrs.getParamAlignment(Index); assert((!OldAlign || !NewAlign || OldAlign == NewAlign) && "Attempt to change alignment!"); #endif @@ -676,8 +680,8 @@ AttributeSet AttributeSet::addAttributes(LLVMContext &C, unsigned Idx, AttributeSet AS; uint64_t LastIndex = 0; for (unsigned I = 0, E = NumAttrs; I != E; ++I) { - if (getSlotIndex(I) >= Idx) { - if (getSlotIndex(I) == Idx) AS = getSlotAttributes(LastIndex++); + if (getSlotIndex(I) >= Index) { + if (getSlotIndex(I) == Index) AS = getSlotAttributes(LastIndex++); break; } LastIndex = I + 1; @@ -686,17 +690,17 @@ AttributeSet AttributeSet::addAttributes(LLVMContext &C, unsigned Idx, // Now add the attribute into the correct slot. There may already be an // AttributeSet there. 
- AttrBuilder B(AS, Idx); + AttrBuilder B(AS, Index); for (unsigned I = 0, E = Attrs.pImpl->getNumAttributes(); I != E; ++I) - if (Attrs.getSlotIndex(I) == Idx) { + if (Attrs.getSlotIndex(I) == Index) { for (AttributeSetImpl::const_iterator II = Attrs.pImpl->begin(I), IE = Attrs.pImpl->end(I); II != IE; ++II) B.addAttribute(*II); break; } - AttrSet.push_back(AttributeSet::get(C, Idx, B)); + AttrSet.push_back(AttributeSet::get(C, Index, B)); // Add the remaining attribute slots. for (unsigned I = LastIndex, E = NumAttrs; I < E; ++I) @@ -705,13 +709,13 @@ AttributeSet AttributeSet::addAttributes(LLVMContext &C, unsigned Idx, return get(C, AttrSet); } -AttributeSet AttributeSet::removeAttribute(LLVMContext &C, unsigned Idx, +AttributeSet AttributeSet::removeAttribute(LLVMContext &C, unsigned Index, Attribute::AttrKind Attr) const { - if (!hasAttribute(Idx, Attr)) return *this; - return removeAttributes(C, Idx, AttributeSet::get(C, Idx, Attr)); + if (!hasAttribute(Index, Attr)) return *this; + return removeAttributes(C, Index, AttributeSet::get(C, Index, Attr)); } -AttributeSet AttributeSet::removeAttributes(LLVMContext &C, unsigned Idx, +AttributeSet AttributeSet::removeAttributes(LLVMContext &C, unsigned Index, AttributeSet Attrs) const { if (!pImpl) return AttributeSet(); if (!Attrs.pImpl) return *this; @@ -719,7 +723,7 @@ AttributeSet AttributeSet::removeAttributes(LLVMContext &C, unsigned Idx, #ifndef NDEBUG // FIXME it is not obvious how this should work for alignment. // For now, say we can't pass in alignment, which no current use does. - assert(!Attrs.hasAttribute(Idx, Attribute::Alignment) && + assert(!Attrs.hasAttribute(Index, Attribute::Alignment) && "Attempt to change alignment!"); #endif @@ -729,8 +733,8 @@ AttributeSet AttributeSet::removeAttributes(LLVMContext &C, unsigned Idx, AttributeSet AS; uint64_t LastIndex = 0; for (unsigned I = 0, E = NumAttrs; I != E; ++I) { - if (getSlotIndex(I) >= Idx) { - if (getSlotIndex(I) == Idx) AS = getSlotAttributes(LastIndex++); + if (getSlotIndex(I) >= Index) { + if (getSlotIndex(I) == Index) AS = getSlotAttributes(LastIndex++); break; } LastIndex = I + 1; @@ -739,15 +743,15 @@ AttributeSet AttributeSet::removeAttributes(LLVMContext &C, unsigned Idx, // Now remove the attribute from the correct slot. There may already be an // AttributeSet there. - AttrBuilder B(AS, Idx); + AttrBuilder B(AS, Index); for (unsigned I = 0, E = Attrs.pImpl->getNumAttributes(); I != E; ++I) - if (Attrs.getSlotIndex(I) == Idx) { - B.removeAttributes(Attrs.pImpl->getSlotAttributes(I), Idx); + if (Attrs.getSlotIndex(I) == Index) { + B.removeAttributes(Attrs.pImpl->getSlotAttributes(I), Index); break; } - AttrSet.push_back(AttributeSet::get(C, Idx, B)); + AttrSet.push_back(AttributeSet::get(C, Index, B)); // Add the remaining attribute slots. for (unsigned I = LastIndex, E = NumAttrs; I < E; ++I) @@ -764,11 +768,11 @@ LLVMContext &AttributeSet::getContext() const { return pImpl->getContext(); } -AttributeSet AttributeSet::getParamAttributes(unsigned Idx) const { - return pImpl && hasAttributes(Idx) ? +AttributeSet AttributeSet::getParamAttributes(unsigned Index) const { + return pImpl && hasAttributes(Index) ? AttributeSet::get(pImpl->getContext(), ArrayRef<std::pair<unsigned, AttributeSetNode*> >( - std::make_pair(Idx, getAttributes(Idx)))) : + std::make_pair(Index, getAttributes(Index)))) : AttributeSet(); } @@ -848,27 +852,27 @@ std::string AttributeSet::getAsString(unsigned Index, } /// \brief The attributes for the specified index are returned. 
-AttributeSetNode *AttributeSet::getAttributes(unsigned Idx) const { +AttributeSetNode *AttributeSet::getAttributes(unsigned Index) const { if (!pImpl) return 0; // Loop through to find the attribute node we want. for (unsigned I = 0, E = pImpl->getNumAttributes(); I != E; ++I) - if (pImpl->getSlotIndex(I) == Idx) + if (pImpl->getSlotIndex(I) == Index) return pImpl->getSlotNode(I); return 0; } -AttributeSet::iterator AttributeSet::begin(unsigned Idx) const { +AttributeSet::iterator AttributeSet::begin(unsigned Slot) const { if (!pImpl) return ArrayRef<Attribute>().begin(); - return pImpl->begin(Idx); + return pImpl->begin(Slot); } -AttributeSet::iterator AttributeSet::end(unsigned Idx) const { +AttributeSet::iterator AttributeSet::end(unsigned Slot) const { if (!pImpl) return ArrayRef<Attribute>().end(); - return pImpl->end(Idx); + return pImpl->end(Slot); } //===----------------------------------------------------------------------===// @@ -882,7 +886,7 @@ unsigned AttributeSet::getNumSlots() const { return pImpl ? pImpl->getNumAttributes() : 0; } -uint64_t AttributeSet::getSlotIndex(unsigned Slot) const { +unsigned AttributeSet::getSlotIndex(unsigned Slot) const { assert(pImpl && Slot < pImpl->getNumAttributes() && "Slot # out of range!"); return pImpl->getSlotIndex(Slot); @@ -919,13 +923,13 @@ void AttributeSet::dump() const { // AttrBuilder Method Implementations //===----------------------------------------------------------------------===// -AttrBuilder::AttrBuilder(AttributeSet AS, unsigned Idx) +AttrBuilder::AttrBuilder(AttributeSet AS, unsigned Index) : Attrs(0), Alignment(0), StackAlignment(0) { AttributeSetImpl *pImpl = AS.pImpl; if (!pImpl) return; for (unsigned I = 0, E = pImpl->getNumAttributes(); I != E; ++I) { - if (pImpl->getSlotIndex(I) != Idx) continue; + if (pImpl->getSlotIndex(I) != Index) continue; for (AttributeSetImpl::const_iterator II = pImpl->begin(I), IE = pImpl->end(I); II != IE; ++II) @@ -982,16 +986,16 @@ AttrBuilder &AttrBuilder::removeAttribute(Attribute::AttrKind Val) { } AttrBuilder &AttrBuilder::removeAttributes(AttributeSet A, uint64_t Index) { - unsigned Idx = ~0U; + unsigned Slot = ~0U; for (unsigned I = 0, E = A.getNumSlots(); I != E; ++I) if (A.getSlotIndex(I) == Index) { - Idx = I; + Slot = I; break; } - assert(Idx != ~0U && "Couldn't find index in AttributeSet!"); + assert(Slot != ~0U && "Couldn't find index in AttributeSet!"); - for (AttributeSet::iterator I = A.begin(Idx), E = A.end(Idx); I != E; ++I) { + for (AttributeSet::iterator I = A.begin(Slot), E = A.end(Slot); I != E; ++I) { Attribute Attr = *I; if (Attr.isEnumAttribute() || Attr.isAlignAttribute()) { Attribute::AttrKind Kind = I->getKindAsEnum(); @@ -1069,16 +1073,16 @@ bool AttrBuilder::hasAttributes() const { } bool AttrBuilder::hasAttributes(AttributeSet A, uint64_t Index) const { - unsigned Idx = ~0U; + unsigned Slot = ~0U; for (unsigned I = 0, E = A.getNumSlots(); I != E; ++I) if (A.getSlotIndex(I) == Index) { - Idx = I; + Slot = I; break; } - assert(Idx != ~0U && "Couldn't find the index!"); + assert(Slot != ~0U && "Couldn't find the index!"); - for (AttributeSet::iterator I = A.begin(Idx), E = A.end(Idx); + for (AttributeSet::iterator I = A.begin(Slot), E = A.end(Slot); I != E; ++I) { Attribute Attr = *I; if (Attr.isEnumAttribute() || Attr.isAlignAttribute()) { @@ -1109,33 +1113,6 @@ bool AttrBuilder::operator==(const AttrBuilder &B) { return Alignment == B.Alignment && StackAlignment == B.StackAlignment; } -void AttrBuilder::removeFunctionOnlyAttrs() { - 
removeAttribute(Attribute::NoReturn) - .removeAttribute(Attribute::NoUnwind) - .removeAttribute(Attribute::ReadNone) - .removeAttribute(Attribute::ReadOnly) - .removeAttribute(Attribute::NoInline) - .removeAttribute(Attribute::AlwaysInline) - .removeAttribute(Attribute::OptimizeForSize) - .removeAttribute(Attribute::StackProtect) - .removeAttribute(Attribute::StackProtectReq) - .removeAttribute(Attribute::StackProtectStrong) - .removeAttribute(Attribute::NoRedZone) - .removeAttribute(Attribute::NoImplicitFloat) - .removeAttribute(Attribute::Naked) - .removeAttribute(Attribute::InlineHint) - .removeAttribute(Attribute::StackAlignment) - .removeAttribute(Attribute::UWTable) - .removeAttribute(Attribute::NonLazyBind) - .removeAttribute(Attribute::ReturnsTwice) - .removeAttribute(Attribute::SanitizeAddress) - .removeAttribute(Attribute::SanitizeThread) - .removeAttribute(Attribute::SanitizeMemory) - .removeAttribute(Attribute::MinSize) - .removeAttribute(Attribute::NoDuplicate) - .removeAttribute(Attribute::NoBuiltin); -} - AttrBuilder &AttrBuilder::addRawValue(uint64_t Val) { // FIXME: Remove this in 4.0. if (!Val) return *this; diff --git a/contrib/llvm/lib/IR/Constants.cpp b/contrib/llvm/lib/IR/Constants.cpp index 1abb656..2c6971c 100644 --- a/contrib/llvm/lib/IR/Constants.cpp +++ b/contrib/llvm/lib/IR/Constants.cpp @@ -237,18 +237,21 @@ void Constant::destroyConstantImpl() { delete this; } -/// canTrap - Return true if evaluation of this constant could trap. This is -/// true for things like constant expressions that could divide by zero. -bool Constant::canTrap() const { - assert(getType()->isFirstClassType() && "Cannot evaluate aggregate vals!"); +static bool canTrapImpl(const Constant *C, + SmallPtrSet<const ConstantExpr *, 4> &NonTrappingOps) { + assert(C->getType()->isFirstClassType() && "Cannot evaluate aggregate vals!"); // The only thing that could possibly trap are constant exprs. - const ConstantExpr *CE = dyn_cast<ConstantExpr>(this); - if (!CE) return false; + const ConstantExpr *CE = dyn_cast<ConstantExpr>(C); + if (!CE) + return false; // ConstantExpr traps if any operands can trap. - for (unsigned i = 0, e = getNumOperands(); i != e; ++i) - if (CE->getOperand(i)->canTrap()) - return true; + for (unsigned i = 0, e = C->getNumOperands(); i != e; ++i) { + if (ConstantExpr *Op = dyn_cast<ConstantExpr>(CE->getOperand(i))) { + if (NonTrappingOps.insert(Op) && canTrapImpl(Op, NonTrappingOps)) + return true; + } + } // Otherwise, only specific operations can trap. switch (CE->getOpcode()) { @@ -267,6 +270,13 @@ bool Constant::canTrap() const { } } +/// canTrap - Return true if evaluation of this constant could trap. This is +/// true for things like constant expressions that could divide by zero. +bool Constant::canTrap() const { + SmallPtrSet<const ConstantExpr *, 4> NonTrappingOps; + return canTrapImpl(this, NonTrappingOps); +} + /// isThreadDependent - Return true if the value can vary between threads. 
bool Constant::isThreadDependent() const { SmallPtrSet<const Constant*, 64> Visited; diff --git a/contrib/llvm/lib/IR/ConstantsContext.h b/contrib/llvm/lib/IR/ConstantsContext.h index e995858..32bed95 100644 --- a/contrib/llvm/lib/IR/ConstantsContext.h +++ b/contrib/llvm/lib/IR/ConstantsContext.h @@ -318,7 +318,7 @@ struct ExprMapKeyType { ArrayRef<Constant*> ops, unsigned short flags = 0, unsigned short optionalflags = 0, - ArrayRef<unsigned> inds = ArrayRef<unsigned>()) + ArrayRef<unsigned> inds = None) : opcode(opc), subclassoptionaldata(optionalflags), subclassdata(flags), operands(ops.begin(), ops.end()), indices(inds.begin(), inds.end()) {} uint8_t opcode; diff --git a/contrib/llvm/lib/IR/Core.cpp b/contrib/llvm/lib/IR/Core.cpp index 983b49c..889d574 100644 --- a/contrib/llvm/lib/IR/Core.cpp +++ b/contrib/llvm/lib/IR/Core.cpp @@ -21,7 +21,9 @@ #include "llvm/IR/GlobalVariable.h" #include "llvm/IR/InlineAsm.h" #include "llvm/IR/IntrinsicInst.h" +#include "llvm/IR/IRBuilder.h" #include "llvm/IR/LLVMContext.h" +#include "llvm/IR/Module.h" #include "llvm/PassManager.h" #include "llvm/Support/CallSite.h" #include "llvm/Support/Debug.h" @@ -1301,6 +1303,53 @@ void LLVMSetGlobalConstant(LLVMValueRef GlobalVar, LLVMBool IsConstant) { unwrap<GlobalVariable>(GlobalVar)->setConstant(IsConstant != 0); } +LLVMThreadLocalMode LLVMGetThreadLocalMode(LLVMValueRef GlobalVar) { + switch (unwrap<GlobalVariable>(GlobalVar)->getThreadLocalMode()) { + case GlobalVariable::NotThreadLocal: + return LLVMNotThreadLocal; + case GlobalVariable::GeneralDynamicTLSModel: + return LLVMGeneralDynamicTLSModel; + case GlobalVariable::LocalDynamicTLSModel: + return LLVMLocalDynamicTLSModel; + case GlobalVariable::InitialExecTLSModel: + return LLVMInitialExecTLSModel; + case GlobalVariable::LocalExecTLSModel: + return LLVMLocalExecTLSModel; + } + + llvm_unreachable("Invalid GlobalVariable thread local mode"); +} + +void LLVMSetThreadLocalMode(LLVMValueRef GlobalVar, LLVMThreadLocalMode Mode) { + GlobalVariable *GV = unwrap<GlobalVariable>(GlobalVar); + + switch (Mode) { + case LLVMNotThreadLocal: + GV->setThreadLocalMode(GlobalVariable::NotThreadLocal); + break; + case LLVMGeneralDynamicTLSModel: + GV->setThreadLocalMode(GlobalVariable::GeneralDynamicTLSModel); + break; + case LLVMLocalDynamicTLSModel: + GV->setThreadLocalMode(GlobalVariable::LocalDynamicTLSModel); + break; + case LLVMInitialExecTLSModel: + GV->setThreadLocalMode(GlobalVariable::InitialExecTLSModel); + break; + case LLVMLocalExecTLSModel: + GV->setThreadLocalMode(GlobalVariable::LocalExecTLSModel); + break; + } +} + +LLVMBool LLVMIsExternallyInitialized(LLVMValueRef GlobalVar) { + return unwrap<GlobalVariable>(GlobalVar)->isExternallyInitialized(); +} + +void LLVMSetExternallyInitialized(LLVMValueRef GlobalVar, LLVMBool IsExtInit) { + unwrap<GlobalVariable>(GlobalVar)->setExternallyInitialized(IsExtInit); +} + /*--.. 
Operations on aliases ......................................--*/ LLVMValueRef LLVMAddAlias(LLVMModuleRef M, LLVMTypeRef Ty, LLVMValueRef Aliasee, @@ -1396,6 +1445,18 @@ void LLVMAddFunctionAttr(LLVMValueRef Fn, LLVMAttribute PA) { Func->setAttributes(PALnew); } +void LLVMAddTargetDependentFunctionAttr(LLVMValueRef Fn, const char *A, + const char *V) { + Function *Func = unwrap<Function>(Fn); + AttributeSet::AttrIndex Idx = + AttributeSet::AttrIndex(AttributeSet::FunctionIndex); + AttrBuilder B; + + B.addAttribute(A, V); + AttributeSet Set = AttributeSet::get(Func->getContext(), Idx, B); + Func->addAttributes(Idx, Set); +} + void LLVMRemoveFunctionAttr(LLVMValueRef Fn, LLVMAttribute PA) { Function *Func = unwrap<Function>(Fn); const AttributeSet PAL = Func->getAttributes(); @@ -2331,6 +2392,42 @@ LLVMValueRef LLVMBuildPtrDiff(LLVMBuilderRef B, LLVMValueRef LHS, return wrap(unwrap(B)->CreatePtrDiff(unwrap(LHS), unwrap(RHS), Name)); } +LLVMValueRef LLVMBuildAtomicRMW(LLVMBuilderRef B,LLVMAtomicRMWBinOp op, + LLVMValueRef PTR, LLVMValueRef Val, + LLVMAtomicOrdering ordering, + LLVMBool singleThread) { + AtomicRMWInst::BinOp intop; + switch (op) { + case LLVMAtomicRMWBinOpXchg: intop = AtomicRMWInst::Xchg; break; + case LLVMAtomicRMWBinOpAdd: intop = AtomicRMWInst::Add; break; + case LLVMAtomicRMWBinOpSub: intop = AtomicRMWInst::Sub; break; + case LLVMAtomicRMWBinOpAnd: intop = AtomicRMWInst::And; break; + case LLVMAtomicRMWBinOpNand: intop = AtomicRMWInst::Nand; break; + case LLVMAtomicRMWBinOpOr: intop = AtomicRMWInst::Or; break; + case LLVMAtomicRMWBinOpXor: intop = AtomicRMWInst::Xor; break; + case LLVMAtomicRMWBinOpMax: intop = AtomicRMWInst::Max; break; + case LLVMAtomicRMWBinOpMin: intop = AtomicRMWInst::Min; break; + case LLVMAtomicRMWBinOpUMax: intop = AtomicRMWInst::UMax; break; + case LLVMAtomicRMWBinOpUMin: intop = AtomicRMWInst::UMin; break; + } + AtomicOrdering intordering; + switch (ordering) { + case LLVMAtomicOrderingNotAtomic: intordering = NotAtomic; break; + case LLVMAtomicOrderingUnordered: intordering = Unordered; break; + case LLVMAtomicOrderingMonotonic: intordering = Monotonic; break; + case LLVMAtomicOrderingAcquire: intordering = Acquire; break; + case LLVMAtomicOrderingRelease: intordering = Release; break; + case LLVMAtomicOrderingAcquireRelease: + intordering = AcquireRelease; + break; + case LLVMAtomicOrderingSequentiallyConsistent: + intordering = SequentiallyConsistent; + break; + } + return wrap(unwrap(B)->CreateAtomicRMW(intop, unwrap(PTR), unwrap(Val), + intordering, singleThread ? 
SingleThread : CrossThread)); +} + /*===-- Module providers --------------------------------------------------===*/ @@ -2397,6 +2494,13 @@ LLVMMemoryBufferRef LLVMCreateMemoryBufferWithMemoryRangeCopy( StringRef(BufferName))); } +const char *LLVMGetBufferStart(LLVMMemoryBufferRef MemBuf) { + return unwrap(MemBuf)->getBufferStart(); +} + +size_t LLVMGetBufferSize(LLVMMemoryBufferRef MemBuf) { + return unwrap(MemBuf)->getBufferSize(); +} void LLVMDisposeMemoryBuffer(LLVMMemoryBufferRef MemBuf) { delete unwrap(MemBuf); diff --git a/contrib/llvm/lib/IR/DIBuilder.cpp b/contrib/llvm/lib/IR/DIBuilder.cpp index 9d6e840..0980e80 100644 --- a/contrib/llvm/lib/IR/DIBuilder.cpp +++ b/contrib/llvm/lib/IR/DIBuilder.cpp @@ -61,6 +61,9 @@ void DIBuilder::finalize() { DIArray GVs = getOrCreateArray(AllGVs); DIType(TempGVs).replaceAllUsesWith(GVs); + + DIArray IMs = getOrCreateArray(AllImportedModules); + DIType(TempImportedModules).replaceAllUsesWith(IMs); } /// getNonCompileUnitScope - If N is compile unit return NULL otherwise return @@ -101,6 +104,8 @@ void DIBuilder::createCompileUnit(unsigned Lang, StringRef Filename, TempGVs = MDNode::getTemporary(VMContext, TElts); + TempImportedModules = MDNode::getTemporary(VMContext, TElts); + Value *Elts[] = { GetTagConstant(VMContext, dwarf::DW_TAG_compile_unit), createFilePathPair(VMContext, Filename, Directory), @@ -113,6 +118,7 @@ void DIBuilder::createCompileUnit(unsigned Lang, StringRef Filename, TempRetainTypes, TempSubprograms, TempGVs, + TempImportedModules, MDString::get(VMContext, SplitName) }; TheCU = DICompileUnit(MDNode::get(VMContext, Elts)); @@ -122,6 +128,21 @@ void DIBuilder::createCompileUnit(unsigned Lang, StringRef Filename, NMD->addOperand(TheCU); } +DIImportedModule DIBuilder::createImportedModule(DIScope Context, + DINameSpace NS, + unsigned Line) { + Value *Elts[] = { + GetTagConstant(VMContext, dwarf::DW_TAG_imported_module), + Context, + NS, + ConstantInt::get(Type::getInt32Ty(VMContext), Line), + }; + DIImportedModule M(MDNode::get(VMContext, Elts)); + assert(M.Verify() && "Imported module should be valid"); + AllImportedModules.push_back(M); + return M; +} + /// createFile - Create a file descriptor to hold debugging information /// for a file. DIFile DIBuilder::createFile(StringRef Filename, StringRef Directory) { @@ -225,7 +246,8 @@ DIBuilder::createPointerType(DIType PointeeTy, uint64_t SizeInBits, return DIDerivedType(MDNode::get(VMContext, Elts)); } -DIDerivedType DIBuilder::createMemberPointerType(DIType PointeeTy, DIType Base) { +DIDerivedType DIBuilder::createMemberPointerType(DIType PointeeTy, + DIType Base) { // Pointer types are encoded in DIDerivedType format. Value *Elts[] = { GetTagConstant(VMContext, dwarf::DW_TAG_ptr_to_member_type), @@ -427,7 +449,7 @@ DIType DIBuilder::createObjCIVar(StringRef Name, DIObjCProperty DIBuilder::createObjCProperty(StringRef Name, DIFile File, unsigned LineNumber, StringRef GetterName, - StringRef SetterName, + StringRef SetterName, unsigned PropertyAttributes, DIType Ty) { Value *Elts[] = { @@ -601,7 +623,7 @@ DIBuilder::createSubroutineType(DIFile File, DIArray ParameterTypes) { DICompositeType DIBuilder::createEnumerationType( DIDescriptor Scope, StringRef Name, DIFile File, unsigned LineNumber, uint64_t SizeInBits, uint64_t AlignInBits, DIArray Elements, - DIType ClassType) { + DIType UnderlyingType) { // TAG_enumeration_type is encoded in DICompositeType format. 
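Of the C API additions to Core.cpp earlier in this diff, LLVMBuildAtomicRMW is the one that needs the new enum spellings; a hedged usage sketch follows (emitFetchAdd is a hypothetical wrapper, and B, Ptr, Val are assumed to be an existing builder positioned inside a function, an i32 pointer, and an i32 value):

#include "llvm-c/Core.h"

// Hypothetical wrapper: emit a sequentially consistent fetch-and-add through
// the new binding. Returns the value held at Ptr before the add.
static LLVMValueRef emitFetchAdd(LLVMBuilderRef B, LLVMValueRef Ptr,
                                 LLVMValueRef Val) {
  return LLVMBuildAtomicRMW(B, LLVMAtomicRMWBinOpAdd, Ptr, Val,
                            LLVMAtomicOrderingSequentiallyConsistent,
                            /*singleThread=*/0);
}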
Value *Elts[] = { GetTagConstant(VMContext, dwarf::DW_TAG_enumeration_type), @@ -613,7 +635,7 @@ DICompositeType DIBuilder::createEnumerationType( ConstantInt::get(Type::getInt64Ty(VMContext), AlignInBits), ConstantInt::get(Type::getInt32Ty(VMContext), 0), ConstantInt::get(Type::getInt32Ty(VMContext), 0), - ClassType, + UnderlyingType, Elements, ConstantInt::get(Type::getInt32Ty(VMContext), 0), Constant::getNullValue(Type::getInt32Ty(VMContext)) diff --git a/contrib/llvm/lib/IR/DataLayout.cpp b/contrib/llvm/lib/IR/DataLayout.cpp index ecd5216..5658f56 100644 --- a/contrib/llvm/lib/IR/DataLayout.cpp +++ b/contrib/llvm/lib/IR/DataLayout.cpp @@ -41,7 +41,7 @@ char DataLayout::ID = 0; // Support for StructLayout //===----------------------------------------------------------------------===// -StructLayout::StructLayout(StructType *ST, const DataLayout &TD) { +StructLayout::StructLayout(StructType *ST, const DataLayout &DL) { assert(!ST->isOpaque() && "Cannot get layout of opaque structs"); StructAlignment = 0; StructSize = 0; @@ -50,7 +50,7 @@ StructLayout::StructLayout(StructType *ST, const DataLayout &TD) { // Loop over each of the elements, placing them in memory. for (unsigned i = 0, e = NumElements; i != e; ++i) { Type *Ty = ST->getElementType(i); - unsigned TyAlign = ST->isPacked() ? 1 : TD.getABITypeAlignment(Ty); + unsigned TyAlign = ST->isPacked() ? 1 : DL.getABITypeAlignment(Ty); // Add padding if necessary to align the data element properly. if ((StructSize & (TyAlign-1)) != 0) @@ -60,7 +60,7 @@ StructLayout::StructLayout(StructType *ST, const DataLayout &TD) { StructAlignment = std::max(TyAlign, StructAlignment); MemberOffsets[i] = StructSize; - StructSize += TD.getTypeAllocSize(Ty); // Consume space for this data item + StructSize += DL.getTypeAllocSize(Ty); // Consume space for this data item } // Empty structures have alignment of 1 byte. diff --git a/contrib/llvm/lib/IR/DebugInfo.cpp b/contrib/llvm/lib/IR/DebugInfo.cpp index 0ffe99d..ec83dca 100644 --- a/contrib/llvm/lib/IR/DebugInfo.cpp +++ b/contrib/llvm/lib/IR/DebugInfo.cpp @@ -64,7 +64,8 @@ bool DIDescriptor::Verify() const { DISubrange(DbgNode).Verify() || DIEnumerator(DbgNode).Verify() || DIObjCProperty(DbgNode).Verify() || DITemplateTypeParameter(DbgNode).Verify() || - DITemplateValueParameter(DbgNode).Verify()); + DITemplateValueParameter(DbgNode).Verify() || + DIImportedModule(DbgNode).Verify()); } static Value *getField(const MDNode *DbgNode, unsigned Elt) { @@ -336,6 +337,12 @@ bool DIDescriptor::isEnumerator() const { bool DIDescriptor::isObjCProperty() const { return DbgNode && getTag() == dwarf::DW_TAG_APPLE_property; } + +/// \brief Return true if the specified tag is DW_TAG_imported_module. +bool DIDescriptor::isImportedModule() const { + return DbgNode && getTag() == dwarf::DW_TAG_imported_module; +} + //===----------------------------------------------------------------------===// // Simple Descriptor Constructors and other Methods //===----------------------------------------------------------------------===// @@ -418,7 +425,7 @@ bool DICompileUnit::Verify() const { if (N.empty()) return false; // It is possible that directory and produce string is empty. - return DbgNode->getNumOperands() == 12; + return DbgNode->getNumOperands() == 13; } /// Verify - Verify that an ObjC property is well formed. @@ -580,6 +587,11 @@ bool DITemplateValueParameter::Verify() const { return isTemplateValueParameter() && DbgNode->getNumOperands() == 8; } +/// \brief Verify that the imported module descriptor is well formed. 
+bool DIImportedModule::Verify() const { + return isImportedModule() && DbgNode->getNumOperands() == 4; +} + /// getOriginalTypeSize - If this type is derived from a base type then /// return base type size. uint64_t DIDerivedType::getOriginalTypeSize() const { @@ -694,7 +706,7 @@ StringRef DIScope::getDirectory() const { } DIArray DICompileUnit::getEnumTypes() const { - if (!DbgNode || DbgNode->getNumOperands() < 12) + if (!DbgNode || DbgNode->getNumOperands() < 13) return DIArray(); if (MDNode *N = dyn_cast_or_null<MDNode>(DbgNode->getOperand(7))) @@ -703,7 +715,7 @@ DIArray DICompileUnit::getEnumTypes() const { } DIArray DICompileUnit::getRetainedTypes() const { - if (!DbgNode || DbgNode->getNumOperands() < 12) + if (!DbgNode || DbgNode->getNumOperands() < 13) return DIArray(); if (MDNode *N = dyn_cast_or_null<MDNode>(DbgNode->getOperand(8))) @@ -712,7 +724,7 @@ DIArray DICompileUnit::getRetainedTypes() const { } DIArray DICompileUnit::getSubprograms() const { - if (!DbgNode || DbgNode->getNumOperands() < 12) + if (!DbgNode || DbgNode->getNumOperands() < 13) return DIArray(); if (MDNode *N = dyn_cast_or_null<MDNode>(DbgNode->getOperand(9))) @@ -722,7 +734,7 @@ DIArray DICompileUnit::getSubprograms() const { DIArray DICompileUnit::getGlobalVariables() const { - if (!DbgNode || DbgNode->getNumOperands() < 12) + if (!DbgNode || DbgNode->getNumOperands() < 13) return DIArray(); if (MDNode *N = dyn_cast_or_null<MDNode>(DbgNode->getOperand(10))) @@ -730,6 +742,15 @@ DIArray DICompileUnit::getGlobalVariables() const { return DIArray(); } +DIArray DICompileUnit::getImportedModules() const { + if (!DbgNode || DbgNode->getNumOperands() < 13) + return DIArray(); + + if (MDNode *N = dyn_cast_or_null<MDNode>(DbgNode->getOperand(11))) + return DIArray(N); + return DIArray(); +} + /// fixupObjcLikeName - Replace contains special characters used /// in a typical Objective-C names with '.' in a given string. static void fixupObjcLikeName(StringRef Str, SmallVectorImpl<char> &Out) { @@ -1054,8 +1075,13 @@ void DIScope::printInternal(raw_ostream &OS) const { void DICompileUnit::printInternal(raw_ostream &OS) const { DIScope::printInternal(OS); - if (const char *Lang = dwarf::LanguageString(getLanguage())) - OS << " [" << Lang << ']'; + OS << " ["; + unsigned Lang = getLanguage(); + if (const char *LangStr = dwarf::LanguageString(Lang)) + OS << LangStr; + else + (OS << "lang 0x").write_hex(Lang); + OS << ']'; } void DIEnumerator::printInternal(raw_ostream &OS) const { diff --git a/contrib/llvm/lib/IR/Function.cpp b/contrib/llvm/lib/IR/Function.cpp index 1e72b90..7f7efab 100644 --- a/contrib/llvm/lib/IR/Function.cpp +++ b/contrib/llvm/lib/IR/Function.cpp @@ -124,6 +124,13 @@ bool Argument::hasStructRetAttr() const { hasAttribute(1, Attribute::StructRet); } +/// hasReturnedAttr - Return true if this argument has the returned attribute on +/// it in its containing function. +bool Argument::hasReturnedAttr() const { + return getParent()->getAttributes(). + hasAttribute(getArgNo()+1, Attribute::Returned); +} + /// addAttr - Add attributes to an argument. 
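The new Returned attribute and the Argument::hasReturnedAttr() query above pair with verifier rules added later in this diff: at most one parameter may carry 'returned', and its type must be losslessly bitcastable to the return type. A hedged sketch of attaching it with the AttrBuilder pattern this change already uses elsewhere (markFirstParamReturned is a hypothetical helper):

#include "llvm/IR/Attributes.h"
#include "llvm/IR/Function.h"
#include <cassert>
using namespace llvm;

// Hypothetical helper: mark F's first parameter as 'returned'. Attribute
// index 1 is the first parameter; index 0 is the return value.
static void markFirstParamReturned(Function *F) {
  AttrBuilder B;
  B.addAttribute(Attribute::Returned);
  F->addAttributes(1, AttributeSet::get(F->getContext(), 1, B));
  assert(F->arg_begin()->hasReturnedAttr());
}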
void Argument::addAttr(AttributeSet AS) { assert(AS.getNumSlots() <= 1 && diff --git a/contrib/llvm/lib/IR/Metadata.cpp b/contrib/llvm/lib/IR/Metadata.cpp index 0228aeb..6a6b7af 100644 --- a/contrib/llvm/lib/IR/Metadata.cpp +++ b/contrib/llvm/lib/IR/Metadata.cpp @@ -403,42 +403,6 @@ void MDNode::replaceOperand(MDNodeOperand *Op, Value *To) { } } -MDNode *MDNode::getMostGenericTBAA(MDNode *A, MDNode *B) { - if (!A || !B) - return NULL; - - if (A == B) - return A; - - SmallVector<MDNode *, 4> PathA; - MDNode *T = A; - while (T) { - PathA.push_back(T); - T = T->getNumOperands() >= 2 ? cast_or_null<MDNode>(T->getOperand(1)) : 0; - } - - SmallVector<MDNode *, 4> PathB; - T = B; - while (T) { - PathB.push_back(T); - T = T->getNumOperands() >= 2 ? cast_or_null<MDNode>(T->getOperand(1)) : 0; - } - - int IA = PathA.size() - 1; - int IB = PathB.size() - 1; - - MDNode *Ret = 0; - while (IA >= 0 && IB >=0) { - if (PathA[IA] == PathB[IB]) - Ret = PathA[IA]; - else - break; - --IA; - --IB; - } - return Ret; -} - MDNode *MDNode::getMostGenericFPMath(MDNode *A, MDNode *B) { if (!A || !B) return NULL; diff --git a/contrib/llvm/lib/IR/PassManager.cpp b/contrib/llvm/lib/IR/PassManager.cpp index 3c968aa..387094a 100644 --- a/contrib/llvm/lib/IR/PassManager.cpp +++ b/contrib/llvm/lib/IR/PassManager.cpp @@ -42,14 +42,14 @@ namespace llvm { // Different debug levels that can be enabled... enum PassDebugLevel { - None, Arguments, Structure, Executions, Details + Disabled, Arguments, Structure, Executions, Details }; static cl::opt<enum PassDebugLevel> PassDebugging("debug-pass", cl::Hidden, cl::desc("Print PassManager debugging information"), cl::values( - clEnumVal(None , "disable debug output"), + clEnumVal(Disabled , "disable debug output"), clEnumVal(Arguments , "print pass arguments to pass to 'opt'"), clEnumVal(Structure , "print pass structure before run()"), clEnumVal(Executions, "print pass name before it is executed"), diff --git a/contrib/llvm/lib/IR/Type.cpp b/contrib/llvm/lib/IR/Type.cpp index 1e6a51a..46c61fc 100644 --- a/contrib/llvm/lib/IR/Type.cpp +++ b/contrib/llvm/lib/IR/Type.cpp @@ -380,7 +380,7 @@ FunctionType *FunctionType::get(Type *ReturnType, } FunctionType *FunctionType::get(Type *Result, bool isVarArg) { - return get(Result, ArrayRef<Type *>(), isVarArg); + return get(Result, None, isVarArg); } /// isValidReturnType - Return true if the specified type is valid as a return @@ -499,7 +499,7 @@ StructType *StructType::create(LLVMContext &Context, StringRef Name) { } StructType *StructType::get(LLVMContext &Context, bool isPacked) { - return get(Context, llvm::ArrayRef<Type*>(), isPacked); + return get(Context, None, isPacked); } StructType *StructType::get(Type *type, ...) { diff --git a/contrib/llvm/lib/IR/Value.cpp b/contrib/llvm/lib/IR/Value.cpp index adc702e..89a3c05 100644 --- a/contrib/llvm/lib/IR/Value.cpp +++ b/contrib/llvm/lib/IR/Value.cpp @@ -118,7 +118,7 @@ bool Value::isUsedInBasicBlock(const BasicBlock *BB) const { for (BasicBlock::const_iterator I = BB->begin(), E = BB->end(); I != E; ++I) { if (std::find(I->op_begin(), I->op_end(), this) != I->op_end()) return true; - if (MaxBlockSize-- == 0) // If the block is larger fall back to use_iterator + if (--MaxBlockSize == 0) // If the block is larger fall back to use_iterator break; } @@ -333,6 +333,7 @@ namespace { // Various metrics for how much to strip off of pointers. 
enum PointerStripKind { PSK_ZeroIndices, + PSK_ZeroIndicesAndAliases, PSK_InBoundsConstantIndices, PSK_InBounds }; @@ -350,6 +351,7 @@ static Value *stripPointerCastsAndOffsets(Value *V) { do { if (GEPOperator *GEP = dyn_cast<GEPOperator>(V)) { switch (StripKind) { + case PSK_ZeroIndicesAndAliases: case PSK_ZeroIndices: if (!GEP->hasAllZeroIndices()) return V; @@ -367,7 +369,7 @@ static Value *stripPointerCastsAndOffsets(Value *V) { } else if (Operator::getOpcode(V) == Instruction::BitCast) { V = cast<Operator>(V)->getOperand(0); } else if (GlobalAlias *GA = dyn_cast<GlobalAlias>(V)) { - if (GA->mayBeOverridden()) + if (StripKind == PSK_ZeroIndices || GA->mayBeOverridden()) return V; V = GA->getAliasee(); } else { @@ -381,6 +383,10 @@ static Value *stripPointerCastsAndOffsets(Value *V) { } // namespace Value *Value::stripPointerCasts() { + return stripPointerCastsAndOffsets<PSK_ZeroIndicesAndAliases>(this); +} + +Value *Value::stripPointerCastsNoFollowAliases() { return stripPointerCastsAndOffsets<PSK_ZeroIndices>(this); } diff --git a/contrib/llvm/lib/IR/Verifier.cpp b/contrib/llvm/lib/IR/Verifier.cpp index 8bfbb32..d106173 100644 --- a/contrib/llvm/lib/IR/Verifier.cpp +++ b/contrib/llvm/lib/IR/Verifier.cpp @@ -301,9 +301,12 @@ namespace { bool VerifyIntrinsicType(Type *Ty, ArrayRef<Intrinsic::IITDescriptor> &Infos, SmallVectorImpl<Type*> &ArgTys); - void VerifyParameterAttrs(AttributeSet Attrs, uint64_t Idx, Type *Ty, + bool VerifyAttributeCount(AttributeSet Attrs, unsigned Params); + void VerifyAttributeTypes(AttributeSet Attrs, unsigned Idx, + bool isFunction, const Value *V); + void VerifyParameterAttrs(AttributeSet Attrs, unsigned Idx, Type *Ty, bool isReturnValue, const Value *V); - void VerifyFunctionAttrs(FunctionType *FT, const AttributeSet &Attrs, + void VerifyFunctionAttrs(FunctionType *FT, AttributeSet Attrs, const Value *V); void WriteValue(const Value *V) { @@ -446,6 +449,30 @@ void Verifier::visitGlobalVariable(GlobalVariable &GV) { } } + if (GV.hasName() && (GV.getName() == "llvm.used" || + GV.getName() == "llvm.compiler_used")) { + Assert1(!GV.hasInitializer() || GV.hasAppendingLinkage(), + "invalid linkage for intrinsic global variable", &GV); + Type *GVType = GV.getType()->getElementType(); + if (ArrayType *ATy = dyn_cast<ArrayType>(GVType)) { + PointerType *PTy = dyn_cast<PointerType>(ATy->getElementType()); + Assert1(PTy, "wrong type for intrinsic global variable", &GV); + if (GV.hasInitializer()) { + Constant *Init = GV.getInitializer(); + ConstantArray *InitArray = dyn_cast<ConstantArray>(Init); + Assert1(InitArray, "wrong initalizer for intrinsic global variable", + Init); + for (unsigned i = 0, e = InitArray->getNumOperands(); i != e; ++i) { + Value *V = Init->getOperand(i)->stripPointerCasts(); + // stripPointerCasts strips aliases, so we only need to check for + // variables and functions. 
+ Assert1(isa<GlobalVariable>(V) || isa<Function>(V), + "invalid llvm.used member", V); + } + } + } + } + visitGlobalValue(GV); } @@ -626,44 +653,74 @@ void Verifier::visitModuleFlag(MDNode *Op, DenseMap<MDString*, MDNode*>&SeenIDs, } } +void Verifier::VerifyAttributeTypes(AttributeSet Attrs, unsigned Idx, + bool isFunction, const Value* V) { + unsigned Slot = ~0U; + for (unsigned I = 0, E = Attrs.getNumSlots(); I != E; ++I) + if (Attrs.getSlotIndex(I) == Idx) { + Slot = I; + break; + } + + assert(Slot != ~0U && "Attribute set inconsistency!"); + + for (AttributeSet::iterator I = Attrs.begin(Slot), E = Attrs.end(Slot); + I != E; ++I) { + if (I->isStringAttribute()) + continue; + + if (I->getKindAsEnum() == Attribute::NoReturn || + I->getKindAsEnum() == Attribute::NoUnwind || + I->getKindAsEnum() == Attribute::ReadNone || + I->getKindAsEnum() == Attribute::ReadOnly || + I->getKindAsEnum() == Attribute::NoInline || + I->getKindAsEnum() == Attribute::AlwaysInline || + I->getKindAsEnum() == Attribute::OptimizeForSize || + I->getKindAsEnum() == Attribute::StackProtect || + I->getKindAsEnum() == Attribute::StackProtectReq || + I->getKindAsEnum() == Attribute::StackProtectStrong || + I->getKindAsEnum() == Attribute::NoRedZone || + I->getKindAsEnum() == Attribute::NoImplicitFloat || + I->getKindAsEnum() == Attribute::Naked || + I->getKindAsEnum() == Attribute::InlineHint || + I->getKindAsEnum() == Attribute::StackAlignment || + I->getKindAsEnum() == Attribute::UWTable || + I->getKindAsEnum() == Attribute::NonLazyBind || + I->getKindAsEnum() == Attribute::ReturnsTwice || + I->getKindAsEnum() == Attribute::SanitizeAddress || + I->getKindAsEnum() == Attribute::SanitizeThread || + I->getKindAsEnum() == Attribute::SanitizeMemory || + I->getKindAsEnum() == Attribute::MinSize || + I->getKindAsEnum() == Attribute::NoDuplicate || + I->getKindAsEnum() == Attribute::NoBuiltin) { + if (!isFunction) + CheckFailed("Attribute '" + I->getKindAsString() + + "' only applies to functions!", V); + return; + } else if (isFunction) { + CheckFailed("Attribute '" + I->getKindAsString() + + "' does not apply to functions!", V); + return; + } + } +} + // VerifyParameterAttrs - Check the given attributes for an argument or return // value of the specified type. The value V is printed in error messages. 
-void Verifier::VerifyParameterAttrs(AttributeSet Attrs, uint64_t Idx, Type *Ty, +void Verifier::VerifyParameterAttrs(AttributeSet Attrs, unsigned Idx, Type *Ty, bool isReturnValue, const Value *V) { if (!Attrs.hasAttributes(Idx)) return; - Assert1(!Attrs.hasAttribute(Idx, Attribute::NoReturn) && - !Attrs.hasAttribute(Idx, Attribute::NoUnwind) && - !Attrs.hasAttribute(Idx, Attribute::ReadNone) && - !Attrs.hasAttribute(Idx, Attribute::ReadOnly) && - !Attrs.hasAttribute(Idx, Attribute::NoInline) && - !Attrs.hasAttribute(Idx, Attribute::AlwaysInline) && - !Attrs.hasAttribute(Idx, Attribute::OptimizeForSize) && - !Attrs.hasAttribute(Idx, Attribute::StackProtect) && - !Attrs.hasAttribute(Idx, Attribute::StackProtectReq) && - !Attrs.hasAttribute(Idx, Attribute::NoRedZone) && - !Attrs.hasAttribute(Idx, Attribute::NoImplicitFloat) && - !Attrs.hasAttribute(Idx, Attribute::Naked) && - !Attrs.hasAttribute(Idx, Attribute::InlineHint) && - !Attrs.hasAttribute(Idx, Attribute::StackAlignment) && - !Attrs.hasAttribute(Idx, Attribute::UWTable) && - !Attrs.hasAttribute(Idx, Attribute::NonLazyBind) && - !Attrs.hasAttribute(Idx, Attribute::ReturnsTwice) && - !Attrs.hasAttribute(Idx, Attribute::SanitizeAddress) && - !Attrs.hasAttribute(Idx, Attribute::SanitizeThread) && - !Attrs.hasAttribute(Idx, Attribute::SanitizeMemory) && - !Attrs.hasAttribute(Idx, Attribute::MinSize) && - !Attrs.hasAttribute(Idx, Attribute::NoBuiltin), - "Some attributes in '" + Attrs.getAsString(Idx) + - "' only apply to functions!", V); + VerifyAttributeTypes(Attrs, Idx, false, V); if (isReturnValue) Assert1(!Attrs.hasAttribute(Idx, Attribute::ByVal) && !Attrs.hasAttribute(Idx, Attribute::Nest) && !Attrs.hasAttribute(Idx, Attribute::StructRet) && - !Attrs.hasAttribute(Idx, Attribute::NoCapture), - "Attribute 'byval', 'nest', 'sret', and 'nocapture' " + !Attrs.hasAttribute(Idx, Attribute::NoCapture) && + !Attrs.hasAttribute(Idx, Attribute::Returned), + "Attribute 'byval', 'nest', 'sret', 'nocapture', and 'returned' " "do not apply to return values!", V); // Check for mutually incompatible attributes. @@ -683,6 +740,10 @@ void Verifier::VerifyParameterAttrs(AttributeSet Attrs, uint64_t Idx, Type *Ty, Attrs.hasAttribute(Idx, Attribute::InReg))), "Attributes " "'byval, nest, and inreg' are incompatible!", V); + Assert1(!(Attrs.hasAttribute(Idx, Attribute::StructRet) && + Attrs.hasAttribute(Idx, Attribute::Returned)), "Attributes " + "'sret and returned' are incompatible!", V); + Assert1(!(Attrs.hasAttribute(Idx, Attribute::ZExt) && Attrs.hasAttribute(Idx, Attribute::SExt)), "Attributes " "'zeroext and signext' are incompatible!", V); @@ -712,81 +773,51 @@ void Verifier::VerifyParameterAttrs(AttributeSet Attrs, uint64_t Idx, Type *Ty, // VerifyFunctionAttrs - Check parameter attributes against a function type. // The value V is printed in error messages. -void Verifier::VerifyFunctionAttrs(FunctionType *FT, - const AttributeSet &Attrs, +void Verifier::VerifyFunctionAttrs(FunctionType *FT, AttributeSet Attrs, const Value *V) { if (Attrs.isEmpty()) return; bool SawNest = false; + bool SawReturned = false; for (unsigned i = 0, e = Attrs.getNumSlots(); i != e; ++i) { - unsigned Index = Attrs.getSlotIndex(i); + unsigned Idx = Attrs.getSlotIndex(i); Type *Ty; - if (Index == 0) + if (Idx == 0) Ty = FT->getReturnType(); - else if (Index-1 < FT->getNumParams()) - Ty = FT->getParamType(Index-1); + else if (Idx-1 < FT->getNumParams()) + Ty = FT->getParamType(Idx-1); else break; // VarArgs attributes, verified elsewhere. 
- VerifyParameterAttrs(Attrs, Index, Ty, Index == 0, V); + VerifyParameterAttrs(Attrs, Idx, Ty, Idx == 0, V); - if (Attrs.hasAttribute(i, Attribute::Nest)) { + if (Idx == 0) + continue; + + if (Attrs.hasAttribute(Idx, Attribute::Nest)) { Assert1(!SawNest, "More than one parameter has attribute nest!", V); SawNest = true; } - if (Attrs.hasAttribute(Index, Attribute::StructRet)) - Assert1(Index == 1, "Attribute sret is not on first parameter!", V); + if (Attrs.hasAttribute(Idx, Attribute::Returned)) { + Assert1(!SawReturned, "More than one parameter has attribute returned!", + V); + Assert1(Ty->canLosslesslyBitCastTo(FT->getReturnType()), "Incompatible " + "argument and return types for 'returned' attribute", V); + SawReturned = true; + } + + if (Attrs.hasAttribute(Idx, Attribute::StructRet)) + Assert1(Idx == 1, "Attribute sret is not on first parameter!", V); } if (!Attrs.hasAttributes(AttributeSet::FunctionIndex)) return; - AttrBuilder NotFn(Attrs, AttributeSet::FunctionIndex); - NotFn.removeFunctionOnlyAttrs(); - Assert1(NotFn.empty(), "Attributes '" + - AttributeSet::get(V->getContext(), - AttributeSet::FunctionIndex, - NotFn).getAsString(AttributeSet::FunctionIndex) + - "' do not apply to the function!", V); - - // Check for mutually incompatible attributes. - Assert1(!((Attrs.hasAttribute(AttributeSet::FunctionIndex, - Attribute::ByVal) && - Attrs.hasAttribute(AttributeSet::FunctionIndex, - Attribute::Nest)) || - (Attrs.hasAttribute(AttributeSet::FunctionIndex, - Attribute::ByVal) && - Attrs.hasAttribute(AttributeSet::FunctionIndex, - Attribute::StructRet)) || - (Attrs.hasAttribute(AttributeSet::FunctionIndex, - Attribute::Nest) && - Attrs.hasAttribute(AttributeSet::FunctionIndex, - Attribute::StructRet))), - "Attributes 'byval, nest, and sret' are incompatible!", V); - - Assert1(!((Attrs.hasAttribute(AttributeSet::FunctionIndex, - Attribute::ByVal) && - Attrs.hasAttribute(AttributeSet::FunctionIndex, - Attribute::Nest)) || - (Attrs.hasAttribute(AttributeSet::FunctionIndex, - Attribute::ByVal) && - Attrs.hasAttribute(AttributeSet::FunctionIndex, - Attribute::InReg)) || - (Attrs.hasAttribute(AttributeSet::FunctionIndex, - Attribute::Nest) && - Attrs.hasAttribute(AttributeSet::FunctionIndex, - Attribute::InReg))), - "Attributes 'byval, nest, and inreg' are incompatible!", V); - - Assert1(!(Attrs.hasAttribute(AttributeSet::FunctionIndex, - Attribute::ZExt) && - Attrs.hasAttribute(AttributeSet::FunctionIndex, - Attribute::SExt)), - "Attributes 'zeroext and signext' are incompatible!", V); + VerifyAttributeTypes(Attrs, AttributeSet::FunctionIndex, true, V); Assert1(!(Attrs.hasAttribute(AttributeSet::FunctionIndex, Attribute::ReadNone) && @@ -801,7 +832,7 @@ void Verifier::VerifyFunctionAttrs(FunctionType *FT, "Attributes 'noinline and alwaysinline' are incompatible!", V); } -static bool VerifyAttributeCount(const AttributeSet &Attrs, unsigned Params) { +bool Verifier::VerifyAttributeCount(AttributeSet Attrs, unsigned Params) { if (Attrs.getNumSlots() == 0) return true; @@ -837,7 +868,7 @@ void Verifier::visitFunction(Function &F) { Assert1(!F.hasStructRetAttr() || F.getReturnType()->isVoidTy(), "Invalid struct return type!", &F); - const AttributeSet &Attrs = F.getAttributes(); + AttributeSet Attrs = F.getAttributes(); Assert1(VerifyAttributeCount(Attrs, FT->getNumParams()), "Attribute after last parameter!", &F); @@ -1350,7 +1381,7 @@ void Verifier::VerifyCallSite(CallSite CS) { "Call parameter type does not match function signature!", CS.getArgument(i), FTy->getParamType(i), I); - const 
AttributeSet &Attrs = CS.getAttributes(); + AttributeSet Attrs = CS.getAttributes(); Assert1(VerifyAttributeCount(Attrs, CS.arg_size()), "Attribute after last parameter!", I); @@ -1358,15 +1389,41 @@ void Verifier::VerifyCallSite(CallSite CS) { // Verify call attributes. VerifyFunctionAttrs(FTy, Attrs, I); - if (FTy->isVarArg()) + if (FTy->isVarArg()) { + // FIXME? is 'nest' even legal here? + bool SawNest = false; + bool SawReturned = false; + + for (unsigned Idx = 1; Idx < 1 + FTy->getNumParams(); ++Idx) { + if (Attrs.hasAttribute(Idx, Attribute::Nest)) + SawNest = true; + if (Attrs.hasAttribute(Idx, Attribute::Returned)) + SawReturned = true; + } + // Check attributes on the varargs part. for (unsigned Idx = 1 + FTy->getNumParams(); Idx <= CS.arg_size(); ++Idx) { - VerifyParameterAttrs(Attrs, Idx, CS.getArgument(Idx-1)->getType(), - false, I); + Type *Ty = CS.getArgument(Idx-1)->getType(); + VerifyParameterAttrs(Attrs, Idx, Ty, false, I); + + if (Attrs.hasAttribute(Idx, Attribute::Nest)) { + Assert1(!SawNest, "More than one parameter has attribute nest!", I); + SawNest = true; + } + + if (Attrs.hasAttribute(Idx, Attribute::Returned)) { + Assert1(!SawReturned, "More than one parameter has attribute returned!", + I); + Assert1(Ty->canLosslesslyBitCastTo(FTy->getReturnType()), + "Incompatible argument and return types for 'returned' " + "attribute", I); + SawReturned = true; + } Assert1(!Attrs.hasAttribute(Idx, Attribute::StructRet), "Attribute 'sret' cannot be used for vararg call arguments!", I); } + } // Verify that there's no metadata unless it's a direct call to an intrinsic. if (CS.getCalledFunction() == 0 || diff --git a/contrib/llvm/lib/Linker/LinkModules.cpp b/contrib/llvm/lib/Linker/LinkModules.cpp index 74cbdad..d2e13c9 100644 --- a/contrib/llvm/lib/Linker/LinkModules.cpp +++ b/contrib/llvm/lib/Linker/LinkModules.cpp @@ -13,21 +13,15 @@ #include "llvm/Linker.h" #include "llvm-c/Linker.h" -#include "llvm/ADT/DenseSet.h" #include "llvm/ADT/Optional.h" #include "llvm/ADT/SetVector.h" -#include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallString.h" #include "llvm/IR/Constants.h" -#include "llvm/IR/DerivedTypes.h" -#include "llvm/IR/Instructions.h" #include "llvm/IR/Module.h" #include "llvm/IR/TypeFinder.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Transforms/Utils/Cloning.h" -#include "llvm/Transforms/Utils/ValueMapper.h" -#include <cctype> using namespace llvm; //===----------------------------------------------------------------------===// @@ -35,6 +29,8 @@ using namespace llvm; //===----------------------------------------------------------------------===// namespace { + typedef SmallPtrSet<StructType*, 32> TypeSet; + class TypeMapTy : public ValueMapTypeRemapper { /// MappedTypes - This is a mapping from a source type to a destination type /// to use. @@ -55,6 +51,9 @@ class TypeMapTy : public ValueMapTypeRemapper { SmallPtrSet<StructType*, 16> DstResolvedOpaqueTypes; public: + TypeMapTy(TypeSet &Set) : DstStructTypesSet(Set) {} + + TypeSet &DstStructTypesSet; /// addTypeMapping - Indicate that the specified type in the destination /// module is conceptually equivalent to the specified type in the source /// module. @@ -331,13 +330,20 @@ Type *TypeMapTy::getImpl(Type *Ty) { StructType *STy = cast<StructType>(Ty); // If the type is opaque, we can just use it directly. - if (STy->isOpaque()) + if (STy->isOpaque()) { + // A named structure type from src module is used. 
Add it to the Set of + // identified structs in the destination module. + DstStructTypesSet.insert(STy); return *Entry = STy; + } // Otherwise we create a new type and resolve its body later. This will be // resolved by the top level of get(). SrcDefinitionsToResolve.push_back(STy); StructType *DTy = StructType::create(STy->getContext()); + // A new identified structure type was created. Add it to the set of + // identified structs in the destination module. + DstStructTypesSet.insert(DTy); DstResolvedOpaqueTypes.insert(DTy); return *Entry = DTy; } @@ -379,8 +385,8 @@ namespace { public: std::string ErrorMsg; - ModuleLinker(Module *dstM, Module *srcM, unsigned mode) - : DstM(dstM), SrcM(srcM), Mode(mode) { } + ModuleLinker(Module *dstM, TypeSet &Set, Module *srcM, unsigned mode) + : DstM(dstM), SrcM(srcM), TypeMap(Set), Mode(mode) { } bool run(); @@ -594,11 +600,6 @@ void ModuleLinker::computeTypeMapping() { SmallPtrSet<StructType*, 32> SrcStructTypesSet(SrcStructTypes.begin(), SrcStructTypes.end()); - TypeFinder DstStructTypes; - DstStructTypes.run(*DstM, true); - SmallPtrSet<StructType*, 32> DstStructTypesSet(DstStructTypes.begin(), - DstStructTypes.end()); - for (unsigned i = 0, e = SrcStructTypes.size(); i != e; ++i) { StructType *ST = SrcStructTypes[i]; if (!ST->hasName()) continue; @@ -629,7 +630,7 @@ void ModuleLinker::computeTypeMapping() { // we prefer to take the '%C' version. So we are then left with both // '%C.1' and '%C' being used for the same types. This leads to some // variables using one type and some using the other. - if (!SrcStructTypesSet.count(DST) && DstStructTypesSet.count(DST)) + if (!SrcStructTypesSet.count(DST) && TypeMap.DstStructTypesSet.count(DST)) TypeMap.addTypeMapping(DST, ST); } @@ -1287,6 +1288,25 @@ bool ModuleLinker::run() { return false; } +Linker::Linker(Module *M) : Composite(M) { + TypeFinder StructTypes; + StructTypes.run(*M, true); + IdentifiedStructTypes.insert(StructTypes.begin(), StructTypes.end()); +} + +Linker::~Linker() { +} + +bool Linker::linkInModule(Module *Src, unsigned Mode, std::string *ErrorMsg) { + ModuleLinker TheLinker(Composite, IdentifiedStructTypes, Src, Mode); + if (TheLinker.run()) { + if (ErrorMsg) + *ErrorMsg = TheLinker.ErrorMsg; + return true; + } + return false; +} + //===----------------------------------------------------------------------===// // LinkModules entrypoint. //===----------------------------------------------------------------------===// @@ -1298,13 +1318,8 @@ bool ModuleLinker::run() { /// and shouldn't be relied on to be consistent. bool Linker::LinkModules(Module *Dest, Module *Src, unsigned Mode, std::string *ErrorMsg) { - ModuleLinker TheLinker(Dest, Src, Mode); - if (TheLinker.run()) { - if (ErrorMsg) *ErrorMsg = TheLinker.ErrorMsg; - return true; - } - - return false; + Linker L(Dest); + return L.linkInModule(Src, Mode, ErrorMsg); } //===----------------------------------------------------------------------===// diff --git a/contrib/llvm/lib/Linker/Linker.cpp b/contrib/llvm/lib/Linker/Linker.cpp deleted file mode 100644 index 74d24f2..0000000 --- a/contrib/llvm/lib/Linker/Linker.cpp +++ /dev/null @@ -1,70 +0,0 @@ -//===- lib/Linker/Linker.cpp - Basic Linker functionality ----------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. 
-// -//===----------------------------------------------------------------------===// -// -// This file contains basic Linker functionality that all usages will need. -// -//===----------------------------------------------------------------------===// - -#include "llvm/Linker.h" -#include "llvm/Bitcode/ReaderWriter.h" -#include "llvm/IR/Module.h" -#include "llvm/Support/MemoryBuffer.h" -#include "llvm/Support/raw_ostream.h" -#include "llvm/Support/system_error.h" -using namespace llvm; - -Linker::Linker(StringRef progname, StringRef modname, - LLVMContext& C, unsigned flags): - Context(C), - Composite(new Module(modname, C)), - Flags(flags), - Error(), - ProgramName(progname) { } - -Linker::Linker(StringRef progname, Module* aModule, unsigned flags) : - Context(aModule->getContext()), - Composite(aModule), - Flags(flags), - Error(), - ProgramName(progname) { } - -Linker::~Linker() { - delete Composite; -} - -bool -Linker::error(StringRef message) { - Error = message; - if (!(Flags&QuietErrors)) - errs() << ProgramName << ": error: " << message << "\n"; - return true; -} - -bool -Linker::warning(StringRef message) { - Error = message; - if (!(Flags&QuietWarnings)) - errs() << ProgramName << ": warning: " << message << "\n"; - return false; -} - -void -Linker::verbose(StringRef message) { - if (Flags&Verbose) - errs() << " " << message << "\n"; -} - -Module* -Linker::releaseModule() { - Module* result = Composite; - Error.clear(); - Composite = 0; - Flags = 0; - return result; -} diff --git a/contrib/llvm/lib/MC/MCAsmInfo.cpp b/contrib/llvm/lib/MC/MCAsmInfo.cpp index 51bb435..9e60884 100644 --- a/contrib/llvm/lib/MC/MCAsmInfo.cpp +++ b/contrib/llvm/lib/MC/MCAsmInfo.cpp @@ -87,10 +87,10 @@ MCAsmInfo::MCAsmInfo() { SupportsDebugInformation = false; ExceptionsType = ExceptionHandling::None; DwarfUsesInlineInfoSection = false; - DwarfSectionOffsetDirective = 0; DwarfUsesRelocationsAcrossSections = true; DwarfRegNumForCFI = false; HasMicrosoftFastStdCallMangling = false; + NeedsDwarfSectionOffsetDirective = false; } MCAsmInfo::~MCAsmInfo() { diff --git a/contrib/llvm/lib/MC/MCAsmInfoCOFF.cpp b/contrib/llvm/lib/MC/MCAsmInfoCOFF.cpp index fd79193..33350d9 100644 --- a/contrib/llvm/lib/MC/MCAsmInfoCOFF.cpp +++ b/contrib/llvm/lib/MC/MCAsmInfoCOFF.cpp @@ -36,8 +36,8 @@ MCAsmInfoCOFF::MCAsmInfoCOFF() { // Set up DWARF directives HasLEB128 = true; // Target asm supports leb128 directives (little-endian) SupportsDebugInformation = true; - DwarfSectionOffsetDirective = "\t.secrel32\t"; HasMicrosoftFastStdCallMangling = true; + NeedsDwarfSectionOffsetDirective = true; } void MCAsmInfoMicrosoft::anchor() { } diff --git a/contrib/llvm/lib/MC/MCAsmStreamer.cpp b/contrib/llvm/lib/MC/MCAsmStreamer.cpp index 35613b4..9e86785 100644 --- a/contrib/llvm/lib/MC/MCAsmStreamer.cpp +++ b/contrib/llvm/lib/MC/MCAsmStreamer.cpp @@ -124,19 +124,15 @@ public: /// @name MCStreamer Interface /// @{ - virtual void ChangeSection(const MCSection *Section); + virtual void ChangeSection(const MCSection *Section, + const MCExpr *Subsection); virtual void InitSections() { InitToTextSection(); } virtual void InitToTextSection() { - // FIXME, this is MachO specific, but the testsuite - // expects this. 
- SwitchSection(getContext().getMachOSection( - "__TEXT", "__text", - MCSectionMachO::S_ATTR_PURE_INSTRUCTIONS, - 0, SectionKind::getText())); + SwitchSection(getContext().getObjectFileInfo()->getTextSection()); } virtual void EmitLabel(MCSymbol *Symbol); @@ -333,9 +329,10 @@ static inline int64_t truncateToSize(int64_t Value, unsigned Bytes) { return Value & ((uint64_t) (int64_t) -1 >> (64 - Bytes * 8)); } -void MCAsmStreamer::ChangeSection(const MCSection *Section) { +void MCAsmStreamer::ChangeSection(const MCSection *Section, + const MCExpr *Subsection) { assert(Section && "Cannot switch to a null section!"); - Section->PrintSwitchToSection(MAI, OS); + Section->PrintSwitchToSection(MAI, OS, Subsection); } void MCAsmStreamer::EmitEHSymAttributes(const MCSymbol *Symbol, @@ -642,7 +639,8 @@ static void PrintQuotedString(StringRef Data, raw_ostream &OS) { void MCAsmStreamer::EmitBytes(StringRef Data, unsigned AddrSpace) { - assert(getCurrentSection() && "Cannot emit contents before setting section!"); + assert(getCurrentSection().first && + "Cannot emit contents before setting section!"); if (Data.empty()) return; if (Data.size() == 1) { @@ -673,7 +671,8 @@ void MCAsmStreamer::EmitIntValue(uint64_t Value, unsigned Size, void MCAsmStreamer::EmitValueImpl(const MCExpr *Value, unsigned Size, unsigned AddrSpace) { - assert(getCurrentSection() && "Cannot emit contents before setting section!"); + assert(getCurrentSection().first && + "Cannot emit contents before setting section!"); const char *Directive = 0; switch (Size) { default: break; @@ -1368,7 +1367,8 @@ void MCAsmStreamer::EmitTCEntry(const MCSymbol &S) { } void MCAsmStreamer::EmitInstruction(const MCInst &Inst) { - assert(getCurrentSection() && "Cannot emit contents before setting section!"); + assert(getCurrentSection().first && + "Cannot emit contents before setting section!"); // Show the encoding in a comment if we have a code emitter. if (Emitter) diff --git a/contrib/llvm/lib/MC/MCAssembler.cpp b/contrib/llvm/lib/MC/MCAssembler.cpp index 1829266..fb5ab28 100644 --- a/contrib/llvm/lib/MC/MCAssembler.cpp +++ b/contrib/llvm/lib/MC/MCAssembler.cpp @@ -243,6 +243,36 @@ MCSectionData::MCSectionData(const MCSection &_Section, MCAssembler *A) A->getSectionList().push_back(this); } +MCSectionData::iterator +MCSectionData::getSubsectionInsertionPoint(unsigned Subsection) { + if (Subsection == 0 && SubsectionFragmentMap.empty()) + return end(); + + SmallVectorImpl<std::pair<unsigned, MCFragment *> >::iterator MI = + std::lower_bound(SubsectionFragmentMap.begin(), SubsectionFragmentMap.end(), + std::make_pair(Subsection, (MCFragment *)0)); + bool ExactMatch = false; + if (MI != SubsectionFragmentMap.end()) { + ExactMatch = MI->first == Subsection; + if (ExactMatch) + ++MI; + } + iterator IP; + if (MI == SubsectionFragmentMap.end()) + IP = end(); + else + IP = MI->second; + if (!ExactMatch && Subsection != 0) { + // The GNU as documentation claims that subsections have an alignment of 4, + // although this appears not to be the case. 
+ MCFragment *F = new MCDataFragment(); + SubsectionFragmentMap.insert(MI, std::make_pair(Subsection, F)); + getFragmentList().insert(IP, F); + F->setParent(this); + } + return IP; +} + /* *** */ MCSymbolData::MCSymbolData() : Symbol(0) {} diff --git a/contrib/llvm/lib/MC/MCDwarf.cpp b/contrib/llvm/lib/MC/MCDwarf.cpp index 0f8f074..18982e9 100644 --- a/contrib/llvm/lib/MC/MCDwarf.cpp +++ b/contrib/llvm/lib/MC/MCDwarf.cpp @@ -197,6 +197,8 @@ static inline void EmitDwarfLineTable(MCStreamer *MCOS, // actually a DW_LNE_end_sequence. // Switch to the section to be able to create a symbol at its end. + // TODO: keep track of the last subsection so that this symbol appears in the + // correct place. MCOS->SwitchSection(Section); MCContext &context = MCOS->getContext(); @@ -787,7 +789,7 @@ void MCGenDwarfLabelEntry::Make(MCSymbol *Symbol, MCStreamer *MCOS, if (Symbol->isTemporary()) return; MCContext &context = MCOS->getContext(); - if (context.getGenDwarfSection() != MCOS->getCurrentSection()) + if (context.getGenDwarfSection() != MCOS->getCurrentSection().first) return; // The dwarf label's name does not have the symbol name's leading @@ -899,7 +901,7 @@ namespace { /// EmitCompactUnwind - Emit the unwind information in a compact way. If /// we're successful, return 'true'. Otherwise, return 'false' and it will /// emit the normal CIE and FDE. - bool EmitCompactUnwind(MCStreamer &streamer, + void EmitCompactUnwind(MCStreamer &streamer, const MCDwarfFrameInfo &frame); const MCSymbol &EmitCIE(MCStreamer &streamer, @@ -1139,7 +1141,7 @@ void FrameEmitterImpl::EmitCFIInstructions(MCStreamer &streamer, /// EmitCompactUnwind - Emit the unwind information in a compact way. If we're /// successful, return 'true'. Otherwise, return 'false' and it will emit the /// normal CIE and FDE. -bool FrameEmitterImpl::EmitCompactUnwind(MCStreamer &Streamer, +void FrameEmitterImpl::EmitCompactUnwind(MCStreamer &Streamer, const MCDwarfFrameInfo &Frame) { MCContext &Context = Streamer.getContext(); const MCObjectFileInfo *MOFI = Context.getObjectFileInfo(); @@ -1168,14 +1170,13 @@ bool FrameEmitterImpl::EmitCompactUnwind(MCStreamer &Streamer, // .quad except_tab1 uint32_t Encoding = Frame.CompactUnwindEncoding; - if (!Encoding) return false; + if (!Encoding) return; + bool DwarfEHFrameOnly = (Encoding == MOFI->getCompactUnwindDwarfEHFrameOnly()); // The encoding needs to know we have an LSDA. 
- if (Frame.Lsda) + if (!DwarfEHFrameOnly && Frame.Lsda) Encoding |= 0x40000000; - Streamer.SwitchSection(MOFI->getCompactUnwindSection()); - // Range Start unsigned FDEEncoding = MOFI->getFDEEncoding(UsingCFI); unsigned Size = getSizeForEncoding(Streamer, FDEEncoding); @@ -1194,11 +1195,10 @@ bool FrameEmitterImpl::EmitCompactUnwind(MCStreamer &Streamer, Twine::utohexstr(Encoding)); Streamer.EmitIntValue(Encoding, Size); - // Personality Function Size = getSizeForEncoding(Streamer, dwarf::DW_EH_PE_absptr); if (VerboseAsm) Streamer.AddComment("Personality Function"); - if (Frame.Personality) + if (!DwarfEHFrameOnly && Frame.Personality) Streamer.EmitSymbolValue(Frame.Personality, Size); else Streamer.EmitIntValue(0, Size); // No personality fn @@ -1206,12 +1206,10 @@ bool FrameEmitterImpl::EmitCompactUnwind(MCStreamer &Streamer, // LSDA Size = getSizeForEncoding(Streamer, Frame.LsdaEncoding); if (VerboseAsm) Streamer.AddComment("LSDA"); - if (Frame.Lsda) + if (!DwarfEHFrameOnly && Frame.Lsda) Streamer.EmitSymbolValue(Frame.Lsda, Size); else Streamer.EmitIntValue(0, Size); // No LSDA - - return true; } const MCSymbol &FrameEmitterImpl::EmitCIE(MCStreamer &streamer, @@ -1421,7 +1419,6 @@ MCSymbol *FrameEmitterImpl::EmitFDE(MCStreamer &streamer, } // Call Frame Instructions - EmitCFIInstructions(streamer, frame.Instructions, frame.Begin); // Padding @@ -1482,12 +1479,23 @@ void MCDwarfFrameEmitter::Emit(MCStreamer &Streamer, ArrayRef<MCDwarfFrameInfo> FrameArray = Streamer.getFrameInfos(); // Emit the compact unwind info if available. - if (IsEH && MOFI->getCompactUnwindSection()) - for (unsigned i = 0, n = Streamer.getNumFrameInfos(); i < n; ++i) { - const MCDwarfFrameInfo &Frame = Streamer.getFrameInfo(i); - if (Frame.CompactUnwindEncoding) + if (IsEH && MOFI->getCompactUnwindSection()) { + unsigned NumFrameInfos = Streamer.getNumFrameInfos(); + bool SectionEmitted = false; + + if (NumFrameInfos) { + for (unsigned i = 0; i < NumFrameInfos; ++i) { + const MCDwarfFrameInfo &Frame = Streamer.getFrameInfo(i); + if (Frame.CompactUnwindEncoding == 0) continue; + if (!SectionEmitted) { + Streamer.SwitchSection(MOFI->getCompactUnwindSection()); + Streamer.EmitValueToAlignment(Context.getAsmInfo().getPointerSize()); + SectionEmitted = true; + } Emitter.EmitCompactUnwind(Streamer, Frame); + } } + } const MCSection &Section = IsEH ? 
*MOFI->getEHFrameSection() : *MOFI->getDwarfFrameSection(); diff --git a/contrib/llvm/lib/MC/MCELFStreamer.cpp b/contrib/llvm/lib/MC/MCELFStreamer.cpp index 7f5f1b6..116f86f 100644 --- a/contrib/llvm/lib/MC/MCELFStreamer.cpp +++ b/contrib/llvm/lib/MC/MCELFStreamer.cpp @@ -13,6 +13,7 @@ #include "llvm/MC/MCELFStreamer.h" #include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/STLExtras.h" #include "llvm/MC/MCAssembler.h" #include "llvm/MC/MCCodeEmitter.h" #include "llvm/MC/MCContext.h" @@ -108,14 +109,15 @@ void MCELFStreamer::EmitAssemblerFlag(MCAssemblerFlag Flag) { llvm_unreachable("invalid assembler flag!"); } -void MCELFStreamer::ChangeSection(const MCSection *Section) { +void MCELFStreamer::ChangeSection(const MCSection *Section, + const MCExpr *Subsection) { MCSectionData *CurSection = getCurrentSectionData(); if (CurSection && CurSection->isBundleLocked()) report_fatal_error("Unterminated .bundle_lock when changing a section"); const MCSymbol *Grp = static_cast<const MCSectionELF *>(Section)->getGroup(); if (Grp) getAssembler().getOrCreateSymbolData(*Grp); - this->MCObjectStreamer::ChangeSection(Section); + this->MCObjectStreamer::ChangeSection(Section, Subsection); } void MCELFStreamer::EmitWeakReference(MCSymbol *Alias, const MCSymbol *Symbol) { @@ -126,6 +128,26 @@ void MCELFStreamer::EmitWeakReference(MCSymbol *Alias, const MCSymbol *Symbol) { Alias->setVariableValue(Value); } +// When GNU as encounters more than one .type declaration for an object it seems +// to use a mechanism similar to the one below to decide which type is actually +// used in the object file. The greater of T1 and T2 is selected based on the +// following ordering: +// STT_NOTYPE < STT_OBJECT < STT_FUNC < STT_GNU_IFUNC < STT_TLS < anything else +// If neither T1 < T2 nor T2 < T1 according to this ordering, use T2 (the user +// provided type). +static unsigned CombineSymbolTypes(unsigned T1, unsigned T2) { + unsigned TypeOrdering[] = {ELF::STT_NOTYPE, ELF::STT_OBJECT, ELF::STT_FUNC, + ELF::STT_GNU_IFUNC, ELF::STT_TLS}; + for (unsigned i = 0; i != array_lengthof(TypeOrdering); ++i) { + if (T1 == TypeOrdering[i]) + return T2; + if (T2 == TypeOrdering[i]) + return T1; + } + + return T2; +} + void MCELFStreamer::EmitSymbolAttribute(MCSymbol *Symbol, MCSymbolAttr Attribute) { // Indirect symbols are handled differently, to match how 'as' handles @@ -187,27 +209,34 @@ void MCELFStreamer::EmitSymbolAttribute(MCSymbol *Symbol, break; case MCSA_ELF_TypeFunction: - MCELF::SetType(SD, ELF::STT_FUNC); + MCELF::SetType(SD, CombineSymbolTypes(MCELF::GetType(SD), + ELF::STT_FUNC)); break; case MCSA_ELF_TypeIndFunction: - MCELF::SetType(SD, ELF::STT_GNU_IFUNC); + MCELF::SetType(SD, CombineSymbolTypes(MCELF::GetType(SD), + ELF::STT_GNU_IFUNC)); break; case MCSA_ELF_TypeObject: - MCELF::SetType(SD, ELF::STT_OBJECT); + MCELF::SetType(SD, CombineSymbolTypes(MCELF::GetType(SD), + ELF::STT_OBJECT)); break; case MCSA_ELF_TypeTLS: - MCELF::SetType(SD, ELF::STT_TLS); + MCELF::SetType(SD, CombineSymbolTypes(MCELF::GetType(SD), + ELF::STT_TLS)); break; case MCSA_ELF_TypeCommon: - MCELF::SetType(SD, ELF::STT_COMMON); + // TODO: Emit these as a common symbol. 
+ MCELF::SetType(SD, CombineSymbolTypes(MCELF::GetType(SD), + ELF::STT_OBJECT)); break; case MCSA_ELF_TypeNoType: - MCELF::SetType(SD, ELF::STT_NOTYPE); + MCELF::SetType(SD, CombineSymbolTypes(MCELF::GetType(SD), + ELF::STT_NOTYPE)); break; case MCSA_Protected: @@ -290,7 +319,7 @@ void MCELFStreamer::EmitValueToAlignment(unsigned ByteAlignment, // entry in the module's symbol table (the first being the null symbol). void MCELFStreamer::EmitFileDirective(StringRef Filename) { MCSymbol *Symbol = getAssembler().getContext().GetOrCreateSymbol(Filename); - Symbol->setSection(*getCurrentSection()); + Symbol->setSection(*getCurrentSection().first); Symbol->setAbsolute(); MCSymbolData &SD = getAssembler().getOrCreateSymbolData(*Symbol); @@ -406,11 +435,13 @@ void MCELFStreamer::EmitInstToData(const MCInst &Inst) { // Optimize memory usage by emitting the instruction to a // MCCompactEncodedInstFragment when not in a bundle-locked group and // there are no fixups registered. - MCCompactEncodedInstFragment *CEIF = new MCCompactEncodedInstFragment(SD); + MCCompactEncodedInstFragment *CEIF = new MCCompactEncodedInstFragment(); + insert(CEIF); CEIF->getContents().append(Code.begin(), Code.end()); return; } else { - DF = new MCDataFragment(SD); + DF = new MCDataFragment(); + insert(DF); if (SD->getBundleLockState() == MCSectionData::BundleLockedAlignToEnd) { // If this is a new fragment created for a bundle-locked group, and the // group was marked as "align_to_end", set a flag in the fragment. diff --git a/contrib/llvm/lib/MC/MCExpr.cpp b/contrib/llvm/lib/MC/MCExpr.cpp index cd4d144..06bc72f 100644 --- a/contrib/llvm/lib/MC/MCExpr.cpp +++ b/contrib/llvm/lib/MC/MCExpr.cpp @@ -250,6 +250,7 @@ StringRef MCSymbolRefExpr::getVariantKindName(VariantKind Kind) { case VK_Mips_GOT_LO16: return "GOT_LO16"; case VK_Mips_CALL_HI16: return "CALL_HI16"; case VK_Mips_CALL_LO16: return "CALL_LO16"; + case VK_COFF_IMGREL32: return "IMGREL32"; } llvm_unreachable("Invalid variant kind"); } @@ -285,6 +286,44 @@ MCSymbolRefExpr::getVariantKindForName(StringRef Name) { .Case("dtpoff", VK_DTPOFF) .Case("TLVP", VK_TLVP) .Case("tlvp", VK_TLVP) + .Case("IMGREL", VK_COFF_IMGREL32) + .Case("imgrel", VK_COFF_IMGREL32) + .Case("SECREL32", VK_SECREL) + .Case("secrel32", VK_SECREL) + .Case("HA", VK_PPC_GAS_HA16) + .Case("ha", VK_PPC_GAS_HA16) + .Case("L", VK_PPC_GAS_LO16) + .Case("l", VK_PPC_GAS_LO16) + .Case("TOCBASE", VK_PPC_TOC) + .Case("tocbase", VK_PPC_TOC) + .Case("TOC", VK_PPC_TOC_ENTRY) + .Case("toc", VK_PPC_TOC_ENTRY) + .Case("TOC@HA", VK_PPC_TOC16_HA) + .Case("toc@ha", VK_PPC_TOC16_HA) + .Case("TOC@L", VK_PPC_TOC16_LO) + .Case("toc@l", VK_PPC_TOC16_LO) + .Case("TLS", VK_PPC_TLS) + .Case("tls", VK_PPC_TLS) + .Case("TPREL@HA", VK_PPC_TPREL16_HA) + .Case("tprel@ha", VK_PPC_TPREL16_HA) + .Case("TPREL@L", VK_PPC_TPREL16_LO) + .Case("tprel@l", VK_PPC_TPREL16_LO) + .Case("DTPREL@HA", VK_PPC_DTPREL16_HA) + .Case("dtprel@ha", VK_PPC_DTPREL16_HA) + .Case("DTPREL@L", VK_PPC_DTPREL16_LO) + .Case("dtprel@l", VK_PPC_DTPREL16_LO) + .Case("GOT@TPREL@HA", VK_PPC_GOT_TPREL16_HA) + .Case("got@tprel@ha", VK_PPC_GOT_TPREL16_HA) + .Case("GOT@TPREL@L", VK_PPC_GOT_TPREL16_LO) + .Case("got@tprel@l", VK_PPC_GOT_TPREL16_LO) + .Case("GOT@TLSGD@HA", VK_PPC_GOT_TLSGD16_HA) + .Case("got@tlsgd@ha", VK_PPC_GOT_TLSGD16_HA) + .Case("GOT@TLSGD@L", VK_PPC_GOT_TLSGD16_LO) + .Case("got@tlsgd@l", VK_PPC_GOT_TLSGD16_LO) + .Case("GOT@TLSLD@HA", VK_PPC_GOT_TLSLD16_HA) + .Case("got@tlsld@ha", VK_PPC_GOT_TLSLD16_HA) + .Case("GOT@TLSLD@L", VK_PPC_GOT_TLSLD16_LO) + 
.Case("got@tlsld@l", VK_PPC_GOT_TLSLD16_LO) .Default(VK_Invalid); } diff --git a/contrib/llvm/lib/MC/MCMachOStreamer.cpp b/contrib/llvm/lib/MC/MCMachOStreamer.cpp index 7d08d0e..e08b01b 100644 --- a/contrib/llvm/lib/MC/MCMachOStreamer.cpp +++ b/contrib/llvm/lib/MC/MCMachOStreamer.cpp @@ -122,11 +122,11 @@ void MCMachOStreamer::EmitLabel(MCSymbol *Symbol) { assert(Symbol->isUndefined() && "Cannot define a symbol twice!"); // isSymbolLinkerVisible uses the section. - Symbol->setSection(*getCurrentSection()); + Symbol->setSection(*getCurrentSection().first); // We have to create a new fragment if this is an atom defining symbol, // fragments cannot span atoms. if (getAssembler().isSymbolLinkerVisible(*Symbol)) - new MCDataFragment(getCurrentSectionData()); + insert(new MCDataFragment()); MCObjectStreamer::EmitLabel(Symbol); diff --git a/contrib/llvm/lib/MC/MCNullStreamer.cpp b/contrib/llvm/lib/MC/MCNullStreamer.cpp index c872b22..659706a 100644 --- a/contrib/llvm/lib/MC/MCNullStreamer.cpp +++ b/contrib/llvm/lib/MC/MCNullStreamer.cpp @@ -30,13 +30,14 @@ namespace { virtual void InitSections() { } - virtual void ChangeSection(const MCSection *Section) { + virtual void ChangeSection(const MCSection *Section, + const MCExpr *Subsection) { } virtual void EmitLabel(MCSymbol *Symbol) { assert(Symbol->isUndefined() && "Cannot define a symbol twice!"); - assert(getCurrentSection() && "Cannot emit before setting section!"); - Symbol->setSection(*getCurrentSection()); + assert(getCurrentSection().first &&"Cannot emit before setting section!"); + Symbol->setSection(*getCurrentSection().first); } virtual void EmitDebugLabel(MCSymbol *Symbol) { EmitLabel(Symbol); diff --git a/contrib/llvm/lib/MC/MCObjectFileInfo.cpp b/contrib/llvm/lib/MC/MCObjectFileInfo.cpp index d19e79a..96b62f1 100644 --- a/contrib/llvm/lib/MC/MCObjectFileInfo.cpp +++ b/contrib/llvm/lib/MC/MCObjectFileInfo.cpp @@ -145,12 +145,16 @@ void MCObjectFileInfo::InitMachOMCObjectFileInfo(Triple T) { LSDASection = Ctx->getMachOSection("__TEXT", "__gcc_except_tab", 0, SectionKind::getReadOnlyWithRel()); - if (T.isMacOSX() && !T.isMacOSXVersionLT(10, 6)) + if (T.isMacOSX() && !T.isMacOSXVersionLT(10, 6)) { CompactUnwindSection = Ctx->getMachOSection("__LD", "__compact_unwind", MCSectionMachO::S_ATTR_DEBUG, SectionKind::getReadOnly()); + if (T.getArch() == Triple::x86_64 || T.getArch() == Triple::x86) + CompactUnwindDwarfEHFrameOnly = 0x04000000; + } + // Debug Information. DwarfAccelNamesSection = Ctx->getMachOSection("__DWARF", "__apple_names", @@ -291,6 +295,22 @@ void MCObjectFileInfo::InitELFMCObjectFileInfo(Triple T) { FDEEncoding = dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_udata8; TTypeEncoding = dwarf::DW_EH_PE_indirect | dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_udata8; + } else if (T.getArch() == Triple::systemz) { + // All currently-defined code models guarantee that 4-byte PC-relative + // values will be in range. 
+ if (RelocM == Reloc::PIC_) { + PersonalityEncoding = dwarf::DW_EH_PE_indirect | dwarf::DW_EH_PE_pcrel | + dwarf::DW_EH_PE_sdata4; + LSDAEncoding = dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_sdata4; + FDEEncoding = dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_sdata4; + TTypeEncoding = dwarf::DW_EH_PE_indirect | dwarf::DW_EH_PE_pcrel | + dwarf::DW_EH_PE_sdata4; + } else { + PersonalityEncoding = dwarf::DW_EH_PE_absptr; + LSDAEncoding = dwarf::DW_EH_PE_absptr; + FDEEncoding = dwarf::DW_EH_PE_absptr; + TTypeEncoding = dwarf::DW_EH_PE_absptr; + } } // Solaris requires different flags for .eh_frame to seemingly every other @@ -629,6 +649,8 @@ void MCObjectFileInfo::InitMCObjectFileInfo(StringRef TT, Reloc::Model relocm, PersonalityEncoding = LSDAEncoding = FDEEncoding = FDECFIEncoding = TTypeEncoding = dwarf::DW_EH_PE_absptr; + CompactUnwindDwarfEHFrameOnly = 0; + EHFrameSection = 0; // Created on demand. CompactUnwindSection = 0; // Used only by selected targets. DwarfAccelNamesSection = 0; // Used only by selected targets. diff --git a/contrib/llvm/lib/MC/MCObjectStreamer.cpp b/contrib/llvm/lib/MC/MCObjectStreamer.cpp index 0d2ce83..d21ce8d 100644 --- a/contrib/llvm/lib/MC/MCObjectStreamer.cpp +++ b/contrib/llvm/lib/MC/MCObjectStreamer.cpp @@ -8,6 +8,7 @@ //===----------------------------------------------------------------------===// #include "llvm/MC/MCObjectStreamer.h" +#include "llvm/ADT/STLExtras.h" #include "llvm/MC/MCAsmBackend.h" #include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCAssembler.h" @@ -45,14 +46,15 @@ void MCObjectStreamer::reset() { if (Assembler) Assembler->reset(); CurSectionData = 0; + CurInsertionPoint = MCSectionData::iterator(); MCStreamer::reset(); } MCFragment *MCObjectStreamer::getCurrentFragment() const { assert(getCurrentSectionData() && "No current section!"); - if (!getCurrentSectionData()->empty()) - return &getCurrentSectionData()->getFragmentList().back(); + if (CurInsertionPoint != getCurrentSectionData()->getFragmentList().begin()) + return prior(CurInsertionPoint); return 0; } @@ -61,8 +63,10 @@ MCDataFragment *MCObjectStreamer::getOrCreateDataFragment() const { MCDataFragment *F = dyn_cast_or_null<MCDataFragment>(getCurrentFragment()); // When bundling is enabled, we don't want to add data to a fragment that // already has instructions (see MCELFStreamer::EmitInstToData for details) - if (!F || (Assembler->isBundlingEnabled() && F->hasInstructions())) - F = new MCDataFragment(getCurrentSectionData()); + if (!F || (Assembler->isBundlingEnabled() && F->hasInstructions())) { + F = new MCDataFragment(); + insert(F); + } return F; } @@ -145,7 +149,7 @@ void MCObjectStreamer::EmitULEB128Value(const MCExpr *Value) { return; } Value = ForceExpAbs(Value); - new MCLEBFragment(*Value, false, getCurrentSectionData()); + insert(new MCLEBFragment(*Value, false)); } void MCObjectStreamer::EmitSLEB128Value(const MCExpr *Value) { @@ -155,7 +159,7 @@ void MCObjectStreamer::EmitSLEB128Value(const MCExpr *Value) { return; } Value = ForceExpAbs(Value); - new MCLEBFragment(*Value, true, getCurrentSectionData()); + insert(new MCLEBFragment(*Value, true)); } void MCObjectStreamer::EmitWeakReference(MCSymbol *Alias, @@ -163,10 +167,20 @@ void MCObjectStreamer::EmitWeakReference(MCSymbol *Alias, report_fatal_error("This file format doesn't support weak aliases."); } -void MCObjectStreamer::ChangeSection(const MCSection *Section) { +void MCObjectStreamer::ChangeSection(const MCSection *Section, + const MCExpr *Subsection) { assert(Section && "Cannot switch to a null section!"); 
CurSectionData = &getAssembler().getOrCreateSectionData(*Section); + + int64_t IntSubsection = 0; + if (Subsection && + !Subsection->EvaluateAsAbsolute(IntSubsection, getAssembler())) + report_fatal_error("Cannot evaluate subsection number"); + if (IntSubsection < 0 || IntSubsection > 8192) + report_fatal_error("Subsection number out of range"); + CurInsertionPoint = + CurSectionData->getSubsectionInsertionPoint(unsigned(IntSubsection)); } void MCObjectStreamer::EmitAssignment(MCSymbol *Symbol, const MCExpr *Value) { @@ -185,7 +199,7 @@ void MCObjectStreamer::EmitInstruction(const MCInst &Inst) { // Now that a machine instruction has been assembled into this section, make // a line entry for any .loc directive that has been seen. - MCLineEntry::Make(this, getCurrentSection()); + MCLineEntry::Make(this, getCurrentSection().first); // If this instruction doesn't need relaxation, just emit it as data. MCAssembler &Assembler = getAssembler(); @@ -216,8 +230,8 @@ void MCObjectStreamer::EmitInstruction(const MCInst &Inst) { void MCObjectStreamer::EmitInstToFragment(const MCInst &Inst) { // Always create a new, separate fragment here, because its size can change // during relaxation. - MCRelaxableFragment *IF = - new MCRelaxableFragment(Inst, getCurrentSectionData()); + MCRelaxableFragment *IF = new MCRelaxableFragment(Inst); + insert(IF); SmallString<128> Code; raw_svector_ostream VecOS(Code); @@ -258,7 +272,7 @@ void MCObjectStreamer::EmitDwarfAdvanceLineAddr(int64_t LineDelta, return; } AddrDelta = ForceExpAbs(AddrDelta); - new MCDwarfLineAddrFragment(LineDelta, *AddrDelta, getCurrentSectionData()); + insert(new MCDwarfLineAddrFragment(LineDelta, *AddrDelta)); } void MCObjectStreamer::EmitDwarfAdvanceFrameAddr(const MCSymbol *LastLabel, @@ -270,7 +284,7 @@ void MCObjectStreamer::EmitDwarfAdvanceFrameAddr(const MCSymbol *LastLabel, return; } AddrDelta = ForceExpAbs(AddrDelta); - new MCDwarfCallFrameFragment(*AddrDelta, getCurrentSectionData()); + insert(new MCDwarfCallFrameFragment(*AddrDelta)); } void MCObjectStreamer::EmitBytes(StringRef Data, unsigned AddrSpace) { @@ -284,8 +298,7 @@ void MCObjectStreamer::EmitValueToAlignment(unsigned ByteAlignment, unsigned MaxBytesToEmit) { if (MaxBytesToEmit == 0) MaxBytesToEmit = ByteAlignment; - new MCAlignFragment(ByteAlignment, Value, ValueSize, MaxBytesToEmit, - getCurrentSectionData()); + insert(new MCAlignFragment(ByteAlignment, Value, ValueSize, MaxBytesToEmit)); // Update the maximum alignment on the current section if necessary. 
if (ByteAlignment > getCurrentSectionData()->getAlignment()) @@ -302,7 +315,7 @@ bool MCObjectStreamer::EmitValueToOffset(const MCExpr *Offset, unsigned char Value) { int64_t Res; if (Offset->EvaluateAsAbsolute(Res, getAssembler())) { - new MCOrgFragment(*Offset, Value, getCurrentSectionData()); + insert(new MCOrgFragment(*Offset, Value)); return false; } diff --git a/contrib/llvm/lib/MC/MCParser/AsmParser.cpp b/contrib/llvm/lib/MC/MCParser/AsmParser.cpp index 804734c..edefdb4 100644 --- a/contrib/llvm/lib/MC/MCParser/AsmParser.cpp +++ b/contrib/llvm/lib/MC/MCParser/AsmParser.cpp @@ -201,9 +201,9 @@ public: } virtual bool Warning(SMLoc L, const Twine &Msg, - ArrayRef<SMRange> Ranges = ArrayRef<SMRange>()); + ArrayRef<SMRange> Ranges = None); virtual bool Error(SMLoc L, const Twine &Msg, - ArrayRef<SMRange> Ranges = ArrayRef<SMRange>()); + ArrayRef<SMRange> Ranges = None); virtual const AsmToken &Lex(); @@ -221,6 +221,7 @@ public: bool parseExpression(const MCExpr *&Res); virtual bool parseExpression(const MCExpr *&Res, SMLoc &EndLoc); + virtual bool parsePrimaryExpr(const MCExpr *&Res, SMLoc &EndLoc); virtual bool parseParenExpression(const MCExpr *&Res, SMLoc &EndLoc); virtual bool parseAbsoluteExpression(int64_t &Res); @@ -285,7 +286,7 @@ private: void PrintMacroInstantiations(); void PrintMessage(SMLoc Loc, SourceMgr::DiagKind Kind, const Twine &Msg, - ArrayRef<SMRange> Ranges = ArrayRef<SMRange>()) const { + ArrayRef<SMRange> Ranges = None) const { SrcMgr.PrintMessage(Loc, Kind, Msg, Ranges); } static void DiagHandler(const SMDiagnostic &Diag, void *Context); @@ -601,7 +602,7 @@ bool AsmParser::Run(bool NoInitialTextSection, bool NoFinalize) { // If we are generating dwarf for assembly source files save the initial text // section and generate a .file directive. if (getContext().getGenDwarfForAssembly()) { - getContext().setGenDwarfSection(getStreamer().getCurrentSection()); + getContext().setGenDwarfSection(getStreamer().getCurrentSection().first); MCSymbol *SectionStartSym = getContext().CreateTempSymbol(); getStreamer().EmitLabel(SectionStartSym); getContext().setGenDwarfSectionStartSym(SectionStartSym); @@ -666,7 +667,7 @@ bool AsmParser::Run(bool NoInitialTextSection, bool NoFinalize) { } void AsmParser::checkForValidSection() { - if (!ParsingInlineAsm && !getStreamer().getCurrentSection()) { + if (!ParsingInlineAsm && !getStreamer().getCurrentSection().first) { TokError("expected section directive before assembly directive"); Out.InitToTextSection(); } @@ -869,6 +870,10 @@ bool AsmParser::parseExpression(const MCExpr *&Res) { return parseExpression(Res, EndLoc); } +bool AsmParser::parsePrimaryExpr(const MCExpr *&Res, SMLoc &EndLoc) { + return ParsePrimaryExpr(Res, EndLoc); +} + const MCExpr * AsmParser::ApplyModifierToExpr(const MCExpr *E, MCSymbolRefExpr::VariantKind Variant) { @@ -1087,7 +1092,7 @@ bool AsmParser::ParseBinOpRHS(unsigned Precedence, const MCExpr *&Res, MCBinaryExpr::Opcode Dummy; unsigned NextTokPrec = getBinOpPrecedence(Lexer.getKind(), Dummy); if (TokPrec < NextTokPrec) { - if (ParseBinOpRHS(Precedence+1, RHS, EndLoc)) return true; + if (ParseBinOpRHS(TokPrec+1, RHS, EndLoc)) return true; } // Merge LHS and RHS according to operator. @@ -1488,7 +1493,8 @@ bool AsmParser::ParseStatement(ParseStatementInfo &Info) { // section is the initial text section then generate a .loc directive for // the instruction. 
if (!HadError && getContext().getGenDwarfForAssembly() && - getContext().getGenDwarfSection() == getStreamer().getCurrentSection()) { + getContext().getGenDwarfSection() == + getStreamer().getCurrentSection().first) { unsigned Line = SrcMgr.FindLineNumber(IDLoc, CurBuffer); @@ -1978,7 +1984,6 @@ static bool IsUsedIn(const MCSymbol *Sym, const MCExpr *Value) { case MCExpr::Binary: { const MCBinaryExpr *BE = static_cast<const MCBinaryExpr*>(Value); return IsUsedIn(Sym, BE->getLHS()) || IsUsedIn(Sym, BE->getRHS()); - break; } case MCExpr::Target: case MCExpr::Constant: @@ -2479,7 +2484,7 @@ bool AsmParser::ParseDirectiveAlign(bool IsPow2, unsigned ValueSize) { // Check whether we should use optimal code alignment for this .align // directive. - bool UseCodeAlign = getStreamer().getCurrentSection()->UseCodeAlign(); + bool UseCodeAlign = getStreamer().getCurrentSection().first->UseCodeAlign(); if ((!HasFillExpr || Lexer.getMAI().getTextAlignFillValue() == FillExpr) && ValueSize == 1 && UseCodeAlign) { getStreamer().EmitCodeAlignment(Alignment, MaxBytesToFill); @@ -2631,12 +2636,10 @@ bool AsmParser::ParseDirectiveLoc() { Flags |= DWARF2_FLAG_IS_STMT; else return Error(Loc, "is_stmt value not 0 or 1"); - } - else { + } else { return Error(Loc, "is_stmt value not the constant value of 0 or 1"); } - } - else if (Name == "isa") { + } else if (Name == "isa") { Loc = getTok().getLoc(); const MCExpr *Value; if (parseExpression(Value)) @@ -2647,16 +2650,13 @@ bool AsmParser::ParseDirectiveLoc() { if (Value < 0) return Error(Loc, "isa number less than zero"); Isa = Value; - } - else { + } else { return Error(Loc, "isa number not a constant value"); } - } - else if (Name == "discriminator") { + } else if (Name == "discriminator") { if (parseAbsoluteExpression(Discriminator)) return true; - } - else { + } else { return Error(Loc, "unknown sub-directive in '.loc' directive"); } @@ -3615,18 +3615,17 @@ bool AsmParser::ParseDirectiveIfdef(SMLoc DirectiveLoc, bool expect_defined) { bool AsmParser::ParseDirectiveElseIf(SMLoc DirectiveLoc) { if (TheCondState.TheCond != AsmCond::IfCond && TheCondState.TheCond != AsmCond::ElseIfCond) - Error(DirectiveLoc, "Encountered a .elseif that doesn't follow a .if or " - " an .elseif"); + Error(DirectiveLoc, "Encountered a .elseif that doesn't follow a .if or " + " an .elseif"); TheCondState.TheCond = AsmCond::ElseIfCond; bool LastIgnoreState = false; if (!TheCondStack.empty()) - LastIgnoreState = TheCondStack.back().Ignore; + LastIgnoreState = TheCondStack.back().Ignore; if (LastIgnoreState || TheCondState.CondMet) { TheCondState.Ignore = true; eatToEndOfStatement(); - } - else { + } else { int64_t ExprValue; if (parseAbsoluteExpression(ExprValue)) return true; @@ -3652,8 +3651,8 @@ bool AsmParser::ParseDirectiveElse(SMLoc DirectiveLoc) { if (TheCondState.TheCond != AsmCond::IfCond && TheCondState.TheCond != AsmCond::ElseIfCond) - Error(DirectiveLoc, "Encountered a .else that doesn't follow a .if or an " - ".elseif"); + Error(DirectiveLoc, "Encountered a .else that doesn't follow a .if or an " + ".elseif"); TheCondState.TheCond = AsmCond::ElseCond; bool LastIgnoreState = false; if (!TheCondStack.empty()) @@ -4046,19 +4045,17 @@ static int RewritesSort(const void *A, const void *B) { if (AsmRewriteB->Loc.getPointer() < AsmRewriteA->Loc.getPointer()) return 1; - // It's possible to have a SizeDirective rewrite and an Input/Output rewrite - // to the same location. Make sure the SizeDirective rewrite is performed - // first. This also ensure the sort algorithm is stable. 
- if (AsmRewriteA->Kind == AOK_SizeDirective) { - assert ((AsmRewriteB->Kind == AOK_Input || AsmRewriteB->Kind == AOK_Output) && - "Expected an Input/Output rewrite!"); + // It's possible to have a SizeDirective, Imm/ImmPrefix and an Input/Output + // rewrite to the same location. Make sure the SizeDirective rewrite is + // performed first, then the Imm/ImmPrefix and finally the Input/Output. This + // ensures the sort algorithm is stable. + if (AsmRewritePrecedence [AsmRewriteA->Kind] > + AsmRewritePrecedence [AsmRewriteB->Kind]) return -1; - } - if (AsmRewriteB->Kind == AOK_SizeDirective) { - assert ((AsmRewriteA->Kind == AOK_Input || AsmRewriteA->Kind == AOK_Output) && - "Expected an Input/Output rewrite!"); + + if (AsmRewritePrecedence [AsmRewriteA->Kind] < + AsmRewritePrecedence [AsmRewriteB->Kind]) return 1; - } llvm_unreachable ("Unstable rewrite sort."); } @@ -4118,28 +4115,27 @@ AsmParser::parseMSInlineAsm(void *AsmLoc, std::string &AsmString, } // Expr/Input or Output. - bool IsVarDecl; - unsigned Length, Size, Type; - void *OpDecl = SI.LookupInlineAsmIdentifier(Operand->getName(), AsmLoc, - Length, Size, Type, - IsVarDecl); + StringRef SymName = Operand->getSymName(); + if (SymName.empty()) + continue; + + void *OpDecl = Operand->getOpDecl(); if (!OpDecl) continue; bool isOutput = (i == 1) && Desc.mayStore(); + SMLoc Start = SMLoc::getFromPointer(SymName.data()); if (isOutput) { ++InputIdx; OutputDecls.push_back(OpDecl); OutputDeclsAddressOf.push_back(Operand->needAddressOf()); OutputConstraints.push_back('=' + Operand->getConstraint().str()); - AsmStrRewrites.push_back(AsmRewrite(AOK_Output, Operand->getStartLoc(), - Operand->getNameLen())); + AsmStrRewrites.push_back(AsmRewrite(AOK_Output, Start, SymName.size())); } else { InputDecls.push_back(OpDecl); InputDeclsAddressOf.push_back(Operand->needAddressOf()); InputConstraints.push_back(Operand->getConstraint().str()); - AsmStrRewrites.push_back(AsmRewrite(AOK_Input, Operand->getStartLoc(), - Operand->getNameLen())); + AsmStrRewrites.push_back(AsmRewrite(AOK_Input, Start, SymName.size())); } } } @@ -4182,20 +4178,17 @@ AsmParser::parseMSInlineAsm(void *AsmLoc, std::string &AsmString, for (SmallVectorImpl<AsmRewrite>::iterator I = AsmStrRewrites.begin(), E = AsmStrRewrites.end(); I != E; ++I) { + AsmRewriteKind Kind = (*I).Kind; + if (Kind == AOK_Delete) + continue; + const char *Loc = (*I).Loc.getPointer(); assert(Loc >= AsmStart && "Expected Loc to be at or after Start!"); - unsigned AdditionalSkip = 0; - AsmRewriteKind Kind = (*I).Kind; - // Emit everything up to the immediate/expression. unsigned Len = Loc - AsmStart; - if (Len) { - // For Input/Output operands we need to remove the brackets, if present. - if ((Kind == AOK_Input || Kind == AOK_Output) && Loc[-1] == '[') - --Len; + if (Len) OS << StringRef(AsmStart, Len); - } // Skip the original expression. if (Kind == AOK_Skip) { @@ -4203,6 +4196,7 @@ AsmParser::parseMSInlineAsm(void *AsmLoc, std::string &AsmString, continue; } + unsigned AdditionalSkip = 0; // Rewrite expressions in $N notation. switch (Kind) { default: break; @@ -4249,11 +4243,6 @@ AsmParser::parseMSInlineAsm(void *AsmLoc, std::string &AsmString, // Skip the original expression. AsmStart = Loc + (*I).Len + AdditionalSkip; - - // For Input/Output operands we need to remove the brackets, if present. - if ((Kind == AOK_Input || Kind == AOK_Output) && AsmStart != AsmEnd && - *AsmStart == ']') - ++AsmStart; } // Emit the remainder of the asm string. 
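The RewritesSort change above replaces the hard-coded SizeDirective special case with a table-driven tie-break: when two rewrites land at the same location in the asm string, the one with the higher precedence is applied first. Below is a minimal standalone C++ sketch of that ordering idea; the kind names, the precedence values, and the use of std::stable_sort are illustrative only and do not reproduce LLVM's actual AsmRewritePrecedence table or its qsort-style comparator.

// Standalone illustration of precedence-ordered rewrite sorting.
// Kind names and precedence values are invented for the sketch.
#include <algorithm>
#include <cstdio>
#include <vector>

enum RewriteKind { SizeDirective, ImmPrefix, Imm, Input, Output };

// Higher value = applied earlier when two rewrites share a location
// (size directive first, then imm/imm-prefix, then input/output).
static const int Precedence[] = {
  /* SizeDirective */ 3,
  /* ImmPrefix     */ 2,
  /* Imm           */ 2,
  /* Input         */ 1,
  /* Output        */ 1,
};

struct Rewrite {
  const char *Loc;   // position in the asm string
  RewriteKind Kind;
};

static bool RewriteLess(const Rewrite &A, const Rewrite &B) {
  if (A.Loc != B.Loc)
    return A.Loc < B.Loc;                         // earlier location first
  return Precedence[A.Kind] > Precedence[B.Kind]; // then higher precedence first
}

int main() {
  const char *Asm = "mov eax, dword ptr [x]";
  // Two rewrites anchored at the same offset: the size directive must come
  // out before the input-operand substitution, regardless of insertion order,
  // which is exactly what the precedence tie-break guarantees.
  std::vector<Rewrite> Rewrites = {{Asm + 9, Input}, {Asm + 9, SizeDirective}};
  std::stable_sort(Rewrites.begin(), Rewrites.end(), RewriteLess);
  for (const Rewrite &R : Rewrites)
    std::printf("kind=%d at offset %td\n", (int)R.Kind, R.Loc - Asm);
  return 0;
}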
diff --git a/contrib/llvm/lib/MC/MCParser/DarwinAsmParser.cpp b/contrib/llvm/lib/MC/MCParser/DarwinAsmParser.cpp index 6d6409f..7eb8b74 100644 --- a/contrib/llvm/lib/MC/MCParser/DarwinAsmParser.cpp +++ b/contrib/llvm/lib/MC/MCParser/DarwinAsmParser.cpp @@ -566,10 +566,10 @@ bool DarwinAsmParser::ParseDirectivePopSection(StringRef, SMLoc) { /// ParseDirectivePrevious: /// ::= .previous bool DarwinAsmParser::ParseDirectivePrevious(StringRef DirName, SMLoc) { - const MCSection *PreviousSection = getStreamer().getPreviousSection(); - if (PreviousSection == NULL) + MCSectionSubPair PreviousSection = getStreamer().getPreviousSection(); + if (PreviousSection.first == NULL) return TokError(".previous without corresponding .section"); - getStreamer().SwitchSection(PreviousSection); + getStreamer().SwitchSection(PreviousSection.first, PreviousSection.second); return false; } diff --git a/contrib/llvm/lib/MC/MCParser/ELFAsmParser.cpp b/contrib/llvm/lib/MC/MCParser/ELFAsmParser.cpp index 4c45e08..3134fc3 100644 --- a/contrib/llvm/lib/MC/MCParser/ELFAsmParser.cpp +++ b/contrib/llvm/lib/MC/MCParser/ELFAsmParser.cpp @@ -76,6 +76,7 @@ public: &ELFAsmParser::ParseDirectiveSymbolAttribute>(".internal"); addDirectiveHandler< &ELFAsmParser::ParseDirectiveSymbolAttribute>(".hidden"); + addDirectiveHandler<&ELFAsmParser::ParseDirectiveSubsection>(".subsection"); } // FIXME: Part of this logic is duplicated in the MCELFStreamer. What is @@ -147,9 +148,11 @@ public: bool ParseDirectiveVersion(StringRef, SMLoc); bool ParseDirectiveWeakref(StringRef, SMLoc); bool ParseDirectiveSymbolAttribute(StringRef, SMLoc); + bool ParseDirectiveSubsection(StringRef, SMLoc); private: bool ParseSectionName(StringRef &SectionName); + bool ParseSectionArguments(bool IsPush); }; } @@ -191,12 +194,15 @@ bool ELFAsmParser::ParseDirectiveSymbolAttribute(StringRef Directive, SMLoc) { bool ELFAsmParser::ParseSectionSwitch(StringRef Section, unsigned Type, unsigned Flags, SectionKind Kind) { - if (getLexer().isNot(AsmToken::EndOfStatement)) - return TokError("unexpected token in section switching directive"); - Lex(); + const MCExpr *Subsection = 0; + if (getLexer().isNot(AsmToken::EndOfStatement)) { + if (getParser().parseExpression(Subsection)) + return true; + } getStreamer().SwitchSection(getContext().getELFSection( - Section, Type, Flags, Kind)); + Section, Type, Flags, Kind), + Subsection); return false; } @@ -316,7 +322,7 @@ static int parseSectionFlags(StringRef flagsStr) { bool ELFAsmParser::ParseDirectivePushSection(StringRef s, SMLoc loc) { getStreamer().PushSection(); - if (ParseDirectiveSection(s, loc)) { + if (ParseSectionArguments(/*IsPush=*/true)) { getStreamer().PopSection(); return true; } @@ -332,6 +338,10 @@ bool ELFAsmParser::ParseDirectivePopSection(StringRef, SMLoc) { // FIXME: This is a work in progress. bool ELFAsmParser::ParseDirectiveSection(StringRef, SMLoc) { + return ParseSectionArguments(/*IsPush=*/false); +} + +bool ELFAsmParser::ParseSectionArguments(bool IsPush) { StringRef SectionName; if (ParseSectionName(SectionName)) @@ -341,6 +351,7 @@ bool ELFAsmParser::ParseDirectiveSection(StringRef, SMLoc) { int64_t Size = 0; StringRef GroupName; unsigned Flags = 0; + const MCExpr *Subsection = 0; // Set the defaults first. 
if (SectionName == ".fini" || SectionName == ".init" || @@ -352,6 +363,14 @@ bool ELFAsmParser::ParseDirectiveSection(StringRef, SMLoc) { if (getLexer().is(AsmToken::Comma)) { Lex(); + if (IsPush && getLexer().isNot(AsmToken::String)) { + if (getParser().parseExpression(Subsection)) + return true; + if (getLexer().isNot(AsmToken::Comma)) + goto EndStmt; + Lex(); + } + if (getLexer().isNot(AsmToken::String)) return TokError("expected string in directive"); @@ -408,6 +427,7 @@ bool ELFAsmParser::ParseDirectiveSection(StringRef, SMLoc) { } } +EndStmt: if (getLexer().isNot(AsmToken::EndOfStatement)) return TokError("unexpected token in directive"); @@ -444,15 +464,16 @@ bool ELFAsmParser::ParseDirectiveSection(StringRef, SMLoc) { SectionKind Kind = computeSectionKind(Flags); getStreamer().SwitchSection(getContext().getELFSection(SectionName, Type, Flags, Kind, Size, - GroupName)); + GroupName), + Subsection); return false; } bool ELFAsmParser::ParseDirectivePrevious(StringRef DirName, SMLoc) { - const MCSection *PreviousSection = getStreamer().getPreviousSection(); - if (PreviousSection == NULL) + MCSectionSubPair PreviousSection = getStreamer().getPreviousSection(); + if (PreviousSection.first == NULL) return TokError(".previous without corresponding .section"); - getStreamer().SwitchSection(PreviousSection); + getStreamer().SwitchSection(PreviousSection.first, PreviousSection.second); return false; } @@ -613,6 +634,20 @@ bool ELFAsmParser::ParseDirectiveWeakref(StringRef, SMLoc) { return false; } +bool ELFAsmParser::ParseDirectiveSubsection(StringRef, SMLoc) { + const MCExpr *Subsection = 0; + if (getLexer().isNot(AsmToken::EndOfStatement)) { + if (getParser().parseExpression(Subsection)) + return true; + } + + if (getLexer().isNot(AsmToken::EndOfStatement)) + return TokError("unexpected token in directive"); + + getStreamer().SubSection(Subsection); + return false; +} + namespace llvm { MCAsmParserExtension *createELFAsmParser() { diff --git a/contrib/llvm/lib/MC/MCPureStreamer.cpp b/contrib/llvm/lib/MC/MCPureStreamer.cpp index 0e04c55..8ae724f 100644 --- a/contrib/llvm/lib/MC/MCPureStreamer.cpp +++ b/contrib/llvm/lib/MC/MCPureStreamer.cpp @@ -12,9 +12,8 @@ #include "llvm/MC/MCCodeEmitter.h" #include "llvm/MC/MCContext.h" #include "llvm/MC/MCExpr.h" +#include "llvm/MC/MCObjectFileInfo.h" #include "llvm/MC/MCObjectStreamer.h" -// FIXME: Remove this. -#include "llvm/MC/MCSectionMachO.h" #include "llvm/MC/MCSymbol.h" #include "llvm/Support/ErrorHandling.h" @@ -113,25 +112,22 @@ void MCPureStreamer::InitSections() { } void MCPureStreamer::InitToTextSection() { - // FIMXE: To what!? - SwitchSection(getContext().getMachOSection("__TEXT", "__text", - MCSectionMachO::S_ATTR_PURE_INSTRUCTIONS, - 0, SectionKind::getText())); + SwitchSection(getContext().getObjectFileInfo()->getTextSection()); } void MCPureStreamer::EmitLabel(MCSymbol *Symbol) { assert(Symbol->isUndefined() && "Cannot define a symbol twice!"); assert(!Symbol->isVariable() && "Cannot emit a variable symbol!"); - assert(getCurrentSection() && "Cannot emit before setting section!"); + assert(getCurrentSection().first && "Cannot emit before setting section!"); - Symbol->setSection(*getCurrentSection()); + Symbol->setSection(*getCurrentSection().first); MCSymbolData &SD = getAssembler().getOrCreateSymbolData(*Symbol); // We have to create a new fragment if this is an atom defining symbol, // fragments cannot span atoms. 
if (getAssembler().isSymbolLinkerVisible(SD.getSymbol())) - new MCDataFragment(getCurrentSectionData()); + insert(new MCDataFragment()); // FIXME: This is wasteful, we don't necessarily need to create a data // fragment. Instead, we should mark the symbol as pointing into the data @@ -166,8 +162,7 @@ void MCPureStreamer::EmitValueToAlignment(unsigned ByteAlignment, // MCObjectStreamer. if (MaxBytesToEmit == 0) MaxBytesToEmit = ByteAlignment; - new MCAlignFragment(ByteAlignment, Value, ValueSize, MaxBytesToEmit, - getCurrentSectionData()); + insert(new MCAlignFragment(ByteAlignment, Value, ValueSize, MaxBytesToEmit)); // Update the maximum alignment on the current section if necessary. if (ByteAlignment > getCurrentSectionData()->getAlignment()) @@ -180,8 +175,8 @@ void MCPureStreamer::EmitCodeAlignment(unsigned ByteAlignment, // MCObjectStreamer. if (MaxBytesToEmit == 0) MaxBytesToEmit = ByteAlignment; - MCAlignFragment *F = new MCAlignFragment(ByteAlignment, 0, 1, MaxBytesToEmit, - getCurrentSectionData()); + MCAlignFragment *F = new MCAlignFragment(ByteAlignment, 0, 1, MaxBytesToEmit); + insert(F); F->setEmitNops(true); // Update the maximum alignment on the current section if necessary. @@ -191,13 +186,13 @@ void MCPureStreamer::EmitCodeAlignment(unsigned ByteAlignment, bool MCPureStreamer::EmitValueToOffset(const MCExpr *Offset, unsigned char Value) { - new MCOrgFragment(*Offset, Value, getCurrentSectionData()); + insert(new MCOrgFragment(*Offset, Value)); return false; } void MCPureStreamer::EmitInstToFragment(const MCInst &Inst) { - MCRelaxableFragment *IF = - new MCRelaxableFragment(Inst, getCurrentSectionData()); + MCRelaxableFragment *IF = new MCRelaxableFragment(Inst); + insert(IF); // Add the fixups and data. // diff --git a/contrib/llvm/lib/MC/MCSectionCOFF.cpp b/contrib/llvm/lib/MC/MCSectionCOFF.cpp index aac9377..6cedf06 100644 --- a/contrib/llvm/lib/MC/MCSectionCOFF.cpp +++ b/contrib/llvm/lib/MC/MCSectionCOFF.cpp @@ -29,7 +29,8 @@ bool MCSectionCOFF::ShouldOmitSectionDirective(StringRef Name, } void MCSectionCOFF::PrintSwitchToSection(const MCAsmInfo &MAI, - raw_ostream &OS) const { + raw_ostream &OS, + const MCExpr *Subsection) const { // standard sections don't require the '.section' if (ShouldOmitSectionDirective(SectionName, MAI)) { diff --git a/contrib/llvm/lib/MC/MCSectionELF.cpp b/contrib/llvm/lib/MC/MCSectionELF.cpp index 0775cfa..bf1a984 100644 --- a/contrib/llvm/lib/MC/MCSectionELF.cpp +++ b/contrib/llvm/lib/MC/MCSectionELF.cpp @@ -10,6 +10,7 @@ #include "llvm/MC/MCSectionELF.h" #include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCContext.h" +#include "llvm/MC/MCExpr.h" #include "llvm/MC/MCSymbol.h" #include "llvm/Support/ELF.h" #include "llvm/Support/raw_ostream.h" @@ -32,10 +33,14 @@ bool MCSectionELF::ShouldOmitSectionDirective(StringRef Name, } void MCSectionELF::PrintSwitchToSection(const MCAsmInfo &MAI, - raw_ostream &OS) const { + raw_ostream &OS, + const MCExpr *Subsection) const { if (ShouldOmitSectionDirective(SectionName, MAI)) { - OS << '\t' << getSectionName() << '\n'; + OS << '\t' << getSectionName(); + if (Subsection) + OS << '\t' << *Subsection; + OS << '\n'; return; } @@ -129,6 +134,9 @@ void MCSectionELF::PrintSwitchToSection(const MCAsmInfo &MAI, if (Flags & ELF::SHF_GROUP) OS << "," << Group->getName() << ",comdat"; OS << '\n'; + + if (Subsection) + OS << "\t.subsection\t" << *Subsection << '\n'; } bool MCSectionELF::UseCodeAlign() const { diff --git a/contrib/llvm/lib/MC/MCSectionMachO.cpp b/contrib/llvm/lib/MC/MCSectionMachO.cpp index 
fc32315..8704513 100644 --- a/contrib/llvm/lib/MC/MCSectionMachO.cpp +++ b/contrib/llvm/lib/MC/MCSectionMachO.cpp @@ -91,7 +91,8 @@ MCSectionMachO::MCSectionMachO(StringRef Segment, StringRef Section, } void MCSectionMachO::PrintSwitchToSection(const MCAsmInfo &MAI, - raw_ostream &OS) const { + raw_ostream &OS, + const MCExpr *Subsection) const { OS << "\t.section\t" << getSegmentName() << ',' << getSectionName(); // Get the section type and attributes. diff --git a/contrib/llvm/lib/MC/MCStreamer.cpp b/contrib/llvm/lib/MC/MCStreamer.cpp index d02e553..8f1895e 100644 --- a/contrib/llvm/lib/MC/MCStreamer.cpp +++ b/contrib/llvm/lib/MC/MCStreamer.cpp @@ -24,8 +24,7 @@ using namespace llvm; MCStreamer::MCStreamer(StreamerKind Kind, MCContext &Ctx) : Kind(Kind), Context(Ctx), EmitEHFrame(true), EmitDebugFrame(false), CurrentW64UnwindInfo(0), LastSymbol(0), AutoInitSections(false) { - const MCSection *section = 0; - SectionStack.push_back(std::make_pair(section, section)); + SectionStack.push_back(std::pair<MCSectionSubPair, MCSectionSubPair>()); } MCStreamer::~MCStreamer() { @@ -36,13 +35,13 @@ MCStreamer::~MCStreamer() { void MCStreamer::reset() { for (unsigned i = 0; i < getNumW64UnwindInfos(); ++i) delete W64UnwindInfos[i]; + W64UnwindInfos.clear(); EmitEHFrame = true; EmitDebugFrame = false; CurrentW64UnwindInfo = 0; LastSymbol = 0; - const MCSection *section = 0; SectionStack.clear(); - SectionStack.push_back(std::make_pair(section, section)); + SectionStack.push_back(std::pair<MCSectionSubPair, MCSectionSubPair>()); } const MCExpr *MCStreamer::BuildSymbolDiff(MCContext &Context, @@ -188,15 +187,15 @@ void MCStreamer::EmitEHSymAttributes(const MCSymbol *Symbol, void MCStreamer::EmitLabel(MCSymbol *Symbol) { assert(!Symbol->isVariable() && "Cannot emit a variable symbol!"); - assert(getCurrentSection() && "Cannot emit before setting section!"); - Symbol->setSection(*getCurrentSection()); + assert(getCurrentSection().first && "Cannot emit before setting section!"); + Symbol->setSection(*getCurrentSection().first); LastSymbol = Symbol; } void MCStreamer::EmitDebugLabel(MCSymbol *Symbol) { assert(!Symbol->isVariable() && "Cannot emit a variable symbol!"); - assert(getCurrentSection() && "Cannot emit before setting section!"); - Symbol->setSection(*getCurrentSection()); + assert(getCurrentSection().first && "Cannot emit before setting section!"); + Symbol->setSection(*getCurrentSection().first); LastSymbol = Symbol; } diff --git a/contrib/llvm/lib/MC/WinCOFFObjectWriter.cpp b/contrib/llvm/lib/MC/WinCOFFObjectWriter.cpp index 6dffed7..518b59e 100644 --- a/contrib/llvm/lib/MC/WinCOFFObjectWriter.cpp +++ b/contrib/llvm/lib/MC/WinCOFFObjectWriter.cpp @@ -147,8 +147,7 @@ public: object_t *createCOFFEntity(StringRef Name, list_t &List); void DefineSection(MCSectionData const &SectionData); - void DefineSymbol(MCSymbol const &Symbol, - MCSymbolData const &SymbolData, + void DefineSymbol(MCSymbolData const &SymbolData, MCAssembler &Assembler); void MakeSymbolReal(COFFSymbol &S, size_t Index); @@ -410,25 +409,23 @@ void WinCOFFObjectWriter::DefineSection(MCSectionData const &SectionData) { /// This function takes a section data object from the assembler /// and creates the associated COFF symbol staging object. 
-void WinCOFFObjectWriter::DefineSymbol(MCSymbol const &Symbol, - MCSymbolData const &SymbolData, +void WinCOFFObjectWriter::DefineSymbol(MCSymbolData const &SymbolData, MCAssembler &Assembler) { + MCSymbol const &Symbol = SymbolData.getSymbol(); COFFSymbol *coff_symbol = GetOrCreateCOFFSymbol(&Symbol); - - coff_symbol->Data.Type = (SymbolData.getFlags() & 0x0000FFFF) >> 0; - coff_symbol->Data.StorageClass = (SymbolData.getFlags() & 0x00FF0000) >> 16; + SymbolMap[&Symbol] = coff_symbol; if (SymbolData.getFlags() & COFF::SF_WeakExternal) { coff_symbol->Data.StorageClass = COFF::IMAGE_SYM_CLASS_WEAK_EXTERNAL; if (Symbol.isVariable()) { - coff_symbol->Data.StorageClass = COFF::IMAGE_SYM_CLASS_WEAK_EXTERNAL; + const MCSymbolRefExpr *SymRef = + dyn_cast<MCSymbolRefExpr>(Symbol.getVariableValue()); - // FIXME: This assert message isn't very good. - assert(Symbol.getVariableValue()->getKind() == MCExpr::SymbolRef && - "Value must be a SymbolRef!"); + if (!SymRef) + report_fatal_error("Weak externals may only alias symbols"); - coff_symbol->Other = GetOrCreateCOFFSymbol(&Symbol); + coff_symbol->Other = GetOrCreateCOFFSymbol(&SymRef->getSymbol()); } else { std::string WeakName = std::string(".weak.") + Symbol.getName().str() @@ -448,23 +445,29 @@ void WinCOFFObjectWriter::DefineSymbol(MCSymbol const &Symbol, coff_symbol->Aux[0].Aux.WeakExternal.TagIndex = 0; coff_symbol->Aux[0].Aux.WeakExternal.Characteristics = COFF::IMAGE_WEAK_EXTERN_SEARCH_LIBRARY; - } - // If no storage class was specified in the streamer, define it here. - if (coff_symbol->Data.StorageClass == 0) { - bool external = SymbolData.isExternal() || (SymbolData.Fragment == NULL); + coff_symbol->MCData = &SymbolData; + } else { + const MCSymbolData &ResSymData = + Assembler.getSymbolData(Symbol.AliasedSymbol()); - coff_symbol->Data.StorageClass = - external ? COFF::IMAGE_SYM_CLASS_EXTERNAL : COFF::IMAGE_SYM_CLASS_STATIC; - } + coff_symbol->Data.Type = (ResSymData.getFlags() & 0x0000FFFF) >> 0; + coff_symbol->Data.StorageClass = (ResSymData.getFlags() & 0x00FF0000) >> 16; - if (SymbolData.Fragment != NULL) - coff_symbol->Section = - SectionMap[&SymbolData.Fragment->getParent()->getSection()]; + // If no storage class was specified in the streamer, define it here. + if (coff_symbol->Data.StorageClass == 0) { + bool external = ResSymData.isExternal() || (ResSymData.Fragment == NULL); - // Bind internal COFF symbol to MC symbol. - coff_symbol->MCData = &SymbolData; - SymbolMap[&Symbol] = coff_symbol; + coff_symbol->Data.StorageClass = + external ? 
COFF::IMAGE_SYM_CLASS_EXTERNAL : COFF::IMAGE_SYM_CLASS_STATIC; + } + + if (ResSymData.Fragment != NULL) + coff_symbol->Section = + SectionMap[&ResSymData.Fragment->getParent()->getSection()]; + + coff_symbol->MCData = &ResSymData; + } } /// making a section real involves assigned it a number and putting @@ -620,9 +623,7 @@ void WinCOFFObjectWriter::ExecutePostLayoutBinding(MCAssembler &Asm, for (MCAssembler::const_symbol_iterator i = Asm.symbol_begin(), e = Asm.symbol_end(); i != e; i++) { if (ExportSymbol(*i, Asm)) { - const MCSymbol &Alias = i->getSymbol(); - const MCSymbol &Symbol = Alias.AliasedSymbol(); - DefineSymbol(Alias, Asm.getSymbolData(Symbol), Asm); + DefineSymbol(*i, Asm); } } } @@ -689,13 +690,8 @@ void WinCOFFObjectWriter::RecordRelocation(const MCAssembler &Asm, ++Reloc.Symb->Relocations; Reloc.Data.VirtualAddress += Fixup.getOffset(); - - unsigned FixupKind = Fixup.getKind(); - - if (CrossSection) - FixupKind = FK_PCRel_4; - - Reloc.Data.Type = TargetObjectWriter->getRelocType(FixupKind); + Reloc.Data.Type = TargetObjectWriter->getRelocType(Target, Fixup, + CrossSection); // FIXME: Can anyone explain what this does other than adjust for the size // of the offset? diff --git a/contrib/llvm/lib/Object/COFFObjectFile.cpp b/contrib/llvm/lib/Object/COFFObjectFile.cpp index ca90e0e..70fec32 100644 --- a/contrib/llvm/lib/Object/COFFObjectFile.cpp +++ b/contrib/llvm/lib/Object/COFFObjectFile.cpp @@ -429,7 +429,7 @@ relocation_iterator COFFObjectFile::getSectionRelEnd(DataRefImpl Sec) const { } COFFObjectFile::COFFObjectFile(MemoryBuffer *Object, error_code &ec) - : ObjectFile(Binary::ID_COFF, Object, ec) + : ObjectFile(Binary::ID_COFF, Object) , Header(0) , SectionTable(0) , SymbolTable(0) @@ -705,8 +705,7 @@ error_code COFFObjectFile::getRelocationNext(DataRefImpl Rel, } error_code COFFObjectFile::getRelocationAddress(DataRefImpl Rel, uint64_t &Res) const { - Res = toRel(Rel)->VirtualAddress; - return object_error::success; + report_fatal_error("getRelocationAddress not implemented in COFFObjectFile"); } error_code COFFObjectFile::getRelocationOffset(DataRefImpl Rel, uint64_t &Res) const { diff --git a/contrib/llvm/lib/Object/MachOObject.cpp b/contrib/llvm/lib/Object/MachOObject.cpp deleted file mode 100644 index c9c341a..0000000 --- a/contrib/llvm/lib/Object/MachOObject.cpp +++ /dev/null @@ -1,422 +0,0 @@ -//===- MachOObject.cpp - Mach-O Object File Wrapper -----------------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// - -#include "llvm/Object/MachOObject.h" -#include "llvm/ADT/SmallVector.h" -#include "llvm/ADT/StringRef.h" -#include "llvm/Support/DataExtractor.h" -#include "llvm/Support/Debug.h" -#include "llvm/Support/Host.h" -#include "llvm/Support/MemoryBuffer.h" -#include "llvm/Support/SwapByteOrder.h" -#include "llvm/Support/raw_ostream.h" - -using namespace llvm; -using namespace llvm::object; - -/* Translation Utilities */ - -template<typename T> -static void SwapValue(T &Value) { - Value = sys::SwapByteOrder(Value); -} - -template<typename T> -static void SwapStruct(T &Value); - -template<typename T> -static void ReadInMemoryStruct(const MachOObject &MOO, - StringRef Buffer, uint64_t Base, - InMemoryStruct<T> &Res) { - typedef T struct_type; - uint64_t Size = sizeof(struct_type); - - // Check that the buffer contains the expected data. 
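
Both the Mach-O reader being deleted here and its replacement later in MachOObjectFile.cpp lean on the same idiom the comment above describes: bounds-check the buffer, copy the fixed-size struct out of it, and byte-swap every field when the file's endianness differs from the host's. A stripped-down sketch of that pattern, with a toy struct rather than the real macho:: types:

#include <cstdint>
#include <cstring>
#include <string>

// Toy header with two fields; real Mach-O structs have many more.
struct ToyHeader { uint32_t Magic; uint32_t NumLoadCommands; };

static uint32_t byteSwap32(uint32_t V) {
  return (V >> 24) | ((V >> 8) & 0xff00) | ((V << 8) & 0xff0000) | (V << 24);
}

static void swapStruct(ToyHeader &H) {
  H.Magic = byteSwap32(H.Magic);
  H.NumLoadCommands = byteSwap32(H.NumLoadCommands);
}

// Mirrors the getStruct<T>/ReadInMemoryStruct idea: bounds-check, memcpy,
// swap in place only when the file and host byte orders disagree.
static bool readHeader(const std::string &Buffer, size_t Offset,
                       bool FileIsLittleEndian, bool HostIsLittleEndian,
                       ToyHeader &Out) {
  if (Offset + sizeof(ToyHeader) > Buffer.size())
    return false;                     // not enough bytes for the struct
  std::memcpy(&Out, Buffer.data() + Offset, sizeof(ToyHeader));
  if (FileIsLittleEndian != HostIsLittleEndian)
    swapStruct(Out);                  // translate every field in place
  return true;
}
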
- if (Base + Size > Buffer.size()) { - Res = 0; - return; - } - - // Check whether we can return a direct pointer. - struct_type *Ptr = reinterpret_cast<struct_type *>( - const_cast<char *>(Buffer.data() + Base)); - if (!MOO.isSwappedEndian()) { - Res = Ptr; - return; - } - - // Otherwise, copy the struct and translate the values. - Res = *Ptr; - SwapStruct(*Res); -} - -/* *** */ - -MachOObject::MachOObject(MemoryBuffer *Buffer_, bool IsLittleEndian_, - bool Is64Bit_) - : Buffer(Buffer_), IsLittleEndian(IsLittleEndian_), Is64Bit(Is64Bit_), - IsSwappedEndian(IsLittleEndian != sys::isLittleEndianHost()), - HasStringTable(false), LoadCommands(0), NumLoadedCommands(0) { - // Load the common header. - memcpy(&Header, Buffer->getBuffer().data(), sizeof(Header)); - if (IsSwappedEndian) { - SwapValue(Header.Magic); - SwapValue(Header.CPUType); - SwapValue(Header.CPUSubtype); - SwapValue(Header.FileType); - SwapValue(Header.NumLoadCommands); - SwapValue(Header.SizeOfLoadCommands); - SwapValue(Header.Flags); - } - - if (is64Bit()) { - memcpy(&Header64Ext, Buffer->getBuffer().data() + sizeof(Header), - sizeof(Header64Ext)); - if (IsSwappedEndian) { - SwapValue(Header64Ext.Reserved); - } - } - - // Create the load command array if sane. - if (getHeader().NumLoadCommands < (1 << 20)) - LoadCommands = new LoadCommandInfo[getHeader().NumLoadCommands]; -} - -MachOObject::~MachOObject() { - delete [] LoadCommands; -} - -MachOObject *MachOObject::LoadFromBuffer(MemoryBuffer *Buffer, - std::string *ErrorStr) { - // First, check the magic value and initialize the basic object info. - bool IsLittleEndian = false, Is64Bit = false; - StringRef Magic = Buffer->getBuffer().slice(0, 4); - if (Magic == "\xFE\xED\xFA\xCE") { - } else if (Magic == "\xCE\xFA\xED\xFE") { - IsLittleEndian = true; - } else if (Magic == "\xFE\xED\xFA\xCF") { - Is64Bit = true; - } else if (Magic == "\xCF\xFA\xED\xFE") { - IsLittleEndian = true; - Is64Bit = true; - } else { - if (ErrorStr) *ErrorStr = "not a Mach object file (invalid magic)"; - return 0; - } - - // Ensure that the at least the full header is present. - unsigned HeaderSize = Is64Bit ? macho::Header64Size : macho::Header32Size; - if (Buffer->getBufferSize() < HeaderSize) { - if (ErrorStr) *ErrorStr = "not a Mach object file (invalid header)"; - return 0; - } - - OwningPtr<MachOObject> Object(new MachOObject(Buffer, IsLittleEndian, - Is64Bit)); - - // Check for bogus number of load commands. - if (Object->getHeader().NumLoadCommands >= (1 << 20)) { - if (ErrorStr) *ErrorStr = "not a Mach object file (unreasonable header)"; - return 0; - } - - if (ErrorStr) *ErrorStr = ""; - return Object.take(); -} - -StringRef MachOObject::getData(size_t Offset, size_t Size) const { - return Buffer->getBuffer().substr(Offset,Size); -} - -void MachOObject::RegisterStringTable(macho::SymtabLoadCommand &SLC) { - HasStringTable = true; - StringTable = Buffer->getBuffer().substr(SLC.StringTableOffset, - SLC.StringTableSize); -} - -const MachOObject::LoadCommandInfo & -MachOObject::getLoadCommandInfo(unsigned Index) const { - assert(Index < getHeader().NumLoadCommands && "Invalid index!"); - - // Load the command, if necessary. 
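
The load-command walk, continued just below in the deleted getLoadCommandInfo and reproduced by the new getFirstLoadCommandInfo/getNextLoadCommandInfo pair, relies on each command recording its own size: command N+1 starts where command N ends. A hedged sketch of that offset chaining, with a made-up command record:

#include <cstdint>
#include <vector>

struct ToyLoadCommand { uint32_t Type; uint32_t Size; }; // Size includes this header

// Compute the file offset of every load command, starting right after the
// Mach-O header: next offset = current offset + current command's Size.
std::vector<uint64_t> commandOffsets(const std::vector<ToyLoadCommand> &Cmds,
                                     uint64_t HeaderSize) {
  std::vector<uint64_t> Offsets;
  uint64_t Offset = HeaderSize;       // first command follows the header
  for (size_t I = 0; I != Cmds.size(); ++I) {
    Offsets.push_back(Offset);
    Offset += Cmds[I].Size;           // i.e. Prev.Offset + Prev.Command.Size
  }
  return Offsets;
}
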
- if (Index >= NumLoadedCommands) { - uint64_t Offset; - if (Index == 0) { - Offset = getHeaderSize(); - } else { - const LoadCommandInfo &Prev = getLoadCommandInfo(Index - 1); - Offset = Prev.Offset + Prev.Command.Size; - } - - LoadCommandInfo &Info = LoadCommands[Index]; - memcpy(&Info.Command, Buffer->getBuffer().data() + Offset, - sizeof(macho::LoadCommand)); - if (IsSwappedEndian) { - SwapValue(Info.Command.Type); - SwapValue(Info.Command.Size); - } - Info.Offset = Offset; - NumLoadedCommands = Index + 1; - } - - return LoadCommands[Index]; -} - -template<> -void SwapStruct(macho::SegmentLoadCommand &Value) { - SwapValue(Value.Type); - SwapValue(Value.Size); - SwapValue(Value.VMAddress); - SwapValue(Value.VMSize); - SwapValue(Value.FileOffset); - SwapValue(Value.FileSize); - SwapValue(Value.MaxVMProtection); - SwapValue(Value.InitialVMProtection); - SwapValue(Value.NumSections); - SwapValue(Value.Flags); -} -void MachOObject::ReadSegmentLoadCommand(const LoadCommandInfo &LCI, - InMemoryStruct<macho::SegmentLoadCommand> &Res) const { - ReadInMemoryStruct(*this, Buffer->getBuffer(), LCI.Offset, Res); -} - -template<> -void SwapStruct(macho::Segment64LoadCommand &Value) { - SwapValue(Value.Type); - SwapValue(Value.Size); - SwapValue(Value.VMAddress); - SwapValue(Value.VMSize); - SwapValue(Value.FileOffset); - SwapValue(Value.FileSize); - SwapValue(Value.MaxVMProtection); - SwapValue(Value.InitialVMProtection); - SwapValue(Value.NumSections); - SwapValue(Value.Flags); -} -void MachOObject::ReadSegment64LoadCommand(const LoadCommandInfo &LCI, - InMemoryStruct<macho::Segment64LoadCommand> &Res) const { - ReadInMemoryStruct(*this, Buffer->getBuffer(), LCI.Offset, Res); -} - -template<> -void SwapStruct(macho::SymtabLoadCommand &Value) { - SwapValue(Value.Type); - SwapValue(Value.Size); - SwapValue(Value.SymbolTableOffset); - SwapValue(Value.NumSymbolTableEntries); - SwapValue(Value.StringTableOffset); - SwapValue(Value.StringTableSize); -} -void MachOObject::ReadSymtabLoadCommand(const LoadCommandInfo &LCI, - InMemoryStruct<macho::SymtabLoadCommand> &Res) const { - ReadInMemoryStruct(*this, Buffer->getBuffer(), LCI.Offset, Res); -} - -template<> -void SwapStruct(macho::DysymtabLoadCommand &Value) { - SwapValue(Value.Type); - SwapValue(Value.Size); - SwapValue(Value.LocalSymbolsIndex); - SwapValue(Value.NumLocalSymbols); - SwapValue(Value.ExternalSymbolsIndex); - SwapValue(Value.NumExternalSymbols); - SwapValue(Value.UndefinedSymbolsIndex); - SwapValue(Value.NumUndefinedSymbols); - SwapValue(Value.TOCOffset); - SwapValue(Value.NumTOCEntries); - SwapValue(Value.ModuleTableOffset); - SwapValue(Value.NumModuleTableEntries); - SwapValue(Value.ReferenceSymbolTableOffset); - SwapValue(Value.NumReferencedSymbolTableEntries); - SwapValue(Value.IndirectSymbolTableOffset); - SwapValue(Value.NumIndirectSymbolTableEntries); - SwapValue(Value.ExternalRelocationTableOffset); - SwapValue(Value.NumExternalRelocationTableEntries); - SwapValue(Value.LocalRelocationTableOffset); - SwapValue(Value.NumLocalRelocationTableEntries); -} -void MachOObject::ReadDysymtabLoadCommand(const LoadCommandInfo &LCI, - InMemoryStruct<macho::DysymtabLoadCommand> &Res) const { - ReadInMemoryStruct(*this, Buffer->getBuffer(), LCI.Offset, Res); -} - -template<> -void SwapStruct(macho::LinkeditDataLoadCommand &Value) { - SwapValue(Value.Type); - SwapValue(Value.Size); - SwapValue(Value.DataOffset); - SwapValue(Value.DataSize); -} -void MachOObject::ReadLinkeditDataLoadCommand(const LoadCommandInfo &LCI, - 
InMemoryStruct<macho::LinkeditDataLoadCommand> &Res) const { - ReadInMemoryStruct(*this, Buffer->getBuffer(), LCI.Offset, Res); -} - -template<> -void SwapStruct(macho::LinkerOptionsLoadCommand &Value) { - SwapValue(Value.Type); - SwapValue(Value.Size); - SwapValue(Value.Count); -} -void MachOObject::ReadLinkerOptionsLoadCommand(const LoadCommandInfo &LCI, - InMemoryStruct<macho::LinkerOptionsLoadCommand> &Res) const { - ReadInMemoryStruct(*this, Buffer->getBuffer(), LCI.Offset, Res); -} - -template<> -void SwapStruct(macho::IndirectSymbolTableEntry &Value) { - SwapValue(Value.Index); -} -void -MachOObject::ReadIndirectSymbolTableEntry(const macho::DysymtabLoadCommand &DLC, - unsigned Index, - InMemoryStruct<macho::IndirectSymbolTableEntry> &Res) const { - uint64_t Offset = (DLC.IndirectSymbolTableOffset + - Index * sizeof(macho::IndirectSymbolTableEntry)); - ReadInMemoryStruct(*this, Buffer->getBuffer(), Offset, Res); -} - - -template<> -void SwapStruct(macho::Section &Value) { - SwapValue(Value.Address); - SwapValue(Value.Size); - SwapValue(Value.Offset); - SwapValue(Value.Align); - SwapValue(Value.RelocationTableOffset); - SwapValue(Value.NumRelocationTableEntries); - SwapValue(Value.Flags); - SwapValue(Value.Reserved1); - SwapValue(Value.Reserved2); -} -void MachOObject::ReadSection(const LoadCommandInfo &LCI, - unsigned Index, - InMemoryStruct<macho::Section> &Res) const { - assert(LCI.Command.Type == macho::LCT_Segment && - "Unexpected load command info!"); - uint64_t Offset = (LCI.Offset + sizeof(macho::SegmentLoadCommand) + - Index * sizeof(macho::Section)); - ReadInMemoryStruct(*this, Buffer->getBuffer(), Offset, Res); -} - -template<> -void SwapStruct(macho::Section64 &Value) { - SwapValue(Value.Address); - SwapValue(Value.Size); - SwapValue(Value.Offset); - SwapValue(Value.Align); - SwapValue(Value.RelocationTableOffset); - SwapValue(Value.NumRelocationTableEntries); - SwapValue(Value.Flags); - SwapValue(Value.Reserved1); - SwapValue(Value.Reserved2); - SwapValue(Value.Reserved3); -} -void MachOObject::ReadSection64(const LoadCommandInfo &LCI, - unsigned Index, - InMemoryStruct<macho::Section64> &Res) const { - assert(LCI.Command.Type == macho::LCT_Segment64 && - "Unexpected load command info!"); - uint64_t Offset = (LCI.Offset + sizeof(macho::Segment64LoadCommand) + - Index * sizeof(macho::Section64)); - ReadInMemoryStruct(*this, Buffer->getBuffer(), Offset, Res); -} - -template<> -void SwapStruct(macho::RelocationEntry &Value) { - SwapValue(Value.Word0); - SwapValue(Value.Word1); -} -void MachOObject::ReadRelocationEntry(uint64_t RelocationTableOffset, - unsigned Index, - InMemoryStruct<macho::RelocationEntry> &Res) const { - uint64_t Offset = (RelocationTableOffset + - Index * sizeof(macho::RelocationEntry)); - ReadInMemoryStruct(*this, Buffer->getBuffer(), Offset, Res); -} - -template<> -void SwapStruct(macho::SymbolTableEntry &Value) { - SwapValue(Value.StringIndex); - SwapValue(Value.Flags); - SwapValue(Value.Value); -} -void MachOObject::ReadSymbolTableEntry(uint64_t SymbolTableOffset, - unsigned Index, - InMemoryStruct<macho::SymbolTableEntry> &Res) const { - uint64_t Offset = (SymbolTableOffset + - Index * sizeof(macho::SymbolTableEntry)); - ReadInMemoryStruct(*this, Buffer->getBuffer(), Offset, Res); -} - -template<> -void SwapStruct(macho::Symbol64TableEntry &Value) { - SwapValue(Value.StringIndex); - SwapValue(Value.Flags); - SwapValue(Value.Value); -} -void MachOObject::ReadSymbol64TableEntry(uint64_t SymbolTableOffset, - unsigned Index, - 
InMemoryStruct<macho::Symbol64TableEntry> &Res) const { - uint64_t Offset = (SymbolTableOffset + - Index * sizeof(macho::Symbol64TableEntry)); - ReadInMemoryStruct(*this, Buffer->getBuffer(), Offset, Res); -} - -template<> -void SwapStruct(macho::DataInCodeTableEntry &Value) { - SwapValue(Value.Offset); - SwapValue(Value.Length); - SwapValue(Value.Kind); -} -void MachOObject::ReadDataInCodeTableEntry(uint64_t TableOffset, - unsigned Index, - InMemoryStruct<macho::DataInCodeTableEntry> &Res) const { - uint64_t Offset = (TableOffset + - Index * sizeof(macho::DataInCodeTableEntry)); - ReadInMemoryStruct(*this, Buffer->getBuffer(), Offset, Res); -} - -void MachOObject::ReadULEB128s(uint64_t Index, - SmallVectorImpl<uint64_t> &Out) const { - DataExtractor extractor(Buffer->getBuffer(), true, 0); - - uint32_t offset = Index; - uint64_t data = 0; - while (uint64_t delta = extractor.getULEB128(&offset)) { - data += delta; - Out.push_back(data); - } -} - -/* ** */ -// Object Dumping Facilities -void MachOObject::dump() const { print(dbgs()); dbgs() << '\n'; } -void MachOObject::dumpHeader() const { printHeader(dbgs()); dbgs() << '\n'; } - -void MachOObject::printHeader(raw_ostream &O) const { - O << "('cputype', " << Header.CPUType << ")\n"; - O << "('cpusubtype', " << Header.CPUSubtype << ")\n"; - O << "('filetype', " << Header.FileType << ")\n"; - O << "('num_load_commands', " << Header.NumLoadCommands << ")\n"; - O << "('load_commands_size', " << Header.SizeOfLoadCommands << ")\n"; - O << "('flag', " << Header.Flags << ")\n"; - - // Print extended header if 64-bit. - if (is64Bit()) - O << "('reserved', " << Header64Ext.Reserved << ")\n"; -} - -void MachOObject::print(raw_ostream &O) const { - O << "Header:\n"; - printHeader(O); - O << "Load Commands:\n"; - - O << "Buffer:\n"; -} diff --git a/contrib/llvm/lib/Object/MachOObjectFile.cpp b/contrib/llvm/lib/Object/MachOObjectFile.cpp index 6501df9..dfd8d3d 100644 --- a/contrib/llvm/lib/Object/MachOObjectFile.cpp +++ b/contrib/llvm/lib/Object/MachOObjectFile.cpp @@ -15,7 +15,9 @@ #include "llvm/Object/MachO.h" #include "llvm/ADT/Triple.h" #include "llvm/Object/MachOFormat.h" +#include "llvm/Support/DataExtractor.h" #include "llvm/Support/Format.h" +#include "llvm/Support/Host.h" #include "llvm/Support/MemoryBuffer.h" #include <cctype> #include <cstring> @@ -27,236 +29,560 @@ using namespace object; namespace llvm { namespace object { -MachOObjectFile::MachOObjectFile(MemoryBuffer *Object, MachOObject *MOO, - error_code &ec) - : ObjectFile(Binary::ID_MachO, Object, ec), - MachOObj(MOO), - RegisteredStringTable(std::numeric_limits<uint32_t>::max()) { - DataRefImpl DRI; - moveToNextSection(DRI); - uint32_t LoadCommandCount = MachOObj->getHeader().NumLoadCommands; - while (DRI.d.a < LoadCommandCount) { - Sections.push_back(DRI); - DRI.d.b++; - moveToNextSection(DRI); +struct SymbolTableEntryBase { + uint32_t StringIndex; + uint8_t Type; + uint8_t SectionIndex; + uint16_t Flags; +}; + +struct SectionBase { + char Name[16]; + char SegmentName[16]; +}; + +template<typename T> +static void SwapValue(T &Value) { + Value = sys::SwapByteOrder(Value); +} + +template<typename T> +static void SwapStruct(T &Value); + +template<> +void SwapStruct(macho::RelocationEntry &H) { + SwapValue(H.Word0); + SwapValue(H.Word1); +} + +template<> +void SwapStruct(macho::LoadCommand &L) { + SwapValue(L.Type); + SwapValue(L.Size); +} + +template<> +void SwapStruct(SymbolTableEntryBase &S) { + SwapValue(S.StringIndex); + SwapValue(S.Flags); +} + +template<> +void 
SwapStruct(macho::Section &S) { + SwapValue(S.Address); + SwapValue(S.Size); + SwapValue(S.Offset); + SwapValue(S.Align); + SwapValue(S.RelocationTableOffset); + SwapValue(S.NumRelocationTableEntries); + SwapValue(S.Flags); + SwapValue(S.Reserved1); + SwapValue(S.Reserved2); +} + +template<> +void SwapStruct(macho::Section64 &S) { + SwapValue(S.Address); + SwapValue(S.Size); + SwapValue(S.Offset); + SwapValue(S.Align); + SwapValue(S.RelocationTableOffset); + SwapValue(S.NumRelocationTableEntries); + SwapValue(S.Flags); + SwapValue(S.Reserved1); + SwapValue(S.Reserved2); + SwapValue(S.Reserved3); +} + +template<> +void SwapStruct(macho::SymbolTableEntry &S) { + SwapValue(S.StringIndex); + SwapValue(S.Flags); + SwapValue(S.Value); +} + +template<> +void SwapStruct(macho::Symbol64TableEntry &S) { + SwapValue(S.StringIndex); + SwapValue(S.Flags); + SwapValue(S.Value); +} + +template<> +void SwapStruct(macho::Header &H) { + SwapValue(H.Magic); + SwapValue(H.CPUType); + SwapValue(H.CPUSubtype); + SwapValue(H.FileType); + SwapValue(H.NumLoadCommands); + SwapValue(H.SizeOfLoadCommands); + SwapValue(H.Flags); +} + +template<> +void SwapStruct(macho::Header64Ext &E) { + SwapValue(E.Reserved); +} + +template<> +void SwapStruct(macho::SymtabLoadCommand &C) { + SwapValue(C.Type); + SwapValue(C.Size); + SwapValue(C.SymbolTableOffset); + SwapValue(C.NumSymbolTableEntries); + SwapValue(C.StringTableOffset); + SwapValue(C.StringTableSize); +} + +template<> +void SwapStruct(macho::DysymtabLoadCommand &C) { + SwapValue(C.Type); + SwapValue(C.Size); + SwapValue(C.LocalSymbolsIndex); + SwapValue(C.NumLocalSymbols); + SwapValue(C.ExternalSymbolsIndex); + SwapValue(C.NumExternalSymbols); + SwapValue(C.UndefinedSymbolsIndex); + SwapValue(C.NumUndefinedSymbols); + SwapValue(C.TOCOffset); + SwapValue(C.NumTOCEntries); + SwapValue(C.ModuleTableOffset); + SwapValue(C.NumModuleTableEntries); + SwapValue(C.ReferenceSymbolTableOffset); + SwapValue(C.NumReferencedSymbolTableEntries); + SwapValue(C.IndirectSymbolTableOffset); + SwapValue(C.NumIndirectSymbolTableEntries); + SwapValue(C.ExternalRelocationTableOffset); + SwapValue(C.NumExternalRelocationTableEntries); + SwapValue(C.LocalRelocationTableOffset); + SwapValue(C.NumLocalRelocationTableEntries); +} + +template<> +void SwapStruct(macho::LinkeditDataLoadCommand &C) { + SwapValue(C.Type); + SwapValue(C.Size); + SwapValue(C.DataOffset); + SwapValue(C.DataSize); +} + +template<> +void SwapStruct(macho::SegmentLoadCommand &C) { + SwapValue(C.Type); + SwapValue(C.Size); + SwapValue(C.VMAddress); + SwapValue(C.VMSize); + SwapValue(C.FileOffset); + SwapValue(C.FileSize); + SwapValue(C.MaxVMProtection); + SwapValue(C.InitialVMProtection); + SwapValue(C.NumSections); + SwapValue(C.Flags); +} + +template<> +void SwapStruct(macho::Segment64LoadCommand &C) { + SwapValue(C.Type); + SwapValue(C.Size); + SwapValue(C.VMAddress); + SwapValue(C.VMSize); + SwapValue(C.FileOffset); + SwapValue(C.FileSize); + SwapValue(C.MaxVMProtection); + SwapValue(C.InitialVMProtection); + SwapValue(C.NumSections); + SwapValue(C.Flags); +} + +template<> +void SwapStruct(macho::IndirectSymbolTableEntry &C) { + SwapValue(C.Index); +} + +template<> +void SwapStruct(macho::LinkerOptionsLoadCommand &C) { + SwapValue(C.Type); + SwapValue(C.Size); + SwapValue(C.Count); +} + +template<> +void SwapStruct(macho::DataInCodeTableEntry &C) { + SwapValue(C.Offset); + SwapValue(C.Length); + SwapValue(C.Kind); +} + +template<typename T> +T getStruct(const MachOObjectFile *O, const char *P) { + T Cmd; + memcpy(&Cmd, 
P, sizeof(T)); + if (O->isLittleEndian() != sys::IsLittleEndianHost) + SwapStruct(Cmd); + return Cmd; +} + +static uint32_t +getSegmentLoadCommandNumSections(const MachOObjectFile *O, + const MachOObjectFile::LoadCommandInfo &L) { + if (O->is64Bit()) { + macho::Segment64LoadCommand S = O->getSegment64LoadCommand(L); + return S.NumSections; } + macho::SegmentLoadCommand S = O->getSegmentLoadCommand(L); + return S.NumSections; } +static const char * +getSectionPtr(const MachOObjectFile *O, MachOObjectFile::LoadCommandInfo L, + unsigned Sec) { + uintptr_t CommandAddr = reinterpret_cast<uintptr_t>(L.Ptr); -ObjectFile *ObjectFile::createMachOObjectFile(MemoryBuffer *Buffer) { + bool Is64 = O->is64Bit(); + unsigned SegmentLoadSize = Is64 ? sizeof(macho::Segment64LoadCommand) : + sizeof(macho::SegmentLoadCommand); + unsigned SectionSize = Is64 ? sizeof(macho::Section64) : + sizeof(macho::Section); + + uintptr_t SectionAddr = CommandAddr + SegmentLoadSize + Sec * SectionSize; + return reinterpret_cast<const char*>(SectionAddr); +} + +static const char *getPtr(const MachOObjectFile *O, size_t Offset) { + return O->getData().substr(Offset, 1).data(); +} + +static SymbolTableEntryBase +getSymbolTableEntryBase(const MachOObjectFile *O, DataRefImpl DRI) { + const char *P = reinterpret_cast<const char *>(DRI.p); + return getStruct<SymbolTableEntryBase>(O, P); +} + +static StringRef parseSegmentOrSectionName(const char *P) { + if (P[15] == 0) + // Null terminated. + return P; + // Not null terminated, so this is a 16 char string. + return StringRef(P, 16); +} + +// Helper to advance a section or symbol iterator multiple increments at a time. +template<class T> +static error_code advance(T &it, size_t Val) { error_code ec; - std::string Err; - MachOObject *MachOObj = MachOObject::LoadFromBuffer(Buffer, &Err); - if (!MachOObj) - return NULL; - // MachOObject takes ownership of the Buffer we passed to it, and - // MachOObjectFile does, too, so we need to make sure they don't get the - // same object. A MemoryBuffer is cheap (it's just a reference to memory, - // not a copy of the memory itself), so just make a new copy here for - // the MachOObjectFile. - MemoryBuffer *NewBuffer = - MemoryBuffer::getMemBuffer(Buffer->getBuffer(), - Buffer->getBufferIdentifier(), false); - return new MachOObjectFile(NewBuffer, MachOObj, ec); -} - -/*===-- Symbols -----------------------------------------------------------===*/ - -void MachOObjectFile::moveToNextSymbol(DataRefImpl &DRI) const { - uint32_t LoadCommandCount = MachOObj->getHeader().NumLoadCommands; - while (DRI.d.a < LoadCommandCount) { - LoadCommandInfo LCI = MachOObj->getLoadCommandInfo(DRI.d.a); - if (LCI.Command.Type == macho::LCT_Symtab) { - InMemoryStruct<macho::SymtabLoadCommand> SymtabLoadCmd; - MachOObj->ReadSymtabLoadCommand(LCI, SymtabLoadCmd); - if (DRI.d.b < SymtabLoadCmd->NumSymbolTableEntries) - return; + while (Val--) { + it.increment(ec); + } + return ec; +} + +template<class T> +static void advanceTo(T &it, size_t Val) { + if (error_code ec = advance(it, Val)) + report_fatal_error(ec.message()); +} + +static unsigned getCPUType(const MachOObjectFile *O) { + return O->getHeader().CPUType; +} + +static void printRelocationTargetName(const MachOObjectFile *O, + const macho::RelocationEntry &RE, + raw_string_ostream &fmt) { + bool IsScattered = O->isRelocationScattered(RE); + + // Target of a scattered relocation is an address. 
In the interest of + // generating pretty output, scan through the symbol table looking for a + // symbol that aligns with that address. If we find one, print it. + // Otherwise, we just print the hex address of the target. + if (IsScattered) { + uint32_t Val = O->getPlainRelocationSymbolNum(RE); + + error_code ec; + for (symbol_iterator SI = O->begin_symbols(), SE = O->end_symbols(); + SI != SE; SI.increment(ec)) { + if (ec) report_fatal_error(ec.message()); + + uint64_t Addr; + StringRef Name; + + if ((ec = SI->getAddress(Addr))) + report_fatal_error(ec.message()); + if (Addr != Val) continue; + if ((ec = SI->getName(Name))) + report_fatal_error(ec.message()); + fmt << Name; + return; + } + + // If we couldn't find a symbol that this relocation refers to, try + // to find a section beginning instead. + for (section_iterator SI = O->begin_sections(), SE = O->end_sections(); + SI != SE; SI.increment(ec)) { + if (ec) report_fatal_error(ec.message()); + + uint64_t Addr; + StringRef Name; + + if ((ec = SI->getAddress(Addr))) + report_fatal_error(ec.message()); + if (Addr != Val) continue; + if ((ec = SI->getName(Name))) + report_fatal_error(ec.message()); + fmt << Name; + return; } - DRI.d.a++; - DRI.d.b = 0; + fmt << format("0x%x", Val); + return; } -} -void MachOObjectFile::getSymbolTableEntry(DataRefImpl DRI, - InMemoryStruct<macho::SymbolTableEntry> &Res) const { - InMemoryStruct<macho::SymtabLoadCommand> SymtabLoadCmd; - LoadCommandInfo LCI = MachOObj->getLoadCommandInfo(DRI.d.a); - MachOObj->ReadSymtabLoadCommand(LCI, SymtabLoadCmd); + StringRef S; + bool isExtern = O->getPlainRelocationExternal(RE); + uint64_t Val = O->getAnyRelocationAddress(RE); - if (RegisteredStringTable != DRI.d.a) { - MachOObj->RegisterStringTable(*SymtabLoadCmd); - RegisteredStringTable = DRI.d.a; + if (isExtern) { + symbol_iterator SI = O->begin_symbols(); + advanceTo(SI, Val); + SI->getName(S); + } else { + section_iterator SI = O->begin_sections(); + advanceTo(SI, Val); + SI->getName(S); } - MachOObj->ReadSymbolTableEntry(SymtabLoadCmd->SymbolTableOffset, DRI.d.b, - Res); + fmt << S; +} + +static uint32_t getPlainRelocationAddress(const macho::RelocationEntry &RE) { + return RE.Word0; +} + +static unsigned +getScatteredRelocationAddress(const macho::RelocationEntry &RE) { + return RE.Word0 & 0xffffff; +} + +static bool getPlainRelocationPCRel(const MachOObjectFile *O, + const macho::RelocationEntry &RE) { + if (O->isLittleEndian()) + return (RE.Word1 >> 24) & 1; + return (RE.Word1 >> 7) & 1; +} + +static bool +getScatteredRelocationPCRel(const MachOObjectFile *O, + const macho::RelocationEntry &RE) { + return (RE.Word0 >> 30) & 1; +} + +static unsigned getPlainRelocationLength(const MachOObjectFile *O, + const macho::RelocationEntry &RE) { + if (O->isLittleEndian()) + return (RE.Word1 >> 25) & 3; + return (RE.Word1 >> 5) & 3; +} + +static unsigned +getScatteredRelocationLength(const macho::RelocationEntry &RE) { + return (RE.Word0 >> 28) & 3; } -void MachOObjectFile::getSymbol64TableEntry(DataRefImpl DRI, - InMemoryStruct<macho::Symbol64TableEntry> &Res) const { - InMemoryStruct<macho::SymtabLoadCommand> SymtabLoadCmd; - LoadCommandInfo LCI = MachOObj->getLoadCommandInfo(DRI.d.a); - MachOObj->ReadSymtabLoadCommand(LCI, SymtabLoadCmd); +static unsigned getPlainRelocationType(const MachOObjectFile *O, + const macho::RelocationEntry &RE) { + if (O->isLittleEndian()) + return RE.Word1 >> 28; + return RE.Word1 & 0xf; +} - if (RegisteredStringTable != DRI.d.a) { - MachOObj->RegisterStringTable(*SymtabLoadCmd); - 
RegisteredStringTable = DRI.d.a; +static unsigned getScatteredRelocationType(const macho::RelocationEntry &RE) { + return (RE.Word0 >> 24) & 0xf; +} + +static uint32_t getSectionFlags(const MachOObjectFile *O, + DataRefImpl Sec) { + if (O->is64Bit()) { + macho::Section64 Sect = O->getSection64(Sec); + return Sect.Flags; } + macho::Section Sect = O->getSection(Sec); + return Sect.Flags; +} + +MachOObjectFile::MachOObjectFile(MemoryBuffer *Object, + bool IsLittleEndian, bool Is64bits, + error_code &ec) + : ObjectFile(getMachOType(IsLittleEndian, Is64bits), Object), + SymtabLoadCmd(NULL), DysymtabLoadCmd(NULL) { + uint32_t LoadCommandCount = this->getHeader().NumLoadCommands; + macho::LoadCommandType SegmentLoadType = is64Bit() ? + macho::LCT_Segment64 : macho::LCT_Segment; + + MachOObjectFile::LoadCommandInfo Load = getFirstLoadCommandInfo(); + for (unsigned I = 0; ; ++I) { + if (Load.C.Type == macho::LCT_Symtab) { + assert(!SymtabLoadCmd && "Multiple symbol tables"); + SymtabLoadCmd = Load.Ptr; + } else if (Load.C.Type == macho::LCT_Dysymtab) { + assert(!DysymtabLoadCmd && "Multiple dynamic symbol tables"); + DysymtabLoadCmd = Load.Ptr; + } else if (Load.C.Type == SegmentLoadType) { + uint32_t NumSections = getSegmentLoadCommandNumSections(this, Load); + for (unsigned J = 0; J < NumSections; ++J) { + const char *Sec = getSectionPtr(this, Load, J); + Sections.push_back(Sec); + } + } - MachOObj->ReadSymbol64TableEntry(SymtabLoadCmd->SymbolTableOffset, DRI.d.b, - Res); + if (I == LoadCommandCount - 1) + break; + else + Load = getNextLoadCommandInfo(Load); + } } +error_code MachOObjectFile::getSymbolNext(DataRefImpl Symb, + SymbolRef &Res) const { + unsigned SymbolTableEntrySize = is64Bit() ? + sizeof(macho::Symbol64TableEntry) : + sizeof(macho::SymbolTableEntry); + Symb.p += SymbolTableEntrySize; + Res = SymbolRef(Symb, this); + return object_error::success; +} -error_code MachOObjectFile::getSymbolNext(DataRefImpl DRI, - SymbolRef &Result) const { - DRI.d.b++; - moveToNextSymbol(DRI); - Result = SymbolRef(DRI, this); +error_code MachOObjectFile::getSymbolName(DataRefImpl Symb, + StringRef &Res) const { + StringRef StringTable = getStringTableData(); + SymbolTableEntryBase Entry = getSymbolTableEntryBase(this, Symb); + const char *Start = &StringTable.data()[Entry.StringIndex]; + Res = StringRef(Start); return object_error::success; } -error_code MachOObjectFile::getSymbolName(DataRefImpl DRI, - StringRef &Result) const { - if (MachOObj->is64Bit()) { - InMemoryStruct<macho::Symbol64TableEntry> Entry; - getSymbol64TableEntry(DRI, Entry); - Result = MachOObj->getStringAtIndex(Entry->StringIndex); +error_code MachOObjectFile::getSymbolAddress(DataRefImpl Symb, + uint64_t &Res) const { + if (is64Bit()) { + macho::Symbol64TableEntry Entry = getSymbol64TableEntry(Symb); + Res = Entry.Value; } else { - InMemoryStruct<macho::SymbolTableEntry> Entry; - getSymbolTableEntry(DRI, Entry); - Result = MachOObj->getStringAtIndex(Entry->StringIndex); + macho::SymbolTableEntry Entry = getSymbolTableEntry(Symb); + Res = Entry.Value; } return object_error::success; } -error_code MachOObjectFile::getSymbolFileOffset(DataRefImpl DRI, - uint64_t &Result) const { - if (MachOObj->is64Bit()) { - InMemoryStruct<macho::Symbol64TableEntry> Entry; - getSymbol64TableEntry(DRI, Entry); - Result = Entry->Value; - if (Entry->SectionIndex) { - InMemoryStruct<macho::Section64> Section; - getSection64(Sections[Entry->SectionIndex-1], Section); - Result += Section->Offset - Section->Address; - } - } else { - 
InMemoryStruct<macho::SymbolTableEntry> Entry; - getSymbolTableEntry(DRI, Entry); - Result = Entry->Value; - if (Entry->SectionIndex) { - InMemoryStruct<macho::Section> Section; - getSection(Sections[Entry->SectionIndex-1], Section); - Result += Section->Offset - Section->Address; +error_code +MachOObjectFile::getSymbolFileOffset(DataRefImpl Symb, + uint64_t &Res) const { + SymbolTableEntryBase Entry = getSymbolTableEntryBase(this, Symb); + getSymbolAddress(Symb, Res); + if (Entry.SectionIndex) { + uint64_t Delta; + DataRefImpl SecRel; + SecRel.d.a = Entry.SectionIndex-1; + if (is64Bit()) { + macho::Section64 Sec = getSection64(SecRel); + Delta = Sec.Offset - Sec.Address; + } else { + macho::Section Sec = getSection(SecRel); + Delta = Sec.Offset - Sec.Address; } + + Res += Delta; } return object_error::success; } -error_code MachOObjectFile::getSymbolAddress(DataRefImpl DRI, - uint64_t &Result) const { - if (MachOObj->is64Bit()) { - InMemoryStruct<macho::Symbol64TableEntry> Entry; - getSymbol64TableEntry(DRI, Entry); - Result = Entry->Value; +error_code MachOObjectFile::getSymbolAlignment(DataRefImpl DRI, + uint32_t &Result) const { + uint32_t flags; + this->getSymbolFlags(DRI, flags); + if (flags & SymbolRef::SF_Common) { + SymbolTableEntryBase Entry = getSymbolTableEntryBase(this, DRI); + Result = 1 << MachO::GET_COMM_ALIGN(Entry.Flags); } else { - InMemoryStruct<macho::SymbolTableEntry> Entry; - getSymbolTableEntry(DRI, Entry); - Result = Entry->Value; + Result = 0; } return object_error::success; } error_code MachOObjectFile::getSymbolSize(DataRefImpl DRI, uint64_t &Result) const { - uint32_t LoadCommandCount = MachOObj->getHeader().NumLoadCommands; uint64_t BeginOffset; uint64_t EndOffset = 0; uint8_t SectionIndex; - if (MachOObj->is64Bit()) { - InMemoryStruct<macho::Symbol64TableEntry> Entry; - getSymbol64TableEntry(DRI, Entry); - BeginOffset = Entry->Value; - SectionIndex = Entry->SectionIndex; - if (!SectionIndex) { - uint32_t flags = SymbolRef::SF_None; - getSymbolFlags(DRI, flags); - if (flags & SymbolRef::SF_Common) - Result = Entry->Value; - else - Result = UnknownAddressOrSize; - return object_error::success; - } - // Unfortunately symbols are unsorted so we need to touch all - // symbols from load command - DRI.d.b = 0; - uint32_t Command = DRI.d.a; - while (Command == DRI.d.a) { - moveToNextSymbol(DRI); - if (DRI.d.a < LoadCommandCount) { - getSymbol64TableEntry(DRI, Entry); - if (Entry->SectionIndex == SectionIndex && Entry->Value > BeginOffset) - if (!EndOffset || Entry->Value < EndOffset) - EndOffset = Entry->Value; - } - DRI.d.b++; - } - } else { - InMemoryStruct<macho::SymbolTableEntry> Entry; - getSymbolTableEntry(DRI, Entry); - BeginOffset = Entry->Value; - SectionIndex = Entry->SectionIndex; - if (!SectionIndex) { - uint32_t flags = SymbolRef::SF_None; - getSymbolFlags(DRI, flags); - if (flags & SymbolRef::SF_Common) - Result = Entry->Value; - else - Result = UnknownAddressOrSize; - return object_error::success; - } - // Unfortunately symbols are unsorted so we need to touch all - // symbols from load command - DRI.d.b = 0; - uint32_t Command = DRI.d.a; - while (Command == DRI.d.a) { - moveToNextSymbol(DRI); - if (DRI.d.a < LoadCommandCount) { - getSymbolTableEntry(DRI, Entry); - if (Entry->SectionIndex == SectionIndex && Entry->Value > BeginOffset) - if (!EndOffset || Entry->Value < EndOffset) - EndOffset = Entry->Value; - } - DRI.d.b++; - } + + SymbolTableEntryBase Entry = getSymbolTableEntryBase(this, DRI); + uint64_t Value; + getSymbolAddress(DRI, Value); + + 
BeginOffset = Value; + + SectionIndex = Entry.SectionIndex; + if (!SectionIndex) { + uint32_t flags = SymbolRef::SF_None; + this->getSymbolFlags(DRI, flags); + if (flags & SymbolRef::SF_Common) + Result = Value; + else + Result = UnknownAddressOrSize; + return object_error::success; + } + // Unfortunately symbols are unsorted so we need to touch all + // symbols from load command + error_code ec; + for (symbol_iterator I = begin_symbols(), E = end_symbols(); I != E; + I.increment(ec)) { + DataRefImpl DRI = I->getRawDataRefImpl(); + Entry = getSymbolTableEntryBase(this, DRI); + getSymbolAddress(DRI, Value); + if (Entry.SectionIndex == SectionIndex && Value > BeginOffset) + if (!EndOffset || Value < EndOffset) + EndOffset = Value; } if (!EndOffset) { uint64_t Size; - getSectionSize(Sections[SectionIndex-1], Size); - getSectionAddress(Sections[SectionIndex-1], EndOffset); + DataRefImpl Sec; + Sec.d.a = SectionIndex-1; + getSectionSize(Sec, Size); + getSectionAddress(Sec, EndOffset); EndOffset += Size; } Result = EndOffset - BeginOffset; return object_error::success; } -error_code MachOObjectFile::getSymbolNMTypeChar(DataRefImpl DRI, - char &Result) const { - uint8_t Type, Flags; - if (MachOObj->is64Bit()) { - InMemoryStruct<macho::Symbol64TableEntry> Entry; - getSymbol64TableEntry(DRI, Entry); - Type = Entry->Type; - Flags = Entry->Flags; - } else { - InMemoryStruct<macho::SymbolTableEntry> Entry; - getSymbolTableEntry(DRI, Entry); - Type = Entry->Type; - Flags = Entry->Flags; +error_code MachOObjectFile::getSymbolType(DataRefImpl Symb, + SymbolRef::Type &Res) const { + SymbolTableEntryBase Entry = getSymbolTableEntryBase(this, Symb); + uint8_t n_type = Entry.Type; + + Res = SymbolRef::ST_Other; + + // If this is a STAB debugging symbol, we can do nothing more. 
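
The getSymbolSize logic above has to estimate a size because a Mach-O nlist entry carries only an address: the code takes the symbol's own value as the start, scans every other symbol in the same section for the smallest value that is still greater (the table is unsorted, so all entries must be touched), and falls back to the end of the section when no later symbol exists. A compact sketch of that estimate using plain structs instead of the object-file iterators:

#include <cstdint>
#include <vector>

struct ToySymbol  { uint8_t SectionIndex; uint64_t Value; };
struct ToySection { uint64_t Address; uint64_t Size; };

// Size of Symbols[Idx]: distance to the next symbol in the same section, or
// to the end of that section if it is the last one. SectionIndex is 1-based,
// matching the Mach-O convention (0 means "no section").
uint64_t estimateSymbolSize(const std::vector<ToySymbol> &Symbols,
                            const std::vector<ToySection> &Sections,
                            size_t Idx) {
  const ToySymbol &S = Symbols[Idx];
  if (S.SectionIndex == 0)
    return 0;                                   // undefined symbol: no estimate here
  uint64_t Begin = S.Value, End = 0;
  for (size_t I = 0; I != Symbols.size(); ++I)  // unsorted: scan them all
    if (Symbols[I].SectionIndex == S.SectionIndex && Symbols[I].Value > Begin)
      if (!End || Symbols[I].Value < End)
        End = Symbols[I].Value;
  if (!End) {
    const ToySection &Sec = Sections[S.SectionIndex - 1];
    End = Sec.Address + Sec.Size;               // no later symbol: run to section end
  }
  return End - Begin;
}
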
+ if (n_type & MachO::NlistMaskStab) { + Res = SymbolRef::ST_Debug; + return object_error::success; + } + + switch (n_type & MachO::NlistMaskType) { + case MachO::NListTypeUndefined : + Res = SymbolRef::ST_Unknown; + break; + case MachO::NListTypeSection : + Res = SymbolRef::ST_Function; + break; } + return object_error::success; +} + +error_code MachOObjectFile::getSymbolNMTypeChar(DataRefImpl Symb, + char &Res) const { + SymbolTableEntryBase Entry = getSymbolTableEntryBase(this, Symb); + uint8_t Type = Entry.Type; + uint16_t Flags = Entry.Flags; char Char; switch (Type & macho::STF_TypeMask) { @@ -274,25 +600,16 @@ error_code MachOObjectFile::getSymbolNMTypeChar(DataRefImpl DRI, if (Flags & (macho::STF_External | macho::STF_PrivateExtern)) Char = toupper(static_cast<unsigned char>(Char)); - Result = Char; + Res = Char; return object_error::success; } error_code MachOObjectFile::getSymbolFlags(DataRefImpl DRI, uint32_t &Result) const { - uint16_t MachOFlags; - uint8_t MachOType; - if (MachOObj->is64Bit()) { - InMemoryStruct<macho::Symbol64TableEntry> Entry; - getSymbol64TableEntry(DRI, Entry); - MachOFlags = Entry->Flags; - MachOType = Entry->Type; - } else { - InMemoryStruct<macho::SymbolTableEntry> Entry; - getSymbolTableEntry(DRI, Entry); - MachOFlags = Entry->Flags; - MachOType = Entry->Type; - } + SymbolTableEntryBase Entry = getSymbolTableEntryBase(this, DRI); + + uint8_t MachOType = Entry.Type; + uint16_t MachOFlags = Entry.Flags; // TODO: Correctly set SF_ThreadLocal Result = SymbolRef::SF_None; @@ -305,8 +622,12 @@ error_code MachOObjectFile::getSymbolFlags(DataRefImpl DRI, if (MachOType & MachO::NlistMaskExternal) { Result |= SymbolRef::SF_Global; - if ((MachOType & MachO::NlistMaskType) == MachO::NListTypeUndefined) - Result |= SymbolRef::SF_Common; + if ((MachOType & MachO::NlistMaskType) == MachO::NListTypeUndefined) { + uint64_t Value; + getSymbolAddress(DRI, Value); + if (Value) + Result |= SymbolRef::SF_Common; + } } if (MachOFlags & (MachO::NListDescWeakRef | MachO::NListDescWeakDef)) @@ -318,55 +639,20 @@ error_code MachOObjectFile::getSymbolFlags(DataRefImpl DRI, return object_error::success; } -error_code MachOObjectFile::getSymbolSection(DataRefImpl Symb, - section_iterator &Res) const { - uint8_t index; - if (MachOObj->is64Bit()) { - InMemoryStruct<macho::Symbol64TableEntry> Entry; - getSymbol64TableEntry(Symb, Entry); - index = Entry->SectionIndex; - } else { - InMemoryStruct<macho::SymbolTableEntry> Entry; - getSymbolTableEntry(Symb, Entry); - index = Entry->SectionIndex; - } +error_code +MachOObjectFile::getSymbolSection(DataRefImpl Symb, + section_iterator &Res) const { + SymbolTableEntryBase Entry = getSymbolTableEntryBase(this, Symb); + uint8_t index = Entry.SectionIndex; - if (index == 0) + if (index == 0) { Res = end_sections(); - else - Res = section_iterator(SectionRef(Sections[index-1], this)); - - return object_error::success; -} - -error_code MachOObjectFile::getSymbolType(DataRefImpl Symb, - SymbolRef::Type &Res) const { - uint8_t n_type; - if (MachOObj->is64Bit()) { - InMemoryStruct<macho::Symbol64TableEntry> Entry; - getSymbol64TableEntry(Symb, Entry); - n_type = Entry->Type; } else { - InMemoryStruct<macho::SymbolTableEntry> Entry; - getSymbolTableEntry(Symb, Entry); - n_type = Entry->Type; - } - Res = SymbolRef::ST_Other; - - // If this is a STAB debugging symbol, we can do nothing more. 
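
The type and flag accessors in this area all decode the same few bit-fields of the nlist entry: a STAB mask that marks debugging stabs, a type field whose "section" value means a defined symbol, and an external bit that, combined with an undefined type and a non-zero value, marks a common symbol (which is why getSymbolFlags now checks the address before setting SF_Common). A rough sketch of that decoding; the mask values follow the usual Mach-O layout (N_STAB = 0xe0, N_TYPE = 0x0e, N_EXT = 0x01) but should be treated as illustrative rather than authoritative:

#include <cstdint>

enum ToyKind { Debug, Undefined, Defined, Other };

// Rough nlist n_type classification.
ToyKind classify(uint8_t n_type) {
  if (n_type & 0xe0)            // any stab bit set: purely a debug entry
    return Debug;
  switch (n_type & 0x0e) {      // type field
  case 0x0: return Undefined;   // N_UNDF
  case 0xe: return Defined;     // N_SECT: defined in some section
  default:  return Other;       // absolute, indirect, prebound, ...
  }
}

// A common symbol is an undefined external whose value is non-zero.
bool isCommon(uint8_t n_type, uint64_t Value) {
  return (n_type & 0x01) && ((n_type & 0x0e) == 0x0) && Value != 0;
}
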
- if (n_type & MachO::NlistMaskStab) { - Res = SymbolRef::ST_Debug; - return object_error::success; + DataRefImpl DRI; + DRI.d.a = index - 1; + Res = section_iterator(SectionRef(DRI, this)); } - switch (n_type & MachO::NlistMaskType) { - case MachO::NListTypeUndefined : - Res = SymbolRef::ST_Unknown; - break; - case MachO::NListTypeSection : - Res = SymbolRef::ST_Function; - break; - } return object_error::success; } @@ -375,242 +661,101 @@ error_code MachOObjectFile::getSymbolValue(DataRefImpl Symb, report_fatal_error("getSymbolValue unimplemented in MachOObjectFile"); } -symbol_iterator MachOObjectFile::begin_symbols() const { - // DRI.d.a = segment number; DRI.d.b = symbol index. - DataRefImpl DRI; - moveToNextSymbol(DRI); - return symbol_iterator(SymbolRef(DRI, this)); -} - -symbol_iterator MachOObjectFile::end_symbols() const { - DataRefImpl DRI; - DRI.d.a = MachOObj->getHeader().NumLoadCommands; - return symbol_iterator(SymbolRef(DRI, this)); -} - -symbol_iterator MachOObjectFile::begin_dynamic_symbols() const { - // TODO: implement - report_fatal_error("Dynamic symbols unimplemented in MachOObjectFile"); -} - -symbol_iterator MachOObjectFile::end_dynamic_symbols() const { - // TODO: implement - report_fatal_error("Dynamic symbols unimplemented in MachOObjectFile"); -} - -library_iterator MachOObjectFile::begin_libraries_needed() const { - // TODO: implement - report_fatal_error("Needed libraries unimplemented in MachOObjectFile"); -} - -library_iterator MachOObjectFile::end_libraries_needed() const { - // TODO: implement - report_fatal_error("Needed libraries unimplemented in MachOObjectFile"); -} - -StringRef MachOObjectFile::getLoadName() const { - // TODO: Implement - report_fatal_error("get_load_name() unimplemented in MachOObjectFile"); -} - -/*===-- Sections ----------------------------------------------------------===*/ - -void MachOObjectFile::moveToNextSection(DataRefImpl &DRI) const { - uint32_t LoadCommandCount = MachOObj->getHeader().NumLoadCommands; - while (DRI.d.a < LoadCommandCount) { - LoadCommandInfo LCI = MachOObj->getLoadCommandInfo(DRI.d.a); - if (LCI.Command.Type == macho::LCT_Segment) { - InMemoryStruct<macho::SegmentLoadCommand> SegmentLoadCmd; - MachOObj->ReadSegmentLoadCommand(LCI, SegmentLoadCmd); - if (DRI.d.b < SegmentLoadCmd->NumSections) - return; - } else if (LCI.Command.Type == macho::LCT_Segment64) { - InMemoryStruct<macho::Segment64LoadCommand> Segment64LoadCmd; - MachOObj->ReadSegment64LoadCommand(LCI, Segment64LoadCmd); - if (DRI.d.b < Segment64LoadCmd->NumSections) - return; - } - - DRI.d.a++; - DRI.d.b = 0; - } -} - -error_code MachOObjectFile::getSectionNext(DataRefImpl DRI, - SectionRef &Result) const { - DRI.d.b++; - moveToNextSection(DRI); - Result = SectionRef(DRI, this); +error_code MachOObjectFile::getSectionNext(DataRefImpl Sec, + SectionRef &Res) const { + Sec.d.a++; + Res = SectionRef(Sec, this); return object_error::success; } -void -MachOObjectFile::getSection(DataRefImpl DRI, - InMemoryStruct<macho::Section> &Res) const { - LoadCommandInfo LCI = MachOObj->getLoadCommandInfo(DRI.d.a); - MachOObj->ReadSection(LCI, DRI.d.b, Res); -} - -std::size_t MachOObjectFile::getSectionIndex(DataRefImpl Sec) const { - SectionList::const_iterator loc = - std::find(Sections.begin(), Sections.end(), Sec); - assert(loc != Sections.end() && "Sec is not a valid section!"); - return std::distance(Sections.begin(), loc); -} - -void -MachOObjectFile::getSection64(DataRefImpl DRI, - InMemoryStruct<macho::Section64> &Res) const { - LoadCommandInfo LCI = 
MachOObj->getLoadCommandInfo(DRI.d.a); - MachOObj->ReadSection64(LCI, DRI.d.b, Res); -} - -static bool is64BitLoadCommand(const MachOObject *MachOObj, DataRefImpl DRI) { - LoadCommandInfo LCI = MachOObj->getLoadCommandInfo(DRI.d.a); - if (LCI.Command.Type == macho::LCT_Segment64) - return true; - assert(LCI.Command.Type == macho::LCT_Segment && "Unexpected Type."); - return false; -} - -static StringRef parseSegmentOrSectionName(const char *P) { - if (P[15] == 0) - // Null terminated. - return P; - // Not null terminated, so this is a 16 char string. - return StringRef(P, 16); -} - -error_code MachOObjectFile::getSectionName(DataRefImpl DRI, - StringRef &Result) const { - if (is64BitLoadCommand(MachOObj.get(), DRI)) { - LoadCommandInfo LCI = MachOObj->getLoadCommandInfo(DRI.d.a); - unsigned SectionOffset = LCI.Offset + sizeof(macho::Segment64LoadCommand) + - DRI.d.b * sizeof(macho::Section64); - StringRef Data = MachOObj->getData(SectionOffset, sizeof(macho::Section64)); - const macho::Section64 *sec = - reinterpret_cast<const macho::Section64*>(Data.data()); - Result = parseSegmentOrSectionName(sec->Name); - } else { - LoadCommandInfo LCI = MachOObj->getLoadCommandInfo(DRI.d.a); - unsigned SectionOffset = LCI.Offset + sizeof(macho::SegmentLoadCommand) + - DRI.d.b * sizeof(macho::Section); - StringRef Data = MachOObj->getData(SectionOffset, sizeof(macho::Section)); - const macho::Section *sec = - reinterpret_cast<const macho::Section*>(Data.data()); - Result = parseSegmentOrSectionName(sec->Name); - } +error_code +MachOObjectFile::getSectionName(DataRefImpl Sec, StringRef &Result) const { + ArrayRef<char> Raw = getSectionRawName(Sec); + Result = parseSegmentOrSectionName(Raw.data()); return object_error::success; } -error_code MachOObjectFile::getSectionFinalSegmentName(DataRefImpl Sec, - StringRef &Res) const { - if (is64BitLoadCommand(MachOObj.get(), Sec)) { - LoadCommandInfo LCI = MachOObj->getLoadCommandInfo(Sec.d.a); - unsigned SectionOffset = LCI.Offset + sizeof(macho::Segment64LoadCommand) + - Sec.d.b * sizeof(macho::Section64); - StringRef Data = MachOObj->getData(SectionOffset, sizeof(macho::Section64)); - const macho::Section64 *sec = - reinterpret_cast<const macho::Section64*>(Data.data()); - Res = parseSegmentOrSectionName(sec->SegmentName); +error_code +MachOObjectFile::getSectionAddress(DataRefImpl Sec, uint64_t &Res) const { + if (is64Bit()) { + macho::Section64 Sect = getSection64(Sec); + Res = Sect.Address; } else { - LoadCommandInfo LCI = MachOObj->getLoadCommandInfo(Sec.d.a); - unsigned SectionOffset = LCI.Offset + sizeof(macho::SegmentLoadCommand) + - Sec.d.b * sizeof(macho::Section); - StringRef Data = MachOObj->getData(SectionOffset, sizeof(macho::Section)); - const macho::Section *sec = - reinterpret_cast<const macho::Section*>(Data.data()); - Res = parseSegmentOrSectionName(sec->SegmentName); + macho::Section Sect = getSection(Sec); + Res = Sect.Address; } return object_error::success; } -error_code MachOObjectFile::getSectionAddress(DataRefImpl DRI, - uint64_t &Result) const { - if (is64BitLoadCommand(MachOObj.get(), DRI)) { - InMemoryStruct<macho::Section64> Sect; - getSection64(DRI, Sect); - Result = Sect->Address; +error_code +MachOObjectFile::getSectionSize(DataRefImpl Sec, uint64_t &Res) const { + if (is64Bit()) { + macho::Section64 Sect = getSection64(Sec); + Res = Sect.Size; } else { - InMemoryStruct<macho::Section> Sect; - getSection(DRI, Sect); - Result = Sect->Address; + macho::Section Sect = getSection(Sec); + Res = Sect.Size; } - return 
object_error::success; -} -error_code MachOObjectFile::getSectionSize(DataRefImpl DRI, - uint64_t &Result) const { - if (is64BitLoadCommand(MachOObj.get(), DRI)) { - InMemoryStruct<macho::Section64> Sect; - getSection64(DRI, Sect); - Result = Sect->Size; - } else { - InMemoryStruct<macho::Section> Sect; - getSection(DRI, Sect); - Result = Sect->Size; - } return object_error::success; } -error_code MachOObjectFile::getSectionContents(DataRefImpl DRI, - StringRef &Result) const { - if (is64BitLoadCommand(MachOObj.get(), DRI)) { - InMemoryStruct<macho::Section64> Sect; - getSection64(DRI, Sect); - Result = MachOObj->getData(Sect->Offset, Sect->Size); +error_code +MachOObjectFile::getSectionContents(DataRefImpl Sec, StringRef &Res) const { + uint32_t Offset; + uint64_t Size; + + if (is64Bit()) { + macho::Section64 Sect = getSection64(Sec); + Offset = Sect.Offset; + Size = Sect.Size; } else { - InMemoryStruct<macho::Section> Sect; - getSection(DRI, Sect); - Result = MachOObj->getData(Sect->Offset, Sect->Size); + macho::Section Sect =getSection(Sec); + Offset = Sect.Offset; + Size = Sect.Size; } + + Res = this->getData().substr(Offset, Size); return object_error::success; } -error_code MachOObjectFile::getSectionAlignment(DataRefImpl DRI, - uint64_t &Result) const { - if (is64BitLoadCommand(MachOObj.get(), DRI)) { - InMemoryStruct<macho::Section64> Sect; - getSection64(DRI, Sect); - Result = uint64_t(1) << Sect->Align; +error_code +MachOObjectFile::getSectionAlignment(DataRefImpl Sec, uint64_t &Res) const { + uint32_t Align; + if (is64Bit()) { + macho::Section64 Sect = getSection64(Sec); + Align = Sect.Align; } else { - InMemoryStruct<macho::Section> Sect; - getSection(DRI, Sect); - Result = uint64_t(1) << Sect->Align; + macho::Section Sect = getSection(Sec); + Align = Sect.Align; } + + Res = uint64_t(1) << Align; return object_error::success; } -error_code MachOObjectFile::isSectionText(DataRefImpl DRI, - bool &Result) const { - if (is64BitLoadCommand(MachOObj.get(), DRI)) { - InMemoryStruct<macho::Section64> Sect; - getSection64(DRI, Sect); - Result = Sect->Flags & macho::SF_PureInstructions; - } else { - InMemoryStruct<macho::Section> Sect; - getSection(DRI, Sect); - Result = Sect->Flags & macho::SF_PureInstructions; - } +error_code +MachOObjectFile::isSectionText(DataRefImpl Sec, bool &Res) const { + uint32_t Flags = getSectionFlags(this, Sec); + Res = Flags & macho::SF_PureInstructions; return object_error::success; } -error_code MachOObjectFile::isSectionData(DataRefImpl DRI, - bool &Result) const { +error_code MachOObjectFile::isSectionData(DataRefImpl DRI, bool &Result) const { // FIXME: Unimplemented. Result = false; return object_error::success; } -error_code MachOObjectFile::isSectionBSS(DataRefImpl DRI, - bool &Result) const { +error_code MachOObjectFile::isSectionBSS(DataRefImpl DRI, bool &Result) const { // FIXME: Unimplemented. Result = false; return object_error::success; } -error_code MachOObjectFile::isSectionRequiredForExecution(DataRefImpl Sec, - bool &Result) const { +error_code +MachOObjectFile::isSectionRequiredForExecution(DataRefImpl Sec, + bool &Result) const { // FIXME: Unimplemented. 
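
Several of the section accessors above go through parseSegmentOrSectionName because Mach-O stores segment and section names as fixed 16-byte fields: shorter names are NUL-padded, but a name that uses all 16 bytes has no terminator at all, so the string must be built with an explicit length in that case. A tiny sketch of the two cases, with hypothetical names:

#include <cstring>
#include <string>

// Fixed 16-byte name field, as in the macho::Section layout.
std::string parseName(const char Name[16]) {
  if (Name[15] == 0)
    return std::string(Name);      // NUL-terminated: ordinary C string
  return std::string(Name, 16);    // all 16 bytes used: take explicit length
}

// Usage sketch:
//   char Short[16] = "__text";                  // padded with NULs
//   char Full[16];
//   std::memcpy(Full, "0123456789abcdef", 16);  // hypothetical full-width name
//   parseName(Short) == "__text"  and  parseName(Full).size() == 16
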
Result = true; return object_error::success; @@ -623,22 +768,12 @@ error_code MachOObjectFile::isSectionVirtual(DataRefImpl Sec, return object_error::success; } -error_code MachOObjectFile::isSectionZeroInit(DataRefImpl DRI, - bool &Result) const { - if (MachOObj->is64Bit()) { - InMemoryStruct<macho::Section64> Sect; - getSection64(DRI, Sect); - unsigned SectionType = Sect->Flags & MachO::SectionFlagMaskSectionType; - Result = (SectionType == MachO::SectionTypeZeroFill || - SectionType == MachO::SectionTypeZeroFillLarge); - } else { - InMemoryStruct<macho::Section> Sect; - getSection(DRI, Sect); - unsigned SectionType = Sect->Flags & MachO::SectionFlagMaskSectionType; - Result = (SectionType == MachO::SectionTypeZeroFill || - SectionType == MachO::SectionTypeZeroFillLarge); - } - +error_code +MachOObjectFile::isSectionZeroInit(DataRefImpl Sec, bool &Res) const { + uint32_t Flags = getSectionFlags(this, Sec); + unsigned SectionType = Flags & MachO::SectionFlagMaskSectionType; + Res = SectionType == MachO::SectionTypeZeroFill || + SectionType == MachO::SectionTypeZeroFillLarge; return object_error::success; } @@ -653,11 +788,11 @@ error_code MachOObjectFile::isSectionReadOnlyData(DataRefImpl Sec, return object_error::success; } -error_code MachOObjectFile::sectionContainsSymbol(DataRefImpl Sec, - DataRefImpl Symb, - bool &Result) const { +error_code +MachOObjectFile::sectionContainsSymbol(DataRefImpl Sec, DataRefImpl Symb, + bool &Result) const { SymbolRef::Type ST; - getSymbolType(Symb, ST); + this->getSymbolType(Symb, ST); if (ST == SymbolRef::ST_Unknown) { Result = false; return object_error::success; @@ -668,164 +803,107 @@ error_code MachOObjectFile::sectionContainsSymbol(DataRefImpl Sec, getSectionSize(Sec, SectEnd); SectEnd += SectBegin; - if (MachOObj->is64Bit()) { - InMemoryStruct<macho::Symbol64TableEntry> Entry; - getSymbol64TableEntry(Symb, Entry); - uint64_t SymAddr= Entry->Value; - Result = (SymAddr >= SectBegin) && (SymAddr < SectEnd); - } else { - InMemoryStruct<macho::SymbolTableEntry> Entry; - getSymbolTableEntry(Symb, Entry); - uint64_t SymAddr= Entry->Value; - Result = (SymAddr >= SectBegin) && (SymAddr < SectEnd); - } + uint64_t SymAddr; + getSymbolAddress(Symb, SymAddr); + Result = (SymAddr >= SectBegin) && (SymAddr < SectEnd); return object_error::success; } relocation_iterator MachOObjectFile::getSectionRelBegin(DataRefImpl Sec) const { - DataRefImpl ret; - ret.d.b = getSectionIndex(Sec); - return relocation_iterator(RelocationRef(ret, this)); -} -relocation_iterator MachOObjectFile::getSectionRelEnd(DataRefImpl Sec) const { - uint32_t last_reloc; - if (is64BitLoadCommand(MachOObj.get(), Sec)) { - InMemoryStruct<macho::Section64> Sect; - getSection64(Sec, Sect); - last_reloc = Sect->NumRelocationTableEntries; + uint32_t Offset; + if (is64Bit()) { + macho::Section64 Sect = getSection64(Sec); + Offset = Sect.RelocationTableOffset; } else { - InMemoryStruct<macho::Section> Sect; - getSection(Sec, Sect); - last_reloc = Sect->NumRelocationTableEntries; + macho::Section Sect = getSection(Sec); + Offset = Sect.RelocationTableOffset; } - DataRefImpl ret; - ret.d.a = last_reloc; - ret.d.b = getSectionIndex(Sec); - return relocation_iterator(RelocationRef(ret, this)); -} - -section_iterator MachOObjectFile::begin_sections() const { - DataRefImpl DRI; - moveToNextSection(DRI); - return section_iterator(SectionRef(DRI, this)); -} -section_iterator MachOObjectFile::end_sections() const { - DataRefImpl DRI; - DRI.d.a = MachOObj->getHeader().NumLoadCommands; - return 
section_iterator(SectionRef(DRI, this)); + DataRefImpl Ret; + Ret.p = reinterpret_cast<uintptr_t>(getPtr(this, Offset)); + return relocation_iterator(RelocationRef(Ret, this)); } -/*===-- Relocations -------------------------------------------------------===*/ - -void MachOObjectFile:: -getRelocation(DataRefImpl Rel, - InMemoryStruct<macho::RelocationEntry> &Res) const { - uint32_t relOffset; - if (MachOObj->is64Bit()) { - InMemoryStruct<macho::Section64> Sect; - getSection64(Sections[Rel.d.b], Sect); - relOffset = Sect->RelocationTableOffset; +relocation_iterator +MachOObjectFile::getSectionRelEnd(DataRefImpl Sec) const { + uint32_t Offset; + uint32_t Num; + if (is64Bit()) { + macho::Section64 Sect = getSection64(Sec); + Offset = Sect.RelocationTableOffset; + Num = Sect.NumRelocationTableEntries; } else { - InMemoryStruct<macho::Section> Sect; - getSection(Sections[Rel.d.b], Sect); - relOffset = Sect->RelocationTableOffset; + macho::Section Sect = getSection(Sec); + Offset = Sect.RelocationTableOffset; + Num = Sect.NumRelocationTableEntries; } - MachOObj->ReadRelocationEntry(relOffset, Rel.d.a, Res); + + const macho::RelocationEntry *P = + reinterpret_cast<const macho::RelocationEntry*>(getPtr(this, Offset)); + + DataRefImpl Ret; + Ret.p = reinterpret_cast<uintptr_t>(P + Num); + return relocation_iterator(RelocationRef(Ret, this)); } + error_code MachOObjectFile::getRelocationNext(DataRefImpl Rel, RelocationRef &Res) const { - ++Rel.d.a; + const macho::RelocationEntry *P = + reinterpret_cast<const macho::RelocationEntry *>(Rel.p); + Rel.p = reinterpret_cast<uintptr_t>(P + 1); Res = RelocationRef(Rel, this); return object_error::success; } -error_code MachOObjectFile::getRelocationAddress(DataRefImpl Rel, - uint64_t &Res) const { - const uint8_t* sectAddress = 0; - if (MachOObj->is64Bit()) { - InMemoryStruct<macho::Section64> Sect; - getSection64(Sections[Rel.d.b], Sect); - sectAddress += Sect->Address; - } else { - InMemoryStruct<macho::Section> Sect; - getSection(Sections[Rel.d.b], Sect); - sectAddress += Sect->Address; - } - InMemoryStruct<macho::RelocationEntry> RE; - getRelocation(Rel, RE); - - unsigned Arch = getArch(); - bool isScattered = (Arch != Triple::x86_64) && - (RE->Word0 & macho::RF_Scattered); - uint64_t RelAddr = 0; - if (isScattered) - RelAddr = RE->Word0 & 0xFFFFFF; - else - RelAddr = RE->Word0; - Res = reinterpret_cast<uintptr_t>(sectAddress + RelAddr); - return object_error::success; +error_code +MachOObjectFile::getRelocationAddress(DataRefImpl Rel, uint64_t &Res) const { + report_fatal_error("getRelocationAddress not implemented in MachOObjectFile"); } + error_code MachOObjectFile::getRelocationOffset(DataRefImpl Rel, uint64_t &Res) const { - InMemoryStruct<macho::RelocationEntry> RE; - getRelocation(Rel, RE); - - unsigned Arch = getArch(); - bool isScattered = (Arch != Triple::x86_64) && - (RE->Word0 & macho::RF_Scattered); - if (isScattered) - Res = RE->Word0 & 0xFFFFFF; - else - Res = RE->Word0; + macho::RelocationEntry RE = getRelocation(Rel); + Res = getAnyRelocationAddress(RE); return object_error::success; } -error_code MachOObjectFile::getRelocationSymbol(DataRefImpl Rel, - SymbolRef &Res) const { - InMemoryStruct<macho::RelocationEntry> RE; - getRelocation(Rel, RE); - uint32_t SymbolIdx = RE->Word1 & 0xffffff; - bool isExtern = (RE->Word1 >> 27) & 1; - DataRefImpl Sym; - moveToNextSymbol(Sym); - if (isExtern) { - for (unsigned i = 0; i < SymbolIdx; i++) { - Sym.d.b++; - moveToNextSymbol(Sym); - assert(Sym.d.a < MachOObj->getHeader().NumLoadCommands && - 
"Relocation symbol index out of range!"); - } +error_code +MachOObjectFile::getRelocationSymbol(DataRefImpl Rel, SymbolRef &Res) const { + macho::RelocationEntry RE = getRelocation(Rel); + uint32_t SymbolIdx = getPlainRelocationSymbolNum(RE); + bool isExtern = getPlainRelocationExternal(RE); + if (!isExtern) { + Res = *end_symbols(); + return object_error::success; } + + macho::SymtabLoadCommand S = getSymtabLoadCommand(); + unsigned SymbolTableEntrySize = is64Bit() ? + sizeof(macho::Symbol64TableEntry) : + sizeof(macho::SymbolTableEntry); + uint64_t Offset = S.SymbolTableOffset + SymbolIdx * SymbolTableEntrySize; + DataRefImpl Sym; + Sym.p = reinterpret_cast<uintptr_t>(getPtr(this, Offset)); Res = SymbolRef(Sym, this); return object_error::success; } + error_code MachOObjectFile::getRelocationType(DataRefImpl Rel, uint64_t &Res) const { - InMemoryStruct<macho::RelocationEntry> RE; - getRelocation(Rel, RE); - Res = RE->Word0; - Res <<= 32; - Res |= RE->Word1; + macho::RelocationEntry RE = getRelocation(Rel); + Res = getAnyRelocationType(RE); return object_error::success; } -error_code MachOObjectFile::getRelocationTypeName(DataRefImpl Rel, - SmallVectorImpl<char> &Result) const { - // TODO: Support scattered relocations. - StringRef res; - InMemoryStruct<macho::RelocationEntry> RE; - getRelocation(Rel, RE); - unsigned Arch = getArch(); - bool isScattered = (Arch != Triple::x86_64) && - (RE->Word0 & macho::RF_Scattered); +error_code +MachOObjectFile::getRelocationTypeName(DataRefImpl Rel, + SmallVectorImpl<char> &Result) const { + StringRef res; + uint64_t RType; + getRelocationType(Rel, RType); - unsigned r_type; - if (isScattered) - r_type = (RE->Word0 >> 24) & 0xF; - else - r_type = (RE->Word1 >> 28) & 0xF; + unsigned Arch = this->getArch(); switch (Arch) { case Triple::x86: { @@ -837,10 +915,10 @@ error_code MachOObjectFile::getRelocationTypeName(DataRefImpl Rel, "GENERIC_RELOC_LOCAL_SECTDIFF", "GENERIC_RELOC_TLV" }; - if (r_type > 6) + if (RType > 6) res = "Unknown"; else - res = Table[r_type]; + res = Table[RType]; break; } case Triple::x86_64: { @@ -856,10 +934,10 @@ error_code MachOObjectFile::getRelocationTypeName(DataRefImpl Rel, "X86_64_RELOC_SIGNED_4", "X86_64_RELOC_TLV" }; - if (r_type > 9) + if (RType > 9) res = "Unknown"; else - res = Table[r_type]; + res = Table[RType]; break; } case Triple::arm: { @@ -875,10 +953,10 @@ error_code MachOObjectFile::getRelocationTypeName(DataRefImpl Rel, "ARM_RELOC_HALF", "ARM_RELOC_HALF_SECTDIFF" }; - if (r_type > 9) + if (RType > 9) res = "Unknown"; else - res = Table[r_type]; + res = Table[RType]; break; } case Triple::ppc: { @@ -900,7 +978,7 @@ error_code MachOObjectFile::getRelocationTypeName(DataRefImpl Rel, "PPC_RELOC_LO14_SECTDIFF", "PPC_RELOC_LOCAL_SECTDIFF" }; - res = Table[r_type]; + res = Table[RType]; break; } case Triple::UnknownArch: @@ -910,193 +988,79 @@ error_code MachOObjectFile::getRelocationTypeName(DataRefImpl Rel, Result.append(res.begin(), res.end()); return object_error::success; } + error_code MachOObjectFile::getRelocationAdditionalInfo(DataRefImpl Rel, int64_t &Res) const { - InMemoryStruct<macho::RelocationEntry> RE; - getRelocation(Rel, RE); - bool isExtern = (RE->Word1 >> 27) & 1; Res = 0; - if (!isExtern) { - const uint8_t* sectAddress = base(); - if (MachOObj->is64Bit()) { - InMemoryStruct<macho::Section64> Sect; - getSection64(Sections[Rel.d.b], Sect); - sectAddress += Sect->Offset; - } else { - InMemoryStruct<macho::Section> Sect; - getSection(Sections[Rel.d.b], Sect); - sectAddress += Sect->Offset; - } - Res 
= reinterpret_cast<uintptr_t>(sectAddress); - } return object_error::success; } -// Helper to advance a section or symbol iterator multiple increments at a time. -template<class T> -error_code advance(T &it, size_t Val) { - error_code ec; - while (Val--) { - it.increment(ec); - } - return ec; -} - -template<class T> -void advanceTo(T &it, size_t Val) { - if (error_code ec = advance(it, Val)) - report_fatal_error(ec.message()); -} - -void MachOObjectFile::printRelocationTargetName( - InMemoryStruct<macho::RelocationEntry>& RE, - raw_string_ostream &fmt) const { - unsigned Arch = getArch(); - bool isScattered = (Arch != Triple::x86_64) && - (RE->Word0 & macho::RF_Scattered); - - // Target of a scattered relocation is an address. In the interest of - // generating pretty output, scan through the symbol table looking for a - // symbol that aligns with that address. If we find one, print it. - // Otherwise, we just print the hex address of the target. - if (isScattered) { - uint32_t Val = RE->Word1; - - error_code ec; - for (symbol_iterator SI = begin_symbols(), SE = end_symbols(); SI != SE; - SI.increment(ec)) { - if (ec) report_fatal_error(ec.message()); - - uint64_t Addr; - StringRef Name; - - if ((ec = SI->getAddress(Addr))) - report_fatal_error(ec.message()); - if (Addr != Val) continue; - if ((ec = SI->getName(Name))) - report_fatal_error(ec.message()); - fmt << Name; - return; - } - - // If we couldn't find a symbol that this relocation refers to, try - // to find a section beginning instead. - for (section_iterator SI = begin_sections(), SE = end_sections(); SI != SE; - SI.increment(ec)) { - if (ec) report_fatal_error(ec.message()); - - uint64_t Addr; - StringRef Name; - - if ((ec = SI->getAddress(Addr))) - report_fatal_error(ec.message()); - if (Addr != Val) continue; - if ((ec = SI->getName(Name))) - report_fatal_error(ec.message()); - fmt << Name; - return; - } - - fmt << format("0x%x", Val); - return; - } - - StringRef S; - bool isExtern = (RE->Word1 >> 27) & 1; - uint32_t Val = RE->Word1 & 0xFFFFFF; - - if (isExtern) { - symbol_iterator SI = begin_symbols(); - advanceTo(SI, Val); - SI->getName(S); - } else { - section_iterator SI = begin_sections(); - advanceTo(SI, Val); - SI->getName(S); - } - - fmt << S; -} - -error_code MachOObjectFile::getRelocationValueString(DataRefImpl Rel, +error_code +MachOObjectFile::getRelocationValueString(DataRefImpl Rel, SmallVectorImpl<char> &Result) const { - InMemoryStruct<macho::RelocationEntry> RE; - getRelocation(Rel, RE); + macho::RelocationEntry RE = getRelocation(Rel); - unsigned Arch = getArch(); - bool isScattered = (Arch != Triple::x86_64) && - (RE->Word0 & macho::RF_Scattered); + unsigned Arch = this->getArch(); std::string fmtbuf; raw_string_ostream fmt(fmtbuf); - - unsigned Type; - if (isScattered) - Type = (RE->Word0 >> 24) & 0xF; - else - Type = (RE->Word1 >> 28) & 0xF; - - bool isPCRel; - if (isScattered) - isPCRel = ((RE->Word0 >> 30) & 1); - else - isPCRel = ((RE->Word1 >> 24) & 1); + unsigned Type = this->getAnyRelocationType(RE); + bool IsPCRel = this->getAnyRelocationPCRel(RE); // Determine any addends that should be displayed with the relocation. // These require decoding the relocation type, which is triple-specific. // X86_64 has entirely custom relocation types. 
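
As an illustrative aside (not part of the patch): the hunks above replace ad-hoc Word0/Word1 bit-twiddling with the new getAnyRelocation*() helpers. A minimal standalone sketch of the two layouts being abstracted, using the bit positions taken from the removed inline code; the patch's real helpers additionally handle big-endian objects, and scattered entries never occur on x86_64.

```cpp
#include <cstdint>

// Raw MachO relocation entry as two 32-bit words (little-endian object assumed).
struct RelocationWords { uint32_t Word0, Word1; };

// Plain entries: Word0 holds the address; Word1 packs symbolnum, pcrel,
// length, extern and type.
inline unsigned plainSymbolNum(const RelocationWords &RE) { return RE.Word1 & 0xFFFFFF; }
inline bool     plainPCRel(const RelocationWords &RE)     { return (RE.Word1 >> 24) & 1; }
inline bool     plainExternal(const RelocationWords &RE)  { return (RE.Word1 >> 27) & 1; }
inline unsigned plainType(const RelocationWords &RE)      { return (RE.Word1 >> 28) & 0xF; }

// Scattered entries: the top bit of Word0 is set, and Word0 itself packs
// type, pcrel and address, while Word1 holds the target value.
inline bool     isScattered(const RelocationWords &RE)      { return RE.Word0 >> 31; }
inline unsigned scatteredType(const RelocationWords &RE)    { return (RE.Word0 >> 24) & 0xF; }
inline bool     scatteredPCRel(const RelocationWords &RE)   { return (RE.Word0 >> 30) & 1; }
inline unsigned scatteredAddress(const RelocationWords &RE) { return RE.Word0 & 0xFFFFFF; }

// The "any" accessors used in the patch simply dispatch on the scattered bit.
inline unsigned anyType(const RelocationWords &RE) {
  return isScattered(RE) ? scatteredType(RE) : plainType(RE);
}
inline bool anyPCRel(const RelocationWords &RE) {
  return isScattered(RE) ? scatteredPCRel(RE) : plainPCRel(RE);
}
```
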
if (Arch == Triple::x86_64) { - bool isPCRel = ((RE->Word1 >> 24) & 1); + bool isPCRel = getAnyRelocationPCRel(RE); switch (Type) { case macho::RIT_X86_64_GOTLoad: // X86_64_RELOC_GOT_LOAD case macho::RIT_X86_64_GOT: { // X86_64_RELOC_GOT - printRelocationTargetName(RE, fmt); + printRelocationTargetName(this, RE, fmt); fmt << "@GOT"; if (isPCRel) fmt << "PCREL"; break; } case macho::RIT_X86_64_Subtractor: { // X86_64_RELOC_SUBTRACTOR - InMemoryStruct<macho::RelocationEntry> RENext; DataRefImpl RelNext = Rel; RelNext.d.a++; - getRelocation(RelNext, RENext); + macho::RelocationEntry RENext = getRelocation(RelNext); // X86_64_SUBTRACTOR must be followed by a relocation of type // X86_64_RELOC_UNSIGNED. // NOTE: Scattered relocations don't exist on x86_64. - unsigned RType = (RENext->Word1 >> 28) & 0xF; + unsigned RType = getAnyRelocationType(RENext); if (RType != 0) report_fatal_error("Expected X86_64_RELOC_UNSIGNED after " "X86_64_RELOC_SUBTRACTOR."); // The X86_64_RELOC_UNSIGNED contains the minuend symbol, // X86_64_SUBTRACTOR contains to the subtrahend. - printRelocationTargetName(RENext, fmt); + printRelocationTargetName(this, RENext, fmt); fmt << "-"; - printRelocationTargetName(RE, fmt); + printRelocationTargetName(this, RE, fmt); break; } case macho::RIT_X86_64_TLV: - printRelocationTargetName(RE, fmt); + printRelocationTargetName(this, RE, fmt); fmt << "@TLV"; if (isPCRel) fmt << "P"; break; case macho::RIT_X86_64_Signed1: // X86_64_RELOC_SIGNED1 - printRelocationTargetName(RE, fmt); + printRelocationTargetName(this, RE, fmt); fmt << "-1"; break; case macho::RIT_X86_64_Signed2: // X86_64_RELOC_SIGNED2 - printRelocationTargetName(RE, fmt); + printRelocationTargetName(this, RE, fmt); fmt << "-2"; break; case macho::RIT_X86_64_Signed4: // X86_64_RELOC_SIGNED4 - printRelocationTargetName(RE, fmt); + printRelocationTargetName(this, RE, fmt); fmt << "-4"; break; default: - printRelocationTargetName(RE, fmt); + printRelocationTargetName(this, RE, fmt); break; } // X86 and ARM share some relocation types in common. @@ -1106,27 +1070,21 @@ error_code MachOObjectFile::getRelocationValueString(DataRefImpl Rel, case macho::RIT_Pair: // GENERIC_RELOC_PAIR - prints no info return object_error::success; case macho::RIT_Difference: { // GENERIC_RELOC_SECTDIFF - InMemoryStruct<macho::RelocationEntry> RENext; DataRefImpl RelNext = Rel; RelNext.d.a++; - getRelocation(RelNext, RENext); + macho::RelocationEntry RENext = getRelocation(RelNext); // X86 sect diff's must be followed by a relocation of type // GENERIC_RELOC_PAIR. - bool isNextScattered = (Arch != Triple::x86_64) && - (RENext->Word0 & macho::RF_Scattered); - unsigned RType; - if (isNextScattered) - RType = (RENext->Word0 >> 24) & 0xF; - else - RType = (RENext->Word1 >> 28) & 0xF; + unsigned RType = getAnyRelocationType(RENext); + if (RType != 1) report_fatal_error("Expected GENERIC_RELOC_PAIR after " "GENERIC_RELOC_SECTDIFF."); - printRelocationTargetName(RE, fmt); + printRelocationTargetName(this, RE, fmt); fmt << "-"; - printRelocationTargetName(RENext, fmt); + printRelocationTargetName(this, RENext, fmt); break; } } @@ -1136,37 +1094,30 @@ error_code MachOObjectFile::getRelocationValueString(DataRefImpl Rel, // handled in the generic code. 
switch (Type) { case macho::RIT_Generic_LocalDifference:{// GENERIC_RELOC_LOCAL_SECTDIFF - InMemoryStruct<macho::RelocationEntry> RENext; DataRefImpl RelNext = Rel; RelNext.d.a++; - getRelocation(RelNext, RENext); + macho::RelocationEntry RENext = getRelocation(RelNext); // X86 sect diff's must be followed by a relocation of type // GENERIC_RELOC_PAIR. - bool isNextScattered = (Arch != Triple::x86_64) && - (RENext->Word0 & macho::RF_Scattered); - unsigned RType; - if (isNextScattered) - RType = (RENext->Word0 >> 24) & 0xF; - else - RType = (RENext->Word1 >> 28) & 0xF; + unsigned RType = getAnyRelocationType(RENext); if (RType != 1) report_fatal_error("Expected GENERIC_RELOC_PAIR after " "GENERIC_RELOC_LOCAL_SECTDIFF."); - printRelocationTargetName(RE, fmt); + printRelocationTargetName(this, RE, fmt); fmt << "-"; - printRelocationTargetName(RENext, fmt); + printRelocationTargetName(this, RENext, fmt); break; } case macho::RIT_Generic_TLV: { - printRelocationTargetName(RE, fmt); + printRelocationTargetName(this, RE, fmt); fmt << "@TLV"; - if (isPCRel) fmt << "P"; + if (IsPCRel) fmt << "P"; break; } default: - printRelocationTargetName(RE, fmt); + printRelocationTargetName(this, RE, fmt); } } else { // ARM-specific relocations switch (Type) { @@ -1174,33 +1125,21 @@ error_code MachOObjectFile::getRelocationValueString(DataRefImpl Rel, case macho::RIT_ARM_HalfDifference: { // ARM_RELOC_HALF_SECTDIFF // Half relocations steal a bit from the length field to encode // whether this is an upper16 or a lower16 relocation. - bool isUpper; - if (isScattered) - isUpper = (RE->Word0 >> 28) & 1; - else - isUpper = (RE->Word1 >> 25) & 1; + bool isUpper = getAnyRelocationLength(RE) >> 1; if (isUpper) fmt << ":upper16:("; else fmt << ":lower16:("; - printRelocationTargetName(RE, fmt); + printRelocationTargetName(this, RE, fmt); - InMemoryStruct<macho::RelocationEntry> RENext; DataRefImpl RelNext = Rel; RelNext.d.a++; - getRelocation(RelNext, RENext); + macho::RelocationEntry RENext = getRelocation(RelNext); // ARM half relocs must be followed by a relocation of type // ARM_RELOC_PAIR. - bool isNextScattered = (Arch != Triple::x86_64) && - (RENext->Word0 & macho::RF_Scattered); - unsigned RType; - if (isNextScattered) - RType = (RENext->Word0 >> 24) & 0xF; - else - RType = (RENext->Word1 >> 28) & 0xF; - + unsigned RType = getAnyRelocationType(RENext); if (RType != 1) report_fatal_error("Expected ARM_RELOC_PAIR after " "GENERIC_RELOC_HALF"); @@ -1214,38 +1153,30 @@ error_code MachOObjectFile::getRelocationValueString(DataRefImpl Rel, // symbol/section pointer of the follow-on relocation. 
if (Type == macho::RIT_ARM_HalfDifference) { fmt << "-"; - printRelocationTargetName(RENext, fmt); + printRelocationTargetName(this, RENext, fmt); } fmt << ")"; break; } default: { - printRelocationTargetName(RE, fmt); + printRelocationTargetName(this, RE, fmt); } } } } else - printRelocationTargetName(RE, fmt); + printRelocationTargetName(this, RE, fmt); fmt.flush(); Result.append(fmtbuf.begin(), fmtbuf.end()); return object_error::success; } -error_code MachOObjectFile::getRelocationHidden(DataRefImpl Rel, - bool &Result) const { - InMemoryStruct<macho::RelocationEntry> RE; - getRelocation(Rel, RE); - +error_code +MachOObjectFile::getRelocationHidden(DataRefImpl Rel, bool &Result) const { unsigned Arch = getArch(); - bool isScattered = (Arch != Triple::x86_64) && - (RE->Word0 & macho::RF_Scattered); - unsigned Type; - if (isScattered) - Type = (RE->Word0 >> 24) & 0xF; - else - Type = (RE->Word1 >> 28) & 0xF; + uint64_t Type; + getRelocationType(Rel, Type); Result = false; @@ -1259,12 +1190,10 @@ error_code MachOObjectFile::getRelocationHidden(DataRefImpl Rel, if (Type == macho::RIT_X86_64_Unsigned && Rel.d.a > 0) { DataRefImpl RelPrev = Rel; RelPrev.d.a--; - InMemoryStruct<macho::RelocationEntry> REPrev; - getRelocation(RelPrev, REPrev); - - unsigned PrevType = (REPrev->Word1 >> 28) & 0xF; - - if (PrevType == macho::RIT_X86_64_Subtractor) Result = true; + uint64_t PrevType; + getRelocationType(RelPrev, PrevType); + if (PrevType == macho::RIT_X86_64_Subtractor) + Result = true; } } @@ -1281,16 +1210,70 @@ error_code MachOObjectFile::getLibraryPath(DataRefImpl LibData, report_fatal_error("Needed libraries unimplemented in MachOObjectFile"); } +symbol_iterator MachOObjectFile::begin_symbols() const { + DataRefImpl DRI; + if (!SymtabLoadCmd) + return symbol_iterator(SymbolRef(DRI, this)); + + macho::SymtabLoadCommand Symtab = getSymtabLoadCommand(); + DRI.p = reinterpret_cast<uintptr_t>(getPtr(this, Symtab.SymbolTableOffset)); + return symbol_iterator(SymbolRef(DRI, this)); +} + +symbol_iterator MachOObjectFile::end_symbols() const { + DataRefImpl DRI; + if (!SymtabLoadCmd) + return symbol_iterator(SymbolRef(DRI, this)); + + macho::SymtabLoadCommand Symtab = getSymtabLoadCommand(); + unsigned SymbolTableEntrySize = is64Bit() ? 
+ sizeof(macho::Symbol64TableEntry) : + sizeof(macho::SymbolTableEntry); + unsigned Offset = Symtab.SymbolTableOffset + + Symtab.NumSymbolTableEntries * SymbolTableEntrySize; + DRI.p = reinterpret_cast<uintptr_t>(getPtr(this, Offset)); + return symbol_iterator(SymbolRef(DRI, this)); +} + +symbol_iterator MachOObjectFile::begin_dynamic_symbols() const { + // TODO: implement + report_fatal_error("Dynamic symbols unimplemented in MachOObjectFile"); +} + +symbol_iterator MachOObjectFile::end_dynamic_symbols() const { + // TODO: implement + report_fatal_error("Dynamic symbols unimplemented in MachOObjectFile"); +} + +section_iterator MachOObjectFile::begin_sections() const { + DataRefImpl DRI; + return section_iterator(SectionRef(DRI, this)); +} + +section_iterator MachOObjectFile::end_sections() const { + DataRefImpl DRI; + DRI.d.a = Sections.size(); + return section_iterator(SectionRef(DRI, this)); +} -/*===-- Miscellaneous -----------------------------------------------------===*/ +library_iterator MachOObjectFile::begin_libraries_needed() const { + // TODO: implement + report_fatal_error("Needed libraries unimplemented in MachOObjectFile"); +} + +library_iterator MachOObjectFile::end_libraries_needed() const { + // TODO: implement + report_fatal_error("Needed libraries unimplemented in MachOObjectFile"); +} uint8_t MachOObjectFile::getBytesInAddress() const { - return MachOObj->is64Bit() ? 8 : 4; + return is64Bit() ? 8 : 4; } StringRef MachOObjectFile::getFileFormatName() const { - if (!MachOObj->is64Bit()) { - switch (MachOObj->getHeader().CPUType) { + unsigned CPUType = getCPUType(this); + if (!is64Bit()) { + switch (CPUType) { case llvm::MachO::CPUTypeI386: return "Mach-O 32-bit i386"; case llvm::MachO::CPUTypeARM: @@ -1298,18 +1281,18 @@ StringRef MachOObjectFile::getFileFormatName() const { case llvm::MachO::CPUTypePowerPC: return "Mach-O 32-bit ppc"; default: - assert((MachOObj->getHeader().CPUType & llvm::MachO::CPUArchABI64) == 0 && + assert((CPUType & llvm::MachO::CPUArchABI64) == 0 && "64-bit object file when we're not 64-bit?"); return "Mach-O 32-bit unknown"; } } // Make sure the cpu type has the correct mask. 
- assert((MachOObj->getHeader().CPUType & llvm::MachO::CPUArchABI64) + assert((CPUType & llvm::MachO::CPUArchABI64) == llvm::MachO::CPUArchABI64 && "32-bit object file when we're 64-bit?"); - switch (MachOObj->getHeader().CPUType) { + switch (CPUType) { case llvm::MachO::CPUTypeX86_64: return "Mach-O 64-bit x86-64"; case llvm::MachO::CPUTypePowerPC64: @@ -1320,7 +1303,7 @@ StringRef MachOObjectFile::getFileFormatName() const { } unsigned MachOObjectFile::getArch() const { - switch (MachOObj->getHeader().CPUType) { + switch (getCPUType(this)) { case llvm::MachO::CPUTypeI386: return Triple::x86; case llvm::MachO::CPUTypeX86_64: @@ -1336,5 +1319,260 @@ unsigned MachOObjectFile::getArch() const { } } +StringRef MachOObjectFile::getLoadName() const { + // TODO: Implement + report_fatal_error("get_load_name() unimplemented in MachOObjectFile"); +} + +relocation_iterator MachOObjectFile::getSectionRelBegin(unsigned Index) const { + DataRefImpl DRI; + DRI.d.a = Index; + return getSectionRelBegin(DRI); +} + +relocation_iterator MachOObjectFile::getSectionRelEnd(unsigned Index) const { + DataRefImpl DRI; + DRI.d.a = Index; + return getSectionRelEnd(DRI); +} + +StringRef +MachOObjectFile::getSectionFinalSegmentName(DataRefImpl Sec) const { + ArrayRef<char> Raw = getSectionRawFinalSegmentName(Sec); + return parseSegmentOrSectionName(Raw.data()); +} + +ArrayRef<char> +MachOObjectFile::getSectionRawName(DataRefImpl Sec) const { + const SectionBase *Base = + reinterpret_cast<const SectionBase*>(Sections[Sec.d.a]); + return ArrayRef<char>(Base->Name); +} + +ArrayRef<char> +MachOObjectFile::getSectionRawFinalSegmentName(DataRefImpl Sec) const { + const SectionBase *Base = + reinterpret_cast<const SectionBase*>(Sections[Sec.d.a]); + return ArrayRef<char>(Base->SegmentName); +} + +bool +MachOObjectFile::isRelocationScattered(const macho::RelocationEntry &RE) + const { + if (getCPUType(this) == llvm::MachO::CPUTypeX86_64) + return false; + return getPlainRelocationAddress(RE) & macho::RF_Scattered; +} + +unsigned MachOObjectFile::getPlainRelocationSymbolNum(const macho::RelocationEntry &RE) const { + if (isLittleEndian()) + return RE.Word1 & 0xffffff; + return RE.Word1 >> 8; +} + +bool MachOObjectFile::getPlainRelocationExternal(const macho::RelocationEntry &RE) const { + if (isLittleEndian()) + return (RE.Word1 >> 27) & 1; + return (RE.Word1 >> 4) & 1; +} + +bool +MachOObjectFile::getScatteredRelocationScattered(const macho::RelocationEntry &RE) const { + return RE.Word0 >> 31; +} + +uint32_t +MachOObjectFile::getScatteredRelocationValue(const macho::RelocationEntry &RE) const { + return RE.Word1; +} + +unsigned +MachOObjectFile::getAnyRelocationAddress(const macho::RelocationEntry &RE) const { + if (isRelocationScattered(RE)) + return getScatteredRelocationAddress(RE); + return getPlainRelocationAddress(RE); +} + +unsigned +MachOObjectFile::getAnyRelocationPCRel(const macho::RelocationEntry &RE) const { + if (isRelocationScattered(RE)) + return getScatteredRelocationPCRel(this, RE); + return getPlainRelocationPCRel(this, RE); +} + +unsigned +MachOObjectFile::getAnyRelocationLength(const macho::RelocationEntry &RE) const { + if (isRelocationScattered(RE)) + return getScatteredRelocationLength(RE); + return getPlainRelocationLength(this, RE); +} + +unsigned +MachOObjectFile::getAnyRelocationType(const macho::RelocationEntry &RE) const { + if (isRelocationScattered(RE)) + return getScatteredRelocationType(RE); + return getPlainRelocationType(this, RE); +} + +SectionRef 
+MachOObjectFile::getRelocationSection(const macho::RelocationEntry &RE) const { + if (isRelocationScattered(RE) || getPlainRelocationExternal(RE)) + return *end_sections(); + unsigned SecNum = getPlainRelocationSymbolNum(RE) - 1; + DataRefImpl DRI; + DRI.d.a = SecNum; + return SectionRef(DRI, this); +} + +MachOObjectFile::LoadCommandInfo +MachOObjectFile::getFirstLoadCommandInfo() const { + MachOObjectFile::LoadCommandInfo Load; + + unsigned HeaderSize = is64Bit() ? macho::Header64Size : macho::Header32Size; + Load.Ptr = getPtr(this, HeaderSize); + Load.C = getStruct<macho::LoadCommand>(this, Load.Ptr); + return Load; +} + +MachOObjectFile::LoadCommandInfo +MachOObjectFile::getNextLoadCommandInfo(const LoadCommandInfo &L) const { + MachOObjectFile::LoadCommandInfo Next; + Next.Ptr = L.Ptr + L.C.Size; + Next.C = getStruct<macho::LoadCommand>(this, Next.Ptr); + return Next; +} + +macho::Section MachOObjectFile::getSection(DataRefImpl DRI) const { + return getStruct<macho::Section>(this, Sections[DRI.d.a]); +} + +macho::Section64 MachOObjectFile::getSection64(DataRefImpl DRI) const { + return getStruct<macho::Section64>(this, Sections[DRI.d.a]); +} + +macho::Section MachOObjectFile::getSection(const LoadCommandInfo &L, + unsigned Index) const { + const char *Sec = getSectionPtr(this, L, Index); + return getStruct<macho::Section>(this, Sec); +} + +macho::Section64 MachOObjectFile::getSection64(const LoadCommandInfo &L, + unsigned Index) const { + const char *Sec = getSectionPtr(this, L, Index); + return getStruct<macho::Section64>(this, Sec); +} + +macho::SymbolTableEntry +MachOObjectFile::getSymbolTableEntry(DataRefImpl DRI) const { + const char *P = reinterpret_cast<const char *>(DRI.p); + return getStruct<macho::SymbolTableEntry>(this, P); +} + +macho::Symbol64TableEntry +MachOObjectFile::getSymbol64TableEntry(DataRefImpl DRI) const { + const char *P = reinterpret_cast<const char *>(DRI.p); + return getStruct<macho::Symbol64TableEntry>(this, P); +} + +macho::LinkeditDataLoadCommand +MachOObjectFile::getLinkeditDataLoadCommand(const MachOObjectFile::LoadCommandInfo &L) const { + return getStruct<macho::LinkeditDataLoadCommand>(this, L.Ptr); +} + +macho::SegmentLoadCommand +MachOObjectFile::getSegmentLoadCommand(const LoadCommandInfo &L) const { + return getStruct<macho::SegmentLoadCommand>(this, L.Ptr); +} + +macho::Segment64LoadCommand +MachOObjectFile::getSegment64LoadCommand(const LoadCommandInfo &L) const { + return getStruct<macho::Segment64LoadCommand>(this, L.Ptr); +} + +macho::LinkerOptionsLoadCommand +MachOObjectFile::getLinkerOptionsLoadCommand(const LoadCommandInfo &L) const { + return getStruct<macho::LinkerOptionsLoadCommand>(this, L.Ptr); +} + +macho::RelocationEntry +MachOObjectFile::getRelocation(DataRefImpl Rel) const { + const char *P = reinterpret_cast<const char *>(Rel.p); + return getStruct<macho::RelocationEntry>(this, P); +} + +macho::Header MachOObjectFile::getHeader() const { + return getStruct<macho::Header>(this, getPtr(this, 0)); +} + +macho::Header64Ext MachOObjectFile::getHeader64Ext() const { + return + getStruct<macho::Header64Ext>(this, getPtr(this, sizeof(macho::Header))); +} + +macho::IndirectSymbolTableEntry MachOObjectFile::getIndirectSymbolTableEntry( + const macho::DysymtabLoadCommand &DLC, + unsigned Index) const { + uint64_t Offset = DLC.IndirectSymbolTableOffset + + Index * sizeof(macho::IndirectSymbolTableEntry); + return getStruct<macho::IndirectSymbolTableEntry>(this, getPtr(this, Offset)); +} + +macho::DataInCodeTableEntry 
+MachOObjectFile::getDataInCodeTableEntry(uint32_t DataOffset, + unsigned Index) const { + uint64_t Offset = DataOffset + Index * sizeof(macho::DataInCodeTableEntry); + return getStruct<macho::DataInCodeTableEntry>(this, getPtr(this, Offset)); +} + +macho::SymtabLoadCommand MachOObjectFile::getSymtabLoadCommand() const { + return getStruct<macho::SymtabLoadCommand>(this, SymtabLoadCmd); +} + +macho::DysymtabLoadCommand MachOObjectFile::getDysymtabLoadCommand() const { + return getStruct<macho::DysymtabLoadCommand>(this, DysymtabLoadCmd); +} + +StringRef MachOObjectFile::getStringTableData() const { + macho::SymtabLoadCommand S = getSymtabLoadCommand(); + return getData().substr(S.StringTableOffset, S.StringTableSize); +} + +bool MachOObjectFile::is64Bit() const { + return getType() == getMachOType(false, true) || + getType() == getMachOType(true, true); +} + +void MachOObjectFile::ReadULEB128s(uint64_t Index, + SmallVectorImpl<uint64_t> &Out) const { + DataExtractor extractor(ObjectFile::getData(), true, 0); + + uint32_t offset = Index; + uint64_t data = 0; + while (uint64_t delta = extractor.getULEB128(&offset)) { + data += delta; + Out.push_back(data); + } +} + +ObjectFile *ObjectFile::createMachOObjectFile(MemoryBuffer *Buffer) { + StringRef Magic = Buffer->getBuffer().slice(0, 4); + error_code ec; + ObjectFile *Ret; + if (Magic == "\xFE\xED\xFA\xCE") + Ret = new MachOObjectFile(Buffer, false, false, ec); + else if (Magic == "\xCE\xFA\xED\xFE") + Ret = new MachOObjectFile(Buffer, true, false, ec); + else if (Magic == "\xFE\xED\xFA\xCF") + Ret = new MachOObjectFile(Buffer, false, true, ec); + else if (Magic == "\xCF\xFA\xED\xFE") + Ret = new MachOObjectFile(Buffer, true, true, ec); + else + return NULL; + + if (ec) + return NULL; + return Ret; +} + } // end namespace object } // end namespace llvm diff --git a/contrib/llvm/lib/Object/Object.cpp b/contrib/llvm/lib/Object/Object.cpp index f061ea7..3e2c78e 100644 --- a/contrib/llvm/lib/Object/Object.cpp +++ b/contrib/llvm/lib/Object/Object.cpp @@ -12,12 +12,51 @@ // //===----------------------------------------------------------------------===// +#include "llvm/ADT/SmallVector.h" #include "llvm/Object/ObjectFile.h" #include "llvm-c/Object.h" using namespace llvm; using namespace object; +inline ObjectFile *unwrap(LLVMObjectFileRef OF) { + return reinterpret_cast<ObjectFile*>(OF); +} + +inline LLVMObjectFileRef wrap(const ObjectFile *OF) { + return reinterpret_cast<LLVMObjectFileRef>(const_cast<ObjectFile*>(OF)); +} + +inline section_iterator *unwrap(LLVMSectionIteratorRef SI) { + return reinterpret_cast<section_iterator*>(SI); +} + +inline LLVMSectionIteratorRef +wrap(const section_iterator *SI) { + return reinterpret_cast<LLVMSectionIteratorRef> + (const_cast<section_iterator*>(SI)); +} + +inline symbol_iterator *unwrap(LLVMSymbolIteratorRef SI) { + return reinterpret_cast<symbol_iterator*>(SI); +} + +inline LLVMSymbolIteratorRef +wrap(const symbol_iterator *SI) { + return reinterpret_cast<LLVMSymbolIteratorRef> + (const_cast<symbol_iterator*>(SI)); +} + +inline relocation_iterator *unwrap(LLVMRelocationIteratorRef SI) { + return reinterpret_cast<relocation_iterator*>(SI); +} + +inline LLVMRelocationIteratorRef +wrap(const relocation_iterator *SI) { + return reinterpret_cast<LLVMRelocationIteratorRef> + (const_cast<relocation_iterator*>(SI)); +} + // ObjectFile creation LLVMObjectFileRef LLVMCreateObjectFile(LLVMMemoryBufferRef MemBuf) { return wrap(ObjectFile::createObjectFile(unwrap(MemBuf))); diff --git 
a/contrib/llvm/lib/Object/ObjectFile.cpp b/contrib/llvm/lib/Object/ObjectFile.cpp index 860c87b..77fd995 100644 --- a/contrib/llvm/lib/Object/ObjectFile.cpp +++ b/contrib/llvm/lib/Object/ObjectFile.cpp @@ -23,10 +23,16 @@ using namespace object; void ObjectFile::anchor() { } -ObjectFile::ObjectFile(unsigned int Type, MemoryBuffer *source, error_code &ec) +ObjectFile::ObjectFile(unsigned int Type, MemoryBuffer *source) : Binary(Type, source) { } +error_code ObjectFile::getSymbolAlignment(DataRefImpl DRI, + uint32_t &Result) const { + Result = 0; + return object_error::success; +} + ObjectFile *ObjectFile::createObjectFile(MemoryBuffer *Object) { if (!Object || Object->getBufferSize() < 64) return 0; diff --git a/contrib/llvm/lib/Support/CommandLine.cpp b/contrib/llvm/lib/Support/CommandLine.cpp index 560d7eb..18d3db5 100644 --- a/contrib/llvm/lib/Support/CommandLine.cpp +++ b/contrib/llvm/lib/Support/CommandLine.cpp @@ -33,6 +33,7 @@ #include "llvm/Support/system_error.h" #include <cerrno> #include <cstdlib> +#include <map> using namespace llvm; using namespace cl; @@ -106,6 +107,17 @@ void Option::addArgument() { MarkOptionsChanged(); } +// This collects the different option categories that have been registered. +typedef SmallPtrSet<OptionCategory*,16> OptionCatSet; +static ManagedStatic<OptionCatSet> RegisteredOptionCategories; + +// Initialise the general option category. +OptionCategory llvm::cl::GeneralCategory("General options"); + +void OptionCategory::registerCategory() +{ + RegisteredOptionCategories->insert(this); +} //===----------------------------------------------------------------------===// // Basic, shared command line option processing machinery. @@ -1222,11 +1234,20 @@ sortOpts(StringMap<Option*> &OptMap, namespace { class HelpPrinter { +protected: const bool ShowHidden; + typedef SmallVector<std::pair<const char *, Option*>,128> StrOptionPairVector; + // Print the options. Opts is assumed to be alphabetically sorted. + virtual void printOptions(StrOptionPairVector &Opts, size_t MaxArgLen) { + for (size_t i = 0, e = Opts.size(); i != e; ++i) + Opts[i].second->printOptionInfo(MaxArgLen); + } public: explicit HelpPrinter(bool showHidden) : ShowHidden(showHidden) {} + virtual ~HelpPrinter() {} + // Invoke the printer. void operator=(bool Value) { if (Value == false) return; @@ -1236,7 +1257,7 @@ public: StringMap<Option*> OptMap; GetOptionInfo(PositionalOpts, SinkOpts, OptMap); - SmallVector<std::pair<const char *, Option*>, 128> Opts; + StrOptionPairVector Opts; sortOpts(OptMap, Opts, ShowHidden); if (ProgramOverview) @@ -1267,12 +1288,12 @@ public: MaxArgLen = std::max(MaxArgLen, Opts[i].second->getOptionWidth()); outs() << "OPTIONS:\n"; - for (size_t i = 0, e = Opts.size(); i != e; ++i) - Opts[i].second->printOptionInfo(MaxArgLen); + printOptions(Opts, MaxArgLen); // Print any extra help the user has declared. for (std::vector<const char *>::iterator I = MoreHelp->begin(), - E = MoreHelp->end(); I != E; ++I) + E = MoreHelp->end(); + I != E; ++I) outs() << *I; MoreHelp->clear(); @@ -1280,21 +1301,152 @@ public: exit(1); } }; + +class CategorizedHelpPrinter : public HelpPrinter { +public: + explicit CategorizedHelpPrinter(bool showHidden) : HelpPrinter(showHidden) {} + + // Helper function for printOptions(). + // It shall return true if A's name should be lexographically + // ordered before B's name. It returns false otherwise. 
+ static bool OptionCategoryCompare(OptionCategory *A, OptionCategory *B) { + int Length = strcmp(A->getName(), B->getName()); + assert(Length != 0 && "Duplicate option categories"); + return Length < 0; + } + + // Make sure we inherit our base class's operator=() + using HelpPrinter::operator= ; + +protected: + virtual void printOptions(StrOptionPairVector &Opts, size_t MaxArgLen) { + std::vector<OptionCategory *> SortedCategories; + std::map<OptionCategory *, std::vector<Option *> > CategorizedOptions; + + // Collect registered option categories into vector in preperation for + // sorting. + for (OptionCatSet::const_iterator I = RegisteredOptionCategories->begin(), + E = RegisteredOptionCategories->end(); + I != E; ++I) + SortedCategories.push_back(*I); + + // Sort the different option categories alphabetically. + assert(SortedCategories.size() > 0 && "No option categories registered!"); + std::sort(SortedCategories.begin(), SortedCategories.end(), + OptionCategoryCompare); + + // Create map to empty vectors. + for (std::vector<OptionCategory *>::const_iterator + I = SortedCategories.begin(), + E = SortedCategories.end(); + I != E; ++I) + CategorizedOptions[*I] = std::vector<Option *>(); + + // Walk through pre-sorted options and assign into categories. + // Because the options are already alphabetically sorted the + // options within categories will also be alphabetically sorted. + for (size_t I = 0, E = Opts.size(); I != E; ++I) { + Option *Opt = Opts[I].second; + assert(CategorizedOptions.count(Opt->Category) > 0 && + "Option has an unregistered category"); + CategorizedOptions[Opt->Category].push_back(Opt); + } + + // Now do printing. + for (std::vector<OptionCategory *>::const_iterator + Category = SortedCategories.begin(), + E = SortedCategories.end(); + Category != E; ++Category) { + // Hide empty categories for -help, but show for -help-hidden. + bool IsEmptyCategory = CategorizedOptions[*Category].size() == 0; + if (!ShowHidden && IsEmptyCategory) + continue; + + // Print category information. + outs() << "\n"; + outs() << (*Category)->getName() << ":\n"; + + // Check if description is set. + if ((*Category)->getDescription() != 0) + outs() << (*Category)->getDescription() << "\n\n"; + else + outs() << "\n"; + + // When using -help-hidden explicitly state if the category has no + // options associated with it. + if (IsEmptyCategory) { + outs() << " This option category has no options.\n"; + continue; + } + // Loop over the options in the category and print. + for (std::vector<Option *>::const_iterator + Opt = CategorizedOptions[*Category].begin(), + E = CategorizedOptions[*Category].end(); + Opt != E; ++Opt) + (*Opt)->printOptionInfo(MaxArgLen); + } + } +}; + +// This wraps the Uncategorizing and Categorizing printers and decides +// at run time which should be invoked. +class HelpPrinterWrapper { +private: + HelpPrinter &UncategorizedPrinter; + CategorizedHelpPrinter &CategorizedPrinter; + +public: + explicit HelpPrinterWrapper(HelpPrinter &UncategorizedPrinter, + CategorizedHelpPrinter &CategorizedPrinter) : + UncategorizedPrinter(UncategorizedPrinter), + CategorizedPrinter(CategorizedPrinter) { } + + // Invoke the printer. + void operator=(bool Value); +}; + } // End anonymous namespace -// Define the two HelpPrinter instances that are used to print out help, or -// help-hidden... 
-// -static HelpPrinter NormalPrinter(false); -static HelpPrinter HiddenPrinter(true); +// Declare the four HelpPrinter instances that are used to print out help, or +// help-hidden as an uncategorized list or in categories. +static HelpPrinter UncategorizedNormalPrinter(false); +static HelpPrinter UncategorizedHiddenPrinter(true); +static CategorizedHelpPrinter CategorizedNormalPrinter(false); +static CategorizedHelpPrinter CategorizedHiddenPrinter(true); + +// Declare HelpPrinter wrappers that will decide whether or not to invoke +// a categorizing help printer +static HelpPrinterWrapper WrappedNormalPrinter(UncategorizedNormalPrinter, + CategorizedNormalPrinter); +static HelpPrinterWrapper WrappedHiddenPrinter(UncategorizedHiddenPrinter, + CategorizedHiddenPrinter); + +// Define uncategorized help printers. +// -help-list is hidden by default because if Option categories are being used +// then -help behaves the same as -help-list. static cl::opt<HelpPrinter, true, parser<bool> > -HOp("help", cl::desc("Display available options (-help-hidden for more)"), - cl::location(NormalPrinter), cl::ValueDisallowed); +HLOp("help-list", + cl::desc("Display list of available options (-help-list-hidden for more)"), + cl::location(UncategorizedNormalPrinter), cl::Hidden, cl::ValueDisallowed); static cl::opt<HelpPrinter, true, parser<bool> > +HLHOp("help-list-hidden", + cl::desc("Display list of all available options"), + cl::location(UncategorizedHiddenPrinter), cl::Hidden, cl::ValueDisallowed); + +// Define uncategorized/categorized help printers. These printers change their +// behaviour at runtime depending on whether one or more Option categories have +// been declared. +static cl::opt<HelpPrinterWrapper, true, parser<bool> > +HOp("help", cl::desc("Display available options (-help-hidden for more)"), + cl::location(WrappedNormalPrinter), cl::ValueDisallowed); + +static cl::opt<HelpPrinterWrapper, true, parser<bool> > HHOp("help-hidden", cl::desc("Display all available options"), - cl::location(HiddenPrinter), cl::Hidden, cl::ValueDisallowed); + cl::location(WrappedHiddenPrinter), cl::Hidden, cl::ValueDisallowed); + + static cl::opt<bool> PrintOptions("print-options", @@ -1306,6 +1458,24 @@ PrintAllOptions("print-all-options", cl::desc("Print all option values after command line parsing"), cl::Hidden, cl::init(false)); +void HelpPrinterWrapper::operator=(bool Value) { + if (Value == false) + return; + + // Decide which printer to invoke. If more than one option category is + // registered then it is useful to show the categorized help instead of + // uncategorized help. + if (RegisteredOptionCategories->size() > 1) { + // unhide -help-list option so user can have uncategorized output if they + // want it. + HLOp.setHiddenFlag(NotHidden); + + CategorizedPrinter = true; // Invoke categorized printer + } + else + UncategorizedPrinter = true; // Invoke uncategorized printer +} + // Print the value of each option. void cl::PrintOptionValues() { if (!PrintOptions && !PrintAllOptions) return; @@ -1393,14 +1563,22 @@ VersOp("version", cl::desc("Display the version of this program"), cl::location(VersionPrinterInstance), cl::ValueDisallowed); // Utility function for printing the help message. -void cl::PrintHelpMessage() { - // This looks weird, but it actually prints the help message. The - // NormalPrinter variable is a HelpPrinter and the help gets printed when - // its operator= is invoked. 
That's because the "normal" usages of the - // help printer is to be assigned true/false depending on whether the - // -help option was given or not. Since we're circumventing that we have - // to make it look like -help was given, so we assign true. - NormalPrinter = true; +void cl::PrintHelpMessage(bool Hidden, bool Categorized) { + // This looks weird, but it actually prints the help message. The Printers are + // types of HelpPrinter and the help gets printed when its operator= is + // invoked. That's because the "normal" usages of the help printer is to be + // assigned true/false depending on whether -help or -help-hidden was given or + // not. Since we're circumventing that we have to make it look like -help or + // -help-hidden were given, so we assign true. + + if (!Hidden && !Categorized) + UncategorizedNormalPrinter = true; + else if (!Hidden && Categorized) + CategorizedNormalPrinter = true; + else if (Hidden && !Categorized) + UncategorizedHiddenPrinter = true; + else + CategorizedHiddenPrinter = true; } /// Utility function for printing version number. @@ -1418,3 +1596,13 @@ void cl::AddExtraVersionPrinter(void (*func)()) { ExtraVersionPrinters->push_back(func); } + +void cl::getRegisteredOptions(StringMap<Option*> &Map) +{ + // Get all the options. + SmallVector<Option*, 4> PositionalOpts; //NOT USED + SmallVector<Option*, 4> SinkOpts; //NOT USED + assert(Map.size() == 0 && "StringMap must be empty"); + GetOptionInfo(PositionalOpts, SinkOpts, Map); + return; +} diff --git a/contrib/llvm/lib/Support/Compression.cpp b/contrib/llvm/lib/Support/Compression.cpp new file mode 100644 index 0000000..fd8a874 --- /dev/null +++ b/contrib/llvm/lib/Support/Compression.cpp @@ -0,0 +1,97 @@ +//===--- Compression.cpp - Compression implementation ---------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements compression functions. 
+// +//===----------------------------------------------------------------------===// + +#include "llvm/Support/Compression.h" +#include "llvm/ADT/OwningPtr.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/Config/config.h" +#include "llvm/Support/Compiler.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/MemoryBuffer.h" +#if LLVM_ENABLE_ZLIB == 1 && HAVE_ZLIB_H +#include <zlib.h> +#endif + +using namespace llvm; + +#if LLVM_ENABLE_ZLIB == 1 && HAVE_LIBZ +static int encodeZlibCompressionLevel(zlib::CompressionLevel Level) { + switch (Level) { + case zlib::NoCompression: return 0; + case zlib::BestSpeedCompression: return 1; + case zlib::DefaultCompression: return Z_DEFAULT_COMPRESSION; + case zlib::BestSizeCompression: return 9; + } + llvm_unreachable("Invalid zlib::CompressionLevel!"); +} + +static zlib::Status encodeZlibReturnValue(int ReturnValue) { + switch (ReturnValue) { + case Z_OK: return zlib::StatusOK; + case Z_MEM_ERROR: return zlib::StatusOutOfMemory; + case Z_BUF_ERROR: return zlib::StatusBufferTooShort; + case Z_STREAM_ERROR: return zlib::StatusInvalidArg; + case Z_DATA_ERROR: return zlib::StatusInvalidData; + default: llvm_unreachable("unknown zlib return status!"); + } +} + +bool zlib::isAvailable() { return true; } +zlib::Status zlib::compress(StringRef InputBuffer, + OwningPtr<MemoryBuffer> &CompressedBuffer, + CompressionLevel Level) { + unsigned long CompressedSize = ::compressBound(InputBuffer.size()); + OwningArrayPtr<char> TmpBuffer(new char[CompressedSize]); + int CLevel = encodeZlibCompressionLevel(Level); + Status Res = encodeZlibReturnValue(::compress2( + (Bytef *)TmpBuffer.get(), &CompressedSize, + (const Bytef *)InputBuffer.data(), InputBuffer.size(), CLevel)); + if (Res == StatusOK) { + CompressedBuffer.reset(MemoryBuffer::getMemBufferCopy( + StringRef(TmpBuffer.get(), CompressedSize))); + // Tell MSan that memory initialized by zlib is valid. + __msan_unpoison(CompressedBuffer->getBufferStart(), CompressedSize); + } + return Res; +} + +zlib::Status zlib::uncompress(StringRef InputBuffer, + OwningPtr<MemoryBuffer> &UncompressedBuffer, + size_t UncompressedSize) { + OwningArrayPtr<char> TmpBuffer(new char[UncompressedSize]); + Status Res = encodeZlibReturnValue( + ::uncompress((Bytef *)TmpBuffer.get(), (uLongf *)&UncompressedSize, + (const Bytef *)InputBuffer.data(), InputBuffer.size())); + if (Res == StatusOK) { + UncompressedBuffer.reset(MemoryBuffer::getMemBufferCopy( + StringRef(TmpBuffer.get(), UncompressedSize))); + // Tell MSan that memory initialized by zlib is valid. 
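
As an illustrative aside (not part of the patch): a hedged usage sketch of the zlib wrapper this new Compression.cpp introduces. It exercises only the API visible in the hunk; the round-trip helper name is made up for the example.

```cpp
#include "llvm/ADT/OwningPtr.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/Support/Compression.h"
#include "llvm/Support/MemoryBuffer.h"

using namespace llvm;

// Compress Input, decompress it again, and check that the original came back.
static bool zlibRoundTrip(StringRef Input) {
  if (!zlib::isAvailable())
    return false;  // Built without zlib: the calls below would return StatusUnsupported.

  OwningPtr<MemoryBuffer> Compressed;
  if (zlib::compress(Input, Compressed, zlib::DefaultCompression) != zlib::StatusOK)
    return false;

  OwningPtr<MemoryBuffer> Uncompressed;
  if (zlib::uncompress(Compressed->getBuffer(), Uncompressed, Input.size()) != zlib::StatusOK)
    return false;

  return Uncompressed->getBuffer() == Input;
}
```
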
+ __msan_unpoison(UncompressedBuffer->getBufferStart(), UncompressedSize); + } + return Res; +} + +#else +bool zlib::isAvailable() { return false; } +zlib::Status zlib::compress(StringRef InputBuffer, + OwningPtr<MemoryBuffer> &CompressedBuffer, + CompressionLevel Level) { + return zlib::StatusUnsupported; +} +zlib::Status zlib::uncompress(StringRef InputBuffer, + OwningPtr<MemoryBuffer> &UncompressedBuffer, + size_t UncompressedSize) { + return zlib::StatusUnsupported; +} +#endif + diff --git a/contrib/llvm/lib/Support/DataExtractor.cpp b/contrib/llvm/lib/Support/DataExtractor.cpp index 3d5cce0..a564d21 100644 --- a/contrib/llvm/lib/Support/DataExtractor.cpp +++ b/contrib/llvm/lib/Support/DataExtractor.cpp @@ -20,7 +20,7 @@ static T getU(uint32_t *offset_ptr, const DataExtractor *de, uint32_t offset = *offset_ptr; if (de->isValidOffsetForDataOfSize(offset, sizeof(val))) { std::memcpy(&val, &Data[offset], sizeof(val)); - if (sys::isLittleEndianHost() != isLittleEndian) + if (sys::IsLittleEndianHost != isLittleEndian) val = sys::SwapByteOrder(val); // Advance the offset diff --git a/contrib/llvm/lib/Support/FoldingSet.cpp b/contrib/llvm/lib/Support/FoldingSet.cpp index 36e33b5..145f12d 100644 --- a/contrib/llvm/lib/Support/FoldingSet.cpp +++ b/contrib/llvm/lib/Support/FoldingSet.cpp @@ -101,7 +101,7 @@ void FoldingSetNodeID::AddString(StringRef String) { // Otherwise do it the hard way. // To be compatible with above bulk transfer, we need to take endianness // into account. - if (sys::isBigEndianHost()) { + if (sys::IsBigEndianHost) { for (Pos += 4; Pos <= Size; Pos += 4) { unsigned V = ((unsigned char)String[Pos - 4] << 24) | ((unsigned char)String[Pos - 3] << 16) | @@ -110,7 +110,7 @@ void FoldingSetNodeID::AddString(StringRef String) { Bits.push_back(V); } } else { - assert(sys::isLittleEndianHost() && "Unexpected host endianness"); + assert(sys::IsLittleEndianHost && "Unexpected host endianness"); for (Pos += 4; Pos <= Size; Pos += 4) { unsigned V = ((unsigned char)String[Pos - 1] << 24) | ((unsigned char)String[Pos - 2] << 16) | diff --git a/contrib/llvm/lib/Support/Host.cpp b/contrib/llvm/lib/Support/Host.cpp index 73d98d1..a7c7a95 100644 --- a/contrib/llvm/lib/Support/Host.cpp +++ b/contrib/llvm/lib/Support/Host.cpp @@ -112,19 +112,19 @@ static bool GetX86CpuIDAndInfo(unsigned value, unsigned *rEAX, #endif } -static bool OSHasAVXSupport() {
-#if defined(__GNUC__)
- // Check xgetbv; this uses a .byte sequence instead of the instruction
- // directly because older assemblers do not include support for xgetbv and
- // there is no easy way to conditionally compile based on the assembler used.
- int rEAX, rEDX;
- __asm__ (".byte 0x0f, 0x01, 0xd0" : "=a" (rEAX), "=d" (rEDX) : "c" (0));
-#elif defined(_MSC_FULL_VER) && _MSC_FULL_VER >= 160040219
- unsigned long long rEAX = _xgetbv(_XCR_XFEATURE_ENABLED_MASK);
-#else
- int rEAX = 0; // Ensures we return false
-#endif
- return (rEAX & 6) == 6;
+static bool OSHasAVXSupport() { +#if defined(__GNUC__) + // Check xgetbv; this uses a .byte sequence instead of the instruction + // directly because older assemblers do not include support for xgetbv and + // there is no easy way to conditionally compile based on the assembler used. + int rEAX, rEDX; + __asm__ (".byte 0x0f, 0x01, 0xd0" : "=a" (rEAX), "=d" (rEDX) : "c" (0)); +#elif defined(_MSC_FULL_VER) && defined(_XCR_XFEATURE_ENABLED_MASK) + unsigned long long rEAX = _xgetbv(_XCR_XFEATURE_ENABLED_MASK); +#else + int rEAX = 0; // Ensures we return false +#endif + return (rEAX & 6) == 6; } static void DetectX86FamilyModel(unsigned EAX, unsigned &Family, @@ -355,10 +355,15 @@ std::string sys::getHostCPUName() { case 20: return "btver1"; case 21: - if (Model <= 15) - return "bdver1"; - else if (Model <= 31) + if (!HasAVX) // If the OS doesn't support AVX provide a sane fallback. + return "btver1"; + if (Model > 15 && Model <= 31) return "bdver2"; + return "bdver1"; + case 22: + if (!HasAVX) // If the OS doesn't support AVX provide a sane fallback. + return "btver1"; + return "btver2"; default: return "generic"; } @@ -608,7 +613,7 @@ bool sys::getHostCPUFeatures(StringMap<bool> &Features){ #endif std::string sys::getProcessTriple() { - Triple PT(LLVM_HOSTTRIPLE); + Triple PT(LLVM_HOST_TRIPLE); if (sizeof(void *) == 8 && PT.isArch32Bit()) PT = PT.get64BitArchVariant(); diff --git a/contrib/llvm/lib/Support/LockFileManager.cpp b/contrib/llvm/lib/Support/LockFileManager.cpp index 92d8b83..2917e27 100644 --- a/contrib/llvm/lib/Support/LockFileManager.cpp +++ b/contrib/llvm/lib/Support/LockFileManager.cpp @@ -174,8 +174,8 @@ void LockFileManager::waitForUnlock() { Interval.tv_sec = 0; Interval.tv_nsec = 1000000; #endif - // Don't wait more than an hour for the file to appear. - const unsigned MaxSeconds = 3600; + // Don't wait more than five minutes for the file to appear. + unsigned MaxSeconds = 300; bool LockFileGone = false; do { // Sleep for the designated interval, to allow the owning process time to @@ -187,21 +187,48 @@ void LockFileManager::waitForUnlock() { #else nanosleep(&Interval, NULL); #endif - // If the lock file no longer exists, wait for the actual file. bool Exists = false; + bool LockFileJustDisappeared = false; + + // If the lock file is still expected to be there, check whether it still + // is. if (!LockFileGone) { if (!sys::fs::exists(LockFileName.str(), Exists) && !Exists) { LockFileGone = true; + LockFileJustDisappeared = true; Exists = false; } } + + // If the lock file is no longer there, check if the original file is + // available now. if (LockFileGone) { - if (!sys::fs::exists(FileName.str(), Exists) && Exists) + if (!sys::fs::exists(FileName.str(), Exists) && Exists) { return; + } + + // The lock file is gone, so now we're waiting for the original file to + // show up. If this just happened, reset our waiting intervals and keep + // waiting. + if (LockFileJustDisappeared) { + MaxSeconds = 5; + +#if LLVM_ON_WIN32 + Interval = 1; +#else + Interval.tv_sec = 0; + Interval.tv_nsec = 1000000; +#endif + continue; + } } - if (!processStillExecuting((*Owner).first, (*Owner).second)) + // If we're looking for the lock file to disappear, but the process + // owning the lock died without cleaning up, just bail out. + if (!LockFileGone && + !processStillExecuting((*Owner).first, (*Owner).second)) { return; + } // Exponentially increase the time we wait for the lock to be removed. 
#if LLVM_ON_WIN32 diff --git a/contrib/llvm/lib/Support/PathV2.cpp b/contrib/llvm/lib/Support/PathV2.cpp index 58a6ea7..ac53a9e9 100644 --- a/contrib/llvm/lib/Support/PathV2.cpp +++ b/contrib/llvm/lib/Support/PathV2.cpp @@ -789,8 +789,11 @@ file_magic identify_magic(StringRef magic) { case '\177': if (magic[1] == 'E' && magic[2] == 'L' && magic[3] == 'F') { - if (magic.size() >= 18 && magic[17] == 0) - switch (magic[16]) { + bool Data2MSB = magic[5] == 2; + unsigned high = Data2MSB ? 16 : 17; + unsigned low = Data2MSB ? 17 : 16; + if (magic.size() >= 18 && magic[high] == 0) + switch (magic[low]) { default: break; case 1: return file_magic::elf_relocatable; case 2: return file_magic::elf_executable; diff --git a/contrib/llvm/lib/Support/Triple.cpp b/contrib/llvm/lib/Support/Triple.cpp index d2508ac..412e34c 100644 --- a/contrib/llvm/lib/Support/Triple.cpp +++ b/contrib/llvm/lib/Support/Triple.cpp @@ -32,6 +32,7 @@ const char *Triple::getArchTypeName(ArchType Kind) { case r600: return "r600"; case sparc: return "sparc"; case sparcv9: return "sparcv9"; + case systemz: return "s390x"; case tce: return "tce"; case thumb: return "thumb"; case x86: return "i386"; @@ -76,6 +77,8 @@ const char *Triple::getArchTypePrefix(ArchType Kind) { case sparcv9: case sparc: return "sparc"; + case systemz: return "systemz"; + case x86: case x86_64: return "x86"; @@ -170,6 +173,7 @@ Triple::ArchType Triple::getArchTypeForLLVMName(StringRef Name) { .Case("hexagon", hexagon) .Case("sparc", sparc) .Case("sparcv9", sparcv9) + .Case("systemz", systemz) .Case("tce", tce) .Case("thumb", thumb) .Case("x86", x86) @@ -233,6 +237,7 @@ static Triple::ArchType parseArch(StringRef ArchName) { .Case("mips64el", Triple::mips64el) .Case("r600", Triple::r600) .Case("hexagon", Triple::hexagon) + .Case("s390x", Triple::systemz) .Case("sparc", Triple::sparc) .Case("sparcv9", Triple::sparcv9) .Case("tce", Triple::tce) @@ -687,6 +692,7 @@ static unsigned getArchPointerBitWidth(llvm::Triple::ArchType Arch) { case llvm::Triple::nvptx64: case llvm::Triple::ppc64: case llvm::Triple::sparcv9: + case llvm::Triple::systemz: case llvm::Triple::x86_64: case llvm::Triple::spir64: return 64; @@ -712,6 +718,7 @@ Triple Triple::get32BitArchVariant() const { case Triple::UnknownArch: case Triple::aarch64: case Triple::msp430: + case Triple::systemz: T.setArch(UnknownArch); break; @@ -769,6 +776,7 @@ Triple Triple::get64BitArchVariant() const { case Triple::nvptx64: case Triple::ppc64: case Triple::sparcv9: + case Triple::systemz: case Triple::x86_64: // Already 64-bit. break; diff --git a/contrib/llvm/lib/Support/Unix/Memory.inc b/contrib/llvm/lib/Support/Unix/Memory.inc index f397408..2bb9bf1 100644 --- a/contrib/llvm/lib/Support/Unix/Memory.inc +++ b/contrib/llvm/lib/Support/Unix/Memory.inc @@ -325,7 +325,7 @@ void Memory::InvalidateInstructionCache(const void *Addr, for (intptr_t Line = StartLine; Line < EndLine; Line += LineSize) asm volatile("icbi 0, %0" : : "r"(Line)); asm volatile("isync"); -# elif defined(__arm__) && defined(__GNUC__) && !defined(__FreeBSD__) +# elif (defined(__arm__) || defined(__aarch64__)) && defined(__GNUC__) && !defined(__FreeBSD__) // FIXME: Can we safely always call this for __GNUC__ everywhere? 
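
As an illustrative aside (not part of the patch): the identify_magic() fix earlier in this block reads the 16-bit ELF e_type field while honoring the file's EI_DATA byte-order byte instead of assuming little-endian. The same check written out standalone, with made-up enum and function names rather than the LLVM file_magic API.

```cpp
#include "llvm/ADT/StringRef.h"

enum ElfKind { ElfUnknown, ElfRelocatable, ElfExecutable, ElfSharedObject, ElfCore };

static ElfKind classifyElf(llvm::StringRef Magic) {
  if (Magic.size() < 18 || Magic[0] != '\177' || Magic[1] != 'E' ||
      Magic[2] != 'L' || Magic[3] != 'F')
    return ElfUnknown;

  bool Data2MSB = Magic[5] == 2;        // EI_DATA == ELFDATA2MSB -> big endian
  unsigned High = Data2MSB ? 16 : 17;   // byte that must be zero for small e_type values
  unsigned Low  = Data2MSB ? 17 : 16;   // byte that carries the e_type value

  if (Magic[High] != 0)
    return ElfUnknown;
  switch (Magic[Low]) {
  case 1: return ElfRelocatable;        // ET_REL
  case 2: return ElfExecutable;         // ET_EXEC
  case 3: return ElfSharedObject;       // ET_DYN
  case 4: return ElfCore;               // ET_CORE
  default: return ElfUnknown;
  }
}
```
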
const char *Start = static_cast<const char *>(Addr); const char *End = Start + Len; diff --git a/contrib/llvm/lib/Support/Unix/PathV2.inc b/contrib/llvm/lib/Support/Unix/PathV2.inc index a3dfd4b..7e0aead 100644 --- a/contrib/llvm/lib/Support/Unix/PathV2.inc +++ b/contrib/llvm/lib/Support/Unix/PathV2.inc @@ -430,9 +430,7 @@ rety_open_create: if (SavedErrno == errc::file_exists) goto retry_random_path; // If path prefix doesn't exist, try to create it. - if (SavedErrno == errc::no_such_file_or_directory && - !exists(path::parent_path(RandomPath)) && - !TriedToCreateParent) { + if (SavedErrno == errc::no_such_file_or_directory && !TriedToCreateParent) { TriedToCreateParent = true; StringRef p(RandomPath); SmallString<64> dir_to_create; diff --git a/contrib/llvm/lib/Support/Unix/Program.inc b/contrib/llvm/lib/Support/Unix/Program.inc index 117151c..aa03d48 100644 --- a/contrib/llvm/lib/Support/Unix/Program.inc +++ b/contrib/llvm/lib/Support/Unix/Program.inc @@ -32,6 +32,9 @@ #if HAVE_FCNTL_H #include <fcntl.h> #endif +#if HAVE_UNISTD_H +#include <unistd.h> +#endif #ifdef HAVE_POSIX_SPAWN #include <spawn.h> #if !defined(__APPLE__) @@ -409,4 +412,25 @@ error_code Program::ChangeStderrToBinary(){ return make_error_code(errc::success); } +bool llvm::sys::argumentsFitWithinSystemLimits(ArrayRef<const char*> Args) { + static long ArgMax = sysconf(_SC_ARG_MAX); + + // System says no practical limit. + if (ArgMax == -1) + return true; + + // Conservatively account for space required by environment variables. + ArgMax /= 2; + + size_t ArgLength = 0; + for (ArrayRef<const char*>::iterator I = Args.begin(), E = Args.end(); + I != E; ++I) { + ArgLength += strlen(*I) + 1; + if (ArgLength > size_t(ArgMax)) { + return false; + } + } + return true; +} + } diff --git a/contrib/llvm/lib/Support/Unix/Signals.inc b/contrib/llvm/lib/Support/Unix/Signals.inc index 66338f1..64d1fc1 100644 --- a/contrib/llvm/lib/Support/Unix/Signals.inc +++ b/contrib/llvm/lib/Support/Unix/Signals.inc @@ -27,10 +27,12 @@ #if HAVE_SYS_STAT_H #include <sys/stat.h> #endif -#if HAVE_DLFCN_H && __GNUG__ -#include <dlfcn.h> +#if HAVE_CXXABI_H #include <cxxabi.h> #endif +#if HAVE_DLFCN_H +#include <dlfcn.h> +#endif #if HAVE_MACH_MACH_H #include <mach/mach.h> #endif @@ -184,6 +186,15 @@ static RETSIGTYPE SignalHandler(int Sig) { // Otherwise if it is a fault (like SEGV) run any handler. for (unsigned i = 0, e = CallBacksToRun.size(); i != e; ++i) CallBacksToRun[i].first(CallBacksToRun[i].second); + +#ifdef __s390__ + // On S/390, certain signals are delivered with PSW Address pointing to + // *after* the faulting instruction. Simply returning from the signal + // handler would continue execution after that point, instead of + // re-raising the signal. Raise the signal manually in those cases. 
+ if (Sig == SIGILL || Sig == SIGFPE || Sig == SIGTRAP) + raise(Sig); +#endif } void llvm::sys::RunInterruptHandlers() { @@ -290,9 +301,13 @@ void llvm::sys::PrintStackTrace(FILE *FD) { (int)(sizeof(void*) * 2) + 2, (unsigned long)StackTrace[i]); if (dlinfo.dli_sname != NULL) { - int res; fputc(' ', FD); +# if HAVE_CXXABI_H + int res; char* d = abi::__cxa_demangle(dlinfo.dli_sname, NULL, NULL, &res); +# else + char* d = NULL; +# endif if (d == NULL) fputs(dlinfo.dli_sname, FD); else fputs(d, FD); free(d); diff --git a/contrib/llvm/lib/Support/Windows/Program.inc b/contrib/llvm/lib/Support/Windows/Program.inc index 691d6d4..619ae5d 100644 --- a/contrib/llvm/lib/Support/Windows/Program.inc +++ b/contrib/llvm/lib/Support/Windows/Program.inc @@ -126,20 +126,58 @@ static bool ArgNeedsQuotes(const char *Str) { return Str[0] == '\0' || strpbrk(Str, "\t \"&\'()*<>\\`^|") != 0; } +/// CountPrecedingBackslashes - Returns the number of backslashes preceding Cur +/// in the C string Start. +static unsigned int CountPrecedingBackslashes(const char *Start, + const char *Cur) { + unsigned int Count = 0; + --Cur; + while (Cur >= Start && *Cur == '\\') { + ++Count; + --Cur; + } + return Count; +} + +/// EscapePrecedingEscapes - Append a backslash to Dst for every backslash +/// preceding Cur in the Start string. Assumes Dst has enough space. +static char *EscapePrecedingEscapes(char *Dst, const char *Start, + const char *Cur) { + unsigned PrecedingEscapes = CountPrecedingBackslashes(Start, Cur); + while (PrecedingEscapes > 0) { + *Dst++ = '\\'; + --PrecedingEscapes; + } + return Dst; +} /// ArgLenWithQuotes - Check whether argument needs to be quoted when calling /// CreateProcess and returns length of quoted arg with escaped quotes static unsigned int ArgLenWithQuotes(const char *Str) { - unsigned int len = ArgNeedsQuotes(Str) ? 2 : 0; + const char *Start = Str; + bool Quoted = ArgNeedsQuotes(Str); + unsigned int len = Quoted ? 2 : 0; while (*Str != '\0') { - if (*Str == '\"') - ++len; + if (*Str == '\"') { + // We need to add a backslash, but ensure that it isn't escaped. + unsigned PrecedingEscapes = CountPrecedingBackslashes(Start, Str); + len += PrecedingEscapes + 1; + } + // Note that we *don't* need to escape runs of backslashes that don't + // precede a double quote! See MSDN: + // http://msdn.microsoft.com/en-us/library/17w5ykft%28v=vs.85%29.aspx ++len; ++Str; } + if (Quoted) { + // Make sure the closing quote doesn't get escaped by a trailing backslash. + unsigned PrecedingEscapes = CountPrecedingBackslashes(Start, Str); + len += PrecedingEscapes + 1; + } + return len; } @@ -180,20 +218,27 @@ Program::Execute(const Path& path, for (unsigned i = 0; args[i]; i++) { const char *arg = args[i]; + const char *start = arg; bool needsQuoting = ArgNeedsQuotes(arg); if (needsQuoting) *p++ = '"'; while (*arg != '\0') { - if (*arg == '\"') + if (*arg == '\"') { + // Escape all preceding escapes (if any), and then escape the quote. + p = EscapePrecedingEscapes(p, start, arg); *p++ = '\\'; + } *p++ = *arg++; } - if (needsQuoting) + if (needsQuoting) { + // Make sure our quote doesn't get escaped by a trailing backslash. + p = EscapePrecedingEscapes(p, start, arg); *p++ = '"'; + } *p++ = ' '; } @@ -396,4 +441,20 @@ error_code Program::ChangeStderrToBinary(){ return make_error_code(errc::success); } +bool llvm::sys::argumentsFitWithinSystemLimits(ArrayRef<const char*> Args) { + // The documented max length of the command line passed to CreateProcess. 
+ static const size_t MaxCommandStringLength = 32768; + size_t ArgLength = 0; + for (ArrayRef<const char*>::iterator I = Args.begin(), E = Args.end(); + I != E; ++I) { + // Account for the trailing space for every arg but the last one and the + // trailing NULL of the last argument. + ArgLength += ArgLenWithQuotes(*I) + 1; + if (ArgLength > MaxCommandStringLength) { + return false; + } + } + return true; +} + } diff --git a/contrib/llvm/lib/Support/Windows/Signals.inc b/contrib/llvm/lib/Support/Windows/Signals.inc index 3dd6660..b18b4d1 100644 --- a/contrib/llvm/lib/Support/Windows/Signals.inc +++ b/contrib/llvm/lib/Support/Windows/Signals.inc @@ -178,6 +178,19 @@ namespace llvm { //===----------------------------------------------------------------------===// #ifdef _MSC_VER +/// AvoidMessageBoxHook - Emulates hitting "retry" from an "abort, retry, +/// ignore" CRT debug report dialog. "retry" raises an exception which +/// ultimately triggers our stack dumper. +static int AvoidMessageBoxHook(int ReportType, char *Message, int *Return) { + // Set *Return to the retry code for the return value of _CrtDbgReport: + // http://msdn.microsoft.com/en-us/library/8hyw4sy7(v=vs.71).aspx + // This may also trigger just-in-time debugging via DebugBreak(). + if (Return) + *Return = 1; + // Don't call _CrtDbgReport. + return TRUE; +} + /// CRTReportHook - Function called on a CRT debugging event. static int CRTReportHook(int ReportType, char *Message, int *Return) { // Don't cause a DebugBreak() on return. @@ -238,6 +251,15 @@ static void RegisterHandler() { OldFilter = SetUnhandledExceptionFilter(LLVMUnhandledExceptionFilter); SetConsoleCtrlHandler(LLVMConsoleCtrlHandler, TRUE); +#ifdef _MSC_VER + const char *EnableMsgbox = getenv("LLVM_ENABLE_CRT_REPORT"); + if (!EnableMsgbox || strcmp("0", EnableMsgbox) == 0) { + // Setting a report hook overrides the default behavior of popping an "abort, + // retry, or ignore" dialog. + _CrtSetReportHook(AvoidMessageBoxHook); + } +#endif + // Environment variable to disable any kind of crash dialog. if (getenv("LLVM_DISABLE_CRASH_REPORT")) { #ifdef _MSC_VER diff --git a/contrib/llvm/lib/Support/YAMLParser.cpp b/contrib/llvm/lib/Support/YAMLParser.cpp index 2cead20..213f5e1 100644 --- a/contrib/llvm/lib/Support/YAMLParser.cpp +++ b/contrib/llvm/lib/Support/YAMLParser.cpp @@ -260,7 +260,7 @@ public: Token getNext(); void printError(SMLoc Loc, SourceMgr::DiagKind Kind, const Twine &Message, - ArrayRef<SMRange> Ranges = ArrayRef<SMRange>()) { + ArrayRef<SMRange> Ranges = None) { SM.PrintMessage(Loc, Kind, Message, Ranges); } diff --git a/contrib/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp b/contrib/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp index dc41f2f..daa7f1d 100644 --- a/contrib/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp +++ b/contrib/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp @@ -367,9 +367,8 @@ AArch64FrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF, // shoving a base register and an offset into the instruction then we may well // need to scavenge registers. We should either specifically add an // callee-save register for this purpose or allocate an extra spill slot. 
- bool BigStack = - (RS && MFI->estimateStackSize(MF) >= TII.estimateRSStackLimit(MF)) + MFI->estimateStackSize(MF) >= TII.estimateRSStackLimit(MF) || MFI->hasVarSizedObjects() // Access will be from X29: messes things up || (MFI->adjustsStack() && !hasReservedCallFrame(MF)); @@ -392,6 +391,8 @@ AArch64FrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF, if (ExtraReg != 0) { MF.getRegInfo().setPhysRegUsed(ExtraReg); } else { + assert(RS && "Expect register scavenger to be available"); + // Create a stack slot for scavenging purposes. PrologEpilogInserter // helpfully places it near either SP or FP for us to avoid // infinitely-regression during scavenging. diff --git a/contrib/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp b/contrib/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp index 46b8221..102c71b 100644 --- a/contrib/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp +++ b/contrib/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp @@ -70,6 +70,15 @@ public: return SelectCVTFixedPosOperand(N, FixedPos, RegWidth); } + /// Used for pre-lowered address-reference nodes, so we already know + /// the fields match. This operand's job is simply to add an + /// appropriate shift operand (i.e. 0) to the MOVZ/MOVK instruction. + bool SelectMOVWAddressRef(SDValue N, SDValue &Imm, SDValue &Shift) { + Imm = N; + Shift = CurDAG->getTargetConstant(0, MVT::i32); + return true; + } + bool SelectFPZeroOperand(SDValue N, SDValue &Dummy); bool SelectCVTFixedPosOperand(SDValue N, SDValue &FixedPos, @@ -88,6 +97,13 @@ public: bool SelectTSTBOperand(SDValue N, SDValue &FixedPos, unsigned RegWidth); + SDNode *SelectAtomic(SDNode *N, unsigned Op8, unsigned Op16, unsigned Op32, + unsigned Op64); + + /// Put the given constant into a pool and return a DAG which will give its + /// address. 
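For orientation, the kinds of source-level constants that reach the literal-pool path served by getConstantPoolItemAddress() look roughly like the sketch below; whether a given integer is actually pooled or built with a MOVZ/MOVK sequence is still decided by the existing cost heuristics, so treat these as illustrative only:

    // Illustrative inputs only; pool-vs-move-immediate selection is made by the
    // surrounding heuristics, not by anything in this sketch.
    double pi() { return 3.141592653589793; }                    // FP literal: pool load
    unsigned long long bits() { return 0x1234567890abcdefULL; }  // may be pooled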
+ SDValue getConstantPoolItemAddress(DebugLoc DL, const Constant *CV); + SDNode *TrySelectToMoveImm(SDNode *N); SDNode *LowerToFPLitPool(SDNode *Node); SDNode *SelectToLitPool(SDNode *N); @@ -224,12 +240,51 @@ SDNode *AArch64DAGToDAGISel::TrySelectToMoveImm(SDNode *Node) { return ResNode; } +SDValue +AArch64DAGToDAGISel::getConstantPoolItemAddress(DebugLoc DL, + const Constant *CV) { + EVT PtrVT = TLI.getPointerTy(); + + switch (TLI.getTargetMachine().getCodeModel()) { + case CodeModel::Small: { + unsigned Alignment = + TLI.getDataLayout()->getABITypeAlignment(CV->getType()); + return CurDAG->getNode( + AArch64ISD::WrapperSmall, DL, PtrVT, + CurDAG->getTargetConstantPool(CV, PtrVT, 0, 0, AArch64II::MO_NO_FLAG), + CurDAG->getTargetConstantPool(CV, PtrVT, 0, 0, AArch64II::MO_LO12), + CurDAG->getConstant(Alignment, MVT::i32)); + } + case CodeModel::Large: { + SDNode *LitAddr; + LitAddr = CurDAG->getMachineNode( + AArch64::MOVZxii, DL, PtrVT, + CurDAG->getTargetConstantPool(CV, PtrVT, 0, 0, AArch64II::MO_ABS_G3), + CurDAG->getTargetConstant(0, MVT::i32)); + LitAddr = CurDAG->getMachineNode( + AArch64::MOVKxii, DL, PtrVT, SDValue(LitAddr, 0), + CurDAG->getTargetConstantPool(CV, PtrVT, 0, 0, AArch64II::MO_ABS_G2_NC), + CurDAG->getTargetConstant(0, MVT::i32)); + LitAddr = CurDAG->getMachineNode( + AArch64::MOVKxii, DL, PtrVT, SDValue(LitAddr, 0), + CurDAG->getTargetConstantPool(CV, PtrVT, 0, 0, AArch64II::MO_ABS_G1_NC), + CurDAG->getTargetConstant(0, MVT::i32)); + LitAddr = CurDAG->getMachineNode( + AArch64::MOVKxii, DL, PtrVT, SDValue(LitAddr, 0), + CurDAG->getTargetConstantPool(CV, PtrVT, 0, 0, AArch64II::MO_ABS_G0_NC), + CurDAG->getTargetConstant(0, MVT::i32)); + return SDValue(LitAddr, 0); + } + default: + llvm_unreachable("Only small and large code models supported now"); + } +} + SDNode *AArch64DAGToDAGISel::SelectToLitPool(SDNode *Node) { DebugLoc DL = Node->getDebugLoc(); uint64_t UnsignedVal = cast<ConstantSDNode>(Node)->getZExtValue(); int64_t SignedVal = cast<ConstantSDNode>(Node)->getSExtValue(); EVT DestType = Node->getValueType(0); - EVT PtrVT = TLI.getPointerTy(); // Since we may end up loading a 64-bit constant from a 32-bit entry the // constant in the pool may have a different type to the eventual node. 
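The large-code-model branch above materializes the pool address as a MOVZ of the top granule followed by three MOVKs. A small standalone sketch of the G3..G0 split that the MO_ABS_G* flags describe (the address value is made up):

    #include <cstdint>
    #include <cstdio>

    int main() {
      uint64_t Addr = 0x00007fabcdef1234ULL;   // hypothetical absolute address
      for (int G = 3; G >= 0; --G)             // G3 = bits 63:48 ... G0 = bits 15:0
        std::printf("G%d = 0x%04llx\n", G,
                    (unsigned long long)((Addr >> (16 * G)) & 0xffff));
      return 0;
    }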
@@ -256,14 +311,8 @@ SDNode *AArch64DAGToDAGISel::SelectToLitPool(SDNode *Node) { Constant *CV = ConstantInt::get(Type::getIntNTy(*CurDAG->getContext(), MemType.getSizeInBits()), UnsignedVal); - SDValue PoolAddr; + SDValue PoolAddr = getConstantPoolItemAddress(DL, CV); unsigned Alignment = TLI.getDataLayout()->getABITypeAlignment(CV->getType()); - PoolAddr = CurDAG->getNode(AArch64ISD::WrapperSmall, DL, PtrVT, - CurDAG->getTargetConstantPool(CV, PtrVT, 0, 0, - AArch64II::MO_NO_FLAG), - CurDAG->getTargetConstantPool(CV, PtrVT, 0, 0, - AArch64II::MO_LO12), - CurDAG->getConstant(Alignment, MVT::i32)); return CurDAG->getExtLoad(Extension, DL, DestType, CurDAG->getEntryNode(), PoolAddr, @@ -276,20 +325,10 @@ SDNode *AArch64DAGToDAGISel::SelectToLitPool(SDNode *Node) { SDNode *AArch64DAGToDAGISel::LowerToFPLitPool(SDNode *Node) { DebugLoc DL = Node->getDebugLoc(); const ConstantFP *FV = cast<ConstantFPSDNode>(Node)->getConstantFPValue(); - EVT PtrVT = TLI.getPointerTy(); EVT DestType = Node->getValueType(0); unsigned Alignment = TLI.getDataLayout()->getABITypeAlignment(FV->getType()); - SDValue PoolAddr; - - assert(TM.getCodeModel() == CodeModel::Small && - "Only small code model supported"); - PoolAddr = CurDAG->getNode(AArch64ISD::WrapperSmall, DL, PtrVT, - CurDAG->getTargetConstantPool(FV, PtrVT, 0, 0, - AArch64II::MO_NO_FLAG), - CurDAG->getTargetConstantPool(FV, PtrVT, 0, 0, - AArch64II::MO_LO12), - CurDAG->getConstant(Alignment, MVT::i32)); + SDValue PoolAddr = getConstantPoolItemAddress(DL, FV); return CurDAG->getLoad(DestType, DL, CurDAG->getEntryNode(), PoolAddr, MachinePointerInfo::getConstantPool(), @@ -318,6 +357,38 @@ AArch64DAGToDAGISel::SelectTSTBOperand(SDValue N, SDValue &FixedPos, return true; } +SDNode *AArch64DAGToDAGISel::SelectAtomic(SDNode *Node, unsigned Op8, + unsigned Op16,unsigned Op32, + unsigned Op64) { + // Mostly direct translation to the given operations, except that we preserve + // the AtomicOrdering for use later on. 
+ AtomicSDNode *AN = cast<AtomicSDNode>(Node); + EVT VT = AN->getMemoryVT(); + + unsigned Op; + if (VT == MVT::i8) + Op = Op8; + else if (VT == MVT::i16) + Op = Op16; + else if (VT == MVT::i32) + Op = Op32; + else if (VT == MVT::i64) + Op = Op64; + else + llvm_unreachable("Unexpected atomic operation"); + + SmallVector<SDValue, 4> Ops; + for (unsigned i = 1; i < AN->getNumOperands(); ++i) + Ops.push_back(AN->getOperand(i)); + + Ops.push_back(CurDAG->getTargetConstant(AN->getOrdering(), MVT::i32)); + Ops.push_back(AN->getOperand(0)); // Chain moves to the end + + return CurDAG->SelectNodeTo(Node, Op, + AN->getValueType(0), MVT::Other, + &Ops[0], Ops.size()); +} + SDNode *AArch64DAGToDAGISel::Select(SDNode *Node) { // Dump information about the Node being selected DEBUG(dbgs() << "Selecting: "; Node->dump(CurDAG); dbgs() << "\n"); @@ -328,6 +399,78 @@ SDNode *AArch64DAGToDAGISel::Select(SDNode *Node) { } switch (Node->getOpcode()) { + case ISD::ATOMIC_LOAD_ADD: + return SelectAtomic(Node, + AArch64::ATOMIC_LOAD_ADD_I8, + AArch64::ATOMIC_LOAD_ADD_I16, + AArch64::ATOMIC_LOAD_ADD_I32, + AArch64::ATOMIC_LOAD_ADD_I64); + case ISD::ATOMIC_LOAD_SUB: + return SelectAtomic(Node, + AArch64::ATOMIC_LOAD_SUB_I8, + AArch64::ATOMIC_LOAD_SUB_I16, + AArch64::ATOMIC_LOAD_SUB_I32, + AArch64::ATOMIC_LOAD_SUB_I64); + case ISD::ATOMIC_LOAD_AND: + return SelectAtomic(Node, + AArch64::ATOMIC_LOAD_AND_I8, + AArch64::ATOMIC_LOAD_AND_I16, + AArch64::ATOMIC_LOAD_AND_I32, + AArch64::ATOMIC_LOAD_AND_I64); + case ISD::ATOMIC_LOAD_OR: + return SelectAtomic(Node, + AArch64::ATOMIC_LOAD_OR_I8, + AArch64::ATOMIC_LOAD_OR_I16, + AArch64::ATOMIC_LOAD_OR_I32, + AArch64::ATOMIC_LOAD_OR_I64); + case ISD::ATOMIC_LOAD_XOR: + return SelectAtomic(Node, + AArch64::ATOMIC_LOAD_XOR_I8, + AArch64::ATOMIC_LOAD_XOR_I16, + AArch64::ATOMIC_LOAD_XOR_I32, + AArch64::ATOMIC_LOAD_XOR_I64); + case ISD::ATOMIC_LOAD_NAND: + return SelectAtomic(Node, + AArch64::ATOMIC_LOAD_NAND_I8, + AArch64::ATOMIC_LOAD_NAND_I16, + AArch64::ATOMIC_LOAD_NAND_I32, + AArch64::ATOMIC_LOAD_NAND_I64); + case ISD::ATOMIC_LOAD_MIN: + return SelectAtomic(Node, + AArch64::ATOMIC_LOAD_MIN_I8, + AArch64::ATOMIC_LOAD_MIN_I16, + AArch64::ATOMIC_LOAD_MIN_I32, + AArch64::ATOMIC_LOAD_MIN_I64); + case ISD::ATOMIC_LOAD_MAX: + return SelectAtomic(Node, + AArch64::ATOMIC_LOAD_MAX_I8, + AArch64::ATOMIC_LOAD_MAX_I16, + AArch64::ATOMIC_LOAD_MAX_I32, + AArch64::ATOMIC_LOAD_MAX_I64); + case ISD::ATOMIC_LOAD_UMIN: + return SelectAtomic(Node, + AArch64::ATOMIC_LOAD_UMIN_I8, + AArch64::ATOMIC_LOAD_UMIN_I16, + AArch64::ATOMIC_LOAD_UMIN_I32, + AArch64::ATOMIC_LOAD_UMIN_I64); + case ISD::ATOMIC_LOAD_UMAX: + return SelectAtomic(Node, + AArch64::ATOMIC_LOAD_UMAX_I8, + AArch64::ATOMIC_LOAD_UMAX_I16, + AArch64::ATOMIC_LOAD_UMAX_I32, + AArch64::ATOMIC_LOAD_UMAX_I64); + case ISD::ATOMIC_SWAP: + return SelectAtomic(Node, + AArch64::ATOMIC_SWAP_I8, + AArch64::ATOMIC_SWAP_I16, + AArch64::ATOMIC_SWAP_I32, + AArch64::ATOMIC_SWAP_I64); + case ISD::ATOMIC_CMP_SWAP: + return SelectAtomic(Node, + AArch64::ATOMIC_CMP_SWAP_I8, + AArch64::ATOMIC_CMP_SWAP_I16, + AArch64::ATOMIC_CMP_SWAP_I32, + AArch64::ATOMIC_CMP_SWAP_I64); case ISD::FrameIndex: { int FI = cast<FrameIndexSDNode>(Node)->getIndex(); EVT PtrTy = TLI.getPointerTy(); diff --git a/contrib/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/contrib/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp index e9f4497..56f6751 100644 --- a/contrib/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/contrib/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -59,13 
+59,6 @@ AArch64TargetLowering::AArch64TargetLowering(AArch64TargetMachine &TM) computeRegisterProperties(); - // Some atomic operations can be folded into load-acquire or store-release - // instructions on AArch64. It's marginally simpler to let LLVM expand - // everything out to a barrier and then recombine the (few) barriers we can. - setInsertFencesForAtomic(true); - setTargetDAGCombine(ISD::ATOMIC_FENCE); - setTargetDAGCombine(ISD::ATOMIC_STORE); - // We combine OR nodes for bitfield and NEON BSL operations. setTargetDAGCombine(ISD::OR); @@ -275,27 +268,34 @@ EVT AArch64TargetLowering::getSetCCResultType(EVT VT) const { return VT.changeVectorElementTypeToInteger(); } -static void getExclusiveOperation(unsigned Size, unsigned &ldrOpc, - unsigned &strOpc) { - switch (Size) { - default: llvm_unreachable("unsupported size for atomic binary op!"); - case 1: - ldrOpc = AArch64::LDXR_byte; - strOpc = AArch64::STXR_byte; - break; - case 2: - ldrOpc = AArch64::LDXR_hword; - strOpc = AArch64::STXR_hword; - break; - case 4: - ldrOpc = AArch64::LDXR_word; - strOpc = AArch64::STXR_word; - break; - case 8: - ldrOpc = AArch64::LDXR_dword; - strOpc = AArch64::STXR_dword; - break; - } +static void getExclusiveOperation(unsigned Size, AtomicOrdering Ord, + unsigned &LdrOpc, + unsigned &StrOpc) { + static unsigned LoadBares[] = {AArch64::LDXR_byte, AArch64::LDXR_hword, + AArch64::LDXR_word, AArch64::LDXR_dword}; + static unsigned LoadAcqs[] = {AArch64::LDAXR_byte, AArch64::LDAXR_hword, + AArch64::LDAXR_word, AArch64::LDAXR_dword}; + static unsigned StoreBares[] = {AArch64::STXR_byte, AArch64::STXR_hword, + AArch64::STXR_word, AArch64::STXR_dword}; + static unsigned StoreRels[] = {AArch64::STLXR_byte, AArch64::STLXR_hword, + AArch64::STLXR_word, AArch64::STLXR_dword}; + + unsigned *LoadOps, *StoreOps; + if (Ord == Acquire || Ord == AcquireRelease || Ord == SequentiallyConsistent) + LoadOps = LoadAcqs; + else + LoadOps = LoadBares; + + if (Ord == Release || Ord == AcquireRelease || Ord == SequentiallyConsistent) + StoreOps = StoreRels; + else + StoreOps = StoreBares; + + assert(isPowerOf2_32(Size) && Size <= 8 && + "unsupported size for atomic binary op!"); + + LdrOpc = LoadOps[Log2_32(Size)]; + StrOpc = StoreOps[Log2_32(Size)]; } MachineBasicBlock * @@ -313,12 +313,13 @@ AArch64TargetLowering::emitAtomicBinary(MachineInstr *MI, MachineBasicBlock *BB, unsigned dest = MI->getOperand(0).getReg(); unsigned ptr = MI->getOperand(1).getReg(); unsigned incr = MI->getOperand(2).getReg(); + AtomicOrdering Ord = static_cast<AtomicOrdering>(MI->getOperand(3).getImm()); DebugLoc dl = MI->getDebugLoc(); MachineRegisterInfo &MRI = BB->getParent()->getRegInfo(); unsigned ldrOpc, strOpc; - getExclusiveOperation(Size, ldrOpc, strOpc); + getExclusiveOperation(Size, Ord, ldrOpc, strOpc); MachineBasicBlock *loopMBB = MF->CreateMachineBasicBlock(LLVM_BB); MachineBasicBlock *exitMBB = MF->CreateMachineBasicBlock(LLVM_BB); @@ -397,6 +398,8 @@ AArch64TargetLowering::emitAtomicBinaryMinMax(MachineInstr *MI, unsigned dest = MI->getOperand(0).getReg(); unsigned ptr = MI->getOperand(1).getReg(); unsigned incr = MI->getOperand(2).getReg(); + AtomicOrdering Ord = static_cast<AtomicOrdering>(MI->getOperand(3).getImm()); + unsigned oldval = dest; DebugLoc dl = MI->getDebugLoc(); @@ -411,7 +414,7 @@ AArch64TargetLowering::emitAtomicBinaryMinMax(MachineInstr *MI, } unsigned ldrOpc, strOpc; - getExclusiveOperation(Size, ldrOpc, strOpc); + getExclusiveOperation(Size, Ord, ldrOpc, strOpc); MachineBasicBlock *loopMBB = 
MF->CreateMachineBasicBlock(LLVM_BB); MachineBasicBlock *exitMBB = MF->CreateMachineBasicBlock(LLVM_BB); @@ -479,6 +482,7 @@ AArch64TargetLowering::emitAtomicCmpSwap(MachineInstr *MI, unsigned ptr = MI->getOperand(1).getReg(); unsigned oldval = MI->getOperand(2).getReg(); unsigned newval = MI->getOperand(3).getReg(); + AtomicOrdering Ord = static_cast<AtomicOrdering>(MI->getOperand(4).getImm()); const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); DebugLoc dl = MI->getDebugLoc(); @@ -487,7 +491,7 @@ AArch64TargetLowering::emitAtomicCmpSwap(MachineInstr *MI, TRCsp = Size == 8 ? &AArch64::GPR64xspRegClass : &AArch64::GPR32wspRegClass; unsigned ldrOpc, strOpc; - getExclusiveOperation(Size, ldrOpc, strOpc); + getExclusiveOperation(Size, Ord, ldrOpc, strOpc); MachineFunction *MF = BB->getParent(); const BasicBlock *LLVM_BB = BB->getBasicBlock(); @@ -777,6 +781,7 @@ const char *AArch64TargetLowering::getTargetNodeName(unsigned Opcode) const { case AArch64ISD::TC_RETURN: return "AArch64ISD::TC_RETURN"; case AArch64ISD::THREAD_POINTER: return "AArch64ISD::THREAD_POINTER"; case AArch64ISD::TLSDESCCALL: return "AArch64ISD::TLSDESCCALL"; + case AArch64ISD::WrapperLarge: return "AArch64ISD::WrapperLarge"; case AArch64ISD::WrapperSmall: return "AArch64ISD::WrapperSmall"; default: return NULL; @@ -1662,17 +1667,26 @@ AArch64TargetLowering::LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const { EVT PtrVT = getPointerTy(); const BlockAddress *BA = cast<BlockAddressSDNode>(Op)->getBlockAddress(); - assert(getTargetMachine().getCodeModel() == CodeModel::Small - && "Only small code model supported at the moment"); - - // The most efficient code is PC-relative anyway for the small memory model, - // so we don't need to worry about relocation model. - return DAG.getNode(AArch64ISD::WrapperSmall, DL, PtrVT, - DAG.getTargetBlockAddress(BA, PtrVT, 0, - AArch64II::MO_NO_FLAG), - DAG.getTargetBlockAddress(BA, PtrVT, 0, - AArch64II::MO_LO12), - DAG.getConstant(/*Alignment=*/ 4, MVT::i32)); + switch(getTargetMachine().getCodeModel()) { + case CodeModel::Small: + // The most efficient code is PC-relative anyway for the small memory model, + // so we don't need to worry about relocation model. + return DAG.getNode(AArch64ISD::WrapperSmall, DL, PtrVT, + DAG.getTargetBlockAddress(BA, PtrVT, 0, + AArch64II::MO_NO_FLAG), + DAG.getTargetBlockAddress(BA, PtrVT, 0, + AArch64II::MO_LO12), + DAG.getConstant(/*Alignment=*/ 4, MVT::i32)); + case CodeModel::Large: + return DAG.getNode( + AArch64ISD::WrapperLarge, DL, PtrVT, + DAG.getTargetBlockAddress(BA, PtrVT, 0, AArch64II::MO_ABS_G3), + DAG.getTargetBlockAddress(BA, PtrVT, 0, AArch64II::MO_ABS_G2_NC), + DAG.getTargetBlockAddress(BA, PtrVT, 0, AArch64II::MO_ABS_G1_NC), + DAG.getTargetBlockAddress(BA, PtrVT, 0, AArch64II::MO_ABS_G0_NC)); + default: + llvm_unreachable("Only small and large code models supported now"); + } } @@ -1841,12 +1855,33 @@ AArch64TargetLowering::LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG, } SDValue -AArch64TargetLowering::LowerGlobalAddressELF(SDValue Op, - SelectionDAG &DAG) const { - // TableGen doesn't have easy access to the CodeModel or RelocationModel, so - // we make that distinction here. 
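With getExclusiveOperation() now keyed on the AtomicOrdering, the acquire/release flavours of the exclusive instructions are chosen directly instead of relying on separately inserted fences. At the source level, the operations whose lowering this should affect look like the following sketch; the instruction names in the comments are the forms these orderings are expected to map to, not guaranteed output:

    #include <atomic>

    int fetchAddAcqRel(std::atomic<int> &A) {
      return A.fetch_add(1, std::memory_order_acq_rel);   // LDAXR/STLXR retry loop
    }
    int loadAcquire(const std::atomic<int> &A) {
      return A.load(std::memory_order_acquire);           // LDAR
    }
    void storeRelease(std::atomic<int> &A, int V) {
      A.store(V, std::memory_order_release);              // STLR
    }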
+AArch64TargetLowering::LowerGlobalAddressELFLarge(SDValue Op, + SelectionDAG &DAG) const { + assert(getTargetMachine().getCodeModel() == CodeModel::Large); + assert(getTargetMachine().getRelocationModel() == Reloc::Static); + + EVT PtrVT = getPointerTy(); + DebugLoc dl = Op.getDebugLoc(); + const GlobalAddressSDNode *GN = cast<GlobalAddressSDNode>(Op); + const GlobalValue *GV = GN->getGlobal(); + + SDValue GlobalAddr = DAG.getNode( + AArch64ISD::WrapperLarge, dl, PtrVT, + DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, AArch64II::MO_ABS_G3), + DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, AArch64II::MO_ABS_G2_NC), + DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, AArch64II::MO_ABS_G1_NC), + DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, AArch64II::MO_ABS_G0_NC)); - // We support the small memory model for now. + if (GN->getOffset() != 0) + return DAG.getNode(ISD::ADD, dl, PtrVT, GlobalAddr, + DAG.getConstant(GN->getOffset(), PtrVT)); + + return GlobalAddr; +} + +SDValue +AArch64TargetLowering::LowerGlobalAddressELFSmall(SDValue Op, + SelectionDAG &DAG) const { assert(getTargetMachine().getCodeModel() == CodeModel::Small); EVT PtrVT = getPointerTy(); @@ -1925,6 +1960,22 @@ AArch64TargetLowering::LowerGlobalAddressELF(SDValue Op, return GlobalRef; } +SDValue +AArch64TargetLowering::LowerGlobalAddressELF(SDValue Op, + SelectionDAG &DAG) const { + // TableGen doesn't have easy access to the CodeModel or RelocationModel, so + // we make those distinctions here. + + switch (getTargetMachine().getCodeModel()) { + case CodeModel::Small: + return LowerGlobalAddressELFSmall(Op, DAG); + case CodeModel::Large: + return LowerGlobalAddressELFLarge(Op, DAG); + default: + llvm_unreachable("Only small and large code models supported now"); + } +} + SDValue AArch64TargetLowering::LowerTLSDescCall(SDValue SymAddr, SDValue DescAddr, DebugLoc DL, @@ -1974,6 +2025,8 @@ AArch64TargetLowering::LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const { assert(Subtarget->isTargetELF() && "TLS not implemented for non-ELF targets"); + assert(getTargetMachine().getCodeModel() == CodeModel::Small + && "TLS only supported in small memory model"); const GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Op); TLSModel::Model Model = getTargetMachine().getTLSModel(GA->getGlobal()); @@ -2082,14 +2135,27 @@ SDValue AArch64TargetLowering::LowerJumpTable(SDValue Op, SelectionDAG &DAG) const { JumpTableSDNode *JT = cast<JumpTableSDNode>(Op); DebugLoc dl = JT->getDebugLoc(); + EVT PtrVT = getPointerTy(); // When compiling PIC, jump tables get put in the code section so a static // relocation-style is acceptable for both cases. 
- return DAG.getNode(AArch64ISD::WrapperSmall, dl, getPointerTy(), - DAG.getTargetJumpTable(JT->getIndex(), getPointerTy()), - DAG.getTargetJumpTable(JT->getIndex(), getPointerTy(), - AArch64II::MO_LO12), - DAG.getConstant(1, MVT::i32)); + switch (getTargetMachine().getCodeModel()) { + case CodeModel::Small: + return DAG.getNode(AArch64ISD::WrapperSmall, dl, PtrVT, + DAG.getTargetJumpTable(JT->getIndex(), PtrVT), + DAG.getTargetJumpTable(JT->getIndex(), PtrVT, + AArch64II::MO_LO12), + DAG.getConstant(1, MVT::i32)); + case CodeModel::Large: + return DAG.getNode( + AArch64ISD::WrapperLarge, dl, PtrVT, + DAG.getTargetJumpTable(JT->getIndex(), PtrVT, AArch64II::MO_ABS_G3), + DAG.getTargetJumpTable(JT->getIndex(), PtrVT, AArch64II::MO_ABS_G2_NC), + DAG.getTargetJumpTable(JT->getIndex(), PtrVT, AArch64II::MO_ABS_G1_NC), + DAG.getTargetJumpTable(JT->getIndex(), PtrVT, AArch64II::MO_ABS_G0_NC)); + default: + llvm_unreachable("Only small and large code models supported now"); + } } // (SELECT_CC lhs, rhs, iftrue, iffalse, condcode) @@ -2377,78 +2443,6 @@ static SDValue PerformANDCombine(SDNode *N, DAG.getConstant(LSB + Width - 1, MVT::i64)); } -static SDValue PerformATOMIC_FENCECombine(SDNode *FenceNode, - TargetLowering::DAGCombinerInfo &DCI) { - // An atomic operation followed by an acquiring atomic fence can be reduced to - // an acquiring load. The atomic operation provides a convenient pointer to - // load from. If the original operation was a load anyway we can actually - // combine the two operations into an acquiring load. - SelectionDAG &DAG = DCI.DAG; - SDValue AtomicOp = FenceNode->getOperand(0); - AtomicSDNode *AtomicNode = dyn_cast<AtomicSDNode>(AtomicOp); - - // A fence on its own can't be optimised - if (!AtomicNode) - return SDValue(); - - AtomicOrdering FenceOrder - = static_cast<AtomicOrdering>(FenceNode->getConstantOperandVal(1)); - SynchronizationScope FenceScope - = static_cast<SynchronizationScope>(FenceNode->getConstantOperandVal(2)); - - if (FenceOrder != Acquire || FenceScope != AtomicNode->getSynchScope()) - return SDValue(); - - // If the original operation was an ATOMIC_LOAD then we'll be replacing it, so - // the chain we use should be its input, otherwise we'll put our store after - // it so we use its output chain. - SDValue Chain = AtomicNode->getOpcode() == ISD::ATOMIC_LOAD ? - AtomicNode->getChain() : AtomicOp; - - // We have an acquire fence with a handy atomic operation nearby, we can - // convert the fence into a load-acquire, discarding the result. - DebugLoc DL = FenceNode->getDebugLoc(); - SDValue Op = DAG.getAtomic(ISD::ATOMIC_LOAD, DL, AtomicNode->getMemoryVT(), - AtomicNode->getValueType(0), - Chain, // Chain - AtomicOp.getOperand(1), // Pointer - AtomicNode->getMemOperand(), Acquire, - FenceScope); - - if (AtomicNode->getOpcode() == ISD::ATOMIC_LOAD) - DAG.ReplaceAllUsesWith(AtomicNode, Op.getNode()); - - return Op.getValue(1); -} - -static SDValue PerformATOMIC_STORECombine(SDNode *N, - TargetLowering::DAGCombinerInfo &DCI) { - // A releasing atomic fence followed by an atomic store can be combined into a - // single store operation. 
- SelectionDAG &DAG = DCI.DAG; - AtomicSDNode *AtomicNode = cast<AtomicSDNode>(N); - SDValue FenceOp = AtomicNode->getOperand(0); - - if (FenceOp.getOpcode() != ISD::ATOMIC_FENCE) - return SDValue(); - - AtomicOrdering FenceOrder - = static_cast<AtomicOrdering>(FenceOp->getConstantOperandVal(1)); - SynchronizationScope FenceScope - = static_cast<SynchronizationScope>(FenceOp->getConstantOperandVal(2)); - - if (FenceOrder != Release || FenceScope != AtomicNode->getSynchScope()) - return SDValue(); - - DebugLoc DL = AtomicNode->getDebugLoc(); - return DAG.getAtomic(ISD::ATOMIC_STORE, DL, AtomicNode->getMemoryVT(), - FenceOp.getOperand(0), // Chain - AtomicNode->getOperand(1), // Pointer - AtomicNode->getOperand(2), // Value - AtomicNode->getMemOperand(), Release, - FenceScope); -} - /// For a true bitfield insert, the bits getting into that contiguous mask /// should come from the low part of an existing value: they must be formed from /// a compatible SHL operation (unless they're already low). This function @@ -2804,8 +2798,6 @@ AArch64TargetLowering::PerformDAGCombine(SDNode *N, switch (N->getOpcode()) { default: break; case ISD::AND: return PerformANDCombine(N, DCI); - case ISD::ATOMIC_FENCE: return PerformATOMIC_FENCECombine(N, DCI); - case ISD::ATOMIC_STORE: return PerformATOMIC_STORECombine(N, DCI); case ISD::OR: return PerformORCombine(N, DCI, Subtarget); case ISD::SRA: return PerformSRACombine(N, DCI); } diff --git a/contrib/llvm/lib/Target/AArch64/AArch64ISelLowering.h b/contrib/llvm/lib/Target/AArch64/AArch64ISelLowering.h index 4960d28..d49b3ee 100644 --- a/contrib/llvm/lib/Target/AArch64/AArch64ISelLowering.h +++ b/contrib/llvm/lib/Target/AArch64/AArch64ISelLowering.h @@ -103,7 +103,12 @@ namespace AArch64ISD { UBFX, // Wraps an address which the ISelLowering phase has decided should be - // created using the small absolute memory model: i.e. adrp/add or + // created using the large memory model style: i.e. a sequence of four + // movz/movk instructions. + WrapperLarge, + + // Wraps an address which the ISelLowering phase has decided should be + // created using the small memory model style: i.e. adrp/add or // adrp/mem-op. This exists to prevent bare TargetAddresses which may never // get selected. WrapperSmall @@ -206,7 +211,11 @@ public: SDValue LowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) const; SDValue LowerFP_ROUND(SDValue Op, SelectionDAG &DAG) const; SDValue LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG, bool IsSigned) const; + + SDValue LowerGlobalAddressELFSmall(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerGlobalAddressELFLarge(SDValue Op, SelectionDAG &DAG) const; SDValue LowerGlobalAddressELF(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerTLSDescCall(SDValue SymAddr, SDValue DescAddr, DebugLoc DL, SelectionDAG &DAG) const; SDValue LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const; diff --git a/contrib/llvm/lib/Target/AArch64/AArch64InstrFormats.td b/contrib/llvm/lib/Target/AArch64/AArch64InstrFormats.td index cb93471..9dd122f 100644 --- a/contrib/llvm/lib/Target/AArch64/AArch64InstrFormats.td +++ b/contrib/llvm/lib/Target/AArch64/AArch64InstrFormats.td @@ -8,7 +8,7 @@ //===----------------------------------------------------------------------===// // This file describes AArch64 instruction formats, down to the level of the // instruction's overall class. 
-// ===----------------------------------------------------------------------===// +//===----------------------------------------------------------------------===// //===----------------------------------------------------------------------===// diff --git a/contrib/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/contrib/llvm/lib/Target/AArch64/AArch64InstrInfo.td index 37be5e4..d2cfc7d 100644 --- a/contrib/llvm/lib/Target/AArch64/AArch64InstrInfo.td +++ b/contrib/llvm/lib/Target/AArch64/AArch64InstrInfo.td @@ -70,12 +70,20 @@ def A64cmn : PatFrag<(ops node:$lhs, node:$rhs), // made for a variable/address at ISelLowering. // + The output of ISelLowering should be selectable (hence the Wrapper, // rather than a bare target opcode) -def SDTAArch64Wrapper : SDTypeProfile<1, 3, [SDTCisSameAs<0, 1>, - SDTCisSameAs<1, 2>, - SDTCisVT<3, i32>, - SDTCisPtrTy<0>]>; +def SDTAArch64WrapperLarge : SDTypeProfile<1, 4, [SDTCisSameAs<0, 1>, + SDTCisSameAs<0, 2>, + SDTCisSameAs<0, 3>, + SDTCisSameAs<0, 4>, + SDTCisPtrTy<0>]>; -def A64WrapperSmall : SDNode<"AArch64ISD::WrapperSmall", SDTAArch64Wrapper>; +def A64WrapperLarge :SDNode<"AArch64ISD::WrapperLarge", SDTAArch64WrapperLarge>; + +def SDTAArch64WrapperSmall : SDTypeProfile<1, 3, [SDTCisSameAs<0, 1>, + SDTCisSameAs<1, 2>, + SDTCisVT<3, i32>, + SDTCisPtrTy<0>]>; + +def A64WrapperSmall :SDNode<"AArch64ISD::WrapperSmall", SDTAArch64WrapperSmall>; def SDTAArch64GOTLoad : SDTypeProfile<1, 1, [SDTCisPtrTy<0>, SDTCisPtrTy<1>]>; @@ -159,49 +167,55 @@ let Defs = [XSP], Uses = [XSP] in { // Atomic operation pseudo-instructions //===----------------------------------------------------------------------===// -let usesCustomInserter = 1 in { -multiclass AtomicSizes<string opname> { - def _I8 : PseudoInst<(outs GPR32:$dst), (ins GPR64:$ptr, GPR32:$incr), - [(set i32:$dst, (!cast<SDNode>(opname # "_8") i64:$ptr, i32:$incr))]>; - def _I16 : PseudoInst<(outs GPR32:$dst), (ins GPR64:$ptr, GPR32:$incr), - [(set i32:$dst, (!cast<SDNode>(opname # "_16") i64:$ptr, i32:$incr))]>; - def _I32 : PseudoInst<(outs GPR32:$dst), (ins GPR64:$ptr, GPR32:$incr), - [(set i32:$dst, (!cast<SDNode>(opname # "_32") i64:$ptr, i32:$incr))]>; - def _I64 : PseudoInst<(outs GPR64:$dst), (ins GPR64:$ptr, GPR64:$incr), - [(set i64:$dst, (!cast<SDNode>(opname # "_64") i64:$ptr, i64:$incr))]>; -} -} - -defm ATOMIC_LOAD_ADD : AtomicSizes<"atomic_load_add">; -defm ATOMIC_LOAD_SUB : AtomicSizes<"atomic_load_sub">; -defm ATOMIC_LOAD_AND : AtomicSizes<"atomic_load_and">; -defm ATOMIC_LOAD_OR : AtomicSizes<"atomic_load_or">; -defm ATOMIC_LOAD_XOR : AtomicSizes<"atomic_load_xor">; -defm ATOMIC_LOAD_NAND : AtomicSizes<"atomic_load_nand">; -defm ATOMIC_SWAP : AtomicSizes<"atomic_swap">; +// These get selected from C++ code as a pretty much direct translation from the +// generic DAG nodes. The one exception is the AtomicOrdering is added as an +// operand so that the eventual lowering can make use of it and choose +// acquire/release operations when required. 
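The custom inserter expands each of these pseudo-instructions into a load-exclusive/store-exclusive retry loop, with the new ordering operand deciding whether the acquire/release forms are used. A C-style sketch of that loop for a 32-bit add with acq_rel ordering; ldaxr32() and stlxr32() are hypothetical stand-ins for the LDAXR/STLXR instructions, not real intrinsics:

    extern "C" int ldaxr32(int *Ptr);            // hypothetical: LDAXR wrapper
    extern "C" int stlxr32(int Val, int *Ptr);   // hypothetical: STLXR, 0 = success

    int atomicAddAcqRel(int *Ptr, int Incr) {
      int Old;
      do {
        Old = ldaxr32(Ptr);                      // load-acquire exclusive
      } while (stlxr32(Old + Incr, Ptr) != 0);   // store-release exclusive; retry on failure
      return Old;                                // the pseudo's $dst: the old value
    }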
+ +let usesCustomInserter = 1, hasCtrlDep = 1, mayLoad = 1, mayStore = 1 in { +multiclass AtomicSizes { + def _I8 : PseudoInst<(outs GPR32:$dst), + (ins GPR64xsp:$ptr, GPR32:$incr, i32imm:$ordering), []>; + def _I16 : PseudoInst<(outs GPR32:$dst), + (ins GPR64xsp:$ptr, GPR32:$incr, i32imm:$ordering), []>; + def _I32 : PseudoInst<(outs GPR32:$dst), + (ins GPR64xsp:$ptr, GPR32:$incr, i32imm:$ordering), []>; + def _I64 : PseudoInst<(outs GPR64:$dst), + (ins GPR64xsp:$ptr, GPR64:$incr, i32imm:$ordering), []>; +} +} + +defm ATOMIC_LOAD_ADD : AtomicSizes; +defm ATOMIC_LOAD_SUB : AtomicSizes; +defm ATOMIC_LOAD_AND : AtomicSizes; +defm ATOMIC_LOAD_OR : AtomicSizes; +defm ATOMIC_LOAD_XOR : AtomicSizes; +defm ATOMIC_LOAD_NAND : AtomicSizes; +defm ATOMIC_SWAP : AtomicSizes; let Defs = [NZCV] in { // These operations need a CMP to calculate the correct value - defm ATOMIC_LOAD_MIN : AtomicSizes<"atomic_load_min">; - defm ATOMIC_LOAD_MAX : AtomicSizes<"atomic_load_max">; - defm ATOMIC_LOAD_UMIN : AtomicSizes<"atomic_load_umin">; - defm ATOMIC_LOAD_UMAX : AtomicSizes<"atomic_load_umax">; -} - -let usesCustomInserter = 1, Defs = [NZCV] in { -def ATOMIC_CMP_SWAP_I8 - : PseudoInst<(outs GPR32:$dst), (ins GPR64:$ptr, GPR32:$old, GPR32:$new), - [(set i32:$dst, (atomic_cmp_swap_8 i64:$ptr, i32:$old, i32:$new))]>; -def ATOMIC_CMP_SWAP_I16 - : PseudoInst<(outs GPR32:$dst), (ins GPR64:$ptr, GPR32:$old, GPR32:$new), - [(set i32:$dst, (atomic_cmp_swap_16 i64:$ptr, i32:$old, i32:$new))]>; -def ATOMIC_CMP_SWAP_I32 - : PseudoInst<(outs GPR32:$dst), (ins GPR64:$ptr, GPR32:$old, GPR32:$new), - [(set i32:$dst, (atomic_cmp_swap_32 i64:$ptr, i32:$old, i32:$new))]>; -def ATOMIC_CMP_SWAP_I64 - : PseudoInst<(outs GPR64:$dst), (ins GPR64:$ptr, GPR64:$old, GPR64:$new), - [(set i64:$dst, (atomic_cmp_swap_64 i64:$ptr, i64:$old, i64:$new))]>; + defm ATOMIC_LOAD_MIN : AtomicSizes; + defm ATOMIC_LOAD_MAX : AtomicSizes; + defm ATOMIC_LOAD_UMIN : AtomicSizes; + defm ATOMIC_LOAD_UMAX : AtomicSizes; +} + +class AtomicCmpSwap<RegisterClass GPRData> + : PseudoInst<(outs GPRData:$dst), + (ins GPR64xsp:$ptr, GPRData:$old, GPRData:$new, + i32imm:$ordering), []> { + let usesCustomInserter = 1; + let hasCtrlDep = 1; + let mayLoad = 1; + let mayStore = 1; + let Defs = [NZCV]; } +def ATOMIC_CMP_SWAP_I8 : AtomicCmpSwap<GPR32>; +def ATOMIC_CMP_SWAP_I16 : AtomicCmpSwap<GPR32>; +def ATOMIC_CMP_SWAP_I32 : AtomicCmpSwap<GPR32>; +def ATOMIC_CMP_SWAP_I64 : AtomicCmpSwap<GPR64>; + //===----------------------------------------------------------------------===// // Add-subtract (extended register) instructions //===----------------------------------------------------------------------===// @@ -2579,7 +2593,8 @@ defm LDAR : A64I_LRex<"ldar", 0b101>; class acquiring_load<PatFrag base> : PatFrag<(ops node:$ptr), (base node:$ptr), [{ - return cast<AtomicSDNode>(N)->getOrdering() == Acquire; + AtomicOrdering Ordering = cast<AtomicSDNode>(N)->getOrdering(); + return Ordering == Acquire || Ordering == SequentiallyConsistent; }]>; def atomic_load_acquire_8 : acquiring_load<atomic_load_8>; @@ -2610,7 +2625,8 @@ class A64I_SLexs_impl<bits<2> size, bits<3> opcode, string asm, dag outs, class releasing_store<PatFrag base> : PatFrag<(ops node:$ptr, node:$val), (base node:$ptr, node:$val), [{ - return cast<AtomicSDNode>(N)->getOrdering() == Release; + AtomicOrdering Ordering = cast<AtomicSDNode>(N)->getOrdering(); + return Ordering == Release || Ordering == SequentiallyConsistent; }]>; def atomic_store_release_8 : releasing_store<atomic_store_8>; @@ -3863,7 +3879,7 @@ 
multiclass movw_operands<string prefix, string instname, int width> { let DiagnosticType = "MOVWUImm16"; } - def _imm : Operand<i32> { + def _imm : Operand<i64> { let ParserMatchClass = !cast<AsmOperandClass>(prefix # "_imm_asmoperand"); let PrintMethod = "printMoveWideImmOperand"; let EncoderMethod = "getMoveWideImmOpValue"; @@ -3934,7 +3950,7 @@ multiclass movalias_operand<string prefix, string basename, # "A64Imms::" # immpredicate # ">"; } - def _movimm : Operand<i32> { + def _movimm : Operand<i64> { let ParserMatchClass = !cast<AsmOperandClass>(prefix # "_asmoperand"); let MIOperandInfo = (ops uimm16:$UImm16, imm:$Shift); @@ -3958,6 +3974,15 @@ def : movalias<MOVZxii, GPR64, movz64_movimm>; def : movalias<MOVNwii, GPR32, movn32_movimm>; def : movalias<MOVNxii, GPR64, movn64_movimm>; +def movw_addressref : ComplexPattern<i64, 2, "SelectMOVWAddressRef">; + +def : Pat<(A64WrapperLarge movw_addressref:$G3, movw_addressref:$G2, + movw_addressref:$G1, movw_addressref:$G0), + (MOVKxii (MOVKxii (MOVKxii (MOVZxii movw_addressref:$G3), + movw_addressref:$G2), + movw_addressref:$G1), + movw_addressref:$G0)>; + //===----------------------------------------------------------------------===// // PC-relative addressing instructions //===----------------------------------------------------------------------===// @@ -4454,8 +4479,6 @@ def : ADRP_ADD<A64WrapperSmall, tjumptable>; // GOT access patterns //===----------------------------------------------------------------------===// -// FIXME: Wibble - class GOTLoadSmall<SDNode addrfrag> : Pat<(A64GOTLoad (A64WrapperSmall addrfrag:$Hi, addrfrag:$Lo12, 8)), (LS64_LDR (ADRPxi addrfrag:$Hi), addrfrag:$Lo12)>; diff --git a/contrib/llvm/lib/Target/AArch64/AArch64MCInstLower.cpp b/contrib/llvm/lib/Target/AArch64/AArch64MCInstLower.cpp index c96bf85..3d22330 100644 --- a/contrib/llvm/lib/Target/AArch64/AArch64MCInstLower.cpp +++ b/contrib/llvm/lib/Target/AArch64/AArch64MCInstLower.cpp @@ -68,6 +68,18 @@ AArch64AsmPrinter::lowerSymbolOperand(const MachineOperand &MO, case AArch64II::MO_TPREL_G0_NC: Expr = AArch64MCExpr::CreateTPREL_G0_NC(Expr, OutContext); break; + case AArch64II::MO_ABS_G3: + Expr = AArch64MCExpr::CreateABS_G3(Expr, OutContext); + break; + case AArch64II::MO_ABS_G2_NC: + Expr = AArch64MCExpr::CreateABS_G2_NC(Expr, OutContext); + break; + case AArch64II::MO_ABS_G1_NC: + Expr = AArch64MCExpr::CreateABS_G1_NC(Expr, OutContext); + break; + case AArch64II::MO_ABS_G0_NC: + Expr = AArch64MCExpr::CreateABS_G0_NC(Expr, OutContext); + break; case AArch64II::MO_NO_FLAG: // Expr is already correct break; diff --git a/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.cpp b/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.cpp index b83577a..3b811df 100644 --- a/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.cpp +++ b/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.cpp @@ -63,14 +63,15 @@ public: ~AArch64ELFStreamer() {} - virtual void ChangeSection(const MCSection *Section) { + virtual void ChangeSection(const MCSection *Section, + const MCExpr *Subsection) { // We have to keep track of the mapping symbol state of any sections we // use. Each one should start off as EMS_None, which is provided as the // default constructor by DenseMap::lookup. 
- LastMappingSymbols[getPreviousSection()] = LastEMS; + LastMappingSymbols[getPreviousSection().first] = LastEMS; LastEMS = LastMappingSymbols.lookup(Section); - MCELFStreamer::ChangeSection(Section); + MCELFStreamer::ChangeSection(Section, Subsection); } /// This function is the one used to emit instruction data into the ELF @@ -129,7 +130,7 @@ private: MCELF::SetType(SD, ELF::STT_NOTYPE); MCELF::SetBinding(SD, ELF::STB_LOCAL); SD.setExternal(false); - Symbol->setSection(*getCurrentSection()); + Symbol->setSection(*getCurrentSection().first); const MCExpr *Value = MCSymbolRefExpr::Create(Start, getContext()); Symbol->setVariableValue(Value); diff --git a/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCExpr.h b/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCExpr.h index c0e3b29..d9798ae 100644 --- a/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCExpr.h +++ b/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCExpr.h @@ -133,6 +133,26 @@ public: return Create(VK_AARCH64_TPREL_G0_NC, Expr, Ctx); } + static const AArch64MCExpr *CreateABS_G3(const MCExpr *Expr, + MCContext &Ctx) { + return Create(VK_AARCH64_ABS_G3, Expr, Ctx); + } + + static const AArch64MCExpr *CreateABS_G2_NC(const MCExpr *Expr, + MCContext &Ctx) { + return Create(VK_AARCH64_ABS_G2_NC, Expr, Ctx); + } + + static const AArch64MCExpr *CreateABS_G1_NC(const MCExpr *Expr, + MCContext &Ctx) { + return Create(VK_AARCH64_ABS_G1_NC, Expr, Ctx); + } + + static const AArch64MCExpr *CreateABS_G0_NC(const MCExpr *Expr, + MCContext &Ctx) { + return Create(VK_AARCH64_ABS_G0_NC, Expr, Ctx); + } + /// @} /// @name Accessors /// @{ diff --git a/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.cpp b/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.cpp index 7960db0..819eead 100644 --- a/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.cpp +++ b/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.cpp @@ -81,6 +81,12 @@ static MCCodeGenInfo *createAArch64MCCodeGenInfo(StringRef TT, Reloc::Model RM, if (CM == CodeModel::Default) CM = CodeModel::Small; + else if (CM == CodeModel::JITDefault) { + // The default MCJIT memory managers make no guarantees about where they can + // find an executable page; JITed code needs to be able to refer to globals + // no matter how far away they are. 
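In practice this means an MCJIT client on AArch64 that leaves the code model at JITDefault now gets the large model, so emitted code can reach globals wherever the memory manager places them. A hedged sketch against the 3.3-era EngineBuilder API (error handling trimmed):

    #include "llvm/ExecutionEngine/ExecutionEngine.h"
    #include "llvm/IR/Module.h"
    #include "llvm/Support/CodeGen.h"
    #include <string>

    llvm::ExecutionEngine *makeEngine(llvm::Module *M) {
      std::string Err;
      return llvm::EngineBuilder(M)
          .setErrorStr(&Err)
          .setUseMCJIT(true)                           // MCJIT, not the legacy JIT
          .setCodeModel(llvm::CodeModel::JITDefault)   // now resolves to Large here
          .create();
    }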
+ CM = CodeModel::Large; + } X->InitMCCodeGenInfo(RM, CM, OL); return X; diff --git a/contrib/llvm/lib/Target/AArch64/TargetInfo/AArch64TargetInfo.cpp b/contrib/llvm/lib/Target/AArch64/TargetInfo/AArch64TargetInfo.cpp index b8099cb..fc706a4 100644 --- a/contrib/llvm/lib/Target/AArch64/TargetInfo/AArch64TargetInfo.cpp +++ b/contrib/llvm/lib/Target/AArch64/TargetInfo/AArch64TargetInfo.cpp @@ -19,6 +19,6 @@ using namespace llvm; Target llvm::TheAArch64Target; extern "C" void LLVMInitializeAArch64TargetInfo() { - RegisterTarget<Triple::aarch64> + RegisterTarget<Triple::aarch64, /*HasJIT=*/true> X(TheAArch64Target, "aarch64", "AArch64"); } diff --git a/contrib/llvm/lib/Target/AArch64/Utils/AArch64BaseInfo.cpp b/contrib/llvm/lib/Target/AArch64/Utils/AArch64BaseInfo.cpp index 1678559..bedccb5 100644 --- a/contrib/llvm/lib/Target/AArch64/Utils/AArch64BaseInfo.cpp +++ b/contrib/llvm/lib/Target/AArch64/Utils/AArch64BaseInfo.cpp @@ -981,8 +981,11 @@ bool A64Imms::isLogicalImm(unsigned RegWidth, uint64_t Imm, uint32_t &Bits) { Rotation = RepeatWidth - Rotation; } - uint64_t ReplicatedOnes = (ReplicatedMask >> Rotation) - | ((ReplicatedMask << (RepeatWidth - Rotation)) & RepeatMask); + uint64_t ReplicatedOnes = ReplicatedMask; + if (Rotation != 0 && Rotation != 64) + ReplicatedOnes = (ReplicatedMask >> Rotation) + | ((ReplicatedMask << (RepeatWidth - Rotation)) & RepeatMask); + // Of course, they may not actually be ones, so we have to check that: if (!isMask_64(ReplicatedOnes)) continue; @@ -1051,13 +1054,14 @@ bool A64Imms::isLogicalImmBits(unsigned RegWidth, uint32_t Bits, int Rotation = (ImmR & (Width - 1)); uint64_t Mask = (1ULL << Num1s) - 1; uint64_t WidthMask = Width == 64 ? -1 : (1ULL << Width) - 1; - Mask = (Mask >> Rotation) - | ((Mask << (Width - Rotation)) & WidthMask); + if (Rotation != 0 && Rotation != 64) + Mask = (Mask >> Rotation) + | ((Mask << (Width - Rotation)) & WidthMask); - Imm = 0; - for (unsigned i = 0; i < RegWidth / Width; ++i) { - Imm |= Mask; + Imm = Mask; + for (unsigned i = 1; i < RegWidth / Width; ++i) { Mask <<= Width; + Imm |= Mask; } return true; diff --git a/contrib/llvm/lib/Target/AArch64/Utils/AArch64BaseInfo.h b/contrib/llvm/lib/Target/AArch64/Utils/AArch64BaseInfo.h index 1b773d6..9a1ca61 100644 --- a/contrib/llvm/lib/Target/AArch64/Utils/AArch64BaseInfo.h +++ b/contrib/llvm/lib/Target/AArch64/Utils/AArch64BaseInfo.h @@ -1037,7 +1037,14 @@ namespace AArch64II { // MO_LO12 - On a symbol operand, this represents a relocation containing // lower 12 bits of the address. Used in add/sub/ldr/str. - MO_LO12 + MO_LO12, + + // MO_ABS_G* - Represent the 16-bit granules of an absolute reference using + // movz/movk instructions. 
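The new Rotation != 0 guards in AArch64BaseInfo.cpp above sidestep a shift by the full register width: with a zero rotation the old code computed Mask << (Width - 0), and shifting a 64-bit value by 64 is undefined behaviour in C++. A standalone sketch of the guarded rotate (widths and values illustrative):

    #include <cstdint>

    static uint64_t rotateRightGuarded(uint64_t Mask, unsigned Rot, unsigned Width) {
      uint64_t WidthMask = (Width == 64) ? ~0ULL : ((1ULL << Width) - 1);
      if (Rot == 0 || Rot == Width)        // rotating by 0 (or a full turn) is a no-op;
        return Mask & WidthMask;           // skipping it avoids the shift-by-64 UB
      return ((Mask >> Rot) | (Mask << (Width - Rot))) & WidthMask;
    }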
+ MO_ABS_G3, + MO_ABS_G2_NC, + MO_ABS_G1_NC, + MO_ABS_G0_NC }; } diff --git a/contrib/llvm/lib/Target/ARM/ARM.td b/contrib/llvm/lib/Target/ARM/ARM.td index 6838084..2d747091 100644 --- a/contrib/llvm/lib/Target/ARM/ARM.td +++ b/contrib/llvm/lib/Target/ARM/ARM.td @@ -59,6 +59,8 @@ def FeatureSlowFPBrcc : SubtargetFeature<"slow-fp-brcc", "SlowFPBrcc", "true", "FP compare + branch is slow">; def FeatureVFPOnlySP : SubtargetFeature<"fp-only-sp", "FPOnlySP", "true", "Floating point unit supports single precision only">; +def FeatureTrustZone : SubtargetFeature<"trustzone", "HasTrustZone", "true", + "Enable support for TrustZone security extensions">; // Some processors have FP multiply-accumulate instructions that don't // play nicely with other VFP / NEON instructions, and it's generally better @@ -144,29 +146,33 @@ include "ARMSchedule.td" def ProcA5 : SubtargetFeature<"a5", "ARMProcFamily", "CortexA5", "Cortex-A5 ARM processors", [FeatureSlowFPBrcc, FeatureHasSlowFPVMLx, - FeatureVMLxForwarding, FeatureT2XtPk]>; + FeatureVMLxForwarding, FeatureT2XtPk, + FeatureTrustZone]>; def ProcA8 : SubtargetFeature<"a8", "ARMProcFamily", "CortexA8", "Cortex-A8 ARM processors", [FeatureSlowFPBrcc, FeatureHasSlowFPVMLx, - FeatureVMLxForwarding, FeatureT2XtPk]>; + FeatureVMLxForwarding, FeatureT2XtPk, + FeatureTrustZone]>; def ProcA9 : SubtargetFeature<"a9", "ARMProcFamily", "CortexA9", "Cortex-A9 ARM processors", [FeatureVMLxForwarding, FeatureT2XtPk, FeatureFP16, - FeatureAvoidPartialCPSR]>; + FeatureAvoidPartialCPSR, + FeatureTrustZone]>; def ProcSwift : SubtargetFeature<"swift", "ARMProcFamily", "Swift", "Swift ARM processors", [FeatureNEONForFP, FeatureT2XtPk, FeatureVFP4, FeatureMP, FeatureHWDiv, FeatureHWDivARM, FeatureAvoidPartialCPSR, FeatureAvoidMOVsShOp, - FeatureHasSlowFPVMLx]>; + FeatureHasSlowFPVMLx, FeatureTrustZone]>; // FIXME: It has not been determined if A15 has these features. def ProcA15 : SubtargetFeature<"a15", "ARMProcFamily", "CortexA15", "Cortex-A15 ARM processors", [FeatureT2XtPk, FeatureFP16, - FeatureAvoidPartialCPSR]>; + FeatureAvoidPartialCPSR, + FeatureTrustZone]>; def ProcR5 : SubtargetFeature<"r5", "ARMProcFamily", "CortexR5", "Cortex-R5 ARM processors", [FeatureSlowFPBrcc, FeatureHWDivARM, diff --git a/contrib/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp b/contrib/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp index 9e68ff4..6005054 100644 --- a/contrib/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp +++ b/contrib/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp @@ -283,14 +283,20 @@ ARMBaseInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,MachineBasicBlock *&TBB, return false; --I; } - if (!isUnpredicatedTerminator(I)) - return false; // Get the last instruction in the block. MachineInstr *LastInst = I; + unsigned LastOpc = LastInst->getOpcode(); + + // Check if it's an indirect branch first, this should return 'unanalyzable' + // even if it's predicated. + if (isIndirectBranchOpcode(LastOpc)) + return true; + + if (!isUnpredicatedTerminator(I)) + return false; // If there is only one terminator instruction, process it. 
- unsigned LastOpc = LastInst->getOpcode(); if (I == MBB.begin() || !isUnpredicatedTerminator(--I)) { if (isUncondBranchOpcode(LastOpc)) { TBB = LastInst->getOperand(0).getMBB(); @@ -747,10 +753,10 @@ void ARMBaseInstrInfo::copyPhysReg(MachineBasicBlock &MBB, Mov->addRegisterKilled(SrcReg, TRI); } -static const -MachineInstrBuilder &AddDReg(MachineInstrBuilder &MIB, - unsigned Reg, unsigned SubIdx, unsigned State, - const TargetRegisterInfo *TRI) { +const MachineInstrBuilder & +ARMBaseInstrInfo::AddDReg(MachineInstrBuilder &MIB, unsigned Reg, + unsigned SubIdx, unsigned State, + const TargetRegisterInfo *TRI) const { if (!SubIdx) return MIB.addReg(Reg, State); @@ -795,12 +801,22 @@ storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, .addReg(SrcReg, getKillRegState(isKill)) .addFrameIndex(FI).addImm(0).addMemOperand(MMO)); } else if (ARM::GPRPairRegClass.hasSubClassEq(RC)) { - MachineInstrBuilder MIB = - AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::STMIA)) - .addFrameIndex(FI)) - .addMemOperand(MMO); - MIB = AddDReg(MIB, SrcReg, ARM::gsub_0, getKillRegState(isKill), TRI); - AddDReg(MIB, SrcReg, ARM::gsub_1, 0, TRI); + if (Subtarget.hasV5TEOps()) { + MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(ARM::STRD)); + AddDReg(MIB, SrcReg, ARM::gsub_0, getKillRegState(isKill), TRI); + AddDReg(MIB, SrcReg, ARM::gsub_1, 0, TRI); + MIB.addFrameIndex(FI).addReg(0).addImm(0).addMemOperand(MMO); + + AddDefaultPred(MIB); + } else { + // Fallback to STM instruction, which has existed since the dawn of + // time. + MachineInstrBuilder MIB = + AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::STMIA)) + .addFrameIndex(FI).addMemOperand(MMO)); + AddDReg(MIB, SrcReg, ARM::gsub_0, getKillRegState(isKill), TRI); + AddDReg(MIB, SrcReg, ARM::gsub_1, 0, TRI); + } } else llvm_unreachable("Unknown reg class!"); break; @@ -948,7 +964,6 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, DebugLoc DL; if (I != MBB.end()) DL = I->getDebugLoc(); MachineFunction &MF = *MBB.getParent(); - ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); MachineFrameInfo &MFI = *MF.getFrameInfo(); unsigned Align = MFI.getObjectAlignment(FI); MachineMemOperand *MMO = @@ -975,12 +990,24 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VLDRD), DestReg) .addFrameIndex(FI).addImm(0).addMemOperand(MMO)); } else if (ARM::GPRPairRegClass.hasSubClassEq(RC)) { - unsigned LdmOpc = AFI->isThumbFunction() ? ARM::t2LDMIA : ARM::LDMIA; - MachineInstrBuilder MIB = - AddDefaultPred(BuildMI(MBB, I, DL, get(LdmOpc)) - .addFrameIndex(FI).addImm(0).addMemOperand(MMO)); - MIB = AddDReg(MIB, DestReg, ARM::gsub_0, RegState::DefineNoRead, TRI); - MIB = AddDReg(MIB, DestReg, ARM::gsub_1, RegState::DefineNoRead, TRI); + MachineInstrBuilder MIB; + + if (Subtarget.hasV5TEOps()) { + MIB = BuildMI(MBB, I, DL, get(ARM::LDRD)); + AddDReg(MIB, DestReg, ARM::gsub_0, RegState::DefineNoRead, TRI); + AddDReg(MIB, DestReg, ARM::gsub_1, RegState::DefineNoRead, TRI); + MIB.addFrameIndex(FI).addReg(0).addImm(0).addMemOperand(MMO); + + AddDefaultPred(MIB); + } else { + // Fallback to LDM instruction, which has existed since the dawn of + // time. 
+ MIB = AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::LDMIA)) + .addFrameIndex(FI).addMemOperand(MMO)); + MIB = AddDReg(MIB, DestReg, ARM::gsub_0, RegState::DefineNoRead, TRI); + MIB = AddDReg(MIB, DestReg, ARM::gsub_1, RegState::DefineNoRead, TRI); + } + if (TargetRegisterInfo::isPhysicalRegister(DestReg)) MIB.addReg(DestReg, RegState::ImplicitDefine); } else diff --git a/contrib/llvm/lib/Target/ARM/ARMBaseInstrInfo.h b/contrib/llvm/lib/Target/ARM/ARMBaseInstrInfo.h index 7c107bb..2ef659c 100644 --- a/contrib/llvm/lib/Target/ARM/ARMBaseInstrInfo.h +++ b/contrib/llvm/lib/Target/ARM/ARMBaseInstrInfo.h @@ -141,6 +141,10 @@ public: MachineInstr *commuteInstruction(MachineInstr*, bool=false) const; + const MachineInstrBuilder &AddDReg(MachineInstrBuilder &MIB, unsigned Reg, + unsigned SubIdx, unsigned State, + const TargetRegisterInfo *TRI) const; + virtual bool produceSameValue(const MachineInstr *MI0, const MachineInstr *MI1, const MachineRegisterInfo *MRI) const; diff --git a/contrib/llvm/lib/Target/ARM/ARMBaseRegisterInfo.cpp b/contrib/llvm/lib/Target/ARM/ARMBaseRegisterInfo.cpp index b6b27f8..b0d34a7 100644 --- a/contrib/llvm/lib/Target/ARM/ARMBaseRegisterInfo.cpp +++ b/contrib/llvm/lib/Target/ARM/ARMBaseRegisterInfo.cpp @@ -75,6 +75,12 @@ ARMBaseRegisterInfo::getCallPreservedMask(CallingConv::ID) const { } const uint32_t* +ARMBaseRegisterInfo::getThisReturnPreservedMask(CallingConv::ID) const { + return (STI.isTargetIOS() && !STI.isAAPCS_ABI()) + ? CSR_iOS_ThisReturn_RegMask : CSR_AAPCS_ThisReturn_RegMask; +} + +const uint32_t* ARMBaseRegisterInfo::getNoPreservedMask() const { return CSR_NoRegs_RegMask; } diff --git a/contrib/llvm/lib/Target/ARM/ARMBaseRegisterInfo.h b/contrib/llvm/lib/Target/ARM/ARMBaseRegisterInfo.h index 725033b..0679919 100644 --- a/contrib/llvm/lib/Target/ARM/ARMBaseRegisterInfo.h +++ b/contrib/llvm/lib/Target/ARM/ARMBaseRegisterInfo.h @@ -96,6 +96,7 @@ public: /// Code Generation virtual methods... const uint16_t *getCalleeSavedRegs(const MachineFunction *MF = 0) const; const uint32_t *getCallPreservedMask(CallingConv::ID) const; + const uint32_t *getThisReturnPreservedMask(CallingConv::ID) const; const uint32_t *getNoPreservedMask() const; BitVector getReservedRegs(const MachineFunction &MF) const; diff --git a/contrib/llvm/lib/Target/ARM/ARMCallingConv.h b/contrib/llvm/lib/Target/ARM/ARMCallingConv.h index e6e8c3d..4f94ad2 100644 --- a/contrib/llvm/lib/Target/ARM/ARMCallingConv.h +++ b/contrib/llvm/lib/Target/ARM/ARMCallingConv.h @@ -74,9 +74,15 @@ static bool f64AssignAAPCS(unsigned &ValNo, MVT &ValVT, MVT &LocVT, static const uint16_t HiRegList[] = { ARM::R0, ARM::R2 }; static const uint16_t LoRegList[] = { ARM::R1, ARM::R3 }; static const uint16_t ShadowRegList[] = { ARM::R0, ARM::R1 }; + static const uint16_t GPRArgRegs[] = { ARM::R0, ARM::R1, ARM::R2, ARM::R3 }; unsigned Reg = State.AllocateReg(HiRegList, ShadowRegList, 2); if (Reg == 0) { + + // If we had R3 unallocated only, now we still must to waste it. + Reg = State.AllocateReg(GPRArgRegs, 4); + assert((!Reg || Reg == ARM::R3) && "Wrong GPRs usage for f64"); + // For the 2nd half of a v2f64, do not just fail. 
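The R3-wasting rule above keeps register and stack accounting consistent when an 8-byte-aligned f64 can no longer start in an even register. An illustrative signature that hits it under the soft-float AAPCS:

    // a -> r0, b -> r1, c -> r2; d needs an even/odd register pair, so r3 is
    // allocated ("wasted") without holding anything and d is passed entirely
    // on the stack.
    extern "C" double takesMixedArgs(int a, int b, int c, double d);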
if (CanFail) return false; diff --git a/contrib/llvm/lib/Target/ARM/ARMCallingConv.td b/contrib/llvm/lib/Target/ARM/ARMCallingConv.td index b378b96..8ff666e 100644 --- a/contrib/llvm/lib/Target/ARM/ARMCallingConv.td +++ b/contrib/llvm/lib/Target/ARM/ARMCallingConv.td @@ -111,8 +111,7 @@ def CC_ARM_AAPCS_Common : CallingConv<[ // i64 is 8-aligned i32 here, so we may need to eat R1 as a pad register // (and the same is true for f64 if VFP is not enabled) CCIfType<[i32], CCIfAlign<"8", CCAssignToRegWithShadow<[R0, R2], [R0, R1]>>>, - CCIfType<[i32], CCIf<"State.getNextStackOffset() == 0 &&" - "ArgFlags.getOrigAlign() != 8", + CCIfType<[i32], CCIf<"ArgFlags.getOrigAlign() != 8", CCAssignToReg<[R0, R1, R2, R3]>>>, CCIfType<[i32], CCIfAlign<"8", CCAssignToStackWithShadow<4, 8, R3>>>, @@ -195,10 +194,21 @@ def CSR_NoRegs : CalleeSavedRegs<(add)>; def CSR_AAPCS : CalleeSavedRegs<(add LR, R11, R10, R9, R8, R7, R6, R5, R4, (sequence "D%u", 15, 8))>; +// Constructors and destructors return 'this' in the ARM C++ ABI; since 'this' +// and the pointer return value are both passed in R0 in these cases, this can +// be partially modelled by treating R0 as a callee-saved register +// Only the resulting RegMask is used; the SaveList is ignored +def CSR_AAPCS_ThisReturn : CalleeSavedRegs<(add LR, R11, R10, R9, R8, R7, R6, + R5, R4, (sequence "D%u", 15, 8), + R0)>; + // iOS ABI deviates from ARM standard ABI. R9 is not a callee-saved register. // Also save R7-R4 first to match the stack frame fixed spill areas. def CSR_iOS : CalleeSavedRegs<(add LR, R7, R6, R5, R4, (sub CSR_AAPCS, R9))>; +def CSR_iOS_ThisReturn : CalleeSavedRegs<(add LR, R7, R6, R5, R4, + (sub CSR_AAPCS_ThisReturn, R9))>; + // GHC set of callee saved regs is empty as all those regs are // used for passing STG regs around // add is a workaround for not being able to compile empty list: diff --git a/contrib/llvm/lib/Target/ARM/ARMFastISel.cpp b/contrib/llvm/lib/Target/ARM/ARMFastISel.cpp index 29fcd40..5d45f64 100644 --- a/contrib/llvm/lib/Target/ARM/ARMFastISel.cpp +++ b/contrib/llvm/lib/Target/ARM/ARMFastISel.cpp @@ -144,8 +144,8 @@ class ARMFastISel : public FastISel { virtual bool TargetSelectInstruction(const Instruction *I); virtual unsigned TargetMaterializeConstant(const Constant *C); virtual unsigned TargetMaterializeAlloca(const AllocaInst *AI); - virtual bool TryToFoldLoad(MachineInstr *MI, unsigned OpNo, - const LoadInst *LI); + virtual bool tryToFoldLoadIntoMI(MachineInstr *MI, unsigned OpNo, + const LoadInst *LI); virtual bool FastLowerArguments(); private: #include "ARMGenFastISel.inc" @@ -2605,7 +2605,7 @@ unsigned ARMFastISel::ARMEmitIntExt(MVT SrcVT, unsigned SrcReg, MVT DestVT, unsigned Opc; bool isBoolZext = false; - const TargetRegisterClass *RC = TLI.getRegClassFor(MVT::i32); + const TargetRegisterClass *RC; switch (SrcVT.SimpleTy) { default: return 0; case MVT::i16: @@ -2797,12 +2797,12 @@ bool ARMFastISel::TargetSelectInstruction(const Instruction *I) { return false; } -/// TryToFoldLoad - The specified machine instr operand is a vreg, and that +/// \brief The specified machine instr operand is a vreg, and that /// vreg is being provided by the specified load instruction. If possible, /// try to fold the load as an operand to the instruction, returning true if /// successful. -bool ARMFastISel::TryToFoldLoad(MachineInstr *MI, unsigned OpNo, - const LoadInst *LI) { +bool ARMFastISel::tryToFoldLoadIntoMI(MachineInstr *MI, unsigned OpNo, + const LoadInst *LI) { // Verify we have a legal type before going any further. 
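The new CSR_AAPCS_ThisReturn / CSR_iOS_ThisReturn masks above model the ARM C++ ABI rule that constructors and destructors return 'this' in r0. A sketch of the caller pattern that benefits; placement new is used only to make the constructor call explicit:

    #include <new>

    struct Widget { Widget(); int Value; };

    int firstValue(void *Storage) {
      Widget *W = new (Storage) Widget();   // ctor returns 'this' in r0
      return W->Value;                      // W can simply reuse that r0 value
    }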
MVT VT; if (!isLoadTypeLegal(LI->getType(), VT)) diff --git a/contrib/llvm/lib/Target/ARM/ARMFrameLowering.cpp b/contrib/llvm/lib/Target/ARM/ARMFrameLowering.cpp index 7a02adf..483802b 100644 --- a/contrib/llvm/lib/Target/ARM/ARMFrameLowering.cpp +++ b/contrib/llvm/lib/Target/ARM/ARMFrameLowering.cpp @@ -141,7 +141,7 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF) const { assert(!AFI->isThumb1OnlyFunction() && "This emitPrologue does not support Thumb1!"); bool isARM = !AFI->isThumbFunction(); - unsigned VARegSaveSize = AFI->getVarArgsRegSaveSize(); + unsigned ArgRegsSaveSize = AFI->getArgRegsSaveSize(); unsigned NumBytes = MFI->getStackSize(); const std::vector<CalleeSavedInfo> &CSI = MFI->getCalleeSavedInfo(); DebugLoc dl = MBBI != MBB.end() ? MBBI->getDebugLoc() : DebugLoc(); @@ -159,8 +159,8 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF) const { return; // Allocate the vararg register save area. This is not counted in NumBytes. - if (VARegSaveSize) - emitSPUpdate(isARM, MBB, MBBI, dl, TII, -VARegSaveSize, + if (ArgRegsSaveSize) + emitSPUpdate(isARM, MBB, MBBI, dl, TII, -ArgRegsSaveSize, MachineInstr::FrameSetup); if (!AFI->hasStackFrame()) { @@ -357,7 +357,7 @@ void ARMFrameLowering::emitEpilogue(MachineFunction &MF, "This emitEpilogue does not support Thumb1!"); bool isARM = !AFI->isThumbFunction(); - unsigned VARegSaveSize = AFI->getVarArgsRegSaveSize(); + unsigned ArgRegsSaveSize = AFI->getArgRegsSaveSize(); int NumBytes = (int)MFI->getStackSize(); unsigned FramePtr = RegInfo->getFrameRegister(MF); @@ -471,8 +471,8 @@ void ARMFrameLowering::emitEpilogue(MachineFunction &MF, MBBI = NewMI; } - if (VARegSaveSize) - emitSPUpdate(isARM, MBB, MBBI, dl, TII, VARegSaveSize); + if (ArgRegsSaveSize) + emitSPUpdate(isARM, MBB, MBBI, dl, TII, ArgRegsSaveSize); } /// getFrameIndexReference - Provide a base+offset reference to an FI slot for @@ -1003,7 +1003,7 @@ bool ARMFrameLowering::restoreCalleeSavedRegisters(MachineBasicBlock &MBB, MachineFunction &MF = *MBB.getParent(); ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); - bool isVarArg = AFI->getVarArgsRegSaveSize() > 0; + bool isVarArg = AFI->getArgRegsSaveSize() > 0; unsigned NumAlignedDPRCS2Regs = AFI->getNumAlignedDPRCS2Regs(); // The emitPopInst calls below do not insert reloads for the aligned DPRCS2 @@ -1174,7 +1174,7 @@ ARMFrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF, if (AFI->isThumb1OnlyFunction()) { // Spill LR if Thumb1 function uses variable length argument lists. - if (AFI->getVarArgsRegSaveSize() > 0) + if (AFI->getArgRegsSaveSize() > 0) MRI.setPhysRegUsed(ARM::LR); // Spill R4 if Thumb1 epilogue has to restore SP from FP. 
We don't know diff --git a/contrib/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp b/contrib/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp index 2c51de2..9e1782e 100644 --- a/contrib/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp +++ b/contrib/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp @@ -1469,14 +1469,14 @@ SDNode *ARMDAGToDAGISel::SelectARMIndexedLoad(SDNode *N) { SDValue Ops[]= { Base, AMOpc, getAL(CurDAG), CurDAG->getRegister(0, MVT::i32), Chain }; return CurDAG->getMachineNode(Opcode, N->getDebugLoc(), MVT::i32, - MVT::i32, MVT::Other, Ops, 5); + MVT::i32, MVT::Other, Ops); } else { SDValue Chain = LD->getChain(); SDValue Base = LD->getBasePtr(); SDValue Ops[]= { Base, Offset, AMOpc, getAL(CurDAG), CurDAG->getRegister(0, MVT::i32), Chain }; return CurDAG->getMachineNode(Opcode, N->getDebugLoc(), MVT::i32, - MVT::i32, MVT::Other, Ops, 6); + MVT::i32, MVT::Other, Ops); } } @@ -1525,7 +1525,7 @@ SDNode *ARMDAGToDAGISel::SelectT2IndexedLoad(SDNode *N) { SDValue Ops[]= { Base, Offset, getAL(CurDAG), CurDAG->getRegister(0, MVT::i32), Chain }; return CurDAG->getMachineNode(Opcode, N->getDebugLoc(), MVT::i32, MVT::i32, - MVT::Other, Ops, 5); + MVT::Other, Ops); } return NULL; @@ -1539,7 +1539,7 @@ SDNode *ARMDAGToDAGISel::createGPRPairNode(EVT VT, SDValue V0, SDValue V1) { SDValue SubReg0 = CurDAG->getTargetConstant(ARM::gsub_0, MVT::i32); SDValue SubReg1 = CurDAG->getTargetConstant(ARM::gsub_1, MVT::i32); const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1 }; - return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops, 5); + return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops); } /// \brief Form a D register from a pair of S registers. @@ -1550,7 +1550,7 @@ SDNode *ARMDAGToDAGISel::createSRegPairNode(EVT VT, SDValue V0, SDValue V1) { SDValue SubReg0 = CurDAG->getTargetConstant(ARM::ssub_0, MVT::i32); SDValue SubReg1 = CurDAG->getTargetConstant(ARM::ssub_1, MVT::i32); const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1 }; - return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops, 5); + return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops); } /// \brief Form a quad register from a pair of D registers. @@ -1560,7 +1560,7 @@ SDNode *ARMDAGToDAGISel::createDRegPairNode(EVT VT, SDValue V0, SDValue V1) { SDValue SubReg0 = CurDAG->getTargetConstant(ARM::dsub_0, MVT::i32); SDValue SubReg1 = CurDAG->getTargetConstant(ARM::dsub_1, MVT::i32); const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1 }; - return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops, 5); + return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops); } /// \brief Form 4 consecutive D registers from a pair of Q registers. @@ -1570,7 +1570,7 @@ SDNode *ARMDAGToDAGISel::createQRegPairNode(EVT VT, SDValue V0, SDValue V1) { SDValue SubReg0 = CurDAG->getTargetConstant(ARM::qsub_0, MVT::i32); SDValue SubReg1 = CurDAG->getTargetConstant(ARM::qsub_1, MVT::i32); const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1 }; - return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops, 5); + return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops); } /// \brief Form 4 consecutive S registers. 
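The mechanical change running through this file: getMachineNode now takes its operands as an ArrayRef<SDValue>, so the trailing element count and the Ops.data()/Ops.size() pairs disappear from every call site. A hedged, self-contained sketch of the mechanism; numOps merely stands in for the new overloads:

    #include "llvm/ADT/ArrayRef.h"
    #include "llvm/ADT/SmallVector.h"

    static unsigned numOps(llvm::ArrayRef<int> Ops) { return Ops.size(); }

    void demo() {
      int Fixed[] = {1, 2, 3};                  // was: f(Fixed, 3)
      llvm::SmallVector<int, 8> Dynamic(5, 0);  // was: f(Dynamic.data(), Dynamic.size())
      (void)numOps(Fixed);                      // ArrayRef carries the length itself
      (void)numOps(Dynamic);
    }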
@@ -1585,7 +1585,7 @@ SDNode *ARMDAGToDAGISel::createQuadSRegsNode(EVT VT, SDValue V0, SDValue V1, SDValue SubReg3 = CurDAG->getTargetConstant(ARM::ssub_3, MVT::i32); const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1, V2, SubReg2, V3, SubReg3 }; - return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops, 9); + return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops); } /// \brief Form 4 consecutive D registers. @@ -1599,7 +1599,7 @@ SDNode *ARMDAGToDAGISel::createQuadDRegsNode(EVT VT, SDValue V0, SDValue V1, SDValue SubReg3 = CurDAG->getTargetConstant(ARM::dsub_3, MVT::i32); const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1, V2, SubReg2, V3, SubReg3 }; - return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops, 9); + return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops); } /// \brief Form 4 consecutive Q registers. @@ -1613,7 +1613,7 @@ SDNode *ARMDAGToDAGISel::createQuadQRegsNode(EVT VT, SDValue V0, SDValue V1, SDValue SubReg3 = CurDAG->getTargetConstant(ARM::qsub_3, MVT::i32); const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1, V2, SubReg2, V3, SubReg3 }; - return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops, 9); + return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops); } /// GetVLDSTAlign - Get the alignment (in bytes) for the alignment operand @@ -1761,7 +1761,7 @@ SDNode *ARMDAGToDAGISel::SelectVLD(SDNode *N, bool isUpdating, unsigned NumVecs, Ops.push_back(Pred); Ops.push_back(Reg0); Ops.push_back(Chain); - VLd = CurDAG->getMachineNode(Opc, dl, ResTys, Ops.data(), Ops.size()); + VLd = CurDAG->getMachineNode(Opc, dl, ResTys, Ops); } else { // Otherwise, quad registers are loaded with two separate instructions, @@ -1774,7 +1774,7 @@ SDNode *ARMDAGToDAGISel::SelectVLD(SDNode *N, bool isUpdating, unsigned NumVecs, SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, ResTy), 0); const SDValue OpsA[] = { MemAddr, Align, Reg0, ImplDef, Pred, Reg0, Chain }; SDNode *VLdA = CurDAG->getMachineNode(QOpcodes0[OpcodeIndex], dl, - ResTy, AddrTy, MVT::Other, OpsA, 7); + ResTy, AddrTy, MVT::Other, OpsA); Chain = SDValue(VLdA, 2); // Load the odd subregs. @@ -1791,8 +1791,7 @@ SDNode *ARMDAGToDAGISel::SelectVLD(SDNode *N, bool isUpdating, unsigned NumVecs, Ops.push_back(Pred); Ops.push_back(Reg0); Ops.push_back(Chain); - VLd = CurDAG->getMachineNode(QOpcodes1[OpcodeIndex], dl, ResTys, - Ops.data(), Ops.size()); + VLd = CurDAG->getMachineNode(QOpcodes1[OpcodeIndex], dl, ResTys, Ops); } // Transfer memoperands. @@ -1913,8 +1912,7 @@ SDNode *ARMDAGToDAGISel::SelectVST(SDNode *N, bool isUpdating, unsigned NumVecs, Ops.push_back(Pred); Ops.push_back(Reg0); Ops.push_back(Chain); - SDNode *VSt = - CurDAG->getMachineNode(Opc, dl, ResTys, Ops.data(), Ops.size()); + SDNode *VSt = CurDAG->getMachineNode(Opc, dl, ResTys, Ops); // Transfer memoperands. 
cast<MachineSDNode>(VSt)->setMemRefs(MemOp, MemOp + 1); @@ -1939,7 +1937,7 @@ SDNode *ARMDAGToDAGISel::SelectVST(SDNode *N, bool isUpdating, unsigned NumVecs, const SDValue OpsA[] = { MemAddr, Align, Reg0, RegSeq, Pred, Reg0, Chain }; SDNode *VStA = CurDAG->getMachineNode(QOpcodes0[OpcodeIndex], dl, MemAddr.getValueType(), - MVT::Other, OpsA, 7); + MVT::Other, OpsA); cast<MachineSDNode>(VStA)->setMemRefs(MemOp, MemOp + 1); Chain = SDValue(VStA, 1); @@ -1958,7 +1956,7 @@ SDNode *ARMDAGToDAGISel::SelectVST(SDNode *N, bool isUpdating, unsigned NumVecs, Ops.push_back(Reg0); Ops.push_back(Chain); SDNode *VStB = CurDAG->getMachineNode(QOpcodes1[OpcodeIndex], dl, ResTys, - Ops.data(), Ops.size()); + Ops); cast<MachineSDNode>(VStB)->setMemRefs(MemOp, MemOp + 1); return VStB; } @@ -2063,8 +2061,7 @@ SDNode *ARMDAGToDAGISel::SelectVLDSTLane(SDNode *N, bool IsLoad, unsigned Opc = (is64BitVector ? DOpcodes[OpcodeIndex] : QOpcodes[OpcodeIndex]); - SDNode *VLdLn = CurDAG->getMachineNode(Opc, dl, ResTys, - Ops.data(), Ops.size()); + SDNode *VLdLn = CurDAG->getMachineNode(Opc, dl, ResTys, Ops); cast<MachineSDNode>(VLdLn)->setMemRefs(MemOp, MemOp + 1); if (!IsLoad) return VLdLn; @@ -2150,8 +2147,7 @@ SDNode *ARMDAGToDAGISel::SelectVLDDup(SDNode *N, bool isUpdating, if (isUpdating) ResTys.push_back(MVT::i32); ResTys.push_back(MVT::Other); - SDNode *VLdDup = - CurDAG->getMachineNode(Opc, dl, ResTys, Ops.data(), Ops.size()); + SDNode *VLdDup = CurDAG->getMachineNode(Opc, dl, ResTys, Ops); cast<MachineSDNode>(VLdDup)->setMemRefs(MemOp, MemOp + 1); SuperReg = SDValue(VLdDup, 0); @@ -2197,7 +2193,7 @@ SDNode *ARMDAGToDAGISel::SelectVTBL(SDNode *N, bool IsExt, unsigned NumVecs, Ops.push_back(N->getOperand(FirstTblReg + NumVecs)); Ops.push_back(getAL(CurDAG)); // predicate Ops.push_back(CurDAG->getRegister(0, MVT::i32)); // predicate register - return CurDAG->getMachineNode(Opc, dl, VT, Ops.data(), Ops.size()); + return CurDAG->getMachineNode(Opc, dl, VT, Ops); } SDNode *ARMDAGToDAGISel::SelectV6T2BitfieldExtractOp(SDNode *N, @@ -2542,7 +2538,7 @@ SDNode *ARMDAGToDAGISel::SelectAtomic64(SDNode *Node, unsigned Opc) { MemOp[0] = cast<MemSDNode>(Node)->getMemOperand(); SDNode *ResNode = CurDAG->getMachineNode(Opc, Node->getDebugLoc(), MVT::i32, MVT::i32, MVT::Other, - Ops.data() ,Ops.size()); + Ops); cast<MachineSDNode>(ResNode)->setMemRefs(MemOp, MemOp + 1); return ResNode; } @@ -2599,7 +2595,7 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) { SDValue PredReg = CurDAG->getRegister(0, MVT::i32); SDValue Ops[] = { CPIdx, Pred, PredReg, CurDAG->getEntryNode() }; ResNode = CurDAG->getMachineNode(ARM::tLDRpci, dl, MVT::i32, MVT::Other, - Ops, 4); + Ops); } else { SDValue Ops[] = { CPIdx, @@ -2609,7 +2605,7 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) { CurDAG->getEntryNode() }; ResNode=CurDAG->getMachineNode(ARM::LDRcp, dl, MVT::i32, MVT::Other, - Ops, 5); + Ops); } ReplaceUses(SDValue(N, 0), SDValue(ResNode, 0)); return NULL; @@ -2719,7 +2715,7 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) { MVT::i32); SDValue Ops[] = { N0.getOperand(0), Imm16, getAL(CurDAG), CurDAG->getRegister(0, MVT::i32) }; - return CurDAG->getMachineNode(Opc, dl, VT, Ops, 4); + return CurDAG->getMachineNode(Opc, dl, VT, Ops); } } break; @@ -2733,16 +2729,15 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) { break; if (Subtarget->isThumb()) { SDValue Ops[] = { N->getOperand(0), N->getOperand(1), - getAL(CurDAG), CurDAG->getRegister(0, MVT::i32), - CurDAG->getRegister(0, MVT::i32) }; - return CurDAG->getMachineNode(ARM::t2UMULL, dl, MVT::i32, 
MVT::i32,Ops,4); + getAL(CurDAG), CurDAG->getRegister(0, MVT::i32) }; + return CurDAG->getMachineNode(ARM::t2UMULL, dl, MVT::i32, MVT::i32, Ops); } else { SDValue Ops[] = { N->getOperand(0), N->getOperand(1), getAL(CurDAG), CurDAG->getRegister(0, MVT::i32), CurDAG->getRegister(0, MVT::i32) }; return CurDAG->getMachineNode(Subtarget->hasV6Ops() ? ARM::UMULL : ARM::UMULLv5, - dl, MVT::i32, MVT::i32, Ops, 5); + dl, MVT::i32, MVT::i32, Ops); } } case ISD::SMUL_LOHI: { @@ -2751,14 +2746,14 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) { if (Subtarget->isThumb()) { SDValue Ops[] = { N->getOperand(0), N->getOperand(1), getAL(CurDAG), CurDAG->getRegister(0, MVT::i32) }; - return CurDAG->getMachineNode(ARM::t2SMULL, dl, MVT::i32, MVT::i32,Ops,4); + return CurDAG->getMachineNode(ARM::t2SMULL, dl, MVT::i32, MVT::i32, Ops); } else { SDValue Ops[] = { N->getOperand(0), N->getOperand(1), getAL(CurDAG), CurDAG->getRegister(0, MVT::i32), CurDAG->getRegister(0, MVT::i32) }; return CurDAG->getMachineNode(Subtarget->hasV6Ops() ? ARM::SMULL : ARM::SMULLv5, - dl, MVT::i32, MVT::i32, Ops, 5); + dl, MVT::i32, MVT::i32, Ops); } } case ARMISD::UMLAL:{ @@ -2766,7 +2761,7 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) { SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2), N->getOperand(3), getAL(CurDAG), CurDAG->getRegister(0, MVT::i32)}; - return CurDAG->getMachineNode(ARM::t2UMLAL, dl, MVT::i32, MVT::i32, Ops, 6); + return CurDAG->getMachineNode(ARM::t2UMLAL, dl, MVT::i32, MVT::i32, Ops); }else{ SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2), N->getOperand(3), getAL(CurDAG), @@ -2774,7 +2769,7 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) { CurDAG->getRegister(0, MVT::i32) }; return CurDAG->getMachineNode(Subtarget->hasV6Ops() ? ARM::UMLAL : ARM::UMLALv5, - dl, MVT::i32, MVT::i32, Ops, 7); + dl, MVT::i32, MVT::i32, Ops); } } case ARMISD::SMLAL:{ @@ -2782,7 +2777,7 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) { SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2), N->getOperand(3), getAL(CurDAG), CurDAG->getRegister(0, MVT::i32)}; - return CurDAG->getMachineNode(ARM::t2SMLAL, dl, MVT::i32, MVT::i32, Ops, 6); + return CurDAG->getMachineNode(ARM::t2SMLAL, dl, MVT::i32, MVT::i32, Ops); }else{ SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2), N->getOperand(3), getAL(CurDAG), @@ -2790,7 +2785,7 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) { CurDAG->getRegister(0, MVT::i32) }; return CurDAG->getMachineNode(Subtarget->hasV6Ops() ? 
ARM::SMLAL : ARM::SMLALv5, - dl, MVT::i32, MVT::i32, Ops, 7); + dl, MVT::i32, MVT::i32, Ops); } } case ISD::LOAD: { @@ -2833,7 +2828,7 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) { MVT::i32); SDValue Ops[] = { N1, Tmp2, N3, Chain, InFlag }; SDNode *ResNode = CurDAG->getMachineNode(Opc, dl, MVT::Other, - MVT::Glue, Ops, 5); + MVT::Glue, Ops); Chain = SDValue(ResNode, 0); if (N->getNumValues() == 2) { InFlag = SDValue(ResNode, 1); @@ -2863,7 +2858,7 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) { SDValue Pred = getAL(CurDAG); SDValue PredReg = CurDAG->getRegister(0, MVT::i32); SDValue Ops[] = { N->getOperand(0), N->getOperand(1), Pred, PredReg }; - return CurDAG->getMachineNode(Opc, dl, VT, VT, Ops, 4); + return CurDAG->getMachineNode(Opc, dl, VT, VT, Ops); } case ARMISD::VUZP: { unsigned Opc = 0; @@ -2883,7 +2878,7 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) { SDValue Pred = getAL(CurDAG); SDValue PredReg = CurDAG->getRegister(0, MVT::i32); SDValue Ops[] = { N->getOperand(0), N->getOperand(1), Pred, PredReg }; - return CurDAG->getMachineNode(Opc, dl, VT, VT, Ops, 4); + return CurDAG->getMachineNode(Opc, dl, VT, VT, Ops); } case ARMISD::VTRN: { unsigned Opc = 0; @@ -2902,7 +2897,7 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) { SDValue Pred = getAL(CurDAG); SDValue PredReg = CurDAG->getRegister(0, MVT::i32); SDValue Ops[] = { N->getOperand(0), N->getOperand(1), Pred, PredReg }; - return CurDAG->getMachineNode(Opc, dl, VT, VT, Ops, 4); + return CurDAG->getMachineNode(Opc, dl, VT, VT, Ops); } case ARMISD::BUILD_VECTOR: { EVT VecVT = N->getValueType(0); @@ -3147,8 +3142,7 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) { Ops.push_back(getAL(CurDAG)); Ops.push_back(CurDAG->getRegister(0, MVT::i32)); Ops.push_back(Chain); - SDNode *Ld = CurDAG->getMachineNode(NewOpc, dl, ResTys, Ops.data(), - Ops.size()); + SDNode *Ld = CurDAG->getMachineNode(NewOpc, dl, ResTys, Ops); // Transfer memoperands. MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1); MemOp[0] = cast<MemIntrinsicSDNode>(N)->getMemOperand(); @@ -3211,8 +3205,7 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) { unsigned NewOpc = isThumb ? ARM::t2STREXD : ARM::STREXD; - SDNode *St = CurDAG->getMachineNode(NewOpc, dl, ResTys, Ops.data(), - Ops.size()); + SDNode *St = CurDAG->getMachineNode(NewOpc, dl, ResTys, Ops); // Transfer memoperands. 
MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1); MemOp[0] = cast<MemIntrinsicSDNode>(N)->getMemOperand(); @@ -3398,7 +3391,7 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) { Ops.push_back(N->getOperand(1)); Ops.push_back(getAL(CurDAG)); // Predicate Ops.push_back(CurDAG->getRegister(0, MVT::i32)); // Predicate Register - return CurDAG->getMachineNode(ARM::VTBL1, dl, VT, Ops.data(), Ops.size()); + return CurDAG->getMachineNode(ARM::VTBL1, dl, VT, Ops); } case ARMISD::VTBL2: { DebugLoc dl = N->getDebugLoc(); @@ -3414,8 +3407,7 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) { Ops.push_back(N->getOperand(2)); Ops.push_back(getAL(CurDAG)); // Predicate Ops.push_back(CurDAG->getRegister(0, MVT::i32)); // Predicate Register - return CurDAG->getMachineNode(ARM::VTBL2, dl, VT, - Ops.data(), Ops.size()); + return CurDAG->getMachineNode(ARM::VTBL2, dl, VT, Ops); } case ISD::CONCAT_VECTORS: diff --git a/contrib/llvm/lib/Target/ARM/ARMISelLowering.cpp b/contrib/llvm/lib/Target/ARM/ARMISelLowering.cpp index bb26090..e49cfc4 100644 --- a/contrib/llvm/lib/Target/ARM/ARMISelLowering.cpp +++ b/contrib/llvm/lib/Target/ARM/ARMISelLowering.cpp @@ -729,7 +729,6 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM) (Subtarget->hasV6Ops() && !Subtarget->isThumb())) { // membarrier needs custom lowering; the rest are legal and handled // normally. - setOperationAction(ISD::MEMBARRIER, MVT::Other, Custom); setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, Custom); // Custom lowering for 64-bit ops setOperationAction(ISD::ATOMIC_LOAD_ADD, MVT::i64, Custom); @@ -747,7 +746,6 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM) setInsertFencesForAtomic(true); } else { // Set them all for expansion, which will force libcalls. - setOperationAction(ISD::MEMBARRIER, MVT::Other, Expand); setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, Expand); setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i32, Expand); setOperationAction(ISD::ATOMIC_SWAP, MVT::i32, Expand); @@ -765,8 +763,6 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM) // Unordered/Monotonic case. setOperationAction(ISD::ATOMIC_LOAD, MVT::i32, Custom); setOperationAction(ISD::ATOMIC_STORE, MVT::i32, Custom); - // Since the libcalls include locking, fold in the fences - setShouldFoldAtomicFences(true); } setOperationAction(ISD::PREFETCH, MVT::Other, Custom); @@ -1238,7 +1234,8 @@ ARMTargetLowering::LowerCallResult(SDValue Chain, SDValue InFlag, CallingConv::ID CallConv, bool isVarArg, const SmallVectorImpl<ISD::InputArg> &Ins, DebugLoc dl, SelectionDAG &DAG, - SmallVectorImpl<SDValue> &InVals) const { + SmallVectorImpl<SDValue> &InVals, + bool isThisReturn, SDValue ThisVal) const { // Assign locations to each value returned by this call. SmallVector<CCValAssign, 16> RVLocs; @@ -1252,6 +1249,15 @@ ARMTargetLowering::LowerCallResult(SDValue Chain, SDValue InFlag, for (unsigned i = 0; i != RVLocs.size(); ++i) { CCValAssign VA = RVLocs[i]; + // Pass 'this' value directly from the argument to return value, to avoid + // reg unit interference + if (i == 0 && isThisReturn) { + assert(!VA.needsCustom() && VA.getLocVT() == MVT::i32 && + "unexpected return calling convention register assignment"); + InVals.push_back(ThisVal); + continue; + } + SDValue Val; if (VA.needsCustom()) { // Handle f64 or half of a v2f64. @@ -1363,21 +1369,22 @@ ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, bool isVarArg = CLI.IsVarArg; MachineFunction &MF = DAG.getMachineFunction(); - bool IsStructRet = (Outs.empty()) ? 
false : Outs[0].Flags.isSRet(); - bool IsSibCall = false; + bool isStructRet = (Outs.empty()) ? false : Outs[0].Flags.isSRet(); + bool isThisReturn = false; + bool isSibCall = false; // Disable tail calls if they're not supported. if (!EnableARMTailCalls && !Subtarget->supportsTailCall()) isTailCall = false; if (isTailCall) { // Check if it's really possible to do a tail call. isTailCall = IsEligibleForTailCallOptimization(Callee, CallConv, - isVarArg, IsStructRet, MF.getFunction()->hasStructRetAttr(), + isVarArg, isStructRet, MF.getFunction()->hasStructRetAttr(), Outs, OutVals, Ins, DAG); // We don't support GuaranteedTailCallOpt for ARM, only automatically // detected sibcalls. if (isTailCall) { ++NumTailCalls; - IsSibCall = true; + isSibCall = true; } } @@ -1393,12 +1400,12 @@ ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, unsigned NumBytes = CCInfo.getNextStackOffset(); // For tail calls, memory operands are available in our caller's stack. - if (IsSibCall) + if (isSibCall) NumBytes = 0; // Adjust the stack pointer for the new arguments... // These operations are automatically eliminated by the prolog/epilog pass - if (!IsSibCall) + if (!isSibCall) Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumBytes, true)); SDValue StackPtr = DAG.getCopyFromReg(Chain, dl, ARM::SP, getPointerTy()); @@ -1460,6 +1467,13 @@ ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, StackPtr, MemOpChains, Flags); } } else if (VA.isRegLoc()) { + if (realArgIdx == 0 && Flags.isReturned() && Outs[0].VT == MVT::i32) { + assert(VA.getLocVT() == MVT::i32 && + "unexpected calling convention register assignment"); + assert(!Ins.empty() && Ins[0].VT == MVT::i32 && + "unexpected use of 'returned'"); + isThisReturn = true; + } RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg)); } else if (isByVal) { assert(VA.isMemLoc()); @@ -1467,10 +1481,17 @@ ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, // True if this byval aggregate will be split between registers // and memory. - if (CCInfo.isFirstByValRegValid()) { + unsigned ByValArgsCount = CCInfo.getInRegsParamsCount(); + unsigned CurByValIdx = CCInfo.getInRegsParamsProceed(); + + if (CurByValIdx < ByValArgsCount) { + + unsigned RegBegin, RegEnd; + CCInfo.getInRegsParamInfo(CurByValIdx, RegBegin, RegEnd); + EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); unsigned int i, j; - for (i = 0, j = CCInfo.getFirstByValReg(); j < ARM::R4; i++, j++) { + for (i = 0, j = RegBegin; j < RegEnd; i++, j++) { SDValue Const = DAG.getConstant(4*i, MVT::i32); SDValue AddArg = DAG.getNode(ISD::ADD, dl, PtrVT, Arg, Const); SDValue Load = DAG.getLoad(PtrVT, dl, Chain, AddArg, @@ -1479,11 +1500,15 @@ ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, MemOpChains.push_back(Load.getValue(1)); RegsToPass.push_back(std::make_pair(j, Load)); } - offset = ARM::R4 - CCInfo.getFirstByValReg(); - CCInfo.clearFirstByValReg(); + + // If parameter size outsides register area, "offset" value + // helps us to calculate stack slot for remained part properly. 
+ offset = RegEnd - RegBegin; + + CCInfo.nextInRegsParam(); } - if (Flags.getByValSize() - 4*offset > 0) { + if (Flags.getByValSize() > 4*offset) { unsigned LocMemOffset = VA.getLocMemOffset(); SDValue StkPtrOff = DAG.getIntPtrConstant(LocMemOffset); SDValue Dst = DAG.getNode(ISD::ADD, dl, getPointerTy(), StackPtr, @@ -1499,7 +1524,7 @@ ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, MemOpChains.push_back(DAG.getNode(ARMISD::COPY_STRUCT_BYVAL, dl, VTs, Ops, array_lengthof(Ops))); } - } else if (!IsSibCall) { + } else if (!isSibCall) { assert(VA.isMemLoc()); MemOpChains.push_back(LowerMemOpCallTo(Chain, StackPtr, Arg, @@ -1539,7 +1564,7 @@ ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, RegsToPass[i].second, InFlag); InFlag = Chain.getValue(1); } - InFlag =SDValue(); + InFlag = SDValue(); } // If the callee is a GlobalAddress/ExternalSymbol node (quite common, every @@ -1680,8 +1705,15 @@ ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, RegsToPass[i].second.getValueType())); // Add a register mask operand representing the call-preserved registers. + const uint32_t *Mask; const TargetRegisterInfo *TRI = getTargetMachine().getRegisterInfo(); - const uint32_t *Mask = TRI->getCallPreservedMask(CallConv); + const ARMBaseRegisterInfo *ARI = static_cast<const ARMBaseRegisterInfo*>(TRI); + if (isThisReturn) + // For 'this' returns, use the R0-preserving mask + Mask = ARI->getThisReturnPreservedMask(CallConv); + else + Mask = ARI->getCallPreservedMask(CallConv); + assert(Mask && "Missing call preserved mask for calling convention"); Ops.push_back(DAG.getRegisterMask(Mask)); @@ -1703,8 +1735,9 @@ ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, // Handle result values, copying them out of physregs into vregs that we // return. - return LowerCallResult(Chain, InFlag, CallConv, isVarArg, Ins, - dl, DAG, InVals); + return LowerCallResult(Chain, InFlag, CallConv, isVarArg, Ins, dl, DAG, + InVals, isThisReturn, + isThisReturn ? OutVals[0] : SDValue()); } /// HandleByVal - Every parameter *after* a byval parameter is passed @@ -1718,8 +1751,24 @@ ARMTargetLowering::HandleByVal( assert((State->getCallOrPrologue() == Prologue || State->getCallOrPrologue() == Call) && "unhandled ParmContext"); - if ((!State->isFirstByValRegValid()) && - (ARM::R0 <= reg) && (reg <= ARM::R3)) { + + // For in-prologue parameters handling, we also introduce stack offset + // for byval registers: see CallingConvLower.cpp, CCState::HandleByVal. + // This behaviour outsides AAPCS rules (5.5 Parameters Passing) of how + // NSAA should be evaluted (NSAA means "next stacked argument address"). + // So: NextStackOffset = NSAAOffset + SizeOfByValParamsStoredInRegs. + // Then: NSAAOffset = NextStackOffset - SizeOfByValParamsStoredInRegs. 
+ unsigned NSAAOffset = State->getNextStackOffset(); + if (State->getCallOrPrologue() != Call) { + for (unsigned i = 0, e = State->getInRegsParamsCount(); i != e; ++i) { + unsigned RB, RE; + State->getInRegsParamInfo(i, RB, RE); + assert(NSAAOffset >= (RE-RB)*4 && + "Stack offset for byval regs doesn't introduced anymore?"); + NSAAOffset -= (RE-RB)*4; + } + } + if ((ARM::R0 <= reg) && (reg <= ARM::R3)) { if (Subtarget->isAAPCS_ABI() && Align > 4) { unsigned AlignInRegs = Align / 4; unsigned Waste = (ARM::R4 - reg) % AlignInRegs; @@ -1727,22 +1776,45 @@ ARMTargetLowering::HandleByVal( reg = State->AllocateReg(GPRArgRegs, 4); } if (reg != 0) { - State->setFirstByValReg(reg); + unsigned excess = 4 * (ARM::R4 - reg); + + // Special case when NSAA != SP and parameter size greater than size of + // all remained GPR regs. In that case we can't split parameter, we must + // send it to stack. We also must set NCRN to R4, so waste all + // remained registers. + if (Subtarget->isAAPCS_ABI() && NSAAOffset != 0 && size > excess) { + while (State->AllocateReg(GPRArgRegs, 4)) + ; + return; + } + + // First register for byval parameter is the first register that wasn't + // allocated before this method call, so it would be "reg". + // If parameter is small enough to be saved in range [reg, r4), then + // the end (first after last) register would be reg + param-size-in-regs, + // else parameter would be splitted between registers and stack, + // end register would be r4 in this case. + unsigned ByValRegBegin = reg; + unsigned ByValRegEnd = (size < excess) ? reg + size/4 : ARM::R4; + State->addInRegsParamInfo(ByValRegBegin, ByValRegEnd); + // Note, first register is allocated in the beginning of function already, + // allocate remained amount of registers we need. + for (unsigned i = reg+1; i != ByValRegEnd; ++i) + State->AllocateReg(GPRArgRegs, 4); // At a call site, a byval parameter that is split between // registers and memory needs its size truncated here. In a // function prologue, such byval parameters are reassembled in // memory, and are not truncated. if (State->getCallOrPrologue() == Call) { - unsigned excess = 4 * (ARM::R4 - reg); - assert(size >= excess && "expected larger existing stack allocation"); - size -= excess; + // Make remained size equal to 0 in case, when + // the whole structure may be stored into registers. + if (size < excess) + size = 0; + else + size -= excess; } } } - // Confiscate any remaining parameter registers to preclude their - // assignment to subsequent parameters. - while (State->AllocateReg(GPRArgRegs, 4)) - ; } /// MatchingStackOffset - Return true if the given stack call argument is @@ -1874,7 +1946,7 @@ ARMTargetLowering::IsEligibleForTailCallOptimization(SDValue Callee, // local frame. const ARMFunctionInfo *AFI_Caller = DAG.getMachineFunction(). getInfo<ARMFunctionInfo>(); - if (AFI_Caller->getVarArgsRegSaveSize()) + if (AFI_Caller->getArgRegsSaveSize()) return false; // If the callee takes no arguments then go on to check the results of the @@ -2461,35 +2533,6 @@ ARMTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG, } } -static SDValue LowerMEMBARRIER(SDValue Op, SelectionDAG &DAG, - const ARMSubtarget *Subtarget) { - DebugLoc dl = Op.getDebugLoc(); - if (!Subtarget->hasDataBarrier()) { - // Some ARMv6 cpus can support data barriers with an mcr instruction. - // Thumb1 and pre-v6 ARM mode use a libcall instead and should never get - // here. 
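A compact restatement of the register/stack split HandleByVal computes above for an AAPCS byval argument (illustrative names; r4 marks the end of the argument registers):

    void splitByVal(unsigned FirstFreeReg /* 0..4 for r0..r4 */, unsigned ByValSize,
                    unsigned &BytesInRegs, unsigned &BytesOnStack) {
      unsigned Excess = 4 * (4 - FirstFreeReg);              // room left in r0-r3
      BytesInRegs  = ByValSize < Excess ? ByValSize : Excess; // head goes in registers
      BytesOnStack = ByValSize - BytesInRegs;                 // tail lands at the NSAA
    }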
- assert(Subtarget->hasV6Ops() && !Subtarget->isThumb() && - "Unexpected ISD::MEMBARRIER encountered. Should be libcall!"); - return DAG.getNode(ARMISD::MEMBARRIER_MCR, dl, MVT::Other, Op.getOperand(0), - DAG.getConstant(0, MVT::i32)); - } - - SDValue Op5 = Op.getOperand(5); - bool isDeviceBarrier = cast<ConstantSDNode>(Op5)->getZExtValue() != 0; - unsigned isLL = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue(); - unsigned isLS = cast<ConstantSDNode>(Op.getOperand(2))->getZExtValue(); - bool isOnlyStoreBarrier = (isLL == 0 && isLS == 0); - - ARM_MB::MemBOpt DMBOpt; - if (isDeviceBarrier) - DMBOpt = isOnlyStoreBarrier ? ARM_MB::ST : ARM_MB::SY; - else - DMBOpt = isOnlyStoreBarrier ? ARM_MB::ISHST : ARM_MB::ISH; - return DAG.getNode(ARMISD::MEMBARRIER, dl, MVT::Other, Op.getOperand(0), - DAG.getConstant(DMBOpt, MVT::i32)); -} - - static SDValue LowerATOMIC_FENCE(SDValue Op, SelectionDAG &DAG, const ARMSubtarget *Subtarget) { // FIXME: handle "fence singlethread" more efficiently. @@ -2586,12 +2629,16 @@ ARMTargetLowering::GetF64FormalArgument(CCValAssign &VA, CCValAssign &NextVA, void ARMTargetLowering::computeRegArea(CCState &CCInfo, MachineFunction &MF, - unsigned &VARegSize, unsigned &VARegSaveSize) + unsigned InRegsParamRecordIdx, + unsigned &ArgRegsSize, + unsigned &ArgRegsSaveSize) const { unsigned NumGPRs; - if (CCInfo.isFirstByValRegValid()) - NumGPRs = ARM::R4 - CCInfo.getFirstByValReg(); - else { + if (InRegsParamRecordIdx < CCInfo.getInRegsParamsCount()) { + unsigned RBegin, REnd; + CCInfo.getInRegsParamInfo(InRegsParamRecordIdx, RBegin, REnd); + NumGPRs = REnd - RBegin; + } else { unsigned int firstUnalloced; firstUnalloced = CCInfo.getFirstUnallocated(GPRArgRegs, sizeof(GPRArgRegs) / @@ -2600,8 +2647,8 @@ ARMTargetLowering::computeRegArea(CCState &CCInfo, MachineFunction &MF, } unsigned Align = MF.getTarget().getFrameLowering()->getStackAlignment(); - VARegSize = NumGPRs * 4; - VARegSaveSize = (VARegSize + Align - 1) & ~(Align - 1); + ArgRegsSize = NumGPRs * 4; + ArgRegsSaveSize = (ArgRegsSize + Align - 1) & ~(Align - 1); } // The remaining GPRs hold either the beginning of variable-argument @@ -2611,40 +2658,60 @@ ARMTargetLowering::computeRegArea(CCState &CCInfo, MachineFunction &MF, // If this is a variadic function, the va_list pointer will begin with // these values; otherwise, this reassembles a (byval) structure that // was split between registers and memory. -void -ARMTargetLowering::VarArgStyleRegisters(CCState &CCInfo, SelectionDAG &DAG, - DebugLoc dl, SDValue &Chain, - const Value *OrigArg, - unsigned OffsetFromOrigArg, - unsigned ArgOffset, - bool ForceMutable) const { +// Return: The frame index registers were stored into. +int +ARMTargetLowering::StoreByValRegs(CCState &CCInfo, SelectionDAG &DAG, + DebugLoc dl, SDValue &Chain, + const Value *OrigArg, + unsigned InRegsParamRecordIdx, + unsigned OffsetFromOrigArg, + unsigned ArgOffset, + bool ForceMutable) const { + + // Currently, two use-cases possible: + // Case #1. Non var-args function, and we meet first byval parameter. + // Setup first unallocated register as first byval register; + // eat all remained registers + // (these two actions are performed by HandleByVal method). + // Then, here, we initialize stack frame with + // "store-reg" instructions. + // Case #2. Var-args function, that doesn't contain byval parameters. + // The same: eat all remained unallocated registers, + // initialize stack frame. 
+ MachineFunction &MF = DAG.getMachineFunction(); MachineFrameInfo *MFI = MF.getFrameInfo(); ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); - unsigned firstRegToSaveIndex; - if (CCInfo.isFirstByValRegValid()) - firstRegToSaveIndex = CCInfo.getFirstByValReg() - ARM::R0; - else { + unsigned firstRegToSaveIndex, lastRegToSaveIndex; + unsigned RBegin, REnd; + if (InRegsParamRecordIdx < CCInfo.getInRegsParamsCount()) { + CCInfo.getInRegsParamInfo(InRegsParamRecordIdx, RBegin, REnd); + firstRegToSaveIndex = RBegin - ARM::R0; + lastRegToSaveIndex = REnd - ARM::R0; + } else { firstRegToSaveIndex = CCInfo.getFirstUnallocated (GPRArgRegs, sizeof(GPRArgRegs) / sizeof(GPRArgRegs[0])); + lastRegToSaveIndex = 4; } - unsigned VARegSize, VARegSaveSize; - computeRegArea(CCInfo, MF, VARegSize, VARegSaveSize); - if (VARegSaveSize) { - // If this function is vararg, store any remaining integer argument regs - // to their spots on the stack so that they may be loaded by deferencing - // the result of va_next. - AFI->setVarArgsRegSaveSize(VARegSaveSize); - AFI->setVarArgsFrameIndex(MFI->CreateFixedObject(VARegSaveSize, - ArgOffset + VARegSaveSize - - VARegSize, - false)); - SDValue FIN = DAG.getFrameIndex(AFI->getVarArgsFrameIndex(), - getPointerTy()); + unsigned ArgRegsSize, ArgRegsSaveSize; + computeRegArea(CCInfo, MF, InRegsParamRecordIdx, ArgRegsSize, ArgRegsSaveSize); + + // Store any by-val regs to their spots on the stack so that they may be + // loaded by deferencing the result of formal parameter pointer or va_next. + // Note: once stack area for byval/varargs registers + // was initialized, it can't be initialized again. + if (ArgRegsSaveSize) { + + int FrameIndex = MFI->CreateFixedObject( + ArgRegsSaveSize, + ArgOffset + ArgRegsSaveSize - ArgRegsSize, + false); + SDValue FIN = DAG.getFrameIndex(FrameIndex, getPointerTy()); SmallVector<SDValue, 4> MemOps; - for (unsigned i = 0; firstRegToSaveIndex < 4; ++firstRegToSaveIndex, ++i) { + for (unsigned i = 0; firstRegToSaveIndex < lastRegToSaveIndex; + ++firstRegToSaveIndex, ++i) { const TargetRegisterClass *RC; if (AFI->isThumb1OnlyFunction()) RC = &ARM::tGPRRegClass; @@ -2661,13 +2728,37 @@ ARMTargetLowering::VarArgStyleRegisters(CCState &CCInfo, SelectionDAG &DAG, FIN = DAG.getNode(ISD::ADD, dl, getPointerTy(), FIN, DAG.getConstant(4, getPointerTy())); } + + AFI->setArgRegsSaveSize(ArgRegsSaveSize + AFI->getArgRegsSaveSize()); + if (!MemOps.empty()) Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &MemOps[0], MemOps.size()); + return FrameIndex; } else // This will point to the next argument passed via stack. - AFI->setVarArgsFrameIndex( - MFI->CreateFixedObject(4, ArgOffset, !ForceMutable)); + return MFI->CreateFixedObject(4, ArgOffset, !ForceMutable); +} + +// Setup stack frame, the va_list pointer will start from. +void +ARMTargetLowering::VarArgStyleRegisters(CCState &CCInfo, SelectionDAG &DAG, + DebugLoc dl, SDValue &Chain, + unsigned ArgOffset, + bool ForceMutable) const { + MachineFunction &MF = DAG.getMachineFunction(); + ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); + + // Try to store any remaining integer argument regs + // to their spots on the stack so that they may be loaded by deferencing + // the result of va_next. + // If there is no regs to be stored, just point address after last + // argument passed via stack. 
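VarArgStyleRegisters becomes a thin wrapper around StoreByValRegs: both need the same prologue spill of the remaining r0-r3 into a slot directly below the first stack-passed argument, because va_arg (and a reassembled byval aggregate) walks one contiguous region. An illustrative variadic function that relies on that layout:

    #include <stdarg.h>

    int sum(int n, ...) {
      va_list ap;
      va_start(ap, n);
      int s = 0;
      for (int i = 0; i < n; ++i)
        s += va_arg(ap, int);   // walks the register spill area, then the stack args
      va_end(ap);
      return s;
    }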
+ int FrameIndex = + StoreByValRegs(CCInfo, DAG, dl, Chain, 0, CCInfo.getInRegsParamsCount(), + 0, ArgOffset, ForceMutable); + + AFI->setVarArgsFrameIndex(FrameIndex); } SDValue @@ -2696,6 +2787,12 @@ ARMTargetLowering::LowerFormalArguments(SDValue Chain, SDValue ArgValue; Function::const_arg_iterator CurOrigArg = MF.getFunction()->arg_begin(); unsigned CurArgIdx = 0; + + // Initially ArgRegsSaveSize is zero. + // Then we increase this value each time we meet byval parameter. + // We also increase this value in case of varargs function. + AFI->setArgRegsSaveSize(0); + for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) { CCValAssign &VA = ArgLocs[i]; std::advance(CurOrigArg, Ins[VA.getValNo()].OrigArgIndex - CurArgIdx); @@ -2793,20 +2890,15 @@ ARMTargetLowering::LowerFormalArguments(SDValue Chain, // Since they could be overwritten by lowering of arguments in case of // a tail call. if (Flags.isByVal()) { - ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); - if (!AFI->getVarArgsFrameIndex()) { - VarArgStyleRegisters(CCInfo, DAG, - dl, Chain, CurOrigArg, - Ins[VA.getValNo()].PartOffset, - VA.getLocMemOffset(), - true /*force mutable frames*/); - int VAFrameIndex = AFI->getVarArgsFrameIndex(); - InVals.push_back(DAG.getFrameIndex(VAFrameIndex, getPointerTy())); - } else { - int FI = MFI->CreateFixedObject(Flags.getByValSize(), - VA.getLocMemOffset(), false); - InVals.push_back(DAG.getFrameIndex(FI, getPointerTy())); - } + unsigned CurByValIndex = CCInfo.getInRegsParamsProceed(); + int FrameIndex = StoreByValRegs( + CCInfo, DAG, dl, Chain, CurOrigArg, + CurByValIndex, + Ins[VA.getValNo()].PartOffset, + VA.getLocMemOffset(), + true /*force mutable frames*/); + InVals.push_back(DAG.getFrameIndex(FrameIndex, getPointerTy())); + CCInfo.nextInRegsParam(); } else { int FI = MFI->CreateFixedObject(VA.getLocVT().getSizeInBits()/8, VA.getLocMemOffset(), true); @@ -2824,7 +2916,7 @@ ARMTargetLowering::LowerFormalArguments(SDValue Chain, // varargs if (isVarArg) - VarArgStyleRegisters(CCInfo, DAG, dl, Chain, 0, 0, + VarArgStyleRegisters(CCInfo, DAG, dl, Chain, CCInfo.getNextStackOffset()); return Chain; @@ -5165,6 +5257,23 @@ static bool isZeroExtended(SDNode *N, SelectionDAG &DAG) { return false; } +static EVT getExtensionTo64Bits(const EVT &OrigVT) { + if (OrigVT.getSizeInBits() >= 64) + return OrigVT; + + assert(OrigVT.isSimple() && "Expecting a simple value type"); + + MVT::SimpleValueType OrigSimpleTy = OrigVT.getSimpleVT().SimpleTy; + switch (OrigSimpleTy) { + default: llvm_unreachable("Unexpected Vector Type"); + case MVT::v2i8: + case MVT::v2i16: + return MVT::v2i32; + case MVT::v4i8: + return MVT::v4i16; + } +} + /// AddRequiredExtensionForVMULL - Add a sign/zero extension to extend the total /// value size to 64 bits. We need a 64-bit D register as an operand to VMULL. /// We insert the required extension here to get the vector to fill a D register. @@ -5180,18 +5289,8 @@ static SDValue AddRequiredExtensionForVMULL(SDValue N, SelectionDAG &DAG, return N; // Must extend size to at least 64 bits to be used as an operand for VMULL. 
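getExtensionTo64Bits centralizes the widening used by the VMULL helpers: v2i8 and v2i16 become v2i32, v4i8 becomes v4i16, and anything already 64 bits wide is left alone. The kind of source loop this ultimately serves is a widening multiply of narrow elements (illustrative):

    #include <stdint.h>

    // i16 x i16 -> i32 per element; once both operands are proven sign-extended
    // (or come from an extending load) this is a candidate for vmull.s16.
    void widening_mul(const int16_t *a, const int16_t *b, int32_t *out, int n) {
      for (int i = 0; i < n; ++i)
        out[i] = (int32_t)a[i] * (int32_t)b[i];
    }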
- MVT::SimpleValueType OrigSimpleTy = OrigTy.getSimpleVT().SimpleTy; - EVT NewVT; - switch (OrigSimpleTy) { - default: llvm_unreachable("Unexpected Orig Vector Type"); - case MVT::v2i8: - case MVT::v2i16: - NewVT = MVT::v2i32; - break; - case MVT::v4i8: - NewVT = MVT::v4i16; - break; - } + EVT NewVT = getExtensionTo64Bits(OrigTy); + return DAG.getNode(ExtOpcode, N->getDebugLoc(), NewVT, N); } @@ -5201,22 +5300,22 @@ static SDValue AddRequiredExtensionForVMULL(SDValue N, SelectionDAG &DAG, /// reach a total size of 64 bits. We have to add the extension separately /// because ARM does not have a sign/zero extending load for vectors. static SDValue SkipLoadExtensionForVMULL(LoadSDNode *LD, SelectionDAG& DAG) { - SDValue NonExtendingLoad = - DAG.getLoad(LD->getMemoryVT(), LD->getDebugLoc(), LD->getChain(), + EVT ExtendedTy = getExtensionTo64Bits(LD->getMemoryVT()); + + // The load already has the right type. + if (ExtendedTy == LD->getMemoryVT()) + return DAG.getLoad(LD->getMemoryVT(), LD->getDebugLoc(), LD->getChain(), LD->getBasePtr(), LD->getPointerInfo(), LD->isVolatile(), LD->isNonTemporal(), LD->isInvariant(), LD->getAlignment()); - unsigned ExtOp = 0; - switch (LD->getExtensionType()) { - default: llvm_unreachable("Unexpected LoadExtType"); - case ISD::EXTLOAD: - case ISD::SEXTLOAD: ExtOp = ISD::SIGN_EXTEND; break; - case ISD::ZEXTLOAD: ExtOp = ISD::ZERO_EXTEND; break; - } - MVT::SimpleValueType MemType = LD->getMemoryVT().getSimpleVT().SimpleTy; - MVT::SimpleValueType ExtType = LD->getValueType(0).getSimpleVT().SimpleTy; - return AddRequiredExtensionForVMULL(NonExtendingLoad, DAG, - MemType, ExtType, ExtOp); + + // We need to create a zextload/sextload. We cannot just create a load + // followed by a zext/zext node because LowerMUL is also run during normal + // operation legalization where we can't create illegal types. 
+ return DAG.getExtLoad(LD->getExtensionType(), LD->getDebugLoc(), ExtendedTy, + LD->getChain(), LD->getBasePtr(), LD->getPointerInfo(), + LD->getMemoryVT(), LD->isVolatile(), + LD->isNonTemporal(), LD->getAlignment()); } /// SkipExtensionForVMULL - For a node that is a SIGN_EXTEND, ZERO_EXTEND, @@ -5614,7 +5713,6 @@ SDValue ARMTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { case ISD::BR_CC: return LowerBR_CC(Op, DAG); case ISD::BR_JT: return LowerBR_JT(Op, DAG); case ISD::VASTART: return LowerVASTART(Op, DAG); - case ISD::MEMBARRIER: return LowerMEMBARRIER(Op, DAG, Subtarget); case ISD::ATOMIC_FENCE: return LowerATOMIC_FENCE(Op, DAG, Subtarget); case ISD::PREFETCH: return LowerPREFETCH(Op, DAG, Subtarget); case ISD::SINT_TO_FP: diff --git a/contrib/llvm/lib/Target/ARM/ARMISelLowering.h b/contrib/llvm/lib/Target/ARM/ARMISelLowering.h index 9ee17f0..426010e 100644 --- a/contrib/llvm/lib/Target/ARM/ARMISelLowering.h +++ b/contrib/llvm/lib/Target/ARM/ARMISelLowering.h @@ -464,7 +464,8 @@ namespace llvm { CallingConv::ID CallConv, bool isVarArg, const SmallVectorImpl<ISD::InputArg> &Ins, DebugLoc dl, SelectionDAG &DAG, - SmallVectorImpl<SDValue> &InVals) const; + SmallVectorImpl<SDValue> &InVals, + bool isThisReturn, SDValue ThisVal) const; virtual SDValue LowerFormalArguments(SDValue Chain, @@ -473,16 +474,23 @@ namespace llvm { DebugLoc dl, SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const; + int StoreByValRegs(CCState &CCInfo, SelectionDAG &DAG, + DebugLoc dl, SDValue &Chain, + const Value *OrigArg, + unsigned InRegsParamRecordIdx, + unsigned OffsetFromOrigArg, + unsigned ArgOffset, + bool ForceMutable) const; + void VarArgStyleRegisters(CCState &CCInfo, SelectionDAG &DAG, DebugLoc dl, SDValue &Chain, - const Value *OrigArg, - unsigned OffsetFromOrigArg, unsigned ArgOffset, - bool ForceMutable = false) - const; + bool ForceMutable = false) const; void computeRegArea(CCState &CCInfo, MachineFunction &MF, - unsigned &VARegSize, unsigned &VARegSaveSize) const; + unsigned InRegsParamRecordIdx, + unsigned &ArgRegsSize, + unsigned &ArgRegsSaveSize) const; virtual SDValue LowerCall(TargetLowering::CallLoweringInfo &CLI, diff --git a/contrib/llvm/lib/Target/ARM/ARMInstrInfo.td b/contrib/llvm/lib/Target/ARM/ARMInstrInfo.td index 11550c5..1bd174e 100644 --- a/contrib/llvm/lib/Target/ARM/ARMInstrInfo.td +++ b/contrib/llvm/lib/Target/ARM/ARMInstrInfo.td @@ -221,6 +221,9 @@ def HasDB : Predicate<"Subtarget->hasDataBarrier()">, def HasMP : Predicate<"Subtarget->hasMPExtension()">, AssemblerPredicate<"FeatureMP", "mp-extensions">; +def HasTrustZone : Predicate<"Subtarget->hasTrustZone()">, + AssemblerPredicate<"FeatureTrustZone", + "TrustZone">; def UseNEONForFP : Predicate<"Subtarget->useNEONForSinglePrecisionFP()">; def DontUseNEONForFP : Predicate<"!Subtarget->useNEONForSinglePrecisionFP()">; def IsThumb : Predicate<"Subtarget->isThumb()">, @@ -578,6 +581,17 @@ def imm0_1 : Operand<i32> { let ParserMatchClass = Imm0_1AsmOperand; } def Imm0_3AsmOperand: ImmAsmOperand { let Name = "Imm0_3"; } def imm0_3 : Operand<i32> { let ParserMatchClass = Imm0_3AsmOperand; } +/// imm0_4 predicate - Immediate in the range [0,4]. +def Imm0_4AsmOperand : ImmAsmOperand +{ + let Name = "Imm0_4"; + let DiagnosticType = "ImmRange0_4"; +} +def imm0_4 : Operand<i32>, ImmLeaf<i32, [{ return Imm >= 0 && Imm < 5; }]> { + let ParserMatchClass = Imm0_4AsmOperand; + let DecoderMethod = "DecodeImm0_4"; +} + /// imm0_7 predicate - Immediate in the range [0,7]. 
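The new imm0_4 operand narrows the hint immediate to the architected hints (nop, yield, wfe, wfi, sev); the HINT and t2HINT definitions further down switch to it. From C such hints are usually reached through inline assembly (illustrative, GCC/Clang syntax):

    static inline void wait_for_event(void) {
      __asm__ volatile("wfe");   // assembles to hint #2
    }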
def Imm0_7AsmOperand: ImmAsmOperand { let Name = "Imm0_7"; } def imm0_7 : Operand<i32>, ImmLeaf<i32, [{ @@ -741,18 +755,26 @@ def imm1_16 : Operand<i32>, PatLeaf<(imm), [{ return Imm > 0 && Imm <= 16; }], // addrmode_imm12 := reg +/- imm12 // def MemImm12OffsetAsmOperand : AsmOperandClass { let Name = "MemImm12Offset"; } -def addrmode_imm12 : Operand<i32>, +class AddrMode_Imm12 : Operand<i32>, ComplexPattern<i32, 2, "SelectAddrModeImm12", []> { // 12-bit immediate operand. Note that instructions using this encode // #0 and #-0 differently. We flag #-0 as the magic value INT32_MIN. All other // immediate values are as normal. let EncoderMethod = "getAddrModeImm12OpValue"; - let PrintMethod = "printAddrModeImm12Operand"; let DecoderMethod = "DecodeAddrModeImm12Operand"; let ParserMatchClass = MemImm12OffsetAsmOperand; let MIOperandInfo = (ops GPR:$base, i32imm:$offsimm); } + +def addrmode_imm12 : AddrMode_Imm12 { + let PrintMethod = "printAddrModeImm12Operand<false>"; +} + +def addrmode_imm12_pre : AddrMode_Imm12 { + let PrintMethod = "printAddrModeImm12Operand<true>"; +} + // ldst_so_reg := reg +/- reg shop imm // def MemRegOffsetAsmOperand : AsmOperandClass { let Name = "MemRegOffset"; } @@ -852,14 +874,23 @@ def am2offset_imm : Operand<i32>, // // FIXME: split into imm vs. reg versions. def AddrMode3AsmOperand : AsmOperandClass { let Name = "AddrMode3"; } -def addrmode3 : Operand<i32>, - ComplexPattern<i32, 3, "SelectAddrMode3", []> { +class AddrMode3 : Operand<i32>, + ComplexPattern<i32, 3, "SelectAddrMode3", []> { let EncoderMethod = "getAddrMode3OpValue"; - let PrintMethod = "printAddrMode3Operand"; let ParserMatchClass = AddrMode3AsmOperand; let MIOperandInfo = (ops GPR:$base, GPR:$offsreg, i32imm:$offsimm); } +def addrmode3 : AddrMode3 +{ + let PrintMethod = "printAddrMode3Operand<false>"; +} + +def addrmode3_pre : AddrMode3 +{ + let PrintMethod = "printAddrMode3Operand<true>"; +} + // FIXME: split into imm vs. reg versions. // FIXME: parser method to handle +/- register. 
def AM3OffsetAsmOperand : AsmOperandClass { @@ -885,15 +916,22 @@ def ldstm_mode : OptionalDefOperand<OtherVT, (ops i32), (ops (i32 1))> { // addrmode5 := reg +/- imm8*4 // def AddrMode5AsmOperand : AsmOperandClass { let Name = "AddrMode5"; } -def addrmode5 : Operand<i32>, - ComplexPattern<i32, 2, "SelectAddrMode5", []> { - let PrintMethod = "printAddrMode5Operand"; +class AddrMode5 : Operand<i32>, + ComplexPattern<i32, 2, "SelectAddrMode5", []> { let EncoderMethod = "getAddrMode5OpValue"; let DecoderMethod = "DecodeAddrMode5Operand"; let ParserMatchClass = AddrMode5AsmOperand; let MIOperandInfo = (ops GPR:$base, i32imm); } +def addrmode5 : AddrMode5 { + let PrintMethod = "printAddrMode5Operand<false>"; +} + +def addrmode5_pre : AddrMode5 { + let PrintMethod = "printAddrMode5Operand<true>"; +} + // addrmode6 := reg with optional alignment // def AddrMode6AsmOperand : AsmOperandClass { let Name = "AlignedMemory"; } @@ -1668,11 +1706,11 @@ def ATOMUMAX6432 : PseudoInst<(outs GPR:$dst1, GPR:$dst2), NoItinerary, []>; } -def HINT : AI<(outs), (ins imm0_255:$imm), MiscFrm, NoItinerary, +def HINT : AI<(outs), (ins imm0_4:$imm), MiscFrm, NoItinerary, "hint", "\t$imm", []>, Requires<[IsARM, HasV6]> { - bits<8> imm; - let Inst{27-8} = 0b00110010000011110000; - let Inst{7-0} = imm; + bits<3> imm; + let Inst{27-3} = 0b0011001000001111000000000; + let Inst{2-0} = imm; } def : InstAlias<"nop$p", (HINT 0, pred:$p)>, Requires<[IsARM, HasV6T2]>; @@ -2077,7 +2115,7 @@ let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1, Uses = [SP] in { // Secure Monitor Call is a system instruction. def SMC : ABI<0b0001, (outs), (ins imm0_15:$opt), NoItinerary, "smc", "\t$opt", - []> { + []>, Requires<[IsARM, HasTrustZone]> { bits<4> opt; let Inst{23-4} = 0b01100000000000000111; let Inst{3-0} = opt; @@ -2238,7 +2276,7 @@ def LDRD : AI3ld<0b1101, 0, (outs GPR:$Rd, GPR:$dst2), multiclass AI2_ldridx<bit isByte, string opc, InstrItinClass iii, InstrItinClass iir> { def _PRE_IMM : AI2ldstidx<1, isByte, 1, (outs GPR:$Rt, GPR:$Rn_wb), - (ins addrmode_imm12:$addr), IndexModePre, LdFrm, iii, + (ins addrmode_imm12_pre:$addr), IndexModePre, LdFrm, iii, opc, "\t$Rt, $addr!", "$addr.base = $Rn_wb", []> { bits<17> addr; let Inst{25} = 0; @@ -2275,6 +2313,7 @@ multiclass AI2_ldridx<bit isByte, string opc, let Inst{23} = offset{12}; let Inst{19-16} = addr; let Inst{11-0} = offset{11-0}; + let Inst{4} = 0; let DecoderMethod = "DecodeAddrMode2IdxInstruction"; } @@ -2307,7 +2346,7 @@ defm LDRB : AI2_ldridx<1, "ldrb", IIC_iLoad_bh_iu, IIC_iLoad_bh_ru>; multiclass AI3_ldridx<bits<4> op, string opc, InstrItinClass itin> { def _PRE : AI3ldstidx<op, 1, 1, (outs GPR:$Rt, GPR:$Rn_wb), - (ins addrmode3:$addr), IndexModePre, + (ins addrmode3_pre:$addr), IndexModePre, LdMiscFrm, itin, opc, "\t$Rt, $addr!", "$addr.base = $Rn_wb", []> { bits<14> addr; @@ -2341,7 +2380,7 @@ defm LDRSH : AI3_ldridx<0b1111, "ldrsh", IIC_iLoad_bh_ru>; defm LDRSB : AI3_ldridx<0b1101, "ldrsb", IIC_iLoad_bh_ru>; let hasExtraDefRegAllocReq = 1 in { def LDRD_PRE : AI3ldstidx<0b1101, 0, 1, (outs GPR:$Rt, GPR:$Rt2, GPR:$Rn_wb), - (ins addrmode3:$addr), IndexModePre, + (ins addrmode3_pre:$addr), IndexModePre, LdMiscFrm, IIC_iLoad_d_ru, "ldrd", "\t$Rt, $Rt2, $addr!", "$addr.base = $Rn_wb", []> { @@ -2497,7 +2536,7 @@ def STRD : AI3str<0b1111, (outs), (ins GPR:$Rt, GPR:$src2, addrmode3:$addr), multiclass AI2_stridx<bit isByte, string opc, InstrItinClass iii, InstrItinClass iir> { def _PRE_IMM : AI2ldstidx<0, isByte, 1, (outs GPR:$Rn_wb), - (ins GPR:$Rt, 
addrmode_imm12:$addr), IndexModePre, + (ins GPR:$Rt, addrmode_imm12_pre:$addr), IndexModePre, StFrm, iii, opc, "\t$Rt, $addr!", "$addr.base = $Rn_wb", []> { bits<17> addr; @@ -2619,7 +2658,7 @@ def STRH_preidx: ARMPseudoInst<(outs GPR:$Rn_wb), def STRH_PRE : AI3ldstidx<0b1011, 0, 1, (outs GPR:$Rn_wb), - (ins GPR:$Rt, addrmode3:$addr), IndexModePre, + (ins GPR:$Rt, addrmode3_pre:$addr), IndexModePre, StMiscFrm, IIC_iStore_bh_ru, "strh", "\t$Rt, $addr!", "$addr.base = $Rn_wb", []> { bits<14> addr; @@ -2651,7 +2690,7 @@ def STRH_POST : AI3ldstidx<0b1011, 0, 0, (outs GPR:$Rn_wb), let mayStore = 1, neverHasSideEffects = 1, hasExtraSrcRegAllocReq = 1 in { def STRD_PRE : AI3ldstidx<0b1111, 0, 1, (outs GPR:$Rn_wb), - (ins GPR:$Rt, GPR:$Rt2, addrmode3:$addr), + (ins GPR:$Rt, GPR:$Rt2, addrmode3_pre:$addr), IndexModePre, StMiscFrm, IIC_iStore_d_ru, "strd", "\t$Rt, $Rt2, $addr!", "$addr.base = $Rn_wb", []> { @@ -4426,7 +4465,7 @@ multiclass LdStCop<bit load, bit Dbit, string asm> { let Inst{7-0} = addr{7-0}; let DecoderMethod = "DecodeCopMemInstruction"; } - def _PRE : ACI<(outs), (ins p_imm:$cop, c_imm:$CRd, addrmode5:$addr), + def _PRE : ACI<(outs), (ins p_imm:$cop, c_imm:$CRd, addrmode5_pre:$addr), asm, "\t$cop, $CRd, $addr!", IndexModePre> { bits<13> addr; bits<4> cop; @@ -4497,7 +4536,7 @@ multiclass LdSt2Cop<bit load, bit Dbit, string asm> { let Inst{7-0} = addr{7-0}; let DecoderMethod = "DecodeCopMemInstruction"; } - def _PRE : ACInoP<(outs), (ins p_imm:$cop, c_imm:$CRd, addrmode5:$addr), + def _PRE : ACInoP<(outs), (ins p_imm:$cop, c_imm:$CRd, addrmode5_pre:$addr), asm, "\t$cop, $CRd, $addr!", IndexModePre> { bits<13> addr; bits<4> cop; diff --git a/contrib/llvm/lib/Target/ARM/ARMInstrNEON.td b/contrib/llvm/lib/Target/ARM/ARMInstrNEON.td index 0411ac4..896fd0f 100644 --- a/contrib/llvm/lib/Target/ARM/ARMInstrNEON.td +++ b/contrib/llvm/lib/Target/ARM/ARMInstrNEON.td @@ -4316,6 +4316,24 @@ def VACGTq : N3VQInt<1, 0, 0b10, 0b1110, 1, N3RegFrm, IIC_VBINQ, "vacgt", defm VTST : N3V_QHS<0, 0, 0b1000, 1, IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, IIC_VBINi4Q, "vtst", "", NEONvtst, 1>; +def: NEONInstAlias<"vaclt${p}.f32 $Vd, $Vn, $Vm", + (VACGTd DPR:$Vd, DPR:$Vm, DPR:$Vn, pred:$p)>; +def: NEONInstAlias<"vaclt${p}.f32 $Vd, $Vn, $Vm", + (VACGTq QPR:$Vd, QPR:$Vm, QPR:$Vn, pred:$p)>; +def: NEONInstAlias<"vacle${p}.f32 $Vd, $Vn, $Vm", + (VACGEd DPR:$Vd, DPR:$Vm, DPR:$Vn, pred:$p)>; +def: NEONInstAlias<"vacle${p}.f32 $Vd, $Vn, $Vm", + (VACGEq QPR:$Vd, QPR:$Vm, QPR:$Vn, pred:$p)>; + +def: NEONInstAlias<"vaclt${p}.f32 $Vd, $Vm", + (VACGTd DPR:$Vd, DPR:$Vm, DPR:$Vd, pred:$p)>; +def: NEONInstAlias<"vaclt${p}.f32 $Vd, $Vm", + (VACGTq QPR:$Vd, QPR:$Vm, QPR:$Vd, pred:$p)>; +def: NEONInstAlias<"vacle${p}.f32 $Vd, $Vm", + (VACGEd DPR:$Vd, DPR:$Vm, DPR:$Vd, pred:$p)>; +def: NEONInstAlias<"vacle${p}.f32 $Vd, $Vm", + (VACGEq QPR:$Vd, QPR:$Vm, QPR:$Vd, pred:$p)>; + // Vector Bitwise Operations. 
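The vaclt/vacle aliases above are pure assembler conveniences: |a| < |b| is encoded as vacgt/vacge with the source operands swapped. The ACLE intrinsics expose the same comparison; a sketch of what one lowers to:

    #include <arm_neon.h>

    uint32x4_t abs_lt(float32x4_t a, float32x4_t b) {
      return vcaltq_f32(a, b);   // emitted as vacgt.f32 with the operands swapped
    }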
def vnotd : PatFrag<(ops node:$in), @@ -4889,6 +4907,29 @@ def VABSfq : N2VQ<0b11, 0b11, 0b10, 0b01, 0b01110, 0, "vabs", "f32", v4f32, v4f32, fabs>; +def : Pat<(xor (v2i32 (bitconvert (v8i8 (NEONvshrs DPR:$src, (i32 7))))), + (v2i32 (bitconvert (v8i8 (add DPR:$src, + (NEONvshrs DPR:$src, (i32 7))))))), + (VABSv8i8 DPR:$src)>; +def : Pat<(xor (v2i32 (bitconvert (v4i16 (NEONvshrs DPR:$src, (i32 15))))), + (v2i32 (bitconvert (v4i16 (add DPR:$src, + (NEONvshrs DPR:$src, (i32 15))))))), + (VABSv4i16 DPR:$src)>; +def : Pat<(xor (v2i32 (NEONvshrs DPR:$src, (i32 31))), + (v2i32 (add DPR:$src, (NEONvshrs DPR:$src, (i32 31))))), + (VABSv2i32 DPR:$src)>; +def : Pat<(xor (v4i32 (bitconvert (v16i8 (NEONvshrs QPR:$src, (i32 7))))), + (v4i32 (bitconvert (v16i8 (add QPR:$src, + (NEONvshrs QPR:$src, (i32 7))))))), + (VABSv16i8 QPR:$src)>; +def : Pat<(xor (v4i32 (bitconvert (v8i16 (NEONvshrs QPR:$src, (i32 15))))), + (v4i32 (bitconvert (v8i16 (add QPR:$src, + (NEONvshrs QPR:$src, (i32 15))))))), + (VABSv8i16 QPR:$src)>; +def : Pat<(xor (v4i32 (NEONvshrs QPR:$src, (i32 31))), + (v4i32 (add QPR:$src, (NEONvshrs QPR:$src, (i32 31))))), + (VABSv4i32 QPR:$src)>; + def : Pat<(v2f32 (int_arm_neon_vabs (v2f32 DPR:$src))), (VABSfd DPR:$src)>; def : Pat<(v4f32 (int_arm_neon_vabs (v4f32 QPR:$src))), (VABSfq QPR:$src)>; diff --git a/contrib/llvm/lib/Target/ARM/ARMInstrThumb2.td b/contrib/llvm/lib/Target/ARM/ARMInstrThumb2.td index c9d709e..4dacb86 100644 --- a/contrib/llvm/lib/Target/ARM/ARMInstrThumb2.td +++ b/contrib/llvm/lib/Target/ARM/ARMInstrThumb2.td @@ -150,7 +150,7 @@ def lo5AllOne : PatLeaf<(i32 imm), [{ def t2addrmode_imm12_asmoperand : AsmOperandClass {let Name="MemUImm12Offset";} def t2addrmode_imm12 : Operand<i32>, ComplexPattern<i32, 2, "SelectT2AddrModeImm12", []> { - let PrintMethod = "printAddrModeImm12Operand"; + let PrintMethod = "printAddrModeImm12Operand<false>"; let EncoderMethod = "getAddrModeImm12OpValue"; let DecoderMethod = "DecodeT2AddrModeImm12"; let ParserMatchClass = t2addrmode_imm12_asmoperand; @@ -3401,12 +3401,7 @@ class t2CPS<dag iops, string asm_op> : T2XI<(outs), iops, NoItinerary, bits<5> mode; bit M; - let Inst{31-27} = 0b11110; - let Inst{26} = 0; - let Inst{25-20} = 0b111010; - let Inst{19-16} = 0b1111; - let Inst{15-14} = 0b10; - let Inst{12} = 0; + let Inst{31-11} = 0b111100111010111110000; let Inst{10-9} = imod; let Inst{8} = M; let Inst{7-5} = iflags; @@ -3425,13 +3420,13 @@ let imod = 0, iflags = 0, M = 1 in // A6.3.4 Branches and miscellaneous control // Table A6-14 Change Processor State, and hint instructions -def t2HINT : T2I<(outs), (ins imm0_255:$imm), NoItinerary, "hint", "\t$imm",[]>{ - bits<8> imm; - let Inst{31-8} = 0b111100111010111110000000; - let Inst{7-0} = imm; +def t2HINT : T2I<(outs), (ins imm0_4:$imm), NoItinerary, "hint", "\t$imm",[]> { + bits<3> imm; + let Inst{31-3} = 0b11110011101011111000000000000; + let Inst{2-0} = imm; } -def : t2InstAlias<"hint$p.w $imm", (t2HINT imm0_255:$imm, pred:$p)>; +def : t2InstAlias<"hint$p.w $imm", (t2HINT imm0_4:$imm, pred:$p)>; def : t2InstAlias<"nop$p.w", (t2HINT 0, pred:$p)>; def : t2InstAlias<"yield$p.w", (t2HINT 1, pred:$p)>; def : t2InstAlias<"wfe$p.w", (t2HINT 2, pred:$p)>; @@ -3449,7 +3444,8 @@ def t2DBG : T2I<(outs), (ins imm0_15:$opt), NoItinerary, "dbg", "\t$opt", []> { // Secure Monitor Call is a system instruction. 
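The VABSv* patterns added in the ARMInstrNEON.td hunk above recognize the branch-free integer abs idiom per lane: with m = x >> (bits-1) arithmetically, |x| = (x + m) ^ m, which the DAG expresses as xor(ashr, add(x, ashr)). Scalar sketch for one 32-bit lane:

    #include <stdint.h>

    int32_t abs_lane(int32_t x) {
      int32_t m = x >> 31;   // 0 for non-negative x, -1 for negative x
      return (x + m) ^ m;    // |x| (undefined only for INT32_MIN, as with abs())
    }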
// Option = Inst{19-16} -def t2SMC : T2I<(outs), (ins imm0_15:$opt), NoItinerary, "smc", "\t$opt", []> { +def t2SMC : T2I<(outs), (ins imm0_15:$opt), NoItinerary, "smc", "\t$opt", + []>, Requires<[IsThumb2, HasTrustZone]> { let Inst{31-27} = 0b11110; let Inst{26-20} = 0b1111111; let Inst{15-12} = 0b1000; diff --git a/contrib/llvm/lib/Target/ARM/ARMLoadStoreOptimizer.cpp b/contrib/llvm/lib/Target/ARM/ARMLoadStoreOptimizer.cpp index b7ac5d5..c8ed576 100644 --- a/contrib/llvm/lib/Target/ARM/ARMLoadStoreOptimizer.cpp +++ b/contrib/llvm/lib/Target/ARM/ARMLoadStoreOptimizer.cpp @@ -87,53 +87,6 @@ namespace { MachineBasicBlock::iterator i) : Offset(o), Reg(r), isKill(k), Position(p), MBBI(i), Merged(false) {} }; - class UnitRegsMap { - public: - UnitRegsMap(const TargetRegisterInfo* _TRI) : TRI(_TRI) {} - const SmallVector<unsigned, 4>& operator[](unsigned Reg) { - DenseMap<unsigned, SmallVector<unsigned, 4> >::iterator found = - Cache.find(Reg); - if (found != Cache.end()) - return found->second; - else - return Cache.insert(std::make_pair(Reg, this->getUnitRegs(Reg))) - .first->second; - } - private: - SmallVector<unsigned, 4> getUnitRegs(unsigned Reg) { - SmallVector<unsigned, 4> Res; - - const TargetRegisterClass* TRC = TRI->getMinimalPhysRegClass(Reg); - if (TRC == &ARM::QPRRegClass) { - if (Reg > ARM::Q7) { - Res.push_back(TRI->getSubReg(Reg, ARM::dsub_0)); - Res.push_back(TRI->getSubReg(Reg, ARM::dsub_1)); - return Res; - } - - Res.push_back(TRI->getSubReg(Reg, ARM::ssub_0)); - Res.push_back(TRI->getSubReg(Reg, ARM::ssub_1)); - Res.push_back(TRI->getSubReg(Reg, ARM::ssub_2)); - Res.push_back(TRI->getSubReg(Reg, ARM::ssub_3)); - - return Res; - } - - if (TRC == &ARM::DPRRegClass && Reg < ARM::D15) { - Res.push_back(TRI->getSubReg(Reg, ARM::ssub_0)); - Res.push_back(TRI->getSubReg(Reg, ARM::ssub_1)); - - return Res; - } - - Res.push_back(Reg); - - return Res; - - } - const TargetRegisterInfo* TRI; - DenseMap<unsigned, SmallVector<unsigned, 4> > Cache; - }; typedef SmallVector<MemOpQueueEntry,8> MemOpQueue; typedef MemOpQueue::iterator MemOpQueueIter; @@ -175,11 +128,6 @@ namespace { MachineBasicBlock::iterator MBBI, bool &Advance, MachineBasicBlock::iterator &I); - unsigned AddMemOp(MemOpQueue& MemOps, - const MemOpQueueEntry newEntry, - UnitRegsMap& UnitRegsInfo, - SmallSet<unsigned, 4>& UsedUnitRegs, - unsigned At = -1U); bool LoadStoreMultipleOpti(MachineBasicBlock &MBB); bool MergeReturnIntoLDM(MachineBasicBlock &MBB); }; @@ -1265,103 +1213,12 @@ bool ARMLoadStoreOpt::FixInvalidRegPairOp(MachineBasicBlock &MBB, return false; } -/// AddMemOp - helper for ARMLoadStoreOpt::LoadStoreMultipleOpti. -/// It adds store mem ops with simple push_back/insert method, -/// without any additional logic. -/// For load operation it does the next: -/// 1. Adds new load operation into MemOp collection at "At" position. -/// 2. Removes any "load" operations from MemOps, that changes "Reg" register -/// contents, prior to "At". -/// UnitRegsInfo - Map of type Map< Register, UnitRegisters-vector > -/// UsedUnitRegs - set of unit-registers currently in use. -/// At - position at which it would added, and prior which the clean-up -/// should be made (for load operation). -/// FIXME: The clean-up also should be made for store operations, -/// but the memory address should be analyzed instead of unit registers. 
-unsigned ARMLoadStoreOpt::AddMemOp(MemOpQueue& MemOps, - const MemOpQueueEntry NewEntry, - UnitRegsMap& UnitRegsInfo, - SmallSet<unsigned, 4>& UsedUnitRegs, - unsigned At) { - unsigned Cleaned = 0; - - if (At == -1U) { - At = MemOps.size(); - MemOps.push_back(NewEntry); - } else - MemOps.insert(&MemOps[At], NewEntry); - - // FIXME: - // If operation is not load, leave it as is by now, - // So 0 overridden ops would cleaned in this case. - if (!NewEntry.MBBI->mayLoad()) - return 0; - - const SmallVector<unsigned, 4>& NewEntryUnitRegs = UnitRegsInfo[NewEntry.Reg]; - - bool FoundOverriddenLoads = false; - - for (unsigned i = 0, e = NewEntryUnitRegs.size(); i != e; ++i) - if (UsedUnitRegs.count(NewEntryUnitRegs[i])) { - FoundOverriddenLoads = true; - break; - } - - // If we detect that this register is used by load operations that are - // predecessors for the new one, remove them from MemOps then. - if (FoundOverriddenLoads) { - MemOpQueue UpdatedMemOps; - - // Scan through MemOps entries. - for (unsigned i = 0; i != At; ++i) { - MemOpQueueEntry& MemOpEntry = MemOps[i]; - - // FIXME: Skip non-load operations by now. - if (!MemOpEntry.MBBI->mayLoad()) - continue; - - const SmallVector<unsigned, 4>& MemOpUnitRegs = - UnitRegsInfo[MemOpEntry.Reg]; - - // Lookup entry that loads contents into register used by new entry. - bool ReleaseThisEntry = false; - for (unsigned m = 0, em = MemOpUnitRegs.size(); m != em; ++m) { - if (std::find(NewEntryUnitRegs.begin(), NewEntryUnitRegs.end(), - MemOpUnitRegs[m]) != NewEntryUnitRegs.end()) { - ReleaseThisEntry = true; - ++Cleaned; - break; - } - } - - if (ReleaseThisEntry) { - const SmallVector<unsigned, 4>& RelesedRegs = UnitRegsInfo[MemOpEntry.Reg]; - for (unsigned r = 0, er = RelesedRegs.size(); r != er; ++r) - UsedUnitRegs.erase(RelesedRegs[r]); - } else - UpdatedMemOps.push_back(MemOpEntry); - } - - // Keep anything without changes after At position. - for (unsigned i = At, e = MemOps.size(); i != e; ++i) - UpdatedMemOps.push_back(MemOps[i]); - - MemOps.swap(UpdatedMemOps); - } - - UsedUnitRegs.insert(NewEntryUnitRegs.begin(), NewEntryUnitRegs.end()); - - return Cleaned; -} - /// LoadStoreMultipleOpti - An optimization pass to turn multiple LDR / STR /// ops of the same base and incrementing offset into LDM / STM ops. bool ARMLoadStoreOpt::LoadStoreMultipleOpti(MachineBasicBlock &MBB) { unsigned NumMerges = 0; unsigned NumMemOps = 0; MemOpQueue MemOps; - UnitRegsMap UnitRegsInfo(TRI); - SmallSet<unsigned, 4> UsedRegUnits; unsigned CurrBase = 0; int CurrOpc = -1; unsigned CurrSize = 0; @@ -1401,6 +1258,22 @@ bool ARMLoadStoreOpt::LoadStoreMultipleOpti(MachineBasicBlock &MBB) { // merge the ldr's so far, including this one. But don't try to // combine the following ldr(s). Clobber = (isi32Load(Opcode) && Base == MBBI->getOperand(0).getReg()); + + // Watch out for: + // r4 := ldr [r0, #8] + // r4 := ldr [r0, #4] + // + // The optimization may reorder the second ldr in front of the first + // ldr, which violates write after write(WAW) dependence. The same as + // str. Try to merge inst(s) already in MemOps. + bool Overlap = false; + for (MemOpQueueIter I = MemOps.begin(), E = MemOps.end(); I != E; ++I) { + if (TRI->regsOverlap(Reg, I->MBBI->getOperand(0).getReg())) { + Overlap = true; + break; + } + } + if (CurrBase == 0 && !Clobber) { // Start of a new chain. 
CurrBase = Base; @@ -1408,13 +1281,10 @@ bool ARMLoadStoreOpt::LoadStoreMultipleOpti(MachineBasicBlock &MBB) { CurrSize = Size; CurrPred = Pred; CurrPredReg = PredReg; - MemOps.push_back(MemOpQueueEntry(Offset, Reg, isKill, Position, MBBI)); ++NumMemOps; - const SmallVector<unsigned, 4>& EntryUnitRegs = UnitRegsInfo[Reg]; - UsedRegUnits.insert(EntryUnitRegs.begin(), EntryUnitRegs.end()); Advance = true; - } else { + } else if (!Overlap) { if (Clobber) { TryMerge = true; Advance = true; @@ -1424,24 +1294,20 @@ bool ARMLoadStoreOpt::LoadStoreMultipleOpti(MachineBasicBlock &MBB) { // No need to match PredReg. // Continue adding to the queue. if (Offset > MemOps.back().Offset) { - unsigned OverridesCleaned = - AddMemOp(MemOps, - MemOpQueueEntry(Offset, Reg, isKill, Position, MBBI), - UnitRegsInfo, UsedRegUnits) != 0; - NumMemOps += 1 - OverridesCleaned; + MemOps.push_back(MemOpQueueEntry(Offset, Reg, isKill, + Position, MBBI)); + ++NumMemOps; Advance = true; } else { - for (unsigned I = 0; I != NumMemOps; ++I) { - if (Offset < MemOps[I].Offset) { - MemOpQueueEntry entry(Offset, Reg, isKill, Position, MBBI); - unsigned OverridesCleaned = - AddMemOp(MemOps, entry, UnitRegsInfo, - UsedRegUnits, I) != 0; - NumMemOps += 1 - OverridesCleaned; - + for (MemOpQueueIter I = MemOps.begin(), E = MemOps.end(); + I != E; ++I) { + if (Offset < I->Offset) { + MemOps.insert(I, MemOpQueueEntry(Offset, Reg, isKill, + Position, MBBI)); + ++NumMemOps; Advance = true; break; - } else if (Offset == MemOps[I].Offset) { + } else if (Offset == I->Offset) { // Collision! This can't be merged! break; } @@ -1512,7 +1378,6 @@ bool ARMLoadStoreOpt::LoadStoreMultipleOpti(MachineBasicBlock &MBB) { CurrPredReg = 0; if (NumMemOps) { MemOps.clear(); - UsedRegUnits.clear(); NumMemOps = 0; } diff --git a/contrib/llvm/lib/Target/ARM/ARMMachineFunctionInfo.h b/contrib/llvm/lib/Target/ARM/ARMMachineFunctionInfo.h index 88d96c0..f4248fc 100644 --- a/contrib/llvm/lib/Target/ARM/ARMMachineFunctionInfo.h +++ b/contrib/llvm/lib/Target/ARM/ARMMachineFunctionInfo.h @@ -38,7 +38,7 @@ class ARMFunctionInfo : public MachineFunctionInfo { /// VarArgsRegSaveSize - Size of the register save area for vararg functions. /// - unsigned VarArgsRegSaveSize; + unsigned ArgRegsSaveSize; /// HasStackFrame - True if this function has a stack frame. Set by /// processFunctionBeforeCalleeSavedScan(). 
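The Overlap check introduced in LoadStoreMultipleOpti above guards against reordering a later ldr/str in front of an earlier one that writes an overlapping destination register, which would break write-after-write ordering. A minimal, self-contained sketch of the same guard, using simplified stand-ins (QueuedMemOp, a trivial regsOverlap) rather than the real MachineInstr and TargetRegisterInfo types:

#include <vector>

struct QueuedMemOp {
  int Offset;    // immediate offset from the common base register
  unsigned Reg;  // destination (or source) register of the queued op
};

// Stand-in for TargetRegisterInfo::regsOverlap(); with plain register ids,
// "overlap" degenerates to equality.
static bool regsOverlap(unsigned A, unsigned B) { return A == B; }

// Returns true when the incoming op's register overlaps one already queued,
// e.g. "r4 := ldr [r0, #8]" is queued and "r4 := ldr [r0, #4]" arrives:
// merging would reorder the second load above the first (a WAW hazard).
static bool hasWAWHazard(const std::vector<QueuedMemOp> &MemOps, unsigned Reg) {
  for (std::vector<QueuedMemOp>::const_iterator I = MemOps.begin(),
                                                E = MemOps.end(); I != E; ++I)
    if (regsOverlap(Reg, I->Reg))
      return true;
  return false;
}

When such a hazard is found, the pass stops growing the current queue and instead merges the entries it has already collected, as the in-tree comment ("Try to merge inst(s) already in MemOps") notes.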
@@ -117,7 +117,7 @@ public: ARMFunctionInfo() : isThumb(false), hasThumb2(false), - VarArgsRegSaveSize(0), HasStackFrame(false), RestoreSPFromFP(false), + ArgRegsSaveSize(0), HasStackFrame(false), RestoreSPFromFP(false), LRSpilledForFarJump(false), FramePtrSpillOffset(0), GPRCS1Offset(0), GPRCS2Offset(0), DPRCSOffset(0), GPRCS1Size(0), GPRCS2Size(0), DPRCSSize(0), @@ -129,7 +129,7 @@ public: explicit ARMFunctionInfo(MachineFunction &MF) : isThumb(MF.getTarget().getSubtarget<ARMSubtarget>().isThumb()), hasThumb2(MF.getTarget().getSubtarget<ARMSubtarget>().hasThumb2()), - VarArgsRegSaveSize(0), HasStackFrame(false), RestoreSPFromFP(false), + ArgRegsSaveSize(0), HasStackFrame(false), RestoreSPFromFP(false), LRSpilledForFarJump(false), FramePtrSpillOffset(0), GPRCS1Offset(0), GPRCS2Offset(0), DPRCSOffset(0), GPRCS1Size(0), GPRCS2Size(0), DPRCSSize(0), @@ -141,8 +141,8 @@ public: bool isThumb1OnlyFunction() const { return isThumb && !hasThumb2; } bool isThumb2Function() const { return isThumb && hasThumb2; } - unsigned getVarArgsRegSaveSize() const { return VarArgsRegSaveSize; } - void setVarArgsRegSaveSize(unsigned s) { VarArgsRegSaveSize = s; } + unsigned getArgRegsSaveSize() const { return ArgRegsSaveSize; } + void setArgRegsSaveSize(unsigned s) { ArgRegsSaveSize = s; } bool hasStackFrame() const { return HasStackFrame; } void setHasStackFrame(bool s) { HasStackFrame = s; } diff --git a/contrib/llvm/lib/Target/ARM/ARMSubtarget.cpp b/contrib/llvm/lib/Target/ARM/ARMSubtarget.cpp index 739300e..8653c46 100644 --- a/contrib/llvm/lib/Target/ARM/ARMSubtarget.cpp +++ b/contrib/llvm/lib/Target/ARM/ARMSubtarget.cpp @@ -91,6 +91,7 @@ void ARMSubtarget::initializeEnvironment() { HasRAS = false; HasMPExtension = false; FPOnlySP = false; + HasTrustZone = false; AllowsUnalignedMem = false; Thumb2DSP = false; UseNaClTrap = false; diff --git a/contrib/llvm/lib/Target/ARM/ARMSubtarget.h b/contrib/llvm/lib/Target/ARM/ARMSubtarget.h index 5b5ee6a..038eb76 100644 --- a/contrib/llvm/lib/Target/ARM/ARMSubtarget.h +++ b/contrib/llvm/lib/Target/ARM/ARMSubtarget.h @@ -148,6 +148,9 @@ protected: /// precision. bool FPOnlySP; + /// HasTrustZone - if true, processor supports TrustZone security extensions + bool HasTrustZone; + /// AllowsUnalignedMem - If true, the subtarget allows unaligned memory /// accesses for some types. For details, see /// ARMTargetLowering::allowsUnalignedMemoryAccesses(). 
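HasTrustZone follows the usual subtarget feature-bit pattern: initializeEnvironment() defaults it to false, the corresponding target feature can switch it on, and the hasTrustZone() accessor added in the hunk below lets predicates such as the new Requires<[IsThumb2, HasTrustZone]> on t2SMC take effect. A small illustrative sketch of consulting the accessor; canEmitSMC() is a made-up helper, and isThumb2() is assumed to be the existing Thumb-2 query on ARMSubtarget:

#include "ARMSubtarget.h"

using namespace llvm;

// Illustration only: this helper is not part of the patch. SMC encodings are
// only meaningful when the core implements the TrustZone security extensions.
static bool canEmitSMC(const ARMSubtarget &ST) {
  return ST.isThumb2() && ST.hasTrustZone();
}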
@@ -251,6 +254,7 @@ public: bool hasVMLxForwarding() const { return HasVMLxForwarding; } bool isFPBrccSlow() const { return SlowFPBrcc; } bool isFPOnlySP() const { return FPOnlySP; } + bool hasTrustZone() const { return HasTrustZone; } bool prefers32BitThumb() const { return Pref32BitThumb; } bool avoidCPSRPartialUpdate() const { return AvoidCPSRPartialUpdate; } bool avoidMOVsShifterOperand() const { return AvoidMOVsShifterOperand; } diff --git a/contrib/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp b/contrib/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp index 1019b97..53ece66 100644 --- a/contrib/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp +++ b/contrib/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp @@ -125,6 +125,10 @@ public: unsigned getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index) const; unsigned getAddressComputationCost(Type *Val) const; + + unsigned getArithmeticInstrCost(unsigned Opcode, Type *Ty, + OperandValueKind Op1Info = OK_AnyValue, + OperandValueKind Op2Info = OK_AnyValue) const; /// @} }; @@ -223,9 +227,9 @@ unsigned ARMTTI::getCastInstrCost(unsigned Opcode, Type *Dst, { ISD::SIGN_EXTEND, MVT::v16i32, MVT::v16i8, 6 }, { ISD::ZERO_EXTEND, MVT::v16i32, MVT::v16i8, 6 }, - // Operations that we legalize using load/stores to the stack. - { ISD::TRUNCATE, MVT::v16i8, MVT::v16i32, 4*1 + 16*2 + 2*1 }, - { ISD::TRUNCATE, MVT::v8i8, MVT::v8i32, 2*1 + 8*2 + 1 }, + // Operations that we legalize using splitting. + { ISD::TRUNCATE, MVT::v16i8, MVT::v16i32, 6 }, + { ISD::TRUNCATE, MVT::v8i8, MVT::v8i32, 3 }, // Vector float <-> i32 conversions. { ISD::SINT_TO_FP, MVT::v4f32, MVT::v4i32, 1 }, @@ -456,3 +460,67 @@ unsigned ARMTTI::getShuffleCost(ShuffleKind Kind, Type *Tp, int Index, return LT.first * NEONShuffleTbl[Idx].Cost; } + +unsigned ARMTTI::getArithmeticInstrCost(unsigned Opcode, Type *Ty, OperandValueKind Op1Info, + OperandValueKind Op2Info) const { + + int ISDOpcode = TLI->InstructionOpcodeToISD(Opcode); + std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(Ty); + + const unsigned FunctionCallDivCost = 20; + const unsigned ReciprocalDivCost = 10; + static const CostTblEntry<MVT> CostTbl[] = { + // Division. + // These costs are somewhat random. Choose a cost of 20 to indicate that + // vectorizing division (added function call) is going to be very expensive. + // Double register types. + { ISD::SDIV, MVT::v1i64, 1 * FunctionCallDivCost}, + { ISD::UDIV, MVT::v1i64, 1 * FunctionCallDivCost}, + { ISD::SREM, MVT::v1i64, 1 * FunctionCallDivCost}, + { ISD::UREM, MVT::v1i64, 1 * FunctionCallDivCost}, + { ISD::SDIV, MVT::v2i32, 2 * FunctionCallDivCost}, + { ISD::UDIV, MVT::v2i32, 2 * FunctionCallDivCost}, + { ISD::SREM, MVT::v2i32, 2 * FunctionCallDivCost}, + { ISD::UREM, MVT::v2i32, 2 * FunctionCallDivCost}, + { ISD::SDIV, MVT::v4i16, ReciprocalDivCost}, + { ISD::UDIV, MVT::v4i16, ReciprocalDivCost}, + { ISD::SREM, MVT::v4i16, 4 * FunctionCallDivCost}, + { ISD::UREM, MVT::v4i16, 4 * FunctionCallDivCost}, + { ISD::SDIV, MVT::v8i8, ReciprocalDivCost}, + { ISD::UDIV, MVT::v8i8, ReciprocalDivCost}, + { ISD::SREM, MVT::v8i8, 8 * FunctionCallDivCost}, + { ISD::UREM, MVT::v8i8, 8 * FunctionCallDivCost}, + // Quad register types.
+ { ISD::SDIV, MVT::v2i64, 2 * FunctionCallDivCost}, + { ISD::UDIV, MVT::v2i64, 2 * FunctionCallDivCost}, + { ISD::SREM, MVT::v2i64, 2 * FunctionCallDivCost}, + { ISD::UREM, MVT::v2i64, 2 * FunctionCallDivCost}, + { ISD::SDIV, MVT::v4i32, 4 * FunctionCallDivCost}, + { ISD::UDIV, MVT::v4i32, 4 * FunctionCallDivCost}, + { ISD::SREM, MVT::v4i32, 4 * FunctionCallDivCost}, + { ISD::UREM, MVT::v4i32, 4 * FunctionCallDivCost}, + { ISD::SDIV, MVT::v8i16, 8 * FunctionCallDivCost}, + { ISD::UDIV, MVT::v8i16, 8 * FunctionCallDivCost}, + { ISD::SREM, MVT::v8i16, 8 * FunctionCallDivCost}, + { ISD::UREM, MVT::v8i16, 8 * FunctionCallDivCost}, + { ISD::SDIV, MVT::v16i8, 16 * FunctionCallDivCost}, + { ISD::UDIV, MVT::v16i8, 16 * FunctionCallDivCost}, + { ISD::SREM, MVT::v16i8, 16 * FunctionCallDivCost}, + { ISD::UREM, MVT::v16i8, 16 * FunctionCallDivCost}, + // Multiplication. + }; + + int Idx = -1; + + if (ST->hasNEON()) + Idx = CostTableLookup<MVT>(CostTbl, array_lengthof(CostTbl), ISDOpcode, + LT.second); + + if (Idx != -1) + return LT.first * CostTbl[Idx].Cost; + + + return TargetTransformInfo::getArithmeticInstrCost(Opcode, Ty, Op1Info, + Op2Info); +} + diff --git a/contrib/llvm/lib/Target/ARM/AsmParser/ARMAsmParser.cpp b/contrib/llvm/lib/Target/ARM/AsmParser/ARMAsmParser.cpp index ed7b7ec..1dd2953 100644 --- a/contrib/llvm/lib/Target/ARM/AsmParser/ARMAsmParser.cpp +++ b/contrib/llvm/lib/Target/ARM/AsmParser/ARMAsmParser.cpp @@ -86,11 +86,11 @@ class ARMAsmParser : public MCTargetAsmParser { MCAsmLexer &getLexer() const { return Parser.getLexer(); } bool Warning(SMLoc L, const Twine &Msg, - ArrayRef<SMRange> Ranges = ArrayRef<SMRange>()) { + ArrayRef<SMRange> Ranges = None) { return Parser.Warning(L, Msg, Ranges); } bool Error(SMLoc L, const Twine &Msg, - ArrayRef<SMRange> Ranges = ArrayRef<SMRange>()) { + ArrayRef<SMRange> Ranges = None) { return Parser.Error(L, Msg, Ranges); } @@ -610,6 +610,13 @@ public: int64_t Value = CE->getValue(); return ((Value & 3) == 0) && Value >= -1020 && Value <= 1020; } + bool isImm0_4() const { + if (!isImm()) return false; + const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm()); + if (!CE) return false; + int64_t Value = CE->getValue(); + return Value >= 0 && Value < 5; + } bool isImm0_1020s4() const { if (!isImm()) return false; const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm()); @@ -4745,6 +4752,7 @@ StringRef ARMAsmParser::splitMnemonic(StringRef Mnemonic, Mnemonic == "mls" || Mnemonic == "smmls" || Mnemonic == "vcls" || Mnemonic == "vmls" || Mnemonic == "vnmls" || Mnemonic == "vacge" || Mnemonic == "vcge" || Mnemonic == "vclt" || Mnemonic == "vacgt" || + Mnemonic == "vaclt" || Mnemonic == "vacle" || Mnemonic == "vcgt" || Mnemonic == "vcle" || Mnemonic == "smlal" || Mnemonic == "umaal" || Mnemonic == "umlal" || Mnemonic == "vabal" || Mnemonic == "vmlal" || Mnemonic == "vpadal" || Mnemonic == "vqdmlal" || @@ -5014,8 +5022,8 @@ static bool isDataTypeToken(StringRef Tok) { static bool doesIgnoreDataTypeSuffix(StringRef Mnemonic, StringRef DT) { return Mnemonic.startswith("vldm") || Mnemonic.startswith("vstm"); } - -static void applyMnemonicAliases(StringRef &Mnemonic, unsigned Features); +static void applyMnemonicAliases(StringRef &Mnemonic, unsigned Features, + unsigned VariantID); /// Parse an arm instruction mnemonic followed by its operands. 
bool ARMAsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name, SMLoc NameLoc, @@ -5026,7 +5034,8 @@ bool ARMAsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name, // MatchInstructionImpl(), but that's too late for aliases that include // any sort of suffix. unsigned AvailableFeatures = getAvailableFeatures(); - applyMnemonicAliases(Name, AvailableFeatures); + unsigned AssemblerDialect = getParser().getAssemblerDialect(); + applyMnemonicAliases(Name, AvailableFeatures, AssemblerDialect); // First check for the ARM-specific .req directive. if (Parser.getTok().is(AsmToken::Identifier) && @@ -7613,6 +7622,11 @@ MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, return Error(IDLoc, "instruction variant requires ARMv6 or later"); case Match_RequiresThumb2: return Error(IDLoc, "instruction variant requires Thumb2"); + case Match_ImmRange0_4: { + SMLoc ErrorLoc = ((ARMOperand*)Operands[ErrorInfo])->getStartLoc(); + if (ErrorLoc == SMLoc()) ErrorLoc = IDLoc; + return Error(ErrorLoc, "immediate operand must be in the range [0,4]"); + } case Match_ImmRange0_15: { SMLoc ErrorLoc = ((ARMOperand*)Operands[ErrorInfo])->getStartLoc(); if (ErrorLoc == SMLoc()) ErrorLoc = IDLoc; diff --git a/contrib/llvm/lib/Target/ARM/Disassembler/ARMDisassembler.cpp b/contrib/llvm/lib/Target/ARM/Disassembler/ARMDisassembler.cpp index 2e009e5..ac937f3 100644 --- a/contrib/llvm/lib/Target/ARM/Disassembler/ARMDisassembler.cpp +++ b/contrib/llvm/lib/Target/ARM/Disassembler/ARMDisassembler.cpp @@ -308,6 +308,8 @@ static DecodeStatus DecodeVCVTD(MCInst &Inst, unsigned Insn, uint64_t Address, const void *Decoder); static DecodeStatus DecodeVCVTQ(MCInst &Inst, unsigned Insn, uint64_t Address, const void *Decoder); +static DecodeStatus DecodeImm0_4(MCInst &Inst, unsigned Insn, uint64_t Address, + const void *Decoder); static DecodeStatus DecodeThumbAddSpecialReg(MCInst &Inst, uint16_t Insn, @@ -1951,10 +1953,12 @@ static DecodeStatus DecodeT2CPSInstruction(MCInst &Inst, unsigned Insn, Inst.addOperand(MCOperand::CreateImm(mode)); if (iflags) S = MCDisassembler::SoftFail; } else { - // imod == '00' && M == '0' --> UNPREDICTABLE - Inst.setOpcode(ARM::t2CPS1p); - Inst.addOperand(MCOperand::CreateImm(mode)); - S = MCDisassembler::SoftFail; + // imod == '00' && M == '0' --> this is a HINT instruction + int imm = fieldFromInstruction(Insn, 0, 8); + // HINT are defined only for immediate in [0..4] + if(imm > 4) return MCDisassembler::Fail; + Inst.setOpcode(ARM::t2HINT); + Inst.addOperand(MCOperand::CreateImm(imm)); } return S; @@ -1996,9 +2000,10 @@ static DecodeStatus DecodeArmMOVTWInstruction(MCInst &Inst, unsigned Insn, imm |= (fieldFromInstruction(Insn, 16, 4) << 12); if (Inst.getOpcode() == ARM::MOVTi16) - if (!Check(S, DecoderGPRRegisterClass(Inst, Rd, Address, Decoder))) + if (!Check(S, DecodeGPRnopcRegisterClass(Inst, Rd, Address, Decoder))) return MCDisassembler::Fail; - if (!Check(S, DecoderGPRRegisterClass(Inst, Rd, Address, Decoder))) + + if (!Check(S, DecodeGPRnopcRegisterClass(Inst, Rd, Address, Decoder))) return MCDisassembler::Fail; if (!tryAddingSymbolicOperand(Address, imm, false, 4, Inst, Decoder)) @@ -3570,7 +3575,7 @@ static DecodeStatus DecodeDoubleRegStore(MCInst &Inst, unsigned Insn, unsigned Rn = fieldFromInstruction(Insn, 16, 4); unsigned pred = fieldFromInstruction(Insn, 28, 4); - if (!Check(S, DecoderGPRRegisterClass(Inst, Rd, Address, Decoder))) + if (!Check(S, DecodeGPRnopcRegisterClass(Inst, Rd, Address, Decoder))) return MCDisassembler::Fail; if ((Rt & 1) || Rt == 0xE 
|| Rn == 0xF) return MCDisassembler::Fail; @@ -4496,6 +4501,15 @@ static DecodeStatus DecodeVCVTQ(MCInst &Inst, unsigned Insn, return S; } +static DecodeStatus DecodeImm0_4(MCInst &Inst, unsigned Insn, uint64_t Address, + const void *Decoder) +{ + unsigned Imm = fieldFromInstruction(Insn, 0, 3); + if (Imm > 4) return MCDisassembler::Fail; + Inst.addOperand(MCOperand::CreateImm(Imm)); + return MCDisassembler::Success; +} + static DecodeStatus DecodeLDR(MCInst &Inst, unsigned Val, uint64_t Address, const void *Decoder) { DecodeStatus S = MCDisassembler::Success; diff --git a/contrib/llvm/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp b/contrib/llvm/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp index 2afb20d..3bcd083 100644 --- a/contrib/llvm/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp +++ b/contrib/llvm/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp @@ -490,7 +490,8 @@ void ARMInstPrinter::printAM3PostIndexOp(const MCInst *MI, unsigned Op, } void ARMInstPrinter::printAM3PreOrOffsetIndexOp(const MCInst *MI, unsigned Op, - raw_ostream &O) { + raw_ostream &O, + bool AlwaysPrintImm0) { const MCOperand &MO1 = MI->getOperand(Op); const MCOperand &MO2 = MI->getOperand(Op+1); const MCOperand &MO3 = MI->getOperand(Op+2); @@ -509,7 +510,7 @@ void ARMInstPrinter::printAM3PreOrOffsetIndexOp(const MCInst *MI, unsigned Op, unsigned ImmOffs = ARM_AM::getAM3Offset(MO3.getImm()); ARM_AM::AddrOpc op = ARM_AM::getAM3Op(MO3.getImm()); - if (ImmOffs || (op == ARM_AM::sub)) { + if (AlwaysPrintImm0 || ImmOffs || (op == ARM_AM::sub)) { O << ", " << markup("<imm:") << "#" @@ -520,6 +521,7 @@ void ARMInstPrinter::printAM3PreOrOffsetIndexOp(const MCInst *MI, unsigned Op, O << ']' << markup(">"); } +template <bool AlwaysPrintImm0> void ARMInstPrinter::printAddrMode3Operand(const MCInst *MI, unsigned Op, raw_ostream &O) { const MCOperand &MO1 = MI->getOperand(Op); @@ -535,7 +537,7 @@ void ARMInstPrinter::printAddrMode3Operand(const MCInst *MI, unsigned Op, printAM3PostIndexOp(MI, Op, O); return; } - printAM3PreOrOffsetIndexOp(MI, Op, O); + printAM3PreOrOffsetIndexOp(MI, Op, O, AlwaysPrintImm0); } void ARMInstPrinter::printAddrMode3OffsetOperand(const MCInst *MI, @@ -593,6 +595,7 @@ void ARMInstPrinter::printLdStmModeOperand(const MCInst *MI, unsigned OpNum, O << ARM_AM::getAMSubModeStr(Mode); } +template <bool AlwaysPrintImm0> void ARMInstPrinter::printAddrMode5Operand(const MCInst *MI, unsigned OpNum, raw_ostream &O) { const MCOperand &MO1 = MI->getOperand(OpNum); @@ -608,7 +611,7 @@ void ARMInstPrinter::printAddrMode5Operand(const MCInst *MI, unsigned OpNum, unsigned ImmOffs = ARM_AM::getAM5Offset(MO2.getImm()); unsigned Op = ARM_AM::getAM5Op(MO2.getImm()); - if (ImmOffs || Op == ARM_AM::sub) { + if (AlwaysPrintImm0 || ImmOffs || Op == ARM_AM::sub) { O << ", " << markup("<imm:") << "#" @@ -1022,6 +1025,7 @@ void ARMInstPrinter::printT2SOOperand(const MCInst *MI, unsigned OpNum, ARM_AM::getSORegOffset(MO2.getImm()), UseMarkup); } +template <bool AlwaysPrintImm0> void ARMInstPrinter::printAddrModeImm12Operand(const MCInst *MI, unsigned OpNum, raw_ostream &O) { const MCOperand &MO1 = MI->getOperand(OpNum); @@ -1042,13 +1046,13 @@ void ARMInstPrinter::printAddrModeImm12Operand(const MCInst *MI, unsigned OpNum, OffImm = 0; if (isSub) { O << ", " - << markup("<imm:") + << markup("<imm:") << "#-" << -OffImm << markup(">"); } - else if (OffImm > 0) { + else if (AlwaysPrintImm0 || OffImm > 0) { O << ", " - << markup("<imm:") + << markup("<imm:") << "#" << OffImm << markup(">"); } diff --git 
a/contrib/llvm/lib/Target/ARM/InstPrinter/ARMInstPrinter.h b/contrib/llvm/lib/Target/ARM/InstPrinter/ARMInstPrinter.h index edff75d..344104e 100644 --- a/contrib/llvm/lib/Target/ARM/InstPrinter/ARMInstPrinter.h +++ b/contrib/llvm/lib/Target/ARM/InstPrinter/ARMInstPrinter.h @@ -47,12 +47,13 @@ public: raw_ostream &O); void printAddrMode2OffsetOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O); - + template <bool AlwaysPrintImm0> void printAddrMode3Operand(const MCInst *MI, unsigned OpNum, raw_ostream &O); void printAddrMode3OffsetOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O); void printAM3PostIndexOp(const MCInst *MI, unsigned Op, raw_ostream &O); - void printAM3PreOrOffsetIndexOp(const MCInst *MI, unsigned Op,raw_ostream &O); + void printAM3PreOrOffsetIndexOp(const MCInst *MI, unsigned Op, raw_ostream &O, + bool AlwaysPrintImm0); void printPostIdxImm8Operand(const MCInst *MI, unsigned OpNum, raw_ostream &O); void printPostIdxRegOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O); @@ -60,6 +61,7 @@ public: raw_ostream &O); void printLdStmModeOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O); + template <bool AlwaysPrintImm0> void printAddrMode5Operand(const MCInst *MI, unsigned OpNum, raw_ostream &O); void printAddrMode6Operand(const MCInst *MI, unsigned OpNum, raw_ostream &O); void printAddrMode7Operand(const MCInst *MI, unsigned OpNum, raw_ostream &O); @@ -91,6 +93,7 @@ public: raw_ostream &O); void printT2SOOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O); + template<bool AlwaysPrintImm0> void printAddrModeImm12Operand(const MCInst *MI, unsigned OpNum, raw_ostream &O); void printT2AddrModeImm8Operand(const MCInst *MI, unsigned OpNum, diff --git a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp index 418971d..6c3d247 100644 --- a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp +++ b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp @@ -13,7 +13,9 @@ // //===----------------------------------------------------------------------===// +#include "ARMRegisterInfo.h" #include "ARMUnwindOp.h" +#include "ARMUnwindOpAsm.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/Twine.h" #include "llvm/MC/MCAsmBackend.h" @@ -26,6 +28,7 @@ #include "llvm/MC/MCExpr.h" #include "llvm/MC/MCInst.h" #include "llvm/MC/MCObjectStreamer.h" +#include "llvm/MC/MCRegisterInfo.h" #include "llvm/MC/MCSection.h" #include "llvm/MC/MCSectionELF.h" #include "llvm/MC/MCStreamer.h" @@ -33,11 +36,15 @@ #include "llvm/MC/MCValue.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ELF.h" -#include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" using namespace llvm; +static std::string GetAEABIUnwindPersonalityName(unsigned Index) { + assert(Index < NUM_PERSONALITY_INDEX && "Invalid personality index"); + return (Twine("__aeabi_unwind_cpp_pr") + Twine(Index)).str(); +} + namespace { /// Extend the generic ELFStreamer class so that it can emit mapping symbols at @@ -57,8 +64,9 @@ public: ARMELFStreamer(MCContext &Context, MCAsmBackend &TAB, raw_ostream &OS, MCCodeEmitter *Emitter, bool IsThumb) : MCELFStreamer(SK_ARMELFStreamer, Context, TAB, OS, Emitter), - IsThumb(IsThumb), MappingSymbolCounter(0), LastEMS(EMS_None), ExTab(0), - FnStart(0), Personality(0), CantUnwind(false) {} + IsThumb(IsThumb), MappingSymbolCounter(0), LastEMS(EMS_None) { + Reset(); + } ~ARMELFStreamer() {} @@ -75,14 +83,15 @@ public: virtual void EmitRegSave(const SmallVectorImpl<unsigned> 
&RegList, bool isVector); - virtual void ChangeSection(const MCSection *Section) { + virtual void ChangeSection(const MCSection *Section, + const MCExpr *Subsection) { // We have to keep track of the mapping symbol state of any sections we // use. Each one should start off as EMS_None, which is provided as the // default constructor by DenseMap::lookup. - LastMappingSymbols[getPreviousSection()] = LastEMS; + LastMappingSymbols[getPreviousSection().first] = LastEMS; LastEMS = LastMappingSymbols.lookup(Section); - MCELFStreamer::ChangeSection(Section); + MCELFStreamer::ChangeSection(Section, Subsection); } /// This function is the one used to emit instruction data into the ELF @@ -175,7 +184,7 @@ private: MCELF::SetType(SD, ELF::STT_NOTYPE); MCELF::SetBinding(SD, ELF::STB_LOCAL); SD.setExternal(false); - Symbol->setSection(*getCurrentSection()); + Symbol->setSection(*getCurrentSection().first); const MCExpr *Value = MCSymbolRefExpr::Create(Start, getContext()); Symbol->setVariableValue(Value); @@ -194,6 +203,7 @@ private: void Reset(); void EmitPersonalityFixup(StringRef Name); + void CollectUnwindOpcodes(); void SwitchToEHSection(const char *Prefix, unsigned Type, unsigned Flags, SectionKind Kind, const MCSymbol &Fn); @@ -210,9 +220,16 @@ private: MCSymbol *ExTab; MCSymbol *FnStart; const MCSymbol *Personality; + uint32_t VFPRegSave; // Register mask for {d31-d0} + uint32_t RegSave; // Register mask for {r15-r0} + int64_t SPOffset; + uint16_t FPReg; + int64_t FPOffset; + bool UsedFP; bool CantUnwind; + UnwindOpcodeAssembler UnwindOpAsm; }; -} +} // end anonymous namespace inline void ARMELFStreamer::SwitchToEHSection(const char *Prefix, unsigned Type, @@ -238,7 +255,7 @@ inline void ARMELFStreamer::SwitchToEHSection(const char *Prefix, } else { EHSection = getContext().getELFSection(EHSecName, Type, Flags, Kind); } - assert(EHSection); + assert(EHSection && "Failed to get the required EH section"); // Switch to .ARM.extab or .ARM.exidx section SwitchSection(EHSection); @@ -262,10 +279,20 @@ inline void ARMELFStreamer::SwitchToExIdxSection(const MCSymbol &FnStart) { } void ARMELFStreamer::Reset() { + const MCRegisterInfo &MRI = getContext().getRegisterInfo(); + ExTab = NULL; FnStart = NULL; Personality = NULL; + VFPRegSave = 0; + RegSave = 0; + FPReg = MRI.getEncodingValue(ARM::SP); + FPOffset = 0; + SPOffset = 0; + UsedFP = false; CantUnwind = false; + + UnwindOpAsm.Reset(); } // Add the R_ARM_NONE fixup at the same position @@ -284,6 +311,18 @@ void ARMELFStreamer::EmitPersonalityFixup(StringRef Name) { MCFixup::getKindForSize(4, false))); } +void ARMELFStreamer::CollectUnwindOpcodes() { + if (UsedFP) { + UnwindOpAsm.EmitSetFP(FPReg); + UnwindOpAsm.EmitSPOffset(-FPOffset); + } else { + UnwindOpAsm.EmitSPOffset(SPOffset); + } + UnwindOpAsm.EmitVFPRegSave(VFPRegSave); + UnwindOpAsm.EmitRegSave(RegSave); + UnwindOpAsm.Finalize(); +} + void ARMELFStreamer::EmitFnStart() { assert(FnStart == 0); FnStart = getContext().CreateTempSymbol(); @@ -294,35 +333,29 @@ void ARMELFStreamer::EmitFnEnd() { assert(FnStart && ".fnstart must preceeds .fnend"); // Emit unwind opcodes if there is no .handlerdata directive - int PersonalityIndex = -1; if (!ExTab && !CantUnwind) { - // For __aeabi_unwind_cpp_pr1, we have to emit opcodes in .ARM.extab. 
- SwitchToExTabSection(*FnStart); - - // Create .ARM.extab label for offset in .ARM.exidx - ExTab = getContext().CreateTempSymbol(); - EmitLabel(ExTab); - - PersonalityIndex = 1; - - uint32_t Entry = 0; - uint32_t NumExtraEntryWords = 0; - Entry |= NumExtraEntryWords << 24; - Entry |= (EHT_COMPACT | PersonalityIndex) << 16; - - // TODO: This should be generated according to .save, .vsave, .setfp - // directives. Currently, we are simply generating FINISH opcode. - Entry |= UNWIND_OPCODE_FINISH << 8; - Entry |= UNWIND_OPCODE_FINISH; - - EmitIntValue(Entry, 4, 0); + CollectUnwindOpcodes(); + + unsigned PersonalityIndex = UnwindOpAsm.getPersonalityIndex(); + if (PersonalityIndex == AEABI_UNWIND_CPP_PR1 || + PersonalityIndex == AEABI_UNWIND_CPP_PR2) { + // For the __aeabi_unwind_cpp_pr1 and __aeabi_unwind_cpp_pr2, we have to + // emit the unwind opcodes in the corresponding ".ARM.extab" section, and + // then emit a reference to these unwind opcodes in the second word of + // the exception index table entry. + SwitchToExTabSection(*FnStart); + ExTab = getContext().CreateTempSymbol(); + EmitLabel(ExTab); + EmitBytes(UnwindOpAsm.data(), 0); + } } // Emit the exception index table entry SwitchToExIdxSection(*FnStart); - if (PersonalityIndex == 1) - EmitPersonalityFixup("__aeabi_unwind_cpp_pr1"); + unsigned PersonalityIndex = UnwindOpAsm.getPersonalityIndex(); + if (PersonalityIndex < NUM_PERSONALITY_INDEX) + EmitPersonalityFixup(GetAEABIUnwindPersonalityName(PersonalityIndex)); const MCSymbolRefExpr *FnStartRef = MCSymbolRefExpr::Create(FnStart, @@ -333,12 +366,22 @@ void ARMELFStreamer::EmitFnEnd() { if (CantUnwind) { EmitIntValue(EXIDX_CANTUNWIND, 4, 0); - } else { + } else if (ExTab) { + // Emit a reference to the unwind opcodes in the ".ARM.extab" section. const MCSymbolRefExpr *ExTabEntryRef = MCSymbolRefExpr::Create(ExTab, MCSymbolRefExpr::VK_ARM_PREL31, getContext()); EmitValue(ExTabEntryRef, 4, 0); + } else { + // For the __aeabi_unwind_cpp_pr0, we have to emit the unwind opcodes in + // the second word of exception index table entry. The size of the unwind + // opcodes should always be 4 bytes. + assert(PersonalityIndex == AEABI_UNWIND_CPP_PR0 && + "Compact model must use __aeabi_cpp_unwind_pr0 as personality"); + assert(UnwindOpAsm.size() == 4u && + "Unwind opcode size for __aeabi_cpp_unwind_pr0 must be equal to 4"); + EmitBytes(UnwindOpAsm.data(), 0); } // Clean exception handling frame information @@ -368,36 +411,54 @@ void ARMELFStreamer::EmitHandlerData() { EmitValue(PersonalityRef, 4, 0); // Emit unwind opcodes - uint32_t Entry = 0; - uint32_t NumExtraEntryWords = 0; - - // TODO: This should be generated according to .save, .vsave, .setfp - // directives. Currently, we are simply generating FINISH opcode. 
- Entry |= NumExtraEntryWords << 24; - Entry |= UNWIND_OPCODE_FINISH << 16; - Entry |= UNWIND_OPCODE_FINISH << 8; - Entry |= UNWIND_OPCODE_FINISH; - - EmitIntValue(Entry, 4, 0); + CollectUnwindOpcodes(); + EmitBytes(UnwindOpAsm.data(), 0); } void ARMELFStreamer::EmitPersonality(const MCSymbol *Per) { Personality = Per; + UnwindOpAsm.setPersonality(Per); } -void ARMELFStreamer::EmitSetFP(unsigned NewFpReg, - unsigned NewSpReg, +void ARMELFStreamer::EmitSetFP(unsigned NewFPReg, + unsigned NewSPReg, int64_t Offset) { - // TODO: Not implemented + assert(SPOffset == 0 && + "Current implementation assumes .setfp precedes .pad"); + + const MCRegisterInfo &MRI = getContext().getRegisterInfo(); + + uint16_t NewFPRegEncVal = MRI.getEncodingValue(NewFPReg); +#ifndef NDEBUG + uint16_t NewSPRegEncVal = MRI.getEncodingValue(NewSPReg); +#endif + + assert((NewSPReg == ARM::SP || NewSPRegEncVal == FPReg) && + "the operand of .setfp directive should be either $sp or $fp"); + + UsedFP = true; + FPReg = NewFPRegEncVal; + FPOffset = Offset; } void ARMELFStreamer::EmitPad(int64_t Offset) { - // TODO: Not implemented + SPOffset += Offset; } void ARMELFStreamer::EmitRegSave(const SmallVectorImpl<unsigned> &RegList, bool IsVector) { - // TODO: Not implemented + const MCRegisterInfo &MRI = getContext().getRegisterInfo(); + +#ifndef NDEBUG + unsigned Max = IsVector ? 32 : 16; +#endif + uint32_t &RegMask = IsVector ? VFPRegSave : RegSave; + + for (size_t i = 0; i < RegList.size(); ++i) { + unsigned Reg = MRI.getEncodingValue(RegList[i]); + assert(Reg < Max && "Register encoded value out of range"); + RegMask |= 1u << Reg; + } } namespace llvm { diff --git a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMUnwindOp.h b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMUnwindOp.h index dad5576..fa4add6 100644 --- a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMUnwindOp.h +++ b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMUnwindOp.h @@ -107,6 +107,19 @@ namespace llvm { UNWIND_OPCODE_POP_VFP_REG_RANGE_FSTMFDD_D8 = 0xd0 }; + /// ARM-defined Personality Routine Index + enum ARMPersonalityRoutineIndex { + // To make the exception handling table become more compact, ARM defined + // several personality routines in EHABI. There are 3 different + // personality routines in ARM EHABI currently. It is possible to have 16 + // pre-defined personality routines at most. + AEABI_UNWIND_CPP_PR0 = 0, + AEABI_UNWIND_CPP_PR1 = 1, + AEABI_UNWIND_CPP_PR2 = 2, + + NUM_PERSONALITY_INDEX + }; + } #endif // ARM_UNWIND_OP_H diff --git a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMUnwindOpAsm.cpp b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMUnwindOpAsm.cpp new file mode 100644 index 0000000..191db69 --- /dev/null +++ b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMUnwindOpAsm.cpp @@ -0,0 +1,198 @@ +//===-- ARMUnwindOpAsm.cpp - ARM Unwind Opcodes Assembler -------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the unwind opcode assembler for ARM exception handling +// table.
+// +//===----------------------------------------------------------------------===// + +#include "ARMUnwindOpAsm.h" + +#include "ARMUnwindOp.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/LEB128.h" + +using namespace llvm; + +void UnwindOpcodeAssembler::EmitRegSave(uint32_t RegSave) { + if (RegSave == 0u) + return; + + // One byte opcode to save register r14 and r11-r4 + if (RegSave & (1u << 4)) { + // The one byte opcode will always save r4, thus we can't use the one byte + // opcode when r4 is not in .save directive. + + // Compute the consecutive registers from r4 to r11. + uint32_t Range = 0; + uint32_t Mask = (1u << 4); + for (uint32_t Bit = (1u << 5); Bit < (1u << 12); Bit <<= 1) { + if ((RegSave & Bit) == 0u) + break; + ++Range; + Mask |= Bit; + } + + // Emit this opcode when the mask covers every registers. + uint32_t UnmaskedReg = RegSave & 0xfff0u & (~Mask); + if (UnmaskedReg == 0u) { + // Pop r[4 : (4 + n)] + Ops.push_back(UNWIND_OPCODE_POP_REG_RANGE_R4 | Range); + RegSave &= 0x000fu; + } else if (UnmaskedReg == (1u << 14)) { + // Pop r[14] + r[4 : (4 + n)] + Ops.push_back(UNWIND_OPCODE_POP_REG_RANGE_R4_R14 | Range); + RegSave &= 0x000fu; + } + } + + // Two bytes opcode to save register r15-r4 + if ((RegSave & 0xfff0u) != 0) { + uint32_t Op = UNWIND_OPCODE_POP_REG_MASK_R4 | (RegSave >> 4); + Ops.push_back(static_cast<uint8_t>(Op >> 8)); + Ops.push_back(static_cast<uint8_t>(Op & 0xff)); + } + + // Opcode to save register r3-r0 + if ((RegSave & 0x000fu) != 0) { + uint32_t Op = UNWIND_OPCODE_POP_REG_MASK | (RegSave & 0x000fu); + Ops.push_back(static_cast<uint8_t>(Op >> 8)); + Ops.push_back(static_cast<uint8_t>(Op & 0xff)); + } +} + +/// Emit unwind opcodes for .vsave directives +void UnwindOpcodeAssembler::EmitVFPRegSave(uint32_t VFPRegSave) { + size_t i = 32; + + while (i > 16) { + uint32_t Bit = 1u << (i - 1); + if ((VFPRegSave & Bit) == 0u) { + --i; + continue; + } + + uint32_t Range = 0; + + --i; + Bit >>= 1; + + while (i > 16 && (VFPRegSave & Bit)) { + --i; + ++Range; + Bit >>= 1; + } + + uint32_t Op = + UNWIND_OPCODE_POP_VFP_REG_RANGE_FSTMFDD_D16 | ((i - 16) << 4) | Range; + Ops.push_back(static_cast<uint8_t>(Op >> 8)); + Ops.push_back(static_cast<uint8_t>(Op & 0xff)); + } + + while (i > 0) { + uint32_t Bit = 1u << (i - 1); + if ((VFPRegSave & Bit) == 0u) { + --i; + continue; + } + + uint32_t Range = 0; + + --i; + Bit >>= 1; + + while (i > 0 && (VFPRegSave & Bit)) { + --i; + ++Range; + Bit >>= 1; + } + + uint32_t Op = UNWIND_OPCODE_POP_VFP_REG_RANGE_FSTMFDD | (i << 4) | Range; + Ops.push_back(static_cast<uint8_t>(Op >> 8)); + Ops.push_back(static_cast<uint8_t>(Op & 0xff)); + } +} + +/// Emit unwind opcodes for .setfp directives +void UnwindOpcodeAssembler::EmitSetFP(uint16_t FPReg) { + Ops.push_back(UNWIND_OPCODE_SET_VSP | FPReg); +} + +/// Emit unwind opcodes to update stack pointer +void UnwindOpcodeAssembler::EmitSPOffset(int64_t Offset) { + if (Offset > 0x200) { + uint8_t Buff[10]; + size_t Size = encodeULEB128((Offset - 0x204) >> 2, Buff); + Ops.push_back(UNWIND_OPCODE_INC_VSP_ULEB128); + Ops.append(Buff, Buff + Size); + } else if (Offset > 0) { + if (Offset > 0x100) { + Ops.push_back(UNWIND_OPCODE_INC_VSP | 0x3fu); + Offset -= 0x100; + } + Ops.push_back(UNWIND_OPCODE_INC_VSP | + static_cast<uint8_t>((Offset - 4) >> 2)); + } else if (Offset < 0) { + while (Offset < -0x100) { + Ops.push_back(UNWIND_OPCODE_DEC_VSP | 0x3fu); + Offset += 0x100; + } + Ops.push_back(UNWIND_OPCODE_DEC_VSP | + static_cast<uint8_t>(((-Offset) - 4) >> 2)); + } +} + +void 
UnwindOpcodeAssembler::AddOpcodeSizePrefix(size_t Pos) { + size_t SizeInWords = (size() + 3) / 4; + assert(SizeInWords <= 0x100u && + "Only 256 additional words are allowed for unwind opcodes"); + Ops[Pos] = static_cast<uint8_t>(SizeInWords - 1); +} + +void UnwindOpcodeAssembler::AddPersonalityIndexPrefix(size_t Pos, unsigned PI) { + assert(PI < NUM_PERSONALITY_INDEX && "Invalid personality prefix"); + Ops[Pos] = EHT_COMPACT | PI; +} + +void UnwindOpcodeAssembler::EmitFinishOpcodes() { + for (size_t i = (0x4u - (size() & 0x3u)) & 0x3u; i > 0; --i) + Ops.push_back(UNWIND_OPCODE_FINISH); +} + +void UnwindOpcodeAssembler::Finalize() { + if (HasPersonality) { + // Personality specified by .personality directive + Offset = 1; + AddOpcodeSizePrefix(1); + } else { + if (getOpcodeSize() <= 3) { + // __aeabi_unwind_cpp_pr0: [ 0x80 , OP1 , OP2 , OP3 ] + Offset = 1; + PersonalityIndex = AEABI_UNWIND_CPP_PR0; + AddPersonalityIndexPrefix(Offset, PersonalityIndex); + } else { + // __aeabi_unwind_cpp_pr1: [ 0x81 , SIZE , OP1 , OP2 , ... ] + Offset = 0; + PersonalityIndex = AEABI_UNWIND_CPP_PR1; + AddPersonalityIndexPrefix(Offset, PersonalityIndex); + AddOpcodeSizePrefix(1); + } + } + + // Emit the padding finish opcodes if the size() is not a multiple of 4. + EmitFinishOpcodes(); + + // Swap the byte order + uint8_t *Ptr = Ops.begin() + Offset; + assert(size() % 4 == 0 && "Final unwind opcodes should align to 4"); + for (size_t i = 0, n = size(); i < n; i += 4) { + std::swap(Ptr[i], Ptr[i + 3]); + std::swap(Ptr[i + 1], Ptr[i + 2]); + } +} diff --git a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMUnwindOpAsm.h b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMUnwindOpAsm.h new file mode 100644 index 0000000..f6ecaeb --- /dev/null +++ b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMUnwindOpAsm.h @@ -0,0 +1,114 @@ +//===-- ARMUnwindOpAsm.h - ARM Unwind Opcodes Assembler ---------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file declares the unwind opcode assembler for ARM exception handling +// table. +// +//===----------------------------------------------------------------------===// + +#ifndef ARM_UNWIND_OP_ASM_H +#define ARM_UNWIND_OP_ASM_H + +#include "ARMUnwindOp.h" + +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/Support/DataTypes.h" + +namespace llvm { + +class MCSymbol; + +class UnwindOpcodeAssembler { +private: + llvm::SmallVector<uint8_t, 8> Ops; + + unsigned Offset; + unsigned PersonalityIndex; + bool HasPersonality; + + enum { + // The number of bytes to be preserved for the size and personality index + // prefix of unwind opcodes. + NUM_PRESERVED_PREFIX_BUF = 2 + }; + +public: + UnwindOpcodeAssembler() + : Ops(NUM_PRESERVED_PREFIX_BUF), Offset(NUM_PRESERVED_PREFIX_BUF), + PersonalityIndex(NUM_PERSONALITY_INDEX), HasPersonality(0) { + } + + /// Reset the unwind opcode assembler.
+ void Reset() { + Ops.resize(NUM_PRESERVED_PREFIX_BUF); + Offset = NUM_PRESERVED_PREFIX_BUF; + PersonalityIndex = NUM_PERSONALITY_INDEX; + HasPersonality = 0; + } + + /// Get the size of the payload (including the size byte) + size_t size() const { + return Ops.size() - Offset; + } + + /// Get the beginning of the payload + const uint8_t *begin() const { + return Ops.begin() + Offset; + } + + /// Get the payload + StringRef data() const { + return StringRef(reinterpret_cast<const char *>(begin()), size()); + } + + /// Set the personality index + void setPersonality(const MCSymbol *Per) { + HasPersonality = 1; + } + + /// Get the personality index + unsigned getPersonalityIndex() const { + return PersonalityIndex; + } + + /// Emit unwind opcodes for .save directives + void EmitRegSave(uint32_t RegSave); + + /// Emit unwind opcodes for .vsave directives + void EmitVFPRegSave(uint32_t VFPRegSave); + + /// Emit unwind opcodes for .setfp directives + void EmitSetFP(uint16_t FPReg); + + /// Emit unwind opcodes to update stack pointer + void EmitSPOffset(int64_t Offset); + + /// Finalize the unwind opcode sequence for EmitBytes() + void Finalize(); + +private: + /// Get the size of the opcodes in bytes. + size_t getOpcodeSize() const { + return Ops.size() - NUM_PRESERVED_PREFIX_BUF; + } + + /// Add the length prefix to the payload + void AddOpcodeSizePrefix(size_t Pos); + + /// Add personality index prefix in some compact format + void AddPersonalityIndexPrefix(size_t Pos, unsigned PersonalityIndex); + + /// Fill the words with finish opcode if it is not aligned + void EmitFinishOpcodes(); +}; + +} // namespace llvm + +#endif // ARM_UNWIND_OP_ASM_H diff --git a/contrib/llvm/lib/Target/ARM/Thumb1FrameLowering.cpp b/contrib/llvm/lib/Target/ARM/Thumb1FrameLowering.cpp index 2c3388c..1e2a8b0 100644 --- a/contrib/llvm/lib/Target/ARM/Thumb1FrameLowering.cpp +++ b/contrib/llvm/lib/Target/ARM/Thumb1FrameLowering.cpp @@ -88,7 +88,7 @@ void Thumb1FrameLowering::emitPrologue(MachineFunction &MF) const { const Thumb1InstrInfo &TII = *static_cast<const Thumb1InstrInfo*>(MF.getTarget().getInstrInfo()); - unsigned VARegSaveSize = AFI->getVarArgsRegSaveSize(); + unsigned ArgRegsSaveSize = AFI->getArgRegsSaveSize(); unsigned NumBytes = MFI->getStackSize(); const std::vector<CalleeSavedInfo> &CSI = MFI->getCalleeSavedInfo(); DebugLoc dl = MBBI != MBB.end() ? MBBI->getDebugLoc() : DebugLoc(); @@ -104,8 +104,8 @@ void Thumb1FrameLowering::emitPrologue(MachineFunction &MF) const { unsigned GPRCS1Size = 0, GPRCS2Size = 0, DPRCSSize = 0; int FramePtrSpillFI = 0; - if (VARegSaveSize) - emitSPUpdate(MBB, MBBI, TII, dl, *RegInfo, -VARegSaveSize, + if (ArgRegsSaveSize) + emitSPUpdate(MBB, MBBI, TII, dl, *RegInfo, -ArgRegsSaveSize, MachineInstr::FrameSetup); if (!AFI->hasStackFrame()) { @@ -249,7 +249,7 @@ void Thumb1FrameLowering::emitEpilogue(MachineFunction &MF, const Thumb1InstrInfo &TII = *static_cast<const Thumb1InstrInfo*>(MF.getTarget().getInstrInfo()); - unsigned VARegSaveSize = AFI->getVarArgsRegSaveSize(); + unsigned ArgRegsSaveSize = AFI->getArgRegsSaveSize(); int NumBytes = (int)MFI->getStackSize(); const uint16_t *CSRegs = RegInfo->getCalleeSavedRegs(); unsigned FramePtr = RegInfo->getFrameRegister(MF); @@ -300,7 +300,7 @@ void Thumb1FrameLowering::emitEpilogue(MachineFunction &MF, } } - if (VARegSaveSize) { + if (ArgRegsSaveSize) { // Unlike T2 and ARM mode, the T1 pop instruction cannot restore // to LR, and we can't pop the value directly to the PC since // we need to update the SP after popping the value. 
Therefore, we @@ -313,7 +313,7 @@ void Thumb1FrameLowering::emitEpilogue(MachineFunction &MF, AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::tPOP))) .addReg(ARM::R3, RegState::Define); - emitSPUpdate(MBB, MBBI, TII, dl, *RegInfo, VARegSaveSize); + emitSPUpdate(MBB, MBBI, TII, dl, *RegInfo, ArgRegsSaveSize); MachineInstrBuilder MIB = BuildMI(MBB, MBBI, dl, TII.get(ARM::tBX_RET_vararg)) @@ -376,7 +376,7 @@ restoreCalleeSavedRegisters(MachineBasicBlock &MBB, ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo(); - bool isVarArg = AFI->getVarArgsRegSaveSize() > 0; + bool isVarArg = AFI->getArgRegsSaveSize() > 0; DebugLoc DL = MI->getDebugLoc(); MachineInstrBuilder MIB = BuildMI(MF, DL, TII.get(ARM::tPOP)); AddDefaultPred(MIB); diff --git a/contrib/llvm/lib/Target/ARM/Thumb2InstrInfo.cpp b/contrib/llvm/lib/Target/ARM/Thumb2InstrInfo.cpp index 67e8ec7..a1b48c2 100644 --- a/contrib/llvm/lib/Target/ARM/Thumb2InstrInfo.cpp +++ b/contrib/llvm/lib/Target/ARM/Thumb2InstrInfo.cpp @@ -19,6 +19,7 @@ #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineMemOperand.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/MC/MCInst.h" #include "llvm/Support/CommandLine.h" @@ -126,25 +127,41 @@ storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, unsigned SrcReg, bool isKill, int FI, const TargetRegisterClass *RC, const TargetRegisterInfo *TRI) const { + DebugLoc DL; + if (I != MBB.end()) DL = I->getDebugLoc(); + + MachineFunction &MF = *MBB.getParent(); + MachineFrameInfo &MFI = *MF.getFrameInfo(); + MachineMemOperand *MMO = + MF.getMachineMemOperand(MachinePointerInfo::getFixedStack(FI), + MachineMemOperand::MOStore, + MFI.getObjectSize(FI), + MFI.getObjectAlignment(FI)); + if (RC == &ARM::GPRRegClass || RC == &ARM::tGPRRegClass || RC == &ARM::tcGPRRegClass || RC == &ARM::rGPRRegClass || RC == &ARM::GPRnopcRegClass) { - DebugLoc DL; - if (I != MBB.end()) DL = I->getDebugLoc(); - - MachineFunction &MF = *MBB.getParent(); - MachineFrameInfo &MFI = *MF.getFrameInfo(); - MachineMemOperand *MMO = - MF.getMachineMemOperand(MachinePointerInfo::getFixedStack(FI), - MachineMemOperand::MOStore, - MFI.getObjectSize(FI), - MFI.getObjectAlignment(FI)); AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::t2STRi12)) .addReg(SrcReg, getKillRegState(isKill)) .addFrameIndex(FI).addImm(0).addMemOperand(MMO)); return; } + if (ARM::GPRPairRegClass.hasSubClassEq(RC)) { + // Thumb2 STRD expects its dest-registers to be in rGPR. Not a problem for + // gsub_0, but needs an extra constraint for gsub_1 (which could be sp + // otherwise). 
+ MachineRegisterInfo *MRI = &MF.getRegInfo(); + MRI->constrainRegClass(SrcReg, &ARM::GPRPair_with_gsub_1_in_rGPRRegClass); + + MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(ARM::t2STRDi8)); + AddDReg(MIB, SrcReg, ARM::gsub_0, getKillRegState(isKill), TRI); + AddDReg(MIB, SrcReg, ARM::gsub_1, 0, TRI); + MIB.addFrameIndex(FI).addImm(0).addMemOperand(MMO); + AddDefaultPred(MIB); + return; + } + ARMBaseInstrInfo::storeRegToStackSlot(MBB, I, SrcReg, isKill, FI, RC, TRI); } @@ -153,24 +170,42 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, unsigned DestReg, int FI, const TargetRegisterClass *RC, const TargetRegisterInfo *TRI) const { + MachineFunction &MF = *MBB.getParent(); + MachineFrameInfo &MFI = *MF.getFrameInfo(); + MachineMemOperand *MMO = + MF.getMachineMemOperand(MachinePointerInfo::getFixedStack(FI), + MachineMemOperand::MOLoad, + MFI.getObjectSize(FI), + MFI.getObjectAlignment(FI)); + DebugLoc DL; + if (I != MBB.end()) DL = I->getDebugLoc(); + if (RC == &ARM::GPRRegClass || RC == &ARM::tGPRRegClass || RC == &ARM::tcGPRRegClass || RC == &ARM::rGPRRegClass || RC == &ARM::GPRnopcRegClass) { - DebugLoc DL; - if (I != MBB.end()) DL = I->getDebugLoc(); - - MachineFunction &MF = *MBB.getParent(); - MachineFrameInfo &MFI = *MF.getFrameInfo(); - MachineMemOperand *MMO = - MF.getMachineMemOperand(MachinePointerInfo::getFixedStack(FI), - MachineMemOperand::MOLoad, - MFI.getObjectSize(FI), - MFI.getObjectAlignment(FI)); AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::t2LDRi12), DestReg) .addFrameIndex(FI).addImm(0).addMemOperand(MMO)); return; } + if (ARM::GPRPairRegClass.hasSubClassEq(RC)) { + // Thumb2 LDRD expects its dest-registers to be in rGPR. Not a problem for + // gsub_0, but needs an extra constraint for gsub_1 (which could be sp + // otherwise). + MachineRegisterInfo *MRI = &MF.getRegInfo(); + MRI->constrainRegClass(DestReg, &ARM::GPRPair_with_gsub_1_in_rGPRRegClass); + + MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(ARM::t2LDRDi8)); + AddDReg(MIB, DestReg, ARM::gsub_0, RegState::DefineNoRead, TRI); + AddDReg(MIB, DestReg, ARM::gsub_1, RegState::DefineNoRead, TRI); + MIB.addFrameIndex(FI).addImm(0).addMemOperand(MMO); + AddDefaultPred(MIB); + + if (TargetRegisterInfo::isPhysicalRegister(DestReg)) + MIB.addReg(DestReg, RegState::ImplicitDefine); + return; + } + ARMBaseInstrInfo::loadRegFromStackSlot(MBB, I, DestReg, FI, RC, TRI); } @@ -514,6 +549,15 @@ bool llvm::rewriteT2FrameIndex(MachineInstr &MI, unsigned FrameRegIdx, Offset = -Offset; isSub = true; } + } else if (AddrMode == ARMII::AddrModeT2_i8s4) { + Offset += MI.getOperand(FrameRegIdx + 1).getImm() * 4; + NumBits = 8; + // MCInst operand has already scaled value. + Scale = 1; + if (Offset < 0) { + isSub = true; + Offset = -Offset; + } } else { llvm_unreachable("Unsupported addressing mode!"); } diff --git a/contrib/llvm/lib/Target/ARM/Thumb2SizeReduction.cpp b/contrib/llvm/lib/Target/ARM/Thumb2SizeReduction.cpp index d50f5d9..4795aae 100644 --- a/contrib/llvm/lib/Target/ARM/Thumb2SizeReduction.cpp +++ b/contrib/llvm/lib/Target/ARM/Thumb2SizeReduction.cpp @@ -926,13 +926,11 @@ bool Thumb2SizeReduce::ReduceMBB(MachineBasicBlock &MBB) { HighLatencyCPSR = false; // Check predecessors for the latest CPSRDef. - bool HasBackEdges = false; for (MachineBasicBlock::pred_iterator I = MBB.pred_begin(), E = MBB.pred_end(); I != E; ++I) { const MBBInfo &PInfo = BlockInfo[(*I)->getNumber()]; if (!PInfo.Visited) { // Since blocks are visited in RPO, this must be a back-edge. 
- HasBackEdges = true; continue; } if (PInfo.HighLatencyCPSR) { diff --git a/contrib/llvm/lib/Target/Hexagon/Hexagon.h b/contrib/llvm/lib/Target/Hexagon/Hexagon.h index dfbefc8..a9b00a2 100644 --- a/contrib/llvm/lib/Target/Hexagon/Hexagon.h +++ b/contrib/llvm/lib/Target/Hexagon/Hexagon.h @@ -29,26 +29,25 @@ namespace llvm { class HexagonTargetMachine; class raw_ostream; - FunctionPass *createHexagonISelDag(HexagonTargetMachine &TM, + FunctionPass *createHexagonISelDag(const HexagonTargetMachine &TM, CodeGenOpt::Level OptLevel); - FunctionPass *createHexagonDelaySlotFillerPass(TargetMachine &TM); - FunctionPass *createHexagonFPMoverPass(TargetMachine &TM); - FunctionPass *createHexagonRemoveExtendOps(HexagonTargetMachine &TM); - FunctionPass *createHexagonCFGOptimizer(HexagonTargetMachine &TM); - - FunctionPass *createHexagonSplitTFRCondSets(HexagonTargetMachine &TM); - FunctionPass *createHexagonExpandPredSpillCode(HexagonTargetMachine &TM); + FunctionPass *createHexagonDelaySlotFillerPass(const TargetMachine &TM); + FunctionPass *createHexagonFPMoverPass(const TargetMachine &TM); + FunctionPass *createHexagonRemoveExtendArgs(const HexagonTargetMachine &TM); + FunctionPass *createHexagonCFGOptimizer(const HexagonTargetMachine &TM); + FunctionPass *createHexagonSplitTFRCondSets(const HexagonTargetMachine &TM); + FunctionPass *createHexagonExpandPredSpillCode( + const HexagonTargetMachine &TM); FunctionPass *createHexagonHardwareLoops(); FunctionPass *createHexagonPeephole(); FunctionPass *createHexagonFixupHwLoops(); FunctionPass *createHexagonPacketizer(); FunctionPass *createHexagonNewValueJump(); - /* TODO: object output. MCCodeEmitter *createHexagonMCCodeEmitter(const Target &, - TargetMachine &TM, + const TargetMachine &TM, MCContext &Ctx); */ /* TODO: assembler input. diff --git a/contrib/llvm/lib/Target/Hexagon/Hexagon.td b/contrib/llvm/lib/Target/Hexagon/Hexagon.td index 8a5ee40..9b3a643 100644 --- a/contrib/llvm/lib/Target/Hexagon/Hexagon.td +++ b/contrib/llvm/lib/Target/Hexagon/Hexagon.td @@ -84,12 +84,36 @@ def getPredOpcode : InstrMapping { } //===----------------------------------------------------------------------===// +// Generate mapping table to relate predicate-true instructions with their +// predicate-false forms +// +def getFalsePredOpcode : InstrMapping { + let FilterClass = "PredRel"; + let RowFields = ["BaseOpcode", "PNewValue", "isNVStore", "isBrTaken"]; + let ColFields = ["PredSense"]; + let KeyCol = ["true"]; + let ValueCols = [["false"]]; +} + +//===----------------------------------------------------------------------===// +// Generate mapping table to relate predicate-false instructions with their +// predicate-true forms +// +def getTruePredOpcode : InstrMapping { + let FilterClass = "PredRel"; + let RowFields = ["BaseOpcode", "PNewValue", "isNVStore", "isBrTaken"]; + let ColFields = ["PredSense"]; + let KeyCol = ["false"]; + let ValueCols = [["true"]]; +} + +//===----------------------------------------------------------------------===// // Generate mapping table to relate predicated instructions with their .new // format. 
// def getPredNewOpcode : InstrMapping { let FilterClass = "PredNewRel"; - let RowFields = ["BaseOpcode", "PredSense", "isNVStore"]; + let RowFields = ["BaseOpcode", "PredSense", "isNVStore", "isBrTaken"]; let ColFields = ["PNewValue"]; let KeyCol = [""]; let ValueCols = [["new"]]; diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonCFGOptimizer.cpp b/contrib/llvm/lib/Target/Hexagon/HexagonCFGOptimizer.cpp index d4078ad..8597f11 100644 --- a/contrib/llvm/lib/Target/Hexagon/HexagonCFGOptimizer.cpp +++ b/contrib/llvm/lib/Target/Hexagon/HexagonCFGOptimizer.cpp @@ -26,21 +26,27 @@ using namespace llvm; +namespace llvm { + void initializeHexagonCFGOptimizerPass(PassRegistry&); +} + + namespace { class HexagonCFGOptimizer : public MachineFunctionPass { private: - HexagonTargetMachine& QTM; + const HexagonTargetMachine& QTM; const HexagonSubtarget &QST; void InvertAndChangeJumpTarget(MachineInstr*, MachineBasicBlock*); public: static char ID; - HexagonCFGOptimizer(HexagonTargetMachine& TM) : MachineFunctionPass(ID), - QTM(TM), - QST(*TM.getSubtargetImpl()) {} + HexagonCFGOptimizer(const HexagonTargetMachine& TM) + : MachineFunctionPass(ID), QTM(TM), QST(*TM.getSubtargetImpl()) { + initializeHexagonCFGOptimizerPass(*PassRegistry::getPassRegistry()); + } const char *getPassName() const { return "Hexagon CFG Optimizer"; @@ -52,8 +58,8 @@ private: char HexagonCFGOptimizer::ID = 0; static bool IsConditionalBranch(int Opc) { - return (Opc == Hexagon::JMP_c) || (Opc == Hexagon::JMP_cNot) - || (Opc == Hexagon::JMP_cdnPt) || (Opc == Hexagon::JMP_cdnNotPt); + return (Opc == Hexagon::JMP_t) || (Opc == Hexagon::JMP_f) + || (Opc == Hexagon::JMP_tnew_t) || (Opc == Hexagon::JMP_fnew_t); } @@ -68,20 +74,20 @@ HexagonCFGOptimizer::InvertAndChangeJumpTarget(MachineInstr* MI, const HexagonInstrInfo *QII = QTM.getInstrInfo(); int NewOpcode = 0; switch(MI->getOpcode()) { - case Hexagon::JMP_c: - NewOpcode = Hexagon::JMP_cNot; + case Hexagon::JMP_t: + NewOpcode = Hexagon::JMP_f; break; - case Hexagon::JMP_cNot: - NewOpcode = Hexagon::JMP_c; + case Hexagon::JMP_f: + NewOpcode = Hexagon::JMP_t; break; - case Hexagon::JMP_cdnPt: - NewOpcode = Hexagon::JMP_cdnNotPt; + case Hexagon::JMP_tnew_t: + NewOpcode = Hexagon::JMP_fnew_t; break; - case Hexagon::JMP_cdnNotPt: - NewOpcode = Hexagon::JMP_cdnPt; + case Hexagon::JMP_fnew_t: + NewOpcode = Hexagon::JMP_tnew_t; break; default: @@ -156,8 +162,8 @@ bool HexagonCFGOptimizer::runOnMachineFunction(MachineFunction &Fn) { // The target of the unconditional branch must be JumpAroundTarget. // TODO: If not, we should not invert the unconditional branch. 
MachineBasicBlock* CondBranchTarget = NULL; - if ((MI->getOpcode() == Hexagon::JMP_c) || - (MI->getOpcode() == Hexagon::JMP_cNot)) { + if ((MI->getOpcode() == Hexagon::JMP_t) || + (MI->getOpcode() == Hexagon::JMP_f)) { CondBranchTarget = MI->getOperand(1).getMBB(); } @@ -231,6 +237,16 @@ bool HexagonCFGOptimizer::runOnMachineFunction(MachineFunction &Fn) { // Public Constructor Functions //===----------------------------------------------------------------------===// -FunctionPass *llvm::createHexagonCFGOptimizer(HexagonTargetMachine &TM) { +static void initializePassOnce(PassRegistry &Registry) { + PassInfo *PI = new PassInfo("Hexagon CFG Optimizer", "hexagon-cfg", + &HexagonCFGOptimizer::ID, 0, false, false); + Registry.registerPass(*PI, true); +} + +void llvm::initializeHexagonCFGOptimizerPass(PassRegistry &Registry) { + CALL_ONCE_INITIALIZATION(initializePassOnce) +} + +FunctionPass *llvm::createHexagonCFGOptimizer(const HexagonTargetMachine &TM) { return new HexagonCFGOptimizer(TM); } diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonExpandPredSpillCode.cpp b/contrib/llvm/lib/Target/Hexagon/HexagonExpandPredSpillCode.cpp index 0814421..8a5991f 100644 --- a/contrib/llvm/lib/Target/Hexagon/HexagonExpandPredSpillCode.cpp +++ b/contrib/llvm/lib/Target/Hexagon/HexagonExpandPredSpillCode.cpp @@ -41,16 +41,24 @@ using namespace llvm; +namespace llvm { + void initializeHexagonExpandPredSpillCodePass(PassRegistry&); +} + + namespace { class HexagonExpandPredSpillCode : public MachineFunctionPass { - HexagonTargetMachine& QTM; + const HexagonTargetMachine& QTM; const HexagonSubtarget &QST; public: static char ID; - HexagonExpandPredSpillCode(HexagonTargetMachine& TM) : - MachineFunctionPass(ID), QTM(TM), QST(*TM.getSubtargetImpl()) {} + HexagonExpandPredSpillCode(const HexagonTargetMachine& TM) : + MachineFunctionPass(ID), QTM(TM), QST(*TM.getSubtargetImpl()) { + PassRegistry &Registry = *PassRegistry::getPassRegistry(); + initializeHexagonExpandPredSpillCodePass(Registry); + } const char *getPassName() const { return "Hexagon Expand Predicate Spill Code"; @@ -175,6 +183,19 @@ bool HexagonExpandPredSpillCode::runOnMachineFunction(MachineFunction &Fn) { // Public Constructor Functions //===----------------------------------------------------------------------===// -FunctionPass *llvm::createHexagonExpandPredSpillCode(HexagonTargetMachine &TM) { +static void initializePassOnce(PassRegistry &Registry) { + const char *Name = "Hexagon Expand Predicate Spill Code"; + PassInfo *PI = new PassInfo(Name, "hexagon-spill-pred", + &HexagonExpandPredSpillCode::ID, + 0, false, false); + Registry.registerPass(*PI, true); +} + +void llvm::initializeHexagonExpandPredSpillCodePass(PassRegistry &Registry) { + CALL_ONCE_INITIALIZATION(initializePassOnce) +} + +FunctionPass* +llvm::createHexagonExpandPredSpillCode(const HexagonTargetMachine &TM) { return new HexagonExpandPredSpillCode(TM); } diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonFrameLowering.cpp b/contrib/llvm/lib/Target/Hexagon/HexagonFrameLowering.cpp index d6a9329..de993ee 100644 --- a/contrib/llvm/lib/Target/Hexagon/HexagonFrameLowering.cpp +++ b/contrib/llvm/lib/Target/Hexagon/HexagonFrameLowering.cpp @@ -189,7 +189,7 @@ void HexagonFrameLowering::emitEpilogue(MachineFunction &MF, // Replace 'jumpr r31' instruction with dealloc_return for V4 and higher // versions. - if (STI.hasV4TOps() && MBBI->getOpcode() == Hexagon::JMPR + if (STI.hasV4TOps() && MBBI->getOpcode() == Hexagon::JMPret && !DisableDeallocRet) { // Remove jumpr node. 
MBB.erase(MBBI); diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonHardwareLoops.cpp b/contrib/llvm/lib/Target/Hexagon/HexagonHardwareLoops.cpp index 1786624..d002788 100644 --- a/contrib/llvm/lib/Target/Hexagon/HexagonHardwareLoops.cpp +++ b/contrib/llvm/lib/Target/Hexagon/HexagonHardwareLoops.cpp @@ -541,12 +541,6 @@ CountValue *HexagonHardwareLoops::getLoopTripCount(MachineLoop *L, case Hexagon::CMPEQrr: Cmp = !Negated ? Comparison::EQ : Comparison::NE; break; - case Hexagon::CMPLTrr: - Cmp = !Negated ? Comparison::LTs : Comparison::GEs; - break; - case Hexagon::CMPLTUrr: - Cmp = !Negated ? Comparison::LTu : Comparison::GEu; - break; case Hexagon::CMPGTUri: case Hexagon::CMPGTUrr: Cmp = !Negated ? Comparison::GTu : Comparison::LEu; @@ -1125,8 +1119,8 @@ bool HexagonHardwareLoops::convertToHardwareLoop(MachineLoop *L) { // The loop ends with either: // - a conditional branch followed by an unconditional branch, or // - a conditional branch to the loop start. - if (LastI->getOpcode() == Hexagon::JMP_c || - LastI->getOpcode() == Hexagon::JMP_cNot) { + if (LastI->getOpcode() == Hexagon::JMP_t || + LastI->getOpcode() == Hexagon::JMP_f) { // Delete one and change/add an uncond. branch to out of the loop. MachineBasicBlock *BranchTarget = LastI->getOperand(1).getMBB(); LastI = LastMBB->erase(LastI); diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp b/contrib/llvm/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp index 8fc9ba1..54ca2c9 100644 --- a/contrib/llvm/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp +++ b/contrib/llvm/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp @@ -49,11 +49,11 @@ class HexagonDAGToDAGISel : public SelectionDAGISel { const HexagonSubtarget &Subtarget; // Keep a reference to HexagonTargetMachine. - HexagonTargetMachine& TM; + const HexagonTargetMachine& TM; const HexagonInstrInfo *TII; DenseMap<const GlobalValue *, unsigned> GlobalAddressUseCountMap; public: - explicit HexagonDAGToDAGISel(HexagonTargetMachine &targetmachine, + explicit HexagonDAGToDAGISel(const HexagonTargetMachine &targetmachine, CodeGenOpt::Level OptLevel) : SelectionDAGISel(targetmachine, OptLevel), Subtarget(targetmachine.getSubtarget<HexagonSubtarget>()), @@ -160,6 +160,17 @@ inline SDValue XformU7ToU7M1Imm(signed Imm) { return CurDAG->getTargetConstant(Imm - 1, MVT::i8); } +// XformS8ToS8M1Imm - Return a target constant decremented by 1. +inline SDValue XformSToSM1Imm(signed Imm) { + return CurDAG->getTargetConstant(Imm - 1, MVT::i32); +} + +// XformU8ToU8M1Imm - Return a target constant decremented by 1. +inline SDValue XformUToUM1Imm(unsigned Imm) { + assert((Imm >= 1) && "Cannot decrement unsigned int less than 1"); + return CurDAG->getTargetConstant(Imm - 1, MVT::i32); +} + // Include the pieces autogenerated from the target description. #include "HexagonGenDAGISel.inc" }; @@ -169,7 +180,7 @@ inline SDValue XformU7ToU7M1Imm(signed Imm) { /// createHexagonISelDag - This pass converts a legalized DAG into a /// Hexagon-specific DAG, ready for instruction scheduling. /// -FunctionPass *llvm::createHexagonISelDag(HexagonTargetMachine &TM, +FunctionPass *llvm::createHexagonISelDag(const HexagonTargetMachine &TM, CodeGenOpt::Level OptLevel) { return new HexagonDAGToDAGISel(TM, OptLevel); } @@ -697,7 +708,7 @@ SDNode *HexagonDAGToDAGISel::SelectIndexedStore(StoreSDNode *ST, DebugLoc dl) { // Build post increment store. 
SDNode* Result = CurDAG->getMachineNode(Opcode, dl, MVT::i32, - MVT::Other, Ops, 4); + MVT::Other, Ops); MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1); MemOp[0] = ST->getMemOperand(); cast<MachineSDNode>(Result)->setMemRefs(MemOp, MemOp + 1); @@ -723,8 +734,7 @@ SDNode *HexagonDAGToDAGISel::SelectIndexedStore(StoreSDNode *ST, DebugLoc dl) { // Build regular store. SDValue TargetConstVal = CurDAG->getTargetConstant(Val, MVT::i32); - SDNode* Result_1 = CurDAG->getMachineNode(Opcode, dl, MVT::Other, Ops, - 4); + SDNode* Result_1 = CurDAG->getMachineNode(Opcode, dl, MVT::Other, Ops); // Build splitted incriment instruction. SDNode* Result_2 = CurDAG->getMachineNode(Hexagon::ADD_ri, dl, MVT::i32, Base, @@ -780,7 +790,7 @@ SDNode *HexagonDAGToDAGISel::SelectBaseOffsetStore(StoreSDNode *ST, Value, Chain}; // build indexed store SDNode* Result = CurDAG->getMachineNode(Opcode, dl, - MVT::Other, Ops, 4); + MVT::Other, Ops); MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1); MemOp[0] = ST->getMemOperand(); cast<MachineSDNode>(Result)->setMemRefs(MemOp, MemOp + 1); @@ -1230,8 +1240,7 @@ SDNode *HexagonDAGToDAGISel::SelectIntrinsicWOChain(SDNode *N) { } EVT ReturnValueVT = N->getValueType(0); SDNode *Result = CurDAG->getMachineNode(IntrinsicWithPred, dl, - ReturnValueVT, - Ops.data(), Ops.size()); + ReturnValueVT, Ops); ReplaceUses(N, Result); return Result; } diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonISelLowering.cpp b/contrib/llvm/lib/Target/Hexagon/HexagonISelLowering.cpp index 15858a9..0e5b8dc 100644 --- a/contrib/llvm/lib/Target/Hexagon/HexagonISelLowering.cpp +++ b/contrib/llvm/lib/Target/Hexagon/HexagonISelLowering.cpp @@ -1002,14 +1002,6 @@ HexagonTargetLowering::LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const { return FrameAddr; } - -SDValue HexagonTargetLowering::LowerMEMBARRIER(SDValue Op, - SelectionDAG& DAG) const { - DebugLoc dl = Op.getDebugLoc(); - return DAG.getNode(HexagonISD::BARRIER, dl, MVT::Other, Op.getOperand(0)); -} - - SDValue HexagonTargetLowering::LowerATOMIC_FENCE(SDValue Op, SelectionDAG& DAG) const { DebugLoc dl = Op.getDebugLoc(); @@ -1361,7 +1353,6 @@ HexagonTargetLowering::HexagonTargetLowering(HexagonTargetMachine } - setOperationAction(ISD::BRIND, MVT::Other, Expand); if (EmitJumpTables) { setOperationAction(ISD::BR_JT, MVT::Other, Custom); } else { @@ -1377,7 +1368,6 @@ HexagonTargetLowering::HexagonTargetLowering(HexagonTargetMachine setOperationAction(ISD::BR_CC, MVT::i32, Expand); setOperationAction(ISD::BR_CC, MVT::i64, Expand); - setOperationAction(ISD::MEMBARRIER, MVT::Other, Custom); setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, Custom); setOperationAction(ISD::FSIN , MVT::f64, Expand); @@ -1444,7 +1434,7 @@ HexagonTargetLowering::HexagonTargetLowering(HexagonTargetMachine setOperationAction(ISD::EXCEPTIONADDR, MVT::i32, Expand); setOperationAction(ISD::EHSELECTION, MVT::i32, Expand); - setOperationAction(ISD::EH_RETURN, MVT::Other, Expand); + setOperationAction(ISD::EH_RETURN, MVT::Other, Custom); if (TM.getSubtargetImpl()->isSubtargetV2()) { setExceptionPointerRegister(Hexagon::R20); @@ -1499,6 +1489,7 @@ HexagonTargetLowering::getTargetNodeName(unsigned Opcode) const { case HexagonISD::RET_FLAG: return "HexagonISD::RET_FLAG"; case HexagonISD::BR_JT: return "HexagonISD::BR_JT"; case HexagonISD::TC_RETURN: return "HexagonISD::TC_RETURN"; + case HexagonISD::EH_RETURN: return "HexagonISD::EH_RETURN"; } } @@ -1520,16 +1511,43 @@ bool HexagonTargetLowering::isTruncateFree(EVT VT1, EVT VT2) const { } SDValue 
+HexagonTargetLowering::LowerEH_RETURN(SDValue Op, SelectionDAG &DAG) const { + SDValue Chain = Op.getOperand(0); + SDValue Offset = Op.getOperand(1); + SDValue Handler = Op.getOperand(2); + DebugLoc dl = Op.getDebugLoc(); + + // Mark function as containing a call to EH_RETURN. + HexagonMachineFunctionInfo *FuncInfo = + DAG.getMachineFunction().getInfo<HexagonMachineFunctionInfo>(); + FuncInfo->setHasEHReturn(); + + unsigned OffsetReg = Hexagon::R28; + + SDValue StoreAddr = DAG.getNode(ISD::ADD, dl, getPointerTy(), + DAG.getRegister(Hexagon::R30, getPointerTy()), + DAG.getIntPtrConstant(4)); + Chain = DAG.getStore(Chain, dl, Handler, StoreAddr, MachinePointerInfo(), + false, false, 0); + Chain = DAG.getCopyToReg(Chain, dl, OffsetReg, Offset); + + // Not needed we already use it as explict input to EH_RETURN. + // MF.getRegInfo().addLiveOut(OffsetReg); + + return DAG.getNode(HexagonISD::EH_RETURN, dl, MVT::Other, Chain); +} + +SDValue HexagonTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { switch (Op.getOpcode()) { default: llvm_unreachable("Should not custom lower this!"); case ISD::ConstantPool: return LowerConstantPool(Op, DAG); + case ISD::EH_RETURN: return LowerEH_RETURN(Op, DAG); // Frame & Return address. Currently unimplemented. case ISD::RETURNADDR: return LowerRETURNADDR(Op, DAG); case ISD::FRAMEADDR: return LowerFRAMEADDR(Op, DAG); case ISD::GlobalTLSAddress: llvm_unreachable("TLS not implemented for Hexagon."); - case ISD::MEMBARRIER: return LowerMEMBARRIER(Op, DAG); case ISD::ATOMIC_FENCE: return LowerATOMIC_FENCE(Op, DAG); case ISD::GlobalAddress: return LowerGLOBALADDRESS(Op, DAG); case ISD::BlockAddress: return LowerBlockAddress(Op, DAG); diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonISelLowering.h b/contrib/llvm/lib/Target/Hexagon/HexagonISelLowering.h index 3279cc6..bb1acc1 100644 --- a/contrib/llvm/lib/Target/Hexagon/HexagonISelLowering.h +++ b/contrib/llvm/lib/Target/Hexagon/HexagonISelLowering.h @@ -62,7 +62,8 @@ namespace llvm { WrapperShuffEH, WrapperShuffOB, WrapperShuffOH, - TC_RETURN + TC_RETURN, + EH_RETURN }; } @@ -101,6 +102,7 @@ namespace llvm { SDValue LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const; SDValue LowerINLINEASM(SDValue Op, SelectionDAG &DAG) const; SDValue LowerEH_LABEL(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerEH_RETURN(SDValue Op, SelectionDAG &DAG) const; SDValue LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, bool isVarArg, const SmallVectorImpl<ISD::InputArg> &Ins, @@ -122,7 +124,6 @@ namespace llvm { SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const; SDValue LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const; - SDValue LowerMEMBARRIER(SDValue Op, SelectionDAG& DAG) const; SDValue LowerATOMIC_FENCE(SDValue Op, SelectionDAG& DAG) const; SDValue LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const; diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonInstrInfo.cpp b/contrib/llvm/lib/Target/Hexagon/HexagonInstrInfo.cpp index 60b12ac..f114170 100644 --- a/contrib/llvm/lib/Target/Hexagon/HexagonInstrInfo.cpp +++ b/contrib/llvm/lib/Target/Hexagon/HexagonInstrInfo.cpp @@ -23,7 +23,9 @@ #include "llvm/CodeGen/MachineMemOperand.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/PseudoSourceValue.h" +#include "llvm/Support/Debug.h" #include "llvm/Support/MathExtras.h" +#include "llvm/Support/raw_ostream.h" #define GET_INSTRINFO_CTOR #define GET_INSTRMAP_INFO #include "HexagonGenInstrInfo.inc" @@ -118,16 +120,16 @@ 
HexagonInstrInfo::InsertBranch(MachineBasicBlock &MBB,MachineBasicBlock *TBB, DebugLoc DL) const{ int BOpc = Hexagon::JMP; - int BccOpc = Hexagon::JMP_c; + int BccOpc = Hexagon::JMP_t; assert(TBB && "InsertBranch must not be told to insert a fallthrough"); int regPos = 0; // Check if ReverseBranchCondition has asked to reverse this branch // If we want to reverse the branch an odd number of times, we want - // JMP_cNot. + // JMP_f. if (!Cond.empty() && Cond[0].isImm() && Cond[0].getImm() == 0) { - BccOpc = Hexagon::JMP_cNot; + BccOpc = Hexagon::JMP_f; regPos = 1; } @@ -174,8 +176,8 @@ bool HexagonInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB, FBB = NULL; // If the block has no terminators, it just falls into the block after it. - MachineBasicBlock::iterator I = MBB.end(); - if (I == MBB.begin()) + MachineBasicBlock::instr_iterator I = MBB.instr_end(); + if (I == MBB.instr_begin()) return false; // A basic block may looks like this: @@ -194,13 +196,24 @@ bool HexagonInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB, --I; if (I->isEHLabel()) return true; - } while (I != MBB.begin()); + } while (I != MBB.instr_begin()); - I = MBB.end(); + I = MBB.instr_end(); --I; while (I->isDebugValue()) { - if (I == MBB.begin()) + if (I == MBB.instr_begin()) + return false; + --I; + } + + // Delete the JMP if it's equivalent to a fall-through. + if (AllowModify && I->getOpcode() == Hexagon::JMP && + MBB.isLayoutSuccessor(I->getOperand(0).getMBB())) { + DEBUG(dbgs()<< "\nErasing the jump to successor block\n";); + I->eraseFromParent(); + I = MBB.instr_end(); + if (I == MBB.instr_begin()) return false; --I; } @@ -209,23 +222,42 @@ bool HexagonInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB, // Get the last instruction in the block. MachineInstr *LastInst = I; + MachineInstr *SecondLastInst = NULL; + // Find one more terminator if present. + do { + if (&*I != LastInst && !I->isBundle() && isUnpredicatedTerminator(I)) { + if (!SecondLastInst) + SecondLastInst = I; + else + // This is a third branch. + return true; + } + if (I == MBB.instr_begin()) + break; + --I; + } while(I); + + int LastOpcode = LastInst->getOpcode(); + + bool LastOpcodeHasJMP_c = PredOpcodeHasJMP_c(LastOpcode); + bool LastOpcodeHasNot = PredOpcodeHasNot(LastOpcode); // If there is only one terminator instruction, process it. - if (I == MBB.begin() || !isUnpredicatedTerminator(--I)) { - if (LastInst->getOpcode() == Hexagon::JMP) { + if (LastInst && !SecondLastInst) { + if (LastOpcode == Hexagon::JMP) { TBB = LastInst->getOperand(0).getMBB(); return false; } - if (LastInst->getOpcode() == Hexagon::JMP_c) { - // Block ends with fall-through true condbranch. - TBB = LastInst->getOperand(1).getMBB(); + if (LastOpcode == Hexagon::ENDLOOP0) { + TBB = LastInst->getOperand(0).getMBB(); Cond.push_back(LastInst->getOperand(0)); return false; } - if (LastInst->getOpcode() == Hexagon::JMP_cNot) { - // Block ends with fall-through false condbranch. + if (LastOpcodeHasJMP_c) { TBB = LastInst->getOperand(1).getMBB(); - Cond.push_back(MachineOperand::CreateImm(0)); + if (LastOpcodeHasNot) { + Cond.push_back(MachineOperand::CreateImm(0)); + } Cond.push_back(LastInst->getOperand(0)); return false; } @@ -233,29 +265,14 @@ bool HexagonInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB, return true; } - // Get the instruction before it if it's a terminator. - MachineInstr *SecondLastInst = I; - - // If there are three terminators, we don't know what sort of block this is. 
- if (SecondLastInst && I != MBB.begin() && - isUnpredicatedTerminator(--I)) - return true; + int SecLastOpcode = SecondLastInst->getOpcode(); - // If the block ends with Hexagon::BRCOND and Hexagon:JMP, handle it. - if (((SecondLastInst->getOpcode() == Hexagon::BRCOND) || - (SecondLastInst->getOpcode() == Hexagon::JMP_c)) && - LastInst->getOpcode() == Hexagon::JMP) { + bool SecLastOpcodeHasJMP_c = PredOpcodeHasJMP_c(SecLastOpcode); + bool SecLastOpcodeHasNot = PredOpcodeHasNot(SecLastOpcode); + if (SecLastOpcodeHasJMP_c && (LastOpcode == Hexagon::JMP)) { TBB = SecondLastInst->getOperand(1).getMBB(); - Cond.push_back(SecondLastInst->getOperand(0)); - FBB = LastInst->getOperand(0).getMBB(); - return false; - } - - // If the block ends with Hexagon::JMP_cNot and Hexagon:JMP, handle it. - if ((SecondLastInst->getOpcode() == Hexagon::JMP_cNot) && - LastInst->getOpcode() == Hexagon::JMP) { - TBB = SecondLastInst->getOperand(1).getMBB(); - Cond.push_back(MachineOperand::CreateImm(0)); + if (SecLastOpcodeHasNot) + Cond.push_back(MachineOperand::CreateImm(0)); Cond.push_back(SecondLastInst->getOperand(0)); FBB = LastInst->getOperand(0).getMBB(); return false; @@ -263,8 +280,7 @@ bool HexagonInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB, // If the block ends with two Hexagon:JMPs, handle it. The second one is not // executed, so remove it. - if (SecondLastInst->getOpcode() == Hexagon::JMP && - LastInst->getOpcode() == Hexagon::JMP) { + if (SecLastOpcode == Hexagon::JMP && LastOpcode == Hexagon::JMP) { TBB = SecondLastInst->getOperand(0).getMBB(); I = LastInst; if (AllowModify) @@ -272,6 +288,15 @@ bool HexagonInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB, return false; } + // If the block ends with an ENDLOOP, and JMP, handle it. + if (SecLastOpcode == Hexagon::ENDLOOP0 && + LastOpcode == Hexagon::JMP) { + TBB = SecondLastInst->getOperand(0).getMBB(); + Cond.push_back(SecondLastInst->getOperand(0)); + FBB = LastInst->getOperand(0).getMBB(); + return false; + } + // Otherwise, can't handle this. 
return true; } @@ -279,8 +304,8 @@ bool HexagonInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB, unsigned HexagonInstrInfo::RemoveBranch(MachineBasicBlock &MBB) const { int BOpc = Hexagon::JMP; - int BccOpc = Hexagon::JMP_c; - int BccOpcNot = Hexagon::JMP_cNot; + int BccOpc = Hexagon::JMP_t; + int BccOpcNot = Hexagon::JMP_f; MachineBasicBlock::iterator I = MBB.end(); if (I == MBB.begin()) return 0; @@ -325,8 +350,6 @@ bool HexagonInstrInfo::analyzeCompare(const MachineInstr *MI, case Hexagon::CMPGTUrr: case Hexagon::CMPGTri: case Hexagon::CMPGTrr: - case Hexagon::CMPLTUrr: - case Hexagon::CMPLTrr: SrcReg = MI->getOperand(1).getReg(); Mask = ~0; break; @@ -366,8 +389,6 @@ bool HexagonInstrInfo::analyzeCompare(const MachineInstr *MI, case Hexagon::CMPhEQrr_xor_V4: case Hexagon::CMPhGTUrr_V4: case Hexagon::CMPhGTrr_shl_V4: - case Hexagon::CMPLTUrr: - case Hexagon::CMPLTrr: SrcReg2 = MI->getOperand(2).getReg(); return true; @@ -605,110 +626,8 @@ bool HexagonInstrInfo::isExtended(const MachineInstr *MI) const { return false; } -bool HexagonInstrInfo::isNewValueJump(const MachineInstr *MI) const { - switch (MI->getOpcode()) { - default: return false; - // JMP_EQri - case Hexagon::JMP_EQriPt_nv_V4: - case Hexagon::JMP_EQriPnt_nv_V4: - case Hexagon::JMP_EQriNotPt_nv_V4: - case Hexagon::JMP_EQriNotPnt_nv_V4: - case Hexagon::JMP_EQriPt_ie_nv_V4: - case Hexagon::JMP_EQriPnt_ie_nv_V4: - case Hexagon::JMP_EQriNotPt_ie_nv_V4: - case Hexagon::JMP_EQriNotPnt_ie_nv_V4: - - // JMP_EQri - with -1 - case Hexagon::JMP_EQriPtneg_nv_V4: - case Hexagon::JMP_EQriPntneg_nv_V4: - case Hexagon::JMP_EQriNotPtneg_nv_V4: - case Hexagon::JMP_EQriNotPntneg_nv_V4: - case Hexagon::JMP_EQriPtneg_ie_nv_V4: - case Hexagon::JMP_EQriPntneg_ie_nv_V4: - case Hexagon::JMP_EQriNotPtneg_ie_nv_V4: - case Hexagon::JMP_EQriNotPntneg_ie_nv_V4: - - // JMP_EQrr - case Hexagon::JMP_EQrrPt_nv_V4: - case Hexagon::JMP_EQrrPnt_nv_V4: - case Hexagon::JMP_EQrrNotPt_nv_V4: - case Hexagon::JMP_EQrrNotPnt_nv_V4: - case Hexagon::JMP_EQrrPt_ie_nv_V4: - case Hexagon::JMP_EQrrPnt_ie_nv_V4: - case Hexagon::JMP_EQrrNotPt_ie_nv_V4: - case Hexagon::JMP_EQrrNotPnt_ie_nv_V4: - - // JMP_GTri - case Hexagon::JMP_GTriPt_nv_V4: - case Hexagon::JMP_GTriPnt_nv_V4: - case Hexagon::JMP_GTriNotPt_nv_V4: - case Hexagon::JMP_GTriNotPnt_nv_V4: - case Hexagon::JMP_GTriPt_ie_nv_V4: - case Hexagon::JMP_GTriPnt_ie_nv_V4: - case Hexagon::JMP_GTriNotPt_ie_nv_V4: - case Hexagon::JMP_GTriNotPnt_ie_nv_V4: - - // JMP_GTri - with -1 - case Hexagon::JMP_GTriPtneg_nv_V4: - case Hexagon::JMP_GTriPntneg_nv_V4: - case Hexagon::JMP_GTriNotPtneg_nv_V4: - case Hexagon::JMP_GTriNotPntneg_nv_V4: - case Hexagon::JMP_GTriPtneg_ie_nv_V4: - case Hexagon::JMP_GTriPntneg_ie_nv_V4: - case Hexagon::JMP_GTriNotPtneg_ie_nv_V4: - case Hexagon::JMP_GTriNotPntneg_ie_nv_V4: - - // JMP_GTrr - case Hexagon::JMP_GTrrPt_nv_V4: - case Hexagon::JMP_GTrrPnt_nv_V4: - case Hexagon::JMP_GTrrNotPt_nv_V4: - case Hexagon::JMP_GTrrNotPnt_nv_V4: - case Hexagon::JMP_GTrrPt_ie_nv_V4: - case Hexagon::JMP_GTrrPnt_ie_nv_V4: - case Hexagon::JMP_GTrrNotPt_ie_nv_V4: - case Hexagon::JMP_GTrrNotPnt_ie_nv_V4: - - // JMP_GTrrdn - case Hexagon::JMP_GTrrdnPt_nv_V4: - case Hexagon::JMP_GTrrdnPnt_nv_V4: - case Hexagon::JMP_GTrrdnNotPt_nv_V4: - case Hexagon::JMP_GTrrdnNotPnt_nv_V4: - case Hexagon::JMP_GTrrdnPt_ie_nv_V4: - case Hexagon::JMP_GTrrdnPnt_ie_nv_V4: - case Hexagon::JMP_GTrrdnNotPt_ie_nv_V4: - case Hexagon::JMP_GTrrdnNotPnt_ie_nv_V4: - - // JMP_GTUri - case Hexagon::JMP_GTUriPt_nv_V4: - case Hexagon::JMP_GTUriPnt_nv_V4: - case 
Hexagon::JMP_GTUriNotPt_nv_V4: - case Hexagon::JMP_GTUriNotPnt_nv_V4: - case Hexagon::JMP_GTUriPt_ie_nv_V4: - case Hexagon::JMP_GTUriPnt_ie_nv_V4: - case Hexagon::JMP_GTUriNotPt_ie_nv_V4: - case Hexagon::JMP_GTUriNotPnt_ie_nv_V4: - - // JMP_GTUrr - case Hexagon::JMP_GTUrrPt_nv_V4: - case Hexagon::JMP_GTUrrPnt_nv_V4: - case Hexagon::JMP_GTUrrNotPt_nv_V4: - case Hexagon::JMP_GTUrrNotPnt_nv_V4: - case Hexagon::JMP_GTUrrPt_ie_nv_V4: - case Hexagon::JMP_GTUrrPnt_ie_nv_V4: - case Hexagon::JMP_GTUrrNotPt_ie_nv_V4: - case Hexagon::JMP_GTUrrNotPnt_ie_nv_V4: - - // JMP_GTUrrdn - case Hexagon::JMP_GTUrrdnPt_nv_V4: - case Hexagon::JMP_GTUrrdnPnt_nv_V4: - case Hexagon::JMP_GTUrrdnNotPt_nv_V4: - case Hexagon::JMP_GTUrrdnNotPnt_nv_V4: - case Hexagon::JMP_GTUrrdnPt_ie_nv_V4: - case Hexagon::JMP_GTUrrdnPnt_ie_nv_V4: - case Hexagon::JMP_GTUrrdnNotPt_ie_nv_V4: - case Hexagon::JMP_GTUrrdnNotPnt_ie_nv_V4: - return true; - } +bool HexagonInstrInfo::isBranch (const MachineInstr *MI) const { + return MI->getDesc().isBranch(); } bool HexagonInstrInfo::isNewValueStore(const MachineInstr *MI) const { @@ -746,11 +665,6 @@ bool HexagonInstrInfo::isNewValueStore(const MachineInstr *MI) const { case Hexagon::STrib_abs_cdnPt_nv_V4: case Hexagon::STrib_abs_cNotPt_nv_V4: case Hexagon::STrib_abs_cdnNotPt_nv_V4: - case Hexagon::STrib_imm_abs_nv_V4: - case Hexagon::STrib_imm_abs_cPt_nv_V4: - case Hexagon::STrib_imm_abs_cdnPt_nv_V4: - case Hexagon::STrib_imm_abs_cNotPt_nv_V4: - case Hexagon::STrib_imm_abs_cdnNotPt_nv_V4: // Store Halfword case Hexagon::STrih_nv_V4: @@ -784,11 +698,6 @@ bool HexagonInstrInfo::isNewValueStore(const MachineInstr *MI) const { case Hexagon::STrih_abs_cdnPt_nv_V4: case Hexagon::STrih_abs_cNotPt_nv_V4: case Hexagon::STrih_abs_cdnNotPt_nv_V4: - case Hexagon::STrih_imm_abs_nv_V4: - case Hexagon::STrih_imm_abs_cPt_nv_V4: - case Hexagon::STrih_imm_abs_cdnPt_nv_V4: - case Hexagon::STrih_imm_abs_cNotPt_nv_V4: - case Hexagon::STrih_imm_abs_cdnNotPt_nv_V4: // Store Word case Hexagon::STriw_nv_V4: @@ -822,11 +731,6 @@ bool HexagonInstrInfo::isNewValueStore(const MachineInstr *MI) const { case Hexagon::STriw_abs_cdnPt_nv_V4: case Hexagon::STriw_abs_cNotPt_nv_V4: case Hexagon::STriw_abs_cdnNotPt_nv_V4: - case Hexagon::STriw_imm_abs_nv_V4: - case Hexagon::STriw_imm_abs_cPt_nv_V4: - case Hexagon::STriw_imm_abs_cdnPt_nv_V4: - case Hexagon::STriw_imm_abs_cNotPt_nv_V4: - case Hexagon::STriw_imm_abs_cdnNotPt_nv_V4: return true; } } @@ -1003,9 +907,6 @@ bool HexagonInstrInfo::isPredicable(MachineInstr *MI) const { case Hexagon::ZXTB: case Hexagon::ZXTH: return Subtarget.hasV4TOps(); - - case Hexagon::JMPR: - return false; } return true; @@ -1030,6 +931,12 @@ bool HexagonInstrInfo::isPredicable(MachineInstr *MI) const { // cNotPt ---> cNotPt_nv // cPt ---> cPt_nv unsigned HexagonInstrInfo::getInvertedPredicatedOpcode(const int Opc) const { + int InvPredOpcode; + InvPredOpcode = isPredicatedTrue(Opc) ? Hexagon::getFalsePredOpcode(Opc) + : Hexagon::getTruePredOpcode(Opc); + if (InvPredOpcode >= 0) // Valid instruction with the inverted predicate. 
+ return InvPredOpcode; + switch(Opc) { default: llvm_unreachable("Unexpected predicated instruction"); case Hexagon::TFR_cPt: @@ -1042,10 +949,10 @@ unsigned HexagonInstrInfo::getInvertedPredicatedOpcode(const int Opc) const { case Hexagon::TFRI_cNotPt: return Hexagon::TFRI_cPt; - case Hexagon::JMP_c: - return Hexagon::JMP_cNot; - case Hexagon::JMP_cNot: - return Hexagon::JMP_c; + case Hexagon::JMP_t: + return Hexagon::JMP_f; + case Hexagon::JMP_f: + return Hexagon::JMP_t; case Hexagon::ADD_ri_cPt: return Hexagon::ADD_ri_cNotPt; @@ -1113,10 +1020,10 @@ unsigned HexagonInstrInfo::getInvertedPredicatedOpcode(const int Opc) const { return Hexagon::ZXTH_cPt_V4; - case Hexagon::JMPR_cPt: - return Hexagon::JMPR_cNotPt; - case Hexagon::JMPR_cNotPt: - return Hexagon::JMPR_cPt; + case Hexagon::JMPR_t: + return Hexagon::JMPR_f; + case Hexagon::JMPR_f: + return Hexagon::JMPR_t; // V4 indexed+scaled load. case Hexagon::LDrid_indexed_shl_cPt_V4: @@ -1362,117 +1269,6 @@ unsigned HexagonInstrInfo::getInvertedPredicatedOpcode(const int Opc) const { return Hexagon::DEALLOC_RET_cNotPt_V4; case Hexagon::DEALLOC_RET_cNotPt_V4: return Hexagon::DEALLOC_RET_cPt_V4; - - // New Value Jump. - // JMPEQ_ri - with -1. - case Hexagon::JMP_EQriPtneg_nv_V4: - return Hexagon::JMP_EQriNotPtneg_nv_V4; - case Hexagon::JMP_EQriNotPtneg_nv_V4: - return Hexagon::JMP_EQriPtneg_nv_V4; - - case Hexagon::JMP_EQriPntneg_nv_V4: - return Hexagon::JMP_EQriNotPntneg_nv_V4; - case Hexagon::JMP_EQriNotPntneg_nv_V4: - return Hexagon::JMP_EQriPntneg_nv_V4; - - // JMPEQ_ri. - case Hexagon::JMP_EQriPt_nv_V4: - return Hexagon::JMP_EQriNotPt_nv_V4; - case Hexagon::JMP_EQriNotPt_nv_V4: - return Hexagon::JMP_EQriPt_nv_V4; - - case Hexagon::JMP_EQriPnt_nv_V4: - return Hexagon::JMP_EQriNotPnt_nv_V4; - case Hexagon::JMP_EQriNotPnt_nv_V4: - return Hexagon::JMP_EQriPnt_nv_V4; - - // JMPEQ_rr. - case Hexagon::JMP_EQrrPt_nv_V4: - return Hexagon::JMP_EQrrNotPt_nv_V4; - case Hexagon::JMP_EQrrNotPt_nv_V4: - return Hexagon::JMP_EQrrPt_nv_V4; - - case Hexagon::JMP_EQrrPnt_nv_V4: - return Hexagon::JMP_EQrrNotPnt_nv_V4; - case Hexagon::JMP_EQrrNotPnt_nv_V4: - return Hexagon::JMP_EQrrPnt_nv_V4; - - // JMPGT_ri - with -1. - case Hexagon::JMP_GTriPtneg_nv_V4: - return Hexagon::JMP_GTriNotPtneg_nv_V4; - case Hexagon::JMP_GTriNotPtneg_nv_V4: - return Hexagon::JMP_GTriPtneg_nv_V4; - - case Hexagon::JMP_GTriPntneg_nv_V4: - return Hexagon::JMP_GTriNotPntneg_nv_V4; - case Hexagon::JMP_GTriNotPntneg_nv_V4: - return Hexagon::JMP_GTriPntneg_nv_V4; - - // JMPGT_ri. - case Hexagon::JMP_GTriPt_nv_V4: - return Hexagon::JMP_GTriNotPt_nv_V4; - case Hexagon::JMP_GTriNotPt_nv_V4: - return Hexagon::JMP_GTriPt_nv_V4; - - case Hexagon::JMP_GTriPnt_nv_V4: - return Hexagon::JMP_GTriNotPnt_nv_V4; - case Hexagon::JMP_GTriNotPnt_nv_V4: - return Hexagon::JMP_GTriPnt_nv_V4; - - // JMPGT_rr. - case Hexagon::JMP_GTrrPt_nv_V4: - return Hexagon::JMP_GTrrNotPt_nv_V4; - case Hexagon::JMP_GTrrNotPt_nv_V4: - return Hexagon::JMP_GTrrPt_nv_V4; - - case Hexagon::JMP_GTrrPnt_nv_V4: - return Hexagon::JMP_GTrrNotPnt_nv_V4; - case Hexagon::JMP_GTrrNotPnt_nv_V4: - return Hexagon::JMP_GTrrPnt_nv_V4; - - // JMPGT_rrdn. - case Hexagon::JMP_GTrrdnPt_nv_V4: - return Hexagon::JMP_GTrrdnNotPt_nv_V4; - case Hexagon::JMP_GTrrdnNotPt_nv_V4: - return Hexagon::JMP_GTrrdnPt_nv_V4; - - case Hexagon::JMP_GTrrdnPnt_nv_V4: - return Hexagon::JMP_GTrrdnNotPnt_nv_V4; - case Hexagon::JMP_GTrrdnNotPnt_nv_V4: - return Hexagon::JMP_GTrrdnPnt_nv_V4; - - // JMPGTU_ri. 
- case Hexagon::JMP_GTUriPt_nv_V4: - return Hexagon::JMP_GTUriNotPt_nv_V4; - case Hexagon::JMP_GTUriNotPt_nv_V4: - return Hexagon::JMP_GTUriPt_nv_V4; - - case Hexagon::JMP_GTUriPnt_nv_V4: - return Hexagon::JMP_GTUriNotPnt_nv_V4; - case Hexagon::JMP_GTUriNotPnt_nv_V4: - return Hexagon::JMP_GTUriPnt_nv_V4; - - // JMPGTU_rr. - case Hexagon::JMP_GTUrrPt_nv_V4: - return Hexagon::JMP_GTUrrNotPt_nv_V4; - case Hexagon::JMP_GTUrrNotPt_nv_V4: - return Hexagon::JMP_GTUrrPt_nv_V4; - - case Hexagon::JMP_GTUrrPnt_nv_V4: - return Hexagon::JMP_GTUrrNotPnt_nv_V4; - case Hexagon::JMP_GTUrrNotPnt_nv_V4: - return Hexagon::JMP_GTUrrPnt_nv_V4; - - // JMPGTU_rrdn. - case Hexagon::JMP_GTUrrdnPt_nv_V4: - return Hexagon::JMP_GTUrrdnNotPt_nv_V4; - case Hexagon::JMP_GTUrrdnNotPt_nv_V4: - return Hexagon::JMP_GTUrrdnPt_nv_V4; - - case Hexagon::JMP_GTUrrdnPnt_nv_V4: - return Hexagon::JMP_GTUrrdnNotPnt_nv_V4; - case Hexagon::JMP_GTUrrdnNotPnt_nv_V4: - return Hexagon::JMP_GTUrrdnPnt_nv_V4; } } @@ -1499,14 +1295,9 @@ getMatchingCondBranchOpcode(int Opc, bool invertPredicate) const { return !invertPredicate ? Hexagon::TFRI_cPt : Hexagon::TFRI_cNotPt; case Hexagon::JMP: - return !invertPredicate ? Hexagon::JMP_c : - Hexagon::JMP_cNot; - case Hexagon::JMP_EQrrPt_nv_V4: - return !invertPredicate ? Hexagon::JMP_EQrrPt_nv_V4 : - Hexagon::JMP_EQrrNotPt_nv_V4; - case Hexagon::JMP_EQriPt_nv_V4: - return !invertPredicate ? Hexagon::JMP_EQriPt_nv_V4 : - Hexagon::JMP_EQriNotPt_nv_V4; + return !invertPredicate ? Hexagon::JMP_t : + Hexagon::JMP_f; + case Hexagon::COMBINE_rr: return !invertPredicate ? Hexagon::COMBINE_rr_cPt : Hexagon::COMBINE_rr_cNotPt; @@ -1530,8 +1321,8 @@ getMatchingCondBranchOpcode(int Opc, bool invertPredicate) const { Hexagon::ZXTH_cNotPt_V4; case Hexagon::JMPR: - return !invertPredicate ? Hexagon::JMPR_cPt : - Hexagon::JMPR_cNotPt; + return !invertPredicate ? Hexagon::JMPR_t : + Hexagon::JMPR_f; // V4 indexed+scaled load. case Hexagon::LDrid_indexed_shl_V4: @@ -1830,11 +1621,15 @@ PredicateInstruction(MachineInstr *MI, // It is better to have an assert here to check this. But I don't know how // to write this assert because findFirstPredOperandIdx() would return -1 if (oper < -1) oper = -1; + MI->getOperand(oper+1).ChangeToRegister(PredMO.getReg(), PredMO.isDef(), - PredMO.isImplicit(), PredMO.isKill(), + PredMO.isImplicit(), false, PredMO.isDead(), PredMO.isUndef(), PredMO.isDebug()); + MachineRegisterInfo &RegInfo = MI->getParent()->getParent()->getRegInfo(); + RegInfo.clearKillFlags(PredMO.getReg()); + if (hasGAOpnd) { unsigned int i; @@ -1883,13 +1678,41 @@ isProfitableToIfCvt(MachineBasicBlock &TMBB, return true; } - +// Returns true if an instruction is predicated irrespective of the predicate +// sense. For example, all of the following will return true. 
+// if (p0) R1 = add(R2, R3) +// if (!p0) R1 = add(R2, R3) +// if (p0.new) R1 = add(R2, R3) +// if (!p0.new) R1 = add(R2, R3) bool HexagonInstrInfo::isPredicated(const MachineInstr *MI) const { const uint64_t F = MI->getDesc().TSFlags; return ((F >> HexagonII::PredicatedPos) & HexagonII::PredicatedMask); } +bool HexagonInstrInfo::isPredicated(unsigned Opcode) const { + const uint64_t F = get(Opcode).TSFlags; + + return ((F >> HexagonII::PredicatedPos) & HexagonII::PredicatedMask); +} + +bool HexagonInstrInfo::isPredicatedTrue(const MachineInstr *MI) const { + const uint64_t F = MI->getDesc().TSFlags; + + assert(isPredicated(MI)); + return (!((F >> HexagonII::PredicatedFalsePos) & + HexagonII::PredicatedFalseMask)); +} + +bool HexagonInstrInfo::isPredicatedTrue(unsigned Opcode) const { + const uint64_t F = get(Opcode).TSFlags; + + // Make sure that the instruction is predicated. + assert((F>> HexagonII::PredicatedPos) & HexagonII::PredicatedMask); + return (!((F >> HexagonII::PredicatedFalsePos) & + HexagonII::PredicatedFalseMask)); +} + bool HexagonInstrInfo::isPredicatedNew(const MachineInstr *MI) const { const uint64_t F = MI->getDesc().TSFlags; @@ -1897,6 +1720,13 @@ bool HexagonInstrInfo::isPredicatedNew(const MachineInstr *MI) const { return ((F >> HexagonII::PredicatedNewPos) & HexagonII::PredicatedNewMask); } +bool HexagonInstrInfo::isPredicatedNew(unsigned Opcode) const { + const uint64_t F = get(Opcode).TSFlags; + + assert(isPredicated(Opcode)); + return ((F >> HexagonII::PredicatedNewPos) & HexagonII::PredicatedNewMask); +} + bool HexagonInstrInfo::DefinesPredicate(MachineInstr *MI, std::vector<MachineOperand> &Pred) const { @@ -2129,14 +1959,10 @@ bool HexagonInstrInfo::isNewValueJumpCandidate(const MachineInstr *MI) const { default: return false; case Hexagon::CMPEQrr: case Hexagon::CMPEQri: - case Hexagon::CMPLTrr: case Hexagon::CMPGTrr: case Hexagon::CMPGTri: - case Hexagon::CMPLTUrr: case Hexagon::CMPGTUrr: case Hexagon::CMPGTUri: - case Hexagon::CMPGEri: - case Hexagon::CMPGEUri: return true; } } @@ -2369,6 +2195,18 @@ isConditionalStore (const MachineInstr* MI) const { } } + +bool HexagonInstrInfo::isNewValueJump(const MachineInstr *MI) const { + if (isNewValue(MI) && isBranch(MI)) + return true; + return false; +} + +bool HexagonInstrInfo::isNewValue(const MachineInstr* MI) const { + const uint64_t F = MI->getDesc().TSFlags; + return ((F >> HexagonII::NewValuePos) & HexagonII::NewValueMask); +} + // Returns true, if any one of the operands is a dot new // insn, whether it is predicated dot new or register dot new. bool HexagonInstrInfo::isDotNewInst (const MachineInstr* MI) const { @@ -2470,6 +2308,34 @@ bool HexagonInstrInfo::isConstExtended(MachineInstr *MI) const { return (ImmValue < MinValue || ImmValue > MaxValue); } +// Returns the opcode to use when converting MI, which is a conditional jump, +// into a conditional instruction which uses the .new value of the predicate. +// We also use branch probabilities to add a hint to the jump. +int +HexagonInstrInfo::getDotNewPredJumpOp(MachineInstr *MI, + const + MachineBranchProbabilityInfo *MBPI) const { + + // We assume that block can have at most two successors. 
+ bool taken = false; + MachineBasicBlock *Src = MI->getParent(); + MachineOperand *BrTarget = &MI->getOperand(1); + MachineBasicBlock *Dst = BrTarget->getMBB(); + + const BranchProbability Prediction = MBPI->getEdgeProbability(Src, Dst); + if (Prediction >= BranchProbability(1,2)) + taken = true; + + switch (MI->getOpcode()) { + case Hexagon::JMP_t: + return taken ? Hexagon::JMP_tnew_t : Hexagon::JMP_tnew_nt; + case Hexagon::JMP_f: + return taken ? Hexagon::JMP_fnew_t : Hexagon::JMP_fnew_nt; + + default: + llvm_unreachable("Unexpected jump instruction."); + } +} // Returns true if a particular operand is extendable for an instruction. bool HexagonInstrInfo::isOperandExtended(const MachineInstr *MI, unsigned short OperandNum) const { @@ -2574,3 +2440,18 @@ short HexagonInstrInfo::getNonExtOpcode (const MachineInstr *MI) const { } return -1; } + +bool HexagonInstrInfo::PredOpcodeHasJMP_c(Opcode_t Opcode) const { + return (Opcode == Hexagon::JMP_t) || + (Opcode == Hexagon::JMP_f) || + (Opcode == Hexagon::JMP_tnew_t) || + (Opcode == Hexagon::JMP_fnew_t) || + (Opcode == Hexagon::JMP_tnew_nt) || + (Opcode == Hexagon::JMP_fnew_nt); +} + +bool HexagonInstrInfo::PredOpcodeHasNot(Opcode_t Opcode) const { + return (Opcode == Hexagon::JMP_f) || + (Opcode == Hexagon::JMP_fnew_t) || + (Opcode == Hexagon::JMP_fnew_nt); +} diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonInstrInfo.h b/contrib/llvm/lib/Target/Hexagon/HexagonInstrInfo.h index 5df13a8..b721da4 100644 --- a/contrib/llvm/lib/Target/Hexagon/HexagonInstrInfo.h +++ b/contrib/llvm/lib/Target/Hexagon/HexagonInstrInfo.h @@ -16,9 +16,9 @@ #include "HexagonRegisterInfo.h" #include "MCTargetDesc/HexagonBaseInfo.h" -#include "llvm/Target/TargetFrameLowering.h" #include "llvm/Target/TargetInstrInfo.h" - +#include "llvm/Target/TargetFrameLowering.h" +#include "llvm/CodeGen/MachineBranchProbabilityInfo.h" #define GET_INSTRINFO_HEADER #include "HexagonGenInstrInfo.inc" @@ -28,6 +28,8 @@ namespace llvm { class HexagonInstrInfo : public HexagonGenInstrInfo { const HexagonRegisterInfo RI; const HexagonSubtarget& Subtarget; + typedef unsigned Opcode_t; + public: explicit HexagonInstrInfo(HexagonSubtarget &ST); @@ -111,6 +113,7 @@ public: unsigned createVR(MachineFunction* MF, MVT VT) const; + virtual bool isBranch(const MachineInstr *MI) const; virtual bool isPredicable(MachineInstr *MI) const; virtual bool PredicateInstruction(MachineInstr *MI, @@ -127,7 +130,11 @@ public: const BranchProbability &Probability) const; virtual bool isPredicated(const MachineInstr *MI) const; + virtual bool isPredicated(unsigned Opcode) const; + virtual bool isPredicatedTrue(const MachineInstr *MI) const; + virtual bool isPredicatedTrue(unsigned Opcode) const; virtual bool isPredicatedNew(const MachineInstr *MI) const; + virtual bool isPredicatedNew(unsigned Opcode) const; virtual bool DefinesPredicate(MachineInstr *MI, std::vector<MachineOperand> &Pred) const; virtual bool @@ -176,6 +183,7 @@ public: bool isConditionalLoad (const MachineInstr* MI) const; bool isConditionalStore(const MachineInstr* MI) const; bool isNewValueInst(const MachineInstr* MI) const; + bool isNewValue(const MachineInstr* MI) const; bool isDotNewInst(const MachineInstr* MI) const; bool isDeallocRet(const MachineInstr *MI) const; unsigned getInvertedPredicatedOpcode(const int Opc) const; @@ -189,6 +197,8 @@ public: void immediateExtend(MachineInstr *MI) const; bool isConstExtended(MachineInstr *MI) const; + int getDotNewPredJumpOp(MachineInstr *MI, + const MachineBranchProbabilityInfo *MBPI) const; 
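// Editorial aside, not part of the patch: the isPredicated/isPredicatedTrue/
// isPredicatedNew overloads added by this change all follow the same pattern of
// extracting a single bit field from the instruction's 64-bit TSFlags word. A
// minimal self-contained sketch of that pattern follows; the bit positions and
// masks below are made up for illustration, the real values live in
// MCTargetDesc/HexagonBaseInfo.h (HexagonII).
#include <cassert>
#include <cstdint>

namespace {

// Hypothetical flag layout, standing in for the HexagonII enums.
enum : uint64_t {
  PredicatedPos = 6,      PredicatedMask = 0x1,
  PredicatedFalsePos = 7, PredicatedFalseMask = 0x1,
  PredicatedNewPos = 8,   PredicatedNewMask = 0x1
};

bool isPredicated(uint64_t TSFlags) {
  return (TSFlags >> PredicatedPos) & PredicatedMask;
}

// "True" sense means the false-sense bit is clear; the query is only
// meaningful for predicated instructions, hence the assert.
bool isPredicatedTrue(uint64_t TSFlags) {
  assert(isPredicated(TSFlags) && "query only valid for predicated insns");
  return !((TSFlags >> PredicatedFalsePos) & PredicatedFalseMask);
}

bool isPredicatedNew(uint64_t TSFlags) {
  assert(isPredicated(TSFlags) && "query only valid for predicated insns");
  return (TSFlags >> PredicatedNewPos) & PredicatedNewMask;
}

} // namespace

int main() {
  // An instruction flagged as predicated, false-sense, dot-new:
  uint64_t Flags = (1ULL << PredicatedPos) | (1ULL << PredicatedFalsePos) |
                   (1ULL << PredicatedNewPos);
  assert(isPredicated(Flags));
  assert(!isPredicatedTrue(Flags));
  assert(isPredicatedNew(Flags));
  return 0;
}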
unsigned getAddrMode(const MachineInstr* MI) const; bool isOperandExtended(const MachineInstr *MI, unsigned short OperandNum) const; @@ -197,6 +207,9 @@ public: int getMaxValue(const MachineInstr *MI) const; bool NonExtEquivalentExists (const MachineInstr *MI) const; short getNonExtOpcode(const MachineInstr *MI) const; + bool PredOpcodeHasJMP_c(Opcode_t Opcode) const; + bool PredOpcodeHasNot(Opcode_t Opcode) const; + private: int getMatchingCondBranchOpcode(int Opc, bool sense) const; diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonInstrInfo.td b/contrib/llvm/lib/Target/Hexagon/HexagonInstrInfo.td index 74dc0ca..2a4b17b 100644 --- a/contrib/llvm/lib/Target/Hexagon/HexagonInstrInfo.td +++ b/contrib/llvm/lib/Target/Hexagon/HexagonInstrInfo.td @@ -14,6 +14,8 @@ include "HexagonInstrFormats.td" include "HexagonOperands.td" +//===----------------------------------------------------------------------===// + // Multi-class for logical operators. multiclass ALU32_rr_ri<string OpcStr, SDNode OpNode> { def rr : ALU32_rr<(outs IntRegs:$dst), (ins IntRegs:$b, IntRegs:$c), @@ -34,12 +36,6 @@ multiclass CMP64_rr<string OpcStr, PatFrag OpNode> { [(set (i1 PredRegs:$dst), (OpNode (i64 DoubleRegs:$b), (i64 DoubleRegs:$c)))]>; } -multiclass CMP32_rr<string OpcStr, PatFrag OpNode> { - def rr : ALU32_rr<(outs PredRegs:$dst), (ins IntRegs:$b, IntRegs:$c), - !strconcat("$dst = ", !strconcat(OpcStr, "($b, $c)")), - [(set (i1 PredRegs:$dst), - (OpNode (i32 IntRegs:$b), (i32 IntRegs:$c)))]>; -} multiclass CMP32_rr_ri_s10<string OpcStr, string CextOp, PatFrag OpNode> { let CextOpcode = CextOp in { @@ -75,14 +71,6 @@ multiclass CMP32_rr_ri_u9<string OpcStr, string CextOp, PatFrag OpNode> { } } -multiclass CMP32_ri_u8<string OpcStr, PatFrag OpNode> { -let isExtendable = 1, opExtendable = 2, isExtentSigned = 0, opExtentBits = 8 in - def ri : ALU32_ri<(outs PredRegs:$dst), (ins IntRegs:$b, u8Ext:$c), - !strconcat("$dst = ", !strconcat(OpcStr, "($b, #$c)")), - [(set (i1 PredRegs:$dst), (OpNode (i32 IntRegs:$b), - u8ExtPred:$c))]>; -} - multiclass CMP32_ri_s8<string OpcStr, PatFrag OpNode> { let isExtendable = 1, opExtendable = 2, isExtentSigned = 1, opExtentBits = 8 in def ri : ALU32_ri<(outs PredRegs:$dst), (ins IntRegs:$b, s8Ext:$c), @@ -95,22 +83,30 @@ let isExtendable = 1, opExtendable = 2, isExtentSigned = 1, opExtentBits = 8 in //===----------------------------------------------------------------------===// // ALU32/ALU (Instructions with register-register form) //===----------------------------------------------------------------------===// -multiclass ALU32_Pbase<string mnemonic, bit isNot, - bit isPredNew> { +def SDTHexagonI64I32I32 : SDTypeProfile<1, 2, + [SDTCisVT<0, i64>, SDTCisVT<1, i32>, SDTCisSameAs<1, 2>]>; + +def HexagonWrapperCombineII : + SDNode<"HexagonISD::WrapperCombineII", SDTHexagonI64I32I32>; - let PNewValue = !if(isPredNew, "new", "") in - def NAME : ALU32_rr<(outs IntRegs:$dst), +def HexagonWrapperCombineRR : + SDNode<"HexagonISD::WrapperCombineRR", SDTHexagonI64I32I32>; + +multiclass ALU32_Pbase<string mnemonic, RegisterClass RC, bit isNot, + bit isPredNew> { + let isPredicatedNew = isPredNew in + def NAME : ALU32_rr<(outs RC:$dst), (ins PredRegs:$src1, IntRegs:$src2, IntRegs: $src3), !if(isNot, "if (!$src1", "if ($src1")#!if(isPredNew,".new) $dst = ", ") $dst = ")#mnemonic#"($src2, $src3)", []>; } -multiclass ALU32_Pred<string mnemonic, bit PredNot> { - let PredSense = !if(PredNot, "false", "true") in { - defm _c#NAME : ALU32_Pbase<mnemonic, PredNot, 0>; +multiclass ALU32_Pred<string 
mnemonic, RegisterClass RC, bit PredNot> { + let isPredicatedFalse = PredNot in { + defm _c#NAME : ALU32_Pbase<mnemonic, RC, PredNot, 0>; // Predicate new - defm _cdn#NAME : ALU32_Pbase<mnemonic, PredNot, 1>; + defm _cdn#NAME : ALU32_Pbase<mnemonic, RC, PredNot, 1>; } } @@ -125,8 +121,8 @@ multiclass ALU32_base<string mnemonic, string CextOp, SDNode OpNode> { (i32 IntRegs:$src2)))]>; let neverHasSideEffects = 1, isPredicated = 1 in { - defm Pt : ALU32_Pred<mnemonic, 0>; - defm NotPt : ALU32_Pred<mnemonic, 1>; + defm Pt : ALU32_Pred<mnemonic, IntRegs, 0>; + defm NotPt : ALU32_Pred<mnemonic, IntRegs, 1>; } } } @@ -140,11 +136,42 @@ let isCommutable = 1 in { defm SUB_rr : ALU32_base<"sub", "SUB", sub>, ImmRegRel, PredNewRel; +// Combines the two integer registers SRC1 and SRC2 into a double register. +let isPredicable = 1 in +class T_Combine : ALU32_rr<(outs DoubleRegs:$dst), + (ins IntRegs:$src1, IntRegs:$src2), + "$dst = combine($src1, $src2)", + [(set (i64 DoubleRegs:$dst), + (i64 (HexagonWrapperCombineRR (i32 IntRegs:$src1), + (i32 IntRegs:$src2))))]>; + +multiclass Combine_base { + let BaseOpcode = "combine" in { + def NAME : T_Combine; + let neverHasSideEffects = 1, isPredicated = 1 in { + defm Pt : ALU32_Pred<"combine", DoubleRegs, 0>; + defm NotPt : ALU32_Pred<"combine", DoubleRegs, 1>; + } + } +} + +defm COMBINE_rr : Combine_base, PredNewRel; + +// Combines the two immediates SRC1 and SRC2 into a double register. +class COMBINE_imm<Operand imm1, Operand imm2, PatLeaf pat1, PatLeaf pat2> : + ALU32_ii<(outs DoubleRegs:$dst), (ins imm1:$src1, imm2:$src2), + "$dst = combine(#$src1, #$src2)", + [(set (i64 DoubleRegs:$dst), + (i64 (HexagonWrapperCombineII (i32 pat1:$src1), (i32 pat2:$src2))))]>; + +let isExtendable = 1, opExtendable = 1, isExtentSigned = 1, opExtentBits = 8 in +def COMBINE_Ii : COMBINE_imm<s8Ext, s8Imm, s8ExtPred, s8ImmPred>; + //===----------------------------------------------------------------------===// // ALU32/ALU (ADD with register-immediate form) //===----------------------------------------------------------------------===// multiclass ALU32ri_Pbase<string mnemonic, bit isNot, bit isPredNew> { - let PNewValue = !if(isPredNew, "new", "") in + let isPredicatedNew = isPredNew in def NAME : ALU32_ri<(outs IntRegs:$dst), (ins PredRegs:$src1, IntRegs:$src2, s8Ext: $src3), !if(isNot, "if (!$src1", "if ($src1")#!if(isPredNew,".new) $dst = ", @@ -153,7 +180,7 @@ multiclass ALU32ri_Pbase<string mnemonic, bit isNot, bit isPredNew> { } multiclass ALU32ri_Pred<string mnemonic, bit PredNot> { - let PredSense = !if(PredNot, "false", "true") in { + let isPredicatedFalse = PredNot in { defm _c#NAME : ALU32ri_Pbase<mnemonic, PredNot, 0>; // Predicate new defm _cdn#NAME : ALU32ri_Pbase<mnemonic, PredNot, 1>; @@ -189,11 +216,6 @@ def OR_ri : ALU32_ri<(outs IntRegs:$dst), [(set (i32 IntRegs:$dst), (or (i32 IntRegs:$src1), s10ExtPred:$src2))]>, ImmRegRel; -def NOT_rr : ALU32_rr<(outs IntRegs:$dst), - (ins IntRegs:$src1), - "$dst = not($src1)", - [(set (i32 IntRegs:$dst), (not (i32 IntRegs:$src1)))]>; - let isExtendable = 1, opExtendable = 2, isExtentSigned = 1, opExtentBits = 10, InputType = "imm", CextOpcode = "AND" in def AND_ri : ALU32_ri<(outs IntRegs:$dst), @@ -201,10 +223,7 @@ def AND_ri : ALU32_ri<(outs IntRegs:$dst), "$dst = and($src1, #$src2)", [(set (i32 IntRegs:$dst), (and (i32 IntRegs:$src1), s10ExtPred:$src2))]>, ImmRegRel; -// Negate. 
-def NEG : ALU32_rr<(outs IntRegs:$dst), (ins IntRegs:$src1), - "$dst = neg($src1)", - [(set (i32 IntRegs:$dst), (ineg (i32 IntRegs:$src1)))]>; + // Nop. let neverHasSideEffects = 1 in def NOP : ALU32_rr<(outs), (ins), @@ -220,15 +239,21 @@ def SUB_ri : ALU32_ri<(outs IntRegs:$dst), [(set IntRegs:$dst, (sub s10ExtPred:$src1, IntRegs:$src2))]>, ImmRegRel; +// Rd = not(Rs) gets mapped to Rd=sub(#-1, Rs). +def : Pat<(not (i32 IntRegs:$src1)), + (SUB_ri -1, (i32 IntRegs:$src1))>; + +// Rd = neg(Rs) gets mapped to Rd=sub(#0, Rs). +// Pattern definition for 'neg' was not necessary. multiclass TFR_Pred<bit PredNot> { - let PredSense = !if(PredNot, "false", "true") in { + let isPredicatedFalse = PredNot in { def _c#NAME : ALU32_rr<(outs IntRegs:$dst), (ins PredRegs:$src1, IntRegs:$src2), !if(PredNot, "if (!$src1", "if ($src1")#") $dst = $src2", []>; // Predicate new - let PNewValue = "new" in + let isPredicatedNew = 1 in def _cdn#NAME : ALU32_rr<(outs IntRegs:$dst), (ins PredRegs:$src1, IntRegs:$src2), !if(PredNot, "if (!$src1", "if ($src1")#".new) $dst = $src2", @@ -274,10 +299,10 @@ class T_TFR64_Pred<bit PredNot, bit isPredNew> } multiclass TFR64_Pred<bit PredNot> { - let PredSense = !if(PredNot, "false", "true") in { + let isPredicatedFalse = PredNot in { def _c#NAME : T_TFR64_Pred<PredNot, 0>; - let PNewValue = "new" in + let isPredicatedNew = 1 in def _cdn#NAME : T_TFR64_Pred<PredNot, 1>; // Predicate new } } @@ -309,14 +334,14 @@ multiclass TFR64_base<string BaseName> { } multiclass TFRI_Pred<bit PredNot> { - let isMoveImm = 1, PredSense = !if(PredNot, "false", "true") in { + let isMoveImm = 1, isPredicatedFalse = PredNot in { def _c#NAME : ALU32_ri<(outs IntRegs:$dst), (ins PredRegs:$src1, s12Ext:$src2), !if(PredNot, "if (!$src1", "if ($src1")#") $dst = #$src2", []>; // Predicate new - let PNewValue = "new" in + let isPredicatedNew = 1 in def _cdn#NAME : ALU32_rr<(outs IntRegs:$dst), (ins PredRegs:$src1, s12Ext:$src2), !if(PredNot, "if (!$src1", "if ($src1")#".new) $dst = #$src2", @@ -359,52 +384,6 @@ def TFCR : CRInst<(outs CRRegs:$dst), (ins IntRegs:$src1), // ALU32/PERM + //===----------------------------------------------------------------------===// -// Combine. - -def SDTHexagonI64I32I32 : SDTypeProfile<1, 2, - [SDTCisVT<0, i64>, SDTCisVT<1, i32>, SDTCisSameAs<1, 2>]>; - -def HexagonWrapperCombineII : - SDNode<"HexagonISD::WrapperCombineII", SDTHexagonI64I32I32>; -def HexagonWrapperCombineRR : - SDNode<"HexagonISD::WrapperCombineRR", SDTHexagonI64I32I32>; - -// Combines the two integer registers SRC1 and SRC2 into a double register. -let isPredicable = 1 in -def COMBINE_rr : ALU32_rr<(outs DoubleRegs:$dst), (ins IntRegs:$src1, - IntRegs:$src2), - "$dst = combine($src1, $src2)", - [(set (i64 DoubleRegs:$dst), - (i64 (HexagonWrapperCombineRR (i32 IntRegs:$src1), - (i32 IntRegs:$src2))))]>; - -// Rd=combine(Rt.[HL], Rs.[HL]) -class COMBINE_halves<string A, string B>: ALU32_rr<(outs IntRegs:$dst), - (ins IntRegs:$src1, - IntRegs:$src2), - "$dst = combine($src1."# A #", $src2."# B #")", []>; - -let isPredicable = 1 in { - def COMBINE_hh : COMBINE_halves<"H", "H">; - def COMBINE_hl : COMBINE_halves<"H", "L">; - def COMBINE_lh : COMBINE_halves<"L", "H">; - def COMBINE_ll : COMBINE_halves<"L", "L">; -} - -def : Pat<(i32 (trunc (i64 (srl (i64 DoubleRegs:$a), (i32 16))))), - (COMBINE_lh (EXTRACT_SUBREG (i64 DoubleRegs:$a), subreg_hireg), - (EXTRACT_SUBREG (i64 DoubleRegs:$a), subreg_loreg))>; - -// Combines the two immediates SRC1 and SRC2 into a double register. 
-class COMBINE_imm<Operand imm1, Operand imm2, PatLeaf pat1, PatLeaf pat2> : - ALU32_ii<(outs DoubleRegs:$dst), (ins imm1:$src1, imm2:$src2), - "$dst = combine(#$src1, #$src2)", - [(set (i64 DoubleRegs:$dst), - (i64 (HexagonWrapperCombineII (i32 pat1:$src1), (i32 pat2:$src2))))]>; - -let isExtendable = 1, opExtendable = 1, isExtentSigned = 1, opExtentBits = 8 in -def COMBINE_Ii : COMBINE_imm<s8Ext, s8Imm, s8ExtPred, s8ImmPred>; - // Mux. def VMUX_prr64 : ALU64_rr<(outs DoubleRegs:$dst), (ins PredRegs:$src1, DoubleRegs:$src2, @@ -507,40 +486,24 @@ def : Pat <(sext_inreg (i32 IntRegs:$src1), i16), // ALU32/PRED + //===----------------------------------------------------------------------===// -// Conditional combine. -let neverHasSideEffects = 1, isPredicated = 1 in { -def COMBINE_rr_cPt : ALU32_rr<(outs DoubleRegs:$dst), - (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3), - "if ($src1) $dst = combine($src2, $src3)", - []>; - -let isPredicatedFalse = 1 in -def COMBINE_rr_cNotPt : ALU32_rr<(outs DoubleRegs:$dst), - (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3), - "if (!$src1) $dst = combine($src2, $src3)", - []>; - -let isPredicatedNew = 1 in -def COMBINE_rr_cdnPt : ALU32_rr<(outs DoubleRegs:$dst), - (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3), - "if ($src1.new) $dst = combine($src2, $src3)", - []>; - -let isPredicatedNew = 1, isPredicatedFalse = 1 in -def COMBINE_rr_cdnNotPt : ALU32_rr<(outs DoubleRegs:$dst), - (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3), - "if (!$src1.new) $dst = combine($src2, $src3)", - []>; -} - // Compare. defm CMPGTU : CMP32_rr_ri_u9<"cmp.gtu", "CMPGTU", setugt>, ImmRegRel; defm CMPGT : CMP32_rr_ri_s10<"cmp.gt", "CMPGT", setgt>, ImmRegRel; -defm CMPLT : CMP32_rr<"cmp.lt", setlt>; -defm CMPLTU : CMP32_rr<"cmp.ltu", setult>; defm CMPEQ : CMP32_rr_ri_s10<"cmp.eq", "CMPEQ", seteq>, ImmRegRel; -defm CMPGE : CMP32_ri_s8<"cmp.ge", setge>; -defm CMPGEU : CMP32_ri_u8<"cmp.geu", setuge>; + +// SDNode for converting immediate C to C-1. +def DEC_CONST_SIGNED : SDNodeXForm<imm, [{ + // Return the byte immediate const-1 as an SDNode. + int32_t imm = N->getSExtValue(); + return XformSToSM1Imm(imm); +}]>; + +// SDNode for converting immediate C to C-1. +def DEC_CONST_UNSIGNED : SDNodeXForm<imm, [{ + // Return the byte immediate const-1 as an SDNode. + uint32_t imm = N->getZExtValue(); + return XformUToUM1Imm(imm); +}]>; def CTLZ_rr : SInst<(outs IntRegs:$dst), (ins IntRegs:$src1), "$dst = cl0($src1)", @@ -774,112 +737,153 @@ def XOR_pp : SInst<(outs PredRegs:$dst), (ins PredRegs:$src1, PredRegs:$src2), // CR - //===----------------------------------------------------------------------===// +def retflag : SDNode<"HexagonISD::RET_FLAG", SDTNone, + [SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>; +def eh_return: SDNode<"HexagonISD::EH_RETURN", SDTNone, + [SDNPHasChain]>; -//===----------------------------------------------------------------------===// -// J + -//===----------------------------------------------------------------------===// -// Jump to address. 
-let isBranch = 1, isTerminator=1, isBarrier = 1, isPredicable = 1 in { - def JMP : JInst< (outs), - (ins brtarget:$offset), - "jump $offset", - [(br bb:$offset)]>; -} +def SDHexagonBR_JT: SDTypeProfile<0, 1, [SDTCisVT<0, i32>]>; +def HexagonBR_JT: SDNode<"HexagonISD::BR_JT", SDHexagonBR_JT, [SDNPHasChain]>; -// if (p0) jump -let isBranch = 1, isTerminator=1, Defs = [PC], - isPredicated = 1 in { - def JMP_c : JInst< (outs), - (ins PredRegs:$src, brtarget:$offset), - "if ($src) jump $offset", - [(brcond (i1 PredRegs:$src), bb:$offset)]>; -} +let InputType = "imm", isBarrier = 1, isPredicable = 1, +Defs = [PC], isExtendable = 1, opExtendable = 0, isExtentSigned = 1, +opExtentBits = 24 in +class T_JMP <dag InsDag, list<dag> JumpList = []> + : JInst<(outs), InsDag, + "jump $dst" , JumpList> { + bits<24> dst; + + let IClass = 0b0101; + + let Inst{27-25} = 0b100; + let Inst{24-16} = dst{23-15}; + let Inst{13-1} = dst{14-2}; +} + +let InputType = "imm", isExtendable = 1, opExtendable = 1, isExtentSigned = 1, +Defs = [PC], isPredicated = 1, opExtentBits = 17 in +class T_JMP_c <bit PredNot, bit isPredNew, bit isTaken>: + JInst<(outs ), (ins PredRegs:$src, brtarget:$dst), + !if(PredNot, "if (!$src", "if ($src")# + !if(isPredNew, ".new) ", ") ")#"jump"# + !if(isPredNew, !if(isTaken, ":t ", ":nt "), " ")#"$dst"> { + + let isBrTaken = !if(isPredNew, !if(isTaken, "true", "false"), ""); + let isPredicatedFalse = PredNot; + let isPredicatedNew = isPredNew; + bits<2> src; + bits<17> dst; + + let IClass = 0b0101; + + let Inst{27-24} = 0b1100; + let Inst{21} = PredNot; + let Inst{12} = !if(isPredNew, isTaken, zero); + let Inst{11} = isPredNew; + let Inst{9-8} = src; + let Inst{23-22} = dst{16-15}; + let Inst{20-16} = dst{14-10}; + let Inst{13} = dst{9}; + let Inst{7-1} = dst{8-2}; + } -// if (!p0) jump -let isBranch = 1, isTerminator=1, neverHasSideEffects = 1, Defs = [PC], - isPredicated = 1 in { - def JMP_cNot : JInst< (outs), - (ins PredRegs:$src, brtarget:$offset), - "if (!$src) jump $offset", - []>; +let isBarrier = 1, Defs = [PC], isPredicable = 1, InputType = "reg" in +class T_JMPr<dag InsDag = (ins IntRegs:$dst)> + : JRInst<(outs ), InsDag, + "jumpr $dst" , + []> { + bits<5> dst; + + let IClass = 0b0101; + let Inst{27-21} = 0b0010100; + let Inst{20-16} = dst; } -let isTerminator = 1, isBranch = 1, neverHasSideEffects = 1, Defs = [PC], - isPredicated = 1 in { - def BRCOND : JInst < (outs), (ins PredRegs:$pred, brtarget:$dst), - "if ($pred) jump $dst", - []>; +let Defs = [PC], isPredicated = 1, InputType = "reg" in +class T_JMPr_c <bit PredNot, bit isPredNew, bit isTaken>: + JRInst <(outs ), (ins PredRegs:$src, IntRegs:$dst), + !if(PredNot, "if (!$src", "if ($src")# + !if(isPredNew, ".new) ", ") ")#"jumpr"# + !if(isPredNew, !if(isTaken, ":t ", ":nt "), " ")#"$dst"> { + + let isBrTaken = !if(isPredNew, !if(isTaken, "true", "false"), ""); + let isPredicatedFalse = PredNot; + let isPredicatedNew = isPredNew; + bits<2> src; + bits<5> dst; + + let IClass = 0b0101; + + let Inst{27-22} = 0b001101; + let Inst{21} = PredNot; + let Inst{20-16} = dst; + let Inst{12} = !if(isPredNew, isTaken, zero); + let Inst{11} = isPredNew; + let Inst{9-8} = src; + let Predicates = !if(isPredNew, [HasV3T], [HasV2T]); + let validSubTargets = !if(isPredNew, HasV3SubT, HasV2SubT); } -// Jump to address conditioned on new predicate. 
-// if (p0) jump:t -let isBranch = 1, isTerminator=1, neverHasSideEffects = 1, Defs = [PC], - isPredicated = 1 in { - def JMP_cdnPt : JInst< (outs), - (ins PredRegs:$src, brtarget:$offset), - "if ($src.new) jump:t $offset", - []>; +multiclass JMP_Pred<bit PredNot> { + def _#NAME : T_JMP_c<PredNot, 0, 0>; + // Predicate new + def _#NAME#new_t : T_JMP_c<PredNot, 1, 1>; // taken + def _#NAME#new_nt : T_JMP_c<PredNot, 1, 0>; // not taken } -// if (!p0) jump:t -let isBranch = 1, isTerminator=1, neverHasSideEffects = 1, Defs = [PC], - isPredicated = 1 in { - def JMP_cdnNotPt : JInst< (outs), - (ins PredRegs:$src, brtarget:$offset), - "if (!$src.new) jump:t $offset", - []>; +multiclass JMP_base<string BaseOp> { + let BaseOpcode = BaseOp in { + def NAME : T_JMP<(ins brtarget:$dst), [(br bb:$dst)]>; + defm t : JMP_Pred<0>; + defm f : JMP_Pred<1>; + } } -// Not taken. -let isBranch = 1, isTerminator=1, neverHasSideEffects = 1, Defs = [PC], - isPredicated = 1 in { - def JMP_cdnPnt : JInst< (outs), - (ins PredRegs:$src, brtarget:$offset), - "if ($src.new) jump:nt $offset", - []>; +multiclass JMPR_Pred<bit PredNot> { + def NAME: T_JMPr_c<PredNot, 0, 0>; + // Predicate new + def NAME#new_tV3 : T_JMPr_c<PredNot, 1, 1>; // taken + def NAME#new_ntV3 : T_JMPr_c<PredNot, 1, 0>; // not taken } -// Not taken. -let isBranch = 1, isTerminator=1, neverHasSideEffects = 1, Defs = [PC], - isPredicated = 1 in { - def JMP_cdnNotPnt : JInst< (outs), - (ins PredRegs:$src, brtarget:$offset), - "if (!$src.new) jump:nt $offset", - []>; +multiclass JMPR_base<string BaseOp> { + let BaseOpcode = BaseOp in { + def NAME : T_JMPr; + defm _t : JMPR_Pred<0>; + defm _f : JMPR_Pred<1>; + } } -//===----------------------------------------------------------------------===// -// J - -//===----------------------------------------------------------------------===// -//===----------------------------------------------------------------------===// -// JR + -//===----------------------------------------------------------------------===// -def retflag : SDNode<"HexagonISD::RET_FLAG", SDTNone, - [SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>; +let isTerminator = 1, neverHasSideEffects = 1 in { +let isBranch = 1 in +defm JMP : JMP_base<"JMP">, PredNewRel; -// Jump to address from register. -let isPredicable =1, isReturn = 1, isTerminator = 1, isBarrier = 1, - Defs = [PC], Uses = [R31] in { - def JMPR: JRInst<(outs), (ins), - "jumpr r31", - [(retflag)]>; -} +let isBranch = 1, isIndirectBranch = 1 in +defm JMPR : JMPR_base<"JMPr">, PredNewRel; -// Jump to address from register. -let isReturn = 1, isTerminator = 1, isBarrier = 1, isPredicated = 1, - Defs = [PC], Uses = [R31] in { - def JMPR_cPt: JRInst<(outs), (ins PredRegs:$src1), - "if ($src1) jumpr r31", - []>; +let isReturn = 1, isCodeGenOnly = 1 in +defm JMPret : JMPR_base<"JMPret">, PredNewRel; } -// Jump to address from register. -let isReturn = 1, isTerminator = 1, isBarrier = 1, isPredicated = 1, - Defs = [PC], Uses = [R31] in { - def JMPR_cNotPt: JRInst<(outs), (ins PredRegs:$src1), - "if (!$src1) jumpr r31", - []>; -} +def : Pat<(retflag), + (JMPret (i32 R31))>; + +def : Pat <(brcond (i1 PredRegs:$src1), bb:$offset), + (JMP_t (i1 PredRegs:$src1), bb:$offset)>; + +// A return through builtin_eh_return. 
+let isReturn = 1, isTerminator = 1, isBarrier = 1, neverHasSideEffects = 1, +isCodeGenOnly = 1, Defs = [PC], Uses = [R28], isPredicable = 0 in +def EH_RETURN_JMPR : T_JMPr; + +def : Pat<(eh_return), + (EH_RETURN_JMPR (i32 R31))>; + +def : Pat<(HexagonBR_JT (i32 IntRegs:$dst)), + (JMPR (i32 IntRegs:$dst))>; + +def : Pat<(brind (i32 IntRegs:$dst)), + (JMPR (i32 IntRegs:$dst))>; //===----------------------------------------------------------------------===// // JR - @@ -892,7 +896,7 @@ let isReturn = 1, isTerminator = 1, isBarrier = 1, isPredicated = 1, // Load -- MEMri operand multiclass LD_MEMri_Pbase<string mnemonic, RegisterClass RC, bit isNot, bit isPredNew> { - let PNewValue = !if(isPredNew, "new", "") in + let isPredicatedNew = isPredNew in def NAME : LDInst2<(outs RC:$dst), (ins PredRegs:$src1, MEMri:$addr), !if(isNot, "if (!$src1", "if ($src1")#!if(isPredNew, ".new) ", @@ -901,7 +905,7 @@ multiclass LD_MEMri_Pbase<string mnemonic, RegisterClass RC, } multiclass LD_MEMri_Pred<string mnemonic, RegisterClass RC, bit PredNot> { - let PredSense = !if(PredNot, "false", "true") in { + let isPredicatedFalse = PredNot in { defm _c#NAME : LD_MEMri_Pbase<mnemonic, RC, PredNot, 0>; // Predicate new defm _cdn#NAME : LD_MEMri_Pbase<mnemonic, RC, PredNot, 1>; @@ -958,7 +962,7 @@ def : Pat < (i64 (load ADDRriS11_3:$addr)), // Load - Base with Immediate offset addressing mode multiclass LD_Idxd_Pbase<string mnemonic, RegisterClass RC, Operand predImmOp, bit isNot, bit isPredNew> { - let PNewValue = !if(isPredNew, "new", "") in + let isPredicatedNew = isPredNew in def NAME : LDInst2<(outs RC:$dst), (ins PredRegs:$src1, IntRegs:$src2, predImmOp:$src3), !if(isNot, "if (!$src1", "if ($src1")#!if(isPredNew, ".new) ", @@ -968,7 +972,7 @@ multiclass LD_Idxd_Pbase<string mnemonic, RegisterClass RC, Operand predImmOp, multiclass LD_Idxd_Pred<string mnemonic, RegisterClass RC, Operand predImmOp, bit PredNot> { - let PredSense = !if(PredNot, "false", "true") in { + let isPredicatedFalse = PredNot in { defm _c#NAME : LD_Idxd_Pbase<mnemonic, RC, predImmOp, PredNot, 0>; // Predicate new defm _cdn#NAME : LD_Idxd_Pbase<mnemonic, RC, predImmOp, PredNot, 1>; @@ -1038,7 +1042,7 @@ def : Pat < (i64 (load (add IntRegs:$src1, s11_3ExtPred:$offset))), multiclass LD_PostInc_Pbase<string mnemonic, RegisterClass RC, Operand ImmOp, bit isNot, bit isPredNew> { - let PNewValue = !if(isPredNew, "new", "") in + let isPredicatedNew = isPredNew in def NAME : LDInst2PI<(outs RC:$dst, IntRegs:$dst2), (ins PredRegs:$src1, IntRegs:$src2, ImmOp:$offset), !if(isNot, "if (!$src1", "if ($src1")#!if(isPredNew, ".new) ", @@ -1049,7 +1053,7 @@ multiclass LD_PostInc_Pbase<string mnemonic, RegisterClass RC, Operand ImmOp, multiclass LD_PostInc_Pred<string mnemonic, RegisterClass RC, Operand ImmOp, bit PredNot> { - let PredSense = !if(PredNot, "false", "true") in { + let isPredicatedFalse = PredNot in { defm _c#NAME : LD_PostInc_Pbase<mnemonic, RC, ImmOp, PredNot, 0>; // Predicate new let Predicates = [HasV4T], validSubTargets = HasV4SubT in @@ -1366,7 +1370,7 @@ def SUBri_acc : MInst_acc<(outs IntRegs: $dst), (ins IntRegs:$src1, multiclass ST_PostInc_Pbase<string mnemonic, RegisterClass RC, Operand ImmOp, bit isNot, bit isPredNew> { - let PNewValue = !if(isPredNew, "new", "") in + let isPredicatedNew = isPredNew in def NAME : STInst2PI<(outs IntRegs:$dst), (ins PredRegs:$src1, IntRegs:$src2, ImmOp:$offset, RC:$src3), !if(isNot, "if (!$src1", "if ($src1")#!if(isPredNew, ".new) ", @@ -1377,7 +1381,7 @@ multiclass ST_PostInc_Pbase<string mnemonic, 
RegisterClass RC, Operand ImmOp, multiclass ST_PostInc_Pred<string mnemonic, RegisterClass RC, Operand ImmOp, bit PredNot> { - let PredSense = !if(PredNot, "false", "true") in { + let isPredicatedFalse = PredNot in { defm _c#NAME# : ST_PostInc_Pbase<mnemonic, RC, ImmOp, PredNot, 0>; // Predicate new let Predicates = [HasV4T], validSubTargets = HasV4SubT in @@ -1431,7 +1435,7 @@ def : Pat<(post_store (i64 DoubleRegs:$src1), IntRegs:$src2, //===----------------------------------------------------------------------===// multiclass ST_MEMri_Pbase<string mnemonic, RegisterClass RC, bit isNot, bit isPredNew> { - let PNewValue = !if(isPredNew, "new", "") in + let isPredicatedNew = isPredNew in def NAME : STInst2<(outs), (ins PredRegs:$src1, MEMri:$addr, RC: $src2), !if(isNot, "if (!$src1", "if ($src1")#!if(isPredNew, ".new) ", @@ -1440,7 +1444,7 @@ multiclass ST_MEMri_Pbase<string mnemonic, RegisterClass RC, bit isNot, } multiclass ST_MEMri_Pred<string mnemonic, RegisterClass RC, bit PredNot> { - let PredSense = !if(PredNot, "false", "true") in { + let isPredicatedFalse = PredNot in { defm _c#NAME : ST_MEMri_Pbase<mnemonic, RC, PredNot, 0>; // Predicate new @@ -1497,7 +1501,7 @@ def : Pat<(store (i64 DoubleRegs:$src1), ADDRriS11_3:$addr), //===----------------------------------------------------------------------===// multiclass ST_Idxd_Pbase<string mnemonic, RegisterClass RC, Operand predImmOp, bit isNot, bit isPredNew> { - let PNewValue = !if(isPredNew, "new", "") in + let isPredicatedNew = isPredNew in def NAME : STInst2<(outs), (ins PredRegs:$src1, IntRegs:$src2, predImmOp:$src3, RC: $src4), !if(isNot, "if (!$src1", "if ($src1")#!if(isPredNew, ".new) ", @@ -1507,7 +1511,7 @@ multiclass ST_Idxd_Pbase<string mnemonic, RegisterClass RC, Operand predImmOp, multiclass ST_Idxd_Pred<string mnemonic, RegisterClass RC, Operand predImmOp, bit PredNot> { - let PredSense = !if(PredNot, "false", "true"), isPredicated = 1 in { + let isPredicatedFalse = PredNot, isPredicated = 1 in { defm _c#NAME : ST_Idxd_Pbase<mnemonic, RC, predImmOp, PredNot, 0>; // Predicate new @@ -2023,20 +2027,18 @@ let isCall = 1, neverHasSideEffects = 1, []>; } -// Tail Calls. -let isCall = 1, isBarrier = 1, isReturn = 1, isTerminator = 1 in { - def TCRETURNtg : JInst<(outs), (ins calltarget:$dst), - "jump $dst // TAILCALL", []>; -} -let isCall = 1, isBarrier = 1, isReturn = 1, isTerminator = 1 in { - def TCRETURNtext : JInst<(outs), (ins calltarget:$dst), - "jump $dst // TAILCALL", []>; -} -let isCall = 1, isBarrier = 1, isReturn = 1, isTerminator = 1 in { - def TCRETURNR : JInst<(outs), (ins IntRegs:$dst), - "jumpr $dst // TAILCALL", []>; +// Indirect tail-call. +let isCodeGenOnly = 1, isCall = 1, isReturn = 1 in +def TCRETURNR : T_JMPr; + +// Direct tail-calls. +let isCall = 1, isReturn = 1, isBarrier = 1, isPredicable = 0, +isTerminator = 1, isCodeGenOnly = 1 in { + def TCRETURNtg : T_JMP<(ins calltarget:$dst)>; + def TCRETURNtext : T_JMP<(ins calltarget:$dst)>; } + // Map call instruction. def : Pat<(call (i32 IntRegs:$dst)), (CALLR (i32 IntRegs:$dst))>, Requires<[HasV2TOnly]>; @@ -2133,10 +2135,11 @@ def : Pat <(add (i1 PredRegs:$src1), -1), // Map from p0 = setlt(r0, r1) r2 = mux(p0, r3, r4) => // p0 = cmp.lt(r0, r1), r0 = mux(p0, r2, r1). 
+// cmp.lt(r0, r1) -> cmp.gt(r1, r0) def : Pat <(select (i1 (setlt (i32 IntRegs:$src1), (i32 IntRegs:$src2))), (i32 IntRegs:$src3), (i32 IntRegs:$src4)), - (i32 (TFR_condset_rr (CMPLTrr (i32 IntRegs:$src1), (i32 IntRegs:$src2)), + (i32 (TFR_condset_rr (CMPGTrr (i32 IntRegs:$src2), (i32 IntRegs:$src1)), (i32 IntRegs:$src4), (i32 IntRegs:$src3)))>, Requires<[HasV2TOnly]>; @@ -2154,18 +2157,25 @@ def : Pat <(select (not (i1 PredRegs:$src1)), s12ImmPred:$src2, // Map from p0 = pnot(p0); r0 = mux(p0, r1, #i) // => r0 = TFR_condset_ir(p0, #i, r1) -def : Pat <(select (not PredRegs:$src1), IntRegs:$src2, s12ImmPred:$src3), +def : Pat <(select (not (i1 PredRegs:$src1)), IntRegs:$src2, s12ImmPred:$src3), (i32 (TFR_condset_ir (i1 PredRegs:$src1), s12ImmPred:$src3, (i32 IntRegs:$src2)))>; // Map from p0 = pnot(p0); if (p0) jump => if (!p0) jump. -def : Pat <(brcond (not PredRegs:$src1), bb:$offset), - (JMP_cNot (i1 PredRegs:$src1), bb:$offset)>; +def : Pat <(brcond (not (i1 PredRegs:$src1)), bb:$offset), + (JMP_f (i1 PredRegs:$src1), bb:$offset)>; // Map from p2 = pnot(p2); p1 = and(p0, p2) => p1 = and(p0, !p2). -def : Pat <(and PredRegs:$src1, (not PredRegs:$src2)), +def : Pat <(and (i1 PredRegs:$src1), (not (i1 PredRegs:$src2))), (i1 (AND_pnotp (i1 PredRegs:$src1), (i1 PredRegs:$src2)))>; + +let AddedComplexity = 100 in +def : Pat <(i64 (zextloadi1 (HexagonCONST32 tglobaladdr:$global))), + (i64 (COMBINE_rr (TFRI 0), + (LDriub_indexed (CONST32_set tglobaladdr:$global), 0)))>, + Requires<[NoV4T]>; + // Map from i1 loads to 32 bits. This assumes that the i1* is byte aligned. let AddedComplexity = 10 in def : Pat <(i32 (zextloadi1 ADDRriS11_0:$addr)), @@ -2186,43 +2196,46 @@ def : Pat <(i64 (sext_inreg (i64 DoubleRegs:$src1), i8)), subreg_loreg))))))>; // We want to prevent emitting pnot's as much as possible. -// Map brcond with an unsupported setcc to a JMP_cNot. +// Map brcond with an unsupported setcc to a JMP_f. 
def : Pat <(brcond (i1 (setne (i32 IntRegs:$src1), (i32 IntRegs:$src2))), bb:$offset), - (JMP_cNot (CMPEQrr (i32 IntRegs:$src1), (i32 IntRegs:$src2)), + (JMP_f (CMPEQrr (i32 IntRegs:$src1), (i32 IntRegs:$src2)), bb:$offset)>; def : Pat <(brcond (i1 (setne (i32 IntRegs:$src1), s10ImmPred:$src2)), bb:$offset), - (JMP_cNot (CMPEQri (i32 IntRegs:$src1), s10ImmPred:$src2), bb:$offset)>; + (JMP_f (CMPEQri (i32 IntRegs:$src1), s10ImmPred:$src2), bb:$offset)>; def : Pat <(brcond (i1 (setne (i1 PredRegs:$src1), (i1 -1))), bb:$offset), - (JMP_cNot (i1 PredRegs:$src1), bb:$offset)>; + (JMP_f (i1 PredRegs:$src1), bb:$offset)>; def : Pat <(brcond (i1 (setne (i1 PredRegs:$src1), (i1 0))), bb:$offset), - (JMP_c (i1 PredRegs:$src1), bb:$offset)>; + (JMP_t (i1 PredRegs:$src1), bb:$offset)>; +// cmp.lt(Rs, Imm) -> !cmp.ge(Rs, Imm) -> !cmp.gt(Rs, Imm-1) def : Pat <(brcond (i1 (setlt (i32 IntRegs:$src1), s8ImmPred:$src2)), bb:$offset), - (JMP_cNot (CMPGEri (i32 IntRegs:$src1), s8ImmPred:$src2), bb:$offset)>; + (JMP_f (CMPGTri (i32 IntRegs:$src1), + (DEC_CONST_SIGNED s8ImmPred:$src2)), bb:$offset)>; +// cmp.lt(r0, r1) -> cmp.gt(r1, r0) def : Pat <(brcond (i1 (setlt (i32 IntRegs:$src1), (i32 IntRegs:$src2))), bb:$offset), - (JMP_c (CMPLTrr (i32 IntRegs:$src1), (i32 IntRegs:$src2)), bb:$offset)>; + (JMP_t (CMPGTrr (i32 IntRegs:$src2), (i32 IntRegs:$src1)), bb:$offset)>; def : Pat <(brcond (i1 (setuge (i64 DoubleRegs:$src1), (i64 DoubleRegs:$src2))), bb:$offset), - (JMP_cNot (CMPGTU64rr (i64 DoubleRegs:$src2), (i64 DoubleRegs:$src1)), + (JMP_f (CMPGTU64rr (i64 DoubleRegs:$src2), (i64 DoubleRegs:$src1)), bb:$offset)>; def : Pat <(brcond (i1 (setule (i32 IntRegs:$src1), (i32 IntRegs:$src2))), bb:$offset), - (JMP_cNot (CMPGTUrr (i32 IntRegs:$src1), (i32 IntRegs:$src2)), + (JMP_f (CMPGTUrr (i32 IntRegs:$src1), (i32 IntRegs:$src2)), bb:$offset)>; def : Pat <(brcond (i1 (setule (i64 DoubleRegs:$src1), (i64 DoubleRegs:$src2))), bb:$offset), - (JMP_cNot (CMPGTU64rr (i64 DoubleRegs:$src1), (i64 DoubleRegs:$src2)), + (JMP_f (CMPGTU64rr (i64 DoubleRegs:$src1), (i64 DoubleRegs:$src2)), bb:$offset)>; // Map from a 64-bit select to an emulated 64-bit mux. @@ -2300,8 +2313,8 @@ def : Pat<(i64 (anyext (i32 IntRegs:$src1))), // Map cmple -> cmpgt. // rs <= rt -> !(rs > rt). -def : Pat<(i1 (setle (i32 IntRegs:$src1), s10ImmPred:$src2)), - (i1 (NOT_p (CMPGTri (i32 IntRegs:$src1), s10ImmPred:$src2)))>; +def : Pat<(i1 (setle (i32 IntRegs:$src1), s10ExtPred:$src2)), + (i1 (NOT_p (CMPGTri (i32 IntRegs:$src1), s10ExtPred:$src2)))>; // rs <= rt -> !(rs > rt). def : Pat<(i1 (setle (i32 IntRegs:$src1), (i32 IntRegs:$src2))), @@ -2314,8 +2327,8 @@ def : Pat<(i1 (setle (i64 DoubleRegs:$src1), (i64 DoubleRegs:$src2))), // Map cmpne -> cmpeq. // Hexagon_TODO: We should improve on this. // rs != rt -> !(rs == rt). -def : Pat <(i1 (setne (i32 IntRegs:$src1), s10ImmPred:$src2)), - (i1 (NOT_p(i1 (CMPEQri (i32 IntRegs:$src1), s10ImmPred:$src2))))>; +def : Pat <(i1 (setne (i32 IntRegs:$src1), s10ExtPred:$src2)), + (i1 (NOT_p(i1 (CMPEQri (i32 IntRegs:$src1), s10ExtPred:$src2))))>; // Map cmpne(Rs) -> !cmpeqe(Rs). // rs != rt -> !(rs == rt). 
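The JMP_f/CMPGTri rewrites above, and the setge/setlt/setuge patterns in the next hunk, replace the old CMPGEri/CMPGEUri forms by decrementing the immediate (DEC_CONST_SIGNED / DEC_CONST_UNSIGNED). They lean on two small arithmetic identities; the register-register cases need no adjustment at all, since a < b is simply b > a with the operands swapped. Below is a standalone C++ check of those identities over the relevant immediate ranges; it is purely illustrative and is not part of the Hexagon backend.

#include <cassert>
#include <climits>
#include <cstdint>

// Identities behind the "decrement the immediate" rewrites:
//   signed:    x <  imm  <=>  !(x > imm - 1)      (cmp.lt  -> !cmp.gt)
//              x >= imm  <=>    x > imm - 1       (cmp.ge  ->  cmp.gt)
//   unsigned:  x >= imm  <=>    x > imm - 1       for imm >= 1
//              x >= 0    is always true           (hence cmp.eq(Rs, Rs))
int main() {
  const int32_t xs[] = {INT32_MIN, -129, -128, -1, 0, 1, 126, 127, INT32_MAX};
  for (int32_t x : xs)
    for (int32_t imm = -128; imm <= 127; ++imm) {   // s8 immediate range
      assert((x <  imm) == !(x > imm - 1));
      assert((x >= imm) ==  (x > imm - 1));
    }
  const uint32_t us[] = {0u, 1u, 254u, 255u, UINT32_MAX};
  for (uint32_t x : us) {
    assert(x >= 0u);                                // trivially true
    for (uint32_t imm = 1; imm <= 255; ++imm)       // u8 immediate range
      assert((x >= imm) == (x > imm - 1));
  }
  return 0;
}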
@@ -2337,8 +2350,9 @@ def : Pat <(i1 (setne (i64 DoubleRegs:$src1), (i64 DoubleRegs:$src2))), def : Pat <(i1 (setge (i32 IntRegs:$src1), (i32 IntRegs:$src2))), (i1 (NOT_p (i1 (CMPGTrr (i32 IntRegs:$src2), (i32 IntRegs:$src1)))))>; -def : Pat <(i1 (setge (i32 IntRegs:$src1), s8ImmPred:$src2)), - (i1 (CMPGEri (i32 IntRegs:$src1), s8ImmPred:$src2))>; +// cmpge(Rs, Imm) -> cmpgt(Rs, Imm-1) +def : Pat <(i1 (setge (i32 IntRegs:$src1), s8ExtPred:$src2)), + (i1 (CMPGTri (i32 IntRegs:$src1), (DEC_CONST_SIGNED s8ExtPred:$src2)))>; // Map cmpge(Rss, Rtt) -> !cmpgt(Rtt, Rss). // rss >= rtt -> !(rtt > rss). @@ -2347,9 +2361,10 @@ def : Pat <(i1 (setge (i64 DoubleRegs:$src1), (i64 DoubleRegs:$src2))), (i64 DoubleRegs:$src1)))))>; // Map cmplt(Rs, Imm) -> !cmpge(Rs, Imm). +// !cmpge(Rs, Imm) -> !cmpgt(Rs, Imm-1). // rs < rt -> !(rs >= rt). -def : Pat <(i1 (setlt (i32 IntRegs:$src1), s8ImmPred:$src2)), - (i1 (NOT_p (CMPGEri (i32 IntRegs:$src1), s8ImmPred:$src2)))>; +def : Pat <(i1 (setlt (i32 IntRegs:$src1), s8ExtPred:$src2)), + (i1 (NOT_p (CMPGTri (i32 IntRegs:$src1), (DEC_CONST_SIGNED s8ExtPred:$src2))))>; // Map cmplt(Rs, Rt) -> cmpgt(Rt, Rs). // rs < rt -> rt > rs. @@ -2373,13 +2388,17 @@ def : Pat <(i1 (setult (i32 IntRegs:$src1), (i32 IntRegs:$src2))), def : Pat <(i1 (setult (i64 DoubleRegs:$src1), (i64 DoubleRegs:$src2))), (i1 (CMPGTU64rr (i64 DoubleRegs:$src2), (i64 DoubleRegs:$src1)))>; -// Generate cmpgeu(Rs, #u8) -def : Pat <(i1 (setuge (i32 IntRegs:$src1), u8ImmPred:$src2)), - (i1 (CMPGEUri (i32 IntRegs:$src1), u8ImmPred:$src2))>; +// Generate cmpgeu(Rs, #0) -> cmpeq(Rs, Rs) +def : Pat <(i1 (setuge (i32 IntRegs:$src1), 0)), + (i1 (CMPEQrr (i32 IntRegs:$src1), (i32 IntRegs:$src1)))>; + +// Generate cmpgeu(Rs, #u8) -> cmpgtu(Rs, #u8 -1) +def : Pat <(i1 (setuge (i32 IntRegs:$src1), u8ExtPred:$src2)), + (i1 (CMPGTUri (i32 IntRegs:$src1), (DEC_CONST_UNSIGNED u8ExtPred:$src2)))>; // Generate cmpgtu(Rs, #u9) -def : Pat <(i1 (setugt (i32 IntRegs:$src1), u9ImmPred:$src2)), - (i1 (CMPGTUri (i32 IntRegs:$src1), u9ImmPred:$src2))>; +def : Pat <(i1 (setugt (i32 IntRegs:$src1), u9ExtPred:$src2)), + (i1 (CMPGTUri (i32 IntRegs:$src1), u9ExtPred:$src2))>; // Map from Rs >= Rt -> !(Rt > Rs). // rs >= rt -> !(rt > rs). @@ -2391,7 +2410,7 @@ def : Pat <(i1 (setuge (i32 IntRegs:$src1), (i32 IntRegs:$src2))), def : Pat <(i1 (setuge (i64 DoubleRegs:$src1), (i64 DoubleRegs:$src2))), (i1 (NOT_p (CMPGTU64rr (i64 DoubleRegs:$src2), (i64 DoubleRegs:$src1))))>; -// Map from cmpleu(Rs, Rs) -> !cmpgtu(Rs, Rs). +// Map from cmpleu(Rs, Rt) -> !cmpgtu(Rs, Rt). // Map from (Rs <= Rt) -> !(Rs > Rt). 
def : Pat <(i1 (setule (i32 IntRegs:$src1), (i32 IntRegs:$src2))), (i1 (NOT_p (CMPGTUrr (i32 IntRegs:$src1), (i32 IntRegs:$src2))))>; @@ -2487,6 +2506,13 @@ def: Pat <(i64 (zextloadi32 ADDRriS11_2:$src1)), (i64 (COMBINE_rr (TFRI 0), (LDriw ADDRriS11_2:$src1)))>, Requires<[NoV4T]>; +let AddedComplexity = 100 in +def: Pat <(i64 (zextloadi32 (i32 (add IntRegs:$src1, s11_2ExtPred:$offset)))), + (i64 (COMBINE_rr (TFRI 0), (LDriw_indexed IntRegs:$src1, + s11_2ExtPred:$offset)))>, + Requires<[NoV4T]>; + +let AddedComplexity = 10 in def: Pat <(i32 (zextloadi1 ADDRriS11_0:$src1)), (i32 (LDriw ADDRriS11_0:$src1))>; @@ -2503,6 +2529,48 @@ def : Pat <(i64 (anyext (i1 PredRegs:$src1))), (i64 (SXTW (i32 (MUX_ii (i1 PredRegs:$src1), 1, 0))))>; +let AddedComplexity = 100 in +def: Pat<(i64 (or (i64 (shl (i64 DoubleRegs:$srcHigh), + (i32 32))), + (i64 (zextloadi32 (i32 (add IntRegs:$src2, + s11_2ExtPred:$offset2)))))), + (i64 (COMBINE_rr (EXTRACT_SUBREG (i64 DoubleRegs:$srcHigh), subreg_loreg), + (LDriw_indexed IntRegs:$src2, + s11_2ExtPred:$offset2)))>; + +def: Pat<(i64 (or (i64 (shl (i64 DoubleRegs:$srcHigh), + (i32 32))), + (i64 (zextloadi32 ADDRriS11_2:$srcLow)))), + (i64 (COMBINE_rr (EXTRACT_SUBREG (i64 DoubleRegs:$srcHigh), subreg_loreg), + (LDriw ADDRriS11_2:$srcLow)))>; + +def: Pat<(i64 (or (i64 (shl (i64 DoubleRegs:$srcHigh), + (i32 32))), + (i64 (zext (i32 IntRegs:$srcLow))))), + (i64 (COMBINE_rr (EXTRACT_SUBREG (i64 DoubleRegs:$srcHigh), subreg_loreg), + IntRegs:$srcLow))>; + +let AddedComplexity = 100 in +def: Pat<(i64 (or (i64 (shl (i64 DoubleRegs:$srcHigh), + (i32 32))), + (i64 (zextloadi32 (i32 (add IntRegs:$src2, + s11_2ExtPred:$offset2)))))), + (i64 (COMBINE_rr (EXTRACT_SUBREG (i64 DoubleRegs:$srcHigh), subreg_loreg), + (LDriw_indexed IntRegs:$src2, + s11_2ExtPred:$offset2)))>; + +def: Pat<(i64 (or (i64 (shl (i64 DoubleRegs:$srcHigh), + (i32 32))), + (i64 (zextloadi32 ADDRriS11_2:$srcLow)))), + (i64 (COMBINE_rr (EXTRACT_SUBREG (i64 DoubleRegs:$srcHigh), subreg_loreg), + (LDriw ADDRriS11_2:$srcLow)))>; + +def: Pat<(i64 (or (i64 (shl (i64 DoubleRegs:$srcHigh), + (i32 32))), + (i64 (zext (i32 IntRegs:$srcLow))))), + (i64 (COMBINE_rr (EXTRACT_SUBREG (i64 DoubleRegs:$srcHigh), subreg_loreg), + IntRegs:$srcLow))>; + // Any extended 64-bit load. 
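The COMBINE_rr patterns in this hunk fold an insert-into-the-high-half idiom: once the 64-bit value is shifted left by 32, only the low word of $srcHigh can reach the result, so its low subregister can be paired directly with the zero-extended low word. A minimal C++ illustration of that equivalence, using made-up values rather than anything from the backend:

#include <cassert>
#include <cstdint>

// (srcHigh << 32) | zext(lo) only depends on the low 32 bits of srcHigh,
// which is why the patterns can use EXTRACT_SUBREG(..., subreg_loreg).
uint64_t combineWords(uint32_t hi, uint32_t lo) {
  return (static_cast<uint64_t>(hi) << 32) | lo;
}

int main() {
  uint64_t srcHigh = 0xDEADBEEF12345678ull;   // arbitrary example value
  uint32_t lo      = 0xCAFEBABEu;
  assert(((srcHigh << 32) | lo) ==
         combineWords(static_cast<uint32_t>(srcHigh), lo));
  return 0;
}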
// anyext i32 -> i64 def: Pat <(i64 (extloadi32 ADDRriS11_2:$src1)), @@ -2637,19 +2705,6 @@ let AddedComplexity = 100 in def : Pat<(i32 (sext_inreg (Hexagon_ARGEXTEND (i32 IntRegs:$src1)), i16)), (COPY (i32 IntRegs:$src1))>; -def SDHexagonBR_JT: SDTypeProfile<0, 1, [SDTCisVT<0, i32>]>; -def HexagonBR_JT: SDNode<"HexagonISD::BR_JT", SDHexagonBR_JT, [SDNPHasChain]>; - -let isBranch=1, isIndirectBranch=1, isTerminator=1, isBarrier = 1 in -def BR_JT : JRInst<(outs), (ins IntRegs:$src), - "jumpr $src", - [(HexagonBR_JT (i32 IntRegs:$src))]>; - -let isBranch=1, isIndirectBranch=1, isTerminator=1 in -def BRIND : JRInst<(outs), (ins IntRegs:$src), - "jumpr $src", - [(brind (i32 IntRegs:$src))]>; - def HexagonWrapperJT: SDNode<"HexagonISD::WrapperJT", SDTIntUnaryOp>; def : Pat<(HexagonWrapperJT tjumptable:$dst), diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonInstrInfoV3.td b/contrib/llvm/lib/Target/Hexagon/HexagonInstrInfoV3.td index 157ab3d..7e75554 100644 --- a/contrib/llvm/lib/Target/Hexagon/HexagonInstrInfoV3.td +++ b/contrib/llvm/lib/Target/Hexagon/HexagonInstrInfoV3.td @@ -11,6 +11,11 @@ // //===----------------------------------------------------------------------===// +def callv3 : SDNode<"HexagonISD::CALLv3", SDT_SPCall, + [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue, SDNPVariadic]>; + +def callv3nr : SDNode<"HexagonISD::CALLv3nr", SDT_SPCall, + [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue, SDNPVariadic]>; //===----------------------------------------------------------------------===// // J + @@ -40,41 +45,6 @@ let isCall = 1, neverHasSideEffects = 1, []>, Requires<[HasV3TOnly]>; } - -// Jump to address from register -// if(p?.new) jumpr:t r? -let isReturn = 1, isTerminator = 1, isBarrier = 1, - Defs = [PC], Uses = [R31] in { - def JMPR_cdnPt_V3: JRInst<(outs), (ins PredRegs:$src1, IntRegs:$src2), - "if ($src1.new) jumpr:t $src2", - []>, Requires<[HasV3T]>; -} - -// if (!p?.new) jumpr:t r? -let isReturn = 1, isTerminator = 1, isBarrier = 1, - Defs = [PC], Uses = [R31] in { - def JMPR_cdnNotPt_V3: JRInst<(outs), (ins PredRegs:$src1, IntRegs:$src2), - "if (!$src1.new) jumpr:t $src2", - []>, Requires<[HasV3T]>; -} - -// Not taken. -// if(p?.new) jumpr:nt r? -let isReturn = 1, isTerminator = 1, isBarrier = 1, - Defs = [PC], Uses = [R31] in { - def JMPR_cdnPnt: JRInst<(outs), (ins PredRegs:$src1, IntRegs:$src2), - "if ($src1.new) jumpr:nt $src2", - []>, Requires<[HasV3T]>; -} - -// if (!p?.new) jumpr:nt r? -let isReturn = 1, isTerminator = 1, isBarrier = 1, - Defs = [PC], Uses = [R31] in { - def JMPR_cdnNotPnt: JRInst<(outs), (ins PredRegs:$src1, IntRegs:$src2), - "if (!$src1.new) jumpr:nt $src2", - []>, Requires<[HasV3T]>; -} - //===----------------------------------------------------------------------===// // JR - //===----------------------------------------------------------------------===// diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonInstrInfoV4.td b/contrib/llvm/lib/Target/Hexagon/HexagonInstrInfoV4.td index cd0e475..933239d 100644 --- a/contrib/llvm/lib/Target/Hexagon/HexagonInstrInfoV4.td +++ b/contrib/llvm/lib/Target/Hexagon/HexagonInstrInfoV4.td @@ -209,105 +209,31 @@ def COMBINE_iI_V4 : ALU32_ii<(outs DoubleRegs:$dst), //===----------------------------------------------------------------------===// // LD + //===----------------------------------------------------------------------===// -// -// These absolute set addressing mode instructions accept immediate as -// an operand. We have duplicated these patterns to take global address. 
- +//===----------------------------------------------------------------------===// +// Template class for load instructions with Absolute set addressing mode. +//===----------------------------------------------------------------------===// let isExtended = 1, opExtendable = 2, neverHasSideEffects = 1, -validSubTargets = HasV4SubT in { -def LDrid_abs_setimm_V4 : LDInst2<(outs DoubleRegs:$dst1, IntRegs:$dst2), - (ins u0AlwaysExt:$addr), - "$dst1 = memd($dst2=##$addr)", - []>, - Requires<[HasV4T]>; - -// Rd=memb(Re=#U6) -def LDrib_abs_setimm_V4 : LDInst2<(outs IntRegs:$dst1, IntRegs:$dst2), - (ins u0AlwaysExt:$addr), - "$dst1 = memb($dst2=##$addr)", - []>, - Requires<[HasV4T]>; - -// Rd=memh(Re=#U6) -def LDrih_abs_setimm_V4 : LDInst2<(outs IntRegs:$dst1, IntRegs:$dst2), - (ins u0AlwaysExt:$addr), - "$dst1 = memh($dst2=##$addr)", - []>, - Requires<[HasV4T]>; - -// Rd=memub(Re=#U6) -def LDriub_abs_setimm_V4 : LDInst2<(outs IntRegs:$dst1, IntRegs:$dst2), +validSubTargets = HasV4SubT in +class T_LD_abs_set<string mnemonic, RegisterClass RC>: + LDInst2<(outs RC:$dst1, IntRegs:$dst2), (ins u0AlwaysExt:$addr), - "$dst1 = memub($dst2=##$addr)", + "$dst1 = "#mnemonic#"($dst2=##$addr)", []>, Requires<[HasV4T]>; -// Rd=memuh(Re=#U6) -def LDriuh_abs_setimm_V4 : LDInst2<(outs IntRegs:$dst1, IntRegs:$dst2), - (ins u0AlwaysExt:$addr), - "$dst1 = memuh($dst2=##$addr)", - []>, - Requires<[HasV4T]>; +def LDrid_abs_set_V4 : T_LD_abs_set <"memd", DoubleRegs>; +def LDrib_abs_set_V4 : T_LD_abs_set <"memb", IntRegs>; +def LDriub_abs_set_V4 : T_LD_abs_set <"memub", IntRegs>; +def LDrih_abs_set_V4 : T_LD_abs_set <"memh", IntRegs>; +def LDriw_abs_set_V4 : T_LD_abs_set <"memw", IntRegs>; +def LDriuh_abs_set_V4 : T_LD_abs_set <"memuh", IntRegs>; -// Rd=memw(Re=#U6) -def LDriw_abs_setimm_V4 : LDInst2<(outs IntRegs:$dst1, IntRegs:$dst2), - (ins u0AlwaysExt:$addr), - "$dst1 = memw($dst2=##$addr)", - []>, - Requires<[HasV4T]>; -} - -// Following patterns are defined for absolute set addressing mode -// instruction which take global address as operand. 
-let isExtended = 1, opExtendable = 2, neverHasSideEffects = 1, -validSubTargets = HasV4SubT in { -def LDrid_abs_set_V4 : LDInst2<(outs DoubleRegs:$dst1, IntRegs:$dst2), - (ins globaladdressExt:$addr), - "$dst1 = memd($dst2=##$addr)", - []>, - Requires<[HasV4T]>; - -// Rd=memb(Re=#U6) -def LDrib_abs_set_V4 : LDInst2<(outs IntRegs:$dst1, IntRegs:$dst2), - (ins globaladdressExt:$addr), - "$dst1 = memb($dst2=##$addr)", - []>, - Requires<[HasV4T]>; - -// Rd=memh(Re=#U6) -def LDrih_abs_set_V4 : LDInst2<(outs IntRegs:$dst1, IntRegs:$dst2), - (ins globaladdressExt:$addr), - "$dst1 = memh($dst2=##$addr)", - []>, - Requires<[HasV4T]>; - -// Rd=memub(Re=#U6) -def LDriub_abs_set_V4 : LDInst2<(outs IntRegs:$dst1, IntRegs:$dst2), - (ins globaladdressExt:$addr), - "$dst1 = memub($dst2=##$addr)", - []>, - Requires<[HasV4T]>; - -// Rd=memuh(Re=#U6) -def LDriuh_abs_set_V4 : LDInst2<(outs IntRegs:$dst1, IntRegs:$dst2), - (ins globaladdressExt:$addr), - "$dst1 = memuh($dst2=##$addr)", - []>, - Requires<[HasV4T]>; - -// Rd=memw(Re=#U6) -def LDriw_abs_set_V4 : LDInst2<(outs IntRegs:$dst1, IntRegs:$dst2), - (ins globaladdressExt:$addr), - "$dst1 = memw($dst2=##$addr)", - []>, - Requires<[HasV4T]>; -} // multiclass for load instructions with base + register offset // addressing mode multiclass ld_idxd_shl_pbase<string mnemonic, RegisterClass RC, bit isNot, bit isPredNew> { - let PNewValue = !if(isPredNew, "new", "") in + let isPredicatedNew = isPredNew in def NAME : LDInst2<(outs RC:$dst), (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, u2Imm:$offset), !if(isNot, "if (!$src1", "if ($src1")#!if(isPredNew, ".new) ", @@ -316,7 +242,7 @@ multiclass ld_idxd_shl_pbase<string mnemonic, RegisterClass RC, bit isNot, } multiclass ld_idxd_shl_pred<string mnemonic, RegisterClass RC, bit PredNot> { - let PredSense = !if(PredNot, "false", "true") in { + let isPredicatedFalse = PredNot in { defm _c#NAME : ld_idxd_shl_pbase<mnemonic, RC, PredNot, 0>; // Predicate new defm _cdn#NAME : ld_idxd_shl_pbase<mnemonic, RC, PredNot, 1>; @@ -527,78 +453,29 @@ def: Pat <(i64 (extloadi32 (i32 (add IntRegs:$src1, s11_2ExtPred:$offset)))), // ST + //===----------------------------------------------------------------------===// /// -/// Assumptions::: ****** DO NOT IGNORE ******** -/// 1. Make sure that in post increment store, the zero'th operand is always the -/// post increment operand. -/// 2. Make sure that the store value operand(Rt/Rtt) in a store is always the -/// last operand. 
-/// - -// memd(Re=#U)=Rtt -let isExtended = 1, opExtendable = 2, validSubTargets = HasV4SubT in { -def STrid_abs_setimm_V4 : STInst2<(outs IntRegs:$dst1), - (ins DoubleRegs:$src1, u0AlwaysExt:$src2), - "memd($dst1=##$src2) = $src1", - []>, - Requires<[HasV4T]>; - -// memb(Re=#U)=Rs -def STrib_abs_setimm_V4 : STInst2<(outs IntRegs:$dst1), - (ins IntRegs:$src1, u0AlwaysExt:$src2), - "memb($dst1=##$src2) = $src1", - []>, - Requires<[HasV4T]>; - -// memh(Re=#U)=Rs -def STrih_abs_setimm_V4 : STInst2<(outs IntRegs:$dst1), - (ins IntRegs:$src1, u0AlwaysExt:$src2), - "memh($dst1=##$src2) = $src1", - []>, - Requires<[HasV4T]>; - -// memw(Re=#U)=Rs -def STriw_abs_setimm_V4 : STInst2<(outs IntRegs:$dst1), - (ins IntRegs:$src1, u0AlwaysExt:$src2), - "memw($dst1=##$src2) = $src1", - []>, - Requires<[HasV4T]>; -} - -// memd(Re=#U)=Rtt -let isExtended = 1, opExtendable = 2, validSubTargets = HasV4SubT in { -def STrid_abs_set_V4 : STInst2<(outs IntRegs:$dst1), - (ins DoubleRegs:$src1, globaladdressExt:$src2), - "memd($dst1=##$src2) = $src1", - []>, - Requires<[HasV4T]>; - -// memb(Re=#U)=Rs -def STrib_abs_set_V4 : STInst2<(outs IntRegs:$dst1), - (ins IntRegs:$src1, globaladdressExt:$src2), - "memb($dst1=##$src2) = $src1", +//===----------------------------------------------------------------------===// +// Template class for store instructions with Absolute set addressing mode. +//===----------------------------------------------------------------------===// +let isExtended = 1, opExtendable = 2, validSubTargets = HasV4SubT in +class T_ST_abs_set<string mnemonic, RegisterClass RC>: + STInst2<(outs IntRegs:$dst1), + (ins RC:$src1, u0AlwaysExt:$src2), + mnemonic#"($dst1=##$src2) = $src1", []>, Requires<[HasV4T]>; -// memh(Re=#U)=Rs -def STrih_abs_set_V4 : STInst2<(outs IntRegs:$dst1), - (ins IntRegs:$src1, globaladdressExt:$src2), - "memh($dst1=##$src2) = $src1", - []>, - Requires<[HasV4T]>; - -// memw(Re=#U)=Rs -def STriw_abs_set_V4 : STInst2<(outs IntRegs:$dst1), - (ins IntRegs:$src1, globaladdressExt:$src2), - "memw($dst1=##$src2) = $src1", - []>, - Requires<[HasV4T]>; -} +def STrid_abs_set_V4 : T_ST_abs_set <"memd", DoubleRegs>; +def STrib_abs_set_V4 : T_ST_abs_set <"memb", IntRegs>; +def STrih_abs_set_V4 : T_ST_abs_set <"memh", IntRegs>; +def STriw_abs_set_V4 : T_ST_abs_set <"memw", IntRegs>; +//===----------------------------------------------------------------------===// // multiclass for store instructions with base + register offset addressing // mode +//===----------------------------------------------------------------------===// multiclass ST_Idxd_shl_Pbase<string mnemonic, RegisterClass RC, bit isNot, bit isPredNew> { - let PNewValue = !if(isPredNew, "new", "") in + let isPredicatedNew = isPredNew in def NAME : STInst2<(outs), (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, u2Imm:$src4, RC:$src5), @@ -609,7 +486,7 @@ multiclass ST_Idxd_shl_Pbase<string mnemonic, RegisterClass RC, bit isNot, } multiclass ST_Idxd_shl_Pred<string mnemonic, RegisterClass RC, bit PredNot> { - let PredSense = !if(PredNot, "false", "true") in { + let isPredicatedFalse = PredNot in { defm _c#NAME : ST_Idxd_shl_Pbase<mnemonic, RC, PredNot, 0>; // Predicate new defm _cdn#NAME : ST_Idxd_shl_Pbase<mnemonic, RC, PredNot, 1>; @@ -637,7 +514,7 @@ multiclass ST_Idxd_shl<string mnemonic, string CextOp, RegisterClass RC> { // addressing mode. 
multiclass ST_Idxd_shl_Pbase_nv<string mnemonic, RegisterClass RC, bit isNot, bit isPredNew> { - let PNewValue = !if(isPredNew, "new", "") in + let isPredicatedNew = isPredNew in def NAME#_nv_V4 : NVInst_V4<(outs), (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, u2Imm:$src4, RC:$src5), @@ -648,7 +525,7 @@ multiclass ST_Idxd_shl_Pbase_nv<string mnemonic, RegisterClass RC, bit isNot, } multiclass ST_Idxd_shl_Pred_nv<string mnemonic, RegisterClass RC, bit PredNot> { - let PredSense = !if(PredNot, "false", "true") in { + let isPredicatedFalse = PredNot in { defm _c#NAME : ST_Idxd_shl_Pbase_nv<mnemonic, RC, PredNot, 0>; // Predicate new defm _cdn#NAME : ST_Idxd_shl_Pbase_nv<mnemonic, RC, PredNot, 1>; @@ -711,17 +588,59 @@ def : Pat<(store (i64 DoubleRegs:$src4), u2ImmPred:$src3, DoubleRegs:$src4)>; } -// memd(Ru<<#u2+#U6)=Rtt -let isExtended = 1, opExtendable = 2, AddedComplexity = 10, -validSubTargets = HasV4SubT in -def STrid_shl_V4 : STInst<(outs), - (ins IntRegs:$src1, u2Imm:$src2, u0AlwaysExt:$src3, DoubleRegs:$src4), - "memd($src1<<#$src2+#$src3) = $src4", - [(store (i64 DoubleRegs:$src4), +let isExtended = 1, opExtendable = 2 in +class T_ST_LongOff <string mnemonic, PatFrag stOp, RegisterClass RC, ValueType VT> : + STInst<(outs), + (ins IntRegs:$src1, u2Imm:$src2, u0AlwaysExt:$src3, RC:$src4), + mnemonic#"($src1<<#$src2+##$src3) = $src4", + [(stOp (VT RC:$src4), (add (shl (i32 IntRegs:$src1), u2ImmPred:$src2), u0AlwaysExtPred:$src3))]>, Requires<[HasV4T]>; +let isExtended = 1, opExtendable = 2, mayStore = 1, isNVStore = 1 in +class T_ST_LongOff_nv <string mnemonic> : + NVInst_V4<(outs), + (ins IntRegs:$src1, u2Imm:$src2, u0AlwaysExt:$src3, IntRegs:$src4), + mnemonic#"($src1<<#$src2+##$src3) = $src4.new", + []>, + Requires<[HasV4T]>; + +multiclass ST_LongOff <string mnemonic, string BaseOp, PatFrag stOp> { + let BaseOpcode = BaseOp#"_shl" in { + let isNVStorable = 1 in + def NAME#_V4 : T_ST_LongOff<mnemonic, stOp, IntRegs, i32>; + + def NAME#_nv_V4 : T_ST_LongOff_nv<mnemonic>; + } +} + +let AddedComplexity = 10, validSubTargets = HasV4SubT in { + def STrid_shl_V4 : T_ST_LongOff<"memd", store, DoubleRegs, i64>; + defm STrib_shl : ST_LongOff <"memb", "STrib", truncstorei8>, NewValueRel; + defm STrih_shl : ST_LongOff <"memh", "Strih", truncstorei16>, NewValueRel; + defm STriw_shl : ST_LongOff <"memw", "STriw", store>, NewValueRel; +} + +let AddedComplexity = 40 in +multiclass T_ST_LOff_Pats <InstHexagon I, RegisterClass RC, ValueType VT, + PatFrag stOp> { + def : Pat<(stOp (VT RC:$src4), + (add (shl IntRegs:$src1, u2ImmPred:$src2), + (NumUsesBelowThresCONST32 tglobaladdr:$src3))), + (I IntRegs:$src1, u2ImmPred:$src2, tglobaladdr:$src3, RC:$src4)>; + + def : Pat<(stOp (VT RC:$src4), + (add IntRegs:$src1, + (NumUsesBelowThresCONST32 tglobaladdr:$src3))), + (I IntRegs:$src1, 0, tglobaladdr:$src3, RC:$src4)>; +} + +defm : T_ST_LOff_Pats<STrid_shl_V4, DoubleRegs, i64, store>; +defm : T_ST_LOff_Pats<STriw_shl_V4, IntRegs, i32, store>; +defm : T_ST_LOff_Pats<STrib_shl_V4, IntRegs, i32, truncstorei8>; +defm : T_ST_LOff_Pats<STrih_shl_V4, IntRegs, i32, truncstorei16>; + // memd(Rx++#s4:3)=Rtt // memd(Rx++#s4:3:circ(Mu))=Rtt // memd(Rx++I:circ(Mu))=Rtt @@ -741,7 +660,7 @@ def STrid_shl_V4 : STInst<(outs), //===----------------------------------------------------------------------===// multiclass ST_Imm_Pbase<string mnemonic, Operand OffsetOp, bit isNot, bit isPredNew> { - let PNewValue = !if(isPredNew, "new", "") in + let isPredicatedNew = isPredNew in def NAME : STInst2<(outs), (ins 
PredRegs:$src1, IntRegs:$src2, OffsetOp:$src3, s6Ext:$src4), !if(isNot, "if (!$src1", "if ($src1")#!if(isPredNew, ".new) ", @@ -751,7 +670,7 @@ multiclass ST_Imm_Pbase<string mnemonic, Operand OffsetOp, bit isNot, } multiclass ST_Imm_Pred<string mnemonic, Operand OffsetOp, bit PredNot> { - let PredSense = !if(PredNot, "false", "true") in { + let isPredicatedFalse = PredNot in { defm _c#NAME : ST_Imm_Pbase<mnemonic, OffsetOp, PredNot, 0>; // Predicate new defm _cdn#NAME : ST_Imm_Pbase<mnemonic, OffsetOp, PredNot, 1>; @@ -799,17 +718,6 @@ def : Pat <(truncstorei8 s8ExtPred:$src2, (i32 IntRegs:$src1)), (STrib_imm_V4 IntRegs:$src1, 0, s8ExtPred:$src2)>, Requires<[HasV4T]>; -// memb(Ru<<#u2+#U6)=Rt -let isExtended = 1, opExtendable = 2, AddedComplexity = 10, isNVStorable = 1, -validSubTargets = HasV4SubT in -def STrib_shl_V4 : STInst<(outs), - (ins IntRegs:$src1, u2Imm:$src2, u0AlwaysExt:$src3, IntRegs:$src4), - "memb($src1<<#$src2+#$src3) = $src4", - [(truncstorei8 (i32 IntRegs:$src4), - (add (shl (i32 IntRegs:$src1), u2ImmPred:$src2), - u0AlwaysExtPred:$src3))]>, - Requires<[HasV4T]>; - // memb(Rx++#s4:0:circ(Mu))=Rt // memb(Rx++I:circ(Mu))=Rt // memb(Rx++Mu)=Rt @@ -830,17 +738,6 @@ def : Pat <(truncstorei16 s8ExtPred:$src2, (i32 IntRegs:$src1)), // TODO: needs to be implemented. // memh(Ru<<#u2+#U6)=Rt.H -// memh(Ru<<#u2+#U6)=Rt -let isExtended = 1, opExtendable = 2, AddedComplexity = 10, isNVStorable = 1, -validSubTargets = HasV4SubT in -def STrih_shl_V4 : STInst<(outs), - (ins IntRegs:$src1, u2Imm:$src2, u0AlwaysExt:$src3, IntRegs:$src4), - "memh($src1<<#$src2+#$src3) = $src4", - [(truncstorei16 (i32 IntRegs:$src4), - (add (shl (i32 IntRegs:$src1), u2ImmPred:$src2), - u0AlwaysExtPred:$src3))]>, - Requires<[HasV4T]>; - // memh(Rx++#s4:1:circ(Mu))=Rt.H // memh(Rx++#s4:1:circ(Mu))=Rt // memh(Rx++I:circ(Mu))=Rt.H @@ -877,17 +774,6 @@ def : Pat <(store s8ExtPred:$src2, (i32 IntRegs:$src1)), (STriw_imm_V4 IntRegs:$src1, 0, s8ExtPred:$src2)>, Requires<[HasV4T]>; -// memw(Ru<<#u2+#U6)=Rt -let isExtended = 1, opExtendable = 2, AddedComplexity = 10, isNVStorable = 1, -validSubTargets = HasV4SubT in -def STriw_shl_V4 : STInst<(outs), - (ins IntRegs:$src1, u2Imm:$src2, u0AlwaysExt:$src3, IntRegs:$src4), - "memw($src1<<#$src2+#$src3) = $src4", - [(store (i32 IntRegs:$src4), - (add (shl (i32 IntRegs:$src1), u2ImmPred:$src2), - u0AlwaysExtPred:$src3))]>, - Requires<[HasV4T]>; - // memw(Rx++#s4:2)=Rt // memw(Rx++#s4:2:circ(Mu))=Rt // memw(Rx++I:circ(Mu))=Rt @@ -907,7 +793,7 @@ def STriw_shl_V4 : STInst<(outs), // multiclass ST_Idxd_Pbase_nv<string mnemonic, RegisterClass RC, Operand predImmOp, bit isNot, bit isPredNew> { - let PNewValue = !if(isPredNew, "new", "") in + let isPredicatedNew = isPredNew in def NAME#_nv_V4 : NVInst_V4<(outs), (ins PredRegs:$src1, IntRegs:$src2, predImmOp:$src3, RC: $src4), !if(isNot, "if (!$src1", "if ($src1")#!if(isPredNew, ".new) ", @@ -918,7 +804,7 @@ multiclass ST_Idxd_Pbase_nv<string mnemonic, RegisterClass RC, multiclass ST_Idxd_Pred_nv<string mnemonic, RegisterClass RC, Operand predImmOp, bit PredNot> { - let PredSense = !if(PredNot, "false", "true") in { + let isPredicatedFalse = PredNot in { defm _c#NAME : ST_Idxd_Pbase_nv<mnemonic, RC, predImmOp, PredNot, 0>; // Predicate new defm _cdn#NAME : ST_Idxd_Pbase_nv<mnemonic, RC, predImmOp, PredNot, 1>; @@ -960,7 +846,7 @@ let addrMode = BaseImmOffset, validSubTargets = HasV4SubT in { // and MEMri operand. 
multiclass ST_MEMri_Pbase_nv<string mnemonic, RegisterClass RC, bit isNot, bit isPredNew> { - let PNewValue = !if(isPredNew, "new", "") in + let isPredicatedNew = isPredNew in def NAME#_nv_V4 : NVInst_V4<(outs), (ins PredRegs:$src1, MEMri:$addr, RC: $src2), !if(isNot, "if (!$src1", "if ($src1")#!if(isPredNew, ".new) ", @@ -970,7 +856,7 @@ multiclass ST_MEMri_Pbase_nv<string mnemonic, RegisterClass RC, bit isNot, } multiclass ST_MEMri_Pred_nv<string mnemonic, RegisterClass RC, bit PredNot> { - let PredSense = !if(PredNot, "false", "true") in { + let isPredicatedFalse = PredNot in { defm _c#NAME : ST_MEMri_Pbase_nv<mnemonic, RC, PredNot, 0>; // Predicate new @@ -1006,15 +892,6 @@ mayStore = 1 in { defm STriw: ST_MEMri_nv<"memw", "STriw", IntRegs, 13, 8>, AddrModeRel; } -// memb(Ru<<#u2+#U6)=Nt.new -let isExtended = 1, opExtendable = 2, mayStore = 1, AddedComplexity = 10, -isNVStore = 1, validSubTargets = HasV4SubT in -def STrib_shl_nv_V4 : NVInst_V4<(outs), - (ins IntRegs:$src1, u2Imm:$src2, u0AlwaysExt:$src3, IntRegs:$src4), - "memb($src1<<#$src2+#$src3) = $src4.new", - []>, - Requires<[HasV4T]>; - //===----------------------------------------------------------------------===// // Post increment store // mem[bhwd](Rx++#s4:[0123])=Nt.new @@ -1022,7 +899,7 @@ def STrib_shl_nv_V4 : NVInst_V4<(outs), multiclass ST_PostInc_Pbase_nv<string mnemonic, RegisterClass RC, Operand ImmOp, bit isNot, bit isPredNew> { - let PNewValue = !if(isPredNew, "new", "") in + let isPredicatedNew = isPredNew in def NAME#_nv_V4 : NVInstPI_V4<(outs IntRegs:$dst), (ins PredRegs:$src1, IntRegs:$src2, ImmOp:$offset, RC:$src3), !if(isNot, "if (!$src1", "if ($src1")#!if(isPredNew, ".new) ", @@ -1034,7 +911,7 @@ multiclass ST_PostInc_Pbase_nv<string mnemonic, RegisterClass RC, Operand ImmOp, multiclass ST_PostInc_Pred_nv<string mnemonic, RegisterClass RC, Operand ImmOp, bit PredNot> { - let PredSense = !if(PredNot, "false", "true") in { + let isPredicatedFalse = PredNot in { defm _c#NAME : ST_PostInc_Pbase_nv<mnemonic, RC, ImmOp, PredNot, 0>; // Predicate new let Predicates = [HasV4T], validSubTargets = HasV4SubT in @@ -1072,29 +949,11 @@ defm POST_STwri: ST_PostInc_nv <"memw", "STriw", IntRegs, s4_2Imm>, AddrModeRel; // memb(Rx++I:circ(Mu))=Nt.new // memb(Rx++Mu)=Nt.new // memb(Rx++Mu:brev)=Nt.new -// memh(Ru<<#u2+#U6)=Nt.new -let isExtended = 1, opExtendable = 2, mayStore = 1, AddedComplexity = 10, -isNVStore = 1, validSubTargets = HasV4SubT in -def STrih_shl_nv_V4 : NVInst_V4<(outs), - (ins IntRegs:$src1, u2Imm:$src2, u0AlwaysExt:$src3, IntRegs:$src4), - "memh($src1<<#$src2+#$src3) = $src4.new", - []>, - Requires<[HasV4T]>; - // memh(Rx++#s4:1:circ(Mu))=Nt.new // memh(Rx++I:circ(Mu))=Nt.new // memh(Rx++Mu)=Nt.new // memh(Rx++Mu:brev)=Nt.new -// memw(Ru<<#u2+#U6)=Nt.new -let isExtended = 1, opExtendable = 2, mayStore = 1, AddedComplexity = 10, -isNVStore = 1, validSubTargets = HasV4SubT in -def STriw_shl_nv_V4 : NVInst_V4<(outs), - (ins IntRegs:$src1, u2Imm:$src2, u0AlwaysExt:$src3, IntRegs:$src4), - "memw($src1<<#$src2+#$src3) = $src4.new", - []>, - Requires<[HasV4T]>; - // memw(Rx++#s4:2:circ(Mu))=Nt.new // memw(Rx++I:circ(Mu))=Nt.new // memw(Rx++Mu)=Nt.new @@ -1108,179 +967,193 @@ def STriw_shl_nv_V4 : NVInst_V4<(outs), // NV/J + //===----------------------------------------------------------------------===// -multiclass NVJ_type_basic_reg<string NotStr, string OpcStr, string TakenStr> { - def _ie_nv_V4 : NVInst_V4<(outs), - (ins IntRegs:$src1, IntRegs:$src2, brtarget:$offset), - !strconcat("if (", !strconcat(NotStr, 
!strconcat(OpcStr, - !strconcat("($src1.new, $src2)) jump:", - !strconcat(TakenStr, " $offset"))))), - []>, - Requires<[HasV4T]>; +//===----------------------------------------------------------------------===// +// multiclass/template class for the new-value compare jumps with the register +// operands. +//===----------------------------------------------------------------------===// - def _nv_V4 : NVInst_V4<(outs), - (ins IntRegs:$src1, IntRegs:$src2, brtarget:$offset), - !strconcat("if (", !strconcat(NotStr, !strconcat(OpcStr, - !strconcat("($src1.new, $src2)) jump:", - !strconcat(TakenStr, " $offset"))))), - []>, - Requires<[HasV4T]>; -} +let isExtendable = 1, opExtendable = 2, isExtentSigned = 1, opExtentBits = 11 in +class NVJrr_template<string mnemonic, bits<3> majOp, bit NvOpNum, + bit isNegCond, bit isTaken> + : NVInst_V4<(outs), + (ins IntRegs:$src1, IntRegs:$src2, brtarget:$offset), + "if ("#!if(isNegCond, "!","")#mnemonic# + "($src1"#!if(!eq(NvOpNum, 0),".new, ",", ")# + "$src2"#!if(!eq(NvOpNum, 1),".new))","))")#" jump:" + #!if(isTaken, "t","nt")#" $offset", + []>, Requires<[HasV4T]> { -multiclass NVJ_type_basic_2ndDotNew<string NotStr, string OpcStr, - string TakenStr> { - def _ie_nv_V4 : NVInst_V4<(outs), - (ins IntRegs:$src1, IntRegs:$src2, brtarget:$offset), - !strconcat("if (", !strconcat(NotStr, !strconcat(OpcStr, - !strconcat("($src1, $src2.new)) jump:", - !strconcat(TakenStr, " $offset"))))), - []>, - Requires<[HasV4T]>; + bits<5> src1; + bits<5> src2; + bits<3> Ns; // New-Value Operand + bits<5> RegOp; // Non New-Value Operand + bits<11> offset; - def _nv_V4 : NVInst_V4<(outs), - (ins IntRegs:$src1, IntRegs:$src2, brtarget:$offset), - !strconcat("if (", !strconcat(NotStr, !strconcat(OpcStr, - !strconcat("($src1, $src2.new)) jump:", - !strconcat(TakenStr, " $offset"))))), - []>, - Requires<[HasV4T]>; -} + let isBrTaken = !if(isTaken, "true", "false"); + let isPredicatedFalse = isNegCond; -multiclass NVJ_type_basic_imm<string NotStr, string OpcStr, string TakenStr> { - def _ie_nv_V4 : NVInst_V4<(outs), - (ins IntRegs:$src1, u5Imm:$src2, brtarget:$offset), - !strconcat("if (", !strconcat(NotStr, !strconcat(OpcStr, - !strconcat("($src1.new, #$src2)) jump:", - !strconcat(TakenStr, " $offset"))))), - []>, - Requires<[HasV4T]>; + let Ns = !if(!eq(NvOpNum, 0), src1{2-0}, src2{2-0}); + let RegOp = !if(!eq(NvOpNum, 0), src2, src1); - def _nv_V4 : NVInst_V4<(outs), - (ins IntRegs:$src1, u5Imm:$src2, brtarget:$offset), - !strconcat("if (", !strconcat(NotStr, !strconcat(OpcStr, - !strconcat("($src1.new, #$src2)) jump:", - !strconcat(TakenStr, " $offset"))))), - []>, - Requires<[HasV4T]>; + let IClass = 0b0010; + let Inst{26} = 0b0; + let Inst{25-23} = majOp; + let Inst{22} = isNegCond; + let Inst{18-16} = Ns; + let Inst{13} = isTaken; + let Inst{12-8} = RegOp; + let Inst{21-20} = offset{10-9}; + let Inst{7-1} = offset{8-2}; } -multiclass NVJ_type_basic_neg<string NotStr, string OpcStr, string TakenStr> { - def _ie_nv_V4 : NVInst_V4<(outs), - (ins IntRegs:$src1, nOneImm:$src2, brtarget:$offset), - !strconcat("if (", !strconcat(NotStr, !strconcat(OpcStr, - !strconcat("($src1.new, #$src2)) jump:", - !strconcat(TakenStr, " $offset"))))), - []>, - Requires<[HasV4T]>; - def _nv_V4 : NVInst_V4<(outs), - (ins IntRegs:$src1, nOneImm:$src2, brtarget:$offset), - !strconcat("if (", !strconcat(NotStr, !strconcat(OpcStr, - !strconcat("($src1.new, #$src2)) jump:", - !strconcat(TakenStr, " $offset"))))), - []>, - Requires<[HasV4T]>; +multiclass NVJrr_cond<string mnemonic, bits<3> majOp, bit 
NvOpNum, + bit isNegCond> { + // Branch not taken: + def _nt_V4: NVJrr_template<mnemonic, majOp, NvOpNum, isNegCond, 0>; + // Branch taken: + def _t_V4: NVJrr_template<mnemonic, majOp, NvOpNum, isNegCond, 1>; } -multiclass NVJ_type_basic_tstbit<string NotStr, string OpcStr, - string TakenStr> { - def _ie_nv_V4 : NVInst_V4<(outs), - (ins IntRegs:$src1, u1Imm:$src2, brtarget:$offset), - !strconcat("if (", !strconcat(NotStr, !strconcat(OpcStr, - !strconcat("($src1.new, #$src2)) jump:", - !strconcat(TakenStr, " $offset"))))), - []>, - Requires<[HasV4T]>; +// NvOpNum = 0 -> First Operand is a new-value Register +// NvOpNum = 1 -> Second Operand is a new-value Register - def _nv_V4 : NVInst_V4<(outs), - (ins IntRegs:$src1, u1Imm:$src2, brtarget:$offset), - !strconcat("if (", !strconcat(NotStr, !strconcat(OpcStr, - !strconcat("($src1.new, #$src2)) jump:", - !strconcat(TakenStr, " $offset"))))), - []>, - Requires<[HasV4T]>; +multiclass NVJrr_base<string mnemonic, string BaseOp, bits<3> majOp, + bit NvOpNum> { + let BaseOpcode = BaseOp#_NVJ in { + defm _t_Jumpnv : NVJrr_cond<mnemonic, majOp, NvOpNum, 0>; // True cond + defm _f_Jumpnv : NVJrr_cond<mnemonic, majOp, NvOpNum, 1>; // False cond + } } -// Multiclass for regular dot new of Ist operand register. -multiclass NVJ_type_br_pred_reg<string NotStr, string OpcStr> { - defm Pt : NVJ_type_basic_reg<NotStr, OpcStr, "t">; - defm Pnt : NVJ_type_basic_reg<NotStr, OpcStr, "nt">; -} +// if ([!]cmp.eq(Ns.new,Rt)) jump:[n]t #r9:2 +// if ([!]cmp.gt(Ns.new,Rt)) jump:[n]t #r9:2 +// if ([!]cmp.gtu(Ns.new,Rt)) jump:[n]t #r9:2 +// if ([!]cmp.gt(Rt,Ns.new)) jump:[n]t #r9:2 +// if ([!]cmp.gtu(Rt,Ns.new)) jump:[n]t #r9:2 -// Multiclass for dot new of 2nd operand register. -multiclass NVJ_type_br_pred_2ndDotNew<string NotStr, string OpcStr> { - defm Pt : NVJ_type_basic_2ndDotNew<NotStr, OpcStr, "t">; - defm Pnt : NVJ_type_basic_2ndDotNew<NotStr, OpcStr, "nt">; +let isPredicated = 1, isBranch = 1, isNewValue = 1, isTerminator = 1, + Defs = [PC], neverHasSideEffects = 1, validSubTargets = HasV4SubT in { + defm CMPEQrr : NVJrr_base<"cmp.eq", "CMPEQ", 0b000, 0>, PredRel; + defm CMPGTrr : NVJrr_base<"cmp.gt", "CMPGT", 0b001, 0>, PredRel; + defm CMPGTUrr : NVJrr_base<"cmp.gtu", "CMPGTU", 0b010, 0>, PredRel; + defm CMPLTrr : NVJrr_base<"cmp.gt", "CMPLT", 0b011, 1>, PredRel; + defm CMPLTUrr : NVJrr_base<"cmp.gtu", "CMPLTU", 0b100, 1>, PredRel; } -// Multiclass for 2nd operand immediate, including -1. -multiclass NVJ_type_br_pred_imm<string NotStr, string OpcStr> { - defm Pt : NVJ_type_basic_imm<NotStr, OpcStr, "t">; - defm Pnt : NVJ_type_basic_imm<NotStr, OpcStr, "nt">; - defm Ptneg : NVJ_type_basic_neg<NotStr, OpcStr, "t">; - defm Pntneg : NVJ_type_basic_neg<NotStr, OpcStr, "nt">; -} +//===----------------------------------------------------------------------===// +// multiclass/template class for the new-value compare jumps instruction +// with a register and an unsigned immediate (U5) operand. +//===----------------------------------------------------------------------===// -// Multiclass for 2nd operand immediate, excluding -1. 
-multiclass NVJ_type_br_pred_imm_only<string NotStr, string OpcStr> { - defm Pt : NVJ_type_basic_imm<NotStr, OpcStr, "t">; - defm Pnt : NVJ_type_basic_imm<NotStr, OpcStr, "nt">; -} +let isExtendable = 1, opExtendable = 2, isExtentSigned = 1, opExtentBits = 11 in +class NVJri_template<string mnemonic, bits<3> majOp, bit isNegCond, + bit isTaken> + : NVInst_V4<(outs), + (ins IntRegs:$src1, u5Imm:$src2, brtarget:$offset), + "if ("#!if(isNegCond, "!","")#mnemonic#"($src1.new, #$src2)) jump:" + #!if(isTaken, "t","nt")#" $offset", + []>, Requires<[HasV4T]> { -// Multiclass for tstbit, where 2nd operand is always #0. -multiclass NVJ_type_br_pred_tstbit<string NotStr, string OpcStr> { - defm Pt : NVJ_type_basic_tstbit<NotStr, OpcStr, "t">; - defm Pnt : NVJ_type_basic_tstbit<NotStr, OpcStr, "nt">; + let isPredicatedFalse = isNegCond; + let isBrTaken = !if(isTaken, "true", "false"); + + bits<3> src1; + bits<5> src2; + bits<11> offset; + + let IClass = 0b0010; + let Inst{26} = 0b1; + let Inst{25-23} = majOp; + let Inst{22} = isNegCond; + let Inst{18-16} = src1; + let Inst{13} = isTaken; + let Inst{12-8} = src2; + let Inst{21-20} = offset{10-9}; + let Inst{7-1} = offset{8-2}; } -// Multiclass for GT. -multiclass NVJ_type_rr_ri<string OpcStr> { - defm rrNot : NVJ_type_br_pred_reg<"!", OpcStr>; - defm rr : NVJ_type_br_pred_reg<"", OpcStr>; - defm rrdnNot : NVJ_type_br_pred_2ndDotNew<"!", OpcStr>; - defm rrdn : NVJ_type_br_pred_2ndDotNew<"", OpcStr>; - defm riNot : NVJ_type_br_pred_imm<"!", OpcStr>; - defm ri : NVJ_type_br_pred_imm<"", OpcStr>; +multiclass NVJri_cond<string mnemonic, bits<3> majOp, bit isNegCond> { + // Branch not taken: + def _nt_V4: NVJri_template<mnemonic, majOp, isNegCond, 0>; + // Branch taken: + def _t_V4: NVJri_template<mnemonic, majOp, isNegCond, 1>; } -// Multiclass for EQ. -multiclass NVJ_type_rr_ri_no_2ndDotNew<string OpcStr> { - defm rrNot : NVJ_type_br_pred_reg<"!", OpcStr>; - defm rr : NVJ_type_br_pred_reg<"", OpcStr>; - defm riNot : NVJ_type_br_pred_imm<"!", OpcStr>; - defm ri : NVJ_type_br_pred_imm<"", OpcStr>; +multiclass NVJri_base<string mnemonic, string BaseOp, bits<3> majOp> { + let BaseOpcode = BaseOp#_NVJri in { + defm _t_Jumpnv : NVJri_cond<mnemonic, majOp, 0>; // True Cond + defm _f_Jumpnv : NVJri_cond<mnemonic, majOp, 1>; // False cond + } } -// Multiclass for GTU. -multiclass NVJ_type_rr_ri_no_nOne<string OpcStr> { - defm rrNot : NVJ_type_br_pred_reg<"!", OpcStr>; - defm rr : NVJ_type_br_pred_reg<"", OpcStr>; - defm rrdnNot : NVJ_type_br_pred_2ndDotNew<"!", OpcStr>; - defm rrdn : NVJ_type_br_pred_2ndDotNew<"", OpcStr>; - defm riNot : NVJ_type_br_pred_imm_only<"!", OpcStr>; - defm ri : NVJ_type_br_pred_imm_only<"", OpcStr>; +// if ([!]cmp.eq(Ns.new,#U5)) jump:[n]t #r9:2 +// if ([!]cmp.gt(Ns.new,#U5)) jump:[n]t #r9:2 +// if ([!]cmp.gtu(Ns.new,#U5)) jump:[n]t #r9:2 + +let isPredicated = 1, isBranch = 1, isNewValue = 1, isTerminator = 1, + Defs = [PC], neverHasSideEffects = 1, validSubTargets = HasV4SubT in { + defm CMPEQri : NVJri_base<"cmp.eq", "CMPEQ", 0b000>, PredRel; + defm CMPGTri : NVJri_base<"cmp.gt", "CMPGT", 0b001>, PredRel; + defm CMPGTUri : NVJri_base<"cmp.gtu", "CMPGTU", 0b010>, PredRel; } -// Multiclass for tstbit. 
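The NVJrr_template and NVJri_template classes above spell out the bit layout of the new-value compare-and-jump encodings: IClass 0b0010, the major opcode in bits 25-23, the negated-condition bit in 22, the taken/not-taken hint in bit 13, the new-value register number in 18-16, and the 11-bit branch-offset field split across bits 21-20 and 7-1. The sketch below is a rough standalone packer mirroring that layout for the register-register form, assuming IClass occupies the top four bits as elsewhere in the Hexagon encodings; it is only a reading aid, not the backend's MC encoder.

#include <cassert>
#include <cstdint>

// Mirrors the field placement in NVJrr_template above. 'offset11' is the
// 11-bit branch-offset field value; its bits {10-9} land in Inst{21-20}
// and bits {8-2} in Inst{7-1}, exactly as the 'let Inst{...}' lines say.
uint32_t encodeNVJrr(unsigned majOp, bool negCond, bool taken,
                     unsigned ns, unsigned regOp, uint32_t offset11) {
  assert(majOp < 8 && ns < 8 && regOp < 32 && offset11 < (1u << 11));
  uint32_t insn = 0;
  insn |= 0b0010u << 28;                   // IClass (assumed bits 31-28)
  /* Inst{26} = 0 selects the register form */
  insn |= (majOp & 0x7u) << 23;            // Inst{25-23} = majOp
  insn |= (negCond ? 1u : 0u) << 22;       // Inst{22}    = isNegCond
  insn |= ((offset11 >> 9) & 0x3u) << 20;  // Inst{21-20} = offset{10-9}
  insn |= (ns & 0x7u) << 16;               // Inst{18-16} = Ns (new-value reg)
  insn |= (taken ? 1u : 0u) << 13;         // Inst{13}    = taken hint
  insn |= (regOp & 0x1Fu) << 8;            // Inst{12-8}  = RegOp
  insn |= ((offset11 >> 2) & 0x7Fu) << 1;  // Inst{7-1}   = offset{8-2}
  return insn;
}

int main() {
  // e.g. "if (cmp.eq(r2.new, r5)) jump:t <offset>": majOp 0b000, taken hint set.
  (void)encodeNVJrr(/*majOp=*/0, /*negCond=*/false, /*taken=*/true,
                    /*ns=*/2, /*regOp=*/5, /*offset11=*/0x040);
  return 0;
}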
-multiclass NVJ_type_r0<string OpcStr> { - defm r0Not : NVJ_type_br_pred_tstbit<"!", OpcStr>; - defm r0 : NVJ_type_br_pred_tstbit<"", OpcStr>; - } +//===----------------------------------------------------------------------===// +// multiclass/template class for the new-value compare jumps instruction +// with a register and an hardcoded 0/-1 immediate value. +//===----------------------------------------------------------------------===// + +let isExtendable = 1, opExtendable = 1, isExtentSigned = 1, opExtentBits = 11 in +class NVJ_ConstImm_template<string mnemonic, bits<3> majOp, string ImmVal, + bit isNegCond, bit isTaken> + : NVInst_V4<(outs), + (ins IntRegs:$src1, brtarget:$offset), + "if ("#!if(isNegCond, "!","")#mnemonic + #"($src1.new, #"#ImmVal#")) jump:" + #!if(isTaken, "t","nt")#" $offset", + []>, Requires<[HasV4T]> { -// Base Multiclass for New Value Jump. -multiclass NVJ_type { - defm GT : NVJ_type_rr_ri<"cmp.gt">; - defm EQ : NVJ_type_rr_ri_no_2ndDotNew<"cmp.eq">; - defm GTU : NVJ_type_rr_ri_no_nOne<"cmp.gtu">; - defm TSTBIT : NVJ_type_r0<"tstbit">; + let isPredicatedFalse = isNegCond; + let isBrTaken = !if(isTaken, "true", "false"); + + bits<3> src1; + bits<11> offset; + let IClass = 0b0010; + let Inst{26} = 0b1; + let Inst{25-23} = majOp; + let Inst{22} = isNegCond; + let Inst{18-16} = src1; + let Inst{13} = isTaken; + let Inst{21-20} = offset{10-9}; + let Inst{7-1} = offset{8-2}; } -let isBranch = 1, isTerminator=1, neverHasSideEffects = 1, Defs = [PC] in { - defm JMP_ : NVJ_type; +multiclass NVJ_ConstImm_cond<string mnemonic, bits<3> majOp, string ImmVal, + bit isNegCond> { + // Branch not taken: + def _nt_V4: NVJ_ConstImm_template<mnemonic, majOp, ImmVal, isNegCond, 0>; + // Branch taken: + def _t_V4: NVJ_ConstImm_template<mnemonic, majOp, ImmVal, isNegCond, 1>; } -//===----------------------------------------------------------------------===// -// NV/J - -//===----------------------------------------------------------------------===// +multiclass NVJ_ConstImm_base<string mnemonic, string BaseOp, bits<3> majOp, + string ImmVal> { + let BaseOpcode = BaseOp#_NVJ_ConstImm in { + defm _t_Jumpnv : NVJ_ConstImm_cond<mnemonic, majOp, ImmVal, 0>; // True cond + defm _f_Jumpnv : NVJ_ConstImm_cond<mnemonic, majOp, ImmVal, 1>; // False Cond + } +} + +// if ([!]tstbit(Ns.new,#0)) jump:[n]t #r9:2 +// if ([!]cmp.eq(Ns.new,#-1)) jump:[n]t #r9:2 +// if ([!]cmp.gt(Ns.new,#-1)) jump:[n]t #r9:2 + +let isPredicated = 1, isBranch = 1, isNewValue = 1, isTerminator=1, + Defs = [PC], neverHasSideEffects = 1 in { + defm TSTBIT0 : NVJ_ConstImm_base<"tstbit", "TSTBIT", 0b011, "0">, PredRel; + defm CMPEQn1 : NVJ_ConstImm_base<"cmp.eq", "CMPEQ", 0b100, "-1">, PredRel; + defm CMPGTn1 : NVJ_ConstImm_base<"cmp.gt", "CMPGT", 0b101, "-1">, PredRel; +} //===----------------------------------------------------------------------===// // XTYPE/ALU + @@ -2286,7 +2159,7 @@ def CMPbEQri_V4 : MInst<(outs PredRegs:$dst), def : Pat <(brcond (i1 (setne (and (i32 IntRegs:$src1), 255), u8ImmPred:$src2)), bb:$offset), - (JMP_cNot (CMPbEQri_V4 (i32 IntRegs:$src1), u8ImmPred:$src2), + (JMP_f (CMPbEQri_V4 (i32 IntRegs:$src1), u8ImmPred:$src2), bb:$offset)>, Requires<[HasV4T]>; @@ -2769,9 +2642,9 @@ let isReturn = 1, isTerminator = 1, multiclass ST_Abs_Predbase<string mnemonic, RegisterClass RC, bit isNot, bit isPredNew> { - let PNewValue = !if(isPredNew, "new", "") in + let isPredicatedNew = isPredNew in def NAME#_V4 : STInst2<(outs), - (ins PredRegs:$src1, globaladdressExt:$absaddr, RC: $src2), + (ins PredRegs:$src1, 
u0AlwaysExt:$absaddr, RC: $src2), !if(isNot, "if (!$src1", "if ($src1")#!if(isPredNew, ".new) ", ") ")#mnemonic#"(##$absaddr) = $src2", []>, @@ -2779,7 +2652,7 @@ multiclass ST_Abs_Predbase<string mnemonic, RegisterClass RC, bit isNot, } multiclass ST_Abs_Pred<string mnemonic, RegisterClass RC, bit PredNot> { - let PredSense = !if(PredNot, "false", "true") in { + let isPredicatedFalse = PredNot in { defm _c#NAME : ST_Abs_Predbase<mnemonic, RC, PredNot, 0>; // Predicate new defm _cdn#NAME : ST_Abs_Predbase<mnemonic, RC, PredNot, 1>; @@ -2791,7 +2664,7 @@ multiclass ST_Abs<string mnemonic, string CextOp, RegisterClass RC> { let CextOpcode = CextOp, BaseOpcode = CextOp#_abs in { let opExtendable = 0, isPredicable = 1 in def NAME#_V4 : STInst2<(outs), - (ins globaladdressExt:$absaddr, RC:$src), + (ins u0AlwaysExt:$absaddr, RC:$src), mnemonic#"(##$absaddr) = $src", []>, Requires<[HasV4T]>; @@ -2805,9 +2678,9 @@ multiclass ST_Abs<string mnemonic, string CextOp, RegisterClass RC> { multiclass ST_Abs_Predbase_nv<string mnemonic, RegisterClass RC, bit isNot, bit isPredNew> { - let PNewValue = !if(isPredNew, "new", "") in + let isPredicatedNew = isPredNew in def NAME#_nv_V4 : NVInst_V4<(outs), - (ins PredRegs:$src1, globaladdressExt:$absaddr, RC: $src2), + (ins PredRegs:$src1, u0AlwaysExt:$absaddr, RC: $src2), !if(isNot, "if (!$src1", "if ($src1")#!if(isPredNew, ".new) ", ") ")#mnemonic#"(##$absaddr) = $src2.new", []>, @@ -2815,7 +2688,7 @@ multiclass ST_Abs_Predbase_nv<string mnemonic, RegisterClass RC, bit isNot, } multiclass ST_Abs_Pred_nv<string mnemonic, RegisterClass RC, bit PredNot> { - let PredSense = !if(PredNot, "false", "true") in { + let isPredicatedFalse = PredNot in { defm _c#NAME : ST_Abs_Predbase_nv<mnemonic, RC, PredNot, 0>; // Predicate new defm _cdn#NAME : ST_Abs_Predbase_nv<mnemonic, RC, PredNot, 1>; @@ -2827,7 +2700,7 @@ multiclass ST_Abs_nv<string mnemonic, string CextOp, RegisterClass RC> { let CextOpcode = CextOp, BaseOpcode = CextOp#_abs in { let opExtendable = 0, isPredicable = 1 in def NAME#_nv_V4 : NVInst_V4<(outs), - (ins globaladdressExt:$absaddr, RC:$src), + (ins u0AlwaysExt:$absaddr, RC:$src), mnemonic#"(##$absaddr) = $src.new", []>, Requires<[HasV4T]>; @@ -2840,16 +2713,19 @@ multiclass ST_Abs_nv<string mnemonic, string CextOp, RegisterClass RC> { } let addrMode = Absolute in { + let accessSize = ByteAccess in defm STrib_abs : ST_Abs<"memb", "STrib", IntRegs>, ST_Abs_nv<"memb", "STrib", IntRegs>, AddrModeRel; + let accessSize = HalfWordAccess in defm STrih_abs : ST_Abs<"memh", "STrih", IntRegs>, ST_Abs_nv<"memh", "STrih", IntRegs>, AddrModeRel; + let accessSize = WordAccess in defm STriw_abs : ST_Abs<"memw", "STriw", IntRegs>, ST_Abs_nv<"memw", "STriw", IntRegs>, AddrModeRel; - let isNVStorable = 0 in + let accessSize = DoubleWordAccess, isNVStorable = 0 in defm STrid_abs : ST_Abs<"memd", "STrid", DoubleRegs>, AddrModeRel; } @@ -2875,6 +2751,7 @@ def : Pat<(store (i64 DoubleRegs:$src1), // mem[bhwd](#global)=Rt // if ([!]Pv[.new]) mem[bhwd](##global) = Rt //===----------------------------------------------------------------------===// +let mayStore = 1, isNVStorable = 1 in multiclass ST_GP<string mnemonic, string BaseOp, RegisterClass RC> { let BaseOpcode = BaseOp, isPredicable = 1 in def NAME#_V4 : STInst2<(outs), @@ -2909,15 +2786,16 @@ multiclass ST_GP_nv<string mnemonic, string BaseOp, RegisterClass RC> { } } -let validSubTargets = HasV4SubT, validSubTargets = HasV4SubT in { -defm STd_GP : ST_GP <"memd", "STd_GP", DoubleRegs>, - ST_GP_nv<"memd", "STd_GP", 
DoubleRegs>, NewValueRel ; -defm STb_GP : ST_GP<"memb", "STb_GP", IntRegs>, - ST_GP_nv<"memb", "STb_GP", IntRegs>, NewValueRel ; -defm STh_GP : ST_GP<"memh", "STh_GP", IntRegs>, - ST_GP_nv<"memh", "STh_GP", IntRegs>, NewValueRel ; -defm STw_GP : ST_GP<"memw", "STw_GP", IntRegs>, - ST_GP_nv<"memw", "STw_GP", IntRegs>, NewValueRel ; +let validSubTargets = HasV4SubT, neverHasSideEffects = 1 in { + let isNVStorable = 0 in + defm STd_GP : ST_GP <"memd", "STd_GP", DoubleRegs>, PredNewRel; + + defm STb_GP : ST_GP<"memb", "STb_GP", IntRegs>, + ST_GP_nv<"memb", "STb_GP", IntRegs>, NewValueRel; + defm STh_GP : ST_GP<"memh", "STh_GP", IntRegs>, + ST_GP_nv<"memh", "STh_GP", IntRegs>, NewValueRel; + defm STw_GP : ST_GP<"memw", "STw_GP", IntRegs>, + ST_GP_nv<"memw", "STw_GP", IntRegs>, NewValueRel; } // 64 bit atomic store @@ -2974,9 +2852,9 @@ def : Pat<(store (i32 IntRegs:$src1), (HexagonCONST32_GP tglobaladdr:$global)), //===----------------------------------------------------------------------===// multiclass LD_Abs_Predbase<string mnemonic, RegisterClass RC, bit isNot, bit isPredNew> { - let PNewValue = !if(isPredNew, "new", "") in + let isPredicatedNew = isPredNew in def NAME : LDInst2<(outs RC:$dst), - (ins PredRegs:$src1, globaladdressExt:$absaddr), + (ins PredRegs:$src1, u0AlwaysExt:$absaddr), !if(isNot, "if (!$src1", "if ($src1")#!if(isPredNew, ".new) ", ") ")#"$dst = "#mnemonic#"(##$absaddr)", []>, @@ -2984,7 +2862,7 @@ multiclass LD_Abs_Predbase<string mnemonic, RegisterClass RC, bit isNot, } multiclass LD_Abs_Pred<string mnemonic, RegisterClass RC, bit PredNot> { - let PredSense = !if(PredNot, "false", "true") in { + let isPredicatedFalse = PredNot in { defm _c#NAME : LD_Abs_Predbase<mnemonic, RC, PredNot, 0>; // Predicate new defm _cdn#NAME : LD_Abs_Predbase<mnemonic, RC, PredNot, 1>; @@ -2996,7 +2874,7 @@ multiclass LD_Abs<string mnemonic, string CextOp, RegisterClass RC> { let CextOpcode = CextOp, BaseOpcode = CextOp#_abs in { let opExtendable = 1, isPredicable = 1 in def NAME#_V4 : LDInst2<(outs RC:$dst), - (ins globaladdressExt:$absaddr), + (ins u0AlwaysExt:$absaddr), "$dst = "#mnemonic#"(##$absaddr)", []>, Requires<[HasV4T]>; @@ -3009,33 +2887,37 @@ multiclass LD_Abs<string mnemonic, string CextOp, RegisterClass RC> { } let addrMode = Absolute in { + let accessSize = ByteAccess in { defm LDrib_abs : LD_Abs<"memb", "LDrib", IntRegs>, AddrModeRel; defm LDriub_abs : LD_Abs<"memub", "LDriub", IntRegs>, AddrModeRel; + } + let accessSize = HalfWordAccess in { defm LDrih_abs : LD_Abs<"memh", "LDrih", IntRegs>, AddrModeRel; defm LDriuh_abs : LD_Abs<"memuh", "LDriuh", IntRegs>, AddrModeRel; + } + let accessSize = WordAccess in defm LDriw_abs : LD_Abs<"memw", "LDriw", IntRegs>, AddrModeRel; + + let accessSize = DoubleWordAccess in defm LDrid_abs : LD_Abs<"memd", "LDrid", DoubleRegs>, AddrModeRel; } -let Predicates = [HasV4T], AddedComplexity = 30 in +let Predicates = [HasV4T], AddedComplexity = 30 in { def : Pat<(i32 (load (HexagonCONST32 tglobaladdr:$absaddr))), (LDriw_abs_V4 tglobaladdr: $absaddr)>; -let Predicates = [HasV4T], AddedComplexity=30 in def : Pat<(i32 (sextloadi8 (HexagonCONST32 tglobaladdr:$absaddr))), (LDrib_abs_V4 tglobaladdr:$absaddr)>; -let Predicates = [HasV4T], AddedComplexity=30 in def : Pat<(i32 (zextloadi8 (HexagonCONST32 tglobaladdr:$absaddr))), (LDriub_abs_V4 tglobaladdr:$absaddr)>; -let Predicates = [HasV4T], AddedComplexity=30 in def : Pat<(i32 (sextloadi16 (HexagonCONST32 tglobaladdr:$absaddr))), (LDrih_abs_V4 tglobaladdr:$absaddr)>; -let Predicates = [HasV4T], 
AddedComplexity=30 in def : Pat<(i32 (zextloadi16 (HexagonCONST32 tglobaladdr:$absaddr))), (LDriuh_abs_V4 tglobaladdr:$absaddr)>; +} //===----------------------------------------------------------------------===// // multiclass for load instructions with GP-relative addressing mode. @@ -3058,12 +2940,12 @@ multiclass LD_GP<string mnemonic, string BaseOp, RegisterClass RC> { } } -defm LDd_GP : LD_GP<"memd", "LDd_GP", DoubleRegs>; -defm LDb_GP : LD_GP<"memb", "LDb_GP", IntRegs>; -defm LDub_GP : LD_GP<"memub", "LDub_GP", IntRegs>; -defm LDh_GP : LD_GP<"memh", "LDh_GP", IntRegs>; -defm LDuh_GP : LD_GP<"memuh", "LDuh_GP", IntRegs>; -defm LDw_GP : LD_GP<"memw", "LDw_GP", IntRegs>; +defm LDd_GP : LD_GP<"memd", "LDd_GP", DoubleRegs>, PredNewRel; +defm LDb_GP : LD_GP<"memb", "LDb_GP", IntRegs>, PredNewRel; +defm LDub_GP : LD_GP<"memub", "LDub_GP", IntRegs>, PredNewRel; +defm LDh_GP : LD_GP<"memh", "LDh_GP", IntRegs>, PredNewRel; +defm LDuh_GP : LD_GP<"memuh", "LDuh_GP", IntRegs>, PredNewRel; +defm LDw_GP : LD_GP<"memw", "LDw_GP", IntRegs>, PredNewRel; def : Pat <(atomic_load_64 (HexagonCONST32_GP tglobaladdr:$global)), (i64 (LDd_GP_V4 tglobaladdr:$global))>; @@ -3139,9 +3021,10 @@ def : Pat <(i32 (load (HexagonCONST32_GP tglobaladdr:$global))), // Transfer global address into a register -let AddedComplexity=50, isMoveImm = 1, isReMaterializable = 1 in -def TFRI_V4 : ALU32_ri<(outs IntRegs:$dst), (ins globaladdress:$src1), - "$dst = ##$src1", +let isExtended = 1, opExtendable = 1, AddedComplexity=50, isMoveImm = 1, +isAsCheapAsAMove = 1, isReMaterializable = 1, validSubTargets = HasV4SubT in +def TFRI_V4 : ALU32_ri<(outs IntRegs:$dst), (ins s16Ext:$src1), + "$dst = #$src1", [(set IntRegs:$dst, (HexagonCONST32 tglobaladdr:$src1))]>, Requires<[HasV4T]>; @@ -3185,19 +3068,21 @@ def : Pat<(HexagonCONST32_GP tglobaladdr:$src1), // Load - Indirect with long offset: These instructions take global address // as an operand -let AddedComplexity = 10 in +let isExtended = 1, opExtendable = 3, AddedComplexity = 40, +validSubTargets = HasV4SubT in def LDrid_ind_lo_V4 : LDInst<(outs DoubleRegs:$dst), - (ins IntRegs:$src1, u2Imm:$src2, globaladdress:$offset), + (ins IntRegs:$src1, u2Imm:$src2, globaladdressExt:$offset), "$dst=memd($src1<<#$src2+##$offset)", [(set (i64 DoubleRegs:$dst), (load (add (shl IntRegs:$src1, u2ImmPred:$src2), (HexagonCONST32 tglobaladdr:$offset))))]>, Requires<[HasV4T]>; -let AddedComplexity = 10 in +let AddedComplexity = 40 in multiclass LD_indirect_lo<string OpcStr, PatFrag OpNode> { +let isExtended = 1, opExtendable = 3, validSubTargets = HasV4SubT in def _lo_V4 : LDInst<(outs IntRegs:$dst), - (ins IntRegs:$src1, u2Imm:$src2, globaladdress:$offset), + (ins IntRegs:$src1, u2Imm:$src2, globaladdressExt:$offset), !strconcat("$dst = ", !strconcat(OpcStr, "($src1<<#$src2+##$offset)")), [(set IntRegs:$dst, @@ -3208,202 +3093,53 @@ multiclass LD_indirect_lo<string OpcStr, PatFrag OpNode> { defm LDrib_ind : LD_indirect_lo<"memb", sextloadi8>; defm LDriub_ind : LD_indirect_lo<"memub", zextloadi8>; +defm LDriub_ind_anyext : LD_indirect_lo<"memub", extloadi8>; defm LDrih_ind : LD_indirect_lo<"memh", sextloadi16>; defm LDriuh_ind : LD_indirect_lo<"memuh", zextloadi16>; +defm LDriuh_ind_anyext : LD_indirect_lo<"memuh", extloadi16>; defm LDriw_ind : LD_indirect_lo<"memw", load>; -// Store - Indirect with long offset: These instructions take global address -// as an operand -let AddedComplexity = 10 in -def STrid_ind_lo_V4 : STInst<(outs), - (ins IntRegs:$src1, u2Imm:$src2, globaladdress:$src3, - 
DoubleRegs:$src4), - "memd($src1<<#$src2+#$src3) = $src4", - [(store (i64 DoubleRegs:$src4), - (add (shl IntRegs:$src1, u2ImmPred:$src2), - (HexagonCONST32 tglobaladdr:$src3)))]>, - Requires<[HasV4T]>; - -let AddedComplexity = 10 in -multiclass ST_indirect_lo<string OpcStr, PatFrag OpNode> { - def _lo_V4 : STInst<(outs), - (ins IntRegs:$src1, u2Imm:$src2, globaladdress:$src3, - IntRegs:$src4), - !strconcat(OpcStr, "($src1<<#$src2+##$src3) = $src4"), - [(OpNode (i32 IntRegs:$src4), - (add (shl IntRegs:$src1, u2ImmPred:$src2), - (HexagonCONST32 tglobaladdr:$src3)))]>, - Requires<[HasV4T]>; -} - -defm STrib_ind : ST_indirect_lo<"memb", truncstorei8>; -defm STrih_ind : ST_indirect_lo<"memh", truncstorei16>; -defm STriw_ind : ST_indirect_lo<"memw", store>; - -// Store - absolute addressing mode: These instruction take constant -// value as the extended operand. -multiclass ST_absimm<string OpcStr> { -let isExtended = 1, opExtendable = 0, isPredicable = 1, -validSubTargets = HasV4SubT in - def _abs_V4 : STInst2<(outs), - (ins u0AlwaysExt:$src1, IntRegs:$src2), - !strconcat(OpcStr, "(##$src1) = $src2"), - []>, - Requires<[HasV4T]>; - -let isExtended = 1, opExtendable = 1, isPredicated = 1, -validSubTargets = HasV4SubT in { - def _abs_cPt_V4 : STInst2<(outs), - (ins PredRegs:$src1, u0AlwaysExt:$src2, IntRegs:$src3), - !strconcat("if ($src1)", !strconcat(OpcStr, "(##$src2) = $src3")), - []>, - Requires<[HasV4T]>; - - def _abs_cNotPt_V4 : STInst2<(outs), - (ins PredRegs:$src1, u0AlwaysExt:$src2, IntRegs:$src3), - !strconcat("if (!$src1)", !strconcat(OpcStr, "(##$src2) = $src3")), - []>, - Requires<[HasV4T]>; - - def _abs_cdnPt_V4 : STInst2<(outs), - (ins PredRegs:$src1, u0AlwaysExt:$src2, IntRegs:$src3), - !strconcat("if ($src1.new)", - !strconcat(OpcStr, "(##$src2) = $src3")), - []>, - Requires<[HasV4T]>; - - def _abs_cdnNotPt_V4 : STInst2<(outs), - (ins PredRegs:$src1, u0AlwaysExt:$src2, IntRegs:$src3), - !strconcat("if (!$src1.new)", - !strconcat(OpcStr, "(##$src2) = $src3")), - []>, - Requires<[HasV4T]>; -} - -let isExtended = 1, opExtendable = 0, mayStore = 1, isNVStore = 1, -validSubTargets = HasV4SubT in - def _abs_nv_V4 : NVInst_V4<(outs), - (ins u0AlwaysExt:$src1, IntRegs:$src2), - !strconcat(OpcStr, "(##$src1) = $src2.new"), - []>, - Requires<[HasV4T]>; - -let isExtended = 1, opExtendable = 1, mayStore = 1, isPredicated = 1, -isNVStore = 1, validSubTargets = HasV4SubT in { - def _abs_cPt_nv_V4 : NVInst_V4<(outs), - (ins PredRegs:$src1, u0AlwaysExt:$src2, IntRegs:$src3), - !strconcat("if ($src1)", - !strconcat(OpcStr, "(##$src2) = $src3.new")), - []>, - Requires<[HasV4T]>; - - def _abs_cNotPt_nv_V4 : NVInst_V4<(outs), - (ins PredRegs:$src1, u0AlwaysExt:$src2, IntRegs:$src3), - !strconcat("if (!$src1)", - !strconcat(OpcStr, "(##$src2) = $src3.new")), - []>, - Requires<[HasV4T]>; - - def _abs_cdnPt_nv_V4 : NVInst_V4<(outs), - (ins PredRegs:$src1, u0AlwaysExt:$src2, IntRegs:$src3), - !strconcat("if ($src1.new)", - !strconcat(OpcStr, "(##$src2) = $src3.new")), - []>, - Requires<[HasV4T]>; - - def _abs_cdnNotPt_nv_V4 : NVInst_V4<(outs), - (ins PredRegs:$src1, u0AlwaysExt:$src2, IntRegs:$src3), - !strconcat("if (!$src1.new)", - !strconcat(OpcStr, "(##$src2) = $src3.new")), - []>, - Requires<[HasV4T]>; -} -} +let AddedComplexity = 40 in +def : Pat <(i32 (sextloadi8 (add IntRegs:$src1, + (NumUsesBelowThresCONST32 tglobaladdr:$offset)))), + (i32 (LDrib_ind_lo_V4 IntRegs:$src1, 0, tglobaladdr:$offset))>, + Requires<[HasV4T]>; -defm STrib_imm : ST_absimm<"memb">; -defm STrih_imm : ST_absimm<"memh">; 
-defm STriw_imm : ST_absimm<"memw">; +let AddedComplexity = 40 in +def : Pat <(i32 (zextloadi8 (add IntRegs:$src1, + (NumUsesBelowThresCONST32 tglobaladdr:$offset)))), + (i32 (LDriub_ind_lo_V4 IntRegs:$src1, 0, tglobaladdr:$offset))>, + Requires<[HasV4T]>; let Predicates = [HasV4T], AddedComplexity = 30 in { def : Pat<(truncstorei8 (i32 IntRegs:$src1), u0AlwaysExtPred:$src2), - (STrib_imm_abs_V4 u0AlwaysExtPred:$src2, IntRegs: $src1)>; + (STrib_abs_V4 u0AlwaysExtPred:$src2, IntRegs: $src1)>; def : Pat<(truncstorei16 (i32 IntRegs:$src1), u0AlwaysExtPred:$src2), - (STrih_imm_abs_V4 u0AlwaysExtPred:$src2, IntRegs: $src1)>; + (STrih_abs_V4 u0AlwaysExtPred:$src2, IntRegs: $src1)>; def : Pat<(store (i32 IntRegs:$src1), u0AlwaysExtPred:$src2), - (STriw_imm_abs_V4 u0AlwaysExtPred:$src2, IntRegs: $src1)>; -} - -// Load - absolute addressing mode: These instruction take constant -// value as the extended operand - -multiclass LD_absimm<string OpcStr> { -let isExtended = 1, opExtendable = 1, isPredicable = 1, -validSubTargets = HasV4SubT in - def _abs_V4 : LDInst2<(outs IntRegs:$dst), - (ins u0AlwaysExt:$src), - !strconcat("$dst = ", - !strconcat(OpcStr, "(##$src)")), - []>, - Requires<[HasV4T]>; - -let isExtended = 1, opExtendable = 2, isPredicated = 1, -validSubTargets = HasV4SubT in { - def _abs_cPt_V4 : LDInst2<(outs IntRegs:$dst), - (ins PredRegs:$src1, u0AlwaysExt:$src2), - !strconcat("if ($src1) $dst = ", - !strconcat(OpcStr, "(##$src2)")), - []>, - Requires<[HasV4T]>; - - def _abs_cNotPt_V4 : LDInst2<(outs IntRegs:$dst), - (ins PredRegs:$src1, u0AlwaysExt:$src2), - !strconcat("if (!$src1) $dst = ", - !strconcat(OpcStr, "(##$src2)")), - []>, - Requires<[HasV4T]>; - - def _abs_cdnPt_V4 : LDInst2<(outs IntRegs:$dst), - (ins PredRegs:$src1, u0AlwaysExt:$src2), - !strconcat("if ($src1.new) $dst = ", - !strconcat(OpcStr, "(##$src2)")), - []>, - Requires<[HasV4T]>; - - def _abs_cdnNotPt_V4 : LDInst2<(outs IntRegs:$dst), - (ins PredRegs:$src1, u0AlwaysExt:$src2), - !strconcat("if (!$src1.new) $dst = ", - !strconcat(OpcStr, "(##$src2)")), - []>, - Requires<[HasV4T]>; + (STriw_abs_V4 u0AlwaysExtPred:$src2, IntRegs: $src1)>; } -} - -defm LDrib_imm : LD_absimm<"memb">; -defm LDriub_imm : LD_absimm<"memub">; -defm LDrih_imm : LD_absimm<"memh">; -defm LDriuh_imm : LD_absimm<"memuh">; -defm LDriw_imm : LD_absimm<"memw">; let Predicates = [HasV4T], AddedComplexity = 30 in { def : Pat<(i32 (load u0AlwaysExtPred:$src)), - (LDriw_imm_abs_V4 u0AlwaysExtPred:$src)>; + (LDriw_abs_V4 u0AlwaysExtPred:$src)>; def : Pat<(i32 (sextloadi8 u0AlwaysExtPred:$src)), - (LDrib_imm_abs_V4 u0AlwaysExtPred:$src)>; + (LDrib_abs_V4 u0AlwaysExtPred:$src)>; def : Pat<(i32 (zextloadi8 u0AlwaysExtPred:$src)), - (LDriub_imm_abs_V4 u0AlwaysExtPred:$src)>; + (LDriub_abs_V4 u0AlwaysExtPred:$src)>; def : Pat<(i32 (sextloadi16 u0AlwaysExtPred:$src)), - (LDrih_imm_abs_V4 u0AlwaysExtPred:$src)>; + (LDrih_abs_V4 u0AlwaysExtPred:$src)>; def : Pat<(i32 (zextloadi16 u0AlwaysExtPred:$src)), - (LDriuh_imm_abs_V4 u0AlwaysExtPred:$src)>; + (LDriuh_abs_V4 u0AlwaysExtPred:$src)>; } -// Indexed store double word - global address. +// Indexed store word - global address. 
// memw(Rs+#u6:2)=#S8 let AddedComplexity = 10 in def STriw_offset_ext_V4 : STInst<(outs), diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonMachineFunctionInfo.h b/contrib/llvm/lib/Target/Hexagon/HexagonMachineFunctionInfo.h index 0318c519..bd7b26a 100644 --- a/contrib/llvm/lib/Target/Hexagon/HexagonMachineFunctionInfo.h +++ b/contrib/llvm/lib/Target/Hexagon/HexagonMachineFunctionInfo.h @@ -29,15 +29,18 @@ class HexagonMachineFunctionInfo : public MachineFunctionInfo { std::vector<MachineInstr*> AllocaAdjustInsts; int VarArgsFrameIndex; bool HasClobberLR; + bool HasEHReturn; std::map<const MachineInstr*, unsigned> PacketInfo; public: - HexagonMachineFunctionInfo() : SRetReturnReg(0), HasClobberLR(0) {} + HexagonMachineFunctionInfo() : SRetReturnReg(0), HasClobberLR(0), + HasEHReturn(false) {} HexagonMachineFunctionInfo(MachineFunction &MF) : SRetReturnReg(0), - HasClobberLR(0) {} + HasClobberLR(0), + HasEHReturn(false) {} unsigned getSRetReturnReg() const { return SRetReturnReg; } void setSRetReturnReg(unsigned Reg) { SRetReturnReg = Reg; } @@ -69,6 +72,8 @@ public: void setHasClobberLR(bool v) { HasClobberLR = v; } bool hasClobberLR() const { return HasClobberLR; } + bool hasEHReturn() const { return HasEHReturn; }; + void setHasEHReturn(bool H = true) { HasEHReturn = H; }; }; } // End llvm namespace diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonNewValueJump.cpp b/contrib/llvm/lib/Target/Hexagon/HexagonNewValueJump.cpp index 5e80e48..05e6968 100644 --- a/contrib/llvm/lib/Target/Hexagon/HexagonNewValueJump.cpp +++ b/contrib/llvm/lib/Target/Hexagon/HexagonNewValueJump.cpp @@ -22,29 +22,31 @@ // //===----------------------------------------------------------------------===// #define DEBUG_TYPE "hexagon-nvj" -#include "Hexagon.h" -#include "HexagonInstrInfo.h" -#include "HexagonMachineFunctionInfo.h" -#include "HexagonRegisterInfo.h" -#include "HexagonSubtarget.h" -#include "HexagonTargetMachine.h" +#include "llvm/PassSupport.h" +#include "llvm/Support/Compiler.h" +#include "llvm/Support/Debug.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/Statistic.h" -#include "llvm/CodeGen/LiveVariables.h" -#include "llvm/CodeGen/MachineFunctionAnalysis.h" -#include "llvm/CodeGen/MachineFunctionPass.h" -#include "llvm/CodeGen/MachineInstrBuilder.h" -#include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/Passes.h" #include "llvm/CodeGen/ScheduleDAGInstrs.h" -#include "llvm/PassSupport.h" -#include "llvm/Support/CommandLine.h" -#include "llvm/Support/Compiler.h" -#include "llvm/Support/Debug.h" -#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/LiveVariables.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/MachineFunctionAnalysis.h" #include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetRegisterInfo.h" +#include "Hexagon.h" +#include "HexagonTargetMachine.h" +#include "HexagonRegisterInfo.h" +#include "HexagonSubtarget.h" +#include "HexagonInstrInfo.h" +#include "HexagonMachineFunctionInfo.h" + #include <map> + +#include "llvm/Support/CommandLine.h" using namespace llvm; STATISTIC(NumNVJGenerated, "Number of New Value Jump Instructions created"); @@ -57,6 +59,11 @@ static cl::opt<bool> DisableNewValueJumps("disable-nvjump", cl::Hidden, cl::ZeroOrMore, cl::init(false), cl::desc("Disable New Value Jumps")); +namespace llvm { + void initializeHexagonNewValueJumpPass(PassRegistry&); +} + + namespace 
{ struct HexagonNewValueJump : public MachineFunctionPass { const HexagonInstrInfo *QII; @@ -65,9 +72,12 @@ namespace { public: static char ID; - HexagonNewValueJump() : MachineFunctionPass(ID) { } + HexagonNewValueJump() : MachineFunctionPass(ID) { + initializeHexagonNewValueJumpPass(*PassRegistry::getPassRegistry()); + } virtual void getAnalysisUsage(AnalysisUsage &AU) const { + AU.addRequired<MachineBranchProbabilityInfo>(); MachineFunctionPass::getAnalysisUsage(AU); } @@ -78,6 +88,8 @@ namespace { virtual bool runOnMachineFunction(MachineFunction &Fn); private: + /// \brief A handle to the branch probability pass. + const MachineBranchProbabilityInfo *MBPI; }; @@ -85,6 +97,13 @@ namespace { char HexagonNewValueJump::ID = 0; +INITIALIZE_PASS_BEGIN(HexagonNewValueJump, "hexagon-nvj", + "Hexagon NewValueJump", false, false) +INITIALIZE_PASS_DEPENDENCY(MachineBranchProbabilityInfo) +INITIALIZE_PASS_END(HexagonNewValueJump, "hexagon-nvj", + "Hexagon NewValueJump", false, false) + + // We have identified this II could be feeder to NVJ, // verify that it can be. static bool canBeFeederToNewValueJump(const HexagonInstrInfo *QII, @@ -208,19 +227,15 @@ static bool canCompareBeNewValueJump(const HexagonInstrInfo *QII, // range specified by the arch. if (!secondReg) { int64_t v = MI->getOperand(2).getImm(); - if (MI->getOpcode() == Hexagon::CMPGEri || - (MI->getOpcode() == Hexagon::CMPGEUri && v > 0)) - --v; if (!(isUInt<5>(v) || ((MI->getOpcode() == Hexagon::CMPEQri || - MI->getOpcode() == Hexagon::CMPGTri || - MI->getOpcode() == Hexagon::CMPGEri) && + MI->getOpcode() == Hexagon::CMPGTri) && (v == -1)))) return false; } - unsigned cmpReg1, cmpOp2; + unsigned cmpReg1, cmpOp2 = 0; // cmpOp2 assignment silences compiler warning. cmpReg1 = MI->getOperand(1).getReg(); if (secondReg) { @@ -271,58 +286,63 @@ static bool canCompareBeNewValueJump(const HexagonInstrInfo *QII, // Given a compare operator, return a matching New Value Jump // compare operator. Make sure that MI here is included in // HexagonInstrInfo.cpp::isNewValueJumpCandidate -static unsigned getNewValueJumpOpcode(const MachineInstr *MI, int reg, - bool secondRegNewified) { +static unsigned getNewValueJumpOpcode(MachineInstr *MI, int reg, + bool secondRegNewified, + MachineBasicBlock *jmpTarget, + const MachineBranchProbabilityInfo + *MBPI) { + bool taken = false; + MachineBasicBlock *Src = MI->getParent(); + const BranchProbability Prediction = + MBPI->getEdgeProbability(Src, jmpTarget); + + if (Prediction >= BranchProbability(1,2)) + taken = true; + switch (MI->getOpcode()) { case Hexagon::CMPEQrr: - return Hexagon::JMP_EQrrPt_nv_V4; + return taken ? Hexagon::CMPEQrr_t_Jumpnv_t_V4 + : Hexagon::CMPEQrr_t_Jumpnv_nt_V4; case Hexagon::CMPEQri: { if (reg >= 0) - return Hexagon::JMP_EQriPt_nv_V4; + return taken ? Hexagon::CMPEQri_t_Jumpnv_t_V4 + : Hexagon::CMPEQri_t_Jumpnv_nt_V4; else - return Hexagon::JMP_EQriPtneg_nv_V4; + return taken ? Hexagon::CMPEQn1_t_Jumpnv_t_V4 + : Hexagon::CMPEQn1_t_Jumpnv_nt_V4; } - case Hexagon::CMPLTrr: case Hexagon::CMPGTrr: { if (secondRegNewified) - return Hexagon::JMP_GTrrdnPt_nv_V4; + return taken ? Hexagon::CMPLTrr_t_Jumpnv_t_V4 + : Hexagon::CMPLTrr_t_Jumpnv_nt_V4; else - return Hexagon::JMP_GTrrPt_nv_V4; - } - - case Hexagon::CMPGEri: { - if (reg >= 1) - return Hexagon::JMP_GTriPt_nv_V4; - else - return Hexagon::JMP_GTriPtneg_nv_V4; + return taken ? 
Hexagon::CMPGTrr_t_Jumpnv_t_V4 + : Hexagon::CMPGTrr_t_Jumpnv_nt_V4; } case Hexagon::CMPGTri: { if (reg >= 0) - return Hexagon::JMP_GTriPt_nv_V4; + return taken ? Hexagon::CMPGTri_t_Jumpnv_t_V4 + : Hexagon::CMPGTri_t_Jumpnv_nt_V4; else - return Hexagon::JMP_GTriPtneg_nv_V4; + return taken ? Hexagon::CMPGTn1_t_Jumpnv_t_V4 + : Hexagon::CMPGTn1_t_Jumpnv_nt_V4; } - case Hexagon::CMPLTUrr: case Hexagon::CMPGTUrr: { if (secondRegNewified) - return Hexagon::JMP_GTUrrdnPt_nv_V4; + return taken ? Hexagon::CMPLTUrr_t_Jumpnv_t_V4 + : Hexagon::CMPLTUrr_t_Jumpnv_nt_V4; else - return Hexagon::JMP_GTUrrPt_nv_V4; + return taken ? Hexagon::CMPGTUrr_t_Jumpnv_t_V4 + : Hexagon::CMPGTUrr_t_Jumpnv_nt_V4; } case Hexagon::CMPGTUri: - return Hexagon::JMP_GTUriPt_nv_V4; - - case Hexagon::CMPGEUri: { - if (reg == 0) - return Hexagon::JMP_EQrrPt_nv_V4; - else - return Hexagon::JMP_GTUriPt_nv_V4; - } + return taken ? Hexagon::CMPGTUri_t_Jumpnv_t_V4 + : Hexagon::CMPGTUri_t_Jumpnv_nt_V4; default: llvm_unreachable("Could not find matching New Value Jump instruction."); @@ -346,6 +366,7 @@ bool HexagonNewValueJump::runOnMachineFunction(MachineFunction &MF) { QII = static_cast<const HexagonInstrInfo *>(MF.getTarget().getInstrInfo()); QRI = static_cast<const HexagonRegisterInfo *>(MF.getTarget().getRegisterInfo()); + MBPI = &getAnalysis<MachineBranchProbabilityInfo>(); if (!QRI->Subtarget.hasV4TOps() || DisableNewValueJumps) { @@ -393,12 +414,12 @@ bool HexagonNewValueJump::runOnMachineFunction(MachineFunction &MF) { DEBUG(dbgs() << "Instr: "; MI->dump(); dbgs() << "\n"); if (!foundJump && - (MI->getOpcode() == Hexagon::JMP_c || - MI->getOpcode() == Hexagon::JMP_cNot || - MI->getOpcode() == Hexagon::JMP_cdnPt || - MI->getOpcode() == Hexagon::JMP_cdnPnt || - MI->getOpcode() == Hexagon::JMP_cdnNotPt || - MI->getOpcode() == Hexagon::JMP_cdnNotPnt)) { + (MI->getOpcode() == Hexagon::JMP_t || + MI->getOpcode() == Hexagon::JMP_f || + MI->getOpcode() == Hexagon::JMP_tnew_t || + MI->getOpcode() == Hexagon::JMP_tnew_nt || + MI->getOpcode() == Hexagon::JMP_fnew_t || + MI->getOpcode() == Hexagon::JMP_fnew_nt)) { // This is where you would insert your compare and // instr that feeds compare jmpPos = MII; @@ -434,9 +455,9 @@ bool HexagonNewValueJump::runOnMachineFunction(MachineFunction &MF) { jmpTarget = MI->getOperand(1).getMBB(); foundJump = true; - if (MI->getOpcode() == Hexagon::JMP_cNot || - MI->getOpcode() == Hexagon::JMP_cdnNotPt || - MI->getOpcode() == Hexagon::JMP_cdnNotPnt) { + if (MI->getOpcode() == Hexagon::JMP_f || + MI->getOpcode() == Hexagon::JMP_fnew_t || + MI->getOpcode() == Hexagon::JMP_fnew_nt) { invertPredicate = true; } continue; @@ -525,10 +546,8 @@ bool HexagonNewValueJump::runOnMachineFunction(MachineFunction &MF) { if (isSecondOpReg) { // In case of CMPLT, or CMPLTU, or EQ with the second register // to newify, swap the operands. 
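// Aside: a minimal standalone sketch of the branch-prediction choice made in
// getNewValueJumpOpcode above -- an edge predicted taken (probability >= 1/2)
// selects the "_t" form of the new-value compare-and-jump, otherwise the "_nt"
// form. EdgeProb/predictedTaken/selectHint are illustrative names, not LLVM's
// BranchProbability / MachineBranchProbabilityInfo API.
#include <cstdint>

struct EdgeProb {            // probability = Num / Den
  uint32_t Num, Den;
};

// Compare Num/Den against 1/2 without floating point: 2*Num >= Den.
constexpr bool predictedTaken(EdgeProb P) {
  return 2ull * P.Num >= P.Den;
}

enum class NVJumpHint { TakenForm, NotTakenForm };

// Mirror of the `taken ? ..._Jumpnv_t_V4 : ..._Jumpnv_nt_V4` pattern.
constexpr NVJumpHint selectHint(EdgeProb P) {
  return predictedTaken(P) ? NVJumpHint::TakenForm : NVJumpHint::NotTakenForm;
}

static_assert(selectHint({3, 4}) == NVJumpHint::TakenForm, "75% -> taken");
static_assert(selectHint({1, 4}) == NVJumpHint::NotTakenForm, "25% -> not taken");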
- if (cmpInstr->getOpcode() == Hexagon::CMPLTrr || - cmpInstr->getOpcode() == Hexagon::CMPLTUrr || - (cmpInstr->getOpcode() == Hexagon::CMPEQrr && - feederReg == (unsigned) cmpOp2)) { + if (cmpInstr->getOpcode() == Hexagon::CMPEQrr && + feederReg == (unsigned) cmpOp2) { unsigned tmp = cmpReg1; bool tmpIsKill = MO1IsKill; cmpReg1 = cmpOp2; @@ -582,42 +601,34 @@ bool HexagonNewValueJump::runOnMachineFunction(MachineFunction &MF) { assert((QII->isNewValueJumpCandidate(cmpInstr)) && "This compare is not a New Value Jump candidate."); unsigned opc = getNewValueJumpOpcode(cmpInstr, cmpOp2, - isSecondOpNewified); + isSecondOpNewified, + jmpTarget, MBPI); if (invertPredicate) opc = QII->getInvertedPredicatedOpcode(opc); - // Manage the conversions from CMPGEUri to either CMPEQrr - // or CMPGTUri properly. See Arch spec for CMPGEUri instructions. - // This has to be after the getNewValueJumpOpcode function call as - // second operand of the compare could be modified in this logic. - if (cmpInstr->getOpcode() == Hexagon::CMPGEUri) { - if (cmpOp2 == 0) { - cmpOp2 = cmpReg1; - MO2IsKill = MO1IsKill; - isSecondOpReg = true; - } else - --cmpOp2; - } - - // Manage the conversions from CMPGEri to CMPGTUri properly. - // See Arch spec for CMPGEri instructions. - if (cmpInstr->getOpcode() == Hexagon::CMPGEri) - --cmpOp2; - - if (isSecondOpReg) { + if (isSecondOpReg) NewMI = BuildMI(*MBB, jmpPos, dl, QII->get(opc)) .addReg(cmpReg1, getKillRegState(MO1IsKill)) .addReg(cmpOp2, getKillRegState(MO2IsKill)) .addMBB(jmpTarget); - } - else { + + else if ((cmpInstr->getOpcode() == Hexagon::CMPEQri || + cmpInstr->getOpcode() == Hexagon::CMPGTri) && + cmpOp2 == -1 ) + // Corresponding new-value compare jump instructions don't have the + // operand for -1 immediate value. + NewMI = BuildMI(*MBB, jmpPos, dl, + QII->get(opc)) + .addReg(cmpReg1, getKillRegState(MO1IsKill)) + .addMBB(jmpTarget); + + else NewMI = BuildMI(*MBB, jmpPos, dl, QII->get(opc)) .addReg(cmpReg1, getKillRegState(MO1IsKill)) .addImm(cmpOp2) .addMBB(jmpTarget); - } assert(NewMI && "New Value Jump Instruction Not created!"); if (cmpInstr->getOperand(0).isReg() && diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonPeephole.cpp b/contrib/llvm/lib/Target/Hexagon/HexagonPeephole.cpp index 576f1d7..89e3406 100644 --- a/contrib/llvm/lib/Target/Hexagon/HexagonPeephole.cpp +++ b/contrib/llvm/lib/Target/Hexagon/HexagonPeephole.cpp @@ -61,10 +61,6 @@ static cl::opt<bool> DisableHexagonPeephole("disable-hexagon-peephole", cl::Hidden, cl::ZeroOrMore, cl::init(false), cl::desc("Disable Peephole Optimization")); -static cl::opt<int> -DbgPNPCount("pnp-count", cl::init(-1), cl::Hidden, - cl::desc("Maximum number of P=NOT(P) to be optimized")); - static cl::opt<bool> DisablePNotP("disable-hexagon-pnotp", cl::Hidden, cl::ZeroOrMore, cl::init(false), cl::desc("Disable Optimization of PNotP")); @@ -73,6 +69,14 @@ static cl::opt<bool> DisableOptSZExt("disable-hexagon-optszext", cl::Hidden, cl::ZeroOrMore, cl::init(false), cl::desc("Disable Optimization of Sign/Zero Extends")); +static cl::opt<bool> DisableOptExtTo64("disable-hexagon-opt-ext-to-64", + cl::Hidden, cl::ZeroOrMore, cl::init(false), + cl::desc("Disable Optimization of extensions to i64.")); + +namespace llvm { + void initializeHexagonPeepholePass(PassRegistry&); +} + namespace { struct HexagonPeephole : public MachineFunctionPass { const HexagonInstrInfo *QII; @@ -81,7 +85,9 @@ namespace { public: static char ID; - HexagonPeephole() : MachineFunctionPass(ID) { } + HexagonPeephole() : MachineFunctionPass(ID) { + 
initializeHexagonPeepholePass(*PassRegistry::getPassRegistry()); + } bool runOnMachineFunction(MachineFunction &MF); @@ -100,8 +106,10 @@ namespace { char HexagonPeephole::ID = 0; -bool HexagonPeephole::runOnMachineFunction(MachineFunction &MF) { +INITIALIZE_PASS(HexagonPeephole, "hexagon-peephole", "Hexagon Peephole", + false, false) +bool HexagonPeephole::runOnMachineFunction(MachineFunction &MF) { QII = static_cast<const HexagonInstrInfo *>(MF.getTarget(). getInstrInfo()); QRI = static_cast<const HexagonRegisterInfo *>(MF.getTarget(). @@ -142,6 +150,21 @@ bool HexagonPeephole::runOnMachineFunction(MachineFunction &MF) { } } + // Look for %vreg170<def> = COMBINE_ir_V4 (0, %vreg169) + // %vreg170:DoublRegs, %vreg169:IntRegs + if (!DisableOptExtTo64 && + MI->getOpcode () == Hexagon::COMBINE_Ir_V4) { + assert (MI->getNumOperands() == 3); + MachineOperand &Dst = MI->getOperand(0); + MachineOperand &Src1 = MI->getOperand(1); + MachineOperand &Src2 = MI->getOperand(2); + if (Src1.getImm() != 0) + continue; + unsigned DstReg = Dst.getReg(); + unsigned SrcReg = Src2.getReg(); + PeepholeMap[DstReg] = SrcReg; + } + // Look for this sequence below // %vregDoubleReg1 = LSRd_ri %vregDoubleReg0, 32 // %vregIntReg = COPY %vregDoubleReg1:subreg_loreg. diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonRemoveSZExtArgs.cpp b/contrib/llvm/lib/Target/Hexagon/HexagonRemoveSZExtArgs.cpp index 34bf4ea..44234e8 100644 --- a/contrib/llvm/lib/Target/Hexagon/HexagonRemoveSZExtArgs.cpp +++ b/contrib/llvm/lib/Target/Hexagon/HexagonRemoveSZExtArgs.cpp @@ -21,11 +21,18 @@ #include "llvm/Transforms/Scalar.h" using namespace llvm; + +namespace llvm { + void initializeHexagonRemoveExtendArgsPass(PassRegistry&); +} + namespace { struct HexagonRemoveExtendArgs : public FunctionPass { public: static char ID; - HexagonRemoveExtendArgs() : FunctionPass(ID) {} + HexagonRemoveExtendArgs() : FunctionPass(ID) { + initializeHexagonRemoveExtendArgsPass(*PassRegistry::getPassRegistry()); + } virtual bool runOnFunction(Function &F); const char *getPassName() const { @@ -41,11 +48,9 @@ namespace { } char HexagonRemoveExtendArgs::ID = 0; -RegisterPass<HexagonRemoveExtendArgs> X("reargs", - "Remove Sign and Zero Extends for Args" - ); - +INITIALIZE_PASS(HexagonRemoveExtendArgs, "reargs", + "Remove Sign and Zero Extends for Args", false, false) bool HexagonRemoveExtendArgs::runOnFunction(Function &F) { unsigned Idx = 1; @@ -78,6 +83,7 @@ bool HexagonRemoveExtendArgs::runOnFunction(Function &F) { -FunctionPass *llvm::createHexagonRemoveExtendOps(HexagonTargetMachine &TM) { +FunctionPass* +llvm::createHexagonRemoveExtendArgs(const HexagonTargetMachine &TM) { return new HexagonRemoveExtendArgs(); } diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonSplitTFRCondSets.cpp b/contrib/llvm/lib/Target/Hexagon/HexagonSplitTFRCondSets.cpp index 814249f..8608e08 100644 --- a/contrib/llvm/lib/Target/Hexagon/HexagonSplitTFRCondSets.cpp +++ b/contrib/llvm/lib/Target/Hexagon/HexagonSplitTFRCondSets.cpp @@ -49,16 +49,23 @@ using namespace llvm; +namespace llvm { + void initializeHexagonSplitTFRCondSetsPass(PassRegistry&); +} + + namespace { class HexagonSplitTFRCondSets : public MachineFunctionPass { - HexagonTargetMachine& QTM; + const HexagonTargetMachine &QTM; const HexagonSubtarget &QST; public: static char ID; - HexagonSplitTFRCondSets(HexagonTargetMachine& TM) : - MachineFunctionPass(ID), QTM(TM), QST(*TM.getSubtargetImpl()) {} + HexagonSplitTFRCondSets(const HexagonTargetMachine& TM) : + MachineFunctionPass(ID), QTM(TM), 
QST(*TM.getSubtargetImpl()) { + initializeHexagonSplitTFRCondSetsPass(*PassRegistry::getPassRegistry()); + } const char *getPassName() const { return "Hexagon Split TFRCondSets"; @@ -211,6 +218,18 @@ bool HexagonSplitTFRCondSets::runOnMachineFunction(MachineFunction &Fn) { // Public Constructor Functions //===----------------------------------------------------------------------===// -FunctionPass *llvm::createHexagonSplitTFRCondSets(HexagonTargetMachine &TM) { +static void initializePassOnce(PassRegistry &Registry) { + const char *Name = "Hexagon Split TFRCondSets"; + PassInfo *PI = new PassInfo(Name, "hexagon-split-tfr", + &HexagonSplitTFRCondSets::ID, 0, false, false); + Registry.registerPass(*PI, true); +} + +void llvm::initializeHexagonSplitTFRCondSetsPass(PassRegistry &Registry) { + CALL_ONCE_INITIALIZATION(initializePassOnce) +} + +FunctionPass* +llvm::createHexagonSplitTFRCondSets(const HexagonTargetMachine &TM) { return new HexagonSplitTFRCondSets(TM); } diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonTargetMachine.cpp b/contrib/llvm/lib/Target/Hexagon/HexagonTargetMachine.cpp index ce45c62..caa1ba4 100644 --- a/contrib/llvm/lib/Target/Hexagon/HexagonTargetMachine.cpp +++ b/contrib/llvm/lib/Target/Hexagon/HexagonTargetMachine.cpp @@ -25,19 +25,17 @@ using namespace llvm; -static cl:: -opt<bool> DisableHardwareLoops( - "disable-hexagon-hwloops", cl::Hidden, - cl::desc("Disable Hardware Loops for Hexagon target")); +static cl:: opt<bool> DisableHardwareLoops("disable-hexagon-hwloops", + cl::Hidden, cl::desc("Disable Hardware Loops for Hexagon target")); -static cl:: -opt<bool> DisableHexagonMISched("disable-hexagon-misched", - cl::Hidden, cl::ZeroOrMore, cl::init(false), - cl::desc("Disable Hexagon MI Scheduling")); +static cl::opt<bool> DisableHexagonMISched("disable-hexagon-misched", + cl::Hidden, cl::ZeroOrMore, cl::init(false), + cl::desc("Disable Hexagon MI Scheduling")); static cl::opt<bool> DisableHexagonCFGOpt("disable-hexagon-cfgopt", - cl::Hidden, cl::ZeroOrMore, cl::init(false), - cl::desc("Disable Hexagon CFG Optimization")); + cl::Hidden, cl::ZeroOrMore, cl::init(false), + cl::desc("Disable Hexagon CFG Optimization")); + /// HexagonTargetMachineModule - Note that this is used on hosts that /// cannot link in a library unless there are references into the @@ -126,55 +124,62 @@ TargetPassConfig *HexagonTargetMachine::createPassConfig(PassManagerBase &PM) { } bool HexagonPassConfig::addInstSelector() { + const HexagonTargetMachine &TM = getHexagonTargetMachine(); + bool NoOpt = (getOptLevel() == CodeGenOpt::None); - if (getOptLevel() != CodeGenOpt::None) - addPass(createHexagonRemoveExtendOps(getHexagonTargetMachine())); + if (!NoOpt) + addPass(createHexagonRemoveExtendArgs(TM)); - addPass(createHexagonISelDag(getHexagonTargetMachine(), getOptLevel())); + addPass(createHexagonISelDag(TM, getOptLevel())); - if (getOptLevel() != CodeGenOpt::None) + if (!NoOpt) { addPass(createHexagonPeephole()); + printAndVerify("After hexagon peephole pass"); + } return false; } - bool HexagonPassConfig::addPreRegAlloc() { - if (!DisableHardwareLoops && getOptLevel() != CodeGenOpt::None) - addPass(createHexagonHardwareLoops()); + if (getOptLevel() != CodeGenOpt::None) + if (!DisableHardwareLoops) + addPass(createHexagonHardwareLoops()); return false; } bool HexagonPassConfig::addPostRegAlloc() { - if (!DisableHexagonCFGOpt && getOptLevel() != CodeGenOpt::None) - addPass(createHexagonCFGOptimizer(getHexagonTargetMachine())); - return true; + const HexagonTargetMachine &TM = 
getHexagonTargetMachine(); + if (getOptLevel() != CodeGenOpt::None) + if (!DisableHexagonCFGOpt) + addPass(createHexagonCFGOptimizer(TM)); + return false; } - bool HexagonPassConfig::addPreSched2() { if (getOptLevel() != CodeGenOpt::None) addPass(&IfConverterID); - return true; + return false; } bool HexagonPassConfig::addPreEmitPass() { + const HexagonTargetMachine &TM = getHexagonTargetMachine(); + bool NoOpt = (getOptLevel() == CodeGenOpt::None); - if (!DisableHardwareLoops && getOptLevel() != CodeGenOpt::None) - addPass(createHexagonFixupHwLoops()); - - if (getOptLevel() != CodeGenOpt::None) + if (!NoOpt) addPass(createHexagonNewValueJump()); // Expand Spill code for predicate registers. - addPass(createHexagonExpandPredSpillCode(getHexagonTargetMachine())); + addPass(createHexagonExpandPredSpillCode(TM)); // Split up TFRcondsets into conditional transfers. - addPass(createHexagonSplitTFRCondSets(getHexagonTargetMachine())); + addPass(createHexagonSplitTFRCondSets(TM)); // Create Packets. - if (getOptLevel() != CodeGenOpt::None) + if (!NoOpt) { + if (!DisableHardwareLoops) + addPass(createHexagonFixupHwLoops()); addPass(createHexagonPacketizer()); + } return false; } diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonVLIWPacketizer.cpp b/contrib/llvm/lib/Target/Hexagon/HexagonVLIWPacketizer.cpp index c0d86da..39995e1 100644 --- a/contrib/llvm/lib/Target/Hexagon/HexagonVLIWPacketizer.cpp +++ b/contrib/llvm/lib/Target/Hexagon/HexagonVLIWPacketizer.cpp @@ -48,19 +48,32 @@ #include "HexagonMachineFunctionInfo.h" #include <map> +#include <vector> using namespace llvm; +static cl::opt<bool> PacketizeVolatiles("hexagon-packetize-volatiles", + cl::ZeroOrMore, cl::Hidden, cl::init(true), + cl::desc("Allow non-solo packetization of volatile memory references")); + +namespace llvm { + void initializeHexagonPacketizerPass(PassRegistry&); +} + + namespace { class HexagonPacketizer : public MachineFunctionPass { public: static char ID; - HexagonPacketizer() : MachineFunctionPass(ID) {} + HexagonPacketizer() : MachineFunctionPass(ID) { + initializeHexagonPacketizerPass(*PassRegistry::getPassRegistry()); + } void getAnalysisUsage(AnalysisUsage &AU) const { AU.setPreservesCFG(); AU.addRequired<MachineDominatorTree>(); + AU.addRequired<MachineBranchProbabilityInfo>(); AU.addPreserved<MachineDominatorTree>(); AU.addRequired<MachineLoopInfo>(); AU.addPreserved<MachineLoopInfo>(); @@ -96,10 +109,17 @@ namespace { // schedule this instruction. bool FoundSequentialDependence; + /// \brief A handle to the branch probability pass. + const MachineBranchProbabilityInfo *MBPI; + + // Track MIs with ignored dependece. + std::vector<MachineInstr*> IgnoreDepMIs; + public: // Ctor. HexagonPacketizerList(MachineFunction &MF, MachineLoopInfo &MLI, - MachineDominatorTree &MDT); + MachineDominatorTree &MDT, + const MachineBranchProbabilityInfo *MBPI); // initPacketizerState - initialize some internal flags. 
void initPacketizerState(); @@ -123,20 +143,20 @@ namespace { private: bool IsCallDependent(MachineInstr* MI, SDep::Kind DepType, unsigned DepReg); bool PromoteToDotNew(MachineInstr* MI, SDep::Kind DepType, - MachineBasicBlock::iterator &MII, - const TargetRegisterClass* RC); + MachineBasicBlock::iterator &MII, + const TargetRegisterClass* RC); bool CanPromoteToDotNew(MachineInstr* MI, SUnit* PacketSU, - unsigned DepReg, - std::map <MachineInstr*, SUnit*> MIToSUnit, - MachineBasicBlock::iterator &MII, - const TargetRegisterClass* RC); + unsigned DepReg, + std::map <MachineInstr*, SUnit*> MIToSUnit, + MachineBasicBlock::iterator &MII, + const TargetRegisterClass* RC); bool CanPromoteToNewValue(MachineInstr* MI, SUnit* PacketSU, - unsigned DepReg, - std::map <MachineInstr*, SUnit*> MIToSUnit, - MachineBasicBlock::iterator &MII); + unsigned DepReg, + std::map <MachineInstr*, SUnit*> MIToSUnit, + MachineBasicBlock::iterator &MII); bool CanPromoteToNewValueStore(MachineInstr* MI, MachineInstr* PacketMI, - unsigned DepReg, - std::map <MachineInstr*, SUnit*> MIToSUnit); + unsigned DepReg, + std::map <MachineInstr*, SUnit*> MIToSUnit); bool DemoteToDotOld(MachineInstr* MI); bool ArePredicatesComplements(MachineInstr* MI1, MachineInstr* MI2, std::map <MachineInstr*, SUnit*> MIToSUnit); @@ -152,19 +172,32 @@ namespace { }; } +INITIALIZE_PASS_BEGIN(HexagonPacketizer, "packets", "Hexagon Packetizer", + false, false) +INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree) +INITIALIZE_PASS_DEPENDENCY(MachineBranchProbabilityInfo) +INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo) +INITIALIZE_AG_DEPENDENCY(AliasAnalysis) +INITIALIZE_PASS_END(HexagonPacketizer, "packets", "Hexagon Packetizer", + false, false) + + // HexagonPacketizerList Ctor. HexagonPacketizerList::HexagonPacketizerList( - MachineFunction &MF, MachineLoopInfo &MLI,MachineDominatorTree &MDT) + MachineFunction &MF, MachineLoopInfo &MLI,MachineDominatorTree &MDT, + const MachineBranchProbabilityInfo *MBPI) : VLIWPacketizerList(MF, MLI, MDT, true){ + this->MBPI = MBPI; } bool HexagonPacketizer::runOnMachineFunction(MachineFunction &Fn) { const TargetInstrInfo *TII = Fn.getTarget().getInstrInfo(); MachineLoopInfo &MLI = getAnalysis<MachineLoopInfo>(); MachineDominatorTree &MDT = getAnalysis<MachineDominatorTree>(); - + const MachineBranchProbabilityInfo *MBPI = + &getAnalysis<MachineBranchProbabilityInfo>(); // Instantiate the packetizer. - HexagonPacketizerList Packetizer(Fn, MLI, MDT); + HexagonPacketizerList Packetizer(Fn, MLI, MDT, MBPI); // DFA state table should not be empty. 
assert(Packetizer.getResourceTracker() && "Empty DFA table!"); @@ -710,8 +743,10 @@ static int GetDotNewOp(const int opc) { } // Return .new predicate version for an instruction -static int GetDotNewPredOp(const int opc) { - switch (opc) { +static int GetDotNewPredOp(MachineInstr *MI, + const MachineBranchProbabilityInfo *MBPI, + const HexagonInstrInfo *QII) { + switch (MI->getOpcode()) { default: llvm_unreachable("Unknown .new type"); // Conditional stores // Store byte conditionally @@ -857,17 +892,15 @@ static int GetDotNewPredOp(const int opc) { return Hexagon::STw_GP_cdnNotPt_V4; // Condtional Jumps - case Hexagon::JMP_c: - return Hexagon::JMP_cdnPt; + case Hexagon::JMP_t: + case Hexagon::JMP_f: + return QII->getDotNewPredJumpOp(MI, MBPI); - case Hexagon::JMP_cNot: - return Hexagon::JMP_cdnNotPt; + case Hexagon::JMPR_t: + return Hexagon::JMPR_tnew_tV3; - case Hexagon::JMPR_cPt: - return Hexagon::JMPR_cdnPt_V3; - - case Hexagon::JMPR_cNotPt: - return Hexagon::JMPR_cdnNotPt_V3; + case Hexagon::JMPR_f: + return Hexagon::JMPR_fnew_tV3; // Conditional Transfers case Hexagon::TFR_cPt: @@ -1261,7 +1294,7 @@ bool HexagonPacketizerList::PromoteToDotNew(MachineInstr* MI, int NewOpcode; if (RC == &Hexagon::PredRegsRegClass) - NewOpcode = GetDotNewPredOp(MI->getOpcode()); + NewOpcode = GetDotNewPredOp(MI, MBPI, QII); else NewOpcode = GetDotNewOp(MI->getOpcode()); MI->setDesc(QII->get(NewOpcode)); @@ -1306,17 +1339,17 @@ static int GetDotOldOp(const int opc) { case Hexagon::TFRI_cdnNotPt: return Hexagon::TFRI_cNotPt; - case Hexagon::JMP_cdnPt: - return Hexagon::JMP_c; + case Hexagon::JMP_tnew_t: + return Hexagon::JMP_t; - case Hexagon::JMP_cdnNotPt: - return Hexagon::JMP_cNot; + case Hexagon::JMP_fnew_t: + return Hexagon::JMP_f; - case Hexagon::JMPR_cdnPt_V3: - return Hexagon::JMPR_cPt; + case Hexagon::JMPR_tnew_tV3: + return Hexagon::JMPR_t; - case Hexagon::JMPR_cdnNotPt_V3: - return Hexagon::JMPR_cNotPt; + case Hexagon::JMPR_fnew_tV3: + return Hexagon::JMPR_f; // Load double word @@ -1912,7 +1945,7 @@ static bool GetPredicateSense(MachineInstr* MI, case Hexagon::STrih_imm_cdnPt_V4 : case Hexagon::STriw_imm_cPt_V4 : case Hexagon::STriw_imm_cdnPt_V4 : - case Hexagon::JMP_cdnPt : + case Hexagon::JMP_tnew_t : case Hexagon::LDrid_cPt : case Hexagon::LDrid_cdnPt : case Hexagon::LDrid_indexed_cPt : @@ -2051,7 +2084,7 @@ static bool GetPredicateSense(MachineInstr* MI, case Hexagon::STrih_imm_cdnNotPt_V4 : case Hexagon::STriw_imm_cNotPt_V4 : case Hexagon::STriw_imm_cdnNotPt_V4 : - case Hexagon::JMP_cdnNotPt : + case Hexagon::JMP_fnew_t : case Hexagon::LDrid_cNotPt : case Hexagon::LDrid_cdnNotPt : case Hexagon::LDrid_indexed_cNotPt : @@ -2739,9 +2772,8 @@ bool HexagonPacketizerList::isLegalToPacketizeTogether(SUnit *SUI, SUnit *SUJ) { // If an instruction feeds new value jump, glue it. 
MachineBasicBlock::iterator NextMII = I; ++NextMII; - MachineInstr *NextMI = NextMII; - - if (QII->isNewValueJump(NextMI)) { + if (NextMII != I->getParent()->end() && QII->isNewValueJump(NextMII)) { + MachineInstr *NextMI = NextMII; bool secondRegMatch = false; bool maintainNewValueJump = false; diff --git a/contrib/llvm/lib/Target/MBlaze/MBlazeISelDAGToDAG.cpp b/contrib/llvm/lib/Target/MBlaze/MBlazeISelDAGToDAG.cpp index 78ad24d..34e33fd 100644 --- a/contrib/llvm/lib/Target/MBlaze/MBlazeISelDAGToDAG.cpp +++ b/contrib/llvm/lib/Target/MBlaze/MBlazeISelDAGToDAG.cpp @@ -237,7 +237,7 @@ SDNode* MBlazeDAGToDAGISel::Select(SDNode *Node) { // Use load to get GOT target SDValue Ops[] = { Callee, GPReg, Chain }; SDValue Load = SDValue(CurDAG->getMachineNode(MBlaze::LW, dl, - MVT::i32, MVT::Other, Ops, 3), 0); + MVT::i32, MVT::Other, Ops), 0); Chain = Load.getValue(1); // Call target must be on T9 diff --git a/contrib/llvm/lib/Target/MBlaze/MBlazeInstrInfo.td b/contrib/llvm/lib/Target/MBlaze/MBlazeInstrInfo.td index f86bc0b..d27cd39 100644 --- a/contrib/llvm/lib/Target/MBlaze/MBlazeInstrInfo.td +++ b/contrib/llvm/lib/Target/MBlaze/MBlazeInstrInfo.td @@ -724,8 +724,7 @@ let usesCustomInserter=1 in { [(set GPR:$dst, (atomic_load_nand_32 GPR:$ptr, GPR:$val))]>; def MEMBARRIER : MBlazePseudo<(outs), (ins), - "# memory barrier", - [(membarrier (i32 imm), (i32 imm), (i32 imm), (i32 imm), (i32 imm))]>; + "# memory barrier", []>; } //===----------------------------------------------------------------------===// diff --git a/contrib/llvm/lib/Target/Mangler.cpp b/contrib/llvm/lib/Target/Mangler.cpp index edfd421..d31efa8 100644 --- a/contrib/llvm/lib/Target/Mangler.cpp +++ b/contrib/llvm/lib/Target/Mangler.cpp @@ -188,7 +188,12 @@ void Mangler::getNameWithPrefix(SmallVectorImpl<char> &OutName, // If this global has a name, handle it simply. if (GV->hasName()) { - getNameWithPrefix(OutName, GV->getName(), PrefixTy); + StringRef Name = GV->getName(); + getNameWithPrefix(OutName, Name, PrefixTy); + // No need to do anything else if the global has the special "do not mangle" + // flag in the name. + if (Name[0] == 1) + return; } else { // Get the ID for the global, assigning a new one if we haven't got one // already. 
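// Aside: a minimal sketch of the "do not mangle" convention the Mangler hunk
// above checks for -- a global whose name begins with the byte '\1' is treated
// as already final, so no target prefix or further decoration is applied
// (commonly the marker byte itself is then stripped). mangledName/Prefix are
// illustrative names, not the Mangler interface.
#include <string>

std::string mangledName(const std::string &Name, const std::string &Prefix) {
  if (!Name.empty() && Name[0] == '\1')
    return Name.substr(1); // drop the marker, keep the raw name untouched
  return Prefix + Name;    // ordinary global: apply the target's usual prefix
}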
diff --git a/contrib/llvm/lib/Target/Mips/AsmParser/MipsAsmParser.cpp b/contrib/llvm/lib/Target/Mips/AsmParser/MipsAsmParser.cpp index c403f21..0795cb9 100644 --- a/contrib/llvm/lib/Target/Mips/AsmParser/MipsAsmParser.cpp +++ b/contrib/llvm/lib/Target/Mips/AsmParser/MipsAsmParser.cpp @@ -63,7 +63,6 @@ class MipsAsmParser : public MCTargetAsmParser { MCAsmParser &Parser; MipsAssemblerOptions Options; - #define GET_ASSEMBLER_HEADER #include "MipsGenAsmMatcher.inc" @@ -127,9 +126,12 @@ class MipsAsmParser : public MCTargetAsmParser { bool isLoad,bool isImmOpnd); bool reportParseError(StringRef ErrorMsg); - bool parseMemOffset(const MCExpr *&Res); + bool parseMemOffset(const MCExpr *&Res, bool isParenExpr); bool parseRelocOperand(const MCExpr *&Res); + const MCExpr* evaluateRelocExpr(const MCExpr *Expr, StringRef RelocStr); + + bool isEvaluated(const MCExpr *Expr); bool parseDirectiveSet(); bool parseSetAtDirective(); @@ -171,7 +173,7 @@ class MipsAsmParser : public MCTargetAsmParser { bool requestsDoubleOperand(StringRef Mnemonic); - unsigned getReg(int RC,int RegNo); + unsigned getReg(int RC, int RegNo); int getATReg(); @@ -269,7 +271,7 @@ public: void addImmOperands(MCInst &Inst, unsigned N) const { assert(N == 1 && "Invalid number of operands!"); const MCExpr *Expr = getImm(); - addExpr(Inst,Expr); + addExpr(Inst, Expr); } void addMemOperands(MCInst &Inst, unsigned N) const { @@ -278,7 +280,7 @@ public: Inst.addOperand(MCOperand::CreateReg(getMemBase())); const MCExpr *Expr = getMemOff(); - addExpr(Inst,Expr); + addExpr(Inst, Expr); } bool isReg() const { return Kind == k_Register; } @@ -391,15 +393,19 @@ public: } /// getStartLoc - Get the location of the first token of this operand. - SMLoc getStartLoc() const { return StartLoc; } + SMLoc getStartLoc() const { + return StartLoc; + } /// getEndLoc - Get the location of the last token of this operand. - SMLoc getEndLoc() const { return EndLoc; } + SMLoc getEndLoc() const { + return EndLoc; + } virtual void print(raw_ostream &OS) const { llvm_unreachable("unimplemented!"); } -}; -} +}; // class MipsOperand +} // namespace namespace llvm { extern const MCInstrDesc MipsInsts[]; @@ -409,39 +415,55 @@ static const MCInstrDesc &getInstDesc(unsigned Opcode) { } bool MipsAsmParser::processInstruction(MCInst &Inst, SMLoc IDLoc, - SmallVectorImpl<MCInst> &Instructions) { + SmallVectorImpl<MCInst> &Instructions) { const MCInstrDesc &MCID = getInstDesc(Inst.getOpcode()); Inst.setLoc(IDLoc); + if (MCID.hasDelaySlot() && Options.isReorder()) { + // If this instruction has a delay slot and .set reorder is active, + // emit a NOP after it. + Instructions.push_back(Inst); + MCInst NopInst; + NopInst.setOpcode(Mips::SLL); + NopInst.addOperand(MCOperand::CreateReg(Mips::ZERO)); + NopInst.addOperand(MCOperand::CreateReg(Mips::ZERO)); + NopInst.addOperand(MCOperand::CreateImm(0)); + Instructions.push_back(NopInst); + return false; + } + if (MCID.mayLoad() || MCID.mayStore()) { // Check the offset of memory operand, if it is a symbol - // reference or immediate we may have to expand instructions - for (unsigned i=0;i<MCID.getNumOperands();i++) { + // reference or immediate we may have to expand instructions. 
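// Aside: sketch of two checks from processInstruction -- (1) with ".set
// reorder" active, an instruction that has a delay slot is followed by an
// explicit NOP (MIPS spells NOP as "sll $zero, $zero, 0"); (2) a memory
// offset can only be used directly if it fits the signed 16-bit field,
// otherwise the instruction is expanded (see the lui/addu expansion in the
// loop that follows and in expandMemInst further down). The tiny Inst struct
// is an illustrative stand-in, not the MCInst API.
#include <cstdint>
#include <string>
#include <vector>

struct Inst {
  std::string Opcode;
  int Rd, Rs;
  int64_t Imm;
};

constexpr bool fitsSImm16(int64_t Offset) {
  return Offset >= -32768 && Offset <= 32767;
}

void emitWithDelaySlotNop(const Inst &I, bool HasDelaySlot, bool Reorder,
                          std::vector<Inst> &Out) {
  Out.push_back(I);
  if (HasDelaySlot && Reorder)
    Out.push_back({"sll", /*Rd=*/0, /*Rs=*/0, /*Imm=*/0}); // $zero,$zero,0 == nop
}

static_assert(fitsSImm16(32767) && !fitsSImm16(32768), "signed 16-bit range");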
+ for (unsigned i = 0; i < MCID.getNumOperands(); i++) { const MCOperandInfo &OpInfo = MCID.OpInfo[i]; - if ((OpInfo.OperandType == MCOI::OPERAND_MEMORY) || - (OpInfo.OperandType == MCOI::OPERAND_UNKNOWN)) { + if ((OpInfo.OperandType == MCOI::OPERAND_MEMORY) + || (OpInfo.OperandType == MCOI::OPERAND_UNKNOWN)) { MCOperand &Op = Inst.getOperand(i); if (Op.isImm()) { int MemOffset = Op.getImm(); if (MemOffset < -32768 || MemOffset > 32767) { - // Offset can't exceed 16bit value - expandMemInst(Inst,IDLoc,Instructions,MCID.mayLoad(),true); + // Offset can't exceed 16bit value. + expandMemInst(Inst, IDLoc, Instructions, MCID.mayLoad(), true); return false; } } else if (Op.isExpr()) { const MCExpr *Expr = Op.getExpr(); - if (Expr->getKind() == MCExpr::SymbolRef){ + if (Expr->getKind() == MCExpr::SymbolRef) { const MCSymbolRefExpr *SR = - static_cast<const MCSymbolRefExpr*>(Expr); + static_cast<const MCSymbolRefExpr*>(Expr); if (SR->getKind() == MCSymbolRefExpr::VK_None) { - // Expand symbol - expandMemInst(Inst,IDLoc,Instructions,MCID.mayLoad(),false); + // Expand symbol. + expandMemInst(Inst, IDLoc, Instructions, MCID.mayLoad(), false); return false; } + } else if (!isEvaluated(Expr)) { + expandMemInst(Inst, IDLoc, Instructions, MCID.mayLoad(), false); + return false; } } } - } - } + } // for + } // if load/store if (needsExpansion(Inst)) expandInstruction(Inst, IDLoc, Instructions); @@ -453,30 +475,30 @@ bool MipsAsmParser::processInstruction(MCInst &Inst, SMLoc IDLoc, bool MipsAsmParser::needsExpansion(MCInst &Inst) { - switch(Inst.getOpcode()) { - case Mips::LoadImm32Reg: - case Mips::LoadAddr32Imm: - case Mips::LoadAddr32Reg: - return true; - default: - return false; + switch (Inst.getOpcode()) { + case Mips::LoadImm32Reg: + case Mips::LoadAddr32Imm: + case Mips::LoadAddr32Reg: + return true; + default: + return false; } } void MipsAsmParser::expandInstruction(MCInst &Inst, SMLoc IDLoc, - SmallVectorImpl<MCInst> &Instructions){ - switch(Inst.getOpcode()) { - case Mips::LoadImm32Reg: - return expandLoadImm(Inst, IDLoc, Instructions); - case Mips::LoadAddr32Imm: - return expandLoadAddressImm(Inst,IDLoc,Instructions); - case Mips::LoadAddr32Reg: - return expandLoadAddressReg(Inst,IDLoc,Instructions); - } + SmallVectorImpl<MCInst> &Instructions) { + switch (Inst.getOpcode()) { + case Mips::LoadImm32Reg: + return expandLoadImm(Inst, IDLoc, Instructions); + case Mips::LoadAddr32Imm: + return expandLoadAddressImm(Inst, IDLoc, Instructions); + case Mips::LoadAddr32Reg: + return expandLoadAddressReg(Inst, IDLoc, Instructions); + } } void MipsAsmParser::expandLoadImm(MCInst &Inst, SMLoc IDLoc, - SmallVectorImpl<MCInst> &Instructions){ + SmallVectorImpl<MCInst> &Instructions) { MCInst tmpInst; const MCOperand &ImmOp = Inst.getOperand(1); assert(ImmOp.isImm() && "expected immediate operand kind"); @@ -485,26 +507,24 @@ void MipsAsmParser::expandLoadImm(MCInst &Inst, SMLoc IDLoc, int ImmValue = ImmOp.getImm(); tmpInst.setLoc(IDLoc); - if ( 0 <= ImmValue && ImmValue <= 65535) { - // for 0 <= j <= 65535. + if (0 <= ImmValue && ImmValue <= 65535) { + // For 0 <= j <= 65535. // li d,j => ori d,$zero,j tmpInst.setOpcode(Mips::ORi); tmpInst.addOperand(MCOperand::CreateReg(RegOp.getReg())); - tmpInst.addOperand( - MCOperand::CreateReg(Mips::ZERO)); + tmpInst.addOperand(MCOperand::CreateReg(Mips::ZERO)); tmpInst.addOperand(MCOperand::CreateImm(ImmValue)); Instructions.push_back(tmpInst); - } else if ( ImmValue < 0 && ImmValue >= -32768) { - // for -32768 <= j < 0. 
+ } else if (ImmValue < 0 && ImmValue >= -32768) { + // For -32768 <= j < 0. // li d,j => addiu d,$zero,j tmpInst.setOpcode(Mips::ADDiu); tmpInst.addOperand(MCOperand::CreateReg(RegOp.getReg())); - tmpInst.addOperand( - MCOperand::CreateReg(Mips::ZERO)); + tmpInst.addOperand(MCOperand::CreateReg(Mips::ZERO)); tmpInst.addOperand(MCOperand::CreateImm(ImmValue)); Instructions.push_back(tmpInst); } else { - // for any other value of j that is representable as a 32-bit integer. + // For any other value of j that is representable as a 32-bit integer. // li d,j => lui d,hi16(j) // ori d,d,lo16(j) tmpInst.setOpcode(Mips::LUi); @@ -522,7 +542,7 @@ void MipsAsmParser::expandLoadImm(MCInst &Inst, SMLoc IDLoc, } void MipsAsmParser::expandLoadAddressReg(MCInst &Inst, SMLoc IDLoc, - SmallVectorImpl<MCInst> &Instructions){ + SmallVectorImpl<MCInst> &Instructions) { MCInst tmpInst; const MCOperand &ImmOp = Inst.getOperand(2); assert(ImmOp.isImm() && "expected immediate operand kind"); @@ -531,19 +551,19 @@ void MipsAsmParser::expandLoadAddressReg(MCInst &Inst, SMLoc IDLoc, const MCOperand &DstRegOp = Inst.getOperand(0); assert(DstRegOp.isReg() && "expected register operand kind"); int ImmValue = ImmOp.getImm(); - if ( -32768 <= ImmValue && ImmValue <= 65535) { - //for -32768 <= j <= 65535. - //la d,j(s) => addiu d,s,j + if (-32768 <= ImmValue && ImmValue <= 65535) { + // For -32768 <= j <= 65535. + // la d,j(s) => addiu d,s,j tmpInst.setOpcode(Mips::ADDiu); tmpInst.addOperand(MCOperand::CreateReg(DstRegOp.getReg())); tmpInst.addOperand(MCOperand::CreateReg(SrcRegOp.getReg())); tmpInst.addOperand(MCOperand::CreateImm(ImmValue)); Instructions.push_back(tmpInst); } else { - //for any other value of j that is representable as a 32-bit integer. - //la d,j(s) => lui d,hi16(j) - // ori d,d,lo16(j) - // addu d,d,s + // For any other value of j that is representable as a 32-bit integer. + // la d,j(s) => lui d,hi16(j) + // ori d,d,lo16(j) + // addu d,d,s tmpInst.setOpcode(Mips::LUi); tmpInst.addOperand(MCOperand::CreateReg(DstRegOp.getReg())); tmpInst.addOperand(MCOperand::CreateImm((ImmValue & 0xffff0000) >> 16)); @@ -564,26 +584,25 @@ void MipsAsmParser::expandLoadAddressReg(MCInst &Inst, SMLoc IDLoc, } void MipsAsmParser::expandLoadAddressImm(MCInst &Inst, SMLoc IDLoc, - SmallVectorImpl<MCInst> &Instructions){ + SmallVectorImpl<MCInst> &Instructions) { MCInst tmpInst; const MCOperand &ImmOp = Inst.getOperand(1); assert(ImmOp.isImm() && "expected immediate operand kind"); const MCOperand &RegOp = Inst.getOperand(0); assert(RegOp.isReg() && "expected register operand kind"); int ImmValue = ImmOp.getImm(); - if ( -32768 <= ImmValue && ImmValue <= 65535) { - //for -32768 <= j <= 65535. - //la d,j => addiu d,$zero,j + if (-32768 <= ImmValue && ImmValue <= 65535) { + // For -32768 <= j <= 65535. + // la d,j => addiu d,$zero,j tmpInst.setOpcode(Mips::ADDiu); tmpInst.addOperand(MCOperand::CreateReg(RegOp.getReg())); - tmpInst.addOperand( - MCOperand::CreateReg(Mips::ZERO)); + tmpInst.addOperand(MCOperand::CreateReg(Mips::ZERO)); tmpInst.addOperand(MCOperand::CreateImm(ImmValue)); Instructions.push_back(tmpInst); } else { - //for any other value of j that is representable as a 32-bit integer. - //la d,j => lui d,hi16(j) - // ori d,d,lo16(j) + // For any other value of j that is representable as a 32-bit integer. 
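// Aside: sketch of the three expansions chosen above for `li d,j` (and the
// matching `la` forms), with the same ranges as the code. Because the low
// half is inserted with ORi, a zero-extended immediate, the lui/ori pair
// needs no carry correction; contrast this with the signed 16-bit offsets
// handled later in expandMemInst and %hi/%lo. Strings are illustrative only.
#include <cstdint>
#include <string>
#include <vector>

struct Expansion { std::vector<std::string> Insns; };

inline Expansion expandLi(const std::string &D, int32_t J) {
  if (J >= 0 && J <= 65535)               // fits a zero-extended u16
    return {{"ori " + D + ", $zero, " + std::to_string(J)}};
  if (J < 0 && J >= -32768)               // fits a sign-extended s16
    return {{"addiu " + D + ", $zero, " + std::to_string(J)}};
  uint32_t U = static_cast<uint32_t>(J);  // any other 32-bit value
  return {{"lui " + D + ", " + std::to_string(U >> 16),
           "ori " + D + ", " + D + ", " + std::to_string(U & 0xffff)}};
}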
+ // la d,j => lui d,hi16(j) + // ori d,d,lo16(j) tmpInst.setOpcode(Mips::LUi); tmpInst.addOperand(MCOperand::CreateReg(RegOp.getReg())); tmpInst.addOperand(MCOperand::CreateImm((ImmValue & 0xffff0000) >> 16)); @@ -598,40 +617,37 @@ void MipsAsmParser::expandLoadAddressImm(MCInst &Inst, SMLoc IDLoc, } void MipsAsmParser::expandMemInst(MCInst &Inst, SMLoc IDLoc, - SmallVectorImpl<MCInst> &Instructions, - bool isLoad,bool isImmOpnd) { + SmallVectorImpl<MCInst> &Instructions, bool isLoad, bool isImmOpnd) { const MCSymbolRefExpr *SR; MCInst TempInst; - unsigned ImmOffset,HiOffset,LoOffset; + unsigned ImmOffset, HiOffset, LoOffset; const MCExpr *ExprOffset; unsigned TmpRegNum; - unsigned AtRegNum = getReg((isMips64()) ? Mips::CPU64RegsRegClassID: - Mips::CPURegsRegClassID, - getATReg()); - // 1st operand is either source or dst register + unsigned AtRegNum = getReg((isMips64()) ? Mips::CPU64RegsRegClassID + : Mips::CPURegsRegClassID, getATReg()); + // 1st operand is either the source or destination register. assert(Inst.getOperand(0).isReg() && "expected register operand kind"); unsigned RegOpNum = Inst.getOperand(0).getReg(); - // 2nd operand is base register + // 2nd operand is the base register. assert(Inst.getOperand(1).isReg() && "expected register operand kind"); unsigned BaseRegNum = Inst.getOperand(1).getReg(); - // 3rd operand is either immediate or expression + // 3rd operand is either an immediate or expression. if (isImmOpnd) { assert(Inst.getOperand(2).isImm() && "expected immediate operand kind"); ImmOffset = Inst.getOperand(2).getImm(); LoOffset = ImmOffset & 0x0000ffff; HiOffset = (ImmOffset & 0xffff0000) >> 16; - // If msb of LoOffset is 1(negative number) we must increment HiOffset + // If msb of LoOffset is 1(negative number) we must increment HiOffset. if (LoOffset & 0x8000) HiOffset++; - } - else + } else ExprOffset = Inst.getOperand(2).getExpr(); - // All instructions will have the same location + // All instructions will have the same location. TempInst.setLoc(IDLoc); // 1st instruction in expansion is LUi. For load instruction we can use // the dst register as a temporary if base and dst are different, - // but for stores we must use $at - TmpRegNum = (isLoad && (BaseRegNum != RegOpNum))?RegOpNum:AtRegNum; + // but for stores we must use $at. + TmpRegNum = (isLoad && (BaseRegNum != RegOpNum)) ? RegOpNum : AtRegNum; TempInst.setOpcode(Mips::LUi); TempInst.addOperand(MCOperand::CreateReg(TmpRegNum)); if (isImmOpnd) @@ -639,26 +655,28 @@ void MipsAsmParser::expandMemInst(MCInst &Inst, SMLoc IDLoc, else { if (ExprOffset->getKind() == MCExpr::SymbolRef) { SR = static_cast<const MCSymbolRefExpr*>(ExprOffset); - const MCSymbolRefExpr *HiExpr = MCSymbolRefExpr:: - Create(SR->getSymbol().getName(), - MCSymbolRefExpr::VK_Mips_ABS_HI, - getContext()); + const MCSymbolRefExpr *HiExpr = MCSymbolRefExpr::Create( + SR->getSymbol().getName(), MCSymbolRefExpr::VK_Mips_ABS_HI, + getContext()); + TempInst.addOperand(MCOperand::CreateExpr(HiExpr)); + } else { + const MCExpr *HiExpr = evaluateRelocExpr(ExprOffset, "hi"); TempInst.addOperand(MCOperand::CreateExpr(HiExpr)); } } - // Add the instruction to the list + // Add the instruction to the list. Instructions.push_back(TempInst); - // and prepare TempInst for next instruction + // Prepare TempInst for next instruction. TempInst.clear(); - // which is add temp register to base + // Add temp register to base. 
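// Aside: sketch of the three-instruction rewrite expandMemInst builds above
// for offsets outside the signed 16-bit range: lui temp, hi; addu temp,
// temp, base; then the original memory op with the low offset against temp.
// For loads the destination can double as the temporary when it differs from
// the base register; stores must fall back to the assembler temporary ($at).
// Because the final op sign-extends its 16-bit offset, the high half gets a
// carry when the low half's msb is set (worked %hi/%lo example further down).
// Register and opcode strings are illustrative only.
#include <cstdint>
#include <string>
#include <vector>

inline std::vector<std::string>
expandLargeOffset(const std::string &Op, const std::string &Reg,
                  const std::string &Base, int32_t Offset, bool IsLoad) {
  uint32_t Lo = static_cast<uint32_t>(Offset) & 0xffff;
  uint32_t Hi = static_cast<uint32_t>(Offset) >> 16;
  if (Lo & 0x8000)   // low half will be sign-extended by the final op,
    ++Hi;            // so carry one into the high half to compensate
  std::string Tmp = (IsLoad && Reg != Base) ? Reg : "$at";
  return {"lui "  + Tmp + ", " + std::to_string(Hi),
          "addu " + Tmp + ", " + Tmp + ", " + Base,
          Op + " " + Reg + ", " + std::to_string(Lo) + "(" + Tmp + ")"};
}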
TempInst.setOpcode(Mips::ADDu); TempInst.addOperand(MCOperand::CreateReg(TmpRegNum)); TempInst.addOperand(MCOperand::CreateReg(TmpRegNum)); TempInst.addOperand(MCOperand::CreateReg(BaseRegNum)); Instructions.push_back(TempInst); TempInst.clear(); - // and finaly, create original instruction with low part - // of offset and new base + // And finaly, create original instruction with low part + // of offset and new base. TempInst.setOpcode(Inst.getOpcode()); TempInst.addOperand(MCOperand::CreateReg(RegOpNum)); TempInst.addOperand(MCOperand::CreateReg(TmpRegNum)); @@ -666,10 +684,12 @@ void MipsAsmParser::expandMemInst(MCInst &Inst, SMLoc IDLoc, TempInst.addOperand(MCOperand::CreateImm(LoOffset)); else { if (ExprOffset->getKind() == MCExpr::SymbolRef) { - const MCSymbolRefExpr *LoExpr = MCSymbolRefExpr:: - Create(SR->getSymbol().getName(), - MCSymbolRefExpr::VK_Mips_ABS_LO, - getContext()); + const MCSymbolRefExpr *LoExpr = MCSymbolRefExpr::Create( + SR->getSymbol().getName(), MCSymbolRefExpr::VK_Mips_ABS_LO, + getContext()); + TempInst.addOperand(MCOperand::CreateExpr(LoExpr)); + } else { + const MCExpr *LoExpr = evaluateRelocExpr(ExprOffset, "lo"); TempInst.addOperand(MCOperand::CreateExpr(LoExpr)); } } @@ -688,11 +708,12 @@ MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, MatchingInlineAsm); switch (MatchResult) { - default: break; + default: + break; case Match_Success: { - if (processInstruction(Inst,IDLoc,Instructions)) + if (processInstruction(Inst, IDLoc, Instructions)) return true; - for(unsigned i =0; i < Instructions.size(); i++) + for (unsigned i = 0; i < Instructions.size(); i++) Out.EmitInstruction(Instructions[i]); return false; } @@ -705,8 +726,9 @@ MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, if (ErrorInfo >= Operands.size()) return Error(IDLoc, "too few operands for instruction"); - ErrorLoc = ((MipsOperand*)Operands[ErrorInfo])->getStartLoc(); - if (ErrorLoc == SMLoc()) ErrorLoc = IDLoc; + ErrorLoc = ((MipsOperand*) Operands[ErrorInfo])->getStartLoc(); + if (ErrorLoc == SMLoc()) + ErrorLoc = IDLoc; } return Error(ErrorLoc, "invalid operand for instruction"); @@ -757,10 +779,10 @@ int MipsAsmParser::matchCPURegisterName(StringRef Name) { .Case("t9", 25) .Default(-1); - // Although SGI documentation just cut out t0-t3 for n32/n64, + // Although SGI documentation just cuts out t0-t3 for n32/n64, // GNU pushes the values of t0-t3 to override the o32/o64 values for t4-t7 // We are supporting both cases, so for t0-t3 we'll just push them to t4-t7. - if (isMips64() && 8 <= CC && CC <= 11) + if (isMips64() && 8 <= CC && CC <= 11) CC += 4; if (CC == -1 && isMips64()) @@ -776,19 +798,23 @@ int MipsAsmParser::matchCPURegisterName(StringRef Name) { return CC; } + int MipsAsmParser::matchRegisterName(StringRef Name, bool is64BitReg) { + if (Name.equals("fcc0")) + return Mips::FCC0; + int CC; CC = matchCPURegisterName(Name); if (CC != -1) - return matchRegisterByNumber(CC,is64BitReg?Mips::CPU64RegsRegClassID: - Mips::CPURegsRegClassID); + return matchRegisterByNumber(CC, is64BitReg ? Mips::CPU64RegsRegClassID + : Mips::CPURegsRegClassID); if (Name[0] == 'f') { StringRef NumString = Name.substr(1); unsigned IntVal; - if( NumString.getAsInteger(10, IntVal)) - return -1; // not integer + if (NumString.getAsInteger(10, IntVal)) + return -1; // This is not an integer. 
if (IntVal > 31) return -1; @@ -797,18 +823,19 @@ int MipsAsmParser::matchRegisterName(StringRef Name, bool is64BitReg) { if (Format == FP_FORMAT_S || Format == FP_FORMAT_W) return getReg(Mips::FGR32RegClassID, IntVal); if (Format == FP_FORMAT_D) { - if(isFP64()) { + if (isFP64()) { return getReg(Mips::FGR64RegClassID, IntVal); } - // only even numbers available as register pairs - if (( IntVal > 31) || (IntVal%2 != 0)) + // Only even numbers available as register pairs. + if ((IntVal > 31) || (IntVal % 2 != 0)) return -1; - return getReg(Mips::AFGR64RegClassID, IntVal/2); + return getReg(Mips::AFGR64RegClassID, IntVal / 2); } } return -1; } + void MipsAsmParser::setDefaultFpFormat() { if (isMips64() || isFP64()) @@ -828,6 +855,7 @@ bool MipsAsmParser::requestsDoubleOperand(StringRef Mnemonic){ return IsDouble; } + void MipsAsmParser::setFpFormat(StringRef Format) { FpFormat = StringSwitch<FpFormatTy>(Format.lower()) @@ -850,7 +878,7 @@ int MipsAsmParser::getATReg() { return Options.getATRegNum(); } -unsigned MipsAsmParser::getReg(int RC,int RegNo) { +unsigned MipsAsmParser::getReg(int RC, int RegNo) { return *(getContext().getRegisterInfo().getRegClass(RC).begin() + RegNo); } @@ -871,14 +899,12 @@ int MipsAsmParser::tryParseRegister(bool is64BitReg) { RegNum = matchRegisterName(lowerCase, is64BitReg); } else if (Tok.is(AsmToken::Integer)) RegNum = matchRegisterByNumber(static_cast<unsigned>(Tok.getIntVal()), - is64BitReg ? Mips::CPU64RegsRegClassID - : Mips::CPURegsRegClassID); + is64BitReg ? Mips::CPU64RegsRegClassID : Mips::CPURegsRegClassID); return RegNum; } -bool MipsAsmParser:: - tryParseRegisterOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands, - bool is64BitReg){ +bool MipsAsmParser::tryParseRegisterOperand( + SmallVectorImpl<MCParsedAsmOperand*> &Operands, bool is64BitReg) { SMLoc S = Parser.getTok().getLoc(); int RegNo = -1; @@ -888,7 +914,7 @@ bool MipsAsmParser:: return true; Operands.push_back(MipsOperand::CreateReg(RegNo, S, - Parser.getTok().getLoc())); + Parser.getTok().getLoc())); Parser.Lex(); // Eat register token. return false; } @@ -911,19 +937,19 @@ bool MipsAsmParser::ParseOperand(SmallVectorImpl<MCParsedAsmOperand*>&Operands, Error(Parser.getTok().getLoc(), "unexpected token in operand"); return true; case AsmToken::Dollar: { - // parse register + // Parse the register. SMLoc S = Parser.getTok().getLoc(); Parser.Lex(); // Eat dollar token. - // parse register operand + // Parse the register operand. if (!tryParseRegisterOperand(Operands, isMips64())) { if (getLexer().is(AsmToken::LParen)) { - // check if it is indexed addressing operand + // Check if it is indexed addressing operand. Operands.push_back(MipsOperand::CreateToken("(", S)); - Parser.Lex(); // eat parenthesis + Parser.Lex(); // Eat the parenthesis. if (getLexer().isNot(AsmToken::Dollar)) return true; - Parser.Lex(); // eat dollar + Parser.Lex(); // Eat the dollar if (tryParseRegisterOperand(Operands, isMips64())) return true; @@ -936,7 +962,7 @@ bool MipsAsmParser::ParseOperand(SmallVectorImpl<MCParsedAsmOperand*>&Operands, } return false; } - // maybe it is a symbol reference + // Maybe it is a symbol reference. StringRef Identifier; if (Parser.parseIdentifier(Identifier)) return true; @@ -945,7 +971,7 @@ bool MipsAsmParser::ParseOperand(SmallVectorImpl<MCParsedAsmOperand*>&Operands, MCSymbol *Sym = getContext().GetOrCreateSymbol("$" + Identifier); - // Otherwise create a symbol ref. + // Otherwise create a symbol reference. 
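// Aside: sketch of the "$fN" handling in matchRegisterName above --
// single-precision (or FP64) names map straight to register N, while
// double-precision on a 32-bit FPU only accepts even registers, which name an
// (fN, fN+1) pair addressed by pair index N/2. The return value here is just
// the resolved index (-1 on error), not a register-class entry.
#include <cstdlib>
#include <string>

inline int matchFpRegIndex(const std::string &Name, bool IsDoubleFmt,
                           bool HasFp64) {
  if (Name.empty() || Name[0] != 'f')
    return -1;
  char *End = nullptr;
  long N = std::strtol(Name.c_str() + 1, &End, 10);
  if (End == Name.c_str() + 1 || *End != '\0' || N < 0 || N > 31)
    return -1;                      // not "f<0..31>"
  if (!IsDoubleFmt || HasFp64)
    return static_cast<int>(N);     // one 32- or 64-bit register per name
  if (N % 2 != 0)
    return -1;                      // 32-bit FPU pairs start on even regs only
  return static_cast<int>(N / 2);   // AFGR64 pair index
}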
const MCExpr *Res = MCSymbolRefExpr::Create(Sym, MCSymbolRefExpr::VK_None, getContext()); @@ -954,16 +980,16 @@ bool MipsAsmParser::ParseOperand(SmallVectorImpl<MCParsedAsmOperand*>&Operands, } case AsmToken::Identifier: // Look for the existing symbol, we should check if - // we need to assigne the propper RegisterKind - if (searchSymbolAlias(Operands,MipsOperand::Kind_None)) - return false; - //else drop to expression parsing + // we need to assigne the propper RegisterKind. + if (searchSymbolAlias(Operands, MipsOperand::Kind_None)) + return false; + // Else drop to expression parsing. case AsmToken::LParen: case AsmToken::Minus: case AsmToken::Plus: case AsmToken::Integer: case AsmToken::String: { - // quoted label names + // Quoted label names. const MCExpr *IdVal; SMLoc S = Parser.getTok().getLoc(); if (getParser().parseExpression(IdVal)) @@ -973,9 +999,9 @@ bool MipsAsmParser::ParseOperand(SmallVectorImpl<MCParsedAsmOperand*>&Operands, return false; } case AsmToken::Percent: { - // it is a symbol reference or constant expression + // It is a symbol reference or constant expression. const MCExpr *IdVal; - SMLoc S = Parser.getTok().getLoc(); // start location of the operand + SMLoc S = Parser.getTok().getLoc(); // Start location of the operand. if (parseRelocOperand(IdVal)) return true; @@ -988,131 +1014,200 @@ bool MipsAsmParser::ParseOperand(SmallVectorImpl<MCParsedAsmOperand*>&Operands, return true; } -bool MipsAsmParser::parseRelocOperand(const MCExpr *&Res) { +const MCExpr* MipsAsmParser::evaluateRelocExpr(const MCExpr *Expr, + StringRef RelocStr) { + const MCExpr *Res; + // Check the type of the expression. + if (const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(Expr)) { + // It's a constant, evaluate lo or hi value. + if (RelocStr == "lo") { + short Val = MCE->getValue(); + Res = MCConstantExpr::Create(Val, getContext()); + } else if (RelocStr == "hi") { + int Val = MCE->getValue(); + int LoSign = Val & 0x8000; + Val = (Val & 0xffff0000) >> 16; + // Lower part is treated as a signed int, so if it is negative + // we must add 1 to the hi part to compensate. + if (LoSign) + Val++; + Res = MCConstantExpr::Create(Val, getContext()); + } else { + llvm_unreachable("Invalid RelocStr value"); + } + return Res; + } + + if (const MCSymbolRefExpr *MSRE = dyn_cast<MCSymbolRefExpr>(Expr)) { + // It's a symbol, create a symbolic expression from the symbol. + StringRef Symbol = MSRE->getSymbol().getName(); + MCSymbolRefExpr::VariantKind VK = getVariantKind(RelocStr); + Res = MCSymbolRefExpr::Create(Symbol, VK, getContext()); + return Res; + } + + if (const MCBinaryExpr *BE = dyn_cast<MCBinaryExpr>(Expr)) { + const MCExpr *LExp = evaluateRelocExpr(BE->getLHS(), RelocStr); + const MCExpr *RExp = evaluateRelocExpr(BE->getRHS(), RelocStr); + Res = MCBinaryExpr::Create(BE->getOpcode(), LExp, RExp, getContext()); + return Res; + } - Parser.Lex(); // eat % token - const AsmToken &Tok = Parser.getTok(); // get next token, operation + if (const MCUnaryExpr *UN = dyn_cast<MCUnaryExpr>(Expr)) { + const MCExpr *UnExp = evaluateRelocExpr(UN->getSubExpr(), RelocStr); + Res = MCUnaryExpr::Create(UN->getOpcode(), UnExp, getContext()); + return Res; + } + // Just return the original expression. 
+ return Expr; +} + +bool MipsAsmParser::isEvaluated(const MCExpr *Expr) { + + switch (Expr->getKind()) { + case MCExpr::Constant: + return true; + case MCExpr::SymbolRef: + return (cast<MCSymbolRefExpr>(Expr)->getKind() != MCSymbolRefExpr::VK_None); + case MCExpr::Binary: + if (const MCBinaryExpr *BE = dyn_cast<MCBinaryExpr>(Expr)) { + if (!isEvaluated(BE->getLHS())) + return false; + return isEvaluated(BE->getRHS()); + } + case MCExpr::Unary: + return isEvaluated(cast<MCUnaryExpr>(Expr)->getSubExpr()); + default: + return false; + } + return false; +} + +bool MipsAsmParser::parseRelocOperand(const MCExpr *&Res) { + Parser.Lex(); // Eat the % token. + const AsmToken &Tok = Parser.getTok(); // Get next token, operation. if (Tok.isNot(AsmToken::Identifier)) return true; std::string Str = Tok.getIdentifier().str(); - Parser.Lex(); // eat identifier - // now make expression from the rest of the operand + Parser.Lex(); // Eat the identifier. + // Now make an expression from the rest of the operand. const MCExpr *IdVal; SMLoc EndLoc; if (getLexer().getKind() == AsmToken::LParen) { while (1) { - Parser.Lex(); // eat '(' token + Parser.Lex(); // Eat the '(' token. if (getLexer().getKind() == AsmToken::Percent) { - Parser.Lex(); // eat % token + Parser.Lex(); // Eat the % token. const AsmToken &nextTok = Parser.getTok(); if (nextTok.isNot(AsmToken::Identifier)) return true; Str += "(%"; Str += nextTok.getIdentifier(); - Parser.Lex(); // eat identifier + Parser.Lex(); // Eat the identifier. if (getLexer().getKind() != AsmToken::LParen) return true; } else break; } - if (getParser().parseParenExpression(IdVal,EndLoc)) + if (getParser().parseParenExpression(IdVal, EndLoc)) return true; while (getLexer().getKind() == AsmToken::RParen) - Parser.Lex(); // eat ')' token + Parser.Lex(); // Eat the ')' token. } else - return true; // parenthesis must follow reloc operand - - // Check the type of the expression - if (const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(IdVal)) { - // It's a constant, evaluate lo or hi value - if (Str == "lo") { - short Val = MCE->getValue(); - Res = MCConstantExpr::Create(Val, getContext()); - } else if (Str == "hi") { - int Val = MCE->getValue(); - int LoSign = Val & 0x8000; - Val = (Val & 0xffff0000) >> 16; - // Lower part is treated as a signed int, so if it is negative - // we must add 1 to the hi part to compensate - if (LoSign) - Val++; - Res = MCConstantExpr::Create(Val, getContext()); - } - return false; - } + return true; // Parenthesis must follow the relocation operand. 
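The constant case of evaluateRelocExpr above implements the usual MIPS %hi/%lo split: the low half is consumed as a signed 16-bit immediate, so the high half must be bumped by one whenever that immediate turns out negative. A self-contained sketch of the same arithmetic (plain C++, not the MC API):

#include <cassert>
#include <cstdint>

// %hi: upper 16 bits, plus one when the low half will sign-extend negative.
static uint32_t hiPart(uint32_t Val) {
  uint32_t Hi = Val >> 16;
  if (Val & 0x8000)
    Hi = (Hi + 1) & 0xffff;
  return Hi;
}

// %lo: low 16 bits, interpreted as a signed immediate (as addiu/lw do).
static int32_t loPart(uint32_t Val) {
  return static_cast<int16_t>(Val & 0xffff);
}

int main() {
  uint32_t Addr = 0x1234abcd;          // arbitrary example address
  // A lui/addiu pair computes (hi << 16) + sign_extend(lo); the split must
  // therefore round-trip back to the original value.
  uint32_t Rebuilt = (hiPart(Addr) << 16) + static_cast<uint32_t>(loPart(Addr));
  assert(Rebuilt == Addr);
  return 0;
}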
- if (const MCSymbolRefExpr *MSRE = dyn_cast<MCSymbolRefExpr>(IdVal)) { - // It's a symbol, create symbolic expression from symbol - StringRef Symbol = MSRE->getSymbol().getName(); - MCSymbolRefExpr::VariantKind VK = getVariantKind(Str); - Res = MCSymbolRefExpr::Create(Symbol,VK,getContext()); - return false; - } - return true; + Res = evaluateRelocExpr(IdVal, Str); + return false; } bool MipsAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc) { - StartLoc = Parser.getTok().getLoc(); RegNo = tryParseRegister(isMips64()); EndLoc = Parser.getTok().getLoc(); - return (RegNo == (unsigned)-1); + return (RegNo == (unsigned) -1); } -bool MipsAsmParser::parseMemOffset(const MCExpr *&Res) { - +bool MipsAsmParser::parseMemOffset(const MCExpr *&Res, bool isParenExpr) { SMLoc S; + bool Result = true; - switch(getLexer().getKind()) { + while (getLexer().getKind() == AsmToken::LParen) + Parser.Lex(); + + switch (getLexer().getKind()) { default: return true; case AsmToken::Identifier: + case AsmToken::LParen: case AsmToken::Integer: case AsmToken::Minus: case AsmToken::Plus: - return (getParser().parseExpression(Res)); + if (isParenExpr) + Result = getParser().parseParenExpression(Res, S); + else + Result = (getParser().parseExpression(Res)); + while (getLexer().getKind() == AsmToken::RParen) + Parser.Lex(); + break; case AsmToken::Percent: - return parseRelocOperand(Res); - case AsmToken::LParen: - return false; // it's probably assuming 0 + Result = parseRelocOperand(Res); } - return true; + return Result; } MipsAsmParser::OperandMatchResultTy MipsAsmParser::parseMemOperand( - SmallVectorImpl<MCParsedAsmOperand*>&Operands) { + SmallVectorImpl<MCParsedAsmOperand*>&Operands) { const MCExpr *IdVal = 0; SMLoc S; - // first operand is the offset + bool isParenExpr = false; + // First operand is the offset. S = Parser.getTok().getLoc(); - if (parseMemOffset(IdVal)) - return MatchOperand_ParseFail; + if (getLexer().getKind() == AsmToken::LParen) { + Parser.Lex(); + isParenExpr = true; + } - const AsmToken &Tok = Parser.getTok(); // get next token - if (Tok.isNot(AsmToken::LParen)) { - MipsOperand *Mnemonic = static_cast<MipsOperand*>(Operands[0]); - if (Mnemonic->getToken() == "la") { - SMLoc E = SMLoc::getFromPointer(Parser.getTok().getLoc().getPointer() -1); - Operands.push_back(MipsOperand::CreateImm(IdVal, S, E)); - return MatchOperand_Success; + if (getLexer().getKind() != AsmToken::Dollar) { + if (parseMemOffset(IdVal, isParenExpr)) + return MatchOperand_ParseFail; + + const AsmToken &Tok = Parser.getTok(); // Get the next token. + if (Tok.isNot(AsmToken::LParen)) { + MipsOperand *Mnemonic = static_cast<MipsOperand*>(Operands[0]); + if (Mnemonic->getToken() == "la") { + SMLoc E = SMLoc::getFromPointer( + Parser.getTok().getLoc().getPointer() - 1); + Operands.push_back(MipsOperand::CreateImm(IdVal, S, E)); + return MatchOperand_Success; + } + if (Tok.is(AsmToken::EndOfStatement)) { + SMLoc E = SMLoc::getFromPointer( + Parser.getTok().getLoc().getPointer() - 1); + + // Zero register assumed, add a memory operand with ZERO as its base. + Operands.push_back(MipsOperand::CreateMem(isMips64() ? Mips::ZERO_64 + : Mips::ZERO, + IdVal, S, E)); + return MatchOperand_Success; + } + Error(Parser.getTok().getLoc(), "'(' expected"); + return MatchOperand_ParseFail; } - Error(Parser.getTok().getLoc(), "'(' expected"); - return MatchOperand_ParseFail; - } - Parser.Lex(); // Eat '(' token. + Parser.Lex(); // Eat the '(' token. 
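With the changes above, parseMemOperand also accepts a bare offset with no parenthesized base register, in which case $zero is assumed. A toy string-level sketch of the accepted operand shapes (this is not the MCAsmParser machinery, just an illustration):

#include <cassert>
#include <string>

struct MemOp { std::string Offset, Base; };

// Split "offset(base)"; a missing "(base)" falls back to $zero, mirroring the
// new end-of-statement case above.
static MemOp splitMemOperand(const std::string &Text) {
  std::string::size_type L = Text.rfind('('), R = Text.rfind(')');
  if (L == std::string::npos || R == std::string::npos || R < L)
    return {Text, "$zero"};
  return {Text.substr(0, L), Text.substr(L + 1, R - L - 1)};
}

int main() {
  MemOp A = splitMemOperand("16($sp)");
  assert(A.Offset == "16" && A.Base == "$sp");
  MemOp B = splitMemOperand("%lo(sym)($gp)");  // relocation expression as offset
  assert(B.Offset == "%lo(sym)" && B.Base == "$gp");
  MemOp C = splitMemOperand("sym");            // no base: $zero assumed
  assert(C.Base == "$zero");
  return 0;
}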
+ } - const AsmToken &Tok1 = Parser.getTok(); // get next token + const AsmToken &Tok1 = Parser.getTok(); // Get next token if (Tok1.is(AsmToken::Dollar)) { - Parser.Lex(); // Eat '$' token. + Parser.Lex(); // Eat the '$' token. if (tryParseRegisterOperand(Operands, isMips64())) { Error(Parser.getTok().getLoc(), "unexpected token in operand"); return MatchOperand_ParseFail; @@ -1123,7 +1218,7 @@ MipsAsmParser::OperandMatchResultTy MipsAsmParser::parseMemOperand( return MatchOperand_ParseFail; } - const AsmToken &Tok2 = Parser.getTok(); // get next token + const AsmToken &Tok2 = Parser.getTok(); // Get next token. if (Tok2.isNot(AsmToken::RParen)) { Error(Parser.getTok().getLoc(), "')' expected"); return MatchOperand_ParseFail; @@ -1131,17 +1226,26 @@ MipsAsmParser::OperandMatchResultTy MipsAsmParser::parseMemOperand( SMLoc E = SMLoc::getFromPointer(Parser.getTok().getLoc().getPointer() - 1); - Parser.Lex(); // Eat ')' token. + Parser.Lex(); // Eat the ')' token. if (IdVal == 0) IdVal = MCConstantExpr::Create(0, getContext()); - // now replace register operand with the mem operand + // Replace the register operand with the memory operand. MipsOperand* op = static_cast<MipsOperand*>(Operands.back()); int RegNo = op->getReg(); - // remove register from operands + // Remove the register from the operands. Operands.pop_back(); - // and add memory operand + // Add the memory operand. + if (const MCBinaryExpr *BE = dyn_cast<MCBinaryExpr>(IdVal)) { + int64_t Imm; + if (IdVal->EvaluateAsAbsolute(Imm)) + IdVal = MCConstantExpr::Create(Imm, getContext()); + else if (BE->getLHS()->getKind() != MCExpr::SymbolRef) + IdVal = MCBinaryExpr::Create(BE->getOpcode(), BE->getRHS(), BE->getLHS(), + getContext()); + } + Operands.push_back(MipsOperand::CreateMem(RegNo, IdVal, S, E)); delete op; return MatchOperand_Success; @@ -1153,17 +1257,17 @@ MipsAsmParser::parseCPU64Regs(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { if (!isMips64()) return MatchOperand_NoMatch; if (getLexer().getKind() == AsmToken::Identifier) { - if (searchSymbolAlias(Operands,MipsOperand::Kind_CPU64Regs)) + if (searchSymbolAlias(Operands, MipsOperand::Kind_CPU64Regs)) return MatchOperand_Success; return MatchOperand_NoMatch; } - // if the first token is not '$' we have an error + // If the first token is not '$', we have an error. if (Parser.getTok().isNot(AsmToken::Dollar)) return MatchOperand_NoMatch; Parser.Lex(); // Eat $ - if(!tryParseRegisterOperand(Operands, true)) { - // set the proper register kind + if (!tryParseRegisterOperand(Operands, true)) { + // Set the proper register kind. 
MipsOperand* op = static_cast<MipsOperand*>(Operands.back()); op->setRegKind(MipsOperand::Kind_CPU64Regs); return MatchOperand_Success; @@ -1171,9 +1275,8 @@ MipsAsmParser::parseCPU64Regs(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { return MatchOperand_NoMatch; } -bool MipsAsmParser:: -searchSymbolAlias(SmallVectorImpl<MCParsedAsmOperand*> &Operands, - unsigned RegisterKind) { +bool MipsAsmParser::searchSymbolAlias( + SmallVectorImpl<MCParsedAsmOperand*> &Operands, unsigned RegisterKind) { MCSymbol *Sym = getContext().LookupSymbol(Parser.getTok().getIdentifier()); if (Sym) { @@ -1187,13 +1290,13 @@ searchSymbolAlias(SmallVectorImpl<MCParsedAsmOperand*> &Operands, const MCSymbolRefExpr *Ref = static_cast<const MCSymbolRefExpr*>(Expr); const StringRef DefSymbol = Ref->getSymbol().getName(); if (DefSymbol.startswith("$")) { - // Lookup for the register with corresponding name - int RegNum = matchRegisterName(DefSymbol.substr(1),isMips64()); + // Lookup for the register with the corresponding name. + int RegNum = matchRegisterName(DefSymbol.substr(1), isMips64()); if (RegNum > -1) { Parser.Lex(); - MipsOperand *op = MipsOperand::CreateReg(RegNum,S, - Parser.getTok().getLoc()); - op->setRegKind((MipsOperand::RegisterKind)RegisterKind); + MipsOperand *op = MipsOperand::CreateReg(RegNum, S, + Parser.getTok().getLoc()); + op->setRegKind((MipsOperand::RegisterKind) RegisterKind); Operands.push_back(op); return true; } @@ -1201,29 +1304,30 @@ searchSymbolAlias(SmallVectorImpl<MCParsedAsmOperand*> &Operands, } else if (Expr->getKind() == MCExpr::Constant) { Parser.Lex(); const MCConstantExpr *Const = static_cast<const MCConstantExpr*>(Expr); - MipsOperand *op = MipsOperand::CreateImm(Const,S, - Parser.getTok().getLoc()); + MipsOperand *op = MipsOperand::CreateImm(Const, S, + Parser.getTok().getLoc()); Operands.push_back(op); return true; } } return false; } + MipsAsmParser::OperandMatchResultTy MipsAsmParser::parseCPURegs(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { if (getLexer().getKind() == AsmToken::Identifier) { - if (searchSymbolAlias(Operands,MipsOperand::Kind_CPURegs)) + if (searchSymbolAlias(Operands, MipsOperand::Kind_CPURegs)) return MatchOperand_Success; return MatchOperand_NoMatch; } - // if the first token is not '$' we have an error + // If the first token is not '$' we have an error. if (Parser.getTok().isNot(AsmToken::Dollar)) return MatchOperand_NoMatch; Parser.Lex(); // Eat $ - if(!tryParseRegisterOperand(Operands, false)) { - // set the propper register kind + if (!tryParseRegisterOperand(Operands, false)) { + // Set the proper register kind. MipsOperand* op = static_cast<MipsOperand*>(Operands.back()); op->setRegKind(MipsOperand::Kind_CPURegs); return MatchOperand_Success; @@ -1237,87 +1341,88 @@ MipsAsmParser::parseHWRegs(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { if (isMips64()) return MatchOperand_NoMatch; - // if the first token is not '$' we have error + // If the first token is not '$' we have error. if (Parser.getTok().isNot(AsmToken::Dollar)) return MatchOperand_NoMatch; SMLoc S = Parser.getTok().getLoc(); - Parser.Lex(); // Eat $ + Parser.Lex(); // Eat the '$'. - const AsmToken &Tok = Parser.getTok(); // get next token + const AsmToken &Tok = Parser.getTok(); // Get the next token. if (Tok.isNot(AsmToken::Integer)) return MatchOperand_NoMatch; unsigned RegNum = Tok.getIntVal(); - // at the moment only hwreg29 is supported + // At the moment only hwreg29 is supported. 
if (RegNum != 29) return MatchOperand_ParseFail; MipsOperand *op = MipsOperand::CreateReg(Mips::HWR29, S, - Parser.getTok().getLoc()); + Parser.getTok().getLoc()); op->setRegKind(MipsOperand::Kind_HWRegs); Operands.push_back(op); - Parser.Lex(); // Eat reg number + Parser.Lex(); // Eat the register number. return MatchOperand_Success; } MipsAsmParser::OperandMatchResultTy -MipsAsmParser::parseHW64Regs(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { +MipsAsmParser::parseHW64Regs( + SmallVectorImpl<MCParsedAsmOperand*> &Operands) { if (!isMips64()) return MatchOperand_NoMatch; - //if the first token is not '$' we have error + // If the first token is not '$' we have an error. if (Parser.getTok().isNot(AsmToken::Dollar)) return MatchOperand_NoMatch; SMLoc S = Parser.getTok().getLoc(); Parser.Lex(); // Eat $ - const AsmToken &Tok = Parser.getTok(); // get next token + const AsmToken &Tok = Parser.getTok(); // Get the next token. if (Tok.isNot(AsmToken::Integer)) return MatchOperand_NoMatch; unsigned RegNum = Tok.getIntVal(); - // at the moment only hwreg29 is supported + // At the moment only hwreg29 is supported. if (RegNum != 29) return MatchOperand_ParseFail; MipsOperand *op = MipsOperand::CreateReg(Mips::HWR29_64, S, - Parser.getTok().getLoc()); + Parser.getTok().getLoc()); op->setRegKind(MipsOperand::Kind_HW64Regs); Operands.push_back(op); - Parser.Lex(); // Eat reg number + Parser.Lex(); // Eat the register number. return MatchOperand_Success; } MipsAsmParser::OperandMatchResultTy MipsAsmParser::parseCCRRegs(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { unsigned RegNum; - //if the first token is not '$' we have error + // If the first token is not '$' we have an error. if (Parser.getTok().isNot(AsmToken::Dollar)) return MatchOperand_NoMatch; SMLoc S = Parser.getTok().getLoc(); - Parser.Lex(); // Eat $ + Parser.Lex(); // Eat the '$' - const AsmToken &Tok = Parser.getTok(); // get next token + const AsmToken &Tok = Parser.getTok(); // Get next token. if (Tok.is(AsmToken::Integer)) { RegNum = Tok.getIntVal(); - // at the moment only fcc0 is supported + // At the moment only fcc0 is supported. if (RegNum != 0) return MatchOperand_ParseFail; } else if (Tok.is(AsmToken::Identifier)) { - // at the moment only fcc0 is supported + // At the moment only fcc0 is supported. if (Tok.getIdentifier() != "fcc0") return MatchOperand_ParseFail; } else return MatchOperand_NoMatch; MipsOperand *op = MipsOperand::CreateReg(Mips::FCC0, S, - Parser.getTok().getLoc()); + Parser.getTok().getLoc()); op->setRegKind(MipsOperand::Kind_CCRRegs); Operands.push_back(op); - Parser.Lex(); // Eat reg number + Parser.Lex(); // Eat the register number. 
return MatchOperand_Success; } @@ -1349,23 +1454,23 @@ MCSymbolRefExpr::VariantKind MipsAsmParser::getVariantKind(StringRef Symbol) { static int ConvertCcString(StringRef CondString) { int CC = StringSwitch<unsigned>(CondString) - .Case(".f", 0) - .Case(".un", 1) - .Case(".eq", 2) - .Case(".ueq", 3) - .Case(".olt", 4) - .Case(".ult", 5) - .Case(".ole", 6) - .Case(".ule", 7) - .Case(".sf", 8) - .Case(".ngle", 9) - .Case(".seq", 10) - .Case(".ngl", 11) - .Case(".lt", 12) - .Case(".nge", 13) - .Case(".le", 14) - .Case(".ngt", 15) - .Default(-1); + .Case(".f", 0) + .Case(".un", 1) + .Case(".eq", 2) + .Case(".ueq", 3) + .Case(".olt", 4) + .Case(".ult", 5) + .Case(".ole", 6) + .Case(".ule", 7) + .Case(".sf", 8) + .Case(".ngle", 9) + .Case(".seq", 10) + .Case(".ngl", 11) + .Case(".lt", 12) + .Case(".nge", 13) + .Case(".le", 14) + .Case(".ngt", 15) + .Default(-1); return CC; } @@ -1373,16 +1478,16 @@ static int ConvertCcString(StringRef CondString) { bool MipsAsmParser:: parseMathOperation(StringRef Name, SMLoc NameLoc, SmallVectorImpl<MCParsedAsmOperand*> &Operands) { - // split the format + // Split the format. size_t Start = Name.find('.'), Next = Name.rfind('.'); StringRef Format1 = Name.slice(Start, Next); - // and add the first format to the operands + // Add the first format to the operands. Operands.push_back(MipsOperand::CreateToken(Format1, NameLoc)); - // now for the second format + // Now for the second format. StringRef Format2 = Name.slice(Next, StringRef::npos); Operands.push_back(MipsOperand::CreateToken(Format2, NameLoc)); - // set the format for the first register + // Set the format for the first register. setFpFormat(Format1); // Read the remaining operands. @@ -1398,11 +1503,10 @@ parseMathOperation(StringRef Name, SMLoc NameLoc, SMLoc Loc = getLexer().getLoc(); Parser.eatToEndOfStatement(); return Error(Loc, "unexpected token in argument list"); - } - Parser.Lex(); // Eat the comma. + Parser.Lex(); // Eat the comma. - //set the format for the first register + // Set the format for the first register setFpFormat(Format2); // Parse and remember the operand. @@ -1419,7 +1523,7 @@ parseMathOperation(StringRef Name, SMLoc NameLoc, return Error(Loc, "unexpected token in argument list"); } - Parser.Lex(); // Consume the EndOfStatement + Parser.Lex(); // Consume the EndOfStatement. return false; } @@ -1427,13 +1531,12 @@ bool MipsAsmParser:: ParseInstruction(ParseInstructionInfo &Info, StringRef Name, SMLoc NameLoc, SmallVectorImpl<MCParsedAsmOperand*> &Operands) { StringRef Mnemonic; - // floating point instructions: should register be treated as double? + // Floating point instructions: Should the register be treated as a double? if (requestsDoubleOperand(Name)) { setFpFormat(FP_FORMAT_D); - Operands.push_back(MipsOperand::CreateToken(Name, NameLoc)); - Mnemonic = Name; - } - else { + Operands.push_back(MipsOperand::CreateToken(Name, NameLoc)); + Mnemonic = Name; + } else { setDefaultFpFormat(); // Create the leading tokens for the mnemonic, split by '.' characters. size_t Start = 0, Next = Name.find('.'); @@ -1442,30 +1545,30 @@ ParseInstruction(ParseInstructionInfo &Info, StringRef Name, SMLoc NameLoc, Operands.push_back(MipsOperand::CreateToken(Mnemonic, NameLoc)); if (Next != StringRef::npos) { - // there is a format token in mnemonic - // StringRef Rest = Name.slice(Next, StringRef::npos); - size_t Dot = Name.find('.', Next+1); + // There is a format token in mnemonic. 
+ size_t Dot = Name.find('.', Next + 1); StringRef Format = Name.slice(Next, Dot); - if (Dot == StringRef::npos) //only one '.' in a string, it's a format + if (Dot == StringRef::npos) // Only one '.' in a string, it's a format. Operands.push_back(MipsOperand::CreateToken(Format, NameLoc)); else { - if (Name.startswith("c.")){ - // floating point compare, add '.' and immediate represent for cc + if (Name.startswith("c.")) { + // Floating point compare, add '.' and immediate represent for cc. Operands.push_back(MipsOperand::CreateToken(".", NameLoc)); int Cc = ConvertCcString(Format); if (Cc == -1) { return Error(NameLoc, "Invalid conditional code"); } SMLoc E = SMLoc::getFromPointer( - Parser.getTok().getLoc().getPointer() -1 ); - Operands.push_back(MipsOperand::CreateImm( - MCConstantExpr::Create(Cc, getContext()), NameLoc, E)); + Parser.getTok().getLoc().getPointer() - 1); + Operands.push_back( + MipsOperand::CreateImm(MCConstantExpr::Create(Cc, getContext()), + NameLoc, E)); } else { // trunc, ceil, floor ... return parseMathOperation(Name, NameLoc, Operands); } - // the rest is a format + // The rest is a format. Format = Name.slice(Dot, StringRef::npos); Operands.push_back(MipsOperand::CreateToken(Format, NameLoc)); } @@ -1483,8 +1586,8 @@ ParseInstruction(ParseInstructionInfo &Info, StringRef Name, SMLoc NameLoc, return Error(Loc, "unexpected token in argument list"); } - while (getLexer().is(AsmToken::Comma) ) { - Parser.Lex(); // Eat the comma. + while (getLexer().is(AsmToken::Comma)) { + Parser.Lex(); // Eat the comma. // Parse and remember the operand. if (ParseOperand(Operands, Name)) { @@ -1501,48 +1604,47 @@ ParseInstruction(ParseInstructionInfo &Info, StringRef Name, SMLoc NameLoc, return Error(Loc, "unexpected token in argument list"); } - Parser.Lex(); // Consume the EndOfStatement + Parser.Lex(); // Consume the EndOfStatement. return false; } bool MipsAsmParser::reportParseError(StringRef ErrorMsg) { - SMLoc Loc = getLexer().getLoc(); - Parser.eatToEndOfStatement(); - return Error(Loc, ErrorMsg); + SMLoc Loc = getLexer().getLoc(); + Parser.eatToEndOfStatement(); + return Error(Loc, ErrorMsg); } bool MipsAsmParser::parseSetNoAtDirective() { - // Line should look like: - // .set noat - // set at reg to 0 + // Line should look like: ".set noat". + // set at reg to 0. Options.setATReg(0); // eat noat Parser.Lex(); - // If this is not the end of the statement, report error + // If this is not the end of the statement, report an error. if (getLexer().isNot(AsmToken::EndOfStatement)) { reportParseError("unexpected token in statement"); return false; } - Parser.Lex(); // Consume the EndOfStatement + Parser.Lex(); // Consume the EndOfStatement. return false; } + bool MipsAsmParser::parseSetAtDirective() { - // line can be - // .set at - defaults to $1 + // Line can be .set at - defaults to $1 // or .set at=$reg int AtRegNo; getParser().Lex(); if (getLexer().is(AsmToken::EndOfStatement)) { Options.setATReg(1); - Parser.Lex(); // Consume the EndOfStatement + Parser.Lex(); // Consume the EndOfStatement. return false; } else if (getLexer().is(AsmToken::Equal)) { - getParser().Lex(); // eat '=' + getParser().Lex(); // Eat the '='. if (getLexer().isNot(AsmToken::Dollar)) { reportParseError("unexpected token in statement"); return false; } - Parser.Lex(); // Eat '$' + Parser.Lex(); // Eat the '$'. 
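For the floating-point compare mnemonics handled above, the parser splits c.<cond>.<fmt> and folds the condition into an immediate via ConvertCcString. A standalone sketch of that split, reusing the same condition table (splitCompare is a made-up name; this is illustration, not the parser code):

#include <cassert>
#include <string>

struct CompareMnemonic { std::string Cond, Fmt; int Cc; };

// Same ordering as ConvertCcString: .f=0, .un=1, .eq=2, ... .ngt=15.
static int condCode(const std::string &Cond) {
  static const char *Names[] = { ".f", ".un", ".eq", ".ueq", ".olt", ".ult",
                                 ".ole", ".ule", ".sf", ".ngle", ".seq",
                                 ".ngl", ".lt", ".nge", ".le", ".ngt" };
  for (int I = 0; I < 16; ++I)
    if (Cond == Names[I])
      return I;
  return -1;
}

// Split "c.<cond>.<fmt>" into its condition, format and condition code.
static CompareMnemonic splitCompare(const std::string &Name) {
  std::string::size_type First = Name.find('.');
  std::string::size_type Second = Name.find('.', First + 1);
  std::string Cond = Name.substr(First, Second - First);
  return { Cond, Name.substr(Second), condCode(Cond) };
}

int main() {
  CompareMnemonic M = splitCompare("c.eq.s");
  assert(M.Cond == ".eq" && M.Fmt == ".s" && M.Cc == 2);
  assert(splitCompare("c.ult.d").Cc == 5);
  return 0;
}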
const AsmToken &Reg = Parser.getTok(); if (Reg.is(AsmToken::Identifier)) { AtRegNo = matchCPURegisterName(Reg.getIdentifier()); @@ -1553,7 +1655,7 @@ bool MipsAsmParser::parseSetAtDirective() { return false; } - if ( AtRegNo < 1 || AtRegNo > 31) { + if (AtRegNo < 1 || AtRegNo > 31) { reportParseError("unexpected token in statement"); return false; } @@ -1562,13 +1664,13 @@ bool MipsAsmParser::parseSetAtDirective() { reportParseError("unexpected token in statement"); return false; } - getParser().Lex(); // Eat reg + getParser().Lex(); // Eat the register. if (getLexer().isNot(AsmToken::EndOfStatement)) { reportParseError("unexpected token in statement"); return false; - } - Parser.Lex(); // Consume the EndOfStatement + } + Parser.Lex(); // Consume the EndOfStatement. return false; } else { reportParseError("unexpected token in statement"); @@ -1578,43 +1680,43 @@ bool MipsAsmParser::parseSetAtDirective() { bool MipsAsmParser::parseSetReorderDirective() { Parser.Lex(); - // If this is not the end of the statement, report error + // If this is not the end of the statement, report an error. if (getLexer().isNot(AsmToken::EndOfStatement)) { reportParseError("unexpected token in statement"); return false; } Options.setReorder(); - Parser.Lex(); // Consume the EndOfStatement + Parser.Lex(); // Consume the EndOfStatement. return false; } bool MipsAsmParser::parseSetNoReorderDirective() { - Parser.Lex(); - // if this is not the end of the statement, report error - if (getLexer().isNot(AsmToken::EndOfStatement)) { - reportParseError("unexpected token in statement"); - return false; - } - Options.setNoreorder(); - Parser.Lex(); // Consume the EndOfStatement + Parser.Lex(); + // If this is not the end of the statement, report an error. + if (getLexer().isNot(AsmToken::EndOfStatement)) { + reportParseError("unexpected token in statement"); return false; + } + Options.setNoreorder(); + Parser.Lex(); // Consume the EndOfStatement. + return false; } bool MipsAsmParser::parseSetMacroDirective() { Parser.Lex(); - // if this is not the end of the statement, report error + // If this is not the end of the statement, report an error. if (getLexer().isNot(AsmToken::EndOfStatement)) { reportParseError("unexpected token in statement"); return false; } Options.setMacro(); - Parser.Lex(); // Consume the EndOfStatement + Parser.Lex(); // Consume the EndOfStatement. return false; } bool MipsAsmParser::parseSetNoMacroDirective() { Parser.Lex(); - // if this is not the end of the statement, report error + // If this is not the end of the statement, report an error. if (getLexer().isNot(AsmToken::EndOfStatement)) { reportParseError("`noreorder' must be set before `nomacro'"); return false; @@ -1624,7 +1726,7 @@ bool MipsAsmParser::parseSetNoMacroDirective() { return false; } Options.setNomacro(); - Parser.Lex(); // Consume the EndOfStatement + Parser.Lex(); // Consume the EndOfStatement. return false; } @@ -1637,24 +1739,24 @@ bool MipsAsmParser::parseSetAssignment() { if (getLexer().isNot(AsmToken::Comma)) return reportParseError("unexpected token in .set directive"); - Lex(); //eat comma + Lex(); // Eat comma if (Parser.parseExpression(Value)) reportParseError("expected valid expression after comma"); - // check if the Name already exists as a symbol + // Check if the Name already exists as a symbol. 
MCSymbol *Sym = getContext().LookupSymbol(Name); - if (Sym) { + if (Sym) return reportParseError("symbol already defined"); - } Sym = getContext().GetOrCreateSymbol(Name); Sym->setVariableValue(Value); return false; } + bool MipsAsmParser::parseDirectiveSet() { - // get next token + // Get the next token. const AsmToken &Tok = Parser.getTok(); if (Tok.getString() == "noat") { @@ -1670,15 +1772,15 @@ bool MipsAsmParser::parseDirectiveSet() { } else if (Tok.getString() == "nomacro") { return parseSetNoMacroDirective(); } else if (Tok.getString() == "nomips16") { - // ignore this directive for now + // Ignore this directive for now. Parser.eatToEndOfStatement(); return false; } else if (Tok.getString() == "nomicromips") { - // ignore this directive for now + // Ignore this directive for now. Parser.eatToEndOfStatement(); return false; } else { - // it is just an identifier, look for assignment + // It is just an identifier, look for an assignment. parseSetAssignment(); return false; } @@ -1715,20 +1817,20 @@ bool MipsAsmParser::ParseDirective(AsmToken DirectiveID) { StringRef IDVal = DirectiveID.getString(); - if ( IDVal == ".ent") { - // ignore this directive for now + if (IDVal == ".ent") { + // Ignore this directive for now. Parser.Lex(); return false; } if (IDVal == ".end") { - // ignore this directive for now + // Ignore this directive for now. Parser.Lex(); return false; } if (IDVal == ".frame") { - // ignore this directive for now + // Ignore this directive for now. Parser.eatToEndOfStatement(); return false; } @@ -1738,19 +1840,19 @@ bool MipsAsmParser::ParseDirective(AsmToken DirectiveID) { } if (IDVal == ".fmask") { - // ignore this directive for now + // Ignore this directive for now. Parser.eatToEndOfStatement(); return false; } if (IDVal == ".mask") { - // ignore this directive for now + // Ignore this directive for now. Parser.eatToEndOfStatement(); return false; } if (IDVal == ".gpword") { - // ignore this directive for now + // Ignore this directive for now. 
Parser.eatToEndOfStatement(); return false; } diff --git a/contrib/llvm/lib/Target/Mips/Disassembler/MipsDisassembler.cpp b/contrib/llvm/lib/Target/Mips/Disassembler/MipsDisassembler.cpp index 59e49d8..0dba33a 100644 --- a/contrib/llvm/lib/Target/Mips/Disassembler/MipsDisassembler.cpp +++ b/contrib/llvm/lib/Target/Mips/Disassembler/MipsDisassembler.cpp @@ -143,6 +143,16 @@ static DecodeStatus DecodeACRegsDSPRegisterClass(MCInst &Inst, uint64_t Address, const void *Decoder); +static DecodeStatus DecodeHIRegsDSPRegisterClass(MCInst &Inst, + unsigned RegNo, + uint64_t Address, + const void *Decoder); + +static DecodeStatus DecodeLORegsDSPRegisterClass(MCInst &Inst, + unsigned RegNo, + uint64_t Address, + const void *Decoder); + static DecodeStatus DecodeBranchTarget(MCInst &Inst, unsigned Offset, uint64_t Address, @@ -496,6 +506,30 @@ static DecodeStatus DecodeACRegsDSPRegisterClass(MCInst &Inst, return MCDisassembler::Success; } +static DecodeStatus DecodeHIRegsDSPRegisterClass(MCInst &Inst, + unsigned RegNo, + uint64_t Address, + const void *Decoder) { + if (RegNo >= 4) + return MCDisassembler::Fail; + + unsigned Reg = getReg(Decoder, Mips::HIRegsDSPRegClassID, RegNo); + Inst.addOperand(MCOperand::CreateReg(Reg)); + return MCDisassembler::Success; +} + +static DecodeStatus DecodeLORegsDSPRegisterClass(MCInst &Inst, + unsigned RegNo, + uint64_t Address, + const void *Decoder) { + if (RegNo >= 4) + return MCDisassembler::Fail; + + unsigned Reg = getReg(Decoder, Mips::LORegsDSPRegClassID, RegNo); + Inst.addOperand(MCOperand::CreateReg(Reg)); + return MCDisassembler::Success; +} + static DecodeStatus DecodeBranchTarget(MCInst &Inst, unsigned Offset, uint64_t Address, diff --git a/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp b/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp index e198a7c..9460731 100644 --- a/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp +++ b/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp @@ -27,6 +27,9 @@ #include "llvm/MC/MCSubtargetInfo.h" #include "llvm/Support/raw_ostream.h" +#define GET_INSTRMAP_INFO +#include "MipsGenInstrInfo.inc" + using namespace llvm; namespace { @@ -35,12 +38,13 @@ class MipsMCCodeEmitter : public MCCodeEmitter { void operator=(const MipsMCCodeEmitter &) LLVM_DELETED_FUNCTION; const MCInstrInfo &MCII; MCContext &Ctx; + const MCSubtargetInfo &STI; bool IsLittleEndian; public: MipsMCCodeEmitter(const MCInstrInfo &mcii, MCContext &Ctx_, const MCSubtargetInfo &sti, bool IsLittle) : - MCII(mcii), Ctx(Ctx_), IsLittleEndian(IsLittle) {} + MCII(mcii), Ctx(Ctx_), STI (sti), IsLittleEndian(IsLittle) {} ~MipsMCCodeEmitter() {} @@ -88,6 +92,9 @@ public: unsigned getSizeInsEncoding(const MCInst &MI, unsigned OpNo, SmallVectorImpl<MCFixup> &Fixups) const; + unsigned + getExprOpValue(const MCExpr *Expr,SmallVectorImpl<MCFixup> &Fixups) const; + }; // class MipsMCCodeEmitter } // namespace @@ -141,6 +148,15 @@ EncodeInstruction(const MCInst &MI, raw_ostream &OS, if ((Opcode != Mips::NOP) && (Opcode != Mips::SLL) && !Binary) llvm_unreachable("unimplemented opcode in EncodeInstruction()"); + if (STI.getFeatureBits() & Mips::FeatureMicroMips) { + int NewOpcode = Mips::Std2MicroMips (Opcode, Mips::Arch_micromips); + if (NewOpcode != -1) { + Opcode = NewOpcode; + TmpInst.setOpcode (NewOpcode); + Binary = getBinaryCodeForInstr(TmpInst, Fixups); + } + } + const MCInstrDesc &Desc = MCII.get(TmpInst.getOpcode()); // Get byte count of instruction @@ -192,35 +208,24 @@ getJumpTargetOpValue(const MCInst &MI, 
unsigned OpNo, return 0; } -/// getMachineOpValue - Return binary encoding of operand. If the machine -/// operand requires relocation, record the relocation and return zero. unsigned MipsMCCodeEmitter:: -getMachineOpValue(const MCInst &MI, const MCOperand &MO, - SmallVectorImpl<MCFixup> &Fixups) const { - if (MO.isReg()) { - unsigned Reg = MO.getReg(); - unsigned RegNo = Ctx.getRegisterInfo().getEncodingValue(Reg); - return RegNo; - } else if (MO.isImm()) { - return static_cast<unsigned>(MO.getImm()); - } else if (MO.isFPImm()) { - return static_cast<unsigned>(APFloat(MO.getFPImm()) - .bitcastToAPInt().getHiBits(32).getLimitedValue()); - } +getExprOpValue(const MCExpr *Expr,SmallVectorImpl<MCFixup> &Fixups) const { + int64_t Res; - // MO must be an Expr. - assert(MO.isExpr()); + if (Expr->EvaluateAsAbsolute(Res)) + return Res; - const MCExpr *Expr = MO.getExpr(); MCExpr::ExprKind Kind = Expr->getKind(); + if (Kind == MCExpr::Constant) { + return cast<MCConstantExpr>(Expr)->getValue(); + } if (Kind == MCExpr::Binary) { - Expr = static_cast<const MCBinaryExpr*>(Expr)->getLHS(); - Kind = Expr->getKind(); + unsigned Res = getExprOpValue(cast<MCBinaryExpr>(Expr)->getLHS(), Fixups); + Res += getExprOpValue(cast<MCBinaryExpr>(Expr)->getRHS(), Fixups); + return Res; } - - assert (Kind == MCExpr::SymbolRef); - + if (Kind == MCExpr::SymbolRef) { Mips::Fixups FixupKind = Mips::Fixups(0); switch(cast<MCSymbolRefExpr>(Expr)->getKind()) { @@ -300,12 +305,32 @@ getMachineOpValue(const MCInst &MI, const MCOperand &MO, break; } // switch - Fixups.push_back(MCFixup::Create(0, MO.getExpr(), MCFixupKind(FixupKind))); - - // All of the information is in the fixup. + Fixups.push_back(MCFixup::Create(0, Expr, MCFixupKind(FixupKind))); + return 0; + } return 0; } +/// getMachineOpValue - Return binary encoding of operand. If the machine +/// operand requires relocation, record the relocation and return zero. +unsigned MipsMCCodeEmitter:: +getMachineOpValue(const MCInst &MI, const MCOperand &MO, + SmallVectorImpl<MCFixup> &Fixups) const { + if (MO.isReg()) { + unsigned Reg = MO.getReg(); + unsigned RegNo = Ctx.getRegisterInfo().getEncodingValue(Reg); + return RegNo; + } else if (MO.isImm()) { + return static_cast<unsigned>(MO.getImm()); + } else if (MO.isFPImm()) { + return static_cast<unsigned>(APFloat(MO.getFPImm()) + .bitcastToAPInt().getHiBits(32).getLimitedValue()); + } + // MO must be an Expr. + assert(MO.isExpr()); + return getExprOpValue(MO.getExpr(),Fixups); +} + /// getMemEncoding - Return binary encoding of memory related operand. /// If the offset operand requires relocation, record the relocation. 
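getExprOpValue above folds constants to their value, encodes a binary expression as the sum of both sides, and records a fixup (contributing 0 to the encoding) for symbolic references. A self-contained sketch of that recursion over a stand-in expression type (MiniExpr is not an LLVM class):

#include <cassert>
#include <string>
#include <vector>

// Stand-in for MCExpr, for illustration only.
struct MiniExpr {
  enum Kind { Constant, Symbol, Add } K;
  long Value;                                  // valid when K == Constant
  std::string Sym;                             // valid when K == Symbol
  const MiniExpr *LHS, *RHS;                   // valid when K == Add
};

static unsigned exprOpValue(const MiniExpr &E, std::vector<std::string> &Fixups) {
  switch (E.K) {
  case MiniExpr::Constant:
    return static_cast<unsigned>(E.Value);     // constants encode directly
  case MiniExpr::Add:
    return exprOpValue(*E.LHS, Fixups) + exprOpValue(*E.RHS, Fixups);
  case MiniExpr::Symbol:
    Fixups.push_back(E.Sym);                   // the relocation carries the value
    return 0;                                  // nothing lands in the field itself
  }
  return 0;
}

int main() {
  MiniExpr Sym; Sym.K = MiniExpr::Symbol; Sym.Sym = "foo";
  MiniExpr Off; Off.K = MiniExpr::Constant; Off.Value = 8;
  MiniExpr Sum; Sum.K = MiniExpr::Add; Sum.LHS = &Sym; Sum.RHS = &Off;
  std::vector<std::string> Fixups;
  assert(exprOpValue(Sum, Fixups) == 8);       // "foo + 8": only the 8 encodes
  assert(Fixups.size() == 1 && Fixups[0] == "foo");
  return 0;
}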
unsigned diff --git a/contrib/llvm/lib/Target/Mips/MicroMipsInstrFormats.td b/contrib/llvm/lib/Target/Mips/MicroMipsInstrFormats.td new file mode 100644 index 0000000..665b4d2 --- /dev/null +++ b/contrib/llvm/lib/Target/Mips/MicroMipsInstrFormats.td @@ -0,0 +1,112 @@ +class MMArch { + string Arch = "micromips"; + list<dag> Pattern = []; +} + +class ADD_FM_MM<bits<6> op, bits<10> funct> : MMArch { + bits<5> rt; + bits<5> rs; + bits<5> rd; + + bits<32> Inst; + + let Inst{31-26} = op; + let Inst{25-21} = rt; + let Inst{20-16} = rs; + let Inst{15-11} = rd; + let Inst{10} = 0; + let Inst{9-0} = funct; +} + +class ADDI_FM_MM<bits<6> op> : MMArch { + bits<5> rs; + bits<5> rt; + bits<16> imm16; + + bits<32> Inst; + + let Inst{31-26} = op; + let Inst{25-21} = rt; + let Inst{20-16} = rs; + let Inst{15-0} = imm16; +} + +class SLTI_FM_MM<bits<6> op> : MMArch { + bits<5> rt; + bits<5> rs; + bits<16> imm16; + + bits<32> Inst; + + let Inst{31-26} = op; + let Inst{25-21} = rs; + let Inst{20-16} = rt; + let Inst{15-0} = imm16; +} + +class LUI_FM_MM : MMArch { + bits<5> rt; + bits<16> imm16; + + bits<32> Inst; + + let Inst{31-26} = 0x10; + let Inst{25-21} = 0xd; + let Inst{20-16} = rt; + let Inst{15-0} = imm16; +} + +class MULT_FM_MM<bits<10> funct> : MMArch { + bits<5> rs; + bits<5> rt; + + bits<32> Inst; + + let Inst{31-26} = 0x00; + let Inst{25-21} = rt; + let Inst{20-16} = rs; + let Inst{15-6} = funct; + let Inst{5-0} = 0x3c; +} + +class SRA_FM_MM<bits<10> funct, bit rotate> : MMArch { + bits<5> rd; + bits<5> rt; + bits<5> shamt; + + bits<32> Inst; + + let Inst{31-26} = 0; + let Inst{25-21} = rd; + let Inst{20-16} = rt; + let Inst{15-11} = shamt; + let Inst{10} = rotate; + let Inst{9-0} = funct; +} + +class SRLV_FM_MM<bits<10> funct, bit rotate> : MMArch { + bits<5> rd; + bits<5> rt; + bits<5> rs; + + bits<32> Inst; + + let Inst{31-26} = 0; + let Inst{25-21} = rt; + let Inst{20-16} = rs; + let Inst{15-11} = rd; + let Inst{10} = rotate; + let Inst{9-0} = funct; +} + +class LW_FM_MM<bits<6> op> : MMArch { + bits<5> rt; + bits<21> addr; + + bits<32> Inst; + + let Inst{31-26} = op; + let Inst{25-21} = rt; + let Inst{20-16} = addr{20-16}; + let Inst{15-0} = addr{15-0}; +} diff --git a/contrib/llvm/lib/Target/Mips/MicroMipsInstrInfo.td b/contrib/llvm/lib/Target/Mips/MicroMipsInstrInfo.td new file mode 100644 index 0000000..74cdccd --- /dev/null +++ b/contrib/llvm/lib/Target/Mips/MicroMipsInstrInfo.td @@ -0,0 +1,67 @@ +let isCodeGenOnly = 1 in { + /// Arithmetic Instructions (ALU Immediate) + def ADDiu_MM : MMRel, ArithLogicI<"addiu", simm16, CPURegsOpnd>, + ADDI_FM_MM<0xc>; + def ADDi_MM : MMRel, ArithLogicI<"addi", simm16, CPURegsOpnd>, + ADDI_FM_MM<0x4>; + def SLTi_MM : MMRel, SetCC_I<"slti", setlt, simm16, immSExt16, CPURegs>, + SLTI_FM_MM<0x24>; + def SLTiu_MM : MMRel, SetCC_I<"sltiu", setult, simm16, immSExt16, CPURegs>, + SLTI_FM_MM<0x2c>; + def ANDi_MM : MMRel, ArithLogicI<"andi", uimm16, CPURegsOpnd, immZExt16, and>, + ADDI_FM_MM<0x34>; + def ORi_MM : MMRel, ArithLogicI<"ori", uimm16, CPURegsOpnd, immZExt16, or>, + ADDI_FM_MM<0x14>; + def XORi_MM : MMRel, ArithLogicI<"xori", uimm16, CPURegsOpnd, immZExt16, xor>, + ADDI_FM_MM<0x1c>; + def LUi_MM : MMRel, LoadUpper<"lui", CPURegs, uimm16>, LUI_FM_MM; + + /// Arithmetic Instructions (3-Operand, R-Type) + def ADDu_MM : MMRel, ArithLogicR<"addu", CPURegsOpnd>, ADD_FM_MM<0, 0x150>; + def SUBu_MM : MMRel, ArithLogicR<"subu", CPURegsOpnd>, ADD_FM_MM<0, 0x1d0>; + def MUL_MM : MMRel, ArithLogicR<"mul", CPURegsOpnd>, ADD_FM_MM<0, 0x210>; + def ADD_MM : MMRel, 
ArithLogicR<"add", CPURegsOpnd>, ADD_FM_MM<0, 0x110>; + def SUB_MM : MMRel, ArithLogicR<"sub", CPURegsOpnd>, ADD_FM_MM<0, 0x190>; + def SLT_MM : MMRel, SetCC_R<"slt", setlt, CPURegs>, ADD_FM_MM<0, 0x350>; + def SLTu_MM : MMRel, SetCC_R<"sltu", setult, CPURegs>, + ADD_FM_MM<0, 0x390>; + def AND_MM : MMRel, ArithLogicR<"and", CPURegsOpnd, 1, IIAlu, and>, + ADD_FM_MM<0, 0x250>; + def OR_MM : MMRel, ArithLogicR<"or", CPURegsOpnd, 1, IIAlu, or>, + ADD_FM_MM<0, 0x290>; + def XOR_MM : MMRel, ArithLogicR<"xor", CPURegsOpnd, 1, IIAlu, xor>, + ADD_FM_MM<0, 0x310>; + def NOR_MM : MMRel, LogicNOR<"nor", CPURegsOpnd>, ADD_FM_MM<0, 0x2d0>; + def MULT_MM : MMRel, Mult<"mult", IIImul, CPURegsOpnd, [HI, LO]>, + MULT_FM_MM<0x22c>; + def MULTu_MM : MMRel, Mult<"multu", IIImul, CPURegsOpnd, [HI, LO]>, + MULT_FM_MM<0x26c>; + + /// Shift Instructions + def SLL_MM : MMRel, shift_rotate_imm<"sll", shamt, CPURegsOpnd>, + SRA_FM_MM<0, 0>; + def SRL_MM : MMRel, shift_rotate_imm<"srl", shamt, CPURegsOpnd>, + SRA_FM_MM<0x40, 0>; + def SRA_MM : MMRel, shift_rotate_imm<"sra", shamt, CPURegsOpnd>, + SRA_FM_MM<0x80, 0>; + def SLLV_MM : MMRel, shift_rotate_reg<"sllv", CPURegsOpnd>, + SRLV_FM_MM<0x10, 0>; + def SRLV_MM : MMRel, shift_rotate_reg<"srlv", CPURegsOpnd>, + SRLV_FM_MM<0x50, 0>; + def SRAV_MM : MMRel, shift_rotate_reg<"srav", CPURegsOpnd>, + SRLV_FM_MM<0x90, 0>; + def ROTR_MM : MMRel, shift_rotate_imm<"rotr", shamt, CPURegsOpnd>, + SRA_FM_MM<0xc0, 0>; + def ROTRV_MM : MMRel, shift_rotate_reg<"rotrv", CPURegsOpnd>, + SRLV_FM_MM<0xd0, 0>; + + /// Load and Store Instructions - aligned + defm LB_MM : LoadM<"lb", CPURegs, sextloadi8>, MMRel, LW_FM_MM<0x7>; + defm LBu_MM : LoadM<"lbu", CPURegs, zextloadi8>, MMRel, LW_FM_MM<0x5>; + defm LH_MM : LoadM<"lh", CPURegs, sextloadi16>, MMRel, LW_FM_MM<0xf>; + defm LHu_MM : LoadM<"lhu", CPURegs, zextloadi16>, MMRel, LW_FM_MM<0xd>; + defm LW_MM : LoadM<"lw", CPURegs>, MMRel, LW_FM_MM<0x3f>; + defm SB_MM : StoreM<"sb", CPURegs, truncstorei8>, MMRel, LW_FM_MM<0x6>; + defm SH_MM : StoreM<"sh", CPURegs, truncstorei16>, MMRel, LW_FM_MM<0xe>; + defm SW_MM : StoreM<"sw", CPURegs>, MMRel, LW_FM_MM<0x3e>; +} diff --git a/contrib/llvm/lib/Target/Mips/Mips16ISelDAGToDAG.cpp b/contrib/llvm/lib/Target/Mips/Mips16ISelDAGToDAG.cpp index 00b3449..c1c635c 100644 --- a/contrib/llvm/lib/Target/Mips/Mips16ISelDAGToDAG.cpp +++ b/contrib/llvm/lib/Target/Mips/Mips16ISelDAGToDAG.cpp @@ -35,6 +35,11 @@ #include "llvm/Target/TargetMachine.h" using namespace llvm; +bool Mips16DAGToDAGISel::runOnMachineFunction(MachineFunction &MF) { + if (!Subtarget.inMips16Mode()) + return false; + return MipsDAGToDAGISel::runOnMachineFunction(MF); +} /// Select multiply instructions. 
std::pair<SDNode*, SDNode*> Mips16DAGToDAGISel::selectMULT(SDNode *N, unsigned Opc, DebugLoc DL, EVT Ty, @@ -267,7 +272,7 @@ std::pair<bool, SDNode*> Mips16DAGToDAGISel::selectNode(SDNode *Node) { EVT VT = LHS.getValueType(); unsigned Sltu_op = Mips::SltuRxRyRz16; - SDNode *Carry = CurDAG->getMachineNode(Sltu_op, DL, VT, Ops, 2); + SDNode *Carry = CurDAG->getMachineNode(Sltu_op, DL, VT, Ops); unsigned Addu_op = Mips::AdduRxRyRz16; SDNode *AddCarry = CurDAG->getMachineNode(Addu_op, DL, VT, SDValue(Carry,0), RHS); diff --git a/contrib/llvm/lib/Target/Mips/Mips16ISelDAGToDAG.h b/contrib/llvm/lib/Target/Mips/Mips16ISelDAGToDAG.h index baa8587..f05f9b7 100644 --- a/contrib/llvm/lib/Target/Mips/Mips16ISelDAGToDAG.h +++ b/contrib/llvm/lib/Target/Mips/Mips16ISelDAGToDAG.h @@ -28,6 +28,8 @@ private: SDValue getMips16SPAliasReg(); + virtual bool runOnMachineFunction(MachineFunction &MF); + void getMips16SPRefReg(SDNode *Parent, SDValue &AliasReg); virtual bool selectAddr16(SDNode *Parent, SDValue N, SDValue &Base, diff --git a/contrib/llvm/lib/Target/Mips/Mips16ISelLowering.cpp b/contrib/llvm/lib/Target/Mips/Mips16ISelLowering.cpp index 23eb537..f63318f 100644 --- a/contrib/llvm/lib/Target/Mips/Mips16ISelLowering.cpp +++ b/contrib/llvm/lib/Target/Mips/Mips16ISelLowering.cpp @@ -53,7 +53,6 @@ Mips16TargetLowering::Mips16TargetLowering(MipsTargetMachine &TM) if (Mips16HardFloat) setMips16HardFloatLibCalls(); - setOperationAction(ISD::MEMBARRIER, MVT::Other, Expand); setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, Expand); setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i32, Expand); setOperationAction(ISD::ATOMIC_SWAP, MVT::i32, Expand); @@ -614,7 +613,8 @@ MachineBasicBlock unsigned regX = MI->getOperand(0).getReg(); unsigned regY = MI->getOperand(1).getReg(); MachineBasicBlock *target = MI->getOperand(2).getMBB(); - BuildMI(*BB, MI, MI->getDebugLoc(), TII->get(CmpOpc)).addReg(regX).addReg(regY); + BuildMI(*BB, MI, MI->getDebugLoc(), TII->get(CmpOpc)).addReg(regX) + .addReg(regY); BuildMI(*BB, MI, MI->getDebugLoc(), TII->get(BtOpc)).addMBB(target); MI->eraseFromParent(); // The pseudo instruction is gone now. return BB; @@ -636,7 +636,8 @@ MachineBasicBlock *Mips16TargetLowering::emitFEXT_T8I8I16_ins( CmpOpc = CmpiXOpc; else llvm_unreachable("immediate field not usable"); - BuildMI(*BB, MI, MI->getDebugLoc(), TII->get(CmpOpc)).addReg(regX).addImm(imm); + BuildMI(*BB, MI, MI->getDebugLoc(), TII->get(CmpOpc)).addReg(regX) + .addImm(imm); BuildMI(*BB, MI, MI->getDebugLoc(), TII->get(BtOpc)).addMBB(target); MI->eraseFromParent(); // The pseudo instruction is gone now. 
return BB; diff --git a/contrib/llvm/lib/Target/Mips/Mips16RegisterInfo.cpp b/contrib/llvm/lib/Target/Mips/Mips16RegisterInfo.cpp index 6cca227..7ad18f2 100644 --- a/contrib/llvm/lib/Target/Mips/Mips16RegisterInfo.cpp +++ b/contrib/llvm/lib/Target/Mips/Mips16RegisterInfo.cpp @@ -1,5 +1,4 @@ - -//===-- Mips16RegisterInfo.cpp - MIPS16 Register Information -== ----------===// +//===-- Mips16RegisterInfo.cpp - MIPS16 Register Information --------------===// // // The LLVM Compiler Infrastructure // diff --git a/contrib/llvm/lib/Target/Mips/Mips64InstrInfo.td b/contrib/llvm/lib/Target/Mips/Mips64InstrInfo.td index 846a822..fc533fb 100644 --- a/contrib/llvm/lib/Target/Mips/Mips64InstrInfo.td +++ b/contrib/llvm/lib/Target/Mips/Mips64InstrInfo.td @@ -66,14 +66,12 @@ let usesCustomInserter = 1, Predicates = [HasStdEnc], defm ATOMIC_CMP_SWAP_I64 : AtomicCmpSwap64<atomic_cmp_swap_64>; } -/// Pseudo instructions for loading, storing and copying accumulator registers. +/// Pseudo instructions for loading and storing accumulator registers. let isPseudo = 1 in { defm LOAD_AC128 : LoadM<"load_ac128", ACRegs128>; defm STORE_AC128 : StoreM<"store_ac128", ACRegs128>; } -def COPY_AC128 : PseudoSE<(outs ACRegs128:$dst), (ins ACRegs128:$src), []>; - //===----------------------------------------------------------------------===// // Instruction definition //===----------------------------------------------------------------------===// diff --git a/contrib/llvm/lib/Target/Mips/MipsAsmPrinter.cpp b/contrib/llvm/lib/Target/Mips/MipsAsmPrinter.cpp index 1876cb6..6e4feda 100644 --- a/contrib/llvm/lib/Target/Mips/MipsAsmPrinter.cpp +++ b/contrib/llvm/lib/Target/Mips/MipsAsmPrinter.cpp @@ -46,6 +46,10 @@ using namespace llvm; bool MipsAsmPrinter::runOnMachineFunction(MachineFunction &MF) { + // Initialize TargetLoweringObjectFile. + if (Subtarget->allowMixed16_32()) + const_cast<TargetLoweringObjectFile&>(getObjFileLowering()) + .Initialize(OutContext, TM); MipsFI = MF.getInfo<MipsFunctionInfo>(); AsmPrinter::runOnMachineFunction(MF); return true; @@ -245,12 +249,18 @@ void MipsAsmPrinter::EmitFunctionEntryLabel() { void MipsAsmPrinter::EmitFunctionBodyStart() { MCInstLowering.Initialize(Mang, &MF->getContext()); - emitFrameDirective(); + bool IsNakedFunction = + MF->getFunction()-> + getAttributes().hasAttribute(AttributeSet::FunctionIndex, + Attribute::Naked); + if (!IsNakedFunction) + emitFrameDirective(); if (OutStreamer.hasRawTextSupport()) { SmallString<128> Str; raw_svector_ostream OS(Str); - printSavedRegsBitmask(OS); + if (!IsNakedFunction) + printSavedRegsBitmask(OS); OutStreamer.EmitRawText(OS.str()); if (!Subtarget->inMips16Mode()) { OutStreamer.EmitRawText(StringRef("\t.set\tnoreorder")); @@ -419,12 +429,18 @@ bool MipsAsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI, unsigned OpNum, unsigned AsmVariant, const char *ExtraCode, raw_ostream &O) { - if (ExtraCode && ExtraCode[0]) - return true; // Unknown modifier. + int Offset = 0; + // Currently we are expecting either no ExtraCode or 'D' + if (ExtraCode) { + if (ExtraCode[0] == 'D') + Offset = 4; + else + return true; // Unknown modifier. 
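PrintAsmMemoryOperand above now also accepts a 'D' inline-asm modifier and prints the operand with an offset of 4, presumably selecting the second word of a double-word value (treat that reading as an assumption). A trivial sketch of the resulting text:

#include <cassert>
#include <string>

// Hypothetical free function mirroring the printing logic: no modifier gives
// "0($reg)", and 'D' (assumed to mean the second word) gives "4($reg)".
static std::string printAsmMemOperand(const std::string &Reg, char Modifier) {
  int Offset = (Modifier == 'D') ? 4 : 0;
  return std::to_string(Offset) + "($" + Reg + ")";
}

int main() {
  assert(printAsmMemOperand("a0", '\0') == "0($a0)");
  assert(printAsmMemOperand("a0", 'D') == "4($a0)");
  return 0;
}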
+ } const MachineOperand &MO = MI->getOperand(OpNum); assert(MO.isReg() && "unexpected inline asm memory operand"); - O << "0($" << MipsInstPrinter::getRegisterName(MO.getReg()) << ")"; + O << Offset << "($" << MipsInstPrinter::getRegisterName(MO.getReg()) << ")"; return false; } diff --git a/contrib/llvm/lib/Target/Mips/MipsCodeEmitter.cpp b/contrib/llvm/lib/Target/Mips/MipsCodeEmitter.cpp index 1d86d90..3fc402b 100644 --- a/contrib/llvm/lib/Target/Mips/MipsCodeEmitter.cpp +++ b/contrib/llvm/lib/Target/Mips/MipsCodeEmitter.cpp @@ -116,7 +116,7 @@ private: int Offset) const; /// Expand pseudo instructions with accumulator register operands. - void expandACCInstr(MachineBasicBlock::instr_iterator &MI, + void expandACCInstr(MachineBasicBlock::instr_iterator MI, MachineBasicBlock &MBB, unsigned Opc) const; /// \brief Expand pseudo instruction. Return true if MI was expanded. @@ -302,7 +302,7 @@ void MipsCodeEmitter::emitWord(unsigned Word) { MCE.emitWordBE(Word); } -void MipsCodeEmitter::expandACCInstr(MachineBasicBlock::instr_iterator &MI, +void MipsCodeEmitter::expandACCInstr(MachineBasicBlock::instr_iterator MI, MachineBasicBlock &MBB, unsigned Opc) const { // Expand "pseudomult $ac0, $t0, $t1" to "mult $t0, $t1". diff --git a/contrib/llvm/lib/Target/Mips/MipsConstantIslandPass.cpp b/contrib/llvm/lib/Target/Mips/MipsConstantIslandPass.cpp index b5de1eb..1951324 100644 --- a/contrib/llvm/lib/Target/Mips/MipsConstantIslandPass.cpp +++ b/contrib/llvm/lib/Target/Mips/MipsConstantIslandPass.cpp @@ -80,6 +80,10 @@ FunctionPass *llvm::createMipsConstantIslandPass(MipsTargetMachine &tm) { } bool MipsConstantIslands::runOnMachineFunction(MachineFunction &F) { - return true; + // The intention is for this to be a mips16 only pass for now + // FIXME: + // if (!TM.getSubtarget<MipsSubtarget>().inMips16Mode()) + // return false; + return false; } diff --git a/contrib/llvm/lib/Target/Mips/MipsDSPInstrFormats.td b/contrib/llvm/lib/Target/Mips/MipsDSPInstrFormats.td index a72a763..cf09113 100644 --- a/contrib/llvm/lib/Target/Mips/MipsDSPInstrFormats.td +++ b/contrib/llvm/lib/Target/Mips/MipsDSPInstrFormats.td @@ -219,6 +219,33 @@ class MULT_FMT<bits<6> opcode, bits<6> funct> : DSPInst { let Inst{5-0} = funct; } +// MFHI sub-class format. +class MFHI_FMT<bits<6> funct> : DSPInst { + bits<5> rd; + bits<2> ac; + + let Inst{31-26} = 0; + let Inst{25-23} = 0; + let Inst{22-21} = ac; + let Inst{20-16} = 0; + let Inst{15-11} = rd; + let Inst{10-6} = 0; + let Inst{5-0} = funct; +} + +// MTHI sub-class format. +class MTHI_FMT<bits<6> funct> : DSPInst { + bits<5> rs; + bits<2> ac; + + let Inst{31-26} = 0; + let Inst{25-21} = rs; + let Inst{20-13} = 0; + let Inst{12-11} = ac; + let Inst{10-6} = 0; + let Inst{5-0} = funct; +} + // EXTR.W sub-class format (type 1). 
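The MFHI_FMT class above fixes every field except the accumulator selector (bits 22-21), the destination register (bits 15-11) and the function code (bits 5-0). A quick bit-packing sketch of that layout (illustrative C++, not TableGen output; the field values are arbitrary examples):

#include <cassert>
#include <cstdint>

// MFHI sub-class layout: ac in 22-21, rd in 15-11, funct in 5-0, rest zero.
static uint32_t encodeMFHIFmt(unsigned Ac, unsigned Rd, unsigned Funct) {
  return (Ac & 0x3) << 21 | (Rd & 0x1f) << 11 | (Funct & 0x3f);
}

int main() {
  uint32_t Word = encodeMFHIFmt(/*ac=*/2, /*rd=*/9, /*funct=*/0x10);
  assert(Word == ((2u << 21) | (9u << 11) | 0x10));
  return 0;
}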
class EXTR_W_TY1_FMT<bits<5> op> : DSPInst { bits<5> rt; diff --git a/contrib/llvm/lib/Target/Mips/MipsDSPInstrInfo.td b/contrib/llvm/lib/Target/Mips/MipsDSPInstrInfo.td index 3c116e1..c12878a 100644 --- a/contrib/llvm/lib/Target/Mips/MipsDSPInstrInfo.td +++ b/contrib/llvm/lib/Target/Mips/MipsDSPInstrInfo.td @@ -26,6 +26,8 @@ def SDT_MipsShilo : SDTypeProfile<1, 2, [SDTCisVT<0, untyped>, SDTCisSameAs<0, 2>, SDTCisVT<1, i32>]>; def SDT_MipsDPA : SDTypeProfile<1, 3, [SDTCisVT<0, untyped>, SDTCisSameAs<0, 3>, SDTCisVT<1, i32>, SDTCisSameAs<1, 2>]>; +def SDT_MipsSHIFT_DSP : SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisSameAs<0, 1>, + SDTCisVT<2, i32>]>; class MipsDSPBase<string Opc, SDTypeProfile Prof> : SDNode<!strconcat("MipsISD::", Opc), Prof>; @@ -74,18 +76,19 @@ def MipsMADD_DSP : MipsDSPBase<"MADD_DSP", SDT_MipsDPA>; def MipsMADDU_DSP : MipsDSPBase<"MADDU_DSP", SDT_MipsDPA>; def MipsMSUB_DSP : MipsDSPBase<"MSUB_DSP", SDT_MipsDPA>; def MipsMSUBU_DSP : MipsDSPBase<"MSUBU_DSP", SDT_MipsDPA>; +def MipsSHLL_DSP : MipsDSPBase<"SHLL_DSP", SDT_MipsSHIFT_DSP>; +def MipsSHRA_DSP : MipsDSPBase<"SHRA_DSP", SDT_MipsSHIFT_DSP>; +def MipsSHRL_DSP : MipsDSPBase<"SHRL_DSP", SDT_MipsSHIFT_DSP>; +def MipsSETCC_DSP : MipsDSPBase<"SETCC_DSP", SDTSetCC>; +def MipsSELECT_CC_DSP : MipsDSPBase<"SELECT_CC_DSP", SDTSelectCC>; // Flags. -class UseAC { - list<Register> Uses = [AC0]; +class Uses<list<Register> Regs> { + list<Register> Uses = Regs; } -class UseDSPCtrl { - list<Register> Uses = [DSPCtrl]; -} - -class ClearDefs { - list<Register> Defs = []; +class Defs<list<Register> Regs> { + list<Register> Defs = Regs; } // Instruction encoding. @@ -145,6 +148,10 @@ class MAQ_S_W_PHL_ENC : DPA_W_PH_FMT<0b10100>; class MAQ_S_W_PHR_ENC : DPA_W_PH_FMT<0b10110>; class MAQ_SA_W_PHL_ENC : DPA_W_PH_FMT<0b10000>; class MAQ_SA_W_PHR_ENC : DPA_W_PH_FMT<0b10010>; +class MFHI_ENC : MFHI_FMT<0b010000>; +class MFLO_ENC : MFHI_FMT<0b010010>; +class MTHI_ENC : MTHI_FMT<0b010001>; +class MTLO_ENC : MTHI_FMT<0b010011>; class DPAU_H_QBL_ENC : DPA_W_PH_FMT<0b00011>; class DPAU_H_QBR_ENC : DPA_W_PH_FMT<0b00111>; class DPSU_H_QBL_ENC : DPA_W_PH_FMT<0b01011>; @@ -256,7 +263,6 @@ class ADDU_QB_DESC_BASE<string instr_asm, SDPatternOperator OpNode, string AsmString = !strconcat(instr_asm, "\t$rd, $rs, $rt"); list<dag> Pattern = [(set RCD:$rd, (OpNode RCS:$rs, RCT:$rt))]; InstrItinClass Itinerary = itin; - list<Register> Defs = [DSPCtrl]; } class RADDU_W_QB_DESC_BASE<string instr_asm, SDPatternOperator OpNode, @@ -267,7 +273,6 @@ class RADDU_W_QB_DESC_BASE<string instr_asm, SDPatternOperator OpNode, string AsmString = !strconcat(instr_asm, "\t$rd, $rs"); list<dag> Pattern = [(set RCD:$rd, (OpNode RCS:$rs))]; InstrItinClass Itinerary = itin; - list<Register> Defs = [DSPCtrl]; } class CMP_EQ_QB_R2_DESC_BASE<string instr_asm, SDPatternOperator OpNode, @@ -278,7 +283,6 @@ class CMP_EQ_QB_R2_DESC_BASE<string instr_asm, SDPatternOperator OpNode, string AsmString = !strconcat(instr_asm, "\t$rs, $rt"); list<dag> Pattern = [(OpNode RCS:$rs, RCT:$rt)]; InstrItinClass Itinerary = itin; - list<Register> Defs = [DSPCtrl]; } class CMP_EQ_QB_R3_DESC_BASE<string instr_asm, SDPatternOperator OpNode, @@ -289,7 +293,6 @@ class CMP_EQ_QB_R3_DESC_BASE<string instr_asm, SDPatternOperator OpNode, string AsmString = !strconcat(instr_asm, "\t$rd, $rs, $rt"); list<dag> Pattern = [(set RCD:$rd, (OpNode RCS:$rs, RCT:$rt))]; InstrItinClass Itinerary = itin; - list<Register> Defs = [DSPCtrl]; } class PRECR_SRA_PH_W_DESC_BASE<string instr_asm, SDPatternOperator OpNode, @@ -300,7 
+303,6 @@ class PRECR_SRA_PH_W_DESC_BASE<string instr_asm, SDPatternOperator OpNode, string AsmString = !strconcat(instr_asm, "\t$rt, $rs, $sa"); list<dag> Pattern = [(set RCT:$rt, (OpNode RCS:$src, RCS:$rs, immZExt5:$sa))]; InstrItinClass Itinerary = itin; - list<Register> Defs = [DSPCtrl]; string Constraints = "$src = $rt"; } @@ -312,7 +314,6 @@ class ABSQ_S_PH_R2_DESC_BASE<string instr_asm, SDPatternOperator OpNode, string AsmString = !strconcat(instr_asm, "\t$rd, $rt"); list<dag> Pattern = [(set RCD:$rd, (OpNode RCT:$rt))]; InstrItinClass Itinerary = itin; - list<Register> Defs = [DSPCtrl]; } class REPL_DESC_BASE<string instr_asm, SDPatternOperator OpNode, @@ -322,7 +323,6 @@ class REPL_DESC_BASE<string instr_asm, SDPatternOperator OpNode, string AsmString = !strconcat(instr_asm, "\t$rd, $imm"); list<dag> Pattern = [(set RC:$rd, (OpNode immPat:$imm))]; InstrItinClass Itinerary = itin; - list<Register> Defs = [DSPCtrl]; } class SHLL_QB_R3_DESC_BASE<string instr_asm, SDPatternOperator OpNode, @@ -332,7 +332,6 @@ class SHLL_QB_R3_DESC_BASE<string instr_asm, SDPatternOperator OpNode, string AsmString = !strconcat(instr_asm, "\t$rd, $rt, $rs_sa"); list<dag> Pattern = [(set RC:$rd, (OpNode RC:$rt, CPURegs:$rs_sa))]; InstrItinClass Itinerary = itin; - list<Register> Defs = [DSPCtrl]; } class SHLL_QB_R2_DESC_BASE<string instr_asm, SDPatternOperator OpNode, @@ -343,7 +342,7 @@ class SHLL_QB_R2_DESC_BASE<string instr_asm, SDPatternOperator OpNode, string AsmString = !strconcat(instr_asm, "\t$rd, $rt, $rs_sa"); list<dag> Pattern = [(set RC:$rd, (OpNode RC:$rt, ImmPat:$rs_sa))]; InstrItinClass Itinerary = itin; - list<Register> Defs = [DSPCtrl]; + bit hasSideEffects = 1; } class LX_DESC_BASE<string instr_asm, SDPatternOperator OpNode, @@ -354,7 +353,6 @@ class LX_DESC_BASE<string instr_asm, SDPatternOperator OpNode, list<dag> Pattern = [(set CPURegs:$rd, (OpNode CPURegs:$base, CPURegs:$index))]; InstrItinClass Itinerary = itin; - list<Register> Defs = [DSPCtrl]; bit mayLoad = 1; } @@ -366,7 +364,6 @@ class ADDUH_QB_DESC_BASE<string instr_asm, SDPatternOperator OpNode, string AsmString = !strconcat(instr_asm, "\t$rd, $rs, $rt"); list<dag> Pattern = [(set RCD:$rd, (OpNode RCS:$rs, RCT:$rt))]; InstrItinClass Itinerary = itin; - list<Register> Defs = [DSPCtrl]; } class APPEND_DESC_BASE<string instr_asm, SDPatternOperator OpNode, @@ -377,7 +374,6 @@ class APPEND_DESC_BASE<string instr_asm, SDPatternOperator OpNode, list<dag> Pattern = [(set CPURegs:$rt, (OpNode CPURegs:$src, CPURegs:$rs, ImmOp:$sa))]; InstrItinClass Itinerary = itin; - list<Register> Defs = [DSPCtrl]; string Constraints = "$src = $rt"; } @@ -387,7 +383,6 @@ class EXTR_W_TY1_R2_DESC_BASE<string instr_asm, SDPatternOperator OpNode, dag InOperandList = (ins ACRegsDSP:$ac, CPURegs:$shift_rs); string AsmString = !strconcat(instr_asm, "\t$rt, $ac, $shift_rs"); InstrItinClass Itinerary = itin; - list<Register> Defs = [DSPCtrl]; } class EXTR_W_TY1_R1_DESC_BASE<string instr_asm, SDPatternOperator OpNode, @@ -396,7 +391,6 @@ class EXTR_W_TY1_R1_DESC_BASE<string instr_asm, SDPatternOperator OpNode, dag InOperandList = (ins ACRegsDSP:$ac, uimm16:$shift_rs); string AsmString = !strconcat(instr_asm, "\t$rt, $ac, $shift_rs"); InstrItinClass Itinerary = itin; - list<Register> Defs = [DSPCtrl]; } class SHILO_R1_DESC_BASE<string instr_asm, SDPatternOperator OpNode> { @@ -405,7 +399,6 @@ class SHILO_R1_DESC_BASE<string instr_asm, SDPatternOperator OpNode> { string AsmString = !strconcat(instr_asm, "\t$ac, $shift"); list<dag> Pattern = [(set 
ACRegsDSP:$ac, (OpNode immSExt6:$shift, ACRegsDSP:$acin))]; - list<Register> Defs = [DSPCtrl]; string Constraints = "$acin = $ac"; } @@ -415,7 +408,6 @@ class SHILO_R2_DESC_BASE<string instr_asm, SDPatternOperator OpNode> { string AsmString = !strconcat(instr_asm, "\t$ac, $rs"); list<dag> Pattern = [(set ACRegsDSP:$ac, (OpNode CPURegs:$rs, ACRegsDSP:$acin))]; - list<Register> Defs = [DSPCtrl]; string Constraints = "$acin = $ac"; } @@ -425,7 +417,6 @@ class MTHLIP_DESC_BASE<string instr_asm, SDPatternOperator OpNode> { string AsmString = !strconcat(instr_asm, "\t$rs, $ac"); list<dag> Pattern = [(set ACRegsDSP:$ac, (OpNode CPURegs:$rs, ACRegsDSP:$acin))]; - list<Register> Uses = [DSPCtrl]; string Constraints = "$acin = $ac"; } @@ -436,7 +427,6 @@ class RDDSP_DESC_BASE<string instr_asm, SDPatternOperator OpNode, string AsmString = !strconcat(instr_asm, "\t$rd, $mask"); list<dag> Pattern = [(set CPURegs:$rd, (OpNode immZExt10:$mask))]; InstrItinClass Itinerary = itin; - list<Register> Uses = [DSPCtrl]; } class WRDSP_DESC_BASE<string instr_asm, SDPatternOperator OpNode, @@ -446,7 +436,6 @@ class WRDSP_DESC_BASE<string instr_asm, SDPatternOperator OpNode, string AsmString = !strconcat(instr_asm, "\t$rs, $mask"); list<dag> Pattern = [(OpNode CPURegs:$rs, immZExt10:$mask)]; InstrItinClass Itinerary = itin; - list<Register> Defs = [DSPCtrl]; } class DPA_W_PH_DESC_BASE<string instr_asm, SDPatternOperator OpNode> { @@ -455,7 +444,6 @@ class DPA_W_PH_DESC_BASE<string instr_asm, SDPatternOperator OpNode> { string AsmString = !strconcat(instr_asm, "\t$ac, $rs, $rt"); list<dag> Pattern = [(set ACRegsDSP:$ac, (OpNode CPURegs:$rs, CPURegs:$rt, ACRegsDSP:$acin))]; - list<Register> Defs = [DSPCtrl]; string Constraints = "$acin = $ac"; } @@ -482,9 +470,22 @@ class MADD_DESC_BASE<string instr_asm, SDPatternOperator OpNode, string Constraints = "$acin = $ac"; } +class MFHI_DESC_BASE<string instr_asm, RegisterClass RC, InstrItinClass itin> { + dag OutOperandList = (outs CPURegs:$rd); + dag InOperandList = (ins RC:$ac); + string AsmString = !strconcat(instr_asm, "\t$rd, $ac"); + InstrItinClass Itinerary = itin; +} + +class MTHI_DESC_BASE<string instr_asm, RegisterClass RC, InstrItinClass itin> { + dag OutOperandList = (outs RC:$ac); + dag InOperandList = (ins CPURegs:$rs); + string AsmString = !strconcat(instr_asm, "\t$rs, $ac"); + InstrItinClass Itinerary = itin; +} + class BPOSGE32_PSEUDO_DESC_BASE<SDPatternOperator OpNode, InstrItinClass itin> : MipsPseudo<(outs CPURegs:$dst), (ins), [(set CPURegs:$dst, (OpNode))]> { - list<Register> Uses = [DSPCtrl]; bit usesCustomInserter = 1; } @@ -493,7 +494,6 @@ class BPOSGE32_DESC_BASE<string instr_asm, InstrItinClass itin> { dag InOperandList = (ins brtarget:$offset); string AsmString = !strconcat(instr_asm, "\t$offset"); InstrItinClass Itinerary = itin; - list<Register> Uses = [DSPCtrl]; bit isBranch = 1; bit isTerminator = 1; bit hasDelaySlot = 1; @@ -506,7 +506,6 @@ class INSV_DESC_BASE<string instr_asm, SDPatternOperator OpNode, string AsmString = !strconcat(instr_asm, "\t$rt, $rs"); list<dag> Pattern = [(set CPURegs:$rt, (OpNode CPURegs:$src, CPURegs:$rs))]; InstrItinClass Itinerary = itin; - list<Register> Uses = [DSPCtrl]; string Constraints = "$src = $rt"; } @@ -515,178 +514,183 @@ class INSV_DESC_BASE<string instr_asm, SDPatternOperator OpNode, //===----------------------------------------------------------------------===// // Addition/subtraction -class ADDU_QB_DESC : ADDU_QB_DESC_BASE<"addu.qb", int_mips_addu_qb, NoItinerary, - DSPRegs, DSPRegs>, 
IsCommutable; +class ADDU_QB_DESC : ADDU_QB_DESC_BASE<"addu.qb", null_frag, NoItinerary, + DSPRegs, DSPRegs>, IsCommutable, + Defs<[DSPOutFlag20]>; class ADDU_S_QB_DESC : ADDU_QB_DESC_BASE<"addu_s.qb", int_mips_addu_s_qb, NoItinerary, DSPRegs, DSPRegs>, - IsCommutable; + IsCommutable, Defs<[DSPOutFlag20]>; -class SUBU_QB_DESC : ADDU_QB_DESC_BASE<"subu.qb", int_mips_subu_qb, NoItinerary, - DSPRegs, DSPRegs>; +class SUBU_QB_DESC : ADDU_QB_DESC_BASE<"subu.qb", null_frag, NoItinerary, + DSPRegs, DSPRegs>, + Defs<[DSPOutFlag20]>; class SUBU_S_QB_DESC : ADDU_QB_DESC_BASE<"subu_s.qb", int_mips_subu_s_qb, - NoItinerary, DSPRegs, DSPRegs>; + NoItinerary, DSPRegs, DSPRegs>, + Defs<[DSPOutFlag20]>; -class ADDQ_PH_DESC : ADDU_QB_DESC_BASE<"addq.ph", int_mips_addq_ph, NoItinerary, - DSPRegs, DSPRegs>, IsCommutable; +class ADDQ_PH_DESC : ADDU_QB_DESC_BASE<"addq.ph", null_frag, NoItinerary, + DSPRegs, DSPRegs>, IsCommutable, + Defs<[DSPOutFlag20]>; class ADDQ_S_PH_DESC : ADDU_QB_DESC_BASE<"addq_s.ph", int_mips_addq_s_ph, NoItinerary, DSPRegs, DSPRegs>, - IsCommutable; + IsCommutable, Defs<[DSPOutFlag20]>; -class SUBQ_PH_DESC : ADDU_QB_DESC_BASE<"subq.ph", int_mips_subq_ph, NoItinerary, - DSPRegs, DSPRegs>; +class SUBQ_PH_DESC : ADDU_QB_DESC_BASE<"subq.ph", null_frag, NoItinerary, + DSPRegs, DSPRegs>, + Defs<[DSPOutFlag20]>; class SUBQ_S_PH_DESC : ADDU_QB_DESC_BASE<"subq_s.ph", int_mips_subq_s_ph, - NoItinerary, DSPRegs, DSPRegs>; + NoItinerary, DSPRegs, DSPRegs>, + Defs<[DSPOutFlag20]>; class ADDQ_S_W_DESC : ADDU_QB_DESC_BASE<"addq_s.w", int_mips_addq_s_w, NoItinerary, CPURegs, CPURegs>, - IsCommutable; + IsCommutable, Defs<[DSPOutFlag20]>; class SUBQ_S_W_DESC : ADDU_QB_DESC_BASE<"subq_s.w", int_mips_subq_s_w, - NoItinerary, CPURegs, CPURegs>; + NoItinerary, CPURegs, CPURegs>, + Defs<[DSPOutFlag20]>; -class ADDSC_DESC : ADDU_QB_DESC_BASE<"addsc", int_mips_addsc, NoItinerary, - CPURegs, CPURegs>, IsCommutable; +class ADDSC_DESC : ADDU_QB_DESC_BASE<"addsc", null_frag, NoItinerary, + CPURegs, CPURegs>, IsCommutable, + Defs<[DSPCarry]>; -class ADDWC_DESC : ADDU_QB_DESC_BASE<"addwc", int_mips_addwc, NoItinerary, +class ADDWC_DESC : ADDU_QB_DESC_BASE<"addwc", null_frag, NoItinerary, CPURegs, CPURegs>, - IsCommutable, UseDSPCtrl; + IsCommutable, Uses<[DSPCarry]>, Defs<[DSPOutFlag20]>; class MODSUB_DESC : ADDU_QB_DESC_BASE<"modsub", int_mips_modsub, NoItinerary, - CPURegs, CPURegs>, ClearDefs; + CPURegs, CPURegs>; class RADDU_W_QB_DESC : RADDU_W_QB_DESC_BASE<"raddu.w.qb", int_mips_raddu_w_qb, - NoItinerary, CPURegs, DSPRegs>, - ClearDefs; + NoItinerary, CPURegs, DSPRegs>; // Absolute value class ABSQ_S_PH_DESC : ABSQ_S_PH_R2_DESC_BASE<"absq_s.ph", int_mips_absq_s_ph, - NoItinerary, DSPRegs>; + NoItinerary, DSPRegs>, + Defs<[DSPOutFlag20]>; class ABSQ_S_W_DESC : ABSQ_S_PH_R2_DESC_BASE<"absq_s.w", int_mips_absq_s_w, - NoItinerary, CPURegs>; + NoItinerary, CPURegs>, + Defs<[DSPOutFlag20]>; // Precision reduce/expand class PRECRQ_QB_PH_DESC : CMP_EQ_QB_R3_DESC_BASE<"precrq.qb.ph", int_mips_precrq_qb_ph, - NoItinerary, DSPRegs, DSPRegs>, - ClearDefs; + NoItinerary, DSPRegs, DSPRegs>; class PRECRQ_PH_W_DESC : CMP_EQ_QB_R3_DESC_BASE<"precrq.ph.w", int_mips_precrq_ph_w, - NoItinerary, DSPRegs, CPURegs>, - ClearDefs; + NoItinerary, DSPRegs, CPURegs>; class PRECRQ_RS_PH_W_DESC : CMP_EQ_QB_R3_DESC_BASE<"precrq_rs.ph.w", int_mips_precrq_rs_ph_w, NoItinerary, DSPRegs, - CPURegs>; + CPURegs>, + Defs<[DSPOutFlag22]>; class PRECRQU_S_QB_PH_DESC : CMP_EQ_QB_R3_DESC_BASE<"precrqu_s.qb.ph", int_mips_precrqu_s_qb_ph, NoItinerary, 
DSPRegs, - DSPRegs>; + DSPRegs>, + Defs<[DSPOutFlag22]>; class PRECEQ_W_PHL_DESC : ABSQ_S_PH_R2_DESC_BASE<"preceq.w.phl", int_mips_preceq_w_phl, - NoItinerary, CPURegs, DSPRegs>, - ClearDefs; + NoItinerary, CPURegs, DSPRegs>; class PRECEQ_W_PHR_DESC : ABSQ_S_PH_R2_DESC_BASE<"preceq.w.phr", int_mips_preceq_w_phr, - NoItinerary, CPURegs, DSPRegs>, - ClearDefs; + NoItinerary, CPURegs, DSPRegs>; class PRECEQU_PH_QBL_DESC : ABSQ_S_PH_R2_DESC_BASE<"precequ.ph.qbl", int_mips_precequ_ph_qbl, - NoItinerary, DSPRegs>, - ClearDefs; + NoItinerary, DSPRegs>; class PRECEQU_PH_QBR_DESC : ABSQ_S_PH_R2_DESC_BASE<"precequ.ph.qbr", int_mips_precequ_ph_qbr, - NoItinerary, DSPRegs>, - ClearDefs; + NoItinerary, DSPRegs>; class PRECEQU_PH_QBLA_DESC : ABSQ_S_PH_R2_DESC_BASE<"precequ.ph.qbla", int_mips_precequ_ph_qbla, - NoItinerary, DSPRegs>, - ClearDefs; + NoItinerary, DSPRegs>; class PRECEQU_PH_QBRA_DESC : ABSQ_S_PH_R2_DESC_BASE<"precequ.ph.qbra", int_mips_precequ_ph_qbra, - NoItinerary, DSPRegs>, - ClearDefs; + NoItinerary, DSPRegs>; class PRECEU_PH_QBL_DESC : ABSQ_S_PH_R2_DESC_BASE<"preceu.ph.qbl", int_mips_preceu_ph_qbl, - NoItinerary, DSPRegs>, - ClearDefs; + NoItinerary, DSPRegs>; class PRECEU_PH_QBR_DESC : ABSQ_S_PH_R2_DESC_BASE<"preceu.ph.qbr", int_mips_preceu_ph_qbr, - NoItinerary, DSPRegs>, - ClearDefs; + NoItinerary, DSPRegs>; class PRECEU_PH_QBLA_DESC : ABSQ_S_PH_R2_DESC_BASE<"preceu.ph.qbla", int_mips_preceu_ph_qbla, - NoItinerary, DSPRegs>, - ClearDefs; + NoItinerary, DSPRegs>; class PRECEU_PH_QBRA_DESC : ABSQ_S_PH_R2_DESC_BASE<"preceu.ph.qbra", int_mips_preceu_ph_qbra, - NoItinerary, DSPRegs>, - ClearDefs; + NoItinerary, DSPRegs>; // Shift -class SHLL_QB_DESC : SHLL_QB_R2_DESC_BASE<"shll.qb", int_mips_shll_qb, immZExt3, - NoItinerary, DSPRegs>; +class SHLL_QB_DESC : SHLL_QB_R2_DESC_BASE<"shll.qb", null_frag, immZExt3, + NoItinerary, DSPRegs>, + Defs<[DSPOutFlag22]>; class SHLLV_QB_DESC : SHLL_QB_R3_DESC_BASE<"shllv.qb", int_mips_shll_qb, - NoItinerary, DSPRegs>; + NoItinerary, DSPRegs>, + Defs<[DSPOutFlag22]>; -class SHRL_QB_DESC : SHLL_QB_R2_DESC_BASE<"shrl.qb", int_mips_shrl_qb, immZExt3, - NoItinerary, DSPRegs>, ClearDefs; +class SHRL_QB_DESC : SHLL_QB_R2_DESC_BASE<"shrl.qb", null_frag, immZExt3, + NoItinerary, DSPRegs>; class SHRLV_QB_DESC : SHLL_QB_R3_DESC_BASE<"shrlv.qb", int_mips_shrl_qb, - NoItinerary, DSPRegs>, ClearDefs; + NoItinerary, DSPRegs>; -class SHLL_PH_DESC : SHLL_QB_R2_DESC_BASE<"shll.ph", int_mips_shll_ph, immZExt4, - NoItinerary, DSPRegs>; +class SHLL_PH_DESC : SHLL_QB_R2_DESC_BASE<"shll.ph", null_frag, immZExt4, + NoItinerary, DSPRegs>, + Defs<[DSPOutFlag22]>; class SHLLV_PH_DESC : SHLL_QB_R3_DESC_BASE<"shllv.ph", int_mips_shll_ph, - NoItinerary, DSPRegs>; + NoItinerary, DSPRegs>, + Defs<[DSPOutFlag22]>; class SHLL_S_PH_DESC : SHLL_QB_R2_DESC_BASE<"shll_s.ph", int_mips_shll_s_ph, - immZExt4, NoItinerary, DSPRegs>; + immZExt4, NoItinerary, DSPRegs>, + Defs<[DSPOutFlag22]>; class SHLLV_S_PH_DESC : SHLL_QB_R3_DESC_BASE<"shllv_s.ph", int_mips_shll_s_ph, - NoItinerary, DSPRegs>; + NoItinerary, DSPRegs>, + Defs<[DSPOutFlag22]>; -class SHRA_PH_DESC : SHLL_QB_R2_DESC_BASE<"shra.ph", int_mips_shra_ph, immZExt4, - NoItinerary, DSPRegs>, ClearDefs; +class SHRA_PH_DESC : SHLL_QB_R2_DESC_BASE<"shra.ph", null_frag, immZExt4, + NoItinerary, DSPRegs>; class SHRAV_PH_DESC : SHLL_QB_R3_DESC_BASE<"shrav.ph", int_mips_shra_ph, - NoItinerary, DSPRegs>, ClearDefs; + NoItinerary, DSPRegs>; class SHRA_R_PH_DESC : SHLL_QB_R2_DESC_BASE<"shra_r.ph", int_mips_shra_r_ph, - immZExt4, NoItinerary, 
DSPRegs>, - ClearDefs; + immZExt4, NoItinerary, DSPRegs>; class SHRAV_R_PH_DESC : SHLL_QB_R3_DESC_BASE<"shrav_r.ph", int_mips_shra_r_ph, - NoItinerary, DSPRegs>, ClearDefs; + NoItinerary, DSPRegs>; class SHLL_S_W_DESC : SHLL_QB_R2_DESC_BASE<"shll_s.w", int_mips_shll_s_w, - immZExt5, NoItinerary, CPURegs>; + immZExt5, NoItinerary, CPURegs>, + Defs<[DSPOutFlag22]>; class SHLLV_S_W_DESC : SHLL_QB_R3_DESC_BASE<"shllv_s.w", int_mips_shll_s_w, - NoItinerary, CPURegs>; + NoItinerary, CPURegs>, + Defs<[DSPOutFlag22]>; class SHRA_R_W_DESC : SHLL_QB_R2_DESC_BASE<"shra_r.w", int_mips_shra_r_w, - immZExt5, NoItinerary, CPURegs>, - ClearDefs; + immZExt5, NoItinerary, CPURegs>; class SHRAV_R_W_DESC : SHLL_QB_R3_DESC_BASE<"shrav_r.w", int_mips_shra_r_w, NoItinerary, CPURegs>; @@ -694,36 +698,49 @@ class SHRAV_R_W_DESC : SHLL_QB_R3_DESC_BASE<"shrav_r.w", int_mips_shra_r_w, // Multiplication class MULEU_S_PH_QBL_DESC : ADDU_QB_DESC_BASE<"muleu_s.ph.qbl", int_mips_muleu_s_ph_qbl, - NoItinerary, DSPRegs, DSPRegs>; + NoItinerary, DSPRegs, DSPRegs>, + Defs<[DSPOutFlag21]>; class MULEU_S_PH_QBR_DESC : ADDU_QB_DESC_BASE<"muleu_s.ph.qbr", int_mips_muleu_s_ph_qbr, - NoItinerary, DSPRegs, DSPRegs>; + NoItinerary, DSPRegs, DSPRegs>, + Defs<[DSPOutFlag21]>; class MULEQ_S_W_PHL_DESC : ADDU_QB_DESC_BASE<"muleq_s.w.phl", int_mips_muleq_s_w_phl, NoItinerary, CPURegs, DSPRegs>, - IsCommutable; + IsCommutable, Defs<[DSPOutFlag21]>; class MULEQ_S_W_PHR_DESC : ADDU_QB_DESC_BASE<"muleq_s.w.phr", int_mips_muleq_s_w_phr, NoItinerary, CPURegs, DSPRegs>, - IsCommutable; + IsCommutable, Defs<[DSPOutFlag21]>; class MULQ_RS_PH_DESC : ADDU_QB_DESC_BASE<"mulq_rs.ph", int_mips_mulq_rs_ph, NoItinerary, DSPRegs, DSPRegs>, - IsCommutable; + IsCommutable, Defs<[DSPOutFlag21]>; class MULSAQ_S_W_PH_DESC : DPA_W_PH_DESC_BASE<"mulsaq_s.w.ph", - MipsMULSAQ_S_W_PH>; + MipsMULSAQ_S_W_PH>, + Defs<[DSPOutFlag16_19]>; -class MAQ_S_W_PHL_DESC : DPA_W_PH_DESC_BASE<"maq_s.w.phl", MipsMAQ_S_W_PHL>; +class MAQ_S_W_PHL_DESC : DPA_W_PH_DESC_BASE<"maq_s.w.phl", MipsMAQ_S_W_PHL>, + Defs<[DSPOutFlag16_19]>; -class MAQ_S_W_PHR_DESC : DPA_W_PH_DESC_BASE<"maq_s.w.phr", MipsMAQ_S_W_PHR>; +class MAQ_S_W_PHR_DESC : DPA_W_PH_DESC_BASE<"maq_s.w.phr", MipsMAQ_S_W_PHR>, + Defs<[DSPOutFlag16_19]>; -class MAQ_SA_W_PHL_DESC : DPA_W_PH_DESC_BASE<"maq_sa.w.phl", MipsMAQ_SA_W_PHL>; +class MAQ_SA_W_PHL_DESC : DPA_W_PH_DESC_BASE<"maq_sa.w.phl", MipsMAQ_SA_W_PHL>, + Defs<[DSPOutFlag16_19]>; -class MAQ_SA_W_PHR_DESC : DPA_W_PH_DESC_BASE<"maq_sa.w.phr", MipsMAQ_SA_W_PHR>; +class MAQ_SA_W_PHR_DESC : DPA_W_PH_DESC_BASE<"maq_sa.w.phr", MipsMAQ_SA_W_PHR>, + Defs<[DSPOutFlag16_19]>; + +// Move from/to hi/lo. 
+class MFHI_DESC : MFHI_DESC_BASE<"mfhi", HIRegsDSP, NoItinerary>; +class MFLO_DESC : MFHI_DESC_BASE<"mflo", LORegsDSP, NoItinerary>; +class MTHI_DESC : MTHI_DESC_BASE<"mthi", HIRegsDSP, NoItinerary>; +class MTLO_DESC : MTHI_DESC_BASE<"mtlo", LORegsDSP, NoItinerary>; // Dot product with accumulate/subtract class DPAU_H_QBL_DESC : DPA_W_PH_DESC_BASE<"dpau.h.qbl", MipsDPAU_H_QBL>; @@ -734,13 +751,17 @@ class DPSU_H_QBL_DESC : DPA_W_PH_DESC_BASE<"dpsu.h.qbl", MipsDPSU_H_QBL>; class DPSU_H_QBR_DESC : DPA_W_PH_DESC_BASE<"dpsu.h.qbr", MipsDPSU_H_QBR>; -class DPAQ_S_W_PH_DESC : DPA_W_PH_DESC_BASE<"dpaq_s.w.ph", MipsDPAQ_S_W_PH>; +class DPAQ_S_W_PH_DESC : DPA_W_PH_DESC_BASE<"dpaq_s.w.ph", MipsDPAQ_S_W_PH>, + Defs<[DSPOutFlag16_19]>; -class DPSQ_S_W_PH_DESC : DPA_W_PH_DESC_BASE<"dpsq_s.w.ph", MipsDPSQ_S_W_PH>; +class DPSQ_S_W_PH_DESC : DPA_W_PH_DESC_BASE<"dpsq_s.w.ph", MipsDPSQ_S_W_PH>, + Defs<[DSPOutFlag16_19]>; -class DPAQ_SA_L_W_DESC : DPA_W_PH_DESC_BASE<"dpaq_sa.l.w", MipsDPAQ_SA_L_W>; +class DPAQ_SA_L_W_DESC : DPA_W_PH_DESC_BASE<"dpaq_sa.l.w", MipsDPAQ_SA_L_W>, + Defs<[DSPOutFlag16_19]>; -class DPSQ_SA_L_W_DESC : DPA_W_PH_DESC_BASE<"dpsq_sa.l.w", MipsDPSQ_SA_L_W>; +class DPSQ_SA_L_W_DESC : DPA_W_PH_DESC_BASE<"dpsq_sa.l.w", MipsDPSQ_SA_L_W>, + Defs<[DSPOutFlag16_19]>; class MULT_DSP_DESC : MULT_DESC_BASE<"mult", MipsMult, NoItinerary>; class MULTU_DSP_DESC : MULT_DESC_BASE<"multu", MipsMultu, NoItinerary>; @@ -752,15 +773,16 @@ class MSUBU_DSP_DESC : MADD_DESC_BASE<"msubu", MipsMSubu, NoItinerary>; // Comparison class CMPU_EQ_QB_DESC : CMP_EQ_QB_R2_DESC_BASE<"cmpu.eq.qb", int_mips_cmpu_eq_qb, NoItinerary, - DSPRegs>, IsCommutable; + DSPRegs>, + IsCommutable, Defs<[DSPCCond]>; class CMPU_LT_QB_DESC : CMP_EQ_QB_R2_DESC_BASE<"cmpu.lt.qb", int_mips_cmpu_lt_qb, NoItinerary, - DSPRegs>, IsCommutable; + DSPRegs>, Defs<[DSPCCond]>; class CMPU_LE_QB_DESC : CMP_EQ_QB_R2_DESC_BASE<"cmpu.le.qb", int_mips_cmpu_le_qb, NoItinerary, - DSPRegs>, IsCommutable; + DSPRegs>, Defs<[DSPCCond]>; class CMPGU_EQ_QB_DESC : CMP_EQ_QB_R3_DESC_BASE<"cmpgu.eq.qb", int_mips_cmpgu_eq_qb, @@ -769,222 +791,235 @@ class CMPGU_EQ_QB_DESC : CMP_EQ_QB_R3_DESC_BASE<"cmpgu.eq.qb", class CMPGU_LT_QB_DESC : CMP_EQ_QB_R3_DESC_BASE<"cmpgu.lt.qb", int_mips_cmpgu_lt_qb, - NoItinerary, CPURegs, DSPRegs>, - IsCommutable; + NoItinerary, CPURegs, DSPRegs>; class CMPGU_LE_QB_DESC : CMP_EQ_QB_R3_DESC_BASE<"cmpgu.le.qb", int_mips_cmpgu_le_qb, - NoItinerary, CPURegs, DSPRegs>, - IsCommutable; + NoItinerary, CPURegs, DSPRegs>; class CMP_EQ_PH_DESC : CMP_EQ_QB_R2_DESC_BASE<"cmp.eq.ph", int_mips_cmp_eq_ph, NoItinerary, DSPRegs>, - IsCommutable; + IsCommutable, Defs<[DSPCCond]>; class CMP_LT_PH_DESC : CMP_EQ_QB_R2_DESC_BASE<"cmp.lt.ph", int_mips_cmp_lt_ph, NoItinerary, DSPRegs>, - IsCommutable; + Defs<[DSPCCond]>; class CMP_LE_PH_DESC : CMP_EQ_QB_R2_DESC_BASE<"cmp.le.ph", int_mips_cmp_le_ph, NoItinerary, DSPRegs>, - IsCommutable; + Defs<[DSPCCond]>; // Misc class BITREV_DESC : ABSQ_S_PH_R2_DESC_BASE<"bitrev", int_mips_bitrev, - NoItinerary, CPURegs>, ClearDefs; + NoItinerary, CPURegs>; class PACKRL_PH_DESC : CMP_EQ_QB_R3_DESC_BASE<"packrl.ph", int_mips_packrl_ph, - NoItinerary, DSPRegs, DSPRegs>, - ClearDefs; + NoItinerary, DSPRegs, DSPRegs>; class REPL_QB_DESC : REPL_DESC_BASE<"repl.qb", int_mips_repl_qb, immZExt8, - NoItinerary, DSPRegs>, ClearDefs; + NoItinerary, DSPRegs>; class REPL_PH_DESC : REPL_DESC_BASE<"repl.ph", int_mips_repl_ph, immZExt10, - NoItinerary, DSPRegs>, ClearDefs; + NoItinerary, DSPRegs>; class REPLV_QB_DESC : 
ABSQ_S_PH_R2_DESC_BASE<"replv.qb", int_mips_repl_qb, - NoItinerary, DSPRegs, CPURegs>, - ClearDefs; + NoItinerary, DSPRegs, CPURegs>; class REPLV_PH_DESC : ABSQ_S_PH_R2_DESC_BASE<"replv.ph", int_mips_repl_ph, - NoItinerary, DSPRegs, CPURegs>, - ClearDefs; + NoItinerary, DSPRegs, CPURegs>; class PICK_QB_DESC : CMP_EQ_QB_R3_DESC_BASE<"pick.qb", int_mips_pick_qb, NoItinerary, DSPRegs, DSPRegs>, - ClearDefs, UseDSPCtrl; + Uses<[DSPCCond]>; class PICK_PH_DESC : CMP_EQ_QB_R3_DESC_BASE<"pick.ph", int_mips_pick_ph, NoItinerary, DSPRegs, DSPRegs>, - ClearDefs, UseDSPCtrl; + Uses<[DSPCCond]>; -class LWX_DESC : LX_DESC_BASE<"lwx", int_mips_lwx, NoItinerary>, ClearDefs; +class LWX_DESC : LX_DESC_BASE<"lwx", int_mips_lwx, NoItinerary>; -class LHX_DESC : LX_DESC_BASE<"lhx", int_mips_lhx, NoItinerary>, ClearDefs; +class LHX_DESC : LX_DESC_BASE<"lhx", int_mips_lhx, NoItinerary>; -class LBUX_DESC : LX_DESC_BASE<"lbux", int_mips_lbux, NoItinerary>, ClearDefs; +class LBUX_DESC : LX_DESC_BASE<"lbux", int_mips_lbux, NoItinerary>; class BPOSGE32_DESC : BPOSGE32_DESC_BASE<"bposge32", NoItinerary>; // Extr -class EXTP_DESC : EXTR_W_TY1_R1_DESC_BASE<"extp", MipsEXTP, NoItinerary>; +class EXTP_DESC : EXTR_W_TY1_R1_DESC_BASE<"extp", MipsEXTP, NoItinerary>, + Uses<[DSPPos]>, Defs<[DSPEFI]>; -class EXTPV_DESC : EXTR_W_TY1_R2_DESC_BASE<"extpv", MipsEXTP, NoItinerary>; +class EXTPV_DESC : EXTR_W_TY1_R2_DESC_BASE<"extpv", MipsEXTP, NoItinerary>, + Uses<[DSPPos]>, Defs<[DSPEFI]>; -class EXTPDP_DESC : EXTR_W_TY1_R1_DESC_BASE<"extpdp", MipsEXTPDP, NoItinerary>; +class EXTPDP_DESC : EXTR_W_TY1_R1_DESC_BASE<"extpdp", MipsEXTPDP, NoItinerary>, + Uses<[DSPPos]>, Defs<[DSPPos, DSPEFI]>; class EXTPDPV_DESC : EXTR_W_TY1_R2_DESC_BASE<"extpdpv", MipsEXTPDP, - NoItinerary>; + NoItinerary>, + Uses<[DSPPos]>, Defs<[DSPPos, DSPEFI]>; -class EXTR_W_DESC : EXTR_W_TY1_R1_DESC_BASE<"extr.w", MipsEXTR_W, NoItinerary>; +class EXTR_W_DESC : EXTR_W_TY1_R1_DESC_BASE<"extr.w", MipsEXTR_W, NoItinerary>, + Defs<[DSPOutFlag23]>; class EXTRV_W_DESC : EXTR_W_TY1_R2_DESC_BASE<"extrv.w", MipsEXTR_W, - NoItinerary>; + NoItinerary>, Defs<[DSPOutFlag23]>; class EXTR_R_W_DESC : EXTR_W_TY1_R1_DESC_BASE<"extr_r.w", MipsEXTR_R_W, - NoItinerary>; + NoItinerary>, + Defs<[DSPOutFlag23]>; class EXTRV_R_W_DESC : EXTR_W_TY1_R2_DESC_BASE<"extrv_r.w", MipsEXTR_R_W, - NoItinerary>; + NoItinerary>, + Defs<[DSPOutFlag23]>; class EXTR_RS_W_DESC : EXTR_W_TY1_R1_DESC_BASE<"extr_rs.w", MipsEXTR_RS_W, - NoItinerary>; + NoItinerary>, + Defs<[DSPOutFlag23]>; class EXTRV_RS_W_DESC : EXTR_W_TY1_R2_DESC_BASE<"extrv_rs.w", MipsEXTR_RS_W, - NoItinerary>; + NoItinerary>, + Defs<[DSPOutFlag23]>; class EXTR_S_H_DESC : EXTR_W_TY1_R1_DESC_BASE<"extr_s.h", MipsEXTR_S_H, - NoItinerary>; + NoItinerary>, + Defs<[DSPOutFlag23]>; class EXTRV_S_H_DESC : EXTR_W_TY1_R2_DESC_BASE<"extrv_s.h", MipsEXTR_S_H, - NoItinerary>; + NoItinerary>, + Defs<[DSPOutFlag23]>; class SHILO_DESC : SHILO_R1_DESC_BASE<"shilo", MipsSHILO>; class SHILOV_DESC : SHILO_R2_DESC_BASE<"shilov", MipsSHILO>; -class MTHLIP_DESC : MTHLIP_DESC_BASE<"mthlip", MipsMTHLIP>; +class MTHLIP_DESC : MTHLIP_DESC_BASE<"mthlip", MipsMTHLIP>, Defs<[DSPPos]>; class RDDSP_DESC : RDDSP_DESC_BASE<"rddsp", int_mips_rddsp, NoItinerary>; class WRDSP_DESC : WRDSP_DESC_BASE<"wrdsp", int_mips_wrdsp, NoItinerary>; -class INSV_DESC : INSV_DESC_BASE<"insv", int_mips_insv, NoItinerary>; +class INSV_DESC : INSV_DESC_BASE<"insv", int_mips_insv, NoItinerary>, + Uses<[DSPPos, DSPSCount]>; 
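The hunks above replace the blanket DSPCtrl def/use lists with the specific DSPControl sub-fields each instruction actually touches: DSPCCond for the compares and pick, DSPCarry for addsc/addwc, DSPPos/DSPEFI for extp/extpdp/mthlip/insv, and the DSPOutFlag16_19/20/21/22/23 overflow bits for the saturating add/sub, multiply, shift and extract groups. Roughly why, say, addq_s.ph defines DSPOutFlag20: a minimal, illustrative C++ sketch of per-halfword saturating addition that latches overflow in a separate flag (the clamp-to-int16 rule is the usual two's-complement saturation and is assumed here, not spelled out in the patch):

#include <cstdint>
#include <cstdio>

// Saturating signed 16-bit add, roughly what addq_s.ph does per halfword.
// OutFlag20 stands in for one bit of the DSPControl ouflag field; modelling it
// as its own flag register is exactly the choice the patch makes.
static bool OutFlag20 = false;

static int16_t addq_s_h(int16_t a, int16_t b) {
  int32_t sum = int32_t(a) + int32_t(b);
  if (sum > INT16_MAX) { OutFlag20 = true; return INT16_MAX; }
  if (sum < INT16_MIN) { OutFlag20 = true; return INT16_MIN; }
  return int16_t(sum);
}

int main() {
  int16_t r0 = addq_s_h(0x7000, 0x2000); // overflows, saturates to 0x7fff, sets the flag
  int16_t r1 = addq_s_h(100, -50);       // in range, flag untouched
  std::printf("%d %d flag=%d\n", r0, r1, int(OutFlag20));
}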
//===----------------------------------------------------------------------===// // MIPS DSP Rev 2 // Addition/subtraction class ADDU_PH_DESC : ADDU_QB_DESC_BASE<"addu.ph", int_mips_addu_ph, NoItinerary, - DSPRegs, DSPRegs>, IsCommutable; + DSPRegs, DSPRegs>, IsCommutable, + Defs<[DSPOutFlag20]>; class ADDU_S_PH_DESC : ADDU_QB_DESC_BASE<"addu_s.ph", int_mips_addu_s_ph, NoItinerary, DSPRegs, DSPRegs>, - IsCommutable; + IsCommutable, Defs<[DSPOutFlag20]>; class SUBU_PH_DESC : ADDU_QB_DESC_BASE<"subu.ph", int_mips_subu_ph, NoItinerary, - DSPRegs, DSPRegs>; + DSPRegs, DSPRegs>, + Defs<[DSPOutFlag20]>; class SUBU_S_PH_DESC : ADDU_QB_DESC_BASE<"subu_s.ph", int_mips_subu_s_ph, - NoItinerary, DSPRegs, DSPRegs>; + NoItinerary, DSPRegs, DSPRegs>, + Defs<[DSPOutFlag20]>; class ADDUH_QB_DESC : ADDUH_QB_DESC_BASE<"adduh.qb", int_mips_adduh_qb, - NoItinerary, DSPRegs>, - ClearDefs, IsCommutable; + NoItinerary, DSPRegs>, IsCommutable; class ADDUH_R_QB_DESC : ADDUH_QB_DESC_BASE<"adduh_r.qb", int_mips_adduh_r_qb, - NoItinerary, DSPRegs>, - ClearDefs, IsCommutable; + NoItinerary, DSPRegs>, IsCommutable; class SUBUH_QB_DESC : ADDUH_QB_DESC_BASE<"subuh.qb", int_mips_subuh_qb, - NoItinerary, DSPRegs>, ClearDefs; + NoItinerary, DSPRegs>; class SUBUH_R_QB_DESC : ADDUH_QB_DESC_BASE<"subuh_r.qb", int_mips_subuh_r_qb, - NoItinerary, DSPRegs>, ClearDefs; + NoItinerary, DSPRegs>; class ADDQH_PH_DESC : ADDUH_QB_DESC_BASE<"addqh.ph", int_mips_addqh_ph, - NoItinerary, DSPRegs>, - ClearDefs, IsCommutable; + NoItinerary, DSPRegs>, IsCommutable; class ADDQH_R_PH_DESC : ADDUH_QB_DESC_BASE<"addqh_r.ph", int_mips_addqh_r_ph, - NoItinerary, DSPRegs>, - ClearDefs, IsCommutable; + NoItinerary, DSPRegs>, IsCommutable; class SUBQH_PH_DESC : ADDUH_QB_DESC_BASE<"subqh.ph", int_mips_subqh_ph, - NoItinerary, DSPRegs>, ClearDefs; + NoItinerary, DSPRegs>; class SUBQH_R_PH_DESC : ADDUH_QB_DESC_BASE<"subqh_r.ph", int_mips_subqh_r_ph, - NoItinerary, DSPRegs>, ClearDefs; + NoItinerary, DSPRegs>; class ADDQH_W_DESC : ADDUH_QB_DESC_BASE<"addqh.w", int_mips_addqh_w, - NoItinerary, CPURegs>, - ClearDefs, IsCommutable; + NoItinerary, CPURegs>, IsCommutable; class ADDQH_R_W_DESC : ADDUH_QB_DESC_BASE<"addqh_r.w", int_mips_addqh_r_w, - NoItinerary, CPURegs>, - ClearDefs, IsCommutable; + NoItinerary, CPURegs>, IsCommutable; class SUBQH_W_DESC : ADDUH_QB_DESC_BASE<"subqh.w", int_mips_subqh_w, - NoItinerary, CPURegs>, ClearDefs; + NoItinerary, CPURegs>; class SUBQH_R_W_DESC : ADDUH_QB_DESC_BASE<"subqh_r.w", int_mips_subqh_r_w, - NoItinerary, CPURegs>, ClearDefs; + NoItinerary, CPURegs>; // Comparison class CMPGDU_EQ_QB_DESC : CMP_EQ_QB_R3_DESC_BASE<"cmpgdu.eq.qb", int_mips_cmpgdu_eq_qb, NoItinerary, CPURegs, DSPRegs>, - IsCommutable; + IsCommutable, Defs<[DSPCCond]>; class CMPGDU_LT_QB_DESC : CMP_EQ_QB_R3_DESC_BASE<"cmpgdu.lt.qb", int_mips_cmpgdu_lt_qb, NoItinerary, CPURegs, DSPRegs>, - IsCommutable; + Defs<[DSPCCond]>; class CMPGDU_LE_QB_DESC : CMP_EQ_QB_R3_DESC_BASE<"cmpgdu.le.qb", int_mips_cmpgdu_le_qb, NoItinerary, CPURegs, DSPRegs>, - IsCommutable; + Defs<[DSPCCond]>; // Absolute class ABSQ_S_QB_DESC : ABSQ_S_PH_R2_DESC_BASE<"absq_s.qb", int_mips_absq_s_qb, - NoItinerary, DSPRegs>; + NoItinerary, DSPRegs>, + Defs<[DSPOutFlag20]>; // Multiplication -class MUL_PH_DESC : ADDUH_QB_DESC_BASE<"mul.ph", int_mips_mul_ph, NoItinerary, - DSPRegs>, IsCommutable; +class MUL_PH_DESC : ADDUH_QB_DESC_BASE<"mul.ph", null_frag, NoItinerary, + DSPRegs>, IsCommutable, + Defs<[DSPOutFlag21]>; class MUL_S_PH_DESC : ADDUH_QB_DESC_BASE<"mul_s.ph", int_mips_mul_s_ph, - 
NoItinerary, DSPRegs>, IsCommutable; + NoItinerary, DSPRegs>, IsCommutable, + Defs<[DSPOutFlag21]>; class MULQ_S_W_DESC : ADDUH_QB_DESC_BASE<"mulq_s.w", int_mips_mulq_s_w, - NoItinerary, CPURegs>, IsCommutable; + NoItinerary, CPURegs>, IsCommutable, + Defs<[DSPOutFlag21]>; class MULQ_RS_W_DESC : ADDUH_QB_DESC_BASE<"mulq_rs.w", int_mips_mulq_rs_w, - NoItinerary, CPURegs>, IsCommutable; + NoItinerary, CPURegs>, IsCommutable, + Defs<[DSPOutFlag21]>; class MULQ_S_PH_DESC : ADDU_QB_DESC_BASE<"mulq_s.ph", int_mips_mulq_s_ph, NoItinerary, DSPRegs, DSPRegs>, - IsCommutable; + IsCommutable, Defs<[DSPOutFlag21]>; // Dot product with accumulate/subtract class DPA_W_PH_DESC : DPA_W_PH_DESC_BASE<"dpa.w.ph", MipsDPA_W_PH>; class DPS_W_PH_DESC : DPA_W_PH_DESC_BASE<"dps.w.ph", MipsDPS_W_PH>; -class DPAQX_S_W_PH_DESC : DPA_W_PH_DESC_BASE<"dpaqx_s.w.ph", MipsDPAQX_S_W_PH>; +class DPAQX_S_W_PH_DESC : DPA_W_PH_DESC_BASE<"dpaqx_s.w.ph", MipsDPAQX_S_W_PH>, + Defs<[DSPOutFlag16_19]>; class DPAQX_SA_W_PH_DESC : DPA_W_PH_DESC_BASE<"dpaqx_sa.w.ph", - MipsDPAQX_SA_W_PH>; + MipsDPAQX_SA_W_PH>, + Defs<[DSPOutFlag16_19]>; class DPAX_W_PH_DESC : DPA_W_PH_DESC_BASE<"dpax.w.ph", MipsDPAX_W_PH>; class DPSX_W_PH_DESC : DPA_W_PH_DESC_BASE<"dpsx.w.ph", MipsDPSX_W_PH>; -class DPSQX_S_W_PH_DESC : DPA_W_PH_DESC_BASE<"dpsqx_s.w.ph", MipsDPSQX_S_W_PH>; +class DPSQX_S_W_PH_DESC : DPA_W_PH_DESC_BASE<"dpsqx_s.w.ph", MipsDPSQX_S_W_PH>, + Defs<[DSPOutFlag16_19]>; class DPSQX_SA_W_PH_DESC : DPA_W_PH_DESC_BASE<"dpsqx_sa.w.ph", - MipsDPSQX_SA_W_PH>; + MipsDPSQX_SA_W_PH>, + Defs<[DSPOutFlag16_19]>; class MULSA_W_PH_DESC : DPA_W_PH_DESC_BASE<"mulsa.w.ph", MipsMULSA_W_PH>; @@ -996,45 +1031,45 @@ class PRECR_QB_PH_DESC : CMP_EQ_QB_R3_DESC_BASE<"precr.qb.ph", class PRECR_SRA_PH_W_DESC : PRECR_SRA_PH_W_DESC_BASE<"precr_sra.ph.w", int_mips_precr_sra_ph_w, NoItinerary, DSPRegs, - CPURegs>, ClearDefs; + CPURegs>; class PRECR_SRA_R_PH_W_DESC : PRECR_SRA_PH_W_DESC_BASE<"precr_sra_r.ph.w", int_mips_precr_sra_r_ph_w, NoItinerary, DSPRegs, - CPURegs>, ClearDefs; + CPURegs>; // Shift -class SHRA_QB_DESC : SHLL_QB_R2_DESC_BASE<"shra.qb", int_mips_shra_qb, immZExt3, - NoItinerary, DSPRegs>, ClearDefs; +class SHRA_QB_DESC : SHLL_QB_R2_DESC_BASE<"shra.qb", null_frag, immZExt3, + NoItinerary, DSPRegs>; class SHRAV_QB_DESC : SHLL_QB_R3_DESC_BASE<"shrav.qb", int_mips_shra_qb, - NoItinerary, DSPRegs>, ClearDefs; + NoItinerary, DSPRegs>; class SHRA_R_QB_DESC : SHLL_QB_R2_DESC_BASE<"shra_r.qb", int_mips_shra_r_qb, - immZExt3, NoItinerary, DSPRegs>, - ClearDefs; + immZExt3, NoItinerary, DSPRegs>; class SHRAV_R_QB_DESC : SHLL_QB_R3_DESC_BASE<"shrav_r.qb", int_mips_shra_r_qb, - NoItinerary, DSPRegs>, ClearDefs; + NoItinerary, DSPRegs>; -class SHRL_PH_DESC : SHLL_QB_R2_DESC_BASE<"shrl.ph", int_mips_shrl_ph, immZExt4, - NoItinerary, DSPRegs>, ClearDefs; +class SHRL_PH_DESC : SHLL_QB_R2_DESC_BASE<"shrl.ph", null_frag, immZExt4, + NoItinerary, DSPRegs>; class SHRLV_PH_DESC : SHLL_QB_R3_DESC_BASE<"shrlv.ph", int_mips_shrl_ph, - NoItinerary, DSPRegs>, ClearDefs; + NoItinerary, DSPRegs>; // Misc class APPEND_DESC : APPEND_DESC_BASE<"append", int_mips_append, immZExt5, - NoItinerary>, ClearDefs; + NoItinerary>; class BALIGN_DESC : APPEND_DESC_BASE<"balign", int_mips_balign, immZExt2, - NoItinerary>, ClearDefs; + NoItinerary>; class PREPEND_DESC : APPEND_DESC_BASE<"prepend", int_mips_prepend, immZExt5, - NoItinerary>, ClearDefs; + NoItinerary>; // Pseudos. 
-def BPOSGE32_PSEUDO : BPOSGE32_PSEUDO_DESC_BASE<int_mips_bposge32, NoItinerary>; +def BPOSGE32_PSEUDO : BPOSGE32_PSEUDO_DESC_BASE<int_mips_bposge32, + NoItinerary>, Uses<[DSPPos]>; // Instruction defs. // MIPS DSP Rev 1 @@ -1094,6 +1129,10 @@ def MAQ_S_W_PHL : MAQ_S_W_PHL_ENC, MAQ_S_W_PHL_DESC; def MAQ_S_W_PHR : MAQ_S_W_PHR_ENC, MAQ_S_W_PHR_DESC; def MAQ_SA_W_PHL : MAQ_SA_W_PHL_ENC, MAQ_SA_W_PHL_DESC; def MAQ_SA_W_PHR : MAQ_SA_W_PHR_ENC, MAQ_SA_W_PHR_DESC; +def MFHI_DSP : MFHI_ENC, MFHI_DESC; +def MFLO_DSP : MFLO_ENC, MFLO_DESC; +def MTHI_DSP : MTHI_ENC, MTHI_DESC; +def MTLO_DSP : MTLO_ENC, MTLO_DESC; def DPAU_H_QBL : DPAU_H_QBL_ENC, DPAU_H_QBL_DESC; def DPAU_H_QBR : DPAU_H_QBR_ENC, DPAU_H_QBR_DESC; def DPSU_H_QBL : DPSU_H_QBL_ENC, DPSU_H_QBL_DESC; @@ -1201,13 +1240,35 @@ def PREPEND : PREPEND_ENC, PREPEND_DESC; } // Pseudos. -/// Pseudo instructions for loading, storing and copying accumulator registers. let isPseudo = 1 in { + // Pseudo instructions for loading and storing accumulator registers. defm LOAD_AC_DSP : LoadM<"load_ac_dsp", ACRegsDSP>; defm STORE_AC_DSP : StoreM<"store_ac_dsp", ACRegsDSP>; + + // Pseudos for loading and storing ccond field of DSP control register. + defm LOAD_CCOND_DSP : LoadM<"load_ccond_dsp", DSPCC>; + defm STORE_CCOND_DSP : StoreM<"store_ccond_dsp", DSPCC>; } -def COPY_AC_DSP : PseudoSE<(outs ACRegsDSP:$dst), (ins ACRegsDSP:$src), []>; +// Pseudo CMP and PICK instructions. +class PseudoCMP<Instruction RealInst> : + PseudoDSP<(outs DSPCC:$cmp), (ins DSPRegs:$rs, DSPRegs:$rt), []>, + PseudoInstExpansion<(RealInst DSPRegs:$rs, DSPRegs:$rt)>, NeverHasSideEffects; + +class PseudoPICK<Instruction RealInst> : + PseudoDSP<(outs DSPRegs:$rd), (ins DSPCC:$cmp, DSPRegs:$rs, DSPRegs:$rt), []>, + PseudoInstExpansion<(RealInst DSPRegs:$rd, DSPRegs:$rs, DSPRegs:$rt)>, + NeverHasSideEffects; + +def PseudoCMP_EQ_PH : PseudoCMP<CMP_EQ_PH>; +def PseudoCMP_LT_PH : PseudoCMP<CMP_LT_PH>; +def PseudoCMP_LE_PH : PseudoCMP<CMP_LE_PH>; +def PseudoCMPU_EQ_QB : PseudoCMP<CMPU_EQ_QB>; +def PseudoCMPU_LT_QB : PseudoCMP<CMPU_LT_QB>; +def PseudoCMPU_LE_QB : PseudoCMP<CMPU_LE_QB>; + +def PseudoPICK_PH : PseudoPICK<PICK_PH>; +def PseudoPICK_QB : PseudoPICK<PICK_QB>; // Patterns. class DSPPat<dag pattern, dag result, Predicate pred = HasDSP> : @@ -1232,6 +1293,95 @@ def : DSPPat<(store (v2i16 DSPRegs:$val), addr:$a), def : DSPPat<(store (v4i8 DSPRegs:$val), addr:$a), (SW (COPY_TO_REGCLASS DSPRegs:$val, CPURegs), addr:$a)>; +// Binary operations. +class DSPBinPat<Instruction Inst, ValueType ValTy, SDPatternOperator Node, + Predicate Pred = HasDSP> : + DSPPat<(Node ValTy:$a, ValTy:$b), (Inst ValTy:$a, ValTy:$b), Pred>; + +def : DSPBinPat<ADDQ_PH, v2i16, int_mips_addq_ph>; +def : DSPBinPat<ADDQ_PH, v2i16, add>; +def : DSPBinPat<SUBQ_PH, v2i16, int_mips_subq_ph>; +def : DSPBinPat<SUBQ_PH, v2i16, sub>; +def : DSPBinPat<MUL_PH, v2i16, int_mips_mul_ph, HasDSPR2>; +def : DSPBinPat<MUL_PH, v2i16, mul, HasDSPR2>; +def : DSPBinPat<ADDU_QB, v4i8, int_mips_addu_qb>; +def : DSPBinPat<ADDU_QB, v4i8, add>; +def : DSPBinPat<SUBU_QB, v4i8, int_mips_subu_qb>; +def : DSPBinPat<SUBU_QB, v4i8, sub>; +def : DSPBinPat<ADDSC, i32, int_mips_addsc>; +def : DSPBinPat<ADDSC, i32, addc>; +def : DSPBinPat<ADDWC, i32, int_mips_addwc>; +def : DSPBinPat<ADDWC, i32, adde>; + +// Shift immediate patterns. 
+class DSPShiftPat<Instruction Inst, ValueType ValTy, SDPatternOperator Node, + SDPatternOperator Imm, Predicate Pred = HasDSP> : + DSPPat<(Node ValTy:$a, Imm:$shamt), (Inst ValTy:$a, Imm:$shamt), Pred>; + +def : DSPShiftPat<SHLL_PH, v2i16, MipsSHLL_DSP, imm>; +def : DSPShiftPat<SHRA_PH, v2i16, MipsSHRA_DSP, imm>; +def : DSPShiftPat<SHRL_PH, v2i16, MipsSHRL_DSP, imm, HasDSPR2>; +def : DSPShiftPat<SHLL_PH, v2i16, int_mips_shll_ph, immZExt4>; +def : DSPShiftPat<SHRA_PH, v2i16, int_mips_shra_ph, immZExt4>; +def : DSPShiftPat<SHRL_PH, v2i16, int_mips_shrl_ph, immZExt4, HasDSPR2>; +def : DSPShiftPat<SHLL_QB, v4i8, MipsSHLL_DSP, imm>; +def : DSPShiftPat<SHRA_QB, v4i8, MipsSHRA_DSP, imm, HasDSPR2>; +def : DSPShiftPat<SHRL_QB, v4i8, MipsSHRL_DSP, imm>; +def : DSPShiftPat<SHLL_QB, v4i8, int_mips_shll_qb, immZExt3>; +def : DSPShiftPat<SHRA_QB, v4i8, int_mips_shra_qb, immZExt3, HasDSPR2>; +def : DSPShiftPat<SHRL_QB, v4i8, int_mips_shrl_qb, immZExt3>; + +// SETCC/SELECT_CC patterns. +class DSPSetCCPat<Instruction Cmp, Instruction Pick, ValueType ValTy, + CondCode CC> : + DSPPat<(ValTy (MipsSETCC_DSP ValTy:$a, ValTy:$b, CC)), + (ValTy (Pick (ValTy (Cmp ValTy:$a, ValTy:$b)), + (ValTy (COPY_TO_REGCLASS (ADDiu ZERO, -1), DSPRegs)), + (ValTy ZERO)))>; + +class DSPSetCCPatInv<Instruction Cmp, Instruction Pick, ValueType ValTy, + CondCode CC> : + DSPPat<(ValTy (MipsSETCC_DSP ValTy:$a, ValTy:$b, CC)), + (ValTy (Pick (ValTy (Cmp ValTy:$a, ValTy:$b)), + (ValTy ZERO), + (ValTy (COPY_TO_REGCLASS (ADDiu ZERO, -1), DSPRegs))))>; + +class DSPSelectCCPat<Instruction Cmp, Instruction Pick, ValueType ValTy, + CondCode CC> : + DSPPat<(ValTy (MipsSELECT_CC_DSP ValTy:$a, ValTy:$b, ValTy:$c, ValTy:$d, CC)), + (ValTy (Pick (ValTy (Cmp ValTy:$a, ValTy:$b)), $c, $d))>; + +class DSPSelectCCPatInv<Instruction Cmp, Instruction Pick, ValueType ValTy, + CondCode CC> : + DSPPat<(ValTy (MipsSELECT_CC_DSP ValTy:$a, ValTy:$b, ValTy:$c, ValTy:$d, CC)), + (ValTy (Pick (ValTy (Cmp ValTy:$a, ValTy:$b)), $d, $c))>; + +def : DSPSetCCPat<PseudoCMP_EQ_PH, PseudoPICK_PH, v2i16, SETEQ>; +def : DSPSetCCPat<PseudoCMP_LT_PH, PseudoPICK_PH, v2i16, SETLT>; +def : DSPSetCCPat<PseudoCMP_LE_PH, PseudoPICK_PH, v2i16, SETLE>; +def : DSPSetCCPatInv<PseudoCMP_EQ_PH, PseudoPICK_PH, v2i16, SETNE>; +def : DSPSetCCPatInv<PseudoCMP_LT_PH, PseudoPICK_PH, v2i16, SETGE>; +def : DSPSetCCPatInv<PseudoCMP_LE_PH, PseudoPICK_PH, v2i16, SETGT>; +def : DSPSetCCPat<PseudoCMPU_EQ_QB, PseudoPICK_QB, v4i8, SETEQ>; +def : DSPSetCCPat<PseudoCMPU_LT_QB, PseudoPICK_QB, v4i8, SETULT>; +def : DSPSetCCPat<PseudoCMPU_LE_QB, PseudoPICK_QB, v4i8, SETULE>; +def : DSPSetCCPatInv<PseudoCMPU_EQ_QB, PseudoPICK_QB, v4i8, SETNE>; +def : DSPSetCCPatInv<PseudoCMPU_LT_QB, PseudoPICK_QB, v4i8, SETUGE>; +def : DSPSetCCPatInv<PseudoCMPU_LE_QB, PseudoPICK_QB, v4i8, SETUGT>; + +def : DSPSelectCCPat<PseudoCMP_EQ_PH, PseudoPICK_PH, v2i16, SETEQ>; +def : DSPSelectCCPat<PseudoCMP_LT_PH, PseudoPICK_PH, v2i16, SETLT>; +def : DSPSelectCCPat<PseudoCMP_LE_PH, PseudoPICK_PH, v2i16, SETLE>; +def : DSPSelectCCPatInv<PseudoCMP_EQ_PH, PseudoPICK_PH, v2i16, SETNE>; +def : DSPSelectCCPatInv<PseudoCMP_LT_PH, PseudoPICK_PH, v2i16, SETGE>; +def : DSPSelectCCPatInv<PseudoCMP_LE_PH, PseudoPICK_PH, v2i16, SETGT>; +def : DSPSelectCCPat<PseudoCMPU_EQ_QB, PseudoPICK_QB, v4i8, SETEQ>; +def : DSPSelectCCPat<PseudoCMPU_LT_QB, PseudoPICK_QB, v4i8, SETULT>; +def : DSPSelectCCPat<PseudoCMPU_LE_QB, PseudoPICK_QB, v4i8, SETULE>; +def : DSPSelectCCPatInv<PseudoCMPU_EQ_QB, PseudoPICK_QB, v4i8, SETNE>; +def : 
DSPSelectCCPatInv<PseudoCMPU_LT_QB, PseudoPICK_QB, v4i8, SETUGE>; +def : DSPSelectCCPatInv<PseudoCMPU_LE_QB, PseudoPICK_QB, v4i8, SETUGT>; + // Extr patterns. class EXTR_W_TY1_R2_Pat<SDPatternOperator OpNode, Instruction Instr> : DSPPat<(i32 (OpNode CPURegs:$rs, ACRegsDSP:$ac)), diff --git a/contrib/llvm/lib/Target/Mips/MipsISelDAGToDAG.cpp b/contrib/llvm/lib/Target/Mips/MipsISelDAGToDAG.cpp index 77b08cb..968e536 100644 --- a/contrib/llvm/lib/Target/Mips/MipsISelDAGToDAG.cpp +++ b/contrib/llvm/lib/Target/Mips/MipsISelDAGToDAG.cpp @@ -17,7 +17,6 @@ #include "MipsSEISelDAGToDAG.h" #include "Mips.h" #include "MCTargetDesc/MipsBaseInfo.h" -#include "MipsAnalyzeImmediate.h" #include "MipsMachineFunction.h" #include "MipsRegisterInfo.h" #include "llvm/CodeGen/MachineConstantPool.h" diff --git a/contrib/llvm/lib/Target/Mips/MipsISelLowering.cpp b/contrib/llvm/lib/Target/Mips/MipsISelLowering.cpp index e2219f2..4d76181 100644 --- a/contrib/llvm/lib/Target/Mips/MipsISelLowering.cpp +++ b/contrib/llvm/lib/Target/Mips/MipsISelLowering.cpp @@ -30,7 +30,6 @@ #include "llvm/IR/CallingConv.h" #include "llvm/IR/DerivedTypes.h" #include "llvm/IR/GlobalVariable.h" -#include "llvm/IR/Intrinsics.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" @@ -198,6 +197,11 @@ const char *MipsTargetLowering::getTargetNodeName(unsigned Opcode) const { case MipsISD::MADDU_DSP: return "MipsISD::MADDU_DSP"; case MipsISD::MSUB_DSP: return "MipsISD::MSUB_DSP"; case MipsISD::MSUBU_DSP: return "MipsISD::MSUBU_DSP"; + case MipsISD::SHLL_DSP: return "MipsISD::SHLL_DSP"; + case MipsISD::SHRA_DSP: return "MipsISD::SHRA_DSP"; + case MipsISD::SHRL_DSP: return "MipsISD::SHRL_DSP"; + case MipsISD::SETCC_DSP: return "MipsISD::SETCC_DSP"; + case MipsISD::SELECT_CC_DSP: return "MipsISD::SELECT_CC_DSP"; default: return NULL; } } @@ -211,7 +215,7 @@ MipsTargetLowering(MipsTargetMachine &TM) // Mips does not have i1 type, so use i32 for // setcc operations results (slt, sgt, ...). setBooleanContents(ZeroOrOneBooleanContent); - setBooleanVectorContents(ZeroOrOneBooleanContent); // FIXME: Is this correct? + setBooleanVectorContents(ZeroOrNegativeOneBooleanContent); // Load extented operations for i1 types must be promoted setLoadExtAction(ISD::EXTLOAD, MVT::i1, Promote); @@ -346,9 +350,6 @@ MipsTargetLowering(MipsTargetMachine &TM) setOperationAction(ISD::VACOPY, MVT::Other, Expand); setOperationAction(ISD::VAEND, MVT::Other, Expand); - setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i64, Custom); - setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i64, Custom); - // Use the default for now setOperationAction(ISD::STACKSAVE, MVT::Other, Expand); setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand); @@ -449,7 +450,7 @@ static SDValue performDivRemCombine(SDNode *N, SelectionDAG &DAG, return SDValue(); } -static Mips::CondCode FPCondCCodeToFCC(ISD::CondCode CC) { +static Mips::CondCode condCodeToFCC(ISD::CondCode CC) { switch (CC) { default: llvm_unreachable("Unknown fp condition code!"); case ISD::SETEQ: @@ -508,7 +509,7 @@ static SDValue createFPCmp(SelectionDAG &DAG, const SDValue &Op) { ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get(); return DAG.getNode(MipsISD::FPCmp, DL, MVT::Glue, LHS, RHS, - DAG.getConstant(FPCondCCodeToFCC(CC), MVT::i32)); + DAG.getConstant(condCodeToFCC(CC), MVT::i32)); } // Creates and returns a CMovFPT/F node. 
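Taken together with the ZeroOrNegativeOneBooleanContent change above, the new SETCC_DSP/SELECT_CC_DSP nodes and the DSPSetCCPat/DSPSelectCCPat patterns turn a v2i16/v4i8 setcc into a DSP compare (which writes the per-lane ccond bits) followed by a pick between an all-ones register and $zero, so each lane ends up as -1 (true) or 0 (false). An illustrative C++ sketch of what the selected cmp.lt.ph + pick.ph pair computes for a v2i16 setlt; lane layout and endianness details are glossed over:

#include <cstdint>
#include <cstdio>

// Rough emulation of the cmp.lt.ph + pick.ph pair selected by DSPSetCCPat:
// each 16-bit lane becomes 0xffff (true) or 0x0000 (false), i.e. the
// zero-or-negative-one booleans declared by setBooleanVectorContents.
static uint32_t setlt_v2i16(uint32_t a, uint32_t b) {
  uint32_t res = 0;
  for (int lane = 0; lane < 2; ++lane) {
    int16_t ae = int16_t(a >> (16 * lane));
    int16_t be = int16_t(b >> (16 * lane));
    uint32_t mask = (ae < be) ? 0xffffu : 0u; // ccond bit -> pick all-ones or zero
    res |= mask << (16 * lane);
  }
  return res;
}

int main() {
  // lanes {1, -3} < {2, -4} -> {true, false} -> 0x0000ffff
  uint32_t a = (uint32_t(uint16_t(-3)) << 16) | 1;
  uint32_t b = (uint32_t(uint16_t(-4)) << 16) | 2;
  std::printf("0x%08x\n", setlt_v2i16(a, b));
}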
@@ -712,10 +713,7 @@ void MipsTargetLowering::ReplaceNodeResults(SDNode *N, SmallVectorImpl<SDValue> &Results, SelectionDAG &DAG) const { - SDValue Res = LowerOperation(SDValue(N, 0), DAG); - - for (unsigned I = 0, E = Res->getNumValues(); I != E; ++I) - Results.push_back(Res.getValue(I)); + return LowerOperationWrapper(N, Results, DAG); } SDValue MipsTargetLowering:: @@ -739,15 +737,12 @@ LowerOperation(SDValue Op, SelectionDAG &DAG) const case ISD::FRAMEADDR: return lowerFRAMEADDR(Op, DAG); case ISD::RETURNADDR: return lowerRETURNADDR(Op, DAG); case ISD::EH_RETURN: return lowerEH_RETURN(Op, DAG); - case ISD::MEMBARRIER: return lowerMEMBARRIER(Op, DAG); case ISD::ATOMIC_FENCE: return lowerATOMIC_FENCE(Op, DAG); case ISD::SHL_PARTS: return lowerShiftLeftParts(Op, DAG); case ISD::SRA_PARTS: return lowerShiftRightParts(Op, DAG, true); case ISD::SRL_PARTS: return lowerShiftRightParts(Op, DAG, false); case ISD::LOAD: return lowerLOAD(Op, DAG); case ISD::STORE: return lowerSTORE(Op, DAG); - case ISD::INTRINSIC_WO_CHAIN: return lowerINTRINSIC_WO_CHAIN(Op, DAG); - case ISD::INTRINSIC_W_CHAIN: return lowerINTRINSIC_W_CHAIN(Op, DAG); case ISD::ADD: return lowerADD(Op, DAG); } return SDValue(); @@ -1827,15 +1822,6 @@ SDValue MipsTargetLowering::lowerEH_RETURN(SDValue Op, SelectionDAG &DAG) Chain.getValue(1)); } -// TODO: set SType according to the desired memory barrier behavior. -SDValue -MipsTargetLowering::lowerMEMBARRIER(SDValue Op, SelectionDAG &DAG) const { - unsigned SType = 0; - DebugLoc DL = Op.getDebugLoc(); - return DAG.getNode(MipsISD::Sync, DL, MVT::Other, Op.getOperand(0), - DAG.getConstant(SType, MVT::i32)); -} - SDValue MipsTargetLowering::lowerATOMIC_FENCE(SDValue Op, SelectionDAG &DAG) const { // FIXME: Need pseudo-fence for 'singlethread' fences @@ -1918,7 +1904,7 @@ SDValue MipsTargetLowering::lowerShiftRightParts(SDValue Op, SelectionDAG &DAG, return DAG.getMergeValues(Ops, 2, DL); } -static SDValue CreateLoadLR(unsigned Opc, SelectionDAG &DAG, LoadSDNode *LD, +static SDValue createLoadLR(unsigned Opc, SelectionDAG &DAG, LoadSDNode *LD, SDValue Chain, SDValue Src, unsigned Offset) { SDValue Ptr = LD->getBasePtr(); EVT VT = LD->getValueType(0), MemVT = LD->getMemoryVT(); @@ -1958,15 +1944,15 @@ SDValue MipsTargetLowering::lowerLOAD(SDValue Op, SelectionDAG &DAG) const { // (set tmp, (ldl (add baseptr, 7), undef)) // (set dst, (ldr baseptr, tmp)) if ((VT == MVT::i64) && (ExtType == ISD::NON_EXTLOAD)) { - SDValue LDL = CreateLoadLR(MipsISD::LDL, DAG, LD, Chain, Undef, + SDValue LDL = createLoadLR(MipsISD::LDL, DAG, LD, Chain, Undef, IsLittle ? 7 : 0); - return CreateLoadLR(MipsISD::LDR, DAG, LD, LDL.getValue(1), LDL, + return createLoadLR(MipsISD::LDR, DAG, LD, LDL.getValue(1), LDL, IsLittle ? 0 : 7); } - SDValue LWL = CreateLoadLR(MipsISD::LWL, DAG, LD, Chain, Undef, + SDValue LWL = createLoadLR(MipsISD::LWL, DAG, LD, Chain, Undef, IsLittle ? 3 : 0); - SDValue LWR = CreateLoadLR(MipsISD::LWR, DAG, LD, LWL.getValue(1), LWL, + SDValue LWR = createLoadLR(MipsISD::LWR, DAG, LD, LWL.getValue(1), LWL, IsLittle ? 
0 : 3); // Expand @@ -1997,7 +1983,7 @@ SDValue MipsTargetLowering::lowerLOAD(SDValue Op, SelectionDAG &DAG) const { return DAG.getMergeValues(Ops, 2, DL); } -static SDValue CreateStoreLR(unsigned Opc, SelectionDAG &DAG, StoreSDNode *SD, +static SDValue createStoreLR(unsigned Opc, SelectionDAG &DAG, StoreSDNode *SD, SDValue Chain, unsigned Offset) { SDValue Ptr = SD->getBasePtr(), Value = SD->getValue(); EVT MemVT = SD->getMemoryVT(), BasePtrVT = Ptr.getValueType(); @@ -2034,9 +2020,9 @@ SDValue MipsTargetLowering::lowerSTORE(SDValue Op, SelectionDAG &DAG) const { // (swl val, (add baseptr, 3)) // (swr val, baseptr) if ((VT == MVT::i32) || SD->isTruncatingStore()) { - SDValue SWL = CreateStoreLR(MipsISD::SWL, DAG, SD, Chain, + SDValue SWL = createStoreLR(MipsISD::SWL, DAG, SD, Chain, IsLittle ? 3 : 0); - return CreateStoreLR(MipsISD::SWR, DAG, SD, SWL, IsLittle ? 0 : 3); + return createStoreLR(MipsISD::SWR, DAG, SD, SWL, IsLittle ? 0 : 3); } assert(VT == MVT::i64); @@ -2046,172 +2032,8 @@ SDValue MipsTargetLowering::lowerSTORE(SDValue Op, SelectionDAG &DAG) const { // to // (sdl val, (add baseptr, 7)) // (sdr val, baseptr) - SDValue SDL = CreateStoreLR(MipsISD::SDL, DAG, SD, Chain, IsLittle ? 7 : 0); - return CreateStoreLR(MipsISD::SDR, DAG, SD, SDL, IsLittle ? 0 : 7); -} - -static SDValue initAccumulator(SDValue In, DebugLoc DL, SelectionDAG &DAG) { - SDValue InLo = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, In, - DAG.getConstant(0, MVT::i32)); - SDValue InHi = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, In, - DAG.getConstant(1, MVT::i32)); - return DAG.getNode(MipsISD::InsertLOHI, DL, MVT::Untyped, InLo, InHi); -} - -static SDValue extractLOHI(SDValue Op, DebugLoc DL, SelectionDAG &DAG) { - SDValue Lo = DAG.getNode(MipsISD::ExtractLOHI, DL, MVT::i32, Op, - DAG.getConstant(Mips::sub_lo, MVT::i32)); - SDValue Hi = DAG.getNode(MipsISD::ExtractLOHI, DL, MVT::i32, Op, - DAG.getConstant(Mips::sub_hi, MVT::i32)); - return DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, Lo, Hi); -} - -// This function expands mips intrinsic nodes which have 64-bit input operands -// or output values. -// -// out64 = intrinsic-node in64 -// => -// lo = copy (extract-element (in64, 0)) -// hi = copy (extract-element (in64, 1)) -// mips-specific-node -// v0 = copy lo -// v1 = copy hi -// out64 = merge-values (v0, v1) -// -static SDValue lowerDSPIntr(SDValue Op, SelectionDAG &DAG, unsigned Opc) { - DebugLoc DL = Op.getDebugLoc(); - bool HasChainIn = Op->getOperand(0).getValueType() == MVT::Other; - SmallVector<SDValue, 3> Ops; - unsigned OpNo = 0; - - // See if Op has a chain input. - if (HasChainIn) - Ops.push_back(Op->getOperand(OpNo++)); - - // The next operand is the intrinsic opcode. - assert(Op->getOperand(OpNo).getOpcode() == ISD::TargetConstant); - - // See if the next operand has type i64. - SDValue Opnd = Op->getOperand(++OpNo), In64; - - if (Opnd.getValueType() == MVT::i64) - In64 = initAccumulator(Opnd, DL, DAG); - else - Ops.push_back(Opnd); - - // Push the remaining operands. - for (++OpNo ; OpNo < Op->getNumOperands(); ++OpNo) - Ops.push_back(Op->getOperand(OpNo)); - - // Add In64 to the end of the list. - if (In64.getNode()) - Ops.push_back(In64); - - // Scan output. - SmallVector<EVT, 2> ResTys; - - for (SDNode::value_iterator I = Op->value_begin(), E = Op->value_end(); - I != E; ++I) - ResTys.push_back((*I == MVT::i64) ? MVT::Untyped : *I); - - // Create node. - SDValue Val = DAG.getNode(Opc, DL, ResTys, &Ops[0], Ops.size()); - SDValue Out = (ResTys[0] == MVT::Untyped) ? 
extractLOHI(Val, DL, DAG) : Val; - - if (!HasChainIn) - return Out; - - assert(Val->getValueType(1) == MVT::Other); - SDValue Vals[] = { Out, SDValue(Val.getNode(), 1) }; - return DAG.getMergeValues(Vals, 2, DL); -} - -SDValue MipsTargetLowering::lowerINTRINSIC_WO_CHAIN(SDValue Op, - SelectionDAG &DAG) const { - switch (cast<ConstantSDNode>(Op->getOperand(0))->getZExtValue()) { - default: - return SDValue(); - case Intrinsic::mips_shilo: - return lowerDSPIntr(Op, DAG, MipsISD::SHILO); - case Intrinsic::mips_dpau_h_qbl: - return lowerDSPIntr(Op, DAG, MipsISD::DPAU_H_QBL); - case Intrinsic::mips_dpau_h_qbr: - return lowerDSPIntr(Op, DAG, MipsISD::DPAU_H_QBR); - case Intrinsic::mips_dpsu_h_qbl: - return lowerDSPIntr(Op, DAG, MipsISD::DPSU_H_QBL); - case Intrinsic::mips_dpsu_h_qbr: - return lowerDSPIntr(Op, DAG, MipsISD::DPSU_H_QBR); - case Intrinsic::mips_dpa_w_ph: - return lowerDSPIntr(Op, DAG, MipsISD::DPA_W_PH); - case Intrinsic::mips_dps_w_ph: - return lowerDSPIntr(Op, DAG, MipsISD::DPS_W_PH); - case Intrinsic::mips_dpax_w_ph: - return lowerDSPIntr(Op, DAG, MipsISD::DPAX_W_PH); - case Intrinsic::mips_dpsx_w_ph: - return lowerDSPIntr(Op, DAG, MipsISD::DPSX_W_PH); - case Intrinsic::mips_mulsa_w_ph: - return lowerDSPIntr(Op, DAG, MipsISD::MULSA_W_PH); - case Intrinsic::mips_mult: - return lowerDSPIntr(Op, DAG, MipsISD::Mult); - case Intrinsic::mips_multu: - return lowerDSPIntr(Op, DAG, MipsISD::Multu); - case Intrinsic::mips_madd: - return lowerDSPIntr(Op, DAG, MipsISD::MAdd); - case Intrinsic::mips_maddu: - return lowerDSPIntr(Op, DAG, MipsISD::MAddu); - case Intrinsic::mips_msub: - return lowerDSPIntr(Op, DAG, MipsISD::MSub); - case Intrinsic::mips_msubu: - return lowerDSPIntr(Op, DAG, MipsISD::MSubu); - } -} - -SDValue MipsTargetLowering::lowerINTRINSIC_W_CHAIN(SDValue Op, - SelectionDAG &DAG) const { - switch (cast<ConstantSDNode>(Op->getOperand(1))->getZExtValue()) { - default: - return SDValue(); - case Intrinsic::mips_extp: - return lowerDSPIntr(Op, DAG, MipsISD::EXTP); - case Intrinsic::mips_extpdp: - return lowerDSPIntr(Op, DAG, MipsISD::EXTPDP); - case Intrinsic::mips_extr_w: - return lowerDSPIntr(Op, DAG, MipsISD::EXTR_W); - case Intrinsic::mips_extr_r_w: - return lowerDSPIntr(Op, DAG, MipsISD::EXTR_R_W); - case Intrinsic::mips_extr_rs_w: - return lowerDSPIntr(Op, DAG, MipsISD::EXTR_RS_W); - case Intrinsic::mips_extr_s_h: - return lowerDSPIntr(Op, DAG, MipsISD::EXTR_S_H); - case Intrinsic::mips_mthlip: - return lowerDSPIntr(Op, DAG, MipsISD::MTHLIP); - case Intrinsic::mips_mulsaq_s_w_ph: - return lowerDSPIntr(Op, DAG, MipsISD::MULSAQ_S_W_PH); - case Intrinsic::mips_maq_s_w_phl: - return lowerDSPIntr(Op, DAG, MipsISD::MAQ_S_W_PHL); - case Intrinsic::mips_maq_s_w_phr: - return lowerDSPIntr(Op, DAG, MipsISD::MAQ_S_W_PHR); - case Intrinsic::mips_maq_sa_w_phl: - return lowerDSPIntr(Op, DAG, MipsISD::MAQ_SA_W_PHL); - case Intrinsic::mips_maq_sa_w_phr: - return lowerDSPIntr(Op, DAG, MipsISD::MAQ_SA_W_PHR); - case Intrinsic::mips_dpaq_s_w_ph: - return lowerDSPIntr(Op, DAG, MipsISD::DPAQ_S_W_PH); - case Intrinsic::mips_dpsq_s_w_ph: - return lowerDSPIntr(Op, DAG, MipsISD::DPSQ_S_W_PH); - case Intrinsic::mips_dpaq_sa_l_w: - return lowerDSPIntr(Op, DAG, MipsISD::DPAQ_SA_L_W); - case Intrinsic::mips_dpsq_sa_l_w: - return lowerDSPIntr(Op, DAG, MipsISD::DPSQ_SA_L_W); - case Intrinsic::mips_dpaqx_s_w_ph: - return lowerDSPIntr(Op, DAG, MipsISD::DPAQX_S_W_PH); - case Intrinsic::mips_dpaqx_sa_w_ph: - return lowerDSPIntr(Op, DAG, MipsISD::DPAQX_SA_W_PH); - case Intrinsic::mips_dpsqx_s_w_ph: - return 
lowerDSPIntr(Op, DAG, MipsISD::DPSQX_S_W_PH); - case Intrinsic::mips_dpsqx_sa_w_ph: - return lowerDSPIntr(Op, DAG, MipsISD::DPSQX_SA_W_PH); - } + SDValue SDL = createStoreLR(MipsISD::SDL, DAG, SD, Chain, IsLittle ? 7 : 0); + return createStoreLR(MipsISD::SDR, DAG, SD, SDL, IsLittle ? 0 : 7); } SDValue MipsTargetLowering::lowerADD(SDValue Op, SelectionDAG &DAG) const { @@ -3009,8 +2831,8 @@ getRegForInlineAsmConstraint(const std::string &Constraint, EVT VT) const return std::make_pair((unsigned)Mips::T9_64, &Mips::CPU64RegsRegClass); case 'l': // register suitable for indirect jump if (VT == MVT::i32) - return std::make_pair((unsigned)Mips::LO, &Mips::HILORegClass); - return std::make_pair((unsigned)Mips::LO64, &Mips::HILO64RegClass); + return std::make_pair((unsigned)Mips::LO, &Mips::LORegsRegClass); + return std::make_pair((unsigned)Mips::LO64, &Mips::LORegs64RegClass); case 'x': // register suitable for indirect jump // Fixme: Not triggering the use of both hi and low // This will generate an error message diff --git a/contrib/llvm/lib/Target/Mips/MipsISelLowering.h b/contrib/llvm/lib/Target/Mips/MipsISelLowering.h index cab71a6..5587e8f 100644 --- a/contrib/llvm/lib/Target/Mips/MipsISelLowering.h +++ b/contrib/llvm/lib/Target/Mips/MipsISelLowering.h @@ -143,6 +143,15 @@ namespace llvm { MSUB_DSP, MSUBU_DSP, + // DSP shift nodes. + SHLL_DSP, + SHRA_DSP, + SHRL_DSP, + + // DSP setcc and select_cc nodes. + SETCC_DSP, + SELECT_CC_DSP, + // Load/Store Left/Right nodes. LWL = ISD::FIRST_TARGET_MEMORY_OPCODE, LWR, @@ -338,15 +347,12 @@ namespace llvm { SDValue lowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const; SDValue lowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const; SDValue lowerEH_RETURN(SDValue Op, SelectionDAG &DAG) const; - SDValue lowerMEMBARRIER(SDValue Op, SelectionDAG& DAG) const; SDValue lowerATOMIC_FENCE(SDValue Op, SelectionDAG& DAG) const; SDValue lowerShiftLeftParts(SDValue Op, SelectionDAG& DAG) const; SDValue lowerShiftRightParts(SDValue Op, SelectionDAG& DAG, bool IsSRA) const; SDValue lowerLOAD(SDValue Op, SelectionDAG &DAG) const; SDValue lowerSTORE(SDValue Op, SelectionDAG &DAG) const; - SDValue lowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const; - SDValue lowerINTRINSIC_W_CHAIN(SDValue Op, SelectionDAG &DAG) const; SDValue lowerADD(SDValue Op, SelectionDAG &DAG) const; /// isEligibleForTailCallOptimization - Check whether the call is eligible diff --git a/contrib/llvm/lib/Target/Mips/MipsInstrFormats.td b/contrib/llvm/lib/Target/Mips/MipsInstrFormats.td index ee432c8..ea07372 100644 --- a/contrib/llvm/lib/Target/Mips/MipsInstrFormats.td +++ b/contrib/llvm/lib/Target/Mips/MipsInstrFormats.td @@ -36,6 +36,24 @@ def FrmFR : Format<4>; def FrmFI : Format<5>; def FrmOther : Format<6>; // Instruction w/ a custom format +class MMRel; + +def Std2MicroMips : InstrMapping { + let FilterClass = "MMRel"; + // Instructions with the same BaseOpcode and isNVStore values form a row. + let RowFields = ["BaseOpcode"]; + // Instructions with the same predicate sense form a column. + let ColFields = ["Arch"]; + // The key column is the unpredicated instructions. 
+ let KeyCol = ["se"]; + // Value columns are PredSense=true and PredSense=false + let ValueCols = [["se"], ["micromips"]]; +} + +class StdArch { + string Arch = "se"; +} + // Generic Mips Format class MipsInst<dag outs, dag ins, string asmstr, list<dag> pattern, InstrItinClass itin, Format f>: Instruction @@ -74,9 +92,11 @@ class MipsInst<dag outs, dag ins, string asmstr, list<dag> pattern, // Mips32/64 Instruction Format class InstSE<dag outs, dag ins, string asmstr, list<dag> pattern, - InstrItinClass itin, Format f>: + InstrItinClass itin, Format f, string opstr = ""> : MipsInst<outs, ins, asmstr, pattern, itin, f> { let Predicates = [HasStdEnc]; + string BaseOpcode = opstr; + string Arch; } // Mips Pseudo Instructions Format @@ -192,7 +212,7 @@ class MFC3OP_FM<bits<6> op, bits<5> mfmt> let Inst{2-0} = sel; } -class ADD_FM<bits<6> op, bits<6> funct> { +class ADD_FM<bits<6> op, bits<6> funct> : StdArch { bits<5> rd; bits<5> rs; bits<5> rt; @@ -207,7 +227,7 @@ class ADD_FM<bits<6> op, bits<6> funct> { let Inst{5-0} = funct; } -class ADDI_FM<bits<6> op> { +class ADDI_FM<bits<6> op> : StdArch { bits<5> rs; bits<5> rt; bits<16> imm16; @@ -220,7 +240,7 @@ class ADDI_FM<bits<6> op> { let Inst{15-0} = imm16; } -class SRA_FM<bits<6> funct, bit rotate> { +class SRA_FM<bits<6> funct, bit rotate> : StdArch { bits<5> rd; bits<5> rt; bits<5> shamt; @@ -236,7 +256,7 @@ class SRA_FM<bits<6> funct, bit rotate> { let Inst{5-0} = funct; } -class SRLV_FM<bits<6> funct, bit rotate> { +class SRLV_FM<bits<6> funct, bit rotate> : StdArch { bits<5> rd; bits<5> rt; bits<5> rs; @@ -288,7 +308,7 @@ class B_FM { let Inst{15-0} = offset; } -class SLTI_FM<bits<6> op> { +class SLTI_FM<bits<6> op> : StdArch { bits<5> rt; bits<5> rs; bits<16> imm16; @@ -413,7 +433,7 @@ class SYNC_FM { let Inst{5-0} = 0xf; } -class MULT_FM<bits<6> op, bits<6> funct> { +class MULT_FM<bits<6> op, bits<6> funct> : StdArch { bits<5> rs; bits<5> rt; @@ -529,7 +549,7 @@ class MFC1_FM<bits<5> funct> { let Inst{10-0} = 0; } -class LW_FM<bits<6> op> { +class LW_FM<bits<6> op> : StdArch { bits<5> rt; bits<21> addr; diff --git a/contrib/llvm/lib/Target/Mips/MipsInstrInfo.td b/contrib/llvm/lib/Target/Mips/MipsInstrInfo.td index 3a82e81..86ec729 100644 --- a/contrib/llvm/lib/Target/Mips/MipsInstrInfo.td +++ b/contrib/llvm/lib/Target/Mips/MipsInstrInfo.td @@ -179,6 +179,7 @@ def NoNaNsFPMath : Predicate<"TM.Options.NoNaNsFPMath">, AssemblerPredicate<"FeatureMips32">; def HasStdEnc : Predicate<"Subtarget.hasStandardEncoding()">, AssemblerPredicate<"!FeatureMips16">; +def NotDSP : Predicate<"!Subtarget.hasDSP()">; class MipsPat<dag pattern, dag result> : Pat<pattern, result> { let Predicates = [HasStdEnc]; @@ -374,11 +375,9 @@ class ArithLogicR<string opstr, RegisterOperand RO, bit isComm = 0, SDPatternOperator OpNode = null_frag>: InstSE<(outs RO:$rd), (ins RO:$rs, RO:$rt), !strconcat(opstr, "\t$rd, $rs, $rt"), - [(set RO:$rd, (OpNode RO:$rs, RO:$rt))], Itin, FrmR> { + [(set RO:$rd, (OpNode RO:$rs, RO:$rt))], Itin, FrmR, opstr> { let isCommutable = isComm; let isReMaterializable = 1; - string BaseOpcode; - string Arch; } // Arithmetic and logical instructions with 2 register operands. 
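The Std2MicroMips InstrMapping added above groups instructions into rows keyed by the new BaseOpcode string and columns keyed by Arch ("se" vs "micromips"), so TableGen can emit a lookup from a standard-encoding opcode to its microMIPS counterpart. Conceptually the generated table behaves like the hand-written sketch below; the opcode names and the lookup function are made-up stand-ins for illustration, not the TableGen-generated API:

#include <cstdio>
#include <map>
#include <string>
#include <utility>

// Illustrative only: a BaseOpcode-keyed row pairs the "se" column entry with
// the "micromips" column entry. All names below are invented for the example.
enum FakeOpcode { ADDiu_SE, ADDiu_MM, SLL_SE, SLL_MM, LW_SE, LW_MM, NoMatch = -1 };

static const std::map<std::string, std::pair<FakeOpcode, FakeOpcode>> Rows = {
  {"addiu", {ADDiu_SE, ADDiu_MM}},
  {"sll",   {SLL_SE,   SLL_MM}},
  {"lw",    {LW_SE,    LW_MM}},
};

// Map a standard-encoding opcode to the microMIPS entry in the same row.
static FakeOpcode std2micro(FakeOpcode Op) {
  for (const auto &R : Rows)
    if (R.second.first == Op)
      return R.second.second;
  return NoMatch;
}

int main() {
  std::printf("sll -> %d, match=%d\n", std2micro(SLL_SE), std2micro(SLL_SE) == SLL_MM);
}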
@@ -387,7 +386,8 @@ class ArithLogicI<string opstr, Operand Od, RegisterOperand RO, SDPatternOperator OpNode = null_frag> : InstSE<(outs RO:$rt), (ins RO:$rs, Od:$imm16), !strconcat(opstr, "\t$rt, $rs, $imm16"), - [(set RO:$rt, (OpNode RO:$rs, imm_type:$imm16))], IIAlu, FrmI> { + [(set RO:$rt, (OpNode RO:$rs, imm_type:$imm16))], + IIAlu, FrmI, opstr> { let isReMaterializable = 1; } @@ -404,7 +404,7 @@ class MArithR<string opstr, bit isComm = 0> : class LogicNOR<string opstr, RegisterOperand RC>: InstSE<(outs RC:$rd), (ins RC:$rs, RC:$rt), !strconcat(opstr, "\t$rd, $rs, $rt"), - [(set RC:$rd, (not (or RC:$rs, RC:$rt)))], IIAlu, FrmR> { + [(set RC:$rd, (not (or RC:$rs, RC:$rt)))], IIAlu, FrmR, opstr> { let isCommutable = 1; } @@ -414,13 +414,13 @@ class shift_rotate_imm<string opstr, Operand ImmOpnd, SDPatternOperator PF = null_frag> : InstSE<(outs RC:$rd), (ins RC:$rt, ImmOpnd:$shamt), !strconcat(opstr, "\t$rd, $rt, $shamt"), - [(set RC:$rd, (OpNode RC:$rt, PF:$shamt))], IIAlu, FrmR>; + [(set RC:$rd, (OpNode RC:$rt, PF:$shamt))], IIAlu, FrmR, opstr>; class shift_rotate_reg<string opstr, RegisterOperand RC, SDPatternOperator OpNode = null_frag>: InstSE<(outs RC:$rd), (ins CPURegsOpnd:$rs, RC:$rt), !strconcat(opstr, "\t$rd, $rt, $rs"), - [(set RC:$rd, (OpNode RC:$rt, CPURegsOpnd:$rs))], IIAlu, FrmR>; + [(set RC:$rd, (OpNode RC:$rt, CPURegsOpnd:$rs))], IIAlu, FrmR, opstr>; // Load Upper Imediate class LoadUpper<string opstr, RegisterClass RC, Operand Imm>: @@ -440,18 +440,20 @@ class FMem<bits<6> op, dag outs, dag ins, string asmstr, list<dag> pattern, // Memory Load/Store class Load<string opstr, SDPatternOperator OpNode, RegisterClass RC, - Operand MemOpnd, ComplexPattern Addr> : + Operand MemOpnd, ComplexPattern Addr, string ofsuffix> : InstSE<(outs RC:$rt), (ins MemOpnd:$addr), !strconcat(opstr, "\t$rt, $addr"), - [(set RC:$rt, (OpNode Addr:$addr))], NoItinerary, FrmI> { + [(set RC:$rt, (OpNode Addr:$addr))], NoItinerary, FrmI, + !strconcat(opstr, ofsuffix)> { let DecoderMethod = "DecodeMem"; let canFoldAsLoad = 1; let mayLoad = 1; } class Store<string opstr, SDPatternOperator OpNode, RegisterClass RC, - Operand MemOpnd, ComplexPattern Addr> : + Operand MemOpnd, ComplexPattern Addr, string ofsuffix> : InstSE<(outs), (ins RC:$rt, MemOpnd:$addr), !strconcat(opstr, "\t$rt, $addr"), - [(OpNode RC:$rt, Addr:$addr)], NoItinerary, FrmI> { + [(OpNode RC:$rt, Addr:$addr)], NoItinerary, FrmI, + !strconcat(opstr, ofsuffix)> { let DecoderMethod = "DecodeMem"; let mayStore = 1; } @@ -459,8 +461,9 @@ class Store<string opstr, SDPatternOperator OpNode, RegisterClass RC, multiclass LoadM<string opstr, RegisterClass RC, SDPatternOperator OpNode = null_frag, ComplexPattern Addr = addr> { - def NAME : Load<opstr, OpNode, RC, mem, Addr>, Requires<[NotN64, HasStdEnc]>; - def _P8 : Load<opstr, OpNode, RC, mem64, Addr>, + def NAME : Load<opstr, OpNode, RC, mem, Addr, "">, + Requires<[NotN64, HasStdEnc]>; + def _P8 : Load<opstr, OpNode, RC, mem64, Addr, "_p8">, Requires<[IsN64, HasStdEnc]> { let DecoderNamespace = "Mips64"; let isCodeGenOnly = 1; @@ -470,8 +473,9 @@ multiclass LoadM<string opstr, RegisterClass RC, multiclass StoreM<string opstr, RegisterClass RC, SDPatternOperator OpNode = null_frag, ComplexPattern Addr = addr> { - def NAME : Store<opstr, OpNode, RC, mem, Addr>, Requires<[NotN64, HasStdEnc]>; - def _P8 : Store<opstr, OpNode, RC, mem64, Addr>, + def NAME : Store<opstr, OpNode, RC, mem, Addr, "">, + Requires<[NotN64, HasStdEnc]>; + def _P8 : Store<opstr, OpNode, RC, mem64, Addr, "_p8">, 
Requires<[IsN64, HasStdEnc]> { let DecoderNamespace = "Mips64"; let isCodeGenOnly = 1; @@ -542,14 +546,15 @@ class CBranchZero<string opstr, PatFrag cond_op, RegisterClass RC> : class SetCC_R<string opstr, PatFrag cond_op, RegisterClass RC> : InstSE<(outs CPURegsOpnd:$rd), (ins RC:$rs, RC:$rt), !strconcat(opstr, "\t$rd, $rs, $rt"), - [(set CPURegsOpnd:$rd, (cond_op RC:$rs, RC:$rt))], IIAlu, FrmR>; + [(set CPURegsOpnd:$rd, (cond_op RC:$rs, RC:$rt))], + IIAlu, FrmR, opstr>; class SetCC_I<string opstr, PatFrag cond_op, Operand Od, PatLeaf imm_type, RegisterClass RC>: InstSE<(outs CPURegsOpnd:$rt), (ins RC:$rs, Od:$imm16), !strconcat(opstr, "\t$rt, $rs, $imm16"), [(set CPURegsOpnd:$rt, (cond_op RC:$rs, imm_type:$imm16))], - IIAlu, FrmI>; + IIAlu, FrmI, opstr>; // Jump class JumpFJ<DAGOperand opnd, string opstr, SDPatternOperator operator, @@ -636,7 +641,7 @@ class SYNC_FT : class Mult<string opstr, InstrItinClass itin, RegisterOperand RO, list<Register> DefRegs> : InstSE<(outs), (ins RO:$rs, RO:$rt), !strconcat(opstr, "\t$rs, $rt"), [], - itin, FrmR> { + itin, FrmR, opstr> { let isCommutable = 1; let Defs = DefRegs; let neverHasSideEffects = 1; @@ -832,14 +837,12 @@ let usesCustomInserter = 1 in { defm ATOMIC_CMP_SWAP_I32 : AtomicCmpSwap32<atomic_cmp_swap_32>; } -/// Pseudo instructions for loading, storing and copying accumulator registers. +/// Pseudo instructions for loading and storing accumulator registers. let isPseudo = 1 in { defm LOAD_AC64 : LoadM<"load_ac64", ACRegs>; defm STORE_AC64 : StoreM<"store_ac64", ACRegs>; } -def COPY_AC64 : PseudoSE<(outs ACRegs:$dst), (ins ACRegs:$src), []>; - //===----------------------------------------------------------------------===// // Instruction definition //===----------------------------------------------------------------------===// @@ -848,60 +851,70 @@ def COPY_AC64 : PseudoSE<(outs ACRegs:$dst), (ins ACRegs:$src), []>; //===----------------------------------------------------------------------===// /// Arithmetic Instructions (ALU Immediate) -def ADDiu : ArithLogicI<"addiu", simm16, CPURegsOpnd, immSExt16, add>, +def ADDiu : MMRel, ArithLogicI<"addiu", simm16, CPURegsOpnd, immSExt16, add>, ADDI_FM<0x9>, IsAsCheapAsAMove; -def ADDi : ArithLogicI<"addi", simm16, CPURegsOpnd>, ADDI_FM<0x8>; -def SLTi : SetCC_I<"slti", setlt, simm16, immSExt16, CPURegs>, SLTI_FM<0xa>; -def SLTiu : SetCC_I<"sltiu", setult, simm16, immSExt16, CPURegs>, SLTI_FM<0xb>; -def ANDi : ArithLogicI<"andi", uimm16, CPURegsOpnd, immZExt16, and>, +def ADDi : MMRel, ArithLogicI<"addi", simm16, CPURegsOpnd>, ADDI_FM<0x8>; +def SLTi : MMRel, SetCC_I<"slti", setlt, simm16, immSExt16, CPURegs>, + SLTI_FM<0xa>; +def SLTiu : MMRel, SetCC_I<"sltiu", setult, simm16, immSExt16, CPURegs>, + SLTI_FM<0xb>; +def ANDi : MMRel, ArithLogicI<"andi", uimm16, CPURegsOpnd, immZExt16, and>, ADDI_FM<0xc>; -def ORi : ArithLogicI<"ori", uimm16, CPURegsOpnd, immZExt16, or>, +def ORi : MMRel, ArithLogicI<"ori", uimm16, CPURegsOpnd, immZExt16, or>, ADDI_FM<0xd>; -def XORi : ArithLogicI<"xori", uimm16, CPURegsOpnd, immZExt16, xor>, +def XORi : MMRel, ArithLogicI<"xori", uimm16, CPURegsOpnd, immZExt16, xor>, ADDI_FM<0xe>; -def LUi : LoadUpper<"lui", CPURegs, uimm16>, LUI_FM; +def LUi : MMRel, LoadUpper<"lui", CPURegs, uimm16>, LUI_FM; /// Arithmetic Instructions (3-Operand, R-Type) -def ADDu : ArithLogicR<"addu", CPURegsOpnd, 1, IIAlu, add>, ADD_FM<0, 0x21>; -def SUBu : ArithLogicR<"subu", CPURegsOpnd, 0, IIAlu, sub>, ADD_FM<0, 0x23>; -def MUL : ArithLogicR<"mul", CPURegsOpnd, 1, IIImul, mul>, 
ADD_FM<0x1c, 2>; -def ADD : ArithLogicR<"add", CPURegsOpnd>, ADD_FM<0, 0x20>; -def SUB : ArithLogicR<"sub", CPURegsOpnd>, ADD_FM<0, 0x22>; -def SLT : SetCC_R<"slt", setlt, CPURegs>, ADD_FM<0, 0x2a>; -def SLTu : SetCC_R<"sltu", setult, CPURegs>, ADD_FM<0, 0x2b>; -def AND : ArithLogicR<"and", CPURegsOpnd, 1, IIAlu, and>, ADD_FM<0, 0x24>; -def OR : ArithLogicR<"or", CPURegsOpnd, 1, IIAlu, or>, ADD_FM<0, 0x25>; -def XOR : ArithLogicR<"xor", CPURegsOpnd, 1, IIAlu, xor>, ADD_FM<0, 0x26>; -def NOR : LogicNOR<"nor", CPURegsOpnd>, ADD_FM<0, 0x27>; +def ADDu : MMRel, ArithLogicR<"addu", CPURegsOpnd, 1, IIAlu, add>, + ADD_FM<0, 0x21>; +def SUBu : MMRel, ArithLogicR<"subu", CPURegsOpnd, 0, IIAlu, sub>, + ADD_FM<0, 0x23>; +def MUL : MMRel, ArithLogicR<"mul", CPURegsOpnd, 1, IIImul, mul>, + ADD_FM<0x1c, 2>; +def ADD : MMRel, ArithLogicR<"add", CPURegsOpnd>, ADD_FM<0, 0x20>; +def SUB : MMRel, ArithLogicR<"sub", CPURegsOpnd>, ADD_FM<0, 0x22>; +def SLT : MMRel, SetCC_R<"slt", setlt, CPURegs>, ADD_FM<0, 0x2a>; +def SLTu : MMRel, SetCC_R<"sltu", setult, CPURegs>, ADD_FM<0, 0x2b>; +def AND : MMRel, ArithLogicR<"and", CPURegsOpnd, 1, IIAlu, and>, + ADD_FM<0, 0x24>; +def OR : MMRel, ArithLogicR<"or", CPURegsOpnd, 1, IIAlu, or>, + ADD_FM<0, 0x25>; +def XOR : MMRel, ArithLogicR<"xor", CPURegsOpnd, 1, IIAlu, xor>, + ADD_FM<0, 0x26>; +def NOR : MMRel, LogicNOR<"nor", CPURegsOpnd>, ADD_FM<0, 0x27>; /// Shift Instructions -def SLL : shift_rotate_imm<"sll", shamt, CPURegsOpnd, shl, immZExt5>, +def SLL : MMRel, shift_rotate_imm<"sll", shamt, CPURegsOpnd, shl, immZExt5>, SRA_FM<0, 0>; -def SRL : shift_rotate_imm<"srl", shamt, CPURegsOpnd, srl, immZExt5>, +def SRL : MMRel, shift_rotate_imm<"srl", shamt, CPURegsOpnd, srl, immZExt5>, SRA_FM<2, 0>; -def SRA : shift_rotate_imm<"sra", shamt, CPURegsOpnd, sra, immZExt5>, +def SRA : MMRel, shift_rotate_imm<"sra", shamt, CPURegsOpnd, sra, immZExt5>, SRA_FM<3, 0>; -def SLLV : shift_rotate_reg<"sllv", CPURegsOpnd, shl>, SRLV_FM<4, 0>; -def SRLV : shift_rotate_reg<"srlv", CPURegsOpnd, srl>, SRLV_FM<6, 0>; -def SRAV : shift_rotate_reg<"srav", CPURegsOpnd, sra>, SRLV_FM<7, 0>; +def SLLV : MMRel, shift_rotate_reg<"sllv", CPURegsOpnd, shl>, SRLV_FM<4, 0>; +def SRLV : MMRel, shift_rotate_reg<"srlv", CPURegsOpnd, srl>, SRLV_FM<6, 0>; +def SRAV : MMRel, shift_rotate_reg<"srav", CPURegsOpnd, sra>, SRLV_FM<7, 0>; // Rotate Instructions let Predicates = [HasMips32r2, HasStdEnc] in { - def ROTR : shift_rotate_imm<"rotr", shamt, CPURegsOpnd, rotr, immZExt5>, + def ROTR : MMRel, shift_rotate_imm<"rotr", shamt, CPURegsOpnd, rotr, + immZExt5>, SRA_FM<2, 1>; - def ROTRV : shift_rotate_reg<"rotrv", CPURegsOpnd, rotr>, SRLV_FM<6, 1>; + def ROTRV : MMRel, shift_rotate_reg<"rotrv", CPURegsOpnd, rotr>, + SRLV_FM<6, 1>; } /// Load and Store Instructions /// aligned -defm LB : LoadM<"lb", CPURegs, sextloadi8>, LW_FM<0x20>; -defm LBu : LoadM<"lbu", CPURegs, zextloadi8, addrDefault>, LW_FM<0x24>; -defm LH : LoadM<"lh", CPURegs, sextloadi16, addrDefault>, LW_FM<0x21>; -defm LHu : LoadM<"lhu", CPURegs, zextloadi16>, LW_FM<0x25>; -defm LW : LoadM<"lw", CPURegs, load, addrDefault>, LW_FM<0x23>; -defm SB : StoreM<"sb", CPURegs, truncstorei8>, LW_FM<0x28>; -defm SH : StoreM<"sh", CPURegs, truncstorei16>, LW_FM<0x29>; -defm SW : StoreM<"sw", CPURegs, store>, LW_FM<0x2b>; +defm LB : LoadM<"lb", CPURegs, sextloadi8>, MMRel, LW_FM<0x20>; +defm LBu : LoadM<"lbu", CPURegs, zextloadi8, addrDefault>, MMRel, LW_FM<0x24>; +defm LH : LoadM<"lh", CPURegs, sextloadi16, addrDefault>, MMRel, LW_FM<0x21>; +defm LHu : 
LoadM<"lhu", CPURegs, zextloadi16>, MMRel, LW_FM<0x25>; +defm LW : LoadM<"lw", CPURegs, load, addrDefault>, MMRel, LW_FM<0x23>; +defm SB : StoreM<"sb", CPURegs, truncstorei8>, MMRel, LW_FM<0x28>; +defm SH : StoreM<"sh", CPURegs, truncstorei16>, MMRel, LW_FM<0x29>; +defm SW : StoreM<"sw", CPURegs, store>, MMRel, LW_FM<0x2b>; /// load/store left/right defm LWL : LoadLeftRightM<"lwl", MipsLWL, CPURegs>, LW_FM<0x22>; @@ -968,8 +981,10 @@ let Uses = [V0, V1], isTerminator = 1, isReturn = 1, isBarrier = 1 in { } /// Multiply and Divide Instructions. -def MULT : Mult<"mult", IIImul, CPURegsOpnd, [HI, LO]>, MULT_FM<0, 0x18>; -def MULTu : Mult<"multu", IIImul, CPURegsOpnd, [HI, LO]>, MULT_FM<0, 0x19>; +def MULT : MMRel, Mult<"mult", IIImul, CPURegsOpnd, [HI, LO]>, + MULT_FM<0, 0x18>; +def MULTu : MMRel, Mult<"multu", IIImul, CPURegsOpnd, [HI, LO]>, + MULT_FM<0, 0x19>; def PseudoMULT : MultDivPseudo<MULT, ACRegs, CPURegsOpnd, MipsMult, IIImul>; def PseudoMULTu : MultDivPseudo<MULTu, ACRegs, CPURegsOpnd, MipsMultu, IIImul>; def SDIV : Div<"div", IIIdiv, CPURegsOpnd, [HI, LO]>, MULT_FM<0, 0x1a>; @@ -1066,10 +1081,10 @@ def : InstAlias<"negu $rt, $rs", def : InstAlias<"slt $rs, $rt, $imm", (SLTi CPURegsOpnd:$rs, CPURegs:$rt, simm16:$imm), 0>; def : InstAlias<"xor $rs, $rt, $imm", - (XORi CPURegsOpnd:$rs, CPURegsOpnd:$rt, simm16:$imm), 0>, + (XORi CPURegsOpnd:$rs, CPURegsOpnd:$rt, uimm16:$imm), 1>, Requires<[NotMips64]>; def : InstAlias<"or $rs, $rt, $imm", - (ORi CPURegsOpnd:$rs, CPURegsOpnd:$rt, simm16:$imm), 0>, + (ORi CPURegsOpnd:$rs, CPURegsOpnd:$rt, uimm16:$imm), 1>, Requires<[NotMips64]>; def : InstAlias<"nop", (SLL ZERO, ZERO, 0), 1>; def : InstAlias<"mfc0 $rt, $rd", @@ -1128,10 +1143,12 @@ def : MipsPat<(i32 imm:$imm), // Carry MipsPatterns def : MipsPat<(subc CPURegs:$lhs, CPURegs:$rhs), (SUBu CPURegs:$lhs, CPURegs:$rhs)>; -def : MipsPat<(addc CPURegs:$lhs, CPURegs:$rhs), - (ADDu CPURegs:$lhs, CPURegs:$rhs)>; -def : MipsPat<(addc CPURegs:$src, immSExt16:$imm), - (ADDiu CPURegs:$src, imm:$imm)>; +let Predicates = [HasStdEnc, NotDSP] in { + def : MipsPat<(addc CPURegs:$lhs, CPURegs:$rhs), + (ADDu CPURegs:$lhs, CPURegs:$rhs)>; + def : MipsPat<(addc CPURegs:$src, immSExt16:$imm), + (ADDiu CPURegs:$src, imm:$imm)>; +} // Call def : MipsPat<(MipsJmpLink (i32 tglobaladdr:$dst)), @@ -1326,3 +1343,6 @@ include "Mips16InstrInfo.td" include "MipsDSPInstrFormats.td" include "MipsDSPInstrInfo.td" +// Micromips +include "MicroMipsInstrFormats.td" +include "MicroMipsInstrInfo.td" diff --git a/contrib/llvm/lib/Target/Mips/MipsLongBranch.cpp b/contrib/llvm/lib/Target/Mips/MipsLongBranch.cpp index 2efe534..bf5ad37 100644 --- a/contrib/llvm/lib/Target/Mips/MipsLongBranch.cpp +++ b/contrib/llvm/lib/Target/Mips/MipsLongBranch.cpp @@ -399,6 +399,8 @@ static void emitGPDisp(MachineFunction &F, const MipsInstrInfo *TII) { } bool MipsLongBranch::runOnMachineFunction(MachineFunction &F) { + if (TM.getSubtarget<MipsSubtarget>().inMips16Mode()) + return false; if ((TM.getRelocationModel() == Reloc::PIC_) && TM.getSubtarget<MipsSubtarget>().isABI_O32() && F.getInfo<MipsFunctionInfo>()->globalBaseRegSet()) diff --git a/contrib/llvm/lib/Target/Mips/MipsModuleISelDAGToDAG.cpp b/contrib/llvm/lib/Target/Mips/MipsModuleISelDAGToDAG.cpp new file mode 100644 index 0000000..c6abf17 --- /dev/null +++ b/contrib/llvm/lib/Target/Mips/MipsModuleISelDAGToDAG.cpp @@ -0,0 +1,34 @@ +//===----------------------------------------------------------------------===// +// Instruction Selector Subtarget Control 
+//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// This file defines a pass used to change the subtarget for the +// Mips Instruction selector. +// +//===----------------------------------------------------------------------===// + +#include "MipsISelDAGToDAG.h" +#include "MipsModuleISelDAGToDAG.h" +#include "llvm/Support/Casting.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" + +namespace llvm { + +bool MipsModuleDAGToDAGISel::runOnMachineFunction(MachineFunction &MF) { + DEBUG(errs() << "In MipsModuleDAGToDAGISel::runMachineFunction\n"); + const_cast<MipsSubtarget&>(Subtarget).resetSubtarget(&MF); + return false; +} + +char MipsModuleDAGToDAGISel::ID = 0; + +} + + +llvm::FunctionPass *llvm::createMipsModuleISelDag(MipsTargetMachine &TM) { + return new MipsModuleDAGToDAGISel(TM); +} + + diff --git a/contrib/llvm/lib/Target/Mips/MipsModuleISelDAGToDAG.h b/contrib/llvm/lib/Target/Mips/MipsModuleISelDAGToDAG.h new file mode 100644 index 0000000..fda35ae --- /dev/null +++ b/contrib/llvm/lib/Target/Mips/MipsModuleISelDAGToDAG.h @@ -0,0 +1,66 @@ +//===---- MipsModuleISelDAGToDAG.h - Change Subtarget --------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines a pass used to change the subtarget for the +// Mips Instruction selector. +// +//===----------------------------------------------------------------------===// + +#ifndef MIPSMODULEISELDAGTODAG_H +#define MIPSMODULEISELDAGTODAG_H + +#include "Mips.h" +#include "MipsSubtarget.h" +#include "MipsTargetMachine.h" +#include "llvm/CodeGen/SelectionDAGISel.h" + + +//===----------------------------------------------------------------------===// +// Instruction Selector Implementation +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// MipsModuleDAGToDAGISel - MIPS specific code to select MIPS machine +// instructions for SelectionDAG operations. +//===----------------------------------------------------------------------===// +namespace llvm { + +class MipsModuleDAGToDAGISel : public MachineFunctionPass { +public: + + static char ID; + + explicit MipsModuleDAGToDAGISel(MipsTargetMachine &TM_) + : MachineFunctionPass(ID), + TM(TM_), Subtarget(TM.getSubtarget<MipsSubtarget>()) {} + + // Pass Name + virtual const char *getPassName() const { + return "MIPS DAG->DAG Pattern Instruction Selection"; + } + + virtual bool runOnMachineFunction(MachineFunction &MF); + + virtual SDNode *Select(SDNode *N) { + llvm_unreachable("unexpected"); + } + +protected: + /// Keep a pointer to the MipsSubtarget around so that we can make the right + /// decision when generating code for different targets. + const TargetMachine &TM; + const MipsSubtarget &Subtarget; +}; + +/// createMipsISelDag - This pass converts a legalized DAG into a +/// MIPS-specific DAG, ready for instruction scheduling. 
+FunctionPass *createMipsModuleISelDag(MipsTargetMachine &TM); +} + +#endif diff --git a/contrib/llvm/lib/Target/Mips/MipsOs16.cpp b/contrib/llvm/lib/Target/Mips/MipsOs16.cpp new file mode 100644 index 0000000..1919077 --- /dev/null +++ b/contrib/llvm/lib/Target/Mips/MipsOs16.cpp @@ -0,0 +1,113 @@ +//===---- MipsOs16.cpp for Mips Option -Os16 --------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines an optimization phase for the MIPS target. +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "mips-os16" +#include "MipsOs16.h" +#include "llvm/IR/Module.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" + +namespace { + + // Figure out if we need float point based on the function signature. + // We need to move variables in and/or out of floating point + // registers because of the ABI + // + bool needsFPFromSig(Function &F) { + Type* RetType = F.getReturnType(); + switch (RetType->getTypeID()) { + case Type::FloatTyID: + case Type::DoubleTyID: + return true; + default: + ; + } + if (F.arg_size() >=1) { + Argument &Arg = F.getArgumentList().front(); + switch (Arg.getType()->getTypeID()) { + case Type::FloatTyID: + case Type::DoubleTyID: + return true; + default: + ; + } + } + return false; + } + + // Figure out if the function will need floating point operations + // + bool needsFP(Function &F) { + if (needsFPFromSig(F)) + return true; + for (Function::const_iterator BB = F.begin(), E = F.end(); BB != E; ++BB) + for (BasicBlock::const_iterator I = BB->begin(), E = BB->end(); + I != E; ++I) { + const Instruction &Inst = *I; + switch (Inst.getOpcode()) { + case Instruction::FAdd: + case Instruction::FSub: + case Instruction::FMul: + case Instruction::FDiv: + case Instruction::FRem: + case Instruction::FPToUI: + case Instruction::FPToSI: + case Instruction::UIToFP: + case Instruction::SIToFP: + case Instruction::FPTrunc: + case Instruction::FPExt: + case Instruction::FCmp: + return true; + default: + ; + } + if (const CallInst *CI = dyn_cast<CallInst>(I)) { + DEBUG(dbgs() << "Working on call" << "\n"); + Function &F_ = *CI->getCalledFunction(); + if (needsFPFromSig(F_)) + return true; + } + } + return false; + } +} +namespace llvm { + + +bool MipsOs16::runOnModule(Module &M) { + DEBUG(errs() << "Run on Module MipsOs16\n"); + bool modified = false; + for (Module::iterator F = M.begin(), E = M.end(); F != E; ++F) { + if (F->isDeclaration()) continue; + DEBUG(dbgs() << "Working on " << F->getName() << "\n"); + if (needsFP(*F)) { + DEBUG(dbgs() << " need to compile as nomips16 \n"); + F->addFnAttr("nomips16"); + } + else { + F->addFnAttr("mips16"); + DEBUG(dbgs() << " no need to compile as nomips16 \n"); + } + } + return modified; +} + +char MipsOs16::ID = 0; + +} + +ModulePass *llvm::createMipsOs16(MipsTargetMachine &TM) { + return new MipsOs16; +} + + diff --git a/contrib/llvm/lib/Target/Mips/MipsOs16.h b/contrib/llvm/lib/Target/Mips/MipsOs16.h new file mode 100644 index 0000000..21beef8 --- /dev/null +++ b/contrib/llvm/lib/Target/Mips/MipsOs16.h @@ -0,0 +1,49 @@ +//===---- MipsOs16.h for Mips Option -Os16 --------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// +// +// This file defines an optimization phase for the MIPS target. +// +//===----------------------------------------------------------------------===// + +#include "MCTargetDesc/MipsMCTargetDesc.h" +#include "MipsTargetMachine.h" +#include "llvm/Pass.h" +#include "llvm/Target/TargetMachine.h" + + + +#ifndef MIPSOS16_H +#define MIPSOS16_H + +using namespace llvm; + +namespace llvm { + +class MipsOs16 : public ModulePass { + +public: + static char ID; + + MipsOs16() : ModulePass(ID) { + + } + + virtual const char *getPassName() const { + return "MIPS Os16 Optimization"; + } + + virtual bool runOnModule(Module &M); + +}; + +ModulePass *createMipsOs16(MipsTargetMachine &TM); + +} + +#endif diff --git a/contrib/llvm/lib/Target/Mips/MipsRegisterInfo.cpp b/contrib/llvm/lib/Target/Mips/MipsRegisterInfo.cpp index 3250733..dead07b 100644 --- a/contrib/llvm/lib/Target/Mips/MipsRegisterInfo.cpp +++ b/contrib/llvm/lib/Target/Mips/MipsRegisterInfo.cpp @@ -145,7 +145,11 @@ getReservedRegs(const MachineFunction &MF) const { Reserved.set(Mips::HWR29_64); // Reserve DSP control register. - Reserved.set(Mips::DSPCtrl); + Reserved.set(Mips::DSPPos); + Reserved.set(Mips::DSPSCount); + Reserved.set(Mips::DSPCarry); + Reserved.set(Mips::DSPEFI); + Reserved.set(Mips::DSPOutFlag); // Reserve RA if in mips16 mode. if (Subtarget.inMips16Mode()) { diff --git a/contrib/llvm/lib/Target/Mips/MipsRegisterInfo.td b/contrib/llvm/lib/Target/Mips/MipsRegisterInfo.td index 64458bc..229f167 100644 --- a/contrib/llvm/lib/Target/Mips/MipsRegisterInfo.td +++ b/contrib/llvm/lib/Target/Mips/MipsRegisterInfo.td @@ -16,6 +16,11 @@ def sub_fpodd : SubRegIndex; def sub_32 : SubRegIndex; def sub_lo : SubRegIndex; def sub_hi : SubRegIndex; +def sub_dsp16_19 : SubRegIndex; +def sub_dsp20 : SubRegIndex; +def sub_dsp21 : SubRegIndex; +def sub_dsp22 : SubRegIndex; +def sub_dsp23 : SubRegIndex; } class Unallocatable { @@ -229,14 +234,14 @@ let Namespace = "Mips" in { def D31_64 : AFPR64<31, "f31", [F31]>, DwarfRegNum<[63]>; // Hi/Lo registers - def HI : Register<"hi">, DwarfRegNum<[64]>; - def HI1 : Register<"hi1">, DwarfRegNum<[176]>; - def HI2 : Register<"hi2">, DwarfRegNum<[178]>; - def HI3 : Register<"hi3">, DwarfRegNum<[180]>; - def LO : Register<"lo">, DwarfRegNum<[65]>; - def LO1 : Register<"lo1">, DwarfRegNum<[177]>; - def LO2 : Register<"lo2">, DwarfRegNum<[179]>; - def LO3 : Register<"lo3">, DwarfRegNum<[181]>; + def HI : Register<"ac0">, DwarfRegNum<[64]>; + def HI1 : Register<"ac1">, DwarfRegNum<[176]>; + def HI2 : Register<"ac2">, DwarfRegNum<[178]>; + def HI3 : Register<"ac3">, DwarfRegNum<[180]>; + def LO : Register<"ac0">, DwarfRegNum<[65]>; + def LO1 : Register<"ac1">, DwarfRegNum<[177]>; + def LO2 : Register<"ac2">, DwarfRegNum<[179]>; + def LO3 : Register<"ac3">, DwarfRegNum<[181]>; let SubRegIndices = [sub_32] in { def HI64 : RegisterWithSubRegs<"hi", [HI]>; @@ -264,7 +269,23 @@ let Namespace = "Mips" in { def AC0_64 : ACC<0, "ac0", [LO64, HI64]>; - def DSPCtrl : Register<"dspctrl">; + // DSP-ASE control register fields. 
+ def DSPPos : Register<"">; + def DSPSCount : Register<"">; + def DSPCarry : Register<"">; + def DSPEFI : Register<"">; + def DSPOutFlag16_19 : Register<"">; + def DSPOutFlag20 : Register<"">; + def DSPOutFlag21 : Register<"">; + def DSPOutFlag22 : Register<"">; + def DSPOutFlag23 : Register<"">; + def DSPCCond : Register<"">; + + let SubRegIndices = [sub_dsp16_19, sub_dsp20, sub_dsp21, sub_dsp22, + sub_dsp23] in + def DSPOutFlag : RegisterWithSubRegs<"", [DSPOutFlag16_19, DSPOutFlag20, + DSPOutFlag21, DSPOutFlag22, + DSPOutFlag23]>; } //===----------------------------------------------------------------------===// @@ -340,8 +361,12 @@ def FGR64 : RegisterClass<"Mips", [f64], 64, (sequence "D%u_64", 0, 31)>; def CCR : RegisterClass<"Mips", [i32], 32, (add FCR31,FCC0)>, Unallocatable; // Hi/Lo Registers -def HILO : RegisterClass<"Mips", [i32], 32, (add HI, LO)>, Unallocatable; -def HILO64 : RegisterClass<"Mips", [i64], 64, (add HI64, LO64)>, Unallocatable; +def LORegs : RegisterClass<"Mips", [i32], 32, (add LO)>; +def HIRegs : RegisterClass<"Mips", [i32], 32, (add HI)>; +def LORegsDSP : RegisterClass<"Mips", [i32], 32, (add LO, LO1, LO2, LO3)>; +def HIRegsDSP : RegisterClass<"Mips", [i32], 32, (add HI, HI1, HI2, HI3)>; +def LORegs64 : RegisterClass<"Mips", [i64], 64, (add LO64)>; +def HIRegs64 : RegisterClass<"Mips", [i64], 64, (add HI64)>; // Hardware registers def HWRegs : RegisterClass<"Mips", [i32], 32, (add HWR29)>, Unallocatable; @@ -360,6 +385,9 @@ def ACRegsDSP : RegisterClass<"Mips", [untyped], 64, (sequence "AC%u", 0, 3)> { let Size = 64; } +def DSPCC : RegisterClass<"Mips", [v4i8, v2i16], 32, (add DSPCCond)>; + +// Register Operands. def CPURegsAsmOperand : AsmOperandClass { let Name = "CPURegsAsm"; let ParserMethod = "parseCPURegs"; diff --git a/contrib/llvm/lib/Target/Mips/MipsSEFrameLowering.cpp b/contrib/llvm/lib/Target/Mips/MipsSEFrameLowering.cpp index 68ec921..b295e91 100644 --- a/contrib/llvm/lib/Target/Mips/MipsSEFrameLowering.cpp +++ b/contrib/llvm/lib/Target/Mips/MipsSEFrameLowering.cpp @@ -32,17 +32,21 @@ using namespace llvm; namespace { typedef MachineBasicBlock::iterator Iter; -/// Helper class to expand accumulator pseudos. -class ExpandACCPseudo { +/// Helper class to expand pseudos. 
+class ExpandPseudo { public: - ExpandACCPseudo(MachineFunction &MF); + ExpandPseudo(MachineFunction &MF); bool expand(); private: bool expandInstr(MachineBasicBlock &MBB, Iter I); - void expandLoad(MachineBasicBlock &MBB, Iter I, unsigned RegSize); - void expandStore(MachineBasicBlock &MBB, Iter I, unsigned RegSize); - void expandCopy(MachineBasicBlock &MBB, Iter I, unsigned RegSize); + void expandLoadCCond(MachineBasicBlock &MBB, Iter I); + void expandStoreCCond(MachineBasicBlock &MBB, Iter I); + void expandLoadACC(MachineBasicBlock &MBB, Iter I, unsigned RegSize); + void expandStoreACC(MachineBasicBlock &MBB, Iter I, unsigned RegSize); + bool expandCopy(MachineBasicBlock &MBB, Iter I); + bool expandCopyACC(MachineBasicBlock &MBB, Iter I, unsigned Dst, + unsigned Src, unsigned RegSize); MachineFunction &MF; const MipsSEInstrInfo &TII; @@ -51,12 +55,12 @@ private: }; } -ExpandACCPseudo::ExpandACCPseudo(MachineFunction &MF_) +ExpandPseudo::ExpandPseudo(MachineFunction &MF_) : MF(MF_), TII(*static_cast<const MipsSEInstrInfo*>(MF.getTarget().getInstrInfo())), RegInfo(TII.getRegisterInfo()), MRI(MF.getRegInfo()) {} -bool ExpandACCPseudo::expand() { +bool ExpandPseudo::expand() { bool Expanded = false; for (MachineFunction::iterator BB = MF.begin(), BBEnd = MF.end(); @@ -67,34 +71,39 @@ bool ExpandACCPseudo::expand() { return Expanded; } -bool ExpandACCPseudo::expandInstr(MachineBasicBlock &MBB, Iter I) { +bool ExpandPseudo::expandInstr(MachineBasicBlock &MBB, Iter I) { switch(I->getOpcode()) { + case Mips::LOAD_CCOND_DSP: + case Mips::LOAD_CCOND_DSP_P8: + expandLoadCCond(MBB, I); + break; + case Mips::STORE_CCOND_DSP: + case Mips::STORE_CCOND_DSP_P8: + expandStoreCCond(MBB, I); + break; case Mips::LOAD_AC64: case Mips::LOAD_AC64_P8: case Mips::LOAD_AC_DSP: case Mips::LOAD_AC_DSP_P8: - expandLoad(MBB, I, 4); + expandLoadACC(MBB, I, 4); break; case Mips::LOAD_AC128: case Mips::LOAD_AC128_P8: - expandLoad(MBB, I, 8); + expandLoadACC(MBB, I, 8); break; case Mips::STORE_AC64: case Mips::STORE_AC64_P8: case Mips::STORE_AC_DSP: case Mips::STORE_AC_DSP_P8: - expandStore(MBB, I, 4); + expandStoreACC(MBB, I, 4); break; case Mips::STORE_AC128: case Mips::STORE_AC128_P8: - expandStore(MBB, I, 8); + expandStoreACC(MBB, I, 8); break; - case Mips::COPY_AC64: - case Mips::COPY_AC_DSP: - expandCopy(MBB, I, 4); - break; - case Mips::COPY_AC128: - expandCopy(MBB, I, 8); + case TargetOpcode::COPY: + if (!expandCopy(MBB, I)) + return false; break; default: return false; @@ -104,7 +113,37 @@ bool ExpandACCPseudo::expandInstr(MachineBasicBlock &MBB, Iter I) { return true; } -void ExpandACCPseudo::expandLoad(MachineBasicBlock &MBB, Iter I, +void ExpandPseudo::expandLoadCCond(MachineBasicBlock &MBB, Iter I) { + // load $vr, FI + // copy ccond, $vr + + assert(I->getOperand(0).isReg() && I->getOperand(1).isFI()); + + const TargetRegisterClass *RC = RegInfo.intRegClass(4); + unsigned VR = MRI.createVirtualRegister(RC); + unsigned Dst = I->getOperand(0).getReg(), FI = I->getOperand(1).getIndex(); + + TII.loadRegFromStack(MBB, I, VR, FI, RC, &RegInfo, 0); + BuildMI(MBB, I, I->getDebugLoc(), TII.get(TargetOpcode::COPY), Dst) + .addReg(VR, RegState::Kill); +} + +void ExpandPseudo::expandStoreCCond(MachineBasicBlock &MBB, Iter I) { + // copy $vr, ccond + // store $vr, FI + + assert(I->getOperand(0).isReg() && I->getOperand(1).isFI()); + + const TargetRegisterClass *RC = RegInfo.intRegClass(4); + unsigned VR = MRI.createVirtualRegister(RC); + unsigned Src = I->getOperand(0).getReg(), FI = I->getOperand(1).getIndex(); + + 
BuildMI(MBB, I, I->getDebugLoc(), TII.get(TargetOpcode::COPY), VR) + .addReg(Src, getKillRegState(I->getOperand(0).isKill())); + TII.storeRegToStack(MBB, I, VR, true, FI, RC, &RegInfo, 0); +} + +void ExpandPseudo::expandLoadACC(MachineBasicBlock &MBB, Iter I, unsigned RegSize) { // load $vr0, FI // copy lo, $vr0 @@ -128,7 +167,7 @@ void ExpandACCPseudo::expandLoad(MachineBasicBlock &MBB, Iter I, BuildMI(MBB, I, DL, Desc, Hi).addReg(VR1, RegState::Kill); } -void ExpandACCPseudo::expandStore(MachineBasicBlock &MBB, Iter I, +void ExpandPseudo::expandStoreACC(MachineBasicBlock &MBB, Iter I, unsigned RegSize) { // copy $vr0, lo // store $vr0, FI @@ -152,8 +191,20 @@ void ExpandACCPseudo::expandStore(MachineBasicBlock &MBB, Iter I, TII.storeRegToStack(MBB, I, VR1, true, FI, RC, &RegInfo, RegSize); } -void ExpandACCPseudo::expandCopy(MachineBasicBlock &MBB, Iter I, - unsigned RegSize) { +bool ExpandPseudo::expandCopy(MachineBasicBlock &MBB, Iter I) { + unsigned Dst = I->getOperand(0).getReg(), Src = I->getOperand(1).getReg(); + + if (Mips::ACRegsDSPRegClass.contains(Dst, Src)) + return expandCopyACC(MBB, I, Dst, Src, 4); + + if (Mips::ACRegs128RegClass.contains(Dst, Src)) + return expandCopyACC(MBB, I, Dst, Src, 8); + + return false; +} + +bool ExpandPseudo::expandCopyACC(MachineBasicBlock &MBB, Iter I, unsigned Dst, + unsigned Src, unsigned RegSize) { // copy $vr0, src_lo // copy dst_lo, $vr0 // copy $vr1, src_hi @@ -162,7 +213,6 @@ void ExpandACCPseudo::expandCopy(MachineBasicBlock &MBB, Iter I, const TargetRegisterClass *RC = RegInfo.intRegClass(RegSize); unsigned VR0 = MRI.createVirtualRegister(RC); unsigned VR1 = MRI.createVirtualRegister(RC); - unsigned Dst = I->getOperand(0).getReg(), Src = I->getOperand(1).getReg(); unsigned SrcKill = getKillRegState(I->getOperand(1).isKill()); unsigned DstLo = RegInfo.getSubReg(Dst, Mips::sub_lo); unsigned DstHi = RegInfo.getSubReg(Dst, Mips::sub_hi); @@ -176,6 +226,7 @@ void ExpandACCPseudo::expandCopy(MachineBasicBlock &MBB, Iter I, BuildMI(MBB, I, DL, TII.get(TargetOpcode::COPY), VR1).addReg(SrcHi, SrcKill); BuildMI(MBB, I, DL, TII.get(TargetOpcode::COPY), DstHi) .addReg(VR1, RegState::Kill); + return true; } unsigned MipsSEFrameLowering::ehDataReg(unsigned I) const { @@ -438,7 +489,7 @@ processFunctionBeforeCalleeSavedScan(MachineFunction &MF, // Expand pseudo instructions which load, store or copy accumulators. // Add an emergency spill slot if a pseudo was expanded. - if (ExpandACCPseudo(MF).expand()) { + if (ExpandPseudo(MF).expand()) { // The spill slot should be half the size of the accumulator. If target is // mips64, it should be 64-bit, otherwise it should be 32-bt. const TargetRegisterClass *RC = STI.hasMips64() ? diff --git a/contrib/llvm/lib/Target/Mips/MipsSEISelDAGToDAG.cpp b/contrib/llvm/lib/Target/Mips/MipsSEISelDAGToDAG.cpp index d6d2207..8a6523a 100644 --- a/contrib/llvm/lib/Target/Mips/MipsSEISelDAGToDAG.cpp +++ b/contrib/llvm/lib/Target/Mips/MipsSEISelDAGToDAG.cpp @@ -35,6 +35,36 @@ #include "llvm/Target/TargetMachine.h" using namespace llvm; +bool MipsSEDAGToDAGISel::runOnMachineFunction(MachineFunction &MF) { + if (Subtarget.inMips16Mode()) + return false; + return MipsDAGToDAGISel::runOnMachineFunction(MF); +} + +void MipsSEDAGToDAGISel::addDSPCtrlRegOperands(bool IsDef, MachineInstr &MI, + MachineFunction &MF) { + MachineInstrBuilder MIB(MF, &MI); + unsigned Mask = MI.getOperand(1).getImm(); + unsigned Flag = IsDef ? 
RegState::ImplicitDefine : RegState::Implicit; + + if (Mask & 1) + MIB.addReg(Mips::DSPPos, Flag); + + if (Mask & 2) + MIB.addReg(Mips::DSPSCount, Flag); + + if (Mask & 4) + MIB.addReg(Mips::DSPCarry, Flag); + + if (Mask & 8) + MIB.addReg(Mips::DSPOutFlag, Flag); + + if (Mask & 16) + MIB.addReg(Mips::DSPCCond, Flag); + + if (Mask & 32) + MIB.addReg(Mips::DSPEFI, Flag); +} bool MipsSEDAGToDAGISel::replaceUsesWithZeroReg(MachineRegisterInfo *MRI, const MachineInstr& MI) { @@ -173,29 +203,14 @@ void MipsSEDAGToDAGISel::processFunctionAfterISel(MachineFunction &MF) { for (MachineFunction::iterator MFI = MF.begin(), MFE = MF.end(); MFI != MFE; ++MFI) - for (MachineBasicBlock::iterator I = MFI->begin(); I != MFI->end(); ++I) - replaceUsesWithZeroReg(MRI, *I); -} - -/// Select multiply instructions. -std::pair<SDNode*, SDNode*> -MipsSEDAGToDAGISel::selectMULT(SDNode *N, unsigned Opc, DebugLoc DL, EVT Ty, - bool HasLo, bool HasHi) { - SDNode *Lo = 0, *Hi = 0; - SDNode *Mul = CurDAG->getMachineNode(Opc, DL, MVT::Glue, N->getOperand(0), - N->getOperand(1)); - SDValue InFlag = SDValue(Mul, 0); - - if (HasLo) { - unsigned Opcode = (Ty == MVT::i32 ? Mips::MFLO : Mips::MFLO64); - Lo = CurDAG->getMachineNode(Opcode, DL, Ty, MVT::Glue, InFlag); - InFlag = SDValue(Lo, 1); - } - if (HasHi) { - unsigned Opcode = (Ty == MVT::i32 ? Mips::MFHI : Mips::MFHI64); - Hi = CurDAG->getMachineNode(Opcode, DL, Ty, InFlag); - } - return std::make_pair(Lo, Hi); + for (MachineBasicBlock::iterator I = MFI->begin(); I != MFI->end(); ++I) { + if (I->getOpcode() == Mips::RDDSP) + addDSPCtrlRegOperands(false, *I, MF); + else if (I->getOpcode() == Mips::WRDSP) + addDSPCtrlRegOperands(true, *I, MF); + else + replaceUsesWithZeroReg(MRI, *I); + } } SDNode *MipsSEDAGToDAGISel::selectAddESubE(unsigned MOp, SDValue InFlag, @@ -211,7 +226,7 @@ SDNode *MipsSEDAGToDAGISel::selectAddESubE(unsigned MOp, SDValue InFlag, SDValue LHS = Node->getOperand(0), RHS = Node->getOperand(1); EVT VT = LHS.getValueType(); - SDNode *Carry = CurDAG->getMachineNode(Mips::SLTu, DL, VT, Ops, 2); + SDNode *Carry = CurDAG->getMachineNode(Mips::SLTu, DL, VT, Ops); SDNode *AddCarry = CurDAG->getMachineNode(Mips::ADDu, DL, VT, SDValue(Carry, 0), RHS); return CurDAG->SelectNodeTo(Node, MOp, VT, MVT::Glue, LHS, @@ -307,9 +322,7 @@ std::pair<bool, SDNode*> MipsSEDAGToDAGISel::selectNode(SDNode *Node) { // Instruction Selection not handled by the auto-generated // tablegen selection should be handled here. /// - EVT NodeTy = Node->getValueType(0); SDNode *Result; - unsigned MultOpc; switch(Opcode) { default: break; @@ -321,51 +334,13 @@ std::pair<bool, SDNode*> MipsSEDAGToDAGISel::selectNode(SDNode *Node) { } case ISD::ADDE: { + if (Subtarget.hasDSP()) // Select DSP instructions, ADDSC and ADDWC. + break; SDValue InFlag = Node->getOperand(2); Result = selectAddESubE(Mips::ADDu, InFlag, InFlag.getValue(0), DL, Node); return std::make_pair(true, Result); } - /// Mul with two results - case ISD::SMUL_LOHI: - case ISD::UMUL_LOHI: { - if (NodeTy == MVT::i32) - MultOpc = (Opcode == ISD::UMUL_LOHI ? Mips::MULTu : Mips::MULT); - else - MultOpc = (Opcode == ISD::UMUL_LOHI ? 
Mips::DMULTu : Mips::DMULT); - - std::pair<SDNode*, SDNode*> LoHi = selectMULT(Node, MultOpc, DL, NodeTy, - true, true); - - if (!SDValue(Node, 0).use_empty()) - ReplaceUses(SDValue(Node, 0), SDValue(LoHi.first, 0)); - - if (!SDValue(Node, 1).use_empty()) - ReplaceUses(SDValue(Node, 1), SDValue(LoHi.second, 0)); - - return std::make_pair(true, (SDNode*)NULL); - } - - /// Special Muls - case ISD::MUL: { - // Mips32 has a 32-bit three operand mul instruction. - if (Subtarget.hasMips32() && NodeTy == MVT::i32) - break; - MultOpc = NodeTy == MVT::i32 ? Mips::MULT : Mips::DMULT; - Result = selectMULT(Node, MultOpc, DL, NodeTy, true, false).first; - return std::make_pair(true, Result); - } - case ISD::MULHS: - case ISD::MULHU: { - if (NodeTy == MVT::i32) - MultOpc = (Opcode == ISD::MULHU ? Mips::MULTu : Mips::MULT); - else - MultOpc = (Opcode == ISD::MULHU ? Mips::DMULTu : Mips::DMULT); - - Result = selectMULT(Node, MultOpc, DL, NodeTy, false, true).second; - return std::make_pair(true, Result); - } - case ISD::ConstantFP: { ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(Node); if (Node->getValueType(0) == MVT::f64 && CN->isExactlyValue(+0.0)) { @@ -460,7 +435,7 @@ std::pair<bool, SDNode*> MipsSEDAGToDAGISel::selectNode(SDNode *Node) { const SDValue Ops[] = { RegClass, Node->getOperand(0), LoIdx, Node->getOperand(1), HiIdx }; SDNode *Res = CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, DL, - MVT::Untyped, Ops, 5); + MVT::Untyped, Ops); return std::make_pair(true, Res); } } diff --git a/contrib/llvm/lib/Target/Mips/MipsSEISelDAGToDAG.h b/contrib/llvm/lib/Target/Mips/MipsSEISelDAGToDAG.h index 6137ab0..a235e96 100644 --- a/contrib/llvm/lib/Target/Mips/MipsSEISelDAGToDAG.h +++ b/contrib/llvm/lib/Target/Mips/MipsSEISelDAGToDAG.h @@ -24,6 +24,12 @@ public: explicit MipsSEDAGToDAGISel(MipsTargetMachine &TM) : MipsDAGToDAGISel(TM) {} private: + + virtual bool runOnMachineFunction(MachineFunction &MF); + + void addDSPCtrlRegOperands(bool IsDef, MachineInstr &MI, + MachineFunction &MF); + bool replaceUsesWithZeroReg(MachineRegisterInfo *MRI, const MachineInstr&); std::pair<SDNode*, SDNode*> selectMULT(SDNode *N, unsigned Opc, DebugLoc dl, diff --git a/contrib/llvm/lib/Target/Mips/MipsSEISelLowering.cpp b/contrib/llvm/lib/Target/Mips/MipsSEISelLowering.cpp index 4f21921..8544bb8 100644 --- a/contrib/llvm/lib/Target/Mips/MipsSEISelLowering.cpp +++ b/contrib/llvm/lib/Target/Mips/MipsSEISelLowering.cpp @@ -15,6 +15,7 @@ #include "MipsTargetMachine.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/IR/Intrinsics.h" #include "llvm/Support/CommandLine.h" #include "llvm/Target/TargetInstrInfo.h" @@ -27,6 +28,9 @@ EnableMipsTailCalls("enable-mips-tail-calls", cl::Hidden, MipsSETargetLowering::MipsSETargetLowering(MipsTargetMachine &TM) : MipsTargetLowering(TM) { // Set up the register classes + + clearRegisterClasses(); + addRegisterClass(MVT::i32, &Mips::CPURegsRegClass); if (HasMips64) @@ -42,12 +46,23 @@ MipsSETargetLowering::MipsSETargetLowering(MipsTargetMachine &TM) for (unsigned Opc = 0; Opc < ISD::BUILTIN_OP_END; ++Opc) setOperationAction(Opc, VecTys[i], Expand); + setOperationAction(ISD::ADD, VecTys[i], Legal); + setOperationAction(ISD::SUB, VecTys[i], Legal); setOperationAction(ISD::LOAD, VecTys[i], Legal); setOperationAction(ISD::STORE, VecTys[i], Legal); setOperationAction(ISD::BITCAST, VecTys[i], Legal); } + + setTargetDAGCombine(ISD::SHL); + setTargetDAGCombine(ISD::SRA); + setTargetDAGCombine(ISD::SRL); + 
setTargetDAGCombine(ISD::SETCC); + setTargetDAGCombine(ISD::VSELECT); } + if (Subtarget->hasDSPR2()) + setOperationAction(ISD::MUL, MVT::v2i16, Legal); + if (!TM.Options.UseSoftFloat) { addRegisterClass(MVT::f32, &Mips::FGR32RegClass); @@ -65,14 +80,19 @@ MipsSETargetLowering::MipsSETargetLowering(MipsTargetMachine &TM) setOperationAction(ISD::MULHS, MVT::i32, Custom); setOperationAction(ISD::MULHU, MVT::i32, Custom); - if (HasMips64) + if (HasMips64) { + setOperationAction(ISD::MULHS, MVT::i64, Custom); + setOperationAction(ISD::MULHU, MVT::i64, Custom); setOperationAction(ISD::MUL, MVT::i64, Custom); + } + + setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i64, Custom); + setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i64, Custom); setOperationAction(ISD::SDIVREM, MVT::i32, Custom); setOperationAction(ISD::UDIVREM, MVT::i32, Custom); setOperationAction(ISD::SDIVREM, MVT::i64, Custom); setOperationAction(ISD::UDIVREM, MVT::i64, Custom); - setOperationAction(ISD::MEMBARRIER, MVT::Other, Custom); setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, Custom); setOperationAction(ISD::LOAD, MVT::i32, Custom); setOperationAction(ISD::STORE, MVT::i32, Custom); @@ -113,7 +133,10 @@ SDValue MipsSETargetLowering::LowerOperation(SDValue Op, case ISD::MULHU: return lowerMulDiv(Op, MipsISD::Multu, false, true, DAG); case ISD::MUL: return lowerMulDiv(Op, MipsISD::Mult, true, false, DAG); case ISD::SDIVREM: return lowerMulDiv(Op, MipsISD::DivRem, true, true, DAG); - case ISD::UDIVREM: return lowerMulDiv(Op, MipsISD::DivRemU, true, true, DAG); + case ISD::UDIVREM: return lowerMulDiv(Op, MipsISD::DivRemU, true, true, + DAG); + case ISD::INTRINSIC_WO_CHAIN: return lowerINTRINSIC_WO_CHAIN(Op, DAG); + case ISD::INTRINSIC_W_CHAIN: return lowerINTRINSIC_W_CHAIN(Op, DAG); } return MipsTargetLowering::LowerOperation(Op, DAG); @@ -297,18 +320,136 @@ static SDValue performSUBECombine(SDNode *N, SelectionDAG &DAG, return SDValue(); } +static SDValue performDSPShiftCombine(unsigned Opc, SDNode *N, EVT Ty, + SelectionDAG &DAG, + const MipsSubtarget *Subtarget) { + // See if this is a vector splat immediate node. 
+ APInt SplatValue, SplatUndef; + unsigned SplatBitSize; + bool HasAnyUndefs; + unsigned EltSize = Ty.getVectorElementType().getSizeInBits(); + BuildVectorSDNode *BV = dyn_cast<BuildVectorSDNode>(N->getOperand(1)); + + if (!BV || + !BV->isConstantSplat(SplatValue, SplatUndef, SplatBitSize, HasAnyUndefs, + EltSize, !Subtarget->isLittle()) || + (SplatBitSize != EltSize) || + (SplatValue.getZExtValue() >= EltSize)) + return SDValue(); + + return DAG.getNode(Opc, N->getDebugLoc(), Ty, N->getOperand(0), + DAG.getConstant(SplatValue.getZExtValue(), MVT::i32)); +} + +static SDValue performSHLCombine(SDNode *N, SelectionDAG &DAG, + TargetLowering::DAGCombinerInfo &DCI, + const MipsSubtarget *Subtarget) { + EVT Ty = N->getValueType(0); + + if ((Ty != MVT::v2i16) && (Ty != MVT::v4i8)) + return SDValue(); + + return performDSPShiftCombine(MipsISD::SHLL_DSP, N, Ty, DAG, Subtarget); +} + +static SDValue performSRACombine(SDNode *N, SelectionDAG &DAG, + TargetLowering::DAGCombinerInfo &DCI, + const MipsSubtarget *Subtarget) { + EVT Ty = N->getValueType(0); + + if ((Ty != MVT::v2i16) && ((Ty != MVT::v4i8) || !Subtarget->hasDSPR2())) + return SDValue(); + + return performDSPShiftCombine(MipsISD::SHRA_DSP, N, Ty, DAG, Subtarget); +} + + +static SDValue performSRLCombine(SDNode *N, SelectionDAG &DAG, + TargetLowering::DAGCombinerInfo &DCI, + const MipsSubtarget *Subtarget) { + EVT Ty = N->getValueType(0); + + if (((Ty != MVT::v2i16) || !Subtarget->hasDSPR2()) && (Ty != MVT::v4i8)) + return SDValue(); + + return performDSPShiftCombine(MipsISD::SHRL_DSP, N, Ty, DAG, Subtarget); +} + +static bool isLegalDSPCondCode(EVT Ty, ISD::CondCode CC) { + bool IsV216 = (Ty == MVT::v2i16); + + switch (CC) { + case ISD::SETEQ: + case ISD::SETNE: return true; + case ISD::SETLT: + case ISD::SETLE: + case ISD::SETGT: + case ISD::SETGE: return IsV216; + case ISD::SETULT: + case ISD::SETULE: + case ISD::SETUGT: + case ISD::SETUGE: return !IsV216; + default: return false; + } +} + +static SDValue performSETCCCombine(SDNode *N, SelectionDAG &DAG) { + EVT Ty = N->getValueType(0); + + if ((Ty != MVT::v2i16) && (Ty != MVT::v4i8)) + return SDValue(); + + if (!isLegalDSPCondCode(Ty, cast<CondCodeSDNode>(N->getOperand(2))->get())) + return SDValue(); + + return DAG.getNode(MipsISD::SETCC_DSP, N->getDebugLoc(), Ty, N->getOperand(0), + N->getOperand(1), N->getOperand(2)); +} + +static SDValue performVSELECTCombine(SDNode *N, SelectionDAG &DAG) { + EVT Ty = N->getValueType(0); + + if ((Ty != MVT::v2i16) && (Ty != MVT::v4i8)) + return SDValue(); + + SDValue SetCC = N->getOperand(0); + + if (SetCC.getOpcode() != MipsISD::SETCC_DSP) + return SDValue(); + + return DAG.getNode(MipsISD::SELECT_CC_DSP, N->getDebugLoc(), Ty, + SetCC.getOperand(0), SetCC.getOperand(1), N->getOperand(1), + N->getOperand(2), SetCC.getOperand(2)); +} + SDValue MipsSETargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const { SelectionDAG &DAG = DCI.DAG; + SDValue Val; switch (N->getOpcode()) { case ISD::ADDE: return performADDECombine(N, DAG, DCI, Subtarget); case ISD::SUBE: return performSUBECombine(N, DAG, DCI, Subtarget); - default: - return MipsTargetLowering::PerformDAGCombine(N, DCI); + case ISD::SHL: + return performSHLCombine(N, DAG, DCI, Subtarget); + case ISD::SRA: + return performSRACombine(N, DAG, DCI, Subtarget); + case ISD::SRL: + return performSRLCombine(N, DAG, DCI, Subtarget); + case ISD::VSELECT: + return performVSELECTCombine(N, DAG); + case ISD::SETCC: { + Val = performSETCCCombine(N, DAG); + break; } + } + + if (Val.getNode()) + 
return Val; + + return MipsTargetLowering::PerformDAGCombine(N, DCI); } MachineBasicBlock * @@ -378,6 +519,171 @@ SDValue MipsSETargetLowering::lowerMulDiv(SDValue Op, unsigned NewOpc, return DAG.getMergeValues(Vals, 2, DL); } + +static SDValue initAccumulator(SDValue In, DebugLoc DL, SelectionDAG &DAG) { + SDValue InLo = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, In, + DAG.getConstant(0, MVT::i32)); + SDValue InHi = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, In, + DAG.getConstant(1, MVT::i32)); + return DAG.getNode(MipsISD::InsertLOHI, DL, MVT::Untyped, InLo, InHi); +} + +static SDValue extractLOHI(SDValue Op, DebugLoc DL, SelectionDAG &DAG) { + SDValue Lo = DAG.getNode(MipsISD::ExtractLOHI, DL, MVT::i32, Op, + DAG.getConstant(Mips::sub_lo, MVT::i32)); + SDValue Hi = DAG.getNode(MipsISD::ExtractLOHI, DL, MVT::i32, Op, + DAG.getConstant(Mips::sub_hi, MVT::i32)); + return DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, Lo, Hi); +} + +// This function expands mips intrinsic nodes which have 64-bit input operands +// or output values. +// +// out64 = intrinsic-node in64 +// => +// lo = copy (extract-element (in64, 0)) +// hi = copy (extract-element (in64, 1)) +// mips-specific-node +// v0 = copy lo +// v1 = copy hi +// out64 = merge-values (v0, v1) +// +static SDValue lowerDSPIntr(SDValue Op, SelectionDAG &DAG, unsigned Opc) { + DebugLoc DL = Op.getDebugLoc(); + bool HasChainIn = Op->getOperand(0).getValueType() == MVT::Other; + SmallVector<SDValue, 3> Ops; + unsigned OpNo = 0; + + // See if Op has a chain input. + if (HasChainIn) + Ops.push_back(Op->getOperand(OpNo++)); + + // The next operand is the intrinsic opcode. + assert(Op->getOperand(OpNo).getOpcode() == ISD::TargetConstant); + + // See if the next operand has type i64. + SDValue Opnd = Op->getOperand(++OpNo), In64; + + if (Opnd.getValueType() == MVT::i64) + In64 = initAccumulator(Opnd, DL, DAG); + else + Ops.push_back(Opnd); + + // Push the remaining operands. + for (++OpNo ; OpNo < Op->getNumOperands(); ++OpNo) + Ops.push_back(Op->getOperand(OpNo)); + + // Add In64 to the end of the list. + if (In64.getNode()) + Ops.push_back(In64); + + // Scan output. + SmallVector<EVT, 2> ResTys; + + for (SDNode::value_iterator I = Op->value_begin(), E = Op->value_end(); + I != E; ++I) + ResTys.push_back((*I == MVT::i64) ? MVT::Untyped : *I); + + // Create node. + SDValue Val = DAG.getNode(Opc, DL, ResTys, &Ops[0], Ops.size()); + SDValue Out = (ResTys[0] == MVT::Untyped) ? 
extractLOHI(Val, DL, DAG) : Val; + + if (!HasChainIn) + return Out; + + assert(Val->getValueType(1) == MVT::Other); + SDValue Vals[] = { Out, SDValue(Val.getNode(), 1) }; + return DAG.getMergeValues(Vals, 2, DL); +} + +SDValue MipsSETargetLowering::lowerINTRINSIC_WO_CHAIN(SDValue Op, + SelectionDAG &DAG) const { + switch (cast<ConstantSDNode>(Op->getOperand(0))->getZExtValue()) { + default: + return SDValue(); + case Intrinsic::mips_shilo: + return lowerDSPIntr(Op, DAG, MipsISD::SHILO); + case Intrinsic::mips_dpau_h_qbl: + return lowerDSPIntr(Op, DAG, MipsISD::DPAU_H_QBL); + case Intrinsic::mips_dpau_h_qbr: + return lowerDSPIntr(Op, DAG, MipsISD::DPAU_H_QBR); + case Intrinsic::mips_dpsu_h_qbl: + return lowerDSPIntr(Op, DAG, MipsISD::DPSU_H_QBL); + case Intrinsic::mips_dpsu_h_qbr: + return lowerDSPIntr(Op, DAG, MipsISD::DPSU_H_QBR); + case Intrinsic::mips_dpa_w_ph: + return lowerDSPIntr(Op, DAG, MipsISD::DPA_W_PH); + case Intrinsic::mips_dps_w_ph: + return lowerDSPIntr(Op, DAG, MipsISD::DPS_W_PH); + case Intrinsic::mips_dpax_w_ph: + return lowerDSPIntr(Op, DAG, MipsISD::DPAX_W_PH); + case Intrinsic::mips_dpsx_w_ph: + return lowerDSPIntr(Op, DAG, MipsISD::DPSX_W_PH); + case Intrinsic::mips_mulsa_w_ph: + return lowerDSPIntr(Op, DAG, MipsISD::MULSA_W_PH); + case Intrinsic::mips_mult: + return lowerDSPIntr(Op, DAG, MipsISD::Mult); + case Intrinsic::mips_multu: + return lowerDSPIntr(Op, DAG, MipsISD::Multu); + case Intrinsic::mips_madd: + return lowerDSPIntr(Op, DAG, MipsISD::MAdd); + case Intrinsic::mips_maddu: + return lowerDSPIntr(Op, DAG, MipsISD::MAddu); + case Intrinsic::mips_msub: + return lowerDSPIntr(Op, DAG, MipsISD::MSub); + case Intrinsic::mips_msubu: + return lowerDSPIntr(Op, DAG, MipsISD::MSubu); + } +} + +SDValue MipsSETargetLowering::lowerINTRINSIC_W_CHAIN(SDValue Op, + SelectionDAG &DAG) const { + switch (cast<ConstantSDNode>(Op->getOperand(1))->getZExtValue()) { + default: + return SDValue(); + case Intrinsic::mips_extp: + return lowerDSPIntr(Op, DAG, MipsISD::EXTP); + case Intrinsic::mips_extpdp: + return lowerDSPIntr(Op, DAG, MipsISD::EXTPDP); + case Intrinsic::mips_extr_w: + return lowerDSPIntr(Op, DAG, MipsISD::EXTR_W); + case Intrinsic::mips_extr_r_w: + return lowerDSPIntr(Op, DAG, MipsISD::EXTR_R_W); + case Intrinsic::mips_extr_rs_w: + return lowerDSPIntr(Op, DAG, MipsISD::EXTR_RS_W); + case Intrinsic::mips_extr_s_h: + return lowerDSPIntr(Op, DAG, MipsISD::EXTR_S_H); + case Intrinsic::mips_mthlip: + return lowerDSPIntr(Op, DAG, MipsISD::MTHLIP); + case Intrinsic::mips_mulsaq_s_w_ph: + return lowerDSPIntr(Op, DAG, MipsISD::MULSAQ_S_W_PH); + case Intrinsic::mips_maq_s_w_phl: + return lowerDSPIntr(Op, DAG, MipsISD::MAQ_S_W_PHL); + case Intrinsic::mips_maq_s_w_phr: + return lowerDSPIntr(Op, DAG, MipsISD::MAQ_S_W_PHR); + case Intrinsic::mips_maq_sa_w_phl: + return lowerDSPIntr(Op, DAG, MipsISD::MAQ_SA_W_PHL); + case Intrinsic::mips_maq_sa_w_phr: + return lowerDSPIntr(Op, DAG, MipsISD::MAQ_SA_W_PHR); + case Intrinsic::mips_dpaq_s_w_ph: + return lowerDSPIntr(Op, DAG, MipsISD::DPAQ_S_W_PH); + case Intrinsic::mips_dpsq_s_w_ph: + return lowerDSPIntr(Op, DAG, MipsISD::DPSQ_S_W_PH); + case Intrinsic::mips_dpaq_sa_l_w: + return lowerDSPIntr(Op, DAG, MipsISD::DPAQ_SA_L_W); + case Intrinsic::mips_dpsq_sa_l_w: + return lowerDSPIntr(Op, DAG, MipsISD::DPSQ_SA_L_W); + case Intrinsic::mips_dpaqx_s_w_ph: + return lowerDSPIntr(Op, DAG, MipsISD::DPAQX_S_W_PH); + case Intrinsic::mips_dpaqx_sa_w_ph: + return lowerDSPIntr(Op, DAG, MipsISD::DPAQX_SA_W_PH); + case Intrinsic::mips_dpsqx_s_w_ph: + 
return lowerDSPIntr(Op, DAG, MipsISD::DPSQX_S_W_PH); + case Intrinsic::mips_dpsqx_sa_w_ph: + return lowerDSPIntr(Op, DAG, MipsISD::DPSQX_SA_W_PH); + } +} + MachineBasicBlock * MipsSETargetLowering:: emitBPOSGE32(MachineInstr *MI, MachineBasicBlock *BB) const{ // $bb: diff --git a/contrib/llvm/lib/Target/Mips/MipsSEISelLowering.h b/contrib/llvm/lib/Target/Mips/MipsSEISelLowering.h index 186f6a3..ec8a5c7 100644 --- a/contrib/llvm/lib/Target/Mips/MipsSEISelLowering.h +++ b/contrib/llvm/lib/Target/Mips/MipsSEISelLowering.h @@ -31,6 +31,11 @@ namespace llvm { virtual MachineBasicBlock * EmitInstrWithCustomInserter(MachineInstr *MI, MachineBasicBlock *MBB) const; + virtual bool isShuffleMaskLegal(const SmallVectorImpl<int> &Mask, + EVT VT) const { + return false; + } + virtual const TargetRegisterClass *getRepRegClassFor(MVT VT) const { if (VT == MVT::Untyped) return Subtarget->hasDSP() ? &Mips::ACRegsDSPRegClass : @@ -54,6 +59,9 @@ namespace llvm { SDValue lowerMulDiv(SDValue Op, unsigned NewOpc, bool HasLo, bool HasHi, SelectionDAG &DAG) const; + SDValue lowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const; + SDValue lowerINTRINSIC_W_CHAIN(SDValue Op, SelectionDAG &DAG) const; + MachineBasicBlock *emitBPOSGE32(MachineInstr *MI, MachineBasicBlock *BB) const; }; diff --git a/contrib/llvm/lib/Target/Mips/MipsSEInstrInfo.cpp b/contrib/llvm/lib/Target/Mips/MipsSEInstrInfo.cpp index ca0315e..a0768e5 100644 --- a/contrib/llvm/lib/Target/Mips/MipsSEInstrInfo.cpp +++ b/contrib/llvm/lib/Target/Mips/MipsSEInstrInfo.cpp @@ -95,20 +95,39 @@ void MipsSEInstrInfo::copyPhysReg(MachineBasicBlock &MBB, Opc = Mips::CFC1; else if (Mips::FGR32RegClass.contains(SrcReg)) Opc = Mips::MFC1; - else if (SrcReg == Mips::HI) + else if (Mips::HIRegsRegClass.contains(SrcReg)) Opc = Mips::MFHI, SrcReg = 0; - else if (SrcReg == Mips::LO) + else if (Mips::LORegsRegClass.contains(SrcReg)) Opc = Mips::MFLO, SrcReg = 0; + else if (Mips::HIRegsDSPRegClass.contains(SrcReg)) + Opc = Mips::MFHI_DSP; + else if (Mips::LORegsDSPRegClass.contains(SrcReg)) + Opc = Mips::MFLO_DSP; + else if (Mips::DSPCCRegClass.contains(SrcReg)) { + BuildMI(MBB, I, DL, get(Mips::RDDSP), DestReg).addImm(1 << 4) + .addReg(SrcReg, RegState::Implicit | getKillRegState(KillSrc)); + return; + } } else if (Mips::CPURegsRegClass.contains(SrcReg)) { // Copy from CPU Reg. if (Mips::CCRRegClass.contains(DestReg)) Opc = Mips::CTC1; else if (Mips::FGR32RegClass.contains(DestReg)) Opc = Mips::MTC1; - else if (DestReg == Mips::HI) + else if (Mips::HIRegsRegClass.contains(DestReg)) Opc = Mips::MTHI, DestReg = 0; - else if (DestReg == Mips::LO) + else if (Mips::LORegsRegClass.contains(DestReg)) Opc = Mips::MTLO, DestReg = 0; + else if (Mips::HIRegsDSPRegClass.contains(DestReg)) + Opc = Mips::MTHI_DSP; + else if (Mips::LORegsDSPRegClass.contains(DestReg)) + Opc = Mips::MTLO_DSP; + else if (Mips::DSPCCRegClass.contains(DestReg)) { + BuildMI(MBB, I, DL, get(Mips::WRDSP)) + .addReg(SrcReg, getKillRegState(KillSrc)).addImm(1 << 4) + .addReg(DestReg, RegState::ImplicitDefine); + return; + } } else if (Mips::FGR32RegClass.contains(DestReg, SrcReg)) Opc = Mips::FMOV_S; @@ -121,27 +140,21 @@ void MipsSEInstrInfo::copyPhysReg(MachineBasicBlock &MBB, else if (Mips::CPU64RegsRegClass.contains(DestReg)) { // Copy to CPU64 Reg. 
if (Mips::CPU64RegsRegClass.contains(SrcReg)) Opc = Mips::OR64, ZeroReg = Mips::ZERO_64; - else if (SrcReg == Mips::HI64) + else if (Mips::HIRegs64RegClass.contains(SrcReg)) Opc = Mips::MFHI64, SrcReg = 0; - else if (SrcReg == Mips::LO64) + else if (Mips::LORegs64RegClass.contains(SrcReg)) Opc = Mips::MFLO64, SrcReg = 0; else if (Mips::FGR64RegClass.contains(SrcReg)) Opc = Mips::DMFC1; } else if (Mips::CPU64RegsRegClass.contains(SrcReg)) { // Copy from CPU64 Reg. - if (DestReg == Mips::HI64) + if (Mips::HIRegs64RegClass.contains(DestReg)) Opc = Mips::MTHI64, DestReg = 0; - else if (DestReg == Mips::LO64) + else if (Mips::LORegs64RegClass.contains(DestReg)) Opc = Mips::MTLO64, DestReg = 0; else if (Mips::FGR64RegClass.contains(DestReg)) Opc = Mips::DMTC1; } - else if (Mips::ACRegsRegClass.contains(DestReg, SrcReg)) - Opc = Mips::COPY_AC64; - else if (Mips::ACRegsDSPRegClass.contains(DestReg, SrcReg)) - Opc = Mips::COPY_AC_DSP; - else if (Mips::ACRegs128RegClass.contains(DestReg, SrcReg)) - Opc = Mips::COPY_AC128; assert(Opc && "Cannot copy registers"); @@ -178,6 +191,8 @@ storeRegToStack(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, Opc = IsN64 ? Mips::STORE_AC_DSP_P8 : Mips::STORE_AC_DSP; else if (Mips::ACRegs128RegClass.hasSubClassEq(RC)) Opc = IsN64 ? Mips::STORE_AC128_P8 : Mips::STORE_AC128; + else if (Mips::DSPCCRegClass.hasSubClassEq(RC)) + Opc = IsN64 ? Mips::STORE_CCOND_DSP_P8 : Mips::STORE_CCOND_DSP; else if (Mips::FGR32RegClass.hasSubClassEq(RC)) Opc = IsN64 ? Mips::SWC1_P8 : Mips::SWC1; else if (Mips::AFGR64RegClass.hasSubClassEq(RC)) @@ -209,6 +224,8 @@ loadRegFromStack(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, Opc = IsN64 ? Mips::LOAD_AC_DSP_P8 : Mips::LOAD_AC_DSP; else if (Mips::ACRegs128RegClass.hasSubClassEq(RC)) Opc = IsN64 ? Mips::LOAD_AC128_P8 : Mips::LOAD_AC128; + else if (Mips::DSPCCRegClass.hasSubClassEq(RC)) + Opc = IsN64 ? Mips::LOAD_CCOND_DSP_P8 : Mips::LOAD_CCOND_DSP; else if (Mips::FGR32RegClass.hasSubClassEq(RC)) Opc = IsN64 ? 
Mips::LWC1_P8 : Mips::LWC1; else if (Mips::AFGR64RegClass.hasSubClassEq(RC)) diff --git a/contrib/llvm/lib/Target/Mips/MipsSubtarget.cpp b/contrib/llvm/lib/Target/Mips/MipsSubtarget.cpp index e11e5d1..14a2b27 100644 --- a/contrib/llvm/lib/Target/Mips/MipsSubtarget.cpp +++ b/contrib/llvm/lib/Target/Mips/MipsSubtarget.cpp @@ -11,29 +11,56 @@ // //===----------------------------------------------------------------------===// +#define DEBUG_TYPE "mips-subtarget" + +#include "MipsMachineFunction.h" #include "MipsSubtarget.h" +#include "MipsTargetMachine.h" #include "Mips.h" #include "MipsRegisterInfo.h" +#include "llvm/IR/Attributes.h" +#include "llvm/IR/Function.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" #include "llvm/Support/TargetRegistry.h" +#include "llvm/Support/raw_ostream.h" #define GET_SUBTARGETINFO_TARGET_DESC #define GET_SUBTARGETINFO_CTOR #include "MipsGenSubtargetInfo.inc" + using namespace llvm; +// FIXME: Maybe this should be on by default when Mips16 is specified +// +static cl::opt<bool> Mixed16_32( + "mips-mixed-16-32", + cl::init(false), + cl::desc("Allow for a mixture of Mips16 " + "and Mips32 code in a single source file"), + cl::Hidden); + +static cl::opt<bool> Mips_Os16( + "mips-os16", + cl::init(false), + cl::desc("Compile all functions that don' use " + "floating point as Mips 16"), + cl::Hidden); + void MipsSubtarget::anchor() { } MipsSubtarget::MipsSubtarget(const std::string &TT, const std::string &CPU, const std::string &FS, bool little, - Reloc::Model _RM) : + Reloc::Model _RM, MipsTargetMachine *_TM) : MipsGenSubtargetInfo(TT, CPU, FS), MipsArchVersion(Mips32), MipsABI(UnknownABI), IsLittle(little), IsSingleFloat(false), IsFP64bit(false), IsGP64bit(false), HasVFPU(false), IsLinux(true), HasSEInReg(false), HasCondMov(false), HasSwap(false), HasBitCount(false), HasFPIdx(false), InMips16Mode(false), InMicroMipsMode(false), HasDSP(false), HasDSPR2(false), - RM(_RM) + AllowMixed16_32(Mixed16_32 | Mips_Os16), Os16(Mips_Os16), + RM(_RM), OverrideMode(NoOverride), TM(_TM) { std::string CPUName = CPU; if (CPUName.empty()) @@ -42,6 +69,8 @@ MipsSubtarget::MipsSubtarget(const std::string &TT, const std::string &CPU, // Parse features string. ParseSubtargetFeatures(CPUName, FS); + PreviousInMips16Mode = InMips16Mode; + // Initialize scheduling itinerary for the specified CPU. InstrItins = getInstrItineraryForCPU(CPUName); @@ -72,3 +101,48 @@ MipsSubtarget::enablePostRAScheduler(CodeGenOpt::Level OptLevel, &Mips::CPU64RegsRegClass : &Mips::CPURegsRegClass); return OptLevel >= CodeGenOpt::Aggressive; } + +//FIXME: This logic for reseting the subtarget along with +// the helper classes can probably be simplified but there are a lot of +// cases so we will defer rewriting this to later. 
+// +void MipsSubtarget::resetSubtarget(MachineFunction *MF) { + bool ChangeToMips16 = false, ChangeToNoMips16 = false; + DEBUG(dbgs() << "resetSubtargetFeatures" << "\n"); + AttributeSet FnAttrs = MF->getFunction()->getAttributes(); + ChangeToMips16 = FnAttrs.hasAttribute(AttributeSet::FunctionIndex, + "mips16"); + ChangeToNoMips16 = FnAttrs.hasAttribute(AttributeSet::FunctionIndex, + "nomips16"); + assert (!(ChangeToMips16 & ChangeToNoMips16) && + "mips16 and nomips16 specified on the same function"); + if (ChangeToMips16) { + if (PreviousInMips16Mode) + return; + OverrideMode = Mips16Override; + PreviousInMips16Mode = true; + TM->setHelperClassesMips16(); + return; + } else if (ChangeToNoMips16) { + if (!PreviousInMips16Mode) + return; + OverrideMode = NoMips16Override; + PreviousInMips16Mode = false; + TM->setHelperClassesMipsSE(); + return; + } else { + if (OverrideMode == NoOverride) + return; + OverrideMode = NoOverride; + DEBUG(dbgs() << "back to default" << "\n"); + if (inMips16Mode() && !PreviousInMips16Mode) { + TM->setHelperClassesMips16(); + PreviousInMips16Mode = true; + } else if (!inMips16Mode() && PreviousInMips16Mode) { + TM->setHelperClassesMipsSE(); + PreviousInMips16Mode = false; + } + return; + } +} + diff --git a/contrib/llvm/lib/Target/Mips/MipsSubtarget.h b/contrib/llvm/lib/Target/Mips/MipsSubtarget.h index 7a2e47c..f2f0e15 100644 --- a/contrib/llvm/lib/Target/Mips/MipsSubtarget.h +++ b/contrib/llvm/lib/Target/Mips/MipsSubtarget.h @@ -16,7 +16,9 @@ #include "MCTargetDesc/MipsReginfo.h" #include "llvm/MC/MCInstrItineraries.h" +#include "llvm/Support/ErrorHandling.h" #include "llvm/Target/TargetSubtargetInfo.h" + #include <string> #define GET_SUBTARGETINFO_HEADER @@ -25,6 +27,8 @@ namespace llvm { class StringRef; +class MipsTargetMachine; + class MipsSubtarget : public MipsGenSubtargetInfo { virtual void anchor(); @@ -89,12 +93,23 @@ protected: // InMips16 -- can process Mips16 instructions bool InMips16Mode; + // PreviousInMips16 -- the function we just processed was in Mips 16 Mode + bool PreviousInMips16Mode; + // InMicroMips -- can process MicroMips instructions bool InMicroMipsMode; // HasDSP, HasDSPR2 -- supports DSP ASE. bool HasDSP, HasDSPR2; + // Allow mixed Mips16 and Mips32 in one source file + bool AllowMixed16_32; + + // Optimize for space by compiling all functions as Mips 16 unless + // it needs floating point. Functions needing floating point are + // compiled as Mips32 + bool Os16; + InstrItineraryData InstrItins; // The instance to the register info section object @@ -103,6 +118,12 @@ protected: // Relocation Model Reloc::Model RM; + // We can override the determination of whether we are in mips16 mode + // as from the command line + enum {NoOverride, Mips16Override, NoMips16Override} OverrideMode; + + MipsTargetMachine *TM; + public: virtual bool enablePostRAScheduler(CodeGenOpt::Level OptLevel, AntiDepBreakMode& Mode, @@ -118,7 +139,8 @@ public: /// This constructor initializes the data members to match that /// of the specified triple. MipsSubtarget(const std::string &TT, const std::string &CPU, - const std::string &FS, bool little, Reloc::Model RM); + const std::string &FS, bool little, Reloc::Model RM, + MipsTargetMachine *TM); /// ParseSubtargetFeatures - Parses features string setting specified /// subtarget options. Definition of function is auto generated by tblgen. 
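resetSubtarget() above keys entirely on the "mips16"/"nomips16" IR function attributes, which are attached per function either by the user or by the MipsOs16 pass. A minimal source-level illustration, assuming a MIPS compiler that accepts the GCC-style mips16/nomips16 function attributes (the attribute names match the strings this patch checks):

// Illustrative source only: with -mips-mixed-16-32, per-function attributes
// drive MipsSubtarget::resetSubtarget() between the Mips16 and MipsSE helper
// classes. Under -mips-os16 the MipsOs16 pass adds the same "mips16"/"nomips16"
// IR attributes automatically, keeping floating-point code out of MIPS16 mode.
__attribute__((nomips16)) float scale(float x) {  // uses floating point -> stays MIPS32
  return x * 2.0f;
}

__attribute__((mips16)) int sum(int a, int b) {   // integer only -> compiled as MIPS16
  return a + b;
}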
@@ -137,7 +159,20 @@ public: bool isSingleFloat() const { return IsSingleFloat; } bool isNotSingleFloat() const { return !IsSingleFloat; } bool hasVFPU() const { return HasVFPU; } - bool inMips16Mode() const { return InMips16Mode; } + bool inMips16Mode() const { + switch (OverrideMode) { + case NoOverride: + return InMips16Mode; + case Mips16Override: + return true; + case NoMips16Override: + return false; + } + llvm_unreachable("Unexpected mode"); + } + bool inMips16ModeDefault() { + return InMips16Mode; + } bool inMicroMipsMode() const { return InMicroMipsMode; } bool hasDSP() const { return HasDSP; } bool hasDSPR2() const { return HasDSPR2; } @@ -153,11 +188,20 @@ public: bool hasBitCount() const { return HasBitCount; } bool hasFPIdx() const { return HasFPIdx; } + bool allowMixed16_32() const { return AllowMixed16_32;}; + + bool os16() const { return Os16;}; + // Grab MipsRegInfo object const MipsReginfo &getMReginfo() const { return MRI; } // Grab relocation model Reloc::Model getRelocationModel() const {return RM;} + + /// \brief Reset the subtarget for the Mips target. + void resetSubtarget(MachineFunction *MF); + + }; } // End llvm namespace diff --git a/contrib/llvm/lib/Target/Mips/MipsTargetMachine.cpp b/contrib/llvm/lib/Target/Mips/MipsTargetMachine.cpp index 3336358..ee28e2a 100644 --- a/contrib/llvm/lib/Target/Mips/MipsTargetMachine.cpp +++ b/contrib/llvm/lib/Target/Mips/MipsTargetMachine.cpp @@ -15,11 +15,26 @@ #include "Mips.h" #include "MipsFrameLowering.h" #include "MipsInstrInfo.h" +#include "MipsModuleISelDAGToDAG.h" +#include "MipsOs16.h" +#include "MipsSEFrameLowering.h" +#include "MipsSEInstrInfo.h" +#include "MipsSEISelLowering.h" +#include "MipsSEISelDAGToDAG.h" +#include "Mips16FrameLowering.h" +#include "Mips16InstrInfo.h" +#include "Mips16ISelDAGToDAG.h" +#include "Mips16ISelLowering.h" +#include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/CodeGen/Passes.h" #include "llvm/PassManager.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" #include "llvm/Support/TargetRegistry.h" using namespace llvm; + + extern "C" void LLVMInitializeMipsTarget() { // Register the target. RegisterTargetMachine<MipsebTargetMachine> X(TheMipsTarget); @@ -42,7 +57,7 @@ MipsTargetMachine(const Target &T, StringRef TT, CodeGenOpt::Level OL, bool isLittle) : LLVMTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL), - Subtarget(TT, CPU, FS, isLittle, RM), + Subtarget(TT, CPU, FS, isLittle, RM, this), DL(isLittle ? (Subtarget.isABI_N64() ? 
"e-p:64:64:64-i8:8:32-i16:16:32-i64:64:64-f128:128:128-" @@ -54,9 +69,46 @@ MipsTargetMachine(const Target &T, StringRef TT, "E-p:32:32:32-i8:8:32-i16:16:32-i64:64:64-n32-S64")), InstrInfo(MipsInstrInfo::create(*this)), FrameLowering(MipsFrameLowering::create(*this, Subtarget)), - TLInfo(MipsTargetLowering::create(*this)), TSInfo(*this), JITInfo() { + TLInfo(MipsTargetLowering::create(*this)), + TSInfo(*this), JITInfo() { +} + + +void MipsTargetMachine::setHelperClassesMips16() { + InstrInfoSE.swap(InstrInfo); + FrameLoweringSE.swap(FrameLowering); + TLInfoSE.swap(TLInfo); + if (!InstrInfo16) { + InstrInfo.reset(MipsInstrInfo::create(*this)); + FrameLowering.reset(MipsFrameLowering::create(*this, Subtarget)); + TLInfo.reset(MipsTargetLowering::create(*this)); + } else { + InstrInfo16.swap(InstrInfo); + FrameLowering16.swap(FrameLowering); + TLInfo16.swap(TLInfo); + } + assert(TLInfo && "null target lowering 16"); + assert(InstrInfo && "null instr info 16"); + assert(FrameLowering && "null frame lowering 16"); } +void MipsTargetMachine::setHelperClassesMipsSE() { + InstrInfo16.swap(InstrInfo); + FrameLowering16.swap(FrameLowering); + TLInfo16.swap(TLInfo); + if (!InstrInfoSE) { + InstrInfo.reset(MipsInstrInfo::create(*this)); + FrameLowering.reset(MipsFrameLowering::create(*this, Subtarget)); + TLInfo.reset(MipsTargetLowering::create(*this)); + } else { + InstrInfoSE.swap(InstrInfo); + FrameLoweringSE.swap(FrameLowering); + TLInfoSE.swap(TLInfo); + } + assert(TLInfo && "null target lowering in SE"); + assert(InstrInfo && "null instr info SE"); + assert(FrameLowering && "null frame lowering SE"); +} void MipsebTargetMachine::anchor() { } MipsebTargetMachine:: @@ -90,6 +142,7 @@ public: return *getMipsTargetMachine().getSubtargetImpl(); } + virtual void addIRPasses(); virtual bool addInstSelector(); virtual bool addPreEmitPass(); }; @@ -99,24 +152,50 @@ TargetPassConfig *MipsTargetMachine::createPassConfig(PassManagerBase &PM) { return new MipsPassConfig(this, PM); } +void MipsPassConfig::addIRPasses() { + TargetPassConfig::addIRPasses(); + if (getMipsSubtarget().os16()) + addPass(createMipsOs16(getMipsTargetMachine())); +} // Install an instruction selector pass using // the ISelDag to gen Mips code. bool MipsPassConfig::addInstSelector() { - addPass(createMipsISelDag(getMipsTargetMachine())); + if (getMipsSubtarget().allowMixed16_32()) { + addPass(createMipsModuleISelDag(getMipsTargetMachine())); + addPass(createMips16ISelDag(getMipsTargetMachine())); + addPass(createMipsSEISelDag(getMipsTargetMachine())); + } else { + addPass(createMipsISelDag(getMipsTargetMachine())); + } return false; } +void MipsTargetMachine::addAnalysisPasses(PassManagerBase &PM) { + if (Subtarget.allowMixed16_32()) { + DEBUG(errs() << "No "); + //FIXME: The Basic Target Transform Info + // pass needs to become a function pass instead of + // being an immutable pass and then this method as it exists now + // would be unnecessary. + PM.add(createNoTargetTransformInfoPass()); + } else + LLVMTargetMachine::addAnalysisPasses(PM); + DEBUG(errs() << "Target Transform Info Pass Added\n"); +} + // Implemented by targets that want to run passes immediately before // machine code is emitted. return true if -print-machineinstrs should // print out the code after the passes. 
bool MipsPassConfig::addPreEmitPass() { MipsTargetMachine &TM = getMipsTargetMachine(); + const MipsSubtarget &Subtarget = TM.getSubtarget<MipsSubtarget>(); addPass(createMipsDelaySlotFillerPass(TM)); - // NOTE: long branch has not been implemented for mips16. - if (TM.getSubtarget<MipsSubtarget>().hasStandardEncoding()) + if (Subtarget.hasStandardEncoding() || + Subtarget.allowMixed16_32()) addPass(createMipsLongBranchPass(TM)); - if (TM.getSubtarget<MipsSubtarget>().inMips16Mode()) + if (Subtarget.inMips16Mode() || + Subtarget.allowMixed16_32()) addPass(createMipsConstantIslandPass(TM)); return true; diff --git a/contrib/llvm/lib/Target/Mips/MipsTargetMachine.h b/contrib/llvm/lib/Target/Mips/MipsTargetMachine.h index 7e5f192..ee55708 100644 --- a/contrib/llvm/lib/Target/Mips/MipsTargetMachine.h +++ b/contrib/llvm/lib/Target/Mips/MipsTargetMachine.h @@ -21,6 +21,8 @@ #include "MipsSelectionDAGInfo.h" #include "MipsSubtarget.h" #include "llvm/ADT/OwningPtr.h" +#include "llvm/CodeGen/Passes.h" +#include "llvm/CodeGen/SelectionDAGISel.h" #include "llvm/IR/DataLayout.h" #include "llvm/Target/TargetFrameLowering.h" #include "llvm/Target/TargetMachine.h" @@ -35,6 +37,12 @@ class MipsTargetMachine : public LLVMTargetMachine { OwningPtr<const MipsInstrInfo> InstrInfo; OwningPtr<const MipsFrameLowering> FrameLowering; OwningPtr<const MipsTargetLowering> TLInfo; + OwningPtr<const MipsInstrInfo> InstrInfo16; + OwningPtr<const MipsFrameLowering> FrameLowering16; + OwningPtr<const MipsTargetLowering> TLInfo16; + OwningPtr<const MipsInstrInfo> InstrInfoSE; + OwningPtr<const MipsFrameLowering> FrameLoweringSE; + OwningPtr<const MipsTargetLowering> TLInfoSE; MipsSelectionDAGInfo TSInfo; MipsJITInfo JITInfo; @@ -47,6 +55,8 @@ public: virtual ~MipsTargetMachine() {} + virtual void addAnalysisPasses(PassManagerBase &PM); + virtual const MipsInstrInfo *getInstrInfo() const { return InstrInfo.get(); } virtual const TargetFrameLowering *getFrameLowering() const @@ -73,6 +83,13 @@ public: // Pass Pipeline Configuration virtual TargetPassConfig *createPassConfig(PassManagerBase &PM); virtual bool addCodeEmitter(PassManagerBase &PM, JITCodeEmitter &JCE); + + // Set helper classes + void setHelperClassesMips16(); + + void setHelperClassesMipsSE(); + + }; /// MipsebTargetMachine - Mips32/64 big endian target machine. 
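setHelperClassesMips16() and setHelperClassesMipsSE() above rotate two cached sets of helper objects (instruction info, frame lowering, target lowering) with OwningPtr::swap, building each variant at most once through the create() factories. A compressed sketch of that rotation, with std::unique_ptr standing in for OwningPtr and a placeholder Info type (all names here are illustrative, not the real classes):

#include <cassert>
#include <memory>

struct Info { bool IsMips16; };

struct TargetMachineSketch {
  std::unique_ptr<Info> Active;    // what getInstrInfo()/getFrameLowering() hand out
  std::unique_ptr<Info> Cached16;  // lazily built Mips16 variants
  std::unique_ptr<Info> CachedSE;  // lazily built standard-encoding variants

  void switchToMips16() {
    CachedSE.swap(Active);           // stash the SE objects
    if (!Cached16)
      Active.reset(new Info{true});  // first switch: build the Mips16 objects
    else
      Cached16.swap(Active);         // later switches: reuse the cached ones
    assert(Active && "null helper after mode switch");
  }
  // switchToSE() is the mirror image, swapping through Cached16/CachedSE.
};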
diff --git a/contrib/llvm/lib/Target/NVPTX/NVPTX.h b/contrib/llvm/lib/Target/NVPTX/NVPTX.h index 6a53a44..072c65d 100644 --- a/contrib/llvm/lib/Target/NVPTX/NVPTX.h +++ b/contrib/llvm/lib/Target/NVPTX/NVPTX.h @@ -16,6 +16,7 @@ #define LLVM_TARGET_NVPTX_H #include "MCTargetDesc/NVPTXBaseInfo.h" +#include "llvm/ADT/StringMap.h" #include "llvm/IR/Module.h" #include "llvm/IR/Value.h" #include "llvm/Support/ErrorHandling.h" @@ -62,6 +63,9 @@ createNVPTXISelDag(NVPTXTargetMachine &TM, llvm::CodeGenOpt::Level OptLevel); FunctionPass *createLowerStructArgsPass(NVPTXTargetMachine &); FunctionPass *createNVPTXReMatPass(NVPTXTargetMachine &); FunctionPass *createNVPTXReMatBlockPass(NVPTXTargetMachine &); +ModulePass *createGenericToNVVMPass(); +ModulePass *createNVVMReflectPass(); +ModulePass *createNVVMReflectPass(const StringMap<int>& Mapping); bool isImageOrSamplerVal(const Value *, const Module *); diff --git a/contrib/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp b/contrib/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp index ce5d78a..229e4e5 100644 --- a/contrib/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp +++ b/contrib/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp @@ -68,11 +68,12 @@ InterleaveSrc("nvptx-emit-src", cl::ZeroOrMore, namespace { /// DiscoverDependentGlobals - Return a set of GlobalVariables on which \p V /// depends. -void DiscoverDependentGlobals(Value *V, DenseSet<GlobalVariable *> &Globals) { - if (GlobalVariable *GV = dyn_cast<GlobalVariable>(V)) +void DiscoverDependentGlobals(const Value *V, + DenseSet<const GlobalVariable *> &Globals) { + if (const GlobalVariable *GV = dyn_cast<GlobalVariable>(V)) Globals.insert(GV); else { - if (User *U = dyn_cast<User>(V)) { + if (const User *U = dyn_cast<User>(V)) { for (unsigned i = 0, e = U->getNumOperands(); i != e; ++i) { DiscoverDependentGlobals(U->getOperand(i), Globals); } @@ -84,8 +85,9 @@ void DiscoverDependentGlobals(Value *V, DenseSet<GlobalVariable *> &Globals) { /// instances to be emitted, but only after any dependents have been added /// first. void VisitGlobalVariableForEmission( - GlobalVariable *GV, SmallVectorImpl<GlobalVariable *> &Order, - DenseSet<GlobalVariable *> &Visited, DenseSet<GlobalVariable *> &Visiting) { + const GlobalVariable *GV, SmallVectorImpl<const GlobalVariable *> &Order, + DenseSet<const GlobalVariable *> &Visited, + DenseSet<const GlobalVariable *> &Visiting) { // Have we already visited this one? 
if (Visited.count(GV)) return; @@ -98,12 +100,12 @@ void VisitGlobalVariableForEmission( Visiting.insert(GV); // Make sure we visit all dependents first - DenseSet<GlobalVariable *> Others; + DenseSet<const GlobalVariable *> Others; for (unsigned i = 0, e = GV->getNumOperands(); i != e; ++i) DiscoverDependentGlobals(GV->getOperand(i), Others); - for (DenseSet<GlobalVariable *>::iterator I = Others.begin(), - E = Others.end(); + for (DenseSet<const GlobalVariable *>::iterator I = Others.begin(), + E = Others.end(); I != E; ++I) VisitGlobalVariableForEmission(*I, Order, Visited, Visiting); @@ -405,6 +407,11 @@ void NVPTXAsmPrinter::EmitFunctionEntryLabel() { SmallString<128> Str; raw_svector_ostream O(Str); + if (!GlobalsEmitted) { + emitGlobals(*MF->getFunction()->getParent()); + GlobalsEmitted = true; + } + // Set up MRI = &MF->getRegInfo(); F = MF->getFunction(); @@ -695,7 +702,7 @@ void NVPTXAsmPrinter::emitDeclaration(const Function *F, raw_ostream &O) { else O << ".func "; printReturnValStr(F, O); - O << *CurrentFnSym << "\n"; + O << *Mang->getSymbol(F) << "\n"; emitFunctionParamList(F, O); O << ";\n"; } @@ -795,7 +802,7 @@ static bool useFuncSeen(const Constant *C, return false; } -void NVPTXAsmPrinter::emitDeclarations(Module &M, raw_ostream &O) { +void NVPTXAsmPrinter::emitDeclarations(const Module &M, raw_ostream &O) { llvm::DenseMap<const Function *, bool> seenMap; for (Module::const_iterator FI = M.begin(), FE = M.end(); FI != FE; ++FI) { const Function *F = FI; @@ -805,7 +812,6 @@ void NVPTXAsmPrinter::emitDeclarations(Module &M, raw_ostream &O) { continue; if (F->getIntrinsicID()) continue; - CurrentFnSym = Mang->getSymbol(F); emitDeclaration(F, O); continue; } @@ -817,14 +823,12 @@ void NVPTXAsmPrinter::emitDeclarations(Module &M, raw_ostream &O) { // The use is in the initialization of a global variable // that is a function pointer, so print a declaration // for the original function - CurrentFnSym = Mang->getSymbol(F); emitDeclaration(F, O); break; } // Emit a declaration of this function if the function that // uses this constant expr has already been seen. if (useFuncSeen(C, seenMap)) { - CurrentFnSym = Mang->getSymbol(F); emitDeclaration(F, O); break; } @@ -844,7 +848,6 @@ void NVPTXAsmPrinter::emitDeclarations(Module &M, raw_ostream &O) { // appearing in the module before the callee. so print out // a declaration for the callee. if (seenMap.find(caller) != seenMap.end()) { - CurrentFnSym = Mang->getSymbol(F); emitDeclaration(F, O); break; } @@ -921,6 +924,12 @@ bool NVPTXAsmPrinter::doInitialization(Module &M) { if (nvptxSubtarget.getDrvInterface() == NVPTX::CUDA) recordAndEmitFilenames(M); + GlobalsEmitted = false; + + return false; // success +} + +void NVPTXAsmPrinter::emitGlobals(const Module &M) { SmallString<128> Str2; raw_svector_ostream OS2(Str2); @@ -931,13 +940,13 @@ bool NVPTXAsmPrinter::doInitialization(Module &M) { // global variable in order, and ensure that we emit it *after* its dependent // globals. We use a little extra memory maintaining both a set and a list to // have fast searches while maintaining a strict ordering. 
- SmallVector<GlobalVariable *, 8> Globals; - DenseSet<GlobalVariable *> GVVisited; - DenseSet<GlobalVariable *> GVVisiting; + SmallVector<const GlobalVariable *, 8> Globals; + DenseSet<const GlobalVariable *> GVVisited; + DenseSet<const GlobalVariable *> GVVisiting; // Visit each global variable, in order - for (Module::global_iterator I = M.global_begin(), E = M.global_end(); I != E; - ++I) + for (Module::const_global_iterator I = M.global_begin(), E = M.global_end(); + I != E; ++I) VisitGlobalVariableForEmission(I, Globals, GVVisited, GVVisiting); assert(GVVisited.size() == M.getGlobalList().size() && @@ -951,7 +960,6 @@ bool NVPTXAsmPrinter::doInitialization(Module &M) { OS2 << '\n'; OutStreamer.EmitRawText(OS2.str()); - return false; // success } void NVPTXAsmPrinter::emitHeader(Module &M, raw_ostream &O) { @@ -989,6 +997,14 @@ void NVPTXAsmPrinter::emitHeader(Module &M, raw_ostream &O) { } bool NVPTXAsmPrinter::doFinalization(Module &M) { + + // If we did not emit any functions, then the global declarations have not + // yet been emitted. + if (!GlobalsEmitted) { + emitGlobals(M); + GlobalsEmitted = true; + } + // XXX Temproarily remove global variables so that doFinalization() will not // emit them again (global variables are emitted at beginning). @@ -1063,7 +1079,8 @@ void NVPTXAsmPrinter::emitLinkageDirective(const GlobalValue *V, } } -void NVPTXAsmPrinter::printModuleLevelGV(GlobalVariable *GVar, raw_ostream &O, +void NVPTXAsmPrinter::printModuleLevelGV(const GlobalVariable *GVar, + raw_ostream &O, bool processDemoted) { // Skip meta data @@ -1107,10 +1124,10 @@ void NVPTXAsmPrinter::printModuleLevelGV(GlobalVariable *GVar, raw_ostream &O, if (llvm::isSampler(*GVar)) { O << ".global .samplerref " << llvm::getSamplerName(*GVar); - Constant *Initializer = NULL; + const Constant *Initializer = NULL; if (GVar->hasInitializer()) Initializer = GVar->getInitializer(); - ConstantInt *CI = NULL; + const ConstantInt *CI = NULL; if (Initializer) CI = dyn_cast<ConstantInt>(Initializer); if (CI) { @@ -1183,7 +1200,7 @@ void NVPTXAsmPrinter::printModuleLevelGV(GlobalVariable *GVar, raw_ostream &O, if (localDecls.find(demotedFunc) != localDecls.end()) localDecls[demotedFunc].push_back(GVar); else { - std::vector<GlobalVariable *> temp; + std::vector<const GlobalVariable *> temp; temp.push_back(GVar); localDecls[demotedFunc] = temp; } @@ -1199,7 +1216,11 @@ void NVPTXAsmPrinter::printModuleLevelGV(GlobalVariable *GVar, raw_ostream &O, if (ETy->isPrimitiveType() || ETy->isIntegerTy() || isa<PointerType>(ETy)) { O << " ."; - O << getPTXFundamentalTypeStr(ETy, false); + // Special case: ABI requires that we use .u8 for predicates + if (ETy->isIntegerTy(1)) + O << "u8"; + else + O << getPTXFundamentalTypeStr(ETy, false); O << " "; O << *Mang->getSymbol(GVar); @@ -1209,7 +1230,7 @@ void NVPTXAsmPrinter::printModuleLevelGV(GlobalVariable *GVar, raw_ostream &O, (PTy->getAddressSpace() == llvm::ADDRESS_SPACE_CONST_NOT_GEN) || (PTy->getAddressSpace() == llvm::ADDRESS_SPACE_CONST)) && GVar->hasInitializer()) { - Constant *Initializer = GVar->getInitializer(); + const Constant *Initializer = GVar->getInitializer(); if (!Initializer->isNullValue()) { O << " = "; printScalarConstant(Initializer, O); @@ -1233,7 +1254,7 @@ void NVPTXAsmPrinter::printModuleLevelGV(GlobalVariable *GVar, raw_ostream &O, (PTy->getAddressSpace() == llvm::ADDRESS_SPACE_CONST_NOT_GEN) || (PTy->getAddressSpace() == llvm::ADDRESS_SPACE_CONST)) && GVar->hasInitializer()) { - Constant *Initializer = GVar->getInitializer(); + const 
Constant *Initializer = GVar->getInitializer(); if (!isa<UndefValue>(Initializer) && !Initializer->isNullValue()) { AggBuffer aggBuffer(ElementSize, O, *this); bufferAggregateConstant(Initializer, &aggBuffer); @@ -1283,7 +1304,7 @@ void NVPTXAsmPrinter::emitDemotedVars(const Function *f, raw_ostream &O) { if (localDecls.find(f) == localDecls.end()) return; - std::vector<GlobalVariable *> &gvars = localDecls[f]; + std::vector<const GlobalVariable *> &gvars = localDecls[f]; for (unsigned i = 0, e = gvars.size(); i != e; ++i) { O << "\t// demoted variable\n\t"; @@ -1448,7 +1469,7 @@ void NVPTXAsmPrinter::printParamName(Function::const_arg_iterator I, int paramIndex, raw_ostream &O) { if ((nvptxSubtarget.getDrvInterface() == NVPTX::NVCL) || (nvptxSubtarget.getDrvInterface() == NVPTX::CUDA)) - O << *CurrentFnSym << "_param_" << paramIndex; + O << *Mang->getSymbol(I->getParent()) << "_param_" << paramIndex; else { std::string argName = I->getName(); const char *p = argName.c_str(); @@ -1507,11 +1528,13 @@ void NVPTXAsmPrinter::emitFunctionParamList(const Function *F, raw_ostream &O) { if (llvm::isImage(*I)) { std::string sname = I->getName(); if (llvm::isImageWriteOnly(*I)) - O << "\t.param .surfref " << *CurrentFnSym << "_param_" << paramIndex; + O << "\t.param .surfref " << *Mang->getSymbol(F) << "_param_" + << paramIndex; else // Default image is read_only - O << "\t.param .texref " << *CurrentFnSym << "_param_" << paramIndex; + O << "\t.param .texref " << *Mang->getSymbol(F) << "_param_" + << paramIndex; } else // Should be llvm::isSampler(*I) - O << "\t.param .samplerref " << *CurrentFnSym << "_param_" + O << "\t.param .samplerref " << *Mang->getSymbol(F) << "_param_" << paramIndex; continue; } @@ -1564,7 +1587,13 @@ void NVPTXAsmPrinter::emitFunctionParamList(const Function *F, raw_ostream &O) { } // non-pointer scalar to kernel func - O << "\t.param ." 
<< getPTXFundamentalTypeStr(Ty) << " "; + O << "\t.param ."; + // Special case: predicate operands become .u8 types + if (Ty->isIntegerTy(1)) + O << "u8"; + else + O << getPTXFundamentalTypeStr(Ty); + O << " "; printParamName(I, paramIndex, O); continue; } @@ -1751,12 +1780,12 @@ void NVPTXAsmPrinter::printFPConstant(const ConstantFP *Fp, raw_ostream &O) { O << utohexstr(API.getZExtValue()); } -void NVPTXAsmPrinter::printScalarConstant(Constant *CPV, raw_ostream &O) { - if (ConstantInt *CI = dyn_cast<ConstantInt>(CPV)) { +void NVPTXAsmPrinter::printScalarConstant(const Constant *CPV, raw_ostream &O) { + if (const ConstantInt *CI = dyn_cast<ConstantInt>(CPV)) { O << CI->getValue(); return; } - if (ConstantFP *CFP = dyn_cast<ConstantFP>(CPV)) { + if (const ConstantFP *CFP = dyn_cast<ConstantFP>(CPV)) { printFPConstant(CFP, O); return; } @@ -1764,13 +1793,13 @@ void NVPTXAsmPrinter::printScalarConstant(Constant *CPV, raw_ostream &O) { O << "0"; return; } - if (GlobalValue *GVar = dyn_cast<GlobalValue>(CPV)) { + if (const GlobalValue *GVar = dyn_cast<GlobalValue>(CPV)) { O << *Mang->getSymbol(GVar); return; } - if (ConstantExpr *Cexpr = dyn_cast<ConstantExpr>(CPV)) { - Value *v = Cexpr->stripPointerCasts(); - if (GlobalValue *GVar = dyn_cast<GlobalValue>(v)) { + if (const ConstantExpr *Cexpr = dyn_cast<ConstantExpr>(CPV)) { + const Value *v = Cexpr->stripPointerCasts(); + if (const GlobalValue *GVar = dyn_cast<GlobalValue>(v)) { O << *Mang->getSymbol(GVar); return; } else { @@ -1781,7 +1810,7 @@ void NVPTXAsmPrinter::printScalarConstant(Constant *CPV, raw_ostream &O) { llvm_unreachable("Not scalar type found in printScalarConstant()"); } -void NVPTXAsmPrinter::bufferLEByte(Constant *CPV, int Bytes, +void NVPTXAsmPrinter::bufferLEByte(const Constant *CPV, int Bytes, AggBuffer *aggBuffer) { const DataLayout *TD = TM.getDataLayout(); @@ -1809,13 +1838,13 @@ void NVPTXAsmPrinter::bufferLEByte(Constant *CPV, int Bytes, ptr = (unsigned char *)&int16; aggBuffer->addBytes(ptr, 2, Bytes); } else if (ETy == Type::getInt32Ty(CPV->getContext())) { - if (ConstantInt *constInt = dyn_cast<ConstantInt>(CPV)) { + if (const ConstantInt *constInt = dyn_cast<ConstantInt>(CPV)) { int int32 = (int)(constInt->getZExtValue()); ptr = (unsigned char *)&int32; aggBuffer->addBytes(ptr, 4, Bytes); break; - } else if (ConstantExpr *Cexpr = dyn_cast<ConstantExpr>(CPV)) { - if (ConstantInt *constInt = dyn_cast<ConstantInt>( + } else if (const ConstantExpr *Cexpr = dyn_cast<ConstantExpr>(CPV)) { + if (const ConstantInt *constInt = dyn_cast<ConstantInt>( ConstantFoldConstantExpression(Cexpr, TD))) { int int32 = (int)(constInt->getZExtValue()); ptr = (unsigned char *)&int32; @@ -1831,13 +1860,13 @@ void NVPTXAsmPrinter::bufferLEByte(Constant *CPV, int Bytes, } llvm_unreachable("unsupported integer const type"); } else if (ETy == Type::getInt64Ty(CPV->getContext())) { - if (ConstantInt *constInt = dyn_cast<ConstantInt>(CPV)) { + if (const ConstantInt *constInt = dyn_cast<ConstantInt>(CPV)) { long long int64 = (long long)(constInt->getZExtValue()); ptr = (unsigned char *)&int64; aggBuffer->addBytes(ptr, 8, Bytes); break; - } else if (ConstantExpr *Cexpr = dyn_cast<ConstantExpr>(CPV)) { - if (ConstantInt *constInt = dyn_cast<ConstantInt>( + } else if (const ConstantExpr *Cexpr = dyn_cast<ConstantExpr>(CPV)) { + if (const ConstantInt *constInt = dyn_cast<ConstantInt>( ConstantFoldConstantExpression(Cexpr, TD))) { long long int64 = (long long)(constInt->getZExtValue()); ptr = (unsigned char *)&int64; @@ -1858,7 +1887,7 @@ void 
NVPTXAsmPrinter::bufferLEByte(Constant *CPV, int Bytes, } case Type::FloatTyID: case Type::DoubleTyID: { - ConstantFP *CFP = dyn_cast<ConstantFP>(CPV); + const ConstantFP *CFP = dyn_cast<ConstantFP>(CPV); const Type *Ty = CFP->getType(); if (Ty == Type::getFloatTy(CPV->getContext())) { float float32 = (float) CFP->getValueAPF().convertToFloat(); @@ -1874,10 +1903,10 @@ void NVPTXAsmPrinter::bufferLEByte(Constant *CPV, int Bytes, break; } case Type::PointerTyID: { - if (GlobalValue *GVar = dyn_cast<GlobalValue>(CPV)) { + if (const GlobalValue *GVar = dyn_cast<GlobalValue>(CPV)) { aggBuffer->addSymbol(GVar); - } else if (ConstantExpr *Cexpr = dyn_cast<ConstantExpr>(CPV)) { - Value *v = Cexpr->stripPointerCasts(); + } else if (const ConstantExpr *Cexpr = dyn_cast<ConstantExpr>(CPV)) { + const Value *v = Cexpr->stripPointerCasts(); aggBuffer->addSymbol(v); } unsigned int s = TD->getTypeAllocSize(CPV->getType()); @@ -1906,7 +1935,7 @@ void NVPTXAsmPrinter::bufferLEByte(Constant *CPV, int Bytes, } } -void NVPTXAsmPrinter::bufferAggregateConstant(Constant *CPV, +void NVPTXAsmPrinter::bufferAggregateConstant(const Constant *CPV, AggBuffer *aggBuffer) { const DataLayout *TD = TM.getDataLayout(); int Bytes; diff --git a/contrib/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.h b/contrib/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.h index 6dc9fc0..7faa6b2 100644 --- a/contrib/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.h +++ b/contrib/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.h @@ -91,7 +91,7 @@ class LLVM_LIBRARY_VISIBILITY NVPTXAsmPrinter : public AsmPrinter { unsigned char *buffer; // the buffer unsigned numSymbols; // number of symbol addresses SmallVector<unsigned, 4> symbolPosInBuffer; - SmallVector<Value *, 4> Symbols; + SmallVector<const Value *, 4> Symbols; private: unsigned curpos; @@ -128,7 +128,7 @@ class LLVM_LIBRARY_VISIBILITY NVPTXAsmPrinter : public AsmPrinter { } return curpos; } - void addSymbol(Value *GVar) { + void addSymbol(const Value *GVar) { symbolPosInBuffer.push_back(curpos); Symbols.push_back(GVar); numSymbols++; @@ -153,11 +153,11 @@ class LLVM_LIBRARY_VISIBILITY NVPTXAsmPrinter : public AsmPrinter { if (pos) O << ", "; if (pos == nextSymbolPos) { - Value *v = Symbols[nSym]; - if (GlobalValue *GVar = dyn_cast<GlobalValue>(v)) { + const Value *v = Symbols[nSym]; + if (const GlobalValue *GVar = dyn_cast<GlobalValue>(v)) { MCSymbol *Name = AP.Mang->getSymbol(GVar); O << *Name; - } else if (ConstantExpr *Cexpr = dyn_cast<ConstantExpr>(v)) { + } else if (const ConstantExpr *Cexpr = dyn_cast<ConstantExpr>(v)) { O << *nvptx::LowerConstant(Cexpr, AP); } else llvm_unreachable("symbol type unknown"); @@ -205,10 +205,12 @@ private: void printImplicitDef(const MachineInstr *MI, raw_ostream &O) const; // definition autogenerated. void printInstruction(const MachineInstr *MI, raw_ostream &O); - void printModuleLevelGV(GlobalVariable *GVar, raw_ostream &O, bool = false); + void printModuleLevelGV(const GlobalVariable *GVar, raw_ostream &O, + bool = false); void printParamName(int paramIndex, raw_ostream &O); void printParamName(Function::const_arg_iterator I, int paramIndex, raw_ostream &O); + void emitGlobals(const Module &M); void emitHeader(Module &M, raw_ostream &O); void emitKernelFunctionDirectives(const Function &F, raw_ostream &O) const; void emitVirtualRegister(unsigned int vr, bool isVec, raw_ostream &O); @@ -234,6 +236,8 @@ protected: private: std::string CurrentBankselLabelInBasicBlock; + bool GlobalsEmitted; + // This is specific per MachineFunction. 
const MachineRegisterInfo *MRI; // The contents are specific for each @@ -247,7 +251,7 @@ private: std::map<const Type *, std::string> TypeNameMap; // List of variables demoted to a function scope. - std::map<const Function *, std::vector<GlobalVariable *> > localDecls; + std::map<const Function *, std::vector<const GlobalVariable *> > localDecls; // To record filename to ID mapping std::map<std::string, unsigned> filenameMap; @@ -256,15 +260,15 @@ private: void emitPTXGlobalVariable(const GlobalVariable *GVar, raw_ostream &O); void emitPTXAddressSpace(unsigned int AddressSpace, raw_ostream &O) const; std::string getPTXFundamentalTypeStr(const Type *Ty, bool = true) const; - void printScalarConstant(Constant *CPV, raw_ostream &O); + void printScalarConstant(const Constant *CPV, raw_ostream &O); void printFPConstant(const ConstantFP *Fp, raw_ostream &O); - void bufferLEByte(Constant *CPV, int Bytes, AggBuffer *aggBuffer); - void bufferAggregateConstant(Constant *CV, AggBuffer *aggBuffer); + void bufferLEByte(const Constant *CPV, int Bytes, AggBuffer *aggBuffer); + void bufferAggregateConstant(const Constant *CV, AggBuffer *aggBuffer); void printOperandProper(const MachineOperand &MO); void emitLinkageDirective(const GlobalValue *V, raw_ostream &O); - void emitDeclarations(Module &, raw_ostream &O); + void emitDeclarations(const Module &, raw_ostream &O); void emitDeclaration(const Function *, raw_ostream &O); static const char *getRegisterName(unsigned RegNo); diff --git a/contrib/llvm/lib/Target/NVPTX/NVPTXGenericToNVVM.cpp b/contrib/llvm/lib/Target/NVPTX/NVPTXGenericToNVVM.cpp new file mode 100644 index 0000000..1077c46 --- /dev/null +++ b/contrib/llvm/lib/Target/NVPTX/NVPTXGenericToNVVM.cpp @@ -0,0 +1,436 @@ +//===-- GenericToNVVM.cpp - Convert generic module to NVVM module - C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// Convert generic global variables into either .global or .const access based +// on the variable's "constant" qualifier. 
+// +//===----------------------------------------------------------------------===// + +#include "NVPTX.h" +#include "NVPTXUtilities.h" +#include "MCTargetDesc/NVPTXBaseInfo.h" + +#include "llvm/PassManager.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/Intrinsics.h" +#include "llvm/IR/Module.h" +#include "llvm/IR/Operator.h" +#include "llvm/ADT/ValueMap.h" +#include "llvm/CodeGen/MachineFunctionAnalysis.h" +#include "llvm/CodeGen/ValueTypes.h" +#include "llvm/IR/IRBuilder.h" + +using namespace llvm; + +namespace llvm { +void initializeGenericToNVVMPass(PassRegistry &); +} + +namespace { +class GenericToNVVM : public ModulePass { +public: + static char ID; + + GenericToNVVM() : ModulePass(ID) {} + + virtual bool runOnModule(Module &M); + + virtual void getAnalysisUsage(AnalysisUsage &AU) const { + } + +private: + Value *getOrInsertCVTA(Module *M, Function *F, GlobalVariable *GV, + IRBuilder<> &Builder); + Value *remapConstant(Module *M, Function *F, Constant *C, + IRBuilder<> &Builder); + Value *remapConstantVectorOrConstantAggregate(Module *M, Function *F, + Constant *C, + IRBuilder<> &Builder); + Value *remapConstantExpr(Module *M, Function *F, ConstantExpr *C, + IRBuilder<> &Builder); + void remapNamedMDNode(Module *M, NamedMDNode *N); + MDNode *remapMDNode(Module *M, MDNode *N); + + typedef ValueMap<GlobalVariable *, GlobalVariable *> GVMapTy; + typedef ValueMap<Constant *, Value *> ConstantToValueMapTy; + GVMapTy GVMap; + ConstantToValueMapTy ConstantToValueMap; +}; +} + +char GenericToNVVM::ID = 0; + +ModulePass *llvm::createGenericToNVVMPass() { return new GenericToNVVM(); } + +INITIALIZE_PASS( + GenericToNVVM, "generic-to-nvvm", + "Ensure that the global variables are in the global address space", false, + false) + +bool GenericToNVVM::runOnModule(Module &M) { + // Create a clone of each global variable that has the default address space. + // The clone is created with the global address space specifier, and the pair + // of original global variable and its clone is placed in the GVMap for later + // use. + + for (Module::global_iterator I = M.global_begin(), E = M.global_end(); + I != E;) { + GlobalVariable *GV = I++; + if (GV->getType()->getAddressSpace() == llvm::ADDRESS_SPACE_GENERIC && + !llvm::isTexture(*GV) && !llvm::isSurface(*GV) && + !GV->getName().startswith("llvm.")) { + GlobalVariable *NewGV = new GlobalVariable( + M, GV->getType()->getElementType(), GV->isConstant(), + GV->getLinkage(), GV->hasInitializer() ? GV->getInitializer() : NULL, + "", GV, GV->getThreadLocalMode(), llvm::ADDRESS_SPACE_GLOBAL); + NewGV->copyAttributesFrom(GV); + GVMap[GV] = NewGV; + } + } + + // Return immediately, if every global variable has a specific address space + // specifier. + if (GVMap.empty()) { + return false; + } + + // Walk through the instructions in function defitinions, and replace any use + // of original global variables in GVMap with a use of the corresponding + // copies in GVMap. If necessary, promote constants to instructions. 
+ for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I) { + if (I->isDeclaration()) { + continue; + } + IRBuilder<> Builder(I->getEntryBlock().getFirstNonPHIOrDbg()); + for (Function::iterator BBI = I->begin(), BBE = I->end(); BBI != BBE; + ++BBI) { + for (BasicBlock::iterator II = BBI->begin(), IE = BBI->end(); II != IE; + ++II) { + for (unsigned i = 0, e = II->getNumOperands(); i < e; ++i) { + Value *Operand = II->getOperand(i); + if (isa<Constant>(Operand)) { + II->setOperand( + i, remapConstant(&M, I, cast<Constant>(Operand), Builder)); + } + } + } + } + ConstantToValueMap.clear(); + } + + // Walk through the metadata section and update the debug information + // associated with the global variables in the default address space. + for (Module::named_metadata_iterator I = M.named_metadata_begin(), + E = M.named_metadata_end(); + I != E; I++) { + remapNamedMDNode(&M, I); + } + + // Walk through the global variable initializers, and replace any use of + // original global variables in GVMap with a use of the corresponding copies + // in GVMap. The copies need to be bitcast to the original global variable + // types, as we cannot use cvta in global variable initializers. + for (GVMapTy::iterator I = GVMap.begin(), E = GVMap.end(); I != E;) { + GlobalVariable *GV = I->first; + GlobalVariable *NewGV = I->second; + ++I; + Constant *BitCastNewGV = ConstantExpr::getBitCast(NewGV, GV->getType()); + // At this point, the remaining uses of GV should be found only in global + // variable initializers, as other uses have been already been removed + // while walking through the instructions in function definitions. + for (Value::use_iterator UI = GV->use_begin(), UE = GV->use_end(); + UI != UE;) { + Use &U = (UI++).getUse(); + U.set(BitCastNewGV); + } + std::string Name = GV->getName(); + GV->removeDeadConstantUsers(); + GV->eraseFromParent(); + NewGV->setName(Name); + } + GVMap.clear(); + + return true; +} + +Value *GenericToNVVM::getOrInsertCVTA(Module *M, Function *F, + GlobalVariable *GV, + IRBuilder<> &Builder) { + PointerType *GVType = GV->getType(); + Value *CVTA = NULL; + + // See if the address space conversion requires the operand to be bitcast + // to i8 addrspace(n)* first. + EVT ExtendedGVType = EVT::getEVT(GVType->getElementType(), true); + if (!ExtendedGVType.isInteger() && !ExtendedGVType.isFloatingPoint()) { + // A bitcast to i8 addrspace(n)* on the operand is needed. + LLVMContext &Context = M->getContext(); + unsigned int AddrSpace = GVType->getAddressSpace(); + Type *DestTy = PointerType::get(Type::getInt8Ty(Context), AddrSpace); + CVTA = Builder.CreateBitCast(GV, DestTy, "cvta"); + // Insert the address space conversion. + Type *ResultType = + PointerType::get(Type::getInt8Ty(Context), llvm::ADDRESS_SPACE_GENERIC); + SmallVector<Type *, 2> ParamTypes; + ParamTypes.push_back(ResultType); + ParamTypes.push_back(DestTy); + Function *CVTAFunction = Intrinsic::getDeclaration( + M, Intrinsic::nvvm_ptr_global_to_gen, ParamTypes); + CVTA = Builder.CreateCall(CVTAFunction, CVTA, "cvta"); + // Another bitcast from i8 * to <the element type of GVType> * is + // required. + DestTy = + PointerType::get(GVType->getElementType(), llvm::ADDRESS_SPACE_GENERIC); + CVTA = Builder.CreateBitCast(CVTA, DestTy, "cvta"); + } else { + // A simple CVTA is enough. 
+ SmallVector<Type *, 2> ParamTypes; + ParamTypes.push_back(PointerType::get(GVType->getElementType(), + llvm::ADDRESS_SPACE_GENERIC)); + ParamTypes.push_back(GVType); + Function *CVTAFunction = Intrinsic::getDeclaration( + M, Intrinsic::nvvm_ptr_global_to_gen, ParamTypes); + CVTA = Builder.CreateCall(CVTAFunction, GV, "cvta"); + } + + return CVTA; +} + +Value *GenericToNVVM::remapConstant(Module *M, Function *F, Constant *C, + IRBuilder<> &Builder) { + // If the constant C has been converted already in the given function F, just + // return the converted value. + ConstantToValueMapTy::iterator CTII = ConstantToValueMap.find(C); + if (CTII != ConstantToValueMap.end()) { + return CTII->second; + } + + Value *NewValue = C; + if (isa<GlobalVariable>(C)) { + // If the constant C is a global variable and is found in GVMap, generate a + // set set of instructions that convert the clone of C with the global + // address space specifier to a generic pointer. + // The constant C cannot be used here, as it will be erased from the + // module eventually. And the clone of C with the global address space + // specifier cannot be used here either, as it will affect the types of + // other instructions in the function. Hence, this address space conversion + // is required. + GVMapTy::iterator I = GVMap.find(cast<GlobalVariable>(C)); + if (I != GVMap.end()) { + NewValue = getOrInsertCVTA(M, F, I->second, Builder); + } + } else if (isa<ConstantVector>(C) || isa<ConstantArray>(C) || + isa<ConstantStruct>(C)) { + // If any element in the constant vector or aggregate C is or uses a global + // variable in GVMap, the constant C needs to be reconstructed, using a set + // of instructions. + NewValue = remapConstantVectorOrConstantAggregate(M, F, C, Builder); + } else if (isa<ConstantExpr>(C)) { + // If any operand in the constant expression C is or uses a global variable + // in GVMap, the constant expression C needs to be reconstructed, using a + // set of instructions. + NewValue = remapConstantExpr(M, F, cast<ConstantExpr>(C), Builder); + } + + ConstantToValueMap[C] = NewValue; + return NewValue; +} + +Value *GenericToNVVM::remapConstantVectorOrConstantAggregate( + Module *M, Function *F, Constant *C, IRBuilder<> &Builder) { + bool OperandChanged = false; + SmallVector<Value *, 4> NewOperands; + unsigned NumOperands = C->getNumOperands(); + + // Check if any element is or uses a global variable in GVMap, and thus + // converted to another value. + for (unsigned i = 0; i < NumOperands; ++i) { + Value *Operand = C->getOperand(i); + Value *NewOperand = remapConstant(M, F, cast<Constant>(Operand), Builder); + OperandChanged |= Operand != NewOperand; + NewOperands.push_back(NewOperand); + } + + // If none of the elements has been modified, return C as it is. + if (!OperandChanged) { + return C; + } + + // If any of the elements has been modified, construct the equivalent + // vector or aggregate value with a set instructions and the converted + // elements. 
+ Value *NewValue = UndefValue::get(C->getType()); + if (isa<ConstantVector>(C)) { + for (unsigned i = 0; i < NumOperands; ++i) { + Value *Idx = ConstantInt::get(Type::getInt32Ty(M->getContext()), i); + NewValue = Builder.CreateInsertElement(NewValue, NewOperands[i], Idx); + } + } else { + for (unsigned i = 0; i < NumOperands; ++i) { + NewValue = + Builder.CreateInsertValue(NewValue, NewOperands[i], makeArrayRef(i)); + } + } + + return NewValue; +} + +Value *GenericToNVVM::remapConstantExpr(Module *M, Function *F, ConstantExpr *C, + IRBuilder<> &Builder) { + bool OperandChanged = false; + SmallVector<Value *, 4> NewOperands; + unsigned NumOperands = C->getNumOperands(); + + // Check if any operand is or uses a global variable in GVMap, and thus + // converted to another value. + for (unsigned i = 0; i < NumOperands; ++i) { + Value *Operand = C->getOperand(i); + Value *NewOperand = remapConstant(M, F, cast<Constant>(Operand), Builder); + OperandChanged |= Operand != NewOperand; + NewOperands.push_back(NewOperand); + } + + // If none of the operands has been modified, return C as it is. + if (!OperandChanged) { + return C; + } + + // If any of the operands has been modified, construct the instruction with + // the converted operands. + unsigned Opcode = C->getOpcode(); + switch (Opcode) { + case Instruction::ICmp: + // CompareConstantExpr (icmp) + return Builder.CreateICmp(CmpInst::Predicate(C->getPredicate()), + NewOperands[0], NewOperands[1]); + case Instruction::FCmp: + // CompareConstantExpr (fcmp) + assert(false && "Address space conversion should have no effect " + "on float point CompareConstantExpr (fcmp)!"); + return C; + case Instruction::ExtractElement: + // ExtractElementConstantExpr + return Builder.CreateExtractElement(NewOperands[0], NewOperands[1]); + case Instruction::InsertElement: + // InsertElementConstantExpr + return Builder.CreateInsertElement(NewOperands[0], NewOperands[1], + NewOperands[2]); + case Instruction::ShuffleVector: + // ShuffleVector + return Builder.CreateShuffleVector(NewOperands[0], NewOperands[1], + NewOperands[2]); + case Instruction::ExtractValue: + // ExtractValueConstantExpr + return Builder.CreateExtractValue(NewOperands[0], C->getIndices()); + case Instruction::InsertValue: + // InsertValueConstantExpr + return Builder.CreateInsertValue(NewOperands[0], NewOperands[1], + C->getIndices()); + case Instruction::GetElementPtr: + // GetElementPtrConstantExpr + return cast<GEPOperator>(C)->isInBounds() + ? Builder.CreateGEP( + NewOperands[0], + makeArrayRef(&NewOperands[1], NumOperands - 1)) + : Builder.CreateInBoundsGEP( + NewOperands[0], + makeArrayRef(&NewOperands[1], NumOperands - 1)); + case Instruction::Select: + // SelectConstantExpr + return Builder.CreateSelect(NewOperands[0], NewOperands[1], NewOperands[2]); + default: + // BinaryConstantExpr + if (Instruction::isBinaryOp(Opcode)) { + return Builder.CreateBinOp(Instruction::BinaryOps(C->getOpcode()), + NewOperands[0], NewOperands[1]); + } + // UnaryConstantExpr + if (Instruction::isCast(Opcode)) { + return Builder.CreateCast(Instruction::CastOps(C->getOpcode()), + NewOperands[0], C->getType()); + } + assert(false && "GenericToNVVM encountered an unsupported ConstantExpr"); + return C; + } +} + +void GenericToNVVM::remapNamedMDNode(Module *M, NamedMDNode *N) { + + bool OperandChanged = false; + SmallVector<MDNode *, 16> NewOperands; + unsigned NumOperands = N->getNumOperands(); + + // Check if any operand is or contains a global variable in GVMap, and thus + // converted to another value. 
+ for (unsigned i = 0; i < NumOperands; ++i) { + MDNode *Operand = N->getOperand(i); + MDNode *NewOperand = remapMDNode(M, Operand); + OperandChanged |= Operand != NewOperand; + NewOperands.push_back(NewOperand); + } + + // If none of the operands has been modified, return immediately. + if (!OperandChanged) { + return; + } + + // Replace the old operands with the new operands. + N->dropAllReferences(); + for (SmallVector<MDNode *, 16>::iterator I = NewOperands.begin(), + E = NewOperands.end(); + I != E; ++I) { + N->addOperand(*I); + } +} + +MDNode *GenericToNVVM::remapMDNode(Module *M, MDNode *N) { + + bool OperandChanged = false; + SmallVector<Value *, 8> NewOperands; + unsigned NumOperands = N->getNumOperands(); + + // Check if any operand is or contains a global variable in GVMap, and thus + // converted to another value. + for (unsigned i = 0; i < NumOperands; ++i) { + Value *Operand = N->getOperand(i); + Value *NewOperand = Operand; + if (Operand) { + if (isa<GlobalVariable>(Operand)) { + GVMapTy::iterator I = GVMap.find(cast<GlobalVariable>(Operand)); + if (I != GVMap.end()) { + NewOperand = I->second; + if (++i < NumOperands) { + NewOperands.push_back(NewOperand); + // Address space of the global variable follows the global variable + // in the global variable debug info (see createGlobalVariable in + // lib/Analysis/DIBuilder.cpp). + NewOperand = + ConstantInt::get(Type::getInt32Ty(M->getContext()), + I->second->getType()->getAddressSpace()); + } + } + } else if (isa<MDNode>(Operand)) { + NewOperand = remapMDNode(M, cast<MDNode>(Operand)); + } + } + OperandChanged |= Operand != NewOperand; + NewOperands.push_back(NewOperand); + } + + // If none of the operands has been modified, return N as it is. + if (!OperandChanged) { + return N; + } + + // If any of the operands has been modified, create a new MDNode with the new + // operands. + return MDNode::get(M->getContext(), makeArrayRef(NewOperands)); +} diff --git a/contrib/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp b/contrib/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp index e862988..d4378c2 100644 --- a/contrib/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp +++ b/contrib/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp @@ -42,6 +42,11 @@ static cl::opt<int> UsePrecDivF32( " IEEE Compliant F32 div.rnd if avaiable."), cl::init(2)); +static cl::opt<bool> +UsePrecSqrtF32("nvptx-prec-sqrtf32", + cl::desc("NVPTX Specific: 0 use sqrt.approx, 1 use sqrt.rn."), + cl::init(true)); + /// createNVPTXISelDag - This pass converts a legalized DAG into a /// NVPTX-specific DAG, ready for instruction scheduling. FunctionPass *llvm::createNVPTXISelDag(NVPTXTargetMachine &TM, @@ -74,6 +79,8 @@ NVPTXDAGToDAGISel::NVPTXDAGToDAGISel(NVPTXTargetMachine &tm, // Decide how to translate f32 div do_DIVF32_PREC = UsePrecDivF32; + // Decide how to translate f32 sqrt + do_SQRTF32_PREC = UsePrecSqrtF32; // sm less than sm_20 does not support div.rnd. Use div.full. if (do_DIVF32_PREC == 2 && !Subtarget.reqPTX20()) do_DIVF32_PREC = 1; @@ -241,7 +248,7 @@ SDNode *NVPTXDAGToDAGISel::SelectLoad(SDNode *N) { SDValue Ops[] = { getI32Imm(isVolatile), getI32Imm(codeAddrSpace), getI32Imm(vecType), getI32Imm(fromType), getI32Imm(fromTypeWidth), Addr, Chain }; - NVPTXLD = CurDAG->getMachineNode(Opcode, dl, TargetVT, MVT::Other, Ops, 7); + NVPTXLD = CurDAG->getMachineNode(Opcode, dl, TargetVT, MVT::Other, Ops); } else if (Subtarget.is64Bit() ? 
SelectADDRsi64(N1.getNode(), N1, Base, Offset) : SelectADDRsi(N1.getNode(), N1, Base, Offset)) { @@ -270,7 +277,7 @@ SDNode *NVPTXDAGToDAGISel::SelectLoad(SDNode *N) { SDValue Ops[] = { getI32Imm(isVolatile), getI32Imm(codeAddrSpace), getI32Imm(vecType), getI32Imm(fromType), getI32Imm(fromTypeWidth), Base, Offset, Chain }; - NVPTXLD = CurDAG->getMachineNode(Opcode, dl, TargetVT, MVT::Other, Ops, 8); + NVPTXLD = CurDAG->getMachineNode(Opcode, dl, TargetVT, MVT::Other, Ops); } else if (Subtarget.is64Bit() ? SelectADDRri64(N1.getNode(), N1, Base, Offset) : SelectADDRri(N1.getNode(), N1, Base, Offset)) { @@ -324,7 +331,7 @@ SDNode *NVPTXDAGToDAGISel::SelectLoad(SDNode *N) { SDValue Ops[] = { getI32Imm(isVolatile), getI32Imm(codeAddrSpace), getI32Imm(vecType), getI32Imm(fromType), getI32Imm(fromTypeWidth), Base, Offset, Chain }; - NVPTXLD = CurDAG->getMachineNode(Opcode, dl, TargetVT, MVT::Other, Ops, 8); + NVPTXLD = CurDAG->getMachineNode(Opcode, dl, TargetVT, MVT::Other, Ops); } else { if (Subtarget.is64Bit()) { switch (TargetVT) { @@ -376,7 +383,7 @@ SDNode *NVPTXDAGToDAGISel::SelectLoad(SDNode *N) { SDValue Ops[] = { getI32Imm(isVolatile), getI32Imm(codeAddrSpace), getI32Imm(vecType), getI32Imm(fromType), getI32Imm(fromTypeWidth), N1, Chain }; - NVPTXLD = CurDAG->getMachineNode(Opcode, dl, TargetVT, MVT::Other, Ops, 7); + NVPTXLD = CurDAG->getMachineNode(Opcode, dl, TargetVT, MVT::Other, Ops); } if (NVPTXLD != NULL) { @@ -501,7 +508,7 @@ SDNode *NVPTXDAGToDAGISel::SelectLoadVector(SDNode *N) { SDValue Ops[] = { getI32Imm(IsVolatile), getI32Imm(CodeAddrSpace), getI32Imm(VecType), getI32Imm(FromType), getI32Imm(FromTypeWidth), Addr, Chain }; - LD = CurDAG->getMachineNode(Opcode, DL, N->getVTList(), Ops, 7); + LD = CurDAG->getMachineNode(Opcode, DL, N->getVTList(), Ops); } else if (Subtarget.is64Bit() ? SelectADDRsi64(Op1.getNode(), Op1, Base, Offset) : SelectADDRsi(Op1.getNode(), Op1, Base, Offset)) { @@ -555,7 +562,7 @@ SDNode *NVPTXDAGToDAGISel::SelectLoadVector(SDNode *N) { SDValue Ops[] = { getI32Imm(IsVolatile), getI32Imm(CodeAddrSpace), getI32Imm(VecType), getI32Imm(FromType), getI32Imm(FromTypeWidth), Base, Offset, Chain }; - LD = CurDAG->getMachineNode(Opcode, DL, N->getVTList(), Ops, 8); + LD = CurDAG->getMachineNode(Opcode, DL, N->getVTList(), Ops); } else if (Subtarget.is64Bit() ? 
SelectADDRri64(Op1.getNode(), Op1, Base, Offset) : SelectADDRri(Op1.getNode(), Op1, Base, Offset)) { @@ -659,7 +666,7 @@ SDNode *NVPTXDAGToDAGISel::SelectLoadVector(SDNode *N) { getI32Imm(VecType), getI32Imm(FromType), getI32Imm(FromTypeWidth), Base, Offset, Chain }; - LD = CurDAG->getMachineNode(Opcode, DL, N->getVTList(), Ops, 8); + LD = CurDAG->getMachineNode(Opcode, DL, N->getVTList(), Ops); } else { if (Subtarget.is64Bit()) { switch (N->getOpcode()) { @@ -760,7 +767,7 @@ SDNode *NVPTXDAGToDAGISel::SelectLoadVector(SDNode *N) { SDValue Ops[] = { getI32Imm(IsVolatile), getI32Imm(CodeAddrSpace), getI32Imm(VecType), getI32Imm(FromType), getI32Imm(FromTypeWidth), Op1, Chain }; - LD = CurDAG->getMachineNode(Opcode, DL, N->getVTList(), Ops, 7); + LD = CurDAG->getMachineNode(Opcode, DL, N->getVTList(), Ops); } MachineSDNode::mmo_iterator MemRefs0 = MF->allocateMemRefsArray(1); @@ -962,7 +969,7 @@ SDNode *NVPTXDAGToDAGISel::SelectLDGLDUVector(SDNode *N) { } SDValue Ops[] = { Op1, Chain }; - LD = CurDAG->getMachineNode(Opcode, DL, N->getVTList(), &Ops[0], 2); + LD = CurDAG->getMachineNode(Opcode, DL, N->getVTList(), Ops); MachineSDNode::mmo_iterator MemRefs0 = MF->allocateMemRefsArray(1); MemRefs0[0] = cast<MemSDNode>(N)->getMemOperand(); @@ -1055,7 +1062,7 @@ SDNode *NVPTXDAGToDAGISel::SelectStore(SDNode *N) { SDValue Ops[] = { N1, getI32Imm(isVolatile), getI32Imm(codeAddrSpace), getI32Imm(vecType), getI32Imm(toType), getI32Imm(toTypeWidth), Addr, Chain }; - NVPTXST = CurDAG->getMachineNode(Opcode, dl, MVT::Other, Ops, 8); + NVPTXST = CurDAG->getMachineNode(Opcode, dl, MVT::Other, Ops); } else if (Subtarget.is64Bit() ? SelectADDRsi64(N2.getNode(), N2, Base, Offset) : SelectADDRsi(N2.getNode(), N2, Base, Offset)) { @@ -1084,7 +1091,7 @@ SDNode *NVPTXDAGToDAGISel::SelectStore(SDNode *N) { SDValue Ops[] = { N1, getI32Imm(isVolatile), getI32Imm(codeAddrSpace), getI32Imm(vecType), getI32Imm(toType), getI32Imm(toTypeWidth), Base, Offset, Chain }; - NVPTXST = CurDAG->getMachineNode(Opcode, dl, MVT::Other, Ops, 9); + NVPTXST = CurDAG->getMachineNode(Opcode, dl, MVT::Other, Ops); } else if (Subtarget.is64Bit() ? 
SelectADDRri64(N2.getNode(), N2, Base, Offset) : SelectADDRri(N2.getNode(), N2, Base, Offset)) { @@ -1138,7 +1145,7 @@ SDNode *NVPTXDAGToDAGISel::SelectStore(SDNode *N) { SDValue Ops[] = { N1, getI32Imm(isVolatile), getI32Imm(codeAddrSpace), getI32Imm(vecType), getI32Imm(toType), getI32Imm(toTypeWidth), Base, Offset, Chain }; - NVPTXST = CurDAG->getMachineNode(Opcode, dl, MVT::Other, Ops, 9); + NVPTXST = CurDAG->getMachineNode(Opcode, dl, MVT::Other, Ops); } else { if (Subtarget.is64Bit()) { switch (SourceVT) { @@ -1190,7 +1197,7 @@ SDNode *NVPTXDAGToDAGISel::SelectStore(SDNode *N) { SDValue Ops[] = { N1, getI32Imm(isVolatile), getI32Imm(codeAddrSpace), getI32Imm(vecType), getI32Imm(toType), getI32Imm(toTypeWidth), N2, Chain }; - NVPTXST = CurDAG->getMachineNode(Opcode, dl, MVT::Other, Ops, 8); + NVPTXST = CurDAG->getMachineNode(Opcode, dl, MVT::Other, Ops); } if (NVPTXST != NULL) { @@ -1569,7 +1576,7 @@ SDNode *NVPTXDAGToDAGISel::SelectStoreVector(SDNode *N) { StOps.push_back(Chain); - ST = CurDAG->getMachineNode(Opcode, DL, MVT::Other, &StOps[0], StOps.size()); + ST = CurDAG->getMachineNode(Opcode, DL, MVT::Other, StOps); MachineSDNode::mmo_iterator MemRefs0 = MF->allocateMemRefsArray(1); MemRefs0[0] = cast<MemSDNode>(N)->getMemOperand(); diff --git a/contrib/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.h b/contrib/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.h index 70e8e46..ed16d44 100644 --- a/contrib/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.h +++ b/contrib/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.h @@ -41,6 +41,10 @@ class LLVM_LIBRARY_VISIBILITY NVPTXDAGToDAGISel : public SelectionDAGISel { // Otherwise, use div.full int do_DIVF32_PREC; + // If true, generate sqrt.rn, else generate sqrt.approx. If FTZ + // is true, then generate the corresponding FTZ version. + bool do_SQRTF32_PREC; + // If true, add .ftz to f32 instructions. // This is only meaningful for sm_20 and later, as the default // is not ftz. 
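The new -nvptx-prec-sqrtf32 option (default true) and the existing doF32FTZ predicate jointly pick one of four PTX square-root forms; the patterns added to NVPTXIntrinsics.td below encode exactly this choice. As illustrative pseudocode only, not the actual selector:

#include <string>

// Mirror of the predicate combinations do_SQRTF32_RN/do_SQRTF32_APPROX x FTZ.
static std::string pickSqrtF32(bool PrecSqrt /* -nvptx-prec-sqrtf32 */,
                               bool FTZ /* doF32FTZ */) {
  if (PrecSqrt)                                        // do_SQRTF32_PREC == 1
    return FTZ ? "sqrt.rn.ftz.f32" : "sqrt.rn.f32";
  return FTZ ? "sqrt.approx.ftz.f32" : "sqrt.approx.f32";
}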
diff --git a/contrib/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td b/contrib/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td index f43abe2..da6dd39 100644 --- a/contrib/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td +++ b/contrib/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td @@ -75,6 +75,9 @@ def allowFMA_ftz : Predicate<"(allowFMA && UseF32FTZ)">; def do_DIVF32_APPROX : Predicate<"do_DIVF32_PREC==0">; def do_DIVF32_FULL : Predicate<"do_DIVF32_PREC==1">; +def do_SQRTF32_APPROX : Predicate<"do_SQRTF32_PREC==0">; +def do_SQRTF32_RN : Predicate<"do_SQRTF32_PREC==1">; + def hasHWROT32 : Predicate<"Subtarget.hasHWROT32()">; def true : Predicate<"1">; diff --git a/contrib/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td b/contrib/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td index 49e2568..24037ca 100644 --- a/contrib/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td +++ b/contrib/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td @@ -512,6 +512,16 @@ def INT_NVVM_SQRT_RM_D : F_MATH_1<"sqrt.rm.f64 \t$dst, $src0;", Float64Regs, def INT_NVVM_SQRT_RP_D : F_MATH_1<"sqrt.rp.f64 \t$dst, $src0;", Float64Regs, Float64Regs, int_nvvm_sqrt_rp_d>; +// nvvm_sqrt intrinsic +def : Pat<(int_nvvm_sqrt_f Float32Regs:$a), + (INT_NVVM_SQRT_RN_FTZ_F Float32Regs:$a)>, Requires<[doF32FTZ, do_SQRTF32_RN]>; +def : Pat<(int_nvvm_sqrt_f Float32Regs:$a), + (INT_NVVM_SQRT_RN_F Float32Regs:$a)>, Requires<[do_SQRTF32_RN]>; +def : Pat<(int_nvvm_sqrt_f Float32Regs:$a), + (INT_NVVM_SQRT_APPROX_FTZ_F Float32Regs:$a)>, Requires<[doF32FTZ]>; +def : Pat<(int_nvvm_sqrt_f Float32Regs:$a), + (INT_NVVM_SQRT_APPROX_F Float32Regs:$a)>; + // // Rsqrt // @@ -1510,38 +1520,12 @@ multiclass G_TO_NG<string Str, Intrinsic Intrin> { defm cvta_local : NG_TO_G<"local", int_nvvm_ptr_local_to_gen>; defm cvta_shared : NG_TO_G<"shared", int_nvvm_ptr_shared_to_gen>; defm cvta_global : NG_TO_G<"global", int_nvvm_ptr_global_to_gen>; +defm cvta_const : NG_TO_G<"const", int_nvvm_ptr_constant_to_gen>; defm cvta_to_local : G_TO_NG<"local", int_nvvm_ptr_gen_to_local>; defm cvta_to_shared : G_TO_NG<"shared", int_nvvm_ptr_gen_to_shared>; defm cvta_to_global : G_TO_NG<"global", int_nvvm_ptr_gen_to_global>; - -def cvta_const : NVPTXInst<(outs Int32Regs:$result), (ins Int32Regs:$src), - "mov.u32 \t$result, $src;", - [(set Int32Regs:$result, (int_nvvm_ptr_constant_to_gen Int32Regs:$src))]>; -def cvta_const_64 : NVPTXInst<(outs Int64Regs:$result), (ins Int64Regs:$src), - "mov.u64 \t$result, $src;", - [(set Int64Regs:$result, (int_nvvm_ptr_constant_to_gen Int64Regs:$src))]>; - - - -// @TODO: Revisit this. There is a type -// contradiction between iPTRAny and iPTR for the def. 
-/*def cvta_const_addr : NVPTXInst<(outs Int32Regs:$result), (ins imemAny:$src), - "mov.u32 \t$result, $src;", - [(set Int32Regs:$result, (int_nvvm_ptr_constant_to_gen - (Wrapper tglobaladdr:$src)))]>; -def cvta_const_addr_64 : NVPTXInst<(outs Int64Regs:$result), (ins imemAny:$src), - "mov.u64 \t$result, $src;", - [(set Int64Regs:$result, (int_nvvm_ptr_constant_to_gen - (Wrapper tglobaladdr:$src)))]>;*/ - - -def cvta_to_const : NVPTXInst<(outs Int32Regs:$result), (ins Int32Regs:$src), - "mov.u32 \t$result, $src;", - [(set Int32Regs:$result, (int_nvvm_ptr_gen_to_constant Int32Regs:$src))]>; -def cvta_to_const_64 : NVPTXInst<(outs Int64Regs:$result), (ins Int64Regs:$src), - "mov.u64 \t$result, $src;", - [(set Int64Regs:$result, (int_nvvm_ptr_gen_to_constant Int64Regs:$src))]>; +defm cvta_to_const : G_TO_NG<"const", int_nvvm_ptr_gen_to_constant>; // nvvm.ptr.gen.to.param diff --git a/contrib/llvm/lib/Target/NVPTX/NVPTXSection.h b/contrib/llvm/lib/Target/NVPTX/NVPTXSection.h index e166be5..e57ace9 100644 --- a/contrib/llvm/lib/Target/NVPTX/NVPTXSection.h +++ b/contrib/llvm/lib/Target/NVPTX/NVPTXSection.h @@ -32,7 +32,8 @@ public: /// Override this as NVPTX has its own way of printing switching /// to a section. virtual void PrintSwitchToSection(const MCAsmInfo &MAI, - raw_ostream &OS) const {} + raw_ostream &OS, + const MCExpr *Subsection) const {} /// Base address of PTX sections is zero. virtual bool isBaseAddressKnownZero() const { return true; } diff --git a/contrib/llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp b/contrib/llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp index 67ca6b5..1ae2a7c 100644 --- a/contrib/llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp +++ b/contrib/llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp @@ -49,6 +49,7 @@ using namespace llvm; namespace llvm { void initializeNVVMReflectPass(PassRegistry&); +void initializeGenericToNVVMPass(PassRegistry&); } extern "C" void LLVMInitializeNVPTXTarget() { @@ -62,6 +63,7 @@ extern "C" void LLVMInitializeNVPTXTarget() { // FIXME: This pass is really intended to be invoked during IR optimization, // but it's very NVPTX-specific. 
initializeNVVMReflectPass(*PassRegistry::getPassRegistry()); + initializeGenericToNVVMPass(*PassRegistry::getPassRegistry()); } NVPTXTargetMachine::NVPTXTargetMachine( @@ -100,6 +102,7 @@ public: return getTM<NVPTXTargetMachine>(); } + virtual void addIRPasses(); virtual bool addInstSelector(); virtual bool addPreRegAlloc(); }; @@ -110,6 +113,11 @@ TargetPassConfig *NVPTXTargetMachine::createPassConfig(PassManagerBase &PM) { return PassConfig; } +void NVPTXPassConfig::addIRPasses() { + TargetPassConfig::addIRPasses(); + addPass(createGenericToNVVMPass()); +} + bool NVPTXPassConfig::addInstSelector() { addPass(createLowerAggrCopies()); addPass(createSplitBBatBarPass()); diff --git a/contrib/llvm/lib/Target/NVPTX/NVVMReflect.cpp b/contrib/llvm/lib/Target/NVPTX/NVVMReflect.cpp index 0ad62ce..3cc324b 100644 --- a/contrib/llvm/lib/Target/NVPTX/NVVMReflect.cpp +++ b/contrib/llvm/lib/Target/NVPTX/NVVMReflect.cpp @@ -14,6 +14,7 @@ // //===----------------------------------------------------------------------===// +#include "NVPTX.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringMap.h" @@ -40,7 +41,7 @@ using namespace llvm; namespace llvm { void initializeNVVMReflectPass(PassRegistry &); } namespace { -class LLVM_LIBRARY_VISIBILITY NVVMReflect : public ModulePass { +class NVVMReflect : public ModulePass { private: StringMap<int> VarMap; typedef DenseMap<std::string, int>::iterator VarMapIter; @@ -48,9 +49,18 @@ private: public: static char ID; - NVVMReflect() : ModulePass(ID) { + NVVMReflect() : ModulePass(ID), ReflectFunction(0) { + initializeNVVMReflectPass(*PassRegistry::getPassRegistry()); VarMap.clear(); - ReflectFunction = 0; + } + + NVVMReflect(const StringMap<int> &Mapping) + : ModulePass(ID), ReflectFunction(0) { + initializeNVVMReflectPass(*PassRegistry::getPassRegistry()); + for (StringMap<int>::const_iterator I = Mapping.begin(), E = Mapping.end(); + I != E; ++I) { + VarMap[(*I).getKey()] = (*I).getValue(); + } } void getAnalysisUsage(AnalysisUsage &AU) const { AU.setPreservesAll(); } @@ -60,6 +70,14 @@ public: }; } +ModulePass *llvm::createNVVMReflectPass() { + return new NVVMReflect(); +} + +ModulePass *llvm::createNVVMReflectPass(const StringMap<int>& Mapping) { + return new NVVMReflect(Mapping); +} + static cl::opt<bool> NVVMReflectEnabled("nvvm-reflect-enable", cl::init(true), cl::desc("NVVM reflection, enabled by default")); diff --git a/contrib/llvm/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp b/contrib/llvm/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp new file mode 100644 index 0000000..f2cb8b8 --- /dev/null +++ b/contrib/llvm/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp @@ -0,0 +1,739 @@ +//===-- PPCAsmParser.cpp - Parse PowerPC asm to MCInst instructions ---------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +#include "MCTargetDesc/PPCMCTargetDesc.h" +#include "llvm/MC/MCTargetAsmParser.h" +#include "llvm/MC/MCStreamer.h" +#include "llvm/MC/MCExpr.h" +#include "llvm/MC/MCInst.h" +#include "llvm/MC/MCRegisterInfo.h" +#include "llvm/MC/MCSubtargetInfo.h" +#include "llvm/MC/MCParser/MCAsmLexer.h" +#include "llvm/MC/MCParser/MCAsmParser.h" +#include "llvm/MC/MCParser/MCParsedAsmOperand.h" +#include "llvm/ADT/SmallString.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/StringSwitch.h" +#include "llvm/ADT/Twine.h" +#include "llvm/Support/SourceMgr.h" +#include "llvm/Support/TargetRegistry.h" +#include "llvm/Support/raw_ostream.h" + +using namespace llvm; + +namespace { + +static unsigned RRegs[32] = { + PPC::R0, PPC::R1, PPC::R2, PPC::R3, + PPC::R4, PPC::R5, PPC::R6, PPC::R7, + PPC::R8, PPC::R9, PPC::R10, PPC::R11, + PPC::R12, PPC::R13, PPC::R14, PPC::R15, + PPC::R16, PPC::R17, PPC::R18, PPC::R19, + PPC::R20, PPC::R21, PPC::R22, PPC::R23, + PPC::R24, PPC::R25, PPC::R26, PPC::R27, + PPC::R28, PPC::R29, PPC::R30, PPC::R31 +}; +static unsigned RRegsNoR0[32] = { + PPC::ZERO, + PPC::R1, PPC::R2, PPC::R3, + PPC::R4, PPC::R5, PPC::R6, PPC::R7, + PPC::R8, PPC::R9, PPC::R10, PPC::R11, + PPC::R12, PPC::R13, PPC::R14, PPC::R15, + PPC::R16, PPC::R17, PPC::R18, PPC::R19, + PPC::R20, PPC::R21, PPC::R22, PPC::R23, + PPC::R24, PPC::R25, PPC::R26, PPC::R27, + PPC::R28, PPC::R29, PPC::R30, PPC::R31 +}; +static unsigned XRegs[32] = { + PPC::X0, PPC::X1, PPC::X2, PPC::X3, + PPC::X4, PPC::X5, PPC::X6, PPC::X7, + PPC::X8, PPC::X9, PPC::X10, PPC::X11, + PPC::X12, PPC::X13, PPC::X14, PPC::X15, + PPC::X16, PPC::X17, PPC::X18, PPC::X19, + PPC::X20, PPC::X21, PPC::X22, PPC::X23, + PPC::X24, PPC::X25, PPC::X26, PPC::X27, + PPC::X28, PPC::X29, PPC::X30, PPC::X31 +}; +static unsigned XRegsNoX0[32] = { + PPC::ZERO8, + PPC::X1, PPC::X2, PPC::X3, + PPC::X4, PPC::X5, PPC::X6, PPC::X7, + PPC::X8, PPC::X9, PPC::X10, PPC::X11, + PPC::X12, PPC::X13, PPC::X14, PPC::X15, + PPC::X16, PPC::X17, PPC::X18, PPC::X19, + PPC::X20, PPC::X21, PPC::X22, PPC::X23, + PPC::X24, PPC::X25, PPC::X26, PPC::X27, + PPC::X28, PPC::X29, PPC::X30, PPC::X31 +}; +static unsigned FRegs[32] = { + PPC::F0, PPC::F1, PPC::F2, PPC::F3, + PPC::F4, PPC::F5, PPC::F6, PPC::F7, + PPC::F8, PPC::F9, PPC::F10, PPC::F11, + PPC::F12, PPC::F13, PPC::F14, PPC::F15, + PPC::F16, PPC::F17, PPC::F18, PPC::F19, + PPC::F20, PPC::F21, PPC::F22, PPC::F23, + PPC::F24, PPC::F25, PPC::F26, PPC::F27, + PPC::F28, PPC::F29, PPC::F30, PPC::F31 +}; +static unsigned VRegs[32] = { + PPC::V0, PPC::V1, PPC::V2, PPC::V3, + PPC::V4, PPC::V5, PPC::V6, PPC::V7, + PPC::V8, PPC::V9, PPC::V10, PPC::V11, + PPC::V12, PPC::V13, PPC::V14, PPC::V15, + PPC::V16, PPC::V17, PPC::V18, PPC::V19, + PPC::V20, PPC::V21, PPC::V22, PPC::V23, + PPC::V24, PPC::V25, PPC::V26, PPC::V27, + PPC::V28, PPC::V29, PPC::V30, PPC::V31 +}; +static unsigned CRBITRegs[32] = { + PPC::CR0LT, PPC::CR0GT, PPC::CR0EQ, PPC::CR0UN, + PPC::CR1LT, PPC::CR1GT, PPC::CR1EQ, PPC::CR1UN, + PPC::CR2LT, PPC::CR2GT, PPC::CR2EQ, PPC::CR2UN, + PPC::CR3LT, PPC::CR3GT, PPC::CR3EQ, PPC::CR3UN, + PPC::CR4LT, PPC::CR4GT, PPC::CR4EQ, PPC::CR4UN, + PPC::CR5LT, PPC::CR5GT, PPC::CR5EQ, PPC::CR5UN, + PPC::CR6LT, PPC::CR6GT, PPC::CR6EQ, PPC::CR6UN, + PPC::CR7LT, PPC::CR7GT, PPC::CR7EQ, PPC::CR7UN +}; +static unsigned CRRegs[8] = { + PPC::CR0, PPC::CR1, PPC::CR2, PPC::CR3, + PPC::CR4, PPC::CR5, PPC::CR6, PPC::CR7 +}; + +struct PPCOperand; + +class PPCAsmParser : public 
MCTargetAsmParser { + MCSubtargetInfo &STI; + MCAsmParser &Parser; + bool IsPPC64; + + MCAsmParser &getParser() const { return Parser; } + MCAsmLexer &getLexer() const { return Parser.getLexer(); } + + void Warning(SMLoc L, const Twine &Msg) { Parser.Warning(L, Msg); } + bool Error(SMLoc L, const Twine &Msg) { return Parser.Error(L, Msg); } + + bool isPPC64() const { return IsPPC64; } + + bool MatchRegisterName(const AsmToken &Tok, + unsigned &RegNo, int64_t &IntVal); + + virtual bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc); + + bool ParseOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands); + + bool ParseDirectiveWord(unsigned Size, SMLoc L); + bool ParseDirectiveTC(unsigned Size, SMLoc L); + + bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, + SmallVectorImpl<MCParsedAsmOperand*> &Operands, + MCStreamer &Out, unsigned &ErrorInfo, + bool MatchingInlineAsm); + + void ProcessInstruction(MCInst &Inst, + const SmallVectorImpl<MCParsedAsmOperand*> &Ops); + + /// @name Auto-generated Match Functions + /// { + +#define GET_ASSEMBLER_HEADER +#include "PPCGenAsmMatcher.inc" + + /// } + + +public: + PPCAsmParser(MCSubtargetInfo &_STI, MCAsmParser &_Parser) + : MCTargetAsmParser(), STI(_STI), Parser(_Parser) { + // Check for 64-bit vs. 32-bit pointer mode. + Triple TheTriple(STI.getTargetTriple()); + IsPPC64 = TheTriple.getArch() == Triple::ppc64; + // Initialize the set of available features. + setAvailableFeatures(ComputeAvailableFeatures(STI.getFeatureBits())); + } + + virtual bool ParseInstruction(ParseInstructionInfo &Info, + StringRef Name, SMLoc NameLoc, + SmallVectorImpl<MCParsedAsmOperand*> &Operands); + + virtual bool ParseDirective(AsmToken DirectiveID); +}; + +/// PPCOperand - Instances of this class represent a parsed PowerPC machine +/// instruction. +struct PPCOperand : public MCParsedAsmOperand { + enum KindTy { + Token, + Immediate, + Expression + } Kind; + + SMLoc StartLoc, EndLoc; + bool IsPPC64; + + struct TokOp { + const char *Data; + unsigned Length; + }; + + struct ImmOp { + int64_t Val; + }; + + struct ExprOp { + const MCExpr *Val; + }; + + union { + struct TokOp Tok; + struct ImmOp Imm; + struct ExprOp Expr; + }; + + PPCOperand(KindTy K) : MCParsedAsmOperand(), Kind(K) {} +public: + PPCOperand(const PPCOperand &o) : MCParsedAsmOperand() { + Kind = o.Kind; + StartLoc = o.StartLoc; + EndLoc = o.EndLoc; + IsPPC64 = o.IsPPC64; + switch (Kind) { + case Token: + Tok = o.Tok; + break; + case Immediate: + Imm = o.Imm; + break; + case Expression: + Expr = o.Expr; + break; + } + } + + /// getStartLoc - Get the location of the first token of this operand. + SMLoc getStartLoc() const { return StartLoc; } + + /// getEndLoc - Get the location of the last token of this operand. + SMLoc getEndLoc() const { return EndLoc; } + + /// isPPC64 - True if this operand is for an instruction in 64-bit mode. 
+ bool isPPC64() const { return IsPPC64; } + + int64_t getImm() const { + assert(Kind == Immediate && "Invalid access!"); + return Imm.Val; + } + + const MCExpr *getExpr() const { + assert(Kind == Expression && "Invalid access!"); + return Expr.Val; + } + + unsigned getReg() const { + assert(isRegNumber() && "Invalid access!"); + return (unsigned) Imm.Val; + } + + unsigned getCCReg() const { + assert(isCCRegNumber() && "Invalid access!"); + return (unsigned) Imm.Val; + } + + unsigned getCRBitMask() const { + assert(isCRBitMask() && "Invalid access!"); + return 7 - CountTrailingZeros_32(Imm.Val); + } + + bool isToken() const { return Kind == Token; } + bool isImm() const { return Kind == Immediate || Kind == Expression; } + bool isU5Imm() const { return Kind == Immediate && isUInt<5>(getImm()); } + bool isS5Imm() const { return Kind == Immediate && isInt<5>(getImm()); } + bool isU6Imm() const { return Kind == Immediate && isUInt<6>(getImm()); } + bool isU16Imm() const { return Kind == Expression || + (Kind == Immediate && isUInt<16>(getImm())); } + bool isS16Imm() const { return Kind == Expression || + (Kind == Immediate && isInt<16>(getImm())); } + bool isS16ImmX4() const { return Kind == Expression || + (Kind == Immediate && isInt<16>(getImm()) && + (getImm() & 3) == 0); } + bool isRegNumber() const { return Kind == Immediate && isUInt<5>(getImm()); } + bool isCCRegNumber() const { return Kind == Immediate && + isUInt<3>(getImm()); } + bool isCRBitMask() const { return Kind == Immediate && isUInt<8>(getImm()) && + isPowerOf2_32(getImm()); } + bool isMem() const { return false; } + bool isReg() const { return false; } + + void addRegOperands(MCInst &Inst, unsigned N) const { + llvm_unreachable("addRegOperands"); + } + + void addRegGPRCOperands(MCInst &Inst, unsigned N) const { + assert(N == 1 && "Invalid number of operands!"); + Inst.addOperand(MCOperand::CreateReg(RRegs[getReg()])); + } + + void addRegGPRCNoR0Operands(MCInst &Inst, unsigned N) const { + assert(N == 1 && "Invalid number of operands!"); + Inst.addOperand(MCOperand::CreateReg(RRegsNoR0[getReg()])); + } + + void addRegG8RCOperands(MCInst &Inst, unsigned N) const { + assert(N == 1 && "Invalid number of operands!"); + Inst.addOperand(MCOperand::CreateReg(XRegs[getReg()])); + } + + void addRegG8RCNoX0Operands(MCInst &Inst, unsigned N) const { + assert(N == 1 && "Invalid number of operands!"); + Inst.addOperand(MCOperand::CreateReg(XRegsNoX0[getReg()])); + } + + void addRegGxRCOperands(MCInst &Inst, unsigned N) const { + if (isPPC64()) + addRegG8RCOperands(Inst, N); + else + addRegGPRCOperands(Inst, N); + } + + void addRegGxRCNoR0Operands(MCInst &Inst, unsigned N) const { + if (isPPC64()) + addRegG8RCNoX0Operands(Inst, N); + else + addRegGPRCNoR0Operands(Inst, N); + } + + void addRegF4RCOperands(MCInst &Inst, unsigned N) const { + assert(N == 1 && "Invalid number of operands!"); + Inst.addOperand(MCOperand::CreateReg(FRegs[getReg()])); + } + + void addRegF8RCOperands(MCInst &Inst, unsigned N) const { + assert(N == 1 && "Invalid number of operands!"); + Inst.addOperand(MCOperand::CreateReg(FRegs[getReg()])); + } + + void addRegVRRCOperands(MCInst &Inst, unsigned N) const { + assert(N == 1 && "Invalid number of operands!"); + Inst.addOperand(MCOperand::CreateReg(VRegs[getReg()])); + } + + void addRegCRBITRCOperands(MCInst &Inst, unsigned N) const { + assert(N == 1 && "Invalid number of operands!"); + Inst.addOperand(MCOperand::CreateReg(CRBITRegs[getReg()])); + } + + void addRegCRRCOperands(MCInst &Inst, unsigned N) const { + 
assert(N == 1 && "Invalid number of operands!"); + Inst.addOperand(MCOperand::CreateReg(CRRegs[getCCReg()])); + } + + void addCRBitMaskOperands(MCInst &Inst, unsigned N) const { + assert(N == 1 && "Invalid number of operands!"); + Inst.addOperand(MCOperand::CreateReg(CRRegs[getCRBitMask()])); + } + + void addImmOperands(MCInst &Inst, unsigned N) const { + assert(N == 1 && "Invalid number of operands!"); + if (Kind == Immediate) + Inst.addOperand(MCOperand::CreateImm(getImm())); + else + Inst.addOperand(MCOperand::CreateExpr(getExpr())); + } + + void addDispRIOperands(MCInst &Inst, unsigned N) const { + assert(N == 1 && "Invalid number of operands!"); + if (Kind == Immediate) + Inst.addOperand(MCOperand::CreateImm(getImm())); + else + Inst.addOperand(MCOperand::CreateExpr(getExpr())); + } + + void addDispRIXOperands(MCInst &Inst, unsigned N) const { + assert(N == 1 && "Invalid number of operands!"); + if (Kind == Immediate) + Inst.addOperand(MCOperand::CreateImm(getImm() / 4)); + else + Inst.addOperand(MCOperand::CreateExpr(getExpr())); + } + + StringRef getToken() const { + assert(Kind == Token && "Invalid access!"); + return StringRef(Tok.Data, Tok.Length); + } + + virtual void print(raw_ostream &OS) const; + + static PPCOperand *CreateToken(StringRef Str, SMLoc S, bool IsPPC64) { + PPCOperand *Op = new PPCOperand(Token); + Op->Tok.Data = Str.data(); + Op->Tok.Length = Str.size(); + Op->StartLoc = S; + Op->EndLoc = S; + Op->IsPPC64 = IsPPC64; + return Op; + } + + static PPCOperand *CreateImm(int64_t Val, SMLoc S, SMLoc E, bool IsPPC64) { + PPCOperand *Op = new PPCOperand(Immediate); + Op->Imm.Val = Val; + Op->StartLoc = S; + Op->EndLoc = E; + Op->IsPPC64 = IsPPC64; + return Op; + } + + static PPCOperand *CreateExpr(const MCExpr *Val, + SMLoc S, SMLoc E, bool IsPPC64) { + PPCOperand *Op = new PPCOperand(Expression); + Op->Expr.Val = Val; + Op->StartLoc = S; + Op->EndLoc = E; + Op->IsPPC64 = IsPPC64; + return Op; + } +}; + +} // end anonymous namespace. 
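A note on the operand design above: PPCOperand never stores a physical register directly. A register name such as %r3 is parsed into an Immediate operand holding the number 3, and only the addReg*Operands callbacks translate that number through the RRegs/XRegs/FRegs/VRegs tables when the generated matcher builds the MCInst; the IsPPC64 flag selects the 64-bit tables for the GxRC variants. A rough sketch of that flow, reusing the types defined above (it would live inside this file; the opcode is an arbitrary example, not part of the patch):

  // What ParseOperand's '%' branch produces for "%r3": an immediate 3.
  llvm::SMLoc S, E;
  PPCOperand *Op = PPCOperand::CreateImm(/*Val=*/3, S, E, /*IsPPC64=*/false);

  // The matcher later calls the add-method for the register class the
  // instruction expects; only here does 3 turn into PPC::R3.
  llvm::MCInst Inst;
  Inst.setOpcode(PPC::ADD4);            // arbitrary example opcode
  Op->addRegGPRCOperands(Inst, 1);      // adds MCOperand::CreateReg(RRegs[3])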
+ +void PPCOperand::print(raw_ostream &OS) const { + switch (Kind) { + case Token: + OS << "'" << getToken() << "'"; + break; + case Immediate: + OS << getImm(); + break; + case Expression: + getExpr()->print(OS); + break; + } +} + + +void PPCAsmParser:: +ProcessInstruction(MCInst &Inst, + const SmallVectorImpl<MCParsedAsmOperand*> &Operands) { + switch (Inst.getOpcode()) { + case PPC::SLWI: { + MCInst TmpInst; + int64_t N = Inst.getOperand(2).getImm(); + TmpInst.setOpcode(PPC::RLWINM); + TmpInst.addOperand(Inst.getOperand(0)); + TmpInst.addOperand(Inst.getOperand(1)); + TmpInst.addOperand(MCOperand::CreateImm(N)); + TmpInst.addOperand(MCOperand::CreateImm(0)); + TmpInst.addOperand(MCOperand::CreateImm(31 - N)); + Inst = TmpInst; + break; + } + case PPC::SRWI: { + MCInst TmpInst; + int64_t N = Inst.getOperand(2).getImm(); + TmpInst.setOpcode(PPC::RLWINM); + TmpInst.addOperand(Inst.getOperand(0)); + TmpInst.addOperand(Inst.getOperand(1)); + TmpInst.addOperand(MCOperand::CreateImm(32 - N)); + TmpInst.addOperand(MCOperand::CreateImm(N)); + TmpInst.addOperand(MCOperand::CreateImm(31)); + Inst = TmpInst; + break; + } + case PPC::SLDI: { + MCInst TmpInst; + int64_t N = Inst.getOperand(2).getImm(); + TmpInst.setOpcode(PPC::RLDICR); + TmpInst.addOperand(Inst.getOperand(0)); + TmpInst.addOperand(Inst.getOperand(1)); + TmpInst.addOperand(MCOperand::CreateImm(N)); + TmpInst.addOperand(MCOperand::CreateImm(63 - N)); + Inst = TmpInst; + break; + } + case PPC::SRDI: { + MCInst TmpInst; + int64_t N = Inst.getOperand(2).getImm(); + TmpInst.setOpcode(PPC::RLDICL); + TmpInst.addOperand(Inst.getOperand(0)); + TmpInst.addOperand(Inst.getOperand(1)); + TmpInst.addOperand(MCOperand::CreateImm(64 - N)); + TmpInst.addOperand(MCOperand::CreateImm(N)); + Inst = TmpInst; + break; + } + } +} + +bool PPCAsmParser:: +MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, + SmallVectorImpl<MCParsedAsmOperand*> &Operands, + MCStreamer &Out, unsigned &ErrorInfo, + bool MatchingInlineAsm) { + MCInst Inst; + + switch (MatchInstructionImpl(Operands, Inst, ErrorInfo, MatchingInlineAsm)) { + default: break; + case Match_Success: + // Post-process instructions (typically extended mnemonics) + ProcessInstruction(Inst, Operands); + Inst.setLoc(IDLoc); + Out.EmitInstruction(Inst); + return false; + case Match_MissingFeature: + return Error(IDLoc, "instruction use requires an option to be enabled"); + case Match_MnemonicFail: + return Error(IDLoc, "unrecognized instruction mnemonic"); + case Match_InvalidOperand: { + SMLoc ErrorLoc = IDLoc; + if (ErrorInfo != ~0U) { + if (ErrorInfo >= Operands.size()) + return Error(IDLoc, "too few operands for instruction"); + + ErrorLoc = ((PPCOperand*)Operands[ErrorInfo])->getStartLoc(); + if (ErrorLoc == SMLoc()) ErrorLoc = IDLoc; + } + + return Error(ErrorLoc, "invalid operand for instruction"); + } + } + + llvm_unreachable("Implement any new match types added!"); +} + +bool PPCAsmParser:: +MatchRegisterName(const AsmToken &Tok, unsigned &RegNo, int64_t &IntVal) { + if (Tok.is(AsmToken::Identifier)) { + StringRef Name = Tok.getString(); + + if (Name.equals_lower("lr")) { + RegNo = isPPC64()? PPC::LR8 : PPC::LR; + IntVal = 8; + return false; + } else if (Name.equals_lower("ctr")) { + RegNo = isPPC64()? PPC::CTR8 : PPC::CTR; + IntVal = 9; + return false; + } else if (Name.substr(0, 1).equals_lower("r") && + !Name.substr(1).getAsInteger(10, IntVal) && IntVal < 32) { + RegNo = isPPC64()? 
XRegs[IntVal] : RRegs[IntVal]; + return false; + } else if (Name.substr(0, 1).equals_lower("f") && + !Name.substr(1).getAsInteger(10, IntVal) && IntVal < 32) { + RegNo = FRegs[IntVal]; + return false; + } else if (Name.substr(0, 1).equals_lower("v") && + !Name.substr(1).getAsInteger(10, IntVal) && IntVal < 32) { + RegNo = VRegs[IntVal]; + return false; + } else if (Name.substr(0, 2).equals_lower("cr") && + !Name.substr(2).getAsInteger(10, IntVal) && IntVal < 8) { + RegNo = CRRegs[IntVal]; + return false; + } + } + + return true; +} + +bool PPCAsmParser:: +ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc) { + const AsmToken &Tok = Parser.getTok(); + StartLoc = Tok.getLoc(); + EndLoc = Tok.getEndLoc(); + RegNo = 0; + int64_t IntVal; + + if (!MatchRegisterName(Tok, RegNo, IntVal)) { + Parser.Lex(); // Eat identifier token. + return false; + } + + return Error(StartLoc, "invalid register name"); +} + +bool PPCAsmParser:: +ParseOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { + SMLoc S = Parser.getTok().getLoc(); + SMLoc E = SMLoc::getFromPointer(Parser.getTok().getLoc().getPointer() - 1); + const MCExpr *EVal; + PPCOperand *Op; + + // Attempt to parse the next token as an immediate + switch (getLexer().getKind()) { + // Special handling for register names. These are interpreted + // as immediates corresponding to the register number. + case AsmToken::Percent: + Parser.Lex(); // Eat the '%'. + unsigned RegNo; + int64_t IntVal; + if (!MatchRegisterName(Parser.getTok(), RegNo, IntVal)) { + Parser.Lex(); // Eat the identifier token. + Op = PPCOperand::CreateImm(IntVal, S, E, isPPC64()); + Operands.push_back(Op); + return false; + } + return Error(S, "invalid register name"); + + // All other expressions + case AsmToken::LParen: + case AsmToken::Plus: + case AsmToken::Minus: + case AsmToken::Integer: + case AsmToken::Identifier: + case AsmToken::Dot: + case AsmToken::Dollar: + if (!getParser().parseExpression(EVal)) + break; + /* fall through */ + default: + return Error(S, "unknown operand"); + } + + if (const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(EVal)) + Op = PPCOperand::CreateImm(CE->getValue(), S, E, isPPC64()); + else + Op = PPCOperand::CreateExpr(EVal, S, E, isPPC64()); + + // Push the parsed operand into the list of operands + Operands.push_back(Op); + + // Check for D-form memory operands + if (getLexer().is(AsmToken::LParen)) { + Parser.Lex(); // Eat the '('. + S = Parser.getTok().getLoc(); + + int64_t IntVal; + switch (getLexer().getKind()) { + case AsmToken::Percent: + Parser.Lex(); // Eat the '%'. + unsigned RegNo; + if (MatchRegisterName(Parser.getTok(), RegNo, IntVal)) + return Error(S, "invalid register name"); + Parser.Lex(); // Eat the identifier token. + break; + + case AsmToken::Integer: + if (getParser().parseAbsoluteExpression(IntVal) || + IntVal < 0 || IntVal > 31) + return Error(S, "invalid register number"); + break; + + default: + return Error(S, "invalid memory operand"); + } + + if (getLexer().isNot(AsmToken::RParen)) + return Error(Parser.getTok().getLoc(), "missing ')'"); + E = Parser.getTok().getLoc(); + Parser.Lex(); // Eat the ')'. + + Op = PPCOperand::CreateImm(IntVal, S, E, isPPC64()); + Operands.push_back(Op); + } + + return false; +} + +/// Parse an instruction mnemonic followed by its operands. +bool PPCAsmParser:: +ParseInstruction(ParseInstructionInfo &Info, StringRef Name, SMLoc NameLoc, + SmallVectorImpl<MCParsedAsmOperand*> &Operands) { + // The first operand is the token for the instruction name. 
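Stepping back to ProcessInstruction a little further up: the four extended shift mnemonics are rewritten into the architectural rotate-and-mask instructions, following the standard expansions (slwi rA,rS,n becomes rlwinm rA,rS,n,0,31-n; srwi becomes rlwinm with SH=32-n, MB=n, ME=31; sldi and srdi go to rldicr and rldicl). A small standalone sketch of just the parameter arithmetic, in plain C++ with hypothetical helper names, to make the mapping explicit:

  #include <cassert>
  #include <cstdint>

  struct RLWINMArgs { int64_t SH, MB, ME; };   // 32-bit rotate-then-mask parameters
  struct RLDICArgs  { int64_t SH, M; };        // 64-bit rotate with a single mask bound

  // slwi rA,rS,n == rlwinm rA,rS,n,0,31-n
  RLWINMArgs expandSLWI(int64_t N) { assert(N >= 0 && N < 32); RLWINMArgs A = { N, 0, 31 - N }; return A; }
  // srwi rA,rS,n == rlwinm rA,rS,32-n,n,31
  RLWINMArgs expandSRWI(int64_t N) { assert(N >= 0 && N < 32); RLWINMArgs A = { 32 - N, N, 31 }; return A; }
  // sldi rA,rS,n == rldicr rA,rS,n,63-n
  RLDICArgs expandSLDI(int64_t N) { assert(N >= 0 && N < 64); RLDICArgs A = { N, 63 - N }; return A; }
  // srdi rA,rS,n == rldicl rA,rS,64-n,n
  RLDICArgs expandSRDI(int64_t N) { assert(N >= 0 && N < 64); RLDICArgs A = { 64 - N, N }; return A; }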
+ // If the instruction ends in a '.', we need to create a separate + // token for it, to match what TableGen is doing. + size_t Dot = Name.find('.'); + StringRef Mnemonic = Name.slice(0, Dot); + Operands.push_back(PPCOperand::CreateToken(Mnemonic, NameLoc, isPPC64())); + if (Dot != StringRef::npos) { + SMLoc DotLoc = SMLoc::getFromPointer(NameLoc.getPointer() + Dot); + StringRef DotStr = Name.slice(Dot, StringRef::npos); + Operands.push_back(PPCOperand::CreateToken(DotStr, DotLoc, isPPC64())); + } + + // If there are no more operands then finish + if (getLexer().is(AsmToken::EndOfStatement)) + return false; + + // Parse the first operand + if (ParseOperand(Operands)) + return true; + + while (getLexer().isNot(AsmToken::EndOfStatement) && + getLexer().is(AsmToken::Comma)) { + // Consume the comma token + getLexer().Lex(); + + // Parse the next operand + if (ParseOperand(Operands)) + return true; + } + + return false; +} + +/// ParseDirective parses the PPC specific directives +bool PPCAsmParser::ParseDirective(AsmToken DirectiveID) { + StringRef IDVal = DirectiveID.getIdentifier(); + if (IDVal == ".word") + return ParseDirectiveWord(4, DirectiveID.getLoc()); + if (IDVal == ".tc") + return ParseDirectiveTC(isPPC64()? 8 : 4, DirectiveID.getLoc()); + return true; +} + +/// ParseDirectiveWord +/// ::= .word [ expression (, expression)* ] +bool PPCAsmParser::ParseDirectiveWord(unsigned Size, SMLoc L) { + if (getLexer().isNot(AsmToken::EndOfStatement)) { + for (;;) { + const MCExpr *Value; + if (getParser().parseExpression(Value)) + return true; + + getParser().getStreamer().EmitValue(Value, Size); + + if (getLexer().is(AsmToken::EndOfStatement)) + break; + + if (getLexer().isNot(AsmToken::Comma)) + return Error(L, "unexpected token in directive"); + Parser.Lex(); + } + } + + Parser.Lex(); + return false; +} + +/// ParseDirectiveTC +/// ::= .tc [ symbol (, expression)* ] +bool PPCAsmParser::ParseDirectiveTC(unsigned Size, SMLoc L) { + // Skip TC symbol, which is only used with XCOFF. + while (getLexer().isNot(AsmToken::EndOfStatement) + && getLexer().isNot(AsmToken::Comma)) + Parser.Lex(); + if (getLexer().isNot(AsmToken::Comma)) + return Error(L, "unexpected token in directive"); + Parser.Lex(); + + // Align to word size. + getParser().getStreamer().EmitValueToAlignment(Size); + + // Emit expressions. + return ParseDirectiveWord(Size, L); +} + +/// Force static initialization. +extern "C" void LLVMInitializePowerPCAsmParser() { + RegisterMCAsmParser<PPCAsmParser> A(ThePPC32Target); + RegisterMCAsmParser<PPCAsmParser> B(ThePPC64Target); +} + +#define GET_REGISTER_MATCHER +#define GET_MATCHER_IMPLEMENTATION +#include "PPCGenAsmMatcher.inc" diff --git a/contrib/llvm/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp b/contrib/llvm/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp index bacc108..93fca00 100644 --- a/contrib/llvm/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp +++ b/contrib/llvm/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp @@ -151,8 +151,8 @@ void PPCInstPrinter::printBranchOperand(const MCInst *MI, unsigned OpNo, return printOperand(MI, OpNo, O); // Branches can take an immediate operand. This is used by the branch - // selection pass to print $+8, an eight byte displacement from the PC. - O << "$+"; + // selection pass to print .+8, an eight byte displacement from the PC. 
+ O << ".+"; printAbsAddrOperand(MI, OpNo, O); } diff --git a/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp b/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp index 84e4175..7a84723 100644 --- a/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp +++ b/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp @@ -77,6 +77,9 @@ unsigned PPCELFObjectWriter::getRelocTypeInner(const MCValue &Target, case PPC::fixup_ppc_br24: Type = ELF::R_PPC_REL24; break; + case PPC::fixup_ppc_brcond14: + Type = ELF::R_PPC_REL14; + break; case FK_Data_4: case FK_PCRel_4: Type = ELF::R_PPC_REL32; @@ -104,7 +107,8 @@ unsigned PPCELFObjectWriter::getRelocTypeInner(const MCValue &Target, case MCSymbolRefExpr::VK_PPC_DTPREL16_HA: Type = ELF::R_PPC64_DTPREL16_HA; break; - case MCSymbolRefExpr::VK_None: + case MCSymbolRefExpr::VK_PPC_GAS_HA16: + case MCSymbolRefExpr::VK_PPC_DARWIN_HA16: Type = ELF::R_PPC_ADDR16_HA; break; case MCSymbolRefExpr::VK_PPC_TOC16_HA: @@ -131,6 +135,10 @@ unsigned PPCELFObjectWriter::getRelocTypeInner(const MCValue &Target, Type = ELF::R_PPC64_DTPREL16_LO; break; case MCSymbolRefExpr::VK_None: + Type = ELF::R_PPC_ADDR16; + break; + case MCSymbolRefExpr::VK_PPC_GAS_LO16: + case MCSymbolRefExpr::VK_PPC_DARWIN_LO16: Type = ELF::R_PPC_ADDR16_LO; break; case MCSymbolRefExpr::VK_PPC_TOC_ENTRY: @@ -153,6 +161,10 @@ unsigned PPCELFObjectWriter::getRelocTypeInner(const MCValue &Target, case MCSymbolRefExpr::VK_None: Type = ELF::R_PPC64_ADDR16_DS; break; + case MCSymbolRefExpr::VK_PPC_GAS_LO16: + case MCSymbolRefExpr::VK_PPC_DARWIN_LO16: + Type = ELF::R_PPC64_ADDR16_LO_DS; + break; case MCSymbolRefExpr::VK_PPC_TOC_ENTRY: Type = ELF::R_PPC64_TOC16_DS; break; diff --git a/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCPredicates.cpp b/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCPredicates.cpp index d84eb9c..853e505 100644 --- a/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCPredicates.cpp +++ b/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCPredicates.cpp @@ -29,3 +29,18 @@ PPC::Predicate PPC::InvertPredicate(PPC::Predicate Opcode) { } llvm_unreachable("Unknown PPC branch opcode!"); } + +PPC::Predicate PPC::getSwappedPredicate(PPC::Predicate Opcode) { + switch (Opcode) { + case PPC::PRED_EQ: return PPC::PRED_EQ; + case PPC::PRED_NE: return PPC::PRED_NE; + case PPC::PRED_LT: return PPC::PRED_GT; + case PPC::PRED_GE: return PPC::PRED_LE; + case PPC::PRED_GT: return PPC::PRED_LT; + case PPC::PRED_LE: return PPC::PRED_GE; + case PPC::PRED_NU: return PPC::PRED_NU; + case PPC::PRED_UN: return PPC::PRED_UN; + } + llvm_unreachable("Unknown PPC branch opcode!"); +} + diff --git a/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCPredicates.h b/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCPredicates.h index ad2b018..444758c 100644 --- a/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCPredicates.h +++ b/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCPredicates.h @@ -37,6 +37,10 @@ namespace PPC { /// Invert the specified predicate. != -> ==, < -> >=. Predicate InvertPredicate(Predicate Opcode); + + /// Assume the condition register is set by MI(a,b), return the predicate if + /// we modify the instructions such that condition register is set by MI(b,a). 
+ Predicate getSwappedPredicate(Predicate Opcode); } } diff --git a/contrib/llvm/lib/Target/PowerPC/PPC.h b/contrib/llvm/lib/Target/PowerPC/PPC.h index 446b685..b4be51a 100644 --- a/contrib/llvm/lib/Target/PowerPC/PPC.h +++ b/contrib/llvm/lib/Target/PowerPC/PPC.h @@ -31,6 +31,7 @@ namespace llvm { class MCInst; FunctionPass *createPPCCTRLoops(); + FunctionPass *createPPCEarlyReturnPass(); FunctionPass *createPPCBranchSelectionPass(); FunctionPass *createPPCISelDag(PPCTargetMachine &TM); FunctionPass *createPPCJITCodeEmitterPass(PPCTargetMachine &TM, @@ -40,7 +41,7 @@ namespace llvm { /// \brief Creates an PPC-specific Target Transformation Info pass. ImmutablePass *createPPCTargetTransformInfoPass(const PPCTargetMachine *TM); - + namespace PPCII { /// Target Operand Flag enum. diff --git a/contrib/llvm/lib/Target/PowerPC/PPC.td b/contrib/llvm/lib/Target/PowerPC/PPC.td index 3892162..eb73c67 100644 --- a/contrib/llvm/lib/Target/PowerPC/PPC.td +++ b/contrib/llvm/lib/Target/PowerPC/PPC.td @@ -95,6 +95,43 @@ def FeatureQPX : SubtargetFeature<"qpx","HasQPX", "true", // VSX p7 vector-scalar instruction set //===----------------------------------------------------------------------===// +// Classes used for relation maps. +//===----------------------------------------------------------------------===// +// RecFormRel - Filter class used to relate non-record-form instructions with +// their record-form variants. +class RecFormRel; + +//===----------------------------------------------------------------------===// +// Relation Map Definitions. +//===----------------------------------------------------------------------===// + +def getRecordFormOpcode : InstrMapping { + let FilterClass = "RecFormRel"; + // Instructions with the same BaseName and Interpretation64Bit values + // form a row. + let RowFields = ["BaseName", "Interpretation64Bit"]; + // Instructions with the same RC value form a column. + let ColFields = ["RC"]; + // The key column are the non-record-form instructions. + let KeyCol = ["0"]; + // Value columns RC=1 + let ValueCols = [["1"]]; +} + +def getNonRecordFormOpcode : InstrMapping { + let FilterClass = "RecFormRel"; + // Instructions with the same BaseName and Interpretation64Bit values + // form a row. + let RowFields = ["BaseName", "Interpretation64Bit"]; + // Instructions with the same RC value form a column. + let ColFields = ["RC"]; + // The key column are the record-form instructions. + let KeyCol = ["1"]; + // Value columns are RC=0 + let ValueCols = [["0"]]; +} + +//===----------------------------------------------------------------------===// // Register File Description //===----------------------------------------------------------------------===// @@ -216,7 +253,6 @@ def : ProcessorModel<"ppc64", G5Model, FeatureFRSQRTE, FeatureSTFIWX, Feature64Bit /*, Feature64BitRegs */]>; - //===----------------------------------------------------------------------===// // Calling Conventions //===----------------------------------------------------------------------===// @@ -232,9 +268,14 @@ def PPCAsmWriter : AsmWriter { bit isMCAsmWriter = 1; } +def PPCAsmParser : AsmParser { + let ShouldEmitMatchRegisterName = 0; +} + def PPC : Target { // Information about the instructions. 
let InstructionSet = PPCInstrInfo; let AssemblyWriters = [PPCAsmWriter]; + let AssemblyParsers = [PPCAsmParser]; } diff --git a/contrib/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp b/contrib/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp index 96a9f0a..3c7cc4e 100644 --- a/contrib/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp +++ b/contrib/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp @@ -721,7 +721,7 @@ void PPCLinuxAsmPrinter::EmitFunctionEntryLabel() { return AsmPrinter::EmitFunctionEntryLabel(); // Emit an official procedure descriptor. - const MCSection *Current = OutStreamer.getCurrentSection(); + MCSectionSubPair Current = OutStreamer.getCurrentSection(); const MCSectionELF *Section = OutStreamer.getContext().getELFSection(".opd", ELF::SHT_PROGBITS, ELF::SHF_WRITE | ELF::SHF_ALLOC, SectionKind::getReadOnly()); @@ -741,7 +741,7 @@ void PPCLinuxAsmPrinter::EmitFunctionEntryLabel() { 8/*size*/); // Emit a null environment pointer. OutStreamer.EmitIntValue(0, 8 /* size */); - OutStreamer.SwitchSection(Current); + OutStreamer.SwitchSection(Current.first, Current.second); MCSymbol *RealFnSym = OutContext.GetOrCreateSymbol( ".L." + Twine(CurrentFnSym->getName())); diff --git a/contrib/llvm/lib/Target/PowerPC/PPCBranchSelector.cpp b/contrib/llvm/lib/Target/PowerPC/PPCBranchSelector.cpp index bd1c378..3e608ca 100644 --- a/contrib/llvm/lib/Target/PowerPC/PPCBranchSelector.cpp +++ b/contrib/llvm/lib/Target/PowerPC/PPCBranchSelector.cpp @@ -112,15 +112,21 @@ bool PPCBSel::runOnMachineFunction(MachineFunction &Fn) { unsigned MBBStartOffset = 0; for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end(); I != E; ++I) { - if (I->getOpcode() != PPC::BCC || I->getOperand(2).isImm()) { + MachineBasicBlock *Dest = 0; + if (I->getOpcode() == PPC::BCC && !I->getOperand(2).isImm()) + Dest = I->getOperand(2).getMBB(); + else if ((I->getOpcode() == PPC::BDNZ8 || I->getOpcode() == PPC::BDNZ || + I->getOpcode() == PPC::BDZ8 || I->getOpcode() == PPC::BDZ) && + !I->getOperand(0).isImm()) + Dest = I->getOperand(0).getMBB(); + + if (!Dest) { MBBStartOffset += TII->GetInstSizeInBytes(I); continue; } // Determine the offset from the current branch to the destination // block. - MachineBasicBlock *Dest = I->getOperand(2).getMBB(); - int BranchSize; if (Dest->getNumber() <= MBB.getNumber()) { // If this is a backwards branch, the delta is the offset from the diff --git a/contrib/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp b/contrib/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp index 3244b90..c845909 100644 --- a/contrib/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp +++ b/contrib/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp @@ -223,9 +223,7 @@ unsigned PPCFrameLowering::determineFrameLayout(MachineFunction &MF, // If we are a leaf function, and use up to 224 bytes of stack space, // don't have a frame pointer, calls, or dynamic alloca then we do not need - // to adjust the stack pointer (we fit in the Red Zone). For 64-bit - // SVR4, we also require a stack frame if we need to spill the CR, - // since this spill area is addressed relative to the stack pointer. + // to adjust the stack pointer (we fit in the Red Zone). // The 32-bit SVR4 ABI has no Red Zone. However, it can still generate // stackless code if all local vars are reg-allocated. bool DisableRedZone = MF.getFunction()->getAttributes(). @@ -237,9 +235,6 @@ unsigned PPCFrameLowering::determineFrameLayout(MachineFunction &MF, FrameSize <= 224 && // Fits in red zone. !MFI->hasVarSizedObjects() && // No dynamic alloca. !MFI->adjustsStack() && // No calls. 
- !(Subtarget.isPPC64() && // No 64-bit SVR4 CRsave. - Subtarget.isSVR4ABI() - && spillsCR(MF)) && (!ALIGN_STACK || MaxAlign <= TargetAlign)) { // No special alignment. // No need for frame if (UpdateMF) @@ -373,6 +368,7 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF) const { // Check if the link register (LR) must be saved. PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>(); bool MustSaveLR = FI->mustSaveLR(); + const SmallVector<unsigned, 3> &MustSaveCRs = FI->getMustSaveCRs(); // Do we have a frame pointer for this function? bool HasFP = hasFP(MF); @@ -394,6 +390,13 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF) const { if (MustSaveLR) BuildMI(MBB, MBBI, dl, TII.get(PPC::MFLR8), PPC::X0); + if (!MustSaveCRs.empty()) { + MachineInstrBuilder MIB = + BuildMI(MBB, MBBI, dl, TII.get(PPC::MFCR8), PPC::X12); + for (unsigned i = 0, e = MustSaveCRs.size(); i != e; ++i) + MIB.addReg(MustSaveCRs[i], RegState::ImplicitKill); + } + if (HasFP) BuildMI(MBB, MBBI, dl, TII.get(PPC::STD)) .addReg(PPC::X31) @@ -405,6 +408,12 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF) const { .addReg(PPC::X0) .addImm(LROffset / 4) .addReg(PPC::X1); + + if (!MustSaveCRs.empty()) + BuildMI(MBB, MBBI, dl, TII.get(PPC::STW8)) + .addReg(PPC::X12, getKillRegState(true)) + .addImm(8) + .addReg(PPC::X1); } else { if (MustSaveLR) BuildMI(MBB, MBBI, dl, TII.get(PPC::MFLR), PPC::R0); @@ -417,6 +426,9 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF) const { .addImm(FPOffset) .addReg(PPC::R1); + assert(MustSaveCRs.empty() && + "Prologue CR saving supported only in 64-bit mode"); + if (MustSaveLR) BuildMI(MBB, MBBI, dl, TII.get(PPC::STW)) .addReg(PPC::R0) @@ -580,7 +592,7 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF) const { // spilled CRs. if (Subtarget.isSVR4ABI() && (PPC::CR2 <= Reg && Reg <= PPC::CR4) - && !spillsCR(MF)) + && MustSaveCRs.empty()) continue; // For 64-bit SVR4 when we have spilled CRs, the spill location @@ -636,6 +648,7 @@ void PPCFrameLowering::emitEpilogue(MachineFunction &MF, // Check if the link register (LR) has been saved. PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>(); bool MustSaveLR = FI->mustSaveLR(); + const SmallVector<unsigned, 3> &MustSaveCRs = FI->getMustSaveCRs(); // Do we have a frame pointer for this function? 
bool HasFP = hasFP(MF); @@ -736,10 +749,19 @@ void PPCFrameLowering::emitEpilogue(MachineFunction &MF, BuildMI(MBB, MBBI, dl, TII.get(PPC::LD), PPC::X0) .addImm(LROffset/4).addReg(PPC::X1); + if (!MustSaveCRs.empty()) + BuildMI(MBB, MBBI, dl, TII.get(PPC::LWZ8), PPC::X12) + .addImm(8).addReg(PPC::X1); + if (HasFP) BuildMI(MBB, MBBI, dl, TII.get(PPC::LD), PPC::X31) .addImm(FPOffset/4).addReg(PPC::X1); + if (!MustSaveCRs.empty()) + for (unsigned i = 0, e = MustSaveCRs.size(); i != e; ++i) + BuildMI(MBB, MBBI, dl, TII.get(PPC::MTCRF8), MustSaveCRs[i]) + .addReg(PPC::X12, getKillRegState(i == e-1)); + if (MustSaveLR) BuildMI(MBB, MBBI, dl, TII.get(PPC::MTLR8)).addReg(PPC::X0); } else { @@ -747,6 +769,9 @@ void PPCFrameLowering::emitEpilogue(MachineFunction &MF, BuildMI(MBB, MBBI, dl, TII.get(PPC::LWZ), PPC::R0) .addImm(LROffset).addReg(PPC::R1); + assert(MustSaveCRs.empty() && + "Epilogue CR restoring supported only in 64-bit mode"); + if (HasFP) BuildMI(MBB, MBBI, dl, TII.get(PPC::LWZ), PPC::R31) .addImm(FPOffset).addReg(PPC::R1); @@ -1122,44 +1147,42 @@ PPCFrameLowering::spillCalleeSavedRegisters(MachineBasicBlock &MBB, *static_cast<const PPCInstrInfo*>(MF->getTarget().getInstrInfo()); DebugLoc DL; bool CRSpilled = false; + MachineInstrBuilder CRMIB; for (unsigned i = 0, e = CSI.size(); i != e; ++i) { unsigned Reg = CSI[i].getReg(); // CR2 through CR4 are the nonvolatile CR fields. bool IsCRField = PPC::CR2 <= Reg && Reg <= PPC::CR4; - if (CRSpilled && IsCRField) - continue; - // Add the callee-saved register as live-in; it's killed at the spill. MBB.addLiveIn(Reg); + if (CRSpilled && IsCRField) { + CRMIB.addReg(Reg, RegState::ImplicitKill); + continue; + } + // Insert the spill to the stack frame. if (IsCRField) { - CRSpilled = true; - // The first time we see a CR field, store the whole CR into the - // save slot via GPR12 (available in the prolog for 32- and 64-bit). + PPCFunctionInfo *FuncInfo = MF->getInfo<PPCFunctionInfo>(); if (Subtarget.isPPC64()) { - // 64-bit: SP+8 - MBB.insert(MI, BuildMI(*MF, DL, TII.get(PPC::MFCR8), PPC::X12)); - MBB.insert(MI, BuildMI(*MF, DL, TII.get(PPC::STW8)) - .addReg(PPC::X12, - getKillRegState(true)) - .addImm(8) - .addReg(PPC::X1)); + // The actual spill will happen at the start of the prologue. + FuncInfo->addMustSaveCR(Reg); } else { + CRSpilled = true; + FuncInfo->setSpillsCR(); + // 32-bit: FP-relative. Note that we made sure CR2-CR4 all have // the same frame index in PPCRegisterInfo::hasReservedSpillSlot. - MBB.insert(MI, BuildMI(*MF, DL, TII.get(PPC::MFCR), PPC::R12)); + CRMIB = BuildMI(*MF, DL, TII.get(PPC::MFCR), PPC::R12) + .addReg(Reg, RegState::ImplicitKill); + + MBB.insert(MI, CRMIB); MBB.insert(MI, addFrameReference(BuildMI(*MF, DL, TII.get(PPC::STW)) .addReg(PPC::R12, getKillRegState(true)), CSI[i].getFrameIdx())); } - - // Record that we spill the CR in this function. 
- PPCFunctionInfo *FuncInfo = MF->getInfo<PPCFunctionInfo>(); - FuncInfo->setSpillsCR(); } else { const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg); TII.storeRegToStackSlot(MBB, MI, Reg, true, @@ -1170,7 +1193,8 @@ PPCFrameLowering::spillCalleeSavedRegisters(MachineBasicBlock &MBB, } static void -restoreCRs(bool isPPC64, bool CR2Spilled, bool CR3Spilled, bool CR4Spilled, +restoreCRs(bool isPPC64, bool is31, + bool CR2Spilled, bool CR3Spilled, bool CR4Spilled, MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, const std::vector<CalleeSavedInfo> &CSI, unsigned CSIIndex) { @@ -1180,14 +1204,10 @@ restoreCRs(bool isPPC64, bool CR2Spilled, bool CR3Spilled, bool CR4Spilled, DebugLoc DL; unsigned RestoreOp, MoveReg; - if (isPPC64) { - // 64-bit: SP+8 - MBB.insert(MI, BuildMI(*MF, DL, TII.get(PPC::LWZ8), PPC::X12) - .addImm(8) - .addReg(PPC::X1)); - RestoreOp = PPC::MTCRF8; - MoveReg = PPC::X12; - } else { + if (isPPC64) + // This is handled during epilogue generation. + return; + else { // 32-bit: FP-relative MBB.insert(MI, addFrameReference(BuildMI(*MF, DL, TII.get(PPC::LWZ), PPC::R12), @@ -1297,7 +1317,9 @@ PPCFrameLowering::restoreCalleeSavedRegisters(MachineBasicBlock &MBB, // least one CR register, restore all spilled CRs together. if ((CR2Spilled || CR3Spilled || CR4Spilled) && !(PPC::CR2 <= Reg && Reg <= PPC::CR4)) { - restoreCRs(Subtarget.isPPC64(), CR2Spilled, CR3Spilled, CR4Spilled, + bool is31 = needsFP(*MF); + restoreCRs(Subtarget.isPPC64(), is31, + CR2Spilled, CR3Spilled, CR4Spilled, MBB, I, CSI, CSIIndex); CR2Spilled = CR3Spilled = CR4Spilled = false; } @@ -1320,9 +1342,11 @@ PPCFrameLowering::restoreCalleeSavedRegisters(MachineBasicBlock &MBB, } // If we haven't yet spilled the CRs, do so now. - if (CR2Spilled || CR3Spilled || CR4Spilled) - restoreCRs(Subtarget.isPPC64(), CR2Spilled, CR3Spilled, CR4Spilled, + if (CR2Spilled || CR3Spilled || CR4Spilled) { + bool is31 = needsFP(*MF); + restoreCRs(Subtarget.isPPC64(), is31, CR2Spilled, CR3Spilled, CR4Spilled, MBB, I, CSI, CSIIndex); + } return true; } diff --git a/contrib/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp b/contrib/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp index 95efc11..aed0fbb 100644 --- a/contrib/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp +++ b/contrib/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp @@ -457,7 +457,7 @@ SDNode *PPCDAGToDAGISel::SelectBitfieldInsert(SDNode *N) { SH &= 31; SDValue Ops[] = { Op0, Op1, getI32Imm(SH), getI32Imm(MB), getI32Imm(ME) }; - return CurDAG->getMachineNode(PPC::RLWIMI, dl, MVT::i32, Ops, 5); + return CurDAG->getMachineNode(PPC::RLWIMI, dl, MVT::i32, Ops); } } return 0; @@ -780,7 +780,7 @@ SDNode *PPCDAGToDAGISel::SelectSETCC(SDNode *N) { } case ISD::SETGT: { SDValue Ops[] = { Op, getI32Imm(1), getI32Imm(31), getI32Imm(31) }; - Op = SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32, Ops, 4), + Op = SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32, Ops), 0); return CurDAG->SelectNodeTo(N, PPC::XORI, MVT::i32, Op, getI32Imm(1)); @@ -873,7 +873,7 @@ SDNode *PPCDAGToDAGISel::SelectSETCC(SDNode *N) { // Get the specified bit. SDValue Tmp = - SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32, Ops, 4), 0); + SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32, Ops), 0); if (Inv) { assert(OtherCondIdx == -1 && "Can't have split plus negation"); return CurDAG->SelectNodeTo(N, PPC::XORI, MVT::i32, Tmp, getI32Imm(1)); @@ -885,7 +885,7 @@ SDNode *PPCDAGToDAGISel::SelectSETCC(SDNode *N) { // Get the other bit of the comparison. 
Ops[1] = getI32Imm((32-(3-OtherCondIdx)) & 31); SDValue OtherCond = - SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32, Ops, 4), 0); + SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32, Ops), 0); return CurDAG->SelectNodeTo(N, PPC::OR, MVT::i32, Tmp, OtherCond); } @@ -1079,7 +1079,7 @@ SDNode *PPCDAGToDAGISel::Select(SDNode *N) { SDValue Ops[] = { Offset, Base, Chain }; return CurDAG->getMachineNode(Opcode, dl, LD->getValueType(0), PPCLowering.getPointerTy(), - MVT::Other, Ops, 3); + MVT::Other, Ops); } else { unsigned Opcode; bool isSExt = LD->getExtensionType() == ISD::SEXTLOAD; @@ -1114,7 +1114,7 @@ SDNode *PPCDAGToDAGISel::Select(SDNode *N) { SDValue Ops[] = { Base, Offset, Chain }; return CurDAG->getMachineNode(Opcode, dl, LD->getValueType(0), PPCLowering.getPointerTy(), - MVT::Other, Ops, 3); + MVT::Other, Ops); } } @@ -1163,7 +1163,7 @@ SDNode *PPCDAGToDAGISel::Select(SDNode *N) { SDValue Ops[] = { N->getOperand(0).getOperand(0), N->getOperand(0).getOperand(1), getI32Imm(0), getI32Imm(MB),getI32Imm(ME) }; - return CurDAG->getMachineNode(PPC::RLWIMI, dl, MVT::i32, Ops, 5); + return CurDAG->getMachineNode(PPC::RLWIMI, dl, MVT::i32, Ops); } } diff --git a/contrib/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/contrib/llvm/lib/Target/PowerPC/PPCISelLowering.cpp index 16fc8a0..3fcafdc 100644 --- a/contrib/llvm/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/contrib/llvm/lib/Target/PowerPC/PPCISelLowering.cpp @@ -71,6 +71,7 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM) : TargetLowering(TM, CreateTLOF(TM)), PPCSubTarget(*TM.getSubtargetImpl()) { const PPCSubtarget *Subtarget = &TM.getSubtarget<PPCSubtarget>(); PPCRegInfo = TM.getRegisterInfo(); + PPCII = TM.getInstrInfo(); setPow2DivIsCheap(); @@ -513,7 +514,8 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM) setOperationAction(ISD::ATOMIC_STORE, MVT::i64, Expand); setBooleanContents(ZeroOrOneBooleanContent); - setBooleanVectorContents(ZeroOrOneBooleanContent); // FIXME: Is this correct? + // Altivec instructions set fields to all zeros or all ones. + setBooleanVectorContents(ZeroOrNegativeOneBooleanContent); if (isPPC64) { setStackPointerRegisterToSaveRestore(PPC::X1); @@ -4672,10 +4674,14 @@ SDValue PPCTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const { !Op.getOperand(2).getValueType().isFloatingPoint()) return Op; - ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(4))->get(); + // We might be able to do better than this under some circumstances, but in + // general, fsel-based lowering of select is a finite-math-only optimization. + // For more information, see section F.3 of the 2.06 ISA specification. + if (!DAG.getTarget().Options.NoInfsFPMath || + !DAG.getTarget().Options.NoNaNsFPMath) + return Op; - // Cannot handle SETEQ/SETNE. - if (CC == ISD::SETEQ || CC == ISD::SETNE) return Op; + ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(4))->get(); EVT ResVT = Op.getValueType(); EVT CmpVT = Op.getOperand(0).getValueType(); @@ -4685,9 +4691,20 @@ SDValue PPCTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const { // If the RHS of the comparison is a 0.0, we don't need to do the // subtraction at all. + SDValue Sel1; if (isFloatingPointZero(RHS)) switch (CC) { default: break; // SETUO etc aren't handled by fsel. 
+ case ISD::SETNE: + std::swap(TV, FV); + case ISD::SETEQ: + if (LHS.getValueType() == MVT::f32) // Comparison is always 64-bits + LHS = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, LHS); + Sel1 = DAG.getNode(PPCISD::FSEL, dl, ResVT, LHS, TV, FV); + if (Sel1.getValueType() == MVT::f32) // Comparison is always 64-bits + Sel1 = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Sel1); + return DAG.getNode(PPCISD::FSEL, dl, ResVT, + DAG.getNode(ISD::FNEG, dl, MVT::f64, LHS), Sel1, FV); case ISD::SETULT: case ISD::SETLT: std::swap(TV, FV); // fsel is natively setge, swap operands for setlt @@ -4710,30 +4727,41 @@ SDValue PPCTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const { SDValue Cmp; switch (CC) { default: break; // SETUO etc aren't handled by fsel. + case ISD::SETNE: + std::swap(TV, FV); + case ISD::SETEQ: + Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, LHS, RHS); + if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits + Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp); + Sel1 = DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, TV, FV); + if (Sel1.getValueType() == MVT::f32) // Comparison is always 64-bits + Sel1 = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Sel1); + return DAG.getNode(PPCISD::FSEL, dl, ResVT, + DAG.getNode(ISD::FNEG, dl, MVT::f64, Cmp), Sel1, FV); case ISD::SETULT: case ISD::SETLT: Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, LHS, RHS); if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp); - return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, FV, TV); + return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, FV, TV); case ISD::SETOGE: case ISD::SETGE: Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, LHS, RHS); if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp); - return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, TV, FV); + return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, TV, FV); case ISD::SETUGT: case ISD::SETGT: Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, RHS, LHS); if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp); - return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, FV, TV); + return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, FV, TV); case ISD::SETOLE: case ISD::SETLE: Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, RHS, LHS); if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp); - return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, TV, FV); + return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, TV, FV); } return Op; } @@ -6239,29 +6267,13 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, if (PPCSubTarget.hasISEL() && (MI->getOpcode() == PPC::SELECT_CC_I4 || MI->getOpcode() == PPC::SELECT_CC_I8)) { - unsigned OpCode = MI->getOpcode() == PPC::SELECT_CC_I8 ? 
- PPC::ISEL8 : PPC::ISEL; - unsigned SelectPred = MI->getOperand(4).getImm(); - DebugLoc dl = MI->getDebugLoc(); + SmallVector<MachineOperand, 2> Cond; + Cond.push_back(MI->getOperand(4)); + Cond.push_back(MI->getOperand(1)); - unsigned SubIdx; - bool SwapOps; - switch (SelectPred) { - default: llvm_unreachable("invalid predicate for isel"); - case PPC::PRED_EQ: SubIdx = PPC::sub_eq; SwapOps = false; break; - case PPC::PRED_NE: SubIdx = PPC::sub_eq; SwapOps = true; break; - case PPC::PRED_LT: SubIdx = PPC::sub_lt; SwapOps = false; break; - case PPC::PRED_GE: SubIdx = PPC::sub_lt; SwapOps = true; break; - case PPC::PRED_GT: SubIdx = PPC::sub_gt; SwapOps = false; break; - case PPC::PRED_LE: SubIdx = PPC::sub_gt; SwapOps = true; break; - case PPC::PRED_UN: SubIdx = PPC::sub_un; SwapOps = false; break; - case PPC::PRED_NU: SubIdx = PPC::sub_un; SwapOps = true; break; - } - - BuildMI(*BB, MI, dl, TII->get(OpCode), MI->getOperand(0).getReg()) - .addReg(MI->getOperand(SwapOps? 3 : 2).getReg()) - .addReg(MI->getOperand(SwapOps? 2 : 3).getReg()) - .addReg(MI->getOperand(1).getReg(), 0, SubIdx); + DebugLoc dl = MI->getDebugLoc(); + PPCII->insertSelect(*BB, MI, dl, MI->getOperand(0).getReg(), Cond, + MI->getOperand(2).getReg(), MI->getOperand(3).getReg()); } else if (MI->getOpcode() == PPC::SELECT_CC_I4 || MI->getOpcode() == PPC::SELECT_CC_I8 || MI->getOpcode() == PPC::SELECT_CC_F4 || diff --git a/contrib/llvm/lib/Target/PowerPC/PPCISelLowering.h b/contrib/llvm/lib/Target/PowerPC/PPCISelLowering.h index 7157b70..423e983 100644 --- a/contrib/llvm/lib/Target/PowerPC/PPCISelLowering.h +++ b/contrib/llvm/lib/Target/PowerPC/PPCISelLowering.h @@ -16,6 +16,7 @@ #define LLVM_TARGET_POWERPC_PPC32ISELLOWERING_H #include "PPC.h" +#include "PPCInstrInfo.h" #include "PPCRegisterInfo.h" #include "PPCSubtarget.h" #include "llvm/CodeGen/SelectionDAG.h" @@ -327,6 +328,7 @@ namespace llvm { class PPCTargetLowering : public TargetLowering { const PPCSubtarget &PPCSubTarget; const PPCRegisterInfo *PPCRegInfo; + const PPCInstrInfo *PPCII; public: explicit PPCTargetLowering(PPCTargetMachine &TM); diff --git a/contrib/llvm/lib/Target/PowerPC/PPCInstr64Bit.td b/contrib/llvm/lib/Target/PowerPC/PPCInstr64Bit.td index fa5b65f..bff4c23 100644 --- a/contrib/llvm/lib/Target/PowerPC/PPCInstr64Bit.td +++ b/contrib/llvm/lib/Target/PowerPC/PPCInstr64Bit.td @@ -17,17 +17,21 @@ // def s16imm64 : Operand<i64> { let PrintMethod = "printS16ImmOperand"; + let ParserMatchClass = PPCS16ImmAsmOperand; } def u16imm64 : Operand<i64> { let PrintMethod = "printU16ImmOperand"; + let ParserMatchClass = PPCU16ImmAsmOperand; } def symbolHi64 : Operand<i64> { let PrintMethod = "printSymbolHi"; let EncoderMethod = "getHA16Encoding"; + let ParserMatchClass = PPCS16ImmAsmOperand; } def symbolLo64 : Operand<i64> { let PrintMethod = "printSymbolLo"; let EncoderMethod = "getLO16Encoding"; + let ParserMatchClass = PPCS16ImmAsmOperand; } def tocentry : Operand<iPTR> { let MIOperandInfo = (ops i64imm:$imm); @@ -66,10 +70,17 @@ def HI48_64 : SDNodeXForm<imm, [{ // Calls. 
// +let Interpretation64Bit = 1 in { let isTerminator = 1, isBarrier = 1, PPC970_Unit = 7 in { - let isBranch = 1, isIndirectBranch = 1, Uses = [CTR8] in + let isBranch = 1, isIndirectBranch = 1, Uses = [CTR8] in { def BCTR8 : XLForm_2_ext<19, 528, 20, 0, 0, (outs), (ins), "bctr", BrB, []>, Requires<[In64BitMode]>; + + let isCodeGenOnly = 1 in + def BCCTR8 : XLForm_2_br<19, 528, 0, (outs), (ins pred:$cond), + "b${cond:cc}ctr ${cond:reg}", BrB, []>, + Requires<[In64BitMode]>; + } } let Defs = [LR8] in @@ -83,8 +94,17 @@ let isBranch = 1, isTerminator = 1, hasCtrlDep = 1, PPC970_Unit = 7 in { def BDNZ8 : BForm_1<16, 16, 0, 0, (outs), (ins condbrtarget:$dst), "bdnz $dst">; } + + let isReturn = 1, Defs = [CTR8], Uses = [CTR8, LR8, RM] in { + def BDZLR8 : XLForm_2_ext<19, 16, 18, 0, 0, (outs), (ins), + "bdzlr", BrB, []>; + def BDNZLR8 : XLForm_2_ext<19, 16, 16, 0, 0, (outs), (ins), + "bdnzlr", BrB, []>; + } } + + let isCall = 1, PPC970_Unit = 7, Defs = [LR8] in { // Convenient aliases for call instructions let Uses = [RM] in { @@ -116,9 +136,14 @@ let isCall = 1, PPC970_Unit = 7, Defs = [LR8] in { def BCTRL8 : XLForm_2_ext<19, 528, 20, 0, 1, (outs), (ins), "bctrl", BrB, [(PPCbctrl)]>, Requires<[In64BitMode]>; + + let isCodeGenOnly = 1 in + def BCCTRL8 : XLForm_2_br<19, 528, 1, (outs), (ins pred:$cond), + "b${cond:cc}ctrl ${cond:reg}", BrB, []>, + Requires<[In64BitMode]>; } } - +} // Interpretation64Bit // Calls def : Pat<(PPCcall (i64 tglobaladdr:$dst)), @@ -135,45 +160,46 @@ def : Pat<(PPCcall_nop (i64 texternalsym:$dst)), let usesCustomInserter = 1 in { let Defs = [CR0] in { def ATOMIC_LOAD_ADD_I64 : Pseudo< - (outs G8RC:$dst), (ins memrr:$ptr, G8RC:$incr), "#ATOMIC_LOAD_ADD_I64", + (outs g8rc:$dst), (ins memrr:$ptr, g8rc:$incr), "#ATOMIC_LOAD_ADD_I64", [(set i64:$dst, (atomic_load_add_64 xoaddr:$ptr, i64:$incr))]>; def ATOMIC_LOAD_SUB_I64 : Pseudo< - (outs G8RC:$dst), (ins memrr:$ptr, G8RC:$incr), "#ATOMIC_LOAD_SUB_I64", + (outs g8rc:$dst), (ins memrr:$ptr, g8rc:$incr), "#ATOMIC_LOAD_SUB_I64", [(set i64:$dst, (atomic_load_sub_64 xoaddr:$ptr, i64:$incr))]>; def ATOMIC_LOAD_OR_I64 : Pseudo< - (outs G8RC:$dst), (ins memrr:$ptr, G8RC:$incr), "#ATOMIC_LOAD_OR_I64", + (outs g8rc:$dst), (ins memrr:$ptr, g8rc:$incr), "#ATOMIC_LOAD_OR_I64", [(set i64:$dst, (atomic_load_or_64 xoaddr:$ptr, i64:$incr))]>; def ATOMIC_LOAD_XOR_I64 : Pseudo< - (outs G8RC:$dst), (ins memrr:$ptr, G8RC:$incr), "#ATOMIC_LOAD_XOR_I64", + (outs g8rc:$dst), (ins memrr:$ptr, g8rc:$incr), "#ATOMIC_LOAD_XOR_I64", [(set i64:$dst, (atomic_load_xor_64 xoaddr:$ptr, i64:$incr))]>; def ATOMIC_LOAD_AND_I64 : Pseudo< - (outs G8RC:$dst), (ins memrr:$ptr, G8RC:$incr), "#ATOMIC_LOAD_AND_i64", + (outs g8rc:$dst), (ins memrr:$ptr, g8rc:$incr), "#ATOMIC_LOAD_AND_i64", [(set i64:$dst, (atomic_load_and_64 xoaddr:$ptr, i64:$incr))]>; def ATOMIC_LOAD_NAND_I64 : Pseudo< - (outs G8RC:$dst), (ins memrr:$ptr, G8RC:$incr), "#ATOMIC_LOAD_NAND_I64", + (outs g8rc:$dst), (ins memrr:$ptr, g8rc:$incr), "#ATOMIC_LOAD_NAND_I64", [(set i64:$dst, (atomic_load_nand_64 xoaddr:$ptr, i64:$incr))]>; def ATOMIC_CMP_SWAP_I64 : Pseudo< - (outs G8RC:$dst), (ins memrr:$ptr, G8RC:$old, G8RC:$new), "#ATOMIC_CMP_SWAP_I64", + (outs g8rc:$dst), (ins memrr:$ptr, g8rc:$old, g8rc:$new), "#ATOMIC_CMP_SWAP_I64", [(set i64:$dst, (atomic_cmp_swap_64 xoaddr:$ptr, i64:$old, i64:$new))]>; def ATOMIC_SWAP_I64 : Pseudo< - (outs G8RC:$dst), (ins memrr:$ptr, G8RC:$new), "#ATOMIC_SWAP_I64", + (outs g8rc:$dst), (ins memrr:$ptr, g8rc:$new), "#ATOMIC_SWAP_I64", [(set i64:$dst, (atomic_swap_64 
xoaddr:$ptr, i64:$new))]>; } } // Instructions to support atomic operations -def LDARX : XForm_1<31, 84, (outs G8RC:$rD), (ins memrr:$ptr), +def LDARX : XForm_1<31, 84, (outs g8rc:$rD), (ins memrr:$ptr), "ldarx $rD, $ptr", LdStLDARX, [(set i64:$rD, (PPClarx xoaddr:$ptr))]>; let Defs = [CR0] in -def STDCX : XForm_1<31, 214, (outs), (ins G8RC:$rS, memrr:$dst), +def STDCX : XForm_1<31, 214, (outs), (ins g8rc:$rS, memrr:$dst), "stdcx. $rS, $dst", LdStSTDCX, [(PPCstcx i64:$rS, xoaddr:$dst)]>, isDOT; +let Interpretation64Bit = 1 in { let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1, Uses = [RM] in def TCRETURNdi8 :Pseudo< (outs), (ins calltarget:$dst, i32imm:$offset), @@ -212,6 +238,7 @@ def TAILBA8 : IForm<18, 0, 0, (outs), (ins aaddr:$dst), []>; } +} // Interpretation64Bit def : Pat<(PPCtc_return (i64 tglobaladdr:$dst), imm:$imm), (TCRETURNdi8 tglobaladdr:$dst, imm:$imm)>; @@ -224,21 +251,25 @@ def : Pat<(PPCtc_return CTRRC8:$dst, imm:$imm), // 64-bit CR instructions -def MTCRF8 : XFXForm_5<31, 144, (outs crbitm:$FXM), (ins G8RC:$rS), +let Interpretation64Bit = 1 in { +let neverHasSideEffects = 1 in { +def MTCRF8 : XFXForm_5<31, 144, (outs crbitm:$FXM), (ins g8rc:$rS), "mtcrf $FXM, $rS", BrMCRX>, PPC970_MicroCode, PPC970_Unit_CRU; let isCodeGenOnly = 1 in -def MFCR8pseud: XFXForm_3<31, 19, (outs G8RC:$rT), (ins crbitm:$FXM), +def MFCR8pseud: XFXForm_3<31, 19, (outs g8rc:$rT), (ins crbitm:$FXM), "#MFCR8pseud", SprMFCR>, PPC970_MicroCode, PPC970_Unit_CRU; - -def MFCR8 : XFXForm_3<31, 19, (outs G8RC:$rT), (ins), +} // neverHasSideEffects = 1 + +let neverHasSideEffects = 1 in +def MFCR8 : XFXForm_3<31, 19, (outs g8rc:$rT), (ins), "mfcr $rT", SprMFCR>, PPC970_MicroCode, PPC970_Unit_CRU; let hasSideEffects = 1, isBarrier = 1, usesCustomInserter = 1 in { - def EH_SjLj_SetJmp64 : Pseudo<(outs GPRC:$dst), (ins memr:$buf), + def EH_SjLj_SetJmp64 : Pseudo<(outs gprc:$dst), (ins memr:$buf), "#EH_SJLJ_SETJMP64", [(set i32:$dst, (PPCeh_sjlj_setjmp addr:$buf))]>, Requires<[In64BitMode]>; @@ -253,18 +284,18 @@ let hasSideEffects = 1, isBarrier = 1, usesCustomInserter = 1 in { // 64-bit SPR manipulation instrs. let Uses = [CTR8] in { -def MFCTR8 : XFXForm_1_ext<31, 339, 9, (outs G8RC:$rT), (ins), +def MFCTR8 : XFXForm_1_ext<31, 339, 9, (outs g8rc:$rT), (ins), "mfctr $rT", SprMFSPR>, PPC970_DGroup_First, PPC970_Unit_FXU; } let Pattern = [(PPCmtctr i64:$rS)], Defs = [CTR8] in { -def MTCTR8 : XFXForm_7_ext<31, 467, 9, (outs), (ins G8RC:$rS), +def MTCTR8 : XFXForm_7_ext<31, 467, 9, (outs), (ins g8rc:$rS), "mtctr $rS", SprMTSPR>, PPC970_DGroup_First, PPC970_Unit_FXU; } let Pattern = [(set i64:$rT, readcyclecounter)] in -def MFTB8 : XFXForm_1_ext<31, 339, 268, (outs G8RC:$rT), (ins), +def MFTB8 : XFXForm_1_ext<31, 339, 268, (outs g8rc:$rT), (ins), "mfspr $rT, 268", SprMFTB>, PPC970_DGroup_First, PPC970_Unit_FXU; // Note that encoding mftb using mfspr is now the preferred form, @@ -273,252 +304,265 @@ def MFTB8 : XFXForm_1_ext<31, 339, 268, (outs G8RC:$rT), (ins), // the POWER3. 
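One more aside before the defm conversions that follow: the Interpretation64Bit flag wrapped around these definitions feeds the RecFormRel relation maps declared in PPC.td earlier in this patch (getRecordFormOpcode / getNonRecordFormOpcode), keeping the 32-bit and 64-bit spellings of an encoding in separate rows while pairing each instruction with its record (dot, RC=1) form. The generated tables are consumed from C++ roughly as sketched here; the exact function name and the GET_INSTRMAP_INFO guard are TableGen conventions assumed from other targets, not quoted from this patch:

  #define GET_INSTRMAP_INFO
  #include "PPCGenInstrInfo.inc"        // generated relation tables and query functions

  // Returns true when Opcode has a dot-suffixed variant that also sets CR0,
  // e.g. (assuming the usual naming) AND8 -> AND8o; -1 means no mapping.
  static bool hasRecordForm(unsigned Opcode) {
    return PPC::getRecordFormOpcode(Opcode) != -1;
  }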
let Defs = [X1], Uses = [X1] in -def DYNALLOC8 : Pseudo<(outs G8RC:$result), (ins G8RC:$negsize, memri:$fpsi),"#DYNALLOC8", +def DYNALLOC8 : Pseudo<(outs g8rc:$result), (ins g8rc:$negsize, memri:$fpsi),"#DYNALLOC8", [(set i64:$result, (PPCdynalloc i64:$negsize, iaddr:$fpsi))]>; let Defs = [LR8] in { -def MTLR8 : XFXForm_7_ext<31, 467, 8, (outs), (ins G8RC:$rS), +def MTLR8 : XFXForm_7_ext<31, 467, 8, (outs), (ins g8rc:$rS), "mtlr $rS", SprMTSPR>, PPC970_DGroup_First, PPC970_Unit_FXU; } let Uses = [LR8] in { -def MFLR8 : XFXForm_1_ext<31, 339, 8, (outs G8RC:$rT), (ins), +def MFLR8 : XFXForm_1_ext<31, 339, 8, (outs g8rc:$rT), (ins), "mflr $rT", SprMFSPR>, PPC970_DGroup_First, PPC970_Unit_FXU; } +} // Interpretation64Bit //===----------------------------------------------------------------------===// // Fixed point instructions. // let PPC970_Unit = 1 in { // FXU Operations. +let Interpretation64Bit = 1 in { +let neverHasSideEffects = 1 in { let isReMaterializable = 1, isAsCheapAsAMove = 1, isMoveImm = 1 in { -def LI8 : DForm_2_r0<14, (outs G8RC:$rD), (ins symbolLo64:$imm), +def LI8 : DForm_2_r0<14, (outs g8rc:$rD), (ins symbolLo64:$imm), "li $rD, $imm", IntSimple, [(set i64:$rD, immSExt16:$imm)]>; -def LIS8 : DForm_2_r0<15, (outs G8RC:$rD), (ins symbolHi64:$imm), +def LIS8 : DForm_2_r0<15, (outs g8rc:$rD), (ins symbolHi64:$imm), "lis $rD, $imm", IntSimple, [(set i64:$rD, imm16ShiftedSExt:$imm)]>; } // Logical ops. -def NAND8: XForm_6<31, 476, (outs G8RC:$rA), (ins G8RC:$rS, G8RC:$rB), - "nand $rA, $rS, $rB", IntSimple, - [(set i64:$rA, (not (and i64:$rS, i64:$rB)))]>; -def AND8 : XForm_6<31, 28, (outs G8RC:$rA), (ins G8RC:$rS, G8RC:$rB), - "and $rA, $rS, $rB", IntSimple, - [(set i64:$rA, (and i64:$rS, i64:$rB))]>; -def ANDC8: XForm_6<31, 60, (outs G8RC:$rA), (ins G8RC:$rS, G8RC:$rB), - "andc $rA, $rS, $rB", IntSimple, - [(set i64:$rA, (and i64:$rS, (not i64:$rB)))]>; -def OR8 : XForm_6<31, 444, (outs G8RC:$rA), (ins G8RC:$rS, G8RC:$rB), - "or $rA, $rS, $rB", IntSimple, - [(set i64:$rA, (or i64:$rS, i64:$rB))]>; -def NOR8 : XForm_6<31, 124, (outs G8RC:$rA), (ins G8RC:$rS, G8RC:$rB), - "nor $rA, $rS, $rB", IntSimple, - [(set i64:$rA, (not (or i64:$rS, i64:$rB)))]>; -def ORC8 : XForm_6<31, 412, (outs G8RC:$rA), (ins G8RC:$rS, G8RC:$rB), - "orc $rA, $rS, $rB", IntSimple, - [(set i64:$rA, (or i64:$rS, (not i64:$rB)))]>; -def EQV8 : XForm_6<31, 284, (outs G8RC:$rA), (ins G8RC:$rS, G8RC:$rB), - "eqv $rA, $rS, $rB", IntSimple, - [(set i64:$rA, (not (xor i64:$rS, i64:$rB)))]>; -def XOR8 : XForm_6<31, 316, (outs G8RC:$rA), (ins G8RC:$rS, G8RC:$rB), - "xor $rA, $rS, $rB", IntSimple, - [(set i64:$rA, (xor i64:$rS, i64:$rB))]>; +defm NAND8: XForm_6r<31, 476, (outs g8rc:$rA), (ins g8rc:$rS, g8rc:$rB), + "nand", "$rA, $rS, $rB", IntSimple, + [(set i64:$rA, (not (and i64:$rS, i64:$rB)))]>; +defm AND8 : XForm_6r<31, 28, (outs g8rc:$rA), (ins g8rc:$rS, g8rc:$rB), + "and", "$rA, $rS, $rB", IntSimple, + [(set i64:$rA, (and i64:$rS, i64:$rB))]>; +defm ANDC8: XForm_6r<31, 60, (outs g8rc:$rA), (ins g8rc:$rS, g8rc:$rB), + "andc", "$rA, $rS, $rB", IntSimple, + [(set i64:$rA, (and i64:$rS, (not i64:$rB)))]>; +defm OR8 : XForm_6r<31, 444, (outs g8rc:$rA), (ins g8rc:$rS, g8rc:$rB), + "or", "$rA, $rS, $rB", IntSimple, + [(set i64:$rA, (or i64:$rS, i64:$rB))]>; +defm NOR8 : XForm_6r<31, 124, (outs g8rc:$rA), (ins g8rc:$rS, g8rc:$rB), + "nor", "$rA, $rS, $rB", IntSimple, + [(set i64:$rA, (not (or i64:$rS, i64:$rB)))]>; +defm ORC8 : XForm_6r<31, 412, (outs g8rc:$rA), (ins g8rc:$rS, g8rc:$rB), + "orc", "$rA, $rS, $rB", 
IntSimple, + [(set i64:$rA, (or i64:$rS, (not i64:$rB)))]>; +defm EQV8 : XForm_6r<31, 284, (outs g8rc:$rA), (ins g8rc:$rS, g8rc:$rB), + "eqv", "$rA, $rS, $rB", IntSimple, + [(set i64:$rA, (not (xor i64:$rS, i64:$rB)))]>; +defm XOR8 : XForm_6r<31, 316, (outs g8rc:$rA), (ins g8rc:$rS, g8rc:$rB), + "xor", "$rA, $rS, $rB", IntSimple, + [(set i64:$rA, (xor i64:$rS, i64:$rB))]>; // Logical ops with immediate. -def ANDIo8 : DForm_4<28, (outs G8RC:$dst), (ins G8RC:$src1, u16imm:$src2), +let Defs = [CR0] in { +def ANDIo8 : DForm_4<28, (outs g8rc:$dst), (ins g8rc:$src1, u16imm:$src2), "andi. $dst, $src1, $src2", IntGeneral, [(set i64:$dst, (and i64:$src1, immZExt16:$src2))]>, isDOT; -def ANDISo8 : DForm_4<29, (outs G8RC:$dst), (ins G8RC:$src1, u16imm:$src2), +def ANDISo8 : DForm_4<29, (outs g8rc:$dst), (ins g8rc:$src1, u16imm:$src2), "andis. $dst, $src1, $src2", IntGeneral, [(set i64:$dst, (and i64:$src1, imm16ShiftedZExt:$src2))]>, isDOT; -def ORI8 : DForm_4<24, (outs G8RC:$dst), (ins G8RC:$src1, u16imm:$src2), +} +def ORI8 : DForm_4<24, (outs g8rc:$dst), (ins g8rc:$src1, u16imm:$src2), "ori $dst, $src1, $src2", IntSimple, [(set i64:$dst, (or i64:$src1, immZExt16:$src2))]>; -def ORIS8 : DForm_4<25, (outs G8RC:$dst), (ins G8RC:$src1, u16imm:$src2), +def ORIS8 : DForm_4<25, (outs g8rc:$dst), (ins g8rc:$src1, u16imm:$src2), "oris $dst, $src1, $src2", IntSimple, [(set i64:$dst, (or i64:$src1, imm16ShiftedZExt:$src2))]>; -def XORI8 : DForm_4<26, (outs G8RC:$dst), (ins G8RC:$src1, u16imm:$src2), +def XORI8 : DForm_4<26, (outs g8rc:$dst), (ins g8rc:$src1, u16imm:$src2), "xori $dst, $src1, $src2", IntSimple, [(set i64:$dst, (xor i64:$src1, immZExt16:$src2))]>; -def XORIS8 : DForm_4<27, (outs G8RC:$dst), (ins G8RC:$src1, u16imm:$src2), +def XORIS8 : DForm_4<27, (outs g8rc:$dst), (ins g8rc:$src1, u16imm:$src2), "xoris $dst, $src1, $src2", IntSimple, [(set i64:$dst, (xor i64:$src1, imm16ShiftedZExt:$src2))]>; -def ADD8 : XOForm_1<31, 266, 0, (outs G8RC:$rT), (ins G8RC:$rA, G8RC:$rB), - "add $rT, $rA, $rB", IntSimple, - [(set i64:$rT, (add i64:$rA, i64:$rB))]>; +defm ADD8 : XOForm_1r<31, 266, 0, (outs g8rc:$rT), (ins g8rc:$rA, g8rc:$rB), + "add", "$rT, $rA, $rB", IntSimple, + [(set i64:$rT, (add i64:$rA, i64:$rB))]>; // ADD8 has a special form: reg = ADD8(reg, sym@tls) for use by the // initial-exec thread-local storage model. 
let isCodeGenOnly = 1 in -def ADD8TLS : XOForm_1<31, 266, 0, (outs G8RC:$rT), (ins G8RC:$rA, tlsreg:$rB), +def ADD8TLS : XOForm_1<31, 266, 0, (outs g8rc:$rT), (ins g8rc:$rA, tlsreg:$rB), "add $rT, $rA, $rB@tls", IntSimple, [(set i64:$rT, (add i64:$rA, tglobaltlsaddr:$rB))]>; -let Defs = [CARRY] in { -def ADDC8 : XOForm_1<31, 10, 0, (outs G8RC:$rT), (ins G8RC:$rA, G8RC:$rB), - "addc $rT, $rA, $rB", IntGeneral, - [(set i64:$rT, (addc i64:$rA, i64:$rB))]>, - PPC970_DGroup_Cracked; -def ADDIC8 : DForm_2<12, (outs G8RC:$rD), (ins G8RC:$rA, s16imm64:$imm), +defm ADDC8 : XOForm_1rc<31, 10, 0, (outs g8rc:$rT), (ins g8rc:$rA, g8rc:$rB), + "addc", "$rT, $rA, $rB", IntGeneral, + [(set i64:$rT, (addc i64:$rA, i64:$rB))]>, + PPC970_DGroup_Cracked; +let Defs = [CARRY] in +def ADDIC8 : DForm_2<12, (outs g8rc:$rD), (ins g8rc:$rA, s16imm64:$imm), "addic $rD, $rA, $imm", IntGeneral, [(set i64:$rD, (addc i64:$rA, immSExt16:$imm))]>; -} -def ADDI8 : DForm_2<14, (outs G8RC:$rD), (ins G8RC_NOX0:$rA, symbolLo64:$imm), +def ADDI8 : DForm_2<14, (outs g8rc:$rD), (ins g8rc_nox0:$rA, symbolLo64:$imm), "addi $rD, $rA, $imm", IntSimple, [(set i64:$rD, (add i64:$rA, immSExt16:$imm))]>; -def ADDIS8 : DForm_2<15, (outs G8RC:$rD), (ins G8RC_NOX0:$rA, symbolHi64:$imm), +def ADDIS8 : DForm_2<15, (outs g8rc:$rD), (ins g8rc_nox0:$rA, symbolHi64:$imm), "addis $rD, $rA, $imm", IntSimple, [(set i64:$rD, (add i64:$rA, imm16ShiftedSExt:$imm))]>; let Defs = [CARRY] in { -def SUBFIC8: DForm_2< 8, (outs G8RC:$rD), (ins G8RC:$rA, s16imm64:$imm), +def SUBFIC8: DForm_2< 8, (outs g8rc:$rD), (ins g8rc:$rA, s16imm64:$imm), "subfic $rD, $rA, $imm", IntGeneral, [(set i64:$rD, (subc immSExt16:$imm, i64:$rA))]>; -def SUBFC8 : XOForm_1<31, 8, 0, (outs G8RC:$rT), (ins G8RC:$rA, G8RC:$rB), - "subfc $rT, $rA, $rB", IntGeneral, - [(set i64:$rT, (subc i64:$rB, i64:$rA))]>, - PPC970_DGroup_Cracked; -} -def SUBF8 : XOForm_1<31, 40, 0, (outs G8RC:$rT), (ins G8RC:$rA, G8RC:$rB), - "subf $rT, $rA, $rB", IntGeneral, - [(set i64:$rT, (sub i64:$rB, i64:$rA))]>; -def NEG8 : XOForm_3<31, 104, 0, (outs G8RC:$rT), (ins G8RC:$rA), - "neg $rT, $rA", IntSimple, - [(set i64:$rT, (ineg i64:$rA))]>; -let Uses = [CARRY], Defs = [CARRY] in { -def ADDE8 : XOForm_1<31, 138, 0, (outs G8RC:$rT), (ins G8RC:$rA, G8RC:$rB), - "adde $rT, $rA, $rB", IntGeneral, - [(set i64:$rT, (adde i64:$rA, i64:$rB))]>; -def ADDME8 : XOForm_3<31, 234, 0, (outs G8RC:$rT), (ins G8RC:$rA), - "addme $rT, $rA", IntGeneral, - [(set i64:$rT, (adde i64:$rA, -1))]>; -def ADDZE8 : XOForm_3<31, 202, 0, (outs G8RC:$rT), (ins G8RC:$rA), - "addze $rT, $rA", IntGeneral, - [(set i64:$rT, (adde i64:$rA, 0))]>; -def SUBFE8 : XOForm_1<31, 136, 0, (outs G8RC:$rT), (ins G8RC:$rA, G8RC:$rB), - "subfe $rT, $rA, $rB", IntGeneral, - [(set i64:$rT, (sube i64:$rB, i64:$rA))]>; -def SUBFME8 : XOForm_3<31, 232, 0, (outs G8RC:$rT), (ins G8RC:$rA), - "subfme $rT, $rA", IntGeneral, - [(set i64:$rT, (sube -1, i64:$rA))]>; -def SUBFZE8 : XOForm_3<31, 200, 0, (outs G8RC:$rT), (ins G8RC:$rA), - "subfze $rT, $rA", IntGeneral, - [(set i64:$rT, (sube 0, i64:$rA))]>; -} - - -def MULHD : XOForm_1<31, 73, 0, (outs G8RC:$rT), (ins G8RC:$rA, G8RC:$rB), - "mulhd $rT, $rA, $rB", IntMulHW, - [(set i64:$rT, (mulhs i64:$rA, i64:$rB))]>; -def MULHDU : XOForm_1<31, 9, 0, (outs G8RC:$rT), (ins G8RC:$rA, G8RC:$rB), - "mulhdu $rT, $rA, $rB", IntMulHWU, - [(set i64:$rT, (mulhu i64:$rA, i64:$rB))]>; - -def CMPD : XForm_16_ext<31, 0, (outs CRRC:$crD), (ins G8RC:$rA, G8RC:$rB), - "cmpd $crD, $rA, $rB", IntCompare>, isPPC64; -def CMPLD : 
XForm_16_ext<31, 32, (outs CRRC:$crD), (ins G8RC:$rA, G8RC:$rB), - "cmpld $crD, $rA, $rB", IntCompare>, isPPC64; -def CMPDI : DForm_5_ext<11, (outs CRRC:$crD), (ins G8RC:$rA, s16imm:$imm), - "cmpdi $crD, $rA, $imm", IntCompare>, isPPC64; -def CMPLDI : DForm_6_ext<10, (outs CRRC:$dst), (ins G8RC:$src1, u16imm:$src2), - "cmpldi $dst, $src1, $src2", IntCompare>, isPPC64; - -def SLD : XForm_6<31, 27, (outs G8RC:$rA), (ins G8RC:$rS, GPRC:$rB), - "sld $rA, $rS, $rB", IntRotateD, - [(set i64:$rA, (PPCshl i64:$rS, i32:$rB))]>, isPPC64; -def SRD : XForm_6<31, 539, (outs G8RC:$rA), (ins G8RC:$rS, GPRC:$rB), - "srd $rA, $rS, $rB", IntRotateD, - [(set i64:$rA, (PPCsrl i64:$rS, i32:$rB))]>, isPPC64; -let Defs = [CARRY] in { -def SRAD : XForm_6<31, 794, (outs G8RC:$rA), (ins G8RC:$rS, GPRC:$rB), - "srad $rA, $rS, $rB", IntRotateD, - [(set i64:$rA, (PPCsra i64:$rS, i32:$rB))]>, isPPC64; +defm SUBFC8 : XOForm_1r<31, 8, 0, (outs g8rc:$rT), (ins g8rc:$rA, g8rc:$rB), + "subfc", "$rT, $rA, $rB", IntGeneral, + [(set i64:$rT, (subc i64:$rB, i64:$rA))]>, + PPC970_DGroup_Cracked; +} +defm SUBF8 : XOForm_1r<31, 40, 0, (outs g8rc:$rT), (ins g8rc:$rA, g8rc:$rB), + "subf", "$rT, $rA, $rB", IntGeneral, + [(set i64:$rT, (sub i64:$rB, i64:$rA))]>; +defm NEG8 : XOForm_3r<31, 104, 0, (outs g8rc:$rT), (ins g8rc:$rA), + "neg", "$rT, $rA", IntSimple, + [(set i64:$rT, (ineg i64:$rA))]>; +let Uses = [CARRY] in { +defm ADDE8 : XOForm_1rc<31, 138, 0, (outs g8rc:$rT), (ins g8rc:$rA, g8rc:$rB), + "adde", "$rT, $rA, $rB", IntGeneral, + [(set i64:$rT, (adde i64:$rA, i64:$rB))]>; +defm ADDME8 : XOForm_3rc<31, 234, 0, (outs g8rc:$rT), (ins g8rc:$rA), + "addme", "$rT, $rA", IntGeneral, + [(set i64:$rT, (adde i64:$rA, -1))]>; +defm ADDZE8 : XOForm_3rc<31, 202, 0, (outs g8rc:$rT), (ins g8rc:$rA), + "addze", "$rT, $rA", IntGeneral, + [(set i64:$rT, (adde i64:$rA, 0))]>; +defm SUBFE8 : XOForm_1rc<31, 136, 0, (outs g8rc:$rT), (ins g8rc:$rA, g8rc:$rB), + "subfe", "$rT, $rA, $rB", IntGeneral, + [(set i64:$rT, (sube i64:$rB, i64:$rA))]>; +defm SUBFME8 : XOForm_3rc<31, 232, 0, (outs g8rc:$rT), (ins g8rc:$rA), + "subfme", "$rT, $rA", IntGeneral, + [(set i64:$rT, (sube -1, i64:$rA))]>; +defm SUBFZE8 : XOForm_3rc<31, 200, 0, (outs g8rc:$rT), (ins g8rc:$rA), + "subfze", "$rT, $rA", IntGeneral, + [(set i64:$rT, (sube 0, i64:$rA))]>; } - -def EXTSB8 : XForm_11<31, 954, (outs G8RC:$rA), (ins G8RC:$rS), - "extsb $rA, $rS", IntSimple, - [(set i64:$rA, (sext_inreg i64:$rS, i8))]>; -def EXTSH8 : XForm_11<31, 922, (outs G8RC:$rA), (ins G8RC:$rS), - "extsh $rA, $rS", IntSimple, - [(set i64:$rA, (sext_inreg i64:$rS, i16))]>; - -def EXTSW : XForm_11<31, 986, (outs G8RC:$rA), (ins G8RC:$rS), - "extsw $rA, $rS", IntSimple, - [(set i64:$rA, (sext_inreg i64:$rS, i32))]>, isPPC64; -def EXTSW_32_64 : XForm_11<31, 986, (outs G8RC:$rA), (ins GPRC:$rS), - "extsw $rA, $rS", IntSimple, - [(set i64:$rA, (sext i32:$rS))]>, isPPC64; -let Defs = [CARRY] in { -def SRADI : XSForm_1<31, 413, (outs G8RC:$rA), (ins G8RC:$rS, u6imm:$SH), - "sradi $rA, $rS, $SH", IntRotateDI, - [(set i64:$rA, (sra i64:$rS, (i32 imm:$SH)))]>, isPPC64; + +defm MULHD : XOForm_1r<31, 73, 0, (outs g8rc:$rT), (ins g8rc:$rA, g8rc:$rB), + "mulhd", "$rT, $rA, $rB", IntMulHW, + [(set i64:$rT, (mulhs i64:$rA, i64:$rB))]>; +defm MULHDU : XOForm_1r<31, 9, 0, (outs g8rc:$rT), (ins g8rc:$rA, g8rc:$rB), + "mulhdu", "$rT, $rA, $rB", IntMulHWU, + [(set i64:$rT, (mulhu i64:$rA, i64:$rB))]>; +} +} // Interpretation64Bit + +let isCompare = 1, neverHasSideEffects = 1 in { + def CMPD : XForm_16_ext<31, 0, (outs 
crrc:$crD), (ins g8rc:$rA, g8rc:$rB), + "cmpd $crD, $rA, $rB", IntCompare>, isPPC64; + def CMPLD : XForm_16_ext<31, 32, (outs crrc:$crD), (ins g8rc:$rA, g8rc:$rB), + "cmpld $crD, $rA, $rB", IntCompare>, isPPC64; + def CMPDI : DForm_5_ext<11, (outs crrc:$crD), (ins g8rc:$rA, s16imm:$imm), + "cmpdi $crD, $rA, $imm", IntCompare>, isPPC64; + def CMPLDI : DForm_6_ext<10, (outs crrc:$dst), (ins g8rc:$src1, u16imm:$src2), + "cmpldi $dst, $src1, $src2", IntCompare>, isPPC64; } -def CNTLZD : XForm_11<31, 58, (outs G8RC:$rA), (ins G8RC:$rS), - "cntlzd $rA, $rS", IntGeneral, - [(set i64:$rA, (ctlz i64:$rS))]>; -def POPCNTD : XForm_11<31, 506, (outs G8RC:$rA), (ins G8RC:$rS), - "popcntd $rA, $rS", IntGeneral, - [(set i64:$rA, (ctpop i64:$rS))]>; + +let neverHasSideEffects = 1 in { +defm SLD : XForm_6r<31, 27, (outs g8rc:$rA), (ins g8rc:$rS, gprc:$rB), + "sld", "$rA, $rS, $rB", IntRotateD, + [(set i64:$rA, (PPCshl i64:$rS, i32:$rB))]>, isPPC64; +defm SRD : XForm_6r<31, 539, (outs g8rc:$rA), (ins g8rc:$rS, gprc:$rB), + "srd", "$rA, $rS, $rB", IntRotateD, + [(set i64:$rA, (PPCsrl i64:$rS, i32:$rB))]>, isPPC64; +defm SRAD : XForm_6rc<31, 794, (outs g8rc:$rA), (ins g8rc:$rS, gprc:$rB), + "srad", "$rA, $rS, $rB", IntRotateD, + [(set i64:$rA, (PPCsra i64:$rS, i32:$rB))]>, isPPC64; + +let Interpretation64Bit = 1 in { +defm EXTSB8 : XForm_11r<31, 954, (outs g8rc:$rA), (ins g8rc:$rS), + "extsb", "$rA, $rS", IntSimple, + [(set i64:$rA, (sext_inreg i64:$rS, i8))]>; +defm EXTSH8 : XForm_11r<31, 922, (outs g8rc:$rA), (ins g8rc:$rS), + "extsh", "$rA, $rS", IntSimple, + [(set i64:$rA, (sext_inreg i64:$rS, i16))]>; +} // Interpretation64Bit + +defm EXTSW : XForm_11r<31, 986, (outs g8rc:$rA), (ins g8rc:$rS), + "extsw", "$rA, $rS", IntSimple, + [(set i64:$rA, (sext_inreg i64:$rS, i32))]>, isPPC64; +let Interpretation64Bit = 1 in +defm EXTSW_32_64 : XForm_11r<31, 986, (outs g8rc:$rA), (ins gprc:$rS), + "extsw", "$rA, $rS", IntSimple, + [(set i64:$rA, (sext i32:$rS))]>, isPPC64; + +defm SRADI : XSForm_1rc<31, 413, (outs g8rc:$rA), (ins g8rc:$rS, u6imm:$SH), + "sradi", "$rA, $rS, $SH", IntRotateDI, + [(set i64:$rA, (sra i64:$rS, (i32 imm:$SH)))]>, isPPC64; +defm CNTLZD : XForm_11r<31, 58, (outs g8rc:$rA), (ins g8rc:$rS), + "cntlzd", "$rA, $rS", IntGeneral, + [(set i64:$rA, (ctlz i64:$rS))]>; +defm POPCNTD : XForm_11r<31, 506, (outs g8rc:$rA), (ins g8rc:$rS), + "popcntd", "$rA, $rS", IntGeneral, + [(set i64:$rA, (ctpop i64:$rS))]>; // popcntw also does a population count on the high 32 bits (storing the // results in the high 32-bits of the output). We'll ignore that here (which is // safe because we never separately use the high part of the 64-bit registers). 
-def POPCNTW : XForm_11<31, 378, (outs GPRC:$rA), (ins GPRC:$rS), - "popcntw $rA, $rS", IntGeneral, - [(set i32:$rA, (ctpop i32:$rS))]>; - -def DIVD : XOForm_1<31, 489, 0, (outs G8RC:$rT), (ins G8RC:$rA, G8RC:$rB), - "divd $rT, $rA, $rB", IntDivD, - [(set i64:$rT, (sdiv i64:$rA, i64:$rB))]>, isPPC64, - PPC970_DGroup_First, PPC970_DGroup_Cracked; -def DIVDU : XOForm_1<31, 457, 0, (outs G8RC:$rT), (ins G8RC:$rA, G8RC:$rB), - "divdu $rT, $rA, $rB", IntDivD, - [(set i64:$rT, (udiv i64:$rA, i64:$rB))]>, isPPC64, - PPC970_DGroup_First, PPC970_DGroup_Cracked; -def MULLD : XOForm_1<31, 233, 0, (outs G8RC:$rT), (ins G8RC:$rA, G8RC:$rB), - "mulld $rT, $rA, $rB", IntMulHD, - [(set i64:$rT, (mul i64:$rA, i64:$rB))]>, isPPC64; - +defm POPCNTW : XForm_11r<31, 378, (outs gprc:$rA), (ins gprc:$rS), + "popcntw", "$rA, $rS", IntGeneral, + [(set i32:$rA, (ctpop i32:$rS))]>; + +defm DIVD : XOForm_1r<31, 489, 0, (outs g8rc:$rT), (ins g8rc:$rA, g8rc:$rB), + "divd", "$rT, $rA, $rB", IntDivD, + [(set i64:$rT, (sdiv i64:$rA, i64:$rB))]>, isPPC64, + PPC970_DGroup_First, PPC970_DGroup_Cracked; +defm DIVDU : XOForm_1r<31, 457, 0, (outs g8rc:$rT), (ins g8rc:$rA, g8rc:$rB), + "divdu", "$rT, $rA, $rB", IntDivD, + [(set i64:$rT, (udiv i64:$rA, i64:$rB))]>, isPPC64, + PPC970_DGroup_First, PPC970_DGroup_Cracked; +defm MULLD : XOForm_1r<31, 233, 0, (outs g8rc:$rT), (ins g8rc:$rA, g8rc:$rB), + "mulld", "$rT, $rA, $rB", IntMulHD, + [(set i64:$rT, (mul i64:$rA, i64:$rB))]>, isPPC64; +} +let neverHasSideEffects = 1 in { let isCommutable = 1 in { -def RLDIMI : MDForm_1<30, 3, - (outs G8RC:$rA), (ins G8RC:$rSi, G8RC:$rS, u6imm:$SH, u6imm:$MB), - "rldimi $rA, $rS, $SH, $MB", IntRotateDI, - []>, isPPC64, RegConstraint<"$rSi = $rA">, - NoEncode<"$rSi">; +defm RLDIMI : MDForm_1r<30, 3, (outs g8rc:$rA), + (ins g8rc:$rSi, g8rc:$rS, u6imm:$SH, u6imm:$MBE), + "rldimi", "$rA, $rS, $SH, $MBE", IntRotateDI, + []>, isPPC64, RegConstraint<"$rSi = $rA">, + NoEncode<"$rSi">; } // Rotate instructions. -def RLDCL : MDForm_1<30, 0, - (outs G8RC:$rA), (ins G8RC:$rS, GPRC:$rB, u6imm:$MBE), - "rldcl $rA, $rS, $rB, $MBE", IntRotateD, - []>, isPPC64; -def RLDICL : MDForm_1<30, 0, - (outs G8RC:$rA), (ins G8RC:$rS, u6imm:$SH, u6imm:$MBE), - "rldicl $rA, $rS, $SH, $MBE", IntRotateDI, - []>, isPPC64; -def RLDICR : MDForm_1<30, 1, - (outs G8RC:$rA), (ins G8RC:$rS, u6imm:$SH, u6imm:$MBE), - "rldicr $rA, $rS, $SH, $MBE", IntRotateDI, - []>, isPPC64; - -def RLWINM8 : MForm_2<21, - (outs G8RC:$rA), (ins G8RC:$rS, u5imm:$SH, u5imm:$MB, u5imm:$ME), - "rlwinm $rA, $rS, $SH, $MB, $ME", IntGeneral, - []>; - +defm RLDCL : MDSForm_1r<30, 8, + (outs g8rc:$rA), (ins g8rc:$rS, gprc:$rB, u6imm:$MBE), + "rldcl", "$rA, $rS, $rB, $MBE", IntRotateD, + []>, isPPC64; +defm RLDICL : MDForm_1r<30, 0, + (outs g8rc:$rA), (ins g8rc:$rS, u6imm:$SH, u6imm:$MBE), + "rldicl", "$rA, $rS, $SH, $MBE", IntRotateDI, + []>, isPPC64; +defm RLDICR : MDForm_1r<30, 1, + (outs g8rc:$rA), (ins g8rc:$rS, u6imm:$SH, u6imm:$MBE), + "rldicr", "$rA, $rS, $SH, $MBE", IntRotateDI, + []>, isPPC64; + +let Interpretation64Bit = 1 in { +defm RLWINM8 : MForm_2r<21, (outs g8rc:$rA), + (ins g8rc:$rS, u5imm:$SH, u5imm:$MB, u5imm:$ME), + "rlwinm", "$rA, $rS, $SH, $MB, $ME", IntGeneral, + []>; + +let isSelect = 1 in def ISEL8 : AForm_4<31, 15, - (outs G8RC:$rT), (ins G8RC_NOX0:$rA, G8RC:$rB, CRBITRC:$cond), + (outs g8rc:$rT), (ins g8rc_nox0:$rA, g8rc:$rB, crbitrc:$cond), "isel $rT, $rA, $rB, $cond", IntGeneral, []>; +} // Interpretation64Bit +} // neverHasSideEffects = 1 } // End FXU Operations. 
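The hunk above replaces single `def` records (AND8, ADD8, SUBF8, RLDICL, and so on) with `defm` instantiations of `_r`/`_rc` multiclasses. The multiclass bodies themselves are not part of this diff; as a rough sketch only (names such as XForm_6r, RecFormRel, and getRecordFormOpcode are assumed from context rather than shown here), each such multiclass emits the plain instruction plus its record (dot) form, gives both the same BaseName, and an InstrMapping keyed on BaseName and Interpretation64Bit produces the opcode relation table that PPCInstrInfo.cpp later pulls in through GET_INSTRMAP_INFO:

  class RecFormRel;   // filter/tag class for the relation map (assumed name)

  multiclass XForm_6r<bits<6> opcode, bits<10> xo, dag OOL, dag IOL,
                      string asmbase, string asmstr, InstrItinClass itin,
                      list<dag> pattern> {
    let BaseName = asmbase in {
      // Plain form, e.g. "and $rA, $rS, $rB".
      def NAME : XForm_6<opcode, xo, OOL, IOL,
                         !strconcat(asmbase, !strconcat(" ", asmstr)),
                         itin, pattern>, RecFormRel;
      // Record form, e.g. "and. $rA, $rS, $rB"; it additionally writes CR0.
      let Defs = [CR0] in
      def o : XForm_6<opcode, xo, OOL, IOL,
                      !strconcat(asmbase, !strconcat(". ", asmstr)),
                      itin, []>, isDOT, RecFormRel;
    }
  }

  // Relation map: within a row identified by (BaseName, Interpretation64Bit),
  // map the RC = 0 definition to its RC = 1 (record-form) counterpart.
  def getRecordFormOpcode : InstrMapping {
    let FilterClass = "RecFormRel";
    let RowFields   = ["BaseName", "Interpretation64Bit"];
    let ColFields   = ["RC"];
    let KeyCol      = ["0"];
    let ValueCols   = [["1"]];
  }

Keeping Interpretation64Bit among the row fields helps keep pairs such as EXTSB and EXTSB8 (the same encoding interpreted as a 32-bit and a 64-bit operation) from colliding in the table, which is the degeneracy the new bit in PPCInstrFormats.td is meant to break.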
@@ -529,39 +573,43 @@ def ISEL8 : AForm_4<31, 15, // Sign extending loads. let canFoldAsLoad = 1, PPC970_Unit = 2 in { -def LHA8: DForm_1<42, (outs G8RC:$rD), (ins memri:$src), +let Interpretation64Bit = 1 in +def LHA8: DForm_1<42, (outs g8rc:$rD), (ins memri:$src), "lha $rD, $src", LdStLHA, [(set i64:$rD, (sextloadi16 iaddr:$src))]>, PPC970_DGroup_Cracked; -def LWA : DSForm_1<58, 2, (outs G8RC:$rD), (ins memrix:$src), +def LWA : DSForm_1<58, 2, (outs g8rc:$rD), (ins memrix:$src), "lwa $rD, $src", LdStLWA, [(set i64:$rD, (aligned4sextloadi32 ixaddr:$src))]>, isPPC64, PPC970_DGroup_Cracked; -def LHAX8: XForm_1<31, 343, (outs G8RC:$rD), (ins memrr:$src), +let Interpretation64Bit = 1 in +def LHAX8: XForm_1<31, 343, (outs g8rc:$rD), (ins memrr:$src), "lhax $rD, $src", LdStLHA, [(set i64:$rD, (sextloadi16 xaddr:$src))]>, PPC970_DGroup_Cracked; -def LWAX : XForm_1<31, 341, (outs G8RC:$rD), (ins memrr:$src), +def LWAX : XForm_1<31, 341, (outs g8rc:$rD), (ins memrr:$src), "lwax $rD, $src", LdStLHA, [(set i64:$rD, (sextloadi32 xaddr:$src))]>, isPPC64, PPC970_DGroup_Cracked; // Update forms. -let mayLoad = 1 in { -def LHAU8 : DForm_1<43, (outs G8RC:$rD, ptr_rc_nor0:$ea_result), +let mayLoad = 1, neverHasSideEffects = 1 in { +let Interpretation64Bit = 1 in +def LHAU8 : DForm_1<43, (outs g8rc:$rD, ptr_rc_nor0:$ea_result), (ins memri:$addr), "lhau $rD, $addr", LdStLHAU, []>, RegConstraint<"$addr.reg = $ea_result">, NoEncode<"$ea_result">; // NO LWAU! -def LHAUX8 : XForm_1<31, 375, (outs G8RC:$rD, ptr_rc_nor0:$ea_result), +let Interpretation64Bit = 1 in +def LHAUX8 : XForm_1<31, 375, (outs g8rc:$rD, ptr_rc_nor0:$ea_result), (ins memrr:$addr), "lhaux $rD, $addr", LdStLHAU, []>, RegConstraint<"$addr.ptrreg = $ea_result">, NoEncode<"$ea_result">; -def LWAUX : XForm_1<31, 373, (outs G8RC:$rD, ptr_rc_nor0:$ea_result), +def LWAUX : XForm_1<31, 373, (outs g8rc:$rD, ptr_rc_nor0:$ea_result), (ins memrr:$addr), "lwaux $rD, $addr", LdStLHAU, []>, RegConstraint<"$addr.ptrreg = $ea_result">, @@ -569,87 +617,89 @@ def LWAUX : XForm_1<31, 373, (outs G8RC:$rD, ptr_rc_nor0:$ea_result), } } +let Interpretation64Bit = 1 in { // Zero extending loads. let canFoldAsLoad = 1, PPC970_Unit = 2 in { -def LBZ8 : DForm_1<34, (outs G8RC:$rD), (ins memri:$src), +def LBZ8 : DForm_1<34, (outs g8rc:$rD), (ins memri:$src), "lbz $rD, $src", LdStLoad, [(set i64:$rD, (zextloadi8 iaddr:$src))]>; -def LHZ8 : DForm_1<40, (outs G8RC:$rD), (ins memri:$src), +def LHZ8 : DForm_1<40, (outs g8rc:$rD), (ins memri:$src), "lhz $rD, $src", LdStLoad, [(set i64:$rD, (zextloadi16 iaddr:$src))]>; -def LWZ8 : DForm_1<32, (outs G8RC:$rD), (ins memri:$src), +def LWZ8 : DForm_1<32, (outs g8rc:$rD), (ins memri:$src), "lwz $rD, $src", LdStLoad, [(set i64:$rD, (zextloadi32 iaddr:$src))]>, isPPC64; -def LBZX8 : XForm_1<31, 87, (outs G8RC:$rD), (ins memrr:$src), +def LBZX8 : XForm_1<31, 87, (outs g8rc:$rD), (ins memrr:$src), "lbzx $rD, $src", LdStLoad, [(set i64:$rD, (zextloadi8 xaddr:$src))]>; -def LHZX8 : XForm_1<31, 279, (outs G8RC:$rD), (ins memrr:$src), +def LHZX8 : XForm_1<31, 279, (outs g8rc:$rD), (ins memrr:$src), "lhzx $rD, $src", LdStLoad, [(set i64:$rD, (zextloadi16 xaddr:$src))]>; -def LWZX8 : XForm_1<31, 23, (outs G8RC:$rD), (ins memrr:$src), +def LWZX8 : XForm_1<31, 23, (outs g8rc:$rD), (ins memrr:$src), "lwzx $rD, $src", LdStLoad, [(set i64:$rD, (zextloadi32 xaddr:$src))]>; // Update forms. 
-let mayLoad = 1 in { -def LBZU8 : DForm_1<35, (outs G8RC:$rD, ptr_rc_nor0:$ea_result), (ins memri:$addr), +let mayLoad = 1, neverHasSideEffects = 1 in { +def LBZU8 : DForm_1<35, (outs g8rc:$rD, ptr_rc_nor0:$ea_result), (ins memri:$addr), "lbzu $rD, $addr", LdStLoadUpd, []>, RegConstraint<"$addr.reg = $ea_result">, NoEncode<"$ea_result">; -def LHZU8 : DForm_1<41, (outs G8RC:$rD, ptr_rc_nor0:$ea_result), (ins memri:$addr), +def LHZU8 : DForm_1<41, (outs g8rc:$rD, ptr_rc_nor0:$ea_result), (ins memri:$addr), "lhzu $rD, $addr", LdStLoadUpd, []>, RegConstraint<"$addr.reg = $ea_result">, NoEncode<"$ea_result">; -def LWZU8 : DForm_1<33, (outs G8RC:$rD, ptr_rc_nor0:$ea_result), (ins memri:$addr), +def LWZU8 : DForm_1<33, (outs g8rc:$rD, ptr_rc_nor0:$ea_result), (ins memri:$addr), "lwzu $rD, $addr", LdStLoadUpd, []>, RegConstraint<"$addr.reg = $ea_result">, NoEncode<"$ea_result">; -def LBZUX8 : XForm_1<31, 119, (outs G8RC:$rD, ptr_rc_nor0:$ea_result), +def LBZUX8 : XForm_1<31, 119, (outs g8rc:$rD, ptr_rc_nor0:$ea_result), (ins memrr:$addr), "lbzux $rD, $addr", LdStLoadUpd, []>, RegConstraint<"$addr.ptrreg = $ea_result">, NoEncode<"$ea_result">; -def LHZUX8 : XForm_1<31, 311, (outs G8RC:$rD, ptr_rc_nor0:$ea_result), +def LHZUX8 : XForm_1<31, 311, (outs g8rc:$rD, ptr_rc_nor0:$ea_result), (ins memrr:$addr), "lhzux $rD, $addr", LdStLoadUpd, []>, RegConstraint<"$addr.ptrreg = $ea_result">, NoEncode<"$ea_result">; -def LWZUX8 : XForm_1<31, 55, (outs G8RC:$rD, ptr_rc_nor0:$ea_result), +def LWZUX8 : XForm_1<31, 55, (outs g8rc:$rD, ptr_rc_nor0:$ea_result), (ins memrr:$addr), "lwzux $rD, $addr", LdStLoadUpd, []>, RegConstraint<"$addr.ptrreg = $ea_result">, NoEncode<"$ea_result">; } } +} // Interpretation64Bit // Full 8-byte loads. let canFoldAsLoad = 1, PPC970_Unit = 2 in { -def LD : DSForm_1<58, 0, (outs G8RC:$rD), (ins memrix:$src), +def LD : DSForm_1<58, 0, (outs g8rc:$rD), (ins memrix:$src), "ld $rD, $src", LdStLD, [(set i64:$rD, (aligned4load ixaddr:$src))]>, isPPC64; // The following three definitions are selected for small code model only. // Otherwise, we need to create two instructions to form a 32-bit offset, // so we have a custom matcher for TOC_ENTRY in PPCDAGToDAGIsel::Select(). 
-def LDtoc: Pseudo<(outs G8RC:$rD), (ins tocentry:$disp, G8RC:$reg), +def LDtoc: Pseudo<(outs g8rc:$rD), (ins tocentry:$disp, g8rc:$reg), "#LDtoc", [(set i64:$rD, (PPCtoc_entry tglobaladdr:$disp, i64:$reg))]>, isPPC64; -def LDtocJTI: Pseudo<(outs G8RC:$rD), (ins tocentry:$disp, G8RC:$reg), +def LDtocJTI: Pseudo<(outs g8rc:$rD), (ins tocentry:$disp, g8rc:$reg), "#LDtocJTI", [(set i64:$rD, (PPCtoc_entry tjumptable:$disp, i64:$reg))]>, isPPC64; -def LDtocCPT: Pseudo<(outs G8RC:$rD), (ins tocentry:$disp, G8RC:$reg), +def LDtocCPT: Pseudo<(outs g8rc:$rD), (ins tocentry:$disp, g8rc:$reg), "#LDtocCPT", [(set i64:$rD, (PPCtoc_entry tconstpool:$disp, i64:$reg))]>, isPPC64; let hasSideEffects = 1, isCodeGenOnly = 1 in { let RST = 2, DS = 2 in -def LDinto_toc: DSForm_1a<58, 0, (outs), (ins G8RC:$reg), +def LDinto_toc: DSForm_1a<58, 0, (outs), (ins g8rc:$reg), "ld 2, 8($reg)", LdStLD, [(PPCload_toc i64:$reg)]>, isPPC64; @@ -658,25 +708,26 @@ def LDtoc_restore : DSForm_1a<58, 0, (outs), (ins), "ld 2, 40(1)", LdStLD, [(PPCtoc_restore)]>, isPPC64; } -def LDX : XForm_1<31, 21, (outs G8RC:$rD), (ins memrr:$src), +def LDX : XForm_1<31, 21, (outs g8rc:$rD), (ins memrr:$src), "ldx $rD, $src", LdStLD, [(set i64:$rD, (load xaddr:$src))]>, isPPC64; -def LDBRX : XForm_1<31, 532, (outs G8RC:$rD), (ins memrr:$src), +def LDBRX : XForm_1<31, 532, (outs g8rc:$rD), (ins memrr:$src), "ldbrx $rD, $src", LdStLoad, [(set i64:$rD, (PPClbrx xoaddr:$src, i64))]>, isPPC64; -let mayLoad = 1 in -def LDU : DSForm_1<58, 1, (outs G8RC:$rD, ptr_rc_nor0:$ea_result), (ins memrix:$addr), +let mayLoad = 1, neverHasSideEffects = 1 in { +def LDU : DSForm_1<58, 1, (outs g8rc:$rD, ptr_rc_nor0:$ea_result), (ins memrix:$addr), "ldu $rD, $addr", LdStLDU, []>, RegConstraint<"$addr.reg = $ea_result">, isPPC64, NoEncode<"$ea_result">; -def LDUX : XForm_1<31, 53, (outs G8RC:$rD, ptr_rc_nor0:$ea_result), +def LDUX : XForm_1<31, 53, (outs g8rc:$rD, ptr_rc_nor0:$ea_result), (ins memrr:$addr), "ldux $rD, $addr", LdStLDU, []>, RegConstraint<"$addr.ptrreg = $ea_result">, NoEncode<"$ea_result">, isPPC64; } +} def : Pat<(PPCload ixaddr:$src), (LD ixaddr:$src)>; @@ -684,108 +735,111 @@ def : Pat<(PPCload xaddr:$src), (LDX xaddr:$src)>; // Support for medium and large code model. -def ADDIStocHA: Pseudo<(outs G8RC:$rD), (ins G8RC_NOX0:$reg, tocentry:$disp), +def ADDIStocHA: Pseudo<(outs g8rc:$rD), (ins g8rc_nox0:$reg, tocentry:$disp), "#ADDIStocHA", [(set i64:$rD, (PPCaddisTocHA i64:$reg, tglobaladdr:$disp))]>, isPPC64; -def LDtocL: Pseudo<(outs G8RC:$rD), (ins tocentry:$disp, G8RC_NOX0:$reg), +def LDtocL: Pseudo<(outs g8rc:$rD), (ins tocentry:$disp, g8rc_nox0:$reg), "#LDtocL", [(set i64:$rD, (PPCldTocL tglobaladdr:$disp, i64:$reg))]>, isPPC64; -def ADDItocL: Pseudo<(outs G8RC:$rD), (ins G8RC_NOX0:$reg, tocentry:$disp), +def ADDItocL: Pseudo<(outs g8rc:$rD), (ins g8rc_nox0:$reg, tocentry:$disp), "#ADDItocL", [(set i64:$rD, (PPCaddiTocL i64:$reg, tglobaladdr:$disp))]>, isPPC64; // Support for thread-local storage. 
-def ADDISgotTprelHA: Pseudo<(outs G8RC:$rD), (ins G8RC_NOX0:$reg, symbolHi64:$disp), +def ADDISgotTprelHA: Pseudo<(outs g8rc:$rD), (ins g8rc_nox0:$reg, symbolHi64:$disp), "#ADDISgotTprelHA", [(set i64:$rD, (PPCaddisGotTprelHA i64:$reg, tglobaltlsaddr:$disp))]>, isPPC64; -def LDgotTprelL: Pseudo<(outs G8RC:$rD), (ins symbolLo64:$disp, G8RC_NOX0:$reg), +def LDgotTprelL: Pseudo<(outs g8rc:$rD), (ins symbolLo64:$disp, g8rc_nox0:$reg), "#LDgotTprelL", [(set i64:$rD, (PPCldGotTprelL tglobaltlsaddr:$disp, i64:$reg))]>, isPPC64; def : Pat<(PPCaddTls i64:$in, tglobaltlsaddr:$g), (ADD8TLS $in, tglobaltlsaddr:$g)>; -def ADDIStlsgdHA: Pseudo<(outs G8RC:$rD), (ins G8RC_NOX0:$reg, symbolHi64:$disp), +def ADDIStlsgdHA: Pseudo<(outs g8rc:$rD), (ins g8rc_nox0:$reg, symbolHi64:$disp), "#ADDIStlsgdHA", [(set i64:$rD, (PPCaddisTlsgdHA i64:$reg, tglobaltlsaddr:$disp))]>, isPPC64; -def ADDItlsgdL : Pseudo<(outs G8RC:$rD), (ins G8RC_NOX0:$reg, symbolLo64:$disp), +def ADDItlsgdL : Pseudo<(outs g8rc:$rD), (ins g8rc_nox0:$reg, symbolLo64:$disp), "#ADDItlsgdL", [(set i64:$rD, (PPCaddiTlsgdL i64:$reg, tglobaltlsaddr:$disp))]>, isPPC64; -def GETtlsADDR : Pseudo<(outs G8RC:$rD), (ins G8RC:$reg, tlsgd:$sym), +def GETtlsADDR : Pseudo<(outs g8rc:$rD), (ins g8rc:$reg, tlsgd:$sym), "#GETtlsADDR", [(set i64:$rD, (PPCgetTlsAddr i64:$reg, tglobaltlsaddr:$sym))]>, isPPC64; -def ADDIStlsldHA: Pseudo<(outs G8RC:$rD), (ins G8RC_NOX0:$reg, symbolHi64:$disp), +def ADDIStlsldHA: Pseudo<(outs g8rc:$rD), (ins g8rc_nox0:$reg, symbolHi64:$disp), "#ADDIStlsldHA", [(set i64:$rD, (PPCaddisTlsldHA i64:$reg, tglobaltlsaddr:$disp))]>, isPPC64; -def ADDItlsldL : Pseudo<(outs G8RC:$rD), (ins G8RC_NOX0:$reg, symbolLo64:$disp), +def ADDItlsldL : Pseudo<(outs g8rc:$rD), (ins g8rc_nox0:$reg, symbolLo64:$disp), "#ADDItlsldL", [(set i64:$rD, (PPCaddiTlsldL i64:$reg, tglobaltlsaddr:$disp))]>, isPPC64; -def GETtlsldADDR : Pseudo<(outs G8RC:$rD), (ins G8RC:$reg, tlsgd:$sym), +def GETtlsldADDR : Pseudo<(outs g8rc:$rD), (ins g8rc:$reg, tlsgd:$sym), "#GETtlsldADDR", [(set i64:$rD, (PPCgetTlsldAddr i64:$reg, tglobaltlsaddr:$sym))]>, isPPC64; -def ADDISdtprelHA: Pseudo<(outs G8RC:$rD), (ins G8RC_NOX0:$reg, symbolHi64:$disp), +def ADDISdtprelHA: Pseudo<(outs g8rc:$rD), (ins g8rc_nox0:$reg, symbolHi64:$disp), "#ADDISdtprelHA", [(set i64:$rD, (PPCaddisDtprelHA i64:$reg, tglobaltlsaddr:$disp))]>, isPPC64; -def ADDIdtprelL : Pseudo<(outs G8RC:$rD), (ins G8RC_NOX0:$reg, symbolLo64:$disp), +def ADDIdtprelL : Pseudo<(outs g8rc:$rD), (ins g8rc_nox0:$reg, symbolLo64:$disp), "#ADDIdtprelL", [(set i64:$rD, (PPCaddiDtprelL i64:$reg, tglobaltlsaddr:$disp))]>, isPPC64; let PPC970_Unit = 2 in { +let Interpretation64Bit = 1 in { // Truncating stores. 
-def STB8 : DForm_1<38, (outs), (ins G8RC:$rS, memri:$src), +def STB8 : DForm_1<38, (outs), (ins g8rc:$rS, memri:$src), "stb $rS, $src", LdStStore, [(truncstorei8 i64:$rS, iaddr:$src)]>; -def STH8 : DForm_1<44, (outs), (ins G8RC:$rS, memri:$src), +def STH8 : DForm_1<44, (outs), (ins g8rc:$rS, memri:$src), "sth $rS, $src", LdStStore, [(truncstorei16 i64:$rS, iaddr:$src)]>; -def STW8 : DForm_1<36, (outs), (ins G8RC:$rS, memri:$src), +def STW8 : DForm_1<36, (outs), (ins g8rc:$rS, memri:$src), "stw $rS, $src", LdStStore, [(truncstorei32 i64:$rS, iaddr:$src)]>; -def STBX8 : XForm_8<31, 215, (outs), (ins G8RC:$rS, memrr:$dst), +def STBX8 : XForm_8<31, 215, (outs), (ins g8rc:$rS, memrr:$dst), "stbx $rS, $dst", LdStStore, [(truncstorei8 i64:$rS, xaddr:$dst)]>, PPC970_DGroup_Cracked; -def STHX8 : XForm_8<31, 407, (outs), (ins G8RC:$rS, memrr:$dst), +def STHX8 : XForm_8<31, 407, (outs), (ins g8rc:$rS, memrr:$dst), "sthx $rS, $dst", LdStStore, [(truncstorei16 i64:$rS, xaddr:$dst)]>, PPC970_DGroup_Cracked; -def STWX8 : XForm_8<31, 151, (outs), (ins G8RC:$rS, memrr:$dst), +def STWX8 : XForm_8<31, 151, (outs), (ins g8rc:$rS, memrr:$dst), "stwx $rS, $dst", LdStStore, [(truncstorei32 i64:$rS, xaddr:$dst)]>, PPC970_DGroup_Cracked; +} // Interpretation64Bit + // Normal 8-byte stores. -def STD : DSForm_1<62, 0, (outs), (ins G8RC:$rS, memrix:$dst), +def STD : DSForm_1<62, 0, (outs), (ins g8rc:$rS, memrix:$dst), "std $rS, $dst", LdStSTD, [(aligned4store i64:$rS, ixaddr:$dst)]>, isPPC64; -def STDX : XForm_8<31, 149, (outs), (ins G8RC:$rS, memrr:$dst), +def STDX : XForm_8<31, 149, (outs), (ins g8rc:$rS, memrr:$dst), "stdx $rS, $dst", LdStSTD, [(store i64:$rS, xaddr:$dst)]>, isPPC64, PPC970_DGroup_Cracked; -def STDBRX: XForm_8<31, 660, (outs), (ins G8RC:$rS, memrr:$dst), +def STDBRX: XForm_8<31, 660, (outs), (ins g8rc:$rS, memrr:$dst), "stdbrx $rS, $dst", LdStStore, [(PPCstbrx i64:$rS, xoaddr:$dst, i64)]>, isPPC64, PPC970_DGroup_Cracked; @@ -793,33 +847,36 @@ def STDBRX: XForm_8<31, 660, (outs), (ins G8RC:$rS, memrr:$dst), // Stores with Update (pre-inc). 
let PPC970_Unit = 2, mayStore = 1 in { -def STBU8 : DForm_1<39, (outs ptr_rc_nor0:$ea_res), (ins G8RC:$rS, memri:$dst), +let Interpretation64Bit = 1 in { +def STBU8 : DForm_1<39, (outs ptr_rc_nor0:$ea_res), (ins g8rc:$rS, memri:$dst), "stbu $rS, $dst", LdStStoreUpd, []>, RegConstraint<"$dst.reg = $ea_res">, NoEncode<"$ea_res">; -def STHU8 : DForm_1<45, (outs ptr_rc_nor0:$ea_res), (ins G8RC:$rS, memri:$dst), +def STHU8 : DForm_1<45, (outs ptr_rc_nor0:$ea_res), (ins g8rc:$rS, memri:$dst), "sthu $rS, $dst", LdStStoreUpd, []>, RegConstraint<"$dst.reg = $ea_res">, NoEncode<"$ea_res">; -def STWU8 : DForm_1<37, (outs ptr_rc_nor0:$ea_res), (ins G8RC:$rS, memri:$dst), +def STWU8 : DForm_1<37, (outs ptr_rc_nor0:$ea_res), (ins g8rc:$rS, memri:$dst), "stwu $rS, $dst", LdStStoreUpd, []>, RegConstraint<"$dst.reg = $ea_res">, NoEncode<"$ea_res">; -def STDU : DSForm_1<62, 1, (outs ptr_rc_nor0:$ea_res), (ins G8RC:$rS, memrix:$dst), +def STDU : DSForm_1<62, 1, (outs ptr_rc_nor0:$ea_res), (ins g8rc:$rS, memrix:$dst), "stdu $rS, $dst", LdStSTDU, []>, RegConstraint<"$dst.reg = $ea_res">, NoEncode<"$ea_res">, isPPC64; -def STBUX8: XForm_8<31, 247, (outs ptr_rc_nor0:$ea_res), (ins G8RC:$rS, memrr:$dst), +def STBUX8: XForm_8<31, 247, (outs ptr_rc_nor0:$ea_res), (ins g8rc:$rS, memrr:$dst), "stbux $rS, $dst", LdStStoreUpd, []>, RegConstraint<"$dst.ptrreg = $ea_res">, NoEncode<"$ea_res">, PPC970_DGroup_Cracked; -def STHUX8: XForm_8<31, 439, (outs ptr_rc_nor0:$ea_res), (ins G8RC:$rS, memrr:$dst), +def STHUX8: XForm_8<31, 439, (outs ptr_rc_nor0:$ea_res), (ins g8rc:$rS, memrr:$dst), "sthux $rS, $dst", LdStStoreUpd, []>, RegConstraint<"$dst.ptrreg = $ea_res">, NoEncode<"$ea_res">, PPC970_DGroup_Cracked; -def STWUX8: XForm_8<31, 183, (outs ptr_rc_nor0:$ea_res), (ins G8RC:$rS, memrr:$dst), +def STWUX8: XForm_8<31, 183, (outs ptr_rc_nor0:$ea_res), (ins g8rc:$rS, memrr:$dst), "stwux $rS, $dst", LdStStoreUpd, []>, RegConstraint<"$dst.ptrreg = $ea_res">, NoEncode<"$ea_res">, PPC970_DGroup_Cracked; -def STDUX : XForm_8<31, 181, (outs ptr_rc_nor0:$ea_res), (ins G8RC:$rS, memrr:$dst), +} // Interpretation64Bit + +def STDUX : XForm_8<31, 181, (outs ptr_rc_nor0:$ea_res), (ins g8rc:$rS, memrr:$dst), "stdux $rS, $dst", LdStSTDU, []>, RegConstraint<"$dst.ptrreg = $ea_res">, NoEncode<"$ea_res">, PPC970_DGroup_Cracked, isPPC64; @@ -852,29 +909,30 @@ def : Pat<(pre_store i64:$rS, iPTR:$ptrreg, iPTR:$ptroff), // -let PPC970_Unit = 3, Uses = [RM] in { // FPU Operations. 
-def FCFID : XForm_26<63, 846, (outs F8RC:$frD), (ins F8RC:$frB), - "fcfid $frD, $frB", FPGeneral, - [(set f64:$frD, (PPCfcfid f64:$frB))]>, isPPC64; -def FCTIDZ : XForm_26<63, 815, (outs F8RC:$frD), (ins F8RC:$frB), - "fctidz $frD, $frB", FPGeneral, - [(set f64:$frD, (PPCfctidz f64:$frB))]>, isPPC64; - -def FCFIDU : XForm_26<63, 974, (outs F8RC:$frD), (ins F8RC:$frB), - "fcfidu $frD, $frB", FPGeneral, - [(set f64:$frD, (PPCfcfidu f64:$frB))]>, isPPC64; -def FCFIDS : XForm_26<59, 846, (outs F4RC:$frD), (ins F8RC:$frB), - "fcfids $frD, $frB", FPGeneral, - [(set f32:$frD, (PPCfcfids f64:$frB))]>, isPPC64; -def FCFIDUS : XForm_26<59, 974, (outs F4RC:$frD), (ins F8RC:$frB), - "fcfidus $frD, $frB", FPGeneral, - [(set f32:$frD, (PPCfcfidus f64:$frB))]>, isPPC64; -def FCTIDUZ : XForm_26<63, 943, (outs F8RC:$frD), (ins F8RC:$frB), - "fctiduz $frD, $frB", FPGeneral, - [(set f64:$frD, (PPCfctiduz f64:$frB))]>, isPPC64; -def FCTIWUZ : XForm_26<63, 143, (outs F8RC:$frD), (ins F8RC:$frB), - "fctiwuz $frD, $frB", FPGeneral, - [(set f64:$frD, (PPCfctiwuz f64:$frB))]>, isPPC64; +let PPC970_Unit = 3, neverHasSideEffects = 1, + Uses = [RM] in { // FPU Operations. +defm FCFID : XForm_26r<63, 846, (outs f8rc:$frD), (ins f8rc:$frB), + "fcfid", "$frD, $frB", FPGeneral, + [(set f64:$frD, (PPCfcfid f64:$frB))]>, isPPC64; +defm FCTIDZ : XForm_26r<63, 815, (outs f8rc:$frD), (ins f8rc:$frB), + "fctidz", "$frD, $frB", FPGeneral, + [(set f64:$frD, (PPCfctidz f64:$frB))]>, isPPC64; + +defm FCFIDU : XForm_26r<63, 974, (outs f8rc:$frD), (ins f8rc:$frB), + "fcfidu", "$frD, $frB", FPGeneral, + [(set f64:$frD, (PPCfcfidu f64:$frB))]>, isPPC64; +defm FCFIDS : XForm_26r<59, 846, (outs f4rc:$frD), (ins f8rc:$frB), + "fcfids", "$frD, $frB", FPGeneral, + [(set f32:$frD, (PPCfcfids f64:$frB))]>, isPPC64; +defm FCFIDUS : XForm_26r<59, 974, (outs f4rc:$frD), (ins f8rc:$frB), + "fcfidus", "$frD, $frB", FPGeneral, + [(set f32:$frD, (PPCfcfidus f64:$frB))]>, isPPC64; +defm FCTIDUZ : XForm_26r<63, 943, (outs f8rc:$frD), (ins f8rc:$frB), + "fctiduz", "$frD, $frB", FPGeneral, + [(set f64:$frD, (PPCfctiduz f64:$frB))]>, isPPC64; +defm FCTIWUZ : XForm_26r<63, 143, (outs f8rc:$frD), (ins f8rc:$frB), + "fctiwuz", "$frD, $frB", FPGeneral, + [(set f64:$frD, (PPCfctiwuz f64:$frB))]>, isPPC64; } diff --git a/contrib/llvm/lib/Target/PowerPC/PPCInstrAltivec.td b/contrib/llvm/lib/Target/PowerPC/PPCInstrAltivec.td index a5ba4c8..cc9cf0a 100644 --- a/contrib/llvm/lib/Target/PowerPC/PPCInstrAltivec.td +++ b/contrib/llvm/lib/Target/PowerPC/PPCInstrAltivec.td @@ -163,7 +163,7 @@ def vecspltisw : PatLeaf<(build_vector), [{ // VA1a_Int_Ty - A VAForm_1a intrinsic definition of specific type. class VA1a_Int_Ty<bits<6> xo, string opc, Intrinsic IntID, ValueType Ty> - : VAForm_1a<xo, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB, VRRC:$vC), + : VAForm_1a<xo, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB, vrrc:$vC), !strconcat(opc, " $vD, $vA, $vB, $vC"), VecFP, [(set Ty:$vD, (IntID Ty:$vA, Ty:$vB, Ty:$vC))]>; @@ -171,7 +171,7 @@ class VA1a_Int_Ty<bits<6> xo, string opc, Intrinsic IntID, ValueType Ty> // inputs doesn't match the type of the output. 
class VA1a_Int_Ty2<bits<6> xo, string opc, Intrinsic IntID, ValueType OutTy, ValueType InTy> - : VAForm_1a<xo, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB, VRRC:$vC), + : VAForm_1a<xo, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB, vrrc:$vC), !strconcat(opc, " $vD, $vA, $vB, $vC"), VecFP, [(set OutTy:$vD, (IntID InTy:$vA, InTy:$vB, InTy:$vC))]>; @@ -179,14 +179,14 @@ class VA1a_Int_Ty2<bits<6> xo, string opc, Intrinsic IntID, ValueType OutTy, // input types and an output type. class VA1a_Int_Ty3<bits<6> xo, string opc, Intrinsic IntID, ValueType OutTy, ValueType In1Ty, ValueType In2Ty> - : VAForm_1a<xo, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB, VRRC:$vC), + : VAForm_1a<xo, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB, vrrc:$vC), !strconcat(opc, " $vD, $vA, $vB, $vC"), VecFP, [(set OutTy:$vD, (IntID In1Ty:$vA, In1Ty:$vB, In2Ty:$vC))]>; // VX1_Int_Ty - A VXForm_1 intrinsic definition of specific type. class VX1_Int_Ty<bits<11> xo, string opc, Intrinsic IntID, ValueType Ty> - : VXForm_1<xo, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB), + : VXForm_1<xo, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB), !strconcat(opc, " $vD, $vA, $vB"), VecFP, [(set Ty:$vD, (IntID Ty:$vA, Ty:$vB))]>; @@ -194,7 +194,7 @@ class VX1_Int_Ty<bits<11> xo, string opc, Intrinsic IntID, ValueType Ty> // inputs doesn't match the type of the output. class VX1_Int_Ty2<bits<11> xo, string opc, Intrinsic IntID, ValueType OutTy, ValueType InTy> - : VXForm_1<xo, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB), + : VXForm_1<xo, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB), !strconcat(opc, " $vD, $vA, $vB"), VecFP, [(set OutTy:$vD, (IntID InTy:$vA, InTy:$vB))]>; @@ -202,13 +202,13 @@ class VX1_Int_Ty2<bits<11> xo, string opc, Intrinsic IntID, ValueType OutTy, // input types and an output type. class VX1_Int_Ty3<bits<11> xo, string opc, Intrinsic IntID, ValueType OutTy, ValueType In1Ty, ValueType In2Ty> - : VXForm_1<xo, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB), + : VXForm_1<xo, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB), !strconcat(opc, " $vD, $vA, $vB"), VecFP, [(set OutTy:$vD, (IntID In1Ty:$vA, In2Ty:$vB))]>; // VX2_Int_SP - A VXForm_2 intrinsic definition of vector single-precision type. class VX2_Int_SP<bits<11> xo, string opc, Intrinsic IntID> - : VXForm_2<xo, (outs VRRC:$vD), (ins VRRC:$vB), + : VXForm_2<xo, (outs vrrc:$vD), (ins vrrc:$vB), !strconcat(opc, " $vD, $vB"), VecFP, [(set v4f32:$vD, (IntID v4f32:$vB))]>; @@ -216,7 +216,7 @@ class VX2_Int_SP<bits<11> xo, string opc, Intrinsic IntID> // inputs doesn't match the type of the output. 
class VX2_Int_Ty2<bits<11> xo, string opc, Intrinsic IntID, ValueType OutTy, ValueType InTy> - : VXForm_2<xo, (outs VRRC:$vD), (ins VRRC:$vB), + : VXForm_2<xo, (outs vrrc:$vD), (ins vrrc:$vB), !strconcat(opc, " $vD, $vB"), VecFP, [(set OutTy:$vD, (IntID InTy:$vB))]>; @@ -234,93 +234,93 @@ def DSSALL : DSS_Form<822, (outs), (ins u5imm:$ONE, u5imm:$ZERO0,u5imm:$ZERO1,u5imm:$ZERO2), "dssall", LdStLoad /*FIXME*/, []>; def DST : DSS_Form<342, (outs), - (ins u5imm:$ZERO, u5imm:$STRM, GPRC:$rA, GPRC:$rB), + (ins u5imm:$ZERO, u5imm:$STRM, gprc:$rA, gprc:$rB), "dst $rA, $rB, $STRM", LdStLoad /*FIXME*/, []>; def DSTT : DSS_Form<342, (outs), - (ins u5imm:$ONE, u5imm:$STRM, GPRC:$rA, GPRC:$rB), + (ins u5imm:$ONE, u5imm:$STRM, gprc:$rA, gprc:$rB), "dstt $rA, $rB, $STRM", LdStLoad /*FIXME*/, []>; def DSTST : DSS_Form<374, (outs), - (ins u5imm:$ZERO, u5imm:$STRM, GPRC:$rA, GPRC:$rB), + (ins u5imm:$ZERO, u5imm:$STRM, gprc:$rA, gprc:$rB), "dstst $rA, $rB, $STRM", LdStLoad /*FIXME*/, []>; def DSTSTT : DSS_Form<374, (outs), - (ins u5imm:$ONE, u5imm:$STRM, GPRC:$rA, GPRC:$rB), + (ins u5imm:$ONE, u5imm:$STRM, gprc:$rA, gprc:$rB), "dststt $rA, $rB, $STRM", LdStLoad /*FIXME*/, []>; def DST64 : DSS_Form<342, (outs), - (ins u5imm:$ZERO, u5imm:$STRM, G8RC:$rA, GPRC:$rB), + (ins u5imm:$ZERO, u5imm:$STRM, g8rc:$rA, gprc:$rB), "dst $rA, $rB, $STRM", LdStLoad /*FIXME*/, []>; def DSTT64 : DSS_Form<342, (outs), - (ins u5imm:$ONE, u5imm:$STRM, G8RC:$rA, GPRC:$rB), + (ins u5imm:$ONE, u5imm:$STRM, g8rc:$rA, gprc:$rB), "dstt $rA, $rB, $STRM", LdStLoad /*FIXME*/, []>; def DSTST64 : DSS_Form<374, (outs), - (ins u5imm:$ZERO, u5imm:$STRM, G8RC:$rA, GPRC:$rB), + (ins u5imm:$ZERO, u5imm:$STRM, g8rc:$rA, gprc:$rB), "dstst $rA, $rB, $STRM", LdStLoad /*FIXME*/, []>; def DSTSTT64 : DSS_Form<374, (outs), - (ins u5imm:$ONE, u5imm:$STRM, G8RC:$rA, GPRC:$rB), + (ins u5imm:$ONE, u5imm:$STRM, g8rc:$rA, gprc:$rB), "dststt $rA, $rB, $STRM", LdStLoad /*FIXME*/, []>; } -def MFVSCR : VXForm_4<1540, (outs VRRC:$vD), (ins), +def MFVSCR : VXForm_4<1540, (outs vrrc:$vD), (ins), "mfvscr $vD", LdStStore, [(set v8i16:$vD, (int_ppc_altivec_mfvscr))]>; -def MTVSCR : VXForm_5<1604, (outs), (ins VRRC:$vB), +def MTVSCR : VXForm_5<1604, (outs), (ins vrrc:$vB), "mtvscr $vB", LdStLoad, [(int_ppc_altivec_mtvscr v4i32:$vB)]>; let canFoldAsLoad = 1, PPC970_Unit = 2 in { // Loads. 
-def LVEBX: XForm_1<31, 7, (outs VRRC:$vD), (ins memrr:$src), +def LVEBX: XForm_1<31, 7, (outs vrrc:$vD), (ins memrr:$src), "lvebx $vD, $src", LdStLoad, [(set v16i8:$vD, (int_ppc_altivec_lvebx xoaddr:$src))]>; -def LVEHX: XForm_1<31, 39, (outs VRRC:$vD), (ins memrr:$src), +def LVEHX: XForm_1<31, 39, (outs vrrc:$vD), (ins memrr:$src), "lvehx $vD, $src", LdStLoad, [(set v8i16:$vD, (int_ppc_altivec_lvehx xoaddr:$src))]>; -def LVEWX: XForm_1<31, 71, (outs VRRC:$vD), (ins memrr:$src), +def LVEWX: XForm_1<31, 71, (outs vrrc:$vD), (ins memrr:$src), "lvewx $vD, $src", LdStLoad, [(set v4i32:$vD, (int_ppc_altivec_lvewx xoaddr:$src))]>; -def LVX : XForm_1<31, 103, (outs VRRC:$vD), (ins memrr:$src), +def LVX : XForm_1<31, 103, (outs vrrc:$vD), (ins memrr:$src), "lvx $vD, $src", LdStLoad, [(set v4i32:$vD, (int_ppc_altivec_lvx xoaddr:$src))]>; -def LVXL : XForm_1<31, 359, (outs VRRC:$vD), (ins memrr:$src), +def LVXL : XForm_1<31, 359, (outs vrrc:$vD), (ins memrr:$src), "lvxl $vD, $src", LdStLoad, [(set v4i32:$vD, (int_ppc_altivec_lvxl xoaddr:$src))]>; } -def LVSL : XForm_1<31, 6, (outs VRRC:$vD), (ins memrr:$src), +def LVSL : XForm_1<31, 6, (outs vrrc:$vD), (ins memrr:$src), "lvsl $vD, $src", LdStLoad, [(set v16i8:$vD, (int_ppc_altivec_lvsl xoaddr:$src))]>, PPC970_Unit_LSU; -def LVSR : XForm_1<31, 38, (outs VRRC:$vD), (ins memrr:$src), +def LVSR : XForm_1<31, 38, (outs vrrc:$vD), (ins memrr:$src), "lvsr $vD, $src", LdStLoad, [(set v16i8:$vD, (int_ppc_altivec_lvsr xoaddr:$src))]>, PPC970_Unit_LSU; let PPC970_Unit = 2 in { // Stores. -def STVEBX: XForm_8<31, 135, (outs), (ins VRRC:$rS, memrr:$dst), +def STVEBX: XForm_8<31, 135, (outs), (ins vrrc:$rS, memrr:$dst), "stvebx $rS, $dst", LdStStore, [(int_ppc_altivec_stvebx v16i8:$rS, xoaddr:$dst)]>; -def STVEHX: XForm_8<31, 167, (outs), (ins VRRC:$rS, memrr:$dst), +def STVEHX: XForm_8<31, 167, (outs), (ins vrrc:$rS, memrr:$dst), "stvehx $rS, $dst", LdStStore, [(int_ppc_altivec_stvehx v8i16:$rS, xoaddr:$dst)]>; -def STVEWX: XForm_8<31, 199, (outs), (ins VRRC:$rS, memrr:$dst), +def STVEWX: XForm_8<31, 199, (outs), (ins vrrc:$rS, memrr:$dst), "stvewx $rS, $dst", LdStStore, [(int_ppc_altivec_stvewx v4i32:$rS, xoaddr:$dst)]>; -def STVX : XForm_8<31, 231, (outs), (ins VRRC:$rS, memrr:$dst), +def STVX : XForm_8<31, 231, (outs), (ins vrrc:$rS, memrr:$dst), "stvx $rS, $dst", LdStStore, [(int_ppc_altivec_stvx v4i32:$rS, xoaddr:$dst)]>; -def STVXL : XForm_8<31, 487, (outs), (ins VRRC:$rS, memrr:$dst), +def STVXL : XForm_8<31, 487, (outs), (ins vrrc:$rS, memrr:$dst), "stvxl $rS, $dst", LdStStore, [(int_ppc_altivec_stvxl v4i32:$rS, xoaddr:$dst)]>; } let PPC970_Unit = 5 in { // VALU Operations. // VA-Form instructions. 3-input AltiVec ops. -def VMADDFP : VAForm_1<46, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vC, VRRC:$vB), +def VMADDFP : VAForm_1<46, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vC, vrrc:$vB), "vmaddfp $vD, $vA, $vC, $vB", VecFP, [(set v4f32:$vD, (fma v4f32:$vA, v4f32:$vC, v4f32:$vB))]>; // FIXME: The fma+fneg pattern won't match because fneg is not legal. -def VNMSUBFP: VAForm_1<47, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vC, VRRC:$vB), +def VNMSUBFP: VAForm_1<47, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vC, vrrc:$vB), "vnmsubfp $vD, $vA, $vC, $vB", VecFP, [(set v4f32:$vD, (fneg (fma v4f32:$vA, v4f32:$vC, (fneg v4f32:$vB))))]>; @@ -335,23 +335,23 @@ def VPERM : VA1a_Int_Ty3<43, "vperm", int_ppc_altivec_vperm, def VSEL : VA1a_Int_Ty<42, "vsel", int_ppc_altivec_vsel, v4i32>; // Shuffles. 
-def VSLDOI : VAForm_2<44, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB, u5imm:$SH), +def VSLDOI : VAForm_2<44, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB, u5imm:$SH), "vsldoi $vD, $vA, $vB, $SH", VecFP, [(set v16i8:$vD, (vsldoi_shuffle:$SH v16i8:$vA, v16i8:$vB))]>; // VX-Form instructions. AltiVec arithmetic ops. -def VADDFP : VXForm_1<10, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB), +def VADDFP : VXForm_1<10, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB), "vaddfp $vD, $vA, $vB", VecFP, [(set v4f32:$vD, (fadd v4f32:$vA, v4f32:$vB))]>; -def VADDUBM : VXForm_1<0, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB), +def VADDUBM : VXForm_1<0, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB), "vaddubm $vD, $vA, $vB", VecGeneral, [(set v16i8:$vD, (add v16i8:$vA, v16i8:$vB))]>; -def VADDUHM : VXForm_1<64, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB), +def VADDUHM : VXForm_1<64, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB), "vadduhm $vD, $vA, $vB", VecGeneral, [(set v8i16:$vD, (add v8i16:$vA, v8i16:$vB))]>; -def VADDUWM : VXForm_1<128, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB), +def VADDUWM : VXForm_1<128, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB), "vadduwm $vD, $vA, $vB", VecGeneral, [(set v4i32:$vD, (add v4i32:$vA, v4i32:$vB))]>; @@ -364,27 +364,27 @@ def VADDUHS : VX1_Int_Ty<576, "vadduhs", int_ppc_altivec_vadduhs, v8i16>; def VADDUWS : VX1_Int_Ty<640, "vadduws", int_ppc_altivec_vadduws, v4i32>; -def VAND : VXForm_1<1028, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB), +def VAND : VXForm_1<1028, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB), "vand $vD, $vA, $vB", VecFP, [(set v4i32:$vD, (and v4i32:$vA, v4i32:$vB))]>; -def VANDC : VXForm_1<1092, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB), +def VANDC : VXForm_1<1092, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB), "vandc $vD, $vA, $vB", VecFP, [(set v4i32:$vD, (and v4i32:$vA, (vnot_ppc v4i32:$vB)))]>; -def VCFSX : VXForm_1<842, (outs VRRC:$vD), (ins u5imm:$UIMM, VRRC:$vB), +def VCFSX : VXForm_1<842, (outs vrrc:$vD), (ins u5imm:$UIMM, vrrc:$vB), "vcfsx $vD, $vB, $UIMM", VecFP, [(set v4f32:$vD, (int_ppc_altivec_vcfsx v4i32:$vB, imm:$UIMM))]>; -def VCFUX : VXForm_1<778, (outs VRRC:$vD), (ins u5imm:$UIMM, VRRC:$vB), +def VCFUX : VXForm_1<778, (outs vrrc:$vD), (ins u5imm:$UIMM, vrrc:$vB), "vcfux $vD, $vB, $UIMM", VecFP, [(set v4f32:$vD, (int_ppc_altivec_vcfux v4i32:$vB, imm:$UIMM))]>; -def VCTSXS : VXForm_1<970, (outs VRRC:$vD), (ins u5imm:$UIMM, VRRC:$vB), +def VCTSXS : VXForm_1<970, (outs vrrc:$vD), (ins u5imm:$UIMM, vrrc:$vB), "vctsxs $vD, $vB, $UIMM", VecFP, [(set v4i32:$vD, (int_ppc_altivec_vctsxs v4f32:$vB, imm:$UIMM))]>; -def VCTUXS : VXForm_1<906, (outs VRRC:$vD), (ins u5imm:$UIMM, VRRC:$vB), +def VCTUXS : VXForm_1<906, (outs vrrc:$vD), (ins u5imm:$UIMM, vrrc:$vB), "vctuxs $vD, $vB, $UIMM", VecFP, [(set v4i32:$vD, (int_ppc_altivec_vctuxs v4f32:$vB, imm:$UIMM))]>; @@ -393,19 +393,19 @@ def VCTUXS : VXForm_1<906, (outs VRRC:$vD), (ins u5imm:$UIMM, VRRC:$vB), // to integer (fp_to_sint/fp_to_uint) conversions and integer // to floating-point (sint_to_fp/uint_to_fp) conversions. 
let VA = 0 in { -def VCFSX_0 : VXForm_1<842, (outs VRRC:$vD), (ins VRRC:$vB), +def VCFSX_0 : VXForm_1<842, (outs vrrc:$vD), (ins vrrc:$vB), "vcfsx $vD, $vB, 0", VecFP, [(set v4f32:$vD, (int_ppc_altivec_vcfsx v4i32:$vB, 0))]>; -def VCTUXS_0 : VXForm_1<906, (outs VRRC:$vD), (ins VRRC:$vB), +def VCTUXS_0 : VXForm_1<906, (outs vrrc:$vD), (ins vrrc:$vB), "vctuxs $vD, $vB, 0", VecFP, [(set v4i32:$vD, (int_ppc_altivec_vctuxs v4f32:$vB, 0))]>; -def VCFUX_0 : VXForm_1<778, (outs VRRC:$vD), (ins VRRC:$vB), +def VCFUX_0 : VXForm_1<778, (outs vrrc:$vD), (ins vrrc:$vB), "vcfux $vD, $vB, 0", VecFP, [(set v4f32:$vD, (int_ppc_altivec_vcfux v4i32:$vB, 0))]>; -def VCTSXS_0 : VXForm_1<970, (outs VRRC:$vD), (ins VRRC:$vB), +def VCTSXS_0 : VXForm_1<970, (outs vrrc:$vD), (ins vrrc:$vB), "vctsxs $vD, $vB, 0", VecFP, [(set v4i32:$vD, (int_ppc_altivec_vctsxs v4f32:$vB, 0))]>; @@ -435,22 +435,22 @@ def VMINUB : VX1_Int_Ty< 514, "vminub", int_ppc_altivec_vminub, v16i8>; def VMINUH : VX1_Int_Ty< 578, "vminuh", int_ppc_altivec_vminuh, v8i16>; def VMINUW : VX1_Int_Ty< 642, "vminuw", int_ppc_altivec_vminuw, v4i32>; -def VMRGHB : VXForm_1< 12, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB), +def VMRGHB : VXForm_1< 12, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB), "vmrghb $vD, $vA, $vB", VecFP, [(set v16i8:$vD, (vmrghb_shuffle v16i8:$vA, v16i8:$vB))]>; -def VMRGHH : VXForm_1< 76, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB), +def VMRGHH : VXForm_1< 76, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB), "vmrghh $vD, $vA, $vB", VecFP, [(set v16i8:$vD, (vmrghh_shuffle v16i8:$vA, v16i8:$vB))]>; -def VMRGHW : VXForm_1<140, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB), +def VMRGHW : VXForm_1<140, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB), "vmrghw $vD, $vA, $vB", VecFP, [(set v16i8:$vD, (vmrghw_shuffle v16i8:$vA, v16i8:$vB))]>; -def VMRGLB : VXForm_1<268, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB), +def VMRGLB : VXForm_1<268, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB), "vmrglb $vD, $vA, $vB", VecFP, [(set v16i8:$vD, (vmrglb_shuffle v16i8:$vA, v16i8:$vB))]>; -def VMRGLH : VXForm_1<332, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB), +def VMRGLH : VXForm_1<332, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB), "vmrglh $vD, $vA, $vB", VecFP, [(set v16i8:$vD, (vmrglh_shuffle v16i8:$vA, v16i8:$vB))]>; -def VMRGLW : VXForm_1<396, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB), +def VMRGLW : VXForm_1<396, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB), "vmrglw $vD, $vA, $vB", VecFP, [(set v16i8:$vD, (vmrglw_shuffle v16i8:$vA, v16i8:$vB))]>; @@ -491,18 +491,18 @@ def VRFIP : VX2_Int_SP<650, "vrfip", int_ppc_altivec_vrfip>; def VRFIZ : VX2_Int_SP<586, "vrfiz", int_ppc_altivec_vrfiz>; def VRSQRTEFP : VX2_Int_SP<330, "vrsqrtefp", int_ppc_altivec_vrsqrtefp>; -def VSUBCUW : VX1_Int_Ty<74, "vsubcuw", int_ppc_altivec_vsubcuw, v4i32>; +def VSUBCUW : VX1_Int_Ty<1408, "vsubcuw", int_ppc_altivec_vsubcuw, v4i32>; -def VSUBFP : VXForm_1<74, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB), +def VSUBFP : VXForm_1<74, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB), "vsubfp $vD, $vA, $vB", VecGeneral, [(set v4f32:$vD, (fsub v4f32:$vA, v4f32:$vB))]>; -def VSUBUBM : VXForm_1<1024, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB), +def VSUBUBM : VXForm_1<1024, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB), "vsububm $vD, $vA, $vB", VecGeneral, [(set v16i8:$vD, (sub v16i8:$vA, v16i8:$vB))]>; -def VSUBUHM : VXForm_1<1088, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB), +def VSUBUHM : VXForm_1<1088, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB), "vsubuhm $vD, $vA, $vB", VecGeneral, [(set v8i16:$vD, (sub v8i16:$vA, v8i16:$vB))]>; -def 
VSUBUWM : VXForm_1<1152, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB), +def VSUBUWM : VXForm_1<1152, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB), "vsubuwm $vD, $vA, $vB", VecGeneral, [(set v4i32:$vD, (sub v4i32:$vA, v4i32:$vB))]>; @@ -516,21 +516,21 @@ def VSUBUWS : VX1_Int_Ty<1664, "vsubuws" , int_ppc_altivec_vsubuws, v4i32>; def VSUMSWS : VX1_Int_Ty<1928, "vsumsws" , int_ppc_altivec_vsumsws, v4i32>; def VSUM2SWS: VX1_Int_Ty<1672, "vsum2sws", int_ppc_altivec_vsum2sws, v4i32>; -def VSUM4SBS: VX1_Int_Ty3<1672, "vsum4sbs", int_ppc_altivec_vsum4sbs, +def VSUM4SBS: VX1_Int_Ty3<1800, "vsum4sbs", int_ppc_altivec_vsum4sbs, v4i32, v16i8, v4i32>; def VSUM4SHS: VX1_Int_Ty3<1608, "vsum4shs", int_ppc_altivec_vsum4shs, v4i32, v8i16, v4i32>; def VSUM4UBS: VX1_Int_Ty3<1544, "vsum4ubs", int_ppc_altivec_vsum4ubs, v4i32, v16i8, v4i32>; -def VNOR : VXForm_1<1284, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB), +def VNOR : VXForm_1<1284, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB), "vnor $vD, $vA, $vB", VecFP, [(set v4i32:$vD, (vnot_ppc (or v4i32:$vA, v4i32:$vB)))]>; -def VOR : VXForm_1<1156, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB), +def VOR : VXForm_1<1156, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB), "vor $vD, $vA, $vB", VecFP, [(set v4i32:$vD, (or v4i32:$vA, v4i32:$vB))]>; -def VXOR : VXForm_1<1220, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB), +def VXOR : VXForm_1<1220, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB), "vxor $vD, $vA, $vB", VecFP, [(set v4i32:$vD, (xor v4i32:$vA, v4i32:$vB))]>; @@ -545,15 +545,15 @@ def VSLB : VX1_Int_Ty< 260, "vslb", int_ppc_altivec_vslb, v16i8>; def VSLH : VX1_Int_Ty< 324, "vslh", int_ppc_altivec_vslh, v8i16>; def VSLW : VX1_Int_Ty< 388, "vslw", int_ppc_altivec_vslw, v4i32>; -def VSPLTB : VXForm_1<524, (outs VRRC:$vD), (ins u5imm:$UIMM, VRRC:$vB), +def VSPLTB : VXForm_1<524, (outs vrrc:$vD), (ins u5imm:$UIMM, vrrc:$vB), "vspltb $vD, $vB, $UIMM", VecPerm, [(set v16i8:$vD, (vspltb_shuffle:$UIMM v16i8:$vB, (undef)))]>; -def VSPLTH : VXForm_1<588, (outs VRRC:$vD), (ins u5imm:$UIMM, VRRC:$vB), +def VSPLTH : VXForm_1<588, (outs vrrc:$vD), (ins u5imm:$UIMM, vrrc:$vB), "vsplth $vD, $vB, $UIMM", VecPerm, [(set v16i8:$vD, (vsplth_shuffle:$UIMM v16i8:$vB, (undef)))]>; -def VSPLTW : VXForm_1<652, (outs VRRC:$vD), (ins u5imm:$UIMM, VRRC:$vB), +def VSPLTW : VXForm_1<652, (outs vrrc:$vD), (ins u5imm:$UIMM, vrrc:$vB), "vspltw $vD, $vB, $UIMM", VecPerm, [(set v16i8:$vD, (vspltw_shuffle:$UIMM v16i8:$vB, (undef)))]>; @@ -569,13 +569,13 @@ def VSRH : VX1_Int_Ty< 580, "vsrh" , int_ppc_altivec_vsrh , v8i16>; def VSRW : VX1_Int_Ty< 644, "vsrw" , int_ppc_altivec_vsrw , v4i32>; -def VSPLTISB : VXForm_3<780, (outs VRRC:$vD), (ins s5imm:$SIMM), +def VSPLTISB : VXForm_3<780, (outs vrrc:$vD), (ins s5imm:$SIMM), "vspltisb $vD, $SIMM", VecPerm, [(set v16i8:$vD, (v16i8 vecspltisb:$SIMM))]>; -def VSPLTISH : VXForm_3<844, (outs VRRC:$vD), (ins s5imm:$SIMM), +def VSPLTISH : VXForm_3<844, (outs vrrc:$vD), (ins s5imm:$SIMM), "vspltish $vD, $SIMM", VecPerm, [(set v8i16:$vD, (v8i16 vecspltish:$SIMM))]>; -def VSPLTISW : VXForm_3<908, (outs VRRC:$vD), (ins s5imm:$SIMM), +def VSPLTISW : VXForm_3<908, (outs vrrc:$vD), (ins s5imm:$SIMM), "vspltisw $vD, $SIMM", VecPerm, [(set v4i32:$vD, (v4i32 vecspltisw:$SIMM))]>; @@ -590,13 +590,13 @@ def VPKSWSS : VX1_Int_Ty2<462, "vpkswss", int_ppc_altivec_vpkswss, v16i8, v4i32>; def VPKSWUS : VX1_Int_Ty2<334, "vpkswus", int_ppc_altivec_vpkswus, v8i16, v4i32>; -def VPKUHUM : VXForm_1<14, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB), +def VPKUHUM : VXForm_1<14, (outs vrrc:$vD), (ins vrrc:$vA, 
vrrc:$vB), "vpkuhum $vD, $vA, $vB", VecFP, [(set v16i8:$vD, (vpkuhum_shuffle v16i8:$vA, v16i8:$vB))]>; def VPKUHUS : VX1_Int_Ty2<142, "vpkuhus", int_ppc_altivec_vpkuhus, v16i8, v8i16>; -def VPKUWUM : VXForm_1<78, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB), +def VPKUWUM : VXForm_1<78, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB), "vpkuwum $vD, $vA, $vB", VecFP, [(set v16i8:$vD, (vpkuwum_shuffle v16i8:$vA, v16i8:$vB))]>; @@ -621,10 +621,10 @@ def VUPKLSH : VX2_Int_Ty2<718, "vupklsh", int_ppc_altivec_vupklsh, // Altivec Comparisons. class VCMP<bits<10> xo, string asmstr, ValueType Ty> - : VXRForm_1<xo, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB),asmstr,VecFPCompare, + : VXRForm_1<xo, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),asmstr,VecFPCompare, [(set Ty:$vD, (Ty (PPCvcmp Ty:$vA, Ty:$vB, xo)))]>; class VCMPo<bits<10> xo, string asmstr, ValueType Ty> - : VXRForm_1<xo, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB),asmstr,VecFPCompare, + : VXRForm_1<xo, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),asmstr,VecFPCompare, [(set Ty:$vD, (Ty (PPCvcmp_o Ty:$vA, Ty:$vB, xo)))]> { let Defs = [CR6]; let RC = 1; @@ -665,11 +665,11 @@ def VCMPGTUW : VCMP <646, "vcmpgtuw $vD, $vA, $vB" , v4i32>; def VCMPGTUWo : VCMPo<646, "vcmpgtuw. $vD, $vA, $vB", v4i32>; let isCodeGenOnly = 1 in -def V_SET0 : VXForm_setzero<1220, (outs VRRC:$vD), (ins), +def V_SET0 : VXForm_setzero<1220, (outs vrrc:$vD), (ins), "vxor $vD, $vD, $vD", VecFP, [(set v4i32:$vD, (v4i32 immAllZerosV))]>; let IMM=-1 in { -def V_SETALLONES : VXForm_3<908, (outs VRRC:$vD), (ins), +def V_SETALLONES : VXForm_3<908, (outs vrrc:$vD), (ins), "vspltisw $vD, -1", VecFP, [(set v4i32:$vD, (v4i32 immAllOnesV))]>; } diff --git a/contrib/llvm/lib/Target/PowerPC/PPCInstrFormats.td b/contrib/llvm/lib/Target/PowerPC/PPCInstrFormats.td index 400b7e3..b6f4e85 100644 --- a/contrib/llvm/lib/Target/PowerPC/PPCInstrFormats.td +++ b/contrib/llvm/lib/Target/PowerPC/PPCInstrFormats.td @@ -35,6 +35,15 @@ class I<bits<6> opcode, dag OOL, dag IOL, string asmstr, InstrItinClass itin> let TSFlags{1} = PPC970_Single; let TSFlags{2} = PPC970_Cracked; let TSFlags{5-3} = PPC970_Unit; + + // Fields used for relation models. + string BaseName = ""; + + // For cases where multiple instruction definitions really represent the + // same underlying instruction but with one definition for 64-bit arguments + // and one for 32-bit arguments, this bit breaks the degeneracy between + // the two forms and allows TableGen to generate mapping tables. + bit Interpretation64Bit = 0; } class PPC970_DGroup_First { bits<1> PPC970_First = 1; } @@ -80,6 +89,10 @@ class I2<bits<6> opcode1, bits<6> opcode2, dag OOL, dag IOL, string asmstr, let TSFlags{1} = PPC970_Single; let TSFlags{2} = PPC970_Cracked; let TSFlags{5-3} = PPC970_Unit; + + // Fields used for relation models. + string BaseName = ""; + bit Interpretation64Bit = 0; } // 1.7.1 I-Form @@ -177,7 +190,12 @@ class DForm_1a<bits<6> opcode, dag OOL, dag IOL, string asmstr, class DForm_2<bits<6> opcode, dag OOL, dag IOL, string asmstr, InstrItinClass itin, list<dag> pattern> - : DForm_base<opcode, OOL, IOL, asmstr, itin, pattern>; + : DForm_base<opcode, OOL, IOL, asmstr, itin, pattern> { + + // Even though ADDICo does not really have an RC bit, provide + // the declaration of one here so that isDOT has something to set. 
+ bit RC = 0; +} class DForm_2_r0<bits<6> opcode, dag OOL, dag IOL, string asmstr, InstrItinClass itin, list<dag> pattern> @@ -347,6 +365,12 @@ class XForm_1<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr, InstrItinClass itin, list<dag> pattern> : XForm_base_r3xo<opcode, xo, OOL, IOL, asmstr, itin, pattern>; +class XForm_1a<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr, + InstrItinClass itin, list<dag> pattern> + : XForm_base_r3xo<opcode, xo, OOL, IOL, asmstr, itin, pattern> { + let RST = 0; +} + class XForm_6<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr, InstrItinClass itin, list<dag> pattern> : XForm_base_r3xo_swapped<opcode, xo, OOL, IOL, asmstr, itin> { @@ -565,9 +589,9 @@ class XLForm_2_br<bits<6> opcode, bits<10> xo, bit lk, bits<7> BIBO; // 2 bits of BI and 5 bits of BO. bits<3> CR; - let BO = BIBO{2-6}; - let BI{0-1} = BIBO{0-1}; - let BI{2-4} = CR; + let BO = BIBO{4-0}; + let BI{0-1} = BIBO{5-6}; + let BI{2-4} = CR{0-2}; let BH = 0; } @@ -837,6 +861,25 @@ class MDForm_1<bits<6> opcode, bits<3> xo, dag OOL, dag IOL, string asmstr, let Inst{31} = RC; } +class MDSForm_1<bits<6> opcode, bits<4> xo, dag OOL, dag IOL, string asmstr, + InstrItinClass itin, list<dag> pattern> + : I<opcode, OOL, IOL, asmstr, itin> { + bits<5> RA; + bits<5> RS; + bits<5> RB; + bits<6> MBE; + + let Pattern = pattern; + + bit RC = 0; // set by isDOT + + let Inst{6-10} = RS; + let Inst{11-15} = RA; + let Inst{16-20} = RB; + let Inst{21-26} = MBE{4,3,2,1,0,5}; + let Inst{27-30} = xo; + let Inst{31} = RC; +} // E-1 VA-Form diff --git a/contrib/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp b/contrib/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp index 69c54ed..1fb17eb 100644 --- a/contrib/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp +++ b/contrib/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp @@ -18,8 +18,10 @@ #include "PPCInstrBuilder.h" #include "PPCMachineFunctionInfo.h" #include "PPCTargetMachine.h" +#include "llvm/ADT/Statistic.h" #include "llvm/ADT/STLExtras.h" #include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineMemOperand.h" #include "llvm/CodeGen/MachineRegisterInfo.h" @@ -30,6 +32,7 @@ #include "llvm/Support/TargetRegistry.h" #include "llvm/Support/raw_ostream.h" +#define GET_INSTRMAP_INFO #define GET_INSTRINFO_CTOR #include "PPCGenInstrInfo.inc" @@ -39,6 +42,9 @@ static cl:: opt<bool> DisableCTRLoopAnal("disable-ppc-ctrloop-analysis", cl::Hidden, cl::desc("Disable analysis for CTR loops")); +static cl::opt<bool> DisableCmpOpt("disable-ppc-cmp-opt", +cl::desc("Disable compare instruction optimization"), cl::Hidden); + PPCInstrInfo::PPCInstrInfo(PPCTargetMachine &tm) : PPCGenInstrInfo(PPC::ADJCALLSTACKDOWN, PPC::ADJCALLSTACKUP), TM(tm), RI(*TM.getSubtargetImpl(), *this) {} @@ -147,7 +153,8 @@ PPCInstrInfo::commuteInstruction(MachineInstr *MI, bool NewMI) const { MachineFunction &MF = *MI->getParent()->getParent(); // Normal instructions can be commuted the obvious way. - if (MI->getOpcode() != PPC::RLWIMI) + if (MI->getOpcode() != PPC::RLWIMI && + MI->getOpcode() != PPC::RLWIMIo) return TargetInstrInfo::commuteInstruction(MI, NewMI); // Cannot commute if it has a non-zero rotate count. @@ -417,6 +424,105 @@ PPCInstrInfo::InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, return 2; } +// Select analysis. 
+bool PPCInstrInfo::canInsertSelect(const MachineBasicBlock &MBB, + const SmallVectorImpl<MachineOperand> &Cond, + unsigned TrueReg, unsigned FalseReg, + int &CondCycles, int &TrueCycles, int &FalseCycles) const { + if (!TM.getSubtargetImpl()->hasISEL()) + return false; + + if (Cond.size() != 2) + return false; + + // If this is really a bdnz-like condition, then it cannot be turned into a + // select. + if (Cond[1].getReg() == PPC::CTR || Cond[1].getReg() == PPC::CTR8) + return false; + + // Check register classes. + const MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo(); + const TargetRegisterClass *RC = + RI.getCommonSubClass(MRI.getRegClass(TrueReg), MRI.getRegClass(FalseReg)); + if (!RC) + return false; + + // isel is for regular integer GPRs only. + if (!PPC::GPRCRegClass.hasSubClassEq(RC) && + !PPC::G8RCRegClass.hasSubClassEq(RC)) + return false; + + // FIXME: These numbers are for the A2, how well they work for other cores is + // an open question. On the A2, the isel instruction has a 2-cycle latency + // but single-cycle throughput. These numbers are used in combination with + // the MispredictPenalty setting from the active SchedMachineModel. + CondCycles = 1; + TrueCycles = 1; + FalseCycles = 1; + + return true; +} + +void PPCInstrInfo::insertSelect(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MI, DebugLoc dl, + unsigned DestReg, + const SmallVectorImpl<MachineOperand> &Cond, + unsigned TrueReg, unsigned FalseReg) const { + assert(Cond.size() == 2 && + "PPC branch conditions have two components!"); + + assert(TM.getSubtargetImpl()->hasISEL() && + "Cannot insert select on target without ISEL support"); + + // Get the register classes. + MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo(); + const TargetRegisterClass *RC = + RI.getCommonSubClass(MRI.getRegClass(TrueReg), MRI.getRegClass(FalseReg)); + assert(RC && "TrueReg and FalseReg must have overlapping register classes"); + assert((PPC::GPRCRegClass.hasSubClassEq(RC) || + PPC::G8RCRegClass.hasSubClassEq(RC)) && + "isel is for regular integer GPRs only"); + + unsigned OpCode = + PPC::GPRCRegClass.hasSubClassEq(RC) ? PPC::ISEL : PPC::ISEL8; + unsigned SelectPred = Cond[0].getImm(); + + unsigned SubIdx; + bool SwapOps; + switch (SelectPred) { + default: llvm_unreachable("invalid predicate for isel"); + case PPC::PRED_EQ: SubIdx = PPC::sub_eq; SwapOps = false; break; + case PPC::PRED_NE: SubIdx = PPC::sub_eq; SwapOps = true; break; + case PPC::PRED_LT: SubIdx = PPC::sub_lt; SwapOps = false; break; + case PPC::PRED_GE: SubIdx = PPC::sub_lt; SwapOps = true; break; + case PPC::PRED_GT: SubIdx = PPC::sub_gt; SwapOps = false; break; + case PPC::PRED_LE: SubIdx = PPC::sub_gt; SwapOps = true; break; + case PPC::PRED_UN: SubIdx = PPC::sub_un; SwapOps = false; break; + case PPC::PRED_NU: SubIdx = PPC::sub_un; SwapOps = true; break; + } + + unsigned FirstReg = SwapOps ? FalseReg : TrueReg, + SecondReg = SwapOps ? TrueReg : FalseReg; + + // The first input register of isel cannot be r0. If it is a member + // of a register class that can be r0, then copy it first (the + // register allocator should eliminate the copy). + if (MRI.getRegClass(FirstReg)->contains(PPC::R0) || + MRI.getRegClass(FirstReg)->contains(PPC::X0)) { + const TargetRegisterClass *FirstRC = + MRI.getRegClass(FirstReg)->contains(PPC::X0) ? 
+ &PPC::G8RC_NOX0RegClass : &PPC::GPRC_NOR0RegClass; + unsigned OldFirstReg = FirstReg; + FirstReg = MRI.createVirtualRegister(FirstRC); + BuildMI(MBB, MI, dl, get(TargetOpcode::COPY), FirstReg) + .addReg(OldFirstReg); + } + + BuildMI(MBB, MI, dl, get(OpCode), DestReg) + .addReg(FirstReg).addReg(SecondReg) + .addReg(Cond[1].getReg(), 0, SubIdx); +} + void PPCInstrInfo::copyPhysReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, DebugLoc DL, unsigned DestReg, unsigned SrcReg, @@ -707,6 +813,555 @@ ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const { return false; } +bool PPCInstrInfo::FoldImmediate(MachineInstr *UseMI, MachineInstr *DefMI, + unsigned Reg, MachineRegisterInfo *MRI) const { + // For some instructions, it is legal to fold ZERO into the RA register field. + // A zero immediate should always be loaded with a single li. + unsigned DefOpc = DefMI->getOpcode(); + if (DefOpc != PPC::LI && DefOpc != PPC::LI8) + return false; + if (!DefMI->getOperand(1).isImm()) + return false; + if (DefMI->getOperand(1).getImm() != 0) + return false; + + // Note that we cannot here invert the arguments of an isel in order to fold + // a ZERO into what is presented as the second argument. All we have here + // is the condition bit, and that might come from a CR-logical bit operation. + + const MCInstrDesc &UseMCID = UseMI->getDesc(); + + // Only fold into real machine instructions. + if (UseMCID.isPseudo()) + return false; + + unsigned UseIdx; + for (UseIdx = 0; UseIdx < UseMI->getNumOperands(); ++UseIdx) + if (UseMI->getOperand(UseIdx).isReg() && + UseMI->getOperand(UseIdx).getReg() == Reg) + break; + + assert(UseIdx < UseMI->getNumOperands() && "Cannot find Reg in UseMI"); + assert(UseIdx < UseMCID.getNumOperands() && "No operand description for Reg"); + + const MCOperandInfo *UseInfo = &UseMCID.OpInfo[UseIdx]; + + // We can fold the zero if this register requires a GPRC_NOR0/G8RC_NOX0 + // register (which might also be specified as a pointer class kind). + if (UseInfo->isLookupPtrRegClass()) { + if (UseInfo->RegClass /* Kind */ != 1) + return false; + } else { + if (UseInfo->RegClass != PPC::GPRC_NOR0RegClassID && + UseInfo->RegClass != PPC::G8RC_NOX0RegClassID) + return false; + } + + // Make sure this is not tied to an output register (or otherwise + // constrained). This is true for ST?UX registers, for example, which + // are tied to their output registers. + if (UseInfo->Constraints != 0) + return false; + + unsigned ZeroReg; + if (UseInfo->isLookupPtrRegClass()) { + bool isPPC64 = TM.getSubtargetImpl()->isPPC64(); + ZeroReg = isPPC64 ? PPC::ZERO8 : PPC::ZERO; + } else { + ZeroReg = UseInfo->RegClass == PPC::G8RC_NOX0RegClassID ? + PPC::ZERO8 : PPC::ZERO; + } + + bool DeleteDef = MRI->hasOneNonDBGUse(Reg); + UseMI->getOperand(UseIdx).setReg(ZeroReg); + + if (DeleteDef) + DefMI->eraseFromParent(); + + return true; +} + +static bool MBBDefinesCTR(MachineBasicBlock &MBB) { + for (MachineBasicBlock::iterator I = MBB.begin(), IE = MBB.end(); + I != IE; ++I) + if (I->definesRegister(PPC::CTR) || I->definesRegister(PPC::CTR8)) + return true; + return false; +} + +// We should make sure that, if we're going to predicate both sides of a +// condition (a diamond), that both sides don't define the counter register. We +// can predicate counter-decrement-based branches, but while that predicates +// the branching, it does not predicate the counter decrement. If we tried to +// merge the triangle into one predicated block, we'd decrement the counter +// twice. 
+bool PPCInstrInfo::isProfitableToIfCvt(MachineBasicBlock &TMBB, + unsigned NumT, unsigned ExtraT, + MachineBasicBlock &FMBB, + unsigned NumF, unsigned ExtraF, + const BranchProbability &Probability) const { + return !(MBBDefinesCTR(TMBB) && MBBDefinesCTR(FMBB)); +} + + +bool PPCInstrInfo::isPredicated(const MachineInstr *MI) const { + // The predicated branches are identified by their type, not really by the + // explicit presence of a predicate. Furthermore, some of them can be + // predicated more than once. Because if conversion won't try to predicate + // any instruction which already claims to be predicated (by returning true + // here), always return false. In doing so, we let isPredicable() be the + // final word on whether not the instruction can be (further) predicated. + + return false; +} + +bool PPCInstrInfo::isUnpredicatedTerminator(const MachineInstr *MI) const { + if (!MI->isTerminator()) + return false; + + // Conditional branch is a special case. + if (MI->isBranch() && !MI->isBarrier()) + return true; + + return !isPredicated(MI); +} + +bool PPCInstrInfo::PredicateInstruction( + MachineInstr *MI, + const SmallVectorImpl<MachineOperand> &Pred) const { + unsigned OpC = MI->getOpcode(); + if (OpC == PPC::BLR) { + if (Pred[1].getReg() == PPC::CTR8 || Pred[1].getReg() == PPC::CTR) { + bool isPPC64 = TM.getSubtargetImpl()->isPPC64(); + MI->setDesc(get(Pred[0].getImm() ? + (isPPC64 ? PPC::BDNZLR8 : PPC::BDNZLR) : + (isPPC64 ? PPC::BDZLR8 : PPC::BDZLR))); + } else { + MI->setDesc(get(PPC::BCLR)); + MachineInstrBuilder(*MI->getParent()->getParent(), MI) + .addImm(Pred[0].getImm()) + .addReg(Pred[1].getReg()); + } + + return true; + } else if (OpC == PPC::B) { + if (Pred[1].getReg() == PPC::CTR8 || Pred[1].getReg() == PPC::CTR) { + bool isPPC64 = TM.getSubtargetImpl()->isPPC64(); + MI->setDesc(get(Pred[0].getImm() ? + (isPPC64 ? PPC::BDNZ8 : PPC::BDNZ) : + (isPPC64 ? PPC::BDZ8 : PPC::BDZ))); + } else { + MachineBasicBlock *MBB = MI->getOperand(0).getMBB(); + MI->RemoveOperand(0); + + MI->setDesc(get(PPC::BCC)); + MachineInstrBuilder(*MI->getParent()->getParent(), MI) + .addImm(Pred[0].getImm()) + .addReg(Pred[1].getReg()) + .addMBB(MBB); + } + + return true; + } else if (OpC == PPC::BCTR || OpC == PPC::BCTR8 || + OpC == PPC::BCTRL || OpC == PPC::BCTRL8) { + if (Pred[1].getReg() == PPC::CTR8 || Pred[1].getReg() == PPC::CTR) + llvm_unreachable("Cannot predicate bctr[l] on the ctr register"); + + bool setLR = OpC == PPC::BCTRL || OpC == PPC::BCTRL8; + bool isPPC64 = TM.getSubtargetImpl()->isPPC64(); + MI->setDesc(get(isPPC64 ? (setLR ? PPC::BCCTRL8 : PPC::BCCTR8) : + (setLR ? PPC::BCCTRL : PPC::BCCTR))); + MachineInstrBuilder(*MI->getParent()->getParent(), MI) + .addImm(Pred[0].getImm()) + .addReg(Pred[1].getReg()); + return true; + } + + return false; +} + +bool PPCInstrInfo::SubsumesPredicate( + const SmallVectorImpl<MachineOperand> &Pred1, + const SmallVectorImpl<MachineOperand> &Pred2) const { + assert(Pred1.size() == 2 && "Invalid PPC first predicate"); + assert(Pred2.size() == 2 && "Invalid PPC second predicate"); + + if (Pred1[1].getReg() == PPC::CTR8 || Pred1[1].getReg() == PPC::CTR) + return false; + if (Pred2[1].getReg() == PPC::CTR8 || Pred2[1].getReg() == PPC::CTR) + return false; + + PPC::Predicate P1 = (PPC::Predicate) Pred1[0].getImm(); + PPC::Predicate P2 = (PPC::Predicate) Pred2[0].getImm(); + + if (P1 == P2) + return true; + + // Does P1 subsume P2, e.g. GE subsumes GT. 
+ if (P1 == PPC::PRED_LE && + (P2 == PPC::PRED_LT || P2 == PPC::PRED_EQ)) + return true; + if (P1 == PPC::PRED_GE && + (P2 == PPC::PRED_GT || P2 == PPC::PRED_EQ)) + return true; + + return false; +} + +bool PPCInstrInfo::DefinesPredicate(MachineInstr *MI, + std::vector<MachineOperand> &Pred) const { + // Note: At the present time, the contents of Pred from this function are + // unused by IfConversion. This implementation follows ARM by pushing the + // CR-defining operand. Because the 'DZ' and 'DNZ' count as types of + // predicate, instructions defining CTR or CTR8 are also included as + // predicate-defining instructions. + + const TargetRegisterClass *RCs[] = + { &PPC::CRRCRegClass, &PPC::CRBITRCRegClass, + &PPC::CTRRCRegClass, &PPC::CTRRC8RegClass }; + + bool Found = false; + for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { + const MachineOperand &MO = MI->getOperand(i); + for (unsigned c = 0; c < array_lengthof(RCs) && !Found; ++c) { + const TargetRegisterClass *RC = RCs[c]; + if (MO.isReg()) { + if (MO.isDef() && RC->contains(MO.getReg())) { + Pred.push_back(MO); + Found = true; + } + } else if (MO.isRegMask()) { + for (TargetRegisterClass::iterator I = RC->begin(), + IE = RC->end(); I != IE; ++I) + if (MO.clobbersPhysReg(*I)) { + Pred.push_back(MO); + Found = true; + } + } + } + } + + return Found; +} + +bool PPCInstrInfo::isPredicable(MachineInstr *MI) const { + unsigned OpC = MI->getOpcode(); + switch (OpC) { + default: + return false; + case PPC::B: + case PPC::BLR: + case PPC::BCTR: + case PPC::BCTR8: + case PPC::BCTRL: + case PPC::BCTRL8: + return true; + } +} + +bool PPCInstrInfo::analyzeCompare(const MachineInstr *MI, + unsigned &SrcReg, unsigned &SrcReg2, + int &Mask, int &Value) const { + unsigned Opc = MI->getOpcode(); + + switch (Opc) { + default: return false; + case PPC::CMPWI: + case PPC::CMPLWI: + case PPC::CMPDI: + case PPC::CMPLDI: + SrcReg = MI->getOperand(1).getReg(); + SrcReg2 = 0; + Value = MI->getOperand(2).getImm(); + Mask = 0xFFFF; + return true; + case PPC::CMPW: + case PPC::CMPLW: + case PPC::CMPD: + case PPC::CMPLD: + case PPC::FCMPUS: + case PPC::FCMPUD: + SrcReg = MI->getOperand(1).getReg(); + SrcReg2 = MI->getOperand(2).getReg(); + return true; + } +} + +bool PPCInstrInfo::optimizeCompareInstr(MachineInstr *CmpInstr, + unsigned SrcReg, unsigned SrcReg2, + int Mask, int Value, + const MachineRegisterInfo *MRI) const { + if (DisableCmpOpt) + return false; + + int OpC = CmpInstr->getOpcode(); + unsigned CRReg = CmpInstr->getOperand(0).getReg(); + + // FP record forms set CR1 based on the exception status bits, not a + // comparison with zero. + if (OpC == PPC::FCMPUS || OpC == PPC::FCMPUD) + return false; + + // The record forms set the condition register based on a signed comparison + // with zero (so says the ISA manual). This is not as straightforward as it + // seems, however, because this is always a 64-bit comparison on PPC64, even + // for instructions that are 32-bit in nature (like slw for example). + // So, on PPC32, for unsigned comparisons, we can use the record forms only + // for equality checks (as those don't depend on the sign). On PPC64, + // we are restricted to equality for unsigned 64-bit comparisons and for + // signed 32-bit comparisons the applicability is more restricted.
+ bool isPPC64 = TM.getSubtargetImpl()->isPPC64(); + bool is32BitSignedCompare = OpC == PPC::CMPWI || OpC == PPC::CMPW; + bool is32BitUnsignedCompare = OpC == PPC::CMPLWI || OpC == PPC::CMPLW; + bool is64BitUnsignedCompare = OpC == PPC::CMPLDI || OpC == PPC::CMPLD; + + // Get the unique definition of SrcReg. + MachineInstr *MI = MRI->getUniqueVRegDef(SrcReg); + if (!MI) return false; + int MIOpC = MI->getOpcode(); + + bool equalityOnly = false; + bool noSub = false; + if (isPPC64) { + if (is32BitSignedCompare) { + // We can perform this optimization only if MI is sign-extending. + if (MIOpC == PPC::SRAW || MIOpC == PPC::SRAWo || + MIOpC == PPC::SRAWI || MIOpC == PPC::SRAWIo || + MIOpC == PPC::EXTSB || MIOpC == PPC::EXTSBo || + MIOpC == PPC::EXTSH || MIOpC == PPC::EXTSHo || + MIOpC == PPC::EXTSW || MIOpC == PPC::EXTSWo) { + noSub = true; + } else + return false; + } else if (is32BitUnsignedCompare) { + // We can perform this optimization, equality only, if MI is + // zero-extending. + if (MIOpC == PPC::CNTLZW || MIOpC == PPC::CNTLZWo || + MIOpC == PPC::SLW || MIOpC == PPC::SLWo || + MIOpC == PPC::SRW || MIOpC == PPC::SRWo) { + noSub = true; + equalityOnly = true; + } else + return false; + } else + equalityOnly = is64BitUnsignedCompare; + } else + equalityOnly = is32BitUnsignedCompare; + + if (equalityOnly) { + // We need to check the uses of the condition register in order to reject + // non-equality comparisons. + for (MachineRegisterInfo::use_iterator I = MRI->use_begin(CRReg), + IE = MRI->use_end(); I != IE; ++I) { + MachineInstr *UseMI = &*I; + if (UseMI->getOpcode() == PPC::BCC) { + unsigned Pred = UseMI->getOperand(0).getImm(); + if (Pred == PPC::PRED_EQ || Pred == PPC::PRED_NE) + continue; + + return false; + } else if (UseMI->getOpcode() == PPC::ISEL || + UseMI->getOpcode() == PPC::ISEL8) { + unsigned SubIdx = UseMI->getOperand(3).getSubReg(); + if (SubIdx == PPC::sub_eq) + continue; + + return false; + } else + return false; + } + } + + // Get ready to iterate backward from CmpInstr. + MachineBasicBlock::iterator I = CmpInstr, E = MI, + B = CmpInstr->getParent()->begin(); + + // Scan forward to find the first use of the compare. + for (MachineBasicBlock::iterator EL = CmpInstr->getParent()->end(); + I != EL; ++I) { + bool FoundUse = false; + for (MachineRegisterInfo::use_iterator J = MRI->use_begin(CRReg), + JE = MRI->use_end(); J != JE; ++J) + if (&*J == &*I) { + FoundUse = true; + break; + } + + if (FoundUse) + break; + } + + // Early exit if we're at the beginning of the BB. + if (I == B) return false; + + // There are two possible candidates which can be changed to set CR[01]. + // One is MI, the other is a SUB instruction. + // For CMPrr(r1,r2), we are looking for SUB(r1,r2) or SUB(r2,r1). + MachineInstr *Sub = NULL; + if (SrcReg2 != 0) + // MI is not a candidate for CMPrr. + MI = NULL; + // FIXME: Conservatively refuse to convert an instruction which isn't in the + // same BB as the comparison. This is to allow the check below to avoid calls + // (and other explicit clobbers); instead we should really check for these + // more explicitly (in at least a few predecessors). + else if (MI->getParent() != CmpInstr->getParent() || Value != 0) { + // PPC does not have a record-form SUBri. + return false; + } + + // Search for Sub. 
+ const TargetRegisterInfo *TRI = &getRegisterInfo(); + --I; + for (; I != E && !noSub; --I) { + const MachineInstr &Instr = *I; + unsigned IOpC = Instr.getOpcode(); + + if (&*I != CmpInstr && ( + Instr.modifiesRegister(PPC::CR0, TRI) || + Instr.readsRegister(PPC::CR0, TRI))) + // This instruction modifies or uses the record condition register after + // the one we want to change. While we could do this transformation, it + // would likely not be profitable. This transformation removes one + // instruction, and so even forcing RA to generate one move probably + // makes it unprofitable. + return false; + + // Check whether CmpInstr can be made redundant by the current instruction. + if ((OpC == PPC::CMPW || OpC == PPC::CMPLW || + OpC == PPC::CMPD || OpC == PPC::CMPLD) && + (IOpC == PPC::SUBF || IOpC == PPC::SUBF8) && + ((Instr.getOperand(1).getReg() == SrcReg && + Instr.getOperand(2).getReg() == SrcReg2) || + (Instr.getOperand(1).getReg() == SrcReg2 && + Instr.getOperand(2).getReg() == SrcReg))) { + Sub = &*I; + break; + } + + if (I == B) + // The 'and' is below the comparison instruction. + return false; + } + + // Return false if no candidates exist. + if (!MI && !Sub) + return false; + + // The single candidate is called MI. + if (!MI) MI = Sub; + + int NewOpC = -1; + MIOpC = MI->getOpcode(); + if (MIOpC == PPC::ANDIo || MIOpC == PPC::ANDIo8) + NewOpC = MIOpC; + else { + NewOpC = PPC::getRecordFormOpcode(MIOpC); + if (NewOpC == -1 && PPC::getNonRecordFormOpcode(MIOpC) != -1) + NewOpC = MIOpC; + } + + // FIXME: On the non-embedded POWER architectures, only some of the record + // forms are fast, and we should use only the fast ones. + + // The defining instruction has a record form (or is already a record + // form). It is possible, however, that we'll need to reverse the condition + // code of the users. + if (NewOpC == -1) + return false; + + SmallVector<std::pair<MachineOperand*, PPC::Predicate>, 4> PredsToUpdate; + SmallVector<std::pair<MachineOperand*, unsigned>, 4> SubRegsToUpdate; + + // If we have SUB(r1, r2) and CMP(r2, r1), the condition code based on CMP + // needs to be updated to be based on SUB. Push the condition code + // operands to OperandsToUpdate. If it is safe to remove CmpInstr, the + // condition code of these operands will be modified. + bool ShouldSwap = false; + if (Sub) { + ShouldSwap = SrcReg2 != 0 && Sub->getOperand(1).getReg() == SrcReg2 && + Sub->getOperand(2).getReg() == SrcReg; + + // The operands to subf are the opposite of sub, so only in the fixed-point + // case, invert the order. 
+ ShouldSwap = !ShouldSwap; + } + + if (ShouldSwap) + for (MachineRegisterInfo::use_iterator I = MRI->use_begin(CRReg), + IE = MRI->use_end(); I != IE; ++I) { + MachineInstr *UseMI = &*I; + if (UseMI->getOpcode() == PPC::BCC) { + PPC::Predicate Pred = (PPC::Predicate) UseMI->getOperand(0).getImm(); + assert((!equalityOnly || + Pred == PPC::PRED_EQ || Pred == PPC::PRED_NE) && + "Invalid predicate for equality-only optimization"); + PredsToUpdate.push_back(std::make_pair(&((*I).getOperand(0)), + PPC::getSwappedPredicate(Pred))); + } else if (UseMI->getOpcode() == PPC::ISEL || + UseMI->getOpcode() == PPC::ISEL8) { + unsigned NewSubReg = UseMI->getOperand(3).getSubReg(); + assert((!equalityOnly || NewSubReg == PPC::sub_eq) && + "Invalid CR bit for equality-only optimization"); + + if (NewSubReg == PPC::sub_lt) + NewSubReg = PPC::sub_gt; + else if (NewSubReg == PPC::sub_gt) + NewSubReg = PPC::sub_lt; + + SubRegsToUpdate.push_back(std::make_pair(&((*I).getOperand(3)), + NewSubReg)); + } else // We need to abort on a user we don't understand. + return false; + } + + // Create a new virtual register to hold the value of the CR set by the + // record-form instruction. If the instruction was not previously in + // record form, then set the kill flag on the CR. + CmpInstr->eraseFromParent(); + + MachineBasicBlock::iterator MII = MI; + BuildMI(*MI->getParent(), llvm::next(MII), MI->getDebugLoc(), + get(TargetOpcode::COPY), CRReg) + .addReg(PPC::CR0, MIOpC != NewOpC ? RegState::Kill : 0); + + if (MIOpC != NewOpC) { + // We need to be careful here: we're replacing one instruction with + // another, and we need to make sure that we get all of the right + // implicit uses and defs. On the other hand, the caller may be holding + // an iterator to this instruction, and so we can't delete it (this is + // specifically the case if this is the instruction directly after the + // compare). + + const MCInstrDesc &NewDesc = get(NewOpC); + MI->setDesc(NewDesc); + + if (NewDesc.ImplicitDefs) + for (const uint16_t *ImpDefs = NewDesc.getImplicitDefs(); + *ImpDefs; ++ImpDefs) + if (!MI->definesRegister(*ImpDefs)) + MI->addOperand(*MI->getParent()->getParent(), + MachineOperand::CreateReg(*ImpDefs, true, true)); + if (NewDesc.ImplicitUses) + for (const uint16_t *ImpUses = NewDesc.getImplicitUses(); + *ImpUses; ++ImpUses) + if (!MI->readsRegister(*ImpUses)) + MI->addOperand(*MI->getParent()->getParent(), + MachineOperand::CreateReg(*ImpUses, false, true)); + } + + // Modify the condition code of operands in OperandsToUpdate. + // Since we have SUB(r1, r2) and CMP(r2, r1), the condition code needs to + // be changed from r2 > r1 to r1 < r2, from r2 < r1 to r1 > r2, etc. + for (unsigned i = 0, e = PredsToUpdate.size(); i < e; i++) + PredsToUpdate[i].first->setImm(PredsToUpdate[i].second); + + for (unsigned i = 0, e = SubRegsToUpdate.size(); i < e; i++) + SubRegsToUpdate[i].first->setSubReg(SubRegsToUpdate[i].second); + + return true; +} + /// GetInstSize - Return the number of bytes of code the specified /// instruction may be. This returns the maximum number of bytes. 
/// @@ -729,3 +1384,152 @@ unsigned PPCInstrInfo::GetInstSizeInBytes(const MachineInstr *MI) const { return 4; // PowerPC instructions are all 4 bytes } } + +#undef DEBUG_TYPE +#define DEBUG_TYPE "ppc-early-ret" +STATISTIC(NumBCLR, "Number of early conditional returns"); +STATISTIC(NumBLR, "Number of early returns"); + +namespace llvm { + void initializePPCEarlyReturnPass(PassRegistry&); +} + +namespace { + // PPCEarlyReturn pass - For simple functions without epilogue code, move + // returns up, and create conditional returns, to avoid unnecessary + // branch-to-blr sequences. + struct PPCEarlyReturn : public MachineFunctionPass { + static char ID; + PPCEarlyReturn() : MachineFunctionPass(ID) { + initializePPCEarlyReturnPass(*PassRegistry::getPassRegistry()); + } + + const PPCTargetMachine *TM; + const PPCInstrInfo *TII; + +protected: + bool processBlock(MachineBasicBlock &ReturnMBB) { + bool Changed = false; + + MachineBasicBlock::iterator I = ReturnMBB.begin(); + I = ReturnMBB.SkipPHIsAndLabels(I); + + // The block must be essentially empty except for the blr. + if (I == ReturnMBB.end() || I->getOpcode() != PPC::BLR || + I != ReturnMBB.getLastNonDebugInstr()) + return Changed; + + SmallVector<MachineBasicBlock*, 8> PredToRemove; + for (MachineBasicBlock::pred_iterator PI = ReturnMBB.pred_begin(), + PIE = ReturnMBB.pred_end(); PI != PIE; ++PI) { + bool OtherReference = false, BlockChanged = false; + for (MachineBasicBlock::iterator J = (*PI)->getLastNonDebugInstr();;) { + if (J->getOpcode() == PPC::B) { + if (J->getOperand(0).getMBB() == &ReturnMBB) { + // This is an unconditional branch to the return. Replace the + // branch with a blr. + BuildMI(**PI, J, J->getDebugLoc(), TII->get(PPC::BLR)); + MachineBasicBlock::iterator K = J--; + K->eraseFromParent(); + BlockChanged = true; + ++NumBLR; + continue; + } + } else if (J->getOpcode() == PPC::BCC) { + if (J->getOperand(2).getMBB() == &ReturnMBB) { + // This is a conditional branch to the return. Replace the branch + // with a bclr. + BuildMI(**PI, J, J->getDebugLoc(), TII->get(PPC::BCLR)) + .addImm(J->getOperand(0).getImm()) + .addReg(J->getOperand(1).getReg()); + MachineBasicBlock::iterator K = J--; + K->eraseFromParent(); + BlockChanged = true; + ++NumBCLR; + continue; + } + } else if (J->isBranch()) { + if (J->isIndirectBranch()) { + if (ReturnMBB.hasAddressTaken()) + OtherReference = true; + } else + for (unsigned i = 0; i < J->getNumOperands(); ++i) + if (J->getOperand(i).isMBB() && + J->getOperand(i).getMBB() == &ReturnMBB) + OtherReference = true; + } else if (!J->isTerminator() && !J->isDebugValue()) + break; + + if (J == (*PI)->begin()) + break; + + --J; + } + + if ((*PI)->canFallThrough() && (*PI)->isLayoutSuccessor(&ReturnMBB)) + OtherReference = true; + + // Predecessors are stored in a vector and can't be removed here. + if (!OtherReference && BlockChanged) { + PredToRemove.push_back(*PI); + } + + if (BlockChanged) + Changed = true; + } + + for (unsigned i = 0, ie = PredToRemove.size(); i != ie; ++i) + PredToRemove[i]->removeSuccessor(&ReturnMBB); + + if (Changed && !ReturnMBB.hasAddressTaken()) { + // We now might be able to merge this blr-only block into its + // by-layout predecessor. + if (ReturnMBB.pred_size() == 1 && + (*ReturnMBB.pred_begin())->isLayoutSuccessor(&ReturnMBB)) { + // Move the blr into the preceding block. 
+ MachineBasicBlock &PrevMBB = **ReturnMBB.pred_begin(); + PrevMBB.splice(PrevMBB.end(), &ReturnMBB, I); + PrevMBB.removeSuccessor(&ReturnMBB); + } + + if (ReturnMBB.pred_empty()) + ReturnMBB.eraseFromParent(); + } + + return Changed; + } + +public: + virtual bool runOnMachineFunction(MachineFunction &MF) { + TM = static_cast<const PPCTargetMachine *>(&MF.getTarget()); + TII = TM->getInstrInfo(); + + bool Changed = false; + + // If the function does not have at least two blocks, then there is + // nothing to do. + if (MF.size() < 2) + return Changed; + + for (MachineFunction::iterator I = MF.begin(); I != MF.end();) { + MachineBasicBlock &B = *I++; + if (processBlock(B)) + Changed = true; + } + + return Changed; + } + + virtual void getAnalysisUsage(AnalysisUsage &AU) const { + MachineFunctionPass::getAnalysisUsage(AU); + } + }; +} + +INITIALIZE_PASS(PPCEarlyReturn, DEBUG_TYPE, + "PowerPC Early-Return Creation", false, false) + +char PPCEarlyReturn::ID = 0; +FunctionPass* +llvm::createPPCEarlyReturnPass() { return new PPCEarlyReturn(); } + diff --git a/contrib/llvm/lib/Target/PowerPC/PPCInstrInfo.h b/contrib/llvm/lib/Target/PowerPC/PPCInstrInfo.h index 635e348..34a1a73 100644 --- a/contrib/llvm/lib/Target/PowerPC/PPCInstrInfo.h +++ b/contrib/llvm/lib/Target/PowerPC/PPCInstrInfo.h @@ -120,6 +120,17 @@ public: MachineBasicBlock *FBB, const SmallVectorImpl<MachineOperand> &Cond, DebugLoc DL) const; + + // Select analysis. + virtual bool canInsertSelect(const MachineBasicBlock&, + const SmallVectorImpl<MachineOperand> &Cond, + unsigned, unsigned, int&, int&, int&) const; + virtual void insertSelect(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MI, DebugLoc DL, + unsigned DstReg, + const SmallVectorImpl<MachineOperand> &Cond, + unsigned TrueReg, unsigned FalseReg) const; + virtual void copyPhysReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, DebugLoc DL, unsigned DestReg, unsigned SrcReg, @@ -146,6 +157,66 @@ public: virtual bool ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const; + virtual bool FoldImmediate(MachineInstr *UseMI, MachineInstr *DefMI, + unsigned Reg, MachineRegisterInfo *MRI) const; + + // If conversion by predication (only supported by some branch instructions). + // All of the profitability checks always return true; it is always + // profitable to use the predicated branches. + virtual bool isProfitableToIfCvt(MachineBasicBlock &MBB, + unsigned NumCycles, unsigned ExtraPredCycles, + const BranchProbability &Probability) const { + return true; + } + + virtual bool isProfitableToIfCvt(MachineBasicBlock &TMBB, + unsigned NumT, unsigned ExtraT, + MachineBasicBlock &FMBB, + unsigned NumF, unsigned ExtraF, + const BranchProbability &Probability) const; + + virtual bool isProfitableToDupForIfCvt(MachineBasicBlock &MBB, + unsigned NumCycles, + const BranchProbability + &Probability) const { + return true; + } + + virtual bool isProfitableToUnpredicate(MachineBasicBlock &TMBB, + MachineBasicBlock &FMBB) const { + return false; + } + + // Predication support. 
+ bool isPredicated(const MachineInstr *MI) const; + + virtual bool isUnpredicatedTerminator(const MachineInstr *MI) const; + + virtual + bool PredicateInstruction(MachineInstr *MI, + const SmallVectorImpl<MachineOperand> &Pred) const; + + virtual + bool SubsumesPredicate(const SmallVectorImpl<MachineOperand> &Pred1, + const SmallVectorImpl<MachineOperand> &Pred2) const; + + virtual bool DefinesPredicate(MachineInstr *MI, + std::vector<MachineOperand> &Pred) const; + + virtual bool isPredicable(MachineInstr *MI) const; + + // Comparison optimization. + + + virtual bool analyzeCompare(const MachineInstr *MI, + unsigned &SrcReg, unsigned &SrcReg2, + int &Mask, int &Value) const; + + virtual bool optimizeCompareInstr(MachineInstr *CmpInstr, + unsigned SrcReg, unsigned SrcReg2, + int Mask, int Value, + const MachineRegisterInfo *MRI) const; + /// GetInstSize - Return the number of bytes of code the specified /// instruction may be. This returns the maximum number of bytes. /// diff --git a/contrib/llvm/lib/Target/PowerPC/PPCInstrInfo.td b/contrib/llvm/lib/Target/PowerPC/PPCInstrInfo.td index ab90762..4763069 100644 --- a/contrib/llvm/lib/Target/PowerPC/PPCInstrInfo.td +++ b/contrib/llvm/lib/Target/PowerPC/PPCInstrInfo.td @@ -319,10 +319,7 @@ def unaligned4sextloadi32 : PatFrag<(ops node:$ptr), (sextloadi32 node:$ptr), [{ // PowerPC Flag Definitions. class isPPC64 { bit PPC64 = 1; } -class isDOT { - list<Register> Defs = [CR0]; - bit RC = 1; -} +class isDOT { bit RC = 1; } class RegConstraint<string C> { string Constraints = C; @@ -335,20 +332,111 @@ class NoEncode<string E> { //===----------------------------------------------------------------------===// // PowerPC Operand Definitions. +// In the default PowerPC assembler syntax, registers are specified simply +// by number, so they cannot be distinguished from immediate values (without +// looking at the opcode). This means that the default operand matching logic +// for the asm parser does not work, and we need to specify custom matchers. +// Since those can only be specified with RegisterOperand classes and not +// directly on the RegisterClass, all instruction patterns used by the asm +// parser need to use a RegisterOperand (instead of a RegisterClass) for +// all their register operands. +// For this purpose, we define one RegisterOperand for each RegisterClass, +// using the same name as the class, just in lower case.
+ +def PPCRegGPRCAsmOperand : AsmOperandClass { + let Name = "RegGPRC"; let PredicateMethod = "isRegNumber"; +} +def gprc : RegisterOperand<GPRC> { + let ParserMatchClass = PPCRegGPRCAsmOperand; +} +def PPCRegG8RCAsmOperand : AsmOperandClass { + let Name = "RegG8RC"; let PredicateMethod = "isRegNumber"; +} +def g8rc : RegisterOperand<G8RC> { + let ParserMatchClass = PPCRegG8RCAsmOperand; +} +def PPCRegGPRCNoR0AsmOperand : AsmOperandClass { + let Name = "RegGPRCNoR0"; let PredicateMethod = "isRegNumber"; +} +def gprc_nor0 : RegisterOperand<GPRC_NOR0> { + let ParserMatchClass = PPCRegGPRCNoR0AsmOperand; +} +def PPCRegG8RCNoX0AsmOperand : AsmOperandClass { + let Name = "RegG8RCNoX0"; let PredicateMethod = "isRegNumber"; +} +def g8rc_nox0 : RegisterOperand<G8RC_NOX0> { + let ParserMatchClass = PPCRegG8RCNoX0AsmOperand; +} +def PPCRegF8RCAsmOperand : AsmOperandClass { + let Name = "RegF8RC"; let PredicateMethod = "isRegNumber"; +} +def f8rc : RegisterOperand<F8RC> { + let ParserMatchClass = PPCRegF8RCAsmOperand; +} +def PPCRegF4RCAsmOperand : AsmOperandClass { + let Name = "RegF4RC"; let PredicateMethod = "isRegNumber"; +} +def f4rc : RegisterOperand<F4RC> { + let ParserMatchClass = PPCRegF4RCAsmOperand; +} +def PPCRegVRRCAsmOperand : AsmOperandClass { + let Name = "RegVRRC"; let PredicateMethod = "isRegNumber"; +} +def vrrc : RegisterOperand<VRRC> { + let ParserMatchClass = PPCRegVRRCAsmOperand; +} +def PPCRegCRBITRCAsmOperand : AsmOperandClass { + let Name = "RegCRBITRC"; let PredicateMethod = "isRegNumber"; +} +def crbitrc : RegisterOperand<CRBITRC> { + let ParserMatchClass = PPCRegCRBITRCAsmOperand; +} +def PPCRegCRRCAsmOperand : AsmOperandClass { + let Name = "RegCRRC"; let PredicateMethod = "isCCRegNumber"; +} +def crrc : RegisterOperand<CRRC> { + let ParserMatchClass = PPCRegCRRCAsmOperand; +} + +def PPCS5ImmAsmOperand : AsmOperandClass { + let Name = "S5Imm"; let PredicateMethod = "isS5Imm"; + let RenderMethod = "addImmOperands"; +} def s5imm : Operand<i32> { let PrintMethod = "printS5ImmOperand"; + let ParserMatchClass = PPCS5ImmAsmOperand; +} +def PPCU5ImmAsmOperand : AsmOperandClass { + let Name = "U5Imm"; let PredicateMethod = "isU5Imm"; + let RenderMethod = "addImmOperands"; } def u5imm : Operand<i32> { let PrintMethod = "printU5ImmOperand"; + let ParserMatchClass = PPCU5ImmAsmOperand; +} +def PPCU6ImmAsmOperand : AsmOperandClass { + let Name = "U6Imm"; let PredicateMethod = "isU6Imm"; + let RenderMethod = "addImmOperands"; } def u6imm : Operand<i32> { let PrintMethod = "printU6ImmOperand"; + let ParserMatchClass = PPCU6ImmAsmOperand; +} +def PPCS16ImmAsmOperand : AsmOperandClass { + let Name = "S16Imm"; let PredicateMethod = "isS16Imm"; + let RenderMethod = "addImmOperands"; } def s16imm : Operand<i32> { let PrintMethod = "printS16ImmOperand"; + let ParserMatchClass = PPCS16ImmAsmOperand; +} +def PPCU16ImmAsmOperand : AsmOperandClass { + let Name = "U16Imm"; let PredicateMethod = "isU16Imm"; + let RenderMethod = "addImmOperands"; } def u16imm : Operand<i32> { let PrintMethod = "printU16ImmOperand"; + let ParserMatchClass = PPCU16ImmAsmOperand; } def directbrtarget : Operand<OtherVT> { let PrintMethod = "printBranchOperand"; @@ -367,21 +455,49 @@ def aaddr : Operand<iPTR> { def symbolHi: Operand<i32> { let PrintMethod = "printSymbolHi"; let EncoderMethod = "getHA16Encoding"; + let ParserMatchClass = PPCS16ImmAsmOperand; } def symbolLo: Operand<i32> { let PrintMethod = "printSymbolLo"; let EncoderMethod = "getLO16Encoding"; + let ParserMatchClass = PPCS16ImmAsmOperand; +} +def 
PPCCRBitMaskOperand : AsmOperandClass { + let Name = "CRBitMask"; let PredicateMethod = "isCRBitMask"; } def crbitm: Operand<i8> { let PrintMethod = "printcrbitm"; let EncoderMethod = "get_crbitm_encoding"; + let ParserMatchClass = PPCCRBitMaskOperand; } // Address operands // A version of ptr_rc which excludes R0 (or X0 in 64-bit mode). -def ptr_rc_nor0 : PointerLikeRegClass<1>; +def PPCRegGxRCNoR0Operand : AsmOperandClass { + let Name = "RegGxRCNoR0"; let PredicateMethod = "isRegNumber"; +} +def ptr_rc_nor0 : Operand<iPTR>, PointerLikeRegClass<1> { + let ParserMatchClass = PPCRegGxRCNoR0Operand; +} +// A version of ptr_rc usable with the asm parser. +def PPCRegGxRCOperand : AsmOperandClass { + let Name = "RegGxRC"; let PredicateMethod = "isRegNumber"; +} +def ptr_rc_idx : Operand<iPTR>, PointerLikeRegClass<0> { + let ParserMatchClass = PPCRegGxRCOperand; +} -def dispRI : Operand<iPTR>; -def dispRIX : Operand<iPTR>; +def PPCDispRIOperand : AsmOperandClass { + let Name = "DispRI"; let PredicateMethod = "isS16Imm"; +} +def dispRI : Operand<iPTR> { + let ParserMatchClass = PPCDispRIOperand; +} +def PPCDispRIXOperand : AsmOperandClass { + let Name = "DispRIX"; let PredicateMethod = "isS16ImmX4"; +} +def dispRIX : Operand<iPTR> { + let ParserMatchClass = PPCDispRIXOperand; +} def memri : Operand<iPTR> { let PrintMethod = "printMemRegImm"; @@ -390,7 +506,7 @@ def memri : Operand<iPTR> { } def memrr : Operand<iPTR> { let PrintMethod = "printMemRegReg"; - let MIOperandInfo = (ops ptr_rc_nor0:$ptrreg, ptr_rc:$offreg); + let MIOperandInfo = (ops ptr_rc_nor0:$ptrreg, ptr_rc_idx:$offreg); } def memrix : Operand<iPTR> { // memri where the imm is shifted 2 bits. let PrintMethod = "printMemRegImmShifted"; @@ -407,7 +523,7 @@ def memr : Operand<iPTR> { // PowerPC Predicate operand. def pred : Operand<OtherVT> { let PrintMethod = "printPredicateOperand"; - let MIOperandInfo = (ops i32imm:$bibo, CRRC:$reg); + let MIOperandInfo = (ops i32imm:$bibo, crrc:$reg); } // Define PowerPC specific addressing mode. @@ -430,6 +546,252 @@ def In64BitMode : Predicate<"PPCSubTarget.isPPC64()">; def IsBookE : Predicate<"PPCSubTarget.isBookE()">; //===----------------------------------------------------------------------===// +// PowerPC Multiclass Definitions. + +multiclass XForm_6r<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, + string asmbase, string asmstr, InstrItinClass itin, + list<dag> pattern> { + let BaseName = asmbase in { + def NAME : XForm_6<opcode, xo, OOL, IOL, + !strconcat(asmbase, !strconcat(" ", asmstr)), itin, + pattern>, RecFormRel; + let Defs = [CR0] in + def o : XForm_6<opcode, xo, OOL, IOL, + !strconcat(asmbase, !strconcat(". ", asmstr)), itin, + []>, isDOT, RecFormRel; + } +} + +multiclass XForm_6rc<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, + string asmbase, string asmstr, InstrItinClass itin, + list<dag> pattern> { + let BaseName = asmbase in { + let Defs = [CARRY] in + def NAME : XForm_6<opcode, xo, OOL, IOL, + !strconcat(asmbase, !strconcat(" ", asmstr)), itin, + pattern>, RecFormRel; + let Defs = [CARRY, CR0] in + def o : XForm_6<opcode, xo, OOL, IOL, + !strconcat(asmbase, !strconcat(". 
", asmstr)), itin, + []>, isDOT, RecFormRel; + } +} + +multiclass XForm_10r<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, + string asmbase, string asmstr, InstrItinClass itin, + list<dag> pattern> { + let BaseName = asmbase in { + def NAME : XForm_10<opcode, xo, OOL, IOL, + !strconcat(asmbase, !strconcat(" ", asmstr)), itin, + pattern>, RecFormRel; + let Defs = [CR0] in + def o : XForm_10<opcode, xo, OOL, IOL, + !strconcat(asmbase, !strconcat(". ", asmstr)), itin, + []>, isDOT, RecFormRel; + } +} + +multiclass XForm_10rc<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, + string asmbase, string asmstr, InstrItinClass itin, + list<dag> pattern> { + let BaseName = asmbase in { + let Defs = [CARRY] in + def NAME : XForm_10<opcode, xo, OOL, IOL, + !strconcat(asmbase, !strconcat(" ", asmstr)), itin, + pattern>, RecFormRel; + let Defs = [CARRY, CR0] in + def o : XForm_10<opcode, xo, OOL, IOL, + !strconcat(asmbase, !strconcat(". ", asmstr)), itin, + []>, isDOT, RecFormRel; + } +} + +multiclass XForm_11r<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, + string asmbase, string asmstr, InstrItinClass itin, + list<dag> pattern> { + let BaseName = asmbase in { + def NAME : XForm_11<opcode, xo, OOL, IOL, + !strconcat(asmbase, !strconcat(" ", asmstr)), itin, + pattern>, RecFormRel; + let Defs = [CR0] in + def o : XForm_11<opcode, xo, OOL, IOL, + !strconcat(asmbase, !strconcat(". ", asmstr)), itin, + []>, isDOT, RecFormRel; + } +} + +multiclass XOForm_1r<bits<6> opcode, bits<9> xo, bit oe, dag OOL, dag IOL, + string asmbase, string asmstr, InstrItinClass itin, + list<dag> pattern> { + let BaseName = asmbase in { + def NAME : XOForm_1<opcode, xo, oe, OOL, IOL, + !strconcat(asmbase, !strconcat(" ", asmstr)), itin, + pattern>, RecFormRel; + let Defs = [CR0] in + def o : XOForm_1<opcode, xo, oe, OOL, IOL, + !strconcat(asmbase, !strconcat(". ", asmstr)), itin, + []>, isDOT, RecFormRel; + } +} + +multiclass XOForm_1rc<bits<6> opcode, bits<9> xo, bit oe, dag OOL, dag IOL, + string asmbase, string asmstr, InstrItinClass itin, + list<dag> pattern> { + let BaseName = asmbase in { + let Defs = [CARRY] in + def NAME : XOForm_1<opcode, xo, oe, OOL, IOL, + !strconcat(asmbase, !strconcat(" ", asmstr)), itin, + pattern>, RecFormRel; + let Defs = [CARRY, CR0] in + def o : XOForm_1<opcode, xo, oe, OOL, IOL, + !strconcat(asmbase, !strconcat(". ", asmstr)), itin, + []>, isDOT, RecFormRel; + } +} + +multiclass XOForm_3r<bits<6> opcode, bits<9> xo, bit oe, dag OOL, dag IOL, + string asmbase, string asmstr, InstrItinClass itin, + list<dag> pattern> { + let BaseName = asmbase in { + def NAME : XOForm_3<opcode, xo, oe, OOL, IOL, + !strconcat(asmbase, !strconcat(" ", asmstr)), itin, + pattern>, RecFormRel; + let Defs = [CR0] in + def o : XOForm_3<opcode, xo, oe, OOL, IOL, + !strconcat(asmbase, !strconcat(". ", asmstr)), itin, + []>, isDOT, RecFormRel; + } +} + +multiclass XOForm_3rc<bits<6> opcode, bits<9> xo, bit oe, dag OOL, dag IOL, + string asmbase, string asmstr, InstrItinClass itin, + list<dag> pattern> { + let BaseName = asmbase in { + let Defs = [CARRY] in + def NAME : XOForm_3<opcode, xo, oe, OOL, IOL, + !strconcat(asmbase, !strconcat(" ", asmstr)), itin, + pattern>, RecFormRel; + let Defs = [CARRY, CR0] in + def o : XOForm_3<opcode, xo, oe, OOL, IOL, + !strconcat(asmbase, !strconcat(". 
", asmstr)), itin, + []>, isDOT, RecFormRel; + } +} + +multiclass MForm_2r<bits<6> opcode, dag OOL, dag IOL, + string asmbase, string asmstr, InstrItinClass itin, + list<dag> pattern> { + let BaseName = asmbase in { + def NAME : MForm_2<opcode, OOL, IOL, + !strconcat(asmbase, !strconcat(" ", asmstr)), itin, + pattern>, RecFormRel; + let Defs = [CR0] in + def o : MForm_2<opcode, OOL, IOL, + !strconcat(asmbase, !strconcat(". ", asmstr)), itin, + []>, isDOT, RecFormRel; + } +} + +multiclass MDForm_1r<bits<6> opcode, bits<3> xo, dag OOL, dag IOL, + string asmbase, string asmstr, InstrItinClass itin, + list<dag> pattern> { + let BaseName = asmbase in { + def NAME : MDForm_1<opcode, xo, OOL, IOL, + !strconcat(asmbase, !strconcat(" ", asmstr)), itin, + pattern>, RecFormRel; + let Defs = [CR0] in + def o : MDForm_1<opcode, xo, OOL, IOL, + !strconcat(asmbase, !strconcat(". ", asmstr)), itin, + []>, isDOT, RecFormRel; + } +} + +multiclass MDSForm_1r<bits<6> opcode, bits<4> xo, dag OOL, dag IOL, + string asmbase, string asmstr, InstrItinClass itin, + list<dag> pattern> { + let BaseName = asmbase in { + def NAME : MDSForm_1<opcode, xo, OOL, IOL, + !strconcat(asmbase, !strconcat(" ", asmstr)), itin, + pattern>, RecFormRel; + let Defs = [CR0] in + def o : MDSForm_1<opcode, xo, OOL, IOL, + !strconcat(asmbase, !strconcat(". ", asmstr)), itin, + []>, isDOT, RecFormRel; + } +} + +multiclass XSForm_1rc<bits<6> opcode, bits<9> xo, dag OOL, dag IOL, + string asmbase, string asmstr, InstrItinClass itin, + list<dag> pattern> { + let BaseName = asmbase in { + let Defs = [CARRY] in + def NAME : XSForm_1<opcode, xo, OOL, IOL, + !strconcat(asmbase, !strconcat(" ", asmstr)), itin, + pattern>, RecFormRel; + let Defs = [CARRY, CR0] in + def o : XSForm_1<opcode, xo, OOL, IOL, + !strconcat(asmbase, !strconcat(". ", asmstr)), itin, + []>, isDOT, RecFormRel; + } +} + +multiclass XForm_26r<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, + string asmbase, string asmstr, InstrItinClass itin, + list<dag> pattern> { + let BaseName = asmbase in { + def NAME : XForm_26<opcode, xo, OOL, IOL, + !strconcat(asmbase, !strconcat(" ", asmstr)), itin, + pattern>, RecFormRel; + let Defs = [CR1] in + def o : XForm_26<opcode, xo, OOL, IOL, + !strconcat(asmbase, !strconcat(". ", asmstr)), itin, + []>, isDOT, RecFormRel; + } +} + +multiclass AForm_1r<bits<6> opcode, bits<5> xo, dag OOL, dag IOL, + string asmbase, string asmstr, InstrItinClass itin, + list<dag> pattern> { + let BaseName = asmbase in { + def NAME : AForm_1<opcode, xo, OOL, IOL, + !strconcat(asmbase, !strconcat(" ", asmstr)), itin, + pattern>, RecFormRel; + let Defs = [CR1] in + def o : AForm_1<opcode, xo, OOL, IOL, + !strconcat(asmbase, !strconcat(". ", asmstr)), itin, + []>, isDOT, RecFormRel; + } +} + +multiclass AForm_2r<bits<6> opcode, bits<5> xo, dag OOL, dag IOL, + string asmbase, string asmstr, InstrItinClass itin, + list<dag> pattern> { + let BaseName = asmbase in { + def NAME : AForm_2<opcode, xo, OOL, IOL, + !strconcat(asmbase, !strconcat(" ", asmstr)), itin, + pattern>, RecFormRel; + let Defs = [CR1] in + def o : AForm_2<opcode, xo, OOL, IOL, + !strconcat(asmbase, !strconcat(". 
", asmstr)), itin, + []>, isDOT, RecFormRel; + } +} + +multiclass AForm_3r<bits<6> opcode, bits<5> xo, dag OOL, dag IOL, + string asmbase, string asmstr, InstrItinClass itin, + list<dag> pattern> { + let BaseName = asmbase in { + def NAME : AForm_3<opcode, xo, OOL, IOL, + !strconcat(asmbase, !strconcat(" ", asmstr)), itin, + pattern>, RecFormRel; + let Defs = [CR1] in + def o : AForm_3<opcode, xo, OOL, IOL, + !strconcat(asmbase, !strconcat(". ", asmstr)), itin, + []>, isDOT, RecFormRel; + } +} + +//===----------------------------------------------------------------------===// // PowerPC Instruction Definitions. // Pseudo-instructions: @@ -442,12 +804,12 @@ def ADJCALLSTACKUP : Pseudo<(outs), (ins u16imm:$amt1, u16imm:$amt2), "#ADJCAL [(callseq_end timm:$amt1, timm:$amt2)]>; } -def UPDATE_VRSAVE : Pseudo<(outs GPRC:$rD), (ins GPRC:$rS), +def UPDATE_VRSAVE : Pseudo<(outs gprc:$rD), (ins gprc:$rS), "UPDATE_VRSAVE $rD, $rS", []>; } let Defs = [R1], Uses = [R1] in -def DYNALLOC : Pseudo<(outs GPRC:$result), (ins GPRC:$negsize, memri:$fpsi), "#DYNALLOC", +def DYNALLOC : Pseudo<(outs gprc:$result), (ins gprc:$negsize, memri:$fpsi), "#DYNALLOC", [(set i32:$result, (PPCdynalloc i32:$negsize, iaddr:$fpsi))]>; @@ -458,21 +820,21 @@ let usesCustomInserter = 1, // Expanded after instruction selection. // Note that SELECT_CC_I4 and SELECT_CC_I8 use the no-r0 register classes // because either operand might become the first operand in an isel, and // that operand cannot be r0. - def SELECT_CC_I4 : Pseudo<(outs GPRC:$dst), (ins CRRC:$cond, - GPRC_NOR0:$T, GPRC_NOR0:$F, + def SELECT_CC_I4 : Pseudo<(outs gprc:$dst), (ins crrc:$cond, + gprc_nor0:$T, gprc_nor0:$F, i32imm:$BROPC), "#SELECT_CC_I4", []>; - def SELECT_CC_I8 : Pseudo<(outs G8RC:$dst), (ins CRRC:$cond, - G8RC_NOX0:$T, G8RC_NOX0:$F, + def SELECT_CC_I8 : Pseudo<(outs g8rc:$dst), (ins crrc:$cond, + g8rc_nox0:$T, g8rc_nox0:$F, i32imm:$BROPC), "#SELECT_CC_I8", []>; - def SELECT_CC_F4 : Pseudo<(outs F4RC:$dst), (ins CRRC:$cond, F4RC:$T, F4RC:$F, + def SELECT_CC_F4 : Pseudo<(outs f4rc:$dst), (ins crrc:$cond, f4rc:$T, f4rc:$F, i32imm:$BROPC), "#SELECT_CC_F4", []>; - def SELECT_CC_F8 : Pseudo<(outs F8RC:$dst), (ins CRRC:$cond, F8RC:$T, F8RC:$F, + def SELECT_CC_F8 : Pseudo<(outs f8rc:$dst), (ins crrc:$cond, f8rc:$T, f8rc:$F, i32imm:$BROPC), "#SELECT_CC_F8", []>; - def SELECT_CC_VRRC: Pseudo<(outs VRRC:$dst), (ins CRRC:$cond, VRRC:$T, VRRC:$F, + def SELECT_CC_VRRC: Pseudo<(outs vrrc:$dst), (ins crrc:$cond, vrrc:$T, vrrc:$F, i32imm:$BROPC), "#SELECT_CC_VRRC", []>; } @@ -480,21 +842,26 @@ let usesCustomInserter = 1, // Expanded after instruction selection. // SPILL_CR - Indicate that we're dumping the CR register, so we'll need to // scavenge a register for it. let mayStore = 1 in -def SPILL_CR : Pseudo<(outs), (ins CRRC:$cond, memri:$F), +def SPILL_CR : Pseudo<(outs), (ins crrc:$cond, memri:$F), "#SPILL_CR", []>; // RESTORE_CR - Indicate that we're restoring the CR register (previously // spilled), so we'll need to scavenge a register for it. 
let mayLoad = 1 in -def RESTORE_CR : Pseudo<(outs CRRC:$cond), (ins memri:$F), +def RESTORE_CR : Pseudo<(outs crrc:$cond), (ins memri:$F), "#RESTORE_CR", []>; let isTerminator = 1, isBarrier = 1, PPC970_Unit = 7 in { let isReturn = 1, Uses = [LR, RM] in def BLR : XLForm_2_ext<19, 16, 20, 0, 0, (outs), (ins), "blr", BrB, [(retflag)]>; - let isBranch = 1, isIndirectBranch = 1, Uses = [CTR] in + let isBranch = 1, isIndirectBranch = 1, Uses = [CTR] in { def BCTR : XLForm_2_ext<19, 528, 20, 0, 0, (outs), (ins), "bctr", BrB, []>; + + let isCodeGenOnly = 1 in + def BCCTR : XLForm_2_br<19, 528, 0, (outs), (ins pred:$cond), + "b${cond:cc}ctr ${cond:reg}", BrB, []>; + } } let Defs = [LR] in @@ -511,10 +878,21 @@ let isBranch = 1, isTerminator = 1, hasCtrlDep = 1, PPC970_Unit = 7 in { // BCC represents an arbitrary conditional branch on a predicate. // FIXME: should be able to write a pattern for PPCcondbranch, but can't use // a two-value operand where a dag node expects two operands. :( - let isCodeGenOnly = 1 in + let isCodeGenOnly = 1 in { def BCC : BForm<16, 0, 0, (outs), (ins pred:$cond, condbrtarget:$dst), "b${cond:cc} ${cond:reg}, $dst" - /*[(PPCcondbranch CRRC:$crS, imm:$opc, bb:$dst)]*/>; + /*[(PPCcondbranch crrc:$crS, imm:$opc, bb:$dst)]*/>; + let isReturn = 1, Uses = [LR, RM] in + def BCLR : XLForm_2_br<19, 16, 0, (outs), (ins pred:$cond), + "b${cond:cc}lr ${cond:reg}", BrB, []>; + + let isReturn = 1, Defs = [CTR], Uses = [CTR, LR, RM] in { + def BDZLR : XLForm_2_ext<19, 16, 18, 0, 0, (outs), (ins), + "bdzlr", BrB, []>; + def BDNZLR : XLForm_2_ext<19, 16, 16, 0, 0, (outs), (ins), + "bdnzlr", BrB, []>; + } + } let Defs = [CTR], Uses = [CTR] in { def BDZ : BForm_1<16, 18, 0, 0, (outs), (ins condbrtarget:$dst), @@ -544,6 +922,10 @@ let isCall = 1, PPC970_Unit = 7, Defs = [LR] in { def BCTRL : XLForm_2_ext<19, 528, 20, 0, 1, (outs), (ins), "bctrl", BrB, [(PPCbctrl)]>, Requires<[In32BitMode]>; + + let isCodeGenOnly = 1 in + def BCCTRL : XLForm_2_br<19, 528, 1, (outs), (ins pred:$cond), + "b${cond:cc}ctrl ${cond:reg}", BrB, []>; } } @@ -589,7 +971,7 @@ def TAILBA : IForm<18, 0, 0, (outs), (ins aaddr:$dst), []>; let hasSideEffects = 1, isBarrier = 1, usesCustomInserter = 1 in { - def EH_SjLj_SetJmp32 : Pseudo<(outs GPRC:$dst), (ins memr:$buf), + def EH_SjLj_SetJmp32 : Pseudo<(outs gprc:$dst), (ins memr:$buf), "#EH_SJLJ_SETJMP32", [(set i32:$dst, (PPCeh_sjlj_setjmp addr:$buf))]>, Requires<[In32BitMode]>; @@ -638,89 +1020,89 @@ def : Pat<(prefetch xoaddr:$dst, (i32 0), imm, (i32 1)), let usesCustomInserter = 1 in { let Defs = [CR0] in { def ATOMIC_LOAD_ADD_I8 : Pseudo< - (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr), "#ATOMIC_LOAD_ADD_I8", + (outs gprc:$dst), (ins memrr:$ptr, gprc:$incr), "#ATOMIC_LOAD_ADD_I8", [(set i32:$dst, (atomic_load_add_8 xoaddr:$ptr, i32:$incr))]>; def ATOMIC_LOAD_SUB_I8 : Pseudo< - (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr), "#ATOMIC_LOAD_SUB_I8", + (outs gprc:$dst), (ins memrr:$ptr, gprc:$incr), "#ATOMIC_LOAD_SUB_I8", [(set i32:$dst, (atomic_load_sub_8 xoaddr:$ptr, i32:$incr))]>; def ATOMIC_LOAD_AND_I8 : Pseudo< - (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr), "#ATOMIC_LOAD_AND_I8", + (outs gprc:$dst), (ins memrr:$ptr, gprc:$incr), "#ATOMIC_LOAD_AND_I8", [(set i32:$dst, (atomic_load_and_8 xoaddr:$ptr, i32:$incr))]>; def ATOMIC_LOAD_OR_I8 : Pseudo< - (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr), "#ATOMIC_LOAD_OR_I8", + (outs gprc:$dst), (ins memrr:$ptr, gprc:$incr), "#ATOMIC_LOAD_OR_I8", [(set i32:$dst, (atomic_load_or_8 xoaddr:$ptr, i32:$incr))]>; def 
ATOMIC_LOAD_XOR_I8 : Pseudo< - (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr), "ATOMIC_LOAD_XOR_I8", + (outs gprc:$dst), (ins memrr:$ptr, gprc:$incr), "ATOMIC_LOAD_XOR_I8", [(set i32:$dst, (atomic_load_xor_8 xoaddr:$ptr, i32:$incr))]>; def ATOMIC_LOAD_NAND_I8 : Pseudo< - (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr), "#ATOMIC_LOAD_NAND_I8", + (outs gprc:$dst), (ins memrr:$ptr, gprc:$incr), "#ATOMIC_LOAD_NAND_I8", [(set i32:$dst, (atomic_load_nand_8 xoaddr:$ptr, i32:$incr))]>; def ATOMIC_LOAD_ADD_I16 : Pseudo< - (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr), "#ATOMIC_LOAD_ADD_I16", + (outs gprc:$dst), (ins memrr:$ptr, gprc:$incr), "#ATOMIC_LOAD_ADD_I16", [(set i32:$dst, (atomic_load_add_16 xoaddr:$ptr, i32:$incr))]>; def ATOMIC_LOAD_SUB_I16 : Pseudo< - (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr), "#ATOMIC_LOAD_SUB_I16", + (outs gprc:$dst), (ins memrr:$ptr, gprc:$incr), "#ATOMIC_LOAD_SUB_I16", [(set i32:$dst, (atomic_load_sub_16 xoaddr:$ptr, i32:$incr))]>; def ATOMIC_LOAD_AND_I16 : Pseudo< - (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr), "#ATOMIC_LOAD_AND_I16", + (outs gprc:$dst), (ins memrr:$ptr, gprc:$incr), "#ATOMIC_LOAD_AND_I16", [(set i32:$dst, (atomic_load_and_16 xoaddr:$ptr, i32:$incr))]>; def ATOMIC_LOAD_OR_I16 : Pseudo< - (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr), "#ATOMIC_LOAD_OR_I16", + (outs gprc:$dst), (ins memrr:$ptr, gprc:$incr), "#ATOMIC_LOAD_OR_I16", [(set i32:$dst, (atomic_load_or_16 xoaddr:$ptr, i32:$incr))]>; def ATOMIC_LOAD_XOR_I16 : Pseudo< - (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr), "#ATOMIC_LOAD_XOR_I16", + (outs gprc:$dst), (ins memrr:$ptr, gprc:$incr), "#ATOMIC_LOAD_XOR_I16", [(set i32:$dst, (atomic_load_xor_16 xoaddr:$ptr, i32:$incr))]>; def ATOMIC_LOAD_NAND_I16 : Pseudo< - (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr), "#ATOMIC_LOAD_NAND_I16", + (outs gprc:$dst), (ins memrr:$ptr, gprc:$incr), "#ATOMIC_LOAD_NAND_I16", [(set i32:$dst, (atomic_load_nand_16 xoaddr:$ptr, i32:$incr))]>; def ATOMIC_LOAD_ADD_I32 : Pseudo< - (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr), "#ATOMIC_LOAD_ADD_I32", + (outs gprc:$dst), (ins memrr:$ptr, gprc:$incr), "#ATOMIC_LOAD_ADD_I32", [(set i32:$dst, (atomic_load_add_32 xoaddr:$ptr, i32:$incr))]>; def ATOMIC_LOAD_SUB_I32 : Pseudo< - (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr), "#ATOMIC_LOAD_SUB_I32", + (outs gprc:$dst), (ins memrr:$ptr, gprc:$incr), "#ATOMIC_LOAD_SUB_I32", [(set i32:$dst, (atomic_load_sub_32 xoaddr:$ptr, i32:$incr))]>; def ATOMIC_LOAD_AND_I32 : Pseudo< - (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr), "#ATOMIC_LOAD_AND_I32", + (outs gprc:$dst), (ins memrr:$ptr, gprc:$incr), "#ATOMIC_LOAD_AND_I32", [(set i32:$dst, (atomic_load_and_32 xoaddr:$ptr, i32:$incr))]>; def ATOMIC_LOAD_OR_I32 : Pseudo< - (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr), "#ATOMIC_LOAD_OR_I32", + (outs gprc:$dst), (ins memrr:$ptr, gprc:$incr), "#ATOMIC_LOAD_OR_I32", [(set i32:$dst, (atomic_load_or_32 xoaddr:$ptr, i32:$incr))]>; def ATOMIC_LOAD_XOR_I32 : Pseudo< - (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr), "#ATOMIC_LOAD_XOR_I32", + (outs gprc:$dst), (ins memrr:$ptr, gprc:$incr), "#ATOMIC_LOAD_XOR_I32", [(set i32:$dst, (atomic_load_xor_32 xoaddr:$ptr, i32:$incr))]>; def ATOMIC_LOAD_NAND_I32 : Pseudo< - (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr), "#ATOMIC_LOAD_NAND_I32", + (outs gprc:$dst), (ins memrr:$ptr, gprc:$incr), "#ATOMIC_LOAD_NAND_I32", [(set i32:$dst, (atomic_load_nand_32 xoaddr:$ptr, i32:$incr))]>; def ATOMIC_CMP_SWAP_I8 : Pseudo< - (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$old, GPRC:$new), "#ATOMIC_CMP_SWAP_I8", + 
(outs gprc:$dst), (ins memrr:$ptr, gprc:$old, gprc:$new), "#ATOMIC_CMP_SWAP_I8", [(set i32:$dst, (atomic_cmp_swap_8 xoaddr:$ptr, i32:$old, i32:$new))]>; def ATOMIC_CMP_SWAP_I16 : Pseudo< - (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$old, GPRC:$new), "#ATOMIC_CMP_SWAP_I16 $dst $ptr $old $new", + (outs gprc:$dst), (ins memrr:$ptr, gprc:$old, gprc:$new), "#ATOMIC_CMP_SWAP_I16 $dst $ptr $old $new", [(set i32:$dst, (atomic_cmp_swap_16 xoaddr:$ptr, i32:$old, i32:$new))]>; def ATOMIC_CMP_SWAP_I32 : Pseudo< - (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$old, GPRC:$new), "#ATOMIC_CMP_SWAP_I32 $dst $ptr $old $new", + (outs gprc:$dst), (ins memrr:$ptr, gprc:$old, gprc:$new), "#ATOMIC_CMP_SWAP_I32 $dst $ptr $old $new", [(set i32:$dst, (atomic_cmp_swap_32 xoaddr:$ptr, i32:$old, i32:$new))]>; def ATOMIC_SWAP_I8 : Pseudo< - (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$new), "#ATOMIC_SWAP_i8", + (outs gprc:$dst), (ins memrr:$ptr, gprc:$new), "#ATOMIC_SWAP_i8", [(set i32:$dst, (atomic_swap_8 xoaddr:$ptr, i32:$new))]>; def ATOMIC_SWAP_I16 : Pseudo< - (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$new), "#ATOMIC_SWAP_I16", + (outs gprc:$dst), (ins memrr:$ptr, gprc:$new), "#ATOMIC_SWAP_I16", [(set i32:$dst, (atomic_swap_16 xoaddr:$ptr, i32:$new))]>; def ATOMIC_SWAP_I32 : Pseudo< - (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$new), "#ATOMIC_SWAP_I32", + (outs gprc:$dst), (ins memrr:$ptr, gprc:$new), "#ATOMIC_SWAP_I32", [(set i32:$dst, (atomic_swap_32 xoaddr:$ptr, i32:$new))]>; } } // Instructions to support atomic operations -def LWARX : XForm_1<31, 20, (outs GPRC:$rD), (ins memrr:$src), +def LWARX : XForm_1<31, 20, (outs gprc:$rD), (ins memrr:$src), "lwarx $rD, $src", LdStLWARX, [(set i32:$rD, (PPClarx xoaddr:$src))]>; let Defs = [CR0] in -def STWCX : XForm_1<31, 150, (outs), (ins GPRC:$rS, memrr:$dst), +def STWCX : XForm_1<31, 150, (outs), (ins gprc:$rS, memrr:$dst), "stwcx. $rS, $dst", LdStSTWCX, [(PPCstcx i32:$rS, xoaddr:$dst)]>, isDOT; @@ -734,93 +1116,93 @@ def TRAP : XForm_24<31, 4, (outs), (ins), "trap", LdStLoad, [(trap)]>; // Unindexed (r+i) Loads. let canFoldAsLoad = 1, PPC970_Unit = 2 in { -def LBZ : DForm_1<34, (outs GPRC:$rD), (ins memri:$src), +def LBZ : DForm_1<34, (outs gprc:$rD), (ins memri:$src), "lbz $rD, $src", LdStLoad, [(set i32:$rD, (zextloadi8 iaddr:$src))]>; -def LHA : DForm_1<42, (outs GPRC:$rD), (ins memri:$src), +def LHA : DForm_1<42, (outs gprc:$rD), (ins memri:$src), "lha $rD, $src", LdStLHA, [(set i32:$rD, (sextloadi16 iaddr:$src))]>, PPC970_DGroup_Cracked; -def LHZ : DForm_1<40, (outs GPRC:$rD), (ins memri:$src), +def LHZ : DForm_1<40, (outs gprc:$rD), (ins memri:$src), "lhz $rD, $src", LdStLoad, [(set i32:$rD, (zextloadi16 iaddr:$src))]>; -def LWZ : DForm_1<32, (outs GPRC:$rD), (ins memri:$src), +def LWZ : DForm_1<32, (outs gprc:$rD), (ins memri:$src), "lwz $rD, $src", LdStLoad, [(set i32:$rD, (load iaddr:$src))]>; -def LFS : DForm_1<48, (outs F4RC:$rD), (ins memri:$src), +def LFS : DForm_1<48, (outs f4rc:$rD), (ins memri:$src), "lfs $rD, $src", LdStLFD, [(set f32:$rD, (load iaddr:$src))]>; -def LFD : DForm_1<50, (outs F8RC:$rD), (ins memri:$src), +def LFD : DForm_1<50, (outs f8rc:$rD), (ins memri:$src), "lfd $rD, $src", LdStLFD, [(set f64:$rD, (load iaddr:$src))]>; // Unindexed (r+i) Loads with Update (preinc). 
-let mayLoad = 1 in { -def LBZU : DForm_1<35, (outs GPRC:$rD, ptr_rc_nor0:$ea_result), (ins memri:$addr), +let mayLoad = 1, neverHasSideEffects = 1 in { +def LBZU : DForm_1<35, (outs gprc:$rD, ptr_rc_nor0:$ea_result), (ins memri:$addr), "lbzu $rD, $addr", LdStLoadUpd, []>, RegConstraint<"$addr.reg = $ea_result">, NoEncode<"$ea_result">; -def LHAU : DForm_1<43, (outs GPRC:$rD, ptr_rc_nor0:$ea_result), (ins memri:$addr), +def LHAU : DForm_1<43, (outs gprc:$rD, ptr_rc_nor0:$ea_result), (ins memri:$addr), "lhau $rD, $addr", LdStLHAU, []>, RegConstraint<"$addr.reg = $ea_result">, NoEncode<"$ea_result">; -def LHZU : DForm_1<41, (outs GPRC:$rD, ptr_rc_nor0:$ea_result), (ins memri:$addr), +def LHZU : DForm_1<41, (outs gprc:$rD, ptr_rc_nor0:$ea_result), (ins memri:$addr), "lhzu $rD, $addr", LdStLoadUpd, []>, RegConstraint<"$addr.reg = $ea_result">, NoEncode<"$ea_result">; -def LWZU : DForm_1<33, (outs GPRC:$rD, ptr_rc_nor0:$ea_result), (ins memri:$addr), +def LWZU : DForm_1<33, (outs gprc:$rD, ptr_rc_nor0:$ea_result), (ins memri:$addr), "lwzu $rD, $addr", LdStLoadUpd, []>, RegConstraint<"$addr.reg = $ea_result">, NoEncode<"$ea_result">; -def LFSU : DForm_1<49, (outs F4RC:$rD, ptr_rc_nor0:$ea_result), (ins memri:$addr), +def LFSU : DForm_1<49, (outs f4rc:$rD, ptr_rc_nor0:$ea_result), (ins memri:$addr), "lfsu $rD, $addr", LdStLFDU, []>, RegConstraint<"$addr.reg = $ea_result">, NoEncode<"$ea_result">; -def LFDU : DForm_1<51, (outs F8RC:$rD, ptr_rc_nor0:$ea_result), (ins memri:$addr), +def LFDU : DForm_1<51, (outs f8rc:$rD, ptr_rc_nor0:$ea_result), (ins memri:$addr), "lfdu $rD, $addr", LdStLFDU, []>, RegConstraint<"$addr.reg = $ea_result">, NoEncode<"$ea_result">; // Indexed (r+r) Loads with Update (preinc). -def LBZUX : XForm_1<31, 119, (outs GPRC:$rD, ptr_rc_nor0:$ea_result), +def LBZUX : XForm_1<31, 119, (outs gprc:$rD, ptr_rc_nor0:$ea_result), (ins memrr:$addr), "lbzux $rD, $addr", LdStLoadUpd, []>, RegConstraint<"$addr.ptrreg = $ea_result">, NoEncode<"$ea_result">; -def LHAUX : XForm_1<31, 375, (outs GPRC:$rD, ptr_rc_nor0:$ea_result), +def LHAUX : XForm_1<31, 375, (outs gprc:$rD, ptr_rc_nor0:$ea_result), (ins memrr:$addr), "lhaux $rD, $addr", LdStLHAU, []>, RegConstraint<"$addr.ptrreg = $ea_result">, NoEncode<"$ea_result">; -def LHZUX : XForm_1<31, 311, (outs GPRC:$rD, ptr_rc_nor0:$ea_result), +def LHZUX : XForm_1<31, 311, (outs gprc:$rD, ptr_rc_nor0:$ea_result), (ins memrr:$addr), "lhzux $rD, $addr", LdStLoadUpd, []>, RegConstraint<"$addr.ptrreg = $ea_result">, NoEncode<"$ea_result">; -def LWZUX : XForm_1<31, 55, (outs GPRC:$rD, ptr_rc_nor0:$ea_result), +def LWZUX : XForm_1<31, 55, (outs gprc:$rD, ptr_rc_nor0:$ea_result), (ins memrr:$addr), "lwzux $rD, $addr", LdStLoadUpd, []>, RegConstraint<"$addr.ptrreg = $ea_result">, NoEncode<"$ea_result">; -def LFSUX : XForm_1<31, 567, (outs F4RC:$rD, ptr_rc_nor0:$ea_result), +def LFSUX : XForm_1<31, 567, (outs f4rc:$rD, ptr_rc_nor0:$ea_result), (ins memrr:$addr), "lfsux $rD, $addr", LdStLFDU, []>, RegConstraint<"$addr.ptrreg = $ea_result">, NoEncode<"$ea_result">; -def LFDUX : XForm_1<31, 631, (outs F8RC:$rD, ptr_rc_nor0:$ea_result), +def LFDUX : XForm_1<31, 631, (outs f8rc:$rD, ptr_rc_nor0:$ea_result), (ins memrr:$addr), "lfdux $rD, $addr", LdStLFDU, []>, RegConstraint<"$addr.ptrreg = $ea_result">, @@ -831,39 +1213,39 @@ def LFDUX : XForm_1<31, 631, (outs F8RC:$rD, ptr_rc_nor0:$ea_result), // Indexed (r+r) Loads. 
// let canFoldAsLoad = 1, PPC970_Unit = 2 in { -def LBZX : XForm_1<31, 87, (outs GPRC:$rD), (ins memrr:$src), +def LBZX : XForm_1<31, 87, (outs gprc:$rD), (ins memrr:$src), "lbzx $rD, $src", LdStLoad, [(set i32:$rD, (zextloadi8 xaddr:$src))]>; -def LHAX : XForm_1<31, 343, (outs GPRC:$rD), (ins memrr:$src), +def LHAX : XForm_1<31, 343, (outs gprc:$rD), (ins memrr:$src), "lhax $rD, $src", LdStLHA, [(set i32:$rD, (sextloadi16 xaddr:$src))]>, PPC970_DGroup_Cracked; -def LHZX : XForm_1<31, 279, (outs GPRC:$rD), (ins memrr:$src), +def LHZX : XForm_1<31, 279, (outs gprc:$rD), (ins memrr:$src), "lhzx $rD, $src", LdStLoad, [(set i32:$rD, (zextloadi16 xaddr:$src))]>; -def LWZX : XForm_1<31, 23, (outs GPRC:$rD), (ins memrr:$src), +def LWZX : XForm_1<31, 23, (outs gprc:$rD), (ins memrr:$src), "lwzx $rD, $src", LdStLoad, [(set i32:$rD, (load xaddr:$src))]>; -def LHBRX : XForm_1<31, 790, (outs GPRC:$rD), (ins memrr:$src), +def LHBRX : XForm_1<31, 790, (outs gprc:$rD), (ins memrr:$src), "lhbrx $rD, $src", LdStLoad, [(set i32:$rD, (PPClbrx xoaddr:$src, i16))]>; -def LWBRX : XForm_1<31, 534, (outs GPRC:$rD), (ins memrr:$src), +def LWBRX : XForm_1<31, 534, (outs gprc:$rD), (ins memrr:$src), "lwbrx $rD, $src", LdStLoad, [(set i32:$rD, (PPClbrx xoaddr:$src, i32))]>; -def LFSX : XForm_25<31, 535, (outs F4RC:$frD), (ins memrr:$src), +def LFSX : XForm_25<31, 535, (outs f4rc:$frD), (ins memrr:$src), "lfsx $frD, $src", LdStLFD, [(set f32:$frD, (load xaddr:$src))]>; -def LFDX : XForm_25<31, 599, (outs F8RC:$frD), (ins memrr:$src), +def LFDX : XForm_25<31, 599, (outs f8rc:$frD), (ins memrr:$src), "lfdx $frD, $src", LdStLFD, [(set f64:$frD, (load xaddr:$src))]>; -def LFIWAX : XForm_25<31, 855, (outs F8RC:$frD), (ins memrr:$src), +def LFIWAX : XForm_25<31, 855, (outs f8rc:$frD), (ins memrr:$src), "lfiwax $frD, $src", LdStLFD, [(set f64:$frD, (PPClfiwax xoaddr:$src))]>; -def LFIWZX : XForm_25<31, 887, (outs F8RC:$frD), (ins memrr:$src), +def LFIWZX : XForm_25<31, 887, (outs f8rc:$frD), (ins memrr:$src), "lfiwzx $frD, $src", LdStLFD, [(set f64:$frD, (PPClfiwzx xoaddr:$src))]>; } @@ -874,38 +1256,38 @@ def LFIWZX : XForm_25<31, 887, (outs F8RC:$frD), (ins memrr:$src), // Unindexed (r+i) Stores. let PPC970_Unit = 2 in { -def STB : DForm_1<38, (outs), (ins GPRC:$rS, memri:$src), +def STB : DForm_1<38, (outs), (ins gprc:$rS, memri:$src), "stb $rS, $src", LdStStore, [(truncstorei8 i32:$rS, iaddr:$src)]>; -def STH : DForm_1<44, (outs), (ins GPRC:$rS, memri:$src), +def STH : DForm_1<44, (outs), (ins gprc:$rS, memri:$src), "sth $rS, $src", LdStStore, [(truncstorei16 i32:$rS, iaddr:$src)]>; -def STW : DForm_1<36, (outs), (ins GPRC:$rS, memri:$src), +def STW : DForm_1<36, (outs), (ins gprc:$rS, memri:$src), "stw $rS, $src", LdStStore, [(store i32:$rS, iaddr:$src)]>; -def STFS : DForm_1<52, (outs), (ins F4RC:$rS, memri:$dst), +def STFS : DForm_1<52, (outs), (ins f4rc:$rS, memri:$dst), "stfs $rS, $dst", LdStSTFD, [(store f32:$rS, iaddr:$dst)]>; -def STFD : DForm_1<54, (outs), (ins F8RC:$rS, memri:$dst), +def STFD : DForm_1<54, (outs), (ins f8rc:$rS, memri:$dst), "stfd $rS, $dst", LdStSTFD, [(store f64:$rS, iaddr:$dst)]>; } // Unindexed (r+i) Stores with Update (preinc). 
let PPC970_Unit = 2, mayStore = 1 in { -def STBU : DForm_1<39, (outs ptr_rc_nor0:$ea_res), (ins GPRC:$rS, memri:$dst), +def STBU : DForm_1<39, (outs ptr_rc_nor0:$ea_res), (ins gprc:$rS, memri:$dst), "stbu $rS, $dst", LdStStoreUpd, []>, RegConstraint<"$dst.reg = $ea_res">, NoEncode<"$ea_res">; -def STHU : DForm_1<45, (outs ptr_rc_nor0:$ea_res), (ins GPRC:$rS, memri:$dst), +def STHU : DForm_1<45, (outs ptr_rc_nor0:$ea_res), (ins gprc:$rS, memri:$dst), "sthu $rS, $dst", LdStStoreUpd, []>, RegConstraint<"$dst.reg = $ea_res">, NoEncode<"$ea_res">; -def STWU : DForm_1<37, (outs ptr_rc_nor0:$ea_res), (ins GPRC:$rS, memri:$dst), +def STWU : DForm_1<37, (outs ptr_rc_nor0:$ea_res), (ins gprc:$rS, memri:$dst), "stwu $rS, $dst", LdStStoreUpd, []>, RegConstraint<"$dst.reg = $ea_res">, NoEncode<"$ea_res">; -def STFSU : DForm_1<37, (outs ptr_rc_nor0:$ea_res), (ins F4RC:$rS, memri:$dst), +def STFSU : DForm_1<53, (outs ptr_rc_nor0:$ea_res), (ins f4rc:$rS, memri:$dst), "stfsu $rS, $dst", LdStSTFDU, []>, RegConstraint<"$dst.reg = $ea_res">, NoEncode<"$ea_res">; -def STFDU : DForm_1<37, (outs ptr_rc_nor0:$ea_res), (ins F8RC:$rS, memri:$dst), +def STFDU : DForm_1<55, (outs ptr_rc_nor0:$ea_res), (ins f8rc:$rS, memri:$dst), "stfdu $rS, $dst", LdStSTFDU, []>, RegConstraint<"$dst.reg = $ea_res">, NoEncode<"$ea_res">; } @@ -926,59 +1308,59 @@ def : Pat<(pre_store f64:$rS, iPTR:$ptrreg, iaddroff:$ptroff), // Indexed (r+r) Stores. let PPC970_Unit = 2 in { -def STBX : XForm_8<31, 215, (outs), (ins GPRC:$rS, memrr:$dst), +def STBX : XForm_8<31, 215, (outs), (ins gprc:$rS, memrr:$dst), "stbx $rS, $dst", LdStStore, [(truncstorei8 i32:$rS, xaddr:$dst)]>, PPC970_DGroup_Cracked; -def STHX : XForm_8<31, 407, (outs), (ins GPRC:$rS, memrr:$dst), +def STHX : XForm_8<31, 407, (outs), (ins gprc:$rS, memrr:$dst), "sthx $rS, $dst", LdStStore, [(truncstorei16 i32:$rS, xaddr:$dst)]>, PPC970_DGroup_Cracked; -def STWX : XForm_8<31, 151, (outs), (ins GPRC:$rS, memrr:$dst), +def STWX : XForm_8<31, 151, (outs), (ins gprc:$rS, memrr:$dst), "stwx $rS, $dst", LdStStore, [(store i32:$rS, xaddr:$dst)]>, PPC970_DGroup_Cracked; -def STHBRX: XForm_8<31, 918, (outs), (ins GPRC:$rS, memrr:$dst), +def STHBRX: XForm_8<31, 918, (outs), (ins gprc:$rS, memrr:$dst), "sthbrx $rS, $dst", LdStStore, [(PPCstbrx i32:$rS, xoaddr:$dst, i16)]>, PPC970_DGroup_Cracked; -def STWBRX: XForm_8<31, 662, (outs), (ins GPRC:$rS, memrr:$dst), +def STWBRX: XForm_8<31, 662, (outs), (ins gprc:$rS, memrr:$dst), "stwbrx $rS, $dst", LdStStore, [(PPCstbrx i32:$rS, xoaddr:$dst, i32)]>, PPC970_DGroup_Cracked; -def STFIWX: XForm_28<31, 983, (outs), (ins F8RC:$frS, memrr:$dst), +def STFIWX: XForm_28<31, 983, (outs), (ins f8rc:$frS, memrr:$dst), "stfiwx $frS, $dst", LdStSTFD, [(PPCstfiwx f64:$frS, xoaddr:$dst)]>; -def STFSX : XForm_28<31, 663, (outs), (ins F4RC:$frS, memrr:$dst), +def STFSX : XForm_28<31, 663, (outs), (ins f4rc:$frS, memrr:$dst), "stfsx $frS, $dst", LdStSTFD, [(store f32:$frS, xaddr:$dst)]>; -def STFDX : XForm_28<31, 727, (outs), (ins F8RC:$frS, memrr:$dst), +def STFDX : XForm_28<31, 727, (outs), (ins f8rc:$frS, memrr:$dst), "stfdx $frS, $dst", LdStSTFD, [(store f64:$frS, xaddr:$dst)]>; } // Indexed (r+r) Stores with Update (preinc). 
let PPC970_Unit = 2, mayStore = 1 in { -def STBUX : XForm_8<31, 247, (outs ptr_rc_nor0:$ea_res), (ins GPRC:$rS, memrr:$dst), +def STBUX : XForm_8<31, 247, (outs ptr_rc_nor0:$ea_res), (ins gprc:$rS, memrr:$dst), "stbux $rS, $dst", LdStStoreUpd, []>, RegConstraint<"$dst.ptrreg = $ea_res">, NoEncode<"$ea_res">, PPC970_DGroup_Cracked; -def STHUX : XForm_8<31, 439, (outs ptr_rc_nor0:$ea_res), (ins GPRC:$rS, memrr:$dst), +def STHUX : XForm_8<31, 439, (outs ptr_rc_nor0:$ea_res), (ins gprc:$rS, memrr:$dst), "sthux $rS, $dst", LdStStoreUpd, []>, RegConstraint<"$dst.ptrreg = $ea_res">, NoEncode<"$ea_res">, PPC970_DGroup_Cracked; -def STWUX : XForm_8<31, 183, (outs ptr_rc_nor0:$ea_res), (ins GPRC:$rS, memrr:$dst), +def STWUX : XForm_8<31, 183, (outs ptr_rc_nor0:$ea_res), (ins gprc:$rS, memrr:$dst), "stwux $rS, $dst", LdStStoreUpd, []>, RegConstraint<"$dst.ptrreg = $ea_res">, NoEncode<"$ea_res">, PPC970_DGroup_Cracked; -def STFSUX: XForm_8<31, 695, (outs ptr_rc_nor0:$ea_res), (ins F4RC:$rS, memrr:$dst), +def STFSUX: XForm_8<31, 695, (outs ptr_rc_nor0:$ea_res), (ins f4rc:$rS, memrr:$dst), "stfsux $rS, $dst", LdStSTFDU, []>, RegConstraint<"$dst.ptrreg = $ea_res">, NoEncode<"$ea_res">, PPC970_DGroup_Cracked; -def STFDUX: XForm_8<31, 759, (outs ptr_rc_nor0:$ea_res), (ins F8RC:$rS, memrr:$dst), +def STFDUX: XForm_8<31, 759, (outs ptr_rc_nor0:$ea_res), (ins f8rc:$rS, memrr:$dst), "stfdux $rS, $dst", LdStSTFDU, []>, RegConstraint<"$dst.ptrreg = $ea_res">, NoEncode<"$ea_res">, PPC970_DGroup_Cracked; @@ -1007,193 +1389,206 @@ def SYNC : XForm_24_sync<31, 598, (outs), (ins), // let PPC970_Unit = 1 in { // FXU Operations. -def ADDI : DForm_2<14, (outs GPRC:$rD), (ins GPRC_NOR0:$rA, symbolLo:$imm), +def ADDI : DForm_2<14, (outs gprc:$rD), (ins gprc_nor0:$rA, symbolLo:$imm), "addi $rD, $rA, $imm", IntSimple, [(set i32:$rD, (add i32:$rA, immSExt16:$imm))]>; -let Defs = [CARRY] in { -def ADDIC : DForm_2<12, (outs GPRC:$rD), (ins GPRC:$rA, s16imm:$imm), +let BaseName = "addic" in { +let Defs = [CARRY] in +def ADDIC : DForm_2<12, (outs gprc:$rD), (ins gprc:$rA, s16imm:$imm), "addic $rD, $rA, $imm", IntGeneral, [(set i32:$rD, (addc i32:$rA, immSExt16:$imm))]>, - PPC970_DGroup_Cracked; -def ADDICo : DForm_2<13, (outs GPRC:$rD), (ins GPRC:$rA, s16imm:$imm), + RecFormRel, PPC970_DGroup_Cracked; +let Defs = [CARRY, CR0] in +def ADDICo : DForm_2<13, (outs gprc:$rD), (ins gprc:$rA, s16imm:$imm), "addic. 
$rD, $rA, $imm", IntGeneral, - []>; + []>, isDOT, RecFormRel; } -def ADDIS : DForm_2<15, (outs GPRC:$rD), (ins GPRC_NOR0:$rA, symbolHi:$imm), +def ADDIS : DForm_2<15, (outs gprc:$rD), (ins gprc_nor0:$rA, symbolHi:$imm), "addis $rD, $rA, $imm", IntSimple, [(set i32:$rD, (add i32:$rA, imm16ShiftedSExt:$imm))]>; let isCodeGenOnly = 1 in -def LA : DForm_2<14, (outs GPRC:$rD), (ins GPRC_NOR0:$rA, symbolLo:$sym), +def LA : DForm_2<14, (outs gprc:$rD), (ins gprc_nor0:$rA, symbolLo:$sym), "la $rD, $sym($rA)", IntGeneral, [(set i32:$rD, (add i32:$rA, (PPClo tglobaladdr:$sym, 0)))]>; -def MULLI : DForm_2< 7, (outs GPRC:$rD), (ins GPRC:$rA, s16imm:$imm), +def MULLI : DForm_2< 7, (outs gprc:$rD), (ins gprc:$rA, s16imm:$imm), "mulli $rD, $rA, $imm", IntMulLI, [(set i32:$rD, (mul i32:$rA, immSExt16:$imm))]>; -let Defs = [CARRY] in { -def SUBFIC : DForm_2< 8, (outs GPRC:$rD), (ins GPRC:$rA, s16imm:$imm), +let Defs = [CARRY] in +def SUBFIC : DForm_2< 8, (outs gprc:$rD), (ins gprc:$rA, s16imm:$imm), "subfic $rD, $rA, $imm", IntGeneral, [(set i32:$rD, (subc immSExt16:$imm, i32:$rA))]>; -} let isReMaterializable = 1, isAsCheapAsAMove = 1, isMoveImm = 1 in { - def LI : DForm_2_r0<14, (outs GPRC:$rD), (ins symbolLo:$imm), + def LI : DForm_2_r0<14, (outs gprc:$rD), (ins symbolLo:$imm), "li $rD, $imm", IntSimple, [(set i32:$rD, immSExt16:$imm)]>; - def LIS : DForm_2_r0<15, (outs GPRC:$rD), (ins symbolHi:$imm), + def LIS : DForm_2_r0<15, (outs gprc:$rD), (ins symbolHi:$imm), "lis $rD, $imm", IntSimple, [(set i32:$rD, imm16ShiftedSExt:$imm)]>; } } let PPC970_Unit = 1 in { // FXU Operations. -def ANDIo : DForm_4<28, (outs GPRC:$dst), (ins GPRC:$src1, u16imm:$src2), +let Defs = [CR0] in { +def ANDIo : DForm_4<28, (outs gprc:$dst), (ins gprc:$src1, u16imm:$src2), "andi. $dst, $src1, $src2", IntGeneral, [(set i32:$dst, (and i32:$src1, immZExt16:$src2))]>, isDOT; -def ANDISo : DForm_4<29, (outs GPRC:$dst), (ins GPRC:$src1, u16imm:$src2), +def ANDISo : DForm_4<29, (outs gprc:$dst), (ins gprc:$src1, u16imm:$src2), "andis. 
$dst, $src1, $src2", IntGeneral, [(set i32:$dst, (and i32:$src1, imm16ShiftedZExt:$src2))]>, isDOT; -def ORI : DForm_4<24, (outs GPRC:$dst), (ins GPRC:$src1, u16imm:$src2), +} +def ORI : DForm_4<24, (outs gprc:$dst), (ins gprc:$src1, u16imm:$src2), "ori $dst, $src1, $src2", IntSimple, [(set i32:$dst, (or i32:$src1, immZExt16:$src2))]>; -def ORIS : DForm_4<25, (outs GPRC:$dst), (ins GPRC:$src1, u16imm:$src2), +def ORIS : DForm_4<25, (outs gprc:$dst), (ins gprc:$src1, u16imm:$src2), "oris $dst, $src1, $src2", IntSimple, [(set i32:$dst, (or i32:$src1, imm16ShiftedZExt:$src2))]>; -def XORI : DForm_4<26, (outs GPRC:$dst), (ins GPRC:$src1, u16imm:$src2), +def XORI : DForm_4<26, (outs gprc:$dst), (ins gprc:$src1, u16imm:$src2), "xori $dst, $src1, $src2", IntSimple, [(set i32:$dst, (xor i32:$src1, immZExt16:$src2))]>; -def XORIS : DForm_4<27, (outs GPRC:$dst), (ins GPRC:$src1, u16imm:$src2), +def XORIS : DForm_4<27, (outs gprc:$dst), (ins gprc:$src1, u16imm:$src2), "xoris $dst, $src1, $src2", IntSimple, [(set i32:$dst, (xor i32:$src1, imm16ShiftedZExt:$src2))]>; def NOP : DForm_4_zero<24, (outs), (ins), "nop", IntSimple, []>; -def CMPWI : DForm_5_ext<11, (outs CRRC:$crD), (ins GPRC:$rA, s16imm:$imm), - "cmpwi $crD, $rA, $imm", IntCompare>; -def CMPLWI : DForm_6_ext<10, (outs CRRC:$dst), (ins GPRC:$src1, u16imm:$src2), - "cmplwi $dst, $src1, $src2", IntCompare>; +let isCompare = 1, neverHasSideEffects = 1 in { + def CMPWI : DForm_5_ext<11, (outs crrc:$crD), (ins gprc:$rA, s16imm:$imm), + "cmpwi $crD, $rA, $imm", IntCompare>; + def CMPLWI : DForm_6_ext<10, (outs crrc:$dst), (ins gprc:$src1, u16imm:$src2), + "cmplwi $dst, $src1, $src2", IntCompare>; +} +} + +let PPC970_Unit = 1, neverHasSideEffects = 1 in { // FXU Operations. +defm NAND : XForm_6r<31, 476, (outs gprc:$rA), (ins gprc:$rS, gprc:$rB), + "nand", "$rA, $rS, $rB", IntSimple, + [(set i32:$rA, (not (and i32:$rS, i32:$rB)))]>; +defm AND : XForm_6r<31, 28, (outs gprc:$rA), (ins gprc:$rS, gprc:$rB), + "and", "$rA, $rS, $rB", IntSimple, + [(set i32:$rA, (and i32:$rS, i32:$rB))]>; +defm ANDC : XForm_6r<31, 60, (outs gprc:$rA), (ins gprc:$rS, gprc:$rB), + "andc", "$rA, $rS, $rB", IntSimple, + [(set i32:$rA, (and i32:$rS, (not i32:$rB)))]>; +defm OR : XForm_6r<31, 444, (outs gprc:$rA), (ins gprc:$rS, gprc:$rB), + "or", "$rA, $rS, $rB", IntSimple, + [(set i32:$rA, (or i32:$rS, i32:$rB))]>; +defm NOR : XForm_6r<31, 124, (outs gprc:$rA), (ins gprc:$rS, gprc:$rB), + "nor", "$rA, $rS, $rB", IntSimple, + [(set i32:$rA, (not (or i32:$rS, i32:$rB)))]>; +defm ORC : XForm_6r<31, 412, (outs gprc:$rA), (ins gprc:$rS, gprc:$rB), + "orc", "$rA, $rS, $rB", IntSimple, + [(set i32:$rA, (or i32:$rS, (not i32:$rB)))]>; +defm EQV : XForm_6r<31, 284, (outs gprc:$rA), (ins gprc:$rS, gprc:$rB), + "eqv", "$rA, $rS, $rB", IntSimple, + [(set i32:$rA, (not (xor i32:$rS, i32:$rB)))]>; +defm XOR : XForm_6r<31, 316, (outs gprc:$rA), (ins gprc:$rS, gprc:$rB), + "xor", "$rA, $rS, $rB", IntSimple, + [(set i32:$rA, (xor i32:$rS, i32:$rB))]>; +defm SLW : XForm_6r<31, 24, (outs gprc:$rA), (ins gprc:$rS, gprc:$rB), + "slw", "$rA, $rS, $rB", IntGeneral, + [(set i32:$rA, (PPCshl i32:$rS, i32:$rB))]>; +defm SRW : XForm_6r<31, 536, (outs gprc:$rA), (ins gprc:$rS, gprc:$rB), + "srw", "$rA, $rS, $rB", IntGeneral, + [(set i32:$rA, (PPCsrl i32:$rS, i32:$rB))]>; +defm SRAW : XForm_6rc<31, 792, (outs gprc:$rA), (ins gprc:$rS, gprc:$rB), + "sraw", "$rA, $rS, $rB", IntShift, + [(set i32:$rA, (PPCsra i32:$rS, i32:$rB))]>; } - let PPC970_Unit = 1 in { // FXU Operations. 
-def NAND : XForm_6<31, 476, (outs GPRC:$rA), (ins GPRC:$rS, GPRC:$rB), - "nand $rA, $rS, $rB", IntSimple, - [(set i32:$rA, (not (and i32:$rS, i32:$rB)))]>; -def AND : XForm_6<31, 28, (outs GPRC:$rA), (ins GPRC:$rS, GPRC:$rB), - "and $rA, $rS, $rB", IntSimple, - [(set i32:$rA, (and i32:$rS, i32:$rB))]>; -def ANDC : XForm_6<31, 60, (outs GPRC:$rA), (ins GPRC:$rS, GPRC:$rB), - "andc $rA, $rS, $rB", IntSimple, - [(set i32:$rA, (and i32:$rS, (not i32:$rB)))]>; -def OR : XForm_6<31, 444, (outs GPRC:$rA), (ins GPRC:$rS, GPRC:$rB), - "or $rA, $rS, $rB", IntSimple, - [(set i32:$rA, (or i32:$rS, i32:$rB))]>; -def NOR : XForm_6<31, 124, (outs GPRC:$rA), (ins GPRC:$rS, GPRC:$rB), - "nor $rA, $rS, $rB", IntSimple, - [(set i32:$rA, (not (or i32:$rS, i32:$rB)))]>; -def ORC : XForm_6<31, 412, (outs GPRC:$rA), (ins GPRC:$rS, GPRC:$rB), - "orc $rA, $rS, $rB", IntSimple, - [(set i32:$rA, (or i32:$rS, (not i32:$rB)))]>; -def EQV : XForm_6<31, 284, (outs GPRC:$rA), (ins GPRC:$rS, GPRC:$rB), - "eqv $rA, $rS, $rB", IntSimple, - [(set i32:$rA, (not (xor i32:$rS, i32:$rB)))]>; -def XOR : XForm_6<31, 316, (outs GPRC:$rA), (ins GPRC:$rS, GPRC:$rB), - "xor $rA, $rS, $rB", IntSimple, - [(set i32:$rA, (xor i32:$rS, i32:$rB))]>; -def SLW : XForm_6<31, 24, (outs GPRC:$rA), (ins GPRC:$rS, GPRC:$rB), - "slw $rA, $rS, $rB", IntGeneral, - [(set i32:$rA, (PPCshl i32:$rS, i32:$rB))]>; -def SRW : XForm_6<31, 536, (outs GPRC:$rA), (ins GPRC:$rS, GPRC:$rB), - "srw $rA, $rS, $rB", IntGeneral, - [(set i32:$rA, (PPCsrl i32:$rS, i32:$rB))]>; -let Defs = [CARRY] in { -def SRAW : XForm_6<31, 792, (outs GPRC:$rA), (ins GPRC:$rS, GPRC:$rB), - "sraw $rA, $rS, $rB", IntShift, - [(set i32:$rA, (PPCsra i32:$rS, i32:$rB))]>; +let neverHasSideEffects = 1 in { +defm SRAWI : XForm_10rc<31, 824, (outs gprc:$rA), (ins gprc:$rS, u5imm:$SH), + "srawi", "$rA, $rS, $SH", IntShift, + [(set i32:$rA, (sra i32:$rS, (i32 imm:$SH)))]>; +defm CNTLZW : XForm_11r<31, 26, (outs gprc:$rA), (ins gprc:$rS), + "cntlzw", "$rA, $rS", IntGeneral, + [(set i32:$rA, (ctlz i32:$rS))]>; +defm EXTSB : XForm_11r<31, 954, (outs gprc:$rA), (ins gprc:$rS), + "extsb", "$rA, $rS", IntSimple, + [(set i32:$rA, (sext_inreg i32:$rS, i8))]>; +defm EXTSH : XForm_11r<31, 922, (outs gprc:$rA), (ins gprc:$rS), + "extsh", "$rA, $rS", IntSimple, + [(set i32:$rA, (sext_inreg i32:$rS, i16))]>; +} +let isCompare = 1, neverHasSideEffects = 1 in { + def CMPW : XForm_16_ext<31, 0, (outs crrc:$crD), (ins gprc:$rA, gprc:$rB), + "cmpw $crD, $rA, $rB", IntCompare>; + def CMPLW : XForm_16_ext<31, 32, (outs crrc:$crD), (ins gprc:$rA, gprc:$rB), + "cmplw $crD, $rA, $rB", IntCompare>; } } - -let PPC970_Unit = 1 in { // FXU Operations. -let Defs = [CARRY] in { -def SRAWI : XForm_10<31, 824, (outs GPRC:$rA), (ins GPRC:$rS, u5imm:$SH), - "srawi $rA, $rS, $SH", IntShift, - [(set i32:$rA, (sra i32:$rS, (i32 imm:$SH)))]>; -} -def CNTLZW : XForm_11<31, 26, (outs GPRC:$rA), (ins GPRC:$rS), - "cntlzw $rA, $rS", IntGeneral, - [(set i32:$rA, (ctlz i32:$rS))]>; -def EXTSB : XForm_11<31, 954, (outs GPRC:$rA), (ins GPRC:$rS), - "extsb $rA, $rS", IntSimple, - [(set i32:$rA, (sext_inreg i32:$rS, i8))]>; -def EXTSH : XForm_11<31, 922, (outs GPRC:$rA), (ins GPRC:$rS), - "extsh $rA, $rS", IntSimple, - [(set i32:$rA, (sext_inreg i32:$rS, i16))]>; - -def CMPW : XForm_16_ext<31, 0, (outs CRRC:$crD), (ins GPRC:$rA, GPRC:$rB), - "cmpw $crD, $rA, $rB", IntCompare>; -def CMPLW : XForm_16_ext<31, 32, (outs CRRC:$crD), (ins GPRC:$rA, GPRC:$rB), - "cmplw $crD, $rA, $rB", IntCompare>; -} let PPC970_Unit = 3 in { // FPU Operations. 
//def FCMPO : XForm_17<63, 32, (outs CRRC:$crD), (ins FPRC:$fA, FPRC:$fB), // "fcmpo $crD, $fA, $fB", FPCompare>; -def FCMPUS : XForm_17<63, 0, (outs CRRC:$crD), (ins F4RC:$fA, F4RC:$fB), - "fcmpu $crD, $fA, $fB", FPCompare>; -def FCMPUD : XForm_17<63, 0, (outs CRRC:$crD), (ins F8RC:$fA, F8RC:$fB), - "fcmpu $crD, $fA, $fB", FPCompare>; +let isCompare = 1, neverHasSideEffects = 1 in { + def FCMPUS : XForm_17<63, 0, (outs crrc:$crD), (ins f4rc:$fA, f4rc:$fB), + "fcmpu $crD, $fA, $fB", FPCompare>; + def FCMPUD : XForm_17<63, 0, (outs crrc:$crD), (ins f8rc:$fA, f8rc:$fB), + "fcmpu $crD, $fA, $fB", FPCompare>; +} let Uses = [RM] in { - def FCTIWZ : XForm_26<63, 15, (outs F8RC:$frD), (ins F8RC:$frB), - "fctiwz $frD, $frB", FPGeneral, - [(set f64:$frD, (PPCfctiwz f64:$frB))]>; + let neverHasSideEffects = 1 in { + defm FCTIWZ : XForm_26r<63, 15, (outs f8rc:$frD), (ins f8rc:$frB), + "fctiwz", "$frD, $frB", FPGeneral, + [(set f64:$frD, (PPCfctiwz f64:$frB))]>; - def FRSP : XForm_26<63, 12, (outs F4RC:$frD), (ins F8RC:$frB), - "frsp $frD, $frB", FPGeneral, - [(set f32:$frD, (fround f64:$frB))]>; + defm FRSP : XForm_26r<63, 12, (outs f4rc:$frD), (ins f8rc:$frB), + "frsp", "$frD, $frB", FPGeneral, + [(set f32:$frD, (fround f64:$frB))]>; // The frin -> nearbyint mapping is valid only in fast-math mode. - def FRIND : XForm_26<63, 392, (outs F8RC:$frD), (ins F8RC:$frB), - "frin $frD, $frB", FPGeneral, - [(set f64:$frD, (fnearbyint f64:$frB))]>; - def FRINS : XForm_26<63, 392, (outs F4RC:$frD), (ins F4RC:$frB), - "frin $frD, $frB", FPGeneral, - [(set f32:$frD, (fnearbyint f32:$frB))]>; + let Interpretation64Bit = 1 in + defm FRIND : XForm_26r<63, 392, (outs f8rc:$frD), (ins f8rc:$frB), + "frin", "$frD, $frB", FPGeneral, + [(set f64:$frD, (fnearbyint f64:$frB))]>; + defm FRINS : XForm_26r<63, 392, (outs f4rc:$frD), (ins f4rc:$frB), + "frin", "$frD, $frB", FPGeneral, + [(set f32:$frD, (fnearbyint f32:$frB))]>; + } // These pseudos expand to rint but also set FE_INEXACT when the result does // not equal the argument. let usesCustomInserter = 1, Defs = [RM] in { // FIXME: Model FPSCR! 
- def FRINDrint : Pseudo<(outs F8RC:$frD), (ins F8RC:$frB), + def FRINDrint : Pseudo<(outs f8rc:$frD), (ins f8rc:$frB), "#FRINDrint", [(set f64:$frD, (frint f64:$frB))]>; - def FRINSrint : Pseudo<(outs F4RC:$frD), (ins F4RC:$frB), + def FRINSrint : Pseudo<(outs f4rc:$frD), (ins f4rc:$frB), "#FRINSrint", [(set f32:$frD, (frint f32:$frB))]>; } - def FRIPD : XForm_26<63, 456, (outs F8RC:$frD), (ins F8RC:$frB), - "frip $frD, $frB", FPGeneral, - [(set f64:$frD, (fceil f64:$frB))]>; - def FRIPS : XForm_26<63, 456, (outs F4RC:$frD), (ins F4RC:$frB), - "frip $frD, $frB", FPGeneral, - [(set f32:$frD, (fceil f32:$frB))]>; - def FRIZD : XForm_26<63, 424, (outs F8RC:$frD), (ins F8RC:$frB), - "friz $frD, $frB", FPGeneral, - [(set f64:$frD, (ftrunc f64:$frB))]>; - def FRIZS : XForm_26<63, 424, (outs F4RC:$frD), (ins F4RC:$frB), - "friz $frD, $frB", FPGeneral, - [(set f32:$frD, (ftrunc f32:$frB))]>; - def FRIMD : XForm_26<63, 488, (outs F8RC:$frD), (ins F8RC:$frB), - "frim $frD, $frB", FPGeneral, - [(set f64:$frD, (ffloor f64:$frB))]>; - def FRIMS : XForm_26<63, 488, (outs F4RC:$frD), (ins F4RC:$frB), - "frim $frD, $frB", FPGeneral, - [(set f32:$frD, (ffloor f32:$frB))]>; - - def FSQRT : XForm_26<63, 22, (outs F8RC:$frD), (ins F8RC:$frB), - "fsqrt $frD, $frB", FPSqrt, - [(set f64:$frD, (fsqrt f64:$frB))]>; - def FSQRTS : XForm_26<59, 22, (outs F4RC:$frD), (ins F4RC:$frB), - "fsqrts $frD, $frB", FPSqrt, - [(set f32:$frD, (fsqrt f32:$frB))]>; + let neverHasSideEffects = 1 in { + let Interpretation64Bit = 1 in + defm FRIPD : XForm_26r<63, 456, (outs f8rc:$frD), (ins f8rc:$frB), + "frip", "$frD, $frB", FPGeneral, + [(set f64:$frD, (fceil f64:$frB))]>; + defm FRIPS : XForm_26r<63, 456, (outs f4rc:$frD), (ins f4rc:$frB), + "frip", "$frD, $frB", FPGeneral, + [(set f32:$frD, (fceil f32:$frB))]>; + let Interpretation64Bit = 1 in + defm FRIZD : XForm_26r<63, 424, (outs f8rc:$frD), (ins f8rc:$frB), + "friz", "$frD, $frB", FPGeneral, + [(set f64:$frD, (ftrunc f64:$frB))]>; + defm FRIZS : XForm_26r<63, 424, (outs f4rc:$frD), (ins f4rc:$frB), + "friz", "$frD, $frB", FPGeneral, + [(set f32:$frD, (ftrunc f32:$frB))]>; + let Interpretation64Bit = 1 in + defm FRIMD : XForm_26r<63, 488, (outs f8rc:$frD), (ins f8rc:$frB), + "frim", "$frD, $frB", FPGeneral, + [(set f64:$frD, (ffloor f64:$frB))]>; + defm FRIMS : XForm_26r<63, 488, (outs f4rc:$frD), (ins f4rc:$frB), + "frim", "$frD, $frB", FPGeneral, + [(set f32:$frD, (ffloor f32:$frB))]>; + + defm FSQRT : XForm_26r<63, 22, (outs f8rc:$frD), (ins f8rc:$frB), + "fsqrt", "$frD, $frB", FPSqrt, + [(set f64:$frD, (fsqrt f64:$frB))]>; + defm FSQRTS : XForm_26r<59, 22, (outs f4rc:$frD), (ins f4rc:$frB), + "fsqrts", "$frD, $frB", FPSqrt, + [(set f32:$frD, (fsqrt f32:$frB))]>; + } } } @@ -1201,69 +1596,74 @@ let Uses = [RM] in { /// often coalesced away and we don't want the dispatch group builder to think /// that they will fill slots (which could cause the load of a LSU reject to /// sneak into a d-group with a store). -def FMR : XForm_26<63, 72, (outs F4RC:$frD), (ins F4RC:$frB), - "fmr $frD, $frB", FPGeneral, - []>, // (set f32:$frD, f32:$frB) - PPC970_Unit_Pseudo; +let neverHasSideEffects = 1 in +defm FMR : XForm_26r<63, 72, (outs f4rc:$frD), (ins f4rc:$frB), + "fmr", "$frD, $frB", FPGeneral, + []>, // (set f32:$frD, f32:$frB) + PPC970_Unit_Pseudo; -let PPC970_Unit = 3 in { // FPU Operations. +let PPC970_Unit = 3, neverHasSideEffects = 1 in { // FPU Operations. // These are artificially split into two different forms, for 4/8 byte FP. 
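// The single- and double-precision variants below share the same opcode
// (FABSS and FABSD are both 63/264, FNEGS and FNEGD both 63/40); the split
// exists only so the f4rc/f8rc register class matches the value type used in
// the selection patterns.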
-def FABSS : XForm_26<63, 264, (outs F4RC:$frD), (ins F4RC:$frB), - "fabs $frD, $frB", FPGeneral, - [(set f32:$frD, (fabs f32:$frB))]>; -def FABSD : XForm_26<63, 264, (outs F8RC:$frD), (ins F8RC:$frB), - "fabs $frD, $frB", FPGeneral, - [(set f64:$frD, (fabs f64:$frB))]>; -def FNABSS : XForm_26<63, 136, (outs F4RC:$frD), (ins F4RC:$frB), - "fnabs $frD, $frB", FPGeneral, - [(set f32:$frD, (fneg (fabs f32:$frB)))]>; -def FNABSD : XForm_26<63, 136, (outs F8RC:$frD), (ins F8RC:$frB), - "fnabs $frD, $frB", FPGeneral, - [(set f64:$frD, (fneg (fabs f64:$frB)))]>; -def FNEGS : XForm_26<63, 40, (outs F4RC:$frD), (ins F4RC:$frB), - "fneg $frD, $frB", FPGeneral, - [(set f32:$frD, (fneg f32:$frB))]>; -def FNEGD : XForm_26<63, 40, (outs F8RC:$frD), (ins F8RC:$frB), - "fneg $frD, $frB", FPGeneral, - [(set f64:$frD, (fneg f64:$frB))]>; +defm FABSS : XForm_26r<63, 264, (outs f4rc:$frD), (ins f4rc:$frB), + "fabs", "$frD, $frB", FPGeneral, + [(set f32:$frD, (fabs f32:$frB))]>; +let Interpretation64Bit = 1 in +defm FABSD : XForm_26r<63, 264, (outs f8rc:$frD), (ins f8rc:$frB), + "fabs", "$frD, $frB", FPGeneral, + [(set f64:$frD, (fabs f64:$frB))]>; +defm FNABSS : XForm_26r<63, 136, (outs f4rc:$frD), (ins f4rc:$frB), + "fnabs", "$frD, $frB", FPGeneral, + [(set f32:$frD, (fneg (fabs f32:$frB)))]>; +let Interpretation64Bit = 1 in +defm FNABSD : XForm_26r<63, 136, (outs f8rc:$frD), (ins f8rc:$frB), + "fnabs", "$frD, $frB", FPGeneral, + [(set f64:$frD, (fneg (fabs f64:$frB)))]>; +defm FNEGS : XForm_26r<63, 40, (outs f4rc:$frD), (ins f4rc:$frB), + "fneg", "$frD, $frB", FPGeneral, + [(set f32:$frD, (fneg f32:$frB))]>; +let Interpretation64Bit = 1 in +defm FNEGD : XForm_26r<63, 40, (outs f8rc:$frD), (ins f8rc:$frB), + "fneg", "$frD, $frB", FPGeneral, + [(set f64:$frD, (fneg f64:$frB))]>; // Reciprocal estimates. -def FRE : XForm_26<63, 24, (outs F8RC:$frD), (ins F8RC:$frB), - "fre $frD, $frB", FPGeneral, - [(set f64:$frD, (PPCfre f64:$frB))]>; -def FRES : XForm_26<59, 24, (outs F4RC:$frD), (ins F4RC:$frB), - "fres $frD, $frB", FPGeneral, - [(set f32:$frD, (PPCfre f32:$frB))]>; -def FRSQRTE : XForm_26<63, 26, (outs F8RC:$frD), (ins F8RC:$frB), - "frsqrte $frD, $frB", FPGeneral, - [(set f64:$frD, (PPCfrsqrte f64:$frB))]>; -def FRSQRTES : XForm_26<59, 26, (outs F4RC:$frD), (ins F4RC:$frB), - "frsqrtes $frD, $frB", FPGeneral, - [(set f32:$frD, (PPCfrsqrte f32:$frB))]>; +defm FRE : XForm_26r<63, 24, (outs f8rc:$frD), (ins f8rc:$frB), + "fre", "$frD, $frB", FPGeneral, + [(set f64:$frD, (PPCfre f64:$frB))]>; +defm FRES : XForm_26r<59, 24, (outs f4rc:$frD), (ins f4rc:$frB), + "fres", "$frD, $frB", FPGeneral, + [(set f32:$frD, (PPCfre f32:$frB))]>; +defm FRSQRTE : XForm_26r<63, 26, (outs f8rc:$frD), (ins f8rc:$frB), + "frsqrte", "$frD, $frB", FPGeneral, + [(set f64:$frD, (PPCfrsqrte f64:$frB))]>; +defm FRSQRTES : XForm_26r<59, 26, (outs f4rc:$frD), (ins f4rc:$frB), + "frsqrtes", "$frD, $frB", FPGeneral, + [(set f32:$frD, (PPCfrsqrte f32:$frB))]>; } // XL-Form instructions. condition register logical ops. 
// -def MCRF : XLForm_3<19, 0, (outs CRRC:$BF), (ins CRRC:$BFA), +let neverHasSideEffects = 1 in +def MCRF : XLForm_3<19, 0, (outs crrc:$BF), (ins crrc:$BFA), "mcrf $BF, $BFA", BrMCR>, PPC970_DGroup_First, PPC970_Unit_CRU; -def CREQV : XLForm_1<19, 289, (outs CRBITRC:$CRD), - (ins CRBITRC:$CRA, CRBITRC:$CRB), +def CREQV : XLForm_1<19, 289, (outs crbitrc:$CRD), + (ins crbitrc:$CRA, crbitrc:$CRB), "creqv $CRD, $CRA, $CRB", BrCR, []>; -def CROR : XLForm_1<19, 449, (outs CRBITRC:$CRD), - (ins CRBITRC:$CRA, CRBITRC:$CRB), +def CROR : XLForm_1<19, 449, (outs crbitrc:$CRD), + (ins crbitrc:$CRA, crbitrc:$CRB), "cror $CRD, $CRA, $CRB", BrCR, []>; let isCodeGenOnly = 1 in { -def CRSET : XLForm_1_ext<19, 289, (outs CRBITRC:$dst), (ins), +def CRSET : XLForm_1_ext<19, 289, (outs crbitrc:$dst), (ins), "creqv $dst, $dst, $dst", BrCR, []>; -def CRUNSET: XLForm_1_ext<19, 193, (outs CRBITRC:$dst), (ins), +def CRUNSET: XLForm_1_ext<19, 193, (outs crbitrc:$dst), (ins), "crxor $dst, $dst, $dst", BrCR, []>; @@ -1281,23 +1681,23 @@ def CR6UNSET: XLForm_1_ext<19, 193, (outs), (ins), // XFX-Form instructions. Instructions that deal with SPRs. // let Uses = [CTR] in { -def MFCTR : XFXForm_1_ext<31, 339, 9, (outs GPRC:$rT), (ins), +def MFCTR : XFXForm_1_ext<31, 339, 9, (outs gprc:$rT), (ins), "mfctr $rT", SprMFSPR>, PPC970_DGroup_First, PPC970_Unit_FXU; } let Defs = [CTR], Pattern = [(PPCmtctr i32:$rS)] in { -def MTCTR : XFXForm_7_ext<31, 467, 9, (outs), (ins GPRC:$rS), +def MTCTR : XFXForm_7_ext<31, 467, 9, (outs), (ins gprc:$rS), "mtctr $rS", SprMTSPR>, PPC970_DGroup_First, PPC970_Unit_FXU; } let Defs = [LR] in { -def MTLR : XFXForm_7_ext<31, 467, 8, (outs), (ins GPRC:$rS), +def MTLR : XFXForm_7_ext<31, 467, 8, (outs), (ins gprc:$rS), "mtlr $rS", SprMTSPR>, PPC970_DGroup_First, PPC970_Unit_FXU; } let Uses = [LR] in { -def MFLR : XFXForm_1_ext<31, 339, 8, (outs GPRC:$rT), (ins), +def MFLR : XFXForm_1_ext<31, 339, 8, (outs gprc:$rT), (ins), "mflr $rT", SprMFSPR>, PPC970_DGroup_First, PPC970_Unit_FXU; } @@ -1305,19 +1705,19 @@ def MFLR : XFXForm_1_ext<31, 339, 8, (outs GPRC:$rT), (ins), // Move to/from VRSAVE: despite being a SPR, the VRSAVE register is renamed like // a GPR on the PPC970. As such, copies in and out have the same performance // characteristics as an OR instruction. 
-def MTVRSAVE : XFXForm_7_ext<31, 467, 256, (outs), (ins GPRC:$rS), +def MTVRSAVE : XFXForm_7_ext<31, 467, 256, (outs), (ins gprc:$rS), "mtspr 256, $rS", IntGeneral>, PPC970_DGroup_Single, PPC970_Unit_FXU; -def MFVRSAVE : XFXForm_1_ext<31, 339, 256, (outs GPRC:$rT), (ins), +def MFVRSAVE : XFXForm_1_ext<31, 339, 256, (outs gprc:$rT), (ins), "mfspr $rT, 256", IntGeneral>, PPC970_DGroup_First, PPC970_Unit_FXU; let isCodeGenOnly = 1 in { def MTVRSAVEv : XFXForm_7_ext<31, 467, 256, - (outs VRSAVERC:$reg), (ins GPRC:$rS), + (outs VRSAVERC:$reg), (ins gprc:$rS), "mtspr 256, $rS", IntGeneral>, PPC970_DGroup_Single, PPC970_Unit_FXU; - def MFVRSAVEv : XFXForm_1_ext<31, 339, 256, (outs GPRC:$rT), + def MFVRSAVEv : XFXForm_1_ext<31, 339, 256, (outs gprc:$rT), (ins VRSAVERC:$reg), "mfspr $rT, 256", IntGeneral>, PPC970_DGroup_First, PPC970_Unit_FXU; @@ -1335,7 +1735,8 @@ let mayLoad = 1 in def RESTORE_VRSAVE : Pseudo<(outs VRSAVERC:$vrsave), (ins memri:$F), "#RESTORE_VRSAVE", []>; -def MTCRF : XFXForm_5<31, 144, (outs crbitm:$FXM), (ins GPRC:$rS), +let neverHasSideEffects = 1 in { +def MTCRF : XFXForm_5<31, 144, (outs crbitm:$FXM), (ins gprc:$rS), "mtcrf $FXM, $rS", BrMCRX>, PPC970_MicroCode, PPC970_Unit_CRU; @@ -1350,21 +1751,23 @@ def MTCRF : XFXForm_5<31, 144, (outs crbitm:$FXM), (ins GPRC:$rS), // // FIXME: Make this a real Pseudo instruction when the JIT switches to MC. let isCodeGenOnly = 1 in -def MFCRpseud: XFXForm_3<31, 19, (outs GPRC:$rT), (ins crbitm:$FXM), +def MFCRpseud: XFXForm_3<31, 19, (outs gprc:$rT), (ins crbitm:$FXM), "#MFCRpseud", SprMFCR>, PPC970_MicroCode, PPC970_Unit_CRU; - -def MFCR : XFXForm_3<31, 19, (outs GPRC:$rT), (ins), - "mfcr $rT", SprMFCR>, - PPC970_MicroCode, PPC970_Unit_CRU; -def MFOCRF: XFXForm_5a<31, 19, (outs GPRC:$rT), (ins crbitm:$FXM), +def MFOCRF: XFXForm_5a<31, 19, (outs gprc:$rT), (ins crbitm:$FXM), "mfocrf $rT, $FXM", SprMFCR>, PPC970_DGroup_First, PPC970_Unit_CRU; +} // neverHasSideEffects = 1 + +let neverHasSideEffects = 1 in +def MFCR : XFXForm_3<31, 19, (outs gprc:$rT), (ins), + "mfcr $rT", SprMFCR>, + PPC970_MicroCode, PPC970_Unit_CRU; // Pseudo instruction to perform FADD in round-to-zero mode. let usesCustomInserter = 1, Uses = [RM] in { - def FADDrtz: Pseudo<(outs F8RC:$FRT), (ins F8RC:$FRA, F8RC:$FRB), "", + def FADDrtz: Pseudo<(outs f8rc:$FRT), (ins f8rc:$FRA, f8rc:$FRB), "", [(set f64:$FRT, (PPCfaddrtz f64:$FRA, f64:$FRB))]>; } @@ -1377,123 +1780,118 @@ let Uses = [RM], Defs = [RM] in { def MTFSB1 : XForm_43<63, 38, (outs), (ins u5imm:$FM), "mtfsb1 $FM", IntMTFSB0, []>, PPC970_DGroup_Single, PPC970_Unit_FPU; - def MTFSF : XFLForm<63, 711, (outs), (ins i32imm:$FM, F8RC:$rT), + def MTFSF : XFLForm<63, 711, (outs), (ins i32imm:$FM, f8rc:$rT), "mtfsf $FM, $rT", IntMTFSB0, []>, PPC970_DGroup_Single, PPC970_Unit_FPU; } let Uses = [RM] in { - def MFFS : XForm_42<63, 583, (outs F8RC:$rT), (ins), + def MFFS : XForm_42<63, 583, (outs f8rc:$rT), (ins), "mffs $rT", IntMFFS, [(set f64:$rT, (PPCmffs))]>, PPC970_DGroup_Single, PPC970_Unit_FPU; } -let PPC970_Unit = 1 in { // FXU Operations. - +let PPC970_Unit = 1, neverHasSideEffects = 1 in { // FXU Operations. // XO-Form instructions. 
Arithmetic instructions that can set overflow bit // -def ADD4 : XOForm_1<31, 266, 0, (outs GPRC:$rT), (ins GPRC:$rA, GPRC:$rB), - "add $rT, $rA, $rB", IntSimple, - [(set i32:$rT, (add i32:$rA, i32:$rB))]>; -let Defs = [CARRY] in { -def ADDC : XOForm_1<31, 10, 0, (outs GPRC:$rT), (ins GPRC:$rA, GPRC:$rB), - "addc $rT, $rA, $rB", IntGeneral, - [(set i32:$rT, (addc i32:$rA, i32:$rB))]>, - PPC970_DGroup_Cracked; -} -def DIVW : XOForm_1<31, 491, 0, (outs GPRC:$rT), (ins GPRC:$rA, GPRC:$rB), - "divw $rT, $rA, $rB", IntDivW, - [(set i32:$rT, (sdiv i32:$rA, i32:$rB))]>, - PPC970_DGroup_First, PPC970_DGroup_Cracked; -def DIVWU : XOForm_1<31, 459, 0, (outs GPRC:$rT), (ins GPRC:$rA, GPRC:$rB), - "divwu $rT, $rA, $rB", IntDivW, - [(set i32:$rT, (udiv i32:$rA, i32:$rB))]>, - PPC970_DGroup_First, PPC970_DGroup_Cracked; -def MULHW : XOForm_1<31, 75, 0, (outs GPRC:$rT), (ins GPRC:$rA, GPRC:$rB), - "mulhw $rT, $rA, $rB", IntMulHW, - [(set i32:$rT, (mulhs i32:$rA, i32:$rB))]>; -def MULHWU : XOForm_1<31, 11, 0, (outs GPRC:$rT), (ins GPRC:$rA, GPRC:$rB), - "mulhwu $rT, $rA, $rB", IntMulHWU, - [(set i32:$rT, (mulhu i32:$rA, i32:$rB))]>; -def MULLW : XOForm_1<31, 235, 0, (outs GPRC:$rT), (ins GPRC:$rA, GPRC:$rB), - "mullw $rT, $rA, $rB", IntMulHW, - [(set i32:$rT, (mul i32:$rA, i32:$rB))]>; -def SUBF : XOForm_1<31, 40, 0, (outs GPRC:$rT), (ins GPRC:$rA, GPRC:$rB), - "subf $rT, $rA, $rB", IntGeneral, - [(set i32:$rT, (sub i32:$rB, i32:$rA))]>; -let Defs = [CARRY] in { -def SUBFC : XOForm_1<31, 8, 0, (outs GPRC:$rT), (ins GPRC:$rA, GPRC:$rB), - "subfc $rT, $rA, $rB", IntGeneral, - [(set i32:$rT, (subc i32:$rB, i32:$rA))]>, - PPC970_DGroup_Cracked; -} -def NEG : XOForm_3<31, 104, 0, (outs GPRC:$rT), (ins GPRC:$rA), - "neg $rT, $rA", IntSimple, - [(set i32:$rT, (ineg i32:$rA))]>; -let Uses = [CARRY], Defs = [CARRY] in { -def ADDE : XOForm_1<31, 138, 0, (outs GPRC:$rT), (ins GPRC:$rA, GPRC:$rB), - "adde $rT, $rA, $rB", IntGeneral, - [(set i32:$rT, (adde i32:$rA, i32:$rB))]>; -def ADDME : XOForm_3<31, 234, 0, (outs GPRC:$rT), (ins GPRC:$rA), - "addme $rT, $rA", IntGeneral, - [(set i32:$rT, (adde i32:$rA, -1))]>; -def ADDZE : XOForm_3<31, 202, 0, (outs GPRC:$rT), (ins GPRC:$rA), - "addze $rT, $rA", IntGeneral, - [(set i32:$rT, (adde i32:$rA, 0))]>; -def SUBFE : XOForm_1<31, 136, 0, (outs GPRC:$rT), (ins GPRC:$rA, GPRC:$rB), - "subfe $rT, $rA, $rB", IntGeneral, - [(set i32:$rT, (sube i32:$rB, i32:$rA))]>; -def SUBFME : XOForm_3<31, 232, 0, (outs GPRC:$rT), (ins GPRC:$rA), - "subfme $rT, $rA", IntGeneral, - [(set i32:$rT, (sube -1, i32:$rA))]>; -def SUBFZE : XOForm_3<31, 200, 0, (outs GPRC:$rT), (ins GPRC:$rA), - "subfze $rT, $rA", IntGeneral, - [(set i32:$rT, (sube 0, i32:$rA))]>; +defm ADD4 : XOForm_1r<31, 266, 0, (outs gprc:$rT), (ins gprc:$rA, gprc:$rB), + "add", "$rT, $rA, $rB", IntSimple, + [(set i32:$rT, (add i32:$rA, i32:$rB))]>; +defm ADDC : XOForm_1rc<31, 10, 0, (outs gprc:$rT), (ins gprc:$rA, gprc:$rB), + "addc", "$rT, $rA, $rB", IntGeneral, + [(set i32:$rT, (addc i32:$rA, i32:$rB))]>, + PPC970_DGroup_Cracked; +defm DIVW : XOForm_1r<31, 491, 0, (outs gprc:$rT), (ins gprc:$rA, gprc:$rB), + "divw", "$rT, $rA, $rB", IntDivW, + [(set i32:$rT, (sdiv i32:$rA, i32:$rB))]>, + PPC970_DGroup_First, PPC970_DGroup_Cracked; +defm DIVWU : XOForm_1r<31, 459, 0, (outs gprc:$rT), (ins gprc:$rA, gprc:$rB), + "divwu", "$rT, $rA, $rB", IntDivW, + [(set i32:$rT, (udiv i32:$rA, i32:$rB))]>, + PPC970_DGroup_First, PPC970_DGroup_Cracked; +defm MULHW : XOForm_1r<31, 75, 0, (outs gprc:$rT), (ins gprc:$rA, gprc:$rB), + "mulhw", "$rT, 
$rA, $rB", IntMulHW, + [(set i32:$rT, (mulhs i32:$rA, i32:$rB))]>; +defm MULHWU : XOForm_1r<31, 11, 0, (outs gprc:$rT), (ins gprc:$rA, gprc:$rB), + "mulhwu", "$rT, $rA, $rB", IntMulHWU, + [(set i32:$rT, (mulhu i32:$rA, i32:$rB))]>; +defm MULLW : XOForm_1r<31, 235, 0, (outs gprc:$rT), (ins gprc:$rA, gprc:$rB), + "mullw", "$rT, $rA, $rB", IntMulHW, + [(set i32:$rT, (mul i32:$rA, i32:$rB))]>; +defm SUBF : XOForm_1r<31, 40, 0, (outs gprc:$rT), (ins gprc:$rA, gprc:$rB), + "subf", "$rT, $rA, $rB", IntGeneral, + [(set i32:$rT, (sub i32:$rB, i32:$rA))]>; +defm SUBFC : XOForm_1rc<31, 8, 0, (outs gprc:$rT), (ins gprc:$rA, gprc:$rB), + "subfc", "$rT, $rA, $rB", IntGeneral, + [(set i32:$rT, (subc i32:$rB, i32:$rA))]>, + PPC970_DGroup_Cracked; +defm NEG : XOForm_3r<31, 104, 0, (outs gprc:$rT), (ins gprc:$rA), + "neg", "$rT, $rA", IntSimple, + [(set i32:$rT, (ineg i32:$rA))]>; +let Uses = [CARRY] in { +defm ADDE : XOForm_1rc<31, 138, 0, (outs gprc:$rT), (ins gprc:$rA, gprc:$rB), + "adde", "$rT, $rA, $rB", IntGeneral, + [(set i32:$rT, (adde i32:$rA, i32:$rB))]>; +defm ADDME : XOForm_3rc<31, 234, 0, (outs gprc:$rT), (ins gprc:$rA), + "addme", "$rT, $rA", IntGeneral, + [(set i32:$rT, (adde i32:$rA, -1))]>; +defm ADDZE : XOForm_3rc<31, 202, 0, (outs gprc:$rT), (ins gprc:$rA), + "addze", "$rT, $rA", IntGeneral, + [(set i32:$rT, (adde i32:$rA, 0))]>; +defm SUBFE : XOForm_1rc<31, 136, 0, (outs gprc:$rT), (ins gprc:$rA, gprc:$rB), + "subfe", "$rT, $rA, $rB", IntGeneral, + [(set i32:$rT, (sube i32:$rB, i32:$rA))]>; +defm SUBFME : XOForm_3rc<31, 232, 0, (outs gprc:$rT), (ins gprc:$rA), + "subfme", "$rT, $rA", IntGeneral, + [(set i32:$rT, (sube -1, i32:$rA))]>; +defm SUBFZE : XOForm_3rc<31, 200, 0, (outs gprc:$rT), (ins gprc:$rA), + "subfze", "$rT, $rA", IntGeneral, + [(set i32:$rT, (sube 0, i32:$rA))]>; } } // A-Form instructions. Most of the instructions executed in the FPU are of // this type. // -let PPC970_Unit = 3 in { // FPU Operations. +let PPC970_Unit = 3, neverHasSideEffects = 1 in { // FPU Operations. 
let Uses = [RM] in { - def FMADD : AForm_1<63, 29, - (outs F8RC:$FRT), (ins F8RC:$FRA, F8RC:$FRC, F8RC:$FRB), - "fmadd $FRT, $FRA, $FRC, $FRB", FPFused, + defm FMADD : AForm_1r<63, 29, + (outs f8rc:$FRT), (ins f8rc:$FRA, f8rc:$FRC, f8rc:$FRB), + "fmadd", "$FRT, $FRA, $FRC, $FRB", FPFused, [(set f64:$FRT, (fma f64:$FRA, f64:$FRC, f64:$FRB))]>; - def FMADDS : AForm_1<59, 29, - (outs F4RC:$FRT), (ins F4RC:$FRA, F4RC:$FRC, F4RC:$FRB), - "fmadds $FRT, $FRA, $FRC, $FRB", FPGeneral, + defm FMADDS : AForm_1r<59, 29, + (outs f4rc:$FRT), (ins f4rc:$FRA, f4rc:$FRC, f4rc:$FRB), + "fmadds", "$FRT, $FRA, $FRC, $FRB", FPGeneral, [(set f32:$FRT, (fma f32:$FRA, f32:$FRC, f32:$FRB))]>; - def FMSUB : AForm_1<63, 28, - (outs F8RC:$FRT), (ins F8RC:$FRA, F8RC:$FRC, F8RC:$FRB), - "fmsub $FRT, $FRA, $FRC, $FRB", FPFused, + defm FMSUB : AForm_1r<63, 28, + (outs f8rc:$FRT), (ins f8rc:$FRA, f8rc:$FRC, f8rc:$FRB), + "fmsub", "$FRT, $FRA, $FRC, $FRB", FPFused, [(set f64:$FRT, (fma f64:$FRA, f64:$FRC, (fneg f64:$FRB)))]>; - def FMSUBS : AForm_1<59, 28, - (outs F4RC:$FRT), (ins F4RC:$FRA, F4RC:$FRC, F4RC:$FRB), - "fmsubs $FRT, $FRA, $FRC, $FRB", FPGeneral, + defm FMSUBS : AForm_1r<59, 28, + (outs f4rc:$FRT), (ins f4rc:$FRA, f4rc:$FRC, f4rc:$FRB), + "fmsubs", "$FRT, $FRA, $FRC, $FRB", FPGeneral, [(set f32:$FRT, (fma f32:$FRA, f32:$FRC, (fneg f32:$FRB)))]>; - def FNMADD : AForm_1<63, 31, - (outs F8RC:$FRT), (ins F8RC:$FRA, F8RC:$FRC, F8RC:$FRB), - "fnmadd $FRT, $FRA, $FRC, $FRB", FPFused, + defm FNMADD : AForm_1r<63, 31, + (outs f8rc:$FRT), (ins f8rc:$FRA, f8rc:$FRC, f8rc:$FRB), + "fnmadd", "$FRT, $FRA, $FRC, $FRB", FPFused, [(set f64:$FRT, (fneg (fma f64:$FRA, f64:$FRC, f64:$FRB)))]>; - def FNMADDS : AForm_1<59, 31, - (outs F4RC:$FRT), (ins F4RC:$FRA, F4RC:$FRC, F4RC:$FRB), - "fnmadds $FRT, $FRA, $FRC, $FRB", FPGeneral, + defm FNMADDS : AForm_1r<59, 31, + (outs f4rc:$FRT), (ins f4rc:$FRA, f4rc:$FRC, f4rc:$FRB), + "fnmadds", "$FRT, $FRA, $FRC, $FRB", FPGeneral, [(set f32:$FRT, (fneg (fma f32:$FRA, f32:$FRC, f32:$FRB)))]>; - def FNMSUB : AForm_1<63, 30, - (outs F8RC:$FRT), (ins F8RC:$FRA, F8RC:$FRC, F8RC:$FRB), - "fnmsub $FRT, $FRA, $FRC, $FRB", FPFused, + defm FNMSUB : AForm_1r<63, 30, + (outs f8rc:$FRT), (ins f8rc:$FRA, f8rc:$FRC, f8rc:$FRB), + "fnmsub", "$FRT, $FRA, $FRC, $FRB", FPFused, [(set f64:$FRT, (fneg (fma f64:$FRA, f64:$FRC, (fneg f64:$FRB))))]>; - def FNMSUBS : AForm_1<59, 30, - (outs F4RC:$FRT), (ins F4RC:$FRA, F4RC:$FRC, F4RC:$FRB), - "fnmsubs $FRT, $FRA, $FRC, $FRB", FPGeneral, + defm FNMSUBS : AForm_1r<59, 30, + (outs f4rc:$FRT), (ins f4rc:$FRA, f4rc:$FRC, f4rc:$FRB), + "fnmsubs", "$FRT, $FRA, $FRC, $FRB", FPGeneral, [(set f32:$FRT, (fneg (fma f32:$FRA, f32:$FRC, (fneg f32:$FRB))))]>; } @@ -1501,53 +1899,56 @@ let Uses = [RM] in { // having 4 of these, force the comparison to always be an 8-byte double (code // should use an FMRSD if the input comparison value really wants to be a float) // and 4/8 byte forms for the result and operand type.. 
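// fsel computes FRT = (FRA >= 0.0) ? FRC : FRB, which is why even the
// single-precision FSELS variant below takes its comparison operand $FRA from
// f8rc while the selected operands stay in f4rc.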
-def FSELD : AForm_1<63, 23, - (outs F8RC:$FRT), (ins F8RC:$FRA, F8RC:$FRC, F8RC:$FRB), - "fsel $FRT, $FRA, $FRC, $FRB", FPGeneral, - [(set f64:$FRT, (PPCfsel f64:$FRA, f64:$FRC, f64:$FRB))]>; -def FSELS : AForm_1<63, 23, - (outs F4RC:$FRT), (ins F8RC:$FRA, F4RC:$FRC, F4RC:$FRB), - "fsel $FRT, $FRA, $FRC, $FRB", FPGeneral, - [(set f32:$FRT, (PPCfsel f64:$FRA, f32:$FRC, f32:$FRB))]>; +let Interpretation64Bit = 1 in +defm FSELD : AForm_1r<63, 23, + (outs f8rc:$FRT), (ins f8rc:$FRA, f8rc:$FRC, f8rc:$FRB), + "fsel", "$FRT, $FRA, $FRC, $FRB", FPGeneral, + [(set f64:$FRT, (PPCfsel f64:$FRA, f64:$FRC, f64:$FRB))]>; +defm FSELS : AForm_1r<63, 23, + (outs f4rc:$FRT), (ins f8rc:$FRA, f4rc:$FRC, f4rc:$FRB), + "fsel", "$FRT, $FRA, $FRC, $FRB", FPGeneral, + [(set f32:$FRT, (PPCfsel f64:$FRA, f32:$FRC, f32:$FRB))]>; let Uses = [RM] in { - def FADD : AForm_2<63, 21, - (outs F8RC:$FRT), (ins F8RC:$FRA, F8RC:$FRB), - "fadd $FRT, $FRA, $FRB", FPAddSub, - [(set f64:$FRT, (fadd f64:$FRA, f64:$FRB))]>; - def FADDS : AForm_2<59, 21, - (outs F4RC:$FRT), (ins F4RC:$FRA, F4RC:$FRB), - "fadds $FRT, $FRA, $FRB", FPGeneral, - [(set f32:$FRT, (fadd f32:$FRA, f32:$FRB))]>; - def FDIV : AForm_2<63, 18, - (outs F8RC:$FRT), (ins F8RC:$FRA, F8RC:$FRB), - "fdiv $FRT, $FRA, $FRB", FPDivD, - [(set f64:$FRT, (fdiv f64:$FRA, f64:$FRB))]>; - def FDIVS : AForm_2<59, 18, - (outs F4RC:$FRT), (ins F4RC:$FRA, F4RC:$FRB), - "fdivs $FRT, $FRA, $FRB", FPDivS, - [(set f32:$FRT, (fdiv f32:$FRA, f32:$FRB))]>; - def FMUL : AForm_3<63, 25, - (outs F8RC:$FRT), (ins F8RC:$FRA, F8RC:$FRC), - "fmul $FRT, $FRA, $FRC", FPFused, - [(set f64:$FRT, (fmul f64:$FRA, f64:$FRC))]>; - def FMULS : AForm_3<59, 25, - (outs F4RC:$FRT), (ins F4RC:$FRA, F4RC:$FRC), - "fmuls $FRT, $FRA, $FRC", FPGeneral, - [(set f32:$FRT, (fmul f32:$FRA, f32:$FRC))]>; - def FSUB : AForm_2<63, 20, - (outs F8RC:$FRT), (ins F8RC:$FRA, F8RC:$FRB), - "fsub $FRT, $FRA, $FRB", FPAddSub, - [(set f64:$FRT, (fsub f64:$FRA, f64:$FRB))]>; - def FSUBS : AForm_2<59, 20, - (outs F4RC:$FRT), (ins F4RC:$FRA, F4RC:$FRB), - "fsubs $FRT, $FRA, $FRB", FPGeneral, - [(set f32:$FRT, (fsub f32:$FRA, f32:$FRB))]>; + defm FADD : AForm_2r<63, 21, + (outs f8rc:$FRT), (ins f8rc:$FRA, f8rc:$FRB), + "fadd", "$FRT, $FRA, $FRB", FPAddSub, + [(set f64:$FRT, (fadd f64:$FRA, f64:$FRB))]>; + defm FADDS : AForm_2r<59, 21, + (outs f4rc:$FRT), (ins f4rc:$FRA, f4rc:$FRB), + "fadds", "$FRT, $FRA, $FRB", FPGeneral, + [(set f32:$FRT, (fadd f32:$FRA, f32:$FRB))]>; + defm FDIV : AForm_2r<63, 18, + (outs f8rc:$FRT), (ins f8rc:$FRA, f8rc:$FRB), + "fdiv", "$FRT, $FRA, $FRB", FPDivD, + [(set f64:$FRT, (fdiv f64:$FRA, f64:$FRB))]>; + defm FDIVS : AForm_2r<59, 18, + (outs f4rc:$FRT), (ins f4rc:$FRA, f4rc:$FRB), + "fdivs", "$FRT, $FRA, $FRB", FPDivS, + [(set f32:$FRT, (fdiv f32:$FRA, f32:$FRB))]>; + defm FMUL : AForm_3r<63, 25, + (outs f8rc:$FRT), (ins f8rc:$FRA, f8rc:$FRC), + "fmul", "$FRT, $FRA, $FRC", FPFused, + [(set f64:$FRT, (fmul f64:$FRA, f64:$FRC))]>; + defm FMULS : AForm_3r<59, 25, + (outs f4rc:$FRT), (ins f4rc:$FRA, f4rc:$FRC), + "fmuls", "$FRT, $FRA, $FRC", FPGeneral, + [(set f32:$FRT, (fmul f32:$FRA, f32:$FRC))]>; + defm FSUB : AForm_2r<63, 20, + (outs f8rc:$FRT), (ins f8rc:$FRA, f8rc:$FRB), + "fsub", "$FRT, $FRA, $FRB", FPAddSub, + [(set f64:$FRT, (fsub f64:$FRA, f64:$FRB))]>; + defm FSUBS : AForm_2r<59, 20, + (outs f4rc:$FRT), (ins f4rc:$FRA, f4rc:$FRB), + "fsubs", "$FRT, $FRA, $FRB", FPGeneral, + [(set f32:$FRT, (fsub f32:$FRA, f32:$FRB))]>; } } +let neverHasSideEffects = 1 in { let PPC970_Unit = 1 in { // FXU 
Operations. + let isSelect = 1 in def ISEL : AForm_4<31, 15, - (outs GPRC:$rT), (ins GPRC_NOR0:$rA, GPRC:$rB, CRBITRC:$cond), + (outs gprc:$rT), (ins gprc_nor0:$rA, gprc:$rB, crbitrc:$cond), "isel $rT, $rA, $rB, $cond", IntGeneral, []>; } @@ -1557,26 +1958,29 @@ let PPC970_Unit = 1 in { // FXU Operations. // let isCommutable = 1 in { // RLWIMI can be commuted if the rotate amount is zero. -def RLWIMI : MForm_2<20, - (outs GPRC:$rA), (ins GPRC:$rSi, GPRC:$rS, u5imm:$SH, u5imm:$MB, - u5imm:$ME), "rlwimi $rA, $rS, $SH, $MB, $ME", IntRotate, - []>, PPC970_DGroup_Cracked, RegConstraint<"$rSi = $rA">, - NoEncode<"$rSi">; +defm RLWIMI : MForm_2r<20, (outs gprc:$rA), + (ins gprc:$rSi, gprc:$rS, u5imm:$SH, u5imm:$MB, + u5imm:$ME), "rlwimi", "$rA, $rS, $SH, $MB, $ME", IntRotate, + []>, PPC970_DGroup_Cracked, RegConstraint<"$rSi = $rA">, + NoEncode<"$rSi">; } +let BaseName = "rlwinm" in { def RLWINM : MForm_2<21, - (outs GPRC:$rA), (ins GPRC:$rS, u5imm:$SH, u5imm:$MB, u5imm:$ME), + (outs gprc:$rA), (ins gprc:$rS, u5imm:$SH, u5imm:$MB, u5imm:$ME), "rlwinm $rA, $rS, $SH, $MB, $ME", IntGeneral, - []>; + []>, RecFormRel; +let Defs = [CR0] in def RLWINMo : MForm_2<21, - (outs GPRC:$rA), (ins GPRC:$rS, u5imm:$SH, u5imm:$MB, u5imm:$ME), - "rlwinm. $rA, $rS, $SH, $MB, $ME", IntGeneral, - []>, isDOT, PPC970_DGroup_Cracked; -def RLWNM : MForm_2<23, - (outs GPRC:$rA), (ins GPRC:$rS, GPRC:$rB, u5imm:$MB, u5imm:$ME), - "rlwnm $rA, $rS, $rB, $MB, $ME", IntGeneral, - []>; + (outs gprc:$rA), (ins gprc:$rS, u5imm:$SH, u5imm:$MB, u5imm:$ME), + "rlwinm. $rA, $rS, $SH, $MB, $ME", IntGeneral, + []>, isDOT, RecFormRel, PPC970_DGroup_Cracked; } - +defm RLWNM : MForm_2r<23, (outs gprc:$rA), + (ins gprc:$rS, gprc:$rB, u5imm:$MB, u5imm:$ME), + "rlwnm", "$rA, $rS, $rB, $MB, $ME", IntGeneral, + []>; +} +} // neverHasSideEffects = 1 //===----------------------------------------------------------------------===// // PowerPC Instruction Patterns @@ -1693,14 +2097,6 @@ def : Pat<(f64 (extloadf32 xaddr:$src)), def : Pat<(f64 (fextend f32:$src)), (COPY_TO_REGCLASS $src, F8RC)>; -// Memory barriers -def : Pat<(membarrier (i32 imm /*ll*/), - (i32 imm /*ls*/), - (i32 imm /*sl*/), - (i32 imm /*ss*/), - (i32 imm /*device*/)), - (SYNC)>; - def : Pat<(atomic_fence (imm), (imm)), (SYNC)>; // Additional FNMSUB patterns: -a*c + b == -(a*c - b) @@ -1715,3 +2111,98 @@ def : Pat<(fma f32:$A, (fneg f32:$C), f32:$B), include "PPCInstrAltivec.td" include "PPCInstr64Bit.td" + + +//===----------------------------------------------------------------------===// +// PowerPC Instructions used for assembler/disassembler only +// + +def ISYNC : XLForm_2_ext<19, 150, 0, 0, 0, (outs), (ins), + "isync", SprISYNC, []>; + +def ICBI : XForm_1a<31, 982, (outs), (ins memrr:$src), + "icbi $src", LdStICBI, []>; + +//===----------------------------------------------------------------------===// +// PowerPC Assembler Instruction Aliases +// + +// Pseudo-instructions for alternate assembly syntax (never used by codegen). +// These are aliases that require C++ handling to convert to the target +// instruction, while InstAliases can be handled directly by tblgen. 
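// For example, slwi rA, rS, n expands to rlwinm rA, rS, n, 0, 31-n and
// srwi rA, rS, n expands to rlwinm rA, rS, 32-n, n, 31; computing the rotate
// amount and mask bounds from $n is the part a plain InstAlias cannot express.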
+class PPCAsmPseudo<string asm, dag iops> + : Instruction { + let Namespace = "PPC"; + bit PPC64 = 0; // Default value, override with isPPC64 + + let OutOperandList = (outs); + let InOperandList = iops; + let Pattern = []; + let AsmString = asm; + let isAsmParserOnly = 1; + let isPseudo = 1; +} + +def : InstAlias<"mr $rA, $rB", (OR8 g8rc:$rA, g8rc:$rB, g8rc:$rB)>; + +def SLWI : PPCAsmPseudo<"slwi $rA, $rS, $n", + (ins gprc:$rA, gprc:$rS, u5imm:$n)>; +def SRWI : PPCAsmPseudo<"srwi $rA, $rS, $n", + (ins gprc:$rA, gprc:$rS, u5imm:$n)>; +def SLDI : PPCAsmPseudo<"sldi $rA, $rS, $n", + (ins g8rc:$rA, g8rc:$rS, u6imm:$n)>; +def SRDI : PPCAsmPseudo<"srdi $rA, $rS, $n", + (ins g8rc:$rA, g8rc:$rS, u6imm:$n)>; + +def : InstAlias<"blt $cc, $dst", (BCC 12, crrc:$cc, condbrtarget:$dst)>; +def : InstAlias<"bgt $cc, $dst", (BCC 44, crrc:$cc, condbrtarget:$dst)>; +def : InstAlias<"beq $cc, $dst", (BCC 76, crrc:$cc, condbrtarget:$dst)>; +def : InstAlias<"bun $cc, $dst", (BCC 108, crrc:$cc, condbrtarget:$dst)>; +def : InstAlias<"bso $cc, $dst", (BCC 108, crrc:$cc, condbrtarget:$dst)>; +def : InstAlias<"bge $cc, $dst", (BCC 4, crrc:$cc, condbrtarget:$dst)>; +def : InstAlias<"bnl $cc, $dst", (BCC 4, crrc:$cc, condbrtarget:$dst)>; +def : InstAlias<"ble $cc, $dst", (BCC 36, crrc:$cc, condbrtarget:$dst)>; +def : InstAlias<"bng $cc, $dst", (BCC 36, crrc:$cc, condbrtarget:$dst)>; +def : InstAlias<"bne $cc, $dst", (BCC 68, crrc:$cc, condbrtarget:$dst)>; +def : InstAlias<"bnu $cc, $dst", (BCC 100, crrc:$cc, condbrtarget:$dst)>; +def : InstAlias<"bns $cc, $dst", (BCC 100, crrc:$cc, condbrtarget:$dst)>; + +def : InstAlias<"bltlr $cc", (BCLR 12, crrc:$cc)>; +def : InstAlias<"bgtlr $cc", (BCLR 44, crrc:$cc)>; +def : InstAlias<"beqlr $cc", (BCLR 76, crrc:$cc)>; +def : InstAlias<"bunlr $cc", (BCLR 108, crrc:$cc)>; +def : InstAlias<"bsolr $cc", (BCLR 108, crrc:$cc)>; +def : InstAlias<"bgelr $cc", (BCLR 4, crrc:$cc)>; +def : InstAlias<"bnllr $cc", (BCLR 4, crrc:$cc)>; +def : InstAlias<"blelr $cc", (BCLR 36, crrc:$cc)>; +def : InstAlias<"bnglr $cc", (BCLR 36, crrc:$cc)>; +def : InstAlias<"bnelr $cc", (BCLR 68, crrc:$cc)>; +def : InstAlias<"bnulr $cc", (BCLR 100, crrc:$cc)>; +def : InstAlias<"bnslr $cc", (BCLR 100, crrc:$cc)>; + +def : InstAlias<"bltctr $cc", (BCCTR 12, crrc:$cc)>; +def : InstAlias<"bgtctr $cc", (BCCTR 44, crrc:$cc)>; +def : InstAlias<"beqctr $cc", (BCCTR 76, crrc:$cc)>; +def : InstAlias<"bunctr $cc", (BCCTR 108, crrc:$cc)>; +def : InstAlias<"bsoctr $cc", (BCCTR 108, crrc:$cc)>; +def : InstAlias<"bgectr $cc", (BCCTR 4, crrc:$cc)>; +def : InstAlias<"bnlctr $cc", (BCCTR 4, crrc:$cc)>; +def : InstAlias<"blectr $cc", (BCCTR 36, crrc:$cc)>; +def : InstAlias<"bngctr $cc", (BCCTR 36, crrc:$cc)>; +def : InstAlias<"bnectr $cc", (BCCTR 68, crrc:$cc)>; +def : InstAlias<"bnuctr $cc", (BCCTR 100, crrc:$cc)>; +def : InstAlias<"bnsctr $cc", (BCCTR 100, crrc:$cc)>; + +def : InstAlias<"bltctrl $cc", (BCCTRL 12, crrc:$cc)>; +def : InstAlias<"bgtctrl $cc", (BCCTRL 44, crrc:$cc)>; +def : InstAlias<"beqctrl $cc", (BCCTRL 76, crrc:$cc)>; +def : InstAlias<"bunctrl $cc", (BCCTRL 108, crrc:$cc)>; +def : InstAlias<"bsoctrl $cc", (BCCTRL 108, crrc:$cc)>; +def : InstAlias<"bgectrl $cc", (BCCTRL 4, crrc:$cc)>; +def : InstAlias<"bnlctrl $cc", (BCCTRL 4, crrc:$cc)>; +def : InstAlias<"blectrl $cc", (BCCTRL 36, crrc:$cc)>; +def : InstAlias<"bngctrl $cc", (BCCTRL 36, crrc:$cc)>; +def : InstAlias<"bnectrl $cc", (BCCTRL 68, crrc:$cc)>; +def : InstAlias<"bnuctrl $cc", (BCCTRL 100, crrc:$cc)>; +def : InstAlias<"bnsctrl $cc", (BCCTRL 100, 
crrc:$cc)>; + diff --git a/contrib/llvm/lib/Target/PowerPC/PPCMCInstLower.cpp b/contrib/llvm/lib/Target/PowerPC/PPCMCInstLower.cpp index 9b0df3e..f8cf3a5 100644 --- a/contrib/llvm/lib/Target/PowerPC/PPCMCInstLower.cpp +++ b/contrib/llvm/lib/Target/PowerPC/PPCMCInstLower.cpp @@ -14,6 +14,7 @@ #include "PPC.h" #include "llvm/ADT/SmallString.h" +#include "llvm/ADT/Twine.h" #include "llvm/CodeGen/AsmPrinter.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineModuleInfoImpls.h" @@ -51,7 +52,14 @@ static MCSymbol *GetSymbolFromOperand(const MachineOperand &MO, AsmPrinter &AP){ // before we return the symbol. if (MO.getTargetFlags() == PPCII::MO_DARWIN_STUB) { Name += "$stub"; - MCSymbol *Sym = Ctx.GetOrCreateSymbol(Name.str()); + const char *PGP = AP.MAI->getPrivateGlobalPrefix(); + const char *Prefix = ""; + if (!Name.startswith(PGP)) { + // http://llvm.org/bugs/show_bug.cgi?id=15763 + // all stubs and lazy_ptrs should be local symbols, which need leading 'L' + Prefix = PGP; + } + MCSymbol *Sym = Ctx.GetOrCreateSymbol(Twine(Prefix) + Twine(Name)); MachineModuleInfoImpl::StubValueTy &StubSym = getMachOMMI(AP).getFnStubEntry(Sym); if (StubSym.getPointer()) diff --git a/contrib/llvm/lib/Target/PowerPC/PPCMachineFunctionInfo.h b/contrib/llvm/lib/Target/PowerPC/PPCMachineFunctionInfo.h index ee18ead..40d1f3a 100644 --- a/contrib/llvm/lib/Target/PowerPC/PPCMachineFunctionInfo.h +++ b/contrib/llvm/lib/Target/PowerPC/PPCMachineFunctionInfo.h @@ -84,6 +84,11 @@ class PPCFunctionInfo : public MachineFunctionInfo { /// CRSpillFrameIndex - FrameIndex for CR spill slot for 32-bit SVR4. int CRSpillFrameIndex; + /// If any of CR[2-4] need to be saved in the prologue and restored in the + /// epilogue then they are added to this array. This is used for the + /// 64-bit SVR4 ABI. + SmallVector<unsigned, 3> MustSaveCRs; + public: explicit PPCFunctionInfo(MachineFunction &MF) : FramePointerSaveIndex(0), @@ -154,6 +159,10 @@ public: int getCRSpillFrameIndex() const { return CRSpillFrameIndex; } void setCRSpillFrameIndex(int idx) { CRSpillFrameIndex = idx; } + + const SmallVector<unsigned, 3> & + getMustSaveCRs() const { return MustSaveCRs; } + void addMustSaveCR(unsigned Reg) { MustSaveCRs.push_back(Reg); } }; } // end of namespace llvm diff --git a/contrib/llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp b/contrib/llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp index 1d61a3a..2be6324 100644 --- a/contrib/llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp +++ b/contrib/llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp @@ -76,6 +76,8 @@ PPCRegisterInfo::PPCRegisterInfo(const PPCSubtarget &ST, const TargetRegisterClass * PPCRegisterInfo::getPointerRegClass(const MachineFunction &MF, unsigned Kind) const { + // Note that PPCInstrInfo::FoldImmediate also directly uses this Kind value + // when it checks for ZERO folding. if (Kind == 1) { if (Subtarget.isPPC64()) return &PPC::G8RC_NOX0RegClass; @@ -452,6 +454,33 @@ PPCRegisterInfo::hasReservedSpillSlot(const MachineFunction &MF, return false; } +// Figure out if the offset in the instruction is shifted right two bits. This +// is true for instructions like "STD", which the machine implicitly adds two +// low zeros to. +static bool usesIXAddr(const MachineInstr &MI) { + unsigned OpC = MI.getOpcode(); + + switch (OpC) { + default: + return false; + case PPC::LWA: + case PPC::LD: + case PPC::STD: + return true; + } +} + +// Return the OffsetOperandNo given the FIOperandNum (and the instruction). 
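// For the r+i memory forms the frame index appears as the base-register
// operand (operand 2) with the displacement just before it (operand 1), while
// ADDI-style instructions carry the frame index as operand 1 with the
// immediate at operand 2; hence the FIOperandNum == 2 test below.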
+static unsigned getOffsetONFromFION(const MachineInstr &MI, + unsigned FIOperandNum) { + // Take into account whether it's an add or mem instruction + unsigned OffsetOperandNo = (FIOperandNum == 2) ? 1 : 2; + if (MI.isInlineAsm()) + OffsetOperandNo = FIOperandNum-1; + + return OffsetOperandNo; +} + void PPCRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, int SPAdj, unsigned FIOperandNum, @@ -469,10 +498,7 @@ PPCRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering(); DebugLoc dl = MI.getDebugLoc(); - // Take into account whether it's an add or mem instruction - unsigned OffsetOperandNo = (FIOperandNum == 2) ? 1 : 2; - if (MI.isInlineAsm()) - OffsetOperandNo = FIOperandNum-1; + unsigned OffsetOperandNo = getOffsetONFromFION(MI, FIOperandNum); // Get the frame index. int FrameIndex = MI.getOperand(FIOperandNum).getIndex(); @@ -514,17 +540,8 @@ PPCRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, (is64Bit ? PPC::X1 : PPC::R1), false); - // Figure out if the offset in the instruction is shifted right two bits. This - // is true for instructions like "STD", which the machine implicitly adds two - // low zeros to. - bool isIXAddr = false; - switch (OpC) { - case PPC::LWA: - case PPC::LD: - case PPC::STD: - isIXAddr = true; - break; - } + // Figure out if the offset in the instruction is shifted right two bits. + bool isIXAddr = usesIXAddr(MI); // If the instruction is not present in ImmToIdxMap, then it has no immediate // form (and must be r+r). @@ -616,3 +633,124 @@ unsigned PPCRegisterInfo::getEHExceptionRegister() const { unsigned PPCRegisterInfo::getEHHandlerRegister() const { return !Subtarget.isPPC64() ? PPC::R4 : PPC::X4; } + +/// Returns true if the instruction's frame index +/// reference would be better served by a base register other than FP +/// or SP. Used by LocalStackFrameAllocation to determine which frame index +/// references it should create new base registers for. +bool PPCRegisterInfo:: +needsFrameBaseReg(MachineInstr *MI, int64_t Offset) const { + assert(Offset < 0 && "Local offset must be negative"); + + unsigned FIOperandNum = 0; + while (!MI->getOperand(FIOperandNum).isFI()) { + ++FIOperandNum; + assert(FIOperandNum < MI->getNumOperands() && + "Instr doesn't have FrameIndex operand!"); + } + + unsigned OffsetOperandNo = getOffsetONFromFION(*MI, FIOperandNum); + + if (!usesIXAddr(*MI)) + Offset += MI->getOperand(OffsetOperandNo).getImm(); + else + Offset += MI->getOperand(OffsetOperandNo).getImm() << 2; + + // It's the load/store FI references that cause issues, as it can be difficult + // to materialize the offset if it won't fit in the literal field. Estimate + // based on the size of the local frame and some conservative assumptions + // about the rest of the stack frame (note, this is pre-regalloc, so + // we don't know everything for certain yet) whether this offset is likely + // to be out of range of the immediate. Return true if so. + + // We only generate virtual base registers for loads and stores that have + // an r+i form. Return false for everything else. + unsigned OpC = MI->getOpcode(); + if (!ImmToIdxMap.count(OpC)) + return false; + + // Don't generate a new virtual base register just to add zero to it. 
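usesIXAddr() above flags the DS-form memory instructions (LWA, LD, STD), whose displacement field stores the byte offset divided by four; as the comment says, the machine implicitly appends two low zero bits when forming the address. That is why the surrounding code scales the stored immediate up by << 2 when accumulating a byte offset and back down by >> 2 before re-encoding it. A minimal sketch of that round trip follows; decodeDSOffset/encodeDSOffset are hypothetical helper names used only for illustration.

// Sketch of DS-form displacement handling (helper names are illustrative).
#include <cassert>
#include <cstdint>

// Convert the immediate stored in a DS-form instruction to a byte offset.
int64_t decodeDSOffset(int64_t StoredImm) { return StoredImm << 2; }

// Re-encode a byte offset; DS-form requires the low two bits to be zero.
int64_t encodeDSOffset(int64_t ByteOffset) {
  assert((ByteOffset & 3) == 0 && "DS-form offsets must be 4-byte aligned");
  return ByteOffset >> 2;
}

int main() {
  int64_t Stored = 10;                      // field as it appears in the instruction
  int64_t Bytes  = decodeDSOffset(Stored);  // 40 bytes from the base register
  Bytes += 8;                               // e.g. a frame-index adjustment
  assert(encodeDSOffset(Bytes) == 12);      // written back as 12
  return 0;
}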
+ if ((OpC == PPC::ADDI || OpC == PPC::ADDI8) && + MI->getOperand(2).getImm() == 0) + return false; + + MachineBasicBlock &MBB = *MI->getParent(); + MachineFunction &MF = *MBB.getParent(); + + const PPCFrameLowering *PPCFI = + static_cast<const PPCFrameLowering*>(MF.getTarget().getFrameLowering()); + unsigned StackEst = + PPCFI->determineFrameLayout(MF, false, true); + + // If we likely don't need a stack frame, then we probably don't need a + // virtual base register either. + if (!StackEst) + return false; + + // Estimate an offset from the stack pointer. + // The incoming offset is relating to the SP at the start of the function, + // but when we access the local it'll be relative to the SP after local + // allocation, so adjust our SP-relative offset by that allocation size. + Offset += StackEst; + + // The frame pointer will point to the end of the stack, so estimate the + // offset as the difference between the object offset and the FP location. + return !isFrameOffsetLegal(MI, Offset); +} + +/// Insert defining instruction(s) for BaseReg to +/// be a pointer to FrameIdx at the beginning of the basic block. +void PPCRegisterInfo:: +materializeFrameBaseRegister(MachineBasicBlock *MBB, + unsigned BaseReg, int FrameIdx, + int64_t Offset) const { + unsigned ADDriOpc = Subtarget.isPPC64() ? PPC::ADDI8 : PPC::ADDI; + + MachineBasicBlock::iterator Ins = MBB->begin(); + DebugLoc DL; // Defaults to "unknown" + if (Ins != MBB->end()) + DL = Ins->getDebugLoc(); + + const MCInstrDesc &MCID = TII.get(ADDriOpc); + MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo(); + const MachineFunction &MF = *MBB->getParent(); + MRI.constrainRegClass(BaseReg, TII.getRegClass(MCID, 0, this, MF)); + + BuildMI(*MBB, Ins, DL, MCID, BaseReg) + .addFrameIndex(FrameIdx).addImm(Offset); +} + +void +PPCRegisterInfo::resolveFrameIndex(MachineBasicBlock::iterator I, + unsigned BaseReg, int64_t Offset) const { + MachineInstr &MI = *I; + + unsigned FIOperandNum = 0; + while (!MI.getOperand(FIOperandNum).isFI()) { + ++FIOperandNum; + assert(FIOperandNum < MI.getNumOperands() && + "Instr doesn't have FrameIndex operand!"); + } + + MI.getOperand(FIOperandNum).ChangeToRegister(BaseReg, false); + unsigned OffsetOperandNo = getOffsetONFromFION(MI, FIOperandNum); + + bool isIXAddr = usesIXAddr(MI); + if (!isIXAddr) + Offset += MI.getOperand(OffsetOperandNo).getImm(); + else + Offset += MI.getOperand(OffsetOperandNo).getImm() << 2; + + // Figure out if the offset in the instruction is shifted right two bits. + if (isIXAddr) + Offset >>= 2; // The actual encoded value has the low two bits zero. 
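needsFrameBaseReg() above boils down to a range check: take the instruction's displacement, add a conservative estimate of the final stack-frame size, and ask whether the result still fits the signed 16-bit r+i form (with 4-byte alignment for DS-form accesses). A worked restatement of that test is sketched below; fitsRPlusI and the numeric StackEstimate are illustrative stand-ins, not the patch's own names, and the real value comes from PPCFrameLowering::determineFrameLayout().

// Restated legality test with an assumed frame-size estimate.
#include <cstdint>
#include <cstdio>

bool fitsRPlusI(int64_t Offset, bool IsDSForm) {
  bool InRange = Offset >= INT16_MIN && Offset <= INT16_MAX; // isInt<16>
  bool Aligned = !IsDSForm || (Offset & 3) == 0;             // DS-form needs low bits clear
  return InRange && Aligned;
}

int main() {
  int64_t LocalOffset   = -48;    // offset of the local within its area
  int64_t StackEstimate = 70000;  // assumed large frame (e.g. big local arrays)
  // Mirrors needsFrameBaseReg(): adjust by the estimated allocation, then check.
  bool NeedsBaseReg = !fitsRPlusI(LocalOffset + StackEstimate, /*IsDSForm=*/true);
  std::printf("needs virtual base register: %s\n", NeedsBaseReg ? "yes" : "no");
  return 0;
}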
+ + MI.getOperand(OffsetOperandNo).ChangeToImmediate(Offset); +} + +bool PPCRegisterInfo::isFrameOffsetLegal(const MachineInstr *MI, + int64_t Offset) const { + return MI->getOpcode() == PPC::DBG_VALUE || // DBG_VALUE is always Reg+Imm + (isInt<16>(Offset) && (!usesIXAddr(*MI) || (Offset & 3) == 0)); +} + diff --git a/contrib/llvm/lib/Target/PowerPC/PPCRegisterInfo.h b/contrib/llvm/lib/Target/PowerPC/PPCRegisterInfo.h index 7e6683e..7a48b4b 100644 --- a/contrib/llvm/lib/Target/PowerPC/PPCRegisterInfo.h +++ b/contrib/llvm/lib/Target/PowerPC/PPCRegisterInfo.h @@ -61,6 +61,10 @@ public: return true; } + virtual bool requiresVirtualBaseRegisters(const MachineFunction &MF) const { + return true; + } + void lowerDynamicAlloc(MachineBasicBlock::iterator II) const; void lowerCRSpilling(MachineBasicBlock::iterator II, unsigned FrameIndex) const; @@ -77,6 +81,15 @@ public: int SPAdj, unsigned FIOperandNum, RegScavenger *RS = NULL) const; + // Support for virtual base registers. + bool needsFrameBaseReg(MachineInstr *MI, int64_t Offset) const; + void materializeFrameBaseRegister(MachineBasicBlock *MBB, + unsigned BaseReg, int FrameIdx, + int64_t Offset) const; + void resolveFrameIndex(MachineBasicBlock::iterator I, + unsigned BaseReg, int64_t Offset) const; + bool isFrameOffsetLegal(const MachineInstr *MI, int64_t Offset) const; + // Debug information queries. unsigned getFrameRegister(const MachineFunction &MF) const; diff --git a/contrib/llvm/lib/Target/PowerPC/PPCScheduleA2.td b/contrib/llvm/lib/Target/PowerPC/PPCScheduleA2.td index ae084aa..8d5838e 100644 --- a/contrib/llvm/lib/Target/PowerPC/PPCScheduleA2.td +++ b/contrib/llvm/lib/Target/PowerPC/PPCScheduleA2.td @@ -759,7 +759,7 @@ def PPCA2Model : SchedMachineModel { let LoadLatency = 6; // Optimistic load latency assuming bypass. // This is overriden by OperandCycles if the // Itineraries are queried instead. - let MispredictPenalty = 6; + let MispredictPenalty = 13; let Itineraries = PPCA2Itineraries; } diff --git a/contrib/llvm/lib/Target/PowerPC/PPCTargetMachine.cpp b/contrib/llvm/lib/Target/PowerPC/PPCTargetMachine.cpp index fe851c1..14dc794 100644 --- a/contrib/llvm/lib/Target/PowerPC/PPCTargetMachine.cpp +++ b/contrib/llvm/lib/Target/PowerPC/PPCTargetMachine.cpp @@ -86,8 +86,14 @@ public: return getTM<PPCTargetMachine>(); } + const PPCSubtarget &getPPCSubtarget() const { + return *getPPCTargetMachine().getSubtargetImpl(); + } + virtual bool addPreRegAlloc(); + virtual bool addILPOpts(); virtual bool addInstSelector(); + virtual bool addPreSched2(); virtual bool addPreEmitPass(); }; } // namespace @@ -103,13 +109,31 @@ bool PPCPassConfig::addPreRegAlloc() { return false; } +bool PPCPassConfig::addILPOpts() { + if (getPPCSubtarget().hasISEL()) { + addPass(&EarlyIfConverterID); + return true; + } + + return false; +} + bool PPCPassConfig::addInstSelector() { // Install an instruction selector. addPass(createPPCISelDag(getPPCTargetMachine())); return false; } +bool PPCPassConfig::addPreSched2() { + if (getOptLevel() != CodeGenOpt::None) + addPass(&IfConverterID); + + return true; +} + bool PPCPassConfig::addPreEmitPass() { + if (getOptLevel() != CodeGenOpt::None) + addPass(createPPCEarlyReturnPass()); // Must run branch selection immediately preceding the asm printer. 
addPass(createPPCBranchSelectionPass()); return false; diff --git a/contrib/llvm/lib/Target/R600/AMDGPU.h b/contrib/llvm/lib/Target/R600/AMDGPU.h index 0b01433..9792bd8 100644 --- a/contrib/llvm/lib/Target/R600/AMDGPU.h +++ b/contrib/llvm/lib/Target/R600/AMDGPU.h @@ -24,6 +24,7 @@ class AMDGPUTargetMachine; FunctionPass* createR600KernelParametersPass(const DataLayout *TD); FunctionPass *createR600ExpandSpecialInstrsPass(TargetMachine &tm); FunctionPass *createR600EmitClauseMarkers(TargetMachine &tm); +FunctionPass *createR600Packetizer(TargetMachine &tm); FunctionPass *createR600ControlFlowFinalizer(TargetMachine &tm); // SI Passes diff --git a/contrib/llvm/lib/Target/R600/AMDGPUAsmPrinter.cpp b/contrib/llvm/lib/Target/R600/AMDGPUAsmPrinter.cpp index f600144..4c35ecf 100644 --- a/contrib/llvm/lib/Target/R600/AMDGPUAsmPrinter.cpp +++ b/contrib/llvm/lib/Target/R600/AMDGPUAsmPrinter.cpp @@ -19,9 +19,16 @@ #include "AMDGPUAsmPrinter.h" #include "AMDGPU.h" +#include "SIDefines.h" #include "SIMachineFunctionInfo.h" #include "SIRegisterInfo.h" +#include "R600Defines.h" +#include "R600MachineFunctionInfo.h" +#include "R600RegisterInfo.h" +#include "llvm/MC/MCContext.h" +#include "llvm/MC/MCSectionELF.h" #include "llvm/MC/MCStreamer.h" +#include "llvm/Support/ELF.h" #include "llvm/Support/TargetRegistry.h" #include "llvm/Target/TargetLoweringObjectFile.h" @@ -50,15 +57,82 @@ bool AMDGPUAsmPrinter::runOnMachineFunction(MachineFunction &MF) { if (OutStreamer.hasRawTextSupport()) { OutStreamer.EmitRawText("@" + MF.getName() + ":"); } - OutStreamer.SwitchSection(getObjFileLowering().getTextSection()); + + const MCSectionELF *ConfigSection = getObjFileLowering().getContext() + .getELFSection(".AMDGPU.config", + ELF::SHT_PROGBITS, 0, + SectionKind::getReadOnly()); + OutStreamer.SwitchSection(ConfigSection); if (STM.device()->getGeneration() > AMDGPUDeviceInfo::HD6XXX) { - EmitProgramInfo(MF); + EmitProgramInfoSI(MF); + } else { + EmitProgramInfoR600(MF); } + OutStreamer.SwitchSection(getObjFileLowering().getTextSection()); EmitFunctionBody(); return false; } -void AMDGPUAsmPrinter::EmitProgramInfo(MachineFunction &MF) { +void AMDGPUAsmPrinter::EmitProgramInfoR600(MachineFunction &MF) { + unsigned MaxGPR = 0; + bool killPixel = false; + const R600RegisterInfo * RI = + static_cast<const R600RegisterInfo*>(TM.getRegisterInfo()); + R600MachineFunctionInfo *MFI = MF.getInfo<R600MachineFunctionInfo>(); + const AMDGPUSubtarget &STM = TM.getSubtarget<AMDGPUSubtarget>(); + + for (MachineFunction::iterator BB = MF.begin(), BB_E = MF.end(); + BB != BB_E; ++BB) { + MachineBasicBlock &MBB = *BB; + for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end(); + I != E; ++I) { + MachineInstr &MI = *I; + if (MI.getOpcode() == AMDGPU::KILLGT) + killPixel = true; + unsigned numOperands = MI.getNumOperands(); + for (unsigned op_idx = 0; op_idx < numOperands; op_idx++) { + MachineOperand & MO = MI.getOperand(op_idx); + if (!MO.isReg()) + continue; + unsigned HWReg = RI->getEncodingValue(MO.getReg()) & 0xff; + + // Register with value > 127 aren't GPR + if (HWReg > 127) + continue; + MaxGPR = std::max(MaxGPR, HWReg); + } + } + } + + unsigned RsrcReg; + if (STM.device()->getGeneration() >= AMDGPUDeviceInfo::HD5XXX) { + // Evergreen / Northern Islands + switch (MFI->ShaderType) { + default: // Fall through + case ShaderType::COMPUTE: RsrcReg = R_0288D4_SQ_PGM_RESOURCES_LS; break; + case ShaderType::GEOMETRY: RsrcReg = R_028878_SQ_PGM_RESOURCES_GS; break; + case ShaderType::PIXEL: RsrcReg = R_028844_SQ_PGM_RESOURCES_PS; 
break; + case ShaderType::VERTEX: RsrcReg = R_028860_SQ_PGM_RESOURCES_VS; break; + } + } else { + // R600 / R700 + switch (MFI->ShaderType) { + default: // Fall through + case ShaderType::GEOMETRY: // Fall through + case ShaderType::COMPUTE: // Fall through + case ShaderType::VERTEX: RsrcReg = R_028868_SQ_PGM_RESOURCES_VS; break; + case ShaderType::PIXEL: RsrcReg = R_028850_SQ_PGM_RESOURCES_PS; break; + } + } + + OutStreamer.EmitIntValue(RsrcReg, 4); + OutStreamer.EmitIntValue(S_NUM_GPRS(MaxGPR + 1) | + S_STACK_SIZE(MFI->StackSize), 4); + OutStreamer.EmitIntValue(R_02880C_DB_SHADER_CONTROL, 4); + OutStreamer.EmitIntValue(S_02880C_KILL_ENABLE(killPixel), 4); +} + +void AMDGPUAsmPrinter::EmitProgramInfoSI(MachineFunction &MF) { unsigned MaxSGPR = 0; unsigned MaxVGPR = 0; bool VCCUsed = false; @@ -107,6 +181,9 @@ void AMDGPUAsmPrinter::EmitProgramInfo(MachineFunction &MF) { } else if (AMDGPU::VReg_64RegClass.contains(reg)) { isSGPR = false; width = 2; + } else if (AMDGPU::VReg_96RegClass.contains(reg)) { + isSGPR = false; + width = 3; } else if (AMDGPU::SReg_128RegClass.contains(reg)) { isSGPR = true; width = 4; @@ -139,7 +216,19 @@ void AMDGPUAsmPrinter::EmitProgramInfo(MachineFunction &MF) { MaxSGPR += 2; } SIMachineFunctionInfo * MFI = MF.getInfo<SIMachineFunctionInfo>(); - OutStreamer.EmitIntValue(MaxSGPR + 1, 4); - OutStreamer.EmitIntValue(MaxVGPR + 1, 4); - OutStreamer.EmitIntValue(MFI->PSInputAddr, 4); + unsigned RsrcReg; + switch (MFI->ShaderType) { + default: // Fall through + case ShaderType::COMPUTE: RsrcReg = R_00B848_COMPUTE_PGM_RSRC1; break; + case ShaderType::GEOMETRY: RsrcReg = R_00B228_SPI_SHADER_PGM_RSRC1_GS; break; + case ShaderType::PIXEL: RsrcReg = R_00B028_SPI_SHADER_PGM_RSRC1_PS; break; + case ShaderType::VERTEX: RsrcReg = R_00B128_SPI_SHADER_PGM_RSRC1_VS; break; + } + + OutStreamer.EmitIntValue(RsrcReg, 4); + OutStreamer.EmitIntValue(S_00B028_VGPRS(MaxVGPR / 4) | S_00B028_SGPRS(MaxSGPR / 8), 4); + if (MFI->ShaderType == ShaderType::PIXEL) { + OutStreamer.EmitIntValue(R_0286CC_SPI_PS_INPUT_ENA, 4); + OutStreamer.EmitIntValue(MFI->PSInputAddr, 4); + } } diff --git a/contrib/llvm/lib/Target/R600/AMDGPUAsmPrinter.h b/contrib/llvm/lib/Target/R600/AMDGPUAsmPrinter.h index 3812282..f425ef4 100644 --- a/contrib/llvm/lib/Target/R600/AMDGPUAsmPrinter.h +++ b/contrib/llvm/lib/Target/R600/AMDGPUAsmPrinter.h @@ -33,7 +33,8 @@ public: /// \brief Emit register usage information so that the GPU driver /// can correctly setup the GPU state. - void EmitProgramInfo(MachineFunction &MF); + void EmitProgramInfoR600(MachineFunction &MF); + void EmitProgramInfoSI(MachineFunction &MF); /// Implemented in AMDGPUMCInstLower.cpp virtual void EmitInstruction(const MachineInstr *MI); diff --git a/contrib/llvm/lib/Target/R600/AMDGPUCallingConv.td b/contrib/llvm/lib/Target/R600/AMDGPUCallingConv.td index 45ae37e..9c30515 100644 --- a/contrib/llvm/lib/Target/R600/AMDGPUCallingConv.td +++ b/contrib/llvm/lib/Target/R600/AMDGPUCallingConv.td @@ -32,8 +32,14 @@ def CC_SI : CallingConv<[ VGPR8, VGPR9, VGPR10, VGPR11, VGPR12, VGPR13, VGPR14, VGPR15, VGPR16, VGPR17, VGPR18, VGPR19, VGPR20, VGPR21, VGPR22, VGPR23, VGPR24, VGPR25, VGPR26, VGPR27, VGPR28, VGPR29, VGPR30, VGPR31 - ]>>> + ]>>>, + // This is the default for i64 values. + // XXX: We should change this once clang understands the CC_AMDGPU. 
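Both EmitProgramInfoR600 and EmitProgramInfoSI above derive the driver configuration words by scanning every machine operand and recording the highest register slot actually touched; multi-word register classes (VReg_64, VReg_96, SReg_128, ...) simply widen the footprint of their base register. The sketch below illustrates that bookkeeping with made-up register uses; the RegUse struct and sample values are assumptions for illustration, not code from the patch.

// Track the highest register slot touched, in the spirit of the asm-printer hunks.
#include <algorithm>
#include <cstdio>

struct RegUse { unsigned BaseIndex; unsigned Width; }; // Width in 32-bit registers

int main() {
  // E.g. v[0:1] (width 2), v5 (width 1), s[8:11] (SReg_128, width 4).
  const RegUse VGPRs[] = {{0, 2}, {5, 1}};
  const RegUse SGPRs[] = {{8, 4}};

  unsigned MaxVGPR = 0, MaxSGPR = 0;
  for (const RegUse &U : VGPRs) MaxVGPR = std::max(MaxVGPR, U.BaseIndex + U.Width - 1);
  for (const RegUse &U : SGPRs) MaxSGPR = std::max(MaxSGPR, U.BaseIndex + U.Width - 1);

  // The SI config word then carries the counts in coarse granules, mirroring
  // S_00B028_VGPRS(MaxVGPR / 4) | S_00B028_SGPRS(MaxSGPR / 8) in the patch.
  std::printf("MaxVGPR=%u MaxSGPR=%u\n", MaxVGPR, MaxSGPR); // 5 and 11
  return 0;
}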
+ CCIfType<[i64], CCAssignToRegWithShadow< + [ SGPR0, SGPR2, SGPR4, SGPR6, SGPR8, SGPR10, SGPR12, SGPR14 ], + [ SGPR1, SGPR3, SGPR5, SGPR7, SGPR9, SGPR11, SGPR13, SGPR15 ] + >> ]>; def CC_AMDGPU : CallingConv<[ diff --git a/contrib/llvm/lib/Target/R600/AMDGPUISelLowering.h b/contrib/llvm/lib/Target/R600/AMDGPUISelLowering.h index f31b646..c2a79ea 100644 --- a/contrib/llvm/lib/Target/R600/AMDGPUISelLowering.h +++ b/contrib/llvm/lib/Target/R600/AMDGPUISelLowering.h @@ -116,6 +116,7 @@ enum { BRANCH_COND, // End AMDIL ISD Opcodes BITALIGN, + BUFFER_STORE, DWORDADDR, FRACT, FMAX, diff --git a/contrib/llvm/lib/Target/R600/AMDGPUInstructions.td b/contrib/llvm/lib/Target/R600/AMDGPUInstructions.td index e740348..d2620b2 100644 --- a/contrib/llvm/lib/Target/R600/AMDGPUInstructions.td +++ b/contrib/llvm/lib/Target/R600/AMDGPUInstructions.td @@ -94,6 +94,7 @@ class Constants { int TWO_PI = 0x40c90fdb; int PI = 0x40490fdb; int TWO_PI_INV = 0x3e22f983; +int FP_UINT_MAX_PLUS_1 = 0x4f800000; // 1 << 32 in floating point encoding } def CONST : Constants; @@ -115,21 +116,21 @@ class CLAMP <RegisterClass rc> : AMDGPUShaderInst < (outs rc:$dst), (ins rc:$src0), "CLAMP $dst, $src0", - [(set rc:$dst, (int_AMDIL_clamp rc:$src0, (f32 FP_ZERO), (f32 FP_ONE)))] + [(set f32:$dst, (int_AMDIL_clamp f32:$src0, (f32 FP_ZERO), (f32 FP_ONE)))] >; class FABS <RegisterClass rc> : AMDGPUShaderInst < (outs rc:$dst), (ins rc:$src0), "FABS $dst, $src0", - [(set rc:$dst, (fabs rc:$src0))] + [(set f32:$dst, (fabs f32:$src0))] >; class FNEG <RegisterClass rc> : AMDGPUShaderInst < (outs rc:$dst), (ins rc:$src0), "FNEG $dst, $src0", - [(set rc:$dst, (fneg rc:$src0))] + [(set f32:$dst, (fneg f32:$src0))] >; } // usesCustomInserter = 1 @@ -140,8 +141,7 @@ multiclass RegisterLoadStore <RegisterClass dstClass, Operand addrClass, (outs dstClass:$dst), (ins addrClass:$addr, i32imm:$chan), "RegisterLoad $dst, $addr", - [(set (i32 dstClass:$dst), (AMDGPUregister_load addrPat:$addr, - (i32 timm:$chan)))] + [(set i32:$dst, (AMDGPUregister_load addrPat:$addr, (i32 timm:$chan)))] > { let isRegisterLoad = 1; } @@ -150,7 +150,7 @@ multiclass RegisterLoadStore <RegisterClass dstClass, Operand addrClass, (outs), (ins dstClass:$val, addrClass:$addr, i32imm:$chan), "RegisterStore $val, $addr", - [(AMDGPUregister_store (i32 dstClass:$val), addrPat:$addr, (i32 timm:$chan))] + [(AMDGPUregister_store i32:$val, addrPat:$addr, (i32 timm:$chan))] > { let isRegisterStore = 1; } @@ -161,105 +161,140 @@ multiclass RegisterLoadStore <RegisterClass dstClass, Operand addrClass, /* Generic helper patterns for intrinsics */ /* -------------------------------------- */ -class POW_Common <AMDGPUInst log_ieee, AMDGPUInst exp_ieee, AMDGPUInst mul, - RegisterClass rc> : Pat < - (fpow rc:$src0, rc:$src1), - (exp_ieee (mul rc:$src1, (log_ieee rc:$src0))) +class POW_Common <AMDGPUInst log_ieee, AMDGPUInst exp_ieee, AMDGPUInst mul> + : Pat < + (fpow f32:$src0, f32:$src1), + (exp_ieee (mul f32:$src1, (log_ieee f32:$src0))) >; /* Other helper patterns */ /* --------------------- */ /* Extract element pattern */ -class Extract_Element <ValueType sub_type, ValueType vec_type, - RegisterClass vec_class, int sub_idx, - SubRegIndex sub_reg>: Pat< - (sub_type (vector_extract (vec_type vec_class:$src), sub_idx)), - (EXTRACT_SUBREG vec_class:$src, sub_reg) +class Extract_Element <ValueType sub_type, ValueType vec_type, int sub_idx, + SubRegIndex sub_reg> + : Pat< + (sub_type (vector_extract vec_type:$src, sub_idx)), + (EXTRACT_SUBREG $src, sub_reg) >; /* Insert element pattern */ 
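The retyped POW_Common pattern above expands fpow as exp_ieee(src1 * log_ieee(src0)); assuming EXP_IEEE and LOG_IEEE are the base-2 exponential and logarithm on this hardware family, that is the usual identity pow(x, y) = exp2(y * log2(x)). Likewise, the new FP_UINT_MAX_PLUS_1 constant 0x4f800000 is simply 2^32 written as an IEEE-754 single, as its comment states. Both facts are easy to sanity-check on the host; this check is editorial, not part of the patch.

// Host-side sanity check of the two facts noted above.
#include <cassert>
#include <cmath>
#include <cstdint>
#include <cstring>

int main() {
  // pow(x, y) == exp2(y * log2(x)) for positive x -- the shape of POW_Common.
  float x = 3.5f, y = 1.25f;
  assert(std::fabs(std::pow(x, y) - std::exp2(y * std::log2(x))) < 1e-4f);

  // 0x4f800000 is the IEEE-754 single-precision encoding of 2^32.
  float f = 4294967296.0f; // 1 << 32
  uint32_t bits;
  std::memcpy(&bits, &f, sizeof(bits));
  assert(bits == 0x4f800000u);
  return 0;
}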
class Insert_Element <ValueType elem_type, ValueType vec_type, - RegisterClass elem_class, RegisterClass vec_class, - int sub_idx, SubRegIndex sub_reg> : Pat < - - (vec_type (vector_insert (vec_type vec_class:$vec), - (elem_type elem_class:$elem), sub_idx)), - (INSERT_SUBREG vec_class:$vec, elem_class:$elem, sub_reg) + int sub_idx, SubRegIndex sub_reg> + : Pat < + (vector_insert vec_type:$vec, elem_type:$elem, sub_idx), + (INSERT_SUBREG $vec, $elem, sub_reg) >; // Vector Build pattern -class Vector1_Build <ValueType vecType, RegisterClass vectorClass, - ValueType elemType, RegisterClass elemClass> : Pat < - (vecType (build_vector (elemType elemClass:$src))), - (vecType elemClass:$src) +class Vector1_Build <ValueType vecType, ValueType elemType, + RegisterClass rc> : Pat < + (vecType (build_vector elemType:$src)), + (vecType (COPY_TO_REGCLASS $src, rc)) >; -class Vector2_Build <ValueType vecType, RegisterClass vectorClass, - ValueType elemType, RegisterClass elemClass> : Pat < - (vecType (build_vector (elemType elemClass:$sub0), (elemType elemClass:$sub1))), +class Vector2_Build <ValueType vecType, ValueType elemType> : Pat < + (vecType (build_vector elemType:$sub0, elemType:$sub1)), (INSERT_SUBREG (INSERT_SUBREG - (vecType (IMPLICIT_DEF)), elemClass:$sub0, sub0), elemClass:$sub1, sub1) + (vecType (IMPLICIT_DEF)), $sub0, sub0), $sub1, sub1) >; -class Vector4_Build <ValueType vecType, RegisterClass vectorClass, - ValueType elemType, RegisterClass elemClass> : Pat < - (vecType (build_vector (elemType elemClass:$x), (elemType elemClass:$y), - (elemType elemClass:$z), (elemType elemClass:$w))), +class Vector4_Build <ValueType vecType, ValueType elemType> : Pat < + (vecType (build_vector elemType:$x, elemType:$y, elemType:$z, elemType:$w)), (INSERT_SUBREG (INSERT_SUBREG (INSERT_SUBREG (INSERT_SUBREG - (vecType (IMPLICIT_DEF)), elemClass:$x, sub0), elemClass:$y, sub1), - elemClass:$z, sub2), elemClass:$w, sub3) + (vecType (IMPLICIT_DEF)), $x, sub0), $y, sub1), $z, sub2), $w, sub3) >; -class Vector8_Build <ValueType vecType, RegisterClass vectorClass, - ValueType elemType, RegisterClass elemClass> : Pat < - (vecType (build_vector (elemType elemClass:$sub0), (elemType elemClass:$sub1), - (elemType elemClass:$sub2), (elemType elemClass:$sub3), - (elemType elemClass:$sub4), (elemType elemClass:$sub5), - (elemType elemClass:$sub6), (elemType elemClass:$sub7))), - (INSERT_SUBREG (INSERT_SUBREG (INSERT_SUBREG (INSERT_SUBREG +class Vector8_Build <ValueType vecType, ValueType elemType> : Pat < + (vecType (build_vector elemType:$sub0, elemType:$sub1, + elemType:$sub2, elemType:$sub3, + elemType:$sub4, elemType:$sub5, + elemType:$sub6, elemType:$sub7)), (INSERT_SUBREG (INSERT_SUBREG (INSERT_SUBREG (INSERT_SUBREG - (vecType (IMPLICIT_DEF)), elemClass:$sub0, sub0), elemClass:$sub1, sub1), - elemClass:$sub2, sub2), elemClass:$sub3, sub3), - elemClass:$sub4, sub4), elemClass:$sub5, sub5), - elemClass:$sub6, sub6), elemClass:$sub7, sub7) + (INSERT_SUBREG (INSERT_SUBREG (INSERT_SUBREG (INSERT_SUBREG + (vecType (IMPLICIT_DEF)), $sub0, sub0), $sub1, sub1), + $sub2, sub2), $sub3, sub3), + $sub4, sub4), $sub5, sub5), + $sub6, sub6), $sub7, sub7) >; -class Vector16_Build <ValueType vecType, RegisterClass vectorClass, - ValueType elemType, RegisterClass elemClass> : Pat < - (vecType (build_vector (elemType elemClass:$sub0), (elemType elemClass:$sub1), - (elemType elemClass:$sub2), (elemType elemClass:$sub3), - (elemType elemClass:$sub4), (elemType elemClass:$sub5), - (elemType elemClass:$sub6), (elemType 
elemClass:$sub7), - (elemType elemClass:$sub8), (elemType elemClass:$sub9), - (elemType elemClass:$sub10), (elemType elemClass:$sub11), - (elemType elemClass:$sub12), (elemType elemClass:$sub13), - (elemType elemClass:$sub14), (elemType elemClass:$sub15))), - (INSERT_SUBREG (INSERT_SUBREG (INSERT_SUBREG (INSERT_SUBREG - (INSERT_SUBREG (INSERT_SUBREG (INSERT_SUBREG (INSERT_SUBREG - (INSERT_SUBREG (INSERT_SUBREG (INSERT_SUBREG (INSERT_SUBREG +class Vector16_Build <ValueType vecType, ValueType elemType> : Pat < + (vecType (build_vector elemType:$sub0, elemType:$sub1, + elemType:$sub2, elemType:$sub3, + elemType:$sub4, elemType:$sub5, + elemType:$sub6, elemType:$sub7, + elemType:$sub8, elemType:$sub9, + elemType:$sub10, elemType:$sub11, + elemType:$sub12, elemType:$sub13, + elemType:$sub14, elemType:$sub15)), (INSERT_SUBREG (INSERT_SUBREG (INSERT_SUBREG (INSERT_SUBREG - (vecType (IMPLICIT_DEF)), elemClass:$sub0, sub0), elemClass:$sub1, sub1), - elemClass:$sub2, sub2), elemClass:$sub3, sub3), - elemClass:$sub4, sub4), elemClass:$sub5, sub5), - elemClass:$sub6, sub6), elemClass:$sub7, sub7), - elemClass:$sub8, sub8), elemClass:$sub9, sub9), - elemClass:$sub10, sub10), elemClass:$sub11, sub11), - elemClass:$sub12, sub12), elemClass:$sub13, sub13), - elemClass:$sub14, sub14), elemClass:$sub15, sub15) + (INSERT_SUBREG (INSERT_SUBREG (INSERT_SUBREG (INSERT_SUBREG + (INSERT_SUBREG (INSERT_SUBREG (INSERT_SUBREG (INSERT_SUBREG + (INSERT_SUBREG (INSERT_SUBREG (INSERT_SUBREG (INSERT_SUBREG + (vecType (IMPLICIT_DEF)), $sub0, sub0), $sub1, sub1), + $sub2, sub2), $sub3, sub3), + $sub4, sub4), $sub5, sub5), + $sub6, sub6), $sub7, sub7), + $sub8, sub8), $sub9, sub9), + $sub10, sub10), $sub11, sub11), + $sub12, sub12), $sub13, sub13), + $sub14, sub14), $sub15, sub15) >; +// XXX: Convert to new syntax and use COPY_TO_REG, once the DFAPacketizer +// can handle COPY instructions. // bitconvert pattern class BitConvert <ValueType dt, ValueType st, RegisterClass rc> : Pat < (dt (bitconvert (st rc:$src0))), (dt rc:$src0) >; +// XXX: Convert to new syntax and use COPY_TO_REG, once the DFAPacketizer +// can handle COPY instructions. 
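The BFI_INT and SHA-256 patterns added a little further down (the BFIPatterns multiclass and SHA256MaPattern) rest on two boolean identities: with BFI(a, b, c) = (b & a) | (c & ~a) as in the ISA-doc comment, the Ch function z ^ (x & (y ^ z)) equals BFI(x, y, z), and the Ma (majority) function (x & z) | (y & (x | z)) equals BFI(x ^ y, z, y). A brute-force check is sketched below; the bfi helper name is illustrative only.

// Exhaustive check of the identities behind BFIPatterns / SHA256MaPattern,
// evaluated bitwise so the eight single-bit cases cover every lane.
#include <cassert>
#include <cstdint>

static uint32_t bfi(uint32_t a, uint32_t b, uint32_t c) {
  return (b & a) | (c & ~a); // "(y & x) | (z & ~x)" from the ISA-doc comment
}

int main() {
  for (uint32_t x = 0; x <= 1; ++x)
    for (uint32_t y = 0; y <= 1; ++y)
      for (uint32_t z = 0; z <= 1; ++z) {
        uint32_t ch = z ^ (x & (y ^ z));        // SHA-256 Ch
        uint32_t ma = (x & z) | (y & (x | z));  // SHA-256 Ma (majority)
        assert(ch == bfi(x, y, z));
        assert(ma == bfi(x ^ y, z, y));
      }
  return 0;
}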
class DwordAddrPat<ValueType vt, RegisterClass rc> : Pat < (vt (AMDGPUdwordaddr (vt rc:$addr))), (vt rc:$addr) >; +// BFI_INT patterns + +multiclass BFIPatterns <Instruction BFI_INT> { + + // Definition from ISA doc: + // (y & x) | (z & ~x) + def : Pat < + (or (and i32:$y, i32:$x), (and i32:$z, (not i32:$x))), + (BFI_INT $x, $y, $z) + >; + + // SHA-256 Ch function + // z ^ (x & (y ^ z)) + def : Pat < + (xor i32:$z, (and i32:$x, (xor i32:$y, i32:$z))), + (BFI_INT $x, $y, $z) + >; + +} + +// SHA-256 Ma patterns + +// ((x & z) | (y & (x | z))) -> BFI_INT (XOR x, y), z, y +class SHA256MaPattern <Instruction BFI_INT, Instruction XOR> : Pat < + (or (and i32:$x, i32:$z), (and i32:$y, (or i32:$x, i32:$z))), + (BFI_INT (XOR i32:$x, i32:$y), i32:$z, i32:$y) +>; + +// Bitfield extract patterns + +def legalshift32 : ImmLeaf <i32, [{return Imm >=0 && Imm < 32;}]>; +def bfemask : PatLeaf <(imm), [{return isMask_32(N->getZExtValue());}], + SDNodeXForm<imm, [{ return CurDAG->getTargetConstant(CountTrailingOnes_32(N->getZExtValue()), MVT::i32);}]>>; + +class BFEPattern <Instruction BFE> : Pat < + (and (srl i32:$x, legalshift32:$y), bfemask:$z), + (BFE $x, $y, $z) +>; + include "R600Instructions.td" include "SIInstrInfo.td" diff --git a/contrib/llvm/lib/Target/R600/AMDGPUMachineFunction.cpp b/contrib/llvm/lib/Target/R600/AMDGPUMachineFunction.cpp index 0223ec8..0461025 100644 --- a/contrib/llvm/lib/Target/R600/AMDGPUMachineFunction.cpp +++ b/contrib/llvm/lib/Target/R600/AMDGPUMachineFunction.cpp @@ -1,4 +1,5 @@ #include "AMDGPUMachineFunction.h" +#include "AMDGPU.h" #include "llvm/IR/Attributes.h" #include "llvm/IR/Function.h" @@ -8,6 +9,7 @@ const char *AMDGPUMachineFunction::ShaderTypeAttribute = "ShaderType"; AMDGPUMachineFunction::AMDGPUMachineFunction(const MachineFunction &MF) : MachineFunctionInfo() { + ShaderType = ShaderType::COMPUTE; AttributeSet Set = MF.getFunction()->getAttributes(); Attribute A = Set.getAttribute(AttributeSet::FunctionIndex, ShaderTypeAttribute); diff --git a/contrib/llvm/lib/Target/R600/AMDGPUSubtarget.cpp b/contrib/llvm/lib/Target/R600/AMDGPUSubtarget.cpp index 0f356a1..a7e1d7b 100644 --- a/contrib/llvm/lib/Target/R600/AMDGPUSubtarget.cpp +++ b/contrib/llvm/lib/Target/R600/AMDGPUSubtarget.cpp @@ -33,6 +33,7 @@ AMDGPUSubtarget::AMDGPUSubtarget(StringRef TT, StringRef CPU, StringRef FS) : DefaultSize[0] = 64; DefaultSize[1] = 1; DefaultSize[2] = 1; + HasVertexCache = false; ParseSubtargetFeatures(GPU, FS); DevName = GPU; Device = AMDGPUDeviceInfo::getDeviceFromName(DevName, this, Is64bit); @@ -53,6 +54,10 @@ AMDGPUSubtarget::is64bit() const { return Is64bit; } bool +AMDGPUSubtarget::hasVertexCache() const { + return HasVertexCache; +} +bool AMDGPUSubtarget::isTargetELF() const { return false; } diff --git a/contrib/llvm/lib/Target/R600/AMDGPUSubtarget.h b/contrib/llvm/lib/Target/R600/AMDGPUSubtarget.h index 1973fc6..b6501a4 100644 --- a/contrib/llvm/lib/Target/R600/AMDGPUSubtarget.h +++ b/contrib/llvm/lib/Target/R600/AMDGPUSubtarget.h @@ -36,6 +36,7 @@ private: bool Is32on64bit; bool DumpCode; bool R600ALUInst; + bool HasVertexCache; InstrItineraryData InstrItins; @@ -48,6 +49,7 @@ public: bool isOverride(AMDGPUDeviceInfo::Caps) const; bool is64bit() const; + bool hasVertexCache() const; // Helper functions to simplify if statements bool isTargetELF() const; diff --git a/contrib/llvm/lib/Target/R600/AMDGPUTargetMachine.cpp b/contrib/llvm/lib/Target/R600/AMDGPUTargetMachine.cpp index e7ea876..31fbf32 100644 --- a/contrib/llvm/lib/Target/R600/AMDGPUTargetMachine.cpp +++ 
b/contrib/llvm/lib/Target/R600/AMDGPUTargetMachine.cpp @@ -115,7 +115,6 @@ AMDGPUPassConfig::addPreISel() { } bool AMDGPUPassConfig::addInstSelector() { - addPass(createAMDGPUPeepholeOpt(*TM)); addPass(createAMDGPUISelDag(getAMDGPUTargetMachine())); const AMDGPUSubtarget &ST = TM->getSubtarget<AMDGPUSubtarget>(); @@ -153,8 +152,9 @@ bool AMDGPUPassConfig::addPreEmitPass() { addPass(createAMDGPUCFGStructurizerPass(*TM)); addPass(createR600EmitClauseMarkers(*TM)); addPass(createR600ExpandSpecialInstrsPass(*TM)); - addPass(createR600ControlFlowFinalizer(*TM)); addPass(&FinalizeMachineBundlesID); + addPass(createR600Packetizer(*TM)); + addPass(createR600ControlFlowFinalizer(*TM)); } else { addPass(createSILowerControlFlowPass(*TM)); } diff --git a/contrib/llvm/lib/Target/R600/AMDILBase.td b/contrib/llvm/lib/Target/R600/AMDILBase.td index c12cedc..e221110 100644 --- a/contrib/llvm/lib/Target/R600/AMDILBase.td +++ b/contrib/llvm/lib/Target/R600/AMDILBase.td @@ -74,6 +74,10 @@ def FeatureR600ALUInst : SubtargetFeature<"R600ALUInst", "false", "Older version of ALU instructions encoding.">; +def FeatureVertexCache : SubtargetFeature<"HasVertexCache", + "HasVertexCache", + "true", + "Specify use of dedicated vertex cache.">; //===----------------------------------------------------------------------===// // Register File, Calling Conv, Instruction Descriptions diff --git a/contrib/llvm/lib/Target/R600/AMDILDeviceInfo.cpp b/contrib/llvm/lib/Target/R600/AMDILDeviceInfo.cpp index 9605fbe..126514b 100644 --- a/contrib/llvm/lib/Target/R600/AMDILDeviceInfo.cpp +++ b/contrib/llvm/lib/Target/R600/AMDILDeviceInfo.cpp @@ -44,7 +44,7 @@ AMDGPUDevice* getDeviceFromName(const std::string &deviceName, " on 32bit pointers!"); #endif return new AMDGPUEvergreenDevice(ptr); - } else if (deviceName == "redwood") { + } else if (deviceName == "redwood" || deviceName == "sumo") { #if DEBUG assert(!is64bit && "This device does not support 64bit pointers!"); assert(!is64on32bit && "This device does not support 64bit" @@ -79,7 +79,10 @@ AMDGPUDevice* getDeviceFromName(const std::string &deviceName, " on 32bit pointers!"); #endif return new AMDGPUNIDevice(ptr); - } else if (deviceName == "SI") { + } else if (deviceName == "SI" || + deviceName == "tahiti" || deviceName == "pitcairn" || + deviceName == "verde" || deviceName == "oland" || + deviceName == "hainan") { return new AMDGPUSIDevice(ptr); } else { #if DEBUG diff --git a/contrib/llvm/lib/Target/R600/AMDILISelDAGToDAG.cpp b/contrib/llvm/lib/Target/R600/AMDILISelDAGToDAG.cpp index fa8f62d..ba75a44 100644 --- a/contrib/llvm/lib/Target/R600/AMDILISelDAGToDAG.cpp +++ b/contrib/llvm/lib/Target/R600/AMDILISelDAGToDAG.cpp @@ -191,6 +191,29 @@ SDNode *AMDGPUDAGToDAGISel::Select(SDNode *N) { return CurDAG->SelectNodeTo(N, AMDGPU::REG_SEQUENCE, N->getVTList(), RegSeqArgs, 2 * N->getNumOperands() + 1); } + case ISD::BUILD_PAIR: { + SDValue RC, SubReg0, SubReg1; + const AMDGPUSubtarget &ST = TM.getSubtarget<AMDGPUSubtarget>(); + if (ST.device()->getGeneration() <= AMDGPUDeviceInfo::HD6XXX) { + break; + } + if (N->getValueType(0) == MVT::i128) { + RC = CurDAG->getTargetConstant(AMDGPU::SReg_128RegClassID, MVT::i32); + SubReg0 = CurDAG->getTargetConstant(AMDGPU::sub0_sub1, MVT::i32); + SubReg1 = CurDAG->getTargetConstant(AMDGPU::sub2_sub3, MVT::i32); + } else if (N->getValueType(0) == MVT::i64) { + RC = CurDAG->getTargetConstant(AMDGPU::SReg_64RegClassID, MVT::i32); + SubReg0 = CurDAG->getTargetConstant(AMDGPU::sub0, MVT::i32); + SubReg1 = CurDAG->getTargetConstant(AMDGPU::sub1, 
MVT::i32); + } else { + llvm_unreachable("Unhandled value type for BUILD_PAIR"); + } + const SDValue Ops[] = { RC, N->getOperand(0), SubReg0, + N->getOperand(1), SubReg1 }; + return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, + N->getDebugLoc(), N->getValueType(0), Ops); + } + case ISD::ConstantFP: case ISD::Constant: { const AMDGPUSubtarget &ST = TM.getSubtarget<AMDGPUSubtarget>(); diff --git a/contrib/llvm/lib/Target/R600/AMDILPeepholeOptimizer.cpp b/contrib/llvm/lib/Target/R600/AMDILPeepholeOptimizer.cpp deleted file mode 100644 index 3a28038..0000000 --- a/contrib/llvm/lib/Target/R600/AMDILPeepholeOptimizer.cpp +++ /dev/null @@ -1,1215 +0,0 @@ -//===-- AMDILPeepholeOptimizer.cpp - AMDGPU Peephole optimizations ---------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -/// \file -//==-----------------------------------------------------------------------===// - -#define DEBUG_TYPE "PeepholeOpt" -#ifdef DEBUG -#define DEBUGME (DebugFlag && isCurrentDebugType(DEBUG_TYPE)) -#else -#define DEBUGME 0 -#endif - -#include "AMDILDevices.h" -#include "AMDGPUInstrInfo.h" -#include "llvm/ADT/Statistic.h" -#include "llvm/ADT/StringExtras.h" -#include "llvm/ADT/StringRef.h" -#include "llvm/ADT/Twine.h" -#include "llvm/IR/Constants.h" -#include "llvm/CodeGen/MachineFunction.h" -#include "llvm/CodeGen/MachineFunctionAnalysis.h" -#include "llvm/IR/Function.h" -#include "llvm/IR/Instructions.h" -#include "llvm/IR/Module.h" -#include "llvm/Support/Debug.h" -#include "llvm/Support/MathExtras.h" - -#include <sstream> - -#if 0 -STATISTIC(PointerAssignments, "Number of dynamic pointer " - "assigments discovered"); -STATISTIC(PointerSubtract, "Number of pointer subtractions discovered"); -#endif - -using namespace llvm; -// The Peephole optimization pass is used to do simple last minute optimizations -// that are required for correct code or to remove redundant functions -namespace { - -class OpaqueType; - -class LLVM_LIBRARY_VISIBILITY AMDGPUPeepholeOpt : public FunctionPass { -public: - TargetMachine &TM; - static char ID; - AMDGPUPeepholeOpt(TargetMachine &tm); - ~AMDGPUPeepholeOpt(); - const char *getPassName() const; - bool runOnFunction(Function &F); - bool doInitialization(Module &M); - bool doFinalization(Module &M); - void getAnalysisUsage(AnalysisUsage &AU) const; -protected: -private: - // Function to initiate all of the instruction level optimizations. - bool instLevelOptimizations(BasicBlock::iterator *inst); - // Quick check to see if we need to dump all of the pointers into the - // arena. If this is correct, then we set all pointers to exist in arena. This - // is a workaround for aliasing of pointers in a struct/union. - bool dumpAllIntoArena(Function &F); - // Because I don't want to invalidate any pointers while in the - // safeNestedForEachFunction. I push atomic conversions to a vector and handle - // it later. This function does the conversions if required. - void doAtomicConversionIfNeeded(Function &F); - // Because __amdil_is_constant cannot be properly evaluated if - // optimizations are disabled, the call's are placed in a vector - // and evaluated after the __amdil_image* functions are evaluated - // which should allow the __amdil_is_constant function to be - // evaluated correctly. 
- void doIsConstCallConversionIfNeeded(); - bool mChanged; - bool mDebug; - bool mConvertAtomics; - CodeGenOpt::Level optLevel; - // Run a series of tests to see if we can optimize a CALL instruction. - bool optimizeCallInst(BasicBlock::iterator *bbb); - // A peephole optimization to optimize bit extract sequences. - bool optimizeBitExtract(Instruction *inst); - // A peephole optimization to optimize bit insert sequences. - bool optimizeBitInsert(Instruction *inst); - bool setupBitInsert(Instruction *base, - Instruction *&src, - Constant *&mask, - Constant *&shift); - // Expand the bit field insert instruction on versions of OpenCL that - // don't support it. - bool expandBFI(CallInst *CI); - // Expand the bit field mask instruction on version of OpenCL that - // don't support it. - bool expandBFM(CallInst *CI); - // On 7XX and 8XX operations, we do not have 24 bit signed operations. So in - // this case we need to expand them. These functions check for 24bit functions - // and then expand. - bool isSigned24BitOps(CallInst *CI); - void expandSigned24BitOps(CallInst *CI); - // One optimization that can occur is that if the required workgroup size is - // specified then the result of get_local_size is known at compile time and - // can be returned accordingly. - bool isRWGLocalOpt(CallInst *CI); - // On northern island cards, the division is slightly less accurate than on - // previous generations, so we need to utilize a more accurate division. So we - // can translate the accurate divide to a normal divide on all other cards. - bool convertAccurateDivide(CallInst *CI); - void expandAccurateDivide(CallInst *CI); - // If the alignment is set incorrectly, it can produce really inefficient - // code. This checks for this scenario and fixes it if possible. - bool correctMisalignedMemOp(Instruction *inst); - - // If we are in no opt mode, then we need to make sure that - // local samplers are properly propagated as constant propagation - // doesn't occur and we need to know the value of kernel defined - // samplers at compile time. - bool propagateSamplerInst(CallInst *CI); - - // Helper functions - - // Group of functions that recursively calculate the size of a structure based - // on it's sub-types. - size_t getTypeSize(Type * const T, bool dereferencePtr = false); - size_t getTypeSize(StructType * const ST, bool dereferencePtr = false); - size_t getTypeSize(IntegerType * const IT, bool dereferencePtr = false); - size_t getTypeSize(FunctionType * const FT,bool dereferencePtr = false); - size_t getTypeSize(ArrayType * const AT, bool dereferencePtr = false); - size_t getTypeSize(VectorType * const VT, bool dereferencePtr = false); - size_t getTypeSize(PointerType * const PT, bool dereferencePtr = false); - size_t getTypeSize(OpaqueType * const OT, bool dereferencePtr = false); - - LLVMContext *mCTX; - Function *mF; - const AMDGPUSubtarget *mSTM; - SmallVector< std::pair<CallInst *, Function *>, 16> atomicFuncs; - SmallVector<CallInst *, 16> isConstVec; -}; // class AMDGPUPeepholeOpt - char AMDGPUPeepholeOpt::ID = 0; - -// A template function that has two levels of looping before calling the -// function with a pointer to the current iterator. 
-template<class InputIterator, class SecondIterator, class Function> -Function safeNestedForEach(InputIterator First, InputIterator Last, - SecondIterator S, Function F) { - for ( ; First != Last; ++First) { - SecondIterator sf, sl; - for (sf = First->begin(), sl = First->end(); - sf != sl; ) { - if (!F(&sf)) { - ++sf; - } - } - } - return F; -} - -} // anonymous namespace - -namespace llvm { - FunctionPass * - createAMDGPUPeepholeOpt(TargetMachine &tm) { - return new AMDGPUPeepholeOpt(tm); - } -} // llvm namespace - -AMDGPUPeepholeOpt::AMDGPUPeepholeOpt(TargetMachine &tm) - : FunctionPass(ID), TM(tm) { - mDebug = DEBUGME; - optLevel = TM.getOptLevel(); - -} - -AMDGPUPeepholeOpt::~AMDGPUPeepholeOpt() { -} - -const char * -AMDGPUPeepholeOpt::getPassName() const { - return "AMDGPU PeepHole Optimization Pass"; -} - -bool -containsPointerType(Type *Ty) { - if (!Ty) { - return false; - } - switch(Ty->getTypeID()) { - default: - return false; - case Type::StructTyID: { - const StructType *ST = dyn_cast<StructType>(Ty); - for (StructType::element_iterator stb = ST->element_begin(), - ste = ST->element_end(); stb != ste; ++stb) { - if (!containsPointerType(*stb)) { - continue; - } - return true; - } - break; - } - case Type::VectorTyID: - case Type::ArrayTyID: - return containsPointerType(dyn_cast<SequentialType>(Ty)->getElementType()); - case Type::PointerTyID: - return true; - }; - return false; -} - -bool -AMDGPUPeepholeOpt::dumpAllIntoArena(Function &F) { - bool dumpAll = false; - for (Function::const_arg_iterator cab = F.arg_begin(), - cae = F.arg_end(); cab != cae; ++cab) { - const Argument *arg = cab; - const PointerType *PT = dyn_cast<PointerType>(arg->getType()); - if (!PT) { - continue; - } - Type *DereferencedType = PT->getElementType(); - if (!dyn_cast<StructType>(DereferencedType) - ) { - continue; - } - if (!containsPointerType(DereferencedType)) { - continue; - } - // FIXME: Because a pointer inside of a struct/union may be aliased to - // another pointer we need to take the conservative approach and place all - // pointers into the arena until more advanced detection is implemented. - dumpAll = true; - } - return dumpAll; -} -void -AMDGPUPeepholeOpt::doIsConstCallConversionIfNeeded() { - if (isConstVec.empty()) { - return; - } - for (unsigned x = 0, y = isConstVec.size(); x < y; ++x) { - CallInst *CI = isConstVec[x]; - Constant *CV = dyn_cast<Constant>(CI->getOperand(0)); - Type *aType = Type::getInt32Ty(*mCTX); - Value *Val = (CV != NULL) ? ConstantInt::get(aType, 1) - : ConstantInt::get(aType, 0); - CI->replaceAllUsesWith(Val); - CI->eraseFromParent(); - } - isConstVec.clear(); -} -void -AMDGPUPeepholeOpt::doAtomicConversionIfNeeded(Function &F) { - // Don't do anything if we don't have any atomic operations. 
- if (atomicFuncs.empty()) { - return; - } - // Change the function name for the atomic if it is required - uint32_t size = atomicFuncs.size(); - for (uint32_t x = 0; x < size; ++x) { - atomicFuncs[x].first->setOperand( - atomicFuncs[x].first->getNumOperands()-1, - atomicFuncs[x].second); - - } - mChanged = true; - if (mConvertAtomics) { - return; - } -} - -bool -AMDGPUPeepholeOpt::runOnFunction(Function &MF) { - mChanged = false; - mF = &MF; - mSTM = &TM.getSubtarget<AMDGPUSubtarget>(); - if (mDebug) { - MF.dump(); - } - mCTX = &MF.getType()->getContext(); - mConvertAtomics = true; - safeNestedForEach(MF.begin(), MF.end(), MF.begin()->begin(), - std::bind1st(std::mem_fun(&AMDGPUPeepholeOpt::instLevelOptimizations), - this)); - - doAtomicConversionIfNeeded(MF); - doIsConstCallConversionIfNeeded(); - - if (mDebug) { - MF.dump(); - } - return mChanged; -} - -bool -AMDGPUPeepholeOpt::optimizeCallInst(BasicBlock::iterator *bbb) { - Instruction *inst = (*bbb); - CallInst *CI = dyn_cast<CallInst>(inst); - if (!CI) { - return false; - } - if (isSigned24BitOps(CI)) { - expandSigned24BitOps(CI); - ++(*bbb); - CI->eraseFromParent(); - return true; - } - if (propagateSamplerInst(CI)) { - return false; - } - if (expandBFI(CI) || expandBFM(CI)) { - ++(*bbb); - CI->eraseFromParent(); - return true; - } - if (convertAccurateDivide(CI)) { - expandAccurateDivide(CI); - ++(*bbb); - CI->eraseFromParent(); - return true; - } - - StringRef calleeName = CI->getOperand(CI->getNumOperands()-1)->getName(); - if (calleeName.startswith("__amdil_is_constant")) { - // If we do not have optimizations, then this - // cannot be properly evaluated, so we add the - // call instruction to a vector and process - // them at the end of processing after the - // samplers have been correctly handled. - if (optLevel == CodeGenOpt::None) { - isConstVec.push_back(CI); - return false; - } else { - Constant *CV = dyn_cast<Constant>(CI->getOperand(0)); - Type *aType = Type::getInt32Ty(*mCTX); - Value *Val = (CV != NULL) ? 
ConstantInt::get(aType, 1) - : ConstantInt::get(aType, 0); - CI->replaceAllUsesWith(Val); - ++(*bbb); - CI->eraseFromParent(); - return true; - } - } - - if (calleeName.equals("__amdil_is_asic_id_i32")) { - ConstantInt *CV = dyn_cast<ConstantInt>(CI->getOperand(0)); - Type *aType = Type::getInt32Ty(*mCTX); - Value *Val = CV; - if (Val) { - Val = ConstantInt::get(aType, - mSTM->device()->getDeviceFlag() & CV->getZExtValue()); - } else { - Val = ConstantInt::get(aType, 0); - } - CI->replaceAllUsesWith(Val); - ++(*bbb); - CI->eraseFromParent(); - return true; - } - Function *F = dyn_cast<Function>(CI->getOperand(CI->getNumOperands()-1)); - if (!F) { - return false; - } - if (F->getName().startswith("__atom") && !CI->getNumUses() - && F->getName().find("_xchg") == StringRef::npos) { - std::string buffer(F->getName().str() + "_noret"); - F = dyn_cast<Function>( - F->getParent()->getOrInsertFunction(buffer, F->getFunctionType())); - atomicFuncs.push_back(std::make_pair(CI, F)); - } - - if (!mSTM->device()->isSupported(AMDGPUDeviceInfo::ArenaSegment) - && !mSTM->device()->isSupported(AMDGPUDeviceInfo::MultiUAV)) { - return false; - } - if (!mConvertAtomics) { - return false; - } - StringRef name = F->getName(); - if (name.startswith("__atom") && name.find("_g") != StringRef::npos) { - mConvertAtomics = false; - } - return false; -} - -bool -AMDGPUPeepholeOpt::setupBitInsert(Instruction *base, - Instruction *&src, - Constant *&mask, - Constant *&shift) { - if (!base) { - if (mDebug) { - dbgs() << "Null pointer passed into function.\n"; - } - return false; - } - bool andOp = false; - if (base->getOpcode() == Instruction::Shl) { - shift = dyn_cast<Constant>(base->getOperand(1)); - } else if (base->getOpcode() == Instruction::And) { - mask = dyn_cast<Constant>(base->getOperand(1)); - andOp = true; - } else { - if (mDebug) { - dbgs() << "Failed setup with no Shl or And instruction on base opcode!\n"; - } - // If the base is neither a Shl or a And, we don't fit any of the patterns above. - return false; - } - src = dyn_cast<Instruction>(base->getOperand(0)); - if (!src) { - if (mDebug) { - dbgs() << "Failed setup since the base operand is not an instruction!\n"; - } - return false; - } - // If we find an 'and' operation, then we don't need to - // find the next operation as we already know the - // bits that are valid at this point. - if (andOp) { - return true; - } - if (src->getOpcode() == Instruction::Shl && !shift) { - shift = dyn_cast<Constant>(src->getOperand(1)); - src = dyn_cast<Instruction>(src->getOperand(0)); - } else if (src->getOpcode() == Instruction::And && !mask) { - mask = dyn_cast<Constant>(src->getOperand(1)); - } - if (!mask && !shift) { - if (mDebug) { - dbgs() << "Failed setup since both mask and shift are NULL!\n"; - } - // Did not find a constant mask or a shift. - return false; - } - return true; -} -bool -AMDGPUPeepholeOpt::optimizeBitInsert(Instruction *inst) { - if (!inst) { - return false; - } - if (!inst->isBinaryOp()) { - return false; - } - if (inst->getOpcode() != Instruction::Or) { - return false; - } - if (optLevel == CodeGenOpt::None) { - return false; - } - // We want to do an optimization on a sequence of ops that in the end equals a - // single ISA instruction. 
- // The base pattern for this optimization is - ((A & B) << C) | ((D & E) << F) - // Some simplified versions of this pattern are as follows: - // (A & B) | (D & E) when B & E == 0 && C == 0 && F == 0 - // ((A & B) << C) | (D & E) when B ^ E == 0 && (1 << C) >= E - // (A & B) | ((D & E) << F) when B ^ E == 0 && (1 << F) >= B - // (A & B) | (D << F) when (1 << F) >= B - // (A << C) | (D & E) when (1 << C) >= E - if (mSTM->device()->getGeneration() == AMDGPUDeviceInfo::HD4XXX) { - // The HD4XXX hardware doesn't support the ubit_insert instruction. - return false; - } - Type *aType = inst->getType(); - bool isVector = aType->isVectorTy(); - int numEle = 1; - // This optimization only works on 32bit integers. - if (aType->getScalarType() - != Type::getInt32Ty(inst->getContext())) { - return false; - } - if (isVector) { - const VectorType *VT = dyn_cast<VectorType>(aType); - numEle = VT->getNumElements(); - // We currently cannot support more than 4 elements in a intrinsic and we - // cannot support Vec3 types. - if (numEle > 4 || numEle == 3) { - return false; - } - } - // TODO: Handle vectors. - if (isVector) { - if (mDebug) { - dbgs() << "!!! Vectors are not supported yet!\n"; - } - return false; - } - Instruction *LHSSrc = NULL, *RHSSrc = NULL; - Constant *LHSMask = NULL, *RHSMask = NULL; - Constant *LHSShift = NULL, *RHSShift = NULL; - Instruction *LHS = dyn_cast<Instruction>(inst->getOperand(0)); - Instruction *RHS = dyn_cast<Instruction>(inst->getOperand(1)); - if (!setupBitInsert(LHS, LHSSrc, LHSMask, LHSShift)) { - if (mDebug) { - dbgs() << "Found an OR Operation that failed setup!\n"; - inst->dump(); - if (LHS) { LHS->dump(); } - if (LHSSrc) { LHSSrc->dump(); } - if (LHSMask) { LHSMask->dump(); } - if (LHSShift) { LHSShift->dump(); } - } - // There was an issue with the setup for BitInsert. - return false; - } - if (!setupBitInsert(RHS, RHSSrc, RHSMask, RHSShift)) { - if (mDebug) { - dbgs() << "Found an OR Operation that failed setup!\n"; - inst->dump(); - if (RHS) { RHS->dump(); } - if (RHSSrc) { RHSSrc->dump(); } - if (RHSMask) { RHSMask->dump(); } - if (RHSShift) { RHSShift->dump(); } - } - // There was an issue with the setup for BitInsert. - return false; - } - if (mDebug) { - dbgs() << "Found an OR operation that can possible be optimized to ubit insert!\n"; - dbgs() << "Op: "; inst->dump(); - dbgs() << "LHS: "; if (LHS) { LHS->dump(); } else { dbgs() << "(None)\n"; } - dbgs() << "LHS Src: "; if (LHSSrc) { LHSSrc->dump(); } else { dbgs() << "(None)\n"; } - dbgs() << "LHS Mask: "; if (LHSMask) { LHSMask->dump(); } else { dbgs() << "(None)\n"; } - dbgs() << "LHS Shift: "; if (LHSShift) { LHSShift->dump(); } else { dbgs() << "(None)\n"; } - dbgs() << "RHS: "; if (RHS) { RHS->dump(); } else { dbgs() << "(None)\n"; } - dbgs() << "RHS Src: "; if (RHSSrc) { RHSSrc->dump(); } else { dbgs() << "(None)\n"; } - dbgs() << "RHS Mask: "; if (RHSMask) { RHSMask->dump(); } else { dbgs() << "(None)\n"; } - dbgs() << "RHS Shift: "; if (RHSShift) { RHSShift->dump(); } else { dbgs() << "(None)\n"; } - } - Constant *offset = NULL; - Constant *width = NULL; - uint32_t lhsMaskVal = 0, rhsMaskVal = 0; - uint32_t lhsShiftVal = 0, rhsShiftVal = 0; - uint32_t lhsMaskWidth = 0, rhsMaskWidth = 0; - uint32_t lhsMaskOffset = 0, rhsMaskOffset = 0; - lhsMaskVal = (LHSMask - ? dyn_cast<ConstantInt>(LHSMask)->getZExtValue() : 0); - rhsMaskVal = (RHSMask - ? dyn_cast<ConstantInt>(RHSMask)->getZExtValue() : 0); - lhsShiftVal = (LHSShift - ? 
dyn_cast<ConstantInt>(LHSShift)->getZExtValue() : 0); - rhsShiftVal = (RHSShift - ? dyn_cast<ConstantInt>(RHSShift)->getZExtValue() : 0); - lhsMaskWidth = lhsMaskVal ? CountPopulation_32(lhsMaskVal) : 32 - lhsShiftVal; - rhsMaskWidth = rhsMaskVal ? CountPopulation_32(rhsMaskVal) : 32 - rhsShiftVal; - lhsMaskOffset = lhsMaskVal ? CountTrailingZeros_32(lhsMaskVal) : lhsShiftVal; - rhsMaskOffset = rhsMaskVal ? CountTrailingZeros_32(rhsMaskVal) : rhsShiftVal; - // TODO: Handle the case of A & B | D & ~B(i.e. inverted masks). - if ((lhsMaskVal || rhsMaskVal) && !(lhsMaskVal ^ rhsMaskVal)) { - return false; - } - if (lhsMaskOffset >= (rhsMaskWidth + rhsMaskOffset)) { - offset = ConstantInt::get(aType, lhsMaskOffset, false); - width = ConstantInt::get(aType, lhsMaskWidth, false); - RHSSrc = RHS; - if (!isMask_32(lhsMaskVal) && !isShiftedMask_32(lhsMaskVal)) { - return false; - } - if (!LHSShift) { - LHSSrc = BinaryOperator::Create(Instruction::LShr, LHSSrc, offset, - "MaskShr", LHS); - } else if (lhsShiftVal != lhsMaskOffset) { - LHSSrc = BinaryOperator::Create(Instruction::LShr, LHSSrc, offset, - "MaskShr", LHS); - } - if (mDebug) { - dbgs() << "Optimizing LHS!\n"; - } - } else if (rhsMaskOffset >= (lhsMaskWidth + lhsMaskOffset)) { - offset = ConstantInt::get(aType, rhsMaskOffset, false); - width = ConstantInt::get(aType, rhsMaskWidth, false); - LHSSrc = RHSSrc; - RHSSrc = LHS; - if (!isMask_32(rhsMaskVal) && !isShiftedMask_32(rhsMaskVal)) { - return false; - } - if (!RHSShift) { - LHSSrc = BinaryOperator::Create(Instruction::LShr, LHSSrc, offset, - "MaskShr", RHS); - } else if (rhsShiftVal != rhsMaskOffset) { - LHSSrc = BinaryOperator::Create(Instruction::LShr, LHSSrc, offset, - "MaskShr", RHS); - } - if (mDebug) { - dbgs() << "Optimizing RHS!\n"; - } - } else { - if (mDebug) { - dbgs() << "Failed constraint 3!\n"; - } - return false; - } - if (mDebug) { - dbgs() << "Width: "; if (width) { width->dump(); } else { dbgs() << "(0)\n"; } - dbgs() << "Offset: "; if (offset) { offset->dump(); } else { dbgs() << "(0)\n"; } - dbgs() << "LHSSrc: "; if (LHSSrc) { LHSSrc->dump(); } else { dbgs() << "(0)\n"; } - dbgs() << "RHSSrc: "; if (RHSSrc) { RHSSrc->dump(); } else { dbgs() << "(0)\n"; } - } - if (!offset || !width) { - if (mDebug) { - dbgs() << "Either width or offset are NULL, failed detection!\n"; - } - return false; - } - // Lets create the function signature. 
- std::vector<Type *> callTypes; - callTypes.push_back(aType); - callTypes.push_back(aType); - callTypes.push_back(aType); - callTypes.push_back(aType); - FunctionType *funcType = FunctionType::get(aType, callTypes, false); - std::string name = "__amdil_ubit_insert"; - if (isVector) { name += "_v" + itostr(numEle) + "u32"; } else { name += "_u32"; } - Function *Func = - dyn_cast<Function>(inst->getParent()->getParent()->getParent()-> - getOrInsertFunction(StringRef(name), funcType)); - Value *Operands[4] = { - width, - offset, - LHSSrc, - RHSSrc - }; - CallInst *CI = CallInst::Create(Func, Operands, "BitInsertOpt"); - if (mDebug) { - dbgs() << "Old Inst: "; - inst->dump(); - dbgs() << "New Inst: "; - CI->dump(); - dbgs() << "\n\n"; - } - CI->insertBefore(inst); - inst->replaceAllUsesWith(CI); - return true; -} - -bool -AMDGPUPeepholeOpt::optimizeBitExtract(Instruction *inst) { - if (!inst) { - return false; - } - if (!inst->isBinaryOp()) { - return false; - } - if (inst->getOpcode() != Instruction::And) { - return false; - } - if (optLevel == CodeGenOpt::None) { - return false; - } - // We want to do some simple optimizations on Shift right/And patterns. The - // basic optimization is to turn (A >> B) & C where A is a 32bit type, B is a - // value smaller than 32 and C is a mask. If C is a constant value, then the - // following transformation can occur. For signed integers, it turns into the - // function call dst = __amdil_ibit_extract(log2(C), B, A) For unsigned - // integers, it turns into the function call dst = - // __amdil_ubit_extract(log2(C), B, A) The function __amdil_[u|i]bit_extract - // can be found in Section 7.9 of the ATI IL spec of the stream SDK for - // Evergreen hardware. - if (mSTM->device()->getGeneration() == AMDGPUDeviceInfo::HD4XXX) { - // This does not work on HD4XXX hardware. - return false; - } - Type *aType = inst->getType(); - bool isVector = aType->isVectorTy(); - - // XXX Support vector types - if (isVector) { - return false; - } - int numEle = 1; - // This only works on 32bit integers - if (aType->getScalarType() - != Type::getInt32Ty(inst->getContext())) { - return false; - } - if (isVector) { - const VectorType *VT = dyn_cast<VectorType>(aType); - numEle = VT->getNumElements(); - // We currently cannot support more than 4 elements in a intrinsic and we - // cannot support Vec3 types. - if (numEle > 4 || numEle == 3) { - return false; - } - } - BinaryOperator *ShiftInst = dyn_cast<BinaryOperator>(inst->getOperand(0)); - // If the first operand is not a shift instruction, then we can return as it - // doesn't match this pattern. - if (!ShiftInst || !ShiftInst->isShift()) { - return false; - } - // If we are a shift left, then we need don't match this pattern. - if (ShiftInst->getOpcode() == Instruction::Shl) { - return false; - } - bool isSigned = ShiftInst->isArithmeticShift(); - Constant *AndMask = dyn_cast<Constant>(inst->getOperand(1)); - Constant *ShrVal = dyn_cast<Constant>(ShiftInst->getOperand(1)); - // Lets make sure that the shift value and the and mask are constant integers. 
- if (!AndMask || !ShrVal) { - return false; - } - Constant *newMaskConst; - Constant *shiftValConst; - if (isVector) { - // Handle the vector case - std::vector<Constant *> maskVals; - std::vector<Constant *> shiftVals; - ConstantVector *AndMaskVec = dyn_cast<ConstantVector>(AndMask); - ConstantVector *ShrValVec = dyn_cast<ConstantVector>(ShrVal); - Type *scalarType = AndMaskVec->getType()->getScalarType(); - assert(AndMaskVec->getNumOperands() == - ShrValVec->getNumOperands() && "cannot have a " - "combination where the number of elements to a " - "shift and an and are different!"); - for (size_t x = 0, y = AndMaskVec->getNumOperands(); x < y; ++x) { - ConstantInt *AndCI = dyn_cast<ConstantInt>(AndMaskVec->getOperand(x)); - ConstantInt *ShiftIC = dyn_cast<ConstantInt>(ShrValVec->getOperand(x)); - if (!AndCI || !ShiftIC) { - return false; - } - uint32_t maskVal = (uint32_t)AndCI->getZExtValue(); - if (!isMask_32(maskVal)) { - return false; - } - maskVal = (uint32_t)CountTrailingOnes_32(maskVal); - uint32_t shiftVal = (uint32_t)ShiftIC->getZExtValue(); - // If the mask or shiftval is greater than the bitcount, then break out. - if (maskVal >= 32 || shiftVal >= 32) { - return false; - } - // If the mask val is greater than the the number of original bits left - // then this optimization is invalid. - if (maskVal > (32 - shiftVal)) { - return false; - } - maskVals.push_back(ConstantInt::get(scalarType, maskVal, isSigned)); - shiftVals.push_back(ConstantInt::get(scalarType, shiftVal, isSigned)); - } - newMaskConst = ConstantVector::get(maskVals); - shiftValConst = ConstantVector::get(shiftVals); - } else { - // Handle the scalar case - uint32_t maskVal = (uint32_t)dyn_cast<ConstantInt>(AndMask)->getZExtValue(); - // This must be a mask value where all lower bits are set to 1 and then any - // bit higher is set to 0. - if (!isMask_32(maskVal)) { - return false; - } - maskVal = (uint32_t)CountTrailingOnes_32(maskVal); - // Count the number of bits set in the mask, this is the width of the - // resulting bit set that is extracted from the source value. - uint32_t shiftVal = (uint32_t)dyn_cast<ConstantInt>(ShrVal)->getZExtValue(); - // If the mask or shift val is greater than the bitcount, then break out. - if (maskVal >= 32 || shiftVal >= 32) { - return false; - } - // If the mask val is greater than the the number of original bits left then - // this optimization is invalid. - if (maskVal > (32 - shiftVal)) { - return false; - } - newMaskConst = ConstantInt::get(aType, maskVal, isSigned); - shiftValConst = ConstantInt::get(aType, shiftVal, isSigned); - } - // Lets create the function signature. - std::vector<Type *> callTypes; - callTypes.push_back(aType); - callTypes.push_back(aType); - callTypes.push_back(aType); - FunctionType *funcType = FunctionType::get(aType, callTypes, false); - std::string name = "llvm.AMDGPU.bit.extract.u32"; - if (isVector) { - name += ".v" + itostr(numEle) + "i32"; - } else { - name += "."; - } - // Lets create the function. 
- Function *Func = - dyn_cast<Function>(inst->getParent()->getParent()->getParent()-> - getOrInsertFunction(StringRef(name), funcType)); - Value *Operands[3] = { - ShiftInst->getOperand(0), - shiftValConst, - newMaskConst - }; - // Lets create the Call with the operands - CallInst *CI = CallInst::Create(Func, Operands, "ByteExtractOpt"); - CI->setDoesNotAccessMemory(); - CI->insertBefore(inst); - inst->replaceAllUsesWith(CI); - return true; -} - -bool -AMDGPUPeepholeOpt::expandBFI(CallInst *CI) { - if (!CI) { - return false; - } - Value *LHS = CI->getOperand(CI->getNumOperands() - 1); - if (!LHS->getName().startswith("__amdil_bfi")) { - return false; - } - Type* type = CI->getOperand(0)->getType(); - Constant *negOneConst = NULL; - if (type->isVectorTy()) { - std::vector<Constant *> negOneVals; - negOneConst = ConstantInt::get(CI->getContext(), - APInt(32, StringRef("-1"), 10)); - for (size_t x = 0, - y = dyn_cast<VectorType>(type)->getNumElements(); x < y; ++x) { - negOneVals.push_back(negOneConst); - } - negOneConst = ConstantVector::get(negOneVals); - } else { - negOneConst = ConstantInt::get(CI->getContext(), - APInt(32, StringRef("-1"), 10)); - } - // __amdil_bfi => (A & B) | (~A & C) - BinaryOperator *lhs = - BinaryOperator::Create(Instruction::And, CI->getOperand(0), - CI->getOperand(1), "bfi_and", CI); - BinaryOperator *rhs = - BinaryOperator::Create(Instruction::Xor, CI->getOperand(0), negOneConst, - "bfi_not", CI); - rhs = BinaryOperator::Create(Instruction::And, rhs, CI->getOperand(2), - "bfi_and", CI); - lhs = BinaryOperator::Create(Instruction::Or, lhs, rhs, "bfi_or", CI); - CI->replaceAllUsesWith(lhs); - return true; -} - -bool -AMDGPUPeepholeOpt::expandBFM(CallInst *CI) { - if (!CI) { - return false; - } - Value *LHS = CI->getOperand(CI->getNumOperands() - 1); - if (!LHS->getName().startswith("__amdil_bfm")) { - return false; - } - // __amdil_bfm => ((1 << (src0 & 0x1F)) - 1) << (src1 & 0x1f) - Constant *newMaskConst = NULL; - Constant *newShiftConst = NULL; - Type* type = CI->getOperand(0)->getType(); - if (type->isVectorTy()) { - std::vector<Constant*> newMaskVals, newShiftVals; - newMaskConst = ConstantInt::get(Type::getInt32Ty(*mCTX), 0x1F); - newShiftConst = ConstantInt::get(Type::getInt32Ty(*mCTX), 1); - for (size_t x = 0, - y = dyn_cast<VectorType>(type)->getNumElements(); x < y; ++x) { - newMaskVals.push_back(newMaskConst); - newShiftVals.push_back(newShiftConst); - } - newMaskConst = ConstantVector::get(newMaskVals); - newShiftConst = ConstantVector::get(newShiftVals); - } else { - newMaskConst = ConstantInt::get(Type::getInt32Ty(*mCTX), 0x1F); - newShiftConst = ConstantInt::get(Type::getInt32Ty(*mCTX), 1); - } - BinaryOperator *lhs = - BinaryOperator::Create(Instruction::And, CI->getOperand(0), - newMaskConst, "bfm_mask", CI); - lhs = BinaryOperator::Create(Instruction::Shl, newShiftConst, - lhs, "bfm_shl", CI); - lhs = BinaryOperator::Create(Instruction::Sub, lhs, - newShiftConst, "bfm_sub", CI); - BinaryOperator *rhs = - BinaryOperator::Create(Instruction::And, CI->getOperand(1), - newMaskConst, "bfm_mask", CI); - lhs = BinaryOperator::Create(Instruction::Shl, lhs, rhs, "bfm_shl", CI); - CI->replaceAllUsesWith(lhs); - return true; -} - -bool -AMDGPUPeepholeOpt::instLevelOptimizations(BasicBlock::iterator *bbb) { - Instruction *inst = (*bbb); - if (optimizeCallInst(bbb)) { - return true; - } - if (optimizeBitExtract(inst)) { - return false; - } - if (optimizeBitInsert(inst)) { - return false; - } - if (correctMisalignedMemOp(inst)) { - return false; - } - return 
false; -} -bool -AMDGPUPeepholeOpt::correctMisalignedMemOp(Instruction *inst) { - LoadInst *linst = dyn_cast<LoadInst>(inst); - StoreInst *sinst = dyn_cast<StoreInst>(inst); - unsigned alignment; - Type* Ty = inst->getType(); - if (linst) { - alignment = linst->getAlignment(); - Ty = inst->getType(); - } else if (sinst) { - alignment = sinst->getAlignment(); - Ty = sinst->getValueOperand()->getType(); - } else { - return false; - } - unsigned size = getTypeSize(Ty); - if (size == alignment || size < alignment) { - return false; - } - if (!Ty->isStructTy()) { - return false; - } - if (alignment < 4) { - if (linst) { - linst->setAlignment(0); - return true; - } else if (sinst) { - sinst->setAlignment(0); - return true; - } - } - return false; -} -bool -AMDGPUPeepholeOpt::isSigned24BitOps(CallInst *CI) { - if (!CI) { - return false; - } - Value *LHS = CI->getOperand(CI->getNumOperands() - 1); - std::string namePrefix = LHS->getName().substr(0, 14); - if (namePrefix != "__amdil_imad24" && namePrefix != "__amdil_imul24" - && namePrefix != "__amdil__imul24_high") { - return false; - } - if (mSTM->device()->usesHardware(AMDGPUDeviceInfo::Signed24BitOps)) { - return false; - } - return true; -} - -void -AMDGPUPeepholeOpt::expandSigned24BitOps(CallInst *CI) { - assert(isSigned24BitOps(CI) && "Must be a " - "signed 24 bit operation to call this function!"); - Value *LHS = CI->getOperand(CI->getNumOperands()-1); - // On 7XX and 8XX we do not have signed 24bit, so we need to - // expand it to the following: - // imul24 turns into 32bit imul - // imad24 turns into 32bit imad - // imul24_high turns into 32bit imulhigh - if (LHS->getName().substr(0, 14) == "__amdil_imad24") { - Type *aType = CI->getOperand(0)->getType(); - bool isVector = aType->isVectorTy(); - int numEle = isVector ? dyn_cast<VectorType>(aType)->getNumElements() : 1; - std::vector<Type*> callTypes; - callTypes.push_back(CI->getOperand(0)->getType()); - callTypes.push_back(CI->getOperand(1)->getType()); - callTypes.push_back(CI->getOperand(2)->getType()); - FunctionType *funcType = - FunctionType::get(CI->getOperand(0)->getType(), callTypes, false); - std::string name = "__amdil_imad"; - if (isVector) { - name += "_v" + itostr(numEle) + "i32"; - } else { - name += "_i32"; - } - Function *Func = dyn_cast<Function>( - CI->getParent()->getParent()->getParent()-> - getOrInsertFunction(StringRef(name), funcType)); - Value *Operands[3] = { - CI->getOperand(0), - CI->getOperand(1), - CI->getOperand(2) - }; - CallInst *nCI = CallInst::Create(Func, Operands, "imad24"); - nCI->insertBefore(CI); - CI->replaceAllUsesWith(nCI); - } else if (LHS->getName().substr(0, 14) == "__amdil_imul24") { - BinaryOperator *mulOp = - BinaryOperator::Create(Instruction::Mul, CI->getOperand(0), - CI->getOperand(1), "imul24", CI); - CI->replaceAllUsesWith(mulOp); - } else if (LHS->getName().substr(0, 19) == "__amdil_imul24_high") { - Type *aType = CI->getOperand(0)->getType(); - - bool isVector = aType->isVectorTy(); - int numEle = isVector ? 
dyn_cast<VectorType>(aType)->getNumElements() : 1; - std::vector<Type*> callTypes; - callTypes.push_back(CI->getOperand(0)->getType()); - callTypes.push_back(CI->getOperand(1)->getType()); - FunctionType *funcType = - FunctionType::get(CI->getOperand(0)->getType(), callTypes, false); - std::string name = "__amdil_imul_high"; - if (isVector) { - name += "_v" + itostr(numEle) + "i32"; - } else { - name += "_i32"; - } - Function *Func = dyn_cast<Function>( - CI->getParent()->getParent()->getParent()-> - getOrInsertFunction(StringRef(name), funcType)); - Value *Operands[2] = { - CI->getOperand(0), - CI->getOperand(1) - }; - CallInst *nCI = CallInst::Create(Func, Operands, "imul24_high"); - nCI->insertBefore(CI); - CI->replaceAllUsesWith(nCI); - } -} - -bool -AMDGPUPeepholeOpt::isRWGLocalOpt(CallInst *CI) { - return (CI != NULL - && CI->getOperand(CI->getNumOperands() - 1)->getName() - == "__amdil_get_local_size_int"); -} - -bool -AMDGPUPeepholeOpt::convertAccurateDivide(CallInst *CI) { - if (!CI) { - return false; - } - if (mSTM->device()->getGeneration() == AMDGPUDeviceInfo::HD6XXX - && (mSTM->getDeviceName() == "cayman")) { - return false; - } - return CI->getOperand(CI->getNumOperands() - 1)->getName().substr(0, 20) - == "__amdil_improved_div"; -} - -void -AMDGPUPeepholeOpt::expandAccurateDivide(CallInst *CI) { - assert(convertAccurateDivide(CI) - && "expanding accurate divide can only happen if it is expandable!"); - BinaryOperator *divOp = - BinaryOperator::Create(Instruction::FDiv, CI->getOperand(0), - CI->getOperand(1), "fdiv32", CI); - CI->replaceAllUsesWith(divOp); -} - -bool -AMDGPUPeepholeOpt::propagateSamplerInst(CallInst *CI) { - if (optLevel != CodeGenOpt::None) { - return false; - } - - if (!CI) { - return false; - } - - unsigned funcNameIdx = 0; - funcNameIdx = CI->getNumOperands() - 1; - StringRef calleeName = CI->getOperand(funcNameIdx)->getName(); - if (calleeName != "__amdil_image2d_read_norm" - && calleeName != "__amdil_image2d_read_unnorm" - && calleeName != "__amdil_image3d_read_norm" - && calleeName != "__amdil_image3d_read_unnorm") { - return false; - } - - unsigned samplerIdx = 2; - samplerIdx = 1; - Value *sampler = CI->getOperand(samplerIdx); - LoadInst *lInst = dyn_cast<LoadInst>(sampler); - if (!lInst) { - return false; - } - - if (lInst->getPointerAddressSpace() != AMDGPUAS::PRIVATE_ADDRESS) { - return false; - } - - GlobalVariable *gv = dyn_cast<GlobalVariable>(lInst->getPointerOperand()); - // If we are loading from what is not a global value, then we - // fail and return. - if (!gv) { - return false; - } - - // If we don't have an initializer or we have an initializer and - // the initializer is not a 32bit integer, we fail. - if (!gv->hasInitializer() - || !gv->getInitializer()->getType()->isIntegerTy(32)) { - return false; - } - - // Now that we have the global variable initializer, lets replace - // all uses of the load instruction with the samplerVal and - // reparse the __amdil_is_constant() function. 
- Constant *samplerVal = gv->getInitializer(); - lInst->replaceAllUsesWith(samplerVal); - return true; -} - -bool -AMDGPUPeepholeOpt::doInitialization(Module &M) { - return false; -} - -bool -AMDGPUPeepholeOpt::doFinalization(Module &M) { - return false; -} - -void -AMDGPUPeepholeOpt::getAnalysisUsage(AnalysisUsage &AU) const { - AU.addRequired<MachineFunctionAnalysis>(); - FunctionPass::getAnalysisUsage(AU); - AU.setPreservesAll(); -} - -size_t AMDGPUPeepholeOpt::getTypeSize(Type * const T, bool dereferencePtr) { - size_t size = 0; - if (!T) { - return size; - } - switch (T->getTypeID()) { - case Type::X86_FP80TyID: - case Type::FP128TyID: - case Type::PPC_FP128TyID: - case Type::LabelTyID: - assert(0 && "These types are not supported by this backend"); - default: - case Type::FloatTyID: - case Type::DoubleTyID: - size = T->getPrimitiveSizeInBits() >> 3; - break; - case Type::PointerTyID: - size = getTypeSize(dyn_cast<PointerType>(T), dereferencePtr); - break; - case Type::IntegerTyID: - size = getTypeSize(dyn_cast<IntegerType>(T), dereferencePtr); - break; - case Type::StructTyID: - size = getTypeSize(dyn_cast<StructType>(T), dereferencePtr); - break; - case Type::ArrayTyID: - size = getTypeSize(dyn_cast<ArrayType>(T), dereferencePtr); - break; - case Type::FunctionTyID: - size = getTypeSize(dyn_cast<FunctionType>(T), dereferencePtr); - break; - case Type::VectorTyID: - size = getTypeSize(dyn_cast<VectorType>(T), dereferencePtr); - break; - }; - return size; -} - -size_t AMDGPUPeepholeOpt::getTypeSize(StructType * const ST, - bool dereferencePtr) { - size_t size = 0; - if (!ST) { - return size; - } - Type *curType; - StructType::element_iterator eib; - StructType::element_iterator eie; - for (eib = ST->element_begin(), eie = ST->element_end(); eib != eie; ++eib) { - curType = *eib; - size += getTypeSize(curType, dereferencePtr); - } - return size; -} - -size_t AMDGPUPeepholeOpt::getTypeSize(IntegerType * const IT, - bool dereferencePtr) { - return IT ? (IT->getBitWidth() >> 3) : 0; -} - -size_t AMDGPUPeepholeOpt::getTypeSize(FunctionType * const FT, - bool dereferencePtr) { - assert(0 && "Should not be able to calculate the size of an function type"); - return 0; -} - -size_t AMDGPUPeepholeOpt::getTypeSize(ArrayType * const AT, - bool dereferencePtr) { - return (size_t)(AT ? (getTypeSize(AT->getElementType(), - dereferencePtr) * AT->getNumElements()) - : 0); -} - -size_t AMDGPUPeepholeOpt::getTypeSize(VectorType * const VT, - bool dereferencePtr) { - return VT ? 
(VT->getBitWidth() >> 3) : 0; -} - -size_t AMDGPUPeepholeOpt::getTypeSize(PointerType * const PT, - bool dereferencePtr) { - if (!PT) { - return 0; - } - Type *CT = PT->getElementType(); - if (CT->getTypeID() == Type::StructTyID && - PT->getAddressSpace() == AMDGPUAS::PRIVATE_ADDRESS) { - return getTypeSize(dyn_cast<StructType>(CT)); - } else if (dereferencePtr) { - size_t size = 0; - for (size_t x = 0, y = PT->getNumContainedTypes(); x < y; ++x) { - size += getTypeSize(PT->getContainedType(x), dereferencePtr); - } - return size; - } else { - return 4; - } -} - -size_t AMDGPUPeepholeOpt::getTypeSize(OpaqueType * const OT, - bool dereferencePtr) { - //assert(0 && "Should not be able to calculate the size of an opaque type"); - return 4; -} diff --git a/contrib/llvm/lib/Target/R600/InstPrinter/AMDGPUInstPrinter.cpp b/contrib/llvm/lib/Target/R600/InstPrinter/AMDGPUInstPrinter.cpp index 10547a5..303cdf2 100644 --- a/contrib/llvm/lib/Target/R600/InstPrinter/AMDGPUInstPrinter.cpp +++ b/contrib/llvm/lib/Target/R600/InstPrinter/AMDGPUInstPrinter.cpp @@ -17,6 +17,7 @@ using namespace llvm; void AMDGPUInstPrinter::printInst(const MCInst *MI, raw_ostream &OS, StringRef Annot) { + OS.flush(); printInstruction(MI, OS); printAnnotation(OS, Annot); @@ -67,11 +68,14 @@ void AMDGPUInstPrinter::printMemOperand(const MCInst *MI, unsigned OpNo, } void AMDGPUInstPrinter::printIfSet(const MCInst *MI, unsigned OpNo, - raw_ostream &O, StringRef Asm) { + raw_ostream &O, StringRef Asm, + StringRef Default) { const MCOperand &Op = MI->getOperand(OpNo); assert(Op.isImm()); if (Op.getImm() == 1) { O << Asm; + } else { + O << Default; } } @@ -98,7 +102,7 @@ void AMDGPUInstPrinter::printLiteral(const MCInst *MI, unsigned OpNo, void AMDGPUInstPrinter::printLast(const MCInst *MI, unsigned OpNo, raw_ostream &O) { - printIfSet(MI, OpNo, O, " *"); + printIfSet(MI, OpNo, O.indent(20 - O.GetNumBytesInBuffer()), "*", " "); } void AMDGPUInstPrinter::printNeg(const MCInst *MI, unsigned OpNo, @@ -169,4 +173,41 @@ void AMDGPUInstPrinter::printSel(const MCInst *MI, unsigned OpNo, O << "." 
<< chans[chan]; } +void AMDGPUInstPrinter::printBankSwizzle(const MCInst *MI, unsigned OpNo, + raw_ostream &O) { + int BankSwizzle = MI->getOperand(OpNo).getImm(); + switch (BankSwizzle) { + case 1: + O << "BS:VEC_021"; + break; + case 2: + O << "BS:VEC_120"; + break; + case 3: + O << "BS:VEC_102"; + break; + case 4: + O << "BS:VEC_201"; + break; + case 5: + O << "BS:VEC_210"; + break; + default: + break; + } + return; +} + +void AMDGPUInstPrinter::printKCache(const MCInst *MI, unsigned OpNo, + raw_ostream &O) { + int KCacheMode = MI->getOperand(OpNo).getImm(); + if (KCacheMode > 0) { + int KCacheBank = MI->getOperand(OpNo - 2).getImm(); + O << "CB" << KCacheBank <<":"; + int KCacheAddr = MI->getOperand(OpNo + 2).getImm(); + int LineSize = (KCacheMode == 1)?16:32; + O << KCacheAddr * 16 << "-" << KCacheAddr * 16 + LineSize; + } +} + #include "AMDGPUGenAsmWriter.inc" diff --git a/contrib/llvm/lib/Target/R600/InstPrinter/AMDGPUInstPrinter.h b/contrib/llvm/lib/Target/R600/InstPrinter/AMDGPUInstPrinter.h index 767a708..c6fd053 100644 --- a/contrib/llvm/lib/Target/R600/InstPrinter/AMDGPUInstPrinter.h +++ b/contrib/llvm/lib/Target/R600/InstPrinter/AMDGPUInstPrinter.h @@ -35,7 +35,8 @@ private: void printOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O); void printInterpSlot(const MCInst *MI, unsigned OpNum, raw_ostream &O); void printMemOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O); - void printIfSet(const MCInst *MI, unsigned OpNo, raw_ostream &O, StringRef Asm); + void printIfSet(const MCInst *MI, unsigned OpNo, raw_ostream &O, + StringRef Asm, StringRef Default = ""); void printAbs(const MCInst *MI, unsigned OpNo, raw_ostream &O); void printClamp(const MCInst *MI, unsigned OpNo, raw_ostream &O); void printLiteral(const MCInst *MI, unsigned OpNo, raw_ostream &O); @@ -47,6 +48,8 @@ private: void printUpdatePred(const MCInst *MI, unsigned OpNo, raw_ostream &O); void printWrite(const MCInst *MI, unsigned OpNo, raw_ostream &O); void printSel(const MCInst *MI, unsigned OpNo, raw_ostream &O); + void printBankSwizzle(const MCInst *MI, unsigned OpNo, raw_ostream &O); + void printKCache(const MCInst *MI, unsigned OpNo, raw_ostream &O); }; } // End namespace llvm diff --git a/contrib/llvm/lib/Target/R600/MCTargetDesc/AMDGPUAsmBackend.cpp b/contrib/llvm/lib/Target/R600/MCTargetDesc/AMDGPUAsmBackend.cpp index 98fca43..a3397f3 100644 --- a/contrib/llvm/lib/Target/R600/MCTargetDesc/AMDGPUAsmBackend.cpp +++ b/contrib/llvm/lib/Target/R600/MCTargetDesc/AMDGPUAsmBackend.cpp @@ -44,7 +44,6 @@ public: AMDGPUAsmBackend(const Target &T) : MCAsmBackend() {} - virtual AMDGPUMCObjectWriter *createObjectWriter(raw_ostream &OS) const; virtual unsigned getNumFixupKinds() const { return 0; }; virtual void applyFixup(const MCFixup &Fixup, char *Data, unsigned DataSize, uint64_t Value) const; @@ -71,16 +70,6 @@ void AMDGPUMCObjectWriter::WriteObject(MCAssembler &Asm, } } -MCAsmBackend *llvm::createAMDGPUAsmBackend(const Target &T, StringRef TT, - StringRef CPU) { - return new AMDGPUAsmBackend(T); -} - -AMDGPUMCObjectWriter * AMDGPUAsmBackend::createObjectWriter( - raw_ostream &OS) const { - return new AMDGPUMCObjectWriter(OS); -} - void AMDGPUAsmBackend::applyFixup(const MCFixup &Fixup, char *Data, unsigned DataSize, uint64_t Value) const { @@ -88,3 +77,21 @@ void AMDGPUAsmBackend::applyFixup(const MCFixup &Fixup, char *Data, assert(Fixup.getKind() == FK_PCRel_4); *Dst = (Value - 4) / 4; } + +//===----------------------------------------------------------------------===// +// ELFAMDGPUAsmBackend class 
+//===----------------------------------------------------------------------===// + +class ELFAMDGPUAsmBackend : public AMDGPUAsmBackend { +public: + ELFAMDGPUAsmBackend(const Target &T) : AMDGPUAsmBackend(T) { } + + MCObjectWriter *createObjectWriter(raw_ostream &OS) const { + return createAMDGPUELFObjectWriter(OS); + } +}; + +MCAsmBackend *llvm::createAMDGPUAsmBackend(const Target &T, StringRef TT, + StringRef CPU) { + return new ELFAMDGPUAsmBackend(T); +} diff --git a/contrib/llvm/lib/Target/R600/MCTargetDesc/AMDGPUELFObjectWriter.cpp b/contrib/llvm/lib/Target/R600/MCTargetDesc/AMDGPUELFObjectWriter.cpp new file mode 100644 index 0000000..48fac9f --- /dev/null +++ b/contrib/llvm/lib/Target/R600/MCTargetDesc/AMDGPUELFObjectWriter.cpp @@ -0,0 +1,39 @@ +//===-- AMDGPUELFObjectWriter.cpp - AMDGPU ELF Writer ----------------------==// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +/// \file +//===----------------------------------------------------------------------===// + +#include "AMDGPUMCTargetDesc.h" +#include "llvm/MC/MCELFObjectWriter.h" + +using namespace llvm; + +namespace { + +class AMDGPUELFObjectWriter : public MCELFObjectTargetWriter { +public: + AMDGPUELFObjectWriter(); +protected: + virtual unsigned GetRelocType(const MCValue &Target, const MCFixup &Fixup, + bool IsPCRel, bool IsRelocWithSymbol, + int64_t Addend) const { + llvm_unreachable("Not implemented"); + } + +}; + + +} // End anonymous namespace + +AMDGPUELFObjectWriter::AMDGPUELFObjectWriter() + : MCELFObjectTargetWriter(false, 0, 0, false) { } + +MCObjectWriter *llvm::createAMDGPUELFObjectWriter(raw_ostream &OS) { + MCELFObjectTargetWriter *MOTW = new AMDGPUELFObjectWriter(); + return createELFObjectWriter(MOTW, OS, true); +} diff --git a/contrib/llvm/lib/Target/R600/MCTargetDesc/AMDGPUMCAsmInfo.cpp b/contrib/llvm/lib/Target/R600/MCTargetDesc/AMDGPUMCAsmInfo.cpp index b7cdd7c..2aae26a 100644 --- a/contrib/llvm/lib/Target/R600/MCTargetDesc/AMDGPUMCAsmInfo.cpp +++ b/contrib/llvm/lib/Target/R600/MCTargetDesc/AMDGPUMCAsmInfo.cpp @@ -68,8 +68,6 @@ AMDGPUMCAsmInfo::AMDGPUMCAsmInfo(const Target &T, StringRef &TT) : MCAsmInfo() { //===--- Dwarf Emission Directives -----------------------------------===// HasLEB128 = true; SupportsDebugInformation = true; - DwarfSectionOffsetDirective = ".offset"; - } const char* diff --git a/contrib/llvm/lib/Target/R600/MCTargetDesc/AMDGPUMCTargetDesc.cpp b/contrib/llvm/lib/Target/R600/MCTargetDesc/AMDGPUMCTargetDesc.cpp index 072ee49..61d70bb 100644 --- a/contrib/llvm/lib/Target/R600/MCTargetDesc/AMDGPUMCTargetDesc.cpp +++ b/contrib/llvm/lib/Target/R600/MCTargetDesc/AMDGPUMCTargetDesc.cpp @@ -78,7 +78,7 @@ static MCCodeEmitter *createAMDGPUMCCodeEmitter(const MCInstrInfo &MCII, if (STI.getFeatureBits() & AMDGPU::Feature64BitPtr) { return createSIMCCodeEmitter(MCII, MRI, STI, Ctx); } else { - return createR600MCCodeEmitter(MCII, MRI, STI, Ctx); + return createR600MCCodeEmitter(MCII, MRI, STI); } } @@ -88,7 +88,7 @@ static MCStreamer *createMCStreamer(const Target &T, StringRef TT, MCCodeEmitter *_Emitter, bool RelaxAll, bool NoExecStack) { - return createPureStreamer(Ctx, MAB, _OS, _Emitter); + return createELFStreamer(Ctx, MAB, _OS, _Emitter, false, false); } extern "C" void LLVMInitializeR600TargetMC() { diff --git a/contrib/llvm/lib/Target/R600/MCTargetDesc/AMDGPUMCTargetDesc.h b/contrib/llvm/lib/Target/R600/MCTargetDesc/AMDGPUMCTargetDesc.h index 363a4af..abb0320 100644 
--- a/contrib/llvm/lib/Target/R600/MCTargetDesc/AMDGPUMCTargetDesc.h +++ b/contrib/llvm/lib/Target/R600/MCTargetDesc/AMDGPUMCTargetDesc.h @@ -23,16 +23,17 @@ class MCAsmBackend; class MCCodeEmitter; class MCContext; class MCInstrInfo; +class MCObjectWriter; class MCRegisterInfo; class MCSubtargetInfo; class Target; +class raw_ostream; extern Target TheAMDGPUTarget; MCCodeEmitter *createR600MCCodeEmitter(const MCInstrInfo &MCII, const MCRegisterInfo &MRI, - const MCSubtargetInfo &STI, - MCContext &Ctx); + const MCSubtargetInfo &STI); MCCodeEmitter *createSIMCCodeEmitter(const MCInstrInfo &MCII, const MCRegisterInfo &MRI, @@ -41,6 +42,8 @@ MCCodeEmitter *createSIMCCodeEmitter(const MCInstrInfo &MCII, MCAsmBackend *createAMDGPUAsmBackend(const Target &T, StringRef TT, StringRef CPU); + +MCObjectWriter *createAMDGPUELFObjectWriter(raw_ostream &OS); } // End llvm namespace #define GET_REGINFO_ENUM diff --git a/contrib/llvm/lib/Target/R600/MCTargetDesc/R600MCCodeEmitter.cpp b/contrib/llvm/lib/Target/R600/MCTargetDesc/R600MCCodeEmitter.cpp index 927bcbd..cb4cf0c 100644 --- a/contrib/llvm/lib/Target/R600/MCTargetDesc/R600MCCodeEmitter.cpp +++ b/contrib/llvm/lib/Target/R600/MCTargetDesc/R600MCCodeEmitter.cpp @@ -9,12 +9,8 @@ // /// \file /// -/// This code emitter outputs bytecode that is understood by the r600g driver -/// in the Mesa [1] project. The bytecode is very similar to the hardware's ISA, -/// but it still needs to be run through a finalizer in order to be executed -/// by the GPU. -/// -/// [1] http://www.mesa3d.org/ +/// \brief The R600 code emitter produces machine code that can be executed +/// directly on the GPU device. // //===----------------------------------------------------------------------===// @@ -30,9 +26,6 @@ #include "llvm/Support/raw_ostream.h" #include <stdio.h> -#define SRC_BYTE_COUNT 11 -#define DST_BYTE_COUNT 5 - using namespace llvm; namespace { @@ -43,13 +36,12 @@ class R600MCCodeEmitter : public AMDGPUMCCodeEmitter { const MCInstrInfo &MCII; const MCRegisterInfo &MRI; const MCSubtargetInfo &STI; - MCContext &Ctx; public: R600MCCodeEmitter(const MCInstrInfo &mcii, const MCRegisterInfo &mri, - const MCSubtargetInfo &sti, MCContext &ctx) - : MCII(mcii), MRI(mri), STI(sti), Ctx(ctx) { } + const MCSubtargetInfo &sti) + : MCII(mcii), MRI(mri), STI(sti) { } /// \brief Encode the instruction and write it to the OS. 
virtual void EncodeInstruction(const MCInst &MI, raw_ostream &OS, @@ -60,30 +52,14 @@ public: SmallVectorImpl<MCFixup> &Fixups) const; private: - void EmitALUInstr(const MCInst &MI, SmallVectorImpl<MCFixup> &Fixups, - raw_ostream &OS) const; - void EmitSrc(const MCInst &MI, unsigned OpIdx, raw_ostream &OS) const; - void EmitSrcISA(const MCInst &MI, unsigned RegOpIdx, unsigned SelOpIdx, - raw_ostream &OS) const; - void EmitDst(const MCInst &MI, raw_ostream &OS) const; - void EmitFCInstr(const MCInst &MI, raw_ostream &OS) const; - - void EmitNullBytes(unsigned int byteCount, raw_ostream &OS) const; - void EmitByte(unsigned int byte, raw_ostream &OS) const; - void EmitTwoBytes(uint32_t bytes, raw_ostream &OS) const; - void Emit(uint32_t value, raw_ostream &OS) const; void Emit(uint64_t value, raw_ostream &OS) const; unsigned getHWRegChan(unsigned reg) const; unsigned getHWReg(unsigned regNo) const; - bool isFCOp(unsigned opcode) const; - bool isTexOp(unsigned opcode) const; - bool isFlagSet(const MCInst &MI, unsigned Operand, unsigned Flag) const; - }; } // End anonymous namespace @@ -95,16 +71,6 @@ enum RegElement { ELEMENT_W }; -enum InstrTypes { - INSTR_ALU = 0, - INSTR_TEX, - INSTR_FC, - INSTR_NATIVE, - INSTR_VTX, - INSTR_EXPORT, - INSTR_CFALU -}; - enum FCInstr { FC_IF_PREDICATE = 0, FC_ELSE, @@ -132,355 +98,95 @@ enum TextureTypes { MCCodeEmitter *llvm::createR600MCCodeEmitter(const MCInstrInfo &MCII, const MCRegisterInfo &MRI, - const MCSubtargetInfo &STI, - MCContext &Ctx) { - return new R600MCCodeEmitter(MCII, MRI, STI, Ctx); + const MCSubtargetInfo &STI) { + return new R600MCCodeEmitter(MCII, MRI, STI); } void R600MCCodeEmitter::EncodeInstruction(const MCInst &MI, raw_ostream &OS, SmallVectorImpl<MCFixup> &Fixups) const { - if (isFCOp(MI.getOpcode())){ - EmitFCInstr(MI, OS); - } else if (MI.getOpcode() == AMDGPU::RETURN || + const MCInstrDesc &Desc = MCII.get(MI.getOpcode()); + if (MI.getOpcode() == AMDGPU::RETURN || + MI.getOpcode() == AMDGPU::FETCH_CLAUSE || + MI.getOpcode() == AMDGPU::ALU_CLAUSE || MI.getOpcode() == AMDGPU::BUNDLE || MI.getOpcode() == AMDGPU::KILL) { return; - } else { - switch(MI.getOpcode()) { - case AMDGPU::STACK_SIZE: { - EmitByte(MI.getOperand(0).getImm(), OS); - break; - } - case AMDGPU::RAT_WRITE_CACHELESS_32_eg: - case AMDGPU::RAT_WRITE_CACHELESS_128_eg: { - uint64_t inst = getBinaryCodeForInstr(MI, Fixups); - EmitByte(INSTR_NATIVE, OS); - Emit(inst, OS); - break; - } - case AMDGPU::CONSTANT_LOAD_eg: - case AMDGPU::VTX_READ_PARAM_8_eg: - case AMDGPU::VTX_READ_PARAM_16_eg: - case AMDGPU::VTX_READ_PARAM_32_eg: - case AMDGPU::VTX_READ_PARAM_128_eg: - case AMDGPU::VTX_READ_GLOBAL_8_eg: - case AMDGPU::VTX_READ_GLOBAL_32_eg: - case AMDGPU::VTX_READ_GLOBAL_128_eg: - case AMDGPU::TEX_VTX_CONSTBUF: - case AMDGPU::TEX_VTX_TEXBUF : { - uint64_t InstWord01 = getBinaryCodeForInstr(MI, Fixups); - uint32_t InstWord2 = MI.getOperand(2).getImm(); // Offset - - EmitByte(INSTR_VTX, OS); - Emit(InstWord01, OS); - Emit(InstWord2, OS); - break; - } - case AMDGPU::TEX_LD: - case AMDGPU::TEX_GET_TEXTURE_RESINFO: - case AMDGPU::TEX_SAMPLE: - case AMDGPU::TEX_SAMPLE_C: - case AMDGPU::TEX_SAMPLE_L: - case AMDGPU::TEX_SAMPLE_C_L: - case AMDGPU::TEX_SAMPLE_LB: - case AMDGPU::TEX_SAMPLE_C_LB: - case AMDGPU::TEX_SAMPLE_G: - case AMDGPU::TEX_SAMPLE_C_G: - case AMDGPU::TEX_GET_GRADIENTS_H: - case AMDGPU::TEX_GET_GRADIENTS_V: - case AMDGPU::TEX_SET_GRADIENTS_H: - case AMDGPU::TEX_SET_GRADIENTS_V: { - unsigned Opcode = MI.getOpcode(); - bool HasOffsets = (Opcode == AMDGPU::TEX_LD); - 
unsigned OpOffset = HasOffsets ? 3 : 0; - int64_t Sampler = MI.getOperand(OpOffset + 3).getImm(); - int64_t TextureType = MI.getOperand(OpOffset + 4).getImm(); - - uint32_t SrcSelect[4] = {0, 1, 2, 3}; - uint32_t Offsets[3] = {0, 0, 0}; - uint64_t CoordType[4] = {1, 1, 1, 1}; - - if (HasOffsets) - for (unsigned i = 0; i < 3; i++) { - int SignedOffset = MI.getOperand(i + 2).getImm(); - Offsets[i] = (SignedOffset & 0x1F); - } - - - if (TextureType == TEXTURE_RECT || - TextureType == TEXTURE_SHADOWRECT) { - CoordType[ELEMENT_X] = 0; - CoordType[ELEMENT_Y] = 0; - } - - if (TextureType == TEXTURE_1D_ARRAY || - TextureType == TEXTURE_SHADOW1D_ARRAY) { - if (Opcode == AMDGPU::TEX_SAMPLE_C_L || - Opcode == AMDGPU::TEX_SAMPLE_C_LB) { - CoordType[ELEMENT_Y] = 0; - } else { - CoordType[ELEMENT_Z] = 0; - SrcSelect[ELEMENT_Z] = ELEMENT_Y; - } - } else if (TextureType == TEXTURE_2D_ARRAY || - TextureType == TEXTURE_SHADOW2D_ARRAY) { - CoordType[ELEMENT_Z] = 0; + } else if (IS_VTX(Desc)) { + uint64_t InstWord01 = getBinaryCodeForInstr(MI, Fixups); + uint32_t InstWord2 = MI.getOperand(2).getImm(); // Offset + InstWord2 |= 1 << 19; + + Emit(InstWord01, OS); + Emit(InstWord2, OS); + Emit((u_int32_t) 0, OS); + } else if (IS_TEX(Desc)) { + unsigned Opcode = MI.getOpcode(); + bool HasOffsets = (Opcode == AMDGPU::TEX_LD); + unsigned OpOffset = HasOffsets ? 3 : 0; + int64_t Sampler = MI.getOperand(OpOffset + 3).getImm(); + int64_t TextureType = MI.getOperand(OpOffset + 4).getImm(); + + uint32_t SrcSelect[4] = {0, 1, 2, 3}; + uint32_t Offsets[3] = {0, 0, 0}; + uint64_t CoordType[4] = {1, 1, 1, 1}; + + if (HasOffsets) + for (unsigned i = 0; i < 3; i++) { + int SignedOffset = MI.getOperand(i + 2).getImm(); + Offsets[i] = (SignedOffset & 0x1F); } - - if ((TextureType == TEXTURE_SHADOW1D || - TextureType == TEXTURE_SHADOW2D || - TextureType == TEXTURE_SHADOWRECT || - TextureType == TEXTURE_SHADOW1D_ARRAY) && - Opcode != AMDGPU::TEX_SAMPLE_C_L && - Opcode != AMDGPU::TEX_SAMPLE_C_LB) { - SrcSelect[ELEMENT_W] = ELEMENT_Z; - } - - uint64_t Word01 = getBinaryCodeForInstr(MI, Fixups) | - CoordType[ELEMENT_X] << 60 | CoordType[ELEMENT_Y] << 61 | - CoordType[ELEMENT_Z] << 62 | CoordType[ELEMENT_W] << 63; - uint32_t Word2 = Sampler << 15 | SrcSelect[ELEMENT_X] << 20 | - SrcSelect[ELEMENT_Y] << 23 | SrcSelect[ELEMENT_Z] << 26 | - SrcSelect[ELEMENT_W] << 29 | Offsets[0] << 0 | Offsets[1] << 5 | - Offsets[2] << 10; - - EmitByte(INSTR_TEX, OS); - Emit(Word01, OS); - Emit(Word2, OS); - break; - } - case AMDGPU::EG_ExportSwz: - case AMDGPU::R600_ExportSwz: - case AMDGPU::EG_ExportBuf: - case AMDGPU::R600_ExportBuf: { - uint64_t Inst = getBinaryCodeForInstr(MI, Fixups); - EmitByte(INSTR_EXPORT, OS); - Emit(Inst, OS); - break; - } - case AMDGPU::CF_ALU: - case AMDGPU::CF_ALU_PUSH_BEFORE: { - uint64_t Inst = getBinaryCodeForInstr(MI, Fixups); - EmitByte(INSTR_CFALU, OS); - Emit(Inst, OS); - break; - } - case AMDGPU::CF_TC: - case AMDGPU::CF_VC: - case AMDGPU::CF_CALL_FS: - return; - case AMDGPU::WHILE_LOOP: - case AMDGPU::END_LOOP: - case AMDGPU::LOOP_BREAK: - case AMDGPU::CF_CONTINUE: - case AMDGPU::CF_JUMP: - case AMDGPU::CF_ELSE: - case AMDGPU::POP: { - uint64_t Inst = getBinaryCodeForInstr(MI, Fixups); - EmitByte(INSTR_NATIVE, OS); - Emit(Inst, OS); - break; + if (TextureType == TEXTURE_RECT || + TextureType == TEXTURE_SHADOWRECT) { + CoordType[ELEMENT_X] = 0; + CoordType[ELEMENT_Y] = 0; } - default: - EmitALUInstr(MI, Fixups, OS); - break; - } - } -} - -void R600MCCodeEmitter::EmitALUInstr(const MCInst &MI, - 
SmallVectorImpl<MCFixup> &Fixups, - raw_ostream &OS) const { - const MCInstrDesc &MCDesc = MCII.get(MI.getOpcode()); - - // Emit instruction type - EmitByte(INSTR_ALU, OS); - - uint64_t InstWord01 = getBinaryCodeForInstr(MI, Fixups); - - //older alu have different encoding for instructions with one or two src - //parameters. - if ((STI.getFeatureBits() & AMDGPU::FeatureR600ALUInst) && - !(MCDesc.TSFlags & R600_InstFlag::OP3)) { - uint64_t ISAOpCode = InstWord01 & (0x3FFULL << 39); - InstWord01 &= ~(0x3FFULL << 39); - InstWord01 |= ISAOpCode << 1; - } - - unsigned SrcNum = MCDesc.TSFlags & R600_InstFlag::OP3 ? 3 : - MCDesc.TSFlags & R600_InstFlag::OP2 ? 2 : 1; - - EmitByte(SrcNum, OS); - - const unsigned SrcOps[3][2] = { - {R600Operands::SRC0, R600Operands::SRC0_SEL}, - {R600Operands::SRC1, R600Operands::SRC1_SEL}, - {R600Operands::SRC2, R600Operands::SRC2_SEL} - }; - for (unsigned SrcIdx = 0; SrcIdx < SrcNum; ++SrcIdx) { - unsigned RegOpIdx = R600Operands::ALUOpTable[SrcNum-1][SrcOps[SrcIdx][0]]; - unsigned SelOpIdx = R600Operands::ALUOpTable[SrcNum-1][SrcOps[SrcIdx][1]]; - EmitSrcISA(MI, RegOpIdx, SelOpIdx, OS); - } - - Emit(InstWord01, OS); - return; -} - -void R600MCCodeEmitter::EmitSrc(const MCInst &MI, unsigned OpIdx, - raw_ostream &OS) const { - const MCOperand &MO = MI.getOperand(OpIdx); - union { - float f; - uint32_t i; - } Value; - Value.i = 0; - // Emit the source select (2 bytes). For GPRs, this is the register index. - // For other potential instruction operands, (e.g. constant registers) the - // value of the source select is defined in the r600isa docs. - if (MO.isReg()) { - unsigned reg = MO.getReg(); - EmitTwoBytes(getHWReg(reg), OS); - if (reg == AMDGPU::ALU_LITERAL_X) { - unsigned ImmOpIndex = MI.getNumOperands() - 1; - MCOperand ImmOp = MI.getOperand(ImmOpIndex); - if (ImmOp.isFPImm()) { - Value.f = ImmOp.getFPImm(); + if (TextureType == TEXTURE_1D_ARRAY || + TextureType == TEXTURE_SHADOW1D_ARRAY) { + if (Opcode == AMDGPU::TEX_SAMPLE_C_L || + Opcode == AMDGPU::TEX_SAMPLE_C_LB) { + CoordType[ELEMENT_Y] = 0; } else { - assert(ImmOp.isImm()); - Value.i = ImmOp.getImm(); + CoordType[ELEMENT_Z] = 0; + SrcSelect[ELEMENT_Z] = ELEMENT_Y; } + } else if (TextureType == TEXTURE_2D_ARRAY || + TextureType == TEXTURE_SHADOW2D_ARRAY) { + CoordType[ELEMENT_Z] = 0; } - } else { - // XXX: Handle other operand types. - EmitTwoBytes(0, OS); - } - - // Emit the source channel (1 byte) - if (MO.isReg()) { - EmitByte(getHWRegChan(MO.getReg()), OS); - } else { - EmitByte(0, OS); - } - - // XXX: Emit isNegated (1 byte) - if ((!(isFlagSet(MI, OpIdx, MO_FLAG_ABS))) - && (isFlagSet(MI, OpIdx, MO_FLAG_NEG) || - (MO.isReg() && - (MO.getReg() == AMDGPU::NEG_ONE || MO.getReg() == AMDGPU::NEG_HALF)))){ - EmitByte(1, OS); - } else { - EmitByte(0, OS); - } - - // Emit isAbsolute (1 byte) - if (isFlagSet(MI, OpIdx, MO_FLAG_ABS)) { - EmitByte(1, OS); - } else { - EmitByte(0, OS); - } - - // XXX: Emit relative addressing mode (1 byte) - EmitByte(0, OS); - - // Emit kc_bank, This will be adjusted later by r600_asm - EmitByte(0, OS); - // Emit the literal value, if applicable (4 bytes). - Emit(Value.i, OS); -} - -void R600MCCodeEmitter::EmitSrcISA(const MCInst &MI, unsigned RegOpIdx, - unsigned SelOpIdx, raw_ostream &OS) const { - const MCOperand &RegMO = MI.getOperand(RegOpIdx); - const MCOperand &SelMO = MI.getOperand(SelOpIdx); - - union { - float f; - uint32_t i; - } InlineConstant; - InlineConstant.i = 0; - // Emit source type (1 byte) and source select (4 bytes). 
For GPRs type is 0 - // and select is 0 (GPR index is encoded in the instr encoding. For constants - // type is 1 and select is the original const select passed from the driver. - unsigned Reg = RegMO.getReg(); - if (Reg == AMDGPU::ALU_CONST) { - EmitByte(1, OS); - uint32_t Sel = SelMO.getImm(); - Emit(Sel, OS); - } else { - EmitByte(0, OS); - Emit((uint32_t)0, OS); - } - - if (Reg == AMDGPU::ALU_LITERAL_X) { - unsigned ImmOpIndex = MI.getNumOperands() - 1; - MCOperand ImmOp = MI.getOperand(ImmOpIndex); - if (ImmOp.isFPImm()) { - InlineConstant.f = ImmOp.getFPImm(); - } else { - assert(ImmOp.isImm()); - InlineConstant.i = ImmOp.getImm(); + if ((TextureType == TEXTURE_SHADOW1D || + TextureType == TEXTURE_SHADOW2D || + TextureType == TEXTURE_SHADOWRECT || + TextureType == TEXTURE_SHADOW1D_ARRAY) && + Opcode != AMDGPU::TEX_SAMPLE_C_L && + Opcode != AMDGPU::TEX_SAMPLE_C_LB) { + SrcSelect[ELEMENT_W] = ELEMENT_Z; } - } - - // Emit the literal value, if applicable (4 bytes). - Emit(InlineConstant.i, OS); -} - -void R600MCCodeEmitter::EmitFCInstr(const MCInst &MI, raw_ostream &OS) const { - - // Emit instruction type - EmitByte(INSTR_FC, OS); - // Emit SRC - unsigned NumOperands = MI.getNumOperands(); - if (NumOperands > 0) { - assert(NumOperands == 1); - EmitSrc(MI, 0, OS); + uint64_t Word01 = getBinaryCodeForInstr(MI, Fixups) | + CoordType[ELEMENT_X] << 60 | CoordType[ELEMENT_Y] << 61 | + CoordType[ELEMENT_Z] << 62 | CoordType[ELEMENT_W] << 63; + uint32_t Word2 = Sampler << 15 | SrcSelect[ELEMENT_X] << 20 | + SrcSelect[ELEMENT_Y] << 23 | SrcSelect[ELEMENT_Z] << 26 | + SrcSelect[ELEMENT_W] << 29 | Offsets[0] << 0 | Offsets[1] << 5 | + Offsets[2] << 10; + + Emit(Word01, OS); + Emit(Word2, OS); + Emit((u_int32_t) 0, OS); } else { - EmitNullBytes(SRC_BYTE_COUNT, OS); - } - - // Emit FC Instruction - enum FCInstr instr; - switch (MI.getOpcode()) { - case AMDGPU::PREDICATED_BREAK: - instr = FC_BREAK_PREDICATE; - break; - case AMDGPU::CONTINUE: - instr = FC_CONTINUE; - break; - case AMDGPU::IF_PREDICATE_SET: - instr = FC_IF_PREDICATE; - break; - case AMDGPU::ELSE: - instr = FC_ELSE; - break; - case AMDGPU::ENDIF: - instr = FC_ENDIF; - break; - case AMDGPU::ENDLOOP: - instr = FC_ENDLOOP; - break; - case AMDGPU::WHILELOOP: - instr = FC_BGNLOOP; - break; - default: - abort(); - break; - } - EmitByte(instr, OS); -} - -void R600MCCodeEmitter::EmitNullBytes(unsigned int ByteCount, - raw_ostream &OS) const { - - for (unsigned int i = 0; i < ByteCount; i++) { - EmitByte(0, OS); + uint64_t Inst = getBinaryCodeForInstr(MI, Fixups); + if ((STI.getFeatureBits() & AMDGPU::FeatureR600ALUInst) && + ((Desc.TSFlags & R600_InstFlag::OP1) || + Desc.TSFlags & R600_InstFlag::OP2)) { + uint64_t ISAOpCode = Inst & (0x3FFULL << 39); + Inst &= ~(0x3FFULL << 39); + Inst |= ISAOpCode << 1; + } + Emit(Inst, OS); } } @@ -488,12 +194,6 @@ void R600MCCodeEmitter::EmitByte(unsigned int Byte, raw_ostream &OS) const { OS.write((uint8_t) Byte & 0xff); } -void R600MCCodeEmitter::EmitTwoBytes(unsigned int Bytes, - raw_ostream &OS) const { - OS.write((uint8_t) (Bytes & 0xff)); - OS.write((uint8_t) ((Bytes >> 8) & 0xff)); -} - void R600MCCodeEmitter::Emit(uint32_t Value, raw_ostream &OS) const { for (unsigned i = 0; i < 4; i++) { OS.write((uint8_t) ((Value >> (8 * i)) & 0xff)); @@ -531,55 +231,4 @@ uint64_t R600MCCodeEmitter::getMachineOpValue(const MCInst &MI, } } -//===----------------------------------------------------------------------===// -// Encoding helper functions 
-//===----------------------------------------------------------------------===// - -bool R600MCCodeEmitter::isFCOp(unsigned opcode) const { - switch(opcode) { - default: return false; - case AMDGPU::PREDICATED_BREAK: - case AMDGPU::CONTINUE: - case AMDGPU::IF_PREDICATE_SET: - case AMDGPU::ELSE: - case AMDGPU::ENDIF: - case AMDGPU::ENDLOOP: - case AMDGPU::WHILELOOP: - return true; - } -} - -bool R600MCCodeEmitter::isTexOp(unsigned opcode) const { - switch(opcode) { - default: return false; - case AMDGPU::TEX_LD: - case AMDGPU::TEX_GET_TEXTURE_RESINFO: - case AMDGPU::TEX_SAMPLE: - case AMDGPU::TEX_SAMPLE_C: - case AMDGPU::TEX_SAMPLE_L: - case AMDGPU::TEX_SAMPLE_C_L: - case AMDGPU::TEX_SAMPLE_LB: - case AMDGPU::TEX_SAMPLE_C_LB: - case AMDGPU::TEX_SAMPLE_G: - case AMDGPU::TEX_SAMPLE_C_G: - case AMDGPU::TEX_GET_GRADIENTS_H: - case AMDGPU::TEX_GET_GRADIENTS_V: - case AMDGPU::TEX_SET_GRADIENTS_H: - case AMDGPU::TEX_SET_GRADIENTS_V: - return true; - } -} - -bool R600MCCodeEmitter::isFlagSet(const MCInst &MI, unsigned Operand, - unsigned Flag) const { - const MCInstrDesc &MCDesc = MCII.get(MI.getOpcode()); - unsigned FlagIndex = GET_FLAG_OPERAND_IDX(MCDesc.TSFlags); - if (FlagIndex == 0) { - return false; - } - assert(MI.getOperand(FlagIndex).isImm()); - return !!((MI.getOperand(FlagIndex).getImm() >> - (NUM_MO_FLAGS * Operand)) & Flag); -} - #include "AMDGPUGenMCCodeEmitter.inc" diff --git a/contrib/llvm/lib/Target/R600/Processors.td b/contrib/llvm/lib/Target/R600/Processors.td index 868810c..0cbe919 100644 --- a/contrib/llvm/lib/Target/R600/Processors.td +++ b/contrib/llvm/lib/Target/R600/Processors.td @@ -1,4 +1,4 @@ -//===-- Processors.td - TODO: Add brief description -------===// +//===-- Processors.td - R600 Processor definitions ------------------------===// // // The LLVM Compiler Infrastructure // @@ -6,25 +6,43 @@ // License. See LICENSE.TXT for details. // //===----------------------------------------------------------------------===// -// -// AMDIL processors supported. 
-// -//===----------------------------------------------------------------------===// class Proc<string Name, ProcessorItineraries itin, list<SubtargetFeature> Features> : Processor<Name, itin, Features>; -def : Proc<"", R600_EG_Itin, [FeatureR600ALUInst]>; -def : Proc<"r600", R600_EG_Itin, [FeatureR600ALUInst]>; -def : Proc<"rv710", R600_EG_Itin, []>; -def : Proc<"rv730", R600_EG_Itin, []>; -def : Proc<"rv770", R600_EG_Itin, [FeatureFP64]>; -def : Proc<"cedar", R600_EG_Itin, [FeatureByteAddress, FeatureImages]>; -def : Proc<"redwood", R600_EG_Itin, [FeatureByteAddress, FeatureImages]>; -def : Proc<"juniper", R600_EG_Itin, [FeatureByteAddress, FeatureImages]>; -def : Proc<"cypress", R600_EG_Itin, [FeatureByteAddress, FeatureImages, FeatureFP64]>; -def : Proc<"barts", R600_EG_Itin, [FeatureByteAddress, FeatureImages]>; -def : Proc<"turks", R600_EG_Itin, [FeatureByteAddress, FeatureImages]>; -def : Proc<"caicos", R600_EG_Itin, [FeatureByteAddress, FeatureImages]>; -def : Proc<"cayman", R600_EG_Itin, [FeatureByteAddress, FeatureImages, FeatureFP64]>; -def : Proc<"SI", SI_Itin, [Feature64BitPtr]>; - +def : Proc<"", R600_VLIW5_Itin, + [FeatureR600ALUInst, FeatureVertexCache]>; +def : Proc<"r600", R600_VLIW5_Itin, + [FeatureR600ALUInst , FeatureVertexCache]>; +def : Proc<"rs880", R600_VLIW5_Itin, + [FeatureR600ALUInst]>; +def : Proc<"rv670", R600_VLIW5_Itin, + [FeatureR600ALUInst, FeatureFP64, FeatureVertexCache]>; +def : Proc<"rv710", R600_VLIW5_Itin, + [FeatureVertexCache]>; +def : Proc<"rv730", R600_VLIW5_Itin, + [FeatureVertexCache]>; +def : Proc<"rv770", R600_VLIW5_Itin, + [FeatureFP64, FeatureVertexCache]>; +def : Proc<"cedar", R600_VLIW5_Itin, + [FeatureByteAddress, FeatureImages, FeatureVertexCache]>; +def : Proc<"redwood", R600_VLIW5_Itin, + [FeatureByteAddress, FeatureImages, FeatureVertexCache]>; +def : Proc<"sumo", R600_VLIW5_Itin, + [FeatureByteAddress, FeatureImages]>; +def : Proc<"juniper", R600_VLIW5_Itin, + [FeatureByteAddress, FeatureImages, FeatureVertexCache]>; +def : Proc<"cypress", R600_VLIW5_Itin, + [FeatureByteAddress, FeatureImages, FeatureFP64, FeatureVertexCache]>; +def : Proc<"barts", R600_VLIW5_Itin, + [FeatureByteAddress, FeatureImages, FeatureVertexCache]>; +def : Proc<"turks", R600_VLIW5_Itin, + [FeatureByteAddress, FeatureImages, FeatureVertexCache]>; +def : Proc<"caicos", R600_VLIW5_Itin, + [FeatureByteAddress, FeatureImages]>; +def : Proc<"cayman", R600_VLIW4_Itin, + [FeatureByteAddress, FeatureImages, FeatureFP64]>;def : Proc<"SI", SI_Itin, [Feature64BitPtr, FeatureFP64]>; +def : Proc<"tahiti", SI_Itin, [Feature64BitPtr, FeatureFP64]>; +def : Proc<"pitcairn", SI_Itin, [Feature64BitPtr, FeatureFP64]>; +def : Proc<"verde", SI_Itin, [Feature64BitPtr, FeatureFP64]>; +def : Proc<"oland", SI_Itin, [Feature64BitPtr, FeatureFP64]>; +def : Proc<"hainan", SI_Itin, [Feature64BitPtr, FeatureFP64]>; diff --git a/contrib/llvm/lib/Target/R600/R600ControlFlowFinalizer.cpp b/contrib/llvm/lib/Target/R600/R600ControlFlowFinalizer.cpp index 3a6c7ea..ffe3414 100644 --- a/contrib/llvm/lib/Target/R600/R600ControlFlowFinalizer.cpp +++ b/contrib/llvm/lib/Target/R600/R600ControlFlowFinalizer.cpp @@ -30,35 +30,27 @@ namespace llvm { class R600ControlFlowFinalizer : public MachineFunctionPass { private: + typedef std::pair<MachineInstr *, std::vector<MachineInstr *> > ClauseFile; + + enum ControlFlowInstruction { + CF_TC, + CF_VC, + CF_CALL_FS, + CF_WHILE_LOOP, + CF_END_LOOP, + CF_LOOP_BREAK, + CF_LOOP_CONTINUE, + CF_JUMP, + CF_ELSE, + CF_POP, + CF_END + }; + static char ID; const 
R600InstrInfo *TII; + const R600RegisterInfo &TRI; unsigned MaxFetchInst; - - bool isFetch(const MachineInstr *MI) const { - switch (MI->getOpcode()) { - case AMDGPU::TEX_VTX_CONSTBUF: - case AMDGPU::TEX_VTX_TEXBUF: - case AMDGPU::TEX_LD: - case AMDGPU::TEX_GET_TEXTURE_RESINFO: - case AMDGPU::TEX_GET_GRADIENTS_H: - case AMDGPU::TEX_GET_GRADIENTS_V: - case AMDGPU::TEX_SET_GRADIENTS_H: - case AMDGPU::TEX_SET_GRADIENTS_V: - case AMDGPU::TEX_SAMPLE: - case AMDGPU::TEX_SAMPLE_C: - case AMDGPU::TEX_SAMPLE_L: - case AMDGPU::TEX_SAMPLE_C_L: - case AMDGPU::TEX_SAMPLE_LB: - case AMDGPU::TEX_SAMPLE_C_LB: - case AMDGPU::TEX_SAMPLE_G: - case AMDGPU::TEX_SAMPLE_C_G: - case AMDGPU::TXD: - case AMDGPU::TXD_SHADOW: - return true; - default: - return false; - } - } + const AMDGPUSubtarget &ST; bool IsTrivialInst(MachineInstr *MI) const { switch (MI->getOpcode()) { @@ -70,26 +62,226 @@ private: } } - MachineBasicBlock::iterator - MakeFetchClause(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, - unsigned CfAddress) const { + const MCInstrDesc &getHWInstrDesc(ControlFlowInstruction CFI) const { + unsigned Opcode = 0; + bool isEg = (ST.device()->getGeneration() >= AMDGPUDeviceInfo::HD5XXX); + switch (CFI) { + case CF_TC: + Opcode = isEg ? AMDGPU::CF_TC_EG : AMDGPU::CF_TC_R600; + break; + case CF_VC: + Opcode = isEg ? AMDGPU::CF_VC_EG : AMDGPU::CF_VC_R600; + break; + case CF_CALL_FS: + Opcode = isEg ? AMDGPU::CF_CALL_FS_EG : AMDGPU::CF_CALL_FS_R600; + break; + case CF_WHILE_LOOP: + Opcode = isEg ? AMDGPU::WHILE_LOOP_EG : AMDGPU::WHILE_LOOP_R600; + break; + case CF_END_LOOP: + Opcode = isEg ? AMDGPU::END_LOOP_EG : AMDGPU::END_LOOP_R600; + break; + case CF_LOOP_BREAK: + Opcode = isEg ? AMDGPU::LOOP_BREAK_EG : AMDGPU::LOOP_BREAK_R600; + break; + case CF_LOOP_CONTINUE: + Opcode = isEg ? AMDGPU::CF_CONTINUE_EG : AMDGPU::CF_CONTINUE_R600; + break; + case CF_JUMP: + Opcode = isEg ? AMDGPU::CF_JUMP_EG : AMDGPU::CF_JUMP_R600; + break; + case CF_ELSE: + Opcode = isEg ? AMDGPU::CF_ELSE_EG : AMDGPU::CF_ELSE_R600; + break; + case CF_POP: + Opcode = isEg ? AMDGPU::POP_EG : AMDGPU::POP_R600; + break; + case CF_END: + if (ST.device()->getDeviceFlag() == OCL_DEVICE_CAYMAN) { + Opcode = AMDGPU::CF_END_CM; + break; + } + Opcode = isEg ? 
AMDGPU::CF_END_EG : AMDGPU::CF_END_R600; + break; + } + assert (Opcode && "No opcode selected"); + return TII->get(Opcode); + } + + bool isCompatibleWithClause(const MachineInstr *MI, + std::set<unsigned> &DstRegs, std::set<unsigned> &SrcRegs) const { + unsigned DstMI, SrcMI; + for (MachineInstr::const_mop_iterator I = MI->operands_begin(), + E = MI->operands_end(); I != E; ++I) { + const MachineOperand &MO = *I; + if (!MO.isReg()) + continue; + if (MO.isDef()) + DstMI = MO.getReg(); + if (MO.isUse()) { + unsigned Reg = MO.getReg(); + if (AMDGPU::R600_Reg128RegClass.contains(Reg)) + SrcMI = Reg; + else + SrcMI = TRI.getMatchingSuperReg(Reg, + TRI.getSubRegFromChannel(TRI.getHWRegChan(Reg)), + &AMDGPU::R600_Reg128RegClass); + } + } + if ((DstRegs.find(SrcMI) == DstRegs.end()) && + (SrcRegs.find(DstMI) == SrcRegs.end())) { + SrcRegs.insert(SrcMI); + DstRegs.insert(DstMI); + return true; + } else + return false; + } + + ClauseFile + MakeFetchClause(MachineBasicBlock &MBB, MachineBasicBlock::iterator &I) + const { MachineBasicBlock::iterator ClauseHead = I; + std::vector<MachineInstr *> ClauseContent; unsigned AluInstCount = 0; + bool IsTex = TII->usesTextureCache(ClauseHead); + std::set<unsigned> DstRegs, SrcRegs; for (MachineBasicBlock::iterator E = MBB.end(); I != E; ++I) { if (IsTrivialInst(I)) continue; - if (!isFetch(I)) + if (AluInstCount > MaxFetchInst) + break; + if ((IsTex && !TII->usesTextureCache(I)) || + (!IsTex && !TII->usesVertexCache(I))) + break; + if (!isCompatibleWithClause(I, DstRegs, SrcRegs)) break; AluInstCount ++; - if (AluInstCount > MaxFetchInst) + ClauseContent.push_back(I); + } + MachineInstr *MIb = BuildMI(MBB, ClauseHead, MBB.findDebugLoc(ClauseHead), + getHWInstrDesc(IsTex?CF_TC:CF_VC)) + .addImm(0) // ADDR + .addImm(AluInstCount - 1); // COUNT + return ClauseFile(MIb, ClauseContent); + } + + void getLiteral(MachineInstr *MI, std::vector<int64_t> &Lits) const { + unsigned LiteralRegs[] = { + AMDGPU::ALU_LITERAL_X, + AMDGPU::ALU_LITERAL_Y, + AMDGPU::ALU_LITERAL_Z, + AMDGPU::ALU_LITERAL_W + }; + for (unsigned i = 0, e = MI->getNumOperands(); i < e; ++i) { + MachineOperand &MO = MI->getOperand(i); + if (!MO.isReg()) + continue; + if (MO.getReg() != AMDGPU::ALU_LITERAL_X) + continue; + unsigned ImmIdx = TII->getOperandIdx(MI->getOpcode(), R600Operands::IMM); + int64_t Imm = MI->getOperand(ImmIdx).getImm(); + std::vector<int64_t>::iterator It = + std::find(Lits.begin(), Lits.end(), Imm); + if (It != Lits.end()) { + unsigned Index = It - Lits.begin(); + MO.setReg(LiteralRegs[Index]); + } else { + assert(Lits.size() < 4 && "Too many literals in Instruction Group"); + MO.setReg(LiteralRegs[Lits.size()]); + Lits.push_back(Imm); + } + } + } + + MachineBasicBlock::iterator insertLiterals( + MachineBasicBlock::iterator InsertPos, + const std::vector<unsigned> &Literals) const { + MachineBasicBlock *MBB = InsertPos->getParent(); + for (unsigned i = 0, e = Literals.size(); i < e; i+=2) { + unsigned LiteralPair0 = Literals[i]; + unsigned LiteralPair1 = (i + 1 < e)?Literals[i + 1]:0; + InsertPos = BuildMI(MBB, InsertPos->getDebugLoc(), + TII->get(AMDGPU::LITERALS)) + .addImm(LiteralPair0) + .addImm(LiteralPair1); + } + return InsertPos; + } + + ClauseFile + MakeALUClause(MachineBasicBlock &MBB, MachineBasicBlock::iterator &I) + const { + MachineBasicBlock::iterator ClauseHead = I; + std::vector<MachineInstr *> ClauseContent; + I++; + for (MachineBasicBlock::instr_iterator E = MBB.instr_end(); I != E;) { + if (IsTrivialInst(I)) { + ++I; + continue; + } + if (!I->isBundle() && 
!TII->isALUInstr(I->getOpcode())) break; + std::vector<int64_t> Literals; + if (I->isBundle()) { + MachineInstr *DeleteMI = I; + MachineBasicBlock::instr_iterator BI = I.getInstrIterator(); + while (++BI != E && BI->isBundledWithPred()) { + BI->unbundleFromPred(); + for (unsigned i = 0, e = BI->getNumOperands(); i != e; ++i) { + MachineOperand &MO = BI->getOperand(i); + if (MO.isReg() && MO.isInternalRead()) + MO.setIsInternalRead(false); + } + getLiteral(BI, Literals); + ClauseContent.push_back(BI); + } + I = BI; + DeleteMI->eraseFromParent(); + } else { + getLiteral(I, Literals); + ClauseContent.push_back(I); + I++; + } + for (unsigned i = 0, e = Literals.size(); i < e; i+=2) { + unsigned literal0 = Literals[i]; + unsigned literal2 = (i + 1 < e)?Literals[i + 1]:0; + MachineInstr *MILit = BuildMI(MBB, I, I->getDebugLoc(), + TII->get(AMDGPU::LITERALS)) + .addImm(literal0) + .addImm(literal2); + ClauseContent.push_back(MILit); + } } - BuildMI(MBB, ClauseHead, MBB.findDebugLoc(ClauseHead), - TII->get(AMDGPU::CF_TC)) - .addImm(CfAddress) // ADDR - .addImm(AluInstCount); // COUNT - return I; + ClauseHead->getOperand(7).setImm(ClauseContent.size() - 1); + return ClauseFile(ClauseHead, ClauseContent); } + + void + EmitFetchClause(MachineBasicBlock::iterator InsertPos, ClauseFile &Clause, + unsigned &CfCount) { + CounterPropagateAddr(Clause.first, CfCount); + MachineBasicBlock *BB = Clause.first->getParent(); + BuildMI(BB, InsertPos->getDebugLoc(), TII->get(AMDGPU::FETCH_CLAUSE)) + .addImm(CfCount); + for (unsigned i = 0, e = Clause.second.size(); i < e; ++i) { + BB->splice(InsertPos, BB, Clause.second[i]); + } + CfCount += 2 * Clause.second.size(); + } + + void + EmitALUClause(MachineBasicBlock::iterator InsertPos, ClauseFile &Clause, + unsigned &CfCount) { + CounterPropagateAddr(Clause.first, CfCount); + MachineBasicBlock *BB = Clause.first->getParent(); + BuildMI(BB, InsertPos->getDebugLoc(), TII->get(AMDGPU::ALU_CLAUSE)) + .addImm(CfCount); + for (unsigned i = 0, e = Clause.second.size(); i < e; ++i) { + BB->splice(InsertPos, BB, Clause.second[i]); + } + CfCount += Clause.second.size(); + } + void CounterPropagateAddr(MachineInstr *MI, unsigned Addr) const { MI->getOperand(0).setImm(Addr + MI->getOperand(0).getImm()); } @@ -102,9 +294,27 @@ private: } } + unsigned getHWStackSize(unsigned StackSubEntry, bool hasPush) const { + switch (ST.device()->getGeneration()) { + case AMDGPUDeviceInfo::HD4XXX: + if (hasPush) + StackSubEntry += 2; + break; + case AMDGPUDeviceInfo::HD5XXX: + if (hasPush) + StackSubEntry ++; + case AMDGPUDeviceInfo::HD6XXX: + StackSubEntry += 2; + break; + } + return (StackSubEntry + 3)/4; // Need ceil value of StackSubEntry/4 + } + public: R600ControlFlowFinalizer(TargetMachine &tm) : MachineFunctionPass(ID), - TII (static_cast<const R600InstrInfo *>(tm.getInstrInfo())) { + TII (static_cast<const R600InstrInfo *>(tm.getInstrInfo())), + TRI(TII->getRegisterInfo()), + ST(tm.getSubtarget<AMDGPUSubtarget>()) { const AMDGPUSubtarget &ST = tm.getSubtarget<AMDGPUSubtarget>(); if (ST.device()->getGeneration() <= AMDGPUDeviceInfo::HD4XXX) MaxFetchInst = 8; @@ -115,6 +325,7 @@ public: virtual bool runOnMachineFunction(MachineFunction &MF) { unsigned MaxStack = 0; unsigned CurrentStack = 0; + bool HasPush = false; for (MachineFunction::iterator MB = MF.begin(), ME = MF.end(); MB != ME; ++MB) { MachineBasicBlock &MBB = *MB; @@ -124,14 +335,16 @@ public: R600MachineFunctionInfo *MFI = MF.getInfo<R600MachineFunctionInfo>(); if (MFI->ShaderType == 1) { BuildMI(MBB, MBB.begin(), 
MBB.findDebugLoc(MBB.begin()), - TII->get(AMDGPU::CF_CALL_FS)); + getHWInstrDesc(CF_CALL_FS)); CfCount++; + MaxStack = 1; } + std::vector<ClauseFile> FetchClauses, AluClauses; for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end(); I != E;) { - if (isFetch(I)) { + if (TII->usesTextureCache(I) || TII->usesVertexCache(I)) { DEBUG(dbgs() << CfCount << ":"; I->dump();); - I = MakeFetchClause(MBB, I, 0); + FetchClauses.push_back(MakeFetchClause(MBB, I)); CfCount++; continue; } @@ -142,20 +355,25 @@ public: case AMDGPU::CF_ALU_PUSH_BEFORE: CurrentStack++; MaxStack = std::max(MaxStack, CurrentStack); + HasPush = true; case AMDGPU::CF_ALU: + I = MI; + AluClauses.push_back(MakeALUClause(MBB, I)); case AMDGPU::EG_ExportBuf: case AMDGPU::EG_ExportSwz: case AMDGPU::R600_ExportBuf: case AMDGPU::R600_ExportSwz: + case AMDGPU::RAT_WRITE_CACHELESS_32_eg: + case AMDGPU::RAT_WRITE_CACHELESS_128_eg: DEBUG(dbgs() << CfCount << ":"; MI->dump();); CfCount++; break; case AMDGPU::WHILELOOP: { - CurrentStack++; + CurrentStack+=4; MaxStack = std::max(MaxStack, CurrentStack); MachineInstr *MIb = BuildMI(MBB, MI, MBB.findDebugLoc(MI), - TII->get(AMDGPU::WHILE_LOOP)) - .addImm(2); + getHWInstrDesc(CF_WHILE_LOOP)) + .addImm(1); std::pair<unsigned, std::set<MachineInstr *> > Pair(CfCount, std::set<MachineInstr *>()); Pair.second.insert(MIb); @@ -165,12 +383,12 @@ public: break; } case AMDGPU::ENDLOOP: { - CurrentStack--; + CurrentStack-=4; std::pair<unsigned, std::set<MachineInstr *> > Pair = LoopStack.back(); LoopStack.pop_back(); CounterPropagateAddr(Pair.second, CfCount); - BuildMI(MBB, MI, MBB.findDebugLoc(MI), TII->get(AMDGPU::END_LOOP)) + BuildMI(MBB, MI, MBB.findDebugLoc(MI), getHWInstrDesc(CF_END_LOOP)) .addImm(Pair.first + 1); MI->eraseFromParent(); CfCount++; @@ -178,7 +396,7 @@ public: } case AMDGPU::IF_PREDICATE_SET: { MachineInstr *MIb = BuildMI(MBB, MI, MBB.findDebugLoc(MI), - TII->get(AMDGPU::CF_JUMP)) + getHWInstrDesc(CF_JUMP)) .addImm(0) .addImm(0); IfThenElseStack.push_back(MIb); @@ -192,7 +410,7 @@ public: IfThenElseStack.pop_back(); CounterPropagateAddr(JumpInst, CfCount); MachineInstr *MIb = BuildMI(MBB, MI, MBB.findDebugLoc(MI), - TII->get(AMDGPU::CF_ELSE)) + getHWInstrDesc(CF_ELSE)) .addImm(0) .addImm(1); DEBUG(dbgs() << CfCount << ":"; MIb->dump();); @@ -207,9 +425,10 @@ public: IfThenElseStack.pop_back(); CounterPropagateAddr(IfOrElseInst, CfCount + 1); MachineInstr *MIb = BuildMI(MBB, MI, MBB.findDebugLoc(MI), - TII->get(AMDGPU::POP)) + getHWInstrDesc(CF_POP)) .addImm(CfCount + 1) .addImm(1); + (void)MIb; DEBUG(dbgs() << CfCount << ":"; MIb->dump();); MI->eraseFromParent(); CfCount++; @@ -218,13 +437,13 @@ public: case AMDGPU::PREDICATED_BREAK: { CurrentStack--; CfCount += 3; - BuildMI(MBB, MI, MBB.findDebugLoc(MI), TII->get(AMDGPU::CF_JUMP)) + BuildMI(MBB, MI, MBB.findDebugLoc(MI), getHWInstrDesc(CF_JUMP)) .addImm(CfCount) .addImm(1); MachineInstr *MIb = BuildMI(MBB, MI, MBB.findDebugLoc(MI), - TII->get(AMDGPU::LOOP_BREAK)) + getHWInstrDesc(CF_LOOP_BREAK)) .addImm(0); - BuildMI(MBB, MI, MBB.findDebugLoc(MI), TII->get(AMDGPU::POP)) + BuildMI(MBB, MI, MBB.findDebugLoc(MI), getHWInstrDesc(CF_POP)) .addImm(CfCount) .addImm(1); LoopStack.back().second.insert(MIb); @@ -233,20 +452,31 @@ public: } case AMDGPU::CONTINUE: { MachineInstr *MIb = BuildMI(MBB, MI, MBB.findDebugLoc(MI), - TII->get(AMDGPU::CF_CONTINUE)) + getHWInstrDesc(CF_LOOP_CONTINUE)) .addImm(0); LoopStack.back().second.insert(MIb); MI->eraseFromParent(); CfCount++; break; } + case AMDGPU::RETURN: { + BuildMI(MBB, MI, 
MBB.findDebugLoc(MI), getHWInstrDesc(CF_END)); + CfCount++; + MI->eraseFromParent(); + if (CfCount % 2) { + BuildMI(MBB, I, MBB.findDebugLoc(MI), TII->get(AMDGPU::PAD)); + CfCount++; + } + for (unsigned i = 0, e = FetchClauses.size(); i < e; i++) + EmitFetchClause(I, FetchClauses[i], CfCount); + for (unsigned i = 0, e = AluClauses.size(); i < e; i++) + EmitALUClause(I, AluClauses[i], CfCount); + } default: break; } } - BuildMI(MBB, MBB.begin(), MBB.findDebugLoc(MBB.begin()), - TII->get(AMDGPU::STACK_SIZE)) - .addImm(MaxStack); + MFI->StackSize = getHWStackSize(MaxStack, HasPush); } return false; @@ -265,4 +495,3 @@ char R600ControlFlowFinalizer::ID = 0; llvm::FunctionPass *llvm::createR600ControlFlowFinalizer(TargetMachine &TM) { return new R600ControlFlowFinalizer(TM); } - diff --git a/contrib/llvm/lib/Target/R600/R600Defines.h b/contrib/llvm/lib/Target/R600/R600Defines.h index 16cfcf5..36bfb18 100644 --- a/contrib/llvm/lib/Target/R600/R600Defines.h +++ b/contrib/llvm/lib/Target/R600/R600Defines.h @@ -39,7 +39,9 @@ namespace R600_InstFlag { //FlagOperand bits 7, 8 NATIVE_OPERANDS = (1 << 9), OP1 = (1 << 10), - OP2 = (1 << 11) + OP2 = (1 << 11), + VTX_INST = (1 << 12), + TEX_INST = (1 << 13) }; } @@ -52,6 +54,9 @@ namespace R600_InstFlag { #define GET_REG_CHAN(reg) ((reg) >> HW_CHAN_SHIFT) #define GET_REG_INDEX(reg) ((reg) & HW_REG_MASK) +#define IS_VTX(desc) ((desc).TSFlags & R600_InstFlag::VTX_INST) +#define IS_TEX(desc) ((desc).TSFlags & R600_InstFlag::TEX_INST) + namespace R600Operands { enum Ops { DST, @@ -78,6 +83,7 @@ namespace R600Operands { LAST, PRED_SEL, IMM, + BANK_SWIZZLE, COUNT }; @@ -85,13 +91,39 @@ namespace R600Operands { // W C S S S S S S S S S S S // R O D L S R R R R S R R R R S R R R L P // D U I M R A R C C C C R C C C C R C C C A R I -// S E U T O E M C 0 0 0 0 C 1 1 1 1 C 2 2 2 S E M -// T M P E D L P 0 N R A S 1 N R A S 2 N R S T D M - {0,-1,-1, 1, 2, 3, 4, 5, 6, 7, 8, 9,-1,-1,-1,-1,-1,-1,-1,-1,-1,10,11,12}, - {0, 1, 2, 3, 4 ,5 ,6 ,7, 8, 9,10,11,12,13,14,15,16,-1,-1,-1,-1,17,18,19}, - {0,-1,-1,-1,-1, 1, 2, 3, 4, 5,-1, 6, 7, 8, 9,-1,10,11,12,13,14,15,16,17} +// S E U T O E M C 0 0 0 0 C 1 1 1 1 C 2 2 2 S E M B +// T M P E D L P 0 N R A S 1 N R A S 2 N R S T D M S + {0,-1,-1, 1, 2, 3, 4, 5, 6, 7, 8, 9,-1,-1,-1,-1,-1,-1,-1,-1,-1,10,11,12,13}, + {0, 1, 2, 3, 4 ,5 ,6 ,7, 8, 9,10,11,12,13,14,15,16,-1,-1,-1,-1,17,18,19,20}, + {0,-1,-1,-1,-1, 1, 2, 3, 4, 5,-1, 6, 7, 8, 9,-1,10,11,12,13,14,15,16,17,18} }; } +//===----------------------------------------------------------------------===// +// Config register definitions +//===----------------------------------------------------------------------===// + +#define R_02880C_DB_SHADER_CONTROL 0x02880C +#define S_02880C_KILL_ENABLE(x) (((x) & 0x1) << 6) + +// These fields are the same for all shader types and families. 
+#define S_NUM_GPRS(x) (((x) & 0xFF) << 0) +#define S_STACK_SIZE(x) (((x) & 0xFF) << 8) +//===----------------------------------------------------------------------===// +// R600, R700 Registers +//===----------------------------------------------------------------------===// + +#define R_028850_SQ_PGM_RESOURCES_PS 0x028850 +#define R_028868_SQ_PGM_RESOURCES_VS 0x028868 + +//===----------------------------------------------------------------------===// +// Evergreen, Northern Islands Registers +//===----------------------------------------------------------------------===// + +#define R_028844_SQ_PGM_RESOURCES_PS 0x028844 +#define R_028860_SQ_PGM_RESOURCES_VS 0x028860 +#define R_028878_SQ_PGM_RESOURCES_GS 0x028878 +#define R_0288D4_SQ_PGM_RESOURCES_LS 0x0288d4 + #endif // R600DEFINES_H_ diff --git a/contrib/llvm/lib/Target/R600/R600ISelLowering.cpp b/contrib/llvm/lib/Target/R600/R600ISelLowering.cpp index 53e6e51..7252235 100644 --- a/contrib/llvm/lib/Target/R600/R600ISelLowering.cpp +++ b/contrib/llvm/lib/Target/R600/R600ISelLowering.cpp @@ -43,11 +43,25 @@ R600TargetLowering::R600TargetLowering(TargetMachine &TM) : setOperationAction(ISD::AND, MVT::v4i32, Expand); setOperationAction(ISD::FP_TO_SINT, MVT::v4i32, Expand); setOperationAction(ISD::FP_TO_UINT, MVT::v4i32, Expand); + setOperationAction(ISD::MUL, MVT::v2i32, Expand); + setOperationAction(ISD::MUL, MVT::v4i32, Expand); + setOperationAction(ISD::OR, MVT::v4i32, Expand); + setOperationAction(ISD::OR, MVT::v2i32, Expand); setOperationAction(ISD::SINT_TO_FP, MVT::v4i32, Expand); + setOperationAction(ISD::SHL, MVT::v4i32, Expand); + setOperationAction(ISD::SHL, MVT::v2i32, Expand); + setOperationAction(ISD::SRL, MVT::v4i32, Expand); + setOperationAction(ISD::SRL, MVT::v2i32, Expand); + setOperationAction(ISD::SRA, MVT::v4i32, Expand); + setOperationAction(ISD::SRA, MVT::v2i32, Expand); + setOperationAction(ISD::SUB, MVT::v4i32, Expand); + setOperationAction(ISD::SUB, MVT::v2i32, Expand); setOperationAction(ISD::UINT_TO_FP, MVT::v4i32, Expand); setOperationAction(ISD::UDIV, MVT::v4i32, Expand); setOperationAction(ISD::UREM, MVT::v4i32, Expand); setOperationAction(ISD::SETCC, MVT::v4i32, Expand); + setOperationAction(ISD::XOR, MVT::v4i32, Expand); + setOperationAction(ISD::XOR, MVT::v2i32, Expand); setOperationAction(ISD::BR_CC, MVT::i32, Expand); setOperationAction(ISD::BR_CC, MVT::f32, Expand); @@ -70,6 +84,9 @@ R600TargetLowering::R600TargetLowering(TargetMachine &TM) : setOperationAction(ISD::SELECT, MVT::i32, Custom); setOperationAction(ISD::SELECT, MVT::f32, Custom); + setOperationAction(ISD::VSELECT, MVT::v4i32, Expand); + setOperationAction(ISD::VSELECT, MVT::v2i32, Expand); + // Legalize loads and stores to the private address space. 
setOperationAction(ISD::LOAD, MVT::i32, Custom); setOperationAction(ISD::LOAD, MVT::v2i32, Custom); @@ -93,6 +110,7 @@ R600TargetLowering::R600TargetLowering(TargetMachine &TM) : setTargetDAGCombine(ISD::SELECT_CC); setBooleanContents(ZeroOrNegativeOneBooleanContent); + setBooleanVectorContents(ZeroOrNegativeOneBooleanContent); setSchedulingPreference(Sched::VLIW); } diff --git a/contrib/llvm/lib/Target/R600/R600InstrInfo.cpp b/contrib/llvm/lib/Target/R600/R600InstrInfo.cpp index b232188..37150c4 100644 --- a/contrib/llvm/lib/Target/R600/R600InstrInfo.cpp +++ b/contrib/llvm/lib/Target/R600/R600InstrInfo.cpp @@ -13,6 +13,7 @@ //===----------------------------------------------------------------------===// #include "R600InstrInfo.h" +#include "AMDGPU.h" #include "AMDGPUSubtarget.h" #include "AMDGPUTargetMachine.h" #include "R600Defines.h" @@ -29,7 +30,8 @@ using namespace llvm; R600InstrInfo::R600InstrInfo(AMDGPUTargetMachine &tm) : AMDGPUInstrInfo(tm), - RI(tm, *this) + RI(tm, *this), + ST(tm.getSubtarget<AMDGPUSubtarget>()) { } const R600RegisterInfo &R600InstrInfo::getRegisterInfo() const { @@ -139,6 +141,33 @@ bool R600InstrInfo::isALUInstr(unsigned Opcode) const { (TargetFlags & R600_InstFlag::OP3)); } +bool R600InstrInfo::isTransOnly(unsigned Opcode) const { + return (get(Opcode).TSFlags & R600_InstFlag::TRANS_ONLY); +} + +bool R600InstrInfo::isTransOnly(const MachineInstr *MI) const { + return isTransOnly(MI->getOpcode()); +} + +bool R600InstrInfo::usesVertexCache(unsigned Opcode) const { + return ST.hasVertexCache() && IS_VTX(get(Opcode)); +} + +bool R600InstrInfo::usesVertexCache(const MachineInstr *MI) const { + const R600MachineFunctionInfo *MFI = MI->getParent()->getParent()->getInfo<R600MachineFunctionInfo>(); + return MFI->ShaderType != ShaderType::COMPUTE && usesVertexCache(MI->getOpcode()); +} + +bool R600InstrInfo::usesTextureCache(unsigned Opcode) const { + return (!ST.hasVertexCache() && IS_VTX(get(Opcode))) || IS_TEX(get(Opcode)); +} + +bool R600InstrInfo::usesTextureCache(const MachineInstr *MI) const { + const R600MachineFunctionInfo *MFI = MI->getParent()->getParent()->getInfo<R600MachineFunctionInfo>(); + return (MFI->ShaderType == ShaderType::COMPUTE && usesVertexCache(MI->getOpcode())) || + usesTextureCache(MI->getOpcode()); +} + bool R600InstrInfo::fitsConstReadLimitations(const std::vector<unsigned> &Consts) const { @@ -183,10 +212,19 @@ R600InstrInfo::canBundle(const std::vector<MachineInstr *> &MIs) const { int SrcIdx = getOperandIdx(MI->getOpcode(), OpTable[j][0]); if (SrcIdx < 0) break; - if (MI->getOperand(SrcIdx).getReg() == AMDGPU::ALU_CONST) { + unsigned Reg = MI->getOperand(SrcIdx).getReg(); + if (Reg == AMDGPU::ALU_CONST) { unsigned Const = MI->getOperand( getOperandIdx(MI->getOpcode(), OpTable[j][1])).getImm(); Consts.push_back(Const); + continue; + } + if (AMDGPU::R600_KC0RegClass.contains(Reg) || + AMDGPU::R600_KC1RegClass.contains(Reg)) { + unsigned Index = RI.getEncodingValue(Reg) & 0xff; + unsigned Chan = RI.getHWRegChan(Reg); + Consts.push_back((Index << 2) | Chan); + continue; } } } @@ -684,7 +722,8 @@ MachineInstrBuilder R600InstrInfo::buildDefaultInstruction(MachineBasicBlock &MB //scheduling to the backend, we can change the default to 0. 
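The default of 1 for the $last flag below marks every ALU instruction as closing its own instruction group; the R600Packetizer added later in this diff clears the flag on all but the final instruction of a bundle (see setIsLastBit()). A minimal standalone sketch of that post-processing, with AluInstr as a made-up stand-in:

#include <vector>

struct AluInstr { unsigned opcode; bool last = true; };  // every ALU op defaults to last = 1

// Mirror of what the packetizer ends up with after growing a VLIW group:
// only the final slot keeps the $last bit.
void markGroup(std::vector<AluInstr> &group) {
  for (size_t i = 0; i + 1 < group.size(); ++i)
    group[i].last = false;
}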
MIB.addImm(1) // $last .addReg(AMDGPU::PRED_SEL_OFF) // $pred_sel - .addImm(0); // $literal + .addImm(0) // $literal + .addImm(0); // $bank_swizzle return MIB; } diff --git a/contrib/llvm/lib/Target/R600/R600InstrInfo.h b/contrib/llvm/lib/Target/R600/R600InstrInfo.h index dbae900..babe4b8 100644 --- a/contrib/llvm/lib/Target/R600/R600InstrInfo.h +++ b/contrib/llvm/lib/Target/R600/R600InstrInfo.h @@ -33,6 +33,7 @@ namespace llvm { class R600InstrInfo : public AMDGPUInstrInfo { private: const R600RegisterInfo RI; + const AMDGPUSubtarget &ST; int getBranchInstr(const MachineOperand &op) const; @@ -53,6 +54,14 @@ namespace llvm { /// \returns true if this \p Opcode represents an ALU instruction. bool isALUInstr(unsigned Opcode) const; + bool isTransOnly(unsigned Opcode) const; + bool isTransOnly(const MachineInstr *MI) const; + + bool usesVertexCache(unsigned Opcode) const; + bool usesVertexCache(const MachineInstr *MI) const; + bool usesTextureCache(unsigned Opcode) const; + bool usesTextureCache(const MachineInstr *MI) const; + bool fitsConstReadLimitations(const std::vector<unsigned>&) const; bool canBundle(const std::vector<MachineInstr *> &) const; diff --git a/contrib/llvm/lib/Target/R600/R600Instructions.td b/contrib/llvm/lib/Target/R600/R600Instructions.td index 663b41a..8f47523 100644 --- a/contrib/llvm/lib/Target/R600/R600Instructions.td +++ b/contrib/llvm/lib/Target/R600/R600Instructions.td @@ -13,11 +13,12 @@ include "R600Intrinsics.td" -class InstR600 <bits<11> inst, dag outs, dag ins, string asm, list<dag> pattern, +class InstR600 <dag outs, dag ins, string asm, list<dag> pattern, InstrItinClass itin> : AMDGPUInst <outs, ins, asm, pattern> { field bits<64> Inst; + bit TransOnly = 0; bit Trig = 0; bit Op3 = 0; bit isVector = 0; @@ -25,9 +26,9 @@ class InstR600 <bits<11> inst, dag outs, dag ins, string asm, list<dag> pattern, bit Op1 = 0; bit Op2 = 0; bit HasNativeOperands = 0; + bit VTXInst = 0; + bit TEXInst = 0; - bits<11> op_code = inst; - //let Inst = inst; let Namespace = "AMDGPU"; let OutOperandList = outs; let InOperandList = ins; @@ -35,6 +36,7 @@ class InstR600 <bits<11> inst, dag outs, dag ins, string asm, list<dag> pattern, let Pattern = pattern; let Itinerary = itin; + let TSFlags{0} = TransOnly; let TSFlags{4} = Trig; let TSFlags{5} = Op3; @@ -45,11 +47,12 @@ class InstR600 <bits<11> inst, dag outs, dag ins, string asm, list<dag> pattern, let TSFlags{9} = HasNativeOperands; let TSFlags{10} = Op1; let TSFlags{11} = Op2; + let TSFlags{12} = VTXInst; + let TSFlags{13} = TEXInst; } class InstR600ISA <dag outs, dag ins, string asm, list<dag> pattern> : - AMDGPUInst <outs, ins, asm, pattern> { - field bits<64> Inst; + InstR600 <outs, ins, asm, pattern, NullALU> { let Namespace = "AMDGPU"; } @@ -74,6 +77,9 @@ class InstFlag<string PM = "printOperand", int Default = 0> def SEL : OperandWithDefaultOps <i32, (ops (i32 -1))> { let PrintMethod = "printSel"; } +def BANK_SWIZZLE : OperandWithDefaultOps <i32, (ops (i32 0))> { + let PrintMethod = "printBankSwizzle"; +} def LITERAL : InstFlag<"printLiteral">; @@ -137,7 +143,7 @@ class R600ALU_Word1 { field bits<32> Word1; bits<11> dst; - bits<3> bank_swizzle = 0; + bits<3> bank_swizzle; bits<1> dst_rel; bits<1> clamp; @@ -346,15 +352,15 @@ let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in { // and R600InstrInfo::getOperandIdx(). 
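The new $bank_swizzle operand carried by the ALU formats below is printed via printBankSwizzle and defaults to 0; the same ALU_VEC_* values appear in the R600Packetizer added later in this diff, where they appear to permute which read slot each of src0/src1/src2 occupies when a group is checked for register read-port conflicts. A standalone sketch of that permutation (applySwizzle is a made-up name mirroring the packetizer's Swizzle() helper, same enum order):

#include <array>
#include <utility>

enum BankSwizzle { ALU_VEC_012, ALU_VEC_021, ALU_VEC_120,
                   ALU_VEC_102, ALU_VEC_201, ALU_VEC_210 };

// Src[i] is the source occupying read slot i once the swizzle is applied;
// the swap sequences follow the Swizzle() helper in R600Packetizer.cpp.
std::array<int, 3> applySwizzle(std::array<int, 3> Src, BankSwizzle Swz) {
  switch (Swz) {
  case ALU_VEC_012: break;
  case ALU_VEC_021: std::swap(Src[1], Src[2]); break;
  case ALU_VEC_102: std::swap(Src[0], Src[1]); break;
  case ALU_VEC_120: std::swap(Src[0], Src[1]); std::swap(Src[0], Src[2]); break;
  case ALU_VEC_201: std::swap(Src[0], Src[2]); std::swap(Src[0], Src[1]); break;
  case ALU_VEC_210: std::swap(Src[0], Src[2]); break;
  }
  return Src;
}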
class R600_1OP <bits<11> inst, string opName, list<dag> pattern, InstrItinClass itin = AnyALU> : - InstR600 <0, - (outs R600_Reg32:$dst), + InstR600 <(outs R600_Reg32:$dst), (ins WRITE:$write, OMOD:$omod, REL:$dst_rel, CLAMP:$clamp, R600_Reg32:$src0, NEG:$src0_neg, REL:$src0_rel, ABS:$src0_abs, SEL:$src0_sel, - LAST:$last, R600_Pred:$pred_sel, LITERAL:$literal), + LAST:$last, R600_Pred:$pred_sel, LITERAL:$literal, + BANK_SWIZZLE:$bank_swizzle), !strconcat(" ", opName, - "$clamp $dst$write$dst_rel$omod, " + "$last$clamp $dst$write$dst_rel$omod, " "$src0_neg$src0_abs$src0$src0_abs$src0_rel, " - "$literal $pred_sel$last"), + "$pred_sel $bank_swizzle"), pattern, itin>, R600ALU_Word0, @@ -385,18 +391,18 @@ class R600_1OP_Helper <bits<11> inst, string opName, SDPatternOperator node, // R600InstrInfo::buildDefaultInstruction(), and R600InstrInfo::getOperandIdx(). class R600_2OP <bits<11> inst, string opName, list<dag> pattern, InstrItinClass itin = AnyALU> : - InstR600 <inst, - (outs R600_Reg32:$dst), + InstR600 <(outs R600_Reg32:$dst), (ins UEM:$update_exec_mask, UP:$update_pred, WRITE:$write, OMOD:$omod, REL:$dst_rel, CLAMP:$clamp, R600_Reg32:$src0, NEG:$src0_neg, REL:$src0_rel, ABS:$src0_abs, SEL:$src0_sel, R600_Reg32:$src1, NEG:$src1_neg, REL:$src1_rel, ABS:$src1_abs, SEL:$src1_sel, - LAST:$last, R600_Pred:$pred_sel, LITERAL:$literal), + LAST:$last, R600_Pred:$pred_sel, LITERAL:$literal, + BANK_SWIZZLE:$bank_swizzle), !strconcat(" ", opName, - "$clamp $update_exec_mask$update_pred$dst$write$dst_rel$omod, " + "$last$clamp $update_exec_mask$update_pred$dst$write$dst_rel$omod, " "$src0_neg$src0_abs$src0$src0_abs$src0_rel, " "$src1_neg$src1_abs$src1$src1_abs$src1_rel, " - "$literal $pred_sel$last"), + "$pred_sel $bank_swizzle"), pattern, itin>, R600ALU_Word0, @@ -423,18 +429,19 @@ class R600_2OP_Helper <bits<11> inst, string opName, SDPatternOperator node, // R600InstrInfo::getOperandIdx(). 
class R600_3OP <bits<5> inst, string opName, list<dag> pattern, InstrItinClass itin = AnyALU> : - InstR600 <0, - (outs R600_Reg32:$dst), + InstR600 <(outs R600_Reg32:$dst), (ins REL:$dst_rel, CLAMP:$clamp, R600_Reg32:$src0, NEG:$src0_neg, REL:$src0_rel, SEL:$src0_sel, R600_Reg32:$src1, NEG:$src1_neg, REL:$src1_rel, SEL:$src1_sel, R600_Reg32:$src2, NEG:$src2_neg, REL:$src2_rel, SEL:$src2_sel, - LAST:$last, R600_Pred:$pred_sel, LITERAL:$literal), - !strconcat(" ", opName, "$clamp $dst$dst_rel, " + LAST:$last, R600_Pred:$pred_sel, LITERAL:$literal, + BANK_SWIZZLE:$bank_swizzle), + !strconcat(" ", opName, "$last$clamp $dst$dst_rel, " "$src0_neg$src0$src0_rel, " "$src1_neg$src1$src1_rel, " "$src2_neg$src2$src2_rel, " - "$literal $pred_sel$last"), + "$pred_sel" + "$bank_swizzle"), pattern, itin>, R600ALU_Word0, @@ -450,8 +457,7 @@ class R600_3OP <bits<5> inst, string opName, list<dag> pattern, class R600_REDUCTION <bits<11> inst, dag ins, string asm, list<dag> pattern, InstrItinClass itin = VecALU> : - InstR600 <inst, - (outs R600_Reg32:$dst), + InstR600 <(outs R600_Reg32:$dst), ins, asm, pattern, @@ -459,8 +465,7 @@ class R600_REDUCTION <bits<11> inst, dag ins, string asm, list<dag> pattern, class R600_TEX <bits<11> inst, string opName, list<dag> pattern, InstrItinClass itin = AnyALU> : - InstR600 <inst, - (outs R600_Reg128:$DST_GPR), + InstR600 <(outs R600_Reg128:$DST_GPR), (ins R600_Reg128:$SRC_GPR, i32imm:$RESOURCE_ID, i32imm:$SAMPLER_ID, i32imm:$textureTarget), !strconcat(opName, "$DST_GPR, $SRC_GPR, $RESOURCE_ID, $SAMPLER_ID, $textureTarget"), pattern, @@ -481,11 +486,14 @@ class R600_TEX <bits<11> inst, string opName, list<dag> pattern, let FETCH_WHOLE_QUAD = 0; let ALT_CONST = 0; let SAMPLER_INDEX_MODE = 0; + let RESOURCE_INDEX_MODE = 0; let COORD_TYPE_X = 0; let COORD_TYPE_Y = 0; let COORD_TYPE_Z = 0; let COORD_TYPE_W = 0; + + let TEXInst = 1; } } // End mayLoad = 1, mayStore = 0, hasSideEffects = 0 @@ -738,7 +746,9 @@ multiclass SteamOutputExportPattern<Instruction ExportInst, 4095, imm:$mask, buf3inst, 0)>; } -let usesCustomInserter = 1 in { +// Export Instructions should not be duplicated by TailDuplication pass +// (which assumes that duplicable instruction are affected by exec mask) +let usesCustomInserter = 1, isNotDuplicable = 1 in { class ExportSwzInst : InstR600ISA<( outs), @@ -805,12 +815,15 @@ class CF_ALU_WORD1 { let Word1{31} = BARRIER; } +def KCACHE : InstFlag<"printKCache">; + class ALU_CLAUSE<bits<4> inst, string OpName> : AMDGPUInst <(outs), -(ins i32imm:$ADDR, i32imm:$KCACHE_BANK0, i32imm:$KCACHE_BANK1, i32imm:$KCACHE_MODE0, i32imm:$KCACHE_MODE1, -i32imm:$KCACHE_ADDR0, i32imm:$KCACHE_ADDR1, i32imm:$COUNT), +(ins i32imm:$ADDR, i32imm:$KCACHE_BANK0, i32imm:$KCACHE_BANK1, +KCACHE:$KCACHE_MODE0, KCACHE:$KCACHE_MODE1, +i32imm:$KCACHE_ADDR0, i32imm:$KCACHE_ADDR1, +i32imm:$COUNT), !strconcat(OpName, " $COUNT, @$ADDR, " -"KC0[CB$KCACHE_BANK0:$KCACHE_ADDR0-$KCACHE_ADDR0+32]" -", KC1[CB$KCACHE_BANK1:$KCACHE_ADDR1-$KCACHE_ADDR1+32]"), +"KC0[$KCACHE_MODE0], KC1[$KCACHE_MODE1]"), [] >, CF_ALU_WORD0, CF_ALU_WORD1 { field bits<64> Inst; @@ -823,109 +836,139 @@ i32imm:$KCACHE_ADDR0, i32imm:$KCACHE_ADDR1, i32imm:$COUNT), let Inst{63-32} = Word1; } -class CF_WORD0 { +class CF_WORD0_R600 { field bits<32> Word0; - bits<24> ADDR; - bits<3> JUMPTABLE_SEL; + bits<32> ADDR; - let Word0{23-0} = ADDR; - let Word0{26-24} = JUMPTABLE_SEL; + let Word0 = ADDR; } -class CF_WORD1 { +class CF_WORD1_R600 { field bits<32> Word1; bits<3> POP_COUNT; bits<5> CF_CONST; bits<2> COND; - bits<6> COUNT; + 
bits<3> COUNT; + bits<6> CALL_COUNT; + bits<1> COUNT_3; + bits<1> END_OF_PROGRAM; bits<1> VALID_PIXEL_MODE; - bits<8> CF_INST; + bits<7> CF_INST; + bits<1> WHOLE_QUAD_MODE; bits<1> BARRIER; let Word1{2-0} = POP_COUNT; let Word1{7-3} = CF_CONST; let Word1{9-8} = COND; - let Word1{15-10} = COUNT; - let Word1{20} = VALID_PIXEL_MODE; - let Word1{29-22} = CF_INST; + let Word1{12-10} = COUNT; + let Word1{18-13} = CALL_COUNT; + let Word1{19} = COUNT_3; + let Word1{21} = END_OF_PROGRAM; + let Word1{22} = VALID_PIXEL_MODE; + let Word1{29-23} = CF_INST; + let Word1{30} = WHOLE_QUAD_MODE; let Word1{31} = BARRIER; } -class CF_CLAUSE <bits<8> inst, dag ins, string AsmPrint> : AMDGPUInst <(outs), -ins, AsmPrint, [] >, CF_WORD0, CF_WORD1 { +class CF_CLAUSE_R600 <bits<7> inst, dag ins, string AsmPrint> : AMDGPUInst <(outs), +ins, AsmPrint, [] >, CF_WORD0_R600, CF_WORD1_R600 { field bits<64> Inst; let CF_INST = inst; let BARRIER = 1; - let JUMPTABLE_SEL = 0; let CF_CONST = 0; let VALID_PIXEL_MODE = 0; let COND = 0; + let CALL_COUNT = 0; + let COUNT_3 = 0; + let END_OF_PROGRAM = 0; + let WHOLE_QUAD_MODE = 0; let Inst{31-0} = Word0; let Inst{63-32} = Word1; } -def CF_TC : CF_CLAUSE<1, (ins i32imm:$ADDR, i32imm:$COUNT), -"TEX $COUNT @$ADDR"> { - let POP_COUNT = 0; -} - -def CF_VC : CF_CLAUSE<2, (ins i32imm:$ADDR, i32imm:$COUNT), -"VTX $COUNT @$ADDR"> { - let POP_COUNT = 0; -} +class CF_WORD0_EG { + field bits<32> Word0; -def WHILE_LOOP : CF_CLAUSE<6, (ins i32imm:$ADDR), "LOOP_START_DX10 @$ADDR"> { - let POP_COUNT = 0; - let COUNT = 0; -} + bits<24> ADDR; + bits<3> JUMPTABLE_SEL; -def END_LOOP : CF_CLAUSE<5, (ins i32imm:$ADDR), "END_LOOP @$ADDR"> { - let POP_COUNT = 0; - let COUNT = 0; + let Word0{23-0} = ADDR; + let Word0{26-24} = JUMPTABLE_SEL; } -def LOOP_BREAK : CF_CLAUSE<9, (ins i32imm:$ADDR), "LOOP_BREAK @$ADDR"> { - let POP_COUNT = 0; - let COUNT = 0; -} +class CF_WORD1_EG { + field bits<32> Word1; -def CF_CONTINUE : CF_CLAUSE<8, (ins i32imm:$ADDR), "CONTINUE @$ADDR"> { - let POP_COUNT = 0; - let COUNT = 0; -} + bits<3> POP_COUNT; + bits<5> CF_CONST; + bits<2> COND; + bits<6> COUNT; + bits<1> VALID_PIXEL_MODE; + bits<1> END_OF_PROGRAM; + bits<8> CF_INST; + bits<1> BARRIER; -def CF_JUMP : CF_CLAUSE<10, (ins i32imm:$ADDR, i32imm:$POP_COUNT), "JUMP @$ADDR POP:$POP_COUNT"> { - let COUNT = 0; + let Word1{2-0} = POP_COUNT; + let Word1{7-3} = CF_CONST; + let Word1{9-8} = COND; + let Word1{15-10} = COUNT; + let Word1{20} = VALID_PIXEL_MODE; + let Word1{21} = END_OF_PROGRAM; + let Word1{29-22} = CF_INST; + let Word1{31} = BARRIER; } -def CF_ELSE : CF_CLAUSE<13, (ins i32imm:$ADDR, i32imm:$POP_COUNT), "ELSE @$ADDR POP:$POP_COUNT"> { - let COUNT = 0; -} +class CF_CLAUSE_EG <bits<8> inst, dag ins, string AsmPrint> : AMDGPUInst <(outs), +ins, AsmPrint, [] >, CF_WORD0_EG, CF_WORD1_EG { + field bits<64> Inst; -def CF_CALL_FS : CF_CLAUSE<19, (ins), "CALL_FS"> { - let ADDR = 0; - let COUNT = 0; - let POP_COUNT = 0; -} + let CF_INST = inst; + let BARRIER = 1; + let JUMPTABLE_SEL = 0; + let CF_CONST = 0; + let VALID_PIXEL_MODE = 0; + let COND = 0; + let END_OF_PROGRAM = 0; -def POP : CF_CLAUSE<14, (ins i32imm:$ADDR, i32imm:$POP_COUNT), "POP @$ADDR POP:$POP_COUNT"> { - let COUNT = 0; + let Inst{31-0} = Word0; + let Inst{63-32} = Word1; } def CF_ALU : ALU_CLAUSE<8, "ALU">; def CF_ALU_PUSH_BEFORE : ALU_CLAUSE<9, "ALU_PUSH_BEFORE">; -def STACK_SIZE : AMDGPUInst <(outs), -(ins i32imm:$num), "nstack $num", [] > { +def FETCH_CLAUSE : AMDGPUInst <(outs), +(ins i32imm:$addr), "Fetch clause starting at $addr:", [] > { field bits<8> 
Inst; bits<8> num; let Inst = num; } +def ALU_CLAUSE : AMDGPUInst <(outs), +(ins i32imm:$addr), "ALU clause starting at $addr:", [] > { + field bits<8> Inst; + bits<8> num; + let Inst = num; +} + +def LITERALS : AMDGPUInst <(outs), +(ins LITERAL:$literal1, LITERAL:$literal2), "$literal1, $literal2", [] > { + field bits<64> Inst; + bits<32> literal1; + bits<32> literal2; + + let Inst{31-0} = literal1; + let Inst{63-32} = literal2; +} + +def PAD : AMDGPUInst <(outs), (ins), "PAD", [] > { + field bits<64> Inst; +} + let Predicates = [isR600toCayman] in { //===----------------------------------------------------------------------===// @@ -944,58 +987,42 @@ def MIN : R600_2OP_Helper <0x4, "MIN", AMDGPUfmin>; // XXX: Use the defs in TargetSelectionDAG.td instead of intrinsics. def SETE : R600_2OP < 0x08, "SETE", - [(set R600_Reg32:$dst, - (selectcc (f32 R600_Reg32:$src0), R600_Reg32:$src1, FP_ONE, FP_ZERO, - COND_EQ))] + [(set f32:$dst, (selectcc f32:$src0, f32:$src1, FP_ONE, FP_ZERO, COND_EQ))] >; def SGT : R600_2OP < 0x09, "SETGT", - [(set R600_Reg32:$dst, - (selectcc (f32 R600_Reg32:$src0), R600_Reg32:$src1, FP_ONE, FP_ZERO, - COND_GT))] + [(set f32:$dst, (selectcc f32:$src0, f32:$src1, FP_ONE, FP_ZERO, COND_GT))] >; def SGE : R600_2OP < 0xA, "SETGE", - [(set R600_Reg32:$dst, - (selectcc (f32 R600_Reg32:$src0), R600_Reg32:$src1, FP_ONE, FP_ZERO, - COND_GE))] + [(set f32:$dst, (selectcc f32:$src0, f32:$src1, FP_ONE, FP_ZERO, COND_GE))] >; def SNE : R600_2OP < 0xB, "SETNE", - [(set R600_Reg32:$dst, - (selectcc (f32 R600_Reg32:$src0), R600_Reg32:$src1, FP_ONE, FP_ZERO, - COND_NE))] + [(set f32:$dst, (selectcc f32:$src0, f32:$src1, FP_ONE, FP_ZERO, COND_NE))] >; def SETE_DX10 : R600_2OP < 0xC, "SETE_DX10", - [(set R600_Reg32:$dst, - (selectcc (f32 R600_Reg32:$src0), R600_Reg32:$src1, (i32 -1), (i32 0), - COND_EQ))] + [(set i32:$dst, (selectcc f32:$src0, f32:$src1, -1, 0, COND_EQ))] >; def SETGT_DX10 : R600_2OP < 0xD, "SETGT_DX10", - [(set R600_Reg32:$dst, - (selectcc (f32 R600_Reg32:$src0), R600_Reg32:$src1, (i32 -1), (i32 0), - COND_GT))] + [(set i32:$dst, (selectcc f32:$src0, f32:$src1, -1, 0, COND_GT))] >; def SETGE_DX10 : R600_2OP < 0xE, "SETGE_DX10", - [(set R600_Reg32:$dst, - (selectcc (f32 R600_Reg32:$src0), R600_Reg32:$src1, (i32 -1), (i32 0), - COND_GE))] + [(set i32:$dst, (selectcc f32:$src0, f32:$src1, -1, 0, COND_GE))] >; def SETNE_DX10 : R600_2OP < 0xF, "SETNE_DX10", - [(set R600_Reg32:$dst, - (selectcc (f32 R600_Reg32:$src0), R600_Reg32:$src1, (i32 -1), (i32 0), - COND_NE))] + [(set i32:$dst, (selectcc f32:$src0, f32:$src1, -1, 0, COND_NE))] >; def FRACT : R600_1OP_Helper <0x10, "FRACT", AMDGPUfract>; @@ -1053,38 +1080,32 @@ def MIN_UINT : R600_2OP_Helper <0x39, "MIN_UINT", AMDGPUumin>; def SETE_INT : R600_2OP < 0x3A, "SETE_INT", - [(set (i32 R600_Reg32:$dst), - (selectcc (i32 R600_Reg32:$src0), R600_Reg32:$src1, -1, 0, SETEQ))] + [(set i32:$dst, (selectcc i32:$src0, i32:$src1, -1, 0, SETEQ))] >; def SETGT_INT : R600_2OP < 0x3B, "SETGT_INT", - [(set (i32 R600_Reg32:$dst), - (selectcc (i32 R600_Reg32:$src0), R600_Reg32:$src1, -1, 0, SETGT))] + [(set i32:$dst, (selectcc i32:$src0, i32:$src1, -1, 0, SETGT))] >; def SETGE_INT : R600_2OP < 0x3C, "SETGE_INT", - [(set (i32 R600_Reg32:$dst), - (selectcc (i32 R600_Reg32:$src0), R600_Reg32:$src1, -1, 0, SETGE))] + [(set i32:$dst, (selectcc i32:$src0, i32:$src1, -1, 0, SETGE))] >; def SETNE_INT : R600_2OP < 0x3D, "SETNE_INT", - [(set (i32 R600_Reg32:$dst), - (selectcc (i32 R600_Reg32:$src0), R600_Reg32:$src1, -1, 0, SETNE))] + [(set i32:$dst, 
(selectcc i32:$src0, i32:$src1, -1, 0, SETNE))] >; def SETGT_UINT : R600_2OP < 0x3E, "SETGT_UINT", - [(set (i32 R600_Reg32:$dst), - (selectcc (i32 R600_Reg32:$src0), R600_Reg32:$src1, -1, 0, SETUGT))] + [(set i32:$dst, (selectcc i32:$src0, i32:$src1, -1, 0, SETUGT))] >; def SETGE_UINT : R600_2OP < 0x3F, "SETGE_UINT", - [(set (i32 R600_Reg32:$dst), - (selectcc (i32 R600_Reg32:$src0), R600_Reg32:$src1, -1, 0, SETUGE))] + [(set i32:$dst, (selectcc i32:$src0, i32:$src1, -1, 0, SETUGE))] >; def PRED_SETE_INT : R600_2OP <0x42, "PRED_SETE_INT", []>; @@ -1094,26 +1115,17 @@ def PRED_SETNE_INT : R600_2OP <0x45, "PRED_SETNE_INT", []>; def CNDE_INT : R600_3OP < 0x1C, "CNDE_INT", - [(set (i32 R600_Reg32:$dst), - (selectcc (i32 R600_Reg32:$src0), 0, - (i32 R600_Reg32:$src1), (i32 R600_Reg32:$src2), - COND_EQ))] + [(set i32:$dst, (selectcc i32:$src0, 0, i32:$src1, i32:$src2, COND_EQ))] >; def CNDGE_INT : R600_3OP < 0x1E, "CNDGE_INT", - [(set (i32 R600_Reg32:$dst), - (selectcc (i32 R600_Reg32:$src0), 0, - (i32 R600_Reg32:$src1), (i32 R600_Reg32:$src2), - COND_GE))] + [(set i32:$dst, (selectcc i32:$src0, 0, i32:$src1, i32:$src2, COND_GE))] >; def CNDGT_INT : R600_3OP < 0x1D, "CNDGT_INT", - [(set (i32 R600_Reg32:$dst), - (selectcc (i32 R600_Reg32:$src0), 0, - (i32 R600_Reg32:$src1), (i32 R600_Reg32:$src2), - COND_GT))] + [(set i32:$dst, (selectcc i32:$src0, 0, i32:$src1, i32:$src2, COND_GT))] >; //===----------------------------------------------------------------------===// @@ -1122,7 +1134,7 @@ def CNDGT_INT : R600_3OP < def TEX_LD : R600_TEX < 0x03, "TEX_LD", - [(set R600_Reg128:$DST_GPR, (int_AMDGPU_txf R600_Reg128:$SRC_GPR, + [(set v4f32:$DST_GPR, (int_AMDGPU_txf v4f32:$SRC_GPR, imm:$OFFSET_X, imm:$OFFSET_Y, imm:$OFFSET_Z, imm:$RESOURCE_ID, imm:$SAMPLER_ID, imm:$textureTarget))] > { @@ -1135,19 +1147,19 @@ let InOperandList = (ins R600_Reg128:$SRC_GPR, i32imm:$OFFSET_X, def TEX_GET_TEXTURE_RESINFO : R600_TEX < 0x04, "TEX_GET_TEXTURE_RESINFO", - [(set R600_Reg128:$DST_GPR, (int_AMDGPU_txq R600_Reg128:$SRC_GPR, + [(set v4f32:$DST_GPR, (int_AMDGPU_txq v4f32:$SRC_GPR, imm:$RESOURCE_ID, imm:$SAMPLER_ID, imm:$textureTarget))] >; def TEX_GET_GRADIENTS_H : R600_TEX < 0x07, "TEX_GET_GRADIENTS_H", - [(set R600_Reg128:$DST_GPR, (int_AMDGPU_ddx R600_Reg128:$SRC_GPR, + [(set v4f32:$DST_GPR, (int_AMDGPU_ddx v4f32:$SRC_GPR, imm:$RESOURCE_ID, imm:$SAMPLER_ID, imm:$textureTarget))] >; def TEX_GET_GRADIENTS_V : R600_TEX < 0x08, "TEX_GET_GRADIENTS_V", - [(set R600_Reg128:$DST_GPR, (int_AMDGPU_ddy R600_Reg128:$SRC_GPR, + [(set v4f32:$DST_GPR, (int_AMDGPU_ddy v4f32:$SRC_GPR, imm:$RESOURCE_ID, imm:$SAMPLER_ID, imm:$textureTarget))] >; @@ -1163,37 +1175,37 @@ def TEX_SET_GRADIENTS_V : R600_TEX < def TEX_SAMPLE : R600_TEX < 0x10, "TEX_SAMPLE", - [(set R600_Reg128:$DST_GPR, (int_AMDGPU_tex R600_Reg128:$SRC_GPR, + [(set v4f32:$DST_GPR, (int_AMDGPU_tex v4f32:$SRC_GPR, imm:$RESOURCE_ID, imm:$SAMPLER_ID, imm:$textureTarget))] >; def TEX_SAMPLE_C : R600_TEX < 0x18, "TEX_SAMPLE_C", - [(set R600_Reg128:$DST_GPR, (int_AMDGPU_tex R600_Reg128:$SRC_GPR, + [(set v4f32:$DST_GPR, (int_AMDGPU_tex v4f32:$SRC_GPR, imm:$RESOURCE_ID, imm:$SAMPLER_ID, TEX_SHADOW:$textureTarget))] >; def TEX_SAMPLE_L : R600_TEX < 0x11, "TEX_SAMPLE_L", - [(set R600_Reg128:$DST_GPR, (int_AMDGPU_txl R600_Reg128:$SRC_GPR, + [(set v4f32:$DST_GPR, (int_AMDGPU_txl v4f32:$SRC_GPR, imm:$RESOURCE_ID, imm:$SAMPLER_ID, imm:$textureTarget))] >; def TEX_SAMPLE_C_L : R600_TEX < 0x19, "TEX_SAMPLE_C_L", - [(set R600_Reg128:$DST_GPR, (int_AMDGPU_txl R600_Reg128:$SRC_GPR, + [(set 
v4f32:$DST_GPR, (int_AMDGPU_txl v4f32:$SRC_GPR, imm:$RESOURCE_ID, imm:$SAMPLER_ID, TEX_SHADOW:$textureTarget))] >; def TEX_SAMPLE_LB : R600_TEX < 0x12, "TEX_SAMPLE_LB", - [(set R600_Reg128:$DST_GPR, (int_AMDGPU_txb R600_Reg128:$SRC_GPR, + [(set v4f32:$DST_GPR, (int_AMDGPU_txb v4f32:$SRC_GPR, imm:$RESOURCE_ID, imm:$SAMPLER_ID, imm:$textureTarget))] >; def TEX_SAMPLE_C_LB : R600_TEX < 0x1A, "TEX_SAMPLE_C_LB", - [(set R600_Reg128:$DST_GPR, (int_AMDGPU_txb R600_Reg128:$SRC_GPR, + [(set v4f32:$DST_GPR, (int_AMDGPU_txb v4f32:$SRC_GPR, imm:$RESOURCE_ID, imm:$SAMPLER_ID, TEX_SHADOW:$textureTarget))] >; @@ -1223,32 +1235,22 @@ class MULADD_Common <bits<5> inst> : R600_3OP < class MULADD_IEEE_Common <bits<5> inst> : R600_3OP < inst, "MULADD_IEEE", - [(set (f32 R600_Reg32:$dst), - (fadd (fmul R600_Reg32:$src0, R600_Reg32:$src1), R600_Reg32:$src2))] + [(set f32:$dst, (fadd (fmul f32:$src0, f32:$src1), f32:$src2))] >; class CNDE_Common <bits<5> inst> : R600_3OP < inst, "CNDE", - [(set R600_Reg32:$dst, - (selectcc (f32 R600_Reg32:$src0), FP_ZERO, - (f32 R600_Reg32:$src1), (f32 R600_Reg32:$src2), - COND_EQ))] + [(set f32:$dst, (selectcc f32:$src0, FP_ZERO, f32:$src1, f32:$src2, COND_EQ))] >; class CNDGT_Common <bits<5> inst> : R600_3OP < inst, "CNDGT", - [(set R600_Reg32:$dst, - (selectcc (f32 R600_Reg32:$src0), FP_ZERO, - (f32 R600_Reg32:$src1), (f32 R600_Reg32:$src2), - COND_GT))] + [(set f32:$dst, (selectcc f32:$src0, FP_ZERO, f32:$src1, f32:$src2, COND_GT))] >; class CNDGE_Common <bits<5> inst> : R600_3OP < inst, "CNDGE", - [(set R600_Reg32:$dst, - (selectcc (f32 R600_Reg32:$src0), FP_ZERO, - (f32 R600_Reg32:$src1), (f32 R600_Reg32:$src2), - COND_GE))] + [(set f32:$dst, (selectcc f32:$src0, FP_ZERO, f32:$src1, f32:$src2, COND_GE))] >; multiclass DOT4_Common <bits<11> inst> { @@ -1256,7 +1258,7 @@ multiclass DOT4_Common <bits<11> inst> { def _pseudo : R600_REDUCTION <inst, (ins R600_Reg128:$src0, R600_Reg128:$src1), "DOT4 $dst $src0, $src1", - [(set R600_Reg32:$dst, (int_AMDGPU_dp4 R600_Reg128:$src0, R600_Reg128:$src1))] + [(set f32:$dst, (int_AMDGPU_dp4 v4f32:$src0, v4f32:$src1))] >; def _real : R600_2OP <inst, "DOT4", []>; @@ -1266,11 +1268,10 @@ let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in { multiclass CUBE_Common <bits<11> inst> { def _pseudo : InstR600 < - inst, (outs R600_Reg128:$dst), (ins R600_Reg128:$src), "CUBE $dst $src", - [(set R600_Reg128:$dst, (int_AMDGPU_cube R600_Reg128:$src))], + [(set v4f32:$dst, (int_AMDGPU_cube v4f32:$src))], VecALU > { let isPseudo = 1; @@ -1282,23 +1283,38 @@ multiclass CUBE_Common <bits<11> inst> { class EXP_IEEE_Common <bits<11> inst> : R600_1OP_Helper < inst, "EXP_IEEE", fexp2 ->; +> { + let TransOnly = 1; + let Itinerary = TransALU; +} class FLT_TO_INT_Common <bits<11> inst> : R600_1OP_Helper < inst, "FLT_TO_INT", fp_to_sint ->; +> { + let TransOnly = 1; + let Itinerary = TransALU; +} class INT_TO_FLT_Common <bits<11> inst> : R600_1OP_Helper < inst, "INT_TO_FLT", sint_to_fp ->; +> { + let TransOnly = 1; + let Itinerary = TransALU; +} class FLT_TO_UINT_Common <bits<11> inst> : R600_1OP_Helper < inst, "FLT_TO_UINT", fp_to_uint ->; +> { + let TransOnly = 1; + let Itinerary = TransALU; +} class UINT_TO_FLT_Common <bits<11> inst> : R600_1OP_Helper < inst, "UINT_TO_FLT", uint_to_fp ->; +> { + let TransOnly = 1; + let Itinerary = TransALU; +} class LOG_CLAMPED_Common <bits<11> inst> : R600_1OP < inst, "LOG_CLAMPED", [] @@ -1306,50 +1322,84 @@ class LOG_CLAMPED_Common <bits<11> inst> : R600_1OP < class LOG_IEEE_Common <bits<11> inst> : R600_1OP_Helper < 
inst, "LOG_IEEE", flog2 ->; +> { + let TransOnly = 1; + let Itinerary = TransALU; +} class LSHL_Common <bits<11> inst> : R600_2OP_Helper <inst, "LSHL", shl>; class LSHR_Common <bits<11> inst> : R600_2OP_Helper <inst, "LSHR", srl>; class ASHR_Common <bits<11> inst> : R600_2OP_Helper <inst, "ASHR", sra>; class MULHI_INT_Common <bits<11> inst> : R600_2OP_Helper < inst, "MULHI_INT", mulhs ->; +> { + let TransOnly = 1; + let Itinerary = TransALU; +} class MULHI_UINT_Common <bits<11> inst> : R600_2OP_Helper < inst, "MULHI", mulhu ->; +> { + let TransOnly = 1; + let Itinerary = TransALU; +} class MULLO_INT_Common <bits<11> inst> : R600_2OP_Helper < inst, "MULLO_INT", mul ->; -class MULLO_UINT_Common <bits<11> inst> : R600_2OP <inst, "MULLO_UINT", []>; +> { + let TransOnly = 1; + let Itinerary = TransALU; +} +class MULLO_UINT_Common <bits<11> inst> : R600_2OP <inst, "MULLO_UINT", []> { + let TransOnly = 1; + let Itinerary = TransALU; +} class RECIP_CLAMPED_Common <bits<11> inst> : R600_1OP < inst, "RECIP_CLAMPED", [] ->; +> { + let TransOnly = 1; + let Itinerary = TransALU; +} class RECIP_IEEE_Common <bits<11> inst> : R600_1OP < - inst, "RECIP_IEEE", [(set R600_Reg32:$dst, (fdiv FP_ONE, R600_Reg32:$src0))] ->; + inst, "RECIP_IEEE", [(set f32:$dst, (fdiv FP_ONE, f32:$src0))] +> { + let TransOnly = 1; + let Itinerary = TransALU; +} class RECIP_UINT_Common <bits<11> inst> : R600_1OP_Helper < inst, "RECIP_UINT", AMDGPUurecip ->; +> { + let TransOnly = 1; + let Itinerary = TransALU; +} class RECIPSQRT_CLAMPED_Common <bits<11> inst> : R600_1OP_Helper < inst, "RECIPSQRT_CLAMPED", int_AMDGPU_rsq ->; +> { + let TransOnly = 1; + let Itinerary = TransALU; +} class RECIPSQRT_IEEE_Common <bits<11> inst> : R600_1OP < inst, "RECIPSQRT_IEEE", [] ->; +> { + let TransOnly = 1; + let Itinerary = TransALU; +} class SIN_Common <bits<11> inst> : R600_1OP < inst, "SIN", []>{ let Trig = 1; + let TransOnly = 1; + let Itinerary = TransALU; } class COS_Common <bits<11> inst> : R600_1OP < inst, "COS", []> { let Trig = 1; + let TransOnly = 1; + let Itinerary = TransALU; } //===----------------------------------------------------------------------===// @@ -1358,19 +1408,20 @@ class COS_Common <bits<11> inst> : R600_1OP < multiclass DIV_Common <InstR600 recip_ieee> { def : Pat< - (int_AMDGPU_div R600_Reg32:$src0, R600_Reg32:$src1), - (MUL_IEEE R600_Reg32:$src0, (recip_ieee R600_Reg32:$src1)) + (int_AMDGPU_div f32:$src0, f32:$src1), + (MUL_IEEE $src0, (recip_ieee $src1)) >; def : Pat< - (fdiv R600_Reg32:$src0, R600_Reg32:$src1), - (MUL_IEEE R600_Reg32:$src0, (recip_ieee R600_Reg32:$src1)) + (fdiv f32:$src0, f32:$src1), + (MUL_IEEE $src0, (recip_ieee $src1)) >; } -class TGSI_LIT_Z_Common <InstR600 mul_lit, InstR600 log_clamped, InstR600 exp_ieee> : Pat < - (int_TGSI_lit_z R600_Reg32:$src_x, R600_Reg32:$src_y, R600_Reg32:$src_w), - (exp_ieee (mul_lit (log_clamped (MAX R600_Reg32:$src_y, (f32 ZERO))), R600_Reg32:$src_w, R600_Reg32:$src_x)) +class TGSI_LIT_Z_Common <InstR600 mul_lit, InstR600 log_clamped, InstR600 exp_ieee> + : Pat < + (int_TGSI_lit_z f32:$src_x, f32:$src_y, f32:$src_w), + (exp_ieee (mul_lit (log_clamped (MAX $src_y, (f32 ZERO))), $src_w, $src_x)) >; //===----------------------------------------------------------------------===// @@ -1410,14 +1461,13 @@ let Predicates = [isR600] in { def RECIP_UINT_r600 : RECIP_UINT_Common <0x78>; defm DIV_r600 : DIV_Common<RECIP_IEEE_r600>; - def : POW_Common <LOG_IEEE_r600, EXP_IEEE_r600, MUL, R600_Reg32>; + def : POW_Common <LOG_IEEE_r600, EXP_IEEE_r600, MUL>; def TGSI_LIT_Z_r600 
: TGSI_LIT_Z_Common<MUL_LIT_r600, LOG_CLAMPED_r600, EXP_IEEE_r600>; - def : Pat<(fsqrt R600_Reg32:$src), - (MUL R600_Reg32:$src, (RECIPSQRT_CLAMPED_r600 R600_Reg32:$src))>; + def : Pat<(fsqrt f32:$src), (MUL $src, (RECIPSQRT_CLAMPED_r600 $src))>; def R600_ExportSwz : ExportSwzInst { - let Word1{20-17} = 1; // BURST_COUNT + let Word1{20-17} = 0; // BURST_COUNT let Word1{21} = eop; let Word1{22} = 1; // VALID_PIXEL_MODE let Word1{30-23} = inst; @@ -1426,25 +1476,77 @@ let Predicates = [isR600] in { defm : ExportPattern<R600_ExportSwz, 39>; def R600_ExportBuf : ExportBufInst { - let Word1{20-17} = 1; // BURST_COUNT + let Word1{20-17} = 0; // BURST_COUNT let Word1{21} = eop; let Word1{22} = 1; // VALID_PIXEL_MODE let Word1{30-23} = inst; let Word1{31} = 1; // BARRIER } defm : SteamOutputExportPattern<R600_ExportBuf, 0x20, 0x21, 0x22, 0x23>; + + def CF_TC_R600 : CF_CLAUSE_R600<1, (ins i32imm:$ADDR, i32imm:$COUNT), + "TEX $COUNT @$ADDR"> { + let POP_COUNT = 0; + } + def CF_VC_R600 : CF_CLAUSE_R600<2, (ins i32imm:$ADDR, i32imm:$COUNT), + "VTX $COUNT @$ADDR"> { + let POP_COUNT = 0; + } + def WHILE_LOOP_R600 : CF_CLAUSE_R600<6, (ins i32imm:$ADDR), + "LOOP_START_DX10 @$ADDR"> { + let POP_COUNT = 0; + let COUNT = 0; + } + def END_LOOP_R600 : CF_CLAUSE_R600<5, (ins i32imm:$ADDR), "END_LOOP @$ADDR"> { + let POP_COUNT = 0; + let COUNT = 0; + } + def LOOP_BREAK_R600 : CF_CLAUSE_R600<9, (ins i32imm:$ADDR), + "LOOP_BREAK @$ADDR"> { + let POP_COUNT = 0; + let COUNT = 0; + } + def CF_CONTINUE_R600 : CF_CLAUSE_R600<8, (ins i32imm:$ADDR), + "CONTINUE @$ADDR"> { + let POP_COUNT = 0; + let COUNT = 0; + } + def CF_JUMP_R600 : CF_CLAUSE_R600<10, (ins i32imm:$ADDR, i32imm:$POP_COUNT), + "JUMP @$ADDR POP:$POP_COUNT"> { + let COUNT = 0; + } + def CF_ELSE_R600 : CF_CLAUSE_R600<13, (ins i32imm:$ADDR, i32imm:$POP_COUNT), + "ELSE @$ADDR POP:$POP_COUNT"> { + let COUNT = 0; + } + def CF_CALL_FS_R600 : CF_CLAUSE_R600<19, (ins), "CALL_FS"> { + let ADDR = 0; + let COUNT = 0; + let POP_COUNT = 0; + } + def POP_R600 : CF_CLAUSE_R600<14, (ins i32imm:$ADDR, i32imm:$POP_COUNT), + "POP @$ADDR POP:$POP_COUNT"> { + let COUNT = 0; + } + def CF_END_R600 : CF_CLAUSE_R600<0, (ins), "CF_END"> { + let COUNT = 0; + let POP_COUNT = 0; + let ADDR = 0; + let END_OF_PROGRAM = 1; + } + } // Helper pattern for normalizing inputs to triginomic instructions for R700+ // cards. 
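The COS_PAT/SIN_PAT classes below wrap the operand in a MUL_IEEE by CONST.TWO_PI_INV, i.e. the angle is pre-scaled by 1/(2*pi) before it reaches the hardware SIN/COS, presumably because those units take the angle as a fraction of a full turn. A quick standalone check of the scaling:

#include <cstdio>

int main() {
  const float TwoPiInv = 0.15915494f;   // 1/(2*pi), cf. CONST.TWO_PI_INV
  float x = 1.5707963f;                 // pi/2
  std::printf("scaled operand = %f\n", x * TwoPiInv);  // ~0.25 of a full turn
  return 0;
}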
class COS_PAT <InstR600 trig> : Pat< - (fcos R600_Reg32:$src), - (trig (MUL_IEEE (MOV_IMM_I32 CONST.TWO_PI_INV), R600_Reg32:$src)) + (fcos f32:$src), + (trig (MUL_IEEE (MOV_IMM_I32 CONST.TWO_PI_INV), $src)) >; class SIN_PAT <InstR600 trig> : Pat< - (fsin R600_Reg32:$src), - (trig (MUL_IEEE (MOV_IMM_I32 CONST.TWO_PI_INV), R600_Reg32:$src)) + (fsin f32:$src), + (trig (MUL_IEEE (MOV_IMM_I32 CONST.TWO_PI_INV), $src)) >; //===----------------------------------------------------------------------===// @@ -1482,11 +1584,10 @@ def RECIPSQRT_IEEE_eg : RECIPSQRT_IEEE_Common<0x89>; def SIN_eg : SIN_Common<0x8D>; def COS_eg : COS_Common<0x8E>; -def : POW_Common <LOG_IEEE_eg, EXP_IEEE_eg, MUL, R600_Reg32>; +def : POW_Common <LOG_IEEE_eg, EXP_IEEE_eg, MUL>; def : SIN_PAT <SIN_eg>; def : COS_PAT <COS_eg>; -def : Pat<(fsqrt R600_Reg32:$src), - (MUL R600_Reg32:$src, (RECIPSQRT_CLAMPED_eg R600_Reg32:$src))>; +def : Pat<(fsqrt f32:$src), (MUL $src, (RECIPSQRT_CLAMPED_eg $src))>; } // End Predicates = [isEG] //===----------------------------------------------------------------------===// @@ -1510,15 +1611,17 @@ let Predicates = [isEGorCayman] in { // (16,8) = (Input << 8) >> 24 = (Input & 0xffffff) >> 16 // (24,8) = (Input << 0) >> 24 = (Input & 0xffffffff) >> 24 def BFE_UINT_eg : R600_3OP <0x4, "BFE_UINT", - [(set R600_Reg32:$dst, (int_AMDIL_bit_extract_u32 R600_Reg32:$src0, - R600_Reg32:$src1, - R600_Reg32:$src2))], + [(set i32:$dst, (int_AMDIL_bit_extract_u32 i32:$src0, i32:$src1, + i32:$src2))], VecALU >; + def : BFEPattern <BFE_UINT_eg>; + + def BFI_INT_eg : R600_3OP <0x06, "BFI_INT", [], VecALU>; + defm : BFIPatterns <BFI_INT_eg>; def BIT_ALIGN_INT_eg : R600_3OP <0xC, "BIT_ALIGN_INT", - [(set R600_Reg32:$dst, (AMDGPUbitalign R600_Reg32:$src0, R600_Reg32:$src1, - R600_Reg32:$src2))], + [(set i32:$dst, (AMDGPUbitalign i32:$src0, i32:$src1, i32:$src2))], VecALU >; @@ -1563,14 +1666,15 @@ let hasSideEffects = 1 in { // XXX: Lowering SELECT_CC will sometimes generate fp_to_[su]int nodes, // which do not need to be truncated since the fp values are 0.0f or 1.0f. // We should look into handling these cases separately. 
- def : Pat<(fp_to_sint R600_Reg32:$src0), - (FLT_TO_INT_eg (TRUNC R600_Reg32:$src0))>; + def : Pat<(fp_to_sint f32:$src0), (FLT_TO_INT_eg (TRUNC $src0))>; + + def : Pat<(fp_to_uint f32:$src0), (FLT_TO_UINT_eg (TRUNC $src0))>; - def : Pat<(fp_to_uint R600_Reg32:$src0), - (FLT_TO_UINT_eg (TRUNC R600_Reg32:$src0))>; + // SHA-256 Patterns + def : SHA256MaPattern <BFI_INT_eg, XOR_INT>; def EG_ExportSwz : ExportSwzInst { - let Word1{19-16} = 1; // BURST_COUNT + let Word1{19-16} = 0; // BURST_COUNT let Word1{20} = 1; // VALID_PIXEL_MODE let Word1{21} = eop; let Word1{29-22} = inst; @@ -1580,7 +1684,7 @@ let hasSideEffects = 1 in { defm : ExportPattern<EG_ExportSwz, 83>; def EG_ExportBuf : ExportBufInst { - let Word1{19-16} = 1; // BURST_COUNT + let Word1{19-16} = 0; // BURST_COUNT let Word1{20} = 1; // VALID_PIXEL_MODE let Word1{21} = eop; let Word1{29-22} = inst; @@ -1589,6 +1693,57 @@ let hasSideEffects = 1 in { } defm : SteamOutputExportPattern<EG_ExportBuf, 0x40, 0x41, 0x42, 0x43>; + def CF_TC_EG : CF_CLAUSE_EG<1, (ins i32imm:$ADDR, i32imm:$COUNT), + "TEX $COUNT @$ADDR"> { + let POP_COUNT = 0; + } + def CF_VC_EG : CF_CLAUSE_EG<2, (ins i32imm:$ADDR, i32imm:$COUNT), + "VTX $COUNT @$ADDR"> { + let POP_COUNT = 0; + } + def WHILE_LOOP_EG : CF_CLAUSE_EG<6, (ins i32imm:$ADDR), + "LOOP_START_DX10 @$ADDR"> { + let POP_COUNT = 0; + let COUNT = 0; + } + def END_LOOP_EG : CF_CLAUSE_EG<5, (ins i32imm:$ADDR), "END_LOOP @$ADDR"> { + let POP_COUNT = 0; + let COUNT = 0; + } + def LOOP_BREAK_EG : CF_CLAUSE_EG<9, (ins i32imm:$ADDR), + "LOOP_BREAK @$ADDR"> { + let POP_COUNT = 0; + let COUNT = 0; + } + def CF_CONTINUE_EG : CF_CLAUSE_EG<8, (ins i32imm:$ADDR), + "CONTINUE @$ADDR"> { + let POP_COUNT = 0; + let COUNT = 0; + } + def CF_JUMP_EG : CF_CLAUSE_EG<10, (ins i32imm:$ADDR, i32imm:$POP_COUNT), + "JUMP @$ADDR POP:$POP_COUNT"> { + let COUNT = 0; + } + def CF_ELSE_EG : CF_CLAUSE_EG<13, (ins i32imm:$ADDR, i32imm:$POP_COUNT), + "ELSE @$ADDR POP:$POP_COUNT"> { + let COUNT = 0; + } + def CF_CALL_FS_EG : CF_CLAUSE_EG<19, (ins), "CALL_FS"> { + let ADDR = 0; + let COUNT = 0; + let POP_COUNT = 0; + } + def POP_EG : CF_CLAUSE_EG<14, (ins i32imm:$ADDR, i32imm:$POP_COUNT), + "POP @$ADDR POP:$POP_COUNT"> { + let COUNT = 0; + } + def CF_END_EG : CF_CLAUSE_EG<0, (ins), "CF_END"> { + let COUNT = 0; + let POP_COUNT = 0; + let ADDR = 0; + let END_OF_PROGRAM = 1; + } + //===----------------------------------------------------------------------===// // Memory read/write instructions //===----------------------------------------------------------------------===// @@ -1618,14 +1773,14 @@ class RAT_WRITE_CACHELESS_eg <dag ins, bits<4> comp_mask, string name, def RAT_WRITE_CACHELESS_32_eg : RAT_WRITE_CACHELESS_eg < (ins R600_TReg32_X:$rw_gpr, R600_TReg32_X:$index_gpr, InstFlag:$eop), 0x1, "RAT_WRITE_CACHELESS_32_eg", - [(global_store (i32 R600_TReg32_X:$rw_gpr), R600_TReg32_X:$index_gpr)] + [(global_store i32:$rw_gpr, i32:$index_gpr)] >; //128-bit store def RAT_WRITE_CACHELESS_128_eg : RAT_WRITE_CACHELESS_eg < (ins R600_Reg128:$rw_gpr, R600_TReg32_X:$index_gpr, InstFlag:$eop), 0xf, "RAT_WRITE_CACHELESS_128", - [(global_store (v4i32 R600_Reg128:$rw_gpr), R600_TReg32_X:$index_gpr)] + [(global_store v4i32:$rw_gpr, i32:$index_gpr)] >; class VTX_READ_eg <string name, bits<8> buffer_id, dag outs, list<dag> pattern> @@ -1679,6 +1834,8 @@ class VTX_READ_eg <string name, bits<8> buffer_id, dag outs, list<dag> pattern> // VTX_WORD3 (Padding) // // Inst{127-96} = 0; + + let VTXInst = 1; } class VTX_READ_8_eg <bits<8> buffer_id, list<dag> pattern> 
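The VTX_READ_* readers just defined are instantiated in the next hunk once per buffer id: 0 for kernel parameters (load_param) and 1 for global/constant memory. Together with the new VTXInst flag they feed the clause selection added in R600InstrInfo.cpp earlier in this diff; a standalone restatement of that predicate logic, with plain bools standing in for the subtarget and shader-type queries:

struct FetchDesc { bool isVtx; bool isTex; };   // stand-in for the IS_VTX/IS_TEX TSFlags

// Opcode-level test, as in R600InstrInfo::usesVertexCache(unsigned).
bool vtxOpcode(bool hasVC, FetchDesc d) { return hasVC && d.isVtx; }

// MI-level tests: compute shaders never use the vertex cache, and parts
// without one route VTX fetches through the texture cache instead.
bool usesVertexCache(bool hasVC, bool isCompute, FetchDesc d) {
  return !isCompute && vtxOpcode(hasVC, d);
}
bool usesTextureCache(bool hasVC, bool isCompute, FetchDesc d) {
  return (isCompute && vtxOpcode(hasVC, d)) || (!hasVC && d.isVtx) || d.isTex;
}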
@@ -1748,19 +1905,19 @@ class VTX_READ_128_eg <bits<8> buffer_id, list<dag> pattern> //===----------------------------------------------------------------------===// def VTX_READ_PARAM_8_eg : VTX_READ_8_eg <0, - [(set (i32 R600_TReg32_X:$dst), (load_param_zexti8 ADDRVTX_READ:$ptr))] + [(set i32:$dst, (load_param_zexti8 ADDRVTX_READ:$ptr))] >; def VTX_READ_PARAM_16_eg : VTX_READ_16_eg <0, - [(set (i32 R600_TReg32_X:$dst), (load_param_zexti16 ADDRVTX_READ:$ptr))] + [(set i32:$dst, (load_param_zexti16 ADDRVTX_READ:$ptr))] >; def VTX_READ_PARAM_32_eg : VTX_READ_32_eg <0, - [(set (i32 R600_TReg32_X:$dst), (load_param ADDRVTX_READ:$ptr))] + [(set i32:$dst, (load_param ADDRVTX_READ:$ptr))] >; def VTX_READ_PARAM_128_eg : VTX_READ_128_eg <0, - [(set (v4i32 R600_Reg128:$dst), (load_param ADDRVTX_READ:$ptr))] + [(set v4i32:$dst, (load_param ADDRVTX_READ:$ptr))] >; //===----------------------------------------------------------------------===// @@ -1769,17 +1926,17 @@ def VTX_READ_PARAM_128_eg : VTX_READ_128_eg <0, // 8-bit reads def VTX_READ_GLOBAL_8_eg : VTX_READ_8_eg <1, - [(set (i32 R600_TReg32_X:$dst), (zextloadi8_global ADDRVTX_READ:$ptr))] + [(set i32:$dst, (zextloadi8_global ADDRVTX_READ:$ptr))] >; // 32-bit reads def VTX_READ_GLOBAL_32_eg : VTX_READ_32_eg <1, - [(set (i32 R600_TReg32_X:$dst), (global_load ADDRVTX_READ:$ptr))] + [(set i32:$dst, (global_load ADDRVTX_READ:$ptr))] >; // 128-bit reads def VTX_READ_GLOBAL_128_eg : VTX_READ_128_eg <1, - [(set (v4i32 R600_Reg128:$dst), (global_load ADDRVTX_READ:$ptr))] + [(set v4i32:$dst, (global_load ADDRVTX_READ:$ptr))] >; //===----------------------------------------------------------------------===// @@ -1788,7 +1945,7 @@ def VTX_READ_GLOBAL_128_eg : VTX_READ_128_eg <1, //===----------------------------------------------------------------------===// def CONSTANT_LOAD_eg : VTX_READ_32_eg <1, - [(set (i32 R600_TReg32_X:$dst), (constant_load ADDRVTX_READ:$ptr))] + [(set i32:$dst, (constant_load ADDRVTX_READ:$ptr))] >; } @@ -1818,22 +1975,27 @@ def SIN_cm : SIN_Common<0x8D>; def COS_cm : COS_Common<0x8E>; } // End isVector = 1 -def : POW_Common <LOG_IEEE_cm, EXP_IEEE_cm, MUL, R600_Reg32>; +def : POW_Common <LOG_IEEE_cm, EXP_IEEE_cm, MUL>; def : SIN_PAT <SIN_cm>; def : COS_PAT <COS_cm>; defm DIV_cm : DIV_Common<RECIP_IEEE_cm>; // RECIP_UINT emulation for Cayman +// The multiplication scales from [0,1] to the unsigned integer range def : Pat < - (AMDGPUurecip R600_Reg32:$src0), - (FLT_TO_UINT_eg (MUL_IEEE (RECIP_IEEE_cm (UINT_TO_FLT_eg R600_Reg32:$src0)), - (MOV_IMM_I32 0x4f800000))) + (AMDGPUurecip i32:$src0), + (FLT_TO_UINT_eg (MUL_IEEE (RECIP_IEEE_cm (UINT_TO_FLT_eg $src0)), + (MOV_IMM_I32 CONST.FP_UINT_MAX_PLUS_1))) >; + def CF_END_CM : CF_CLAUSE_EG<32, (ins), "CF_END"> { + let ADDR = 0; + let POP_COUNT = 0; + let COUNT = 0; + } -def : Pat<(fsqrt R600_Reg32:$src), - (MUL R600_Reg32:$src, (RECIPSQRT_CLAMPED_cm R600_Reg32:$src))>; +def : Pat<(fsqrt f32:$src), (MUL R600_Reg32:$src, (RECIPSQRT_CLAMPED_cm $src))>; } // End isCayman @@ -1855,21 +2017,21 @@ def PREDICATED_BREAK : ILFormat<(outs), (ins GPRI32:$src), let isPseudo = 1 in { def PRED_X : InstR600 < - 0, (outs R600_Predicate_Bit:$dst), + (outs R600_Predicate_Bit:$dst), (ins R600_Reg32:$src0, i32imm:$src1, i32imm:$flags), "", [], NullALU> { let FlagOperandIdx = 3; } let isTerminator = 1, isBranch = 1 in { -def JUMP_COND : InstR600 <0x10, +def JUMP_COND : InstR600 < (outs), (ins brtarget:$target, R600_Predicate_Bit:$p), "JUMP $target ($p)", [], AnyALU >; -def JUMP : InstR600 <0x10, +def JUMP : 
InstR600 < (outs), (ins brtarget:$target), "JUMP $target", @@ -1896,20 +2058,28 @@ def MASK_WRITE : AMDGPUShaderInst < } // End mayLoad = 0, mayStore = 0, hasSideEffects = 1 -def TXD: AMDGPUShaderInst < +def TXD: InstR600 < (outs R600_Reg128:$dst), - (ins R600_Reg128:$src0, R600_Reg128:$src1, R600_Reg128:$src2, i32imm:$resourceId, i32imm:$samplerId, i32imm:$textureTarget), + (ins R600_Reg128:$src0, R600_Reg128:$src1, R600_Reg128:$src2, + i32imm:$resourceId, i32imm:$samplerId, i32imm:$textureTarget), "TXD $dst, $src0, $src1, $src2, $resourceId, $samplerId, $textureTarget", - [(set R600_Reg128:$dst, (int_AMDGPU_txd R600_Reg128:$src0, R600_Reg128:$src1, R600_Reg128:$src2, imm:$resourceId, imm:$samplerId, imm:$textureTarget))] ->; + [(set v4f32:$dst, (int_AMDGPU_txd v4f32:$src0, v4f32:$src1, v4f32:$src2, + imm:$resourceId, imm:$samplerId, imm:$textureTarget))], + NullALU > { + let TEXInst = 1; +} -def TXD_SHADOW: AMDGPUShaderInst < +def TXD_SHADOW: InstR600 < (outs R600_Reg128:$dst), - (ins R600_Reg128:$src0, R600_Reg128:$src1, R600_Reg128:$src2, i32imm:$resourceId, i32imm:$samplerId, i32imm:$textureTarget), + (ins R600_Reg128:$src0, R600_Reg128:$src1, R600_Reg128:$src2, + i32imm:$resourceId, i32imm:$samplerId, i32imm:$textureTarget), "TXD_SHADOW $dst, $src0, $src1, $src2, $resourceId, $samplerId, $textureTarget", - [(set R600_Reg128:$dst, (int_AMDGPU_txd R600_Reg128:$src0, R600_Reg128:$src1, R600_Reg128:$src2, imm:$resourceId, imm:$samplerId, TEX_SHADOW:$textureTarget))] ->; - + [(set v4f32:$dst, (int_AMDGPU_txd v4f32:$src0, v4f32:$src1, v4f32:$src2, + imm:$resourceId, imm:$samplerId, TEX_SHADOW:$textureTarget))], + NullALU +> { + let TEXInst = 1; +} } // End isPseudo = 1 } // End usesCustomInserter = 1 @@ -1946,7 +2116,7 @@ def CONST_COPY : Instruction { def TEX_VTX_CONSTBUF : InstR600ISA <(outs R600_Reg128:$dst), (ins MEMxi:$ptr, i32imm:$BUFFER_ID), "VTX_READ_eg $dst, $ptr", - [(set R600_Reg128:$dst, (CONST_ADDRESS ADDRGA_VAR_OFFSET:$ptr, (i32 imm:$BUFFER_ID)))]>, + [(set v4i32:$dst, (CONST_ADDRESS ADDRGA_VAR_OFFSET:$ptr, (i32 imm:$BUFFER_ID)))]>, VTX_WORD1_GPR, VTX_WORD0 { let VC_INST = 0; @@ -1995,11 +2165,12 @@ def TEX_VTX_CONSTBUF : // VTX_WORD3 (Padding) // // Inst{127-96} = 0; + let VTXInst = 1; } def TEX_VTX_TEXBUF: InstR600ISA <(outs R600_Reg128:$dst), (ins MEMxi:$ptr, i32imm:$BUFFER_ID), "TEX_VTX_EXPLICIT_READ $dst, $ptr", - [(set R600_Reg128:$dst, (int_R600_load_texbuf ADDRGA_VAR_OFFSET:$ptr, imm:$BUFFER_ID))]>, + [(set v4f32:$dst, (int_R600_load_texbuf ADDRGA_VAR_OFFSET:$ptr, imm:$BUFFER_ID))]>, VTX_WORD1_GPR, VTX_WORD0 { let VC_INST = 0; @@ -2048,6 +2219,7 @@ let Inst{63-32} = Word1; // VTX_WORD3 (Padding) // // Inst{127-96} = 0; + let VTXInst = 1; } @@ -2124,9 +2296,8 @@ let isTerminator=1 in { // CND*_INT Pattterns for f32 True / False values class CND_INT_f32 <InstR600 cnd, CondCode cc> : Pat < - (selectcc (i32 R600_Reg32:$src0), 0, (f32 R600_Reg32:$src1), - R600_Reg32:$src2, cc), - (cnd R600_Reg32:$src0, R600_Reg32:$src1, R600_Reg32:$src2) + (selectcc i32:$src0, 0, f32:$src1, f32:$src2, cc), + (cnd $src0, $src1, $src2) >; def : CND_INT_f32 <CNDE_INT, SETEQ>; @@ -2135,9 +2306,8 @@ def : CND_INT_f32 <CNDGE_INT, SETGE>; //CNDGE_INT extra pattern def : Pat < - (selectcc (i32 R600_Reg32:$src0), -1, (i32 R600_Reg32:$src1), - (i32 R600_Reg32:$src2), COND_GT), - (CNDGE_INT R600_Reg32:$src0, R600_Reg32:$src1, R600_Reg32:$src2) + (selectcc i32:$src0, -1, i32:$src1, i32:$src2, COND_GT), + (CNDGE_INT $src0, $src1, $src2) >; // KIL Patterns @@ -2147,56 +2317,56 @@ def KILP : Pat < >; def 
KIL : Pat < - (int_AMDGPU_kill R600_Reg32:$src0), - (MASK_WRITE (KILLGT (f32 ZERO), (f32 R600_Reg32:$src0))) + (int_AMDGPU_kill f32:$src0), + (MASK_WRITE (KILLGT (f32 ZERO), $src0)) >; // SGT Reverse args def : Pat < - (selectcc (f32 R600_Reg32:$src0), R600_Reg32:$src1, FP_ONE, FP_ZERO, COND_LT), - (SGT R600_Reg32:$src1, R600_Reg32:$src0) + (selectcc f32:$src0, f32:$src1, FP_ONE, FP_ZERO, COND_LT), + (SGT $src1, $src0) >; // SGE Reverse args def : Pat < - (selectcc (f32 R600_Reg32:$src0), R600_Reg32:$src1, FP_ONE, FP_ZERO, COND_LE), - (SGE R600_Reg32:$src1, R600_Reg32:$src0) + (selectcc f32:$src0, f32:$src1, FP_ONE, FP_ZERO, COND_LE), + (SGE $src1, $src0) >; // SETGT_DX10 reverse args def : Pat < - (selectcc (f32 R600_Reg32:$src0), R600_Reg32:$src1, -1, 0, COND_LT), - (SETGT_DX10 R600_Reg32:$src1, R600_Reg32:$src0) + (selectcc f32:$src0, f32:$src1, -1, 0, COND_LT), + (SETGT_DX10 $src1, $src0) >; // SETGE_DX10 reverse args def : Pat < - (selectcc (f32 R600_Reg32:$src0), R600_Reg32:$src1, -1, 0, COND_LE), - (SETGE_DX10 R600_Reg32:$src1, R600_Reg32:$src0) + (selectcc f32:$src0, f32:$src1, -1, 0, COND_LE), + (SETGE_DX10 $src1, $src0) >; // SETGT_INT reverse args def : Pat < - (selectcc (i32 R600_Reg32:$src0), R600_Reg32:$src1, -1, 0, SETLT), - (SETGT_INT R600_Reg32:$src1, R600_Reg32:$src0) + (selectcc i32:$src0, i32:$src1, -1, 0, SETLT), + (SETGT_INT $src1, $src0) >; // SETGE_INT reverse args def : Pat < - (selectcc (i32 R600_Reg32:$src0), R600_Reg32:$src1, -1, 0, SETLE), - (SETGE_INT R600_Reg32:$src1, R600_Reg32:$src0) + (selectcc i32:$src0, i32:$src1, -1, 0, SETLE), + (SETGE_INT $src1, $src0) >; // SETGT_UINT reverse args def : Pat < - (selectcc (i32 R600_Reg32:$src0), R600_Reg32:$src1, -1, 0, SETULT), - (SETGT_UINT R600_Reg32:$src1, R600_Reg32:$src0) + (selectcc i32:$src0, i32:$src1, -1, 0, SETULT), + (SETGT_UINT $src1, $src0) >; // SETGE_UINT reverse args def : Pat < - (selectcc (i32 R600_Reg32:$src0), R600_Reg32:$src1, -1, 0, SETULE), - (SETGE_UINT R600_Reg32:$src1, R600_Reg32:$src0) + (selectcc i32:$src0, i32:$src1, -1, 0, SETULE), + (SETGE_UINT $src1, $src0) >; // The next two patterns are special cases for handling 'true if ordered' and @@ -2209,50 +2379,50 @@ def : Pat < //SETE - 'true if ordered' def : Pat < - (selectcc (f32 R600_Reg32:$src0), R600_Reg32:$src1, FP_ONE, FP_ZERO, SETO), - (SETE R600_Reg32:$src0, R600_Reg32:$src1) + (selectcc f32:$src0, f32:$src1, FP_ONE, FP_ZERO, SETO), + (SETE $src0, $src1) >; //SETE_DX10 - 'true if ordered' def : Pat < - (selectcc (f32 R600_Reg32:$src0), R600_Reg32:$src1, -1, 0, SETO), - (SETE_DX10 R600_Reg32:$src0, R600_Reg32:$src1) + (selectcc f32:$src0, f32:$src1, -1, 0, SETO), + (SETE_DX10 $src0, $src1) >; //SNE - 'true if unordered' def : Pat < - (selectcc (f32 R600_Reg32:$src0), R600_Reg32:$src1, FP_ONE, FP_ZERO, SETUO), - (SNE R600_Reg32:$src0, R600_Reg32:$src1) + (selectcc f32:$src0, f32:$src1, FP_ONE, FP_ZERO, SETUO), + (SNE $src0, $src1) >; //SETNE_DX10 - 'true if ordered' def : Pat < - (selectcc (f32 R600_Reg32:$src0), R600_Reg32:$src1, -1, 0, SETUO), - (SETNE_DX10 R600_Reg32:$src0, R600_Reg32:$src1) + (selectcc f32:$src0, f32:$src1, -1, 0, SETUO), + (SETNE_DX10 $src0, $src1) >; -def : Extract_Element <f32, v4f32, R600_Reg128, 0, sub0>; -def : Extract_Element <f32, v4f32, R600_Reg128, 1, sub1>; -def : Extract_Element <f32, v4f32, R600_Reg128, 2, sub2>; -def : Extract_Element <f32, v4f32, R600_Reg128, 3, sub3>; +def : Extract_Element <f32, v4f32, 0, sub0>; +def : Extract_Element <f32, v4f32, 1, sub1>; +def : Extract_Element <f32, v4f32, 2, 
sub2>; +def : Extract_Element <f32, v4f32, 3, sub3>; -def : Insert_Element <f32, v4f32, R600_Reg32, R600_Reg128, 0, sub0>; -def : Insert_Element <f32, v4f32, R600_Reg32, R600_Reg128, 1, sub1>; -def : Insert_Element <f32, v4f32, R600_Reg32, R600_Reg128, 2, sub2>; -def : Insert_Element <f32, v4f32, R600_Reg32, R600_Reg128, 3, sub3>; +def : Insert_Element <f32, v4f32, 0, sub0>; +def : Insert_Element <f32, v4f32, 1, sub1>; +def : Insert_Element <f32, v4f32, 2, sub2>; +def : Insert_Element <f32, v4f32, 3, sub3>; -def : Extract_Element <i32, v4i32, R600_Reg128, 0, sub0>; -def : Extract_Element <i32, v4i32, R600_Reg128, 1, sub1>; -def : Extract_Element <i32, v4i32, R600_Reg128, 2, sub2>; -def : Extract_Element <i32, v4i32, R600_Reg128, 3, sub3>; +def : Extract_Element <i32, v4i32, 0, sub0>; +def : Extract_Element <i32, v4i32, 1, sub1>; +def : Extract_Element <i32, v4i32, 2, sub2>; +def : Extract_Element <i32, v4i32, 3, sub3>; -def : Insert_Element <i32, v4i32, R600_Reg32, R600_Reg128, 0, sub0>; -def : Insert_Element <i32, v4i32, R600_Reg32, R600_Reg128, 1, sub1>; -def : Insert_Element <i32, v4i32, R600_Reg32, R600_Reg128, 2, sub2>; -def : Insert_Element <i32, v4i32, R600_Reg32, R600_Reg128, 3, sub3>; +def : Insert_Element <i32, v4i32, 0, sub0>; +def : Insert_Element <i32, v4i32, 1, sub1>; +def : Insert_Element <i32, v4i32, 2, sub2>; +def : Insert_Element <i32, v4i32, 3, sub3>; -def : Vector4_Build <v4f32, R600_Reg128, f32, R600_Reg32>; -def : Vector4_Build <v4i32, R600_Reg128, i32, R600_Reg32>; +def : Vector4_Build <v4f32, f32>; +def : Vector4_Build <v4i32, i32>; // bitconvert patterns diff --git a/contrib/llvm/lib/Target/R600/R600MachineFunctionInfo.h b/contrib/llvm/lib/Target/R600/R600MachineFunctionInfo.h index 99c1f91..70fddbb 100644 --- a/contrib/llvm/lib/Target/R600/R600MachineFunctionInfo.h +++ b/contrib/llvm/lib/Target/R600/R600MachineFunctionInfo.h @@ -25,6 +25,7 @@ public: R600MachineFunctionInfo(const MachineFunction &MF); SmallVector<unsigned, 4> LiveOuts; std::vector<unsigned> IndirectRegs; + unsigned StackSize; }; } // End llvm namespace diff --git a/contrib/llvm/lib/Target/R600/R600Packetizer.cpp b/contrib/llvm/lib/Target/R600/R600Packetizer.cpp new file mode 100644 index 0000000..cd7b7d0 --- /dev/null +++ b/contrib/llvm/lib/Target/R600/R600Packetizer.cpp @@ -0,0 +1,459 @@ +//===----- R600Packetizer.cpp - VLIW packetizer ---------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +/// \file +/// This pass implements instructions packetization for R600. It unsets isLast +/// bit of instructions inside a bundle and substitutes src register with +/// PreviousVector when applicable. 
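In other words, a result written by the immediately preceding instruction group can be read back through the PV.X..PV.W channels instead of the GPR it was written to; getPreviousVector() builds that register-to-PV map per destination channel and substitutePV() rewrites matching sources. A minimal standalone sketch of the rewrite (the register encodings and function name here are made up):

#include <map>
#include <vector>

enum PVChan { PV_X = 1000, PV_Y, PV_Z, PV_W };   // made-up encodings

// pv maps a GPR written by the previous group to the PV channel holding it.
void substitutePVSources(std::vector<int> &srcs, const std::map<int, int> &pv) {
  for (int &src : srcs) {
    auto it = pv.find(src);
    if (it != pv.end())
      src = it->second;     // read the forwarded PV channel instead of the GPR
  }
}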
+// +//===----------------------------------------------------------------------===// + +#ifndef R600PACKETIZER_CPP +#define R600PACKETIZER_CPP + +#define DEBUG_TYPE "packets" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/CodeGen/DFAPacketizer.h" +#include "llvm/CodeGen/Passes.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineDominators.h" +#include "llvm/CodeGen/MachineLoopInfo.h" +#include "llvm/CodeGen/ScheduleDAG.h" +#include "AMDGPU.h" +#include "R600InstrInfo.h" + +namespace llvm { + +class R600Packetizer : public MachineFunctionPass { + +public: + static char ID; + R600Packetizer(const TargetMachine &TM) : MachineFunctionPass(ID) {} + + void getAnalysisUsage(AnalysisUsage &AU) const { + AU.setPreservesCFG(); + AU.addRequired<MachineDominatorTree>(); + AU.addPreserved<MachineDominatorTree>(); + AU.addRequired<MachineLoopInfo>(); + AU.addPreserved<MachineLoopInfo>(); + MachineFunctionPass::getAnalysisUsage(AU); + } + + const char *getPassName() const { + return "R600 Packetizer"; + } + + bool runOnMachineFunction(MachineFunction &Fn); +}; +char R600Packetizer::ID = 0; + +class R600PacketizerList : public VLIWPacketizerList { + +private: + const R600InstrInfo *TII; + const R600RegisterInfo &TRI; + + enum BankSwizzle { + ALU_VEC_012 = 0, + ALU_VEC_021, + ALU_VEC_120, + ALU_VEC_102, + ALU_VEC_201, + ALU_VEC_210 + }; + + unsigned getSlot(const MachineInstr *MI) const { + return TRI.getHWRegChan(MI->getOperand(0).getReg()); + } + + /// \returns register to PV chan mapping for bundle/single instructions that + /// immediatly precedes I. + DenseMap<unsigned, unsigned> getPreviousVector(MachineBasicBlock::iterator I) + const { + DenseMap<unsigned, unsigned> Result; + I--; + if (!TII->isALUInstr(I->getOpcode()) && !I->isBundle()) + return Result; + MachineBasicBlock::instr_iterator BI = I.getInstrIterator(); + if (I->isBundle()) + BI++; + do { + if (TII->isPredicated(BI)) + continue; + if (TII->isTransOnly(BI)) + continue; + int OperandIdx = TII->getOperandIdx(BI->getOpcode(), R600Operands::WRITE); + if (OperandIdx < 0) + continue; + if (BI->getOperand(OperandIdx).getImm() == 0) + continue; + unsigned Dst = BI->getOperand(0).getReg(); + if (BI->getOpcode() == AMDGPU::DOT4_r600_real) { + Result[Dst] = AMDGPU::PV_X; + continue; + } + unsigned PVReg = 0; + switch (TRI.getHWRegChan(Dst)) { + case 0: + PVReg = AMDGPU::PV_X; + break; + case 1: + PVReg = AMDGPU::PV_Y; + break; + case 2: + PVReg = AMDGPU::PV_Z; + break; + case 3: + PVReg = AMDGPU::PV_W; + break; + default: + llvm_unreachable("Invalid Chan"); + } + Result[Dst] = PVReg; + } while ((++BI)->isBundledWithPred()); + return Result; + } + + void substitutePV(MachineInstr *MI, const DenseMap<unsigned, unsigned> &PVs) + const { + R600Operands::Ops Ops[] = { + R600Operands::SRC0, + R600Operands::SRC1, + R600Operands::SRC2 + }; + for (unsigned i = 0; i < 3; i++) { + int OperandIdx = TII->getOperandIdx(MI->getOpcode(), Ops[i]); + if (OperandIdx < 0) + continue; + unsigned Src = MI->getOperand(OperandIdx).getReg(); + const DenseMap<unsigned, unsigned>::const_iterator It = PVs.find(Src); + if (It != PVs.end()) + MI->getOperand(OperandIdx).setReg(It->second); + } + } +public: + // Ctor. 
+  R600PacketizerList(MachineFunction &MF, MachineLoopInfo &MLI,
+                        MachineDominatorTree &MDT)
+  : VLIWPacketizerList(MF, MLI, MDT, true),
+    TII (static_cast<const R600InstrInfo *>(MF.getTarget().getInstrInfo())),
+    TRI(TII->getRegisterInfo()) { }
+
+  // initPacketizerState - initialize some internal flags.
+  void initPacketizerState() { }
+
+  // ignorePseudoInstruction - Ignore bundling of pseudo instructions.
+  bool ignorePseudoInstruction(MachineInstr *MI, MachineBasicBlock *MBB) {
+    return false;
+  }
+
+  // isSoloInstruction - return true if instruction MI cannot be packetized
+  // with any other instruction, which means that MI itself is a packet.
+  bool isSoloInstruction(MachineInstr *MI) {
+    if (TII->isVector(*MI))
+      return true;
+    if (!TII->isALUInstr(MI->getOpcode()))
+      return true;
+    if (TII->get(MI->getOpcode()).TSFlags & R600_InstFlag::TRANS_ONLY)
+      return true;
+    if (TII->isTransOnly(MI))
+      return true;
+    return false;
+  }
+
+  // isLegalToPacketizeTogether - Is it legal to packetize SUI and SUJ
+  // together.
+  bool isLegalToPacketizeTogether(SUnit *SUI, SUnit *SUJ) {
+    MachineInstr *MII = SUI->getInstr(), *MIJ = SUJ->getInstr();
+    if (getSlot(MII) <= getSlot(MIJ))
+      return false;
+    // Do MII and MIJ share the same pred_sel?
+    int OpI = TII->getOperandIdx(MII->getOpcode(), R600Operands::PRED_SEL),
+        OpJ = TII->getOperandIdx(MIJ->getOpcode(), R600Operands::PRED_SEL);
+    unsigned PredI = (OpI > -1)?MII->getOperand(OpI).getReg():0,
+        PredJ = (OpJ > -1)?MIJ->getOperand(OpJ).getReg():0;
+    if (PredI != PredJ)
+      return false;
+    if (SUJ->isSucc(SUI)) {
+      for (unsigned i = 0, e = SUJ->Succs.size(); i < e; ++i) {
+        const SDep &Dep = SUJ->Succs[i];
+        if (Dep.getSUnit() != SUI)
+          continue;
+        if (Dep.getKind() == SDep::Anti)
+          continue;
+        if (Dep.getKind() == SDep::Output)
+          if (MII->getOperand(0).getReg() != MIJ->getOperand(0).getReg())
+            continue;
+        return false;
+      }
+    }
+    return true;
+  }
+
+  // isLegalToPruneDependencies - Is it legal to prune dependence between SUI
+  // and SUJ.
+ bool isLegalToPruneDependencies(SUnit *SUI, SUnit *SUJ) {return false;} + + void setIsLastBit(MachineInstr *MI, unsigned Bit) const { + unsigned LastOp = TII->getOperandIdx(MI->getOpcode(), R600Operands::LAST); + MI->getOperand(LastOp).setImm(Bit); + } + + MachineBasicBlock::iterator addToPacket(MachineInstr *MI) { + CurrentPacketMIs.push_back(MI); + bool FitsConstLimits = TII->canBundle(CurrentPacketMIs); + DEBUG( + if (!FitsConstLimits) { + dbgs() << "Couldn't pack :\n"; + MI->dump(); + dbgs() << "with the following packets :\n"; + for (unsigned i = 0, e = CurrentPacketMIs.size() - 1; i < e; i++) { + CurrentPacketMIs[i]->dump(); + dbgs() << "\n"; + } + dbgs() << "because of Consts read limitations\n"; + }); + const DenseMap<unsigned, unsigned> &PV = + getPreviousVector(CurrentPacketMIs.front()); + bool FitsReadPortLimits = fitsReadPortLimitation(CurrentPacketMIs, PV); + DEBUG( + if (!FitsReadPortLimits) { + dbgs() << "Couldn't pack :\n"; + MI->dump(); + dbgs() << "with the following packets :\n"; + for (unsigned i = 0, e = CurrentPacketMIs.size() - 1; i < e; i++) { + CurrentPacketMIs[i]->dump(); + dbgs() << "\n"; + } + dbgs() << "because of Read port limitations\n"; + }); + bool isBundlable = FitsConstLimits && FitsReadPortLimits; + CurrentPacketMIs.pop_back(); + if (!isBundlable) { + endPacket(MI->getParent(), MI); + substitutePV(MI, getPreviousVector(MI)); + return VLIWPacketizerList::addToPacket(MI); + } + if (!CurrentPacketMIs.empty()) + setIsLastBit(CurrentPacketMIs.back(), 0); + substitutePV(MI, PV); + return VLIWPacketizerList::addToPacket(MI); + } +private: + std::vector<std::pair<int, unsigned> > + ExtractSrcs(const MachineInstr *MI, const DenseMap<unsigned, unsigned> &PV) + const { + R600Operands::Ops Ops[] = { + R600Operands::SRC0, + R600Operands::SRC1, + R600Operands::SRC2 + }; + std::vector<std::pair<int, unsigned> > Result; + for (unsigned i = 0; i < 3; i++) { + int OperandIdx = TII->getOperandIdx(MI->getOpcode(), Ops[i]); + if (OperandIdx < 0){ + Result.push_back(std::pair<int, unsigned>(-1,0)); + continue; + } + unsigned Src = MI->getOperand(OperandIdx).getReg(); + if (PV.find(Src) != PV.end()) { + Result.push_back(std::pair<int, unsigned>(-1,0)); + continue; + } + unsigned Reg = TRI.getEncodingValue(Src) & 0xff; + if (Reg > 127) { + Result.push_back(std::pair<int, unsigned>(-1,0)); + continue; + } + unsigned Chan = TRI.getHWRegChan(Src); + Result.push_back(std::pair<int, unsigned>(Reg, Chan)); + } + return Result; + } + + std::vector<std::pair<int, unsigned> > + Swizzle(std::vector<std::pair<int, unsigned> > Src, + BankSwizzle Swz) const { + switch (Swz) { + case ALU_VEC_012: + break; + case ALU_VEC_021: + std::swap(Src[1], Src[2]); + break; + case ALU_VEC_102: + std::swap(Src[0], Src[1]); + break; + case ALU_VEC_120: + std::swap(Src[0], Src[1]); + std::swap(Src[0], Src[2]); + break; + case ALU_VEC_201: + std::swap(Src[0], Src[2]); + std::swap(Src[0], Src[1]); + break; + case ALU_VEC_210: + std::swap(Src[0], Src[2]); + break; + } + return Src; + } + + bool isLegal(const std::vector<MachineInstr *> &IG, + const std::vector<BankSwizzle> &Swz, + const DenseMap<unsigned, unsigned> &PV) const { + assert (Swz.size() == IG.size()); + int Vector[4][3]; + memset(Vector, -1, sizeof(Vector)); + for (unsigned i = 0, e = IG.size(); i < e; i++) { + const std::vector<std::pair<int, unsigned> > &Srcs = + Swizzle(ExtractSrcs(IG[i], PV), Swz[i]); + for (unsigned j = 0; j < 3; j++) { + const std::pair<int, unsigned> &Src = Srcs[j]; + if (Src.first < 0) + continue; + if 
(Vector[Src.second][j] < 0) + Vector[Src.second][j] = Src.first; + if (Vector[Src.second][j] != Src.first) + return false; + } + } + return true; + } + + bool recursiveFitsFPLimitation( + std::vector<MachineInstr *> IG, + const DenseMap<unsigned, unsigned> &PV, + std::vector<BankSwizzle> &SwzCandidate, + std::vector<MachineInstr *> CurrentlyChecked) + const { + if (!isLegal(CurrentlyChecked, SwzCandidate, PV)) + return false; + if (IG.size() == CurrentlyChecked.size()) { + return true; + } + BankSwizzle AvailableSwizzle[] = { + ALU_VEC_012, + ALU_VEC_021, + ALU_VEC_120, + ALU_VEC_102, + ALU_VEC_201, + ALU_VEC_210 + }; + CurrentlyChecked.push_back(IG[CurrentlyChecked.size()]); + for (unsigned i = 0; i < 6; i++) { + SwzCandidate.push_back(AvailableSwizzle[i]); + if (recursiveFitsFPLimitation(IG, PV, SwzCandidate, CurrentlyChecked)) + return true; + SwzCandidate.pop_back(); + } + return false; + } + + bool fitsReadPortLimitation( + std::vector<MachineInstr *> IG, + const DenseMap<unsigned, unsigned> &PV) + const { + //Todo : support shared src0 - src1 operand + std::vector<BankSwizzle> SwzCandidate; + bool Result = recursiveFitsFPLimitation(IG, PV, SwzCandidate, + std::vector<MachineInstr *>()); + if (!Result) + return false; + for (unsigned i = 0, e = IG.size(); i < e; i++) { + MachineInstr *MI = IG[i]; + unsigned Op = TII->getOperandIdx(MI->getOpcode(), + R600Operands::BANK_SWIZZLE); + MI->getOperand(Op).setImm(SwzCandidate[i]); + } + return true; + } +}; + +bool R600Packetizer::runOnMachineFunction(MachineFunction &Fn) { + const TargetInstrInfo *TII = Fn.getTarget().getInstrInfo(); + MachineLoopInfo &MLI = getAnalysis<MachineLoopInfo>(); + MachineDominatorTree &MDT = getAnalysis<MachineDominatorTree>(); + + // Instantiate the packetizer. + R600PacketizerList Packetizer(Fn, MLI, MDT); + + // DFA state table should not be empty. + assert(Packetizer.getResourceTracker() && "Empty DFA table!"); + + // + // Loop over all basic blocks and remove KILL pseudo-instructions + // These instructions confuse the dependence analysis. Consider: + // D0 = ... (Insn 0) + // R0 = KILL R0, D0 (Insn 1) + // R0 = ... (Insn 2) + // Here, Insn 1 will result in the dependence graph not emitting an output + // dependence between Insn 0 and Insn 2. This can lead to incorrect + // packetization + // + for (MachineFunction::iterator MBB = Fn.begin(), MBBe = Fn.end(); + MBB != MBBe; ++MBB) { + MachineBasicBlock::iterator End = MBB->end(); + MachineBasicBlock::iterator MI = MBB->begin(); + while (MI != End) { + if (MI->isKill()) { + MachineBasicBlock::iterator DeleteMI = MI; + ++MI; + MBB->erase(DeleteMI); + End = MBB->end(); + continue; + } + ++MI; + } + } + + // Loop over all of the basic blocks. + for (MachineFunction::iterator MBB = Fn.begin(), MBBe = Fn.end(); + MBB != MBBe; ++MBB) { + // Find scheduling regions and schedule / packetize each region. + unsigned RemainingCount = MBB->size(); + for(MachineBasicBlock::iterator RegionEnd = MBB->end(); + RegionEnd != MBB->begin();) { + // The next region starts above the previous region. Look backward in the + // instruction stream until we find the nearest boundary. + MachineBasicBlock::iterator I = RegionEnd; + for(;I != MBB->begin(); --I, --RemainingCount) { + if (TII->isSchedulingBoundary(llvm::prior(I), MBB, Fn)) + break; + } + I = MBB->begin(); + + // Skip empty scheduling regions. + if (I == RegionEnd) { + RegionEnd = llvm::prior(RegionEnd); + --RemainingCount; + continue; + } + // Skip regions with one instruction. 
+ if (I == llvm::prior(RegionEnd)) { + RegionEnd = llvm::prior(RegionEnd); + continue; + } + + Packetizer.PacketizeMIs(MBB, I, RegionEnd); + RegionEnd = I; + } + } + + return true; + +} + +} + +llvm::FunctionPass *llvm::createR600Packetizer(TargetMachine &tm) { + return new R600Packetizer(tm); +} + +#endif // R600PACKETIZER_CPP diff --git a/contrib/llvm/lib/Target/R600/R600RegisterInfo.td b/contrib/llvm/lib/Target/R600/R600RegisterInfo.td index 03f4976..bfc546b 100644 --- a/contrib/llvm/lib/Target/R600/R600RegisterInfo.td +++ b/contrib/llvm/lib/Target/R600/R600RegisterInfo.td @@ -88,8 +88,14 @@ def NEG_ONE : R600Reg<"-1.0", 249>; def ONE_INT : R600Reg<"1", 250>; def HALF : R600Reg<"0.5", 252>; def NEG_HALF : R600Reg<"-0.5", 252>; -def ALU_LITERAL_X : R600Reg<"literal.x", 253>; -def PV_X : R600Reg<"pv.x", 254>; +def ALU_LITERAL_X : R600RegWithChan<"literal.x", 253, "X">; +def ALU_LITERAL_Y : R600RegWithChan<"literal.y", 253, "Y">; +def ALU_LITERAL_Z : R600RegWithChan<"literal.z", 253, "Z">; +def ALU_LITERAL_W : R600RegWithChan<"literal.w", 253, "W">; +def PV_X : R600RegWithChan<"PV.x", 254, "X">; +def PV_Y : R600RegWithChan<"PV.y", 254, "Y">; +def PV_Z : R600RegWithChan<"PV.z", 254, "Z">; +def PV_W : R600RegWithChan<"PV.w", 254, "W">; def PREDICATE_BIT : R600Reg<"PredicateBit", 0>; def PRED_SEL_OFF: R600Reg<"Pred_sel_off", 0>; def PRED_SEL_ZERO : R600Reg<"Pred_sel_zero", 2>; diff --git a/contrib/llvm/lib/Target/R600/R600Schedule.td b/contrib/llvm/lib/Target/R600/R600Schedule.td index 7ede181..78a460a 100644 --- a/contrib/llvm/lib/Target/R600/R600Schedule.td +++ b/contrib/llvm/lib/Target/R600/R600Schedule.td @@ -24,7 +24,7 @@ def AnyALU : InstrItinClass; def VecALU : InstrItinClass; def TransALU : InstrItinClass; -def R600_EG_Itin : ProcessorItineraries < +def R600_VLIW5_Itin : ProcessorItineraries < [ALU_X, ALU_Y, ALU_Z, ALU_W, TRANS, ALU_NULL], [], [ @@ -34,3 +34,14 @@ def R600_EG_Itin : ProcessorItineraries < InstrItinData<NullALU, [InstrStage<1, [ALU_NULL]>]> ] >; + +def R600_VLIW4_Itin : ProcessorItineraries < + [ALU_X, ALU_Y, ALU_Z, ALU_W, ALU_NULL], + [], + [ + InstrItinData<AnyALU, [InstrStage<1, [ALU_X, ALU_Y, ALU_Z, ALU_W]>]>, + InstrItinData<VecALU, [InstrStage<1, [ALU_X, ALU_Y, ALU_X, ALU_W]>]>, + InstrItinData<TransALU, [InstrStage<1, [ALU_NULL]>]>, + InstrItinData<NullALU, [InstrStage<1, [ALU_NULL]>]> + ] +>; diff --git a/contrib/llvm/lib/Target/R600/SIDefines.h b/contrib/llvm/lib/Target/R600/SIDefines.h new file mode 100644 index 0000000..716b093 --- /dev/null +++ b/contrib/llvm/lib/Target/R600/SIDefines.h @@ -0,0 +1,22 @@ +//===-- SIDefines.h - SI Helper Macros ----------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +/// \file +//===----------------------------------------------------------------------===// + +#ifndef SIDEFINES_H_ +#define SIDEFINES_H_ + +#define R_00B028_SPI_SHADER_PGM_RSRC1_PS 0x00B028 +#define R_00B128_SPI_SHADER_PGM_RSRC1_VS 0x00B128 +#define R_00B228_SPI_SHADER_PGM_RSRC1_GS 0x00B228 +#define R_00B848_COMPUTE_PGM_RSRC1 0x00B848 +#define S_00B028_VGPRS(x) (((x) & 0x3F) << 0) +#define S_00B028_SGPRS(x) (((x) & 0x0F) << 6) +#define R_0286CC_SPI_PS_INPUT_ENA 0x0286CC + +#endif // SIDEFINES_H_ diff --git a/contrib/llvm/lib/Target/R600/SIISelLowering.cpp b/contrib/llvm/lib/Target/R600/SIISelLowering.cpp index 6f0c307..6bd82a5 100644 --- a/contrib/llvm/lib/Target/R600/SIISelLowering.cpp +++ b/contrib/llvm/lib/Target/R600/SIISelLowering.cpp @@ -49,6 +49,7 @@ SITargetLowering::SITargetLowering(TargetMachine &TM) : addRegisterClass(MVT::v4i32, &AMDGPU::VReg_128RegClass); addRegisterClass(MVT::v4f32, &AMDGPU::VReg_128RegClass); + addRegisterClass(MVT::i128, &AMDGPU::SReg_128RegClass); addRegisterClass(MVT::v8i32, &AMDGPU::VReg_256RegClass); addRegisterClass(MVT::v8f32, &AMDGPU::VReg_256RegClass); @@ -70,6 +71,10 @@ SITargetLowering::SITargetLowering(TargetMachine &TM) : setOperationAction(ISD::SELECT_CC, MVT::i32, Custom); setOperationAction(ISD::SELECT_CC, MVT::Other, Expand); + + setOperationAction(ISD::STORE, MVT::i32, Custom); + setOperationAction(ISD::STORE, MVT::i64, Custom); + setTargetDAGCombine(ISD::SELECT_CC); setTargetDAGCombine(ISD::SETCC); @@ -234,6 +239,7 @@ SDValue SITargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { default: return AMDGPUTargetLowering::LowerOperation(Op, DAG); case ISD::BRCOND: return LowerBRCOND(Op, DAG); case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG); + case ISD::STORE: return LowerSTORE(Op, DAG); } return SDValue(); } @@ -332,6 +338,32 @@ SDValue SITargetLowering::LowerBRCOND(SDValue BRCOND, return Chain; } +#define RSRC_DATA_FORMAT 0xf00000000000 + +SDValue SITargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const { + StoreSDNode *StoreNode = cast<StoreSDNode>(Op); + SDValue Chain = Op.getOperand(0); + SDValue Value = Op.getOperand(1); + SDValue VirtualAddress = Op.getOperand(2); + DebugLoc DL = Op.getDebugLoc(); + + if (StoreNode->getAddressSpace() != AMDGPUAS::GLOBAL_ADDRESS) { + return SDValue(); + } + + SDValue SrcSrc = DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i128, + DAG.getConstant(0, MVT::i64), + DAG.getConstant(RSRC_DATA_FORMAT, MVT::i64)); + + SDValue Ops[2]; + Ops[0] = DAG.getNode(AMDGPUISD::BUFFER_STORE, DL, MVT::Other, Chain, + Value, SrcSrc, VirtualAddress); + Ops[1] = Chain; + + return DAG.getMergeValues(Ops, 2, DL); + +} + SDValue SITargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const { SDValue LHS = Op.getOperand(0); SDValue RHS = Op.getOperand(1); @@ -424,9 +456,12 @@ int32_t SITargetLowering::analyzeImmediate(const SDNode *N) const { float F; } Imm; - if (const ConstantSDNode *Node = dyn_cast<ConstantSDNode>(N)) + if (const ConstantSDNode *Node = dyn_cast<ConstantSDNode>(N)) { + if (Node->getZExtValue() >> 32) { + return -1; + } Imm.I = Node->getSExtValue(); - else if (const ConstantFPSDNode *Node = dyn_cast<ConstantFPSDNode>(N)) + } else if (const ConstantFPSDNode *Node = dyn_cast<ConstantFPSDNode>(N)) Imm.F = Node->getValueAPF().convertToFloat(); else return -1; // It isn't an immediate @@ -534,8 +569,9 @@ void SITargetLowering::ensureSRegLimit(SelectionDAG &DAG, SDValue &Operand, Operand = SDValue(Node, 0); } -SDNode *SITargetLowering::PostISelFolding(MachineSDNode *Node, - SelectionDAG 
&DAG) const { +/// \brief Try to fold the Nodes operands into the Node +SDNode *SITargetLowering::foldOperands(MachineSDNode *Node, + SelectionDAG &DAG) const { // Original encoding (either e32 or e64) int Opcode = Node->getMachineOpcode(); @@ -666,5 +702,116 @@ SDNode *SITargetLowering::PostISelFolding(MachineSDNode *Node, // Create a complete new instruction return DAG.getMachineNode(Desc->Opcode, Node->getDebugLoc(), - Node->getVTList(), Ops.data(), Ops.size()); + Node->getVTList(), Ops); +} + +/// \brief Helper function for adjustWritemask +unsigned SubIdx2Lane(unsigned Idx) { + switch (Idx) { + default: return 0; + case AMDGPU::sub0: return 0; + case AMDGPU::sub1: return 1; + case AMDGPU::sub2: return 2; + case AMDGPU::sub3: return 3; + } +} + +/// \brief Adjust the writemask of MIMG instructions +void SITargetLowering::adjustWritemask(MachineSDNode *&Node, + SelectionDAG &DAG) const { + SDNode *Users[4] = { }; + unsigned Writemask = 0, Lane = 0; + + // Try to figure out the used register components + for (SDNode::use_iterator I = Node->use_begin(), E = Node->use_end(); + I != E; ++I) { + + // Abort if we can't understand the usage + if (!I->isMachineOpcode() || + I->getMachineOpcode() != TargetOpcode::EXTRACT_SUBREG) + return; + + Lane = SubIdx2Lane(I->getConstantOperandVal(1)); + + // Abort if we have more than one user per component + if (Users[Lane]) + return; + + Users[Lane] = *I; + Writemask |= 1 << Lane; + } + + // Abort if all components are used + if (Writemask == 0xf) + return; + + // Adjust the writemask in the node + std::vector<SDValue> Ops; + Ops.push_back(DAG.getTargetConstant(Writemask, MVT::i32)); + for (unsigned i = 1, e = Node->getNumOperands(); i != e; ++i) + Ops.push_back(Node->getOperand(i)); + Node = (MachineSDNode*)DAG.UpdateNodeOperands(Node, Ops.data(), Ops.size()); + + // If we only got one lane, replace it with a copy + if (Writemask == (1U << Lane)) { + SDValue RC = DAG.getTargetConstant(AMDGPU::VReg_32RegClassID, MVT::i32); + SDNode *Copy = DAG.getMachineNode(TargetOpcode::COPY_TO_REGCLASS, + DebugLoc(), Users[Lane]->getValueType(0), + SDValue(Node, 0), RC); + DAG.ReplaceAllUsesWith(Users[Lane], Copy); + return; + } + + // Update the users of the node with the new indices + for (unsigned i = 0, Idx = AMDGPU::sub0; i < 4; ++i) { + + SDNode *User = Users[i]; + if (!User) + continue; + + SDValue Op = DAG.getTargetConstant(Idx, MVT::i32); + DAG.UpdateNodeOperands(User, User->getOperand(0), Op); + + switch (Idx) { + default: break; + case AMDGPU::sub0: Idx = AMDGPU::sub1; break; + case AMDGPU::sub1: Idx = AMDGPU::sub2; break; + case AMDGPU::sub2: Idx = AMDGPU::sub3; break; + } + } +} + +/// \brief Fold the instructions after slecting them +SDNode *SITargetLowering::PostISelFolding(MachineSDNode *Node, + SelectionDAG &DAG) const { + + if (AMDGPU::isMIMG(Node->getMachineOpcode()) != -1) + adjustWritemask(Node, DAG); + + return foldOperands(Node, DAG); +} + +/// \brief Assign the register class depending on the number of +/// bits set in the writemask +void SITargetLowering::AdjustInstrPostInstrSelection(MachineInstr *MI, + SDNode *Node) const { + if (AMDGPU::isMIMG(MI->getOpcode()) == -1) + return; + + unsigned VReg = MI->getOperand(0).getReg(); + unsigned Writemask = MI->getOperand(1).getImm(); + unsigned BitsSet = 0; + for (unsigned i = 0; i < 4; ++i) + BitsSet += Writemask & (1 << i) ? 
1 : 0; + + const TargetRegisterClass *RC; + switch (BitsSet) { + default: return; + case 1: RC = &AMDGPU::VReg_32RegClass; break; + case 2: RC = &AMDGPU::VReg_64RegClass; break; + case 3: RC = &AMDGPU::VReg_96RegClass; break; + } + + MachineRegisterInfo &MRI = MI->getParent()->getParent()->getRegInfo(); + MRI.setRegClass(VReg, RC); } diff --git a/contrib/llvm/lib/Target/R600/SIISelLowering.h b/contrib/llvm/lib/Target/R600/SIISelLowering.h index 5ad2f40..de637be 100644 --- a/contrib/llvm/lib/Target/R600/SIISelLowering.h +++ b/contrib/llvm/lib/Target/R600/SIISelLowering.h @@ -24,6 +24,7 @@ class SITargetLowering : public AMDGPUTargetLowering { const SIInstrInfo * TII; const TargetRegisterInfo * TRI; + SDValue LowerSTORE(SDValue Op, SelectionDAG &DAG) const; SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const; SDValue LowerBRCOND(SDValue Op, SelectionDAG &DAG) const; @@ -33,6 +34,9 @@ class SITargetLowering : public AMDGPUTargetLowering { void ensureSRegLimit(SelectionDAG &DAG, SDValue &Operand, unsigned RegClass, bool &ScalarSlotUsed) const; + SDNode *foldOperands(MachineSDNode *N, SelectionDAG &DAG) const; + void adjustWritemask(MachineSDNode *&N, SelectionDAG &DAG) const; + public: SITargetLowering(TargetMachine &tm); @@ -49,6 +53,8 @@ public: virtual SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const; virtual SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const; virtual SDNode *PostISelFolding(MachineSDNode *N, SelectionDAG &DAG) const; + virtual void AdjustInstrPostInstrSelection(MachineInstr *MI, + SDNode *Node) const; int32_t analyzeImmediate(const SDNode *N) const; }; diff --git a/contrib/llvm/lib/Target/R600/SIInstrFormats.td b/contrib/llvm/lib/Target/R600/SIInstrFormats.td index 3891ddb..f737ddd 100644 --- a/contrib/llvm/lib/Target/R600/SIInstrFormats.td +++ b/contrib/llvm/lib/Target/R600/SIInstrFormats.td @@ -284,33 +284,33 @@ let Uses = [EXEC] in { class MUBUF <bits<7> op, dag outs, dag ins, string asm, list<dag> pattern> : Enc64<outs, ins, asm, pattern> { - bits<8> VDATA; - bits<12> OFFSET; - bits<1> OFFEN; - bits<1> IDXEN; - bits<1> GLC; - bits<1> ADDR64; - bits<1> LDS; - bits<8> VADDR; - bits<7> SRSRC; - bits<1> SLC; - bits<1> TFE; - bits<8> SOFFSET; - - let Inst{11-0} = OFFSET; - let Inst{12} = OFFEN; - let Inst{13} = IDXEN; - let Inst{14} = GLC; - let Inst{15} = ADDR64; - let Inst{16} = LDS; + bits<12> offset; + bits<1> offen; + bits<1> idxen; + bits<1> glc; + bits<1> addr64; + bits<1> lds; + bits<8> vaddr; + bits<8> vdata; + bits<7> srsrc; + bits<1> slc; + bits<1> tfe; + bits<8> soffset; + + let Inst{11-0} = offset; + let Inst{12} = offen; + let Inst{13} = idxen; + let Inst{14} = glc; + let Inst{15} = addr64; + let Inst{16} = lds; let Inst{24-18} = op; let Inst{31-26} = 0x38; //encoding - let Inst{39-32} = VADDR; - let Inst{47-40} = VDATA; - let Inst{52-48} = SRSRC{6-2}; - let Inst{54} = SLC; - let Inst{55} = TFE; - let Inst{63-56} = SOFFSET; + let Inst{39-32} = vaddr; + let Inst{47-40} = vdata; + let Inst{52-48} = srsrc{6-2}; + let Inst{54} = slc; + let Inst{55} = tfe; + let Inst{63-56} = soffset; let VM_CNT = 1; let EXP_CNT = 1; diff --git a/contrib/llvm/lib/Target/R600/SIInstrInfo.cpp b/contrib/llvm/lib/Target/R600/SIInstrInfo.cpp index 0bfcef5..9a04c60 100644 --- a/contrib/llvm/lib/Target/R600/SIInstrInfo.cpp +++ b/contrib/llvm/lib/Target/R600/SIInstrInfo.cpp @@ -58,6 +58,10 @@ SIInstrInfo::copyPhysReg(MachineBasicBlock &MBB, AMDGPU::sub0, AMDGPU::sub1, AMDGPU::sub2, AMDGPU::sub3, 0 }; + const int16_t Sub0_2[] = { + AMDGPU::sub0, 
AMDGPU::sub1, AMDGPU::sub2, 0 + }; + const int16_t Sub0_1[] = { AMDGPU::sub0, AMDGPU::sub1, 0 }; @@ -125,6 +129,11 @@ SIInstrInfo::copyPhysReg(MachineBasicBlock &MBB, Opcode = AMDGPU::V_MOV_B32_e32; SubIndices = Sub0_1; + } else if (AMDGPU::VReg_96RegClass.contains(DestReg)) { + assert(AMDGPU::VReg_96RegClass.contains(SrcReg)); + Opcode = AMDGPU::V_MOV_B32_e32; + SubIndices = Sub0_2; + } else if (AMDGPU::VReg_128RegClass.contains(DestReg)) { assert(AMDGPU::VReg_128RegClass.contains(SrcReg) || AMDGPU::SReg_128RegClass.contains(SrcReg)); diff --git a/contrib/llvm/lib/Target/R600/SIInstrInfo.h b/contrib/llvm/lib/Target/R600/SIInstrInfo.h index d4e60e5..87eff4d 100644 --- a/contrib/llvm/lib/Target/R600/SIInstrInfo.h +++ b/contrib/llvm/lib/Target/R600/SIInstrInfo.h @@ -80,6 +80,7 @@ namespace AMDGPU { int getVOPe64(uint16_t Opcode); int getCommuteRev(uint16_t Opcode); int getCommuteOrig(uint16_t Opcode); + int isMIMG(uint16_t Opcode); } // End namespace AMDGPU diff --git a/contrib/llvm/lib/Target/R600/SIInstrInfo.td b/contrib/llvm/lib/Target/R600/SIInstrInfo.td index 617f0b8..c8aecb7 100644 --- a/contrib/llvm/lib/Target/R600/SIInstrInfo.td +++ b/contrib/llvm/lib/Target/R600/SIInstrInfo.td @@ -26,6 +26,10 @@ def HI32 : SDNodeXForm<imm, [{ return CurDAG->getTargetConstant(N->getZExtValue() >> 32, MVT::i32); }]>; +def SIbuffer_store : SDNode<"AMDGPUISD::BUFFER_STORE", + SDTypeProfile<0, 3, [SDTCisPtrTy<1>, SDTCisInt<2>]>, + [SDNPHasChain, SDNPMayStore]>; + def IMM8bitDWORD : ImmLeaf < i32, [{ return (Imm & ~0x3FC) == 0; @@ -255,14 +259,14 @@ multiclass VOPC_64 <bits<8> op, string opName, class VOP3_32 <bits<9> op, string opName, list<dag> pattern> : VOP3 < op, (outs VReg_32:$dst), (ins VSrc_32:$src0, VSrc_32:$src1, VSrc_32:$src2, - i32imm:$abs, i32imm:$clamp, i32imm:$omod, i32imm:$neg), + InstFlag:$abs, InstFlag:$clamp, InstFlag:$omod, InstFlag:$neg), opName#" $dst, $src0, $src1, $src2, $abs, $clamp, $omod, $neg", pattern >, VOP <opName>; class VOP3_64 <bits<9> op, string opName, list<dag> pattern> : VOP3 < op, (outs VReg_64:$dst), (ins VSrc_64:$src0, VSrc_64:$src1, VSrc_64:$src2, - i32imm:$abs, i32imm:$clamp, i32imm:$omod, i32imm:$neg), + InstFlag:$abs, InstFlag:$clamp, InstFlag:$omod, InstFlag:$neg), opName#" $dst, $src0, $src1, $src2, $abs, $clamp, $omod, $neg", pattern >, VOP <opName>; @@ -285,17 +289,39 @@ class MTBUF_Store_Helper <bits<3> op, string asm, RegisterClass regClass> : MTBU class MUBUF_Load_Helper <bits<7> op, string asm, RegisterClass regClass> : MUBUF < op, - (outs regClass:$dst), + (outs regClass:$vdata), (ins i16imm:$offset, i1imm:$offen, i1imm:$idxen, i1imm:$glc, i1imm:$addr64, i1imm:$lds, VReg_32:$vaddr, SReg_128:$srsrc, i1imm:$slc, i1imm:$tfe, SSrc_32:$soffset), - asm#" $dst, $offset, $offen, $idxen, $glc, $addr64, " + asm#" $vdata, $offset, $offen, $idxen, $glc, $addr64, " #"$lds, $vaddr, $srsrc, $slc, $tfe, $soffset", []> { let mayLoad = 1; let mayStore = 0; } +class MUBUF_Store_Helper <bits<7> op, string name, RegisterClass vdataClass, + ValueType VT> : + MUBUF <op, (outs), (ins vdataClass:$vdata, SReg_128:$srsrc, VReg_64:$vaddr), + name#" $vdata, $srsrc + $vaddr", + [(SIbuffer_store (VT vdataClass:$vdata), (i128 SReg_128:$srsrc), + (i64 VReg_64:$vaddr))]> { + + let mayLoad = 0; + let mayStore = 1; + + // Encoding + let offset = 0; + let offen = 0; + let idxen = 0; + let glc = 0; + let addr64 = 1; + let lds = 0; + let slc = 0; + let tfe = 0; + let soffset = 128; // ZERO +} + class MTBUF_Load_Helper <bits<3> op, string asm, RegisterClass regClass> : MTBUF < op, (outs 
regClass:$dst), @@ -309,7 +335,22 @@ class MTBUF_Load_Helper <bits<3> op, string asm, RegisterClass regClass> : MTBUF let mayStore = 0; } -class MIMG_Load_Helper <bits<7> op, string asm> : MIMG < +class MIMG_NoSampler_Helper <bits<7> op, string asm> : MIMG < + op, + (outs VReg_128:$vdata), + (ins i32imm:$dmask, i1imm:$unorm, i1imm:$glc, i1imm:$da, i1imm:$r128, + i1imm:$tfe, i1imm:$lwe, i1imm:$slc, unknown:$vaddr, + SReg_256:$srsrc), + asm#" $vdata, $dmask, $unorm, $glc, $da, $r128," + #" $tfe, $lwe, $slc, $vaddr, $srsrc", + []> { + let SSAMP = 0; + let mayLoad = 1; + let mayStore = 0; + let hasPostISelHook = 1; +} + +class MIMG_Sampler_Helper <bits<7> op, string asm> : MIMG < op, (outs VReg_128:$vdata), (ins i32imm:$dmask, i1imm:$unorm, i1imm:$glc, i1imm:$da, i1imm:$r128, @@ -320,6 +361,7 @@ class MIMG_Load_Helper <bits<7> op, string asm> : MIMG < []> { let mayLoad = 1; let mayStore = 0; + let hasPostISelHook = 1; } //===----------------------------------------------------------------------===// @@ -353,4 +395,13 @@ def getCommuteOrig : InstrMapping { let ValueCols = [["1"]]; } +// Test if the supplied opcode is an MIMG instruction +def isMIMG : InstrMapping { + let FilterClass = "MIMG"; + let RowFields = ["Inst"]; + let ColFields = ["Size"]; + let KeyCol = ["8"]; + let ValueCols = [["8"]]; +} + include "SIInstructions.td" diff --git a/contrib/llvm/lib/Target/R600/SIInstructions.td b/contrib/llvm/lib/Target/R600/SIInstructions.td index 4f734f9..0d50c5d 100644 --- a/contrib/llvm/lib/Target/R600/SIInstructions.td +++ b/contrib/llvm/lib/Target/R600/SIInstructions.td @@ -108,7 +108,7 @@ VGPR0 = V_CNDMASK VCC, VGPR0, VGPR1 def S_CMPK_EQ_I32 : SOPK < 0x00000003, (outs SCCReg:$dst), (ins SReg_32:$src0, i32imm:$src1), "S_CMPK_EQ_I32", - [(set SCCReg:$dst, (setcc SReg_32:$src0, imm:$src1, SETEQ))] + [(set i1:$dst, (setcc i32:$src0, imm:$src1, SETEQ))] >; */ @@ -408,8 +408,14 @@ def BUFFER_LOAD_DWORDX2 : MUBUF_Load_Helper <0x0000000d, "BUFFER_LOAD_DWORDX2", def BUFFER_LOAD_DWORDX4 : MUBUF_Load_Helper <0x0000000e, "BUFFER_LOAD_DWORDX4", VReg_128>; //def BUFFER_STORE_BYTE : MUBUF_ <0x00000018, "BUFFER_STORE_BYTE", []>; //def BUFFER_STORE_SHORT : MUBUF_ <0x0000001a, "BUFFER_STORE_SHORT", []>; -//def BUFFER_STORE_DWORD : MUBUF_ <0x0000001c, "BUFFER_STORE_DWORD", []>; -//def BUFFER_STORE_DWORDX2 : MUBUF_DWORDX2 <0x0000001d, "BUFFER_STORE_DWORDX2", []>; + +def BUFFER_STORE_DWORD : MUBUF_Store_Helper < + 0x0000001c, "BUFFER_STORE_DWORD", VReg_32, i32 +>; + +def BUFFER_STORE_DWORDX2 : MUBUF_Store_Helper < + 0x0000001d, "BUFFER_STORE_DWORDX2", VReg_64, i64 +>; //def BUFFER_STORE_DWORDX4 : MUBUF_DWORDX4 <0x0000001e, "BUFFER_STORE_DWORDX4", []>; //def BUFFER_ATOMIC_SWAP : MUBUF_ <0x00000030, "BUFFER_ATOMIC_SWAP", []>; //def BUFFER_ATOMIC_CMPSWAP : MUBUF_ <0x00000031, "BUFFER_ATOMIC_CMPSWAP", []>; @@ -489,7 +495,7 @@ defm S_BUFFER_LOAD_DWORDX16 : SMRD_Helper < //def S_MEMTIME : SMRD_ <0x0000001e, "S_MEMTIME", []>; //def S_DCACHE_INV : SMRD_ <0x0000001f, "S_DCACHE_INV", []>; //def IMAGE_LOAD : MIMG_NoPattern_ <"IMAGE_LOAD", 0x00000000>; -//def IMAGE_LOAD_MIP : MIMG_NoPattern_ <"IMAGE_LOAD_MIP", 0x00000001>; +def IMAGE_LOAD_MIP : MIMG_NoSampler_Helper <0x00000001, "IMAGE_LOAD_MIP">; //def IMAGE_LOAD_PCK : MIMG_NoPattern_ <"IMAGE_LOAD_PCK", 0x00000002>; //def IMAGE_LOAD_PCK_SGN : MIMG_NoPattern_ <"IMAGE_LOAD_PCK_SGN", 0x00000003>; //def IMAGE_LOAD_MIP_PCK : MIMG_NoPattern_ <"IMAGE_LOAD_MIP_PCK", 0x00000004>; @@ -498,7 +504,7 @@ defm S_BUFFER_LOAD_DWORDX16 : SMRD_Helper < //def IMAGE_STORE_MIP : MIMG_NoPattern_ 
<"IMAGE_STORE_MIP", 0x00000009>; //def IMAGE_STORE_PCK : MIMG_NoPattern_ <"IMAGE_STORE_PCK", 0x0000000a>; //def IMAGE_STORE_MIP_PCK : MIMG_NoPattern_ <"IMAGE_STORE_MIP_PCK", 0x0000000b>; -//def IMAGE_GET_RESINFO : MIMG_NoPattern_ <"IMAGE_GET_RESINFO", 0x0000000e>; +def IMAGE_GET_RESINFO : MIMG_NoSampler_Helper <0x0000000e, "IMAGE_GET_RESINFO">; //def IMAGE_ATOMIC_SWAP : MIMG_NoPattern_ <"IMAGE_ATOMIC_SWAP", 0x0000000f>; //def IMAGE_ATOMIC_CMPSWAP : MIMG_NoPattern_ <"IMAGE_ATOMIC_CMPSWAP", 0x00000010>; //def IMAGE_ATOMIC_ADD : MIMG_NoPattern_ <"IMAGE_ATOMIC_ADD", 0x00000011>; @@ -516,20 +522,20 @@ defm S_BUFFER_LOAD_DWORDX16 : SMRD_Helper < //def IMAGE_ATOMIC_FCMPSWAP : MIMG_NoPattern_ <"IMAGE_ATOMIC_FCMPSWAP", 0x0000001d>; //def IMAGE_ATOMIC_FMIN : MIMG_NoPattern_ <"IMAGE_ATOMIC_FMIN", 0x0000001e>; //def IMAGE_ATOMIC_FMAX : MIMG_NoPattern_ <"IMAGE_ATOMIC_FMAX", 0x0000001f>; -def IMAGE_SAMPLE : MIMG_Load_Helper <0x00000020, "IMAGE_SAMPLE">; +def IMAGE_SAMPLE : MIMG_Sampler_Helper <0x00000020, "IMAGE_SAMPLE">; //def IMAGE_SAMPLE_CL : MIMG_NoPattern_ <"IMAGE_SAMPLE_CL", 0x00000021>; -def IMAGE_SAMPLE_D : MIMG_Load_Helper <0x00000022, "IMAGE_SAMPLE_D">; +def IMAGE_SAMPLE_D : MIMG_Sampler_Helper <0x00000022, "IMAGE_SAMPLE_D">; //def IMAGE_SAMPLE_D_CL : MIMG_NoPattern_ <"IMAGE_SAMPLE_D_CL", 0x00000023>; -def IMAGE_SAMPLE_L : MIMG_Load_Helper <0x00000024, "IMAGE_SAMPLE_L">; -def IMAGE_SAMPLE_B : MIMG_Load_Helper <0x00000025, "IMAGE_SAMPLE_B">; +def IMAGE_SAMPLE_L : MIMG_Sampler_Helper <0x00000024, "IMAGE_SAMPLE_L">; +def IMAGE_SAMPLE_B : MIMG_Sampler_Helper <0x00000025, "IMAGE_SAMPLE_B">; //def IMAGE_SAMPLE_B_CL : MIMG_NoPattern_ <"IMAGE_SAMPLE_B_CL", 0x00000026>; //def IMAGE_SAMPLE_LZ : MIMG_NoPattern_ <"IMAGE_SAMPLE_LZ", 0x00000027>; -def IMAGE_SAMPLE_C : MIMG_Load_Helper <0x00000028, "IMAGE_SAMPLE_C">; +def IMAGE_SAMPLE_C : MIMG_Sampler_Helper <0x00000028, "IMAGE_SAMPLE_C">; //def IMAGE_SAMPLE_C_CL : MIMG_NoPattern_ <"IMAGE_SAMPLE_C_CL", 0x00000029>; //def IMAGE_SAMPLE_C_D : MIMG_NoPattern_ <"IMAGE_SAMPLE_C_D", 0x0000002a>; //def IMAGE_SAMPLE_C_D_CL : MIMG_NoPattern_ <"IMAGE_SAMPLE_C_D_CL", 0x0000002b>; -def IMAGE_SAMPLE_C_L : MIMG_Load_Helper <0x0000002c, "IMAGE_SAMPLE_C_L">; -def IMAGE_SAMPLE_C_B : MIMG_Load_Helper <0x0000002d, "IMAGE_SAMPLE_C_B">; +def IMAGE_SAMPLE_C_L : MIMG_Sampler_Helper <0x0000002c, "IMAGE_SAMPLE_C_L">; +def IMAGE_SAMPLE_C_B : MIMG_Sampler_Helper <0x0000002d, "IMAGE_SAMPLE_C_B">; //def IMAGE_SAMPLE_C_B_CL : MIMG_NoPattern_ <"IMAGE_SAMPLE_C_B_CL", 0x0000002e>; //def IMAGE_SAMPLE_C_LZ : MIMG_NoPattern_ <"IMAGE_SAMPLE_C_LZ", 0x0000002f>; //def IMAGE_SAMPLE_O : MIMG_NoPattern_ <"IMAGE_SAMPLE_O", 0x00000030>; @@ -594,12 +600,14 @@ defm V_READFIRSTLANE_B32 : VOP1_32 <0x00000002, "V_READFIRSTLANE_B32", []>; //defm V_CVT_I32_F64 : VOP1_32 <0x00000003, "V_CVT_I32_F64", []>; //defm V_CVT_F64_I32 : VOP1_64 <0x00000004, "V_CVT_F64_I32", []>; defm V_CVT_F32_I32 : VOP1_32 <0x00000005, "V_CVT_F32_I32", - [(set VReg_32:$dst, (sint_to_fp VSrc_32:$src0))] + [(set f32:$dst, (sint_to_fp i32:$src0))] +>; +defm V_CVT_F32_U32 : VOP1_32 <0x00000006, "V_CVT_F32_U32", + [(set f32:$dst, (uint_to_fp i32:$src0))] >; -//defm V_CVT_F32_U32 : VOP1_32 <0x00000006, "V_CVT_F32_U32", []>; -//defm V_CVT_U32_F32 : VOP1_32 <0x00000007, "V_CVT_U32_F32", []>; +defm V_CVT_U32_F32 : VOP1_32 <0x00000007, "V_CVT_U32_F32", []>; defm V_CVT_I32_F32 : VOP1_32 <0x00000008, "V_CVT_I32_F32", - [(set (i32 VReg_32:$dst), (fp_to_sint VSrc_32:$src0))] + [(set i32:$dst, (fp_to_sint f32:$src0))] >; defm V_MOV_FED_B32 : VOP1_32 
<0x00000009, "V_MOV_FED_B32", []>; ////def V_CVT_F16_F32 : VOP1_F16 <0x0000000a, "V_CVT_F16_F32", []>; @@ -616,35 +624,37 @@ defm V_MOV_FED_B32 : VOP1_32 <0x00000009, "V_MOV_FED_B32", []>; //defm V_CVT_U32_F64 : VOP1_32 <0x00000015, "V_CVT_U32_F64", []>; //defm V_CVT_F64_U32 : VOP1_64 <0x00000016, "V_CVT_F64_U32", []>; defm V_FRACT_F32 : VOP1_32 <0x00000020, "V_FRACT_F32", - [(set VReg_32:$dst, (AMDGPUfract VSrc_32:$src0))] + [(set f32:$dst, (AMDGPUfract f32:$src0))] +>; +defm V_TRUNC_F32 : VOP1_32 <0x00000021, "V_TRUNC_F32", + [(set f32:$dst, (int_AMDGPU_trunc f32:$src0))] >; -defm V_TRUNC_F32 : VOP1_32 <0x00000021, "V_TRUNC_F32", []>; defm V_CEIL_F32 : VOP1_32 <0x00000022, "V_CEIL_F32", - [(set VReg_32:$dst, (fceil VSrc_32:$src0))] + [(set f32:$dst, (fceil f32:$src0))] >; defm V_RNDNE_F32 : VOP1_32 <0x00000023, "V_RNDNE_F32", - [(set VReg_32:$dst, (frint VSrc_32:$src0))] + [(set f32:$dst, (frint f32:$src0))] >; defm V_FLOOR_F32 : VOP1_32 <0x00000024, "V_FLOOR_F32", - [(set VReg_32:$dst, (ffloor VSrc_32:$src0))] + [(set f32:$dst, (ffloor f32:$src0))] >; defm V_EXP_F32 : VOP1_32 <0x00000025, "V_EXP_F32", - [(set VReg_32:$dst, (fexp2 VSrc_32:$src0))] + [(set f32:$dst, (fexp2 f32:$src0))] >; defm V_LOG_CLAMP_F32 : VOP1_32 <0x00000026, "V_LOG_CLAMP_F32", []>; defm V_LOG_F32 : VOP1_32 <0x00000027, "V_LOG_F32", - [(set VReg_32:$dst, (flog2 VSrc_32:$src0))] + [(set f32:$dst, (flog2 f32:$src0))] >; defm V_RCP_CLAMP_F32 : VOP1_32 <0x00000028, "V_RCP_CLAMP_F32", []>; defm V_RCP_LEGACY_F32 : VOP1_32 <0x00000029, "V_RCP_LEGACY_F32", []>; defm V_RCP_F32 : VOP1_32 <0x0000002a, "V_RCP_F32", - [(set VReg_32:$dst, (fdiv FP_ONE, VSrc_32:$src0))] + [(set f32:$dst, (fdiv FP_ONE, f32:$src0))] >; defm V_RCP_IFLAG_F32 : VOP1_32 <0x0000002b, "V_RCP_IFLAG_F32", []>; defm V_RSQ_CLAMP_F32 : VOP1_32 <0x0000002c, "V_RSQ_CLAMP_F32", []>; defm V_RSQ_LEGACY_F32 : VOP1_32 < 0x0000002d, "V_RSQ_LEGACY_F32", - [(set VReg_32:$dst, (int_AMDGPU_rsq VSrc_32:$src0))] + [(set f32:$dst, (int_AMDGPU_rsq f32:$src0))] >; defm V_RSQ_F32 : VOP1_32 <0x0000002e, "V_RSQ_F32", []>; defm V_RCP_F64 : VOP1_64 <0x0000002f, "V_RCP_F64", []>; @@ -787,14 +797,13 @@ def V_CNDMASK_B32_e64 : VOP3 <0x00000100, (outs VReg_32:$dst), (ins VSrc_32:$src0, VSrc_32:$src1, SSrc_64:$src2, InstFlag:$abs, InstFlag:$clamp, InstFlag:$omod, InstFlag:$neg), "V_CNDMASK_B32_e64 $dst, $src0, $src1, $src2, $abs, $clamp, $omod, $neg", - [(set (i32 VReg_32:$dst), (select (i1 SSrc_64:$src2), - VSrc_32:$src1, VSrc_32:$src0))] + [(set i32:$dst, (select i1:$src2, i32:$src1, i32:$src0))] >; //f32 pattern for V_CNDMASK_B32_e64 def : Pat < - (f32 (select (i1 SSrc_64:$src2), VSrc_32:$src1, VSrc_32:$src0)), - (V_CNDMASK_B32_e64 VSrc_32:$src0, VSrc_32:$src1, SSrc_64:$src2) + (f32 (select i1:$src2, f32:$src1, f32:$src0)), + (V_CNDMASK_B32_e64 $src0, $src1, $src2) >; defm V_READLANE_B32 : VOP2_32 <0x00000001, "V_READLANE_B32", []>; @@ -802,11 +811,11 @@ defm V_WRITELANE_B32 : VOP2_32 <0x00000002, "V_WRITELANE_B32", []>; let isCommutable = 1 in { defm V_ADD_F32 : VOP2_32 <0x00000003, "V_ADD_F32", - [(set VReg_32:$dst, (fadd VSrc_32:$src0, VReg_32:$src1))] + [(set f32:$dst, (fadd f32:$src0, f32:$src1))] >; defm V_SUB_F32 : VOP2_32 <0x00000004, "V_SUB_F32", - [(set VReg_32:$dst, (fsub VSrc_32:$src0, VReg_32:$src1))] + [(set f32:$dst, (fsub f32:$src0, f32:$src1))] >; defm V_SUBREV_F32 : VOP2_32 <0x00000005, "V_SUBREV_F32", [], "V_SUB_F32">; } // End isCommutable = 1 @@ -817,11 +826,11 @@ let isCommutable = 1 in { defm V_MUL_LEGACY_F32 : VOP2_32 < 0x00000007, "V_MUL_LEGACY_F32", - [(set 
VReg_32:$dst, (int_AMDGPU_mul VSrc_32:$src0, VReg_32:$src1))] + [(set f32:$dst, (int_AMDGPU_mul f32:$src0, f32:$src1))] >; defm V_MUL_F32 : VOP2_32 <0x00000008, "V_MUL_F32", - [(set VReg_32:$dst, (fmul VSrc_32:$src0, VReg_32:$src1))] + [(set f32:$dst, (fmul f32:$src0, f32:$src1))] >; } // End isCommutable = 1 @@ -834,43 +843,51 @@ defm V_MUL_F32 : VOP2_32 <0x00000008, "V_MUL_F32", let isCommutable = 1 in { defm V_MIN_LEGACY_F32 : VOP2_32 <0x0000000d, "V_MIN_LEGACY_F32", - [(set VReg_32:$dst, (AMDGPUfmin VSrc_32:$src0, VReg_32:$src1))] + [(set f32:$dst, (AMDGPUfmin f32:$src0, f32:$src1))] >; defm V_MAX_LEGACY_F32 : VOP2_32 <0x0000000e, "V_MAX_LEGACY_F32", - [(set VReg_32:$dst, (AMDGPUfmax VSrc_32:$src0, VReg_32:$src1))] + [(set f32:$dst, (AMDGPUfmax f32:$src0, f32:$src1))] >; defm V_MIN_F32 : VOP2_32 <0x0000000f, "V_MIN_F32", []>; defm V_MAX_F32 : VOP2_32 <0x00000010, "V_MAX_F32", []>; -defm V_MIN_I32 : VOP2_32 <0x00000011, "V_MIN_I32", []>; -defm V_MAX_I32 : VOP2_32 <0x00000012, "V_MAX_I32", []>; -defm V_MIN_U32 : VOP2_32 <0x00000013, "V_MIN_U32", []>; -defm V_MAX_U32 : VOP2_32 <0x00000014, "V_MAX_U32", []>; +defm V_MIN_I32 : VOP2_32 <0x00000011, "V_MIN_I32", + [(set i32:$dst, (AMDGPUsmin i32:$src0, i32:$src1))] +>; +defm V_MAX_I32 : VOP2_32 <0x00000012, "V_MAX_I32", + [(set i32:$dst, (AMDGPUsmax i32:$src0, i32:$src1))] +>; +defm V_MIN_U32 : VOP2_32 <0x00000013, "V_MIN_U32", + [(set i32:$dst, (AMDGPUumin i32:$src0, i32:$src1))] +>; +defm V_MAX_U32 : VOP2_32 <0x00000014, "V_MAX_U32", + [(set i32:$dst, (AMDGPUumax i32:$src0, i32:$src1))] +>; defm V_LSHR_B32 : VOP2_32 <0x00000015, "V_LSHR_B32", - [(set VReg_32:$dst, (srl VSrc_32:$src0, (i32 VReg_32:$src1)))] + [(set i32:$dst, (srl i32:$src0, i32:$src1))] >; defm V_LSHRREV_B32 : VOP2_32 <0x00000016, "V_LSHRREV_B32", [], "V_LSHR_B32">; defm V_ASHR_I32 : VOP2_32 <0x00000017, "V_ASHR_I32", - [(set VReg_32:$dst, (sra VSrc_32:$src0, (i32 VReg_32:$src1)))] + [(set i32:$dst, (sra i32:$src0, i32:$src1))] >; defm V_ASHRREV_I32 : VOP2_32 <0x00000018, "V_ASHRREV_I32", [], "V_ASHR_I32">; defm V_LSHL_B32 : VOP2_32 <0x00000019, "V_LSHL_B32", - [(set VReg_32:$dst, (shl VSrc_32:$src0, (i32 VReg_32:$src1)))] + [(set i32:$dst, (shl i32:$src0, i32:$src1))] >; defm V_LSHLREV_B32 : VOP2_32 <0x0000001a, "V_LSHLREV_B32", [], "V_LSHL_B32">; defm V_AND_B32 : VOP2_32 <0x0000001b, "V_AND_B32", - [(set VReg_32:$dst, (and VSrc_32:$src0, VReg_32:$src1))] + [(set i32:$dst, (and i32:$src0, i32:$src1))] >; defm V_OR_B32 : VOP2_32 <0x0000001c, "V_OR_B32", - [(set VReg_32:$dst, (or VSrc_32:$src0, VReg_32:$src1))] + [(set i32:$dst, (or i32:$src0, i32:$src1))] >; defm V_XOR_B32 : VOP2_32 <0x0000001d, "V_XOR_B32", - [(set VReg_32:$dst, (xor VSrc_32:$src0, VReg_32:$src1))] + [(set i32:$dst, (xor i32:$src0, i32:$src1))] >; } // End isCommutable = 1 @@ -885,11 +902,11 @@ defm V_MADAK_F32 : VOP2_32 <0x00000021, "V_MADAK_F32", []>; let isCommutable = 1, Defs = [VCC] in { // Carry-out goes to VCC defm V_ADD_I32 : VOP2b_32 <0x00000025, "V_ADD_I32", - [(set VReg_32:$dst, (add (i32 VSrc_32:$src0), (i32 VReg_32:$src1)))] + [(set i32:$dst, (add (i32 VSrc_32:$src0), (i32 VReg_32:$src1)))] >; defm V_SUB_I32 : VOP2b_32 <0x00000026, "V_SUB_I32", - [(set VReg_32:$dst, (sub (i32 VSrc_32:$src0), (i32 VReg_32:$src1)))] + [(set i32:$dst, (sub i32:$src0, i32:$src1))] >; defm V_SUBREV_I32 : VOP2b_32 <0x00000027, "V_SUBREV_I32", [], "V_SUB_I32">; @@ -905,7 +922,7 @@ defm V_LDEXP_F32 : VOP2_32 <0x0000002b, "V_LDEXP_F32", []>; ////def V_CVT_PKNORM_I16_F32 : VOP2_I16 <0x0000002d, "V_CVT_PKNORM_I16_F32", 
[]>; ////def V_CVT_PKNORM_U16_F32 : VOP2_U16 <0x0000002e, "V_CVT_PKNORM_U16_F32", []>; defm V_CVT_PKRTZ_F16_F32 : VOP2_32 <0x0000002f, "V_CVT_PKRTZ_F16_F32", - [(set VReg_32:$dst, (int_SI_packf16 VSrc_32:$src0, VReg_32:$src1))] + [(set i32:$dst, (int_SI_packf16 f32:$src0, f32:$src1))] >; ////def V_CVT_PK_U16_U32 : VOP2_U16 <0x00000030, "V_CVT_PK_U16_U32", []>; ////def V_CVT_PK_I16_I32 : VOP2_I16 <0x00000031, "V_CVT_PK_I16_I32", []>; @@ -942,6 +959,7 @@ def V_CUBEMA_F32 : VOP3_32 <0x00000147, "V_CUBEMA_F32", []>; def V_BFE_U32 : VOP3_32 <0x00000148, "V_BFE_U32", []>; def V_BFE_I32 : VOP3_32 <0x00000149, "V_BFE_I32", []>; def V_BFI_B32 : VOP3_32 <0x0000014a, "V_BFI_B32", []>; +defm : BFIPatterns <V_BFI_B32>; def V_FMA_F32 : VOP3_32 <0x0000014b, "V_FMA_F32", []>; def V_FMA_F64 : VOP3_64 <0x0000014c, "V_FMA_F64", []>; //def V_LERP_U8 : VOP3_U8 <0x0000014d, "V_LERP_U8", []>; @@ -983,18 +1001,18 @@ def V_MUL_HI_I32 : VOP3_32 <0x0000016c, "V_MUL_HI_I32", []>; } // isCommutable = 1 def : Pat < - (mul VSrc_32:$src0, VReg_32:$src1), - (V_MUL_LO_I32 VSrc_32:$src0, VReg_32:$src1, (i32 0), 0, 0, 0, 0) + (mul i32:$src0, i32:$src1), + (V_MUL_LO_I32 $src0, $src1, (i32 0)) >; def : Pat < - (mulhu VSrc_32:$src0, VReg_32:$src1), - (V_MUL_HI_U32 VSrc_32:$src0, VReg_32:$src1, (i32 0), 0, 0, 0, 0) + (mulhu i32:$src0, i32:$src1), + (V_MUL_HI_U32 $src0, $src1, (i32 0)) >; def : Pat < - (mulhs VSrc_32:$src0, VReg_32:$src1), - (V_MUL_HI_I32 VSrc_32:$src0, VReg_32:$src1, (i32 0), 0, 0, 0, 0) + (mulhs i32:$src0, i32:$src1), + (V_MUL_HI_I32 $src0, $src1, (i32 0)) >; def V_DIV_SCALE_F32 : VOP3_32 <0x0000016d, "V_DIV_SCALE_F32", []>; @@ -1019,34 +1037,27 @@ def S_MAX_U32 : SOP2_32 <0x00000009, "S_MAX_U32", []>; def S_CSELECT_B32 : SOP2 < 0x0000000a, (outs SReg_32:$dst), (ins SReg_32:$src0, SReg_32:$src1, SCCReg:$scc), "S_CSELECT_B32", - [(set (i32 SReg_32:$dst), (select (i1 SCCReg:$scc), - SReg_32:$src0, SReg_32:$src1))] + [] >; def S_CSELECT_B64 : SOP2_64 <0x0000000b, "S_CSELECT_B64", []>; -// f32 pattern for S_CSELECT_B32 -def : Pat < - (f32 (select (i1 SCCReg:$scc), SReg_32:$src0, SReg_32:$src1)), - (S_CSELECT_B32 SReg_32:$src0, SReg_32:$src1, SCCReg:$scc) ->; - def S_AND_B32 : SOP2_32 <0x0000000e, "S_AND_B32", []>; def S_AND_B64 : SOP2_64 <0x0000000f, "S_AND_B64", - [(set SReg_64:$dst, (i64 (and SSrc_64:$src0, SSrc_64:$src1)))] + [(set i64:$dst, (and i64:$src0, i64:$src1))] >; def : Pat < - (i1 (and SSrc_64:$src0, SSrc_64:$src1)), - (S_AND_B64 SSrc_64:$src0, SSrc_64:$src1) + (i1 (and i1:$src0, i1:$src1)), + (S_AND_B64 $src0, $src1) >; def S_OR_B32 : SOP2_32 <0x00000010, "S_OR_B32", []>; def S_OR_B64 : SOP2_64 <0x00000011, "S_OR_B64", []>; def : Pat < - (i1 (or SSrc_64:$src0, SSrc_64:$src1)), - (S_OR_B64 SSrc_64:$src0, SSrc_64:$src1) + (i1 (or i1:$src0, i1:$src1)), + (S_OR_B64 $src0, $src1) >; def S_XOR_B32 : SOP2_32 <0x00000012, "S_XOR_B32", []>; def S_XOR_B64 : SOP2_64 <0x00000013, "S_XOR_B64", []>; @@ -1097,14 +1108,14 @@ def SI_IF : InstSI < (outs SReg_64:$dst), (ins SReg_64:$vcc, brtarget:$target), "SI_IF $dst, $vcc, $target", - [(set SReg_64:$dst, (int_SI_if SReg_64:$vcc, bb:$target))] + [(set i64:$dst, (int_SI_if i1:$vcc, bb:$target))] >; def SI_ELSE : InstSI < (outs SReg_64:$dst), (ins SReg_64:$src, brtarget:$target), "SI_ELSE $dst, $src, $target", - [(set SReg_64:$dst, (int_SI_else SReg_64:$src, bb:$target))]> { + [(set i64:$dst, (int_SI_else i64:$src, bb:$target))]> { let Constraints = "$src = $dst"; } @@ -1113,7 +1124,7 @@ def SI_LOOP : InstSI < (outs), (ins SReg_64:$saved, brtarget:$target), "SI_LOOP 
$saved, $target", - [(int_SI_loop SReg_64:$saved, bb:$target)] + [(int_SI_loop i64:$saved, bb:$target)] >; } // end isBranch = 1, isTerminator = 1 @@ -1122,35 +1133,35 @@ def SI_BREAK : InstSI < (outs SReg_64:$dst), (ins SReg_64:$src), "SI_ELSE $dst, $src", - [(set SReg_64:$dst, (int_SI_break SReg_64:$src))] + [(set i64:$dst, (int_SI_break i64:$src))] >; def SI_IF_BREAK : InstSI < (outs SReg_64:$dst), (ins SReg_64:$vcc, SReg_64:$src), "SI_IF_BREAK $dst, $vcc, $src", - [(set SReg_64:$dst, (int_SI_if_break SReg_64:$vcc, SReg_64:$src))] + [(set i64:$dst, (int_SI_if_break i1:$vcc, i64:$src))] >; def SI_ELSE_BREAK : InstSI < (outs SReg_64:$dst), (ins SReg_64:$src0, SReg_64:$src1), "SI_ELSE_BREAK $dst, $src0, $src1", - [(set SReg_64:$dst, (int_SI_else_break SReg_64:$src0, SReg_64:$src1))] + [(set i64:$dst, (int_SI_else_break i64:$src0, i64:$src1))] >; def SI_END_CF : InstSI < (outs), (ins SReg_64:$saved), "SI_END_CF $saved", - [(int_SI_end_cf SReg_64:$saved)] + [(int_SI_end_cf i64:$saved)] >; def SI_KILL : InstSI < (outs), (ins VReg_32:$src), "SI_KIL $src", - [(int_AMDGPU_kill VReg_32:$src)] + [(int_AMDGPU_kill f32:$src)] >; } // end mayLoad = 1, mayStore = 1, hasSideEffects = 1 @@ -1184,8 +1195,8 @@ def SI_INDIRECT_DST_V16 : SI_INDIRECT_DST<VReg_512>; } // end IsCodeGenOnly, isPseudo def : Pat< - (int_AMDGPU_cndlt VReg_32:$src0, VReg_32:$src1, VReg_32:$src2), - (V_CNDMASK_B32_e64 VReg_32:$src2, VReg_32:$src1, (V_CMP_GT_F32_e64 0, VReg_32:$src0)) + (int_AMDGPU_cndlt f32:$src0, f32:$src1, f32:$src2), + (V_CNDMASK_B32_e64 $src2, $src1, (V_CMP_GT_F32_e64 0, $src0)) >; def : Pat < @@ -1195,93 +1206,110 @@ def : Pat < /* int_SI_vs_load_input */ def : Pat< - (int_SI_vs_load_input SReg_128:$tlst, IMM12bit:$attr_offset, - VReg_32:$buf_idx_vgpr), + (int_SI_vs_load_input v16i8:$tlst, IMM12bit:$attr_offset, + i32:$buf_idx_vgpr), (BUFFER_LOAD_FORMAT_XYZW imm:$attr_offset, 0, 1, 0, 0, 0, - VReg_32:$buf_idx_vgpr, SReg_128:$tlst, - 0, 0, 0) + $buf_idx_vgpr, $tlst, 0, 0, 0) >; /* int_SI_export */ def : Pat < (int_SI_export imm:$en, imm:$vm, imm:$done, imm:$tgt, imm:$compr, - VReg_32:$src0,VReg_32:$src1, VReg_32:$src2, VReg_32:$src3), + f32:$src0, f32:$src1, f32:$src2, f32:$src3), (EXP imm:$en, imm:$tgt, imm:$compr, imm:$done, imm:$vm, - VReg_32:$src0, VReg_32:$src1, VReg_32:$src2, VReg_32:$src3) + $src0, $src1, $src2, $src3) >; +/********** ======================= **********/ +/********** Image sampling patterns **********/ +/********** ======================= **********/ /* int_SI_sample for simple 1D texture lookup */ def : Pat < - (int_SI_sample imm:$writemask, VReg_32:$addr, - SReg_256:$rsrc, SReg_128:$sampler, imm), - (IMAGE_SAMPLE imm:$writemask, 0, 0, 0, 0, 0, 0, 0, VReg_32:$addr, - SReg_256:$rsrc, SReg_128:$sampler) + (int_SI_sample v1i32:$addr, v32i8:$rsrc, v16i8:$sampler, imm), + (IMAGE_SAMPLE 0xf, 0, 0, 0, 0, 0, 0, 0, $addr, $rsrc, $sampler) >; -class SamplePattern<Intrinsic name, MIMG opcode, RegisterClass addr_class, - ValueType addr_type> : Pat < - (name imm:$writemask, (addr_type addr_class:$addr), - SReg_256:$rsrc, SReg_128:$sampler, imm), - (opcode imm:$writemask, 0, 0, 0, 0, 0, 0, 0, addr_class:$addr, - SReg_256:$rsrc, SReg_128:$sampler) +class SamplePattern<Intrinsic name, MIMG opcode, ValueType vt> : Pat < + (name vt:$addr, v32i8:$rsrc, v16i8:$sampler, imm), + (opcode 0xf, 0, 0, 0, 0, 0, 0, 0, $addr, $rsrc, $sampler) >; -class SampleRectPattern<Intrinsic name, MIMG opcode, RegisterClass addr_class, - ValueType addr_type> : Pat < - (name imm:$writemask, (addr_type addr_class:$addr), - 
SReg_256:$rsrc, SReg_128:$sampler, TEX_RECT), - (opcode imm:$writemask, 1, 0, 0, 0, 0, 0, 0, addr_class:$addr, - SReg_256:$rsrc, SReg_128:$sampler) +class SampleRectPattern<Intrinsic name, MIMG opcode, ValueType vt> : Pat < + (name vt:$addr, v32i8:$rsrc, v16i8:$sampler, TEX_RECT), + (opcode 0xf, 1, 0, 0, 0, 0, 0, 0, $addr, $rsrc, $sampler) >; -class SampleArrayPattern<Intrinsic name, MIMG opcode, RegisterClass addr_class, - ValueType addr_type> : Pat < - (name imm:$writemask, (addr_type addr_class:$addr), - SReg_256:$rsrc, SReg_128:$sampler, TEX_ARRAY), - (opcode imm:$writemask, 0, 0, 1, 0, 0, 0, 0, addr_class:$addr, - SReg_256:$rsrc, SReg_128:$sampler) +class SampleArrayPattern<Intrinsic name, MIMG opcode, ValueType vt> : Pat < + (name vt:$addr, v32i8:$rsrc, v16i8:$sampler, TEX_ARRAY), + (opcode 0xf, 0, 0, 1, 0, 0, 0, 0, $addr, $rsrc, $sampler) >; class SampleShadowPattern<Intrinsic name, MIMG opcode, - RegisterClass addr_class, ValueType addr_type> : Pat < - (name imm:$writemask, (addr_type addr_class:$addr), - SReg_256:$rsrc, SReg_128:$sampler, TEX_SHADOW), - (opcode imm:$writemask, 0, 0, 0, 0, 0, 0, 0, addr_class:$addr, - SReg_256:$rsrc, SReg_128:$sampler) + ValueType vt> : Pat < + (name vt:$addr, v32i8:$rsrc, v16i8:$sampler, TEX_SHADOW), + (opcode 0xf, 0, 0, 0, 0, 0, 0, 0, $addr, $rsrc, $sampler) >; class SampleShadowArrayPattern<Intrinsic name, MIMG opcode, - RegisterClass addr_class, ValueType addr_type> : Pat < - (name imm:$writemask, (addr_type addr_class:$addr), - SReg_256:$rsrc, SReg_128:$sampler, TEX_SHADOW_ARRAY), - (opcode imm:$writemask, 0, 0, 1, 0, 0, 0, 0, addr_class:$addr, - SReg_256:$rsrc, SReg_128:$sampler) + ValueType vt> : Pat < + (name vt:$addr, v32i8:$rsrc, v16i8:$sampler, TEX_SHADOW_ARRAY), + (opcode 0xf, 0, 0, 1, 0, 0, 0, 0, $addr, $rsrc, $sampler) >; /* int_SI_sample* for texture lookups consuming more address parameters */ -multiclass SamplePatterns<RegisterClass addr_class, ValueType addr_type> { - def : SamplePattern <int_SI_sample, IMAGE_SAMPLE, addr_class, addr_type>; - def : SampleRectPattern <int_SI_sample, IMAGE_SAMPLE, addr_class, addr_type>; - def : SampleArrayPattern <int_SI_sample, IMAGE_SAMPLE, addr_class, addr_type>; - def : SampleShadowPattern <int_SI_sample, IMAGE_SAMPLE_C, addr_class, addr_type>; - def : SampleShadowArrayPattern <int_SI_sample, IMAGE_SAMPLE_C, addr_class, addr_type>; - - def : SamplePattern <int_SI_samplel, IMAGE_SAMPLE_L, addr_class, addr_type>; - def : SampleArrayPattern <int_SI_samplel, IMAGE_SAMPLE_L, addr_class, addr_type>; - def : SampleShadowPattern <int_SI_samplel, IMAGE_SAMPLE_C_L, addr_class, addr_type>; - def : SampleShadowArrayPattern <int_SI_samplel, IMAGE_SAMPLE_C_L, addr_class, addr_type>; - - def : SamplePattern <int_SI_sampleb, IMAGE_SAMPLE_B, addr_class, addr_type>; - def : SampleArrayPattern <int_SI_sampleb, IMAGE_SAMPLE_B, addr_class, addr_type>; - def : SampleShadowPattern <int_SI_sampleb, IMAGE_SAMPLE_C_B, addr_class, addr_type>; - def : SampleShadowArrayPattern <int_SI_sampleb, IMAGE_SAMPLE_C_B, addr_class, addr_type>; +multiclass SamplePatterns<ValueType addr_type> { + def : SamplePattern <int_SI_sample, IMAGE_SAMPLE, addr_type>; + def : SampleRectPattern <int_SI_sample, IMAGE_SAMPLE, addr_type>; + def : SampleArrayPattern <int_SI_sample, IMAGE_SAMPLE, addr_type>; + def : SampleShadowPattern <int_SI_sample, IMAGE_SAMPLE_C, addr_type>; + def : SampleShadowArrayPattern <int_SI_sample, IMAGE_SAMPLE_C, addr_type>; + + def : SamplePattern <int_SI_samplel, IMAGE_SAMPLE_L, addr_type>; + def : SampleArrayPattern 
<int_SI_samplel, IMAGE_SAMPLE_L, addr_type>; + def : SampleShadowPattern <int_SI_samplel, IMAGE_SAMPLE_C_L, addr_type>; + def : SampleShadowArrayPattern <int_SI_samplel, IMAGE_SAMPLE_C_L, addr_type>; + + def : SamplePattern <int_SI_sampleb, IMAGE_SAMPLE_B, addr_type>; + def : SampleArrayPattern <int_SI_sampleb, IMAGE_SAMPLE_B, addr_type>; + def : SampleShadowPattern <int_SI_sampleb, IMAGE_SAMPLE_C_B, addr_type>; + def : SampleShadowArrayPattern <int_SI_sampleb, IMAGE_SAMPLE_C_B, addr_type>; +} + +defm : SamplePatterns<v2i32>; +defm : SamplePatterns<v4i32>; +defm : SamplePatterns<v8i32>; +defm : SamplePatterns<v16i32>; + +/* int_SI_imageload for texture fetches consuming varying address parameters */ +class ImageLoadPattern<Intrinsic name, MIMG opcode, ValueType addr_type> : Pat < + (name addr_type:$addr, v32i8:$rsrc, imm), + (opcode 0xf, 0, 0, 0, 0, 0, 0, 0, $addr, $rsrc) +>; + +class ImageLoadArrayPattern<Intrinsic name, MIMG opcode, ValueType addr_type> : Pat < + (name addr_type:$addr, v32i8:$rsrc, TEX_ARRAY), + (opcode 0xf, 0, 0, 1, 0, 0, 0, 0, $addr, $rsrc) +>; + +multiclass ImageLoadPatterns<ValueType addr_type> { + def : ImageLoadPattern <int_SI_imageload, IMAGE_LOAD_MIP, addr_type>; + def : ImageLoadArrayPattern <int_SI_imageload, IMAGE_LOAD_MIP, addr_type>; } -defm : SamplePatterns<VReg_64, v2i32>; -defm : SamplePatterns<VReg_128, v4i32>; -defm : SamplePatterns<VReg_256, v8i32>; -defm : SamplePatterns<VReg_512, v16i32>; +defm : ImageLoadPatterns<v2i32>; +defm : ImageLoadPatterns<v4i32>; + +/* Image resource information */ +def : Pat < + (int_SI_resinfo i32:$mipid, v32i8:$rsrc, imm), + (IMAGE_GET_RESINFO 0xf, 0, 0, 0, 0, 0, 0, 0, (V_MOV_B32_e32 $mipid), $rsrc) +>; + +def : Pat < + (int_SI_resinfo i32:$mipid, v32i8:$rsrc, TEX_ARRAY), + (IMAGE_GET_RESINFO 0xf, 0, 0, 1, 0, 0, 0, 0, (V_MOV_B32_e32 $mipid), $rsrc) +>; /********** ============================================ **********/ /********** Extraction, Insertion, Building and Casting **********/ @@ -1289,77 +1317,77 @@ defm : SamplePatterns<VReg_512, v16i32>; foreach Index = 0-2 in { def Extract_Element_v2i32_#Index : Extract_Element < - i32, v2i32, VReg_64, Index, !cast<SubRegIndex>(sub#Index) + i32, v2i32, Index, !cast<SubRegIndex>(sub#Index) >; def Insert_Element_v2i32_#Index : Insert_Element < - i32, v2i32, VReg_32, VReg_64, Index, !cast<SubRegIndex>(sub#Index) + i32, v2i32, Index, !cast<SubRegIndex>(sub#Index) >; def Extract_Element_v2f32_#Index : Extract_Element < - f32, v2f32, VReg_64, Index, !cast<SubRegIndex>(sub#Index) + f32, v2f32, Index, !cast<SubRegIndex>(sub#Index) >; def Insert_Element_v2f32_#Index : Insert_Element < - f32, v2f32, VReg_32, VReg_64, Index, !cast<SubRegIndex>(sub#Index) + f32, v2f32, Index, !cast<SubRegIndex>(sub#Index) >; } foreach Index = 0-3 in { def Extract_Element_v4i32_#Index : Extract_Element < - i32, v4i32, VReg_128, Index, !cast<SubRegIndex>(sub#Index) + i32, v4i32, Index, !cast<SubRegIndex>(sub#Index) >; def Insert_Element_v4i32_#Index : Insert_Element < - i32, v4i32, VReg_32, VReg_128, Index, !cast<SubRegIndex>(sub#Index) + i32, v4i32, Index, !cast<SubRegIndex>(sub#Index) >; def Extract_Element_v4f32_#Index : Extract_Element < - f32, v4f32, VReg_128, Index, !cast<SubRegIndex>(sub#Index) + f32, v4f32, Index, !cast<SubRegIndex>(sub#Index) >; def Insert_Element_v4f32_#Index : Insert_Element < - f32, v4f32, VReg_32, VReg_128, Index, !cast<SubRegIndex>(sub#Index) + f32, v4f32, Index, !cast<SubRegIndex>(sub#Index) >; } foreach Index = 0-7 in { def Extract_Element_v8i32_#Index : Extract_Element < 
- i32, v8i32, VReg_256, Index, !cast<SubRegIndex>(sub#Index) + i32, v8i32, Index, !cast<SubRegIndex>(sub#Index) >; def Insert_Element_v8i32_#Index : Insert_Element < - i32, v8i32, VReg_32, VReg_256, Index, !cast<SubRegIndex>(sub#Index) + i32, v8i32, Index, !cast<SubRegIndex>(sub#Index) >; def Extract_Element_v8f32_#Index : Extract_Element < - f32, v8f32, VReg_256, Index, !cast<SubRegIndex>(sub#Index) + f32, v8f32, Index, !cast<SubRegIndex>(sub#Index) >; def Insert_Element_v8f32_#Index : Insert_Element < - f32, v8f32, VReg_32, VReg_256, Index, !cast<SubRegIndex>(sub#Index) + f32, v8f32, Index, !cast<SubRegIndex>(sub#Index) >; } foreach Index = 0-15 in { def Extract_Element_v16i32_#Index : Extract_Element < - i32, v16i32, VReg_512, Index, !cast<SubRegIndex>(sub#Index) + i32, v16i32, Index, !cast<SubRegIndex>(sub#Index) >; def Insert_Element_v16i32_#Index : Insert_Element < - i32, v16i32, VReg_32, VReg_512, Index, !cast<SubRegIndex>(sub#Index) + i32, v16i32, Index, !cast<SubRegIndex>(sub#Index) >; def Extract_Element_v16f32_#Index : Extract_Element < - f32, v16f32, VReg_512, Index, !cast<SubRegIndex>(sub#Index) + f32, v16f32, Index, !cast<SubRegIndex>(sub#Index) >; def Insert_Element_v16f32_#Index : Insert_Element < - f32, v16f32, VReg_32, VReg_512, Index, !cast<SubRegIndex>(sub#Index) + f32, v16f32, Index, !cast<SubRegIndex>(sub#Index) >; } -def : Vector1_Build <v1i32, VReg_32, i32, VReg_32>; -def : Vector2_Build <v2i32, VReg_64, i32, VReg_32>; -def : Vector2_Build <v2f32, VReg_64, f32, VReg_32>; -def : Vector4_Build <v4i32, VReg_128, i32, VReg_32>; -def : Vector4_Build <v4f32, VReg_128, f32, VReg_32>; -def : Vector8_Build <v8i32, VReg_256, i32, VReg_32>; -def : Vector8_Build <v8f32, VReg_256, f32, VReg_32>; -def : Vector16_Build <v16i32, VReg_512, i32, VReg_32>; -def : Vector16_Build <v16f32, VReg_512, f32, VReg_32>; +def : Vector1_Build <v1i32, i32, VReg_32>; +def : Vector2_Build <v2i32, i32>; +def : Vector2_Build <v2f32, f32>; +def : Vector4_Build <v4i32, i32>; +def : Vector4_Build <v4f32, f32>; +def : Vector8_Build <v8i32, i32>; +def : Vector8_Build <v8f32, f32>; +def : Vector16_Build <v16i32, i32>; +def : Vector16_Build <v16f32, f32>; def : BitConvert <i32, f32, SReg_32>; def : BitConvert <i32, f32, VReg_32>; @@ -1372,20 +1400,20 @@ def : BitConvert <f32, i32, VReg_32>; /********** =================== **********/ def : Pat < - (int_AMDIL_clamp VReg_32:$src, (f32 FP_ZERO), (f32 FP_ONE)), - (V_ADD_F32_e64 VReg_32:$src, (i32 0 /* SRC1 */), + (int_AMDIL_clamp f32:$src, (f32 FP_ZERO), (f32 FP_ONE)), + (V_ADD_F32_e64 $src, (i32 0 /* SRC1 */), 0 /* ABS */, 1 /* CLAMP */, 0 /* OMOD */, 0 /* NEG */) >; def : Pat < - (fabs VReg_32:$src), - (V_ADD_F32_e64 VReg_32:$src, (i32 0 /* SRC1 */), + (fabs f32:$src), + (V_ADD_F32_e64 $src, (i32 0 /* SRC1 */), 1 /* ABS */, 0 /* CLAMP */, 0 /* OMOD */, 0 /* NEG */) >; def : Pat < - (fneg VReg_32:$src), - (V_ADD_F32_e64 VReg_32:$src, (i32 0 /* SRC1 */), + (fneg f32:$src), + (V_ADD_F32_e64 $src, (i32 0 /* SRC1 */), 0 /* ABS */, 0 /* CLAMP */, 0 /* OMOD */, 1 /* NEG */) >; @@ -1426,16 +1454,16 @@ def : Pat < /********** ===================== **********/ def : Pat < - (int_SI_fs_constant imm:$attr_chan, imm:$attr, M0Reg:$params), - (V_INTERP_MOV_F32 INTERP.P0, imm:$attr_chan, imm:$attr, M0Reg:$params) + (int_SI_fs_constant imm:$attr_chan, imm:$attr, i32:$params), + (V_INTERP_MOV_F32 INTERP.P0, imm:$attr_chan, imm:$attr, $params) >; def : Pat < - (int_SI_fs_interp imm:$attr_chan, imm:$attr, M0Reg:$params, VReg_64:$ij), - (V_INTERP_P2_F32 (V_INTERP_P1_F32 
(EXTRACT_SUBREG VReg_64:$ij, sub0), - imm:$attr_chan, imm:$attr, M0Reg:$params), - (EXTRACT_SUBREG VReg_64:$ij, sub1), - imm:$attr_chan, imm:$attr, M0Reg:$params) + (int_SI_fs_interp imm:$attr_chan, imm:$attr, M0Reg:$params, v2i32:$ij), + (V_INTERP_P2_F32 (V_INTERP_P1_F32 (EXTRACT_SUBREG v2i32:$ij, sub0), + imm:$attr_chan, imm:$attr, i32:$params), + (EXTRACT_SUBREG $ij, sub1), + imm:$attr_chan, imm:$attr, $params) >; /********** ================== **********/ @@ -1443,101 +1471,111 @@ def : Pat < /********** ================== **********/ /* llvm.AMDGPU.pow */ -def : POW_Common <V_LOG_F32_e32, V_EXP_F32_e32, V_MUL_LEGACY_F32_e32, VReg_32>; +def : POW_Common <V_LOG_F32_e32, V_EXP_F32_e32, V_MUL_LEGACY_F32_e32>; def : Pat < - (int_AMDGPU_div VSrc_32:$src0, VSrc_32:$src1), - (V_MUL_LEGACY_F32_e32 VSrc_32:$src0, (V_RCP_LEGACY_F32_e32 VSrc_32:$src1)) + (int_AMDGPU_div f32:$src0, f32:$src1), + (V_MUL_LEGACY_F32_e32 $src0, (V_RCP_LEGACY_F32_e32 $src1)) >; def : Pat< - (fdiv VSrc_32:$src0, VSrc_32:$src1), - (V_MUL_F32_e32 VSrc_32:$src0, (V_RCP_F32_e32 VSrc_32:$src1)) + (fdiv f32:$src0, f32:$src1), + (V_MUL_F32_e32 $src0, (V_RCP_F32_e32 $src1)) >; def : Pat < - (fcos VSrc_32:$src0), - (V_COS_F32_e32 (V_MUL_F32_e32 VSrc_32:$src0, (V_MOV_B32_e32 CONST.TWO_PI_INV))) + (fcos f32:$src0), + (V_COS_F32_e32 (V_MUL_F32_e32 $src0, (V_MOV_B32_e32 CONST.TWO_PI_INV))) >; def : Pat < - (fsin VSrc_32:$src0), - (V_SIN_F32_e32 (V_MUL_F32_e32 VSrc_32:$src0, (V_MOV_B32_e32 CONST.TWO_PI_INV))) + (fsin f32:$src0), + (V_SIN_F32_e32 (V_MUL_F32_e32 $src0, (V_MOV_B32_e32 CONST.TWO_PI_INV))) >; def : Pat < - (int_AMDGPU_cube VReg_128:$src), + (int_AMDGPU_cube v4f32:$src), (INSERT_SUBREG (INSERT_SUBREG (INSERT_SUBREG (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), - (V_CUBETC_F32 (EXTRACT_SUBREG VReg_128:$src, sub0), - (EXTRACT_SUBREG VReg_128:$src, sub1), - (EXTRACT_SUBREG VReg_128:$src, sub2), - 0, 0, 0, 0), sub0), - (V_CUBESC_F32 (EXTRACT_SUBREG VReg_128:$src, sub0), - (EXTRACT_SUBREG VReg_128:$src, sub1), - (EXTRACT_SUBREG VReg_128:$src, sub2), - 0, 0, 0, 0), sub1), - (V_CUBEMA_F32 (EXTRACT_SUBREG VReg_128:$src, sub0), - (EXTRACT_SUBREG VReg_128:$src, sub1), - (EXTRACT_SUBREG VReg_128:$src, sub2), - 0, 0, 0, 0), sub2), - (V_CUBEID_F32 (EXTRACT_SUBREG VReg_128:$src, sub0), - (EXTRACT_SUBREG VReg_128:$src, sub1), - (EXTRACT_SUBREG VReg_128:$src, sub2), - 0, 0, 0, 0), sub3) + (V_CUBETC_F32 (EXTRACT_SUBREG $src, sub0), + (EXTRACT_SUBREG $src, sub1), + (EXTRACT_SUBREG $src, sub2)), + sub0), + (V_CUBESC_F32 (EXTRACT_SUBREG $src, sub0), + (EXTRACT_SUBREG $src, sub1), + (EXTRACT_SUBREG $src, sub2)), + sub1), + (V_CUBEMA_F32 (EXTRACT_SUBREG $src, sub0), + (EXTRACT_SUBREG $src, sub1), + (EXTRACT_SUBREG $src, sub2)), + sub2), + (V_CUBEID_F32 (EXTRACT_SUBREG $src, sub0), + (EXTRACT_SUBREG $src, sub1), + (EXTRACT_SUBREG $src, sub2)), + sub3) >; def : Pat < - (i32 (sext (i1 SReg_64:$src0))), - (V_CNDMASK_B32_e64 (i32 0), (i32 -1), SReg_64:$src0) + (i32 (sext i1:$src0)), + (V_CNDMASK_B32_e64 (i32 0), (i32 -1), $src0) >; // 1. Offset as 8bit DWORD immediate def : Pat < - (int_SI_load_const SReg_128:$sbase, IMM8bitDWORD:$offset), - (S_BUFFER_LOAD_DWORD_IMM SReg_128:$sbase, IMM8bitDWORD:$offset) + (int_SI_load_const v16i8:$sbase, IMM8bitDWORD:$offset), + (S_BUFFER_LOAD_DWORD_IMM $sbase, IMM8bitDWORD:$offset) >; // 2. 
Offset loaded in an 32bit SGPR def : Pat < - (int_SI_load_const SReg_128:$sbase, imm:$offset), - (S_BUFFER_LOAD_DWORD_SGPR SReg_128:$sbase, (S_MOV_B32 imm:$offset)) + (int_SI_load_const v16i8:$sbase, imm:$offset), + (S_BUFFER_LOAD_DWORD_SGPR $sbase, (S_MOV_B32 imm:$offset)) >; // 3. Offset in an 32Bit VGPR def : Pat < - (int_SI_load_const SReg_128:$sbase, VReg_32:$voff), - (BUFFER_LOAD_DWORD 0, 1, 0, 0, 0, 0, VReg_32:$voff, SReg_128:$sbase, 0, 0, 0) + (int_SI_load_const v16i8:$sbase, i32:$voff), + (BUFFER_LOAD_DWORD 0, 1, 0, 0, 0, 0, $voff, $sbase, 0, 0, 0) +>; + +// The multiplication scales from [0,1] to the unsigned integer range +def : Pat < + (AMDGPUurecip i32:$src0), + (V_CVT_U32_F32_e32 + (V_MUL_F32_e32 CONST.FP_UINT_MAX_PLUS_1, + (V_RCP_IFLAG_F32_e32 (V_CVT_F32_U32_e32 $src0)))) >; /********** ================== **********/ /********** VOP3 Patterns **********/ /********** ================== **********/ -def : Pat <(f32 (fadd (fmul VSrc_32:$src0, VSrc_32:$src1), VSrc_32:$src2)), - (V_MAD_F32 VSrc_32:$src0, VSrc_32:$src1, VSrc_32:$src2, - 0, 0, 0, 0)>; +def : Pat < + (f32 (fadd (fmul f32:$src0, f32:$src1), f32:$src2)), + (V_MAD_F32 $src0, $src1, $src2) +>; /********** ================== **********/ /********** SMRD Patterns **********/ /********** ================== **********/ multiclass SMRD_Pattern <SMRD Instr_IMM, SMRD Instr_SGPR, ValueType vt> { + // 1. Offset as 8bit DWORD immediate def : Pat < - (constant_load (SIadd64bit32bit SReg_64:$sbase, IMM8bitDWORD:$offset)), - (vt (Instr_IMM SReg_64:$sbase, IMM8bitDWORD:$offset)) + (constant_load (SIadd64bit32bit i64:$sbase, IMM8bitDWORD:$offset)), + (vt (Instr_IMM $sbase, IMM8bitDWORD:$offset)) >; // 2. Offset loaded in an 32bit SGPR def : Pat < - (constant_load (SIadd64bit32bit SReg_64:$sbase, imm:$offset)), - (vt (Instr_SGPR SReg_64:$sbase, (S_MOV_B32 imm:$offset))) + (constant_load (SIadd64bit32bit i64:$sbase, imm:$offset)), + (vt (Instr_SGPR $sbase, (S_MOV_B32 imm:$offset))) >; // 3. No offset at all def : Pat < - (constant_load SReg_64:$sbase), - (vt (Instr_IMM SReg_64:$sbase, 0)) + (constant_load i64:$sbase), + (vt (Instr_IMM $sbase, 0)) >; } @@ -1550,45 +1588,37 @@ defm : SMRD_Pattern <S_LOAD_DWORDX8_IMM, S_LOAD_DWORDX8_SGPR, v32i8>; /********** Indirect adressing **********/ /********** ====================== **********/ -multiclass SI_INDIRECT_Pattern <RegisterClass rc, ValueType vt, - SI_INDIRECT_DST IndDst> { +multiclass SI_INDIRECT_Pattern <ValueType vt, SI_INDIRECT_DST IndDst> { + // 1. Extract with offset def : Pat< - (vector_extract (vt rc:$vec), - (i64 (zext (i32 (add VReg_32:$idx, imm:$off)))) - ), - (f32 (SI_INDIRECT_SRC (IMPLICIT_DEF), rc:$vec, VReg_32:$idx, imm:$off)) + (vector_extract vt:$vec, (i64 (zext (add i32:$idx, imm:$off)))), + (f32 (SI_INDIRECT_SRC (IMPLICIT_DEF), $vec, $idx, imm:$off)) >; // 2. Extract without offset def : Pat< - (vector_extract (vt rc:$vec), - (i64 (zext (i32 VReg_32:$idx))) - ), - (f32 (SI_INDIRECT_SRC (IMPLICIT_DEF), rc:$vec, VReg_32:$idx, 0)) + (vector_extract vt:$vec, (i64 (zext i32:$idx))), + (f32 (SI_INDIRECT_SRC (IMPLICIT_DEF), $vec, $idx, 0)) >; // 3. Insert with offset def : Pat< - (vector_insert (vt rc:$vec), (f32 VReg_32:$val), - (i64 (zext (i32 (add VReg_32:$idx, imm:$off)))) - ), - (vt (IndDst (IMPLICIT_DEF), rc:$vec, VReg_32:$idx, imm:$off, VReg_32:$val)) + (vector_insert vt:$vec, f32:$val, (i64 (zext (add i32:$idx, imm:$off)))), + (IndDst (IMPLICIT_DEF), $vec, $idx, imm:$off, $val) >; // 4. 
Insert without offset def : Pat< - (vector_insert (vt rc:$vec), (f32 VReg_32:$val), - (i64 (zext (i32 VReg_32:$idx))) - ), - (vt (IndDst (IMPLICIT_DEF), rc:$vec, VReg_32:$idx, 0, VReg_32:$val)) + (vector_insert vt:$vec, f32:$val, (i64 (zext i32:$idx))), + (IndDst (IMPLICIT_DEF), $vec, $idx, 0, $val) >; } -defm : SI_INDIRECT_Pattern <VReg_64, v2f32, SI_INDIRECT_DST_V2>; -defm : SI_INDIRECT_Pattern <VReg_128, v4f32, SI_INDIRECT_DST_V4>; -defm : SI_INDIRECT_Pattern <VReg_256, v8f32, SI_INDIRECT_DST_V8>; -defm : SI_INDIRECT_Pattern <VReg_512, v16f32, SI_INDIRECT_DST_V16>; +defm : SI_INDIRECT_Pattern <v2f32, SI_INDIRECT_DST_V2>; +defm : SI_INDIRECT_Pattern <v4f32, SI_INDIRECT_DST_V4>; +defm : SI_INDIRECT_Pattern <v8f32, SI_INDIRECT_DST_V8>; +defm : SI_INDIRECT_Pattern <v16f32, SI_INDIRECT_DST_V16>; /********** =============== **********/ /********** Conditions **********/ @@ -1596,12 +1626,18 @@ defm : SI_INDIRECT_Pattern <VReg_512, v16f32, SI_INDIRECT_DST_V16>; def : Pat< (i1 (setcc f32:$src0, f32:$src1, SETO)), - (V_CMP_O_F32_e64 f32:$src0, f32:$src1) + (V_CMP_O_F32_e64 $src0, $src1) >; def : Pat< (i1 (setcc f32:$src0, f32:$src1, SETUO)), - (V_CMP_U_F32_e64 f32:$src0, f32:$src1) + (V_CMP_U_F32_e64 $src0, $src1) >; +//============================================================================// +// Miscellaneous Optimization Patterns +//============================================================================// + +def : SHA256MaPattern <V_BFI_B32, V_XOR_B32_e32>; + } // End isSI predicate diff --git a/contrib/llvm/lib/Target/R600/SIIntrinsics.td b/contrib/llvm/lib/Target/R600/SIIntrinsics.td index 0af378e..224cd2f 100644 --- a/contrib/llvm/lib/Target/R600/SIIntrinsics.td +++ b/contrib/llvm/lib/Target/R600/SIIntrinsics.td @@ -19,12 +19,16 @@ let TargetPrefix = "SI", isTarget = 1 in { def int_SI_load_const : Intrinsic <[llvm_float_ty], [llvm_v16i8_ty, llvm_i32_ty], [IntrNoMem]>; def int_SI_vs_load_input : Intrinsic <[llvm_v4f32_ty], [llvm_v16i8_ty, llvm_i16_ty, llvm_i32_ty], [IntrNoMem]> ; - class Sample : Intrinsic <[llvm_v4f32_ty], [llvm_i32_ty, llvm_anyvector_ty, llvm_v32i8_ty, llvm_v16i8_ty, llvm_i32_ty], [IntrNoMem]>; + class Sample : Intrinsic <[llvm_v4f32_ty], [llvm_anyvector_ty, llvm_v32i8_ty, llvm_v16i8_ty, llvm_i32_ty], [IntrNoMem]>; def int_SI_sample : Sample; def int_SI_sampleb : Sample; def int_SI_samplel : Sample; + def int_SI_imageload : Intrinsic <[llvm_v4i32_ty], [llvm_anyvector_ty, llvm_v32i8_ty, llvm_i32_ty], [IntrNoMem]>; + + def int_SI_resinfo : Intrinsic <[llvm_v4i32_ty], [llvm_i32_ty, llvm_v32i8_ty, llvm_i32_ty], [IntrNoMem]>; + /* Interpolation Intrinsics */ def int_SI_fs_constant : Intrinsic <[llvm_float_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>; diff --git a/contrib/llvm/lib/Target/R600/SIRegisterInfo.td b/contrib/llvm/lib/Target/R600/SIRegisterInfo.td index 4f14931..244d4c00 100644 --- a/contrib/llvm/lib/Target/R600/SIRegisterInfo.td +++ b/contrib/llvm/lib/Target/R600/SIRegisterInfo.td @@ -94,6 +94,12 @@ def VGPR_64 : RegisterTuples<[sub0, sub1], [(add (trunc VGPR_32, 255)), (add (shl VGPR_32, 1))]>; +// VGPR 96-bit registers +def VGPR_96 : RegisterTuples<[sub0, sub1, sub2], + [(add (trunc VGPR_32, 254)), + (add (shl VGPR_32, 1)), + (add (shl VGPR_32, 2))]>; + // VGPR 128-bit registers def VGPR_128 : RegisterTuples<[sub0, sub1, sub2, sub3], [(add (trunc VGPR_32, 253)), @@ -151,7 +157,7 @@ def SReg_64 : RegisterClass<"AMDGPU", [i64, i1], 64, (add SGPR_64, VCCReg, EXECReg) >; -def SReg_128 : RegisterClass<"AMDGPU", [v16i8], 128, (add SGPR_128)>; +def 
SReg_128 : RegisterClass<"AMDGPU", [v16i8, i128], 128, (add SGPR_128)>; def SReg_256 : RegisterClass<"AMDGPU", [v32i8], 256, (add SGPR_256)>; @@ -162,6 +168,10 @@ def VReg_32 : RegisterClass<"AMDGPU", [i32, f32, v1i32], 32, (add VGPR_32)>; def VReg_64 : RegisterClass<"AMDGPU", [i64, f64, v2i32, v2f32], 64, (add VGPR_64)>; +def VReg_96 : RegisterClass<"AMDGPU", [untyped], 96, (add VGPR_96)> { + let Size = 96; +} + def VReg_128 : RegisterClass<"AMDGPU", [v4i32, v4f32], 128, (add VGPR_128)>; def VReg_256 : RegisterClass<"AMDGPU", [v8i32, v8f32], 256, (add VGPR_256)>; diff --git a/contrib/llvm/lib/Target/Sparc/MCTargetDesc/SparcBaseInfo.h b/contrib/llvm/lib/Target/Sparc/MCTargetDesc/SparcBaseInfo.h new file mode 100644 index 0000000..aac0e8d --- /dev/null +++ b/contrib/llvm/lib/Target/Sparc/MCTargetDesc/SparcBaseInfo.h @@ -0,0 +1,62 @@ +//===-- SparcBaseInfo.h - Top level definitions for Sparc ---- --*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains small standalone helper functions and enum definitions +// for the Sparc target useful for the compiler back-end and the MC libraries. +// As such, it deliberately does not include references to LLVM core code gen +// types, passes, etc.. +// +//===----------------------------------------------------------------------===// + +#ifndef SPARCBASEINFO_H +#define SPARCBASEINFO_H + +namespace llvm { + +/// SPII - This namespace holds target specific flags for instruction info. +namespace SPII { + +/// Target Operand Flags. Sparc specific TargetFlags for MachineOperands and +/// SDNodes. +enum TOF { + MO_NO_FLAG, + + // Extract the low 10 bits of an address. + // Assembler: %lo(addr) + MO_LO, + + // Extract bits 31-10 of an address. Only for sethi. + // Assembler: %hi(addr) or %lm(addr) + MO_HI, + + // Extract bits 43-22 of an adress. Only for sethi. + // Assembler: %h44(addr) + MO_H44, + + // Extract bits 21-12 of an address. + // Assembler: %m44(addr) + MO_M44, + + // Extract bits 11-0 of an address. + // Assembler: %l44(addr) + MO_L44, + + // Extract bits 63-42 of an address. Only for sethi. + // Assembler: %hh(addr) + MO_HH, + + // Extract bits 41-32 of an address. + // Assembler: %hm(addr) + MO_HM +}; + +} // end namespace SPII +} // end namespace llvm + +#endif diff --git a/contrib/llvm/lib/Target/Sparc/MCTargetDesc/SparcMCTargetDesc.cpp b/contrib/llvm/lib/Target/Sparc/MCTargetDesc/SparcMCTargetDesc.cpp index 7fdb0c3..1c64e1b 100644 --- a/contrib/llvm/lib/Target/Sparc/MCTargetDesc/SparcMCTargetDesc.cpp +++ b/contrib/llvm/lib/Target/Sparc/MCTargetDesc/SparcMCTargetDesc.cpp @@ -50,14 +50,42 @@ static MCSubtargetInfo *createSparcMCSubtargetInfo(StringRef TT, StringRef CPU, return X; } +// Code models. Some only make sense for 64-bit code. +// +// SunCC Reloc CodeModel Constraints +// abs32 Static Small text+data+bss linked below 2^32 bytes +// abs44 Static Medium text+data+bss linked below 2^44 bytes +// abs64 Static Large text smaller than 2^31 bytes +// pic13 PIC_ Small GOT < 2^13 bytes +// pic32 PIC_ Medium GOT < 2^32 bytes +// +// All code models require that the text segment is smaller than 2GB. + static MCCodeGenInfo *createSparcMCCodeGenInfo(StringRef TT, Reloc::Model RM, CodeModel::Model CM, CodeGenOpt::Level OL) { MCCodeGenInfo *X = new MCCodeGenInfo(); + + // The default 32-bit code model is abs32/pic32. 
+ if (CM == CodeModel::Default) + CM = RM == Reloc::PIC_ ? CodeModel::Medium : CodeModel::Small; + X->InitMCCodeGenInfo(RM, CM, OL); return X; } +static MCCodeGenInfo *createSparcV9MCCodeGenInfo(StringRef TT, Reloc::Model RM, + CodeModel::Model CM, + CodeGenOpt::Level OL) { + MCCodeGenInfo *X = new MCCodeGenInfo(); + + // The default 64-bit code model is abs44/pic32. + if (CM == CodeModel::Default) + CM = CodeModel::Medium; + + X->InitMCCodeGenInfo(RM, CM, OL); + return X; +} extern "C" void LLVMInitializeSparcTargetMC() { // Register the MC asm info. RegisterMCAsmInfo<SparcELFMCAsmInfo> X(TheSparcTarget); @@ -67,7 +95,7 @@ extern "C" void LLVMInitializeSparcTargetMC() { TargetRegistry::RegisterMCCodeGenInfo(TheSparcTarget, createSparcMCCodeGenInfo); TargetRegistry::RegisterMCCodeGenInfo(TheSparcV9Target, - createSparcMCCodeGenInfo); + createSparcV9MCCodeGenInfo); // Register the MC instruction info. TargetRegistry::RegisterMCInstrInfo(TheSparcTarget, createSparcMCInstrInfo); diff --git a/contrib/llvm/lib/Target/Sparc/SparcAsmPrinter.cpp b/contrib/llvm/lib/Target/Sparc/SparcAsmPrinter.cpp index e14b3cb..108eb90 100644 --- a/contrib/llvm/lib/Target/Sparc/SparcAsmPrinter.cpp +++ b/contrib/llvm/lib/Target/Sparc/SparcAsmPrinter.cpp @@ -16,6 +16,7 @@ #include "Sparc.h" #include "SparcInstrInfo.h" #include "SparcTargetMachine.h" +#include "MCTargetDesc/SparcBaseInfo.h" #include "llvm/ADT/SmallString.h" #include "llvm/CodeGen/AsmPrinter.h" #include "llvm/CodeGen/MachineInstr.h" @@ -72,15 +73,39 @@ namespace { void SparcAsmPrinter::printOperand(const MachineInstr *MI, int opNum, raw_ostream &O) { const MachineOperand &MO = MI->getOperand (opNum); - bool CloseParen = false; - if (MI->getOpcode() == SP::SETHIi && !MO.isReg() && !MO.isImm()) { - O << "%hi("; - CloseParen = true; - } else if ((MI->getOpcode() == SP::ORri || MI->getOpcode() == SP::ADDri) && - !MO.isReg() && !MO.isImm()) { - O << "%lo("; - CloseParen = true; + unsigned TF = MO.getTargetFlags(); +#ifndef NDEBUG + // Verify the target flags. 
+ if (MO.isGlobal() || MO.isSymbol() || MO.isCPI()) { + if (MI->getOpcode() == SP::CALL) + assert(TF == SPII::MO_NO_FLAG && + "Cannot handle target flags on call address"); + else if (MI->getOpcode() == SP::SETHIi) + assert((TF == SPII::MO_HI || TF == SPII::MO_H44 || TF == SPII::MO_HH) && + "Invalid target flags for address operand on sethi"); + else + assert((TF == SPII::MO_LO || TF == SPII::MO_M44 || TF == SPII::MO_L44 || + TF == SPII::MO_HM) && + "Invalid target flags for small address operand"); } +#endif + + bool CloseParen = true; + switch (TF) { + default: + llvm_unreachable("Unknown target flags on operand"); + case SPII::MO_NO_FLAG: + CloseParen = false; + break; + case SPII::MO_LO: O << "%lo("; break; + case SPII::MO_HI: O << "%hi("; break; + case SPII::MO_H44: O << "%h44("; break; + case SPII::MO_M44: O << "%m44("; break; + case SPII::MO_L44: O << "%l44("; break; + case SPII::MO_HH: O << "%hh("; break; + case SPII::MO_HM: O << "%hm("; break; + } + switch (MO.getType()) { case MachineOperand::MO_Register: O << "%" << StringRef(getRegisterName(MO.getReg())).lower(); @@ -127,14 +152,7 @@ void SparcAsmPrinter::printMemOperand(const MachineInstr *MI, int opNum, return; // don't print "+0" O << "+"; - if (MI->getOperand(opNum+1).isGlobal() || - MI->getOperand(opNum+1).isCPI()) { - O << "%lo("; - printOperand(MI, opNum+1, O); - O << ")"; - } else { - printOperand(MI, opNum+1, O); - } + printOperand(MI, opNum+1, O); } bool SparcAsmPrinter::printGetPCX(const MachineInstr *MI, unsigned opNum, diff --git a/contrib/llvm/lib/Target/Sparc/SparcCallingConv.td b/contrib/llvm/lib/Target/Sparc/SparcCallingConv.td index b38ac61..54784e0 100644 --- a/contrib/llvm/lib/Target/Sparc/SparcCallingConv.td +++ b/contrib/llvm/lib/Target/Sparc/SparcCallingConv.td @@ -12,25 +12,9 @@ //===----------------------------------------------------------------------===// //===----------------------------------------------------------------------===// -// Return Value Calling Conventions +// SPARC v8 32-bit. //===----------------------------------------------------------------------===// -// Sparc 32-bit C return-value convention. -def RetCC_Sparc32 : CallingConv<[ - CCIfType<[i32], CCAssignToReg<[I0, I1, I2, I3, I4, I5]>>, - CCIfType<[f32], CCAssignToReg<[F0, F1, F2, F3]>>, - CCIfType<[f64], CCAssignToReg<[D0, D1]>> -]>; - -// Sparc 64-bit C return-value convention. -def RetCC_Sparc64 : CallingConv<[ - CCIfType<[i32], CCPromoteToType<i64>>, - CCIfType<[i64], CCAssignToReg<[I0, I1, I2, I3, I4, I5]>>, - CCIfType<[f32], CCAssignToReg<[F0, F1, F2, F3]>>, - CCIfType<[f64], CCAssignToReg<[D0, D1]>> -]>; - -// Sparc 32-bit C Calling convention. def CC_Sparc32 : CallingConv<[ //Custom assign SRet to [sp+64]. CCIfSRet<CCCustom<"CC_Sparc_Assign_SRet">>, @@ -43,14 +27,93 @@ def CC_Sparc32 : CallingConv<[ CCAssignToStack<4, 4> ]>; -// Sparc 64-bit C Calling convention. +def RetCC_Sparc32 : CallingConv<[ + CCIfType<[i32], CCAssignToReg<[I0, I1, I2, I3, I4, I5]>>, + CCIfType<[f32], CCAssignToReg<[F0, F1, F2, F3]>>, + CCIfType<[f64], CCAssignToReg<[D0, D1]>> +]>; + + +//===----------------------------------------------------------------------===// +// SPARC v9 64-bit. +//===----------------------------------------------------------------------===// +// +// The 64-bit ABI conceptually assigns all function arguments to a parameter +// array starting at [%fp+BIAS+128] in the callee's stack frame. All arguments +// occupy a multiple of 8 bytes in the array. Integer arguments are extended to +// 64 bits by the caller. 
Floats are right-aligned in their 8-byte slot, the +// first 4 bytes in the slot are undefined. +// +// The integer registers %i0 to %i5 shadow the first 48 bytes of the parameter +// array at fixed offsets. Integer arguments are promoted to registers when +// possible. +// +// The floating point registers %f0 to %f31 shadow the first 128 bytes of the +// parameter array at fixed offsets. Float and double parameters are promoted +// to these registers when possible. +// +// Structs up to 16 bytes in size are passed by value. They are right-aligned +// in one or two 8-byte slots in the parameter array. Struct members are +// promoted to both floating point and integer registers when possible. A +// struct containing two floats would thus be passed in %f0 and %f1, while two +// float function arguments would occupy 8 bytes each, and be passed in %f1 and +// %f3. +// +// When a struct { int, float } is passed by value, the int goes in the high +// bits of an integer register while the float goes in a floating point +// register. +// +// The difference is encoded in LLVM IR using the inreg atttribute on function +// arguments: +// +// C: void f(float, float); +// IR: declare void f(float %f1, float %f3) +// +// C: void f(struct { float f0, f1; }); +// IR: declare void f(float inreg %f0, float inreg %f1) +// +// C: void f(int, float); +// IR: declare void f(int signext %i0, float %f3) +// +// C: void f(struct { int i0high; float f1; }); +// IR: declare void f(i32 inreg %i0high, float inreg %f1) +// +// Two ints in a struct are simply coerced to i64: +// +// C: void f(struct { int i0high, i0low; }); +// IR: declare void f(i64 %i0.coerced) +// +// The frontend and backend divide the task of producing ABI compliant code for +// C functions. The C frontend will: +// +// - Annotate integer arguments with zeroext or signext attributes. +// +// - Split structs into one or two 64-bit sized chunks, or 32-bit chunks with +// inreg attributes. +// +// - Pass structs larger than 16 bytes indirectly with an explicit pointer +// argument. The byval attribute is not used. +// +// The backend will: +// +// - Assign all arguments to 64-bit aligned stack slots, 32-bits for inreg. +// +// - Promote to integer or floating point registers depending on type. +// +// Function return values are passed exactly like function arguments, except a +// struct up to 32 bytes in size can be returned in registers. + +// Function arguments AND return values. def CC_Sparc64 : CallingConv<[ + // The frontend uses the inreg flag to indicate i32 and float arguments from + // structs. These arguments are not promoted to 64 bits, but they can still + // be assigned to integer and float registers. + CCIfInReg<CCIfType<[i32, f32], CCCustom<"CC_Sparc64_Half">>>, + // All integers are promoted to i64 by the caller. CCIfType<[i32], CCPromoteToType<i64>>, - // Integer arguments get passed in integer registers if there is space. - CCIfType<[i64], CCAssignToReg<[I0, I1, I2, I3, I4, I5]>>, - // FIXME: Floating point arguments. - // Alternatively, they are assigned to the stack in 8-byte aligned units. - CCAssignToStack<8, 8> + // Custom assignment is required because stack space is reserved for all + // arguments whether they are passed in registers or not. 
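As a rough, self-contained illustration of the fixed-offset shadowing described above, the sketch below maps an 8-byte-aligned parameter-array offset to the location the 64-bit ABI gives it, using the same cutoffs as the CC_Sparc64_Full hook named here: six 8-byte slots shadowed by %i0-%i5, sixteen slots shadowed by the floating point registers, and everything else left in its stack slot at [%fp+BIAS+128+offset]. The helper, the enum and the example offsets are illustrative only, not LLVM APIs.

#include <cstdio>

enum ArgKind { Int64, Float64, Float32 };

// Illustrative helper: print where the 64-bit ABI puts an argument whose
// 8-byte slot starts at Offset bytes into the parameter array.
static void printLocation(ArgKind Kind, unsigned Offset) {
  if (Kind == Int64 && Offset < 6 * 8)
    std::printf("offset %3u -> %%i%u\n", Offset, Offset / 8);
  else if (Kind == Float64 && Offset < 16 * 8)
    std::printf("offset %3u -> %%d%u\n", Offset, Offset / 4);     // %d0, %d2, ..., %d30
  else if (Kind == Float32 && Offset < 16 * 8)
    std::printf("offset %3u -> %%f%u\n", Offset, Offset / 4 + 1); // %f1, %f3, ..., %f31
  else
    std::printf("offset %3u -> stack slot at [%%fp+BIAS+128+%u]\n", Offset, Offset);
}

int main() {
  printLocation(Int64,   0);  // first integer argument               -> %i0
  printLocation(Float64, 8);  // a double in the second slot          -> %d2
  printLocation(Float32, 16); // a lone float in the third slot       -> %f5
  printLocation(Int64,  48);  // seventh slot, %i registers exhausted -> stack
  return 0;
}

The same offset arithmetic reappears in the CC_Sparc64_Full implementation further down, where the register is derived from the offset returned by CCState::AllocateStack.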
+ CCCustom<"CC_Sparc64_Full"> ]>; diff --git a/contrib/llvm/lib/Target/Sparc/SparcFrameLowering.cpp b/contrib/llvm/lib/Target/Sparc/SparcFrameLowering.cpp index a0dae6e..7874240 100644 --- a/contrib/llvm/lib/Target/Sparc/SparcFrameLowering.cpp +++ b/contrib/llvm/lib/Target/Sparc/SparcFrameLowering.cpp @@ -37,18 +37,27 @@ void SparcFrameLowering::emitPrologue(MachineFunction &MF) const { // Get the number of bytes to allocate from the FrameInfo int NumBytes = (int) MFI->getStackSize(); - // Emit the correct save instruction based on the number of bytes in - // the frame. Minimum stack frame size according to V8 ABI is: - // 16 words for register window spill - // 1 word for address of returned aggregate-value - // + 6 words for passing parameters on the stack - // ---------- - // 23 words * 4 bytes per word = 92 bytes - NumBytes += 92; + if (SubTarget.is64Bit()) { + // All 64-bit stack frames must be 16-byte aligned, and must reserve space + // for spilling the 16 window registers at %sp+BIAS..%sp+BIAS+128. + NumBytes += 128; + // Frames with calls must also reserve space for 6 outgoing arguments + // whether they are used or not. LowerCall_64 takes care of that. + assert(NumBytes % 16 == 0 && "Stack size not 16-byte aligned"); + } else { + // Emit the correct save instruction based on the number of bytes in + // the frame. Minimum stack frame size according to V8 ABI is: + // 16 words for register window spill + // 1 word for address of returned aggregate-value + // + 6 words for passing parameters on the stack + // ---------- + // 23 words * 4 bytes per word = 92 bytes + NumBytes += 92; - // Round up to next doubleword boundary -- a double-word boundary - // is required by the ABI. - NumBytes = (NumBytes + 7) & ~7; + // Round up to next doubleword boundary -- a double-word boundary + // is required by the ABI. 
+ NumBytes = RoundUpToAlignment(NumBytes, 8); + } NumBytes = -NumBytes; if (NumBytes >= -4096) { @@ -70,15 +79,18 @@ void SparcFrameLowering::emitPrologue(MachineFunction &MF) const { void SparcFrameLowering:: eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB, MachineBasicBlock::iterator I) const { - MachineInstr &MI = *I; - DebugLoc dl = MI.getDebugLoc(); - int Size = MI.getOperand(0).getImm(); - if (MI.getOpcode() == SP::ADJCALLSTACKDOWN) - Size = -Size; - const SparcInstrInfo &TII = - *static_cast<const SparcInstrInfo*>(MF.getTarget().getInstrInfo()); - if (Size) - BuildMI(MBB, I, dl, TII.get(SP::ADDri), SP::O6).addReg(SP::O6).addImm(Size); + if (!hasReservedCallFrame(MF)) { + MachineInstr &MI = *I; + DebugLoc DL = MI.getDebugLoc(); + int Size = MI.getOperand(0).getImm(); + if (MI.getOpcode() == SP::ADJCALLSTACKDOWN) + Size = -Size; + const SparcInstrInfo &TII = + *static_cast<const SparcInstrInfo*>(MF.getTarget().getInstrInfo()); + if (Size) + BuildMI(MBB, I, DL, TII.get(SP::ADDri), SP::O6).addReg(SP::O6) + .addImm(Size); + } MBB.erase(I); } diff --git a/contrib/llvm/lib/Target/Sparc/SparcFrameLowering.h b/contrib/llvm/lib/Target/Sparc/SparcFrameLowering.h index 464233e..c375662 100644 --- a/contrib/llvm/lib/Target/Sparc/SparcFrameLowering.h +++ b/contrib/llvm/lib/Target/Sparc/SparcFrameLowering.h @@ -22,10 +22,12 @@ namespace llvm { class SparcSubtarget; class SparcFrameLowering : public TargetFrameLowering { + const SparcSubtarget &SubTarget; public: - explicit SparcFrameLowering(const SparcSubtarget &/*sti*/) - : TargetFrameLowering(TargetFrameLowering::StackGrowsDown, 8, 0) { - } + explicit SparcFrameLowering(const SparcSubtarget &ST) + : TargetFrameLowering(TargetFrameLowering::StackGrowsDown, + ST.is64Bit() ? 16 : 8, 0, ST.is64Bit() ? 16 : 8), + SubTarget(ST) {} /// emitProlog/emitEpilog - These methods insert prolog and epilog code into /// the function. diff --git a/contrib/llvm/lib/Target/Sparc/SparcISelDAGToDAG.cpp b/contrib/llvm/lib/Target/Sparc/SparcISelDAGToDAG.cpp index 5fa545d..a709685 100644 --- a/contrib/llvm/lib/Target/Sparc/SparcISelDAGToDAG.cpp +++ b/contrib/llvm/lib/Target/Sparc/SparcISelDAGToDAG.cpp @@ -73,7 +73,7 @@ SDNode* SparcDAGToDAGISel::getGlobalBaseReg() { bool SparcDAGToDAGISel::SelectADDRri(SDValue Addr, SDValue &Base, SDValue &Offset) { if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Addr)) { - Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), MVT::i32); + Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), TLI.getPointerTy()); Offset = CurDAG->getTargetConstant(0, MVT::i32); return true; } @@ -87,7 +87,8 @@ bool SparcDAGToDAGISel::SelectADDRri(SDValue Addr, if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Addr.getOperand(0))) { // Constant offset from frame ref. - Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), MVT::i32); + Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), + TLI.getPointerTy()); } else { Base = Addr.getOperand(0); } @@ -130,7 +131,7 @@ bool SparcDAGToDAGISel::SelectADDRrr(SDValue Addr, SDValue &R1, SDValue &R2) { } R1 = Addr; - R2 = CurDAG->getRegister(SP::G0, MVT::i32); + R2 = CurDAG->getRegister(SP::G0, TLI.getPointerTy()); return true; } @@ -146,6 +147,9 @@ SDNode *SparcDAGToDAGISel::Select(SDNode *N) { case ISD::SDIV: case ISD::UDIV: { + // sdivx / udivx handle 64-bit divides. + if (N->getValueType(0) == MVT::i64) + break; // FIXME: should use a custom expander to expose the SRA to the dag. 
SDValue DivLHS = N->getOperand(0); SDValue DivRHS = N->getOperand(1); diff --git a/contrib/llvm/lib/Target/Sparc/SparcISelLowering.cpp b/contrib/llvm/lib/Target/Sparc/SparcISelLowering.cpp index 325f134..3863e2c 100644 --- a/contrib/llvm/lib/Target/Sparc/SparcISelLowering.cpp +++ b/contrib/llvm/lib/Target/Sparc/SparcISelLowering.cpp @@ -15,6 +15,7 @@ #include "SparcISelLowering.h" #include "SparcMachineFunctionInfo.h" #include "SparcTargetMachine.h" +#include "MCTargetDesc/SparcBaseInfo.h" #include "llvm/CodeGen/CallingConvLower.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunction.h" @@ -74,27 +75,118 @@ static bool CC_Sparc_Assign_f64(unsigned &ValNo, MVT &ValVT, return true; } +// Allocate a full-sized argument for the 64-bit ABI. +static bool CC_Sparc64_Full(unsigned &ValNo, MVT &ValVT, + MVT &LocVT, CCValAssign::LocInfo &LocInfo, + ISD::ArgFlagsTy &ArgFlags, CCState &State) { + assert((LocVT == MVT::f32 || LocVT.getSizeInBits() == 64) && + "Can't handle non-64 bits locations"); + + // Stack space is allocated for all arguments starting from [%fp+BIAS+128]. + unsigned Offset = State.AllocateStack(8, 8); + unsigned Reg = 0; + + if (LocVT == MVT::i64 && Offset < 6*8) + // Promote integers to %i0-%i5. + Reg = SP::I0 + Offset/8; + else if (LocVT == MVT::f64 && Offset < 16*8) + // Promote doubles to %d0-%d30. (Which LLVM calls D0-D15). + Reg = SP::D0 + Offset/8; + else if (LocVT == MVT::f32 && Offset < 16*8) + // Promote floats to %f1, %f3, ... + Reg = SP::F1 + Offset/4; + + // Promote to register when possible, otherwise use the stack slot. + if (Reg) { + State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo)); + return true; + } + + // This argument goes on the stack in an 8-byte slot. + // When passing floats, LocVT is smaller than 8 bytes. Adjust the offset to + // the right-aligned float. The first 4 bytes of the stack slot are undefined. + if (LocVT == MVT::f32) + Offset += 4; + + State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo)); + return true; +} + +// Allocate a half-sized argument for the 64-bit ABI. +// +// This is used when passing { float, int } structs by value in registers. +static bool CC_Sparc64_Half(unsigned &ValNo, MVT &ValVT, + MVT &LocVT, CCValAssign::LocInfo &LocInfo, + ISD::ArgFlagsTy &ArgFlags, CCState &State) { + assert(LocVT.getSizeInBits() == 32 && "Can't handle non-32 bits locations"); + unsigned Offset = State.AllocateStack(4, 4); + + if (LocVT == MVT::f32 && Offset < 16*8) { + // Promote floats to %f0-%f31. + State.addLoc(CCValAssign::getReg(ValNo, ValVT, SP::F0 + Offset/4, + LocVT, LocInfo)); + return true; + } + + if (LocVT == MVT::i32 && Offset < 6*8) { + // Promote integers to %i0-%i5, using half the register. + unsigned Reg = SP::I0 + Offset/8; + LocVT = MVT::i64; + LocInfo = CCValAssign::AExt; + + // Set the Custom bit if this i32 goes in the high bits of a register. + if (Offset % 8 == 0) + State.addLoc(CCValAssign::getCustomReg(ValNo, ValVT, Reg, + LocVT, LocInfo)); + else + State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo)); + return true; + } + + State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo)); + return true; +} + #include "SparcGenCallingConv.inc" +// The calling conventions in SparcCallingConv.td are described in terms of the +// callee's register window. This function translates registers to the +// corresponding caller window %o register. 
+static unsigned toCallerWindow(unsigned Reg) { + assert(SP::I0 + 7 == SP::I7 && SP::O0 + 7 == SP::O7 && "Unexpected enum"); + if (Reg >= SP::I0 && Reg <= SP::I7) + return Reg - SP::I0 + SP::O0; + return Reg; +} + SDValue SparcTargetLowering::LowerReturn(SDValue Chain, - CallingConv::ID CallConv, bool isVarArg, + CallingConv::ID CallConv, bool IsVarArg, const SmallVectorImpl<ISD::OutputArg> &Outs, const SmallVectorImpl<SDValue> &OutVals, - DebugLoc dl, SelectionDAG &DAG) const { + DebugLoc DL, SelectionDAG &DAG) const { + if (Subtarget->is64Bit()) + return LowerReturn_64(Chain, CallConv, IsVarArg, Outs, OutVals, DL, DAG); + return LowerReturn_32(Chain, CallConv, IsVarArg, Outs, OutVals, DL, DAG); +} +SDValue +SparcTargetLowering::LowerReturn_32(SDValue Chain, + CallingConv::ID CallConv, bool IsVarArg, + const SmallVectorImpl<ISD::OutputArg> &Outs, + const SmallVectorImpl<SDValue> &OutVals, + DebugLoc DL, SelectionDAG &DAG) const { MachineFunction &MF = DAG.getMachineFunction(); // CCValAssign - represent the assignment of the return value to locations. SmallVector<CCValAssign, 16> RVLocs; // CCState - Info about the registers and stack slot. - CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), + CCState CCInfo(CallConv, IsVarArg, DAG.getMachineFunction(), DAG.getTarget(), RVLocs, *DAG.getContext()); - // Analize return values. - CCInfo.AnalyzeReturn(Outs, Subtarget->is64Bit() ? - RetCC_Sparc64 : RetCC_Sparc32); + // Analyze return values. + CCInfo.AnalyzeReturn(Outs, RetCC_Sparc32); SDValue Flag; SmallVector<SDValue, 4> RetOps(1, Chain); @@ -106,7 +198,7 @@ SparcTargetLowering::LowerReturn(SDValue Chain, CCValAssign &VA = RVLocs[i]; assert(VA.isRegLoc() && "Can only return in registers!"); - Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), + Chain = DAG.getCopyToReg(Chain, DL, VA.getLocReg(), OutVals[i], Flag); // Guarantee that all emitted copies are stuck together with flags. @@ -121,8 +213,8 @@ SparcTargetLowering::LowerReturn(SDValue Chain, unsigned Reg = SFI->getSRetReturnReg(); if (!Reg) llvm_unreachable("sret virtual register not created in the entry block"); - SDValue Val = DAG.getCopyFromReg(Chain, dl, Reg, getPointerTy()); - Chain = DAG.getCopyToReg(Chain, dl, SP::I0, Val, Flag); + SDValue Val = DAG.getCopyFromReg(Chain, DL, Reg, getPointerTy()); + Chain = DAG.getCopyToReg(Chain, DL, SP::I0, Val, Flag); Flag = Chain.getValue(1); RetOps.push_back(DAG.getRegister(SP::I0, getPointerTy())); RetAddrOffset = 12; // CallInst + Delay Slot + Unimp @@ -135,7 +227,85 @@ SparcTargetLowering::LowerReturn(SDValue Chain, if (Flag.getNode()) RetOps.push_back(Flag); - return DAG.getNode(SPISD::RET_FLAG, dl, MVT::Other, + return DAG.getNode(SPISD::RET_FLAG, DL, MVT::Other, + &RetOps[0], RetOps.size()); +} + +// Lower return values for the 64-bit ABI. +// Return values are passed the exactly the same way as function arguments. +SDValue +SparcTargetLowering::LowerReturn_64(SDValue Chain, + CallingConv::ID CallConv, bool IsVarArg, + const SmallVectorImpl<ISD::OutputArg> &Outs, + const SmallVectorImpl<SDValue> &OutVals, + DebugLoc DL, SelectionDAG &DAG) const { + // CCValAssign - represent the assignment of the return value to locations. + SmallVector<CCValAssign, 16> RVLocs; + + // CCState - Info about the registers and stack slot. + CCState CCInfo(CallConv, IsVarArg, DAG.getMachineFunction(), + DAG.getTarget(), RVLocs, *DAG.getContext()); + + // Analyze return values. 
+ CCInfo.AnalyzeReturn(Outs, CC_Sparc64); + + SDValue Flag; + SmallVector<SDValue, 4> RetOps(1, Chain); + + // The second operand on the return instruction is the return address offset. + // The return address is always %i7+8 with the 64-bit ABI. + RetOps.push_back(DAG.getConstant(8, MVT::i32)); + + // Copy the result values into the output registers. + for (unsigned i = 0; i != RVLocs.size(); ++i) { + CCValAssign &VA = RVLocs[i]; + assert(VA.isRegLoc() && "Can only return in registers!"); + SDValue OutVal = OutVals[i]; + + // Integer return values must be sign or zero extended by the callee. + switch (VA.getLocInfo()) { + case CCValAssign::SExt: + OutVal = DAG.getNode(ISD::SIGN_EXTEND, DL, VA.getLocVT(), OutVal); + break; + case CCValAssign::ZExt: + OutVal = DAG.getNode(ISD::ZERO_EXTEND, DL, VA.getLocVT(), OutVal); + break; + case CCValAssign::AExt: + OutVal = DAG.getNode(ISD::ANY_EXTEND, DL, VA.getLocVT(), OutVal); + default: + break; + } + + // The custom bit on an i32 return value indicates that it should be passed + // in the high bits of the register. + if (VA.getValVT() == MVT::i32 && VA.needsCustom()) { + OutVal = DAG.getNode(ISD::SHL, DL, MVT::i64, OutVal, + DAG.getConstant(32, MVT::i32)); + + // The next value may go in the low bits of the same register. + // Handle both at once. + if (i+1 < RVLocs.size() && RVLocs[i+1].getLocReg() == VA.getLocReg()) { + SDValue NV = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, OutVals[i+1]); + OutVal = DAG.getNode(ISD::OR, DL, MVT::i64, OutVal, NV); + // Skip the next value, it's already done. + ++i; + } + } + + Chain = DAG.getCopyToReg(Chain, DL, VA.getLocReg(), OutVal, Flag); + + // Guarantee that all emitted copies are stuck together with flags. + Flag = Chain.getValue(1); + RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT())); + } + + RetOps[0] = Chain; // Update chain. + + // Add the flag if we have it. + if (Flag.getNode()) + RetOps.push_back(Flag); + + return DAG.getNode(SPISD::RET_FLAG, DL, MVT::Other, &RetOps[0], RetOps.size()); } @@ -373,6 +543,9 @@ LowerFormalArguments_64(SDValue Chain, getTargetMachine(), ArgLocs, *DAG.getContext()); CCInfo.AnalyzeFormalArguments(Ins, CC_Sparc64); + // The argument array begins at %fp+BIAS+128, after the register save area. + const unsigned ArgArea = 128; + for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) { CCValAssign &VA = ArgLocs[i]; if (VA.isRegLoc()) { @@ -384,6 +557,11 @@ LowerFormalArguments_64(SDValue Chain, getRegClassFor(VA.getLocVT())); SDValue Arg = DAG.getCopyFromReg(Chain, DL, VReg, VA.getLocVT()); + // Get the high bits for i32 struct elements. + if (VA.getValVT() == MVT::i32 && VA.needsCustom()) + Arg = DAG.getNode(ISD::SRL, DL, VA.getLocVT(), Arg, + DAG.getConstant(32, MVT::i32)); + // The caller promoted the argument, so insert an Assert?ext SDNode so we // won't promote the value again in this function. switch (VA.getLocInfo()) { @@ -409,13 +587,71 @@ LowerFormalArguments_64(SDValue Chain, // The registers are exhausted. This argument was passed on the stack. assert(VA.isMemLoc()); + // The CC_Sparc64_Full/Half functions compute stack offsets relative to the + // beginning of the arguments area at %fp+BIAS+128. + unsigned Offset = VA.getLocMemOffset() + ArgArea; + unsigned ValSize = VA.getValVT().getSizeInBits() / 8; + // Adjust offset for extended arguments, SPARC is big-endian. + // The caller will have written the full slot with extended bytes, but we + // prefer our own extending loads. 
+ if (VA.isExtInLoc()) + Offset += 8 - ValSize; + int FI = MF.getFrameInfo()->CreateFixedObject(ValSize, Offset, true); + InVals.push_back(DAG.getLoad(VA.getValVT(), DL, Chain, + DAG.getFrameIndex(FI, getPointerTy()), + MachinePointerInfo::getFixedStack(FI), + false, false, false, 0)); } + + if (!IsVarArg) + return Chain; + + // This function takes variable arguments, some of which may have been passed + // in registers %i0-%i5. Variable floating point arguments are never passed + // in floating point registers. They go on %i0-%i5 or on the stack like + // integer arguments. + // + // The va_start intrinsic needs to know the offset to the first variable + // argument. + unsigned ArgOffset = CCInfo.getNextStackOffset(); + SparcMachineFunctionInfo *FuncInfo = MF.getInfo<SparcMachineFunctionInfo>(); + // Skip the 128 bytes of register save area. + FuncInfo->setVarArgsFrameOffset(ArgOffset + ArgArea + + Subtarget->getStackPointerBias()); + + // Save the variable arguments that were passed in registers. + // The caller is required to reserve stack space for 6 arguments regardless + // of how many arguments were actually passed. + SmallVector<SDValue, 8> OutChains; + for (; ArgOffset < 6*8; ArgOffset += 8) { + unsigned VReg = MF.addLiveIn(SP::I0 + ArgOffset/8, &SP::I64RegsRegClass); + SDValue VArg = DAG.getCopyFromReg(Chain, DL, VReg, MVT::i64); + int FI = MF.getFrameInfo()->CreateFixedObject(8, ArgOffset + ArgArea, true); + OutChains.push_back(DAG.getStore(Chain, DL, VArg, + DAG.getFrameIndex(FI, getPointerTy()), + MachinePointerInfo::getFixedStack(FI), + false, false, 0)); + } + + if (!OutChains.empty()) + Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, + &OutChains[0], OutChains.size()); + return Chain; } SDValue SparcTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, SmallVectorImpl<SDValue> &InVals) const { + if (Subtarget->is64Bit()) + return LowerCall_64(CLI, InVals); + return LowerCall_32(CLI, InVals); +} + +// Lower a call for the 32-bit ABI. +SDValue +SparcTargetLowering::LowerCall_32(TargetLowering::CallLoweringInfo &CLI, + SmallVectorImpl<SDValue> &InVals) const { SelectionDAG &DAG = CLI.DAG; DebugLoc &dl = CLI.DL; SmallVector<ISD::OutputArg, 32> &Outs = CLI.Outs; @@ -618,11 +854,7 @@ SparcTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, // stuck together. SDValue InFlag; for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) { - unsigned Reg = RegsToPass[i].first; - // Remap I0->I7 -> O0->O7. - if (Reg >= SP::I0 && Reg <= SP::I7) - Reg = Reg-SP::I0+SP::O0; - + unsigned Reg = toCallerWindow(RegsToPass[i].first); Chain = DAG.getCopyToReg(Chain, dl, Reg, RegsToPass[i].second, InFlag); InFlag = Chain.getValue(1); } @@ -644,13 +876,9 @@ SparcTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, Ops.push_back(Callee); if (hasStructRetAttr) Ops.push_back(DAG.getTargetConstant(SRetArgSize, MVT::i32)); - for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) { - unsigned Reg = RegsToPass[i].first; - if (Reg >= SP::I0 && Reg <= SP::I7) - Reg = Reg-SP::I0+SP::O0; - - Ops.push_back(DAG.getRegister(Reg, RegsToPass[i].second.getValueType())); - } + for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) + Ops.push_back(DAG.getRegister(toCallerWindow(RegsToPass[i].first), + RegsToPass[i].second.getValueType())); if (InFlag.getNode()) Ops.push_back(InFlag); @@ -670,13 +898,7 @@ SparcTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, // Copy all of the result registers out of their specified physreg. 
for (unsigned i = 0; i != RVLocs.size(); ++i) { - unsigned Reg = RVLocs[i].getLocReg(); - - // Remap I0->I7 -> O0->O7. - if (Reg >= SP::I0 && Reg <= SP::I7) - Reg = Reg-SP::I0+SP::O0; - - Chain = DAG.getCopyFromReg(Chain, dl, Reg, + Chain = DAG.getCopyFromReg(Chain, dl, toCallerWindow(RVLocs[i].getLocReg()), RVLocs[i].getValVT(), InFlag).getValue(1); InFlag = Chain.getValue(2); InVals.push_back(Chain.getValue(0)); @@ -709,6 +931,259 @@ SparcTargetLowering::getSRetArgSize(SelectionDAG &DAG, SDValue Callee) const return getDataLayout()->getTypeAllocSize(ElementTy); } + +// Fixup floating point arguments in the ... part of a varargs call. +// +// The SPARC v9 ABI requires that floating point arguments are treated the same +// as integers when calling a varargs function. This does not apply to the +// fixed arguments that are part of the function's prototype. +// +// This function post-processes a CCValAssign array created by +// AnalyzeCallOperands(). +static void fixupVariableFloatArgs(SmallVectorImpl<CCValAssign> &ArgLocs, + ArrayRef<ISD::OutputArg> Outs) { + for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) { + const CCValAssign &VA = ArgLocs[i]; + // FIXME: What about f32 arguments? C promotes them to f64 when calling + // varargs functions. + if (!VA.isRegLoc() || VA.getLocVT() != MVT::f64) + continue; + // The fixed arguments to a varargs function still go in FP registers. + if (Outs[VA.getValNo()].IsFixed) + continue; + + // This floating point argument should be reassigned. + CCValAssign NewVA; + + // Determine the offset into the argument array. + unsigned Offset = 8 * (VA.getLocReg() - SP::D0); + assert(Offset < 16*8 && "Offset out of range, bad register enum?"); + + if (Offset < 6*8) { + // This argument should go in %i0-%i5. + unsigned IReg = SP::I0 + Offset/8; + // Full register, just bitconvert into i64. + NewVA = CCValAssign::getReg(VA.getValNo(), VA.getValVT(), + IReg, MVT::i64, CCValAssign::BCvt); + } else { + // This needs to go to memory, we're out of integer registers. + NewVA = CCValAssign::getMem(VA.getValNo(), VA.getValVT(), + Offset, VA.getLocVT(), VA.getLocInfo()); + } + ArgLocs[i] = NewVA; + } +} + +// Lower a call for the 64-bit ABI. +SDValue +SparcTargetLowering::LowerCall_64(TargetLowering::CallLoweringInfo &CLI, + SmallVectorImpl<SDValue> &InVals) const { + SelectionDAG &DAG = CLI.DAG; + DebugLoc DL = CLI.DL; + SDValue Chain = CLI.Chain; + + // Analyze operands of the call, assigning locations to each operand. + SmallVector<CCValAssign, 16> ArgLocs; + CCState CCInfo(CLI.CallConv, CLI.IsVarArg, DAG.getMachineFunction(), + DAG.getTarget(), ArgLocs, *DAG.getContext()); + CCInfo.AnalyzeCallOperands(CLI.Outs, CC_Sparc64); + + // Get the size of the outgoing arguments stack space requirement. + // The stack offset computed by CC_Sparc64 includes all arguments. + // Called functions expect 6 argument words to exist in the stack frame, used + // or not. + unsigned ArgsSize = std::max(6*8u, CCInfo.getNextStackOffset()); + + // Keep stack frames 16-byte aligned. + ArgsSize = RoundUpToAlignment(ArgsSize, 16); + + // Varargs calls require special treatment. + if (CLI.IsVarArg) + fixupVariableFloatArgs(ArgLocs, CLI.Outs); + + // Adjust the stack pointer to make room for the arguments. + // FIXME: Use hasReservedCallFrame to avoid %sp adjustments around all calls + // with more than 6 arguments. + Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(ArgsSize, true)); + + // Collect the set of registers to pass to the function and their values. 
+ // This will be emitted as a sequence of CopyToReg nodes glued to the call + // instruction. + SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass; + + // Collect chains from all the memory opeations that copy arguments to the + // stack. They must follow the stack pointer adjustment above and precede the + // call instruction itself. + SmallVector<SDValue, 8> MemOpChains; + + for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) { + const CCValAssign &VA = ArgLocs[i]; + SDValue Arg = CLI.OutVals[i]; + + // Promote the value if needed. + switch (VA.getLocInfo()) { + default: + llvm_unreachable("Unknown location info!"); + case CCValAssign::Full: + break; + case CCValAssign::SExt: + Arg = DAG.getNode(ISD::SIGN_EXTEND, DL, VA.getLocVT(), Arg); + break; + case CCValAssign::ZExt: + Arg = DAG.getNode(ISD::ZERO_EXTEND, DL, VA.getLocVT(), Arg); + break; + case CCValAssign::AExt: + Arg = DAG.getNode(ISD::ANY_EXTEND, DL, VA.getLocVT(), Arg); + break; + case CCValAssign::BCvt: + Arg = DAG.getNode(ISD::BITCAST, DL, VA.getLocVT(), Arg); + break; + } + + if (VA.isRegLoc()) { + // The custom bit on an i32 return value indicates that it should be + // passed in the high bits of the register. + if (VA.getValVT() == MVT::i32 && VA.needsCustom()) { + Arg = DAG.getNode(ISD::SHL, DL, MVT::i64, Arg, + DAG.getConstant(32, MVT::i32)); + + // The next value may go in the low bits of the same register. + // Handle both at once. + if (i+1 < ArgLocs.size() && ArgLocs[i+1].isRegLoc() && + ArgLocs[i+1].getLocReg() == VA.getLocReg()) { + SDValue NV = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, + CLI.OutVals[i+1]); + Arg = DAG.getNode(ISD::OR, DL, MVT::i64, Arg, NV); + // Skip the next value, it's already done. + ++i; + } + } + RegsToPass.push_back(std::make_pair(toCallerWindow(VA.getLocReg()), Arg)); + continue; + } + + assert(VA.isMemLoc()); + + // Create a store off the stack pointer for this argument. + SDValue StackPtr = DAG.getRegister(SP::O6, getPointerTy()); + // The argument area starts at %fp+BIAS+128 in the callee frame, + // %sp+BIAS+128 in ours. + SDValue PtrOff = DAG.getIntPtrConstant(VA.getLocMemOffset() + + Subtarget->getStackPointerBias() + + 128); + PtrOff = DAG.getNode(ISD::ADD, DL, getPointerTy(), StackPtr, PtrOff); + MemOpChains.push_back(DAG.getStore(Chain, DL, Arg, PtrOff, + MachinePointerInfo(), + false, false, 0)); + } + + // Emit all stores, make sure they occur before the call. + if (!MemOpChains.empty()) + Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, + &MemOpChains[0], MemOpChains.size()); + + // Build a sequence of CopyToReg nodes glued together with token chain and + // glue operands which copy the outgoing args into registers. The InGlue is + // necessary since all emitted instructions must be stuck together in order + // to pass the live physical registers. + SDValue InGlue; + for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) { + Chain = DAG.getCopyToReg(Chain, DL, + RegsToPass[i].first, RegsToPass[i].second, InGlue); + InGlue = Chain.getValue(1); + } + + // If the callee is a GlobalAddress node (quite common, every direct call is) + // turn it into a TargetGlobalAddress node so that legalize doesn't hack it. + // Likewise ExternalSymbol -> TargetExternalSymbol. 
+ SDValue Callee = CLI.Callee; + if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) + Callee = DAG.getTargetGlobalAddress(G->getGlobal(), DL, getPointerTy()); + else if (ExternalSymbolSDNode *E = dyn_cast<ExternalSymbolSDNode>(Callee)) + Callee = DAG.getTargetExternalSymbol(E->getSymbol(), getPointerTy()); + + // Build the operands for the call instruction itself. + SmallVector<SDValue, 8> Ops; + Ops.push_back(Chain); + Ops.push_back(Callee); + for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) + Ops.push_back(DAG.getRegister(RegsToPass[i].first, + RegsToPass[i].second.getValueType())); + + // Make sure the CopyToReg nodes are glued to the call instruction which + // consumes the registers. + if (InGlue.getNode()) + Ops.push_back(InGlue); + + // Now the call itself. + SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue); + Chain = DAG.getNode(SPISD::CALL, DL, NodeTys, &Ops[0], Ops.size()); + InGlue = Chain.getValue(1); + + // Revert the stack pointer immediately after the call. + Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(ArgsSize, true), + DAG.getIntPtrConstant(0, true), InGlue); + InGlue = Chain.getValue(1); + + // Now extract the return values. This is more or less the same as + // LowerFormalArguments_64. + + // Assign locations to each value returned by this call. + SmallVector<CCValAssign, 16> RVLocs; + CCState RVInfo(CLI.CallConv, CLI.IsVarArg, DAG.getMachineFunction(), + DAG.getTarget(), RVLocs, *DAG.getContext()); + RVInfo.AnalyzeCallResult(CLI.Ins, CC_Sparc64); + + // Copy all of the result registers out of their specified physreg. + for (unsigned i = 0; i != RVLocs.size(); ++i) { + CCValAssign &VA = RVLocs[i]; + unsigned Reg = toCallerWindow(VA.getLocReg()); + + // When returning 'inreg {i32, i32 }', two consecutive i32 arguments can + // reside in the same register in the high and low bits. Reuse the + // CopyFromReg previous node to avoid duplicate copies. + SDValue RV; + if (RegisterSDNode *SrcReg = dyn_cast<RegisterSDNode>(Chain.getOperand(1))) + if (SrcReg->getReg() == Reg && Chain->getOpcode() == ISD::CopyFromReg) + RV = Chain.getValue(0); + + // But usually we'll create a new CopyFromReg for a different register. + if (!RV.getNode()) { + RV = DAG.getCopyFromReg(Chain, DL, Reg, RVLocs[i].getLocVT(), InGlue); + Chain = RV.getValue(1); + InGlue = Chain.getValue(2); + } + + // Get the high bits for i32 struct elements. + if (VA.getValVT() == MVT::i32 && VA.needsCustom()) + RV = DAG.getNode(ISD::SRL, DL, VA.getLocVT(), RV, + DAG.getConstant(32, MVT::i32)); + + // The callee promoted the return value, so insert an Assert?ext SDNode so + // we won't promote the value again in this function. + switch (VA.getLocInfo()) { + case CCValAssign::SExt: + RV = DAG.getNode(ISD::AssertSext, DL, VA.getLocVT(), RV, + DAG.getValueType(VA.getValVT())); + break; + case CCValAssign::ZExt: + RV = DAG.getNode(ISD::AssertZext, DL, VA.getLocVT(), RV, + DAG.getValueType(VA.getValVT())); + break; + default: + break; + } + + // Truncate the register down to the return value type. 
+ if (VA.isExtInLoc()) + RV = DAG.getNode(ISD::TRUNCATE, DL, VA.getValVT(), RV); + + InVals.push_back(RV); + } + + return Chain; +} + //===----------------------------------------------------------------------===// // TargetLowering Implementation //===----------------------------------------------------------------------===// @@ -778,9 +1253,9 @@ SparcTargetLowering::SparcTargetLowering(TargetMachine &TM) setTruncStoreAction(MVT::f64, MVT::f32, Expand); // Custom legalize GlobalAddress nodes into LO/HI parts. - setOperationAction(ISD::GlobalAddress, MVT::i32, Custom); - setOperationAction(ISD::GlobalTLSAddress, MVT::i32, Custom); - setOperationAction(ISD::ConstantPool , MVT::i32, Custom); + setOperationAction(ISD::GlobalAddress, getPointerTy(), Custom); + setOperationAction(ISD::GlobalTLSAddress, getPointerTy(), Custom); + setOperationAction(ISD::ConstantPool, getPointerTy(), Custom); // Sparc doesn't have sext_inreg, replace them with shl/sra setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i16, Expand); @@ -831,7 +1306,6 @@ SparcTargetLowering::SparcTargetLowering(TargetMachine &TM) // FIXME: There are instructions available for ATOMIC_FENCE // on SparcV8 and later. - setOperationAction(ISD::MEMBARRIER, MVT::Other, Expand); setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, Expand); setOperationAction(ISD::FSIN , MVT::f64, Expand); @@ -965,46 +1439,89 @@ static void LookThroughSetCC(SDValue &LHS, SDValue &RHS, } } +// Convert to a target node and set target flags. +SDValue SparcTargetLowering::withTargetFlags(SDValue Op, unsigned TF, + SelectionDAG &DAG) const { + if (const GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(Op)) + return DAG.getTargetGlobalAddress(GA->getGlobal(), + GA->getDebugLoc(), + GA->getValueType(0), + GA->getOffset(), TF); + + if (const ConstantPoolSDNode *CP = dyn_cast<ConstantPoolSDNode>(Op)) + return DAG.getTargetConstantPool(CP->getConstVal(), + CP->getValueType(0), + CP->getAlignment(), + CP->getOffset(), TF); + + if (const ExternalSymbolSDNode *ES = dyn_cast<ExternalSymbolSDNode>(Op)) + return DAG.getTargetExternalSymbol(ES->getSymbol(), + ES->getValueType(0), TF); + + llvm_unreachable("Unhandled address SDNode"); +} + +// Split Op into high and low parts according to HiTF and LoTF. +// Return an ADD node combining the parts. +SDValue SparcTargetLowering::makeHiLoPair(SDValue Op, + unsigned HiTF, unsigned LoTF, + SelectionDAG &DAG) const { + DebugLoc DL = Op.getDebugLoc(); + EVT VT = Op.getValueType(); + SDValue Hi = DAG.getNode(SPISD::Hi, DL, VT, withTargetFlags(Op, HiTF, DAG)); + SDValue Lo = DAG.getNode(SPISD::Lo, DL, VT, withTargetFlags(Op, LoTF, DAG)); + return DAG.getNode(ISD::ADD, DL, VT, Hi, Lo); +} + +// Build SDNodes for producing an address from a GlobalAddress, ConstantPool, +// or ExternalSymbol SDNode. +SDValue SparcTargetLowering::makeAddress(SDValue Op, SelectionDAG &DAG) const { + DebugLoc DL = Op.getDebugLoc(); + EVT VT = getPointerTy(); + + // Handle PIC mode first. + if (getTargetMachine().getRelocationModel() == Reloc::PIC_) { + // This is the pic32 code model, the GOT is known to be smaller than 4GB. + SDValue HiLo = makeHiLoPair(Op, SPII::MO_HI, SPII::MO_LO, DAG); + SDValue GlobalBase = DAG.getNode(SPISD::GLOBAL_BASE_REG, DL, VT); + SDValue AbsAddr = DAG.getNode(ISD::ADD, DL, VT, GlobalBase, HiLo); + return DAG.getLoad(VT, DL, DAG.getEntryNode(), AbsAddr, + MachinePointerInfo::getGOT(), false, false, false, 0); + } + + // This is one of the absolute code models. 
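For the Medium (abs44) case handled in the switch that follows, the address is rebuilt from the %h44, %m44 and %l44 fields defined in SparcBaseInfo.h: sethi places the 22-bit %h44 field in bits 31-10 of the register, an or fills in the 10-bit %m44 field, the 12-bit shift moves both into their final position, and the trailing add supplies %l44. Below is a minimal standalone sketch of that reassembly; the address value is made up for the example.

#include <cassert>
#include <cstdint>

int main() {
  const uint64_t Addr = 0xABCDE123456ULL;       // any 44-bit absolute address

  const uint64_t H44 = (Addr >> 22) & 0x3FFFFF; // bits 43-22, the sethi immediate
  const uint64_t M44 = (Addr >> 12) & 0x3FF;    // bits 21-12, or'd into the low bits
  const uint64_t L44 = Addr & 0xFFF;            // bits 11-0, added last

  // sethi %h44(Addr) ; or %m44(Addr) ; sllx 12 ; add %l44(Addr)
  const uint64_t Rebuilt = (((H44 << 10) | M44) << 12) + L44;
  assert(Rebuilt == Addr);
  (void)Rebuilt;
  return 0;
}

The Large (abs64) case is the same idea with two sethi/or pairs (%hh/%hm and %hi/%lo) joined by a 32-bit shift.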
+ switch(getTargetMachine().getCodeModel()) { + default: + llvm_unreachable("Unsupported absolute code model"); + case CodeModel::Small: + // abs32. + return makeHiLoPair(Op, SPII::MO_HI, SPII::MO_LO, DAG); + case CodeModel::Medium: { + // abs44. + SDValue H44 = makeHiLoPair(Op, SPII::MO_H44, SPII::MO_M44, DAG); + H44 = DAG.getNode(ISD::SHL, DL, VT, H44, DAG.getConstant(12, MVT::i32)); + SDValue L44 = withTargetFlags(Op, SPII::MO_L44, DAG); + L44 = DAG.getNode(SPISD::Lo, DL, VT, L44); + return DAG.getNode(ISD::ADD, DL, VT, H44, L44); + } + case CodeModel::Large: { + // abs64. + SDValue Hi = makeHiLoPair(Op, SPII::MO_HH, SPII::MO_HM, DAG); + Hi = DAG.getNode(ISD::SHL, DL, VT, Hi, DAG.getConstant(32, MVT::i32)); + SDValue Lo = makeHiLoPair(Op, SPII::MO_HI, SPII::MO_LO, DAG); + return DAG.getNode(ISD::ADD, DL, VT, Hi, Lo); + } + } +} + SDValue SparcTargetLowering::LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const { - const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal(); - // FIXME there isn't really any debug info here - DebugLoc dl = Op.getDebugLoc(); - SDValue GA = DAG.getTargetGlobalAddress(GV, dl, MVT::i32); - SDValue Hi = DAG.getNode(SPISD::Hi, dl, MVT::i32, GA); - SDValue Lo = DAG.getNode(SPISD::Lo, dl, MVT::i32, GA); - - if (getTargetMachine().getRelocationModel() != Reloc::PIC_) - return DAG.getNode(ISD::ADD, dl, MVT::i32, Lo, Hi); - - SDValue GlobalBase = DAG.getNode(SPISD::GLOBAL_BASE_REG, dl, - getPointerTy()); - SDValue RelAddr = DAG.getNode(ISD::ADD, dl, MVT::i32, Lo, Hi); - SDValue AbsAddr = DAG.getNode(ISD::ADD, dl, MVT::i32, - GlobalBase, RelAddr); - return DAG.getLoad(getPointerTy(), dl, DAG.getEntryNode(), - AbsAddr, MachinePointerInfo(), false, false, false, 0); + return makeAddress(Op, DAG); } SDValue SparcTargetLowering::LowerConstantPool(SDValue Op, SelectionDAG &DAG) const { - ConstantPoolSDNode *N = cast<ConstantPoolSDNode>(Op); - // FIXME there isn't really any debug info here - DebugLoc dl = Op.getDebugLoc(); - const Constant *C = N->getConstVal(); - SDValue CP = DAG.getTargetConstantPool(C, MVT::i32, N->getAlignment()); - SDValue Hi = DAG.getNode(SPISD::Hi, dl, MVT::i32, CP); - SDValue Lo = DAG.getNode(SPISD::Lo, dl, MVT::i32, CP); - if (getTargetMachine().getRelocationModel() != Reloc::PIC_) - return DAG.getNode(ISD::ADD, dl, MVT::i32, Lo, Hi); - - SDValue GlobalBase = DAG.getNode(SPISD::GLOBAL_BASE_REG, dl, - getPointerTy()); - SDValue RelAddr = DAG.getNode(ISD::ADD, dl, MVT::i32, Lo, Hi); - SDValue AbsAddr = DAG.getNode(ISD::ADD, dl, MVT::i32, - GlobalBase, RelAddr); - return DAG.getLoad(getPointerTy(), dl, DAG.getEntryNode(), - AbsAddr, MachinePointerInfo(), false, false, false, 0); + return makeAddress(Op, DAG); } static SDValue LowerFP_TO_SINT(SDValue Op, SelectionDAG &DAG) { @@ -1092,14 +1609,13 @@ static SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG, // vastart just stores the address of the VarArgsFrameIndex slot into the // memory location argument. 
- DebugLoc dl = Op.getDebugLoc(); + DebugLoc DL = Op.getDebugLoc(); SDValue Offset = - DAG.getNode(ISD::ADD, dl, MVT::i32, - DAG.getRegister(SP::I6, MVT::i32), - DAG.getConstant(FuncInfo->getVarArgsFrameOffset(), - MVT::i32)); + DAG.getNode(ISD::ADD, DL, TLI.getPointerTy(), + DAG.getRegister(SP::I6, TLI.getPointerTy()), + DAG.getIntPtrConstant(FuncInfo->getVarArgsFrameOffset())); const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue(); - return DAG.getStore(Op.getOperand(0), dl, Offset, Op.getOperand(1), + return DAG.getStore(Op.getOperand(0), DL, Offset, Op.getOperand(1), MachinePointerInfo(SV), false, false, 0); } @@ -1108,33 +1624,22 @@ static SDValue LowerVAARG(SDValue Op, SelectionDAG &DAG) { EVT VT = Node->getValueType(0); SDValue InChain = Node->getOperand(0); SDValue VAListPtr = Node->getOperand(1); + EVT PtrVT = VAListPtr.getValueType(); const Value *SV = cast<SrcValueSDNode>(Node->getOperand(2))->getValue(); - DebugLoc dl = Node->getDebugLoc(); - SDValue VAList = DAG.getLoad(MVT::i32, dl, InChain, VAListPtr, + DebugLoc DL = Node->getDebugLoc(); + SDValue VAList = DAG.getLoad(PtrVT, DL, InChain, VAListPtr, MachinePointerInfo(SV), false, false, false, 0); - // Increment the pointer, VAList, to the next vaarg - SDValue NextPtr = DAG.getNode(ISD::ADD, dl, MVT::i32, VAList, - DAG.getConstant(VT.getSizeInBits()/8, - MVT::i32)); - // Store the incremented VAList to the legalized pointer - InChain = DAG.getStore(VAList.getValue(1), dl, NextPtr, + // Increment the pointer, VAList, to the next vaarg. + SDValue NextPtr = DAG.getNode(ISD::ADD, DL, PtrVT, VAList, + DAG.getIntPtrConstant(VT.getSizeInBits()/8)); + // Store the incremented VAList to the legalized pointer. + InChain = DAG.getStore(VAList.getValue(1), DL, NextPtr, VAListPtr, MachinePointerInfo(SV), false, false, 0); - // Load the actual argument out of the pointer VAList, unless this is an - // f64 load. - if (VT != MVT::f64) - return DAG.getLoad(VT, dl, InChain, VAList, MachinePointerInfo(), - false, false, false, 0); - - // Otherwise, load it as i64, then do a bitconvert. - SDValue V = DAG.getLoad(MVT::i64, dl, InChain, VAList, MachinePointerInfo(), - false, false, false, 0); - - // Bit-Convert the value to f64. - SDValue Ops[2] = { - DAG.getNode(ISD::BITCAST, dl, MVT::f64, V), - V.getValue(1) - }; - return DAG.getMergeValues(Ops, 2, dl); + // Load the actual argument out of the pointer VAList. + // We can't count on greater alignment than the word size. 
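An editor's note on the alignment argument in the load that follows: the va_list pointer only guarantees word alignment, so the load's alignment is clamped to the pointer size. For an f64 argument on 32-bit SPARC that is std::min(32, 64) / 8 = 4 bytes instead of the natural 8, which lets the rewritten code load the f64 directly rather than loading an i64 and bit-casting it as the removed lines did.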
+ return DAG.getLoad(VT, DL, InChain, VAList, MachinePointerInfo(), + false, false, false, + std::min(PtrVT.getSizeInBits(), VT.getSizeInBits())/8); } static SDValue LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) { diff --git a/contrib/llvm/lib/Target/Sparc/SparcISelLowering.h b/contrib/llvm/lib/Target/Sparc/SparcISelLowering.h index aa2ef71..fd706be 100644 --- a/contrib/llvm/lib/Target/Sparc/SparcISelLowering.h +++ b/contrib/llvm/lib/Target/Sparc/SparcISelLowering.h @@ -71,6 +71,7 @@ namespace llvm { getRegForInlineAsmConstraint(const std::string &Constraint, EVT VT) const; virtual bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const; + virtual MVT getScalarShiftAmountTy(EVT LHSTy) const { return MVT::i32; } virtual SDValue LowerFormalArguments(SDValue Chain, @@ -95,6 +96,10 @@ namespace llvm { virtual SDValue LowerCall(TargetLowering::CallLoweringInfo &CLI, SmallVectorImpl<SDValue> &InVals) const; + SDValue LowerCall_32(TargetLowering::CallLoweringInfo &CLI, + SmallVectorImpl<SDValue> &InVals) const; + SDValue LowerCall_64(TargetLowering::CallLoweringInfo &CLI, + SmallVectorImpl<SDValue> &InVals) const; virtual SDValue LowerReturn(SDValue Chain, @@ -102,11 +107,25 @@ namespace llvm { const SmallVectorImpl<ISD::OutputArg> &Outs, const SmallVectorImpl<SDValue> &OutVals, DebugLoc dl, SelectionDAG &DAG) const; + SDValue LowerReturn_32(SDValue Chain, + CallingConv::ID CallConv, bool IsVarArg, + const SmallVectorImpl<ISD::OutputArg> &Outs, + const SmallVectorImpl<SDValue> &OutVals, + DebugLoc DL, SelectionDAG &DAG) const; + SDValue LowerReturn_64(SDValue Chain, + CallingConv::ID CallConv, bool IsVarArg, + const SmallVectorImpl<ISD::OutputArg> &Outs, + const SmallVectorImpl<SDValue> &OutVals, + DebugLoc DL, SelectionDAG &DAG) const; SDValue LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const; SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG) const; unsigned getSRetArgSize(SelectionDAG &DAG, SDValue Callee) const; + SDValue withTargetFlags(SDValue Op, unsigned TF, SelectionDAG &DAG) const; + SDValue makeHiLoPair(SDValue Op, unsigned HiTF, unsigned LoTF, + SelectionDAG &DAG) const; + SDValue makeAddress(SDValue Op, SelectionDAG &DAG) const; }; } // end namespace llvm diff --git a/contrib/llvm/lib/Target/Sparc/SparcInstr64Bit.td b/contrib/llvm/lib/Target/Sparc/SparcInstr64Bit.td index ca1153b..91805f9 100644 --- a/contrib/llvm/lib/Target/Sparc/SparcInstr64Bit.td +++ b/contrib/llvm/lib/Target/Sparc/SparcInstr64Bit.td @@ -40,6 +40,9 @@ let Predicates = [Is64Bit] in { def : Pat<(i64 (zext i32:$val)), (SRLri $val, 0)>; def : Pat<(i64 (sext i32:$val)), (SRAri $val, 0)>; +def : Pat<(i64 (and i64:$val, 0xffffffff)), (SRLri $val, 0)>; +def : Pat<(i64 (sext_inreg i64:$val, i32)), (SRAri $val, 0)>; + defm SLLX : F3_S<"sllx", 0b100101, 1, shl, i64, I64Regs>; defm SRLX : F3_S<"srlx", 0b100110, 1, srl, i64, I64Regs>; defm SRAX : F3_S<"srax", 0b100111, 1, sra, i64, I64Regs>; @@ -130,7 +133,7 @@ def HM10 : SDNodeXForm<imm, [{ return CurDAG->getTargetConstant(Val, MVT::i32); }]>; def : Pat<(i64 imm:$val), - (ORrr (SLLXri (ORri (SETHIi (HH22 $val)), (HM10 $val)), (i64 32)), + (ORrr (SLLXri (ORri (SETHIi (HH22 $val)), (HM10 $val)), (i32 32)), (ORri (SETHIi (HI22 $val)), (LO10 $val)))>, Requires<[Is64Bit]>; @@ -178,6 +181,45 @@ def : Pat<(SPcmpicc i64:$a, (i64 simm13:$b)), (SUBCCri $a, (as_i32imm $b))>; //===----------------------------------------------------------------------===// +// 64-bit Integer Multiply and Divide. 
+//===----------------------------------------------------------------------===// + +let Predicates = [Is64Bit] in { + +def MULXrr : F3_1<2, 0b001001, + (outs I64Regs:$rd), (ins I64Regs:$rs1, I64Regs:$rs2), + "mulx $rs1, $rs2, $rd", + [(set i64:$rd, (mul i64:$rs1, i64:$rs2))]>; +def MULXri : F3_2<2, 0b001001, + (outs IntRegs:$rd), (ins IntRegs:$rs1, i64imm:$i), + "mulx $rs1, $i, $rd", + [(set i64:$rd, (mul i64:$rs1, (i64 simm13:$i)))]>; + +// Division can trap. +let hasSideEffects = 1 in { +def SDIVXrr : F3_1<2, 0b101101, + (outs I64Regs:$rd), (ins I64Regs:$rs1, I64Regs:$rs2), + "sdivx $rs1, $rs2, $rd", + [(set i64:$rd, (sdiv i64:$rs1, i64:$rs2))]>; +def SDIVXri : F3_2<2, 0b101101, + (outs IntRegs:$rd), (ins IntRegs:$rs1, i64imm:$i), + "sdivx $rs1, $i, $rd", + [(set i64:$rd, (sdiv i64:$rs1, (i64 simm13:$i)))]>; + +def UDIVXrr : F3_1<2, 0b001101, + (outs I64Regs:$rd), (ins I64Regs:$rs1, I64Regs:$rs2), + "udivx $rs1, $rs2, $rd", + [(set i64:$rd, (udiv i64:$rs1, i64:$rs2))]>; +def UDIVXri : F3_2<2, 0b001101, + (outs IntRegs:$rd), (ins IntRegs:$rs1, i64imm:$i), + "udivx $rs1, $i, $rd", + [(set i64:$rd, (udiv i64:$rs1, (i64 simm13:$i)))]>; +} // hasSideEffects = 1 + +} // Predicates = [Is64Bit] + + +//===----------------------------------------------------------------------===// // 64-bit Loads and Stores. //===----------------------------------------------------------------------===// // @@ -203,16 +245,22 @@ def LDXri : F3_2<3, 0b001011, // Extending loads to i64. def : Pat<(i64 (zextloadi8 ADDRrr:$addr)), (LDUBrr ADDRrr:$addr)>; def : Pat<(i64 (zextloadi8 ADDRri:$addr)), (LDUBri ADDRri:$addr)>; +def : Pat<(i64 (extloadi8 ADDRrr:$addr)), (LDUBrr ADDRrr:$addr)>; +def : Pat<(i64 (extloadi8 ADDRri:$addr)), (LDUBri ADDRri:$addr)>; def : Pat<(i64 (sextloadi8 ADDRrr:$addr)), (LDSBrr ADDRrr:$addr)>; def : Pat<(i64 (sextloadi8 ADDRri:$addr)), (LDSBri ADDRri:$addr)>; def : Pat<(i64 (zextloadi16 ADDRrr:$addr)), (LDUHrr ADDRrr:$addr)>; def : Pat<(i64 (zextloadi16 ADDRri:$addr)), (LDUHri ADDRri:$addr)>; +def : Pat<(i64 (extloadi16 ADDRrr:$addr)), (LDUHrr ADDRrr:$addr)>; +def : Pat<(i64 (extloadi16 ADDRri:$addr)), (LDUHri ADDRri:$addr)>; def : Pat<(i64 (sextloadi16 ADDRrr:$addr)), (LDSHrr ADDRrr:$addr)>; def : Pat<(i64 (sextloadi16 ADDRri:$addr)), (LDSHri ADDRri:$addr)>; def : Pat<(i64 (zextloadi32 ADDRrr:$addr)), (LDrr ADDRrr:$addr)>; def : Pat<(i64 (zextloadi32 ADDRri:$addr)), (LDri ADDRri:$addr)>; +def : Pat<(i64 (extloadi32 ADDRrr:$addr)), (LDrr ADDRrr:$addr)>; +def : Pat<(i64 (extloadi32 ADDRri:$addr)), (LDri ADDRri:$addr)>; // Sign-extending load of i32 into i64 is a new SPARC v9 instruction. def LDSWrr : F3_1<3, 0b001011, diff --git a/contrib/llvm/lib/Target/Sparc/SparcInstrFormats.td b/contrib/llvm/lib/Target/Sparc/SparcInstrFormats.td index f101856..e7fde08 100644 --- a/contrib/llvm/lib/Target/Sparc/SparcInstrFormats.td +++ b/contrib/llvm/lib/Target/Sparc/SparcInstrFormats.td @@ -142,10 +142,10 @@ class F3_Si<bits<2> opVal, bits<6> op3val, bit xVal, dag outs, dag ins, // Define rr and ri shift instructions with patterns. 
multiclass F3_S<string OpcStr, bits<6> Op3Val, bit XVal, SDNode OpNode, ValueType VT, RegisterClass RC> { - def rr : F3_Sr<2, Op3Val, XVal, (outs RC:$rd), (ins RC:$rs, RC:$rs2), + def rr : F3_Sr<2, Op3Val, XVal, (outs RC:$rd), (ins RC:$rs, IntRegs:$rs2), !strconcat(OpcStr, " $rs, $rs2, $rd"), - [(set VT:$rd, (OpNode VT:$rs, VT:$rs2))]>; - def ri : F3_Si<2, Op3Val, XVal, (outs RC:$rd), (ins RC:$rs, unknown:$shcnt), + [(set VT:$rd, (OpNode VT:$rs, i32:$rs2))]>; + def ri : F3_Si<2, Op3Val, XVal, (outs RC:$rd), (ins RC:$rs, i32imm:$shcnt), !strconcat(OpcStr, " $rs, $shcnt, $rd"), - [(set VT:$rd, (OpNode VT:$rs, (VT imm:$shcnt)))]>; + [(set VT:$rd, (OpNode VT:$rs, (i32 imm:$shcnt)))]>; } diff --git a/contrib/llvm/lib/Target/Sparc/SparcInstrInfo.td b/contrib/llvm/lib/Target/Sparc/SparcInstrInfo.td index 5ff4395..baefb06 100644 --- a/contrib/llvm/lib/Target/Sparc/SparcInstrInfo.td +++ b/contrib/llvm/lib/Target/Sparc/SparcInstrInfo.td @@ -64,8 +64,7 @@ def HI22 : SDNodeXForm<imm, [{ }]>; def SETHIimm : PatLeaf<(imm), [{ - return (((unsigned)N->getZExtValue() >> 10) << 10) == - (unsigned)N->getZExtValue(); + return isShiftedUInt<22, 10>(N->getZExtValue()); }], HI22>; // Addressing modes. @@ -796,10 +795,8 @@ def : Pat<(SPhi tconstpool:$in), (SETHIi tconstpool:$in)>; def : Pat<(SPlo tconstpool:$in), (ORri (i32 G0), tconstpool:$in)>; // Add reg, lo. This is used when taking the addr of a global/constpool entry. -def : Pat<(add i32:$r, (SPlo tglobaladdr:$in)), - (ADDri $r, tglobaladdr:$in)>; -def : Pat<(add i32:$r, (SPlo tconstpool:$in)), - (ADDri $r, tconstpool:$in)>; +def : Pat<(add iPTR:$r, (SPlo tglobaladdr:$in)), (ADDri $r, tglobaladdr:$in)>; +def : Pat<(add iPTR:$r, (SPlo tconstpool:$in)), (ADDri $r, tconstpool:$in)>; // Calls: def : Pat<(call tglobaladdr:$dst), diff --git a/contrib/llvm/lib/Target/Sparc/SparcRegisterInfo.cpp b/contrib/llvm/lib/Target/Sparc/SparcRegisterInfo.cpp index db9b30e..3af4c61 100644 --- a/contrib/llvm/lib/Target/Sparc/SparcRegisterInfo.cpp +++ b/contrib/llvm/lib/Target/Sparc/SparcRegisterInfo.cpp @@ -74,8 +74,9 @@ SparcRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, // Addressable stack objects are accessed using neg. offsets from %fp MachineFunction &MF = *MI.getParent()->getParent(); - int Offset = MF.getFrameInfo()->getObjectOffset(FrameIndex) + - MI.getOperand(FIOperandNum + 1).getImm(); + int64_t Offset = MF.getFrameInfo()->getObjectOffset(FrameIndex) + + MI.getOperand(FIOperandNum + 1).getImm() + + Subtarget.getStackPointerBias(); // Replace frame index with a frame pointer reference. if (Offset >= -4096 && Offset <= 4095) { diff --git a/contrib/llvm/lib/Target/Sparc/SparcSubtarget.h b/contrib/llvm/lib/Target/Sparc/SparcSubtarget.h index a81931b..b94dd11 100644 --- a/contrib/llvm/lib/Target/Sparc/SparcSubtarget.h +++ b/contrib/llvm/lib/Target/Sparc/SparcSubtarget.h @@ -52,6 +52,12 @@ public: } return std::string(p); } + + /// The 64-bit ABI uses biased stack and frame pointers, so the stack frame + /// of the current function is the area from [%sp+BIAS] to [%fp+BIAS]. + int64_t getStackPointerBias() const { + return is64Bit() ? 
2047 : 0; + } }; } // end namespace llvm diff --git a/contrib/llvm/lib/Target/SystemZ/AsmParser/SystemZAsmParser.cpp b/contrib/llvm/lib/Target/SystemZ/AsmParser/SystemZAsmParser.cpp new file mode 100644 index 0000000..c7725a1 --- /dev/null +++ b/contrib/llvm/lib/Target/SystemZ/AsmParser/SystemZAsmParser.cpp @@ -0,0 +1,689 @@ +//===-- SystemZAsmParser.cpp - Parse SystemZ assembly instructions --------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "MCTargetDesc/SystemZMCTargetDesc.h" +#include "llvm/MC/MCExpr.h" +#include "llvm/MC/MCInst.h" +#include "llvm/MC/MCParser/MCParsedAsmOperand.h" +#include "llvm/MC/MCStreamer.h" +#include "llvm/MC/MCSubtargetInfo.h" +#include "llvm/MC/MCTargetAsmParser.h" +#include "llvm/Support/TargetRegistry.h" + +using namespace llvm; + +// Return true if Expr is in the range [MinValue, MaxValue]. +static bool inRange(const MCExpr *Expr, int64_t MinValue, int64_t MaxValue) { + if (const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(Expr)) { + int64_t Value = CE->getValue(); + return Value >= MinValue && Value <= MaxValue; + } + return false; +} + +namespace { +class SystemZOperand : public MCParsedAsmOperand { +public: + enum RegisterKind { + GR32Reg, + GR64Reg, + GR128Reg, + ADDR32Reg, + ADDR64Reg, + FP32Reg, + FP64Reg, + FP128Reg + }; + +private: + enum OperandKind { + KindToken, + KindReg, + KindAccessReg, + KindImm, + KindMem + }; + + OperandKind Kind; + SMLoc StartLoc, EndLoc; + + // A string of length Length, starting at Data. + struct TokenOp { + const char *Data; + unsigned Length; + }; + + // LLVM register Num, which has kind Kind. + struct RegOp { + RegisterKind Kind; + unsigned Num; + }; + + // Base + Disp + Index, where Base and Index are LLVM registers or 0. + // RegKind says what type the registers have (ADDR32Reg or ADDR64Reg). + struct MemOp { + unsigned Base : 8; + unsigned Index : 8; + unsigned RegKind : 8; + unsigned Unused : 8; + const MCExpr *Disp; + }; + + union { + TokenOp Token; + RegOp Reg; + unsigned AccessReg; + const MCExpr *Imm; + MemOp Mem; + }; + + SystemZOperand(OperandKind kind, SMLoc startLoc, SMLoc endLoc) + : Kind(kind), StartLoc(startLoc), EndLoc(endLoc) + {} + + void addExpr(MCInst &Inst, const MCExpr *Expr) const { + // Add as immediates when possible. Null MCExpr = 0. + if (Expr == 0) + Inst.addOperand(MCOperand::CreateImm(0)); + else if (const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(Expr)) + Inst.addOperand(MCOperand::CreateImm(CE->getValue())); + else + Inst.addOperand(MCOperand::CreateExpr(Expr)); + } + +public: + // Create particular kinds of operand. 
+ static SystemZOperand *createToken(StringRef Str, SMLoc Loc) { + SystemZOperand *Op = new SystemZOperand(KindToken, Loc, Loc); + Op->Token.Data = Str.data(); + Op->Token.Length = Str.size(); + return Op; + } + static SystemZOperand *createReg(RegisterKind Kind, unsigned Num, + SMLoc StartLoc, SMLoc EndLoc) { + SystemZOperand *Op = new SystemZOperand(KindReg, StartLoc, EndLoc); + Op->Reg.Kind = Kind; + Op->Reg.Num = Num; + return Op; + } + static SystemZOperand *createAccessReg(unsigned Num, SMLoc StartLoc, + SMLoc EndLoc) { + SystemZOperand *Op = new SystemZOperand(KindAccessReg, StartLoc, EndLoc); + Op->AccessReg = Num; + return Op; + } + static SystemZOperand *createImm(const MCExpr *Expr, SMLoc StartLoc, + SMLoc EndLoc) { + SystemZOperand *Op = new SystemZOperand(KindImm, StartLoc, EndLoc); + Op->Imm = Expr; + return Op; + } + static SystemZOperand *createMem(RegisterKind RegKind, unsigned Base, + const MCExpr *Disp, unsigned Index, + SMLoc StartLoc, SMLoc EndLoc) { + SystemZOperand *Op = new SystemZOperand(KindMem, StartLoc, EndLoc); + Op->Mem.RegKind = RegKind; + Op->Mem.Base = Base; + Op->Mem.Index = Index; + Op->Mem.Disp = Disp; + return Op; + } + + // Token operands + virtual bool isToken() const LLVM_OVERRIDE { + return Kind == KindToken; + } + StringRef getToken() const { + assert(Kind == KindToken && "Not a token"); + return StringRef(Token.Data, Token.Length); + } + + // Register operands. + virtual bool isReg() const LLVM_OVERRIDE { + return Kind == KindReg; + } + bool isReg(RegisterKind RegKind) const { + return Kind == KindReg && Reg.Kind == RegKind; + } + virtual unsigned getReg() const LLVM_OVERRIDE { + assert(Kind == KindReg && "Not a register"); + return Reg.Num; + } + + // Access register operands. Access registers aren't exposed to LLVM + // as registers. + bool isAccessReg() const { + return Kind == KindAccessReg; + } + + // Immediate operands. + virtual bool isImm() const LLVM_OVERRIDE { + return Kind == KindImm; + } + bool isImm(int64_t MinValue, int64_t MaxValue) const { + return Kind == KindImm && inRange(Imm, MinValue, MaxValue); + } + const MCExpr *getImm() const { + assert(Kind == KindImm && "Not an immediate"); + return Imm; + } + + // Memory operands. + virtual bool isMem() const LLVM_OVERRIDE { + return Kind == KindMem; + } + bool isMem(RegisterKind RegKind, bool HasIndex) const { + return (Kind == KindMem && + Mem.RegKind == RegKind && + (HasIndex || !Mem.Index)); + } + bool isMemDisp12(RegisterKind RegKind, bool HasIndex) const { + return isMem(RegKind, HasIndex) && inRange(Mem.Disp, 0, 0xfff); + } + bool isMemDisp20(RegisterKind RegKind, bool HasIndex) const { + return isMem(RegKind, HasIndex) && inRange(Mem.Disp, -524288, 524287); + } + + // Override MCParsedAsmOperand. + virtual SMLoc getStartLoc() const LLVM_OVERRIDE { return StartLoc; } + virtual SMLoc getEndLoc() const LLVM_OVERRIDE { return EndLoc; } + virtual void print(raw_ostream &OS) const LLVM_OVERRIDE; + + // Used by the TableGen code to add particular types of operand + // to an instruction. 
+ void addRegOperands(MCInst &Inst, unsigned N) const { + assert(N == 1 && "Invalid number of operands"); + Inst.addOperand(MCOperand::CreateReg(getReg())); + } + void addAccessRegOperands(MCInst &Inst, unsigned N) const { + assert(N == 1 && "Invalid number of operands"); + assert(Kind == KindAccessReg && "Invalid operand type"); + Inst.addOperand(MCOperand::CreateImm(AccessReg)); + } + void addImmOperands(MCInst &Inst, unsigned N) const { + assert(N == 1 && "Invalid number of operands"); + addExpr(Inst, getImm()); + } + void addBDAddrOperands(MCInst &Inst, unsigned N) const { + assert(N == 2 && "Invalid number of operands"); + assert(Kind == KindMem && Mem.Index == 0 && "Invalid operand type"); + Inst.addOperand(MCOperand::CreateReg(Mem.Base)); + addExpr(Inst, Mem.Disp); + } + void addBDXAddrOperands(MCInst &Inst, unsigned N) const { + assert(N == 3 && "Invalid number of operands"); + assert(Kind == KindMem && "Invalid operand type"); + Inst.addOperand(MCOperand::CreateReg(Mem.Base)); + addExpr(Inst, Mem.Disp); + Inst.addOperand(MCOperand::CreateReg(Mem.Index)); + } + + // Used by the TableGen code to check for particular operand types. + bool isGR32() const { return isReg(GR32Reg); } + bool isGR64() const { return isReg(GR64Reg); } + bool isGR128() const { return isReg(GR128Reg); } + bool isADDR32() const { return isReg(ADDR32Reg); } + bool isADDR64() const { return isReg(ADDR64Reg); } + bool isADDR128() const { return false; } + bool isFP32() const { return isReg(FP32Reg); } + bool isFP64() const { return isReg(FP64Reg); } + bool isFP128() const { return isReg(FP128Reg); } + bool isBDAddr32Disp12() const { return isMemDisp12(ADDR32Reg, false); } + bool isBDAddr32Disp20() const { return isMemDisp20(ADDR32Reg, false); } + bool isBDAddr64Disp12() const { return isMemDisp12(ADDR64Reg, false); } + bool isBDAddr64Disp20() const { return isMemDisp20(ADDR64Reg, false); } + bool isBDXAddr64Disp12() const { return isMemDisp12(ADDR64Reg, true); } + bool isBDXAddr64Disp20() const { return isMemDisp20(ADDR64Reg, true); } + bool isU4Imm() const { return isImm(0, 15); } + bool isU6Imm() const { return isImm(0, 63); } + bool isU8Imm() const { return isImm(0, 255); } + bool isS8Imm() const { return isImm(-128, 127); } + bool isU16Imm() const { return isImm(0, 65535); } + bool isS16Imm() const { return isImm(-32768, 32767); } + bool isU32Imm() const { return isImm(0, (1LL << 32) - 1); } + bool isS32Imm() const { return isImm(-(1LL << 31), (1LL << 31) - 1); } +}; + +// Maps of asm register numbers to LLVM register numbers, with 0 indicating +// an invalid register. We don't use register class directly because that +// specifies the allocation order. 
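A quick reference for the displacement ranges used above (editor's sketch, not from the patch): base+displacement operands come in a 12-bit unsigned and a 20-bit signed form, which is what isMemDisp12/isMemDisp20 check via inRange().

#include <cstdint>

static bool fitsDisp12(int64_t D) { return D >= 0 && D <= 0xfff; }         // 0 .. 4095
static bool fitsDisp20(int64_t D) { return D >= -524288 && D <= 524287; }  // -2^19 .. 2^19-1

// 4088(%r15) satisfies both predicates; -8(%r15) or 4096(%r15) satisfies only
// the 20-bit form, so such an operand can only match a Disp20 instruction.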
+static const unsigned GR32Regs[] = { + SystemZ::R0W, SystemZ::R1W, SystemZ::R2W, SystemZ::R3W, + SystemZ::R4W, SystemZ::R5W, SystemZ::R6W, SystemZ::R7W, + SystemZ::R8W, SystemZ::R9W, SystemZ::R10W, SystemZ::R11W, + SystemZ::R12W, SystemZ::R13W, SystemZ::R14W, SystemZ::R15W +}; +static const unsigned GR64Regs[] = { + SystemZ::R0D, SystemZ::R1D, SystemZ::R2D, SystemZ::R3D, + SystemZ::R4D, SystemZ::R5D, SystemZ::R6D, SystemZ::R7D, + SystemZ::R8D, SystemZ::R9D, SystemZ::R10D, SystemZ::R11D, + SystemZ::R12D, SystemZ::R13D, SystemZ::R14D, SystemZ::R15D +}; +static const unsigned GR128Regs[] = { + SystemZ::R0Q, 0, SystemZ::R2Q, 0, + SystemZ::R4Q, 0, SystemZ::R6Q, 0, + SystemZ::R8Q, 0, SystemZ::R10Q, 0, + SystemZ::R12Q, 0, SystemZ::R14Q, 0 +}; +static const unsigned FP32Regs[] = { + SystemZ::F0S, SystemZ::F1S, SystemZ::F2S, SystemZ::F3S, + SystemZ::F4S, SystemZ::F5S, SystemZ::F6S, SystemZ::F7S, + SystemZ::F8S, SystemZ::F9S, SystemZ::F10S, SystemZ::F11S, + SystemZ::F12S, SystemZ::F13S, SystemZ::F14S, SystemZ::F15S +}; +static const unsigned FP64Regs[] = { + SystemZ::F0D, SystemZ::F1D, SystemZ::F2D, SystemZ::F3D, + SystemZ::F4D, SystemZ::F5D, SystemZ::F6D, SystemZ::F7D, + SystemZ::F8D, SystemZ::F9D, SystemZ::F10D, SystemZ::F11D, + SystemZ::F12D, SystemZ::F13D, SystemZ::F14D, SystemZ::F15D +}; +static const unsigned FP128Regs[] = { + SystemZ::F0Q, SystemZ::F1Q, 0, 0, + SystemZ::F4Q, SystemZ::F5Q, 0, 0, + SystemZ::F8Q, SystemZ::F9Q, 0, 0, + SystemZ::F12Q, SystemZ::F13Q, 0, 0 +}; + +class SystemZAsmParser : public MCTargetAsmParser { +#define GET_ASSEMBLER_HEADER +#include "SystemZGenAsmMatcher.inc" + +private: + MCSubtargetInfo &STI; + MCAsmParser &Parser; + struct Register { + char Prefix; + unsigned Number; + SMLoc StartLoc, EndLoc; + }; + + bool parseRegister(Register &Reg); + + OperandMatchResultTy + parseRegister(Register &Reg, char Prefix, const unsigned *Regs, + bool IsAddress = false); + + OperandMatchResultTy + parseRegister(SmallVectorImpl<MCParsedAsmOperand*> &Operands, + char Prefix, const unsigned *Regs, + SystemZOperand::RegisterKind Kind, + bool IsAddress = false); + + OperandMatchResultTy + parseAddress(SmallVectorImpl<MCParsedAsmOperand*> &Operands, + const unsigned *Regs, SystemZOperand::RegisterKind RegKind, + bool HasIndex); + + bool parseOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands, + StringRef Mnemonic); + +public: + SystemZAsmParser(MCSubtargetInfo &sti, MCAsmParser &parser) + : MCTargetAsmParser(), STI(sti), Parser(parser) { + MCAsmParserExtension::Initialize(Parser); + + // Initialize the set of available features. + setAvailableFeatures(ComputeAvailableFeatures(STI.getFeatureBits())); + } + + // Override MCTargetAsmParser. + virtual bool ParseDirective(AsmToken DirectiveID) LLVM_OVERRIDE; + virtual bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, + SMLoc &EndLoc) LLVM_OVERRIDE; + virtual bool ParseInstruction(ParseInstructionInfo &Info, + StringRef Name, SMLoc NameLoc, + SmallVectorImpl<MCParsedAsmOperand*> &Operands) + LLVM_OVERRIDE; + virtual bool + MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, + SmallVectorImpl<MCParsedAsmOperand*> &Operands, + MCStreamer &Out, unsigned &ErrorInfo, + bool MatchingInlineAsm) LLVM_OVERRIDE; + + // Used by the TableGen code to parse particular operand types. 
+ OperandMatchResultTy + parseGR32(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { + return parseRegister(Operands, 'r', GR32Regs, SystemZOperand::GR32Reg); + } + OperandMatchResultTy + parseGR64(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { + return parseRegister(Operands, 'r', GR64Regs, SystemZOperand::GR64Reg); + } + OperandMatchResultTy + parseGR128(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { + return parseRegister(Operands, 'r', GR128Regs, SystemZOperand::GR128Reg); + } + OperandMatchResultTy + parseADDR32(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { + return parseRegister(Operands, 'r', GR32Regs, SystemZOperand::ADDR32Reg, + true); + } + OperandMatchResultTy + parseADDR64(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { + return parseRegister(Operands, 'r', GR64Regs, SystemZOperand::ADDR64Reg, + true); + } + OperandMatchResultTy + parseADDR128(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { + llvm_unreachable("Shouldn't be used as an operand"); + } + OperandMatchResultTy + parseFP32(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { + return parseRegister(Operands, 'f', FP32Regs, SystemZOperand::FP32Reg); + } + OperandMatchResultTy + parseFP64(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { + return parseRegister(Operands, 'f', FP64Regs, SystemZOperand::FP64Reg); + } + OperandMatchResultTy + parseFP128(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { + return parseRegister(Operands, 'f', FP128Regs, SystemZOperand::FP128Reg); + } + OperandMatchResultTy + parseBDAddr32(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { + return parseAddress(Operands, GR32Regs, SystemZOperand::ADDR32Reg, false); + } + OperandMatchResultTy + parseBDAddr64(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { + return parseAddress(Operands, GR64Regs, SystemZOperand::ADDR64Reg, false); + } + OperandMatchResultTy + parseBDXAddr64(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { + return parseAddress(Operands, GR64Regs, SystemZOperand::ADDR64Reg, true); + } + OperandMatchResultTy + parseAccessReg(SmallVectorImpl<MCParsedAsmOperand*> &Operands); +}; +} + +#define GET_REGISTER_MATCHER +#define GET_SUBTARGET_FEATURE_NAME +#define GET_MATCHER_IMPLEMENTATION +#include "SystemZGenAsmMatcher.inc" + +void SystemZOperand::print(raw_ostream &OS) const { + llvm_unreachable("Not implemented"); +} + +// Parse one register of the form %<prefix><number>. +bool SystemZAsmParser::parseRegister(Register &Reg) { + Reg.StartLoc = Parser.getTok().getLoc(); + + // Eat the % prefix. + if (Parser.getTok().isNot(AsmToken::Percent)) + return true; + Parser.Lex(); + + // Expect a register name. + if (Parser.getTok().isNot(AsmToken::Identifier)) + return true; + + // Check the prefix. + StringRef Name = Parser.getTok().getString(); + if (Name.size() < 2) + return true; + Reg.Prefix = Name[0]; + + // Treat the rest of the register name as a register number. + if (Name.substr(1).getAsInteger(10, Reg.Number)) + return true; + + Reg.EndLoc = Parser.getTok().getLoc(); + Parser.Lex(); + return false; +} + +// Parse a register with prefix Prefix and convert it to LLVM numbering. +// Regs maps asm register numbers to LLVM register numbers, with zero +// entries indicating an invalid register. IsAddress says whether the +// register appears in an address context. 
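A rough usage note (editor's addition): the low-level parseRegister above accepts tokens of the form %<prefix><number>, and the table-driven wrappers then validate the prefix and map the number through the arrays defined earlier, for example:

// "%r15" -> Prefix 'r', Number 15; through GR64Regs becomes SystemZ::R15D
// "%f4"  -> Prefix 'f', Number 4;  through FP64Regs becomes SystemZ::F4D
// "%r20" -> Number > 15, rejected with "invalid register"
// "%r0"  -> valid as a value register, but rejected when IsAddress is set,
//           since a base/index field of zero means "no register"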
+SystemZAsmParser::OperandMatchResultTy +SystemZAsmParser::parseRegister(Register &Reg, char Prefix, + const unsigned *Regs, bool IsAddress) { + if (parseRegister(Reg)) + return MatchOperand_NoMatch; + if (Reg.Prefix != Prefix || Reg.Number > 15 || Regs[Reg.Number] == 0) { + Error(Reg.StartLoc, "invalid register"); + return MatchOperand_ParseFail; + } + if (Reg.Number == 0 && IsAddress) { + Error(Reg.StartLoc, "%r0 used in an address"); + return MatchOperand_ParseFail; + } + Reg.Number = Regs[Reg.Number]; + return MatchOperand_Success; +} + +// Parse a register and add it to Operands. Prefix is 'r' for GPRs, +// 'f' for FPRs, etc. Regs maps asm register numbers to LLVM register numbers, +// with zero entries indicating an invalid register. Kind is the type of +// register represented by Regs and IsAddress says whether the register is +// being parsed in an address context, meaning that %r0 evaluates as 0. +SystemZAsmParser::OperandMatchResultTy +SystemZAsmParser::parseRegister(SmallVectorImpl<MCParsedAsmOperand*> &Operands, + char Prefix, const unsigned *Regs, + SystemZOperand::RegisterKind Kind, + bool IsAddress) { + Register Reg; + OperandMatchResultTy Result = parseRegister(Reg, Prefix, Regs, IsAddress); + if (Result == MatchOperand_Success) + Operands.push_back(SystemZOperand::createReg(Kind, Reg.Number, + Reg.StartLoc, Reg.EndLoc)); + return Result; +} + +// Parse a memory operand and add it to Operands. Regs maps asm register +// numbers to LLVM address registers and RegKind says what kind of address +// register we're using (ADDR32Reg or ADDR64Reg). HasIndex says whether +// the address allows index registers. +SystemZAsmParser::OperandMatchResultTy +SystemZAsmParser::parseAddress(SmallVectorImpl<MCParsedAsmOperand*> &Operands, + const unsigned *Regs, + SystemZOperand::RegisterKind RegKind, + bool HasIndex) { + SMLoc StartLoc = Parser.getTok().getLoc(); + + // Parse the displacement, which must always be present. + const MCExpr *Disp; + if (getParser().parseExpression(Disp)) + return MatchOperand_NoMatch; + + // Parse the optional base and index. + unsigned Index = 0; + unsigned Base = 0; + if (getLexer().is(AsmToken::LParen)) { + Parser.Lex(); + + // Parse the first register. + Register Reg; + OperandMatchResultTy Result = parseRegister(Reg, 'r', GR64Regs, true); + if (Result != MatchOperand_Success) + return Result; + + // Check whether there's a second register. If so, the one that we + // just parsed was the index. + if (getLexer().is(AsmToken::Comma)) { + Parser.Lex(); + + if (!HasIndex) { + Error(Reg.StartLoc, "invalid use of indexed addressing"); + return MatchOperand_ParseFail; + } + + Index = Reg.Number; + Result = parseRegister(Reg, 'r', GR64Regs, true); + if (Result != MatchOperand_Success) + return Result; + } + Base = Reg.Number; + + // Consume the closing bracket. 
+ if (getLexer().isNot(AsmToken::RParen)) + return MatchOperand_NoMatch; + Parser.Lex(); + } + + SMLoc EndLoc = + SMLoc::getFromPointer(Parser.getTok().getLoc().getPointer() - 1); + Operands.push_back(SystemZOperand::createMem(RegKind, Base, Disp, Index, + StartLoc, EndLoc)); + return MatchOperand_Success; +} + +bool SystemZAsmParser::ParseDirective(AsmToken DirectiveID) { + return true; +} + +bool SystemZAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc, + SMLoc &EndLoc) { + Register Reg; + if (parseRegister(Reg)) + return Error(Reg.StartLoc, "register expected"); + if (Reg.Prefix == 'r' && Reg.Number < 16) + RegNo = GR64Regs[Reg.Number]; + else if (Reg.Prefix == 'f' && Reg.Number < 16) + RegNo = FP64Regs[Reg.Number]; + else + return Error(Reg.StartLoc, "invalid register"); + StartLoc = Reg.StartLoc; + EndLoc = Reg.EndLoc; + return false; +} + +bool SystemZAsmParser:: +ParseInstruction(ParseInstructionInfo &Info, StringRef Name, SMLoc NameLoc, + SmallVectorImpl<MCParsedAsmOperand*> &Operands) { + Operands.push_back(SystemZOperand::createToken(Name, NameLoc)); + + // Read the remaining operands. + if (getLexer().isNot(AsmToken::EndOfStatement)) { + // Read the first operand. + if (parseOperand(Operands, Name)) { + Parser.eatToEndOfStatement(); + return true; + } + + // Read any subsequent operands. + while (getLexer().is(AsmToken::Comma)) { + Parser.Lex(); + if (parseOperand(Operands, Name)) { + Parser.eatToEndOfStatement(); + return true; + } + } + if (getLexer().isNot(AsmToken::EndOfStatement)) { + SMLoc Loc = getLexer().getLoc(); + Parser.eatToEndOfStatement(); + return Error(Loc, "unexpected token in argument list"); + } + } + + // Consume the EndOfStatement. + Parser.Lex(); + return false; +} + +bool SystemZAsmParser:: +parseOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands, + StringRef Mnemonic) { + // Check if the current operand has a custom associated parser, if so, try to + // custom parse the operand, or fallback to the general approach. + OperandMatchResultTy ResTy = MatchOperandParserImpl(Operands, Mnemonic); + if (ResTy == MatchOperand_Success) + return false; + + // If there wasn't a custom match, try the generic matcher below. Otherwise, + // there was a match, but an error occurred, in which case, just return that + // the operand parsing failed. + if (ResTy == MatchOperand_ParseFail) + return true; + + // The only other type of operand is an immediate. 
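For readers new to the syntax being parsed, an editor's note (not part of the patch): parseAddress expects a displacement expression first, then an optional parenthesised register list in which an index may precede the base, giving these shapes:

// "160"        -> Disp=160, Base=0,   Index=0
// "160(%r15)"  -> Disp=160, Base=r15, Index=0
// "8(%r2,%r3)" -> Disp=8,   Index=r2, Base=r3 (allowed only when HasIndex)
// "8(%r2,%r3)" against a base-only operand is rejected with
//              "invalid use of indexed addressing"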
+ const MCExpr *Expr; + SMLoc StartLoc = Parser.getTok().getLoc(); + if (getParser().parseExpression(Expr)) + return true; + + SMLoc EndLoc = + SMLoc::getFromPointer(Parser.getTok().getLoc().getPointer() - 1); + Operands.push_back(SystemZOperand::createImm(Expr, StartLoc, EndLoc)); + return false; +} + +bool SystemZAsmParser:: +MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, + SmallVectorImpl<MCParsedAsmOperand*> &Operands, + MCStreamer &Out, unsigned &ErrorInfo, + bool MatchingInlineAsm) { + MCInst Inst; + unsigned MatchResult; + + MatchResult = MatchInstructionImpl(Operands, Inst, ErrorInfo, + MatchingInlineAsm); + switch (MatchResult) { + default: break; + case Match_Success: + Inst.setLoc(IDLoc); + Out.EmitInstruction(Inst); + return false; + + case Match_MissingFeature: { + assert(ErrorInfo && "Unknown missing feature!"); + // Special case the error message for the very common case where only + // a single subtarget feature is missing + std::string Msg = "instruction requires:"; + unsigned Mask = 1; + for (unsigned I = 0; I < sizeof(ErrorInfo) * 8 - 1; ++I) { + if (ErrorInfo & Mask) { + Msg += " "; + Msg += getSubtargetFeatureName(ErrorInfo & Mask); + } + Mask <<= 1; + } + return Error(IDLoc, Msg); + } + + case Match_InvalidOperand: { + SMLoc ErrorLoc = IDLoc; + if (ErrorInfo != ~0U) { + if (ErrorInfo >= Operands.size()) + return Error(IDLoc, "too few operands for instruction"); + + ErrorLoc = ((SystemZOperand*)Operands[ErrorInfo])->getStartLoc(); + if (ErrorLoc == SMLoc()) + ErrorLoc = IDLoc; + } + return Error(ErrorLoc, "invalid operand for instruction"); + } + + case Match_MnemonicFail: + return Error(IDLoc, "invalid instruction"); + } + + llvm_unreachable("Unexpected match type"); +} + +SystemZAsmParser::OperandMatchResultTy SystemZAsmParser:: +parseAccessReg(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { + Register Reg; + if (parseRegister(Reg)) + return MatchOperand_NoMatch; + if (Reg.Prefix != 'a' || Reg.Number > 15) { + Error(Reg.StartLoc, "invalid register"); + return MatchOperand_ParseFail; + } + Operands.push_back(SystemZOperand::createAccessReg(Reg.Number, + Reg.StartLoc, Reg.EndLoc)); + return MatchOperand_Success; +} + +// Force static initialization. +extern "C" void LLVMInitializeSystemZAsmParser() { + RegisterMCAsmParser<SystemZAsmParser> X(TheSystemZTarget); +} diff --git a/contrib/llvm/lib/Target/SystemZ/InstPrinter/SystemZInstPrinter.cpp b/contrib/llvm/lib/Target/SystemZ/InstPrinter/SystemZInstPrinter.cpp new file mode 100644 index 0000000..d73cf49 --- /dev/null +++ b/contrib/llvm/lib/Target/SystemZ/InstPrinter/SystemZInstPrinter.cpp @@ -0,0 +1,150 @@ +//===-- SystemZInstPrinter.cpp - Convert SystemZ MCInst to assembly syntax ===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "asm-printer" + +#include "SystemZInstPrinter.h" +#include "llvm/MC/MCExpr.h" +#include "llvm/MC/MCInstrInfo.h" +#include "llvm/Support/raw_ostream.h" + +using namespace llvm; + +#include "SystemZGenAsmWriter.inc" + +void SystemZInstPrinter::printAddress(unsigned Base, int64_t Disp, + unsigned Index, raw_ostream &O) { + O << Disp; + if (Base) { + O << '('; + if (Index) + O << '%' << getRegisterName(Index) << ','; + O << '%' << getRegisterName(Base) << ')'; + } else + assert(!Index && "Shouldn't have an index without a base"); +} + +void SystemZInstPrinter::printOperand(const MCOperand &MO, raw_ostream &O) { + if (MO.isReg()) + O << '%' << getRegisterName(MO.getReg()); + else if (MO.isImm()) + O << MO.getImm(); + else if (MO.isExpr()) + O << *MO.getExpr(); + else + llvm_unreachable("Invalid operand"); +} + +void SystemZInstPrinter::printInst(const MCInst *MI, raw_ostream &O, + StringRef Annot) { + printInstruction(MI, O); + printAnnotation(O, Annot); +} + +void SystemZInstPrinter::printRegName(raw_ostream &O, unsigned RegNo) const { + O << '%' << getRegisterName(RegNo); +} + +void SystemZInstPrinter::printU4ImmOperand(const MCInst *MI, int OpNum, + raw_ostream &O) { + int64_t Value = MI->getOperand(OpNum).getImm(); + assert(isUInt<4>(Value) && "Invalid u4imm argument"); + O << Value; +} + +void SystemZInstPrinter::printU6ImmOperand(const MCInst *MI, int OpNum, + raw_ostream &O) { + int64_t Value = MI->getOperand(OpNum).getImm(); + assert(isUInt<6>(Value) && "Invalid u6imm argument"); + O << Value; +} + +void SystemZInstPrinter::printS8ImmOperand(const MCInst *MI, int OpNum, + raw_ostream &O) { + int64_t Value = MI->getOperand(OpNum).getImm(); + assert(isInt<8>(Value) && "Invalid s8imm argument"); + O << Value; +} + +void SystemZInstPrinter::printU8ImmOperand(const MCInst *MI, int OpNum, + raw_ostream &O) { + int64_t Value = MI->getOperand(OpNum).getImm(); + assert(isUInt<8>(Value) && "Invalid u8imm argument"); + O << Value; +} + +void SystemZInstPrinter::printS16ImmOperand(const MCInst *MI, int OpNum, + raw_ostream &O) { + int64_t Value = MI->getOperand(OpNum).getImm(); + assert(isInt<16>(Value) && "Invalid s16imm argument"); + O << Value; +} + +void SystemZInstPrinter::printU16ImmOperand(const MCInst *MI, int OpNum, + raw_ostream &O) { + int64_t Value = MI->getOperand(OpNum).getImm(); + assert(isUInt<16>(Value) && "Invalid u16imm argument"); + O << Value; +} + +void SystemZInstPrinter::printS32ImmOperand(const MCInst *MI, int OpNum, + raw_ostream &O) { + int64_t Value = MI->getOperand(OpNum).getImm(); + assert(isInt<32>(Value) && "Invalid s32imm argument"); + O << Value; +} + +void SystemZInstPrinter::printU32ImmOperand(const MCInst *MI, int OpNum, + raw_ostream &O) { + int64_t Value = MI->getOperand(OpNum).getImm(); + assert(isUInt<32>(Value) && "Invalid u32imm argument"); + O << Value; +} + +void SystemZInstPrinter::printAccessRegOperand(const MCInst *MI, int OpNum, + raw_ostream &O) { + uint64_t Value = MI->getOperand(OpNum).getImm(); + assert(Value < 16 && "Invalid access register number"); + O << "%a" << (unsigned int)Value; +} + +void SystemZInstPrinter::printCallOperand(const MCInst *MI, int OpNum, + raw_ostream &O) { + printOperand(MI, OpNum, O); + O << "@PLT"; +} + +void SystemZInstPrinter::printOperand(const MCInst *MI, int OpNum, + raw_ostream &O) { + printOperand(MI->getOperand(OpNum), O); +} + +void SystemZInstPrinter::printBDAddrOperand(const MCInst *MI, int OpNum, + 
raw_ostream &O) { + printAddress(MI->getOperand(OpNum).getReg(), + MI->getOperand(OpNum + 1).getImm(), 0, O); +} + +void SystemZInstPrinter::printBDXAddrOperand(const MCInst *MI, int OpNum, + raw_ostream &O) { + printAddress(MI->getOperand(OpNum).getReg(), + MI->getOperand(OpNum + 1).getImm(), + MI->getOperand(OpNum + 2).getReg(), O); +} + +void SystemZInstPrinter::printCond4Operand(const MCInst *MI, int OpNum, + raw_ostream &O) { + static const char *const CondNames[] = { + "o", "h", "nle", "l", "nhe", "lh", "ne", + "e", "nlh", "he", "nl", "le", "nh", "no" + }; + uint64_t Imm = MI->getOperand(OpNum).getImm(); + assert(Imm > 0 && Imm < 15 && "Invalid condition"); + O << CondNames[Imm - 1]; +} diff --git a/contrib/llvm/lib/Target/SystemZ/InstPrinter/SystemZInstPrinter.h b/contrib/llvm/lib/Target/SystemZ/InstPrinter/SystemZInstPrinter.h new file mode 100644 index 0000000..b82e79d --- /dev/null +++ b/contrib/llvm/lib/Target/SystemZ/InstPrinter/SystemZInstPrinter.h @@ -0,0 +1,68 @@ +//==- SystemZInstPrinter.h - Convert SystemZ MCInst to assembly --*- C++ -*-==// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This class prints a SystemZ MCInst to a .s file. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_SYSTEMZINSTPRINTER_H +#define LLVM_SYSTEMZINSTPRINTER_H + +#include "llvm/MC/MCInstPrinter.h" +#include "llvm/Support/Compiler.h" + +namespace llvm { +class MCOperand; + +class SystemZInstPrinter : public MCInstPrinter { +public: + SystemZInstPrinter(const MCAsmInfo &MAI, const MCInstrInfo &MII, + const MCRegisterInfo &MRI) + : MCInstPrinter(MAI, MII, MRI) {} + + // Automatically generated by tblgen. + void printInstruction(const MCInst *MI, raw_ostream &O); + static const char *getRegisterName(unsigned RegNo); + + // Print an address with the given base, displacement and index. + static void printAddress(unsigned Base, int64_t Disp, unsigned Index, + raw_ostream &O); + + // Print the given operand. + static void printOperand(const MCOperand &MO, raw_ostream &O); + + // Override MCInstPrinter. + virtual void printRegName(raw_ostream &O, unsigned RegNo) const + LLVM_OVERRIDE; + virtual void printInst(const MCInst *MI, raw_ostream &O, StringRef Annot) + LLVM_OVERRIDE; + +private: + // Print various types of operand. + void printOperand(const MCInst *MI, int OpNum, raw_ostream &O); + void printBDAddrOperand(const MCInst *MI, int OpNum, raw_ostream &O); + void printBDXAddrOperand(const MCInst *MI, int OpNum, raw_ostream &O); + void printU4ImmOperand(const MCInst *MI, int OpNum, raw_ostream &O); + void printU6ImmOperand(const MCInst *MI, int OpNum, raw_ostream &O); + void printS8ImmOperand(const MCInst *MI, int OpNum, raw_ostream &O); + void printU8ImmOperand(const MCInst *MI, int OpNum, raw_ostream &O); + void printS16ImmOperand(const MCInst *MI, int OpNum, raw_ostream &O); + void printU16ImmOperand(const MCInst *MI, int OpNum, raw_ostream &O); + void printS32ImmOperand(const MCInst *MI, int OpNum, raw_ostream &O); + void printU32ImmOperand(const MCInst *MI, int OpNum, raw_ostream &O); + void printCallOperand(const MCInst *MI, int OpNum, raw_ostream &O); + void printAccessRegOperand(const MCInst *MI, int OpNum, raw_ostream &O); + + // Print the mnemonic for a condition-code mask ("ne", "lh", etc.) 
+ // This forms part of the instruction name rather than the operand list. + void printCond4Operand(const MCInst *MI, int OpNum, raw_ostream &O); +}; +} // end namespace llvm + +#endif diff --git a/contrib/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmBackend.cpp b/contrib/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmBackend.cpp new file mode 100644 index 0000000..e901c6c --- /dev/null +++ b/contrib/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmBackend.cpp @@ -0,0 +1,151 @@ +//===-- SystemZMCAsmBackend.cpp - SystemZ assembler backend ---------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "MCTargetDesc/SystemZMCTargetDesc.h" +#include "MCTargetDesc/SystemZMCFixups.h" +#include "llvm/MC/MCAsmBackend.h" +#include "llvm/MC/MCELFObjectWriter.h" +#include "llvm/MC/MCFixupKindInfo.h" +#include "llvm/MC/MCInst.h" +#include "llvm/MC/MCObjectWriter.h" + +using namespace llvm; + +// Value is a fully-resolved relocation value: Symbol + Addend [- Pivot]. +// Return the bits that should be installed in a relocation field for +// fixup kind Kind. +static uint64_t extractBitsForFixup(MCFixupKind Kind, uint64_t Value) { + if (Kind < FirstTargetFixupKind) + return Value; + + switch (unsigned(Kind)) { + case SystemZ::FK_390_PC16DBL: + case SystemZ::FK_390_PC32DBL: + case SystemZ::FK_390_PLT16DBL: + case SystemZ::FK_390_PLT32DBL: + return (int64_t)Value / 2; + } + + llvm_unreachable("Unknown fixup kind!"); +} + +// If Opcode can be relaxed, return the relaxed form, otherwise return 0. +static unsigned getRelaxedOpcode(unsigned Opcode) { + switch (Opcode) { + case SystemZ::BRC: return SystemZ::BRCL; + case SystemZ::J: return SystemZ::JG; + case SystemZ::BRAS: return SystemZ::BRASL; + } + return 0; +} + +namespace { +class SystemZMCAsmBackend : public MCAsmBackend { + uint8_t OSABI; +public: + SystemZMCAsmBackend(uint8_t osABI) + : OSABI(osABI) {} + + // Override MCAsmBackend + virtual unsigned getNumFixupKinds() const LLVM_OVERRIDE { + return SystemZ::NumTargetFixupKinds; + } + virtual const MCFixupKindInfo &getFixupKindInfo(MCFixupKind Kind) const + LLVM_OVERRIDE; + virtual void applyFixup(const MCFixup &Fixup, char *Data, unsigned DataSize, + uint64_t Value) const LLVM_OVERRIDE; + virtual bool mayNeedRelaxation(const MCInst &Inst) const LLVM_OVERRIDE; + virtual bool fixupNeedsRelaxation(const MCFixup &Fixup, + uint64_t Value, + const MCRelaxableFragment *Fragment, + const MCAsmLayout &Layout) const + LLVM_OVERRIDE; + virtual void relaxInstruction(const MCInst &Inst, + MCInst &Res) const LLVM_OVERRIDE; + virtual bool writeNopData(uint64_t Count, + MCObjectWriter *OW) const LLVM_OVERRIDE; + virtual MCObjectWriter *createObjectWriter(raw_ostream &OS) const + LLVM_OVERRIDE { + return createSystemZObjectWriter(OS, OSABI); + } + virtual bool doesSectionRequireSymbols(const MCSection &Section) const + LLVM_OVERRIDE { + return false; + } +}; +} // end anonymous namespace + +const MCFixupKindInfo & +SystemZMCAsmBackend::getFixupKindInfo(MCFixupKind Kind) const { + const static MCFixupKindInfo Infos[SystemZ::NumTargetFixupKinds] = { + { "FK_390_PC16DBL", 0, 16, MCFixupKindInfo::FKF_IsPCRel }, + { "FK_390_PC32DBL", 0, 32, MCFixupKindInfo::FKF_IsPCRel }, + { "FK_390_PLT16DBL", 0, 16, MCFixupKindInfo::FKF_IsPCRel }, + { "FK_390_PLT32DBL", 0, 32, MCFixupKindInfo::FKF_IsPCRel } + }; + + if 
(Kind < FirstTargetFixupKind) + return MCAsmBackend::getFixupKindInfo(Kind); + + assert(unsigned(Kind - FirstTargetFixupKind) < getNumFixupKinds() && + "Invalid kind!"); + return Infos[Kind - FirstTargetFixupKind]; +} + +void SystemZMCAsmBackend::applyFixup(const MCFixup &Fixup, char *Data, + unsigned DataSize, uint64_t Value) const { + MCFixupKind Kind = Fixup.getKind(); + unsigned Offset = Fixup.getOffset(); + unsigned Size = (getFixupKindInfo(Kind).TargetSize + 7) / 8; + + assert(Offset + Size <= DataSize && "Invalid fixup offset!"); + + // Big-endian insertion of Size bytes. + Value = extractBitsForFixup(Kind, Value); + unsigned ShiftValue = (Size * 8) - 8; + for (unsigned I = 0; I != Size; ++I) { + Data[Offset + I] |= uint8_t(Value >> ShiftValue); + ShiftValue -= 8; + } +} + +bool SystemZMCAsmBackend::mayNeedRelaxation(const MCInst &Inst) const { + return getRelaxedOpcode(Inst.getOpcode()) != 0; +} + +bool +SystemZMCAsmBackend::fixupNeedsRelaxation(const MCFixup &Fixup, + uint64_t Value, + const MCRelaxableFragment *Fragment, + const MCAsmLayout &Layout) const { + // At the moment we just need to relax 16-bit fields to wider fields. + Value = extractBitsForFixup(Fixup.getKind(), Value); + return (int16_t)Value != (int64_t)Value; +} + +void SystemZMCAsmBackend::relaxInstruction(const MCInst &Inst, + MCInst &Res) const { + unsigned Opcode = getRelaxedOpcode(Inst.getOpcode()); + assert(Opcode && "Unexpected insn to relax"); + Res = Inst; + Res.setOpcode(Opcode); +} + +bool SystemZMCAsmBackend::writeNopData(uint64_t Count, + MCObjectWriter *OW) const { + for (uint64_t I = 0; I != Count; ++I) + OW->Write8(7); + return true; +} + +MCAsmBackend *llvm::createSystemZMCAsmBackend(const Target &T, StringRef TT, + StringRef CPU) { + uint8_t OSABI = MCELFObjectTargetWriter::getOSABI(Triple(TT).getOS()); + return new SystemZMCAsmBackend(OSABI); +} diff --git a/contrib/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmInfo.cpp b/contrib/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmInfo.cpp new file mode 100644 index 0000000..c96a0d4 --- /dev/null +++ b/contrib/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmInfo.cpp @@ -0,0 +1,38 @@ +//===-- SystemZMCAsmInfo.cpp - SystemZ asm properties ---------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +#include "SystemZMCAsmInfo.h" +#include "llvm/MC/MCContext.h" +#include "llvm/MC/MCSectionELF.h" + +using namespace llvm; + +SystemZMCAsmInfo::SystemZMCAsmInfo(const Target &T, StringRef TT) { + PointerSize = 8; + CalleeSaveStackSlotSize = 8; + IsLittleEndian = false; + + CommentString = "#"; + PCSymbol = "."; + GlobalPrefix = ""; + PrivateGlobalPrefix = ".L"; + WeakRefDirective = "\t.weak\t"; + ZeroDirective = "\t.space\t"; + Data64bitsDirective = "\t.quad\t"; + UsesELFSectionDirectiveForBSS = true; + SupportsDebugInformation = true; + HasLEB128 = true; + ExceptionsType = ExceptionHandling::DwarfCFI; +} + +const MCSection * +SystemZMCAsmInfo::getNonexecutableStackSection(MCContext &Ctx) const { + return Ctx.getELFSection(".note.GNU-stack", ELF::SHT_PROGBITS, + 0, SectionKind::getMetadata()); +} diff --git a/contrib/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmInfo.h b/contrib/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmInfo.h new file mode 100644 index 0000000..bac1bca --- /dev/null +++ b/contrib/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmInfo.h @@ -0,0 +1,31 @@ +//====-- SystemZMCAsmInfo.h - SystemZ asm properties -----------*- C++ -*--===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#ifndef SystemZTARGETASMINFO_H +#define SystemZTARGETASMINFO_H + +#include "llvm/MC/MCAsmInfo.h" +#include "llvm/Support/Compiler.h" + +namespace llvm { +class Target; +class StringRef; + +class SystemZMCAsmInfo : public MCAsmInfo { +public: + explicit SystemZMCAsmInfo(const Target &T, StringRef TT); + + // Override MCAsmInfo; + virtual const MCSection *getNonexecutableStackSection(MCContext &Ctx) const + LLVM_OVERRIDE; +}; + +} // namespace llvm + +#endif diff --git a/contrib/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCCodeEmitter.cpp b/contrib/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCCodeEmitter.cpp new file mode 100644 index 0000000..ea2250f --- /dev/null +++ b/contrib/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCCodeEmitter.cpp @@ -0,0 +1,131 @@ +//===-- SystemZMCCodeEmitter.cpp - Convert SystemZ code to machine code ---===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the SystemZMCCodeEmitter class. +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "mccodeemitter" +#include "MCTargetDesc/SystemZMCTargetDesc.h" +#include "MCTargetDesc/SystemZMCFixups.h" +#include "llvm/MC/MCCodeEmitter.h" +#include "llvm/MC/MCContext.h" +#include "llvm/MC/MCExpr.h" +#include "llvm/MC/MCInstrInfo.h" + +using namespace llvm; + +namespace { +class SystemZMCCodeEmitter : public MCCodeEmitter { + const MCInstrInfo &MCII; + MCContext &Ctx; + +public: + SystemZMCCodeEmitter(const MCInstrInfo &mcii, MCContext &ctx) + : MCII(mcii), Ctx(ctx) { + } + + ~SystemZMCCodeEmitter() {} + + // OVerride MCCodeEmitter. + virtual void EncodeInstruction(const MCInst &MI, raw_ostream &OS, + SmallVectorImpl<MCFixup> &Fixups) const + LLVM_OVERRIDE; + +private: + // Automatically generated by TableGen. 
+ uint64_t getBinaryCodeForInstr(const MCInst &MI, + SmallVectorImpl<MCFixup> &Fixups) const; + + // Called by the TableGen code to get the binary encoding of operand + // MO in MI. Fixups is the list of fixups against MI. + unsigned getMachineOpValue(const MCInst &MI, const MCOperand &MO, + SmallVectorImpl<MCFixup> &Fixups) const; + + // Operand OpNum of MI needs a PC-relative fixup of kind Kind at + // Offset bytes from the start of MI. Add the fixup to Fixups + // and return the in-place addend, which since we're a RELA target + // is always 0. + unsigned getPCRelEncoding(const MCInst &MI, unsigned int OpNum, + SmallVectorImpl<MCFixup> &Fixups, + unsigned Kind, int64_t Offset) const; + + unsigned getPC16DBLEncoding(const MCInst &MI, unsigned int OpNum, + SmallVectorImpl<MCFixup> &Fixups) const { + return getPCRelEncoding(MI, OpNum, Fixups, SystemZ::FK_390_PC16DBL, 2); + } + unsigned getPC32DBLEncoding(const MCInst &MI, unsigned int OpNum, + SmallVectorImpl<MCFixup> &Fixups) const { + return getPCRelEncoding(MI, OpNum, Fixups, SystemZ::FK_390_PC32DBL, 2); + } + unsigned getPLT16DBLEncoding(const MCInst &MI, unsigned int OpNum, + SmallVectorImpl<MCFixup> &Fixups) const { + return getPCRelEncoding(MI, OpNum, Fixups, SystemZ::FK_390_PLT16DBL, 2); + } + unsigned getPLT32DBLEncoding(const MCInst &MI, unsigned int OpNum, + SmallVectorImpl<MCFixup> &Fixups) const { + return getPCRelEncoding(MI, OpNum, Fixups, SystemZ::FK_390_PLT32DBL, 2); + } +}; +} + +MCCodeEmitter *llvm::createSystemZMCCodeEmitter(const MCInstrInfo &MCII, + const MCRegisterInfo &MRI, + const MCSubtargetInfo &MCSTI, + MCContext &Ctx) { + return new SystemZMCCodeEmitter(MCII, Ctx); +} + +void SystemZMCCodeEmitter:: +EncodeInstruction(const MCInst &MI, raw_ostream &OS, + SmallVectorImpl<MCFixup> &Fixups) const { + uint64_t Bits = getBinaryCodeForInstr(MI, Fixups); + unsigned Size = MCII.get(MI.getOpcode()).getSize(); + // Big-endian insertion of Size bytes. + unsigned ShiftValue = (Size * 8) - 8; + for (unsigned I = 0; I != Size; ++I) { + OS << uint8_t(Bits >> ShiftValue); + ShiftValue -= 8; + } +} + +unsigned SystemZMCCodeEmitter:: +getMachineOpValue(const MCInst &MI, const MCOperand &MO, + SmallVectorImpl<MCFixup> &Fixups) const { + if (MO.isReg()) + return Ctx.getRegisterInfo().getEncodingValue(MO.getReg()); + if (MO.isImm()) + return static_cast<unsigned>(MO.getImm()); + llvm_unreachable("Unexpected operand type!"); +} + +unsigned +SystemZMCCodeEmitter::getPCRelEncoding(const MCInst &MI, unsigned int OpNum, + SmallVectorImpl<MCFixup> &Fixups, + unsigned Kind, int64_t Offset) const { + const MCOperand &MO = MI.getOperand(OpNum); + // For compatibility with the GNU assembler, treat constant operands as + // unadjusted PC-relative offsets. + if (MO.isImm()) + return MO.getImm() / 2; + + const MCExpr *Expr = MO.getExpr(); + if (Offset) { + // The operand value is relative to the start of MI, but the fixup + // is relative to the operand field itself, which is Offset bytes + // into MI. Add Offset to the relocation value to cancel out + // this difference. 
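To make the offset-cancellation comment above concrete, a small worked example (editor's addition; the 2-byte field offset matches the Offset passed by the getPC*DBLEncoding helpers above):

// Suppose a 6-byte call instruction at address A targets Sym, and its 32-bit
// PC-relative field starts at byte A+2.  The architecture wants
//   field = (Sym - A) / 2            (halfword units, relative to the insn)
// but the fixup is recorded at A+2, so layout would compute Sym - (A + 2).
// Attaching "Sym + 2" as the fixup expression cancels the difference:
//   (Sym + 2) - (A + 2) = Sym - A
// and extractBitsForFixup(FK_390_PC32DBL, ...) then divides by 2 before the
// bytes are written.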
+ const MCExpr *OffsetExpr = MCConstantExpr::Create(Offset, Ctx); + Expr = MCBinaryExpr::CreateAdd(Expr, OffsetExpr, Ctx); + } + Fixups.push_back(MCFixup::Create(Offset, Expr, (MCFixupKind)Kind)); + return 0; +} + +#include "SystemZGenMCCodeEmitter.inc" diff --git a/contrib/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCFixups.h b/contrib/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCFixups.h new file mode 100644 index 0000000..9c94ebb --- /dev/null +++ b/contrib/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCFixups.h @@ -0,0 +1,31 @@ +//===-- SystemZMCFixups.h - SystemZ-specific fixup entries ------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_SYSTEMZMCFIXUPS_H +#define LLVM_SYSTEMZMCFIXUPS_H + +#include "llvm/MC/MCFixup.h" + +namespace llvm { +namespace SystemZ { + enum FixupKind { + // These correspond directly to R_390_* relocations. + FK_390_PC16DBL = FirstTargetFixupKind, + FK_390_PC32DBL, + FK_390_PLT16DBL, + FK_390_PLT32DBL, + + // Marker + LastTargetFixupKind, + NumTargetFixupKinds = LastTargetFixupKind - FirstTargetFixupKind + }; +} +} // end namespace llvm + +#endif diff --git a/contrib/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCObjectWriter.cpp b/contrib/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCObjectWriter.cpp new file mode 100644 index 0000000..36e3d83 --- /dev/null +++ b/contrib/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCObjectWriter.cpp @@ -0,0 +1,140 @@ +//===-- SystemZMCObjectWriter.cpp - SystemZ ELF writer --------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "MCTargetDesc/SystemZMCTargetDesc.h" +#include "MCTargetDesc/SystemZMCFixups.h" +#include "llvm/MC/MCELFObjectWriter.h" +#include "llvm/MC/MCExpr.h" +#include "llvm/MC/MCValue.h" + +using namespace llvm; + +namespace { +class SystemZObjectWriter : public MCELFObjectTargetWriter { +public: + SystemZObjectWriter(uint8_t OSABI); + + virtual ~SystemZObjectWriter(); + +protected: + // Override MCELFObjectTargetWriter. + virtual unsigned GetRelocType(const MCValue &Target, const MCFixup &Fixup, + bool IsPCRel, bool IsRelocWithSymbol, + int64_t Addend) const LLVM_OVERRIDE; + virtual const MCSymbol *ExplicitRelSym(const MCAssembler &Asm, + const MCValue &Target, + const MCFragment &F, + const MCFixup &Fixup, + bool IsPCRel) const LLVM_OVERRIDE; +}; +} // end anonymouse namespace + +SystemZObjectWriter::SystemZObjectWriter(uint8_t OSABI) + : MCELFObjectTargetWriter(/*Is64Bit=*/true, OSABI, ELF::EM_S390, + /*HasRelocationAddend=*/ true) {} + +SystemZObjectWriter::~SystemZObjectWriter() { +} + +// Return the relocation type for an absolute value of MCFixupKind Kind. +static unsigned getAbsoluteReloc(unsigned Kind) { + switch (Kind) { + case FK_Data_1: return ELF::R_390_8; + case FK_Data_2: return ELF::R_390_16; + case FK_Data_4: return ELF::R_390_32; + case FK_Data_8: return ELF::R_390_64; + } + llvm_unreachable("Unsupported absolute address"); +} + +// Return the relocation type for a PC-relative value of MCFixupKind Kind. 
+static unsigned getPCRelReloc(unsigned Kind) { + switch (Kind) { + case FK_Data_2: return ELF::R_390_PC16; + case FK_Data_4: return ELF::R_390_PC32; + case FK_Data_8: return ELF::R_390_PC64; + case SystemZ::FK_390_PC16DBL: return ELF::R_390_PC16DBL; + case SystemZ::FK_390_PC32DBL: return ELF::R_390_PC32DBL; + case SystemZ::FK_390_PLT16DBL: return ELF::R_390_PLT16DBL; + case SystemZ::FK_390_PLT32DBL: return ELF::R_390_PLT32DBL; + } + llvm_unreachable("Unsupported PC-relative address"); +} + +// Return the R_390_TLS_LE* relocation type for MCFixupKind Kind. +static unsigned getTLSLEReloc(unsigned Kind) { + switch (Kind) { + case FK_Data_4: return ELF::R_390_TLS_LE32; + case FK_Data_8: return ELF::R_390_TLS_LE64; + } + llvm_unreachable("Unsupported absolute address"); +} + +// Return the PLT relocation counterpart of MCFixupKind Kind. +static unsigned getPLTReloc(unsigned Kind) { + switch (Kind) { + case SystemZ::FK_390_PC16DBL: return ELF::R_390_PLT16DBL; + case SystemZ::FK_390_PC32DBL: return ELF::R_390_PLT32DBL; + } + llvm_unreachable("Unsupported absolute address"); +} + +unsigned SystemZObjectWriter::GetRelocType(const MCValue &Target, + const MCFixup &Fixup, + bool IsPCRel, + bool IsRelocWithSymbol, + int64_t Addend) const { + MCSymbolRefExpr::VariantKind Modifier = (Target.isAbsolute() ? + MCSymbolRefExpr::VK_None : + Target.getSymA()->getKind()); + unsigned Kind = Fixup.getKind(); + switch (Modifier) { + case MCSymbolRefExpr::VK_None: + if (IsPCRel) + return getPCRelReloc(Kind); + return getAbsoluteReloc(Kind); + + case MCSymbolRefExpr::VK_NTPOFF: + assert(!IsPCRel && "NTPOFF shouldn't be PC-relative"); + return getTLSLEReloc(Kind); + + case MCSymbolRefExpr::VK_GOT: + if (IsPCRel && Kind == SystemZ::FK_390_PC32DBL) + return ELF::R_390_GOTENT; + llvm_unreachable("Only PC-relative GOT accesses are supported for now"); + + case MCSymbolRefExpr::VK_PLT: + assert(IsPCRel && "@PLT shouldt be PC-relative"); + return getPLTReloc(Kind); + + default: + llvm_unreachable("Modifier not supported"); + } +} + +const MCSymbol *SystemZObjectWriter::ExplicitRelSym(const MCAssembler &Asm, + const MCValue &Target, + const MCFragment &F, + const MCFixup &Fixup, + bool IsPCRel) const { + // The addend in a PC-relative R_390_* relocation is always applied to + // the PC-relative part of the address. If some kind of indirection + // is applied to the symbol first, we can't use an addend there too. + if (!Target.isAbsolute() && + Target.getSymA()->getKind() != MCSymbolRefExpr::VK_None && + IsPCRel) + return &Target.getSymA()->getSymbol().AliasedSymbol(); + return NULL; +} + +MCObjectWriter *llvm::createSystemZObjectWriter(raw_ostream &OS, + uint8_t OSABI) { + MCELFObjectTargetWriter *MOTW = new SystemZObjectWriter(OSABI); + return createELFObjectWriter(MOTW, OS, /*IsLittleEndian=*/false); +} diff --git a/contrib/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.cpp b/contrib/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.cpp new file mode 100644 index 0000000..49a7f47 --- /dev/null +++ b/contrib/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.cpp @@ -0,0 +1,160 @@ +//===-- SystemZMCTargetDesc.cpp - SystemZ target descriptions -------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +#include "SystemZMCTargetDesc.h" +#include "InstPrinter/SystemZInstPrinter.h" +#include "SystemZMCAsmInfo.h" +#include "llvm/MC/MCCodeGenInfo.h" +#include "llvm/MC/MCInstrInfo.h" +#include "llvm/MC/MCStreamer.h" +#include "llvm/MC/MCSubtargetInfo.h" +#include "llvm/Support/TargetRegistry.h" + +#define GET_INSTRINFO_MC_DESC +#include "SystemZGenInstrInfo.inc" + +#define GET_SUBTARGETINFO_MC_DESC +#include "SystemZGenSubtargetInfo.inc" + +#define GET_REGINFO_MC_DESC +#include "SystemZGenRegisterInfo.inc" + +using namespace llvm; + +static MCAsmInfo *createSystemZMCAsmInfo(const Target &T, StringRef TT) { + MCAsmInfo *MAI = new SystemZMCAsmInfo(T, TT); + MachineLocation FPDst(MachineLocation::VirtualFP); + MachineLocation FPSrc(SystemZ::R15D, -SystemZMC::CFAOffsetFromInitialSP); + MAI->addInitialFrameState(0, FPDst, FPSrc); + return MAI; +} + +static MCInstrInfo *createSystemZMCInstrInfo() { + MCInstrInfo *X = new MCInstrInfo(); + InitSystemZMCInstrInfo(X); + return X; +} + +static MCRegisterInfo *createSystemZMCRegisterInfo(StringRef TT) { + MCRegisterInfo *X = new MCRegisterInfo(); + InitSystemZMCRegisterInfo(X, SystemZ::R14D); + return X; +} + +static MCSubtargetInfo *createSystemZMCSubtargetInfo(StringRef TT, + StringRef CPU, + StringRef FS) { + MCSubtargetInfo *X = new MCSubtargetInfo(); + InitSystemZMCSubtargetInfo(X, TT, CPU, FS); + return X; +} + +static MCCodeGenInfo *createSystemZMCCodeGenInfo(StringRef TT, Reloc::Model RM, + CodeModel::Model CM, + CodeGenOpt::Level OL) { + MCCodeGenInfo *X = new MCCodeGenInfo(); + + // Static code is suitable for use in a dynamic executable; there is no + // separate DynamicNoPIC model. + if (RM == Reloc::Default || RM == Reloc::DynamicNoPIC) + RM = Reloc::Static; + + // For SystemZ we define the models as follows: + // + // Small: BRASL can call any function and will use a stub if necessary. + // Locally-binding symbols will always be in range of LARL. + // + // Medium: BRASL can call any function and will use a stub if necessary. + // GOT slots and locally-defined text will always be in range + // of LARL, but other symbols might not be. + // + // Large: Equivalent to Medium for now. + // + // Kernel: Equivalent to Medium for now. + // + // This means that any PIC module smaller than 4GB meets the + // requirements of Small, so Small seems like the best default there. + // + // All symbols bind locally in a non-PIC module, so the choice is less + // obvious. There are two cases: + // + // - When creating an executable, PLTs and copy relocations allow + // us to treat external symbols as part of the executable. + // Any executable smaller than 4GB meets the requirements of Small, + // so that seems like the best default. + // + // - When creating JIT code, stubs will be in range of BRASL if the + // image is less than 4GB in size. GOT entries will likewise be + // in range of LARL. However, the JIT environment has no equivalent + // of copy relocs, so locally-binding data symbols might not be in + // the range of LARL. We need the Medium model in that case. + if (CM == CodeModel::Default) + CM = CodeModel::Small; + else if (CM == CodeModel::JITDefault) + CM = RM == Reloc::PIC_ ? 
CodeModel::Small : CodeModel::Medium; + X->InitMCCodeGenInfo(RM, CM, OL); + return X; +} + +static MCInstPrinter *createSystemZMCInstPrinter(const Target &T, + unsigned SyntaxVariant, + const MCAsmInfo &MAI, + const MCInstrInfo &MII, + const MCRegisterInfo &MRI, + const MCSubtargetInfo &STI) { + return new SystemZInstPrinter(MAI, MII, MRI); +} + +static MCStreamer *createSystemZMCObjectStreamer(const Target &T, StringRef TT, + MCContext &Ctx, + MCAsmBackend &MAB, + raw_ostream &OS, + MCCodeEmitter *Emitter, + bool RelaxAll, + bool NoExecStack) { + return createELFStreamer(Ctx, MAB, OS, Emitter, RelaxAll, NoExecStack); +} + +extern "C" void LLVMInitializeSystemZTargetMC() { + // Register the MCAsmInfo. + TargetRegistry::RegisterMCAsmInfo(TheSystemZTarget, + createSystemZMCAsmInfo); + + // Register the MCCodeGenInfo. + TargetRegistry::RegisterMCCodeGenInfo(TheSystemZTarget, + createSystemZMCCodeGenInfo); + + // Register the MCCodeEmitter. + TargetRegistry::RegisterMCCodeEmitter(TheSystemZTarget, + createSystemZMCCodeEmitter); + + // Register the MCInstrInfo. + TargetRegistry::RegisterMCInstrInfo(TheSystemZTarget, + createSystemZMCInstrInfo); + + // Register the MCRegisterInfo. + TargetRegistry::RegisterMCRegInfo(TheSystemZTarget, + createSystemZMCRegisterInfo); + + // Register the MCSubtargetInfo. + TargetRegistry::RegisterMCSubtargetInfo(TheSystemZTarget, + createSystemZMCSubtargetInfo); + + // Register the MCAsmBackend. + TargetRegistry::RegisterMCAsmBackend(TheSystemZTarget, + createSystemZMCAsmBackend); + + // Register the MCInstPrinter. + TargetRegistry::RegisterMCInstPrinter(TheSystemZTarget, + createSystemZMCInstPrinter); + + // Register the MCObjectStreamer; + TargetRegistry::RegisterMCObjectStreamer(TheSystemZTarget, + createSystemZMCObjectStreamer); +} diff --git a/contrib/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.h b/contrib/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.h new file mode 100644 index 0000000..229912f --- /dev/null +++ b/contrib/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.h @@ -0,0 +1,62 @@ +//===-- SystemZMCTargetDesc.h - SystemZ target descriptions -----*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#ifndef SYSTEMZMCTARGETDESC_H +#define SYSTEMZMCTARGETDESC_H + +#include "llvm/Support/DataTypes.h" + +namespace llvm { + +class MCAsmBackend; +class MCCodeEmitter; +class MCContext; +class MCInstrInfo; +class MCObjectWriter; +class MCRegisterInfo; +class MCSubtargetInfo; +class StringRef; +class Target; +class raw_ostream; + +extern Target TheSystemZTarget; + +namespace SystemZMC { + // How many bytes are in the ABI-defined, caller-allocated part of + // a stack frame. + const int64_t CallFrameSize = 160; + + // The offset of the DWARF CFA from the incoming stack pointer. + const int64_t CFAOffsetFromInitialSP = CallFrameSize; +} + +MCCodeEmitter *createSystemZMCCodeEmitter(const MCInstrInfo &MCII, + const MCRegisterInfo &MRI, + const MCSubtargetInfo &STI, + MCContext &Ctx); + +MCAsmBackend *createSystemZMCAsmBackend(const Target &T, StringRef TT, + StringRef CPU); + +MCObjectWriter *createSystemZObjectWriter(raw_ostream &OS, uint8_t OSABI); +} // end namespace llvm + +// Defines symbolic names for SystemZ registers. +// This defines a mapping from register name to register number. 
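
The long comment in createSystemZMCCodeGenInfo above boils down to a small decision table. A simplified restatement in standalone C++ (the enum names are placeholders, not the real LLVM ones):

  #include <cassert>

  enum RelocModel { RM_Default, RM_Static, RM_PIC, RM_DynamicNoPIC };
  enum CodeModel  { CM_Default, CM_JITDefault, CM_Small, CM_Medium };

  static void applyDefaults(RelocModel &RM, CodeModel &CM) {
    // Static code also works in a dynamic executable, so DynamicNoPIC folds away.
    if (RM == RM_Default || RM == RM_DynamicNoPIC)
      RM = RM_Static;
    if (CM == CM_Default)
      CM = CM_Small;                               // anything under 4GB satisfies Small
    else if (CM == CM_JITDefault)
      CM = (RM == RM_PIC) ? CM_Small : CM_Medium;  // JITed non-PIC data may be out of LARL range
  }

  int main() {
    RelocModel RM = RM_Default;
    CodeModel CM = CM_JITDefault;
    applyDefaults(RM, CM);
    assert(RM == RM_Static && CM == CM_Medium);
    return 0;
  }
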
+#define GET_REGINFO_ENUM +#include "SystemZGenRegisterInfo.inc" + +// Defines symbolic names for the SystemZ instructions. +#define GET_INSTRINFO_ENUM +#include "SystemZGenInstrInfo.inc" + +#define GET_SUBTARGETINFO_ENUM +#include "SystemZGenSubtargetInfo.inc" + +#endif diff --git a/contrib/llvm/lib/Target/SystemZ/README.txt b/contrib/llvm/lib/Target/SystemZ/README.txt new file mode 100644 index 0000000..d1f56a4 --- /dev/null +++ b/contrib/llvm/lib/Target/SystemZ/README.txt @@ -0,0 +1,146 @@ +//===---------------------------------------------------------------------===// +// Random notes about and ideas for the SystemZ backend. +//===---------------------------------------------------------------------===// + +The initial backend is deliberately restricted to z10. We should add support +for later architectures at some point. + +-- + +SystemZDAGToDAGISel::SelectInlineAsmMemoryOperand() is passed "m" for all +inline asm memory constraints; it doesn't get to see the original constraint. +This means that it must conservatively treat all inline asm constraints +as the most restricted type, "R". + +-- + +If an inline asm ties an i32 "r" result to an i64 input, the input +will be treated as an i32, leaving the upper bits uninitialised. +For example: + +define void @f4(i32 *%dst) { + %val = call i32 asm "blah $0", "=r,0" (i64 103) + store i32 %val, i32 *%dst + ret void +} + +from CodeGen/SystemZ/asm-09.ll will use LHI rather than LGHI. +to load 103. This seems to be a general target-independent problem. + +-- + +The tuning of the choice between Load Address (LA) and addition in +SystemZISelDAGToDAG.cpp is suspect. It should be tweaked based on +performance measurements. + +-- + +There is no scheduling support. + +-- + +We don't use the Branch on Count or Branch on Index families of instruction. + +-- + +We don't use the condition code results of anything except comparisons. + +Implementing this may need something more finely grained than the z_cmp +and z_ucmp that we have now. It might (or might not) also be useful to +have a mask of "don't care" values in conditional branches. For example, +integer comparisons never set CC to 3, so the bottom bit of the CC mask +isn't particularly relevant. JNLH and JE are equally good for testing +equality after an integer comparison, etc. + +-- + +We don't optimize string and block memory operations. + +-- + +We don't take full advantage of builtins like fabsl because the calling +conventions require f128s to be returned by invisible reference. + +-- + +DAGCombiner can detect integer absolute, but there's not yet an associated +ISD opcode. We could add one and implement it using Load Positive. +Negated absolutes could use Load Negative. + +-- + +DAGCombiner doesn't yet fold truncations of extended loads. Functions like: + + unsigned long f (unsigned long x, unsigned short *y) + { + return (x << 32) | *y; + } + +therefore end up as: + + sllg %r2, %r2, 32 + llgh %r0, 0(%r3) + lr %r2, %r0 + br %r14 + +but truncating the load would give: + + sllg %r2, %r2, 32 + lh %r2, 0(%r3) + br %r14 + +-- + +Functions like: + +define i64 @f1(i64 %a) { + %and = and i64 %a, 1 + ret i64 %and +} + +ought to be implemented as: + + lhi %r0, 1 + ngr %r2, %r0 + br %r14 + +but two-address optimisations reverse the order of the AND and force: + + lhi %r0, 1 + ngr %r0, %r2 + lgr %r2, %r0 + br %r14 + +CodeGen/SystemZ/and-04.ll has several examples of this. + +-- + +Out-of-range displacements are usually handled by loading the full +address into a register. 
In many cases it would be better to create +an anchor point instead. E.g. for: + +define void @f4a(i128 *%aptr, i64 %base) { + %addr = add i64 %base, 524288 + %bptr = inttoptr i64 %addr to i128 * + %a = load volatile i128 *%aptr + %b = load i128 *%bptr + %add = add i128 %a, %b + store i128 %add, i128 *%aptr + ret void +} + +(from CodeGen/SystemZ/int-add-08.ll) we load %base+524288 and %base+524296 +into separate registers, rather than using %base+524288 as a base for both. + +-- + +Dynamic stack allocations round the size to 8 bytes and then allocate +that rounded amount. It would be simpler to subtract the unrounded +size from the copy of the stack pointer and then align the result. +See CodeGen/SystemZ/alloca-01.ll for an example. + +-- + +Atomic loads and stores use the default compare-and-swap based implementation. +This is probably much too conservative in practice, and the overhead is +especially bad for 8- and 16-bit accesses. diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZ.h b/contrib/llvm/lib/Target/SystemZ/SystemZ.h new file mode 100644 index 0000000..b811cbe --- /dev/null +++ b/contrib/llvm/lib/Target/SystemZ/SystemZ.h @@ -0,0 +1,77 @@ +//==- SystemZ.h - Top-Level Interface for SystemZ representation -*- C++ -*-==// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains the entry points for global functions defined in +// the LLVM SystemZ backend. +// +//===----------------------------------------------------------------------===// + +#ifndef SYSTEMZ_H +#define SYSTEMZ_H + +#include "MCTargetDesc/SystemZMCTargetDesc.h" +#include "llvm/Support/CodeGen.h" + +namespace llvm { + class SystemZTargetMachine; + class FunctionPass; + + namespace SystemZ { + // Condition-code mask values. + const unsigned CCMASK_0 = 1 << 3; + const unsigned CCMASK_1 = 1 << 2; + const unsigned CCMASK_2 = 1 << 1; + const unsigned CCMASK_3 = 1 << 0; + const unsigned CCMASK_ANY = CCMASK_0 | CCMASK_1 | CCMASK_2 | CCMASK_3; + + // Condition-code mask assignments for floating-point comparisons. + const unsigned CCMASK_CMP_EQ = CCMASK_0; + const unsigned CCMASK_CMP_LT = CCMASK_1; + const unsigned CCMASK_CMP_GT = CCMASK_2; + const unsigned CCMASK_CMP_UO = CCMASK_3; + const unsigned CCMASK_CMP_NE = CCMASK_CMP_LT | CCMASK_CMP_GT; + const unsigned CCMASK_CMP_LE = CCMASK_CMP_EQ | CCMASK_CMP_LT; + const unsigned CCMASK_CMP_GE = CCMASK_CMP_EQ | CCMASK_CMP_GT; + const unsigned CCMASK_CMP_O = CCMASK_ANY ^ CCMASK_CMP_UO; + + // Return true if Val fits an LLILL operand. + static inline bool isImmLL(uint64_t Val) { + return (Val & ~0x000000000000ffffULL) == 0; + } + + // Return true if Val fits an LLILH operand. + static inline bool isImmLH(uint64_t Val) { + return (Val & ~0x00000000ffff0000ULL) == 0; + } + + // Return true if Val fits an LLIHL operand. + static inline bool isImmHL(uint64_t Val) { + return (Val & ~0x00000ffff00000000ULL) == 0; + } + + // Return true if Val fits an LLIHH operand. + static inline bool isImmHH(uint64_t Val) { + return (Val & ~0xffff000000000000ULL) == 0; + } + + // Return true if Val fits an LLILF operand. + static inline bool isImmLF(uint64_t Val) { + return (Val & ~0x00000000ffffffffULL) == 0; + } + + // Return true if Val fits an LLIHF operand. 
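
These predicates correspond to the LOAD LOGICAL IMMEDIATE family: LL/LH/HL/HH each test one 16-bit chunk of the value, LF/HF one 32-bit half. (The isImmHL mask above appears to have picked up a stray digit in transit; bits 32-47 correspond to 0x0000ffff00000000.) A standalone sketch that uses the same masks to pick a single-instruction materialization, with the instruction names as plain strings:

  #include <cassert>
  #include <cstdint>
  #include <cstring>

  // Which LLI* instruction, if any, can materialize Val on its own?
  static const char *pickLLI(uint64_t Val) {
    if ((Val & ~0x000000000000ffffULL) == 0) return "LLILL";   // bits 0-15
    if ((Val & ~0x00000000ffff0000ULL) == 0) return "LLILH";   // bits 16-31
    if ((Val & ~0x0000ffff00000000ULL) == 0) return "LLIHL";   // bits 32-47
    if ((Val & ~0xffff000000000000ULL) == 0) return "LLIHH";   // bits 48-63
    if ((Val & ~0x00000000ffffffffULL) == 0) return "LLILF";   // low 32 bits
    if ((Val & ~0xffffffff00000000ULL) == 0) return "LLIHF";   // high 32 bits
    return "";                                                 // needs a multi-instruction sequence
  }

  int main() {
    assert(std::strcmp(pickLLI(0x1234), "LLILL") == 0);
    assert(std::strcmp(pickLLI(0x00120000), "LLILH") == 0);
    assert(std::strcmp(pickLLI(0x12345678), "LLILF") == 0);
    assert(std::strcmp(pickLLI(0x123456789a), "") == 0);
    return 0;
  }
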
+ static inline bool isImmHF(uint64_t Val) { + return (Val & ~0xffffffff00000000ULL) == 0; + } + } + + FunctionPass *createSystemZISelDag(SystemZTargetMachine &TM, + CodeGenOpt::Level OptLevel); +} // end namespace llvm; +#endif diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZ.td b/contrib/llvm/lib/Target/SystemZ/SystemZ.td new file mode 100644 index 0000000..e03c32f --- /dev/null +++ b/contrib/llvm/lib/Target/SystemZ/SystemZ.td @@ -0,0 +1,75 @@ +//===-- SystemZ.td - Describe the SystemZ target machine -----*- tblgen -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// Target-independent interfaces which we are implementing +//===----------------------------------------------------------------------===// + +include "llvm/Target/Target.td" + +//===----------------------------------------------------------------------===// +// SystemZ supported processors +//===----------------------------------------------------------------------===// + +class Proc<string Name, list<SubtargetFeature> Features> + : Processor<Name, NoItineraries, Features>; + +def : Proc<"z10", []>; + +//===----------------------------------------------------------------------===// +// Register file description +//===----------------------------------------------------------------------===// + +include "SystemZRegisterInfo.td" + +//===----------------------------------------------------------------------===// +// Calling convention description +//===----------------------------------------------------------------------===// + +include "SystemZCallingConv.td" + +//===----------------------------------------------------------------------===// +// Instruction descriptions +//===----------------------------------------------------------------------===// + +include "SystemZOperators.td" +include "SystemZOperands.td" +include "SystemZPatterns.td" +include "SystemZInstrFormats.td" +include "SystemZInstrInfo.td" +include "SystemZInstrFP.td" + +def SystemZInstrInfo : InstrInfo {} + +//===----------------------------------------------------------------------===// +// Assembly parser +//===----------------------------------------------------------------------===// + +def SystemZAsmParser : AsmParser { + let ShouldEmitMatchRegisterName = 0; +} + +//===----------------------------------------------------------------------===// +// Assembly writer +//===----------------------------------------------------------------------===// + +def SystemZAsmWriter : AsmWriter { + string AsmWriterClassName = "InstPrinter"; + bit isMCAsmWriter = 1; +} + +//===----------------------------------------------------------------------===// +// Top-level target declaration +//===----------------------------------------------------------------------===// + +def SystemZ : Target { + let InstructionSet = SystemZInstrInfo; + let AssemblyParsers = [SystemZAsmParser]; + let AssemblyWriters = [SystemZAsmWriter]; +} diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZAsmPrinter.cpp b/contrib/llvm/lib/Target/SystemZ/SystemZAsmPrinter.cpp new file mode 100644 index 0000000..1e15ab1 --- /dev/null +++ b/contrib/llvm/lib/Target/SystemZ/SystemZAsmPrinter.cpp @@ -0,0 +1,113 @@ +//===-- SystemZAsmPrinter.cpp - SystemZ LLVM assembly printer -------------===// +// +// The LLVM Compiler 
Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// Streams SystemZ assembly language and associated data, in the form of +// MCInsts and MCExprs respectively. +// +//===----------------------------------------------------------------------===// + +#include "SystemZAsmPrinter.h" +#include "InstPrinter/SystemZInstPrinter.h" +#include "SystemZConstantPoolValue.h" +#include "SystemZMCInstLower.h" +#include "llvm/CodeGen/MachineModuleInfoImpls.h" +#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h" +#include "llvm/MC/MCExpr.h" +#include "llvm/MC/MCStreamer.h" +#include "llvm/Support/TargetRegistry.h" +#include "llvm/Target/Mangler.h" + +using namespace llvm; + +void SystemZAsmPrinter::EmitInstruction(const MachineInstr *MI) { + SystemZMCInstLower Lower(Mang, MF->getContext(), *this); + MCInst LoweredMI; + Lower.lower(MI, LoweredMI); + OutStreamer.EmitInstruction(LoweredMI); +} + +// Convert a SystemZ-specific constant pool modifier into the associated +// MCSymbolRefExpr variant kind. +static MCSymbolRefExpr::VariantKind +getModifierVariantKind(SystemZCP::SystemZCPModifier Modifier) { + switch (Modifier) { + case SystemZCP::NTPOFF: return MCSymbolRefExpr::VK_NTPOFF; + } + llvm_unreachable("Invalid SystemCPModifier!"); +} + +void SystemZAsmPrinter:: +EmitMachineConstantPoolValue(MachineConstantPoolValue *MCPV) { + SystemZConstantPoolValue *ZCPV = + static_cast<SystemZConstantPoolValue*>(MCPV); + + const MCExpr *Expr = + MCSymbolRefExpr::Create(Mang->getSymbol(ZCPV->getGlobalValue()), + getModifierVariantKind(ZCPV->getModifier()), + OutContext); + uint64_t Size = TM.getDataLayout()->getTypeAllocSize(ZCPV->getType()); + + OutStreamer.EmitValue(Expr, Size); +} + +bool SystemZAsmPrinter::PrintAsmOperand(const MachineInstr *MI, + unsigned OpNo, + unsigned AsmVariant, + const char *ExtraCode, + raw_ostream &OS) { + if (ExtraCode && *ExtraCode == 'n') { + if (!MI->getOperand(OpNo).isImm()) + return true; + OS << -int64_t(MI->getOperand(OpNo).getImm()); + } else { + SystemZMCInstLower Lower(Mang, MF->getContext(), *this); + MCOperand MO(Lower.lowerOperand(MI->getOperand(OpNo))); + SystemZInstPrinter::printOperand(MO, OS); + } + return false; +} + +bool SystemZAsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI, + unsigned OpNo, + unsigned AsmVariant, + const char *ExtraCode, + raw_ostream &OS) { + SystemZInstPrinter::printAddress(MI->getOperand(OpNo).getReg(), + MI->getOperand(OpNo + 1).getImm(), + MI->getOperand(OpNo + 2).getReg(), OS); + return false; +} + +void SystemZAsmPrinter::EmitEndOfAsmFile(Module &M) { + if (Subtarget->isTargetELF()) { + const TargetLoweringObjectFileELF &TLOFELF = + static_cast<const TargetLoweringObjectFileELF &>(getObjFileLowering()); + + MachineModuleInfoELF &MMIELF = MMI->getObjFileInfo<MachineModuleInfoELF>(); + + // Output stubs for external and common global variables. + MachineModuleInfoELF::SymbolListTy Stubs = MMIELF.GetGVStubList(); + if (!Stubs.empty()) { + OutStreamer.SwitchSection(TLOFELF.getDataRelSection()); + const DataLayout *TD = TM.getDataLayout(); + + for (unsigned i = 0, e = Stubs.size(); i != e; ++i) { + OutStreamer.EmitLabel(Stubs[i].first); + OutStreamer.EmitSymbolValue(Stubs[i].second.getPointer(), + TD->getPointerSize(0), 0); + } + Stubs.clear(); + } + } +} + +// Force static initialization. 
+extern "C" void LLVMInitializeSystemZAsmPrinter() { + RegisterAsmPrinter<SystemZAsmPrinter> X(TheSystemZTarget); +} diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZAsmPrinter.h b/contrib/llvm/lib/Target/SystemZ/SystemZAsmPrinter.h new file mode 100644 index 0000000..4b6c51b --- /dev/null +++ b/contrib/llvm/lib/Target/SystemZ/SystemZAsmPrinter.h @@ -0,0 +1,52 @@ +//===-- SystemZAsmPrinter.h - SystemZ LLVM assembly printer ----*- C++ -*--===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#ifndef SYSTEMZASMPRINTER_H +#define SYSTEMZASMPRINTER_H + +#include "SystemZTargetMachine.h" +#include "llvm/CodeGen/AsmPrinter.h" +#include "llvm/Support/Compiler.h" + +namespace llvm { +class MCStreamer; +class MachineBasicBlock; +class MachineInstr; +class Module; +class raw_ostream; + +class LLVM_LIBRARY_VISIBILITY SystemZAsmPrinter : public AsmPrinter { +private: + const SystemZSubtarget *Subtarget; + +public: + SystemZAsmPrinter(TargetMachine &TM, MCStreamer &Streamer) + : AsmPrinter(TM, Streamer) { + Subtarget = &TM.getSubtarget<SystemZSubtarget>(); + } + + // Override AsmPrinter. + virtual const char *getPassName() const LLVM_OVERRIDE { + return "SystemZ Assembly Printer"; + } + virtual void EmitInstruction(const MachineInstr *MI) LLVM_OVERRIDE; + virtual void EmitMachineConstantPoolValue(MachineConstantPoolValue *MCPV) + LLVM_OVERRIDE; + virtual bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNo, + unsigned AsmVariant, const char *ExtraCode, + raw_ostream &OS) LLVM_OVERRIDE; + virtual bool PrintAsmMemoryOperand(const MachineInstr *MI, unsigned OpNo, + unsigned AsmVariant, + const char *ExtraCode, + raw_ostream &OS) LLVM_OVERRIDE; + virtual void EmitEndOfAsmFile(Module &M) LLVM_OVERRIDE; +}; +} // end namespace llvm + +#endif diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZCallingConv.cpp b/contrib/llvm/lib/Target/SystemZ/SystemZCallingConv.cpp new file mode 100644 index 0000000..cc9c84b --- /dev/null +++ b/contrib/llvm/lib/Target/SystemZ/SystemZCallingConv.cpp @@ -0,0 +1,21 @@ +//===-- SystemZCallingConv.cpp - Calling conventions for SystemZ ----------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "SystemZCallingConv.h" +#include "SystemZRegisterInfo.h" + +using namespace llvm; + +const unsigned SystemZ::ArgGPRs[SystemZ::NumArgGPRs] = { + SystemZ::R2D, SystemZ::R3D, SystemZ::R4D, SystemZ::R5D, SystemZ::R6D +}; + +const unsigned SystemZ::ArgFPRs[SystemZ::NumArgFPRs] = { + SystemZ::F0D, SystemZ::F2D, SystemZ::F4D, SystemZ::F6D +}; diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZCallingConv.h b/contrib/llvm/lib/Target/SystemZ/SystemZCallingConv.h new file mode 100644 index 0000000..298985e --- /dev/null +++ b/contrib/llvm/lib/Target/SystemZ/SystemZCallingConv.h @@ -0,0 +1,23 @@ +//===-- SystemZCallingConv.h - Calling conventions for SystemZ --*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
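
ArgGPRs and ArgFPRs above are the SVR4 argument registers that the calling-convention description which follows hands out in order: up to five integer arguments in %r2-%r6 and up to four float/double arguments in %f0/%f2/%f4/%f6, with the remainder going to 8-byte stack slots. A toy standalone model of that assignment (it ignores extension attributes and f128, and the 160-byte starting offset for stack arguments is this sketch's assumption about where outgoing parameters begin):

  #include <cstdio>
  #include <string>

  int main() {
    const char *GPRs[] = { "%r2", "%r3", "%r4", "%r5", "%r6" };
    const char *FPRs[] = { "%f0", "%f2", "%f4", "%f6" };

    // 'i' = integer argument (i32/i64), 'f' = float/double argument,
    // e.g. long f(long, double, long, int, float, long, long, long).
    std::string Args = "ifiifiii";

    unsigned NextGPR = 0, NextFPR = 0, StackOffset = 160;
    for (unsigned I = 0; I < Args.size(); ++I) {
      if (Args[I] == 'i' && NextGPR < 5)
        std::printf("arg %u -> %s\n", I + 1, GPRs[NextGPR++]);
      else if (Args[I] == 'f' && NextFPR < 4)
        std::printf("arg %u -> %s\n", I + 1, FPRs[NextFPR++]);
      else {
        std::printf("arg %u -> %u(%%r15)\n", I + 1, StackOffset);
        StackOffset += 8;
      }
    }
    return 0;
  }

For the sample signature this prints %r2, %f0, %r3, %r4, %f2, %r5, %r6 and one stack slot for the eighth argument.
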
+// +//===----------------------------------------------------------------------===// + +#ifndef SYSTEMZCALLINGCONV_H +#define SYSTEMZCALLINGCONV_H + +namespace llvm { + namespace SystemZ { + const unsigned NumArgGPRs = 5; + extern const unsigned ArgGPRs[NumArgGPRs]; + + const unsigned NumArgFPRs = 4; + extern const unsigned ArgFPRs[NumArgFPRs]; + } +} + +#endif diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZCallingConv.td b/contrib/llvm/lib/Target/SystemZ/SystemZCallingConv.td new file mode 100644 index 0000000..c2d727f --- /dev/null +++ b/contrib/llvm/lib/Target/SystemZ/SystemZCallingConv.td @@ -0,0 +1,65 @@ +//=- SystemZCallingConv.td - Calling conventions for SystemZ -*- tablegen -*-=// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// This describes the calling conventions for the SystemZ ABI. +//===----------------------------------------------------------------------===// + +class CCIfExtend<CCAction A> + : CCIf<"ArgFlags.isSExt() || ArgFlags.isZExt()", A>; + +//===----------------------------------------------------------------------===// +// SVR4 return value calling convention +//===----------------------------------------------------------------------===// +def RetCC_SystemZ : CallingConv<[ + // Promote i32 to i64 if it has an explicit extension type. + CCIfType<[i32], CCIfExtend<CCPromoteToType<i64>>>, + + // ABI-compliant code returns 64-bit integers in R2. Make the other + // call-clobbered argument registers available for code that doesn't + // care about the ABI. (R6 is an argument register too, but is + // call-saved and therefore not suitable for return values.) + CCIfType<[i32], CCAssignToReg<[R2W, R3W, R4W, R5W]>>, + CCIfType<[i64], CCAssignToReg<[R2D, R3D, R4D, R5D]>>, + + // ABI-complaint code returns float and double in F0. Make the + // other floating-point argument registers available for code that + // doesn't care about the ABI. All floating-point argument registers + // are call-clobbered, so we can use all of them here. + CCIfType<[f32], CCAssignToReg<[F0S, F2S, F4S, F6S]>>, + CCIfType<[f64], CCAssignToReg<[F0D, F2D, F4D, F6D]>> + + // ABI-compliant code returns long double by reference, but that conversion + // is left to higher-level code. Perhaps we could add an f128 definition + // here for code that doesn't care about the ABI? +]>; + +//===----------------------------------------------------------------------===// +// SVR4 argument calling conventions +//===----------------------------------------------------------------------===// +def CC_SystemZ : CallingConv<[ + // Promote i32 to i64 if it has an explicit extension type. + // The convention is that true integer arguments that are smaller + // than 64 bits should be marked as extended, but structures that + // are smaller than 64 bits shouldn't. + CCIfType<[i32], CCIfExtend<CCPromoteToType<i64>>>, + + // Force long double values to the stack and pass i64 pointers to them. + CCIfType<[f128], CCPassIndirect<i64>>, + + // The first 5 integer arguments are passed in R2-R6. Note that R6 + // is call-saved. + CCIfType<[i32], CCAssignToReg<[R2W, R3W, R4W, R5W, R6W]>>, + CCIfType<[i64], CCAssignToReg<[R2D, R3D, R4D, R5D, R6D]>>, + + // The first 4 float and double arguments are passed in even registers F0-F6. 
+ CCIfType<[f32], CCAssignToReg<[F0S, F2S, F4S, F6S]>>, + CCIfType<[f64], CCAssignToReg<[F0D, F2D, F4D, F6D]>>, + + // Other arguments are passed in 8-byte-aligned 8-byte stack slots. + CCIfType<[i32, i64, f32, f64], CCAssignToStack<8, 8>> +]>; diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZConstantPoolValue.cpp b/contrib/llvm/lib/Target/SystemZ/SystemZConstantPoolValue.cpp new file mode 100644 index 0000000..e9c4f6d --- /dev/null +++ b/contrib/llvm/lib/Target/SystemZ/SystemZConstantPoolValue.cpp @@ -0,0 +1,62 @@ +//===-- SystemZConstantPoolValue.cpp - SystemZ constant-pool value --------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "SystemZConstantPoolValue.h" +#include "llvm/ADT/FoldingSet.h" +#include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/GlobalValue.h" +#include "llvm/Support/raw_ostream.h" + +using namespace llvm; + +SystemZConstantPoolValue:: +SystemZConstantPoolValue(const GlobalValue *gv, + SystemZCP::SystemZCPModifier modifier) + : MachineConstantPoolValue(gv->getType()), GV(gv), Modifier(modifier) {} + +SystemZConstantPoolValue * +SystemZConstantPoolValue::Create(const GlobalValue *GV, + SystemZCP::SystemZCPModifier Modifier) { + return new SystemZConstantPoolValue(GV, Modifier); +} + +unsigned SystemZConstantPoolValue::getRelocationInfo() const { + switch (Modifier) { + case SystemZCP::NTPOFF: + // May require a relocation, but the relocations are always resolved + // by the static linker. + return 1; + } + llvm_unreachable("Unknown modifier"); +} + +int SystemZConstantPoolValue:: +getExistingMachineCPValue(MachineConstantPool *CP, unsigned Alignment) { + unsigned AlignMask = Alignment - 1; + const std::vector<MachineConstantPoolEntry> Constants = CP->getConstants(); + for (unsigned I = 0, E = Constants.size(); I != E; ++I) { + if (Constants[I].isMachineConstantPoolEntry() && + (Constants[I].getAlignment() & AlignMask) == 0) { + SystemZConstantPoolValue *ZCPV = + static_cast<SystemZConstantPoolValue *>(Constants[I].Val.MachineCPVal); + if (ZCPV->GV == GV && ZCPV->Modifier == Modifier) + return I; + } + } + return -1; +} + +void SystemZConstantPoolValue::addSelectionDAGCSEId(FoldingSetNodeID &ID) { + ID.AddPointer(GV); + ID.AddInteger(Modifier); +} + +void SystemZConstantPoolValue::print(raw_ostream &O) const { + O << GV << "@" << int(Modifier); +} diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZConstantPoolValue.h b/contrib/llvm/lib/Target/SystemZ/SystemZConstantPoolValue.h new file mode 100644 index 0000000..9927bdb --- /dev/null +++ b/contrib/llvm/lib/Target/SystemZ/SystemZConstantPoolValue.h @@ -0,0 +1,55 @@ +//===- SystemZConstantPoolValue.h - SystemZ constant-pool value -*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#ifndef SYSTEMZCONSTANTPOOLVALUE_H +#define SYSTEMZCONSTANTPOOLVALUE_H + +#include "llvm/CodeGen/MachineConstantPool.h" +#include "llvm/Support/ErrorHandling.h" + +namespace llvm { + +class GlobalValue; + +namespace SystemZCP { + enum SystemZCPModifier { + NTPOFF + }; +} + +/// A SystemZ-specific constant pool value. 
At present, the only +/// defined constant pool values are offsets of thread-local variables +/// (written x@NTPOFF). +class SystemZConstantPoolValue : public MachineConstantPoolValue { + const GlobalValue *GV; + SystemZCP::SystemZCPModifier Modifier; + +protected: + SystemZConstantPoolValue(const GlobalValue *GV, + SystemZCP::SystemZCPModifier Modifier); + +public: + static SystemZConstantPoolValue * + Create(const GlobalValue *GV, SystemZCP::SystemZCPModifier Modifier); + + // Override MachineConstantPoolValue. + virtual unsigned getRelocationInfo() const LLVM_OVERRIDE; + virtual int getExistingMachineCPValue(MachineConstantPool *CP, + unsigned Alignment) LLVM_OVERRIDE; + virtual void addSelectionDAGCSEId(FoldingSetNodeID &ID) LLVM_OVERRIDE; + virtual void print(raw_ostream &O) const LLVM_OVERRIDE; + + // Access SystemZ-specific fields. + const GlobalValue *getGlobalValue() const { return GV; } + SystemZCP::SystemZCPModifier getModifier() const { return Modifier; } +}; + +} // End llvm namespace + +#endif diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZFrameLowering.cpp b/contrib/llvm/lib/Target/SystemZ/SystemZFrameLowering.cpp new file mode 100644 index 0000000..fda33de --- /dev/null +++ b/contrib/llvm/lib/Target/SystemZ/SystemZFrameLowering.cpp @@ -0,0 +1,535 @@ +//===-- SystemZFrameLowering.cpp - Frame lowering for SystemZ -------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "SystemZFrameLowering.h" +#include "SystemZCallingConv.h" +#include "SystemZInstrBuilder.h" +#include "SystemZMachineFunctionInfo.h" +#include "SystemZTargetMachine.h" +#include "llvm/CodeGen/MachineModuleInfo.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/IR/Function.h" + +using namespace llvm; + +SystemZFrameLowering::SystemZFrameLowering(const SystemZTargetMachine &tm, + const SystemZSubtarget &sti) + : TargetFrameLowering(TargetFrameLowering::StackGrowsDown, 8, + -SystemZMC::CallFrameSize), + TM(tm), + STI(sti) { + // The ABI-defined register save slots, relative to the incoming stack + // pointer. + static const unsigned SpillOffsetTable[][2] = { + { SystemZ::R2D, 0x10 }, + { SystemZ::R3D, 0x18 }, + { SystemZ::R4D, 0x20 }, + { SystemZ::R5D, 0x28 }, + { SystemZ::R6D, 0x30 }, + { SystemZ::R7D, 0x38 }, + { SystemZ::R8D, 0x40 }, + { SystemZ::R9D, 0x48 }, + { SystemZ::R10D, 0x50 }, + { SystemZ::R11D, 0x58 }, + { SystemZ::R12D, 0x60 }, + { SystemZ::R13D, 0x68 }, + { SystemZ::R14D, 0x70 }, + { SystemZ::R15D, 0x78 }, + { SystemZ::F0D, 0x80 }, + { SystemZ::F2D, 0x88 }, + { SystemZ::F4D, 0x90 }, + { SystemZ::F6D, 0x98 } + }; + + // Create a mapping from register number to save slot offset. 
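
The table follows a simple pattern relative to the incoming %r15: GPR %rN has its ABI-defined save slot at offset 8*N, and the argument FPRs %f0/%f2/%f4/%f6 follow at 128 + 4*N, with the last slot ending exactly at the 160-byte boundary. A quick standalone check of a few entries:

  #include <cassert>

  static unsigned gprSlot(unsigned N) { return 8 * N; }        // %rN
  static unsigned fprSlot(unsigned N) { return 128 + 4 * N; }  // %fN, N even

  int main() {
    assert(gprSlot(2)  == 0x10);
    assert(gprSlot(6)  == 0x30);
    assert(gprSlot(15) == 0x78);
    assert(fprSlot(0)  == 0x80);
    assert(fprSlot(6)  == 0x98);
    assert(fprSlot(6) + 8 == 160);   // the final slot ends at the ABI call frame size
    return 0;
  }
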
+ RegSpillOffsets.grow(SystemZ::NUM_TARGET_REGS); + for (unsigned I = 0, E = array_lengthof(SpillOffsetTable); I != E; ++I) + RegSpillOffsets[SpillOffsetTable[I][0]] = SpillOffsetTable[I][1]; +} + +void SystemZFrameLowering:: +processFunctionBeforeCalleeSavedScan(MachineFunction &MF, + RegScavenger *RS) const { + MachineFrameInfo *MFFrame = MF.getFrameInfo(); + MachineRegisterInfo &MRI = MF.getRegInfo(); + const TargetRegisterInfo *TRI = MF.getTarget().getRegisterInfo(); + bool HasFP = hasFP(MF); + SystemZMachineFunctionInfo *MFI = MF.getInfo<SystemZMachineFunctionInfo>(); + bool IsVarArg = MF.getFunction()->isVarArg(); + + // va_start stores incoming FPR varargs in the normal way, but delegates + // the saving of incoming GPR varargs to spillCalleeSavedRegisters(). + // Record these pending uses, which typically include the call-saved + // argument register R6D. + if (IsVarArg) + for (unsigned I = MFI->getVarArgsFirstGPR(); I < SystemZ::NumArgGPRs; ++I) + MRI.setPhysRegUsed(SystemZ::ArgGPRs[I]); + + // If the function requires a frame pointer, record that the hard + // frame pointer will be clobbered. + if (HasFP) + MRI.setPhysRegUsed(SystemZ::R11D); + + // If the function calls other functions, record that the return + // address register will be clobbered. + if (MFFrame->hasCalls()) + MRI.setPhysRegUsed(SystemZ::R14D); + + // If we are saving GPRs other than the stack pointer, we might as well + // save and restore the stack pointer at the same time, via STMG and LMG. + // This allows the deallocation to be done by the LMG, rather than needing + // a separate %r15 addition. + const uint16_t *CSRegs = TRI->getCalleeSavedRegs(&MF); + for (unsigned I = 0; CSRegs[I]; ++I) { + unsigned Reg = CSRegs[I]; + if (SystemZ::GR64BitRegClass.contains(Reg) && MRI.isPhysRegUsed(Reg)) { + MRI.setPhysRegUsed(SystemZ::R15D); + break; + } + } +} + +// Add GPR64 to the save instruction being built by MIB, which is in basic +// block MBB. IsImplicit says whether this is an explicit operand to the +// instruction, or an implicit one that comes between the explicit start +// and end registers. +static void addSavedGPR(MachineBasicBlock &MBB, MachineInstrBuilder &MIB, + const SystemZTargetMachine &TM, + unsigned GPR64, bool IsImplicit) { + const SystemZRegisterInfo *RI = TM.getRegisterInfo(); + unsigned GPR32 = RI->getSubReg(GPR64, SystemZ::subreg_32bit); + bool IsLive = MBB.isLiveIn(GPR64) || MBB.isLiveIn(GPR32); + if (!IsLive || !IsImplicit) { + MIB.addReg(GPR64, getImplRegState(IsImplicit) | getKillRegState(!IsLive)); + if (!IsLive) + MBB.addLiveIn(GPR64); + } +} + +bool SystemZFrameLowering:: +spillCalleeSavedRegisters(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBI, + const std::vector<CalleeSavedInfo> &CSI, + const TargetRegisterInfo *TRI) const { + if (CSI.empty()) + return false; + + MachineFunction &MF = *MBB.getParent(); + const TargetInstrInfo *TII = MF.getTarget().getInstrInfo(); + SystemZMachineFunctionInfo *ZFI = MF.getInfo<SystemZMachineFunctionInfo>(); + bool IsVarArg = MF.getFunction()->isVarArg(); + DebugLoc DL = MBBI != MBB.end() ? MBBI->getDebugLoc() : DebugLoc(); + + // Scan the call-saved GPRs and find the bounds of the register spill area. 
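
The scan below works because the save slots sit in register order, so a single STMG from the lowest saved GPR up to %r15 drops every register into its own ABI slot. A standalone sketch with a hypothetical clobber set (the real loop walks CSI and RegSpillOffsets rather than a plain array):

  #include <cstdio>

  int main() {
    bool Saved[16] = {};
    Saved[6] = Saved[12] = Saved[13] = Saved[14] = Saved[15] = true;  // made-up clobber set

    unsigned LowGPR = 0, StartOffset = 0;
    for (unsigned N = 2; N <= 15; ++N)
      if (Saved[N]) { LowGPR = N; StartOffset = 8 * N; break; }       // slot offset is 8*N

    // Everything from LowGPR up to %r15 goes out with one store-multiple.
    std::printf("STMG %%r%u, %%r15, %u(%%r15)\n", LowGPR, StartOffset);  // STMG %r6, %r15, 48(%r15)
    return 0;
  }
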
+ unsigned SavedGPRFrameSize = 0; + unsigned LowGPR = 0; + unsigned HighGPR = SystemZ::R15D; + unsigned StartOffset = -1U; + for (unsigned I = 0, E = CSI.size(); I != E; ++I) { + unsigned Reg = CSI[I].getReg(); + if (SystemZ::GR64BitRegClass.contains(Reg)) { + SavedGPRFrameSize += 8; + unsigned Offset = RegSpillOffsets[Reg]; + assert(Offset && "Unexpected GPR save"); + if (StartOffset > Offset) { + LowGPR = Reg; + StartOffset = Offset; + } + } + } + + // Save information about the range and location of the call-saved + // registers, for use by the epilogue inserter. + ZFI->setSavedGPRFrameSize(SavedGPRFrameSize); + ZFI->setLowSavedGPR(LowGPR); + ZFI->setHighSavedGPR(HighGPR); + + // Include the GPR varargs, if any. R6D is call-saved, so would + // be included by the loop above, but we also need to handle the + // call-clobbered argument registers. + if (IsVarArg) { + unsigned FirstGPR = ZFI->getVarArgsFirstGPR(); + if (FirstGPR < SystemZ::NumArgGPRs) { + unsigned Reg = SystemZ::ArgGPRs[FirstGPR]; + unsigned Offset = RegSpillOffsets[Reg]; + if (StartOffset > Offset) { + LowGPR = Reg; StartOffset = Offset; + } + } + } + + // Save GPRs + if (LowGPR) { + assert(LowGPR != HighGPR && "Should be saving %r15 and something else"); + + // Build an STMG instruction. + MachineInstrBuilder MIB = BuildMI(MBB, MBBI, DL, TII->get(SystemZ::STMG)); + + // Add the explicit register operands. + addSavedGPR(MBB, MIB, TM, LowGPR, false); + addSavedGPR(MBB, MIB, TM, HighGPR, false); + + // Add the address. + MIB.addReg(SystemZ::R15D).addImm(StartOffset); + + // Make sure all call-saved GPRs are included as operands and are + // marked as live on entry. + for (unsigned I = 0, E = CSI.size(); I != E; ++I) { + unsigned Reg = CSI[I].getReg(); + if (SystemZ::GR64BitRegClass.contains(Reg)) + addSavedGPR(MBB, MIB, TM, Reg, true); + } + + // ...likewise GPR varargs. + if (IsVarArg) + for (unsigned I = ZFI->getVarArgsFirstGPR(); I < SystemZ::NumArgGPRs; ++I) + addSavedGPR(MBB, MIB, TM, SystemZ::ArgGPRs[I], true); + } + + // Save FPRs in the normal TargetInstrInfo way. + for (unsigned I = 0, E = CSI.size(); I != E; ++I) { + unsigned Reg = CSI[I].getReg(); + if (SystemZ::FP64BitRegClass.contains(Reg)) { + MBB.addLiveIn(Reg); + TII->storeRegToStackSlot(MBB, MBBI, Reg, true, CSI[I].getFrameIdx(), + &SystemZ::FP64BitRegClass, TRI); + } + } + + return true; +} + +bool SystemZFrameLowering:: +restoreCalleeSavedRegisters(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBI, + const std::vector<CalleeSavedInfo> &CSI, + const TargetRegisterInfo *TRI) const { + if (CSI.empty()) + return false; + + MachineFunction &MF = *MBB.getParent(); + const TargetInstrInfo *TII = MF.getTarget().getInstrInfo(); + SystemZMachineFunctionInfo *ZFI = MF.getInfo<SystemZMachineFunctionInfo>(); + bool HasFP = hasFP(MF); + DebugLoc DL = MBBI != MBB.end() ? MBBI->getDebugLoc() : DebugLoc(); + + // Restore FPRs in the normal TargetInstrInfo way. + for (unsigned I = 0, E = CSI.size(); I != E; ++I) { + unsigned Reg = CSI[I].getReg(); + if (SystemZ::FP64BitRegClass.contains(Reg)) + TII->loadRegFromStackSlot(MBB, MBBI, Reg, CSI[I].getFrameIdx(), + &SystemZ::FP64BitRegClass, TRI); + } + + // Restore call-saved GPRs (but not call-clobbered varargs, which at + // this point might hold return values). + unsigned LowGPR = ZFI->getLowSavedGPR(); + unsigned HighGPR = ZFI->getHighSavedGPR(); + unsigned StartOffset = RegSpillOffsets[LowGPR]; + if (LowGPR) { + // If we saved any of %r2-%r5 as varargs, we should also be saving + // and restoring %r6. 
If we're saving %r6 or above, we should be + // restoring it too. + assert(LowGPR != HighGPR && "Should be loading %r15 and something else"); + + // Build an LMG instruction. + MachineInstrBuilder MIB = BuildMI(MBB, MBBI, DL, TII->get(SystemZ::LMG)); + + // Add the explicit register operands. + MIB.addReg(LowGPR, RegState::Define); + MIB.addReg(HighGPR, RegState::Define); + + // Add the address. + MIB.addReg(HasFP ? SystemZ::R11D : SystemZ::R15D); + MIB.addImm(StartOffset); + + // Do a second scan adding regs as being defined by instruction + for (unsigned I = 0, E = CSI.size(); I != E; ++I) { + unsigned Reg = CSI[I].getReg(); + if (Reg != LowGPR && Reg != HighGPR) + MIB.addReg(Reg, RegState::ImplicitDefine); + } + } + + return true; +} + +// Emit instructions before MBBI (in MBB) to add NumBytes to Reg. +static void emitIncrement(MachineBasicBlock &MBB, + MachineBasicBlock::iterator &MBBI, + const DebugLoc &DL, + unsigned Reg, int64_t NumBytes, + const TargetInstrInfo *TII) { + while (NumBytes) { + unsigned Opcode; + int64_t ThisVal = NumBytes; + if (isInt<16>(NumBytes)) + Opcode = SystemZ::AGHI; + else { + Opcode = SystemZ::AGFI; + // Make sure we maintain 8-byte stack alignment. + int64_t MinVal = -int64_t(1) << 31; + int64_t MaxVal = (int64_t(1) << 31) - 8; + if (ThisVal < MinVal) + ThisVal = MinVal; + else if (ThisVal > MaxVal) + ThisVal = MaxVal; + } + MachineInstr *MI = BuildMI(MBB, MBBI, DL, TII->get(Opcode), Reg) + .addReg(Reg).addImm(ThisVal); + // The PSW implicit def is dead. + MI->getOperand(3).setIsDead(); + NumBytes -= ThisVal; + } +} + +void SystemZFrameLowering::emitPrologue(MachineFunction &MF) const { + MachineBasicBlock &MBB = MF.front(); + MachineFrameInfo *MFFrame = MF.getFrameInfo(); + const SystemZInstrInfo *ZII = + static_cast<const SystemZInstrInfo*>(MF.getTarget().getInstrInfo()); + SystemZMachineFunctionInfo *ZFI = MF.getInfo<SystemZMachineFunctionInfo>(); + MachineBasicBlock::iterator MBBI = MBB.begin(); + MachineModuleInfo &MMI = MF.getMMI(); + std::vector<MachineMove> &Moves = MMI.getFrameMoves(); + const std::vector<CalleeSavedInfo> &CSI = MFFrame->getCalleeSavedInfo(); + bool HasFP = hasFP(MF); + DebugLoc DL = MBBI != MBB.end() ? MBBI->getDebugLoc() : DebugLoc(); + + // The current offset of the stack pointer from the CFA. + int64_t SPOffsetFromCFA = -SystemZMC::CFAOffsetFromInitialSP; + + if (ZFI->getLowSavedGPR()) { + // Skip over the GPR saves. + if (MBBI != MBB.end() && MBBI->getOpcode() == SystemZ::STMG) + ++MBBI; + else + llvm_unreachable("Couldn't skip over GPR saves"); + + // Add CFI for the GPR saves. + MCSymbol *GPRSaveLabel = MMI.getContext().CreateTempSymbol(); + BuildMI(MBB, MBBI, DL, + ZII->get(TargetOpcode::PROLOG_LABEL)).addSym(GPRSaveLabel); + for (std::vector<CalleeSavedInfo>::const_iterator + I = CSI.begin(), E = CSI.end(); I != E; ++I) { + unsigned Reg = I->getReg(); + if (SystemZ::GR64BitRegClass.contains(Reg)) { + int64_t Offset = SPOffsetFromCFA + RegSpillOffsets[Reg]; + MachineLocation StackSlot(MachineLocation::VirtualFP, Offset); + MachineLocation RegValue(Reg); + Moves.push_back(MachineMove(GPRSaveLabel, StackSlot, RegValue)); + } + } + } + + uint64_t StackSize = getAllocatedStackSize(MF); + if (StackSize) { + // Allocate StackSize bytes. + int64_t Delta = -int64_t(StackSize); + emitIncrement(MBB, MBBI, DL, SystemZ::R15D, Delta, ZII); + + // Add CFI for the allocation. 
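
To put numbers on the allocation and its CFI record: on entry the CFA is the incoming %r15 plus 160, and after the prologue subtracts the frame size the same CFA has to be described relative to the new %r15. A standalone check with a hypothetical 184-byte frame:

  #include <cassert>
  #include <cstdint>

  int main() {
    int64_t SPOffsetFromCFA = -160;   // incoming %r15 relative to the CFA
    int64_t StackSize = 184;          // hypothetical allocated frame size
    int64_t Delta = -StackSize;

    // 184 fits a signed 16-bit immediate, so emitIncrement needs just one AGHI.
    assert(Delta >= INT16_MIN && Delta <= INT16_MAX);

    SPOffsetFromCFA += Delta;         // what the CFI move below records as the new CFA offset
    assert(SPOffsetFromCFA == -344);  // i.e. CFA = new %r15 + 344
    return 0;
  }
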
+ MCSymbol *AdjustSPLabel = MMI.getContext().CreateTempSymbol(); + BuildMI(MBB, MBBI, DL, ZII->get(TargetOpcode::PROLOG_LABEL)) + .addSym(AdjustSPLabel); + MachineLocation FPDest(MachineLocation::VirtualFP); + MachineLocation FPSrc(MachineLocation::VirtualFP, SPOffsetFromCFA + Delta); + Moves.push_back(MachineMove(AdjustSPLabel, FPDest, FPSrc)); + SPOffsetFromCFA += Delta; + } + + if (HasFP) { + // Copy the base of the frame to R11. + BuildMI(MBB, MBBI, DL, ZII->get(SystemZ::LGR), SystemZ::R11D) + .addReg(SystemZ::R15D); + + // Add CFI for the new frame location. + MCSymbol *SetFPLabel = MMI.getContext().CreateTempSymbol(); + BuildMI(MBB, MBBI, DL, ZII->get(TargetOpcode::PROLOG_LABEL)) + .addSym(SetFPLabel); + MachineLocation HardFP(SystemZ::R11D); + MachineLocation VirtualFP(MachineLocation::VirtualFP); + Moves.push_back(MachineMove(SetFPLabel, HardFP, VirtualFP)); + + // Mark the FramePtr as live at the beginning of every block except + // the entry block. (We'll have marked R11 as live on entry when + // saving the GPRs.) + for (MachineFunction::iterator + I = llvm::next(MF.begin()), E = MF.end(); I != E; ++I) + I->addLiveIn(SystemZ::R11D); + } + + // Skip over the FPR saves. + MCSymbol *FPRSaveLabel = 0; + for (std::vector<CalleeSavedInfo>::const_iterator + I = CSI.begin(), E = CSI.end(); I != E; ++I) { + unsigned Reg = I->getReg(); + if (SystemZ::FP64BitRegClass.contains(Reg)) { + if (MBBI != MBB.end() && + (MBBI->getOpcode() == SystemZ::STD || + MBBI->getOpcode() == SystemZ::STDY)) + ++MBBI; + else + llvm_unreachable("Couldn't skip over FPR save"); + + // Add CFI for the this save. + if (!FPRSaveLabel) + FPRSaveLabel = MMI.getContext().CreateTempSymbol(); + unsigned Reg = I->getReg(); + int64_t Offset = getFrameIndexOffset(MF, I->getFrameIdx()); + MachineLocation Slot(MachineLocation::VirtualFP, + SPOffsetFromCFA + Offset); + MachineLocation RegValue(Reg); + Moves.push_back(MachineMove(FPRSaveLabel, Slot, RegValue)); + } + } + // Complete the CFI for the FPR saves, modelling them as taking effect + // after the last save. + if (FPRSaveLabel) + BuildMI(MBB, MBBI, DL, ZII->get(TargetOpcode::PROLOG_LABEL)) + .addSym(FPRSaveLabel); +} + +void SystemZFrameLowering::emitEpilogue(MachineFunction &MF, + MachineBasicBlock &MBB) const { + MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr(); + const SystemZInstrInfo *ZII = + static_cast<const SystemZInstrInfo*>(MF.getTarget().getInstrInfo()); + SystemZMachineFunctionInfo *ZFI = MF.getInfo<SystemZMachineFunctionInfo>(); + + // Skip the return instruction. + assert(MBBI->getOpcode() == SystemZ::RET && + "Can only insert epilogue into returning blocks"); + + uint64_t StackSize = getAllocatedStackSize(MF); + if (ZFI->getLowSavedGPR()) { + --MBBI; + unsigned Opcode = MBBI->getOpcode(); + if (Opcode != SystemZ::LMG) + llvm_unreachable("Expected to see callee-save register restore code"); + + unsigned AddrOpNo = 2; + DebugLoc DL = MBBI->getDebugLoc(); + uint64_t Offset = StackSize + MBBI->getOperand(AddrOpNo + 1).getImm(); + unsigned NewOpcode = ZII->getOpcodeForOffset(Opcode, Offset); + + // If the offset is too large, use the largest stack-aligned offset + // and add the rest to the base register (the stack or frame pointer). 
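
Concretely, LMG only has a signed 20-bit displacement, so a very large frame cannot reach the save slots directly once StackSize has been folded back into the offset. A standalone illustration with made-up sizes; 0x7fff8 is the largest 8-byte-aligned value such a displacement can hold, matching the constant used just below:

  #include <cstdio>
  #include <cstdint>

  int main() {
    uint64_t StackSize = 0x90000;              // hypothetical 576KiB frame
    uint64_t Offset = StackSize + 48;          // %r6's slot with the deallocation folded in
    const uint64_t MaxDisp = 0x7fff8;

    if (Offset > MaxDisp) {                    // stand-in for getOpcodeForOffset failing
      uint64_t NumBytes = Offset - MaxDisp;
      std::printf("AGFI %%r15, %llu\n", (unsigned long long)NumBytes);  // peel off the excess first
      Offset -= NumBytes;
    }
    std::printf("LMG %%r6, %%r15, %llu(%%r15)\n", (unsigned long long)Offset);
    return 0;
  }
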
+ if (!NewOpcode) { + uint64_t NumBytes = Offset - 0x7fff8; + emitIncrement(MBB, MBBI, DL, MBBI->getOperand(AddrOpNo).getReg(), + NumBytes, ZII); + Offset -= NumBytes; + NewOpcode = ZII->getOpcodeForOffset(Opcode, Offset); + assert(NewOpcode && "No restore instruction available"); + } + + MBBI->setDesc(ZII->get(NewOpcode)); + MBBI->getOperand(AddrOpNo + 1).ChangeToImmediate(Offset); + } else if (StackSize) { + DebugLoc DL = MBBI->getDebugLoc(); + emitIncrement(MBB, MBBI, DL, SystemZ::R15D, StackSize, ZII); + } +} + +bool SystemZFrameLowering::hasFP(const MachineFunction &MF) const { + return (MF.getTarget().Options.DisableFramePointerElim(MF) || + MF.getFrameInfo()->hasVarSizedObjects() || + MF.getInfo<SystemZMachineFunctionInfo>()->getManipulatesSP()); +} + +int SystemZFrameLowering::getFrameIndexOffset(const MachineFunction &MF, + int FI) const { + const MachineFrameInfo *MFFrame = MF.getFrameInfo(); + + // Start with the offset of FI from the top of the caller-allocated frame + // (i.e. the top of the 160 bytes allocated by the caller). This initial + // offset is therefore negative. + int64_t Offset = (MFFrame->getObjectOffset(FI) + + MFFrame->getOffsetAdjustment()); + if (FI >= 0) + // Non-fixed objects are allocated below the incoming stack pointer. + // Account for the space at the top of the frame that we choose not + // to allocate. + Offset += getUnallocatedTopBytes(MF); + + // Make the offset relative to the incoming stack pointer. + Offset -= getOffsetOfLocalArea(); + + // Make the offset relative to the bottom of the frame. + Offset += getAllocatedStackSize(MF); + + return Offset; +} + +uint64_t SystemZFrameLowering:: +getUnallocatedTopBytes(const MachineFunction &MF) const { + return MF.getInfo<SystemZMachineFunctionInfo>()->getSavedGPRFrameSize(); +} + +uint64_t SystemZFrameLowering:: +getAllocatedStackSize(const MachineFunction &MF) const { + const MachineFrameInfo *MFFrame = MF.getFrameInfo(); + + // Start with the size of the local variables and spill slots. + uint64_t StackSize = MFFrame->getStackSize(); + + // Remove any bytes that we choose not to allocate. + StackSize -= getUnallocatedTopBytes(MF); + + // Include space for an emergency spill slot, if one might be needed. + StackSize += getEmergencySpillSlotSize(MF); + + // We need to allocate the ABI-defined 160-byte base area whenever + // we allocate stack space for our own use and whenever we call another + // function. + if (StackSize || MFFrame->hasVarSizedObjects() || MFFrame->hasCalls()) + StackSize += SystemZMC::CallFrameSize; + + return StackSize; +} + +unsigned SystemZFrameLowering:: +getEmergencySpillSlotSize(const MachineFunction &MF) const { + const MachineFrameInfo *MFFrame = MF.getFrameInfo(); + uint64_t MaxReach = MFFrame->getStackSize() + SystemZMC::CallFrameSize * 2; + return isUInt<12>(MaxReach) ? 0 : 8; +} + +unsigned SystemZFrameLowering:: +getEmergencySpillSlotOffset(const MachineFunction &MF) const { + assert(getEmergencySpillSlotSize(MF) && "No emergency spill slot"); + return SystemZMC::CallFrameSize; +} + +bool +SystemZFrameLowering::hasReservedCallFrame(const MachineFunction &MF) const { + // The ABI requires us to allocate 160 bytes of stack space for the callee, + // with any outgoing stack arguments being placed above that. It seems + // better to make that area a permanent feature of the frame even if + // we're using a frame pointer. 
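
Two of the size computations above are easy to sanity-check by hand: getAllocatedStackSize drops the unneeded GPR save slots and then adds the 160-byte area for our own callees, and getEmergencySpillSlotSize only reserves a slot once the worst-case spill offset stops fitting a 12-bit unsigned displacement. A standalone check with made-up sizes:

  #include <cassert>
  #include <cstdint>

  int main() {
    // getAllocatedStackSize: 40 bytes of locals/spills, 16 of which are the
    // generic GPR save slots we never use; the function makes calls.
    uint64_t StackSize = 40;
    StackSize -= 16;            // getUnallocatedTopBytes
    StackSize += 0;             // no emergency spill slot needed for this frame
    StackSize += 160;           // non-leaf, so allocate the callee's ABI area
    assert(StackSize == 184);

    // getEmergencySpillSlotSize: MaxReach = raw frame size + 2 * 160.
    auto slotSize = [](uint64_t FrameBytes) -> unsigned {
      uint64_t MaxReach = FrameBytes + 2 * 160;
      return MaxReach <= 4095 ? 0u : 8u;
    };
    assert(slotSize(3775) == 0);   // 4095 still fits a 12-bit displacement
    assert(slotSize(3776) == 8);   // 4096 does not, so reserve 8 bytes
    return 0;
  }
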
+ return true; +} + +void SystemZFrameLowering:: +eliminateCallFramePseudoInstr(MachineFunction &MF, + MachineBasicBlock &MBB, + MachineBasicBlock::iterator MI) const { + switch (MI->getOpcode()) { + case SystemZ::ADJCALLSTACKDOWN: + case SystemZ::ADJCALLSTACKUP: + assert(hasReservedCallFrame(MF) && + "ADJSTACKDOWN and ADJSTACKUP should be no-ops"); + MBB.erase(MI); + break; + + default: + llvm_unreachable("Unexpected call frame instruction"); + } +} diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZFrameLowering.h b/contrib/llvm/lib/Target/SystemZ/SystemZFrameLowering.h new file mode 100644 index 0000000..5ca049c --- /dev/null +++ b/contrib/llvm/lib/Target/SystemZ/SystemZFrameLowering.h @@ -0,0 +1,93 @@ +//===-- SystemZFrameLowering.h - Frame lowering for SystemZ -----*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#ifndef SYSTEMZFRAMELOWERING_H +#define SYSTEMZFRAMELOWERING_H + +#include "SystemZSubtarget.h" +#include "llvm/ADT/IndexedMap.h" +#include "llvm/Target/TargetFrameLowering.h" + +namespace llvm { +class SystemZTargetMachine; +class SystemZSubtarget; + +class SystemZFrameLowering : public TargetFrameLowering { + IndexedMap<unsigned> RegSpillOffsets; + +protected: + const SystemZTargetMachine &TM; + const SystemZSubtarget &STI; + +public: + SystemZFrameLowering(const SystemZTargetMachine &tm, + const SystemZSubtarget &sti); + + // Override FrameLowering. + virtual void + processFunctionBeforeCalleeSavedScan(MachineFunction &MF, + RegScavenger *RS) const LLVM_OVERRIDE; + virtual bool + spillCalleeSavedRegisters(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBI, + const std::vector<CalleeSavedInfo> &CSI, + const TargetRegisterInfo *TRI) const + LLVM_OVERRIDE; + virtual bool + restoreCalleeSavedRegisters(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBII, + const std::vector<CalleeSavedInfo> &CSI, + const TargetRegisterInfo *TRI) const + LLVM_OVERRIDE; + virtual void emitPrologue(MachineFunction &MF) const LLVM_OVERRIDE; + virtual void emitEpilogue(MachineFunction &MF, + MachineBasicBlock &MBB) const LLVM_OVERRIDE; + virtual bool hasFP(const MachineFunction &MF) const LLVM_OVERRIDE; + virtual int getFrameIndexOffset(const MachineFunction &MF, + int FI) const LLVM_OVERRIDE; + virtual bool hasReservedCallFrame(const MachineFunction &MF) const + LLVM_OVERRIDE; + virtual void + eliminateCallFramePseudoInstr(MachineFunction &MF, + MachineBasicBlock &MBB, + MachineBasicBlock::iterator MI) const + LLVM_OVERRIDE; + + // The target-independent code automatically allocates save slots for + // call-saved GPRs. However, we don't need those slots for SystemZ, + // because the ABI sets aside GPR save slots in the caller-allocated part + // of the frame. Since the target-independent code puts this unneeded + // area at the top of the callee-allocated part of frame, we choose not + // to allocate it and adjust the offsets accordingly. Return the + // size of this unallocated area. + // FIXME: seems a bit hackish. + uint64_t getUnallocatedTopBytes(const MachineFunction &MF) const; + + // Return the number of bytes in the callee-allocated part of the frame. + uint64_t getAllocatedStackSize(const MachineFunction &MF) const; + + // Return the number of frame bytes that should be reserved for + // an emergency spill slot, for use by the register scaveneger. 
+ // Return 0 if register scaveging won't be needed. + unsigned getEmergencySpillSlotSize(const MachineFunction &MF) const; + + // Return the offset from the frame pointer of the emergency spill slot, + // which always fits within a 12-bit unsigned displacement field. + // Only valid if getEmergencySpillSlotSize(MF) returns nonzero. + unsigned getEmergencySpillSlotOffset(const MachineFunction &MF) const; + + // Return the byte offset from the incoming stack pointer of Reg's + // ABI-defined save slot. Return 0 if no slot is defined for Reg. + unsigned getRegSpillOffset(unsigned Reg) const { + return RegSpillOffsets[Reg]; + } +}; +} // end namespace llvm + +#endif diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp b/contrib/llvm/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp new file mode 100644 index 0000000..d436ba9 --- /dev/null +++ b/contrib/llvm/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp @@ -0,0 +1,616 @@ +//===-- SystemZISelDAGToDAG.cpp - A dag to dag inst selector for SystemZ --===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines an instruction selector for the SystemZ target. +// +//===----------------------------------------------------------------------===// + +#include "SystemZTargetMachine.h" +#include "llvm/CodeGen/SelectionDAGISel.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" + +using namespace llvm; + +namespace { +// Used to build addressing modes. +struct SystemZAddressingMode { + // The shape of the address. + enum AddrForm { + // base+displacement + FormBD, + + // base+displacement+index for load and store operands + FormBDXNormal, + + // base+displacement+index for load address operands + FormBDXLA, + + // base+displacement+index+ADJDYNALLOC + FormBDXDynAlloc + }; + AddrForm Form; + + // The type of displacement. The enum names here correspond directly + // to the definitions in SystemZOperand.td. We could split them into + // flags -- single/pair, 128-bit, etc. -- but it hardly seems worth it. + enum DispRange { + Disp12Only, + Disp12Pair, + Disp20Only, + Disp20Only128, + Disp20Pair + }; + DispRange DR; + + // The parts of the address. The address is equivalent to: + // + // Base + Disp + Index + (IncludesDynAlloc ? ADJDYNALLOC : 0) + SDValue Base; + int64_t Disp; + SDValue Index; + bool IncludesDynAlloc; + + SystemZAddressingMode(AddrForm form, DispRange dr) + : Form(form), DR(dr), Base(), Disp(0), Index(), + IncludesDynAlloc(false) {} + + // True if the address can have an index register. + bool hasIndexField() { return Form != FormBD; } + + // True if the address can (and must) include ADJDYNALLOC. + bool isDynAlloc() { return Form == FormBDXDynAlloc; } + + void dump() { + errs() << "SystemZAddressingMode " << this << '\n'; + + errs() << " Base "; + if (Base.getNode() != 0) + Base.getNode()->dump(); + else + errs() << "null\n"; + + if (hasIndexField()) { + errs() << " Index "; + if (Index.getNode() != 0) + Index.getNode()->dump(); + else + errs() << "null\n"; + } + + errs() << " Disp " << Disp; + if (IncludesDynAlloc) + errs() << " + ADJDYNALLOC"; + errs() << '\n'; + } +}; + +class SystemZDAGToDAGISel : public SelectionDAGISel { + const SystemZTargetLowering &Lowering; + const SystemZSubtarget &Subtarget; + + // Used by SystemZOperands.td to create integer constants. 
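  // Illustrative example (exposition only): matching the address
  // (add (add FrameIndex:%stack.0, %index), 4092) in FormBDXNormal mode
  // with a Disp12Pair range would leave the SystemZAddressingMode above as
  //
  //   Base  = FrameIndex:%stack.0   (later lowered to a TargetFrameIndex)
  //   Index = %index
  //   Disp  = 4092
  //   IncludesDynAlloc = false
  //
  // which getAddressOperands then turns into the base, displacement and
  // index operands of an RX- or RXY-format memory instruction.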
+ inline SDValue getImm(const SDNode *Node, uint64_t Imm) { + return CurDAG->getTargetConstant(Imm, Node->getValueType(0)); + } + + // Try to fold more of the base or index of AM into AM, where IsBase + // selects between the base and index. + bool expandAddress(SystemZAddressingMode &AM, bool IsBase); + + // Try to describe N in AM, returning true on success. + bool selectAddress(SDValue N, SystemZAddressingMode &AM); + + // Extract individual target operands from matched address AM. + void getAddressOperands(const SystemZAddressingMode &AM, EVT VT, + SDValue &Base, SDValue &Disp); + void getAddressOperands(const SystemZAddressingMode &AM, EVT VT, + SDValue &Base, SDValue &Disp, SDValue &Index); + + // Try to match Addr as a FormBD address with displacement type DR. + // Return true on success, storing the base and displacement in + // Base and Disp respectively. + bool selectBDAddr(SystemZAddressingMode::DispRange DR, SDValue Addr, + SDValue &Base, SDValue &Disp); + + // Try to match Addr as a FormBDX* address of form Form with + // displacement type DR. Return true on success, storing the base, + // displacement and index in Base, Disp and Index respectively. + bool selectBDXAddr(SystemZAddressingMode::AddrForm Form, + SystemZAddressingMode::DispRange DR, SDValue Addr, + SDValue &Base, SDValue &Disp, SDValue &Index); + + // PC-relative address matching routines used by SystemZOperands.td. + bool selectPCRelAddress(SDValue Addr, SDValue &Target) { + if (Addr.getOpcode() == SystemZISD::PCREL_WRAPPER) { + Target = Addr.getOperand(0); + return true; + } + return false; + } + + // BD matching routines used by SystemZOperands.td. + bool selectBDAddr12Only(SDValue Addr, SDValue &Base, SDValue &Disp) { + return selectBDAddr(SystemZAddressingMode::Disp12Only, Addr, Base, Disp); + } + bool selectBDAddr12Pair(SDValue Addr, SDValue &Base, SDValue &Disp) { + return selectBDAddr(SystemZAddressingMode::Disp12Pair, Addr, Base, Disp); + } + bool selectBDAddr20Only(SDValue Addr, SDValue &Base, SDValue &Disp) { + return selectBDAddr(SystemZAddressingMode::Disp20Only, Addr, Base, Disp); + } + bool selectBDAddr20Pair(SDValue Addr, SDValue &Base, SDValue &Disp) { + return selectBDAddr(SystemZAddressingMode::Disp20Pair, Addr, Base, Disp); + } + + // BDX matching routines used by SystemZOperands.td. 
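  // For exposition: the .td side refers to these routines through
  // ComplexPatterns.  The real definitions live in SystemZOperands.td; a
  // hypothetical binding would look roughly like
  //
  //   def bdxaddr12only : ComplexPattern<i64, 3, "selectBDXAddr12Only",
  //                                      [add, sub, or, frameindex]>;
  //
  // so that instruction patterns can write (load bdxaddr12only:$addr) and
  // have the base, displacement and index operands produced by the
  // selectors below.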
+ bool selectBDXAddr12Only(SDValue Addr, SDValue &Base, SDValue &Disp, + SDValue &Index) { + return selectBDXAddr(SystemZAddressingMode::FormBDXNormal, + SystemZAddressingMode::Disp12Only, + Addr, Base, Disp, Index); + } + bool selectBDXAddr12Pair(SDValue Addr, SDValue &Base, SDValue &Disp, + SDValue &Index) { + return selectBDXAddr(SystemZAddressingMode::FormBDXNormal, + SystemZAddressingMode::Disp12Pair, + Addr, Base, Disp, Index); + } + bool selectDynAlloc12Only(SDValue Addr, SDValue &Base, SDValue &Disp, + SDValue &Index) { + return selectBDXAddr(SystemZAddressingMode::FormBDXDynAlloc, + SystemZAddressingMode::Disp12Only, + Addr, Base, Disp, Index); + } + bool selectBDXAddr20Only(SDValue Addr, SDValue &Base, SDValue &Disp, + SDValue &Index) { + return selectBDXAddr(SystemZAddressingMode::FormBDXNormal, + SystemZAddressingMode::Disp20Only, + Addr, Base, Disp, Index); + } + bool selectBDXAddr20Only128(SDValue Addr, SDValue &Base, SDValue &Disp, + SDValue &Index) { + return selectBDXAddr(SystemZAddressingMode::FormBDXNormal, + SystemZAddressingMode::Disp20Only128, + Addr, Base, Disp, Index); + } + bool selectBDXAddr20Pair(SDValue Addr, SDValue &Base, SDValue &Disp, + SDValue &Index) { + return selectBDXAddr(SystemZAddressingMode::FormBDXNormal, + SystemZAddressingMode::Disp20Pair, + Addr, Base, Disp, Index); + } + bool selectLAAddr12Pair(SDValue Addr, SDValue &Base, SDValue &Disp, + SDValue &Index) { + return selectBDXAddr(SystemZAddressingMode::FormBDXLA, + SystemZAddressingMode::Disp12Pair, + Addr, Base, Disp, Index); + } + bool selectLAAddr20Pair(SDValue Addr, SDValue &Base, SDValue &Disp, + SDValue &Index) { + return selectBDXAddr(SystemZAddressingMode::FormBDXLA, + SystemZAddressingMode::Disp20Pair, + Addr, Base, Disp, Index); + } + + // If Op0 is null, then Node is a constant that can be loaded using: + // + // (Opcode UpperVal LowerVal) + // + // If Op0 is nonnull, then Node can be implemented using: + // + // (Opcode (Opcode Op0 UpperVal) LowerVal) + SDNode *splitLargeImmediate(unsigned Opcode, SDNode *Node, SDValue Op0, + uint64_t UpperVal, uint64_t LowerVal); + +public: + SystemZDAGToDAGISel(SystemZTargetMachine &TM, CodeGenOpt::Level OptLevel) + : SelectionDAGISel(TM, OptLevel), + Lowering(*TM.getTargetLowering()), + Subtarget(*TM.getSubtargetImpl()) { } + + // Override MachineFunctionPass. + virtual const char *getPassName() const LLVM_OVERRIDE { + return "SystemZ DAG->DAG Pattern Instruction Selection"; + } + + // Override SelectionDAGISel. + virtual SDNode *Select(SDNode *Node) LLVM_OVERRIDE; + virtual bool SelectInlineAsmMemoryOperand(const SDValue &Op, + char ConstraintCode, + std::vector<SDValue> &OutOps) + LLVM_OVERRIDE; + + // Include the pieces autogenerated from the target description. + #include "SystemZGenDAGISel.inc" +}; +} // end anonymous namespace + +FunctionPass *llvm::createSystemZISelDag(SystemZTargetMachine &TM, + CodeGenOpt::Level OptLevel) { + return new SystemZDAGToDAGISel(TM, OptLevel); +} + +// Return true if Val should be selected as a displacement for an address +// with range DR. Here we're interested in the range of both the instruction +// described by DR and of any pairing instruction. 
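// For example (illustrative, using the standard z/Architecture pairs):
// a 32-bit load can use either L, with an unsigned 12-bit displacement, or
// its long-displacement pair LY, with a signed 20-bit displacement, so i32
// accesses use Disp12Pair and accept anything that fits the 20-bit form.
// LG exists only in the 20-bit form, so i64 accesses use Disp20Only.
// Disp20Only128 additionally checks Val + 8 so that both halves of a
// 128-bit access stay addressable.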
+static bool selectDisp(SystemZAddressingMode::DispRange DR, int64_t Val) { + switch (DR) { + case SystemZAddressingMode::Disp12Only: + return isUInt<12>(Val); + + case SystemZAddressingMode::Disp12Pair: + case SystemZAddressingMode::Disp20Only: + case SystemZAddressingMode::Disp20Pair: + return isInt<20>(Val); + + case SystemZAddressingMode::Disp20Only128: + return isInt<20>(Val) && isInt<20>(Val + 8); + } + llvm_unreachable("Unhandled displacement range"); +} + +// Change the base or index in AM to Value, where IsBase selects +// between the base and index. +static void changeComponent(SystemZAddressingMode &AM, bool IsBase, + SDValue Value) { + if (IsBase) + AM.Base = Value; + else + AM.Index = Value; +} + +// The base or index of AM is equivalent to Value + ADJDYNALLOC, +// where IsBase selects between the base and index. Try to fold the +// ADJDYNALLOC into AM. +static bool expandAdjDynAlloc(SystemZAddressingMode &AM, bool IsBase, + SDValue Value) { + if (AM.isDynAlloc() && !AM.IncludesDynAlloc) { + changeComponent(AM, IsBase, Value); + AM.IncludesDynAlloc = true; + return true; + } + return false; +} + +// The base of AM is equivalent to Base + Index. Try to use Index as +// the index register. +static bool expandIndex(SystemZAddressingMode &AM, SDValue Base, + SDValue Index) { + if (AM.hasIndexField() && !AM.Index.getNode()) { + AM.Base = Base; + AM.Index = Index; + return true; + } + return false; +} + +// The base or index of AM is equivalent to Op0 + Op1, where IsBase selects +// between the base and index. Try to fold Op1 into AM's displacement. +static bool expandDisp(SystemZAddressingMode &AM, bool IsBase, + SDValue Op0, ConstantSDNode *Op1) { + // First try adjusting the displacement. + int64_t TestDisp = AM.Disp + Op1->getSExtValue(); + if (selectDisp(AM.DR, TestDisp)) { + changeComponent(AM, IsBase, Op0); + AM.Disp = TestDisp; + return true; + } + + // We could consider forcing the displacement into a register and + // using it as an index, but it would need to be carefully tuned. + return false; +} + +bool SystemZDAGToDAGISel::expandAddress(SystemZAddressingMode &AM, + bool IsBase) { + SDValue N = IsBase ? AM.Base : AM.Index; + unsigned Opcode = N.getOpcode(); + if (Opcode == ISD::TRUNCATE) { + N = N.getOperand(0); + Opcode = N.getOpcode(); + } + if (Opcode == ISD::ADD || CurDAG->isBaseWithConstantOffset(N)) { + SDValue Op0 = N.getOperand(0); + SDValue Op1 = N.getOperand(1); + + unsigned Op0Code = Op0->getOpcode(); + unsigned Op1Code = Op1->getOpcode(); + + if (Op0Code == SystemZISD::ADJDYNALLOC) + return expandAdjDynAlloc(AM, IsBase, Op1); + if (Op1Code == SystemZISD::ADJDYNALLOC) + return expandAdjDynAlloc(AM, IsBase, Op0); + + if (Op0Code == ISD::Constant) + return expandDisp(AM, IsBase, Op1, cast<ConstantSDNode>(Op0)); + if (Op1Code == ISD::Constant) + return expandDisp(AM, IsBase, Op0, cast<ConstantSDNode>(Op1)); + + if (IsBase && expandIndex(AM, Op0, Op1)) + return true; + } + return false; +} + +// Return true if an instruction with displacement range DR should be +// used for displacement value Val. selectDisp(DR, Val) must already hold. +static bool isValidDisp(SystemZAddressingMode::DispRange DR, int64_t Val) { + assert(selectDisp(DR, Val) && "Invalid displacement"); + switch (DR) { + case SystemZAddressingMode::Disp12Only: + case SystemZAddressingMode::Disp20Only: + case SystemZAddressingMode::Disp20Only128: + return true; + + case SystemZAddressingMode::Disp12Pair: + // Use the other instruction if the displacement is too large. 
+ return isUInt<12>(Val); + + case SystemZAddressingMode::Disp20Pair: + // Use the other instruction if the displacement is small enough. + return !isUInt<12>(Val); + } + llvm_unreachable("Unhandled displacement range"); +} + +// Return true if Base + Disp + Index should be performed by LA(Y). +static bool shouldUseLA(SDNode *Base, int64_t Disp, SDNode *Index) { + // Don't use LA(Y) for constants. + if (!Base) + return false; + + // Always use LA(Y) for frame addresses, since we know that the destination + // register is almost always (perhaps always) going to be different from + // the frame register. + if (Base->getOpcode() == ISD::FrameIndex) + return true; + + if (Disp) { + // Always use LA(Y) if there is a base, displacement and index. + if (Index) + return true; + + // Always use LA if the displacement is small enough. It should always + // be no worse than AGHI (and better if it avoids a move). + if (isUInt<12>(Disp)) + return true; + + // For similar reasons, always use LAY if the constant is too big for AGHI. + // LAY should be no worse than AGFI. + if (!isInt<16>(Disp)) + return true; + } else { + // Don't use LA for plain registers. + if (!Index) + return false; + + // Don't use LA for plain addition if the index operand is only used + // once. It should be a natural two-operand addition in that case. + if (Index->hasOneUse()) + return false; + + // Prefer addition if the second operation is sign-extended, in the + // hope of using AGF. + unsigned IndexOpcode = Index->getOpcode(); + if (IndexOpcode == ISD::SIGN_EXTEND || + IndexOpcode == ISD::SIGN_EXTEND_INREG) + return false; + } + + // Don't use LA for two-operand addition if either operand is only + // used once. The addition instructions are better in that case. + if (Base->hasOneUse()) + return false; + + return true; +} + +// Return true if Addr is suitable for AM, updating AM if so. +bool SystemZDAGToDAGISel::selectAddress(SDValue Addr, + SystemZAddressingMode &AM) { + // Start out assuming that the address will need to be loaded separately, + // then try to extend it as much as we can. + AM.Base = Addr; + + // First try treating the address as a constant. + if (Addr.getOpcode() == ISD::Constant && + expandDisp(AM, true, SDValue(), cast<ConstantSDNode>(Addr))) + ; + else + // Otherwise try expanding each component. + while (expandAddress(AM, true) || + (AM.Index.getNode() && expandAddress(AM, false))) + continue; + + // Reject cases where it isn't profitable to use LA(Y). + if (AM.Form == SystemZAddressingMode::FormBDXLA && + !shouldUseLA(AM.Base.getNode(), AM.Disp, AM.Index.getNode())) + return false; + + // Reject cases where the other instruction in a pair should be used. + if (!isValidDisp(AM.DR, AM.Disp)) + return false; + + // Make sure that ADJDYNALLOC is included where necessary. + if (AM.isDynAlloc() && !AM.IncludesDynAlloc) + return false; + + DEBUG(AM.dump()); + return true; +} + +// Insert a node into the DAG at least before Pos. This will reposition +// the node as needed, and will assign it a node ID that is <= Pos's ID. +// Note that this does *not* preserve the uniqueness of node IDs! +// The selection DAG must no longer depend on their uniqueness when this +// function is used. 
+static void insertDAGNode(SelectionDAG *DAG, SDNode *Pos, SDValue N) { + if (N.getNode()->getNodeId() == -1 || + N.getNode()->getNodeId() > Pos->getNodeId()) { + DAG->RepositionNode(Pos, N.getNode()); + N.getNode()->setNodeId(Pos->getNodeId()); + } +} + +void SystemZDAGToDAGISel::getAddressOperands(const SystemZAddressingMode &AM, + EVT VT, SDValue &Base, + SDValue &Disp) { + Base = AM.Base; + if (!Base.getNode()) + // Register 0 means "no base". This is mostly useful for shifts. + Base = CurDAG->getRegister(0, VT); + else if (Base.getOpcode() == ISD::FrameIndex) { + // Lower a FrameIndex to a TargetFrameIndex. + int64_t FrameIndex = cast<FrameIndexSDNode>(Base)->getIndex(); + Base = CurDAG->getTargetFrameIndex(FrameIndex, VT); + } else if (Base.getValueType() != VT) { + // Truncate values from i64 to i32, for shifts. + assert(VT == MVT::i32 && Base.getValueType() == MVT::i64 && + "Unexpected truncation"); + DebugLoc DL = Base.getDebugLoc(); + SDValue Trunc = CurDAG->getNode(ISD::TRUNCATE, DL, VT, Base); + insertDAGNode(CurDAG, Base.getNode(), Trunc); + Base = Trunc; + } + + // Lower the displacement to a TargetConstant. + Disp = CurDAG->getTargetConstant(AM.Disp, VT); +} + +void SystemZDAGToDAGISel::getAddressOperands(const SystemZAddressingMode &AM, + EVT VT, SDValue &Base, + SDValue &Disp, SDValue &Index) { + getAddressOperands(AM, VT, Base, Disp); + + Index = AM.Index; + if (!Index.getNode()) + // Register 0 means "no index". + Index = CurDAG->getRegister(0, VT); +} + +bool SystemZDAGToDAGISel::selectBDAddr(SystemZAddressingMode::DispRange DR, + SDValue Addr, SDValue &Base, + SDValue &Disp) { + SystemZAddressingMode AM(SystemZAddressingMode::FormBD, DR); + if (!selectAddress(Addr, AM)) + return false; + + getAddressOperands(AM, Addr.getValueType(), Base, Disp); + return true; +} + +bool SystemZDAGToDAGISel::selectBDXAddr(SystemZAddressingMode::AddrForm Form, + SystemZAddressingMode::DispRange DR, + SDValue Addr, SDValue &Base, + SDValue &Disp, SDValue &Index) { + SystemZAddressingMode AM(Form, DR); + if (!selectAddress(Addr, AM)) + return false; + + getAddressOperands(AM, Addr.getValueType(), Base, Disp, Index); + return true; +} + +SDNode *SystemZDAGToDAGISel::splitLargeImmediate(unsigned Opcode, SDNode *Node, + SDValue Op0, uint64_t UpperVal, + uint64_t LowerVal) { + EVT VT = Node->getValueType(0); + DebugLoc DL = Node->getDebugLoc(); + SDValue Upper = CurDAG->getConstant(UpperVal, VT); + if (Op0.getNode()) + Upper = CurDAG->getNode(Opcode, DL, VT, Op0, Upper); + Upper = SDValue(Select(Upper.getNode()), 0); + + SDValue Lower = CurDAG->getConstant(LowerVal, VT); + SDValue Or = CurDAG->getNode(Opcode, DL, VT, Upper, Lower); + return Or.getNode(); +} + +SDNode *SystemZDAGToDAGISel::Select(SDNode *Node) { + // Dump information about the Node being selected + DEBUG(errs() << "Selecting: "; Node->dump(CurDAG); errs() << "\n"); + + // If we have a custom node, we already have selected! + if (Node->isMachineOpcode()) { + DEBUG(errs() << "== "; Node->dump(CurDAG); errs() << "\n"); + return 0; + } + + unsigned Opcode = Node->getOpcode(); + switch (Opcode) { + case ISD::OR: + case ISD::XOR: + // If this is a 64-bit operation in which both 32-bit halves are nonzero, + // split the operation into two. 
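      // Illustrative example (the final selection shown is approximate):
      // for (or i64 %x, 0x0000123400005678) both 32-bit halves are nonzero,
      // so the node is rewritten below as
      //   (or (or %x, 0x0000123400000000), 0x5678)
      // which can then be selected as an or-immediate of the high word
      // followed by an or-immediate of the low word (OIHF + OILF).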
+ if (Node->getValueType(0) == MVT::i64) + if (ConstantSDNode *Op1 = dyn_cast<ConstantSDNode>(Node->getOperand(1))) { + uint64_t Val = Op1->getZExtValue(); + if (!SystemZ::isImmLF(Val) && !SystemZ::isImmHF(Val)) + Node = splitLargeImmediate(Opcode, Node, Node->getOperand(0), + Val - uint32_t(Val), uint32_t(Val)); + } + break; + + case ISD::Constant: + // If this is a 64-bit constant that is out of the range of LLILF, + // LLIHF and LGFI, split it into two 32-bit pieces. + if (Node->getValueType(0) == MVT::i64) { + uint64_t Val = cast<ConstantSDNode>(Node)->getZExtValue(); + if (!SystemZ::isImmLF(Val) && !SystemZ::isImmHF(Val) && !isInt<32>(Val)) + Node = splitLargeImmediate(ISD::OR, Node, SDValue(), + Val - uint32_t(Val), uint32_t(Val)); + } + break; + + case ISD::ATOMIC_LOAD_SUB: + // Try to convert subtractions of constants to additions. + if (ConstantSDNode *Op2 = dyn_cast<ConstantSDNode>(Node->getOperand(2))) { + uint64_t Value = -Op2->getZExtValue(); + EVT VT = Node->getValueType(0); + if (VT == MVT::i32 || isInt<32>(Value)) { + SDValue Ops[] = { Node->getOperand(0), Node->getOperand(1), + CurDAG->getConstant(int32_t(Value), VT) }; + Node = CurDAG->MorphNodeTo(Node, ISD::ATOMIC_LOAD_ADD, + Node->getVTList(), Ops, array_lengthof(Ops)); + } + } + break; + } + + // Select the default instruction + SDNode *ResNode = SelectCode(Node); + + DEBUG(errs() << "=> "; + if (ResNode == NULL || ResNode == Node) + Node->dump(CurDAG); + else + ResNode->dump(CurDAG); + errs() << "\n"; + ); + return ResNode; +} + +bool SystemZDAGToDAGISel:: +SelectInlineAsmMemoryOperand(const SDValue &Op, + char ConstraintCode, + std::vector<SDValue> &OutOps) { + assert(ConstraintCode == 'm' && "Unexpected constraint code"); + // Accept addresses with short displacements, which are compatible + // with Q, R, S and T. But keep the index operand for future expansion. + SDValue Base, Disp, Index; + if (!selectBDXAddr(SystemZAddressingMode::FormBD, + SystemZAddressingMode::Disp12Only, + Op, Base, Disp, Index)) + return true; + OutOps.push_back(Base); + OutOps.push_back(Disp); + OutOps.push_back(Index); + return false; +} diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp b/contrib/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp new file mode 100644 index 0000000..eb21b31 --- /dev/null +++ b/contrib/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp @@ -0,0 +1,2233 @@ +//===-- SystemZISelLowering.cpp - SystemZ DAG lowering implementation -----===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the SystemZTargetLowering class. +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "systemz-lower" + +#include "SystemZISelLowering.h" +#include "SystemZCallingConv.h" +#include "SystemZConstantPoolValue.h" +#include "SystemZMachineFunctionInfo.h" +#include "SystemZTargetMachine.h" +#include "llvm/CodeGen/CallingConvLower.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h" + +using namespace llvm; + +// Classify VT as either 32 or 64 bit. 
+static bool is32Bit(EVT VT) { + switch (VT.getSimpleVT().SimpleTy) { + case MVT::i32: + return true; + case MVT::i64: + return false; + default: + llvm_unreachable("Unsupported type"); + } +} + +// Return a version of MachineOperand that can be safely used before the +// final use. +static MachineOperand earlyUseOperand(MachineOperand Op) { + if (Op.isReg()) + Op.setIsKill(false); + return Op; +} + +SystemZTargetLowering::SystemZTargetLowering(SystemZTargetMachine &tm) + : TargetLowering(tm, new TargetLoweringObjectFileELF()), + Subtarget(*tm.getSubtargetImpl()), TM(tm) { + MVT PtrVT = getPointerTy(); + + // Set up the register classes. + addRegisterClass(MVT::i32, &SystemZ::GR32BitRegClass); + addRegisterClass(MVT::i64, &SystemZ::GR64BitRegClass); + addRegisterClass(MVT::f32, &SystemZ::FP32BitRegClass); + addRegisterClass(MVT::f64, &SystemZ::FP64BitRegClass); + addRegisterClass(MVT::f128, &SystemZ::FP128BitRegClass); + + // Compute derived properties from the register classes + computeRegisterProperties(); + + // Set up special registers. + setExceptionPointerRegister(SystemZ::R6D); + setExceptionSelectorRegister(SystemZ::R7D); + setStackPointerRegisterToSaveRestore(SystemZ::R15D); + + // TODO: It may be better to default to latency-oriented scheduling, however + // LLVM's current latency-oriented scheduler can't handle physreg definitions + // such as SystemZ has with PSW, so set this to the register-pressure + // scheduler, because it can. + setSchedulingPreference(Sched::RegPressure); + + setBooleanContents(ZeroOrOneBooleanContent); + setBooleanVectorContents(ZeroOrOneBooleanContent); // FIXME: Is this correct? + + // Instructions are strings of 2-byte aligned 2-byte values. + setMinFunctionAlignment(2); + + // Handle operations that are handled in a similar way for all types. + for (unsigned I = MVT::FIRST_INTEGER_VALUETYPE; + I <= MVT::LAST_FP_VALUETYPE; + ++I) { + MVT VT = MVT::SimpleValueType(I); + if (isTypeLegal(VT)) { + // Expand SETCC(X, Y, COND) into SELECT_CC(X, Y, 1, 0, COND). + setOperationAction(ISD::SETCC, VT, Expand); + + // Expand SELECT(C, A, B) into SELECT_CC(X, 0, A, B, NE). + setOperationAction(ISD::SELECT, VT, Expand); + + // Lower SELECT_CC and BR_CC into separate comparisons and branches. + setOperationAction(ISD::SELECT_CC, VT, Custom); + setOperationAction(ISD::BR_CC, VT, Custom); + } + } + + // Expand jump table branches as address arithmetic followed by an + // indirect jump. + setOperationAction(ISD::BR_JT, MVT::Other, Expand); + + // Expand BRCOND into a BR_CC (see above). + setOperationAction(ISD::BRCOND, MVT::Other, Expand); + + // Handle integer types. + for (unsigned I = MVT::FIRST_INTEGER_VALUETYPE; + I <= MVT::LAST_INTEGER_VALUETYPE; + ++I) { + MVT VT = MVT::SimpleValueType(I); + if (isTypeLegal(VT)) { + // Expand individual DIV and REMs into DIVREMs. + setOperationAction(ISD::SDIV, VT, Expand); + setOperationAction(ISD::UDIV, VT, Expand); + setOperationAction(ISD::SREM, VT, Expand); + setOperationAction(ISD::UREM, VT, Expand); + setOperationAction(ISD::SDIVREM, VT, Custom); + setOperationAction(ISD::UDIVREM, VT, Custom); + + // Expand ATOMIC_LOAD and ATOMIC_STORE using ATOMIC_CMP_SWAP. + // FIXME: probably much too conservative. + setOperationAction(ISD::ATOMIC_LOAD, VT, Expand); + setOperationAction(ISD::ATOMIC_STORE, VT, Expand); + + // No special instructions for these. 
+ setOperationAction(ISD::CTPOP, VT, Expand); + setOperationAction(ISD::CTTZ, VT, Expand); + setOperationAction(ISD::CTTZ_ZERO_UNDEF, VT, Expand); + setOperationAction(ISD::CTLZ_ZERO_UNDEF, VT, Expand); + setOperationAction(ISD::ROTR, VT, Expand); + + // Use *MUL_LOHI where possible and a wider multiplication otherwise. + setOperationAction(ISD::MULHS, VT, Expand); + setOperationAction(ISD::MULHU, VT, Expand); + + // We have instructions for signed but not unsigned FP conversion. + setOperationAction(ISD::FP_TO_UINT, VT, Expand); + } + } + + // Type legalization will convert 8- and 16-bit atomic operations into + // forms that operate on i32s (but still keeping the original memory VT). + // Lower them into full i32 operations. + setOperationAction(ISD::ATOMIC_SWAP, MVT::i32, Custom); + setOperationAction(ISD::ATOMIC_LOAD_ADD, MVT::i32, Custom); + setOperationAction(ISD::ATOMIC_LOAD_SUB, MVT::i32, Custom); + setOperationAction(ISD::ATOMIC_LOAD_AND, MVT::i32, Custom); + setOperationAction(ISD::ATOMIC_LOAD_OR, MVT::i32, Custom); + setOperationAction(ISD::ATOMIC_LOAD_XOR, MVT::i32, Custom); + setOperationAction(ISD::ATOMIC_LOAD_NAND, MVT::i32, Custom); + setOperationAction(ISD::ATOMIC_LOAD_MIN, MVT::i32, Custom); + setOperationAction(ISD::ATOMIC_LOAD_MAX, MVT::i32, Custom); + setOperationAction(ISD::ATOMIC_LOAD_UMIN, MVT::i32, Custom); + setOperationAction(ISD::ATOMIC_LOAD_UMAX, MVT::i32, Custom); + setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i32, Custom); + + // We have instructions for signed but not unsigned FP conversion. + // Handle unsigned 32-bit types as signed 64-bit types. + setOperationAction(ISD::UINT_TO_FP, MVT::i32, Promote); + setOperationAction(ISD::UINT_TO_FP, MVT::i64, Expand); + + // We have native support for a 64-bit CTLZ, via FLOGR. + setOperationAction(ISD::CTLZ, MVT::i32, Promote); + setOperationAction(ISD::CTLZ, MVT::i64, Legal); + + // Give LowerOperation the chance to replace 64-bit ORs with subregs. + setOperationAction(ISD::OR, MVT::i64, Custom); + + // The architecture has 32-bit SMUL_LOHI and UMUL_LOHI (MR and MLR), + // but they aren't really worth using. There is no 64-bit SMUL_LOHI, + // but there is a 64-bit UMUL_LOHI: MLGR. + setOperationAction(ISD::SMUL_LOHI, MVT::i32, Expand); + setOperationAction(ISD::SMUL_LOHI, MVT::i64, Expand); + setOperationAction(ISD::UMUL_LOHI, MVT::i32, Expand); + setOperationAction(ISD::UMUL_LOHI, MVT::i64, Custom); + + // FIXME: Can we support these natively? + setOperationAction(ISD::SRL_PARTS, MVT::i64, Expand); + setOperationAction(ISD::SHL_PARTS, MVT::i64, Expand); + setOperationAction(ISD::SRA_PARTS, MVT::i64, Expand); + + // We have native instructions for i8, i16 and i32 extensions, but not i1. + setLoadExtAction(ISD::SEXTLOAD, MVT::i1, Promote); + setLoadExtAction(ISD::ZEXTLOAD, MVT::i1, Promote); + setLoadExtAction(ISD::EXTLOAD, MVT::i1, Promote); + setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand); + + // Handle the various types of symbolic address. + setOperationAction(ISD::ConstantPool, PtrVT, Custom); + setOperationAction(ISD::GlobalAddress, PtrVT, Custom); + setOperationAction(ISD::GlobalTLSAddress, PtrVT, Custom); + setOperationAction(ISD::BlockAddress, PtrVT, Custom); + setOperationAction(ISD::JumpTable, PtrVT, Custom); + + // We need to handle dynamic allocations specially because of the + // 160-byte area at the bottom of the stack. + setOperationAction(ISD::DYNAMIC_STACKALLOC, PtrVT, Custom); + + // Use custom expanders so that we can force the function to use + // a frame pointer. 
+ setOperationAction(ISD::STACKSAVE, MVT::Other, Custom); + setOperationAction(ISD::STACKRESTORE, MVT::Other, Custom); + + // Expand these using getExceptionSelectorRegister() and + // getExceptionPointerRegister(). + setOperationAction(ISD::EXCEPTIONADDR, PtrVT, Expand); + setOperationAction(ISD::EHSELECTION, PtrVT, Expand); + + // Handle floating-point types. + for (unsigned I = MVT::FIRST_FP_VALUETYPE; + I <= MVT::LAST_FP_VALUETYPE; + ++I) { + MVT VT = MVT::SimpleValueType(I); + if (isTypeLegal(VT)) { + // We can use FI for FRINT. + setOperationAction(ISD::FRINT, VT, Legal); + + // No special instructions for these. + setOperationAction(ISD::FSIN, VT, Expand); + setOperationAction(ISD::FCOS, VT, Expand); + setOperationAction(ISD::FREM, VT, Expand); + } + } + + // We have fused multiply-addition for f32 and f64 but not f128. + setOperationAction(ISD::FMA, MVT::f32, Legal); + setOperationAction(ISD::FMA, MVT::f64, Legal); + setOperationAction(ISD::FMA, MVT::f128, Expand); + + // Needed so that we don't try to implement f128 constant loads using + // a load-and-extend of a f80 constant (in cases where the constant + // would fit in an f80). + setLoadExtAction(ISD::EXTLOAD, MVT::f80, Expand); + + // Floating-point truncation and stores need to be done separately. + setTruncStoreAction(MVT::f64, MVT::f32, Expand); + setTruncStoreAction(MVT::f128, MVT::f32, Expand); + setTruncStoreAction(MVT::f128, MVT::f64, Expand); + + // We have 64-bit FPR<->GPR moves, but need special handling for + // 32-bit forms. + setOperationAction(ISD::BITCAST, MVT::i32, Custom); + setOperationAction(ISD::BITCAST, MVT::f32, Custom); + + // VASTART and VACOPY need to deal with the SystemZ-specific varargs + // structure, but VAEND is a no-op. + setOperationAction(ISD::VASTART, MVT::Other, Custom); + setOperationAction(ISD::VACOPY, MVT::Other, Custom); + setOperationAction(ISD::VAEND, MVT::Other, Expand); +} + +bool SystemZTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT) const { + // We can load zero using LZ?R and negative zero using LZ?R;LC?BR. + return Imm.isZero() || Imm.isNegZero(); +} + +//===----------------------------------------------------------------------===// +// Inline asm support +//===----------------------------------------------------------------------===// + +TargetLowering::ConstraintType +SystemZTargetLowering::getConstraintType(const std::string &Constraint) const { + if (Constraint.size() == 1) { + switch (Constraint[0]) { + case 'a': // Address register + case 'd': // Data register (equivalent to 'r') + case 'f': // Floating-point register + case 'r': // General-purpose register + return C_RegisterClass; + + case 'Q': // Memory with base and unsigned 12-bit displacement + case 'R': // Likewise, plus an index + case 'S': // Memory with base and signed 20-bit displacement + case 'T': // Likewise, plus an index + case 'm': // Equivalent to 'T'. 
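    // Illustrative use (hypothetical user code, not part of this file):
    //
    //   asm volatile("lg %0,%1" : "=d" (val) : "T" (*ptr));
    //
    // where "d" requests a general-purpose register and "T" a memory
    // operand with base, index and signed 20-bit displacement, matching the
    // classifications returned here.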
+ return C_Memory; + + case 'I': // Unsigned 8-bit constant + case 'J': // Unsigned 12-bit constant + case 'K': // Signed 16-bit constant + case 'L': // Signed 20-bit displacement (on all targets we support) + case 'M': // 0x7fffffff + return C_Other; + + default: + break; + } + } + return TargetLowering::getConstraintType(Constraint); +} + +TargetLowering::ConstraintWeight SystemZTargetLowering:: +getSingleConstraintMatchWeight(AsmOperandInfo &info, + const char *constraint) const { + ConstraintWeight weight = CW_Invalid; + Value *CallOperandVal = info.CallOperandVal; + // If we don't have a value, we can't do a match, + // but allow it at the lowest weight. + if (CallOperandVal == NULL) + return CW_Default; + Type *type = CallOperandVal->getType(); + // Look at the constraint type. + switch (*constraint) { + default: + weight = TargetLowering::getSingleConstraintMatchWeight(info, constraint); + break; + + case 'a': // Address register + case 'd': // Data register (equivalent to 'r') + case 'r': // General-purpose register + if (CallOperandVal->getType()->isIntegerTy()) + weight = CW_Register; + break; + + case 'f': // Floating-point register + if (type->isFloatingPointTy()) + weight = CW_Register; + break; + + case 'I': // Unsigned 8-bit constant + if (ConstantInt *C = dyn_cast<ConstantInt>(CallOperandVal)) + if (isUInt<8>(C->getZExtValue())) + weight = CW_Constant; + break; + + case 'J': // Unsigned 12-bit constant + if (ConstantInt *C = dyn_cast<ConstantInt>(CallOperandVal)) + if (isUInt<12>(C->getZExtValue())) + weight = CW_Constant; + break; + + case 'K': // Signed 16-bit constant + if (ConstantInt *C = dyn_cast<ConstantInt>(CallOperandVal)) + if (isInt<16>(C->getSExtValue())) + weight = CW_Constant; + break; + + case 'L': // Signed 20-bit displacement (on all targets we support) + if (ConstantInt *C = dyn_cast<ConstantInt>(CallOperandVal)) + if (isInt<20>(C->getSExtValue())) + weight = CW_Constant; + break; + + case 'M': // 0x7fffffff + if (ConstantInt *C = dyn_cast<ConstantInt>(CallOperandVal)) + if (C->getZExtValue() == 0x7fffffff) + weight = CW_Constant; + break; + } + return weight; +} + +std::pair<unsigned, const TargetRegisterClass *> SystemZTargetLowering:: +getRegForInlineAsmConstraint(const std::string &Constraint, EVT VT) const { + if (Constraint.size() == 1) { + // GCC Constraint Letters + switch (Constraint[0]) { + default: break; + case 'd': // Data register (equivalent to 'r') + case 'r': // General-purpose register + if (VT == MVT::i64) + return std::make_pair(0U, &SystemZ::GR64BitRegClass); + else if (VT == MVT::i128) + return std::make_pair(0U, &SystemZ::GR128BitRegClass); + return std::make_pair(0U, &SystemZ::GR32BitRegClass); + + case 'a': // Address register + if (VT == MVT::i64) + return std::make_pair(0U, &SystemZ::ADDR64BitRegClass); + else if (VT == MVT::i128) + return std::make_pair(0U, &SystemZ::ADDR128BitRegClass); + return std::make_pair(0U, &SystemZ::ADDR32BitRegClass); + + case 'f': // Floating-point register + if (VT == MVT::f64) + return std::make_pair(0U, &SystemZ::FP64BitRegClass); + else if (VT == MVT::f128) + return std::make_pair(0U, &SystemZ::FP128BitRegClass); + return std::make_pair(0U, &SystemZ::FP32BitRegClass); + } + } + return TargetLowering::getRegForInlineAsmConstraint(Constraint, VT); +} + +void SystemZTargetLowering:: +LowerAsmOperandForConstraint(SDValue Op, std::string &Constraint, + std::vector<SDValue> &Ops, + SelectionDAG &DAG) const { + // Only support length 1 constraints for now. 
+ if (Constraint.length() == 1) { + switch (Constraint[0]) { + case 'I': // Unsigned 8-bit constant + if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op)) + if (isUInt<8>(C->getZExtValue())) + Ops.push_back(DAG.getTargetConstant(C->getZExtValue(), + Op.getValueType())); + return; + + case 'J': // Unsigned 12-bit constant + if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op)) + if (isUInt<12>(C->getZExtValue())) + Ops.push_back(DAG.getTargetConstant(C->getZExtValue(), + Op.getValueType())); + return; + + case 'K': // Signed 16-bit constant + if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op)) + if (isInt<16>(C->getSExtValue())) + Ops.push_back(DAG.getTargetConstant(C->getSExtValue(), + Op.getValueType())); + return; + + case 'L': // Signed 20-bit displacement (on all targets we support) + if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op)) + if (isInt<20>(C->getSExtValue())) + Ops.push_back(DAG.getTargetConstant(C->getSExtValue(), + Op.getValueType())); + return; + + case 'M': // 0x7fffffff + if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op)) + if (C->getZExtValue() == 0x7fffffff) + Ops.push_back(DAG.getTargetConstant(C->getZExtValue(), + Op.getValueType())); + return; + } + } + TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG); +} + +//===----------------------------------------------------------------------===// +// Calling conventions +//===----------------------------------------------------------------------===// + +#include "SystemZGenCallingConv.inc" + +// Value is a value that has been passed to us in the location described by VA +// (and so has type VA.getLocVT()). Convert Value to VA.getValVT(), chaining +// any loads onto Chain. +static SDValue convertLocVTToValVT(SelectionDAG &DAG, DebugLoc DL, + CCValAssign &VA, SDValue Chain, + SDValue Value) { + // If the argument has been promoted from a smaller type, insert an + // assertion to capture this. + if (VA.getLocInfo() == CCValAssign::SExt) + Value = DAG.getNode(ISD::AssertSext, DL, VA.getLocVT(), Value, + DAG.getValueType(VA.getValVT())); + else if (VA.getLocInfo() == CCValAssign::ZExt) + Value = DAG.getNode(ISD::AssertZext, DL, VA.getLocVT(), Value, + DAG.getValueType(VA.getValVT())); + + if (VA.isExtInLoc()) + Value = DAG.getNode(ISD::TRUNCATE, DL, VA.getValVT(), Value); + else if (VA.getLocInfo() == CCValAssign::Indirect) + Value = DAG.getLoad(VA.getValVT(), DL, Chain, Value, + MachinePointerInfo(), false, false, false, 0); + else + assert(VA.getLocInfo() == CCValAssign::Full && "Unsupported getLocInfo"); + return Value; +} + +// Value is a value of type VA.getValVT() that we need to copy into +// the location described by VA. Return a copy of Value converted to +// VA.getValVT(). The caller is responsible for handling indirect values. 
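// Worked example (exposition only): suppose the calling convention records
// an i8 value as promoted into a wider LocVT with LocInfo == SExt.  On the
// call side, convertValVTToLocVT below emits SIGN_EXTEND to widen the
// value; on the receiving side, convertLocVTToValVT wraps the live-in value
// in AssertSext and truncates it back to i8, so the rest of the DAG keeps
// the original type while the register holds a correctly extended value.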
+static SDValue convertValVTToLocVT(SelectionDAG &DAG, DebugLoc DL, + CCValAssign &VA, SDValue Value) { + switch (VA.getLocInfo()) { + case CCValAssign::SExt: + return DAG.getNode(ISD::SIGN_EXTEND, DL, VA.getLocVT(), Value); + case CCValAssign::ZExt: + return DAG.getNode(ISD::ZERO_EXTEND, DL, VA.getLocVT(), Value); + case CCValAssign::AExt: + return DAG.getNode(ISD::ANY_EXTEND, DL, VA.getLocVT(), Value); + case CCValAssign::Full: + return Value; + default: + llvm_unreachable("Unhandled getLocInfo()"); + } +} + +SDValue SystemZTargetLowering:: +LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, bool IsVarArg, + const SmallVectorImpl<ISD::InputArg> &Ins, + DebugLoc DL, SelectionDAG &DAG, + SmallVectorImpl<SDValue> &InVals) const { + MachineFunction &MF = DAG.getMachineFunction(); + MachineFrameInfo *MFI = MF.getFrameInfo(); + MachineRegisterInfo &MRI = MF.getRegInfo(); + SystemZMachineFunctionInfo *FuncInfo = + MF.getInfo<SystemZMachineFunctionInfo>(); + const SystemZFrameLowering *TFL = + static_cast<const SystemZFrameLowering *>(TM.getFrameLowering()); + + // Assign locations to all of the incoming arguments. + SmallVector<CCValAssign, 16> ArgLocs; + CCState CCInfo(CallConv, IsVarArg, MF, TM, ArgLocs, *DAG.getContext()); + CCInfo.AnalyzeFormalArguments(Ins, CC_SystemZ); + + unsigned NumFixedGPRs = 0; + unsigned NumFixedFPRs = 0; + for (unsigned I = 0, E = ArgLocs.size(); I != E; ++I) { + SDValue ArgValue; + CCValAssign &VA = ArgLocs[I]; + EVT LocVT = VA.getLocVT(); + if (VA.isRegLoc()) { + // Arguments passed in registers + const TargetRegisterClass *RC; + switch (LocVT.getSimpleVT().SimpleTy) { + default: + // Integers smaller than i64 should be promoted to i64. + llvm_unreachable("Unexpected argument type"); + case MVT::i32: + NumFixedGPRs += 1; + RC = &SystemZ::GR32BitRegClass; + break; + case MVT::i64: + NumFixedGPRs += 1; + RC = &SystemZ::GR64BitRegClass; + break; + case MVT::f32: + NumFixedFPRs += 1; + RC = &SystemZ::FP32BitRegClass; + break; + case MVT::f64: + NumFixedFPRs += 1; + RC = &SystemZ::FP64BitRegClass; + break; + } + + unsigned VReg = MRI.createVirtualRegister(RC); + MRI.addLiveIn(VA.getLocReg(), VReg); + ArgValue = DAG.getCopyFromReg(Chain, DL, VReg, LocVT); + } else { + assert(VA.isMemLoc() && "Argument not register or memory"); + + // Create the frame index object for this incoming parameter. + int FI = MFI->CreateFixedObject(LocVT.getSizeInBits() / 8, + VA.getLocMemOffset(), true); + + // Create the SelectionDAG nodes corresponding to a load + // from this parameter. Unpromoted ints and floats are + // passed as right-justified 8-byte values. + EVT PtrVT = getPointerTy(); + SDValue FIN = DAG.getFrameIndex(FI, PtrVT); + if (VA.getLocVT() == MVT::i32 || VA.getLocVT() == MVT::f32) + FIN = DAG.getNode(ISD::ADD, DL, PtrVT, FIN, DAG.getIntPtrConstant(4)); + ArgValue = DAG.getLoad(LocVT, DL, Chain, FIN, + MachinePointerInfo::getFixedStack(FI), + false, false, false, 0); + } + + // Convert the value of the argument register into the value that's + // being passed. + InVals.push_back(convertLocVTToValVT(DAG, DL, VA, Chain, ArgValue)); + } + + if (IsVarArg) { + // Save the number of non-varargs registers for later use by va_start, etc. + FuncInfo->setVarArgsFirstGPR(NumFixedGPRs); + FuncInfo->setVarArgsFirstFPR(NumFixedFPRs); + + // Likewise the address (in the form of a frame index) of where the + // first stack vararg would be. The 1-byte size here is arbitrary. 
+ int64_t StackSize = CCInfo.getNextStackOffset(); + FuncInfo->setVarArgsFrameIndex(MFI->CreateFixedObject(1, StackSize, true)); + + // ...and a similar frame index for the caller-allocated save area + // that will be used to store the incoming registers. + int64_t RegSaveOffset = TFL->getOffsetOfLocalArea(); + unsigned RegSaveIndex = MFI->CreateFixedObject(1, RegSaveOffset, true); + FuncInfo->setRegSaveFrameIndex(RegSaveIndex); + + // Store the FPR varargs in the reserved frame slots. (We store the + // GPRs as part of the prologue.) + if (NumFixedFPRs < SystemZ::NumArgFPRs) { + SDValue MemOps[SystemZ::NumArgFPRs]; + for (unsigned I = NumFixedFPRs; I < SystemZ::NumArgFPRs; ++I) { + unsigned Offset = TFL->getRegSpillOffset(SystemZ::ArgFPRs[I]); + int FI = MFI->CreateFixedObject(8, RegSaveOffset + Offset, true); + SDValue FIN = DAG.getFrameIndex(FI, getPointerTy()); + unsigned VReg = MF.addLiveIn(SystemZ::ArgFPRs[I], + &SystemZ::FP64BitRegClass); + SDValue ArgValue = DAG.getCopyFromReg(Chain, DL, VReg, MVT::f64); + MemOps[I] = DAG.getStore(ArgValue.getValue(1), DL, ArgValue, FIN, + MachinePointerInfo::getFixedStack(FI), + false, false, 0); + + } + // Join the stores, which are independent of one another. + Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, + &MemOps[NumFixedFPRs], + SystemZ::NumArgFPRs - NumFixedFPRs); + } + } + + return Chain; +} + +SDValue +SystemZTargetLowering::LowerCall(CallLoweringInfo &CLI, + SmallVectorImpl<SDValue> &InVals) const { + SelectionDAG &DAG = CLI.DAG; + DebugLoc &DL = CLI.DL; + SmallVector<ISD::OutputArg, 32> &Outs = CLI.Outs; + SmallVector<SDValue, 32> &OutVals = CLI.OutVals; + SmallVector<ISD::InputArg, 32> &Ins = CLI.Ins; + SDValue Chain = CLI.Chain; + SDValue Callee = CLI.Callee; + bool &isTailCall = CLI.IsTailCall; + CallingConv::ID CallConv = CLI.CallConv; + bool IsVarArg = CLI.IsVarArg; + MachineFunction &MF = DAG.getMachineFunction(); + EVT PtrVT = getPointerTy(); + + // SystemZ target does not yet support tail call optimization. + isTailCall = false; + + // Analyze the operands of the call, assigning locations to each operand. + SmallVector<CCValAssign, 16> ArgLocs; + CCState ArgCCInfo(CallConv, IsVarArg, MF, TM, ArgLocs, *DAG.getContext()); + ArgCCInfo.AnalyzeCallOperands(Outs, CC_SystemZ); + + // Get a count of how many bytes are to be pushed on the stack. + unsigned NumBytes = ArgCCInfo.getNextStackOffset(); + + // Mark the start of the call. + Chain = DAG.getCALLSEQ_START(Chain, DAG.getConstant(NumBytes, PtrVT, true)); + + // Copy argument values to their designated locations. + SmallVector<std::pair<unsigned, SDValue>, 9> RegsToPass; + SmallVector<SDValue, 8> MemOpChains; + SDValue StackPtr; + for (unsigned I = 0, E = ArgLocs.size(); I != E; ++I) { + CCValAssign &VA = ArgLocs[I]; + SDValue ArgValue = OutVals[I]; + + if (VA.getLocInfo() == CCValAssign::Indirect) { + // Store the argument in a stack slot and pass its address. + SDValue SpillSlot = DAG.CreateStackTemporary(VA.getValVT()); + int FI = cast<FrameIndexSDNode>(SpillSlot)->getIndex(); + MemOpChains.push_back(DAG.getStore(Chain, DL, ArgValue, SpillSlot, + MachinePointerInfo::getFixedStack(FI), + false, false, 0)); + ArgValue = SpillSlot; + } else + ArgValue = convertValVTToLocVT(DAG, DL, VA, ArgValue); + + if (VA.isRegLoc()) + // Queue up the argument copies and emit them at the end. + RegsToPass.push_back(std::make_pair(VA.getLocReg(), ArgValue)); + else { + assert(VA.isMemLoc() && "Argument not register or memory"); + + // Work out the address of the stack slot. 
Unpromoted ints and + // floats are passed as right-justified 8-byte values. + if (!StackPtr.getNode()) + StackPtr = DAG.getCopyFromReg(Chain, DL, SystemZ::R15D, PtrVT); + unsigned Offset = SystemZMC::CallFrameSize + VA.getLocMemOffset(); + if (VA.getLocVT() == MVT::i32 || VA.getLocVT() == MVT::f32) + Offset += 4; + SDValue Address = DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr, + DAG.getIntPtrConstant(Offset)); + + // Emit the store. + MemOpChains.push_back(DAG.getStore(Chain, DL, ArgValue, Address, + MachinePointerInfo(), + false, false, 0)); + } + } + + // Join the stores, which are independent of one another. + if (!MemOpChains.empty()) + Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, + &MemOpChains[0], MemOpChains.size()); + + // Build a sequence of copy-to-reg nodes, chained and glued together. + SDValue Glue; + for (unsigned I = 0, E = RegsToPass.size(); I != E; ++I) { + Chain = DAG.getCopyToReg(Chain, DL, RegsToPass[I].first, + RegsToPass[I].second, Glue); + Glue = Chain.getValue(1); + } + + // Accept direct calls by converting symbolic call addresses to the + // associated Target* opcodes. + if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) { + Callee = DAG.getTargetGlobalAddress(G->getGlobal(), DL, PtrVT); + Callee = DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Callee); + } else if (ExternalSymbolSDNode *E = dyn_cast<ExternalSymbolSDNode>(Callee)) { + Callee = DAG.getTargetExternalSymbol(E->getSymbol(), PtrVT); + Callee = DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Callee); + } + + // The first call operand is the chain and the second is the target address. + SmallVector<SDValue, 8> Ops; + Ops.push_back(Chain); + Ops.push_back(Callee); + + // Add argument registers to the end of the list so that they are + // known live into the call. + for (unsigned I = 0, E = RegsToPass.size(); I != E; ++I) + Ops.push_back(DAG.getRegister(RegsToPass[I].first, + RegsToPass[I].second.getValueType())); + + // Glue the call to the argument copies, if any. + if (Glue.getNode()) + Ops.push_back(Glue); + + // Emit the call. + SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue); + Chain = DAG.getNode(SystemZISD::CALL, DL, NodeTys, &Ops[0], Ops.size()); + Glue = Chain.getValue(1); + + // Mark the end of the call, which is glued to the call itself. + Chain = DAG.getCALLSEQ_END(Chain, + DAG.getConstant(NumBytes, PtrVT, true), + DAG.getConstant(0, PtrVT, true), + Glue); + Glue = Chain.getValue(1); + + // Assign locations to each value returned by this call. + SmallVector<CCValAssign, 16> RetLocs; + CCState RetCCInfo(CallConv, IsVarArg, MF, TM, RetLocs, *DAG.getContext()); + RetCCInfo.AnalyzeCallResult(Ins, RetCC_SystemZ); + + // Copy all of the result registers out of their specified physreg. + for (unsigned I = 0, E = RetLocs.size(); I != E; ++I) { + CCValAssign &VA = RetLocs[I]; + + // Copy the value out, gluing the copy to the end of the call sequence. + SDValue RetValue = DAG.getCopyFromReg(Chain, DL, VA.getLocReg(), + VA.getLocVT(), Glue); + Chain = RetValue.getValue(1); + Glue = RetValue.getValue(2); + + // Convert the value of the return register into the value that's + // being returned. 
+ InVals.push_back(convertLocVTToValVT(DAG, DL, VA, Chain, RetValue)); + } + + return Chain; +} + +SDValue +SystemZTargetLowering::LowerReturn(SDValue Chain, + CallingConv::ID CallConv, bool IsVarArg, + const SmallVectorImpl<ISD::OutputArg> &Outs, + const SmallVectorImpl<SDValue> &OutVals, + DebugLoc DL, SelectionDAG &DAG) const { + MachineFunction &MF = DAG.getMachineFunction(); + + // Assign locations to each returned value. + SmallVector<CCValAssign, 16> RetLocs; + CCState RetCCInfo(CallConv, IsVarArg, MF, TM, RetLocs, *DAG.getContext()); + RetCCInfo.AnalyzeReturn(Outs, RetCC_SystemZ); + + // Quick exit for void returns + if (RetLocs.empty()) + return DAG.getNode(SystemZISD::RET_FLAG, DL, MVT::Other, Chain); + + // Copy the result values into the output registers. + SDValue Glue; + SmallVector<SDValue, 4> RetOps; + RetOps.push_back(Chain); + for (unsigned I = 0, E = RetLocs.size(); I != E; ++I) { + CCValAssign &VA = RetLocs[I]; + SDValue RetValue = OutVals[I]; + + // Make the return register live on exit. + assert(VA.isRegLoc() && "Can only return in registers!"); + + // Promote the value as required. + RetValue = convertValVTToLocVT(DAG, DL, VA, RetValue); + + // Chain and glue the copies together. + unsigned Reg = VA.getLocReg(); + Chain = DAG.getCopyToReg(Chain, DL, Reg, RetValue, Glue); + Glue = Chain.getValue(1); + RetOps.push_back(DAG.getRegister(Reg, VA.getLocVT())); + } + + // Update chain and glue. + RetOps[0] = Chain; + if (Glue.getNode()) + RetOps.push_back(Glue); + + return DAG.getNode(SystemZISD::RET_FLAG, DL, MVT::Other, + RetOps.data(), RetOps.size()); +} + +// CC is a comparison that will be implemented using an integer or +// floating-point comparison. Return the condition code mask for +// a branch on true. In the integer case, CCMASK_CMP_UO is set for +// unsigned comparisons and clear for signed ones. In the floating-point +// case, CCMASK_CMP_UO has its normal mask meaning (unordered). +static unsigned CCMaskForCondCode(ISD::CondCode CC) { +#define CONV(X) \ + case ISD::SET##X: return SystemZ::CCMASK_CMP_##X; \ + case ISD::SETO##X: return SystemZ::CCMASK_CMP_##X; \ + case ISD::SETU##X: return SystemZ::CCMASK_CMP_UO | SystemZ::CCMASK_CMP_##X + + switch (CC) { + default: + llvm_unreachable("Invalid integer condition!"); + + CONV(EQ); + CONV(NE); + CONV(GT); + CONV(GE); + CONV(LT); + CONV(LE); + + case ISD::SETO: return SystemZ::CCMASK_CMP_O; + case ISD::SETUO: return SystemZ::CCMASK_CMP_UO; + } +#undef CONV +} + +// If a comparison described by IsUnsigned, CCMask, CmpOp0 and CmpOp1 +// is suitable for CLI(Y), CHHSI or CLHHSI, adjust the operands as necessary. +static void adjustSubwordCmp(SelectionDAG &DAG, bool &IsUnsigned, + SDValue &CmpOp0, SDValue &CmpOp1, + unsigned &CCMask) { + // For us to make any changes, it must a comparison between a single-use + // load and a constant. + if (!CmpOp0.hasOneUse() || + CmpOp0.getOpcode() != ISD::LOAD || + CmpOp1.getOpcode() != ISD::Constant) + return; + + // We must have an 8- or 16-bit load. + LoadSDNode *Load = cast<LoadSDNode>(CmpOp0); + unsigned NumBits = Load->getMemoryVT().getStoreSizeInBits(); + if (NumBits != 8 && NumBits != 16) + return; + + // The load must be an extending one and the constant must be within the + // range of the unextended value. 
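  // Worked example (exposition): for "icmp slt (i32 (sextload i8 *P)), 0"
  // the code below turns the test into an unsigned "loaded byte > 127"
  // comparison, which matches CLI, the 8-bit compare-logical-immediate
  // instruction, instead of requiring the sign-extended value in a register.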
+ ConstantSDNode *Constant = cast<ConstantSDNode>(CmpOp1); + uint64_t Value = Constant->getZExtValue(); + uint64_t Mask = (1 << NumBits) - 1; + if (Load->getExtensionType() == ISD::SEXTLOAD) { + int64_t SignedValue = Constant->getSExtValue(); + if (uint64_t(SignedValue) + (1 << (NumBits - 1)) > Mask) + return; + // Unsigned comparison between two sign-extended values is equivalent + // to unsigned comparison between two zero-extended values. + if (IsUnsigned) + Value &= Mask; + else if (CCMask == SystemZ::CCMASK_CMP_EQ || + CCMask == SystemZ::CCMASK_CMP_NE) + // Any choice of IsUnsigned is OK for equality comparisons. + // We could use either CHHSI or CLHHSI for 16-bit comparisons, + // but since we use CLHHSI for zero extensions, it seems better + // to be consistent and do the same here. + Value &= Mask, IsUnsigned = true; + else if (NumBits == 8) { + // Try to treat the comparison as unsigned, so that we can use CLI. + // Adjust CCMask and Value as necessary. + if (Value == 0 && CCMask == SystemZ::CCMASK_CMP_LT) + // Test whether the high bit of the byte is set. + Value = 127, CCMask = SystemZ::CCMASK_CMP_GT, IsUnsigned = true; + else if (SignedValue == -1 && CCMask == SystemZ::CCMASK_CMP_GT) + // Test whether the high bit of the byte is clear. + Value = 128, CCMask = SystemZ::CCMASK_CMP_LT, IsUnsigned = true; + else + // No instruction exists for this combination. + return; + } + } else if (Load->getExtensionType() == ISD::ZEXTLOAD) { + if (Value > Mask) + return; + // Signed comparison between two zero-extended values is equivalent + // to unsigned comparison. + IsUnsigned = true; + } else + return; + + // Make sure that the first operand is an i32 of the right extension type. + ISD::LoadExtType ExtType = IsUnsigned ? ISD::ZEXTLOAD : ISD::SEXTLOAD; + if (CmpOp0.getValueType() != MVT::i32 || + Load->getExtensionType() != ExtType) + CmpOp0 = DAG.getExtLoad(ExtType, Load->getDebugLoc(), MVT::i32, + Load->getChain(), Load->getBasePtr(), + Load->getPointerInfo(), Load->getMemoryVT(), + Load->isVolatile(), Load->isNonTemporal(), + Load->getAlignment()); + + // Make sure that the second operand is an i32 with the right value. + if (CmpOp1.getValueType() != MVT::i32 || + Value != Constant->getZExtValue()) + CmpOp1 = DAG.getConstant(Value, MVT::i32); +} + +// Return true if a comparison described by CCMask, CmpOp0 and CmpOp1 +// is an equality comparison that is better implemented using unsigned +// rather than signed comparison instructions. +static bool preferUnsignedComparison(SelectionDAG &DAG, SDValue CmpOp0, + SDValue CmpOp1, unsigned CCMask) { + // The test must be for equality or inequality. + if (CCMask != SystemZ::CCMASK_CMP_EQ && CCMask != SystemZ::CCMASK_CMP_NE) + return false; + + if (CmpOp1.getOpcode() == ISD::Constant) { + uint64_t Value = cast<ConstantSDNode>(CmpOp1)->getSExtValue(); + + // If we're comparing with memory, prefer unsigned comparisons for + // values that are in the unsigned 16-bit range but not the signed + // 16-bit range. We want to use CLFHSI and CLGHSI. + if (CmpOp0.hasOneUse() && + ISD::isNormalLoad(CmpOp0.getNode()) && + (Value >= 32768 && Value < 65536)) + return true; + + // Use unsigned comparisons for values that are in the CLGFI range + // but not in the CGFI range. + if (CmpOp0.getValueType() == MVT::i64 && (Value >> 31) == 1) + return true; + + return false; + } + + // Prefer CL for zero-extended loads. + if (CmpOp1.getOpcode() == ISD::ZERO_EXTEND || + ISD::isZEXTLoad(CmpOp1.getNode())) + return true; + + // ...and for "in-register" zero extensions. 
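  // Illustrative examples: comparing a loaded value against 40000 falls in
  // the unsigned-but-not-signed 16-bit range above, so CLFHSI/CLGHSI become
  // usable; and (and i64 %x, 0xffffffff), checked next, is just a zero
  // extension in disguise, so an unsigned compare is again the natural fit.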
+ if (CmpOp1.getOpcode() == ISD::AND && CmpOp1.getValueType() == MVT::i64) { + SDValue Mask = CmpOp1.getOperand(1); + if (Mask.getOpcode() == ISD::Constant && + cast<ConstantSDNode>(Mask)->getZExtValue() == 0xffffffff) + return true; + } + + return false; +} + +// Return a target node that compares CmpOp0 and CmpOp1. Set CCMask to the +// 4-bit condition-code mask for CC. +static SDValue emitCmp(SelectionDAG &DAG, SDValue CmpOp0, SDValue CmpOp1, + ISD::CondCode CC, unsigned &CCMask) { + bool IsUnsigned = false; + CCMask = CCMaskForCondCode(CC); + if (!CmpOp0.getValueType().isFloatingPoint()) { + IsUnsigned = CCMask & SystemZ::CCMASK_CMP_UO; + CCMask &= ~SystemZ::CCMASK_CMP_UO; + adjustSubwordCmp(DAG, IsUnsigned, CmpOp0, CmpOp1, CCMask); + if (preferUnsignedComparison(DAG, CmpOp0, CmpOp1, CCMask)) + IsUnsigned = true; + } + + DebugLoc DL = CmpOp0.getDebugLoc(); + return DAG.getNode((IsUnsigned ? SystemZISD::UCMP : SystemZISD::CMP), + DL, MVT::Glue, CmpOp0, CmpOp1); +} + +// Lower a binary operation that produces two VT results, one in each +// half of a GR128 pair. Op0 and Op1 are the VT operands to the operation, +// Extend extends Op0 to a GR128, and Opcode performs the GR128 operation +// on the extended Op0 and (unextended) Op1. Store the even register result +// in Even and the odd register result in Odd. +static void lowerGR128Binary(SelectionDAG &DAG, DebugLoc DL, EVT VT, + unsigned Extend, unsigned Opcode, + SDValue Op0, SDValue Op1, + SDValue &Even, SDValue &Odd) { + SDNode *In128 = DAG.getMachineNode(Extend, DL, MVT::Untyped, Op0); + SDValue Result = DAG.getNode(Opcode, DL, MVT::Untyped, + SDValue(In128, 0), Op1); + bool Is32Bit = is32Bit(VT); + SDValue SubReg0 = DAG.getTargetConstant(SystemZ::even128(Is32Bit), VT); + SDValue SubReg1 = DAG.getTargetConstant(SystemZ::odd128(Is32Bit), VT); + SDNode *Reg0 = DAG.getMachineNode(TargetOpcode::EXTRACT_SUBREG, DL, + VT, Result, SubReg0); + SDNode *Reg1 = DAG.getMachineNode(TargetOpcode::EXTRACT_SUBREG, DL, + VT, Result, SubReg1); + Even = SDValue(Reg0, 0); + Odd = SDValue(Reg1, 0); +} + +SDValue SystemZTargetLowering::lowerBR_CC(SDValue Op, SelectionDAG &DAG) const { + SDValue Chain = Op.getOperand(0); + ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(1))->get(); + SDValue CmpOp0 = Op.getOperand(2); + SDValue CmpOp1 = Op.getOperand(3); + SDValue Dest = Op.getOperand(4); + DebugLoc DL = Op.getDebugLoc(); + + unsigned CCMask; + SDValue Flags = emitCmp(DAG, CmpOp0, CmpOp1, CC, CCMask); + return DAG.getNode(SystemZISD::BR_CCMASK, DL, Op.getValueType(), + Chain, DAG.getConstant(CCMask, MVT::i32), Dest, Flags); +} + +SDValue SystemZTargetLowering::lowerSELECT_CC(SDValue Op, + SelectionDAG &DAG) const { + SDValue CmpOp0 = Op.getOperand(0); + SDValue CmpOp1 = Op.getOperand(1); + SDValue TrueOp = Op.getOperand(2); + SDValue FalseOp = Op.getOperand(3); + ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(4))->get(); + DebugLoc DL = Op.getDebugLoc(); + + unsigned CCMask; + SDValue Flags = emitCmp(DAG, CmpOp0, CmpOp1, CC, CCMask); + + SmallVector<SDValue, 4> Ops; + Ops.push_back(TrueOp); + Ops.push_back(FalseOp); + Ops.push_back(DAG.getConstant(CCMask, MVT::i32)); + Ops.push_back(Flags); + + SDVTList VTs = DAG.getVTList(Op.getValueType(), MVT::Glue); + return DAG.getNode(SystemZISD::SELECT_CCMASK, DL, VTs, &Ops[0], Ops.size()); +} + +SDValue SystemZTargetLowering::lowerGlobalAddress(GlobalAddressSDNode *Node, + SelectionDAG &DAG) const { + DebugLoc DL = Node->getDebugLoc(); + const GlobalValue *GV = Node->getGlobal(); + int64_t Offset = 
Node->getOffset(); + EVT PtrVT = getPointerTy(); + Reloc::Model RM = TM.getRelocationModel(); + CodeModel::Model CM = TM.getCodeModel(); + + SDValue Result; + if (Subtarget.isPC32DBLSymbol(GV, RM, CM)) { + // Make sure that the offset is aligned to a halfword. If it isn't, + // create an "anchor" at the previous 12-bit boundary. + // FIXME check whether there is a better way of handling this. + if (Offset & 1) { + Result = DAG.getTargetGlobalAddress(GV, DL, PtrVT, + Offset & ~uint64_t(0xfff)); + Offset &= 0xfff; + } else { + Result = DAG.getTargetGlobalAddress(GV, DL, PtrVT, Offset); + Offset = 0; + } + Result = DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Result); + } else { + Result = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, SystemZII::MO_GOT); + Result = DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Result); + Result = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), Result, + MachinePointerInfo::getGOT(), false, false, false, 0); + } + + // If there was a non-zero offset that we didn't fold, create an explicit + // addition for it. + if (Offset != 0) + Result = DAG.getNode(ISD::ADD, DL, PtrVT, Result, + DAG.getConstant(Offset, PtrVT)); + + return Result; +} + +SDValue SystemZTargetLowering::lowerGlobalTLSAddress(GlobalAddressSDNode *Node, + SelectionDAG &DAG) const { + DebugLoc DL = Node->getDebugLoc(); + const GlobalValue *GV = Node->getGlobal(); + EVT PtrVT = getPointerTy(); + TLSModel::Model model = TM.getTLSModel(GV); + + if (model != TLSModel::LocalExec) + llvm_unreachable("only local-exec TLS mode supported"); + + // The high part of the thread pointer is in access register 0. + SDValue TPHi = DAG.getNode(SystemZISD::EXTRACT_ACCESS, DL, MVT::i32, + DAG.getConstant(0, MVT::i32)); + TPHi = DAG.getNode(ISD::ANY_EXTEND, DL, PtrVT, TPHi); + + // The low part of the thread pointer is in access register 1. + SDValue TPLo = DAG.getNode(SystemZISD::EXTRACT_ACCESS, DL, MVT::i32, + DAG.getConstant(1, MVT::i32)); + TPLo = DAG.getNode(ISD::ZERO_EXTEND, DL, PtrVT, TPLo); + + // Merge them into a single 64-bit address. + SDValue TPHiShifted = DAG.getNode(ISD::SHL, DL, PtrVT, TPHi, + DAG.getConstant(32, PtrVT)); + SDValue TP = DAG.getNode(ISD::OR, DL, PtrVT, TPHiShifted, TPLo); + + // Get the offset of GA from the thread pointer. + SystemZConstantPoolValue *CPV = + SystemZConstantPoolValue::Create(GV, SystemZCP::NTPOFF); + + // Force the offset into the constant pool and load it from there. + SDValue CPAddr = DAG.getConstantPool(CPV, PtrVT, 8); + SDValue Offset = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), + CPAddr, MachinePointerInfo::getConstantPool(), + false, false, false, 0); + + // Add the base and offset together. + return DAG.getNode(ISD::ADD, DL, PtrVT, TP, Offset); +} + +SDValue SystemZTargetLowering::lowerBlockAddress(BlockAddressSDNode *Node, + SelectionDAG &DAG) const { + DebugLoc DL = Node->getDebugLoc(); + const BlockAddress *BA = Node->getBlockAddress(); + int64_t Offset = Node->getOffset(); + EVT PtrVT = getPointerTy(); + + SDValue Result = DAG.getTargetBlockAddress(BA, PtrVT, Offset); + Result = DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Result); + return Result; +} + +SDValue SystemZTargetLowering::lowerJumpTable(JumpTableSDNode *JT, + SelectionDAG &DAG) const { + DebugLoc DL = JT->getDebugLoc(); + EVT PtrVT = getPointerTy(); + SDValue Result = DAG.getTargetJumpTable(JT->getIndex(), PtrVT); + + // Use LARL to load the address of the table. 
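Back in lowerGlobalAddress above: LARL can only reach halfword-aligned targets, so an odd offset is split into an anchor at the previous 0x1000 boundary of the offset (which stays even, given an aligned symbol) plus a small addend that becomes an explicit ADD. A minimal sketch of that split, mirroring the Offset & ~0xfff / Offset & 0xfff arithmetic (the helper name is illustrative); the PCREL_WRAPPER return for the jump table continues right after this aside.

#include <cassert>
#include <cstdint>
#include <utility>

// Split a symbol offset into { anchor, addend }, as the odd-offset path in
// lowerGlobalAddress does.  The anchor can be folded into the LARL
// relocation; the addend is added back afterwards.
static std::pair<uint64_t, uint64_t> splitSymbolOffset(uint64_t Offset) {
  if (Offset & 1)
    return { Offset & ~uint64_t(0xfff),   // previous 4KB boundary
             Offset & 0xfff };            // re-added with an explicit ADD
  return { Offset, 0 };                   // even offsets fold directly
}

int main() {
  assert(splitSymbolOffset(0x1234).second == 0);   // even: no extra ADD
  std::pair<uint64_t, uint64_t> S = splitSymbolOffset(0x1235);
  assert(S.first == 0x1000 && S.second == 0x235);  // anchor plus addend
  assert(S.first + S.second == 0x1235);            // value unchanged
  return 0;
}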
+ return DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Result); +} + +SDValue SystemZTargetLowering::lowerConstantPool(ConstantPoolSDNode *CP, + SelectionDAG &DAG) const { + DebugLoc DL = CP->getDebugLoc(); + EVT PtrVT = getPointerTy(); + + SDValue Result; + if (CP->isMachineConstantPoolEntry()) + Result = DAG.getTargetConstantPool(CP->getMachineCPVal(), PtrVT, + CP->getAlignment()); + else + Result = DAG.getTargetConstantPool(CP->getConstVal(), PtrVT, + CP->getAlignment(), CP->getOffset()); + + // Use LARL to load the address of the constant pool entry. + return DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Result); +} + +SDValue SystemZTargetLowering::lowerBITCAST(SDValue Op, + SelectionDAG &DAG) const { + DebugLoc DL = Op.getDebugLoc(); + SDValue In = Op.getOperand(0); + EVT InVT = In.getValueType(); + EVT ResVT = Op.getValueType(); + + SDValue SubReg32 = DAG.getTargetConstant(SystemZ::subreg_32bit, MVT::i64); + SDValue Shift32 = DAG.getConstant(32, MVT::i64); + if (InVT == MVT::i32 && ResVT == MVT::f32) { + SDValue In64 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, In); + SDValue Shift = DAG.getNode(ISD::SHL, DL, MVT::i64, In64, Shift32); + SDValue Out64 = DAG.getNode(ISD::BITCAST, DL, MVT::f64, Shift); + SDNode *Out = DAG.getMachineNode(TargetOpcode::EXTRACT_SUBREG, DL, + MVT::f32, Out64, SubReg32); + return SDValue(Out, 0); + } + if (InVT == MVT::f32 && ResVT == MVT::i32) { + SDNode *U64 = DAG.getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, MVT::f64); + SDNode *In64 = DAG.getMachineNode(TargetOpcode::INSERT_SUBREG, DL, + MVT::f64, SDValue(U64, 0), In, SubReg32); + SDValue Out64 = DAG.getNode(ISD::BITCAST, DL, MVT::i64, SDValue(In64, 0)); + SDValue Shift = DAG.getNode(ISD::SRL, DL, MVT::i64, Out64, Shift32); + SDValue Out = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Shift); + return Out; + } + llvm_unreachable("Unexpected bitcast combination"); +} + +SDValue SystemZTargetLowering::lowerVASTART(SDValue Op, + SelectionDAG &DAG) const { + MachineFunction &MF = DAG.getMachineFunction(); + SystemZMachineFunctionInfo *FuncInfo = + MF.getInfo<SystemZMachineFunctionInfo>(); + EVT PtrVT = getPointerTy(); + + SDValue Chain = Op.getOperand(0); + SDValue Addr = Op.getOperand(1); + const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue(); + DebugLoc DL = Op.getDebugLoc(); + + // The initial values of each field. + const unsigned NumFields = 4; + SDValue Fields[NumFields] = { + DAG.getConstant(FuncInfo->getVarArgsFirstGPR(), PtrVT), + DAG.getConstant(FuncInfo->getVarArgsFirstFPR(), PtrVT), + DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT), + DAG.getFrameIndex(FuncInfo->getRegSaveFrameIndex(), PtrVT) + }; + + // Store each field into its respective slot. 
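Before the store loop below, it helps to see what those four 8-byte slots are. Assuming a 64-bit host and the usual ELF s390x va_list layout, they line up with the structure sketched here; the field names are illustrative, only the offsets matter.

#include <cstddef>
#include <cstdint>

// Rough picture of the va_list object that lowerVASTART fills in: four
// 8-byte fields written at offsets 0, 8, 16 and 24 by the loop that follows.
struct VaListSketch {
  int64_t NumNamedGPRs;     // getVarArgsFirstGPR(): first vararg GPR index
  int64_t NumNamedFPRs;     // getVarArgsFirstFPR(): first vararg FPR index
  void   *OverflowArgArea;  // frame index of the incoming stack arguments
  void   *RegSaveArea;      // frame index of the register save area
};

static_assert(offsetof(VaListSketch, NumNamedFPRs)    == 8,  "slot 1 at +8");
static_assert(offsetof(VaListSketch, OverflowArgArea) == 16, "slot 2 at +16");
static_assert(offsetof(VaListSketch, RegSaveArea)     == 24, "slot 3 at +24");
static_assert(sizeof(VaListSketch) == 32, "lowerVACOPY copies 32 bytes");

int main() { return 0; }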
+ SDValue MemOps[NumFields]; + unsigned Offset = 0; + for (unsigned I = 0; I < NumFields; ++I) { + SDValue FieldAddr = Addr; + if (Offset != 0) + FieldAddr = DAG.getNode(ISD::ADD, DL, PtrVT, FieldAddr, + DAG.getIntPtrConstant(Offset)); + MemOps[I] = DAG.getStore(Chain, DL, Fields[I], FieldAddr, + MachinePointerInfo(SV, Offset), + false, false, 0); + Offset += 8; + } + return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOps, NumFields); +} + +SDValue SystemZTargetLowering::lowerVACOPY(SDValue Op, + SelectionDAG &DAG) const { + SDValue Chain = Op.getOperand(0); + SDValue DstPtr = Op.getOperand(1); + SDValue SrcPtr = Op.getOperand(2); + const Value *DstSV = cast<SrcValueSDNode>(Op.getOperand(3))->getValue(); + const Value *SrcSV = cast<SrcValueSDNode>(Op.getOperand(4))->getValue(); + DebugLoc DL = Op.getDebugLoc(); + + return DAG.getMemcpy(Chain, DL, DstPtr, SrcPtr, DAG.getIntPtrConstant(32), + /*Align*/8, /*isVolatile*/false, /*AlwaysInline*/false, + MachinePointerInfo(DstSV), MachinePointerInfo(SrcSV)); +} + +SDValue SystemZTargetLowering:: +lowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const { + SDValue Chain = Op.getOperand(0); + SDValue Size = Op.getOperand(1); + DebugLoc DL = Op.getDebugLoc(); + + unsigned SPReg = getStackPointerRegisterToSaveRestore(); + + // Get a reference to the stack pointer. + SDValue OldSP = DAG.getCopyFromReg(Chain, DL, SPReg, MVT::i64); + + // Get the new stack pointer value. + SDValue NewSP = DAG.getNode(ISD::SUB, DL, MVT::i64, OldSP, Size); + + // Copy the new stack pointer back. + Chain = DAG.getCopyToReg(Chain, DL, SPReg, NewSP); + + // The allocated data lives above the 160 bytes allocated for the standard + // frame, plus any outgoing stack arguments. We don't know how much that + // amounts to yet, so emit a special ADJDYNALLOC placeholder. + SDValue ArgAdjust = DAG.getNode(SystemZISD::ADJDYNALLOC, DL, MVT::i64); + SDValue Result = DAG.getNode(ISD::ADD, DL, MVT::i64, NewSP, ArgAdjust); + + SDValue Ops[2] = { Result, Chain }; + return DAG.getMergeValues(Ops, 2, DL); +} + +SDValue SystemZTargetLowering::lowerUMUL_LOHI(SDValue Op, + SelectionDAG &DAG) const { + EVT VT = Op.getValueType(); + DebugLoc DL = Op.getDebugLoc(); + assert(!is32Bit(VT) && "Only support 64-bit UMUL_LOHI"); + + // UMUL_LOHI64 returns the low result in the odd register and the high + // result in the even register. UMUL_LOHI is defined to return the + // low half first, so the results are in reverse order. + SDValue Ops[2]; + lowerGR128Binary(DAG, DL, VT, SystemZ::AEXT128_64, SystemZISD::UMUL_LOHI64, + Op.getOperand(0), Op.getOperand(1), Ops[1], Ops[0]); + return DAG.getMergeValues(Ops, 2, DL); +} + +SDValue SystemZTargetLowering::lowerSDIVREM(SDValue Op, + SelectionDAG &DAG) const { + SDValue Op0 = Op.getOperand(0); + SDValue Op1 = Op.getOperand(1); + EVT VT = Op.getValueType(); + DebugLoc DL = Op.getDebugLoc(); + + // We use DSGF for 32-bit division. + if (is32Bit(VT)) { + Op0 = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, Op0); + Op1 = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, Op1); + } + + // DSG(F) takes a 64-bit dividend, so the even register in the GR128 + // input is "don't care". The instruction returns the remainder in + // the even register and the quotient in the odd register. 
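The even/odd register split that lowerGR128Binary encodes is easy to misread, so here is the convention as a tiny host-side model before the call that follows: DSG(F) leaves the remainder in the even half and the quotient in the odd half, while ISD::SDIVREM wants (quotient, remainder), hence the swapped Ops[1]/Ops[0] arguments. The struct and function below are purely illustrative.

#include <cassert>
#include <cstdint>

// Model of a GR128 even/odd pair after a divide: remainder lands in the even
// register, quotient in the odd one.  lowerSDIVREM then returns
// Ops[0] = odd (quotient) and Ops[1] = even (remainder).
struct GR128Pair { int64_t Even, Odd; };

static GR128Pair dsgfModel(int64_t Dividend, int64_t Divisor) {
  GR128Pair P;
  P.Even = Dividend % Divisor;  // remainder -> even register
  P.Odd  = Dividend / Divisor;  // quotient  -> odd register
  return P;
}

int main() {
  GR128Pair P = dsgfModel(23, 5);
  assert(P.Odd == 4 && P.Even == 3);  // quotient 4, remainder 3
  return 0;
}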
+ SDValue Ops[2]; + lowerGR128Binary(DAG, DL, VT, SystemZ::AEXT128_64, SystemZISD::SDIVREM64, + Op0, Op1, Ops[1], Ops[0]); + return DAG.getMergeValues(Ops, 2, DL); +} + +SDValue SystemZTargetLowering::lowerUDIVREM(SDValue Op, + SelectionDAG &DAG) const { + EVT VT = Op.getValueType(); + DebugLoc DL = Op.getDebugLoc(); + + // DL(G) uses a double-width dividend, so we need to clear the even + // register in the GR128 input. The instruction returns the remainder + // in the even register and the quotient in the odd register. + SDValue Ops[2]; + if (is32Bit(VT)) + lowerGR128Binary(DAG, DL, VT, SystemZ::ZEXT128_32, SystemZISD::UDIVREM32, + Op.getOperand(0), Op.getOperand(1), Ops[1], Ops[0]); + else + lowerGR128Binary(DAG, DL, VT, SystemZ::ZEXT128_64, SystemZISD::UDIVREM64, + Op.getOperand(0), Op.getOperand(1), Ops[1], Ops[0]); + return DAG.getMergeValues(Ops, 2, DL); +} + +SDValue SystemZTargetLowering::lowerOR(SDValue Op, SelectionDAG &DAG) const { + assert(Op.getValueType() == MVT::i64 && "Should be 64-bit operation"); + + // Get the known-zero masks for each operand. + SDValue Ops[] = { Op.getOperand(0), Op.getOperand(1) }; + APInt KnownZero[2], KnownOne[2]; + DAG.ComputeMaskedBits(Ops[0], KnownZero[0], KnownOne[0]); + DAG.ComputeMaskedBits(Ops[1], KnownZero[1], KnownOne[1]); + + // See if the upper 32 bits of one operand and the lower 32 bits of the + // other are known zero. They are the low and high operands respectively. + uint64_t Masks[] = { KnownZero[0].getZExtValue(), + KnownZero[1].getZExtValue() }; + unsigned High, Low; + if ((Masks[0] >> 32) == 0xffffffff && uint32_t(Masks[1]) == 0xffffffff) + High = 1, Low = 0; + else if ((Masks[1] >> 32) == 0xffffffff && uint32_t(Masks[0]) == 0xffffffff) + High = 0, Low = 1; + else + return Op; + + SDValue LowOp = Ops[Low]; + SDValue HighOp = Ops[High]; + + // If the high part is a constant, we're better off using IILH. + if (HighOp.getOpcode() == ISD::Constant) + return Op; + + // If the low part is a constant that is outside the range of LHI, + // then we're better off using IILF. + if (LowOp.getOpcode() == ISD::Constant) { + int64_t Value = int32_t(cast<ConstantSDNode>(LowOp)->getZExtValue()); + if (!isInt<16>(Value)) + return Op; + } + + // Check whether the high part is an AND that doesn't change the + // high 32 bits and just masks out low bits. We can skip it if so. + if (HighOp.getOpcode() == ISD::AND && + HighOp.getOperand(1).getOpcode() == ISD::Constant) { + ConstantSDNode *MaskNode = cast<ConstantSDNode>(HighOp.getOperand(1)); + uint64_t Mask = MaskNode->getZExtValue() | Masks[High]; + if ((Mask >> 32) == 0xffffffff) + HighOp = HighOp.getOperand(0); + } + + // Take advantage of the fact that all GR32 operations only change the + // low 32 bits by truncating Low to an i32 and inserting it directly + // using a subreg. The interesting cases are those where the truncation + // can be folded. + DebugLoc DL = Op.getDebugLoc(); + SDValue Low32 = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, LowOp); + SDValue SubReg32 = DAG.getTargetConstant(SystemZ::subreg_32bit, MVT::i64); + SDNode *Result = DAG.getMachineNode(TargetOpcode::INSERT_SUBREG, DL, + MVT::i64, HighOp, Low32, SubReg32); + return SDValue(Result, 0); +} + +// Op is an 8-, 16-bit or 32-bit ATOMIC_LOAD_* operation. Lower the first +// two into the fullword ATOMIC_LOADW_* operation given by Opcode. 
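One more aside before the ATOMIC_LOADW lowering defined next: the known-bits combine in lowerOR above is just the observation that, when one operand only occupies the high word and the other only the low word, the OR degenerates to inserting the low 32 bits into the other operand, which the GR32 subreg insertion provides for free. A plain-integer check of that equivalence, with illustrative names:

#include <cassert>
#include <cstdint>

// If HighOp's low 32 bits and LowOp's high 32 bits are known zero, then
// HighOp | LowOp == INSERT_SUBREG(HighOp, trunc-to-i32(LowOp), subreg_32bit).
static uint64_t insertLowWord(uint64_t HighOp, uint32_t Low32) {
  return (HighOp & ~uint64_t(0xffffffff)) | Low32;
}

int main() {
  uint64_t HighOp = 0x1234567800000000;  // low word known zero
  uint64_t LowOp  = 0x000000009abcdef0;  // high word known zero
  assert((HighOp | LowOp) == insertLowWord(HighOp, uint32_t(LowOp)));
  return 0;
}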
+SDValue SystemZTargetLowering::lowerATOMIC_LOAD(SDValue Op, + SelectionDAG &DAG, + unsigned Opcode) const { + AtomicSDNode *Node = cast<AtomicSDNode>(Op.getNode()); + + // 32-bit operations need no code outside the main loop. + EVT NarrowVT = Node->getMemoryVT(); + EVT WideVT = MVT::i32; + if (NarrowVT == WideVT) + return Op; + + int64_t BitSize = NarrowVT.getSizeInBits(); + SDValue ChainIn = Node->getChain(); + SDValue Addr = Node->getBasePtr(); + SDValue Src2 = Node->getVal(); + MachineMemOperand *MMO = Node->getMemOperand(); + DebugLoc DL = Node->getDebugLoc(); + EVT PtrVT = Addr.getValueType(); + + // Convert atomic subtracts of constants into additions. + if (Opcode == SystemZISD::ATOMIC_LOADW_SUB) + if (ConstantSDNode *Const = dyn_cast<ConstantSDNode>(Src2)) { + Opcode = SystemZISD::ATOMIC_LOADW_ADD; + Src2 = DAG.getConstant(-Const->getSExtValue(), Src2.getValueType()); + } + + // Get the address of the containing word. + SDValue AlignedAddr = DAG.getNode(ISD::AND, DL, PtrVT, Addr, + DAG.getConstant(-4, PtrVT)); + + // Get the number of bits that the word must be rotated left in order + // to bring the field to the top bits of a GR32. + SDValue BitShift = DAG.getNode(ISD::SHL, DL, PtrVT, Addr, + DAG.getConstant(3, PtrVT)); + BitShift = DAG.getNode(ISD::TRUNCATE, DL, WideVT, BitShift); + + // Get the complementing shift amount, for rotating a field in the top + // bits back to its proper position. + SDValue NegBitShift = DAG.getNode(ISD::SUB, DL, WideVT, + DAG.getConstant(0, WideVT), BitShift); + + // Extend the source operand to 32 bits and prepare it for the inner loop. + // ATOMIC_SWAPW uses RISBG to rotate the field left, but all other + // operations require the source to be shifted in advance. (This shift + // can be folded if the source is constant.) For AND and NAND, the lower + // bits must be set, while for other opcodes they should be left clear. + if (Opcode != SystemZISD::ATOMIC_SWAPW) + Src2 = DAG.getNode(ISD::SHL, DL, WideVT, Src2, + DAG.getConstant(32 - BitSize, WideVT)); + if (Opcode == SystemZISD::ATOMIC_LOADW_AND || + Opcode == SystemZISD::ATOMIC_LOADW_NAND) + Src2 = DAG.getNode(ISD::OR, DL, WideVT, Src2, + DAG.getConstant(uint32_t(-1) >> BitSize, WideVT)); + + // Construct the ATOMIC_LOADW_* node. + SDVTList VTList = DAG.getVTList(WideVT, MVT::Other); + SDValue Ops[] = { ChainIn, AlignedAddr, Src2, BitShift, NegBitShift, + DAG.getConstant(BitSize, WideVT) }; + SDValue AtomicOp = DAG.getMemIntrinsicNode(Opcode, DL, VTList, Ops, + array_lengthof(Ops), + NarrowVT, MMO); + + // Rotate the result of the final CS so that the field is in the lower + // bits of a GR32, then truncate it. + SDValue ResultShift = DAG.getNode(ISD::ADD, DL, WideVT, BitShift, + DAG.getConstant(BitSize, WideVT)); + SDValue Result = DAG.getNode(ISD::ROTL, DL, WideVT, AtomicOp, ResultShift); + + SDValue RetOps[2] = { Result, AtomicOp.getValue(1) }; + return DAG.getMergeValues(RetOps, 2, DL); +} + +// Node is an 8- or 16-bit ATOMIC_CMP_SWAP operation. Lower the first two +// into a fullword ATOMIC_CMP_SWAPW operation. +SDValue SystemZTargetLowering::lowerATOMIC_CMP_SWAP(SDValue Op, + SelectionDAG &DAG) const { + AtomicSDNode *Node = cast<AtomicSDNode>(Op.getNode()); + + // We have native support for 32-bit compare and swap. 
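The same containing-word trick drives the compare-and-swap lowering that continues below, so the address arithmetic is worth spelling out once: the aligned word is loaded from Addr & -4, and rotating it left by 8 * (Addr & 3) bits brings the addressed subword into the high bits of a GR32. A host-side model of just that step, with illustrative helper names:

#include <cassert>
#include <cstdint>

// Field addressing used by the subword atomic lowerings: load the aligned
// word, then rotate left by the byte offset times 8 so the field occupies
// the top bits (SystemZ is big-endian, so byte 0 already sits there).
static uint32_t rotl32(uint32_t V, unsigned N) {
  N &= 31;
  return N ? (V << N) | (V >> (32 - N)) : V;
}

int main() {
  const uint8_t Bytes[4] = { 0x11, 0x22, 0x33, 0x44 };  // big-endian word
  uint32_t Word = 0;
  for (int I = 0; I < 4; ++I)
    Word = (Word << 8) | Bytes[I];                      // 0x11223344

  for (unsigned ByteOfs = 0; ByteOfs < 4; ++ByteOfs) {
    unsigned BitShift = 8 * ByteOfs;            // low bits of (Addr << 3)
    uint32_t Rotated = rotl32(Word, BitShift);  // RLL %OldVal, 0(%BitShift)
    assert((Rotated >> 24) == Bytes[ByteOfs]);  // field now in the top byte
  }
  return 0;
}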
+ EVT NarrowVT = Node->getMemoryVT(); + EVT WideVT = MVT::i32; + if (NarrowVT == WideVT) + return Op; + + int64_t BitSize = NarrowVT.getSizeInBits(); + SDValue ChainIn = Node->getOperand(0); + SDValue Addr = Node->getOperand(1); + SDValue CmpVal = Node->getOperand(2); + SDValue SwapVal = Node->getOperand(3); + MachineMemOperand *MMO = Node->getMemOperand(); + DebugLoc DL = Node->getDebugLoc(); + EVT PtrVT = Addr.getValueType(); + + // Get the address of the containing word. + SDValue AlignedAddr = DAG.getNode(ISD::AND, DL, PtrVT, Addr, + DAG.getConstant(-4, PtrVT)); + + // Get the number of bits that the word must be rotated left in order + // to bring the field to the top bits of a GR32. + SDValue BitShift = DAG.getNode(ISD::SHL, DL, PtrVT, Addr, + DAG.getConstant(3, PtrVT)); + BitShift = DAG.getNode(ISD::TRUNCATE, DL, WideVT, BitShift); + + // Get the complementing shift amount, for rotating a field in the top + // bits back to its proper position. + SDValue NegBitShift = DAG.getNode(ISD::SUB, DL, WideVT, + DAG.getConstant(0, WideVT), BitShift); + + // Construct the ATOMIC_CMP_SWAPW node. + SDVTList VTList = DAG.getVTList(WideVT, MVT::Other); + SDValue Ops[] = { ChainIn, AlignedAddr, CmpVal, SwapVal, BitShift, + NegBitShift, DAG.getConstant(BitSize, WideVT) }; + SDValue AtomicOp = DAG.getMemIntrinsicNode(SystemZISD::ATOMIC_CMP_SWAPW, DL, + VTList, Ops, array_lengthof(Ops), + NarrowVT, MMO); + return AtomicOp; +} + +SDValue SystemZTargetLowering::lowerSTACKSAVE(SDValue Op, + SelectionDAG &DAG) const { + MachineFunction &MF = DAG.getMachineFunction(); + MF.getInfo<SystemZMachineFunctionInfo>()->setManipulatesSP(true); + return DAG.getCopyFromReg(Op.getOperand(0), Op.getDebugLoc(), + SystemZ::R15D, Op.getValueType()); +} + +SDValue SystemZTargetLowering::lowerSTACKRESTORE(SDValue Op, + SelectionDAG &DAG) const { + MachineFunction &MF = DAG.getMachineFunction(); + MF.getInfo<SystemZMachineFunctionInfo>()->setManipulatesSP(true); + return DAG.getCopyToReg(Op.getOperand(0), Op.getDebugLoc(), + SystemZ::R15D, Op.getOperand(1)); +} + +SDValue SystemZTargetLowering::LowerOperation(SDValue Op, + SelectionDAG &DAG) const { + switch (Op.getOpcode()) { + case ISD::BR_CC: + return lowerBR_CC(Op, DAG); + case ISD::SELECT_CC: + return lowerSELECT_CC(Op, DAG); + case ISD::GlobalAddress: + return lowerGlobalAddress(cast<GlobalAddressSDNode>(Op), DAG); + case ISD::GlobalTLSAddress: + return lowerGlobalTLSAddress(cast<GlobalAddressSDNode>(Op), DAG); + case ISD::BlockAddress: + return lowerBlockAddress(cast<BlockAddressSDNode>(Op), DAG); + case ISD::JumpTable: + return lowerJumpTable(cast<JumpTableSDNode>(Op), DAG); + case ISD::ConstantPool: + return lowerConstantPool(cast<ConstantPoolSDNode>(Op), DAG); + case ISD::BITCAST: + return lowerBITCAST(Op, DAG); + case ISD::VASTART: + return lowerVASTART(Op, DAG); + case ISD::VACOPY: + return lowerVACOPY(Op, DAG); + case ISD::DYNAMIC_STACKALLOC: + return lowerDYNAMIC_STACKALLOC(Op, DAG); + case ISD::UMUL_LOHI: + return lowerUMUL_LOHI(Op, DAG); + case ISD::SDIVREM: + return lowerSDIVREM(Op, DAG); + case ISD::UDIVREM: + return lowerUDIVREM(Op, DAG); + case ISD::OR: + return lowerOR(Op, DAG); + case ISD::ATOMIC_SWAP: + return lowerATOMIC_LOAD(Op, DAG, SystemZISD::ATOMIC_SWAPW); + case ISD::ATOMIC_LOAD_ADD: + return lowerATOMIC_LOAD(Op, DAG, SystemZISD::ATOMIC_LOADW_ADD); + case ISD::ATOMIC_LOAD_SUB: + return lowerATOMIC_LOAD(Op, DAG, SystemZISD::ATOMIC_LOADW_SUB); + case ISD::ATOMIC_LOAD_AND: + return lowerATOMIC_LOAD(Op, DAG, SystemZISD::ATOMIC_LOADW_AND); + case 
ISD::ATOMIC_LOAD_OR: + return lowerATOMIC_LOAD(Op, DAG, SystemZISD::ATOMIC_LOADW_OR); + case ISD::ATOMIC_LOAD_XOR: + return lowerATOMIC_LOAD(Op, DAG, SystemZISD::ATOMIC_LOADW_XOR); + case ISD::ATOMIC_LOAD_NAND: + return lowerATOMIC_LOAD(Op, DAG, SystemZISD::ATOMIC_LOADW_NAND); + case ISD::ATOMIC_LOAD_MIN: + return lowerATOMIC_LOAD(Op, DAG, SystemZISD::ATOMIC_LOADW_MIN); + case ISD::ATOMIC_LOAD_MAX: + return lowerATOMIC_LOAD(Op, DAG, SystemZISD::ATOMIC_LOADW_MAX); + case ISD::ATOMIC_LOAD_UMIN: + return lowerATOMIC_LOAD(Op, DAG, SystemZISD::ATOMIC_LOADW_UMIN); + case ISD::ATOMIC_LOAD_UMAX: + return lowerATOMIC_LOAD(Op, DAG, SystemZISD::ATOMIC_LOADW_UMAX); + case ISD::ATOMIC_CMP_SWAP: + return lowerATOMIC_CMP_SWAP(Op, DAG); + case ISD::STACKSAVE: + return lowerSTACKSAVE(Op, DAG); + case ISD::STACKRESTORE: + return lowerSTACKRESTORE(Op, DAG); + default: + llvm_unreachable("Unexpected node to lower"); + } +} + +const char *SystemZTargetLowering::getTargetNodeName(unsigned Opcode) const { +#define OPCODE(NAME) case SystemZISD::NAME: return "SystemZISD::" #NAME + switch (Opcode) { + OPCODE(RET_FLAG); + OPCODE(CALL); + OPCODE(PCREL_WRAPPER); + OPCODE(CMP); + OPCODE(UCMP); + OPCODE(BR_CCMASK); + OPCODE(SELECT_CCMASK); + OPCODE(ADJDYNALLOC); + OPCODE(EXTRACT_ACCESS); + OPCODE(UMUL_LOHI64); + OPCODE(SDIVREM64); + OPCODE(UDIVREM32); + OPCODE(UDIVREM64); + OPCODE(ATOMIC_SWAPW); + OPCODE(ATOMIC_LOADW_ADD); + OPCODE(ATOMIC_LOADW_SUB); + OPCODE(ATOMIC_LOADW_AND); + OPCODE(ATOMIC_LOADW_OR); + OPCODE(ATOMIC_LOADW_XOR); + OPCODE(ATOMIC_LOADW_NAND); + OPCODE(ATOMIC_LOADW_MIN); + OPCODE(ATOMIC_LOADW_MAX); + OPCODE(ATOMIC_LOADW_UMIN); + OPCODE(ATOMIC_LOADW_UMAX); + OPCODE(ATOMIC_CMP_SWAPW); + } + return NULL; +#undef OPCODE +} + +//===----------------------------------------------------------------------===// +// Custom insertion +//===----------------------------------------------------------------------===// + +// Create a new basic block after MBB. +static MachineBasicBlock *emitBlockAfter(MachineBasicBlock *MBB) { + MachineFunction &MF = *MBB->getParent(); + MachineBasicBlock *NewMBB = MF.CreateMachineBasicBlock(MBB->getBasicBlock()); + MF.insert(llvm::next(MachineFunction::iterator(MBB)), NewMBB); + return NewMBB; +} + +// Split MBB after MI and return the new block (the one that contains +// instructions after MI). +static MachineBasicBlock *splitBlockAfter(MachineInstr *MI, + MachineBasicBlock *MBB) { + MachineBasicBlock *NewMBB = emitBlockAfter(MBB); + NewMBB->splice(NewMBB->begin(), MBB, + llvm::next(MachineBasicBlock::iterator(MI)), + MBB->end()); + NewMBB->transferSuccessorsAndUpdatePHIs(MBB); + return NewMBB; +} + +// Implement EmitInstrWithCustomInserter for pseudo Select* instruction MI. +MachineBasicBlock * +SystemZTargetLowering::emitSelect(MachineInstr *MI, + MachineBasicBlock *MBB) const { + const SystemZInstrInfo *TII = TM.getInstrInfo(); + + unsigned DestReg = MI->getOperand(0).getReg(); + unsigned TrueReg = MI->getOperand(1).getReg(); + unsigned FalseReg = MI->getOperand(2).getReg(); + unsigned CCMask = MI->getOperand(3).getImm(); + DebugLoc DL = MI->getDebugLoc(); + + MachineBasicBlock *StartMBB = MBB; + MachineBasicBlock *JoinMBB = splitBlockAfter(MI, MBB); + MachineBasicBlock *FalseMBB = emitBlockAfter(StartMBB); + + // StartMBB: + // ... + // TrueVal = ... 
+ // cmpTY ccX, r1, r2 + // jCC JoinMBB + // # fallthrough to FalseMBB + MBB = StartMBB; + BuildMI(MBB, DL, TII->get(SystemZ::BRCL)).addImm(CCMask).addMBB(JoinMBB); + MBB->addSuccessor(JoinMBB); + MBB->addSuccessor(FalseMBB); + + // FalseMBB: + // # fallthrough to JoinMBB + MBB = FalseMBB; + MBB->addSuccessor(JoinMBB); + + // JoinMBB: + // %Result = phi [ %FalseReg, FalseMBB ], [ %TrueReg, StartMBB ] + // ... + MBB = JoinMBB; + BuildMI(*MBB, MBB->begin(), DL, TII->get(SystemZ::PHI), DestReg) + .addReg(TrueReg).addMBB(StartMBB) + .addReg(FalseReg).addMBB(FalseMBB); + + MI->eraseFromParent(); + return JoinMBB; +} + +// Implement EmitInstrWithCustomInserter for pseudo ATOMIC_LOAD{,W}_* +// or ATOMIC_SWAP{,W} instruction MI. BinOpcode is the instruction that +// performs the binary operation elided by "*", or 0 for ATOMIC_SWAP{,W}. +// BitSize is the width of the field in bits, or 0 if this is a partword +// ATOMIC_LOADW_* or ATOMIC_SWAPW instruction, in which case the bitsize +// is one of the operands. Invert says whether the field should be +// inverted after performing BinOpcode (e.g. for NAND). +MachineBasicBlock * +SystemZTargetLowering::emitAtomicLoadBinary(MachineInstr *MI, + MachineBasicBlock *MBB, + unsigned BinOpcode, + unsigned BitSize, + bool Invert) const { + const SystemZInstrInfo *TII = TM.getInstrInfo(); + MachineFunction &MF = *MBB->getParent(); + MachineRegisterInfo &MRI = MF.getRegInfo(); + unsigned MaskNE = CCMaskForCondCode(ISD::SETNE); + bool IsSubWord = (BitSize < 32); + + // Extract the operands. Base can be a register or a frame index. + // Src2 can be a register or immediate. + unsigned Dest = MI->getOperand(0).getReg(); + MachineOperand Base = earlyUseOperand(MI->getOperand(1)); + int64_t Disp = MI->getOperand(2).getImm(); + MachineOperand Src2 = earlyUseOperand(MI->getOperand(3)); + unsigned BitShift = (IsSubWord ? MI->getOperand(4).getReg() : 0); + unsigned NegBitShift = (IsSubWord ? MI->getOperand(5).getReg() : 0); + DebugLoc DL = MI->getDebugLoc(); + if (IsSubWord) + BitSize = MI->getOperand(6).getImm(); + + // Subword operations use 32-bit registers. + const TargetRegisterClass *RC = (BitSize <= 32 ? + &SystemZ::GR32BitRegClass : + &SystemZ::GR64BitRegClass); + unsigned LOpcode = BitSize <= 32 ? SystemZ::L : SystemZ::LG; + unsigned CSOpcode = BitSize <= 32 ? SystemZ::CS : SystemZ::CSG; + + // Get the right opcodes for the displacement. + LOpcode = TII->getOpcodeForOffset(LOpcode, Disp); + CSOpcode = TII->getOpcodeForOffset(CSOpcode, Disp); + assert(LOpcode && CSOpcode && "Displacement out of range"); + + // Create virtual registers for temporary results. + unsigned OrigVal = MRI.createVirtualRegister(RC); + unsigned OldVal = MRI.createVirtualRegister(RC); + unsigned NewVal = (BinOpcode || IsSubWord ? + MRI.createVirtualRegister(RC) : Src2.getReg()); + unsigned RotatedOldVal = (IsSubWord ? MRI.createVirtualRegister(RC) : OldVal); + unsigned RotatedNewVal = (IsSubWord ? MRI.createVirtualRegister(RC) : NewVal); + + // Insert a basic block for the main loop. + MachineBasicBlock *StartMBB = MBB; + MachineBasicBlock *DoneMBB = splitBlockAfter(MI, MBB); + MachineBasicBlock *LoopMBB = emitBlockAfter(StartMBB); + + // StartMBB: + // ... 
+ // %OrigVal = L Disp(%Base) + // # fall through to LoopMMB + MBB = StartMBB; + BuildMI(MBB, DL, TII->get(LOpcode), OrigVal) + .addOperand(Base).addImm(Disp).addReg(0); + MBB->addSuccessor(LoopMBB); + + // LoopMBB: + // %OldVal = phi [ %OrigVal, StartMBB ], [ %Dest, LoopMBB ] + // %RotatedOldVal = RLL %OldVal, 0(%BitShift) + // %RotatedNewVal = OP %RotatedOldVal, %Src2 + // %NewVal = RLL %RotatedNewVal, 0(%NegBitShift) + // %Dest = CS %OldVal, %NewVal, Disp(%Base) + // JNE LoopMBB + // # fall through to DoneMMB + MBB = LoopMBB; + BuildMI(MBB, DL, TII->get(SystemZ::PHI), OldVal) + .addReg(OrigVal).addMBB(StartMBB) + .addReg(Dest).addMBB(LoopMBB); + if (IsSubWord) + BuildMI(MBB, DL, TII->get(SystemZ::RLL), RotatedOldVal) + .addReg(OldVal).addReg(BitShift).addImm(0); + if (Invert) { + // Perform the operation normally and then invert every bit of the field. + unsigned Tmp = MRI.createVirtualRegister(RC); + BuildMI(MBB, DL, TII->get(BinOpcode), Tmp) + .addReg(RotatedOldVal).addOperand(Src2); + if (BitSize < 32) + // XILF with the upper BitSize bits set. + BuildMI(MBB, DL, TII->get(SystemZ::XILF32), RotatedNewVal) + .addReg(Tmp).addImm(uint32_t(~0 << (32 - BitSize))); + else if (BitSize == 32) + // XILF with every bit set. + BuildMI(MBB, DL, TII->get(SystemZ::XILF32), RotatedNewVal) + .addReg(Tmp).addImm(~uint32_t(0)); + else { + // Use LCGR and add -1 to the result, which is more compact than + // an XILF, XILH pair. + unsigned Tmp2 = MRI.createVirtualRegister(RC); + BuildMI(MBB, DL, TII->get(SystemZ::LCGR), Tmp2).addReg(Tmp); + BuildMI(MBB, DL, TII->get(SystemZ::AGHI), RotatedNewVal) + .addReg(Tmp2).addImm(-1); + } + } else if (BinOpcode) + // A simply binary operation. + BuildMI(MBB, DL, TII->get(BinOpcode), RotatedNewVal) + .addReg(RotatedOldVal).addOperand(Src2); + else if (IsSubWord) + // Use RISBG to rotate Src2 into position and use it to replace the + // field in RotatedOldVal. + BuildMI(MBB, DL, TII->get(SystemZ::RISBG32), RotatedNewVal) + .addReg(RotatedOldVal).addReg(Src2.getReg()) + .addImm(32).addImm(31 + BitSize).addImm(32 - BitSize); + if (IsSubWord) + BuildMI(MBB, DL, TII->get(SystemZ::RLL), NewVal) + .addReg(RotatedNewVal).addReg(NegBitShift).addImm(0); + BuildMI(MBB, DL, TII->get(CSOpcode), Dest) + .addReg(OldVal).addReg(NewVal).addOperand(Base).addImm(Disp); + BuildMI(MBB, DL, TII->get(SystemZ::BRCL)).addImm(MaskNE).addMBB(LoopMBB); + MBB->addSuccessor(LoopMBB); + MBB->addSuccessor(DoneMBB); + + MI->eraseFromParent(); + return DoneMBB; +} + +// Implement EmitInstrWithCustomInserter for pseudo +// ATOMIC_LOAD{,W}_{,U}{MIN,MAX} instruction MI. CompareOpcode is the +// instruction that should be used to compare the current field with the +// minimum or maximum value. KeepOldMask is the BRC condition-code mask +// for when the current field should be kept. BitSize is the width of +// the field in bits, or 0 if this is a partword ATOMIC_LOADW_* instruction. +MachineBasicBlock * +SystemZTargetLowering::emitAtomicLoadMinMax(MachineInstr *MI, + MachineBasicBlock *MBB, + unsigned CompareOpcode, + unsigned KeepOldMask, + unsigned BitSize) const { + const SystemZInstrInfo *TII = TM.getInstrInfo(); + MachineFunction &MF = *MBB->getParent(); + MachineRegisterInfo &MRI = MF.getRegInfo(); + unsigned MaskNE = CCMaskForCondCode(ISD::SETNE); + bool IsSubWord = (BitSize < 32); + + // Extract the operands. Base can be a register or a frame index. 
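Before the min/max variant extracts its operands below, the loop just emitted is easier to follow as host-level C++ over std::atomic. This is only a sketch of the control flow: rotl32, atomicRmwWord and the callable BinOp are illustrative stand-ins rather than LLVM API, and the rotation amounts mirror BitShift/NegBitShift.

#include <atomic>
#include <cstdint>

// Shape of the compare-and-swap retry loop built by emitAtomicLoadBinary,
// specialised to a 32-bit containing word.
static uint32_t rotl32(uint32_t V, unsigned N) {
  N &= 31;
  return N ? (V << N) | (V >> (32 - N)) : V;
}

template <typename OpT>
uint32_t atomicRmwWord(std::atomic<uint32_t> &Mem, unsigned BitShift,
                       uint32_t Src2, OpT BinOp) {
  uint32_t OldVal = Mem.load();                          // L %OrigVal
  for (;;) {
    uint32_t RotatedOld = rotl32(OldVal, BitShift);      // RLL
    uint32_t RotatedNew = BinOp(RotatedOld, Src2);       // AR/NR/OR/XR/...
    uint32_t NewVal = rotl32(RotatedNew, 32 - BitShift); // RLL by NegBitShift
    if (Mem.compare_exchange_weak(OldVal, NewVal))       // CS; JNE retries
      return OldVal;                                     // %Dest: old word
  }
}

int main() {
  std::atomic<uint32_t> Word(0x11223344);
  // OR 0xFF into the byte at offset 1 (BitShift = 8), as an ATOMIC_LOADW_OR
  // with Src2 already shifted into the high bits would do.
  atomicRmwWord(Word, 8, 0xFF000000u,
                [](uint32_t A, uint32_t B) { return A | B; });
  return Word.load() == 0x11FF3344 ? 0 : 1;
}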
+ unsigned Dest = MI->getOperand(0).getReg(); + MachineOperand Base = earlyUseOperand(MI->getOperand(1)); + int64_t Disp = MI->getOperand(2).getImm(); + unsigned Src2 = MI->getOperand(3).getReg(); + unsigned BitShift = (IsSubWord ? MI->getOperand(4).getReg() : 0); + unsigned NegBitShift = (IsSubWord ? MI->getOperand(5).getReg() : 0); + DebugLoc DL = MI->getDebugLoc(); + if (IsSubWord) + BitSize = MI->getOperand(6).getImm(); + + // Subword operations use 32-bit registers. + const TargetRegisterClass *RC = (BitSize <= 32 ? + &SystemZ::GR32BitRegClass : + &SystemZ::GR64BitRegClass); + unsigned LOpcode = BitSize <= 32 ? SystemZ::L : SystemZ::LG; + unsigned CSOpcode = BitSize <= 32 ? SystemZ::CS : SystemZ::CSG; + + // Get the right opcodes for the displacement. + LOpcode = TII->getOpcodeForOffset(LOpcode, Disp); + CSOpcode = TII->getOpcodeForOffset(CSOpcode, Disp); + assert(LOpcode && CSOpcode && "Displacement out of range"); + + // Create virtual registers for temporary results. + unsigned OrigVal = MRI.createVirtualRegister(RC); + unsigned OldVal = MRI.createVirtualRegister(RC); + unsigned NewVal = MRI.createVirtualRegister(RC); + unsigned RotatedOldVal = (IsSubWord ? MRI.createVirtualRegister(RC) : OldVal); + unsigned RotatedAltVal = (IsSubWord ? MRI.createVirtualRegister(RC) : Src2); + unsigned RotatedNewVal = (IsSubWord ? MRI.createVirtualRegister(RC) : NewVal); + + // Insert 3 basic blocks for the loop. + MachineBasicBlock *StartMBB = MBB; + MachineBasicBlock *DoneMBB = splitBlockAfter(MI, MBB); + MachineBasicBlock *LoopMBB = emitBlockAfter(StartMBB); + MachineBasicBlock *UseAltMBB = emitBlockAfter(LoopMBB); + MachineBasicBlock *UpdateMBB = emitBlockAfter(UseAltMBB); + + // StartMBB: + // ... + // %OrigVal = L Disp(%Base) + // # fall through to LoopMMB + MBB = StartMBB; + BuildMI(MBB, DL, TII->get(LOpcode), OrigVal) + .addOperand(Base).addImm(Disp).addReg(0); + MBB->addSuccessor(LoopMBB); + + // LoopMBB: + // %OldVal = phi [ %OrigVal, StartMBB ], [ %Dest, UpdateMBB ] + // %RotatedOldVal = RLL %OldVal, 0(%BitShift) + // CompareOpcode %RotatedOldVal, %Src2 + // BRCL KeepOldMask, UpdateMBB + MBB = LoopMBB; + BuildMI(MBB, DL, TII->get(SystemZ::PHI), OldVal) + .addReg(OrigVal).addMBB(StartMBB) + .addReg(Dest).addMBB(UpdateMBB); + if (IsSubWord) + BuildMI(MBB, DL, TII->get(SystemZ::RLL), RotatedOldVal) + .addReg(OldVal).addReg(BitShift).addImm(0); + BuildMI(MBB, DL, TII->get(CompareOpcode)) + .addReg(RotatedOldVal).addReg(Src2); + BuildMI(MBB, DL, TII->get(SystemZ::BRCL)) + .addImm(KeepOldMask).addMBB(UpdateMBB); + MBB->addSuccessor(UpdateMBB); + MBB->addSuccessor(UseAltMBB); + + // UseAltMBB: + // %RotatedAltVal = RISBG %RotatedOldVal, %Src2, 32, 31 + BitSize, 0 + // # fall through to UpdateMMB + MBB = UseAltMBB; + if (IsSubWord) + BuildMI(MBB, DL, TII->get(SystemZ::RISBG32), RotatedAltVal) + .addReg(RotatedOldVal).addReg(Src2) + .addImm(32).addImm(31 + BitSize).addImm(0); + MBB->addSuccessor(UpdateMBB); + + // UpdateMBB: + // %RotatedNewVal = PHI [ %RotatedOldVal, LoopMBB ], + // [ %RotatedAltVal, UseAltMBB ] + // %NewVal = RLL %RotatedNewVal, 0(%NegBitShift) + // %Dest = CS %OldVal, %NewVal, Disp(%Base) + // JNE LoopMBB + // # fall through to DoneMMB + MBB = UpdateMBB; + BuildMI(MBB, DL, TII->get(SystemZ::PHI), RotatedNewVal) + .addReg(RotatedOldVal).addMBB(LoopMBB) + .addReg(RotatedAltVal).addMBB(UseAltMBB); + if (IsSubWord) + BuildMI(MBB, DL, TII->get(SystemZ::RLL), NewVal) + .addReg(RotatedNewVal).addReg(NegBitShift).addImm(0); + BuildMI(MBB, DL, TII->get(CSOpcode), Dest) + 
.addReg(OldVal).addReg(NewVal).addOperand(Base).addImm(Disp); + BuildMI(MBB, DL, TII->get(SystemZ::BRCL)).addImm(MaskNE).addMBB(LoopMBB); + MBB->addSuccessor(LoopMBB); + MBB->addSuccessor(DoneMBB); + + MI->eraseFromParent(); + return DoneMBB; +} + +// Implement EmitInstrWithCustomInserter for pseudo ATOMIC_CMP_SWAPW +// instruction MI. +MachineBasicBlock * +SystemZTargetLowering::emitAtomicCmpSwapW(MachineInstr *MI, + MachineBasicBlock *MBB) const { + const SystemZInstrInfo *TII = TM.getInstrInfo(); + MachineFunction &MF = *MBB->getParent(); + MachineRegisterInfo &MRI = MF.getRegInfo(); + unsigned MaskNE = CCMaskForCondCode(ISD::SETNE); + + // Extract the operands. Base can be a register or a frame index. + unsigned Dest = MI->getOperand(0).getReg(); + MachineOperand Base = earlyUseOperand(MI->getOperand(1)); + int64_t Disp = MI->getOperand(2).getImm(); + unsigned OrigCmpVal = MI->getOperand(3).getReg(); + unsigned OrigSwapVal = MI->getOperand(4).getReg(); + unsigned BitShift = MI->getOperand(5).getReg(); + unsigned NegBitShift = MI->getOperand(6).getReg(); + int64_t BitSize = MI->getOperand(7).getImm(); + DebugLoc DL = MI->getDebugLoc(); + + const TargetRegisterClass *RC = &SystemZ::GR32BitRegClass; + + // Get the right opcodes for the displacement. + unsigned LOpcode = TII->getOpcodeForOffset(SystemZ::L, Disp); + unsigned CSOpcode = TII->getOpcodeForOffset(SystemZ::CS, Disp); + assert(LOpcode && CSOpcode && "Displacement out of range"); + + // Create virtual registers for temporary results. + unsigned OrigOldVal = MRI.createVirtualRegister(RC); + unsigned OldVal = MRI.createVirtualRegister(RC); + unsigned CmpVal = MRI.createVirtualRegister(RC); + unsigned SwapVal = MRI.createVirtualRegister(RC); + unsigned StoreVal = MRI.createVirtualRegister(RC); + unsigned RetryOldVal = MRI.createVirtualRegister(RC); + unsigned RetryCmpVal = MRI.createVirtualRegister(RC); + unsigned RetrySwapVal = MRI.createVirtualRegister(RC); + + // Insert 2 basic blocks for the loop. + MachineBasicBlock *StartMBB = MBB; + MachineBasicBlock *DoneMBB = splitBlockAfter(MI, MBB); + MachineBasicBlock *LoopMBB = emitBlockAfter(StartMBB); + MachineBasicBlock *SetMBB = emitBlockAfter(LoopMBB); + + // StartMBB: + // ... + // %OrigOldVal = L Disp(%Base) + // # fall through to LoopMMB + MBB = StartMBB; + BuildMI(MBB, DL, TII->get(LOpcode), OrigOldVal) + .addOperand(Base).addImm(Disp).addReg(0); + MBB->addSuccessor(LoopMBB); + + // LoopMBB: + // %OldVal = phi [ %OrigOldVal, EntryBB ], [ %RetryOldVal, SetMBB ] + // %CmpVal = phi [ %OrigCmpVal, EntryBB ], [ %RetryCmpVal, SetMBB ] + // %SwapVal = phi [ %OrigSwapVal, EntryBB ], [ %RetrySwapVal, SetMBB ] + // %Dest = RLL %OldVal, BitSize(%BitShift) + // ^^ The low BitSize bits contain the field + // of interest. + // %RetryCmpVal = RISBG32 %CmpVal, %Dest, 32, 63-BitSize, 0 + // ^^ Replace the upper 32-BitSize bits of the + // comparison value with those that we loaded, + // so that we can use a full word comparison. 
+ // CR %Dest, %RetryCmpVal + // JNE DoneMBB + // # Fall through to SetMBB + MBB = LoopMBB; + BuildMI(MBB, DL, TII->get(SystemZ::PHI), OldVal) + .addReg(OrigOldVal).addMBB(StartMBB) + .addReg(RetryOldVal).addMBB(SetMBB); + BuildMI(MBB, DL, TII->get(SystemZ::PHI), CmpVal) + .addReg(OrigCmpVal).addMBB(StartMBB) + .addReg(RetryCmpVal).addMBB(SetMBB); + BuildMI(MBB, DL, TII->get(SystemZ::PHI), SwapVal) + .addReg(OrigSwapVal).addMBB(StartMBB) + .addReg(RetrySwapVal).addMBB(SetMBB); + BuildMI(MBB, DL, TII->get(SystemZ::RLL), Dest) + .addReg(OldVal).addReg(BitShift).addImm(BitSize); + BuildMI(MBB, DL, TII->get(SystemZ::RISBG32), RetryCmpVal) + .addReg(CmpVal).addReg(Dest).addImm(32).addImm(63 - BitSize).addImm(0); + BuildMI(MBB, DL, TII->get(SystemZ::CR)) + .addReg(Dest).addReg(RetryCmpVal); + BuildMI(MBB, DL, TII->get(SystemZ::BRCL)).addImm(MaskNE).addMBB(DoneMBB); + MBB->addSuccessor(DoneMBB); + MBB->addSuccessor(SetMBB); + + // SetMBB: + // %RetrySwapVal = RISBG32 %SwapVal, %Dest, 32, 63-BitSize, 0 + // ^^ Replace the upper 32-BitSize bits of the new + // value with those that we loaded. + // %StoreVal = RLL %RetrySwapVal, -BitSize(%NegBitShift) + // ^^ Rotate the new field to its proper position. + // %RetryOldVal = CS %Dest, %StoreVal, Disp(%Base) + // JNE LoopMBB + // # fall through to DoneMBB + MBB = SetMBB; + BuildMI(MBB, DL, TII->get(SystemZ::RISBG32), RetrySwapVal) + .addReg(SwapVal).addReg(Dest).addImm(32).addImm(63 - BitSize).addImm(0); + BuildMI(MBB, DL, TII->get(SystemZ::RLL), StoreVal) + .addReg(RetrySwapVal).addReg(NegBitShift).addImm(-BitSize); + BuildMI(MBB, DL, TII->get(CSOpcode), RetryOldVal) + .addReg(OldVal).addReg(StoreVal).addOperand(Base).addImm(Disp); + BuildMI(MBB, DL, TII->get(SystemZ::BRCL)).addImm(MaskNE).addMBB(LoopMBB); + MBB->addSuccessor(LoopMBB); + MBB->addSuccessor(DoneMBB); + + MI->eraseFromParent(); + return DoneMBB; +} + +// Emit an extension from a GR32 or GR64 to a GR128. ClearEven is true +// if the high register of the GR128 value must be cleared or false if +// it's "don't care". SubReg is subreg_low32 when extending a GR32 +// and subreg_low when extending a GR64.
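Since the GR128 pairs keep reappearing, here is what the *EXT128 pseudos expanded by emitExt128 (defined next) mean at the value level: the source ends up in the low (odd) half and the high (even) half is either zeroed or left undefined. A small host-side model with illustrative names, not the DAG code itself:

#include <cassert>
#include <cstdint>

// ZEXT128_64 / ZEXT128_32 clear the even (high) register; AEXT128_64 leaves
// it undefined.  DL(G) then divides the full High:Low value, which is why
// the unsigned DIVREM path needs the zero-extending form.
struct GR128Sketch { uint64_t High, Low; };   // even, odd registers

static GR128Sketch zext128_64(uint64_t Src) { return { 0, Src }; }
static GR128Sketch zext128_32(uint32_t Src) { return { 0, Src }; }

int main() {
  GR128Sketch A = zext128_64(0x123456789abcdef0);
  assert(A.High == 0 && A.Low == 0x123456789abcdef0);
  GR128Sketch B = zext128_32(0x9abcdef0u);
  assert(B.High == 0 && B.Low == 0x9abcdef0);
  return 0;
}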
+MachineBasicBlock * +SystemZTargetLowering::emitExt128(MachineInstr *MI, + MachineBasicBlock *MBB, + bool ClearEven, unsigned SubReg) const { + const SystemZInstrInfo *TII = TM.getInstrInfo(); + MachineFunction &MF = *MBB->getParent(); + MachineRegisterInfo &MRI = MF.getRegInfo(); + DebugLoc DL = MI->getDebugLoc(); + + unsigned Dest = MI->getOperand(0).getReg(); + unsigned Src = MI->getOperand(1).getReg(); + unsigned In128 = MRI.createVirtualRegister(&SystemZ::GR128BitRegClass); + + BuildMI(*MBB, MI, DL, TII->get(TargetOpcode::IMPLICIT_DEF), In128); + if (ClearEven) { + unsigned NewIn128 = MRI.createVirtualRegister(&SystemZ::GR128BitRegClass); + unsigned Zero64 = MRI.createVirtualRegister(&SystemZ::GR64BitRegClass); + + BuildMI(*MBB, MI, DL, TII->get(SystemZ::LLILL), Zero64) + .addImm(0); + BuildMI(*MBB, MI, DL, TII->get(TargetOpcode::INSERT_SUBREG), NewIn128) + .addReg(In128).addReg(Zero64).addImm(SystemZ::subreg_high); + In128 = NewIn128; + } + BuildMI(*MBB, MI, DL, TII->get(TargetOpcode::INSERT_SUBREG), Dest) + .addReg(In128).addReg(Src).addImm(SubReg); + + MI->eraseFromParent(); + return MBB; +} + +MachineBasicBlock *SystemZTargetLowering:: +EmitInstrWithCustomInserter(MachineInstr *MI, MachineBasicBlock *MBB) const { + switch (MI->getOpcode()) { + case SystemZ::Select32: + case SystemZ::SelectF32: + case SystemZ::Select64: + case SystemZ::SelectF64: + case SystemZ::SelectF128: + return emitSelect(MI, MBB); + + case SystemZ::AEXT128_64: + return emitExt128(MI, MBB, false, SystemZ::subreg_low); + case SystemZ::ZEXT128_32: + return emitExt128(MI, MBB, true, SystemZ::subreg_low32); + case SystemZ::ZEXT128_64: + return emitExt128(MI, MBB, true, SystemZ::subreg_low); + + case SystemZ::ATOMIC_SWAPW: + return emitAtomicLoadBinary(MI, MBB, 0, 0); + case SystemZ::ATOMIC_SWAP_32: + return emitAtomicLoadBinary(MI, MBB, 0, 32); + case SystemZ::ATOMIC_SWAP_64: + return emitAtomicLoadBinary(MI, MBB, 0, 64); + + case SystemZ::ATOMIC_LOADW_AR: + return emitAtomicLoadBinary(MI, MBB, SystemZ::AR, 0); + case SystemZ::ATOMIC_LOADW_AFI: + return emitAtomicLoadBinary(MI, MBB, SystemZ::AFI, 0); + case SystemZ::ATOMIC_LOAD_AR: + return emitAtomicLoadBinary(MI, MBB, SystemZ::AR, 32); + case SystemZ::ATOMIC_LOAD_AHI: + return emitAtomicLoadBinary(MI, MBB, SystemZ::AHI, 32); + case SystemZ::ATOMIC_LOAD_AFI: + return emitAtomicLoadBinary(MI, MBB, SystemZ::AFI, 32); + case SystemZ::ATOMIC_LOAD_AGR: + return emitAtomicLoadBinary(MI, MBB, SystemZ::AGR, 64); + case SystemZ::ATOMIC_LOAD_AGHI: + return emitAtomicLoadBinary(MI, MBB, SystemZ::AGHI, 64); + case SystemZ::ATOMIC_LOAD_AGFI: + return emitAtomicLoadBinary(MI, MBB, SystemZ::AGFI, 64); + + case SystemZ::ATOMIC_LOADW_SR: + return emitAtomicLoadBinary(MI, MBB, SystemZ::SR, 0); + case SystemZ::ATOMIC_LOAD_SR: + return emitAtomicLoadBinary(MI, MBB, SystemZ::SR, 32); + case SystemZ::ATOMIC_LOAD_SGR: + return emitAtomicLoadBinary(MI, MBB, SystemZ::SGR, 64); + + case SystemZ::ATOMIC_LOADW_NR: + return emitAtomicLoadBinary(MI, MBB, SystemZ::NR, 0); + case SystemZ::ATOMIC_LOADW_NILH: + return emitAtomicLoadBinary(MI, MBB, SystemZ::NILH32, 0); + case SystemZ::ATOMIC_LOAD_NR: + return emitAtomicLoadBinary(MI, MBB, SystemZ::NR, 32); + case SystemZ::ATOMIC_LOAD_NILL32: + return emitAtomicLoadBinary(MI, MBB, SystemZ::NILL32, 32); + case SystemZ::ATOMIC_LOAD_NILH32: + return emitAtomicLoadBinary(MI, MBB, SystemZ::NILH32, 32); + case SystemZ::ATOMIC_LOAD_NILF32: + return emitAtomicLoadBinary(MI, MBB, SystemZ::NILF32, 32); + case SystemZ::ATOMIC_LOAD_NGR: + return 
emitAtomicLoadBinary(MI, MBB, SystemZ::NGR, 64); + case SystemZ::ATOMIC_LOAD_NILL: + return emitAtomicLoadBinary(MI, MBB, SystemZ::NILL, 64); + case SystemZ::ATOMIC_LOAD_NILH: + return emitAtomicLoadBinary(MI, MBB, SystemZ::NILH, 64); + case SystemZ::ATOMIC_LOAD_NIHL: + return emitAtomicLoadBinary(MI, MBB, SystemZ::NIHL, 64); + case SystemZ::ATOMIC_LOAD_NIHH: + return emitAtomicLoadBinary(MI, MBB, SystemZ::NIHH, 64); + case SystemZ::ATOMIC_LOAD_NILF: + return emitAtomicLoadBinary(MI, MBB, SystemZ::NILF, 64); + case SystemZ::ATOMIC_LOAD_NIHF: + return emitAtomicLoadBinary(MI, MBB, SystemZ::NIHF, 64); + + case SystemZ::ATOMIC_LOADW_OR: + return emitAtomicLoadBinary(MI, MBB, SystemZ::OR, 0); + case SystemZ::ATOMIC_LOADW_OILH: + return emitAtomicLoadBinary(MI, MBB, SystemZ::OILH32, 0); + case SystemZ::ATOMIC_LOAD_OR: + return emitAtomicLoadBinary(MI, MBB, SystemZ::OR, 32); + case SystemZ::ATOMIC_LOAD_OILL32: + return emitAtomicLoadBinary(MI, MBB, SystemZ::OILL32, 32); + case SystemZ::ATOMIC_LOAD_OILH32: + return emitAtomicLoadBinary(MI, MBB, SystemZ::OILH32, 32); + case SystemZ::ATOMIC_LOAD_OILF32: + return emitAtomicLoadBinary(MI, MBB, SystemZ::OILF32, 32); + case SystemZ::ATOMIC_LOAD_OGR: + return emitAtomicLoadBinary(MI, MBB, SystemZ::OGR, 64); + case SystemZ::ATOMIC_LOAD_OILL: + return emitAtomicLoadBinary(MI, MBB, SystemZ::OILL, 64); + case SystemZ::ATOMIC_LOAD_OILH: + return emitAtomicLoadBinary(MI, MBB, SystemZ::OILH, 64); + case SystemZ::ATOMIC_LOAD_OIHL: + return emitAtomicLoadBinary(MI, MBB, SystemZ::OIHL, 64); + case SystemZ::ATOMIC_LOAD_OIHH: + return emitAtomicLoadBinary(MI, MBB, SystemZ::OIHH, 64); + case SystemZ::ATOMIC_LOAD_OILF: + return emitAtomicLoadBinary(MI, MBB, SystemZ::OILF, 64); + case SystemZ::ATOMIC_LOAD_OIHF: + return emitAtomicLoadBinary(MI, MBB, SystemZ::OIHF, 64); + + case SystemZ::ATOMIC_LOADW_XR: + return emitAtomicLoadBinary(MI, MBB, SystemZ::XR, 0); + case SystemZ::ATOMIC_LOADW_XILF: + return emitAtomicLoadBinary(MI, MBB, SystemZ::XILF32, 0); + case SystemZ::ATOMIC_LOAD_XR: + return emitAtomicLoadBinary(MI, MBB, SystemZ::XR, 32); + case SystemZ::ATOMIC_LOAD_XILF32: + return emitAtomicLoadBinary(MI, MBB, SystemZ::XILF32, 32); + case SystemZ::ATOMIC_LOAD_XGR: + return emitAtomicLoadBinary(MI, MBB, SystemZ::XGR, 64); + case SystemZ::ATOMIC_LOAD_XILF: + return emitAtomicLoadBinary(MI, MBB, SystemZ::XILF, 64); + case SystemZ::ATOMIC_LOAD_XIHF: + return emitAtomicLoadBinary(MI, MBB, SystemZ::XIHF, 64); + + case SystemZ::ATOMIC_LOADW_NRi: + return emitAtomicLoadBinary(MI, MBB, SystemZ::NR, 0, true); + case SystemZ::ATOMIC_LOADW_NILHi: + return emitAtomicLoadBinary(MI, MBB, SystemZ::NILH32, 0, true); + case SystemZ::ATOMIC_LOAD_NRi: + return emitAtomicLoadBinary(MI, MBB, SystemZ::NR, 32, true); + case SystemZ::ATOMIC_LOAD_NILL32i: + return emitAtomicLoadBinary(MI, MBB, SystemZ::NILL32, 32, true); + case SystemZ::ATOMIC_LOAD_NILH32i: + return emitAtomicLoadBinary(MI, MBB, SystemZ::NILH32, 32, true); + case SystemZ::ATOMIC_LOAD_NILF32i: + return emitAtomicLoadBinary(MI, MBB, SystemZ::NILF32, 32, true); + case SystemZ::ATOMIC_LOAD_NGRi: + return emitAtomicLoadBinary(MI, MBB, SystemZ::NGR, 64, true); + case SystemZ::ATOMIC_LOAD_NILLi: + return emitAtomicLoadBinary(MI, MBB, SystemZ::NILL, 64, true); + case SystemZ::ATOMIC_LOAD_NILHi: + return emitAtomicLoadBinary(MI, MBB, SystemZ::NILH, 64, true); + case SystemZ::ATOMIC_LOAD_NIHLi: + return emitAtomicLoadBinary(MI, MBB, SystemZ::NIHL, 64, true); + case SystemZ::ATOMIC_LOAD_NIHHi: + return emitAtomicLoadBinary(MI, MBB, 
SystemZ::NIHH, 64, true); + case SystemZ::ATOMIC_LOAD_NILFi: + return emitAtomicLoadBinary(MI, MBB, SystemZ::NILF, 64, true); + case SystemZ::ATOMIC_LOAD_NIHFi: + return emitAtomicLoadBinary(MI, MBB, SystemZ::NIHF, 64, true); + + case SystemZ::ATOMIC_LOADW_MIN: + return emitAtomicLoadMinMax(MI, MBB, SystemZ::CR, + SystemZ::CCMASK_CMP_LE, 0); + case SystemZ::ATOMIC_LOAD_MIN_32: + return emitAtomicLoadMinMax(MI, MBB, SystemZ::CR, + SystemZ::CCMASK_CMP_LE, 32); + case SystemZ::ATOMIC_LOAD_MIN_64: + return emitAtomicLoadMinMax(MI, MBB, SystemZ::CGR, + SystemZ::CCMASK_CMP_LE, 64); + + case SystemZ::ATOMIC_LOADW_MAX: + return emitAtomicLoadMinMax(MI, MBB, SystemZ::CR, + SystemZ::CCMASK_CMP_GE, 0); + case SystemZ::ATOMIC_LOAD_MAX_32: + return emitAtomicLoadMinMax(MI, MBB, SystemZ::CR, + SystemZ::CCMASK_CMP_GE, 32); + case SystemZ::ATOMIC_LOAD_MAX_64: + return emitAtomicLoadMinMax(MI, MBB, SystemZ::CGR, + SystemZ::CCMASK_CMP_GE, 64); + + case SystemZ::ATOMIC_LOADW_UMIN: + return emitAtomicLoadMinMax(MI, MBB, SystemZ::CLR, + SystemZ::CCMASK_CMP_LE, 0); + case SystemZ::ATOMIC_LOAD_UMIN_32: + return emitAtomicLoadMinMax(MI, MBB, SystemZ::CLR, + SystemZ::CCMASK_CMP_LE, 32); + case SystemZ::ATOMIC_LOAD_UMIN_64: + return emitAtomicLoadMinMax(MI, MBB, SystemZ::CLGR, + SystemZ::CCMASK_CMP_LE, 64); + + case SystemZ::ATOMIC_LOADW_UMAX: + return emitAtomicLoadMinMax(MI, MBB, SystemZ::CLR, + SystemZ::CCMASK_CMP_GE, 0); + case SystemZ::ATOMIC_LOAD_UMAX_32: + return emitAtomicLoadMinMax(MI, MBB, SystemZ::CLR, + SystemZ::CCMASK_CMP_GE, 32); + case SystemZ::ATOMIC_LOAD_UMAX_64: + return emitAtomicLoadMinMax(MI, MBB, SystemZ::CLGR, + SystemZ::CCMASK_CMP_GE, 64); + + case SystemZ::ATOMIC_CMP_SWAPW: + return emitAtomicCmpSwapW(MI, MBB); + default: + llvm_unreachable("Unexpected instr type to insert"); + } +} diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZISelLowering.h b/contrib/llvm/lib/Target/SystemZ/SystemZISelLowering.h new file mode 100644 index 0000000..eea820c --- /dev/null +++ b/contrib/llvm/lib/Target/SystemZ/SystemZISelLowering.h @@ -0,0 +1,212 @@ +//===-- SystemZISelLowering.h - SystemZ DAG lowering interface --*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines the interfaces that SystemZ uses to lower LLVM code into a +// selection DAG. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_TARGET_SystemZ_ISELLOWERING_H +#define LLVM_TARGET_SystemZ_ISELLOWERING_H + +#include "SystemZ.h" +#include "llvm/CodeGen/SelectionDAG.h" +#include "llvm/Target/TargetLowering.h" + +namespace llvm { +namespace SystemZISD { + enum { + FIRST_NUMBER = ISD::BUILTIN_OP_END, + + // Return with a flag operand. Operand 0 is the chain operand. + RET_FLAG, + + // Calls a function. Operand 0 is the chain operand and operand 1 + // is the target address. The arguments start at operand 2. + // There is an optional glue operand at the end. + CALL, + + // Wraps a TargetGlobalAddress that should be loaded using PC-relative + // accesses (LARL). Operand 0 is the address. + PCREL_WRAPPER, + + // Signed integer and floating-point comparisons. The operands are the + // two values to compare. + CMP, + + // Likewise unsigned integer comparison. + UCMP, + + // Branches if a condition is true. 
Operand 0 is the chain operand; + // operand 1 is the 4-bit condition-code mask, with bit N in + // big-endian order meaning "branch if CC=N"; operand 2 is the + // target block and operand 3 is the flag operand. + BR_CCMASK, + + // Selects between operand 0 and operand 1. Operand 2 is the + // mask of condition-code values for which operand 0 should be + // chosen over operand 1; it has the same form as BR_CCMASK. + // Operand 3 is the flag operand. + SELECT_CCMASK, + + // Evaluates to the gap between the stack pointer and the + // base of the dynamically-allocatable area. + ADJDYNALLOC, + + // Extracts the value of a 32-bit access register. Operand 0 is + // the number of the register. + EXTRACT_ACCESS, + + // Wrappers around the ISD opcodes of the same name. The output and + // first input operands are GR128s. The trailing numbers are the + // widths of the second operand in bits. + UMUL_LOHI64, + SDIVREM64, + UDIVREM32, + UDIVREM64, + + // Wrappers around the inner loop of an 8- or 16-bit ATOMIC_SWAP or + // ATOMIC_LOAD_<op>. + // + // Operand 0: the address of the containing 32-bit-aligned field + // Operand 1: the second operand of <op>, in the high bits of an i32 + // for everything except ATOMIC_SWAPW + // Operand 2: how many bits to rotate the i32 left to bring the first + // operand into the high bits + // Operand 3: the negative of operand 2, for rotating the other way + // Operand 4: the width of the field in bits (8 or 16) + ATOMIC_SWAPW = ISD::FIRST_TARGET_MEMORY_OPCODE, + ATOMIC_LOADW_ADD, + ATOMIC_LOADW_SUB, + ATOMIC_LOADW_AND, + ATOMIC_LOADW_OR, + ATOMIC_LOADW_XOR, + ATOMIC_LOADW_NAND, + ATOMIC_LOADW_MIN, + ATOMIC_LOADW_MAX, + ATOMIC_LOADW_UMIN, + ATOMIC_LOADW_UMAX, + + // A wrapper around the inner loop of an ATOMIC_CMP_SWAP. + // + // Operand 0: the address of the containing 32-bit-aligned field + // Operand 1: the compare value, in the low bits of an i32 + // Operand 2: the swap value, in the low bits of an i32 + // Operand 3: how many bits to rotate the i32 left to bring the first + // operand into the high bits + // Operand 4: the negative of operand 2, for rotating the other way + // Operand 5: the width of the field in bits (8 or 16) + ATOMIC_CMP_SWAPW + }; +} + +class SystemZSubtarget; +class SystemZTargetMachine; + +class SystemZTargetLowering : public TargetLowering { +public: + explicit SystemZTargetLowering(SystemZTargetMachine &TM); + + // Override TargetLowering. 
+ virtual MVT getScalarShiftAmountTy(EVT LHSTy) const LLVM_OVERRIDE { + return MVT::i32; + } + virtual EVT getSetCCResultType(EVT VT) const { + return MVT::i32; + } + virtual bool isFMAFasterThanMulAndAdd(EVT) const LLVM_OVERRIDE { + return true; + } + virtual bool isFPImmLegal(const APFloat &Imm, EVT VT) const; + virtual const char *getTargetNodeName(unsigned Opcode) const LLVM_OVERRIDE; + virtual std::pair<unsigned, const TargetRegisterClass *> + getRegForInlineAsmConstraint(const std::string &Constraint, + EVT VT) const LLVM_OVERRIDE; + virtual TargetLowering::ConstraintType + getConstraintType(const std::string &Constraint) const LLVM_OVERRIDE; + virtual TargetLowering::ConstraintWeight + getSingleConstraintMatchWeight(AsmOperandInfo &info, + const char *constraint) const LLVM_OVERRIDE; + virtual void + LowerAsmOperandForConstraint(SDValue Op, + std::string &Constraint, + std::vector<SDValue> &Ops, + SelectionDAG &DAG) const LLVM_OVERRIDE; + virtual MachineBasicBlock * + EmitInstrWithCustomInserter(MachineInstr *MI, + MachineBasicBlock *BB) const LLVM_OVERRIDE; + virtual SDValue LowerOperation(SDValue Op, + SelectionDAG &DAG) const LLVM_OVERRIDE; + virtual SDValue + LowerFormalArguments(SDValue Chain, + CallingConv::ID CallConv, bool isVarArg, + const SmallVectorImpl<ISD::InputArg> &Ins, + DebugLoc DL, SelectionDAG &DAG, + SmallVectorImpl<SDValue> &InVals) const LLVM_OVERRIDE; + virtual SDValue + LowerCall(CallLoweringInfo &CLI, + SmallVectorImpl<SDValue> &InVals) const LLVM_OVERRIDE; + + virtual SDValue + LowerReturn(SDValue Chain, + CallingConv::ID CallConv, bool IsVarArg, + const SmallVectorImpl<ISD::OutputArg> &Outs, + const SmallVectorImpl<SDValue> &OutVals, + DebugLoc DL, SelectionDAG &DAG) const LLVM_OVERRIDE; + +private: + const SystemZSubtarget &Subtarget; + const SystemZTargetMachine &TM; + + // Implement LowerOperation for individual opcodes. + SDValue lowerBR_CC(SDValue Op, SelectionDAG &DAG) const; + SDValue lowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const; + SDValue lowerGlobalAddress(GlobalAddressSDNode *Node, + SelectionDAG &DAG) const; + SDValue lowerGlobalTLSAddress(GlobalAddressSDNode *Node, + SelectionDAG &DAG) const; + SDValue lowerBlockAddress(BlockAddressSDNode *Node, + SelectionDAG &DAG) const; + SDValue lowerJumpTable(JumpTableSDNode *JT, SelectionDAG &DAG) const; + SDValue lowerConstantPool(ConstantPoolSDNode *CP, SelectionDAG &DAG) const; + SDValue lowerVASTART(SDValue Op, SelectionDAG &DAG) const; + SDValue lowerVACOPY(SDValue Op, SelectionDAG &DAG) const; + SDValue lowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const; + SDValue lowerUMUL_LOHI(SDValue Op, SelectionDAG &DAG) const; + SDValue lowerSDIVREM(SDValue Op, SelectionDAG &DAG) const; + SDValue lowerUDIVREM(SDValue Op, SelectionDAG &DAG) const; + SDValue lowerBITCAST(SDValue Op, SelectionDAG &DAG) const; + SDValue lowerOR(SDValue Op, SelectionDAG &DAG) const; + SDValue lowerATOMIC_LOAD(SDValue Op, SelectionDAG &DAG, + unsigned Opcode) const; + SDValue lowerATOMIC_CMP_SWAP(SDValue Op, SelectionDAG &DAG) const; + SDValue lowerSTACKSAVE(SDValue Op, SelectionDAG &DAG) const; + SDValue lowerSTACKRESTORE(SDValue Op, SelectionDAG &DAG) const; + + // Implement EmitInstrWithCustomInserter for individual operation types. 
+ MachineBasicBlock *emitSelect(MachineInstr *MI, + MachineBasicBlock *BB) const; + MachineBasicBlock *emitExt128(MachineInstr *MI, + MachineBasicBlock *MBB, + bool ClearEven, unsigned SubReg) const; + MachineBasicBlock *emitAtomicLoadBinary(MachineInstr *MI, + MachineBasicBlock *BB, + unsigned BinOpcode, unsigned BitSize, + bool Invert = false) const; + MachineBasicBlock *emitAtomicLoadMinMax(MachineInstr *MI, + MachineBasicBlock *MBB, + unsigned CompareOpcode, + unsigned KeepOldMask, + unsigned BitSize) const; + MachineBasicBlock *emitAtomicCmpSwapW(MachineInstr *MI, + MachineBasicBlock *BB) const; +}; +} // end namespace llvm + +#endif // LLVM_TARGET_SystemZ_ISELLOWERING_H diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZInstrBuilder.h b/contrib/llvm/lib/Target/SystemZ/SystemZInstrBuilder.h new file mode 100644 index 0000000..fb699b9 --- /dev/null +++ b/contrib/llvm/lib/Target/SystemZ/SystemZInstrBuilder.h @@ -0,0 +1,48 @@ +//===-- SystemZInstrBuilder.h - Functions to aid building insts -*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file exposes functions that may be used with BuildMI from the +// MachineInstrBuilder.h file to handle SystemZ'isms in a clean way. +// +//===----------------------------------------------------------------------===// + +#ifndef SYSTEMZINSTRBUILDER_H +#define SYSTEMZINSTRBUILDER_H + +#include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineMemOperand.h" +#include "llvm/CodeGen/PseudoSourceValue.h" + +namespace llvm { + +/// Add a BDX memory reference for frame object FI to MIB. +static inline const MachineInstrBuilder & +addFrameReference(const MachineInstrBuilder &MIB, int FI) { + MachineInstr *MI = MIB; + MachineFunction &MF = *MI->getParent()->getParent(); + MachineFrameInfo *MFFrame = MF.getFrameInfo(); + const MCInstrDesc &MCID = MI->getDesc(); + unsigned Flags = 0; + if (MCID.mayLoad()) + Flags |= MachineMemOperand::MOLoad; + if (MCID.mayStore()) + Flags |= MachineMemOperand::MOStore; + int64_t Offset = 0; + MachineMemOperand *MMO = + MF.getMachineMemOperand(MachinePointerInfo( + PseudoSourceValue::getFixedStack(FI), Offset), + Flags, MFFrame->getObjectSize(FI), + MFFrame->getObjectAlignment(FI)); + return MIB.addFrameIndex(FI).addImm(Offset).addReg(0).addMemOperand(MMO); +} + +} // End llvm namespace + +#endif diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZInstrFP.td b/contrib/llvm/lib/Target/SystemZ/SystemZInstrFP.td new file mode 100644 index 0000000..7c9f0e6 --- /dev/null +++ b/contrib/llvm/lib/Target/SystemZ/SystemZInstrFP.td @@ -0,0 +1,318 @@ +//==- SystemZInstrFP.td - Floating-point SystemZ instructions --*- tblgen-*-==// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// Control-flow instructions +//===----------------------------------------------------------------------===// + +// C's ?: operator for floating-point operands. 
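The SelectWrapper defs just below are how a floating-point ?: reaches EmitInstrWithCustomInserter; emitSelect, earlier in the patch, then rebuilds the choice as a StartMBB / FalseMBB / JoinMBB diamond with a PHI. A source-level view of the same control flow, purely for illustration:

// The SelectF* pseudos carry C's ?: on floating-point values through
// instruction selection; emitSelect expands each one into a branch diamond.
static double selectF64(bool CCMatches, double TrueVal, double FalseVal) {
  // StartMBB:  BRCL CCMask, JoinMBB    (take TrueVal if the mask matches)
  // FalseMBB:  fall through
  // JoinMBB:   Result = PHI(TrueVal from StartMBB, FalseVal from FalseMBB)
  return CCMatches ? TrueVal : FalseVal;
}

int main() {
  return (selectF64(true, 1.0, 2.0) == 1.0 &&
          selectF64(false, 1.0, 2.0) == 2.0) ? 0 : 1;
}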
+def SelectF32 : SelectWrapper<FP32>; +def SelectF64 : SelectWrapper<FP64>; +def SelectF128 : SelectWrapper<FP128>; + +//===----------------------------------------------------------------------===// +// Move instructions +//===----------------------------------------------------------------------===// + +// Load zero. +let neverHasSideEffects = 1, isAsCheapAsAMove = 1, isMoveImm = 1 in { + def LZER : InherentRRE<"lzer", 0xB374, FP32, (fpimm0)>; + def LZDR : InherentRRE<"lzdr", 0xB375, FP64, (fpimm0)>; + def LZXR : InherentRRE<"lzxr", 0xB376, FP128, (fpimm0)>; +} + +// Moves between two floating-point registers. +let neverHasSideEffects = 1 in { + def LER : UnaryRR <"ler", 0x38, null_frag, FP32, FP32>; + def LDR : UnaryRR <"ldr", 0x28, null_frag, FP64, FP64>; + def LXR : UnaryRRE<"lxr", 0xB365, null_frag, FP128, FP128>; +} + +// Moves between 64-bit integer and floating-point registers. +def LGDR : UnaryRRE<"lgdr", 0xB3CD, bitconvert, GR64, FP64>; +def LDGR : UnaryRRE<"ldgr", 0xB3C1, bitconvert, FP64, GR64>; + +// fcopysign with an FP32 result. +let isCodeGenOnly = 1 in { + def CPSDRss : BinaryRevRRF<"cpsdr", 0xB372, fcopysign, FP32, FP32>; + def CPSDRsd : BinaryRevRRF<"cpsdr", 0xB372, fcopysign, FP32, FP64>; +} + +// The sign of an FP128 is in the high register. Give the CPSDRsd +// operands in R1, R2, R3 order. +def : Pat<(fcopysign FP32:$src1, FP128:$src2), + (CPSDRsd (EXTRACT_SUBREG FP128:$src2, subreg_high), FP32:$src1)>; + +// fcopysign with an FP64 result. +let isCodeGenOnly = 1 in + def CPSDRds : BinaryRevRRF<"cpsdr", 0xB372, fcopysign, FP64, FP32>; +def CPSDRdd : BinaryRevRRF<"cpsdr", 0xB372, fcopysign, FP64, FP64>; + +// The sign of an FP128 is in the high register. Give the CPSDRdd +// operands in R1, R2, R3 order. +def : Pat<(fcopysign FP64:$src1, FP128:$src2), + (CPSDRdd (EXTRACT_SUBREG FP128:$src2, subreg_high), FP64:$src1)>; + +// fcopysign with an FP128 result. Use "upper" as the high half and leave +// the low half as-is. +class CopySign128<RegisterOperand cls, dag upper> + : Pat<(fcopysign FP128:$src1, cls:$src2), + (INSERT_SUBREG FP128:$src1, upper, subreg_high)>; + +// Give the CPSDR* operands in R1, R2, R3 order. +def : CopySign128<FP32, (CPSDRds FP32:$src2, + (EXTRACT_SUBREG FP128:$src1, subreg_high))>; +def : CopySign128<FP64, (CPSDRdd FP64:$src2, + (EXTRACT_SUBREG FP128:$src1, subreg_high))>; +def : CopySign128<FP128, (CPSDRdd (EXTRACT_SUBREG FP128:$src2, subreg_high), + (EXTRACT_SUBREG FP128:$src1, subreg_high))>; + +//===----------------------------------------------------------------------===// +// Load instructions +//===----------------------------------------------------------------------===// + +let canFoldAsLoad = 1, SimpleBDXLoad = 1 in { + defm LE : UnaryRXPair<"le", 0x78, 0xED64, load, FP32>; + defm LD : UnaryRXPair<"ld", 0x68, 0xED65, load, FP64>; + + // These instructions are split after register allocation, so we don't + // want a custom inserter. 
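  // (Cross-reference, inferred from later in this patch: the splitting is
  //  done by SystemZInstrInfo::expandPostRAPseudo(), which rewrites LX into
  //  two 64-bit LD loads -- and STX below into two STD stores -- via
  //  splitMove().)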
+ let Has20BitOffset = 1, HasIndex = 1, Is128Bit = 1 in { + def LX : Pseudo<(outs FP128:$dst), (ins bdxaddr20only128:$src), + [(set FP128:$dst, (load bdxaddr20only128:$src))]>; + } +} + +//===----------------------------------------------------------------------===// +// Store instructions +//===----------------------------------------------------------------------===// + +let SimpleBDXStore = 1 in { + defm STE : StoreRXPair<"ste", 0x70, 0xED66, store, FP32>; + defm STD : StoreRXPair<"std", 0x60, 0xED67, store, FP64>; + + // These instructions are split after register allocation, so we don't + // want a custom inserter. + let Has20BitOffset = 1, HasIndex = 1, Is128Bit = 1 in { + def STX : Pseudo<(outs), (ins FP128:$src, bdxaddr20only128:$dst), + [(store FP128:$src, bdxaddr20only128:$dst)]>; + } +} + +//===----------------------------------------------------------------------===// +// Conversion instructions +//===----------------------------------------------------------------------===// + +// Convert floating-point values to narrower representations, rounding +// according to the current mode. The destination of LEXBR and LDXBR +// is a 128-bit value, but only the first register of the pair is used. +def LEDBR : UnaryRRE<"ledbr", 0xB344, fround, FP32, FP64>; +def LEXBR : UnaryRRE<"lexbr", 0xB346, null_frag, FP128, FP128>; +def LDXBR : UnaryRRE<"ldxbr", 0xB345, null_frag, FP128, FP128>; + +def : Pat<(f32 (fround FP128:$src)), + (EXTRACT_SUBREG (LEXBR FP128:$src), subreg_32bit)>; +def : Pat<(f64 (fround FP128:$src)), + (EXTRACT_SUBREG (LDXBR FP128:$src), subreg_high)>; + +// Extend register floating-point values to wider representations. +def LDEBR : UnaryRRE<"ldebr", 0xB304, fextend, FP64, FP32>; +def LXEBR : UnaryRRE<"lxebr", 0xB306, fextend, FP128, FP32>; +def LXDBR : UnaryRRE<"lxdbr", 0xB305, fextend, FP128, FP64>; + +// Extend memory floating-point values to wider representations. +def LDEB : UnaryRXE<"ldeb", 0xED04, extloadf32, FP64>; +def LXEB : UnaryRXE<"lxeb", 0xED06, extloadf32, FP128>; +def LXDB : UnaryRXE<"lxdb", 0xED05, extloadf64, FP128>; + +// Convert a signed integer register value to a floating-point one. +let Defs = [PSW] in { + def CEFBR : UnaryRRE<"cefbr", 0xB394, sint_to_fp, FP32, GR32>; + def CDFBR : UnaryRRE<"cdfbr", 0xB395, sint_to_fp, FP64, GR32>; + def CXFBR : UnaryRRE<"cxfbr", 0xB396, sint_to_fp, FP128, GR32>; + + def CEGBR : UnaryRRE<"cegbr", 0xB3A4, sint_to_fp, FP32, GR64>; + def CDGBR : UnaryRRE<"cdgbr", 0xB3A5, sint_to_fp, FP64, GR64>; + def CXGBR : UnaryRRE<"cxgbr", 0xB3A6, sint_to_fp, FP128, GR64>; +} + +// Convert a floating-point register value to a signed integer value, +// with the second operand (modifier M3) specifying the rounding mode. +let Defs = [PSW] in { + def CFEBR : UnaryRRF<"cfebr", 0xB398, GR32, FP32>; + def CFDBR : UnaryRRF<"cfdbr", 0xB399, GR32, FP64>; + def CFXBR : UnaryRRF<"cfxbr", 0xB39A, GR32, FP128>; + + def CGEBR : UnaryRRF<"cgebr", 0xB3A8, GR64, FP32>; + def CGDBR : UnaryRRF<"cgdbr", 0xB3A9, GR64, FP64>; + def CGXBR : UnaryRRF<"cgxbr", 0xB3AA, GR64, FP128>; +} + +// fp_to_sint always rounds towards zero, which is modifier value 5. 
+def : Pat<(i32 (fp_to_sint FP32:$src)), (CFEBR FP32:$src, 5)>;
+def : Pat<(i32 (fp_to_sint FP64:$src)), (CFDBR FP64:$src, 5)>;
+def : Pat<(i32 (fp_to_sint FP128:$src)), (CFXBR FP128:$src, 5)>;
+
+def : Pat<(i64 (fp_to_sint FP32:$src)), (CGEBR FP32:$src, 5)>;
+def : Pat<(i64 (fp_to_sint FP64:$src)), (CGDBR FP64:$src, 5)>;
+def : Pat<(i64 (fp_to_sint FP128:$src)), (CGXBR FP128:$src, 5)>;
+
+//===----------------------------------------------------------------------===//
+// Unary arithmetic
+//===----------------------------------------------------------------------===//
+
+// Negation (Load Complement).
+let Defs = [PSW] in {
+  def LCEBR : UnaryRRE<"lcebr", 0xB303, fneg, FP32, FP32>;
+  def LCDBR : UnaryRRE<"lcdbr", 0xB313, fneg, FP64, FP64>;
+  def LCXBR : UnaryRRE<"lcxbr", 0xB343, fneg, FP128, FP128>;
+}
+
+// Absolute value (Load Positive).
+let Defs = [PSW] in {
+  def LPEBR : UnaryRRE<"lpebr", 0xB300, fabs, FP32, FP32>;
+  def LPDBR : UnaryRRE<"lpdbr", 0xB310, fabs, FP64, FP64>;
+  def LPXBR : UnaryRRE<"lpxbr", 0xB340, fabs, FP128, FP128>;
+}
+
+// Negative absolute value (Load Negative).
+let Defs = [PSW] in {
+  def LNEBR : UnaryRRE<"lnebr", 0xB301, fnabs, FP32, FP32>;
+  def LNDBR : UnaryRRE<"lndbr", 0xB311, fnabs, FP64, FP64>;
+  def LNXBR : UnaryRRE<"lnxbr", 0xB341, fnabs, FP128, FP128>;
+}
+
+// Square root.
+def SQEBR : UnaryRRE<"sqebr", 0xB314, fsqrt, FP32, FP32>;
+def SQDBR : UnaryRRE<"sqdbr", 0xB315, fsqrt, FP64, FP64>;
+def SQXBR : UnaryRRE<"sqxbr", 0xB316, fsqrt, FP128, FP128>;
+
+def SQEB : UnaryRXE<"sqeb", 0xED14, loadu<fsqrt>, FP32>;
+def SQDB : UnaryRXE<"sqdb", 0xED15, loadu<fsqrt>, FP64>;
+
+// Round to an integer, with the second operand (modifier M3) specifying
+// the rounding mode.
+//
+// These forms always check for inexact conditions. z196 added versions
+// that allow this to be suppressed (as for fnearbyint), but we don't yet
+// support -march=z196.
+let Defs = [PSW] in {
+  def FIEBR : UnaryRRF<"fiebr", 0xB357, FP32, FP32>;
+  def FIDBR : UnaryRRF<"fidbr", 0xB35F, FP64, FP64>;
+  def FIXBR : UnaryRRF<"fixbr", 0xB347, FP128, FP128>;
+}
+
+// frint rounds according to the current mode (modifier 0) and detects
+// inexact conditions.
+def : Pat<(frint FP32:$src), (FIEBR FP32:$src, 0)>;
+def : Pat<(frint FP64:$src), (FIDBR FP64:$src, 0)>;
+def : Pat<(frint FP128:$src), (FIXBR FP128:$src, 0)>;
+
+//===----------------------------------------------------------------------===//
+// Binary arithmetic
+//===----------------------------------------------------------------------===//
+
+// Addition.
+let Defs = [PSW] in {
+  let isCommutable = 1 in {
+    def AEBR : BinaryRRE<"aebr", 0xB30A, fadd, FP32, FP32>;
+    def ADBR : BinaryRRE<"adbr", 0xB31A, fadd, FP64, FP64>;
+    def AXBR : BinaryRRE<"axbr", 0xB34A, fadd, FP128, FP128>;
+  }
+  def AEB : BinaryRXE<"aeb", 0xED0A, fadd, FP32, load>;
+  def ADB : BinaryRXE<"adb", 0xED1A, fadd, FP64, load>;
+}
+
+// Subtraction.
+let Defs = [PSW] in {
+  def SEBR : BinaryRRE<"sebr", 0xB30B, fsub, FP32, FP32>;
+  def SDBR : BinaryRRE<"sdbr", 0xB31B, fsub, FP64, FP64>;
+  def SXBR : BinaryRRE<"sxbr", 0xB34B, fsub, FP128, FP128>;
+
+  def SEB : BinaryRXE<"seb", 0xED0B, fsub, FP32, load>;
+  def SDB : BinaryRXE<"sdb", 0xED1B, fsub, FP64, load>;
+}
+
+// Multiplication.
+let isCommutable = 1 in { + def MEEBR : BinaryRRE<"meebr", 0xB317, fmul, FP32, FP32>; + def MDBR : BinaryRRE<"mdbr", 0xB31C, fmul, FP64, FP64>; + def MXBR : BinaryRRE<"mxbr", 0xB34C, fmul, FP128, FP128>; +} +def MEEB : BinaryRXE<"meeb", 0xED17, fmul, FP32, load>; +def MDB : BinaryRXE<"mdb", 0xED1C, fmul, FP64, load>; + +// f64 multiplication of two FP32 registers. +def MDEBR : BinaryRRE<"mdebr", 0xB30C, null_frag, FP64, FP32>; +def : Pat<(fmul (f64 (fextend FP32:$src1)), (f64 (fextend FP32:$src2))), + (MDEBR (INSERT_SUBREG (f64 (IMPLICIT_DEF)), + FP32:$src1, subreg_32bit), FP32:$src2)>; + +// f64 multiplication of an FP32 register and an f32 memory. +def MDEB : BinaryRXE<"mdeb", 0xED0C, null_frag, FP64, load>; +def : Pat<(fmul (f64 (fextend FP32:$src1)), + (f64 (extloadf32 bdxaddr12only:$addr))), + (MDEB (INSERT_SUBREG (f64 (IMPLICIT_DEF)), FP32:$src1, subreg_32bit), + bdxaddr12only:$addr)>; + +// f128 multiplication of two FP64 registers. +def MXDBR : BinaryRRE<"mxdbr", 0xB307, null_frag, FP128, FP64>; +def : Pat<(fmul (f128 (fextend FP64:$src1)), (f128 (fextend FP64:$src2))), + (MXDBR (INSERT_SUBREG (f128 (IMPLICIT_DEF)), + FP64:$src1, subreg_high), FP64:$src2)>; + +// f128 multiplication of an FP64 register and an f64 memory. +def MXDB : BinaryRXE<"mxdb", 0xED07, null_frag, FP128, load>; +def : Pat<(fmul (f128 (fextend FP64:$src1)), + (f128 (extloadf64 bdxaddr12only:$addr))), + (MXDB (INSERT_SUBREG (f128 (IMPLICIT_DEF)), FP64:$src1, subreg_high), + bdxaddr12only:$addr)>; + +// Fused multiply-add. +def MAEBR : TernaryRRD<"maebr", 0xB30E, z_fma, FP32>; +def MADBR : TernaryRRD<"madbr", 0xB31E, z_fma, FP64>; + +def MAEB : TernaryRXF<"maeb", 0xED0E, z_fma, FP32, load>; +def MADB : TernaryRXF<"madb", 0xED1E, z_fma, FP64, load>; + +// Fused multiply-subtract. +def MSEBR : TernaryRRD<"msebr", 0xB30F, z_fms, FP32>; +def MSDBR : TernaryRRD<"msdbr", 0xB31F, z_fms, FP64>; + +def MSEB : TernaryRXF<"mseb", 0xED0F, z_fms, FP32, load>; +def MSDB : TernaryRXF<"msdb", 0xED1F, z_fms, FP64, load>; + +// Division. 
+def DEBR : BinaryRRE<"debr", 0xB30D, fdiv, FP32, FP32>; +def DDBR : BinaryRRE<"ddbr", 0xB31D, fdiv, FP64, FP64>; +def DXBR : BinaryRRE<"dxbr", 0xB34D, fdiv, FP128, FP128>; + +def DEB : BinaryRXE<"deb", 0xED0D, fdiv, FP32, load>; +def DDB : BinaryRXE<"ddb", 0xED1D, fdiv, FP64, load>; + +//===----------------------------------------------------------------------===// +// Comparisons +//===----------------------------------------------------------------------===// + +let Defs = [PSW] in { + def CEBR : CompareRRE<"cebr", 0xB309, z_cmp, FP32, FP32>; + def CDBR : CompareRRE<"cdbr", 0xB319, z_cmp, FP64, FP64>; + def CXBR : CompareRRE<"cxbr", 0xB349, z_cmp, FP128, FP128>; + + def CEB : CompareRXE<"ceb", 0xED09, z_cmp, FP32, load>; + def CDB : CompareRXE<"cdb", 0xED19, z_cmp, FP64, load>; +} + +//===----------------------------------------------------------------------===// +// Peepholes +//===----------------------------------------------------------------------===// + +def : Pat<(f32 fpimmneg0), (LCEBR (LZER))>; +def : Pat<(f64 fpimmneg0), (LCDBR (LZDR))>; +def : Pat<(f128 fpimmneg0), (LCXBR (LZXR))>; diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZInstrFormats.td b/contrib/llvm/lib/Target/SystemZ/SystemZInstrFormats.td new file mode 100644 index 0000000..b32b7eb --- /dev/null +++ b/contrib/llvm/lib/Target/SystemZ/SystemZInstrFormats.td @@ -0,0 +1,987 @@ +//==- SystemZInstrFormats.td - SystemZ Instruction Formats --*- tablegen -*-==// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// Basic SystemZ instruction definition +//===----------------------------------------------------------------------===// + +class InstSystemZ<int size, dag outs, dag ins, string asmstr, + list<dag> pattern> : Instruction { + let Namespace = "SystemZ"; + + dag OutOperandList = outs; + dag InOperandList = ins; + let Size = size; + let Pattern = pattern; + let AsmString = asmstr; + + // Used to identify a group of related instructions, such as ST and STY. + string Function = ""; + + // "12" for an instruction that has a ...Y equivalent, "20" for that + // ...Y equivalent. + string PairType = "none"; + + // True if this instruction is a simple D(X,B) load of a register + // (with no sign or zero extension). + bit SimpleBDXLoad = 0; + + // True if this instruction is a simple D(X,B) store of a register + // (with no truncation). + bit SimpleBDXStore = 0; + + // True if this instruction has a 20-bit displacement field. + bit Has20BitOffset = 0; + + // True if addresses in this instruction have an index register. + bit HasIndex = 0; + + // True if this is a 128-bit pseudo instruction that combines two 64-bit + // operations. + bit Is128Bit = 0; + + let TSFlags{0} = SimpleBDXLoad; + let TSFlags{1} = SimpleBDXStore; + let TSFlags{2} = Has20BitOffset; + let TSFlags{3} = HasIndex; + let TSFlags{4} = Is128Bit; +} + +//===----------------------------------------------------------------------===// +// Mappings between instructions +//===----------------------------------------------------------------------===// + +// Return the version of an instruction that has an unsigned 12-bit +// displacement. 
+def getDisp12Opcode : InstrMapping { + let FilterClass = "InstSystemZ"; + let RowFields = ["Function"]; + let ColFields = ["PairType"]; + let KeyCol = ["20"]; + let ValueCols = [["12"]]; +} + +// Return the version of an instruction that has a signed 20-bit displacement. +def getDisp20Opcode : InstrMapping { + let FilterClass = "InstSystemZ"; + let RowFields = ["Function"]; + let ColFields = ["PairType"]; + let KeyCol = ["12"]; + let ValueCols = [["20"]]; +} + +//===----------------------------------------------------------------------===// +// Instruction formats +//===----------------------------------------------------------------------===// +// +// Formats are specified using operand field declarations of the form: +// +// bits<4> Rn : register input or output for operand n +// bits<m> In : immediate value of width m for operand n +// bits<4> Bn : base register for address operand n +// bits<m> Dn : displacement value of width m for address operand n +// bits<4> Xn : index register for address operand n +// bits<4> Mn : mode value for operand n +// +// The operand numbers ("n" in the list above) follow the architecture manual, +// but the fields are always declared in assembly order, so there are some +// cases where operand "2" comes after operand "3". For address operands, +// the base register field is declared first, followed by the displacement, +// followed by the index (if any). This matches the bdaddr* and bdxaddr* +// orders. +// +//===----------------------------------------------------------------------===// + +class InstRI<bits<12> op, dag outs, dag ins, string asmstr, list<dag> pattern> + : InstSystemZ<4, outs, ins, asmstr, pattern> { + field bits<32> Inst; + + bits<4> R1; + bits<16> I2; + + let Inst{31-24} = op{11-4}; + let Inst{23-20} = R1; + let Inst{19-16} = op{3-0}; + let Inst{15-0} = I2; +} + +class InstRIEf<bits<16> op, dag outs, dag ins, string asmstr, list<dag> pattern> + : InstSystemZ<6, outs, ins, asmstr, pattern> { + field bits<48> Inst; + + bits<4> R1; + bits<4> R2; + bits<8> I3; + bits<8> I4; + bits<8> I5; + + let Inst{47-40} = op{15-8}; + let Inst{39-36} = R1; + let Inst{35-32} = R2; + let Inst{31-24} = I3; + let Inst{23-16} = I4; + let Inst{15-8} = I5; + let Inst{7-0} = op{7-0}; +} + +class InstRIL<bits<12> op, dag outs, dag ins, string asmstr, list<dag> pattern> + : InstSystemZ<6, outs, ins, asmstr, pattern> { + field bits<48> Inst; + + bits<4> R1; + bits<32> I2; + + let Inst{47-40} = op{11-4}; + let Inst{39-36} = R1; + let Inst{35-32} = op{3-0}; + let Inst{31-0} = I2; +} + +class InstRR<bits<8> op, dag outs, dag ins, string asmstr, list<dag> pattern> + : InstSystemZ<2, outs, ins, asmstr, pattern> { + field bits<16> Inst; + + bits<4> R1; + bits<4> R2; + + let Inst{15-8} = op; + let Inst{7-4} = R1; + let Inst{3-0} = R2; +} + +class InstRRD<bits<16> op, dag outs, dag ins, string asmstr, list<dag> pattern> + : InstSystemZ<4, outs, ins, asmstr, pattern> { + field bits<32> Inst; + + bits<4> R1; + bits<4> R3; + bits<4> R2; + + let Inst{31-16} = op; + let Inst{15-12} = R1; + let Inst{11-8} = 0; + let Inst{7-4} = R3; + let Inst{3-0} = R2; +} + +class InstRRE<bits<16> op, dag outs, dag ins, string asmstr, list<dag> pattern> + : InstSystemZ<4, outs, ins, asmstr, pattern> { + field bits<32> Inst; + + bits<4> R1; + bits<4> R2; + + let Inst{31-16} = op; + let Inst{15-8} = 0; + let Inst{7-4} = R1; + let Inst{3-0} = R2; +} + +class InstRRF<bits<16> op, dag outs, dag ins, string asmstr, list<dag> pattern> + : InstSystemZ<4, outs, ins, asmstr, pattern> { + field bits<32> 
Inst; + + bits<4> R1; + bits<4> R2; + bits<4> R3; + + let Inst{31-16} = op; + let Inst{15-12} = R3; + let Inst{11-8} = 0; + let Inst{7-4} = R1; + let Inst{3-0} = R2; +} + +class InstRX<bits<8> op, dag outs, dag ins, string asmstr, list<dag> pattern> + : InstSystemZ<4, outs, ins, asmstr, pattern> { + field bits<32> Inst; + + bits<4> R1; + bits<4> B2; + bits<12> D2; + bits<4> X2; + + let Inst{31-24} = op; + let Inst{23-20} = R1; + let Inst{19-16} = X2; + let Inst{15-12} = B2; + let Inst{11-0} = D2; + + let HasIndex = 1; +} + +class InstRXE<bits<16> op, dag outs, dag ins, string asmstr, list<dag> pattern> + : InstSystemZ<6, outs, ins, asmstr, pattern> { + field bits<48> Inst; + + bits<4> R1; + bits<4> B2; + bits<12> D2; + bits<4> X2; + + let Inst{47-40} = op{15-8}; + let Inst{39-36} = R1; + let Inst{35-32} = X2; + let Inst{31-28} = B2; + let Inst{27-16} = D2; + let Inst{15-8} = 0; + let Inst{7-0} = op{7-0}; + + let HasIndex = 1; +} + +class InstRXF<bits<16> op, dag outs, dag ins, string asmstr, list<dag> pattern> + : InstSystemZ<6, outs, ins, asmstr, pattern> { + field bits<48> Inst; + + bits<4> R1; + bits<4> R3; + bits<4> B2; + bits<12> D2; + bits<4> X2; + + let Inst{47-40} = op{15-8}; + let Inst{39-36} = R3; + let Inst{35-32} = X2; + let Inst{31-28} = B2; + let Inst{27-16} = D2; + let Inst{15-12} = R1; + let Inst{11-8} = 0; + let Inst{7-0} = op{7-0}; + + let HasIndex = 1; +} + +class InstRXY<bits<16> op, dag outs, dag ins, string asmstr, list<dag> pattern> + : InstSystemZ<6, outs, ins, asmstr, pattern> { + field bits<48> Inst; + + bits<4> R1; + bits<4> B2; + bits<20> D2; + bits<4> X2; + + let Inst{47-40} = op{15-8}; + let Inst{39-36} = R1; + let Inst{35-32} = X2; + let Inst{31-28} = B2; + let Inst{27-16} = D2{11-0}; + let Inst{15-8} = D2{19-12}; + let Inst{7-0} = op{7-0}; + + let Has20BitOffset = 1; + let HasIndex = 1; +} + +class InstRS<bits<8> op, dag outs, dag ins, string asmstr, list<dag> pattern> + : InstSystemZ<4, outs, ins, asmstr, pattern> { + field bits<32> Inst; + + bits<4> R1; + bits<4> R3; + bits<4> B2; + bits<12> D2; + + let Inst{31-24} = op; + let Inst{23-20} = R1; + let Inst{19-16} = R3; + let Inst{15-12} = B2; + let Inst{11-0} = D2; +} + +class InstRSY<bits<16> op, dag outs, dag ins, string asmstr, list<dag> pattern> + : InstSystemZ<6, outs, ins, asmstr, pattern> { + field bits<48> Inst; + + bits<4> R1; + bits<4> R3; + bits<4> B2; + bits<20> D2; + + let Inst{47-40} = op{15-8}; + let Inst{39-36} = R1; + let Inst{35-32} = R3; + let Inst{31-28} = B2; + let Inst{27-16} = D2{11-0}; + let Inst{15-8} = D2{19-12}; + let Inst{7-0} = op{7-0}; + + let Has20BitOffset = 1; +} + +class InstSI<bits<8> op, dag outs, dag ins, string asmstr, list<dag> pattern> + : InstSystemZ<4, outs, ins, asmstr, pattern> { + field bits<32> Inst; + + bits<4> B1; + bits<12> D1; + bits<8> I2; + + let Inst{31-24} = op; + let Inst{23-16} = I2; + let Inst{15-12} = B1; + let Inst{11-0} = D1; +} + +class InstSIL<bits<16> op, dag outs, dag ins, string asmstr, list<dag> pattern> + : InstSystemZ<6, outs, ins, asmstr, pattern> { + field bits<48> Inst; + + bits<4> B1; + bits<12> D1; + bits<16> I2; + + let Inst{47-32} = op; + let Inst{31-28} = B1; + let Inst{27-16} = D1; + let Inst{15-0} = I2; +} + +class InstSIY<bits<16> op, dag outs, dag ins, string asmstr, list<dag> pattern> + : InstSystemZ<6, outs, ins, asmstr, pattern> { + field bits<48> Inst; + + bits<4> B1; + bits<20> D1; + bits<8> I2; + + let Inst{47-40} = op{15-8}; + let Inst{39-32} = I2; + let Inst{31-28} = B1; + let Inst{27-16} = D1{11-0}; + let Inst{15-8} = 
D1{19-12};
+  let Inst{7-0} = op{7-0};
+
+  let Has20BitOffset = 1;
+}
+
+//===----------------------------------------------------------------------===//
+// Instruction definitions with semantics
+//===----------------------------------------------------------------------===//
+//
+// These classes have the form <Category><Format>, where <Format> is one
+// of the formats defined above and where <Category> describes the inputs
+// and outputs. <Category> can be one of:
+//
+//   Inherent:
+//     One register output operand and no input operands.
+//
+//   Store:
+//     One register or immediate input operand and one address input operand.
+//     The instruction stores the first operand to the address.
+//
+//     This category is used for both pure and truncating stores.
+//
+//   LoadMultiple:
+//     One address input operand and two explicit output operands.
+//     The instruction loads a range of registers from the address,
+//     with the explicit operands giving the first and last register
+//     to load. Other loaded registers are added as implicit definitions.
+//
+//   StoreMultiple:
+//     Two explicit input register operands and an address operand.
+//     The instruction stores a range of registers to the address,
+//     with the explicit operands giving the first and last register
+//     to store. Other stored registers are added as implicit uses.
+//
+//   Unary:
+//     One register output operand and one input operand. The input
+//     operand may be a register, immediate or memory.
+//
+//   Binary:
+//     One register output operand and two input operands. The first
+//     input operand is always a register and the second may be a register,
+//     immediate or memory.
+//
+//   Shift:
+//     One register output operand and two input operands. The first
+//     input operand is a register and the second has the same form as
+//     an address (although it isn't actually used to address memory).
+//
+//   Compare:
+//     Two input operands. The first operand is always a register,
+//     the second may be a register, immediate or memory.
+//
+//   Ternary:
+//     One register output operand and three register input operands.
+//
+//   CmpSwap:
+//     One output operand and three input operands. The first two
+//     operands are registers and the third is an address. The instruction
+//     both reads from and writes to the address.
+//
+//   RotateSelect:
+//     One output operand and five input operands. The first two operands
+//     are registers and the other three are immediates.
+//
+// The format determines which input operands are tied to output operands,
+// and also determines the shape of any address operand.
+//
+// Multiclasses of the form <Category><Format>Pair define two instructions,
+// one with <Category><Format> and one with <Category><Format>Y. The name
+// of the first instruction has no suffix, the name of the second has
+// an extra "y".
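// (Worked illustration, using a definition that appears earlier in this
//  patch in SystemZInstrFP.td:
//
//    defm LE : UnaryRXPair<"le", 0x78, 0xED64, load, FP32>;
//
//  creates LE with the RX format and PairType "12", and LEY with the RXY
//  format and PairType "20". Both share the same Function string, so the
//  getDisp12Opcode and getDisp20Opcode mappings above translate between
//  them; SystemZInstrInfo::getOpcodeForOffset() uses exactly those mappings
//  to pick whichever form can encode a given displacement.)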
+// +//===----------------------------------------------------------------------===// + +class InherentRRE<string mnemonic, bits<16> opcode, RegisterOperand cls, + dag src> + : InstRRE<opcode, (outs cls:$dst), (ins), + mnemonic#"\t$dst", + [(set cls:$dst, src)]> { + let R2 = 0; +} + +class LoadMultipleRSY<string mnemonic, bits<16> opcode, RegisterOperand cls> + : InstRSY<opcode, (outs cls:$dst1, cls:$dst2), (ins bdaddr20only:$addr), + mnemonic#"\t$dst1, $dst2, $addr", []> { + let mayLoad = 1; +} + +class StoreRILPC<string mnemonic, bits<12> opcode, SDPatternOperator operator, + RegisterOperand cls> + : InstRIL<opcode, (outs), (ins cls:$src, pcrel32:$addr), + mnemonic#"\t$src, $addr", + [(operator cls:$src, pcrel32:$addr)]> { + let mayStore = 1; + // We want PC-relative addresses to be tried ahead of BD and BDX addresses. + // However, BDXs have two extra operands and are therefore 6 units more + // complex. + let AddedComplexity = 7; +} + +class StoreRX<string mnemonic, bits<8> opcode, SDPatternOperator operator, + RegisterOperand cls, AddressingMode mode = bdxaddr12only> + : InstRX<opcode, (outs), (ins cls:$src, mode:$addr), + mnemonic#"\t$src, $addr", + [(operator cls:$src, mode:$addr)]> { + let mayStore = 1; +} + +class StoreRXY<string mnemonic, bits<16> opcode, SDPatternOperator operator, + RegisterOperand cls, AddressingMode mode = bdxaddr20only> + : InstRXY<opcode, (outs), (ins cls:$src, mode:$addr), + mnemonic#"\t$src, $addr", + [(operator cls:$src, mode:$addr)]> { + let mayStore = 1; +} + +multiclass StoreRXPair<string mnemonic, bits<8> rxOpcode, bits<16> rxyOpcode, + SDPatternOperator operator, RegisterOperand cls> { + let Function = mnemonic ## #cls in { + let PairType = "12" in + def "" : StoreRX<mnemonic, rxOpcode, operator, cls, bdxaddr12pair>; + let PairType = "20" in + def Y : StoreRXY<mnemonic#"y", rxyOpcode, operator, cls, bdxaddr20pair>; + } +} + +class StoreMultipleRSY<string mnemonic, bits<16> opcode, RegisterOperand cls> + : InstRSY<opcode, (outs), (ins cls:$from, cls:$to, bdaddr20only:$addr), + mnemonic#"\t$from, $to, $addr", []> { + let mayStore = 1; +} + +class StoreSI<string mnemonic, bits<8> opcode, SDPatternOperator operator, + Immediate imm, AddressingMode mode = bdaddr12only> + : InstSI<opcode, (outs), (ins mode:$addr, imm:$src), + mnemonic#"\t$addr, $src", + [(operator imm:$src, mode:$addr)]> { + let mayStore = 1; +} + +class StoreSIY<string mnemonic, bits<16> opcode, SDPatternOperator operator, + Immediate imm, AddressingMode mode = bdaddr20only> + : InstSIY<opcode, (outs), (ins mode:$addr, imm:$src), + mnemonic#"\t$addr, $src", + [(operator imm:$src, mode:$addr)]> { + let mayStore = 1; +} + +class StoreSIL<string mnemonic, bits<16> opcode, SDPatternOperator operator, + Immediate imm> + : InstSIL<opcode, (outs), (ins bdaddr12only:$addr, imm:$src), + mnemonic#"\t$addr, $src", + [(operator imm:$src, bdaddr12only:$addr)]> { + let mayStore = 1; +} + +multiclass StoreSIPair<string mnemonic, bits<8> siOpcode, bits<16> siyOpcode, + SDPatternOperator operator, Immediate imm> { + let Function = mnemonic in { + let PairType = "12" in + def "" : StoreSI<mnemonic, siOpcode, operator, imm, bdaddr12pair>; + let PairType = "20" in + def Y : StoreSIY<mnemonic#"y", siyOpcode, operator, imm, bdaddr20pair>; + } +} + +class UnaryRR<string mnemonic, bits<8> opcode, SDPatternOperator operator, + RegisterOperand cls1, RegisterOperand cls2> + : InstRR<opcode, (outs cls1:$dst), (ins cls2:$src), + mnemonic#"\t$dst, $src", + [(set cls1:$dst, (operator cls2:$src))]>; + +class 
UnaryRRE<string mnemonic, bits<16> opcode, SDPatternOperator operator, + RegisterOperand cls1, RegisterOperand cls2> + : InstRRE<opcode, (outs cls1:$dst), (ins cls2:$src), + mnemonic#"\t$dst, $src", + [(set cls1:$dst, (operator cls2:$src))]>; + +class UnaryRRF<string mnemonic, bits<16> opcode, RegisterOperand cls1, + RegisterOperand cls2> + : InstRRF<opcode, (outs cls1:$dst), (ins cls2:$src, uimm8zx4:$mode), + mnemonic#"\t$dst, $mode, $src", []>; + +class UnaryRI<string mnemonic, bits<12> opcode, SDPatternOperator operator, + RegisterOperand cls, Immediate imm> + : InstRI<opcode, (outs cls:$dst), (ins imm:$src), + mnemonic#"\t$dst, $src", + [(set cls:$dst, (operator imm:$src))]>; + +class UnaryRIL<string mnemonic, bits<12> opcode, SDPatternOperator operator, + RegisterOperand cls, Immediate imm> + : InstRIL<opcode, (outs cls:$dst), (ins imm:$src), + mnemonic#"\t$dst, $src", + [(set cls:$dst, (operator imm:$src))]>; + +class UnaryRILPC<string mnemonic, bits<12> opcode, SDPatternOperator operator, + RegisterOperand cls> + : InstRIL<opcode, (outs cls:$dst), (ins pcrel32:$addr), + mnemonic#"\t$dst, $addr", + [(set cls:$dst, (operator pcrel32:$addr))]> { + let mayLoad = 1; + // We want PC-relative addresses to be tried ahead of BD and BDX addresses. + // However, BDXs have two extra operands and are therefore 6 units more + // complex. + let AddedComplexity = 7; +} + +class UnaryRX<string mnemonic, bits<8> opcode, SDPatternOperator operator, + RegisterOperand cls, AddressingMode mode = bdxaddr12only> + : InstRX<opcode, (outs cls:$dst), (ins mode:$addr), + mnemonic#"\t$dst, $addr", + [(set cls:$dst, (operator mode:$addr))]> { + let mayLoad = 1; +} + +class UnaryRXE<string mnemonic, bits<16> opcode, SDPatternOperator operator, + RegisterOperand cls> + : InstRXE<opcode, (outs cls:$dst), (ins bdxaddr12only:$addr), + mnemonic#"\t$dst, $addr", + [(set cls:$dst, (operator bdxaddr12only:$addr))]> { + let mayLoad = 1; +} + +class UnaryRXY<string mnemonic, bits<16> opcode, SDPatternOperator operator, + RegisterOperand cls, AddressingMode mode = bdxaddr20only> + : InstRXY<opcode, (outs cls:$dst), (ins mode:$addr), + mnemonic#"\t$dst, $addr", + [(set cls:$dst, (operator mode:$addr))]> { + let mayLoad = 1; +} + +multiclass UnaryRXPair<string mnemonic, bits<8> rxOpcode, bits<16> rxyOpcode, + SDPatternOperator operator, RegisterOperand cls> { + let Function = mnemonic ## #cls in { + let PairType = "12" in + def "" : UnaryRX<mnemonic, rxOpcode, operator, cls, bdxaddr12pair>; + let PairType = "20" in + def Y : UnaryRXY<mnemonic#"y", rxyOpcode, operator, cls, bdxaddr20pair>; + } +} + +class BinaryRR<string mnemonic, bits<8> opcode, SDPatternOperator operator, + RegisterOperand cls1, RegisterOperand cls2> + : InstRR<opcode, (outs cls1:$dst), (ins cls1:$src1, cls2:$src2), + mnemonic#"\t$dst, $src2", + [(set cls1:$dst, (operator cls1:$src1, cls2:$src2))]> { + let Constraints = "$src1 = $dst"; + let DisableEncoding = "$src1"; +} + +class BinaryRRE<string mnemonic, bits<16> opcode, SDPatternOperator operator, + RegisterOperand cls1, RegisterOperand cls2> + : InstRRE<opcode, (outs cls1:$dst), (ins cls1:$src1, cls2:$src2), + mnemonic#"\t$dst, $src2", + [(set cls1:$dst, (operator cls1:$src1, cls2:$src2))]> { + let Constraints = "$src1 = $dst"; + let DisableEncoding = "$src1"; +} + +// Here the assembly and dag operands are in natural order, +// but the first input operand maps to R3 and the second to R2. +// This is used for "CPSDR R1, R3, R2", which is equivalent to +// R1 = copysign (R3, R2). 
+// +// Direct uses of the instruction must pass operands in encoding order -- +// R1, R2, R3 -- so they must pass the source operands in reverse order. +class BinaryRevRRF<string mnemonic, bits<16> opcode, SDPatternOperator operator, + RegisterOperand cls1, RegisterOperand cls2> + : InstRRF<opcode, (outs cls1:$dst), (ins cls2:$src2, cls1:$src1), + mnemonic#"\t$dst, $src1, $src2", + [(set cls1:$dst, (operator cls1:$src1, cls2:$src2))]>; + +class BinaryRI<string mnemonic, bits<12> opcode, SDPatternOperator operator, + RegisterOperand cls, Immediate imm> + : InstRI<opcode, (outs cls:$dst), (ins cls:$src1, imm:$src2), + mnemonic#"\t$dst, $src2", + [(set cls:$dst, (operator cls:$src1, imm:$src2))]> { + let Constraints = "$src1 = $dst"; + let DisableEncoding = "$src1"; +} + +class BinaryRIL<string mnemonic, bits<12> opcode, SDPatternOperator operator, + RegisterOperand cls, Immediate imm> + : InstRIL<opcode, (outs cls:$dst), (ins cls:$src1, imm:$src2), + mnemonic#"\t$dst, $src2", + [(set cls:$dst, (operator cls:$src1, imm:$src2))]> { + let Constraints = "$src1 = $dst"; + let DisableEncoding = "$src1"; +} + +class BinaryRX<string mnemonic, bits<8> opcode, SDPatternOperator operator, + RegisterOperand cls, SDPatternOperator load, + AddressingMode mode = bdxaddr12only> + : InstRX<opcode, (outs cls:$dst), (ins cls:$src1, mode:$src2), + mnemonic#"\t$dst, $src2", + [(set cls:$dst, (operator cls:$src1, (load mode:$src2)))]> { + let Constraints = "$src1 = $dst"; + let DisableEncoding = "$src1"; + let mayLoad = 1; +} + +class BinaryRXE<string mnemonic, bits<16> opcode, SDPatternOperator operator, + RegisterOperand cls, SDPatternOperator load> + : InstRXE<opcode, (outs cls:$dst), (ins cls:$src1, bdxaddr12only:$src2), + mnemonic#"\t$dst, $src2", + [(set cls:$dst, (operator cls:$src1, + (load bdxaddr12only:$src2)))]> { + let Constraints = "$src1 = $dst"; + let DisableEncoding = "$src1"; + let mayLoad = 1; +} + +class BinaryRXY<string mnemonic, bits<16> opcode, SDPatternOperator operator, + RegisterOperand cls, SDPatternOperator load, + AddressingMode mode = bdxaddr20only> + : InstRXY<opcode, (outs cls:$dst), (ins cls:$src1, mode:$src2), + mnemonic#"\t$dst, $src2", + [(set cls:$dst, (operator cls:$src1, (load mode:$src2)))]> { + let Constraints = "$src1 = $dst"; + let DisableEncoding = "$src1"; + let mayLoad = 1; +} + +multiclass BinaryRXPair<string mnemonic, bits<8> rxOpcode, bits<16> rxyOpcode, + SDPatternOperator operator, RegisterOperand cls, + SDPatternOperator load> { + let Function = mnemonic ## #cls in { + let PairType = "12" in + def "" : BinaryRX<mnemonic, rxOpcode, operator, cls, load, bdxaddr12pair>; + let PairType = "20" in + def Y : BinaryRXY<mnemonic#"y", rxyOpcode, operator, cls, load, + bdxaddr20pair>; + } +} + +class BinarySI<string mnemonic, bits<8> opcode, SDPatternOperator operator, + Operand imm, AddressingMode mode = bdaddr12only> + : InstSI<opcode, (outs), (ins mode:$addr, imm:$src), + mnemonic#"\t$addr, $src", + [(store (operator (load mode:$addr), imm:$src), mode:$addr)]> { + let mayLoad = 1; + let mayStore = 1; +} + +class BinarySIY<string mnemonic, bits<16> opcode, SDPatternOperator operator, + Operand imm, AddressingMode mode = bdaddr20only> + : InstSIY<opcode, (outs), (ins mode:$addr, imm:$src), + mnemonic#"\t$addr, $src", + [(store (operator (load mode:$addr), imm:$src), mode:$addr)]> { + let mayLoad = 1; + let mayStore = 1; +} + +multiclass BinarySIPair<string mnemonic, bits<8> siOpcode, + bits<16> siyOpcode, SDPatternOperator operator, + Operand imm> { + let Function = 
mnemonic in {
+    let PairType = "12" in
+      def "" : BinarySI<mnemonic, siOpcode, operator, imm, bdaddr12pair>;
+    let PairType = "20" in
+      def Y : BinarySIY<mnemonic#"y", siyOpcode, operator, imm, bdaddr20pair>;
+  }
+}
+
+class ShiftRS<string mnemonic, bits<8> opcode, SDPatternOperator operator,
+              RegisterOperand cls, AddressingMode mode>
+  : InstRS<opcode, (outs cls:$dst), (ins cls:$src1, mode:$src2),
+           mnemonic#"\t$dst, $src2",
+           [(set cls:$dst, (operator cls:$src1, mode:$src2))]> {
+  let R3 = 0;
+  let Constraints = "$src1 = $dst";
+  let DisableEncoding = "$src1";
+}
+
+class ShiftRSY<string mnemonic, bits<16> opcode, SDPatternOperator operator,
+               RegisterOperand cls, AddressingMode mode>
+  : InstRSY<opcode, (outs cls:$dst), (ins cls:$src1, mode:$src2),
+            mnemonic#"\t$dst, $src1, $src2",
+            [(set cls:$dst, (operator cls:$src1, mode:$src2))]>;
+
+class CompareRR<string mnemonic, bits<8> opcode, SDPatternOperator operator,
+                RegisterOperand cls1, RegisterOperand cls2>
+  : InstRR<opcode, (outs), (ins cls1:$src1, cls2:$src2),
+           mnemonic#"\t$src1, $src2",
+           [(operator cls1:$src1, cls2:$src2)]>;
+
+class CompareRRE<string mnemonic, bits<16> opcode, SDPatternOperator operator,
+                 RegisterOperand cls1, RegisterOperand cls2>
+  : InstRRE<opcode, (outs), (ins cls1:$src1, cls2:$src2),
+            mnemonic#"\t$src1, $src2",
+            [(operator cls1:$src1, cls2:$src2)]>;
+
+class CompareRI<string mnemonic, bits<12> opcode, SDPatternOperator operator,
+                RegisterOperand cls, Immediate imm>
+  : InstRI<opcode, (outs), (ins cls:$src1, imm:$src2),
+           mnemonic#"\t$src1, $src2",
+           [(operator cls:$src1, imm:$src2)]>;
+
+class CompareRIL<string mnemonic, bits<12> opcode, SDPatternOperator operator,
+                 RegisterOperand cls, Immediate imm>
+  : InstRIL<opcode, (outs), (ins cls:$src1, imm:$src2),
+            mnemonic#"\t$src1, $src2",
+            [(operator cls:$src1, imm:$src2)]>;
+
+class CompareRILPC<string mnemonic, bits<12> opcode, SDPatternOperator operator,
+                   RegisterOperand cls, SDPatternOperator load>
+  : InstRIL<opcode, (outs), (ins cls:$src1, pcrel32:$src2),
+            mnemonic#"\t$src1, $src2",
+            [(operator cls:$src1, (load pcrel32:$src2))]> {
+  let mayLoad = 1;
+  // We want PC-relative addresses to be tried ahead of BD and BDX addresses.
+  // However, BDXs have two extra operands and are therefore 6 units more
+  // complex.
+ let AddedComplexity = 7; +} + +class CompareRX<string mnemonic, bits<8> opcode, SDPatternOperator operator, + RegisterOperand cls, SDPatternOperator load, + AddressingMode mode = bdxaddr12only> + : InstRX<opcode, (outs), (ins cls:$src1, mode:$src2), + mnemonic#"\t$src1, $src2", + [(operator cls:$src1, (load mode:$src2))]> { + let mayLoad = 1; +} + +class CompareRXE<string mnemonic, bits<16> opcode, SDPatternOperator operator, + RegisterOperand cls, SDPatternOperator load> + : InstRXE<opcode, (outs), (ins cls:$src1, bdxaddr12only:$src2), + mnemonic#"\t$src1, $src2", + [(operator cls:$src1, (load bdxaddr12only:$src2))]> { + let mayLoad = 1; +} + +class CompareRXY<string mnemonic, bits<16> opcode, SDPatternOperator operator, + RegisterOperand cls, SDPatternOperator load, + AddressingMode mode = bdxaddr20only> + : InstRXY<opcode, (outs), (ins cls:$src1, mode:$src2), + mnemonic#"\t$src1, $src2", + [(operator cls:$src1, (load mode:$src2))]> { + let mayLoad = 1; +} + +multiclass CompareRXPair<string mnemonic, bits<8> rxOpcode, bits<16> rxyOpcode, + SDPatternOperator operator, RegisterOperand cls, + SDPatternOperator load> { + let Function = mnemonic ## #cls in { + let PairType = "12" in + def "" : CompareRX<mnemonic, rxOpcode, operator, cls, + load, bdxaddr12pair>; + let PairType = "20" in + def Y : CompareRXY<mnemonic#"y", rxyOpcode, operator, cls, + load, bdxaddr20pair>; + } +} + +class CompareSI<string mnemonic, bits<8> opcode, SDPatternOperator operator, + SDPatternOperator load, Immediate imm, + AddressingMode mode = bdaddr12only> + : InstSI<opcode, (outs), (ins mode:$addr, imm:$src), + mnemonic#"\t$addr, $src", + [(operator (load mode:$addr), imm:$src)]> { + let mayLoad = 1; +} + +class CompareSIL<string mnemonic, bits<16> opcode, SDPatternOperator operator, + SDPatternOperator load, Immediate imm> + : InstSIL<opcode, (outs), (ins bdaddr12only:$addr, imm:$src), + mnemonic#"\t$addr, $src", + [(operator (load bdaddr12only:$addr), imm:$src)]> { + let mayLoad = 1; +} + +class CompareSIY<string mnemonic, bits<16> opcode, SDPatternOperator operator, + SDPatternOperator load, Immediate imm, + AddressingMode mode = bdaddr20only> + : InstSIY<opcode, (outs), (ins mode:$addr, imm:$src), + mnemonic#"\t$addr, $src", + [(operator (load mode:$addr), imm:$src)]> { + let mayLoad = 1; +} + +multiclass CompareSIPair<string mnemonic, bits<8> siOpcode, bits<16> siyOpcode, + SDPatternOperator operator, SDPatternOperator load, + Immediate imm> { + let Function = mnemonic in { + let PairType = "12" in + def "" : CompareSI<mnemonic, siOpcode, operator, load, imm, bdaddr12pair>; + let PairType = "20" in + def Y : CompareSIY<mnemonic#"y", siyOpcode, operator, load, imm, + bdaddr20pair>; + } +} + +class TernaryRRD<string mnemonic, bits<16> opcode, + SDPatternOperator operator, RegisterOperand cls> + : InstRRD<opcode, (outs cls:$dst), (ins cls:$src1, cls:$src2, cls:$src3), + mnemonic#"\t$dst, $src2, $src3", + [(set cls:$dst, (operator cls:$src1, cls:$src2, cls:$src3))]> { + let Constraints = "$src1 = $dst"; + let DisableEncoding = "$src1"; +} + +class TernaryRXF<string mnemonic, bits<16> opcode, SDPatternOperator operator, + RegisterOperand cls, SDPatternOperator load> + : InstRXF<opcode, (outs cls:$dst), + (ins cls:$src1, cls:$src2, bdxaddr12only:$src3), + mnemonic#"\t$dst, $src2, $src3", + [(set cls:$dst, (operator cls:$src1, cls:$src2, + (load bdxaddr12only:$src3)))]> { + let Constraints = "$src1 = $dst"; + let DisableEncoding = "$src1"; + let mayLoad = 1; +} + +class CmpSwapRS<string mnemonic, bits<8> opcode, 
SDPatternOperator operator,
+                RegisterOperand cls, AddressingMode mode = bdaddr12only>
+  : InstRS<opcode, (outs cls:$dst), (ins cls:$old, cls:$new, mode:$ptr),
+           mnemonic#"\t$dst, $new, $ptr",
+           [(set cls:$dst, (operator mode:$ptr, cls:$old, cls:$new))]> {
+  let Constraints = "$old = $dst";
+  let DisableEncoding = "$old";
+  let mayLoad = 1;
+  let mayStore = 1;
+}
+
+class CmpSwapRSY<string mnemonic, bits<16> opcode, SDPatternOperator operator,
+                 RegisterOperand cls, AddressingMode mode = bdaddr20only>
+  : InstRSY<opcode, (outs cls:$dst), (ins cls:$old, cls:$new, mode:$ptr),
+            mnemonic#"\t$dst, $new, $ptr",
+            [(set cls:$dst, (operator mode:$ptr, cls:$old, cls:$new))]> {
+  let Constraints = "$old = $dst";
+  let DisableEncoding = "$old";
+  let mayLoad = 1;
+  let mayStore = 1;
+}
+
+multiclass CmpSwapRSPair<string mnemonic, bits<8> rsOpcode, bits<16> rsyOpcode,
+                         SDPatternOperator operator, RegisterOperand cls> {
+  let Function = mnemonic ## #cls in {
+    let PairType = "12" in
+      def "" : CmpSwapRS<mnemonic, rsOpcode, operator, cls, bdaddr12pair>;
+    let PairType = "20" in
+      def Y : CmpSwapRSY<mnemonic#"y", rsyOpcode, operator, cls, bdaddr20pair>;
+  }
+}
+
+class RotateSelectRIEf<string mnemonic, bits<16> opcode, RegisterOperand cls1,
+                       RegisterOperand cls2>
+  : InstRIEf<opcode, (outs cls1:$dst),
+             (ins cls1:$src1, cls2:$src2,
+                  uimm8zx6:$imm1, uimm8zx6:$imm2, uimm8zx6:$imm3),
+             mnemonic#"\t$dst, $src2, $imm1, $imm2, $imm3", []> {
+  let Constraints = "$src1 = $dst";
+  let DisableEncoding = "$src1";
+}
+
+//===----------------------------------------------------------------------===//
+// Pseudo instructions
+//===----------------------------------------------------------------------===//
+//
+// Convenience instructions that get lowered to real instructions
+// by either SystemZTargetLowering::EmitInstrWithCustomInserter()
+// or SystemZInstrInfo::expandPostRAPseudo().
+//
+//===----------------------------------------------------------------------===//
+
+class Pseudo<dag outs, dag ins, list<dag> pattern>
+  : InstSystemZ<0, outs, ins, "", pattern> {
+  let isPseudo = 1;
+  let isCodeGenOnly = 1;
+}
+
+// Implements "$dst = $cc & (8 >> CC) ? $src1 : $src2", where CC is
+// the value of the PSW's 2-bit condition code field.
+class SelectWrapper<RegisterOperand cls>
+  : Pseudo<(outs cls:$dst), (ins cls:$src1, cls:$src2, i8imm:$cc),
+           [(set cls:$dst, (z_select_ccmask cls:$src1, cls:$src2, imm:$cc))]> {
+  let usesCustomInserter = 1;
+  // Although the instructions used by these nodes do not in themselves
+  // change the PSW, the insertion requires new blocks, and the PSW cannot
+  // be live across them.
+  let Defs = [PSW];
+  let Uses = [PSW];
+}
+
+// OPERATOR is ATOMIC_SWAP or an ATOMIC_LOAD_* operation. PAT and OPERAND
+// describe the second (non-memory) operand.
+class AtomicLoadBinary<SDPatternOperator operator, RegisterOperand cls,
+                       dag pat, DAGOperand operand>
+  : Pseudo<(outs cls:$dst), (ins bdaddr20only:$ptr, operand:$src2),
+           [(set cls:$dst, (operator bdaddr20only:$ptr, pat))]> {
+  let Defs = [PSW];
+  let Has20BitOffset = 1;
+  let mayLoad = 1;
+  let mayStore = 1;
+  let usesCustomInserter = 1;
+}
+
+// Specializations of AtomicLoadBinary.
+class AtomicLoadBinaryReg32<SDPatternOperator operator>
+  : AtomicLoadBinary<operator, GR32, (i32 GR32:$src2), GR32>;
+class AtomicLoadBinaryImm32<SDPatternOperator operator, Immediate imm>
+  : AtomicLoadBinary<operator, GR32, (i32 imm:$src2), imm>;
+class AtomicLoadBinaryReg64<SDPatternOperator operator>
+  : AtomicLoadBinary<operator, GR64, (i64 GR64:$src2), GR64>;
+class AtomicLoadBinaryImm64<SDPatternOperator operator, Immediate imm>
+  : AtomicLoadBinary<operator, GR64, (i64 imm:$src2), imm>;
+
+// OPERATOR is ATOMIC_SWAPW or an ATOMIC_LOADW_* operation. PAT and OPERAND
+// describe the second (non-memory) operand.
+class AtomicLoadWBinary<SDPatternOperator operator, dag pat,
+                        DAGOperand operand>
+  : Pseudo<(outs GR32:$dst),
+           (ins bdaddr20only:$ptr, operand:$src2, ADDR32:$bitshift,
+                ADDR32:$negbitshift, uimm32:$bitsize),
+           [(set GR32:$dst, (operator bdaddr20only:$ptr, pat, ADDR32:$bitshift,
+                             ADDR32:$negbitshift, uimm32:$bitsize))]> {
+  let Defs = [PSW];
+  let Has20BitOffset = 1;
+  let mayLoad = 1;
+  let mayStore = 1;
+  let usesCustomInserter = 1;
+}
+
+// Specializations of AtomicLoadWBinary.
+class AtomicLoadWBinaryReg<SDPatternOperator operator>
+  : AtomicLoadWBinary<operator, (i32 GR32:$src2), GR32>;
+class AtomicLoadWBinaryImm<SDPatternOperator operator, Immediate imm>
+  : AtomicLoadWBinary<operator, (i32 imm:$src2), imm>;
diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZInstrInfo.cpp b/contrib/llvm/lib/Target/SystemZ/SystemZInstrInfo.cpp
new file mode 100644
index 0000000..0718c83
--- /dev/null
+++ b/contrib/llvm/lib/Target/SystemZ/SystemZInstrInfo.cpp
@@ -0,0 +1,444 @@
+//===-- SystemZInstrInfo.cpp - SystemZ instruction information ------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the SystemZ implementation of the TargetInstrInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "SystemZInstrInfo.h"
+#include "SystemZInstrBuilder.h"
+
+#define GET_INSTRINFO_CTOR
+#define GET_INSTRMAP_INFO
+#include "SystemZGenInstrInfo.inc"
+
+using namespace llvm;
+
+SystemZInstrInfo::SystemZInstrInfo(SystemZTargetMachine &tm)
+  : SystemZGenInstrInfo(SystemZ::ADJCALLSTACKDOWN, SystemZ::ADJCALLSTACKUP),
+    RI(tm, *this) {
+}
+
+// MI is a 128-bit load or store. Split it into two 64-bit loads or stores,
+// each having the opcode given by NewOpcode.
+void SystemZInstrInfo::splitMove(MachineBasicBlock::iterator MI,
+                                 unsigned NewOpcode) const {
+  MachineBasicBlock *MBB = MI->getParent();
+  MachineFunction &MF = *MBB->getParent();
+
+  // Get two load or store instructions. Use the original instruction for one
+  // of them (arbitrarily the second here) and create a clone for the other.
+  MachineInstr *EarlierMI = MF.CloneMachineInstr(MI);
+  MBB->insert(MI, EarlierMI);
+
+  // Set up the two 64-bit registers.
+  MachineOperand &HighRegOp = EarlierMI->getOperand(0);
+  MachineOperand &LowRegOp = MI->getOperand(0);
+  HighRegOp.setReg(RI.getSubReg(HighRegOp.getReg(), SystemZ::subreg_high));
+  LowRegOp.setReg(RI.getSubReg(LowRegOp.getReg(), SystemZ::subreg_low));
+
+  // The address in the first (high) instruction is already correct.
+  // Adjust the offset in the second (low) instruction.
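  // (Worked example, for illustration only: a 128-bit L128 load of a frame
  //  slot at displacement D becomes an LG of the high 64-bit subregister
  //  from D and an LG of the low subregister from D+8; getOpcodeForOffset()
  //  below then confirms that each resulting displacement still fits the
  //  chosen instruction's displacement field.)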
+  MachineOperand &HighOffsetOp = EarlierMI->getOperand(2);
+  MachineOperand &LowOffsetOp = MI->getOperand(2);
+  LowOffsetOp.setImm(LowOffsetOp.getImm() + 8);
+
+  // Set the opcodes.
+  unsigned HighOpcode = getOpcodeForOffset(NewOpcode, HighOffsetOp.getImm());
+  unsigned LowOpcode = getOpcodeForOffset(NewOpcode, LowOffsetOp.getImm());
+  assert(HighOpcode && LowOpcode && "Both offsets should be in range");
+
+  EarlierMI->setDesc(get(HighOpcode));
+  MI->setDesc(get(LowOpcode));
+}
+
+// Split ADJDYNALLOC instruction MI.
+void SystemZInstrInfo::splitAdjDynAlloc(MachineBasicBlock::iterator MI) const {
+  MachineBasicBlock *MBB = MI->getParent();
+  MachineFunction &MF = *MBB->getParent();
+  MachineFrameInfo *MFFrame = MF.getFrameInfo();
+  MachineOperand &OffsetMO = MI->getOperand(2);
+
+  uint64_t Offset = (MFFrame->getMaxCallFrameSize() +
+                     SystemZMC::CallFrameSize +
+                     OffsetMO.getImm());
+  unsigned NewOpcode = getOpcodeForOffset(SystemZ::LA, Offset);
+  assert(NewOpcode && "No support for huge argument lists yet");
+  MI->setDesc(get(NewOpcode));
+  OffsetMO.setImm(Offset);
+}
+
+// If MI is a simple load or store for a frame object, return the register
+// it loads or stores and set FrameIndex to the index of the frame object.
+// Return 0 otherwise.
+//
+// Flag is SimpleBDXLoad for loads and SimpleBDXStore for stores.
+static int isSimpleMove(const MachineInstr *MI, int &FrameIndex, int Flag) {
+  const MCInstrDesc &MCID = MI->getDesc();
+  if ((MCID.TSFlags & Flag) &&
+      MI->getOperand(1).isFI() &&
+      MI->getOperand(2).getImm() == 0 &&
+      MI->getOperand(3).getReg() == 0) {
+    FrameIndex = MI->getOperand(1).getIndex();
+    return MI->getOperand(0).getReg();
+  }
+  return 0;
+}
+
+unsigned SystemZInstrInfo::isLoadFromStackSlot(const MachineInstr *MI,
+                                               int &FrameIndex) const {
+  return isSimpleMove(MI, FrameIndex, SystemZII::SimpleBDXLoad);
+}
+
+unsigned SystemZInstrInfo::isStoreToStackSlot(const MachineInstr *MI,
+                                              int &FrameIndex) const {
+  return isSimpleMove(MI, FrameIndex, SystemZII::SimpleBDXStore);
+}
+
+bool SystemZInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,
+                                     MachineBasicBlock *&TBB,
+                                     MachineBasicBlock *&FBB,
+                                     SmallVectorImpl<MachineOperand> &Cond,
+                                     bool AllowModify) const {
+  // Most of the code and comments here are boilerplate.
+
+  // Start from the bottom of the block and work up, examining the
+  // terminator instructions.
+  MachineBasicBlock::iterator I = MBB.end();
+  while (I != MBB.begin()) {
+    --I;
+    if (I->isDebugValue())
+      continue;
+
+    // Working from the bottom, when we see a non-terminator instruction, we're
+    // done.
+    if (!isUnpredicatedTerminator(I))
+      break;
+
+    // A terminator that isn't a branch can't easily be handled by this
+    // analysis.
+    unsigned ThisCond;
+    const MachineOperand *ThisTarget;
+    if (!isBranch(I, ThisCond, ThisTarget))
+      return true;
+
+    // Can't handle indirect branches.
+    if (!ThisTarget->isMBB())
+      return true;
+
+    if (ThisCond == SystemZ::CCMASK_ANY) {
+      // Handle unconditional branches.
+      if (!AllowModify) {
+        TBB = ThisTarget->getMBB();
+        continue;
+      }
+
+      // If the block has any instructions after a JMP, delete them.
+      while (llvm::next(I) != MBB.end())
+        llvm::next(I)->eraseFromParent();
+
+      Cond.clear();
+      FBB = 0;
+
+      // Delete the JMP if it's equivalent to a fall-through.
+      if (MBB.isLayoutSuccessor(ThisTarget->getMBB())) {
+        TBB = 0;
+        I->eraseFromParent();
+        I = MBB.end();
+        continue;
+      }
+
+      // TBB is used to indicate the unconditional destination.
+ TBB = ThisTarget->getMBB(); + continue; + } + + // Working from the bottom, handle the first conditional branch. + if (Cond.empty()) { + // FIXME: add X86-style branch swap + FBB = TBB; + TBB = ThisTarget->getMBB(); + Cond.push_back(MachineOperand::CreateImm(ThisCond)); + continue; + } + + // Handle subsequent conditional branches. + assert(Cond.size() == 1); + assert(TBB); + + // Only handle the case where all conditional branches branch to the same + // destination. + if (TBB != ThisTarget->getMBB()) + return true; + + // If the conditions are the same, we can leave them alone. + unsigned OldCond = Cond[0].getImm(); + if (OldCond == ThisCond) + continue; + + // FIXME: Try combining conditions like X86 does. Should be easy on Z! + } + + return false; +} + +unsigned SystemZInstrInfo::RemoveBranch(MachineBasicBlock &MBB) const { + // Most of the code and comments here are boilerplate. + MachineBasicBlock::iterator I = MBB.end(); + unsigned Count = 0; + + while (I != MBB.begin()) { + --I; + if (I->isDebugValue()) + continue; + unsigned Cond; + const MachineOperand *Target; + if (!isBranch(I, Cond, Target)) + break; + if (!Target->isMBB()) + break; + // Remove the branch. + I->eraseFromParent(); + I = MBB.end(); + ++Count; + } + + return Count; +} + +unsigned +SystemZInstrInfo::InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, + MachineBasicBlock *FBB, + const SmallVectorImpl<MachineOperand> &Cond, + DebugLoc DL) const { + // In this function we output 32-bit branches, which should always + // have enough range. They can be shortened and relaxed by later code + // in the pipeline, if desired. + + // Shouldn't be a fall through. + assert(TBB && "InsertBranch must not be told to insert a fallthrough"); + assert((Cond.size() == 1 || Cond.size() == 0) && + "SystemZ branch conditions have one component!"); + + if (Cond.empty()) { + // Unconditional branch? + assert(!FBB && "Unconditional branch with multiple successors!"); + BuildMI(&MBB, DL, get(SystemZ::JG)).addMBB(TBB); + return 1; + } + + // Conditional branch. + unsigned Count = 0; + unsigned CC = Cond[0].getImm(); + BuildMI(&MBB, DL, get(SystemZ::BRCL)).addImm(CC).addMBB(TBB); + ++Count; + + if (FBB) { + // Two-way Conditional branch. Insert the second branch. + BuildMI(&MBB, DL, get(SystemZ::JG)).addMBB(FBB); + ++Count; + } + return Count; +} + +void +SystemZInstrInfo::copyPhysReg(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBI, DebugLoc DL, + unsigned DestReg, unsigned SrcReg, + bool KillSrc) const { + // Split 128-bit GPR moves into two 64-bit moves. This handles ADDR128 too. + if (SystemZ::GR128BitRegClass.contains(DestReg, SrcReg)) { + copyPhysReg(MBB, MBBI, DL, RI.getSubReg(DestReg, SystemZ::subreg_high), + RI.getSubReg(SrcReg, SystemZ::subreg_high), KillSrc); + copyPhysReg(MBB, MBBI, DL, RI.getSubReg(DestReg, SystemZ::subreg_low), + RI.getSubReg(SrcReg, SystemZ::subreg_low), KillSrc); + return; + } + + // Everything else needs only one instruction. 
+ unsigned Opcode; + if (SystemZ::GR32BitRegClass.contains(DestReg, SrcReg)) + Opcode = SystemZ::LR; + else if (SystemZ::GR64BitRegClass.contains(DestReg, SrcReg)) + Opcode = SystemZ::LGR; + else if (SystemZ::FP32BitRegClass.contains(DestReg, SrcReg)) + Opcode = SystemZ::LER; + else if (SystemZ::FP64BitRegClass.contains(DestReg, SrcReg)) + Opcode = SystemZ::LDR; + else if (SystemZ::FP128BitRegClass.contains(DestReg, SrcReg)) + Opcode = SystemZ::LXR; + else + llvm_unreachable("Impossible reg-to-reg copy"); + + BuildMI(MBB, MBBI, DL, get(Opcode), DestReg) + .addReg(SrcReg, getKillRegState(KillSrc)); +} + +void +SystemZInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBI, + unsigned SrcReg, bool isKill, + int FrameIdx, + const TargetRegisterClass *RC, + const TargetRegisterInfo *TRI) const { + DebugLoc DL = MBBI != MBB.end() ? MBBI->getDebugLoc() : DebugLoc(); + + // Callers may expect a single instruction, so keep 128-bit moves + // together for now and lower them after register allocation. + unsigned LoadOpcode, StoreOpcode; + getLoadStoreOpcodes(RC, LoadOpcode, StoreOpcode); + addFrameReference(BuildMI(MBB, MBBI, DL, get(StoreOpcode)) + .addReg(SrcReg, getKillRegState(isKill)), FrameIdx); +} + +void +SystemZInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBI, + unsigned DestReg, int FrameIdx, + const TargetRegisterClass *RC, + const TargetRegisterInfo *TRI) const { + DebugLoc DL = MBBI != MBB.end() ? MBBI->getDebugLoc() : DebugLoc(); + + // Callers may expect a single instruction, so keep 128-bit moves + // together for now and lower them after register allocation. + unsigned LoadOpcode, StoreOpcode; + getLoadStoreOpcodes(RC, LoadOpcode, StoreOpcode); + addFrameReference(BuildMI(MBB, MBBI, DL, get(LoadOpcode), DestReg), + FrameIdx); +} + +bool +SystemZInstrInfo::expandPostRAPseudo(MachineBasicBlock::iterator MI) const { + switch (MI->getOpcode()) { + case SystemZ::L128: + splitMove(MI, SystemZ::LG); + return true; + + case SystemZ::ST128: + splitMove(MI, SystemZ::STG); + return true; + + case SystemZ::LX: + splitMove(MI, SystemZ::LD); + return true; + + case SystemZ::STX: + splitMove(MI, SystemZ::STD); + return true; + + case SystemZ::ADJDYNALLOC: + splitAdjDynAlloc(MI); + return true; + + default: + return false; + } +} + +bool SystemZInstrInfo:: +ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const { + assert(Cond.size() == 1 && "Invalid branch condition!"); + Cond[0].setImm(Cond[0].getImm() ^ SystemZ::CCMASK_ANY); + return false; +} + +bool SystemZInstrInfo::isBranch(const MachineInstr *MI, unsigned &Cond, + const MachineOperand *&Target) const { + switch (MI->getOpcode()) { + case SystemZ::BR: + case SystemZ::J: + case SystemZ::JG: + Cond = SystemZ::CCMASK_ANY; + Target = &MI->getOperand(0); + return true; + + case SystemZ::BRC: + case SystemZ::BRCL: + Cond = MI->getOperand(0).getImm(); + Target = &MI->getOperand(1); + return true; + + default: + assert(!MI->getDesc().isBranch() && "Unknown branch opcode"); + return false; + } +} + +void SystemZInstrInfo::getLoadStoreOpcodes(const TargetRegisterClass *RC, + unsigned &LoadOpcode, + unsigned &StoreOpcode) const { + if (RC == &SystemZ::GR32BitRegClass || RC == &SystemZ::ADDR32BitRegClass) { + LoadOpcode = SystemZ::L; + StoreOpcode = SystemZ::ST32; + } else if (RC == &SystemZ::GR64BitRegClass || + RC == &SystemZ::ADDR64BitRegClass) { + LoadOpcode = SystemZ::LG; + StoreOpcode = SystemZ::STG; + } else if (RC == &SystemZ::GR128BitRegClass || + RC == 
&SystemZ::ADDR128BitRegClass) { + LoadOpcode = SystemZ::L128; + StoreOpcode = SystemZ::ST128; + } else if (RC == &SystemZ::FP32BitRegClass) { + LoadOpcode = SystemZ::LE; + StoreOpcode = SystemZ::STE; + } else if (RC == &SystemZ::FP64BitRegClass) { + LoadOpcode = SystemZ::LD; + StoreOpcode = SystemZ::STD; + } else if (RC == &SystemZ::FP128BitRegClass) { + LoadOpcode = SystemZ::LX; + StoreOpcode = SystemZ::STX; + } else + llvm_unreachable("Unsupported regclass to load or store"); +} + +unsigned SystemZInstrInfo::getOpcodeForOffset(unsigned Opcode, + int64_t Offset) const { + const MCInstrDesc &MCID = get(Opcode); + int64_t Offset2 = (MCID.TSFlags & SystemZII::Is128Bit ? Offset + 8 : Offset); + if (isUInt<12>(Offset) && isUInt<12>(Offset2)) { + // Get the instruction to use for unsigned 12-bit displacements. + int Disp12Opcode = SystemZ::getDisp12Opcode(Opcode); + if (Disp12Opcode >= 0) + return Disp12Opcode; + + // All address-related instructions can use unsigned 12-bit + // displacements. + return Opcode; + } + if (isInt<20>(Offset) && isInt<20>(Offset2)) { + // Get the instruction to use for signed 20-bit displacements. + int Disp20Opcode = SystemZ::getDisp20Opcode(Opcode); + if (Disp20Opcode >= 0) + return Disp20Opcode; + + // Check whether Opcode allows signed 20-bit displacements. + if (MCID.TSFlags & SystemZII::Has20BitOffset) + return Opcode; + } + return 0; +} + +void SystemZInstrInfo::loadImmediate(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBI, + unsigned Reg, uint64_t Value) const { + DebugLoc DL = MBBI != MBB.end() ? MBBI->getDebugLoc() : DebugLoc(); + unsigned Opcode; + if (isInt<16>(Value)) + Opcode = SystemZ::LGHI; + else if (SystemZ::isImmLL(Value)) + Opcode = SystemZ::LLILL; + else if (SystemZ::isImmLH(Value)) { + Opcode = SystemZ::LLILH; + Value >>= 16; + } else { + assert(isInt<32>(Value) && "Huge values not handled yet"); + Opcode = SystemZ::LGFI; + } + BuildMI(MBB, MBBI, DL, get(Opcode), Reg).addImm(Value); +} diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZInstrInfo.h b/contrib/llvm/lib/Target/SystemZ/SystemZInstrInfo.h new file mode 100644 index 0000000..0fc4761 --- /dev/null +++ b/contrib/llvm/lib/Target/SystemZ/SystemZInstrInfo.h @@ -0,0 +1,123 @@ +//===-- SystemZInstrInfo.h - SystemZ instruction information ----*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains the SystemZ implementation of the TargetInstrInfo class. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_TARGET_SYSTEMZINSTRINFO_H +#define LLVM_TARGET_SYSTEMZINSTRINFO_H + +#include "SystemZ.h" +#include "SystemZRegisterInfo.h" +#include "llvm/Target/TargetInstrInfo.h" + +#define GET_INSTRINFO_HEADER +#include "SystemZGenInstrInfo.inc" + +namespace llvm { + +class SystemZTargetMachine; + +namespace SystemZII { + enum { + // See comments in SystemZInstrFormats.td. + SimpleBDXLoad = (1 << 0), + SimpleBDXStore = (1 << 1), + Has20BitOffset = (1 << 2), + HasIndex = (1 << 3), + Is128Bit = (1 << 4) + }; + // SystemZ MachineOperand target flags. + enum { + // Masks out the bits for the access model. 
+ MO_SYMBOL_MODIFIER = (1 << 0), + + // @GOT (aka @GOTENT) + MO_GOT = (1 << 0) + }; +} + +class SystemZInstrInfo : public SystemZGenInstrInfo { + const SystemZRegisterInfo RI; + + void splitMove(MachineBasicBlock::iterator MI, unsigned NewOpcode) const; + void splitAdjDynAlloc(MachineBasicBlock::iterator MI) const; + +public: + explicit SystemZInstrInfo(SystemZTargetMachine &TM); + + // Override TargetInstrInfo. + virtual unsigned isLoadFromStackSlot(const MachineInstr *MI, + int &FrameIndex) const LLVM_OVERRIDE; + virtual unsigned isStoreToStackSlot(const MachineInstr *MI, + int &FrameIndex) const LLVM_OVERRIDE; + virtual bool AnalyzeBranch(MachineBasicBlock &MBB, + MachineBasicBlock *&TBB, + MachineBasicBlock *&FBB, + SmallVectorImpl<MachineOperand> &Cond, + bool AllowModify) const LLVM_OVERRIDE; + virtual unsigned RemoveBranch(MachineBasicBlock &MBB) const LLVM_OVERRIDE; + virtual unsigned InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, + MachineBasicBlock *FBB, + const SmallVectorImpl<MachineOperand> &Cond, + DebugLoc DL) const LLVM_OVERRIDE; + virtual void copyPhysReg(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBI, DebugLoc DL, + unsigned DestReg, unsigned SrcReg, + bool KillSrc) const LLVM_OVERRIDE; + virtual void + storeRegToStackSlot(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBI, + unsigned SrcReg, bool isKill, int FrameIndex, + const TargetRegisterClass *RC, + const TargetRegisterInfo *TRI) const LLVM_OVERRIDE; + virtual void + loadRegFromStackSlot(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBI, + unsigned DestReg, int FrameIdx, + const TargetRegisterClass *RC, + const TargetRegisterInfo *TRI) const LLVM_OVERRIDE; + virtual bool + expandPostRAPseudo(MachineBasicBlock::iterator MBBI) const LLVM_OVERRIDE; + virtual bool + ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const + LLVM_OVERRIDE; + + // Return the SystemZRegisterInfo, which this class owns. + const SystemZRegisterInfo &getRegisterInfo() const { return RI; } + + // Return true if MI is a conditional or unconditional branch. + // When returning true, set Cond to the mask of condition-code + // values on which the instruction will branch, and set Target + // to the operand that contains the branch target. This target + // can be a register or a basic block. + bool isBranch(const MachineInstr *MI, unsigned &Cond, + const MachineOperand *&Target) const; + + // Get the load and store opcodes for a given register class. + void getLoadStoreOpcodes(const TargetRegisterClass *RC, + unsigned &LoadOpcode, unsigned &StoreOpcode) const; + + // Opcode is the opcode of an instruction that has an address operand, + // and the caller wants to perform that instruction's operation on an + // address that has displacement Offset. Return the opcode of a suitable + // instruction (which might be Opcode itself) or 0 if no such instruction + // exists. + unsigned getOpcodeForOffset(unsigned Opcode, int64_t Offset) const; + + // Emit code before MBBI in MI to move immediate value Value into + // physical register Reg. 
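+  // Only values that fit one of the forms handled by the implementation are
+  // supported: signed 16-bit immediates (LGHI), unsigned 16-bit values placed
+  // in bits 0-15 or 16-31 (LLILL/LLILH), and other signed 32-bit values
+  // (LGFI).  Wider values are not handled yet.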
+ void loadImmediate(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBI, + unsigned Reg, uint64_t Value) const; +}; +} // end namespace llvm + +#endif diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZInstrInfo.td b/contrib/llvm/lib/Target/SystemZ/SystemZInstrInfo.td new file mode 100644 index 0000000..7ffa382 --- /dev/null +++ b/contrib/llvm/lib/Target/SystemZ/SystemZInstrInfo.td @@ -0,0 +1,955 @@ +//===-- SystemZInstrInfo.td - General SystemZ instructions ----*- tblgen-*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// Stack allocation +//===----------------------------------------------------------------------===// + +def ADJCALLSTACKDOWN : Pseudo<(outs), (ins i64imm:$amt), + [(callseq_start timm:$amt)]>; +def ADJCALLSTACKUP : Pseudo<(outs), (ins i64imm:$amt1, i64imm:$amt2), + [(callseq_end timm:$amt1, timm:$amt2)]>; + +let neverHasSideEffects = 1 in { + // Takes as input the value of the stack pointer after a dynamic allocation + // has been made. Sets the output to the address of the dynamically- + // allocated area itself, skipping the outgoing arguments. + // + // This expands to an LA or LAY instruction. We restrict the offset + // to the range of LA and keep the LAY range in reserve for when + // the size of the outgoing arguments is added. + def ADJDYNALLOC : Pseudo<(outs GR64:$dst), (ins dynalloc12only:$src), + [(set GR64:$dst, dynalloc12only:$src)]>; +} + +//===----------------------------------------------------------------------===// +// Control flow instructions +//===----------------------------------------------------------------------===// + +// A return instruction. R1 is the condition-code mask (all 1s) +// and R2 is the target address, which is always stored in %r14. +let isReturn = 1, isTerminator = 1, isBarrier = 1, hasCtrlDep = 1, + R1 = 15, R2 = 14, isCodeGenOnly = 1 in { + def RET : InstRR<0x07, (outs), (ins), "br\t%r14", [(z_retflag)]>; +} + +// Unconditional branches. R1 is the condition-code mask (all 1s). +let isBranch = 1, isTerminator = 1, isBarrier = 1, R1 = 15 in { + let isIndirectBranch = 1 in + def BR : InstRR<0x07, (outs), (ins ADDR64:$dst), + "br\t$dst", [(brind ADDR64:$dst)]>; + + // An assembler extended mnemonic for BRC. Use a separate instruction for + // the asm parser, so that we don't relax Js to external symbols into JGs. + let isCodeGenOnly = 1 in + def J : InstRI<0xA74, (outs), (ins brtarget16:$dst), "j\t$dst", []>; + let isAsmParserOnly = 1 in + def AsmJ : InstRI<0xA74, (outs), (ins brtarget16:$dst), "j\t$dst", []>; + + // An assembler extended mnemonic for BRCL. (The extension is "G" + // rather than "L" because "JL" is "Jump if Less".) + def JG : InstRIL<0xC04, (outs), (ins brtarget32:$dst), + "jg\t$dst", [(br bb:$dst)]>; +} + +// Conditional branches. It's easier for LLVM to handle these branches +// in their raw BRC/BRCL form, with the 4-bit condition-code mask being +// the first operand. It seems friendlier to use mnemonic forms like +// JE and JLH when writing out the assembly though. 
+multiclass CondBranches<Operand imm, string short, string long> { + let isBranch = 1, isTerminator = 1, Uses = [PSW] in { + def "" : InstRI<0xA74, (outs), (ins imm:$cond, brtarget16:$dst), short, []>; + def L : InstRIL<0xC04, (outs), (ins imm:$cond, brtarget32:$dst), long, []>; + } +} +let isCodeGenOnly = 1 in + defm BRC : CondBranches<cond4, "j$cond\t$dst", "jg$cond\t$dst">; +let isAsmParserOnly = 1 in + defm AsmBRC : CondBranches<uimm8zx4, "brc\t$cond, $dst", "brcl\t$cond, $dst">; + +def : Pat<(z_br_ccmask cond4:$cond, bb:$dst), (BRCL cond4:$cond, bb:$dst)>; + +// Define AsmParser mnemonics for each condition code. +multiclass CondExtendedMnemonic<bits<4> Cond, string name> { + let R1 = Cond in { + def "" : InstRI<0xA74, (outs), (ins brtarget16:$dst), + "j"##name##"\t$dst", []>; + def L : InstRIL<0xC04, (outs), (ins brtarget32:$dst), + "jg"##name##"\t$dst", []>; + } +} +let isAsmParserOnly = 1 in { + defm AsmJO : CondExtendedMnemonic<1, "o">; + defm AsmJH : CondExtendedMnemonic<2, "h">; + defm AsmJNLE : CondExtendedMnemonic<3, "nle">; + defm AsmJL : CondExtendedMnemonic<4, "l">; + defm AsmJNHE : CondExtendedMnemonic<5, "nhe">; + defm AsmJLH : CondExtendedMnemonic<6, "lh">; + defm AsmJNE : CondExtendedMnemonic<7, "ne">; + defm AsmJE : CondExtendedMnemonic<8, "e">; + defm AsmJNLH : CondExtendedMnemonic<9, "nlh">; + defm AsmJHE : CondExtendedMnemonic<10, "he">; + defm AsmJNL : CondExtendedMnemonic<11, "nl">; + defm AsmJLE : CondExtendedMnemonic<12, "le">; + defm AsmJNH : CondExtendedMnemonic<13, "nh">; + defm AsmJNO : CondExtendedMnemonic<14, "no">; +} + +def Select32 : SelectWrapper<GR32>; +def Select64 : SelectWrapper<GR64>; + +//===----------------------------------------------------------------------===// +// Call instructions +//===----------------------------------------------------------------------===// + +// The definitions here are for the call-clobbered registers. +let isCall = 1, Defs = [R0D, R1D, R2D, R3D, R4D, R5D, R14D, + F0D, F1D, F2D, F3D, F4D, F5D, F6D, F7D], + R1 = 14, isCodeGenOnly = 1 in { + def BRAS : InstRI<0xA75, (outs), (ins pcrel16call:$dst, variable_ops), + "bras\t%r14, $dst", []>; + def BRASL : InstRIL<0xC05, (outs), (ins pcrel32call:$dst, variable_ops), + "brasl\t%r14, $dst", [(z_call pcrel32call:$dst)]>; + def BASR : InstRR<0x0D, (outs), (ins ADDR64:$dst, variable_ops), + "basr\t%r14, $dst", [(z_call ADDR64:$dst)]>; +} + +// Define the general form of the call instructions for the asm parser. +// These instructions don't hard-code %r14 as the return address register. +let isAsmParserOnly = 1 in { + def AsmBRAS : InstRI<0xA75, (outs), (ins GR64:$save, brtarget16:$dst), + "bras\t$save, $dst", []>; + def AsmBRASL : InstRIL<0xC05, (outs), (ins GR64:$save, brtarget32:$dst), + "brasl\t$save, $dst", []>; + def AsmBASR : InstRR<0x0D, (outs), (ins GR64:$save, ADDR64:$dst), + "basr\t$save, $dst", []>; +} + +//===----------------------------------------------------------------------===// +// Move instructions +//===----------------------------------------------------------------------===// + +// Register moves. +let neverHasSideEffects = 1 in { + def LR : UnaryRR <"lr", 0x18, null_frag, GR32, GR32>; + def LGR : UnaryRRE<"lgr", 0xB904, null_frag, GR64, GR64>; +} + +// Immediate moves. +let neverHasSideEffects = 1, isAsCheapAsAMove = 1, isMoveImm = 1 in { + // 16-bit sign-extended immediates. + def LHI : UnaryRI<"lhi", 0xA78, bitconvert, GR32, imm32sx16>; + def LGHI : UnaryRI<"lghi", 0xA79, bitconvert, GR64, imm64sx16>; + + // Other 16-bit immediates. 
+ def LLILL : UnaryRI<"llill", 0xA5F, bitconvert, GR64, imm64ll16>; + def LLILH : UnaryRI<"llilh", 0xA5E, bitconvert, GR64, imm64lh16>; + def LLIHL : UnaryRI<"llihl", 0xA5D, bitconvert, GR64, imm64hl16>; + def LLIHH : UnaryRI<"llihh", 0xA5C, bitconvert, GR64, imm64hh16>; + + // 32-bit immediates. + def LGFI : UnaryRIL<"lgfi", 0xC01, bitconvert, GR64, imm64sx32>; + def LLILF : UnaryRIL<"llilf", 0xC0F, bitconvert, GR64, imm64lf32>; + def LLIHF : UnaryRIL<"llihf", 0xC0E, bitconvert, GR64, imm64hf32>; +} + +// Register loads. +let canFoldAsLoad = 1, SimpleBDXLoad = 1 in { + defm L : UnaryRXPair<"l", 0x58, 0xE358, load, GR32>; + def LRL : UnaryRILPC<"lrl", 0xC4D, aligned_load, GR32>; + + def LG : UnaryRXY<"lg", 0xE304, load, GR64>; + def LGRL : UnaryRILPC<"lgrl", 0xC48, aligned_load, GR64>; + + // These instructions are split after register allocation, so we don't + // want a custom inserter. + let Has20BitOffset = 1, HasIndex = 1, Is128Bit = 1 in { + def L128 : Pseudo<(outs GR128:$dst), (ins bdxaddr20only128:$src), + [(set GR128:$dst, (load bdxaddr20only128:$src))]>; + } +} + +// Register stores. +let SimpleBDXStore = 1 in { + let isCodeGenOnly = 1 in { + defm ST32 : StoreRXPair<"st", 0x50, 0xE350, store, GR32>; + def STRL32 : StoreRILPC<"strl", 0xC4F, aligned_store, GR32>; + } + + def STG : StoreRXY<"stg", 0xE324, store, GR64>; + def STGRL : StoreRILPC<"stgrl", 0xC4B, aligned_store, GR64>; + + // These instructions are split after register allocation, so we don't + // want a custom inserter. + let Has20BitOffset = 1, HasIndex = 1, Is128Bit = 1 in { + def ST128 : Pseudo<(outs), (ins GR128:$src, bdxaddr20only128:$dst), + [(store GR128:$src, bdxaddr20only128:$dst)]>; + } +} + +// 8-bit immediate stores to 8-bit fields. +defm MVI : StoreSIPair<"mvi", 0x92, 0xEB52, truncstorei8, imm32zx8trunc>; + +// 16-bit immediate stores to 16-, 32- or 64-bit fields. +def MVHHI : StoreSIL<"mvhhi", 0xE544, truncstorei16, imm32sx16trunc>; +def MVHI : StoreSIL<"mvhi", 0xE54C, store, imm32sx16>; +def MVGHI : StoreSIL<"mvghi", 0xE548, store, imm64sx16>; + +//===----------------------------------------------------------------------===// +// Sign extensions +//===----------------------------------------------------------------------===// + +// 32-bit extensions from registers. +let neverHasSideEffects = 1 in { + def LBR : UnaryRRE<"lbr", 0xB926, sext8, GR32, GR32>; + def LHR : UnaryRRE<"lhr", 0xB927, sext16, GR32, GR32>; +} + +// 64-bit extensions from registers. +let neverHasSideEffects = 1 in { + def LGBR : UnaryRRE<"lgbr", 0xB906, sext8, GR64, GR64>; + def LGHR : UnaryRRE<"lghr", 0xB907, sext16, GR64, GR64>; + def LGFR : UnaryRRE<"lgfr", 0xB914, sext32, GR64, GR32>; +} + +// Match 32-to-64-bit sign extensions in which the source is already +// in a 64-bit register. +def : Pat<(sext_inreg GR64:$src, i32), + (LGFR (EXTRACT_SUBREG GR64:$src, subreg_32bit))>; + +// 32-bit extensions from memory. +def LB : UnaryRXY<"lb", 0xE376, sextloadi8, GR32>; +defm LH : UnaryRXPair<"lh", 0x48, 0xE378, sextloadi16, GR32>; +def LHRL : UnaryRILPC<"lhrl", 0xC45, aligned_sextloadi16, GR32>; + +// 64-bit extensions from memory. +def LGB : UnaryRXY<"lgb", 0xE377, sextloadi8, GR64>; +def LGH : UnaryRXY<"lgh", 0xE315, sextloadi16, GR64>; +def LGF : UnaryRXY<"lgf", 0xE314, sextloadi32, GR64>; +def LGHRL : UnaryRILPC<"lghrl", 0xC44, aligned_sextloadi16, GR64>; +def LGFRL : UnaryRILPC<"lgfrl", 0xC4C, aligned_sextloadi32, GR64>; + +// If the sign of a load-extend operation doesn't matter, use the signed ones. 
+// There's not really much to choose between the sign and zero extensions, +// but LH is more compact than LLH for small offsets. +def : Pat<(i32 (extloadi8 bdxaddr20only:$src)), (LB bdxaddr20only:$src)>; +def : Pat<(i32 (extloadi16 bdxaddr12pair:$src)), (LH bdxaddr12pair:$src)>; +def : Pat<(i32 (extloadi16 bdxaddr20pair:$src)), (LHY bdxaddr20pair:$src)>; + +def : Pat<(i64 (extloadi8 bdxaddr20only:$src)), (LGB bdxaddr20only:$src)>; +def : Pat<(i64 (extloadi16 bdxaddr20only:$src)), (LGH bdxaddr20only:$src)>; +def : Pat<(i64 (extloadi32 bdxaddr20only:$src)), (LGF bdxaddr20only:$src)>; + +//===----------------------------------------------------------------------===// +// Zero extensions +//===----------------------------------------------------------------------===// + +// 32-bit extensions from registers. +let neverHasSideEffects = 1 in { + def LLCR : UnaryRRE<"llcr", 0xB994, zext8, GR32, GR32>; + def LLHR : UnaryRRE<"llhr", 0xB995, zext16, GR32, GR32>; +} + +// 64-bit extensions from registers. +let neverHasSideEffects = 1 in { + def LLGCR : UnaryRRE<"llgcr", 0xB984, zext8, GR64, GR64>; + def LLGHR : UnaryRRE<"llghr", 0xB985, zext16, GR64, GR64>; + def LLGFR : UnaryRRE<"llgfr", 0xB916, zext32, GR64, GR32>; +} + +// Match 32-to-64-bit zero extensions in which the source is already +// in a 64-bit register. +def : Pat<(and GR64:$src, 0xffffffff), + (LLGFR (EXTRACT_SUBREG GR64:$src, subreg_32bit))>; + +// 32-bit extensions from memory. +def LLC : UnaryRXY<"llc", 0xE394, zextloadi8, GR32>; +def LLH : UnaryRXY<"llh", 0xE395, zextloadi16, GR32>; +def LLHRL : UnaryRILPC<"llhrl", 0xC42, aligned_zextloadi16, GR32>; + +// 64-bit extensions from memory. +def LLGC : UnaryRXY<"llgc", 0xE390, zextloadi8, GR64>; +def LLGH : UnaryRXY<"llgh", 0xE391, zextloadi16, GR64>; +def LLGF : UnaryRXY<"llgf", 0xE316, zextloadi32, GR64>; +def LLGHRL : UnaryRILPC<"llghrl", 0xC46, aligned_zextloadi16, GR64>; +def LLGFRL : UnaryRILPC<"llgfrl", 0xC4E, aligned_zextloadi32, GR64>; + +//===----------------------------------------------------------------------===// +// Truncations +//===----------------------------------------------------------------------===// + +// Truncations of 64-bit registers to 32-bit registers. +def : Pat<(i32 (trunc GR64:$src)), + (EXTRACT_SUBREG GR64:$src, subreg_32bit)>; + +// Truncations of 32-bit registers to memory. +let isCodeGenOnly = 1 in { + defm STC32 : StoreRXPair<"stc", 0x42, 0xE372, truncstorei8, GR32>; + defm STH32 : StoreRXPair<"sth", 0x40, 0xE370, truncstorei16, GR32>; + def STHRL32 : StoreRILPC<"sthrl", 0xC47, aligned_truncstorei16, GR32>; +} + +// Truncations of 64-bit registers to memory. +defm STC : StoreRXPair<"stc", 0x42, 0xE372, truncstorei8, GR64>; +defm STH : StoreRXPair<"sth", 0x40, 0xE370, truncstorei16, GR64>; +def STHRL : StoreRILPC<"sthrl", 0xC47, aligned_truncstorei16, GR64>; +defm ST : StoreRXPair<"st", 0x50, 0xE350, truncstorei32, GR64>; +def STRL : StoreRILPC<"strl", 0xC4F, aligned_truncstorei32, GR64>; + +//===----------------------------------------------------------------------===// +// Multi-register moves +//===----------------------------------------------------------------------===// + +// Multi-register loads. +def LMG : LoadMultipleRSY<"lmg", 0xEB04, GR64>; + +// Multi-register stores. +def STMG : StoreMultipleRSY<"stmg", 0xEB24, GR64>; + +//===----------------------------------------------------------------------===// +// Byte swaps +//===----------------------------------------------------------------------===// + +// Byte-swapping register moves. 
+let neverHasSideEffects = 1 in { + def LRVR : UnaryRRE<"lrvr", 0xB91F, bswap, GR32, GR32>; + def LRVGR : UnaryRRE<"lrvgr", 0xB90F, bswap, GR64, GR64>; +} + +// Byte-swapping loads. +def LRV : UnaryRXY<"lrv", 0xE31E, loadu<bswap>, GR32>; +def LRVG : UnaryRXY<"lrvg", 0xE30F, loadu<bswap>, GR64>; + +// Byte-swapping stores. +def STRV : StoreRXY<"strv", 0xE33E, storeu<bswap>, GR32>; +def STRVG : StoreRXY<"strvg", 0xE32F, storeu<bswap>, GR64>; + +//===----------------------------------------------------------------------===// +// Load address instructions +//===----------------------------------------------------------------------===// + +// Load BDX-style addresses. +let neverHasSideEffects = 1, Function = "la" in { + let PairType = "12" in + def LA : InstRX<0x41, (outs GR64:$dst), (ins laaddr12pair:$src), + "la\t$dst, $src", + [(set GR64:$dst, laaddr12pair:$src)]>; + let PairType = "20" in + def LAY : InstRXY<0xE371, (outs GR64:$dst), (ins laaddr20pair:$src), + "lay\t$dst, $src", + [(set GR64:$dst, laaddr20pair:$src)]>; +} + +// Load a PC-relative address. There's no version of this instruction +// with a 16-bit offset, so there's no relaxation. +let neverHasSideEffects = 1 in { + def LARL : InstRIL<0xC00, (outs GR64:$dst), (ins pcrel32:$src), + "larl\t$dst, $src", + [(set GR64:$dst, pcrel32:$src)]>; +} + +//===----------------------------------------------------------------------===// +// Negation +//===----------------------------------------------------------------------===// + +let Defs = [PSW] in { + def LCR : UnaryRR <"lcr", 0x13, ineg, GR32, GR32>; + def LCGR : UnaryRRE<"lcgr", 0xB903, ineg, GR64, GR64>; + def LCGFR : UnaryRRE<"lcgfr", 0xB913, null_frag, GR64, GR32>; +} +defm : SXU<ineg, LCGFR>; + +//===----------------------------------------------------------------------===// +// Insertion +//===----------------------------------------------------------------------===// + +let isCodeGenOnly = 1 in + defm IC32 : BinaryRXPair<"ic", 0x43, 0xE373, inserti8, GR32, zextloadi8>; +defm IC : BinaryRXPair<"ic", 0x43, 0xE373, inserti8, GR64, zextloadi8>; + +defm : InsertMem<"inserti8", IC32, GR32, zextloadi8, bdxaddr12pair>; +defm : InsertMem<"inserti8", IC32Y, GR32, zextloadi8, bdxaddr20pair>; + +defm : InsertMem<"inserti8", IC, GR64, zextloadi8, bdxaddr12pair>; +defm : InsertMem<"inserti8", ICY, GR64, zextloadi8, bdxaddr20pair>; + +// Insertions of a 16-bit immediate, leaving other bits unaffected. +// We don't have or_as_insert equivalents of these operations because +// OI is available instead. +let isCodeGenOnly = 1 in { + def IILL32 : BinaryRI<"iill", 0xA53, insertll, GR32, imm32ll16>; + def IILH32 : BinaryRI<"iilh", 0xA52, insertlh, GR32, imm32lh16>; +} +def IILL : BinaryRI<"iill", 0xA53, insertll, GR64, imm64ll16>; +def IILH : BinaryRI<"iilh", 0xA52, insertlh, GR64, imm64lh16>; +def IIHL : BinaryRI<"iihl", 0xA51, inserthl, GR64, imm64hl16>; +def IIHH : BinaryRI<"iihh", 0xA50, inserthh, GR64, imm64hh16>; + +// ...likewise for 32-bit immediates. For GR32s this is a general +// full-width move. (We use IILF rather than something like LLILF +// for 32-bit moves because IILF leaves the upper 32 bits of the +// GR64 unchanged.) +let isCodeGenOnly = 1 in { + def IILF32 : UnaryRIL<"iilf", 0xC09, bitconvert, GR32, uimm32>; +} +def IILF : BinaryRIL<"iilf", 0xC09, insertlf, GR64, imm64lf32>; +def IIHF : BinaryRIL<"iihf", 0xC08, inserthf, GR64, imm64hf32>; + +// An alternative model of inserthf, with the first operand being +// a zero-extended value. 
+def : Pat<(or (zext32 GR32:$src), imm64hf32:$imm), + (IIHF (INSERT_SUBREG (i64 (IMPLICIT_DEF)), GR32:$src, subreg_32bit), + imm64hf32:$imm)>; + +//===----------------------------------------------------------------------===// +// Addition +//===----------------------------------------------------------------------===// + +// Plain addition. +let Defs = [PSW] in { + // Addition of a register. + let isCommutable = 1 in { + def AR : BinaryRR <"ar", 0x1A, add, GR32, GR32>; + def AGR : BinaryRRE<"agr", 0xB908, add, GR64, GR64>; + } + def AGFR : BinaryRRE<"agfr", 0xB918, null_frag, GR64, GR32>; + + // Addition of signed 16-bit immediates. + def AHI : BinaryRI<"ahi", 0xA7A, add, GR32, imm32sx16>; + def AGHI : BinaryRI<"aghi", 0xA7B, add, GR64, imm64sx16>; + + // Addition of signed 32-bit immediates. + def AFI : BinaryRIL<"afi", 0xC29, add, GR32, simm32>; + def AGFI : BinaryRIL<"agfi", 0xC28, add, GR64, imm64sx32>; + + // Addition of memory. + defm AH : BinaryRXPair<"ah", 0x4A, 0xE37A, add, GR32, sextloadi16>; + defm A : BinaryRXPair<"a", 0x5A, 0xE35A, add, GR32, load>; + def AGF : BinaryRXY<"agf", 0xE318, add, GR64, sextloadi32>; + def AG : BinaryRXY<"ag", 0xE308, add, GR64, load>; + + // Addition to memory. + def ASI : BinarySIY<"asi", 0xEB6A, add, imm32sx8>; + def AGSI : BinarySIY<"agsi", 0xEB7A, add, imm64sx8>; +} +defm : SXB<add, GR64, AGFR>; + +// Addition producing a carry. +let Defs = [PSW] in { + // Addition of a register. + let isCommutable = 1 in { + def ALR : BinaryRR <"alr", 0x1E, addc, GR32, GR32>; + def ALGR : BinaryRRE<"algr", 0xB90A, addc, GR64, GR64>; + } + def ALGFR : BinaryRRE<"algfr", 0xB91A, null_frag, GR64, GR32>; + + // Addition of unsigned 32-bit immediates. + def ALFI : BinaryRIL<"alfi", 0xC2B, addc, GR32, uimm32>; + def ALGFI : BinaryRIL<"algfi", 0xC2A, addc, GR64, imm64zx32>; + + // Addition of memory. + defm AL : BinaryRXPair<"al", 0x5E, 0xE35E, addc, GR32, load>; + def ALGF : BinaryRXY<"algf", 0xE31A, addc, GR64, zextloadi32>; + def ALG : BinaryRXY<"alg", 0xE30A, addc, GR64, load>; +} +defm : ZXB<addc, GR64, ALGFR>; + +// Addition producing and using a carry. +let Defs = [PSW], Uses = [PSW] in { + // Addition of a register. + def ALCR : BinaryRRE<"alcr", 0xB998, adde, GR32, GR32>; + def ALCGR : BinaryRRE<"alcgr", 0xB988, adde, GR64, GR64>; + + // Addition of memory. + def ALC : BinaryRXY<"alc", 0xE398, adde, GR32, load>; + def ALCG : BinaryRXY<"alcg", 0xE388, adde, GR64, load>; +} + +//===----------------------------------------------------------------------===// +// Subtraction +//===----------------------------------------------------------------------===// + +// Plain substraction. Although immediate forms exist, we use the +// add-immediate instruction instead. +let Defs = [PSW] in { + // Subtraction of a register. + def SR : BinaryRR <"sr", 0x1B, sub, GR32, GR32>; + def SGFR : BinaryRRE<"sgfr", 0xB919, null_frag, GR64, GR32>; + def SGR : BinaryRRE<"sgr", 0xB909, sub, GR64, GR64>; + + // Subtraction of memory. + defm S : BinaryRXPair<"s", 0x5B, 0xE35B, sub, GR32, load>; + def SGF : BinaryRXY<"sgf", 0xE319, sub, GR64, sextloadi32>; + def SG : BinaryRXY<"sg", 0xE309, sub, GR64, load>; +} +defm : SXB<sub, GR64, SGFR>; + +// Subtraction producing a carry. +let Defs = [PSW] in { + // Subtraction of a register. + def SLR : BinaryRR <"slr", 0x1F, subc, GR32, GR32>; + def SLGFR : BinaryRRE<"slgfr", 0xB91B, null_frag, GR64, GR32>; + def SLGR : BinaryRRE<"slgr", 0xB90B, subc, GR64, GR64>; + + // Subtraction of unsigned 32-bit immediates. 
These don't match + // subc because we prefer addc for constants. + def SLFI : BinaryRIL<"slfi", 0xC25, null_frag, GR32, uimm32>; + def SLGFI : BinaryRIL<"slgfi", 0xC24, null_frag, GR64, imm64zx32>; + + // Subtraction of memory. + defm SL : BinaryRXPair<"sl", 0x5F, 0xE35F, subc, GR32, load>; + def SLGF : BinaryRXY<"slgf", 0xE31B, subc, GR64, zextloadi32>; + def SLG : BinaryRXY<"slg", 0xE30B, subc, GR64, load>; +} +defm : ZXB<subc, GR64, SLGFR>; + +// Subtraction producing and using a carry. +let Defs = [PSW], Uses = [PSW] in { + // Subtraction of a register. + def SLBR : BinaryRRE<"slbr", 0xB999, sube, GR32, GR32>; + def SLGBR : BinaryRRE<"slbgr", 0xB989, sube, GR64, GR64>; + + // Subtraction of memory. + def SLB : BinaryRXY<"slb", 0xE399, sube, GR32, load>; + def SLBG : BinaryRXY<"slbg", 0xE389, sube, GR64, load>; +} + +//===----------------------------------------------------------------------===// +// AND +//===----------------------------------------------------------------------===// + +let Defs = [PSW] in { + // ANDs of a register. + let isCommutable = 1 in { + def NR : BinaryRR <"nr", 0x14, and, GR32, GR32>; + def NGR : BinaryRRE<"ngr", 0xB980, and, GR64, GR64>; + } + + // ANDs of a 16-bit immediate, leaving other bits unaffected. + let isCodeGenOnly = 1 in { + def NILL32 : BinaryRI<"nill", 0xA57, and, GR32, imm32ll16c>; + def NILH32 : BinaryRI<"nilh", 0xA56, and, GR32, imm32lh16c>; + } + def NILL : BinaryRI<"nill", 0xA57, and, GR64, imm64ll16c>; + def NILH : BinaryRI<"nilh", 0xA56, and, GR64, imm64lh16c>; + def NIHL : BinaryRI<"nihl", 0xA55, and, GR64, imm64hl16c>; + def NIHH : BinaryRI<"nihh", 0xA54, and, GR64, imm64hh16c>; + + // ANDs of a 32-bit immediate, leaving other bits unaffected. + let isCodeGenOnly = 1 in + def NILF32 : BinaryRIL<"nilf", 0xC0B, and, GR32, uimm32>; + def NILF : BinaryRIL<"nilf", 0xC0B, and, GR64, imm64lf32c>; + def NIHF : BinaryRIL<"nihf", 0xC0A, and, GR64, imm64hf32c>; + + // ANDs of memory. + defm N : BinaryRXPair<"n", 0x54, 0xE354, and, GR32, load>; + def NG : BinaryRXY<"ng", 0xE380, and, GR64, load>; + + // AND to memory + defm NI : BinarySIPair<"ni", 0x94, 0xEB54, null_frag, uimm8>; +} +defm : RMWIByte<and, bdaddr12pair, NI>; +defm : RMWIByte<and, bdaddr20pair, NIY>; + +//===----------------------------------------------------------------------===// +// OR +//===----------------------------------------------------------------------===// + +let Defs = [PSW] in { + // ORs of a register. + let isCommutable = 1 in { + def OR : BinaryRR <"or", 0x16, or, GR32, GR32>; + def OGR : BinaryRRE<"ogr", 0xB981, or, GR64, GR64>; + } + + // ORs of a 16-bit immediate, leaving other bits unaffected. + let isCodeGenOnly = 1 in { + def OILL32 : BinaryRI<"oill", 0xA5B, or, GR32, imm32ll16>; + def OILH32 : BinaryRI<"oilh", 0xA5A, or, GR32, imm32lh16>; + } + def OILL : BinaryRI<"oill", 0xA5B, or, GR64, imm64ll16>; + def OILH : BinaryRI<"oilh", 0xA5A, or, GR64, imm64lh16>; + def OIHL : BinaryRI<"oihl", 0xA59, or, GR64, imm64hl16>; + def OIHH : BinaryRI<"oihh", 0xA58, or, GR64, imm64hh16>; + + // ORs of a 32-bit immediate, leaving other bits unaffected. + let isCodeGenOnly = 1 in + def OILF32 : BinaryRIL<"oilf", 0xC0D, or, GR32, uimm32>; + def OILF : BinaryRIL<"oilf", 0xC0D, or, GR64, imm64lf32>; + def OIHF : BinaryRIL<"oihf", 0xC0C, or, GR64, imm64hf32>; + + // ORs of memory. 
+ defm O : BinaryRXPair<"o", 0x56, 0xE356, or, GR32, load>; + def OG : BinaryRXY<"og", 0xE381, or, GR64, load>; + + // OR to memory + defm OI : BinarySIPair<"oi", 0x96, 0xEB56, null_frag, uimm8>; +} +defm : RMWIByte<or, bdaddr12pair, OI>; +defm : RMWIByte<or, bdaddr20pair, OIY>; + +//===----------------------------------------------------------------------===// +// XOR +//===----------------------------------------------------------------------===// + +let Defs = [PSW] in { + // XORs of a register. + let isCommutable = 1 in { + def XR : BinaryRR <"xr", 0x17, xor, GR32, GR32>; + def XGR : BinaryRRE<"xgr", 0xB982, xor, GR64, GR64>; + } + + // XORs of a 32-bit immediate, leaving other bits unaffected. + let isCodeGenOnly = 1 in + def XILF32 : BinaryRIL<"xilf", 0xC07, xor, GR32, uimm32>; + def XILF : BinaryRIL<"xilf", 0xC07, xor, GR64, imm64lf32>; + def XIHF : BinaryRIL<"xihf", 0xC06, xor, GR64, imm64hf32>; + + // XORs of memory. + defm X : BinaryRXPair<"x",0x57, 0xE357, xor, GR32, load>; + def XG : BinaryRXY<"xg", 0xE382, xor, GR64, load>; + + // XOR to memory + defm XI : BinarySIPair<"xi", 0x97, 0xEB57, null_frag, uimm8>; +} +defm : RMWIByte<xor, bdaddr12pair, XI>; +defm : RMWIByte<xor, bdaddr20pair, XIY>; + +//===----------------------------------------------------------------------===// +// Multiplication +//===----------------------------------------------------------------------===// + +// Multiplication of a register. +let isCommutable = 1 in { + def MSR : BinaryRRE<"msr", 0xB252, mul, GR32, GR32>; + def MSGR : BinaryRRE<"msgr", 0xB90C, mul, GR64, GR64>; +} +def MSGFR : BinaryRRE<"msgfr", 0xB91C, null_frag, GR64, GR32>; +defm : SXB<mul, GR64, MSGFR>; + +// Multiplication of a signed 16-bit immediate. +def MHI : BinaryRI<"mhi", 0xA7C, mul, GR32, imm32sx16>; +def MGHI : BinaryRI<"mghi", 0xA7D, mul, GR64, imm64sx16>; + +// Multiplication of a signed 32-bit immediate. +def MSFI : BinaryRIL<"msfi", 0xC21, mul, GR32, simm32>; +def MSGFI : BinaryRIL<"msgfi", 0xC20, mul, GR64, imm64sx32>; + +// Multiplication of memory. +defm MH : BinaryRXPair<"mh", 0x4C, 0xE37C, mul, GR32, sextloadi16>; +defm MS : BinaryRXPair<"ms", 0x71, 0xE351, mul, GR32, load>; +def MSGF : BinaryRXY<"msgf", 0xE31C, mul, GR64, sextloadi32>; +def MSG : BinaryRXY<"msg", 0xE30C, mul, GR64, load>; + +// Multiplication of a register, producing two results. +def MLGR : BinaryRRE<"mlgr", 0xB986, z_umul_lohi64, GR128, GR64>; + +// Multiplication of memory, producing two results. +def MLG : BinaryRXY<"mlg", 0xE386, z_umul_lohi64, GR128, load>; + +//===----------------------------------------------------------------------===// +// Division and remainder +//===----------------------------------------------------------------------===// + +// Division and remainder, from registers. +def DSGFR : BinaryRRE<"dsgfr", 0xB91D, null_frag, GR128, GR32>; +def DSGR : BinaryRRE<"dsgr", 0xB90D, z_sdivrem64, GR128, GR64>; +def DLR : BinaryRRE<"dlr", 0xB997, z_udivrem32, GR128, GR32>; +def DLGR : BinaryRRE<"dlgr", 0xB987, z_udivrem64, GR128, GR64>; +defm : SXB<z_sdivrem64, GR128, DSGFR>; + +// Division and remainder, from memory. 
+def DSGF : BinaryRXY<"dsgf", 0xE31D, z_sdivrem64, GR128, sextloadi32>; +def DSG : BinaryRXY<"dsg", 0xE30D, z_sdivrem64, GR128, load>; +def DL : BinaryRXY<"dl", 0xE397, z_udivrem32, GR128, load>; +def DLG : BinaryRXY<"dlg", 0xE387, z_udivrem64, GR128, load>; + +//===----------------------------------------------------------------------===// +// Shifts +//===----------------------------------------------------------------------===// + +// Shift left. +let neverHasSideEffects = 1 in { + def SLL : ShiftRS <"sll", 0x89, shl, GR32, shift12only>; + def SLLG : ShiftRSY<"sllg", 0xEB0D, shl, GR64, shift20only>; +} + +// Logical shift right. +let neverHasSideEffects = 1 in { + def SRL : ShiftRS <"srl", 0x88, srl, GR32, shift12only>; + def SRLG : ShiftRSY<"srlg", 0xEB0C, srl, GR64, shift20only>; +} + +// Arithmetic shift right. +let Defs = [PSW] in { + def SRA : ShiftRS <"sra", 0x8A, sra, GR32, shift12only>; + def SRAG : ShiftRSY<"srag", 0xEB0A, sra, GR64, shift20only>; +} + +// Rotate left. +let neverHasSideEffects = 1 in { + def RLL : ShiftRSY<"rll", 0xEB1D, rotl, GR32, shift20only>; + def RLLG : ShiftRSY<"rllg", 0xEB1C, rotl, GR64, shift20only>; +} + +// Rotate second operand left and inserted selected bits into first operand. +// These can act like 32-bit operands provided that the constant start and +// end bits (operands 2 and 3) are in the range [32, 64) +let Defs = [PSW] in { + let isCodeGenOnly = 1 in + def RISBG32 : RotateSelectRIEf<"risbg", 0xEC55, GR32, GR32>; + def RISBG : RotateSelectRIEf<"risbg", 0xEC55, GR64, GR64>; +} + +//===----------------------------------------------------------------------===// +// Comparison +//===----------------------------------------------------------------------===// + +// Signed comparisons. +let Defs = [PSW] in { + // Comparison with a register. + def CR : CompareRR <"cr", 0x19, z_cmp, GR32, GR32>; + def CGFR : CompareRRE<"cgfr", 0xB930, null_frag, GR64, GR32>; + def CGR : CompareRRE<"cgr", 0xB920, z_cmp, GR64, GR64>; + + // Comparison with a signed 16-bit immediate. + def CHI : CompareRI<"chi", 0xA7E, z_cmp, GR32, imm32sx16>; + def CGHI : CompareRI<"cghi", 0xA7F, z_cmp, GR64, imm64sx16>; + + // Comparison with a signed 32-bit immediate. + def CFI : CompareRIL<"cfi", 0xC2D, z_cmp, GR32, simm32>; + def CGFI : CompareRIL<"cgfi", 0xC2C, z_cmp, GR64, imm64sx32>; + + // Comparison with memory. + defm CH : CompareRXPair<"ch", 0x49, 0xE379, z_cmp, GR32, sextloadi16>; + defm C : CompareRXPair<"c", 0x59, 0xE359, z_cmp, GR32, load>; + def CGH : CompareRXY<"cgh", 0xE334, z_cmp, GR64, sextloadi16>; + def CGF : CompareRXY<"cgf", 0xE330, z_cmp, GR64, sextloadi32>; + def CG : CompareRXY<"cg", 0xE320, z_cmp, GR64, load>; + def CHRL : CompareRILPC<"chrl", 0xC65, z_cmp, GR32, aligned_sextloadi16>; + def CRL : CompareRILPC<"crl", 0xC6D, z_cmp, GR32, aligned_load>; + def CGHRL : CompareRILPC<"cghrl", 0xC64, z_cmp, GR64, aligned_sextloadi16>; + def CGFRL : CompareRILPC<"cgfrl", 0xC6C, z_cmp, GR64, aligned_sextloadi32>; + def CGRL : CompareRILPC<"cgrl", 0xC68, z_cmp, GR64, aligned_load>; + + // Comparison between memory and a signed 16-bit immediate. + def CHHSI : CompareSIL<"chhsi", 0xE554, z_cmp, sextloadi16, imm32sx16>; + def CHSI : CompareSIL<"chsi", 0xE55C, z_cmp, load, imm32sx16>; + def CGHSI : CompareSIL<"cghsi", 0xE558, z_cmp, load, imm64sx16>; +} +defm : SXB<z_cmp, GR64, CGFR>; + +// Unsigned comparisons. +let Defs = [PSW] in { + // Comparison with a register. 
+ def CLR : CompareRR <"clr", 0x15, z_ucmp, GR32, GR32>; + def CLGFR : CompareRRE<"clgfr", 0xB931, null_frag, GR64, GR32>; + def CLGR : CompareRRE<"clgr", 0xB921, z_ucmp, GR64, GR64>; + + // Comparison with a signed 32-bit immediate. + def CLFI : CompareRIL<"clfi", 0xC2F, z_ucmp, GR32, uimm32>; + def CLGFI : CompareRIL<"clgfi", 0xC2E, z_ucmp, GR64, imm64zx32>; + + // Comparison with memory. + defm CL : CompareRXPair<"cl", 0x55, 0xE355, z_ucmp, GR32, load>; + def CLGF : CompareRXY<"clgf", 0xE331, z_ucmp, GR64, zextloadi32>; + def CLG : CompareRXY<"clg", 0xE321, z_ucmp, GR64, load>; + def CLHRL : CompareRILPC<"clhrl", 0xC67, z_ucmp, GR32, + aligned_zextloadi16>; + def CLRL : CompareRILPC<"clrl", 0xC6F, z_ucmp, GR32, + aligned_load>; + def CLGHRL : CompareRILPC<"clghrl", 0xC66, z_ucmp, GR64, + aligned_zextloadi16>; + def CLGFRL : CompareRILPC<"clgfrl", 0xC6E, z_ucmp, GR64, + aligned_zextloadi32>; + def CLGRL : CompareRILPC<"clgrl", 0xC6A, z_ucmp, GR64, + aligned_load>; + + // Comparison between memory and an unsigned 8-bit immediate. + defm CLI : CompareSIPair<"cli", 0x95, 0xEB55, z_ucmp, zextloadi8, imm32zx8>; + + // Comparison between memory and an unsigned 16-bit immediate. + def CLHHSI : CompareSIL<"clhhsi", 0xE555, z_ucmp, zextloadi16, imm32zx16>; + def CLFHSI : CompareSIL<"clfhsi", 0xE55D, z_ucmp, load, imm32zx16>; + def CLGHSI : CompareSIL<"clghsi", 0xE559, z_ucmp, load, imm64zx16>; +} +defm : ZXB<z_ucmp, GR64, CLGFR>; + +//===----------------------------------------------------------------------===// +// Atomic operations +//===----------------------------------------------------------------------===// + +def ATOMIC_SWAPW : AtomicLoadWBinaryReg<z_atomic_swapw>; +def ATOMIC_SWAP_32 : AtomicLoadBinaryReg32<atomic_swap_32>; +def ATOMIC_SWAP_64 : AtomicLoadBinaryReg64<atomic_swap_64>; + +def ATOMIC_LOADW_AR : AtomicLoadWBinaryReg<z_atomic_loadw_add>; +def ATOMIC_LOADW_AFI : AtomicLoadWBinaryImm<z_atomic_loadw_add, simm32>; +def ATOMIC_LOAD_AR : AtomicLoadBinaryReg32<atomic_load_add_32>; +def ATOMIC_LOAD_AHI : AtomicLoadBinaryImm32<atomic_load_add_32, imm32sx16>; +def ATOMIC_LOAD_AFI : AtomicLoadBinaryImm32<atomic_load_add_32, simm32>; +def ATOMIC_LOAD_AGR : AtomicLoadBinaryReg64<atomic_load_add_64>; +def ATOMIC_LOAD_AGHI : AtomicLoadBinaryImm64<atomic_load_add_64, imm64sx16>; +def ATOMIC_LOAD_AGFI : AtomicLoadBinaryImm64<atomic_load_add_64, imm64sx32>; + +def ATOMIC_LOADW_SR : AtomicLoadWBinaryReg<z_atomic_loadw_sub>; +def ATOMIC_LOAD_SR : AtomicLoadBinaryReg32<atomic_load_sub_32>; +def ATOMIC_LOAD_SGR : AtomicLoadBinaryReg64<atomic_load_sub_64>; + +def ATOMIC_LOADW_NR : AtomicLoadWBinaryReg<z_atomic_loadw_and>; +def ATOMIC_LOADW_NILH : AtomicLoadWBinaryImm<z_atomic_loadw_and, imm32lh16c>; +def ATOMIC_LOAD_NR : AtomicLoadBinaryReg32<atomic_load_and_32>; +def ATOMIC_LOAD_NILL32 : AtomicLoadBinaryImm32<atomic_load_and_32, imm32ll16c>; +def ATOMIC_LOAD_NILH32 : AtomicLoadBinaryImm32<atomic_load_and_32, imm32lh16c>; +def ATOMIC_LOAD_NILF32 : AtomicLoadBinaryImm32<atomic_load_and_32, uimm32>; +def ATOMIC_LOAD_NGR : AtomicLoadBinaryReg64<atomic_load_and_64>; +def ATOMIC_LOAD_NILL : AtomicLoadBinaryImm64<atomic_load_and_64, imm64ll16c>; +def ATOMIC_LOAD_NILH : AtomicLoadBinaryImm64<atomic_load_and_64, imm64lh16c>; +def ATOMIC_LOAD_NIHL : AtomicLoadBinaryImm64<atomic_load_and_64, imm64hl16c>; +def ATOMIC_LOAD_NIHH : AtomicLoadBinaryImm64<atomic_load_and_64, imm64hh16c>; +def ATOMIC_LOAD_NILF : AtomicLoadBinaryImm64<atomic_load_and_64, imm64lf32c>; +def ATOMIC_LOAD_NIHF : 
AtomicLoadBinaryImm64<atomic_load_and_64, imm64hf32c>; + +def ATOMIC_LOADW_OR : AtomicLoadWBinaryReg<z_atomic_loadw_or>; +def ATOMIC_LOADW_OILH : AtomicLoadWBinaryImm<z_atomic_loadw_or, imm32lh16>; +def ATOMIC_LOAD_OR : AtomicLoadBinaryReg32<atomic_load_or_32>; +def ATOMIC_LOAD_OILL32 : AtomicLoadBinaryImm32<atomic_load_or_32, imm32ll16>; +def ATOMIC_LOAD_OILH32 : AtomicLoadBinaryImm32<atomic_load_or_32, imm32lh16>; +def ATOMIC_LOAD_OILF32 : AtomicLoadBinaryImm32<atomic_load_or_32, uimm32>; +def ATOMIC_LOAD_OGR : AtomicLoadBinaryReg64<atomic_load_or_64>; +def ATOMIC_LOAD_OILL : AtomicLoadBinaryImm64<atomic_load_or_64, imm64ll16>; +def ATOMIC_LOAD_OILH : AtomicLoadBinaryImm64<atomic_load_or_64, imm64lh16>; +def ATOMIC_LOAD_OIHL : AtomicLoadBinaryImm64<atomic_load_or_64, imm64hl16>; +def ATOMIC_LOAD_OIHH : AtomicLoadBinaryImm64<atomic_load_or_64, imm64hh16>; +def ATOMIC_LOAD_OILF : AtomicLoadBinaryImm64<atomic_load_or_64, imm64lf32>; +def ATOMIC_LOAD_OIHF : AtomicLoadBinaryImm64<atomic_load_or_64, imm64hf32>; + +def ATOMIC_LOADW_XR : AtomicLoadWBinaryReg<z_atomic_loadw_xor>; +def ATOMIC_LOADW_XILF : AtomicLoadWBinaryImm<z_atomic_loadw_xor, uimm32>; +def ATOMIC_LOAD_XR : AtomicLoadBinaryReg32<atomic_load_xor_32>; +def ATOMIC_LOAD_XILF32 : AtomicLoadBinaryImm32<atomic_load_xor_32, uimm32>; +def ATOMIC_LOAD_XGR : AtomicLoadBinaryReg64<atomic_load_xor_64>; +def ATOMIC_LOAD_XILF : AtomicLoadBinaryImm64<atomic_load_xor_64, imm64lf32>; +def ATOMIC_LOAD_XIHF : AtomicLoadBinaryImm64<atomic_load_xor_64, imm64hf32>; + +def ATOMIC_LOADW_NRi : AtomicLoadWBinaryReg<z_atomic_loadw_nand>; +def ATOMIC_LOADW_NILHi : AtomicLoadWBinaryImm<z_atomic_loadw_nand, + imm32lh16c>; +def ATOMIC_LOAD_NRi : AtomicLoadBinaryReg32<atomic_load_nand_32>; +def ATOMIC_LOAD_NILL32i : AtomicLoadBinaryImm32<atomic_load_nand_32, + imm32ll16c>; +def ATOMIC_LOAD_NILH32i : AtomicLoadBinaryImm32<atomic_load_nand_32, + imm32lh16c>; +def ATOMIC_LOAD_NILF32i : AtomicLoadBinaryImm32<atomic_load_nand_32, uimm32>; +def ATOMIC_LOAD_NGRi : AtomicLoadBinaryReg64<atomic_load_nand_64>; +def ATOMIC_LOAD_NILLi : AtomicLoadBinaryImm64<atomic_load_nand_64, + imm64ll16c>; +def ATOMIC_LOAD_NILHi : AtomicLoadBinaryImm64<atomic_load_nand_64, + imm64lh16c>; +def ATOMIC_LOAD_NIHLi : AtomicLoadBinaryImm64<atomic_load_nand_64, + imm64hl16c>; +def ATOMIC_LOAD_NIHHi : AtomicLoadBinaryImm64<atomic_load_nand_64, + imm64hh16c>; +def ATOMIC_LOAD_NILFi : AtomicLoadBinaryImm64<atomic_load_nand_64, + imm64lf32c>; +def ATOMIC_LOAD_NIHFi : AtomicLoadBinaryImm64<atomic_load_nand_64, + imm64hf32c>; + +def ATOMIC_LOADW_MIN : AtomicLoadWBinaryReg<z_atomic_loadw_min>; +def ATOMIC_LOAD_MIN_32 : AtomicLoadBinaryReg32<atomic_load_min_32>; +def ATOMIC_LOAD_MIN_64 : AtomicLoadBinaryReg64<atomic_load_min_64>; + +def ATOMIC_LOADW_MAX : AtomicLoadWBinaryReg<z_atomic_loadw_max>; +def ATOMIC_LOAD_MAX_32 : AtomicLoadBinaryReg32<atomic_load_max_32>; +def ATOMIC_LOAD_MAX_64 : AtomicLoadBinaryReg64<atomic_load_max_64>; + +def ATOMIC_LOADW_UMIN : AtomicLoadWBinaryReg<z_atomic_loadw_umin>; +def ATOMIC_LOAD_UMIN_32 : AtomicLoadBinaryReg32<atomic_load_umin_32>; +def ATOMIC_LOAD_UMIN_64 : AtomicLoadBinaryReg64<atomic_load_umin_64>; + +def ATOMIC_LOADW_UMAX : AtomicLoadWBinaryReg<z_atomic_loadw_umax>; +def ATOMIC_LOAD_UMAX_32 : AtomicLoadBinaryReg32<atomic_load_umax_32>; +def ATOMIC_LOAD_UMAX_64 : AtomicLoadBinaryReg64<atomic_load_umax_64>; + +def ATOMIC_CMP_SWAPW + : Pseudo<(outs GR32:$dst), (ins bdaddr20only:$addr, GR32:$cmp, GR32:$swap, + ADDR32:$bitshift, ADDR32:$negbitshift, + 
uimm32:$bitsize), + [(set GR32:$dst, + (z_atomic_cmp_swapw bdaddr20only:$addr, GR32:$cmp, GR32:$swap, + ADDR32:$bitshift, ADDR32:$negbitshift, + uimm32:$bitsize))]> { + let Defs = [PSW]; + let mayLoad = 1; + let mayStore = 1; + let usesCustomInserter = 1; +} + +let Defs = [PSW] in { + defm CS : CmpSwapRSPair<"cs", 0xBA, 0xEB14, atomic_cmp_swap_32, GR32>; + def CSG : CmpSwapRSY<"csg", 0xEB30, atomic_cmp_swap_64, GR64>; +} + +//===----------------------------------------------------------------------===// +// Miscellaneous Instructions. +//===----------------------------------------------------------------------===// + +// Read a 32-bit access register into a GR32. As with all GR32 operations, +// the upper 32 bits of the enclosing GR64 remain unchanged, which is useful +// when a 64-bit address is stored in a pair of access registers. +def EAR : InstRRE<0xB24F, (outs GR32:$dst), (ins access_reg:$src), + "ear\t$dst, $src", + [(set GR32:$dst, (z_extract_access access_reg:$src))]>; + +// Find leftmost one, AKA count leading zeros. The instruction actually +// returns a pair of GR64s, the first giving the number of leading zeros +// and the second giving a copy of the source with the leftmost one bit +// cleared. We only use the first result here. +let Defs = [PSW] in { + def FLOGR : UnaryRRE<"flogr", 0xB983, null_frag, GR128, GR64>; +} +def : Pat<(ctlz GR64:$src), + (EXTRACT_SUBREG (FLOGR GR64:$src), subreg_high)>; + +// Use subregs to populate the "don't care" bits in a 32-bit to 64-bit anyext. +def : Pat<(i64 (anyext GR32:$src)), + (INSERT_SUBREG (i64 (IMPLICIT_DEF)), GR32:$src, subreg_32bit)>; + +// There are no 32-bit equivalents of LLILL and LLILH, so use a full +// 64-bit move followed by a subreg. This preserves the invariant that +// all GR32 operations only modify the low 32 bits. +def : Pat<(i32 imm32ll16:$src), + (EXTRACT_SUBREG (LLILL (LL16 imm:$src)), subreg_32bit)>; +def : Pat<(i32 imm32lh16:$src), + (EXTRACT_SUBREG (LLILH (LH16 imm:$src)), subreg_32bit)>; + +// Extend GR32s and GR64s to GR128s. +let usesCustomInserter = 1 in { + def AEXT128_64 : Pseudo<(outs GR128:$dst), (ins GR64:$src), []>; + def ZEXT128_32 : Pseudo<(outs GR128:$dst), (ins GR32:$src), []>; + def ZEXT128_64 : Pseudo<(outs GR128:$dst), (ins GR64:$src), []>; +} + +//===----------------------------------------------------------------------===// +// Peepholes. +//===----------------------------------------------------------------------===// + +// Use AL* for GR64 additions of unsigned 32-bit values. +defm : ZXB<add, GR64, ALGFR>; +def : Pat<(add GR64:$src1, imm64zx32:$src2), + (ALGFI GR64:$src1, imm64zx32:$src2)>; +def : Pat<(add GR64:$src1, (zextloadi32 bdxaddr20only:$addr)), + (ALGF GR64:$src1, bdxaddr20only:$addr)>; + +// Use SL* for GR64 subtractions of unsigned 32-bit values. +defm : ZXB<sub, GR64, SLGFR>; +def : Pat<(add GR64:$src1, imm64zx32n:$src2), + (SLGFI GR64:$src1, imm64zx32n:$src2)>; +def : Pat<(sub GR64:$src1, (zextloadi32 bdxaddr20only:$addr)), + (SLGF GR64:$src1, bdxaddr20only:$addr)>; diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZMCInstLower.cpp b/contrib/llvm/lib/Target/SystemZ/SystemZMCInstLower.cpp new file mode 100644 index 0000000..5d83321 --- /dev/null +++ b/contrib/llvm/lib/Target/SystemZ/SystemZMCInstLower.cpp @@ -0,0 +1,116 @@ +//===-- SystemZMCInstLower.cpp - Lower MachineInstr to MCInst -------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +#include "SystemZMCInstLower.h" +#include "SystemZAsmPrinter.h" +#include "llvm/MC/MCExpr.h" +#include "llvm/MC/MCStreamer.h" +#include "llvm/Target/Mangler.h" + +using namespace llvm; + +// Where relaxable pairs of reloc-generating instructions exist, +// we tend to use the longest form by default, since that produces +// correct assembly in cases where no relaxation is performed. +// If Opcode is one such instruction, return the opcode for the +// shortest possible form instead, otherwise return Opcode itself. +static unsigned getShortenedInstr(unsigned Opcode) { + switch (Opcode) { + case SystemZ::BRCL: return SystemZ::BRC; + case SystemZ::JG: return SystemZ::J; + case SystemZ::BRASL: return SystemZ::BRAS; + } + return Opcode; +} + +// Return the VK_* enumeration for MachineOperand target flags Flags. +static MCSymbolRefExpr::VariantKind getVariantKind(unsigned Flags) { + switch (Flags & SystemZII::MO_SYMBOL_MODIFIER) { + case 0: + return MCSymbolRefExpr::VK_None; + case SystemZII::MO_GOT: + return MCSymbolRefExpr::VK_GOT; + } + llvm_unreachable("Unrecognised MO_ACCESS_MODEL"); +} + +SystemZMCInstLower::SystemZMCInstLower(Mangler *mang, MCContext &ctx, + SystemZAsmPrinter &asmprinter) + : Mang(mang), Ctx(ctx), AsmPrinter(asmprinter) {} + +MCOperand SystemZMCInstLower::lowerSymbolOperand(const MachineOperand &MO, + const MCSymbol *Symbol, + int64_t Offset) const { + MCSymbolRefExpr::VariantKind Kind = getVariantKind(MO.getTargetFlags()); + const MCExpr *Expr = MCSymbolRefExpr::Create(Symbol, Kind, Ctx); + if (Offset) { + const MCExpr *OffsetExpr = MCConstantExpr::Create(Offset, Ctx); + Expr = MCBinaryExpr::CreateAdd(Expr, OffsetExpr, Ctx); + } + return MCOperand::CreateExpr(Expr); +} + +MCOperand SystemZMCInstLower::lowerOperand(const MachineOperand &MO) const { + switch (MO.getType()) { + default: + llvm_unreachable("unknown operand type"); + + case MachineOperand::MO_Register: + // Ignore all implicit register operands. + if (MO.isImplicit()) + return MCOperand(); + return MCOperand::CreateReg(MO.getReg()); + + case MachineOperand::MO_Immediate: + return MCOperand::CreateImm(MO.getImm()); + + case MachineOperand::MO_MachineBasicBlock: + return lowerSymbolOperand(MO, MO.getMBB()->getSymbol(), + /* MO has no offset field */0); + + case MachineOperand::MO_GlobalAddress: + return lowerSymbolOperand(MO, Mang->getSymbol(MO.getGlobal()), + MO.getOffset()); + + case MachineOperand::MO_ExternalSymbol: { + StringRef Name = MO.getSymbolName(); + return lowerSymbolOperand(MO, AsmPrinter.GetExternalSymbolSymbol(Name), + MO.getOffset()); + } + + case MachineOperand::MO_JumpTableIndex: + return lowerSymbolOperand(MO, AsmPrinter.GetJTISymbol(MO.getIndex()), + /* MO has no offset field */0); + + case MachineOperand::MO_ConstantPoolIndex: + return lowerSymbolOperand(MO, AsmPrinter.GetCPISymbol(MO.getIndex()), + MO.getOffset()); + + case MachineOperand::MO_BlockAddress: { + const BlockAddress *BA = MO.getBlockAddress(); + return lowerSymbolOperand(MO, AsmPrinter.GetBlockAddressSymbol(BA), + MO.getOffset()); + } + } +} + +void SystemZMCInstLower::lower(const MachineInstr *MI, MCInst &OutMI) const { + unsigned Opcode = MI->getOpcode(); + // When emitting binary code, start with the shortest form of an instruction + // and then relax it where necessary. 
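+  // When emitting textual assembly instead, keep the long form, since no
+  // relaxation will be performed on it (see getShortenedInstr above).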
+ if (!AsmPrinter.OutStreamer.hasRawTextSupport()) + Opcode = getShortenedInstr(Opcode); + OutMI.setOpcode(Opcode); + for (unsigned I = 0, E = MI->getNumOperands(); I != E; ++I) { + const MachineOperand &MO = MI->getOperand(I); + MCOperand MCOp = lowerOperand(MO); + if (MCOp.isValid()) + OutMI.addOperand(MCOp); + } +} diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZMCInstLower.h b/contrib/llvm/lib/Target/SystemZ/SystemZMCInstLower.h new file mode 100644 index 0000000..afa72f3 --- /dev/null +++ b/contrib/llvm/lib/Target/SystemZ/SystemZMCInstLower.h @@ -0,0 +1,47 @@ +//===-- SystemZMCInstLower.h - Lower MachineInstr to MCInst ----*- C++ -*--===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_SYSTEMZMCINSTLOWER_H +#define LLVM_SYSTEMZMCINSTLOWER_H + +#include "llvm/Support/DataTypes.h" +#include "llvm/Support/Compiler.h" + +namespace llvm { +class MCContext; +class MCInst; +class MCOperand; +class MCSymbol; +class MachineInstr; +class MachineOperand; +class Mangler; +class SystemZAsmPrinter; + +class LLVM_LIBRARY_VISIBILITY SystemZMCInstLower { + Mangler *Mang; + MCContext &Ctx; + SystemZAsmPrinter &AsmPrinter; + +public: + SystemZMCInstLower(Mangler *mang, MCContext &ctx, + SystemZAsmPrinter &asmPrinter); + + // Lower MachineInstr MI to MCInst OutMI. + void lower(const MachineInstr *MI, MCInst &OutMI) const; + + // Return an MCOperand for MO. Return an empty operand if MO is implicit. + MCOperand lowerOperand(const MachineOperand& MO) const; + + // Return an MCOperand for MO, given that it equals Symbol + Offset. + MCOperand lowerSymbolOperand(const MachineOperand &MO, + const MCSymbol *Symbol, int64_t Offset) const; +}; +} // end namespace llvm + +#endif diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZMachineFunctionInfo.h b/contrib/llvm/lib/Target/SystemZ/SystemZMachineFunctionInfo.h new file mode 100644 index 0000000..1dc05a7e --- /dev/null +++ b/contrib/llvm/lib/Target/SystemZ/SystemZMachineFunctionInfo.h @@ -0,0 +1,74 @@ +//==- SystemZMachineFuctionInfo.h - SystemZ machine function info -*- C++ -*-=// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#ifndef SYSTEMZMACHINEFUNCTIONINFO_H +#define SYSTEMZMACHINEFUNCTIONINFO_H + +#include "llvm/CodeGen/MachineFunction.h" + +namespace llvm { + +class SystemZMachineFunctionInfo : public MachineFunctionInfo { + unsigned SavedGPRFrameSize; + unsigned LowSavedGPR; + unsigned HighSavedGPR; + unsigned VarArgsFirstGPR; + unsigned VarArgsFirstFPR; + unsigned VarArgsFrameIndex; + unsigned RegSaveFrameIndex; + bool ManipulatesSP; + +public: + explicit SystemZMachineFunctionInfo(MachineFunction &MF) + : SavedGPRFrameSize(0), LowSavedGPR(0), HighSavedGPR(0), VarArgsFirstGPR(0), + VarArgsFirstFPR(0), VarArgsFrameIndex(0), RegSaveFrameIndex(0), + ManipulatesSP(false) {} + + // Get and set the number of bytes allocated by generic code to store + // call-saved GPRs. + unsigned getSavedGPRFrameSize() const { return SavedGPRFrameSize; } + void setSavedGPRFrameSize(unsigned bytes) { SavedGPRFrameSize = bytes; } + + // Get and set the first call-saved GPR that should be saved and restored + // by this function. 
This is 0 if no GPRs need to be saved or restored. + unsigned getLowSavedGPR() const { return LowSavedGPR; } + void setLowSavedGPR(unsigned Reg) { LowSavedGPR = Reg; } + + // Get and set the last call-saved GPR that should be saved and restored + // by this function. + unsigned getHighSavedGPR() const { return HighSavedGPR; } + void setHighSavedGPR(unsigned Reg) { HighSavedGPR = Reg; } + + // Get and set the number of fixed (as opposed to variable) arguments + // that are passed in GPRs to this function. + unsigned getVarArgsFirstGPR() const { return VarArgsFirstGPR; } + void setVarArgsFirstGPR(unsigned GPR) { VarArgsFirstGPR = GPR; } + + // Likewise FPRs. + unsigned getVarArgsFirstFPR() const { return VarArgsFirstFPR; } + void setVarArgsFirstFPR(unsigned FPR) { VarArgsFirstFPR = FPR; } + + // Get and set the frame index of the first stack vararg. + unsigned getVarArgsFrameIndex() const { return VarArgsFrameIndex; } + void setVarArgsFrameIndex(unsigned FI) { VarArgsFrameIndex = FI; } + + // Get and set the frame index of the register save area + // (i.e. the incoming stack pointer). + unsigned getRegSaveFrameIndex() const { return RegSaveFrameIndex; } + void setRegSaveFrameIndex(unsigned FI) { RegSaveFrameIndex = FI; } + + // Get and set whether the function directly manipulates the stack pointer, + // e.g. through STACKSAVE or STACKRESTORE. + bool getManipulatesSP() const { return ManipulatesSP; } + void setManipulatesSP(bool MSP) { ManipulatesSP = MSP; } +}; + +} // end llvm namespace + +#endif diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZOperands.td b/contrib/llvm/lib/Target/SystemZ/SystemZOperands.td new file mode 100644 index 0000000..0abc3f7 --- /dev/null +++ b/contrib/llvm/lib/Target/SystemZ/SystemZOperands.td @@ -0,0 +1,435 @@ +//===-- SystemZOperands.td - SystemZ instruction operands ----*- tblgen-*--===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// Class definitions +//===----------------------------------------------------------------------===// + +class ImmediateAsmOperand<string name> + : AsmOperandClass { + let Name = name; + let RenderMethod = "addImmOperands"; +} + +// Constructs both a DAG pattern and instruction operand for an immediate +// of type VT. PRED returns true if a node is acceptable and XFORM returns +// the operand value associated with the node. ASMOP is the name of the +// associated asm operand, and also forms the basis of the asm print method. +class Immediate<ValueType vt, code pred, SDNodeXForm xform, string asmop> + : PatLeaf<(vt imm), pred, xform>, Operand<vt> { + let PrintMethod = "print"##asmop##"Operand"; + let ParserMatchClass = !cast<AsmOperandClass>(asmop); +} + +// Constructs both a DAG pattern and instruction operand for a PC-relative +// address with address size VT. SELF is the name of the operand. +class PCRelAddress<ValueType vt, string self> + : ComplexPattern<vt, 1, "selectPCRelAddress", [z_pcrel_wrapper]>, + Operand<vt> { + let MIOperandInfo = (ops !cast<Operand>(self)); +} + +// Constructs an AsmOperandClass for addressing mode FORMAT, treating the +// registers as having BITSIZE bits and displacements as having DISPSIZE bits. 
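+// For example, format "BDAddr" with 64-bit registers and a 12-bit
+// displacement produces the asm operand class BDAddr64Disp12, parsed by
+// parseBDAddr64 and rendered by addBDAddrOperands.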
+class AddressAsmOperand<string format, string bitsize, string dispsize> + : AsmOperandClass { + let Name = format##bitsize##"Disp"##dispsize; + let ParserMethod = "parse"##format##bitsize; + let RenderMethod = "add"##format##"Operands"; +} + +// Constructs both a DAG pattern and instruction operand for an addressing mode. +// The mode is selected by custom code in selectTYPE...SUFFIX(). The address +// registers have BITSIZE bits and displacements have DISPSIZE bits. NUMOPS is +// the number of operands that make up an address and OPERANDS lists the types +// of those operands using (ops ...). FORMAT is the type of addressing mode, +// which needs to match the names used in AddressAsmOperand. +class AddressingMode<string type, string bitsize, string dispsize, + string suffix, int numops, string format, dag operands> + : ComplexPattern<!cast<ValueType>("i"##bitsize), numops, + "select"##type##dispsize##suffix, + [add, sub, or, frameindex, z_adjdynalloc]>, + Operand<!cast<ValueType>("i"##bitsize)> { + let PrintMethod = "print"##format##"Operand"; + let MIOperandInfo = operands; + let ParserMatchClass = + !cast<AddressAsmOperand>(format##bitsize##"Disp"##dispsize); +} + +// An addressing mode with a base and displacement but no index. +class BDMode<string type, string bitsize, string dispsize, string suffix> + : AddressingMode<type, bitsize, dispsize, suffix, 2, "BDAddr", + (ops !cast<RegisterOperand>("ADDR"##bitsize), + !cast<Immediate>("disp"##dispsize##"imm"##bitsize))>; + +// An addressing mode with a base, displacement and index. +class BDXMode<string type, string bitsize, string dispsize, string suffix> + : AddressingMode<type, bitsize, dispsize, suffix, 3, "BDXAddr", + (ops !cast<RegisterOperand>("ADDR"##bitsize), + !cast<Immediate>("disp"##dispsize##"imm"##bitsize), + !cast<RegisterOperand>("ADDR"##bitsize))>; + +//===----------------------------------------------------------------------===// +// Extracting immediate operands from nodes +// These all create MVT::i64 nodes to ensure the value is not sign-extended +// when converted from an SDNode to a MachineOperand later on. +//===----------------------------------------------------------------------===// + +// Bits 0-15 (counting from the lsb). +def LL16 : SDNodeXForm<imm, [{ + uint64_t Value = N->getZExtValue() & 0x000000000000FFFFULL; + return CurDAG->getTargetConstant(Value, MVT::i64); +}]>; + +// Bits 16-31 (counting from the lsb). +def LH16 : SDNodeXForm<imm, [{ + uint64_t Value = (N->getZExtValue() & 0x00000000FFFF0000ULL) >> 16; + return CurDAG->getTargetConstant(Value, MVT::i64); +}]>; + +// Bits 32-47 (counting from the lsb). +def HL16 : SDNodeXForm<imm, [{ + uint64_t Value = (N->getZExtValue() & 0x0000FFFF00000000ULL) >> 32; + return CurDAG->getTargetConstant(Value, MVT::i64); +}]>; + +// Bits 48-63 (counting from the lsb). +def HH16 : SDNodeXForm<imm, [{ + uint64_t Value = (N->getZExtValue() & 0xFFFF000000000000ULL) >> 48; + return CurDAG->getTargetConstant(Value, MVT::i64); +}]>; + +// Low 32 bits. +def LF32 : SDNodeXForm<imm, [{ + uint64_t Value = N->getZExtValue() & 0x00000000FFFFFFFFULL; + return CurDAG->getTargetConstant(Value, MVT::i64); +}]>; + +// High 32 bits. +def HF32 : SDNodeXForm<imm, [{ + uint64_t Value = N->getZExtValue() >> 32; + return CurDAG->getTargetConstant(Value, MVT::i64); +}]>; + +// Truncate an immediate to a 8-bit signed quantity. 
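[Editorial aside, not part of the patch: the LL16/LH16/HL16/HH16 and LF32/HF32 SDNodeXForms above extract 16- and 32-bit chunks of a 64-bit immediate, counting bits from the LSB. A minimal standalone C++ sketch of the same bit arithmetic, with hypothetical helper names chosen here for illustration:]

#include <cassert>
#include <cstdint>

// Same chunking as the SDNodeXForms above, counting from the LSB.
static uint64_t chunkLL16(uint64_t V) { return V & 0xFFFFULL; }          // bits 0-15
static uint64_t chunkLH16(uint64_t V) { return (V >> 16) & 0xFFFFULL; }  // bits 16-31
static uint64_t chunkHL16(uint64_t V) { return (V >> 32) & 0xFFFFULL; }  // bits 32-47
static uint64_t chunkHH16(uint64_t V) { return (V >> 48) & 0xFFFFULL; }  // bits 48-63

int main() {
  uint64_t V = 0x123456789ABCDEF0ULL;
  assert(chunkLL16(V) == 0xDEF0 && chunkLH16(V) == 0x9ABC);
  assert(chunkHL16(V) == 0x5678 && chunkHH16(V) == 0x1234);
  // Reassembling the four chunks gives back the original value, which is why
  // a 64-bit constant can be materialized 16 bits at a time.
  assert(((chunkHH16(V) << 48) | (chunkHL16(V) << 32) |
          (chunkLH16(V) << 16) | chunkLL16(V)) == V);
  return 0;
}

[The truncation transforms that follow (SIMM8, UIMM8, ...) are the same idea applied to sign- or zero-truncation rather than chunk extraction.]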
+def SIMM8 : SDNodeXForm<imm, [{ + return CurDAG->getTargetConstant(int8_t(N->getZExtValue()), MVT::i64); +}]>; + +// Truncate an immediate to a 8-bit unsigned quantity. +def UIMM8 : SDNodeXForm<imm, [{ + return CurDAG->getTargetConstant(uint8_t(N->getZExtValue()), MVT::i64); +}]>; + +// Truncate an immediate to a 16-bit signed quantity. +def SIMM16 : SDNodeXForm<imm, [{ + return CurDAG->getTargetConstant(int16_t(N->getZExtValue()), MVT::i64); +}]>; + +// Truncate an immediate to a 16-bit unsigned quantity. +def UIMM16 : SDNodeXForm<imm, [{ + return CurDAG->getTargetConstant(uint16_t(N->getZExtValue()), MVT::i64); +}]>; + +// Truncate an immediate to a 32-bit signed quantity. +def SIMM32 : SDNodeXForm<imm, [{ + return CurDAG->getTargetConstant(int32_t(N->getZExtValue()), MVT::i64); +}]>; + +// Truncate an immediate to a 32-bit unsigned quantity. +def UIMM32 : SDNodeXForm<imm, [{ + return CurDAG->getTargetConstant(uint32_t(N->getZExtValue()), MVT::i64); +}]>; + +// Negate and then truncate an immediate to a 32-bit unsigned quantity. +def NEGIMM32 : SDNodeXForm<imm, [{ + return CurDAG->getTargetConstant(uint32_t(-N->getZExtValue()), MVT::i64); +}]>; + +//===----------------------------------------------------------------------===// +// Immediate asm operands. +//===----------------------------------------------------------------------===// + +def U4Imm : ImmediateAsmOperand<"U4Imm">; +def U6Imm : ImmediateAsmOperand<"U6Imm">; +def S8Imm : ImmediateAsmOperand<"S8Imm">; +def U8Imm : ImmediateAsmOperand<"U8Imm">; +def S16Imm : ImmediateAsmOperand<"S16Imm">; +def U16Imm : ImmediateAsmOperand<"U16Imm">; +def S32Imm : ImmediateAsmOperand<"S32Imm">; +def U32Imm : ImmediateAsmOperand<"U32Imm">; + +//===----------------------------------------------------------------------===// +// 8-bit immediates +//===----------------------------------------------------------------------===// + +def uimm8zx4 : Immediate<i8, [{ + return isUInt<4>(N->getZExtValue()); +}], NOOP_SDNodeXForm, "U4Imm">; + +def uimm8zx6 : Immediate<i8, [{ + return isUInt<6>(N->getZExtValue()); +}], NOOP_SDNodeXForm, "U6Imm">; + +def simm8 : Immediate<i8, [{}], SIMM8, "S8Imm">; +def uimm8 : Immediate<i8, [{}], UIMM8, "U8Imm">; + +//===----------------------------------------------------------------------===// +// i32 immediates +//===----------------------------------------------------------------------===// + +// Immediates for the lower and upper 16 bits of an i32, with the other +// bits of the i32 being zero. +def imm32ll16 : Immediate<i32, [{ + return SystemZ::isImmLL(N->getZExtValue()); +}], LL16, "U16Imm">; + +def imm32lh16 : Immediate<i32, [{ + return SystemZ::isImmLH(N->getZExtValue()); +}], LH16, "U16Imm">; + +// Immediates for the lower and upper 16 bits of an i32, with the other +// bits of the i32 being one. 
+def imm32ll16c : Immediate<i32, [{ + return SystemZ::isImmLL(uint32_t(~N->getZExtValue())); +}], LL16, "U16Imm">; + +def imm32lh16c : Immediate<i32, [{ + return SystemZ::isImmLH(uint32_t(~N->getZExtValue())); +}], LH16, "U16Imm">; + +// Short immediates +def imm32sx8 : Immediate<i32, [{ + return isInt<8>(N->getSExtValue()); +}], SIMM8, "S8Imm">; + +def imm32zx8 : Immediate<i32, [{ + return isUInt<8>(N->getZExtValue()); +}], UIMM8, "U8Imm">; + +def imm32zx8trunc : Immediate<i32, [{}], UIMM8, "U8Imm">; + +def imm32sx16 : Immediate<i32, [{ + return isInt<16>(N->getSExtValue()); +}], SIMM16, "S16Imm">; + +def imm32zx16 : Immediate<i32, [{ + return isUInt<16>(N->getZExtValue()); +}], UIMM16, "U16Imm">; + +def imm32sx16trunc : Immediate<i32, [{}], SIMM16, "S16Imm">; + +// Full 32-bit immediates. we need both signed and unsigned versions +// because the assembler is picky. E.g. AFI requires signed operands +// while NILF requires unsigned ones. +def simm32 : Immediate<i32, [{}], SIMM32, "S32Imm">; +def uimm32 : Immediate<i32, [{}], UIMM32, "U32Imm">; + +def imm32 : ImmLeaf<i32, [{}]>; + +//===----------------------------------------------------------------------===// +// 64-bit immediates +//===----------------------------------------------------------------------===// + +// Immediates for 16-bit chunks of an i64, with the other bits of the +// i32 being zero. +def imm64ll16 : Immediate<i64, [{ + return SystemZ::isImmLL(N->getZExtValue()); +}], LL16, "U16Imm">; + +def imm64lh16 : Immediate<i64, [{ + return SystemZ::isImmLH(N->getZExtValue()); +}], LH16, "U16Imm">; + +def imm64hl16 : Immediate<i64, [{ + return SystemZ::isImmHL(N->getZExtValue()); +}], HL16, "U16Imm">; + +def imm64hh16 : Immediate<i64, [{ + return SystemZ::isImmHH(N->getZExtValue()); +}], HH16, "U16Imm">; + +// Immediates for 16-bit chunks of an i64, with the other bits of the +// i32 being one. +def imm64ll16c : Immediate<i64, [{ + return SystemZ::isImmLL(uint64_t(~N->getZExtValue())); +}], LL16, "U16Imm">; + +def imm64lh16c : Immediate<i64, [{ + return SystemZ::isImmLH(uint64_t(~N->getZExtValue())); +}], LH16, "U16Imm">; + +def imm64hl16c : Immediate<i64, [{ + return SystemZ::isImmHL(uint64_t(~N->getZExtValue())); +}], HL16, "U16Imm">; + +def imm64hh16c : Immediate<i64, [{ + return SystemZ::isImmHH(uint64_t(~N->getZExtValue())); +}], HH16, "U16Imm">; + +// Immediates for the lower and upper 32 bits of an i64, with the other +// bits of the i32 being zero. +def imm64lf32 : Immediate<i64, [{ + return SystemZ::isImmLF(N->getZExtValue()); +}], LF32, "U32Imm">; + +def imm64hf32 : Immediate<i64, [{ + return SystemZ::isImmHF(N->getZExtValue()); +}], HF32, "U32Imm">; + +// Immediates for the lower and upper 32 bits of an i64, with the other +// bits of the i32 being one. +def imm64lf32c : Immediate<i64, [{ + return SystemZ::isImmLF(uint64_t(~N->getZExtValue())); +}], LF32, "U32Imm">; + +def imm64hf32c : Immediate<i64, [{ + return SystemZ::isImmHF(uint64_t(~N->getZExtValue())); +}], HF32, "U32Imm">; + +// Short immediates. 
+def imm64sx8 : Immediate<i64, [{ + return isInt<8>(N->getSExtValue()); +}], SIMM8, "S8Imm">; + +def imm64sx16 : Immediate<i64, [{ + return isInt<16>(N->getSExtValue()); +}], SIMM16, "S16Imm">; + +def imm64zx16 : Immediate<i64, [{ + return isUInt<16>(N->getZExtValue()); +}], UIMM16, "U16Imm">; + +def imm64sx32 : Immediate<i64, [{ + return isInt<32>(N->getSExtValue()); +}], SIMM32, "S32Imm">; + +def imm64zx32 : Immediate<i64, [{ + return isUInt<32>(N->getZExtValue()); +}], UIMM32, "U32Imm">; + +def imm64zx32n : Immediate<i64, [{ + return isUInt<32>(-N->getSExtValue()); +}], NEGIMM32, "U32Imm">; + +def imm64 : ImmLeaf<i64, [{}]>; + +//===----------------------------------------------------------------------===// +// Floating-point immediates +//===----------------------------------------------------------------------===// + +// Floating-point zero. +def fpimm0 : PatLeaf<(fpimm), [{ return N->isExactlyValue(+0.0); }]>; + +// Floating point negative zero. +def fpimmneg0 : PatLeaf<(fpimm), [{ return N->isExactlyValue(-0.0); }]>; + +//===----------------------------------------------------------------------===// +// Symbolic address operands +//===----------------------------------------------------------------------===// + +// PC-relative offsets of a basic block. The offset is sign-extended +// and multiplied by 2. +def brtarget16 : Operand<OtherVT> { + let EncoderMethod = "getPC16DBLEncoding"; +} +def brtarget32 : Operand<OtherVT> { + let EncoderMethod = "getPC32DBLEncoding"; +} + +// A PC-relative offset of a global value. The offset is sign-extended +// and multiplied by 2. +def pcrel32 : PCRelAddress<i64, "pcrel32"> { + let EncoderMethod = "getPC32DBLEncoding"; +} + +// A PC-relative offset of a global value when the value is used as a +// call target. The offset is sign-extended and multiplied by 2. +def pcrel16call : PCRelAddress<i64, "pcrel16call"> { + let PrintMethod = "printCallOperand"; + let EncoderMethod = "getPLT16DBLEncoding"; +} +def pcrel32call : PCRelAddress<i64, "pcrel32call"> { + let PrintMethod = "printCallOperand"; + let EncoderMethod = "getPLT32DBLEncoding"; +} + +//===----------------------------------------------------------------------===// +// Addressing modes +//===----------------------------------------------------------------------===// + +// 12-bit displacement operands. +def disp12imm32 : Operand<i32>; +def disp12imm64 : Operand<i64>; + +// 20-bit displacement operands. +def disp20imm32 : Operand<i32>; +def disp20imm64 : Operand<i64>; + +def BDAddr32Disp12 : AddressAsmOperand<"BDAddr", "32", "12">; +def BDAddr32Disp20 : AddressAsmOperand<"BDAddr", "32", "20">; +def BDAddr64Disp12 : AddressAsmOperand<"BDAddr", "64", "12">; +def BDAddr64Disp20 : AddressAsmOperand<"BDAddr", "64", "20">; +def BDXAddr64Disp12 : AddressAsmOperand<"BDXAddr", "64", "12">; +def BDXAddr64Disp20 : AddressAsmOperand<"BDXAddr", "64", "20">; + +// DAG patterns and operands for addressing modes. 
Each mode has +// the form <type><range><group> where: +// +// <type> is one of: +// shift : base + displacement (32-bit) +// bdaddr : base + displacement +// bdxaddr : base + displacement + index +// laaddr : like bdxaddr, but used for Load Address operations +// dynalloc : base + displacement + index + ADJDYNALLOC +// +// <range> is one of: +// 12 : the displacement is an unsigned 12-bit value +// 20 : the displacement is a signed 20-bit value +// +// <group> is one of: +// pair : used when there is an equivalent instruction with the opposite +// range value (12 or 20) +// only : used when there is no equivalent instruction with the opposite +// range value +def shift12only : BDMode <"BDAddr", "32", "12", "Only">; +def shift20only : BDMode <"BDAddr", "32", "20", "Only">; +def bdaddr12only : BDMode <"BDAddr", "64", "12", "Only">; +def bdaddr12pair : BDMode <"BDAddr", "64", "12", "Pair">; +def bdaddr20only : BDMode <"BDAddr", "64", "20", "Only">; +def bdaddr20pair : BDMode <"BDAddr", "64", "20", "Pair">; +def bdxaddr12only : BDXMode<"BDXAddr", "64", "12", "Only">; +def bdxaddr12pair : BDXMode<"BDXAddr", "64", "12", "Pair">; +def bdxaddr20only : BDXMode<"BDXAddr", "64", "20", "Only">; +def bdxaddr20only128 : BDXMode<"BDXAddr", "64", "20", "Only128">; +def bdxaddr20pair : BDXMode<"BDXAddr", "64", "20", "Pair">; +def dynalloc12only : BDXMode<"DynAlloc", "64", "12", "Only">; +def laaddr12pair : BDXMode<"LAAddr", "64", "12", "Pair">; +def laaddr20pair : BDXMode<"LAAddr", "64", "20", "Pair">; + +//===----------------------------------------------------------------------===// +// Miscellaneous +//===----------------------------------------------------------------------===// + +// Access registers. At present we just use them for accessing the thread +// pointer, so we don't expose them as register to LLVM. +def AccessReg : AsmOperandClass { + let Name = "AccessReg"; + let ParserMethod = "parseAccessReg"; +} +def access_reg : Immediate<i8, [{ return N->getZExtValue() < 16; }], + NOOP_SDNodeXForm, "AccessReg"> { + let ParserMatchClass = AccessReg; +} + +// A 4-bit condition-code mask. +def cond4 : PatLeaf<(i8 imm), [{ return (N->getZExtValue() < 16); }]>, + Operand<i8> { + let PrintMethod = "printCond4Operand"; +} diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZOperators.td b/contrib/llvm/lib/Target/SystemZ/SystemZOperators.td new file mode 100644 index 0000000..8c4df56 --- /dev/null +++ b/contrib/llvm/lib/Target/SystemZ/SystemZOperators.td @@ -0,0 +1,196 @@ +//===-- SystemZOperators.td - SystemZ-specific operators ------*- tblgen-*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// Type profiles +//===----------------------------------------------------------------------===// +def SDT_CallSeqStart : SDCallSeqStart<[SDTCisVT<0, i64>]>; +def SDT_CallSeqEnd : SDCallSeqEnd<[SDTCisVT<0, i64>, + SDTCisVT<1, i64>]>; +def SDT_ZCall : SDTypeProfile<0, -1, [SDTCisPtrTy<0>]>; +def SDT_ZCmp : SDTypeProfile<0, 2, [SDTCisSameAs<0, 1>]>; +def SDT_ZBRCCMask : SDTypeProfile<0, 2, + [SDTCisVT<0, i8>, + SDTCisVT<1, OtherVT>]>; +def SDT_ZSelectCCMask : SDTypeProfile<1, 3, + [SDTCisSameAs<0, 1>, + SDTCisSameAs<1, 2>, + SDTCisVT<3, i8>]>; +def SDT_ZWrapPtr : SDTypeProfile<1, 1, + [SDTCisSameAs<0, 1>, + SDTCisPtrTy<0>]>; +def SDT_ZAdjDynAlloc : SDTypeProfile<1, 0, [SDTCisVT<0, i64>]>; +def SDT_ZExtractAccess : SDTypeProfile<1, 1, + [SDTCisVT<0, i32>, + SDTCisVT<1, i8>]>; +def SDT_ZGR128Binary32 : SDTypeProfile<1, 2, + [SDTCisVT<0, untyped>, + SDTCisVT<1, untyped>, + SDTCisVT<2, i32>]>; +def SDT_ZGR128Binary64 : SDTypeProfile<1, 2, + [SDTCisVT<0, untyped>, + SDTCisVT<1, untyped>, + SDTCisVT<2, i64>]>; +def SDT_ZAtomicLoadBinaryW : SDTypeProfile<1, 5, + [SDTCisVT<0, i32>, + SDTCisPtrTy<1>, + SDTCisVT<2, i32>, + SDTCisVT<3, i32>, + SDTCisVT<4, i32>, + SDTCisVT<5, i32>]>; +def SDT_ZAtomicCmpSwapW : SDTypeProfile<1, 6, + [SDTCisVT<0, i32>, + SDTCisPtrTy<1>, + SDTCisVT<2, i32>, + SDTCisVT<3, i32>, + SDTCisVT<4, i32>, + SDTCisVT<5, i32>, + SDTCisVT<6, i32>]>; + +//===----------------------------------------------------------------------===// +// Node definitions +//===----------------------------------------------------------------------===// + +// These are target-independent nodes, but have target-specific formats. +def callseq_start : SDNode<"ISD::CALLSEQ_START", SDT_CallSeqStart, + [SDNPHasChain, SDNPSideEffect, SDNPOutGlue]>; +def callseq_end : SDNode<"ISD::CALLSEQ_END", SDT_CallSeqEnd, + [SDNPHasChain, SDNPSideEffect, SDNPOptInGlue, + SDNPOutGlue]>; + +// Nodes for SystemZISD::*. See SystemZISelLowering.h for more details. 
+def z_retflag : SDNode<"SystemZISD::RET_FLAG", SDTNone, + [SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>; +def z_call : SDNode<"SystemZISD::CALL", SDT_ZCall, + [SDNPHasChain, SDNPOutGlue, SDNPOptInGlue, + SDNPVariadic]>; +def z_pcrel_wrapper : SDNode<"SystemZISD::PCREL_WRAPPER", SDT_ZWrapPtr, []>; +def z_cmp : SDNode<"SystemZISD::CMP", SDT_ZCmp, [SDNPOutGlue]>; +def z_ucmp : SDNode<"SystemZISD::UCMP", SDT_ZCmp, [SDNPOutGlue]>; +def z_br_ccmask : SDNode<"SystemZISD::BR_CCMASK", SDT_ZBRCCMask, + [SDNPHasChain, SDNPInGlue]>; +def z_select_ccmask : SDNode<"SystemZISD::SELECT_CCMASK", SDT_ZSelectCCMask, + [SDNPInGlue]>; +def z_adjdynalloc : SDNode<"SystemZISD::ADJDYNALLOC", SDT_ZAdjDynAlloc>; +def z_extract_access : SDNode<"SystemZISD::EXTRACT_ACCESS", + SDT_ZExtractAccess>; +def z_umul_lohi64 : SDNode<"SystemZISD::UMUL_LOHI64", SDT_ZGR128Binary64>; +def z_sdivrem64 : SDNode<"SystemZISD::SDIVREM64", SDT_ZGR128Binary64>; +def z_udivrem32 : SDNode<"SystemZISD::UDIVREM32", SDT_ZGR128Binary32>; +def z_udivrem64 : SDNode<"SystemZISD::UDIVREM64", SDT_ZGR128Binary64>; + +class AtomicWOp<string name, SDTypeProfile profile = SDT_ZAtomicLoadBinaryW> + : SDNode<"SystemZISD::"##name, profile, + [SDNPHasChain, SDNPMayStore, SDNPMayLoad, SDNPMemOperand]>; + +def z_atomic_swapw : AtomicWOp<"ATOMIC_SWAPW">; +def z_atomic_loadw_add : AtomicWOp<"ATOMIC_LOADW_ADD">; +def z_atomic_loadw_sub : AtomicWOp<"ATOMIC_LOADW_SUB">; +def z_atomic_loadw_and : AtomicWOp<"ATOMIC_LOADW_AND">; +def z_atomic_loadw_or : AtomicWOp<"ATOMIC_LOADW_OR">; +def z_atomic_loadw_xor : AtomicWOp<"ATOMIC_LOADW_XOR">; +def z_atomic_loadw_nand : AtomicWOp<"ATOMIC_LOADW_NAND">; +def z_atomic_loadw_min : AtomicWOp<"ATOMIC_LOADW_MIN">; +def z_atomic_loadw_max : AtomicWOp<"ATOMIC_LOADW_MAX">; +def z_atomic_loadw_umin : AtomicWOp<"ATOMIC_LOADW_UMIN">; +def z_atomic_loadw_umax : AtomicWOp<"ATOMIC_LOADW_UMAX">; +def z_atomic_cmp_swapw : AtomicWOp<"ATOMIC_CMP_SWAPW", SDT_ZAtomicCmpSwapW>; + +//===----------------------------------------------------------------------===// +// Pattern fragments +//===----------------------------------------------------------------------===// + +// Register sign-extend operations. Sub-32-bit values are represented as i32s. +def sext8 : PatFrag<(ops node:$src), (sext_inreg node:$src, i8)>; +def sext16 : PatFrag<(ops node:$src), (sext_inreg node:$src, i16)>; +def sext32 : PatFrag<(ops node:$src), (sext (i32 node:$src))>; + +// Register zero-extend operations. Sub-32-bit values are represented as i32s. +def zext8 : PatFrag<(ops node:$src), (and node:$src, 0xff)>; +def zext16 : PatFrag<(ops node:$src), (and node:$src, 0xffff)>; +def zext32 : PatFrag<(ops node:$src), (zext (i32 node:$src))>; + +// Typed floating-point loads. +def loadf32 : PatFrag<(ops node:$src), (f32 (load node:$src))>; +def loadf64 : PatFrag<(ops node:$src), (f64 (load node:$src))>; + +// Aligned loads. +class AlignedLoad<SDPatternOperator load> + : PatFrag<(ops node:$addr), (load node:$addr), [{ + LoadSDNode *Load = cast<LoadSDNode>(N); + return Load->getAlignment() >= Load->getMemoryVT().getStoreSize(); +}]>; +def aligned_load : AlignedLoad<load>; +def aligned_sextloadi16 : AlignedLoad<sextloadi16>; +def aligned_sextloadi32 : AlignedLoad<sextloadi32>; +def aligned_zextloadi16 : AlignedLoad<zextloadi16>; +def aligned_zextloadi32 : AlignedLoad<zextloadi32>; + +// Aligned stores. 
+class AlignedStore<SDPatternOperator store> + : PatFrag<(ops node:$src, node:$addr), (store node:$src, node:$addr), [{ + StoreSDNode *Store = cast<StoreSDNode>(N); + return Store->getAlignment() >= Store->getMemoryVT().getStoreSize(); +}]>; +def aligned_store : AlignedStore<store>; +def aligned_truncstorei16 : AlignedStore<truncstorei16>; +def aligned_truncstorei32 : AlignedStore<truncstorei32>; + +// Insertions. +def inserti8 : PatFrag<(ops node:$src1, node:$src2), + (or (and node:$src1, -256), node:$src2)>; +def insertll : PatFrag<(ops node:$src1, node:$src2), + (or (and node:$src1, 0xffffffffffff0000), node:$src2)>; +def insertlh : PatFrag<(ops node:$src1, node:$src2), + (or (and node:$src1, 0xffffffff0000ffff), node:$src2)>; +def inserthl : PatFrag<(ops node:$src1, node:$src2), + (or (and node:$src1, 0xffff0000ffffffff), node:$src2)>; +def inserthh : PatFrag<(ops node:$src1, node:$src2), + (or (and node:$src1, 0x0000ffffffffffff), node:$src2)>; +def insertlf : PatFrag<(ops node:$src1, node:$src2), + (or (and node:$src1, 0xffffffff00000000), node:$src2)>; +def inserthf : PatFrag<(ops node:$src1, node:$src2), + (or (and node:$src1, 0x00000000ffffffff), node:$src2)>; + +// ORs that can be treated as insertions. +def or_as_inserti8 : PatFrag<(ops node:$src1, node:$src2), + (or node:$src1, node:$src2), [{ + unsigned BitWidth = N->getValueType(0).getScalarType().getSizeInBits(); + return CurDAG->MaskedValueIsZero(N->getOperand(0), + APInt::getLowBitsSet(BitWidth, 8)); +}]>; + +// ORs that can be treated as reversed insertions. +def or_as_revinserti8 : PatFrag<(ops node:$src1, node:$src2), + (or node:$src1, node:$src2), [{ + unsigned BitWidth = N->getValueType(0).getScalarType().getSizeInBits(); + return CurDAG->MaskedValueIsZero(N->getOperand(1), + APInt::getLowBitsSet(BitWidth, 8)); +}]>; + +// Fused multiply-add and multiply-subtract, but with the order of the +// operands matching SystemZ's MA and MS instructions. +def z_fma : PatFrag<(ops node:$src1, node:$src2, node:$src3), + (fma node:$src2, node:$src3, node:$src1)>; +def z_fms : PatFrag<(ops node:$src1, node:$src2, node:$src3), + (fma node:$src2, node:$src3, (fneg node:$src1))>; + +// Floating-point negative absolute. +def fnabs : PatFrag<(ops node:$ptr), (fneg (fabs node:$ptr))>; + +// Create a unary operator that loads from memory and then performs +// the given operation on it. +class loadu<SDPatternOperator operator> + : PatFrag<(ops node:$addr), (operator (load node:$addr))>; + +// Create a store operator that performs the given unary operation +// on the value before storing it. +class storeu<SDPatternOperator operator> + : PatFrag<(ops node:$value, node:$addr), + (store (operator node:$value), node:$addr)>; diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZPatterns.td b/contrib/llvm/lib/Target/SystemZ/SystemZPatterns.td new file mode 100644 index 0000000..3689f74 --- /dev/null +++ b/contrib/llvm/lib/Target/SystemZ/SystemZPatterns.td @@ -0,0 +1,71 @@ +//===-- SystemZPatterns.td - SystemZ-specific pattern rules ---*- tblgen-*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +// Record that INSN performs a 64-bit version of unary operator OPERATOR +// in which the operand is sign-extended from 32 to 64 bits. 
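[Editorial aside, not part of the patch: the insertion fragments in SystemZOperators.td above (inserti8, insertll ... inserthf, or_as_inserti8) all describe "keep most of the destination, OR a new field in". A small C++ sketch of that arithmetic, under the assumption that the second operand already carries its bits in the target field:]

#include <cassert>
#include <cstdint>

// insertlh above is (dst & 0xffffffff0000ffff) | src, i.e. it replaces
// bits 16-31 of dst with the corresponding bits of src.
static uint64_t insertlh(uint64_t Dst, uint64_t Src) {
  return (Dst & 0xFFFFFFFF0000FFFFULL) | Src;
}

int main() {
  assert(insertlh(0x1111222233334444ULL, 0x00000000ABCD0000ULL) ==
         0x11112222ABCD4444ULL);
  // or_as_inserti8 style: a plain OR is only equivalent to such an insertion
  // when the field being written is already known to be zero in the other
  // operand, which is what the MaskedValueIsZero check above verifies.
  uint64_t X = 0x1200, Y = 0x34;
  assert((X & 0xFF) == 0 && (X | Y) == ((X & ~(uint64_t)0xFF) | Y));
  return 0;
}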
+multiclass SXU<SDPatternOperator operator, Instruction insn> { + def : Pat<(operator (sext (i32 GR32:$src))), + (insn GR32:$src)>; + def : Pat<(operator (sext_inreg GR64:$src, i32)), + (insn (EXTRACT_SUBREG GR64:$src, subreg_32bit))>; +} + +// Record that INSN performs a 64-bit version of binary operator OPERATOR +// in which the first operand has class CLS and which the second operand +// is sign-extended from a 32-bit register. +multiclass SXB<SDPatternOperator operator, RegisterOperand cls, + Instruction insn> { + def : Pat<(operator cls:$src1, (sext GR32:$src2)), + (insn cls:$src1, GR32:$src2)>; + def : Pat<(operator cls:$src1, (sext_inreg GR64:$src2, i32)), + (insn cls:$src1, (EXTRACT_SUBREG GR64:$src2, subreg_32bit))>; +} + +// Like SXB, but for zero extension. +multiclass ZXB<SDPatternOperator operator, RegisterOperand cls, + Instruction insn> { + def : Pat<(operator cls:$src1, (zext GR32:$src2)), + (insn cls:$src1, GR32:$src2)>; + def : Pat<(operator cls:$src1, (and GR64:$src2, 0xffffffff)), + (insn cls:$src1, (EXTRACT_SUBREG GR64:$src2, subreg_32bit))>; +} + +// Record that INSN performs a binary read-modify-write operation, +// with LOAD, OPERATOR and STORE being the read, modify and write +// respectively. MODE is the addressing mode and IMM is the type +// of the second operand. +class RMWI<SDPatternOperator load, SDPatternOperator operator, + SDPatternOperator store, AddressingMode mode, + PatFrag imm, Instruction insn> + : Pat<(store (operator (load mode:$addr), imm:$src), mode:$addr), + (insn mode:$addr, (UIMM8 imm:$src))>; + +// Record that INSN performs binary operation OPERATION on a byte +// memory location. IMM is the type of the second operand. +multiclass RMWIByte<SDPatternOperator operator, AddressingMode mode, + Instruction insn> { + def : RMWI<zextloadi8, operator, truncstorei8, mode, imm32, insn>; + def : RMWI<zextloadi8, operator, truncstorei8, mode, imm64, insn>; + def : RMWI<sextloadi8, operator, truncstorei8, mode, imm32, insn>; + def : RMWI<sextloadi8, operator, truncstorei8, mode, imm64, insn>; + def : RMWI<extloadi8, operator, truncstorei8, mode, imm32, insn>; + def : RMWI<extloadi8, operator, truncstorei8, mode, imm64, insn>; +} + +// Record that INSN performs insertion TYPE into a register of class CLS. +// The inserted operand is loaded using LOAD from an address of mode MODE. +multiclass InsertMem<string type, Instruction insn, RegisterOperand cls, + SDPatternOperator load, AddressingMode mode> { + def : Pat<(!cast<SDPatternOperator>("or_as_"##type) + cls:$src1, (load mode:$src2)), + (insn cls:$src1, mode:$src2)>; + def : Pat<(!cast<SDPatternOperator>("or_as_rev"##type) + (load mode:$src2), cls:$src1), + (insn cls:$src1, mode:$src2)>; +} diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZRegisterInfo.cpp b/contrib/llvm/lib/Target/SystemZ/SystemZRegisterInfo.cpp new file mode 100644 index 0000000..a0ae7ed --- /dev/null +++ b/contrib/llvm/lib/Target/SystemZ/SystemZRegisterInfo.cpp @@ -0,0 +1,162 @@ +//===-- SystemZRegisterInfo.cpp - SystemZ register information ------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +#include "SystemZRegisterInfo.h" +#include "SystemZTargetMachine.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" + +#define GET_REGINFO_TARGET_DESC +#include "SystemZGenRegisterInfo.inc" + +using namespace llvm; + +SystemZRegisterInfo::SystemZRegisterInfo(SystemZTargetMachine &tm, + const SystemZInstrInfo &tii) + : SystemZGenRegisterInfo(SystemZ::R14D), TM(tm), TII(tii) {} + +const uint16_t* +SystemZRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const { + static const uint16_t CalleeSavedRegs[] = { + SystemZ::R6D, SystemZ::R7D, SystemZ::R8D, SystemZ::R9D, + SystemZ::R10D, SystemZ::R11D, SystemZ::R12D, SystemZ::R13D, + SystemZ::R14D, SystemZ::R15D, + SystemZ::F8D, SystemZ::F9D, SystemZ::F10D, SystemZ::F11D, + SystemZ::F12D, SystemZ::F13D, SystemZ::F14D, SystemZ::F15D, + 0 + }; + + return CalleeSavedRegs; +} + +BitVector +SystemZRegisterInfo::getReservedRegs(const MachineFunction &MF) const { + BitVector Reserved(getNumRegs()); + const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering(); + + if (TFI->hasFP(MF)) { + // R11D is the frame pointer. Reserve all aliases. + Reserved.set(SystemZ::R11D); + Reserved.set(SystemZ::R11W); + Reserved.set(SystemZ::R10Q); + } + + // R15D is the stack pointer. Reserve all aliases. + Reserved.set(SystemZ::R15D); + Reserved.set(SystemZ::R15W); + Reserved.set(SystemZ::R14Q); + return Reserved; +} + +bool +SystemZRegisterInfo::saveScavengerRegister(MachineBasicBlock &MBB, + MachineBasicBlock::iterator SaveMBBI, + MachineBasicBlock::iterator &UseMBBI, + const TargetRegisterClass *RC, + unsigned Reg) const { + MachineFunction &MF = *MBB.getParent(); + const SystemZFrameLowering *TFI = + static_cast<const SystemZFrameLowering *>(TM.getFrameLowering()); + unsigned Base = getFrameRegister(MF); + uint64_t Offset = TFI->getEmergencySpillSlotOffset(MF); + DebugLoc DL; + + unsigned LoadOpcode, StoreOpcode; + TII.getLoadStoreOpcodes(RC, LoadOpcode, StoreOpcode); + + // The offset must always be in range of a 12-bit unsigned displacement. + BuildMI(MBB, SaveMBBI, DL, TII.get(StoreOpcode)) + .addReg(Reg, RegState::Kill).addReg(Base).addImm(Offset).addReg(0); + BuildMI(MBB, UseMBBI, DL, TII.get(LoadOpcode), Reg) + .addReg(Base).addImm(Offset).addReg(0); + return true; +} + +void +SystemZRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI, + int SPAdj, unsigned FIOperandNum, + RegScavenger *RS) const { + assert(SPAdj == 0 && "Outgoing arguments should be part of the frame"); + + MachineBasicBlock &MBB = *MI->getParent(); + MachineFunction &MF = *MBB.getParent(); + const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering(); + DebugLoc DL = MI->getDebugLoc(); + + // Decompose the frame index into a base and offset. + int FrameIndex = MI->getOperand(FIOperandNum).getIndex(); + unsigned BasePtr = getFrameRegister(MF); + int64_t Offset = (TFI->getFrameIndexOffset(MF, FrameIndex) + + MI->getOperand(FIOperandNum + 1).getImm()); + + // Special handling of dbg_value instructions. + if (MI->isDebugValue()) { + MI->getOperand(FIOperandNum).ChangeToRegister(BasePtr, /*isDef*/ false); + MI->getOperand(FIOperandNum + 1).ChangeToImmediate(Offset); + return; + } + + // See if the offset is in range, or if an equivalent instruction that + // accepts the offset exists. 
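[Editorial aside, not part of the patch: the eliminateFrameIndex body that follows handles displacements that do not fit the instruction by splitting the offset into a large "anchor" part, materialized through a scratch register, plus a small remainder that the instruction can encode. A simplified sketch of that decomposition, assuming for illustration that the remainder must fit a 12-bit unsigned displacement:]

#include <cassert>
#include <cstdint>

struct Split { int64_t Anchor; int64_t Remainder; };

// Mirror the masking loop below: shrink the mask until the masked-off
// remainder is acceptable, then treat the rest as the anchor.
static Split splitOffset(int64_t Offset) {
  int64_t Mask = 0xffff;
  int64_t Rem;
  do {
    Rem = Offset & Mask;
    Mask >>= 1;
  } while (!(Rem >= 0 && Rem < (1 << 12)));
  Split S;
  S.Anchor = Offset - Rem;
  S.Remainder = Rem;
  return S;
}

int main() {
  Split S = splitOffset(0x12345);            // too large for 12 bits
  assert(S.Anchor + S.Remainder == 0x12345); // decomposition is lossless
  assert(S.Remainder < (1 << 12));           // remainder now encodable
  return 0;
}

[In the real code the "acceptable" test is getOpcodeForOffset(), so the remainder may also use a 20-bit signed displacement when an equivalent instruction exists.]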
+ unsigned Opcode = MI->getOpcode(); + unsigned OpcodeForOffset = TII.getOpcodeForOffset(Opcode, Offset); + if (OpcodeForOffset) + MI->getOperand(FIOperandNum).ChangeToRegister(BasePtr, false); + else { + // Create an anchor point that is in range. Start at 0xffff so that + // can use LLILH to load the immediate. + int64_t OldOffset = Offset; + int64_t Mask = 0xffff; + do { + Offset = OldOffset & Mask; + OpcodeForOffset = TII.getOpcodeForOffset(Opcode, Offset); + Mask >>= 1; + assert(Mask && "One offset must be OK"); + } while (!OpcodeForOffset); + + unsigned ScratchReg = + MF.getRegInfo().createVirtualRegister(&SystemZ::ADDR64BitRegClass); + int64_t HighOffset = OldOffset - Offset; + + if (MI->getDesc().TSFlags & SystemZII::HasIndex + && MI->getOperand(FIOperandNum + 2).getReg() == 0) { + // Load the offset into the scratch register and use it as an index. + // The scratch register then dies here. + TII.loadImmediate(MBB, MI, ScratchReg, HighOffset); + MI->getOperand(FIOperandNum).ChangeToRegister(BasePtr, false); + MI->getOperand(FIOperandNum + 2).ChangeToRegister(ScratchReg, + false, false, true); + } else { + // Load the anchor address into a scratch register. + unsigned LAOpcode = TII.getOpcodeForOffset(SystemZ::LA, HighOffset); + if (LAOpcode) + BuildMI(MBB, MI, DL, TII.get(LAOpcode),ScratchReg) + .addReg(BasePtr).addImm(HighOffset).addReg(0); + else { + // Load the high offset into the scratch register and use it as + // an index. + TII.loadImmediate(MBB, MI, ScratchReg, HighOffset); + BuildMI(MBB, MI, DL, TII.get(SystemZ::AGR),ScratchReg) + .addReg(ScratchReg, RegState::Kill).addReg(BasePtr); + } + + // Use the scratch register as the base. It then dies here. + MI->getOperand(FIOperandNum).ChangeToRegister(ScratchReg, + false, false, true); + } + } + MI->setDesc(TII.get(OpcodeForOffset)); + MI->getOperand(FIOperandNum + 1).ChangeToImmediate(Offset); +} + +unsigned +SystemZRegisterInfo::getFrameRegister(const MachineFunction &MF) const { + const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering(); + return TFI->hasFP(MF) ? SystemZ::R11D : SystemZ::R15D; +} diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZRegisterInfo.h b/contrib/llvm/lib/Target/SystemZ/SystemZRegisterInfo.h new file mode 100644 index 0000000..91a70de --- /dev/null +++ b/contrib/llvm/lib/Target/SystemZ/SystemZRegisterInfo.h @@ -0,0 +1,70 @@ +//===-- SystemZRegisterInfo.h - SystemZ register information ----*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#ifndef SystemZREGISTERINFO_H +#define SystemZREGISTERINFO_H + +#include "SystemZ.h" +#include "llvm/Target/TargetRegisterInfo.h" + +#define GET_REGINFO_HEADER +#include "SystemZGenRegisterInfo.inc" + +namespace llvm { + +namespace SystemZ { + // Return the subreg to use for referring to the even and odd registers + // in a GR128 pair. Is32Bit says whether we want a GR32 or GR64. + inline unsigned even128(bool Is32bit) { + return Is32bit ? subreg_32bit : subreg_high; + } + inline unsigned odd128(bool Is32bit) { + return Is32bit ? 
subreg_low32 : subreg_low; + } +} + +class SystemZSubtarget; +class SystemZInstrInfo; + +struct SystemZRegisterInfo : public SystemZGenRegisterInfo { +private: + SystemZTargetMachine &TM; + const SystemZInstrInfo &TII; + +public: + SystemZRegisterInfo(SystemZTargetMachine &tm, const SystemZInstrInfo &tii); + + // Override TargetRegisterInfo.h. + virtual bool requiresRegisterScavenging(const MachineFunction &MF) const + LLVM_OVERRIDE { + return true; + } + virtual bool requiresFrameIndexScavenging(const MachineFunction &MF) const + LLVM_OVERRIDE { + return true; + } + virtual const uint16_t *getCalleeSavedRegs(const MachineFunction *MF = 0) + const LLVM_OVERRIDE; + virtual BitVector getReservedRegs(const MachineFunction &MF) + const LLVM_OVERRIDE; + virtual bool saveScavengerRegister(MachineBasicBlock &MBB, + MachineBasicBlock::iterator SaveMBBI, + MachineBasicBlock::iterator &UseMBBI, + const TargetRegisterClass *RC, + unsigned Reg) const LLVM_OVERRIDE; + virtual void eliminateFrameIndex(MachineBasicBlock::iterator MI, + int SPAdj, unsigned FIOperandNum, + RegScavenger *RS) const LLVM_OVERRIDE; + virtual unsigned getFrameRegister(const MachineFunction &MF) const + LLVM_OVERRIDE; +}; + +} // end namespace llvm + +#endif diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZRegisterInfo.td b/contrib/llvm/lib/Target/SystemZ/SystemZRegisterInfo.td new file mode 100644 index 0000000..bd1b563 --- /dev/null +++ b/contrib/llvm/lib/Target/SystemZ/SystemZRegisterInfo.td @@ -0,0 +1,150 @@ +//==- SystemZRegisterInfo.td - SystemZ register definitions -*- tablegen -*-==// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// Class definitions. +//===----------------------------------------------------------------------===// + +class SystemZReg<string n> : Register<n> { + let Namespace = "SystemZ"; +} + +class SystemZRegWithSubregs<string n, list<Register> subregs> + : RegisterWithSubRegs<n, subregs> { + let Namespace = "SystemZ"; +} + +let Namespace = "SystemZ" in { +def subreg_32bit : SubRegIndex; // could also be known as "subreg_high32" +def subreg_high : SubRegIndex; +def subreg_low : SubRegIndex; +def subreg_low32 : SubRegIndex<[subreg_low, subreg_32bit]>; +} + +// Define a register class that contains values of type TYPE and an +// associated operand called NAME. SIZE is the size and alignment +// of the registers and REGLIST is the list of individual registers. +multiclass SystemZRegClass<string name, ValueType type, int size, dag regList> { + def AsmOperand : AsmOperandClass { + let Name = name; + let ParserMethod = "parse"##name; + let RenderMethod = "addRegOperands"; + } + def Bit : RegisterClass<"SystemZ", [type], size, regList> { + let Size = size; + } + def "" : RegisterOperand<!cast<RegisterClass>(name##"Bit")> { + let ParserMatchClass = !cast<AsmOperandClass>(name##"AsmOperand"); + } +} + +//===----------------------------------------------------------------------===// +// General-purpose registers +//===----------------------------------------------------------------------===// + +// Lower 32 bits of one of the 16 64-bit general-purpose registers +class GPR32<bits<16> num, string n> : SystemZReg<n> { + let HWEncoding = num; +} + +// One of the 16 64-bit general-purpose registers. 
+class GPR64<bits<16> num, string n, GPR32 low> + : SystemZRegWithSubregs<n, [low]> { + let HWEncoding = num; + let SubRegIndices = [subreg_32bit]; +} + +// 8 even-odd pairs of GPR64s. +class GPR128<bits<16> num, string n, GPR64 high, GPR64 low> + : SystemZRegWithSubregs<n, [high, low]> { + let HWEncoding = num; + let SubRegIndices = [subreg_high, subreg_low]; +} + +// General-purpose registers +foreach I = 0-15 in { + def R#I#W : GPR32<I, "r"#I>; + def R#I#D : GPR64<I, "r"#I, !cast<GPR32>("R"#I#"W")>, DwarfRegNum<[I]>; +} + +foreach I = [0, 2, 4, 6, 8, 10, 12, 14] in { + def R#I#Q : GPR128<I, "r"#I, !cast<GPR64>("R"#I#"D"), + !cast<GPR64>("R"#!add(I, 1)#"D")>; +} + +/// Allocate the callee-saved R6-R13 backwards. That way they can be saved +/// together with R14 and R15 in one prolog instruction. +defm GR32 : SystemZRegClass<"GR32", i32, 32, (add (sequence "R%uW", 0, 5), + (sequence "R%uW", 15, 6))>; +defm GR64 : SystemZRegClass<"GR64", i64, 64, (add (sequence "R%uD", 0, 5), + (sequence "R%uD", 15, 6))>; + +// The architecture doesn't really have any i128 support, so model the +// register pairs as untyped instead. +defm GR128 : SystemZRegClass<"GR128", untyped, 128, (add R0Q, R2Q, R4Q, + R12Q, R10Q, R8Q, R6Q, + R14Q)>; + +// Base and index registers. Everything except R0, which in an address +// context evaluates as 0. +defm ADDR32 : SystemZRegClass<"ADDR32", i32, 32, (sub GR32Bit, R0W)>; +defm ADDR64 : SystemZRegClass<"ADDR64", i64, 64, (sub GR64Bit, R0D)>; + +// Not used directly, but needs to exist for ADDR32 and ADDR64 subregs +// of a GR128. +defm ADDR128 : SystemZRegClass<"ADDR128", untyped, 128, (sub GR128Bit, R0Q)>; + +//===----------------------------------------------------------------------===// +// Floating-point registers +//===----------------------------------------------------------------------===// + +// Lower 32 bits of one of the 16 64-bit floating-point registers +class FPR32<bits<16> num, string n> : SystemZReg<n> { + let HWEncoding = num; +} + +// One of the 16 64-bit floating-point registers +class FPR64<bits<16> num, string n, FPR32 low> + : SystemZRegWithSubregs<n, [low]> { + let HWEncoding = num; + let SubRegIndices = [subreg_32bit]; +} + +// 8 pairs of FPR64s, with a one-register gap inbetween. +class FPR128<bits<16> num, string n, FPR64 high, FPR64 low> + : SystemZRegWithSubregs<n, [high, low]> { + let HWEncoding = num; + let SubRegIndices = [subreg_high, subreg_low]; +} + +// Floating-point registers +foreach I = 0-15 in { + def F#I#S : FPR32<I, "f"#I>; + def F#I#D : FPR64<I, "f"#I, !cast<FPR32>("F"#I#"S")>, + DwarfRegNum<[!add(I, 16)]>; +} + +foreach I = [0, 1, 4, 5, 8, 9, 12, 13] in { + def F#I#Q : FPR128<I, "f"#I, !cast<FPR64>("F"#I#"D"), + !cast<FPR64>("F"#!add(I, 2)#"D")>; +} + +// There's no store-multiple instruction for FPRs, so we're not fussy +// about the order in which call-saved registers are allocated. 
+defm FP32 : SystemZRegClass<"FP32", f32, 32, (sequence "F%uS", 0, 15)>; +defm FP64 : SystemZRegClass<"FP64", f64, 64, (sequence "F%uD", 0, 15)>; +defm FP128 : SystemZRegClass<"FP128", f128, 128, (add F0Q, F1Q, F4Q, F5Q, + F8Q, F9Q, F12Q, F13Q)>; + +//===----------------------------------------------------------------------===// +// Other registers +//===----------------------------------------------------------------------===// + +// Status register +def PSW : SystemZReg<"psw">; diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZSubtarget.cpp b/contrib/llvm/lib/Target/SystemZ/SystemZSubtarget.cpp new file mode 100644 index 0000000..cfd3324 --- /dev/null +++ b/contrib/llvm/lib/Target/SystemZ/SystemZSubtarget.cpp @@ -0,0 +1,56 @@ +//===-- SystemZSubtarget.cpp - SystemZ subtarget information --------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "SystemZSubtarget.h" +#include "llvm/IR/GlobalValue.h" + +#define GET_SUBTARGETINFO_TARGET_DESC +#define GET_SUBTARGETINFO_CTOR +#include "SystemZGenSubtargetInfo.inc" + +using namespace llvm; + +SystemZSubtarget::SystemZSubtarget(const std::string &TT, + const std::string &CPU, + const std::string &FS) + : SystemZGenSubtargetInfo(TT, CPU, FS), TargetTriple(TT) { + std::string CPUName = CPU; + if (CPUName.empty()) + CPUName = "z10"; + + // Parse features string. + ParseSubtargetFeatures(CPUName, FS); +} + +// Return true if GV binds locally under reloc model RM. +static bool bindsLocally(const GlobalValue *GV, Reloc::Model RM) { + // For non-PIC, all symbols bind locally. + if (RM == Reloc::Static) + return true; + + return GV->hasLocalLinkage() || !GV->hasDefaultVisibility(); +} + +bool SystemZSubtarget::isPC32DBLSymbol(const GlobalValue *GV, + Reloc::Model RM, + CodeModel::Model CM) const { + // PC32DBL accesses require the low bit to be clear. Note that a zero + // value selects the default alignment and is therefore OK. + if (GV->getAlignment() == 1) + return false; + + // For the small model, all locally-binding symbols are in range. + if (CM == CodeModel::Small) + return bindsLocally(GV, RM); + + // For Medium and above, assume that the symbol is not within the 4GB range. + // Taking the address of locally-defined text would be OK, but that + // case isn't easy to detect. + return false; +} diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZSubtarget.h b/contrib/llvm/lib/Target/SystemZ/SystemZSubtarget.h new file mode 100644 index 0000000..8d4d450 --- /dev/null +++ b/contrib/llvm/lib/Target/SystemZ/SystemZSubtarget.h @@ -0,0 +1,48 @@ +//===-- SystemZSubtarget.h - SystemZ subtarget information -----*- C++ -*--===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file declares the SystemZ specific subclass of TargetSubtargetInfo. 
+// +//===----------------------------------------------------------------------===// + +#ifndef SYSTEMZSUBTARGET_H +#define SYSTEMZSUBTARGET_H + +#include "llvm/ADT/Triple.h" +#include "llvm/Target/TargetSubtargetInfo.h" +#include <string> + +#define GET_SUBTARGETINFO_HEADER +#include "SystemZGenSubtargetInfo.inc" + +namespace llvm { +class GlobalValue; +class StringRef; + +class SystemZSubtarget : public SystemZGenSubtargetInfo { +private: + Triple TargetTriple; + +public: + SystemZSubtarget(const std::string &TT, const std::string &CPU, + const std::string &FS); + + // Automatically generated by tblgen. + void ParseSubtargetFeatures(StringRef CPU, StringRef FS); + + // Return true if GV can be accessed using LARL for reloc model RM + // and code model CM. + bool isPC32DBLSymbol(const GlobalValue *GV, Reloc::Model RM, + CodeModel::Model CM) const; + + bool isTargetELF() const { return TargetTriple.isOSBinFormatELF(); } +}; +} // end namespace llvm + +#endif diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZTargetMachine.cpp b/contrib/llvm/lib/Target/SystemZ/SystemZTargetMachine.cpp new file mode 100644 index 0000000..8c4c456 --- /dev/null +++ b/contrib/llvm/lib/Target/SystemZ/SystemZTargetMachine.cpp @@ -0,0 +1,60 @@ +//===-- SystemZTargetMachine.cpp - Define TargetMachine for SystemZ -------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "SystemZTargetMachine.h" +#include "llvm/CodeGen/Passes.h" +#include "llvm/Support/TargetRegistry.h" + +using namespace llvm; + +extern "C" void LLVMInitializeSystemZTarget() { + // Register the target. + RegisterTargetMachine<SystemZTargetMachine> X(TheSystemZTarget); +} + +SystemZTargetMachine::SystemZTargetMachine(const Target &T, StringRef TT, + StringRef CPU, StringRef FS, + const TargetOptions &Options, + Reloc::Model RM, + CodeModel::Model CM, + CodeGenOpt::Level OL) + : LLVMTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL), + Subtarget(TT, CPU, FS), + // Make sure that global data has at least 16 bits of alignment by default, + // so that we can refer to it using LARL. We don't have any special + // requirements for stack variables though. + DL("E-p:64:64:64-i1:8:16-i8:8:16-i16:16-i32:32-i64:64" + "-f32:32-f64:64-f128:64-a0:8:16-n32:64"), + InstrInfo(*this), TLInfo(*this), TSInfo(*this), + FrameLowering(*this, Subtarget) { +} + +namespace { +/// SystemZ Code Generator Pass Configuration Options. 
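[Editorial aside, not part of the patch: the DataLayout string in the SystemZTargetMachine constructor above encodes big-endianness, 64-bit pointers, and the 16-bit preferred alignment on small types that backs the "refer to it using LARL" comment. A hedged sketch of querying it, assuming the LLVM 3.3-era C++ headers and API; alignments are reported in bytes:]

#include "llvm/IR/DataLayout.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Type.h"
#include <cassert>

using namespace llvm;

int main() {
  LLVMContext Ctx;
  DataLayout DL("E-p:64:64:64-i1:8:16-i8:8:16-i16:16-i32:32-i64:64"
                "-f32:32-f64:64-f128:64-a0:8:16-n32:64");
  assert(DL.isBigEndian());              // "E"
  assert(DL.getPointerSize() == 8);      // "p:64:64:64"
  // "i8:8:16": 1-byte ABI alignment, 2-byte *preferred* alignment, which is
  // roughly what lets unannotated globals default to even addresses for LARL.
  assert(DL.getABITypeAlignment(Type::getInt8Ty(Ctx)) == 1);
  assert(DL.getPrefTypeAlignment(Type::getInt8Ty(Ctx)) == 2);
  return 0;
}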
+class SystemZPassConfig : public TargetPassConfig { +public: + SystemZPassConfig(SystemZTargetMachine *TM, PassManagerBase &PM) + : TargetPassConfig(TM, PM) {} + + SystemZTargetMachine &getSystemZTargetMachine() const { + return getTM<SystemZTargetMachine>(); + } + + virtual bool addInstSelector(); +}; +} // end anonymous namespace + +bool SystemZPassConfig::addInstSelector() { + addPass(createSystemZISelDag(getSystemZTargetMachine(), getOptLevel())); + return false; +} + +TargetPassConfig *SystemZTargetMachine::createPassConfig(PassManagerBase &PM) { + return new SystemZPassConfig(this, PM); +} diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZTargetMachine.h b/contrib/llvm/lib/Target/SystemZ/SystemZTargetMachine.h new file mode 100644 index 0000000..98614e7 --- /dev/null +++ b/contrib/llvm/lib/Target/SystemZ/SystemZTargetMachine.h @@ -0,0 +1,74 @@ +//==- SystemZTargetMachine.h - Define TargetMachine for SystemZ ---*- C++ -*-=// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file declares the SystemZ specific subclass of TargetMachine. +// +//===----------------------------------------------------------------------===// + + +#ifndef SYSTEMZTARGETMACHINE_H +#define SYSTEMZTARGETMACHINE_H + +#include "SystemZFrameLowering.h" +#include "SystemZISelLowering.h" +#include "SystemZInstrInfo.h" +#include "SystemZRegisterInfo.h" +#include "SystemZSubtarget.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/Target/TargetFrameLowering.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetSelectionDAGInfo.h" + +namespace llvm { + +class SystemZTargetMachine : public LLVMTargetMachine { + SystemZSubtarget Subtarget; + const DataLayout DL; + SystemZInstrInfo InstrInfo; + SystemZTargetLowering TLInfo; + TargetSelectionDAGInfo TSInfo; + SystemZFrameLowering FrameLowering; + +public: + SystemZTargetMachine(const Target &T, StringRef TT, StringRef CPU, + StringRef FS, const TargetOptions &Options, + Reloc::Model RM, CodeModel::Model CM, + CodeGenOpt::Level OL); + + // Override TargetMachine. 
+ virtual const TargetFrameLowering *getFrameLowering() const LLVM_OVERRIDE { + return &FrameLowering; + } + virtual const SystemZInstrInfo *getInstrInfo() const LLVM_OVERRIDE { + return &InstrInfo; + } + virtual const SystemZSubtarget *getSubtargetImpl() const LLVM_OVERRIDE { + return &Subtarget; + } + virtual const DataLayout *getDataLayout() const LLVM_OVERRIDE { + return &DL; + } + virtual const SystemZRegisterInfo *getRegisterInfo() const LLVM_OVERRIDE { + return &InstrInfo.getRegisterInfo(); + } + virtual const SystemZTargetLowering *getTargetLowering() const LLVM_OVERRIDE { + return &TLInfo; + } + virtual const TargetSelectionDAGInfo *getSelectionDAGInfo() const + LLVM_OVERRIDE { + return &TSInfo; + } + + // Override LLVMTargetMachine + virtual TargetPassConfig *createPassConfig(PassManagerBase &PM) LLVM_OVERRIDE; +}; + +} // end namespace llvm + +#endif diff --git a/contrib/llvm/lib/Target/SystemZ/TargetInfo/SystemZTargetInfo.cpp b/contrib/llvm/lib/Target/SystemZ/TargetInfo/SystemZTargetInfo.cpp new file mode 100644 index 0000000..8f9aa28 --- /dev/null +++ b/contrib/llvm/lib/Target/SystemZ/TargetInfo/SystemZTargetInfo.cpp @@ -0,0 +1,20 @@ +//===-- SystemZTargetInfo.cpp - SystemZ target implementation -------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "SystemZ.h" +#include "llvm/Support/TargetRegistry.h" + +using namespace llvm; + +Target llvm::TheSystemZTarget; + +extern "C" void LLVMInitializeSystemZTargetInfo() { + RegisterTarget<Triple::systemz, /*HasJIT=*/true> + X(TheSystemZTarget, "systemz", "SystemZ"); +} diff --git a/contrib/llvm/lib/Target/Target.cpp b/contrib/llvm/lib/Target/Target.cpp index 9a78ebc..3d92f29 100644 --- a/contrib/llvm/lib/Target/Target.cpp +++ b/contrib/llvm/lib/Target/Target.cpp @@ -16,6 +16,7 @@ #include "llvm-c/Initialization.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/LLVMContext.h" +#include "llvm/IR/Value.h" #include "llvm/InitializePasses.h" #include "llvm/PassManager.h" #include "llvm/Target/TargetLibraryInfo.h" @@ -23,6 +24,23 @@ using namespace llvm; +inline DataLayout *unwrap(LLVMTargetDataRef P) { + return reinterpret_cast<DataLayout*>(P); +} + +inline LLVMTargetDataRef wrap(const DataLayout *P) { + return reinterpret_cast<LLVMTargetDataRef>(const_cast<DataLayout*>(P)); +} + +inline TargetLibraryInfo *unwrap(LLVMTargetLibraryInfoRef P) { + return reinterpret_cast<TargetLibraryInfo*>(P); +} + +inline LLVMTargetLibraryInfoRef wrap(const TargetLibraryInfo *P) { + TargetLibraryInfo *X = const_cast<TargetLibraryInfo*>(P); + return reinterpret_cast<LLVMTargetLibraryInfoRef>(X); +} + void llvm::initializeTarget(PassRegistry &Registry) { initializeDataLayoutPass(Registry); initializeTargetLibraryInfoPass(Registry); diff --git a/contrib/llvm/lib/Target/TargetMachineC.cpp b/contrib/llvm/lib/Target/TargetMachineC.cpp index 79f74bd..01d12e8 100644 --- a/contrib/llvm/lib/Target/TargetMachineC.cpp +++ b/contrib/llvm/lib/Target/TargetMachineC.cpp @@ -28,7 +28,36 @@ using namespace llvm; +inline DataLayout *unwrap(LLVMTargetDataRef P) { + return reinterpret_cast<DataLayout*>(P); +} + +inline LLVMTargetDataRef wrap(const DataLayout *P) { + return reinterpret_cast<LLVMTargetDataRef>(const_cast<DataLayout*>(P)); +} + +inline TargetLibraryInfo *unwrap(LLVMTargetLibraryInfoRef P) { + return reinterpret_cast<TargetLibraryInfo*>(P); +} 
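[Editorial aside, not part of the patch: the TargetMachineC.cpp hunk below routes file emission through a shared LLVMTargetMachineEmit helper and adds LLVMTargetMachineEmitToMemoryBuffer. A hedged usage sketch of the new entry point, assuming TM and Mod were already created through the LLVM-C API and that LLVMGetBufferSize/LLVMDisposeMemoryBuffer from llvm-c/Core.h are available:]

#include "llvm-c/Core.h"
#include "llvm-c/TargetMachine.h"
#include <cstdio>

static int emitToBuffer(LLVMTargetMachineRef TM, LLVMModuleRef Mod) {
  char *Error = 0;
  LLVMMemoryBufferRef Buf = 0;
  // Nonzero return means failure; ErrorMessage is then strdup'd for us.
  if (LLVMTargetMachineEmitToMemoryBuffer(TM, Mod, LLVMObjectFile,
                                          &Error, &Buf)) {
    fprintf(stderr, "emit failed: %s\n", Error);
    return 1;
  }
  // The buffer owns a private copy of the object code.
  fprintf(stderr, "emitted %zu bytes\n", LLVMGetBufferSize(Buf));
  LLVMDisposeMemoryBuffer(Buf);
  return 0;
}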
+ +inline LLVMTargetLibraryInfoRef wrap(const TargetLibraryInfo *P) { + TargetLibraryInfo *X = const_cast<TargetLibraryInfo*>(P); + return reinterpret_cast<LLVMTargetLibraryInfoRef>(X); +} +inline TargetMachine *unwrap(LLVMTargetMachineRef P) { + return reinterpret_cast<TargetMachine*>(P); +} +inline Target *unwrap(LLVMTargetRef P) { + return reinterpret_cast<Target*>(P); +} +inline LLVMTargetMachineRef wrap(const TargetMachine *P) { + return + reinterpret_cast<LLVMTargetMachineRef>(const_cast<TargetMachine*>(P)); +} +inline LLVMTargetRef wrap(const Target * P) { + return reinterpret_cast<LLVMTargetRef>(const_cast<Target*>(P)); +} LLVMTargetRef LLVMGetFirstTarget() { const Target* target = &*TargetRegistry::begin(); @@ -77,29 +106,9 @@ LLVMTargetMachineRef LLVMCreateTargetMachine(LLVMTargetRef T, char* Triple, break; } - CodeModel::Model CM; - switch (CodeModel) { - case LLVMCodeModelJITDefault: - CM = CodeModel::JITDefault; - break; - case LLVMCodeModelSmall: - CM = CodeModel::Small; - break; - case LLVMCodeModelKernel: - CM = CodeModel::Kernel; - break; - case LLVMCodeModelMedium: - CM = CodeModel::Medium; - break; - case LLVMCodeModelLarge: - CM = CodeModel::Large; - break; - default: - CM = CodeModel::Default; - break; - } - CodeGenOpt::Level OL; + CodeModel::Model CM = unwrap(CodeModel); + CodeGenOpt::Level OL; switch (Level) { case LLVMCodeGenLevelNone: OL = CodeGenOpt::None; @@ -149,8 +158,8 @@ LLVMTargetDataRef LLVMGetTargetMachineData(LLVMTargetMachineRef T) { return wrap(unwrap(T)->getDataLayout()); } -LLVMBool LLVMTargetMachineEmitToFile(LLVMTargetMachineRef T, LLVMModuleRef M, - char* Filename, LLVMCodeGenFileType codegen, char** ErrorMessage) { +static LLVMBool LLVMTargetMachineEmit(LLVMTargetMachineRef T, LLVMModuleRef M, + formatted_raw_ostream &OS, LLVMCodeGenFileType codegen, char **ErrorMessage) { TargetMachine* TM = unwrap(T); Module* Mod = unwrap(M); @@ -176,14 +185,7 @@ LLVMBool LLVMTargetMachineEmitToFile(LLVMTargetMachineRef T, LLVMModuleRef M, ft = TargetMachine::CGFT_ObjectFile; break; } - raw_fd_ostream dest(Filename, error, raw_fd_ostream::F_Binary); - formatted_raw_ostream destf(dest); - if (!error.empty()) { - *ErrorMessage = strdup(error.c_str()); - return true; - } - - if (TM->addPassesToEmitFile(pass, destf, ft)) { + if (TM->addPassesToEmitFile(pass, OS, ft)) { error = "TargetMachine can't emit a file of this type"; *ErrorMessage = strdup(error.c_str()); return true; @@ -191,7 +193,35 @@ LLVMBool LLVMTargetMachineEmitToFile(LLVMTargetMachineRef T, LLVMModuleRef M, pass.run(*Mod); - destf.flush(); - dest.flush(); + OS.flush(); return false; } + +LLVMBool LLVMTargetMachineEmitToFile(LLVMTargetMachineRef T, LLVMModuleRef M, + char* Filename, LLVMCodeGenFileType codegen, char** ErrorMessage) { + std::string error; + raw_fd_ostream dest(Filename, error, raw_fd_ostream::F_Binary); + formatted_raw_ostream destf(dest); + if (!error.empty()) { + *ErrorMessage = strdup(error.c_str()); + return true; + } + bool Result = LLVMTargetMachineEmit(T, M, destf, codegen, ErrorMessage); + dest.flush(); + return Result; +} + +LLVMBool LLVMTargetMachineEmitToMemoryBuffer(LLVMTargetMachineRef T, + LLVMModuleRef M, LLVMCodeGenFileType codegen, char** ErrorMessage, + LLVMMemoryBufferRef *OutMemBuf) { + std::string CodeString; + raw_string_ostream OStream(CodeString); + formatted_raw_ostream Out(OStream); + bool Result = LLVMTargetMachineEmit(T, M, Out, codegen, ErrorMessage); + OStream.flush(); + + std::string &Data = OStream.str(); + *OutMemBuf = 
LLVMCreateMemoryBufferWithMemoryRangeCopy(Data.c_str(), + Data.length(), ""); + return Result; +} diff --git a/contrib/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp b/contrib/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp index e462322..68908ab 100644 --- a/contrib/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp +++ b/contrib/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp @@ -33,17 +33,451 @@ using namespace llvm; namespace { struct X86Operand; +static const char OpPrecedence[] = { + 0, // IC_PLUS + 0, // IC_MINUS + 1, // IC_MULTIPLY + 1, // IC_DIVIDE + 2, // IC_RPAREN + 3, // IC_LPAREN + 0, // IC_IMM + 0 // IC_REGISTER +}; + class X86AsmParser : public MCTargetAsmParser { MCSubtargetInfo &STI; MCAsmParser &Parser; ParseInstructionInfo *InstInfo; private: + enum InfixCalculatorTok { + IC_PLUS = 0, + IC_MINUS, + IC_MULTIPLY, + IC_DIVIDE, + IC_RPAREN, + IC_LPAREN, + IC_IMM, + IC_REGISTER + }; + + class InfixCalculator { + typedef std::pair< InfixCalculatorTok, int64_t > ICToken; + SmallVector<InfixCalculatorTok, 4> InfixOperatorStack; + SmallVector<ICToken, 4> PostfixStack; + + public: + int64_t popOperand() { + assert (!PostfixStack.empty() && "Poped an empty stack!"); + ICToken Op = PostfixStack.pop_back_val(); + assert ((Op.first == IC_IMM || Op.first == IC_REGISTER) + && "Expected and immediate or register!"); + return Op.second; + } + void pushOperand(InfixCalculatorTok Op, int64_t Val = 0) { + assert ((Op == IC_IMM || Op == IC_REGISTER) && + "Unexpected operand!"); + PostfixStack.push_back(std::make_pair(Op, Val)); + } + + void popOperator() { InfixOperatorStack.pop_back_val(); } + void pushOperator(InfixCalculatorTok Op) { + // Push the new operator if the stack is empty. + if (InfixOperatorStack.empty()) { + InfixOperatorStack.push_back(Op); + return; + } + + // Push the new operator if it has a higher precedence than the operator + // on the top of the stack or the operator on the top of the stack is a + // left parentheses. + unsigned Idx = InfixOperatorStack.size() - 1; + InfixCalculatorTok StackOp = InfixOperatorStack[Idx]; + if (OpPrecedence[Op] > OpPrecedence[StackOp] || StackOp == IC_LPAREN) { + InfixOperatorStack.push_back(Op); + return; + } + + // The operator on the top of the stack has higher precedence than the + // new operator. + unsigned ParenCount = 0; + while (1) { + // Nothing to process. + if (InfixOperatorStack.empty()) + break; + + Idx = InfixOperatorStack.size() - 1; + StackOp = InfixOperatorStack[Idx]; + if (!(OpPrecedence[StackOp] >= OpPrecedence[Op] || ParenCount)) + break; + + // If we have an even parentheses count and we see a left parentheses, + // then stop processing. + if (!ParenCount && StackOp == IC_LPAREN) + break; + + if (StackOp == IC_RPAREN) { + ++ParenCount; + InfixOperatorStack.pop_back_val(); + } else if (StackOp == IC_LPAREN) { + --ParenCount; + InfixOperatorStack.pop_back_val(); + } else { + InfixOperatorStack.pop_back_val(); + PostfixStack.push_back(std::make_pair(StackOp, 0)); + } + } + // Push the new operator. + InfixOperatorStack.push_back(Op); + } + int64_t execute() { + // Push any remaining operators onto the postfix stack. 
+ while (!InfixOperatorStack.empty()) { + InfixCalculatorTok StackOp = InfixOperatorStack.pop_back_val(); + if (StackOp != IC_LPAREN && StackOp != IC_RPAREN) + PostfixStack.push_back(std::make_pair(StackOp, 0)); + } + + if (PostfixStack.empty()) + return 0; + + SmallVector<ICToken, 16> OperandStack; + for (unsigned i = 0, e = PostfixStack.size(); i != e; ++i) { + ICToken Op = PostfixStack[i]; + if (Op.first == IC_IMM || Op.first == IC_REGISTER) { + OperandStack.push_back(Op); + } else { + assert (OperandStack.size() > 1 && "Too few operands."); + int64_t Val; + ICToken Op2 = OperandStack.pop_back_val(); + ICToken Op1 = OperandStack.pop_back_val(); + switch (Op.first) { + default: + report_fatal_error("Unexpected operator!"); + break; + case IC_PLUS: + Val = Op1.second + Op2.second; + OperandStack.push_back(std::make_pair(IC_IMM, Val)); + break; + case IC_MINUS: + Val = Op1.second - Op2.second; + OperandStack.push_back(std::make_pair(IC_IMM, Val)); + break; + case IC_MULTIPLY: + assert (Op1.first == IC_IMM && Op2.first == IC_IMM && + "Multiply operation with an immediate and a register!"); + Val = Op1.second * Op2.second; + OperandStack.push_back(std::make_pair(IC_IMM, Val)); + break; + case IC_DIVIDE: + assert (Op1.first == IC_IMM && Op2.first == IC_IMM && + "Divide operation with an immediate and a register!"); + assert (Op2.second != 0 && "Division by zero!"); + Val = Op1.second / Op2.second; + OperandStack.push_back(std::make_pair(IC_IMM, Val)); + break; + } + } + } + assert (OperandStack.size() == 1 && "Expected a single result."); + return OperandStack.pop_back_val().second; + } + }; + + enum IntelExprState { + IES_PLUS, + IES_MINUS, + IES_MULTIPLY, + IES_DIVIDE, + IES_LBRAC, + IES_RBRAC, + IES_LPAREN, + IES_RPAREN, + IES_REGISTER, + IES_INTEGER, + IES_IDENTIFIER, + IES_ERROR + }; + + class IntelExprStateMachine { + IntelExprState State, PrevState; + unsigned BaseReg, IndexReg, TmpReg, Scale; + int64_t Imm; + const MCExpr *Sym; + StringRef SymName; + bool StopOnLBrac, AddImmPrefix; + InfixCalculator IC; + InlineAsmIdentifierInfo Info; + public: + IntelExprStateMachine(int64_t imm, bool stoponlbrac, bool addimmprefix) : + State(IES_PLUS), PrevState(IES_ERROR), BaseReg(0), IndexReg(0), TmpReg(0), + Scale(1), Imm(imm), Sym(0), StopOnLBrac(stoponlbrac), + AddImmPrefix(addimmprefix) { Info.clear(); } + + unsigned getBaseReg() { return BaseReg; } + unsigned getIndexReg() { return IndexReg; } + unsigned getScale() { return Scale; } + const MCExpr *getSym() { return Sym; } + StringRef getSymName() { return SymName; } + int64_t getImm() { return Imm + IC.execute(); } + bool isValidEndState() { return State == IES_RBRAC; } + bool getStopOnLBrac() { return StopOnLBrac; } + bool getAddImmPrefix() { return AddImmPrefix; } + bool hadError() { return State == IES_ERROR; } + + InlineAsmIdentifierInfo &getIdentifierInfo() { + return Info; + } + + void onPlus() { + IntelExprState CurrState = State; + switch (State) { + default: + State = IES_ERROR; + break; + case IES_INTEGER: + case IES_RPAREN: + case IES_REGISTER: + State = IES_PLUS; + IC.pushOperator(IC_PLUS); + if (CurrState == IES_REGISTER && PrevState != IES_MULTIPLY) { + // If we already have a BaseReg, then assume this is the IndexReg with + // a scale of 1. 
+ if (!BaseReg) { + BaseReg = TmpReg; + } else { + assert (!IndexReg && "BaseReg/IndexReg already set!"); + IndexReg = TmpReg; + Scale = 1; + } + } + break; + } + PrevState = CurrState; + } + void onMinus() { + IntelExprState CurrState = State; + switch (State) { + default: + State = IES_ERROR; + break; + case IES_PLUS: + case IES_MULTIPLY: + case IES_DIVIDE: + case IES_LPAREN: + case IES_RPAREN: + case IES_LBRAC: + case IES_RBRAC: + case IES_INTEGER: + case IES_REGISTER: + State = IES_MINUS; + // Only push the minus operator if it is not a unary operator. + if (!(CurrState == IES_PLUS || CurrState == IES_MINUS || + CurrState == IES_MULTIPLY || CurrState == IES_DIVIDE || + CurrState == IES_LPAREN || CurrState == IES_LBRAC)) + IC.pushOperator(IC_MINUS); + if (CurrState == IES_REGISTER && PrevState != IES_MULTIPLY) { + // If we already have a BaseReg, then assume this is the IndexReg with + // a scale of 1. + if (!BaseReg) { + BaseReg = TmpReg; + } else { + assert (!IndexReg && "BaseReg/IndexReg already set!"); + IndexReg = TmpReg; + Scale = 1; + } + } + break; + } + PrevState = CurrState; + } + void onRegister(unsigned Reg) { + IntelExprState CurrState = State; + switch (State) { + default: + State = IES_ERROR; + break; + case IES_PLUS: + case IES_LPAREN: + State = IES_REGISTER; + TmpReg = Reg; + IC.pushOperand(IC_REGISTER); + break; + case IES_MULTIPLY: + // Index Register - Scale * Register + if (PrevState == IES_INTEGER) { + assert (!IndexReg && "IndexReg already set!"); + State = IES_REGISTER; + IndexReg = Reg; + // Get the scale and replace the 'Scale * Register' with '0'. + Scale = IC.popOperand(); + IC.pushOperand(IC_IMM); + IC.popOperator(); + } else { + State = IES_ERROR; + } + break; + } + PrevState = CurrState; + } + void onIdentifierExpr(const MCExpr *SymRef, StringRef SymRefName) { + PrevState = State; + switch (State) { + default: + State = IES_ERROR; + break; + case IES_PLUS: + case IES_MINUS: + State = IES_INTEGER; + Sym = SymRef; + SymName = SymRefName; + IC.pushOperand(IC_IMM); + break; + } + } + void onInteger(int64_t TmpInt) { + IntelExprState CurrState = State; + switch (State) { + default: + State = IES_ERROR; + break; + case IES_PLUS: + case IES_MINUS: + case IES_DIVIDE: + case IES_MULTIPLY: + case IES_LPAREN: + State = IES_INTEGER; + if (PrevState == IES_REGISTER && CurrState == IES_MULTIPLY) { + // Index Register - Register * Scale + assert (!IndexReg && "IndexReg already set!"); + IndexReg = TmpReg; + Scale = TmpInt; + // Get the scale and replace the 'Register * Scale' with '0'. + IC.popOperator(); + } else if ((PrevState == IES_PLUS || PrevState == IES_MINUS || + PrevState == IES_MULTIPLY || PrevState == IES_DIVIDE || + PrevState == IES_LPAREN || PrevState == IES_LBRAC) && + CurrState == IES_MINUS) { + // Unary minus. No need to pop the minus operand because it was never + // pushed. + IC.pushOperand(IC_IMM, -TmpInt); // Push -Imm. 
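// For instance, for the bare operand [-42] the '-' arrives while the state
// machine is still in IES_PLUS, so no IC_MINUS operator was pushed and this
// branch pushes the already-negated immediate -42 onto the postfix stack.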
+ } else { + IC.pushOperand(IC_IMM, TmpInt); + } + break; + } + PrevState = CurrState; + } + void onStar() { + PrevState = State; + switch (State) { + default: + State = IES_ERROR; + break; + case IES_INTEGER: + case IES_REGISTER: + case IES_RPAREN: + State = IES_MULTIPLY; + IC.pushOperator(IC_MULTIPLY); + break; + } + } + void onDivide() { + PrevState = State; + switch (State) { + default: + State = IES_ERROR; + break; + case IES_INTEGER: + case IES_RPAREN: + State = IES_DIVIDE; + IC.pushOperator(IC_DIVIDE); + break; + } + } + void onLBrac() { + PrevState = State; + switch (State) { + default: + State = IES_ERROR; + break; + case IES_RBRAC: + State = IES_PLUS; + IC.pushOperator(IC_PLUS); + break; + } + } + void onRBrac() { + IntelExprState CurrState = State; + switch (State) { + default: + State = IES_ERROR; + break; + case IES_INTEGER: + case IES_REGISTER: + case IES_RPAREN: + State = IES_RBRAC; + if (CurrState == IES_REGISTER && PrevState != IES_MULTIPLY) { + // If we already have a BaseReg, then assume this is the IndexReg with + // a scale of 1. + if (!BaseReg) { + BaseReg = TmpReg; + } else { + assert (!IndexReg && "BaseReg/IndexReg already set!"); + IndexReg = TmpReg; + Scale = 1; + } + } + break; + } + PrevState = CurrState; + } + void onLParen() { + IntelExprState CurrState = State; + switch (State) { + default: + State = IES_ERROR; + break; + case IES_PLUS: + case IES_MINUS: + case IES_MULTIPLY: + case IES_DIVIDE: + case IES_LPAREN: + // FIXME: We don't handle this type of unary minus, yet. + if ((PrevState == IES_PLUS || PrevState == IES_MINUS || + PrevState == IES_MULTIPLY || PrevState == IES_DIVIDE || + PrevState == IES_LPAREN || PrevState == IES_LBRAC) && + CurrState == IES_MINUS) { + State = IES_ERROR; + break; + } + State = IES_LPAREN; + IC.pushOperator(IC_LPAREN); + break; + } + PrevState = CurrState; + } + void onRParen() { + PrevState = State; + switch (State) { + default: + State = IES_ERROR; + break; + case IES_INTEGER: + case IES_REGISTER: + case IES_RPAREN: + State = IES_RPAREN; + IC.pushOperator(IC_RPAREN); + break; + } + } + }; + MCAsmParser &getParser() const { return Parser; } MCAsmLexer &getLexer() const { return Parser.getLexer(); } bool Error(SMLoc L, const Twine &Msg, - ArrayRef<SMRange> Ranges = ArrayRef<SMRange>(), + ArrayRef<SMRange> Ranges = None, bool MatchingInlineAsm = false) { if (MatchingInlineAsm) return true; return Parser.Error(L, Msg, Ranges); @@ -57,21 +491,25 @@ private: X86Operand *ParseOperand(); X86Operand *ParseATTOperand(); X86Operand *ParseIntelOperand(); - X86Operand *ParseIntelOffsetOfOperator(SMLoc StartLoc); - X86Operand *ParseIntelOperator(SMLoc StartLoc, unsigned OpKind); - X86Operand *ParseIntelMemOperand(unsigned SegReg, uint64_t ImmDisp, + X86Operand *ParseIntelOffsetOfOperator(); + X86Operand *ParseIntelDotOperator(const MCExpr *Disp, const MCExpr *&NewDisp); + X86Operand *ParseIntelOperator(unsigned OpKind); + X86Operand *ParseIntelMemOperand(unsigned SegReg, int64_t ImmDisp, SMLoc StartLoc); - X86Operand *ParseIntelBracExpression(unsigned SegReg, uint64_t ImmDisp, - unsigned Size); - X86Operand *ParseIntelVarWithQualifier(const MCExpr *&Disp, - SMLoc &IdentStart); - X86Operand *ParseMemOperand(unsigned SegReg, SMLoc StartLoc); + X86Operand *ParseIntelExpression(IntelExprStateMachine &SM, SMLoc &End); + X86Operand *ParseIntelBracExpression(unsigned SegReg, SMLoc Start, + int64_t ImmDisp, unsigned Size); + X86Operand *ParseIntelIdentifier(const MCExpr *&Val, StringRef &Identifier, + InlineAsmIdentifierInfo &Info, + bool 
IsUnevaluatedOperand, SMLoc &End); - X86Operand *CreateMemForInlineAsm(const MCExpr *Disp, SMLoc Start, SMLoc End, - SMLoc SizeDirLoc, unsigned Size); + X86Operand *ParseMemOperand(unsigned SegReg, SMLoc StartLoc); - bool ParseIntelDotOperator(const MCExpr *Disp, const MCExpr **NewDisp, - SmallString<64> &Err); + X86Operand *CreateMemForInlineAsm(unsigned SegReg, const MCExpr *Disp, + unsigned BaseReg, unsigned IndexReg, + unsigned Scale, SMLoc Start, SMLoc End, + unsigned Size, StringRef Identifier, + InlineAsmIdentifierInfo &Info); bool ParseDirectiveWord(unsigned Size, SMLoc L); bool ParseDirectiveCode(StringRef IDVal, SMLoc L); @@ -101,6 +539,10 @@ private: setAvailableFeatures(FB); } + bool isParsingIntelSyntax() { + return getParser().getAssemblerDialect(); + } + /// @name Auto-generated Matcher Functions /// { @@ -123,10 +565,6 @@ public: SmallVectorImpl<MCParsedAsmOperand*> &Operands); virtual bool ParseDirective(AsmToken DirectiveID); - - bool isParsingIntelSyntax() { - return getParser().getAssemblerDialect(); - } }; } // end anonymous namespace @@ -176,6 +614,8 @@ struct X86Operand : public MCParsedAsmOperand { SMLoc StartLoc, EndLoc; SMLoc OffsetOfLoc; + StringRef SymName; + void *OpDecl; bool AddressOf; struct TokOp { @@ -210,6 +650,9 @@ struct X86Operand : public MCParsedAsmOperand { X86Operand(KindTy K, SMLoc Start, SMLoc End) : Kind(K), StartLoc(Start), EndLoc(End) {} + StringRef getSymName() { return SymName; } + void *getOpDecl() { return OpDecl; } + /// getStartLoc - Get the location of the first token of this operand. SMLoc getStartLoc() const { return StartLoc; } /// getEndLoc - Get the location of the last token of this operand. @@ -473,11 +916,15 @@ struct X86Operand : public MCParsedAsmOperand { static X86Operand *CreateReg(unsigned RegNo, SMLoc StartLoc, SMLoc EndLoc, bool AddressOf = false, - SMLoc OffsetOfLoc = SMLoc()) { + SMLoc OffsetOfLoc = SMLoc(), + StringRef SymName = StringRef(), + void *OpDecl = 0) { X86Operand *Res = new X86Operand(Register, StartLoc, EndLoc); Res->Reg.RegNo = RegNo; Res->AddressOf = AddressOf; Res->OffsetOfLoc = OffsetOfLoc; + Res->SymName = SymName; + Res->OpDecl = OpDecl; return Res; } @@ -489,7 +936,8 @@ struct X86Operand : public MCParsedAsmOperand { /// Create an absolute memory operand. static X86Operand *CreateMem(const MCExpr *Disp, SMLoc StartLoc, SMLoc EndLoc, - unsigned Size = 0) { + unsigned Size = 0, StringRef SymName = StringRef(), + void *OpDecl = 0) { X86Operand *Res = new X86Operand(Memory, StartLoc, EndLoc); Res->Mem.SegReg = 0; Res->Mem.Disp = Disp; @@ -497,7 +945,9 @@ struct X86Operand : public MCParsedAsmOperand { Res->Mem.IndexReg = 0; Res->Mem.Scale = 1; Res->Mem.Size = Size; - Res->AddressOf = false; + Res->SymName = SymName; + Res->OpDecl = OpDecl; + Res->AddressOf = false; return Res; } @@ -505,7 +955,9 @@ struct X86Operand : public MCParsedAsmOperand { static X86Operand *CreateMem(unsigned SegReg, const MCExpr *Disp, unsigned BaseReg, unsigned IndexReg, unsigned Scale, SMLoc StartLoc, SMLoc EndLoc, - unsigned Size = 0) { + unsigned Size = 0, + StringRef SymName = StringRef(), + void *OpDecl = 0) { // We should never just have a displacement, that should be parsed as an // absolute memory operand. 
assert((SegReg || BaseReg || IndexReg) && "Invalid memory operand!"); @@ -520,7 +972,9 @@ struct X86Operand : public MCParsedAsmOperand { Res->Mem.IndexReg = IndexReg; Res->Mem.Scale = Scale; Res->Mem.Size = Size; - Res->AddressOf = false; + Res->SymName = SymName; + Res->OpDecl = OpDecl; + Res->AddressOf = false; return Res; } }; @@ -676,306 +1130,104 @@ static unsigned getIntelMemOperandSize(StringRef OpStr) { return Size; } -enum IntelBracExprState { - IBES_START, - IBES_LBRAC, - IBES_RBRAC, - IBES_REGISTER, - IBES_REGISTER_STAR, - IBES_REGISTER_STAR_INTEGER, - IBES_INTEGER, - IBES_INTEGER_STAR, - IBES_INDEX_REGISTER, - IBES_IDENTIFIER, - IBES_DISP_EXPR, - IBES_MINUS, - IBES_ERROR -}; - -class IntelBracExprStateMachine { - IntelBracExprState State; - unsigned BaseReg, IndexReg, Scale; - int64_t Disp; - - unsigned TmpReg; - int64_t TmpInteger; - - bool isPlus; - -public: - IntelBracExprStateMachine(MCAsmParser &parser, int64_t disp) : - State(IBES_START), BaseReg(0), IndexReg(0), Scale(1), Disp(disp), - TmpReg(0), TmpInteger(0), isPlus(true) {} - - unsigned getBaseReg() { return BaseReg; } - unsigned getIndexReg() { return IndexReg; } - unsigned getScale() { return Scale; } - int64_t getDisp() { return Disp; } - bool isValidEndState() { return State == IBES_RBRAC; } - - void onPlus() { - switch (State) { - default: - State = IBES_ERROR; - break; - case IBES_INTEGER: - State = IBES_START; - if (isPlus) - Disp += TmpInteger; - else - Disp -= TmpInteger; - break; - case IBES_REGISTER: - State = IBES_START; - // If we already have a BaseReg, then assume this is the IndexReg with a - // scale of 1. - if (!BaseReg) { - BaseReg = TmpReg; - } else { - assert (!IndexReg && "BaseReg/IndexReg already set!"); - IndexReg = TmpReg; - Scale = 1; - } - break; - case IBES_INDEX_REGISTER: - State = IBES_START; - break; - } - isPlus = true; - } - void onMinus() { - switch (State) { - default: - State = IBES_ERROR; - break; - case IBES_START: - State = IBES_MINUS; - break; - case IBES_INTEGER: - State = IBES_START; - if (isPlus) - Disp += TmpInteger; - else - Disp -= TmpInteger; - break; - case IBES_REGISTER: - State = IBES_START; - // If we already have a BaseReg, then assume this is the IndexReg with a - // scale of 1. - if (!BaseReg) { - BaseReg = TmpReg; - } else { - assert (!IndexReg && "BaseReg/IndexReg already set!"); - IndexReg = TmpReg; - Scale = 1; - } - break; - case IBES_INDEX_REGISTER: - State = IBES_START; - break; - } - isPlus = false; - } - void onRegister(unsigned Reg) { - switch (State) { - default: - State = IBES_ERROR; - break; - case IBES_START: - State = IBES_REGISTER; - TmpReg = Reg; - break; - case IBES_INTEGER_STAR: - assert (!IndexReg && "IndexReg already set!"); - State = IBES_INDEX_REGISTER; - IndexReg = Reg; - Scale = TmpInteger; - break; - } - } - void onDispExpr() { - switch (State) { - default: - State = IBES_ERROR; - break; - case IBES_START: - State = IBES_DISP_EXPR; - break; +X86Operand * +X86AsmParser::CreateMemForInlineAsm(unsigned SegReg, const MCExpr *Disp, + unsigned BaseReg, unsigned IndexReg, + unsigned Scale, SMLoc Start, SMLoc End, + unsigned Size, StringRef Identifier, + InlineAsmIdentifierInfo &Info){ + if (isa<MCSymbolRefExpr>(Disp)) { + // If this is not a VarDecl then assume it is a FuncDecl or some other label + // reference. We need an 'r' constraint here, so we need to create register + // operand to ensure proper matching. Just pick a GPR based on the size of + // a pointer. + if (!Info.IsVarDecl) { + unsigned RegNo = is64BitMode() ? 
X86::RBX : X86::EBX; + return X86Operand::CreateReg(RegNo, Start, End, /*AddressOf=*/true, + SMLoc(), Identifier, Info.OpDecl); } - } - void onInteger(int64_t TmpInt) { - switch (State) { - default: - State = IBES_ERROR; - break; - case IBES_START: - State = IBES_INTEGER; - TmpInteger = TmpInt; - break; - case IBES_MINUS: - State = IBES_INTEGER; - TmpInteger = TmpInt; - break; - case IBES_REGISTER_STAR: - assert (!IndexReg && "IndexReg already set!"); - State = IBES_INDEX_REGISTER; - IndexReg = TmpReg; - Scale = TmpInt; - break; - } - } - void onStar() { - switch (State) { - default: - State = IBES_ERROR; - break; - case IBES_INTEGER: - State = IBES_INTEGER_STAR; - break; - case IBES_REGISTER: - State = IBES_REGISTER_STAR; - break; - } - } - void onLBrac() { - switch (State) { - default: - State = IBES_ERROR; - break; - case IBES_RBRAC: - State = IBES_START; - isPlus = true; - break; - } - } - void onRBrac() { - switch (State) { - default: - State = IBES_ERROR; - break; - case IBES_DISP_EXPR: - State = IBES_RBRAC; - break; - case IBES_INTEGER: - State = IBES_RBRAC; - if (isPlus) - Disp += TmpInteger; - else - Disp -= TmpInteger; - break; - case IBES_REGISTER: - State = IBES_RBRAC; - // If we already have a BaseReg, then assume this is the IndexReg with a - // scale of 1. - if (!BaseReg) { - BaseReg = TmpReg; - } else { - assert (!IndexReg && "BaseReg/IndexReg already set!"); - IndexReg = TmpReg; - Scale = 1; - } - break; - case IBES_INDEX_REGISTER: - State = IBES_RBRAC; - break; - } - } -}; - -X86Operand *X86AsmParser::CreateMemForInlineAsm(const MCExpr *Disp, SMLoc Start, - SMLoc End, SMLoc SizeDirLoc, - unsigned Size) { - bool NeedSizeDir = false; - bool IsVarDecl = false; - if (const MCSymbolRefExpr *SymRef = dyn_cast<MCSymbolRefExpr>(Disp)) { - const MCSymbol &Sym = SymRef->getSymbol(); - // FIXME: The SemaLookup will fail if the name is anything other then an - // identifier. - // FIXME: Pass a valid SMLoc. - unsigned tLength, tSize, tType; - SemaCallback->LookupInlineAsmIdentifier(Sym.getName(), NULL, tLength, - tSize, tType, IsVarDecl); if (!Size) { - Size = tType * 8; // Size is in terms of bits in this context. - NeedSizeDir = Size > 0; + Size = Info.Type * 8; // Size is in terms of bits in this context. + if (Size) + InstInfo->AsmRewrites->push_back(AsmRewrite(AOK_SizeDirective, Start, + /*Len=*/0, Size)); } } - // If this is not a VarDecl then assume it is a FuncDecl or some other label - // reference. We need an 'r' constraint here, so we need to create register - // operand to ensure proper matching. Just pick a GPR based on the size of - // a pointer. - if (!IsVarDecl) { - unsigned RegNo = is64BitMode() ? X86::RBX : X86::EBX; - return X86Operand::CreateReg(RegNo, Start, End, /*AddressOf=*/true); - } - - if (NeedSizeDir) - InstInfo->AsmRewrites->push_back(AsmRewrite(AOK_SizeDirective, SizeDirLoc, - /*Len*/0, Size)); - // When parsing inline assembly we set the base register to a non-zero value - // as we don't know the actual value at this time. This is necessary to + // if we don't know the actual value at this time. This is necessary to // get the matching correct in some cases. - return X86Operand::CreateMem(/*SegReg*/0, Disp, /*BaseReg*/1, /*IndexReg*/0, - /*Scale*/1, Start, End, Size); + BaseReg = BaseReg ? 
BaseReg : 1; + return X86Operand::CreateMem(SegReg, Disp, BaseReg, IndexReg, Scale, Start, + End, Size, Identifier, Info.OpDecl); } -X86Operand *X86AsmParser::ParseIntelBracExpression(unsigned SegReg, - uint64_t ImmDisp, - unsigned Size) { - const AsmToken &Tok = Parser.getTok(); - SMLoc Start = Tok.getLoc(), End = Tok.getEndLoc(); - - // Eat '[' - if (getLexer().isNot(AsmToken::LBrac)) - return ErrorOperand(Start, "Expected '[' token!"); - Parser.Lex(); - - unsigned TmpReg = 0; - - // Try to handle '[' 'Symbol' ']' - if (getLexer().is(AsmToken::Identifier)) { - if (ParseRegister(TmpReg, Start, End)) { - const MCExpr *Disp; - SMLoc IdentStart = Tok.getLoc(); - if (getParser().parseExpression(Disp, End)) - return 0; - - if (X86Operand *Err = ParseIntelVarWithQualifier(Disp, IdentStart)) - return Err; - - if (getLexer().isNot(AsmToken::RBrac)) - return ErrorOperand(Parser.getTok().getLoc(), "Expected ']' token!"); - - // FIXME: We don't handle 'ImmDisp' '[' 'Symbol' ']'. - if (ImmDisp) - return ErrorOperand(Start, "Unsupported immediate displacement!"); - - // Adjust the EndLoc due to the ']'. - End = SMLoc::getFromPointer(Parser.getTok().getEndLoc().getPointer()-1); - Parser.Lex(); - if (!isParsingInlineAsm()) - return X86Operand::CreateMem(Disp, Start, End, Size); - - // We want the size directive before the '['. - SMLoc SizeDirLoc = SMLoc::getFromPointer(Start.getPointer()-1); - return CreateMemForInlineAsm(Disp, Start, End, SizeDirLoc, Size); +static void +RewriteIntelBracExpression(SmallVectorImpl<AsmRewrite> *AsmRewrites, + StringRef SymName, int64_t ImmDisp, + int64_t FinalImmDisp, SMLoc &BracLoc, + SMLoc &StartInBrac, SMLoc &End) { + // Remove the '[' and ']' from the IR string. + AsmRewrites->push_back(AsmRewrite(AOK_Skip, BracLoc, 1)); + AsmRewrites->push_back(AsmRewrite(AOK_Skip, End, 1)); + + // If ImmDisp is non-zero, then we parsed a displacement before the + // bracketed expression (i.e., ImmDisp [ BaseReg + Scale*IndexReg + Disp]) + // If ImmDisp doesn't match the displacement computed by the state machine + // then we have an additional displacement in the bracketed expression. + if (ImmDisp != FinalImmDisp) { + if (ImmDisp) { + // We have an immediate displacement before the bracketed expression. + // Adjust this to match the final immediate displacement. + bool Found = false; + for (SmallVectorImpl<AsmRewrite>::iterator I = AsmRewrites->begin(), + E = AsmRewrites->end(); I != E; ++I) { + if ((*I).Loc.getPointer() > BracLoc.getPointer()) + continue; + if ((*I).Kind == AOK_ImmPrefix || (*I).Kind == AOK_Imm) { + assert (!Found && "ImmDisp already rewritten."); + (*I).Kind = AOK_Imm; + (*I).Len = BracLoc.getPointer() - (*I).Loc.getPointer(); + (*I).Val = FinalImmDisp; + Found = true; + break; + } + } + assert (Found && "Unable to rewrite ImmDisp."); + } else { + // We have a symbolic and an immediate displacement, but no displacement + // before the bracketed expression. Put the immediate displacement + // before the bracketed expression. + AsmRewrites->push_back(AsmRewrite(AOK_Imm, BracLoc, 0, FinalImmDisp)); } } + // Remove all the ImmPrefix rewrites within the brackets. + for (SmallVectorImpl<AsmRewrite>::iterator I = AsmRewrites->begin(), + E = AsmRewrites->end(); I != E; ++I) { + if ((*I).Loc.getPointer() < StartInBrac.getPointer()) + continue; + if ((*I).Kind == AOK_ImmPrefix) + (*I).Kind = AOK_Delete; + } + const char *SymLocPtr = SymName.data(); + // Skip everything before the symbol. 
+ if (unsigned Len = SymLocPtr - StartInBrac.getPointer()) { + assert(Len > 0 && "Expected a non-negative length."); + AsmRewrites->push_back(AsmRewrite(AOK_Skip, StartInBrac, Len)); + } + // Skip everything after the symbol. + if (unsigned Len = End.getPointer() - (SymLocPtr + SymName.size())) { + SMLoc Loc = SMLoc::getFromPointer(SymLocPtr + SymName.size()); + assert(Len > 0 && "Expected a non-negative length."); + AsmRewrites->push_back(AsmRewrite(AOK_Skip, Loc, Len)); + } +} - // Parse [ BaseReg + Scale*IndexReg + Disp ]. We may have already parsed an - // immediate displacement before the bracketed expression. - bool Done = false; - IntelBracExprStateMachine SM(Parser, ImmDisp); - - // If we parsed a register, then the end loc has already been set and - // the identifier has already been lexed. We also need to update the - // state. - if (TmpReg) - SM.onRegister(TmpReg); +X86Operand * +X86AsmParser::ParseIntelExpression(IntelExprStateMachine &SM, SMLoc &End) { + const AsmToken &Tok = Parser.getTok(); - const MCExpr *Disp = 0; + bool Done = false; while (!Done) { bool UpdateLocLex = true; @@ -983,6 +1235,10 @@ X86Operand *X86AsmParser::ParseIntelBracExpression(unsigned SegReg, // identifier. Don't try an parse it as a register. if (Tok.getString().startswith(".")) break; + + // If we're parsing an immediate expression, we don't expect a '['. + if (SM.getStopOnLBrac() && getLexer().getKind() == AsmToken::LBrac) + break; switch (getLexer().getKind()) { default: { @@ -992,139 +1248,185 @@ X86Operand *X86AsmParser::ParseIntelBracExpression(unsigned SegReg, } return ErrorOperand(Tok.getLoc(), "Unexpected token!"); } + case AsmToken::EndOfStatement: { + Done = true; + break; + } case AsmToken::Identifier: { - // This could be a register or a displacement expression. - if(!ParseRegister(TmpReg, Start, End)) { + // This could be a register or a symbolic displacement. + unsigned TmpReg; + const MCExpr *Val; + SMLoc IdentLoc = Tok.getLoc(); + StringRef Identifier = Tok.getString(); + if(!ParseRegister(TmpReg, IdentLoc, End)) { SM.onRegister(TmpReg); UpdateLocLex = false; break; - } else if (!getParser().parseExpression(Disp, End)) { - SM.onDispExpr(); + } else { + if (!isParsingInlineAsm()) { + if (getParser().parsePrimaryExpr(Val, End)) + return ErrorOperand(Tok.getLoc(), "Unexpected identifier!"); + } else { + InlineAsmIdentifierInfo &Info = SM.getIdentifierInfo(); + if (X86Operand *Err = ParseIntelIdentifier(Val, Identifier, Info, + /*Unevaluated*/ false, End)) + return Err; + } + SM.onIdentifierExpr(Val, Identifier); UpdateLocLex = false; break; } return ErrorOperand(Tok.getLoc(), "Unexpected identifier!"); } - case AsmToken::Integer: { - int64_t Val = Tok.getIntVal(); - SM.onInteger(Val); + case AsmToken::Integer: + if (isParsingInlineAsm() && SM.getAddImmPrefix()) + InstInfo->AsmRewrites->push_back(AsmRewrite(AOK_ImmPrefix, + Tok.getLoc())); + SM.onInteger(Tok.getIntVal()); break; - } case AsmToken::Plus: SM.onPlus(); break; case AsmToken::Minus: SM.onMinus(); break; case AsmToken::Star: SM.onStar(); break; + case AsmToken::Slash: SM.onDivide(); break; case AsmToken::LBrac: SM.onLBrac(); break; case AsmToken::RBrac: SM.onRBrac(); break; + case AsmToken::LParen: SM.onLParen(); break; + case AsmToken::RParen: SM.onRParen(); break; } + if (SM.hadError()) + return ErrorOperand(Tok.getLoc(), "Unexpected token!"); + if (!Done && UpdateLocLex) { End = Tok.getLoc(); Parser.Lex(); // Consume the token. 
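// Worked example: for the bracketed operand [eax + 4*ebx + 16] this loop
// issues onRegister(eax), onPlus (recording EAX as the base register),
// onInteger(4), onStar, onRegister(ebx) (recording EBX as the index register
// with scale 4), onPlus, onInteger(16) and finally onRBrac; afterwards
// getImm() returns the displacement 16 and isValidEndState() is true.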
} } + return 0; +} - if (!Disp) - Disp = MCConstantExpr::Create(SM.getDisp(), getContext()); +X86Operand *X86AsmParser::ParseIntelBracExpression(unsigned SegReg, SMLoc Start, + int64_t ImmDisp, + unsigned Size) { + const AsmToken &Tok = Parser.getTok(); + SMLoc BracLoc = Tok.getLoc(), End = Tok.getEndLoc(); + if (getLexer().isNot(AsmToken::LBrac)) + return ErrorOperand(BracLoc, "Expected '[' token!"); + Parser.Lex(); // Eat '[' + + SMLoc StartInBrac = Tok.getLoc(); + // Parse [ Symbol + ImmDisp ] and [ BaseReg + Scale*IndexReg + ImmDisp ]. We + // may have already parsed an immediate displacement before the bracketed + // expression. + IntelExprStateMachine SM(ImmDisp, /*StopOnLBrac=*/false, /*AddImmPrefix=*/true); + if (X86Operand *Err = ParseIntelExpression(SM, End)) + return Err; + + const MCExpr *Disp; + if (const MCExpr *Sym = SM.getSym()) { + // A symbolic displacement. + Disp = Sym; + if (isParsingInlineAsm()) + RewriteIntelBracExpression(InstInfo->AsmRewrites, SM.getSymName(), + ImmDisp, SM.getImm(), BracLoc, StartInBrac, + End); + } else { + // An immediate displacement only. + Disp = MCConstantExpr::Create(SM.getImm(), getContext()); + } // Parse the dot operator (e.g., [ebx].foo.bar). if (Tok.getString().startswith(".")) { - SmallString<64> Err; const MCExpr *NewDisp; - if (ParseIntelDotOperator(Disp, &NewDisp, Err)) - return ErrorOperand(Tok.getLoc(), Err); + if (X86Operand *Err = ParseIntelDotOperator(Disp, NewDisp)) + return Err; - End = Parser.getTok().getEndLoc(); + End = Tok.getEndLoc(); Parser.Lex(); // Eat the field. Disp = NewDisp; } int BaseReg = SM.getBaseReg(); int IndexReg = SM.getIndexReg(); - - // handle [-42] - if (!BaseReg && !IndexReg) { - if (!SegReg) - return X86Operand::CreateMem(Disp, Start, End); - else - return X86Operand::CreateMem(SegReg, Disp, 0, 0, 1, Start, End, Size); + int Scale = SM.getScale(); + if (!isParsingInlineAsm()) { + // handle [-42] + if (!BaseReg && !IndexReg) { + if (!SegReg) + return X86Operand::CreateMem(Disp, Start, End, Size); + else + return X86Operand::CreateMem(SegReg, Disp, 0, 0, 1, Start, End, Size); + } + return X86Operand::CreateMem(SegReg, Disp, BaseReg, IndexReg, Scale, Start, + End, Size); } - int Scale = SM.getScale(); - return X86Operand::CreateMem(SegReg, Disp, BaseReg, IndexReg, Scale, - Start, End, Size); + InlineAsmIdentifierInfo &Info = SM.getIdentifierInfo(); + return CreateMemForInlineAsm(SegReg, Disp, BaseReg, IndexReg, Scale, Start, + End, Size, SM.getSymName(), Info); } // Inline assembly may use variable names with namespace alias qualifiers. -X86Operand *X86AsmParser::ParseIntelVarWithQualifier(const MCExpr *&Disp, - SMLoc &IdentStart) { - // We should only see Foo::Bar if we're parsing inline assembly. - if (!isParsingInlineAsm()) - return 0; - - // If we don't see a ':' then there can't be a qualifier. - if (getLexer().isNot(AsmToken::Colon)) - return 0; +X86Operand *X86AsmParser::ParseIntelIdentifier(const MCExpr *&Val, + StringRef &Identifier, + InlineAsmIdentifierInfo &Info, + bool IsUnevaluatedOperand, + SMLoc &End) { + assert (isParsingInlineAsm() && "Expected to be parsing inline assembly."); + Val = 0; + StringRef LineBuf(Identifier.data()); + SemaCallback->LookupInlineAsmIdentifier(LineBuf, Info, IsUnevaluatedOperand); - bool Done = false; const AsmToken &Tok = Parser.getTok(); - SMLoc IdentEnd = Tok.getEndLoc(); - while (!Done) { - switch (getLexer().getKind()) { - default: - Done = true; - break; - case AsmToken::Colon: - getLexer().Lex(); // Consume ':'. 
- if (getLexer().isNot(AsmToken::Colon)) - return ErrorOperand(Tok.getLoc(), "Expected ':' token!"); - getLexer().Lex(); // Consume second ':'. - if (getLexer().isNot(AsmToken::Identifier)) - return ErrorOperand(Tok.getLoc(), "Expected an identifier token!"); - break; - case AsmToken::Identifier: - IdentEnd = Tok.getEndLoc(); - getLexer().Lex(); // Consume the identifier. - break; - } + + // Advance the token stream until the end of the current token is + // after the end of what the frontend claimed. + const char *EndPtr = Tok.getLoc().getPointer() + LineBuf.size(); + while (true) { + End = Tok.getEndLoc(); + getLexer().Lex(); + + assert(End.getPointer() <= EndPtr && "frontend claimed part of a token?"); + if (End.getPointer() == EndPtr) break; } - size_t Len = IdentEnd.getPointer() - IdentStart.getPointer(); - StringRef Identifier(IdentStart.getPointer(), Len); + + // Create the symbol reference. + Identifier = LineBuf; MCSymbol *Sym = getContext().GetOrCreateSymbol(Identifier); MCSymbolRefExpr::VariantKind Variant = MCSymbolRefExpr::VK_None; - Disp = MCSymbolRefExpr::Create(Sym, Variant, getParser().getContext()); + Val = MCSymbolRefExpr::Create(Sym, Variant, getParser().getContext()); return 0; } /// ParseIntelMemOperand - Parse intel style memory operand. X86Operand *X86AsmParser::ParseIntelMemOperand(unsigned SegReg, - uint64_t ImmDisp, + int64_t ImmDisp, SMLoc Start) { const AsmToken &Tok = Parser.getTok(); SMLoc End; unsigned Size = getIntelMemOperandSize(Tok.getString()); if (Size) { - Parser.Lex(); - assert ((Tok.getString() == "PTR" || Tok.getString() == "ptr") && - "Unexpected token!"); - Parser.Lex(); + Parser.Lex(); // Eat operand size (e.g., byte, word). + if (Tok.getString() != "PTR" && Tok.getString() != "ptr") + return ErrorOperand(Start, "Expected 'PTR' or 'ptr' token!"); + Parser.Lex(); // Eat ptr. } // Parse ImmDisp [ BaseReg + Scale*IndexReg + Disp ]. if (getLexer().is(AsmToken::Integer)) { - const AsmToken &IntTok = Parser.getTok(); if (isParsingInlineAsm()) InstInfo->AsmRewrites->push_back(AsmRewrite(AOK_ImmPrefix, - IntTok.getLoc())); - uint64_t ImmDisp = IntTok.getIntVal(); + Tok.getLoc())); + int64_t ImmDisp = Tok.getIntVal(); Parser.Lex(); // Eat the integer. if (getLexer().isNot(AsmToken::LBrac)) return ErrorOperand(Start, "Expected '[' token!"); - return ParseIntelBracExpression(SegReg, ImmDisp, Size); + return ParseIntelBracExpression(SegReg, Start, ImmDisp, Size); } if (getLexer().is(AsmToken::LBrac)) - return ParseIntelBracExpression(SegReg, ImmDisp, Size); + return ParseIntelBracExpression(SegReg, Start, ImmDisp, Size); if (!ParseRegister(SegReg, Start, End)) { // Handel SegReg : [ ... 
] @@ -1133,37 +1435,37 @@ X86Operand *X86AsmParser::ParseIntelMemOperand(unsigned SegReg, Parser.Lex(); // Eat : if (getLexer().isNot(AsmToken::LBrac)) return ErrorOperand(Start, "Expected '[' token!"); - return ParseIntelBracExpression(SegReg, ImmDisp, Size); + return ParseIntelBracExpression(SegReg, Start, ImmDisp, Size); } - const MCExpr *Disp = MCConstantExpr::Create(0, getParser().getContext()); - SMLoc IdentStart = Tok.getLoc(); - if (getParser().parseExpression(Disp, End)) - return 0; + const MCExpr *Val; + if (!isParsingInlineAsm()) { + if (getParser().parsePrimaryExpr(Val, End)) + return ErrorOperand(Tok.getLoc(), "Unexpected token!"); - if (!isParsingInlineAsm()) - return X86Operand::CreateMem(Disp, Start, End, Size); + return X86Operand::CreateMem(Val, Start, End, Size); + } - if (X86Operand *Err = ParseIntelVarWithQualifier(Disp, IdentStart)) + InlineAsmIdentifierInfo Info; + StringRef Identifier = Tok.getString(); + if (X86Operand *Err = ParseIntelIdentifier(Val, Identifier, Info, + /*Unevaluated*/ false, End)) return Err; - - return CreateMemForInlineAsm(Disp, Start, End, Start, Size); + return CreateMemForInlineAsm(/*SegReg=*/0, Val, /*BaseReg=*/0,/*IndexReg=*/0, + /*Scale=*/1, Start, End, Size, Identifier, Info); } /// Parse the '.' operator. -bool X86AsmParser::ParseIntelDotOperator(const MCExpr *Disp, - const MCExpr **NewDisp, - SmallString<64> &Err) { - AsmToken Tok = *&Parser.getTok(); - uint64_t OrigDispVal, DotDispVal; +X86Operand *X86AsmParser::ParseIntelDotOperator(const MCExpr *Disp, + const MCExpr *&NewDisp) { + const AsmToken &Tok = Parser.getTok(); + int64_t OrigDispVal, DotDispVal; // FIXME: Handle non-constant expressions. - if (const MCConstantExpr *OrigDisp = dyn_cast<MCConstantExpr>(Disp)) { + if (const MCConstantExpr *OrigDisp = dyn_cast<MCConstantExpr>(Disp)) OrigDispVal = OrigDisp->getValue(); - } else { - Err = "Non-constant offsets are not supported!"; - return true; - } + else + return ErrorOperand(Tok.getLoc(), "Non-constant offsets are not supported!"); // Drop the '.'. StringRef DotDispStr = Tok.getString().drop_front(1); @@ -1173,23 +1475,15 @@ bool X86AsmParser::ParseIntelDotOperator(const MCExpr *Disp, APInt DotDisp; DotDispStr.getAsInteger(10, DotDisp); DotDispVal = DotDisp.getZExtValue(); - } else if (Tok.is(AsmToken::Identifier)) { - // We should only see an identifier when parsing the original inline asm. - // The front-end should rewrite this in terms of immediates. - assert (isParsingInlineAsm() && "Unexpected field name!"); - + } else if (isParsingInlineAsm() && Tok.is(AsmToken::Identifier)) { unsigned DotDisp; std::pair<StringRef, StringRef> BaseMember = DotDispStr.split('.'); if (SemaCallback->LookupInlineAsmField(BaseMember.first, BaseMember.second, - DotDisp)) { - Err = "Unable to lookup field reference!"; - return true; - } + DotDisp)) + return ErrorOperand(Tok.getLoc(), "Unable to lookup field reference!"); DotDispVal = DotDisp; - } else { - Err = "Unexpected token type!"; - return true; - } + } else + return ErrorOperand(Tok.getLoc(), "Unexpected token type!"); if (isParsingInlineAsm() && Tok.is(AsmToken::Identifier)) { SMLoc Loc = SMLoc::getFromPointer(DotDispStr.data()); @@ -1199,22 +1493,24 @@ bool X86AsmParser::ParseIntelDotOperator(const MCExpr *Disp, Val)); } - *NewDisp = MCConstantExpr::Create(OrigDispVal + DotDispVal, getContext()); - return false; + NewDisp = MCConstantExpr::Create(OrigDispVal + DotDispVal, getContext()); + return 0; } /// Parse the 'offset' operator. 
This operator is used to specify the /// location rather then the content of a variable. -X86Operand *X86AsmParser::ParseIntelOffsetOfOperator(SMLoc Start) { - SMLoc OffsetOfLoc = Start; +X86Operand *X86AsmParser::ParseIntelOffsetOfOperator() { + const AsmToken &Tok = Parser.getTok(); + SMLoc OffsetOfLoc = Tok.getLoc(); Parser.Lex(); // Eat offset. - Start = Parser.getTok().getLoc(); - assert (Parser.getTok().is(AsmToken::Identifier) && "Expected an identifier"); - SMLoc End; const MCExpr *Val; - if (getParser().parseExpression(Val, End)) - return ErrorOperand(Start, "Unable to parse expression!"); + InlineAsmIdentifierInfo Info; + SMLoc Start = Tok.getLoc(), End; + StringRef Identifier = Tok.getString(); + if (X86Operand *Err = ParseIntelIdentifier(Val, Identifier, Info, + /*Unevaluated*/ false, End)) + return Err; // Don't emit the offset operator. InstInfo->AsmRewrites->push_back(AsmRewrite(AOK_Skip, OffsetOfLoc, 7)); @@ -1224,7 +1520,7 @@ X86Operand *X86AsmParser::ParseIntelOffsetOfOperator(SMLoc Start) { // the size of a pointer. unsigned RegNo = is64BitMode() ? X86::RBX : X86::EBX; return X86Operand::CreateReg(RegNo, Start, End, /*GetAddress=*/true, - OffsetOfLoc); + OffsetOfLoc, Identifier, Info.OpDecl); } enum IntelOperatorKind { @@ -1239,34 +1535,25 @@ enum IntelOperatorKind { /// variable. A variable's size is the product of its LENGTH and TYPE. The /// TYPE operator returns the size of a C or C++ type or variable. If the /// variable is an array, TYPE returns the size of a single element. -X86Operand *X86AsmParser::ParseIntelOperator(SMLoc Start, unsigned OpKind) { - SMLoc TypeLoc = Start; - Parser.Lex(); // Eat offset. - Start = Parser.getTok().getLoc(); - assert (Parser.getTok().is(AsmToken::Identifier) && "Expected an identifier"); - - SMLoc End; - const MCExpr *Val; - if (getParser().parseExpression(Val, End)) - return 0; +X86Operand *X86AsmParser::ParseIntelOperator(unsigned OpKind) { + const AsmToken &Tok = Parser.getTok(); + SMLoc TypeLoc = Tok.getLoc(); + Parser.Lex(); // Eat operator. + + const MCExpr *Val = 0; + InlineAsmIdentifierInfo Info; + SMLoc Start = Tok.getLoc(), End; + StringRef Identifier = Tok.getString(); + if (X86Operand *Err = ParseIntelIdentifier(Val, Identifier, Info, + /*Unevaluated*/ true, End)) + return Err; - unsigned Length = 0, Size = 0, Type = 0; - if (const MCSymbolRefExpr *SymRef = dyn_cast<MCSymbolRefExpr>(Val)) { - const MCSymbol &Sym = SymRef->getSymbol(); - // FIXME: The SemaLookup will fail if the name is anything other then an - // identifier. - // FIXME: Pass a valid SMLoc. 
- bool IsVarDecl; - if (!SemaCallback->LookupInlineAsmIdentifier(Sym.getName(), NULL, Length, - Size, Type, IsVarDecl)) - return ErrorOperand(Start, "Unable to lookup expr!"); - } - unsigned CVal; + unsigned CVal = 0; switch(OpKind) { default: llvm_unreachable("Unexpected operand kind!"); - case IOK_LENGTH: CVal = Length; break; - case IOK_SIZE: CVal = Size; break; - case IOK_TYPE: CVal = Type; break; + case IOK_LENGTH: CVal = Info.Length; break; + case IOK_SIZE: CVal = Info.Size; break; + case IOK_TYPE: CVal = Info.Type; break; } // Rewrite the type operator and the C or C++ type or variable in terms of an @@ -1279,44 +1566,54 @@ X86Operand *X86AsmParser::ParseIntelOperator(SMLoc Start, unsigned OpKind) { } X86Operand *X86AsmParser::ParseIntelOperand() { - SMLoc Start = Parser.getTok().getLoc(), End; - StringRef AsmTokStr = Parser.getTok().getString(); + const AsmToken &Tok = Parser.getTok(); + SMLoc Start = Tok.getLoc(), End; // Offset, length, type and size operators. if (isParsingInlineAsm()) { + StringRef AsmTokStr = Tok.getString(); if (AsmTokStr == "offset" || AsmTokStr == "OFFSET") - return ParseIntelOffsetOfOperator(Start); + return ParseIntelOffsetOfOperator(); if (AsmTokStr == "length" || AsmTokStr == "LENGTH") - return ParseIntelOperator(Start, IOK_LENGTH); + return ParseIntelOperator(IOK_LENGTH); if (AsmTokStr == "size" || AsmTokStr == "SIZE") - return ParseIntelOperator(Start, IOK_SIZE); + return ParseIntelOperator(IOK_SIZE); if (AsmTokStr == "type" || AsmTokStr == "TYPE") - return ParseIntelOperator(Start, IOK_TYPE); + return ParseIntelOperator(IOK_TYPE); } // Immediate. - if (getLexer().is(AsmToken::Integer) || getLexer().is(AsmToken::Real) || - getLexer().is(AsmToken::Minus)) { - const MCExpr *Val; - bool isInteger = getLexer().is(AsmToken::Integer); - if (!getParser().parseExpression(Val, End)) { - if (isParsingInlineAsm()) + if (getLexer().is(AsmToken::Integer) || getLexer().is(AsmToken::Minus) || + getLexer().is(AsmToken::LParen)) { + AsmToken StartTok = Tok; + IntelExprStateMachine SM(/*Imm=*/0, /*StopOnLBrac=*/true, + /*AddImmPrefix=*/false); + if (X86Operand *Err = ParseIntelExpression(SM, End)) + return Err; + + int64_t Imm = SM.getImm(); + if (isParsingInlineAsm()) { + unsigned Len = Tok.getLoc().getPointer() - Start.getPointer(); + if (StartTok.getString().size() == Len) + // Just add a prefix if this wasn't a complex immediate expression. InstInfo->AsmRewrites->push_back(AsmRewrite(AOK_ImmPrefix, Start)); - // Immediate. - if (getLexer().isNot(AsmToken::LBrac)) - return X86Operand::CreateImm(Val, Start, End); - - // Only positive immediates are valid. - if (!isInteger) { - Error(Parser.getTok().getLoc(), "expected a positive immediate " - "displacement before bracketed expr."); - return 0; - } + else + // Otherwise, rewrite the complex expression as a single immediate. + InstInfo->AsmRewrites->push_back(AsmRewrite(AOK_Imm, Start, Len, Imm)); + } - // Parse ImmDisp [ BaseReg + Scale*IndexReg + Disp ]. - if (uint64_t ImmDisp = dyn_cast<MCConstantExpr>(Val)->getValue()) - return ParseIntelMemOperand(/*SegReg=*/0, ImmDisp, Start); + if (getLexer().isNot(AsmToken::LBrac)) { + const MCExpr *ImmExpr = MCConstantExpr::Create(Imm, getContext()); + return X86Operand::CreateImm(ImmExpr, Start, End); } + + // Only positive immediates are valid. + if (Imm < 0) + return ErrorOperand(Start, "expected a positive immediate displacement " + "before bracketed expr."); + + // Parse ImmDisp [ BaseReg + Scale*IndexReg + Disp ]. 
+ return ParseIntelMemOperand(/*SegReg=*/0, Imm, Start); } // Register. @@ -1907,7 +2204,7 @@ MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, assert(!Operands.empty() && "Unexpect empty operand list!"); X86Operand *Op = static_cast<X86Operand*>(Operands[0]); assert(Op->isToken() && "Leading operand should always be a mnemonic!"); - ArrayRef<SMRange> EmptyRanges = ArrayRef<SMRange>(); + ArrayRef<SMRange> EmptyRanges = None; // First, handle aliases that expand to multiple instructions. // FIXME: This should be replaced with a real .td file alias mechanism. diff --git a/contrib/llvm/lib/Target/X86/MCTargetDesc/X86BaseInfo.h b/contrib/llvm/lib/Target/X86/MCTargetDesc/X86BaseInfo.h index 3669560..d8f7278 100644 --- a/contrib/llvm/lib/Target/X86/MCTargetDesc/X86BaseInfo.h +++ b/contrib/llvm/lib/Target/X86/MCTargetDesc/X86BaseInfo.h @@ -20,6 +20,7 @@ #include "X86MCTargetDesc.h" #include "llvm/Support/DataTypes.h" #include "llvm/Support/ErrorHandling.h" +#include "llvm/MC/MCInstrInfo.h" namespace llvm { @@ -41,7 +42,6 @@ namespace X86 { AddrNumOperands = 5 }; } // end namespace X86; - /// X86II - This namespace holds all of the target specific flags that /// instruction info tracks. @@ -274,11 +274,12 @@ namespace X86II { //// MRM_XX - A mod/rm byte of exactly 0xXX. MRM_C1 = 33, MRM_C2 = 34, MRM_C3 = 35, MRM_C4 = 36, - MRM_C8 = 37, MRM_C9 = 38, MRM_E8 = 39, MRM_F0 = 40, - MRM_F8 = 41, MRM_F9 = 42, MRM_D0 = 45, MRM_D1 = 46, - MRM_D4 = 47, MRM_D5 = 48, MRM_D6 = 49, MRM_D8 = 50, - MRM_D9 = 51, MRM_DA = 52, MRM_DB = 53, MRM_DC = 54, - MRM_DD = 55, MRM_DE = 56, MRM_DF = 57, + MRM_C8 = 37, MRM_C9 = 38, MRM_CA = 39, MRM_CB = 40, + MRM_E8 = 41, MRM_F0 = 42, MRM_F8 = 45, MRM_F9 = 46, + MRM_D0 = 47, MRM_D1 = 48, MRM_D4 = 49, MRM_D5 = 50, + MRM_D6 = 51, MRM_D8 = 52, MRM_D9 = 53, MRM_DA = 54, + MRM_DB = 55, MRM_DC = 56, MRM_DD = 57, MRM_DE = 58, + MRM_DF = 59, /// RawFrmImm8 - This is used for the ENTER instruction, which has two /// immediates, the first of which is a 16-bit immediate (specified by @@ -521,6 +522,26 @@ namespace X86II { } } + /// getOperandBias - compute any additional adjustment needed to + /// the offset to the start of the memory operand + /// in this instruction. + /// If this is a two-address instruction,skip one of the register operands. + /// FIXME: This should be handled during MCInst lowering. + inline int getOperandBias(const MCInstrDesc& Desc) + { + unsigned NumOps = Desc.getNumOperands(); + unsigned CurOp = 0; + if (NumOps > 1 && Desc.getOperandConstraint(1, MCOI::TIED_TO) == 0) + ++CurOp; + else if (NumOps > 3 && Desc.getOperandConstraint(2, MCOI::TIED_TO) == 0) { + assert(Desc.getOperandConstraint(NumOps - 1, MCOI::TIED_TO) == 1); + // Special case for GATHER with 2 TIED_TO operands + // Skip the first 2 operands: dst, mask_wb + CurOp += 2; + } + return CurOp; + } + /// getMemoryOperandNo - The function returns the MCInst operand # for the /// first field of the memory operand. If the instruction doesn't have a /// memory operand, this returns -1. 
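The new X86II::getOperandBias helper centralizes the tied-operand skipping that X86MCCodeEmitter previously open-coded (its updated call site appears in the X86MCCodeEmitter hunk below). A minimal sketch of one way a caller might use it follows; the walkMemOperands name is invented for illustration, and the include paths are those used inside the X86 target:

#include "MCTargetDesc/X86BaseInfo.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCInstrDesc.h"

// Skip any TIED_TO register operands first, then visit the remaining
// MCInst operands in order.
static void walkMemOperands(const llvm::MCInst &MI,
                            const llvm::MCInstrDesc &Desc) {
  unsigned CurOp = llvm::X86II::getOperandBias(Desc);
  for (unsigned e = Desc.getNumOperands(); CurOp != e; ++CurOp) {
    const llvm::MCOperand &Op = MI.getOperand(CurOp);
    (void)Op; // encode or inspect Op here
  }
}

Keeping the GATHER two-TIED_TO special case inside the helper means any emitter that starts from getOperandBias picks up the same adjustment.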
@@ -576,12 +597,13 @@ namespace X86II { } case X86II::MRM_C1: case X86II::MRM_C2: case X86II::MRM_C3: case X86II::MRM_C4: case X86II::MRM_C8: case X86II::MRM_C9: - case X86II::MRM_E8: case X86II::MRM_F0: case X86II::MRM_F8: - case X86II::MRM_F9: case X86II::MRM_D0: case X86II::MRM_D1: - case X86II::MRM_D4: case X86II::MRM_D5: case X86II::MRM_D6: - case X86II::MRM_D8: case X86II::MRM_D9: case X86II::MRM_DA: - case X86II::MRM_DB: case X86II::MRM_DC: case X86II::MRM_DD: - case X86II::MRM_DE: case X86II::MRM_DF: + case X86II::MRM_CA: case X86II::MRM_CB: case X86II::MRM_E8: + case X86II::MRM_F0: case X86II::MRM_F8: case X86II::MRM_F9: + case X86II::MRM_D0: case X86II::MRM_D1: case X86II::MRM_D4: + case X86II::MRM_D5: case X86II::MRM_D6: case X86II::MRM_D8: + case X86II::MRM_D9: case X86II::MRM_DA: case X86II::MRM_DB: + case X86II::MRM_DC: case X86II::MRM_DD: case X86II::MRM_DE: + case X86II::MRM_DF: return -1; } } diff --git a/contrib/llvm/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp b/contrib/llvm/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp index 776cee1..016af71 100644 --- a/contrib/llvm/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp +++ b/contrib/llvm/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp @@ -237,6 +237,14 @@ StartsWithGlobalOffsetTable(const MCExpr *Expr) { return GOT_Normal; } +static bool HasSecRelSymbolRef(const MCExpr *Expr) { + if (Expr->getKind() == MCExpr::SymbolRef) { + const MCSymbolRefExpr *Ref = static_cast<const MCSymbolRefExpr*>(Expr); + return Ref->getKind() == MCSymbolRefExpr::VK_SECREL; + } + return false; +} + void X86MCCodeEmitter:: EmitImmediate(const MCOperand &DispOp, SMLoc Loc, unsigned Size, MCFixupKind FixupKind, unsigned &CurByte, raw_ostream &OS, @@ -268,8 +276,13 @@ EmitImmediate(const MCOperand &DispOp, SMLoc Loc, unsigned Size, if (Kind == GOT_Normal) ImmOffset = CurByte; } else if (Expr->getKind() == MCExpr::SymbolRef) { - const MCSymbolRefExpr *Ref = static_cast<const MCSymbolRefExpr*>(Expr); - if (Ref->getKind() == MCSymbolRefExpr::VK_SECREL) { + if (HasSecRelSymbolRef(Expr)) { + FixupKind = MCFixupKind(FK_SecRel_4); + } + } else if (Expr->getKind() == MCExpr::Binary) { + const MCBinaryExpr *Bin = static_cast<const MCBinaryExpr*>(Expr); + if (HasSecRelSymbolRef(Bin->getLHS()) + || HasSecRelSymbolRef(Bin->getRHS())) { FixupKind = MCFixupKind(FK_SecRel_4); } } @@ -979,18 +992,8 @@ EncodeInstruction(const MCInst &MI, raw_ostream &OS, if ((TSFlags & X86II::FormMask) == X86II::Pseudo) return; - // If this is a two-address instruction, skip one of the register operands. - // FIXME: This should be handled during MCInst lowering. unsigned NumOps = Desc.getNumOperands(); - unsigned CurOp = 0; - if (NumOps > 1 && Desc.getOperandConstraint(1, MCOI::TIED_TO) == 0) - ++CurOp; - else if (NumOps > 3 && Desc.getOperandConstraint(2, MCOI::TIED_TO) == 0) { - assert(Desc.getOperandConstraint(NumOps - 1, MCOI::TIED_TO) == 1); - // Special case for GATHER with 2 TIED_TO operands - // Skip the first 2 operands: dst, mask_wb - CurOp += 2; - } + unsigned CurOp = X86II::getOperandBias(Desc); // Keep track of the current byte being emitted. 
unsigned CurByte = 0; @@ -1138,12 +1141,13 @@ EncodeInstruction(const MCInst &MI, raw_ostream &OS, break; case X86II::MRM_C1: case X86II::MRM_C2: case X86II::MRM_C3: case X86II::MRM_C4: case X86II::MRM_C8: case X86II::MRM_C9: - case X86II::MRM_D0: case X86II::MRM_D1: case X86II::MRM_D4: - case X86II::MRM_D5: case X86II::MRM_D6: case X86II::MRM_D8: - case X86II::MRM_D9: case X86II::MRM_DA: case X86II::MRM_DB: - case X86II::MRM_DC: case X86II::MRM_DD: case X86II::MRM_DE: - case X86II::MRM_DF: case X86II::MRM_E8: case X86II::MRM_F0: - case X86II::MRM_F8: case X86II::MRM_F9: + case X86II::MRM_CA: case X86II::MRM_CB: case X86II::MRM_D0: + case X86II::MRM_D1: case X86II::MRM_D4: case X86II::MRM_D5: + case X86II::MRM_D6: case X86II::MRM_D8: case X86II::MRM_D9: + case X86II::MRM_DA: case X86II::MRM_DB: case X86II::MRM_DC: + case X86II::MRM_DD: case X86II::MRM_DE: case X86II::MRM_DF: + case X86II::MRM_E8: case X86II::MRM_F0: case X86II::MRM_F8: + case X86II::MRM_F9: EmitByte(BaseOpcode, CurByte, OS); unsigned char MRM; @@ -1155,6 +1159,8 @@ EncodeInstruction(const MCInst &MI, raw_ostream &OS, case X86II::MRM_C4: MRM = 0xC4; break; case X86II::MRM_C8: MRM = 0xC8; break; case X86II::MRM_C9: MRM = 0xC9; break; + case X86II::MRM_CA: MRM = 0xCA; break; + case X86II::MRM_CB: MRM = 0xCB; break; case X86II::MRM_D0: MRM = 0xD0; break; case X86II::MRM_D1: MRM = 0xD1; break; case X86II::MRM_D4: MRM = 0xD4; break; diff --git a/contrib/llvm/lib/Target/X86/MCTargetDesc/X86WinCOFFObjectWriter.cpp b/contrib/llvm/lib/Target/X86/MCTargetDesc/X86WinCOFFObjectWriter.cpp index bc272ef..ed64a32 100644 --- a/contrib/llvm/lib/Target/X86/MCTargetDesc/X86WinCOFFObjectWriter.cpp +++ b/contrib/llvm/lib/Target/X86/MCTargetDesc/X86WinCOFFObjectWriter.cpp @@ -9,6 +9,8 @@ #include "MCTargetDesc/X86FixupKinds.h" #include "MCTargetDesc/X86MCTargetDesc.h" +#include "llvm/MC/MCExpr.h" +#include "llvm/MC/MCValue.h" #include "llvm/MC/MCWinCOFFObjectWriter.h" #include "llvm/Support/COFF.h" #include "llvm/Support/ErrorHandling.h" @@ -27,7 +29,9 @@ namespace { X86WinCOFFObjectWriter(bool Is64Bit_); ~X86WinCOFFObjectWriter(); - virtual unsigned getRelocType(unsigned FixupKind) const; + virtual unsigned getRelocType(const MCValue &Target, + const MCFixup &Fixup, + bool IsCrossSection) const LLVM_OVERRIDE; }; } @@ -38,7 +42,14 @@ X86WinCOFFObjectWriter::X86WinCOFFObjectWriter(bool Is64Bit_) X86WinCOFFObjectWriter::~X86WinCOFFObjectWriter() {} -unsigned X86WinCOFFObjectWriter::getRelocType(unsigned FixupKind) const { +unsigned X86WinCOFFObjectWriter::getRelocType(const MCValue &Target, + const MCFixup &Fixup, + bool IsCrossSection) const { + unsigned FixupKind = IsCrossSection ? FK_PCRel_4 : Fixup.getKind(); + + MCSymbolRefExpr::VariantKind Modifier = Target.isAbsolute() ? + MCSymbolRefExpr::VK_None : Target.getSymA()->getKind(); + switch (FixupKind) { case FK_PCRel_4: case X86::reloc_riprel_4byte: @@ -46,6 +57,9 @@ unsigned X86WinCOFFObjectWriter::getRelocType(unsigned FixupKind) const { return Is64Bit ? COFF::IMAGE_REL_AMD64_REL32 : COFF::IMAGE_REL_I386_REL32; case FK_Data_4: case X86::reloc_signed_4byte: + if (Modifier == MCSymbolRefExpr::VK_COFF_IMGREL32) + return Is64Bit ? COFF::IMAGE_REL_AMD64_ADDR32NB : + COFF::IMAGE_REL_I386_DIR32NB; return Is64Bit ? 
COFF::IMAGE_REL_AMD64_ADDR32 : COFF::IMAGE_REL_I386_DIR32; case FK_Data_8: if (Is64Bit) diff --git a/contrib/llvm/lib/Target/X86/X86.h b/contrib/llvm/lib/Target/X86/X86.h index 1f9919f..947002f 100644 --- a/contrib/llvm/lib/Target/X86/X86.h +++ b/contrib/llvm/lib/Target/X86/X86.h @@ -69,6 +69,11 @@ ImmutablePass *createX86TargetTransformInfoPass(const X86TargetMachine *TM); /// createX86PadShortFunctions - Return a pass that pads short functions /// with NOOPs. This will prevent a stall when returning on the Atom. FunctionPass *createX86PadShortFunctions(); +/// createX86FixupLEAs - Return a a pass that selectively replaces +/// certain instructions (like add, sub, inc, dec, some shifts, +/// and some multiplies) by equivalent LEA instructions, in order +/// to eliminate execution delays in some Atom processors. +FunctionPass *createX86FixupLEAs(); } // End llvm namespace diff --git a/contrib/llvm/lib/Target/X86/X86.td b/contrib/llvm/lib/Target/X86/X86.td index 1dcc344..c865500 100644 --- a/contrib/llvm/lib/Target/X86/X86.td +++ b/contrib/llvm/lib/Target/X86/X86.td @@ -139,6 +139,8 @@ def FeaturePadShortFunctions : SubtargetFeature<"pad-short-functions", def FeatureCallRegIndirect : SubtargetFeature<"call-reg-indirect", "CallRegIndirect", "true", "Call register indirect">; +def FeatureLEAUsesAG : SubtargetFeature<"lea-uses-ag", "LEAUsesAG", "true", + "LEA instruction needs inputs at AG stage">; //===----------------------------------------------------------------------===// // X86 processors supported. @@ -188,6 +190,7 @@ def : ProcessorModel<"atom", AtomModel, FeatureMOVBE, FeatureSlowBTMem, FeatureLeaForSP, FeatureSlowDivide, FeatureCallRegIndirect, + FeatureLEAUsesAG, FeaturePadShortFunctions]>; // "Arrandale" along with corei3 and corei5 @@ -252,11 +255,16 @@ def : Proc<"amdfam10", [FeatureSSE4A, // Bobcat def : Proc<"btver1", [FeatureSSSE3, FeatureSSE4A, FeatureCMPXCHG16B, FeatureLZCNT, FeaturePOPCNT]>; +// Jaguar +def : Proc<"btver2", [FeatureAVX, FeatureSSE4A, FeatureCMPXCHG16B, + FeatureAES, FeaturePCLMUL, FeatureBMI, + FeatureF16C, FeatureMOVBE, FeatureLZCNT, + FeaturePOPCNT]>; // Bulldozer def : Proc<"bdver1", [FeatureXOP, FeatureFMA4, FeatureCMPXCHG16B, FeatureAES, FeaturePCLMUL, FeatureLZCNT, FeaturePOPCNT]>; -// Enhanced Bulldozer +// Piledriver def : Proc<"bdver2", [FeatureXOP, FeatureFMA4, FeatureCMPXCHG16B, FeatureAES, FeaturePCLMUL, FeatureF16C, FeatureLZCNT, @@ -300,6 +308,9 @@ def ATTAsmParser : AsmParser { def ATTAsmParserVariant : AsmParserVariant { int Variant = 0; + // Variant name. + string Name = "att"; + // Discard comments in assembly strings. string CommentDelimiter = "#"; @@ -310,6 +321,9 @@ def ATTAsmParserVariant : AsmParserVariant { def IntelAsmParserVariant : AsmParserVariant { int Variant = 1; + // Variant name. + string Name = "intel"; + // Discard comments in assembly strings. 
string CommentDelimiter = ";"; diff --git a/contrib/llvm/lib/Target/X86/X86CodeEmitter.cpp b/contrib/llvm/lib/Target/X86/X86CodeEmitter.cpp index 2518e02..8fea6ed 100644 --- a/contrib/llvm/lib/Target/X86/X86CodeEmitter.cpp +++ b/contrib/llvm/lib/Target/X86/X86CodeEmitter.cpp @@ -1451,6 +1451,14 @@ void Emitter<CodeEmitter>::emitInstruction(MachineInstr &MI, MCE.emitByte(BaseOpcode); MCE.emitByte(0xC9); break; + case X86II::MRM_CA: + MCE.emitByte(BaseOpcode); + MCE.emitByte(0xCA); + break; + case X86II::MRM_CB: + MCE.emitByte(BaseOpcode); + MCE.emitByte(0xCB); + break; case X86II::MRM_E8: MCE.emitByte(BaseOpcode); MCE.emitByte(0xE8); diff --git a/contrib/llvm/lib/Target/X86/X86FastISel.cpp b/contrib/llvm/lib/Target/X86/X86FastISel.cpp index cadec68..cf44bd0 100644 --- a/contrib/llvm/lib/Target/X86/X86FastISel.cpp +++ b/contrib/llvm/lib/Target/X86/X86FastISel.cpp @@ -68,12 +68,12 @@ public: virtual bool TargetSelectInstruction(const Instruction *I); - /// TryToFoldLoad - The specified machine instr operand is a vreg, and that + /// \brief The specified machine instr operand is a vreg, and that /// vreg is being provided by the specified load instruction. If possible, /// try to fold the load as an operand to the instruction, returning true if /// possible. - virtual bool TryToFoldLoad(MachineInstr *MI, unsigned OpNo, - const LoadInst *LI); + virtual bool tryToFoldLoadIntoMI(MachineInstr *MI, unsigned OpNo, + const LoadInst *LI); virtual bool FastLowerArguments(); @@ -107,6 +107,8 @@ private: bool X86SelectShift(const Instruction *I); + bool X86SelectDivRem(const Instruction *I); + bool X86SelectSelect(const Instruction *I); bool X86SelectTrunc(const Instruction *I); @@ -691,11 +693,6 @@ bool X86FastISel::X86SelectStore(const Instruction *I) { if (S->isAtomic()) return false; - unsigned SABIAlignment = - TD.getABITypeAlignment(S->getValueOperand()->getType()); - if (S->getAlignment() != 0 && S->getAlignment() < SABIAlignment) - return false; - MVT VT; if (!isTypeLegal(I->getOperand(0)->getType(), VT, /*AllowI1=*/true)) return false; @@ -1235,6 +1232,124 @@ bool X86FastISel::X86SelectShift(const Instruction *I) { return true; } +bool X86FastISel::X86SelectDivRem(const Instruction *I) { + const static unsigned NumTypes = 4; // i8, i16, i32, i64 + const static unsigned NumOps = 4; // SDiv, SRem, UDiv, URem + const static bool S = true; // IsSigned + const static bool U = false; // !IsSigned + const static unsigned Copy = TargetOpcode::COPY; + // For the X86 DIV/IDIV instruction, in most cases the dividend + // (numerator) must be in a specific register pair highreg:lowreg, + // producing the quotient in lowreg and the remainder in highreg. + // For most data types, to set up the instruction, the dividend is + // copied into lowreg, and lowreg is sign-extended or zero-extended + // into highreg. The exception is i8, where the dividend is defined + // as a single register rather than a register pair, and we + // therefore directly sign-extend or zero-extend the dividend into + // lowreg, instead of copying, and ignore the highreg. + const static struct DivRemEntry { + // The following portion depends only on the data type. + const TargetRegisterClass *RC; + unsigned LowInReg; // low part of the register pair + unsigned HighInReg; // high part of the register pair + // The following portion depends on both the data type and the operation. + struct DivRemResult { + unsigned OpDivRem; // The specific DIV/IDIV opcode to use. 
+ unsigned OpSignExtend; // Opcode for sign-extending lowreg into + // highreg, or copying a zero into highreg. + unsigned OpCopy; // Opcode for copying dividend into lowreg, or + // zero/sign-extending into lowreg for i8. + unsigned DivRemResultReg; // Register containing the desired result. + bool IsOpSigned; // Whether to use signed or unsigned form. + } ResultTable[NumOps]; + } OpTable[NumTypes] = { + { &X86::GR8RegClass, X86::AX, 0, { + { X86::IDIV8r, 0, X86::MOVSX16rr8, X86::AL, S }, // SDiv + { X86::IDIV8r, 0, X86::MOVSX16rr8, X86::AH, S }, // SRem + { X86::DIV8r, 0, X86::MOVZX16rr8, X86::AL, U }, // UDiv + { X86::DIV8r, 0, X86::MOVZX16rr8, X86::AH, U }, // URem + } + }, // i8 + { &X86::GR16RegClass, X86::AX, X86::DX, { + { X86::IDIV16r, X86::CWD, Copy, X86::AX, S }, // SDiv + { X86::IDIV16r, X86::CWD, Copy, X86::DX, S }, // SRem + { X86::DIV16r, X86::MOV16r0, Copy, X86::AX, U }, // UDiv + { X86::DIV16r, X86::MOV16r0, Copy, X86::DX, U }, // URem + } + }, // i16 + { &X86::GR32RegClass, X86::EAX, X86::EDX, { + { X86::IDIV32r, X86::CDQ, Copy, X86::EAX, S }, // SDiv + { X86::IDIV32r, X86::CDQ, Copy, X86::EDX, S }, // SRem + { X86::DIV32r, X86::MOV32r0, Copy, X86::EAX, U }, // UDiv + { X86::DIV32r, X86::MOV32r0, Copy, X86::EDX, U }, // URem + } + }, // i32 + { &X86::GR64RegClass, X86::RAX, X86::RDX, { + { X86::IDIV64r, X86::CQO, Copy, X86::RAX, S }, // SDiv + { X86::IDIV64r, X86::CQO, Copy, X86::RDX, S }, // SRem + { X86::DIV64r, X86::MOV64r0, Copy, X86::RAX, U }, // UDiv + { X86::DIV64r, X86::MOV64r0, Copy, X86::RDX, U }, // URem + } + }, // i64 + }; + + MVT VT; + if (!isTypeLegal(I->getType(), VT)) + return false; + + unsigned TypeIndex, OpIndex; + switch (VT.SimpleTy) { + default: return false; + case MVT::i8: TypeIndex = 0; break; + case MVT::i16: TypeIndex = 1; break; + case MVT::i32: TypeIndex = 2; break; + case MVT::i64: TypeIndex = 3; + if (!Subtarget->is64Bit()) + return false; + break; + } + + switch (I->getOpcode()) { + default: llvm_unreachable("Unexpected div/rem opcode"); + case Instruction::SDiv: OpIndex = 0; break; + case Instruction::SRem: OpIndex = 1; break; + case Instruction::UDiv: OpIndex = 2; break; + case Instruction::URem: OpIndex = 3; break; + } + + const DivRemEntry &TypeEntry = OpTable[TypeIndex]; + const DivRemEntry::DivRemResult &OpEntry = TypeEntry.ResultTable[OpIndex]; + unsigned Op0Reg = getRegForValue(I->getOperand(0)); + if (Op0Reg == 0) + return false; + unsigned Op1Reg = getRegForValue(I->getOperand(1)); + if (Op1Reg == 0) + return false; + + // Move op0 into low-order input register. + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, + TII.get(OpEntry.OpCopy), TypeEntry.LowInReg).addReg(Op0Reg); + // Zero-extend or sign-extend into high-order input register. + if (OpEntry.OpSignExtend) { + if (OpEntry.IsOpSigned) + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, + TII.get(OpEntry.OpSignExtend)); + else + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, + TII.get(OpEntry.OpSignExtend), TypeEntry.HighInReg); + } + // Generate the DIV/IDIV instruction. + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, + TII.get(OpEntry.OpDivRem)).addReg(Op1Reg); + // Copy output register into result register. 
+ unsigned ResultReg = createResultReg(TypeEntry.RC); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, + TII.get(Copy), ResultReg).addReg(OpEntry.DivRemResultReg); + UpdateValueMap(I, ResultReg); + + return true; +} + bool X86FastISel::X86SelectSelect(const Instruction *I) { MVT VT; if (!isTypeLegal(I->getType(), VT)) @@ -2084,6 +2199,11 @@ X86FastISel::TargetSelectInstruction(const Instruction *I) { case Instruction::AShr: case Instruction::Shl: return X86SelectShift(I); + case Instruction::SDiv: + case Instruction::UDiv: + case Instruction::SRem: + case Instruction::URem: + return X86SelectDivRem(I); case Instruction::Select: return X86SelectSelect(I); case Instruction::Trunc: @@ -2275,12 +2395,8 @@ unsigned X86FastISel::TargetMaterializeFloatZero(const ConstantFP *CF) { } -/// TryToFoldLoad - The specified machine instr operand is a vreg, and that -/// vreg is being provided by the specified load instruction. If possible, -/// try to fold the load as an operand to the instruction, returning true if -/// possible. -bool X86FastISel::TryToFoldLoad(MachineInstr *MI, unsigned OpNo, - const LoadInst *LI) { +bool X86FastISel::tryToFoldLoadIntoMI(MachineInstr *MI, unsigned OpNo, + const LoadInst *LI) { X86AddressMode AM; if (!X86SelectAddress(LI->getOperand(0), AM)) return false; diff --git a/contrib/llvm/lib/Target/X86/X86FixupLEAs.cpp b/contrib/llvm/lib/Target/X86/X86FixupLEAs.cpp new file mode 100644 index 0000000..0dd034c --- /dev/null +++ b/contrib/llvm/lib/Target/X86/X86FixupLEAs.cpp @@ -0,0 +1,253 @@ +//===-- X86FixupLEAs.cpp - use or replace LEA instructions -----------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines the pass which will find instructions which +// can be re-written as LEA instructions in order to reduce pipeline +// delays for some models of the Intel Atom family. +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "x86-fixup-LEAs" +#include "X86.h" +#include "X86InstrInfo.h" +#include "X86Subtarget.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/CodeGen/LiveVariables.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/Passes.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Target/TargetInstrInfo.h" +using namespace llvm; + +STATISTIC(NumLEAs, "Number of LEA instructions created"); + +namespace { + class FixupLEAPass : public MachineFunctionPass { + enum RegUsageState { RU_NotUsed, RU_Write, RU_Read }; + static char ID; + /// \brief Loop over all of the instructions in the basic block + /// replacing applicable instructions with LEA instructions, + /// where appropriate. + bool processBasicBlock(MachineFunction &MF, MachineFunction::iterator MFI); + + virtual const char *getPassName() const { return "X86 Atom LEA Fixup";} + + /// \brief Given a machine register, look for the instruction + /// which writes it in the current basic block. If found, + /// try to replace it with an equivalent LEA instruction. + /// If replacement succeeds, then also process the the newly created + /// instruction. 
+ void seekLEAFixup(MachineOperand& p, MachineBasicBlock::iterator& I, + MachineFunction::iterator MFI); + + /// \brief Given a memory access or LEA instruction + /// whose address mode uses a base and/or index register, look for + /// an opportunity to replace the instruction which sets the base or index + /// register with an equivalent LEA instruction. + void processInstruction(MachineBasicBlock::iterator& I, + MachineFunction::iterator MFI); + + /// \brief Determine if an instruction references a machine register + /// and, if so, whether it reads or writes the register. + RegUsageState usesRegister(MachineOperand& p, + MachineBasicBlock::iterator I); + + /// \brief Step backwards through a basic block, looking + /// for an instruction which writes a register within + /// a maximum of INSTR_DISTANCE_THRESHOLD instruction latency cycles. + MachineBasicBlock::iterator searchBackwards(MachineOperand& p, + MachineBasicBlock::iterator& I, + MachineFunction::iterator MFI); + + /// \brief if an instruction can be converted to an + /// equivalent LEA, insert the new instruction into the basic block + /// and return a pointer to it. Otherwise, return zero. + MachineInstr* postRAConvertToLEA(MachineFunction::iterator &MFI, + MachineBasicBlock::iterator &MBBI) const; + + public: + FixupLEAPass() : MachineFunctionPass(ID) {} + + /// \brief Loop over all of the basic blocks, + /// replacing instructions by equivalent LEA instructions + /// if needed and when possible. + virtual bool runOnMachineFunction(MachineFunction &MF); + + private: + MachineFunction *MF; + const TargetMachine *TM; + const TargetInstrInfo *TII; // Machine instruction info. + + }; + char FixupLEAPass::ID = 0; +} + +MachineInstr * +FixupLEAPass::postRAConvertToLEA(MachineFunction::iterator &MFI, + MachineBasicBlock::iterator &MBBI) const { + MachineInstr* MI = MBBI; + MachineInstr* NewMI; + switch (MI->getOpcode()) { + case X86::MOV32rr: + case X86::MOV64rr: { + const MachineOperand& Src = MI->getOperand(1); + const MachineOperand& Dest = MI->getOperand(0); + NewMI = BuildMI(*MF, MI->getDebugLoc(), + TII->get( MI->getOpcode() == X86::MOV32rr ? X86::LEA32r : X86::LEA64r)) + .addOperand(Dest) + .addOperand(Src).addImm(1).addReg(0).addImm(0).addReg(0); + MFI->insert(MBBI, NewMI); // Insert the new inst + return NewMI; + } + case X86::ADD64ri32: + case X86::ADD64ri8: + case X86::ADD64ri32_DB: + case X86::ADD64ri8_DB: + case X86::ADD32ri: + case X86::ADD32ri8: + case X86::ADD32ri_DB: + case X86::ADD32ri8_DB: + case X86::ADD16ri: + case X86::ADD16ri8: + case X86::ADD16ri_DB: + case X86::ADD16ri8_DB: + if (!MI->getOperand(2).isImm()) { + // convertToThreeAddress will call getImm() + // which requires isImm() to be true + return 0; + } + } + return TII->convertToThreeAddress(MFI, MBBI, 0); +} + +FunctionPass *llvm::createX86FixupLEAs() { + return new FixupLEAPass(); +} + +bool FixupLEAPass::runOnMachineFunction(MachineFunction &Func) { + MF = &Func; + TII = Func.getTarget().getInstrInfo(); + TM = &MF->getTarget(); + + DEBUG(dbgs() << "Start X86FixupLEAs\n";); + // Process all basic blocks. 
+ for (MachineFunction::iterator I = Func.begin(), E = Func.end(); I != E; ++I) + processBasicBlock(Func, I); + DEBUG(dbgs() << "End X86FixupLEAs\n";); + + return true; +} + +FixupLEAPass::RegUsageState FixupLEAPass::usesRegister(MachineOperand& p, + MachineBasicBlock::iterator I) { + RegUsageState RegUsage = RU_NotUsed; + MachineInstr* MI = I; + + for (unsigned int i = 0; i < MI->getNumOperands(); ++i) { + MachineOperand& opnd = MI->getOperand(i); + if (opnd.isReg() && opnd.getReg() == p.getReg()){ + if (opnd.isDef()) + return RU_Write; + RegUsage = RU_Read; + } + } + return RegUsage; +} + +/// getPreviousInstr - Given a reference to an instruction in a basic +/// block, return a reference to the previous instruction in the block, +/// wrapping around to the last instruction of the block if the block +/// branches to itself. +static inline bool getPreviousInstr(MachineBasicBlock::iterator& I, + MachineFunction::iterator MFI) { + if (I == MFI->begin()) { + if (MFI->isPredecessor(MFI)) { + I = --MFI->end(); + return true; + } + else + return false; + } + --I; + return true; +} + +MachineBasicBlock::iterator FixupLEAPass::searchBackwards(MachineOperand& p, + MachineBasicBlock::iterator& I, + MachineFunction::iterator MFI) { + int InstrDistance = 1; + MachineBasicBlock::iterator CurInst; + static const int INSTR_DISTANCE_THRESHOLD = 5; + + CurInst = I; + bool Found; + Found = getPreviousInstr(CurInst, MFI); + while( Found && I != CurInst) { + if (CurInst->isCall() || CurInst->isInlineAsm()) + break; + if (InstrDistance > INSTR_DISTANCE_THRESHOLD) + break; // too far back to make a difference + if (usesRegister(p, CurInst) == RU_Write){ + return CurInst; + } + InstrDistance += TII->getInstrLatency(TM->getInstrItineraryData(), CurInst); + Found = getPreviousInstr(CurInst, MFI); + } + return 0; +} + +void FixupLEAPass::processInstruction(MachineBasicBlock::iterator& I, + MachineFunction::iterator MFI) { + // Process a load, store, or LEA instruction. + MachineInstr *MI = I; + int opcode = MI->getOpcode(); + const MCInstrDesc& Desc = MI->getDesc(); + int AddrOffset = X86II::getMemoryOperandNo(Desc.TSFlags, opcode); + if (AddrOffset >= 0) { + AddrOffset += X86II::getOperandBias(Desc); + MachineOperand& p = MI->getOperand(AddrOffset + X86::AddrBaseReg); + if (p.isReg() && p.getReg() != X86::ESP) { + seekLEAFixup(p, I, MFI); + } + MachineOperand& q = MI->getOperand(AddrOffset + X86::AddrIndexReg); + if (q.isReg() && q.getReg() != X86::ESP) { + seekLEAFixup(q, I, MFI); + } + } +} + +void FixupLEAPass::seekLEAFixup(MachineOperand& p, + MachineBasicBlock::iterator& I, + MachineFunction::iterator MFI) { + MachineBasicBlock::iterator MBI = searchBackwards(p, I, MFI); + if (MBI) { + MachineInstr* NewMI = postRAConvertToLEA(MFI, MBI); + if (NewMI) { + ++NumLEAs; + DEBUG(dbgs() << "Candidate to replace:"; MBI->dump();); + // now to replace with an equivalent LEA... 
+ DEBUG(dbgs() << "Replaced by: "; NewMI->dump();); + MFI->erase(MBI); + MachineBasicBlock::iterator J = + static_cast<MachineBasicBlock::iterator> (NewMI); + processInstruction(J, MFI); + } + } +} + +bool FixupLEAPass::processBasicBlock(MachineFunction &MF, + MachineFunction::iterator MFI) { + + for (MachineBasicBlock::iterator I = MFI->begin(); I != MFI->end(); ++I) + processInstruction(I, MFI); + return false; +} diff --git a/contrib/llvm/lib/Target/X86/X86FrameLowering.cpp b/contrib/llvm/lib/Target/X86/X86FrameLowering.cpp index 54cbd40..42b4e73 100644 --- a/contrib/llvm/lib/Target/X86/X86FrameLowering.cpp +++ b/contrib/llvm/lib/Target/X86/X86FrameLowering.cpp @@ -369,7 +369,14 @@ void X86FrameLowering::emitCalleeSavedFrameMoves(MachineFunction &MF, /// getCompactUnwindRegNum - Get the compact unwind number for a given /// register. The number corresponds to the enum lists in /// compact_unwind_encoding.h. -static int getCompactUnwindRegNum(const uint16_t *CURegs, unsigned Reg) { +static int getCompactUnwindRegNum(unsigned Reg, bool is64Bit) { + static const uint16_t CU32BitRegs[] = { + X86::EBX, X86::ECX, X86::EDX, X86::EDI, X86::ESI, X86::EBP, 0 + }; + static const uint16_t CU64BitRegs[] = { + X86::RBX, X86::R12, X86::R13, X86::R14, X86::R15, X86::RBP, 0 + }; + const uint16_t *CURegs = is64Bit ? CU64BitRegs : CU32BitRegs; for (int Idx = 1; *CURegs; ++CURegs, ++Idx) if (*CURegs == Reg) return Idx; @@ -398,16 +405,8 @@ encodeCompactUnwindRegistersWithoutFrame(unsigned SavedRegs[CU_NUM_SAVED_REGS], // 4 3 // 5 3 // - static const uint16_t CU32BitRegs[] = { - X86::EBX, X86::ECX, X86::EDX, X86::EDI, X86::ESI, X86::EBP, 0 - }; - static const uint16_t CU64BitRegs[] = { - X86::RBX, X86::R12, X86::R13, X86::R14, X86::R15, X86::RBP, 0 - }; - const uint16_t *CURegs = (Is64Bit ? CU64BitRegs : CU32BitRegs); - for (unsigned i = 0; i != CU_NUM_SAVED_REGS; ++i) { - int CUReg = getCompactUnwindRegNum(CURegs, SavedRegs[i]); + int CUReg = getCompactUnwindRegNum(SavedRegs[i], Is64Bit); if (CUReg == -1) return ~0U; SavedRegs[i] = CUReg; } @@ -466,14 +465,6 @@ encodeCompactUnwindRegistersWithoutFrame(unsigned SavedRegs[CU_NUM_SAVED_REGS], static uint32_t encodeCompactUnwindRegistersWithFrame(unsigned SavedRegs[CU_NUM_SAVED_REGS], bool Is64Bit) { - static const uint16_t CU32BitRegs[] = { - X86::EBX, X86::ECX, X86::EDX, X86::EDI, X86::ESI, X86::EBP, 0 - }; - static const uint16_t CU64BitRegs[] = { - X86::RBX, X86::R12, X86::R13, X86::R14, X86::R15, X86::RBP, 0 - }; - const uint16_t *CURegs = (Is64Bit ? CU64BitRegs : CU32BitRegs); - // Encode the registers in the order they were saved, 3-bits per register. The // registers are numbered from 1 to CU_NUM_SAVED_REGS. uint32_t RegEnc = 0; @@ -481,7 +472,7 @@ encodeCompactUnwindRegistersWithFrame(unsigned SavedRegs[CU_NUM_SAVED_REGS], unsigned Reg = SavedRegs[I]; if (Reg == 0) continue; - int CURegNum = getCompactUnwindRegNum(CURegs, Reg); + int CURegNum = getCompactUnwindRegNum(Reg, Is64Bit); if (CURegNum == -1) return ~0U; // Encode the 3-bit register number in order, skipping over 3-bits for each @@ -528,11 +519,17 @@ uint32_t X86FrameLowering::getCompactUnwindEncoding(MachineFunction &MF) const { if (!MI.getFlag(MachineInstr::FrameSetup)) break; // We don't exect any more prolog instructions. - if (ExpectEnd) return 0; + if (ExpectEnd) return CU::UNWIND_MODE_DWARF; if (Opc == PushInstr) { // If there are too many saved registers, we cannot use compact encoding. 
- if (SavedRegIdx >= CU_NUM_SAVED_REGS) return 0; + if (SavedRegIdx >= CU_NUM_SAVED_REGS) return CU::UNWIND_MODE_DWARF; + + unsigned Reg = MI.getOperand(0).getReg(); + if (Reg == (Is64Bit ? X86::RAX : X86::EAX)) { + ExpectEnd = true; + continue; + } SavedRegs[SavedRegIdx++] = MI.getOperand(0).getReg(); StackAdjust += OffsetSize; @@ -542,7 +539,7 @@ uint32_t X86FrameLowering::getCompactUnwindEncoding(MachineFunction &MF) const { unsigned DstReg = MI.getOperand(0).getReg(); if (DstReg != FramePtr || SrcReg != StackPtr) - return 0; + return CU::UNWIND_MODE_DWARF; StackAdjust = 0; memset(SavedRegs, 0, sizeof(SavedRegs)); @@ -552,7 +549,7 @@ uint32_t X86FrameLowering::getCompactUnwindEncoding(MachineFunction &MF) const { Opc == X86::SUB32ri || Opc == X86::SUB32ri8) { if (StackSize) // We already have a stack size. - return 0; + return CU::UNWIND_MODE_DWARF; if (!MI.getOperand(0).isReg() || MI.getOperand(0).getReg() != MI.getOperand(1).getReg() || @@ -560,7 +557,7 @@ uint32_t X86FrameLowering::getCompactUnwindEncoding(MachineFunction &MF) const { // We need this to be a stack adjustment pointer. Something like: // // %RSP<def> = SUB64ri8 %RSP, 48 - return 0; + return CU::UNWIND_MODE_DWARF; StackSize = MI.getOperand(2).getImm() / StackDivide; SubtractInstrIdx += InstrOffset; @@ -574,31 +571,31 @@ uint32_t X86FrameLowering::getCompactUnwindEncoding(MachineFunction &MF) const { if (HasFP) { if ((StackAdjust & 0xFF) != StackAdjust) // Offset was too big for compact encoding. - return 0; + return CU::UNWIND_MODE_DWARF; // Get the encoding of the saved registers when we have a frame pointer. uint32_t RegEnc = encodeCompactUnwindRegistersWithFrame(SavedRegs, Is64Bit); - if (RegEnc == ~0U) return 0; + if (RegEnc == ~0U) return CU::UNWIND_MODE_DWARF; - CompactUnwindEncoding |= 0x01000000; + CompactUnwindEncoding |= CU::UNWIND_MODE_BP_FRAME; CompactUnwindEncoding |= (StackAdjust & 0xFF) << 16; - CompactUnwindEncoding |= RegEnc & 0x7FFF; + CompactUnwindEncoding |= RegEnc & CU::UNWIND_BP_FRAME_REGISTERS; } else { ++StackAdjust; uint32_t TotalStackSize = StackAdjust + StackSize; if ((TotalStackSize & 0xFF) == TotalStackSize) { // Frameless stack with a small stack size. - CompactUnwindEncoding |= 0x02000000; + CompactUnwindEncoding |= CU::UNWIND_MODE_STACK_IMMD; // Encode the stack size. CompactUnwindEncoding |= (TotalStackSize & 0xFF) << 16; } else { if ((StackAdjust & 0x7) != StackAdjust) // The extra stack adjustments are too big for us to handle. - return 0; + return CU::UNWIND_MODE_DWARF; // Frameless stack with an offset too large for us to encode compactly. - CompactUnwindEncoding |= 0x03000000; + CompactUnwindEncoding |= CU::UNWIND_MODE_STACK_IND; // Encode the offset to the nnnnnn value in the 'subl $nnnnnn, ESP' // instruction. @@ -616,10 +613,11 @@ uint32_t X86FrameLowering::getCompactUnwindEncoding(MachineFunction &MF) const { uint32_t RegEnc = encodeCompactUnwindRegistersWithoutFrame(SavedRegs, SavedRegIdx, Is64Bit); - if (RegEnc == ~0U) return 0; + if (RegEnc == ~0U) return CU::UNWIND_MODE_DWARF; // Encode the register encoding. 
- CompactUnwindEncoding |= RegEnc & 0x3FF; + CompactUnwindEncoding |= + RegEnc & CU::UNWIND_FRAMELESS_STACK_REG_PERMUTATION; } return CompactUnwindEncoding; diff --git a/contrib/llvm/lib/Target/X86/X86FrameLowering.h b/contrib/llvm/lib/Target/X86/X86FrameLowering.h index 3f08b9a..6e309d8 100644 --- a/contrib/llvm/lib/Target/X86/X86FrameLowering.h +++ b/contrib/llvm/lib/Target/X86/X86FrameLowering.h @@ -19,8 +19,35 @@ #include "llvm/Target/TargetFrameLowering.h" namespace llvm { - class MCSymbol; - class X86TargetMachine; + +namespace CU { + + /// Compact unwind encoding values. + enum CompactUnwindEncodings { + /// [RE]BP based frame where [RE]BP is pused on the stack immediately after + /// the return address, then [RE]SP is moved to [RE]BP. + UNWIND_MODE_BP_FRAME = 0x01000000, + + /// A frameless function with a small constant stack size. + UNWIND_MODE_STACK_IMMD = 0x02000000, + + /// A frameless function with a large constant stack size. + UNWIND_MODE_STACK_IND = 0x03000000, + + /// No compact unwind encoding is available. + UNWIND_MODE_DWARF = 0x04000000, + + /// Mask for encoding the frame registers. + UNWIND_BP_FRAME_REGISTERS = 0x00007FFF, + + /// Mask for encoding the frameless registers. + UNWIND_FRAMELESS_STACK_REG_PERMUTATION = 0x000003FF + }; + +} // end CU namespace + +class MCSymbol; +class X86TargetMachine; class X86FrameLowering : public TargetFrameLowering { const X86TargetMachine &TM; diff --git a/contrib/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp b/contrib/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp index 6041669..968b358 100644 --- a/contrib/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp +++ b/contrib/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp @@ -1503,8 +1503,7 @@ SDNode *X86DAGToDAGISel::SelectAtomic64(SDNode *Node, unsigned Opc) { MemOp[0] = cast<MemSDNode>(Node)->getMemOperand(); const SDValue Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, In2L, In2H, Chain}; SDNode *ResNode = CurDAG->getMachineNode(Opc, Node->getDebugLoc(), - MVT::i32, MVT::i32, MVT::Other, Ops, - array_lengthof(Ops)); + MVT::i32, MVT::i32, MVT::Other, Ops); cast<MachineSDNode>(ResNode)->setMemRefs(MemOp, MemOp + 1); return ResNode; } @@ -1720,7 +1719,7 @@ SDNode *X86DAGToDAGISel::SelectAtomicLoadArith(SDNode *Node, EVT NVT) { Op = ADD; break; } - + Val = getAtomicLoadArithTargetConstant(CurDAG, dl, Op, NVT, Val); bool isUnOp = !Val.getNode(); bool isCN = Val.getNode() && (Val.getOpcode() == ISD::TargetConstant); @@ -1772,12 +1771,10 @@ SDNode *X86DAGToDAGISel::SelectAtomicLoadArith(SDNode *Node, EVT NVT) { MemOp[0] = cast<MemSDNode>(Node)->getMemOperand(); if (isUnOp) { SDValue Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, Chain }; - Ret = SDValue(CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops, - array_lengthof(Ops)), 0); + Ret = SDValue(CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops), 0); } else { SDValue Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, Val, Chain }; - Ret = SDValue(CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops, - array_lengthof(Ops)), 0); + Ret = SDValue(CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops), 0); } cast<MachineSDNode>(Ret)->setMemRefs(MemOp, MemOp + 1); SDValue RetVals[] = { Undef, Ret }; @@ -1971,8 +1968,7 @@ SDNode *X86DAGToDAGISel::SelectGather(SDNode *Node, unsigned Opc) { SDValue Segment = CurDAG->getRegister(0, MVT::i32); const SDValue Ops[] = { VSrc, Base, getI8Imm(Scale->getSExtValue()), VIdx, Disp, Segment, VMask, Chain}; - SDNode *ResNode = CurDAG->getMachineNode(Opc, Node->getDebugLoc(), - VTs, Ops, array_lengthof(Ops)); + SDNode *ResNode = CurDAG->getMachineNode(Opc, Node->getDebugLoc(), 
VTs, Ops); // Node has 2 outputs: VDst and MVT::Other. // ResNode has 3 outputs: VDst, VMask_wb, and MVT::Other. // We replace VDst of Node with VDst of ResNode, and Other of Node with Other @@ -2186,7 +2182,7 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) { SDVTList VTs = CurDAG->getVTList(NVT, NVT, MVT::i32); SDValue Ops[] = {N1, InFlag}; - SDNode *CNode = CurDAG->getMachineNode(Opc, dl, VTs, Ops, 2); + SDNode *CNode = CurDAG->getMachineNode(Opc, dl, VTs, Ops); ReplaceUses(SDValue(Node, 0), SDValue(CNode, 0)); ReplaceUses(SDValue(Node, 1), SDValue(CNode, 1)); @@ -2267,16 +2263,14 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) { InFlag }; if (MOpc == X86::MULX32rm || MOpc == X86::MULX64rm) { SDVTList VTs = CurDAG->getVTList(NVT, NVT, MVT::Other, MVT::Glue); - SDNode *CNode = CurDAG->getMachineNode(MOpc, dl, VTs, Ops, - array_lengthof(Ops)); + SDNode *CNode = CurDAG->getMachineNode(MOpc, dl, VTs, Ops); ResHi = SDValue(CNode, 0); ResLo = SDValue(CNode, 1); Chain = SDValue(CNode, 2); InFlag = SDValue(CNode, 3); } else { SDVTList VTs = CurDAG->getVTList(MVT::Other, MVT::Glue); - SDNode *CNode = CurDAG->getMachineNode(MOpc, dl, VTs, Ops, - array_lengthof(Ops)); + SDNode *CNode = CurDAG->getMachineNode(MOpc, dl, VTs, Ops); Chain = SDValue(CNode, 0); InFlag = SDValue(CNode, 1); } @@ -2287,15 +2281,13 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) { SDValue Ops[] = { N1, InFlag }; if (Opc == X86::MULX32rr || Opc == X86::MULX64rr) { SDVTList VTs = CurDAG->getVTList(NVT, NVT, MVT::Glue); - SDNode *CNode = CurDAG->getMachineNode(Opc, dl, VTs, Ops, - array_lengthof(Ops)); + SDNode *CNode = CurDAG->getMachineNode(Opc, dl, VTs, Ops); ResHi = SDValue(CNode, 0); ResLo = SDValue(CNode, 1); InFlag = SDValue(CNode, 2); } else { SDVTList VTs = CurDAG->getVTList(MVT::Glue); - SDNode *CNode = CurDAG->getMachineNode(Opc, dl, VTs, Ops, - array_lengthof(Ops)); + SDNode *CNode = CurDAG->getMachineNode(Opc, dl, VTs, Ops); InFlag = SDValue(CNode, 0); } } @@ -2343,6 +2335,9 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) { DEBUG(dbgs() << "=> "; ResHi.getNode()->dump(CurDAG); dbgs() << '\n'); } + // Propagate ordering to the last node, for now. + CurDAG->AssignOrdering(InFlag.getNode(), CurDAG->GetOrdering(Node)); + return NULL; } @@ -2409,8 +2404,7 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) { SDValue Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, N0.getOperand(0) }; Move = SDValue(CurDAG->getMachineNode(X86::MOVZX32rm8, dl, MVT::i32, - MVT::Other, Ops, - array_lengthof(Ops)), 0); + MVT::Other, Ops), 0); Chain = Move.getValue(1); ReplaceUses(N0.getValue(1), Chain); } else { @@ -2441,8 +2435,7 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) { SDValue Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, N1.getOperand(0), InFlag }; SDNode *CNode = - CurDAG->getMachineNode(MOpc, dl, MVT::Other, MVT::Glue, Ops, - array_lengthof(Ops)); + CurDAG->getMachineNode(MOpc, dl, MVT::Other, MVT::Glue, Ops); InFlag = SDValue(CNode, 1); // Update the chain. 
ReplaceUses(N1.getValue(1), SDValue(CNode, 0)); @@ -2674,8 +2667,7 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) { unsigned newOpc = getFusedLdStOpcode(LdVT, Opc); MachineSDNode *Result = CurDAG->getMachineNode(newOpc, Node->getDebugLoc(), - MVT::i32, MVT::Other, Ops, - array_lengthof(Ops)); + MVT::i32, MVT::Other, Ops); Result->setMemRefs(MemOp, MemOp + 2); ReplaceUses(SDValue(StoreNode, 0), SDValue(Result, 1)); diff --git a/contrib/llvm/lib/Target/X86/X86ISelLowering.cpp b/contrib/llvm/lib/Target/X86/X86ISelLowering.cpp index 6934186..f69f5d8 100644 --- a/contrib/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/contrib/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -163,10 +163,28 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM) Subtarget = &TM.getSubtarget<X86Subtarget>(); X86ScalarSSEf64 = Subtarget->hasSSE2(); X86ScalarSSEf32 = Subtarget->hasSSE1(); - RegInfo = TM.getRegisterInfo(); TD = getDataLayout(); + resetOperationActions(); +} + +void X86TargetLowering::resetOperationActions() { + const TargetMachine &TM = getTargetMachine(); + static bool FirstTimeThrough = true; + + // If none of the target options have changed, then we don't need to reset the + // operation actions. + if (!FirstTimeThrough && TO == TM.Options) return; + + if (!FirstTimeThrough) { + // Reinitialize the actions. + initActions(); + FirstTimeThrough = false; + } + + TO = TM.Options; + // Set up the TargetLowering object. static const MVT IntVTs[] = { MVT::i8, MVT::i16, MVT::i32, MVT::i64 }; @@ -508,16 +526,8 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM) if (Subtarget->hasSSE1()) setOperationAction(ISD::PREFETCH , MVT::Other, Legal); - setOperationAction(ISD::MEMBARRIER , MVT::Other, Custom); setOperationAction(ISD::ATOMIC_FENCE , MVT::Other, Custom); - // On X86 and X86-64, atomic operations are lowered to locked instructions. - // Locked instructions, in turn, have implicit fence semantics (all memory - // operations are flushed before issuing the locked instruction, and they - // are not buffered), so we can fold away the common pattern of - // fence-atomic-fence. - setShouldFoldAtomicFences(true); - // Expand certain atomics for (unsigned i = 0; i != array_lengthof(IntVTs); ++i) { MVT VT = IntVTs[i]; @@ -1785,7 +1795,7 @@ X86TargetLowering::LowerCallResult(SDValue Chain, SDValue InFlag, if (isScalarFPTypeInSSEReg(VA.getValVT())) CopyVT = MVT::f80; SDValue Ops[] = { Chain, InFlag }; Chain = SDValue(DAG.getMachineNode(X86::FpPOP_RETVAL, dl, CopyVT, - MVT::Other, MVT::Glue, Ops, 2), 1); + MVT::Other, MVT::Glue, Ops), 1); Val = Chain.getValue(0); // Round the f80 to the right size, which also moves it to the appropriate @@ -4404,13 +4414,15 @@ static SDValue getZeroVector(EVT VT, const X86Subtarget *Subtarget, if (Subtarget->hasInt256()) { // AVX2 SDValue Cst = DAG.getTargetConstant(0, MVT::i32); SDValue Ops[] = { Cst, Cst, Cst, Cst, Cst, Cst, Cst, Cst }; - Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v8i32, Ops, 8); + Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v8i32, Ops, + array_lengthof(Ops)); } else { // 256-bit logic and arithmetic instructions in AVX are all // floating-point, no support for integer ops. Emit fp zeroed vectors. 
SDValue Cst = DAG.getTargetConstantFP(+0.0, MVT::f32); SDValue Ops[] = { Cst, Cst, Cst, Cst, Cst, Cst, Cst, Cst }; - Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v8f32, Ops, 8); + Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v8f32, Ops, + array_lengthof(Ops)); } } else llvm_unreachable("Unexpected vector type"); @@ -4431,7 +4443,8 @@ static SDValue getOnesVector(MVT VT, bool HasInt256, SelectionDAG &DAG, if (VT.is256BitVector()) { if (HasInt256) { // AVX2 SDValue Ops[] = { Cst, Cst, Cst, Cst, Cst, Cst, Cst, Cst }; - Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v8i32, Ops, 8); + Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v8i32, Ops, + array_lengthof(Ops)); } else { // AVX Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, Cst, Cst, Cst, Cst); Vec = Concat128BitVectors(Vec, Vec, MVT::v8i32, 8, DAG, dl); @@ -5101,7 +5114,8 @@ static SDValue EltsFromConsecutiveLoads(EVT VT, SmallVectorImpl<SDValue> &Elts, SDVTList Tys = DAG.getVTList(MVT::v2i64, MVT::Other); SDValue Ops[] = { LDBase->getChain(), LDBase->getBasePtr() }; SDValue ResNode = - DAG.getMemIntrinsicNode(X86ISD::VZEXT_LOAD, DL, Tys, Ops, 2, MVT::i64, + DAG.getMemIntrinsicNode(X86ISD::VZEXT_LOAD, DL, Tys, Ops, + array_lengthof(Ops), MVT::i64, LDBase->getPointerInfo(), LDBase->getAlignment(), false/*isVolatile*/, true/*ReadMem*/, @@ -7624,10 +7638,10 @@ GetTLSADDR(SelectionDAG &DAG, SDValue Chain, GlobalAddressSDNode *GA, if (InFlag) { SDValue Ops[] = { Chain, TGA, *InFlag }; - Chain = DAG.getNode(CallType, dl, NodeTys, Ops, 3); + Chain = DAG.getNode(CallType, dl, NodeTys, Ops, array_lengthof(Ops)); } else { SDValue Ops[] = { Chain, TGA }; - Chain = DAG.getNode(CallType, dl, NodeTys, Ops, 2); + Chain = DAG.getNode(CallType, dl, NodeTys, Ops, array_lengthof(Ops)); } // TLSADDR will be codegen'ed as call. Inform MFI that function has calls. 
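A recurring change in the surrounding X86ISelLowering.cpp hunks is replacing hand-written operand counts (Ops, 2 / Ops, 3 / Ops, 8) with array_lengthof(Ops) in calls that still take a pointer-plus-count pair. The short stand-alone sketch below shows the idiom; array_lengthof itself lives in llvm/ADT/STLExtras.h, and the SDValue struct and main() here are illustrative stand-ins only, not part of the patch.

// Minimal sketch of the array_lengthof idiom: the operand count is derived
// from the array's type instead of being written out by hand, so it cannot
// drift out of sync when operands are added or removed.
// LLVM provides this template in llvm/ADT/STLExtras.h; it is restated here
// only so the sketch compiles on its own.
#include <cstddef>
#include <iostream>

template <class T, std::size_t N>
inline std::size_t array_lengthof(T (&)[N]) {
  return N;
}

struct SDValue { int Id; };  // illustrative stand-in for llvm::SDValue

int main() {
  SDValue Ops[] = { {0}, {1}, {2} };
  // Before: DAG.getMergeValues(Ops, 2, dl)   -- silently wrong if Ops grows.
  // After:  DAG.getMergeValues(Ops, array_lengthof(Ops), dl)
  std::cout << array_lengthof(Ops) << "\n";  // prints 3
  return 0;
}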
@@ -7937,7 +7951,7 @@ SDValue X86TargetLowering::LowerShiftParts(SDValue Op, SelectionDAG &DAG) const{ } SDValue Ops[2] = { Lo, Hi }; - return DAG.getMergeValues(Ops, 2, dl); + return DAG.getMergeValues(Ops, array_lengthof(Ops), dl); } SDValue X86TargetLowering::LowerSINT_TO_FP(SDValue Op, @@ -8220,8 +8234,8 @@ SDValue X86TargetLowering::LowerUINT_TO_FP(SDValue Op, SDVTList Tys = DAG.getVTList(MVT::f80, MVT::Other); SDValue Ops[] = { Store, StackSlot, DAG.getValueType(MVT::i64) }; - SDValue Fild = DAG.getMemIntrinsicNode(X86ISD::FILD, dl, Tys, Ops, 3, - MVT::i64, MMO); + SDValue Fild = DAG.getMemIntrinsicNode(X86ISD::FILD, dl, Tys, Ops, + array_lengthof(Ops), MVT::i64, MMO); APInt FF(32, 0x5F800000ULL); @@ -8313,8 +8327,8 @@ X86TargetLowering:: FP_TO_INTHelper(SDValue Op, SelectionDAG &DAG, MachineMemOperand *MMO = MF.getMachineMemOperand(MachinePointerInfo::getFixedStack(SSFI), MachineMemOperand::MOLoad, MemSize, MemSize); - Value = DAG.getMemIntrinsicNode(X86ISD::FLD, DL, Tys, Ops, 3, - DstTy, MMO); + Value = DAG.getMemIntrinsicNode(X86ISD::FLD, DL, Tys, Ops, + array_lengthof(Ops), DstTy, MMO); Chain = Value.getValue(1); SSFI = MF.getFrameInfo()->CreateStackObject(MemSize, MemSize, false); StackSlot = DAG.getFrameIndex(SSFI, getPointerTy()); @@ -8328,7 +8342,8 @@ X86TargetLowering:: FP_TO_INTHelper(SDValue Op, SelectionDAG &DAG, // Build the FP_TO_INT*_IN_MEM SDValue Ops[] = { Chain, Value, StackSlot }; SDValue FIST = DAG.getMemIntrinsicNode(Opc, DL, DAG.getVTList(MVT::Other), - Ops, 3, DstTy, MMO); + Ops, array_lengthof(Ops), DstTy, + MMO); return std::make_pair(FIST, StackSlot); } else { SDValue ftol = DAG.getNode(X86ISD::WIN_FTOL, DL, @@ -8340,8 +8355,8 @@ X86TargetLowering:: FP_TO_INTHelper(SDValue Op, SelectionDAG &DAG, MVT::i32, eax.getValue(2)); SDValue Ops[] = { eax, edx }; SDValue pair = IsReplace - ? DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, Ops, 2) - : DAG.getMergeValues(Ops, 2, DL); + ? DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, Ops, array_lengthof(Ops)) + : DAG.getMergeValues(Ops, array_lengthof(Ops), DL); return std::make_pair(pair, SDValue()); } } @@ -9165,14 +9180,6 @@ SDValue X86TargetLowering::LowerToBT(SDValue And, ISD::CondCode CC, } if (LHS.getNode()) { - // If the LHS is of the form (x ^ -1) then replace the LHS with x and flip - // the condition code later. - bool Invert = false; - if (LHS.getOpcode() == ISD::XOR && isAllOnes(LHS.getOperand(1))) { - Invert = true; - LHS = LHS.getOperand(0); - } - // If LHS is i8, promote it to i32 with any_extend. There is no i8 BT // instruction. Since the shift amount is in-range-or-undefined, we know // that doing a bittest on the i32 value is ok. We extend to i32 because @@ -9189,9 +9196,6 @@ SDValue X86TargetLowering::LowerToBT(SDValue And, ISD::CondCode CC, SDValue BT = DAG.getNode(X86ISD::BT, dl, MVT::i32, LHS, RHS); X86::CondCode Cond = CC == ISD::SETEQ ? X86::COND_AE : X86::COND_B; - // Flip the condition if the LHS was a not instruction - if (Invert) - Cond = X86::GetOppositeBranchCondition(Cond); return DAG.getNode(X86ISD::SETCC, dl, MVT::i8, DAG.getConstant(Cond, MVT::i8), BT); } @@ -9335,14 +9339,54 @@ static SDValue LowerVSETCC(SDValue Op, const X86Subtarget *Subtarget, // Check that the operation in question is available (most are plain SSE2, // but PCMPGTQ and PCMPEQQ have different requirements). 
if (VT == MVT::v2i64) { - if (Opc == X86ISD::PCMPGT && !Subtarget->hasSSE42()) - return SDValue(); + if (Opc == X86ISD::PCMPGT && !Subtarget->hasSSE42()) { + assert(Subtarget->hasSSE2() && "Don't know how to lower!"); + + // First cast everything to the right type. + Op0 = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, Op0); + Op1 = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, Op1); + + // Since SSE has no unsigned integer comparisons, we need to flip the sign + // bits of the inputs before performing those operations. The lower + // compare is always unsigned. + SDValue SB; + if (FlipSigns) { + SB = DAG.getConstant(0x80000000U, MVT::v4i32); + } else { + SDValue Sign = DAG.getConstant(0x80000000U, MVT::i32); + SDValue Zero = DAG.getConstant(0x00000000U, MVT::i32); + SB = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, + Sign, Zero, Sign, Zero); + } + Op0 = DAG.getNode(ISD::XOR, dl, MVT::v4i32, Op0, SB); + Op1 = DAG.getNode(ISD::XOR, dl, MVT::v4i32, Op1, SB); + + // Emulate PCMPGTQ with (hi1 > hi2) | ((hi1 == hi2) & (lo1 > lo2)) + SDValue GT = DAG.getNode(X86ISD::PCMPGT, dl, MVT::v4i32, Op0, Op1); + SDValue EQ = DAG.getNode(X86ISD::PCMPEQ, dl, MVT::v4i32, Op0, Op1); + + // Create masks for only the low parts/high parts of the 64 bit integers. + const int MaskHi[] = { 1, 1, 3, 3 }; + const int MaskLo[] = { 0, 0, 2, 2 }; + SDValue EQHi = DAG.getVectorShuffle(MVT::v4i32, dl, EQ, EQ, MaskHi); + SDValue GTLo = DAG.getVectorShuffle(MVT::v4i32, dl, GT, GT, MaskLo); + SDValue GTHi = DAG.getVectorShuffle(MVT::v4i32, dl, GT, GT, MaskHi); + + SDValue Result = DAG.getNode(ISD::AND, dl, MVT::v4i32, EQHi, GTLo); + Result = DAG.getNode(ISD::OR, dl, MVT::v4i32, Result, GTHi); + + if (Invert) + Result = DAG.getNOT(dl, Result, MVT::v4i32); + + return DAG.getNode(ISD::BITCAST, dl, VT, Result); + } + if (Opc == X86ISD::PCMPEQ && !Subtarget->hasSSE41()) { // If pcmpeqq is missing but pcmpeqd is available synthesize pcmpeqq with // pcmpeqd + pshufd + pand. assert(Subtarget->hasSSE2() && !FlipSigns && "Don't know how to lower!"); - // First cast everything to the right type, + // First cast everything to the right type. Op0 = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, Op0); Op1 = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, Op1); @@ -9361,17 +9405,13 @@ static SDValue LowerVSETCC(SDValue Op, const X86Subtarget *Subtarget, } } - // Since SSE has no unsigned integer comparisons, we need to flip the sign + // Since SSE has no unsigned integer comparisons, we need to flip the sign // bits of the inputs before performing those operations. if (FlipSigns) { EVT EltVT = VT.getVectorElementType(); - SDValue SignBit = DAG.getConstant(APInt::getSignBit(EltVT.getSizeInBits()), - EltVT); - std::vector<SDValue> SignBits(VT.getVectorNumElements(), SignBit); - SDValue SignVec = DAG.getNode(ISD::BUILD_VECTOR, dl, VT, &SignBits[0], - SignBits.size()); - Op0 = DAG.getNode(ISD::XOR, dl, VT, Op0, SignVec); - Op1 = DAG.getNode(ISD::XOR, dl, VT, Op1, SignVec); + SDValue SB = DAG.getConstant(APInt::getSignBit(EltVT.getSizeInBits()), VT); + Op0 = DAG.getNode(ISD::XOR, dl, VT, Op0, SB); + Op1 = DAG.getNode(ISD::XOR, dl, VT, Op1, SB); } SDValue Result = DAG.getNode(Opc, dl, VT, Op0, Op1); @@ -10937,7 +10977,7 @@ static SDValue LowerINTRINSIC_W_CHAIN(SDValue Op, SelectionDAG &DAG) { SDValue(Result.getNode(), 1) }; SDValue isValid = DAG.getNode(X86ISD::CMOV, dl, DAG.getVTList(Op->getValueType(1), MVT::Glue), - Ops, 4); + Ops, array_lengthof(Ops)); // Return { result, isValid, chain }. 
return DAG.getNode(ISD::MERGE_VALUES, dl, Op->getVTList(), Result, isValid, @@ -10990,7 +11030,10 @@ SDValue X86TargetLowering::LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const { EVT VT = Op.getValueType(); DebugLoc dl = Op.getDebugLoc(); // FIXME probably not meaningful unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue(); - unsigned FrameReg = Subtarget->is64Bit() ? X86::RBP : X86::EBP; + unsigned FrameReg = RegInfo->getFrameRegister(DAG.getMachineFunction()); + assert(((FrameReg == X86::RBP && VT == MVT::i64) || + (FrameReg == X86::EBP && VT == MVT::i32)) && + "Invalid Frame Register!"); SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), dl, FrameReg, VT); while (Depth--) FrameAddr = DAG.getLoad(VT, dl, DAG.getEntryNode(), FrameAddr, @@ -11010,21 +11053,23 @@ SDValue X86TargetLowering::LowerEH_RETURN(SDValue Op, SelectionDAG &DAG) const { SDValue Handler = Op.getOperand(2); DebugLoc dl = Op.getDebugLoc(); - SDValue Frame = DAG.getCopyFromReg(DAG.getEntryNode(), dl, - Subtarget->is64Bit() ? X86::RBP : X86::EBP, - getPointerTy()); - unsigned StoreAddrReg = (Subtarget->is64Bit() ? X86::RCX : X86::ECX); - - SDValue StoreAddr = DAG.getNode(ISD::ADD, dl, getPointerTy(), Frame, - DAG.getIntPtrConstant(RegInfo->getSlotSize())); - StoreAddr = DAG.getNode(ISD::ADD, dl, getPointerTy(), StoreAddr, Offset); + EVT PtrVT = getPointerTy(); + unsigned FrameReg = RegInfo->getFrameRegister(DAG.getMachineFunction()); + assert(((FrameReg == X86::RBP && PtrVT == MVT::i64) || + (FrameReg == X86::EBP && PtrVT == MVT::i32)) && + "Invalid Frame Register!"); + SDValue Frame = DAG.getCopyFromReg(DAG.getEntryNode(), dl, FrameReg, PtrVT); + unsigned StoreAddrReg = (PtrVT == MVT::i64) ? X86::RCX : X86::ECX; + + SDValue StoreAddr = DAG.getNode(ISD::ADD, dl, PtrVT, Frame, + DAG.getIntPtrConstant(RegInfo->getSlotSize())); + StoreAddr = DAG.getNode(ISD::ADD, dl, PtrVT, StoreAddr, Offset); Chain = DAG.getStore(Chain, dl, Handler, StoreAddr, MachinePointerInfo(), false, false, 0); Chain = DAG.getCopyToReg(Chain, dl, StoreAddrReg, StoreAddr); - return DAG.getNode(X86ISD::EH_RETURN, dl, - MVT::Other, - Chain, DAG.getRegister(StoreAddrReg, getPointerTy())); + return DAG.getNode(X86ISD::EH_RETURN, dl, MVT::Other, Chain, + DAG.getRegister(StoreAddrReg, PtrVT)); } SDValue X86TargetLowering::lowerEH_SJLJ_SETJMP(SDValue Op, @@ -11235,7 +11280,8 @@ SDValue X86TargetLowering::LowerFLT_ROUNDS_(SDValue Op, SDValue Ops[] = { DAG.getEntryNode(), StackSlot }; SDValue Chain = DAG.getMemIntrinsicNode(X86ISD::FNSTCW16m, DL, DAG.getVTList(MVT::Other), - Ops, 2, MVT::i16, MMO); + Ops, array_lengthof(Ops), MVT::i16, + MMO); // Load FP Control Word from stack slot SDValue CWD = DAG.getLoad(MVT::i16, DL, Chain, StackSlot, @@ -12075,52 +12121,6 @@ SDValue X86TargetLowering::LowerSIGN_EXTEND_INREG(SDValue Op, } } -static SDValue LowerMEMBARRIER(SDValue Op, const X86Subtarget *Subtarget, - SelectionDAG &DAG) { - DebugLoc dl = Op.getDebugLoc(); - - // Go ahead and emit the fence on x86-64 even if we asked for no-sse2. - // There isn't any reason to disable it if the target processor supports it. - if (!Subtarget->hasSSE2() && !Subtarget->is64Bit()) { - SDValue Chain = Op.getOperand(0); - SDValue Zero = DAG.getConstant(0, MVT::i32); - SDValue Ops[] = { - DAG.getRegister(X86::ESP, MVT::i32), // Base - DAG.getTargetConstant(1, MVT::i8), // Scale - DAG.getRegister(0, MVT::i32), // Index - DAG.getTargetConstant(0, MVT::i32), // Disp - DAG.getRegister(0, MVT::i32), // Segment. 
- Zero, - Chain - }; - SDNode *Res = - DAG.getMachineNode(X86::OR32mrLocked, dl, MVT::Other, Ops, - array_lengthof(Ops)); - return SDValue(Res, 0); - } - - unsigned isDev = cast<ConstantSDNode>(Op.getOperand(5))->getZExtValue(); - if (!isDev) - return DAG.getNode(X86ISD::MEMBARRIER, dl, MVT::Other, Op.getOperand(0)); - - unsigned Op1 = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue(); - unsigned Op2 = cast<ConstantSDNode>(Op.getOperand(2))->getZExtValue(); - unsigned Op3 = cast<ConstantSDNode>(Op.getOperand(3))->getZExtValue(); - unsigned Op4 = cast<ConstantSDNode>(Op.getOperand(4))->getZExtValue(); - - // def : Pat<(membarrier (i8 0), (i8 0), (i8 0), (i8 1), (i8 1)), (SFENCE)>; - if (!Op1 && !Op2 && !Op3 && Op4) - return DAG.getNode(X86ISD::SFENCE, dl, MVT::Other, Op.getOperand(0)); - - // def : Pat<(membarrier (i8 1), (i8 0), (i8 0), (i8 0), (i8 1)), (LFENCE)>; - if (Op1 && !Op2 && !Op3 && !Op4) - return DAG.getNode(X86ISD::LFENCE, dl, MVT::Other, Op.getOperand(0)); - - // def : Pat<(membarrier (i8 imm), (i8 imm), (i8 imm), (i8 imm), (i8 1)), - // (MFENCE)>; - return DAG.getNode(X86ISD::MFENCE, dl, MVT::Other, Op.getOperand(0)); -} - static SDValue LowerATOMIC_FENCE(SDValue Op, const X86Subtarget *Subtarget, SelectionDAG &DAG) { DebugLoc dl = Op.getDebugLoc(); @@ -12149,9 +12149,7 @@ static SDValue LowerATOMIC_FENCE(SDValue Op, const X86Subtarget *Subtarget, Zero, Chain }; - SDNode *Res = - DAG.getMachineNode(X86::OR32mrLocked, dl, MVT::Other, Ops, - array_lengthof(Ops)); + SDNode *Res = DAG.getMachineNode(X86::OR32mrLocked, dl, MVT::Other, Ops); return SDValue(Res, 0); } @@ -12185,7 +12183,7 @@ static SDValue LowerCMP_SWAP(SDValue Op, const X86Subtarget *Subtarget, SDVTList Tys = DAG.getVTList(MVT::Other, MVT::Glue); MachineMemOperand *MMO = cast<AtomicSDNode>(Op)->getMemOperand(); SDValue Result = DAG.getMemIntrinsicNode(X86ISD::LCMPXCHG_DAG, DL, Tys, - Ops, 5, T, MMO); + Ops, array_lengthof(Ops), T, MMO); SDValue cpOut = DAG.getCopyFromReg(Result.getValue(0), DL, Reg, T, Result.getValue(1)); return cpOut; @@ -12207,7 +12205,7 @@ static SDValue LowerREADCYCLECOUNTER(SDValue Op, const X86Subtarget *Subtarget, DAG.getNode(ISD::OR, dl, MVT::i64, rax, Tmp), rdx.getValue(1) }; - return DAG.getMergeValues(Ops, 2, dl); + return DAG.getMergeValues(Ops, array_lengthof(Ops), dl); } SDValue X86TargetLowering::LowerBITCAST(SDValue Op, SelectionDAG &DAG) const { @@ -12301,7 +12299,8 @@ SDValue X86TargetLowering::LowerFSINCOS(SDValue Op, SelectionDAG &DAG) const { assert(Subtarget->isTargetDarwin() && Subtarget->is64Bit()); // For MacOSX, we want to call an alternative entry point: __sincos_stret, - // which returns the values in two XMM registers. + // which returns the values as { float, float } (in XMM0) or + // { double, double } (which is returned in XMM0, XMM1). DebugLoc dl = Op.getDebugLoc(); SDValue Arg = Op.getOperand(0); EVT ArgVT = Arg.getValueType(); @@ -12316,14 +12315,16 @@ SDValue X86TargetLowering::LowerFSINCOS(SDValue Op, SelectionDAG &DAG) const { Entry.isZExt = false; Args.push_back(Entry); + bool isF64 = ArgVT == MVT::f64; // Only optimize x86_64 for now. i386 is a bit messy. For f32, // the small struct {f32, f32} is returned in (eax, edx). For f64, // the results are returned via SRet in memory. - const char *LibcallName = (ArgVT == MVT::f64) - ? "__sincos_stret" : "__sincosf_stret"; + const char *LibcallName = isF64 ? 
"__sincos_stret" : "__sincosf_stret"; SDValue Callee = DAG.getExternalSymbol(LibcallName, getPointerTy()); - StructType *RetTy = StructType::get(ArgTy, ArgTy, NULL); + Type *RetTy = isF64 + ? (Type*)StructType::get(ArgTy, ArgTy, NULL) + : (Type*)VectorType::get(ArgTy, 4); TargetLowering:: CallLoweringInfo CLI(DAG.getEntryNode(), RetTy, false, false, false, false, 0, @@ -12331,7 +12332,18 @@ SDValue X86TargetLowering::LowerFSINCOS(SDValue Op, SelectionDAG &DAG) const { /*doesNotRet=*/false, /*isReturnValueUsed*/true, Callee, Args, DAG, dl); std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI); - return CallResult.first; + + if (isF64) + // Returned in xmm0 and xmm1. + return CallResult.first; + + // Returned in bits 0:31 and 32:64 xmm0. + SDValue SinVal = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, ArgVT, + CallResult.first, DAG.getIntPtrConstant(0)); + SDValue CosVal = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, ArgVT, + CallResult.first, DAG.getIntPtrConstant(1)); + SDVTList Tys = DAG.getVTList(ArgVT, ArgVT); + return DAG.getNode(ISD::MERGE_VALUES, dl, Tys, SinVal, CosVal); } /// LowerOperation - Provide custom lowering hooks for some operations. @@ -12340,7 +12352,6 @@ SDValue X86TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { switch (Op.getOpcode()) { default: llvm_unreachable("Should not custom lower this!"); case ISD::SIGN_EXTEND_INREG: return LowerSIGN_EXTEND_INREG(Op,DAG); - case ISD::MEMBARRIER: return LowerMEMBARRIER(Op, Subtarget, DAG); case ISD::ATOMIC_FENCE: return LowerATOMIC_FENCE(Op, Subtarget, DAG); case ISD::ATOMIC_CMP_SWAP: return LowerCMP_SWAP(Op, Subtarget, DAG); case ISD::ATOMIC_LOAD_SUB: return LowerLOAD_SUB(Op,DAG); @@ -12457,7 +12468,7 @@ ReplaceATOMIC_BINARY_64(SDNode *Node, SmallVectorImpl<SDValue>&Results, SDValue Ops[] = { Chain, In1, In2L, In2H }; SDVTList Tys = DAG.getVTList(MVT::i32, MVT::i32, MVT::Other); SDValue Result = - DAG.getMemIntrinsicNode(NewOp, dl, Tys, Ops, 4, MVT::i64, + DAG.getMemIntrinsicNode(NewOp, dl, Tys, Ops, array_lengthof(Ops), MVT::i64, cast<MemSDNode>(Node)->getMemOperand()); SDValue OpsF[] = { Result.getValue(0), Result.getValue(1)}; Results.push_back(DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, OpsF, 2)); @@ -12537,7 +12548,8 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N, eax.getValue(2)); // Use a buildpair to merge the two 32-bit values into a 64-bit one. SDValue Ops[] = { eax, edx }; - Results.push_back(DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Ops, 2)); + Results.push_back(DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Ops, + array_lengthof(Ops))); Results.push_back(edx.getValue(1)); return; } @@ -12576,7 +12588,7 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N, unsigned Opcode = Regs64bit ? X86ISD::LCMPXCHG16_DAG : X86ISD::LCMPXCHG8_DAG; SDValue Result = DAG.getMemIntrinsicNode(Opcode, dl, Tys, - Ops, 3, T, MMO); + Ops, array_lengthof(Ops), T, MMO); SDValue cpOutL = DAG.getCopyFromReg(Result.getValue(0), dl, Regs64bit ? 
X86::RAX : X86::EAX, HalfT, Result.getValue(1)); @@ -15063,7 +15075,8 @@ static SDValue PerformShuffleCombine256(SDNode *N, SelectionDAG &DAG, SDVTList Tys = DAG.getVTList(MVT::v4i64, MVT::Other); SDValue Ops[] = { Ld->getChain(), Ld->getBasePtr() }; SDValue ResNode = - DAG.getMemIntrinsicNode(X86ISD::VZEXT_LOAD, dl, Tys, Ops, 2, + DAG.getMemIntrinsicNode(X86ISD::VZEXT_LOAD, dl, Tys, Ops, + array_lengthof(Ops), Ld->getMemoryVT(), Ld->getPointerInfo(), Ld->getAlignment(), @@ -15755,6 +15768,51 @@ static SDValue PerformSELECTCombine(SDNode *N, SelectionDAG &DAG, if (unsigned Op = matchIntegerMINMAX(Cond, VT, LHS, RHS, DAG, Subtarget)) return DAG.getNode(Op, DL, N->getValueType(0), LHS, RHS); + // Simplify vector selection if the selector will be produced by CMPP*/PCMP*. + if (!DCI.isBeforeLegalize() && N->getOpcode() == ISD::VSELECT && + Cond.getOpcode() == ISD::SETCC) { + + assert(Cond.getValueType().isVector() && + "vector select expects a vector selector!"); + + EVT IntVT = Cond.getValueType(); + bool TValIsAllOnes = ISD::isBuildVectorAllOnes(LHS.getNode()); + bool FValIsAllZeros = ISD::isBuildVectorAllZeros(RHS.getNode()); + + if (!TValIsAllOnes && !FValIsAllZeros) { + // Try invert the condition if true value is not all 1s and false value + // is not all 0s. + bool TValIsAllZeros = ISD::isBuildVectorAllZeros(LHS.getNode()); + bool FValIsAllOnes = ISD::isBuildVectorAllOnes(RHS.getNode()); + + if (TValIsAllZeros || FValIsAllOnes) { + SDValue CC = Cond.getOperand(2); + ISD::CondCode NewCC = + ISD::getSetCCInverse(cast<CondCodeSDNode>(CC)->get(), + Cond.getOperand(0).getValueType().isInteger()); + Cond = DAG.getSetCC(DL, IntVT, Cond.getOperand(0), Cond.getOperand(1), NewCC); + std::swap(LHS, RHS); + TValIsAllOnes = FValIsAllOnes; + FValIsAllZeros = TValIsAllZeros; + } + } + + if (TValIsAllOnes || FValIsAllZeros) { + SDValue Ret; + + if (TValIsAllOnes && FValIsAllZeros) + Ret = Cond; + else if (TValIsAllOnes) + Ret = DAG.getNode(ISD::OR, DL, IntVT, Cond, + DAG.getNode(ISD::BITCAST, DL, IntVT, RHS)); + else if (FValIsAllZeros) + Ret = DAG.getNode(ISD::AND, DL, IntVT, Cond, + DAG.getNode(ISD::BITCAST, DL, IntVT, LHS)); + + return DAG.getNode(ISD::BITCAST, DL, VT, Ret); + } + } + // If we know that this node is legal then we know that it is going to be // matched by one of the SSE/AVX BLEND instructions. These instructions only // depend on the highest bit in each word. Try to use SimplifyDemandedBits @@ -15815,6 +15873,7 @@ static SDValue checkBoolTestSetCCCombine(SDValue Cmp, X86::CondCode &CC) { SDValue SetCC; const ConstantSDNode* C = 0; bool needOppositeCond = (CC == X86::COND_E); + bool checkAgainstTrue = false; // Is it a comparison against 1? if ((C = dyn_cast<ConstantSDNode>(Op1))) SetCC = Op2; @@ -15823,18 +15882,46 @@ static SDValue checkBoolTestSetCCCombine(SDValue Cmp, X86::CondCode &CC) { else // Quit if all operands are not constants. return SDValue(); - if (C->getZExtValue() == 1) + if (C->getZExtValue() == 1) { needOppositeCond = !needOppositeCond; - else if (C->getZExtValue() != 0) + checkAgainstTrue = true; + } else if (C->getZExtValue() != 0) // Quit if the constant is neither 0 or 1. return SDValue(); - // Skip 'zext' or 'trunc' node. - if (SetCC.getOpcode() == ISD::ZERO_EXTEND || - SetCC.getOpcode() == ISD::TRUNCATE) - SetCC = SetCC.getOperand(0); + bool truncatedToBoolWithAnd = false; + // Skip (zext $x), (trunc $x), or (and $x, 1) node. 
+ while (SetCC.getOpcode() == ISD::ZERO_EXTEND || + SetCC.getOpcode() == ISD::TRUNCATE || + SetCC.getOpcode() == ISD::AND) { + if (SetCC.getOpcode() == ISD::AND) { + int OpIdx = -1; + ConstantSDNode *CS; + if ((CS = dyn_cast<ConstantSDNode>(SetCC.getOperand(0))) && + CS->getZExtValue() == 1) + OpIdx = 1; + if ((CS = dyn_cast<ConstantSDNode>(SetCC.getOperand(1))) && + CS->getZExtValue() == 1) + OpIdx = 0; + if (OpIdx == -1) + break; + SetCC = SetCC.getOperand(OpIdx); + truncatedToBoolWithAnd = true; + } else + SetCC = SetCC.getOperand(0); + } switch (SetCC.getOpcode()) { + case X86ISD::SETCC_CARRY: + // Since SETCC_CARRY gives output based on R = CF ? ~0 : 0, it's unsafe to + // simplify it if the result of SETCC_CARRY is not canonicalized to 0 or 1, + // i.e. it's a comparison against true but the result of SETCC_CARRY is not + // truncated to i1 using 'and'. + if (checkAgainstTrue && !truncatedToBoolWithAnd) + break; + assert(X86::CondCode(SetCC.getConstantOperandVal(0)) == X86::COND_B && + "Invalid use of SETCC_CARRY!"); + // FALL THROUGH case X86ISD::SETCC: // Set the condition code or opposite one if necessary. CC = X86::CondCode(SetCC.getConstantOperandVal(0)); @@ -16165,8 +16252,7 @@ static SDValue PerformSHLCombine(SDNode *N, SelectionDAG &DAG) { return SDValue(); } -/// PerformShiftCombine - Transforms vector shift nodes to use vector shifts -/// when possible. +/// PerformShiftCombine - Combine shifts. static SDValue PerformShiftCombine(SDNode* N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const X86Subtarget *Subtarget) { diff --git a/contrib/llvm/lib/Target/X86/X86ISelLowering.h b/contrib/llvm/lib/Target/X86/X86ISelLowering.h index 5725f7a..2727e22 100644 --- a/contrib/llvm/lib/Target/X86/X86ISelLowering.h +++ b/contrib/llvm/lib/Target/X86/X86ISelLowering.h @@ -723,6 +723,9 @@ namespace llvm { SDValue BuildFILD(SDValue Op, EVT SrcVT, SDValue Chain, SDValue StackSlot, SelectionDAG &DAG) const; + /// \brief Reset the operation actions based on target options. + virtual void resetOperationActions(); + protected: std::pair<const TargetRegisterClass*, uint8_t> findRepresentativeClass(MVT VT) const; @@ -734,6 +737,10 @@ namespace llvm { const X86RegisterInfo *RegInfo; const DataLayout *TD; + /// Used to store the TargetOptions so that we don't waste time resetting + /// the operation actions unless we have to. + TargetOptions TO; + /// X86ScalarSSEf32, X86ScalarSSEf64 - Select between SSE or x87 /// floating point ops. /// When SSE is available, use it for f32 operations. 
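The TargetOptions member TO added to X86ISelLowering.h just above, together with the resetOperationActions() body shown earlier in X86ISelLowering.cpp ("if (!FirstTimeThrough && TO == TM.Options) return;"), caches the last options the operation actions were configured for so the expensive re-initialization is skipped when nothing changed. The sketch below shows a simplified version of that pattern; Options, Lowering and the member names are hypothetical stand-ins, and the real patch additionally keys off a FirstTimeThrough flag.

// Simplified sketch (hypothetical types/names) of the options-caching pattern:
// remember the options the actions were last initialized for and re-run the
// initialization only when they differ.
#include <iostream>

struct Options {                        // stand-in for llvm::TargetOptions
  bool UnsafeFPMath = false;
  bool NoInfsFPMath = false;
  bool operator==(const Options &RHS) const {
    return UnsafeFPMath == RHS.UnsafeFPMath &&
           NoInfsFPMath == RHS.NoInfsFPMath;
  }
};

class Lowering {
  Options TO;                           // options the actions were last built for
  bool Initialized = false;

  void initActions() { std::cout << "re-initializing operation actions\n"; }

public:
  void resetOperationActions(const Options &Current) {
    if (Initialized && TO == Current)
      return;                           // nothing changed, keep cached actions
    initActions();
    TO = Current;                       // remember what we configured for
    Initialized = true;
  }
};

int main() {
  Lowering L;
  Options O;
  L.resetOperationActions(O);           // initializes
  L.resetOperationActions(O);           // no-op, options unchanged
  O.UnsafeFPMath = true;
  L.resetOperationActions(O);           // re-initializes
  return 0;
}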
diff --git a/contrib/llvm/lib/Target/X86/X86InstrFormats.td b/contrib/llvm/lib/Target/X86/X86InstrFormats.td index 0ef9491..a71e024 100644 --- a/contrib/llvm/lib/Target/X86/X86InstrFormats.td +++ b/contrib/llvm/lib/Target/X86/X86InstrFormats.td @@ -35,25 +35,27 @@ def MRM_C3 : Format<35>; def MRM_C4 : Format<36>; def MRM_C8 : Format<37>; def MRM_C9 : Format<38>; -def MRM_E8 : Format<39>; -def MRM_F0 : Format<40>; -def MRM_F8 : Format<41>; -def MRM_F9 : Format<42>; +def MRM_CA : Format<39>; +def MRM_CB : Format<40>; +def MRM_E8 : Format<41>; +def MRM_F0 : Format<42>; def RawFrmImm8 : Format<43>; def RawFrmImm16 : Format<44>; -def MRM_D0 : Format<45>; -def MRM_D1 : Format<46>; -def MRM_D4 : Format<47>; -def MRM_D5 : Format<48>; -def MRM_D6 : Format<49>; -def MRM_D8 : Format<50>; -def MRM_D9 : Format<51>; -def MRM_DA : Format<52>; -def MRM_DB : Format<53>; -def MRM_DC : Format<54>; -def MRM_DD : Format<55>; -def MRM_DE : Format<56>; -def MRM_DF : Format<57>; +def MRM_F8 : Format<45>; +def MRM_F9 : Format<46>; +def MRM_D0 : Format<47>; +def MRM_D1 : Format<48>; +def MRM_D4 : Format<49>; +def MRM_D5 : Format<50>; +def MRM_D6 : Format<51>; +def MRM_D8 : Format<52>; +def MRM_D9 : Format<53>; +def MRM_DA : Format<54>; +def MRM_DB : Format<55>; +def MRM_DC : Format<56>; +def MRM_DD : Format<57>; +def MRM_DE : Format<58>; +def MRM_DF : Format<59>; // ImmType - This specifies the immediate type used by an instruction. This is // part of the ad-hoc solution used to emit machine instruction encodings by our diff --git a/contrib/llvm/lib/Target/X86/X86InstrInfo.cpp b/contrib/llvm/lib/Target/X86/X86InstrInfo.cpp index 7ba542c..7c0423f 100644 --- a/contrib/llvm/lib/Target/X86/X86InstrInfo.cpp +++ b/contrib/llvm/lib/Target/X86/X86InstrInfo.cpp @@ -4281,7 +4281,7 @@ X86InstrInfo::unfoldMemoryOperand(SelectionDAG &DAG, SDNode *N, bool isAligned = (*MMOs.first) && (*MMOs.first)->getAlignment() >= Alignment; Load = DAG.getMachineNode(getLoadRegOpcode(0, RC, isAligned, TM), dl, - VT, MVT::Other, &AddrOps[0], AddrOps.size()); + VT, MVT::Other, AddrOps); NewNodes.push_back(Load); // Preserve memory reference information. @@ -4303,8 +4303,7 @@ X86InstrInfo::unfoldMemoryOperand(SelectionDAG &DAG, SDNode *N, if (Load) BeforeOps.push_back(SDValue(Load, 0)); std::copy(AfterOps.begin(), AfterOps.end(), std::back_inserter(BeforeOps)); - SDNode *NewNode= DAG.getMachineNode(Opc, dl, VTs, &BeforeOps[0], - BeforeOps.size()); + SDNode *NewNode= DAG.getMachineNode(Opc, dl, VTs, BeforeOps); NewNodes.push_back(NewNode); // Emit the store instruction. @@ -4326,8 +4325,7 @@ X86InstrInfo::unfoldMemoryOperand(SelectionDAG &DAG, SDNode *N, (*MMOs.first)->getAlignment() >= Alignment; SDNode *Store = DAG.getMachineNode(getStoreRegOpcode(0, DstRC, isAligned, TM), - dl, MVT::Other, - &AddrOps[0], AddrOps.size()); + dl, MVT::Other, AddrOps); NewNodes.push_back(Store); // Preserve memory reference information. 
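The unfoldMemoryOperand hunks just above drop the explicit "&AddrOps[0], AddrOps.size()" pair because getMachineNode now takes the operand list as a single array-view argument (ArrayRef in LLVM), to which both C arrays and SmallVector-style containers convert implicitly. The sketch below only illustrates why the count argument disappears; View, SDValue and the getMachineNode signature here are hypothetical stand-ins, not the real LLVM API.

// Sketch of an array-view parameter: the callee receives pointer and length
// together, so callers no longer pass a separate count.
#include <cstddef>
#include <iostream>
#include <vector>

struct SDValue { int Id; };             // stand-in for llvm::SDValue

class View {                            // stand-in for ArrayRef<SDValue>
  const SDValue *Data;
  std::size_t Length;
public:
  template <std::size_t N>
  View(const SDValue (&Arr)[N]) : Data(Arr), Length(N) {}      // from C array
  View(const std::vector<SDValue> &V) : Data(V.data()), Length(V.size()) {}
  std::size_t size() const { return Length; }
};

// Old style: getMachineNode(Opc, ..., Ops, array_lengthof(Ops))
// New style: getMachineNode(Opc, ..., Ops) -- the count travels with the view.
void getMachineNode(unsigned Opc, View Ops) {
  std::cout << "opcode " << Opc << " with " << Ops.size() << " operands\n";
}

int main() {
  SDValue Ops[] = { {0}, {1}, {2} };
  std::vector<SDValue> BeforeOps = { {3}, {4} };
  getMachineNode(1, Ops);        // count deduced from the array type
  getMachineNode(2, BeforeOps);  // count taken from the container
  return 0;
}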
diff --git a/contrib/llvm/lib/Target/X86/X86InstrInfo.td b/contrib/llvm/lib/Target/X86/X86InstrInfo.td index ccc1aa2..3380d8c 100644 --- a/contrib/llvm/lib/Target/X86/X86InstrInfo.td +++ b/contrib/llvm/lib/Target/X86/X86InstrInfo.td @@ -1833,90 +1833,90 @@ include "X86InstrCompiler.td" // Assembler Mnemonic Aliases //===----------------------------------------------------------------------===// -def : MnemonicAlias<"call", "calll">, Requires<[In32BitMode]>; -def : MnemonicAlias<"call", "callq">, Requires<[In64BitMode]>; +def : MnemonicAlias<"call", "calll", "att">, Requires<[In32BitMode]>; +def : MnemonicAlias<"call", "callq", "att">, Requires<[In64BitMode]>; -def : MnemonicAlias<"cbw", "cbtw">; -def : MnemonicAlias<"cwde", "cwtl">; -def : MnemonicAlias<"cwd", "cwtd">; -def : MnemonicAlias<"cdq", "cltd">; -def : MnemonicAlias<"cdqe", "cltq">; -def : MnemonicAlias<"cqo", "cqto">; +def : MnemonicAlias<"cbw", "cbtw", "att">; +def : MnemonicAlias<"cwde", "cwtl", "att">; +def : MnemonicAlias<"cwd", "cwtd", "att">; +def : MnemonicAlias<"cdq", "cltd", "att">; +def : MnemonicAlias<"cdqe", "cltq", "att">; +def : MnemonicAlias<"cqo", "cqto", "att">; // lret maps to lretl, it is not ambiguous with lretq. -def : MnemonicAlias<"lret", "lretl">; +def : MnemonicAlias<"lret", "lretl", "att">; -def : MnemonicAlias<"leavel", "leave">, Requires<[In32BitMode]>; -def : MnemonicAlias<"leaveq", "leave">, Requires<[In64BitMode]>; +def : MnemonicAlias<"leavel", "leave", "att">, Requires<[In32BitMode]>; +def : MnemonicAlias<"leaveq", "leave", "att">, Requires<[In64BitMode]>; -def : MnemonicAlias<"loopz", "loope">; -def : MnemonicAlias<"loopnz", "loopne">; +def : MnemonicAlias<"loopz", "loope", "att">; +def : MnemonicAlias<"loopnz", "loopne", "att">; -def : MnemonicAlias<"pop", "popl">, Requires<[In32BitMode]>; -def : MnemonicAlias<"pop", "popq">, Requires<[In64BitMode]>; -def : MnemonicAlias<"popf", "popfl">, Requires<[In32BitMode]>; -def : MnemonicAlias<"popf", "popfq">, Requires<[In64BitMode]>; -def : MnemonicAlias<"popfd", "popfl">; +def : MnemonicAlias<"pop", "popl", "att">, Requires<[In32BitMode]>; +def : MnemonicAlias<"pop", "popq", "att">, Requires<[In64BitMode]>; +def : MnemonicAlias<"popf", "popfl", "att">, Requires<[In32BitMode]>; +def : MnemonicAlias<"popf", "popfq", "att">, Requires<[In64BitMode]>; +def : MnemonicAlias<"popfd", "popfl", "att">; // FIXME: This is wrong for "push reg". "push %bx" should turn into pushw in // all modes. However: "push (addr)" and "push $42" should default to // pushl/pushq depending on the current mode. 
Similar for "pop %bx" -def : MnemonicAlias<"push", "pushl">, Requires<[In32BitMode]>; -def : MnemonicAlias<"push", "pushq">, Requires<[In64BitMode]>; -def : MnemonicAlias<"pushf", "pushfl">, Requires<[In32BitMode]>; -def : MnemonicAlias<"pushf", "pushfq">, Requires<[In64BitMode]>; -def : MnemonicAlias<"pushfd", "pushfl">; +def : MnemonicAlias<"push", "pushl", "att">, Requires<[In32BitMode]>; +def : MnemonicAlias<"push", "pushq", "att">, Requires<[In64BitMode]>; +def : MnemonicAlias<"pushf", "pushfl", "att">, Requires<[In32BitMode]>; +def : MnemonicAlias<"pushf", "pushfq", "att">, Requires<[In64BitMode]>; +def : MnemonicAlias<"pushfd", "pushfl", "att">; -def : MnemonicAlias<"repe", "rep">; -def : MnemonicAlias<"repz", "rep">; -def : MnemonicAlias<"repnz", "repne">; +def : MnemonicAlias<"repe", "rep", "att">; +def : MnemonicAlias<"repz", "rep", "att">; +def : MnemonicAlias<"repnz", "repne", "att">; -def : MnemonicAlias<"retl", "ret">, Requires<[In32BitMode]>; -def : MnemonicAlias<"retq", "ret">, Requires<[In64BitMode]>; +def : MnemonicAlias<"retl", "ret", "att">, Requires<[In32BitMode]>; +def : MnemonicAlias<"retq", "ret", "att">, Requires<[In64BitMode]>; -def : MnemonicAlias<"salb", "shlb">; -def : MnemonicAlias<"salw", "shlw">; -def : MnemonicAlias<"sall", "shll">; -def : MnemonicAlias<"salq", "shlq">; +def : MnemonicAlias<"salb", "shlb", "att">; +def : MnemonicAlias<"salw", "shlw", "att">; +def : MnemonicAlias<"sall", "shll", "att">; +def : MnemonicAlias<"salq", "shlq", "att">; -def : MnemonicAlias<"smovb", "movsb">; -def : MnemonicAlias<"smovw", "movsw">; -def : MnemonicAlias<"smovl", "movsl">; -def : MnemonicAlias<"smovq", "movsq">; +def : MnemonicAlias<"smovb", "movsb", "att">; +def : MnemonicAlias<"smovw", "movsw", "att">; +def : MnemonicAlias<"smovl", "movsl", "att">; +def : MnemonicAlias<"smovq", "movsq", "att">; -def : MnemonicAlias<"ud2a", "ud2">; -def : MnemonicAlias<"verrw", "verr">; +def : MnemonicAlias<"ud2a", "ud2", "att">; +def : MnemonicAlias<"verrw", "verr", "att">; // System instruction aliases. -def : MnemonicAlias<"iret", "iretl">; -def : MnemonicAlias<"sysret", "sysretl">; -def : MnemonicAlias<"sysexit", "sysexitl">; +def : MnemonicAlias<"iret", "iretl", "att">; +def : MnemonicAlias<"sysret", "sysretl", "att">; +def : MnemonicAlias<"sysexit", "sysexitl", "att">; -def : MnemonicAlias<"lgdtl", "lgdt">, Requires<[In32BitMode]>; -def : MnemonicAlias<"lgdtq", "lgdt">, Requires<[In64BitMode]>; -def : MnemonicAlias<"lidtl", "lidt">, Requires<[In32BitMode]>; -def : MnemonicAlias<"lidtq", "lidt">, Requires<[In64BitMode]>; -def : MnemonicAlias<"sgdtl", "sgdt">, Requires<[In32BitMode]>; -def : MnemonicAlias<"sgdtq", "sgdt">, Requires<[In64BitMode]>; -def : MnemonicAlias<"sidtl", "sidt">, Requires<[In32BitMode]>; -def : MnemonicAlias<"sidtq", "sidt">, Requires<[In64BitMode]>; +def : MnemonicAlias<"lgdtl", "lgdt", "att">, Requires<[In32BitMode]>; +def : MnemonicAlias<"lgdtq", "lgdt", "att">, Requires<[In64BitMode]>; +def : MnemonicAlias<"lidtl", "lidt", "att">, Requires<[In32BitMode]>; +def : MnemonicAlias<"lidtq", "lidt", "att">, Requires<[In64BitMode]>; +def : MnemonicAlias<"sgdtl", "sgdt", "att">, Requires<[In32BitMode]>; +def : MnemonicAlias<"sgdtq", "sgdt", "att">, Requires<[In64BitMode]>; +def : MnemonicAlias<"sidtl", "sidt", "att">, Requires<[In32BitMode]>; +def : MnemonicAlias<"sidtq", "sidt", "att">, Requires<[In64BitMode]>; // Floating point stack aliases. 
-def : MnemonicAlias<"fcmovz", "fcmove">; -def : MnemonicAlias<"fcmova", "fcmovnbe">; -def : MnemonicAlias<"fcmovnae", "fcmovb">; -def : MnemonicAlias<"fcmovna", "fcmovbe">; -def : MnemonicAlias<"fcmovae", "fcmovnb">; -def : MnemonicAlias<"fcomip", "fcompi">; -def : MnemonicAlias<"fildq", "fildll">; -def : MnemonicAlias<"fistpq", "fistpll">; -def : MnemonicAlias<"fisttpq", "fisttpll">; -def : MnemonicAlias<"fldcww", "fldcw">; -def : MnemonicAlias<"fnstcww", "fnstcw">; -def : MnemonicAlias<"fnstsww", "fnstsw">; -def : MnemonicAlias<"fucomip", "fucompi">; -def : MnemonicAlias<"fwait", "wait">; +def : MnemonicAlias<"fcmovz", "fcmove", "att">; +def : MnemonicAlias<"fcmova", "fcmovnbe", "att">; +def : MnemonicAlias<"fcmovnae", "fcmovb", "att">; +def : MnemonicAlias<"fcmovna", "fcmovbe", "att">; +def : MnemonicAlias<"fcmovae", "fcmovnb", "att">; +def : MnemonicAlias<"fcomip", "fcompi", "att">; +def : MnemonicAlias<"fildq", "fildll", "att">; +def : MnemonicAlias<"fistpq", "fistpll", "att">; +def : MnemonicAlias<"fisttpq", "fisttpll", "att">; +def : MnemonicAlias<"fldcww", "fldcw", "att">; +def : MnemonicAlias<"fnstcww", "fnstcw", "att">; +def : MnemonicAlias<"fnstsww", "fnstsw", "att">; +def : MnemonicAlias<"fucomip", "fucompi", "att">; +def : MnemonicAlias<"fwait", "wait", "att">; class CondCodeAlias<string Prefix,string Suffix, string OldCond, string NewCond> diff --git a/contrib/llvm/lib/Target/X86/X86InstrSSE.td b/contrib/llvm/lib/Target/X86/X86InstrSSE.td index 3842387..cce938b 100644 --- a/contrib/llvm/lib/Target/X86/X86InstrSSE.td +++ b/contrib/llvm/lib/Target/X86/X86InstrSSE.td @@ -4462,12 +4462,12 @@ def MOVPDI2DImr : PDI<0x7E, MRMDestMem, (outs), (ins i32mem:$dst, VR128:$src), // Move Packed Doubleword Int first element to Doubleword Int // let SchedRW = [WriteMove] in { -def VMOVPQIto64rr : I<0x7E, MRMDestReg, (outs GR64:$dst), (ins VR128:$src), - "vmov{d|q}\t{$src, $dst|$dst, $src}", +def VMOVPQIto64rr : VRPDI<0x7E, MRMDestReg, (outs GR64:$dst), (ins VR128:$src), + "mov{d|q}\t{$src, $dst|$dst, $src}", [(set GR64:$dst, (vector_extract (v2i64 VR128:$src), (iPTR 0)))], IIC_SSE_MOVD_ToGP>, - TB, OpSize, VEX, VEX_W, Requires<[HasAVX, In64BitMode]>; + VEX; def MOVPQIto64rr : RPDI<0x7E, MRMDestReg, (outs GR64:$dst), (ins VR128:$src), "mov{d|q}\t{$src, $dst|$dst, $src}", @@ -5094,6 +5094,16 @@ multiclass SS3I_unop_rm_int_y<bits<8> opc, string OpcodeStr, Sched<[WriteVecALULd]>; } +// Helper fragments to match sext vXi1 to vXiY. 
+def v16i1sextv16i8 : PatLeaf<(v16i8 (X86pcmpgt (bc_v16i8 (v4i32 immAllZerosV)), + VR128:$src))>; +def v8i1sextv8i16 : PatLeaf<(v8i16 (X86vsrai VR128:$src, (i32 15)))>; +def v4i1sextv4i32 : PatLeaf<(v4i32 (X86vsrai VR128:$src, (i32 31)))>; +def v32i1sextv32i8 : PatLeaf<(v32i8 (X86pcmpgt (bc_v32i8 (v8i32 immAllZerosV)), + VR256:$src))>; +def v16i1sextv16i16: PatLeaf<(v16i16 (X86vsrai VR256:$src, (i32 15)))>; +def v8i1sextv8i32 : PatLeaf<(v8i32 (X86vsrai VR256:$src, (i32 31)))>; + let Predicates = [HasAVX] in { defm VPABSB : SS3I_unop_rm_int<0x1C, "vpabsb", int_x86_ssse3_pabs_b_128>, VEX; @@ -5101,6 +5111,19 @@ let Predicates = [HasAVX] in { int_x86_ssse3_pabs_w_128>, VEX; defm VPABSD : SS3I_unop_rm_int<0x1E, "vpabsd", int_x86_ssse3_pabs_d_128>, VEX; + + def : Pat<(xor + (bc_v2i64 (v16i1sextv16i8)), + (bc_v2i64 (add (v16i8 VR128:$src), (v16i1sextv16i8)))), + (VPABSBrr128 VR128:$src)>; + def : Pat<(xor + (bc_v2i64 (v8i1sextv8i16)), + (bc_v2i64 (add (v8i16 VR128:$src), (v8i1sextv8i16)))), + (VPABSWrr128 VR128:$src)>; + def : Pat<(xor + (bc_v2i64 (v4i1sextv4i32)), + (bc_v2i64 (add (v4i32 VR128:$src), (v4i1sextv4i32)))), + (VPABSDrr128 VR128:$src)>; } let Predicates = [HasAVX2] in { @@ -5110,6 +5133,19 @@ let Predicates = [HasAVX2] in { int_x86_avx2_pabs_w>, VEX, VEX_L; defm VPABSD : SS3I_unop_rm_int_y<0x1E, "vpabsd", int_x86_avx2_pabs_d>, VEX, VEX_L; + + def : Pat<(xor + (bc_v4i64 (v32i1sextv32i8)), + (bc_v4i64 (add (v32i8 VR256:$src), (v32i1sextv32i8)))), + (VPABSBrr256 VR256:$src)>; + def : Pat<(xor + (bc_v4i64 (v16i1sextv16i16)), + (bc_v4i64 (add (v16i16 VR256:$src), (v16i1sextv16i16)))), + (VPABSWrr256 VR256:$src)>; + def : Pat<(xor + (bc_v4i64 (v8i1sextv8i32)), + (bc_v4i64 (add (v8i32 VR256:$src), (v8i1sextv8i32)))), + (VPABSDrr256 VR256:$src)>; } defm PABSB : SS3I_unop_rm_int<0x1C, "pabsb", @@ -5119,6 +5155,21 @@ defm PABSW : SS3I_unop_rm_int<0x1D, "pabsw", defm PABSD : SS3I_unop_rm_int<0x1E, "pabsd", int_x86_ssse3_pabs_d_128>; +let Predicates = [HasSSSE3] in { + def : Pat<(xor + (bc_v2i64 (v16i1sextv16i8)), + (bc_v2i64 (add (v16i8 VR128:$src), (v16i1sextv16i8)))), + (PABSBrr128 VR128:$src)>; + def : Pat<(xor + (bc_v2i64 (v8i1sextv8i16)), + (bc_v2i64 (add (v8i16 VR128:$src), (v8i1sextv8i16)))), + (PABSWrr128 VR128:$src)>; + def : Pat<(xor + (bc_v2i64 (v4i1sextv4i32)), + (bc_v2i64 (add (v4i32 VR128:$src), (v4i1sextv4i32)))), + (PABSDrr128 VR128:$src)>; +} + //===---------------------------------------------------------------------===// // SSSE3 - Packed Binary Operator Instructions //===---------------------------------------------------------------------===// diff --git a/contrib/llvm/lib/Target/X86/X86InstrShiftRotate.td b/contrib/llvm/lib/Target/X86/X86InstrShiftRotate.td index 5b6298b..89c1a68 100644 --- a/contrib/llvm/lib/Target/X86/X86InstrShiftRotate.td +++ b/contrib/llvm/lib/Target/X86/X86InstrShiftRotate.td @@ -34,7 +34,7 @@ def SHL64rCL : RI<0xD3, MRM4r, (outs GR64:$dst), (ins GR64:$src1), def SHL8ri : Ii8<0xC0, MRM4r, (outs GR8 :$dst), (ins GR8 :$src1, i8imm:$src2), "shl{b}\t{$src2, $dst|$dst, $src2}", [(set GR8:$dst, (shl GR8:$src1, (i8 imm:$src2)))], IIC_SR>; - + let isConvertibleToThreeAddress = 1 in { // Can transform into LEA. 
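The PatLeaf helpers and Pat entries added in the X86InstrSSE.td hunk above let instruction selection recognise the classic branchless absolute-value idiom and emit PABSB/PABSW/PABSD for it: the sign mask is materialised with an arithmetic shift (or a compare against zero for the byte case), and abs(x) is then (x + mask) ^ mask. A scalar sketch of the idiom being matched, illustrative only and not the TableGen pattern itself:

    #include <cassert>
    #include <cstdint>

    // Branchless abs: mask is 0 for non-negative x and all-ones for negative x,
    // so (x + mask) ^ mask == x when x >= 0 and == -x otherwise (two's complement).
    // INT32_MIN maps to itself, just as the vector instruction does.
    int32_t branchlessAbs(int32_t x) {
      int32_t mask = x >> 31;    // arithmetic shift fills with the sign bit
      return (x + mask) ^ mask;  // the add/xor shape the new patterns match
    }

    int main() {
      assert(branchlessAbs(5) == 5);
      assert(branchlessAbs(-7) == 7);
    }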
def SHL16ri : Ii8<0xC1, MRM4r, (outs GR16:$dst), (ins GR16:$src1, i8imm:$src2), "shl{w}\t{$src2, $dst|$dst, $src2}", @@ -43,7 +43,7 @@ def SHL16ri : Ii8<0xC1, MRM4r, (outs GR16:$dst), (ins GR16:$src1, i8imm:$src2), def SHL32ri : Ii8<0xC1, MRM4r, (outs GR32:$dst), (ins GR32:$src1, i8imm:$src2), "shl{l}\t{$src2, $dst|$dst, $src2}", [(set GR32:$dst, (shl GR32:$src1, (i8 imm:$src2)))], IIC_SR>; -def SHL64ri : RIi8<0xC1, MRM4r, (outs GR64:$dst), +def SHL64ri : RIi8<0xC1, MRM4r, (outs GR64:$dst), (ins GR64:$src1, i8imm:$src2), "shl{q}\t{$src2, $dst|$dst, $src2}", [(set GR64:$dst, (shl GR64:$src1, (i8 imm:$src2)))], diff --git a/contrib/llvm/lib/Target/X86/X86InstrSystem.td b/contrib/llvm/lib/Target/X86/X86InstrSystem.td index 053417c..bab3cdd 100644 --- a/contrib/llvm/lib/Target/X86/X86InstrSystem.td +++ b/contrib/llvm/lib/Target/X86/X86InstrSystem.td @@ -449,15 +449,15 @@ let Uses = [RDX, RAX] in { def XSAVE : I<0xAE, MRM4m, (outs opaque512mem:$dst), (ins), "xsave\t$dst", []>, TB; def XSAVE64 : I<0xAE, MRM4m, (outs opaque512mem:$dst), (ins), - "xsaveq\t$dst", []>, TB, REX_W, Requires<[In64BitMode]>; + "xsave{q|64}\t$dst", []>, TB, REX_W, Requires<[In64BitMode]>; def XRSTOR : I<0xAE, MRM5m, (outs), (ins opaque512mem:$dst), "xrstor\t$dst", []>, TB; def XRSTOR64 : I<0xAE, MRM5m, (outs), (ins opaque512mem:$dst), - "xrstorq\t$dst", []>, TB, REX_W, Requires<[In64BitMode]>; + "xrstor{q|64}\t$dst", []>, TB, REX_W, Requires<[In64BitMode]>; def XSAVEOPT : I<0xAE, MRM6m, (outs opaque512mem:$dst), (ins), "xsaveopt\t$dst", []>, TB; def XSAVEOPT64 : I<0xAE, MRM6m, (outs opaque512mem:$dst), (ins), - "xsaveoptq\t$dst", []>, TB, REX_W, Requires<[In64BitMode]>; + "xsaveopt{q|64}\t$dst", []>, TB, REX_W, Requires<[In64BitMode]>; } } // SchedRW @@ -515,8 +515,15 @@ let Predicates = [HasFSGSBase, In64BitMode] in { //===----------------------------------------------------------------------===// // INVPCID Instruction def INVPCID32 : I<0x82, MRMSrcMem, (outs), (ins GR32:$src1, i128mem:$src2), - "invpcid {$src2, $src1|$src1, $src2}", []>, OpSize, T8, + "invpcid\t{$src2, $src1|$src1, $src2}", []>, OpSize, T8, Requires<[In32BitMode]>; def INVPCID64 : I<0x82, MRMSrcMem, (outs), (ins GR64:$src1, i128mem:$src2), - "invpcid {$src2, $src1|$src1, $src2}", []>, OpSize, T8, + "invpcid\t{$src2, $src1|$src1, $src2}", []>, OpSize, T8, Requires<[In64BitMode]>; + +//===----------------------------------------------------------------------===// +// SMAP Instruction +let Defs = [EFLAGS], Uses = [EFLAGS] in { + def CLAC : I<0x01, MRM_CA, (outs), (ins), "clac", []>, TB; + def STAC : I<0x01, MRM_CB, (outs), (ins), "stac", []>, TB; +} diff --git a/contrib/llvm/lib/Target/X86/X86SchedHaswell.td b/contrib/llvm/lib/Target/X86/X86SchedHaswell.td index 7de6791..84c9203 100644 --- a/contrib/llvm/lib/Target/X86/X86SchedHaswell.td +++ b/contrib/llvm/lib/Target/X86/X86SchedHaswell.td @@ -18,7 +18,7 @@ def HaswellModel : SchedMachineModel { let IssueWidth = 4; let MinLatency = 0; // 0 = Out-of-order execution. let LoadLatency = 4; - let ILPWindow = 40; + let ILPWindow = 30; let MispredictPenalty = 16; } diff --git a/contrib/llvm/lib/Target/X86/X86SchedSandyBridge.td b/contrib/llvm/lib/Target/X86/X86SchedSandyBridge.td index 74d5f1b..b36b3ad 100644 --- a/contrib/llvm/lib/Target/X86/X86SchedSandyBridge.td +++ b/contrib/llvm/lib/Target/X86/X86SchedSandyBridge.td @@ -19,7 +19,7 @@ def SandyBridgeModel : SchedMachineModel { let IssueWidth = 4; let MinLatency = 0; // 0 = Out-of-order execution. 
let LoadLatency = 4; - let ILPWindow = 30; + let ILPWindow = 20; let MispredictPenalty = 16; } diff --git a/contrib/llvm/lib/Target/X86/X86Subtarget.cpp b/contrib/llvm/lib/Target/X86/X86Subtarget.cpp index 14619b6..74da2a9 100644 --- a/contrib/llvm/lib/Target/X86/X86Subtarget.cpp +++ b/contrib/llvm/lib/Target/X86/X86Subtarget.cpp @@ -170,6 +170,26 @@ bool X86Subtarget::IsLegalToCallImmediateAddr(const TargetMachine &TM) const { return isTargetELF() || TM.getRelocationModel() == Reloc::Static; } +static bool OSHasAVXSupport() { +#if defined(i386) || defined(__i386__) || defined(__x86__) || defined(_M_IX86)\ + || defined(__x86_64__) || defined(_M_AMD64) || defined (_M_X64) +#if defined(__GNUC__) + // Check xgetbv; this uses a .byte sequence instead of the instruction + // directly because older assemblers do not include support for xgetbv and + // there is no easy way to conditionally compile based on the assembler used. + int rEAX, rEDX; + __asm__ (".byte 0x0f, 0x01, 0xd0" : "=a" (rEAX), "=d" (rEDX) : "c" (0)); +#elif defined(_MSC_FULL_VER) && defined(_XCR_XFEATURE_ENABLED_MASK) + unsigned long long rEAX = _xgetbv(_XCR_XFEATURE_ENABLED_MASK); +#else + int rEAX = 0; // Ensures we return false +#endif + return (rEAX & 6) == 6; +#else + return false; +#endif +} + void X86Subtarget::AutoDetectSubtargetFeatures() { unsigned EAX = 0, EBX = 0, ECX = 0, EDX = 0; unsigned MaxLevel; @@ -192,7 +212,9 @@ void X86Subtarget::AutoDetectSubtargetFeatures() { if ((ECX >> 9) & 1) { X86SSELevel = SSSE3; ToggleFeature(X86::FeatureSSSE3);} if ((ECX >> 19) & 1) { X86SSELevel = SSE41; ToggleFeature(X86::FeatureSSE41);} if ((ECX >> 20) & 1) { X86SSELevel = SSE42; ToggleFeature(X86::FeatureSSE42);} - if ((ECX >> 28) & 1) { X86SSELevel = AVX; ToggleFeature(X86::FeatureAVX); } + if (((ECX >> 27) & 1) && ((ECX >> 28) & 1) && OSHasAVXSupport()) { + X86SSELevel = AVX; ToggleFeature(X86::FeatureAVX); + } bool IsIntel = memcmp(text.c, "GenuineIntel", 12) == 0; bool IsAMD = !IsIntel && memcmp(text.c, "AuthenticAMD", 12) == 0; @@ -467,6 +489,7 @@ void X86Subtarget::initializeEnvironment() { PostRAScheduler = false; PadShortFunctions = false; CallRegIndirect = false; + LEAUsesAG = false; stackAlignment = 4; // FIXME: this is a known good value for Yonah. How about others? MaxInlineSizeThreshold = 128; diff --git a/contrib/llvm/lib/Target/X86/X86Subtarget.h b/contrib/llvm/lib/Target/X86/X86Subtarget.h index 6fbdb1d..66832b9 100644 --- a/contrib/llvm/lib/Target/X86/X86Subtarget.h +++ b/contrib/llvm/lib/Target/X86/X86Subtarget.h @@ -165,6 +165,9 @@ protected: /// CallRegIndirect - True if the Calls with memory reference should be converted /// to a register-based indirect call. bool CallRegIndirect; + /// LEAUsesAG - True if the LEA instruction inputs have to be ready at + /// address generation (AG) time. + bool LEAUsesAG; /// stackAlignment - The minimum alignment known to hold of the stack frame on /// entry to the function and which must be maintained by every function. 
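The OSHasAVXSupport helper added in the X86Subtarget.cpp hunk above gates AVX auto-detection on operating-system support: CPUID must report OSXSAVE (ECX bit 27) and AVX (ECX bit 28), and XGETBV must show that the OS saves both SSE and AVX register state, i.e. bits 1 and 2 of XCR0, hence the (rEAX & 6) == 6 test. A minimal GCC-style sketch of that probe, mirroring the .byte-encoded xgetbv used in the hunk:

    #include <cstdio>

    // Returns true when XCR0 says the OS preserves XMM (bit 1) and YMM (bit 2)
    // state across context switches; both are required before enabling AVX.
    // A real caller must first confirm OSXSAVE via CPUID (ECX bit 27), since
    // executing xgetbv without it raises #UD.
    static bool osSavesAVXState() {
    #if defined(__GNUC__) && (defined(__i386__) || defined(__x86_64__))
      unsigned int eax, edx;
      // xgetbv with ECX = 0 reads XCR0; emitted as raw bytes so that old
      // assemblers without xgetbv support can still build the file.
      __asm__(".byte 0x0f, 0x01, 0xd0" : "=a"(eax), "=d"(edx) : "c"(0));
      return (eax & 6) == 6;
    #else
      return false;
    #endif
    }

    int main() {
      std::printf("OS saves AVX state: %s\n", osSavesAVXState() ? "yes" : "no");
    }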
@@ -278,6 +281,7 @@ public: bool hasSlowDivide() const { return HasSlowDivide; } bool padShortFunctions() const { return PadShortFunctions; } bool callRegIndirect() const { return CallRegIndirect; } + bool LEAusesAG() const { return LEAUsesAG; } bool isAtom() const { return X86ProcFamily == IntelAtom; } diff --git a/contrib/llvm/lib/Target/X86/X86TargetMachine.cpp b/contrib/llvm/lib/Target/X86/X86TargetMachine.cpp index 8aa58a2..00fa47f 100644 --- a/contrib/llvm/lib/Target/X86/X86TargetMachine.cpp +++ b/contrib/llvm/lib/Target/X86/X86TargetMachine.cpp @@ -215,6 +215,11 @@ bool X86PassConfig::addPreEmitPass() { addPass(createX86PadShortFunctions()); ShouldPrint = true; } + if (getOptLevel() != CodeGenOpt::None && + getX86Subtarget().LEAusesAG()){ + addPass(createX86FixupLEAs()); + ShouldPrint = true; + } return ShouldPrint; } diff --git a/contrib/llvm/lib/Target/X86/X86TargetTransformInfo.cpp b/contrib/llvm/lib/Target/X86/X86TargetTransformInfo.cpp index a98c699..eba9d78 100644 --- a/contrib/llvm/lib/Target/X86/X86TargetTransformInfo.cpp +++ b/contrib/llvm/lib/Target/X86/X86TargetTransformInfo.cpp @@ -334,9 +334,44 @@ unsigned X86TTI::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src) const { int ISD = TLI->InstructionOpcodeToISD(Opcode); assert(ISD && "Invalid opcode"); + std::pair<unsigned, MVT> LTSrc = TLI->getTypeLegalizationCost(Src); + std::pair<unsigned, MVT> LTDest = TLI->getTypeLegalizationCost(Dst); + + static const TypeConversionCostTblEntry<MVT> SSE2ConvTbl[] = { + // These are somewhat magic numbers justified by looking at the output of + // Intel's IACA, running some kernels and making sure when we take + // legalization into account the throughput will be overestimated. + { ISD::UINT_TO_FP, MVT::v2f64, MVT::v2i64, 2*10 }, + { ISD::UINT_TO_FP, MVT::v2f64, MVT::v4i32, 4*10 }, + { ISD::UINT_TO_FP, MVT::v2f64, MVT::v8i16, 8*10 }, + { ISD::UINT_TO_FP, MVT::v2f64, MVT::v16i8, 16*10 }, + { ISD::SINT_TO_FP, MVT::v2f64, MVT::v2i64, 2*10 }, + { ISD::SINT_TO_FP, MVT::v2f64, MVT::v4i32, 4*10 }, + { ISD::SINT_TO_FP, MVT::v2f64, MVT::v8i16, 8*10 }, + { ISD::SINT_TO_FP, MVT::v2f64, MVT::v16i8, 16*10 }, + // There are faster sequences for float conversions. + { ISD::UINT_TO_FP, MVT::v4f32, MVT::v2i64, 15 }, + { ISD::UINT_TO_FP, MVT::v4f32, MVT::v4i32, 15 }, + { ISD::UINT_TO_FP, MVT::v4f32, MVT::v8i16, 15 }, + { ISD::UINT_TO_FP, MVT::v4f32, MVT::v16i8, 8 }, + { ISD::SINT_TO_FP, MVT::v4f32, MVT::v2i64, 15 }, + { ISD::SINT_TO_FP, MVT::v4f32, MVT::v4i32, 15 }, + { ISD::SINT_TO_FP, MVT::v4f32, MVT::v8i16, 15 }, + { ISD::SINT_TO_FP, MVT::v4f32, MVT::v16i8, 8 }, + }; + + if (ST->hasSSE2() && !ST->hasAVX()) { + int Idx = ConvertCostTableLookup<MVT>(SSE2ConvTbl, + array_lengthof(SSE2ConvTbl), + ISD, LTDest.second, LTSrc.second); + if (Idx != -1) + return LTSrc.first * SSE2ConvTbl[Idx].Cost; + } + EVT SrcTy = TLI->getValueType(Src); EVT DstTy = TLI->getValueType(Dst); + // The function getSimpleVT only handles simple value types. 
if (!SrcTy.isSimple() || !DstTy.isSimple()) return TargetTransformInfo::getCastInstrCost(Opcode, Dst, Src); diff --git a/contrib/llvm/lib/Target/XCore/Disassembler/XCoreDisassembler.cpp b/contrib/llvm/lib/Target/XCore/Disassembler/XCoreDisassembler.cpp index 7b99967..a2ae40c 100644 --- a/contrib/llvm/lib/Target/XCore/Disassembler/XCoreDisassembler.cpp +++ b/contrib/llvm/lib/Target/XCore/Disassembler/XCoreDisassembler.cpp @@ -97,8 +97,8 @@ static DecodeStatus DecodeRRegsRegisterClass(MCInst &Inst, static DecodeStatus DecodeBitpOperand(MCInst &Inst, unsigned Val, uint64_t Address, const void *Decoder); -static DecodeStatus DecodeMEMiiOperand(MCInst &Inst, unsigned Val, - uint64_t Address, const void *Decoder); +static DecodeStatus DecodeNegImmOperand(MCInst &Inst, unsigned Val, + uint64_t Address, const void *Decoder); static DecodeStatus Decode2RInstruction(MCInst &Inst, unsigned Insn, @@ -242,10 +242,9 @@ static DecodeStatus DecodeBitpOperand(MCInst &Inst, unsigned Val, return MCDisassembler::Success; } -static DecodeStatus DecodeMEMiiOperand(MCInst &Inst, unsigned Val, - uint64_t Address, const void *Decoder) { - Inst.addOperand(MCOperand::CreateImm(Val)); - Inst.addOperand(MCOperand::CreateImm(0)); +static DecodeStatus DecodeNegImmOperand(MCInst &Inst, unsigned Val, + uint64_t Address, const void *Decoder) { + Inst.addOperand(MCOperand::CreateImm(-(int64_t)Val)); return MCDisassembler::Success; } diff --git a/contrib/llvm/lib/Target/XCore/InstPrinter/XCoreInstPrinter.cpp b/contrib/llvm/lib/Target/XCore/InstPrinter/XCoreInstPrinter.cpp index 1592351..9ae8c0d 100644 --- a/contrib/llvm/lib/Target/XCore/InstPrinter/XCoreInstPrinter.cpp +++ b/contrib/llvm/lib/Target/XCore/InstPrinter/XCoreInstPrinter.cpp @@ -84,14 +84,3 @@ printOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O) { assert(Op.isExpr() && "unknown operand kind in printOperand"); printExpr(Op.getExpr(), O); } - -void XCoreInstPrinter:: -printMemOperand(const MCInst *MI, int opNum, raw_ostream &O) { - printOperand(MI, opNum, O); - - if (MI->getOperand(opNum+1).isImm() && MI->getOperand(opNum+1).getImm() == 0) - return; - - O << "+"; - printOperand(MI, opNum+1, O); -} diff --git a/contrib/llvm/lib/Target/XCore/MCTargetDesc/XCoreMCTargetDesc.cpp b/contrib/llvm/lib/Target/XCore/MCTargetDesc/XCoreMCTargetDesc.cpp index b5b072d..c177365 100644 --- a/contrib/llvm/lib/Target/XCore/MCTargetDesc/XCoreMCTargetDesc.cpp +++ b/contrib/llvm/lib/Target/XCore/MCTargetDesc/XCoreMCTargetDesc.cpp @@ -66,6 +66,9 @@ static MCCodeGenInfo *createXCoreMCCodeGenInfo(StringRef TT, Reloc::Model RM, CodeModel::Model CM, CodeGenOpt::Level OL) { MCCodeGenInfo *X = new MCCodeGenInfo(); + if (RM == Reloc::Default) { + RM = Reloc::Static; + } X->InitMCCodeGenInfo(RM, CM, OL); return X; } diff --git a/contrib/llvm/lib/Target/XCore/XCore.h b/contrib/llvm/lib/Target/XCore/XCore.h index 08f091e..2f375fc 100644 --- a/contrib/llvm/lib/Target/XCore/XCore.h +++ b/contrib/llvm/lib/Target/XCore/XCore.h @@ -20,12 +20,16 @@ namespace llvm { class FunctionPass; + class ModulePass; class TargetMachine; class XCoreTargetMachine; class formatted_raw_ostream; + void initializeXCoreLowerThreadLocalPass(PassRegistry &p); + FunctionPass *createXCoreISelDag(XCoreTargetMachine &TM, CodeGenOpt::Level OptLevel); + ModulePass *createXCoreLowerThreadLocalPass(); } // end namespace llvm; diff --git a/contrib/llvm/lib/Target/XCore/XCoreAsmPrinter.cpp b/contrib/llvm/lib/Target/XCore/XCoreAsmPrinter.cpp index 0d146ba..e177ad3 100644 --- a/contrib/llvm/lib/Target/XCore/XCoreAsmPrinter.cpp 
+++ b/contrib/llvm/lib/Target/XCore/XCoreAsmPrinter.cpp @@ -36,7 +36,6 @@ #include "llvm/MC/MCInst.h" #include "llvm/MC/MCStreamer.h" #include "llvm/MC/MCSymbol.h" -#include "llvm/Support/CommandLine.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/TargetRegistry.h" #include "llvm/Support/raw_ostream.h" @@ -46,12 +45,6 @@ #include <cctype> using namespace llvm; -static cl::opt<unsigned> MaxThreads("xcore-max-threads", cl::Optional, - cl::desc("Maximum number of threads (for emulation thread-local storage)"), - cl::Hidden, - cl::value_desc("number"), - cl::init(8)); - namespace { class XCoreAsmPrinter : public AsmPrinter { const XCoreSubtarget &Subtarget; @@ -152,10 +145,10 @@ void XCoreAsmPrinter::EmitGlobalVariable(const GlobalVariable *GV) { EmitAlignment(Align > 2 ? Align : 2, GV); - unsigned Size = TD->getTypeAllocSize(C->getType()); if (GV->isThreadLocal()) { - Size *= MaxThreads; + report_fatal_error("TLS is not supported by this target!"); } + unsigned Size = TD->getTypeAllocSize(C->getType()); if (MAI->hasDotTypeDotSizeDirective()) { OutStreamer.EmitSymbolAttribute(GVSym, MCSA_ELF_TypeObject); OutStreamer.EmitRawText("\t.size " + Twine(GVSym->getName()) + "," + @@ -164,10 +157,6 @@ void XCoreAsmPrinter::EmitGlobalVariable(const GlobalVariable *GV) { OutStreamer.EmitLabel(GVSym); EmitGlobalConstant(C); - if (GV->isThreadLocal()) { - for (unsigned i = 1; i < MaxThreads; ++i) - EmitGlobalConstant(C); - } // The ABI requires that unsigned scalar types smaller than 32 bits // are padded to 32 bits. if (Size < 4) diff --git a/contrib/llvm/lib/Target/XCore/XCoreISelDAGToDAG.cpp b/contrib/llvm/lib/Target/XCore/XCoreISelDAGToDAG.cpp index fbf86c5..eb29b50 100644 --- a/contrib/llvm/lib/Target/XCore/XCoreISelDAGToDAG.cpp +++ b/contrib/llvm/lib/Target/XCore/XCoreISelDAGToDAG.cpp @@ -68,8 +68,6 @@ namespace { // Complex Pattern Selectors. 
bool SelectADDRspii(SDValue Addr, SDValue &Base, SDValue &Offset); - bool SelectADDRdpii(SDValue Addr, SDValue &Base, SDValue &Offset); - bool SelectADDRcpii(SDValue Addr, SDValue &Base, SDValue &Offset); virtual const char *getPassName() const { return "XCore DAG->DAG Pattern Instruction Selection"; @@ -110,48 +108,6 @@ bool XCoreDAGToDAGISel::SelectADDRspii(SDValue Addr, SDValue &Base, return false; } -bool XCoreDAGToDAGISel::SelectADDRdpii(SDValue Addr, SDValue &Base, - SDValue &Offset) { - if (Addr.getOpcode() == XCoreISD::DPRelativeWrapper) { - Base = Addr.getOperand(0); - Offset = CurDAG->getTargetConstant(0, MVT::i32); - return true; - } - if (Addr.getOpcode() == ISD::ADD) { - ConstantSDNode *CN = 0; - if ((Addr.getOperand(0).getOpcode() == XCoreISD::DPRelativeWrapper) - && (CN = dyn_cast<ConstantSDNode>(Addr.getOperand(1))) - && (CN->getSExtValue() % 4 == 0 && CN->getSExtValue() >= 0)) { - // Constant word offset from a object in the data region - Base = Addr.getOperand(0).getOperand(0); - Offset = CurDAG->getTargetConstant(CN->getSExtValue(), MVT::i32); - return true; - } - } - return false; -} - -bool XCoreDAGToDAGISel::SelectADDRcpii(SDValue Addr, SDValue &Base, - SDValue &Offset) { - if (Addr.getOpcode() == XCoreISD::CPRelativeWrapper) { - Base = Addr.getOperand(0); - Offset = CurDAG->getTargetConstant(0, MVT::i32); - return true; - } - if (Addr.getOpcode() == ISD::ADD) { - ConstantSDNode *CN = 0; - if ((Addr.getOperand(0).getOpcode() == XCoreISD::CPRelativeWrapper) - && (CN = dyn_cast<ConstantSDNode>(Addr.getOperand(1))) - && (CN->getSExtValue() % 4 == 0 && CN->getSExtValue() >= 0)) { - // Constant word offset from a object in the data region - Base = Addr.getOperand(0).getOperand(0); - Offset = CurDAG->getTargetConstant(CN->getSExtValue(), MVT::i32); - return true; - } - } - return false; -} - SDNode *XCoreDAGToDAGISel::Select(SDNode *N) { DebugLoc dl = N->getDebugLoc(); switch (N->getOpcode()) { @@ -185,36 +141,36 @@ SDNode *XCoreDAGToDAGISel::Select(SDNode *N) { SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2) }; return CurDAG->getMachineNode(XCore::LADD_l5r, dl, MVT::i32, MVT::i32, - Ops, 3); + Ops); } case XCoreISD::LSUB: { SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2) }; return CurDAG->getMachineNode(XCore::LSUB_l5r, dl, MVT::i32, MVT::i32, - Ops, 3); + Ops); } case XCoreISD::MACCU: { SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2), N->getOperand(3) }; return CurDAG->getMachineNode(XCore::MACCU_l4r, dl, MVT::i32, MVT::i32, - Ops, 4); + Ops); } case XCoreISD::MACCS: { SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2), N->getOperand(3) }; return CurDAG->getMachineNode(XCore::MACCS_l4r, dl, MVT::i32, MVT::i32, - Ops, 4); + Ops); } case XCoreISD::LMUL: { SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2), N->getOperand(3) }; return CurDAG->getMachineNode(XCore::LMUL_l6r, dl, MVT::i32, MVT::i32, - Ops, 4); + Ops); } case XCoreISD::CRC8: { SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2) }; return CurDAG->getMachineNode(XCore::CRC8_l4r, dl, MVT::i32, MVT::i32, - Ops, 3); + Ops); } case ISD::BRIND: if (SDNode *ResNode = SelectBRIND(N)) diff --git a/contrib/llvm/lib/Target/XCore/XCoreISelLowering.cpp b/contrib/llvm/lib/Target/XCore/XCoreISelLowering.cpp index a5d2be8..2d27f1a 100644 --- a/contrib/llvm/lib/Target/XCore/XCoreISelLowering.cpp +++ b/contrib/llvm/lib/Target/XCore/XCoreISelLowering.cpp @@ -36,6 +36,8 @@ #include "llvm/Support/Debug.h" 
#include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" +#include <algorithm> + using namespace llvm; const char *XCoreTargetLowering:: @@ -120,9 +122,6 @@ XCoreTargetLowering::XCoreTargetLowering(XCoreTargetMachine &XTM) setOperationAction(ISD::GlobalAddress, MVT::i32, Custom); setOperationAction(ISD::BlockAddress, MVT::i32 , Custom); - // Thread Local Storage - setOperationAction(ISD::GlobalTLSAddress, MVT::i32, Custom); - // Conversion of i64 -> double produces constantpool nodes setOperationAction(ISD::ConstantPool, MVT::i32, Custom); @@ -172,7 +171,6 @@ LowerOperation(SDValue Op, SelectionDAG &DAG) const { switch (Op.getOpcode()) { case ISD::GlobalAddress: return LowerGlobalAddress(Op, DAG); - case ISD::GlobalTLSAddress: return LowerGlobalTLSAddress(Op, DAG); case ISD::BlockAddress: return LowerBlockAddress(Op, DAG); case ISD::ConstantPool: return LowerConstantPool(Op, DAG); case ISD::BR_JT: return LowerBR_JT(Op, DAG); @@ -245,9 +243,20 @@ getGlobalAddressWrapper(SDValue GA, const GlobalValue *GV, SDValue XCoreTargetLowering:: LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const { - const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal(); - SDValue GA = DAG.getTargetGlobalAddress(GV, Op.getDebugLoc(), MVT::i32); - return getGlobalAddressWrapper(GA, GV, DAG); + DebugLoc DL = Op.getDebugLoc(); + const GlobalAddressSDNode *GN = cast<GlobalAddressSDNode>(Op); + const GlobalValue *GV = GN->getGlobal(); + int64_t Offset = GN->getOffset(); + // We can only fold positive offsets that are a multiple of the word size. + int64_t FoldedOffset = std::max(Offset & ~3, (int64_t)0); + SDValue GA = DAG.getTargetGlobalAddress(GV, DL, MVT::i32, FoldedOffset); + GA = getGlobalAddressWrapper(GA, GV, DAG); + // Handle the rest of the offset. 
+ if (Offset != FoldedOffset) { + SDValue Remaining = DAG.getConstant(Offset - FoldedOffset, MVT::i32); + GA = DAG.getNode(ISD::ADD, DL, MVT::i32, GA, Remaining); + } + return GA; } static inline SDValue BuildGetId(SelectionDAG &DAG, DebugLoc dl) { @@ -255,44 +264,6 @@ static inline SDValue BuildGetId(SelectionDAG &DAG, DebugLoc dl) { DAG.getConstant(Intrinsic::xcore_getid, MVT::i32)); } -static inline bool isZeroLengthArray(Type *Ty) { - ArrayType *AT = dyn_cast_or_null<ArrayType>(Ty); - return AT && (AT->getNumElements() == 0); -} - -SDValue XCoreTargetLowering:: -LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const -{ - // FIXME there isn't really debug info here - DebugLoc dl = Op.getDebugLoc(); - // transform to label + getid() * size - const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal(); - SDValue GA = DAG.getTargetGlobalAddress(GV, dl, MVT::i32); - const GlobalVariable *GVar = dyn_cast<GlobalVariable>(GV); - if (!GVar) { - // If GV is an alias then use the aliasee to determine size - if (const GlobalAlias *GA = dyn_cast<GlobalAlias>(GV)) - GVar = dyn_cast_or_null<GlobalVariable>(GA->resolveAliasedGlobal()); - } - if (!GVar) { - llvm_unreachable("Thread local object not a GlobalVariable?"); - } - Type *Ty = cast<PointerType>(GV->getType())->getElementType(); - if (!Ty->isSized() || isZeroLengthArray(Ty)) { -#ifndef NDEBUG - errs() << "Size of thread local object " << GVar->getName() - << " is unknown\n"; -#endif - llvm_unreachable(0); - } - SDValue base = getGlobalAddressWrapper(GA, GV, DAG); - const DataLayout *TD = TM.getDataLayout(); - unsigned Size = TD->getTypeAllocSize(Ty); - SDValue offset = DAG.getNode(ISD::MUL, dl, MVT::i32, BuildGetId(DAG, dl), - DAG.getConstant(Size, MVT::i32)); - return DAG.getNode(ISD::ADD, dl, MVT::i32, base, offset); -} - SDValue XCoreTargetLowering:: LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const { @@ -350,55 +321,58 @@ LowerBR_JT(SDValue Op, SelectionDAG &DAG) const ScaledIndex); } -static bool -IsWordAlignedBasePlusConstantOffset(SDValue Addr, SDValue &AlignedBase, - int64_t &Offset) +SDValue XCoreTargetLowering:: +lowerLoadWordFromAlignedBasePlusOffset(DebugLoc DL, SDValue Chain, SDValue Base, + int64_t Offset, SelectionDAG &DAG) const { - if (Addr.getOpcode() != ISD::ADD) { - return false; + if ((Offset & 0x3) == 0) { + return DAG.getLoad(getPointerTy(), DL, Chain, Base, MachinePointerInfo(), + false, false, false, 0); } - ConstantSDNode *CN = 0; - if (!(CN = dyn_cast<ConstantSDNode>(Addr.getOperand(1)))) { - return false; - } - int64_t off = CN->getSExtValue(); - const SDValue &Base = Addr.getOperand(0); - const SDValue *Root = &Base; - if (Base.getOpcode() == ISD::ADD && - Base.getOperand(1).getOpcode() == ISD::SHL) { - ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Base.getOperand(1) - .getOperand(1)); - if (CN && (CN->getSExtValue() >= 2)) { - Root = &Base.getOperand(0); - } - } - if (isa<FrameIndexSDNode>(*Root)) { - // All frame indicies are word aligned - AlignedBase = Base; - Offset = off; - return true; - } - if (Root->getOpcode() == XCoreISD::DPRelativeWrapper || - Root->getOpcode() == XCoreISD::CPRelativeWrapper) { - // All dp / cp relative addresses are word aligned - AlignedBase = Base; - Offset = off; - return true; - } - // Check for an aligned global variable. 
- if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(*Root)) { - const GlobalValue *GV = GA->getGlobal(); - if (GA->getOffset() == 0 && GV->getAlignment() >= 4) { - AlignedBase = Base; - Offset = off; - return true; - } + // Lower to pair of consecutive word aligned loads plus some bit shifting. + int32_t HighOffset = RoundUpToAlignment(Offset, 4); + int32_t LowOffset = HighOffset - 4; + SDValue LowAddr, HighAddr; + if (GlobalAddressSDNode *GASD = + dyn_cast<GlobalAddressSDNode>(Base.getNode())) { + LowAddr = DAG.getGlobalAddress(GASD->getGlobal(), DL, Base.getValueType(), + LowOffset); + HighAddr = DAG.getGlobalAddress(GASD->getGlobal(), DL, Base.getValueType(), + HighOffset); + } else { + LowAddr = DAG.getNode(ISD::ADD, DL, MVT::i32, Base, + DAG.getConstant(LowOffset, MVT::i32)); + HighAddr = DAG.getNode(ISD::ADD, DL, MVT::i32, Base, + DAG.getConstant(HighOffset, MVT::i32)); } - return false; + SDValue LowShift = DAG.getConstant((Offset - LowOffset) * 8, MVT::i32); + SDValue HighShift = DAG.getConstant((HighOffset - Offset) * 8, MVT::i32); + + SDValue Low = DAG.getLoad(getPointerTy(), DL, Chain, + LowAddr, MachinePointerInfo(), + false, false, false, 0); + SDValue High = DAG.getLoad(getPointerTy(), DL, Chain, + HighAddr, MachinePointerInfo(), + false, false, false, 0); + SDValue LowShifted = DAG.getNode(ISD::SRL, DL, MVT::i32, Low, LowShift); + SDValue HighShifted = DAG.getNode(ISD::SHL, DL, MVT::i32, High, HighShift); + SDValue Result = DAG.getNode(ISD::OR, DL, MVT::i32, LowShifted, HighShifted); + Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Low.getValue(1), + High.getValue(1)); + SDValue Ops[] = { Result, Chain }; + return DAG.getMergeValues(Ops, 2, DL); +} + +static bool isWordAligned(SDValue Value, SelectionDAG &DAG) +{ + APInt KnownZero, KnownOne; + DAG.ComputeMaskedBits(Value, KnownZero, KnownOne); + return KnownZero.countTrailingOnes() >= 2; } SDValue XCoreTargetLowering:: LowerLOAD(SDValue Op, SelectionDAG &DAG) const { + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); LoadSDNode *LD = cast<LoadSDNode>(Op); assert(LD->getExtensionType() == ISD::NON_EXTLOAD && "Unexpected extension type"); @@ -416,45 +390,23 @@ LowerLOAD(SDValue Op, SelectionDAG &DAG) const { SDValue BasePtr = LD->getBasePtr(); DebugLoc DL = Op.getDebugLoc(); - SDValue Base; - int64_t Offset; - if (!LD->isVolatile() && - IsWordAlignedBasePlusConstantOffset(BasePtr, Base, Offset)) { - if (Offset % 4 == 0) { - // We've managed to infer better alignment information than the load - // already has. Use an aligned load. 
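The lowerLoadWordFromAlignedBasePlusOffset routine added in the XCoreISelLowering.cpp hunk above keeps a single load when the offset is a multiple of four, and otherwise splits the access into the two surrounding word-aligned loads combined with shifts and an or. The same transformation in plain C++, illustrative and little-endian, not the SelectionDAG code; it assumes, as the lowering does, that the object spans both aligned words touched:

    #include <cstdint>
    #include <cstring>

    // Load a 32-bit word at (base + offset) where base is 4-byte aligned but
    // offset need not be; only word-aligned loads are issued.
    uint32_t loadWordFromAlignedBase(const unsigned char *base, int64_t offset) {
      if ((offset & 3) == 0) {
        uint32_t word;
        std::memcpy(&word, base + offset, 4);  // already aligned: one load
        return word;
      }
      int64_t highOffset = (offset + 3) & ~int64_t(3); // round up to alignment
      int64_t lowOffset  = highOffset - 4;
      uint32_t low, high;
      std::memcpy(&low,  base + lowOffset,  4);
      std::memcpy(&high, base + highOffset, 4);
      unsigned lowShift  = unsigned(offset - lowOffset) * 8;   // 8, 16 or 24
      unsigned highShift = unsigned(highOffset - offset) * 8;  // 24, 16 or 8
      return (low >> lowShift) | (high << highShift);          // little-endian
    }

Because the aligned case returns early, neither shift amount can reach 32, so the combine is well defined.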
- // - return DAG.getLoad(getPointerTy(), DL, Chain, BasePtr, - MachinePointerInfo(), - false, false, false, 0); + if (!LD->isVolatile()) { + const GlobalValue *GV; + int64_t Offset = 0; + if (DAG.isBaseWithConstantOffset(BasePtr) && + isWordAligned(BasePtr->getOperand(0), DAG)) { + SDValue NewBasePtr = BasePtr->getOperand(0); + Offset = cast<ConstantSDNode>(BasePtr->getOperand(1))->getSExtValue(); + return lowerLoadWordFromAlignedBasePlusOffset(DL, Chain, NewBasePtr, + Offset, DAG); + } + if (TLI.isGAPlusOffset(BasePtr.getNode(), GV, Offset) && + MinAlign(GV->getAlignment(), 4) == 4) { + SDValue NewBasePtr = DAG.getGlobalAddress(GV, DL, + BasePtr->getValueType(0)); + return lowerLoadWordFromAlignedBasePlusOffset(DL, Chain, NewBasePtr, + Offset, DAG); } - // Lower to - // ldw low, base[offset >> 2] - // ldw high, base[(offset >> 2) + 1] - // shr low_shifted, low, (offset & 0x3) * 8 - // shl high_shifted, high, 32 - (offset & 0x3) * 8 - // or result, low_shifted, high_shifted - SDValue LowOffset = DAG.getConstant(Offset & ~0x3, MVT::i32); - SDValue HighOffset = DAG.getConstant((Offset & ~0x3) + 4, MVT::i32); - SDValue LowShift = DAG.getConstant((Offset & 0x3) * 8, MVT::i32); - SDValue HighShift = DAG.getConstant(32 - (Offset & 0x3) * 8, MVT::i32); - - SDValue LowAddr = DAG.getNode(ISD::ADD, DL, MVT::i32, Base, LowOffset); - SDValue HighAddr = DAG.getNode(ISD::ADD, DL, MVT::i32, Base, HighOffset); - - SDValue Low = DAG.getLoad(getPointerTy(), DL, Chain, - LowAddr, MachinePointerInfo(), - false, false, false, 0); - SDValue High = DAG.getLoad(getPointerTy(), DL, Chain, - HighAddr, MachinePointerInfo(), - false, false, false, 0); - SDValue LowShifted = DAG.getNode(ISD::SRL, DL, MVT::i32, Low, LowShift); - SDValue HighShifted = DAG.getNode(ISD::SHL, DL, MVT::i32, High, HighShift); - SDValue Result = DAG.getNode(ISD::OR, DL, MVT::i32, LowShifted, HighShifted); - Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Low.getValue(1), - High.getValue(1)); - SDValue Ops[] = { Result, Chain }; - return DAG.getMergeValues(Ops, 2, DL); } if (LD->getAlignment() == 2) { diff --git a/contrib/llvm/lib/Target/XCore/XCoreISelLowering.h b/contrib/llvm/lib/Target/XCore/XCoreISelLowering.h index 8d258f5..c7dfa26 100644 --- a/contrib/llvm/lib/Target/XCore/XCoreISelLowering.h +++ b/contrib/llvm/lib/Target/XCore/XCoreISelLowering.h @@ -133,6 +133,9 @@ namespace llvm { SDValue getReturnAddressFrameIndex(SelectionDAG &DAG) const; SDValue getGlobalAddressWrapper(SDValue GA, const GlobalValue *GV, SelectionDAG &DAG) const; + SDValue lowerLoadWordFromAlignedBasePlusOffset(DebugLoc DL, SDValue Chain, + SDValue Base, int64_t Offset, + SelectionDAG &DAG) const; // Lower Operand specifics SDValue LowerLOAD(SDValue Op, SelectionDAG &DAG) const; diff --git a/contrib/llvm/lib/Target/XCore/XCoreInstrInfo.td b/contrib/llvm/lib/Target/XCore/XCoreInstrInfo.td index 03653cb..587166c 100644 --- a/contrib/llvm/lib/Target/XCore/XCoreInstrInfo.td +++ b/contrib/llvm/lib/Target/XCore/XCoreInstrInfo.td @@ -168,21 +168,20 @@ def ldawb : PatFrag<(ops node:$addr, node:$offset), (sub node:$addr, (shl node:$offset, 2))>; // Instruction operand types -def calltarget : Operand<i32>; +def pcrel_imm : Operand<i32>; +def pcrel_imm_neg : Operand<i32> { + let DecoderMethod = "DecodeNegImmOperand"; +} def brtarget : Operand<OtherVT>; -def pclabel : Operand<i32>; +def brtarget_neg : Operand<OtherVT> { + let DecoderMethod = "DecodeNegImmOperand"; +} // Addressing modes def ADDRspii : ComplexPattern<i32, 2, "SelectADDRspii", [add, frameindex], []>; -def 
ADDRdpii : ComplexPattern<i32, 2, "SelectADDRdpii", [add, dprelwrapper], - []>; -def ADDRcpii : ComplexPattern<i32, 2, "SelectADDRcpii", [add, cprelwrapper], - []>; // Address operands def MEMii : Operand<i32> { - let PrintMethod = "printMemOperand"; - let DecoderMethod = "DecodeMEMiiOperand"; let MIOperandInfo = (ops i32imm, i32imm); } @@ -274,10 +273,10 @@ multiclass FRU6_LRU6_branch<bits<6> opc, string OpcStr> { } multiclass FRU6_LRU6_backwards_branch<bits<6> opc, string OpcStr> { - def _ru6: _FRU6<opc, (outs), (ins GRRegs:$a, brtarget:$b), - !strconcat(OpcStr, " $a, -$b"), []>; - def _lru6: _FLRU6<opc, (outs), (ins GRRegs:$a, brtarget:$b), - !strconcat(OpcStr, " $a, -$b"), []>; + def _ru6: _FRU6<opc, (outs), (ins GRRegs:$a, brtarget_neg:$b), + !strconcat(OpcStr, " $a, $b"), []>; + def _lru6: _FLRU6<opc, (outs), (ins GRRegs:$a, brtarget_neg:$b), + !strconcat(OpcStr, " $a, $b"), []>; } multiclass FRU6_LRU6_cp<bits<6> opc, string OpcStr> { @@ -515,29 +514,29 @@ def LMUL_l6r : _FL6R< //let Uses = [DP] in ... let neverHasSideEffects = 1, isReMaterializable = 1 in -def LDAWDP_ru6: _FRU6<0b011000, (outs RRegs:$a), (ins MEMii:$b), +def LDAWDP_ru6: _FRU6<0b011000, (outs RRegs:$a), (ins i32imm:$b), "ldaw $a, dp[$b]", []>; let isReMaterializable = 1 in -def LDAWDP_lru6: _FLRU6<0b011000, (outs RRegs:$a), (ins MEMii:$b), +def LDAWDP_lru6: _FLRU6<0b011000, (outs RRegs:$a), (ins i32imm:$b), "ldaw $a, dp[$b]", - [(set RRegs:$a, ADDRdpii:$b)]>; + [(set RRegs:$a, (dprelwrapper tglobaladdr:$b))]>; let mayLoad=1 in -def LDWDP_ru6: _FRU6<0b010110, (outs RRegs:$a), (ins MEMii:$b), +def LDWDP_ru6: _FRU6<0b010110, (outs RRegs:$a), (ins i32imm:$b), "ldw $a, dp[$b]", []>; -def LDWDP_lru6: _FLRU6<0b010110, (outs RRegs:$a), (ins MEMii:$b), +def LDWDP_lru6: _FLRU6<0b010110, (outs RRegs:$a), (ins i32imm:$b), "ldw $a, dp[$b]", - [(set RRegs:$a, (load ADDRdpii:$b))]>; + [(set RRegs:$a, (load (dprelwrapper tglobaladdr:$b)))]>; let mayStore=1 in -def STWDP_ru6 : _FRU6<0b010100, (outs), (ins RRegs:$a, MEMii:$b), +def STWDP_ru6 : _FRU6<0b010100, (outs), (ins RRegs:$a, i32imm:$b), "stw $a, dp[$b]", []>; -def STWDP_lru6 : _FLRU6<0b010100, (outs), (ins RRegs:$a, MEMii:$b), +def STWDP_lru6 : _FLRU6<0b010100, (outs), (ins RRegs:$a, i32imm:$b), "stw $a, dp[$b]", - [(store RRegs:$a, ADDRdpii:$b)]>; + [(store RRegs:$a, (dprelwrapper tglobaladdr:$b))]>; //let Uses = [CP] in .. let mayLoad = 1, isReMaterializable = 1, neverHasSideEffects = 1 in @@ -615,9 +614,9 @@ let Uses = [R11], isCall=1 in defm BLAT : FU6_LU6_np<0b0111001101, "blat">; let isBranch = 1, isTerminator = 1, isBarrier = 1 in { -def BRBU_u6 : _FU6<0b0111011100, (outs), (ins brtarget:$a), "bu -$a", []>; +def BRBU_u6 : _FU6<0b0111011100, (outs), (ins brtarget_neg:$a), "bu $a", []>; -def BRBU_lu6 : _FLU6<0b0111011100, (outs), (ins brtarget:$a), "bu -$a", []>; +def BRBU_lu6 : _FLU6<0b0111011100, (outs), (ins brtarget_neg:$a), "bu $a", []>; def BRFU_u6 : _FU6<0b0111001100, (outs), (ins brtarget:$a), "bu $a", []>; @@ -626,12 +625,12 @@ def BRFU_lu6 : _FLU6<0b0111001100, (outs), (ins brtarget:$a), "bu $a", []>; //let Uses = [CP] in ... 
let Defs = [R11], neverHasSideEffects = 1, isReMaterializable = 1 in -def LDAWCP_u6: _FU6<0b0111111101, (outs), (ins MEMii:$a), "ldaw r11, cp[$a]", +def LDAWCP_u6: _FU6<0b0111111101, (outs), (ins i32imm:$a), "ldaw r11, cp[$a]", []>; let Defs = [R11], isReMaterializable = 1 in -def LDAWCP_lu6: _FLU6<0b0111111101, (outs), (ins MEMii:$a), "ldaw r11, cp[$a]", - [(set R11, ADDRcpii:$a)]>; +def LDAWCP_lu6: _FLU6<0b0111111101, (outs), (ins i32imm:$a), "ldaw r11, cp[$a]", + [(set R11, (cprelwrapper tglobaladdr:$a))]>; let Defs = [R11] in defm GETSR : FU6_LU6_np<0b0111111100, "getsr r11,">; @@ -658,16 +657,26 @@ defm KRESTSP : FU6_LU6_np<0b0111101111, "krestsp">; // U10 -let Defs = [R11], isReMaterializable = 1, neverHasSideEffects = 1 in -def LDAPF_u10 : _FU10<0b110110, (outs), (ins i32imm:$a), "ldap r11, $a", []>; +let Defs = [R11], isReMaterializable = 1 in { +let neverHasSideEffects = 1 in +def LDAPF_u10 : _FU10<0b110110, (outs), (ins pcrel_imm:$a), "ldap r11, $a", []>; + +def LDAPF_lu10 : _FLU10<0b110110, (outs), (ins pcrel_imm:$a), "ldap r11, $a", + [(set R11, (pcrelwrapper tglobaladdr:$a))]>; -let Defs = [R11], isReMaterializable = 1 in -def LDAPF_lu10 : _FLU10<0b110110, (outs), (ins i32imm:$a), "ldap r11, $a", +let neverHasSideEffects = 1 in +def LDAPB_u10 : _FU10<0b110111, (outs), (ins pcrel_imm_neg:$a), "ldap r11, $a", + []>; + +let neverHasSideEffects = 1 in +def LDAPB_lu10 : _FLU10<0b110111, (outs), (ins pcrel_imm_neg:$a), + "ldap r11, $a", [(set R11, (pcrelwrapper tglobaladdr:$a))]>; -let Defs = [R11], isReMaterializable = 1, isCodeGenOnly = 1 in -def LDAPF_lu10_ba : _FLU10<0b110110, (outs), (ins i32imm:$a), "ldap r11, $a", +let isCodeGenOnly = 1 in +def LDAPF_lu10_ba : _FLU10<0b110110, (outs), (ins pcrel_imm:$a), "ldap r11, $a", [(set R11, (pcrelwrapper tblockaddress:$a))]>; +} let isCall=1, // All calls clobber the link register and the non-callee-saved registers: @@ -676,11 +685,15 @@ def BLACP_u10 : _FU10<0b111000, (outs), (ins i32imm:$a), "bla cp[$a]", []>; def BLACP_lu10 : _FLU10<0b111000, (outs), (ins i32imm:$a), "bla cp[$a]", []>; -def BLRF_u10 : _FU10<0b110100, (outs), (ins calltarget:$a), "bl $a", +def BLRF_u10 : _FU10<0b110100, (outs), (ins pcrel_imm:$a), "bl $a", [(XCoreBranchLink immU10:$a)]>; -def BLRF_lu10 : _FLU10<0b110100, (outs), (ins calltarget:$a), "bl $a", +def BLRF_lu10 : _FLU10<0b110100, (outs), (ins pcrel_imm:$a), "bl $a", [(XCoreBranchLink immU20:$a)]>; + +def BLRB_u10 : _FU10<0b110101, (outs), (ins pcrel_imm_neg:$a), "bl $a", []>; + +def BLRB_lu10 : _FLU10<0b110101, (outs), (ins pcrel_imm_neg:$a), "bl $a", []>; } let Defs = [R11], mayLoad = 1, isReMaterializable = 1, diff --git a/contrib/llvm/lib/Target/XCore/XCoreLowerThreadLocal.cpp b/contrib/llvm/lib/Target/XCore/XCoreLowerThreadLocal.cpp new file mode 100644 index 0000000..2e328b4 --- /dev/null +++ b/contrib/llvm/lib/Target/XCore/XCoreLowerThreadLocal.cpp @@ -0,0 +1,145 @@ +//===-- XCoreLowerThreadLocal - Lower thread local variables --------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// \brief This file contains a pass that lowers thread local variables on the +/// XCore. 
+/// +//===----------------------------------------------------------------------===// + +#include "XCore.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/GlobalVariable.h" +#include "llvm/IR/Intrinsics.h" +#include "llvm/IR/IRBuilder.h" +#include "llvm/IR/Module.h" +#include "llvm/Pass.h" +#include "llvm/Support/CommandLine.h" + +#define DEBUG_TYPE "xcore-lower-thread-local" + +using namespace llvm; + +static cl::opt<unsigned> MaxThreads( + "xcore-max-threads", cl::Optional, + cl::desc("Maximum number of threads (for emulation thread-local storage)"), + cl::Hidden, cl::value_desc("number"), cl::init(8)); + +namespace { + /// Lowers thread local variables on the XCore. Each thread local variable is + /// expanded to an array of n elements indexed by the thread ID where n is the + /// fixed number hardware threads supported by the device. + struct XCoreLowerThreadLocal : public ModulePass { + static char ID; + + XCoreLowerThreadLocal() : ModulePass(ID) { + initializeXCoreLowerThreadLocalPass(*PassRegistry::getPassRegistry()); + } + + bool lowerGlobal(GlobalVariable *GV); + + bool runOnModule(Module &M); + }; +} + +char XCoreLowerThreadLocal::ID = 0; + +INITIALIZE_PASS(XCoreLowerThreadLocal, "xcore-lower-thread-local", + "Lower thread local variables", false, false) + +ModulePass *llvm::createXCoreLowerThreadLocalPass() { + return new XCoreLowerThreadLocal(); +} + +static ArrayType *createLoweredType(Type *OriginalType) { + return ArrayType::get(OriginalType, MaxThreads); +} + +static Constant * +createLoweredInitializer(ArrayType *NewType, Constant *OriginalInitializer) { + SmallVector<Constant *, 8> Elements(MaxThreads); + for (unsigned i = 0; i != MaxThreads; ++i) { + Elements[i] = OriginalInitializer; + } + return ConstantArray::get(NewType, Elements); +} + +static bool hasNonInstructionUse(GlobalVariable *GV) { + for (Value::use_iterator UI = GV->use_begin(), E = GV->use_end(); UI != E; + ++UI) + if (!isa<Instruction>(*UI)) + return true; + + return false; +} + +static bool isZeroLengthArray(Type *Ty) { + ArrayType *AT = dyn_cast<ArrayType>(Ty); + return AT && (AT->getNumElements() == 0); +} + +bool XCoreLowerThreadLocal::lowerGlobal(GlobalVariable *GV) { + Module *M = GV->getParent(); + LLVMContext &Ctx = M->getContext(); + if (!GV->isThreadLocal()) + return false; + + // Skip globals that we can't lower and leave it for the backend to error. + if (hasNonInstructionUse(GV) || + !GV->getType()->isSized() || isZeroLengthArray(GV->getType())) + return false; + + // Create replacement global. + ArrayType *NewType = createLoweredType(GV->getType()->getElementType()); + Constant *NewInitializer = createLoweredInitializer(NewType, + GV->getInitializer()); + GlobalVariable *NewGV = + new GlobalVariable(*M, NewType, GV->isConstant(), GV->getLinkage(), + NewInitializer, "", 0, GlobalVariable::NotThreadLocal, + GV->getType()->getAddressSpace(), + GV->isExternallyInitialized()); + + // Update uses. 
+ SmallVector<User *, 16> Users(GV->use_begin(), GV->use_end()); + for (unsigned I = 0, E = Users.size(); I != E; ++I) { + User *U = Users[I]; + Instruction *Inst = cast<Instruction>(U); + IRBuilder<> Builder(Inst); + Function *GetID = Intrinsic::getDeclaration(GV->getParent(), + Intrinsic::xcore_getid); + Value *ThreadID = Builder.CreateCall(GetID); + SmallVector<Value *, 2> Indices; + Indices.push_back(Constant::getNullValue(Type::getInt64Ty(Ctx))); + Indices.push_back(ThreadID); + Value *Addr = Builder.CreateInBoundsGEP(NewGV, Indices); + U->replaceUsesOfWith(GV, Addr); + } + + // Remove old global. + NewGV->takeName(GV); + GV->eraseFromParent(); + return true; +} + +bool XCoreLowerThreadLocal::runOnModule(Module &M) { + // Find thread local globals. + bool MadeChange = false; + SmallVector<GlobalVariable *, 16> ThreadLocalGlobals; + for (Module::global_iterator GVI = M.global_begin(), E = M.global_end(); + GVI != E; ++GVI) { + GlobalVariable *GV = GVI; + if (GV->isThreadLocal()) + ThreadLocalGlobals.push_back(GV); + } + for (unsigned I = 0, E = ThreadLocalGlobals.size(); I != E; ++I) { + MadeChange |= lowerGlobal(ThreadLocalGlobals[I]); + } + return MadeChange; +} diff --git a/contrib/llvm/lib/Target/XCore/XCoreTargetMachine.cpp b/contrib/llvm/lib/Target/XCore/XCoreTargetMachine.cpp index 28c3d12..07e5fff 100644 --- a/contrib/llvm/lib/Target/XCore/XCoreTargetMachine.cpp +++ b/contrib/llvm/lib/Target/XCore/XCoreTargetMachine.cpp @@ -46,6 +46,7 @@ public: return getTM<XCoreTargetMachine>(); } + virtual bool addPreISel(); virtual bool addInstSelector(); }; } // namespace @@ -54,6 +55,11 @@ TargetPassConfig *XCoreTargetMachine::createPassConfig(PassManagerBase &PM) { return new XCorePassConfig(this, PM); } +bool XCorePassConfig::addPreISel() { + addPass(createXCoreLowerThreadLocalPass()); + return false; +} + bool XCorePassConfig::addInstSelector() { addPass(createXCoreISelDag(getXCoreTargetMachine(), getOptLevel())); return false; diff --git a/contrib/llvm/lib/Target/XCore/XCoreTargetObjectFile.cpp b/contrib/llvm/lib/Target/XCore/XCoreTargetObjectFile.cpp index 8203899..88e3bfd 100644 --- a/contrib/llvm/lib/Target/XCore/XCoreTargetObjectFile.cpp +++ b/contrib/llvm/lib/Target/XCore/XCoreTargetObjectFile.cpp @@ -57,9 +57,4 @@ void XCoreTargetObjectFile::Initialize(MCContext &Ctx, const TargetMachine &TM){ ELF::SHF_ALLOC | ELF::XCORE_SHF_CP_SECTION, SectionKind::getReadOnlyWithRel()); - - // Dynamic linking is not supported. Data with relocations is placed in the - // same section as data without relocations. 
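The new XCoreLowerThreadLocal pass shown above replaces each thread-local global with an array of MaxThreads copies and rewrites every use to index that array with the hardware thread ID (the xcore_getid intrinsic), superseding the emulation that the removed XCoreAsmPrinter code performed at emission time. At the source level the rewrite is roughly the following sketch; get_thread_id() is a hypothetical stand-in for the intrinsic:

    // Before lowering: one thread-local variable.
    //   __thread int counter;
    //   counter += 1;

    constexpr unsigned MaxThreads = 8;  // matches the -xcore-max-threads default

    // Hypothetical stand-in for llvm.xcore.getid; always thread 0 here.
    inline unsigned get_thread_id() { return 0; }

    // After lowering: one slot per hardware thread, indexed by thread ID.
    int counter[MaxThreads];

    void increment() {
      counter[get_thread_id()] += 1;
    }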
- DataRelSection = DataRelLocalSection = DataSection; - DataRelROSection = DataRelROLocalSection = ReadOnlySection; } diff --git a/contrib/llvm/lib/Transforms/IPO/ConstantMerge.cpp b/contrib/llvm/lib/Transforms/IPO/ConstantMerge.cpp index 8336d3a..a7bf188 100644 --- a/contrib/llvm/lib/Transforms/IPO/ConstantMerge.cpp +++ b/contrib/llvm/lib/Transforms/IPO/ConstantMerge.cpp @@ -27,6 +27,7 @@ #include "llvm/IR/DataLayout.h" #include "llvm/IR/DerivedTypes.h" #include "llvm/IR/Module.h" +#include "llvm/IR/Operator.h" #include "llvm/Pass.h" using namespace llvm; @@ -66,13 +67,13 @@ ModulePass *llvm::createConstantMergePass() { return new ConstantMerge(); } static void FindUsedValues(GlobalVariable *LLVMUsed, SmallPtrSet<const GlobalValue*, 8> &UsedValues) { if (LLVMUsed == 0) return; - ConstantArray *Inits = dyn_cast<ConstantArray>(LLVMUsed->getInitializer()); - if (Inits == 0) return; - - for (unsigned i = 0, e = Inits->getNumOperands(); i != e; ++i) - if (GlobalValue *GV = - dyn_cast<GlobalValue>(Inits->getOperand(i)->stripPointerCasts())) - UsedValues.insert(GV); + ConstantArray *Inits = cast<ConstantArray>(LLVMUsed->getInitializer()); + + for (unsigned i = 0, e = Inits->getNumOperands(); i != e; ++i) { + Value *Operand = Inits->getOperand(i)->stripPointerCastsNoFollowAliases(); + GlobalValue *GV = cast<GlobalValue>(Operand); + UsedValues.insert(GV); + } } // True if A is better than B. diff --git a/contrib/llvm/lib/Transforms/IPO/GlobalDCE.cpp b/contrib/llvm/lib/Transforms/IPO/GlobalDCE.cpp index dc99492..201f320 100644 --- a/contrib/llvm/lib/Transforms/IPO/GlobalDCE.cpp +++ b/contrib/llvm/lib/Transforms/IPO/GlobalDCE.cpp @@ -42,6 +42,7 @@ namespace { private: SmallPtrSet<GlobalValue*, 32> AliveGlobals; + SmallPtrSet<Constant *, 8> SeenConstants; /// GlobalIsNeeded - mark the specific global value as needed, and /// recursively mark anything that it uses as also needed. @@ -151,6 +152,7 @@ bool GlobalDCE::runOnModule(Module &M) { // Make sure that all memory is released AliveGlobals.clear(); + SeenConstants.clear(); return Changed; } @@ -190,12 +192,15 @@ void GlobalDCE::GlobalIsNeeded(GlobalValue *G) { void GlobalDCE::MarkUsedGlobalsAsNeeded(Constant *C) { if (GlobalValue *GV = dyn_cast<GlobalValue>(C)) return GlobalIsNeeded(GV); - + // Loop over all of the operands of the constant, adding any globals they // use to the list of needed globals. - for (User::op_iterator I = C->op_begin(), E = C->op_end(); I != E; ++I) - if (Constant *OpC = dyn_cast<Constant>(*I)) - MarkUsedGlobalsAsNeeded(OpC); + for (User::op_iterator I = C->op_begin(), E = C->op_end(); I != E; ++I) { + // If we've already processed this constant there's no need to do it again. 
+ Constant *Op = dyn_cast<Constant>(*I); + if (Op && SeenConstants.insert(Op)) + MarkUsedGlobalsAsNeeded(Op); + } } // RemoveUnusedGlobalValue - Loop over all of the uses of the specified diff --git a/contrib/llvm/lib/Transforms/IPO/GlobalOpt.cpp b/contrib/llvm/lib/Transforms/IPO/GlobalOpt.cpp index b035a82..0ef900e 100644 --- a/contrib/llvm/lib/Transforms/IPO/GlobalOpt.cpp +++ b/contrib/llvm/lib/Transforms/IPO/GlobalOpt.cpp @@ -3041,6 +3041,105 @@ bool GlobalOpt::OptimizeGlobalCtorsList(GlobalVariable *&GCL) { return true; } +static Value::use_iterator getFirst(Value *V, SmallPtrSet<Use*, 8> &Tried) { + for (Value::use_iterator I = V->use_begin(), E = V->use_end(); I != E; ++I) { + Use *U = &I.getUse(); + if (Tried.count(U)) + continue; + + User *Usr = *I; + GlobalVariable *GV = dyn_cast<GlobalVariable>(Usr); + if (!GV || !GV->hasName()) { + Tried.insert(U); + return I; + } + + StringRef Name = GV->getName(); + if (Name != "llvm.used" && Name != "llvm.compiler_used") { + Tried.insert(U); + return I; + } + } + return V->use_end(); +} + +static bool replaceAllNonLLVMUsedUsesWith(Constant *Old, Constant *New); + +static bool replaceUsesOfWithOnConstant(ConstantArray *CA, Value *From, + Value *ToV, Use *U) { + Constant *To = cast<Constant>(ToV); + + SmallVector<Constant*, 8> NewOps; + for (unsigned i = 0, e = CA->getNumOperands(); i != e; ++i) { + Constant *Op = CA->getOperand(i); + NewOps.push_back(Op == From ? To : Op); + } + + Constant *Replacement = ConstantArray::get(CA->getType(), NewOps); + assert(Replacement != CA && "CA didn't contain From!"); + + bool Ret = replaceAllNonLLVMUsedUsesWith(CA, Replacement); + if (Replacement->use_empty()) + Replacement->destroyConstant(); + if (CA->use_empty()) + CA->destroyConstant(); + return Ret; +} + +static bool replaceUsesOfWithOnConstant(ConstantExpr *CE, Value *From, + Value *ToV, Use *U) { + Constant *To = cast<Constant>(ToV); + SmallVector<Constant*, 8> NewOps; + for (unsigned i = 0, e = CE->getNumOperands(); i != e; ++i) { + Constant *Op = CE->getOperand(i); + NewOps.push_back(Op == From ? To : Op); + } + + Constant *Replacement = CE->getWithOperands(NewOps); + assert(Replacement != CE && "CE didn't contain From!"); + + bool Ret = replaceAllNonLLVMUsedUsesWith(CE, Replacement); + if (Replacement->use_empty()) + Replacement->destroyConstant(); + if (CE->use_empty()) + CE->destroyConstant(); + return Ret; +} + +static bool replaceUsesOfWithOnConstant(Constant *C, Value *From, Value *To, + Use *U) { + if (ConstantArray *CA = dyn_cast<ConstantArray>(C)) + return replaceUsesOfWithOnConstant(CA, From, To, U); + if (ConstantExpr *CE = dyn_cast<ConstantExpr>(C)) + return replaceUsesOfWithOnConstant(CE, From, To, U); + C->replaceUsesOfWithOnConstant(From, To, U); + return true; +} + +static bool replaceAllNonLLVMUsedUsesWith(Constant *Old, Constant *New) { + SmallPtrSet<Use*, 8> Tried; + bool Ret = false; + for (;;) { + Value::use_iterator I = getFirst(Old, Tried); + if (I == Old->use_end()) + break; + Use &U = I.getUse(); + + // Must handle Constants specially, we cannot call replaceUsesOfWith on a + // constant because they are uniqued. 
+ if (Constant *C = dyn_cast<Constant>(U.getUser())) { + if (!isa<GlobalValue>(C)) { + Ret |= replaceUsesOfWithOnConstant(C, Old, New, &U); + continue; + } + } + + U.set(New); + Ret = true; + } + return Ret; +} + bool GlobalOpt::OptimizeGlobalAliases(Module &M) { bool Changed = false; @@ -3060,11 +3159,12 @@ bool GlobalOpt::OptimizeGlobalAliases(Module &M) { bool hasOneUse = Target->hasOneUse() && Aliasee->hasOneUse(); // Make all users of the alias use the aliasee instead. - if (!J->use_empty()) { - J->replaceAllUsesWith(Aliasee); + if (replaceAllNonLLVMUsedUsesWith(J, Aliasee)) { ++NumAliasesResolved; Changed = true; } + if (!J->use_empty()) + continue; // If the alias is externally visible, we may still be able to simplify it. if (!J->hasLocalLinkage()) { diff --git a/contrib/llvm/lib/Transforms/IPO/MergeFunctions.cpp b/contrib/llvm/lib/Transforms/IPO/MergeFunctions.cpp index 892100f..4ce749c 100644 --- a/contrib/llvm/lib/Transforms/IPO/MergeFunctions.cpp +++ b/contrib/llvm/lib/Transforms/IPO/MergeFunctions.cpp @@ -72,6 +72,15 @@ STATISTIC(NumThunksWritten, "Number of thunks generated"); STATISTIC(NumAliasesWritten, "Number of aliases generated"); STATISTIC(NumDoubleWeak, "Number of new functions created"); +/// Returns the type id for a type to be hashed. We turn pointer types into +/// integers here because the actual compare logic below considers pointers and +/// integers of the same size as equal. +static Type::TypeID getTypeIDForHash(Type *Ty) { + if (Ty->isPointerTy()) + return Type::IntegerTyID; + return Ty->getTypeID(); +} + /// Creates a hash-code for the function which is the same for any two /// functions that will compare equal, without looking at the instructions /// inside the function. @@ -83,9 +92,9 @@ static unsigned profileFunction(const Function *F) { ID.AddInteger(F->getCallingConv()); ID.AddBoolean(F->hasGC()); ID.AddBoolean(FTy->isVarArg()); - ID.AddInteger(FTy->getReturnType()->getTypeID()); + ID.AddInteger(getTypeIDForHash(FTy->getReturnType())); for (unsigned i = 0, e = FTy->getNumParams(); i != e; ++i) - ID.AddInteger(FTy->getParamType(i)->getTypeID()); + ID.AddInteger(getTypeIDForHash(FTy->getParamType(i))); return ID.ComputeHash(); } @@ -200,8 +209,7 @@ private: // Any two pointers in the same address space are equivalent, intptr_t and // pointers are equivalent. Otherwise, standard type equivalence rules apply. 
-bool FunctionComparator::isEquivalentType(Type *Ty1, - Type *Ty2) const { +bool FunctionComparator::isEquivalentType(Type *Ty1, Type *Ty2) const { if (Ty1 == Ty2) return true; if (Ty1->getTypeID() != Ty2->getTypeID()) { @@ -740,7 +748,13 @@ void MergeFunctions::writeThunk(Function *F, Function *G) { if (NewG->getReturnType()->isVoidTy()) { Builder.CreateRetVoid(); } else { - Builder.CreateRet(Builder.CreateBitCast(CI, NewG->getReturnType())); + Type *RetTy = NewG->getReturnType(); + if (CI->getType()->isIntegerTy() && RetTy->isPointerTy()) + Builder.CreateRet(Builder.CreateIntToPtr(CI, RetTy)); + else if (CI->getType()->isPointerTy() && RetTy->isIntegerTy()) + Builder.CreateRet(Builder.CreatePtrToInt(CI, RetTy)); + else + Builder.CreateRet(Builder.CreateBitCast(CI, RetTy)); } NewG->copyAttributesFrom(G); diff --git a/contrib/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp b/contrib/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp index 027a9f2..986c0b8 100644 --- a/contrib/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp +++ b/contrib/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp @@ -33,7 +33,12 @@ RunLoopVectorization("vectorize-loops", cl::desc("Run the Loop vectorization passes")); static cl::opt<bool> -RunBBVectorization("vectorize", cl::desc("Run the BB vectorization passes")); +RunSLPVectorization("vectorize-slp", + cl::desc("Run the SLP vectorization passes")); + +static cl::opt<bool> +RunBBVectorization("vectorize-slp-aggressive", + cl::desc("Run the BB vectorization passes")); static cl::opt<bool> UseGVNAfterVectorization("use-gvn-after-vectorization", @@ -52,7 +57,8 @@ PassManagerBuilder::PassManagerBuilder() { DisableSimplifyLibCalls = false; DisableUnitAtATime = false; DisableUnrollLoops = false; - Vectorize = RunBBVectorization; + BBVectorize = RunBBVectorization; + SLPVectorize = RunSLPVectorization; LoopVectorize = RunLoopVectorization; } @@ -207,7 +213,10 @@ void PassManagerBuilder::populateModulePassManager(PassManagerBase &MPM) { addExtensionsToPM(EP_ScalarOptimizerLate, MPM); - if (Vectorize) { + if (SLPVectorize) + MPM.add(createSLPVectorizerPass()); // Vectorize parallel scalar chains. 
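A note on the writeThunk change above (illustrative sketch, not part of the diff): when the surviving function and the thunk disagree only in whether the return type is a pointer or a same-sized integer, a plain bitcast of the return value is not valid IR, so the patch emits inttoptr or ptrtoint instead. Roughly the source-level situation, with made-up names:

    #include <cstdint>

    // Two functions whose bodies are identical except for the pointer/integer
    // flavour of the value they return. After merging, one becomes a thunk that
    // calls the other and converts the result back, which is an int<->pointer
    // cast rather than a bitcast.
    static intptr_t canonical(intptr_t p) { return p; }

    static void *thunk(void *p) {
      // conceptually ptrtoint on the way in, inttoptr on the way out
      return reinterpret_cast<void *>(canonical(reinterpret_cast<intptr_t>(p)));
    }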
+ + if (BBVectorize) { MPM.add(createBBVectorizePass()); MPM.add(createInstructionCombiningPass()); if (OptLevel > 1 && UseGVNAfterVectorization) @@ -321,6 +330,14 @@ void PassManagerBuilder::populateLTOPassManager(PassManagerBase &PM, PM.add(createGlobalDCEPass()); } +inline PassManagerBuilder *unwrap(LLVMPassManagerBuilderRef P) { + return reinterpret_cast<PassManagerBuilder*>(P); +} + +inline LLVMPassManagerBuilderRef wrap(PassManagerBuilder *P) { + return reinterpret_cast<LLVMPassManagerBuilderRef>(P); +} + LLVMPassManagerBuilderRef LLVMPassManagerBuilderCreate() { PassManagerBuilder *PMB = new PassManagerBuilder(); return wrap(PMB); diff --git a/contrib/llvm/lib/Transforms/IPO/StripSymbols.cpp b/contrib/llvm/lib/Transforms/IPO/StripSymbols.cpp index 5f8681f..3396f79 100644 --- a/contrib/llvm/lib/Transforms/IPO/StripSymbols.cpp +++ b/contrib/llvm/lib/Transforms/IPO/StripSymbols.cpp @@ -195,10 +195,9 @@ static void findUsedValues(GlobalVariable *LLVMUsed, SmallPtrSet<const GlobalValue*, 8> &UsedValues) { if (LLVMUsed == 0) return; UsedValues.insert(LLVMUsed); - - ConstantArray *Inits = dyn_cast<ConstantArray>(LLVMUsed->getInitializer()); - if (Inits == 0) return; - + + ConstantArray *Inits = cast<ConstantArray>(LLVMUsed->getInitializer()); + for (unsigned i = 0, e = Inits->getNumOperands(); i != e; ++i) if (GlobalValue *GV = dyn_cast<GlobalValue>(Inits->getOperand(i)->stripPointerCasts())) diff --git a/contrib/llvm/lib/Transforms/InstCombine/InstCombine.h b/contrib/llvm/lib/Transforms/InstCombine/InstCombine.h index 1f6a3a5..2a36074 100644 --- a/contrib/llvm/lib/Transforms/InstCombine/InstCombine.h +++ b/contrib/llvm/lib/Transforms/InstCombine/InstCombine.h @@ -233,6 +233,7 @@ private: Instruction *transformSExtICmp(ICmpInst *ICI, Instruction &CI); bool WillNotOverflowSignedAdd(Value *LHS, Value *RHS); Value *EmitGEPOffset(User *GEP); + Instruction *scalarizePHI(ExtractElementInst &EI, PHINode *PN); public: // InsertNewInstBefore - insert an instruction New before instruction Old diff --git a/contrib/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp b/contrib/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp index 7595da0..166f8df 100644 --- a/contrib/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp +++ b/contrib/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp @@ -24,9 +24,9 @@ namespace { /// Class representing coefficient of floating-point addend. /// This class needs to be highly efficient, which is especially true for /// the constructor. As of I write this comment, the cost of the default - /// constructor is merely 4-byte-store-zero (Assuming compiler is able to + /// constructor is merely 4-byte-store-zero (Assuming compiler is able to /// perform write-merging). - /// + /// class FAddendCoef { public: // The constructor has to initialize a APFloat, which is uncessary for @@ -37,31 +37,31 @@ namespace { // FAddendCoef() : IsFp(false), BufHasFpVal(false), IntVal(0) {} ~FAddendCoef(); - + void set(short C) { assert(!insaneIntVal(C) && "Insane coefficient"); IsFp = false; IntVal = C; } - + void set(const APFloat& C); void negate(); - + bool isZero() const { return isInt() ? !IntVal : getFpVal().isZero(); } Value *getValue(Type *) const; - + // If possible, don't define operator+/operator- etc because these // operators inevitably call FAddendCoef's constructor which is not cheap. 
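For context on the PassManagerBuilder hunk above: the builder now carries separate SLPVectorize and BBVectorize flags, wired to -vectorize-slp and -vectorize-slp-aggressive. A rough usage sketch against the 3.3-era C++ API; the header paths and the PassManager type are quoted from memory, so treat them as assumptions:

    #include "llvm/PassManager.h"
    #include "llvm/Transforms/IPO/PassManagerBuilder.h"

    using namespace llvm;

    // Build an -O3-style module pipeline with SLP vectorization enabled and the
    // older, more aggressive basic-block vectorizer left off.
    static void buildPipeline(PassManager &MPM) {
      PassManagerBuilder PMB;
      PMB.OptLevel = 3;
      PMB.SLPVectorize = true;   // corresponds to -vectorize-slp
      PMB.BBVectorize = false;   // corresponds to -vectorize-slp-aggressive
      PMB.populateModulePassManager(MPM);
    }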
void operator=(const FAddendCoef &A); void operator+=(const FAddendCoef &A); void operator-=(const FAddendCoef &A); void operator*=(const FAddendCoef &S); - + bool isOne() const { return isInt() && IntVal == 1; } bool isTwo() const { return isInt() && IntVal == 2; } bool isMinusOne() const { return isInt() && IntVal == -1; } bool isMinusTwo() const { return isInt() && IntVal == -2; } - + private: bool insaneIntVal(int V) { return V > 4 || V < -4; } APFloat *getFpValPtr(void) @@ -74,26 +74,28 @@ namespace { return *getFpValPtr(); } - APFloat &getFpVal(void) - { assert(IsFp && BufHasFpVal && "Incorret state"); return *getFpValPtr(); } - + APFloat &getFpVal(void) { + assert(IsFp && BufHasFpVal && "Incorret state"); + return *getFpValPtr(); + } + bool isInt() const { return !IsFp; } // If the coefficient is represented by an integer, promote it to a - // floating point. + // floating point. void convertToFpType(const fltSemantics &Sem); // Construct an APFloat from a signed integer. // TODO: We should get rid of this function when APFloat can be constructed - // from an *SIGNED* integer. + // from an *SIGNED* integer. APFloat createAPFloatFromInt(const fltSemantics &Sem, int Val); private: bool IsFp; - + // True iff FpValBuf contains an instance of APFloat. bool BufHasFpVal; - + // The integer coefficient of an individual addend is either 1 or -1, // and we try to simplify at most 4 addends from neighboring at most // two instructions. So the range of <IntVal> falls in [-4, 4]. APInt @@ -102,7 +104,7 @@ namespace { AlignedCharArrayUnion<APFloat> FpValBuf; }; - + /// FAddend is used to represent floating-point addend. An addend is /// represented as <C, V>, where the V is a symbolic value, and C is a /// constant coefficient. A constant addend is represented as <C, 0>. @@ -110,10 +112,10 @@ namespace { class FAddend { public: FAddend() { Val = 0; } - + Value *getSymVal (void) const { return Val; } const FAddendCoef &getCoef(void) const { return Coeff; } - + bool isConstant() const { return Val == 0; } bool isZero() const { return Coeff.isZero(); } @@ -122,17 +124,17 @@ namespace { { Coeff.set(Coefficient); Val = V; } void set(const ConstantFP* Coefficient, Value *V) { Coeff.set(Coefficient->getValueAPF()); Val = V; } - + void negate() { Coeff.negate(); } - + /// Drill down the U-D chain one step to find the definition of V, and /// try to break the definition into one or two addends. static unsigned drillValueDownOneStep(Value* V, FAddend &A0, FAddend &A1); - + /// Similar to FAddend::drillDownOneStep() except that the value being /// splitted is the addend itself. unsigned drillAddendDownOneStep(FAddend &Addend0, FAddend &Addend1) const; - + void operator+=(const FAddend &T) { assert((Val == T.Val) && "Symbolic-values disagree"); Coeff += T.Coeff; @@ -140,12 +142,12 @@ namespace { private: void Scale(const FAddendCoef& ScaleAmt) { Coeff *= ScaleAmt; } - + // This addend has the value of "Coeff * Val". Value *Val; FAddendCoef Coeff; }; - + /// FAddCombine is the class for optimizing an unsafe fadd/fsub along /// with its neighboring at most two instructions. 
/// @@ -153,17 +155,17 @@ namespace { public: FAddCombine(InstCombiner::BuilderTy *B) : Builder(B), Instr(0) {} Value *simplify(Instruction *FAdd); - + private: typedef SmallVector<const FAddend*, 4> AddendVect; - + Value *simplifyFAdd(AddendVect& V, unsigned InstrQuota); Value *performFactorization(Instruction *I); /// Convert given addend to a Value Value *createAddendVal(const FAddend &A, bool& NeedNeg); - + /// Return the number of instructions needed to emit the N-ary addition. unsigned calcInstrNumber(const AddendVect& Vect); Value *createFSub(Value *Opnd0, Value *Opnd1); @@ -173,10 +175,10 @@ namespace { Value *createFNeg(Value *V); Value *createNaryFAdd(const AddendVect& Opnds, unsigned InstrQuota); void createInstPostProc(Instruction *NewInst); - + InstCombiner::BuilderTy *Builder; Instruction *Instr; - + private: // Debugging stuff are clustered here. #ifndef NDEBUG @@ -188,7 +190,7 @@ namespace { void incCreateInstNum() {} #endif }; -} +} //===----------------------------------------------------------------------===// // @@ -211,7 +213,7 @@ void FAddendCoef::set(const APFloat& C) { } else *P = C; - IsFp = BufHasFpVal = true; + IsFp = BufHasFpVal = true; } void FAddendCoef::convertToFpType(const fltSemantics &Sem) { @@ -225,7 +227,7 @@ void FAddendCoef::convertToFpType(const fltSemantics &Sem) { new(P) APFloat(Sem, 0 - IntVal); P->changeSign(); } - IsFp = BufHasFpVal = true; + IsFp = BufHasFpVal = true; } APFloat FAddendCoef::createAPFloatFromInt(const fltSemantics &Sem, int Val) { @@ -254,14 +256,14 @@ void FAddendCoef::operator+=(const FAddendCoef &That) { getFpVal().add(That.getFpVal(), RndMode); return; } - + if (isInt()) { const APFloat &T = That.getFpVal(); convertToFpType(T.getSemantics()); getFpVal().add(T, RndMode); return; } - + APFloat &T = getFpVal(); T.add(createAPFloatFromInt(T.getSemantics(), That.IntVal), RndMode); } @@ -275,7 +277,7 @@ void FAddendCoef::operator-=(const FAddendCoef &That) { getFpVal().subtract(That.getFpVal(), RndMode); return; } - + if (isInt()) { const APFloat &T = That.getFpVal(); convertToFpType(T.getSemantics()); @@ -303,7 +305,7 @@ void FAddendCoef::operator*=(const FAddendCoef &That) { return; } - const fltSemantics &Semantic = + const fltSemantics &Semantic = isInt() ? 
That.getFpVal().getSemantics() : getFpVal().getSemantics(); if (isInt()) @@ -338,11 +340,11 @@ Value *FAddendCoef::getValue(Type *Ty) const { // A - B <1, A>, <1,B> // 0 - B <-1, B> // C * A, <C, A> -// A + C <1, A> <C, NULL> +// A + C <1, A> <C, NULL> // 0 +/- 0 <0, NULL> (corner case) // // Legend: A and B are not constant, C is constant -// +// unsigned FAddend::drillValueDownOneStep (Value *Val, FAddend &Addend0, FAddend &Addend1) { Instruction *I = 0; @@ -413,7 +415,7 @@ unsigned FAddend::drillAddendDownOneStep return 0; unsigned BreakNum = FAddend::drillValueDownOneStep(Val, Addend0, Addend1); - if (!BreakNum || Coeff.isOne()) + if (!BreakNum || Coeff.isOne()) return BreakNum; Addend0.Scale(Coeff); @@ -435,10 +437,10 @@ unsigned FAddend::drillAddendDownOneStep Value *FAddCombine::performFactorization(Instruction *I) { assert((I->getOpcode() == Instruction::FAdd || I->getOpcode() == Instruction::FSub) && "Expect add/sub"); - + Instruction *I0 = dyn_cast<Instruction>(I->getOperand(0)); Instruction *I1 = dyn_cast<Instruction>(I->getOperand(1)); - + if (!I0 || !I1 || I0->getOpcode() != I1->getOpcode()) return 0; @@ -453,14 +455,14 @@ Value *FAddCombine::performFactorization(Instruction *I) { Value *Opnd1_0 = I1->getOperand(0); Value *Opnd1_1 = I1->getOperand(1); - // Input Instr I Factor AddSub0 AddSub1 + // Input Instr I Factor AddSub0 AddSub1 // ---------------------------------------------- // (x*y) +/- (x*z) x y z // (y/x) +/- (z/x) x y z // Value *Factor = 0; Value *AddSub0 = 0, *AddSub1 = 0; - + if (isMpy) { if (Opnd0_0 == Opnd1_0 || Opnd0_0 == Opnd1_1) Factor = Opnd0_0; @@ -492,7 +494,7 @@ Value *FAddCombine::performFactorization(Instruction *I) { if (isMpy) return createFMul(Factor, NewAddSub); - + return createFDiv(NewAddSub, Factor); } @@ -506,7 +508,7 @@ Value *FAddCombine::simplify(Instruction *I) { assert((I->getOpcode() == Instruction::FAdd || I->getOpcode() == Instruction::FSub) && "Expect add/sub"); - // Save the instruction before calling other member-functions. + // Save the instruction before calling other member-functions. Instr = I; FAddend Opnd0, Opnd1, Opnd0_0, Opnd0_1, Opnd1_0, Opnd1_1; @@ -517,7 +519,7 @@ Value *FAddCombine::simplify(Instruction *I) { unsigned Opnd0_ExpNum = 0; unsigned Opnd1_ExpNum = 0; - if (!Opnd0.isConstant()) + if (!Opnd0.isConstant()) Opnd0_ExpNum = Opnd0.drillAddendDownOneStep(Opnd0_0, Opnd0_1); // Step 2: Expand the 2nd addend into Opnd1_0 and Opnd1_1. @@ -539,7 +541,7 @@ Value *FAddCombine::simplify(Instruction *I) { Value *V0 = I->getOperand(0); Value *V1 = I->getOperand(1); - InstQuota = ((!isa<Constant>(V0) && V0->hasOneUse()) && + InstQuota = ((!isa<Constant>(V0) && V0->hasOneUse()) && (!isa<Constant>(V1) && V1->hasOneUse())) ? 2 : 1; if (Value *R = simplifyFAdd(AllOpnds, InstQuota)) @@ -579,7 +581,7 @@ Value *FAddCombine::simplify(Instruction *I) { return R; } - // step 6: Try factorization as the last resort, + // step 6: Try factorization as the last resort, return performFactorization(I); } @@ -588,7 +590,7 @@ Value *FAddCombine::simplifyFAdd(AddendVect& Addends, unsigned InstrQuota) { unsigned AddendNum = Addends.size(); assert(AddendNum <= 4 && "Too many addends"); - // For saving intermediate results; + // For saving intermediate results; unsigned NextTmpIdx = 0; FAddend TmpResult[3]; @@ -604,7 +606,7 @@ Value *FAddCombine::simplifyFAdd(AddendVect& Addends, unsigned InstrQuota) { AddendVect SimpVect; // The outer loop works on one symbolic-value at a time. 
Suppose the input - // addends are : <a1, x>, <b1, y>, <a2, x>, <c1, z>, <b2, y>, ... + // addends are : <a1, x>, <b1, y>, <a2, x>, <c1, z>, <b2, y>, ... // The symbolic-values will be processed in this order: x, y, z. // for (unsigned SymIdx = 0; SymIdx < AddendNum; SymIdx++) { @@ -631,7 +633,7 @@ Value *FAddCombine::simplifyFAdd(AddendVect& Addends, unsigned InstrQuota) { if (T && T->getSymVal() == Val) { // Set null such that next iteration of the outer loop will not process // this addend again. - Addends[SameSymIdx] = 0; + Addends[SameSymIdx] = 0; SimpVect.push_back(T); } } @@ -644,7 +646,7 @@ Value *FAddCombine::simplifyFAdd(AddendVect& Addends, unsigned InstrQuota) { R += *SimpVect[Idx]; // Pop all addends being folded and push the resulting folded addend. - SimpVect.resize(StartIdx); + SimpVect.resize(StartIdx); if (Val != 0) { if (!R.isZero()) { SimpVect.push_back(&R); @@ -657,7 +659,7 @@ Value *FAddCombine::simplifyFAdd(AddendVect& Addends, unsigned InstrQuota) { } } - assert((NextTmpIdx <= sizeof(TmpResult)/sizeof(TmpResult[0]) + 1) && + assert((NextTmpIdx <= sizeof(TmpResult)/sizeof(TmpResult[0]) + 1) && "out-of-bound access"); if (ConstAdd) @@ -679,7 +681,7 @@ Value *FAddCombine::createNaryFAdd assert(!Opnds.empty() && "Expect at least one addend"); // Step 1: Check if the # of instructions needed exceeds the quota. - // + // unsigned InstrNeeded = calcInstrNumber(Opnds); if (InstrNeeded > InstrQuota) return 0; @@ -700,7 +702,7 @@ Value *FAddCombine::createNaryFAdd // Iterate the addends, creating fadd/fsub using adjacent two addends. for (AddendVect::const_iterator I = Opnds.begin(), E = Opnds.end(); I != E; I++) { - bool NeedNeg; + bool NeedNeg; Value *V = createAddendVal(**I, NeedNeg); if (!LastVal) { LastVal = V; @@ -726,7 +728,7 @@ Value *FAddCombine::createNaryFAdd } #ifndef NDEBUG - assert(CreateInstrNum == InstrNeeded && + assert(CreateInstrNum == InstrNeeded && "Inconsistent in instruction numbers"); #endif @@ -784,8 +786,8 @@ unsigned FAddCombine::calcInstrNumber(const AddendVect &Opnds) { unsigned OpndNum = Opnds.size(); unsigned InstrNeeded = OpndNum - 1; - // The number of addends in the form of "(-1)*x". - unsigned NegOpndNum = 0; + // The number of addends in the form of "(-1)*x". + unsigned NegOpndNum = 0; // Adjust the number of instructions needed to emit the N-ary add. 
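Aside on performFactorization above (illustrative, not part of the diff): it implements the table quoted earlier, turning (x*y) +/- (x*z) into x*(y +/- z) and (y/x) +/- (z/x) into (y +/- z)/x. The rewrite is only legal because these are unsafe-algebra (fast-math) operations that may be reassociated. The multiply case at the source level:

    // Before: two fmuls feeding one fadd.
    static double unfactored(double x, double y, double z) { return x * y + x * z; }

    // After: one fadd feeding one fmul, the same value under fast-math
    // assumptions and one fewer multiply on the critical path.
    static double factored(double x, double y, double z) { return x * (y + z); }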
for (AddendVect::const_iterator I = Opnds.begin(), E = Opnds.end(); @@ -972,6 +974,11 @@ Instruction *InstCombiner::visitAdd(BinaryOperator &I) { return BinaryOperator::CreateSub(ConstantExpr::getAdd(XorRHS, CI), XorLHS); } + // (X + signbit) + C could have gotten canonicalized to (X ^ signbit) + C, + // transform them into (X + (signbit ^ C)) + if (XorRHS->getValue().isSignBit()) + return BinaryOperator::CreateAdd(XorLHS, + ConstantExpr::getXor(XorRHS, CI)); } } @@ -1230,6 +1237,31 @@ Instruction *InstCombiner::visitFAdd(BinaryOperator &I) { } } + // select C, 0, B + select C, A, 0 -> select C, A, B + { + Value *A1, *B1, *C1, *A2, *B2, *C2; + if (match(LHS, m_Select(m_Value(C1), m_Value(A1), m_Value(B1))) && + match(RHS, m_Select(m_Value(C2), m_Value(A2), m_Value(B2)))) { + if (C1 == C2) { + Constant *Z1=0, *Z2=0; + Value *A, *B, *C=C1; + if (match(A1, m_AnyZero()) && match(B2, m_AnyZero())) { + Z1 = dyn_cast<Constant>(A1); A = A2; + Z2 = dyn_cast<Constant>(B2); B = B1; + } else if (match(B1, m_AnyZero()) && match(A2, m_AnyZero())) { + Z1 = dyn_cast<Constant>(B1); B = B2; + Z2 = dyn_cast<Constant>(A2); A = A1; + } + + if (Z1 && Z2 && + (I.hasNoSignedZeros() || + (Z1->isNegativeZeroValue() && Z2->isNegativeZeroValue()))) { + return SelectInst::Create(C, A, B); + } + } + } + } + if (I.hasUnsafeAlgebra()) { if (Value *V = FAddCombine(Builder).simplify(&I)) return ReplaceInstUsesWith(I, V); diff --git a/contrib/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp b/contrib/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp index 990cbc3..ec75dd2 100644 --- a/contrib/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp +++ b/contrib/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp @@ -266,9 +266,8 @@ Instruction *InstCombiner::OptAndOp(Instruction *Op, return 0; } - -/// InsertRangeTest - Emit a computation of: (V >= Lo && V < Hi) if Inside is -/// true, otherwise (V < Lo || V >= Hi). In practice, we emit the more efficient +/// Emit a computation of: (V >= Lo && V < Hi) if Inside is true, otherwise +/// (V < Lo || V >= Hi). In practice, we emit the more efficient /// (V-Lo) \<u Hi-Lo. This method expects that Lo <= Hi. isSigned indicates /// whether to treat the V, Lo and HI as signed or not. IB is the location to /// insert new instructions. 
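The new visitAdd case above undoes the (X + signbit) -> (X ^ signbit) canonicalization; it relies on the fact that, in two's-complement, xor with the sign bit equals adding it, so (X ^ signbit) + C == X + (signbit ^ C). A small standalone check of that identity for 32-bit values (editorial sketch, not part of the diff):

    #include <cassert>
    #include <cstdint>

    static const uint32_t SignBit = 0x80000000u;

    // (X ^ signbit) + C == X + (signbit ^ C) modulo 2^32: flipping the top bit
    // is the same as adding the top bit, since the carry falls off the word.
    static uint32_t asXorThenAdd(uint32_t x, uint32_t c) { return (x ^ SignBit) + c; }
    static uint32_t asAddOfXor  (uint32_t x, uint32_t c) { return x + (SignBit ^ c); }

    int main() {
      assert(asXorThenAdd(0x12345678u, 7u) == asAddOfXor(0x12345678u, 7u));
      assert(asXorThenAdd(0xFFFFFFFFu, 0x80000001u) == asAddOfXor(0xFFFFFFFFu, 0x80000001u));
      return 0;
    }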
@@ -935,6 +934,9 @@ Value *InstCombiner::FoldAndOfICmps(ICmpInst *LHS, ICmpInst *RHS) { Value *InstCombiner::FoldAndOfFCmps(FCmpInst *LHS, FCmpInst *RHS) { if (LHS->getPredicate() == FCmpInst::FCMP_ORD && RHS->getPredicate() == FCmpInst::FCMP_ORD) { + if (LHS->getOperand(0)->getType() != RHS->getOperand(0)->getType()) + return 0; + // (fcmp ord x, c) & (fcmp ord y, c) -> (fcmp ord x, y) if (ConstantFP *LHSC = dyn_cast<ConstantFP>(LHS->getOperand(1))) if (ConstantFP *RHSC = dyn_cast<ConstantFP>(RHS->getOperand(1))) { @@ -1545,14 +1547,6 @@ Value *InstCombiner::FoldOrOfICmps(ICmpInst *LHS, ICmpInst *RHS) { switch (RHSCC) { default: llvm_unreachable("Unknown integer condition code!"); case ICmpInst::ICMP_EQ: - if (LHSCst == SubOne(RHSCst)) { - // (X == 13 | X == 14) -> X-13 <u 2 - Constant *AddCST = ConstantExpr::getNeg(LHSCst); - Value *Add = Builder->CreateAdd(Val, AddCST, Val->getName()+".off"); - AddCST = ConstantExpr::getSub(AddOne(RHSCst), LHSCst); - return Builder->CreateICmpULT(Add, AddCST); - } - if (LHS->getOperand(0) == RHS->getOperand(0)) { // if LHSCst and RHSCst differ only by one bit: // (A == C1 || A == C2) -> (A & ~(C1 ^ C2)) == C1 @@ -1566,6 +1560,14 @@ Value *InstCombiner::FoldOrOfICmps(ICmpInst *LHS, ICmpInst *RHS) { } } + if (LHSCst == SubOne(RHSCst)) { + // (X == 13 | X == 14) -> X-13 <u 2 + Constant *AddCST = ConstantExpr::getNeg(LHSCst); + Value *Add = Builder->CreateAdd(Val, AddCST, Val->getName()+".off"); + AddCST = ConstantExpr::getSub(AddOne(RHSCst), LHSCst); + return Builder->CreateICmpULT(Add, AddCST); + } + break; // (X == 13 | X == 15) -> no change case ICmpInst::ICMP_UGT: // (X == 13 | X u> 14) -> no change case ICmpInst::ICMP_SGT: // (X == 13 | X s> 14) -> no change diff --git a/contrib/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp b/contrib/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp index 64cd1bd..78b4a2c 100644 --- a/contrib/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp +++ b/contrib/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp @@ -1372,7 +1372,8 @@ InstCombiner::transformCallThroughTrampoline(CallSite CS, NestF->getType() == PointerType::getUnqual(NewFTy) ? NestF : ConstantExpr::getBitCast(NestF, PointerType::getUnqual(NewFTy)); - const AttributeSet &NewPAL = AttributeSet::get(FTy->getContext(), NewAttrs); + const AttributeSet &NewPAL = + AttributeSet::get(FTy->getContext(), NewAttrs); Instruction *NewCaller; if (InvokeInst *II = dyn_cast<InvokeInst>(Caller)) { diff --git a/contrib/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp b/contrib/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp index a96e754..4c252c0 100644 --- a/contrib/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp +++ b/contrib/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp @@ -232,7 +232,7 @@ FoldCmpLoadFromIndexedGlobal(GetElementPtrInst *GEP, GlobalVariable *GV, Constant *Init = GV->getInitializer(); if (!isa<ConstantArray>(Init) && !isa<ConstantDataArray>(Init)) return 0; - + uint64_t ArrayElementCount = Init->getType()->getArrayNumElements(); if (ArrayElementCount > 1024) return 0; // Don't blow up on huge arrays. 
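On the FoldOrOfICmps hunk above (illustrative sketch, not part of the diff): the patch only reorders the adjacent-constants case behind the differ-by-one-bit mask fold; the rewrite itself is unchanged, turning an equality test against two consecutive constants into one subtract and one unsigned compare:

    #include <cstdint>

    // (X == 13 || X == 14)  ->  (X - 13) <u 2
    // Unsigned wraparound sends every X below 13 to a huge value, so the single
    // range check accepts exactly {13, 14}.
    static bool asTwoCompares(uint32_t x) { return x == 13u || x == 14u; }
    static bool asRangeCheck (uint32_t x) { return (x - 13u) < 2u; }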
@@ -2487,6 +2487,55 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) { return new ICmpInst(Pred, Y, Z); } + // icmp slt (X + -1), Y -> icmp sle X, Y + if (A && NoOp0WrapProblem && Pred == CmpInst::ICMP_SLT && + match(B, m_AllOnes())) + return new ICmpInst(CmpInst::ICMP_SLE, A, Op1); + + // icmp sge (X + -1), Y -> icmp sgt X, Y + if (A && NoOp0WrapProblem && Pred == CmpInst::ICMP_SGE && + match(B, m_AllOnes())) + return new ICmpInst(CmpInst::ICMP_SGT, A, Op1); + + // icmp sle (X + 1), Y -> icmp slt X, Y + if (A && NoOp0WrapProblem && Pred == CmpInst::ICMP_SLE && + match(B, m_One())) + return new ICmpInst(CmpInst::ICMP_SLT, A, Op1); + + // icmp sgt (X + 1), Y -> icmp sge X, Y + if (A && NoOp0WrapProblem && Pred == CmpInst::ICMP_SGT && + match(B, m_One())) + return new ICmpInst(CmpInst::ICMP_SGE, A, Op1); + + // if C1 has greater magnitude than C2: + // icmp (X + C1), (Y + C2) -> icmp (X + C3), Y + // s.t. C3 = C1 - C2 + // + // if C2 has greater magnitude than C1: + // icmp (X + C1), (Y + C2) -> icmp X, (Y + C3) + // s.t. C3 = C2 - C1 + if (A && C && NoOp0WrapProblem && NoOp1WrapProblem && + (BO0->hasOneUse() || BO1->hasOneUse()) && !I.isUnsigned()) + if (ConstantInt *C1 = dyn_cast<ConstantInt>(B)) + if (ConstantInt *C2 = dyn_cast<ConstantInt>(D)) { + const APInt &AP1 = C1->getValue(); + const APInt &AP2 = C2->getValue(); + if (AP1.isNegative() == AP2.isNegative()) { + APInt AP1Abs = C1->getValue().abs(); + APInt AP2Abs = C2->getValue().abs(); + if (AP1Abs.uge(AP2Abs)) { + ConstantInt *C3 = Builder->getInt(AP1 - AP2); + Value *NewAdd = Builder->CreateNSWAdd(A, C3); + return new ICmpInst(Pred, NewAdd, C); + } else { + ConstantInt *C3 = Builder->getInt(AP2 - AP1); + Value *NewAdd = Builder->CreateNSWAdd(C, C3); + return new ICmpInst(Pred, A, NewAdd); + } + } + } + + // Analyze the case when either Op0 or Op1 is a sub instruction. // Op0 = A - B (or A and B are null); Op1 = C - D (or C and D are null). A = 0; B = 0; C = 0; D = 0; @@ -2620,6 +2669,15 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) { } { Value *A, *B; + // Transform (A & ~B) == 0 --> (A & B) != 0 + // and (A & ~B) != 0 --> (A & B) == 0 + // if A is a power of 2. + if (match(Op0, m_And(m_Value(A), m_Not(m_Value(B)))) && + match(Op1, m_Zero()) && isKnownToBeAPowerOfTwo(A) && I.isEquality()) + return new ICmpInst(I.getInversePredicate(), + Builder->CreateAnd(A, B), + Op1); + // ~x < ~y --> y < x // ~x < cst --> ~cst < x if (match(Op0, m_Not(m_Value(A)))) { diff --git a/contrib/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp b/contrib/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp index 337cfe3..e2d7966 100644 --- a/contrib/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp +++ b/contrib/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp @@ -69,8 +69,8 @@ isOnlyCopiedFromConstantGlobal(Value *V, MemTransferInst *&TheCopy, if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(U)) { // If the GEP has all zero indices, it doesn't offset the pointer. If it // doesn't, it does. 
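Among the new visitICmpInst folds above, the (A & ~B) == 0 rewrite depends on A being a power of two: a single bit survives ~B exactly when B has that bit clear, so testing the and-not result for zero is the same as testing A & B with the predicate inverted. A standalone sketch of the equivalence (editorial addition, not part of the diff):

    #include <cstdint>

    // Valid only when a has exactly one bit set:
    //   (a & ~b) == 0  <=>  (a & b) != 0
    static bool beforeFold(uint32_t a, uint32_t b) { return (a & ~b) == 0u; }
    static bool afterFold (uint32_t a, uint32_t b) { return (a & b) != 0u; }
    // e.g. a = 0x8: beforeFold(0x8, 0xC) == afterFold(0x8, 0xC) == true,
    //      beforeFold(0x8, 0x3) == afterFold(0x8, 0x3) == false.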
- if (!isOnlyCopiedFromConstantGlobal(GEP, TheCopy, ToDelete, - IsOffset || !GEP->hasAllZeroIndices())) + if (!isOnlyCopiedFromConstantGlobal( + GEP, TheCopy, ToDelete, IsOffset || !GEP->hasAllZeroIndices())) return false; continue; } @@ -166,7 +166,7 @@ Instruction *InstCombiner::visitAllocaInst(AllocaInst &AI) { // Convert: alloca Ty, C - where C is a constant != 1 into: alloca [C x Ty], 1 if (AI.isArrayAllocation()) { // Check C != 1 if (const ConstantInt *C = dyn_cast<ConstantInt>(AI.getArraySize())) { - Type *NewTy = + Type *NewTy = ArrayType::get(AI.getAllocatedType(), C->getZExtValue()); AllocaInst *New = Builder->CreateAlloca(NewTy, 0, AI.getName()); New->setAlignment(AI.getAlignment()); @@ -294,7 +294,7 @@ static Instruction *InstCombineLoadCast(InstCombiner &IC, LoadInst &LI, Type *SrcPTy = SrcTy->getElementType(); - if (DestPTy->isIntegerTy() || DestPTy->isPointerTy() || + if (DestPTy->isIntegerTy() || DestPTy->isPointerTy() || DestPTy->isVectorTy()) { // If the source is an array, the code below will not succeed. Check to // see if a trivial 'gep P, 0, 0' will help matters. Only do this for @@ -311,7 +311,7 @@ static Instruction *InstCombineLoadCast(InstCombiner &IC, LoadInst &LI, } if (IC.getDataLayout() && - (SrcPTy->isIntegerTy() || SrcPTy->isPointerTy() || + (SrcPTy->isIntegerTy() || SrcPTy->isPointerTy() || SrcPTy->isVectorTy()) && // Do not allow turning this into a load of an integer, which is then // casted to a pointer, this pessimizes pointer analysis a lot. @@ -322,7 +322,7 @@ static Instruction *InstCombineLoadCast(InstCombiner &IC, LoadInst &LI, // Okay, we are casting from one integer or pointer type to another of // the same size. Instead of casting the pointer before the load, cast // the result of the loaded value. - LoadInst *NewLoad = + LoadInst *NewLoad = IC.Builder->CreateLoad(CastOp, LI.isVolatile(), CI->getName()); NewLoad->setAlignment(LI.getAlignment()); NewLoad->setAtomic(LI.getOrdering(), LI.getSynchScope()); @@ -359,7 +359,7 @@ Instruction *InstCombiner::visitLoadInst(LoadInst &LI) { // None of the following transforms are legal for volatile/atomic loads. // FIXME: Some of it is okay for atomic loads; needs refactoring. if (!LI.isSimple()) return 0; - + // Do really simple store-to-load forwarding and load CSE, to catch cases // where there are several consecutive memory accesses to the same location, // separated by a few arithmetic operations. @@ -380,7 +380,7 @@ Instruction *InstCombiner::visitLoadInst(LoadInst &LI) { Constant::getNullValue(Op->getType()), &LI); return ReplaceInstUsesWith(LI, UndefValue::get(LI.getType())); } - } + } // load null/undef -> unreachable // TODO: Consider a target hook for valid address spaces for this xform. @@ -399,7 +399,7 @@ Instruction *InstCombiner::visitLoadInst(LoadInst &LI) { if (CE->isCast()) if (Instruction *Res = InstCombineLoadCast(*this, LI, TD)) return Res; - + if (Op->hasOneUse()) { // Change select and PHI nodes to select values instead of addresses: this // helps alias analysis out a lot, allows many others simplifications, and @@ -453,18 +453,18 @@ static Instruction *InstCombineStoreToCast(InstCombiner &IC, StoreInst &SI) { Type *DestPTy = cast<PointerType>(CI->getType())->getElementType(); PointerType *SrcTy = dyn_cast<PointerType>(CastOp->getType()); if (SrcTy == 0) return 0; - + Type *SrcPTy = SrcTy->getElementType(); if (!DestPTy->isIntegerTy() && !DestPTy->isPointerTy()) return 0; - + /// NewGEPIndices - If SrcPTy is an aggregate type, we can emit a "noop gep" /// to its first element. 
This allows us to handle things like: /// store i32 xxx, (bitcast {foo*, float}* %P to i32*) /// on 32-bit hosts. SmallVector<Value*, 4> NewGEPIndices; - + // If the source is an array, the code below will not succeed. Check to // see if a trivial 'gep P, 0, 0' will help matters. Only do this for // constants. @@ -472,7 +472,7 @@ static Instruction *InstCombineStoreToCast(InstCombiner &IC, StoreInst &SI) { // Index through pointer. Constant *Zero = Constant::getNullValue(Type::getInt32Ty(SI.getContext())); NewGEPIndices.push_back(Zero); - + while (1) { if (StructType *STy = dyn_cast<StructType>(SrcPTy)) { if (!STy->getNumElements()) /* Struct can be empty {} */ @@ -486,24 +486,24 @@ static Instruction *InstCombineStoreToCast(InstCombiner &IC, StoreInst &SI) { break; } } - + SrcTy = PointerType::get(SrcPTy, SrcTy->getAddressSpace()); } if (!SrcPTy->isIntegerTy() && !SrcPTy->isPointerTy()) return 0; - + // If the pointers point into different address spaces or if they point to // values with different sizes, we can't do the transformation. if (!IC.getDataLayout() || - SrcTy->getAddressSpace() != + SrcTy->getAddressSpace() != cast<PointerType>(CI->getType())->getAddressSpace() || IC.getDataLayout()->getTypeSizeInBits(SrcPTy) != IC.getDataLayout()->getTypeSizeInBits(DestPTy)) return 0; // Okay, we are casting from one integer or pointer type to another of - // the same size. Instead of casting the pointer before + // the same size. Instead of casting the pointer before // the store, cast the value to be stored. Value *NewCast; Value *SIOp0 = SI.getOperand(0); @@ -517,12 +517,12 @@ static Instruction *InstCombineStoreToCast(InstCombiner &IC, StoreInst &SI) { if (SIOp0->getType()->isPointerTy()) opcode = Instruction::PtrToInt; } - + // SIOp0 is a pointer to aggregate and this is a store to the first field, // emit a GEP to index into its first field. if (!NewGEPIndices.empty()) CastOp = IC.Builder->CreateInBoundsGEP(CastOp, NewGEPIndices); - + NewCast = IC.Builder->CreateCast(opcode, SIOp0, CastDstTy, SIOp0->getName()+".c"); SI.setOperand(0, NewCast); @@ -541,7 +541,7 @@ static Instruction *InstCombineStoreToCast(InstCombiner &IC, StoreInst &SI) { static bool equivalentAddressValues(Value *A, Value *B) { // Test if the values are trivially equivalent. if (A == B) return true; - + // Test if the values come form identical arithmetic instructions. // This uses isIdenticalToWhenDefined instead of isIdenticalTo because // its only used to compare two uses within the same basic block, which @@ -554,7 +554,7 @@ static bool equivalentAddressValues(Value *A, Value *B) { if (Instruction *BI = dyn_cast<Instruction>(B)) if (cast<Instruction>(A)->isIdenticalToWhenDefined(BI)) return true; - + // Otherwise they may not be equivalent. return false; } @@ -585,7 +585,7 @@ Instruction *InstCombiner::visitStoreInst(StoreInst &SI) { // If the RHS is an alloca with a single use, zapify the store, making the // alloca dead. if (Ptr->hasOneUse()) { - if (isa<AllocaInst>(Ptr)) + if (isa<AllocaInst>(Ptr)) return EraseInstFromFunction(SI); if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Ptr)) { if (isa<AllocaInst>(GEP->getOperand(0))) { @@ -608,8 +608,8 @@ Instruction *InstCombiner::visitStoreInst(StoreInst &SI) { (isa<BitCastInst>(BBI) && BBI->getType()->isPointerTy())) { ScanInsts++; continue; - } - + } + if (StoreInst *PrevSI = dyn_cast<StoreInst>(BBI)) { // Prev store isn't volatile, and stores to the same location? 
if (PrevSI->isSimple() && equivalentAddressValues(PrevSI->getOperand(1), @@ -621,7 +621,7 @@ Instruction *InstCombiner::visitStoreInst(StoreInst &SI) { } break; } - + // If this is a load, we have to stop. However, if the loaded value is from // the pointer we're loading and is producing the pointer we're storing, // then *this* store is dead (X = load P; store X -> P). @@ -629,12 +629,12 @@ Instruction *InstCombiner::visitStoreInst(StoreInst &SI) { if (LI == Val && equivalentAddressValues(LI->getOperand(0), Ptr) && LI->isSimple()) return EraseInstFromFunction(SI); - + // Otherwise, this is a load from some other location. Stores before it // may not be dead. break; } - + // Don't skip over loads or things that can modify memory. if (BBI->mayWriteToMemory() || BBI->mayReadFromMemory()) break; @@ -664,11 +664,11 @@ Instruction *InstCombiner::visitStoreInst(StoreInst &SI) { if (Instruction *Res = InstCombineStoreToCast(*this, SI)) return Res; - + // If this store is the last instruction in the basic block (possibly // excepting debug info instructions), and if the block ends with an // unconditional branch, try to move it to the successor block. - BBI = &SI; + BBI = &SI; do { ++BBI; } while (isa<DbgInfoIntrinsic>(BBI) || @@ -677,7 +677,7 @@ Instruction *InstCombiner::visitStoreInst(StoreInst &SI) { if (BI->isUnconditional()) if (SimplifyStoreAtEndOfBlock(SI)) return 0; // xform done! - + return 0; } @@ -691,12 +691,12 @@ Instruction *InstCombiner::visitStoreInst(StoreInst &SI) { /// bool InstCombiner::SimplifyStoreAtEndOfBlock(StoreInst &SI) { BasicBlock *StoreBB = SI.getParent(); - + // Check to see if the successor block has exactly two incoming edges. If // so, see if the other predecessor contains a store to the same location. // if so, insert a PHI node (if needed) and move the stores down. BasicBlock *DestBB = StoreBB->getTerminator()->getSuccessor(0); - + // Determine whether Dest has exactly two predecessors and, if so, compute // the other predecessor. pred_iterator PI = pred_begin(DestBB); @@ -708,7 +708,7 @@ bool InstCombiner::SimplifyStoreAtEndOfBlock(StoreInst &SI) { if (++PI == pred_end(DestBB)) return false; - + P = *PI; if (P != StoreBB) { if (OtherBB) @@ -728,7 +728,7 @@ bool InstCombiner::SimplifyStoreAtEndOfBlock(StoreInst &SI) { BranchInst *OtherBr = dyn_cast<BranchInst>(BBI); if (!OtherBr || BBI == OtherBB->begin()) return false; - + // If the other block ends in an unconditional branch, check for the 'if then // else' case. there is an instruction before the branch. StoreInst *OtherStore = 0; @@ -750,10 +750,10 @@ bool InstCombiner::SimplifyStoreAtEndOfBlock(StoreInst &SI) { } else { // Otherwise, the other block ended with a conditional branch. If one of the // destinations is StoreBB, then we have the if/then case. - if (OtherBr->getSuccessor(0) != StoreBB && + if (OtherBr->getSuccessor(0) != StoreBB && OtherBr->getSuccessor(1) != StoreBB) return false; - + // Okay, we know that OtherBr now goes to Dest and StoreBB, so this is an // if/then triangle. See if there is a store to the same ptr as SI that // lives in OtherBB. @@ -771,7 +771,7 @@ bool InstCombiner::SimplifyStoreAtEndOfBlock(StoreInst &SI) { BBI == OtherBB->begin()) return false; } - + // In order to eliminate the store in OtherBr, we have to // make sure nothing reads or overwrites the stored value in // StoreBB. @@ -781,7 +781,7 @@ bool InstCombiner::SimplifyStoreAtEndOfBlock(StoreInst &SI) { return false; } } - + // Insert a PHI node now if we need it. 
Value *MergedVal = OtherStore->getOperand(0); if (MergedVal != SI.getOperand(0)) { @@ -790,7 +790,7 @@ bool InstCombiner::SimplifyStoreAtEndOfBlock(StoreInst &SI) { PN->addIncoming(OtherStore->getOperand(0), OtherBB); MergedVal = InsertNewInstBefore(PN, DestBB->front()); } - + // Advance to a place where it is safe to insert the new store and // insert it. BBI = DestBB->getFirstInsertionPt(); @@ -800,7 +800,7 @@ bool InstCombiner::SimplifyStoreAtEndOfBlock(StoreInst &SI) { SI.getOrdering(), SI.getSynchScope()); InsertNewInstBefore(NewSI, *BBI); - NewSI->setDebugLoc(OtherStore->getDebugLoc()); + NewSI->setDebugLoc(OtherStore->getDebugLoc()); // If the two stores had the same TBAA tag, preserve it. if (MDNode *TBAATag = SI.getMetadata(LLVMContext::MD_tbaa)) @@ -808,7 +808,7 @@ bool InstCombiner::SimplifyStoreAtEndOfBlock(StoreInst &SI) { OtherStore->getMetadata(LLVMContext::MD_tbaa)))) NewSI->setMetadata(LLVMContext::MD_tbaa, TBAATag); - + // Nuke the old stores. EraseInstFromFunction(SI); EraseInstFromFunction(*OtherStore); diff --git a/contrib/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp b/contrib/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp index 173f2bf..ecc9fc3 100644 --- a/contrib/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp +++ b/contrib/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp @@ -28,7 +28,7 @@ static Value *simplifyValueKnownNonZero(Value *V, InstCombiner &IC) { // if this is safe. For example, the use could be in dynamically unreached // code. if (!V->hasOneUse()) return 0; - + bool MadeChange = false; // ((1 << A) >>u B) --> (1 << (A-B)) @@ -41,7 +41,7 @@ static Value *simplifyValueKnownNonZero(Value *V, InstCombiner &IC) { A = IC.Builder->CreateSub(A, B); return IC.Builder->CreateShl(PowerOf2, A); } - + // (PowerOfTwo >>u B) --> isExact since shifting out the result would make it // inexact. Similarly for <<. if (BinaryOperator *I = dyn_cast<BinaryOperator>(V)) @@ -52,12 +52,12 @@ static Value *simplifyValueKnownNonZero(Value *V, InstCombiner &IC) { I->setOperand(0, V2); MadeChange = true; } - + if (I->getOpcode() == Instruction::LShr && !I->isExact()) { I->setIsExact(); MadeChange = true; } - + if (I->getOpcode() == Instruction::Shl && !I->hasNoUnsignedWrap()) { I->setHasNoUnsignedWrap(); MadeChange = true; @@ -67,7 +67,7 @@ static Value *simplifyValueKnownNonZero(Value *V, InstCombiner &IC) { // TODO: Lots more we could do here: // If V is a phi node, we can call this on each of its operands. // "select cond, X, 0" can simplify to "X". - + return MadeChange ? 
V : 0; } @@ -84,12 +84,12 @@ static bool MultiplyOverflows(ConstantInt *C1, ConstantInt *C2, bool sign) { LHSExt = LHSExt.zext(W * 2); RHSExt = RHSExt.zext(W * 2); } - + APInt MulExt = LHSExt * RHSExt; - + if (!sign) return MulExt.ugt(APInt::getLowBitsSet(W * 2, W)); - + APInt Min = APInt::getSignedMinValue(W).sext(W * 2); APInt Max = APInt::getSignedMaxValue(W).sext(W * 2); return MulExt.slt(Min) || MulExt.sgt(Max); @@ -107,16 +107,16 @@ Instruction *InstCombiner::visitMul(BinaryOperator &I) { if (match(Op1, m_AllOnes())) // X * -1 == 0 - X return BinaryOperator::CreateNeg(Op0, I.getName()); - + if (ConstantInt *CI = dyn_cast<ConstantInt>(Op1)) { - + // ((X << C1)*C2) == (X * (C2 << C1)) if (BinaryOperator *SI = dyn_cast<BinaryOperator>(Op0)) if (SI->getOpcode() == Instruction::Shl) if (Constant *ShOp = dyn_cast<Constant>(SI->getOperand(1))) return BinaryOperator::CreateMul(SI->getOperand(0), ConstantExpr::getShl(CI, ShOp)); - + const APInt &Val = CI->getValue(); if (Val.isPowerOf2()) { // Replace X*(2^C) with X << C Constant *NewCst = ConstantInt::get(Op0->getType(), Val.logBase2()); @@ -125,7 +125,7 @@ Instruction *InstCombiner::visitMul(BinaryOperator &I) { if (I.hasNoUnsignedWrap()) Shl->setHasNoUnsignedWrap(); return Shl; } - + // Canonicalize (X+C1)*CI -> X*CI+C1*CI. { Value *X; ConstantInt *C1; if (Op0->hasOneUse() && @@ -158,9 +158,9 @@ Instruction *InstCombiner::visitMul(BinaryOperator &I) { } } } - + // Simplify mul instructions with a constant RHS. - if (isa<Constant>(Op1)) { + if (isa<Constant>(Op1)) { // Try to fold constant mul into select arguments. if (SelectInst *SI = dyn_cast<SelectInst>(Op0)) if (Instruction *R = FoldOpIntoSelect(I, SI)) @@ -181,7 +181,7 @@ Instruction *InstCombiner::visitMul(BinaryOperator &I) { Value *Op1C = Op1; BinaryOperator *BO = dyn_cast<BinaryOperator>(Op0); if (!BO || - (BO->getOpcode() != Instruction::UDiv && + (BO->getOpcode() != Instruction::UDiv && BO->getOpcode() != Instruction::SDiv)) { Op1C = Op0; BO = dyn_cast<BinaryOperator>(Op1); @@ -227,14 +227,14 @@ Instruction *InstCombiner::visitMul(BinaryOperator &I) { if (match(Op1, m_Shl(m_One(), m_Value(Y)))) return BinaryOperator::CreateShl(Op0, Y); } - + // If one of the operands of the multiply is a cast from a boolean value, then // we know the bool is either zero or one, so this is a 'masking' multiply. // X * Y (where Y is 0 or 1) -> X & (0-Y) if (!I.getType()->isVectorTy()) { // -2 is "-1 << 1" so it is all bits set except the low one. APInt Negative2(I.getType()->getPrimitiveSizeInBits(), (uint64_t)-2, true); - + Value *BoolCast = 0, *OtherOp = 0; if (MaskedValueIsZero(Op0, Negative2)) BoolCast = Op0, OtherOp = Op1; @@ -280,7 +280,7 @@ static void detectLog2OfHalf(Value *&Op, Value *&Y, IntrinsicInst *&Log2) { return; if (I->getOpcode() != Instruction::FMul || !I->hasUnsafeAlgebra()) return; - + ConstantFP *CFP = dyn_cast<ConstantFP>(I->getOperand(0)); if (CFP && CFP->isExactlyValue(0.5)) { Y = I->getOperand(1); @@ -289,14 +289,14 @@ static void detectLog2OfHalf(Value *&Op, Value *&Y, IntrinsicInst *&Log2) { CFP = dyn_cast<ConstantFP>(I->getOperand(1)); if (CFP && CFP->isExactlyValue(0.5)) Y = I->getOperand(0); -} +} /// Helper function of InstCombiner::visitFMul(BinaryOperator(). It returns /// true iff the given value is FMul or FDiv with one and only one operand /// being a normal constant (i.e. not Zero/NaN/Infinity). 
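A short aside on visitMul above (illustrative, not part of the diff): the "masking multiply" mentioned in its comment applies when one operand is a boolean widened to the integer type, so it can only be 0 or 1; multiplying is then the same as AND-ing with 0 - Y, which is all-ones for 1 and zero for 0:

    #include <cstdint>

    // X * Y with Y known to be 0 or 1:  X * Y == X & (0 - Y)
    static uint32_t byMultiply(uint32_t x, uint32_t y01) { return x * y01; }
    static uint32_t byMask    (uint32_t x, uint32_t y01) { return x & (0u - y01); }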
static bool isFMulOrFDivWithConstant(Value *V) { Instruction *I = dyn_cast<Instruction>(V); - if (!I || (I->getOpcode() != Instruction::FMul && + if (!I || (I->getOpcode() != Instruction::FMul && I->getOpcode() != Instruction::FDiv)) return false; @@ -318,10 +318,10 @@ static bool isNormalFp(const ConstantFP *C) { /// foldFMulConst() is a helper routine of InstCombiner::visitFMul(). /// The input \p FMulOrDiv is a FMul/FDiv with one and only one operand /// being a constant (i.e. isFMulOrFDivWithConstant(FMulOrDiv) == true). -/// This function is to simplify "FMulOrDiv * C" and returns the +/// This function is to simplify "FMulOrDiv * C" and returns the /// resulting expression. Note that this function could return NULL in /// case the constants cannot be folded into a normal floating-point. -/// +/// Value *InstCombiner::foldFMulConst(Instruction *FMulOrDiv, ConstantFP *C, Instruction *InsertBefore) { assert(isFMulOrFDivWithConstant(FMulOrDiv) && "V is invalid"); @@ -351,7 +351,7 @@ Value *InstCombiner::foldFMulConst(Instruction *FMulOrDiv, ConstantFP *C, if (isNormalFp(F)) { R = BinaryOperator::CreateFMul(Opnd0, F); } else { - // (X / C1) * C => X / (C1/C) + // (X / C1) * C => X / (C1/C) Constant *F = ConstantExpr::getFDiv(C1, C); if (isNormalFp(cast<ConstantFP>(F))) R = BinaryOperator::CreateFDiv(Opnd0, F); @@ -415,13 +415,13 @@ Instruction *InstCombiner::visitFMul(BinaryOperator &I) { if (C0) { std::swap(C0, C1); std::swap(Opnd0, Opnd1); - Swap = true; + Swap = true; } if (C1 && C1->getValueAPF().isNormal() && isFMulOrFDivWithConstant(Opnd0)) { Value *M1 = ConstantExpr::getFMul(C1, C); - Value *M0 = isNormalFp(cast<ConstantFP>(M1)) ? + Value *M0 = isNormalFp(cast<ConstantFP>(M1)) ? foldFMulConst(cast<Instruction>(Opnd0), C, &I) : 0; if (M0 && M1) { @@ -495,7 +495,7 @@ Instruction *InstCombiner::visitFMul(BinaryOperator &I) { } // (X*Y) * X => (X*X) * Y where Y != X - // The purpose is two-fold: + // The purpose is two-fold: // 1) to form a power expression (of X). // 2) potentially shorten the critical path: After transformation, the // latency of the instruction Y is amortized by the expression of X*X, @@ -524,6 +524,35 @@ Instruction *InstCombiner::visitFMul(BinaryOperator &I) { } } + // B * (uitofp i1 C) -> select C, B, 0 + if (I.hasNoNaNs() && I.hasNoInfs() && I.hasNoSignedZeros()) { + Value *LHS = Op0, *RHS = Op1; + Value *B, *C; + if (!match(RHS, m_UIToFp(m_Value(C)))) + std::swap(LHS, RHS); + + if (match(RHS, m_UIToFp(m_Value(C))) && C->getType()->isIntegerTy(1)) { + B = LHS; + Value *Zero = ConstantFP::getNegativeZero(B->getType()); + return SelectInst::Create(C, B, Zero); + } + } + + // A * (1 - uitofp i1 C) -> select C, 0, A + if (I.hasNoNaNs() && I.hasNoInfs() && I.hasNoSignedZeros()) { + Value *LHS = Op0, *RHS = Op1; + Value *A, *C; + if (!match(RHS, m_FSub(m_FPOne(), m_UIToFp(m_Value(C))))) + std::swap(LHS, RHS); + + if (match(RHS, m_FSub(m_FPOne(), m_UIToFp(m_Value(C)))) && + C->getType()->isIntegerTy(1)) { + A = LHS; + Value *Zero = ConstantFP::getNegativeZero(A->getType()); + return SelectInst::Create(C, Zero, A); + } + } + if (!isa<Constant>(Op1)) std::swap(Opnd0, Opnd1); else @@ -537,7 +566,7 @@ Instruction *InstCombiner::visitFMul(BinaryOperator &I) { /// instruction. bool InstCombiner::SimplifyDivRemOfSelect(BinaryOperator &I) { SelectInst *SI = cast<SelectInst>(I.getOperand(1)); - + // div/rem X, (Cond ? 
0 : Y) -> div/rem X, Y int NonNullOperand = -1; if (Constant *ST = dyn_cast<Constant>(SI->getOperand(1))) @@ -547,36 +576,36 @@ bool InstCombiner::SimplifyDivRemOfSelect(BinaryOperator &I) { if (Constant *ST = dyn_cast<Constant>(SI->getOperand(2))) if (ST->isNullValue()) NonNullOperand = 1; - + if (NonNullOperand == -1) return false; - + Value *SelectCond = SI->getOperand(0); - + // Change the div/rem to use 'Y' instead of the select. I.setOperand(1, SI->getOperand(NonNullOperand)); - + // Okay, we know we replace the operand of the div/rem with 'Y' with no // problem. However, the select, or the condition of the select may have // multiple uses. Based on our knowledge that the operand must be non-zero, // propagate the known value for the select into other uses of it, and // propagate a known value of the condition into its other users. - + // If the select and condition only have a single use, don't bother with this, // early exit. if (SI->use_empty() && SelectCond->hasOneUse()) return true; - + // Scan the current block backward, looking for other uses of SI. BasicBlock::iterator BBI = &I, BBFront = I.getParent()->begin(); - + while (BBI != BBFront) { --BBI; // If we found a call to a function, we can't assume it will return, so // information from below it cannot be propagated above it. if (isa<CallInst>(BBI) && !isa<IntrinsicInst>(BBI)) break; - + // Replace uses of the select or its condition with the known values. for (Instruction::op_iterator I = BBI->op_begin(), E = BBI->op_end(); I != E; ++I) { @@ -589,17 +618,17 @@ bool InstCombiner::SimplifyDivRemOfSelect(BinaryOperator &I) { Worklist.Add(BBI); } } - + // If we past the instruction, quit looking for it. if (&*BBI == SI) SI = 0; if (&*BBI == SelectCond) SelectCond = 0; - + // If we ran out of things to eliminate, break out of the loop. if (SelectCond == 0 && SI == 0) break; - + } return true; } @@ -617,7 +646,7 @@ Instruction *InstCombiner::commonIDivTransforms(BinaryOperator &I) { I.setOperand(1, V); return &I; } - + // Handle cases involving: [su]div X, (select Cond, Y, Z) // This does not apply for fdiv. if (isa<SelectInst>(Op1) && SimplifyDivRemOfSelect(I)) @@ -683,16 +712,16 @@ Instruction *InstCombiner::visitUDiv(BinaryOperator &I) { // Handle the integer div common cases if (Instruction *Common = commonIDivTransforms(I)) return Common; - - { + + { // X udiv 2^C -> X >> C // Check to see if this is an unsigned division with an exact power of 2, // if so, convert to a right shift. const APInt *C; if (match(Op1, m_Power2(C))) { BinaryOperator *LShr = - BinaryOperator::CreateLShr(Op0, - ConstantInt::get(Op0->getType(), + BinaryOperator::CreateLShr(Op0, + ConstantInt::get(Op0->getType(), C->logBase2())); if (I.isExact()) LShr->setIsExact(); return LShr; @@ -732,7 +761,7 @@ Instruction *InstCombiner::visitUDiv(BinaryOperator &I) { return BinaryOperator::CreateLShr(Op0, N); } } - + // udiv X, (Select Cond, C1, C2) --> Select Cond, (shr X, C1), (shr X, C2) // where C1&C2 are powers of two. { Value *Cond; const APInt *C1, *C2; @@ -740,11 +769,11 @@ Instruction *InstCombiner::visitUDiv(BinaryOperator &I) { // Construct the "on true" case of the select Value *TSI = Builder->CreateLShr(Op0, C1->logBase2(), Op1->getName()+".t", I.isExact()); - + // Construct the "on false" case of the select Value *FSI = Builder->CreateLShr(Op0, C2->logBase2(), Op1->getName()+".f", I.isExact()); - + // construct the select instruction and return it. 
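Looping back to the new visitFMul folds a little further up (editorial sketch, not part of the diff): B * (uitofp i1 C) becomes a select because the i1 converts to exactly 0.0 or 1.0. The patch uses -0.0 as the false arm; nsz makes the sign of that zero irrelevant, and nnan/ninf are needed because B * 0.0 would otherwise be NaN for NaN or infinite B:

    // B * (double)C for a bool C, rewritten as a select.
    static double byMultiply(double b, bool c) { return b * static_cast<double>(c); }
    static double bySelect  (double b, bool c) { return c ? b : -0.0; }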
return SelectInst::Create(Cond, TSI, FSI); } @@ -799,7 +828,7 @@ Instruction *InstCombiner::visitSDiv(BinaryOperator &I) { // X sdiv Y -> X udiv Y, iff X and Y don't have sign bit set return BinaryOperator::CreateUDiv(Op0, Op1, I.getName()); } - + if (match(Op1, m_Shl(m_Power2(), m_Value()))) { // X sdiv (1 << Y) -> X udiv (1 << Y) ( -> X u>> Y) // Safe because the only negative value (1 << Y) can take on is @@ -809,13 +838,13 @@ Instruction *InstCombiner::visitSDiv(BinaryOperator &I) { } } } - + return 0; } /// CvtFDivConstToReciprocal tries to convert X/C into X*1/C if C not a special /// FP value and: -/// 1) 1/C is exact, or +/// 1) 1/C is exact, or /// 2) reciprocal is allowed. /// If the convertion was successful, the simplified expression "X * 1/C" is /// returned; otherwise, NULL is returned. @@ -826,7 +855,7 @@ static Instruction *CvtFDivConstToReciprocal(Value *Dividend, const APFloat &FpVal = Divisor->getValueAPF(); APFloat Reciprocal(FpVal.getSemantics()); bool Cvt = FpVal.getExactInverse(&Reciprocal); - + if (!Cvt && AllowReciprocal && FpVal.isNormal()) { Reciprocal = APFloat(FpVal.getSemantics(), 1.0f); (void)Reciprocal.divide(FpVal, APFloat::rmNearestTiesToEven); @@ -870,10 +899,10 @@ Instruction *InstCombiner::visitFDiv(BinaryOperator &I) { Constant *C = ConstantExpr::getFMul(C1, C2); const APFloat &F = cast<ConstantFP>(C)->getValueAPF(); if (F.isNormal() && !F.isDenormal()) { - Res = CvtFDivConstToReciprocal(X, cast<ConstantFP>(C), + Res = CvtFDivConstToReciprocal(X, cast<ConstantFP>(C), AllowReciprocal); if (!Res) - Res = BinaryOperator::CreateFDiv(X, C); + Res = BinaryOperator::CreateFDiv(X, C); } } @@ -911,7 +940,7 @@ Instruction *InstCombiner::visitFDiv(BinaryOperator &I) { if (Fold) { const APFloat &FoldC = cast<ConstantFP>(Fold)->getValueAPF(); if (FoldC.isNormal() && !FoldC.isDenormal()) { - Instruction *R = CreateDiv ? + Instruction *R = CreateDiv ? 
BinaryOperator::CreateFDiv(Fold, X) : BinaryOperator::CreateFMul(X, Fold); R->setFastMathFlags(I.getFastMathFlags()); @@ -997,7 +1026,7 @@ Instruction *InstCombiner::visitURem(BinaryOperator &I) { if (Instruction *common = commonIRemTransforms(I)) return common; - + // X urem C^2 -> X and C-1 { const APInt *C; if (match(Op1, m_Power2(C))) @@ -1005,7 +1034,7 @@ Instruction *InstCombiner::visitURem(BinaryOperator &I) { ConstantInt::get(I.getType(), *C-1)); } - // Turn A % (C << N), where C is 2^k, into A & ((C << N)-1) + // Turn A % (C << N), where C is 2^k, into A & ((C << N)-1) if (match(Op1, m_Shl(m_Power2(), m_Value()))) { Constant *N1 = Constant::getAllOnesValue(I.getType()); Value *Add = Builder->CreateAdd(Op1, N1); @@ -1041,7 +1070,7 @@ Instruction *InstCombiner::visitSRem(BinaryOperator &I) { // Handle the integer rem common cases if (Instruction *Common = commonIRemTransforms(I)) return Common; - + if (Value *RHSNeg = dyn_castNegVal(Op1)) if (!isa<Constant>(RHSNeg) || (isa<ConstantInt>(RHSNeg) && diff --git a/contrib/llvm/lib/Transforms/InstCombine/InstCombinePHI.cpp b/contrib/llvm/lib/Transforms/InstCombine/InstCombinePHI.cpp index b0a998c..bd14e81 100644 --- a/contrib/llvm/lib/Transforms/InstCombine/InstCombinePHI.cpp +++ b/contrib/llvm/lib/Transforms/InstCombine/InstCombinePHI.cpp @@ -27,10 +27,10 @@ Instruction *InstCombiner::FoldPHIArgBinOpIntoPHI(PHINode &PN) { unsigned Opc = FirstInst->getOpcode(); Value *LHSVal = FirstInst->getOperand(0); Value *RHSVal = FirstInst->getOperand(1); - + Type *LHSType = LHSVal->getType(); Type *RHSType = RHSVal->getType(); - + bool isNUW = false, isNSW = false, isExact = false; if (OverflowingBinaryOperator *BO = dyn_cast<OverflowingBinaryOperator>(FirstInst)) { @@ -39,7 +39,7 @@ Instruction *InstCombiner::FoldPHIArgBinOpIntoPHI(PHINode &PN) { } else if (PossiblyExactOperator *PEO = dyn_cast<PossiblyExactOperator>(FirstInst)) isExact = PEO->isExact(); - + // Scan to see if all operands are the same opcode, and all have one use. for (unsigned i = 1; i != PN.getNumIncomingValues(); ++i) { Instruction *I = dyn_cast<Instruction>(PN.getIncomingValue(i)); @@ -54,14 +54,14 @@ Instruction *InstCombiner::FoldPHIArgBinOpIntoPHI(PHINode &PN) { if (CmpInst *CI = dyn_cast<CmpInst>(I)) if (CI->getPredicate() != cast<CmpInst>(FirstInst)->getPredicate()) return 0; - + if (isNUW) isNUW = cast<OverflowingBinaryOperator>(I)->hasNoUnsignedWrap(); if (isNSW) isNSW = cast<OverflowingBinaryOperator>(I)->hasNoSignedWrap(); if (isExact) isExact = cast<PossiblyExactOperator>(I)->isExact(); - + // Keep track of which operand needs a phi node. if (I->getOperand(0) != LHSVal) LHSVal = 0; if (I->getOperand(1) != RHSVal) RHSVal = 0; @@ -73,9 +73,9 @@ Instruction *InstCombiner::FoldPHIArgBinOpIntoPHI(PHINode &PN) { // bad when the PHIs are in the header of a loop. if (!LHSVal && !RHSVal) return 0; - + // Otherwise, this is safe to transform! - + Value *InLHS = FirstInst->getOperand(0); Value *InRHS = FirstInst->getOperand(1); PHINode *NewLHS = 0, *NewRHS = 0; @@ -86,7 +86,7 @@ Instruction *InstCombiner::FoldPHIArgBinOpIntoPHI(PHINode &PN) { InsertNewInstBefore(NewLHS, PN); LHSVal = NewLHS; } - + if (RHSVal == 0) { NewRHS = PHINode::Create(RHSType, PN.getNumIncomingValues(), FirstInst->getOperand(1)->getName() + ".pn"); @@ -94,7 +94,7 @@ Instruction *InstCombiner::FoldPHIArgBinOpIntoPHI(PHINode &PN) { InsertNewInstBefore(NewRHS, PN); RHSVal = NewRHS; } - + // Add all operands to the new PHIs. 
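On visitURem above (illustrative, not part of the diff): an unsigned remainder by a power of two is a mask with that power minus one, and the follow-on case extends the same idea to A % (C << N) by building the mask with an add of all-ones. The basic case at the source level:

    #include <cstdint>

    // X urem 2^k  ->  X & (2^k - 1); here with 2^k = 8.
    static uint32_t byRemainder(uint32_t x) { return x % 8u; }
    static uint32_t byMask     (uint32_t x) { return x & 7u; }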
if (NewLHS || NewRHS) { for (unsigned i = 1, e = PN.getNumIncomingValues(); i != e; ++i) { @@ -109,7 +109,7 @@ Instruction *InstCombiner::FoldPHIArgBinOpIntoPHI(PHINode &PN) { } } } - + if (CmpInst *CIOp = dyn_cast<CmpInst>(FirstInst)) { CmpInst *NewCI = CmpInst::Create(CIOp->getOpcode(), CIOp->getPredicate(), LHSVal, RHSVal); @@ -129,8 +129,8 @@ Instruction *InstCombiner::FoldPHIArgBinOpIntoPHI(PHINode &PN) { Instruction *InstCombiner::FoldPHIArgGEPIntoPHI(PHINode &PN) { GetElementPtrInst *FirstInst =cast<GetElementPtrInst>(PN.getIncomingValue(0)); - - SmallVector<Value*, 16> FixedOperands(FirstInst->op_begin(), + + SmallVector<Value*, 16> FixedOperands(FirstInst->op_begin(), FirstInst->op_end()); // This is true if all GEP bases are allocas and if all indices into them are // constants. @@ -140,9 +140,9 @@ Instruction *InstCombiner::FoldPHIArgGEPIntoPHI(PHINode &PN) { // more than one phi, which leads to higher register pressure. This is // especially bad when the PHIs are in the header of a loop. bool NeededPhi = false; - + bool AllInBounds = true; - + // Scan to see if all operands are the same opcode, and all have one use. for (unsigned i = 1; i != PN.getNumIncomingValues(); ++i) { GetElementPtrInst *GEP= dyn_cast<GetElementPtrInst>(PN.getIncomingValue(i)); @@ -151,18 +151,18 @@ Instruction *InstCombiner::FoldPHIArgGEPIntoPHI(PHINode &PN) { return 0; AllInBounds &= GEP->isInBounds(); - + // Keep track of whether or not all GEPs are of alloca pointers. if (AllBasePointersAreAllocas && (!isa<AllocaInst>(GEP->getOperand(0)) || !GEP->hasAllConstantIndices())) AllBasePointersAreAllocas = false; - + // Compare the operand lists. for (unsigned op = 0, e = FirstInst->getNumOperands(); op != e; ++op) { if (FirstInst->getOperand(op) == GEP->getOperand(op)) continue; - + // Don't merge two GEPs when two operands differ (introducing phi nodes) // if one of the PHIs has a constant for the index. The index may be // substantially cheaper to compute for the constants, so making it a @@ -171,7 +171,7 @@ Instruction *InstCombiner::FoldPHIArgGEPIntoPHI(PHINode &PN) { if (isa<ConstantInt>(FirstInst->getOperand(op)) || isa<ConstantInt>(GEP->getOperand(op))) return 0; - + if (FirstInst->getOperand(op)->getType() !=GEP->getOperand(op)->getType()) return 0; @@ -186,7 +186,7 @@ Instruction *InstCombiner::FoldPHIArgGEPIntoPHI(PHINode &PN) { NeededPhi = true; } } - + // If all of the base pointers of the PHI'd GEPs are from allocas, don't // bother doing this transformation. At best, this will just save a bit of // offset calculation, but all the predecessors will have to materialize the @@ -195,11 +195,11 @@ Instruction *InstCombiner::FoldPHIArgGEPIntoPHI(PHINode &PN) { // which can usually all be folded into the load. if (AllBasePointersAreAllocas) return 0; - + // Otherwise, this is safe to transform. Insert PHI nodes for each operand // that is variable. SmallVector<PHINode*, 16> OperandPhis(FixedOperands.size()); - + bool HasAnyPHIs = false; for (unsigned i = 0, e = FixedOperands.size(); i != e; ++i) { if (FixedOperands[i]) continue; // operand doesn't need a phi. @@ -207,28 +207,28 @@ Instruction *InstCombiner::FoldPHIArgGEPIntoPHI(PHINode &PN) { PHINode *NewPN = PHINode::Create(FirstOp->getType(), e, FirstOp->getName()+".pn"); InsertNewInstBefore(NewPN, PN); - + NewPN->addIncoming(FirstOp, PN.getIncomingBlock(0)); OperandPhis[i] = NewPN; FixedOperands[i] = NewPN; HasAnyPHIs = true; } - + // Add all operands to the new PHIs. 
if (HasAnyPHIs) { for (unsigned i = 1, e = PN.getNumIncomingValues(); i != e; ++i) { GetElementPtrInst *InGEP =cast<GetElementPtrInst>(PN.getIncomingValue(i)); BasicBlock *InBB = PN.getIncomingBlock(i); - + for (unsigned op = 0, e = OperandPhis.size(); op != e; ++op) if (PHINode *OpPhi = OperandPhis[op]) OpPhi->addIncoming(InGEP->getOperand(op), InBB); } } - + Value *Base = FixedOperands[0]; - GetElementPtrInst *NewGEP = + GetElementPtrInst *NewGEP = GetElementPtrInst::Create(Base, makeArrayRef(FixedOperands).slice(1)); if (AllInBounds) NewGEP->setIsInBounds(); NewGEP->setDebugLoc(FirstInst->getDebugLoc()); @@ -246,11 +246,11 @@ Instruction *InstCombiner::FoldPHIArgGEPIntoPHI(PHINode &PN) { /// to a register. static bool isSafeAndProfitableToSinkLoad(LoadInst *L) { BasicBlock::iterator BBI = L, E = L->getParent()->end(); - + for (++BBI; BBI != E; ++BBI) if (BBI->mayWriteToMemory()) return false; - + // Check for non-address taken alloca. If not address-taken already, it isn't // profitable to do this xform. if (AllocaInst *AI = dyn_cast<AllocaInst>(L->getOperand(0))) { @@ -266,11 +266,11 @@ static bool isSafeAndProfitableToSinkLoad(LoadInst *L) { isAddressTaken = true; break; } - + if (!isAddressTaken && AI->isStaticAlloca()) return false; } - + // If this load is a load from a GEP with a constant offset from an alloca, // then we don't want to sink it. In its present form, it will be // load [constant stack offset]. Sinking it will cause us to have to @@ -280,7 +280,7 @@ static bool isSafeAndProfitableToSinkLoad(LoadInst *L) { if (AllocaInst *AI = dyn_cast<AllocaInst>(GEP->getOperand(0))) if (AI->isStaticAlloca() && GEP->hasAllConstantIndices()) return false; - + return true; } @@ -300,41 +300,41 @@ Instruction *InstCombiner::FoldPHIArgLoadIntoPHI(PHINode &PN) { bool isVolatile = FirstLI->isVolatile(); unsigned LoadAlignment = FirstLI->getAlignment(); unsigned LoadAddrSpace = FirstLI->getPointerAddressSpace(); - + // We can't sink the load if the loaded value could be modified between the // load and the PHI. if (FirstLI->getParent() != PN.getIncomingBlock(0) || !isSafeAndProfitableToSinkLoad(FirstLI)) return 0; - + // If the PHI is of volatile loads and the load block has multiple // successors, sinking it would remove a load of the volatile value from // the path through the other successor. - if (isVolatile && + if (isVolatile && FirstLI->getParent()->getTerminator()->getNumSuccessors() != 1) return 0; - + // Check to see if all arguments are the same operation. for (unsigned i = 1, e = PN.getNumIncomingValues(); i != e; ++i) { LoadInst *LI = dyn_cast<LoadInst>(PN.getIncomingValue(i)); if (!LI || !LI->hasOneUse()) return 0; - - // We can't sink the load if the loaded value could be modified between + + // We can't sink the load if the loaded value could be modified between // the load and the PHI. if (LI->isVolatile() != isVolatile || LI->getParent() != PN.getIncomingBlock(i) || LI->getPointerAddressSpace() != LoadAddrSpace || !isSafeAndProfitableToSinkLoad(LI)) return 0; - + // If some of the loads have an alignment specified but not all of them, // we can't do the transformation. if ((LoadAlignment != 0) != (LI->getAlignment() != 0)) return 0; - + LoadAlignment = std::min(LoadAlignment, LI->getAlignment()); - + // If the PHI is of volatile loads and the load block has multiple // successors, sinking it would remove a load of the volatile value from // the path through the other successor. 
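The FoldPHIArg*IntoPHI helpers touched above all follow the same idea: when every incoming value of a PHI is the same kind of single-use instruction, the instruction is performed once after the merge point, on a PHI of its operands. A rough source-level picture of the load case; the struct and function names are invented for the illustration and are not part of the patch:

    #include <cassert>

    struct Node { int value; };

    // Shape before the fold: each predecessor performs its own load and
    // the merge point joins the two loaded values.
    int beforeFold(bool cond, Node *a, Node *b) {
      int v;
      if (cond)
        v = a->value;   // load in the true predecessor
      else
        v = b->value;   // load in the false predecessor
      return v;         // PHI of two loads
    }

    // Shape after the fold: the pointers are merged first and a single
    // load is performed after the merge point.
    int afterFold(bool cond, Node *a, Node *b) {
      Node *p = cond ? a : b;  // PHI of the load operands
      return p->value;         // one load of the merged pointer
    }

    int main() {
      Node x = {1}, y = {2};
      assert(beforeFold(true, &x, &y) == afterFold(true, &x, &y));
      assert(beforeFold(false, &x, &y) == afterFold(false, &x, &y));
      return 0;
    }

In IR terms the first shape is a PHI of two loads and the second is a single load of a PHI of pointers, which is what FoldPHIArgLoadIntoPHI produces when isSafeAndProfitableToSinkLoad allows it.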
@@ -342,16 +342,16 @@ Instruction *InstCombiner::FoldPHIArgLoadIntoPHI(PHINode &PN) { LI->getParent()->getTerminator()->getNumSuccessors() != 1) return 0; } - + // Okay, they are all the same operation. Create a new PHI node of the // correct type, and PHI together all of the LHS's of the instructions. PHINode *NewPN = PHINode::Create(FirstLI->getOperand(0)->getType(), PN.getNumIncomingValues(), PN.getName()+".in"); - + Value *InVal = FirstLI->getOperand(0); NewPN->addIncoming(InVal, PN.getIncomingBlock(0)); - + // Add all operands to the new PHI. for (unsigned i = 1, e = PN.getNumIncomingValues(); i != e; ++i) { Value *NewInVal = cast<LoadInst>(PN.getIncomingValue(i))->getOperand(0); @@ -359,7 +359,7 @@ Instruction *InstCombiner::FoldPHIArgLoadIntoPHI(PHINode &PN) { InVal = 0; NewPN->addIncoming(NewInVal, PN.getIncomingBlock(i)); } - + Value *PhiVal; if (InVal) { // The new PHI unions all of the same values together. This is really @@ -370,14 +370,14 @@ Instruction *InstCombiner::FoldPHIArgLoadIntoPHI(PHINode &PN) { InsertNewInstBefore(NewPN, PN); PhiVal = NewPN; } - + // If this was a volatile load that we are merging, make sure to loop through // and mark all the input loads as non-volatile. If we don't do this, we will // insert a new volatile load and the old ones will not be deletable. if (isVolatile) for (unsigned i = 0, e = PN.getNumIncomingValues(); i != e; ++i) cast<LoadInst>(PN.getIncomingValue(i))->setVolatile(false); - + LoadInst *NewLI = new LoadInst(PhiVal, "", isVolatile, LoadAlignment); NewLI->setDebugLoc(FirstLI->getDebugLoc()); return NewLI; @@ -395,7 +395,7 @@ Instruction *InstCombiner::FoldPHIArgOpIntoPHI(PHINode &PN) { return FoldPHIArgGEPIntoPHI(PN); if (isa<LoadInst>(FirstInst)) return FoldPHIArgLoadIntoPHI(PN); - + // Scan the instruction, looking for input operations that can be folded away. // If all input operands to the phi are the same instruction (e.g. a cast from // the same type or "+42") we can pull the operation through the PHI, reducing @@ -403,7 +403,7 @@ Instruction *InstCombiner::FoldPHIArgOpIntoPHI(PHINode &PN) { Constant *ConstantOp = 0; Type *CastSrcTy = 0; bool isNUW = false, isNSW = false, isExact = false; - + if (isa<CastInst>(FirstInst)) { CastSrcTy = FirstInst->getOperand(0)->getType(); @@ -414,12 +414,12 @@ Instruction *InstCombiner::FoldPHIArgOpIntoPHI(PHINode &PN) { return 0; } } else if (isa<BinaryOperator>(FirstInst) || isa<CmpInst>(FirstInst)) { - // Can fold binop, compare or shift here if the RHS is a constant, + // Can fold binop, compare or shift here if the RHS is a constant, // otherwise call FoldPHIArgBinOpIntoPHI. 
ConstantOp = dyn_cast<Constant>(FirstInst->getOperand(1)); if (ConstantOp == 0) return FoldPHIArgBinOpIntoPHI(PN); - + if (OverflowingBinaryOperator *BO = dyn_cast<OverflowingBinaryOperator>(FirstInst)) { isNUW = BO->hasNoUnsignedWrap(); @@ -442,7 +442,7 @@ Instruction *InstCombiner::FoldPHIArgOpIntoPHI(PHINode &PN) { } else if (I->getOperand(1) != ConstantOp) { return 0; } - + if (isNUW) isNUW = cast<OverflowingBinaryOperator>(I)->hasNoUnsignedWrap(); if (isNSW) @@ -486,7 +486,7 @@ Instruction *InstCombiner::FoldPHIArgOpIntoPHI(PHINode &PN) { NewCI->setDebugLoc(FirstInst->getDebugLoc()); return NewCI; } - + if (BinaryOperator *BinOp = dyn_cast<BinaryOperator>(FirstInst)) { BinOp = BinaryOperator::Create(BinOp->getOpcode(), PhiVal, ConstantOp); if (isNUW) BinOp->setHasNoUnsignedWrap(); @@ -495,7 +495,7 @@ Instruction *InstCombiner::FoldPHIArgOpIntoPHI(PHINode &PN) { BinOp->setDebugLoc(FirstInst->getDebugLoc()); return BinOp; } - + CmpInst *CIOp = cast<CmpInst>(FirstInst); CmpInst *NewCI = CmpInst::Create(CIOp->getOpcode(), CIOp->getPredicate(), PhiVal, ConstantOp); @@ -513,7 +513,7 @@ static bool DeadPHICycle(PHINode *PN, // Remember this node, and if we find the cycle, return. if (!PotentiallyDeadPHIs.insert(PN)) return true; - + // Don't scan crazily complex things. if (PotentiallyDeadPHIs.size() == 16) return false; @@ -527,16 +527,16 @@ static bool DeadPHICycle(PHINode *PN, /// PHIsEqualValue - Return true if this phi node is always equal to /// NonPhiInVal. This happens with mutually cyclic phi nodes like: /// z = some value; x = phi (y, z); y = phi (x, z) -static bool PHIsEqualValue(PHINode *PN, Value *NonPhiInVal, +static bool PHIsEqualValue(PHINode *PN, Value *NonPhiInVal, SmallPtrSet<PHINode*, 16> &ValueEqualPHIs) { // See if we already saw this PHI node. if (!ValueEqualPHIs.insert(PN)) return true; - + // Don't scan crazily complex things. if (ValueEqualPHIs.size() == 16) return false; - + // Scan the operands to see if they are either phi nodes or are equal to // the value. for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) { @@ -547,7 +547,7 @@ static bool PHIsEqualValue(PHINode *PN, Value *NonPhiInVal, } else if (Op != NonPhiInVal) return false; } - + return true; } @@ -557,10 +557,10 @@ struct PHIUsageRecord { unsigned PHIId; // The ID # of the PHI (something determinstic to sort on) unsigned Shift; // The amount shifted. Instruction *Inst; // The trunc instruction. - + PHIUsageRecord(unsigned pn, unsigned Sh, Instruction *User) : PHIId(pn), Shift(Sh), Inst(User) {} - + bool operator<(const PHIUsageRecord &RHS) const { if (PHIId < RHS.PHIId) return true; if (PHIId > RHS.PHIId) return false; @@ -570,15 +570,15 @@ struct PHIUsageRecord { RHS.Inst->getType()->getPrimitiveSizeInBits(); } }; - + struct LoweredPHIRecord { PHINode *PN; // The PHI that was lowered. unsigned Shift; // The amount shifted. unsigned Width; // The width extracted. - + LoweredPHIRecord(PHINode *pn, unsigned Sh, Type *Ty) : PN(pn), Shift(Sh), Width(Ty->getPrimitiveSizeInBits()) {} - + // Ctor form used by DenseMap. LoweredPHIRecord(PHINode *pn, unsigned Sh) : PN(pn), Shift(Sh), Width(0) {} @@ -621,20 +621,20 @@ Instruction *InstCombiner::SliceUpIllegalIntegerPHI(PHINode &FirstPhi) { // PHIUsers - Keep track of all of the truncated values extracted from a set // of PHIs, along with their offset. These are the things we want to rewrite. SmallVector<PHIUsageRecord, 16> PHIUsers; - + // PHIs are often mutually cyclic, so we keep track of a whole set of PHI // nodes which are extracted from. 
PHIsToSlice is a set we use to avoid // revisiting PHIs, PHIsInspected is a ordered list of PHIs that we need to // check the uses of (to ensure they are all extracts). SmallVector<PHINode*, 8> PHIsToSlice; SmallPtrSet<PHINode*, 8> PHIsInspected; - + PHIsToSlice.push_back(&FirstPhi); PHIsInspected.insert(&FirstPhi); - + for (unsigned PHIId = 0; PHIId != PHIsToSlice.size(); ++PHIId) { PHINode *PN = PHIsToSlice[PHIId]; - + // Scan the input list of the PHI. If any input is an invoke, and if the // input is defined in the predecessor, then we won't be split the critical // edge which is required to insert a truncate. Because of this, we have to @@ -644,85 +644,85 @@ Instruction *InstCombiner::SliceUpIllegalIntegerPHI(PHINode &FirstPhi) { if (II == 0) continue; if (II->getParent() != PN->getIncomingBlock(i)) continue; - + // If we have a phi, and if it's directly in the predecessor, then we have // a critical edge where we need to put the truncate. Since we can't // split the edge in instcombine, we have to bail out. return 0; } - - + + for (Value::use_iterator UI = PN->use_begin(), E = PN->use_end(); UI != E; ++UI) { Instruction *User = cast<Instruction>(*UI); - + // If the user is a PHI, inspect its uses recursively. if (PHINode *UserPN = dyn_cast<PHINode>(User)) { if (PHIsInspected.insert(UserPN)) PHIsToSlice.push_back(UserPN); continue; } - + // Truncates are always ok. if (isa<TruncInst>(User)) { PHIUsers.push_back(PHIUsageRecord(PHIId, 0, User)); continue; } - + // Otherwise it must be a lshr which can only be used by one trunc. if (User->getOpcode() != Instruction::LShr || !User->hasOneUse() || !isa<TruncInst>(User->use_back()) || !isa<ConstantInt>(User->getOperand(1))) return 0; - + unsigned Shift = cast<ConstantInt>(User->getOperand(1))->getZExtValue(); PHIUsers.push_back(PHIUsageRecord(PHIId, Shift, User->use_back())); } } - + // If we have no users, they must be all self uses, just nuke the PHI. if (PHIUsers.empty()) return ReplaceInstUsesWith(FirstPhi, UndefValue::get(FirstPhi.getType())); - + // If this phi node is transformable, create new PHIs for all the pieces // extracted out of it. First, sort the users by their offset and size. array_pod_sort(PHIUsers.begin(), PHIUsers.end()); - + DEBUG(errs() << "SLICING UP PHI: " << FirstPhi << '\n'; for (unsigned i = 1, e = PHIsToSlice.size(); i != e; ++i) errs() << "AND USER PHI #" << i << ": " << *PHIsToSlice[i] <<'\n'; ); - + // PredValues - This is a temporary used when rewriting PHI nodes. It is // hoisted out here to avoid construction/destruction thrashing. DenseMap<BasicBlock*, Value*> PredValues; - + // ExtractedVals - Each new PHI we introduce is saved here so we don't // introduce redundant PHIs. DenseMap<LoweredPHIRecord, PHINode*> ExtractedVals; - + for (unsigned UserI = 0, UserE = PHIUsers.size(); UserI != UserE; ++UserI) { unsigned PHIId = PHIUsers[UserI].PHIId; PHINode *PN = PHIsToSlice[PHIId]; unsigned Offset = PHIUsers[UserI].Shift; Type *Ty = PHIUsers[UserI].Inst->getType(); - + PHINode *EltPHI; - + // If we've already lowered a user like this, reuse the previously lowered // value. if ((EltPHI = ExtractedVals[LoweredPHIRecord(PN, Offset, Ty)]) == 0) { - + // Otherwise, Create the new PHI node for this user. 
EltPHI = PHINode::Create(Ty, PN->getNumIncomingValues(), PN->getName()+".off"+Twine(Offset), PN); assert(EltPHI->getType() != PN->getType() && "Truncate didn't shrink phi?"); - + for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) { BasicBlock *Pred = PN->getIncomingBlock(i); Value *&PredVal = PredValues[Pred]; - + // If we already have a value for this predecessor, reuse it. if (PredVal) { EltPHI->addIncoming(PredVal, Pred); @@ -736,7 +736,7 @@ Instruction *InstCombiner::SliceUpIllegalIntegerPHI(PHINode &FirstPhi) { EltPHI->addIncoming(PredVal, Pred); continue; } - + if (PHINode *InPHI = dyn_cast<PHINode>(PN)) { // If the incoming value was a PHI, and if it was one of the PHIs we // already rewrote it, just use the lowered value. @@ -746,7 +746,7 @@ Instruction *InstCombiner::SliceUpIllegalIntegerPHI(PHINode &FirstPhi) { continue; } } - + // Otherwise, do an extract in the predecessor. Builder->SetInsertPoint(Pred, Pred->getTerminator()); Value *Res = InVal; @@ -756,7 +756,7 @@ Instruction *InstCombiner::SliceUpIllegalIntegerPHI(PHINode &FirstPhi) { Res = Builder->CreateTrunc(Res, Ty, "extract.t"); PredVal = Res; EltPHI->addIncoming(Res, Pred); - + // If the incoming value was a PHI, and if it was one of the PHIs we are // rewriting, we will ultimately delete the code we inserted. This // means we need to revisit that PHI to make sure we extract out the @@ -765,22 +765,22 @@ Instruction *InstCombiner::SliceUpIllegalIntegerPHI(PHINode &FirstPhi) { if (PHIsInspected.count(OldInVal)) { unsigned RefPHIId = std::find(PHIsToSlice.begin(),PHIsToSlice.end(), OldInVal)-PHIsToSlice.begin(); - PHIUsers.push_back(PHIUsageRecord(RefPHIId, Offset, + PHIUsers.push_back(PHIUsageRecord(RefPHIId, Offset, cast<Instruction>(Res))); ++UserE; } } PredValues.clear(); - + DEBUG(errs() << " Made element PHI for offset " << Offset << ": " << *EltPHI << '\n'); ExtractedVals[LoweredPHIRecord(PN, Offset, Ty)] = EltPHI; } - + // Replace the use of this piece with the PHI node. ReplaceInstUsesWith(*PHIUsers[UserI].Inst, EltPHI); } - + // Replace all the remaining uses of the PHI nodes (self uses and the lshrs) // with undefs. Value *Undef = UndefValue::get(FirstPhi.getType()); @@ -818,7 +818,7 @@ Instruction *InstCombiner::visitPHINode(PHINode &PN) { if (DeadPHICycle(PU, PotentiallyDeadPHIs)) return ReplaceInstUsesWith(PN, UndefValue::get(PN.getType())); } - + // If this phi has a single use, and if that use just computes a value for // the next iteration of a loop, delete the phi. This occurs with unused // induction variables, e.g. "for (int j = 0; ; ++j);". Detecting this @@ -847,7 +847,7 @@ Instruction *InstCombiner::visitPHINode(PHINode &PN) { if (InValNo != NumIncomingVals) { Value *NonPhiInVal = PN.getIncomingValue(InValNo); - + // Scan the rest of the operands to see if there are any conflicts, if so // there is no need to recursively scan other phis. for (++InValNo; InValNo != NumIncomingVals; ++InValNo) { @@ -855,7 +855,7 @@ Instruction *InstCombiner::visitPHINode(PHINode &PN) { if (OpVal != NonPhiInVal && !isa<PHINode>(OpVal)) break; } - + // If we scanned over all operands, then we have one unique value plus // phi values. Scan PHI nodes to see if they all merge in each other or // the value. 
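SliceUpIllegalIntegerPHI, whose body appears in the hunks above, breaks a PHI of an integer type that is illegal for the target into PHIs of smaller legal pieces, provided every use is either a trunc or an lshr by a constant feeding a trunc. A rough C++ illustration of why that is sound; the 64-bit to 32-bit split is illustrative, not taken from the patch:

    #include <cassert>
    #include <cstdint>

    // Shape before slicing: a 64-bit value is merged across a branch even
    // though its uses only ever look at the 32-bit halves.
    uint32_t wideMerge(bool cond, uint64_t a, uint64_t b, bool wantHigh) {
      uint64_t merged = cond ? a : b;           // PHI of an "illegal" i64
      uint32_t lo = (uint32_t)merged;           // trunc
      uint32_t hi = (uint32_t)(merged >> 32);   // lshr + trunc
      return wantHigh ? hi : lo;
    }

    // Shape after slicing: the halves are merged independently, so no
    // 64-bit value has to live across the merge point.
    uint32_t slicedMerge(bool cond, uint64_t a, uint64_t b, bool wantHigh) {
      uint32_t lo = cond ? (uint32_t)a : (uint32_t)b;                 // PHI of i32
      uint32_t hi = cond ? (uint32_t)(a >> 32) : (uint32_t)(b >> 32); // PHI of i32
      return wantHigh ? hi : lo;
    }

    int main() {
      uint64_t a = 0x1122334455667788ULL, b = 0x99aabbccddeeff00ULL;
      for (int cond = 0; cond < 2; ++cond)
        for (int high = 0; high < 2; ++high)
          assert(wideMerge(cond, a, b, high) == slicedMerge(cond, a, b, high));
      return 0;
    }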
@@ -899,6 +899,6 @@ Instruction *InstCombiner::visitPHINode(PHINode &PN) { !TD->isLegalInteger(PN.getType()->getPrimitiveSizeInBits())) if (Instruction *Res = SliceUpIllegalIntegerPHI(PN)) return Res; - + return 0; } diff --git a/contrib/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp b/contrib/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp index 121aa1f..59502fb 100644 --- a/contrib/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp +++ b/contrib/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp @@ -350,6 +350,68 @@ static Value *SimplifyWithOpReplaced(Value *V, Value *Op, Value *RepOp, return 0; } +/// foldSelectICmpAndOr - We want to turn: +/// (select (icmp eq (and X, C1), 0), Y, (or Y, C2)) +/// into: +/// (or (shl (and X, C1), C3), y) +/// iff: +/// C1 and C2 are both powers of 2 +/// where: +/// C3 = Log(C2) - Log(C1) +/// +/// This transform handles cases where: +/// 1. The icmp predicate is inverted +/// 2. The select operands are reversed +/// 3. The magnitude of C2 and C1 are flipped +static Value *foldSelectICmpAndOr(const SelectInst &SI, Value *TrueVal, + Value *FalseVal, + InstCombiner::BuilderTy *Builder) { + const ICmpInst *IC = dyn_cast<ICmpInst>(SI.getCondition()); + if (!IC || !IC->isEquality()) + return 0; + + Value *CmpLHS = IC->getOperand(0); + Value *CmpRHS = IC->getOperand(1); + + if (!match(CmpRHS, m_Zero())) + return 0; + + Value *X; + const APInt *C1; + if (!match(CmpLHS, m_And(m_Value(X), m_Power2(C1)))) + return 0; + + const APInt *C2; + bool OrOnTrueVal = false; + bool OrOnFalseVal = match(FalseVal, m_Or(m_Specific(TrueVal), m_Power2(C2))); + if (!OrOnFalseVal) + OrOnTrueVal = match(TrueVal, m_Or(m_Specific(FalseVal), m_Power2(C2))); + + if (!OrOnFalseVal && !OrOnTrueVal) + return 0; + + Value *V = CmpLHS; + Value *Y = OrOnFalseVal ? TrueVal : FalseVal; + + unsigned C1Log = C1->logBase2(); + unsigned C2Log = C2->logBase2(); + if (C2Log > C1Log) { + V = Builder->CreateZExtOrTrunc(V, Y->getType()); + V = Builder->CreateShl(V, C2Log - C1Log); + } else if (C1Log > C2Log) { + V = Builder->CreateLShr(V, C1Log - C2Log); + V = Builder->CreateZExtOrTrunc(V, Y->getType()); + } else + V = Builder->CreateZExtOrTrunc(V, Y->getType()); + + ICmpInst::Predicate Pred = IC->getPredicate(); + if ((Pred == ICmpInst::ICMP_NE && OrOnFalseVal) || + (Pred == ICmpInst::ICMP_EQ && OrOnTrueVal)) + V = Builder->CreateXor(V, *C2); + + return Builder->CreateOr(V, Y); +} + /// visitSelectInstWithICmp - Visit a SelectInst that has an /// ICmpInst as its first operand. /// @@ -521,6 +583,9 @@ Instruction *InstCombiner::visitSelectInstWithICmp(SelectInst &SI, } } + if (Value *V = foldSelectICmpAndOr(SI, TrueVal, FalseVal, Builder)) + return ReplaceInstUsesWith(SI, V); + return Changed ? 
&SI : 0; } @@ -676,7 +741,8 @@ Instruction *InstCombiner::visitSelectInst(SelectInst &SI) { // Change: A = select B, false, C --> A = and !B, C Value *NotCond = Builder->CreateNot(CondVal, "not."+CondVal->getName()); return BinaryOperator::CreateAnd(NotCond, FalseVal); - } else if (ConstantInt *C = dyn_cast<ConstantInt>(FalseVal)) { + } + if (ConstantInt *C = dyn_cast<ConstantInt>(FalseVal)) { if (C->getZExtValue() == false) { // Change: A = select B, C, false --> A = and B, C return BinaryOperator::CreateAnd(CondVal, TrueVal); @@ -690,14 +756,14 @@ Instruction *InstCombiner::visitSelectInst(SelectInst &SI) { // select a, a, b -> a|b if (CondVal == TrueVal) return BinaryOperator::CreateOr(CondVal, FalseVal); - else if (CondVal == FalseVal) + if (CondVal == FalseVal) return BinaryOperator::CreateAnd(CondVal, TrueVal); // select a, ~a, b -> (~a)&b // select a, b, ~a -> (~a)|b if (match(TrueVal, m_Not(m_Specific(CondVal)))) return BinaryOperator::CreateAnd(TrueVal, FalseVal); - else if (match(FalseVal, m_Not(m_Specific(CondVal)))) + if (match(FalseVal, m_Not(m_Specific(CondVal)))) return BinaryOperator::CreateOr(TrueVal, FalseVal); } @@ -838,7 +904,7 @@ Instruction *InstCombiner::visitSelectInst(SelectInst &SI) { Value *NewFalseOp = NegVal; if (AddOp != TI) std::swap(NewTrueOp, NewFalseOp); - Value *NewSel = + Value *NewSel = Builder->CreateSelect(CondVal, NewTrueOp, NewFalseOp, SI.getName() + ".p"); @@ -862,7 +928,7 @@ Instruction *InstCombiner::visitSelectInst(SelectInst &SI) { Value *LHS, *RHS, *LHS2, *RHS2; if (SelectPatternFlavor SPF = MatchSelectPattern(&SI, LHS, RHS)) { if (SelectPatternFlavor SPF2 = MatchSelectPattern(LHS, LHS2, RHS2)) - if (Instruction *R = FoldSPFofSPF(cast<Instruction>(LHS),SPF2,LHS2,RHS2, + if (Instruction *R = FoldSPFofSPF(cast<Instruction>(LHS),SPF2,LHS2,RHS2, SI, SPF, RHS)) return R; if (SelectPatternFlavor SPF2 = MatchSelectPattern(RHS, LHS2, RHS2)) @@ -908,7 +974,7 @@ Instruction *InstCombiner::visitSelectInst(SelectInst &SI) { return &SI; } - if (VectorType *VecTy = dyn_cast<VectorType>(SI.getType())) { + if (VectorType* VecTy = dyn_cast<VectorType>(SI.getType())) { unsigned VWidth = VecTy->getNumElements(); APInt UndefElts(VWidth, 0); APInt AllOnesEltMask(APInt::getAllOnesValue(VWidth)); @@ -918,24 +984,6 @@ Instruction *InstCombiner::visitSelectInst(SelectInst &SI) { return &SI; } - if (ConstantVector *CV = dyn_cast<ConstantVector>(CondVal)) { - // Form a shufflevector instruction. - SmallVector<Constant *, 8> Mask(VWidth); - Type *Int32Ty = Type::getInt32Ty(CV->getContext()); - for (unsigned i = 0; i != VWidth; ++i) { - Constant *Elem = cast<Constant>(CV->getOperand(i)); - if (ConstantInt *E = dyn_cast<ConstantInt>(Elem)) - Mask[i] = ConstantInt::get(Int32Ty, i + (E->isZero() ? 
VWidth : 0)); - else if (isa<UndefValue>(Elem)) - Mask[i] = UndefValue::get(Int32Ty); - else - return 0; - } - Constant *MaskVal = ConstantVector::get(Mask); - Value *V = Builder->CreateShuffleVector(TrueVal, FalseVal, MaskVal); - return ReplaceInstUsesWith(SI, V); - } - if (isa<ConstantAggregateZero>(CondVal)) { return ReplaceInstUsesWith(SI, FalseVal); } diff --git a/contrib/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp b/contrib/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp index 4f71db1..4301ddb 100644 --- a/contrib/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp +++ b/contrib/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp @@ -105,6 +105,75 @@ static Value *FindScalarElement(Value *V, unsigned EltNo) { return 0; } +// If we have a PHI node with a vector type that has only 2 uses: feed +// itself and be an operand of extractelemnt at a constant location, +// try to replace the PHI of the vector type with a PHI of a scalar type +Instruction *InstCombiner::scalarizePHI(ExtractElementInst &EI, PHINode *PN) { + // Verify that the PHI node has exactly 2 uses. Otherwise return NULL. + if (!PN->hasNUses(2)) + return NULL; + + // If so, it's known at this point that one operand is PHI and the other is + // an extractelement node. Find the PHI user that is not the extractelement + // node. + Value::use_iterator iu = PN->use_begin(); + Instruction *PHIUser = dyn_cast<Instruction>(*iu); + if (PHIUser == cast<Instruction>(&EI)) + PHIUser = cast<Instruction>(*(++iu)); + + // Verify that this PHI user has one use, which is the PHI itself, + // and that it is a binary operation which is cheap to scalarize. + // otherwise return NULL. + if (!PHIUser->hasOneUse() || !(PHIUser->use_back() == PN) || + !(isa<BinaryOperator>(PHIUser)) || + !CheapToScalarize(PHIUser, true)) + return NULL; + + // Create a scalar PHI node that will replace the vector PHI node + // just before the current PHI node. + PHINode * scalarPHI = cast<PHINode>( + InsertNewInstWith(PHINode::Create(EI.getType(), + PN->getNumIncomingValues(), ""), *PN)); + // Scalarize each PHI operand. + for (unsigned i=0; i < PN->getNumIncomingValues(); i++) { + Value *PHIInVal = PN->getIncomingValue(i); + BasicBlock *inBB = PN->getIncomingBlock(i); + Value *Elt = EI.getIndexOperand(); + // If the operand is the PHI induction variable: + if (PHIInVal == PHIUser) { + // Scalarize the binary operation. Its first operand is the + // scalar PHI and the second operand is extracted from the other + // vector operand. + BinaryOperator *B0 = cast<BinaryOperator>(PHIUser); + unsigned opId = (B0->getOperand(0) == PN) ? 1: 0; + Value *Op = Builder->CreateExtractElement( + B0->getOperand(opId), Elt, B0->getOperand(opId)->getName()+".Elt"); + Value *newPHIUser = InsertNewInstWith( + BinaryOperator::Create(B0->getOpcode(), scalarPHI,Op), + *B0); + scalarPHI->addIncoming(newPHIUser, inBB); + } else { + // Scalarize PHI input: + Instruction *newEI = + ExtractElementInst::Create(PHIInVal, Elt, ""); + // Insert the new instruction into the predecessor basic block. 
+ Instruction *pos = dyn_cast<Instruction>(PHIInVal); + BasicBlock::iterator InsertPos; + if (pos && !isa<PHINode>(pos)) { + InsertPos = pos; + ++InsertPos; + } else { + InsertPos = inBB->getFirstInsertionPt(); + } + + InsertNewInstWith(newEI, *InsertPos); + + scalarPHI->addIncoming(newEI, inBB); + } + } + return ReplaceInstUsesWith(EI, scalarPHI); +} + Instruction *InstCombiner::visitExtractElementInst(ExtractElementInst &EI) { // If vector val is constant with all elements the same, replace EI with // that element. We handle a known element # below. @@ -149,6 +218,14 @@ Instruction *InstCombiner::visitExtractElementInst(ExtractElementInst &EI) { if (Value *Elt = FindScalarElement(BCI->getOperand(0), IndexVal)) return new BitCastInst(Elt, EI.getType()); } + + // If there's a vector PHI feeding a scalar use through this extractelement + // instruction, try to scalarize the PHI. + if (PHINode *PN = dyn_cast<PHINode>(EI.getOperand(0))) { + Instruction *scalarPHI = scalarizePHI(EI, PN); + if (scalarPHI) + return (scalarPHI); + } } if (Instruction *I = dyn_cast<Instruction>(EI.getOperand(0))) { @@ -201,10 +278,10 @@ Instruction *InstCombiner::visitExtractElementInst(ExtractElementInst &EI) { } else if (CastInst *CI = dyn_cast<CastInst>(I)) { // Canonicalize extractelement(cast) -> cast(extractelement) // bitcasts can change the number of vector elements and they cost nothing - if (CI->hasOneUse() && EI.hasOneUse() && - (CI->getOpcode() != Instruction::BitCast)) { + if (CI->hasOneUse() && (CI->getOpcode() != Instruction::BitCast)) { Value *EE = Builder->CreateExtractElement(CI->getOperand(0), EI.getIndexOperand()); + Worklist.AddValue(EE); return CastInst::Create(CI->getOpcode(), EE, EI.getType()); } } @@ -336,6 +413,10 @@ static Value *CollectShuffleElements(Value *V, SmallVectorImpl<Constant*> &Mask, if (VecOp == RHS) { Value *V = CollectShuffleElements(EI->getOperand(0), Mask, RHS); + // Update Mask to reflect that `ScalarOp' has been inserted at + // position `InsertedIdx' within the vector returned by IEI. + Mask[InsertedIdx % NumElts] = Mask[ExtractedIdx]; + // Everything but the extracted element is replaced with the RHS. 
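The new scalarizePHI above recognizes a vector PHI with exactly two uses, the binary operation that feeds it back and an extractelement at a constant position, and carries the recurrence in a scalar PHI instead, since only one lane is ever observed. A rough source-level picture of the pattern; the small reduction below is invented for the illustration:

    #include <cassert>

    // Shape before scalarization: a 4-wide accumulator is updated every
    // iteration, but only lane 0 is ever extracted afterwards.
    int vectorAccum(int (*rows)[4], int n) {
      int acc[4] = {0, 0, 0, 0};            // stands in for the vector PHI
      for (int i = 0; i < n; ++i)
        for (int lane = 0; lane < 4; ++lane)
          acc[lane] += rows[i][lane];       // vector add feeding the PHI
      return acc[0];                        // extractelement at index 0
    }

    // Shape after scalarization: only the extracted lane is carried.
    int scalarAccum(int (*rows)[4], int n) {
      int acc0 = 0;                         // scalar PHI
      for (int i = 0; i < n; ++i)
        acc0 += rows[i][0];                 // extract the operand, then add
      return acc0;
    }

    int main() {
      int rows[3][4] = {{1, 2, 3, 4}, {5, 6, 7, 8}, {9, 10, 11, 12}};
      assert(vectorAccum(rows, 3) == scalarAccum(rows, 3));
      return 0;
    }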
for (unsigned i = 0; i != NumElts; ++i) { if (i != InsertedIdx) diff --git a/contrib/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp b/contrib/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp index c6115e3..ec10751 100644 --- a/contrib/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp +++ b/contrib/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp @@ -1483,7 +1483,7 @@ Instruction *InstCombiner::visitAllocSite(Instruction &MI) { Module *M = II->getParent()->getParent()->getParent(); Function *F = Intrinsic::getDeclaration(M, Intrinsic::donothing); InvokeInst::Create(F, II->getNormalDest(), II->getUnwindDest(), - ArrayRef<Value *>(), "", II->getParent()); + None, "", II->getParent()); } return EraseInstFromFunction(MI); } diff --git a/contrib/llvm/lib/Transforms/Instrumentation/BlackList.cpp b/contrib/llvm/lib/Transforms/Instrumentation/BlackList.cpp index 927982d..39de4b0 100644 --- a/contrib/llvm/lib/Transforms/Instrumentation/BlackList.cpp +++ b/contrib/llvm/lib/Transforms/Instrumentation/BlackList.cpp @@ -110,7 +110,8 @@ static StringRef GetGVTypeString(const GlobalVariable &G) { bool BlackList::isInInit(const GlobalVariable &G) const { return (isIn(*G.getParent()) || inSection("global-init", G.getName()) || - inSection("global-init-type", GetGVTypeString(G))); + inSection("global-init-type", GetGVTypeString(G)) || + inSection("global-init-src", G.getParent()->getModuleIdentifier())); } bool BlackList::inSection(const StringRef Section, diff --git a/contrib/llvm/lib/Transforms/Instrumentation/Instrumentation.cpp b/contrib/llvm/lib/Transforms/Instrumentation/Instrumentation.cpp index 8ba1025..9f35396 100644 --- a/contrib/llvm/lib/Transforms/Instrumentation/Instrumentation.cpp +++ b/contrib/llvm/lib/Transforms/Instrumentation/Instrumentation.cpp @@ -13,6 +13,7 @@ //===----------------------------------------------------------------------===// #include "llvm/InitializePasses.h" +#include "llvm/PassRegistry.h" #include "llvm-c/Initialization.h" using namespace llvm; diff --git a/contrib/llvm/lib/Transforms/ObjCARC/ObjCARC.cpp b/contrib/llvm/lib/Transforms/ObjCARC/ObjCARC.cpp index 53a31b0..373168e 100644 --- a/contrib/llvm/lib/Transforms/ObjCARC/ObjCARC.cpp +++ b/contrib/llvm/lib/Transforms/ObjCARC/ObjCARC.cpp @@ -30,6 +30,7 @@ using namespace llvm::objcarc; bool llvm::objcarc::EnableARCOpts; static cl::opt<bool, true> EnableARCOptimizations("enable-objc-arc-opts", + cl::desc("enable/disable all ARC Optimizations"), cl::location(EnableARCOpts), cl::init(true)); diff --git a/contrib/llvm/lib/Transforms/ObjCARC/ObjCARCContract.cpp b/contrib/llvm/lib/Transforms/ObjCARC/ObjCARCContract.cpp index b96c64f..c43f4f4 100644 --- a/contrib/llvm/lib/Transforms/ObjCARC/ObjCARCContract.cpp +++ b/contrib/llvm/lib/Transforms/ObjCARC/ObjCARCContract.cpp @@ -66,6 +66,8 @@ namespace { Constant *RetainAutoreleaseCallee; /// Declaration for objc_retainAutoreleaseReturnValue(). Constant *RetainAutoreleaseRVCallee; + /// Declaration for objc_retainAutoreleasedReturnValue(). + Constant *RetainRVCallee; /// The inline asm string to insert between calls and RetainRV calls to make /// the optimization work on targets which need it. 
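Returning to the foldSelectICmpAndOr helper added to InstCombineSelect.cpp earlier in this section: when C1 and C2 are both powers of two, the select reduces to shifting the masked bit into position and OR-ing it in, with the shift amount C3 = log2(C2) - log2(C1). A small exhaustive C++ check of that identity; the constants C1 = 4 and C2 = 32 are illustrative only:

    #include <cassert>
    #include <cstdint>

    int main() {
      const uint32_t C1 = 4, C2 = 32;   // both powers of two (illustrative)
      const uint32_t C3 = 3;            // log2(C2) - log2(C1)
      for (uint32_t x = 0; x < 256; ++x)
        for (uint32_t y = 0; y < 256; ++y) {
          // Original form: select (icmp eq (and X, C1), 0), Y, (or Y, C2)
          uint32_t sel = ((x & C1) == 0) ? y : (y | C2);
          // Folded form: or (shl (and X, C1), C3), Y
          uint32_t folded = ((x & C1) << C3) | y;
          assert(sel == folded);
        }
      return 0;
    }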
@@ -77,9 +79,12 @@ namespace { SmallPtrSet<CallInst *, 8> StoreStrongCalls; Constant *getStoreStrongCallee(Module *M); + Constant *getRetainRVCallee(Module *M); Constant *getRetainAutoreleaseCallee(Module *M); Constant *getRetainAutoreleaseRVCallee(Module *M); + bool OptimizeRetainCall(Function &F, Instruction *Retain); + bool ContractAutorelease(Function &F, Instruction *Autorelease, InstructionClass Class, SmallPtrSet<Instruction *, 4> @@ -172,6 +177,57 @@ Constant *ObjCARCContract::getRetainAutoreleaseRVCallee(Module *M) { return RetainAutoreleaseRVCallee; } +Constant *ObjCARCContract::getRetainRVCallee(Module *M) { + if (!RetainRVCallee) { + LLVMContext &C = M->getContext(); + Type *I8X = PointerType::getUnqual(Type::getInt8Ty(C)); + Type *Params[] = { I8X }; + FunctionType *FTy = FunctionType::get(I8X, Params, /*isVarArg=*/false); + AttributeSet Attribute = + AttributeSet().addAttribute(M->getContext(), AttributeSet::FunctionIndex, + Attribute::NoUnwind); + RetainRVCallee = + M->getOrInsertFunction("objc_retainAutoreleasedReturnValue", FTy, + Attribute); + } + return RetainRVCallee; +} + +/// Turn objc_retain into objc_retainAutoreleasedReturnValue if the operand is a +/// return value. We do this late so we do not disrupt the dataflow analysis in +/// ObjCARCOpt. +bool +ObjCARCContract::OptimizeRetainCall(Function &F, Instruction *Retain) { + ImmutableCallSite CS(GetObjCArg(Retain)); + const Instruction *Call = CS.getInstruction(); + if (!Call) + return false; + if (Call->getParent() != Retain->getParent()) + return false; + + // Check that the call is next to the retain. + BasicBlock::const_iterator I = Call; + ++I; + while (IsNoopInstruction(I)) ++I; + if (&*I != Retain) + return false; + + // Turn it to an objc_retainAutoreleasedReturnValue. + Changed = true; + ++NumPeeps; + + DEBUG(dbgs() << "Transforming objc_retain => " + "objc_retainAutoreleasedReturnValue since the operand is a " + "return value.\nOld: "<< *Retain << "\n"); + + // We do not have to worry about tail calls/does not throw since + // retain/retainRV have the same properties. + cast<CallInst>(Retain)->setCalledFunction(getRetainRVCallee(F.getParent())); + + DEBUG(dbgs() << "New: " << *Retain << "\n"); + return true; +} + /// Merge an autorelease with a retain into a fused call. bool ObjCARCContract::ContractAutorelease(Function &F, Instruction *Autorelease, @@ -329,6 +385,7 @@ bool ObjCARCContract::doInitialization(Module &M) { StoreStrongCallee = 0; RetainAutoreleaseCallee = 0; RetainAutoreleaseRVCallee = 0; + RetainRVCallee = 0; // Initialize RetainRVMarker. RetainRVMarker = 0; @@ -380,7 +437,6 @@ bool ObjCARCContract::runOnFunction(Function &F) { // objc_retainBlock does not necessarily return its argument. InstructionClass Class = GetBasicInstructionClass(Inst); switch (Class) { - case IC_Retain: case IC_FusedRetainAutorelease: case IC_FusedRetainAutoreleaseRV: break; @@ -389,6 +445,13 @@ bool ObjCARCContract::runOnFunction(Function &F) { if (ContractAutorelease(F, Inst, Class, DependingInstructions, Visited)) continue; break; + case IC_Retain: + // Attempt to convert retains to retainrvs if they are next to function + // calls. + if (!OptimizeRetainCall(F, Inst)) + break; + // If we succeed in our optimization, fall through. 
+ // FALLTHROUGH case IC_RetainRV: { // If we're compiling for a target which needs a special inline-asm // marker to do the retainAutoreleasedReturnValue optimization, diff --git a/contrib/llvm/lib/Transforms/ObjCARC/ObjCARCOpts.cpp b/contrib/llvm/lib/Transforms/ObjCARC/ObjCARCOpts.cpp index 92d6fc4..43e2e20 100644 --- a/contrib/llvm/lib/Transforms/ObjCARC/ObjCARCOpts.cpp +++ b/contrib/llvm/lib/Transforms/ObjCARC/ObjCARCOpts.cpp @@ -191,13 +191,13 @@ static bool DoesRetainableObjPtrEscape(const User *Ptr) { do { const Value *V = Worklist.pop_back_val(); - DEBUG(dbgs() << "DoesRetainableObjPtrEscape: Visiting: " << *V << "\n"); + DEBUG(dbgs() << "Visiting: " << *V << "\n"); for (Value::const_use_iterator UI = V->use_begin(), UE = V->use_end(); UI != UE; ++UI) { const User *UUser = *UI; - DEBUG(dbgs() << "DoesRetainableObjPtrEscape: User: " << *UUser << "\n"); + DEBUG(dbgs() << "User: " << *UUser << "\n"); // Special - Use by a call (callee or argument) is not considered // to be an escape. @@ -207,8 +207,7 @@ static bool DoesRetainableObjPtrEscape(const User *Ptr) { case IC_StoreStrong: case IC_Autorelease: case IC_AutoreleaseRV: { - DEBUG(dbgs() << "DoesRetainableObjPtrEscape: User copies pointer " - "arguments. Pointer Escapes!\n"); + DEBUG(dbgs() << "User copies pointer arguments. Pointer Escapes!\n"); // These special functions make copies of their pointer arguments. return true; } @@ -223,12 +222,11 @@ static bool DoesRetainableObjPtrEscape(const User *Ptr) { isa<PHINode>(UUser) || isa<SelectInst>(UUser)) { if (VisitedSet.insert(UUser)) { - DEBUG(dbgs() << "DoesRetainableObjPtrEscape: User copies value. " - "Ptr escapes if result escapes. Adding to list.\n"); + DEBUG(dbgs() << "User copies value. Ptr escapes if result escapes." + " Adding to list.\n"); Worklist.push_back(UUser); } else { - DEBUG(dbgs() << "DoesRetainableObjPtrEscape: Already visited node." - "\n"); + DEBUG(dbgs() << "Already visited node.\n"); } continue; } @@ -245,13 +243,13 @@ static bool DoesRetainableObjPtrEscape(const User *Ptr) { continue; } // Otherwise, conservatively assume an escape. - DEBUG(dbgs() << "DoesRetainableObjPtrEscape: Assuming ptr escapes.\n"); + DEBUG(dbgs() << "Assuming ptr escapes.\n"); return true; } } while (!Worklist.empty()); // No escapes found. - DEBUG(dbgs() << "DoesRetainableObjPtrEscape: Ptr does not escape.\n"); + DEBUG(dbgs() << "Ptr does not escape.\n"); return false; } @@ -305,6 +303,16 @@ STATISTIC(NumRets, "Number of return value forwarding " "retain+autoreleaes eliminated"); STATISTIC(NumRRs, "Number of retain+release paths eliminated"); STATISTIC(NumPeeps, "Number of calls peephole-optimized"); +STATISTIC(NumRetainsBeforeOpt, + "Number of retains before optimization."); +STATISTIC(NumReleasesBeforeOpt, + "Number of releases before optimization."); +#ifndef NDEBUG +STATISTIC(NumRetainsAfterOpt, + "Number of retains after optimization."); +STATISTIC(NumReleasesAfterOpt, + "Number of releases after optimization."); +#endif namespace { /// \enum Sequence @@ -375,7 +383,7 @@ static Sequence MergeSeqs(Sequence A, Sequence B, bool TopDown) { namespace { /// \brief Unidirectional information about either a /// retain-decrement-use-release sequence or release-use-decrement-retain - /// reverese sequence. + /// reverse sequence. struct RRInfo { /// After an objc_retain, the reference count of the referenced /// object is known to be positive. 
Similarly, before an objc_release, the @@ -410,6 +418,10 @@ namespace { KnownSafe(false), IsTailCallRelease(false), ReleaseMetadata(0) {} void clear(); + + bool IsTrackingImpreciseReleases() { + return ReleaseMetadata != 0; + } }; } @@ -428,7 +440,7 @@ namespace { /// True if the reference count is known to be incremented. bool KnownPositiveRefCount; - /// True of we've seen an opportunity for partial RR elimination, such as + /// True if we've seen an opportunity for partial RR elimination, such as /// pushing calls into a CFG triangle or into one side of a CFG diamond. bool Partial; @@ -457,6 +469,7 @@ namespace { } void SetSeq(Sequence NewSeq) { + DEBUG(dbgs() << "Old: " << Seq << "; New: " << NewSeq << "\n"); Seq = NewSeq; } @@ -469,7 +482,8 @@ namespace { } void ResetSequenceProgress(Sequence NewSeq) { - Seq = NewSeq; + DEBUG(dbgs() << "Resetting sequence progress.\n"); + SetSeq(NewSeq); Partial = false; RRI.clear(); } @@ -706,7 +720,19 @@ void BBState::MergeSucc(const BBState &Other) { /// Enable/disable ARC sequence annotations. static cl::opt<bool> -EnableARCAnnotations("enable-objc-arc-annotations", cl::init(false)); +EnableARCAnnotations("enable-objc-arc-annotations", cl::init(false), + cl::desc("Enable emission of arc data flow analysis " + "annotations")); +static cl::opt<bool> +DisableCheckForCFGHazards("disable-objc-arc-checkforcfghazards", cl::init(false), + cl::desc("Disable check for cfg hazards when " + "annotating")); +static cl::opt<std::string> +ARCAnnotationTargetIdentifier("objc-arc-annotation-target-identifier", + cl::init(""), + cl::desc("filter out all data flow annotations " + "but those that apply to the given " + "target llvm identifier.")); /// This function appends a unique ARCAnnotationProvenanceSourceMDKind id to an /// instruction so that we can track backwards when post processing via the llvm @@ -791,6 +817,12 @@ static void AppendMDNodeToInstForPtr(unsigned NodeId, /// state of a pointer at the entrance to a basic block. static void GenerateARCBBEntranceAnnotation(const char *Name, BasicBlock *BB, Value *Ptr, Sequence Seq) { + // If we have a target identifier, make sure that we match it before + // continuing. + if(!ARCAnnotationTargetIdentifier.empty() && + !Ptr->getName().equals(ARCAnnotationTargetIdentifier)) + return; + Module *M = BB->getParent()->getParent(); LLVMContext &C = M->getContext(); Type *I8X = PointerType::getUnqual(Type::getInt8Ty(C)); @@ -828,6 +860,12 @@ static void GenerateARCBBEntranceAnnotation(const char *Name, BasicBlock *BB, /// of the pointer at the bottom of the basic block. static void GenerateARCBBTerminatorAnnotation(const char *Name, BasicBlock *BB, Value *Ptr, Sequence Seq) { + // If we have a target identifier, make sure that we match it before emitting + // an annotation. + if(!ARCAnnotationTargetIdentifier.empty() && + !Ptr->getName().equals(ARCAnnotationTargetIdentifier)) + return; + Module *M = BB->getParent()->getParent(); LLVMContext &C = M->getContext(); Type *I8X = PointerType::getUnqual(Type::getInt8Ty(C)); @@ -869,6 +907,12 @@ static void GenerateARCAnnotation(unsigned InstMDId, Sequence OldSeq, Sequence NewSeq) { if (EnableARCAnnotations) { + // If we have a target identifier, make sure that we match it before + // emitting an annotation. + if(!ARCAnnotationTargetIdentifier.empty() && + !Ptr->getName().equals(ARCAnnotationTargetIdentifier)) + return; + // First generate the source annotation on our pointer. 
This will return an // MDString* if Ptr actually comes from an instruction implying we can put // in a source annotation. If AppendMDNodeToSourcePtr returns 0 (i.e. NULL), @@ -909,27 +953,27 @@ static void GenerateARCAnnotation(unsigned InstMDId, #define ANNOTATE_BB(_states, _bb, _name, _type, _direction) \ do { \ - if (EnableARCAnnotations) { \ - for(BBState::ptr_const_iterator I = (_states)._direction##_ptr_begin(), \ + if (EnableARCAnnotations) { \ + for(BBState::ptr_const_iterator I = (_states)._direction##_ptr_begin(), \ E = (_states)._direction##_ptr_end(); I != E; ++I) { \ - Value *Ptr = const_cast<Value*>(I->first); \ - Sequence Seq = I->second.GetSeq(); \ - GenerateARCBB ## _type ## Annotation(_name, (_bb), Ptr, Seq); \ + Value *Ptr = const_cast<Value*>(I->first); \ + Sequence Seq = I->second.GetSeq(); \ + GenerateARCBB ## _type ## Annotation(_name, (_bb), Ptr, Seq); \ + } \ } \ - } \ -} while (0) + } while (0) -#define ANNOTATE_BOTTOMUP_BBSTART(_states, _basicblock) \ +#define ANNOTATE_BOTTOMUP_BBSTART(_states, _basicblock) \ ANNOTATE_BB(_states, _basicblock, "llvm.arc.annotation.bottomup.bbstart", \ Entrance, bottom_up) -#define ANNOTATE_BOTTOMUP_BBEND(_states, _basicblock) \ - ANNOTATE_BB(_states, _basicblock, "llvm.arc.annotation.bottomup.bbend", \ +#define ANNOTATE_BOTTOMUP_BBEND(_states, _basicblock) \ + ANNOTATE_BB(_states, _basicblock, "llvm.arc.annotation.bottomup.bbend", \ Terminator, bottom_up) -#define ANNOTATE_TOPDOWN_BBSTART(_states, _basicblock) \ - ANNOTATE_BB(_states, _basicblock, "llvm.arc.annotation.topdown.bbstart", \ +#define ANNOTATE_TOPDOWN_BBSTART(_states, _basicblock) \ + ANNOTATE_BB(_states, _basicblock, "llvm.arc.annotation.topdown.bbstart", \ Entrance, top_down) -#define ANNOTATE_TOPDOWN_BBEND(_states, _basicblock) \ - ANNOTATE_BB(_states, _basicblock, "llvm.arc.annotation.topdown.bbend", \ +#define ANNOTATE_TOPDOWN_BBEND(_states, _basicblock) \ + ANNOTATE_BB(_states, _basicblock, "llvm.arc.annotation.topdown.bbend", \ Terminator, top_down) #else // !ARC_ANNOTATION @@ -955,9 +999,6 @@ namespace { /// them. These are initialized lazily to avoid cluttering up the Module /// with unused declarations. - /// Declaration for ObjC runtime function - /// objc_retainAutoreleasedReturnValue. - Constant *RetainRVCallee; /// Declaration for ObjC runtime function objc_autoreleaseReturnValue. Constant *AutoreleaseRVCallee; /// Declaration for ObjC runtime function objc_release. 
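The ObjCARC hunks above move the objc_retain to objc_retainAutoreleasedReturnValue rewrite out of ObjCARCOpt and into ObjCARCContract so it runs late and does not perturb the dataflow analysis. The rewrite is only attempted when the retain follows the call that produces its operand with nothing but no-op instructions in between, which is what OptimizeRetainCall checks. A simplified sketch of that adjacency test; the string-based block model here is invented purely for illustration:

    #include <cassert>
    #include <cstddef>
    #include <string>
    #include <vector>

    // Illustrative stand-in for the adjacency test OptimizeRetainCall
    // performs: the objc_retain must follow the call producing its operand,
    // with only no-op instructions (e.g. casts of the same value) between.
    static bool isNoop(const std::string &inst) { return inst == "bitcast"; }

    static bool retainImmediatelyFollowsCall(const std::vector<std::string> &block,
                                             std::size_t callIdx,
                                             std::size_t retainIdx) {
      std::size_t i = callIdx + 1;
      while (i < block.size() && isNoop(block[i]))
        ++i;                               // skip over no-op instructions
      return i == retainIdx;
    }

    int main() {
      std::vector<std::string> bb;
      bb.push_back("call foo");
      bb.push_back("bitcast");
      bb.push_back("objc_retain");
      bb.push_back("store");
      assert(retainImmediatelyFollowsCall(bb, 0, 2));   // adjacent: rewrite ok
      assert(!retainImmediatelyFollowsCall(bb, 0, 3));  // not adjacent: give up
      return 0;
    }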
@@ -991,7 +1032,6 @@ namespace { unsigned ARCAnnotationProvenanceSourceMDKind; #endif // ARC_ANNOATIONS - Constant *getRetainRVCallee(Module *M); Constant *getAutoreleaseRVCallee(Module *M); Constant *getReleaseCallee(Module *M); Constant *getRetainCallee(Module *M); @@ -1000,7 +1040,6 @@ namespace { bool IsRetainBlockOptimizable(const Instruction *Inst); - void OptimizeRetainCall(Function &F, Instruction *Retain); bool OptimizeRetainRVCall(Function &F, Instruction *RetainRV); void OptimizeAutoreleaseRVCall(Function &F, Instruction *AutoreleaseRV, InstructionClass &Class); @@ -1059,6 +1098,10 @@ namespace { void OptimizeReturns(Function &F); +#ifndef NDEBUG + void GatherStatistics(Function &F, bool AfterOptimization = false); +#endif + virtual void getAnalysisUsage(AnalysisUsage &AU) const; virtual bool doInitialization(Module &M); virtual bool runOnFunction(Function &F); @@ -1106,22 +1149,6 @@ bool ObjCARCOpt::IsRetainBlockOptimizable(const Instruction *Inst) { return true; } -Constant *ObjCARCOpt::getRetainRVCallee(Module *M) { - if (!RetainRVCallee) { - LLVMContext &C = M->getContext(); - Type *I8X = PointerType::getUnqual(Type::getInt8Ty(C)); - Type *Params[] = { I8X }; - FunctionType *FTy = FunctionType::get(I8X, Params, /*isVarArg=*/false); - AttributeSet Attribute = - AttributeSet().addAttribute(M->getContext(), AttributeSet::FunctionIndex, - Attribute::NoUnwind); - RetainRVCallee = - M->getOrInsertFunction("objc_retainAutoreleasedReturnValue", FTy, - Attribute); - } - return RetainRVCallee; -} - Constant *ObjCARCOpt::getAutoreleaseRVCallee(Module *M) { if (!AutoreleaseRVCallee) { LLVMContext &C = M->getContext(); @@ -1201,38 +1228,6 @@ Constant *ObjCARCOpt::getAutoreleaseCallee(Module *M) { return AutoreleaseCallee; } -/// Turn objc_retain into objc_retainAutoreleasedReturnValue if the operand is a -/// return value. -void -ObjCARCOpt::OptimizeRetainCall(Function &F, Instruction *Retain) { - ImmutableCallSite CS(GetObjCArg(Retain)); - const Instruction *Call = CS.getInstruction(); - if (!Call) return; - if (Call->getParent() != Retain->getParent()) return; - - // Check that the call is next to the retain. - BasicBlock::const_iterator I = Call; - ++I; - while (IsNoopInstruction(I)) ++I; - if (&*I != Retain) - return; - - // Turn it to an objc_retainAutoreleasedReturnValue.. - Changed = true; - ++NumPeeps; - - DEBUG(dbgs() << "ObjCARCOpt::OptimizeRetainCall: Transforming " - "objc_retain => objc_retainAutoreleasedReturnValue" - " since the operand is a return value.\n" - " Old: " - << *Retain << "\n"); - - cast<CallInst>(Retain)->setCalledFunction(getRetainRVCallee(F.getParent())); - - DEBUG(dbgs() << " New: " - << *Retain << "\n"); -} - /// Turn objc_retainAutoreleasedReturnValue into objc_retain if the operand is /// not a return value. Or, if it can be paired with an /// objc_autoreleaseReturnValue, delete the pair and return true. 
@@ -1269,9 +1264,8 @@ ObjCARCOpt::OptimizeRetainRVCall(Function &F, Instruction *RetainRV) { Changed = true; ++NumPeeps; - DEBUG(dbgs() << "ObjCARCOpt::OptimizeRetainRVCall: Erasing " << *I << "\n" - << " Erasing " << *RetainRV - << "\n"); + DEBUG(dbgs() << "Erasing autoreleaseRV,retainRV pair: " << *I << "\n" + << "Erasing " << *RetainRV << "\n"); EraseInstruction(I); EraseInstruction(RetainRV); @@ -1283,16 +1277,13 @@ ObjCARCOpt::OptimizeRetainRVCall(Function &F, Instruction *RetainRV) { Changed = true; ++NumPeeps; - DEBUG(dbgs() << "ObjCARCOpt::OptimizeRetainRVCall: Transforming " - "objc_retainAutoreleasedReturnValue => " + DEBUG(dbgs() << "Transforming objc_retainAutoreleasedReturnValue => " "objc_retain since the operand is not a return value.\n" - " Old: " - << *RetainRV << "\n"); + "Old = " << *RetainRV << "\n"); cast<CallInst>(RetainRV)->setCalledFunction(getRetainCallee(F.getParent())); - DEBUG(dbgs() << " New: " - << *RetainRV << "\n"); + DEBUG(dbgs() << "New = " << *RetainRV << "\n"); return false; } @@ -1321,12 +1312,10 @@ ObjCARCOpt::OptimizeAutoreleaseRVCall(Function &F, Instruction *AutoreleaseRV, Changed = true; ++NumPeeps; - DEBUG(dbgs() << "ObjCARCOpt::OptimizeAutoreleaseRVCall: Transforming " - "objc_autoreleaseReturnValue => " + DEBUG(dbgs() << "Transforming objc_autoreleaseReturnValue => " "objc_autorelease since its operand is not used as a return " "value.\n" - " Old: " - << *AutoreleaseRV << "\n"); + "Old = " << *AutoreleaseRV << "\n"); CallInst *AutoreleaseRVCI = cast<CallInst>(AutoreleaseRV); AutoreleaseRVCI-> @@ -1334,8 +1323,7 @@ ObjCARCOpt::OptimizeAutoreleaseRVCall(Function &F, Instruction *AutoreleaseRV, AutoreleaseRVCI->setTailCall(false); // Never tail call objc_autorelease. Class = IC_Autorelease; - DEBUG(dbgs() << " New: " - << *AutoreleaseRV << "\n"); + DEBUG(dbgs() << "New: " << *AutoreleaseRV << "\n"); } @@ -1359,18 +1347,24 @@ ObjCARCOpt::OptimizeRetainBlockCall(Function &F, Instruction *Inst, if (!IsRetainBlockOptimizable(Inst)) return false; + Changed = true; + ++NumPeeps; + + DEBUG(dbgs() << "Strength reduced retainBlock => retain.\n"); + DEBUG(dbgs() << "Old: " << *Inst << "\n"); CallInst *RetainBlock = cast<CallInst>(Inst); RetainBlock->setCalledFunction(getRetainCallee(F.getParent())); // Remove copy_on_escape metadata. RetainBlock->setMetadata(CopyOnEscapeMDKind, 0); Class = IC_Retain; - + DEBUG(dbgs() << "New: " << *Inst << "\n"); return true; } /// Visit each call, one at a time, and make simplifications without doing any /// additional analysis. void ObjCARCOpt::OptimizeIndividualCalls(Function &F) { + DEBUG(dbgs() << "\n== ObjCARCOpt::OptimizeIndividualCalls ==\n"); // Reset all the flags in preparation for recomputing them. 
UsedInThisFunction = 0; @@ -1380,8 +1374,7 @@ void ObjCARCOpt::OptimizeIndividualCalls(Function &F) { InstructionClass Class = GetBasicInstructionClass(Inst); - DEBUG(dbgs() << "ObjCARCOpt::OptimizeIndividualCalls: Visiting: Class: " - << Class << "; " << *Inst << "\n"); + DEBUG(dbgs() << "Visiting: Class: " << Class << "; " << *Inst << "\n"); switch (Class) { default: break; @@ -1397,8 +1390,7 @@ void ObjCARCOpt::OptimizeIndividualCalls(Function &F) { case IC_NoopCast: Changed = true; ++NumNoops; - DEBUG(dbgs() << "ObjCARCOpt::OptimizeIndividualCalls: Erasing no-op cast:" - " " << *Inst << "\n"); + DEBUG(dbgs() << "Erasing no-op cast: " << *Inst << "\n"); EraseInstruction(Inst); continue; @@ -1416,11 +1408,8 @@ void ObjCARCOpt::OptimizeIndividualCalls(Function &F) { Constant::getNullValue(Ty), CI); llvm::Value *NewValue = UndefValue::get(CI->getType()); - DEBUG(dbgs() << "ObjCARCOpt::OptimizeIndividualCalls: A null " - "pointer-to-weak-pointer is undefined behavior.\n" - " Old = " << *CI << - "\n New = " << - *NewValue << "\n"); + DEBUG(dbgs() << "A null pointer-to-weak-pointer is undefined behavior." + "\nOld = " << *CI << "\nNew = " << *NewValue << "\n"); CI->replaceAllUsesWith(NewValue); CI->eraseFromParent(); continue; @@ -1439,11 +1428,8 @@ void ObjCARCOpt::OptimizeIndividualCalls(Function &F) { CI); llvm::Value *NewValue = UndefValue::get(CI->getType()); - DEBUG(dbgs() << "ObjCARCOpt::OptimizeIndividualCalls: A null " - "pointer-to-weak-pointer is undefined behavior.\n" - " Old = " << *CI << - "\n New = " << - *NewValue << "\n"); + DEBUG(dbgs() << "A null pointer-to-weak-pointer is undefined behavior." + "\nOld = " << *CI << "\nNew = " << *NewValue << "\n"); CI->replaceAllUsesWith(NewValue); CI->eraseFromParent(); @@ -1452,13 +1438,13 @@ void ObjCARCOpt::OptimizeIndividualCalls(Function &F) { break; } case IC_RetainBlock: - // If we strength reduce an objc_retainBlock to amn objc_retain, continue + // If we strength reduce an objc_retainBlock to an objc_retain, continue // onto the objc_retain peephole optimizations. Otherwise break. if (!OptimizeRetainBlockCall(F, Inst, Class)) break; // FALLTHROUGH case IC_Retain: - OptimizeRetainCall(F, Inst); + ++NumRetainsBeforeOpt; break; case IC_RetainRV: if (OptimizeRetainRVCall(F, Inst)) @@ -1467,6 +1453,9 @@ void ObjCARCOpt::OptimizeIndividualCalls(Function &F) { case IC_AutoreleaseRV: OptimizeAutoreleaseRVCall(F, Inst, Class); break; + case IC_Release: + ++NumReleasesBeforeOpt; + break; } // objc_autorelease(x) -> objc_release(x) if x is otherwise unused. @@ -1483,15 +1472,11 @@ void ObjCARCOpt::OptimizeIndividualCalls(Function &F) { CallInst *NewCall = CallInst::Create(getReleaseCallee(F.getParent()), Call->getArgOperand(0), "", Call); - NewCall->setMetadata(ImpreciseReleaseMDKind, - MDNode::get(C, ArrayRef<Value *>())); + NewCall->setMetadata(ImpreciseReleaseMDKind, MDNode::get(C, None)); - DEBUG(dbgs() << "ObjCARCOpt::OptimizeIndividualCalls: Replacing " - "objc_autorelease(x) with objc_release(x) since x is " - "otherwise unused.\n" - " Old: " << *Call << - "\n New: " << - *NewCall << "\n"); + DEBUG(dbgs() << "Replacing autorelease{,RV}(x) with objc_release(x) " + "since x is otherwise unused.\nOld: " << *Call << "\nNew: " + << *NewCall << "\n"); EraseInstruction(Call); Inst = NewCall; @@ -1503,9 +1488,8 @@ void ObjCARCOpt::OptimizeIndividualCalls(Function &F) { // a tail keyword. 
if (IsAlwaysTail(Class)) { Changed = true; - DEBUG(dbgs() << "ObjCARCOpt::OptimizeIndividualCalls: Adding tail keyword" - " to function since it can never be passed stack args: " << *Inst << - "\n"); + DEBUG(dbgs() << "Adding tail keyword to function since it can never be " + "passed stack args: " << *Inst << "\n"); cast<CallInst>(Inst)->setTailCall(); } @@ -1513,8 +1497,7 @@ void ObjCARCOpt::OptimizeIndividualCalls(Function &F) { // semantics of ARC truly do not do so. if (IsNeverTail(Class)) { Changed = true; - DEBUG(dbgs() << "ObjCARCOpt::OptimizeIndividualCalls: Removing tail " - "keyword from function: " << *Inst << + DEBUG(dbgs() << "Removing tail keyword from function: " << *Inst << "\n"); cast<CallInst>(Inst)->setTailCall(false); } @@ -1522,8 +1505,8 @@ void ObjCARCOpt::OptimizeIndividualCalls(Function &F) { // Set nounwind as needed. if (IsNoThrow(Class)) { Changed = true; - DEBUG(dbgs() << "ObjCARCOpt::OptimizeIndividualCalls: Found no throw" - " class. Setting nounwind on: " << *Inst << "\n"); + DEBUG(dbgs() << "Found no throw class. Setting nounwind on: " << *Inst + << "\n"); cast<CallInst>(Inst)->setDoesNotThrow(); } @@ -1538,8 +1521,8 @@ void ObjCARCOpt::OptimizeIndividualCalls(Function &F) { if (IsNullOrUndef(Arg)) { Changed = true; ++NumNoops; - DEBUG(dbgs() << "ObjCARCOpt::OptimizeIndividualCalls: ARC calls with " - " null are no-ops. Erasing: " << *Inst << "\n"); + DEBUG(dbgs() << "ARC calls with null are no-ops. Erasing: " << *Inst + << "\n"); EraseInstruction(Inst); continue; } @@ -1633,10 +1616,9 @@ void ObjCARCOpt::OptimizeIndividualCalls(Function &F) { Clone->setArgOperand(0, Op); Clone->insertBefore(InsertPos); - DEBUG(dbgs() << "ObjCARCOpt::OptimizeIndividualCalls: Cloning " + DEBUG(dbgs() << "Cloning " << *CInst << "\n" - " And inserting " - "clone at " << *InsertPos << "\n"); + "And inserting clone at " << *InsertPos << "\n"); Worklist.push_back(std::make_pair(Clone, Incoming)); } } @@ -1648,7 +1630,65 @@ void ObjCARCOpt::OptimizeIndividualCalls(Function &F) { } } while (!Worklist.empty()); } - DEBUG(dbgs() << "ObjCARCOpt::OptimizeIndividualCalls: Finished List.\n"); +} + +/// If we have a top down pointer in the S_Use state, make sure that there are +/// no CFG hazards by checking the states of various bottom up pointers. +static void CheckForUseCFGHazard(const Sequence SuccSSeq, + const bool SuccSRRIKnownSafe, + PtrState &S, + bool &SomeSuccHasSame, + bool &AllSuccsHaveSame, + bool &ShouldContinue) { + switch (SuccSSeq) { + case S_CanRelease: { + if (!S.RRI.KnownSafe && !SuccSRRIKnownSafe) { + S.ClearSequenceProgress(); + break; + } + ShouldContinue = true; + break; + } + case S_Use: + SomeSuccHasSame = true; + break; + case S_Stop: + case S_Release: + case S_MovableRelease: + if (!S.RRI.KnownSafe && !SuccSRRIKnownSafe) + AllSuccsHaveSame = false; + break; + case S_Retain: + llvm_unreachable("bottom-up pointer in retain state!"); + case S_None: + llvm_unreachable("This should have been handled earlier."); + } +} + +/// If we have a Top Down pointer in the S_CanRelease state, make sure that +/// there are no CFG hazards by checking the states of various bottom up +/// pointers. 
+static void CheckForCanReleaseCFGHazard(const Sequence SuccSSeq, + const bool SuccSRRIKnownSafe, + PtrState &S, + bool &SomeSuccHasSame, + bool &AllSuccsHaveSame) { + switch (SuccSSeq) { + case S_CanRelease: + SomeSuccHasSame = true; + break; + case S_Stop: + case S_Release: + case S_MovableRelease: + case S_Use: + if (!S.RRI.KnownSafe && !SuccSRRIKnownSafe) + AllSuccsHaveSame = false; + break; + case S_Retain: + llvm_unreachable("bottom-up pointer in retain state!"); + case S_None: + llvm_unreachable("This should have been handled earlier."); + } } /// Check for critical edges, loop boundaries, irreducible control flow, or @@ -1661,106 +1701,82 @@ ObjCARCOpt::CheckForCFGHazards(const BasicBlock *BB, // If any top-down local-use or possible-dec has a succ which is earlier in // the sequence, forget it. for (BBState::ptr_iterator I = MyStates.top_down_ptr_begin(), - E = MyStates.top_down_ptr_end(); I != E; ++I) - switch (I->second.GetSeq()) { - default: break; - case S_Use: { - const Value *Arg = I->first; - const TerminatorInst *TI = cast<TerminatorInst>(&BB->back()); - bool SomeSuccHasSame = false; - bool AllSuccsHaveSame = true; - PtrState &S = I->second; - succ_const_iterator SI(TI), SE(TI, false); - - for (; SI != SE; ++SI) { - Sequence SuccSSeq = S_None; - bool SuccSRRIKnownSafe = false; - // If VisitBottomUp has pointer information for this successor, take - // what we know about it. - DenseMap<const BasicBlock *, BBState>::iterator BBI = - BBStates.find(*SI); - assert(BBI != BBStates.end()); - const PtrState &SuccS = BBI->second.getPtrBottomUpState(Arg); - SuccSSeq = SuccS.GetSeq(); - SuccSRRIKnownSafe = SuccS.RRI.KnownSafe; - switch (SuccSSeq) { - case S_None: - case S_CanRelease: { - if (!S.RRI.KnownSafe && !SuccSRRIKnownSafe) { - S.ClearSequenceProgress(); - break; - } - continue; - } - case S_Use: - SomeSuccHasSame = true; - break; - case S_Stop: - case S_Release: - case S_MovableRelease: - if (!S.RRI.KnownSafe && !SuccSRRIKnownSafe) - AllSuccsHaveSame = false; - break; - case S_Retain: - llvm_unreachable("bottom-up pointer in retain state!"); - } - } - // If the state at the other end of any of the successor edges - // matches the current state, require all edges to match. This - // guards against loops in the middle of a sequence. - if (SomeSuccHasSame && !AllSuccsHaveSame) + E = MyStates.top_down_ptr_end(); I != E; ++I) { + PtrState &S = I->second; + const Sequence Seq = I->second.GetSeq(); + + // We only care about S_Retain, S_CanRelease, and S_Use. + if (Seq == S_None) + continue; + + // Make sure that if extra top down states are added in the future that this + // code is updated to handle it. + assert((Seq == S_Retain || Seq == S_CanRelease || Seq == S_Use) && + "Unknown top down sequence state."); + + const Value *Arg = I->first; + const TerminatorInst *TI = cast<TerminatorInst>(&BB->back()); + bool SomeSuccHasSame = false; + bool AllSuccsHaveSame = true; + + succ_const_iterator SI(TI), SE(TI, false); + + for (; SI != SE; ++SI) { + // If VisitBottomUp has pointer information for this successor, take + // what we know about it. + const DenseMap<const BasicBlock *, BBState>::iterator BBI = + BBStates.find(*SI); + assert(BBI != BBStates.end()); + const PtrState &SuccS = BBI->second.getPtrBottomUpState(Arg); + const Sequence SuccSSeq = SuccS.GetSeq(); + + // If bottom up, the pointer is in an S_None state, clear the sequence + // progress since the sequence in the bottom up state finished + // suggesting a mismatch in between retains/releases. 
This is true for + // all three cases that we are handling here: S_Retain, S_Use, and + // S_CanRelease. + if (SuccSSeq == S_None) { S.ClearSequenceProgress(); - break; - } - case S_CanRelease: { - const Value *Arg = I->first; - const TerminatorInst *TI = cast<TerminatorInst>(&BB->back()); - bool SomeSuccHasSame = false; - bool AllSuccsHaveSame = true; - PtrState &S = I->second; - succ_const_iterator SI(TI), SE(TI, false); - - for (; SI != SE; ++SI) { - Sequence SuccSSeq = S_None; - bool SuccSRRIKnownSafe = false; - // If VisitBottomUp has pointer information for this successor, take - // what we know about it. - DenseMap<const BasicBlock *, BBState>::iterator BBI = - BBStates.find(*SI); - assert(BBI != BBStates.end()); - const PtrState &SuccS = BBI->second.getPtrBottomUpState(Arg); - SuccSSeq = SuccS.GetSeq(); - SuccSRRIKnownSafe = SuccS.RRI.KnownSafe; - switch (SuccSSeq) { - case S_None: { - if (!S.RRI.KnownSafe && !SuccSRRIKnownSafe) { - S.ClearSequenceProgress(); - break; - } + continue; + } + + // If we have S_Use or S_CanRelease, perform our check for cfg hazard + // checks. + const bool SuccSRRIKnownSafe = SuccS.RRI.KnownSafe; + + // *NOTE* We do not use Seq from above here since we are allowing for + // S.GetSeq() to change while we are visiting basic blocks. + switch(S.GetSeq()) { + case S_Use: { + bool ShouldContinue = false; + CheckForUseCFGHazard(SuccSSeq, SuccSRRIKnownSafe, S, + SomeSuccHasSame, AllSuccsHaveSame, + ShouldContinue); + if (ShouldContinue) continue; - } - case S_CanRelease: - SomeSuccHasSame = true; - break; - case S_Stop: - case S_Release: - case S_MovableRelease: - case S_Use: - if (!S.RRI.KnownSafe && !SuccSRRIKnownSafe) - AllSuccsHaveSame = false; - break; - case S_Retain: - llvm_unreachable("bottom-up pointer in retain state!"); - } + break; + } + case S_CanRelease: { + CheckForCanReleaseCFGHazard(SuccSSeq, SuccSRRIKnownSafe, + S, SomeSuccHasSame, + AllSuccsHaveSame); + break; + } + case S_Retain: + case S_None: + case S_Stop: + case S_Release: + case S_MovableRelease: + break; } - // If the state at the other end of any of the successor edges - // matches the current state, require all edges to match. This - // guards against loops in the middle of a sequence. - if (SomeSuccHasSame && !AllSuccsHaveSame) - S.ClearSequenceProgress(); - break; - } } + + // If the state at the other end of any of the successor edges + // matches the current state, require all edges to match. This + // guards against loops in the middle of a sequence. + if (SomeSuccHasSame && !AllSuccsHaveSame) + S.ClearSequenceProgress(); + } } bool @@ -1772,6 +1788,8 @@ ObjCARCOpt::VisitInstructionBottomUp(Instruction *Inst, InstructionClass Class = GetInstructionClass(Inst); const Value *Arg = 0; + DEBUG(dbgs() << "Class: " << Class << "\n"); + switch (Class) { case IC_Release: { Arg = GetObjCArg(Inst); @@ -1786,8 +1804,7 @@ ObjCARCOpt::VisitInstructionBottomUp(Instruction *Inst, // pairs by making PtrState hold a stack of states, but this is // simple and avoids adding overhead for the non-nested case. if (S.GetSeq() == S_Release || S.GetSeq() == S_MovableRelease) { - DEBUG(dbgs() << "ObjCARCOpt::VisitInstructionBottomUp: Found nested " - "releases (i.e. a release pair)\n"); + DEBUG(dbgs() << "Found nested releases (i.e. 
a release pair)\n"); NestingDetected = true; } @@ -1820,7 +1837,10 @@ ObjCARCOpt::VisitInstructionBottomUp(Instruction *Inst, case S_Release: case S_MovableRelease: case S_Use: - S.RRI.ReverseInsertPts.clear(); + // If OldSeq is not S_Use or OldSeq is S_Use and we are tracking an + // imprecise release, clear our reverse insertion points. + if (OldSeq != S_Use || S.RRI.IsTrackingImpreciseReleases()) + S.RRI.ReverseInsertPts.clear(); // FALL THROUGH case S_CanRelease: // Don't do retain+release tracking for IC_RetainRV, because it's @@ -1835,7 +1855,8 @@ ObjCARCOpt::VisitInstructionBottomUp(Instruction *Inst, llvm_unreachable("bottom-up pointer in retain state!"); } ANNOTATE_BOTTOMUP(Inst, Arg, OldSeq, S.GetSeq()); - return NestingDetected; + // A retain moving bottom up can be a use. + break; } case IC_AutoreleasepoolPop: // Conservatively, clear MyStates for all known pointers. @@ -1861,6 +1882,8 @@ ObjCARCOpt::VisitInstructionBottomUp(Instruction *Inst, // Check for possible releases. if (CanAlterRefCount(Inst, Ptr, PA, Class)) { + DEBUG(dbgs() << "CanAlterRefCount: Seq: " << Seq << "; " << *Ptr + << "\n"); S.ClearKnownPositiveRefCount(); switch (Seq) { case S_Use: @@ -1883,6 +1906,8 @@ ObjCARCOpt::VisitInstructionBottomUp(Instruction *Inst, case S_Release: case S_MovableRelease: if (CanUse(Inst, Ptr, PA, Class)) { + DEBUG(dbgs() << "CanUse: Seq: " << Seq << "; " << *Ptr + << "\n"); assert(S.RRI.ReverseInsertPts.empty()); // If this is an invoke instruction, we're scanning it as part of // one of its successor blocks, since we can't insert code after it @@ -1894,6 +1919,8 @@ ObjCARCOpt::VisitInstructionBottomUp(Instruction *Inst, S.SetSeq(S_Use); ANNOTATE_BOTTOMUP(Inst, Ptr, Seq, S_Use); } else if (Seq == S_Release && IsUser(Class)) { + DEBUG(dbgs() << "PreciseReleaseUse: Seq: " << Seq << "; " << *Ptr + << "\n"); // Non-movable releases depend on any possible objc pointer use. 
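            // (Editorial note, not part of the patch: a release is "movable"
            // only when it carries the clang.imprecise_release metadata;
            // without it the object must stay alive up to every possible use
            // of the pointer, so a potential use here demotes the sequence
            // from S_Release to S_Stop instead of letting the release float
            // past the use.)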
S.SetSeq(S_Stop); ANNOTATE_BOTTOMUP(Inst, Ptr, S_Release, S_Stop); @@ -1907,6 +1934,8 @@ ObjCARCOpt::VisitInstructionBottomUp(Instruction *Inst, break; case S_Stop: if (CanUse(Inst, Ptr, PA, Class)) { + DEBUG(dbgs() << "PreciseStopUse: Seq: " << Seq << "; " << *Ptr + << "\n"); S.SetSeq(S_Use); ANNOTATE_BOTTOMUP(Inst, Ptr, Seq, S_Use); } @@ -1927,6 +1956,9 @@ bool ObjCARCOpt::VisitBottomUp(BasicBlock *BB, DenseMap<const BasicBlock *, BBState> &BBStates, MapVector<Value *, RRInfo> &Retains) { + + DEBUG(dbgs() << "\n== ObjCARCOpt::VisitBottomUp ==\n"); + bool NestingDetected = false; BBState &MyStates = BBStates[BB]; @@ -1960,7 +1992,7 @@ ObjCARCOpt::VisitBottomUp(BasicBlock *BB, if (isa<InvokeInst>(Inst)) continue; - DEBUG(dbgs() << "ObjCARCOpt::VisitButtonUp: Visiting " << *Inst << "\n"); + DEBUG(dbgs() << "Visiting " << *Inst << "\n"); NestingDetected |= VisitInstructionBottomUp(Inst, BB, Retains, MyStates); } @@ -2033,13 +2065,18 @@ ObjCARCOpt::VisitInstructionTopDown(Instruction *Inst, PtrState &S = MyStates.getPtrTopDownState(Arg); S.ClearKnownPositiveRefCount(); - switch (S.GetSeq()) { + Sequence OldSeq = S.GetSeq(); + + MDNode *ReleaseMetadata = Inst->getMetadata(ImpreciseReleaseMDKind); + + switch (OldSeq) { case S_Retain: case S_CanRelease: - S.RRI.ReverseInsertPts.clear(); + if (OldSeq == S_Retain || ReleaseMetadata != 0) + S.RRI.ReverseInsertPts.clear(); // FALL THROUGH case S_Use: - S.RRI.ReleaseMetadata = Inst->getMetadata(ImpreciseReleaseMDKind); + S.RRI.ReleaseMetadata = ReleaseMetadata; S.RRI.IsTailCallRelease = cast<CallInst>(Inst)->isTailCall(); Releases[Inst] = S.RRI; ANNOTATE_TOPDOWN(Inst, Arg, S.GetSeq(), S_None); @@ -2078,6 +2115,8 @@ ObjCARCOpt::VisitInstructionTopDown(Instruction *Inst, // Check for possible releases. if (CanAlterRefCount(Inst, Ptr, PA, Class)) { + DEBUG(dbgs() << "CanAlterRefCount: Seq: " << Seq << "; " << *Ptr + << "\n"); S.ClearKnownPositiveRefCount(); switch (Seq) { case S_Retain: @@ -2105,6 +2144,8 @@ ObjCARCOpt::VisitInstructionTopDown(Instruction *Inst, switch (Seq) { case S_CanRelease: if (CanUse(Inst, Ptr, PA, Class)) { + DEBUG(dbgs() << "CanUse: Seq: " << Seq << "; " << *Ptr + << "\n"); S.SetSeq(S_Use); ANNOTATE_TOPDOWN(Inst, Ptr, Seq, S_Use); } @@ -2127,6 +2168,7 @@ bool ObjCARCOpt::VisitTopDown(BasicBlock *BB, DenseMap<const BasicBlock *, BBState> &BBStates, DenseMap<Value *, RRInfo> &Releases) { + DEBUG(dbgs() << "\n== ObjCARCOpt::VisitTopDown ==\n"); bool NestingDetected = false; BBState &MyStates = BBStates[BB]; @@ -2156,7 +2198,7 @@ ObjCARCOpt::VisitTopDown(BasicBlock *BB, for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I) { Instruction *Inst = I; - DEBUG(dbgs() << "ObjCARCOpt::VisitTopDown: Visiting " << *Inst << "\n"); + DEBUG(dbgs() << "Visiting " << *Inst << "\n"); NestingDetected |= VisitInstructionTopDown(Inst, Releases, MyStates); } @@ -2165,6 +2207,9 @@ ObjCARCOpt::VisitTopDown(BasicBlock *BB, // bottom of the basic block. ANNOTATE_TOPDOWN_BBEND(MyStates, BB); +#ifdef ARC_ANNOTATIONS + if (!(EnableARCAnnotations && DisableCheckForCFGHazards)) +#endif CheckForCFGHazards(BB, BBStates, MyStates); return NestingDetected; } @@ -2296,6 +2341,8 @@ void ObjCARCOpt::MoveCalls(Value *Arg, Type *ArgTy = Arg->getType(); Type *ParamTy = PointerType::getUnqual(Type::getInt8Ty(ArgTy->getContext())); + DEBUG(dbgs() << "== ObjCARCOpt::MoveCalls ==\n"); + // Insert the new retain and release calls. 
for (SmallPtrSet<Instruction *, 2>::const_iterator PI = ReleasesToMove.ReverseInsertPts.begin(), @@ -2308,10 +2355,8 @@ void ObjCARCOpt::MoveCalls(Value *Arg, Call->setDoesNotThrow(); Call->setTailCall(); - DEBUG(dbgs() << "ObjCARCOpt::MoveCalls: Inserting new Release: " << *Call - << "\n" - " At insertion point: " << *InsertPt - << "\n"); + DEBUG(dbgs() << "Inserting new Retain: " << *Call << "\n" + "At insertion point: " << *InsertPt << "\n"); } for (SmallPtrSet<Instruction *, 2>::const_iterator PI = RetainsToMove.ReverseInsertPts.begin(), @@ -2328,10 +2373,8 @@ void ObjCARCOpt::MoveCalls(Value *Arg, if (ReleasesToMove.IsTailCallRelease) Call->setTailCall(); - DEBUG(dbgs() << "ObjCARCOpt::MoveCalls: Inserting new Retain: " << *Call - << "\n" - " At insertion point: " << *InsertPt - << "\n"); + DEBUG(dbgs() << "Inserting new Release: " << *Call << "\n" + "At insertion point: " << *InsertPt << "\n"); } // Delete the original retain and release calls. @@ -2341,8 +2384,7 @@ void ObjCARCOpt::MoveCalls(Value *Arg, Instruction *OrigRetain = *AI; Retains.blot(OrigRetain); DeadInsts.push_back(OrigRetain); - DEBUG(dbgs() << "ObjCARCOpt::MoveCalls: Deleting retain: " << *OrigRetain << - "\n"); + DEBUG(dbgs() << "Deleting retain: " << *OrigRetain << "\n"); } for (SmallPtrSet<Instruction *, 2>::const_iterator AI = ReleasesToMove.Calls.begin(), @@ -2350,9 +2392,9 @@ void ObjCARCOpt::MoveCalls(Value *Arg, Instruction *OrigRelease = *AI; Releases.erase(OrigRelease); DeadInsts.push_back(OrigRelease); - DEBUG(dbgs() << "ObjCARCOpt::MoveCalls: Deleting release: " << *OrigRelease - << "\n"); + DEBUG(dbgs() << "Deleting release: " << *OrigRelease << "\n"); } + } bool @@ -2506,6 +2548,12 @@ ObjCARCOpt::ConnectTDBUTraversals(DenseMap<const BasicBlock *, BBState> if (OldDelta != 0) return false; +#ifdef ARC_ANNOTATIONS + // Do not move calls if ARC annotations are requested. + if (EnableARCAnnotations) + return false; +#endif // ARC_ANNOTATIONS + Changed = true; assert(OldCount != 0 && "Unreachable code?"); NumRRs += OldCount - NewCount; @@ -2524,6 +2572,8 @@ ObjCARCOpt::PerformCodePlacement(DenseMap<const BasicBlock *, BBState> MapVector<Value *, RRInfo> &Retains, DenseMap<Value *, RRInfo> &Releases, Module *M) { + DEBUG(dbgs() << "\n== ObjCARCOpt::PerformCodePlacement ==\n"); + bool AnyPairsCompletelyEliminated = false; RRInfo RetainsToMove; RRInfo ReleasesToMove; @@ -2539,8 +2589,7 @@ ObjCARCOpt::PerformCodePlacement(DenseMap<const BasicBlock *, BBState> Instruction *Retain = cast<Instruction>(V); - DEBUG(dbgs() << "ObjCARCOpt::PerformCodePlacement: Visiting: " << *Retain - << "\n"); + DEBUG(dbgs() << "Visiting: " << *Retain << "\n"); Value *Arg = GetObjCArg(Retain); @@ -2567,12 +2616,6 @@ ObjCARCOpt::PerformCodePlacement(DenseMap<const BasicBlock *, BBState> ReleasesToMove, Arg, KnownSafe, AnyPairsCompletelyEliminated); -#ifdef ARC_ANNOTATIONS - // Do not move calls if ARC annotations are requested. If we were to move - // calls in this case, we would not be able - PerformMoveCalls = PerformMoveCalls && !EnableARCAnnotations; -#endif // ARC_ANNOTATIONS - if (PerformMoveCalls) { // Ok, everything checks out and we're all set. Let's move/delete some // code! @@ -2597,14 +2640,15 @@ ObjCARCOpt::PerformCodePlacement(DenseMap<const BasicBlock *, BBState> /// Weak pointer optimizations. void ObjCARCOpt::OptimizeWeakCalls(Function &F) { + DEBUG(dbgs() << "\n== ObjCARCOpt::OptimizeWeakCalls ==\n"); + // First, do memdep-style RLE and S2L optimizations. 
We can't use memdep // itself because it uses AliasAnalysis and we need to do provenance // queries instead. for (inst_iterator I = inst_begin(&F), E = inst_end(&F); I != E; ) { Instruction *Inst = &*I++; - DEBUG(dbgs() << "ObjCARCOpt::OptimizeWeakCalls: Visiting: " << *Inst << - "\n"); + DEBUG(dbgs() << "Visiting: " << *Inst << "\n"); InstructionClass Class = GetBasicInstructionClass(Inst); if (Class != IC_LoadWeak && Class != IC_LoadWeakRetained) @@ -2752,9 +2796,6 @@ void ObjCARCOpt::OptimizeWeakCalls(Function &F) { done:; } } - - DEBUG(dbgs() << "ObjCARCOpt::OptimizeWeakCalls: Finished List.\n\n"); - } /// Identify program paths which execute sequences of retains and releases which @@ -2820,17 +2861,17 @@ FindPredecessorRetainWithSafePath(const Value *Arg, BasicBlock *BB, BB, Autorelease, DepInsts, Visited, PA); if (DepInsts.size() != 1) return 0; - + CallInst *Retain = dyn_cast_or_null<CallInst>(*DepInsts.begin()); - + // Check that we found a retain with the same argument. if (!Retain || !IsRetain(GetBasicInstructionClass(Retain)) || GetObjCArg(Retain) != Arg) { return 0; } - + return Retain; } @@ -2847,7 +2888,7 @@ FindPredecessorAutoreleaseWithSafePath(const Value *Arg, BasicBlock *BB, BB, Ret, DepInsts, V, PA); if (DepInsts.size() != 1) return 0; - + CallInst *Autorelease = dyn_cast_or_null<CallInst>(*DepInsts.begin()); if (!Autorelease) @@ -2857,7 +2898,7 @@ FindPredecessorAutoreleaseWithSafePath(const Value *Arg, BasicBlock *BB, return 0; if (GetObjCArg(Autorelease) != Arg) return 0; - + return Autorelease; } @@ -2873,60 +2914,87 @@ void ObjCARCOpt::OptimizeReturns(Function &F) { if (!F.getReturnType()->isPointerTy()) return; + DEBUG(dbgs() << "\n== ObjCARCOpt::OptimizeReturns ==\n"); + SmallPtrSet<Instruction *, 4> DependingInstructions; SmallPtrSet<const BasicBlock *, 4> Visited; for (Function::iterator FI = F.begin(), FE = F.end(); FI != FE; ++FI) { BasicBlock *BB = FI; ReturnInst *Ret = dyn_cast<ReturnInst>(&BB->back()); - DEBUG(dbgs() << "ObjCARCOpt::OptimizeReturns: Visiting: " << *Ret << "\n"); + DEBUG(dbgs() << "Visiting: " << *Ret << "\n"); if (!Ret) continue; - + const Value *Arg = StripPointerCastsAndObjCCalls(Ret->getOperand(0)); - - // Look for an ``autorelease'' instruction that is a predecssor of Ret and + + // Look for an ``autorelease'' instruction that is a predecessor of Ret and // dependent on Arg such that there are no instructions dependent on Arg // that need a positive ref count in between the autorelease and Ret. CallInst *Autorelease = FindPredecessorAutoreleaseWithSafePath(Arg, BB, Ret, DependingInstructions, Visited, PA); - if (Autorelease) { - DependingInstructions.clear(); - Visited.clear(); - - CallInst *Retain = - FindPredecessorRetainWithSafePath(Arg, BB, Autorelease, - DependingInstructions, Visited, PA); - if (Retain) { - DependingInstructions.clear(); - Visited.clear(); - - // Check that there is nothing that can affect the reference count - // between the retain and the call. Note that Retain need not be in BB. - if (HasSafePathToPredecessorCall(Arg, Retain, DependingInstructions, - Visited, PA)) { - // If so, we can zap the retain and autorelease. 
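// (Editorial sketch, not part of the patch: the source-level pattern
// OptimizeReturns is looking for.  The ARC runtime entry points are declared
// loosely as void* here purely for illustration; the real prototypes use id.
// A value that is retained and then autoreleased on the return path, with
// nothing in between that can touch its reference count, needs neither call.)

extern "C" void *objc_retain(void *obj);
extern "C" void *objc_autorelease(void *obj);

void *returnsBefore(void *obj) {
  void *tmp = objc_retain(obj);    // +1 on the returned object
  return objc_autorelease(tmp);    // hands that +1 to the autorelease pool
}

void *returnsAfter(void *obj) {    // what is left once the pair is zapped
  return obj;
}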
- Changed = true; - ++NumRets; - DEBUG(dbgs() << "ObjCARCOpt::OptimizeReturns: Erasing: " << *Retain - << "\n Erasing: " - << *Autorelease << "\n"); - EraseInstruction(Retain); - EraseInstruction(Autorelease); - } - } - } - DependingInstructions.clear(); Visited.clear(); + + if (!Autorelease) + continue; + + CallInst *Retain = + FindPredecessorRetainWithSafePath(Arg, BB, Autorelease, + DependingInstructions, Visited, PA); + DependingInstructions.clear(); + Visited.clear(); + + if (!Retain) + continue; + + // Check that there is nothing that can affect the reference count + // between the retain and the call. Note that Retain need not be in BB. + bool HasSafePathToCall = HasSafePathToPredecessorCall(Arg, Retain, + DependingInstructions, + Visited, PA); + DependingInstructions.clear(); + Visited.clear(); + + if (!HasSafePathToCall) + continue; + + // If so, we can zap the retain and autorelease. + Changed = true; + ++NumRets; + DEBUG(dbgs() << "Erasing: " << *Retain << "\nErasing: " + << *Autorelease << "\n"); + EraseInstruction(Retain); + EraseInstruction(Autorelease); } +} - DEBUG(dbgs() << "ObjCARCOpt::OptimizeReturns: Finished List.\n\n"); +#ifndef NDEBUG +void +ObjCARCOpt::GatherStatistics(Function &F, bool AfterOptimization) { + llvm::Statistic &NumRetains = + AfterOptimization? NumRetainsAfterOpt : NumRetainsBeforeOpt; + llvm::Statistic &NumReleases = + AfterOptimization? NumReleasesAfterOpt : NumReleasesBeforeOpt; + for (inst_iterator I = inst_begin(&F), E = inst_end(&F); I != E; ) { + Instruction *Inst = &*I++; + switch (GetBasicInstructionClass(Inst)) { + default: + break; + case IC_Retain: + ++NumRetains; + break; + case IC_Release: + ++NumReleases; + break; + } + } } +#endif bool ObjCARCOpt::doInitialization(Module &M) { if (!EnableARCOpts) @@ -2958,7 +3026,6 @@ bool ObjCARCOpt::doInitialization(Module &M) { // calls finalizers which can have arbitrary side effects. // These are initialized lazily. - RetainRVCallee = 0; AutoreleaseRVCallee = 0; ReleaseCallee = 0; RetainCallee = 0; @@ -2978,7 +3045,8 @@ bool ObjCARCOpt::runOnFunction(Function &F) { Changed = false; - DEBUG(dbgs() << "ObjCARCOpt: Visiting Function: " << F.getName() << "\n"); + DEBUG(dbgs() << "<<< ObjCARCOpt: Visiting Function: " << F.getName() << " >>>" + "\n"); PA.setAA(&getAnalysis<AliasAnalysis>()); @@ -2986,7 +3054,7 @@ bool ObjCARCOpt::runOnFunction(Function &F) { // when compiling code that isn't ObjC, skip these if the relevant ObjC // library functions aren't declared. - // Preliminary optimizations. This also computs UsedInThisFunction. + // Preliminary optimizations. This also computes UsedInThisFunction. OptimizeIndividualCalls(F); // Optimizations for weak pointers. @@ -3013,6 +3081,13 @@ bool ObjCARCOpt::runOnFunction(Function &F) { (1 << IC_AutoreleaseRV))) OptimizeReturns(F); + // Gather statistics after optimization. 
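  // (Editorial note, not part of the patch: NumRetainsBeforeOpt and friends
  // are ordinary llvm::Statistic counters, so in an assertions-enabled build
  // something along the lines of "opt -objc-arc -stats" should print the
  // retain/release counts gathered both before and after the pass runs.)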
+#ifndef NDEBUG + if (AreStatisticsEnabled()) { + GatherStatistics(F, true); + } +#endif + DEBUG(dbgs() << "\n"); return Changed; diff --git a/contrib/llvm/lib/Transforms/Scalar/CodeGenPrepare.cpp b/contrib/llvm/lib/Transforms/Scalar/CodeGenPrepare.cpp index 015fd2e..f0d29c8 100644 --- a/contrib/llvm/lib/Transforms/Scalar/CodeGenPrepare.cpp +++ b/contrib/llvm/lib/Transforms/Scalar/CodeGenPrepare.cpp @@ -18,6 +18,7 @@ #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/SmallSet.h" #include "llvm/ADT/Statistic.h" +#include "llvm/ADT/ValueMap.h" #include "llvm/Analysis/DominatorInternals.h" #include "llvm/Analysis/Dominators.h" #include "llvm/Analysis/InstructionSimplify.h" @@ -88,7 +89,7 @@ namespace { /// Keeps track of non-local addresses that have been sunk into a block. /// This allows us to avoid inserting duplicate code for blocks with /// multiple load/stores of the same address. - DenseMap<Value*, Value*> SunkAddrs; + ValueMap<Value*, Value*> SunkAddrs; /// ModifiedDT - If CFG is modified in anyway, dominator tree may need to /// be updated. @@ -1653,10 +1654,6 @@ bool CodeGenPrepare::OptimizeMemoryInst(Instruction *MemoryInst, Value *Addr, // start of the block. CurInstIterator = BB->begin(); SunkAddrs.clear(); - } else { - // This address is now available for reassignment, so erase the table - // entry; we don't want to match some completely different instruction. - SunkAddrs[Addr] = 0; } } ++NumMemoryInsts; @@ -1761,7 +1758,7 @@ bool CodeGenPrepare::OptimizeExtUses(Instruction *I) { if (!DefIsLiveOut) return false; - // Make sure non of the uses are PHI nodes. + // Make sure none of the uses are PHI nodes. for (Value::use_iterator UI = Src->use_begin(), E = Src->use_end(); UI != E; ++UI) { Instruction *User = cast<Instruction>(*UI); diff --git a/contrib/llvm/lib/Transforms/Scalar/GVN.cpp b/contrib/llvm/lib/Transforms/Scalar/GVN.cpp index 129af8d..f350b9b 100644 --- a/contrib/llvm/lib/Transforms/Scalar/GVN.cpp +++ b/contrib/llvm/lib/Transforms/Scalar/GVN.cpp @@ -498,6 +498,75 @@ void ValueTable::verifyRemoved(const Value *V) const { //===----------------------------------------------------------------------===// namespace { + class GVN; + struct AvailableValueInBlock { + /// BB - The basic block in question. + BasicBlock *BB; + enum ValType { + SimpleVal, // A simple offsetted value that is accessed. + LoadVal, // A value produced by a load. + MemIntrin // A memory intrinsic which is loaded from. + }; + + /// V - The value that is live out of the block. + PointerIntPair<Value *, 2, ValType> Val; + + /// Offset - The byte offset in Val that is interesting for the load query. 
+ unsigned Offset; + + static AvailableValueInBlock get(BasicBlock *BB, Value *V, + unsigned Offset = 0) { + AvailableValueInBlock Res; + Res.BB = BB; + Res.Val.setPointer(V); + Res.Val.setInt(SimpleVal); + Res.Offset = Offset; + return Res; + } + + static AvailableValueInBlock getMI(BasicBlock *BB, MemIntrinsic *MI, + unsigned Offset = 0) { + AvailableValueInBlock Res; + Res.BB = BB; + Res.Val.setPointer(MI); + Res.Val.setInt(MemIntrin); + Res.Offset = Offset; + return Res; + } + + static AvailableValueInBlock getLoad(BasicBlock *BB, LoadInst *LI, + unsigned Offset = 0) { + AvailableValueInBlock Res; + Res.BB = BB; + Res.Val.setPointer(LI); + Res.Val.setInt(LoadVal); + Res.Offset = Offset; + return Res; + } + + bool isSimpleValue() const { return Val.getInt() == SimpleVal; } + bool isCoercedLoadValue() const { return Val.getInt() == LoadVal; } + bool isMemIntrinValue() const { return Val.getInt() == MemIntrin; } + + Value *getSimpleValue() const { + assert(isSimpleValue() && "Wrong accessor"); + return Val.getPointer(); + } + + LoadInst *getCoercedLoadValue() const { + assert(isCoercedLoadValue() && "Wrong accessor"); + return cast<LoadInst>(Val.getPointer()); + } + + MemIntrinsic *getMemIntrinValue() const { + assert(isMemIntrinValue() && "Wrong accessor"); + return cast<MemIntrinsic>(Val.getPointer()); + } + + /// MaterializeAdjustedValue - Emit code into this block to adjust the value + /// defined here to the specified type. This handles various coercion cases. + Value *MaterializeAdjustedValue(Type *LoadTy, GVN &gvn) const; + }; class GVN : public FunctionPass { bool NoLoads; @@ -519,6 +588,11 @@ namespace { BumpPtrAllocator TableAllocator; SmallVector<Instruction*, 8> InstrsToErase; + + typedef SmallVector<NonLocalDepResult, 64> LoadDepVect; + typedef SmallVector<AvailableValueInBlock, 64> AvailValInBlkVect; + typedef SmallVector<BasicBlock*, 64> UnavailBlkVect; + public: static char ID; // Pass identification, replacement for typeid explicit GVN(bool noloads = false) @@ -599,11 +673,17 @@ namespace { } - // Helper fuctions - // FIXME: eliminate or document these better + // Helper fuctions of redundant load elimination bool processLoad(LoadInst *L); - bool processInstruction(Instruction *I); bool processNonLocalLoad(LoadInst *L); + void AnalyzeLoadAvailability(LoadInst *LI, LoadDepVect &Deps, + AvailValInBlkVect &ValuesPerBlock, + UnavailBlkVect &UnavailableBlocks); + bool PerformLoadPRE(LoadInst *LI, AvailValInBlkVect &ValuesPerBlock, + UnavailBlkVect &UnavailableBlocks); + + // Other helper routines + bool processInstruction(Instruction *I); bool processBlock(BasicBlock *BB); void dump(DenseMap<uint32_t, Value*> &d); bool iterateOnFunction(Function &F); @@ -1159,114 +1239,6 @@ static Value *GetMemInstValueForLoad(MemIntrinsic *SrcInst, unsigned Offset, return ConstantFoldLoadFromConstPtr(Src, &TD); } -namespace { - -struct AvailableValueInBlock { - /// BB - The basic block in question. - BasicBlock *BB; - enum ValType { - SimpleVal, // A simple offsetted value that is accessed. - LoadVal, // A value produced by a load. - MemIntrin // A memory intrinsic which is loaded from. - }; - - /// V - The value that is live out of the block. - PointerIntPair<Value *, 2, ValType> Val; - - /// Offset - The byte offset in Val that is interesting for the load query. 
- unsigned Offset; - - static AvailableValueInBlock get(BasicBlock *BB, Value *V, - unsigned Offset = 0) { - AvailableValueInBlock Res; - Res.BB = BB; - Res.Val.setPointer(V); - Res.Val.setInt(SimpleVal); - Res.Offset = Offset; - return Res; - } - - static AvailableValueInBlock getMI(BasicBlock *BB, MemIntrinsic *MI, - unsigned Offset = 0) { - AvailableValueInBlock Res; - Res.BB = BB; - Res.Val.setPointer(MI); - Res.Val.setInt(MemIntrin); - Res.Offset = Offset; - return Res; - } - - static AvailableValueInBlock getLoad(BasicBlock *BB, LoadInst *LI, - unsigned Offset = 0) { - AvailableValueInBlock Res; - Res.BB = BB; - Res.Val.setPointer(LI); - Res.Val.setInt(LoadVal); - Res.Offset = Offset; - return Res; - } - - bool isSimpleValue() const { return Val.getInt() == SimpleVal; } - bool isCoercedLoadValue() const { return Val.getInt() == LoadVal; } - bool isMemIntrinValue() const { return Val.getInt() == MemIntrin; } - - Value *getSimpleValue() const { - assert(isSimpleValue() && "Wrong accessor"); - return Val.getPointer(); - } - - LoadInst *getCoercedLoadValue() const { - assert(isCoercedLoadValue() && "Wrong accessor"); - return cast<LoadInst>(Val.getPointer()); - } - - MemIntrinsic *getMemIntrinValue() const { - assert(isMemIntrinValue() && "Wrong accessor"); - return cast<MemIntrinsic>(Val.getPointer()); - } - - /// MaterializeAdjustedValue - Emit code into this block to adjust the value - /// defined here to the specified type. This handles various coercion cases. - Value *MaterializeAdjustedValue(Type *LoadTy, GVN &gvn) const { - Value *Res; - if (isSimpleValue()) { - Res = getSimpleValue(); - if (Res->getType() != LoadTy) { - const DataLayout *TD = gvn.getDataLayout(); - assert(TD && "Need target data to handle type mismatch case"); - Res = GetStoreValueForLoad(Res, Offset, LoadTy, BB->getTerminator(), - *TD); - - DEBUG(dbgs() << "GVN COERCED NONLOCAL VAL:\nOffset: " << Offset << " " - << *getSimpleValue() << '\n' - << *Res << '\n' << "\n\n\n"); - } - } else if (isCoercedLoadValue()) { - LoadInst *Load = getCoercedLoadValue(); - if (Load->getType() == LoadTy && Offset == 0) { - Res = Load; - } else { - Res = GetLoadValueForLoad(Load, Offset, LoadTy, BB->getTerminator(), - gvn); - - DEBUG(dbgs() << "GVN COERCED NONLOCAL LOAD:\nOffset: " << Offset << " " - << *getCoercedLoadValue() << '\n' - << *Res << '\n' << "\n\n\n"); - } - } else { - const DataLayout *TD = gvn.getDataLayout(); - assert(TD && "Need target data to handle type mismatch case"); - Res = GetMemInstValueForLoad(getMemIntrinValue(), Offset, - LoadTy, BB->getTerminator(), *TD); - DEBUG(dbgs() << "GVN COERCED NONLOCAL MEM INTRIN:\nOffset: " << Offset - << " " << *getMemIntrinValue() << '\n' - << *Res << '\n' << "\n\n\n"); - } - return Res; - } -}; - -} // end anonymous namespace /// ConstructSSAForLoadSet - Given a set of loads specified by ValuesPerBlock, /// construct SSA form, allowing us to eliminate LI. 
This returns the value @@ -1323,48 +1295,59 @@ static Value *ConstructSSAForLoadSet(LoadInst *LI, return V; } +Value *AvailableValueInBlock::MaterializeAdjustedValue(Type *LoadTy, GVN &gvn) const { + Value *Res; + if (isSimpleValue()) { + Res = getSimpleValue(); + if (Res->getType() != LoadTy) { + const DataLayout *TD = gvn.getDataLayout(); + assert(TD && "Need target data to handle type mismatch case"); + Res = GetStoreValueForLoad(Res, Offset, LoadTy, BB->getTerminator(), + *TD); + + DEBUG(dbgs() << "GVN COERCED NONLOCAL VAL:\nOffset: " << Offset << " " + << *getSimpleValue() << '\n' + << *Res << '\n' << "\n\n\n"); + } + } else if (isCoercedLoadValue()) { + LoadInst *Load = getCoercedLoadValue(); + if (Load->getType() == LoadTy && Offset == 0) { + Res = Load; + } else { + Res = GetLoadValueForLoad(Load, Offset, LoadTy, BB->getTerminator(), + gvn); + + DEBUG(dbgs() << "GVN COERCED NONLOCAL LOAD:\nOffset: " << Offset << " " + << *getCoercedLoadValue() << '\n' + << *Res << '\n' << "\n\n\n"); + } + } else { + const DataLayout *TD = gvn.getDataLayout(); + assert(TD && "Need target data to handle type mismatch case"); + Res = GetMemInstValueForLoad(getMemIntrinValue(), Offset, + LoadTy, BB->getTerminator(), *TD); + DEBUG(dbgs() << "GVN COERCED NONLOCAL MEM INTRIN:\nOffset: " << Offset + << " " << *getMemIntrinValue() << '\n' + << *Res << '\n' << "\n\n\n"); + } + return Res; +} + static bool isLifetimeStart(const Instruction *Inst) { if (const IntrinsicInst* II = dyn_cast<IntrinsicInst>(Inst)) return II->getIntrinsicID() == Intrinsic::lifetime_start; return false; } -/// processNonLocalLoad - Attempt to eliminate a load whose dependencies are -/// non-local by performing PHI construction. -bool GVN::processNonLocalLoad(LoadInst *LI) { - // Find the non-local dependencies of the load. - SmallVector<NonLocalDepResult, 64> Deps; - AliasAnalysis::Location Loc = VN.getAliasAnalysis()->getLocation(LI); - MD->getNonLocalPointerDependency(Loc, true, LI->getParent(), Deps); - //DEBUG(dbgs() << "INVESTIGATING NONLOCAL LOAD: " - // << Deps.size() << *LI << '\n'); - - // If we had to process more than one hundred blocks to find the - // dependencies, this load isn't worth worrying about. Optimizing - // it will be too expensive. - unsigned NumDeps = Deps.size(); - if (NumDeps > 100) - return false; - - // If we had a phi translation failure, we'll have a single entry which is a - // clobber in the current block. Reject this early. - if (NumDeps == 1 && - !Deps[0].getResult().isDef() && !Deps[0].getResult().isClobber()) { - DEBUG( - dbgs() << "GVN: non-local load "; - WriteAsOperand(dbgs(), LI); - dbgs() << " has unknown dependencies\n"; - ); - return false; - } +void GVN::AnalyzeLoadAvailability(LoadInst *LI, LoadDepVect &Deps, + AvailValInBlkVect &ValuesPerBlock, + UnavailBlkVect &UnavailableBlocks) { // Filter out useless results (non-locals, etc). Keep track of the blocks // where we have a value available in repl, also keep track of whether we see // dependencies that produce an unknown value for the load (such as a call // that could potentially clobber the load). 
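// (Editorial sketch, not part of the patch: the source-level shape of the
// redundancy that AnalyzeLoadAvailability and PerformLoadPRE deal with.
// Names and the hand-applied "after" form are illustrative only.)

// Before: the load of *p after the branch is partially redundant -- its value
// is known from the store on the "then" path but not on the "else" path.
int loadBeforePRE(int *p, bool c) {
  if (c)
    *p = 4;
  return *p;
}

// After (conceptually): load PRE inserts the load in the predecessor where the
// value was unavailable and joins the two values with a PHI, so the load that
// followed the branch disappears.
int loadAfterPRE(int *p, bool c) {
  int v;
  if (c) {
    *p = 4;
    v = 4;        // value available from the store
  } else {
    v = *p;       // load inserted into the previously unavailable predecessor
  }
  return v;       // the PHI of {4, v} replaces the original load
}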
- SmallVector<AvailableValueInBlock, 64> ValuesPerBlock; - SmallVector<BasicBlock*, 64> UnavailableBlocks; - + unsigned NumDeps = Deps.size(); for (unsigned i = 0, e = NumDeps; i != e; ++i) { BasicBlock *DepBB = Deps[i].getBB(); MemDepResult DepInfo = Deps[i].getResult(); @@ -1480,35 +1463,11 @@ bool GVN::processNonLocalLoad(LoadInst *LI) { } UnavailableBlocks.push_back(DepBB); - continue; } +} - // If we have no predecessors that produce a known value for this load, exit - // early. - if (ValuesPerBlock.empty()) return false; - - // If all of the instructions we depend on produce a known value for this - // load, then it is fully redundant and we can use PHI insertion to compute - // its value. Insert PHIs and remove the fully redundant value now. - if (UnavailableBlocks.empty()) { - DEBUG(dbgs() << "GVN REMOVING NONLOCAL LOAD: " << *LI << '\n'); - - // Perform PHI construction. - Value *V = ConstructSSAForLoadSet(LI, ValuesPerBlock, *this); - LI->replaceAllUsesWith(V); - - if (isa<PHINode>(V)) - V->takeName(LI); - if (V->getType()->getScalarType()->isPointerTy()) - MD->invalidateCachedPointerInfo(V); - markInstructionForDeletion(LI); - ++NumGVNLoad; - return true; - } - - if (!EnablePRE || !EnableLoadPRE) - return false; - +bool GVN::PerformLoadPRE(LoadInst *LI, AvailValInBlkVect &ValuesPerBlock, + UnavailBlkVect &UnavailableBlocks) { // Okay, we have *some* definitions of the value. This means that the value // is available in some of our (transitive) predecessors. Lets think about // doing PRE of this load. This will involve inserting a new load into the @@ -1526,7 +1485,6 @@ bool GVN::processNonLocalLoad(LoadInst *LI) { BasicBlock *LoadBB = LI->getParent(); BasicBlock *TmpBB = LoadBB; - bool allSingleSucc = true; while (TmpBB->getSinglePredecessor()) { TmpBB = TmpBB->getSinglePredecessor(); if (TmpBB == LoadBB) // Infinite (unreachable) loop. @@ -1615,13 +1573,8 @@ bool GVN::processNonLocalLoad(LoadInst *LI) { // pointer if it is not available. PHITransAddr Address(LI->getPointerOperand(), TD); Value *LoadPtr = 0; - if (allSingleSucc) { - LoadPtr = Address.PHITranslateWithInsertion(LoadBB, UnavailablePred, - *DT, NewInsts); - } else { - Address.PHITranslateValue(LoadBB, UnavailablePred, DT); - LoadPtr = Address.getAddr(); - } + LoadPtr = Address.PHITranslateWithInsertion(LoadBB, UnavailablePred, + *DT, NewInsts); // If we couldn't find or insert a computation of this phi translated value, // we fail PRE. @@ -1632,24 +1585,6 @@ bool GVN::processNonLocalLoad(LoadInst *LI) { break; } - // Make sure it is valid to move this load here. We have to watch out for: - // @1 = getelementptr (i8* p, ... - // test p and branch if == 0 - // load @1 - // It is valid to have the getelementptr before the test, even if p can - // be 0, as getelementptr only does address arithmetic. - // If we are not pushing the value through any multiple-successor blocks - // we do not have this case. Otherwise, check that the load is safe to - // put anywhere; this can be improved, but should be conservatively safe. - if (!allSingleSucc && - // FIXME: REEVALUTE THIS. - !isSafeToLoadUnconditionally(LoadPtr, - UnavailablePred->getTerminator(), - LI->getAlignment(), TD)) { - CanDoPRE = false; - break; - } - I->second = LoadPtr; } @@ -1714,6 +1649,72 @@ bool GVN::processNonLocalLoad(LoadInst *LI) { return true; } +/// processNonLocalLoad - Attempt to eliminate a load whose dependencies are +/// non-local by performing PHI construction. 
+bool GVN::processNonLocalLoad(LoadInst *LI) { + // Step 1: Find the non-local dependencies of the load. + LoadDepVect Deps; + AliasAnalysis::Location Loc = VN.getAliasAnalysis()->getLocation(LI); + MD->getNonLocalPointerDependency(Loc, true, LI->getParent(), Deps); + + // If we had to process more than one hundred blocks to find the + // dependencies, this load isn't worth worrying about. Optimizing + // it will be too expensive. + unsigned NumDeps = Deps.size(); + if (NumDeps > 100) + return false; + + // If we had a phi translation failure, we'll have a single entry which is a + // clobber in the current block. Reject this early. + if (NumDeps == 1 && + !Deps[0].getResult().isDef() && !Deps[0].getResult().isClobber()) { + DEBUG( + dbgs() << "GVN: non-local load "; + WriteAsOperand(dbgs(), LI); + dbgs() << " has unknown dependencies\n"; + ); + return false; + } + + // Step 2: Analyze the availability of the load + AvailValInBlkVect ValuesPerBlock; + UnavailBlkVect UnavailableBlocks; + AnalyzeLoadAvailability(LI, Deps, ValuesPerBlock, UnavailableBlocks); + + // If we have no predecessors that produce a known value for this load, exit + // early. + if (ValuesPerBlock.empty()) + return false; + + // Step 3: Eliminate fully redundancy. + // + // If all of the instructions we depend on produce a known value for this + // load, then it is fully redundant and we can use PHI insertion to compute + // its value. Insert PHIs and remove the fully redundant value now. + if (UnavailableBlocks.empty()) { + DEBUG(dbgs() << "GVN REMOVING NONLOCAL LOAD: " << *LI << '\n'); + + // Perform PHI construction. + Value *V = ConstructSSAForLoadSet(LI, ValuesPerBlock, *this); + LI->replaceAllUsesWith(V); + + if (isa<PHINode>(V)) + V->takeName(LI); + if (V->getType()->getScalarType()->isPointerTy()) + MD->invalidateCachedPointerInfo(V); + markInstructionForDeletion(LI); + ++NumGVNLoad; + return true; + } + + // Step 4: Eliminate partial redundancy. + if (!EnablePRE || !EnableLoadPRE) + return false; + + return PerformLoadPRE(LI, ValuesPerBlock, UnavailableBlocks); +} + + static void patchReplacementInstruction(Instruction *I, Value *Repl) { // Patch the replacement so that it is not more restrictive than the value // being replaced. diff --git a/contrib/llvm/lib/Transforms/Scalar/GlobalMerge.cpp b/contrib/llvm/lib/Transforms/Scalar/GlobalMerge.cpp index 5d02c68..4796eb2 100644 --- a/contrib/llvm/lib/Transforms/Scalar/GlobalMerge.cpp +++ b/contrib/llvm/lib/Transforms/Scalar/GlobalMerge.cpp @@ -200,9 +200,8 @@ void GlobalMerge::collectUsedGlobalVariables(Module &M) { if (!GV || !GV->hasInitializer()) return; // Should be an array of 'i8*'. 
- const ConstantArray *InitList = dyn_cast<ConstantArray>(GV->getInitializer()); - if (InitList == 0) return; - + const ConstantArray *InitList = cast<ConstantArray>(GV->getInitializer()); + for (unsigned i = 0, e = InitList->getNumOperands(); i != e; ++i) if (const GlobalVariable *G = dyn_cast<GlobalVariable>(InitList->getOperand(i)->stripPointerCasts())) diff --git a/contrib/llvm/lib/Transforms/Scalar/LoopRotation.cpp b/contrib/llvm/lib/Transforms/Scalar/LoopRotation.cpp index e98ae95..14c5655 100644 --- a/contrib/llvm/lib/Transforms/Scalar/LoopRotation.cpp +++ b/contrib/llvm/lib/Transforms/Scalar/LoopRotation.cpp @@ -56,8 +56,8 @@ namespace { } bool runOnLoop(Loop *L, LPPassManager &LPM); - void simplifyLoopLatch(Loop *L); - bool rotateLoop(Loop *L); + bool simplifyLoopLatch(Loop *L); + bool rotateLoop(Loop *L, bool SimplifiedLatch); private: LoopInfo *LI; @@ -84,13 +84,14 @@ bool LoopRotate::runOnLoop(Loop *L, LPPassManager &LPM) { // Simplify the loop latch before attempting to rotate the header // upward. Rotation may not be needed if the loop tail can be folded into the // loop exit. - simplifyLoopLatch(L); + bool SimplifiedLatch = simplifyLoopLatch(L); // One loop can be rotated multiple times. bool MadeChange = false; - while (rotateLoop(L)) + while (rotateLoop(L, SimplifiedLatch)) { MadeChange = true; - + SimplifiedLatch = false; + } return MadeChange; } @@ -212,25 +213,25 @@ static bool shouldSpeculateInstrs(BasicBlock::iterator Begin, /// canonical form so downstream passes can handle it. /// /// I don't believe this invalidates SCEV. -void LoopRotate::simplifyLoopLatch(Loop *L) { +bool LoopRotate::simplifyLoopLatch(Loop *L) { BasicBlock *Latch = L->getLoopLatch(); if (!Latch || Latch->hasAddressTaken()) - return; + return false; BranchInst *Jmp = dyn_cast<BranchInst>(Latch->getTerminator()); if (!Jmp || !Jmp->isUnconditional()) - return; + return false; BasicBlock *LastExit = Latch->getSinglePredecessor(); if (!LastExit || !L->isLoopExiting(LastExit)) - return; + return false; BranchInst *BI = dyn_cast<BranchInst>(LastExit->getTerminator()); if (!BI) - return; + return false; if (!shouldSpeculateInstrs(Latch->begin(), Jmp)) - return; + return false; DEBUG(dbgs() << "Folding loop latch " << Latch->getName() << " into " << LastExit->getName() << "\n"); @@ -253,10 +254,20 @@ void LoopRotate::simplifyLoopLatch(Loop *L) { if (DominatorTree *DT = getAnalysisIfAvailable<DominatorTree>()) DT->eraseNode(Latch); Latch->eraseFromParent(); + return true; } /// Rotate loop LP. Return true if the loop is rotated. -bool LoopRotate::rotateLoop(Loop *L) { +/// +/// \param SimplifiedLatch is true if the latch was just folded into the final +/// loop exit. In this case we may want to rotate even though the new latch is +/// now an exiting branch. This rotation would have happened had the latch not +/// been simplified. However, if SimplifiedLatch is false, then we avoid +/// rotating loops in which the latch exits to avoid excessive or endless +/// rotation. LoopRotate should be repeatable and converge to a canonical +/// form. This property is satisfied because simplifying the loop latch can only +/// happen once across multiple invocations of the LoopRotate pass. +bool LoopRotate::rotateLoop(Loop *L, bool SimplifiedLatch) { // If the loop has only one block then there is not much to rotate. 
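// (Editorial sketch, not part of the patch: what rotation looks like at the
// source level for a simple counted loop.  After rotation the loop-closing
// branch, the latch, performs the exit test, which is why an already-exiting
// latch normally means "already rotated" -- unless that latch was only just
// produced by simplifyLoopLatch, the case the SimplifiedLatch flag covers.)
void rotationBefore(int n, void (*body)(int)) {
  int i = 0;
  while (i < n) {    // header both tests and exits; the latch branch is unconditional
    body(i);
    ++i;
  }
}

void rotationAfter(int n, void (*body)(int)) {
  int i = 0;
  if (i < n) {       // guard created by rotation
    do {
      body(i);
      ++i;
    } while (i < n); // the latch now ends in the exiting test
  }
}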
if (L->getBlocks().size() == 1) return false; @@ -276,7 +287,12 @@ bool LoopRotate::rotateLoop(Loop *L) { // If the loop latch already contains a branch that leaves the loop then the // loop is already rotated. - if (OrigLatch == 0 || L->isLoopExiting(OrigLatch)) + if (OrigLatch == 0) + return false; + + // Rotate if either the loop latch does *not* exit the loop, or if the loop + // latch was just simplified. + if (L->isLoopExiting(OrigLatch) && !SimplifiedLatch) return false; // Check size of original header and reject loop if it is very big or we can't @@ -505,4 +521,3 @@ bool LoopRotate::rotateLoop(Loop *L) { ++NumRotated; return true; } - diff --git a/contrib/llvm/lib/Transforms/Scalar/Reassociate.cpp b/contrib/llvm/lib/Transforms/Scalar/Reassociate.cpp index 7ee4027..a3c241d 100644 --- a/contrib/llvm/lib/Transforms/Scalar/Reassociate.cpp +++ b/contrib/llvm/lib/Transforms/Scalar/Reassociate.cpp @@ -143,13 +143,9 @@ namespace { // So, if Rank(X) < Rank(Y) < Rank(Z), it means X is defined earlier // than Y which is defined earlier than Z. Permute "x | 1", "Y & 2", // "z" in the order of X-Y-Z is better than any other orders. - class PtrSortFunctor { - ArrayRef<XorOpnd> A; - - public: - PtrSortFunctor(ArrayRef<XorOpnd> Array) : A(Array) {} - bool operator()(unsigned LHSIndex, unsigned RHSIndex) { - return A[LHSIndex].getSymbolicRank() < A[RHSIndex].getSymbolicRank(); + struct PtrSortFunctor { + bool operator()(XorOpnd * const &LHS, XorOpnd * const &RHS) { + return LHS->getSymbolicRank() < RHS->getSymbolicRank(); } }; private: @@ -1199,9 +1195,6 @@ bool Reassociate::CombineXorOpnd(Instruction *I, XorOpnd *Opnd1, XorOpnd *Opnd2, if (X != Opnd2->getSymbolicPart()) return false; - const APInt &C1 = Opnd1->getConstPart(); - const APInt &C2 = Opnd2->getConstPart(); - // This many instruction become dead.(At least "Opnd1 ^ Opnd2" will die.) int DeadInstNum = 1; if (Opnd1->getValue()->hasOneUse()) @@ -1219,6 +1212,8 @@ bool Reassociate::CombineXorOpnd(Instruction *I, XorOpnd *Opnd1, XorOpnd *Opnd2, if (Opnd2->isOrExpr()) std::swap(Opnd1, Opnd2); + const APInt &C1 = Opnd1->getConstPart(); + const APInt &C2 = Opnd2->getConstPart(); APInt C3((~C1) ^ C2); // Do not increase code size! @@ -1234,6 +1229,8 @@ bool Reassociate::CombineXorOpnd(Instruction *I, XorOpnd *Opnd1, XorOpnd *Opnd2, } else if (Opnd1->isOrExpr()) { // Xor-Rule 3: (x | c1) ^ (x | c2) = (x & c3) ^ c3 where c3 = c1 ^ c2 // + const APInt &C1 = Opnd1->getConstPart(); + const APInt &C2 = Opnd2->getConstPart(); APInt C3 = C1 ^ C2; // Do not increase code size @@ -1248,6 +1245,8 @@ bool Reassociate::CombineXorOpnd(Instruction *I, XorOpnd *Opnd1, XorOpnd *Opnd2, } else { // Xor-Rule 4: (x & c1) ^ (x & c2) = (x & (c1^c2)) // + const APInt &C1 = Opnd1->getConstPart(); + const APInt &C2 = Opnd2->getConstPart(); APInt C3 = C1 ^ C2; Res = createAndInstr(I, X, C3); } @@ -1274,7 +1273,7 @@ Value *Reassociate::OptimizeXor(Instruction *I, return 0; SmallVector<XorOpnd, 8> Opnds; - SmallVector<unsigned, 8> OpndIndices; + SmallVector<XorOpnd*, 8> OpndPtrs; Type *Ty = Ops[0].Op->getType(); APInt ConstOpnd(Ty->getIntegerBitWidth(), 0); @@ -1285,23 +1284,29 @@ Value *Reassociate::OptimizeXor(Instruction *I, XorOpnd O(V); O.setSymbolicRank(getRank(O.getSymbolicPart())); Opnds.push_back(O); - OpndIndices.push_back(Opnds.size() - 1); } else ConstOpnd ^= cast<ConstantInt>(V)->getValue(); } + // NOTE: From this point on, do *NOT* add/delete element to/from "Opnds". 
+ // It would otherwise invalidate the "Opnds"'s iterator, and hence invalidate + // the "OpndPtrs" as well. For the similar reason, do not fuse this loop + // with the previous loop --- the iterator of the "Opnds" may be invalidated + // when new elements are added to the vector. + for (unsigned i = 0, e = Opnds.size(); i != e; ++i) + OpndPtrs.push_back(&Opnds[i]); + // Step 2: Sort the Xor-Operands in a way such that the operands containing // the same symbolic value cluster together. For instance, the input operand // sequence ("x | 123", "y & 456", "x & 789") will be sorted into: // ("x | 123", "x & 789", "y & 456"). - std::sort(OpndIndices.begin(), OpndIndices.end(), - XorOpnd::PtrSortFunctor(Opnds)); + std::sort(OpndPtrs.begin(), OpndPtrs.end(), XorOpnd::PtrSortFunctor()); // Step 3: Combine adjacent operands XorOpnd *PrevOpnd = 0; bool Changed = false; for (unsigned i = 0, e = Opnds.size(); i < e; i++) { - XorOpnd *CurrOpnd = &Opnds[OpndIndices[i]]; + XorOpnd *CurrOpnd = OpndPtrs[i]; // The combined value Value *CV; diff --git a/contrib/llvm/lib/Transforms/Scalar/SROA.cpp b/contrib/llvm/lib/Transforms/Scalar/SROA.cpp index f6bb365..d073e78 100644 --- a/contrib/llvm/lib/Transforms/Scalar/SROA.cpp +++ b/contrib/llvm/lib/Transforms/Scalar/SROA.cpp @@ -2322,17 +2322,15 @@ static Value *insertVector(IRBuilderTy &IRB, Value *Old, Value *V, V = IRB.CreateShuffleVector(V, UndefValue::get(V->getType()), ConstantVector::get(Mask), Name + ".expand"); - DEBUG(dbgs() << " shuffle1: " << *V << "\n"); + DEBUG(dbgs() << " shuffle: " << *V << "\n"); Mask.clear(); for (unsigned i = 0; i != VecTy->getNumElements(); ++i) - if (i >= BeginIndex && i < EndIndex) - Mask.push_back(IRB.getInt32(i)); - else - Mask.push_back(IRB.getInt32(i + VecTy->getNumElements())); - V = IRB.CreateShuffleVector(V, Old, ConstantVector::get(Mask), - Name + "insert"); - DEBUG(dbgs() << " shuffle2: " << *V << "\n"); + Mask.push_back(IRB.getInt1(i >= BeginIndex && i < EndIndex)); + + V = IRB.CreateSelect(ConstantVector::get(Mask), V, Old, Name + "blend"); + + DEBUG(dbgs() << " blend: " << *V << "\n"); return V; } @@ -2671,6 +2669,7 @@ private: StoreInst *NewSI; if (BeginOffset == NewAllocaBeginOffset && + EndOffset == NewAllocaEndOffset && canConvertValue(TD, V->getType(), NewAllocaTy)) { V = convertValue(TD, IRB, V, NewAllocaTy); NewSI = IRB.CreateAlignedStore(V, &NewAI, NewAI.getAlignment(), @@ -3050,16 +3049,16 @@ private: bool visitSelectInst(SelectInst &SI) { DEBUG(dbgs() << " original: " << SI << "\n"); - - // Find the operand we need to rewrite here. - bool IsTrueVal = SI.getTrueValue() == OldPtr; - if (IsTrueVal) - assert(SI.getFalseValue() != OldPtr && "Pointer is both operands!"); - else - assert(SI.getFalseValue() == OldPtr && "Pointer isn't an operand!"); + assert((SI.getTrueValue() == OldPtr || SI.getFalseValue() == OldPtr) && + "Pointer isn't an operand!"); Value *NewPtr = getAdjustedAllocaPtr(IRB, OldPtr->getType()); - SI.setOperand(IsTrueVal ? 1 : 2, NewPtr); + // Replace the operands which were using the old pointer. 
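    // (Editorial note, not part of the patch: the code this replaces asserted
    // that exactly one select operand was the old alloca pointer, so a
    // degenerate select whose true and false operands are both that pointer
    // would hit the "Pointer is both operands!" assertion.  Checking each
    // operand independently handles that case while still rewriting the
    // ordinary single-operand form.)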
+ if (SI.getOperand(1) == OldPtr) + SI.setOperand(1, NewPtr); + if (SI.getOperand(2) == OldPtr) + SI.setOperand(2, NewPtr); + DEBUG(dbgs() << " to: " << SI << "\n"); deleteIfTriviallyDead(OldPtr); return false; diff --git a/contrib/llvm/lib/Transforms/Scalar/ScalarReplAggregates.cpp b/contrib/llvm/lib/Transforms/Scalar/ScalarReplAggregates.cpp index e590a37..bfde334 100644 --- a/contrib/llvm/lib/Transforms/Scalar/ScalarReplAggregates.cpp +++ b/contrib/llvm/lib/Transforms/Scalar/ScalarReplAggregates.cpp @@ -1462,8 +1462,8 @@ bool SROA::ShouldAttemptScalarRepl(AllocaInst *AI) { } // performScalarRepl - This algorithm is a simple worklist driven algorithm, -// which runs on all of the alloca instructions in the function, removing them -// if they are only used by getelementptr instructions. +// which runs on all of the alloca instructions in the entry block, removing +// them if they are only used by getelementptr instructions. // bool SROA::performScalarRepl(Function &F) { std::vector<AllocaInst*> WorkList; @@ -1724,17 +1724,8 @@ void SROA::isSafeGEP(GetElementPtrInst *GEPI, continue; ConstantInt *IdxVal = dyn_cast<ConstantInt>(GEPIt.getOperand()); - if (!IdxVal) { - // Non constant GEPs are only a problem on arrays, structs, and pointers - // Vectors can be dynamically indexed. - // FIXME: Add support for dynamic indexing on arrays. This should be - // ok on any subarrays of the alloca array, eg, a[0][i] is ok, but a[i][0] - // isn't. - if (!(*GEPIt)->isVectorTy()) - return MarkUnsafe(Info, GEPI); - NonConstant = true; - NonConstantIdxSize = TD->getTypeAllocSize(*GEPIt); - } + if (!IdxVal) + return MarkUnsafe(Info, GEPI); } // Compute the offset due to this GEP and check if the alloca has a diff --git a/contrib/llvm/lib/Transforms/Utils/CloneFunction.cpp b/contrib/llvm/lib/Transforms/Utils/CloneFunction.cpp index 63d7a1d..be8d39e 100644 --- a/contrib/llvm/lib/Transforms/Utils/CloneFunction.cpp +++ b/contrib/llvm/lib/Transforms/Utils/CloneFunction.cpp @@ -87,29 +87,26 @@ void llvm::CloneFunctionInto(Function *NewFunc, const Function *OldFunc, assert(VMap.count(I) && "No mapping from source argument specified!"); #endif - // Clone any attributes. - if (NewFunc->arg_size() == OldFunc->arg_size()) - NewFunc->copyAttributesFrom(OldFunc); - else { - //Some arguments were deleted with the VMap. Copy arguments one by one - for (Function::const_arg_iterator I = OldFunc->arg_begin(), - E = OldFunc->arg_end(); I != E; ++I) - if (Argument* Anew = dyn_cast<Argument>(VMap[I])) { - AttributeSet attrs = OldFunc->getAttributes() - .getParamAttributes(I->getArgNo() + 1); - if (attrs.getNumSlots() > 0) - Anew->addAttr(attrs); - } - NewFunc->setAttributes(NewFunc->getAttributes() - .addAttributes(NewFunc->getContext(), - AttributeSet::ReturnIndex, - OldFunc->getAttributes())); - NewFunc->setAttributes(NewFunc->getAttributes() - .addAttributes(NewFunc->getContext(), - AttributeSet::FunctionIndex, - OldFunc->getAttributes())); + AttributeSet OldAttrs = OldFunc->getAttributes(); + // Clone any argument attributes that are present in the VMap. 
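  // (Editorial note, not part of the patch: copying attributes one surviving
  // argument at a time is what keeps this correct when the VMap has dropped
  // some arguments -- a blanket copyAttributesFrom() would line parameter
  // attributes up with the wrong positions, which is exactly the case the
  // deleted "arguments were deleted with the VMap" branch was guarding.)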
+ for (Function::const_arg_iterator I = OldFunc->arg_begin(), + E = OldFunc->arg_end(); + I != E; ++I) + if (Argument *Anew = dyn_cast<Argument>(VMap[I])) { + AttributeSet attrs = + OldAttrs.getParamAttributes(I->getArgNo() + 1); + if (attrs.getNumSlots() > 0) + Anew->addAttr(attrs); + } - } + NewFunc->setAttributes(NewFunc->getAttributes() + .addAttributes(NewFunc->getContext(), + AttributeSet::ReturnIndex, + OldAttrs.getRetAttributes())); + NewFunc->setAttributes(NewFunc->getAttributes() + .addAttributes(NewFunc->getContext(), + AttributeSet::FunctionIndex, + OldAttrs.getFnAttributes())); // Loop over all of the basic blocks in the function, cloning them as // appropriate. Note that we save BE this way in order to handle cloning of diff --git a/contrib/llvm/lib/Transforms/Utils/InlineFunction.cpp b/contrib/llvm/lib/Transforms/Utils/InlineFunction.cpp index e9828d6..dabb67b9 100644 --- a/contrib/llvm/lib/Transforms/Utils/InlineFunction.cpp +++ b/contrib/llvm/lib/Transforms/Utils/InlineFunction.cpp @@ -758,8 +758,10 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI, // If the call site was an invoke instruction, add a branch to the normal // destination. - if (InvokeInst *II = dyn_cast<InvokeInst>(TheCall)) - BranchInst::Create(II->getNormalDest(), TheCall); + if (InvokeInst *II = dyn_cast<InvokeInst>(TheCall)) { + BranchInst *NewBr = BranchInst::Create(II->getNormalDest(), TheCall); + NewBr->setDebugLoc(Returns[0]->getDebugLoc()); + } // If the return instruction returned a value, replace uses of the call with // uses of the returned value. @@ -787,15 +789,16 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI, // "starter" and "ender" blocks. How we accomplish this depends on whether // this is an invoke instruction or a call instruction. BasicBlock *AfterCallBB; + BranchInst *CreatedBranchToNormalDest = NULL; if (InvokeInst *II = dyn_cast<InvokeInst>(TheCall)) { // Add an unconditional branch to make this look like the CallInst case... - BranchInst *NewBr = BranchInst::Create(II->getNormalDest(), TheCall); + CreatedBranchToNormalDest = BranchInst::Create(II->getNormalDest(), TheCall); // Split the basic block. This guarantees that no PHI nodes will have to be // updated due to new incoming edges, and make the invoke case more // symmetric to the call case. - AfterCallBB = OrigBB->splitBasicBlock(NewBr, + AfterCallBB = OrigBB->splitBasicBlock(CreatedBranchToNormalDest, CalledFunc->getName()+".exit"); } else { // It's a call @@ -850,11 +853,20 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI, // Add a branch to the merge points and remove return instructions. + DebugLoc Loc; for (unsigned i = 0, e = Returns.size(); i != e; ++i) { ReturnInst *RI = Returns[i]; - BranchInst::Create(AfterCallBB, RI); + BranchInst* BI = BranchInst::Create(AfterCallBB, RI); + Loc = RI->getDebugLoc(); + BI->setDebugLoc(Loc); RI->eraseFromParent(); } + // We need to set the debug location to *somewhere* inside the + // inlined function. The line number may be nonsensical, but the + // instruction will at least be associated with the right + // function. + if (CreatedBranchToNormalDest) + CreatedBranchToNormalDest->setDebugLoc(Loc); } else if (!Returns.empty()) { // Otherwise, if there is exactly one return value, just replace anything // using the return value of the call with the computed value. 
@@ -874,6 +886,9 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI, AfterCallBB->getInstList().splice(AfterCallBB->begin(), ReturnBB->getInstList()); + if (CreatedBranchToNormalDest) + CreatedBranchToNormalDest->setDebugLoc(Returns[0]->getDebugLoc()); + // Delete the return instruction now and empty ReturnBB now. Returns[0]->eraseFromParent(); ReturnBB->eraseFromParent(); diff --git a/contrib/llvm/lib/Transforms/Utils/Local.cpp b/contrib/llvm/lib/Transforms/Utils/Local.cpp index be80d34..12e5b3e 100644 --- a/contrib/llvm/lib/Transforms/Utils/Local.cpp +++ b/contrib/llvm/lib/Transforms/Utils/Local.cpp @@ -832,7 +832,24 @@ unsigned llvm::getOrEnforceKnownAlignment(Value *V, unsigned PrefAlign, /// Dbg Intrinsic utilities /// -/// Inserts a llvm.dbg.value instrinsic before the stores to an alloca'd value +/// See if there is a dbg.value intrinsic for DIVar before I. +static bool LdStHasDebugValue(DIVariable &DIVar, Instruction *I) { + // Since we can't guarantee that the original dbg.declare instrinsic + // is removed by LowerDbgDeclare(), we need to make sure that we are + // not inserting the same dbg.value intrinsic over and over. + llvm::BasicBlock::InstListType::iterator PrevI(I); + if (PrevI != I->getParent()->getInstList().begin()) { + --PrevI; + if (DbgValueInst *DVI = dyn_cast<DbgValueInst>(PrevI)) + if (DVI->getValue() == I->getOperand(0) && + DVI->getOffset() == 0 && + DVI->getVariable() == DIVar) + return true; + } + return false; +} + +/// Inserts a llvm.dbg.value intrinsic before a store to an alloca'd value /// that has an associated llvm.dbg.decl intrinsic. bool llvm::ConvertDebugDeclareToDebugValue(DbgDeclareInst *DDI, StoreInst *SI, DIBuilder &Builder) { @@ -840,6 +857,9 @@ bool llvm::ConvertDebugDeclareToDebugValue(DbgDeclareInst *DDI, if (!DIVar.Verify()) return false; + if (LdStHasDebugValue(DIVar, SI)) + return true; + Instruction *DbgVal = NULL; // If an argument is zero extended then use argument directly. The ZExt // may be zapped by an optimization pass in future. @@ -863,7 +883,7 @@ bool llvm::ConvertDebugDeclareToDebugValue(DbgDeclareInst *DDI, return true; } -/// Inserts a llvm.dbg.value instrinsic before the stores to an alloca'd value +/// Inserts a llvm.dbg.value intrinsic before a load of an alloca'd value /// that has an associated llvm.dbg.decl intrinsic. bool llvm::ConvertDebugDeclareToDebugValue(DbgDeclareInst *DDI, LoadInst *LI, DIBuilder &Builder) { @@ -871,6 +891,9 @@ bool llvm::ConvertDebugDeclareToDebugValue(DbgDeclareInst *DDI, if (!DIVar.Verify()) return false; + if (LdStHasDebugValue(DIVar, LI)) + return true; + Instruction *DbgVal = Builder.insertDbgValueIntrinsic(LI->getOperand(0), 0, DIVar, LI); @@ -902,6 +925,8 @@ bool llvm::LowerDbgDeclare(Function &F) { E = Dbgs.end(); I != E; ++I) { DbgDeclareInst *DDI = *I; if (AllocaInst *AI = dyn_cast_or_null<AllocaInst>(DDI->getAddress())) { + // We only remove the dbg.declare intrinsic if all uses are + // converted to dbg.value intrinsics. 
bool RemoveDDI = true; for (Value::use_iterator UI = AI->use_begin(), E = AI->use_end(); UI != E; ++UI) diff --git a/contrib/llvm/lib/Transforms/Utils/SimplifyCFG.cpp b/contrib/llvm/lib/Transforms/Utils/SimplifyCFG.cpp index 681bf9c..052ad85 100644 --- a/contrib/llvm/lib/Transforms/Utils/SimplifyCFG.cpp +++ b/contrib/llvm/lib/Transforms/Utils/SimplifyCFG.cpp @@ -59,6 +59,10 @@ static cl::opt<bool> SinkCommon("simplifycfg-sink-common", cl::Hidden, cl::init(true), cl::desc("Sink common instructions down to the end block")); +static cl::opt<bool> +HoistCondStores("simplifycfg-hoist-cond-stores", cl::Hidden, cl::init(true), + cl::desc("Hoist conditional stores if an unconditional store preceeds")); + STATISTIC(NumBitMaps, "Number of switch instructions turned into bitmaps"); STATISTIC(NumLookupTables, "Number of switch instructions turned into lookup tables"); STATISTIC(NumSinkCommons, "Number of common instructions sunk down to the end block"); @@ -1332,6 +1336,66 @@ static bool SinkThenElseCodeToEnd(BranchInst *BI1) { return Changed; } +/// \brief Determine if we can hoist sink a sole store instruction out of a +/// conditional block. +/// +/// We are looking for code like the following: +/// BrBB: +/// store i32 %add, i32* %arrayidx2 +/// ... // No other stores or function calls (we could be calling a memory +/// ... // function). +/// %cmp = icmp ult %x, %y +/// br i1 %cmp, label %EndBB, label %ThenBB +/// ThenBB: +/// store i32 %add5, i32* %arrayidx2 +/// br label EndBB +/// EndBB: +/// ... +/// We are going to transform this into: +/// BrBB: +/// store i32 %add, i32* %arrayidx2 +/// ... // +/// %cmp = icmp ult %x, %y +/// %add.add5 = select i1 %cmp, i32 %add, %add5 +/// store i32 %add.add5, i32* %arrayidx2 +/// ... +/// +/// \return The pointer to the value of the previous store if the store can be +/// hoisted into the predecessor block. 0 otherwise. +Value *isSafeToSpeculateStore(Instruction *I, BasicBlock *BrBB, + BasicBlock *StoreBB, BasicBlock *EndBB) { + StoreInst *StoreToHoist = dyn_cast<StoreInst>(I); + if (!StoreToHoist) + return 0; + + // Volatile or atomic. + if (!StoreToHoist->isSimple()) + return 0; + + Value *StorePtr = StoreToHoist->getPointerOperand(); + + // Look for a store to the same pointer in BrBB. + unsigned MaxNumInstToLookAt = 10; + for (BasicBlock::reverse_iterator RI = BrBB->rbegin(), + RE = BrBB->rend(); RI != RE && (--MaxNumInstToLookAt); ++RI) { + Instruction *CurI = &*RI; + + // Could be calling an instruction that effects memory like free(). + if (CurI->mayHaveSideEffects() && !isa<StoreInst>(CurI)) + return 0; + + StoreInst *SI = dyn_cast<StoreInst>(CurI); + // Found the previous store make sure it stores to the same location. + if (SI && SI->getPointerOperand() == StorePtr) + // Found the previous store, return its value operand. + return SI->getValueOperand(); + else if (SI) + return 0; // Unknown store. + } + + return 0; +} + /// \brief Speculate a conditional basic block flattening the CFG. /// /// Note that this is a very risky transform currently. 
Speculating @@ -1395,6 +1459,8 @@ static bool SpeculativelyExecuteBB(BranchInst *BI, BasicBlock *ThenBB) { SmallDenseMap<Instruction *, unsigned, 4> SinkCandidateUseCounts; unsigned SpeculationCost = 0; + Value *SpeculatedStoreValue = 0; + StoreInst *SpeculatedStore = 0; for (BasicBlock::iterator BBI = ThenBB->begin(), BBE = llvm::prior(ThenBB->end()); BBI != BBE; ++BBI) { @@ -1410,13 +1476,21 @@ static bool SpeculativelyExecuteBB(BranchInst *BI, BasicBlock *ThenBB) { return false; // Don't hoist the instruction if it's unsafe or expensive. - if (!isSafeToSpeculativelyExecute(I)) + if (!isSafeToSpeculativelyExecute(I) && + !(HoistCondStores && + (SpeculatedStoreValue = isSafeToSpeculateStore(I, BB, ThenBB, + EndBB)))) return false; - if (ComputeSpeculationCost(I) > PHINodeFoldingThreshold) + if (!SpeculatedStoreValue && + ComputeSpeculationCost(I) > PHINodeFoldingThreshold) return false; + // Store the store speculation candidate. + if (SpeculatedStoreValue) + SpeculatedStore = cast<StoreInst>(I); + // Do not hoist the instruction if any of its operands are defined but not - // used in this BB. The transformation will prevent the operand from + // used in BB. The transformation will prevent the operand from // being sunk into the use block. for (User::op_iterator i = I->op_begin(), e = I->op_end(); i != e; ++i) { @@ -1473,12 +1547,24 @@ static bool SpeculativelyExecuteBB(BranchInst *BI, BasicBlock *ThenBB) { // If there are no PHIs to process, bail early. This helps ensure idempotence // as well. - if (!HaveRewritablePHIs) + if (!HaveRewritablePHIs && !(HoistCondStores && SpeculatedStoreValue)) return false; // If we get here, we can hoist the instruction and if-convert. DEBUG(dbgs() << "SPECULATIVELY EXECUTING BB" << *ThenBB << "\n";); + // Insert a select of the value of the speculated store. + if (SpeculatedStoreValue) { + IRBuilder<true, NoFolder> Builder(BI); + Value *TrueV = SpeculatedStore->getValueOperand(); + Value *FalseV = SpeculatedStoreValue; + if (Invert) + std::swap(TrueV, FalseV); + Value *S = Builder.CreateSelect(BrCond, TrueV, FalseV, TrueV->getName() + + "." + FalseV->getName()); + SpeculatedStore->setOperand(0, S); + } + // Hoist the instructions. BB->getInstList().splice(BI, ThenBB->getInstList(), ThenBB->begin(), llvm::prior(ThenBB->end())); @@ -3073,7 +3159,12 @@ static bool TurnSwitchRangeIntoICmp(SwitchInst *SI, IRBuilder<> &Builder) { Value *Sub = SI->getCondition(); if (!Offset->isNullValue()) Sub = Builder.CreateAdd(Sub, Offset, Sub->getName()+".off"); - Value *Cmp = Builder.CreateICmpULT(Sub, NumCases, "switch"); + Value *Cmp; + // If NumCases overflowed, then all possible values jump to the successor. 
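// Illustrative corner case for the guard below, not from the patch: when the
// contiguous case range covers every value of the condition type (e.g. a switch on an
// i1 with both cases present, or an i8 with all 256 values), the NumCases constant
// wraps to zero, so 'icmp ult %sub, 0' would be always false; emitting a constant
// 'true' instead is correct because every possible value jumps to a case successor.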
+ if (NumCases->isNullValue() && SI->getNumCases() != 0) + Cmp = ConstantInt::getTrue(SI->getContext()); + else + Cmp = Builder.CreateICmpULT(Sub, NumCases, "switch"); BranchInst *NewBI = Builder.CreateCondBr( Cmp, SI->case_begin().getCaseSuccessor(), SI->getDefaultDest()); diff --git a/contrib/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp b/contrib/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp index c231704..6bea2dd 100644 --- a/contrib/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp +++ b/contrib/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp @@ -1518,6 +1518,12 @@ struct FPrintFOpt : public LibCallOptimization { if (!getConstantStringInfo(CI->getArgOperand(1), FormatStr)) return 0; + // Do not do any of the following transformations if the fprintf return + // value is used, in general the fprintf return value is not compatible + // with fwrite(), fputc() or fputs(). + if (!CI->use_empty()) + return 0; + // fprintf(F, "foo") --> fwrite("foo", 3, 1, F) if (CI->getNumArgOperands() == 2) { for (unsigned i = 0, e = FormatStr.size(); i != e; ++i) @@ -1527,11 +1533,10 @@ struct FPrintFOpt : public LibCallOptimization { // These optimizations require DataLayout. if (!TD) return 0; - Value *NewCI = EmitFWrite(CI->getArgOperand(1), - ConstantInt::get(TD->getIntPtrType(*Context), - FormatStr.size()), - CI->getArgOperand(0), B, TD, TLI); - return NewCI ? ConstantInt::get(CI->getType(), FormatStr.size()) : 0; + return EmitFWrite(CI->getArgOperand(1), + ConstantInt::get(TD->getIntPtrType(*Context), + FormatStr.size()), + CI->getArgOperand(0), B, TD, TLI); } // The remaining optimizations require the format string to be "%s" or "%c" @@ -1544,14 +1549,12 @@ struct FPrintFOpt : public LibCallOptimization { if (FormatStr[1] == 'c') { // fprintf(F, "%c", chr) --> fputc(chr, F) if (!CI->getArgOperand(2)->getType()->isIntegerTy()) return 0; - Value *NewCI = EmitFPutC(CI->getArgOperand(2), CI->getArgOperand(0), B, - TD, TLI); - return NewCI ? ConstantInt::get(CI->getType(), 1) : 0; + return EmitFPutC(CI->getArgOperand(2), CI->getArgOperand(0), B, TD, TLI); } if (FormatStr[1] == 's') { // fprintf(F, "%s", str) --> fputs(str, F) - if (!CI->getArgOperand(2)->getType()->isPointerTy() || !CI->use_empty()) + if (!CI->getArgOperand(2)->getType()->isPointerTy()) return 0; return EmitFPutS(CI->getArgOperand(2), CI->getArgOperand(0), B, TD, TLI); } diff --git a/contrib/llvm/lib/Transforms/Utils/Utils.cpp b/contrib/llvm/lib/Transforms/Utils/Utils.cpp index 5812d46..c3df215 100644 --- a/contrib/llvm/lib/Transforms/Utils/Utils.cpp +++ b/contrib/llvm/lib/Transforms/Utils/Utils.cpp @@ -13,6 +13,7 @@ //===----------------------------------------------------------------------===// #include "llvm/InitializePasses.h" +#include "llvm/PassRegistry.h" #include "llvm-c/Initialization.h" using namespace llvm; diff --git a/contrib/llvm/lib/Transforms/Utils/ValueMapper.cpp b/contrib/llvm/lib/Transforms/Utils/ValueMapper.cpp index b5941bd..544c5ee 100644 --- a/contrib/llvm/lib/Transforms/Utils/ValueMapper.cpp +++ b/contrib/llvm/lib/Transforms/Utils/ValueMapper.cpp @@ -57,7 +57,7 @@ Value *llvm::MapValue(const Value *V, ValueToValueMapTy &VM, RemapFlags Flags, return VM[V] = const_cast<Value*>(V); // Create a dummy node in case we have a metadata cycle. - MDNode *Dummy = MDNode::getTemporary(V->getContext(), ArrayRef<Value*>()); + MDNode *Dummy = MDNode::getTemporary(V->getContext(), None); VM[V] = Dummy; // Check all operands to see if any need to be remapped. 
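// Illustrative use of the FPrintFOpt change above, not from the patch (names made up):
// the rewrite to fwrite/fputc/fputs is now skipped whenever the fprintf return value
// is observed, because those callees do not return the printed character count.
#include <cstdio>
void fprintf_example(FILE *f, const char *s) {
  fprintf(f, "hi");              // result unused: may become fwrite("hi", 2, 1, f)
  int n = fprintf(f, "%s", s);   // result used: left untouched after this change
  (void)n;
}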
diff --git a/contrib/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/contrib/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp index d26154e..08d3725 100644 --- a/contrib/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/contrib/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -8,9 +8,9 @@ //===----------------------------------------------------------------------===// // // This is the LLVM loop vectorizer. This pass modifies 'vectorizable' loops -// and generates target-independent LLVM-IR. Legalization of the IR is done -// in the codegen. However, the vectorizer uses (will use) the codegen -// interfaces to generate IR that is likely to result in an optimal binary. +// and generates target-independent LLVM-IR. +// The vectorizer uses the TargetTransformInfo analysis to estimate the costs +// of instructions in order to estimate the profitability of vectorization. // // The loop vectorizer combines consecutive loop iterations into a single // 'wide' iteration. After this transformation the index is incremented @@ -78,7 +78,9 @@ #include "llvm/Pass.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" +#include "llvm/Support/PatternMatch.h" #include "llvm/Support/raw_ostream.h" +#include "llvm/Support/ValueHandle.h" #include "llvm/Target/TargetLibraryInfo.h" #include "llvm/Transforms/Scalar.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" @@ -87,6 +89,7 @@ #include <map> using namespace llvm; +using namespace llvm::PatternMatch; static cl::opt<unsigned> VectorizationFactor("force-vector-width", cl::init(0), cl::Hidden, @@ -112,9 +115,9 @@ TinyTripCountVectorThreshold("vectorizer-min-trip-count", cl::init(16), /// We don't unroll loops with a known constant trip count below this number. static const unsigned TinyTripCountUnrollThreshold = 128; -/// When performing a runtime memory check, do not check more than this -/// number of pointers. Notice that the check is quadratic! -static const unsigned RuntimeMemoryCheckThreshold = 4; +/// When performing memory disambiguation checks at runtime do not make more +/// than this number of comparisons. +static const unsigned RuntimeMemoryCheckThreshold = 8; /// We use a metadata with this name to indicate that a scalar loop was /// vectorized and that we don't need to re-vectorize it if we run into it @@ -333,7 +336,7 @@ public: DominatorTree *DT, TargetTransformInfo* TTI, AliasAnalysis *AA, TargetLibraryInfo *TLI) : TheLoop(L), SE(SE), DL(DL), DT(DT), TTI(TTI), AA(AA), TLI(TLI), - Induction(0) {} + Induction(0), HasFunNoNaNAttr(false) {} /// This enum represents the kinds of reductions that we support. enum ReductionKind { @@ -343,8 +346,10 @@ public: RK_IntegerOr, ///< Bitwise or logical OR of numbers. RK_IntegerAnd, ///< Bitwise or logical AND of numbers. RK_IntegerXor, ///< Bitwise or logical XOR of numbers. + RK_IntegerMinMax, ///< Min/max implemented in terms of select(cmp()). RK_FloatAdd, ///< Sum of floats. - RK_FloatMult ///< Product of floats. + RK_FloatMult, ///< Product of floats. + RK_FloatMinMax ///< Min/max implemented in terms of select(cmp()). }; /// This enum represents the kinds of inductions that we support. @@ -356,21 +361,52 @@ public: IK_ReversePtrInduction ///< Reverse ptr indvar. Step = - sizeof(elem). }; + // This enum represents the kind of minmax reduction. + enum MinMaxReductionKind { + MRK_Invalid, + MRK_UIntMin, + MRK_UIntMax, + MRK_SIntMin, + MRK_SIntMax, + MRK_FloatMin, + MRK_FloatMax + }; + /// This POD struct holds information about reduction variables. 
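// Illustrative source loop for the new min/max reduction kinds, not from the patch
// (names made up): the compare/select pair is the select(cmp()) shape that
// RK_IntegerMinMax with MRK_SIntMax describes.
int smax_example(const int *a, int n) {
  int mx = a[0];
  for (int i = 1; i < n; ++i)
    mx = (a[i] > mx) ? a[i] : mx;   // icmp sgt + select -> signed max reduction
  return mx;
}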
struct ReductionDescriptor { ReductionDescriptor() : StartValue(0), LoopExitInstr(0), - Kind(RK_NoReduction) {} + Kind(RK_NoReduction), MinMaxKind(MRK_Invalid) {} - ReductionDescriptor(Value *Start, Instruction *Exit, ReductionKind K) - : StartValue(Start), LoopExitInstr(Exit), Kind(K) {} + ReductionDescriptor(Value *Start, Instruction *Exit, ReductionKind K, + MinMaxReductionKind MK) + : StartValue(Start), LoopExitInstr(Exit), Kind(K), MinMaxKind(MK) {} // The starting value of the reduction. // It does not have to be zero! - Value *StartValue; + TrackingVH<Value> StartValue; // The instruction who's value is used outside the loop. Instruction *LoopExitInstr; // The kind of the reduction. ReductionKind Kind; + // If this a min/max reduction the kind of reduction. + MinMaxReductionKind MinMaxKind; + }; + + /// This POD struct holds information about a potential reduction operation. + struct ReductionInstDesc { + ReductionInstDesc(bool IsRedux, Instruction *I) : + IsReduction(IsRedux), PatternLastInst(I), MinMaxKind(MRK_Invalid) {} + + ReductionInstDesc(Instruction *I, MinMaxReductionKind K) : + IsReduction(true), PatternLastInst(I), MinMaxKind(K) {} + + // Is this instruction a reduction candidate. + bool IsReduction; + // The last instruction in a min/max pattern (select of the select(icmp()) + // pattern), or the current reduction instruction otherwise. + Instruction *PatternLastInst; + // If this is a min/max pattern the comparison predicate. + MinMaxReductionKind MinMaxKind; }; // This POD struct holds information about the memory runtime legality @@ -387,16 +423,18 @@ public: } /// Insert a pointer and calculate the start and end SCEVs. - void insert(ScalarEvolution *SE, Loop *Lp, Value *Ptr); + void insert(ScalarEvolution *SE, Loop *Lp, Value *Ptr, bool WritePtr); /// This flag indicates if we need to add the runtime check. bool Need; /// Holds the pointers that we need to check. - SmallVector<Value*, 2> Pointers; + SmallVector<TrackingVH<Value>, 2> Pointers; /// Holds the pointer value at the beginning of the loop. SmallVector<const SCEV*, 2> Starts; /// Holds the pointer value at the end of the loop. SmallVector<const SCEV*, 2> Ends; + /// Holds the information if this pointer is used for writing to memory. + SmallVector<bool, 2> IsWritePtr; }; /// A POD for saving information about induction variables. @@ -404,7 +442,7 @@ public: InductionInfo(Value *Start, InductionKind K) : StartValue(Start), IK(K) {} InductionInfo() : StartValue(0), IK(IK_NoInduction) {} /// Start value. - Value *StartValue; + TrackingVH<Value> StartValue; /// Induction kind. InductionKind IK; }; @@ -461,6 +499,10 @@ public: /// Returns the information that we collected about runtime memory check. RuntimePointerCheck *getRuntimePointerCheck() { return &PtrRtCheck; } + + /// This function returns the identity element (or neutral element) for + /// the operation K. + static Constant *getReductionIdentity(ReductionKind K, Type *Tp); private: /// Check if a single basic block loop is vectorizable. /// At this point we know that this is a loop with a constant trip count @@ -487,9 +529,17 @@ private: /// Returns True, if 'Phi' is the kind of reduction variable for type /// 'Kind'. If this is a reduction variable, it adds it to ReductionList. bool AddReductionVar(PHINode *Phi, ReductionKind Kind); - /// Returns true if the instruction I can be a reduction variable of type - /// 'Kind'. 
- bool isReductionInstr(Instruction *I, ReductionKind Kind); + /// Returns a struct describing if the instruction 'I' can be a reduction + /// variable of type 'Kind'. If the reduction is a min/max pattern of + /// select(icmp()) this function advances the instruction pointer 'I' from the + /// compare instruction to the select instruction and stores this pointer in + /// 'PatternLastInst' member of the returned struct. + ReductionInstDesc isReductionInstr(Instruction *I, ReductionKind Kind, + ReductionInstDesc &Desc); + /// Returns true if the instruction is a Select(ICmp(X, Y), X, Y) instruction + /// pattern corresponding to a min(X, Y) or max(X, Y). + static ReductionInstDesc isMinMaxSelectCmpPattern(Instruction *I, + ReductionInstDesc &Prev); /// Returns the induction kind of Phi. This function may return NoInduction /// if the PHI is not an induction variable. InductionKind isInductionVariable(PHINode *Phi); @@ -540,6 +590,8 @@ private: /// We need to check that all of the pointers in this list are disjoint /// at runtime. RuntimePointerCheck PtrRtCheck; + /// Can we assume the absence of NaNs. + bool HasFunNoNaNAttr; }; /// LoopVectorizationCostModel - estimates the expected speedups due to @@ -662,6 +714,11 @@ struct LoopVectorize : public LoopPass { AA = getAnalysisIfAvailable<AliasAnalysis>(); TLI = getAnalysisIfAvailable<TargetLibraryInfo>(); + if (DL == NULL) { + DEBUG(dbgs() << "LV: Not vectorizing because of missing data layout"); + return false; + } + DEBUG(dbgs() << "LV: Checking a loop in \"" << L->getHeader()->getParent()->getName() << "\"\n"); @@ -737,7 +794,8 @@ struct LoopVectorize : public LoopPass { void LoopVectorizationLegality::RuntimePointerCheck::insert(ScalarEvolution *SE, - Loop *Lp, Value *Ptr) { + Loop *Lp, Value *Ptr, + bool WritePtr) { const SCEV *Sc = SE->getSCEV(Ptr); const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(Sc); assert(AR && "Invalid addrec expression"); @@ -746,6 +804,7 @@ LoopVectorizationLegality::RuntimePointerCheck::insert(ScalarEvolution *SE, Pointers.push_back(Ptr); Starts.push_back(AR->getStart()); Ends.push_back(ScEnd); + IsWritePtr.push_back(WritePtr); } Value *InnerLoopVectorizer::getBroadcastInstrs(Value *V) { @@ -906,12 +965,18 @@ void InnerLoopVectorizer::vectorizeMemoryInstruction(Instruction *Instr, Value *Ptr = LI ? LI->getPointerOperand() : SI->getPointerOperand(); unsigned Alignment = LI ? LI->getAlignment() : SI->getAlignment(); + unsigned ScalarAllocatedSize = DL->getTypeAllocSize(ScalarDataTy); + unsigned VectorElementSize = DL->getTypeStoreSize(DataTy)/VF; + + if (ScalarAllocatedSize != VectorElementSize) + return scalarizeInstruction(Instr); + // If the pointer is loop invariant or if it is non consecutive, // scalarize the load. - int Stride = Legal->isConsecutivePtr(Ptr); - bool Reverse = Stride < 0; + int ConsecutiveStride = Legal->isConsecutivePtr(Ptr); + bool Reverse = ConsecutiveStride < 0; bool UniformLoad = LI && Legal->isUniform(Ptr); - if (Stride == 0 || UniformLoad) + if (!ConsecutiveStride || UniformLoad) return scalarizeInstruction(Instr); Constant *Zero = Builder.getInt32(0); @@ -1110,6 +1175,10 @@ InnerLoopVectorizer::addRuntimeCheck(LoopVectorizationLegality *Legal, for (unsigned i = 0; i < NumPointers; ++i) { for (unsigned j = i+1; j < NumPointers; ++j) { + // No need to check if two readonly pointers intersect. 
+ if (!PtrRtCheck->IsWritePtr[i] && !PtrRtCheck->IsWritePtr[j]) + continue; + Value *Start0 = ChkBuilder.CreateBitCast(Starts[i], PtrArithTy, "bc"); Value *Start1 = ChkBuilder.CreateBitCast(Starts[j], PtrArithTy, "bc"); Value *End0 = ChkBuilder.CreateBitCast(Ends[i], PtrArithTy, "bc"); @@ -1167,7 +1236,7 @@ InnerLoopVectorizer::createEmptyLoop(LoopVectorizationLegality *Legal) { // Mark the old scalar loop with metadata that tells us not to vectorize this // loop again if we run into it. - MDNode *MD = MDNode::get(OldBasicBlock->getContext(), ArrayRef<Value*>()); + MDNode *MD = MDNode::get(OldBasicBlock->getContext(), None); OldBasicBlock->getTerminator()->setMetadata(AlreadyVectorizedMDName, MD); // Some loops have a single integer induction variable, while other loops @@ -1436,24 +1505,24 @@ InnerLoopVectorizer::createEmptyLoop(LoopVectorizationLegality *Legal) { /// This function returns the identity element (or neutral element) for /// the operation K. -static Constant* -getReductionIdentity(LoopVectorizationLegality::ReductionKind K, Type *Tp) { +Constant* +LoopVectorizationLegality::getReductionIdentity(ReductionKind K, Type *Tp) { switch (K) { - case LoopVectorizationLegality:: RK_IntegerXor: - case LoopVectorizationLegality:: RK_IntegerAdd: - case LoopVectorizationLegality:: RK_IntegerOr: + case RK_IntegerXor: + case RK_IntegerAdd: + case RK_IntegerOr: // Adding, Xoring, Oring zero to a number does not change it. return ConstantInt::get(Tp, 0); - case LoopVectorizationLegality:: RK_IntegerMult: + case RK_IntegerMult: // Multiplying a number by 1 does not change it. return ConstantInt::get(Tp, 1); - case LoopVectorizationLegality:: RK_IntegerAnd: + case RK_IntegerAnd: // AND-ing a number with an all-1 value does not change it. return ConstantInt::get(Tp, -1, true); - case LoopVectorizationLegality:: RK_FloatMult: + case RK_FloatMult: // Multiplying a number by 1 does not change it. return ConstantFP::get(Tp, 1.0L); - case LoopVectorizationLegality:: RK_FloatAdd: + case RK_FloatAdd: // Adding zero to a number does not change it. return ConstantFP::get(Tp, 0.0L); default: @@ -1566,7 +1635,7 @@ getIntrinsicIDForCall(CallInst *CI, const TargetLibraryInfo *TLI) { } /// This function translates the reduction kind to an LLVM binary operator. 
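// Quick restatement of the identity elements above (illustrative, not from the patch):
// add/or/xor use 0, mul uses 1, and uses all-ones, fadd uses 0.0, fmul uses 1.0, so
// seeding unused vector lanes with the identity leaves the reduced result unchanged.
static_assert((0 + 5) == 5 && (1 * 5) == 5 && (~0 & 5) == 5 && (0 ^ 5) == 5,
              "identity elements leave the reduced value unchanged");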
-static Instruction::BinaryOps +static unsigned getReductionBinOp(LoopVectorizationLegality::ReductionKind Kind) { switch (Kind) { case LoopVectorizationLegality::RK_IntegerAdd: @@ -1583,11 +1652,53 @@ getReductionBinOp(LoopVectorizationLegality::ReductionKind Kind) { return Instruction::FMul; case LoopVectorizationLegality::RK_FloatAdd: return Instruction::FAdd; + case LoopVectorizationLegality::RK_IntegerMinMax: + return Instruction::ICmp; + case LoopVectorizationLegality::RK_FloatMinMax: + return Instruction::FCmp; default: llvm_unreachable("Unknown reduction operation"); } } +Value *createMinMaxOp(IRBuilder<> &Builder, + LoopVectorizationLegality::MinMaxReductionKind RK, + Value *Left, + Value *Right) { + CmpInst::Predicate P = CmpInst::ICMP_NE; + switch (RK) { + default: + llvm_unreachable("Unknown min/max reduction kind"); + case LoopVectorizationLegality::MRK_UIntMin: + P = CmpInst::ICMP_ULT; + break; + case LoopVectorizationLegality::MRK_UIntMax: + P = CmpInst::ICMP_UGT; + break; + case LoopVectorizationLegality::MRK_SIntMin: + P = CmpInst::ICMP_SLT; + break; + case LoopVectorizationLegality::MRK_SIntMax: + P = CmpInst::ICMP_SGT; + break; + case LoopVectorizationLegality::MRK_FloatMin: + P = CmpInst::FCMP_OLT; + break; + case LoopVectorizationLegality::MRK_FloatMax: + P = CmpInst::FCMP_OGT; + break; + } + + Value *Cmp; + if (RK == LoopVectorizationLegality::MRK_FloatMin || RK == LoopVectorizationLegality::MRK_FloatMax) + Cmp = Builder.CreateFCmp(P, Left, Right, "rdx.minmax.cmp"); + else + Cmp = Builder.CreateICmp(P, Left, Right, "rdx.minmax.cmp"); + + Value *Select = Builder.CreateSelect(Cmp, Left, Right, "rdx.minmax.select"); + return Select; +} + void InnerLoopVectorizer::vectorizeLoop(LoopVectorizationLegality *Legal) { //===------------------------------------------------===// @@ -1651,13 +1762,24 @@ InnerLoopVectorizer::vectorizeLoop(LoopVectorizationLegality *Legal) { // Find the reduction identity variable. Zero for addition, or, xor, // one for multiplication, -1 for And. - Constant *Iden = getReductionIdentity(RdxDesc.Kind, VecTy->getScalarType()); - Constant *Identity = ConstantVector::getSplat(VF, Iden); - - // This vector is the Identity vector where the first element is the - // incoming scalar reduction. - Value *VectorStart = Builder.CreateInsertElement(Identity, - RdxDesc.StartValue, Zero); + Value *Identity; + Value *VectorStart; + if (RdxDesc.Kind == LoopVectorizationLegality::RK_IntegerMinMax || + RdxDesc.Kind == LoopVectorizationLegality::RK_FloatMinMax) { + // MinMax reduction have the start value as their identify. + VectorStart = Identity = Builder.CreateVectorSplat(VF, RdxDesc.StartValue, + "minmax.ident"); + } else { + Constant *Iden = + LoopVectorizationLegality::getReductionIdentity(RdxDesc.Kind, + VecTy->getScalarType()); + Identity = ConstantVector::getSplat(VF, Iden); + + // This vector is the Identity vector where the first element is the + // incoming scalar reduction. + VectorStart = Builder.CreateInsertElement(Identity, + RdxDesc.StartValue, Zero); + } // Fix the vector-loop phi. // We created the induction variable so we know that the @@ -1699,10 +1821,15 @@ InnerLoopVectorizer::vectorizeLoop(LoopVectorizationLegality *Legal) { // Reduce all of the unrolled parts into a single vector. 
Value *ReducedPartRdx = RdxParts[0]; + unsigned Op = getReductionBinOp(RdxDesc.Kind); for (unsigned part = 1; part < UF; ++part) { - Instruction::BinaryOps Op = getReductionBinOp(RdxDesc.Kind); - ReducedPartRdx = Builder.CreateBinOp(Op, RdxParts[part], ReducedPartRdx, - "bin.rdx"); + if (Op != Instruction::ICmp && Op != Instruction::FCmp) + ReducedPartRdx = Builder.CreateBinOp((Instruction::BinaryOps)Op, + RdxParts[part], ReducedPartRdx, + "bin.rdx"); + else + ReducedPartRdx = createMinMaxOp(Builder, RdxDesc.MinMaxKind, + ReducedPartRdx, RdxParts[part]); } // VF is a power of 2 so we can emit the reduction using log2(VF) shuffles @@ -1727,8 +1854,11 @@ InnerLoopVectorizer::vectorizeLoop(LoopVectorizationLegality *Legal) { ConstantVector::get(ShuffleMask), "rdx.shuf"); - Instruction::BinaryOps Op = getReductionBinOp(RdxDesc.Kind); - TmpVec = Builder.CreateBinOp(Op, TmpVec, Shuf, "bin.rdx"); + if (Op != Instruction::ICmp && Op != Instruction::FCmp) + TmpVec = Builder.CreateBinOp((Instruction::BinaryOps)Op, TmpVec, Shuf, + "bin.rdx"); + else + TmpVec = createMinMaxOp(Builder, RdxDesc.MinMaxKind, TmpVec, Shuf); } // The result is in the first element of the vector. @@ -1861,18 +1991,33 @@ InnerLoopVectorizer::vectorizeBlockInLoop(LoopVectorizationLegality *Legal, // We know that all PHIs in non header blocks are converted into // selects, so we don't have to worry about the insertion order and we // can just use the builder. - // At this point we generate the predication tree. There may be // duplications since this is a simple recursive scan, but future // optimizations will clean it up. - VectorParts Cond = createEdgeMask(P->getIncomingBlock(0), - P->getParent()); - for (unsigned part = 0; part < UF; ++part) { - VectorParts &In0 = getVectorValue(P->getIncomingValue(0)); - VectorParts &In1 = getVectorValue(P->getIncomingValue(1)); - Entry[part] = Builder.CreateSelect(Cond[part], In0[part], In1[part], - "predphi"); + unsigned NumIncoming = P->getNumIncomingValues(); + assert(NumIncoming > 1 && "Invalid PHI"); + + // Generate a sequence of selects of the form: + // SELECT(Mask3, In3, + // SELECT(Mask2, In2, + // ( ...))) + for (unsigned In = 0; In < NumIncoming; In++) { + VectorParts Cond = createEdgeMask(P->getIncomingBlock(In), + P->getParent()); + VectorParts &In0 = getVectorValue(P->getIncomingValue(In)); + + for (unsigned part = 0; part < UF; ++part) { + // We don't need to 'select' the first PHI operand because it is + // the default value if all of the other masks don't match. + if (In == 0) + Entry[part] = In0[part]; + else + // Select between the current value and the previous incoming edge + // based on the incoming mask. + Entry[part] = Builder.CreateSelect(Cond[part], In0[part], + Entry[part], "predphi"); + } } continue; } @@ -2153,12 +2298,6 @@ bool LoopVectorizationLegality::canVectorizeWithIfConvert() { if (!isa<BranchInst>(BB->getTerminator())) return false; - // We must have at most two predecessors because we need to convert - // all PHIs to selects. - unsigned Preds = std::distance(pred_begin(BB), pred_end(BB)); - if (Preds > 2) - return false; - // We must be able to predicate all blocks that need to be predicated. if (blockNeedsPredication(BB) && !blockCanBePredicated(BB)) return false; @@ -2239,6 +2378,26 @@ bool LoopVectorizationLegality::canVectorize() { return true; } +/// \brief Check that the instruction has outside loop users and is not an +/// identified reduction variable. 
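// Scalar sketch of the log2(VF) shuffle reduction above (illustrative, not from the
// patch): each step folds the upper half of the lanes onto the lower half, so reducing
// 8 lanes takes log2(8) = 3 vector operations; min/max kinds use createMinMaxOp here.
int reduce8_example(int v[8]) {
  for (int half = 4; half >= 1; half /= 2)
    for (int i = 0; i < half; ++i)
      v[i] = v[i] + v[i + half];   // one vector binop per 'half' step in the real code
  return v[0];
}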
+static bool hasOutsideLoopUser(const Loop *TheLoop, Instruction *Inst, + SmallPtrSet<Value *, 4> &Reductions) { + // Reduction instructions are allowed to have exit users. All other + // instructions must not have external users. + if (!Reductions.count(Inst)) + //Check that all of the users of the loop are inside the BB. + for (Value::use_iterator I = Inst->use_begin(), E = Inst->use_end(); + I != E; ++I) { + Instruction *U = cast<Instruction>(*I); + // This user may be a reduction exit value. + if (!TheLoop->contains(U)) { + DEBUG(dbgs() << "LV: Found an outside user for : "<< *U << "\n"); + return true; + } + } + return false; +} + bool LoopVectorizationLegality::canVectorizeInstrs() { BasicBlock *PreHeader = TheLoop->getLoopPreheader(); BasicBlock *Header = TheLoop->getHeader(); @@ -2250,6 +2409,13 @@ bool LoopVectorizationLegality::canVectorizeInstrs() { return false; } + // Look for the attribute signaling the absence of NaNs. + Function &F = *Header->getParent(); + if (F.hasFnAttribute("no-nans-fp-math")) + HasFunNoNaNAttr = F.getAttributes().getAttribute( + AttributeSet::FunctionIndex, + "no-nans-fp-math").getValueAsString() == "true"; + // For each block in the loop. for (Loop::block_iterator bb = TheLoop->block_begin(), be = TheLoop->block_end(); bb != be; ++bb) { @@ -2259,12 +2425,6 @@ bool LoopVectorizationLegality::canVectorizeInstrs() { ++it) { if (PHINode *Phi = dyn_cast<PHINode>(it)) { - // This should not happen because the loop should be normalized. - if (Phi->getNumIncomingValues() != 2) { - DEBUG(dbgs() << "LV: Found an invalid PHI.\n"); - return false; - } - // Check that this PHI type is allowed. if (!Phi->getType()->isIntegerTy() && !Phi->getType()->isFloatingPointTy() && @@ -2276,8 +2436,19 @@ bool LoopVectorizationLegality::canVectorizeInstrs() { // If this PHINode is not in the header block, then we know that we // can convert it to select during if-conversion. No need to check if // the PHIs in this block are induction or reduction variables. - if (*bb != Header) - continue; + if (*bb != Header) { + // Check that this instruction has no outside users or is an + // identified reduction value with an outside user. + if(!hasOutsideLoopUser(TheLoop, it, AllowedExit)) + continue; + return false; + } + + // We only allow if-converted PHIs with more than two incoming values. + if (Phi->getNumIncomingValues() != 2) { + DEBUG(dbgs() << "LV: Found an invalid PHI.\n"); + return false; + } // This is the value coming from the preheader. Value *StartValue = Phi->getIncomingValueForBlock(PreHeader); @@ -2319,6 +2490,10 @@ bool LoopVectorizationLegality::canVectorizeInstrs() { DEBUG(dbgs() << "LV: Found a XOR reduction PHI."<< *Phi <<"\n"); continue; } + if (AddReductionVar(Phi, RK_IntegerMinMax)) { + DEBUG(dbgs() << "LV: Found a MINMAX reduction PHI."<< *Phi <<"\n"); + continue; + } if (AddReductionVar(Phi, RK_FloatMult)) { DEBUG(dbgs() << "LV: Found an FMult reduction PHI."<< *Phi <<"\n"); continue; @@ -2327,6 +2502,10 @@ bool LoopVectorizationLegality::canVectorizeInstrs() { DEBUG(dbgs() << "LV: Found an FAdd reduction PHI."<< *Phi <<"\n"); continue; } + if (AddReductionVar(Phi, RK_FloatMinMax)) { + DEBUG(dbgs() << "LV: Found an float MINMAX reduction PHI."<< *Phi <<"\n"); + continue; + } DEBUG(dbgs() << "LV: Found an unidentified PHI."<< *Phi <<"\n"); return false; @@ -2356,17 +2535,9 @@ bool LoopVectorizationLegality::canVectorizeInstrs() { // Reduction instructions are allowed to have exit users. // All other instructions must not have external users. 
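// Illustrative loop for hasOutsideLoopUser above, not from the patch (names made up):
// only values recognized as reductions (or inductions) may be read after the loop;
// any other instruction with a user outside the loop blocks vectorization.
int outside_user_example(const int *a, const int *b, int n, int *lastp) {
  int sum = 0, last = 0;
  for (int i = 0; i < n; ++i) {
    sum += a[i];       // reduction: its exit value may be used below
    last = b[i] + 1;   // not a reduction; the use after the loop is an outside user
  }
  *lastp = last;
  return sum;
}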
- if (!AllowedExit.count(it)) - //Check that all of the users of the loop are inside the BB. - for (Value::use_iterator I = it->use_begin(), E = it->use_end(); - I != E; ++I) { - Instruction *U = cast<Instruction>(*I); - // This user may be a reduction exit value. - if (!TheLoop->contains(U)) { - DEBUG(dbgs() << "LV: Found an outside user for : "<< *U << "\n"); - return false; - } - } + if (hasOutsideLoopUser(TheLoop, it, AllowedExit)) + return false; + } // next instr. } @@ -2446,13 +2617,6 @@ LoopVectorizationLegality::hasPossibleGlobalWriteReorder( bool LoopVectorizationLegality::canVectorizeMemory() { - if (TheLoop->isAnnotatedParallel()) { - DEBUG(dbgs() - << "LV: A loop annotated parallel, ignore memory dependency " - << "checks.\n"); - return true; - } - typedef SmallVector<Value*, 16> ValueVector; typedef SmallPtrSet<Value*, 16> ValueSet; // Holds the Load and Store *instructions*. @@ -2461,6 +2625,8 @@ bool LoopVectorizationLegality::canVectorizeMemory() { PtrRtCheck.Pointers.clear(); PtrRtCheck.Need = false; + const bool IsAnnotatedParallel = TheLoop->isAnnotatedParallel(); + // For each block. for (Loop::block_iterator bb = TheLoop->block_begin(), be = TheLoop->block_end(); bb != be; ++bb) { @@ -2475,7 +2641,7 @@ bool LoopVectorizationLegality::canVectorizeMemory() { if (it->mayReadFromMemory()) { LoadInst *Ld = dyn_cast<LoadInst>(it); if (!Ld) return false; - if (!Ld->isSimple()) { + if (!Ld->isSimple() && !IsAnnotatedParallel) { DEBUG(dbgs() << "LV: Found a non-simple load.\n"); return false; } @@ -2487,7 +2653,7 @@ bool LoopVectorizationLegality::canVectorizeMemory() { if (it->mayWriteToMemory()) { StoreInst *St = dyn_cast<StoreInst>(it); if (!St) return false; - if (!St->isSimple()) { + if (!St->isSimple() && !IsAnnotatedParallel) { DEBUG(dbgs() << "LV: Found a non-simple store.\n"); return false; } @@ -2534,6 +2700,13 @@ bool LoopVectorizationLegality::canVectorizeMemory() { ReadWrites.insert(std::make_pair(Ptr, ST)); } + if (IsAnnotatedParallel) { + DEBUG(dbgs() + << "LV: A loop annotated parallel, ignore memory dependency " + << "checks.\n"); + return true; + } + for (I = Loads.begin(), IE = Loads.end(); I != IE; ++I) { LoadInst *LD = cast<LoadInst>(*I); Value* Ptr = LD->getPointerOperand(); @@ -2556,6 +2729,9 @@ bool LoopVectorizationLegality::canVectorizeMemory() { return true; } + unsigned NumReadPtrs = 0; + unsigned NumWritePtrs = 0; + // Find pointers with computable bounds. We are going to use this information // to place a runtime bound check. bool CanDoRT = true; @@ -2563,7 +2739,8 @@ bool LoopVectorizationLegality::canVectorizeMemory() { for (MI = ReadWrites.begin(), ME = ReadWrites.end(); MI != ME; ++MI) { Value *V = (*MI).first; if (hasComputableBounds(V)) { - PtrRtCheck.insert(SE, TheLoop, V); + PtrRtCheck.insert(SE, TheLoop, V, true); + NumWritePtrs++; DEBUG(dbgs() << "LV: Found a runtime check ptr:" << *V <<"\n"); } else { CanDoRT = false; @@ -2573,7 +2750,8 @@ bool LoopVectorizationLegality::canVectorizeMemory() { for (MI = Reads.begin(), ME = Reads.end(); MI != ME; ++MI) { Value *V = (*MI).first; if (hasComputableBounds(V)) { - PtrRtCheck.insert(SE, TheLoop, V); + PtrRtCheck.insert(SE, TheLoop, V, false); + NumReadPtrs++; DEBUG(dbgs() << "LV: Found a runtime check ptr:" << *V <<"\n"); } else { CanDoRT = false; @@ -2583,7 +2761,9 @@ bool LoopVectorizationLegality::canVectorizeMemory() { // Check that we did not collect too many pointers or found a // unsizeable pointer. 
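// Worked example for the bound computed just below (illustrative, not from the patch):
// read/read pairs never need a runtime overlap check, so the comparison count is
// NumWritePtrs * (NumReadPtrs + NumWritePtrs - 1). For 2 write and 3 read pointers
// that is 2 * (3 + 2 - 1) = 8, exactly the new RuntimeMemoryCheckThreshold.
static_assert(2 * (3 + 2 - 1) == 8, "example runtime-check comparison count");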
- if (!CanDoRT || PtrRtCheck.Pointers.size() > RuntimeMemoryCheckThreshold) { + unsigned NumComparisons = (NumWritePtrs * (NumReadPtrs + NumWritePtrs - 1)); + DEBUG(dbgs() << "LV: We need to compare " << NumComparisons << " ptrs.\n"); + if (!CanDoRT || NumComparisons > RuntimeMemoryCheckThreshold) { PtrRtCheck.reset(); CanDoRT = false; } @@ -2646,8 +2826,8 @@ bool LoopVectorizationLegality::canVectorizeMemory() { Inst, WriteObjects, MaxByteWidth)) { - DEBUG(dbgs() << "LV: Found a possible write-write reorder:" - << *UI <<"\n"); + DEBUG(dbgs() << "LV: Found a possible write-write reorder:" << **UI + << "\n"); return false; } @@ -2690,8 +2870,8 @@ bool LoopVectorizationLegality::canVectorizeMemory() { Inst, WriteObjects, MaxByteWidth)) { - DEBUG(dbgs() << "LV: Found a possible read-write reorder:" - << *UI <<"\n"); + DEBUG(dbgs() << "LV: Found a possible read-write reorder:" << **UI + << "\n"); return false; } } @@ -2737,7 +2917,18 @@ bool LoopVectorizationLegality::AddReductionVar(PHINode *Phi, // used as reduction variables (such as ADD). We may have a single // out-of-block user. The cycle must end with the original PHI. Instruction *Iter = Phi; - while (true) { + + // To recognize min/max patterns formed by a icmp select sequence, we store + // the number of instruction we saw from the recognized min/max pattern, + // such that we don't stop when we see the phi has two uses (one by the select + // and one by the icmp) and to make sure we only see exactly the two + // instructions. + unsigned NumCmpSelectPatternInst = 0; + ReductionInstDesc ReduxDesc(false, 0); + + // Avoid cycles in the chain. + SmallPtrSet<Instruction *, 8> VisitedInsts; + while (VisitedInsts.insert(Iter)) { // If the instruction has no users then this is a broken // chain and can't be a reduction variable. if (Iter->use_empty()) @@ -2751,9 +2942,6 @@ bool LoopVectorizationLegality::AddReductionVar(PHINode *Phi, // Is this a bin op ? FoundBinOp |= !isa<PHINode>(Iter); - // Remember the current instruction. - Instruction *OldIter = Iter; - // For each of the *users* of iter. for (Value::use_iterator it = Iter->use_begin(), e = Iter->use_end(); it != e; ++it) { @@ -2782,25 +2970,35 @@ bool LoopVectorizationLegality::AddReductionVar(PHINode *Phi, Iter->hasNUsesOrMore(2)) continue; - // We can't have multiple inside users. - if (FoundInBlockUser) + // We can't have multiple inside users except for a combination of + // icmp/select both using the phi. + if (FoundInBlockUser && !NumCmpSelectPatternInst) return false; FoundInBlockUser = true; // Any reduction instr must be of one of the allowed kinds. - if (!isReductionInstr(U, Kind)) + ReduxDesc = isReductionInstr(U, Kind, ReduxDesc); + if (!ReduxDesc.IsReduction) return false; + if (Kind == RK_IntegerMinMax && (isa<ICmpInst>(U) || isa<SelectInst>(U))) + ++NumCmpSelectPatternInst; + if (Kind == RK_FloatMinMax && (isa<FCmpInst>(U) || isa<SelectInst>(U))) + ++NumCmpSelectPatternInst; + // Reductions of instructions such as Div, and Sub is only // possible if the LHS is the reduction variable. - if (!U->isCommutative() && !isa<PHINode>(U) && U->getOperand(0) != Iter) + if (!U->isCommutative() && !isa<PHINode>(U) && !isa<SelectInst>(U) && + !isa<ICmpInst>(U) && !isa<FCmpInst>(U) && U->getOperand(0) != Iter) return false; - Iter = U; + Iter = ReduxDesc.PatternLastInst; } - // If all uses were skipped this can't be a reduction variable. - if (Iter == OldIter) + // This means we have seen one but not the other instruction of the + // pattern or more than just a select and cmp. 
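// IR sketch of why NumCmpSelectPatternInst must end up as exactly two (illustrative,
// not from the patch): the reduction phi of a min/max has two in-loop users, the
// compare and the select, and nothing else:
//   %cmp      = icmp sgt i32 %max.phi, %elem
//   %max.next = select i1 %cmp, i32 %max.phi, i32 %elem
// A lone compare without its select, or any extra in-block user, fails the check below.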
+ if ((Kind == RK_IntegerMinMax || Kind == RK_FloatMinMax) && + NumCmpSelectPatternInst != 2) return false; // We found a reduction var if we have reached the original @@ -2811,47 +3009,107 @@ bool LoopVectorizationLegality::AddReductionVar(PHINode *Phi, AllowedExit.insert(ExitInstruction); // Save the description of this reduction variable. - ReductionDescriptor RD(RdxStart, ExitInstruction, Kind); + ReductionDescriptor RD(RdxStart, ExitInstruction, Kind, + ReduxDesc.MinMaxKind); Reductions[Phi] = RD; // We've ended the cycle. This is a reduction variable if we have an // outside user and it has a binary op. return FoundBinOp && ExitInstruction; } } + + return false; } -bool +/// Returns true if the instruction is a Select(ICmp(X, Y), X, Y) instruction +/// pattern corresponding to a min(X, Y) or max(X, Y). +LoopVectorizationLegality::ReductionInstDesc +LoopVectorizationLegality::isMinMaxSelectCmpPattern(Instruction *I, + ReductionInstDesc &Prev) { + + assert((isa<ICmpInst>(I) || isa<FCmpInst>(I) || isa<SelectInst>(I)) && + "Expect a select instruction"); + Instruction *Cmp = 0; + SelectInst *Select = 0; + + // We must handle the select(cmp()) as a single instruction. Advance to the + // select. + if ((Cmp = dyn_cast<ICmpInst>(I)) || (Cmp = dyn_cast<FCmpInst>(I))) { + if (!Cmp->hasOneUse() || !(Select = dyn_cast<SelectInst>(*I->use_begin()))) + return ReductionInstDesc(false, I); + return ReductionInstDesc(Select, Prev.MinMaxKind); + } + + // Only handle single use cases for now. + if (!(Select = dyn_cast<SelectInst>(I))) + return ReductionInstDesc(false, I); + if (!(Cmp = dyn_cast<ICmpInst>(I->getOperand(0))) && + !(Cmp = dyn_cast<FCmpInst>(I->getOperand(0)))) + return ReductionInstDesc(false, I); + if (!Cmp->hasOneUse()) + return ReductionInstDesc(false, I); + + Value *CmpLeft; + Value *CmpRight; + + // Look for a min/max pattern. + if (m_UMin(m_Value(CmpLeft), m_Value(CmpRight)).match(Select)) + return ReductionInstDesc(Select, MRK_UIntMin); + else if (m_UMax(m_Value(CmpLeft), m_Value(CmpRight)).match(Select)) + return ReductionInstDesc(Select, MRK_UIntMax); + else if (m_SMax(m_Value(CmpLeft), m_Value(CmpRight)).match(Select)) + return ReductionInstDesc(Select, MRK_SIntMax); + else if (m_SMin(m_Value(CmpLeft), m_Value(CmpRight)).match(Select)) + return ReductionInstDesc(Select, MRK_SIntMin); + else if (m_OrdFMin(m_Value(CmpLeft), m_Value(CmpRight)).match(Select)) + return ReductionInstDesc(Select, MRK_FloatMin); + else if (m_OrdFMax(m_Value(CmpLeft), m_Value(CmpRight)).match(Select)) + return ReductionInstDesc(Select, MRK_FloatMax); + else if (m_UnordFMin(m_Value(CmpLeft), m_Value(CmpRight)).match(Select)) + return ReductionInstDesc(Select, MRK_FloatMin); + else if (m_UnordFMax(m_Value(CmpLeft), m_Value(CmpRight)).match(Select)) + return ReductionInstDesc(Select, MRK_FloatMax); + + return ReductionInstDesc(false, I); +} + +LoopVectorizationLegality::ReductionInstDesc LoopVectorizationLegality::isReductionInstr(Instruction *I, - ReductionKind Kind) { + ReductionKind Kind, + ReductionInstDesc &Prev) { bool FP = I->getType()->isFloatingPointTy(); bool FastMath = (FP && I->isCommutative() && I->isAssociative()); - switch (I->getOpcode()) { default: - return false; + return ReductionInstDesc(false, I); case Instruction::PHI: - if (FP && (Kind != RK_FloatMult && Kind != RK_FloatAdd)) - return false; - // possibly. 
- return true; + if (FP && (Kind != RK_FloatMult && Kind != RK_FloatAdd && + Kind != RK_FloatMinMax)) + return ReductionInstDesc(false, I); + return ReductionInstDesc(I, Prev.MinMaxKind); case Instruction::Sub: case Instruction::Add: - return Kind == RK_IntegerAdd; - case Instruction::SDiv: - case Instruction::UDiv: + return ReductionInstDesc(Kind == RK_IntegerAdd, I); case Instruction::Mul: - return Kind == RK_IntegerMult; + return ReductionInstDesc(Kind == RK_IntegerMult, I); case Instruction::And: - return Kind == RK_IntegerAnd; + return ReductionInstDesc(Kind == RK_IntegerAnd, I); case Instruction::Or: - return Kind == RK_IntegerOr; + return ReductionInstDesc(Kind == RK_IntegerOr, I); case Instruction::Xor: - return Kind == RK_IntegerXor; + return ReductionInstDesc(Kind == RK_IntegerXor, I); case Instruction::FMul: - return Kind == RK_FloatMult && FastMath; + return ReductionInstDesc(Kind == RK_FloatMult && FastMath, I); case Instruction::FAdd: - return Kind == RK_FloatAdd && FastMath; - } + return ReductionInstDesc(Kind == RK_FloatAdd && FastMath, I); + case Instruction::FCmp: + case Instruction::ICmp: + case Instruction::Select: + if (Kind != RK_IntegerMinMax && + (!HasFunNoNaNAttr || Kind != RK_FloatMinMax)) + return ReductionInstDesc(false, I); + return isMinMaxSelectCmpPattern(I, Prev); + } } LoopVectorizationLegality::InductionKind @@ -3384,9 +3642,11 @@ LoopVectorizationCostModel::getInstructionCost(Instruction *I, unsigned VF) { TTI.getMemoryOpCost(I->getOpcode(), VectorTy, Alignment, AS); // Scalarized loads/stores. - int Stride = Legal->isConsecutivePtr(Ptr); - bool Reverse = Stride < 0; - if (0 == Stride) { + int ConsecutiveStride = Legal->isConsecutivePtr(Ptr); + bool Reverse = ConsecutiveStride < 0; + unsigned ScalarAllocatedSize = DL->getTypeAllocSize(ValTy); + unsigned VectorElementSize = DL->getTypeStoreSize(VectorTy)/VF; + if (!ConsecutiveStride || ScalarAllocatedSize != VectorElementSize) { unsigned Cost = 0; // The cost of extracting from the value vector and pointer vector. Type *PtrTy = ToVectorTy(Ptr->getType(), VF); diff --git a/contrib/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/contrib/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp new file mode 100644 index 0000000..cc30cc9 --- /dev/null +++ b/contrib/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -0,0 +1,348 @@ +//===- SLPVectorizer.cpp - A bottom up SLP Vectorizer ---------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// This pass implements the Bottom Up SLP vectorizer. It detects consecutive +// stores that can be put together into vector-stores. Next, it attempts to +// construct vectorizable tree using the use-def chains. If a profitable tree +// was found, the SLP vectorizer performs vectorization on the tree. +// +// The pass is inspired by the work described in the paper: +// "Loop-Aware SLP in GCC" by Ira Rosen, Dorit Nuzman, Ayal Zaks. 
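// Illustrative input for this pass, not from the patch (names made up): four
// consecutive i32 stores whose use-def trees have the same shape, which the bottom-up
// SLP vectorizer can turn into one <4 x i32> add plus one vector store when profitable.
void slp_example(int *a, const int *b) {
  a[0] = b[0] + 7;
  a[1] = b[1] + 7;
  a[2] = b[2] + 7;
  a[3] = b[3] + 7;
}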
+// +//===----------------------------------------------------------------------===// +#define SV_NAME "slp-vectorizer" +#define DEBUG_TYPE SV_NAME + +#include "VecUtils.h" +#include "llvm/Transforms/Vectorize.h" +#include "llvm/Analysis/AliasAnalysis.h" +#include "llvm/Analysis/ScalarEvolution.h" +#include "llvm/Analysis/TargetTransformInfo.h" +#include "llvm/Analysis/Verifier.h" +#include "llvm/Analysis/LoopInfo.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/IntrinsicInst.h" +#include "llvm/IR/Module.h" +#include "llvm/IR/Type.h" +#include "llvm/IR/Value.h" +#include "llvm/Pass.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" +#include <map> + +using namespace llvm; + +static cl::opt<int> +SLPCostThreshold("slp-threshold", cl::init(0), cl::Hidden, + cl::desc("Only vectorize trees if the gain is above this " + "number. (gain = -cost of vectorization)")); +namespace { + +/// The SLPVectorizer Pass. +struct SLPVectorizer : public FunctionPass { + typedef std::map<Value*, BoUpSLP::StoreList> StoreListMap; + + /// Pass identification, replacement for typeid + static char ID; + + explicit SLPVectorizer() : FunctionPass(ID) { + initializeSLPVectorizerPass(*PassRegistry::getPassRegistry()); + } + + ScalarEvolution *SE; + DataLayout *DL; + TargetTransformInfo *TTI; + AliasAnalysis *AA; + LoopInfo *LI; + + virtual bool runOnFunction(Function &F) { + SE = &getAnalysis<ScalarEvolution>(); + DL = getAnalysisIfAvailable<DataLayout>(); + TTI = &getAnalysis<TargetTransformInfo>(); + AA = &getAnalysis<AliasAnalysis>(); + LI = &getAnalysis<LoopInfo>(); + + StoreRefs.clear(); + bool Changed = false; + + // Must have DataLayout. We can't require it because some tests run w/o + // triple. + if (!DL) + return false; + + for (Function::iterator it = F.begin(), e = F.end(); it != e; ++it) { + BasicBlock *BB = it; + bool BBChanged = false; + + // Use the bollom up slp vectorizer to construct chains that start with + // he store instructions. + BoUpSLP R(BB, SE, DL, TTI, AA, LI->getLoopFor(BB)); + + // Vectorize trees that end at reductions. + BBChanged |= vectorizeReductions(BB, R); + + // Vectorize trees that end at stores. + if (unsigned count = collectStores(BB, R)) { + (void)count; + DEBUG(dbgs()<<"SLP: Found " << count << " stores to vectorize.\n"); + BBChanged |= vectorizeStoreChains(R); + } + + // Try to hoist some of the scalarization code to the preheader. + if (BBChanged) hoistGatherSequence(LI, BB, R); + + Changed |= BBChanged; + } + + if (Changed) { + DEBUG(dbgs()<<"SLP: vectorized \""<<F.getName()<<"\"\n"); + DEBUG(verifyFunction(F)); + } + return Changed; + } + + virtual void getAnalysisUsage(AnalysisUsage &AU) const { + FunctionPass::getAnalysisUsage(AU); + AU.addRequired<ScalarEvolution>(); + AU.addRequired<AliasAnalysis>(); + AU.addRequired<TargetTransformInfo>(); + AU.addRequired<LoopInfo>(); + } + +private: + + /// \brief Collect memory references and sort them according to their base + /// object. We sort the stores to their base objects to reduce the cost of the + /// quadratic search on the stores. TODO: We can further reduce this cost + /// if we flush the chain creation every time we run into a memory barrier. + unsigned collectStores(BasicBlock *BB, BoUpSLP &R); + + /// \brief Try to vectorize a chain that starts at two arithmetic instrs. + bool tryToVectorizePair(Value *A, Value *B, BoUpSLP &R); + + /// \brief Try to vectorize a list of operands. 
+ bool tryToVectorizeList(ArrayRef<Value *> VL, BoUpSLP &R); + + /// \brief Try to vectorize a chain that may start at the operands of \V; + bool tryToVectorize(BinaryOperator *V, BoUpSLP &R); + + /// \brief Vectorize the stores that were collected in StoreRefs. + bool vectorizeStoreChains(BoUpSLP &R); + + /// \brief Try to hoist gather sequences outside of the loop in cases where + /// all of the sources are loop invariant. + void hoistGatherSequence(LoopInfo *LI, BasicBlock *BB, BoUpSLP &R); + + /// \brief Scan the basic block and look for reductions that may start a + /// vectorization chain. + bool vectorizeReductions(BasicBlock *BB, BoUpSLP &R); + +private: + StoreListMap StoreRefs; +}; + +unsigned SLPVectorizer::collectStores(BasicBlock *BB, BoUpSLP &R) { + unsigned count = 0; + StoreRefs.clear(); + for (BasicBlock::iterator it = BB->begin(), e = BB->end(); it != e; ++it) { + StoreInst *SI = dyn_cast<StoreInst>(it); + if (!SI) + continue; + + // Check that the pointer points to scalars. + Type *Ty = SI->getValueOperand()->getType(); + if (Ty->isAggregateType() || Ty->isVectorTy()) + return 0; + + // Find the base of the GEP. + Value *Ptr = SI->getPointerOperand(); + if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Ptr)) + Ptr = GEP->getPointerOperand(); + + // Save the store locations. + StoreRefs[Ptr].push_back(SI); + count++; + } + return count; +} + +bool SLPVectorizer::tryToVectorizePair(Value *A, Value *B, BoUpSLP &R) { + if (!A || !B) return false; + Value *VL[] = { A, B }; + return tryToVectorizeList(VL, R); +} + +bool SLPVectorizer::tryToVectorizeList(ArrayRef<Value *> VL, BoUpSLP &R) { + DEBUG(dbgs()<<"SLP: Vectorizing a list of length = " << VL.size() << ".\n"); + + // Check that all of the parts are scalar. + for (int i = 0, e = VL.size(); i < e; ++i) { + Type *Ty = VL[i]->getType(); + if (Ty->isAggregateType() || Ty->isVectorTy()) + return 0; + } + + int Cost = R.getTreeCost(VL); + int ExtrCost = R.getScalarizationCost(VL); + DEBUG(dbgs()<<"SLP: Cost of pair:" << Cost << + " Cost of extract:" << ExtrCost << ".\n"); + if ((Cost+ExtrCost) >= -SLPCostThreshold) return false; + DEBUG(dbgs()<<"SLP: Vectorizing pair.\n"); + R.vectorizeArith(VL); + return true; +} + +bool SLPVectorizer::tryToVectorize(BinaryOperator *V, BoUpSLP &R) { + if (!V) return false; + // Try to vectorize V. + if (tryToVectorizePair(V->getOperand(0), V->getOperand(1), R)) + return true; + + BinaryOperator *A = dyn_cast<BinaryOperator>(V->getOperand(0)); + BinaryOperator *B = dyn_cast<BinaryOperator>(V->getOperand(1)); + // Try to skip B. + if (B && B->hasOneUse()) { + BinaryOperator *B0 = dyn_cast<BinaryOperator>(B->getOperand(0)); + BinaryOperator *B1 = dyn_cast<BinaryOperator>(B->getOperand(1)); + if (tryToVectorizePair(A, B0, R)) { + B->moveBefore(V); + return true; + } + if (tryToVectorizePair(A, B1, R)) { + B->moveBefore(V); + return true; + } + } + + // Try to skip A. + if (A && A->hasOneUse()) { + BinaryOperator *A0 = dyn_cast<BinaryOperator>(A->getOperand(0)); + BinaryOperator *A1 = dyn_cast<BinaryOperator>(A->getOperand(1)); + if (tryToVectorizePair(A0, B, R)) { + A->moveBefore(V); + return true; + } + if (tryToVectorizePair(A1, B, R)) { + A->moveBefore(V); + return true; + } + } + return 0; +} + +bool SLPVectorizer::vectorizeReductions(BasicBlock *BB, BoUpSLP &R) { + bool Changed = false; + for (BasicBlock::iterator it = BB->begin(), e = BB->end(); it != e; ++it) { + if (isa<DbgInfoIntrinsic>(it)) continue; + + // Try to vectorize reductions that use PHINodes. 
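// Illustrative shape for the reduction handling just below, not from the patch (names
// made up): a two-way reduction phi in this block feeds a binary operator, and the
// pass then tries to vectorize the operands of that operator as a pair, e.g. the two
// multiplies here may become a single <2 x float> fmul plus two extracts.
float reduction_pair_example(const float *x, const float *y,
                             const float *z, const float *w, int n) {
  float r = 0.0f;
  for (int i = 0; i < n; ++i)
    r += x[i] * y[i] + z[i] * w[i];
  return r;
}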
+ if (PHINode *P = dyn_cast<PHINode>(it)) { + // Check that the PHI is a reduction PHI. + if (P->getNumIncomingValues() != 2) return Changed; + Value *Rdx = (P->getIncomingBlock(0) == BB ? P->getIncomingValue(0) : + (P->getIncomingBlock(1) == BB ? P->getIncomingValue(1) : + 0)); + // Check if this is a Binary Operator. + BinaryOperator *BI = dyn_cast_or_null<BinaryOperator>(Rdx); + if (!BI) + continue; + + Value *Inst = BI->getOperand(0); + if (Inst == P) Inst = BI->getOperand(1); + Changed |= tryToVectorize(dyn_cast<BinaryOperator>(Inst), R); + continue; + } + + // Try to vectorize trees that start at compare instructions. + if (CmpInst *CI = dyn_cast<CmpInst>(it)) { + if (tryToVectorizePair(CI->getOperand(0), CI->getOperand(1), R)) { + Changed |= true; + continue; + } + for (int i = 0; i < 2; ++i) + if (BinaryOperator *BI = dyn_cast<BinaryOperator>(CI->getOperand(i))) + Changed |= tryToVectorizePair(BI->getOperand(0), BI->getOperand(1), R); + continue; + } + } + + return Changed; +} + +bool SLPVectorizer::vectorizeStoreChains(BoUpSLP &R) { + bool Changed = false; + // Attempt to sort and vectorize each of the store-groups. + for (StoreListMap::iterator it = StoreRefs.begin(), e = StoreRefs.end(); + it != e; ++it) { + if (it->second.size() < 2) + continue; + + DEBUG(dbgs()<<"SLP: Analyzing a store chain of length " << + it->second.size() << ".\n"); + + Changed |= R.vectorizeStores(it->second, -SLPCostThreshold); + } + return Changed; +} + +void SLPVectorizer::hoistGatherSequence(LoopInfo *LI, BasicBlock *BB, + BoUpSLP &R) { + // Check if this block is inside a loop. + Loop *L = LI->getLoopFor(BB); + if (!L) + return; + + // Check if it has a preheader. + BasicBlock *PreHeader = L->getLoopPreheader(); + if (!PreHeader) + return; + + // Mark the insertion point for the block. + Instruction *Location = PreHeader->getTerminator(); + + BoUpSLP::ValueList &Gathers = R.getGatherSeqInstructions(); + for (BoUpSLP::ValueList::iterator it = Gathers.begin(), e = Gathers.end(); + it != e; ++it) { + InsertElementInst *Insert = dyn_cast<InsertElementInst>(*it); + + // The InsertElement sequence can be simplified into a constant. + if (!Insert) + continue; + + // If the vector or the element that we insert into it are + // instructions that are defined in this basic block then we can't + // hoist this instruction. + Instruction *CurrVec = dyn_cast<Instruction>(Insert->getOperand(0)); + Instruction *NewElem = dyn_cast<Instruction>(Insert->getOperand(1)); + if (CurrVec && L->contains(CurrVec)) continue; + if (NewElem && L->contains(NewElem)) continue; + + // We can hoist this instruction. Move it to the pre-header. 
+ Insert->moveBefore(Location); + } +} + +} // end anonymous namespace + +char SLPVectorizer::ID = 0; +static const char lv_name[] = "SLP Vectorizer"; +INITIALIZE_PASS_BEGIN(SLPVectorizer, SV_NAME, lv_name, false, false) +INITIALIZE_AG_DEPENDENCY(AliasAnalysis) +INITIALIZE_AG_DEPENDENCY(TargetTransformInfo) +INITIALIZE_PASS_DEPENDENCY(ScalarEvolution) +INITIALIZE_PASS_DEPENDENCY(LoopSimplify) +INITIALIZE_PASS_END(SLPVectorizer, SV_NAME, lv_name, false, false) + +namespace llvm { + Pass *createSLPVectorizerPass() { + return new SLPVectorizer(); + } +} + diff --git a/contrib/llvm/lib/Transforms/Vectorize/VecUtils.cpp b/contrib/llvm/lib/Transforms/Vectorize/VecUtils.cpp new file mode 100644 index 0000000..9b94366 --- /dev/null +++ b/contrib/llvm/lib/Transforms/Vectorize/VecUtils.cpp @@ -0,0 +1,730 @@ +//===- VecUtils.cpp --- Vectorization Utilities ---------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +#define DEBUG_TYPE "SLP" + +#include "VecUtils.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/SmallSet.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/Analysis/AliasAnalysis.h" +#include "llvm/Analysis/ScalarEvolution.h" +#include "llvm/Analysis/ScalarEvolutionExpressions.h" +#include "llvm/Analysis/TargetTransformInfo.h" +#include "llvm/Analysis/Verifier.h" +#include "llvm/Analysis/LoopInfo.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/Module.h" +#include "llvm/IR/Type.h" +#include "llvm/IR/Value.h" +#include "llvm/Pass.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Target/TargetLibraryInfo.h" +#include "llvm/Transforms/Scalar.h" +#include "llvm/Transforms/Utils/Local.h" +#include <algorithm> +#include <map> + +using namespace llvm; + +static const unsigned MinVecRegSize = 128; + +static const unsigned RecursionMaxDepth = 6; + +namespace llvm { + +BoUpSLP::BoUpSLP(BasicBlock *Bb, ScalarEvolution *S, DataLayout *Dl, + TargetTransformInfo *Tti, AliasAnalysis *Aa, Loop *Lp) : + BB(Bb), SE(S), DL(Dl), TTI(Tti), AA(Aa), L(Lp) { + numberInstructions(); +} + +void BoUpSLP::numberInstructions() { + int Loc = 0; + InstrIdx.clear(); + InstrVec.clear(); + // Number the instructions in the block. + for (BasicBlock::iterator it=BB->begin(), e=BB->end(); it != e; ++it) { + InstrIdx[it] = Loc++; + InstrVec.push_back(it); + assert(InstrVec[InstrIdx[it]] == it && "Invalid allocation"); + } +} + +Value *BoUpSLP::getPointerOperand(Value *I) { + if (LoadInst *LI = dyn_cast<LoadInst>(I)) return LI->getPointerOperand(); + if (StoreInst *SI = dyn_cast<StoreInst>(I)) return SI->getPointerOperand(); + return 0; +} + +unsigned BoUpSLP::getAddressSpaceOperand(Value *I) { + if (LoadInst *L=dyn_cast<LoadInst>(I)) return L->getPointerAddressSpace(); + if (StoreInst *S=dyn_cast<StoreInst>(I)) return S->getPointerAddressSpace(); + return -1; +} + +bool BoUpSLP::isConsecutiveAccess(Value *A, Value *B) { + Value *PtrA = getPointerOperand(A); + Value *PtrB = getPointerOperand(B); + unsigned ASA = getAddressSpaceOperand(A); + unsigned ASB = getAddressSpaceOperand(B); + + // Check that the address spaces match and that the pointers are valid. 
+ if (!PtrA || !PtrB || (ASA != ASB)) return false; + + // Check that A and B are of the same type. + if (PtrA->getType() != PtrB->getType()) return false; + + // Calculate the distance. + const SCEV *PtrSCEVA = SE->getSCEV(PtrA); + const SCEV *PtrSCEVB = SE->getSCEV(PtrB); + const SCEV *OffsetSCEV = SE->getMinusSCEV(PtrSCEVA, PtrSCEVB); + const SCEVConstant *ConstOffSCEV = dyn_cast<SCEVConstant>(OffsetSCEV); + + // Non constant distance. + if (!ConstOffSCEV) return false; + + int64_t Offset = ConstOffSCEV->getValue()->getSExtValue(); + Type *Ty = cast<PointerType>(PtrA->getType())->getElementType(); + // The Instructions are connsecutive if the size of the first load/store is + // the same as the offset. + int64_t Sz = DL->getTypeStoreSize(Ty); + return ((-Offset) == Sz); +} + +bool BoUpSLP::vectorizeStoreChain(ArrayRef<Value *> Chain, int CostThreshold) { + Type *StoreTy = cast<StoreInst>(Chain[0])->getValueOperand()->getType(); + unsigned Sz = DL->getTypeSizeInBits(StoreTy); + unsigned VF = MinVecRegSize / Sz; + + if (!isPowerOf2_32(Sz) || VF < 2) return false; + + bool Changed = false; + // Look for profitable vectorizable trees at all offsets, starting at zero. + for (unsigned i = 0, e = Chain.size(); i < e; ++i) { + if (i + VF > e) return Changed; + DEBUG(dbgs()<<"SLP: Analyzing " << VF << " stores at offset "<< i << "\n"); + ArrayRef<Value *> Operands = Chain.slice(i, VF); + + int Cost = getTreeCost(Operands); + DEBUG(dbgs() << "SLP: Found cost=" << Cost << " for VF=" << VF << "\n"); + if (Cost < CostThreshold) { + DEBUG(dbgs() << "SLP: Decided to vectorize cost=" << Cost << "\n"); + vectorizeTree(Operands, VF); + i += VF - 1; + Changed = true; + } + } + + return Changed; +} + +bool BoUpSLP::vectorizeStores(ArrayRef<StoreInst *> Stores, int costThreshold) { + ValueSet Heads, Tails; + SmallDenseMap<Value*, Value*> ConsecutiveChain; + + // We may run into multiple chains that merge into a single chain. We mark the + // stores that we vectorized so that we don't visit the same store twice. + ValueSet VectorizedStores; + bool Changed = false; + + // Do a quadratic search on all of the given stores and find + // all of the pairs of loads that follow each other. + for (unsigned i = 0, e = Stores.size(); i < e; ++i) + for (unsigned j = 0; j < e; ++j) { + if (i == j) continue; + if (isConsecutiveAccess(Stores[i], Stores[j])) { + Tails.insert(Stores[j]); + Heads.insert(Stores[i]); + ConsecutiveChain[Stores[i]] = Stores[j]; + } + } + + // For stores that start but don't end a link in the chain: + for (ValueSet::iterator it = Heads.begin(), e = Heads.end();it != e; ++it) { + if (Tails.count(*it)) continue; + + // We found a store instr that starts a chain. Now follow the chain and try + // to vectorize it. + ValueList Operands; + Value *I = *it; + // Collect the chain into a list. + while (Tails.count(I) || Heads.count(I)) { + if (VectorizedStores.count(I)) break; + Operands.push_back(I); + // Move to the next value in the chain. + I = ConsecutiveChain[I]; + } + + bool Vectorized = vectorizeStoreChain(Operands, costThreshold); + + // Mark the vectorized stores so that we don't vectorize them again. + if (Vectorized) + VectorizedStores.insert(Operands.begin(), Operands.end()); + Changed |= Vectorized; + } + + return Changed; +} + +int BoUpSLP::getScalarizationCost(ArrayRef<Value *> VL) { + // Find the type of the operands in VL. 
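// Worked example for the consecutive-access check above (illustrative, not from the
// patch): for two i32 stores to &a[3] and &a[4], SCEV gives PtrA - PtrB = -4, and the
// i32 store size is 4 bytes, so (-Offset) == Sz holds and the accesses are consecutive.
// With the 128-bit MinVecRegSize that element size also yields VF = 128 / 32 = 4 lanes.
static_assert(128 / 32 == 4, "i32 lanes in a 128-bit vector register");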
+ Type *ScalarTy = VL[0]->getType(); + if (StoreInst *SI = dyn_cast<StoreInst>(VL[0])) + ScalarTy = SI->getValueOperand()->getType(); + VectorType *VecTy = VectorType::get(ScalarTy, VL.size()); + // Find the cost of inserting/extracting values from the vector. + return getScalarizationCost(VecTy); +} + +int BoUpSLP::getScalarizationCost(Type *Ty) { + int Cost = 0; + for (unsigned i = 0, e = cast<VectorType>(Ty)->getNumElements(); i < e; ++i) + Cost += TTI->getVectorInstrCost(Instruction::InsertElement, Ty, i); + return Cost; +} + +AliasAnalysis::Location BoUpSLP::getLocation(Instruction *I) { + if (StoreInst *SI = dyn_cast<StoreInst>(I)) return AA->getLocation(SI); + if (LoadInst *LI = dyn_cast<LoadInst>(I)) return AA->getLocation(LI); + return AliasAnalysis::Location(); +} + +Value *BoUpSLP::isUnsafeToSink(Instruction *Src, Instruction *Dst) { + assert(Src->getParent() == Dst->getParent() && "Not the same BB"); + BasicBlock::iterator I = Src, E = Dst; + /// Scan all of the instruction from SRC to DST and check if + /// the source may alias. + for (++I; I != E; ++I) { + // Ignore store instructions that are marked as 'ignore'. + if (MemBarrierIgnoreList.count(I)) continue; + if (Src->mayWriteToMemory()) /* Write */ { + if (!I->mayReadOrWriteMemory()) continue; + } else /* Read */ { + if (!I->mayWriteToMemory()) continue; + } + AliasAnalysis::Location A = getLocation(&*I); + AliasAnalysis::Location B = getLocation(Src); + + if (!A.Ptr || !B.Ptr || AA->alias(A, B)) + return I; + } + return 0; +} + +void BoUpSLP::vectorizeArith(ArrayRef<Value *> Operands) { + Value *Vec = vectorizeTree(Operands, Operands.size()); + BasicBlock::iterator Loc = cast<Instruction>(Vec); + IRBuilder<> Builder(++Loc); + // After vectorizing the operands we need to generate extractelement + // instructions and replace all of the uses of the scalar values with + // the values that we extracted from the vectorized tree. + for (unsigned i = 0, e = Operands.size(); i != e; ++i) { + Value *S = Builder.CreateExtractElement(Vec, Builder.getInt32(i)); + Operands[i]->replaceAllUsesWith(S); + } +} + +int BoUpSLP::getTreeCost(ArrayRef<Value *> VL) { + // Get rid of the list of stores that were removed, and from the + // lists of instructions with multiple users. + MemBarrierIgnoreList.clear(); + LaneMap.clear(); + MultiUserVals.clear(); + MustScalarize.clear(); + + // Scan the tree and find which value is used by which lane, and which values + // must be scalarized. + getTreeUses_rec(VL, 0); + + // Check that instructions with multiple users can be vectorized. Mark unsafe + // instructions. + for (ValueSet::iterator it = MultiUserVals.begin(), + e = MultiUserVals.end(); it != e; ++it) { + // Check that all of the users of this instr are within the tree + // and that they are all from the same lane. + int Lane = -1; + for (Value::use_iterator I = (*it)->use_begin(), E = (*it)->use_end(); + I != E; ++I) { + if (LaneMap.find(*I) == LaneMap.end()) { + MustScalarize.insert(*it); + DEBUG(dbgs()<<"SLP: Adding " << **it << + " to MustScalarize because of an out of tree usage.\n"); + break; + } + if (Lane == -1) Lane = LaneMap[*I]; + if (Lane != LaneMap[*I]) { + MustScalarize.insert(*it); + DEBUG(dbgs()<<"Adding " << **it << + " to MustScalarize because multiple lane use it: " + << Lane << " and " << LaneMap[*I] << ".\n"); + break; + } + } + } + + // Now calculate the cost of vectorizing the tree. 
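// A hypothetical source-level case for the multi-user check above: t0 feeds
// lane 0 of the store tree but is also stored through g, a use outside the
// tree, so getTreeCost() would add t0 to MustScalarize and the {t0, t1}
// bundle would be priced as a gather instead of a vector operation.
void out_of_tree_use(int *a, int *g, int x, int y) {
  int t0 = x + y;  // lane 0 value, used twice
  int t1 = x + 1;  // lane 1 value (same opcode, so the pair forms a tree)
  a[0] = t0;
  a[1] = t1;
  *g = t0;         // out-of-tree use: t0 must stay scalar
}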
+ return getTreeCost_rec(VL, 0); +} + +void BoUpSLP::getTreeUses_rec(ArrayRef<Value *> VL, unsigned Depth) { + if (Depth == RecursionMaxDepth) return; + + // Don't handle vectors. + if (VL[0]->getType()->isVectorTy()) return; + if (StoreInst *SI = dyn_cast<StoreInst>(VL[0])) + if (SI->getValueOperand()->getType()->isVectorTy()) return; + + // Check if all of the operands are constants. + bool AllConst = true; + bool AllSameScalar = true; + for (unsigned i = 0, e = VL.size(); i < e; ++i) { + AllConst &= isa<Constant>(VL[i]); + AllSameScalar &= (VL[0] == VL[i]); + Instruction *I = dyn_cast<Instruction>(VL[i]); + // If one of the instructions is out of this BB, we need to scalarize all. + if (I && I->getParent() != BB) return; + } + + // If all of the operands are identical or constant we have a simple solution. + if (AllConst || AllSameScalar) return; + + // Scalarize unknown structures. + Instruction *VL0 = dyn_cast<Instruction>(VL[0]); + if (!VL0) return; + + unsigned Opcode = VL0->getOpcode(); + for (unsigned i = 0, e = VL.size(); i < e; ++i) { + Instruction *I = dyn_cast<Instruction>(VL[i]); + // If not all of the instructions are identical then we have to scalarize. + if (!I || Opcode != I->getOpcode()) return; + } + + // Mark instructions with multiple users. + for (unsigned i = 0, e = VL.size(); i < e; ++i) { + Instruction *I = dyn_cast<Instruction>(VL[i]); + // Remember to check if all of the users of this instr are vectorized + // within our tree. + if (I && I->getNumUses() > 1) MultiUserVals.insert(I); + } + + for (int i = 0, e = VL.size(); i < e; ++i) { + // Check that the instruction is only used within + // one lane. + if (LaneMap.count(VL[i]) && LaneMap[VL[i]] != i) return; + // Make this instruction as 'seen' and remember the lane. + LaneMap[VL[i]] = i; + } + + switch (Opcode) { + case Instruction::ZExt: + case Instruction::SExt: + case Instruction::FPToUI: + case Instruction::FPToSI: + case Instruction::FPExt: + case Instruction::PtrToInt: + case Instruction::IntToPtr: + case Instruction::SIToFP: + case Instruction::UIToFP: + case Instruction::Trunc: + case Instruction::FPTrunc: + case Instruction::BitCast: + case Instruction::Add: + case Instruction::FAdd: + case Instruction::Sub: + case Instruction::FSub: + case Instruction::Mul: + case Instruction::FMul: + case Instruction::UDiv: + case Instruction::SDiv: + case Instruction::FDiv: + case Instruction::URem: + case Instruction::SRem: + case Instruction::FRem: + case Instruction::Shl: + case Instruction::LShr: + case Instruction::AShr: + case Instruction::And: + case Instruction::Or: + case Instruction::Xor: { + for (unsigned i = 0, e = VL0->getNumOperands(); i < e; ++i) { + ValueList Operands; + // Prepare the operand vector. + for (unsigned j = 0; j < VL.size(); ++j) + Operands.push_back(cast<Instruction>(VL[j])->getOperand(i)); + + getTreeUses_rec(Operands, Depth+1); + } + return; + } + case Instruction::Store: { + ValueList Operands; + for (unsigned j = 0; j < VL.size(); ++j) + Operands.push_back(cast<Instruction>(VL[j])->getOperand(0)); + getTreeUses_rec(Operands, Depth+1); + return; + } + default: + return; + } +} + +int BoUpSLP::getTreeCost_rec(ArrayRef<Value *> VL, unsigned Depth) { + Type *ScalarTy = VL[0]->getType(); + + if (StoreInst *SI = dyn_cast<StoreInst>(VL[0])) + ScalarTy = SI->getValueOperand()->getType(); + + /// Don't mess with vectors. 
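// A hypothetical case for the one-lane-per-value rule checked above: t1 is an
// operand of lane 0 and of lane 1, so the use-walk stops for that bundle and
// getTreeCost() later moves t1 into MustScalarize because its users live in
// two different lanes.
void cross_lane(int *a, int x, int y, int z, int w) {
  int t0 = x + y;
  int t1 = x + z;  // needed by both lanes below
  int t2 = y + w;
  a[0] = t0 + t1;  // lane 0
  a[1] = t1 + t2;  // lane 1
}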
+ if (ScalarTy->isVectorTy()) return max_cost; + VectorType *VecTy = VectorType::get(ScalarTy, VL.size()); + + if (Depth == RecursionMaxDepth) return getScalarizationCost(VecTy); + + // Check if all of the operands are constants. + bool AllConst = true; + bool AllSameScalar = true; + bool MustScalarizeFlag = false; + for (unsigned i = 0, e = VL.size(); i < e; ++i) { + AllConst &= isa<Constant>(VL[i]); + AllSameScalar &= (VL[0] == VL[i]); + // Must have a single use. + Instruction *I = dyn_cast<Instruction>(VL[i]); + MustScalarizeFlag |= MustScalarize.count(VL[i]); + // This instruction is outside the basic block. + if (I && I->getParent() != BB) + return getScalarizationCost(VecTy); + } + + // Is this a simple vector constant. + if (AllConst) return 0; + + // If all of the operands are identical we can broadcast them. + Instruction *VL0 = dyn_cast<Instruction>(VL[0]); + if (AllSameScalar) { + // If we are in a loop, and this is not an instruction (e.g. constant or + // argument) or the instruction is defined outside the loop then assume + // that the cost is zero. + if (L && (!VL0 || !L->contains(VL0))) + return 0; + + // We need to broadcast the scalar. + return TTI->getShuffleCost(TargetTransformInfo::SK_Broadcast, VecTy, 0); + } + + // If this is not a constant, or a scalar from outside the loop then we + // need to scalarize it. + if (MustScalarizeFlag) + return getScalarizationCost(VecTy); + + if (!VL0) return getScalarizationCost(VecTy); + assert(VL0->getParent() == BB && "Wrong BB"); + + unsigned Opcode = VL0->getOpcode(); + for (unsigned i = 0, e = VL.size(); i < e; ++i) { + Instruction *I = dyn_cast<Instruction>(VL[i]); + // If not all of the instructions are identical then we have to scalarize. + if (!I || Opcode != I->getOpcode()) return getScalarizationCost(VecTy); + } + + // Check if it is safe to sink the loads or the stores. + if (Opcode == Instruction::Load || Opcode == Instruction::Store) { + int MaxIdx = InstrIdx[VL0]; + for (unsigned i = 1, e = VL.size(); i < e; ++i ) + MaxIdx = std::max(MaxIdx, InstrIdx[VL[i]]); + + Instruction *Last = InstrVec[MaxIdx]; + for (unsigned i = 0, e = VL.size(); i < e; ++i ) { + if (VL[i] == Last) continue; + Value *Barrier = isUnsafeToSink(cast<Instruction>(VL[i]), Last); + if (Barrier) { + DEBUG(dbgs() << "SLP: Can't sink " << *VL[i] << "\n down to " << + *Last << "\n because of " << *Barrier << "\n"); + return max_cost; + } + } + } + + switch (Opcode) { + case Instruction::ZExt: + case Instruction::SExt: + case Instruction::FPToUI: + case Instruction::FPToSI: + case Instruction::FPExt: + case Instruction::PtrToInt: + case Instruction::IntToPtr: + case Instruction::SIToFP: + case Instruction::UIToFP: + case Instruction::Trunc: + case Instruction::FPTrunc: + case Instruction::BitCast: { + int Cost = 0; + ValueList Operands; + Type *SrcTy = VL0->getOperand(0)->getType(); + // Prepare the operand vector. + for (unsigned j = 0; j < VL.size(); ++j) { + Operands.push_back(cast<Instruction>(VL[j])->getOperand(0)); + // Check that the casted type is the same for all users. + if (cast<Instruction>(VL[j])->getOperand(0)->getType() != SrcTy) + return getScalarizationCost(VecTy); + } + + Cost += getTreeCost_rec(Operands, Depth+1); + if (Cost >= max_cost) return max_cost; + + // Calculate the cost of this instruction. 
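// A hypothetical case for the sink-safety check above: vectorizing the two
// loads requires sinking the load of a[0] down to the load of a[1], but the
// store through q in between may alias them, so isUnsafeToSink() returns that
// store and the bundle is rejected with max_cost.
int blocked_sink(int *a, int *q) {
  int x = a[0];
  *q = 7;          // possible alias of a[0..1]; acts as a memory barrier
  int y = a[1];
  return x + y;
}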
+ int ScalarCost = VL.size() * TTI->getCastInstrCost(VL0->getOpcode(), + VL0->getType(), SrcTy); + + VectorType *SrcVecTy = VectorType::get(SrcTy, VL.size()); + int VecCost = TTI->getCastInstrCost(VL0->getOpcode(), VecTy, SrcVecTy); + Cost += (VecCost - ScalarCost); + return Cost; + } + case Instruction::Add: + case Instruction::FAdd: + case Instruction::Sub: + case Instruction::FSub: + case Instruction::Mul: + case Instruction::FMul: + case Instruction::UDiv: + case Instruction::SDiv: + case Instruction::FDiv: + case Instruction::URem: + case Instruction::SRem: + case Instruction::FRem: + case Instruction::Shl: + case Instruction::LShr: + case Instruction::AShr: + case Instruction::And: + case Instruction::Or: + case Instruction::Xor: { + int Cost = 0; + // Calculate the cost of all of the operands. + for (unsigned i = 0, e = VL0->getNumOperands(); i < e; ++i) { + ValueList Operands; + // Prepare the operand vector. + for (unsigned j = 0; j < VL.size(); ++j) + Operands.push_back(cast<Instruction>(VL[j])->getOperand(i)); + + Cost += getTreeCost_rec(Operands, Depth+1); + if (Cost >= max_cost) return max_cost; + } + + // Calculate the cost of this instruction. + int ScalarCost = VecTy->getNumElements() * + TTI->getArithmeticInstrCost(Opcode, ScalarTy); + + int VecCost = TTI->getArithmeticInstrCost(Opcode, VecTy); + Cost += (VecCost - ScalarCost); + return Cost; + } + case Instruction::Load: { + // If we are scalarize the loads, add the cost of forming the vector. + for (unsigned i = 0, e = VL.size()-1; i < e; ++i) + if (!isConsecutiveAccess(VL[i], VL[i+1])) + return getScalarizationCost(VecTy); + + // Cost of wide load - cost of scalar loads. + int ScalarLdCost = VecTy->getNumElements() * + TTI->getMemoryOpCost(Instruction::Load, ScalarTy, 1, 0); + int VecLdCost = TTI->getMemoryOpCost(Instruction::Load, ScalarTy, 1, 0); + return VecLdCost - ScalarLdCost; + } + case Instruction::Store: { + // We know that we can merge the stores. Calculate the cost. + int ScalarStCost = VecTy->getNumElements() * + TTI->getMemoryOpCost(Instruction::Store, ScalarTy, 1, 0); + int VecStCost = TTI->getMemoryOpCost(Instruction::Store, ScalarTy, 1,0); + int StoreCost = VecStCost - ScalarStCost; + + ValueList Operands; + for (unsigned j = 0; j < VL.size(); ++j) { + Operands.push_back(cast<Instruction>(VL[j])->getOperand(0)); + MemBarrierIgnoreList.insert(VL[j]); + } + + int TotalCost = StoreCost + getTreeCost_rec(Operands, Depth + 1); + return TotalCost; + } + default: + // Unable to vectorize unknown instructions. + return getScalarizationCost(VecTy); + } +} + +Instruction *BoUpSLP::GetLastInstr(ArrayRef<Value *> VL, unsigned VF) { + int MaxIdx = InstrIdx[BB->getFirstNonPHI()]; + for (unsigned i = 0; i < VF; ++i ) + MaxIdx = std::max(MaxIdx, InstrIdx[VL[i]]); + return InstrVec[MaxIdx + 1]; +} + +Value *BoUpSLP::Scalarize(ArrayRef<Value *> VL, VectorType *Ty) { + IRBuilder<> Builder(GetLastInstr(VL, Ty->getNumElements())); + Value *Vec = UndefValue::get(Ty); + for (unsigned i=0; i < Ty->getNumElements(); ++i) { + // Generate the 'InsertElement' instruction. + Vec = Builder.CreateInsertElement(Vec, VL[i], Builder.getInt32(i)); + // Remember that this instruction is used as part of a 'gather' sequence. + // The caller of the bottom-up slp vectorizer can try to hoist the sequence + // if the users are outside of the basic block. 
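// Hypothetical numbers to make the "VecCost - ScalarCost" bookkeeping above
// concrete for a 4 x i32 add bundle (the real values depend entirely on the
// target's TargetTransformInfo):
//   ScalarCost = 4 * getArithmeticInstrCost(Add, i32)       = 4 * 1 = 4
//   VecCost    =     getArithmeticInstrCost(Add, <4 x i32>) = 1
//   Cost      +=     VecCost - ScalarCost                   = -3
// A negative running total is what lets getTreeCost() report the whole tree
// as profitable to vectorize.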
+ GatherInstructions.push_back(Vec); + } + + return Vec; +} + +Value *BoUpSLP::vectorizeTree(ArrayRef<Value *> VL, int VF) { + Value *V = vectorizeTree_rec(VL, VF); + // We moved some instructions around. We have to number them again + // before we can do any analysis. + numberInstructions(); + MustScalarize.clear(); + return V; +} + +Value *BoUpSLP::vectorizeTree_rec(ArrayRef<Value *> VL, int VF) { + Type *ScalarTy = VL[0]->getType(); + if (StoreInst *SI = dyn_cast<StoreInst>(VL[0])) + ScalarTy = SI->getValueOperand()->getType(); + VectorType *VecTy = VectorType::get(ScalarTy, VF); + + // Check if all of the operands are constants or identical. + bool AllConst = true; + bool AllSameScalar = true; + for (unsigned i = 0, e = VF; i < e; ++i) { + AllConst &= isa<Constant>(VL[i]); + AllSameScalar &= (VL[0] == VL[i]); + // The instruction must be in the same BB, and it must be vectorizable. + Instruction *I = dyn_cast<Instruction>(VL[i]); + if (MustScalarize.count(VL[i]) || (I && I->getParent() != BB)) + return Scalarize(VL, VecTy); + } + + // Check that this is a simple vector constant. + if (AllConst || AllSameScalar) return Scalarize(VL, VecTy); + + // Scalarize unknown structures. + Instruction *VL0 = dyn_cast<Instruction>(VL[0]); + if (!VL0) return Scalarize(VL, VecTy); + + if (VectorizedValues.count(VL0)) return VectorizedValues[VL0]; + + unsigned Opcode = VL0->getOpcode(); + for (unsigned i = 0, e = VF; i < e; ++i) { + Instruction *I = dyn_cast<Instruction>(VL[i]); + // If not all of the instructions are identical then we have to scalarize. + if (!I || Opcode != I->getOpcode()) return Scalarize(VL, VecTy); + } + + switch (Opcode) { + case Instruction::ZExt: + case Instruction::SExt: + case Instruction::FPToUI: + case Instruction::FPToSI: + case Instruction::FPExt: + case Instruction::PtrToInt: + case Instruction::IntToPtr: + case Instruction::SIToFP: + case Instruction::UIToFP: + case Instruction::Trunc: + case Instruction::FPTrunc: + case Instruction::BitCast: { + ValueList INVL; + for (int i = 0; i < VF; ++i) + INVL.push_back(cast<Instruction>(VL[i])->getOperand(0)); + Value *InVec = vectorizeTree_rec(INVL, VF); + IRBuilder<> Builder(GetLastInstr(VL, VF)); + CastInst *CI = dyn_cast<CastInst>(VL0); + Value *V = Builder.CreateCast(CI->getOpcode(), InVec, VecTy); + VectorizedValues[VL0] = V; + return V; + } + case Instruction::Add: + case Instruction::FAdd: + case Instruction::Sub: + case Instruction::FSub: + case Instruction::Mul: + case Instruction::FMul: + case Instruction::UDiv: + case Instruction::SDiv: + case Instruction::FDiv: + case Instruction::URem: + case Instruction::SRem: + case Instruction::FRem: + case Instruction::Shl: + case Instruction::LShr: + case Instruction::AShr: + case Instruction::And: + case Instruction::Or: + case Instruction::Xor: { + ValueList LHSVL, RHSVL; + for (int i = 0; i < VF; ++i) { + RHSVL.push_back(cast<Instruction>(VL[i])->getOperand(0)); + LHSVL.push_back(cast<Instruction>(VL[i])->getOperand(1)); + } + + Value *RHS = vectorizeTree_rec(RHSVL, VF); + Value *LHS = vectorizeTree_rec(LHSVL, VF); + IRBuilder<> Builder(GetLastInstr(VL, VF)); + BinaryOperator *BinOp = cast<BinaryOperator>(VL0); + Value *V = Builder.CreateBinOp(BinOp->getOpcode(), RHS,LHS); + VectorizedValues[VL0] = V; + return V; + } + case Instruction::Load: { + LoadInst *LI = cast<LoadInst>(VL0); + unsigned Alignment = LI->getAlignment(); + + // Check if all of the loads are consecutive. 
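// An illustrative picture of what vectorizeTree_rec() emits for a bundle of
// four consecutive stores of adds, combining the binary-operator case above
// with the Load/Store cases that follow (all IR names are made up). For a
// source fragment such as
//   for (i = 0; i < 4; ++i) a[i] = b[i] + c[i];   // fully unrolled
// the scalar operands are vectorized first, then one wide operation and one
// wide store replace the four scalar copies, roughly:
//   %pb   = bitcast i32* %b to <4 x i32>*
//   %vb   = load <4 x i32>* %pb
//   %vc   = load <4 x i32>* %pc
//   %vadd = add <4 x i32> %vb, %vc
//   %pa   = bitcast i32* %a to <4 x i32>*
//   store <4 x i32> %vadd, <4 x i32>* %pa
// and the four original scalar stores are erased.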
+ for (unsigned i = 1, e = VF; i < e; ++i) + if (!isConsecutiveAccess(VL[i-1], VL[i])) + return Scalarize(VL, VecTy); + + IRBuilder<> Builder(GetLastInstr(VL, VF)); + Value *VecPtr = Builder.CreateBitCast(LI->getPointerOperand(), + VecTy->getPointerTo()); + LI = Builder.CreateLoad(VecPtr); + LI->setAlignment(Alignment); + VectorizedValues[VL0] = LI; + return LI; + } + case Instruction::Store: { + StoreInst *SI = cast<StoreInst>(VL0); + unsigned Alignment = SI->getAlignment(); + + ValueList ValueOp; + for (int i = 0; i < VF; ++i) + ValueOp.push_back(cast<StoreInst>(VL[i])->getValueOperand()); + + Value *VecValue = vectorizeTree_rec(ValueOp, VF); + + IRBuilder<> Builder(GetLastInstr(VL, VF)); + Value *VecPtr = Builder.CreateBitCast(SI->getPointerOperand(), + VecTy->getPointerTo()); + Builder.CreateStore(VecValue, VecPtr)->setAlignment(Alignment); + + for (int i = 0; i < VF; ++i) + cast<Instruction>(VL[i])->eraseFromParent(); + return 0; + } + default: + Value *S = Scalarize(VL, VecTy); + VectorizedValues[VL0] = S; + return S; + } +} + +} // end of namespace diff --git a/contrib/llvm/lib/Transforms/Vectorize/VecUtils.h b/contrib/llvm/lib/Transforms/Vectorize/VecUtils.h new file mode 100644 index 0000000..5456c6c --- /dev/null +++ b/contrib/llvm/lib/Transforms/Vectorize/VecUtils.h @@ -0,0 +1,164 @@ +//===- VecUtils.h - Vectorization Utilities -------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This family of classes and functions manipulate vectors and chains of +// vectors. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_TRANSFORMS_VECTORIZE_VECUTILS_H +#define LLVM_TRANSFORMS_VECTORIZE_VECUTILS_H + +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/Analysis/AliasAnalysis.h" +#include <vector> + +namespace llvm { + +class BasicBlock; class Instruction; class Type; +class VectorType; class StoreInst; class Value; +class ScalarEvolution; class DataLayout; +class TargetTransformInfo; class AliasAnalysis; +class Loop; + +/// Bottom Up SLP vectorization utility class. +struct BoUpSLP { + typedef SmallVector<Value*, 8> ValueList; + typedef SmallPtrSet<Value*, 16> ValueSet; + typedef SmallVector<StoreInst*, 8> StoreList; + static const int max_cost = 1<<20; + + // \brief C'tor. + BoUpSLP(BasicBlock *Bb, ScalarEvolution *Se, DataLayout *Dl, + TargetTransformInfo *Tti, AliasAnalysis *Aa, Loop *Lp); + + /// \brief Take the pointer operand from the Load/Store instruction. + /// \returns NULL if this is not a valid Load/Store instruction. + static Value *getPointerOperand(Value *I); + + /// \brief Take the address space operand from the Load/Store instruction. + /// \returns -1 if this is not a valid Load/Store instruction. + static unsigned getAddressSpaceOperand(Value *I); + + /// \returns true if the memory operations A and B are consecutive. + bool isConsecutiveAccess(Value *A, Value *B); + + /// \brief Vectorize the tree that starts with the elements in \p VL. + /// \returns the vectorized value. + Value *vectorizeTree(ArrayRef<Value *> VL, int VF); + + /// \returns the vectorization cost of the subtree that starts at \p VL. + /// A negative number means that this is profitable. 
+ int getTreeCost(ArrayRef<Value *> VL); + + /// \returns the scalarization cost for this list of values. Assuming that + /// this subtree gets vectorized, we may need to extract the values from the + /// roots. This method calculates the cost of extracting the values. + int getScalarizationCost(ArrayRef<Value *> VL); + + /// \brief Attempts to order and vectorize a sequence of stores. This + /// function does a quadratic scan of the given stores. + /// \returns true if the basic block was modified. + bool vectorizeStores(ArrayRef<StoreInst *> Stores, int costThreshold); + + /// \brief Vectorize a group of scalars into a vector tree. + void vectorizeArith(ArrayRef<Value *> Operands); + + /// \returns the list of new instructions that were added in order to collect + /// scalars into vectors. This list can be used to further optimize the gather + /// sequences. + ValueList &getGatherSeqInstructions() {return GatherInstructions; } + +private: + /// \brief This method contains the recursive part of getTreeCost. + int getTreeCost_rec(ArrayRef<Value *> VL, unsigned Depth); + + /// \brief This recursive method looks for vectorization hazards such as + /// values that are used by multiple users and checks that values are used + /// by only one vector lane. It updates the variables LaneMap, MultiUserVals. + void getTreeUses_rec(ArrayRef<Value *> VL, unsigned Depth); + + /// \brief This method contains the recursive part of vectorizeTree. + Value *vectorizeTree_rec(ArrayRef<Value *> VL, int VF); + + /// \brief Number all of the instructions in the block. + void numberInstructions(); + + /// \brief Vectorize a sorted sequence of stores. + bool vectorizeStoreChain(ArrayRef<Value *> Chain, int CostThreshold); + + /// \returns the scalarization cost for this type. Scalarization in this + /// context means the creation of vectors from a group of scalars. + int getScalarizationCost(Type *Ty); + + /// \returns the AA location that is being access by the instruction. + AliasAnalysis::Location getLocation(Instruction *I); + + /// \brief Checks if it is possible to sink an instruction from + /// \p Src to \p Dst. + /// \returns the pointer to the barrier instruction if we can't sink. + Value *isUnsafeToSink(Instruction *Src, Instruction *Dst); + + /// \returns the instruction that appears last in the BB from \p VL. + /// Only consider the first \p VF elements. + Instruction *GetLastInstr(ArrayRef<Value *> VL, unsigned VF); + + /// \returns a vector from a collection of scalars in \p VL. + Value *Scalarize(ArrayRef<Value *> VL, VectorType *Ty); + +private: + /// Maps instructions to numbers and back. + SmallDenseMap<Value*, int> InstrIdx; + /// Maps integers to Instructions. + std::vector<Instruction*> InstrVec; + + // -- containers that are used during getTreeCost -- // + + /// Contains values that must be scalarized because they are used + /// by multiple lanes, or by users outside the tree. + /// NOTICE: The vectorization methods also use this set. + ValueSet MustScalarize; + + /// Contains a list of values that are used outside the current tree. This + /// set must be reset between runs. + ValueSet MultiUserVals; + /// Maps values in the tree to the vector lanes that uses them. This map must + /// be reset between runs of getCost. + std::map<Value*, int> LaneMap; + /// A list of instructions to ignore while sinking + /// memory instructions. This map must be reset between runs of getCost. 
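// A minimal driver sketch for the BoUpSLP interface declared above, assuming
// VecUtils.h and the usual analysis headers are already included; 'Bundle' is
// a hypothetical list of isomorphic scalars taken from BB.
static bool trySLPBundle(BasicBlock *BB, ScalarEvolution *SE, DataLayout *DL,
                         TargetTransformInfo *TTI, AliasAnalysis *AA, Loop *L,
                         ArrayRef<Value *> Bundle) {
  BoUpSLP R(BB, SE, DL, TTI, AA, L);        // analyses are owned by the caller
  if (R.getTreeCost(Bundle) >= 0)           // negative cost means profitable
    return false;
  R.vectorizeTree(Bundle, Bundle.size());   // emit the vector form
  return true;
}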
+ SmallPtrSet<Value *, 8> MemBarrierIgnoreList; + + // -- Containers that are used during vectorizeTree -- // + + /// Maps between the first scalar to the vector. This map must be reset + ///between runs. + DenseMap<Value*, Value*> VectorizedValues; + + // -- Containers that are used after vectorization by the caller -- // + + /// A list of instructions that are used when gathering scalars into vectors. + /// In many cases these instructions can be hoisted outside of the BB. + /// Iterating over this list is faster than calling LICM. + ValueList GatherInstructions; + + // Analysis and block reference. + BasicBlock *BB; + ScalarEvolution *SE; + DataLayout *DL; + TargetTransformInfo *TTI; + AliasAnalysis *AA; + Loop *L; +}; + +} // end of namespace + +#endif // LLVM_TRANSFORMS_VECTORIZE_VECUTILS_H diff --git a/contrib/llvm/lib/Transforms/Vectorize/Vectorize.cpp b/contrib/llvm/lib/Transforms/Vectorize/Vectorize.cpp index 19eefd2..a927fe1 100644 --- a/contrib/llvm/lib/Transforms/Vectorize/Vectorize.cpp +++ b/contrib/llvm/lib/Transforms/Vectorize/Vectorize.cpp @@ -1,4 +1,4 @@ - //===-- Vectorize.cpp -----------------------------------------------------===// +//===-- Vectorize.cpp -----------------------------------------------------===// // // The LLVM Compiler Infrastructure // @@ -28,6 +28,7 @@ using namespace llvm; void llvm::initializeVectorization(PassRegistry &Registry) { initializeBBVectorizePass(Registry); initializeLoopVectorizePass(Registry); + initializeSLPVectorizerPass(Registry); } void LLVMInitializeVectorization(LLVMPassRegistryRef R) { @@ -41,3 +42,7 @@ void LLVMAddBBVectorizePass(LLVMPassManagerRef PM) { void LLVMAddLoopVectorizePass(LLVMPassManagerRef PM) { unwrap(PM)->add(createLoopVectorizePass()); } + +void LLVMAddSLPVectorizePass(LLVMPassManagerRef PM) { + unwrap(PM)->add(createSLPVectorizerPass()); +} |
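A short usage sketch for the new C binding above; the surrounding setup is hypothetical, and the header path assumes the declaration sits next to the existing vectorizer bindings in llvm-c/Transforms/Vectorize.h.

#include "llvm-c/Core.h"
#include "llvm-c/Transforms/Vectorize.h"

static void addVectorizePasses(LLVMPassManagerRef PM) {
  LLVMAddLoopVectorizePass(PM); /* existing binding */
  LLVMAddSLPVectorizePass(PM);  /* binding added by this change */
}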