Diffstat (limited to 'lib')
435 files changed, 16611 insertions, 9788 deletions
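One of the central changes in the lib/Analysis/CallGraphSCCPass.cpp hunks below is that the CGSCC pass manager now re-runs its passes on an SCC when a function pass devirtualizes a call (turns an indirect call into a direct one). As a minimal sketch, paraphrasing the runOnModule hunk in this diff (the names, the -max-cg-scc-iterations option, and its default of 4 are taken from the hunks below, not a restatement of the final upstream code):

    unsigned Iteration = 0;
    bool DevirtualizedCall = false;
    do {
      // Reset each iteration; RunAllPassesOnSCC sets it again if a
      // function pass devirtualized a call on this SCC.
      DevirtualizedCall = false;
      Changed |= RunAllPassesOnSCC(CurSCC, CG, DevirtualizedCall);
    } while (Iteration++ < MaxIterations && DevirtualizedCall);  // cl::init(4)

Per the RefreshCallGraph hunk, devirtualization is detected either directly, when an existing indirect call edge is replaced by a direct one, or heuristically, when a refresh removes more indirect call edges than it adds while adding more direct ones.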
diff --git a/lib/Analysis/AliasAnalysisEvaluator.cpp b/lib/Analysis/AliasAnalysisEvaluator.cpp index 308b9e3..bfa3ff1 100644 --- a/lib/Analysis/AliasAnalysisEvaluator.cpp +++ b/lib/Analysis/AliasAnalysisEvaluator.cpp @@ -108,6 +108,11 @@ PrintModRefResults(const char *Msg, bool P, Instruction *I, Value *Ptr, } } +static inline bool isInterestingPointer(Value *V) { + return V->getType()->isPointerTy() + && !isa<ConstantPointerNull>(V); +} + bool AAEval::runOnFunction(Function &F) { AliasAnalysis &AA = getAnalysis<AliasAnalysis>(); @@ -115,21 +120,31 @@ bool AAEval::runOnFunction(Function &F) { SetVector<CallSite> CallSites; for (Function::arg_iterator I = F.arg_begin(), E = F.arg_end(); I != E; ++I) - if (I->getType()->isPointerTy()) // Add all pointer arguments + if (I->getType()->isPointerTy()) // Add all pointer arguments. Pointers.insert(I); for (inst_iterator I = inst_begin(F), E = inst_end(F); I != E; ++I) { - if (I->getType()->isPointerTy()) // Add all pointer instructions + if (I->getType()->isPointerTy()) // Add all pointer instructions. Pointers.insert(&*I); Instruction &Inst = *I; - User::op_iterator OI = Inst.op_begin(); CallSite CS = CallSite::get(&Inst); - if (CS.getInstruction() && - isa<Function>(CS.getCalledValue())) - ++OI; // Skip actual functions for direct function calls. - for (; OI != Inst.op_end(); ++OI) - if ((*OI)->getType()->isPointerTy() && !isa<ConstantPointerNull>(*OI)) - Pointers.insert(*OI); + if (CS) { + Value *Callee = CS.getCalledValue(); + // Skip actual functions for direct function calls. + if (!isa<Function>(Callee) && isInterestingPointer(Callee)) + Pointers.insert(Callee); + // Consider formals. + for (CallSite::arg_iterator AI = CS.arg_begin(), AE = CS.arg_end(); + AI != AE; ++AI) + if (isInterestingPointer(*AI)) + Pointers.insert(*AI); + } else { + // Consider all operands. + for (Instruction::op_iterator OI = Inst.op_begin(), OE = Inst.op_end(); + OI != OE; ++OI) + if (isInterestingPointer(*OI)) + Pointers.insert(*OI); + } if (CS.getInstruction()) CallSites.insert(CS); } diff --git a/lib/Analysis/BasicAliasAnalysis.cpp b/lib/Analysis/BasicAliasAnalysis.cpp index 31a649d..cfe7a1c 100644 --- a/lib/Analysis/BasicAliasAnalysis.cpp +++ b/lib/Analysis/BasicAliasAnalysis.cpp @@ -655,6 +655,11 @@ BasicAliasAnalysis::aliasPHI(const PHINode *PN, unsigned PNSize, AliasAnalysis::AliasResult BasicAliasAnalysis::aliasCheck(const Value *V1, unsigned V1Size, const Value *V2, unsigned V2Size) { + // If either of the memory references is empty, it doesn't matter what the + // pointer values are. + if (V1Size == 0 || V2Size == 0) + return NoAlias; + // Strip off any casts if they exist. 
V1 = V1->stripPointerCasts(); V2 = V2->stripPointerCasts(); diff --git a/lib/Analysis/CMakeLists.txt b/lib/Analysis/CMakeLists.txt index 17c9b86..ad05dd9 100644 --- a/lib/Analysis/CMakeLists.txt +++ b/lib/Analysis/CMakeLists.txt @@ -21,6 +21,7 @@ add_llvm_library(LLVMAnalysis LazyValueInfo.cpp LibCallAliasAnalysis.cpp LibCallSemantics.cpp + Lint.cpp LiveValues.cpp LoopDependenceAnalysis.cpp LoopInfo.cpp @@ -38,6 +39,7 @@ add_llvm_library(LLVMAnalysis ScalarEvolution.cpp ScalarEvolutionAliasAnalysis.cpp ScalarEvolutionExpander.cpp + ScalarEvolutionNormalization.cpp SparsePropagation.cpp Trace.cpp ValueTracking.cpp diff --git a/lib/Analysis/ConstantFolding.cpp b/lib/Analysis/ConstantFolding.cpp index dda1fba..37cda02 100644 --- a/lib/Analysis/ConstantFolding.cpp +++ b/lib/Analysis/ConstantFolding.cpp @@ -401,7 +401,7 @@ static Constant *FoldReinterpretLoadFromConstPtr(Constant *C, APInt ResultVal = APInt(IntType->getBitWidth(), RawBytes[BytesLoaded-1]); for (unsigned i = 1; i != BytesLoaded; ++i) { ResultVal <<= 8; - ResultVal |= APInt(IntType->getBitWidth(), RawBytes[BytesLoaded-1-i]); + ResultVal |= RawBytes[BytesLoaded-1-i]; } return ConstantInt::get(IntType->getContext(), ResultVal); diff --git a/lib/Analysis/DebugInfo.cpp b/lib/Analysis/DebugInfo.cpp index 8ba1902..141b181 100644 --- a/lib/Analysis/DebugInfo.cpp +++ b/lib/Analysis/DebugInfo.cpp @@ -402,6 +402,17 @@ uint64_t DIDerivedType::getOriginalTypeSize() const { return getSizeInBits(); } +/// isInlinedFnArgument - Return trule if this variable provides debugging +/// information for an inlined function arguments. +bool DIVariable::isInlinedFnArgument(const Function *CurFn) { + assert(CurFn && "Invalid function"); + if (!getContext().isSubprogram()) + return false; + // This variable is not inlined function argument if its scope + // does not describe current function. + return !(DISubprogram(getContext().getNode()).describes(CurFn)); +} + /// describes - Return true if this subprogram provides debugging /// information for the function F. 
bool DISubprogram::describes(const Function *F) { @@ -414,6 +425,13 @@ bool DISubprogram::describes(const Function *F) { return false; } +unsigned DISubprogram::isOptimized() const { + assert (DbgNode && "Invalid subprogram descriptor!"); + if (DbgNode->getNumOperands() == 16) + return getUnsignedField(15); + return 0; +} + StringRef DIScope::getFilename() const { if (!DbgNode) return StringRef(); @@ -901,7 +919,8 @@ DISubprogram DIFactory::CreateSubprogram(DIDescriptor Context, bool isDefinition, unsigned VK, unsigned VIndex, DIType ContainingType, - bool isArtificial) { + bool isArtificial, + bool isOptimized) { Value *Elts[] = { GetTagConstant(dwarf::DW_TAG_subprogram), @@ -918,9 +937,10 @@ DISubprogram DIFactory::CreateSubprogram(DIDescriptor Context, ConstantInt::get(Type::getInt32Ty(VMContext), (unsigned)VK), ConstantInt::get(Type::getInt32Ty(VMContext), VIndex), ContainingType.getNode(), - ConstantInt::get(Type::getInt1Ty(VMContext), isArtificial) + ConstantInt::get(Type::getInt1Ty(VMContext), isArtificial), + ConstantInt::get(Type::getInt1Ty(VMContext), isOptimized) }; - return DISubprogram(MDNode::get(VMContext, &Elts[0], 15)); + return DISubprogram(MDNode::get(VMContext, &Elts[0], 16)); } /// CreateSubprogramDefinition - Create new subprogram descriptor for the @@ -945,9 +965,10 @@ DISubprogram DIFactory::CreateSubprogramDefinition(DISubprogram &SPDeclaration) DeclNode->getOperand(11), // Virtuality DeclNode->getOperand(12), // VIndex DeclNode->getOperand(13), // Containting Type - DeclNode->getOperand(14) // isArtificial + DeclNode->getOperand(14), // isArtificial + DeclNode->getOperand(15) // isOptimized }; - return DISubprogram(MDNode::get(VMContext, &Elts[0], 15)); + return DISubprogram(MDNode::get(VMContext, &Elts[0], 16)); } /// CreateGlobalVariable - Create a new descriptor for the specified global. 
@@ -1150,10 +1171,8 @@ void DebugInfoFinder::processModule(Module &M) { for (Function::iterator FI = (*I).begin(), FE = (*I).end(); FI != FE; ++FI) for (BasicBlock::iterator BI = (*FI).begin(), BE = (*FI).end(); BI != BE; ++BI) { - if (DbgDeclareInst *DDI = dyn_cast<DbgDeclareInst>(BI)) { + if (DbgDeclareInst *DDI = dyn_cast<DbgDeclareInst>(BI)) processDeclare(DDI); - continue; - } DebugLoc Loc = BI->getDebugLoc(); if (Loc.isUnknown()) diff --git a/lib/Analysis/DomPrinter.cpp b/lib/Analysis/DomPrinter.cpp index 3af687a..a1676e5 100644 --- a/lib/Analysis/DomPrinter.cpp +++ b/lib/Analysis/DomPrinter.cpp @@ -83,31 +83,6 @@ struct DOTGraphTraits<PostDominatorTree*> } namespace { -template <class Analysis, bool OnlyBBS> -struct GenericGraphViewer : public FunctionPass { - std::string Name; - - GenericGraphViewer(std::string GraphName, const void *ID) : FunctionPass(ID) { - Name = GraphName; - } - - virtual bool runOnFunction(Function &F) { - Analysis *Graph; - std::string Title, GraphName; - Graph = &getAnalysis<Analysis>(); - GraphName = DOTGraphTraits<Analysis*>::getGraphName(Graph); - Title = GraphName + " for '" + F.getNameStr() + "' function"; - ViewGraph(Graph, Name, OnlyBBS, Title); - - return false; - } - - virtual void getAnalysisUsage(AnalysisUsage &AU) const { - AU.setPreservesAll(); - AU.addRequired<Analysis>(); - } -}; - struct DomViewer : public DOTGraphTraitsViewer<DominatorTree, false> { static char ID; diff --git a/lib/Analysis/IPA/CallGraph.cpp b/lib/Analysis/IPA/CallGraph.cpp index 8c43aa1..2bde56d7 100644 --- a/lib/Analysis/IPA/CallGraph.cpp +++ b/lib/Analysis/IPA/CallGraph.cpp @@ -158,8 +158,11 @@ private: // destroy - Release memory for the call graph virtual void destroy() { /// CallsExternalNode is not in the function map, delete it explicitly. - delete CallsExternalNode; - CallsExternalNode = 0; + if (CallsExternalNode) { + CallsExternalNode->allReferencesDropped(); + delete CallsExternalNode; + CallsExternalNode = 0; + } CallGraph::destroy(); } }; @@ -181,6 +184,14 @@ void CallGraph::initialize(Module &M) { void CallGraph::destroy() { if (FunctionMap.empty()) return; + // Reset all node's use counts to zero before deleting them to prevent an + // assertion from firing. 
+#ifndef NDEBUG + for (FunctionMapTy::iterator I = FunctionMap.begin(), E = FunctionMap.end(); + I != E; ++I) + I->second->allReferencesDropped(); +#endif + for (FunctionMapTy::iterator I = FunctionMap.begin(), E = FunctionMap.end(); I != E; ++I) delete I->second; @@ -233,14 +244,16 @@ void CallGraphNode::print(raw_ostream &OS) const { else OS << "Call graph node <<null function>>"; - OS << "<<0x" << this << ">> #uses=" << getNumReferences() << '\n'; + OS << "<<" << this << ">> #uses=" << getNumReferences() << '\n'; - for (const_iterator I = begin(), E = end(); I != E; ++I) + for (const_iterator I = begin(), E = end(); I != E; ++I) { + OS << " CS<" << I->first << "> calls "; if (Function *FI = I->second->getFunction()) - OS << " Calls function '" << FI->getName() <<"'\n"; - else - OS << " Calls external node\n"; - OS << "\n"; + OS << "function '" << FI->getName() <<"'\n"; + else + OS << "external node\n"; + } + OS << '\n'; } void CallGraphNode::dump() const { print(dbgs()); } diff --git a/lib/Analysis/IPA/CallGraphSCCPass.cpp b/lib/Analysis/IPA/CallGraphSCCPass.cpp index fb08041..0c01ee5 100644 --- a/lib/Analysis/IPA/CallGraphSCCPass.cpp +++ b/lib/Analysis/IPA/CallGraphSCCPass.cpp @@ -22,11 +22,18 @@ #include "llvm/PassManagers.h" #include "llvm/Analysis/CallGraph.h" #include "llvm/ADT/SCCIterator.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/Timer.h" #include "llvm/Support/raw_ostream.h" using namespace llvm; +static cl::opt<unsigned> +MaxIterations("max-cg-scc-iterations", cl::ReallyHidden, cl::init(4)); + +STATISTIC(MaxSCCIterations, "Maximum CGSCCPassMgr iterations on one SCC"); + //===----------------------------------------------------------------------===// // CGPassManager // @@ -81,55 +88,31 @@ public: } private: - bool RunPassOnSCC(Pass *P, std::vector<CallGraphNode*> &CurSCC, - CallGraph &CG, bool &CallGraphUpToDate); - void RefreshCallGraph(std::vector<CallGraphNode*> &CurSCC, CallGraph &CG, + bool RunAllPassesOnSCC(CallGraphSCC &CurSCC, CallGraph &CG, + bool &DevirtualizedCall); + + bool RunPassOnSCC(Pass *P, CallGraphSCC &CurSCC, + CallGraph &CG, bool &CallGraphUpToDate, + bool &DevirtualizedCall); + bool RefreshCallGraph(CallGraphSCC &CurSCC, CallGraph &CG, bool IsCheckingMode); }; -/// PrintCallGraphPass - Print a Module corresponding to a call graph. -/// -class PrintCallGraphPass : public CallGraphSCCPass { -private: - std::string Banner; - raw_ostream &Out; // raw_ostream to print on. - -public: - static char ID; - PrintCallGraphPass() : CallGraphSCCPass(&ID), Out(dbgs()) {} - PrintCallGraphPass(const std::string &B, raw_ostream &o) - : CallGraphSCCPass(&ID), Banner(B), Out(o) {} - - virtual void getAnalysisUsage(AnalysisUsage &AU) const { - AU.setPreservesAll(); - } - - bool runOnSCC(std::vector<CallGraphNode *> &SCC) { - Out << Banner; - for (std::vector<CallGraphNode *>::iterator n = SCC.begin(), ne = SCC.end(); - n != ne; - ++n) { - (*n)->getFunction()->print(Out); - } - return false; - } -}; - } // end anonymous namespace. 
char CGPassManager::ID = 0; -char PrintCallGraphPass::ID = 0; -bool CGPassManager::RunPassOnSCC(Pass *P, std::vector<CallGraphNode*> &CurSCC, - CallGraph &CG, bool &CallGraphUpToDate) { +bool CGPassManager::RunPassOnSCC(Pass *P, CallGraphSCC &CurSCC, + CallGraph &CG, bool &CallGraphUpToDate, + bool &DevirtualizedCall) { bool Changed = false; PMDataManager *PM = P->getAsPMDataManager(); if (PM == 0) { CallGraphSCCPass *CGSP = (CallGraphSCCPass*)P; if (!CallGraphUpToDate) { - RefreshCallGraph(CurSCC, CG, false); + DevirtualizedCall |= RefreshCallGraph(CurSCC, CG, false); CallGraphUpToDate = true; } @@ -154,8 +137,9 @@ bool CGPassManager::RunPassOnSCC(Pass *P, std::vector<CallGraphNode*> &CurSCC, FPPassManager *FPP = (FPPassManager*)P; // Run pass P on all functions in the current SCC. - for (unsigned i = 0, e = CurSCC.size(); i != e; ++i) { - if (Function *F = CurSCC[i]->getFunction()) { + for (CallGraphSCC::iterator I = CurSCC.begin(), E = CurSCC.end(); + I != E; ++I) { + if (Function *F = (*I)->getFunction()) { dumpPassInfo(P, EXECUTION_MSG, ON_FUNCTION_MSG, F->getName()); TimeRegion PassTimer(getPassTimer(FPP)); Changed |= FPP->runOnFunction(*F); @@ -178,26 +162,39 @@ bool CGPassManager::RunPassOnSCC(Pass *P, std::vector<CallGraphNode*> &CurSCC, /// FunctionPasses have potentially munged the callgraph, and can be used after /// CallGraphSCC passes to verify that they correctly updated the callgraph. /// -void CGPassManager::RefreshCallGraph(std::vector<CallGraphNode*> &CurSCC, +/// This function returns true if it devirtualized an existing function call, +/// meaning it turned an indirect call into a direct call. This happens when +/// a function pass like GVN optimizes away stuff feeding the indirect call. +/// This never happens in checking mode. +/// +bool CGPassManager::RefreshCallGraph(CallGraphSCC &CurSCC, CallGraph &CG, bool CheckingMode) { DenseMap<Value*, CallGraphNode*> CallSites; DEBUG(dbgs() << "CGSCCPASSMGR: Refreshing SCC with " << CurSCC.size() << " nodes:\n"; - for (unsigned i = 0, e = CurSCC.size(); i != e; ++i) - CurSCC[i]->dump(); + for (CallGraphSCC::iterator I = CurSCC.begin(), E = CurSCC.end(); + I != E; ++I) + (*I)->dump(); ); bool MadeChange = false; + bool DevirtualizedCall = false; // Scan all functions in the SCC. - for (unsigned sccidx = 0, e = CurSCC.size(); sccidx != e; ++sccidx) { - CallGraphNode *CGN = CurSCC[sccidx]; + unsigned FunctionNo = 0; + for (CallGraphSCC::iterator SCCIdx = CurSCC.begin(), E = CurSCC.end(); + SCCIdx != E; ++SCCIdx, ++FunctionNo) { + CallGraphNode *CGN = *SCCIdx; Function *F = CGN->getFunction(); if (F == 0 || F->isDeclaration()) continue; // Walk the function body looking for call sites. Sync up the call sites in // CGN with those actually in the function. + + // Keep track of the number of direct and indirect calls that were + // invalidated and removed. + unsigned NumDirectRemoved = 0, NumIndirectRemoved = 0; // Get the set of call sites currently in the function. for (CallGraphNode::iterator I = CGN->begin(), E = CGN->end(); I != E; ) { @@ -216,6 +213,12 @@ void CGPassManager::RefreshCallGraph(std::vector<CallGraphNode*> &CurSCC, assert(!CheckingMode && "CallGraphSCCPass did not update the CallGraph correctly!"); + // If this was an indirect call site, count it. + if (I->second->getFunction() == 0) + ++NumIndirectRemoved; + else + ++NumDirectRemoved; + // Just remove the edge from the set of callees, keep track of whether // I points to the last element of the vector. 
bool WasLast = I + 1 == E; @@ -237,6 +240,9 @@ void CGPassManager::RefreshCallGraph(std::vector<CallGraphNode*> &CurSCC, } // Loop over all of the instructions in the function, getting the callsites. + // Keep track of the number of direct/indirect calls added. + unsigned NumDirectAdded = 0, NumIndirectAdded = 0; + for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB) for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I) { CallSite CS = CallSite::get(I); @@ -271,19 +277,21 @@ void CGPassManager::RefreshCallGraph(std::vector<CallGraphNode*> &CurSCC, // If not, we either went from a direct call to indirect, indirect to // direct, or direct to different direct. CallGraphNode *CalleeNode; - if (Function *Callee = CS.getCalledFunction()) + if (Function *Callee = CS.getCalledFunction()) { CalleeNode = CG.getOrInsertFunction(Callee); - else + // Keep track of whether we turned an indirect call into a direct + // one. + if (ExistingNode->getFunction() == 0) { + DevirtualizedCall = true; + DEBUG(dbgs() << " CGSCCPASSMGR: Devirtualized call to '" + << Callee->getName() << "'\n"); + } + } else { CalleeNode = CG.getCallsExternalNode(); + } // Update the edge target in CGN. - for (CallGraphNode::iterator I = CGN->begin(); ; ++I) { - assert(I != CGN->end() && "Didn't find call entry"); - if (I->first == CS.getInstruction()) { - I->second = CalleeNode; - break; - } - } + CGN->replaceCallEdge(CS, CS, CalleeNode); MadeChange = true; continue; } @@ -291,19 +299,34 @@ void CGPassManager::RefreshCallGraph(std::vector<CallGraphNode*> &CurSCC, assert(!CheckingMode && "CallGraphSCCPass did not update the CallGraph correctly!"); - // If the call site didn't exist in the CGN yet, add it. We assume that - // newly introduced call sites won't be indirect. This could be fixed - // in the future. + // If the call site didn't exist in the CGN yet, add it. CallGraphNode *CalleeNode; - if (Function *Callee = CS.getCalledFunction()) + if (Function *Callee = CS.getCalledFunction()) { CalleeNode = CG.getOrInsertFunction(Callee); - else + ++NumDirectAdded; + } else { CalleeNode = CG.getCallsExternalNode(); + ++NumIndirectAdded; + } CGN->addCalledFunction(CS, CalleeNode); MadeChange = true; } + // We scanned the old callgraph node, removing invalidated call sites and + // then added back newly found call sites. One thing that can happen is + // that an old indirect call site was deleted and replaced with a new direct + // call. In this case, we have devirtualized a call, and CGSCCPM would like + // to iteratively optimize the new code. Unfortunately, we don't really + // have a great way to detect when this happens. As an approximation, we + // just look at whether the number of indirect calls is reduced and the + // number of direct calls is increased. There are tons of ways to fool this + // (e.g. DCE'ing an indirect call and duplicating an unrelated block with a + // direct call) but this is close enough. + if (NumIndirectRemoved > NumIndirectAdded && + NumDirectRemoved < NumDirectAdded) + DevirtualizedCall = true; + // After scanning this function, if we still have entries in callsites, then // they are dangling pointers. WeakVH should save us for this, so abort if // this happens. @@ -311,18 +334,85 @@ void CGPassManager::RefreshCallGraph(std::vector<CallGraphNode*> &CurSCC, // Periodically do an explicit clear to remove tombstones when processing // large scc's. 
- if ((sccidx & 15) == 0) + if ((FunctionNo & 15) == 15) CallSites.clear(); } DEBUG(if (MadeChange) { dbgs() << "CGSCCPASSMGR: Refreshed SCC is now:\n"; - for (unsigned i = 0, e = CurSCC.size(); i != e; ++i) - CurSCC[i]->dump(); + for (CallGraphSCC::iterator I = CurSCC.begin(), E = CurSCC.end(); + I != E; ++I) + (*I)->dump(); + if (DevirtualizedCall) + dbgs() << "CGSCCPASSMGR: Refresh devirtualized a call!\n"; + } else { dbgs() << "CGSCCPASSMGR: SCC Refresh didn't change call graph.\n"; } ); + + return DevirtualizedCall; +} + +/// RunAllPassesOnSCC - Execute the body of the entire pass manager on the +/// specified SCC. This keeps track of whether a function pass devirtualizes +/// any calls and returns it in DevirtualizedCall. +bool CGPassManager::RunAllPassesOnSCC(CallGraphSCC &CurSCC, CallGraph &CG, + bool &DevirtualizedCall) { + bool Changed = false; + + // CallGraphUpToDate - Keep track of whether the callgraph is known to be + // up-to-date or not. The CGSSC pass manager runs two types of passes: + // CallGraphSCC Passes and other random function passes. Because other + // random function passes are not CallGraph aware, they may clobber the + // call graph by introducing new calls or deleting other ones. This flag + // is set to false when we run a function pass so that we know to clean up + // the callgraph when we need to run a CGSCCPass again. + bool CallGraphUpToDate = true; + + // Run all passes on current SCC. + for (unsigned PassNo = 0, e = getNumContainedPasses(); + PassNo != e; ++PassNo) { + Pass *P = getContainedPass(PassNo); + + // If we're in -debug-pass=Executions mode, construct the SCC node list, + // otherwise avoid constructing this string as it is expensive. + if (isPassDebuggingExecutionsOrMore()) { + std::string Functions; + #ifndef NDEBUG + raw_string_ostream OS(Functions); + for (CallGraphSCC::iterator I = CurSCC.begin(), E = CurSCC.end(); + I != E; ++I) { + if (I != CurSCC.begin()) OS << ", "; + (*I)->print(OS); + } + OS.flush(); + #endif + dumpPassInfo(P, EXECUTION_MSG, ON_CG_MSG, Functions); + } + dumpRequiredSet(P); + + initializeAnalysisImpl(P); + + // Actually run this pass on the current SCC. + Changed |= RunPassOnSCC(P, CurSCC, CG, + CallGraphUpToDate, DevirtualizedCall); + + if (Changed) + dumpPassInfo(P, MODIFICATION_MSG, ON_CG_MSG, ""); + dumpPreservedSet(P); + + verifyPreservedAnalysis(P); + removeNotPreservedAnalysis(P); + recordAvailableAnalysis(P); + removeDeadPasses(P, "", ON_CG_MSG); + } + + // If the callgraph was left out of date (because the last pass run was a + // functionpass), refresh it before we move on to the next SCC. + if (!CallGraphUpToDate) + DevirtualizedCall |= RefreshCallGraph(CurSCC, CG, false); + return Changed; } /// run - Execute all of the passes scheduled for execution. Keep track of @@ -330,72 +420,53 @@ void CGPassManager::RefreshCallGraph(std::vector<CallGraphNode*> &CurSCC, bool CGPassManager::runOnModule(Module &M) { CallGraph &CG = getAnalysis<CallGraph>(); bool Changed = doInitialization(CG); - - std::vector<CallGraphNode*> CurSCC; // Walk the callgraph in bottom-up SCC order. - for (scc_iterator<CallGraph*> CGI = scc_begin(&CG), E = scc_end(&CG); - CGI != E;) { + scc_iterator<CallGraph*> CGI = scc_begin(&CG); + + CallGraphSCC CurSCC(&CGI); + while (!CGI.isAtEnd()) { // Copy the current SCC and increment past it so that the pass can hack // on the SCC if it wants to without invalidating our iterator. 
- CurSCC = *CGI; + std::vector<CallGraphNode*> &NodeVec = *CGI; + CurSCC.initialize(&NodeVec[0], &NodeVec[0]+NodeVec.size()); ++CGI; + // At the top level, we run all the passes in this pass manager on the + // functions in this SCC. However, we support iterative compilation in the + // case where a function pass devirtualizes a call to a function. For + // example, it is very common for a function pass (often GVN or instcombine) + // to eliminate the addressing that feeds into a call. With that improved + // information, we would like the call to be an inline candidate, infer + // mod-ref information etc. + // + // Because of this, we allow iteration up to a specified iteration count. + // This only happens in the case of a devirtualized call, so we only burn + // compile time in the case that we're making progress. We also have a hard + // iteration count limit in case there is crazy code. + unsigned Iteration = 0; + bool DevirtualizedCall = false; + do { + DEBUG(if (Iteration) + dbgs() << " SCCPASSMGR: Re-visiting SCC, iteration #" + << Iteration << '\n'); + DevirtualizedCall = false; + Changed |= RunAllPassesOnSCC(CurSCC, CG, DevirtualizedCall); + } while (Iteration++ < MaxIterations && DevirtualizedCall); - // CallGraphUpToDate - Keep track of whether the callgraph is known to be - // up-to-date or not. The CGSSC pass manager runs two types of passes: - // CallGraphSCC Passes and other random function passes. Because other - // random function passes are not CallGraph aware, they may clobber the - // call graph by introducing new calls or deleting other ones. This flag - // is set to false when we run a function pass so that we know to clean up - // the callgraph when we need to run a CGSCCPass again. - bool CallGraphUpToDate = true; + if (DevirtualizedCall) + DEBUG(dbgs() << " CGSCCPASSMGR: Stopped iteration after " << Iteration + << " times, due to -max-cg-scc-iterations\n"); - // Run all passes on current SCC. - for (unsigned PassNo = 0, e = getNumContainedPasses(); - PassNo != e; ++PassNo) { - Pass *P = getContainedPass(PassNo); - - // If we're in -debug-pass=Executions mode, construct the SCC node list, - // otherwise avoid constructing this string as it is expensive. - if (isPassDebuggingExecutionsOrMore()) { - std::string Functions; -#ifndef NDEBUG - raw_string_ostream OS(Functions); - for (unsigned i = 0, e = CurSCC.size(); i != e; ++i) { - if (i) OS << ", "; - CurSCC[i]->print(OS); - } - OS.flush(); -#endif - dumpPassInfo(P, EXECUTION_MSG, ON_CG_MSG, Functions); - } - dumpRequiredSet(P); - - initializeAnalysisImpl(P); - - // Actually run this pass on the current SCC. - Changed |= RunPassOnSCC(P, CurSCC, CG, CallGraphUpToDate); - - if (Changed) - dumpPassInfo(P, MODIFICATION_MSG, ON_CG_MSG, ""); - dumpPreservedSet(P); - - verifyPreservedAnalysis(P); - removeNotPreservedAnalysis(P); - recordAvailableAnalysis(P); - removeDeadPasses(P, "", ON_CG_MSG); - } + if (Iteration > MaxSCCIterations) + MaxSCCIterations = Iteration; - // If the callgraph was left out of date (because the last pass run was a - // functionpass), refresh it before we move on to the next SCC. 
- if (!CallGraphUpToDate) - RefreshCallGraph(CurSCC, CG, false); } Changed |= doFinalization(CG); return Changed; } + /// Initialize CG bool CGPassManager::doInitialization(CallGraph &CG) { bool Changed = false; @@ -426,11 +497,32 @@ bool CGPassManager::doFinalization(CallGraph &CG) { return Changed; } -Pass *CallGraphSCCPass::createPrinterPass(raw_ostream &O, - const std::string &Banner) const { - return new PrintCallGraphPass(Banner, O); +//===----------------------------------------------------------------------===// +// CallGraphSCC Implementation +//===----------------------------------------------------------------------===// + +/// ReplaceNode - This informs the SCC and the pass manager that the specified +/// Old node has been deleted, and New is to be used in its place. +void CallGraphSCC::ReplaceNode(CallGraphNode *Old, CallGraphNode *New) { + assert(Old != New && "Should not replace node with self"); + for (unsigned i = 0; ; ++i) { + assert(i != Nodes.size() && "Node not in SCC"); + if (Nodes[i] != Old) continue; + Nodes[i] = New; + break; + } + + // Update the active scc_iterator so that it doesn't contain dangling + // pointers to the old CallGraphNode. + scc_iterator<CallGraph*> *CGI = (scc_iterator<CallGraph*>*)Context; + CGI->ReplaceNode(Old, New); } + +//===----------------------------------------------------------------------===// +// CallGraphSCCPass Implementation +//===----------------------------------------------------------------------===// + /// Assign pass manager to manage this pass. void CallGraphSCCPass::assignPassManager(PMStack &PMS, PassManagerType PreferredType) { @@ -475,3 +567,43 @@ void CallGraphSCCPass::getAnalysisUsage(AnalysisUsage &AU) const { AU.addRequired<CallGraph>(); AU.addPreserved<CallGraph>(); } + + +//===----------------------------------------------------------------------===// +// PrintCallGraphPass Implementation +//===----------------------------------------------------------------------===// + +namespace { + /// PrintCallGraphPass - Print a Module corresponding to a call graph. + /// + class PrintCallGraphPass : public CallGraphSCCPass { + std::string Banner; + raw_ostream &Out; // raw_ostream to print on. + + public: + static char ID; + PrintCallGraphPass() : CallGraphSCCPass(&ID), Out(dbgs()) {} + PrintCallGraphPass(const std::string &B, raw_ostream &o) + : CallGraphSCCPass(&ID), Banner(B), Out(o) {} + + virtual void getAnalysisUsage(AnalysisUsage &AU) const { + AU.setPreservesAll(); + } + + bool runOnSCC(CallGraphSCC &SCC) { + Out << Banner; + for (CallGraphSCC::iterator I = SCC.begin(), E = SCC.end(); I != E; ++I) + (*I)->getFunction()->print(Out); + return false; + } + }; + +} // end anonymous namespace. + +char PrintCallGraphPass::ID = 0; + +Pass *CallGraphSCCPass::createPrinterPass(raw_ostream &O, + const std::string &Banner) const { + return new PrintCallGraphPass(Banner, O); +} + diff --git a/lib/Analysis/IVUsers.cpp b/lib/Analysis/IVUsers.cpp index 47b5d4a..2c997da 100644 --- a/lib/Analysis/IVUsers.cpp +++ b/lib/Analysis/IVUsers.cpp @@ -36,146 +36,34 @@ Pass *llvm::createIVUsersPass() { return new IVUsers(); } -/// CollectSubexprs - Split S into subexpressions which can be pulled out into -/// separate registers. -static void CollectSubexprs(const SCEV *S, - SmallVectorImpl<const SCEV *> &Ops, - ScalarEvolution &SE) { - if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(S)) { - // Break out add operands. 
- for (SCEVAddExpr::op_iterator I = Add->op_begin(), E = Add->op_end(); - I != E; ++I) - CollectSubexprs(*I, Ops, SE); - return; - } else if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S)) { - // Split a non-zero base out of an addrec. - if (!AR->getStart()->isZero()) { - CollectSubexprs(AR->getStart(), Ops, SE); - CollectSubexprs(SE.getAddRecExpr(SE.getIntegerSCEV(0, AR->getType()), - AR->getStepRecurrence(SE), - AR->getLoop()), Ops, SE); - return; - } - } - - // Otherwise use the value itself. - Ops.push_back(S); -} +/// isInteresting - Test whether the given expression is "interesting" when +/// used by the given expression, within the context of analyzing the +/// given loop. +static bool isInteresting(const SCEV *S, const Instruction *I, const Loop *L) { + // Anything loop-invariant is interesting. + if (!isa<SCEVUnknown>(S) && S->isLoopInvariant(L)) + return true; -/// getSCEVStartAndStride - Compute the start and stride of this expression, -/// returning false if the expression is not a start/stride pair, or true if it -/// is. The stride must be a loop invariant expression, but the start may be -/// a mix of loop invariant and loop variant expressions. The start cannot, -/// however, contain an AddRec from a different loop, unless that loop is an -/// outer loop of the current loop. -static bool getSCEVStartAndStride(const SCEV *&SH, Loop *L, Loop *UseLoop, - const SCEV *&Start, const SCEV *&Stride, - ScalarEvolution *SE, DominatorTree *DT) { - const SCEV *TheAddRec = Start; // Initialize to zero. - - // If the outer level is an AddExpr, the operands are all start values except - // for a nested AddRecExpr. - if (const SCEVAddExpr *AE = dyn_cast<SCEVAddExpr>(SH)) { - for (unsigned i = 0, e = AE->getNumOperands(); i != e; ++i) - if (const SCEVAddRecExpr *AddRec = - dyn_cast<SCEVAddRecExpr>(AE->getOperand(i))) - TheAddRec = SE->getAddExpr(AddRec, TheAddRec); - else - Start = SE->getAddExpr(Start, AE->getOperand(i)); - } else if (isa<SCEVAddRecExpr>(SH)) { - TheAddRec = SH; - } else { - return false; // not analyzable. + // An addrec is interesting if it's affine or if it has an interesting start. + if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S)) { + // Keep things simple. Don't touch loop-variant strides. + if (AR->getLoop() == L) + return AR->isAffine() || !L->contains(I); + // Otherwise recurse to see if the start value is interesting. + return isInteresting(AR->getStart(), I, L); } - // Break down TheAddRec into its component parts. - SmallVector<const SCEV *, 4> Subexprs; - CollectSubexprs(TheAddRec, Subexprs, *SE); - - // Look for an addrec on the current loop among the parts. - const SCEV *AddRecStride = 0; - for (SmallVectorImpl<const SCEV *>::iterator I = Subexprs.begin(), - E = Subexprs.end(); I != E; ++I) { - const SCEV *S = *I; - if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S)) - if (AR->getLoop() == L) { - *I = AR->getStart(); - AddRecStride = AR->getStepRecurrence(*SE); - break; - } - } - if (!AddRecStride) + // An add is interesting if any of its operands is. + if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(S)) { + for (SCEVAddExpr::op_iterator OI = Add->op_begin(), OE = Add->op_end(); + OI != OE; ++OI) + if (isInteresting(*OI, I, L)) + return true; return false; - - // Add up everything else into a start value (which may not be - // loop-invariant). - const SCEV *AddRecStart = SE->getAddExpr(Subexprs); - - // Use getSCEVAtScope to attempt to simplify other loops out of - // the picture. 
- AddRecStart = SE->getSCEVAtScope(AddRecStart, UseLoop); - - Start = SE->getAddExpr(Start, AddRecStart); - - // If stride is an instruction, make sure it properly dominates the header. - // Otherwise we could end up with a use before def situation. - if (!isa<SCEVConstant>(AddRecStride)) { - BasicBlock *Header = L->getHeader(); - if (!AddRecStride->properlyDominates(Header, DT)) - return false; - - DEBUG(dbgs() << "["; - WriteAsOperand(dbgs(), L->getHeader(), /*PrintType=*/false); - dbgs() << "] Variable stride: " << *AddRecStride << "\n"); } - Stride = AddRecStride; - return true; -} - -/// IVUseShouldUsePostIncValue - We have discovered a "User" of an IV expression -/// and now we need to decide whether the user should use the preinc or post-inc -/// value. If this user should use the post-inc version of the IV, return true. -/// -/// Choosing wrong here can break dominance properties (if we choose to use the -/// post-inc value when we cannot) or it can end up adding extra live-ranges to -/// the loop, resulting in reg-reg copies (if we use the pre-inc value when we -/// should use the post-inc value). -static bool IVUseShouldUsePostIncValue(Instruction *User, Instruction *IV, - const Loop *L, DominatorTree *DT) { - // If the user is in the loop, use the preinc value. - if (L->contains(User)) return false; - - BasicBlock *LatchBlock = L->getLoopLatch(); - if (!LatchBlock) - return false; - - // Ok, the user is outside of the loop. If it is dominated by the latch - // block, use the post-inc value. - if (DT->dominates(LatchBlock, User->getParent())) - return true; - - // There is one case we have to be careful of: PHI nodes. These little guys - // can live in blocks that are not dominated by the latch block, but (since - // their uses occur in the predecessor block, not the block the PHI lives in) - // should still use the post-inc value. Check for this case now. - PHINode *PN = dyn_cast<PHINode>(User); - if (!PN) return false; // not a phi, not dominated by latch block. - - // Look at all of the uses of IV by the PHI node. If any use corresponds to - // a block that is not dominated by the latch block, give up and use the - // preincremented value. - unsigned NumUses = 0; - for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) - if (PN->getIncomingValue(i) == IV) { - ++NumUses; - if (!DT->dominates(LatchBlock, PN->getIncomingBlock(i))) - return false; - } - - // Okay, all uses of IV by PN are in predecessor blocks that really are - // dominated by the latch block. Use the post-incremented value. - return true; + // Nothing else is interesting here. + return false; } /// AddUsersIfInteresting - Inspect the specified instruction. If it is a @@ -194,18 +82,10 @@ bool IVUsers::AddUsersIfInteresting(Instruction *I) { // Get the symbolic expression for this instruction. const SCEV *ISE = SE->getSCEV(I); - if (isa<SCEVCouldNotCompute>(ISE)) return false; - // Get the start and stride for this expression. - Loop *UseLoop = LI->getLoopFor(I->getParent()); - const SCEV *Start = SE->getIntegerSCEV(0, ISE->getType()); - const SCEV *Stride = Start; - - if (!getSCEVStartAndStride(ISE, L, UseLoop, Start, Stride, SE, DT)) - return false; // Non-reducible symbolic expression, bail out. - - // Keep things simple. Don't touch loop-variant strides. - if (!Stride->isLoopInvariant(L) && L->contains(I)) + // If we've come to an uninteresting expression, stop the traversal and + // call this a user. 
+ if (!isInteresting(ISE, I, L)) return false; SmallPtrSet<Instruction *, 4> UniqueUsers; @@ -241,27 +121,22 @@ bool IVUsers::AddUsersIfInteresting(Instruction *I) { } if (AddUserToIVUsers) { - // Okay, we found a user that we cannot reduce. Analyze the instruction - // and decide what to do with it. If we are a use inside of the loop, use - // the value before incrementation, otherwise use it after incrementation. - if (IVUseShouldUsePostIncValue(User, I, L, DT)) { - // The value used will be incremented by the stride more than we are - // expecting, so subtract this off. - const SCEV *NewStart = SE->getMinusSCEV(Start, Stride); - IVUses.push_back(new IVStrideUse(this, Stride, NewStart, User, I)); - IVUses.back().setIsUseOfPostIncrementedValue(true); - DEBUG(dbgs() << " USING POSTINC SCEV, START=" << *NewStart<< "\n"); - } else { - IVUses.push_back(new IVStrideUse(this, Stride, Start, User, I)); - } + // Okay, we found a user that we cannot reduce. + IVUses.push_back(new IVStrideUse(this, User, I)); + IVStrideUse &NewUse = IVUses.back(); + // Transform the expression into a normalized form. + ISE = TransformForPostIncUse(NormalizeAutodetect, + ISE, User, I, + NewUse.PostIncLoops, + *SE, *DT); + DEBUG(dbgs() << " NORMALIZED TO: " << *ISE << '\n'); } } return true; } -IVStrideUse &IVUsers::AddUser(const SCEV *Stride, const SCEV *Offset, - Instruction *User, Value *Operand) { - IVUses.push_back(new IVStrideUse(this, Stride, Offset, User, Operand)); +IVStrideUse &IVUsers::AddUser(Instruction *User, Value *Operand) { + IVUses.push_back(new IVStrideUse(this, User, Operand)); return IVUses.back(); } @@ -287,40 +162,11 @@ bool IVUsers::runOnLoop(Loop *l, LPPassManager &LPM) { // them by stride. Start by finding all of the PHI nodes in the header for // this loop. If they are induction variables, inspect their uses. for (BasicBlock::iterator I = L->getHeader()->begin(); isa<PHINode>(I); ++I) - AddUsersIfInteresting(I); + (void)AddUsersIfInteresting(I); return false; } -/// getReplacementExpr - Return a SCEV expression which computes the -/// value of the OperandValToReplace of the given IVStrideUse. -const SCEV *IVUsers::getReplacementExpr(const IVStrideUse &U) const { - // Start with zero. - const SCEV *RetVal = SE->getIntegerSCEV(0, U.getStride()->getType()); - // Create the basic add recurrence. - RetVal = SE->getAddRecExpr(RetVal, U.getStride(), L); - // Add the offset in a separate step, because it may be loop-variant. - RetVal = SE->getAddExpr(RetVal, U.getOffset()); - // For uses of post-incremented values, add an extra stride to compute - // the actual replacement value. - if (U.isUseOfPostIncrementedValue()) - RetVal = SE->getAddExpr(RetVal, U.getStride()); - return RetVal; -} - -/// getCanonicalExpr - Return a SCEV expression which computes the -/// value of the SCEV of the given IVStrideUse, ignoring the -/// isUseOfPostIncrementedValue flag. -const SCEV *IVUsers::getCanonicalExpr(const IVStrideUse &U) const { - // Start with zero. - const SCEV *RetVal = SE->getIntegerSCEV(0, U.getStride()->getType()); - // Create the basic add recurrence. - RetVal = SE->getAddRecExpr(RetVal, U.getStride(), L); - // Add the offset in a separate step, because it may be loop-variant. 
- RetVal = SE->getAddExpr(RetVal, U.getOffset()); - return RetVal; -} - void IVUsers::print(raw_ostream &OS, const Module *M) const { OS << "IV Users for loop "; WriteAsOperand(OS, L->getHeader(), false); @@ -337,10 +183,14 @@ void IVUsers::print(raw_ostream &OS, const Module *M) const { E = IVUses.end(); UI != E; ++UI) { OS << " "; WriteAsOperand(OS, UI->getOperandValToReplace(), false); - OS << " = " - << *getReplacementExpr(*UI); - if (UI->isUseOfPostIncrementedValue()) - OS << " (post-inc)"; + OS << " = " << *getReplacementExpr(*UI); + for (PostIncLoopSet::const_iterator + I = UI->PostIncLoops.begin(), + E = UI->PostIncLoops.end(); I != E; ++I) { + OS << " (post-inc with loop "; + WriteAsOperand(OS, (*I)->getHeader(), false); + OS << ")"; + } OS << " in "; UI->getUser()->print(OS, &Annotator); OS << '\n'; @@ -356,6 +206,49 @@ void IVUsers::releaseMemory() { IVUses.clear(); } +/// getReplacementExpr - Return a SCEV expression which computes the +/// value of the OperandValToReplace. +const SCEV *IVUsers::getReplacementExpr(const IVStrideUse &IU) const { + return SE->getSCEV(IU.getOperandValToReplace()); +} + +/// getExpr - Return the expression for the use. +const SCEV *IVUsers::getExpr(const IVStrideUse &IU) const { + return + TransformForPostIncUse(Normalize, getReplacementExpr(IU), + IU.getUser(), IU.getOperandValToReplace(), + const_cast<PostIncLoopSet &>(IU.getPostIncLoops()), + *SE, *DT); +} + +static const SCEVAddRecExpr *findAddRecForLoop(const SCEV *S, const Loop *L) { + if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S)) { + if (AR->getLoop() == L) + return AR; + return findAddRecForLoop(AR->getStart(), L); + } + + if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(S)) { + for (SCEVAddExpr::op_iterator I = Add->op_begin(), E = Add->op_end(); + I != E; ++I) + if (const SCEVAddRecExpr *AR = findAddRecForLoop(*I, L)) + return AR; + return 0; + } + + return 0; +} + +const SCEV *IVUsers::getStride(const IVStrideUse &IU, const Loop *L) const { + if (const SCEVAddRecExpr *AR = findAddRecForLoop(getExpr(IU), L)) + return AR->getStepRecurrence(*SE); + return 0; +} + +void IVStrideUse::transformToPostInc(const Loop *L) { + PostIncLoops.insert(L); +} + void IVStrideUse::deleted() { // Remove this user from the list. Parent->IVUses.erase(this); diff --git a/lib/Analysis/InlineCost.cpp b/lib/Analysis/InlineCost.cpp index c599e90..6271371 100644 --- a/lib/Analysis/InlineCost.cpp +++ b/lib/Analysis/InlineCost.cpp @@ -24,28 +24,29 @@ using namespace llvm; unsigned InlineCostAnalyzer::FunctionInfo:: CountCodeReductionForConstant(Value *V) { unsigned Reduction = 0; - for (Value::use_iterator UI = V->use_begin(), E = V->use_end(); UI != E; ++UI) - if (isa<BranchInst>(*UI) || isa<SwitchInst>(*UI)) { + for (Value::use_iterator UI = V->use_begin(), E = V->use_end(); UI != E;++UI){ + User *U = *UI; + if (isa<BranchInst>(U) || isa<SwitchInst>(U)) { // We will be able to eliminate all but one of the successors. - const TerminatorInst &TI = cast<TerminatorInst>(**UI); + const TerminatorInst &TI = cast<TerminatorInst>(*U); const unsigned NumSucc = TI.getNumSuccessors(); unsigned Instrs = 0; for (unsigned I = 0; I != NumSucc; ++I) Instrs += Metrics.NumBBInsts[TI.getSuccessor(I)]; // We don't know which blocks will be eliminated, so use the average size. 
Reduction += InlineConstants::InstrCost*Instrs*(NumSucc-1)/NumSucc; - } else if (CallInst *CI = dyn_cast<CallInst>(*UI)) { + } else if (CallInst *CI = dyn_cast<CallInst>(U)) { // Turning an indirect call into a direct call is a BIG win if (CI->getCalledValue() == V) Reduction += InlineConstants::IndirectCallBonus; - } else if (InvokeInst *II = dyn_cast<InvokeInst>(*UI)) { + } else if (InvokeInst *II = dyn_cast<InvokeInst>(U)) { // Turning an indirect call into a direct call is a BIG win if (II->getCalledValue() == V) Reduction += InlineConstants::IndirectCallBonus; } else { // Figure out if this instruction will be removed due to simple constant // propagation. - Instruction &Inst = cast<Instruction>(**UI); + Instruction &Inst = cast<Instruction>(*U); // We can't constant propagate instructions which have effects or // read memory. @@ -74,7 +75,7 @@ CountCodeReductionForConstant(Value *V) { Reduction += CountCodeReductionForConstant(&Inst); } } - + } return Reduction; } @@ -107,10 +108,10 @@ unsigned InlineCostAnalyzer::FunctionInfo:: return Reduction; } -// callIsSmall - If a call is likely to lower to a single target instruction, or -// is otherwise deemed small return true. -// TODO: Perhaps calls like memcpy, strcpy, etc? -static bool callIsSmall(const Function *F) { +/// callIsSmall - If a call is likely to lower to a single target instruction, +/// or is otherwise deemed small return true. +/// TODO: Perhaps calls like memcpy, strcpy, etc? +bool llvm::callIsSmall(const Function *F) { if (!F) return false; if (F->hasLocalLinkage()) return false; @@ -158,10 +159,18 @@ void CodeMetrics::analyzeBasicBlock(const BasicBlock *BB) { // it. This is a hack because we depend on the user marking their local // variables as volatile if they are live across a setjmp call, and they // probably won't do this in callers. - if (Function *F = CS.getCalledFunction()) + if (Function *F = CS.getCalledFunction()) { if (F->isDeclaration() && (F->getName() == "setjmp" || F->getName() == "_setjmp")) NeverInline = true; + + // If this call is to function itself, then the function is recursive. + // Inlining it into other functions is a bad idea, because this is + // basically just a form of loop peeling, and our metrics aren't useful + // for that case. + if (F == BB->getParent()) + NeverInline = true; + } if (!isa<IntrinsicInst>(II) && !callIsSmall(CS.getCalledFunction())) { // Each argument to a call takes on average one instruction to set up. @@ -249,10 +258,16 @@ void InlineCostAnalyzer::FunctionInfo::analyzeFunction(Function *F) { // function call or not. // InlineCost InlineCostAnalyzer::getInlineCost(CallSite CS, - SmallPtrSet<const Function *, 16> &NeverInline) { + SmallPtrSet<const Function*, 16> &NeverInline) { + return getInlineCost(CS, CS.getCalledFunction(), NeverInline); +} + +InlineCost InlineCostAnalyzer::getInlineCost(CallSite CS, + Function *Callee, + SmallPtrSet<const Function*, 16> &NeverInline) { Instruction *TheCall = CS.getInstruction(); - Function *Callee = CS.getCalledFunction(); Function *Caller = TheCall->getParent()->getParent(); + bool isDirectCall = CS.getCalledFunction() == Callee; // Don't inline functions which can be redefined at link-time to mean // something else. Don't inline functions marked noinline or call sites @@ -267,11 +282,11 @@ InlineCost InlineCostAnalyzer::getInlineCost(CallSite CS, // be inlined. This value may go negative. 
// int InlineCost = 0; - + // If there is only one call of the function, and it has internal linkage, // make it almost guaranteed to be inlined. // - if (Callee->hasLocalLinkage() && Callee->hasOneUse()) + if (Callee->hasLocalLinkage() && Callee->hasOneUse() && isDirectCall) InlineCost += InlineConstants::LastCallToStaticBonus; // If this function uses the coldcc calling convention, prefer not to inline @@ -288,31 +303,36 @@ InlineCost InlineCostAnalyzer::getInlineCost(CallSite CS, } else if (isa<UnreachableInst>(++BasicBlock::iterator(TheCall))) InlineCost += InlineConstants::NoreturnPenalty; - // Get information about the callee... - FunctionInfo &CalleeFI = CachedFunctionInfo[Callee]; + // Get information about the callee. + FunctionInfo *CalleeFI = &CachedFunctionInfo[Callee]; // If we haven't calculated this information yet, do so now. - if (CalleeFI.Metrics.NumBlocks == 0) - CalleeFI.analyzeFunction(Callee); + if (CalleeFI->Metrics.NumBlocks == 0) + CalleeFI->analyzeFunction(Callee); // If we should never inline this, return a huge cost. - if (CalleeFI.Metrics.NeverInline) + if (CalleeFI->Metrics.NeverInline) return InlineCost::getNever(); - // FIXME: It would be nice to kill off CalleeFI.NeverInline. Then we + // FIXME: It would be nice to kill off CalleeFI->NeverInline. Then we // could move this up and avoid computing the FunctionInfo for // things we are going to just return always inline for. This // requires handling setjmp somewhere else, however. if (!Callee->isDeclaration() && Callee->hasFnAttr(Attribute::AlwaysInline)) return InlineCost::getAlways(); - if (CalleeFI.Metrics.usesDynamicAlloca) { - // Get infomation about the caller... + if (CalleeFI->Metrics.usesDynamicAlloca) { + // Get infomation about the caller. FunctionInfo &CallerFI = CachedFunctionInfo[Caller]; // If we haven't calculated this information yet, do so now. - if (CallerFI.Metrics.NumBlocks == 0) + if (CallerFI.Metrics.NumBlocks == 0) { CallerFI.analyzeFunction(Caller); + + // Recompute the CalleeFI pointer, getting Caller could have invalidated + // it. + CalleeFI = &CachedFunctionInfo[Callee]; + } // Don't inline a callee with dynamic alloca into a caller without them. // Functions containing dynamic alloca's are inefficient in various ways; @@ -339,15 +359,15 @@ InlineCost InlineCostAnalyzer::getInlineCost(CallSite CS, // scalarization), so encourage the inlining of the function. // if (isa<AllocaInst>(I)) { - if (ArgNo < CalleeFI.ArgumentWeights.size()) - InlineCost -= CalleeFI.ArgumentWeights[ArgNo].AllocaWeight; + if (ArgNo < CalleeFI->ArgumentWeights.size()) + InlineCost -= CalleeFI->ArgumentWeights[ArgNo].AllocaWeight; // If this is a constant being passed into the function, use the argument // weights calculated for the callee to determine how much will be folded // away with this information. } else if (isa<Constant>(I)) { - if (ArgNo < CalleeFI.ArgumentWeights.size()) - InlineCost -= CalleeFI.ArgumentWeights[ArgNo].ConstantWeight; + if (ArgNo < CalleeFI->ArgumentWeights.size()) + InlineCost -= CalleeFI->ArgumentWeights[ArgNo].ConstantWeight; } } @@ -355,10 +375,10 @@ InlineCost InlineCostAnalyzer::getInlineCost(CallSite CS, // likely to be inlined, look at factors that make us not want to inline it. // Calls usually take a long time, so they make the inlining gain smaller. - InlineCost += CalleeFI.Metrics.NumCalls * InlineConstants::CallPenalty; + InlineCost += CalleeFI->Metrics.NumCalls * InlineConstants::CallPenalty; // Look at the size of the callee. Each instruction counts as 5. 
- InlineCost += CalleeFI.Metrics.NumInsts*InlineConstants::InstrCost; + InlineCost += CalleeFI->Metrics.NumInsts*InlineConstants::InstrCost; return llvm::InlineCost::get(InlineCost); } @@ -368,7 +388,7 @@ InlineCost InlineCostAnalyzer::getInlineCost(CallSite CS, float InlineCostAnalyzer::getInlineFudgeFactor(CallSite CS) { Function *Callee = CS.getCalledFunction(); - // Get information about the callee... + // Get information about the callee. FunctionInfo &CalleeFI = CachedFunctionInfo[Callee]; // If we haven't calculated this information yet, do so now. @@ -392,41 +412,49 @@ float InlineCostAnalyzer::getInlineFudgeFactor(CallSite CS) { /// growCachedCostInfo - update the cached cost info for Caller after Callee has /// been inlined. void -InlineCostAnalyzer::growCachedCostInfo(Function* Caller, Function* Callee) { - FunctionInfo &CallerFI = CachedFunctionInfo[Caller]; +InlineCostAnalyzer::growCachedCostInfo(Function *Caller, Function *Callee) { + CodeMetrics &CallerMetrics = CachedFunctionInfo[Caller].Metrics; // For small functions we prefer to recalculate the cost for better accuracy. - if (CallerFI.Metrics.NumBlocks < 10 || CallerFI.Metrics.NumInsts < 1000) { + if (CallerMetrics.NumBlocks < 10 || CallerMetrics.NumInsts < 1000) { resetCachedCostInfo(Caller); return; } // For large functions, we can save a lot of computation time by skipping // recalculations. - if (CallerFI.Metrics.NumCalls > 0) - --CallerFI.Metrics.NumCalls; - - if (Callee) { - FunctionInfo &CalleeFI = CachedFunctionInfo[Callee]; - if (!CalleeFI.Metrics.NumBlocks) { - resetCachedCostInfo(Caller); - return; - } - CallerFI.Metrics.NeverInline |= CalleeFI.Metrics.NeverInline; - CallerFI.Metrics.usesDynamicAlloca |= CalleeFI.Metrics.usesDynamicAlloca; - - CallerFI.Metrics.NumInsts += CalleeFI.Metrics.NumInsts; - CallerFI.Metrics.NumBlocks += CalleeFI.Metrics.NumBlocks; - CallerFI.Metrics.NumCalls += CalleeFI.Metrics.NumCalls; - CallerFI.Metrics.NumVectorInsts += CalleeFI.Metrics.NumVectorInsts; - CallerFI.Metrics.NumRets += CalleeFI.Metrics.NumRets; - - // analyzeBasicBlock counts each function argument as an inst. - if (CallerFI.Metrics.NumInsts >= Callee->arg_size()) - CallerFI.Metrics.NumInsts -= Callee->arg_size(); - else - CallerFI.Metrics.NumInsts = 0; + if (CallerMetrics.NumCalls > 0) + --CallerMetrics.NumCalls; + + if (Callee == 0) return; + + CodeMetrics &CalleeMetrics = CachedFunctionInfo[Callee].Metrics; + + // If we don't have metrics for the callee, don't recalculate them just to + // update an approximation in the caller. Instead, just recalculate the + // caller info from scratch. + if (CalleeMetrics.NumBlocks == 0) { + resetCachedCostInfo(Caller); + return; } + + // Since CalleeMetrics were already calculated, we know that the CallerMetrics + // reference isn't invalidated: both were in the DenseMap. + CallerMetrics.NeverInline |= CalleeMetrics.NeverInline; + CallerMetrics.usesDynamicAlloca |= CalleeMetrics.usesDynamicAlloca; + + CallerMetrics.NumInsts += CalleeMetrics.NumInsts; + CallerMetrics.NumBlocks += CalleeMetrics.NumBlocks; + CallerMetrics.NumCalls += CalleeMetrics.NumCalls; + CallerMetrics.NumVectorInsts += CalleeMetrics.NumVectorInsts; + CallerMetrics.NumRets += CalleeMetrics.NumRets; + + // analyzeBasicBlock counts each function argument as an inst. + if (CallerMetrics.NumInsts >= Callee->arg_size()) + CallerMetrics.NumInsts -= Callee->arg_size(); + else + CallerMetrics.NumInsts = 0; + // We are not updating the argumentweights. 
We have already determined that // Caller is a fairly large function, so we accept the loss of precision. } diff --git a/lib/Analysis/InstructionSimplify.cpp b/lib/Analysis/InstructionSimplify.cpp index 8288e96..dbefc2d 100644 --- a/lib/Analysis/InstructionSimplify.cpp +++ b/lib/Analysis/InstructionSimplify.cpp @@ -314,6 +314,35 @@ Value *llvm::SimplifyFCmpInst(unsigned Predicate, Value *LHS, Value *RHS, return 0; } +/// SimplifySelectInst - Given operands for a SelectInst, see if we can fold +/// the result. If not, this returns null. +Value *llvm::SimplifySelectInst(Value *CondVal, Value *TrueVal, Value *FalseVal, + const TargetData *TD) { + // select true, X, Y -> X + // select false, X, Y -> Y + if (ConstantInt *CB = dyn_cast<ConstantInt>(CondVal)) + return CB->getZExtValue() ? TrueVal : FalseVal; + + // select C, X, X -> X + if (TrueVal == FalseVal) + return TrueVal; + + if (isa<UndefValue>(TrueVal)) // select C, undef, X -> X + return FalseVal; + if (isa<UndefValue>(FalseVal)) // select C, X, undef -> X + return TrueVal; + if (isa<UndefValue>(CondVal)) { // select undef, X, Y -> X or Y + if (isa<Constant>(TrueVal)) + return TrueVal; + return FalseVal; + } + + + + return 0; +} + + /// SimplifyGEPInst - Given operands for an GetElementPtrInst, see if we can /// fold the result. If not, this returns null. Value *llvm::SimplifyGEPInst(Value *const *Ops, unsigned NumOps, @@ -391,6 +420,9 @@ Value *llvm::SimplifyInstruction(Instruction *I, const TargetData *TD) { case Instruction::FCmp: return SimplifyFCmpInst(cast<FCmpInst>(I)->getPredicate(), I->getOperand(0), I->getOperand(1), TD); + case Instruction::Select: + return SimplifySelectInst(I->getOperand(0), I->getOperand(1), + I->getOperand(2), TD); case Instruction::GetElementPtr: { SmallVector<Value*, 8> Ops(I->op_begin(), I->op_end()); return SimplifyGEPInst(&Ops[0], Ops.size(), TD); diff --git a/lib/Analysis/Lint.cpp b/lib/Analysis/Lint.cpp new file mode 100644 index 0000000..25d4f95 --- /dev/null +++ b/lib/Analysis/Lint.cpp @@ -0,0 +1,495 @@ +//===-- Lint.cpp - Check for common errors in LLVM IR ---------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This pass statically checks for common and easily-identified constructs +// which produce undefined or likely unintended behavior in LLVM IR. +// +// It is not a guarantee of correctness, in two ways. First, it isn't +// comprehensive. There are checks which could be done statically which are +// not yet implemented. Some of these are indicated by TODO comments, but +// those aren't comprehensive either. Second, many conditions cannot be +// checked statically. This pass does no dynamic instrumentation, so it +// can't check for all possible problems. +// +// Another limitation is that it assumes all code will be executed. A store +// through a null pointer in a basic block which is never reached is harmless, +// but this pass will warn about it anyway. +// +// Optimization passes may make conditions that this pass checks for more or +// less obvious. If an optimization pass appears to be introducing a warning, +// it may be that the optimization pass is merely exposing an existing +// condition in the code. +// +// This code may be run before instcombine. 
In many cases, instcombine checks +// for the same kinds of things and turns instructions with undefined behavior +// into unreachable (or equivalent). Because of this, this pass makes some +// effort to look through bitcasts and so on. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Analysis/Passes.h" +#include "llvm/Analysis/AliasAnalysis.h" +#include "llvm/Analysis/Lint.h" +#include "llvm/Analysis/ValueTracking.h" +#include "llvm/Assembly/Writer.h" +#include "llvm/Target/TargetData.h" +#include "llvm/Pass.h" +#include "llvm/PassManager.h" +#include "llvm/IntrinsicInst.h" +#include "llvm/Function.h" +#include "llvm/Support/CallSite.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/InstVisitor.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/ADT/STLExtras.h" +using namespace llvm; + +namespace { + namespace MemRef { + static unsigned Read = 1; + static unsigned Write = 2; + static unsigned Callee = 4; + static unsigned Branchee = 8; + } + + class Lint : public FunctionPass, public InstVisitor<Lint> { + friend class InstVisitor<Lint>; + + void visitFunction(Function &F); + + void visitCallSite(CallSite CS); + void visitMemoryReference(Instruction &I, Value *Ptr, unsigned Align, + const Type *Ty, unsigned Flags); + + void visitCallInst(CallInst &I); + void visitInvokeInst(InvokeInst &I); + void visitReturnInst(ReturnInst &I); + void visitLoadInst(LoadInst &I); + void visitStoreInst(StoreInst &I); + void visitXor(BinaryOperator &I); + void visitSub(BinaryOperator &I); + void visitLShr(BinaryOperator &I); + void visitAShr(BinaryOperator &I); + void visitShl(BinaryOperator &I); + void visitSDiv(BinaryOperator &I); + void visitUDiv(BinaryOperator &I); + void visitSRem(BinaryOperator &I); + void visitURem(BinaryOperator &I); + void visitAllocaInst(AllocaInst &I); + void visitVAArgInst(VAArgInst &I); + void visitIndirectBrInst(IndirectBrInst &I); + void visitExtractElementInst(ExtractElementInst &I); + void visitInsertElementInst(InsertElementInst &I); + void visitUnreachableInst(UnreachableInst &I); + + public: + Module *Mod; + AliasAnalysis *AA; + TargetData *TD; + + std::string Messages; + raw_string_ostream MessagesStr; + + static char ID; // Pass identification, replacement for typeid + Lint() : FunctionPass(&ID), MessagesStr(Messages) {} + + virtual bool runOnFunction(Function &F); + + virtual void getAnalysisUsage(AnalysisUsage &AU) const { + AU.setPreservesAll(); + AU.addRequired<AliasAnalysis>(); + } + virtual void print(raw_ostream &O, const Module *M) const {} + + void WriteValue(const Value *V) { + if (!V) return; + if (isa<Instruction>(V)) { + MessagesStr << *V << '\n'; + } else { + WriteAsOperand(MessagesStr, V, true, Mod); + MessagesStr << '\n'; + } + } + + void WriteType(const Type *T) { + if (!T) return; + MessagesStr << ' '; + WriteTypeSymbolic(MessagesStr, T, Mod); + } + + // CheckFailed - A check failed, so print out the condition and the message + // that failed. This provides a nice place to put a breakpoint if you want + // to see why something is not correct. 
+ void CheckFailed(const Twine &Message, + const Value *V1 = 0, const Value *V2 = 0, + const Value *V3 = 0, const Value *V4 = 0) { + MessagesStr << Message.str() << "\n"; + WriteValue(V1); + WriteValue(V2); + WriteValue(V3); + WriteValue(V4); + } + + void CheckFailed(const Twine &Message, const Value *V1, + const Type *T2, const Value *V3 = 0) { + MessagesStr << Message.str() << "\n"; + WriteValue(V1); + WriteType(T2); + WriteValue(V3); + } + + void CheckFailed(const Twine &Message, const Type *T1, + const Type *T2 = 0, const Type *T3 = 0) { + MessagesStr << Message.str() << "\n"; + WriteType(T1); + WriteType(T2); + WriteType(T3); + } + }; +} + +char Lint::ID = 0; +static RegisterPass<Lint> +X("lint", "Statically lint-checks LLVM IR", false, true); + +// Assert - We know that cond should be true, if not print an error message. +#define Assert(C, M) \ + do { if (!(C)) { CheckFailed(M); return; } } while (0) +#define Assert1(C, M, V1) \ + do { if (!(C)) { CheckFailed(M, V1); return; } } while (0) +#define Assert2(C, M, V1, V2) \ + do { if (!(C)) { CheckFailed(M, V1, V2); return; } } while (0) +#define Assert3(C, M, V1, V2, V3) \ + do { if (!(C)) { CheckFailed(M, V1, V2, V3); return; } } while (0) +#define Assert4(C, M, V1, V2, V3, V4) \ + do { if (!(C)) { CheckFailed(M, V1, V2, V3, V4); return; } } while (0) + +// Lint::run - This is the main Analysis entry point for a +// function. +// +bool Lint::runOnFunction(Function &F) { + Mod = F.getParent(); + AA = &getAnalysis<AliasAnalysis>(); + TD = getAnalysisIfAvailable<TargetData>(); + visit(F); + dbgs() << MessagesStr.str(); + return false; +} + +void Lint::visitFunction(Function &F) { + // This isn't undefined behavior, it's just a little unusual, and it's a + // fairly common mistake to neglect to name a function. + Assert1(F.hasName() || F.hasLocalLinkage(), + "Unusual: Unnamed function with non-local linkage", &F); +} + +void Lint::visitCallSite(CallSite CS) { + Instruction &I = *CS.getInstruction(); + Value *Callee = CS.getCalledValue(); + + // TODO: Check function alignment? + visitMemoryReference(I, Callee, 0, 0, MemRef::Callee); + + if (Function *F = dyn_cast<Function>(Callee->stripPointerCasts())) { + Assert1(CS.getCallingConv() == F->getCallingConv(), + "Undefined behavior: Caller and callee calling convention differ", + &I); + + const FunctionType *FT = F->getFunctionType(); + unsigned NumActualArgs = unsigned(CS.arg_end()-CS.arg_begin()); + + Assert1(FT->isVarArg() ? + FT->getNumParams() <= NumActualArgs : + FT->getNumParams() == NumActualArgs, + "Undefined behavior: Call argument count mismatches callee " + "argument count", &I); + + // TODO: Check argument types (in case the callee was casted) + + // TODO: Check ABI-significant attributes. + + // TODO: Check noalias attribute. + + // TODO: Check sret attribute. + } + + // TODO: Check the "tail" keyword constraints. + + if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(&I)) + switch (II->getIntrinsicID()) { + default: break; + + // TODO: Check more intrinsics + + case Intrinsic::memcpy: { + MemCpyInst *MCI = cast<MemCpyInst>(&I); + visitMemoryReference(I, MCI->getSource(), MCI->getAlignment(), 0, + MemRef::Write); + visitMemoryReference(I, MCI->getDest(), MCI->getAlignment(), 0, + MemRef::Read); + + // Check that the memcpy arguments don't overlap. The AliasAnalysis API + // isn't expressive enough for what we really want to do. Known partial + // overlap is not distinguished from the case where nothing is known. 
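The Assert1 that follows only fires when the analysis proves MustAlias for a known, constant length: identical pointers with a nonzero length are guaranteed to overlap, while MayAlias covers both "completely disjoint" and "partially overlapping" and so cannot justify a warning. A minimal standalone sketch of the underlying interval reasoning, using plain integers in place of the AliasAnalysis interface (the names here are illustrative, not LLVM APIs):

#include <cassert>
#include <cstdint>

// Two byte ranges [p, p+len) and [q, q+len) overlap iff each one starts
// before the other one ends.
static bool rangesOverlap(uint64_t P, uint64_t Q, uint64_t Len) {
  return P < Q + Len && Q < P + Len;
}

// Mirror of the lint rule: with only an alias result to go on, a warning is
// justified only for MustAlias (identical pointers, guaranteed overlap when
// the length is nonzero). MayAlias cannot distinguish "disjoint" from
// "partially overlapping", so it must stay silent.
enum AliasResult { NoAlias, MayAlias, MustAlias };

static bool shouldWarnMemcpyOverlap(AliasResult AR, uint64_t KnownLen) {
  return AR == MustAlias && KnownLen > 0;
}

int main() {
  assert(rangesOverlap(100, 100, 8));   // identical pointers always overlap
  assert(rangesOverlap(100, 104, 8));   // partial overlap
  assert(!rangesOverlap(100, 108, 8));  // adjacent but disjoint
  assert(shouldWarnMemcpyOverlap(MustAlias, 8));
  assert(!shouldWarnMemcpyOverlap(MayAlias, 8)); // could be disjoint: no warning
  return 0;
}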
+ unsigned Size = 0; + if (const ConstantInt *Len = + dyn_cast<ConstantInt>(MCI->getLength()->stripPointerCasts())) + if (Len->getValue().isIntN(32)) + Size = Len->getValue().getZExtValue(); + Assert1(AA->alias(MCI->getSource(), Size, MCI->getDest(), Size) != + AliasAnalysis::MustAlias, + "Undefined behavior: memcpy source and destination overlap", &I); + break; + } + case Intrinsic::memmove: { + MemMoveInst *MMI = cast<MemMoveInst>(&I); + visitMemoryReference(I, MMI->getSource(), MMI->getAlignment(), 0, + MemRef::Write); + visitMemoryReference(I, MMI->getDest(), MMI->getAlignment(), 0, + MemRef::Read); + break; + } + case Intrinsic::memset: { + MemSetInst *MSI = cast<MemSetInst>(&I); + visitMemoryReference(I, MSI->getDest(), MSI->getAlignment(), 0, + MemRef::Write); + break; + } + + case Intrinsic::vastart: + Assert1(I.getParent()->getParent()->isVarArg(), + "Undefined behavior: va_start called in a non-varargs function", + &I); + + visitMemoryReference(I, CS.getArgument(0), 0, 0, + MemRef::Read | MemRef::Write); + break; + case Intrinsic::vacopy: + visitMemoryReference(I, CS.getArgument(0), 0, 0, MemRef::Write); + visitMemoryReference(I, CS.getArgument(1), 0, 0, MemRef::Read); + break; + case Intrinsic::vaend: + visitMemoryReference(I, CS.getArgument(0), 0, 0, + MemRef::Read | MemRef::Write); + break; + + case Intrinsic::stackrestore: + visitMemoryReference(I, CS.getArgument(0), 0, 0, + MemRef::Read); + break; + } +} + +void Lint::visitCallInst(CallInst &I) { + return visitCallSite(&I); +} + +void Lint::visitInvokeInst(InvokeInst &I) { + return visitCallSite(&I); +} + +void Lint::visitReturnInst(ReturnInst &I) { + Function *F = I.getParent()->getParent(); + Assert1(!F->doesNotReturn(), + "Unusual: Return statement in function with noreturn attribute", + &I); +} + +// TODO: Add a length argument and check that the reference is in bounds +void Lint::visitMemoryReference(Instruction &I, + Value *Ptr, unsigned Align, const Type *Ty, + unsigned Flags) { + Value *UnderlyingObject = Ptr->getUnderlyingObject(); + Assert1(!isa<ConstantPointerNull>(UnderlyingObject), + "Undefined behavior: Null pointer dereference", &I); + Assert1(!isa<UndefValue>(UnderlyingObject), + "Undefined behavior: Undef pointer dereference", &I); + + if (Flags & MemRef::Write) { + if (const GlobalVariable *GV = dyn_cast<GlobalVariable>(UnderlyingObject)) + Assert1(!GV->isConstant(), + "Undefined behavior: Write to read-only memory", &I); + Assert1(!isa<Function>(UnderlyingObject) && + !isa<BlockAddress>(UnderlyingObject), + "Undefined behavior: Write to text section", &I); + } + if (Flags & MemRef::Read) { + Assert1(!isa<Function>(UnderlyingObject), + "Unusual: Load from function body", &I); + Assert1(!isa<BlockAddress>(UnderlyingObject), + "Undefined behavior: Load from block address", &I); + } + if (Flags & MemRef::Callee) { + Assert1(!isa<BlockAddress>(UnderlyingObject), + "Undefined behavior: Call to block address", &I); + } + if (Flags & MemRef::Branchee) { + Assert1(!isa<Constant>(UnderlyingObject) || + isa<BlockAddress>(UnderlyingObject), + "Undefined behavior: Branch to non-blockaddress", &I); + } + + if (TD) { + if (Align == 0 && Ty) Align = TD->getABITypeAlignment(Ty); + + if (Align != 0) { + unsigned BitWidth = TD->getTypeSizeInBits(Ptr->getType()); + APInt Mask = APInt::getAllOnesValue(BitWidth), + KnownZero(BitWidth, 0), KnownOne(BitWidth, 0); + ComputeMaskedBits(Ptr, Mask, KnownZero, KnownOne, TD); + Assert1(!(KnownOne & APInt::getLowBitsSet(BitWidth, Log2_32(Align))), + "Undefined behavior: Memory reference 
address is misaligned", &I); + } + } +} + +void Lint::visitLoadInst(LoadInst &I) { + visitMemoryReference(I, I.getPointerOperand(), I.getAlignment(), I.getType(), + MemRef::Read); +} + +void Lint::visitStoreInst(StoreInst &I) { + visitMemoryReference(I, I.getPointerOperand(), I.getAlignment(), + I.getOperand(0)->getType(), MemRef::Write); +} + +void Lint::visitXor(BinaryOperator &I) { + Assert1(!isa<UndefValue>(I.getOperand(0)) || + !isa<UndefValue>(I.getOperand(1)), + "Undefined result: xor(undef, undef)", &I); +} + +void Lint::visitSub(BinaryOperator &I) { + Assert1(!isa<UndefValue>(I.getOperand(0)) || + !isa<UndefValue>(I.getOperand(1)), + "Undefined result: sub(undef, undef)", &I); +} + +void Lint::visitLShr(BinaryOperator &I) { + if (ConstantInt *CI = + dyn_cast<ConstantInt>(I.getOperand(1)->stripPointerCasts())) + Assert1(CI->getValue().ult(cast<IntegerType>(I.getType())->getBitWidth()), + "Undefined result: Shift count out of range", &I); +} + +void Lint::visitAShr(BinaryOperator &I) { + if (ConstantInt *CI = + dyn_cast<ConstantInt>(I.getOperand(1)->stripPointerCasts())) + Assert1(CI->getValue().ult(cast<IntegerType>(I.getType())->getBitWidth()), + "Undefined result: Shift count out of range", &I); +} + +void Lint::visitShl(BinaryOperator &I) { + if (ConstantInt *CI = + dyn_cast<ConstantInt>(I.getOperand(1)->stripPointerCasts())) + Assert1(CI->getValue().ult(cast<IntegerType>(I.getType())->getBitWidth()), + "Undefined result: Shift count out of range", &I); +} + +static bool isZero(Value *V, TargetData *TD) { + // Assume undef could be zero. + if (isa<UndefValue>(V)) return true; + + unsigned BitWidth = cast<IntegerType>(V->getType())->getBitWidth(); + APInt Mask = APInt::getAllOnesValue(BitWidth), + KnownZero(BitWidth, 0), KnownOne(BitWidth, 0); + ComputeMaskedBits(V, Mask, KnownZero, KnownOne, TD); + return KnownZero.isAllOnesValue(); +} + +void Lint::visitSDiv(BinaryOperator &I) { + Assert1(!isZero(I.getOperand(1), TD), + "Undefined behavior: Division by zero", &I); +} + +void Lint::visitUDiv(BinaryOperator &I) { + Assert1(!isZero(I.getOperand(1), TD), + "Undefined behavior: Division by zero", &I); +} + +void Lint::visitSRem(BinaryOperator &I) { + Assert1(!isZero(I.getOperand(1), TD), + "Undefined behavior: Division by zero", &I); +} + +void Lint::visitURem(BinaryOperator &I) { + Assert1(!isZero(I.getOperand(1), TD), + "Undefined behavior: Division by zero", &I); +} + +void Lint::visitAllocaInst(AllocaInst &I) { + if (isa<ConstantInt>(I.getArraySize())) + // This isn't undefined behavior, it's just an obvious pessimization. 
+ Assert1(&I.getParent()->getParent()->getEntryBlock() == I.getParent(), + "Pessimization: Static alloca outside of entry block", &I); +} + +void Lint::visitVAArgInst(VAArgInst &I) { + visitMemoryReference(I, I.getOperand(0), 0, 0, + MemRef::Read | MemRef::Write); +} + +void Lint::visitIndirectBrInst(IndirectBrInst &I) { + visitMemoryReference(I, I.getAddress(), 0, 0, MemRef::Branchee); +} + +void Lint::visitExtractElementInst(ExtractElementInst &I) { + if (ConstantInt *CI = + dyn_cast<ConstantInt>(I.getIndexOperand()->stripPointerCasts())) + Assert1(CI->getValue().ult(I.getVectorOperandType()->getNumElements()), + "Undefined result: extractelement index out of range", &I); +} + +void Lint::visitInsertElementInst(InsertElementInst &I) { + if (ConstantInt *CI = + dyn_cast<ConstantInt>(I.getOperand(2)->stripPointerCasts())) + Assert1(CI->getValue().ult(I.getType()->getNumElements()), + "Undefined result: insertelement index out of range", &I); +} + +void Lint::visitUnreachableInst(UnreachableInst &I) { + // This isn't undefined behavior, it's merely suspicious. + Assert1(&I == I.getParent()->begin() || + prior(BasicBlock::iterator(&I))->mayHaveSideEffects(), + "Unusual: unreachable immediately preceded by instruction without " + "side effects", &I); +} + +//===----------------------------------------------------------------------===// +// Implement the public interfaces to this file... +//===----------------------------------------------------------------------===// + +FunctionPass *llvm::createLintPass() { + return new Lint(); +} + +/// lintFunction - Check a function for errors, printing messages on stderr. +/// +void llvm::lintFunction(const Function &f) { + Function &F = const_cast<Function&>(f); + assert(!F.isDeclaration() && "Cannot lint external functions"); + + FunctionPassManager FPM(F.getParent()); + Lint *V = new Lint(); + FPM.add(V); + FPM.run(F); +} + +/// lintModule - Check a module for errors, printing messages on stderr. +/// Return true if the module is corrupt. +/// +void llvm::lintModule(const Module &M, std::string *ErrorInfo) { + PassManager PM; + Lint *V = new Lint(); + PM.add(V); + PM.run(const_cast<Module&>(M)); + + if (ErrorInfo) + *ErrorInfo = V->MessagesStr.str(); +} diff --git a/lib/Analysis/LoopInfo.cpp b/lib/Analysis/LoopInfo.cpp index 1001d2b..735e31f 100644 --- a/lib/Analysis/LoopInfo.cpp +++ b/lib/Analysis/LoopInfo.cpp @@ -29,9 +29,9 @@ using namespace llvm; // Always verify loopinfo if expensive checking is enabled. 
#ifdef XDEBUG -bool VerifyLoopInfo = true; +static bool VerifyLoopInfo = true; #else -bool VerifyLoopInfo = false; +static bool VerifyLoopInfo = false; #endif static cl::opt<bool,true> VerifyLoopInfoX("verify-loop-info", cl::location(VerifyLoopInfo), diff --git a/lib/Analysis/PointerTracking.cpp b/lib/Analysis/PointerTracking.cpp index ce7ac89..14df0b7 100644 --- a/lib/Analysis/PointerTracking.cpp +++ b/lib/Analysis/PointerTracking.cpp @@ -183,17 +183,17 @@ enum SolverResult PointerTracking::isLoopGuardedBy(const Loop *L, Predicate Pred, const SCEV *A, const SCEV *B) const { - if (SE->isLoopGuardedByCond(L, Pred, A, B)) + if (SE->isLoopEntryGuardedByCond(L, Pred, A, B)) return AlwaysTrue; Pred = ICmpInst::getSwappedPredicate(Pred); - if (SE->isLoopGuardedByCond(L, Pred, B, A)) + if (SE->isLoopEntryGuardedByCond(L, Pred, B, A)) return AlwaysTrue; Pred = ICmpInst::getInversePredicate(Pred); - if (SE->isLoopGuardedByCond(L, Pred, B, A)) + if (SE->isLoopEntryGuardedByCond(L, Pred, B, A)) return AlwaysFalse; Pred = ICmpInst::getSwappedPredicate(Pred); - if (SE->isLoopGuardedByCond(L, Pred, A, B)) + if (SE->isLoopEntryGuardedByCond(L, Pred, A, B)) return AlwaysTrue; return Unknown; } diff --git a/lib/Analysis/PostDominators.cpp b/lib/Analysis/PostDominators.cpp index c38e050..f0f3a05 100644 --- a/lib/Analysis/PostDominators.cpp +++ b/lib/Analysis/PostDominators.cpp @@ -33,7 +33,6 @@ F("postdomtree", "Post-Dominator Tree Construction", true, true); bool PostDominatorTree::runOnFunction(Function &F) { DT->recalculate(F); - DEBUG(DT->print(dbgs())); return false; } diff --git a/lib/Analysis/README.txt b/lib/Analysis/README.txt index c401090..0e96e4c 100644 --- a/lib/Analysis/README.txt +++ b/lib/Analysis/README.txt @@ -16,3 +16,15 @@ In addition to being much more complicated, it involves i65 arithmetic, which is very inefficient when expanded into code. 
//===---------------------------------------------------------------------===// + +In formatValue in test/CodeGen/X86/lsr-delayed-fold.ll, + +ScalarEvolution is forming this expression: + +((trunc i64 (-1 * %arg5) to i32) + (trunc i64 %arg5 to i32) + (-1 * (trunc i64 undef to i32))) + +This could be folded to + +(-1 * (trunc i64 undef to i32)) + +//===---------------------------------------------------------------------===// diff --git a/lib/Analysis/ScalarEvolution.cpp b/lib/Analysis/ScalarEvolution.cpp index 1af271a..6870268 100644 --- a/lib/Analysis/ScalarEvolution.cpp +++ b/lib/Analysis/ScalarEvolution.cpp @@ -188,8 +188,8 @@ const SCEV *ScalarEvolution::getConstant(const APInt& Val) { const SCEV * ScalarEvolution::getConstant(const Type *Ty, uint64_t V, bool isSigned) { - return getConstant( - ConstantInt::get(cast<IntegerType>(Ty), V, isSigned)); + const IntegerType *ITy = cast<IntegerType>(getEffectiveSCEVType(Ty)); + return getConstant(ConstantInt::get(ITy, V, isSigned)); } const Type *SCEVConstant::getType() const { return V->getType(); } @@ -247,11 +247,13 @@ void SCEVSignExtendExpr::print(raw_ostream &OS) const { } void SCEVCommutativeExpr::print(raw_ostream &OS) const { - assert(NumOperands > 1 && "This plus expr shouldn't exist!"); const char *OpStr = getOperationStr(); - OS << "(" << *Operands[0]; - for (unsigned i = 1, e = NumOperands; i != e; ++i) - OS << OpStr << *Operands[i]; + OS << "("; + for (op_iterator I = op_begin(), E = op_end(); I != E; ++I) { + OS << **I; + if (next(I) != E) + OS << OpStr; + } OS << ")"; } @@ -759,7 +761,7 @@ static const SCEV *BinomialCoefficient(const SCEV *It, unsigned K, CalculationBits); const SCEV *Dividend = SE.getTruncateOrZeroExtend(It, CalculationTy); for (unsigned i = 1; i != K; ++i) { - const SCEV *S = SE.getMinusSCEV(It, SE.getIntegerSCEV(i, It->getType())); + const SCEV *S = SE.getMinusSCEV(It, SE.getConstant(It->getType(), i)); Dividend = SE.getMulExpr(Dividend, SE.getTruncateOrZeroExtend(S, CalculationTy)); } @@ -955,7 +957,7 @@ const SCEV *ScalarEvolution::getZeroExtendExpr(const SCEV *Op, const SCEV *N = getConstant(APInt::getMinValue(BitWidth) - getUnsignedRange(Step).getUnsignedMax()); if (isLoopBackedgeGuardedByCond(L, ICmpInst::ICMP_ULT, AR, N) || - (isLoopGuardedByCond(L, ICmpInst::ICMP_ULT, Start, N) && + (isLoopEntryGuardedByCond(L, ICmpInst::ICMP_ULT, Start, N) && isLoopBackedgeGuardedByCond(L, ICmpInst::ICMP_ULT, AR->getPostIncExpr(*this), N))) // Return the expression with the addrec on the outside. @@ -965,8 +967,8 @@ const SCEV *ScalarEvolution::getZeroExtendExpr(const SCEV *Op, } else if (isKnownNegative(Step)) { const SCEV *N = getConstant(APInt::getMaxValue(BitWidth) - getSignedRange(Step).getSignedMin()); - if (isLoopBackedgeGuardedByCond(L, ICmpInst::ICMP_UGT, AR, N) && - (isLoopGuardedByCond(L, ICmpInst::ICMP_UGT, Start, N) || + if (isLoopBackedgeGuardedByCond(L, ICmpInst::ICMP_UGT, AR, N) || + (isLoopEntryGuardedByCond(L, ICmpInst::ICMP_UGT, Start, N) && isLoopBackedgeGuardedByCond(L, ICmpInst::ICMP_UGT, AR->getPostIncExpr(*this), N))) // Return the expression with the addrec on the outside. 
@@ -1090,7 +1092,7 @@ const SCEV *ScalarEvolution::getSignExtendExpr(const SCEV *Op, const SCEV *N = getConstant(APInt::getSignedMinValue(BitWidth) - getSignedRange(Step).getSignedMax()); if (isLoopBackedgeGuardedByCond(L, ICmpInst::ICMP_SLT, AR, N) || - (isLoopGuardedByCond(L, ICmpInst::ICMP_SLT, Start, N) && + (isLoopEntryGuardedByCond(L, ICmpInst::ICMP_SLT, Start, N) && isLoopBackedgeGuardedByCond(L, ICmpInst::ICMP_SLT, AR->getPostIncExpr(*this), N))) // Return the expression with the addrec on the outside. @@ -1101,7 +1103,7 @@ const SCEV *ScalarEvolution::getSignExtendExpr(const SCEV *Op, const SCEV *N = getConstant(APInt::getSignedMaxValue(BitWidth) - getSignedRange(Step).getSignedMin()); if (isLoopBackedgeGuardedByCond(L, ICmpInst::ICMP_SGT, AR, N) || - (isLoopGuardedByCond(L, ICmpInst::ICMP_SGT, Start, N) && + (isLoopEntryGuardedByCond(L, ICmpInst::ICMP_SGT, Start, N) && isLoopBackedgeGuardedByCond(L, ICmpInst::ICMP_SGT, AR->getPostIncExpr(*this), N))) // Return the expression with the addrec on the outside. @@ -1237,7 +1239,7 @@ CollectAddOperandsWithScales(DenseMap<const SCEV *, APInt> &M, } } else if (const SCEVConstant *C = dyn_cast<SCEVConstant>(Ops[i])) { // Pull a buried constant out to the outside. - if (Scale != 1 || AccumulatedConstant != 0 || C->isZero()) + if (Scale != 1 || AccumulatedConstant != 0 || C->getValue()->isZero()) Interesting = true; AccumulatedConstant += Scale * C->getValue()->getValue(); } else { @@ -1308,13 +1310,13 @@ const SCEV *ScalarEvolution::getAddExpr(SmallVectorImpl<const SCEV *> &Ops, } // If we are left with a constant zero being added, strip it off. - if (cast<SCEVConstant>(Ops[0])->getValue()->isZero()) { + if (LHSC->getValue()->isZero()) { Ops.erase(Ops.begin()); --Idx; } - } - if (Ops.size() == 1) return Ops[0]; + if (Ops.size() == 1) return Ops[0]; + } // Okay, check to see if the same value occurs in the operand list twice. If // so, merge them together into an multiply expression. Since we sorted the @@ -1324,7 +1326,7 @@ const SCEV *ScalarEvolution::getAddExpr(SmallVectorImpl<const SCEV *> &Ops, if (Ops[i] == Ops[i+1]) { // X + Y + Y --> X + Y*2 // Found a match, merge the two values into a multiply, and add any // remaining values to the result. - const SCEV *Two = getIntegerSCEV(2, Ty); + const SCEV *Two = getConstant(Ty, 2); const SCEV *Mul = getMulExpr(Ops[i], Two); if (Ops.size() == 2) return Mul; @@ -1353,9 +1355,7 @@ const SCEV *ScalarEvolution::getAddExpr(SmallVectorImpl<const SCEV *> &Ops, } LargeOps.push_back(T->getOperand()); } else if (const SCEVConstant *C = dyn_cast<SCEVConstant>(Ops[i])) { - // This could be either sign or zero extension, but sign extension - // is much more likely to be foldable here. - LargeOps.push_back(getSignExtendExpr(C, SrcType)); + LargeOps.push_back(getAnyExtendExpr(C, SrcType)); } else if (const SCEVMulExpr *M = dyn_cast<SCEVMulExpr>(Ops[i])) { SmallVector<const SCEV *, 8> LargeMulOps; for (unsigned j = 0, f = M->getNumOperands(); j != f && Ok; ++j) { @@ -1368,9 +1368,7 @@ const SCEV *ScalarEvolution::getAddExpr(SmallVectorImpl<const SCEV *> &Ops, LargeMulOps.push_back(T->getOperand()); } else if (const SCEVConstant *C = dyn_cast<SCEVConstant>(M->getOperand(j))) { - // This could be either sign or zero extension, but sign extension - // is much more likely to be foldable here. 
- LargeMulOps.push_back(getSignExtendExpr(C, SrcType)); + LargeMulOps.push_back(getAnyExtendExpr(C, SrcType)); } else { Ok = false; break; @@ -1445,7 +1443,7 @@ const SCEV *ScalarEvolution::getAddExpr(SmallVectorImpl<const SCEV *> &Ops, Ops.push_back(getMulExpr(getConstant(I->first), getAddExpr(I->second))); if (Ops.empty()) - return getIntegerSCEV(0, Ty); + return getConstant(Ty, 0); if (Ops.size() == 1) return Ops[0]; return getAddExpr(Ops); @@ -1470,7 +1468,7 @@ const SCEV *ScalarEvolution::getAddExpr(SmallVectorImpl<const SCEV *> &Ops, MulOps.erase(MulOps.begin()+MulOp); InnerMul = getMulExpr(MulOps); } - const SCEV *One = getIntegerSCEV(1, Ty); + const SCEV *One = getConstant(Ty, 1); const SCEV *AddOne = getAddExpr(InnerMul, One); const SCEV *OuterMul = getMulExpr(AddOne, Ops[AddOp]); if (Ops.size() == 2) return OuterMul; @@ -1534,8 +1532,9 @@ const SCEV *ScalarEvolution::getAddExpr(SmallVectorImpl<const SCEV *> &Ops, // they are loop invariant w.r.t. the recurrence. SmallVector<const SCEV *, 8> LIOps; const SCEVAddRecExpr *AddRec = cast<SCEVAddRecExpr>(Ops[Idx]); + const Loop *AddRecLoop = AddRec->getLoop(); for (unsigned i = 0, e = Ops.size(); i != e; ++i) - if (Ops[i]->isLoopInvariant(AddRec->getLoop())) { + if (Ops[i]->isLoopInvariant(AddRecLoop)) { LIOps.push_back(Ops[i]); Ops.erase(Ops.begin()+i); --i; --e; @@ -1552,7 +1551,7 @@ const SCEV *ScalarEvolution::getAddExpr(SmallVectorImpl<const SCEV *> &Ops, // It's tempting to propagate NUW/NSW flags here, but nuw/nsw addition // is not associative so this isn't necessarily safe. - const SCEV *NewRec = getAddRecExpr(AddRecOps, AddRec->getLoop()); + const SCEV *NewRec = getAddRecExpr(AddRecOps, AddRecLoop); // If all of the other operands were loop invariant, we are done. if (Ops.size() == 1) return NewRec; @@ -1573,7 +1572,7 @@ const SCEV *ScalarEvolution::getAddExpr(SmallVectorImpl<const SCEV *> &Ops, OtherIdx < Ops.size() && isa<SCEVAddRecExpr>(Ops[OtherIdx]);++OtherIdx) if (OtherIdx != Idx) { const SCEVAddRecExpr *OtherAddRec = cast<SCEVAddRecExpr>(Ops[OtherIdx]); - if (AddRec->getLoop() == OtherAddRec->getLoop()) { + if (AddRecLoop == OtherAddRec->getLoop()) { // Other + {A,+,B} + {C,+,D} --> Other + {A+C,+,B+D} SmallVector<const SCEV *, 4> NewOps(AddRec->op_begin(), AddRec->op_end()); @@ -1585,7 +1584,7 @@ const SCEV *ScalarEvolution::getAddExpr(SmallVectorImpl<const SCEV *> &Ops, } NewOps[i] = getAddExpr(NewOps[i], OtherAddRec->getOperand(i)); } - const SCEV *NewAddRec = getAddRecExpr(NewOps, AddRec->getLoop()); + const SCEV *NewAddRec = getAddRecExpr(NewOps, AddRecLoop); if (Ops.size() == 2) return NewAddRec; @@ -1697,15 +1696,15 @@ const SCEV *ScalarEvolution::getMulExpr(SmallVectorImpl<const SCEV *> &Ops, return getAddExpr(NewOps); } } + + if (Ops.size() == 1) + return Ops[0]; } // Skip over the add expression until we get to a multiply. while (Idx < Ops.size() && Ops[Idx]->getSCEVType() < scMulExpr) ++Idx; - if (Ops.size() == 1) - return Ops[0]; - // If there are mul operands inline them all into this expression. if (Idx < Ops.size()) { bool DeletedMul = false; @@ -1843,77 +1842,81 @@ const SCEV *ScalarEvolution::getUDivExpr(const SCEV *LHS, if (const SCEVConstant *RHSC = dyn_cast<SCEVConstant>(RHS)) { if (RHSC->getValue()->equalsInt(1)) return LHS; // X udiv 1 --> x - if (RHSC->isZero()) - return getIntegerSCEV(0, LHS->getType()); // value is undefined - - // Determine if the division can be folded into the operands of - // its operands. - // TODO: Generalize this to non-constants by using known-bits information. 
- const Type *Ty = LHS->getType(); - unsigned LZ = RHSC->getValue()->getValue().countLeadingZeros(); - unsigned MaxShiftAmt = getTypeSizeInBits(Ty) - LZ; - // For non-power-of-two values, effectively round the value up to the - // nearest power of two. - if (!RHSC->getValue()->getValue().isPowerOf2()) - ++MaxShiftAmt; - const IntegerType *ExtTy = - IntegerType::get(getContext(), getTypeSizeInBits(Ty) + MaxShiftAmt); - // {X,+,N}/C --> {X/C,+,N/C} if safe and N/C can be folded. - if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(LHS)) - if (const SCEVConstant *Step = - dyn_cast<SCEVConstant>(AR->getStepRecurrence(*this))) - if (!Step->getValue()->getValue() - .urem(RHSC->getValue()->getValue()) && - getZeroExtendExpr(AR, ExtTy) == - getAddRecExpr(getZeroExtendExpr(AR->getStart(), ExtTy), - getZeroExtendExpr(Step, ExtTy), - AR->getLoop())) { - SmallVector<const SCEV *, 4> Operands; - for (unsigned i = 0, e = AR->getNumOperands(); i != e; ++i) - Operands.push_back(getUDivExpr(AR->getOperand(i), RHS)); - return getAddRecExpr(Operands, AR->getLoop()); - } - // (A*B)/C --> A*(B/C) if safe and B/C can be folded. - if (const SCEVMulExpr *M = dyn_cast<SCEVMulExpr>(LHS)) { - SmallVector<const SCEV *, 4> Operands; - for (unsigned i = 0, e = M->getNumOperands(); i != e; ++i) - Operands.push_back(getZeroExtendExpr(M->getOperand(i), ExtTy)); - if (getZeroExtendExpr(M, ExtTy) == getMulExpr(Operands)) - // Find an operand that's safely divisible. - for (unsigned i = 0, e = M->getNumOperands(); i != e; ++i) { - const SCEV *Op = M->getOperand(i); - const SCEV *Div = getUDivExpr(Op, RHSC); - if (!isa<SCEVUDivExpr>(Div) && getMulExpr(Div, RHSC) == Op) { - Operands = SmallVector<const SCEV *, 4>(M->op_begin(), M->op_end()); - Operands[i] = Div; - return getMulExpr(Operands); + // If the denominator is zero, the result of the udiv is undefined. Don't + // try to analyze it, because the resolution chosen here may differ from + // the resolution chosen in other parts of the compiler. + if (!RHSC->getValue()->isZero()) { + // Determine if the division can be folded into the operands of + // its operands. + // TODO: Generalize this to non-constants by using known-bits information. + const Type *Ty = LHS->getType(); + unsigned LZ = RHSC->getValue()->getValue().countLeadingZeros(); + unsigned MaxShiftAmt = getTypeSizeInBits(Ty) - LZ; + // For non-power-of-two values, effectively round the value up to the + // nearest power of two. + if (!RHSC->getValue()->getValue().isPowerOf2()) + ++MaxShiftAmt; + const IntegerType *ExtTy = + IntegerType::get(getContext(), getTypeSizeInBits(Ty) + MaxShiftAmt); + // {X,+,N}/C --> {X/C,+,N/C} if safe and N/C can be folded. + if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(LHS)) + if (const SCEVConstant *Step = + dyn_cast<SCEVConstant>(AR->getStepRecurrence(*this))) + if (!Step->getValue()->getValue() + .urem(RHSC->getValue()->getValue()) && + getZeroExtendExpr(AR, ExtTy) == + getAddRecExpr(getZeroExtendExpr(AR->getStart(), ExtTy), + getZeroExtendExpr(Step, ExtTy), + AR->getLoop())) { + SmallVector<const SCEV *, 4> Operands; + for (unsigned i = 0, e = AR->getNumOperands(); i != e; ++i) + Operands.push_back(getUDivExpr(AR->getOperand(i), RHS)); + return getAddRecExpr(Operands, AR->getLoop()); } + // (A*B)/C --> A*(B/C) if safe and B/C can be folded. 
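Both the {X,+,N}/C rewrite above and the (A*B)/C rewrite that follows only fire when the division distributes exactly over the operands. A rough standalone model of the addrec case, with plain unsigned arithmetic standing in for SCEV (illustrative only; the real code also rules out wrapping by comparing zero-extended forms, and it does not require the start to divide evenly, since each operand is itself divided with getUDivExpr):

#include <cassert>
#include <cstdint>

// Model an affine recurrence {Start,+,Step}: its value at iteration i.
static uint64_t recAt(uint64_t Start, uint64_t Step, uint64_t I) {
  return Start + Step * I;
}

// {Start,+,Step} /u C --> {Start/C,+,Step/C}. This toy version insists that C
// divide both operands exactly; dividing only when that holds keeps the
// identity exact at every iteration (assuming no wrap).
static bool foldUDivOfRec(uint64_t Start, uint64_t Step, uint64_t C,
                          uint64_t &NewStart, uint64_t &NewStep) {
  if (C == 0 || Step % C != 0 || Start % C != 0)
    return false;
  NewStart = Start / C;
  NewStep = Step / C;
  return true;
}

int main() {
  uint64_t NS, NT;
  assert(foldUDivOfRec(8, 12, 4, NS, NT) && NS == 2 && NT == 3);
  // Spot-check the identity over a few iterations: (8 + 12i)/4 == 2 + 3i.
  for (uint64_t I = 0; I != 10; ++I)
    assert(recAt(8, 12, I) / 4 == recAt(2, 3, I));
  // 4 does not divide the step 10, so the fold is rejected.
  assert(!foldUDivOfRec(8, 10, 4, NS, NT));
  return 0;
}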
+ if (const SCEVMulExpr *M = dyn_cast<SCEVMulExpr>(LHS)) { + SmallVector<const SCEV *, 4> Operands; + for (unsigned i = 0, e = M->getNumOperands(); i != e; ++i) + Operands.push_back(getZeroExtendExpr(M->getOperand(i), ExtTy)); + if (getZeroExtendExpr(M, ExtTy) == getMulExpr(Operands)) + // Find an operand that's safely divisible. + for (unsigned i = 0, e = M->getNumOperands(); i != e; ++i) { + const SCEV *Op = M->getOperand(i); + const SCEV *Div = getUDivExpr(Op, RHSC); + if (!isa<SCEVUDivExpr>(Div) && getMulExpr(Div, RHSC) == Op) { + Operands = SmallVector<const SCEV *, 4>(M->op_begin(), + M->op_end()); + Operands[i] = Div; + return getMulExpr(Operands); + } + } + } + // (A+B)/C --> (A/C + B/C) if safe and A/C and B/C can be folded. + if (const SCEVAddRecExpr *A = dyn_cast<SCEVAddRecExpr>(LHS)) { + SmallVector<const SCEV *, 4> Operands; + for (unsigned i = 0, e = A->getNumOperands(); i != e; ++i) + Operands.push_back(getZeroExtendExpr(A->getOperand(i), ExtTy)); + if (getZeroExtendExpr(A, ExtTy) == getAddExpr(Operands)) { + Operands.clear(); + for (unsigned i = 0, e = A->getNumOperands(); i != e; ++i) { + const SCEV *Op = getUDivExpr(A->getOperand(i), RHS); + if (isa<SCEVUDivExpr>(Op) || + getMulExpr(Op, RHS) != A->getOperand(i)) + break; + Operands.push_back(Op); + } + if (Operands.size() == A->getNumOperands()) + return getAddExpr(Operands); } - } - // (A+B)/C --> (A/C + B/C) if safe and A/C and B/C can be folded. - if (const SCEVAddRecExpr *A = dyn_cast<SCEVAddRecExpr>(LHS)) { - SmallVector<const SCEV *, 4> Operands; - for (unsigned i = 0, e = A->getNumOperands(); i != e; ++i) - Operands.push_back(getZeroExtendExpr(A->getOperand(i), ExtTy)); - if (getZeroExtendExpr(A, ExtTy) == getAddExpr(Operands)) { - Operands.clear(); - for (unsigned i = 0, e = A->getNumOperands(); i != e; ++i) { - const SCEV *Op = getUDivExpr(A->getOperand(i), RHS); - if (isa<SCEVUDivExpr>(Op) || getMulExpr(Op, RHS) != A->getOperand(i)) - break; - Operands.push_back(Op); - } - if (Operands.size() == A->getNumOperands()) - return getAddExpr(Operands); } - } - // Fold if both operands are constant. - if (const SCEVConstant *LHSC = dyn_cast<SCEVConstant>(LHS)) { - Constant *LHSCV = LHSC->getValue(); - Constant *RHSCV = RHSC->getValue(); - return getConstant(cast<ConstantInt>(ConstantExpr::getUDiv(LHSCV, - RHSCV))); + // Fold if both operands are constant. + if (const SCEVConstant *LHSC = dyn_cast<SCEVConstant>(LHS)) { + Constant *LHSCV = LHSC->getValue(); + Constant *RHSCV = RHSC->getValue(); + return getConstant(cast<ConstantInt>(ConstantExpr::getUDiv(LHSCV, + RHSCV))); + } } } @@ -2090,9 +2093,9 @@ ScalarEvolution::getSMaxExpr(SmallVectorImpl<const SCEV *> &Ops) { // maximum-int. return Ops[0]; } - } - if (Ops.size() == 1) return Ops[0]; + if (Ops.size() == 1) return Ops[0]; + } // Find the first SMax while (Idx < Ops.size() && Ops[Idx]->getSCEVType() < scSMaxExpr) @@ -2116,7 +2119,13 @@ ScalarEvolution::getSMaxExpr(SmallVectorImpl<const SCEV *> &Ops) { // so, delete one. Since we sorted the list, these values are required to // be adjacent. 
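The loop below now drops not only exact duplicates but also any operand that isKnownPredicate can prove is always dominated by its neighbor; because the operand list is kept sorted, only adjacent pairs need to be examined. A standalone sketch of that collapse, with known signed ranges standing in for the predicate query (hypothetical types, not SCEV):

#include <cassert>
#include <vector>

// Each smax operand is modelled by the signed range its value is known to
// lie in, mirroring how a range-based predicate query works.
struct Op { int Lo, Hi; };                     // value is somewhere in [Lo, Hi]

static bool knownSGE(const Op &A, const Op &B) { return A.Lo >= B.Hi; }
static bool knownSLE(const Op &A, const Op &B) { return A.Hi <= B.Lo; }

// Walk adjacent pairs and drop any operand that is provably redundant:
// X smax Y == X whenever X is always >= Y (and symmetrically for <=).
static void collapseSMaxOperands(std::vector<Op> &Ops) {
  for (size_t I = 0; I + 1 < Ops.size(); )
    if (knownSGE(Ops[I], Ops[I + 1]))
      Ops.erase(Ops.begin() + I + 1);          // right operand never wins
    else if (knownSLE(Ops[I], Ops[I + 1]))
      Ops.erase(Ops.begin() + I);              // left operand never wins
    else
      ++I;
}

int main() {
  // [0,3] smax [5,9] smax [7,8]: the first operand can never be the maximum,
  // and nothing is known about the ordering of the last two, so both stay.
  std::vector<Op> Ops = {{0, 3}, {5, 9}, {7, 8}};
  collapseSMaxOperands(Ops);
  assert(Ops.size() == 2 && Ops[0].Lo == 5 && Ops[1].Lo == 7);
  return 0;
}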
for (unsigned i = 0, e = Ops.size()-1; i != e; ++i) - if (Ops[i] == Ops[i+1]) { // X smax Y smax Y --> X smax Y + // X smax Y smax Y --> X smax Y + // X smax Y --> X, if X is always greater than Y + if (Ops[i] == Ops[i+1] || + isKnownPredicate(ICmpInst::ICMP_SGE, Ops[i], Ops[i+1])) { + Ops.erase(Ops.begin()+i+1, Ops.begin()+i+2); + --i; --e; + } else if (isKnownPredicate(ICmpInst::ICMP_SLE, Ops[i], Ops[i+1])) { Ops.erase(Ops.begin()+i, Ops.begin()+i+1); --i; --e; } @@ -2189,9 +2198,9 @@ ScalarEvolution::getUMaxExpr(SmallVectorImpl<const SCEV *> &Ops) { // maximum-int. return Ops[0]; } - } - if (Ops.size() == 1) return Ops[0]; + if (Ops.size() == 1) return Ops[0]; + } // Find the first UMax while (Idx < Ops.size() && Ops[Idx]->getSCEVType() < scUMaxExpr) @@ -2215,7 +2224,13 @@ ScalarEvolution::getUMaxExpr(SmallVectorImpl<const SCEV *> &Ops) { // so, delete one. Since we sorted the list, these values are required to // be adjacent. for (unsigned i = 0, e = Ops.size()-1; i != e; ++i) - if (Ops[i] == Ops[i+1]) { // X umax Y umax Y --> X umax Y + // X umax Y umax Y --> X umax Y + // X umax Y --> X, if X is always greater than Y + if (Ops[i] == Ops[i+1] || + isKnownPredicate(ICmpInst::ICMP_UGE, Ops[i], Ops[i+1])) { + Ops.erase(Ops.begin()+i+1, Ops.begin()+i+2); + --i; --e; + } else if (isKnownPredicate(ICmpInst::ICMP_ULE, Ops[i], Ops[i+1])) { Ops.erase(Ops.begin()+i, Ops.begin()+i+1); --i; --e; } @@ -2254,6 +2269,13 @@ const SCEV *ScalarEvolution::getUMinExpr(const SCEV *LHS, } const SCEV *ScalarEvolution::getSizeOfExpr(const Type *AllocTy) { + // If we have TargetData, we can bypass creating a target-independent + // constant expression and then folding it back into a ConstantInt. + // This is just a compile-time optimization. + if (TD) + return getConstant(TD->getIntPtrType(getContext()), + TD->getTypeAllocSize(AllocTy)); + Constant *C = ConstantExpr::getSizeOf(AllocTy); if (ConstantExpr *CE = dyn_cast<ConstantExpr>(C)) C = ConstantFoldConstantExpression(CE, TD); @@ -2271,6 +2293,13 @@ const SCEV *ScalarEvolution::getAlignOfExpr(const Type *AllocTy) { const SCEV *ScalarEvolution::getOffsetOfExpr(const StructType *STy, unsigned FieldNo) { + // If we have TargetData, we can bypass creating a target-independent + // constant expression and then folding it back into a ConstantInt. + // This is just a compile-time optimization. + if (TD) + return getConstant(TD->getIntPtrType(getContext()), + TD->getStructLayout(STy)->getElementOffset(FieldNo)); + Constant *C = ConstantExpr::getOffsetOf(STy, FieldNo); if (ConstantExpr *CE = dyn_cast<ConstantExpr>(C)) C = ConstantFoldConstantExpression(CE, TD); @@ -2597,14 +2626,29 @@ ScalarEvolution::ForgetSymbolicName(Instruction *PN, const SCEV *SymName) { /// a loop header, making it a potential recurrence, or it doesn't. /// const SCEV *ScalarEvolution::createNodeForPHI(PHINode *PN) { - if (PN->getNumIncomingValues() == 2) // The loops have been canonicalized. - if (const Loop *L = LI->getLoopFor(PN->getParent())) - if (L->getHeader() == PN->getParent()) { - // If it lives in the loop header, it has two incoming values, one - // from outside the loop, and one from inside. - unsigned IncomingEdge = L->contains(PN->getIncomingBlock(0)); - unsigned BackEdge = IncomingEdge^1; - + if (const Loop *L = LI->getLoopFor(PN->getParent())) + if (L->getHeader() == PN->getParent()) { + // The loop may have multiple entrances or multiple exits; we can analyze + // this phi as an addrec if it has a unique entry value and a unique + // backedge value. 
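The scan that follows classifies each incoming value of the PHI by whether its incoming block lies inside the loop (a backedge) or outside it (an entry edge), and gives up unless each side contributes exactly one value. A minimal standalone model of that classification, with deliberately simplified, hypothetical types:

#include <cassert>
#include <string>
#include <vector>

// One incoming (block, value) pair of a phi; FromInsideLoop says whether the
// incoming block is contained in the loop, i.e. whether the edge is a backedge.
struct Incoming { bool FromInsideLoop; std::string Value; };

// Returns true and fills Start/Backedge only if every edge from outside the
// loop carries the same value and every backedge carries the same value.
static bool classifyPHI(const std::vector<Incoming> &In,
                        std::string &Start, std::string &Backedge) {
  Start.clear(); Backedge.clear();
  for (const Incoming &I : In) {
    std::string &Slot = I.FromInsideLoop ? Backedge : Start;
    if (Slot.empty())
      Slot = I.Value;               // first value seen on this kind of edge
    else if (Slot != I.Value)
      return false;                 // two different values: not analyzable
  }
  return !Start.empty() && !Backedge.empty();
}

int main() {
  std::string S, B;
  // Two entry edges carrying the same start value plus one backedge: fine.
  assert(classifyPHI({{false, "%init"}, {false, "%init"}, {true, "%next"}},
                     S, B) && S == "%init" && B == "%next");
  // Two different backedge values: give up.
  assert(!classifyPHI({{false, "%init"}, {true, "%a"}, {true, "%b"}}, S, B));
  return 0;
}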
+ Value *BEValueV = 0, *StartValueV = 0; + for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) { + Value *V = PN->getIncomingValue(i); + if (L->contains(PN->getIncomingBlock(i))) { + if (!BEValueV) { + BEValueV = V; + } else if (BEValueV != V) { + BEValueV = 0; + break; + } + } else if (!StartValueV) { + StartValueV = V; + } else if (StartValueV != V) { + StartValueV = 0; + break; + } + } + if (BEValueV && StartValueV) { // While we are analyzing this PHI node, handle its value symbolically. const SCEV *SymbolicName = getUnknown(PN); assert(Scalars.find(PN) == Scalars.end() && @@ -2613,7 +2657,6 @@ const SCEV *ScalarEvolution::createNodeForPHI(PHINode *PN) { // Using this symbolic name for the PHI, analyze the value coming around // the back-edge. - Value *BEValueV = PN->getIncomingValue(BackEdge); const SCEV *BEValue = getSCEV(BEValueV); // NOTE: If BEValue is loop invariant, we know that the PHI node just @@ -2657,8 +2700,7 @@ const SCEV *ScalarEvolution::createNodeForPHI(PHINode *PN) { HasNSW = true; } - const SCEV *StartVal = - getSCEV(PN->getIncomingValue(IncomingEdge)); + const SCEV *StartVal = getSCEV(StartValueV); const SCEV *PHISCEV = getAddRecExpr(StartVal, Accum, L, HasNUW, HasNSW); @@ -2684,12 +2726,12 @@ const SCEV *ScalarEvolution::createNodeForPHI(PHINode *PN) { // Because the other in-value of i (0) fits the evolution of BEValue // i really is an addrec evolution. if (AddRec->getLoop() == L && AddRec->isAffine()) { - const SCEV *StartVal = getSCEV(PN->getIncomingValue(IncomingEdge)); + const SCEV *StartVal = getSCEV(StartValueV); // If StartVal = j.start - j.stride, we can use StartVal as the // initial step of the addrec evolution. if (StartVal == getMinusSCEV(AddRec->getOperand(0), - AddRec->getOperand(1))) { + AddRec->getOperand(1))) { const SCEV *PHISCEV = getAddRecExpr(StartVal, AddRec->getOperand(1), L); @@ -2702,9 +2744,8 @@ const SCEV *ScalarEvolution::createNodeForPHI(PHINode *PN) { } } } - - return SymbolicName; } + } // If the PHI has a single incoming value, follow that value, unless the // PHI's incoming blocks are in a different loop, in which case doing so @@ -2737,7 +2778,7 @@ const SCEV *ScalarEvolution::createNodeForGEP(GEPOperator *GEP) { // Don't attempt to analyze GEPs over unsized objects. if (!cast<PointerType>(Base->getType())->getElementType()->isSized()) return getUnknown(GEP); - const SCEV *TotalOffset = getIntegerSCEV(0, IntPtrTy); + const SCEV *TotalOffset = getConstant(IntPtrTy, 0); gep_type_iterator GTI = gep_type_begin(GEP); for (GetElementPtrInst::op_iterator I = next(GEP->op_begin()), E = GEP->op_end(); @@ -2920,9 +2961,9 @@ ScalarEvolution::getUnsignedRange(const SCEV *S) { // initial value. if (AddRec->hasNoUnsignedWrap()) if (const SCEVConstant *C = dyn_cast<SCEVConstant>(AddRec->getStart())) - ConservativeResult = - ConstantRange(C->getValue()->getValue(), - APInt(getTypeSizeInBits(C->getType()), 0)); + if (!C->getValue()->isZero()) + ConservativeResult = + ConstantRange(C->getValue()->getValue(), APInt(BitWidth, 0)); // TODO: non-affine addrec if (AddRec->isAffine()) { @@ -2933,14 +2974,26 @@ ScalarEvolution::getUnsignedRange(const SCEV *S) { MaxBECount = getNoopOrZeroExtend(MaxBECount, Ty); const SCEV *Start = AddRec->getStart(); - const SCEV *End = AddRec->evaluateAtIteration(MaxBECount, *this); + const SCEV *Step = AddRec->getStepRecurrence(*this); - // Check for overflow. 
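The replacement code below drops the old hasNoUnsignedWrap bail-out in favor of an explicit wrap test: the end range is recomputed with every input widened, and if the widened result disagrees with the narrow one, the narrow arithmetic wrapped and the conservative answer is returned. A standalone scalar model of that test, using plain unsigned integers rather than ConstantRange (the real code widens to 2*BitWidth+1 bits so that signed steps fit as well):

#include <cassert>
#include <cstdint>

// Detect wrap in a 32-bit computation of Start + Count * Step by redoing it
// in a wider type and comparing: if widening changes the answer, the narrow
// result wrapped and cannot be trusted.
static bool wraps32(uint32_t Start, uint32_t Count, uint32_t Step) {
  uint32_t Narrow = Start + Count * Step;                       // mod 2^32
  uint64_t Wide = uint64_t(Start) + uint64_t(Count) * uint64_t(Step);
  return uint64_t(Narrow) != Wide;
}

int main() {
  assert(!wraps32(10, 100, 3));          // 310 fits comfortably in 32 bits
  assert(wraps32(0, 0x80000000u, 2));    // 2^32 wraps around to 0
  return 0;
}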
- if (!AddRec->hasNoUnsignedWrap()) + ConstantRange StartRange = getUnsignedRange(Start); + ConstantRange StepRange = getSignedRange(Step); + ConstantRange MaxBECountRange = getUnsignedRange(MaxBECount); + ConstantRange EndRange = + StartRange.add(MaxBECountRange.multiply(StepRange)); + + // Check for overflow. This must be done with ConstantRange arithmetic + // because we could be called from within the ScalarEvolution overflow + // checking code. + ConstantRange ExtStartRange = StartRange.zextOrTrunc(BitWidth*2+1); + ConstantRange ExtStepRange = StepRange.sextOrTrunc(BitWidth*2+1); + ConstantRange ExtMaxBECountRange = + MaxBECountRange.zextOrTrunc(BitWidth*2+1); + ConstantRange ExtEndRange = EndRange.zextOrTrunc(BitWidth*2+1); + if (ExtStartRange.add(ExtMaxBECountRange.multiply(ExtStepRange)) != + ExtEndRange) return ConservativeResult; - ConstantRange StartRange = getUnsignedRange(Start); - ConstantRange EndRange = getUnsignedRange(End); APInt Min = APIntOps::umin(StartRange.getUnsignedMin(), EndRange.getUnsignedMin()); APInt Max = APIntOps::umax(StartRange.getUnsignedMax(), @@ -3064,14 +3117,26 @@ ScalarEvolution::getSignedRange(const SCEV *S) { MaxBECount = getNoopOrZeroExtend(MaxBECount, Ty); const SCEV *Start = AddRec->getStart(); - const SCEV *End = AddRec->evaluateAtIteration(MaxBECount, *this); + const SCEV *Step = AddRec->getStepRecurrence(*this); - // Check for overflow. - if (!AddRec->hasNoSignedWrap()) + ConstantRange StartRange = getSignedRange(Start); + ConstantRange StepRange = getSignedRange(Step); + ConstantRange MaxBECountRange = getUnsignedRange(MaxBECount); + ConstantRange EndRange = + StartRange.add(MaxBECountRange.multiply(StepRange)); + + // Check for overflow. This must be done with ConstantRange arithmetic + // because we could be called from within the ScalarEvolution overflow + // checking code. + ConstantRange ExtStartRange = StartRange.sextOrTrunc(BitWidth*2+1); + ConstantRange ExtStepRange = StepRange.sextOrTrunc(BitWidth*2+1); + ConstantRange ExtMaxBECountRange = + MaxBECountRange.zextOrTrunc(BitWidth*2+1); + ConstantRange ExtEndRange = EndRange.sextOrTrunc(BitWidth*2+1); + if (ExtStartRange.add(ExtMaxBECountRange.multiply(ExtStepRange)) != + ExtEndRange) return ConservativeResult; - ConstantRange StartRange = getSignedRange(Start); - ConstantRange EndRange = getSignedRange(End); APInt Min = APIntOps::smin(StartRange.getSignedMin(), EndRange.getSignedMin()); APInt Max = APIntOps::smax(StartRange.getSignedMax(), @@ -3122,9 +3187,7 @@ const SCEV *ScalarEvolution::createSCEV(Value *V) { else if (ConstantInt *CI = dyn_cast<ConstantInt>(V)) return getConstant(CI); else if (isa<ConstantPointerNull>(V)) - return getIntegerSCEV(0, V->getType()); - else if (isa<UndefValue>(V)) - return getIntegerSCEV(0, V->getType()); + return getConstant(V->getType(), 0); else if (GlobalAlias *GA = dyn_cast<GlobalAlias>(V)) return GA->mayBeOverridden() ? getUnknown(V) : getSCEV(GA->getAliasee()); else @@ -3256,8 +3319,16 @@ const SCEV *ScalarEvolution::createSCEV(Value *V) { // Turn shift left of a constant amount into a multiply. if (ConstantInt *SA = dyn_cast<ConstantInt>(U->getOperand(1))) { uint32_t BitWidth = cast<IntegerType>(U->getType())->getBitWidth(); + + // If the shift count is not less than the bitwidth, the result of + // the shift is undefined. Don't try to analyze it, because the + // resolution chosen here may differ from the resolution chosen in + // other parts of the compiler. 
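The guard that follows (and its twins in the lshr and ashr cases) simply refuses to model a shift whose constant count is not smaller than the bit width. A small standalone illustration of the shl-to-multiply rewrite with the same guard, using plain fixed-width integers rather than SCEV:

#include <cassert>
#include <cstdint>

// x << c is equivalent to x * (1 << c) only when c is a valid shift count,
// i.e. strictly less than the bit width; otherwise the shift has no defined
// result, so no particular multiply can be chosen to stand for it.
static bool foldShlToMul(uint32_t X, unsigned C, uint32_t &Product) {
  const unsigned BitWidth = 32;
  if (C >= BitWidth)
    return false;                        // undefined shift: refuse to analyze
  Product = X * (uint32_t(1) << C);
  return true;
}

int main() {
  uint32_t P;
  assert(foldShlToMul(7, 3, P) && P == 56);   // 7 << 3 == 7 * 8
  assert(!foldShlToMul(7, 32, P));            // count == bit width: undefined
  return 0;
}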
+ if (SA->getValue().uge(BitWidth)) + break; + Constant *X = ConstantInt::get(getContext(), - APInt(BitWidth, 1).shl(SA->getLimitedValue(BitWidth))); + APInt(BitWidth, 1).shl(SA->getZExtValue())); return getMulExpr(getSCEV(U->getOperand(0)), getSCEV(X)); } break; @@ -3266,8 +3337,16 @@ const SCEV *ScalarEvolution::createSCEV(Value *V) { // Turn logical shift right of a constant into a unsigned divide. if (ConstantInt *SA = dyn_cast<ConstantInt>(U->getOperand(1))) { uint32_t BitWidth = cast<IntegerType>(U->getType())->getBitWidth(); + + // If the shift count is not less than the bitwidth, the result of + // the shift is undefined. Don't try to analyze it, because the + // resolution chosen here may differ from the resolution chosen in + // other parts of the compiler. + if (SA->getValue().uge(BitWidth)) + break; + Constant *X = ConstantInt::get(getContext(), - APInt(BitWidth, 1).shl(SA->getLimitedValue(BitWidth))); + APInt(BitWidth, 1).shl(SA->getZExtValue())); return getUDivExpr(getSCEV(U->getOperand(0)), getSCEV(X)); } break; @@ -3275,19 +3354,26 @@ const SCEV *ScalarEvolution::createSCEV(Value *V) { case Instruction::AShr: // For a two-shift sext-inreg, use sext(trunc(x)) as the SCEV expression. if (ConstantInt *CI = dyn_cast<ConstantInt>(U->getOperand(1))) - if (Instruction *L = dyn_cast<Instruction>(U->getOperand(0))) + if (Operator *L = dyn_cast<Operator>(U->getOperand(0))) if (L->getOpcode() == Instruction::Shl && L->getOperand(1) == U->getOperand(1)) { - unsigned BitWidth = getTypeSizeInBits(U->getType()); + uint64_t BitWidth = getTypeSizeInBits(U->getType()); + + // If the shift count is not less than the bitwidth, the result of + // the shift is undefined. Don't try to analyze it, because the + // resolution chosen here may differ from the resolution chosen in + // other parts of the compiler. + if (CI->getValue().uge(BitWidth)) + break; + uint64_t Amt = BitWidth - CI->getZExtValue(); if (Amt == BitWidth) return getSCEV(L->getOperand(0)); // shift by zero --> noop - if (Amt > BitWidth) - return getIntegerSCEV(0, U->getType()); // value is undefined return getSignExtendExpr(getTruncateExpr(getSCEV(L->getOperand(0)), - IntegerType::get(getContext(), Amt)), - U->getType()); + IntegerType::get(getContext(), + Amt)), + U->getType()); } break; @@ -3330,10 +3416,22 @@ const SCEV *ScalarEvolution::createSCEV(Value *V) { // fall through case ICmpInst::ICMP_SGT: case ICmpInst::ICMP_SGE: - if (LHS == U->getOperand(1) && RHS == U->getOperand(2)) - return getSMaxExpr(getSCEV(LHS), getSCEV(RHS)); - else if (LHS == U->getOperand(2) && RHS == U->getOperand(1)) - return getSMinExpr(getSCEV(LHS), getSCEV(RHS)); + // a >s b ? a+x : b+x -> smax(a, b)+x + // a >s b ? 
b+x : a+x -> smin(a, b)+x + if (LHS->getType() == U->getType()) { + const SCEV *LS = getSCEV(LHS); + const SCEV *RS = getSCEV(RHS); + const SCEV *LA = getSCEV(U->getOperand(1)); + const SCEV *RA = getSCEV(U->getOperand(2)); + const SCEV *LDiff = getMinusSCEV(LA, LS); + const SCEV *RDiff = getMinusSCEV(RA, RS); + if (LDiff == RDiff) + return getAddExpr(getSMaxExpr(LS, RS), LDiff); + LDiff = getMinusSCEV(LA, RS); + RDiff = getMinusSCEV(RA, LS); + if (LDiff == RDiff) + return getAddExpr(getSMinExpr(LS, RS), LDiff); + } break; case ICmpInst::ICMP_ULT: case ICmpInst::ICMP_ULE: @@ -3341,28 +3439,52 @@ const SCEV *ScalarEvolution::createSCEV(Value *V) { // fall through case ICmpInst::ICMP_UGT: case ICmpInst::ICMP_UGE: - if (LHS == U->getOperand(1) && RHS == U->getOperand(2)) - return getUMaxExpr(getSCEV(LHS), getSCEV(RHS)); - else if (LHS == U->getOperand(2) && RHS == U->getOperand(1)) - return getUMinExpr(getSCEV(LHS), getSCEV(RHS)); + // a >u b ? a+x : b+x -> umax(a, b)+x + // a >u b ? b+x : a+x -> umin(a, b)+x + if (LHS->getType() == U->getType()) { + const SCEV *LS = getSCEV(LHS); + const SCEV *RS = getSCEV(RHS); + const SCEV *LA = getSCEV(U->getOperand(1)); + const SCEV *RA = getSCEV(U->getOperand(2)); + const SCEV *LDiff = getMinusSCEV(LA, LS); + const SCEV *RDiff = getMinusSCEV(RA, RS); + if (LDiff == RDiff) + return getAddExpr(getUMaxExpr(LS, RS), LDiff); + LDiff = getMinusSCEV(LA, RS); + RDiff = getMinusSCEV(RA, LS); + if (LDiff == RDiff) + return getAddExpr(getUMinExpr(LS, RS), LDiff); + } break; case ICmpInst::ICMP_NE: - // n != 0 ? n : 1 -> umax(n, 1) - if (LHS == U->getOperand(1) && - isa<ConstantInt>(U->getOperand(2)) && - cast<ConstantInt>(U->getOperand(2))->isOne() && + // n != 0 ? n+x : 1+x -> umax(n, 1)+x + if (LHS->getType() == U->getType() && isa<ConstantInt>(RHS) && - cast<ConstantInt>(RHS)->isZero()) - return getUMaxExpr(getSCEV(LHS), getSCEV(U->getOperand(2))); + cast<ConstantInt>(RHS)->isZero()) { + const SCEV *One = getConstant(LHS->getType(), 1); + const SCEV *LS = getSCEV(LHS); + const SCEV *LA = getSCEV(U->getOperand(1)); + const SCEV *RA = getSCEV(U->getOperand(2)); + const SCEV *LDiff = getMinusSCEV(LA, LS); + const SCEV *RDiff = getMinusSCEV(RA, One); + if (LDiff == RDiff) + return getAddExpr(getUMaxExpr(LS, One), LDiff); + } break; case ICmpInst::ICMP_EQ: - // n == 0 ? 1 : n -> umax(n, 1) - if (LHS == U->getOperand(2) && - isa<ConstantInt>(U->getOperand(1)) && - cast<ConstantInt>(U->getOperand(1))->isOne() && + // n == 0 ? 1+x : n+x -> umax(n, 1)+x + if (LHS->getType() == U->getType() && isa<ConstantInt>(RHS) && - cast<ConstantInt>(RHS)->isZero()) - return getUMaxExpr(getSCEV(LHS), getSCEV(U->getOperand(1))); + cast<ConstantInt>(RHS)->isZero()) { + const SCEV *One = getConstant(LHS->getType(), 1); + const SCEV *LS = getSCEV(LHS); + const SCEV *LA = getSCEV(U->getOperand(1)); + const SCEV *RA = getSCEV(U->getOperand(2)); + const SCEV *LDiff = getMinusSCEV(LA, One); + const SCEV *RDiff = getMinusSCEV(RA, LS); + if (LDiff == RDiff) + return getAddExpr(getUMaxExpr(LS, One), LDiff); + } break; default: break; @@ -3739,7 +3861,7 @@ ScalarEvolution::ComputeBackedgeTakenCountFromExitCond(const Loop *L, return getCouldNotCompute(); else // The backedge is never taken. - return getIntegerSCEV(0, CI->getType()); + return getConstant(CI->getType(), 0); } // If it's not an integer or pointer comparison then compute it the hard way. 
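Returning to the select patterns recognized above: a >s b ? a+x : b+x picks whichever of a and b is larger and then adds the same offset, so it equals smax(a, b) + x, and the code detects this shape by checking that the true arm minus the left comparison operand equals the false arm minus the right one (LDiff == RDiff). A small standalone check of the identity (small values only, to stay clear of signed overflow; not LLVM code):

#include <algorithm>
#include <cassert>

int main() {
  // a >s b ? a+x : b+x  ==  smax(a, b) + x, and with the arms swapped the
  // same reasoning gives smin(a, b) + x.
  for (int A = -4; A <= 4; ++A)
    for (int B = -4; B <= 4; ++B)
      for (int X = -4; X <= 4; ++X) {
        assert((A > B ? A + X : B + X) == std::max(A, B) + X);
        assert((A > B ? B + X : A + X) == std::min(A, B) + X);
      }
  return 0;
}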
@@ -3786,6 +3908,9 @@ ScalarEvolution::ComputeBackedgeTakenCountFromExitCondICmp(const Loop *L, Cond = ICmpInst::getSwappedPredicate(Cond); } + // Simplify the operands before analyzing them. + (void)SimplifyICmpOperands(Cond, LHS, RHS); + // If we have a comparison of a chrec against a constant, try to use value // ranges to answer this query. if (const SCEVConstant *RHSC = dyn_cast<SCEVConstant>(RHS)) @@ -4067,7 +4192,7 @@ ScalarEvolution::getConstantEvolutionLoopExitValue(PHINode *PN, if (I != ConstantEvolutionLoopExitValue.end()) return I->second; - if (BEs.ugt(APInt(BEs.getBitWidth(),MaxBruteForceIterations))) + if (BEs.ugt(MaxBruteForceIterations)) return ConstantEvolutionLoopExitValue[PN] = 0; // Not going to evaluate it. Constant *&RetVal = ConstantEvolutionLoopExitValue[PN]; @@ -4562,7 +4687,7 @@ ScalarEvolution::HowFarToNonZero(const SCEV *V, const Loop *L) { // already. If so, the backedge will execute zero times. if (const SCEVConstant *C = dyn_cast<SCEVConstant>(V)) { if (!C->getValue()->isNullValue()) - return getIntegerSCEV(0, C->getType()); + return getConstant(C->getType(), 0); return getCouldNotCompute(); // Otherwise it will loop infinitely. } @@ -4573,6 +4698,8 @@ ScalarEvolution::HowFarToNonZero(const SCEV *V, const Loop *L) { /// getLoopPredecessor - If the given loop's header has exactly one unique /// predecessor outside the loop, return it. Otherwise return null. +/// This is less strict that the loop "preheader" concept, which requires +/// the predecessor to have only one single successor. /// BasicBlock *ScalarEvolution::getLoopPredecessor(const Loop *L) { BasicBlock *Header = L->getHeader(); @@ -4591,21 +4718,21 @@ BasicBlock *ScalarEvolution::getLoopPredecessor(const Loop *L) { /// successor from which BB is reachable, or null if no such block is /// found. /// -BasicBlock * +std::pair<BasicBlock *, BasicBlock *> ScalarEvolution::getPredecessorWithUniqueSuccessorForBB(BasicBlock *BB) { // If the block has a unique predecessor, then there is no path from the // predecessor to the block that does not go through the direct edge // from the predecessor to the block. if (BasicBlock *Pred = BB->getSinglePredecessor()) - return Pred; + return std::make_pair(Pred, BB); // A loop's header is defined to be a block that dominates the loop. // If the header has a unique predecessor outside the loop, it must be // a block that has exactly one successor that can reach the loop. if (Loop *L = LI->getLoopFor(BB)) - return getLoopPredecessor(L); + return std::make_pair(getLoopPredecessor(L), L->getHeader()); - return 0; + return std::pair<BasicBlock *, BasicBlock *>(); } /// HasSameValue - SCEV structural equivalence is usually sufficient for @@ -4631,6 +4758,266 @@ static bool HasSameValue(const SCEV *A, const SCEV *B) { return false; } +/// SimplifyICmpOperands - Simplify LHS and RHS in a comparison with +/// predicate Pred. Return true iff any changes were made. +/// +bool ScalarEvolution::SimplifyICmpOperands(ICmpInst::Predicate &Pred, + const SCEV *&LHS, const SCEV *&RHS) { + bool Changed = false; + + // Canonicalize a constant to the right side. + if (const SCEVConstant *LHSC = dyn_cast<SCEVConstant>(LHS)) { + // Check for both operands constant. + if (const SCEVConstant *RHSC = dyn_cast<SCEVConstant>(RHS)) { + if (ConstantExpr::getICmp(Pred, + LHSC->getValue(), + RHSC->getValue())->isNullValue()) + goto trivially_false; + else + goto trivially_true; + } + // Otherwise swap the operands to put the constant on the right. 
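Moving a constant to the right-hand side by swapping the operands, as done just below, must be paired with getSwappedPredicate (5 <s x becomes x >s 5), which is not the same as inverting the predicate. A tiny standalone model of why the swapped form preserves the comparison, with hypothetical names rather than the ICmpInst API:

#include <cassert>

// A tiny stand-in for a signed comparison predicate and its helpers.
enum Pred { SLT, SGT, SLE, SGE };

static Pred swapped(Pred P) {  // compare(P, a, b) == compare(swapped(P), b, a)
  switch (P) { case SLT: return SGT; case SGT: return SLT;
               case SLE: return SGE; default:  return SLE; }
}

static bool compare(Pred P, int L, int R) {
  switch (P) { case SLT: return L < R;  case SGT: return L > R;
               case SLE: return L <= R; default:  return L >= R; }
}

int main() {
  // Canonicalizing "5 <s x" by swapping operands yields "x >s 5": the
  // swapped predicate keeps the meaning for every x.
  for (int X = -10; X <= 10; ++X)
    assert(compare(SLT, 5, X) == compare(swapped(SLT), X, 5));
  return 0;
}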
+ std::swap(LHS, RHS); + Pred = ICmpInst::getSwappedPredicate(Pred); + Changed = true; + } + + // If we're comparing an addrec with a value which is loop-invariant in the + // addrec's loop, put the addrec on the left. Also make a dominance check, + // as both operands could be addrecs loop-invariant in each other's loop. + if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(RHS)) { + const Loop *L = AR->getLoop(); + if (LHS->isLoopInvariant(L) && LHS->properlyDominates(L->getHeader(), DT)) { + std::swap(LHS, RHS); + Pred = ICmpInst::getSwappedPredicate(Pred); + Changed = true; + } + } + + // If there's a constant operand, canonicalize comparisons with boundary + // cases, and canonicalize *-or-equal comparisons to regular comparisons. + if (const SCEVConstant *RC = dyn_cast<SCEVConstant>(RHS)) { + const APInt &RA = RC->getValue()->getValue(); + switch (Pred) { + default: llvm_unreachable("Unexpected ICmpInst::Predicate value!"); + case ICmpInst::ICMP_EQ: + case ICmpInst::ICMP_NE: + break; + case ICmpInst::ICMP_UGE: + if ((RA - 1).isMinValue()) { + Pred = ICmpInst::ICMP_NE; + RHS = getConstant(RA - 1); + Changed = true; + break; + } + if (RA.isMaxValue()) { + Pred = ICmpInst::ICMP_EQ; + Changed = true; + break; + } + if (RA.isMinValue()) goto trivially_true; + + Pred = ICmpInst::ICMP_UGT; + RHS = getConstant(RA - 1); + Changed = true; + break; + case ICmpInst::ICMP_ULE: + if ((RA + 1).isMaxValue()) { + Pred = ICmpInst::ICMP_NE; + RHS = getConstant(RA + 1); + Changed = true; + break; + } + if (RA.isMinValue()) { + Pred = ICmpInst::ICMP_EQ; + Changed = true; + break; + } + if (RA.isMaxValue()) goto trivially_true; + + Pred = ICmpInst::ICMP_ULT; + RHS = getConstant(RA + 1); + Changed = true; + break; + case ICmpInst::ICMP_SGE: + if ((RA - 1).isMinSignedValue()) { + Pred = ICmpInst::ICMP_NE; + RHS = getConstant(RA - 1); + Changed = true; + break; + } + if (RA.isMaxSignedValue()) { + Pred = ICmpInst::ICMP_EQ; + Changed = true; + break; + } + if (RA.isMinSignedValue()) goto trivially_true; + + Pred = ICmpInst::ICMP_SGT; + RHS = getConstant(RA - 1); + Changed = true; + break; + case ICmpInst::ICMP_SLE: + if ((RA + 1).isMaxSignedValue()) { + Pred = ICmpInst::ICMP_NE; + RHS = getConstant(RA + 1); + Changed = true; + break; + } + if (RA.isMinSignedValue()) { + Pred = ICmpInst::ICMP_EQ; + Changed = true; + break; + } + if (RA.isMaxSignedValue()) goto trivially_true; + + Pred = ICmpInst::ICMP_SLT; + RHS = getConstant(RA + 1); + Changed = true; + break; + case ICmpInst::ICMP_UGT: + if (RA.isMinValue()) { + Pred = ICmpInst::ICMP_NE; + Changed = true; + break; + } + if ((RA + 1).isMaxValue()) { + Pred = ICmpInst::ICMP_EQ; + RHS = getConstant(RA + 1); + Changed = true; + break; + } + if (RA.isMaxValue()) goto trivially_false; + break; + case ICmpInst::ICMP_ULT: + if (RA.isMaxValue()) { + Pred = ICmpInst::ICMP_NE; + Changed = true; + break; + } + if ((RA - 1).isMinValue()) { + Pred = ICmpInst::ICMP_EQ; + RHS = getConstant(RA - 1); + Changed = true; + break; + } + if (RA.isMinValue()) goto trivially_false; + break; + case ICmpInst::ICMP_SGT: + if (RA.isMinSignedValue()) { + Pred = ICmpInst::ICMP_NE; + Changed = true; + break; + } + if ((RA + 1).isMaxSignedValue()) { + Pred = ICmpInst::ICMP_EQ; + RHS = getConstant(RA + 1); + Changed = true; + break; + } + if (RA.isMaxSignedValue()) goto trivially_false; + break; + case ICmpInst::ICMP_SLT: + if (RA.isMaxSignedValue()) { + Pred = ICmpInst::ICMP_NE; + Changed = true; + break; + } + if ((RA - 1).isMinSignedValue()) { + Pred = ICmpInst::ICMP_EQ; + RHS = 
getConstant(RA - 1); + Changed = true; + break; + } + if (RA.isMinSignedValue()) goto trivially_false; + break; + } + } + + // Check for obvious equality. + if (HasSameValue(LHS, RHS)) { + if (ICmpInst::isTrueWhenEqual(Pred)) + goto trivially_true; + if (ICmpInst::isFalseWhenEqual(Pred)) + goto trivially_false; + } + + // If possible, canonicalize GE/LE comparisons to GT/LT comparisons, by + // adding or subtracting 1 from one of the operands. + switch (Pred) { + case ICmpInst::ICMP_SLE: + if (!getSignedRange(RHS).getSignedMax().isMaxSignedValue()) { + RHS = getAddExpr(getConstant(RHS->getType(), 1, true), RHS, + /*HasNUW=*/false, /*HasNSW=*/true); + Pred = ICmpInst::ICMP_SLT; + Changed = true; + } else if (!getSignedRange(LHS).getSignedMin().isMinSignedValue()) { + LHS = getAddExpr(getConstant(RHS->getType(), (uint64_t)-1, true), LHS, + /*HasNUW=*/false, /*HasNSW=*/true); + Pred = ICmpInst::ICMP_SLT; + Changed = true; + } + break; + case ICmpInst::ICMP_SGE: + if (!getSignedRange(RHS).getSignedMin().isMinSignedValue()) { + RHS = getAddExpr(getConstant(RHS->getType(), (uint64_t)-1, true), RHS, + /*HasNUW=*/false, /*HasNSW=*/true); + Pred = ICmpInst::ICMP_SGT; + Changed = true; + } else if (!getSignedRange(LHS).getSignedMax().isMaxSignedValue()) { + LHS = getAddExpr(getConstant(RHS->getType(), 1, true), LHS, + /*HasNUW=*/false, /*HasNSW=*/true); + Pred = ICmpInst::ICMP_SGT; + Changed = true; + } + break; + case ICmpInst::ICMP_ULE: + if (!getUnsignedRange(RHS).getUnsignedMax().isMaxValue()) { + RHS = getAddExpr(getConstant(RHS->getType(), 1, true), RHS, + /*HasNUW=*/true, /*HasNSW=*/false); + Pred = ICmpInst::ICMP_ULT; + Changed = true; + } else if (!getUnsignedRange(LHS).getUnsignedMin().isMinValue()) { + LHS = getAddExpr(getConstant(RHS->getType(), (uint64_t)-1, true), LHS, + /*HasNUW=*/true, /*HasNSW=*/false); + Pred = ICmpInst::ICMP_ULT; + Changed = true; + } + break; + case ICmpInst::ICMP_UGE: + if (!getUnsignedRange(RHS).getUnsignedMin().isMinValue()) { + RHS = getAddExpr(getConstant(RHS->getType(), (uint64_t)-1, true), RHS, + /*HasNUW=*/true, /*HasNSW=*/false); + Pred = ICmpInst::ICMP_UGT; + Changed = true; + } else if (!getUnsignedRange(LHS).getUnsignedMax().isMaxValue()) { + LHS = getAddExpr(getConstant(RHS->getType(), 1, true), LHS, + /*HasNUW=*/true, /*HasNSW=*/false); + Pred = ICmpInst::ICMP_UGT; + Changed = true; + } + break; + default: + break; + } + + // TODO: More simplifications are possible here. + + return Changed; + +trivially_true: + // Return 0 == 0. + LHS = RHS = getConstant(Type::getInt1Ty(getContext()), 0); + Pred = ICmpInst::ICMP_EQ; + return true; + +trivially_false: + // Return 0 != 0. + LHS = RHS = getConstant(Type::getInt1Ty(getContext()), 0); + Pred = ICmpInst::ICMP_NE; + return true; +} + bool ScalarEvolution::isKnownNegative(const SCEV *S) { return getSignedRange(S).getSignedMax().isNegative(); } @@ -4653,10 +5040,36 @@ bool ScalarEvolution::isKnownNonZero(const SCEV *S) { bool ScalarEvolution::isKnownPredicate(ICmpInst::Predicate Pred, const SCEV *LHS, const SCEV *RHS) { + // Canonicalize the inputs first. + (void)SimplifyICmpOperands(Pred, LHS, RHS); + + // If LHS or RHS is an addrec, check to see if the condition is true in + // every iteration of the loop. 
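The two queries that follow encode a simple induction argument: if the predicate holds for the addrec's start value whenever the loop is entered, and for the post-increment value whenever the backedge is taken, then it holds on every iteration. A small standalone simulation of that argument for the recurrence {0,+,1} compared against N (illustrative only):

#include <cassert>

int main() {
  const unsigned N = 10;
  if (0u < N) {               // loop entry guard establishes Pred(Start, N)
    unsigned I = 0;           // the addrec {0,+,1}
    do {
      assert(I < N);          // the predicate holds on every iteration
      ++I;                    // post-increment value
    } while (I < N);          // backedge guard establishes Pred(PostInc, N)
  }
  return 0;
}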
+ if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(LHS)) + if (isLoopEntryGuardedByCond( + AR->getLoop(), Pred, AR->getStart(), RHS) && + isLoopBackedgeGuardedByCond( + AR->getLoop(), Pred, AR->getPostIncExpr(*this), RHS)) + return true; + if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(RHS)) + if (isLoopEntryGuardedByCond( + AR->getLoop(), Pred, LHS, AR->getStart()) && + isLoopBackedgeGuardedByCond( + AR->getLoop(), Pred, LHS, AR->getPostIncExpr(*this))) + return true; + + // Otherwise see what can be done with known constant ranges. + return isKnownPredicateWithRanges(Pred, LHS, RHS); +} +bool +ScalarEvolution::isKnownPredicateWithRanges(ICmpInst::Predicate Pred, + const SCEV *LHS, const SCEV *RHS) { if (HasSameValue(LHS, RHS)) return ICmpInst::isTrueWhenEqual(Pred); + // This code is split out from isKnownPredicate because it is called from + // within isLoopEntryGuardedByCond. switch (Pred) { default: llvm_unreachable("Unexpected ICmpInst::Predicate value!"); @@ -4753,35 +5166,33 @@ ScalarEvolution::isLoopBackedgeGuardedByCond(const Loop *L, LoopContinuePredicate->getSuccessor(0) != L->getHeader()); } -/// isLoopGuardedByCond - Test whether entry to the loop is protected +/// isLoopEntryGuardedByCond - Test whether entry to the loop is protected /// by a conditional between LHS and RHS. This is used to help avoid max /// expressions in loop trip counts, and to eliminate casts. bool -ScalarEvolution::isLoopGuardedByCond(const Loop *L, - ICmpInst::Predicate Pred, - const SCEV *LHS, const SCEV *RHS) { +ScalarEvolution::isLoopEntryGuardedByCond(const Loop *L, + ICmpInst::Predicate Pred, + const SCEV *LHS, const SCEV *RHS) { // Interpret a null as meaning no loop, where there is obviously no guard // (interprocedural conditions notwithstanding). if (!L) return false; - BasicBlock *Predecessor = getLoopPredecessor(L); - BasicBlock *PredecessorDest = L->getHeader(); - // Starting at the loop predecessor, climb up the predecessor chain, as long // as there are predecessors that can be found that have unique successors // leading to the original header. - for (; Predecessor; - PredecessorDest = Predecessor, - Predecessor = getPredecessorWithUniqueSuccessorForBB(Predecessor)) { + for (std::pair<BasicBlock *, BasicBlock *> + Pair(getLoopPredecessor(L), L->getHeader()); + Pair.first; + Pair = getPredecessorWithUniqueSuccessorForBB(Pair.first)) { BranchInst *LoopEntryPredicate = - dyn_cast<BranchInst>(Predecessor->getTerminator()); + dyn_cast<BranchInst>(Pair.first->getTerminator()); if (!LoopEntryPredicate || LoopEntryPredicate->isUnconditional()) continue; if (isImpliedCond(LoopEntryPredicate->getCondition(), Pred, LHS, RHS, - LoopEntryPredicate->getSuccessor(0) != PredecessorDest)) + LoopEntryPredicate->getSuccessor(0) != Pair.second)) return true; } @@ -4845,117 +5256,12 @@ bool ScalarEvolution::isImpliedCond(Value *CondValue, // Canonicalize the query to match the way instcombine will have // canonicalized the comparison. - // First, put a constant operand on the right. - if (isa<SCEVConstant>(LHS)) { - std::swap(LHS, RHS); - Pred = ICmpInst::getSwappedPredicate(Pred); - } - // Then, canonicalize comparisons with boundary cases. 
- if (const SCEVConstant *RC = dyn_cast<SCEVConstant>(RHS)) { - const APInt &RA = RC->getValue()->getValue(); - switch (Pred) { - default: llvm_unreachable("Unexpected ICmpInst::Predicate value!"); - case ICmpInst::ICMP_EQ: - case ICmpInst::ICMP_NE: - break; - case ICmpInst::ICMP_UGE: - if ((RA - 1).isMinValue()) { - Pred = ICmpInst::ICMP_NE; - RHS = getConstant(RA - 1); - break; - } - if (RA.isMaxValue()) { - Pred = ICmpInst::ICMP_EQ; - break; - } - if (RA.isMinValue()) return true; - break; - case ICmpInst::ICMP_ULE: - if ((RA + 1).isMaxValue()) { - Pred = ICmpInst::ICMP_NE; - RHS = getConstant(RA + 1); - break; - } - if (RA.isMinValue()) { - Pred = ICmpInst::ICMP_EQ; - break; - } - if (RA.isMaxValue()) return true; - break; - case ICmpInst::ICMP_SGE: - if ((RA - 1).isMinSignedValue()) { - Pred = ICmpInst::ICMP_NE; - RHS = getConstant(RA - 1); - break; - } - if (RA.isMaxSignedValue()) { - Pred = ICmpInst::ICMP_EQ; - break; - } - if (RA.isMinSignedValue()) return true; - break; - case ICmpInst::ICMP_SLE: - if ((RA + 1).isMaxSignedValue()) { - Pred = ICmpInst::ICMP_NE; - RHS = getConstant(RA + 1); - break; - } - if (RA.isMinSignedValue()) { - Pred = ICmpInst::ICMP_EQ; - break; - } - if (RA.isMaxSignedValue()) return true; - break; - case ICmpInst::ICMP_UGT: - if (RA.isMinValue()) { - Pred = ICmpInst::ICMP_NE; - break; - } - if ((RA + 1).isMaxValue()) { - Pred = ICmpInst::ICMP_EQ; - RHS = getConstant(RA + 1); - break; - } - if (RA.isMaxValue()) return false; - break; - case ICmpInst::ICMP_ULT: - if (RA.isMaxValue()) { - Pred = ICmpInst::ICMP_NE; - break; - } - if ((RA - 1).isMinValue()) { - Pred = ICmpInst::ICMP_EQ; - RHS = getConstant(RA - 1); - break; - } - if (RA.isMinValue()) return false; - break; - case ICmpInst::ICMP_SGT: - if (RA.isMinSignedValue()) { - Pred = ICmpInst::ICMP_NE; - break; - } - if ((RA + 1).isMaxSignedValue()) { - Pred = ICmpInst::ICMP_EQ; - RHS = getConstant(RA + 1); - break; - } - if (RA.isMaxSignedValue()) return false; - break; - case ICmpInst::ICMP_SLT: - if (RA.isMaxSignedValue()) { - Pred = ICmpInst::ICMP_NE; - break; - } - if ((RA - 1).isMinSignedValue()) { - Pred = ICmpInst::ICMP_EQ; - RHS = getConstant(RA - 1); - break; - } - if (RA.isMinSignedValue()) return false; - break; - } - } + if (SimplifyICmpOperands(Pred, LHS, RHS)) + if (LHS == RHS) + return CmpInst::isTrueWhenEqual(Pred); + if (SimplifyICmpOperands(FoundPred, FoundLHS, FoundRHS)) + if (FoundLHS == FoundRHS) + return CmpInst::isFalseWhenEqual(Pred); // Check to see if we can make the LHS or RHS match. 
if (LHS == FoundRHS || RHS == FoundLHS) { @@ -5028,26 +5334,26 @@ ScalarEvolution::isImpliedCondOperandsHelper(ICmpInst::Predicate Pred, break; case ICmpInst::ICMP_SLT: case ICmpInst::ICMP_SLE: - if (isKnownPredicate(ICmpInst::ICMP_SLE, LHS, FoundLHS) && - isKnownPredicate(ICmpInst::ICMP_SGE, RHS, FoundRHS)) + if (isKnownPredicateWithRanges(ICmpInst::ICMP_SLE, LHS, FoundLHS) && + isKnownPredicateWithRanges(ICmpInst::ICMP_SGE, RHS, FoundRHS)) return true; break; case ICmpInst::ICMP_SGT: case ICmpInst::ICMP_SGE: - if (isKnownPredicate(ICmpInst::ICMP_SGE, LHS, FoundLHS) && - isKnownPredicate(ICmpInst::ICMP_SLE, RHS, FoundRHS)) + if (isKnownPredicateWithRanges(ICmpInst::ICMP_SGE, LHS, FoundLHS) && + isKnownPredicateWithRanges(ICmpInst::ICMP_SLE, RHS, FoundRHS)) return true; break; case ICmpInst::ICMP_ULT: case ICmpInst::ICMP_ULE: - if (isKnownPredicate(ICmpInst::ICMP_ULE, LHS, FoundLHS) && - isKnownPredicate(ICmpInst::ICMP_UGE, RHS, FoundRHS)) + if (isKnownPredicateWithRanges(ICmpInst::ICMP_ULE, LHS, FoundLHS) && + isKnownPredicateWithRanges(ICmpInst::ICMP_UGE, RHS, FoundRHS)) return true; break; case ICmpInst::ICMP_UGT: case ICmpInst::ICMP_UGE: - if (isKnownPredicate(ICmpInst::ICMP_UGE, LHS, FoundLHS) && - isKnownPredicate(ICmpInst::ICMP_ULE, RHS, FoundRHS)) + if (isKnownPredicateWithRanges(ICmpInst::ICMP_UGE, LHS, FoundLHS) && + isKnownPredicateWithRanges(ICmpInst::ICMP_ULE, RHS, FoundRHS)) return true; break; } @@ -5066,7 +5372,7 @@ const SCEV *ScalarEvolution::getBECount(const SCEV *Start, "This code doesn't handle negative strides yet!"); const Type *Ty = Start->getType(); - const SCEV *NegOne = getIntegerSCEV(-1, Ty); + const SCEV *NegOne = getConstant(Ty, (uint64_t)-1); const SCEV *Diff = getMinusSCEV(End, Start); const SCEV *RoundUp = getAddExpr(Step, NegOne); @@ -5122,7 +5428,7 @@ ScalarEvolution::HowManyLessThans(const SCEV *LHS, const SCEV *RHS, // behavior, so if wrap does occur, the loop could either terminate or // loop infinitely, but in either case, the loop is guaranteed to // iterate at least until the iteration where the wrapping occurs. - const SCEV *One = getIntegerSCEV(1, Step->getType()); + const SCEV *One = getConstant(Step->getType(), 1); if (isSigned) { APInt Max = APInt::getSignedMaxValue(BitWidth); if ((Max - getSignedRange(getMinusSCEV(Step, One)).getSignedMax()) @@ -5156,10 +5462,10 @@ ScalarEvolution::HowManyLessThans(const SCEV *LHS, const SCEV *RHS, // only know that it will execute (max(m,n)-n)/s times. In both cases, // the division must round up. const SCEV *End = RHS; - if (!isLoopGuardedByCond(L, - isSigned ? ICmpInst::ICMP_SLT : - ICmpInst::ICMP_ULT, - getMinusSCEV(Start, Step), RHS)) + if (!isLoopEntryGuardedByCond(L, + isSigned ? ICmpInst::ICMP_SLT : + ICmpInst::ICMP_ULT, + getMinusSCEV(Start, Step), RHS)) End = isSigned ? getSMaxExpr(RHS, Start) : getUMaxExpr(RHS, Start); @@ -5173,7 +5479,7 @@ ScalarEvolution::HowManyLessThans(const SCEV *LHS, const SCEV *RHS, // This allows the subsequent ceiling division of (N+(step-1))/step to // compute the correct value. const SCEV *StepMinusOne = getMinusSCEV(Step, - getIntegerSCEV(1, Step->getType())); + getConstant(Step->getType(), 1)); MaxEnd = isSigned ? 
getSMinExpr(MaxEnd, getMinusSCEV(getConstant(APInt::getSignedMaxValue(BitWidth)), @@ -5210,7 +5516,7 @@ const SCEV *SCEVAddRecExpr::getNumIterationsInRange(ConstantRange Range, if (const SCEVConstant *SC = dyn_cast<SCEVConstant>(getStart())) if (!SC->getValue()->isZero()) { SmallVector<const SCEV *, 4> Operands(op_begin(), op_end()); - Operands[0] = SE.getIntegerSCEV(0, SC->getType()); + Operands[0] = SE.getConstant(SC->getType(), 0); const SCEV *Shifted = SE.getAddRecExpr(Operands, getLoop()); if (const SCEVAddRecExpr *ShiftedAddRec = dyn_cast<SCEVAddRecExpr>(Shifted)) @@ -5234,7 +5540,7 @@ const SCEV *SCEVAddRecExpr::getNumIterationsInRange(ConstantRange Range, // iteration exits. unsigned BitWidth = SE.getTypeSizeInBits(getType()); if (!Range.contains(APInt(BitWidth, 0))) - return SE.getIntegerSCEV(0, getType()); + return SE.getConstant(getType(), 0); if (isAffine()) { // If this is an affine expression then we have this situation: @@ -5459,7 +5765,7 @@ void ScalarEvolution::print(raw_ostream &OS, const Module *) const { WriteAsOperand(OS, F, /*PrintType=*/false); OS << "\n"; for (inst_iterator I = inst_begin(F), E = inst_end(F); I != E; ++I) - if (isSCEVable(I->getType())) { + if (isSCEVable(I->getType()) && !isa<CmpInst>(*I)) { OS << *I << '\n'; OS << " --> "; const SCEV *SV = SE.getSCEV(&*I); diff --git a/lib/Analysis/ScalarEvolutionExpander.cpp b/lib/Analysis/ScalarEvolutionExpander.cpp index 2e18cea..0012b84 100644 --- a/lib/Analysis/ScalarEvolutionExpander.cpp +++ b/lib/Analysis/ScalarEvolutionExpander.cpp @@ -192,7 +192,7 @@ static bool FactorOutConstant(const SCEV *&S, // x/x == 1. if (S == Factor) { - S = SE.getIntegerSCEV(1, S->getType()); + S = SE.getConstant(S->getType(), 1); return true; } @@ -244,7 +244,7 @@ static bool FactorOutConstant(const SCEV *&S, // Mul's operands. If so, we can just remove it. for (unsigned i = 0, e = M->getNumOperands(); i != e; ++i) { const SCEV *SOp = M->getOperand(i); - const SCEV *Remainder = SE.getIntegerSCEV(0, SOp->getType()); + const SCEV *Remainder = SE.getConstant(SOp->getType(), 0); if (FactorOutConstant(SOp, Remainder, Factor, SE, TD) && Remainder->isZero()) { SmallVector<const SCEV *, 4> NewMulOps(M->op_begin(), M->op_end()); @@ -259,7 +259,7 @@ static bool FactorOutConstant(const SCEV *&S, // In an AddRec, check if both start and step are divisible. if (const SCEVAddRecExpr *A = dyn_cast<SCEVAddRecExpr>(S)) { const SCEV *Step = A->getStepRecurrence(SE); - const SCEV *StepRem = SE.getIntegerSCEV(0, Step->getType()); + const SCEV *StepRem = SE.getConstant(Step->getType(), 0); if (!FactorOutConstant(Step, StepRem, Factor, SE, TD)) return false; if (!StepRem->isZero()) @@ -289,7 +289,7 @@ static void SimplifyAddOperands(SmallVectorImpl<const SCEV *> &Ops, SmallVector<const SCEV *, 8> AddRecs(Ops.end() - NumAddRecs, Ops.end()); // Let ScalarEvolution sort and simplify the non-addrecs list. const SCEV *Sum = NoAddRecs.empty() ? - SE.getIntegerSCEV(0, Ty) : + SE.getConstant(Ty, 0) : SE.getAddExpr(NoAddRecs); // If it returned an add, use the operands. Otherwise it simplified // the sum into a single value, so just use that. 
@@ -316,7 +316,7 @@ static void SplitAddRecs(SmallVectorImpl<const SCEV *> &Ops, while (const SCEVAddRecExpr *A = dyn_cast<SCEVAddRecExpr>(Ops[i])) { const SCEV *Start = A->getStart(); if (Start->isZero()) break; - const SCEV *Zero = SE.getIntegerSCEV(0, Ty); + const SCEV *Zero = SE.getConstant(Ty, 0); AddRecs.push_back(SE.getAddRecExpr(Zero, A->getStepRecurrence(SE), A->getLoop())); @@ -392,7 +392,7 @@ Value *SCEVExpander::expandAddToGEP(const SCEV *const *op_begin, SmallVector<const SCEV *, 8> NewOps; for (unsigned i = 0, e = Ops.size(); i != e; ++i) { const SCEV *Op = Ops[i]; - const SCEV *Remainder = SE.getIntegerSCEV(0, Ty); + const SCEV *Remainder = SE.getConstant(Ty, 0); if (FactorOutConstant(Op, Remainder, ElSize, SE, SE.TD)) { // Op now has ElSize factored out. ScaledOps.push_back(Op); @@ -642,6 +642,8 @@ static const Loop *GetRelevantLoop(const SCEV *S, LoopInfo &LI, llvm_unreachable("Unexpected SCEV type!"); } +namespace { + /// LoopCompare - Compare loops by PickMostRelevantLoop. class LoopCompare { DominatorTree &DT; @@ -668,6 +670,8 @@ public: } }; +} + Value *SCEVExpander::visitAddExpr(const SCEVAddExpr *S) { const Type *Ty = SE.getEffectiveSCEVType(S->getType()); @@ -705,9 +709,11 @@ Value *SCEVExpander::visitAddExpr(const SCEVAddExpr *S) { Sum = expandAddToGEP(NewOps.begin(), NewOps.end(), PTy, Ty, Sum); } else if (const PointerType *PTy = dyn_cast<PointerType>(Op->getType())) { // The running sum is an integer, and there's a pointer at this level. - // Try to form a getelementptr. + // Try to form a getelementptr. If the running sum is instructions, + // use a SCEVUnknown to avoid re-analyzing them. SmallVector<const SCEV *, 4> NewOps; - NewOps.push_back(SE.getUnknown(Sum)); + NewOps.push_back(isa<Instruction>(Sum) ? SE.getUnknown(Sum) : + SE.getSCEV(Sum)); for (++I; I != E && I->first == CurLoop; ++I) NewOps.push_back(I->second); Sum = expandAddToGEP(NewOps.begin(), NewOps.end(), PTy, Ty, expand(Op)); @@ -797,7 +803,7 @@ static void ExposePointerBase(const SCEV *&Base, const SCEV *&Rest, while (const SCEVAddRecExpr *A = dyn_cast<SCEVAddRecExpr>(Base)) { Base = A->getStart(); Rest = SE.getAddExpr(Rest, - SE.getAddRecExpr(SE.getIntegerSCEV(0, A->getType()), + SE.getAddRecExpr(SE.getConstant(A->getType(), 0), A->getStepRecurrence(SE), A->getLoop())); } @@ -966,9 +972,12 @@ Value *SCEVExpander::expandAddRecExprLiterally(const SCEVAddRecExpr *S) { // Determine a normalized form of this expression, which is the expression // before any post-inc adjustment is made. const SCEVAddRecExpr *Normalized = S; - if (L == PostIncLoop) { - const SCEV *Step = S->getStepRecurrence(SE); - Normalized = cast<SCEVAddRecExpr>(SE.getMinusSCEV(S, Step)); + if (PostIncLoops.count(L)) { + PostIncLoopSet Loops; + Loops.insert(L); + Normalized = + cast<SCEVAddRecExpr>(TransformForPostIncUse(Normalize, S, 0, 0, + Loops, SE, *SE.DT)); } // Strip off any non-loop-dominating component from the addrec start. @@ -976,7 +985,7 @@ Value *SCEVExpander::expandAddRecExprLiterally(const SCEVAddRecExpr *S) { const SCEV *PostLoopOffset = 0; if (!Start->properlyDominates(L->getHeader(), SE.DT)) { PostLoopOffset = Start; - Start = SE.getIntegerSCEV(0, Normalized->getType()); + Start = SE.getConstant(Normalized->getType(), 0); Normalized = cast<SCEVAddRecExpr>(SE.getAddRecExpr(Start, Normalized->getStepRecurrence(SE), @@ -986,10 +995,9 @@ Value *SCEVExpander::expandAddRecExprLiterally(const SCEVAddRecExpr *S) { // Strip off any non-loop-dominating component from the addrec step. 
const SCEV *Step = Normalized->getStepRecurrence(SE); const SCEV *PostLoopScale = 0; - if (!Step->hasComputableLoopEvolution(L) && - !Step->dominates(L->getHeader(), SE.DT)) { + if (!Step->dominates(L->getHeader(), SE.DT)) { PostLoopScale = Step; - Step = SE.getIntegerSCEV(1, Normalized->getType()); + Step = SE.getConstant(Normalized->getType(), 1); Normalized = cast<SCEVAddRecExpr>(SE.getAddRecExpr(Start, Step, Normalized->getLoop())); @@ -1002,7 +1010,7 @@ Value *SCEVExpander::expandAddRecExprLiterally(const SCEVAddRecExpr *S) { // Accommodate post-inc mode, if necessary. Value *Result; - if (L != PostIncLoop) + if (!PostIncLoops.count(L)) Result = PN; else { // In PostInc mode, use the post-incremented value. @@ -1072,7 +1080,7 @@ Value *SCEVExpander::visitAddRecExpr(const SCEVAddRecExpr *S) { // {X,+,F} --> X + {0,+,F} if (!S->getStart()->isZero()) { SmallVector<const SCEV *, 4> NewOps(S->op_begin(), S->op_end()); - NewOps[0] = SE.getIntegerSCEV(0, Ty); + NewOps[0] = SE.getConstant(Ty, 0); const SCEV *Rest = SE.getAddRecExpr(NewOps, L); // Turn things like ptrtoint+arithmetic+inttoptr into GEP. See the @@ -1100,7 +1108,7 @@ Value *SCEVExpander::visitAddRecExpr(const SCEVAddRecExpr *S) { // {0,+,1} --> Insert a canonical induction variable into the loop! if (S->isAffine() && - S->getOperand(1) == SE.getIntegerSCEV(1, Ty)) { + S->getOperand(1) == SE.getConstant(Ty, 1)) { // If there's a canonical IV, just use it. if (CanonicalIV) { assert(Ty == SE.getEffectiveSCEVType(CanonicalIV->getType()) && @@ -1274,7 +1282,7 @@ Value *SCEVExpander::expand(const SCEV *S) { // If the SCEV is computable at this level, insert it into the header // after the PHIs (and after any other instructions that we've inserted // there) so that it is guaranteed to dominate any user inside the loop. - if (L && S->hasComputableLoopEvolution(L) && L != PostIncLoop) + if (L && S->hasComputableLoopEvolution(L) && !PostIncLoops.count(L)) InsertPt = L->getHeader()->getFirstNonPHI(); while (isInsertedInstruction(InsertPt) || isa<DbgInfoIntrinsic>(InsertPt)) InsertPt = llvm::next(BasicBlock::iterator(InsertPt)); @@ -1296,7 +1304,7 @@ Value *SCEVExpander::expand(const SCEV *S) { Value *V = visit(S); // Remember the expanded value for this SCEV at this location. 
- if (!PostIncLoop) + if (PostIncLoops.empty()) InsertedExpressions[std::make_pair(S, InsertPt)] = V; restoreInsertPoint(SaveInsertBB, SaveInsertPt); @@ -1304,7 +1312,7 @@ Value *SCEVExpander::expand(const SCEV *S) { } void SCEVExpander::rememberInstruction(Value *I) { - if (!PostIncLoop) + if (PostIncLoops.empty()) InsertedValues.insert(I); // If we just claimed an existing instruction and that instruction had @@ -1334,8 +1342,8 @@ Value * SCEVExpander::getOrInsertCanonicalInductionVariable(const Loop *L, const Type *Ty) { assert(Ty->isIntegerTy() && "Can only insert integer induction variables!"); - const SCEV *H = SE.getAddRecExpr(SE.getIntegerSCEV(0, Ty), - SE.getIntegerSCEV(1, Ty), L); + const SCEV *H = SE.getAddRecExpr(SE.getConstant(Ty, 0), + SE.getConstant(Ty, 1), L); BasicBlock *SaveInsertBB = Builder.GetInsertBlock(); BasicBlock::iterator SaveInsertPt = Builder.GetInsertPoint(); Value *V = expandCodeFor(H, 0, L->getHeader()->begin()); diff --git a/lib/Analysis/ScalarEvolutionNormalization.cpp b/lib/Analysis/ScalarEvolutionNormalization.cpp new file mode 100644 index 0000000..75c381d --- /dev/null +++ b/lib/Analysis/ScalarEvolutionNormalization.cpp @@ -0,0 +1,150 @@ +//===- ScalarEvolutionNormalization.cpp - See below -------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements utilities for working with "normalized" expressions. +// See the comments at the top of ScalarEvolutionNormalization.h for details. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Analysis/Dominators.h" +#include "llvm/Analysis/LoopInfo.h" +#include "llvm/Analysis/ScalarEvolutionExpressions.h" +#include "llvm/Analysis/ScalarEvolutionNormalization.h" +using namespace llvm; + +/// IVUseShouldUsePostIncValue - We have discovered a "User" of an IV expression +/// and now we need to decide whether the user should use the preinc or post-inc +/// value. If this user should use the post-inc version of the IV, return true. +/// +/// Choosing wrong here can break dominance properties (if we choose to use the +/// post-inc value when we cannot) or it can end up adding extra live-ranges to +/// the loop, resulting in reg-reg copies (if we use the pre-inc value when we +/// should use the post-inc value). +static bool IVUseShouldUsePostIncValue(Instruction *User, Instruction *IV, + const Loop *L, DominatorTree *DT) { + // If the user is in the loop, use the preinc value. + if (L->contains(User)) return false; + + BasicBlock *LatchBlock = L->getLoopLatch(); + if (!LatchBlock) + return false; + + // Ok, the user is outside of the loop. If it is dominated by the latch + // block, use the post-inc value. + if (DT->dominates(LatchBlock, User->getParent())) + return true; + + // There is one case we have to be careful of: PHI nodes. These little guys + // can live in blocks that are not dominated by the latch block, but (since + // their uses occur in the predecessor block, not the block the PHI lives in) + // should still use the post-inc value. Check for this case now. + PHINode *PN = dyn_cast<PHINode>(User); + if (!PN) return false; // not a phi, not dominated by latch block. + + // Look at all of the uses of IV by the PHI node. 
If any use corresponds to + // a block that is not dominated by the latch block, give up and use the + // preincremented value. + unsigned NumUses = 0; + for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) + if (PN->getIncomingValue(i) == IV) { + ++NumUses; + if (!DT->dominates(LatchBlock, PN->getIncomingBlock(i))) + return false; + } + + // Okay, all uses of IV by PN are in predecessor blocks that really are + // dominated by the latch block. Use the post-incremented value. + return true; +} + +const SCEV *llvm::TransformForPostIncUse(TransformKind Kind, + const SCEV *S, + Instruction *User, + Value *OperandValToReplace, + PostIncLoopSet &Loops, + ScalarEvolution &SE, + DominatorTree &DT) { + if (isa<SCEVConstant>(S) || isa<SCEVUnknown>(S)) + return S; + if (const SCEVCastExpr *X = dyn_cast<SCEVCastExpr>(S)) { + const SCEV *O = X->getOperand(); + const SCEV *N = TransformForPostIncUse(Kind, O, User, OperandValToReplace, + Loops, SE, DT); + if (O != N) + switch (S->getSCEVType()) { + case scZeroExtend: return SE.getZeroExtendExpr(N, S->getType()); + case scSignExtend: return SE.getSignExtendExpr(N, S->getType()); + case scTruncate: return SE.getTruncateExpr(N, S->getType()); + default: llvm_unreachable("Unexpected SCEVCastExpr kind!"); + } + return S; + } + if (const SCEVNAryExpr *X = dyn_cast<SCEVNAryExpr>(S)) { + SmallVector<const SCEV *, 8> Operands; + bool Changed = false; + for (SCEVNAryExpr::op_iterator I = X->op_begin(), E = X->op_end(); + I != E; ++I) { + const SCEV *O = *I; + const SCEV *N = TransformForPostIncUse(Kind, O, User, OperandValToReplace, + Loops, SE, DT); + Changed |= N != O; + Operands.push_back(N); + } + if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S)) { + // An addrec. This is the interesting part. + const Loop *L = AR->getLoop(); + const SCEV *Result = SE.getAddRecExpr(Operands, L); + switch (Kind) { + default: llvm_unreachable("Unexpected transform name!"); + case NormalizeAutodetect: + if (Instruction *OI = dyn_cast<Instruction>(OperandValToReplace)) + if (IVUseShouldUsePostIncValue(User, OI, L, &DT)) { + Result = SE.getMinusSCEV(Result, AR->getStepRecurrence(SE)); + Loops.insert(L); + } + break; + case Normalize: + if (Loops.count(L)) + Result = SE.getMinusSCEV(Result, AR->getStepRecurrence(SE)); + break; + case Denormalize: + if (Loops.count(L)) { + const SCEV *TransformedStep = + TransformForPostIncUse(Kind, AR->getStepRecurrence(SE), + User, OperandValToReplace, Loops, SE, DT); + Result = SE.getAddExpr(Result, TransformedStep); + } + break; + } + return Result; + } + if (Changed) + switch (S->getSCEVType()) { + case scAddExpr: return SE.getAddExpr(Operands); + case scMulExpr: return SE.getMulExpr(Operands); + case scSMaxExpr: return SE.getSMaxExpr(Operands); + case scUMaxExpr: return SE.getUMaxExpr(Operands); + default: llvm_unreachable("Unexpected SCEVNAryExpr kind!"); + } + return S; + } + if (const SCEVUDivExpr *X = dyn_cast<SCEVUDivExpr>(S)) { + const SCEV *LO = X->getLHS(); + const SCEV *RO = X->getRHS(); + const SCEV *LN = TransformForPostIncUse(Kind, LO, User, OperandValToReplace, + Loops, SE, DT); + const SCEV *RN = TransformForPostIncUse(Kind, RO, User, OperandValToReplace, + Loops, SE, DT); + if (LO != LN || RO != RN) + return SE.getUDivExpr(LN, RN); + return S; + } + llvm_unreachable("Unexpected SCEV kind!"); + return 0; +} diff --git a/lib/Analysis/ValueTracking.cpp b/lib/Analysis/ValueTracking.cpp index 5ae72f7..7e8ec2e 100644 --- a/lib/Analysis/ValueTracking.cpp +++ b/lib/Analysis/ValueTracking.cpp @@ -1342,22 +1342,23 
@@ Value *llvm::FindInsertedValue(Value *V, const unsigned *idx_begin, /// GetConstantStringInfo - This function computes the length of a /// null-terminated C string pointed to by V. If successful, it returns true /// and returns the string in Str. If unsuccessful, it returns false. -bool llvm::GetConstantStringInfo(Value *V, std::string &Str, uint64_t Offset, +bool llvm::GetConstantStringInfo(const Value *V, std::string &Str, + uint64_t Offset, bool StopAtNul) { // If V is NULL then return false; if (V == NULL) return false; // Look through bitcast instructions. - if (BitCastInst *BCI = dyn_cast<BitCastInst>(V)) + if (const BitCastInst *BCI = dyn_cast<BitCastInst>(V)) return GetConstantStringInfo(BCI->getOperand(0), Str, Offset, StopAtNul); // If the value is not a GEP instruction nor a constant expression with a // GEP instruction, then return false because ConstantArray can't occur // any other way - User *GEP = 0; - if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(V)) { + const User *GEP = 0; + if (const GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(V)) { GEP = GEPI; - } else if (ConstantExpr *CE = dyn_cast<ConstantExpr>(V)) { + } else if (const ConstantExpr *CE = dyn_cast<ConstantExpr>(V)) { if (CE->getOpcode() == Instruction::BitCast) return GetConstantStringInfo(CE->getOperand(0), Str, Offset, StopAtNul); if (CE->getOpcode() != Instruction::GetElementPtr) @@ -1378,7 +1379,7 @@ bool llvm::GetConstantStringInfo(Value *V, std::string &Str, uint64_t Offset, // Check to make sure that the first operand of the GEP is an integer and // has value 0 so that we are sure we're indexing into the initializer. - ConstantInt *FirstIdx = dyn_cast<ConstantInt>(GEP->getOperand(1)); + const ConstantInt *FirstIdx = dyn_cast<ConstantInt>(GEP->getOperand(1)); if (FirstIdx == 0 || !FirstIdx->isZero()) return false; @@ -1386,7 +1387,7 @@ bool llvm::GetConstantStringInfo(Value *V, std::string &Str, uint64_t Offset, // into the array. If this occurs, we can't say anything meaningful about // the string. uint64_t StartIdx = 0; - if (ConstantInt *CI = dyn_cast<ConstantInt>(GEP->getOperand(2))) + if (const ConstantInt *CI = dyn_cast<ConstantInt>(GEP->getOperand(2))) StartIdx = CI->getZExtValue(); else return false; @@ -1397,10 +1398,10 @@ bool llvm::GetConstantStringInfo(Value *V, std::string &Str, uint64_t Offset, // The GEP instruction, constant or instruction, must reference a global // variable that is a constant and is initialized. The referenced constant // initializer is the array that we'll use for optimization. - GlobalVariable* GV = dyn_cast<GlobalVariable>(V); + const GlobalVariable* GV = dyn_cast<GlobalVariable>(V); if (!GV || !GV->isConstant() || !GV->hasDefinitiveInitializer()) return false; - Constant *GlobalInit = GV->getInitializer(); + const Constant *GlobalInit = GV->getInitializer(); // Handle the ConstantAggregateZero case if (isa<ConstantAggregateZero>(GlobalInit)) { @@ -1411,7 +1412,7 @@ bool llvm::GetConstantStringInfo(Value *V, std::string &Str, uint64_t Offset, } // Must be a Constant Array - ConstantArray *Array = dyn_cast<ConstantArray>(GlobalInit); + const ConstantArray *Array = dyn_cast<ConstantArray>(GlobalInit); if (Array == 0 || !Array->getType()->getElementType()->isIntegerTy(8)) return false; @@ -1425,8 +1426,8 @@ bool llvm::GetConstantStringInfo(Value *V, std::string &Str, uint64_t Offset, // to in the array. 
Str.reserve(NumElts-Offset); for (unsigned i = Offset; i != NumElts; ++i) { - Constant *Elt = Array->getOperand(i); - ConstantInt *CI = dyn_cast<ConstantInt>(Elt); + const Constant *Elt = Array->getOperand(i); + const ConstantInt *CI = dyn_cast<ConstantInt>(Elt); if (!CI) // This array isn't suitable, non-int initializer. return false; if (StopAtNul && CI->isZero()) diff --git a/lib/Archive/Archive.cpp b/lib/Archive/Archive.cpp index f4f8a43..54c715c 100644 --- a/lib/Archive/Archive.cpp +++ b/lib/Archive/Archive.cpp @@ -233,15 +233,14 @@ bool llvm::GetBitcodeSymbols(const sys::Path& fName, } Module* -llvm::GetBitcodeSymbols(const unsigned char *BufPtr, unsigned Length, +llvm::GetBitcodeSymbols(const char *BufPtr, unsigned Length, const std::string& ModuleID, LLVMContext& Context, std::vector<std::string>& symbols, std::string* ErrMsg) { // Get the module. std::auto_ptr<MemoryBuffer> Buffer( - MemoryBuffer::getNewMemBuffer(Length, ModuleID.c_str())); - memcpy((char*)Buffer->getBufferStart(), BufPtr, Length); + MemoryBuffer::getMemBufferCopy(StringRef(BufPtr, Length),ModuleID.c_str())); Module *M = ParseBitcodeFile(Buffer.get(), Context, ErrMsg); if (!M) diff --git a/lib/Archive/ArchiveInternals.h b/lib/Archive/ArchiveInternals.h index baea544..08f20e7 100644 --- a/lib/Archive/ArchiveInternals.h +++ b/lib/Archive/ArchiveInternals.h @@ -77,7 +77,7 @@ namespace llvm { std::vector<std::string>& symbols, std::string* ErrMsg); - Module* GetBitcodeSymbols(const unsigned char*Buffer,unsigned Length, + Module* GetBitcodeSymbols(const char *Buffer, unsigned Length, const std::string& ModuleID, LLVMContext& Context, std::vector<std::string>& symbols, diff --git a/lib/Archive/ArchiveReader.cpp b/lib/Archive/ArchiveReader.cpp index 3ef15d2..eef6fe0 100644 --- a/lib/Archive/ArchiveReader.cpp +++ b/lib/Archive/ArchiveReader.cpp @@ -121,7 +121,7 @@ Archive::parseMemberHeader(const char*& At, const char* End, std::string* error) if (isdigit(Hdr->name[3])) { unsigned len = atoi(&Hdr->name[3]); const char *nulp = (const char *)memchr(At, '\0', len); - pathname.assign(At, nulp != 0 ? nulp - At : len); + pathname.assign(At, nulp != 0 ? (uintptr_t)(nulp - At) : len); At += len; MemberSize -= len; flags |= ArchiveMember::HasLongFilenameFlag; @@ -348,8 +348,8 @@ Archive::getAllModules(std::vector<Module*>& Modules, std::string FullMemberName = archPath.str() + "(" + I->getPath().str() + ")"; MemoryBuffer *Buffer = - MemoryBuffer::getNewMemBuffer(I->getSize(), FullMemberName.c_str()); - memcpy((char*)Buffer->getBufferStart(), I->getData(), I->getSize()); + MemoryBuffer::getMemBufferCopy(StringRef(I->getData(), I->getSize()), + FullMemberName.c_str()); Module *M = ParseBitcodeFile(Buffer, Context, ErrMessage); delete Buffer; @@ -487,9 +487,9 @@ Archive::findModuleDefiningSymbol(const std::string& symbol, // Now, load the bitcode module to get the Module. 
std::string FullMemberName = archPath.str() + "(" + mbr->getPath().str() + ")"; - MemoryBuffer *Buffer =MemoryBuffer::getNewMemBuffer(mbr->getSize(), - FullMemberName.c_str()); - memcpy((char*)Buffer->getBufferStart(), mbr->getData(), mbr->getSize()); + MemoryBuffer *Buffer = + MemoryBuffer::getMemBufferCopy(StringRef(mbr->getData(), mbr->getSize()), + FullMemberName.c_str()); Module *m = getLazyBitcodeModule(Buffer, Context, ErrMsg); if (!m) @@ -538,8 +538,8 @@ Archive::findModulesDefiningSymbols(std::set<std::string>& symbols, std::string FullMemberName = archPath.str() + "(" + mbr->getPath().str() + ")"; Module* M = - GetBitcodeSymbols((const unsigned char*)At, mbr->getSize(), - FullMemberName, Context, symbols, error); + GetBitcodeSymbols(At, mbr->getSize(), FullMemberName, Context, + symbols, error); if (M) { // Insert the module's symbols into the symbol table @@ -616,8 +616,8 @@ bool Archive::isBitcodeArchive() { archPath.str() + "(" + I->getPath().str() + ")"; MemoryBuffer *Buffer = - MemoryBuffer::getNewMemBuffer(I->getSize(), FullMemberName.c_str()); - memcpy((char*)Buffer->getBufferStart(), I->getData(), I->getSize()); + MemoryBuffer::getMemBufferCopy(StringRef(I->getData(), I->getSize()), + FullMemberName.c_str()); Module *M = ParseBitcodeFile(Buffer, Context); delete Buffer; if (!M) diff --git a/lib/Archive/ArchiveWriter.cpp b/lib/Archive/ArchiveWriter.cpp index a02601a..21d4f65 100644 --- a/lib/Archive/ArchiveWriter.cpp +++ b/lib/Archive/ArchiveWriter.cpp @@ -226,8 +226,7 @@ Archive::writeMember( std::string FullMemberName = archPath.str() + "(" + member.getPath().str() + ")"; Module* M = - GetBitcodeSymbols((const unsigned char*)data,fSize, - FullMemberName, Context, symbols, ErrMsg); + GetBitcodeSymbols(data, fSize, FullMemberName, Context, symbols, ErrMsg); // If the bitcode parsed successfully if ( M ) { diff --git a/lib/Archive/CMakeLists.txt b/lib/Archive/CMakeLists.txt index 27698cb..7ff478a 100644 --- a/lib/Archive/CMakeLists.txt +++ b/lib/Archive/CMakeLists.txt @@ -2,4 +2,4 @@ add_llvm_library(LLVMArchive Archive.cpp ArchiveReader.cpp ArchiveWriter.cpp - )
\ No newline at end of file + ) diff --git a/lib/AsmParser/LLParser.cpp b/lib/AsmParser/LLParser.cpp index cdad077..3b08ca1 100644 --- a/lib/AsmParser/LLParser.cpp +++ b/lib/AsmParser/LLParser.cpp @@ -322,9 +322,8 @@ bool LLParser::ParseUnnamedType() { return true; } - assert(Lex.getKind() == lltok::kw_type); LocTy TypeLoc = Lex.getLoc(); - Lex.Lex(); // eat kw_type + if (ParseToken(lltok::kw_type, "expected 'type' after '='")) return true; PATypeHolder Ty(Type::getVoidTy(Context)); if (ParseType(Ty)) return true; @@ -1171,10 +1170,10 @@ bool LLParser::ParseOptionalCommaAlign(unsigned &Alignment, return false; } - if (Lex.getKind() == lltok::kw_align) { - if (ParseOptionalAlignment(Alignment)) return true; - } else - return true; + if (Lex.getKind() != lltok::kw_align) + return Error(Lex.getLoc(), "expected metadata or 'align'"); + + if (ParseOptionalAlignment(Alignment)) return true; } return false; @@ -2352,11 +2351,28 @@ bool LLParser::ParseValID(ValID &ID, PerFunctionState *PFS) { if (NSW) return Error(ModifierLoc, "nsw only applies to integer operations"); } - // API compatibility: Accept either integer or floating-point types with - // add, sub, and mul. - if (!Val0->getType()->isIntOrIntVectorTy() && - !Val0->getType()->isFPOrFPVectorTy()) - return Error(ID.Loc,"constexpr requires integer, fp, or vector operands"); + // Check that the type is valid for the operator. + switch (Opc) { + case Instruction::Add: + case Instruction::Sub: + case Instruction::Mul: + case Instruction::UDiv: + case Instruction::SDiv: + case Instruction::URem: + case Instruction::SRem: + if (!Val0->getType()->isIntOrIntVectorTy()) + return Error(ID.Loc, "constexpr requires integer operands"); + break; + case Instruction::FAdd: + case Instruction::FSub: + case Instruction::FMul: + case Instruction::FDiv: + case Instruction::FRem: + if (!Val0->getType()->isFPOrFPVectorTy()) + return Error(ID.Loc, "constexpr requires fp operands"); + break; + default: llvm_unreachable("Unknown binary operator!"); + } unsigned Flags = 0; if (NUW) Flags |= OverflowingBinaryOperator::NoUnsignedWrap; if (NSW) Flags |= OverflowingBinaryOperator::NoSignedWrap; @@ -2788,6 +2804,10 @@ bool LLParser::ParseFunctionHeader(Function *&Fn, bool isDefine) { ForwardRefVals.find(FunctionName); if (FRVI != ForwardRefVals.end()) { Fn = M->getFunction(FunctionName); + if (Fn->getType() != PFT) + return Error(FRVI->second.second, "invalid forward reference to " + "function '" + FunctionName + "' with wrong type!"); + ForwardRefVals.erase(FRVI); } else if ((Fn = M->getFunction(FunctionName))) { // If this function already exists in the symbol table, then it is @@ -2933,6 +2953,8 @@ bool LLParser::ParseBasicBlock(PerFunctionState &PFS) { default: assert(0 && "Unknown ParseInstruction result!"); case InstError: return true; case InstNormal: + BB->getInstList().push_back(Inst); + // With a normal result, we check to see if the instruction is followed by // a comma and metadata. if (EatIfPresent(lltok::comma)) @@ -2940,6 +2962,8 @@ bool LLParser::ParseBasicBlock(PerFunctionState &PFS) { return true; break; case InstExtraComma: + BB->getInstList().push_back(Inst); + // If the instruction parser ate an extra comma at the end of it, it // *must* be followed by metadata. if (ParseInstructionMetadata(Inst)) @@ -2947,8 +2971,6 @@ bool LLParser::ParseBasicBlock(PerFunctionState &PFS) { break; } - BB->getInstList().push_back(Inst); - // Set the name on the instruction. 
if (PFS.SetInstName(NameID, NameStr, NameLoc, Inst)) return true; } while (!isa<TerminatorInst>(Inst)); @@ -2995,8 +3017,7 @@ int LLParser::ParseInstruction(Instruction *&Inst, BasicBlock *BB, if (EatIfPresent(lltok::kw_nuw)) NUW = true; } - // API compatibility: Accept either integer or floating-point types. - bool Result = ParseArithmetic(Inst, PFS, KeywordVal, 0); + bool Result = ParseArithmetic(Inst, PFS, KeywordVal, 1); if (!Result) { if (!Inst->getType()->isIntOrIntVectorTy()) { if (NUW) diff --git a/lib/AsmParser/LLParser.h b/lib/AsmParser/LLParser.h index ae460bb..c8f669f 100644 --- a/lib/AsmParser/LLParser.h +++ b/lib/AsmParser/LLParser.h @@ -74,7 +74,7 @@ namespace llvm { public: typedef LLLexer::LocTy LocTy; private: - LLVMContext& Context; + LLVMContext &Context; LLLexer Lex; Module *M; diff --git a/lib/AsmParser/Parser.cpp b/lib/AsmParser/Parser.cpp index 7280cf4..e511cbe 100644 --- a/lib/AsmParser/Parser.cpp +++ b/lib/AsmParser/Parser.cpp @@ -44,9 +44,9 @@ Module *llvm::ParseAssemblyFile(const std::string &Filename, SMDiagnostic &Err, std::string ErrorStr; MemoryBuffer *F = MemoryBuffer::getFileOrSTDIN(Filename.c_str(), &ErrorStr); if (F == 0) { - Err = SMDiagnostic(SMLoc(), "", -1, -1, + Err = SMDiagnostic(Filename, "Could not open input file '" + Filename + "': " + - ErrorStr, ""); + ErrorStr); return 0; } diff --git a/lib/CodeGen/AggressiveAntiDepBreaker.cpp b/lib/CodeGen/AggressiveAntiDepBreaker.cpp index 8840622f..4008a6a 100644 --- a/lib/CodeGen/AggressiveAntiDepBreaker.cpp +++ b/lib/CodeGen/AggressiveAntiDepBreaker.cpp @@ -31,12 +31,12 @@ using namespace llvm; // If DebugDiv > 0 then only break antidep with (ID % DebugDiv) == DebugMod static cl::opt<int> DebugDiv("agg-antidep-debugdiv", - cl::desc("Debug control for aggressive anti-dep breaker"), - cl::init(0), cl::Hidden); + cl::desc("Debug control for aggressive anti-dep breaker"), + cl::init(0), cl::Hidden); static cl::opt<int> DebugMod("agg-antidep-debugmod", - cl::desc("Debug control for aggressive anti-dep breaker"), - cl::init(0), cl::Hidden); + cl::desc("Debug control for aggressive anti-dep breaker"), + cl::init(0), cl::Hidden); AggressiveAntiDepState::AggressiveAntiDepState(const unsigned TargetRegs, MachineBasicBlock *BB) : @@ -210,7 +210,7 @@ void AggressiveAntiDepBreaker::FinishBlock() { } void AggressiveAntiDepBreaker::Observe(MachineInstr *MI, unsigned Count, - unsigned InsertPosIndex) { + unsigned InsertPosIndex) { assert(Count < InsertPosIndex && "Instruction index out of expected range!"); std::set<unsigned> PassthruRegs; @@ -244,7 +244,7 @@ void AggressiveAntiDepBreaker::Observe(MachineInstr *MI, unsigned Count, } bool AggressiveAntiDepBreaker::IsImplicitDefUse(MachineInstr *MI, - MachineOperand& MO) + MachineOperand& MO) { if (!MO.isReg() || !MO.isImplicit()) return false; @@ -281,9 +281,9 @@ void AggressiveAntiDepBreaker::GetPassthruRegs(MachineInstr *MI, /// AntiDepEdges - Return in Edges the anti- and output- dependencies /// in SU that we want to consider for breaking. 
-static void AntiDepEdges(SUnit *SU, std::vector<SDep*>& Edges) { +static void AntiDepEdges(const SUnit *SU, std::vector<const SDep*>& Edges) { SmallSet<unsigned, 4> RegSet; - for (SUnit::pred_iterator P = SU->Preds.begin(), PE = SU->Preds.end(); + for (SUnit::const_pred_iterator P = SU->Preds.begin(), PE = SU->Preds.end(); P != PE; ++P) { if ((P->getKind() == SDep::Anti) || (P->getKind() == SDep::Output)) { unsigned Reg = P->getReg(); @@ -297,14 +297,14 @@ static void AntiDepEdges(SUnit *SU, std::vector<SDep*>& Edges) { /// CriticalPathStep - Return the next SUnit after SU on the bottom-up /// critical path. -static SUnit *CriticalPathStep(SUnit *SU) { - SDep *Next = 0; +static const SUnit *CriticalPathStep(const SUnit *SU) { + const SDep *Next = 0; unsigned NextDepth = 0; // Find the predecessor edge with the greatest depth. if (SU != 0) { - for (SUnit::pred_iterator P = SU->Preds.begin(), PE = SU->Preds.end(); + for (SUnit::const_pred_iterator P = SU->Preds.begin(), PE = SU->Preds.end(); P != PE; ++P) { - SUnit *PredSU = P->getSUnit(); + const SUnit *PredSU = P->getSUnit(); unsigned PredLatency = P->getLatency(); unsigned PredTotalLatency = PredSU->getDepth() + PredLatency; // In the case of a latency tie, prefer an anti-dependency edge over @@ -359,8 +359,7 @@ void AggressiveAntiDepBreaker::HandleLastUse(unsigned Reg, unsigned KillIdx, void AggressiveAntiDepBreaker::PrescanInstruction(MachineInstr *MI, unsigned Count, - std::set<unsigned>& PassthruRegs) -{ + std::set<unsigned>& PassthruRegs) { unsigned *DefIndices = State->GetDefIndices(); std::multimap<unsigned, AggressiveAntiDepState::RegisterReference>& RegRefs = State->GetRegRefs(); @@ -439,7 +438,7 @@ void AggressiveAntiDepBreaker::PrescanInstruction(MachineInstr *MI, } void AggressiveAntiDepBreaker::ScanInstruction(MachineInstr *MI, - unsigned Count) { + unsigned Count) { DEBUG(dbgs() << "\tUse Groups:"); std::multimap<unsigned, AggressiveAntiDepState::RegisterReference>& RegRefs = State->GetRegRefs(); @@ -704,9 +703,9 @@ bool AggressiveAntiDepBreaker::FindSuitableFreeRegisters( /// ScheduleDAG and break them by renaming registers. /// unsigned AggressiveAntiDepBreaker::BreakAntiDependencies( - std::vector<SUnit>& SUnits, - MachineBasicBlock::iterator& Begin, - MachineBasicBlock::iterator& End, + const std::vector<SUnit>& SUnits, + MachineBasicBlock::iterator Begin, + MachineBasicBlock::iterator End, unsigned InsertPosIndex) { unsigned *KillIndices = State->GetKillIndices(); unsigned *DefIndices = State->GetDefIndices(); @@ -721,20 +720,21 @@ unsigned AggressiveAntiDepBreaker::BreakAntiDependencies( RenameOrderType RenameOrder; // ...need a map from MI to SUnit. - std::map<MachineInstr *, SUnit *> MISUnitMap; + std::map<MachineInstr *, const SUnit *> MISUnitMap; for (unsigned i = 0, e = SUnits.size(); i != e; ++i) { - SUnit *SU = &SUnits[i]; - MISUnitMap.insert(std::pair<MachineInstr *, SUnit *>(SU->getInstr(), SU)); + const SUnit *SU = &SUnits[i]; + MISUnitMap.insert(std::pair<MachineInstr *, const SUnit *>(SU->getInstr(), + SU)); } // Track progress along the critical path through the SUnit graph as // we walk the instructions. This is needed for regclasses that only // break critical-path anti-dependencies. 
- SUnit *CriticalPathSU = 0; + const SUnit *CriticalPathSU = 0; MachineInstr *CriticalPathMI = 0; if (CriticalPathSet.any()) { for (unsigned i = 0, e = SUnits.size(); i != e; ++i) { - SUnit *SU = &SUnits[i]; + const SUnit *SU = &SUnits[i]; if (!CriticalPathSU || ((SU->getDepth() + SU->Latency) > (CriticalPathSU->getDepth() + CriticalPathSU->Latency))) { @@ -775,8 +775,8 @@ unsigned AggressiveAntiDepBreaker::BreakAntiDependencies( // The dependence edges that represent anti- and output- // dependencies that are candidates for breaking. - std::vector<SDep*> Edges; - SUnit *PathSU = MISUnitMap[MI]; + std::vector<const SDep *> Edges; + const SUnit *PathSU = MISUnitMap[MI]; AntiDepEdges(PathSU, Edges); // If MI is not on the critical path, then we don't rename @@ -794,7 +794,7 @@ unsigned AggressiveAntiDepBreaker::BreakAntiDependencies( if (!MI->isKill()) { // Attempt to break each anti-dependency... for (unsigned i = 0, e = Edges.size(); i != e; ++i) { - SDep *Edge = Edges[i]; + const SDep *Edge = Edges[i]; SUnit *NextSU = Edge->getSUnit(); if ((Edge->getKind() != SDep::Anti) && @@ -838,7 +838,7 @@ unsigned AggressiveAntiDepBreaker::BreakAntiDependencies( // Also, if there are dependencies on other SUnits with the // same register as the anti-dependency, don't attempt to // break it. - for (SUnit::pred_iterator P = PathSU->Preds.begin(), + for (SUnit::const_pred_iterator P = PathSU->Preds.begin(), PE = PathSU->Preds.end(); P != PE; ++P) { if (P->getSUnit() == NextSU ? (P->getKind() != SDep::Anti || P->getReg() != AntiDepReg) : @@ -847,7 +847,7 @@ unsigned AggressiveAntiDepBreaker::BreakAntiDependencies( break; } } - for (SUnit::pred_iterator P = PathSU->Preds.begin(), + for (SUnit::const_pred_iterator P = PathSU->Preds.begin(), PE = PathSU->Preds.end(); P != PE; ++P) { if ((P->getSUnit() == NextSU) && (P->getKind() != SDep::Anti) && (P->getKind() != SDep::Output)) { diff --git a/lib/CodeGen/AggressiveAntiDepBreaker.h b/lib/CodeGen/AggressiveAntiDepBreaker.h index a62d68c..506d43e 100644 --- a/lib/CodeGen/AggressiveAntiDepBreaker.h +++ b/lib/CodeGen/AggressiveAntiDepBreaker.h @@ -142,9 +142,9 @@ namespace llvm { /// path /// of the ScheduleDAG and break them by renaming registers. /// - unsigned BreakAntiDependencies(std::vector<SUnit>& SUnits, - MachineBasicBlock::iterator& Begin, - MachineBasicBlock::iterator& End, + unsigned BreakAntiDependencies(const std::vector<SUnit>& SUnits, + MachineBasicBlock::iterator Begin, + MachineBasicBlock::iterator End, unsigned InsertPosIndex); /// Observe - Update liveness information to account for the current diff --git a/lib/CodeGen/Analysis.cpp b/lib/CodeGen/Analysis.cpp new file mode 100644 index 0000000..f71eee5 --- /dev/null +++ b/lib/CodeGen/Analysis.cpp @@ -0,0 +1,285 @@ +//===-- Analysis.cpp - CodeGen LLVM IR Analysis Utilities --*- C++ ------*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines several CodeGen-specific LLVM IR analysis utilties. 
+// +//===----------------------------------------------------------------------===// + +#include "llvm/CodeGen/Analysis.h" +#include "llvm/DerivedTypes.h" +#include "llvm/Function.h" +#include "llvm/Instructions.h" +#include "llvm/IntrinsicInst.h" +#include "llvm/LLVMContext.h" +#include "llvm/Module.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/Target/TargetData.h" +#include "llvm/Target/TargetLowering.h" +#include "llvm/Target/TargetOptions.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/MathExtras.h" +using namespace llvm; + +/// ComputeLinearIndex - Given an LLVM IR aggregate type and a sequence +/// of insertvalue or extractvalue indices that identify a member, return +/// the linearized index of the start of the member. +/// +unsigned llvm::ComputeLinearIndex(const TargetLowering &TLI, const Type *Ty, + const unsigned *Indices, + const unsigned *IndicesEnd, + unsigned CurIndex) { + // Base case: We're done. + if (Indices && Indices == IndicesEnd) + return CurIndex; + + // Given a struct type, recursively traverse the elements. + if (const StructType *STy = dyn_cast<StructType>(Ty)) { + for (StructType::element_iterator EB = STy->element_begin(), + EI = EB, + EE = STy->element_end(); + EI != EE; ++EI) { + if (Indices && *Indices == unsigned(EI - EB)) + return ComputeLinearIndex(TLI, *EI, Indices+1, IndicesEnd, CurIndex); + CurIndex = ComputeLinearIndex(TLI, *EI, 0, 0, CurIndex); + } + return CurIndex; + } + // Given an array type, recursively traverse the elements. + else if (const ArrayType *ATy = dyn_cast<ArrayType>(Ty)) { + const Type *EltTy = ATy->getElementType(); + for (unsigned i = 0, e = ATy->getNumElements(); i != e; ++i) { + if (Indices && *Indices == i) + return ComputeLinearIndex(TLI, EltTy, Indices+1, IndicesEnd, CurIndex); + CurIndex = ComputeLinearIndex(TLI, EltTy, 0, 0, CurIndex); + } + return CurIndex; + } + // We haven't found the type we're looking for, so keep searching. + return CurIndex + 1; +} + +/// ComputeValueVTs - Given an LLVM IR type, compute a sequence of +/// EVTs that represent all the individual underlying +/// non-aggregate types that comprise it. +/// +/// If Offsets is non-null, it points to a vector to be filled in +/// with the in-memory offsets of each of the individual values. +/// +void llvm::ComputeValueVTs(const TargetLowering &TLI, const Type *Ty, + SmallVectorImpl<EVT> &ValueVTs, + SmallVectorImpl<uint64_t> *Offsets, + uint64_t StartingOffset) { + // Given a struct type, recursively traverse the elements. + if (const StructType *STy = dyn_cast<StructType>(Ty)) { + const StructLayout *SL = TLI.getTargetData()->getStructLayout(STy); + for (StructType::element_iterator EB = STy->element_begin(), + EI = EB, + EE = STy->element_end(); + EI != EE; ++EI) + ComputeValueVTs(TLI, *EI, ValueVTs, Offsets, + StartingOffset + SL->getElementOffset(EI - EB)); + return; + } + // Given an array type, recursively traverse the elements. + if (const ArrayType *ATy = dyn_cast<ArrayType>(Ty)) { + const Type *EltTy = ATy->getElementType(); + uint64_t EltSize = TLI.getTargetData()->getTypeAllocSize(EltTy); + for (unsigned i = 0, e = ATy->getNumElements(); i != e; ++i) + ComputeValueVTs(TLI, EltTy, ValueVTs, Offsets, + StartingOffset + i * EltSize); + return; + } + // Interpret void as zero return values. + if (Ty->isVoidTy()) + return; + // Base case: we can get an EVT for this LLVM IR type. 
+ ValueVTs.push_back(TLI.getValueType(Ty)); + if (Offsets) + Offsets->push_back(StartingOffset); +} + +/// ExtractTypeInfo - Returns the type info, possibly bitcast, encoded in V. +GlobalVariable *llvm::ExtractTypeInfo(Value *V) { + V = V->stripPointerCasts(); + GlobalVariable *GV = dyn_cast<GlobalVariable>(V); + + if (GV && GV->getName() == ".llvm.eh.catch.all.value") { + assert(GV->hasInitializer() && + "The EH catch-all value must have an initializer"); + Value *Init = GV->getInitializer(); + GV = dyn_cast<GlobalVariable>(Init); + if (!GV) V = cast<ConstantPointerNull>(Init); + } + + assert((GV || isa<ConstantPointerNull>(V)) && + "TypeInfo must be a global variable or NULL"); + return GV; +} + +/// hasInlineAsmMemConstraint - Return true if the inline asm instruction being +/// processed uses a memory 'm' constraint. +bool +llvm::hasInlineAsmMemConstraint(std::vector<InlineAsm::ConstraintInfo> &CInfos, + const TargetLowering &TLI) { + for (unsigned i = 0, e = CInfos.size(); i != e; ++i) { + InlineAsm::ConstraintInfo &CI = CInfos[i]; + for (unsigned j = 0, ee = CI.Codes.size(); j != ee; ++j) { + TargetLowering::ConstraintType CType = TLI.getConstraintType(CI.Codes[j]); + if (CType == TargetLowering::C_Memory) + return true; + } + + // Indirect operand accesses access memory. + if (CI.isIndirect) + return true; + } + + return false; +} + +/// getFCmpCondCode - Return the ISD condition code corresponding to +/// the given LLVM IR floating-point condition code. This includes +/// consideration of global floating-point math flags. +/// +ISD::CondCode llvm::getFCmpCondCode(FCmpInst::Predicate Pred) { + ISD::CondCode FPC, FOC; + switch (Pred) { + case FCmpInst::FCMP_FALSE: FOC = FPC = ISD::SETFALSE; break; + case FCmpInst::FCMP_OEQ: FOC = ISD::SETEQ; FPC = ISD::SETOEQ; break; + case FCmpInst::FCMP_OGT: FOC = ISD::SETGT; FPC = ISD::SETOGT; break; + case FCmpInst::FCMP_OGE: FOC = ISD::SETGE; FPC = ISD::SETOGE; break; + case FCmpInst::FCMP_OLT: FOC = ISD::SETLT; FPC = ISD::SETOLT; break; + case FCmpInst::FCMP_OLE: FOC = ISD::SETLE; FPC = ISD::SETOLE; break; + case FCmpInst::FCMP_ONE: FOC = ISD::SETNE; FPC = ISD::SETONE; break; + case FCmpInst::FCMP_ORD: FOC = FPC = ISD::SETO; break; + case FCmpInst::FCMP_UNO: FOC = FPC = ISD::SETUO; break; + case FCmpInst::FCMP_UEQ: FOC = ISD::SETEQ; FPC = ISD::SETUEQ; break; + case FCmpInst::FCMP_UGT: FOC = ISD::SETGT; FPC = ISD::SETUGT; break; + case FCmpInst::FCMP_UGE: FOC = ISD::SETGE; FPC = ISD::SETUGE; break; + case FCmpInst::FCMP_ULT: FOC = ISD::SETLT; FPC = ISD::SETULT; break; + case FCmpInst::FCMP_ULE: FOC = ISD::SETLE; FPC = ISD::SETULE; break; + case FCmpInst::FCMP_UNE: FOC = ISD::SETNE; FPC = ISD::SETUNE; break; + case FCmpInst::FCMP_TRUE: FOC = FPC = ISD::SETTRUE; break; + default: + llvm_unreachable("Invalid FCmp predicate opcode!"); + FOC = FPC = ISD::SETFALSE; + break; + } + if (FiniteOnlyFPMath()) + return FOC; + else + return FPC; +} + +/// getICmpCondCode - Return the ISD condition code corresponding to +/// the given LLVM IR integer condition code. 
+/// +ISD::CondCode llvm::getICmpCondCode(ICmpInst::Predicate Pred) { + switch (Pred) { + case ICmpInst::ICMP_EQ: return ISD::SETEQ; + case ICmpInst::ICMP_NE: return ISD::SETNE; + case ICmpInst::ICMP_SLE: return ISD::SETLE; + case ICmpInst::ICMP_ULE: return ISD::SETULE; + case ICmpInst::ICMP_SGE: return ISD::SETGE; + case ICmpInst::ICMP_UGE: return ISD::SETUGE; + case ICmpInst::ICMP_SLT: return ISD::SETLT; + case ICmpInst::ICMP_ULT: return ISD::SETULT; + case ICmpInst::ICMP_SGT: return ISD::SETGT; + case ICmpInst::ICMP_UGT: return ISD::SETUGT; + default: + llvm_unreachable("Invalid ICmp predicate opcode!"); + return ISD::SETNE; + } +} + +/// Test if the given instruction is in a position to be optimized +/// with a tail-call. This roughly means that it's in a block with +/// a return and there's nothing that needs to be scheduled +/// between it and the return. +/// +/// This function only tests target-independent requirements. +bool llvm::isInTailCallPosition(ImmutableCallSite CS, Attributes CalleeRetAttr, + const TargetLowering &TLI) { + const Instruction *I = CS.getInstruction(); + const BasicBlock *ExitBB = I->getParent(); + const TerminatorInst *Term = ExitBB->getTerminator(); + const ReturnInst *Ret = dyn_cast<ReturnInst>(Term); + const Function *F = ExitBB->getParent(); + + // The block must end in a return statement or unreachable. + // + // FIXME: Decline tailcall if it's not guaranteed and if the block ends in + // an unreachable, for now. The way tailcall optimization is currently + // implemented means it will add an epilogue followed by a jump. That is + // not profitable. Also, if the callee is a special function (e.g. + // longjmp on x86), it can end up causing miscompilation that has not + // been fully understood. + if (!Ret && + (!GuaranteedTailCallOpt || !isa<UnreachableInst>(Term))) return false; + + // If I will have a chain, make sure no other instruction that will have a + // chain interposes between I and the return. + if (I->mayHaveSideEffects() || I->mayReadFromMemory() || + !I->isSafeToSpeculativelyExecute()) + for (BasicBlock::const_iterator BBI = prior(prior(ExitBB->end())); ; + --BBI) { + if (&*BBI == I) + break; + // Debug info intrinsics do not get in the way of tail call optimization. + if (isa<DbgInfoIntrinsic>(BBI)) + continue; + if (BBI->mayHaveSideEffects() || BBI->mayReadFromMemory() || + !BBI->isSafeToSpeculativelyExecute()) + return false; + } + + // If the block ends with a void return or unreachable, it doesn't matter + // what the call's return type is. + if (!Ret || Ret->getNumOperands() == 0) return true; + + // If the return value is undef, it doesn't matter what the call's + // return type is. + if (isa<UndefValue>(Ret->getOperand(0))) return true; + + // Conservatively require the attributes of the call to match those of + // the return. Ignore noalias because it doesn't affect the call sequence. + unsigned CallerRetAttr = F->getAttributes().getRetAttributes(); + if ((CalleeRetAttr ^ CallerRetAttr) & ~Attribute::NoAlias) + return false; + + // It's not safe to eliminate the sign / zero extension of the return value. + if ((CallerRetAttr & Attribute::ZExt) || (CallerRetAttr & Attribute::SExt)) + return false; + + // Otherwise, make sure the unmodified return value of I is the return value. + for (const Instruction *U = dyn_cast<Instruction>(Ret->getOperand(0)); ; + U = dyn_cast<Instruction>(U->getOperand(0))) { + if (!U) + return false; + if (!U->hasOneUse()) + return false; + if (U == I) + break; + // Check for a truly no-op truncate. 
+ if (isa<TruncInst>(U) && + TLI.isTruncateFree(U->getOperand(0)->getType(), U->getType())) + continue; + // Check for a truly no-op bitcast. + if (isa<BitCastInst>(U) && + (U->getOperand(0)->getType() == U->getType() || + (U->getOperand(0)->getType()->isPointerTy() && + U->getType()->isPointerTy()))) + continue; + // Otherwise it's not a true no-op. + return false; + } + + return true; +} + diff --git a/lib/CodeGen/AntiDepBreaker.h b/lib/CodeGen/AntiDepBreaker.h index 3ee30c6..086b757 100644 --- a/lib/CodeGen/AntiDepBreaker.h +++ b/lib/CodeGen/AntiDepBreaker.h @@ -39,9 +39,9 @@ public: /// basic-block region and break them by renaming registers. Return /// the number of anti-dependencies broken. /// - virtual unsigned BreakAntiDependencies(std::vector<SUnit>& SUnits, - MachineBasicBlock::iterator& Begin, - MachineBasicBlock::iterator& End, + virtual unsigned BreakAntiDependencies(const std::vector<SUnit>& SUnits, + MachineBasicBlock::iterator Begin, + MachineBasicBlock::iterator End, unsigned InsertPosIndex) =0; /// Observe - Update liveness information to account for the current diff --git a/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/lib/CodeGen/AsmPrinter/AsmPrinter.cpp index c86e241..ded4b3f 100644 --- a/lib/CodeGen/AsmPrinter/AsmPrinter.cpp +++ b/lib/CodeGen/AsmPrinter/AsmPrinter.cpp @@ -42,8 +42,13 @@ #include "llvm/ADT/Statistic.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/Format.h" +#include "llvm/Support/Timer.h" using namespace llvm; +static const char *DWARFGroupName = "DWARF Emission"; +static const char *DbgTimerName = "DWARF Debug Writer"; +static const char *EHTimerName = "DWARF Exception Writer"; + STATISTIC(EmittedInsts, "Number of machine instrs printed"); char AsmPrinter::ID = 0; @@ -56,6 +61,35 @@ static gcp_map_type &getGCMap(void *&P) { } +/// getGVAlignmentLog2 - Return the alignment to use for the specified global +/// value in log2 form. This rounds up to the preferred alignment if possible +/// and legal. +static unsigned getGVAlignmentLog2(const GlobalValue *GV, const TargetData &TD, + unsigned InBits = 0) { + unsigned NumBits = 0; + if (const GlobalVariable *GVar = dyn_cast<GlobalVariable>(GV)) + NumBits = TD.getPreferredAlignmentLog(GVar); + + // If InBits is specified, round it to it. + if (InBits > NumBits) + NumBits = InBits; + + // If the GV has a specified alignment, take it into account. + if (GV->getAlignment() == 0) + return NumBits; + + unsigned GVAlign = Log2_32(GV->getAlignment()); + + // If the GVAlign is larger than NumBits, or if we are required to obey + // NumBits because the GV has an assigned section, obey it. 
+ if (GVAlign > NumBits || GV->hasSection()) + NumBits = GVAlign; + return NumBits; +} + + + + AsmPrinter::AsmPrinter(TargetMachine &tm, MCStreamer &Streamer) : MachineFunctionPass(&ID), TM(tm), MAI(tm.getMCAsmInfo()), @@ -88,7 +122,7 @@ unsigned AsmPrinter::getFunctionNumber() const { return MF->getFunctionNumber(); } -TargetLoweringObjectFile &AsmPrinter::getObjFileLowering() const { +const TargetLoweringObjectFile &AsmPrinter::getObjFileLowering() const { return TM.getTargetLowering()->getObjFileLowering(); } @@ -222,8 +256,12 @@ void AsmPrinter::EmitGlobalVariable(const GlobalVariable *GV) { SectionKind GVKind = TargetLoweringObjectFile::getKindForGlobal(GV, TM); const TargetData *TD = TM.getTargetData(); - unsigned Size = TD->getTypeAllocSize(GV->getType()->getElementType()); - unsigned AlignLog = TD->getPreferredAlignmentLog(GV); + uint64_t Size = TD->getTypeAllocSize(GV->getType()->getElementType()); + + // If the alignment is specified, we *must* obey it. Overaligning a global + // with a specified alignment is a prompt way to break globals emitted to + // sections and expected to be contiguous (e.g. ObjC metadata). + unsigned AlignLog = getGVAlignmentLog2(GV, *TD); // Handle common and BSS local symbols (.lcomm). if (GVKind.isCommon() || GVKind.isBSSLocal()) { @@ -270,6 +308,8 @@ void AsmPrinter::EmitGlobalVariable(const GlobalVariable *GV) { // Handle the zerofill directive on darwin, which is a special form of BSS // emission. if (GVKind.isBSSExtern() && MAI->hasMachoZeroFillDirective()) { + if (Size == 0) Size = 1; // zerofill of 0 bytes is undefined. + // .globl _foo OutStreamer.EmitSymbolAttribute(GVSym, MCSA_Global); // .zerofill __DATA, __common, _foo, 400, 5 @@ -347,8 +387,22 @@ void AsmPrinter::EmitFunctionHeader() { } // Emit pre-function debug and/or EH information. - if (DE) DE->BeginFunction(MF); - if (DD) DD->beginFunction(MF); + if (DE) { + if (TimePassesIsEnabled) { + NamedRegionTimer T(EHTimerName, DWARFGroupName); + DE->BeginFunction(MF); + } else { + DE->BeginFunction(MF); + } + } + if (DD) { + if (TimePassesIsEnabled) { + NamedRegionTimer T(DbgTimerName, DWARFGroupName); + DD->beginFunction(MF); + } else { + DD->beginFunction(MF); + } + } } /// EmitFunctionEntryLabel - Emit the label that is the entrypoint for the @@ -434,7 +488,58 @@ static void EmitKill(const MachineInstr *MI, AsmPrinter &AP) { AP.OutStreamer.AddBlankLine(); } +/// EmitDebugValueComment - This method handles the target-independent form +/// of DBG_VALUE, returning true if it was able to do so. A false return +/// means the target will need to handle MI in EmitInstruction. +static bool EmitDebugValueComment(const MachineInstr *MI, AsmPrinter &AP) { + // This code handles only the 3-operand target-independent form. + if (MI->getNumOperands() != 3) + return false; + SmallString<128> Str; + raw_svector_ostream OS(Str); + OS << '\t' << AP.MAI->getCommentString() << "DEBUG_VALUE: "; + + // cast away const; DIetc do not take const operands for some reason. + DIVariable V(const_cast<MDNode*>(MI->getOperand(2).getMetadata())); + if (V.getContext().isSubprogram()) + OS << DISubprogram(V.getContext().getNode()).getDisplayName() << ":"; + OS << V.getName() << " <- "; + + // Register or immediate value. Register 0 means undef. 
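  // The net effect is an assembly comment of roughly this shape (names and
  // the comment marker are illustrative and target-dependent):
  //   #DEBUG_VALUE: myFunc:myVar <- RDI+0
  // or, when the location register is 0,
  //   #DEBUG_VALUE: myFunc:myVar <- undef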
+ if (MI->getOperand(0).isFPImm()) { + APFloat APF = APFloat(MI->getOperand(0).getFPImm()->getValueAPF()); + if (MI->getOperand(0).getFPImm()->getType()->isFloatTy()) { + OS << (double)APF.convertToFloat(); + } else if (MI->getOperand(0).getFPImm()->getType()->isDoubleTy()) { + OS << APF.convertToDouble(); + } else { + // There is no good way to print long double. Convert a copy to + // double. Ah well, it's only a comment. + bool ignored; + APF.convert(APFloat::IEEEdouble, APFloat::rmNearestTiesToEven, + &ignored); + OS << "(long double) " << APF.convertToDouble(); + } + } else if (MI->getOperand(0).isImm()) { + OS << MI->getOperand(0).getImm(); + } else { + assert(MI->getOperand(0).isReg() && "Unknown operand type"); + if (MI->getOperand(0).getReg() == 0) { + // Suppress offset, it is not meaningful here. + OS << "undef"; + // NOTE: Want this comment at start of line, don't emit with AddComment. + AP.OutStreamer.EmitRawText(OS.str()); + return true; + } + OS << AP.TM.getRegisterInfo()->getName(MI->getOperand(0).getReg()); + } + + OS << '+' << MI->getOperand(1).getImm(); + // NOTE: Want this comment at start of line, don't emit with AddComment. + AP.OutStreamer.EmitRawText(OS.str()); + return true; +} /// EmitFunctionBody - This method emits the body and trailer for a /// function. @@ -453,13 +558,20 @@ void AsmPrinter::EmitFunctionBody() { for (MachineBasicBlock::const_iterator II = I->begin(), IE = I->end(); II != IE; ++II) { // Print the assembly for the instruction. - if (!II->isLabel()) + if (!II->isLabel() && !II->isImplicitDef() && !II->isKill() && + !II->isDebugValue()) { HasAnyRealCode = true; - - ++EmittedInsts; - - if (ShouldPrintDebugScopes) - DD->beginScope(II); + ++EmittedInsts; + } + + if (ShouldPrintDebugScopes) { + if (TimePassesIsEnabled) { + NamedRegionTimer T(DbgTimerName, DWARFGroupName); + DD->beginScope(II); + } else { + DD->beginScope(II); + } + } if (isVerbose()) EmitComments(*II, OutStreamer.GetCommentOS()); @@ -473,6 +585,12 @@ void AsmPrinter::EmitFunctionBody() { case TargetOpcode::INLINEASM: EmitInlineAsm(II); break; + case TargetOpcode::DBG_VALUE: + if (isVerbose()) { + if (!EmitDebugValueComment(II, *this)) + EmitInstruction(II); + } + break; case TargetOpcode::IMPLICIT_DEF: if (isVerbose()) EmitImplicitDef(II, *this); break; @@ -484,16 +602,29 @@ void AsmPrinter::EmitFunctionBody() { break; } - if (ShouldPrintDebugScopes) - DD->endScope(II); + if (ShouldPrintDebugScopes) { + if (TimePassesIsEnabled) { + NamedRegionTimer T(DbgTimerName, DWARFGroupName); + DD->endScope(II); + } else { + DD->endScope(II); + } + } } } // If the function is empty and the object file uses .subsections_via_symbols, // then we need to emit *something* to the function body to prevent the - // labels from collapsing together. Just emit a 0 byte. - if (MAI->hasSubsectionsViaSymbols() && !HasAnyRealCode) - OutStreamer.EmitIntValue(0, 1, 0/*addrspace*/); + // labels from collapsing together. Just emit a noop. + if (MAI->hasSubsectionsViaSymbols() && !HasAnyRealCode) { + MCInst Noop; + TM.getInstrInfo()->getNoopForMachoTarget(Noop); + if (Noop.getOpcode()) { + OutStreamer.AddComment("avoids zero-length function"); + OutStreamer.EmitInstruction(Noop); + } else // Target not mc-ized yet. + OutStreamer.EmitRawText(StringRef("\tnop\n")); + } // Emit target-specific gunk after the function body. EmitFunctionBodyEnd(); @@ -514,8 +645,22 @@ void AsmPrinter::EmitFunctionBody() { } // Emit post-function debug information. 
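  // As elsewhere in this patch, the emission below is wrapped in a
  // NamedRegionTimer only when TimePassesIsEnabled is set, so no timer
  // object is constructed (and no timing overhead paid) on ordinary runs;
  // the duplicated call in the else branch is the cost of keeping the RAII
  // timer scoped exactly to the work being measured.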
- if (DD) DD->endFunction(MF); - if (DE) DE->EndFunction(); + if (DD) { + if (TimePassesIsEnabled) { + NamedRegionTimer T(DbgTimerName, DWARFGroupName); + DD->endFunction(MF); + } else { + DD->endFunction(MF); + } + } + if (DE) { + if (TimePassesIsEnabled) { + NamedRegionTimer T(EHTimerName, DWARFGroupName); + DE->EndFunction(); + } else { + DE->EndFunction(); + } + } MMI->EndFunction(); // Print out jump tables referenced by the function. @@ -524,6 +669,12 @@ void AsmPrinter::EmitFunctionBody() { OutStreamer.AddBlankLine(); } +/// getDebugValueLocation - Get location information encoded by DBG_VALUE +/// operands. +MachineLocation AsmPrinter::getDebugValueLocation(const MachineInstr *MI) const { + // Target specific DBG_VALUE instructions are handled by each target. + return MachineLocation(); +} bool AsmPrinter::doFinalization(Module &M) { // Emit global variables. @@ -533,11 +684,21 @@ bool AsmPrinter::doFinalization(Module &M) { // Finalize debug and EH information. if (DE) { - DE->EndModule(); + if (TimePassesIsEnabled) { + NamedRegionTimer T(EHTimerName, DWARFGroupName); + DE->EndModule(); + } else { + DE->EndModule(); + } delete DE; DE = 0; } if (DD) { - DD->endModule(); + if (TimePassesIsEnabled) { + NamedRegionTimer T(DbgTimerName, DWARFGroupName); + DD->endModule(); + } else { + DD->endModule(); + } delete DD; DD = 0; } @@ -595,7 +756,7 @@ bool AsmPrinter::doFinalization(Module &M) { // to be executable. Some targets have a directive to declare this. Function *InitTrampolineIntrinsic = M.getFunction("llvm.init.trampoline"); if (!InitTrampolineIntrinsic || InitTrampolineIntrinsic->use_empty()) - if (MCSection *S = MAI->getNonexecutableStackSection(OutContext)) + if (const MCSection *S = MAI->getNonexecutableStackSection(OutContext)) OutStreamer.SwitchSection(S); // Allow the target to emit any magic that it wants at the end of the file, @@ -877,7 +1038,7 @@ bool AsmPrinter::EmitSpecialLLVMGlobal(const GlobalVariable *GV) { unsigned Align = Log2_32(TD->getPointerPrefAlignment()); if (GV->getName() == "llvm.global_ctors") { OutStreamer.SwitchSection(getObjFileLowering().getStaticCtorSection()); - EmitAlignment(Align, 0); + EmitAlignment(Align); EmitXXStructorList(GV->getInitializer()); if (TM.getRelocationModel() == Reloc::Static && @@ -891,7 +1052,7 @@ bool AsmPrinter::EmitSpecialLLVMGlobal(const GlobalVariable *GV) { if (GV->getName() == "llvm.global_dtors") { OutStreamer.SwitchSection(getObjFileLowering().getStaticDtorSection()); - EmitAlignment(Align, 0); + EmitAlignment(Align); EmitXXStructorList(GV->getInitializer()); if (TM.getRelocationModel() == Reloc::Static && @@ -984,30 +1145,49 @@ void AsmPrinter::EmitLabelDifference(const MCSymbol *Hi, const MCSymbol *Lo, OutStreamer.EmitSymbolValue(SetLabel, Size, 0/*AddrSpace*/); } +/// EmitLabelOffsetDifference - Emit something like ".long Hi+Offset-Lo" +/// where the size in bytes of the directive is specified by Size and Hi/Lo +/// specify the labels. This implicitly uses .set if it is available. +void AsmPrinter::EmitLabelOffsetDifference(const MCSymbol *Hi, uint64_t Offset, + const MCSymbol *Lo, unsigned Size) + const { + + // Emit Hi+Offset - Lo + // Get the Hi+Offset expression. + const MCExpr *Plus = + MCBinaryExpr::CreateAdd(MCSymbolRefExpr::Create(Hi, OutContext), + MCConstantExpr::Create(Offset, OutContext), + OutContext); + + // Get the Hi+Offset-Lo expression. 
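  // Illustrative output (symbol names hypothetical): on an assembler without
  // .set support this is emitted directly as
  //   .long Lhi+16-Llo
  // whereas with .set available it becomes
  //   .set  Lset0, Lhi+16-Llo
  //   .long Lset0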
+ const MCExpr *Diff = + MCBinaryExpr::CreateSub(Plus, + MCSymbolRefExpr::Create(Lo, OutContext), + OutContext); + + if (!MAI->hasSetDirective()) + OutStreamer.EmitValue(Diff, 4, 0/*AddrSpace*/); + else { + // Otherwise, emit with .set (aka assignment). + MCSymbol *SetLabel = GetTempSymbol("set", SetCounter++); + OutStreamer.EmitAssignment(SetLabel, Diff); + OutStreamer.EmitSymbolValue(SetLabel, 4, 0/*AddrSpace*/); + } +} + //===----------------------------------------------------------------------===// // EmitAlignment - Emit an alignment directive to the specified power of // two boundary. For example, if you pass in 3 here, you will get an 8 // byte alignment. If a global value is specified, and if that global has -// an explicit alignment requested, it will unconditionally override the -// alignment request. However, if ForcedAlignBits is specified, this value -// has final say: the ultimate alignment will be the max of ForcedAlignBits -// and the alignment computed with NumBits and the global. +// an explicit alignment requested, it will override the alignment request +// if required for correctness. // -// The algorithm is: -// Align = NumBits; -// if (GV && GV->hasalignment) Align = GV->getalignment(); -// Align = std::max(Align, ForcedAlignBits); -// -void AsmPrinter::EmitAlignment(unsigned NumBits, const GlobalValue *GV, - unsigned ForcedAlignBits, - bool UseFillExpr) const { - if (GV && GV->getAlignment()) - NumBits = Log2_32(GV->getAlignment()); - NumBits = std::max(NumBits, ForcedAlignBits); +void AsmPrinter::EmitAlignment(unsigned NumBits, const GlobalValue *GV) const { + if (GV) NumBits = getGVAlignmentLog2(GV, *TM.getTargetData(), NumBits); - if (NumBits == 0) return; // No need to emit alignment. + if (NumBits == 0) return; // 1-byte aligned: no need to emit alignment. if (getCurrentSection()->getKind().isText()) OutStreamer.EmitCodeAlignment(1 << NumBits); @@ -1015,6 +1195,10 @@ void AsmPrinter::EmitAlignment(unsigned NumBits, const GlobalValue *GV, OutStreamer.EmitValueToAlignment(1 << NumBits, 0, 1, 0); } +//===----------------------------------------------------------------------===// +// Constant emission. +//===----------------------------------------------------------------------===// + /// LowerConstant - Lower the specified LLVM Constant to an MCExpr. /// static const MCExpr *LowerConstant(const Constant *CV, AsmPrinter &AP) { @@ -1142,12 +1326,15 @@ static const MCExpr *LowerConstant(const Constant *CV, AsmPrinter &AP) { } } +static void EmitGlobalConstantImpl(const Constant *C, unsigned AddrSpace, + AsmPrinter &AP); + static void EmitGlobalConstantArray(const ConstantArray *CA, unsigned AddrSpace, AsmPrinter &AP) { if (AddrSpace != 0 || !CA->isString()) { // Not a string. 
Print the values in successive locations for (unsigned i = 0, e = CA->getNumOperands(); i != e; ++i) - AP.EmitGlobalConstant(CA->getOperand(i), AddrSpace); + EmitGlobalConstantImpl(CA->getOperand(i), AddrSpace, AP); return; } @@ -1163,7 +1350,7 @@ static void EmitGlobalConstantArray(const ConstantArray *CA, unsigned AddrSpace, static void EmitGlobalConstantVector(const ConstantVector *CV, unsigned AddrSpace, AsmPrinter &AP) { for (unsigned i = 0, e = CV->getType()->getNumElements(); i != e; ++i) - AP.EmitGlobalConstant(CV->getOperand(i), AddrSpace); + EmitGlobalConstantImpl(CV->getOperand(i), AddrSpace, AP); } static void EmitGlobalConstantStruct(const ConstantStruct *CS, @@ -1183,7 +1370,7 @@ static void EmitGlobalConstantStruct(const ConstantStruct *CS, SizeSoFar += FieldSize + PadSize; // Now print the actual field value. - AP.EmitGlobalConstant(Field, AddrSpace); + EmitGlobalConstantImpl(Field, AddrSpace, AP); // Insert padding - this may include padding to increase the size of the // current field up to the ABI size (if the struct is not packed) as well @@ -1203,7 +1390,7 @@ static void EmitGlobalConstantUnion(const ConstantUnion *CU, unsigned FilledSize = TD->getTypeAllocSize(Contents->getType()); // Print the actually filled part - AP.EmitGlobalConstant(Contents, AddrSpace); + EmitGlobalConstantImpl(Contents, AddrSpace, AP); // And pad with enough zeroes AP.OutStreamer.EmitZeros(Size-FilledSize, AddrSpace); @@ -1236,7 +1423,7 @@ static void EmitGlobalConstantFP(const ConstantFP *CFP, unsigned AddrSpace, if (CFP->getType()->isX86_FP80Ty()) { // all long double variants are printed as hex - // api needed to prevent premature destruction + // API needed to prevent premature destruction APInt API = CFP->getValueAPF().bitcastToAPInt(); const uint64_t *p = API.getRawData(); if (AP.isVerbose()) { @@ -1266,8 +1453,8 @@ static void EmitGlobalConstantFP(const ConstantFP *CFP, unsigned AddrSpace, assert(CFP->getType()->isPPC_FP128Ty() && "Floating point constant type not handled"); - // All long double variants are printed as hex api needed to prevent - // premature destruction. + // All long double variants are printed as hex + // API needed to prevent premature destruction. APInt API = CFP->getValueAPF().bitcastToAPInt(); const uint64_t *p = API.getRawData(); if (AP.TM.getTargetData()->isBigEndian()) { @@ -1295,57 +1482,68 @@ static void EmitGlobalConstantLargeInt(const ConstantInt *CI, } } -/// EmitGlobalConstant - Print a general LLVM constant to the .s file. -void AsmPrinter::EmitGlobalConstant(const Constant *CV, unsigned AddrSpace) { +static void EmitGlobalConstantImpl(const Constant *CV, unsigned AddrSpace, + AsmPrinter &AP) { if (isa<ConstantAggregateZero>(CV) || isa<UndefValue>(CV)) { - uint64_t Size = TM.getTargetData()->getTypeAllocSize(CV->getType()); - if (Size == 0) Size = 1; // An empty "_foo:" followed by a section is undef. 
- return OutStreamer.EmitZeros(Size, AddrSpace); + uint64_t Size = AP.TM.getTargetData()->getTypeAllocSize(CV->getType()); + return AP.OutStreamer.EmitZeros(Size, AddrSpace); } if (const ConstantInt *CI = dyn_cast<ConstantInt>(CV)) { - unsigned Size = TM.getTargetData()->getTypeAllocSize(CV->getType()); + unsigned Size = AP.TM.getTargetData()->getTypeAllocSize(CV->getType()); switch (Size) { case 1: case 2: case 4: case 8: - if (isVerbose()) - OutStreamer.GetCommentOS() << format("0x%llx\n", CI->getZExtValue()); - OutStreamer.EmitIntValue(CI->getZExtValue(), Size, AddrSpace); + if (AP.isVerbose()) + AP.OutStreamer.GetCommentOS() << format("0x%llx\n", CI->getZExtValue()); + AP.OutStreamer.EmitIntValue(CI->getZExtValue(), Size, AddrSpace); return; default: - EmitGlobalConstantLargeInt(CI, AddrSpace, *this); + EmitGlobalConstantLargeInt(CI, AddrSpace, AP); return; } } if (const ConstantArray *CVA = dyn_cast<ConstantArray>(CV)) - return EmitGlobalConstantArray(CVA, AddrSpace, *this); + return EmitGlobalConstantArray(CVA, AddrSpace, AP); if (const ConstantStruct *CVS = dyn_cast<ConstantStruct>(CV)) - return EmitGlobalConstantStruct(CVS, AddrSpace, *this); + return EmitGlobalConstantStruct(CVS, AddrSpace, AP); if (const ConstantFP *CFP = dyn_cast<ConstantFP>(CV)) - return EmitGlobalConstantFP(CFP, AddrSpace, *this); + return EmitGlobalConstantFP(CFP, AddrSpace, AP); if (isa<ConstantPointerNull>(CV)) { - unsigned Size = TM.getTargetData()->getTypeAllocSize(CV->getType()); - OutStreamer.EmitIntValue(0, Size, AddrSpace); + unsigned Size = AP.TM.getTargetData()->getTypeAllocSize(CV->getType()); + AP.OutStreamer.EmitIntValue(0, Size, AddrSpace); return; } if (const ConstantUnion *CVU = dyn_cast<ConstantUnion>(CV)) - return EmitGlobalConstantUnion(CVU, AddrSpace, *this); + return EmitGlobalConstantUnion(CVU, AddrSpace, AP); if (const ConstantVector *V = dyn_cast<ConstantVector>(CV)) - return EmitGlobalConstantVector(V, AddrSpace, *this); + return EmitGlobalConstantVector(V, AddrSpace, AP); // Otherwise, it must be a ConstantExpr. Lower it to an MCExpr, then emit it // thread the streamer with EmitValue. - OutStreamer.EmitValue(LowerConstant(CV, *this), - TM.getTargetData()->getTypeAllocSize(CV->getType()), - AddrSpace); + AP.OutStreamer.EmitValue(LowerConstant(CV, AP), + AP.TM.getTargetData()->getTypeAllocSize(CV->getType()), + AddrSpace); +} + +/// EmitGlobalConstant - Print a general LLVM constant to the .s file. +void AsmPrinter::EmitGlobalConstant(const Constant *CV, unsigned AddrSpace) { + uint64_t Size = TM.getTargetData()->getTypeAllocSize(CV->getType()); + if (Size) + EmitGlobalConstantImpl(CV, AddrSpace, *this); + else if (MAI->hasSubsectionsViaSymbols()) { + // If the global has zero size, emit a single byte so that two labels don't + // look like they are at the same location. 
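    // Hypothetical illustration: two zero-sized globals emitted back to back
    // under .subsections_via_symbols would otherwise produce
    //   _a:
    //   _b:
    // with both labels at the same address, letting the linker fold them
    // into one atom; the single zero byte below keeps the labels distinct.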
+ OutStreamer.EmitIntValue(0, 1, AddrSpace); + } } void AsmPrinter::EmitMachineConstantPoolValue(MachineConstantPoolValue *MCPV) { @@ -1613,7 +1811,7 @@ GCMetadataPrinter *AsmPrinter::GetOrCreateGCPrinter(GCStrategy *S) { return GMP; } - llvm_report_error("no GCMetadataPrinter registered for GC: " + Twine(Name)); + report_fatal_error("no GCMetadataPrinter registered for GC: " + Twine(Name)); return 0; } diff --git a/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp b/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp index 255bcd4..37d10e5 100644 --- a/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp +++ b/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp @@ -13,6 +13,7 @@ #define DEBUG_TYPE "asm-printer" #include "llvm/CodeGen/AsmPrinter.h" +#include "llvm/Constants.h" #include "llvm/InlineAsm.h" #include "llvm/LLVMContext.h" #include "llvm/Module.h" @@ -74,15 +75,15 @@ void AsmPrinter::EmitInlineAsm(StringRef Str, unsigned LocCookie) const { AsmParser Parser(SrcMgr, OutContext, OutStreamer, *MAI); OwningPtr<TargetAsmParser> TAP(TM.getTarget().createAsmParser(Parser)); if (!TAP) - llvm_report_error("Inline asm not supported by this streamer because" - " we don't have an asm parser for this target\n"); + report_fatal_error("Inline asm not supported by this streamer because" + " we don't have an asm parser for this target\n"); Parser.setTargetParser(*TAP.get()); // Don't implicitly switch to the text section before the asm. int Res = Parser.Run(/*NoInitialTextSection*/ true, /*NoFinalize*/ true); if (Res && !HasDiagHandler) - llvm_report_error("Error parsing inline asm\n"); + report_fatal_error("Error parsing inline asm\n"); } @@ -97,7 +98,7 @@ void AsmPrinter::EmitInlineAsm(const MachineInstr *MI) const { unsigned NumDefs = 0; for (; MI->getOperand(NumDefs).isReg() && MI->getOperand(NumDefs).isDef(); ++NumDefs) - assert(NumDefs != NumOperands-1 && "No asm string?"); + assert(NumDefs != NumOperands-2 && "No asm string?"); assert(MI->getOperand(NumDefs).isSymbol() && "No asm string?"); @@ -123,6 +124,20 @@ void AsmPrinter::EmitInlineAsm(const MachineInstr *MI) const { OutStreamer.EmitRawText(Twine("\t")+MAI->getCommentString()+ MAI->getInlineAsmStart()); + // Get the !srcloc metadata node if we have it, and decode the loc cookie from + // it. + unsigned LocCookie = 0; + for (unsigned i = MI->getNumOperands(); i != 0; --i) { + if (MI->getOperand(i-1).isMetadata()) + if (const MDNode *SrcLoc = MI->getOperand(i-1).getMetadata()) + if (SrcLoc->getNumOperands() != 0) + if (const ConstantInt *CI = + dyn_cast<ConstantInt>(SrcLoc->getOperand(0))) { + LocCookie = CI->getZExtValue(); + break; + } + } + // Emit the inline asm to a temporary string so we can emit it through // EmitInlineAsm. SmallString<256> StringData; @@ -167,10 +182,9 @@ void AsmPrinter::EmitInlineAsm(const MachineInstr *MI) const { break; case '(': // $( -> same as GCC's { character. ++LastEmitted; // Consume '(' character. - if (CurVariant != -1) { - llvm_report_error("Nested variants found in inline asm string: '" - + std::string(AsmStr) + "'"); - } + if (CurVariant != -1) + report_fatal_error("Nested variants found in inline asm string: '" + + Twine(AsmStr) + "'"); CurVariant = 0; // We're in the first variant now. 
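      // For example (hypothetical asm string), "$(addl $1, $0$|add $0, $1$)"
      // carries two dialect variants, mirroring GCC's "{...|...}" syntax;
      // only the variant whose index matches the printer's asm dialect is
      // kept in the emitted output.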
break; case '|': @@ -204,8 +218,8 @@ void AsmPrinter::EmitInlineAsm(const MachineInstr *MI) const { const char *StrStart = LastEmitted; const char *StrEnd = strchr(StrStart, '}'); if (StrEnd == 0) - llvm_report_error(Twine("Unterminated ${:foo} operand in inline asm" - " string: '") + Twine(AsmStr) + "'"); + report_fatal_error("Unterminated ${:foo} operand in inline asm" + " string: '" + Twine(AsmStr) + "'"); std::string Val(StrStart, StrEnd); PrintSpecial(MI, OS, Val.c_str()); @@ -219,8 +233,8 @@ void AsmPrinter::EmitInlineAsm(const MachineInstr *MI) const { unsigned Val; if (StringRef(IDStart, IDEnd-IDStart).getAsInteger(10, Val)) - llvm_report_error("Bad $ operand number in inline asm string: '" - + std::string(AsmStr) + "'"); + report_fatal_error("Bad $ operand number in inline asm string: '" + + Twine(AsmStr) + "'"); LastEmitted = IDEnd; char Modifier[2] = { 0, 0 }; @@ -231,22 +245,22 @@ void AsmPrinter::EmitInlineAsm(const MachineInstr *MI) const { if (*LastEmitted == ':') { ++LastEmitted; // Consume ':' character. if (*LastEmitted == 0) - llvm_report_error("Bad ${:} expression in inline asm string: '" + - std::string(AsmStr) + "'"); + report_fatal_error("Bad ${:} expression in inline asm string: '" + + Twine(AsmStr) + "'"); Modifier[0] = *LastEmitted; ++LastEmitted; // Consume modifier character. } if (*LastEmitted != '}') - llvm_report_error("Bad ${} expression in inline asm string: '" - + std::string(AsmStr) + "'"); + report_fatal_error("Bad ${} expression in inline asm string: '" + + Twine(AsmStr) + "'"); ++LastEmitted; // Consume '}' character. } if (Val >= NumOperands-1) - llvm_report_error("Invalid $ operand number in inline asm string: '" - + std::string(AsmStr) + "'"); + report_fatal_error("Invalid $ operand number in inline asm string: '" + + Twine(AsmStr) + "'"); // Okay, we finally have a value number. Ask the target to print this // operand! @@ -273,7 +287,7 @@ void AsmPrinter::EmitInlineAsm(const MachineInstr *MI) const { OS << *MI->getOperand(OpNo).getMBB()->getSymbol(); else { AsmPrinter *AP = const_cast<AsmPrinter*>(this); - if ((OpFlags & 7) == 4) { + if (InlineAsm::isMemKind(OpFlags)) { Error = AP->PrintAsmMemoryOperand(MI, OpNo, AsmPrinterVariant, Modifier[0] ? Modifier : 0, OS); @@ -286,9 +300,8 @@ void AsmPrinter::EmitInlineAsm(const MachineInstr *MI) const { if (Error) { std::string msg; raw_string_ostream Msg(msg); - Msg << "Invalid operand found in inline asm: '" << AsmStr << "'\n"; - MI->print(Msg); - llvm_report_error(Msg.str()); + Msg << "invalid operand in inline asm: '" << AsmStr << "'"; + MMI->getModule()->getContext().emitError(LocCookie, Msg.str()); } } break; @@ -296,7 +309,7 @@ void AsmPrinter::EmitInlineAsm(const MachineInstr *MI) const { } } OS << '\n' << (char)0; // null terminate string. - EmitInlineAsm(OS.str(), 0/*no loc cookie*/); + EmitInlineAsm(OS.str(), LocCookie); // Emit the #NOAPP end marker. This has to happen even if verbose-asm isn't // enabled, so we use EmitRawText. 
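  // Sketch of the IR side of the cookie (hypothetical module; syntax shown
  // roughly as in this LLVM version):
  //   call void asm sideeffect "bogus instruction", ""(), !srcloc !0
  //   !0 = metadata !{i32 42}
  // The operand scan above recovers 42 as LocCookie, so the "invalid
  // operand" diagnostic raised through LLVMContext::emitError can be
  // attributed to the source location the front end recorded for this asm
  // statement.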
@@ -334,7 +347,7 @@ void AsmPrinter::PrintSpecial(const MachineInstr *MI, raw_ostream &OS, raw_string_ostream Msg(msg); Msg << "Unknown special formatter '" << Code << "' for machine instr: " << *MI; - llvm_report_error(Msg.str()); + report_fatal_error(Msg.str()); } } diff --git a/lib/CodeGen/AsmPrinter/CMakeLists.txt b/lib/CodeGen/AsmPrinter/CMakeLists.txt index afc482d..ca8b843 100644 --- a/lib/CodeGen/AsmPrinter/CMakeLists.txt +++ b/lib/CodeGen/AsmPrinter/CMakeLists.txt @@ -7,3 +7,5 @@ add_llvm_library(LLVMAsmPrinter DwarfException.cpp OcamlGCPrinter.cpp ) + +target_link_libraries (LLVMAsmPrinter LLVMMCParser) diff --git a/lib/CodeGen/AsmPrinter/DIE.h b/lib/CodeGen/AsmPrinter/DIE.h index 454326d..9cb8314 100644 --- a/lib/CodeGen/AsmPrinter/DIE.h +++ b/lib/CodeGen/AsmPrinter/DIE.h @@ -261,11 +261,12 @@ namespace llvm { /// virtual void EmitValue(AsmPrinter *AP, unsigned Form) const; + uint64_t getValue() const { return Integer; } + /// SizeOf - Determine size of integer value in bytes. /// virtual unsigned SizeOf(AsmPrinter *AP, unsigned Form) const; - // Implement isa/cast/dyncast. static bool classof(const DIEInteger *) { return true; } static bool classof(const DIEValue *I) { return I->getType() == isInteger; } diff --git a/lib/CodeGen/AsmPrinter/DwarfDebug.cpp b/lib/CodeGen/AsmPrinter/DwarfDebug.cpp index b472d1e..e9e9ba5 100644 --- a/lib/CodeGen/AsmPrinter/DwarfDebug.cpp +++ b/lib/CodeGen/AsmPrinter/DwarfDebug.cpp @@ -28,9 +28,11 @@ #include "llvm/Target/TargetLoweringObjectFile.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/Target/TargetOptions.h" #include "llvm/Analysis/DebugInfo.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/StringExtras.h" +#include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/ValueHandle.h" @@ -39,6 +41,17 @@ #include "llvm/System/Path.h" using namespace llvm; +static cl::opt<bool> PrintDbgScope("print-dbgscope", cl::Hidden, + cl::desc("Print DbgScope information for each machine instruction")); + +static cl::opt<bool> DisableDebugInfoPrinting("disable-debug-info-print", cl::Hidden, + cl::desc("Disable debug info printing")); + +namespace { + const char *DWARFGroupName = "DWARF Emission"; + const char *DbgTimerName = "DWARF Debug Writer"; +} // end anonymous namespace + //===----------------------------------------------------------------------===// /// Configuration values for initial hash set sizes (log2). @@ -179,6 +192,12 @@ public: }; //===----------------------------------------------------------------------===// +/// DbgRange - This is used to track range of instructions with identical +/// debug info scope. +/// +typedef std::pair<const MachineInstr *, const MachineInstr *> DbgRange; + +//===----------------------------------------------------------------------===// /// DbgScope - This class is used to track scope information. /// class DbgScope { @@ -187,22 +206,21 @@ class DbgScope { // Location at which this scope is inlined. AssertingVH<MDNode> InlinedAtLocation; bool AbstractScope; // Abstract Scope - MCSymbol *StartLabel; // Label ID of the beginning of scope. - MCSymbol *EndLabel; // Label ID of the end of scope. const MachineInstr *LastInsn; // Last instruction of this scope. const MachineInstr *FirstInsn; // First instruction of this scope. + unsigned DFSIn, DFSOut; // Scopes defined in scope. Contents not owned. SmallVector<DbgScope *, 4> Scopes; // Variables declared in scope. Contents owned. 
SmallVector<DbgVariable *, 8> Variables; - + SmallVector<DbgRange, 4> Ranges; // Private state for dump() mutable unsigned IndentLevel; public: DbgScope(DbgScope *P, DIDescriptor D, MDNode *I = 0) : Parent(P), Desc(D), InlinedAtLocation(I), AbstractScope(false), - StartLabel(0), EndLabel(0), - LastInsn(0), FirstInsn(0), IndentLevel(0) {} + LastInsn(0), FirstInsn(0), + DFSIn(0), DFSOut(0), IndentLevel(0) {} virtual ~DbgScope(); // Accessors. @@ -211,18 +229,57 @@ public: DIDescriptor getDesc() const { return Desc; } MDNode *getInlinedAt() const { return InlinedAtLocation; } MDNode *getScopeNode() const { return Desc.getNode(); } - MCSymbol *getStartLabel() const { return StartLabel; } - MCSymbol *getEndLabel() const { return EndLabel; } const SmallVector<DbgScope *, 4> &getScopes() { return Scopes; } const SmallVector<DbgVariable *, 8> &getVariables() { return Variables; } - void setStartLabel(MCSymbol *S) { StartLabel = S; } - void setEndLabel(MCSymbol *E) { EndLabel = E; } - void setLastInsn(const MachineInstr *MI) { LastInsn = MI; } - const MachineInstr *getLastInsn() { return LastInsn; } - void setFirstInsn(const MachineInstr *MI) { FirstInsn = MI; } + const SmallVector<DbgRange, 4> &getRanges() { return Ranges; } + + /// openInsnRange - This scope covers instruction range starting from MI. + void openInsnRange(const MachineInstr *MI) { + if (!FirstInsn) + FirstInsn = MI; + + if (Parent) + Parent->openInsnRange(MI); + } + + /// extendInsnRange - Extend the current instruction range covered by + /// this scope. + void extendInsnRange(const MachineInstr *MI) { + assert (FirstInsn && "MI Range is not open!"); + LastInsn = MI; + if (Parent) + Parent->extendInsnRange(MI); + } + + /// closeInsnRange - Create a range based on FirstInsn and LastInsn collected + /// until now. This is used when a new scope is encountered while walking + /// machine instructions. + void closeInsnRange(DbgScope *NewScope = NULL) { + assert (LastInsn && "Last insn missing!"); + Ranges.push_back(DbgRange(FirstInsn, LastInsn)); + FirstInsn = NULL; + LastInsn = NULL; + // If Parent dominates NewScope then do not close Parent's instruction + // range. + if (Parent && (!NewScope || !Parent->dominates(NewScope))) + Parent->closeInsnRange(NewScope); + } + void setAbstractScope() { AbstractScope = true; } bool isAbstractScope() const { return AbstractScope; } - const MachineInstr *getFirstInsn() { return FirstInsn; } + + // Depth First Search support to walk and mainpluate DbgScope hierarchy. + unsigned getDFSOut() const { return DFSOut; } + void setDFSOut(unsigned O) { DFSOut = O; } + unsigned getDFSIn() const { return DFSIn; } + void setDFSIn(unsigned I) { DFSIn = I; } + bool dominates(const DbgScope *S) { + if (S == this) + return true; + if (DFSIn < S->getDFSIn() && DFSOut > S->getDFSOut()) + return true; + return false; + } /// addScope - Add a scope to the scope. /// @@ -232,48 +289,11 @@ public: /// void addVariable(DbgVariable *V) { Variables.push_back(V); } - void fixInstructionMarkers(DenseMap<const MachineInstr *, - unsigned> &MIIndexMap) { - assert(getFirstInsn() && "First instruction is missing!"); - - // Use the end of last child scope as end of this scope. 
- const SmallVector<DbgScope *, 4> &Scopes = getScopes(); - const MachineInstr *LastInsn = getFirstInsn(); - unsigned LIndex = 0; - if (Scopes.empty()) { - assert(getLastInsn() && "Inner most scope does not have last insn!"); - return; - } - for (SmallVector<DbgScope *, 4>::const_iterator SI = Scopes.begin(), - SE = Scopes.end(); SI != SE; ++SI) { - DbgScope *DS = *SI; - DS->fixInstructionMarkers(MIIndexMap); - const MachineInstr *DSLastInsn = DS->getLastInsn(); - unsigned DSI = MIIndexMap[DSLastInsn]; - if (DSI > LIndex) { - LastInsn = DSLastInsn; - LIndex = DSI; - } - } - - unsigned CurrentLastInsnIndex = 0; - if (const MachineInstr *CL = getLastInsn()) - CurrentLastInsnIndex = MIIndexMap[CL]; - unsigned FIndex = MIIndexMap[getFirstInsn()]; - - // Set LastInsn as the last instruction for this scope only if - // it follows - // 1) this scope's first instruction and - // 2) current last instruction for this scope, if any. - if (LIndex >= CurrentLastInsnIndex && LIndex >= FIndex) - setLastInsn(LastInsn); - } - #ifndef NDEBUG void dump() const; #endif }; - + } // end llvm namespace #ifndef NDEBUG @@ -282,7 +302,6 @@ void DbgScope::dump() const { err.indent(IndentLevel); MDNode *N = Desc.getNode(); N->dump(); - err << " [" << StartLabel << ", " << EndLabel << "]\n"; if (AbstractScope) err << "Abstract Scope\n"; @@ -305,22 +324,23 @@ DbgScope::~DbgScope() { DwarfDebug::DwarfDebug(AsmPrinter *A, Module *M) : Asm(A), MMI(Asm->MMI), ModuleCU(0), AbbreviationsSet(InitAbbreviationsSetSize), - CurrentFnDbgScope(0), DebugTimer(0) { + CurrentFnDbgScope(0), PrevLabel(NULL) { NextStringPoolNumber = 0; DwarfFrameSectionSym = DwarfInfoSectionSym = DwarfAbbrevSectionSym = 0; DwarfStrSectionSym = TextSectionSym = 0; - - if (TimePassesIsEnabled) - DebugTimer = new Timer("Dwarf Debug Writer"); - - beginModule(M); + DwarfDebugRangeSectionSym = 0; + FunctionBeginSym = 0; + if (TimePassesIsEnabled) { + NamedRegionTimer T(DbgTimerName, DWARFGroupName); + beginModule(M); + } else { + beginModule(M); + } } DwarfDebug::~DwarfDebug() { for (unsigned j = 0, M = DIEBlocks.size(); j < M; ++j) DIEBlocks[j]->~DIEBlock(); - - delete DebugTimer; } MCSymbol *DwarfDebug::getStringPoolEntry(StringRef Str) { @@ -792,6 +812,64 @@ void DwarfDebug::addAddress(DIE *Die, unsigned Attribute, addBlock(Die, Attribute, 0, Block); } +/// addRegisterAddress - Add register location entry in variable DIE. +bool DwarfDebug::addRegisterAddress(DIE *Die, DbgVariable *DV, + const MachineOperand &MO) { + assert (MO.isReg() && "Invalid machine operand!"); + if (!MO.getReg()) + return false; + MachineLocation Location; + Location.set(MO.getReg()); + addAddress(Die, dwarf::DW_AT_location, Location); + if (MCSymbol *VS = DV->getDbgValueLabel()) + addLabel(Die, dwarf::DW_AT_start_scope, dwarf::DW_FORM_addr, VS); + return true; +} + +/// addConstantValue - Add constant value entry in variable DIE. +bool DwarfDebug::addConstantValue(DIE *Die, DbgVariable *DV, + const MachineOperand &MO) { + assert (MO.isImm() && "Invalid machine operand!"); + DIEBlock *Block = new (DIEValueAllocator) DIEBlock(); + unsigned Imm = MO.getImm(); + addUInt(Block, 0, dwarf::DW_FORM_udata, Imm); + addBlock(Die, dwarf::DW_AT_const_value, 0, Block); + if (MCSymbol *VS = DV->getDbgValueLabel()) + addLabel(Die, dwarf::DW_AT_start_scope, dwarf::DW_FORM_addr, VS); + return true; +} + +/// addConstantFPValue - Add constant value entry in variable DIE. 
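/// Worked example (hedged; assumes the usual little-endian host): for a
/// 32-bit float constant 1.0f, bit pattern 0x3F800000, a little-endian
/// target emits the DW_AT_const_value block as the bytes
/// 0x00 0x00 0x80 0x3F, while a big-endian target walks the same buffer
/// from the other end and emits 0x3F 0x80 0x00 0x00.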
+bool DwarfDebug::addConstantFPValue(DIE *Die, DbgVariable *DV, + const MachineOperand &MO) { + assert (MO.isFPImm() && "Invalid machine operand!"); + DIEBlock *Block = new (DIEValueAllocator) DIEBlock(); + APFloat FPImm = MO.getFPImm()->getValueAPF(); + + // Get the raw data form of the floating point. + const APInt FltVal = FPImm.bitcastToAPInt(); + const char *FltPtr = (const char*)FltVal.getRawData(); + + int NumBytes = FltVal.getBitWidth() / 8; // 8 bits per byte. + bool LittleEndian = Asm->getTargetData().isLittleEndian(); + int Incr = (LittleEndian ? 1 : -1); + int Start = (LittleEndian ? 0 : NumBytes - 1); + int Stop = (LittleEndian ? NumBytes : -1); + + // Output the constant to DWARF one byte at a time. + for (; Start != Stop; Start += Incr) + addUInt(Block, 0, dwarf::DW_FORM_data1, + (unsigned char)0xFF & FltPtr[Start]); + + addBlock(Die, dwarf::DW_AT_const_value, 0, Block); + + if (MCSymbol *VS = DV->getDbgValueLabel()) + addLabel(Die, dwarf::DW_AT_start_scope, dwarf::DW_FORM_addr, + VS); + return true; +} + + /// addToContextOwner - Add Die into the list of its context owner's children. void DwarfDebug::addToContextOwner(DIE *Die, DIDescriptor Context) { if (Context.isType()) { @@ -1250,59 +1328,16 @@ DIE *DwarfDebug::createSubprogramDIE(const DISubprogram &SP, bool MakeDecl) { if (SP.isArtificial()) addUInt(SPDie, dwarf::DW_AT_artificial, dwarf::DW_FORM_flag, 1); + if (!SP.isLocalToUnit()) + addUInt(SPDie, dwarf::DW_AT_external, dwarf::DW_FORM_flag, 1); + + if (SP.isOptimized()) + addUInt(SPDie, dwarf::DW_AT_APPLE_optimized, dwarf::DW_FORM_flag, 1); + // DW_TAG_inlined_subroutine may refer to this DIE. ModuleCU->insertDIE(SP.getNode(), SPDie); - return SPDie; -} - -/// getUpdatedDbgScope - Find DbgScope assicated with the instruction. -/// Update scope hierarchy. Create abstract scope if required. -DbgScope *DwarfDebug::getUpdatedDbgScope(MDNode *N, const MachineInstr *MI, - MDNode *InlinedAt) { - assert(N && "Invalid Scope encoding!"); - assert(MI && "Missing machine instruction!"); - bool isAConcreteScope = InlinedAt != 0; - - DbgScope *NScope = NULL; - - if (InlinedAt) - NScope = DbgScopeMap.lookup(InlinedAt); - else - NScope = DbgScopeMap.lookup(N); - assert(NScope && "Unable to find working scope!"); - - if (NScope->getFirstInsn()) - return NScope; - DbgScope *Parent = NULL; - if (isAConcreteScope) { - DILocation IL(InlinedAt); - Parent = getUpdatedDbgScope(IL.getScope().getNode(), MI, - IL.getOrigLocation().getNode()); - assert(Parent && "Unable to find Parent scope!"); - NScope->setParent(Parent); - Parent->addScope(NScope); - } else if (DIDescriptor(N).isLexicalBlock()) { - DILexicalBlock DB(N); - Parent = getUpdatedDbgScope(DB.getContext().getNode(), MI, InlinedAt); - NScope->setParent(Parent); - Parent->addScope(NScope); - } - - NScope->setFirstInsn(MI); - - if (!Parent && !InlinedAt) { - StringRef SPName = DISubprogram(N).getLinkageName(); - if (SPName == Asm->MF->getFunction()->getName()) - CurrentFnDbgScope = NScope; - } - - if (isAConcreteScope) { - ConcreteScopes[InlinedAt] = NScope; - getOrCreateAbstractScope(N); - } - - return NScope; + return SPDie; } DbgScope *DwarfDebug::getOrCreateAbstractScope(MDNode *N) { @@ -1332,6 +1367,19 @@ DbgScope *DwarfDebug::getOrCreateAbstractScope(MDNode *N) { return AScope; } +/// isSubprogramContext - Return true if Context is either a subprogram +/// or another context nested inside a subprogram. 
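/// For example, a method of a class type that is itself declared inside a
/// function: the method's context is the class, whose context is the
/// subprogram, so the recursion below reports true.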
+static bool isSubprogramContext(MDNode *Context) { + if (!Context) + return false; + DIDescriptor D(Context); + if (D.isSubprogram()) + return true; + if (D.isType()) + return isSubprogramContext(DIType(Context).getContext().getNode()); + return false; +} + /// updateSubprogramScopeDIE - Find DIE for the given subprogram and /// attach appropriate DW_AT_low_pc and DW_AT_high_pc attributes. /// If there are global variables in this scope then create and insert @@ -1347,7 +1395,8 @@ DIE *DwarfDebug::updateSubprogramScopeDIE(MDNode *SPNode) { // expect specification DIE in parent function. So avoid creating // specification DIE for a function defined inside a function. if (SP.isDefinition() && !SP.getContext().isCompileUnit() && - !SP.getContext().isFile() && !SP.getContext().isSubprogram()) { + !SP.getContext().isFile() && + !isSubprogramContext(SP.getContext().getNode())) { addUInt(SPDie, dwarf::DW_AT_declaration, dwarf::DW_FORM_flag, 1); // Add arguments. @@ -1378,31 +1427,48 @@ DIE *DwarfDebug::updateSubprogramScopeDIE(MDNode *SPNode) { MachineLocation Location(RI->getFrameRegister(*Asm->MF)); addAddress(SPDie, dwarf::DW_AT_frame_base, Location); - if (!DISubprogram(SPNode).isLocalToUnit()) - addUInt(SPDie, dwarf::DW_AT_external, dwarf::DW_FORM_flag, 1); - return SPDie; } /// constructLexicalScope - Construct new DW_TAG_lexical_block /// for this scope and attach DW_AT_low_pc/DW_AT_high_pc labels. DIE *DwarfDebug::constructLexicalScopeDIE(DbgScope *Scope) { - MCSymbol *Start = Scope->getStartLabel(); - MCSymbol *End = Scope->getEndLabel(); - if (Start == 0 || End == 0) return 0; - assert(Start->isDefined() && "Invalid starting label for an inlined scope!"); - assert(End->isDefined() && "Invalid end label for an inlined scope!"); - DIE *ScopeDIE = new DIE(dwarf::DW_TAG_lexical_block); if (Scope->isAbstractScope()) return ScopeDIE; - addLabel(ScopeDIE, dwarf::DW_AT_low_pc, dwarf::DW_FORM_addr, - Start ? Start : Asm->GetTempSymbol("func_begin", - Asm->getFunctionNumber())); - addLabel(ScopeDIE, dwarf::DW_AT_high_pc, dwarf::DW_FORM_addr, - End ? End : Asm->GetTempSymbol("func_end",Asm->getFunctionNumber())); + const SmallVector<DbgRange, 4> &Ranges = Scope->getRanges(); + if (Ranges.empty()) + return 0; + + SmallVector<DbgRange, 4>::const_iterator RI = Ranges.begin(); + if (Ranges.size() > 1) { + // .debug_range section has not been laid out yet. Emit offset in + // .debug_range as a uint, size 4, for now. emitDIE will handle + // DW_AT_ranges appropriately. + addUInt(ScopeDIE, dwarf::DW_AT_ranges, dwarf::DW_FORM_data4, + DebugRangeSymbols.size() * Asm->getTargetData().getPointerSize()); + for (SmallVector<DbgRange, 4>::const_iterator RI = Ranges.begin(), + RE = Ranges.end(); RI != RE; ++RI) { + DebugRangeSymbols.push_back(LabelsBeforeInsn.lookup(RI->first)); + DebugRangeSymbols.push_back(LabelsAfterInsn.lookup(RI->second)); + } + DebugRangeSymbols.push_back(NULL); + DebugRangeSymbols.push_back(NULL); + return ScopeDIE; + } + + MCSymbol *Start = LabelsBeforeInsn.lookup(RI->first); + MCSymbol *End = LabelsAfterInsn.lookup(RI->second); + + if (Start == 0 || End == 0) return 0; + + assert(Start->isDefined() && "Invalid starting label for an inlined scope!"); + assert(End->isDefined() && "Invalid end label for an inlined scope!"); + + addLabel(ScopeDIE, dwarf::DW_AT_low_pc, dwarf::DW_FORM_addr, Start); + addLabel(ScopeDIE, dwarf::DW_AT_high_pc, dwarf::DW_FORM_addr, End); return ScopeDIE; } @@ -1411,14 +1477,28 @@ DIE *DwarfDebug::constructLexicalScopeDIE(DbgScope *Scope) { /// a function. 
Construct DIE to represent this concrete inlined copy /// of the function. DIE *DwarfDebug::constructInlinedScopeDIE(DbgScope *Scope) { - MCSymbol *StartLabel = Scope->getStartLabel(); - MCSymbol *EndLabel = Scope->getEndLabel(); - if (StartLabel == 0 || EndLabel == 0) return 0; - + + const SmallVector<DbgRange, 4> &Ranges = Scope->getRanges(); + assert (Ranges.empty() == false + && "DbgScope does not have instruction markers!"); + + // FIXME : .debug_inlined section specification does not clearly state how + // to emit inlined scope that is split into multiple instruction ranges. + // For now, use first instruction range and emit low_pc/high_pc pair and + // corresponding .debug_inlined section entry for this pair. + SmallVector<DbgRange, 4>::const_iterator RI = Ranges.begin(); + MCSymbol *StartLabel = LabelsBeforeInsn.lookup(RI->first); + MCSymbol *EndLabel = LabelsAfterInsn.lookup(RI->second); + + if (StartLabel == 0 || EndLabel == 0) { + assert (0 && "Unexpected Start and End labels for a inlined scope!"); + return 0; + } assert(StartLabel->isDefined() && "Invalid starting label for an inlined scope!"); assert(EndLabel->isDefined() && "Invalid end label for an inlined scope!"); + if (!Scope->getScopeNode()) return NULL; DIScope DS(Scope->getScopeNode()); @@ -1512,59 +1592,34 @@ DIE *DwarfDebug::constructVariableDIE(DbgVariable *DV, DbgScope *Scope) { // Add variable address. if (!Scope->isAbstractScope()) { // Check if variable is described by DBG_VALUE instruction. - if (const MachineInstr *DbgValueInsn = DV->getDbgValue()) { - if (DbgValueInsn->getNumOperands() == 3) { - // FIXME : Handle getNumOperands != 3 - if (DbgValueInsn->getOperand(0).getType() - == MachineOperand::MO_Register - && DbgValueInsn->getOperand(0).getReg()) { - MachineLocation Location; - Location.set(DbgValueInsn->getOperand(0).getReg()); + if (const MachineInstr *DVInsn = DV->getDbgValue()) { + bool updated = false; + // FIXME : Handle getNumOperands != 3 + if (DVInsn->getNumOperands() == 3) { + if (DVInsn->getOperand(0).isReg()) + updated = addRegisterAddress(VariableDie, DV, DVInsn->getOperand(0)); + else if (DVInsn->getOperand(0).isImm()) + updated = addConstantValue(VariableDie, DV, DVInsn->getOperand(0)); + else if (DVInsn->getOperand(0).isFPImm()) + updated = addConstantFPValue(VariableDie, DV, DVInsn->getOperand(0)); + } else { + MachineLocation Location = Asm->getDebugValueLocation(DVInsn); + if (Location.getReg()) { addAddress(VariableDie, dwarf::DW_AT_location, Location); if (MCSymbol *VS = DV->getDbgValueLabel()) addLabel(VariableDie, dwarf::DW_AT_start_scope, dwarf::DW_FORM_addr, VS); - } else if (DbgValueInsn->getOperand(0).getType() == - MachineOperand::MO_Immediate) { - DIEBlock *Block = new (DIEValueAllocator) DIEBlock(); - unsigned Imm = DbgValueInsn->getOperand(0).getImm(); - addUInt(Block, 0, dwarf::DW_FORM_udata, Imm); - addBlock(VariableDie, dwarf::DW_AT_const_value, 0, Block); - if (MCSymbol *VS = DV->getDbgValueLabel()) - addLabel(VariableDie, dwarf::DW_AT_start_scope, dwarf::DW_FORM_addr, - VS); - } else if (DbgValueInsn->getOperand(0).getType() == - MachineOperand::MO_FPImmediate) { - DIEBlock *Block = new (DIEValueAllocator) DIEBlock(); - APFloat FPImm = DbgValueInsn->getOperand(0).getFPImm()->getValueAPF(); - - // Get the raw data form of the floating point. - const APInt FltVal = FPImm.bitcastToAPInt(); - const char *FltPtr = (const char*)FltVal.getRawData(); - - unsigned NumBytes = FltVal.getBitWidth() / 8; // 8 bits per byte. 
- bool LittleEndian = Asm->getTargetData().isLittleEndian(); - int Incr = (LittleEndian ? 1 : -1); - int Start = (LittleEndian ? 0 : NumBytes - 1); - int Stop = (LittleEndian ? NumBytes : -1); - - // Output the constant to DWARF one byte at a time. - for (; Start != Stop; Start += Incr) - addUInt(Block, 0, dwarf::DW_FORM_data1, - (unsigned char)0xFF & FltPtr[Start]); - - addBlock(VariableDie, dwarf::DW_AT_const_value, 0, Block); - - if (MCSymbol *VS = DV->getDbgValueLabel()) - addLabel(VariableDie, dwarf::DW_AT_start_scope, dwarf::DW_FORM_addr, - VS); - } else { - //FIXME : Handle other operand types. - delete VariableDie; - return NULL; + updated = true; } - } - } else { + } + if (!updated) { + // If variableDie is not updated then DBG_VALUE instruction does not + // have valid variable info. + delete VariableDie; + return NULL; + } + } + else { MachineLocation Location; unsigned FrameReg; const TargetRegisterInfo *RI = Asm->TM.getRegisterInfo(); @@ -1600,7 +1655,8 @@ void DwarfDebug::addPubTypes(DISubprogram SP) { if (!ATy.isValid()) continue; DICompositeType CATy = getDICompositeType(ATy); - if (DIDescriptor(CATy.getNode()).Verify() && !CATy.getName().empty()) { + if (DIDescriptor(CATy.getNode()).Verify() && !CATy.getName().empty() + && !CATy.isForwardDecl()) { if (DIEEntry *Entry = ModuleCU->getDIEEntry(CATy.getNode())) ModuleCU->addGlobalType(CATy.getName(), Entry->getEntry()); } @@ -1716,9 +1772,9 @@ void DwarfDebug::constructCompileUnit(MDNode *N) { addUInt(Die, dwarf::DW_AT_language, dwarf::DW_FORM_data1, DIUnit.getLanguage()); addString(Die, dwarf::DW_AT_name, dwarf::DW_FORM_string, FN); - addLabel(Die, dwarf::DW_AT_low_pc, dwarf::DW_FORM_addr, TextSectionSym); - addLabel(Die, dwarf::DW_AT_high_pc, dwarf::DW_FORM_addr, - Asm->GetTempSymbol("text_end")); + // Use DW_AT_entry_pc instead of DW_AT_low_pc/DW_AT_high_pc pair. This + // simplifies debug range entries. + addUInt(Die, dwarf::DW_AT_entry_pc, dwarf::DW_FORM_data4, 0); // DW_AT_stmt_list is a offset of line number information for this // compile unit in debug_line section. It is always zero when only one // compile unit is emitted in one object file. @@ -1766,7 +1822,8 @@ void DwarfDebug::constructGlobalVariableDIE(MDNode *N) { // Do not create specification DIE if context is either compile unit // or a subprogram. if (DI_GV.isDefinition() && !GVContext.isCompileUnit() && - !GVContext.isFile() && !GVContext.isSubprogram()) { + !GVContext.isFile() && + !isSubprogramContext(GVContext.getNode())) { // Create specification DIE. DIE *VariableSpecDIE = new DIE(dwarf::DW_TAG_variable); addDIEEntry(VariableSpecDIE, dwarf::DW_AT_specification, @@ -1791,7 +1848,8 @@ void DwarfDebug::constructGlobalVariableDIE(MDNode *N) { ModuleCU->addGlobal(DI_GV.getName(), VariableDie); DIType GTy = DI_GV.getType(); - if (GTy.isCompositeType() && !GTy.getName().empty()) { + if (GTy.isCompositeType() && !GTy.getName().empty() + && !GTy.isForwardDecl()) { DIEEntry *Entry = ModuleCU->getDIEEntry(GTy.getNode()); assert(Entry && "Missing global type!"); ModuleCU->addGlobalType(GTy.getName(), Entry->getEntry()); @@ -1829,7 +1887,8 @@ void DwarfDebug::constructSubprogramDIE(MDNode *N) { /// content. Create global DIEs and emit initial debug info sections. /// This is inovked by the target AsmPrinter. 
void DwarfDebug::beginModule(Module *M) { - TimeRegion Timer(DebugTimer); + if (DisableDebugInfoPrinting) + return; DebugInfoFinder DbgFinder; DbgFinder.processModule(*M); @@ -1893,10 +1952,7 @@ void DwarfDebug::beginModule(Module *M) { /// endModule - Emit all Dwarf sections that should come after the content. /// void DwarfDebug::endModule() { - if (!ModuleCU) - return; - - TimeRegion Timer(DebugTimer); + if (!ModuleCU) return; // Attach DW_AT_inline attribute with inlined subprogram DIEs. for (SmallPtrSet<DIE *, 4>::iterator AI = InlinedSubprogramDIEs.begin(), @@ -1905,11 +1961,6 @@ void DwarfDebug::endModule() { addUInt(ISP, dwarf::DW_AT_inline, 0, dwarf::DW_INL_inlined); } - // Insert top level DIEs. - for (SmallVector<DIE *, 4>::iterator TI = TopLevelDIEsVector.begin(), - TE = TopLevelDIEsVector.end(); TI != TE; ++TI) - ModuleCU->getCUDie()->addChild(*TI); - for (DenseMap<DIE *, MDNode *>::iterator CI = ContainingTypeMap.begin(), CE = ContainingTypeMap.end(); CI != CE; ++CI) { DIE *SPDie = CI->first; @@ -1918,8 +1969,6 @@ void DwarfDebug::endModule() { DIE *NDie = ModuleCU->getDIE(N); if (!NDie) continue; addDIEEntry(SPDie, dwarf::DW_AT_containing_type, dwarf::DW_FORM_ref4, NDie); - // FIXME - This is not the correct approach. - //addDIEEntry(NDie, dwarf::DW_AT_containing_type, dwarf::DW_FORM_ref4, NDie } // Standard sections final addresses. @@ -2062,11 +2111,13 @@ void DwarfDebug::collectVariableInfo() { if (!MInsn->isDebugValue()) continue; - // FIXME : Lift this restriction. - if (MInsn->getNumOperands() != 3) + // Ignore Undef values. + if (MInsn->getOperand(0).isReg() && !MInsn->getOperand(0).getReg()) continue; - DIVariable DV((MDNode*)(MInsn->getOperand(MInsn->getNumOperands() - - 1).getMetadata())); + + DIVariable DV( + const_cast<MDNode *>(MInsn->getOperand(MInsn->getNumOperands() - 1) + .getMetadata())); if (DV.getTag() == dwarf::DW_TAG_arg_variable) { // FIXME Handle inlined subroutine arguments. DbgVariable *ArgVar = new DbgVariable(DV, MInsn, NULL); @@ -2102,10 +2153,6 @@ void DwarfDebug::beginScope(const MachineInstr *MI) { if (DL.isUnknown()) return; - // Check and update last known location info. - if (DL == PrevInstLoc) - return; - MDNode *Scope = DL.getScope(Asm->MF->getFunction()->getContext()); // FIXME: Should only verify each scope once! @@ -2117,78 +2164,174 @@ void DwarfDebug::beginScope(const MachineInstr *MI) { DenseMap<const MachineInstr *, DbgVariable *>::iterator DI = DbgValueStartMap.find(MI); if (DI != DbgValueStartMap.end()) { - MCSymbol *Label = recordSourceLine(DL.getLine(), DL.getCol(), Scope); - PrevInstLoc = DL; + MCSymbol *Label = NULL; + if (DL == PrevInstLoc) + Label = PrevLabel; + else { + Label = recordSourceLine(DL.getLine(), DL.getCol(), Scope); + PrevInstLoc = DL; + PrevLabel = Label; + } DI->second->setDbgValueLabel(Label); } return; } // Emit a label to indicate location change. This is used for line - // table even if this instruction does start a new scope. - MCSymbol *Label = recordSourceLine(DL.getLine(), DL.getCol(), Scope); - PrevInstLoc = DL; - - // update DbgScope if this instruction starts a new scope. - InsnToDbgScopeMapTy::iterator I = DbgScopeBeginMap.find(MI); - if (I == DbgScopeBeginMap.end()) - return; + // table even if this instruction does not start a new scope. 
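  // Note the caching below: consecutive instructions sharing one DebugLoc
  // reuse PrevLabel rather than recording a fresh source-line entry, so a
  // straight-line run from a single source line contributes one label; any
  // instruction that opens a scope still gets that label recorded in
  // LabelsBeforeInsn for later use as a range start.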
+ MCSymbol *Label = NULL; + if (DL == PrevInstLoc) + Label = PrevLabel; + else { + Label = recordSourceLine(DL.getLine(), DL.getCol(), Scope); + PrevInstLoc = DL; + PrevLabel = Label; + } - ScopeVector &SD = I->second; - for (ScopeVector::iterator SDI = SD.begin(), SDE = SD.end(); - SDI != SDE; ++SDI) - (*SDI)->setStartLabel(Label); + // If this instruction begins a scope then note down corresponding label. + if (InsnsBeginScopeSet.count(MI) != 0) + LabelsBeforeInsn[MI] = Label; } /// endScope - Process end of a scope. void DwarfDebug::endScope(const MachineInstr *MI) { - // Ignore DBG_VALUE instruction. - if (MI->isDebugValue()) - return; - - // Check location. - DebugLoc DL = MI->getDebugLoc(); - if (DL.isUnknown()) - return; - - // Emit a label and update DbgScope if this instruction ends a scope. - InsnToDbgScopeMapTy::iterator I = DbgScopeEndMap.find(MI); - if (I == DbgScopeEndMap.end()) - return; - - MCSymbol *Label = MMI->getContext().CreateTempSymbol(); - Asm->OutStreamer.EmitLabel(Label); - - SmallVector<DbgScope*, 2> &SD = I->second; - for (SmallVector<DbgScope *, 2>::iterator SDI = SD.begin(), SDE = SD.end(); - SDI != SDE; ++SDI) - (*SDI)->setEndLabel(Label); - return; + if (InsnsEndScopeSet.count(MI) != 0) { + // Emit a label if this instruction ends a scope. + MCSymbol *Label = MMI->getContext().CreateTempSymbol(); + Asm->OutStreamer.EmitLabel(Label); + LabelsAfterInsn[MI] = Label; + } } -/// createDbgScope - Create DbgScope for the scope. -void DwarfDebug::createDbgScope(MDNode *Scope, MDNode *InlinedAt) { +/// getOrCreateDbgScope - Create DbgScope for the scope. +DbgScope *DwarfDebug::getOrCreateDbgScope(MDNode *Scope, MDNode *InlinedAt) { if (!InlinedAt) { DbgScope *WScope = DbgScopeMap.lookup(Scope); if (WScope) - return; + return WScope; WScope = new DbgScope(NULL, DIDescriptor(Scope), NULL); DbgScopeMap.insert(std::make_pair(Scope, WScope)); - if (DIDescriptor(Scope).isLexicalBlock()) - createDbgScope(DILexicalBlock(Scope).getContext().getNode(), NULL); - return; + if (DIDescriptor(Scope).isLexicalBlock()) { + DbgScope *Parent = + getOrCreateDbgScope(DILexicalBlock(Scope).getContext().getNode(), NULL); + WScope->setParent(Parent); + Parent->addScope(WScope); + } + + if (!WScope->getParent()) { + StringRef SPName = DISubprogram(Scope).getLinkageName(); + if (SPName == Asm->MF->getFunction()->getName()) + CurrentFnDbgScope = WScope; + } + + return WScope; } DbgScope *WScope = DbgScopeMap.lookup(InlinedAt); if (WScope) - return; + return WScope; WScope = new DbgScope(NULL, DIDescriptor(Scope), InlinedAt); DbgScopeMap.insert(std::make_pair(InlinedAt, WScope)); DILocation DL(InlinedAt); - createDbgScope(DL.getScope().getNode(), DL.getOrigLocation().getNode()); + DbgScope *Parent = + getOrCreateDbgScope(DL.getScope().getNode(), DL.getOrigLocation().getNode()); + WScope->setParent(Parent); + Parent->addScope(WScope); + + ConcreteScopes[InlinedAt] = WScope; + getOrCreateAbstractScope(Scope); + + return WScope; } +/// hasValidLocation - Return true if debug location entry attached with +/// machine instruction encodes valid location info. +static bool hasValidLocation(LLVMContext &Ctx, + const MachineInstr *MInsn, + MDNode *&Scope, MDNode *&InlinedAt) { + if (MInsn->isDebugValue()) + return false; + DebugLoc DL = MInsn->getDebugLoc(); + if (DL.isUnknown()) return false; + + MDNode *S = DL.getScope(Ctx); + + // There is no need to create another DIE for compile unit. For all + // other scopes, create one DbgScope now. This will be translated + // into a scope DIE at the end. 
+ if (DIScope(S).isCompileUnit()) return false; + + Scope = S; + InlinedAt = DL.getInlinedAt(Ctx); + return true; +} + +/// calculateDominanceGraph - Calculate dominance graph for DbgScope +/// hierarchy. +static void calculateDominanceGraph(DbgScope *Scope) { + assert (Scope && "Unable to calculate scop edominance graph!"); + SmallVector<DbgScope *, 4> WorkStack; + WorkStack.push_back(Scope); + unsigned Counter = 0; + while (!WorkStack.empty()) { + DbgScope *WS = WorkStack.back(); + const SmallVector<DbgScope *, 4> &Children = WS->getScopes(); + bool visitedChildren = false; + for (SmallVector<DbgScope *, 4>::const_iterator SI = Children.begin(), + SE = Children.end(); SI != SE; ++SI) { + DbgScope *ChildScope = *SI; + if (!ChildScope->getDFSOut()) { + WorkStack.push_back(ChildScope); + visitedChildren = true; + ChildScope->setDFSIn(++Counter); + break; + } + } + if (!visitedChildren) { + WorkStack.pop_back(); + WS->setDFSOut(++Counter); + } + } +} + +/// printDbgScopeInfo - Print DbgScope info for each machine instruction. +static +void printDbgScopeInfo(LLVMContext &Ctx, const MachineFunction *MF, + DenseMap<const MachineInstr *, DbgScope *> &MI2ScopeMap) +{ +#ifndef NDEBUG + unsigned PrevDFSIn = 0; + for (MachineFunction::const_iterator I = MF->begin(), E = MF->end(); + I != E; ++I) { + for (MachineBasicBlock::const_iterator II = I->begin(), IE = I->end(); + II != IE; ++II) { + const MachineInstr *MInsn = II; + MDNode *Scope = NULL; + MDNode *InlinedAt = NULL; + + // Check if instruction has valid location information. + if (hasValidLocation(Ctx, MInsn, Scope, InlinedAt)) { + dbgs() << " [ "; + if (InlinedAt) + dbgs() << "*"; + DenseMap<const MachineInstr *, DbgScope *>::iterator DI = + MI2ScopeMap.find(MInsn); + if (DI != MI2ScopeMap.end()) { + DbgScope *S = DI->second; + dbgs() << S->getDFSIn(); + PrevDFSIn = S->getDFSIn(); + } else + dbgs() << PrevDFSIn; + } else + dbgs() << " [ x" << PrevDFSIn; + dbgs() << " ]"; + MInsn->dump(); + } + dbgs() << "\n"; + } +#endif +} /// extractScopeInformation - Scan machine instructions in this function /// and collect DbgScopes. Return true, if at least one scope was found. bool DwarfDebug::extractScopeInformation() { @@ -2197,71 +2340,100 @@ bool DwarfDebug::extractScopeInformation() { if (!DbgScopeMap.empty()) return false; - DenseMap<const MachineInstr *, unsigned> MIIndexMap; - unsigned MIIndex = 0; - LLVMContext &Ctx = Asm->MF->getFunction()->getContext(); - // Scan each instruction and create scopes. First build working set of scopes. + LLVMContext &Ctx = Asm->MF->getFunction()->getContext(); + SmallVector<DbgRange, 4> MIRanges; + DenseMap<const MachineInstr *, DbgScope *> MI2ScopeMap; + MDNode *PrevScope = NULL; + MDNode *PrevInlinedAt = NULL; + const MachineInstr *RangeBeginMI = NULL; + const MachineInstr *PrevMI = NULL; for (MachineFunction::const_iterator I = Asm->MF->begin(), E = Asm->MF->end(); I != E; ++I) { for (MachineBasicBlock::const_iterator II = I->begin(), IE = I->end(); II != IE; ++II) { const MachineInstr *MInsn = II; - // FIXME : Remove DBG_VALUE check. - if (MInsn->isDebugValue()) continue; - MIIndexMap[MInsn] = MIIndex++; - - DebugLoc DL = MInsn->getDebugLoc(); - if (DL.isUnknown()) continue; - - MDNode *Scope = DL.getScope(Ctx); - - // There is no need to create another DIE for compile unit. For all - // other scopes, create one DbgScope now. This will be translated - // into a scope DIE at the end. 
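// Worked illustration of the DFS numbering (hypothetical scope tree): for a
// function scope F with children A and B, where A contains A1, the walk in
// calculateDominanceGraph can assign A(in=1,out=4), A1(in=2,out=3),
// B(in=5,out=6), with F keeping in=0 and receiving out=7. dominates() is
// then a simple interval test: A dominates A1 (1<2, 4>3) but not B (4>6
// fails), which is how closeInsnRange() decides whether an ancestor's range
// may stay open when a new scope begins.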
- if (DIScope(Scope).isCompileUnit()) continue; - createDbgScope(Scope, DL.getInlinedAt(Ctx)); - } - } + MDNode *Scope = NULL; + MDNode *InlinedAt = NULL; + // Check if instruction has valid location information. + if (!hasValidLocation(Ctx, MInsn, Scope, InlinedAt)) { + PrevMI = MInsn; + continue; + } + + // If scope has not changed then skip this instruction. + if (Scope == PrevScope && PrevInlinedAt == InlinedAt) { + PrevMI = MInsn; + continue; + } - // Build scope hierarchy using working set of scopes. - for (MachineFunction::const_iterator I = Asm->MF->begin(), E = Asm->MF->end(); - I != E; ++I) { - for (MachineBasicBlock::const_iterator II = I->begin(), IE = I->end(); - II != IE; ++II) { - const MachineInstr *MInsn = II; - // FIXME : Remove DBG_VALUE check. - if (MInsn->isDebugValue()) continue; - DebugLoc DL = MInsn->getDebugLoc(); - if (DL.isUnknown()) continue; + if (RangeBeginMI) { + // If we have already seen the beginning of an instruction range and + // the current instruction scope does not match the scope of the first + // instruction in this range then create a new instruction range. + DbgRange R(RangeBeginMI, PrevMI); + MI2ScopeMap[RangeBeginMI] = getOrCreateDbgScope(PrevScope, PrevInlinedAt); + MIRanges.push_back(R); + } - MDNode *Scope = DL.getScope(Ctx); - if (Scope == 0) continue; + // This is the beginning of a new instruction range. + RangeBeginMI = MInsn; - // There is no need to create another DIE for compile unit. For all - // other scopes, create one DbgScope now. This will be translated - // into a scope DIE at the end. - if (DIScope(Scope).isCompileUnit()) continue; - DbgScope *DScope = getUpdatedDbgScope(Scope, MInsn, DL.getInlinedAt(Ctx)); - DScope->setLastInsn(MInsn); + // Reset previous markers. + PrevMI = MInsn; + PrevScope = Scope; + PrevInlinedAt = InlinedAt; } } + // Create last instruction range. + if (RangeBeginMI && PrevMI && PrevScope) { + DbgRange R(RangeBeginMI, PrevMI); + MIRanges.push_back(R); + MI2ScopeMap[RangeBeginMI] = getOrCreateDbgScope(PrevScope, PrevInlinedAt); + } + if (!CurrentFnDbgScope) return false; - CurrentFnDbgScope->fixInstructionMarkers(MIIndexMap); + calculateDominanceGraph(CurrentFnDbgScope); + if (PrintDbgScope) + printDbgScopeInfo(Ctx, Asm->MF, MI2ScopeMap); + + // Find ranges of instructions covered by each DbgScope. + DbgScope *PrevDbgScope = NULL; + for (SmallVector<DbgRange, 4>::const_iterator RI = MIRanges.begin(), + RE = MIRanges.end(); RI != RE; ++RI) { + const DbgRange &R = *RI; + DbgScope *S = MI2ScopeMap.lookup(R.first); + assert (S && "Lost DbgScope for a machine instruction!"); + if (PrevDbgScope && !PrevDbgScope->dominates(S)) + PrevDbgScope->closeInsnRange(S); + S->openInsnRange(R.first); + S->extendInsnRange(R.second); + PrevDbgScope = S; + } - // Each scope has first instruction and last instruction to mark beginning - // and end of a scope respectively. Create an inverse map that list scopes - // starts (and ends) with an instruction. One instruction may start (or end) - // multiple scopes. Ignore scopes that are not reachable. + if (PrevDbgScope) + PrevDbgScope->closeInsnRange(); + + identifyScopeMarkers(); + + return !DbgScopeMap.empty(); +} + +/// identifyScopeMarkers() - +/// Each DbgScope has a first and a last instruction to mark the beginning and +/// end of the scope, respectively. Create an inverse map that lists the scopes +/// that start (and end) with an instruction. One instruction may start (or end) +/// multiple scopes. Ignore scopes that are not reachable.
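The rewritten extractScopeInformation above groups consecutive instructions that share the same (scope, inlined-at) pair into DbgRanges keyed by the range's first instruction, and calculateDominanceGraph assigns DFS in/out numbers so that scope nesting can later be tested by interval containment. The following is a minimal standalone sketch of that bookkeeping, not the LLVM implementation; Scope, Insn, and the containers are illustrative stand-ins.

#include <cstdio>
#include <map>
#include <utility>
#include <vector>

// Illustrative stand-ins for DbgScope and MachineInstr.
struct Scope {
  std::vector<Scope *> Children;
  unsigned DFSIn = 0, DFSOut = 0;
  // A scope encloses another iff its DFS interval contains the other's.
  bool dominates(const Scope *S) const {
    return DFSIn <= S->DFSIn && S->DFSOut <= DFSOut;
  }
};
struct Insn { int Id; Scope *S; };

// Assign DFS in/out numbers with an explicit work stack (no recursion).
static void numberScopes(Scope *Root) {
  unsigned Counter = 0;
  std::vector<Scope *> Stack{Root};
  Root->DFSIn = ++Counter;
  while (!Stack.empty()) {
    Scope *Top = Stack.back();
    bool Descended = false;
    for (Scope *Child : Top->Children)
      if (Child->DFSIn == 0) {          // not discovered yet
        Child->DFSIn = ++Counter;
        Stack.push_back(Child);
        Descended = true;
        break;
      }
    if (!Descended) {                   // all children finished
      Top->DFSOut = ++Counter;
      Stack.pop_back();
    }
  }
}

int main() {
  Scope Fn, Block;                      // Block is lexically nested in Fn
  Fn.Children.push_back(&Block);
  numberScopes(&Fn);

  // Instructions in program order; a range closes whenever the scope changes.
  std::vector<Insn> Insns = {{0, &Fn}, {1, &Fn}, {2, &Block}, {3, &Block}, {4, &Fn}};
  std::map<int, std::pair<int, Scope *>> Ranges;   // first id -> (last id, scope)
  int Begin = -1, Last = -1;
  Scope *Prev = nullptr;
  for (const Insn &I : Insns) {
    if (I.S != Prev) {
      if (Prev) Ranges[Begin] = {Last, Prev};      // close the previous range
      Begin = I.Id;
      Prev = I.S;
    }
    Last = I.Id;
  }
  if (Prev) Ranges[Begin] = {Last, Prev};          // close the final range

  for (const auto &R : Ranges)
    std::printf("range [%d,%d] in scope [%u,%u], nested in Fn: %d\n",
                R.first, R.second.first, R.second.second->DFSIn,
                R.second.second->DFSOut, Fn.dominates(R.second.second));
  return 0;
}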
+void DwarfDebug::identifyScopeMarkers() { SmallVector<DbgScope *, 4> WorkList; WorkList.push_back(CurrentFnDbgScope); while (!WorkList.empty()) { DbgScope *S = WorkList.pop_back_val(); - + const SmallVector<DbgScope *, 4> &Children = S->getScopes(); if (!Children.empty()) for (SmallVector<DbgScope *, 4>::const_iterator SI = Children.begin(), @@ -2270,45 +2442,51 @@ bool DwarfDebug::extractScopeInformation() { if (S->isAbstractScope()) continue; - const MachineInstr *MI = S->getFirstInsn(); - assert(MI && "DbgScope does not have first instruction!"); - - InsnToDbgScopeMapTy::iterator IDI = DbgScopeBeginMap.find(MI); - if (IDI != DbgScopeBeginMap.end()) - IDI->second.push_back(S); - else - DbgScopeBeginMap[MI].push_back(S); - - MI = S->getLastInsn(); - assert(MI && "DbgScope does not have last instruction!"); - IDI = DbgScopeEndMap.find(MI); - if (IDI != DbgScopeEndMap.end()) - IDI->second.push_back(S); - else - DbgScopeEndMap[MI].push_back(S); + + const SmallVector<DbgRange, 4> &Ranges = S->getRanges(); + if (Ranges.empty()) + continue; + for (SmallVector<DbgRange, 4>::const_iterator RI = Ranges.begin(), + RE = Ranges.end(); RI != RE; ++RI) { + assert(RI->first && "DbgRange does not have first instruction!"); + assert(RI->second && "DbgRange does not have second instruction!"); + InsnsBeginScopeSet.insert(RI->first); + InsnsEndScopeSet.insert(RI->second); + } } +} - return !DbgScopeMap.empty(); +/// FindFirstDebugLoc - Find the first debug location in the function. This +/// is intended to be an approximation for the source position of the +/// beginning of the function. +static DebugLoc FindFirstDebugLoc(const MachineFunction *MF) { + for (MachineFunction::const_iterator I = MF->begin(), E = MF->end(); + I != E; ++I) + for (MachineBasicBlock::const_iterator MBBI = I->begin(), MBBE = I->end(); + MBBI != MBBE; ++MBBI) { + DebugLoc DL = MBBI->getDebugLoc(); + if (!DL.isUnknown()) + return DL; + } + return DebugLoc(); } /// beginFunction - Gather pre-function debug information. Assumes being /// emitted immediately after the function entry point. void DwarfDebug::beginFunction(const MachineFunction *MF) { if (!MMI->hasDebugInfo()) return; - - TimeRegion Timer(DebugTimer); - if (!extractScopeInformation()) - return; + if (!extractScopeInformation()) return; collectVariableInfo(); + FunctionBeginSym = Asm->GetTempSymbol("func_begin", + Asm->getFunctionNumber()); // Assumes in correct section after the entry point. - Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("func_begin", - Asm->getFunctionNumber())); + Asm->OutStreamer.EmitLabel(FunctionBeginSym); // Emit label for the implicitly defined dbg.stoppoint at the start of the // function. - DebugLoc FDL = MF->getDefaultDebugLoc(); + DebugLoc FDL = FindFirstDebugLoc(MF); if (FDL.isUnknown()) return; MDNode *Scope = FDL.getScope(MF->getFunction()->getContext()); @@ -2329,10 +2507,7 @@ void DwarfDebug::beginFunction(const MachineFunction *MF) { /// endFunction - Gather and emit post-function debug information. /// void DwarfDebug::endFunction(const MachineFunction *MF) { - if (!MMI->hasDebugInfo() || - DbgScopeMap.empty()) return; - - TimeRegion Timer(DebugTimer); + if (!MMI->hasDebugInfo() || DbgScopeMap.empty()) return; if (CurrentFnDbgScope) { // Define end label for subprogram. 
@@ -2355,8 +2530,13 @@ void DwarfDebug::endFunction(const MachineFunction *MF) { AE = AbstractScopesList.end(); AI != AE; ++AI) constructScopeDIE(*AI); - constructScopeDIE(CurrentFnDbgScope); + DIE *CurFnDIE = constructScopeDIE(CurrentFnDbgScope); + if (!DisableFramePointerElim(*MF)) + addUInt(CurFnDIE, dwarf::DW_AT_APPLE_omit_frame_ptr, + dwarf::DW_FORM_flag, 1); + + DebugFrames.push_back(FunctionDebugFrameInfo(Asm->getFunctionNumber(), MMI->getFrameMoves())); } @@ -2364,22 +2544,23 @@ void DwarfDebug::endFunction(const MachineFunction *MF) { // Clear debug info CurrentFnDbgScope = NULL; DeleteContainerSeconds(DbgScopeMap); - DbgScopeBeginMap.clear(); - DbgScopeEndMap.clear(); + InsnsBeginScopeSet.clear(); + InsnsEndScopeSet.clear(); DbgValueStartMap.clear(); ConcreteScopes.clear(); DeleteContainerSeconds(AbstractScopes); AbstractScopesList.clear(); AbstractVariables.clear(); + LabelsBeforeInsn.clear(); + LabelsAfterInsn.clear(); Lines.clear(); + PrevLabel = NULL; } /// recordSourceLine - Register a source line with debug info. Returns the /// unique label that was emitted and which provides correspondence to /// the source line list. MCSymbol *DwarfDebug::recordSourceLine(unsigned Line, unsigned Col, MDNode *S) { - TimeRegion Timer(DebugTimer); - StringRef Dir; StringRef Fn; @@ -2407,17 +2588,6 @@ MCSymbol *DwarfDebug::recordSourceLine(unsigned Line, unsigned Col, MDNode *S) { return Label; } -/// getOrCreateSourceID - Public version of GetOrCreateSourceID. This can be -/// timed. Look up the source id with the given directory and source file -/// names. If none currently exists, create a new id and insert it in the -/// SourceIds map. This can update DirectoryNames and SourceFileNames maps as -/// well. -unsigned DwarfDebug::getOrCreateSourceID(const std::string &DirName, - const std::string &FileName) { - TimeRegion Timer(DebugTimer); - return GetOrCreateSourceID(DirName.c_str(), FileName.c_str()); -} - //===----------------------------------------------------------------------===// // Emit Methods //===----------------------------------------------------------------------===// @@ -2481,7 +2651,6 @@ void DwarfDebug::computeSizeAndOffsets() { sizeof(int8_t); // Pointer Size (in bytes) computeSizeAndOffset(ModuleCU->getCUDie(), Offset, true); - CompileUnitOffsets[ModuleCU] = 0; } /// EmitSectionSym - Switch to the specified MCSection and emit an assembler @@ -2522,7 +2691,8 @@ void DwarfDebug::EmitSectionLabels() { EmitSectionSym(Asm, TLOF.getDwarfPubTypesSection()); DwarfStrSectionSym = EmitSectionSym(Asm, TLOF.getDwarfStrSection(), "section_str"); - EmitSectionSym(Asm, TLOF.getDwarfRangesSection()); + DwarfDebugRangeSectionSym = EmitSectionSym(Asm, TLOF.getDwarfRangesSection(), + "debug_range"); TextSectionSym = EmitSectionSym(Asm, TLOF.getTextSection(), "text_begin"); EmitSectionSym(Asm, TLOF.getDataSection()); @@ -2566,6 +2736,15 @@ void DwarfDebug::emitDIE(DIE *Die) { Asm->EmitInt32(Addr); break; } + case dwarf::DW_AT_ranges: { + // DW_AT_range Value encodes offset in debug_range section. + DIEInteger *V = cast<DIEInteger>(Values[i]); + Asm->EmitLabelOffsetDifference(DwarfDebugRangeSectionSym, + V->getValue(), + DwarfDebugRangeSectionSym, + 4); + break; + } default: // Emit an attribute using the defined form. 
Values[i]->EmitValue(Asm, Form); @@ -2900,7 +3079,7 @@ void DwarfDebug::emitCommonDebugFrame() { Asm->EmitFrameMoves(Moves, 0, false); - Asm->EmitAlignment(2, 0, 0, false); + Asm->EmitAlignment(2); Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("debug_frame_common_end")); } @@ -2942,7 +3121,7 @@ emitFunctionDebugFrame(const FunctionDebugFrameInfo &DebugFrameInfo) { Asm->EmitFrameMoves(DebugFrameInfo.Moves, FuncBeginSym, false); - Asm->EmitAlignment(2, 0, 0, false); + Asm->EmitAlignment(2); Asm->OutStreamer.EmitLabel(DebugFrameEnd); } @@ -3087,7 +3266,16 @@ void DwarfDebug::EmitDebugARanges() { void DwarfDebug::emitDebugRanges() { // Start the dwarf ranges section. Asm->OutStreamer.SwitchSection( - Asm->getObjFileLowering().getDwarfRangesSection()); + Asm->getObjFileLowering().getDwarfRangesSection()); + unsigned char Size = Asm->getTargetData().getPointerSize(); + for (SmallVector<const MCSymbol *, 8>::iterator + I = DebugRangeSymbols.begin(), E = DebugRangeSymbols.end(); + I != E; ++I) { + if (*I) + Asm->OutStreamer.EmitSymbolValue(const_cast<MCSymbol*>(*I), Size, 0); + else + Asm->OutStreamer.EmitIntValue(0, Size, /*addrspace*/0); + } } /// emitDebugMacInfo - Emit visible names into a debug macinfo section. diff --git a/lib/CodeGen/AsmPrinter/DwarfDebug.h b/lib/CodeGen/AsmPrinter/DwarfDebug.h index c7baf5f..b964b23 100644 --- a/lib/CodeGen/AsmPrinter/DwarfDebug.h +++ b/lib/CodeGen/AsmPrinter/DwarfDebug.h @@ -32,8 +32,8 @@ class DbgVariable; class MachineFrameInfo; class MachineLocation; class MachineModuleInfo; +class MachineOperand; class MCAsmInfo; -class Timer; class DIEAbbrev; class DIE; class DIEBlock; @@ -174,24 +174,14 @@ class DwarfDebug { /// (at the end of the module) as DW_AT_inline. SmallPtrSet<DIE *, 4> InlinedSubprogramDIEs; + /// ContainingTypeMap - This map is used to keep track of subprogram DIEs that + /// need DW_AT_containing_type attribute. This attribute points to a DIE that + /// corresponds to the MDNode mapped with the subprogram DIE. DenseMap<DIE *, MDNode *> ContainingTypeMap; - /// AbstractSubprogramDIEs - Collection of abstruct subprogram DIEs. - SmallPtrSet<DIE *, 4> AbstractSubprogramDIEs; - - /// TopLevelDIEs - Collection of top level DIEs. - SmallPtrSet<DIE *, 4> TopLevelDIEs; - SmallVector<DIE *, 4> TopLevelDIEsVector; - typedef SmallVector<DbgScope *, 2> ScopeVector; - typedef DenseMap<const MachineInstr *, ScopeVector> - InsnToDbgScopeMapTy; - - /// DbgScopeBeginMap - Maps instruction with a list of DbgScopes it starts. - InsnToDbgScopeMapTy DbgScopeBeginMap; - - /// DbgScopeEndMap - Maps instruction with a list DbgScopes it ends. - InsnToDbgScopeMapTy DbgScopeEndMap; + SmallPtrSet<const MachineInstr *, 8> InsnsBeginScopeSet; + SmallPtrSet<const MachineInstr *, 8> InsnsEndScopeSet; /// InlineInfo - Keep track of inlined functions and their location. This /// information is used to populate debug_inlined section. @@ -199,18 +189,21 @@ class DwarfDebug { DenseMap<MDNode*, SmallVector<InlineInfoLabels, 4> > InlineInfo; SmallVector<MDNode *, 4> InlinedSPNodes; - /// CompileUnitOffsets - A vector of the offsets of the compile units. This is - /// used when calculating the "origin" of a concrete instance of an inlined - /// function. - DenseMap<CompileUnit *, unsigned> CompileUnitOffsets; + /// LabelsBeforeInsn - Maps instruction with label emitted before + /// instruction. + DenseMap<const MachineInstr *, MCSymbol *> LabelsBeforeInsn; + + /// LabelsAfterInsn - Maps instruction with label emitted after + /// instruction. 
+ DenseMap<const MachineInstr *, MCSymbol *> LabelsAfterInsn; + + SmallVector<const MCSymbol *, 8> DebugRangeSymbols; /// Previous instruction's location information. This is used to determine /// label location to indicate scope boundaries in dwarf debug info. DebugLoc PrevInstLoc; + MCSymbol *PrevLabel; - /// DebugTimer - Timer for the Dwarf debug writer. - Timer *DebugTimer; - struct FunctionDebugFrameInfo { unsigned Number; std::vector<MachineMove> Moves; @@ -225,8 +218,9 @@ class DwarfDebug { // the beginning of each supported dwarf section. These are used to form // section offsets and are created by EmitSectionLabels. MCSymbol *DwarfFrameSectionSym, *DwarfInfoSectionSym, *DwarfAbbrevSectionSym; - MCSymbol *DwarfStrSectionSym, *TextSectionSym; - + MCSymbol *DwarfStrSectionSym, *TextSectionSym, *DwarfDebugRangeSectionSym; + + MCSymbol *FunctionBeginSym; private: /// getSourceDirectoryAndFileIds - Return the directory and file ids that @@ -311,6 +305,15 @@ private: void addAddress(DIE *Die, unsigned Attribute, const MachineLocation &Location); + /// addRegisterAddress - Add register location entry in variable DIE. + bool addRegisterAddress(DIE *Die, DbgVariable *DV, const MachineOperand &MO); + + /// addConstantValue - Add constant value entry in variable DIE. + bool addConstantValue(DIE *Die, DbgVariable *DV, const MachineOperand &MO); + + /// addConstantFPValue - Add constant value entry in variable DIE. + bool addConstantFPValue(DIE *Die, DbgVariable *DV, const MachineOperand &MO); + /// addComplexAddress - Start with the address based on the location provided, /// and generate the DWARF information necessary to find the actual variable /// (navigating the extra location information encoded in the type) based on @@ -376,13 +379,8 @@ private: /// createSubprogramDIE - Create new DIE using SP. DIE *createSubprogramDIE(const DISubprogram &SP, bool MakeDecl = false); - /// getUpdatedDbgScope - Find or create DbgScope assicated with - /// the instruction. Initialize scope and update scope hierarchy. - DbgScope *getUpdatedDbgScope(MDNode *N, const MachineInstr *MI, - MDNode *InlinedAt); - - /// createDbgScope - Create DbgScope for the scope. - void createDbgScope(MDNode *Scope, MDNode *InlinedAt); + /// getOrCreateDbgScope - Create DbgScope for the scope. + DbgScope *getOrCreateDbgScope(MDNode *Scope, MDNode *InlinedAt); DbgScope *getOrCreateAbstractScope(MDNode *N); @@ -531,14 +529,10 @@ private: return Lines.size(); } - /// getOrCreateSourceID - Public version of GetOrCreateSourceID. This can be - /// timed. Look up the source id with the given directory and source file - /// names. If none currently exists, create a new id and insert it in the - /// SourceIds map. This can update DirectoryNames and SourceFileNames maps as - /// well. - unsigned getOrCreateSourceID(const std::string &DirName, - const std::string &FileName); - + /// identifyScopeMarkers() - Identify instructions that mark the + /// beginning or end of a scope. + void identifyScopeMarkers(); + /// extractScopeInformation - Scan machine instructions in this function /// and collect DbgScopes. Return true if at least one scope was found.
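The new LabelsBeforeInsn/LabelsAfterInsn maps above pair scope-opening and scope-closing instructions with the label emitted next to them, while PrevInstLoc/PrevLabel let beginScope reuse one label for a run of instructions that share a source location. A small self-contained sketch of that reuse pattern, with Loc and plain label ids standing in for DebugLoc and MCSymbol:

#include <cstdio>
#include <map>
#include <set>
#include <vector>

// Loc stands in for DebugLoc; label ids stand in for MCSymbols.
struct Loc {
  unsigned Line = 0, Col = 0;
  bool operator==(const Loc &O) const { return Line == O.Line && Col == O.Col; }
};

int main() {
  std::vector<Loc> Insns = {{1, 1}, {1, 1}, {2, 5}, {2, 5}, {3, 1}};
  std::set<unsigned> BeginsScope = {0, 2};        // indices that open a scope
  std::map<unsigned, unsigned> LabelsBeforeInsn;  // insn index -> label id

  Loc PrevLoc;                                    // "unknown" location
  unsigned PrevLabel = 0, NextLabel = 0;
  for (unsigned i = 0; i != Insns.size(); ++i) {
    unsigned Label;
    if (PrevLabel && Insns[i] == PrevLoc) {
      Label = PrevLabel;                          // same location: reuse the label
    } else {
      Label = ++NextLabel;                        // new location: "emit" a fresh label
      PrevLoc = Insns[i];
      PrevLabel = Label;
    }
    if (BeginsScope.count(i))
      LabelsBeforeInsn[i] = Label;                // remember it for the scope entry
  }
  for (const auto &KV : LabelsBeforeInsn)
    std::printf("scope starting at insn %u uses label L%u\n", KV.first, KV.second);
  return 0;
}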
bool extractScopeInformation(); diff --git a/lib/CodeGen/AsmPrinter/DwarfException.cpp b/lib/CodeGen/AsmPrinter/DwarfException.cpp index 72c97a4..0ff1036 100644 --- a/lib/CodeGen/AsmPrinter/DwarfException.cpp +++ b/lib/CodeGen/AsmPrinter/DwarfException.cpp @@ -33,7 +33,6 @@ #include "llvm/Target/TargetRegisterInfo.h" #include "llvm/Support/Dwarf.h" #include "llvm/Support/FormattedStream.h" -#include "llvm/Support/Timer.h" #include "llvm/ADT/SmallString.h" #include "llvm/ADT/StringExtras.h" #include "llvm/ADT/Twine.h" @@ -41,15 +40,9 @@ using namespace llvm; DwarfException::DwarfException(AsmPrinter *A) : Asm(A), MMI(Asm->MMI), shouldEmitTable(false), shouldEmitMoves(false), - shouldEmitTableModule(false), shouldEmitMovesModule(false), - ExceptionTimer(0) { - if (TimePassesIsEnabled) - ExceptionTimer = new Timer("DWARF Exception Writer"); -} + shouldEmitTableModule(false), shouldEmitMovesModule(false) {} -DwarfException::~DwarfException() { - delete ExceptionTimer; -} +DwarfException::~DwarfException() {} /// EmitCIE - Emit a Common Information Entry (CIE). This holds information that /// is shared among many Frame Description Entries. There is at least one CIE @@ -159,8 +152,7 @@ void DwarfException::EmitCIE(const Function *PersonalityFn, unsigned Index) { // On Darwin the linker honors the alignment of eh_frame, which means it must // be 8-byte on 64-bit targets to match what gcc does. Otherwise you get // holes which confuse readers of eh_frame. - Asm->EmitAlignment(Asm->getTargetData().getPointerSize() == 4 ? 2 : 3, - 0, 0, false); + Asm->EmitAlignment(Asm->getTargetData().getPointerSize() == 4 ? 2 : 3); Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("eh_frame_common_end", Index)); } @@ -262,8 +254,7 @@ void DwarfException::EmitFDE(const FunctionEHFrameInfo &EHFrameInfo) { // On Darwin the linker honors the alignment of eh_frame, which means it // must be 8-byte on 64-bit targets to match what gcc does. Otherwise you // get holes which confuse readers of eh_frame. - Asm->EmitAlignment(Asm->getTargetData().getPointerSize() == 4 ? 2 : 3, - 0, 0, false); + Asm->EmitAlignment(Asm->getTargetData().getPointerSize() == 4 ? 2 : 3); Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("eh_frame_end", EHFrameInfo.Number)); @@ -432,7 +423,7 @@ bool DwarfException::CallToNoUnwindFunction(const MachineInstr *MI) { if (!MO.isGlobal()) continue; - Function *F = dyn_cast<Function>(MO.getGlobal()); + const Function *F = dyn_cast<Function>(MO.getGlobal()); if (F == 0) continue; if (SawFunc) { @@ -586,7 +577,7 @@ ComputeCallSiteTable(SmallVectorImpl<CallSiteEntry> &CallSites, /// 3. Type ID table contains references to all the C++ typeinfo for all /// catches in the function. This tables is reverse indexed base 1. void DwarfException::EmitExceptionTable() { - const std::vector<GlobalVariable *> &TypeInfos = MMI->getTypeInfos(); + const std::vector<const GlobalVariable *> &TypeInfos = MMI->getTypeInfos(); const std::vector<unsigned> &FilterIds = MMI->getFilterIds(); const std::vector<LandingPadInfo> &PadInfos = MMI->getLandingPads(); @@ -692,7 +683,7 @@ void DwarfException::EmitExceptionTable() { // Begin the exception table. Asm->OutStreamer.SwitchSection(LSDASection); - Asm->EmitAlignment(2, 0, 0, false); + Asm->EmitAlignment(2); // Emit the LSDA. 
MCSymbol *GCCETSym = @@ -868,7 +859,7 @@ void DwarfException::EmitExceptionTable() { Asm->OutStreamer.AddComment("-- Catch TypeInfos --"); Asm->OutStreamer.AddBlankLine(); } - for (std::vector<GlobalVariable *>::const_reverse_iterator + for (std::vector<const GlobalVariable *>::const_reverse_iterator I = TypeInfos.rbegin(), E = TypeInfos.rend(); I != E; ++I) { const GlobalVariable *GV = *I; @@ -891,7 +882,7 @@ void DwarfException::EmitExceptionTable() { Asm->EmitULEB128(TypeID, TypeID != 0 ? "Exception specification" : 0); } - Asm->EmitAlignment(2, 0, 0, false); + Asm->EmitAlignment(2); } /// EndModule - Emit all exception information that should come after the @@ -903,9 +894,7 @@ void DwarfException::EndModule() { if (!shouldEmitMovesModule && !shouldEmitTableModule) return; - TimeRegion Timer(ExceptionTimer); - - const std::vector<Function *> Personalities = MMI->getPersonalities(); + const std::vector<const Function *> Personalities = MMI->getPersonalities(); for (unsigned I = 0, E = Personalities.size(); I < E; ++I) EmitCIE(Personalities[I], I); @@ -918,7 +907,6 @@ void DwarfException::EndModule() { /// BeginFunction - Gather pre-function exception information. Assumes it's /// being emitted immediately after the function entry point. void DwarfException::BeginFunction(const MachineFunction *MF) { - TimeRegion Timer(ExceptionTimer); shouldEmitTable = shouldEmitMoves = false; // If any landing pads survive, we need an EH table. @@ -942,7 +930,6 @@ void DwarfException::BeginFunction(const MachineFunction *MF) { void DwarfException::EndFunction() { if (!shouldEmitMoves && !shouldEmitTable) return; - TimeRegion Timer(ExceptionTimer); Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("eh_func_end", Asm->getFunctionNumber())); diff --git a/lib/CodeGen/AsmPrinter/DwarfException.h b/lib/CodeGen/AsmPrinter/DwarfException.h index f35c0b6..5839f8c 100644 --- a/lib/CodeGen/AsmPrinter/DwarfException.h +++ b/lib/CodeGen/AsmPrinter/DwarfException.h @@ -28,7 +28,6 @@ class MachineFunction; class MCAsmInfo; class MCExpr; class MCSymbol; -class Timer; class Function; class AsmPrinter; @@ -82,9 +81,6 @@ class DwarfException { /// should be emitted. bool shouldEmitMovesModule; - /// ExceptionTimer - Timer for the Dwarf exception writer. - Timer *ExceptionTimer; - /// EmitCIE - Emit a Common Information Entry (CIE). This holds information /// that is shared among many Frame Description Entries. There is at least /// one CIE in every non-empty .debug_frame section. diff --git a/lib/CodeGen/AsmPrinter/OcamlGCPrinter.cpp b/lib/CodeGen/AsmPrinter/OcamlGCPrinter.cpp index 1db178f..a8c3c7b 100644 --- a/lib/CodeGen/AsmPrinter/OcamlGCPrinter.cpp +++ b/lib/CodeGen/AsmPrinter/OcamlGCPrinter.cpp @@ -109,13 +109,11 @@ void OcamlGCMetadataPrinter::finishAssembly(AsmPrinter &AP) { uint64_t FrameSize = FI.getFrameSize(); if (FrameSize >= 1<<16) { - std::string msg; - raw_string_ostream Msg(msg); - Msg << "Function '" << FI.getFunction().getName() - << "' is too large for the ocaml GC! " - << "Frame size " << FrameSize << " >= 65536.\n"; - Msg << "(" << uintptr_t(&FI) << ")"; - llvm_report_error(Msg.str()); // Very rude! + // Very rude! + report_fatal_error("Function '" + FI.getFunction().getName() + + "' is too large for the ocaml GC! 
" + "Frame size " + Twine(FrameSize) + ">= 65536.\n" + "(" + Twine(uintptr_t(&FI)) + ")"); } AP.OutStreamer.AddComment("live roots for " + @@ -125,12 +123,10 @@ void OcamlGCMetadataPrinter::finishAssembly(AsmPrinter &AP) { for (GCFunctionInfo::iterator J = FI.begin(), JE = FI.end(); J != JE; ++J) { size_t LiveCount = FI.live_size(J); if (LiveCount >= 1<<16) { - std::string msg; - raw_string_ostream Msg(msg); - Msg << "Function '" << FI.getFunction().getName() - << "' is too large for the ocaml GC! " - << "Live root count " << LiveCount << " >= 65536."; - llvm_report_error(Msg.str()); // Very rude! + // Very rude! + report_fatal_error("Function '" + FI.getFunction().getName() + + "' is too large for the ocaml GC! " + "Live root count "+Twine(LiveCount)+" >= 65536."); } AP.OutStreamer.EmitSymbolValue(J->Label, IntPtrSize, 0); diff --git a/lib/CodeGen/BranchFolding.cpp b/lib/CodeGen/BranchFolding.cpp index 8f51940..9dec22e 100644 --- a/lib/CodeGen/BranchFolding.cpp +++ b/lib/CodeGen/BranchFolding.cpp @@ -264,14 +264,8 @@ static unsigned HashMachineInstr(const MachineInstr *MI) { return Hash; } -/// HashEndOfMBB - Hash the last few instructions in the MBB. For blocks -/// with no successors, we hash two instructions, because cross-jumping -/// only saves code when at least two instructions are removed (since a -/// branch must be inserted). For blocks with a successor, one of the -/// two blocks to be tail-merged will end with a branch already, so -/// it gains to cross-jump even for one instruction. -static unsigned HashEndOfMBB(const MachineBasicBlock *MBB, - unsigned minCommonTailLength) { +/// HashEndOfMBB - Hash the last instruction in the MBB. +static unsigned HashEndOfMBB(const MachineBasicBlock *MBB) { MachineBasicBlock::const_iterator I = MBB->end(); if (I == MBB->begin()) return 0; // Empty MBB. @@ -283,20 +277,8 @@ static unsigned HashEndOfMBB(const MachineBasicBlock *MBB, return 0; // MBB empty except for debug info. --I; } - unsigned Hash = HashMachineInstr(I); - if (I == MBB->begin() || minCommonTailLength == 1) - return Hash; // Single instr MBB. - - --I; - while (I->isDebugValue()) { - if (I==MBB->begin()) - return Hash; // MBB with single non-debug instr. - --I; - } - // Hash in the second-to-last instruction. - Hash ^= HashMachineInstr(I) << 2; - return Hash; + return HashMachineInstr(I); } /// ComputeCommonTailLength - Given two machine basic blocks, compute the number @@ -811,7 +793,7 @@ bool BranchFolder::TailMergeBlocks(MachineFunction &MF) { MergePotentials.clear(); for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ++I) { if (I->succ_empty()) - MergePotentials.push_back(MergePotentialsElt(HashEndOfMBB(I, 2U), I)); + MergePotentials.push_back(MergePotentialsElt(HashEndOfMBB(I), I)); } // See if we can do any tail merging on those. @@ -897,8 +879,7 @@ bool BranchFolder::TailMergeBlocks(MachineFunction &MF) { // reinsert conditional branch only, for now TII->InsertBranch(*PBB, (TBB == IBB) ? 
FBB : TBB, 0, NewCond); } - MergePotentials.push_back(MergePotentialsElt(HashEndOfMBB(PBB, 1U), - *P)); + MergePotentials.push_back(MergePotentialsElt(HashEndOfMBB(PBB), *P)); } } if (MergePotentials.size() >= 2) diff --git a/lib/CodeGen/CMakeLists.txt b/lib/CodeGen/CMakeLists.txt index 62d1883..3e38872 100644 --- a/lib/CodeGen/CMakeLists.txt +++ b/lib/CodeGen/CMakeLists.txt @@ -1,4 +1,5 @@ add_llvm_library(LLVMCodeGen + Analysis.cpp AggressiveAntiDepBreaker.cpp BranchFolding.cpp CalcSpillWeights.cpp @@ -49,6 +50,7 @@ add_llvm_library(LLVMCodeGen ProcessImplicitDefs.cpp PrologEpilogInserter.cpp PseudoSourceValue.cpp + RegAllocFast.cpp RegAllocLinearScan.cpp RegAllocLocal.cpp RegAllocPBQP.cpp diff --git a/lib/CodeGen/CriticalAntiDepBreaker.cpp b/lib/CodeGen/CriticalAntiDepBreaker.cpp index 7d3de89..759fbaa 100644 --- a/lib/CodeGen/CriticalAntiDepBreaker.cpp +++ b/lib/CodeGen/CriticalAntiDepBreaker.cpp @@ -143,13 +143,13 @@ void CriticalAntiDepBreaker::Observe(MachineInstr *MI, unsigned Count, /// CriticalPathStep - Return the next SUnit after SU on the bottom-up /// critical path. -static SDep *CriticalPathStep(SUnit *SU) { - SDep *Next = 0; +static const SDep *CriticalPathStep(const SUnit *SU) { + const SDep *Next = 0; unsigned NextDepth = 0; // Find the predecessor edge with the greatest depth. - for (SUnit::pred_iterator P = SU->Preds.begin(), PE = SU->Preds.end(); + for (SUnit::const_pred_iterator P = SU->Preds.begin(), PE = SU->Preds.end(); P != PE; ++P) { - SUnit *PredSU = P->getSUnit(); + const SUnit *PredSU = P->getSUnit(); unsigned PredLatency = P->getLatency(); unsigned PredTotalLatency = PredSU->getDepth() + PredLatency; // In the case of a latency tie, prefer an anti-dependency edge over @@ -326,18 +326,18 @@ CriticalAntiDepBreaker::findSuitableFreeRegister(MachineInstr *MI, } unsigned CriticalAntiDepBreaker:: -BreakAntiDependencies(std::vector<SUnit>& SUnits, - MachineBasicBlock::iterator& Begin, - MachineBasicBlock::iterator& End, +BreakAntiDependencies(const std::vector<SUnit>& SUnits, + MachineBasicBlock::iterator Begin, + MachineBasicBlock::iterator End, unsigned InsertPosIndex) { // The code below assumes that there is at least one instruction, // so just duck out immediately if the block is empty. if (SUnits.empty()) return 0; // Find the node at the bottom of the critical path. - SUnit *Max = 0; + const SUnit *Max = 0; for (unsigned i = 0, e = SUnits.size(); i != e; ++i) { - SUnit *SU = &SUnits[i]; + const SUnit *SU = &SUnits[i]; if (!Max || SU->getDepth() + SU->Latency > Max->getDepth() + Max->Latency) Max = SU; } @@ -357,7 +357,7 @@ BreakAntiDependencies(std::vector<SUnit>& SUnits, // Track progress along the critical path through the SUnit graph as we walk // the instructions. - SUnit *CriticalPathSU = Max; + const SUnit *CriticalPathSU = Max; MachineInstr *CriticalPathMI = CriticalPathSU->getInstr(); // Consider this pattern: @@ -429,8 +429,8 @@ BreakAntiDependencies(std::vector<SUnit>& SUnits, // the anti-dependencies in an instruction in order to be effective. unsigned AntiDepReg = 0; if (MI == CriticalPathMI) { - if (SDep *Edge = CriticalPathStep(CriticalPathSU)) { - SUnit *NextSU = Edge->getSUnit(); + if (const SDep *Edge = CriticalPathStep(CriticalPathSU)) { + const SUnit *NextSU = Edge->getSUnit(); // Only consider anti-dependence edges. 
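The constified CriticalPathStep above walks the critical path bottom-up by picking, among a node's predecessor edges, the one whose source depth plus edge latency is largest. A minimal sketch of that selection step, with Node and Edge as illustrative stand-ins for SUnit and SDep (the real code additionally prefers anti-dependence edges on latency ties):

#include <cstdio>
#include <vector>

struct Node;
struct Edge { Node *Pred; unsigned Latency; };
struct Node { unsigned Depth; std::vector<Edge> Preds; };

// Pick the predecessor edge that maximizes depth(pred) + latency(edge).
static const Edge *criticalPathStep(const Node *N) {
  const Edge *Next = nullptr;
  unsigned NextDepth = 0;
  for (const Edge &E : N->Preds) {
    unsigned Total = E.Pred->Depth + E.Latency;
    if (!Next || Total > NextDepth) {
      NextDepth = Total;
      Next = &E;
    }
  }
  return Next;                         // null if N has no predecessors
}

int main() {
  Node A{3, {}}, B{5, {}}, C{0, {}};
  C.Preds = {{&A, 2}, {&B, 1}};        // A contributes 3+2=5, B contributes 5+1=6
  const Edge *E = criticalPathStep(&C);
  std::printf("next critical-path node has depth %u\n", E->Pred->Depth);  // prints 5
  return 0;
}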
if (Edge->getKind() == SDep::Anti) { @@ -452,7 +452,7 @@ BreakAntiDependencies(std::vector<SUnit>& SUnits, // Also, if there are dependencies on other SUnits with the // same register as the anti-dependency, don't attempt to // break it. - for (SUnit::pred_iterator P = CriticalPathSU->Preds.begin(), + for (SUnit::const_pred_iterator P = CriticalPathSU->Preds.begin(), PE = CriticalPathSU->Preds.end(); P != PE; ++P) if (P->getSUnit() == NextSU ? (P->getKind() != SDep::Anti || P->getReg() != AntiDepReg) : diff --git a/lib/CodeGen/CriticalAntiDepBreaker.h b/lib/CodeGen/CriticalAntiDepBreaker.h index 9e8db02..cc42dd2 100644 --- a/lib/CodeGen/CriticalAntiDepBreaker.h +++ b/lib/CodeGen/CriticalAntiDepBreaker.h @@ -72,9 +72,9 @@ namespace llvm { /// path /// of the ScheduleDAG and break them by renaming registers. /// - unsigned BreakAntiDependencies(std::vector<SUnit>& SUnits, - MachineBasicBlock::iterator& Begin, - MachineBasicBlock::iterator& End, + unsigned BreakAntiDependencies(const std::vector<SUnit>& SUnits, + MachineBasicBlock::iterator Begin, + MachineBasicBlock::iterator End, unsigned InsertPosIndex); /// Observe - Update liveness information to account for the current diff --git a/lib/CodeGen/DwarfEHPrepare.cpp b/lib/CodeGen/DwarfEHPrepare.cpp index 7dbfd7d..f6739f4 100644 --- a/lib/CodeGen/DwarfEHPrepare.cpp +++ b/lib/CodeGen/DwarfEHPrepare.cpp @@ -34,6 +34,7 @@ STATISTIC(NumStackTempsIntroduced, "Number of stack temporaries introduced"); namespace { class DwarfEHPrepare : public FunctionPass { + const TargetMachine *TM; const TargetLowering *TLI; bool CompileFast; @@ -84,7 +85,7 @@ namespace { } /// CleanupSelectors - Any remaining eh.selector intrinsic calls which still - /// use the ".llvm.eh.catch.all.value" call need to convert to using it's + /// use the ".llvm.eh.catch.all.value" call need to convert to using its /// initializer instead. bool CleanupSelectors(); @@ -154,8 +155,9 @@ namespace { public: static char ID; // Pass identification, replacement for typeid. - DwarfEHPrepare(const TargetLowering *tli, bool fast) : - FunctionPass(&ID), TLI(tli), CompileFast(fast), + DwarfEHPrepare(const TargetMachine *tm, bool fast) : + FunctionPass(&ID), TM(tm), TLI(TM->getTargetLowering()), + CompileFast(fast), ExceptionValueIntrinsic(0), SelectorIntrinsic(0), URoR(0), EHCatchAllValue(0), RewindFunction(0) {} @@ -180,8 +182,8 @@ namespace { char DwarfEHPrepare::ID = 0; -FunctionPass *llvm::createDwarfEHPass(const TargetLowering *tli, bool fast) { - return new DwarfEHPrepare(tli, fast); +FunctionPass *llvm::createDwarfEHPass(const TargetMachine *tm, bool fast) { + return new DwarfEHPrepare(tm, fast); } /// FindAllCleanupSelectors - Find all eh.selector calls that are clean-ups. @@ -218,7 +220,7 @@ FindAllURoRInvokes(SmallPtrSet<InvokeInst*, 32> &URoRInvokes) { } /// CleanupSelectors - Any remaining eh.selector intrinsic calls which still use -/// the ".llvm.eh.catch.all.value" call need to convert to using it's +/// the ".llvm.eh.catch.all.value" call need to convert to using its /// initializer instead. 
bool DwarfEHPrepare::CleanupSelectors() { if (!EHCatchAllValue) return false; @@ -421,7 +423,7 @@ bool DwarfEHPrepare::HandleURoRInvokes() { bool DwarfEHPrepare::NormalizeLandingPads() { bool Changed = false; - const MCAsmInfo *MAI = TLI->getTargetMachine().getMCAsmInfo(); + const MCAsmInfo *MAI = TM->getMCAsmInfo(); bool usingSjLjEH = MAI->getExceptionHandlingType() == ExceptionHandling::SjLj; for (Function::iterator I = F->begin(), E = F->end(); I != E; ++I) { diff --git a/lib/CodeGen/ELFWriter.cpp b/lib/CodeGen/ELFWriter.cpp index eda167c..b644ebe 100644 --- a/lib/CodeGen/ELFWriter.cpp +++ b/lib/CodeGen/ELFWriter.cpp @@ -208,7 +208,7 @@ ELFSection &ELFWriter::getDtorSection() { } // getTextSection - Get the text section for the specified function -ELFSection &ELFWriter::getTextSection(Function *F) { +ELFSection &ELFWriter::getTextSection(const Function *F) { const MCSectionELF *Text = (const MCSectionELF *)TLOF.SectionForGlobal(F, Mang, TM); return getSection(Text->getSectionName(), Text->getType(), Text->getFlags()); @@ -507,7 +507,7 @@ void ELFWriter::EmitGlobalConstant(const Constant *CV, ELFSection &GblS) { std::string msg; raw_string_ostream ErrorMsg(msg); ErrorMsg << "Constant unimp for type: " << *CV->getType(); - llvm_report_error(ErrorMsg.str()); + report_fatal_error(ErrorMsg.str()); } // ResolveConstantExpr - Resolve the constant expression until it stop @@ -572,10 +572,8 @@ CstExprResTy ELFWriter::ResolveConstantExpr(const Constant *CV) { } } - std::string msg(CE->getOpcodeName()); - raw_string_ostream ErrorMsg(msg); - ErrorMsg << ": Unsupported ConstantExpr type"; - llvm_report_error(ErrorMsg.str()); + report_fatal_error(CE->getOpcodeName() + + StringRef(": Unsupported ConstantExpr type")); return std::make_pair(CV, 0); // silence warning } diff --git a/lib/CodeGen/ELFWriter.h b/lib/CodeGen/ELFWriter.h index b61b484..db66ecc 100644 --- a/lib/CodeGen/ELFWriter.h +++ b/lib/CodeGen/ELFWriter.h @@ -191,7 +191,7 @@ namespace llvm { ELFSection &getDtorSection(); ELFSection &getJumpTableSection(); ELFSection &getConstantPoolSection(MachineConstantPoolEntry &CPE); - ELFSection &getTextSection(Function *F); + ELFSection &getTextSection(const Function *F); ELFSection &getRelocSection(ELFSection &S); // Helpers for obtaining ELF specific info. diff --git a/lib/CodeGen/ExactHazardRecognizer.cpp b/lib/CodeGen/ExactHazardRecognizer.cpp index 61959bb..af5f289 100644 --- a/lib/CodeGen/ExactHazardRecognizer.cpp +++ b/lib/CodeGen/ExactHazardRecognizer.cpp @@ -29,7 +29,7 @@ ExactHazardRecognizer(const InstrItineraryData &LItinData) : // Determine the maximum depth of any itinerary. This determines the // depth of the scoreboard. We always make the scoreboard at least 1 // cycle deep to avoid dealing with the boundary condition. 
- ScoreboardDepth = 1; + unsigned ScoreboardDepth = 1; if (!ItinData.isEmpty()) { for (unsigned idx = 0; ; ++idx) { if (ItinData.isEndMarker(idx)) @@ -45,35 +45,27 @@ ExactHazardRecognizer(const InstrItineraryData &LItinData) : } } - Scoreboard = new unsigned[ScoreboardDepth]; - ScoreboardHead = 0; + ReservedScoreboard.reset(ScoreboardDepth); + RequiredScoreboard.reset(ScoreboardDepth); DEBUG(dbgs() << "Using exact hazard recognizer: ScoreboardDepth = " << ScoreboardDepth << '\n'); } -ExactHazardRecognizer::~ExactHazardRecognizer() { - delete [] Scoreboard; -} - void ExactHazardRecognizer::Reset() { - memset(Scoreboard, 0, ScoreboardDepth * sizeof(unsigned)); - ScoreboardHead = 0; + RequiredScoreboard.reset(); + ReservedScoreboard.reset(); } -unsigned ExactHazardRecognizer::getFutureIndex(unsigned offset) { - return (ScoreboardHead + offset) % ScoreboardDepth; -} - -void ExactHazardRecognizer::dumpScoreboard() { +void ExactHazardRecognizer::ScoreBoard::dump() const { dbgs() << "Scoreboard:\n"; - - unsigned last = ScoreboardDepth - 1; - while ((last > 0) && (Scoreboard[getFutureIndex(last)] == 0)) + + unsigned last = Depth - 1; + while ((last > 0) && ((*this)[last] == 0)) last--; for (unsigned i = 0; i <= last; i++) { - unsigned FUs = Scoreboard[getFutureIndex(i)]; + unsigned FUs = (*this)[i]; dbgs() << "\t"; for (int j = 31; j >= 0; j--) dbgs() << ((FUs & (1 << j)) ? '1' : '0'); @@ -96,11 +88,23 @@ ExactHazardRecognizer::HazardType ExactHazardRecognizer::getHazardType(SUnit *SU // stage is occupied. FIXME it would be more accurate to find the // same unit free in all the cycles. for (unsigned int i = 0; i < IS->getCycles(); ++i) { - assert(((cycle + i) < ScoreboardDepth) && + assert(((cycle + i) < RequiredScoreboard.getDepth()) && "Scoreboard depth exceeded!"); - - unsigned index = getFutureIndex(cycle + i); - unsigned freeUnits = IS->getUnits() & ~Scoreboard[index]; + + unsigned freeUnits = IS->getUnits(); + switch (IS->getReservationKind()) { + default: + assert(0 && "Invalid FU reservation"); + case InstrStage::Required: + // Required FUs conflict with both reserved and required ones + freeUnits &= ~ReservedScoreboard[cycle + i]; + // FALLTHROUGH + case InstrStage::Reserved: + // Reserved FUs can conflict only with required ones. + freeUnits &= ~RequiredScoreboard[cycle + i]; + break; + } + if (!freeUnits) { DEBUG(dbgs() << "*** Hazard in cycle " << (cycle + i) << ", "); DEBUG(dbgs() << "SU(" << SU->NodeNum << "): "); @@ -108,14 +112,14 @@ ExactHazardRecognizer::HazardType ExactHazardRecognizer::getHazardType(SUnit *SU return Hazard; } } - + // Advance the cycle to the next stage. cycle += IS->getNextCycles(); } return NoHazard; } - + void ExactHazardRecognizer::EmitInstruction(SUnit *SU) { if (ItinData.isEmpty()) return; @@ -125,37 +129,52 @@ void ExactHazardRecognizer::EmitInstruction(SUnit *SU) { // Use the itinerary for the underlying instruction to reserve FU's // in the scoreboard at the appropriate future cycles. unsigned idx = SU->getInstr()->getDesc().getSchedClass(); - for (const InstrStage *IS = ItinData.beginStage(idx), + for (const InstrStage *IS = ItinData.beginStage(idx), *E = ItinData.endStage(idx); IS != E; ++IS) { // We must reserve one of the stage's units for every cycle the // stage is occupied. FIXME it would be more accurate to reserve // the same unit free in all the cycles. 
for (unsigned int i = 0; i < IS->getCycles(); ++i) { - assert(((cycle + i) < ScoreboardDepth) && + assert(((cycle + i) < RequiredScoreboard.getDepth()) && "Scoreboard depth exceeded!"); - - unsigned index = getFutureIndex(cycle + i); - unsigned freeUnits = IS->getUnits() & ~Scoreboard[index]; - + + unsigned freeUnits = IS->getUnits(); + switch (IS->getReservationKind()) { + default: + assert(0 && "Invalid FU reservation"); + case InstrStage::Required: + // Required FUs conflict with both reserved and required ones + freeUnits &= ~ReservedScoreboard[cycle + i]; + // FALLTHROUGH + case InstrStage::Reserved: + // Reserved FUs can conflict only with required ones. + freeUnits &= ~RequiredScoreboard[cycle + i]; + break; + } + // reduce to a single unit unsigned freeUnit = 0; do { freeUnit = freeUnits; freeUnits = freeUnit & (freeUnit - 1); } while (freeUnits); - + assert(freeUnit && "No function unit available!"); - Scoreboard[index] |= freeUnit; + if (IS->getReservationKind() == InstrStage::Required) + RequiredScoreboard[cycle + i] |= freeUnit; + else + ReservedScoreboard[cycle + i] |= freeUnit; } - + // Advance the cycle to the next stage. cycle += IS->getNextCycles(); } - - DEBUG(dumpScoreboard()); + + DEBUG(ReservedScoreboard.dump()); + DEBUG(RequiredScoreboard.dump()); } - + void ExactHazardRecognizer::AdvanceCycle() { - Scoreboard[ScoreboardHead] = 0; - ScoreboardHead = getFutureIndex(1); + ReservedScoreboard[0] = 0; ReservedScoreboard.advance(); + RequiredScoreboard[0] = 0; RequiredScoreboard.advance(); } diff --git a/lib/CodeGen/ExactHazardRecognizer.h b/lib/CodeGen/ExactHazardRecognizer.h index 71ac979..91c81a9 100644 --- a/lib/CodeGen/ExactHazardRecognizer.h +++ b/lib/CodeGen/ExactHazardRecognizer.h @@ -22,35 +22,60 @@ namespace llvm { class ExactHazardRecognizer : public ScheduleHazardRecognizer { - // Itinerary data for the target. - const InstrItineraryData &ItinData; - - // Scoreboard to track function unit usage. Scoreboard[0] is a + // ScoreBoard to track function unit usage. ScoreBoard[0] is a // mask of the FUs in use in the cycle currently being - // schedule. Scoreboard[1] is a mask for the next cycle. The - // Scoreboard is used as a circular buffer with the current cycle - // indicated by ScoreboardHead. - unsigned *Scoreboard; + // schedule. ScoreBoard[1] is a mask for the next cycle. The + // ScoreBoard is used as a circular buffer with the current cycle + // indicated by Head. + class ScoreBoard { + unsigned *Data; + + // The maximum number of cycles monitored by the Scoreboard. This + // value is determined based on the target itineraries to ensure + // that all hazards can be tracked. + size_t Depth; + // Indices into the Scoreboard that represent the current cycle. + size_t Head; + public: + ScoreBoard():Data(NULL), Depth(0), Head(0) { } + ~ScoreBoard() { + delete[] Data; + } + + size_t getDepth() const { return Depth; } + unsigned& operator[](size_t idx) const { + assert(Depth && "ScoreBoard was not initialized properly!"); + + return Data[(Head + idx) % Depth]; + } - // The maximum number of cycles monitored by the Scoreboard. This - // value is determined based on the target itineraries to ensure - // that all hazards can be tracked. - unsigned ScoreboardDepth; + void reset(size_t d = 1) { + if (Data == NULL) { + Depth = d; + Data = new unsigned[Depth]; + } - // Indices into the Scoreboard that represent the current cycle. 
- unsigned ScoreboardHead; + memset(Data, 0, Depth * sizeof(Data[0])); + Head = 0; + } - // Return the scoreboard index to use for 'offset' cycles in the - // future. 'offset' of 0 returns ScoreboardHead. - unsigned getFutureIndex(unsigned offset); + void advance() { + Head = (Head + 1) % Depth; + } - // Print the scoreboard. - void dumpScoreboard(); + // Print the scoreboard. + void dump() const; + }; + + // Itinerary data for the target. + const InstrItineraryData &ItinData; + + ScoreBoard ReservedScoreboard; + ScoreBoard RequiredScoreboard; public: ExactHazardRecognizer(const InstrItineraryData &ItinData); - ~ExactHazardRecognizer(); - + virtual HazardType getHazardType(SUnit *SU); virtual void Reset(); virtual void EmitInstruction(SUnit *SU); diff --git a/lib/CodeGen/GCStrategy.cpp b/lib/CodeGen/GCStrategy.cpp index 6d7cc51..790cb21 100644 --- a/lib/CodeGen/GCStrategy.cpp +++ b/lib/CodeGen/GCStrategy.cpp @@ -181,9 +181,10 @@ bool LowerIntrinsics::InsertRootInitializers(Function &F, AllocaInst **Roots, for (AllocaInst **I = Roots, **E = Roots + Count; I != E; ++I) if (!InitedRoots.count(*I)) { - new StoreInst(ConstantPointerNull::get(cast<PointerType>( - cast<PointerType>((*I)->getType())->getElementType())), - *I, IP); + StoreInst* SI = new StoreInst(ConstantPointerNull::get(cast<PointerType>( + cast<PointerType>((*I)->getType())->getElementType())), + *I); + SI->insertAfter(*I); MadeChange = true; } diff --git a/lib/CodeGen/IntrinsicLowering.cpp b/lib/CodeGen/IntrinsicLowering.cpp index 87ab7ef..e1c52f7 100644 --- a/lib/CodeGen/IntrinsicLowering.cpp +++ b/lib/CodeGen/IntrinsicLowering.cpp @@ -331,15 +331,15 @@ void IntrinsicLowering::LowerIntrinsicCall(CallInst *CI) { IRBuilder<> Builder(CI->getParent(), CI); LLVMContext &Context = CI->getContext(); - Function *Callee = CI->getCalledFunction(); + const Function *Callee = CI->getCalledFunction(); assert(Callee && "Cannot lower an indirect call!"); switch (Callee->getIntrinsicID()) { case Intrinsic::not_intrinsic: - llvm_report_error("Cannot lower a call to a non-intrinsic function '"+ + report_fatal_error("Cannot lower a call to a non-intrinsic function '"+ Callee->getName() + "'!"); default: - llvm_report_error("Code generator does not support intrinsic function '"+ + report_fatal_error("Code generator does not support intrinsic function '"+ Callee->getName()+"'!"); // The setjmp/longjmp intrinsics should only exist in the code if it was diff --git a/lib/CodeGen/LLVMTargetMachine.cpp b/lib/CodeGen/LLVMTargetMachine.cpp index ed57f4c..331dc7d 100644 --- a/lib/CodeGen/LLVMTargetMachine.cpp +++ b/lib/CodeGen/LLVMTargetMachine.cpp @@ -50,6 +50,9 @@ static cl::opt<bool> DisableSSC("disable-ssc", cl::Hidden, cl::desc("Disable Stack Slot Coloring")); static cl::opt<bool> DisableMachineLICM("disable-machine-licm", cl::Hidden, cl::desc("Disable Machine LICM")); +static cl::opt<bool> DisablePostRAMachineLICM("disable-postra-machine-licm", + cl::Hidden, + cl::desc("Disable Machine LICM")); static cl::opt<bool> DisableMachineSink("disable-machine-sink", cl::Hidden, cl::desc("Disable Machine Sinking")); static cl::opt<bool> DisableLSR("disable-lsr", cl::Hidden, @@ -245,10 +248,10 @@ bool LLVMTargetMachine::addCommonCodeGenPasses(PassManagerBase &PM, // pad is shared by multiple invokes and is also a target of a normal // edge from elsewhere. 
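The ScoreBoard helper added to ExactHazardRecognizer.h above keeps one function-unit mask per future cycle in a fixed-depth circular buffer, with index 0 meaning the current cycle. A standalone sketch of that structure under illustrative names; the real recognizer keeps two such boards, one for required and one for reserved units, and clears slot 0 before advancing rather than inside advance():

#include <cassert>
#include <cstdio>
#include <cstring>

// A fixed-depth circular buffer of bitmasks indexed relative to "now":
// buf[0] is the current cycle, buf[1] the next, and advance() retires one cycle.
class CycleMaskBuffer {
  unsigned *Data = nullptr;
  size_t Depth = 0;
  size_t Head = 0;
public:
  ~CycleMaskBuffer() { delete[] Data; }
  size_t depth() const { return Depth; }
  void reset(size_t D = 1) {
    if (!Data) { Depth = D; Data = new unsigned[Depth]; }
    std::memset(Data, 0, Depth * sizeof(Data[0]));
    Head = 0;
  }
  unsigned &operator[](size_t Idx) {
    assert(Depth && Idx < Depth && "buffer not initialized or index too far ahead");
    return Data[(Head + Idx) % Depth];
  }
  void advance() {
    (*this)[0] = 0;                    // the cycle we leave becomes a free future slot
    Head = (Head + 1) % Depth;
  }
};

int main() {
  CycleMaskBuffer FUs;
  FUs.reset(4);                        // track four cycles of function-unit usage
  FUs[0] |= 0x1;                       // unit 0 busy this cycle
  FUs[2] |= 0x2;                       // unit 1 taken two cycles from now
  FUs.advance();                       // move to the next cycle
  std::printf("mask one cycle ahead: 0x%x\n", FUs[1]);   // prints 0x2
  return 0;
}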
PM.add(createSjLjEHPass(getTargetLowering())); - PM.add(createDwarfEHPass(getTargetLowering(), OptLevel==CodeGenOpt::None)); + PM.add(createDwarfEHPass(this, OptLevel==CodeGenOpt::None)); break; case ExceptionHandling::Dwarf: - PM.add(createDwarfEHPass(getTargetLowering(), OptLevel==CodeGenOpt::None)); + PM.add(createDwarfEHPass(this, OptLevel==CodeGenOpt::None)); break; case ExceptionHandling::None: PM.add(createLowerInvokePass(getTargetLowering())); @@ -337,12 +340,18 @@ bool LLVMTargetMachine::addCommonCodeGenPasses(PassManagerBase &PM, PM.add(createRegisterAllocator()); printAndVerify(PM, "After Register Allocation"); - // Perform stack slot coloring. - if (OptLevel != CodeGenOpt::None && !DisableSSC) { + // Perform stack slot coloring and post-ra machine LICM. + if (OptLevel != CodeGenOpt::None) { // FIXME: Re-enable coloring with register when it's capable of adding // kill markers. - PM.add(createStackSlotColoringPass(false)); - printAndVerify(PM, "After StackSlotColoring"); + if (!DisableSSC) + PM.add(createStackSlotColoringPass(false)); + + // Run post-ra machine LICM to hoist reloads / remats. + if (!DisablePostRAMachineLICM) + PM.add(createMachineLICMPass(false)); + + printAndVerify(PM, "After StackSlotColoring and postra Machine LICM"); } // Run post-ra passes. diff --git a/lib/CodeGen/LiveIntervalAnalysis.cpp b/lib/CodeGen/LiveIntervalAnalysis.cpp index 23cff07..26a7190 100644 --- a/lib/CodeGen/LiveIntervalAnalysis.cpp +++ b/lib/CodeGen/LiveIntervalAnalysis.cpp @@ -665,10 +665,11 @@ void LiveIntervals::computeIntervals() { // Track the index of the current machine instr. SlotIndex MIIndex = getMBBStartIdx(MBB); - DEBUG(dbgs() << MBB->getName() << ":\n"); + DEBUG(dbgs() << "BB#" << MBB->getNumber() + << ":\t\t# derived from " << MBB->getName() << "\n"); // Create intervals for live-ins to this BB first. - for (MachineBasicBlock::const_livein_iterator LI = MBB->livein_begin(), + for (MachineBasicBlock::livein_iterator LI = MBB->livein_begin(), LE = MBB->livein_end(); LI != LE; ++LI) { handleLiveInRegister(MBB, MIIndex, getOrCreateInterval(*LI)); // Multiple live-ins can alias the same register. @@ -1296,9 +1297,26 @@ rewriteInstructionsForSpills(const LiveInterval &li, bool TrySplit, MachineOperand &O = ri.getOperand(); ++ri; if (MI->isDebugValue()) { - // Remove debug info for now. - O.setReg(0U); + // Modify DBG_VALUE now that the value is in a spill slot. + if (Slot != VirtRegMap::MAX_STACK_SLOT || isLoadSS) { + uint64_t Offset = MI->getOperand(1).getImm(); + const MDNode *MDPtr = MI->getOperand(2).getMetadata(); + DebugLoc DL = MI->getDebugLoc(); + int FI = isLoadSS ? 
LdSlot : (int)Slot; + if (MachineInstr *NewDV = tii_->emitFrameIndexDebugValue(*mf_, FI, + Offset, MDPtr, DL)) { + DEBUG(dbgs() << "Modifying debug info due to spill:" << "\t" << *MI); + ReplaceMachineInstrInMaps(MI, NewDV); + MachineBasicBlock *MBB = MI->getParent(); + MBB->insert(MBB->erase(MI), NewDV); + continue; + } + } + DEBUG(dbgs() << "Removing debug info due to spill:" << "\t" << *MI); + RemoveMachineInstrFromMaps(MI); + vrm.RemoveMachineInstrFromMaps(MI); + MI->eraseFromParent(); continue; } assert(!O.isImplicit() && "Spilling register that's used as implicit use?"); @@ -2085,7 +2103,7 @@ bool LiveIntervals::spillPhysRegAroundRegDefsUses(const LiveInterval &li, << "constraints:\n"; MI->print(Msg, tm_); } - llvm_report_error(Msg.str()); + report_fatal_error(Msg.str()); } for (const unsigned* AS = tri_->getSubRegisters(PReg); *AS; ++AS) { if (!hasInterval(*AS)) diff --git a/lib/CodeGen/LiveVariables.cpp b/lib/CodeGen/LiveVariables.cpp index ca8ecff..079684e 100644 --- a/lib/CodeGen/LiveVariables.cpp +++ b/lib/CodeGen/LiveVariables.cpp @@ -531,7 +531,7 @@ bool LiveVariables::runOnMachineFunction(MachineFunction &mf) { // Mark live-in registers as live-in. SmallVector<unsigned, 4> Defs; - for (MachineBasicBlock::const_livein_iterator II = MBB->livein_begin(), + for (MachineBasicBlock::livein_iterator II = MBB->livein_begin(), EE = MBB->livein_end(); II != EE; ++II) { assert(TargetRegisterInfo::isPhysicalRegister(*II) && "Cannot have a live-in virtual register!"); diff --git a/lib/CodeGen/MachineBasicBlock.cpp b/lib/CodeGen/MachineBasicBlock.cpp index bd0ccb4..eaaa1f8 100644 --- a/lib/CodeGen/MachineBasicBlock.cpp +++ b/lib/CodeGen/MachineBasicBlock.cpp @@ -191,7 +191,7 @@ void MachineBasicBlock::print(raw_ostream &OS) const { const TargetRegisterInfo *TRI = MF->getTarget().getRegisterInfo(); if (!livein_empty()) { OS << " Live Ins:"; - for (const_livein_iterator I = livein_begin(),E = livein_end(); I != E; ++I) + for (livein_iterator I = livein_begin(),E = livein_end(); I != E; ++I) OutputReg(OS, *I, TRI); OS << '\n'; } @@ -218,13 +218,14 @@ void MachineBasicBlock::print(raw_ostream &OS) const { } void MachineBasicBlock::removeLiveIn(unsigned Reg) { - livein_iterator I = std::find(livein_begin(), livein_end(), Reg); - assert(I != livein_end() && "Not a live in!"); + std::vector<unsigned>::iterator I = + std::find(LiveIns.begin(), LiveIns.end(), Reg); + assert(I != LiveIns.end() && "Not a live in!"); LiveIns.erase(I); } bool MachineBasicBlock::isLiveIn(unsigned Reg) const { - const_livein_iterator I = std::find(livein_begin(), livein_end(), Reg); + livein_iterator I = std::find(livein_begin(), livein_end(), Reg); return I != livein_end(); } diff --git a/lib/CodeGen/MachineCSE.cpp b/lib/CodeGen/MachineCSE.cpp index 597d51d..84c3d71 100644 --- a/lib/CodeGen/MachineCSE.cpp +++ b/lib/CodeGen/MachineCSE.cpp @@ -20,6 +20,7 @@ #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Target/TargetInstrInfo.h" +#include "llvm/ADT/DenseMap.h" #include "llvm/ADT/ScopedHashTable.h" #include "llvm/ADT/Statistic.h" #include "llvm/Support/Debug.h" @@ -51,9 +52,12 @@ namespace { } private: - unsigned CurrVN; + typedef ScopedHashTableScope<MachineInstr*, unsigned, + MachineInstrExpressionTrait> ScopeType; + DenseMap<MachineBasicBlock*, ScopeType*> ScopeMap; ScopedHashTable<MachineInstr*, unsigned, MachineInstrExpressionTrait> VNT; SmallVector<MachineInstr*, 64> Exps; + unsigned CurrVN; bool PerformTrivialCoalescing(MachineInstr *MI, MachineBasicBlock *MBB); 
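MachineCSE now replaces its recursive dominator-tree walk with an explicit work list: each block opens a scope when it is entered (see EnterScope and PerformCSE below), and scopes are closed bottom-up once every child in the dominator tree has been processed (ExitScopeIfDone). A self-contained sketch of that traversal shape, with Node and the printouts standing in for dominator-tree nodes and the scoped hash-table scopes:

#include <cstdio>
#include <map>
#include <string>
#include <vector>

struct Node { std::string Name; std::vector<Node *> Children; };

int main() {
  Node D{"D", {}}, C{"C", {}}, B{"B", {&D}}, A{"A", {&B, &C}};

  // DFS to fix the visit order and record how many children remain open.
  std::vector<Node *> Order, Work{&A};
  std::map<Node *, Node *> Parent;
  std::map<Node *, unsigned> OpenChildren;
  while (!Work.empty()) {
    Node *N = Work.back(); Work.pop_back();
    Order.push_back(N);
    OpenChildren[N] = N->Children.size();
    for (Node *Child : N->Children) { Parent[Child] = N; Work.push_back(Child); }
  }

  // Process blocks; a node's scope stays open until all of its children are done.
  for (Node *N : Order) {
    std::printf("enter %s\n", N->Name.c_str());
    // ... value-number the block's instructions here ...
    Node *Done = N;
    while (Done && OpenChildren[Done] == 0) {      // leaf, or all children closed
      std::printf("exit  %s\n", Done->Name.c_str());
      Node *P = Parent.count(Done) ? Parent[Done] : nullptr;
      if (P) --OpenChildren[P];
      Done = P;
    }
  }
  return 0;
}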
bool isPhysDefTriviallyDead(unsigned Reg, @@ -63,7 +67,13 @@ namespace { bool isCSECandidate(MachineInstr *MI); bool isProfitableToCSE(unsigned CSReg, unsigned Reg, MachineInstr *CSMI, MachineInstr *MI); - bool ProcessBlock(MachineDomTreeNode *Node); + void EnterScope(MachineBasicBlock *MBB); + void ExitScope(MachineBasicBlock *MBB); + bool ProcessBlock(MachineBasicBlock *MBB); + void ExitScopeIfDone(MachineDomTreeNode *Node, + DenseMap<MachineDomTreeNode*, unsigned> &OpenChildren, + DenseMap<MachineDomTreeNode*, MachineDomTreeNode*> &ParentMap); + bool PerformCSE(MachineDomTreeNode *Node); }; } // end anonymous namespace @@ -277,13 +287,24 @@ bool MachineCSE::isProfitableToCSE(unsigned CSReg, unsigned Reg, return CSBBs.count(MI->getParent()); } -bool MachineCSE::ProcessBlock(MachineDomTreeNode *Node) { +void MachineCSE::EnterScope(MachineBasicBlock *MBB) { + DEBUG(dbgs() << "Entering: " << MBB->getName() << '\n'); + ScopeType *Scope = new ScopeType(VNT); + ScopeMap[MBB] = Scope; +} + +void MachineCSE::ExitScope(MachineBasicBlock *MBB) { + DEBUG(dbgs() << "Exiting: " << MBB->getName() << '\n'); + DenseMap<MachineBasicBlock*, ScopeType*>::iterator SI = ScopeMap.find(MBB); + assert(SI != ScopeMap.end()); + ScopeMap.erase(SI); + delete SI->second; +} + +bool MachineCSE::ProcessBlock(MachineBasicBlock *MBB) { bool Changed = false; SmallVector<std::pair<unsigned, unsigned>, 8> CSEPairs; - ScopedHashTableScope<MachineInstr*, unsigned, - MachineInstrExpressionTrait> VNTS(VNT); - MachineBasicBlock *MBB = Node->getBlock(); for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end(); I != E; ) { MachineInstr *MI = &*I; ++I; @@ -356,10 +377,63 @@ bool MachineCSE::ProcessBlock(MachineDomTreeNode *Node) { CSEPairs.clear(); } - // Recursively call ProcessBlock with childred. - const std::vector<MachineDomTreeNode*> &Children = Node->getChildren(); - for (unsigned i = 0, e = Children.size(); i != e; ++i) - Changed |= ProcessBlock(Children[i]); + return Changed; +} + +/// ExitScopeIfDone - Destroy scope for the MBB that corresponds to the given +/// dominator tree node if its a leaf or all of its children are done. Walk +/// up the dominator tree to destroy ancestors which are now done. +void +MachineCSE::ExitScopeIfDone(MachineDomTreeNode *Node, + DenseMap<MachineDomTreeNode*, unsigned> &OpenChildren, + DenseMap<MachineDomTreeNode*, MachineDomTreeNode*> &ParentMap) { + if (OpenChildren[Node]) + return; + + // Pop scope. + ExitScope(Node->getBlock()); + + // Now traverse upwards to pop ancestors whose offsprings are all done. + while (MachineDomTreeNode *Parent = ParentMap[Node]) { + unsigned Left = --OpenChildren[Parent]; + if (Left != 0) + break; + ExitScope(Parent->getBlock()); + Node = Parent; + } +} + +bool MachineCSE::PerformCSE(MachineDomTreeNode *Node) { + SmallVector<MachineDomTreeNode*, 32> Scopes; + SmallVector<MachineDomTreeNode*, 8> WorkList; + DenseMap<MachineDomTreeNode*, MachineDomTreeNode*> ParentMap; + DenseMap<MachineDomTreeNode*, unsigned> OpenChildren; + + // Perform a DFS walk to determine the order of visit. + WorkList.push_back(Node); + do { + Node = WorkList.pop_back_val(); + Scopes.push_back(Node); + const std::vector<MachineDomTreeNode*> &Children = Node->getChildren(); + unsigned NumChildren = Children.size(); + OpenChildren[Node] = NumChildren; + for (unsigned i = 0; i != NumChildren; ++i) { + MachineDomTreeNode *Child = Children[i]; + ParentMap[Child] = Node; + WorkList.push_back(Child); + } + } while (!WorkList.empty()); + + // Now perform CSE. 
+ bool Changed = false; + for (unsigned i = 0, e = Scopes.size(); i != e; ++i) { + MachineDomTreeNode *Node = Scopes[i]; + MachineBasicBlock *MBB = Node->getBlock(); + EnterScope(MBB); + Changed |= ProcessBlock(MBB); + // If it's a leaf node, it's done. Traverse upwards to pop ancestors. + ExitScopeIfDone(Node, OpenChildren, ParentMap); + } return Changed; } @@ -370,5 +444,5 @@ bool MachineCSE::runOnMachineFunction(MachineFunction &MF) { MRI = &MF.getRegInfo(); AA = &getAnalysis<AliasAnalysis>(); DT = &getAnalysis<MachineDominatorTree>(); - return ProcessBlock(DT->getRootNode()); + return PerformCSE(DT->getRootNode()); } diff --git a/lib/CodeGen/MachineFunction.cpp b/lib/CodeGen/MachineFunction.cpp index e4ed7db..3cf10b3 100644 --- a/lib/CodeGen/MachineFunction.cpp +++ b/lib/CodeGen/MachineFunction.cpp @@ -51,7 +51,7 @@ void ilist_traits<MachineBasicBlock>::deleteNode(MachineBasicBlock *MBB) { MBB->getParent()->DeleteMachineBasicBlock(MBB); } -MachineFunction::MachineFunction(Function *F, const TargetMachine &TM, +MachineFunction::MachineFunction(const Function *F, const TargetMachine &TM, unsigned FunctionNum, MachineModuleInfo &mmi) : Fn(F), Target(TM), Ctx(mmi.getContext()), MMI(mmi) { if (TM.getRegisterInfo()) @@ -630,7 +630,7 @@ MachineConstantPool::~MachineConstantPool() { /// CanShareConstantPoolEntry - Test whether the given two constants /// can be allocated the same constant pool entry. -static bool CanShareConstantPoolEntry(Constant *A, Constant *B, +static bool CanShareConstantPoolEntry(const Constant *A, const Constant *B, const TargetData *TD) { // Handle the trivial case quickly. if (A == B) return true; @@ -645,17 +645,17 @@ static bool CanShareConstantPoolEntry(Constant *A, Constant *B, // If a floating-point value and an integer value have the same encoding, // they can share a constant-pool entry. - if (ConstantFP *AFP = dyn_cast<ConstantFP>(A)) - if (ConstantInt *BI = dyn_cast<ConstantInt>(B)) + if (const ConstantFP *AFP = dyn_cast<ConstantFP>(A)) + if (const ConstantInt *BI = dyn_cast<ConstantInt>(B)) return AFP->getValueAPF().bitcastToAPInt() == BI->getValue(); - if (ConstantFP *BFP = dyn_cast<ConstantFP>(B)) - if (ConstantInt *AI = dyn_cast<ConstantInt>(A)) + if (const ConstantFP *BFP = dyn_cast<ConstantFP>(B)) + if (const ConstantInt *AI = dyn_cast<ConstantInt>(A)) return BFP->getValueAPF().bitcastToAPInt() == AI->getValue(); // Two vectors can share an entry if each pair of corresponding // elements could. - if (ConstantVector *AV = dyn_cast<ConstantVector>(A)) - if (ConstantVector *BV = dyn_cast<ConstantVector>(B)) { + if (const ConstantVector *AV = dyn_cast<ConstantVector>(A)) + if (const ConstantVector *BV = dyn_cast<ConstantVector>(B)) { if (AV->getType()->getNumElements() != BV->getType()->getNumElements()) return false; for (unsigned i = 0, e = AV->getType()->getNumElements(); i != e; ++i) @@ -674,7 +674,7 @@ static bool CanShareConstantPoolEntry(Constant *A, Constant *B, /// an existing one. User must specify the log2 of the minimum required /// alignment for the object. 
/// -unsigned MachineConstantPool::getConstantPoolIndex(Constant *C, +unsigned MachineConstantPool::getConstantPoolIndex(const Constant *C, unsigned Alignment) { assert(Alignment && "Alignment must be specified!"); if (Alignment > PoolAlignment) PoolAlignment = Alignment; diff --git a/lib/CodeGen/MachineFunctionAnalysis.cpp b/lib/CodeGen/MachineFunctionAnalysis.cpp index 3b2eb6d..07a0f45 100644 --- a/lib/CodeGen/MachineFunctionAnalysis.cpp +++ b/lib/CodeGen/MachineFunctionAnalysis.cpp @@ -44,7 +44,8 @@ bool MachineFunctionAnalysis::doInitialization(Module &M) { MachineModuleInfo *MMI = getAnalysisIfAvailable<MachineModuleInfo>(); assert(MMI && "MMI not around yet??"); MMI->setModule(&M); - NextFnNum = 1; return false; + NextFnNum = 0; + return false; } diff --git a/lib/CodeGen/MachineInstr.cpp b/lib/CodeGen/MachineInstr.cpp index 39b7fb5..99b5beb 100644 --- a/lib/CodeGen/MachineInstr.cpp +++ b/lib/CodeGen/MachineInstr.cpp @@ -192,6 +192,8 @@ bool MachineOperand::isIdenticalTo(const MachineOperand &Other) const { return getBlockAddress() == Other.getBlockAddress(); case MachineOperand::MO_MCSymbol: return getMCSymbol() == Other.getMCSymbol(); + case MachineOperand::MO_Metadata: + return getMetadata() == Other.getMetadata(); } } @@ -409,19 +411,14 @@ void MachineInstr::addImplicitDefUseOperands() { addOperand(MachineOperand::CreateReg(*ImpUses, false, true)); } -/// MachineInstr ctor - This constructor create a MachineInstr and add the -/// implicit operands. It reserves space for number of operands specified by -/// TargetInstrDesc or the numOperands if it is not zero. (for -/// instructions with variable number of operands). +/// MachineInstr ctor - This constructor creates a MachineInstr and adds the +/// implicit operands. It reserves space for the number of operands specified by +/// the TargetInstrDesc. MachineInstr::MachineInstr(const TargetInstrDesc &tid, bool NoImp) : TID(&tid), NumImplicitOps(0), AsmPrinterFlags(0), MemRefs(0), MemRefsEnd(0), Parent(0) { - if (!NoImp && TID->getImplicitDefs()) - for (const unsigned *ImpDefs = TID->getImplicitDefs(); *ImpDefs; ++ImpDefs) - NumImplicitOps++; - if (!NoImp && TID->getImplicitUses()) - for (const unsigned *ImpUses = TID->getImplicitUses(); *ImpUses; ++ImpUses) - NumImplicitOps++; + if (!NoImp) + NumImplicitOps = TID->getNumImplicitDefs() + TID->getNumImplicitUses(); Operands.reserve(NumImplicitOps + TID->getNumOperands()); if (!NoImp) addImplicitDefUseOperands(); @@ -434,12 +431,8 @@ MachineInstr::MachineInstr(const TargetInstrDesc &tid, const DebugLoc dl, bool NoImp) : TID(&tid), NumImplicitOps(0), AsmPrinterFlags(0), MemRefs(0), MemRefsEnd(0), Parent(0), debugLoc(dl) { - if (!NoImp && TID->getImplicitDefs()) - for (const unsigned *ImpDefs = TID->getImplicitDefs(); *ImpDefs; ++ImpDefs) - NumImplicitOps++; - if (!NoImp && TID->getImplicitUses()) - for (const unsigned *ImpUses = TID->getImplicitUses(); *ImpUses; ++ImpUses) - NumImplicitOps++; + if (!NoImp) + NumImplicitOps = TID->getNumImplicitDefs() + TID->getNumImplicitUses(); Operands.reserve(NumImplicitOps + TID->getNumOperands()); if (!NoImp) addImplicitDefUseOperands(); @@ -450,17 +443,11 @@ MachineInstr::MachineInstr(const TargetInstrDesc &tid, const DebugLoc dl, /// MachineInstr ctor - Work exactly the same as the ctor two above, except /// that the MachineInstr is created and added to the end of the specified /// basic block. 
-/// MachineInstr::MachineInstr(MachineBasicBlock *MBB, const TargetInstrDesc &tid) : TID(&tid), NumImplicitOps(0), AsmPrinterFlags(0), MemRefs(0), MemRefsEnd(0), Parent(0) { assert(MBB && "Cannot use inserting ctor with null basic block!"); - if (TID->ImplicitDefs) - for (const unsigned *ImpDefs = TID->getImplicitDefs(); *ImpDefs; ++ImpDefs) - NumImplicitOps++; - if (TID->ImplicitUses) - for (const unsigned *ImpUses = TID->getImplicitUses(); *ImpUses; ++ImpUses) - NumImplicitOps++; + NumImplicitOps = TID->getNumImplicitDefs() + TID->getNumImplicitUses(); Operands.reserve(NumImplicitOps + TID->getNumOperands()); addImplicitDefUseOperands(); // Make sure that we get added to a machine basicblock @@ -475,12 +462,7 @@ MachineInstr::MachineInstr(MachineBasicBlock *MBB, const DebugLoc dl, : TID(&tid), NumImplicitOps(0), AsmPrinterFlags(0), MemRefs(0), MemRefsEnd(0), Parent(0), debugLoc(dl) { assert(MBB && "Cannot use inserting ctor with null basic block!"); - if (TID->ImplicitDefs) - for (const unsigned *ImpDefs = TID->getImplicitDefs(); *ImpDefs; ++ImpDefs) - NumImplicitOps++; - if (TID->ImplicitUses) - for (const unsigned *ImpUses = TID->getImplicitUses(); *ImpUses; ++ImpUses) - NumImplicitOps++; + NumImplicitOps = TID->getNumImplicitDefs() + TID->getNumImplicitUses(); Operands.reserve(NumImplicitOps + TID->getNumOperands()); addImplicitDefUseOperands(); // Make sure that we get added to a machine basicblock @@ -1123,6 +1105,19 @@ unsigned MachineInstr::isConstantValuePHI() const { return Reg; } +/// allDefsAreDead - Return true if all the defs of this instruction are dead. +/// +bool MachineInstr::allDefsAreDead() const { + for (unsigned i = 0, e = getNumOperands(); i < e; ++i) { + const MachineOperand &MO = getOperand(i); + if (!MO.isReg() || MO.isUse()) + continue; + if (!MO.isDead()) + return false; + } + return true; +} + void MachineInstr::dump() const { dbgs() << " " << *this; } @@ -1192,7 +1187,15 @@ void MachineInstr::print(raw_ostream &OS, const TargetMachine *TM) const { if (TOI.isOptionalDef()) OS << "opt:"; } - MO.print(OS, TM); + if (isDebugValue() && MO.isMetadata()) { + // Pretty print DBG_VALUE instructions. + const MDNode *MD = MO.getMetadata(); + if (const MDString *MDS = dyn_cast<MDString>(MD->getOperand(2))) + OS << "!\"" << MDS->getString() << '\"'; + else + MO.print(OS, TM); + } else + MO.print(OS, TM); } // Briefly indicate whether any call clobbers were omitted. 
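The MachineInstr constructor changes above rely on the implicit def and use lists in TargetInstrDesc being zero-terminated arrays, so walking a list to its sentinel counts the same entries that the cached getNumImplicitDefs() and getNumImplicitUses() values report. A minimal standalone sketch of that counting pattern (function name and register values here are illustrative only, not part of the patch):

    #include <cassert>

    // Count the entries of a zero-terminated register list, the same walk the
    // removed constructor loops performed over the implicit def/use arrays.
    static unsigned countSentinelTerminated(const unsigned *List) {
      unsigned N = 0;
      if (List)
        for (const unsigned *P = List; *P; ++P)
          ++N;
      return N;
    }

    int main() {
      const unsigned ImpDefs[] = { 3, 7, 0 };   // hypothetical register numbers
      assert(countSentinelTerminated(ImpDefs) == 2);
      assert(countSentinelTerminated(0) == 0);  // a null list means no implicit operands
      return 0;
    }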
diff --git a/lib/CodeGen/MachineLICM.cpp b/lib/CodeGen/MachineLICM.cpp index 0361694..b2e757d 100644 --- a/lib/CodeGen/MachineLICM.cpp +++ b/lib/CodeGen/MachineLICM.cpp @@ -22,8 +22,8 @@ #define DEBUG_TYPE "machine-licm" #include "llvm/CodeGen/Passes.h" -#include "llvm/CodeGen/MachineConstantPool.h" #include "llvm/CodeGen/MachineDominators.h" +#include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineLoopInfo.h" #include "llvm/CodeGen/MachineMemOperand.h" #include "llvm/CodeGen/MachineRegisterInfo.h" @@ -33,6 +33,7 @@ #include "llvm/Target/TargetMachine.h" #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/SmallSet.h" #include "llvm/ADT/Statistic.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" @@ -41,32 +42,41 @@ using namespace llvm; STATISTIC(NumHoisted, "Number of machine instructions hoisted out of loops"); STATISTIC(NumCSEed, "Number of hoisted machine instructions CSEed"); +STATISTIC(NumPostRAHoisted, + "Number of machine instructions hoisted out of loops post regalloc"); namespace { class MachineLICM : public MachineFunctionPass { - MachineConstantPool *MCP; + bool PreRegAlloc; + const TargetMachine *TM; const TargetInstrInfo *TII; const TargetRegisterInfo *TRI; - BitVector AllocatableSet; + const MachineFrameInfo *MFI; + MachineRegisterInfo *RegInfo; // Various analyses that we use... AliasAnalysis *AA; // Alias analysis info. - MachineLoopInfo *LI; // Current MachineLoopInfo + MachineLoopInfo *MLI; // Current MachineLoopInfo MachineDominatorTree *DT; // Machine dominator tree for the cur loop - MachineRegisterInfo *RegInfo; // Machine register information // State that is updated as we process loops bool Changed; // True if a loop is changed. - bool FirstInLoop; // True if it's the first LICM in the loop. MachineLoop *CurLoop; // The current loop we are working on. MachineBasicBlock *CurPreheader; // The preheader for CurLoop. + BitVector AllocatableSet; + // For each opcode, keep a list of potentail CSE instructions. DenseMap<unsigned, std::vector<const MachineInstr*> > CSEMap; + public: static char ID; // Pass identification, replacement for typeid - MachineLICM() : MachineFunctionPass(&ID) {} + MachineLICM() : + MachineFunctionPass(&ID), PreRegAlloc(true) {} + + explicit MachineLICM(bool PreRA) : + MachineFunctionPass(&ID), PreRegAlloc(PreRA) {} virtual bool runOnMachineFunction(MachineFunction &MF); @@ -88,6 +98,39 @@ namespace { } private: + /// CandidateInfo - Keep track of information about hoisting candidates. + struct CandidateInfo { + MachineInstr *MI; + unsigned Def; + int FI; + CandidateInfo(MachineInstr *mi, unsigned def, int fi) + : MI(mi), Def(def), FI(fi) {} + }; + + /// HoistRegionPostRA - Walk the specified region of the CFG and hoist loop + /// invariants out to the preheader. + void HoistRegionPostRA(); + + /// HoistPostRA - When an instruction is found to only use loop invariant + /// operands that is safe to hoist, this instruction is called to do the + /// dirty work. + void HoistPostRA(MachineInstr *MI, unsigned Def); + + /// ProcessMI - Examine the instruction for potentai LICM candidate. Also + /// gather register def and frame object update information. + void ProcessMI(MachineInstr *MI, unsigned *PhysRegDefs, + SmallSet<int, 32> &StoredFIs, + SmallVector<CandidateInfo, 32> &Candidates); + + /// AddToLiveIns - Add register 'Reg' to the livein sets of BBs in the + /// current loop. 
+ void AddToLiveIns(unsigned Reg); + + /// IsLICMCandidate - Returns true if the instruction may be a suitable + /// candidate for LICM. e.g. If the instruction is a call, then it's obviously + /// not safe to hoist it. + bool IsLICMCandidate(MachineInstr &I); + /// IsLoopInvariantInst - Returns true if the instruction is loop /// invariant. I.e., all virtual register operands are defined outside of /// the loop, physical registers aren't accessed (explicitly or implicitly), @@ -145,7 +188,9 @@ char MachineLICM::ID = 0; static RegisterPass<MachineLICM> X("machinelicm", "Machine Loop Invariant Code Motion"); -FunctionPass *llvm::createMachineLICMPass() { return new MachineLICM(); } +FunctionPass *llvm::createMachineLICMPass(bool PreRegAlloc) { + return new MachineLICM(PreRegAlloc); +} /// LoopIsOuterMostWithPreheader - Test if the given loop is the outer-most /// loop that has a preheader. @@ -156,31 +201,31 @@ static bool LoopIsOuterMostWithPreheader(MachineLoop *CurLoop) { return true; } -/// Hoist expressions out of the specified loop. Note, alias info for inner loop -/// is not preserved so it is not a good idea to run LICM multiple times on one -/// loop. -/// bool MachineLICM::runOnMachineFunction(MachineFunction &MF) { - DEBUG(dbgs() << "******** Machine LICM ********\n"); + if (PreRegAlloc) + DEBUG(dbgs() << "******** Pre-regalloc Machine LICM ********\n"); + else + DEBUG(dbgs() << "******** Post-regalloc Machine LICM ********\n"); - Changed = FirstInLoop = false; - MCP = MF.getConstantPool(); + Changed = false; TM = &MF.getTarget(); TII = TM->getInstrInfo(); TRI = TM->getRegisterInfo(); + MFI = MF.getFrameInfo(); RegInfo = &MF.getRegInfo(); AllocatableSet = TRI->getAllocatableSet(MF); // Get our Loop information... - LI = &getAnalysis<MachineLoopInfo>(); - DT = &getAnalysis<MachineDominatorTree>(); - AA = &getAnalysis<AliasAnalysis>(); + MLI = &getAnalysis<MachineLoopInfo>(); + DT = &getAnalysis<MachineDominatorTree>(); + AA = &getAnalysis<AliasAnalysis>(); - for (MachineLoopInfo::iterator I = LI->begin(), E = LI->end(); I != E; ++I) { + for (MachineLoopInfo::iterator I = MLI->begin(), E = MLI->end(); I != E; ++I){ CurLoop = *I; - // Only visit outer-most preheader-sporting loops. - if (!LoopIsOuterMostWithPreheader(CurLoop)) + // If this is done before regalloc, only visit outer-most preheader-sporting + // loops. + if (PreRegAlloc && !LoopIsOuterMostWithPreheader(CurLoop)) continue; // Determine the block to which to hoist instructions. If we can't find a @@ -193,16 +238,230 @@ bool MachineLICM::runOnMachineFunction(MachineFunction &MF) { if (!CurPreheader) continue; - // CSEMap is initialized for loop header when the first instruction is - // being hoisted. - FirstInLoop = true; - HoistRegion(DT->getNode(CurLoop->getHeader())); - CSEMap.clear(); + if (!PreRegAlloc) + HoistRegionPostRA(); + else { + // CSEMap is initialized for loop header when the first instruction is + // being hoisted. + MachineDomTreeNode *N = DT->getNode(CurLoop->getHeader()); + HoistRegion(N); + CSEMap.clear(); + } } return Changed; } +/// InstructionStoresToFI - Return true if instruction stores to the +/// specified frame. 
+static bool InstructionStoresToFI(const MachineInstr *MI, int FI) { + for (MachineInstr::mmo_iterator o = MI->memoperands_begin(), + oe = MI->memoperands_end(); o != oe; ++o) { + if (!(*o)->isStore() || !(*o)->getValue()) + continue; + if (const FixedStackPseudoSourceValue *Value = + dyn_cast<const FixedStackPseudoSourceValue>((*o)->getValue())) { + if (Value->getFrameIndex() == FI) + return true; + } + } + return false; +} + +/// ProcessMI - Examine the instruction for potentai LICM candidate. Also +/// gather register def and frame object update information. +void MachineLICM::ProcessMI(MachineInstr *MI, + unsigned *PhysRegDefs, + SmallSet<int, 32> &StoredFIs, + SmallVector<CandidateInfo, 32> &Candidates) { + bool RuledOut = false; + bool HasNonInvariantUse = false; + unsigned Def = 0; + for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { + const MachineOperand &MO = MI->getOperand(i); + if (MO.isFI()) { + // Remember if the instruction stores to the frame index. + int FI = MO.getIndex(); + if (!StoredFIs.count(FI) && + MFI->isSpillSlotObjectIndex(FI) && + InstructionStoresToFI(MI, FI)) + StoredFIs.insert(FI); + HasNonInvariantUse = true; + continue; + } + + if (!MO.isReg()) + continue; + unsigned Reg = MO.getReg(); + if (!Reg) + continue; + assert(TargetRegisterInfo::isPhysicalRegister(Reg) && + "Not expecting virtual register!"); + + if (!MO.isDef()) { + if (Reg && PhysRegDefs[Reg]) + // If it's using a non-loop-invariant register, then it's obviously not + // safe to hoist. + HasNonInvariantUse = true; + continue; + } + + if (MO.isImplicit()) { + ++PhysRegDefs[Reg]; + for (const unsigned *AS = TRI->getAliasSet(Reg); *AS; ++AS) + ++PhysRegDefs[*AS]; + if (!MO.isDead()) + // Non-dead implicit def? This cannot be hoisted. + RuledOut = true; + // No need to check if a dead implicit def is also defined by + // another instruction. + continue; + } + + // FIXME: For now, avoid instructions with multiple defs, unless + // it's a dead implicit def. + if (Def) + RuledOut = true; + else + Def = Reg; + + // If we have already seen another instruction that defines the same + // register, then this is not safe. + if (++PhysRegDefs[Reg] > 1) + // MI defined register is seen defined by another instruction in + // the loop, it cannot be a LICM candidate. + RuledOut = true; + for (const unsigned *AS = TRI->getAliasSet(Reg); *AS; ++AS) + if (++PhysRegDefs[*AS] > 1) + RuledOut = true; + } + + // Only consider reloads for now and remats which do not have register + // operands. FIXME: Consider unfold load folding instructions. + if (Def && !RuledOut) { + int FI = INT_MIN; + if ((!HasNonInvariantUse && IsLICMCandidate(*MI)) || + (TII->isLoadFromStackSlot(MI, FI) && MFI->isSpillSlotObjectIndex(FI))) + Candidates.push_back(CandidateInfo(MI, Def, FI)); + } +} + +/// HoistRegionPostRA - Walk the specified region of the CFG and hoist loop +/// invariants out to the preheader. +void MachineLICM::HoistRegionPostRA() { + unsigned NumRegs = TRI->getNumRegs(); + unsigned *PhysRegDefs = new unsigned[NumRegs]; + std::fill(PhysRegDefs, PhysRegDefs + NumRegs, 0); + + SmallVector<CandidateInfo, 32> Candidates; + SmallSet<int, 32> StoredFIs; + + // Walk the entire region, count number of defs for each register, and + // collect potential LICM candidates. + const std::vector<MachineBasicBlock*> Blocks = CurLoop->getBlocks(); + for (unsigned i = 0, e = Blocks.size(); i != e; ++i) { + MachineBasicBlock *BB = Blocks[i]; + // Conservatively treat live-in's as an external def. 
+ // FIXME: That means a reload that're reused in successor block(s) will not + // be LICM'ed. + for (MachineBasicBlock::livein_iterator I = BB->livein_begin(), + E = BB->livein_end(); I != E; ++I) { + unsigned Reg = *I; + ++PhysRegDefs[Reg]; + for (const unsigned *AS = TRI->getAliasSet(Reg); *AS; ++AS) + ++PhysRegDefs[*AS]; + } + + for (MachineBasicBlock::iterator + MII = BB->begin(), E = BB->end(); MII != E; ++MII) { + MachineInstr *MI = &*MII; + ProcessMI(MI, PhysRegDefs, StoredFIs, Candidates); + } + } + + // Now evaluate whether the potential candidates qualify. + // 1. Check if the candidate defined register is defined by another + // instruction in the loop. + // 2. If the candidate is a load from stack slot (always true for now), + // check if the slot is stored anywhere in the loop. + for (unsigned i = 0, e = Candidates.size(); i != e; ++i) { + if (Candidates[i].FI != INT_MIN && + StoredFIs.count(Candidates[i].FI)) + continue; + + if (PhysRegDefs[Candidates[i].Def] == 1) { + bool Safe = true; + MachineInstr *MI = Candidates[i].MI; + for (unsigned j = 0, ee = MI->getNumOperands(); j != ee; ++j) { + const MachineOperand &MO = MI->getOperand(j); + if (!MO.isReg() || MO.isDef() || !MO.getReg()) + continue; + if (PhysRegDefs[MO.getReg()]) { + // If it's using a non-loop-invariant register, then it's obviously + // not safe to hoist. + Safe = false; + break; + } + } + if (Safe) + HoistPostRA(MI, Candidates[i].Def); + } + } + + delete[] PhysRegDefs; +} + +/// AddToLiveIns - Add register 'Reg' to the livein sets of BBs in the current +/// loop, and make sure it is not killed by any instructions in the loop. +void MachineLICM::AddToLiveIns(unsigned Reg) { + const std::vector<MachineBasicBlock*> Blocks = CurLoop->getBlocks(); + for (unsigned i = 0, e = Blocks.size(); i != e; ++i) { + MachineBasicBlock *BB = Blocks[i]; + if (!BB->isLiveIn(Reg)) + BB->addLiveIn(Reg); + for (MachineBasicBlock::iterator + MII = BB->begin(), E = BB->end(); MII != E; ++MII) { + MachineInstr *MI = &*MII; + for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { + MachineOperand &MO = MI->getOperand(i); + if (!MO.isReg() || !MO.getReg() || MO.isDef()) continue; + if (MO.getReg() == Reg || TRI->isSuperRegister(Reg, MO.getReg())) + MO.setIsKill(false); + } + } + } +} + +/// HoistPostRA - When an instruction is found to only use loop invariant +/// operands that is safe to hoist, this instruction is called to do the +/// dirty work. +void MachineLICM::HoistPostRA(MachineInstr *MI, unsigned Def) { + // Now move the instructions to the predecessor, inserting it before any + // terminator instructions. + DEBUG({ + dbgs() << "Hoisting " << *MI; + if (CurPreheader->getBasicBlock()) + dbgs() << " to MachineBasicBlock " + << CurPreheader->getName(); + if (MI->getParent()->getBasicBlock()) + dbgs() << " from MachineBasicBlock " + << MI->getParent()->getName(); + dbgs() << "\n"; + }); + + // Splice the instruction to the preheader. + MachineBasicBlock *MBB = MI->getParent(); + CurPreheader->splice(CurPreheader->getFirstTerminator(), MBB, MI); + + // Add register to livein list to all the BBs in the current loop since a + // loop invariant must be kept live throughout the whole loop. This is + // important to ensure later passes do not scavenge the def register. 
+ AddToLiveIns(Def); + + ++NumPostRAHoisted; + Changed = true; +} + /// HoistRegion - Walk the specified region of the CFG (defined by all blocks /// dominated by the specified block, and that are in the current loop) in depth /// first order w.r.t the DominatorTree. This allows us to visit definitions @@ -223,17 +482,17 @@ void MachineLICM::HoistRegion(MachineDomTreeNode *N) { } const std::vector<MachineDomTreeNode*> &Children = N->getChildren(); - for (unsigned I = 0, E = Children.size(); I != E; ++I) HoistRegion(Children[I]); } -/// IsLoopInvariantInst - Returns true if the instruction is loop -/// invariant. I.e., all virtual register operands are defined outside of the -/// loop, physical registers aren't accessed explicitly, and there are no side -/// effects that aren't captured by the operands or other flags. -/// -bool MachineLICM::IsLoopInvariantInst(MachineInstr &I) { +/// IsLICMCandidate - Returns true if the instruction may be a suitable +/// candidate for LICM. e.g. If the instruction is a call, then it's obviously +/// not safe to hoist it. +bool MachineLICM::IsLICMCandidate(MachineInstr &I) { + if (I.isImplicitDef()) + return false; + const TargetInstrDesc &TID = I.getDesc(); // Ignore stuff that we obviously can't hoist. @@ -251,6 +510,17 @@ bool MachineLICM::IsLoopInvariantInst(MachineInstr &I) { // This is a trivial form of alias analysis. return false; } + return true; +} + +/// IsLoopInvariantInst - Returns true if the instruction is loop +/// invariant. I.e., all virtual register operands are defined outside of the +/// loop, physical registers aren't accessed explicitly, and there are no side +/// effects that aren't captured by the operands or other flags. +/// +bool MachineLICM::IsLoopInvariantInst(MachineInstr &I) { + if (!IsLICMCandidate(I)) + return false; // The instruction is loop invariant if all of its operands are. for (unsigned i = 0, e = I.getNumOperands(); i != e; ++i) { @@ -341,9 +611,6 @@ bool MachineLICM::isLoadFromConstantMemory(MachineInstr *MI) { /// IsProfitableToHoist - Return true if it is potentially profitable to hoist /// the given loop invariant. bool MachineLICM::IsProfitableToHoist(MachineInstr &MI) { - if (MI.isImplicitDef()) - return false; - // FIXME: For now, only hoist re-materilizable instructions. LICM will // increase register pressure. We want to make sure it doesn't increase // spilling. diff --git a/lib/CodeGen/MachineModuleInfo.cpp b/lib/CodeGen/MachineModuleInfo.cpp index f813a55..25284d6 100644 --- a/lib/CodeGen/MachineModuleInfo.cpp +++ b/lib/CodeGen/MachineModuleInfo.cpp @@ -12,8 +12,6 @@ #include "llvm/Constants.h" #include "llvm/DerivedTypes.h" #include "llvm/GlobalVariable.h" -#include "llvm/Intrinsics.h" -#include "llvm/Instructions.h" #include "llvm/Module.h" #include "llvm/Analysis/ValueTracking.h" #include "llvm/CodeGen/MachineFunctionPass.h" @@ -315,18 +313,18 @@ void MachineModuleInfo::EndFunction() { /// AnalyzeModule - Scan the module for global debug information. /// -void MachineModuleInfo::AnalyzeModule(Module &M) { +void MachineModuleInfo::AnalyzeModule(const Module &M) { // Insert functions in the llvm.used array (but not llvm.compiler.used) into // UsedFunctions. - GlobalVariable *GV = M.getGlobalVariable("llvm.used"); + const GlobalVariable *GV = M.getGlobalVariable("llvm.used"); if (!GV || !GV->hasInitializer()) return; // Should be an array of 'i8*'. 
- ConstantArray *InitList = dyn_cast<ConstantArray>(GV->getInitializer()); + const ConstantArray *InitList = dyn_cast<ConstantArray>(GV->getInitializer()); if (InitList == 0) return; for (unsigned i = 0, e = InitList->getNumOperands(); i != e; ++i) - if (Function *F = + if (const Function *F = dyn_cast<Function>(InitList->getOperand(i)->stripPointerCasts())) UsedFunctions.insert(F); } @@ -407,7 +405,7 @@ MCSymbol *MachineModuleInfo::addLandingPad(MachineBasicBlock *LandingPad) { /// addPersonality - Provide the personality function for the exception /// information. void MachineModuleInfo::addPersonality(MachineBasicBlock *LandingPad, - Function *Personality) { + const Function *Personality) { LandingPadInfo &LP = getOrCreateLandingPadInfo(LandingPad); LP.Personality = Personality; @@ -426,7 +424,7 @@ void MachineModuleInfo::addPersonality(MachineBasicBlock *LandingPad, /// addCatchTypeInfo - Provide the catch typeinfo for a landing pad. /// void MachineModuleInfo::addCatchTypeInfo(MachineBasicBlock *LandingPad, - std::vector<GlobalVariable *> &TyInfo) { + std::vector<const GlobalVariable *> &TyInfo) { LandingPadInfo &LP = getOrCreateLandingPadInfo(LandingPad); for (unsigned N = TyInfo.size(); N; --N) LP.TypeIds.push_back(getTypeIDFor(TyInfo[N - 1])); @@ -435,7 +433,7 @@ void MachineModuleInfo::addCatchTypeInfo(MachineBasicBlock *LandingPad, /// addFilterTypeInfo - Provide the filter typeinfo for a landing pad. /// void MachineModuleInfo::addFilterTypeInfo(MachineBasicBlock *LandingPad, - std::vector<GlobalVariable *> &TyInfo) { + std::vector<const GlobalVariable *> &TyInfo) { LandingPadInfo &LP = getOrCreateLandingPadInfo(LandingPad); std::vector<unsigned> IdsInFilter(TyInfo.size()); for (unsigned I = 0, E = TyInfo.size(); I != E; ++I) @@ -452,10 +450,12 @@ void MachineModuleInfo::addCleanup(MachineBasicBlock *LandingPad) { /// TidyLandingPads - Remap landing pad labels and remove any deleted landing /// pads. -void MachineModuleInfo::TidyLandingPads() { +void MachineModuleInfo::TidyLandingPads(DenseMap<MCSymbol*, uintptr_t> *LPMap) { for (unsigned i = 0; i != LandingPads.size(); ) { LandingPadInfo &LandingPad = LandingPads[i]; - if (LandingPad.LandingPadLabel && !LandingPad.LandingPadLabel->isDefined()) + if (LandingPad.LandingPadLabel && + !LandingPad.LandingPadLabel->isDefined() && + (!LPMap || (*LPMap)[LandingPad.LandingPadLabel] == 0)) LandingPad.LandingPadLabel = 0; // Special case: we *should* emit LPs with null LP MBB. This indicates @@ -468,7 +468,10 @@ void MachineModuleInfo::TidyLandingPads() { for (unsigned j = 0, e = LandingPads[i].BeginLabels.size(); j != e; ++j) { MCSymbol *BeginLabel = LandingPad.BeginLabels[j]; MCSymbol *EndLabel = LandingPad.EndLabels[j]; - if (BeginLabel->isDefined() && EndLabel->isDefined()) continue; + if ((BeginLabel->isDefined() || + (LPMap && (*LPMap)[BeginLabel] != 0)) && + (EndLabel->isDefined() || + (LPMap && (*LPMap)[EndLabel] != 0))) continue; LandingPad.BeginLabels.erase(LandingPad.BeginLabels.begin() + j); LandingPad.EndLabels.erase(LandingPad.EndLabels.begin() + j); @@ -492,7 +495,7 @@ void MachineModuleInfo::TidyLandingPads() { /// getTypeIDFor - Return the type id for the specified typeinfo. This is /// function wide. 
-unsigned MachineModuleInfo::getTypeIDFor(GlobalVariable *TI) { +unsigned MachineModuleInfo::getTypeIDFor(const GlobalVariable *TI) { for (unsigned i = 0, N = TypeInfos.size(); i != N; ++i) if (TypeInfos[i] == TI) return i + 1; @@ -532,7 +535,7 @@ try_next:; } /// getPersonality - Return the personality function for the current function. -Function *MachineModuleInfo::getPersonality() const { +const Function *MachineModuleInfo::getPersonality() const { // FIXME: Until PR1414 will be fixed, we're using 1 personality function per // function return !LandingPads.empty() ? LandingPads[0].Personality : NULL; diff --git a/lib/CodeGen/MachineRegisterInfo.cpp b/lib/CodeGen/MachineRegisterInfo.cpp index d9ab677..ea5ca0c 100644 --- a/lib/CodeGen/MachineRegisterInfo.cpp +++ b/lib/CodeGen/MachineRegisterInfo.cpp @@ -12,6 +12,9 @@ //===----------------------------------------------------------------------===// #include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Support/CommandLine.h" using namespace llvm; MachineRegisterInfo::MachineRegisterInfo(const TargetRegisterInfo &TRI) { @@ -130,6 +133,138 @@ bool MachineRegisterInfo::hasOneNonDBGUse(unsigned RegNo) const { return ++UI == use_nodbg_end(); } +bool MachineRegisterInfo::isLiveIn(unsigned Reg) const { + for (livein_iterator I = livein_begin(), E = livein_end(); I != E; ++I) + if (I->first == Reg || I->second == Reg) + return true; + return false; +} + +bool MachineRegisterInfo::isLiveOut(unsigned Reg) const { + for (liveout_iterator I = liveout_begin(), E = liveout_end(); I != E; ++I) + if (*I == Reg) + return true; + return false; +} + +/// getLiveInPhysReg - If VReg is a live-in virtual register, return the +/// corresponding live-in physical register. +unsigned MachineRegisterInfo::getLiveInPhysReg(unsigned VReg) const { + for (livein_iterator I = livein_begin(), E = livein_end(); I != E; ++I) + if (I->second == VReg) + return I->first; + return 0; +} + +static cl::opt<bool> +SchedLiveInCopies("schedule-livein-copies", cl::Hidden, + cl::desc("Schedule copies of livein registers"), + cl::init(false)); + +/// EmitLiveInCopy - Emit a copy for a live in physical register. If the +/// physical register has only a single copy use, then coalesced the copy +/// if possible. +static void EmitLiveInCopy(MachineBasicBlock *MBB, + MachineBasicBlock::iterator &InsertPos, + unsigned VirtReg, unsigned PhysReg, + const TargetRegisterClass *RC, + DenseMap<MachineInstr*, unsigned> &CopyRegMap, + const MachineRegisterInfo &MRI, + const TargetRegisterInfo &TRI, + const TargetInstrInfo &TII) { + unsigned NumUses = 0; + MachineInstr *UseMI = NULL; + for (MachineRegisterInfo::use_iterator UI = MRI.use_begin(VirtReg), + UE = MRI.use_end(); UI != UE; ++UI) { + UseMI = &*UI; + if (++NumUses > 1) + break; + } + + // If the number of uses is not one, or the use is not a move instruction, + // don't coalesce. Also, only coalesce away a virtual register to virtual + // register copy. + bool Coalesced = false; + unsigned SrcReg, DstReg, SrcSubReg, DstSubReg; + if (NumUses == 1 && + TII.isMoveInstr(*UseMI, SrcReg, DstReg, SrcSubReg, DstSubReg) && + TargetRegisterInfo::isVirtualRegister(DstReg)) { + VirtReg = DstReg; + Coalesced = true; + } + + // Now find an ideal location to insert the copy. 
+ MachineBasicBlock::iterator Pos = InsertPos; + while (Pos != MBB->begin()) { + MachineInstr *PrevMI = prior(Pos); + DenseMap<MachineInstr*, unsigned>::iterator RI = CopyRegMap.find(PrevMI); + // copyRegToReg might emit multiple instructions to do a copy. + unsigned CopyDstReg = (RI == CopyRegMap.end()) ? 0 : RI->second; + if (CopyDstReg && !TRI.regsOverlap(CopyDstReg, PhysReg)) + // This is what the BB looks like right now: + // r1024 = mov r0 + // ... + // r1 = mov r1024 + // + // We want to insert "r1025 = mov r1". Inserting this copy below the + // move to r1024 makes it impossible for that move to be coalesced. + // + // r1025 = mov r1 + // r1024 = mov r0 + // ... + // r1 = mov 1024 + // r2 = mov 1025 + break; // Woot! Found a good location. + --Pos; + } + + bool Emitted = TII.copyRegToReg(*MBB, Pos, VirtReg, PhysReg, RC, RC); + assert(Emitted && "Unable to issue a live-in copy instruction!\n"); + (void) Emitted; + + CopyRegMap.insert(std::make_pair(prior(Pos), VirtReg)); + if (Coalesced) { + if (&*InsertPos == UseMI) ++InsertPos; + MBB->erase(UseMI); + } +} + +/// EmitLiveInCopies - Emit copies to initialize livein virtual registers +/// into the given entry block. +void +MachineRegisterInfo::EmitLiveInCopies(MachineBasicBlock *EntryMBB, + const TargetRegisterInfo &TRI, + const TargetInstrInfo &TII) { + if (SchedLiveInCopies) { + // Emit the copies at a heuristically-determined location in the block. + DenseMap<MachineInstr*, unsigned> CopyRegMap; + MachineBasicBlock::iterator InsertPos = EntryMBB->begin(); + for (MachineRegisterInfo::livein_iterator LI = livein_begin(), + E = livein_end(); LI != E; ++LI) + if (LI->second) { + const TargetRegisterClass *RC = getRegClass(LI->second); + EmitLiveInCopy(EntryMBB, InsertPos, LI->second, LI->first, + RC, CopyRegMap, *this, TRI, TII); + } + } else { + // Emit the copies into the top of the block. + for (MachineRegisterInfo::livein_iterator LI = livein_begin(), + E = livein_end(); LI != E; ++LI) + if (LI->second) { + const TargetRegisterClass *RC = getRegClass(LI->second); + bool Emitted = TII.copyRegToReg(*EntryMBB, EntryMBB->begin(), + LI->second, LI->first, RC, RC); + assert(Emitted && "Unable to issue a live-in copy instruction!\n"); + (void) Emitted; + } + } + + // Add function live-ins to entry block live-in set. + for (MachineRegisterInfo::livein_iterator I = livein_begin(), + E = livein_end(); I != E; ++I) + EntryMBB->addLiveIn(I->first); +} + #ifndef NDEBUG void MachineRegisterInfo::dumpUses(unsigned Reg) const { for (use_iterator I = use_begin(Reg), E = use_end(); I != E; ++I) diff --git a/lib/CodeGen/MachineSSAUpdater.cpp b/lib/CodeGen/MachineSSAUpdater.cpp index b79cdbb..b8996d4 100644 --- a/lib/CodeGen/MachineSSAUpdater.cpp +++ b/lib/CodeGen/MachineSSAUpdater.cpp @@ -21,34 +21,50 @@ #include "llvm/Target/TargetRegisterInfo.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/SmallVector.h" +#include "llvm/Support/AlignOf.h" +#include "llvm/Support/Allocator.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" using namespace llvm; -typedef DenseMap<MachineBasicBlock*, unsigned> AvailableValsTy; -typedef std::vector<std::pair<MachineBasicBlock*, unsigned> > - IncomingPredInfoTy; +/// BBInfo - Per-basic block information used internally by MachineSSAUpdater. +class MachineSSAUpdater::BBInfo { +public: + MachineBasicBlock *BB; // Back-pointer to the corresponding block. + unsigned AvailableVal; // Value to use in this block. 
+ BBInfo *DefBB; // Block that defines the available value. + int BlkNum; // Postorder number. + BBInfo *IDom; // Immediate dominator. + unsigned NumPreds; // Number of predecessor blocks. + BBInfo **Preds; // Array[NumPreds] of predecessor blocks. + MachineInstr *PHITag; // Marker for existing PHIs that match. + + BBInfo(MachineBasicBlock *ThisBB, unsigned V) + : BB(ThisBB), AvailableVal(V), DefBB(V ? this : 0), BlkNum(0), IDom(0), + NumPreds(0), Preds(0), PHITag(0) { } +}; + +typedef DenseMap<MachineBasicBlock*, MachineSSAUpdater::BBInfo*> BBMapTy; +typedef DenseMap<MachineBasicBlock*, unsigned> AvailableValsTy; static AvailableValsTy &getAvailableVals(void *AV) { return *static_cast<AvailableValsTy*>(AV); } -static IncomingPredInfoTy &getIncomingPredInfo(void *IPI) { - return *static_cast<IncomingPredInfoTy*>(IPI); +static BBMapTy *getBBMap(void *BM) { + return static_cast<BBMapTy*>(BM); } - MachineSSAUpdater::MachineSSAUpdater(MachineFunction &MF, SmallVectorImpl<MachineInstr*> *NewPHI) - : AV(0), IPI(0), InsertedPHIs(NewPHI) { + : AV(0), BM(0), InsertedPHIs(NewPHI) { TII = MF.getTarget().getInstrInfo(); MRI = &MF.getRegInfo(); } MachineSSAUpdater::~MachineSSAUpdater() { delete &getAvailableVals(AV); - delete &getIncomingPredInfo(IPI); } /// Initialize - Reset this object to get ready for a new set of SSA @@ -59,11 +75,6 @@ void MachineSSAUpdater::Initialize(unsigned V) { else getAvailableVals(AV).clear(); - if (IPI == 0) - IPI = new IncomingPredInfoTy(); - else - getIncomingPredInfo(IPI).clear(); - VR = V; VRC = MRI->getRegClass(VR); } @@ -127,7 +138,7 @@ MachineInstr *InsertNewDef(unsigned Opcode, unsigned NewVR = MRI->createVirtualRegister(RC); return BuildMI(*BB, I, DebugLoc(), TII->get(Opcode), NewVR); } - + /// GetValueInMiddleOfBlock - Construct SSA form, materializing a value that /// is live in the middle of the specified block. /// @@ -150,7 +161,7 @@ MachineInstr *InsertNewDef(unsigned Opcode, unsigned MachineSSAUpdater::GetValueInMiddleOfBlock(MachineBasicBlock *BB) { // If there is no definition of the renamed variable in this block, just use // GetValueAtEndOfBlock to do our work. - if (!getAvailableVals(AV).count(BB)) + if (!HasValueForBlock(BB)) return GetValueAtEndOfBlockInternal(BB); // If there are no predecessors, just return undef. @@ -254,141 +265,436 @@ void MachineSSAUpdater::ReplaceRegWith(unsigned OldReg, unsigned NewReg) { /// GetValueAtEndOfBlockInternal - Check to see if AvailableVals has an entry /// for the specified BB and if so, return it. If not, construct SSA form by -/// walking predecessors inserting PHI nodes as needed until we get to a block -/// where the value is available. -/// +/// first calculating the required placement of PHIs and then inserting new +/// PHIs where needed. unsigned MachineSSAUpdater::GetValueAtEndOfBlockInternal(MachineBasicBlock *BB){ AvailableValsTy &AvailableVals = getAvailableVals(AV); + if (unsigned V = AvailableVals[BB]) + return V; - // Query AvailableVals by doing an insertion of null. - std::pair<AvailableValsTy::iterator, bool> InsertRes = - AvailableVals.insert(std::make_pair(BB, 0)); - - // Handle the case when the insertion fails because we have already seen BB. - if (!InsertRes.second) { - // If the insertion failed, there are two cases. The first case is that the - // value is already available for the specified block. If we get this, just - // return the value. 
- if (InsertRes.first->second != 0) - return InsertRes.first->second; - - // Otherwise, if the value we find is null, then this is the value is not - // known but it is being computed elsewhere in our recursion. This means - // that we have a cycle. Handle this by inserting a PHI node and returning - // it. When we get back to the first instance of the recursion we will fill - // in the PHI node. - MachineBasicBlock::iterator Loc = BB->empty() ? BB->end() : BB->front(); - MachineInstr *NewPHI = InsertNewDef(TargetOpcode::PHI, BB, Loc, - VRC, MRI,TII); - unsigned NewVR = NewPHI->getOperand(0).getReg(); - InsertRes.first->second = NewVR; - return NewVR; - } + // Pool allocation used internally by GetValueAtEndOfBlock. + BumpPtrAllocator Allocator; + BBMapTy BBMapObj; + BM = &BBMapObj; - // If there are no predecessors, then we must have found an unreachable block - // just return 'undef'. Since there are no predecessors, InsertRes must not - // be invalidated. - if (BB->pred_empty()) { + SmallVector<BBInfo*, 100> BlockList; + BuildBlockList(BB, &BlockList, &Allocator); + + // Special case: bail out if BB is unreachable. + if (BlockList.size() == 0) { + BM = 0; // Insert an implicit_def to represent an undef value. MachineInstr *NewDef = InsertNewDef(TargetOpcode::IMPLICIT_DEF, BB, BB->getFirstTerminator(), VRC, MRI, TII); - return InsertRes.first->second = NewDef->getOperand(0).getReg(); + unsigned V = NewDef->getOperand(0).getReg(); + AvailableVals[BB] = V; + return V; } - // Okay, the value isn't in the map and we just inserted a null in the entry - // to indicate that we're processing the block. Since we have no idea what - // value is in this block, we have to recurse through our predecessors. - // - // While we're walking our predecessors, we keep track of them in a vector, - // then insert a PHI node in the end if we actually need one. We could use a - // smallvector here, but that would take a lot of stack space for every level - // of the recursion, just use IncomingPredInfo as an explicit stack. - IncomingPredInfoTy &IncomingPredInfo = getIncomingPredInfo(IPI); - unsigned FirstPredInfoEntry = IncomingPredInfo.size(); - - // As we're walking the predecessors, keep track of whether they are all - // producing the same value. If so, this value will capture it, if not, it - // will get reset to null. We distinguish the no-predecessor case explicitly - // below. - unsigned SingularValue = 0; - bool isFirstPred = true; + FindDominators(&BlockList); + FindPHIPlacement(&BlockList); + FindAvailableVals(&BlockList); + + BM = 0; + return BBMapObj[BB]->DefBB->AvailableVal; +} + +/// FindPredecessorBlocks - Put the predecessors of Info->BB into the Preds +/// vector, set Info->NumPreds, and allocate space in Info->Preds. +static void FindPredecessorBlocks(MachineSSAUpdater::BBInfo *Info, + SmallVectorImpl<MachineBasicBlock*> *Preds, + BumpPtrAllocator *Allocator) { + MachineBasicBlock *BB = Info->BB; for (MachineBasicBlock::pred_iterator PI = BB->pred_begin(), - E = BB->pred_end(); PI != E; ++PI) { - MachineBasicBlock *PredBB = *PI; - unsigned PredVal = GetValueAtEndOfBlockInternal(PredBB); - IncomingPredInfo.push_back(std::make_pair(PredBB, PredVal)); + E = BB->pred_end(); PI != E; ++PI) + Preds->push_back(*PI); - // Compute SingularValue. 
- if (isFirstPred) { - SingularValue = PredVal; - isFirstPred = false; - } else if (PredVal != SingularValue) - SingularValue = 0; + Info->NumPreds = Preds->size(); + Info->Preds = static_cast<MachineSSAUpdater::BBInfo**> + (Allocator->Allocate(Info->NumPreds * sizeof(MachineSSAUpdater::BBInfo*), + AlignOf<MachineSSAUpdater::BBInfo*>::Alignment)); +} + +/// BuildBlockList - Starting from the specified basic block, traverse back +/// through its predecessors until reaching blocks with known values. Create +/// BBInfo structures for the blocks and append them to the block list. +void MachineSSAUpdater::BuildBlockList(MachineBasicBlock *BB, + BlockListTy *BlockList, + BumpPtrAllocator *Allocator) { + AvailableValsTy &AvailableVals = getAvailableVals(AV); + BBMapTy *BBMap = getBBMap(BM); + SmallVector<BBInfo*, 10> RootList; + SmallVector<BBInfo*, 64> WorkList; + + BBInfo *Info = new (*Allocator) BBInfo(BB, 0); + (*BBMap)[BB] = Info; + WorkList.push_back(Info); + + // Search backward from BB, creating BBInfos along the way and stopping when + // reaching blocks that define the value. Record those defining blocks on + // the RootList. + SmallVector<MachineBasicBlock*, 10> Preds; + while (!WorkList.empty()) { + Info = WorkList.pop_back_val(); + Preds.clear(); + FindPredecessorBlocks(Info, &Preds, Allocator); + + // Treat an unreachable predecessor as a definition with 'undef'. + if (Info->NumPreds == 0) { + // Insert an implicit_def to represent an undef value. + MachineInstr *NewDef = InsertNewDef(TargetOpcode::IMPLICIT_DEF, + Info->BB, + Info->BB->getFirstTerminator(), + VRC, MRI, TII); + Info->AvailableVal = NewDef->getOperand(0).getReg(); + Info->DefBB = Info; + RootList.push_back(Info); + continue; + } + + for (unsigned p = 0; p != Info->NumPreds; ++p) { + MachineBasicBlock *Pred = Preds[p]; + // Check if BBMap already has a BBInfo for the predecessor block. + BBMapTy::value_type &BBMapBucket = BBMap->FindAndConstruct(Pred); + if (BBMapBucket.second) { + Info->Preds[p] = BBMapBucket.second; + continue; + } + + // Create a new BBInfo for the predecessor. + unsigned PredVal = AvailableVals.lookup(Pred); + BBInfo *PredInfo = new (*Allocator) BBInfo(Pred, PredVal); + BBMapBucket.second = PredInfo; + Info->Preds[p] = PredInfo; + + if (PredInfo->AvailableVal) { + RootList.push_back(PredInfo); + continue; + } + WorkList.push_back(PredInfo); + } + } + + // Now that we know what blocks are backwards-reachable from the starting + // block, do a forward depth-first traversal to assign postorder numbers + // to those blocks. + BBInfo *PseudoEntry = new (*Allocator) BBInfo(0, 0); + unsigned BlkNum = 1; + + // Initialize the worklist with the roots from the backward traversal. + while (!RootList.empty()) { + Info = RootList.pop_back_val(); + Info->IDom = PseudoEntry; + Info->BlkNum = -1; + WorkList.push_back(Info); + } + + while (!WorkList.empty()) { + Info = WorkList.back(); + + if (Info->BlkNum == -2) { + // All the successors have been handled; assign the postorder number. + Info->BlkNum = BlkNum++; + // If not a root, put it on the BlockList. + if (!Info->AvailableVal) + BlockList->push_back(Info); + WorkList.pop_back(); + continue; + } + + // Leave this entry on the worklist, but set its BlkNum to mark that its + // successors have been put on the worklist. When it returns to the top + // the list, after handling its successors, it will be assigned a number. + Info->BlkNum = -2; + + // Add unvisited successors to the work list. 
+ for (MachineBasicBlock::succ_iterator SI = Info->BB->succ_begin(), + E = Info->BB->succ_end(); SI != E; ++SI) { + BBInfo *SuccInfo = (*BBMap)[*SI]; + if (!SuccInfo || SuccInfo->BlkNum) + continue; + SuccInfo->BlkNum = -1; + WorkList.push_back(SuccInfo); + } } + PseudoEntry->BlkNum = BlkNum; +} - /// Look up BB's entry in AvailableVals. 'InsertRes' may be invalidated. If - /// this block is involved in a loop, a no-entry PHI node will have been - /// inserted as InsertedVal. Otherwise, we'll still have the null we inserted - /// above. - unsigned &InsertedVal = AvailableVals[BB]; - - // If all the predecessor values are the same then we don't need to insert a - // PHI. This is the simple and common case. - if (SingularValue) { - // If a PHI node got inserted, replace it with the singlar value and delete - // it. - if (InsertedVal) { - MachineInstr *OldVal = MRI->getVRegDef(InsertedVal); - // Be careful about dead loops. These RAUW's also update InsertedVal. - assert(InsertedVal != SingularValue && "Dead loop?"); - ReplaceRegWith(InsertedVal, SingularValue); - OldVal->eraseFromParent(); +/// IntersectDominators - This is the dataflow lattice "meet" operation for +/// finding dominators. Given two basic blocks, it walks up the dominator +/// tree until it finds a common dominator of both. It uses the postorder +/// number of the blocks to determine how to do that. +static MachineSSAUpdater::BBInfo * +IntersectDominators(MachineSSAUpdater::BBInfo *Blk1, + MachineSSAUpdater::BBInfo *Blk2) { + while (Blk1 != Blk2) { + while (Blk1->BlkNum < Blk2->BlkNum) { + Blk1 = Blk1->IDom; + if (!Blk1) + return Blk2; } + while (Blk2->BlkNum < Blk1->BlkNum) { + Blk2 = Blk2->IDom; + if (!Blk2) + return Blk1; + } + } + return Blk1; +} - InsertedVal = SingularValue; +/// FindDominators - Calculate the dominator tree for the subset of the CFG +/// corresponding to the basic blocks on the BlockList. This uses the +/// algorithm from: "A Simple, Fast Dominance Algorithm" by Cooper, Harvey and +/// Kennedy, published in Software--Practice and Experience, 2001, 4:1-10. +/// Because the CFG subset does not include any edges leading into blocks that +/// define the value, the results are not the usual dominator tree. The CFG +/// subset has a single pseudo-entry node with edges to a set of root nodes +/// for blocks that define the value. The dominators for this subset CFG are +/// not the standard dominators but they are adequate for placing PHIs within +/// the subset CFG. +void MachineSSAUpdater::FindDominators(BlockListTy *BlockList) { + bool Changed; + do { + Changed = false; + // Iterate over the list in reverse order, i.e., forward on CFG edges. + for (BlockListTy::reverse_iterator I = BlockList->rbegin(), + E = BlockList->rend(); I != E; ++I) { + BBInfo *Info = *I; + + // Start with the first predecessor. + assert(Info->NumPreds > 0 && "unreachable block"); + BBInfo *NewIDom = Info->Preds[0]; + + // Iterate through the block's other predecessors. + for (unsigned p = 1; p != Info->NumPreds; ++p) { + BBInfo *Pred = Info->Preds[p]; + NewIDom = IntersectDominators(NewIDom, Pred); + } - // Drop the entries we added in IncomingPredInfo to restore the stack. - IncomingPredInfo.erase(IncomingPredInfo.begin()+FirstPredInfoEntry, - IncomingPredInfo.end()); - return InsertedVal; + // Check if the IDom value has changed. 
+ if (NewIDom != Info->IDom) { + Info->IDom = NewIDom; + Changed = true; + } + } + } while (Changed); +} + +/// IsDefInDomFrontier - Search up the dominator tree from Pred to IDom for +/// any blocks containing definitions of the value. If one is found, then the +/// successor of Pred is in the dominance frontier for the definition, and +/// this function returns true. +static bool IsDefInDomFrontier(const MachineSSAUpdater::BBInfo *Pred, + const MachineSSAUpdater::BBInfo *IDom) { + for (; Pred != IDom; Pred = Pred->IDom) { + if (Pred->DefBB == Pred) + return true; } + return false; +} +/// FindPHIPlacement - PHIs are needed in the iterated dominance frontiers of +/// the known definitions. Iteratively add PHIs in the dom frontiers until +/// nothing changes. Along the way, keep track of the nearest dominating +/// definitions for non-PHI blocks. +void MachineSSAUpdater::FindPHIPlacement(BlockListTy *BlockList) { + bool Changed; + do { + Changed = false; + // Iterate over the list in reverse order, i.e., forward on CFG edges. + for (BlockListTy::reverse_iterator I = BlockList->rbegin(), + E = BlockList->rend(); I != E; ++I) { + BBInfo *Info = *I; + + // If this block already needs a PHI, there is nothing to do here. + if (Info->DefBB == Info) + continue; + + // Default to use the same def as the immediate dominator. + BBInfo *NewDefBB = Info->IDom->DefBB; + for (unsigned p = 0; p != Info->NumPreds; ++p) { + if (IsDefInDomFrontier(Info->Preds[p], Info->IDom)) { + // Need a PHI here. + NewDefBB = Info; + break; + } + } - // Otherwise, we do need a PHI: insert one now if we don't already have one. - MachineInstr *InsertedPHI; - if (InsertedVal == 0) { - MachineBasicBlock::iterator Loc = BB->empty() ? BB->end() : BB->front(); - InsertedPHI = InsertNewDef(TargetOpcode::PHI, BB, Loc, - VRC, MRI, TII); - InsertedVal = InsertedPHI->getOperand(0).getReg(); - } else { - InsertedPHI = MRI->getVRegDef(InsertedVal); + // Check if anything changed. + if (NewDefBB != Info->DefBB) { + Info->DefBB = NewDefBB; + Changed = true; + } + } + } while (Changed); +} + +/// FindAvailableVal - If this block requires a PHI, first check if an existing +/// PHI matches the PHI placement and reaching definitions computed earlier, +/// and if not, create a new PHI. Visit all the block's predecessors to +/// calculate the available value for each one and fill in the incoming values +/// for a new PHI. +void MachineSSAUpdater::FindAvailableVals(BlockListTy *BlockList) { + AvailableValsTy &AvailableVals = getAvailableVals(AV); + + // Go through the worklist in forward order (i.e., backward through the CFG) + // and check if existing PHIs can be used. If not, create empty PHIs where + // they are needed. + for (BlockListTy::iterator I = BlockList->begin(), E = BlockList->end(); + I != E; ++I) { + BBInfo *Info = *I; + // Check if there needs to be a PHI in BB. + if (Info->DefBB != Info) + continue; + + // Look for an existing PHI. + FindExistingPHI(Info->BB, BlockList); + if (Info->AvailableVal) + continue; + + MachineBasicBlock::iterator Loc = + Info->BB->empty() ? Info->BB->end() : Info->BB->front(); + MachineInstr *InsertedPHI = InsertNewDef(TargetOpcode::PHI, Info->BB, Loc, + VRC, MRI, TII); + unsigned PHI = InsertedPHI->getOperand(0).getReg(); + Info->AvailableVal = PHI; + AvailableVals[Info->BB] = PHI; } - // Fill in all the predecessors of the PHI. 
- MachineInstrBuilder MIB(InsertedPHI); - for (IncomingPredInfoTy::iterator I = - IncomingPredInfo.begin()+FirstPredInfoEntry, - E = IncomingPredInfo.end(); I != E; ++I) - MIB.addReg(I->second).addMBB(I->first); + // Now go back through the worklist in reverse order to fill in the arguments + // for any new PHIs added in the forward traversal. + for (BlockListTy::reverse_iterator I = BlockList->rbegin(), + E = BlockList->rend(); I != E; ++I) { + BBInfo *Info = *I; + + if (Info->DefBB != Info) { + // Record the available value at join nodes to speed up subsequent + // uses of this SSAUpdater for the same value. + if (Info->NumPreds > 1) + AvailableVals[Info->BB] = Info->DefBB->AvailableVal; + continue; + } - // Drop the entries we added in IncomingPredInfo to restore the stack. - IncomingPredInfo.erase(IncomingPredInfo.begin()+FirstPredInfoEntry, - IncomingPredInfo.end()); + // Check if this block contains a newly added PHI. + unsigned PHI = Info->AvailableVal; + MachineInstr *InsertedPHI = MRI->getVRegDef(PHI); + if (!InsertedPHI->isPHI() || InsertedPHI->getNumOperands() > 1) + continue; + + // Iterate through the block's predecessors. + MachineInstrBuilder MIB(InsertedPHI); + for (unsigned p = 0; p != Info->NumPreds; ++p) { + BBInfo *PredInfo = Info->Preds[p]; + MachineBasicBlock *Pred = PredInfo->BB; + // Skip to the nearest preceding definition. + if (PredInfo->DefBB != PredInfo) + PredInfo = PredInfo->DefBB; + MIB.addReg(PredInfo->AvailableVal).addMBB(Pred); + } - // See if the PHI node can be merged to a single value. This can happen in - // loop cases when we get a PHI of itself and one other value. - if (unsigned ConstVal = InsertedPHI->isConstantValuePHI()) { - MRI->replaceRegWith(InsertedVal, ConstVal); - InsertedPHI->eraseFromParent(); - InsertedVal = ConstVal; - } else { DEBUG(dbgs() << " Inserted PHI: " << *InsertedPHI << "\n"); // If the client wants to know about all new instructions, tell it. if (InsertedPHIs) InsertedPHIs->push_back(InsertedPHI); } +} - return InsertedVal; +/// FindExistingPHI - Look through the PHI nodes in a block to see if any of +/// them match what is needed. +void MachineSSAUpdater::FindExistingPHI(MachineBasicBlock *BB, + BlockListTy *BlockList) { + for (MachineBasicBlock::iterator BBI = BB->begin(), BBE = BB->end(); + BBI != BBE && BBI->isPHI(); ++BBI) { + if (CheckIfPHIMatches(BBI)) { + RecordMatchingPHI(BBI); + break; + } + // Match failed: clear all the PHITag values. + for (BlockListTy::iterator I = BlockList->begin(), E = BlockList->end(); + I != E; ++I) + (*I)->PHITag = 0; + } +} + +/// CheckIfPHIMatches - Check if a PHI node matches the placement and values +/// in the BBMap. +bool MachineSSAUpdater::CheckIfPHIMatches(MachineInstr *PHI) { + BBMapTy *BBMap = getBBMap(BM); + SmallVector<MachineInstr*, 20> WorkList; + WorkList.push_back(PHI); + + // Mark that the block containing this PHI has been visited. + (*BBMap)[PHI->getParent()]->PHITag = PHI; + + while (!WorkList.empty()) { + PHI = WorkList.pop_back_val(); + + // Iterate through the PHI's incoming values. + for (unsigned i = 1, e = PHI->getNumOperands(); i != e; i += 2) { + unsigned IncomingVal = PHI->getOperand(i).getReg(); + BBInfo *PredInfo = (*BBMap)[PHI->getOperand(i+1).getMBB()]; + // Skip to the nearest preceding definition. + if (PredInfo->DefBB != PredInfo) + PredInfo = PredInfo->DefBB; + + // Check if it matches the expected value. 
+ if (PredInfo->AvailableVal) { + if (IncomingVal == PredInfo->AvailableVal) + continue; + return false; + } + + // Check if the value is a PHI in the correct block. + MachineInstr *IncomingPHIVal = MRI->getVRegDef(IncomingVal); + if (!IncomingPHIVal->isPHI() || + IncomingPHIVal->getParent() != PredInfo->BB) + return false; + + // If this block has already been visited, check if this PHI matches. + if (PredInfo->PHITag) { + if (IncomingPHIVal == PredInfo->PHITag) + continue; + return false; + } + PredInfo->PHITag = IncomingPHIVal; + + WorkList.push_back(IncomingPHIVal); + } + } + return true; +} + +/// RecordMatchingPHI - For a PHI node that matches, record it and its input +/// PHIs in both the BBMap and the AvailableVals mapping. +void MachineSSAUpdater::RecordMatchingPHI(MachineInstr *PHI) { + BBMapTy *BBMap = getBBMap(BM); + AvailableValsTy &AvailableVals = getAvailableVals(AV); + SmallVector<MachineInstr*, 20> WorkList; + WorkList.push_back(PHI); + + // Record this PHI. + MachineBasicBlock *BB = PHI->getParent(); + AvailableVals[BB] = PHI->getOperand(0).getReg(); + (*BBMap)[BB]->AvailableVal = PHI->getOperand(0).getReg(); + + while (!WorkList.empty()) { + PHI = WorkList.pop_back_val(); + + // Iterate through the PHI's incoming values. + for (unsigned i = 1, e = PHI->getNumOperands(); i != e; i += 2) { + unsigned IncomingVal = PHI->getOperand(i).getReg(); + MachineInstr *IncomingPHIVal = MRI->getVRegDef(IncomingVal); + if (!IncomingPHIVal->isPHI()) continue; + BB = IncomingPHIVal->getParent(); + BBInfo *Info = (*BBMap)[BB]; + if (!Info || Info->AvailableVal) + continue; + + // Record the PHI and add it to the worklist. + AvailableVals[BB] = IncomingVal; + Info->AvailableVal = IncomingVal; + WorkList.push_back(IncomingPHIVal); + } + } } diff --git a/lib/CodeGen/MachineSink.cpp b/lib/CodeGen/MachineSink.cpp index e659619..ef489dc 100644 --- a/lib/CodeGen/MachineSink.cpp +++ b/lib/CodeGen/MachineSink.cpp @@ -20,6 +20,7 @@ #include "llvm/CodeGen/Passes.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/MachineDominators.h" +#include "llvm/CodeGen/MachineLoopInfo.h" #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Target/TargetRegisterInfo.h" #include "llvm/Target/TargetInstrInfo.h" @@ -37,6 +38,7 @@ namespace { const TargetRegisterInfo *TRI; MachineRegisterInfo *RegInfo; // Machine register information MachineDominatorTree *DT; // Machine dominator tree + MachineLoopInfo *LI; AliasAnalysis *AA; BitVector AllocatableSet; // Which physregs are allocatable? @@ -51,7 +53,9 @@ namespace { MachineFunctionPass::getAnalysisUsage(AU); AU.addRequired<AliasAnalysis>(); AU.addRequired<MachineDominatorTree>(); + AU.addRequired<MachineLoopInfo>(); AU.addPreserved<MachineDominatorTree>(); + AU.addPreserved<MachineLoopInfo>(); } private: bool ProcessBlock(MachineBasicBlock &MBB); @@ -102,6 +106,7 @@ bool MachineSinking::runOnMachineFunction(MachineFunction &MF) { TRI = TM.getRegisterInfo(); RegInfo = &MF.getRegInfo(); DT = &getAnalysis<MachineDominatorTree>(); + LI = &getAnalysis<MachineLoopInfo>(); AA = &getAnalysis<AliasAnalysis>(); AllocatableSet = TRI->getAllocatableSet(MF); @@ -276,8 +281,29 @@ bool MachineSinking::SinkInstruction(MachineInstr *MI, bool &SawStore) { // but for now we just punt. // FIXME: Split critical edges if not backedges. if (SuccToSinkTo->pred_size() > 1) { - DEBUG(dbgs() << " *** PUNTING: Critical edge found\n"); - return false; + // We cannot sink a load across a critical edge - there may be stores in + // other code paths. 
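    // SawStore is seeded to true here, so isSafeToMove behaves as if a store
    // had already been seen on the path and conservatively rejects any load
    // that is not known to be invariant; that is what keeps loads from being
    // sunk across the critical edge.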
+ bool store = true; + if (!MI->isSafeToMove(TII, AA, store)) { + DEBUG(dbgs() << " *** PUNTING: Wont sink load along critical edge.\n"); + return false; + } + + // We don't want to sink across a critical edge if we don't dominate the + // successor. We could be introducing calculations to new code paths. + if (!DT->dominates(ParentBlock, SuccToSinkTo)) { + DEBUG(dbgs() << " *** PUNTING: Critical edge found\n"); + return false; + } + + // Don't sink instructions into a loop. + if (LI->isLoopHeader(SuccToSinkTo)) { + DEBUG(dbgs() << " *** PUNTING: Loop header found\n"); + return false; + } + + // Otherwise we are OK with sinking along a critical edge. + DEBUG(dbgs() << "Sinking along critical edge.\n"); } // Determine where to insert into. Skip phi nodes. diff --git a/lib/CodeGen/MachineVerifier.cpp b/lib/CodeGen/MachineVerifier.cpp index 434a1e8..0b75c55 100644 --- a/lib/CodeGen/MachineVerifier.cpp +++ b/lib/CodeGen/MachineVerifier.cpp @@ -279,7 +279,7 @@ bool MachineVerifier::runOnMachineFunction(MachineFunction &MF) { if (OutFile) delete OutFile; else if (foundErrors) - llvm_report_error("Found "+Twine(foundErrors)+" machine code errors."); + report_fatal_error("Found "+Twine(foundErrors)+" machine code errors."); // Clean up. regsLive.clear(); @@ -351,8 +351,8 @@ void MachineVerifier::visitMachineFunctionBefore() { } // Does iterator point to a and b as the first two elements? -bool matchPair(MachineBasicBlock::const_succ_iterator i, - const MachineBasicBlock *a, const MachineBasicBlock *b) { +static bool matchPair(MachineBasicBlock::const_succ_iterator i, + const MachineBasicBlock *a, const MachineBasicBlock *b) { if (*i == a) return *++i == b; if (*i == b) @@ -470,7 +470,7 @@ MachineVerifier::visitMachineBasicBlockBefore(const MachineBasicBlock *MBB) { } regsLive.clear(); - for (MachineBasicBlock::const_livein_iterator I = MBB->livein_begin(), + for (MachineBasicBlock::livein_iterator I = MBB->livein_begin(), E = MBB->livein_end(); I != E; ++I) { if (!TargetRegisterInfo::isPhysicalRegister(*I)) { report("MBB live-in list contains non-physical register", MBB); diff --git a/lib/CodeGen/PostRASchedulerList.cpp b/lib/CodeGen/PostRASchedulerList.cpp index 424181c..d3e1295 100644 --- a/lib/CodeGen/PostRASchedulerList.cpp +++ b/lib/CodeGen/PostRASchedulerList.cpp @@ -46,7 +46,6 @@ #include "llvm/Support/raw_ostream.h" #include "llvm/ADT/BitVector.h" #include "llvm/ADT/Statistic.h" -#include <map> #include <set> using namespace llvm; @@ -266,6 +265,17 @@ bool PostRAScheduler::runOnMachineFunction(MachineFunction &Fn) { // Initialize register live-range state for scheduling in this block. Scheduler.StartBlock(MBB); + // FIXME: Temporary workaround for <rdar://problem/7759363>: The post-RA + // scheduler has some sort of problem with DebugValue instructions that + // causes an assertion in LeaksContext.h to fail occasionally. Just + // remove all those instructions for now. + for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end(); + I != E; ) { + MachineInstr *MI = &*I++; + if (MI->isDebugValue()) + MI->eraseFromParent(); + } + // Schedule each sequence of instructions not interrupted by a label // or anything else that effectively needs to shut down scheduling. 
MachineBasicBlock::iterator Current = MBB->end(); @@ -274,7 +284,7 @@ bool PostRAScheduler::runOnMachineFunction(MachineFunction &Fn) { MachineInstr *MI = prior(I); if (isSchedulingBoundary(MI, Fn)) { Scheduler.Run(MBB, I, Current, CurrentCount); - Scheduler.EmitSchedule(0); + Scheduler.EmitSchedule(); Current = MI; CurrentCount = Count - 1; Scheduler.Observe(MI, CurrentCount); @@ -286,7 +296,7 @@ bool PostRAScheduler::runOnMachineFunction(MachineFunction &Fn) { assert((MBB->begin() == Current || CurrentCount != 0) && "Instruction count mismatch!"); Scheduler.Run(MBB, MBB->begin(), Current, CurrentCount); - Scheduler.EmitSchedule(0); + Scheduler.EmitSchedule(); // Clean up register live-range state. Scheduler.FinishBlock(); diff --git a/lib/CodeGen/PrologEpilogInserter.cpp b/lib/CodeGen/PrologEpilogInserter.cpp index 27cb566..a454b62 100644 --- a/lib/CodeGen/PrologEpilogInserter.cpp +++ b/lib/CodeGen/PrologEpilogInserter.cpp @@ -131,10 +131,10 @@ void PEI::getAnalysisUsage(AnalysisUsage &AU) const { /// pseudo instructions. void PEI::calculateCallsInformation(MachineFunction &Fn) { const TargetRegisterInfo *RegInfo = Fn.getTarget().getRegisterInfo(); - MachineFrameInfo *FFI = Fn.getFrameInfo(); + MachineFrameInfo *MFI = Fn.getFrameInfo(); unsigned MaxCallFrameSize = 0; - bool HasCalls = FFI->hasCalls(); + bool HasCalls = MFI->hasCalls(); // Get the function call frame set-up and tear-down instruction opcode int FrameSetupOpcode = RegInfo->getCallFrameSetupOpcode(); @@ -162,8 +162,8 @@ void PEI::calculateCallsInformation(MachineFunction &Fn) { HasCalls = true; } - FFI->setHasCalls(HasCalls); - FFI->setMaxCallFrameSize(MaxCallFrameSize); + MFI->setHasCalls(HasCalls); + MFI->setMaxCallFrameSize(MaxCallFrameSize); for (std::vector<MachineBasicBlock::iterator>::iterator i = FrameSDOps.begin(), e = FrameSDOps.end(); i != e; ++i) { @@ -184,7 +184,7 @@ void PEI::calculateCallsInformation(MachineFunction &Fn) { void PEI::calculateCalleeSavedRegisters(MachineFunction &Fn) { const TargetRegisterInfo *RegInfo = Fn.getTarget().getRegisterInfo(); const TargetFrameInfo *TFI = Fn.getTarget().getFrameInfo(); - MachineFrameInfo *FFI = Fn.getFrameInfo(); + MachineFrameInfo *MFI = Fn.getFrameInfo(); // Get the callee saved register list... const unsigned *CSRegs = RegInfo->getCalleeSavedRegs(&Fn); @@ -197,6 +197,10 @@ void PEI::calculateCalleeSavedRegisters(MachineFunction &Fn) { if (CSRegs == 0 || CSRegs[0] == 0) return; + // In Naked functions we aren't going to save any registers. + if (Fn.getFunction()->hasFnAttr(Attribute::Naked)) + return; + // Figure out which *callee saved* registers are modified by the current // function, thus needing to be saved and restored in the prolog/epilog. const TargetRegisterClass * const *CSRegClasses = @@ -255,19 +259,19 @@ void PEI::calculateCalleeSavedRegisters(MachineFunction &Fn) { // the TargetRegisterClass if the stack alignment is smaller. Use the // min. Align = std::min(Align, StackAlign); - FrameIdx = FFI->CreateStackObject(RC->getSize(), Align, true); + FrameIdx = MFI->CreateStackObject(RC->getSize(), Align, true); if ((unsigned)FrameIdx < MinCSFrameIndex) MinCSFrameIndex = FrameIdx; if ((unsigned)FrameIdx > MaxCSFrameIndex) MaxCSFrameIndex = FrameIdx; } else { // Spill it to the stack where we must. 
- FrameIdx = FFI->CreateFixedObject(RC->getSize(), FixedSlot->Offset, + FrameIdx = MFI->CreateFixedObject(RC->getSize(), FixedSlot->Offset, true, false); } I->setFrameIdx(FrameIdx); } - FFI->setCalleeSavedInfo(CSI); + MFI->setCalleeSavedInfo(CSI); } /// insertCSRSpillsAndRestores - Insert spill and restore code for @@ -275,10 +279,10 @@ void PEI::calculateCalleeSavedRegisters(MachineFunction &Fn) { /// void PEI::insertCSRSpillsAndRestores(MachineFunction &Fn) { // Get callee saved register information. - MachineFrameInfo *FFI = Fn.getFrameInfo(); - const std::vector<CalleeSavedInfo> &CSI = FFI->getCalleeSavedInfo(); + MachineFrameInfo *MFI = Fn.getFrameInfo(); + const std::vector<CalleeSavedInfo> &CSI = MFI->getCalleeSavedInfo(); - FFI->setCalleeSavedInfoValid(true); + MFI->setCalleeSavedInfoValid(true); // Early exit if no callee saved registers are modified! if (CSI.empty()) @@ -436,14 +440,14 @@ void PEI::insertCSRSpillsAndRestores(MachineFunction &Fn) { /// AdjustStackOffset - Helper function used to adjust the stack frame offset. static inline void -AdjustStackOffset(MachineFrameInfo *FFI, int FrameIdx, +AdjustStackOffset(MachineFrameInfo *MFI, int FrameIdx, bool StackGrowsDown, int64_t &Offset, unsigned &MaxAlign) { // If the stack grows down, add the object size to find the lowest address. if (StackGrowsDown) - Offset += FFI->getObjectSize(FrameIdx); + Offset += MFI->getObjectSize(FrameIdx); - unsigned Align = FFI->getObjectAlignment(FrameIdx); + unsigned Align = MFI->getObjectAlignment(FrameIdx); // If the alignment of this object is greater than that of the stack, then // increase the stack alignment to match. @@ -453,10 +457,10 @@ AdjustStackOffset(MachineFrameInfo *FFI, int FrameIdx, Offset = (Offset + Align - 1) / Align * Align; if (StackGrowsDown) { - FFI->setObjectOffset(FrameIdx, -Offset); // Set the computed offset + MFI->setObjectOffset(FrameIdx, -Offset); // Set the computed offset } else { - FFI->setObjectOffset(FrameIdx, Offset); - Offset += FFI->getObjectSize(FrameIdx); + MFI->setObjectOffset(FrameIdx, Offset); + Offset += MFI->getObjectSize(FrameIdx); } } @@ -470,7 +474,7 @@ void PEI::calculateFrameObjectOffsets(MachineFunction &Fn) { TFI.getStackGrowthDirection() == TargetFrameInfo::StackGrowsDown; // Loop over all of the stack objects, assigning sequential addresses... - MachineFrameInfo *FFI = Fn.getFrameInfo(); + MachineFrameInfo *MFI = Fn.getFrameInfo(); // Start at the beginning of the local area. // The Offset is the distance from the stack top in the direction @@ -487,17 +491,17 @@ void PEI::calculateFrameObjectOffsets(MachineFunction &Fn) { // We currently don't support filling in holes in between fixed sized // objects, so we adjust 'Offset' to point to the end of last fixed sized // preallocated object. - for (int i = FFI->getObjectIndexBegin(); i != 0; ++i) { + for (int i = MFI->getObjectIndexBegin(); i != 0; ++i) { int64_t FixedOff; if (StackGrowsDown) { // The maximum distance from the stack pointer is at lower address of // the object -- which is given by offset. For down growing stack // the offset is negative, so we negate the offset to get the distance. - FixedOff = -FFI->getObjectOffset(i); + FixedOff = -MFI->getObjectOffset(i); } else { // The maximum distance from the start pointer is at the upper // address of the object. 
- FixedOff = FFI->getObjectOffset(i) + FFI->getObjectSize(i); + FixedOff = MFI->getObjectOffset(i) + MFI->getObjectSize(i); } if (FixedOff > Offset) Offset = FixedOff; } @@ -506,29 +510,29 @@ void PEI::calculateFrameObjectOffsets(MachineFunction &Fn) { // callee saved registers. if (StackGrowsDown) { for (unsigned i = MinCSFrameIndex; i <= MaxCSFrameIndex; ++i) { - // If stack grows down, we need to add size of find the lowest + // If the stack grows down, we need to add the size to find the lowest // address of the object. - Offset += FFI->getObjectSize(i); + Offset += MFI->getObjectSize(i); - unsigned Align = FFI->getObjectAlignment(i); + unsigned Align = MFI->getObjectAlignment(i); // Adjust to alignment boundary Offset = (Offset+Align-1)/Align*Align; - FFI->setObjectOffset(i, -Offset); // Set the computed offset + MFI->setObjectOffset(i, -Offset); // Set the computed offset } } else { int MaxCSFI = MaxCSFrameIndex, MinCSFI = MinCSFrameIndex; for (int i = MaxCSFI; i >= MinCSFI ; --i) { - unsigned Align = FFI->getObjectAlignment(i); + unsigned Align = MFI->getObjectAlignment(i); // Adjust to alignment boundary Offset = (Offset+Align-1)/Align*Align; - FFI->setObjectOffset(i, Offset); - Offset += FFI->getObjectSize(i); + MFI->setObjectOffset(i, Offset); + Offset += MFI->getObjectSize(i); } } - unsigned MaxAlign = FFI->getMaxAlignment(); + unsigned MaxAlign = MFI->getMaxAlignment(); // Make sure the special register scavenging spill slot is closest to the // frame pointer if a frame pointer is required. @@ -536,28 +540,28 @@ void PEI::calculateFrameObjectOffsets(MachineFunction &Fn) { if (RS && RegInfo->hasFP(Fn) && !RegInfo->needsStackRealignment(Fn)) { int SFI = RS->getScavengingFrameIndex(); if (SFI >= 0) - AdjustStackOffset(FFI, SFI, StackGrowsDown, Offset, MaxAlign); + AdjustStackOffset(MFI, SFI, StackGrowsDown, Offset, MaxAlign); } // Make sure that the stack protector comes before the local variables on the // stack. - if (FFI->getStackProtectorIndex() >= 0) - AdjustStackOffset(FFI, FFI->getStackProtectorIndex(), StackGrowsDown, + if (MFI->getStackProtectorIndex() >= 0) + AdjustStackOffset(MFI, MFI->getStackProtectorIndex(), StackGrowsDown, Offset, MaxAlign); // Then assign frame offsets to stack objects that are not used to spill // callee saved registers. - for (unsigned i = 0, e = FFI->getObjectIndexEnd(); i != e; ++i) { + for (unsigned i = 0, e = MFI->getObjectIndexEnd(); i != e; ++i) { if (i >= MinCSFrameIndex && i <= MaxCSFrameIndex) continue; if (RS && (int)i == RS->getScavengingFrameIndex()) continue; - if (FFI->isDeadObjectIndex(i)) + if (MFI->isDeadObjectIndex(i)) continue; - if (FFI->getStackProtectorIndex() == (int)i) + if (MFI->getStackProtectorIndex() == (int)i) continue; - AdjustStackOffset(FFI, i, StackGrowsDown, Offset, MaxAlign); + AdjustStackOffset(MFI, i, StackGrowsDown, Offset, MaxAlign); } // Make sure the special register scavenging spill slot is closest to the @@ -565,15 +569,15 @@ void PEI::calculateFrameObjectOffsets(MachineFunction &Fn) { if (RS && (!RegInfo->hasFP(Fn) || RegInfo->needsStackRealignment(Fn))) { int SFI = RS->getScavengingFrameIndex(); if (SFI >= 0) - AdjustStackOffset(FFI, SFI, StackGrowsDown, Offset, MaxAlign); + AdjustStackOffset(MFI, SFI, StackGrowsDown, Offset, MaxAlign); } if (!RegInfo->targetHandlesStackFrameRounding()) { // If we have reserved argument space for call sites in the function // immediately on entry to the current function, count it as part of the // overall stack size. 
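(Editorial aside, not part of the patch: the offset assignment above repeatedly rounds the running offset up to each object's alignment with (Offset + Align - 1) / Align * Align. A self-contained worked example of that formula, assuming only that Align is positive:)

#include <cassert>
#include <cstdint>

// Round Offset up to the next multiple of Align, as AdjustStackOffset and
// calculateFrameObjectOffsets do when laying out stack objects.
static int64_t roundUpToAlignment(int64_t Offset, unsigned Align) {
  return (Offset + Align - 1) / Align * Align;
}

int main() {
  assert(roundUpToAlignment(0, 8)  == 0);   // already aligned
  assert(roundUpToAlignment(13, 8) == 16);  // 13 bumps up to the next 8-byte boundary
  assert(roundUpToAlignment(16, 8) == 16);  // exact multiples are left unchanged
  return 0;
}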
- if (FFI->hasCalls() && RegInfo->hasReservedCallFrame(Fn)) - Offset += FFI->getMaxCallFrameSize(); + if (MFI->hasCalls() && RegInfo->hasReservedCallFrame(Fn)) + Offset += MFI->getMaxCallFrameSize(); // Round up the size to a multiple of the alignment. If the function has // any calls or alloca's, align to the target's StackAlignment value to @@ -581,8 +585,8 @@ void PEI::calculateFrameObjectOffsets(MachineFunction &Fn) { // otherwise, for leaf functions, align to the TransientStackAlignment // value. unsigned StackAlign; - if (FFI->hasCalls() || FFI->hasVarSizedObjects() || - (RegInfo->needsStackRealignment(Fn) && FFI->getObjectIndexEnd() != 0)) + if (MFI->hasCalls() || MFI->hasVarSizedObjects() || + (RegInfo->needsStackRealignment(Fn) && MFI->getObjectIndexEnd() != 0)) StackAlign = TFI.getStackAlignment(); else StackAlign = TFI.getTransientStackAlignment(); @@ -594,7 +598,7 @@ void PEI::calculateFrameObjectOffsets(MachineFunction &Fn) { } // Update frame info to pretend that this is part of the stack... - FFI->setStackSize(Offset - LocalAreaOffset); + MFI->setStackSize(Offset - LocalAreaOffset); } diff --git a/lib/CodeGen/RegAllocFast.cpp b/lib/CodeGen/RegAllocFast.cpp new file mode 100644 index 0000000..2caf1df --- /dev/null +++ b/lib/CodeGen/RegAllocFast.cpp @@ -0,0 +1,932 @@ +//===-- RegAllocFast.cpp - A fast register allocator for debug code -------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This register allocator allocates registers to a basic block at a time, +// attempting to keep values in registers and reusing registers as appropriate. +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "regalloc" +#include "llvm/BasicBlock.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineInstr.h" +#include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/LiveVariables.h" +#include "llvm/CodeGen/Passes.h" +#include "llvm/CodeGen/RegAllocRegistry.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/IndexedMap.h" +#include "llvm/ADT/SmallSet.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/ADT/STLExtras.h" +#include <algorithm> +using namespace llvm; + +STATISTIC(NumStores, "Number of stores added"); +STATISTIC(NumLoads , "Number of loads added"); + +static RegisterRegAlloc + fastRegAlloc("fast", "fast register allocator", createFastRegisterAllocator); + +namespace { + class RAFast : public MachineFunctionPass { + public: + static char ID; + RAFast() : MachineFunctionPass(&ID), StackSlotForVirtReg(-1) {} + private: + const TargetMachine *TM; + MachineFunction *MF; + const TargetRegisterInfo *TRI; + const TargetInstrInfo *TII; + + // StackSlotForVirtReg - Maps virtual regs to the frame index where these + // values are spilled. + IndexedMap<int, VirtReg2IndexFunctor> StackSlotForVirtReg; + + // Virt2PhysRegMap - This map contains entries for each virtual register + // that is currently available in a physical register. 
+ IndexedMap<unsigned, VirtReg2IndexFunctor> Virt2PhysRegMap; + + unsigned &getVirt2PhysRegMapSlot(unsigned VirtReg) { + return Virt2PhysRegMap[VirtReg]; + } + + // PhysRegsUsed - This array is effectively a map, containing entries for + // each physical register that currently has a value (ie, it is in + // Virt2PhysRegMap). The value mapped to is the virtual register + // corresponding to the physical register (the inverse of the + // Virt2PhysRegMap), or 0. The value is set to 0 if this register is pinned + // because it is used by a future instruction, and to -2 if it is not + // allocatable. If the entry for a physical register is -1, then the + // physical register is "not in the map". + // + std::vector<int> PhysRegsUsed; + + // UsedInInstr - BitVector of physregs that are used in the current + // instruction, and so cannot be allocated. + BitVector UsedInInstr; + + // Virt2LastUseMap - This maps each virtual register to its last use + // (MachineInstr*, operand index pair). + IndexedMap<std::pair<MachineInstr*, unsigned>, VirtReg2IndexFunctor> + Virt2LastUseMap; + + std::pair<MachineInstr*,unsigned>& getVirtRegLastUse(unsigned Reg) { + assert(TargetRegisterInfo::isVirtualRegister(Reg) && "Illegal VirtReg!"); + return Virt2LastUseMap[Reg]; + } + + // VirtRegModified - This bitset contains information about which virtual + // registers need to be spilled back to memory when their registers are + // scavenged. If a virtual register has simply been rematerialized, there + // is no reason to spill it to memory when we need the register back. + // + BitVector VirtRegModified; + + // UsedInMultipleBlocks - Tracks whether a particular register is used in + // more than one block. + BitVector UsedInMultipleBlocks; + + void markVirtRegModified(unsigned Reg, bool Val = true) { + assert(TargetRegisterInfo::isVirtualRegister(Reg) && "Illegal VirtReg!"); + Reg -= TargetRegisterInfo::FirstVirtualRegister; + if (Val) + VirtRegModified.set(Reg); + else + VirtRegModified.reset(Reg); + } + + bool isVirtRegModified(unsigned Reg) const { + assert(TargetRegisterInfo::isVirtualRegister(Reg) && "Illegal VirtReg!"); + assert(Reg - TargetRegisterInfo::FirstVirtualRegister < + VirtRegModified.size() && "Illegal virtual register!"); + return VirtRegModified[Reg - TargetRegisterInfo::FirstVirtualRegister]; + } + + public: + virtual const char *getPassName() const { + return "Fast Register Allocator"; + } + + virtual void getAnalysisUsage(AnalysisUsage &AU) const { + AU.setPreservesCFG(); + AU.addRequired<LiveVariables>(); + AU.addRequiredID(PHIEliminationID); + AU.addRequiredID(TwoAddressInstructionPassID); + MachineFunctionPass::getAnalysisUsage(AU); + } + + private: + /// runOnMachineFunction - Register allocate the whole function + bool runOnMachineFunction(MachineFunction &Fn); + + /// AllocateBasicBlock - Register allocate the specified basic block. + void AllocateBasicBlock(MachineBasicBlock &MBB); + + + /// areRegsEqual - This method returns true if the specified registers are + /// related to each other. To do this, it checks to see if they are equal + /// or if the first register is in the alias set of the second register. + /// + bool areRegsEqual(unsigned R1, unsigned R2) const { + if (R1 == R2) return true; + for (const unsigned *AliasSet = TRI->getAliasSet(R2); + *AliasSet; ++AliasSet) { + if (*AliasSet == R1) return true; + } + return false; + } + + /// getStackSpaceFor - This returns the frame index of the specified virtual + /// register on the stack, allocating space if necessary. 
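(Editorial aside, not part of the patch: the PhysRegsUsed comment above describes a small integer encoding. The hypothetical helpers below spell that encoding out, with a plain std::vector standing in for RAFast's member:)

#include <vector>

// Encoding used by PhysRegsUsed in the new allocator:
//   -2  register is not allocatable
//   -1  register is free ("not in the map")
//    0  register is pinned/reserved for a later use
//   >0  the virtual register currently held in this physical register
static bool isFree(const std::vector<int> &PhysRegsUsed, unsigned Reg) {
  return PhysRegsUsed[Reg] == -1;
}
static bool isAllocatable(const std::vector<int> &PhysRegsUsed, unsigned Reg) {
  return PhysRegsUsed[Reg] != -2;
}
static unsigned heldVirtReg(const std::vector<int> &PhysRegsUsed, unsigned Reg) {
  return PhysRegsUsed[Reg] > 0 ? unsigned(PhysRegsUsed[Reg]) : 0;
}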
+ int getStackSpaceFor(unsigned VirtReg, const TargetRegisterClass *RC); + + /// removePhysReg - This method marks the specified physical register as no + /// longer being in use. + /// + void removePhysReg(unsigned PhysReg); + + /// spillVirtReg - This method spills the value specified by PhysReg into + /// the virtual register slot specified by VirtReg. It then updates the RA + /// data structures to indicate the fact that PhysReg is now available. + /// + void spillVirtReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, + unsigned VirtReg, unsigned PhysReg); + + /// spillPhysReg - This method spills the specified physical register into + /// the virtual register slot associated with it. If OnlyVirtRegs is set to + /// true, then the request is ignored if the physical register does not + /// contain a virtual register. + /// + void spillPhysReg(MachineBasicBlock &MBB, MachineInstr *I, + unsigned PhysReg, bool OnlyVirtRegs = false); + + /// assignVirtToPhysReg - This method updates local state so that we know + /// that PhysReg is the proper container for VirtReg now. The physical + /// register must not be used for anything else when this is called. + /// + void assignVirtToPhysReg(unsigned VirtReg, unsigned PhysReg); + + /// isPhysRegAvailable - Return true if the specified physical register is + /// free and available for use. This also includes checking to see if + /// aliased registers are all free... + /// + bool isPhysRegAvailable(unsigned PhysReg) const; + + /// isPhysRegSpillable - Can PhysReg be freed by spilling? + bool isPhysRegSpillable(unsigned PhysReg) const; + + /// getFreeReg - Look to see if there is a free register available in the + /// specified register class. If not, return 0. + /// + unsigned getFreeReg(const TargetRegisterClass *RC); + + /// getReg - Find a physical register to hold the specified virtual + /// register. If all compatible physical registers are used, this method + /// spills the last used virtual register to the stack, and uses that + /// register. If NoFree is true, that means the caller knows there isn't + /// a free register, do not call getFreeReg(). + unsigned getReg(MachineBasicBlock &MBB, MachineInstr *MI, + unsigned VirtReg, bool NoFree = false); + + /// reloadVirtReg - This method transforms the specified virtual + /// register use to refer to a physical register. This method may do this + /// in one of several ways: if the register is available in a physical + /// register already, it uses that physical register. If the value is not + /// in a physical register, and if there are physical registers available, + /// it loads it into a register: PhysReg if that is an available physical + /// register, otherwise any physical register of the right class. + /// If register pressure is high, and it is possible, it tries to fold the + /// load of the virtual register into the instruction itself. It avoids + /// doing this if register pressure is low to improve the chance that + /// subsequent instructions can use the reloaded value. This method + /// returns the modified instruction. + /// + MachineInstr *reloadVirtReg(MachineBasicBlock &MBB, MachineInstr *MI, + unsigned OpNum, SmallSet<unsigned, 4> &RRegs, + unsigned PhysReg); + + void reloadPhysReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator &I, + unsigned PhysReg); + }; + char RAFast::ID = 0; +} + +/// getStackSpaceFor - This allocates space for the specified virtual register +/// to be held on the stack. 
+int RAFast::getStackSpaceFor(unsigned VirtReg, const TargetRegisterClass *RC) { + // Find the location Reg would belong... + int SS = StackSlotForVirtReg[VirtReg]; + if (SS != -1) + return SS; // Already has space allocated? + + // Allocate a new stack object for this spill location... + int FrameIdx = MF->getFrameInfo()->CreateSpillStackObject(RC->getSize(), + RC->getAlignment()); + + // Assign the slot. + StackSlotForVirtReg[VirtReg] = FrameIdx; + return FrameIdx; +} + + +/// removePhysReg - This method marks the specified physical register as no +/// longer being in use. +/// +void RAFast::removePhysReg(unsigned PhysReg) { + PhysRegsUsed[PhysReg] = -1; // PhyReg no longer used +} + + +/// spillVirtReg - This method spills the value specified by PhysReg into the +/// virtual register slot specified by VirtReg. It then updates the RA data +/// structures to indicate the fact that PhysReg is now available. +/// +void RAFast::spillVirtReg(MachineBasicBlock &MBB, + MachineBasicBlock::iterator I, + unsigned VirtReg, unsigned PhysReg) { + assert(VirtReg && "Spilling a physical register is illegal!" + " Must not have appropriate kill for the register or use exists beyond" + " the intended one."); + DEBUG(dbgs() << " Spilling register " << TRI->getName(PhysReg) + << " containing %reg" << VirtReg); + + if (!isVirtRegModified(VirtReg)) { + DEBUG(dbgs() << " which has not been modified, so no store necessary!"); + std::pair<MachineInstr*, unsigned> &LastUse = getVirtRegLastUse(VirtReg); + if (LastUse.first) + LastUse.first->getOperand(LastUse.second).setIsKill(); + } else { + // Otherwise, there is a virtual register corresponding to this physical + // register. We only need to spill it into its stack slot if it has been + // modified. + const TargetRegisterClass *RC = MF->getRegInfo().getRegClass(VirtReg); + int FrameIndex = getStackSpaceFor(VirtReg, RC); + DEBUG(dbgs() << " to stack slot #" << FrameIndex); + // If the instruction reads the register that's spilled, (e.g. this can + // happen if it is a move to a physical register), then the spill + // instruction is not a kill. + bool isKill = !(I != MBB.end() && I->readsRegister(PhysReg)); + TII->storeRegToStackSlot(MBB, I, PhysReg, isKill, FrameIndex, RC); + ++NumStores; // Update statistics + } + + getVirt2PhysRegMapSlot(VirtReg) = 0; // VirtReg no longer available + + DEBUG(dbgs() << '\n'); + removePhysReg(PhysReg); +} + + +/// spillPhysReg - This method spills the specified physical register into the +/// virtual register slot associated with it. If OnlyVirtRegs is set to true, +/// then the request is ignored if the physical register does not contain a +/// virtual register. +/// +void RAFast::spillPhysReg(MachineBasicBlock &MBB, MachineInstr *I, + unsigned PhysReg, bool OnlyVirtRegs) { + if (PhysRegsUsed[PhysReg] != -1) { // Only spill it if it's used! + assert(PhysRegsUsed[PhysReg] != -2 && "Non allocable reg used!"); + if (PhysRegsUsed[PhysReg] || !OnlyVirtRegs) + spillVirtReg(MBB, I, PhysRegsUsed[PhysReg], PhysReg); + return; + } + + // If the selected register aliases any other registers, we must make + // sure that one of the aliases isn't alive. + for (const unsigned *AliasSet = TRI->getAliasSet(PhysReg); + *AliasSet; ++AliasSet) { + if (PhysRegsUsed[*AliasSet] == -1 || // Spill aliased register. + PhysRegsUsed[*AliasSet] == -2) // If allocatable. 
+ continue; + + if (PhysRegsUsed[*AliasSet]) + spillVirtReg(MBB, I, PhysRegsUsed[*AliasSet], *AliasSet); + } +} + + +/// assignVirtToPhysReg - This method updates local state so that we know +/// that PhysReg is the proper container for VirtReg now. The physical +/// register must not be used for anything else when this is called. +/// +void RAFast::assignVirtToPhysReg(unsigned VirtReg, unsigned PhysReg) { + assert(PhysRegsUsed[PhysReg] == -1 && "Phys reg already assigned!"); + // Update information to note the fact that this register was just used, and + // it holds VirtReg. + PhysRegsUsed[PhysReg] = VirtReg; + getVirt2PhysRegMapSlot(VirtReg) = PhysReg; + UsedInInstr.set(PhysReg); +} + + +/// isPhysRegAvailable - Return true if the specified physical register is free +/// and available for use. This also includes checking to see if aliased +/// registers are all free... +/// +bool RAFast::isPhysRegAvailable(unsigned PhysReg) const { + if (PhysRegsUsed[PhysReg] != -1) return false; + + // If the selected register aliases any other allocated registers, it is + // not free! + for (const unsigned *AliasSet = TRI->getAliasSet(PhysReg); + *AliasSet; ++AliasSet) + if (PhysRegsUsed[*AliasSet] >= 0) // Aliased register in use? + return false; // Can't use this reg then. + return true; +} + +/// isPhysRegSpillable - Return true if the specified physical register can be +/// spilled for use in the current instruction. +/// +bool RAFast::isPhysRegSpillable(unsigned PhysReg) const { + // Test that PhysReg and all aliases are either free or assigned to a VirtReg + // that is not used in the instruction. + if (PhysRegsUsed[PhysReg] != -1 && + (PhysRegsUsed[PhysReg] <= 0 || UsedInInstr.test(PhysReg))) + return false; + + for (const unsigned *AliasSet = TRI->getAliasSet(PhysReg); + *AliasSet; ++AliasSet) + if (PhysRegsUsed[*AliasSet] != -1 && + (PhysRegsUsed[*AliasSet] <= 0 || UsedInInstr.test(*AliasSet))) + return false; + return true; +} + + +/// getFreeReg - Look to see if there is a free register available in the +/// specified register class. If not, return 0. +/// +unsigned RAFast::getFreeReg(const TargetRegisterClass *RC) { + // Get iterators defining the range of registers that are valid to allocate in + // this class, which also specifies the preferred allocation order. + TargetRegisterClass::iterator RI = RC->allocation_order_begin(*MF); + TargetRegisterClass::iterator RE = RC->allocation_order_end(*MF); + + for (; RI != RE; ++RI) + if (isPhysRegAvailable(*RI)) { // Is reg unused? + assert(*RI != 0 && "Cannot use register!"); + return *RI; // Found an unused register! + } + return 0; +} + + +/// getReg - Find a physical register to hold the specified virtual +/// register. If all compatible physical registers are used, this method spills +/// the last used virtual register to the stack, and uses that register. +/// +unsigned RAFast::getReg(MachineBasicBlock &MBB, MachineInstr *I, + unsigned VirtReg, bool NoFree) { + const TargetRegisterClass *RC = MF->getRegInfo().getRegClass(VirtReg); + + // First check to see if we have a free register of the requested type... + unsigned PhysReg = NoFree ? 0 : getFreeReg(RC); + + if (PhysReg != 0) { + // Assign the register. + assignVirtToPhysReg(VirtReg, PhysReg); + return PhysReg; + } + + // If we didn't find an unused register, scavenge one now! Don't be fancy, + // just grab the first possible register. 
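(Editorial aside, not part of the patch: isPhysRegAvailable and isPhysRegSpillable above both walk TargetRegisterInfo::getAliasSet, which yields a 0-terminated array of register numbers. A tiny sketch of that pattern, with the AliasSet and PhysRegsUsed arrays as hypothetical inputs:)

// Return true if the register itself or any alias currently holds a value,
// i.e. its PhysRegsUsed entry is 0 or a virtual register number.
static bool regOrAliasInUse(int RegState, const unsigned *AliasSet,
                            const int *PhysRegsUsed) {
  if (RegState >= 0)
    return true;                       // the register itself is occupied
  for (; *AliasSet; ++AliasSet)        // alias list is terminated by register 0
    if (PhysRegsUsed[*AliasSet] >= 0)  // an alias holds a value
      return true;
  return false;
}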
+ TargetRegisterClass::iterator RI = RC->allocation_order_begin(*MF); + TargetRegisterClass::iterator RE = RC->allocation_order_end(*MF); + + for (; RI != RE; ++RI) + if (isPhysRegSpillable(*RI)) { + PhysReg = *RI; + break; + } + + assert(PhysReg && "Physical register not assigned!?!?"); + spillPhysReg(MBB, I, PhysReg); + assignVirtToPhysReg(VirtReg, PhysReg); + return PhysReg; +} + + +/// reloadVirtReg - This method transforms the specified virtual +/// register use to refer to a physical register. This method may do this in +/// one of several ways: if the register is available in a physical register +/// already, it uses that physical register. If the value is not in a physical +/// register, and if there are physical registers available, it loads it into a +/// register: PhysReg if that is an available physical register, otherwise any +/// register. If register pressure is high, and it is possible, it tries to +/// fold the load of the virtual register into the instruction itself. It +/// avoids doing this if register pressure is low to improve the chance that +/// subsequent instructions can use the reloaded value. This method returns +/// the modified instruction. +/// +MachineInstr *RAFast::reloadVirtReg(MachineBasicBlock &MBB, MachineInstr *MI, + unsigned OpNum, + SmallSet<unsigned, 4> &ReloadedRegs, + unsigned PhysReg) { + unsigned VirtReg = MI->getOperand(OpNum).getReg(); + + // If the virtual register is already available, just update the instruction + // and return. + if (unsigned PR = getVirt2PhysRegMapSlot(VirtReg)) { + MI->getOperand(OpNum).setReg(PR); // Assign the input register + if (!MI->isDebugValue()) { + // Do not do these for DBG_VALUE as they can affect codegen. + UsedInInstr.set(PR); + getVirtRegLastUse(VirtReg) = std::make_pair(MI, OpNum); + } + return MI; + } + + // Otherwise, we need to fold it into the current instruction, or reload it. + // If we have registers available to hold the value, use them. + const TargetRegisterClass *RC = MF->getRegInfo().getRegClass(VirtReg); + // If we already have a PhysReg (this happens when the instruction is a + // reg-to-reg copy with a PhysReg destination) use that. + if (!PhysReg || !TargetRegisterInfo::isPhysicalRegister(PhysReg) || + !isPhysRegAvailable(PhysReg)) + PhysReg = getFreeReg(RC); + int FrameIndex = getStackSpaceFor(VirtReg, RC); + + if (PhysReg) { // Register is available, allocate it! + assignVirtToPhysReg(VirtReg, PhysReg); + } else { // No registers available. + // Force some poor hapless value out of the register file to + // make room for the new register, and reload it. 
+ PhysReg = getReg(MBB, MI, VirtReg, true); + } + + markVirtRegModified(VirtReg, false); // Note that this reg was just reloaded + + DEBUG(dbgs() << " Reloading %reg" << VirtReg << " into " + << TRI->getName(PhysReg) << "\n"); + + // Add move instruction(s) + TII->loadRegFromStackSlot(MBB, MI, PhysReg, FrameIndex, RC); + ++NumLoads; // Update statistics + + MF->getRegInfo().setPhysRegUsed(PhysReg); + MI->getOperand(OpNum).setReg(PhysReg); // Assign the input register + getVirtRegLastUse(VirtReg) = std::make_pair(MI, OpNum); + + if (!ReloadedRegs.insert(PhysReg)) { + std::string msg; + raw_string_ostream Msg(msg); + Msg << "Ran out of registers during register allocation!"; + if (MI->isInlineAsm()) { + Msg << "\nPlease check your inline asm statement for invalid " + << "constraints:\n"; + MI->print(Msg, TM); + } + report_fatal_error(Msg.str()); + } + for (const unsigned *SubRegs = TRI->getSubRegisters(PhysReg); + *SubRegs; ++SubRegs) { + if (ReloadedRegs.insert(*SubRegs)) continue; + + std::string msg; + raw_string_ostream Msg(msg); + Msg << "Ran out of registers during register allocation!"; + if (MI->isInlineAsm()) { + Msg << "\nPlease check your inline asm statement for invalid " + << "constraints:\n"; + MI->print(Msg, TM); + } + report_fatal_error(Msg.str()); + } + + return MI; +} + +/// isReadModWriteImplicitKill - True if this is an implicit kill for a +/// read/mod/write register, i.e. update partial register. +static bool isReadModWriteImplicitKill(MachineInstr *MI, unsigned Reg) { + for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { + MachineOperand &MO = MI->getOperand(i); + if (MO.isReg() && MO.getReg() == Reg && MO.isImplicit() && + MO.isDef() && !MO.isDead()) + return true; + } + return false; +} + +/// isReadModWriteImplicitDef - True if this is an implicit def for a +/// read/mod/write register, i.e. update partial register. +static bool isReadModWriteImplicitDef(MachineInstr *MI, unsigned Reg) { + for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { + MachineOperand &MO = MI->getOperand(i); + if (MO.isReg() && MO.getReg() == Reg && MO.isImplicit() && + !MO.isDef() && MO.isKill()) + return true; + } + return false; +} + +void RAFast::AllocateBasicBlock(MachineBasicBlock &MBB) { + // loop over each instruction + MachineBasicBlock::iterator MII = MBB.begin(); + + DEBUG({ + const BasicBlock *LBB = MBB.getBasicBlock(); + if (LBB) + dbgs() << "\nStarting RegAlloc of BB: " << LBB->getName(); + }); + + // Add live-in registers as active. + for (MachineBasicBlock::livein_iterator I = MBB.livein_begin(), + E = MBB.livein_end(); I != E; ++I) { + unsigned Reg = *I; + MF->getRegInfo().setPhysRegUsed(Reg); + PhysRegsUsed[Reg] = 0; // It is free and reserved now + for (const unsigned *SubRegs = TRI->getSubRegisters(Reg); + *SubRegs; ++SubRegs) { + if (PhysRegsUsed[*SubRegs] == -2) continue; + PhysRegsUsed[*SubRegs] = 0; // It is free and reserved now + MF->getRegInfo().setPhysRegUsed(*SubRegs); + } + } + + // Otherwise, sequentially allocate each instruction in the MBB. + while (MII != MBB.end()) { + MachineInstr *MI = MII++; + const TargetInstrDesc &TID = MI->getDesc(); + DEBUG({ + dbgs() << "\nStarting RegAlloc of: " << *MI; + dbgs() << " Regs have values: "; + for (unsigned i = 0; i != TRI->getNumRegs(); ++i) + if (PhysRegsUsed[i] != -1 && PhysRegsUsed[i] != -2) + dbgs() << "[" << TRI->getName(i) + << ",%reg" << PhysRegsUsed[i] << "] "; + dbgs() << '\n'; + }); + + // Track registers used by instruction. 
+ UsedInInstr.reset(); + + // Determine whether this is a copy instruction. The cases where the + // source or destination are phys regs are handled specially. + unsigned SrcCopyReg, DstCopyReg, SrcCopySubReg, DstCopySubReg; + unsigned SrcCopyPhysReg = 0U; + bool isCopy = TII->isMoveInstr(*MI, SrcCopyReg, DstCopyReg, + SrcCopySubReg, DstCopySubReg); + if (isCopy && TargetRegisterInfo::isVirtualRegister(SrcCopyReg)) + SrcCopyPhysReg = getVirt2PhysRegMapSlot(SrcCopyReg); + + // Loop over the implicit uses, making sure they don't get reallocated. + if (TID.ImplicitUses) { + for (const unsigned *ImplicitUses = TID.ImplicitUses; + *ImplicitUses; ++ImplicitUses) + UsedInInstr.set(*ImplicitUses); + } + + SmallVector<unsigned, 8> Kills; + for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { + MachineOperand &MO = MI->getOperand(i); + if (!MO.isReg() || !MO.isKill()) continue; + + if (!MO.isImplicit()) + Kills.push_back(MO.getReg()); + else if (!isReadModWriteImplicitKill(MI, MO.getReg())) + // These are extra physical register kills when a sub-register + // is defined (def of a sub-register is a read/mod/write of the + // larger registers). Ignore. + Kills.push_back(MO.getReg()); + } + + // If any physical regs are earlyclobber, spill any value they might + // have in them, then mark them unallocatable. + // If any virtual regs are earlyclobber, allocate them now (before + // freeing inputs that are killed). + if (MI->isInlineAsm()) { + for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { + MachineOperand &MO = MI->getOperand(i); + if (!MO.isReg() || !MO.isDef() || !MO.isEarlyClobber() || + !MO.getReg()) + continue; + + if (TargetRegisterInfo::isVirtualRegister(MO.getReg())) { + unsigned DestVirtReg = MO.getReg(); + unsigned DestPhysReg; + + // If DestVirtReg already has a value, use it. + if (!(DestPhysReg = getVirt2PhysRegMapSlot(DestVirtReg))) + DestPhysReg = getReg(MBB, MI, DestVirtReg); + MF->getRegInfo().setPhysRegUsed(DestPhysReg); + markVirtRegModified(DestVirtReg); + getVirtRegLastUse(DestVirtReg) = + std::make_pair((MachineInstr*)0, 0); + DEBUG(dbgs() << " Assigning " << TRI->getName(DestPhysReg) + << " to %reg" << DestVirtReg << "\n"); + MO.setReg(DestPhysReg); // Assign the earlyclobber register + } else { + unsigned Reg = MO.getReg(); + if (PhysRegsUsed[Reg] == -2) continue; // Something like ESP. + // These are extra physical register defs when a sub-register + // is defined (def of a sub-register is a read/mod/write of the + // larger registers). Ignore. + if (isReadModWriteImplicitDef(MI, MO.getReg())) continue; + + MF->getRegInfo().setPhysRegUsed(Reg); + spillPhysReg(MBB, MI, Reg, true); // Spill any existing value in reg + PhysRegsUsed[Reg] = 0; // It is free and reserved now + + for (const unsigned *SubRegs = TRI->getSubRegisters(Reg); + *SubRegs; ++SubRegs) { + if (PhysRegsUsed[*SubRegs] == -2) continue; + MF->getRegInfo().setPhysRegUsed(*SubRegs); + PhysRegsUsed[*SubRegs] = 0; // It is free and reserved now + } + } + } + } + + // If a DBG_VALUE says something is located in a spilled register, + // change the DBG_VALUE to be undef, which prevents the register + // from being reloaded here. Doing that would change the generated + // code, unless another use immediately follows this instruction. 
+ if (MI->isDebugValue() && + MI->getNumOperands()==3 && MI->getOperand(0).isReg()) { + unsigned VirtReg = MI->getOperand(0).getReg(); + if (VirtReg && TargetRegisterInfo::isVirtualRegister(VirtReg) && + !getVirt2PhysRegMapSlot(VirtReg)) + MI->getOperand(0).setReg(0U); + } + + // Get the used operands into registers. This has the potential to spill + // incoming values if we are out of registers. Note that we completely + // ignore physical register uses here. We assume that if an explicit + // physical register is referenced by the instruction, that it is guaranteed + // to be live-in, or the input is badly hosed. + // + SmallSet<unsigned, 4> ReloadedRegs; + for (unsigned i = 0; i != MI->getNumOperands(); ++i) { + MachineOperand &MO = MI->getOperand(i); + // here we are looking for only used operands (never def&use) + if (MO.isReg() && !MO.isDef() && MO.getReg() && !MO.isImplicit() && + TargetRegisterInfo::isVirtualRegister(MO.getReg())) + MI = reloadVirtReg(MBB, MI, i, ReloadedRegs, + isCopy ? DstCopyReg : 0); + } + + // If this instruction is the last user of this register, kill the + // value, freeing the register being used, so it doesn't need to be + // spilled to memory. + // + for (unsigned i = 0, e = Kills.size(); i != e; ++i) { + unsigned VirtReg = Kills[i]; + unsigned PhysReg = VirtReg; + if (TargetRegisterInfo::isVirtualRegister(VirtReg)) { + // If the virtual register was never materialized into a register, it + // might not be in the map, but it won't hurt to zero it out anyway. + unsigned &PhysRegSlot = getVirt2PhysRegMapSlot(VirtReg); + PhysReg = PhysRegSlot; + PhysRegSlot = 0; + } else if (PhysRegsUsed[PhysReg] == -2) { + // Unallocatable register dead, ignore. + continue; + } else { + assert((!PhysRegsUsed[PhysReg] || PhysRegsUsed[PhysReg] == -1) && + "Silently clearing a virtual register?"); + } + + if (!PhysReg) continue; + + DEBUG(dbgs() << " Last use of " << TRI->getName(PhysReg) + << "[%reg" << VirtReg <<"], removing it from live set\n"); + removePhysReg(PhysReg); + for (const unsigned *SubRegs = TRI->getSubRegisters(PhysReg); + *SubRegs; ++SubRegs) { + if (PhysRegsUsed[*SubRegs] != -2) { + DEBUG(dbgs() << " Last use of " + << TRI->getName(*SubRegs) << "[%reg" << VirtReg + <<"], removing it from live set\n"); + removePhysReg(*SubRegs); + } + } + } + + // Track registers defined by instruction. + UsedInInstr.reset(); + + // Loop over all of the operands of the instruction, spilling registers that + // are defined, and marking explicit destinations in the PhysRegsUsed map. + for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { + MachineOperand &MO = MI->getOperand(i); + if (!MO.isReg() || !MO.isDef() || MO.isImplicit() || !MO.getReg() || + MO.isEarlyClobber() || + !TargetRegisterInfo::isPhysicalRegister(MO.getReg())) + continue; + + unsigned Reg = MO.getReg(); + if (PhysRegsUsed[Reg] == -2) continue; // Something like ESP. + // These are extra physical register defs when a sub-register + // is defined (def of a sub-register is a read/mod/write of the + // larger registers). Ignore. 
+ if (isReadModWriteImplicitDef(MI, MO.getReg())) continue; + + MF->getRegInfo().setPhysRegUsed(Reg); + spillPhysReg(MBB, MI, Reg, true); // Spill any existing value in reg + PhysRegsUsed[Reg] = 0; // It is free and reserved now + + for (const unsigned *SubRegs = TRI->getSubRegisters(Reg); + *SubRegs; ++SubRegs) { + if (PhysRegsUsed[*SubRegs] == -2) continue; + + MF->getRegInfo().setPhysRegUsed(*SubRegs); + PhysRegsUsed[*SubRegs] = 0; // It is free and reserved now + } + } + + // Loop over the implicit defs, spilling them as well. + if (TID.ImplicitDefs) { + for (const unsigned *ImplicitDefs = TID.ImplicitDefs; + *ImplicitDefs; ++ImplicitDefs) { + unsigned Reg = *ImplicitDefs; + if (PhysRegsUsed[Reg] != -2) { + spillPhysReg(MBB, MI, Reg, true); + PhysRegsUsed[Reg] = 0; // It is free and reserved now + } + MF->getRegInfo().setPhysRegUsed(Reg); + for (const unsigned *SubRegs = TRI->getSubRegisters(Reg); + *SubRegs; ++SubRegs) { + if (PhysRegsUsed[*SubRegs] == -2) continue; + + PhysRegsUsed[*SubRegs] = 0; // It is free and reserved now + MF->getRegInfo().setPhysRegUsed(*SubRegs); + } + } + } + + SmallVector<unsigned, 8> DeadDefs; + for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { + MachineOperand &MO = MI->getOperand(i); + if (MO.isReg() && MO.isDead()) + DeadDefs.push_back(MO.getReg()); + } + + // Okay, we have allocated all of the source operands and spilled any values + // that would be destroyed by defs of this instruction. Loop over the + // explicit defs and assign them to a register, spilling incoming values if + // we need to scavenge a register. + // + for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { + MachineOperand &MO = MI->getOperand(i); + if (!MO.isReg() || !MO.isDef() || !MO.getReg() || + MO.isEarlyClobber() || + !TargetRegisterInfo::isVirtualRegister(MO.getReg())) + continue; + + unsigned DestVirtReg = MO.getReg(); + unsigned DestPhysReg; + + // If DestVirtReg already has a value, use it. + if (!(DestPhysReg = getVirt2PhysRegMapSlot(DestVirtReg))) { + // If this is a copy try to reuse the input as the output; + // that will make the copy go away. + // If this is a copy, the source reg is a phys reg, and + // that reg is available, use that phys reg for DestPhysReg. + // If this is a copy, the source reg is a virtual reg, and + // the phys reg that was assigned to that virtual reg is now + // available, use that phys reg for DestPhysReg. (If it's now + // available that means this was the last use of the source.) + if (isCopy && + TargetRegisterInfo::isPhysicalRegister(SrcCopyReg) && + isPhysRegAvailable(SrcCopyReg)) { + DestPhysReg = SrcCopyReg; + assignVirtToPhysReg(DestVirtReg, DestPhysReg); + } else if (isCopy && + TargetRegisterInfo::isVirtualRegister(SrcCopyReg) && + SrcCopyPhysReg && isPhysRegAvailable(SrcCopyPhysReg) && + MF->getRegInfo().getRegClass(DestVirtReg)-> + contains(SrcCopyPhysReg)) { + DestPhysReg = SrcCopyPhysReg; + assignVirtToPhysReg(DestVirtReg, DestPhysReg); + } else + DestPhysReg = getReg(MBB, MI, DestVirtReg); + } + MF->getRegInfo().setPhysRegUsed(DestPhysReg); + markVirtRegModified(DestVirtReg); + getVirtRegLastUse(DestVirtReg) = std::make_pair((MachineInstr*)0, 0); + DEBUG(dbgs() << " Assigning " << TRI->getName(DestPhysReg) + << " to %reg" << DestVirtReg << "\n"); + MO.setReg(DestPhysReg); // Assign the output register + UsedInInstr.set(DestPhysReg); + } + + // If this instruction defines any registers that are immediately dead, + // kill them now. 
+ // + for (unsigned i = 0, e = DeadDefs.size(); i != e; ++i) { + unsigned VirtReg = DeadDefs[i]; + unsigned PhysReg = VirtReg; + if (TargetRegisterInfo::isVirtualRegister(VirtReg)) { + unsigned &PhysRegSlot = getVirt2PhysRegMapSlot(VirtReg); + PhysReg = PhysRegSlot; + assert(PhysReg != 0); + PhysRegSlot = 0; + } else if (PhysRegsUsed[PhysReg] == -2) { + // Unallocatable register dead, ignore. + continue; + } else if (!PhysReg) + continue; + + DEBUG(dbgs() << " Register " << TRI->getName(PhysReg) + << " [%reg" << VirtReg + << "] is never used, removing it from live set\n"); + removePhysReg(PhysReg); + for (const unsigned *AliasSet = TRI->getAliasSet(PhysReg); + *AliasSet; ++AliasSet) { + if (PhysRegsUsed[*AliasSet] != -2) { + DEBUG(dbgs() << " Register " << TRI->getName(*AliasSet) + << " [%reg" << *AliasSet + << "] is never used, removing it from live set\n"); + removePhysReg(*AliasSet); + } + } + } + + // Finally, if this is a noop copy instruction, zap it. (Except that if + // the copy is dead, it must be kept to avoid messing up liveness info for + // the register scavenger. See pr4100.) + if (TII->isMoveInstr(*MI, SrcCopyReg, DstCopyReg, + SrcCopySubReg, DstCopySubReg) && + SrcCopyReg == DstCopyReg && DeadDefs.empty()) + MBB.erase(MI); + } + + MachineBasicBlock::iterator MI = MBB.getFirstTerminator(); + + // Spill all physical registers holding virtual registers now. + for (unsigned i = 0, e = TRI->getNumRegs(); i != e; ++i) + if (PhysRegsUsed[i] != -1 && PhysRegsUsed[i] != -2) { + if (unsigned VirtReg = PhysRegsUsed[i]) + spillVirtReg(MBB, MI, VirtReg, i); + else + removePhysReg(i); + } +} + +/// runOnMachineFunction - Register allocate the whole function +/// +bool RAFast::runOnMachineFunction(MachineFunction &Fn) { + DEBUG(dbgs() << "Machine Function\n"); + MF = &Fn; + TM = &Fn.getTarget(); + TRI = TM->getRegisterInfo(); + TII = TM->getInstrInfo(); + + PhysRegsUsed.assign(TRI->getNumRegs(), -1); + UsedInInstr.resize(TRI->getNumRegs()); + + // At various places we want to efficiently check to see whether a register + // is allocatable. To handle this, we mark all unallocatable registers as + // being pinned down, permanently. + { + BitVector Allocable = TRI->getAllocatableSet(Fn); + for (unsigned i = 0, e = Allocable.size(); i != e; ++i) + if (!Allocable[i]) + PhysRegsUsed[i] = -2; // Mark the reg unallocable. 
+ } + + // initialize the virtual->physical register map to have a 'null' + // mapping for all virtual registers + unsigned LastVirtReg = MF->getRegInfo().getLastVirtReg(); + StackSlotForVirtReg.grow(LastVirtReg); + Virt2PhysRegMap.grow(LastVirtReg); + Virt2LastUseMap.grow(LastVirtReg); + VirtRegModified.resize(LastVirtReg+1 - + TargetRegisterInfo::FirstVirtualRegister); + UsedInMultipleBlocks.resize(LastVirtReg+1 - + TargetRegisterInfo::FirstVirtualRegister); + + // Loop over all of the basic blocks, eliminating virtual register references + for (MachineFunction::iterator MBB = Fn.begin(), MBBe = Fn.end(); + MBB != MBBe; ++MBB) + AllocateBasicBlock(*MBB); + + StackSlotForVirtReg.clear(); + PhysRegsUsed.clear(); + VirtRegModified.clear(); + UsedInMultipleBlocks.clear(); + Virt2PhysRegMap.clear(); + Virt2LastUseMap.clear(); + return true; +} + +FunctionPass *llvm::createFastRegisterAllocator() { + return new RAFast(); +} diff --git a/lib/CodeGen/RegAllocLinearScan.cpp b/lib/CodeGen/RegAllocLinearScan.cpp index 5c5a394..6c8fc0c 100644 --- a/lib/CodeGen/RegAllocLinearScan.cpp +++ b/lib/CodeGen/RegAllocLinearScan.cpp @@ -1177,7 +1177,7 @@ void RALinScan::assignRegOrStackSlotAtInterval(LiveInterval* cur) { assignRegOrStackSlotAtInterval(cur); } else { assert(false && "Ran out of registers during register allocation!"); - llvm_report_error("Ran out of registers during register allocation!"); + report_fatal_error("Ran out of registers during register allocation!"); } return; } diff --git a/lib/CodeGen/RegAllocLocal.cpp b/lib/CodeGen/RegAllocLocal.cpp index 0ef041e..94456d1 100644 --- a/lib/CodeGen/RegAllocLocal.cpp +++ b/lib/CodeGen/RegAllocLocal.cpp @@ -189,6 +189,9 @@ namespace { /// void removePhysReg(unsigned PhysReg); + void storeVirtReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, + unsigned VirtReg, unsigned PhysReg, bool isKill); + /// spillVirtReg - This method spills the value specified by PhysReg into /// the virtual register slot specified by VirtReg. It then updates the RA /// data structures to indicate the fact that PhysReg is now available. @@ -286,6 +289,17 @@ void RALocal::removePhysReg(unsigned PhysReg) { PhysRegsUseOrder.erase(It); } +/// storeVirtReg - Store a virtual register to its assigned stack slot. +void RALocal::storeVirtReg(MachineBasicBlock &MBB, + MachineBasicBlock::iterator I, + unsigned VirtReg, unsigned PhysReg, + bool isKill) { + const TargetRegisterClass *RC = MF->getRegInfo().getRegClass(VirtReg); + int FrameIndex = getStackSpaceFor(VirtReg, RC); + DEBUG(dbgs() << " to stack slot #" << FrameIndex); + TII->storeRegToStackSlot(MBB, I, PhysReg, isKill, FrameIndex, RC); + ++NumStores; // Update statistics +} /// spillVirtReg - This method spills the value specified by PhysReg into the /// virtual register slot specified by VirtReg. It then updates the RA data @@ -309,15 +323,11 @@ void RALocal::spillVirtReg(MachineBasicBlock &MBB, // Otherwise, there is a virtual register corresponding to this physical // register. We only need to spill it into its stack slot if it has been // modified. - const TargetRegisterClass *RC = MF->getRegInfo().getRegClass(VirtReg); - int FrameIndex = getStackSpaceFor(VirtReg, RC); - DEBUG(dbgs() << " to stack slot #" << FrameIndex); // If the instruction reads the register that's spilled, (e.g. this can // happen if it is a move to a physical register), then the spill // instruction is not a kill. 
bool isKill = !(I != MBB.end() && I->readsRegister(PhysReg)); - TII->storeRegToStackSlot(MBB, I, PhysReg, isKill, FrameIndex, RC); - ++NumStores; // Update statistics + storeVirtReg(MBB, I, VirtReg, PhysReg, isKill); } getVirt2PhysRegMapSlot(VirtReg) = 0; // VirtReg no longer available @@ -549,7 +559,7 @@ MachineInstr *RALocal::reloadVirtReg(MachineBasicBlock &MBB, MachineInstr *MI, << "constraints:\n"; MI->print(Msg, TM); } - llvm_report_error(Msg.str()); + report_fatal_error(Msg.str()); } for (const unsigned *SubRegs = TRI->getSubRegisters(PhysReg); *SubRegs; ++SubRegs) { @@ -563,7 +573,7 @@ MachineInstr *RALocal::reloadVirtReg(MachineBasicBlock &MBB, MachineInstr *MI, << "constraints:\n"; MI->print(Msg, TM); } - llvm_report_error(Msg.str()); + report_fatal_error(Msg.str()); } return MI; @@ -633,7 +643,10 @@ void RALocal::ComputeLocalLiveness(MachineBasicBlock& MBB) { // uses regs before it defs them. if (!MO.isReg() || !MO.getReg() || !MO.isUse()) continue; - + + // Ignore helpful kill flags from earlier passes. + MO.setIsKill(false); + LastUseDef[MO.getReg()] = std::make_pair(I, i); if (TargetRegisterInfo::isVirtualRegister(MO.getReg())) continue; @@ -801,9 +814,12 @@ void RALocal::AllocateBasicBlock(MachineBasicBlock &MBB) { dbgs() << "\nStarting RegAlloc of: " << *MI; dbgs() << " Regs have values: "; for (unsigned i = 0; i != TRI->getNumRegs(); ++i) - if (PhysRegsUsed[i] != -1 && PhysRegsUsed[i] != -2) + if (PhysRegsUsed[i] != -1 && PhysRegsUsed[i] != -2) { + if (PhysRegsUsed[i] && isVirtRegModified(PhysRegsUsed[i])) + dbgs() << "*"; dbgs() << "[" << TRI->getName(i) << ",%reg" << PhysRegsUsed[i] << "] "; + } dbgs() << '\n'; }); @@ -1092,6 +1108,20 @@ void RALocal::AllocateBasicBlock(MachineBasicBlock &MBB) { } } + // If this instruction is a call, make sure there are no dirty registers. The + // call might throw an exception, and the landing pad expects to find all + // registers in stack slots. + if (TID.isCall()) + for (unsigned i = 0, e = TRI->getNumRegs(); i != e; ++i) { + if (PhysRegsUsed[i] <= 0) continue; + unsigned VirtReg = PhysRegsUsed[i]; + if (!isVirtRegModified(VirtReg)) continue; + DEBUG(dbgs() << " Storing dirty %reg" << VirtReg); + storeVirtReg(MBB, MI, VirtReg, i, false); + markVirtRegModified(VirtReg, false); + DEBUG(dbgs() << " because the call might throw\n"); + } + // Finally, if this is a noop copy instruction, zap it. (Except that if // the copy is dead, it must be kept to avoid messing up liveness info for // the register scavenger. See pr4100.) diff --git a/lib/CodeGen/RegisterScavenging.cpp b/lib/CodeGen/RegisterScavenging.cpp index 67bf209..179984f 100644 --- a/lib/CodeGen/RegisterScavenging.cpp +++ b/lib/CodeGen/RegisterScavenging.cpp @@ -64,7 +64,7 @@ void RegScavenger::initRegState() { return; // Live-in registers are in use. - for (MachineBasicBlock::const_livein_iterator I = MBB->livein_begin(), + for (MachineBasicBlock::livein_iterator I = MBB->livein_begin(), E = MBB->livein_end(); I != E; ++I) setUsed(*I); @@ -136,6 +136,9 @@ void RegScavenger::forward() { ScavengeRestore = NULL; } + if (MI->isDebugValue()) + return; + // Find out which registers are early clobbered, killed, defined, and marked // def-dead in this instruction. 
BitVector EarlyClobberRegs(NumPhysRegs); diff --git a/lib/CodeGen/ScheduleDAG.cpp b/lib/CodeGen/ScheduleDAG.cpp index 1f3e295..587f001 100644 --- a/lib/CodeGen/ScheduleDAG.cpp +++ b/lib/CodeGen/ScheduleDAG.cpp @@ -29,7 +29,6 @@ ScheduleDAG::ScheduleDAG(MachineFunction &mf) TRI(TM.getRegisterInfo()), TLI(TM.getTargetLowering()), MF(mf), MRI(mf.getRegInfo()), - ConstPool(MF.getConstantPool()), EntrySU(), ExitSU() { } diff --git a/lib/CodeGen/ScheduleDAGInstrs.cpp b/lib/CodeGen/ScheduleDAGInstrs.cpp index ecc49e2..ca235c3 100644 --- a/lib/CodeGen/ScheduleDAGInstrs.cpp +++ b/lib/CodeGen/ScheduleDAGInstrs.cpp @@ -272,8 +272,8 @@ void ScheduleDAGInstrs::BuildSchedGraph(AliasAnalysis *AA) { // perform its own adjustments. const SDep& dep = SDep(SU, SDep::Data, LDataLatency, Reg); if (!UnitLatencies) { - ComputeOperandLatency(SU, UseSU, (SDep &)dep); - ST.adjustSchedDependency(SU, UseSU, (SDep &)dep); + ComputeOperandLatency(SU, UseSU, const_cast<SDep &>(dep)); + ST.adjustSchedDependency(SU, UseSU, const_cast<SDep &>(dep)); } UseSU->addPred(dep); } @@ -285,8 +285,8 @@ void ScheduleDAGInstrs::BuildSchedGraph(AliasAnalysis *AA) { continue; const SDep& dep = SDep(SU, SDep::Data, DataLatency, *Alias); if (!UnitLatencies) { - ComputeOperandLatency(SU, UseSU, (SDep &)dep); - ST.adjustSchedDependency(SU, UseSU, (SDep &)dep); + ComputeOperandLatency(SU, UseSU, const_cast<SDep &>(dep)); + ST.adjustSchedDependency(SU, UseSU, const_cast<SDep &>(dep)); } UseSU->addPred(dep); } @@ -572,8 +572,7 @@ std::string ScheduleDAGInstrs::getGraphNodeLabel(const SUnit *SU) const { } // EmitSchedule - Emit the machine code in scheduled order. -MachineBasicBlock *ScheduleDAGInstrs:: -EmitSchedule(DenseMap<MachineBasicBlock*, MachineBasicBlock*> *EM) { +MachineBasicBlock *ScheduleDAGInstrs::EmitSchedule() { // For MachineInstr-based scheduling, we're rescheduling the instructions in // the block, so start by removing them from the block. while (Begin != InsertPos) { diff --git a/lib/CodeGen/ScheduleDAGInstrs.h b/lib/CodeGen/ScheduleDAGInstrs.h index c9b44de..d70608f 100644 --- a/lib/CodeGen/ScheduleDAGInstrs.h +++ b/lib/CodeGen/ScheduleDAGInstrs.h @@ -20,7 +20,6 @@ #include "llvm/CodeGen/ScheduleDAG.h" #include "llvm/Support/Compiler.h" #include "llvm/Target/TargetRegisterInfo.h" -#include "llvm/ADT/DenseMap.h" #include "llvm/ADT/SmallSet.h" #include <map> @@ -171,8 +170,7 @@ namespace llvm { virtual void ComputeOperandLatency(SUnit *Def, SUnit *Use, SDep& dep) const; - virtual MachineBasicBlock* - EmitSchedule(DenseMap<MachineBasicBlock*, MachineBasicBlock*>*); + virtual MachineBasicBlock *EmitSchedule(); /// StartBlock - Prepare to perform scheduling in the given block. 
/// diff --git a/lib/CodeGen/SelectionDAG/CMakeLists.txt b/lib/CodeGen/SelectionDAG/CMakeLists.txt index 80c7d7c..0cfd5e1 100644 --- a/lib/CodeGen/SelectionDAG/CMakeLists.txt +++ b/lib/CodeGen/SelectionDAG/CMakeLists.txt @@ -20,6 +20,7 @@ add_llvm_library(LLVMSelectionDAG SelectionDAGISel.cpp SelectionDAGPrinter.cpp TargetLowering.cpp + TargetSelectionDAGInfo.cpp ) target_link_libraries (LLVMSelectionDAG LLVMAnalysis LLVMAsmPrinter LLVMCodeGen) diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index a336e0a..3639f80 100644 --- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -129,6 +129,14 @@ namespace { bool CombineToPreIndexedLoadStore(SDNode *N); bool CombineToPostIndexedLoadStore(SDNode *N); + void ReplaceLoadWithPromotedLoad(SDNode *Load, SDNode *ExtLoad); + SDValue PromoteOperand(SDValue Op, EVT PVT, bool &Replace); + SDValue SExtPromoteOperand(SDValue Op, EVT PVT); + SDValue ZExtPromoteOperand(SDValue Op, EVT PVT); + SDValue PromoteIntBinOp(SDValue Op); + SDValue PromoteIntShiftOp(SDValue Op); + SDValue PromoteExtend(SDValue Op); + bool PromoteLoad(SDValue Op); /// combine - call the node-specific routine that knows how to fold each /// particular type of node. If that doesn't do anything, try the @@ -254,24 +262,28 @@ namespace { /// looking for a better chain (aliasing node.) SDValue FindBetterChain(SDNode *N, SDValue Chain); - /// getShiftAmountTy - Returns a type large enough to hold any valid - /// shift amount - before type legalization these can be huge. - EVT getShiftAmountTy() { - return LegalTypes ? TLI.getShiftAmountTy() : TLI.getPointerTy(); - } - -public: + public: DAGCombiner(SelectionDAG &D, AliasAnalysis &A, CodeGenOpt::Level OL) - : DAG(D), - TLI(D.getTargetLoweringInfo()), - Level(Unrestricted), - OptLevel(OL), - LegalOperations(false), - LegalTypes(false), - AA(A) {} + : DAG(D), TLI(D.getTargetLoweringInfo()), Level(Unrestricted), + OptLevel(OL), LegalOperations(false), LegalTypes(false), AA(A) {} /// Run - runs the dag combiner on all nodes in the work list void Run(CombineLevel AtLevel); + + SelectionDAG &getDAG() const { return DAG; } + + /// getShiftAmountTy - Returns a type large enough to hold any valid + /// shift amount - before type legalization these can be huge. + EVT getShiftAmountTy() { + return LegalTypes ? TLI.getShiftAmountTy() : TLI.getPointerTy(); + } + + /// isTypeLegal - This method returns true if we are running before type + /// legalization or if the specified VT is legal. + bool isTypeLegal(const EVT &VT) { + if (!LegalTypes) return true; + return TLI.isTypeLegal(VT); + } }; } @@ -577,9 +589,8 @@ SDValue DAGCombiner::CombineTo(SDNode *N, const SDValue *To, unsigned NumTo, return SDValue(N, 0); } -void -DAGCombiner::CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt & - TLO) { +void DAGCombiner:: +CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO) { // Replace all uses. If any nodes become isomorphic to other nodes and // are deleted, make sure to remove them from our worklist. WorkListRemover DeadNodes(*this); @@ -609,7 +620,7 @@ DAGCombiner::CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt & /// it can be simplified or if things it uses can be simplified by bit /// propagation. If so, return true. 
bool DAGCombiner::SimplifyDemandedBits(SDValue Op, const APInt &Demanded) { - TargetLowering::TargetLoweringOpt TLO(DAG); + TargetLowering::TargetLoweringOpt TLO(DAG, LegalTypes, LegalOperations); APInt KnownZero, KnownOne; if (!TLI.SimplifyDemandedBits(Op, Demanded, KnownZero, KnownOne, TLO)) return false; @@ -629,6 +640,274 @@ bool DAGCombiner::SimplifyDemandedBits(SDValue Op, const APInt &Demanded) { return true; } +void DAGCombiner::ReplaceLoadWithPromotedLoad(SDNode *Load, SDNode *ExtLoad) { + DebugLoc dl = Load->getDebugLoc(); + EVT VT = Load->getValueType(0); + SDValue Trunc = DAG.getNode(ISD::TRUNCATE, dl, VT, SDValue(ExtLoad, 0)); + + DEBUG(dbgs() << "\nReplacing.9 "; + Load->dump(&DAG); + dbgs() << "\nWith: "; + Trunc.getNode()->dump(&DAG); + dbgs() << '\n'); + WorkListRemover DeadNodes(*this); + DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 0), Trunc, &DeadNodes); + DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 1), SDValue(ExtLoad, 1), + &DeadNodes); + removeFromWorkList(Load); + DAG.DeleteNode(Load); + AddToWorkList(Trunc.getNode()); +} + +SDValue DAGCombiner::PromoteOperand(SDValue Op, EVT PVT, bool &Replace) { + Replace = false; + DebugLoc dl = Op.getDebugLoc(); + if (LoadSDNode *LD = dyn_cast<LoadSDNode>(Op)) { + EVT MemVT = LD->getMemoryVT(); + ISD::LoadExtType ExtType = ISD::isNON_EXTLoad(LD) + ? (TLI.isLoadExtLegal(ISD::ZEXTLOAD, MemVT) ? ISD::ZEXTLOAD : ISD::EXTLOAD) + : LD->getExtensionType(); + Replace = true; + return DAG.getExtLoad(ExtType, dl, PVT, + LD->getChain(), LD->getBasePtr(), + LD->getSrcValue(), LD->getSrcValueOffset(), + MemVT, LD->isVolatile(), + LD->isNonTemporal(), LD->getAlignment()); + } + + unsigned Opc = Op.getOpcode(); + switch (Opc) { + default: break; + case ISD::AssertSext: + return DAG.getNode(ISD::AssertSext, dl, PVT, + SExtPromoteOperand(Op.getOperand(0), PVT), + Op.getOperand(1)); + case ISD::AssertZext: + return DAG.getNode(ISD::AssertZext, dl, PVT, + ZExtPromoteOperand(Op.getOperand(0), PVT), + Op.getOperand(1)); + case ISD::Constant: { + unsigned ExtOpc = + Op.getValueType().isByteSized() ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND; + return DAG.getNode(ExtOpc, dl, PVT, Op); + } + } + + if (!TLI.isOperationLegal(ISD::ANY_EXTEND, PVT)) + return SDValue(); + return DAG.getNode(ISD::ANY_EXTEND, dl, PVT, Op); +} + +SDValue DAGCombiner::SExtPromoteOperand(SDValue Op, EVT PVT) { + if (!TLI.isOperationLegal(ISD::SIGN_EXTEND_INREG, PVT)) + return SDValue(); + EVT OldVT = Op.getValueType(); + DebugLoc dl = Op.getDebugLoc(); + bool Replace = false; + SDValue NewOp = PromoteOperand(Op, PVT, Replace); + if (NewOp.getNode() == 0) + return SDValue(); + AddToWorkList(NewOp.getNode()); + + if (Replace) + ReplaceLoadWithPromotedLoad(Op.getNode(), NewOp.getNode()); + return DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, NewOp.getValueType(), NewOp, + DAG.getValueType(OldVT)); +} + +SDValue DAGCombiner::ZExtPromoteOperand(SDValue Op, EVT PVT) { + EVT OldVT = Op.getValueType(); + DebugLoc dl = Op.getDebugLoc(); + bool Replace = false; + SDValue NewOp = PromoteOperand(Op, PVT, Replace); + if (NewOp.getNode() == 0) + return SDValue(); + AddToWorkList(NewOp.getNode()); + + if (Replace) + ReplaceLoadWithPromotedLoad(Op.getNode(), NewOp.getNode()); + return DAG.getZeroExtendInReg(NewOp, dl, OldVT); +} + +/// PromoteIntBinOp - Promote the specified integer binary operation if the +/// target indicates it is beneficial. e.g. On x86, it's usually better to +/// promote i16 operations to i32 since i16 instructions are longer. 
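The promotion helpers defined next rely on the usual modular-arithmetic fact that, for the wrapping binary operations (add, sub, mul, and, or, xor), the low bits of the wide result equal the narrow result, so an i16 operation can be rewritten as an i32 operation followed by a truncate. A minimal standalone check of that identity, independent of the DAG and using hypothetical operand values:

#include <cassert>
#include <cstdint>

int main() {
  uint16_t A = 0xBEEF, B = 0x1234;
  // Widen to 32 bits, operate, truncate back: the low 16 bits match the
  // native 16-bit operation for every wrapping binary opcode.
  assert(uint16_t(uint32_t(A) + uint32_t(B)) == uint16_t(A + B));
  assert(uint16_t(uint32_t(A) * uint32_t(B)) == uint16_t(A * B));
  assert(uint16_t(uint32_t(A) ^ uint32_t(B)) == uint16_t(A ^ B));
  return 0;
}

Because the high bits of the operands never influence the truncated result, PromoteIntBinOp below can get away with any-extending its operands.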
+SDValue DAGCombiner::PromoteIntBinOp(SDValue Op) { + if (!LegalOperations) + return SDValue(); + + EVT VT = Op.getValueType(); + if (VT.isVector() || !VT.isInteger()) + return SDValue(); + + // If operation type is 'undesirable', e.g. i16 on x86, consider + // promoting it. + unsigned Opc = Op.getOpcode(); + if (TLI.isTypeDesirableForOp(Opc, VT)) + return SDValue(); + + EVT PVT = VT; + // Consult target whether it is a good idea to promote this operation and + // what's the right type to promote it to. + if (TLI.IsDesirableToPromoteOp(Op, PVT)) { + assert(PVT != VT && "Don't know what type to promote to!"); + + bool Replace0 = false; + SDValue N0 = Op.getOperand(0); + SDValue NN0 = PromoteOperand(N0, PVT, Replace0); + if (NN0.getNode() == 0) + return SDValue(); + + bool Replace1 = false; + SDValue N1 = Op.getOperand(1); + SDValue NN1 = PromoteOperand(N1, PVT, Replace1); + if (NN1.getNode() == 0) + return SDValue(); + + AddToWorkList(NN0.getNode()); + AddToWorkList(NN1.getNode()); + + if (Replace0) + ReplaceLoadWithPromotedLoad(N0.getNode(), NN0.getNode()); + if (Replace1) + ReplaceLoadWithPromotedLoad(N1.getNode(), NN1.getNode()); + + DEBUG(dbgs() << "\nPromoting "; + Op.getNode()->dump(&DAG)); + DebugLoc dl = Op.getDebugLoc(); + return DAG.getNode(ISD::TRUNCATE, dl, VT, + DAG.getNode(Opc, dl, PVT, NN0, NN1)); + } + return SDValue(); +} + +/// PromoteIntShiftOp - Promote the specified integer shift operation if the +/// target indicates it is beneficial. e.g. On x86, it's usually better to +/// promote i16 operations to i32 since i16 instructions are longer. +SDValue DAGCombiner::PromoteIntShiftOp(SDValue Op) { + if (!LegalOperations) + return SDValue(); + + EVT VT = Op.getValueType(); + if (VT.isVector() || !VT.isInteger()) + return SDValue(); + + // If operation type is 'undesirable', e.g. i16 on x86, consider + // promoting it. + unsigned Opc = Op.getOpcode(); + if (TLI.isTypeDesirableForOp(Opc, VT)) + return SDValue(); + + EVT PVT = VT; + // Consult target whether it is a good idea to promote this operation and + // what's the right type to promote it to. + if (TLI.IsDesirableToPromoteOp(Op, PVT)) { + assert(PVT != VT && "Don't know what type to promote to!"); + + bool Replace = false; + SDValue N0 = Op.getOperand(0); + if (Opc == ISD::SRA) + N0 = SExtPromoteOperand(Op.getOperand(0), PVT); + else if (Opc == ISD::SRL) + N0 = ZExtPromoteOperand(Op.getOperand(0), PVT); + else + N0 = PromoteOperand(N0, PVT, Replace); + if (N0.getNode() == 0) + return SDValue(); + + AddToWorkList(N0.getNode()); + if (Replace) + ReplaceLoadWithPromotedLoad(Op.getOperand(0).getNode(), N0.getNode()); + + DEBUG(dbgs() << "\nPromoting "; + Op.getNode()->dump(&DAG)); + DebugLoc dl = Op.getDebugLoc(); + return DAG.getNode(ISD::TRUNCATE, dl, VT, + DAG.getNode(Opc, dl, PVT, N0, Op.getOperand(1))); + } + return SDValue(); +} + +SDValue DAGCombiner::PromoteExtend(SDValue Op) { + if (!LegalOperations) + return SDValue(); + + EVT VT = Op.getValueType(); + if (VT.isVector() || !VT.isInteger()) + return SDValue(); + + // If operation type is 'undesirable', e.g. i16 on x86, consider + // promoting it. + unsigned Opc = Op.getOpcode(); + if (TLI.isTypeDesirableForOp(Opc, VT)) + return SDValue(); + + EVT PVT = VT; + // Consult target whether it is a good idea to promote this operation and + // what's the right type to promote it to. 
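Right shifts are the one place where an operand cannot simply be any-extended: a logical shift right moves whatever happens to sit in the promoted high bits down into the result. That is why PromoteIntShiftOp above routes SRL through ZExtPromoteOperand and SRA through SExtPromoteOperand before shifting. A small standalone illustration of the SRL case, with hypothetical values:

#include <cassert>
#include <cstdint>

int main() {
  uint16_t X = 0x8000;
  uint32_t ZExt   = X;                  // high 16 bits cleared
  uint32_t AnyExt = 0xDEAD0000u | X;    // high 16 bits are arbitrary junk
  // Zero-extended operand: the narrow logical shift survives truncation.
  assert(uint16_t(ZExt >> 4) == uint16_t(X >> 4));     // both 0x0800
  // Junk in the high bits leaks into the truncated result.
  assert(uint16_t(AnyExt >> 4) != uint16_t(X >> 4));   // 0xD800 vs 0x0800
  return 0;
}

The SRA case is analogous, except the promoted high bits must replicate the narrow sign bit instead of being zero.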
+ if (TLI.IsDesirableToPromoteOp(Op, PVT)) { + assert(PVT != VT && "Don't know what type to promote to!"); + // fold (aext (aext x)) -> (aext x) + // fold (aext (zext x)) -> (zext x) + // fold (aext (sext x)) -> (sext x) + DEBUG(dbgs() << "\nPromoting "; + Op.getNode()->dump(&DAG)); + return DAG.getNode(Op.getOpcode(), Op.getDebugLoc(), VT, Op.getOperand(0)); + } + return SDValue(); +} + +bool DAGCombiner::PromoteLoad(SDValue Op) { + if (!LegalOperations) + return false; + + EVT VT = Op.getValueType(); + if (VT.isVector() || !VT.isInteger()) + return false; + + // If operation type is 'undesirable', e.g. i16 on x86, consider + // promoting it. + unsigned Opc = Op.getOpcode(); + if (TLI.isTypeDesirableForOp(Opc, VT)) + return false; + + EVT PVT = VT; + // Consult target whether it is a good idea to promote this operation and + // what's the right type to promote it to. + if (TLI.IsDesirableToPromoteOp(Op, PVT)) { + assert(PVT != VT && "Don't know what type to promote to!"); + + DebugLoc dl = Op.getDebugLoc(); + SDNode *N = Op.getNode(); + LoadSDNode *LD = cast<LoadSDNode>(N); + EVT MemVT = LD->getMemoryVT(); + ISD::LoadExtType ExtType = ISD::isNON_EXTLoad(LD) + ? (TLI.isLoadExtLegal(ISD::ZEXTLOAD, MemVT) ? ISD::ZEXTLOAD : ISD::EXTLOAD) + : LD->getExtensionType(); + SDValue NewLD = DAG.getExtLoad(ExtType, dl, PVT, + LD->getChain(), LD->getBasePtr(), + LD->getSrcValue(), LD->getSrcValueOffset(), + MemVT, LD->isVolatile(), + LD->isNonTemporal(), LD->getAlignment()); + SDValue Result = DAG.getNode(ISD::TRUNCATE, dl, VT, NewLD); + + DEBUG(dbgs() << "\nPromoting "; + N->dump(&DAG); + dbgs() << "\nTo: "; + Result.getNode()->dump(&DAG); + dbgs() << '\n'); + WorkListRemover DeadNodes(*this); + DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result, &DeadNodes); + DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), NewLD.getValue(1), &DeadNodes); + removeFromWorkList(N); + DAG.DeleteNode(N); + AddToWorkList(Result.getNode()); + return true; + } + return false; +} + + //===----------------------------------------------------------------------===// // Main DAG Combiner implementation //===----------------------------------------------------------------------===// @@ -732,7 +1011,7 @@ void DAGCombiner::Run(CombineLevel AtLevel) { } SDValue DAGCombiner::visit(SDNode *N) { - switch(N->getOpcode()) { + switch (N->getOpcode()) { default: break; case ISD::TokenFactor: return visitTokenFactor(N); case ISD::MERGE_VALUES: return visitMERGE_VALUES(N); @@ -817,6 +1096,35 @@ SDValue DAGCombiner::combine(SDNode *N) { } } + // If nothing happened still, try promoting the operation. + if (RV.getNode() == 0) { + switch (N->getOpcode()) { + default: break; + case ISD::ADD: + case ISD::SUB: + case ISD::MUL: + case ISD::AND: + case ISD::OR: + case ISD::XOR: + RV = PromoteIntBinOp(SDValue(N, 0)); + break; + case ISD::SHL: + case ISD::SRA: + case ISD::SRL: + RV = PromoteIntShiftOp(SDValue(N, 0)); + break; + case ISD::SIGN_EXTEND: + case ISD::ZERO_EXTEND: + case ISD::ANY_EXTEND: + RV = PromoteExtend(SDValue(N, 0)); + break; + case ISD::LOAD: + if (PromoteLoad(SDValue(N, 0))) + RV = SDValue(N, 0); + break; + } + } + // If N is a commutative binary node, try commuting it to enable more // sdisel CSE. if (RV.getNode() == 0 && @@ -1720,8 +2028,10 @@ SDValue DAGCombiner::SimplifyBinOpWithSameOpcodeHands(SDNode *N) { // into a vsetcc. 
EVT Op0VT = N0.getOperand(0).getValueType(); if ((N0.getOpcode() == ISD::ZERO_EXTEND || - N0.getOpcode() == ISD::ANY_EXTEND || N0.getOpcode() == ISD::SIGN_EXTEND || + // Avoid infinite looping with PromoteIntBinOp. + (N0.getOpcode() == ISD::ANY_EXTEND && + (!LegalTypes || TLI.isTypeDesirableForOp(N->getOpcode(), Op0VT))) || (N0.getOpcode() == ISD::TRUNCATE && TLI.isTypeLegal(Op0VT))) && !VT.isVector() && Op0VT == N1.getOperand(0).getValueType() && @@ -2579,7 +2889,13 @@ SDValue DAGCombiner::visitSHL(SDNode *N) { HiBitsMask); } - return N1C ? visitShiftByConstant(N, N1C->getZExtValue()) : SDValue(); + if (N1C) { + SDValue NewSHL = visitShiftByConstant(N, N1C->getZExtValue()); + if (NewSHL.getNode()) + return NewSHL; + } + + return SDValue(); } SDValue DAGCombiner::visitSRA(SDNode *N) { @@ -2693,7 +3009,13 @@ SDValue DAGCombiner::visitSRA(SDNode *N) { if (DAG.SignBitIsZero(N0)) return DAG.getNode(ISD::SRL, N->getDebugLoc(), VT, N0, N1); - return N1C ? visitShiftByConstant(N, N1C->getZExtValue()) : SDValue(); + if (N1C) { + SDValue NewSRA = visitShiftByConstant(N, N1C->getZExtValue()); + if (NewSRA.getNode()) + return NewSRA; + } + + return SDValue(); } SDValue DAGCombiner::visitSRL(SDNode *N) { @@ -2731,6 +3053,15 @@ SDValue DAGCombiner::visitSRL(SDNode *N) { return DAG.getNode(ISD::SRL, N->getDebugLoc(), VT, N0.getOperand(0), DAG.getConstant(c1 + c2, N1.getValueType())); } + + // fold (srl (shl x, c), c) -> (and x, cst2) + if (N1C && N0.getOpcode() == ISD::SHL && N0.getOperand(1) == N1 && + N0.getValueSizeInBits() <= 64) { + uint64_t ShAmt = N1C->getZExtValue()+64-N0.getValueSizeInBits(); + return DAG.getNode(ISD::AND, N->getDebugLoc(), VT, N0.getOperand(0), + DAG.getConstant(~0ULL >> ShAmt, VT)); + } + // fold (srl (anyextend x), c) -> (anyextend (srl x, c)) if (N1C && N0.getOpcode() == ISD::ANY_EXTEND) { @@ -2739,10 +3070,12 @@ SDValue DAGCombiner::visitSRL(SDNode *N) { if (N1C->getZExtValue() >= SmallVT.getSizeInBits()) return DAG.getUNDEF(VT); - SDValue SmallShift = DAG.getNode(ISD::SRL, N0.getDebugLoc(), SmallVT, - N0.getOperand(0), N1); - AddToWorkList(SmallShift.getNode()); - return DAG.getNode(ISD::ANY_EXTEND, N->getDebugLoc(), VT, SmallShift); + if (!LegalTypes || TLI.isTypeDesirableForOp(ISD::SRL, SmallVT)) { + SDValue SmallShift = DAG.getNode(ISD::SRL, N0.getDebugLoc(), SmallVT, + N0.getOperand(0), N1); + AddToWorkList(SmallShift.getNode()); + return DAG.getNode(ISD::ANY_EXTEND, N->getDebugLoc(), VT, SmallShift); + } } // fold (srl (sra X, Y), 31) -> (srl X, 31). This srl only looks at the sign @@ -3205,24 +3538,40 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) { if (N0.getOpcode() == ISD::SETCC) { // sext(setcc) -> sext_in_reg(vsetcc) for vectors. - if (VT.isVector() && + // Only do this before legalize for now. + if (VT.isVector() && !LegalOperations) { + EVT N0VT = N0.getOperand(0).getValueType(); // We know that the # elements of the results is the same as the // # elements of the compare (and the # elements of the compare result // for that matter). Check to see that they are the same size. If so, // we know that the element size of the sext'd result matches the // element size of the compare operands. - VT.getSizeInBits() == N0.getOperand(0).getValueType().getSizeInBits() && - - // Only do this before legalize for now. 
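The new visitSRL fold above rewrites (srl (shl x, c), c) into a single AND that clears the top c bits; the constant is built as ~0ULL shifted right by c plus the difference between 64 and the value width. A standalone check of that mask arithmetic for a hypothetical 32-bit value, exhaustive over the shift amount:

#include <cassert>
#include <cstdint>

int main() {
  const unsigned Width = 32;
  const uint32_t X = 0xDEADBEEFu;
  for (unsigned C = 1; C < Width; ++C) {
    uint32_t ViaShifts = (uint32_t)(X << C) >> C;   // shl then logical srl
    uint64_t Mask = ~0ULL >> (C + 64 - Width);      // the fold's AND constant
    assert(ViaShifts == (X & (uint32_t)Mask));      // identical low Width-C bits
  }
  return 0;
}

Expressing the shift pair as a mask also leaves the value in the (and x, cst) shape that other combines, such as the store narrowing added later in this patch, look for.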
- !LegalOperations) { - return DAG.getVSetCC(N->getDebugLoc(), VT, N0.getOperand(0), - N0.getOperand(1), - cast<CondCodeSDNode>(N0.getOperand(2))->get()); + if (VT.getSizeInBits() == N0VT.getSizeInBits()) + return DAG.getVSetCC(N->getDebugLoc(), VT, N0.getOperand(0), + N0.getOperand(1), + cast<CondCodeSDNode>(N0.getOperand(2))->get()); + // If the desired elements are smaller or larger than the source + // elements we can use a matching integer vector type and then + // truncate/sign extend + else { + EVT MatchingElementType = + EVT::getIntegerVT(*DAG.getContext(), + N0VT.getScalarType().getSizeInBits()); + EVT MatchingVectorType = + EVT::getVectorVT(*DAG.getContext(), MatchingElementType, + N0VT.getVectorNumElements()); + SDValue VsetCC = + DAG.getVSetCC(N->getDebugLoc(), MatchingVectorType, N0.getOperand(0), + N0.getOperand(1), + cast<CondCodeSDNode>(N0.getOperand(2))->get()); + return DAG.getSExtOrTrunc(VsetCC, N->getDebugLoc(), VT); + } } - + // sext(setcc x, y, cc) -> (select_cc x, y, -1, 0, cc) + unsigned ElementWidth = VT.getScalarType().getSizeInBits(); SDValue NegOne = - DAG.getConstant(APInt::getAllOnesValue(VT.getSizeInBits()), VT); + DAG.getConstant(APInt::getAllOnesValue(ElementWidth), VT); SDValue SCC = SimplifySelectCC(N->getDebugLoc(), N0.getOperand(0), N0.getOperand(1), NegOne, DAG.getConstant(0, VT), @@ -3624,7 +3973,7 @@ SDValue DAGCombiner::ReduceLoadWidth(SDNode *N) { // Do not generate loads of non-round integer types since these can // be expensive (and would be wrong if the type is not byte sized). if (isa<LoadSDNode>(N0) && N0.hasOneUse() && ExtVT.isRound() && - cast<LoadSDNode>(N0)->getMemoryVT().getSizeInBits() > EVTBits && + cast<LoadSDNode>(N0)->getMemoryVT().getSizeInBits() >= EVTBits && // Do not change the width of a volatile load. !cast<LoadSDNode>(N0)->isVolatile()) { LoadSDNode *LN0 = cast<LoadSDNode>(N0); @@ -3694,7 +4043,8 @@ SDValue DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N) { // if x is small enough. 
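In the scalar case handled above, sign-extending a setcc result is the same as selecting between all-ones and zero, and the NegOne constant is now sized by the scalar element width rather than the full type width, presumably so the same expression stays meaningful when VT is a vector. The identity in plain integer terms, as a minimal sketch:

#include <cassert>
#include <cstdint>

// sext(i1 (x < y)) to i32: negating the 0/1 compare result yields 0 or -1,
// i.e. exactly the select_cc(x, y, -1, 0, setlt) form produced here.
static int32_t SExtOfSetCC(int32_t X, int32_t Y) { return -int32_t(X < Y); }

int main() {
  assert(SExtOfSetCC(1, 2) == -1);
  assert(SExtOfSetCC(2, 1) == 0);
  assert(SExtOfSetCC(3, 3) == ((3 < 3) ? -1 : 0));
  return 0;
}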
if (N0.getOpcode() == ISD::SIGN_EXTEND || N0.getOpcode() == ISD::ANY_EXTEND) { SDValue N00 = N0.getOperand(0); - if (N00.getValueType().getScalarType().getSizeInBits() < EVTBits) + if (N00.getValueType().getScalarType().getSizeInBits() <= EVTBits && + (!LegalOperations || TLI.isOperationLegal(ISD::SIGN_EXTEND, VT))) return DAG.getNode(ISD::SIGN_EXTEND, N->getDebugLoc(), VT, N00, N1); } @@ -3779,7 +4129,8 @@ SDValue DAGCombiner::visitTRUNCATE(SDNode *N) { if (N0.getOpcode() == ISD::TRUNCATE) return DAG.getNode(ISD::TRUNCATE, N->getDebugLoc(), VT, N0.getOperand(0)); // fold (truncate (ext x)) -> (ext x) or (truncate x) or x - if (N0.getOpcode() == ISD::ZERO_EXTEND || N0.getOpcode() == ISD::SIGN_EXTEND|| + if (N0.getOpcode() == ISD::ZERO_EXTEND || + N0.getOpcode() == ISD::SIGN_EXTEND || N0.getOpcode() == ISD::ANY_EXTEND) { if (N0.getOperand(0).getValueType().bitsLT(VT)) // if the source is smaller than the dest, we still need an extend @@ -3805,7 +4156,9 @@ SDValue DAGCombiner::visitTRUNCATE(SDNode *N) { // fold (truncate (load x)) -> (smaller load x) // fold (truncate (srl (load x), c)) -> (smaller load (x+c/evtbits)) - return ReduceLoadWidth(N); + if (!LegalTypes || TLI.isTypeDesirableForOp(N0.getOpcode(), VT)) + return ReduceLoadWidth(N); + return SDValue(); } static SDNode *getBuildPairElt(SDNode *N, unsigned i) { @@ -3949,7 +4302,7 @@ SDValue DAGCombiner::visitBIT_CONVERT(SDNode *N) { VT.isInteger() && !VT.isVector()) { unsigned OrigXWidth = N0.getOperand(1).getValueType().getSizeInBits(); EVT IntXVT = EVT::getIntegerVT(*DAG.getContext(), OrigXWidth); - if (TLI.isTypeLegal(IntXVT) || !LegalTypes) { + if (isTypeLegal(IntXVT)) { SDValue X = DAG.getNode(ISD::BIT_CONVERT, N0.getDebugLoc(), IntXVT, N0.getOperand(1)); AddToWorkList(X.getNode()); @@ -4075,8 +4428,8 @@ ConstantFoldBIT_CONVERTofBUILD_VECTOR(SDNode *BV, EVT DstEltVT) { if (Op.getOpcode() == ISD::UNDEF) continue; EltIsUndef = false; - NewBits |= (APInt(cast<ConstantSDNode>(Op)->getAPIntValue()). - zextOrTrunc(SrcBitSize).zext(DstBitSize)); + NewBits |= APInt(cast<ConstantSDNode>(Op)->getAPIntValue()). + zextOrTrunc(SrcBitSize).zext(DstBitSize); } if (EltIsUndef) @@ -4464,7 +4817,7 @@ SDValue DAGCombiner::visitFP_ROUND_INREG(SDNode *N) { ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0); // fold (fp_round_inreg c1fp) -> c1fp - if (N0CFP && (TLI.isTypeLegal(EVT) || !LegalTypes)) { + if (N0CFP && isTypeLegal(EVT)) { SDValue Round = DAG.getConstantFP(*N0CFP->getConstantFPValue(), EVT); return DAG.getNode(ISD::FP_EXTEND, N->getDebugLoc(), VT, Round); } @@ -4676,7 +5029,7 @@ SDValue DAGCombiner::visitBRCOND(SDNode *N) { if (Op0.getOpcode() == Op1.getOpcode()) { // Avoid missing important xor optimizations. SDValue Tmp = visitXOR(TheXor); - if (Tmp.getNode()) { + if (Tmp.getNode() && Tmp.getNode() != TheXor) { DEBUG(dbgs() << "\nReplacing.8 "; TheXor->dump(&DAG); dbgs() << "\nWith: "; @@ -5145,6 +5498,136 @@ SDValue DAGCombiner::visitLOAD(SDNode *N) { return SDValue(); } +/// CheckForMaskedLoad - Check to see if V is (and load (ptr), imm), where the +/// load is having specific bytes cleared out. If so, return the byte size +/// being masked out and the shift amount. +static std::pair<unsigned, unsigned> +CheckForMaskedLoad(SDValue V, SDValue Ptr, SDValue Chain) { + std::pair<unsigned, unsigned> Result(0, 0); + + // Check for the structure we're looking for. 
+ if (V->getOpcode() != ISD::AND || + !isa<ConstantSDNode>(V->getOperand(1)) || + !ISD::isNormalLoad(V->getOperand(0).getNode())) + return Result; + + // Check the chain and pointer. + LoadSDNode *LD = cast<LoadSDNode>(V->getOperand(0)); + if (LD->getBasePtr() != Ptr) return Result; // Not from same pointer. + + // The store should be chained directly to the load or be an operand of a + // tokenfactor. + if (LD == Chain.getNode()) + ; // ok. + else if (Chain->getOpcode() != ISD::TokenFactor) + return Result; // Fail. + else { + bool isOk = false; + for (unsigned i = 0, e = Chain->getNumOperands(); i != e; ++i) + if (Chain->getOperand(i).getNode() == LD) { + isOk = true; + break; + } + if (!isOk) return Result; + } + + // This only handles simple types. + if (V.getValueType() != MVT::i16 && + V.getValueType() != MVT::i32 && + V.getValueType() != MVT::i64) + return Result; + + // Check the constant mask. Invert it so that the bits being masked out are + // 0 and the bits being kept are 1. Use getSExtValue so that leading bits + // follow the sign bit for uniformity. + uint64_t NotMask = ~cast<ConstantSDNode>(V->getOperand(1))->getSExtValue(); + unsigned NotMaskLZ = CountLeadingZeros_64(NotMask); + if (NotMaskLZ & 7) return Result; // Must be multiple of a byte. + unsigned NotMaskTZ = CountTrailingZeros_64(NotMask); + if (NotMaskTZ & 7) return Result; // Must be multiple of a byte. + if (NotMaskLZ == 64) return Result; // All zero mask. + + // See if we have a continuous run of bits. If so, we have 0*1+0* + if (CountTrailingOnes_64(NotMask >> NotMaskTZ)+NotMaskTZ+NotMaskLZ != 64) + return Result; + + // Adjust NotMaskLZ down to be from the actual size of the int instead of i64. + if (V.getValueType() != MVT::i64 && NotMaskLZ) + NotMaskLZ -= 64-V.getValueSizeInBits(); + + unsigned MaskedBytes = (V.getValueSizeInBits()-NotMaskLZ-NotMaskTZ)/8; + switch (MaskedBytes) { + case 1: + case 2: + case 4: break; + default: return Result; // All one mask, or 5-byte mask. + } + + // Verify that the first bit starts at a multiple of mask so that the access + // is aligned the same as the access width. + if (NotMaskTZ && NotMaskTZ/8 % MaskedBytes) return Result; + + Result.first = MaskedBytes; + Result.second = NotMaskTZ/8; + return Result; +} + + +/// ShrinkLoadReplaceStoreWithStore - Check to see if IVal is something that +/// provides a value as specified by MaskInfo. If so, replace the specified +/// store with a narrower store of truncated IVal. +static SDNode * +ShrinkLoadReplaceStoreWithStore(const std::pair<unsigned, unsigned> &MaskInfo, + SDValue IVal, StoreSDNode *St, + DAGCombiner *DC) { + unsigned NumBytes = MaskInfo.first; + unsigned ByteShift = MaskInfo.second; + SelectionDAG &DAG = DC->getDAG(); + + // Check to see if IVal is all zeros in the part being masked in by the 'or' + // that uses this. If not, this is not a replacement. + APInt Mask = ~APInt::getBitsSet(IVal.getValueSizeInBits(), + ByteShift*8, (ByteShift+NumBytes)*8); + if (!DAG.MaskedValueIsZero(IVal, Mask)) return 0; + + // Check that it is legal on the target to do this. It is legal if the new + // VT we're shrinking to (i8/i16/i32) is legal or we're still before type + // legalization. + MVT VT = MVT::getIntegerVT(NumBytes*8); + if (!DC->isTypeLegal(VT)) + return 0; + + // Okay, we can do this! Replace the 'St' store with a store of IVal that is + // shifted by ByteShift and truncated down to NumBytes. 
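CheckForMaskedLoad above recovers the width and byte offset of the field being replaced purely from the AND constant: sign-extend and invert the mask, require the resulting run of set bits to start and end on byte boundaries, and rescale the leading-zero count from 64 bits down to the value width. The same arithmetic on a concrete i32 mask, as a standalone sketch with portable helpers standing in for CountLeadingZeros_64 and CountTrailingZeros_64:

#include <cassert>
#include <cstdint>

static unsigned Clz64(uint64_t V) { unsigned N = 0; while (!(V >> 63)) { V <<= 1; ++N; } return N; }
static unsigned Ctz64(uint64_t V) { unsigned N = 0; while (!(V & 1)) { V >>= 1; ++N; } return N; }

int main() {
  // (or (and (load p), 0xFFFF00FF), X) on i32: the mask clears byte 1.
  uint64_t SExtMask = 0xFFFFFFFFFFFF00FFULL;  // the i32 constant, sign-extended
  uint64_t NotMask  = ~SExtMask;              // 0xFF00: the bits being replaced
  unsigned NotMaskLZ = Clz64(NotMask);        // 48
  unsigned NotMaskTZ = Ctz64(NotMask);        // 8
  assert((NotMaskLZ & 7) == 0 && (NotMaskTZ & 7) == 0);                   // byte-aligned run
  assert(Ctz64(~(NotMask >> NotMaskTZ)) + NotMaskTZ + NotMaskLZ == 64);   // contiguous 0*1+0*
  NotMaskLZ -= 64 - 32;                       // rescale from i64 to the i32 value width
  unsigned MaskedBytes = (32 - NotMaskLZ - NotMaskTZ) / 8;   // 1: a single byte is replaced
  unsigned ByteShift   = NotMaskTZ / 8;                      // 1: at byte offset 1
  assert(MaskedBytes == 1 && ByteShift == 1);
  return 0;
}

ShrinkLoadReplaceStoreWithStore then collapses the whole load/and/or/store sequence into one narrow store at that byte offset, mirroring the offset from the other end of the value on big-endian targets.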
+ if (ByteShift) + IVal = DAG.getNode(ISD::SRL, IVal->getDebugLoc(), IVal.getValueType(), IVal, + DAG.getConstant(ByteShift*8, DC->getShiftAmountTy())); + + // Figure out the offset for the store and the alignment of the access. + unsigned StOffset; + unsigned NewAlign = St->getAlignment(); + + if (DAG.getTargetLoweringInfo().isLittleEndian()) + StOffset = ByteShift; + else + StOffset = IVal.getValueType().getStoreSize() - ByteShift - NumBytes; + + SDValue Ptr = St->getBasePtr(); + if (StOffset) { + Ptr = DAG.getNode(ISD::ADD, IVal->getDebugLoc(), Ptr.getValueType(), + Ptr, DAG.getConstant(StOffset, Ptr.getValueType())); + NewAlign = MinAlign(NewAlign, StOffset); + } + + // Truncate down to the new size. + IVal = DAG.getNode(ISD::TRUNCATE, IVal->getDebugLoc(), VT, IVal); + + ++OpsNarrowed; + return DAG.getStore(St->getChain(), St->getDebugLoc(), IVal, Ptr, + St->getSrcValue(), St->getSrcValueOffset()+StOffset, + false, false, NewAlign).getNode(); +} + /// ReduceLoadOpStoreWidth - Look for sequence of load / op / store where op is /// one of 'or', 'xor', and 'and' of immediates. If 'op' is only touching some @@ -5164,6 +5647,28 @@ SDValue DAGCombiner::ReduceLoadOpStoreWidth(SDNode *N) { return SDValue(); unsigned Opc = Value.getOpcode(); + + // If this is "store (or X, Y), P" and X is "(and (load P), cst)", where cst + // is a byte mask indicating a consecutive number of bytes, check to see if + // Y is known to provide just those bytes. If so, we try to replace the + // load + replace + store sequence with a single (narrower) store, which makes + // the load dead. + if (Opc == ISD::OR) { + std::pair<unsigned, unsigned> MaskedLoad; + MaskedLoad = CheckForMaskedLoad(Value.getOperand(0), Ptr, Chain); + if (MaskedLoad.first) + if (SDNode *NewST = ShrinkLoadReplaceStoreWithStore(MaskedLoad, + Value.getOperand(1), ST,this)) + return SDValue(NewST, 0); + + // Or is commutative, so try swapping X and Y. + MaskedLoad = CheckForMaskedLoad(Value.getOperand(1), Ptr, Chain); + if (MaskedLoad.first) + if (SDNode *NewST = ShrinkLoadReplaceStoreWithStore(MaskedLoad, + Value.getOperand(0), ST,this)) + return SDValue(NewST, 0); + } + if ((Opc != ISD::OR && Opc != ISD::XOR && Opc != ISD::AND) || Value.getOperand(1).getOpcode() != ISD::Constant) return SDValue(); @@ -5211,8 +5716,8 @@ SDValue DAGCombiner::ReduceLoadOpStoreWidth(SDNode *N) { PtrOff = (BitWidth + 7 - NewBW) / 8 - PtrOff; unsigned NewAlign = MinAlign(LD->getAlignment(), PtrOff); - if (NewAlign < - TLI.getTargetData()->getABITypeAlignment(NewVT.getTypeForEVT(*DAG.getContext()))) + const Type *NewVTTy = NewVT.getTypeForEVT(*DAG.getContext()); + if (NewAlign < TLI.getTargetData()->getABITypeAlignment(NewVTTy)) return SDValue(); SDValue NewPtr = DAG.getNode(ISD::ADD, LD->getDebugLoc(), @@ -5282,8 +5787,7 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) { case MVT::ppcf128: break; case MVT::f32: - if (((TLI.isTypeLegal(MVT::i32) || !LegalTypes) && !LegalOperations && - !ST->isVolatile()) || + if ((isTypeLegal(MVT::i32) && !LegalOperations && !ST->isVolatile()) || TLI.isOperationLegalOrCustom(ISD::STORE, MVT::i32)) { Tmp = DAG.getConstant((uint32_t)CFP->getValueAPF(). 
bitcastToAPInt().getZExtValue(), MVT::i32); @@ -5294,7 +5798,7 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) { } break; case MVT::f64: - if (((TLI.isTypeLegal(MVT::i64) || !LegalTypes) && !LegalOperations && + if ((TLI.isTypeLegal(MVT::i64) && !LegalOperations && !ST->isVolatile()) || TLI.isOperationLegalOrCustom(ISD::STORE, MVT::i64)) { Tmp = DAG.getConstant(CFP->getValueAPF().bitcastToAPInt(). @@ -5551,7 +6055,7 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) { InVec = InVec.getOperand(0); if (ISD::isNormalLoad(InVec.getNode())) { LN0 = cast<LoadSDNode>(InVec); - Elt = (Idx < (int)NumElems) ? Idx : Idx - NumElems; + Elt = (Idx < (int)NumElems) ? Idx : Idx - (int)NumElems; } } @@ -5659,7 +6163,7 @@ SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) { } // Add count and size info. - if (!TLI.isTypeLegal(VT) && LegalTypes) + if (!isTypeLegal(VT)) return SDValue(); // Return the new VECTOR_SHUFFLE node. @@ -6287,7 +6791,7 @@ SDValue DAGCombiner::BuildUDIV(SDNode *N) { /// FindBaseOffset - Return true if base is a frame index, which is known not // to alias with anything but itself. Provides base object and offset as results. static bool FindBaseOffset(SDValue Ptr, SDValue &Base, int64_t &Offset, - GlobalValue *&GV, void *&CV) { + const GlobalValue *&GV, void *&CV) { // Assume it is a primitive operation. Base = Ptr; Offset = 0; GV = 0; CV = 0; @@ -6335,7 +6839,7 @@ bool DAGCombiner::isAlias(SDValue Ptr1, int64_t Size1, // Gather base node and offset information. SDValue Base1, Base2; int64_t Offset1, Offset2; - GlobalValue *GV1, *GV2; + const GlobalValue *GV1, *GV2; void *CV1, *CV2; bool isFrameIndex1 = FindBaseOffset(Ptr1, Base1, Offset1, GV1, CV1); bool isFrameIndex2 = FindBaseOffset(Ptr2, Base2, Offset2, GV2, CV2); diff --git a/lib/CodeGen/SelectionDAG/FastISel.cpp b/lib/CodeGen/SelectionDAG/FastISel.cpp index 4bf41f2..b4c3833 100644 --- a/lib/CodeGen/SelectionDAG/FastISel.cpp +++ b/lib/CodeGen/SelectionDAG/FastISel.cpp @@ -1,4 +1,4 @@ -///===-- FastISel.cpp - Implementation of the FastISel class --------------===// +//===-- FastISel.cpp - Implementation of the FastISel class ---------------===// // // The LLVM Compiler Infrastructure // @@ -52,10 +52,11 @@ #include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetLowering.h" #include "llvm/Target/TargetMachine.h" +#include "llvm/Support/ErrorHandling.h" #include "FunctionLoweringInfo.h" using namespace llvm; -unsigned FastISel::getRegForValue(Value *V) { +unsigned FastISel::getRegForValue(const Value *V) { EVT RealVT = TLI.getValueType(V->getType(), /*AllowUnknown=*/true); // Don't handle non-simple values in FastISel. if (!RealVT.isSimple()) @@ -83,7 +84,16 @@ unsigned FastISel::getRegForValue(Value *V) { if (Reg != 0) return Reg; - if (ConstantInt *CI = dyn_cast<ConstantInt>(V)) { + return materializeRegForValue(V, VT); +} + +/// materializeRegForValue - Helper for getRegForVale. This function is +/// called when the value isn't already available in a register and must +/// be materialized with new instructions. +unsigned FastISel::materializeRegForValue(const Value *V, MVT VT) { + unsigned Reg = 0; + + if (const ConstantInt *CI = dyn_cast<ConstantInt>(V)) { if (CI->getValue().getActiveBits() <= 64) Reg = FastEmit_i(VT, VT, ISD::Constant, CI->getZExtValue()); } else if (isa<AllocaInst>(V)) { @@ -93,10 +103,12 @@ unsigned FastISel::getRegForValue(Value *V) { // local-CSE'd with actual integer zeros. 
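The f32 and f64 cases above store a floating-point constant through its integer bit pattern whenever an integer store of that width is available; the pattern is simply the IEEE-754 encoding of the value. A standalone illustration for 1.0f on an IEEE-754 host, using memcpy as the portable bit cast:

#include <cassert>
#include <cstdint>
#include <cstring>

int main() {
  float F = 1.0f;
  uint32_t Bits;
  std::memcpy(&Bits, &F, sizeof(Bits));   // the value an i32 store would write
  assert(Bits == 0x3F800000u);            // sign 0, biased exponent 127, mantissa 0
  return 0;
}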
Reg = getRegForValue(Constant::getNullValue(TD.getIntPtrType(V->getContext()))); - } else if (ConstantFP *CF = dyn_cast<ConstantFP>(V)) { + } else if (const ConstantFP *CF = dyn_cast<ConstantFP>(V)) { + // Try to emit the constant directly. Reg = FastEmit_f(VT, VT, ISD::ConstantFP, CF); if (!Reg) { + // Try to emit the constant by using an integer constant with a cast. const APFloat &Flt = CF->getValueAPF(); EVT IntVT = TLI.getPointerTy(); @@ -114,9 +126,9 @@ unsigned FastISel::getRegForValue(Value *V) { Reg = FastEmit_r(IntVT.getSimpleVT(), VT, ISD::SINT_TO_FP, IntegerReg); } } - } else if (ConstantExpr *CE = dyn_cast<ConstantExpr>(V)) { - if (!SelectOperator(CE, CE->getOpcode())) return 0; - Reg = LocalValueMap[CE]; + } else if (const Operator *Op = dyn_cast<Operator>(V)) { + if (!SelectOperator(Op, Op->getOpcode())) return 0; + Reg = LocalValueMap[Op]; } else if (isa<UndefValue>(V)) { Reg = createResultReg(TLI.getRegClassFor(VT)); BuildMI(MBB, DL, TII.get(TargetOpcode::IMPLICIT_DEF), Reg); @@ -134,11 +146,11 @@ unsigned FastISel::getRegForValue(Value *V) { return Reg; } -unsigned FastISel::lookUpRegForValue(Value *V) { +unsigned FastISel::lookUpRegForValue(const Value *V) { // Look up the value to see if we already have a register for it. We // cache values defined by Instructions across blocks, and other values // only locally. This is because Instructions already have the SSA - // def-dominatess-use requirement enforced. + // def-dominates-use requirement enforced. if (ValueMap.count(V)) return ValueMap[V]; return LocalValueMap[V]; @@ -150,7 +162,7 @@ unsigned FastISel::lookUpRegForValue(Value *V) { /// NOTE: This is only necessary because we might select a block that uses /// a value before we select the block that defines the value. It might be /// possible to fix this by selecting blocks in reverse postorder. -unsigned FastISel::UpdateValueMap(Value* I, unsigned Reg) { +unsigned FastISel::UpdateValueMap(const Value *I, unsigned Reg) { if (!isa<Instruction>(I)) { LocalValueMap[I] = Reg; return Reg; @@ -167,7 +179,7 @@ unsigned FastISel::UpdateValueMap(Value* I, unsigned Reg) { return AssignedReg; } -unsigned FastISel::getRegForGEPIndex(Value *Idx) { +unsigned FastISel::getRegForGEPIndex(const Value *Idx) { unsigned IdxN = getRegForValue(Idx); if (IdxN == 0) // Unhandled operand. Halt "fast" selection and bail. @@ -186,7 +198,7 @@ unsigned FastISel::getRegForGEPIndex(Value *Idx) { /// SelectBinaryOp - Select and emit code for a binary operator instruction, /// which has an opcode which directly corresponds to the given ISD opcode. /// -bool FastISel::SelectBinaryOp(User *I, unsigned ISDOpcode) { +bool FastISel::SelectBinaryOp(const User *I, unsigned ISDOpcode) { EVT VT = EVT::getEVT(I->getType(), /*HandleUnknown=*/true); if (VT == MVT::Other || !VT.isSimple()) // Unhandled type. Halt "fast" selection and bail. @@ -252,7 +264,7 @@ bool FastISel::SelectBinaryOp(User *I, unsigned ISDOpcode) { return true; } -bool FastISel::SelectGetElementPtr(User *I) { +bool FastISel::SelectGetElementPtr(const User *I) { unsigned N = getRegForValue(I->getOperand(0)); if (N == 0) // Unhandled operand. Halt "fast" selection and bail. 
@@ -260,9 +272,9 @@ bool FastISel::SelectGetElementPtr(User *I) { const Type *Ty = I->getOperand(0)->getType(); MVT VT = TLI.getPointerTy(); - for (GetElementPtrInst::op_iterator OI = I->op_begin()+1, E = I->op_end(); - OI != E; ++OI) { - Value *Idx = *OI; + for (GetElementPtrInst::const_op_iterator OI = I->op_begin()+1, + E = I->op_end(); OI != E; ++OI) { + const Value *Idx = *OI; if (const StructType *StTy = dyn_cast<StructType>(Ty)) { unsigned Field = cast<ConstantInt>(Idx)->getZExtValue(); if (Field) { @@ -280,7 +292,7 @@ bool FastISel::SelectGetElementPtr(User *I) { Ty = cast<SequentialType>(Ty)->getElementType(); // If this is a constant subscript, handle it quickly. - if (ConstantInt *CI = dyn_cast<ConstantInt>(Idx)) { + if (const ConstantInt *CI = dyn_cast<ConstantInt>(Idx)) { if (CI->getZExtValue() == 0) continue; uint64_t Offs = TD.getTypeAllocSize(Ty)*cast<ConstantInt>(CI)->getSExtValue(); @@ -316,51 +328,56 @@ bool FastISel::SelectGetElementPtr(User *I) { return true; } -bool FastISel::SelectCall(User *I) { - Function *F = cast<CallInst>(I)->getCalledFunction(); +bool FastISel::SelectCall(const User *I) { + const Function *F = cast<CallInst>(I)->getCalledFunction(); if (!F) return false; + // Handle selected intrinsic function calls. unsigned IID = F->getIntrinsicID(); switch (IID) { default: break; case Intrinsic::dbg_declare: { - DbgDeclareInst *DI = cast<DbgDeclareInst>(I); + const DbgDeclareInst *DI = cast<DbgDeclareInst>(I); if (!DIDescriptor::ValidDebugInfo(DI->getVariable(), CodeGenOpt::None) || !MF.getMMI().hasDebugInfo()) return true; - Value *Address = DI->getAddress(); + const Value *Address = DI->getAddress(); if (!Address) return true; - AllocaInst *AI = dyn_cast<AllocaInst>(Address); + if (isa<UndefValue>(Address)) + return true; + const AllocaInst *AI = dyn_cast<AllocaInst>(Address); // Don't handle byval struct arguments or VLAs, for example. - if (!AI) break; - DenseMap<const AllocaInst*, int>::iterator SI = - StaticAllocaMap.find(AI); - if (SI == StaticAllocaMap.end()) break; // VLAs. - int FI = SI->second; - if (!DI->getDebugLoc().isUnknown()) - MF.getMMI().setVariableDbgInfo(DI->getVariable(), FI, DI->getDebugLoc()); - - // Building the map above is target independent. Generating DBG_VALUE - // inline is target dependent; do this now. - (void)TargetSelectInstruction(cast<Instruction>(I)); + // Note that if we have a byval struct argument, fast ISel is turned off; + // those are handled in SelectionDAGBuilder. + if (AI) { + DenseMap<const AllocaInst*, int>::iterator SI = + StaticAllocaMap.find(AI); + if (SI == StaticAllocaMap.end()) break; // VLAs. + int FI = SI->second; + if (!DI->getDebugLoc().isUnknown()) + MF.getMMI().setVariableDbgInfo(DI->getVariable(), FI, DI->getDebugLoc()); + } else + // Building the map above is target independent. Generating DBG_VALUE + // inline is target dependent; do this now. + (void)TargetSelectInstruction(cast<Instruction>(I)); return true; } case Intrinsic::dbg_value: { - // This requires target support, but right now X86 is the only Fast target. - DbgValueInst *DI = cast<DbgValueInst>(I); + // This form of DBG_VALUE is target-independent. + const DbgValueInst *DI = cast<DbgValueInst>(I); const TargetInstrDesc &II = TII.get(TargetOpcode::DBG_VALUE); - Value *V = DI->getValue(); + const Value *V = DI->getValue(); if (!V) { // Currently the optimizer can produce this; insert an undef to // help debugging. Probably the optimizer should not do this. BuildMI(MBB, DL, II).addReg(0U).addImm(DI->getOffset()). 
addMetadata(DI->getVariable()); - } else if (ConstantInt *CI = dyn_cast<ConstantInt>(V)) { + } else if (const ConstantInt *CI = dyn_cast<ConstantInt>(V)) { BuildMI(MBB, DL, II).addImm(CI->getZExtValue()).addImm(DI->getOffset()). addMetadata(DI->getVariable()); - } else if (ConstantFP *CF = dyn_cast<ConstantFP>(V)) { + } else if (const ConstantFP *CF = dyn_cast<ConstantFP>(V)) { BuildMI(MBB, DL, II).addFPImm(CF).addImm(DI->getOffset()). addMetadata(DI->getVariable()); } else if (unsigned Reg = lookUpRegForValue(V)) { @@ -438,10 +455,12 @@ bool FastISel::SelectCall(User *I) { break; } } + + // An arbitrary call. Bail. return false; } -bool FastISel::SelectCast(User *I, unsigned Opcode) { +bool FastISel::SelectCast(const User *I, unsigned Opcode) { EVT SrcVT = TLI.getValueType(I->getOperand(0)->getType()); EVT DstVT = TLI.getValueType(I->getType()); @@ -493,7 +512,7 @@ bool FastISel::SelectCast(User *I, unsigned Opcode) { return true; } -bool FastISel::SelectBitCast(User *I) { +bool FastISel::SelectBitCast(const User *I) { // If the bitcast doesn't change the type, just use the operand value. if (I->getType() == I->getOperand(0)->getType()) { unsigned Reg = getRegForValue(I->getOperand(0)); @@ -544,15 +563,28 @@ bool FastISel::SelectBitCast(User *I) { } bool -FastISel::SelectInstruction(Instruction *I) { +FastISel::SelectInstruction(const Instruction *I) { + // Just before the terminator instruction, insert instructions to + // feed PHI nodes in successor blocks. + if (isa<TerminatorInst>(I)) + if (!HandlePHINodesInSuccessorBlocks(I->getParent())) + return false; + + DL = I->getDebugLoc(); + // First, try doing target-independent selection. - if (SelectOperator(I, I->getOpcode())) + if (SelectOperator(I, I->getOpcode())) { + DL = DebugLoc(); return true; + } // Next, try calling the target to attempt to handle the instruction. - if (TargetSelectInstruction(I)) + if (TargetSelectInstruction(I)) { + DL = DebugLoc(); return true; + } + DL = DebugLoc(); return false; } @@ -573,7 +605,7 @@ FastISel::FastEmitBranch(MachineBasicBlock *MSucc) { /// SelectFNeg - Emit an FNeg operation. /// bool -FastISel::SelectFNeg(User *I) { +FastISel::SelectFNeg(const User *I) { unsigned OpReg = getRegForValue(BinaryOperator::getFNegArgument(I)); if (OpReg == 0) return false; @@ -614,7 +646,7 @@ FastISel::SelectFNeg(User *I) { } bool -FastISel::SelectOperator(User *I, unsigned Opcode) { +FastISel::SelectOperator(const User *I, unsigned Opcode) { switch (Opcode) { case Instruction::Add: return SelectBinaryOp(I, ISD::ADD); @@ -660,10 +692,10 @@ FastISel::SelectOperator(User *I, unsigned Opcode) { return SelectGetElementPtr(I); case Instruction::Br: { - BranchInst *BI = cast<BranchInst>(I); + const BranchInst *BI = cast<BranchInst>(I); if (BI->isUnconditional()) { - BasicBlock *LLVMSucc = BI->getSuccessor(0); + const BasicBlock *LLVMSucc = BI->getSuccessor(0); MachineBasicBlock *MSucc = MBBMap[LLVMSucc]; FastEmitBranch(MSucc); return true; @@ -678,10 +710,6 @@ FastISel::SelectOperator(User *I, unsigned Opcode) { // Nothing to emit. return true; - case Instruction::PHI: - // PHI nodes are already emitted. - return true; - case Instruction::Alloca: // FunctionLowering has the static-sized case covered. if (StaticAllocaMap.count(cast<AllocaInst>(I))) @@ -721,6 +749,9 @@ FastISel::SelectOperator(User *I, unsigned Opcode) { return true; } + case Instruction::PHI: + llvm_unreachable("FastISel shouldn't visit PHI nodes!"); + default: // Unhandled instruction. Halt "fast" selection and bail. 
return false; @@ -730,15 +761,17 @@ FastISel::SelectOperator(User *I, unsigned Opcode) { FastISel::FastISel(MachineFunction &mf, DenseMap<const Value *, unsigned> &vm, DenseMap<const BasicBlock *, MachineBasicBlock *> &bm, - DenseMap<const AllocaInst *, int> &am + DenseMap<const AllocaInst *, int> &am, + std::vector<std::pair<MachineInstr*, unsigned> > &pn #ifndef NDEBUG - , SmallSet<Instruction*, 8> &cil + , SmallSet<const Instruction *, 8> &cil #endif ) : MBB(0), ValueMap(vm), MBBMap(bm), StaticAllocaMap(am), + PHINodesToUpdate(pn), #ifndef NDEBUG CatchInfoLost(cil), #endif @@ -775,7 +808,7 @@ unsigned FastISel::FastEmit_i(MVT, MVT, unsigned, uint64_t /*Imm*/) { } unsigned FastISel::FastEmit_f(MVT, MVT, - unsigned, ConstantFP * /*FPImm*/) { + unsigned, const ConstantFP * /*FPImm*/) { return 0; } @@ -787,7 +820,7 @@ unsigned FastISel::FastEmit_ri(MVT, MVT, unsigned FastISel::FastEmit_rf(MVT, MVT, unsigned, unsigned /*Op0*/, - ConstantFP * /*FPImm*/) { + const ConstantFP * /*FPImm*/) { return 0; } @@ -820,7 +853,7 @@ unsigned FastISel::FastEmit_ri_(MVT VT, unsigned Opcode, /// FastEmit_rf. If that fails, it materializes the immediate into a register /// and try FastEmit_rr instead. unsigned FastISel::FastEmit_rf_(MVT VT, unsigned Opcode, - unsigned Op0, ConstantFP *FPImm, + unsigned Op0, const ConstantFP *FPImm, MVT ImmType) { // First check if immediate type is legal. If not, we can't use the rf form. unsigned ResultReg = FastEmit_rf(VT, VT, Opcode, Op0, FPImm); @@ -930,7 +963,7 @@ unsigned FastISel::FastEmitInst_ri(unsigned MachineInstOpcode, unsigned FastISel::FastEmitInst_rf(unsigned MachineInstOpcode, const TargetRegisterClass *RC, - unsigned Op0, ConstantFP *FPImm) { + unsigned Op0, const ConstantFP *FPImm) { unsigned ResultReg = createResultReg(RC); const TargetInstrDesc &II = TII.get(MachineInstOpcode); @@ -1006,3 +1039,67 @@ unsigned FastISel::FastEmitInst_extractsubreg(MVT RetVT, unsigned FastISel::FastEmitZExtFromI1(MVT VT, unsigned Op) { return FastEmit_ri(VT, VT, ISD::AND, Op, 1); } + +/// HandlePHINodesInSuccessorBlocks - Handle PHI nodes in successor blocks. +/// Emit code to ensure constants are copied into registers when needed. +/// Remember the virtual registers that need to be added to the Machine PHI +/// nodes as input. We cannot just directly add them, because expansion +/// might result in multiple MBB's for one BB. As such, the start of the +/// BB might correspond to a different MBB than the end. +bool FastISel::HandlePHINodesInSuccessorBlocks(const BasicBlock *LLVMBB) { + const TerminatorInst *TI = LLVMBB->getTerminator(); + + SmallPtrSet<MachineBasicBlock *, 4> SuccsHandled; + unsigned OrigNumPHINodesToUpdate = PHINodesToUpdate.size(); + + // Check successor nodes' PHI nodes that expect a constant to be available + // from this block. + for (unsigned succ = 0, e = TI->getNumSuccessors(); succ != e; ++succ) { + const BasicBlock *SuccBB = TI->getSuccessor(succ); + if (!isa<PHINode>(SuccBB->begin())) continue; + MachineBasicBlock *SuccMBB = MBBMap[SuccBB]; + + // If this terminator has multiple identical successors (common for + // switches), only handle each succ once. + if (!SuccsHandled.insert(SuccMBB)) continue; + + MachineBasicBlock::iterator MBBI = SuccMBB->begin(); + + // At this point we know that there is a 1-1 correspondence between LLVM PHI + // nodes and Machine PHI nodes, but the incoming operands have not been + // emitted yet. 
+ for (BasicBlock::const_iterator I = SuccBB->begin(); + const PHINode *PN = dyn_cast<PHINode>(I); ++I) { + // Ignore dead phi's. + if (PN->use_empty()) continue; + + // Only handle legal types. Two interesting things to note here. First, + // by bailing out early, we may leave behind some dead instructions, + // since SelectionDAG's HandlePHINodesInSuccessorBlocks will insert its + // own moves. Second, this check is necessary becuase FastISel doesn't + // use CreateRegForValue to create registers, so it always creates + // exactly one register for each non-void instruction. + EVT VT = TLI.getValueType(PN->getType(), /*AllowUnknown=*/true); + if (VT == MVT::Other || !TLI.isTypeLegal(VT)) { + // Promote MVT::i1. + if (VT == MVT::i1) + VT = TLI.getTypeToTransformTo(LLVMBB->getContext(), VT); + else { + PHINodesToUpdate.resize(OrigNumPHINodesToUpdate); + return false; + } + } + + const Value *PHIOp = PN->getIncomingValueForBlock(LLVMBB); + + unsigned Reg = getRegForValue(PHIOp); + if (Reg == 0) { + PHINodesToUpdate.resize(OrigNumPHINodesToUpdate); + return false; + } + PHINodesToUpdate.push_back(std::make_pair(MBBI++, Reg)); + } + } + + return true; +} diff --git a/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp b/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp index 4fb2aa2..65c36c1 100644 --- a/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp +++ b/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp @@ -14,19 +14,18 @@ #define DEBUG_TYPE "function-lowering-info" #include "FunctionLoweringInfo.h" -#include "llvm/CallingConv.h" #include "llvm/DerivedTypes.h" #include "llvm/Function.h" #include "llvm/Instructions.h" #include "llvm/IntrinsicInst.h" #include "llvm/LLVMContext.h" #include "llvm/Module.h" +#include "llvm/CodeGen/Analysis.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/CodeGen/MachineRegisterInfo.h" -#include "llvm/Analysis/DebugInfo.h" #include "llvm/Target/TargetRegisterInfo.h" #include "llvm/Target/TargetData.h" #include "llvm/Target/TargetFrameInfo.h" @@ -34,99 +33,21 @@ #include "llvm/Target/TargetIntrinsicInfo.h" #include "llvm/Target/TargetLowering.h" #include "llvm/Target/TargetOptions.h" -#include "llvm/Support/Compiler.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/MathExtras.h" -#include "llvm/Support/raw_ostream.h" #include <algorithm> using namespace llvm; -/// ComputeLinearIndex - Given an LLVM IR aggregate type and a sequence -/// of insertvalue or extractvalue indices that identify a member, return -/// the linearized index of the start of the member. -/// -unsigned llvm::ComputeLinearIndex(const TargetLowering &TLI, const Type *Ty, - const unsigned *Indices, - const unsigned *IndicesEnd, - unsigned CurIndex) { - // Base case: We're done. - if (Indices && Indices == IndicesEnd) - return CurIndex; - - // Given a struct type, recursively traverse the elements. - if (const StructType *STy = dyn_cast<StructType>(Ty)) { - for (StructType::element_iterator EB = STy->element_begin(), - EI = EB, - EE = STy->element_end(); - EI != EE; ++EI) { - if (Indices && *Indices == unsigned(EI - EB)) - return ComputeLinearIndex(TLI, *EI, Indices+1, IndicesEnd, CurIndex); - CurIndex = ComputeLinearIndex(TLI, *EI, 0, 0, CurIndex); - } - return CurIndex; - } - // Given an array type, recursively traverse the elements. 
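When any PHI in any successor cannot be handled, the loop above resizes PHINodesToUpdate back to the length it had on entry, so a partially processed terminator leaves no stale entries behind once selection falls back to the SelectionDAG path. The rollback idiom in isolation, with a stand-in element type:

#include <cassert>
#include <utility>
#include <vector>

int main() {
  std::vector<std::pair<int, unsigned> > PHINodesToUpdate;   // (PHI id, vreg) stand-ins
  PHINodesToUpdate.push_back(std::make_pair(0, 100u));       // recorded by earlier blocks

  unsigned OrigNumPHINodesToUpdate = PHINodesToUpdate.size();
  PHINodesToUpdate.push_back(std::make_pair(1, 101u));       // tentatively recorded here
  bool Failed = true;                                        // e.g. an illegal PHI type
  if (Failed)
    PHINodesToUpdate.resize(OrigNumPHINodesToUpdate);        // drop only this block's entries

  assert(PHINodesToUpdate.size() == 1 && PHINodesToUpdate[0].first == 0);
  return 0;
}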
- else if (const ArrayType *ATy = dyn_cast<ArrayType>(Ty)) { - const Type *EltTy = ATy->getElementType(); - for (unsigned i = 0, e = ATy->getNumElements(); i != e; ++i) { - if (Indices && *Indices == i) - return ComputeLinearIndex(TLI, EltTy, Indices+1, IndicesEnd, CurIndex); - CurIndex = ComputeLinearIndex(TLI, EltTy, 0, 0, CurIndex); - } - return CurIndex; - } - // We haven't found the type we're looking for, so keep searching. - return CurIndex + 1; -} - -/// ComputeValueVTs - Given an LLVM IR type, compute a sequence of -/// EVTs that represent all the individual underlying -/// non-aggregate types that comprise it. -/// -/// If Offsets is non-null, it points to a vector to be filled in -/// with the in-memory offsets of each of the individual values. -/// -void llvm::ComputeValueVTs(const TargetLowering &TLI, const Type *Ty, - SmallVectorImpl<EVT> &ValueVTs, - SmallVectorImpl<uint64_t> *Offsets, - uint64_t StartingOffset) { - // Given a struct type, recursively traverse the elements. - if (const StructType *STy = dyn_cast<StructType>(Ty)) { - const StructLayout *SL = TLI.getTargetData()->getStructLayout(STy); - for (StructType::element_iterator EB = STy->element_begin(), - EI = EB, - EE = STy->element_end(); - EI != EE; ++EI) - ComputeValueVTs(TLI, *EI, ValueVTs, Offsets, - StartingOffset + SL->getElementOffset(EI - EB)); - return; - } - // Given an array type, recursively traverse the elements. - if (const ArrayType *ATy = dyn_cast<ArrayType>(Ty)) { - const Type *EltTy = ATy->getElementType(); - uint64_t EltSize = TLI.getTargetData()->getTypeAllocSize(EltTy); - for (unsigned i = 0, e = ATy->getNumElements(); i != e; ++i) - ComputeValueVTs(TLI, EltTy, ValueVTs, Offsets, - StartingOffset + i * EltSize); - return; - } - // Interpret void as zero return values. - if (Ty->isVoidTy()) - return; - // Base case: we can get an EVT for this LLVM IR type. - ValueVTs.push_back(TLI.getValueType(Ty)); - if (Offsets) - Offsets->push_back(StartingOffset); -} - /// isUsedOutsideOfDefiningBlock - Return true if this instruction is used by /// PHI nodes or outside of the basic block that defines it, or used by a /// switch or atomic instruction, which may expand to multiple basic blocks. -static bool isUsedOutsideOfDefiningBlock(Instruction *I) { +static bool isUsedOutsideOfDefiningBlock(const Instruction *I) { + if (I->use_empty()) return false; if (isa<PHINode>(I)) return true; - BasicBlock *BB = I->getParent(); - for (Value::use_iterator UI = I->use_begin(), E = I->use_end(); UI != E; ++UI) + const BasicBlock *BB = I->getParent(); + for (Value::const_use_iterator UI = I->use_begin(), E = I->use_end(); + UI != E; ++UI) if (cast<Instruction>(*UI)->getParent() != BB || isa<PHINode>(*UI)) return true; return false; @@ -135,26 +56,25 @@ static bool isUsedOutsideOfDefiningBlock(Instruction *I) { /// isOnlyUsedInEntryBlock - If the specified argument is only used in the /// entry block, return true. This includes arguments used by switches, since /// the switch may expand into multiple basic blocks. -static bool isOnlyUsedInEntryBlock(Argument *A, bool EnableFastISel) { +static bool isOnlyUsedInEntryBlock(const Argument *A, bool EnableFastISel) { // With FastISel active, we may be splitting blocks, so force creation // of virtual registers for all non-dead arguments. - // Don't force virtual registers for byval arguments though, because - // fast-isel can't handle those in all cases. 
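ComputeLinearIndex and ComputeValueVTs are deleted from this file; presumably the new llvm/CodeGen/Analysis.h include added above now supplies them. The removed comment describes ComputeLinearIndex as returning the flattened position of an aggregate member, i.e. the number of scalar leaves that precede it in a depth-first walk. A miniature of that linearization over a toy type representation rather than real IR types:

#include <cassert>
#include <vector>

struct Ty { std::vector<Ty> Members; };              // empty Members == scalar leaf

static unsigned CountLeaves(const Ty &T) {
  if (T.Members.empty()) return 1;
  unsigned N = 0;
  for (unsigned i = 0, e = T.Members.size(); i != e; ++i)
    N += CountLeaves(T.Members[i]);
  return N;
}

static unsigned LinearIndex(const Ty &T, const unsigned *I, const unsigned *E) {
  if (I == E) return 0;                              // reached the member
  unsigned Index = 0;
  for (unsigned i = 0; i != *I; ++i)                 // leaves in preceding siblings
    Index += CountLeaves(T.Members[i]);
  return Index + LinearIndex(T.Members[*I], I + 1, E);
}

int main() {
  Ty Leaf;                                           // e.g. i32
  Ty Inner;  Inner.Members.assign(2, Leaf);          // { i32, i32 }
  Ty Outer;                                          // { i32, { i32, i32 }, i32 }
  Outer.Members.push_back(Leaf);
  Outer.Members.push_back(Inner);
  Outer.Members.push_back(Leaf);
  unsigned Idx[] = { 1, 1 };                         // second element of the inner struct
  assert(LinearIndex(Outer, Idx, Idx + 2) == 2);     // preceded by outer.0 and inner.0
  return 0;
}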
- if (EnableFastISel && !A->hasByValAttr()) + if (EnableFastISel) return A->use_empty(); - BasicBlock *Entry = A->getParent()->begin(); - for (Value::use_iterator UI = A->use_begin(), E = A->use_end(); UI != E; ++UI) + const BasicBlock *Entry = A->getParent()->begin(); + for (Value::const_use_iterator UI = A->use_begin(), E = A->use_end(); + UI != E; ++UI) if (cast<Instruction>(*UI)->getParent() != Entry || isa<SwitchInst>(*UI)) return false; // Use not in entry block. return true; } -FunctionLoweringInfo::FunctionLoweringInfo(TargetLowering &tli) +FunctionLoweringInfo::FunctionLoweringInfo(const TargetLowering &tli) : TLI(tli) { } -void FunctionLoweringInfo::set(Function &fn, MachineFunction &mf, +void FunctionLoweringInfo::set(const Function &fn, MachineFunction &mf, bool EnableFastISel) { Fn = &fn; MF = &mf; @@ -162,7 +82,7 @@ void FunctionLoweringInfo::set(Function &fn, MachineFunction &mf, // Create a vreg for each argument register that is not dead and is used // outside of the entry block for the function. - for (Function::arg_iterator AI = Fn->arg_begin(), E = Fn->arg_end(); + for (Function::const_arg_iterator AI = Fn->arg_begin(), E = Fn->arg_end(); AI != E; ++AI) if (!isOnlyUsedInEntryBlock(AI, EnableFastISel)) InitializeRegForValue(AI); @@ -170,10 +90,10 @@ void FunctionLoweringInfo::set(Function &fn, MachineFunction &mf, // Initialize the mapping of values to registers. This is only set up for // instruction values that are used outside of the block that defines // them. - Function::iterator BB = Fn->begin(), EB = Fn->end(); - for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I) - if (AllocaInst *AI = dyn_cast<AllocaInst>(I)) - if (ConstantInt *CUI = dyn_cast<ConstantInt>(AI->getArraySize())) { + Function::const_iterator BB = Fn->begin(), EB = Fn->end(); + for (BasicBlock::const_iterator I = BB->begin(), E = BB->end(); I != E; ++I) + if (const AllocaInst *AI = dyn_cast<AllocaInst>(I)) + if (const ConstantInt *CUI = dyn_cast<ConstantInt>(AI->getArraySize())) { const Type *Ty = AI->getAllocatedType(); uint64_t TySize = TLI.getTargetData()->getTypeAllocSize(Ty); unsigned Align = @@ -187,8 +107,8 @@ void FunctionLoweringInfo::set(Function &fn, MachineFunction &mf, } for (; BB != EB; ++BB) - for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I) - if (!I->use_empty() && isUsedOutsideOfDefiningBlock(I)) + for (BasicBlock::const_iterator I = BB->begin(), E = BB->end(); I != E; ++I) + if (isUsedOutsideOfDefiningBlock(I)) if (!isa<AllocaInst>(I) || !StaticAllocaMap.count(cast<AllocaInst>(I))) InitializeRegForValue(I); @@ -196,7 +116,7 @@ void FunctionLoweringInfo::set(Function &fn, MachineFunction &mf, // Create an initial MachineBasicBlock for each LLVM BasicBlock in F. This // also creates the initial PHI MachineInstrs, though none of the input // operands are populated. - for (BB = Fn->begin(), EB = Fn->end(); BB != EB; ++BB) { + for (BB = Fn->begin(); BB != EB; ++BB) { MachineBasicBlock *MBB = mf.CreateMachineBasicBlock(BB); MBBMap[BB] = MBB; MF->push_back(MBB); @@ -209,14 +129,11 @@ void FunctionLoweringInfo::set(Function &fn, MachineFunction &mf, // Create Machine PHI nodes for LLVM PHI nodes, lowering them as // appropriate. 
- PHINode *PN; - DebugLoc DL; - for (BasicBlock::iterator - I = BB->begin(), E = BB->end(); I != E; ++I) { - - PN = dyn_cast<PHINode>(I); - if (!PN || PN->use_empty()) continue; + for (BasicBlock::const_iterator I = BB->begin(); + const PHINode *PN = dyn_cast<PHINode>(I); ++I) { + if (PN->use_empty()) continue; + DebugLoc DL = PN->getDebugLoc(); unsigned PHIReg = ValueMap[PN]; assert(PHIReg && "PHI node does not have an assigned virtual register!"); @@ -232,12 +149,20 @@ void FunctionLoweringInfo::set(Function &fn, MachineFunction &mf, } } } + + // Mark landing pad blocks. + for (BB = Fn->begin(); BB != EB; ++BB) + if (const InvokeInst *Invoke = dyn_cast<InvokeInst>(BB->getTerminator())) + MBBMap[Invoke->getSuccessor(1)]->setIsLandingPad(); } /// clear - Clear out all the function-specific state. This returns this /// FunctionLoweringInfo to an empty state, ready to be used for a /// different function. void FunctionLoweringInfo::clear() { + assert(CatchInfoFound.size() == CatchInfoLost.size() && + "Not all catch info was assigned to a landing pad!"); + MBBMap.clear(); ValueMap.clear(); StaticAllocaMap.clear(); @@ -246,6 +171,7 @@ void FunctionLoweringInfo::clear() { CatchInfoFound.clear(); #endif LiveOutRegInfo.clear(); + ArgDbgValues.clear(); } unsigned FunctionLoweringInfo::MakeReg(EVT VT) { @@ -277,30 +203,12 @@ unsigned FunctionLoweringInfo::CreateRegForValue(const Value *V) { return FirstReg; } -/// ExtractTypeInfo - Returns the type info, possibly bitcast, encoded in V. -GlobalVariable *llvm::ExtractTypeInfo(Value *V) { - V = V->stripPointerCasts(); - GlobalVariable *GV = dyn_cast<GlobalVariable>(V); - - if (GV && GV->getName() == ".llvm.eh.catch.all.value") { - assert(GV->hasInitializer() && - "The EH catch-all value must have an initializer"); - Value *Init = GV->getInitializer(); - GV = dyn_cast<GlobalVariable>(Init); - if (!GV) V = cast<ConstantPointerNull>(Init); - } - - assert((GV || isa<ConstantPointerNull>(V)) && - "TypeInfo must be a global variable or NULL"); - return GV; -} - /// AddCatchInfo - Extract the personality and type infos from an eh.selector /// call, and add them to the specified machine basic block. -void llvm::AddCatchInfo(CallInst &I, MachineModuleInfo *MMI, +void llvm::AddCatchInfo(const CallInst &I, MachineModuleInfo *MMI, MachineBasicBlock *MBB) { // Inform the MachineModuleInfo of the personality for this landing pad. - ConstantExpr *CE = cast<ConstantExpr>(I.getOperand(2)); + const ConstantExpr *CE = cast<ConstantExpr>(I.getOperand(2)); assert(CE->getOpcode() == Instruction::BitCast && isa<Function>(CE->getOperand(0)) && "Personality should be a function"); @@ -308,11 +216,11 @@ void llvm::AddCatchInfo(CallInst &I, MachineModuleInfo *MMI, // Gather all the type infos for this landing pad and pass them along to // MachineModuleInfo. 
- std::vector<GlobalVariable *> TyInfo; + std::vector<const GlobalVariable *> TyInfo; unsigned N = I.getNumOperands(); for (unsigned i = N - 1; i > 2; --i) { - if (ConstantInt *CI = dyn_cast<ConstantInt>(I.getOperand(i))) { + if (const ConstantInt *CI = dyn_cast<ConstantInt>(I.getOperand(i))) { unsigned FilterLength = CI->getZExtValue(); unsigned FirstCatch = i + FilterLength + !FilterLength; assert (FirstCatch <= N && "Invalid filter length"); @@ -349,10 +257,11 @@ void llvm::AddCatchInfo(CallInst &I, MachineModuleInfo *MMI, } } -void llvm::CopyCatchInfo(BasicBlock *SrcBB, BasicBlock *DestBB, +void llvm::CopyCatchInfo(const BasicBlock *SrcBB, const BasicBlock *DestBB, MachineModuleInfo *MMI, FunctionLoweringInfo &FLI) { - for (BasicBlock::iterator I = SrcBB->begin(), E = --SrcBB->end(); I != E; ++I) - if (EHSelectorInst *EHSel = dyn_cast<EHSelectorInst>(I)) { + for (BasicBlock::const_iterator I = SrcBB->begin(), E = --SrcBB->end(); + I != E; ++I) + if (const EHSelectorInst *EHSel = dyn_cast<EHSelectorInst>(I)) { // Apply the catch info to DestBB. AddCatchInfo(*EHSel, MMI, FLI.MBBMap[DestBB]); #ifndef NDEBUG diff --git a/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.h b/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.h index d851e64..4067a5b 100644 --- a/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.h +++ b/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.h @@ -15,12 +15,17 @@ #ifndef FUNCTIONLOWERINGINFO_H #define FUNCTIONLOWERINGINFO_H +#include "llvm/InlineAsm.h" +#include "llvm/Instructions.h" #include "llvm/ADT/APInt.h" #include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/SmallVector.h" #ifndef NDEBUG #include "llvm/ADT/SmallSet.h" #endif #include "llvm/CodeGen/ValueTypes.h" +#include "llvm/CodeGen/ISDOpcodes.h" +#include "llvm/Support/CallSite.h" #include <vector> namespace llvm { @@ -31,6 +36,7 @@ class CallInst; class Function; class GlobalVariable; class Instruction; +class MachineInstr; class MachineBasicBlock; class MachineFunction; class MachineModuleInfo; @@ -44,8 +50,8 @@ class Value; /// class FunctionLoweringInfo { public: - TargetLowering &TLI; - Function *Fn; + const TargetLowering &TLI; + const Function *Fn; MachineFunction *MF; MachineRegisterInfo *RegInfo; @@ -57,13 +63,6 @@ public: /// allocated to hold a pointer to the hidden sret parameter. unsigned DemoteRegister; - explicit FunctionLoweringInfo(TargetLowering &TLI); - - /// set - Initialize this FunctionLoweringInfo with the given Function - /// and its associated MachineFunction. - /// - void set(Function &Fn, MachineFunction &MF, bool EnableFastISel); - /// MBBMap - A mapping from LLVM basic blocks to their machine code entry. DenseMap<const BasicBlock*, MachineBasicBlock *> MBBMap; @@ -77,27 +76,15 @@ public: /// anywhere in the function. DenseMap<const AllocaInst*, int> StaticAllocaMap; + /// ArgDbgValues - A list of DBG_VALUE instructions created during isel for + /// function arguments that are inserted after scheduling is completed. + SmallVector<MachineInstr*, 8> ArgDbgValues; + #ifndef NDEBUG - SmallSet<Instruction*, 8> CatchInfoLost; - SmallSet<Instruction*, 8> CatchInfoFound; + SmallSet<const Instruction *, 8> CatchInfoLost; + SmallSet<const Instruction *, 8> CatchInfoFound; #endif - unsigned MakeReg(EVT VT); - - /// isExportedInst - Return true if the specified value is an instruction - /// exported from its block. 
- bool isExportedInst(const Value *V) { - return ValueMap.count(V); - } - - unsigned CreateRegForValue(const Value *V); - - unsigned InitializeRegForValue(const Value *V) { - unsigned &R = ValueMap[V]; - assert(R == 0 && "Already initialized this value register!"); - return R = CreateRegForValue(V); - } - struct LiveOutInfo { unsigned NumSignBits; APInt KnownOne, KnownZero; @@ -108,42 +95,48 @@ public: /// register number offset by 'FirstVirtualRegister'. std::vector<LiveOutInfo> LiveOutRegInfo; + /// PHINodesToUpdate - A list of phi instructions whose operand list will + /// be updated after processing the current basic block. + /// TODO: This isn't per-function state, it's per-basic-block state. But + /// there's no other convenient place for it to live right now. + std::vector<std::pair<MachineInstr*, unsigned> > PHINodesToUpdate; + + explicit FunctionLoweringInfo(const TargetLowering &TLI); + + /// set - Initialize this FunctionLoweringInfo with the given Function + /// and its associated MachineFunction. + /// + void set(const Function &Fn, MachineFunction &MF, bool EnableFastISel); + /// clear - Clear out all the function-specific state. This returns this /// FunctionLoweringInfo to an empty state, ready to be used for a /// different function. void clear(); -}; -/// ComputeLinearIndex - Given an LLVM IR aggregate type and a sequence -/// of insertvalue or extractvalue indices that identify a member, return -/// the linearized index of the start of the member. -/// -unsigned ComputeLinearIndex(const TargetLowering &TLI, const Type *Ty, - const unsigned *Indices, - const unsigned *IndicesEnd, - unsigned CurIndex = 0); - -/// ComputeValueVTs - Given an LLVM IR type, compute a sequence of -/// EVTs that represent all the individual underlying -/// non-aggregate types that comprise it. -/// -/// If Offsets is non-null, it points to a vector to be filled in -/// with the in-memory offsets of each of the individual values. -/// -void ComputeValueVTs(const TargetLowering &TLI, const Type *Ty, - SmallVectorImpl<EVT> &ValueVTs, - SmallVectorImpl<uint64_t> *Offsets = 0, - uint64_t StartingOffset = 0); + unsigned MakeReg(EVT VT); + + /// isExportedInst - Return true if the specified value is an instruction + /// exported from its block. + bool isExportedInst(const Value *V) { + return ValueMap.count(V); + } -/// ExtractTypeInfo - Returns the type info, possibly bitcast, encoded in V. -GlobalVariable *ExtractTypeInfo(Value *V); + unsigned CreateRegForValue(const Value *V); + + unsigned InitializeRegForValue(const Value *V) { + unsigned &R = ValueMap[V]; + assert(R == 0 && "Already initialized this value register!"); + return R = CreateRegForValue(V); + } +}; /// AddCatchInfo - Extract the personality and type infos from an eh.selector /// call, and add them to the specified machine basic block. -void AddCatchInfo(CallInst &I, MachineModuleInfo *MMI, MachineBasicBlock *MBB); +void AddCatchInfo(const CallInst &I, + MachineModuleInfo *MMI, MachineBasicBlock *MBB); /// CopyCatchInfo - Copy catch information from DestBB to SrcBB. 
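PHINodesToUpdate, newly hoisted into FunctionLoweringInfo here, records (machine PHI, virtual register) pairs while a block is being selected. A hedged sketch of how such a list is usually drained once the predecessor's machine block exists; everything except the member name is illustrative:

    for (unsigned i = 0, e = FuncInfo.PHINodesToUpdate.size(); i != e; ++i) {
      MachineInstr *PHI = FuncInfo.PHINodesToUpdate[i].first;
      unsigned Reg = FuncInfo.PHINodesToUpdate[i].second;
      PHI->addOperand(MachineOperand::CreateReg(Reg, /*isDef=*/false));
      PHI->addOperand(MachineOperand::CreateMBB(MBB)); // MBB: the just-emitted predecessor
    }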
-void CopyCatchInfo(BasicBlock *SrcBB, BasicBlock *DestBB, +void CopyCatchInfo(const BasicBlock *SrcBB, const BasicBlock *DestBB, MachineModuleInfo *MMI, FunctionLoweringInfo &FLI); } // end namespace llvm diff --git a/lib/CodeGen/SelectionDAG/InstrEmitter.cpp b/lib/CodeGen/SelectionDAG/InstrEmitter.cpp index 28ba343..c5dae82 100644 --- a/lib/CodeGen/SelectionDAG/InstrEmitter.cpp +++ b/lib/CodeGen/SelectionDAG/InstrEmitter.cpp @@ -296,8 +296,19 @@ InstrEmitter::AddRegisterOperand(MachineInstr *MI, SDValue Op, } } + // If this value has only one use, that use is a kill. This is a + // conservative approximation. Tied operands are never killed, so we need + // to check that. And that means we need to determine the index of the + // operand. + unsigned Idx = MI->getNumOperands(); + while (Idx > 0 && + MI->getOperand(Idx-1).isReg() && MI->getOperand(Idx-1).isImplicit()) + --Idx; + bool isTied = MI->getDesc().getOperandConstraint(Idx, TOI::TIED_TO) != -1; + bool isKill = Op.hasOneUse() && !isTied && !IsDebug; + MI->addOperand(MachineOperand::CreateReg(VReg, isOptDef, - false/*isImp*/, false/*isKill*/, + false/*isImp*/, isKill, false/*isDead*/, false/*isUndef*/, false/*isEarlyClobber*/, 0/*SubReg*/, IsDebug)); @@ -440,8 +451,7 @@ void InstrEmitter::EmitSubregNode(SDNode *Node, unsigned SubIdx = cast<ConstantSDNode>(N2)->getZExtValue(); const TargetRegisterClass *TRC = MRI->getRegClass(SubReg); const TargetRegisterClass *SRC = - getSuperRegisterRegClass(TRC, SubIdx, - Node->getValueType(0)); + getSuperRegisterRegClass(TRC, SubIdx, Node->getValueType(0)); // Figure out the register class to create for the destreg. // Note that if we're going to directly use an existing register, @@ -504,41 +514,83 @@ InstrEmitter::EmitCopyToRegClassNode(SDNode *Node, assert(isNew && "Node emitted out of order - early"); } +/// EmitRegSequence - Generate machine code for REG_SEQUENCE nodes. +/// +void InstrEmitter::EmitRegSequence(SDNode *Node, + DenseMap<SDValue, unsigned> &VRBaseMap) { + const TargetRegisterClass *RC = TLI->getRegClassFor(Node->getValueType(0)); + unsigned NewVReg = MRI->createVirtualRegister(RC); + MachineInstr *MI = BuildMI(*MF, Node->getDebugLoc(), + TII->get(TargetOpcode::REG_SEQUENCE), NewVReg); + unsigned NumOps = Node->getNumOperands(); + assert((NumOps & 1) == 0 && + "REG_SEQUENCE must have an even number of operands!"); + const TargetInstrDesc &II = TII->get(TargetOpcode::REG_SEQUENCE); + for (unsigned i = 0; i != NumOps; ++i) { + SDValue Op = Node->getOperand(i); +#ifndef NDEBUG + if (i & 1) { + unsigned SubIdx = cast<ConstantSDNode>(Op)->getZExtValue(); + unsigned SubReg = getVR(Node->getOperand(i-1), VRBaseMap); + const TargetRegisterClass *TRC = MRI->getRegClass(SubReg); + const TargetRegisterClass *SRC = + getSuperRegisterRegClass(TRC, SubIdx, Node->getValueType(0)); + assert(SRC == RC && "Invalid subregister index in REG_SEQUENCE"); + } +#endif + AddOperand(MI, Op, i+1, &II, VRBaseMap); + } + + MBB->insert(InsertPos, MI); + SDValue Op(Node, 0); + bool isNew = VRBaseMap.insert(std::make_pair(Op, NewVReg)).second; + isNew = isNew; // Silence compiler warning. + assert(isNew && "Node emitted out of order - early"); +} + /// EmitDbgValue - Generate machine instruction for a dbg_value node. 
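The new EmitRegSequence above expects REG_SEQUENCE operands to alternate between a value and a sub-register index, with the result register class covering every input. A hypothetical way a target selector might build such a node; the value type and sub-register indices are assumptions, not taken from this patch:

    SDValue Ops[] = { V0, CurDAG->getTargetConstant(SubIdx0, MVT::i32),
                      V1, CurDAG->getTargetConstant(SubIdx1, MVT::i32) };
    SDNode *Seq = CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE,
                                         N->getDebugLoc(), VT, Ops, 4);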
/// -MachineInstr *InstrEmitter::EmitDbgValue(SDDbgValue *SD, - MachineBasicBlock *InsertBB, - DenseMap<SDValue, unsigned> &VRBaseMap, - DenseMap<MachineBasicBlock*, MachineBasicBlock*> *EM) { +MachineInstr * +InstrEmitter::EmitDbgValue(SDDbgValue *SD, + DenseMap<SDValue, unsigned> &VRBaseMap) { uint64_t Offset = SD->getOffset(); MDNode* MDPtr = SD->getMDPtr(); DebugLoc DL = SD->getDebugLoc(); + if (SD->getKind() == SDDbgValue::FRAMEIX) { + // Stack address; this needs to be lowered in target-dependent fashion. + // EmitTargetCodeForFrameDebugValue is responsible for allocation. + unsigned FrameIx = SD->getFrameIx(); + return TII->emitFrameIndexDebugValue(*MF, FrameIx, Offset, MDPtr, DL); + } + // Otherwise, we're going to create an instruction here. const TargetInstrDesc &II = TII->get(TargetOpcode::DBG_VALUE); MachineInstrBuilder MIB = BuildMI(*MF, DL, II); if (SD->getKind() == SDDbgValue::SDNODE) { - AddOperand(&*MIB, SDValue(SD->getSDNode(), SD->getResNo()), - (*MIB).getNumOperands(), &II, VRBaseMap, true /*IsDebug*/); + SDNode *Node = SD->getSDNode(); + SDValue Op = SDValue(Node, SD->getResNo()); + // It's possible we replaced this SDNode with other(s) and therefore + // didn't generate code for it. It's better to catch these cases where + // they happen and transfer the debug info, but trying to guarantee that + // in all cases would be very fragile; this is a safeguard for any + // that were missed. + DenseMap<SDValue, unsigned>::iterator I = VRBaseMap.find(Op); + if (I==VRBaseMap.end()) + MIB.addReg(0U); // undef + else + AddOperand(&*MIB, Op, (*MIB).getNumOperands(), &II, VRBaseMap, + true /*IsDebug*/); } else if (SD->getKind() == SDDbgValue::CONST) { - Value *V = SD->getConst(); - if (ConstantInt *CI = dyn_cast<ConstantInt>(V)) { + const Value *V = SD->getConst(); + if (const ConstantInt *CI = dyn_cast<ConstantInt>(V)) { MIB.addImm(CI->getSExtValue()); - } else if (ConstantFP *CF = dyn_cast<ConstantFP>(V)) { + } else if (const ConstantFP *CF = dyn_cast<ConstantFP>(V)) { MIB.addFPImm(CF); } else { // Could be an Undef. In any case insert an Undef so we can see what we // dropped. MIB.addReg(0U); } - } else if (SD->getKind() == SDDbgValue::FRAMEIX) { - unsigned FrameIx = SD->getFrameIx(); - // Stack address; this needs to be lowered in target-dependent fashion. - // FIXME test that the target supports this somehow; if not emit Undef. - // Create a pseudo for EmitInstrWithCustomInserter's consumption. - MIB.addImm(FrameIx).addImm(Offset).addMetadata(MDPtr); - abort(); - TLI->EmitInstrWithCustomInserter(&*MIB, InsertBB, EM); - return 0; } else { // Insert an Undef so we can see what we dropped. MIB.addReg(0U); @@ -553,8 +605,7 @@ MachineInstr *InstrEmitter::EmitDbgValue(SDDbgValue *SD, /// void InstrEmitter:: EmitMachineNode(SDNode *Node, bool IsClone, bool IsCloned, - DenseMap<SDValue, unsigned> &VRBaseMap, - DenseMap<MachineBasicBlock*, MachineBasicBlock*> *EM) { + DenseMap<SDValue, unsigned> &VRBaseMap) { unsigned Opc = Node->getMachineOpcode(); // Handle subreg insert/extract specially @@ -571,6 +622,12 @@ EmitMachineNode(SDNode *Node, bool IsClone, bool IsCloned, return; } + // Handle REG_SEQUENCE specially. + if (Opc == TargetOpcode::REG_SEQUENCE) { + EmitRegSequence(Node, VRBaseMap); + return; + } + if (Opc == TargetOpcode::IMPLICIT_DEF) // We want a unique VR for each IMPLICIT_DEF use. 
return; @@ -615,7 +672,7 @@ EmitMachineNode(SDNode *Node, bool IsClone, bool IsCloned, if (II.usesCustomInsertionHook()) { // Insert this instruction into the basic block using a target // specific inserter which may returns a new basic block. - MBB = TLI->EmitInstrWithCustomInserter(MI, MBB, EM); + MBB = TLI->EmitInstrWithCustomInserter(MI, MBB); InsertPos = MBB->end(); return; } @@ -646,7 +703,6 @@ EmitMachineNode(SDNode *Node, bool IsClone, bool IsCloned, i != e; ++i) MI->addRegisterDead(IDList[i-II.getNumDefs()], TRI); } - return; } /// EmitSpecialNode - Generate machine code for a target-independent node and @@ -720,12 +776,12 @@ EmitSpecialNode(SDNode *Node, bool IsClone, bool IsCloned, TII->get(TargetOpcode::INLINEASM)); // Add the asm string as an external symbol operand. - const char *AsmStr = - cast<ExternalSymbolSDNode>(Node->getOperand(1))->getSymbol(); + SDValue AsmStrV = Node->getOperand(InlineAsm::Op_AsmString); + const char *AsmStr = cast<ExternalSymbolSDNode>(AsmStrV)->getSymbol(); MI->addOperand(MachineOperand::CreateES(AsmStr)); // Add all of the operand registers to the instruction. - for (unsigned i = 2; i != NumOps;) { + for (unsigned i = InlineAsm::Op_FirstOperand; i != NumOps;) { unsigned Flags = cast<ConstantSDNode>(Node->getOperand(i))->getZExtValue(); unsigned NumVals = InlineAsm::getNumOperandRegisters(Flags); @@ -733,24 +789,24 @@ EmitSpecialNode(SDNode *Node, bool IsClone, bool IsCloned, MI->addOperand(MachineOperand::CreateImm(Flags)); ++i; // Skip the ID value. - switch (Flags & 7) { + switch (InlineAsm::getKind(Flags)) { default: llvm_unreachable("Bad flags!"); - case 2: // Def of register. + case InlineAsm::Kind_RegDef: for (; NumVals; --NumVals, ++i) { unsigned Reg = cast<RegisterSDNode>(Node->getOperand(i))->getReg(); MI->addOperand(MachineOperand::CreateReg(Reg, true)); } break; - case 6: // Def of earlyclobber register. + case InlineAsm::Kind_RegDefEarlyClobber: for (; NumVals; --NumVals, ++i) { unsigned Reg = cast<RegisterSDNode>(Node->getOperand(i))->getReg(); MI->addOperand(MachineOperand::CreateReg(Reg, true, false, false, false, false, true)); } break; - case 1: // Use of register. - case 3: // Immediate. - case 4: // Addressing mode. + case InlineAsm::Kind_RegUse: // Use of register. + case InlineAsm::Kind_Imm: // Immediate. + case InlineAsm::Kind_Mem: // Addressing mode. // The addressing mode has been selected, just add all of the // operands to the machine instruction. for (; NumVals; --NumVals, ++i) @@ -758,6 +814,13 @@ EmitSpecialNode(SDNode *Node, bool IsClone, bool IsCloned, break; } } + + // Get the mdnode from the asm if it exists and add it to the instruction. + SDValue MDV = Node->getOperand(InlineAsm::Op_MDNode); + const MDNode *MD = cast<MDNodeSDNode>(MDV)->getMD(); + if (MD) + MI->addOperand(MachineOperand::CreateMetadata(MD)); + MBB->insert(InsertPos, MI); break; } diff --git a/lib/CodeGen/SelectionDAG/InstrEmitter.h b/lib/CodeGen/SelectionDAG/InstrEmitter.h index baabb75..c7e7c71 100644 --- a/lib/CodeGen/SelectionDAG/InstrEmitter.h +++ b/lib/CodeGen/SelectionDAG/InstrEmitter.h @@ -88,6 +88,9 @@ class InstrEmitter { void EmitCopyToRegClassNode(SDNode *Node, DenseMap<SDValue, unsigned> &VRBaseMap); + /// EmitRegSequence - Generate machine code for REG_SEQUENCE nodes. 
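The inline-asm hunk above replaces the open-coded "Flags & 7" and shift arithmetic with the named InlineAsm helpers. The decode step in isolation, assuming the surrounding loop's Node and index i:

    unsigned Flags   = cast<ConstantSDNode>(Node->getOperand(i))->getZExtValue();
    unsigned Kind    = InlineAsm::getKind(Flags);                // Kind_RegDef, Kind_RegUse, Kind_Imm, Kind_Mem, ...
    unsigned NumVals = InlineAsm::getNumOperandRegisters(Flags); // operands that follow this flag word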
+ /// + void EmitRegSequence(SDNode *Node, DenseMap<SDValue, unsigned> &VRBaseMap); public: /// CountResults - The results of target nodes have register or immediate /// operands first, then an optional chain, and optional flag operands @@ -103,17 +106,14 @@ public: /// EmitDbgValue - Generate machine instruction for a dbg_value node. /// MachineInstr *EmitDbgValue(SDDbgValue *SD, - MachineBasicBlock *InsertBB, - DenseMap<SDValue, unsigned> &VRBaseMap, - DenseMap<MachineBasicBlock*, MachineBasicBlock*> *EM); + DenseMap<SDValue, unsigned> &VRBaseMap); /// EmitNode - Generate machine code for a node and needed dependencies. /// void EmitNode(SDNode *Node, bool IsClone, bool IsCloned, - DenseMap<SDValue, unsigned> &VRBaseMap, - DenseMap<MachineBasicBlock*, MachineBasicBlock*> *EM) { + DenseMap<SDValue, unsigned> &VRBaseMap) { if (Node->isMachineOpcode()) - EmitMachineNode(Node, IsClone, IsCloned, VRBaseMap, EM); + EmitMachineNode(Node, IsClone, IsCloned, VRBaseMap); else EmitSpecialNode(Node, IsClone, IsCloned, VRBaseMap); } @@ -130,8 +130,7 @@ public: private: void EmitMachineNode(SDNode *Node, bool IsClone, bool IsCloned, - DenseMap<SDValue, unsigned> &VRBaseMap, - DenseMap<MachineBasicBlock*, MachineBasicBlock*> *EM); + DenseMap<SDValue, unsigned> &VRBaseMap); void EmitSpecialNode(SDNode *Node, bool IsClone, bool IsCloned, DenseMap<SDValue, unsigned> &VRBaseMap); }; diff --git a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp index d35f0da..bedfa57 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp @@ -32,13 +32,11 @@ #include "llvm/LLVMContext.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" -#include "llvm/Support/ErrorHandling.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/raw_ostream.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/SmallPtrSet.h" -#include <map> using namespace llvm; //===----------------------------------------------------------------------===// @@ -55,7 +53,8 @@ using namespace llvm; /// namespace { class SelectionDAGLegalize { - TargetLowering &TLI; + const TargetMachine &TM; + const TargetLowering &TLI; SelectionDAG &DAG; CodeGenOpt::Level OptLevel; @@ -213,7 +212,8 @@ SelectionDAGLegalize::ShuffleWithNarrowerEltType(EVT NVT, EVT VT, DebugLoc dl, SelectionDAGLegalize::SelectionDAGLegalize(SelectionDAG &dag, CodeGenOpt::Level ol) - : TLI(dag.getTargetLoweringInfo()), DAG(dag), OptLevel(ol), + : TM(dag.getTarget()), TLI(dag.getTargetLoweringInfo()), + DAG(dag), OptLevel(ol), ValueTypeActions(TLI.getValueTypeActions()) { assert(MVT::LAST_VALUETYPE <= MVT::MAX_ALLOWED_VALUETYPE && "Too many value types for ValueTypeActions to hold!"); @@ -409,7 +409,9 @@ SDValue ExpandUnalignedStore(StoreSDNode *ST, SelectionDAG &DAG, // to the final destination using (unaligned) integer loads and stores. EVT StoredVT = ST->getMemoryVT(); EVT RegVT = - TLI.getRegisterType(*DAG.getContext(), EVT::getIntegerVT(*DAG.getContext(), StoredVT.getSizeInBits())); + TLI.getRegisterType(*DAG.getContext(), + EVT::getIntegerVT(*DAG.getContext(), + StoredVT.getSizeInBits())); unsigned StoredBytes = StoredVT.getSizeInBits() / 8; unsigned RegBytes = RegVT.getSizeInBits() / 8; unsigned NumRegs = (StoredBytes + RegBytes - 1) / RegBytes; @@ -445,7 +447,8 @@ SDValue ExpandUnalignedStore(StoreSDNode *ST, SelectionDAG &DAG, // The last store may be partial. Do a truncating store. 
On big-endian // machines this requires an extending load from the stack slot to ensure // that the bits are in the right place. - EVT MemVT = EVT::getIntegerVT(*DAG.getContext(), 8 * (StoredBytes - Offset)); + EVT MemVT = EVT::getIntegerVT(*DAG.getContext(), + 8 * (StoredBytes - Offset)); // Load from the stack slot. SDValue Load = DAG.getExtLoad(ISD::EXTLOAD, dl, RegVT, Store, StackPtr, @@ -548,7 +551,8 @@ SDValue ExpandUnalignedLoad(LoadSDNode *LD, SelectionDAG &DAG, } // The last copy may be partial. Do an extending load. - EVT MemVT = EVT::getIntegerVT(*DAG.getContext(), 8 * (LoadedBytes - Offset)); + EVT MemVT = EVT::getIntegerVT(*DAG.getContext(), + 8 * (LoadedBytes - Offset)); SDValue Load = DAG.getExtLoad(ISD::EXTLOAD, dl, RegVT, Chain, Ptr, LD->getSrcValue(), SVOffset + Offset, MemVT, LD->isVolatile(), @@ -968,11 +972,11 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) { Node->dump( &DAG); dbgs() << "\n"; #endif - llvm_unreachable("Do not know how to legalize this operator!"); + assert(0 && "Do not know how to legalize this operator!"); case ISD::BUILD_VECTOR: switch (TLI.getOperationAction(ISD::BUILD_VECTOR, Node->getValueType(0))) { - default: llvm_unreachable("This action is not supported yet!"); + default: assert(0 && "This action is not supported yet!"); case TargetLowering::Custom: Tmp3 = TLI.LowerOperation(Result, DAG); if (Tmp3.getNode()) { @@ -1089,7 +1093,7 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) { Tmp4 = Result.getValue(1); switch (TLI.getOperationAction(Node->getOpcode(), VT)) { - default: llvm_unreachable("This action is not supported yet!"); + default: assert(0 && "This action is not supported yet!"); case TargetLowering::Legal: // If this is an unaligned load and the target doesn't support it, // expand it. @@ -1259,7 +1263,7 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) { Tmp2 = LegalizeOp(Ch); } else { switch (TLI.getLoadExtAction(ExtType, SrcVT)) { - default: llvm_unreachable("This action is not supported yet!"); + default: assert(0 && "This action is not supported yet!"); case TargetLowering::Custom: isCustom = true; // FALLTHROUGH @@ -1357,7 +1361,7 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) { EVT VT = Tmp3.getValueType(); switch (TLI.getOperationAction(ISD::STORE, VT)) { - default: llvm_unreachable("This action is not supported yet!"); + default: assert(0 && "This action is not supported yet!"); case TargetLowering::Legal: // If this is an unaligned store and the target doesn't support it, // expand it. @@ -1394,7 +1398,8 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) { // Promote to a byte-sized store with upper bits zero if not // storing an integral number of bytes. For example, promote // TRUNCSTORE:i1 X -> TRUNCSTORE:i8 (and X, 1) - EVT NVT = EVT::getIntegerVT(*DAG.getContext(), StVT.getStoreSizeInBits()); + EVT NVT = EVT::getIntegerVT(*DAG.getContext(), + StVT.getStoreSizeInBits()); Tmp3 = DAG.getZeroExtendInReg(Tmp3, dl, StVT); Result = DAG.getTruncStore(Tmp1, dl, Tmp3, Tmp2, ST->getSrcValue(), SVOffset, NVT, isVolatile, isNonTemporal, @@ -1459,7 +1464,7 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) { ST->getOffset()); switch (TLI.getTruncStoreAction(ST->getValue().getValueType(), StVT)) { - default: llvm_unreachable("This action is not supported yet!"); + default: assert(0 && "This action is not supported yet!"); case TargetLowering::Legal: // If this is an unaligned store and the target doesn't support it, // expand it. 
@@ -1658,8 +1663,7 @@ void SelectionDAGLegalize::ExpandDYNAMIC_STACKALLOC(SDNode* Node, SDValue SP = DAG.getCopyFromReg(Chain, dl, SPReg, VT); Chain = SP.getValue(1); unsigned Align = cast<ConstantSDNode>(Tmp3)->getZExtValue(); - unsigned StackAlign = - TLI.getTargetMachine().getFrameInfo()->getStackAlignment(); + unsigned StackAlign = TM.getFrameInfo()->getStackAlignment(); if (Align > StackAlign) SP = DAG.getNode(ISD::AND, dl, VT, SP, DAG.getConstant(-(uint64_t)Align, VT)); @@ -1683,7 +1687,7 @@ void SelectionDAGLegalize::LegalizeSetCCCondCode(EVT VT, EVT OpVT = LHS.getValueType(); ISD::CondCode CCCode = cast<CondCodeSDNode>(CC)->get(); switch (TLI.getCondCodeAction(CCCode, OpVT)) { - default: llvm_unreachable("Unknown condition code action!"); + default: assert(0 && "Unknown condition code action!"); case TargetLowering::Legal: // Nothing to do. break; @@ -1691,7 +1695,7 @@ void SelectionDAGLegalize::LegalizeSetCCCondCode(EVT VT, ISD::CondCode CC1 = ISD::SETCC_INVALID, CC2 = ISD::SETCC_INVALID; unsigned Opc = 0; switch (CCCode) { - default: llvm_unreachable("Don't know how to expand this condition!"); + default: assert(0 && "Don't know how to expand this condition!"); case ISD::SETOEQ: CC1 = ISD::SETEQ; CC2 = ISD::SETO; Opc = ISD::AND; break; case ISD::SETOGT: CC1 = ISD::SETGT; CC2 = ISD::SETO; Opc = ISD::AND; break; case ISD::SETOGE: CC1 = ISD::SETGE; CC2 = ISD::SETO; Opc = ISD::AND; break; @@ -1738,8 +1742,8 @@ SDValue SelectionDAGLegalize::EmitStackConvert(SDValue SrcOp, unsigned SrcSize = SrcOp.getValueType().getSizeInBits(); unsigned SlotSize = SlotVT.getSizeInBits(); unsigned DestSize = DestVT.getSizeInBits(); - unsigned DestAlign = - TLI.getTargetData()->getPrefTypeAlignment(DestVT.getTypeForEVT(*DAG.getContext())); + const Type *DestType = DestVT.getTypeForEVT(*DAG.getContext()); + unsigned DestAlign = TLI.getTargetData()->getPrefTypeAlignment(DestType); // Emit a store to the stack slot. Use a truncstore if the input value is // later than DestVT. @@ -1930,7 +1934,7 @@ SDValue SelectionDAGLegalize::ExpandFPLibCall(SDNode* Node, RTLIB::Libcall Call_PPCF128) { RTLIB::Libcall LC; switch (Node->getValueType(0).getSimpleVT().SimpleTy) { - default: llvm_unreachable("Unexpected request for libcall!"); + default: assert(0 && "Unexpected request for libcall!"); case MVT::f32: LC = Call_F32; break; case MVT::f64: LC = Call_F64; break; case MVT::f80: LC = Call_F80; break; @@ -1947,7 +1951,7 @@ SDValue SelectionDAGLegalize::ExpandIntLibCall(SDNode* Node, bool isSigned, RTLIB::Libcall Call_I128) { RTLIB::Libcall LC; switch (Node->getValueType(0).getSimpleVT().SimpleTy) { - default: llvm_unreachable("Unexpected request for libcall!"); + default: assert(0 && "Unexpected request for libcall!"); case MVT::i8: LC = Call_I8; break; case MVT::i16: LC = Call_I16; break; case MVT::i32: LC = Call_I32; break; @@ -2062,7 +2066,7 @@ SDValue SelectionDAGLegalize::ExpandLegalINT_TO_FP(bool isSigned, // offset depending on the data type. 
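The DYNAMIC_STACKALLOC expansion touched above keeps rounding the stack pointer with an AND against the negated alignment. A hedged numeric illustration with made-up values:

    uint64_t SP      = 0x7fff1234ULL;
    uint64_t Align   = 64;                      // requested alignment, larger than the stack alignment
    uint64_t Aligned = SP & -(uint64_t)Align;   // 0x7fff1200: rounded down to a 64-byte boundary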
uint64_t FF; switch (Op0.getValueType().getSimpleVT().SimpleTy) { - default: llvm_unreachable("Unsupported integer type!"); + default: assert(0 && "Unsupported integer type!"); case MVT::i8 : FF = 0x43800000ULL; break; // 2^8 (as a float) case MVT::i16: FF = 0x47800000ULL; break; // 2^16 (as a float) case MVT::i32: FF = 0x4F800000ULL; break; // 2^32 (as a float) @@ -2182,7 +2186,7 @@ SDValue SelectionDAGLegalize::ExpandBSWAP(SDValue Op, DebugLoc dl) { EVT SHVT = TLI.getShiftAmountTy(); SDValue Tmp1, Tmp2, Tmp3, Tmp4, Tmp5, Tmp6, Tmp7, Tmp8; switch (VT.getSimpleVT().SimpleTy) { - default: llvm_unreachable("Unhandled Expand type in BSWAP!"); + default: assert(0 && "Unhandled Expand type in BSWAP!"); case MVT::i16: Tmp2 = DAG.getNode(ISD::SHL, dl, VT, Op, DAG.getConstant(8, SHVT)); Tmp1 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(8, SHVT)); @@ -2227,7 +2231,7 @@ SDValue SelectionDAGLegalize::ExpandBSWAP(SDValue Op, DebugLoc dl) { SDValue SelectionDAGLegalize::ExpandBitCount(unsigned Opc, SDValue Op, DebugLoc dl) { switch (Opc) { - default: llvm_unreachable("Cannot expand this yet!"); + default: assert(0 && "Cannot expand this yet!"); case ISD::CTPOP: { static const uint64_t mask[6] = { 0x5555555555555555ULL, 0x3333333333333333ULL, @@ -2333,10 +2337,10 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node, EVT VT = Node->getValueType(0); if (VT.isInteger()) Results.push_back(DAG.getConstant(0, VT)); - else if (VT.isFloatingPoint()) + else { + assert(VT.isFloatingPoint() && "Unknown value type!"); Results.push_back(DAG.getConstantFP(0, VT)); - else - llvm_unreachable("Unknown value type!"); + } break; } case ISD::TRAP: { @@ -2431,7 +2435,7 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node, // Increment the pointer, VAList, to the next vaarg Tmp3 = DAG.getNode(ISD::ADD, dl, TLI.getPointerTy(), VAList, DAG.getConstant(TLI.getTargetData()-> - getTypeAllocSize(VT.getTypeForEVT(*DAG.getContext())), + getTypeAllocSize(VT.getTypeForEVT(*DAG.getContext())), TLI.getPointerTy())); // Store the incremented VAList to the legalized pointer Tmp3 = DAG.getStore(VAList.getValue(1), dl, Tmp3, Tmp2, V, 0, @@ -2842,7 +2846,14 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node, BottomHalf = DAG.getNode(Ops[isSigned][1], dl, DAG.getVTList(VT, VT), LHS, RHS); TopHalf = BottomHalf.getValue(1); - } else if (TLI.isTypeLegal(EVT::getIntegerVT(*DAG.getContext(), VT.getSizeInBits() * 2))) { + } else { + // FIXME: We should be able to fall back to a libcall with an illegal + // type in some cases. + // Also, we can fall back to a division in some cases, but that's a big + // performance hit in the general case. + assert(TLI.isTypeLegal(EVT::getIntegerVT(*DAG.getContext(), + VT.getSizeInBits() * 2)) && + "Don't know how to expand this operation yet!"); EVT WideVT = EVT::getIntegerVT(*DAG.getContext(), VT.getSizeInBits() * 2); LHS = DAG.getNode(Ops[isSigned][2], dl, WideVT, LHS); RHS = DAG.getNode(Ops[isSigned][2], dl, WideVT, RHS); @@ -2851,12 +2862,6 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node, DAG.getIntPtrConstant(0)); TopHalf = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, VT, Tmp1, DAG.getIntPtrConstant(1)); - } else { - // FIXME: We should be able to fall back to a libcall with an illegal - // type in some cases. - // Also, we can fall back to a division in some cases, but that's a big - // performance hit in the general case. 
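The FF table in the INT_TO_FP hunk above is simply 2^N encoded as an IEEE-754 single: sign 0, biased exponent 127 + N, zero mantissa. A standalone check, not part of the patch:

    uint32_t PowerOfTwoAsFloat(unsigned N) { return (127u + N) << 23; }
    // PowerOfTwoAsFloat(8)  == 0x43800000   (2^8)
    // PowerOfTwoAsFloat(16) == 0x47800000   (2^16)
    // PowerOfTwoAsFloat(32) == 0x4F800000   (2^32)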
- llvm_unreachable("Don't know how to expand this operation yet!"); } if (isSigned) { Tmp1 = DAG.getConstant(VT.getSizeInBits() - 1, TLI.getShiftAmountTy()); @@ -2916,7 +2921,7 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node, PseudoSourceValue::getJumpTable(), 0, MemVT, false, false, 0); Addr = LD; - if (TLI.getTargetMachine().getRelocationModel() == Reloc::PIC_) { + if (TM.getRelocationModel() == Reloc::PIC_) { // For PIC, the sequence is: // BRIND(load(Jumptable + index) + RelocBase) // RelocBase can be JumpTable, GOT or some sort of global base. @@ -3078,11 +3083,10 @@ void SelectionDAGLegalize::PromoteNode(SDNode *Node, if (OVT.isVector()) { ExtOp = ISD::BIT_CONVERT; TruncOp = ISD::BIT_CONVERT; - } else if (OVT.isInteger()) { + } else { + assert(OVT.isInteger() && "Cannot promote logic operation"); ExtOp = ISD::ANY_EXTEND; TruncOp = ISD::TRUNCATE; - } else { - llvm_report_error("Cannot promote logic operation"); } // Promote each of the values to the new type. Tmp1 = DAG.getNode(ExtOp, dl, NVT, Node->getOperand(0)); diff --git a/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp index 665b21f..e3eb949 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp @@ -109,14 +109,16 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_BIT_CONVERT(SDNode *N) { SDValue DAGTypeLegalizer::SoftenFloatRes_BUILD_PAIR(SDNode *N) { // Convert the inputs to integers, and build a new pair out of them. return DAG.getNode(ISD::BUILD_PAIR, N->getDebugLoc(), - TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)), + TLI.getTypeToTransformTo(*DAG.getContext(), + N->getValueType(0)), BitConvertToInteger(N->getOperand(0)), BitConvertToInteger(N->getOperand(1))); } SDValue DAGTypeLegalizer::SoftenFloatRes_ConstantFP(ConstantFPSDNode *N) { return DAG.getConstant(N->getValueAPF().bitcastToAPInt(), - TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0))); + TLI.getTypeToTransformTo(*DAG.getContext(), + N->getValueType(0))); } SDValue DAGTypeLegalizer::SoftenFloatRes_EXTRACT_VECTOR_ELT(SDNode *N) { @@ -338,7 +340,8 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FP_EXTEND(SDNode *N) { SDValue DAGTypeLegalizer::SoftenFloatRes_FP16_TO_FP32(SDNode *N) { EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); SDValue Op = N->getOperand(0); - return MakeLibCall(RTLIB::FPEXT_F16_F32, NVT, &Op, 1, false, N->getDebugLoc()); + return MakeLibCall(RTLIB::FPEXT_F16_F32, NVT, &Op, 1, false, + N->getDebugLoc()); } SDValue DAGTypeLegalizer::SoftenFloatRes_FP_ROUND(SDNode *N) { @@ -489,7 +492,8 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_SELECT_CC(SDNode *N) { } SDValue DAGTypeLegalizer::SoftenFloatRes_UNDEF(SDNode *N) { - return DAG.getUNDEF(TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0))); + return DAG.getUNDEF(TLI.getTypeToTransformTo(*DAG.getContext(), + N->getValueType(0))); } SDValue DAGTypeLegalizer::SoftenFloatRes_VAARG(SDNode *N) { @@ -531,7 +535,8 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_XINT_TO_FP(SDNode *N) { // Sign/zero extend the argument if the libcall takes a larger type. SDValue Op = DAG.getNode(Signed ? 
ISD::SIGN_EXTEND : ISD::ZERO_EXTEND, dl, NVT, N->getOperand(0)); - return MakeLibCall(LC, TLI.getTypeToTransformTo(*DAG.getContext(), RVT), &Op, 1, false, dl); + return MakeLibCall(LC, TLI.getTypeToTransformTo(*DAG.getContext(), RVT), + &Op, 1, false, dl); } @@ -1403,7 +1408,8 @@ SDValue DAGTypeLegalizer::ExpandFloatOp_STORE(SDNode *N, unsigned OpNo) { SDValue Chain = ST->getChain(); SDValue Ptr = ST->getBasePtr(); - EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), ST->getValue().getValueType()); + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), + ST->getValue().getValueType()); assert(NVT.isByteSized() && "Expanded type not byte sized!"); assert(ST->getMemoryVT().bitsLE(NVT) && "Float type not round?"); diff --git a/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp index 48f64c3..548454c 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp @@ -204,7 +204,8 @@ SDValue DAGTypeLegalizer::PromoteIntRes_BIT_CONVERT(SDNode *N) { std::swap(Lo, Hi); InOp = DAG.getNode(ISD::ANY_EXTEND, dl, - EVT::getIntegerVT(*DAG.getContext(), NOutVT.getSizeInBits()), + EVT::getIntegerVT(*DAG.getContext(), + NOutVT.getSizeInBits()), JoinIntegers(Lo, Hi)); return DAG.getNode(ISD::BIT_CONVERT, dl, NOutVT, InOp); } @@ -464,7 +465,7 @@ SDValue DAGTypeLegalizer::PromoteIntRes_SETCC(SDNode *N) { SDValue DAGTypeLegalizer::PromoteIntRes_SHL(SDNode *N) { return DAG.getNode(ISD::SHL, N->getDebugLoc(), - TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)), + TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)), GetPromotedInteger(N->getOperand(0)), N->getOperand(1)); } @@ -555,7 +556,8 @@ SDValue DAGTypeLegalizer::PromoteIntRes_UDIV(SDNode *N) { } SDValue DAGTypeLegalizer::PromoteIntRes_UNDEF(SDNode *N) { - return DAG.getUNDEF(TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0))); + return DAG.getUNDEF(TLI.getTypeToTransformTo(*DAG.getContext(), + N->getValueType(0))); } SDValue DAGTypeLegalizer::PromoteIntRes_VAARG(SDNode *N) { @@ -1383,7 +1385,8 @@ void DAGTypeLegalizer::ExpandIntRes_AssertSext(SDNode *N, if (NVTBits < EVTBits) { Hi = DAG.getNode(ISD::AssertSext, dl, NVT, Hi, - DAG.getValueType(EVT::getIntegerVT(*DAG.getContext(), EVTBits - NVTBits))); + DAG.getValueType(EVT::getIntegerVT(*DAG.getContext(), + EVTBits - NVTBits))); } else { Lo = DAG.getNode(ISD::AssertSext, dl, NVT, Lo, DAG.getValueType(EVT)); // The high part replicates the sign bit of Lo, make it explicit. @@ -1403,7 +1406,8 @@ void DAGTypeLegalizer::ExpandIntRes_AssertZext(SDNode *N, if (NVTBits < EVTBits) { Hi = DAG.getNode(ISD::AssertZext, dl, NVT, Hi, - DAG.getValueType(EVT::getIntegerVT(*DAG.getContext(), EVTBits - NVTBits))); + DAG.getValueType(EVT::getIntegerVT(*DAG.getContext(), + EVTBits - NVTBits))); } else { Lo = DAG.getNode(ISD::AssertZext, dl, NVT, Lo, DAG.getValueType(EVT)); // The high part must be zero, make it explicit. 
@@ -1846,7 +1850,8 @@ void DAGTypeLegalizer::ExpandIntRes_SIGN_EXTEND(SDNode *N, unsigned ExcessBits = Op.getValueType().getSizeInBits() - NVT.getSizeInBits(); Hi = DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, Hi.getValueType(), Hi, - DAG.getValueType(EVT::getIntegerVT(*DAG.getContext(), ExcessBits))); + DAG.getValueType(EVT::getIntegerVT(*DAG.getContext(), + ExcessBits))); } } @@ -1968,7 +1973,8 @@ void DAGTypeLegalizer::ExpandIntRes_ZERO_EXTEND(SDNode *N, SplitInteger(Res, Lo, Hi); unsigned ExcessBits = Op.getValueType().getSizeInBits() - NVT.getSizeInBits(); - Hi = DAG.getZeroExtendInReg(Hi, dl, EVT::getIntegerVT(*DAG.getContext(), ExcessBits)); + Hi = DAG.getZeroExtendInReg(Hi, dl, + EVT::getIntegerVT(*DAG.getContext(), ExcessBits)); } } @@ -2269,7 +2275,8 @@ SDValue DAGTypeLegalizer::ExpandIntOp_STORE(StoreSDNode *N, unsigned OpNo) { unsigned EBytes = ExtVT.getStoreSize(); unsigned IncrementSize = NVT.getSizeInBits()/8; unsigned ExcessBits = (EBytes - IncrementSize)*8; - EVT HiVT = EVT::getIntegerVT(*DAG.getContext(), ExtVT.getSizeInBits() - ExcessBits); + EVT HiVT = EVT::getIntegerVT(*DAG.getContext(), + ExtVT.getSizeInBits() - ExcessBits); if (ExcessBits < NVT.getSizeInBits()) { // Transfer high bits from the top of Lo to the bottom of Hi. diff --git a/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp index f3e7ca4f..17f131b 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp @@ -721,7 +721,8 @@ void DAGTypeLegalizer::ReplaceValueWith(SDValue From, SDValue To) { } void DAGTypeLegalizer::SetPromotedInteger(SDValue Op, SDValue Result) { - assert(Result.getValueType() == TLI.getTypeToTransformTo(*DAG.getContext(), Op.getValueType()) && + assert(Result.getValueType() == + TLI.getTypeToTransformTo(*DAG.getContext(), Op.getValueType()) && "Invalid type for promoted integer"); AnalyzeNewValue(Result); @@ -731,7 +732,8 @@ void DAGTypeLegalizer::SetPromotedInteger(SDValue Op, SDValue Result) { } void DAGTypeLegalizer::SetSoftenedFloat(SDValue Op, SDValue Result) { - assert(Result.getValueType() == TLI.getTypeToTransformTo(*DAG.getContext(), Op.getValueType()) && + assert(Result.getValueType() == + TLI.getTypeToTransformTo(*DAG.getContext(), Op.getValueType()) && "Invalid type for softened float"); AnalyzeNewValue(Result); @@ -762,7 +764,8 @@ void DAGTypeLegalizer::GetExpandedInteger(SDValue Op, SDValue &Lo, void DAGTypeLegalizer::SetExpandedInteger(SDValue Op, SDValue Lo, SDValue Hi) { - assert(Lo.getValueType() == TLI.getTypeToTransformTo(*DAG.getContext(), Op.getValueType()) && + assert(Lo.getValueType() == + TLI.getTypeToTransformTo(*DAG.getContext(), Op.getValueType()) && Hi.getValueType() == Lo.getValueType() && "Invalid type for expanded integer"); // Lo/Hi may have been newly allocated, if so, add nodeid's as relevant. @@ -788,7 +791,8 @@ void DAGTypeLegalizer::GetExpandedFloat(SDValue Op, SDValue &Lo, void DAGTypeLegalizer::SetExpandedFloat(SDValue Op, SDValue Lo, SDValue Hi) { - assert(Lo.getValueType() == TLI.getTypeToTransformTo(*DAG.getContext(), Op.getValueType()) && + assert(Lo.getValueType() == + TLI.getTypeToTransformTo(*DAG.getContext(), Op.getValueType()) && Hi.getValueType() == Lo.getValueType() && "Invalid type for expanded float"); // Lo/Hi may have been newly allocated, if so, add nodeid's as relevant. 
@@ -832,7 +836,8 @@ void DAGTypeLegalizer::SetSplitVector(SDValue Op, SDValue Lo, } void DAGTypeLegalizer::SetWidenedVector(SDValue Op, SDValue Result) { - assert(Result.getValueType() == TLI.getTypeToTransformTo(*DAG.getContext(), Op.getValueType()) && + assert(Result.getValueType() == + TLI.getTypeToTransformTo(*DAG.getContext(), Op.getValueType()) && "Invalid type for widened vector"); AnalyzeNewValue(Result); @@ -940,7 +945,8 @@ void DAGTypeLegalizer::GetSplitDestVTs(EVT InVT, EVT &LoVT, EVT &HiVT) { } else { unsigned NumElements = InVT.getVectorNumElements(); assert(!(NumElements & 1) && "Splitting vector, but not in half!"); - LoVT = HiVT = EVT::getVectorVT(*DAG.getContext(), InVT.getVectorElementType(), NumElements/2); + LoVT = HiVT = EVT::getVectorVT(*DAG.getContext(), + InVT.getVectorElementType(), NumElements/2); } } @@ -980,7 +986,8 @@ SDValue DAGTypeLegalizer::JoinIntegers(SDValue Lo, SDValue Hi) { DebugLoc dlLo = Lo.getDebugLoc(); EVT LVT = Lo.getValueType(); EVT HVT = Hi.getValueType(); - EVT NVT = EVT::getIntegerVT(*DAG.getContext(), LVT.getSizeInBits() + HVT.getSizeInBits()); + EVT NVT = EVT::getIntegerVT(*DAG.getContext(), + LVT.getSizeInBits() + HVT.getSizeInBits()); Lo = DAG.getNode(ISD::ZERO_EXTEND, dlLo, NVT, Lo); Hi = DAG.getNode(ISD::ANY_EXTEND, dlHi, NVT, Hi); @@ -1082,7 +1089,8 @@ void DAGTypeLegalizer::SplitInteger(SDValue Op, /// type half the size of Op's. void DAGTypeLegalizer::SplitInteger(SDValue Op, SDValue &Lo, SDValue &Hi) { - EVT HalfVT = EVT::getIntegerVT(*DAG.getContext(), Op.getValueType().getSizeInBits()/2); + EVT HalfVT = EVT::getIntegerVT(*DAG.getContext(), + Op.getValueType().getSizeInBits()/2); SplitInteger(Op, HalfVT, HalfVT, Lo, Hi); } diff --git a/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/lib/CodeGen/SelectionDAG/LegalizeTypes.h index 9dd9796..d60ad60 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeTypes.h +++ b/lib/CodeGen/SelectionDAG/LegalizeTypes.h @@ -33,7 +33,7 @@ namespace llvm { /// into small values. /// class VISIBILITY_HIDDEN DAGTypeLegalizer { - TargetLowering &TLI; + const TargetLowering &TLI; SelectionDAG &DAG; public: // NodeIdFlags - This pass uses the NodeId on the SDNodes to hold information diff --git a/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp b/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp index 5e83b4b..88e1e62 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp @@ -173,8 +173,9 @@ void DAGTypeLegalizer::ExpandRes_EXTRACT_VECTOR_ELT(SDNode *N, SDValue &Lo, EVT NewVT = TLI.getTypeToTransformTo(*DAG.getContext(), OldVT); SDValue NewVec = DAG.getNode(ISD::BIT_CONVERT, dl, - EVT::getVectorVT(*DAG.getContext(), NewVT, 2*OldElts), - OldVec); + EVT::getVectorVT(*DAG.getContext(), + NewVT, 2*OldElts), + OldVec); // Extract the elements at 2 * Idx and 2 * Idx + 1 from the new vector. SDValue Idx = N->getOperand(1); @@ -268,7 +269,9 @@ SDValue DAGTypeLegalizer::ExpandOp_BIT_CONVERT(SDNode *N) { // is no point, and it might create expansion loops). For example, on // x86 this turns v1i64 = BIT_CONVERT i64 into v1i64 = BIT_CONVERT v2i32. 
EVT OVT = N->getOperand(0).getValueType(); - EVT NVT = EVT::getVectorVT(*DAG.getContext(), TLI.getTypeToTransformTo(*DAG.getContext(), OVT), 2); + EVT NVT = EVT::getVectorVT(*DAG.getContext(), + TLI.getTypeToTransformTo(*DAG.getContext(), OVT), + 2); if (isTypeLegal(NVT)) { SDValue Parts[2]; @@ -312,8 +315,9 @@ SDValue DAGTypeLegalizer::ExpandOp_BUILD_VECTOR(SDNode *N) { } SDValue NewVec = DAG.getNode(ISD::BUILD_VECTOR, dl, - EVT::getVectorVT(*DAG.getContext(), NewVT, NewElts.size()), - &NewElts[0], NewElts.size()); + EVT::getVectorVT(*DAG.getContext(), + NewVT, NewElts.size()), + &NewElts[0], NewElts.size()); // Convert the new vector to the old vector type. return DAG.getNode(ISD::BIT_CONVERT, dl, VecVT, NewVec); @@ -380,7 +384,8 @@ SDValue DAGTypeLegalizer::ExpandOp_NormalStore(SDNode *N, unsigned OpNo) { DebugLoc dl = N->getDebugLoc(); StoreSDNode *St = cast<StoreSDNode>(N); - EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), St->getValue().getValueType()); + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), + St->getValue().getValueType()); SDValue Chain = St->getChain(); SDValue Ptr = St->getBasePtr(); int SVOffset = St->getSrcValueOffset(); diff --git a/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp b/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp index b5f84c0..0e2bd02 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp @@ -34,7 +34,7 @@ using namespace llvm; namespace { class VectorLegalizer { SelectionDAG& DAG; - TargetLowering& TLI; + const TargetLowering &TLI; bool Changed; // Keep track of whether anything changed /// LegalizedNodes - For nodes that are of legal width, and that have more diff --git a/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp index ed5f24c..7efeea1 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp @@ -705,8 +705,9 @@ void DAGTypeLegalizer::SplitVecRes_INSERT_VECTOR_ELT(SDNode *N, SDValue &Lo, // Store the new element. This may be larger than the vector element type, // so use a truncating store. 
SDValue EltPtr = GetVectorElementPointer(StackPtr, EltVT, Idx); + const Type *VecType = VecVT.getTypeForEVT(*DAG.getContext()); unsigned Alignment = - TLI.getTargetData()->getPrefTypeAlignment(VecVT.getTypeForEVT(*DAG.getContext())); + TLI.getTargetData()->getPrefTypeAlignment(VecType); Store = DAG.getTruncStore(Store, dl, Elt, EltPtr, NULL, 0, EltVT, false, false, 0); @@ -1419,7 +1420,8 @@ SDValue DAGTypeLegalizer::WidenVecRes_Shift(SDNode *N) { ShOp = GetWidenedVector(ShOp); ShVT = ShOp.getValueType(); } - EVT ShWidenVT = EVT::getVectorVT(*DAG.getContext(), ShVT.getVectorElementType(), + EVT ShWidenVT = EVT::getVectorVT(*DAG.getContext(), + ShVT.getVectorElementType(), WidenVT.getVectorNumElements()); if (ShVT != ShWidenVT) ShOp = ModifyToType(ShOp, ShWidenVT); @@ -1493,7 +1495,8 @@ SDValue DAGTypeLegalizer::WidenVecRes_BIT_CONVERT(SDNode *N) { unsigned NewNumElts = WidenSize / InSize; if (InVT.isVector()) { EVT InEltVT = InVT.getVectorElementType(); - NewInVT= EVT::getVectorVT(*DAG.getContext(), InEltVT, WidenSize / InEltVT.getSizeInBits()); + NewInVT= EVT::getVectorVT(*DAG.getContext(), InEltVT, + WidenSize / InEltVT.getSizeInBits()); } else { NewInVT = EVT::getVectorVT(*DAG.getContext(), InVT, NewNumElts); } @@ -1617,7 +1620,8 @@ SDValue DAGTypeLegalizer::WidenVecRes_CONVERT_RNDSAT(SDNode *N) { SDValue RndOp = N->getOperand(3); SDValue SatOp = N->getOperand(4); - EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); + EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), + N->getValueType(0)); unsigned WidenNumElts = WidenVT.getVectorNumElements(); EVT InVT = InOp.getValueType(); @@ -1791,7 +1795,8 @@ SDValue DAGTypeLegalizer::WidenVecRes_SELECT(SDNode *N) { EVT CondVT = Cond1.getValueType(); if (CondVT.isVector()) { EVT CondEltVT = CondVT.getVectorElementType(); - EVT CondWidenVT = EVT::getVectorVT(*DAG.getContext(), CondEltVT, WidenNumElts); + EVT CondWidenVT = EVT::getVectorVT(*DAG.getContext(), + CondEltVT, WidenNumElts); if (getTypeAction(CondVT) == WidenVector) Cond1 = GetWidenedVector(Cond1); @@ -1859,7 +1864,8 @@ SDValue DAGTypeLegalizer::WidenVecRes_VSETCC(SDNode *N) { SDValue InOp1 = N->getOperand(0); EVT InVT = InOp1.getValueType(); assert(InVT.isVector() && "can not widen non vector type"); - EVT WidenInVT = EVT::getVectorVT(*DAG.getContext(), InVT.getVectorElementType(), WidenNumElts); + EVT WidenInVT = EVT::getVectorVT(*DAG.getContext(), + InVT.getVectorElementType(), WidenNumElts); InOp1 = GetWidenedVector(InOp1); SDValue InOp2 = GetWidenedVector(N->getOperand(1)); @@ -2124,7 +2130,7 @@ SDValue DAGTypeLegalizer::GenWidenVectorLoads(SmallVector<SDValue, 16>& LdChain, // The routines chops the vector into the largest vector loads with the same // element type or scalar loads and then recombines it to the widen vector // type. 
- EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), LD->getValueType(0)); + EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(),LD->getValueType(0)); unsigned WidenWidth = WidenVT.getSizeInBits(); EVT LdVT = LD->getMemoryVT(); DebugLoc dl = LD->getDebugLoc(); diff --git a/lib/CodeGen/SelectionDAG/SDNodeDbgValue.h b/lib/CodeGen/SelectionDAG/SDNodeDbgValue.h index 9d1568f..ac2d338 100644 --- a/lib/CodeGen/SelectionDAG/SDNodeDbgValue.h +++ b/lib/CodeGen/SelectionDAG/SDNodeDbgValue.h @@ -41,7 +41,7 @@ private: SDNode *Node; // valid for expressions unsigned ResNo; // valid for expressions } s; - Value *Const; // valid for constants + const Value *Const; // valid for constants unsigned FrameIx; // valid for stack objects } u; MDNode *mdPtr; @@ -60,7 +60,8 @@ public: } // Constructor for constants. - SDDbgValue(MDNode *mdP, Value *C, uint64_t off, DebugLoc dl, unsigned O) : + SDDbgValue(MDNode *mdP, const Value *C, uint64_t off, DebugLoc dl, + unsigned O) : mdPtr(mdP), Offset(off), DL(dl), Order(O), Invalid(false) { kind = CONST; u.Const = C; @@ -86,7 +87,7 @@ public: unsigned getResNo() { assert (kind==SDNODE); return u.s.ResNo; } // Returns the Value* for a constant - Value *getConst() { assert (kind==CONST); return u.Const; } + const Value *getConst() { assert (kind==CONST); return u.Const; } // Returns the FrameIx for a stack object unsigned getFrameIx() { assert (kind==FRAMEIX); return u.FrameIx; } diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp index 3f1766d..da02850 100644 --- a/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp +++ b/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp @@ -17,18 +17,19 @@ #define DEBUG_TYPE "pre-RA-sched" #include "ScheduleDAGSDNodes.h" +#include "llvm/InlineAsm.h" #include "llvm/CodeGen/SchedulerRegistry.h" #include "llvm/CodeGen/SelectionDAGISel.h" #include "llvm/Target/TargetRegisterInfo.h" #include "llvm/Target/TargetData.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetInstrInfo.h" -#include "llvm/Support/Debug.h" -#include "llvm/Support/ErrorHandling.h" #include "llvm/ADT/PriorityQueue.h" #include "llvm/ADT/SmallSet.h" #include "llvm/ADT/Statistic.h" #include "llvm/ADT/STLExtras.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" #include <climits> using namespace llvm; @@ -647,13 +648,14 @@ bool ScheduleDAGRRList::DelayForLiveRegsBottomUp(SUnit *SU, if (Node->getOperand(NumOps-1).getValueType() == MVT::Flag) --NumOps; // Ignore the flag operand. - for (unsigned i = 2; i != NumOps;) { + for (unsigned i = InlineAsm::Op_FirstOperand; i != NumOps;) { unsigned Flags = cast<ConstantSDNode>(Node->getOperand(i))->getZExtValue(); - unsigned NumVals = (Flags & 0xffff) >> 3; + unsigned NumVals = InlineAsm::getNumOperandRegisters(Flags); ++i; // Skip the ID value. - if ((Flags & 7) == 2 || (Flags & 7) == 6) { + if (InlineAsm::isRegDefKind(Flags) || + InlineAsm::isRegDefEarlyClobberKind(Flags)) { // Check for def of register or earlyclobber register. for (; NumVals; --NumVals, ++i) { unsigned Reg = cast<RegisterSDNode>(Node->getOperand(i))->getReg(); diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp index e7ab2f0..76e4771 100644 --- a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp +++ b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp @@ -353,8 +353,8 @@ void ScheduleDAGSDNodes::AddSchedEdges() { const SDep& dep = SDep(OpSU, isChain ? 
SDep::Order : SDep::Data, OpSU->Latency, PhysReg); if (!isChain && !UnitLatencies) { - ComputeOperandLatency(OpSU, SU, (SDep &)dep); - ST.adjustSchedDependency(OpSU, SU, (SDep &)dep); + ComputeOperandLatency(OpSU, SU, const_cast<SDep &>(dep)); + ST.adjustSchedDependency(OpSU, SU, const_cast<SDep &>(dep)); } SU->addPred(dep); @@ -422,7 +422,6 @@ namespace { // instructions in the right order. static void ProcessSourceNode(SDNode *N, SelectionDAG *DAG, InstrEmitter &Emitter, - DenseMap<MachineBasicBlock*, MachineBasicBlock*> *EM, DenseMap<SDValue, unsigned> &VRBaseMap, SmallVector<std::pair<unsigned, MachineInstr*>, 32> &Orders, SmallSet<unsigned, 8> &Seen) { @@ -449,9 +448,11 @@ static void ProcessSourceNode(SDNode *N, SelectionDAG *DAG, continue; unsigned DVOrder = DVs[i]->getOrder(); if (DVOrder == ++Order) { - MachineInstr *DbgMI = Emitter.EmitDbgValue(DVs[i], BB, VRBaseMap, EM); - Orders.push_back(std::make_pair(DVOrder, DbgMI)); - BB->insert(InsertPos, DbgMI); + MachineInstr *DbgMI = Emitter.EmitDbgValue(DVs[i], VRBaseMap); + if (DbgMI) { + Orders.push_back(std::make_pair(DVOrder, DbgMI)); + BB->insert(InsertPos, DbgMI); + } DVs[i]->setIsInvalidated(); } } @@ -459,8 +460,7 @@ static void ProcessSourceNode(SDNode *N, SelectionDAG *DAG, /// EmitSchedule - Emit the machine code in scheduled order. -MachineBasicBlock *ScheduleDAGSDNodes:: -EmitSchedule(DenseMap<MachineBasicBlock*, MachineBasicBlock*> *EM) { +MachineBasicBlock *ScheduleDAGSDNodes::EmitSchedule() { InstrEmitter Emitter(BB, InsertPos); DenseMap<SDValue, unsigned> VRBaseMap; DenseMap<SUnit*, unsigned> CopyVRBaseMap; @@ -468,6 +468,17 @@ EmitSchedule(DenseMap<MachineBasicBlock*, MachineBasicBlock*> *EM) { SmallSet<unsigned, 8> Seen; bool HasDbg = DAG->hasDebugValues(); + // If this is the first BB, emit byval parameter dbg_value's. + if (HasDbg && BB->getParent()->begin() == MachineFunction::iterator(BB)) { + SDDbgInfo::DbgIterator PDI = DAG->ByvalParmDbgBegin(); + SDDbgInfo::DbgIterator PDE = DAG->ByvalParmDbgEnd(); + for (; PDI != PDE; ++PDI) { + MachineInstr *DbgMI= Emitter.EmitDbgValue(*PDI, VRBaseMap); + if (DbgMI) + BB->insert(BB->end(), DbgMI); + } + } + for (unsigned i = 0, e = Sequence.size(); i != e; i++) { SUnit *SU = Sequence[i]; if (!SU) { @@ -491,21 +502,21 @@ EmitSchedule(DenseMap<MachineBasicBlock*, MachineBasicBlock*> *EM) { while (!FlaggedNodes.empty()) { SDNode *N = FlaggedNodes.back(); Emitter.EmitNode(FlaggedNodes.back(), SU->OrigNode != SU, SU->isCloned, - VRBaseMap, EM); - // Remember the the source order of the inserted instruction. + VRBaseMap); + // Remember the source order of the inserted instruction. if (HasDbg) - ProcessSourceNode(N, DAG, Emitter, EM, VRBaseMap, Orders, Seen); + ProcessSourceNode(N, DAG, Emitter, VRBaseMap, Orders, Seen); FlaggedNodes.pop_back(); } Emitter.EmitNode(SU->getNode(), SU->OrigNode != SU, SU->isCloned, - VRBaseMap, EM); - // Remember the the source order of the inserted instruction. + VRBaseMap); + // Remember the source order of the inserted instruction. if (HasDbg) - ProcessSourceNode(SU->getNode(), DAG, Emitter, EM, VRBaseMap, Orders, + ProcessSourceNode(SU->getNode(), DAG, Emitter, VRBaseMap, Orders, Seen); } - // Insert all the dbg_value which have not already been inserted in source + // Insert all the dbg_values which have not already been inserted in source // order sequence. if (HasDbg) { MachineBasicBlock::iterator BBBegin = BB->empty() ? 
BB->end() : BB->begin(); @@ -540,13 +551,15 @@ EmitSchedule(DenseMap<MachineBasicBlock*, MachineBasicBlock*> *EM) { #endif if ((*DI)->isInvalidated()) continue; - MachineInstr *DbgMI = Emitter.EmitDbgValue(*DI, MIBB, VRBaseMap, EM); - if (!LastOrder) - // Insert to start of the BB (after PHIs). - BB->insert(BBBegin, DbgMI); - else { - MachineBasicBlock::iterator Pos = MI; - MIBB->insert(llvm::next(Pos), DbgMI); + MachineInstr *DbgMI = Emitter.EmitDbgValue(*DI, VRBaseMap); + if (DbgMI) { + if (!LastOrder) + // Insert to start of the BB (after PHIs). + BB->insert(BBBegin, DbgMI); + else { + MachineBasicBlock::iterator Pos = MI; + MIBB->insert(llvm::next(Pos), DbgMI); + } } } LastOrder = Order; @@ -558,8 +571,9 @@ EmitSchedule(DenseMap<MachineBasicBlock*, MachineBasicBlock*> *EM) { MachineBasicBlock *InsertBB = Emitter.getBlock(); MachineBasicBlock::iterator Pos= Emitter.getBlock()->getFirstTerminator(); if (!(*DI)->isInvalidated()) { - MachineInstr *DbgMI= Emitter.EmitDbgValue(*DI, InsertBB, VRBaseMap, EM); - InsertBB->insert(Pos, DbgMI); + MachineInstr *DbgMI= Emitter.EmitDbgValue(*DI, VRBaseMap); + if (DbgMI) + InsertBB->insert(Pos, DbgMI); } ++DI; } diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h index 6b829b6..7ae8ec2 100644 --- a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h +++ b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h @@ -59,7 +59,8 @@ namespace llvm { if (isa<JumpTableSDNode>(Node)) return true; if (isa<ExternalSymbolSDNode>(Node)) return true; if (isa<BlockAddressSDNode>(Node)) return true; - if (Node->getOpcode() == ISD::EntryToken) return true; + if (Node->getOpcode() == ISD::EntryToken || + isa<MDNodeSDNode>(Node)) return true; return false; } @@ -93,8 +94,7 @@ namespace llvm { /// virtual void ComputeLatency(SUnit *SU); - virtual MachineBasicBlock * - EmitSchedule(DenseMap<MachineBasicBlock*, MachineBasicBlock*> *EM); + virtual MachineBasicBlock *EmitSchedule(); /// Schedule - Order nodes according to selected style, filling /// in the Sequence member. diff --git a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp index 8c0554d..e6df742 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -304,10 +304,6 @@ ISD::CondCode ISD::getSetCCAndOperation(ISD::CondCode Op1, ISD::CondCode Op2, return Result; } -const TargetMachine &SelectionDAG::getTarget() const { - return MF->getTarget(); -} - //===----------------------------------------------------------------------===// // SDNode Profile Support //===----------------------------------------------------------------------===// @@ -792,8 +788,8 @@ unsigned SelectionDAG::getEVTAlignment(EVT VT) const { } // EntryNode could meaningfully have debug info if we can find it... 
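EmitDbgValue can now legitimately return null, for example when a target leaves emitFrameIndexDebugValue at its default, so every insertion site in the scheduler above gains an if (DbgMI) guard. The pattern, assuming the surrounding emitter context:

    if (MachineInstr *DbgMI = Emitter.EmitDbgValue(SD, VRBaseMap))
      InsertBB->insert(Pos, DbgMI);   // otherwise the dbg_value is silently dropped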
-SelectionDAG::SelectionDAG(TargetLowering &tli, FunctionLoweringInfo &fli) - : TLI(tli), FLI(fli), +SelectionDAG::SelectionDAG(const TargetMachine &tm, FunctionLoweringInfo &fli) + : TM(tm), TLI(*tm.getTargetLowering()), FLI(fli), EntryNode(ISD::EntryToken, DebugLoc(), getVTList(MVT::Other)), Root(getEntryNode()), Ordering(0) { AllNodes.push_back(&EntryNode); @@ -1048,7 +1044,7 @@ SDValue SelectionDAG::getJumpTable(int JTI, EVT VT, bool isTarget, return SDValue(N, 0); } -SDValue SelectionDAG::getConstantPool(Constant *C, EVT VT, +SDValue SelectionDAG::getConstantPool(const Constant *C, EVT VT, unsigned Alignment, int Offset, bool isTarget, unsigned char TargetFlags) { @@ -1319,7 +1315,7 @@ SDValue SelectionDAG::getEHLabel(DebugLoc dl, SDValue Root, MCSymbol *Label) { } -SDValue SelectionDAG::getBlockAddress(BlockAddress *BA, EVT VT, +SDValue SelectionDAG::getBlockAddress(const BlockAddress *BA, EVT VT, bool isTarget, unsigned char TargetFlags) { unsigned Opc = isTarget ? ISD::TargetBlockAddress : ISD::BlockAddress; @@ -1356,6 +1352,23 @@ SDValue SelectionDAG::getSrcValue(const Value *V) { return SDValue(N, 0); } +/// getMDNode - Return an MDNodeSDNode which holds an MDNode. +SDValue SelectionDAG::getMDNode(const MDNode *MD) { + FoldingSetNodeID ID; + AddNodeIDNode(ID, ISD::MDNODE_SDNODE, getVTList(MVT::Other), 0, 0); + ID.AddPointer(MD); + + void *IP = 0; + if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) + return SDValue(E, 0); + + SDNode *N = new (NodeAllocator) MDNodeSDNode(MD); + CSEMap.InsertNode(N, IP); + AllNodes.push_back(N); + return SDValue(N, 0); +} + + /// getShiftAmountOperand - Return the specified value casted to /// the target's desired shift amount type. SDValue SelectionDAG::getShiftAmountOperand(SDValue Op) { @@ -1904,7 +1917,8 @@ void SelectionDAG::ComputeMaskedBits(SDValue Op, const APInt &Mask, // Output known-0 bits are known if clear or set in both the low clear bits // common to both LHS & RHS. For example, 8+(X<<3) is known to have the // low 3 bits clear. - APInt Mask2 = APInt::getLowBitsSet(BitWidth, Mask.countTrailingOnes()); + APInt Mask2 = APInt::getLowBitsSet(BitWidth, + BitWidth - Mask.countLeadingZeros()); ComputeMaskedBits(Op.getOperand(0), Mask2, KnownZero2, KnownOne2, Depth+1); assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?"); unsigned KnownZeroOut = KnownZero2.countTrailingOnes(); @@ -2253,7 +2267,7 @@ bool SelectionDAG::isVerifiedDebugInfoDesc(SDValue Op) const { GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(Op); if (!GA) return false; if (GA->getOffset() != 0) return false; - GlobalVariable *GV = dyn_cast<GlobalVariable>(GA->getGlobal()); + const GlobalVariable *GV = dyn_cast<GlobalVariable>(GA->getGlobal()); if (!GV) return false; return MF->getMMI().hasDebugInfo(); } @@ -2778,14 +2792,19 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, EVT VT, // If the indices are the same, return the inserted element else // if the indices are known different, extract the element from // the original vector. 
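The ADD/SUB mask change above widens the recursive ComputeMaskedBits query to every bit below the highest demanded bit, since carries propagate upward; with the old mask, a query that only demanded high bits asked for nothing at all. A worked 16-bit example with an assumed demanded mask:

    APInt Mask(16, 0xFF00);   // caller demands bits 8..15
    APInt Old = APInt::getLowBitsSet(16, Mask.countTrailingOnes());       // low 0 bits  -> 0x0000
    APInt New = APInt::getLowBitsSet(16, 16 - Mask.countLeadingZeros());  // low 16 bits -> 0xFFFF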
- if (N1.getOperand(2) == N2) { - if (VT == N1.getOperand(1).getValueType()) - return N1.getOperand(1); - else - return getSExtOrTrunc(N1.getOperand(1), DL, VT); - } else if (isa<ConstantSDNode>(N1.getOperand(2)) && - isa<ConstantSDNode>(N2)) + SDValue N1Op2 = N1.getOperand(2); + ConstantSDNode *N1Op2C = dyn_cast<ConstantSDNode>(N1Op2.getNode()); + + if (N1Op2C && N2C) { + if (N1Op2C->getZExtValue() == N2C->getZExtValue()) { + if (VT == N1.getOperand(1).getValueType()) + return N1.getOperand(1); + else + return getSExtOrTrunc(N1.getOperand(1), DL, VT); + } + return getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, N1.getOperand(0), N2); + } } break; case ISD::EXTRACT_ELEMENT: @@ -3178,7 +3197,7 @@ static bool isMemSrcFromString(SDValue Src, std::string &Str) { if (!G) return false; - GlobalVariable *GV = dyn_cast<GlobalVariable>(G->getGlobal()); + const GlobalVariable *GV = dyn_cast<GlobalVariable>(G->getGlobal()); if (GV && GetConstantStringInfo(GV, Str, SrcDelta, false)) return true; @@ -3193,6 +3212,7 @@ static bool FindOptimalMemOpLowering(std::vector<EVT> &MemOps, unsigned Limit, uint64_t Size, unsigned DstAlign, unsigned SrcAlign, bool NonScalarIntSafe, + bool MemcpyStrSrc, SelectionDAG &DAG, const TargetLowering &TLI) { assert((SrcAlign == 0 || SrcAlign >= DstAlign) && @@ -3201,9 +3221,12 @@ static bool FindOptimalMemOpLowering(std::vector<EVT> &MemOps, // the value, i.e. memset or memcpy from constant string. Otherwise, it's // the inferred alignment of the source. 'DstAlign', on the other hand, is the // specified alignment of the memory operation. If it is zero, that means - // it's possible to change the alignment of the destination. + // it's possible to change the alignment of the destination. 'MemcpyStrSrc' + // indicates whether the memcpy source is constant so it does not need to be + // loaded. EVT VT = TLI.getOptimalMemOpType(Size, DstAlign, SrcAlign, - NonScalarIntSafe, DAG); + NonScalarIntSafe, MemcpyStrSrc, + DAG.getMachineFunction()); if (VT == MVT::Other) { if (DstAlign >= TLI.getTargetData()->getPointerPrefAlignment() || @@ -3269,9 +3292,6 @@ static SDValue getMemcpyLoadsAndStores(SelectionDAG &DAG, DebugLoc dl, // below a certain threshold. const TargetLowering &TLI = DAG.getTargetLoweringInfo(); std::vector<EVT> MemOps; - uint64_t Limit = -1ULL; - if (!AlwaysInline) - Limit = TLI.getMaxStoresPerMemcpy(); bool DstAlignCanChange = false; MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo(); FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Dst); @@ -3283,9 +3303,13 @@ static SDValue getMemcpyLoadsAndStores(SelectionDAG &DAG, DebugLoc dl, std::string Str; bool CopyFromStr = isMemSrcFromString(Src, Str); bool isZeroStr = CopyFromStr && Str.empty(); + uint64_t Limit = -1ULL; + if (!AlwaysInline) + Limit = TLI.getMaxStoresPerMemcpy(); if (!FindOptimalMemOpLowering(MemOps, Limit, Size, (DstAlignCanChange ? 0 : Align), - (isZeroStr ? 0 : SrcAlign), true, DAG, TLI)) + (isZeroStr ? 0 : SrcAlign), + true, CopyFromStr, DAG, TLI)) return SDValue(); if (DstAlignCanChange) { @@ -3373,7 +3397,7 @@ static SDValue getMemmoveLoadsAndStores(SelectionDAG &DAG, DebugLoc dl, if (!FindOptimalMemOpLowering(MemOps, Limit, Size, (DstAlignCanChange ? 
0 : Align), - SrcAlign, true, DAG, TLI)) + SrcAlign, true, false, DAG, TLI)) return SDValue(); if (DstAlignCanChange) { @@ -3445,7 +3469,7 @@ static SDValue getMemsetStores(SelectionDAG &DAG, DebugLoc dl, isa<ConstantSDNode>(Src) && cast<ConstantSDNode>(Src)->isNullValue(); if (!FindOptimalMemOpLowering(MemOps, TLI.getMaxStoresPerMemset(), Size, (DstAlignCanChange ? 0 : Align), 0, - NonScalarIntSafe, DAG, TLI)) + NonScalarIntSafe, false, DAG, TLI)) return SDValue(); if (DstAlignCanChange) { @@ -3571,8 +3595,10 @@ SDValue SelectionDAG::getMemmove(SDValue Chain, DebugLoc dl, SDValue Dst, if (Result.getNode()) return Result; + // FIXME: If the memmove is volatile, lowering it to plain libc memmove may + // not be safe. See memcpy above for more details. + // Emit a library call. - assert(!isVol && "library memmove does not support volatile"); TargetLowering::ArgListTy Args; TargetLowering::ArgListEntry Entry; Entry.Ty = TLI.getTargetData()->getIntPtrType(*getContext()); @@ -3620,8 +3646,7 @@ SDValue SelectionDAG::getMemset(SDValue Chain, DebugLoc dl, SDValue Dst, if (Result.getNode()) return Result; - // Emit a library call. - assert(!isVol && "library memset does not support volatile"); + // Emit a library call. const Type *IntPtrTy = TLI.getTargetData()->getIntPtrType(*getContext()); TargetLowering::ArgListTy Args; TargetLowering::ArgListEntry Entry; @@ -4913,7 +4938,7 @@ SelectionDAG::getDbgValue(MDNode *MDPtr, SDNode *N, unsigned R, uint64_t Off, } SDDbgValue * -SelectionDAG::getDbgValue(MDNode *MDPtr, Value *C, uint64_t Off, +SelectionDAG::getDbgValue(MDNode *MDPtr, const Value *C, uint64_t Off, DebugLoc DL, unsigned O) { return new (Allocator) SDDbgValue(MDPtr, C, Off, DL, O); } @@ -5321,8 +5346,8 @@ unsigned SelectionDAG::GetOrdering(const SDNode *SD) const { /// AddDbgValue - Add a dbg_value SDNode. If SD is non-null that means the /// value is produced by SD. 
-void SelectionDAG::AddDbgValue(SDDbgValue *DB, SDNode *SD) { - DbgInfo->add(DB, SD); +void SelectionDAG::AddDbgValue(SDDbgValue *DB, SDNode *SD, bool isParameter) { + DbgInfo->add(DB, SD, isParameter); if (SD) SD->setHasDebugValue(true); } @@ -5338,7 +5363,7 @@ HandleSDNode::~HandleSDNode() { GlobalAddressSDNode::GlobalAddressSDNode(unsigned Opc, const GlobalValue *GA, EVT VT, int64_t o, unsigned char TF) : SDNode(Opc, DebugLoc(), getSDVTList(VT)), Offset(o), TargetFlags(TF) { - TheGlobal = const_cast<GlobalValue*>(GA); + TheGlobal = GA; } MemSDNode::MemSDNode(unsigned Opc, DebugLoc dl, SDVTList VTs, EVT memvt, @@ -5558,6 +5583,7 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const { case ISD::PCMARKER: return "PCMarker"; case ISD::READCYCLECOUNTER: return "ReadCycleCounter"; case ISD::SRCVALUE: return "SrcValue"; + case ISD::MDNODE_SDNODE: return "MDNode"; case ISD::EntryToken: return "EntryToken"; case ISD::TokenFactor: return "TokenFactor"; case ISD::AssertSext: return "AssertSext"; @@ -5926,6 +5952,11 @@ void SDNode::print_details(raw_ostream &OS, const SelectionDAG *G) const { OS << "<" << M->getValue() << ">"; else OS << "<null>"; + } else if (const MDNodeSDNode *MD = dyn_cast<MDNodeSDNode>(this)) { + if (MD->getMD()) + OS << "<" << MD->getMD() << ">"; + else + OS << "<null>"; } else if (const VTSDNode *N = dyn_cast<VTSDNode>(this)) { OS << ":" << N->getVT().getEVTString(); } @@ -6063,7 +6094,7 @@ SDValue SelectionDAG::UnrollVectorOp(SDNode *N, unsigned ResNE) { unsigned i; for (i= 0; i != NE; ++i) { - for (unsigned j = 0; j != N->getNumOperands(); ++j) { + for (unsigned j = 0, e = N->getNumOperands(); j != e; ++j) { SDValue Operand = N->getOperand(j); EVT OperandVT = Operand.getValueType(); if (OperandVT.isVector()) { @@ -6141,8 +6172,8 @@ bool SelectionDAG::isConsecutiveLoad(LoadSDNode *LD, LoadSDNode *Base, return true; } - GlobalValue *GV1 = NULL; - GlobalValue *GV2 = NULL; + const GlobalValue *GV1 = NULL; + const GlobalValue *GV2 = NULL; int64_t Offset1 = 0; int64_t Offset2 = 0; bool isGA1 = TLI.isGAPlusOffset(Loc.getNode(), GV1, Offset1); @@ -6157,14 +6188,14 @@ bool SelectionDAG::isConsecutiveLoad(LoadSDNode *LD, LoadSDNode *Base, /// it cannot be inferred. unsigned SelectionDAG::InferPtrAlignment(SDValue Ptr) const { // If this is a GlobalAddress + cst, return the alignment. - GlobalValue *GV; + const GlobalValue *GV; int64_t GVOffset = 0; if (TLI.isGAPlusOffset(Ptr.getNode(), GV, GVOffset)) { // If GV has specified alignment, then use it. Otherwise, use the preferred // alignment. unsigned Align = GV->getAlignment(); if (!Align) { - if (GlobalVariable *GVar = dyn_cast<GlobalVariable>(GV)) { + if (const GlobalVariable *GVar = dyn_cast<GlobalVariable>(GV)) { if (GVar->hasInitializer()) { const TargetData *TD = TLI.getTargetData(); Align = TD->getPreferredAlignment(GVar); @@ -6326,8 +6357,8 @@ bool BuildVectorSDNode::isConstantSplat(APInt &SplatValue, if (OpVal.getOpcode() == ISD::UNDEF) SplatUndef |= APInt::getBitsSet(sz, BitPos, BitPos + EltBitSize); else if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(OpVal)) - SplatValue |= (APInt(CN->getAPIntValue()).zextOrTrunc(EltBitSize). - zextOrTrunc(sz) << BitPos); + SplatValue |= APInt(CN->getAPIntValue()).zextOrTrunc(EltBitSize). 
+ zextOrTrunc(sz) << BitPos; else if (ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(OpVal)) SplatValue |= CN->getValueAPF().bitcastToAPInt().zextOrTrunc(sz) <<BitPos; else diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp index 4bbb3de..a38b204 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -28,7 +28,9 @@ #include "llvm/Instructions.h" #include "llvm/Intrinsics.h" #include "llvm/IntrinsicInst.h" +#include "llvm/LLVMContext.h" #include "llvm/Module.h" +#include "llvm/CodeGen/Analysis.h" #include "llvm/CodeGen/FastISel.h" #include "llvm/CodeGen/GCStrategy.h" #include "llvm/CodeGen/GCMetadata.h" @@ -168,7 +170,7 @@ namespace { /// AddInlineAsmOperands - Add this value to the specified inlineasm node /// operand list. This adds the code marker, matching input operand index /// (if applicable), and includes the number of values added into it. - void AddInlineAsmOperands(unsigned Code, + void AddInlineAsmOperands(unsigned Kind, bool HasMatching, unsigned MatchingIdx, SelectionDAG &DAG, std::vector<SDValue> &Ops) const; @@ -533,7 +535,7 @@ void SelectionDAGBuilder::init(GCFunctionInfo *gfi, AliasAnalysis &aa) { TD = DAG.getTarget().getTargetData(); } -/// clear - Clear out the curret SelectionDAG and the associated +/// clear - Clear out the current SelectionDAG and the associated /// state and prepare this SelectionDAGBuilder object to be used /// for a new block. This doesn't clear out information about /// additional blocks that are needed to complete switch lowering @@ -543,8 +545,6 @@ void SelectionDAGBuilder::clear() { NodeMap.clear(); PendingLoads.clear(); PendingExports.clear(); - EdgeMapping.clear(); - DAG.clear(); CurDebugLoc = DebugLoc(); HasTailCall = false; } @@ -612,11 +612,26 @@ void SelectionDAGBuilder::AssignOrderingToNode(const SDNode *Node) { AssignOrderingToNode(Node->getOperand(I).getNode()); } -void SelectionDAGBuilder::visit(Instruction &I) { +void SelectionDAGBuilder::visit(const Instruction &I) { + // Set up outgoing PHI node register values before emitting the terminator. + if (isa<TerminatorInst>(&I)) + HandlePHINodesInSuccessorBlocks(I.getParent()); + + CurDebugLoc = I.getDebugLoc(); + visit(I.getOpcode(), I); + + if (!isa<TerminatorInst>(&I) && !HasTailCall) + CopyToExportRegsIfNeeded(&I); + + CurDebugLoc = DebugLoc(); } -void SelectionDAGBuilder::visit(unsigned Opcode, User &I) { +void SelectionDAGBuilder::visitPHI(const PHINode &) { + llvm_unreachable("SelectionDAGBuilder shouldn't visit PHI nodes!"); +} + +void SelectionDAGBuilder::visit(unsigned Opcode, const User &I) { // Note: this doesn't use InstVisitor, because it has to work with // ConstantExpr's in addition to instructions. 
switch (Opcode) { @@ -638,28 +653,28 @@ SDValue SelectionDAGBuilder::getValue(const Value *V) { SDValue &N = NodeMap[V]; if (N.getNode()) return N; - if (Constant *C = const_cast<Constant*>(dyn_cast<Constant>(V))) { + if (const Constant *C = dyn_cast<Constant>(V)) { EVT VT = TLI.getValueType(V->getType(), true); - if (ConstantInt *CI = dyn_cast<ConstantInt>(C)) + if (const ConstantInt *CI = dyn_cast<ConstantInt>(C)) return N = DAG.getConstant(*CI, VT); - if (GlobalValue *GV = dyn_cast<GlobalValue>(C)) + if (const GlobalValue *GV = dyn_cast<GlobalValue>(C)) return N = DAG.getGlobalAddress(GV, VT); if (isa<ConstantPointerNull>(C)) return N = DAG.getConstant(0, TLI.getPointerTy()); - if (ConstantFP *CFP = dyn_cast<ConstantFP>(C)) + if (const ConstantFP *CFP = dyn_cast<ConstantFP>(C)) return N = DAG.getConstantFP(*CFP, VT); if (isa<UndefValue>(C) && !V->getType()->isAggregateType()) return N = DAG.getUNDEF(VT); - if (ConstantExpr *CE = dyn_cast<ConstantExpr>(C)) { + if (const ConstantExpr *CE = dyn_cast<ConstantExpr>(C)) { visit(CE->getOpcode(), *CE); SDValue N1 = NodeMap[V]; - assert(N1.getNode() && "visit didn't populate the ValueMap!"); + assert(N1.getNode() && "visit didn't populate the NodeMap!"); return N1; } @@ -704,7 +719,7 @@ SDValue SelectionDAGBuilder::getValue(const Value *V) { getCurDebugLoc()); } - if (BlockAddress *BA = dyn_cast<BlockAddress>(C)) + if (const BlockAddress *BA = dyn_cast<BlockAddress>(C)) return DAG.getBlockAddress(BA, VT); const VectorType *VecTy = cast<VectorType>(V->getType()); @@ -713,7 +728,7 @@ SDValue SelectionDAGBuilder::getValue(const Value *V) { // Now that we know the number and type of the elements, get that number of // elements into the Ops array based on what kind of constant it is. SmallVector<SDValue, 16> Ops; - if (ConstantVector *CP = dyn_cast<ConstantVector>(C)) { + if (const ConstantVector *CP = dyn_cast<ConstantVector>(C)) { for (unsigned i = 0; i != NumElements; ++i) Ops.push_back(getValue(CP->getOperand(i))); } else { @@ -756,7 +771,7 @@ SDValue SelectionDAGBuilder::getValue(const Value *V) { static void getReturnInfo(const Type* ReturnType, Attributes attr, SmallVectorImpl<EVT> &OutVTs, SmallVectorImpl<ISD::ArgFlagsTy> &OutFlags, - TargetLowering &TLI, + const TargetLowering &TLI, SmallVectorImpl<uint64_t> *Offsets = 0) { SmallVector<EVT, 4> ValueVTs; ComputeValueVTs(TLI, ReturnType, ValueVTs); @@ -811,7 +826,7 @@ static void getReturnInfo(const Type* ReturnType, } } -void SelectionDAGBuilder::visitRet(ReturnInst &I) { +void SelectionDAGBuilder::visitRet(const ReturnInst &I) { SDValue Chain = getControlRoot(); SmallVector<ISD::OutputArg, 8> Outs; FunctionLoweringInfo &FLI = DAG.getFunctionLoweringInfo(); @@ -916,18 +931,18 @@ void SelectionDAGBuilder::visitRet(ReturnInst &I) { /// CopyToExportRegsIfNeeded - If the given value has virtual registers /// created for it, emit nodes to copy the value into the virtual /// registers. 
-void SelectionDAGBuilder::CopyToExportRegsIfNeeded(Value *V) { - if (!V->use_empty()) { - DenseMap<const Value *, unsigned>::iterator VMI = FuncInfo.ValueMap.find(V); - if (VMI != FuncInfo.ValueMap.end()) - CopyValueToVirtualRegister(V, VMI->second); +void SelectionDAGBuilder::CopyToExportRegsIfNeeded(const Value *V) { + DenseMap<const Value *, unsigned>::iterator VMI = FuncInfo.ValueMap.find(V); + if (VMI != FuncInfo.ValueMap.end()) { + assert(!V->use_empty() && "Unused value assigned virtual registers!"); + CopyValueToVirtualRegister(V, VMI->second); } } /// ExportFromCurrentBlock - If this condition isn't known to be exported from /// the current basic block, add it to ValueMap now so that we'll get a /// CopyTo/FromReg. -void SelectionDAGBuilder::ExportFromCurrentBlock(Value *V) { +void SelectionDAGBuilder::ExportFromCurrentBlock(const Value *V) { // No need to export constants. if (!isa<Instruction>(V) && !isa<Argument>(V)) return; @@ -938,11 +953,11 @@ void SelectionDAGBuilder::ExportFromCurrentBlock(Value *V) { CopyValueToVirtualRegister(V, Reg); } -bool SelectionDAGBuilder::isExportableFromCurrentBlock(Value *V, +bool SelectionDAGBuilder::isExportableFromCurrentBlock(const Value *V, const BasicBlock *FromBB) { // The operands of the setcc have to be in this block. We don't know // how to export them from some other block. - if (Instruction *VI = dyn_cast<Instruction>(V)) { + if (const Instruction *VI = dyn_cast<Instruction>(V)) { // Can export from current BB. if (VI->getParent() == FromBB) return true; @@ -971,85 +986,31 @@ static bool InBlock(const Value *V, const BasicBlock *BB) { return true; } -/// getFCmpCondCode - Return the ISD condition code corresponding to -/// the given LLVM IR floating-point condition code. This includes -/// consideration of global floating-point math flags. -/// -static ISD::CondCode getFCmpCondCode(FCmpInst::Predicate Pred) { - ISD::CondCode FPC, FOC; - switch (Pred) { - case FCmpInst::FCMP_FALSE: FOC = FPC = ISD::SETFALSE; break; - case FCmpInst::FCMP_OEQ: FOC = ISD::SETEQ; FPC = ISD::SETOEQ; break; - case FCmpInst::FCMP_OGT: FOC = ISD::SETGT; FPC = ISD::SETOGT; break; - case FCmpInst::FCMP_OGE: FOC = ISD::SETGE; FPC = ISD::SETOGE; break; - case FCmpInst::FCMP_OLT: FOC = ISD::SETLT; FPC = ISD::SETOLT; break; - case FCmpInst::FCMP_OLE: FOC = ISD::SETLE; FPC = ISD::SETOLE; break; - case FCmpInst::FCMP_ONE: FOC = ISD::SETNE; FPC = ISD::SETONE; break; - case FCmpInst::FCMP_ORD: FOC = FPC = ISD::SETO; break; - case FCmpInst::FCMP_UNO: FOC = FPC = ISD::SETUO; break; - case FCmpInst::FCMP_UEQ: FOC = ISD::SETEQ; FPC = ISD::SETUEQ; break; - case FCmpInst::FCMP_UGT: FOC = ISD::SETGT; FPC = ISD::SETUGT; break; - case FCmpInst::FCMP_UGE: FOC = ISD::SETGE; FPC = ISD::SETUGE; break; - case FCmpInst::FCMP_ULT: FOC = ISD::SETLT; FPC = ISD::SETULT; break; - case FCmpInst::FCMP_ULE: FOC = ISD::SETLE; FPC = ISD::SETULE; break; - case FCmpInst::FCMP_UNE: FOC = ISD::SETNE; FPC = ISD::SETUNE; break; - case FCmpInst::FCMP_TRUE: FOC = FPC = ISD::SETTRUE; break; - default: - llvm_unreachable("Invalid FCmp predicate opcode!"); - FOC = FPC = ISD::SETFALSE; - break; - } - if (FiniteOnlyFPMath()) - return FOC; - else - return FPC; -} - -/// getICmpCondCode - Return the ISD condition code corresponding to -/// the given LLVM IR integer condition code. 
-/// -static ISD::CondCode getICmpCondCode(ICmpInst::Predicate Pred) { - switch (Pred) { - case ICmpInst::ICMP_EQ: return ISD::SETEQ; - case ICmpInst::ICMP_NE: return ISD::SETNE; - case ICmpInst::ICMP_SLE: return ISD::SETLE; - case ICmpInst::ICMP_ULE: return ISD::SETULE; - case ICmpInst::ICMP_SGE: return ISD::SETGE; - case ICmpInst::ICMP_UGE: return ISD::SETUGE; - case ICmpInst::ICMP_SLT: return ISD::SETLT; - case ICmpInst::ICMP_ULT: return ISD::SETULT; - case ICmpInst::ICMP_SGT: return ISD::SETGT; - case ICmpInst::ICMP_UGT: return ISD::SETUGT; - default: - llvm_unreachable("Invalid ICmp predicate opcode!"); - return ISD::SETNE; - } -} - /// EmitBranchForMergedCondition - Helper method for FindMergedConditions. /// This function emits a branch and is used at the leaves of an OR or an /// AND operator tree. /// void -SelectionDAGBuilder::EmitBranchForMergedCondition(Value *Cond, +SelectionDAGBuilder::EmitBranchForMergedCondition(const Value *Cond, MachineBasicBlock *TBB, MachineBasicBlock *FBB, - MachineBasicBlock *CurBB) { + MachineBasicBlock *CurBB, + MachineBasicBlock *SwitchBB) { const BasicBlock *BB = CurBB->getBasicBlock(); // If the leaf of the tree is a comparison, merge the condition into // the caseblock. - if (CmpInst *BOp = dyn_cast<CmpInst>(Cond)) { + if (const CmpInst *BOp = dyn_cast<CmpInst>(Cond)) { // The operands of the cmp have to be in this block. We don't know // how to export them from some other block. If this is the first block // of the sequence, no exporting is needed. - if (CurBB == CurMBB || + if (CurBB == SwitchBB || (isExportableFromCurrentBlock(BOp->getOperand(0), BB) && isExportableFromCurrentBlock(BOp->getOperand(1), BB))) { ISD::CondCode Condition; - if (ICmpInst *IC = dyn_cast<ICmpInst>(Cond)) { + if (const ICmpInst *IC = dyn_cast<ICmpInst>(Cond)) { Condition = getICmpCondCode(IC->getPredicate()); - } else if (FCmpInst *FC = dyn_cast<FCmpInst>(Cond)) { + } else if (const FCmpInst *FC = dyn_cast<FCmpInst>(Cond)) { Condition = getFCmpCondCode(FC->getPredicate()); } else { Condition = ISD::SETEQ; // silence warning. @@ -1070,19 +1031,20 @@ SelectionDAGBuilder::EmitBranchForMergedCondition(Value *Cond, } /// FindMergedConditions - If Cond is an expression like -void SelectionDAGBuilder::FindMergedConditions(Value *Cond, +void SelectionDAGBuilder::FindMergedConditions(const Value *Cond, MachineBasicBlock *TBB, MachineBasicBlock *FBB, MachineBasicBlock *CurBB, + MachineBasicBlock *SwitchBB, unsigned Opc) { // If this node is not part of the or/and tree, emit it as a branch. - Instruction *BOp = dyn_cast<Instruction>(Cond); + const Instruction *BOp = dyn_cast<Instruction>(Cond); if (!BOp || !(isa<BinaryOperator>(BOp) || isa<CmpInst>(BOp)) || (unsigned)BOp->getOpcode() != Opc || !BOp->hasOneUse() || BOp->getParent() != CurBB->getBasicBlock() || !InBlock(BOp->getOperand(0), CurBB->getBasicBlock()) || !InBlock(BOp->getOperand(1), CurBB->getBasicBlock())) { - EmitBranchForMergedCondition(Cond, TBB, FBB, CurBB); + EmitBranchForMergedCondition(Cond, TBB, FBB, CurBB, SwitchBB); return; } @@ -1102,10 +1064,10 @@ void SelectionDAGBuilder::FindMergedConditions(Value *Cond, // // Emit the LHS condition. - FindMergedConditions(BOp->getOperand(0), TBB, TmpBB, CurBB, Opc); + FindMergedConditions(BOp->getOperand(0), TBB, TmpBB, CurBB, SwitchBB, Opc); // Emit the RHS condition into TmpBB. 
- FindMergedConditions(BOp->getOperand(1), TBB, FBB, TmpBB, Opc); + FindMergedConditions(BOp->getOperand(1), TBB, FBB, TmpBB, SwitchBB, Opc); } else { assert(Opc == Instruction::And && "Unknown merge op!"); // Codegen X & Y as: @@ -1118,10 +1080,10 @@ void SelectionDAGBuilder::FindMergedConditions(Value *Cond, // This requires creation of TmpBB after CurBB. // Emit the LHS condition. - FindMergedConditions(BOp->getOperand(0), TmpBB, FBB, CurBB, Opc); + FindMergedConditions(BOp->getOperand(0), TmpBB, FBB, CurBB, SwitchBB, Opc); // Emit the RHS condition into TmpBB. - FindMergedConditions(BOp->getOperand(1), TBB, FBB, TmpBB, Opc); + FindMergedConditions(BOp->getOperand(1), TBB, FBB, TmpBB, SwitchBB, Opc); } } @@ -1156,19 +1118,21 @@ SelectionDAGBuilder::ShouldEmitAsBranches(const std::vector<CaseBlock> &Cases){ return true; } -void SelectionDAGBuilder::visitBr(BranchInst &I) { +void SelectionDAGBuilder::visitBr(const BranchInst &I) { + MachineBasicBlock *BrMBB = FuncInfo.MBBMap[I.getParent()]; + // Update machine-CFG edges. MachineBasicBlock *Succ0MBB = FuncInfo.MBBMap[I.getSuccessor(0)]; // Figure out which block is immediately after the current one. MachineBasicBlock *NextBlock = 0; - MachineFunction::iterator BBI = CurMBB; + MachineFunction::iterator BBI = BrMBB; if (++BBI != FuncInfo.MF->end()) NextBlock = BBI; if (I.isUnconditional()) { // Update machine-CFG edges. - CurMBB->addSuccessor(Succ0MBB); + BrMBB->addSuccessor(Succ0MBB); // If this is not a fall-through branch, emit the branch. if (Succ0MBB != NextBlock) @@ -1181,7 +1145,7 @@ void SelectionDAGBuilder::visitBr(BranchInst &I) { // If this condition is one of the special cases we handle, do special stuff // now. - Value *CondVal = I.getCondition(); + const Value *CondVal = I.getCondition(); MachineBasicBlock *Succ1MBB = FuncInfo.MBBMap[I.getSuccessor(1)]; // If this is a series of conditions that are or'd or and'd together, emit @@ -1199,15 +1163,16 @@ void SelectionDAGBuilder::visitBr(BranchInst &I) { // cmp D, E // jle foo // - if (BinaryOperator *BOp = dyn_cast<BinaryOperator>(CondVal)) { + if (const BinaryOperator *BOp = dyn_cast<BinaryOperator>(CondVal)) { if (BOp->hasOneUse() && (BOp->getOpcode() == Instruction::And || BOp->getOpcode() == Instruction::Or)) { - FindMergedConditions(BOp, Succ0MBB, Succ1MBB, CurMBB, BOp->getOpcode()); + FindMergedConditions(BOp, Succ0MBB, Succ1MBB, BrMBB, BrMBB, + BOp->getOpcode()); // If the compares in later blocks need to use values not currently // exported from this block, export them now. This block should always // be the first entry. - assert(SwitchCases[0].ThisBB == CurMBB && "Unexpected lowering!"); + assert(SwitchCases[0].ThisBB == BrMBB && "Unexpected lowering!"); // Allow some cases to be rejected. if (ShouldEmitAsBranches(SwitchCases)) { @@ -1217,7 +1182,7 @@ void SelectionDAGBuilder::visitBr(BranchInst &I) { } // Emit the branch for this block. - visitSwitchCase(SwitchCases[0]); + visitSwitchCase(SwitchCases[0], BrMBB); SwitchCases.erase(SwitchCases.begin()); return; } @@ -1233,16 +1198,17 @@ void SelectionDAGBuilder::visitBr(BranchInst &I) { // Create a CaseBlock record representing this branch. CaseBlock CB(ISD::SETEQ, CondVal, ConstantInt::getTrue(*DAG.getContext()), - NULL, Succ0MBB, Succ1MBB, CurMBB); + NULL, Succ0MBB, Succ1MBB, BrMBB); // Use visitSwitchCase to actually insert the fast branch sequence for this // cond branch. 
- visitSwitchCase(CB); + visitSwitchCase(CB, BrMBB); } /// visitSwitchCase - Emits the necessary code to represent a single node in /// the binary search tree resulting from lowering a switch instruction. -void SelectionDAGBuilder::visitSwitchCase(CaseBlock &CB) { +void SelectionDAGBuilder::visitSwitchCase(CaseBlock &CB, + MachineBasicBlock *SwitchBB) { SDValue Cond; SDValue CondLHS = getValue(CB.CmpLHS); DebugLoc dl = getCurDebugLoc(); @@ -1281,13 +1247,13 @@ void SelectionDAGBuilder::visitSwitchCase(CaseBlock &CB) { } // Update successor info - CurMBB->addSuccessor(CB.TrueBB); - CurMBB->addSuccessor(CB.FalseBB); + SwitchBB->addSuccessor(CB.TrueBB); + SwitchBB->addSuccessor(CB.FalseBB); // Set NextBlock to be the MBB immediately after the current one, if any. // This is used to avoid emitting unnecessary branches to the next block. MachineBasicBlock *NextBlock = 0; - MachineFunction::iterator BBI = CurMBB; + MachineFunction::iterator BBI = SwitchBB; if (++BBI != FuncInfo.MF->end()) NextBlock = BBI; @@ -1305,11 +1271,11 @@ void SelectionDAGBuilder::visitSwitchCase(CaseBlock &CB) { // If the branch was constant folded, fix up the CFG. if (BrCond.getOpcode() == ISD::BR) { - CurMBB->removeSuccessor(CB.FalseBB); + SwitchBB->removeSuccessor(CB.FalseBB); } else { // Otherwise, go ahead and insert the false branch. if (BrCond == getControlRoot()) - CurMBB->removeSuccessor(CB.TrueBB); + SwitchBB->removeSuccessor(CB.TrueBB); if (CB.FalseBB != NextBlock) BrCond = DAG.getNode(ISD::BR, dl, MVT::Other, BrCond, @@ -1336,7 +1302,8 @@ void SelectionDAGBuilder::visitJumpTable(JumpTable &JT) { /// visitJumpTableHeader - This function emits necessary code to produce index /// in the JumpTable from switch case. void SelectionDAGBuilder::visitJumpTableHeader(JumpTable &JT, - JumpTableHeader &JTH) { + JumpTableHeader &JTH, + MachineBasicBlock *SwitchBB) { // Subtract the lowest switch case value from the value being switched on and // conditional branch to default mbb if the result is greater than the // difference between smallest and largest cases. @@ -1368,7 +1335,7 @@ void SelectionDAGBuilder::visitJumpTableHeader(JumpTable &JT, // Set NextBlock to be the MBB immediately after the current one, if any. // This is used to avoid emitting unnecessary branches to the next block. MachineBasicBlock *NextBlock = 0; - MachineFunction::iterator BBI = CurMBB; + MachineFunction::iterator BBI = SwitchBB; if (++BBI != FuncInfo.MF->end()) NextBlock = BBI; @@ -1386,7 +1353,8 @@ void SelectionDAGBuilder::visitJumpTableHeader(JumpTable &JT, /// visitBitTestHeader - This function emits necessary code to produce value /// suitable for "bit tests" -void SelectionDAGBuilder::visitBitTestHeader(BitTestBlock &B) { +void SelectionDAGBuilder::visitBitTestHeader(BitTestBlock &B, + MachineBasicBlock *SwitchBB) { // Subtract the minimum value SDValue SwitchOp = getValue(B.SValue); EVT VT = SwitchOp.getValueType(); @@ -1409,14 +1377,14 @@ void SelectionDAGBuilder::visitBitTestHeader(BitTestBlock &B) { // Set NextBlock to be the MBB immediately after the current one, if any. // This is used to avoid emitting unnecessary branches to the next block. 
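For context, the NextBlock bookkeeping that recurs in these switch helpers only detects fall-through; a minimal sketch of that check in plain C++ (integer block indices standing in for MachineBasicBlocks, an assumption made purely for illustration) is:

    #include <optional>

    // Emit an unconditional branch only when the target block is not laid out
    // immediately after the current block; otherwise rely on fall-through.
    std::optional<int> branchTargetIfNeeded(int CurBlockIndex, int TargetIndex,
                                            int NumBlocks) {
      int NextIndex = CurBlockIndex + 1 < NumBlocks ? CurBlockIndex + 1 : -1;
      if (TargetIndex == NextIndex)
        return std::nullopt;   // fall through, no branch emitted
      return TargetIndex;      // an explicit BR to this block is required
    }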
MachineBasicBlock *NextBlock = 0; - MachineFunction::iterator BBI = CurMBB; + MachineFunction::iterator BBI = SwitchBB; if (++BBI != FuncInfo.MF->end()) NextBlock = BBI; MachineBasicBlock* MBB = B.Cases[0].ThisBB; - CurMBB->addSuccessor(B.Default); - CurMBB->addSuccessor(MBB); + SwitchBB->addSuccessor(B.Default); + SwitchBB->addSuccessor(MBB); SDValue BrRange = DAG.getNode(ISD::BRCOND, getCurDebugLoc(), MVT::Other, CopyTo, RangeCmp, @@ -1432,7 +1400,8 @@ void SelectionDAGBuilder::visitBitTestHeader(BitTestBlock &B) { /// visitBitTestCase - this function produces one "bit test" void SelectionDAGBuilder::visitBitTestCase(MachineBasicBlock* NextMBB, unsigned Reg, - BitTestCase &B) { + BitTestCase &B, + MachineBasicBlock *SwitchBB) { // Make desired shift SDValue ShiftOp = DAG.getCopyFromReg(getControlRoot(), getCurDebugLoc(), Reg, TLI.getPointerTy()); @@ -1450,8 +1419,8 @@ void SelectionDAGBuilder::visitBitTestCase(MachineBasicBlock* NextMBB, AndOp, DAG.getConstant(0, TLI.getPointerTy()), ISD::SETNE); - CurMBB->addSuccessor(B.TargetBB); - CurMBB->addSuccessor(NextMBB); + SwitchBB->addSuccessor(B.TargetBB); + SwitchBB->addSuccessor(NextMBB); SDValue BrAnd = DAG.getNode(ISD::BRCOND, getCurDebugLoc(), MVT::Other, getControlRoot(), @@ -1460,7 +1429,7 @@ void SelectionDAGBuilder::visitBitTestCase(MachineBasicBlock* NextMBB, // Set NextBlock to be the MBB immediately after the current one, if any. // This is used to avoid emitting unnecessary branches to the next block. MachineBasicBlock *NextBlock = 0; - MachineFunction::iterator BBI = CurMBB; + MachineFunction::iterator BBI = SwitchBB; if (++BBI != FuncInfo.MF->end()) NextBlock = BBI; @@ -1471,7 +1440,9 @@ void SelectionDAGBuilder::visitBitTestCase(MachineBasicBlock* NextMBB, DAG.setRoot(BrAnd); } -void SelectionDAGBuilder::visitInvoke(InvokeInst &I) { +void SelectionDAGBuilder::visitInvoke(const InvokeInst &I) { + MachineBasicBlock *InvokeMBB = FuncInfo.MBBMap[I.getParent()]; + // Retrieve successors. MachineBasicBlock *Return = FuncInfo.MBBMap[I.getSuccessor(0)]; MachineBasicBlock *LandingPad = FuncInfo.MBBMap[I.getSuccessor(1)]; @@ -1487,8 +1458,8 @@ void SelectionDAGBuilder::visitInvoke(InvokeInst &I) { CopyToExportRegsIfNeeded(&I); // Update successor info - CurMBB->addSuccessor(Return); - CurMBB->addSuccessor(LandingPad); + InvokeMBB->addSuccessor(Return); + InvokeMBB->addSuccessor(LandingPad); // Drop into normal successor. DAG.setRoot(DAG.getNode(ISD::BR, getCurDebugLoc(), @@ -1496,15 +1467,16 @@ void SelectionDAGBuilder::visitInvoke(InvokeInst &I) { DAG.getBasicBlock(Return))); } -void SelectionDAGBuilder::visitUnwind(UnwindInst &I) { +void SelectionDAGBuilder::visitUnwind(const UnwindInst &I) { } /// handleSmallSwitchCaseRange - Emit a series of specific tests (suitable for /// small case ranges). bool SelectionDAGBuilder::handleSmallSwitchRange(CaseRec& CR, CaseRecVector& WorkList, - Value* SV, - MachineBasicBlock* Default) { + const Value* SV, + MachineBasicBlock *Default, + MachineBasicBlock *SwitchBB) { Case& BackCase = *(CR.Range.second-1); // Size is the number of Cases represented by this range. @@ -1557,7 +1529,7 @@ bool SelectionDAGBuilder::handleSmallSwitchRange(CaseRec& CR, FallThrough = Default; } - Value *RHS, *LHS, *MHS; + const Value *RHS, *LHS, *MHS; ISD::CondCode CC; if (I->High == I->Low) { // This is just small small case range :) containing exactly 1 case @@ -1573,8 +1545,8 @@ bool SelectionDAGBuilder::handleSmallSwitchRange(CaseRec& CR, // code into the current block. 
Otherwise, push the CaseBlock onto the // vector to be later processed by SDISel, and insert the node's MBB // before the next MBB. - if (CurBlock == CurMBB) - visitSwitchCase(CB); + if (CurBlock == SwitchBB) + visitSwitchCase(CB, SwitchBB); else SwitchCases.push_back(CB); @@ -1600,8 +1572,9 @@ static APInt ComputeRange(const APInt &First, const APInt &Last) { /// handleJTSwitchCase - Emit jumptable for current switch case range bool SelectionDAGBuilder::handleJTSwitchCase(CaseRec& CR, CaseRecVector& WorkList, - Value* SV, - MachineBasicBlock* Default) { + const Value* SV, + MachineBasicBlock* Default, + MachineBasicBlock *SwitchBB) { Case& FrontCase = *CR.Range.first; Case& BackCase = *(CR.Range.second-1); @@ -1613,7 +1586,7 @@ bool SelectionDAGBuilder::handleJTSwitchCase(CaseRec& CR, I!=E; ++I) TSize += I->size(); - if (!areJTsAllowed(TLI) || TSize.ult(APInt(First.getBitWidth(), 4))) + if (!areJTsAllowed(TLI) || TSize.ult(4)) return false; APInt Range = ComputeRange(First, Last); @@ -1682,9 +1655,9 @@ bool SelectionDAGBuilder::handleJTSwitchCase(CaseRec& CR, // Set the jump table information so that we can codegen it as a second // MachineBasicBlock JumpTable JT(-1U, JTI, JumpTableBB, Default); - JumpTableHeader JTH(First, Last, SV, CR.CaseBB, (CR.CaseBB == CurMBB)); - if (CR.CaseBB == CurMBB) - visitJumpTableHeader(JT, JTH); + JumpTableHeader JTH(First, Last, SV, CR.CaseBB, (CR.CaseBB == SwitchBB)); + if (CR.CaseBB == SwitchBB) + visitJumpTableHeader(JT, JTH, SwitchBB); JTCases.push_back(JumpTableBlock(JTH, JT)); @@ -1695,8 +1668,9 @@ bool SelectionDAGBuilder::handleJTSwitchCase(CaseRec& CR, /// 2 subtrees. bool SelectionDAGBuilder::handleBTSplitSwitchCase(CaseRec& CR, CaseRecVector& WorkList, - Value* SV, - MachineBasicBlock* Default) { + const Value* SV, + MachineBasicBlock *Default, + MachineBasicBlock *SwitchBB) { // Get the MachineFunction which holds the current MBB. This is used when // inserting any additional MBBs necessary to represent the switch. MachineFunction *CurMF = FuncInfo.MF; @@ -1810,8 +1784,8 @@ bool SelectionDAGBuilder::handleBTSplitSwitchCase(CaseRec& CR, // Otherwise, branch to LHS. CaseBlock CB(ISD::SETLT, SV, C, NULL, TrueBB, FalseBB, CR.CaseBB); - if (CR.CaseBB == CurMBB) - visitSwitchCase(CB); + if (CR.CaseBB == SwitchBB) + visitSwitchCase(CB, SwitchBB); else SwitchCases.push_back(CB); @@ -1823,8 +1797,9 @@ bool SelectionDAGBuilder::handleBTSplitSwitchCase(CaseRec& CR, /// of masks and emit bit tests with these masks. bool SelectionDAGBuilder::handleBitTestsSwitchCase(CaseRec& CR, CaseRecVector& WorkList, - Value* SV, - MachineBasicBlock* Default){ + const Value* SV, + MachineBasicBlock* Default, + MachineBasicBlock *SwitchBB){ EVT PTy = TLI.getPointerTy(); unsigned IntPtrBits = PTy.getSizeInBits(); @@ -1867,7 +1842,7 @@ bool SelectionDAGBuilder::handleBitTestsSwitchCase(CaseRec& CR, << "Low bound: " << minValue << '\n' << "High bound: " << maxValue << '\n'); - if (cmpRange.uge(APInt(cmpRange.getBitWidth(), IntPtrBits)) || + if (cmpRange.uge(IntPtrBits) || (!(Dests.size() == 1 && numCmps >= 3) && !(Dests.size() == 2 && numCmps >= 5) && !(Dests.size() >= 3 && numCmps >= 6))) @@ -1879,8 +1854,7 @@ bool SelectionDAGBuilder::handleBitTestsSwitchCase(CaseRec& CR, // Optimize the case where all the case values fit in a // word without having to subtract minValue. In this case, // we can optimize away the subtraction. 
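As background for the hunk below, the bit-test lowering these thresholds guard turns a small dense switch into one bitmask per destination, each tested with a single shift; a hedged sketch with made-up case values (1, 4, 9 to one destination, 2, 3 to another), assuming no minValue subtraction is needed:

    #include <cstdint>

    enum Dest { DestA, DestB, DefaultDest };

    Dest lowerSmallDenseSwitch(uint64_t X) {
      // Per-destination masks over case values already known to be < 64.
      const uint64_t MaskA = (1ULL << 1) | (1ULL << 4) | (1ULL << 9); // cases 1,4,9
      const uint64_t MaskB = (1ULL << 2) | (1ULL << 3);               // cases 2,3

      if (X >= 64)                  // one range check replaces the per-case compares
        return DefaultDest;
      if ((1ULL << X) & MaskA)
        return DestA;
      if ((1ULL << X) & MaskB)
        return DestB;
      return DefaultDest;
    }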
- if (minValue.isNonNegative() && - maxValue.slt(APInt(maxValue.getBitWidth(), IntPtrBits))) { + if (minValue.isNonNegative() && maxValue.slt(IntPtrBits)) { cmpRange = maxValue; } else { lowBound = minValue; @@ -1940,11 +1914,11 @@ bool SelectionDAGBuilder::handleBitTestsSwitchCase(CaseRec& CR, } BitTestBlock BTB(lowBound, cmpRange, SV, - -1U, (CR.CaseBB == CurMBB), + -1U, (CR.CaseBB == SwitchBB), CR.CaseBB, Default, BTC); - if (CR.CaseBB == CurMBB) - visitBitTestHeader(BTB); + if (CR.CaseBB == SwitchBB) + visitBitTestHeader(BTB, SwitchBB); BitTestCases.push_back(BTB); @@ -1994,7 +1968,9 @@ size_t SelectionDAGBuilder::Clusterify(CaseVector& Cases, return numCmps; } -void SelectionDAGBuilder::visitSwitch(SwitchInst &SI) { +void SelectionDAGBuilder::visitSwitch(const SwitchInst &SI) { + MachineBasicBlock *SwitchMBB = FuncInfo.MBBMap[SI.getParent()]; + // Figure out which block is immediately after the current one. MachineBasicBlock *NextBlock = 0; MachineBasicBlock *Default = FuncInfo.MBBMap[SI.getDefaultDest()]; @@ -2005,7 +1981,7 @@ void SelectionDAGBuilder::visitSwitch(SwitchInst &SI) { // Update machine-CFG edges. // If this is not a fall-through branch, emit the branch. - CurMBB->addSuccessor(Default); + SwitchMBB->addSuccessor(Default); if (Default != NextBlock) DAG.setRoot(DAG.getNode(ISD::BR, getCurDebugLoc(), MVT::Other, getControlRoot(), @@ -2026,38 +2002,41 @@ void SelectionDAGBuilder::visitSwitch(SwitchInst &SI) { // Get the Value to be switched on and default basic blocks, which will be // inserted into CaseBlock records, representing basic blocks in the binary // search tree. - Value *SV = SI.getOperand(0); + const Value *SV = SI.getOperand(0); // Push the initial CaseRec onto the worklist CaseRecVector WorkList; - WorkList.push_back(CaseRec(CurMBB,0,0,CaseRange(Cases.begin(),Cases.end()))); + WorkList.push_back(CaseRec(SwitchMBB,0,0, + CaseRange(Cases.begin(),Cases.end()))); while (!WorkList.empty()) { // Grab a record representing a case range to process off the worklist CaseRec CR = WorkList.back(); WorkList.pop_back(); - if (handleBitTestsSwitchCase(CR, WorkList, SV, Default)) + if (handleBitTestsSwitchCase(CR, WorkList, SV, Default, SwitchMBB)) continue; // If the range has few cases (two or less) emit a series of specific // tests. - if (handleSmallSwitchRange(CR, WorkList, SV, Default)) + if (handleSmallSwitchRange(CR, WorkList, SV, Default, SwitchMBB)) continue; // If the switch has more than 5 blocks, and at least 40% dense, and the // target supports indirect branches, then emit a jump table rather than // lowering the switch to a binary tree of conditional branches. - if (handleJTSwitchCase(CR, WorkList, SV, Default)) + if (handleJTSwitchCase(CR, WorkList, SV, Default, SwitchMBB)) continue; // Emit binary tree. We need to pick a pivot, and push left and right ranges // onto the worklist. Leafs are handled via handleSmallSwitchRange() call. - handleBTSplitSwitchCase(CR, WorkList, SV, Default); + handleBTSplitSwitchCase(CR, WorkList, SV, Default, SwitchMBB); } } -void SelectionDAGBuilder::visitIndirectBr(IndirectBrInst &I) { +void SelectionDAGBuilder::visitIndirectBr(const IndirectBrInst &I) { + MachineBasicBlock *IndirectBrMBB = FuncInfo.MBBMap[I.getParent()]; + // Update machine-CFG edges with unique successors. 
SmallVector<BasicBlock*, 32> succs; succs.reserve(I.getNumSuccessors()); @@ -2066,14 +2045,14 @@ void SelectionDAGBuilder::visitIndirectBr(IndirectBrInst &I) { array_pod_sort(succs.begin(), succs.end()); succs.erase(std::unique(succs.begin(), succs.end()), succs.end()); for (unsigned i = 0, e = succs.size(); i != e; ++i) - CurMBB->addSuccessor(FuncInfo.MBBMap[succs[i]]); + IndirectBrMBB->addSuccessor(FuncInfo.MBBMap[succs[i]]); DAG.setRoot(DAG.getNode(ISD::BRIND, getCurDebugLoc(), MVT::Other, getControlRoot(), getValue(I.getAddress()))); } -void SelectionDAGBuilder::visitFSub(User &I) { +void SelectionDAGBuilder::visitFSub(const User &I) { // -0.0 - X --> fneg const Type *Ty = I.getType(); if (Ty->isVectorTy()) { @@ -2103,14 +2082,14 @@ void SelectionDAGBuilder::visitFSub(User &I) { visitBinary(I, ISD::FSUB); } -void SelectionDAGBuilder::visitBinary(User &I, unsigned OpCode) { +void SelectionDAGBuilder::visitBinary(const User &I, unsigned OpCode) { SDValue Op1 = getValue(I.getOperand(0)); SDValue Op2 = getValue(I.getOperand(1)); setValue(&I, DAG.getNode(OpCode, getCurDebugLoc(), Op1.getValueType(), Op1, Op2)); } -void SelectionDAGBuilder::visitShift(User &I, unsigned Opcode) { +void SelectionDAGBuilder::visitShift(const User &I, unsigned Opcode) { SDValue Op1 = getValue(I.getOperand(0)); SDValue Op2 = getValue(I.getOperand(1)); if (!I.getType()->isVectorTy() && @@ -2144,11 +2123,11 @@ void SelectionDAGBuilder::visitShift(User &I, unsigned Opcode) { Op1.getValueType(), Op1, Op2)); } -void SelectionDAGBuilder::visitICmp(User &I) { +void SelectionDAGBuilder::visitICmp(const User &I) { ICmpInst::Predicate predicate = ICmpInst::BAD_ICMP_PREDICATE; - if (ICmpInst *IC = dyn_cast<ICmpInst>(&I)) + if (const ICmpInst *IC = dyn_cast<ICmpInst>(&I)) predicate = IC->getPredicate(); - else if (ConstantExpr *IC = dyn_cast<ConstantExpr>(&I)) + else if (const ConstantExpr *IC = dyn_cast<ConstantExpr>(&I)) predicate = ICmpInst::Predicate(IC->getPredicate()); SDValue Op1 = getValue(I.getOperand(0)); SDValue Op2 = getValue(I.getOperand(1)); @@ -2158,11 +2137,11 @@ void SelectionDAGBuilder::visitICmp(User &I) { setValue(&I, DAG.getSetCC(getCurDebugLoc(), DestVT, Op1, Op2, Opcode)); } -void SelectionDAGBuilder::visitFCmp(User &I) { +void SelectionDAGBuilder::visitFCmp(const User &I) { FCmpInst::Predicate predicate = FCmpInst::BAD_FCMP_PREDICATE; - if (FCmpInst *FC = dyn_cast<FCmpInst>(&I)) + if (const FCmpInst *FC = dyn_cast<FCmpInst>(&I)) predicate = FC->getPredicate(); - else if (ConstantExpr *FC = dyn_cast<ConstantExpr>(&I)) + else if (const ConstantExpr *FC = dyn_cast<ConstantExpr>(&I)) predicate = FCmpInst::Predicate(FC->getPredicate()); SDValue Op1 = getValue(I.getOperand(0)); SDValue Op2 = getValue(I.getOperand(1)); @@ -2171,7 +2150,7 @@ void SelectionDAGBuilder::visitFCmp(User &I) { setValue(&I, DAG.getSetCC(getCurDebugLoc(), DestVT, Op1, Op2, Condition)); } -void SelectionDAGBuilder::visitSelect(User &I) { +void SelectionDAGBuilder::visitSelect(const User &I) { SmallVector<EVT, 4> ValueVTs; ComputeValueVTs(TLI, I.getType(), ValueVTs); unsigned NumValues = ValueVTs.size(); @@ -2196,14 +2175,14 @@ void SelectionDAGBuilder::visitSelect(User &I) { &Values[0], NumValues)); } -void SelectionDAGBuilder::visitTrunc(User &I) { +void SelectionDAGBuilder::visitTrunc(const User &I) { // TruncInst cannot be a no-op cast because sizeof(src) > sizeof(dest). 
SDValue N = getValue(I.getOperand(0)); EVT DestVT = TLI.getValueType(I.getType()); setValue(&I, DAG.getNode(ISD::TRUNCATE, getCurDebugLoc(), DestVT, N)); } -void SelectionDAGBuilder::visitZExt(User &I) { +void SelectionDAGBuilder::visitZExt(const User &I) { // ZExt cannot be a no-op cast because sizeof(src) < sizeof(dest). // ZExt also can't be a cast to bool for same reason. So, nothing much to do SDValue N = getValue(I.getOperand(0)); @@ -2211,7 +2190,7 @@ void SelectionDAGBuilder::visitZExt(User &I) { setValue(&I, DAG.getNode(ISD::ZERO_EXTEND, getCurDebugLoc(), DestVT, N)); } -void SelectionDAGBuilder::visitSExt(User &I) { +void SelectionDAGBuilder::visitSExt(const User &I) { // SExt cannot be a no-op cast because sizeof(src) < sizeof(dest). // SExt also can't be a cast to bool for same reason. So, nothing much to do SDValue N = getValue(I.getOperand(0)); @@ -2219,7 +2198,7 @@ void SelectionDAGBuilder::visitSExt(User &I) { setValue(&I, DAG.getNode(ISD::SIGN_EXTEND, getCurDebugLoc(), DestVT, N)); } -void SelectionDAGBuilder::visitFPTrunc(User &I) { +void SelectionDAGBuilder::visitFPTrunc(const User &I) { // FPTrunc is never a no-op cast, no need to check SDValue N = getValue(I.getOperand(0)); EVT DestVT = TLI.getValueType(I.getType()); @@ -2227,42 +2206,42 @@ void SelectionDAGBuilder::visitFPTrunc(User &I) { DestVT, N, DAG.getIntPtrConstant(0))); } -void SelectionDAGBuilder::visitFPExt(User &I){ +void SelectionDAGBuilder::visitFPExt(const User &I){ // FPTrunc is never a no-op cast, no need to check SDValue N = getValue(I.getOperand(0)); EVT DestVT = TLI.getValueType(I.getType()); setValue(&I, DAG.getNode(ISD::FP_EXTEND, getCurDebugLoc(), DestVT, N)); } -void SelectionDAGBuilder::visitFPToUI(User &I) { +void SelectionDAGBuilder::visitFPToUI(const User &I) { // FPToUI is never a no-op cast, no need to check SDValue N = getValue(I.getOperand(0)); EVT DestVT = TLI.getValueType(I.getType()); setValue(&I, DAG.getNode(ISD::FP_TO_UINT, getCurDebugLoc(), DestVT, N)); } -void SelectionDAGBuilder::visitFPToSI(User &I) { +void SelectionDAGBuilder::visitFPToSI(const User &I) { // FPToSI is never a no-op cast, no need to check SDValue N = getValue(I.getOperand(0)); EVT DestVT = TLI.getValueType(I.getType()); setValue(&I, DAG.getNode(ISD::FP_TO_SINT, getCurDebugLoc(), DestVT, N)); } -void SelectionDAGBuilder::visitUIToFP(User &I) { +void SelectionDAGBuilder::visitUIToFP(const User &I) { // UIToFP is never a no-op cast, no need to check SDValue N = getValue(I.getOperand(0)); EVT DestVT = TLI.getValueType(I.getType()); setValue(&I, DAG.getNode(ISD::UINT_TO_FP, getCurDebugLoc(), DestVT, N)); } -void SelectionDAGBuilder::visitSIToFP(User &I){ +void SelectionDAGBuilder::visitSIToFP(const User &I){ // SIToFP is never a no-op cast, no need to check SDValue N = getValue(I.getOperand(0)); EVT DestVT = TLI.getValueType(I.getType()); setValue(&I, DAG.getNode(ISD::SINT_TO_FP, getCurDebugLoc(), DestVT, N)); } -void SelectionDAGBuilder::visitPtrToInt(User &I) { +void SelectionDAGBuilder::visitPtrToInt(const User &I) { // What to do depends on the size of the integer and the size of the pointer. // We can either truncate, zero extend, or no-op, accordingly. SDValue N = getValue(I.getOperand(0)); @@ -2271,7 +2250,7 @@ void SelectionDAGBuilder::visitPtrToInt(User &I) { setValue(&I, DAG.getZExtOrTrunc(N, getCurDebugLoc(), DestVT)); } -void SelectionDAGBuilder::visitIntToPtr(User &I) { +void SelectionDAGBuilder::visitIntToPtr(const User &I) { // What to do depends on the size of the integer and the size of the pointer. 
// We can either truncate, zero extend, or no-op, accordingly. SDValue N = getValue(I.getOperand(0)); @@ -2280,7 +2259,7 @@ void SelectionDAGBuilder::visitIntToPtr(User &I) { setValue(&I, DAG.getZExtOrTrunc(N, getCurDebugLoc(), DestVT)); } -void SelectionDAGBuilder::visitBitCast(User &I) { +void SelectionDAGBuilder::visitBitCast(const User &I) { SDValue N = getValue(I.getOperand(0)); EVT DestVT = TLI.getValueType(I.getType()); @@ -2293,7 +2272,7 @@ void SelectionDAGBuilder::visitBitCast(User &I) { setValue(&I, N); // noop cast. } -void SelectionDAGBuilder::visitInsertElement(User &I) { +void SelectionDAGBuilder::visitInsertElement(const User &I) { SDValue InVec = getValue(I.getOperand(0)); SDValue InVal = getValue(I.getOperand(1)); SDValue InIdx = DAG.getNode(ISD::ZERO_EXTEND, getCurDebugLoc(), @@ -2304,7 +2283,7 @@ void SelectionDAGBuilder::visitInsertElement(User &I) { InVec, InVal, InIdx)); } -void SelectionDAGBuilder::visitExtractElement(User &I) { +void SelectionDAGBuilder::visitExtractElement(const User &I) { SDValue InVec = getValue(I.getOperand(0)); SDValue InIdx = DAG.getNode(ISD::ZERO_EXTEND, getCurDebugLoc(), TLI.getPointerTy(), @@ -2323,7 +2302,7 @@ static bool SequentialMask(SmallVectorImpl<int> &Mask, unsigned SIndx) { return true; } -void SelectionDAGBuilder::visitShuffleVector(User &I) { +void SelectionDAGBuilder::visitShuffleVector(const User &I) { SmallVector<int, 8> Mask; SDValue Src1 = getValue(I.getOperand(0)); SDValue Src2 = getValue(I.getOperand(1)); @@ -2504,7 +2483,7 @@ void SelectionDAGBuilder::visitShuffleVector(User &I) { VT, &Ops[0], Ops.size())); } -void SelectionDAGBuilder::visitInsertValue(InsertValueInst &I) { +void SelectionDAGBuilder::visitInsertValue(const InsertValueInst &I) { const Value *Op0 = I.getOperand(0); const Value *Op1 = I.getOperand(1); const Type *AggTy = I.getType(); @@ -2545,7 +2524,7 @@ void SelectionDAGBuilder::visitInsertValue(InsertValueInst &I) { &Values[0], NumAggValues)); } -void SelectionDAGBuilder::visitExtractValue(ExtractValueInst &I) { +void SelectionDAGBuilder::visitExtractValue(const ExtractValueInst &I) { const Value *Op0 = I.getOperand(0); const Type *AggTy = Op0->getType(); const Type *ValTy = I.getType(); @@ -2573,13 +2552,13 @@ void SelectionDAGBuilder::visitExtractValue(ExtractValueInst &I) { &Values[0], NumValValues)); } -void SelectionDAGBuilder::visitGetElementPtr(User &I) { +void SelectionDAGBuilder::visitGetElementPtr(const User &I) { SDValue N = getValue(I.getOperand(0)); const Type *Ty = I.getOperand(0)->getType(); - for (GetElementPtrInst::op_iterator OI = I.op_begin()+1, E = I.op_end(); + for (GetElementPtrInst::const_op_iterator OI = I.op_begin()+1, E = I.op_end(); OI != E; ++OI) { - Value *Idx = *OI; + const Value *Idx = *OI; if (const StructType *StTy = dyn_cast<StructType>(Ty)) { unsigned Field = cast<ConstantInt>(Idx)->getZExtValue(); if (Field) { @@ -2599,7 +2578,7 @@ void SelectionDAGBuilder::visitGetElementPtr(User &I) { Ty = cast<SequentialType>(Ty)->getElementType(); // If this is a constant subscript, handle it quickly. 
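The constant-subscript fast path named in the comment just above folds elementSize * index straight into the pointer offset; a stand-alone model (plain integers instead of TargetData and SDValues, used here only as an illustration) could be:

    #include <cstdint>

    // Add one constant GEP subscript to a running byte offset. The index is
    // signed (it came from getSExtValue()); unsigned wraparound matches how
    // pointer offsets behave.
    uint64_t addConstantIndex(uint64_t RunningOffset,
                              uint64_t ElementAllocSize,
                              int64_t Index) {
      if (Index == 0)
        return RunningOffset;                  // nothing to add for a zero index
      return RunningOffset +
             ElementAllocSize * static_cast<uint64_t>(Index);
    }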
- if (ConstantInt *CI = dyn_cast<ConstantInt>(Idx)) { + if (const ConstantInt *CI = dyn_cast<ConstantInt>(Idx)) { if (CI->getZExtValue() == 0) continue; uint64_t Offs = TD->getTypeAllocSize(Ty)*cast<ConstantInt>(CI)->getSExtValue(); @@ -2650,7 +2629,7 @@ void SelectionDAGBuilder::visitGetElementPtr(User &I) { setValue(&I, N); } -void SelectionDAGBuilder::visitAlloca(AllocaInst &I) { +void SelectionDAGBuilder::visitAlloca(const AllocaInst &I) { // If this is a fixed sized alloca in the entry block of the function, // allocate it statically on the stack. if (FuncInfo.StaticAllocaMap.count(&I)) @@ -2674,8 +2653,7 @@ void SelectionDAGBuilder::visitAlloca(AllocaInst &I) { // Handle alignment. If the requested alignment is less than or equal to // the stack alignment, ignore it. If the size is greater than or equal to // the stack alignment, we note this in the DYNAMIC_STACKALLOC node. - unsigned StackAlign = - TLI.getTargetMachine().getFrameInfo()->getStackAlignment(); + unsigned StackAlign = TM.getFrameInfo()->getStackAlignment(); if (Align <= StackAlign) Align = 0; @@ -2702,7 +2680,7 @@ void SelectionDAGBuilder::visitAlloca(AllocaInst &I) { FuncInfo.MF->getFrameInfo()->CreateVariableSizedObject(); } -void SelectionDAGBuilder::visitLoad(LoadInst &I) { +void SelectionDAGBuilder::visitLoad(const LoadInst &I) { const Value *SV = I.getOperand(0); SDValue Ptr = getValue(SV); @@ -2762,9 +2740,9 @@ void SelectionDAGBuilder::visitLoad(LoadInst &I) { &Values[0], NumValues)); } -void SelectionDAGBuilder::visitStore(StoreInst &I) { - Value *SrcV = I.getOperand(0); - Value *PtrV = I.getOperand(1); +void SelectionDAGBuilder::visitStore(const StoreInst &I) { + const Value *SrcV = I.getOperand(0); + const Value *PtrV = I.getOperand(1); SmallVector<EVT, 4> ValueVTs; SmallVector<uint64_t, 4> Offsets; @@ -2801,7 +2779,7 @@ void SelectionDAGBuilder::visitStore(StoreInst &I) { /// visitTargetIntrinsic - Lower a call of a target intrinsic to an INTRINSIC /// node. -void SelectionDAGBuilder::visitTargetIntrinsic(CallInst &I, +void SelectionDAGBuilder::visitTargetIntrinsic(const CallInst &I, unsigned Intrinsic) { bool HasChain = !I.doesNotAccessMemory(); bool OnlyLoad = HasChain && I.onlyReadsMemory(); @@ -2927,7 +2905,8 @@ getF32Constant(SelectionDAG &DAG, unsigned Flt) { /// visitIntrinsicCall: I is a call instruction /// Op is the associated NodeType for I const char * -SelectionDAGBuilder::implVisitBinaryAtomic(CallInst& I, ISD::NodeType Op) { +SelectionDAGBuilder::implVisitBinaryAtomic(const CallInst& I, + ISD::NodeType Op) { SDValue Root = getRoot(); SDValue L = DAG.getAtomic(Op, getCurDebugLoc(), @@ -2943,7 +2922,7 @@ SelectionDAGBuilder::implVisitBinaryAtomic(CallInst& I, ISD::NodeType Op) { // implVisitAluOverflow - Lower arithmetic overflow instrinsics. const char * -SelectionDAGBuilder::implVisitAluOverflow(CallInst &I, ISD::NodeType Op) { +SelectionDAGBuilder::implVisitAluOverflow(const CallInst &I, ISD::NodeType Op) { SDValue Op1 = getValue(I.getOperand(1)); SDValue Op2 = getValue(I.getOperand(2)); @@ -2955,7 +2934,7 @@ SelectionDAGBuilder::implVisitAluOverflow(CallInst &I, ISD::NodeType Op) { /// visitExp - Lower an exp intrinsic. Handles the special sequences for /// limited-precision mode. void -SelectionDAGBuilder::visitExp(CallInst &I) { +SelectionDAGBuilder::visitExp(const CallInst &I) { SDValue result; DebugLoc dl = getCurDebugLoc(); @@ -3081,7 +3060,7 @@ SelectionDAGBuilder::visitExp(CallInst &I) { /// visitLog - Lower a log intrinsic. 
Handles the special sequences for /// limited-precision mode. void -SelectionDAGBuilder::visitLog(CallInst &I) { +SelectionDAGBuilder::visitLog(const CallInst &I) { SDValue result; DebugLoc dl = getCurDebugLoc(); @@ -3191,7 +3170,7 @@ SelectionDAGBuilder::visitLog(CallInst &I) { /// visitLog2 - Lower a log2 intrinsic. Handles the special sequences for /// limited-precision mode. void -SelectionDAGBuilder::visitLog2(CallInst &I) { +SelectionDAGBuilder::visitLog2(const CallInst &I) { SDValue result; DebugLoc dl = getCurDebugLoc(); @@ -3300,7 +3279,7 @@ SelectionDAGBuilder::visitLog2(CallInst &I) { /// visitLog10 - Lower a log10 intrinsic. Handles the special sequences for /// limited-precision mode. void -SelectionDAGBuilder::visitLog10(CallInst &I) { +SelectionDAGBuilder::visitLog10(const CallInst &I) { SDValue result; DebugLoc dl = getCurDebugLoc(); @@ -3402,7 +3381,7 @@ SelectionDAGBuilder::visitLog10(CallInst &I) { /// visitExp2 - Lower an exp2 intrinsic. Handles the special sequences for /// limited-precision mode. void -SelectionDAGBuilder::visitExp2(CallInst &I) { +SelectionDAGBuilder::visitExp2(const CallInst &I) { SDValue result; DebugLoc dl = getCurDebugLoc(); @@ -3516,9 +3495,9 @@ SelectionDAGBuilder::visitExp2(CallInst &I) { /// visitPow - Lower a pow intrinsic. Handles the special sequences for /// limited-precision mode with x == 10.0f. void -SelectionDAGBuilder::visitPow(CallInst &I) { +SelectionDAGBuilder::visitPow(const CallInst &I) { SDValue result; - Value *Val = I.getOperand(1); + const Value *Val = I.getOperand(1); DebugLoc dl = getCurDebugLoc(); bool IsExp10 = false; @@ -3664,7 +3643,7 @@ static SDValue ExpandPowI(DebugLoc DL, SDValue LHS, SDValue RHS, if (Val == 0) return DAG.getConstantFP(1.0, LHS.getValueType()); - Function *F = DAG.getMachineFunction().getFunction(); + const Function *F = DAG.getMachineFunction().getFunction(); if (!F->hasFnAttr(Attribute::OptimizeForSize) || // If optimizing for size, don't insert too many multiplies. This // inserts up to 5 multiplies. @@ -3700,12 +3679,58 @@ static SDValue ExpandPowI(DebugLoc DL, SDValue LHS, SDValue RHS, return DAG.getNode(ISD::FPOWI, DL, LHS.getValueType(), LHS, RHS); } +/// EmitFuncArgumentDbgValue - If the DbgValueInst is a dbg_value of a function +/// argument, create the corresponding DBG_VALUE machine instruction for it now. +/// At the end of instruction selection, they will be inserted to the entry BB. +bool +SelectionDAGBuilder::EmitFuncArgumentDbgValue(const DbgValueInst &DI, + const Value *V, MDNode *Variable, + uint64_t Offset, + const SDValue &N) { + if (!isa<Argument>(V)) + return false; + + MachineFunction &MF = DAG.getMachineFunction(); + // Ignore inlined function arguments here. 
+ DIVariable DV(Variable); + if (DV.isInlinedFnArgument(MF.getFunction())) + return false; + + MachineBasicBlock *MBB = FuncInfo.MBBMap[DI.getParent()]; + if (MBB != &MF.front()) + return false; + + unsigned Reg = 0; + if (N.getOpcode() == ISD::CopyFromReg) { + Reg = cast<RegisterSDNode>(N.getOperand(1))->getReg(); + if (Reg && TargetRegisterInfo::isVirtualRegister(Reg)) { + MachineRegisterInfo &RegInfo = MF.getRegInfo(); + unsigned PR = RegInfo.getLiveInPhysReg(Reg); + if (PR) + Reg = PR; + } + } + + if (!Reg) { + DenseMap<const Value *, unsigned>::iterator VMI = FuncInfo.ValueMap.find(V); + if (VMI == FuncInfo.ValueMap.end()) + return false; + Reg = VMI->second; + } + + const TargetInstrInfo *TII = DAG.getTarget().getInstrInfo(); + MachineInstrBuilder MIB = BuildMI(MF, getCurDebugLoc(), + TII->get(TargetOpcode::DBG_VALUE)) + .addReg(Reg, RegState::Debug).addImm(Offset).addMetadata(Variable); + FuncInfo.ArgDbgValues.push_back(&*MIB); + return true; +} /// visitIntrinsicCall - Lower the call to the specified intrinsic function. If /// we want to emit this as a call to a named external function, return the name /// otherwise lower it and return null. const char * -SelectionDAGBuilder::visitIntrinsicCall(CallInst &I, unsigned Intrinsic) { +SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { DebugLoc dl = getCurDebugLoc(); SDValue Res; @@ -3792,44 +3817,76 @@ SelectionDAGBuilder::visitIntrinsicCall(CallInst &I, unsigned Intrinsic) { return 0; } case Intrinsic::dbg_declare: { - // FIXME: currently, we get here only if OptLevel != CodeGenOpt::None. - // The real handling of this intrinsic is in FastISel. - if (OptLevel != CodeGenOpt::None) - // FIXME: Variable debug info is not supported here. - return 0; - DbgDeclareInst &DI = cast<DbgDeclareInst>(I); + const DbgDeclareInst &DI = cast<DbgDeclareInst>(I); if (!DIDescriptor::ValidDebugInfo(DI.getVariable(), CodeGenOpt::None)) return 0; MDNode *Variable = DI.getVariable(); - Value *Address = DI.getAddress(); + // Parameters are handled specially. + bool isParameter = + DIVariable(Variable).getTag() == dwarf::DW_TAG_arg_variable; + const Value *Address = DI.getAddress(); if (!Address) return 0; - if (BitCastInst *BCI = dyn_cast<BitCastInst>(Address)) + if (const BitCastInst *BCI = dyn_cast<BitCastInst>(Address)) Address = BCI->getOperand(0); - AllocaInst *AI = dyn_cast<AllocaInst>(Address); - // Don't handle byval struct arguments or VLAs, for example. - if (!AI) - return 0; - DenseMap<const AllocaInst*, int>::iterator SI = - FuncInfo.StaticAllocaMap.find(AI); - if (SI == FuncInfo.StaticAllocaMap.end()) - return 0; // VLAs. - int FI = SI->second; + const AllocaInst *AI = dyn_cast<AllocaInst>(Address); + if (AI) { + // Don't handle byval arguments or VLAs, for example. + // Non-byval arguments are handled here (they refer to the stack temporary + // alloca at this point). + DenseMap<const AllocaInst*, int>::iterator SI = + FuncInfo.StaticAllocaMap.find(AI); + if (SI == FuncInfo.StaticAllocaMap.end()) + return 0; // VLAs. + int FI = SI->second; + + MachineModuleInfo &MMI = DAG.getMachineFunction().getMMI(); + if (!DI.getDebugLoc().isUnknown() && MMI.hasDebugInfo()) + MMI.setVariableDbgInfo(Variable, FI, DI.getDebugLoc()); + } - MachineModuleInfo &MMI = DAG.getMachineFunction().getMMI(); - if (!DI.getDebugLoc().isUnknown() && MMI.hasDebugInfo()) - MMI.setVariableDbgInfo(Variable, FI, DI.getDebugLoc()); + // Build an entry in DbgOrdering. 
Debug info input nodes get an SDNodeOrder + // but do not always have a corresponding SDNode built. The SDNodeOrder + // absolute, but not relative, values are different depending on whether + // debug info exists. + ++SDNodeOrder; + SDValue &N = NodeMap[Address]; + SDDbgValue *SDV; + if (N.getNode()) { + if (isParameter && !AI) { + FrameIndexSDNode *FINode = dyn_cast<FrameIndexSDNode>(N.getNode()); + if (FINode) + // Byval parameter. We have a frame index at this point. + SDV = DAG.getDbgValue(Variable, FINode->getIndex(), + 0, dl, SDNodeOrder); + else + // Can't do anything with other non-AI cases yet. This might be a + // parameter of a callee function that got inlined, for example. + return 0; + } else if (AI) + SDV = DAG.getDbgValue(Variable, N.getNode(), N.getResNo(), + 0, dl, SDNodeOrder); + else + // Can't do anything with other non-AI cases yet. + return 0; + DAG.AddDbgValue(SDV, N.getNode(), isParameter); + } else { + // This isn't useful, but it shows what we're missing. + SDV = DAG.getDbgValue(Variable, UndefValue::get(Address->getType()), + 0, dl, SDNodeOrder); + DAG.AddDbgValue(SDV, 0, isParameter); + } return 0; } case Intrinsic::dbg_value: { - DbgValueInst &DI = cast<DbgValueInst>(I); + const DbgValueInst &DI = cast<DbgValueInst>(I); if (!DIDescriptor::ValidDebugInfo(DI.getVariable(), CodeGenOpt::None)) return 0; MDNode *Variable = DI.getVariable(); uint64_t Offset = DI.getOffset(); - Value *V = DI.getValue(); + const Value *V = DI.getValue(); if (!V) return 0; @@ -3838,26 +3895,31 @@ SelectionDAGBuilder::visitIntrinsicCall(CallInst &I, unsigned Intrinsic) { // absolute, but not relative, values are different depending on whether // debug info exists. ++SDNodeOrder; + SDDbgValue *SDV; if (isa<ConstantInt>(V) || isa<ConstantFP>(V)) { - DAG.AddDbgValue(DAG.getDbgValue(Variable, V, Offset, dl, SDNodeOrder)); + SDV = DAG.getDbgValue(Variable, V, Offset, dl, SDNodeOrder); + DAG.AddDbgValue(SDV, 0, false); } else { SDValue &N = NodeMap[V]; - if (N.getNode()) - DAG.AddDbgValue(DAG.getDbgValue(Variable, N.getNode(), - N.getResNo(), Offset, dl, SDNodeOrder), - N.getNode()); - else + if (N.getNode()) { + if (!EmitFuncArgumentDbgValue(DI, V, Variable, Offset, N)) { + SDV = DAG.getDbgValue(Variable, N.getNode(), + N.getResNo(), Offset, dl, SDNodeOrder); + DAG.AddDbgValue(SDV, N.getNode(), false); + } + } else { // We may expand this to cover more cases. One case where we have no // data available is an unreferenced parameter; we need this fallback. - DAG.AddDbgValue(DAG.getDbgValue(Variable, - UndefValue::get(V->getType()), - Offset, dl, SDNodeOrder)); + SDV = DAG.getDbgValue(Variable, UndefValue::get(V->getType()), + Offset, dl, SDNodeOrder); + DAG.AddDbgValue(SDV, 0, false); + } } // Build a debug info table entry. - if (BitCastInst *BCI = dyn_cast<BitCastInst>(V)) + if (const BitCastInst *BCI = dyn_cast<BitCastInst>(V)) V = BCI->getOperand(0); - AllocaInst *AI = dyn_cast<AllocaInst>(V); + const AllocaInst *AI = dyn_cast<AllocaInst>(V); // Don't handle byval struct arguments or VLAs, for example. if (!AI) return 0; @@ -3874,7 +3936,8 @@ SelectionDAGBuilder::visitIntrinsicCall(CallInst &I, unsigned Intrinsic) { } case Intrinsic::eh_exception: { // Insert the EXCEPTIONADDR instruction. 
- assert(CurMBB->isLandingPad() &&"Call to eh.exception not in landing pad!"); + assert(FuncInfo.MBBMap[I.getParent()]->isLandingPad() && + "Call to eh.exception not in landing pad!"); SDVTList VTs = DAG.getVTList(TLI.getPointerTy(), MVT::Other); SDValue Ops[1]; Ops[0] = DAG.getRoot(); @@ -3885,16 +3948,17 @@ SelectionDAGBuilder::visitIntrinsicCall(CallInst &I, unsigned Intrinsic) { } case Intrinsic::eh_selector: { + MachineBasicBlock *CallMBB = FuncInfo.MBBMap[I.getParent()]; MachineModuleInfo &MMI = DAG.getMachineFunction().getMMI(); - if (CurMBB->isLandingPad()) - AddCatchInfo(I, &MMI, CurMBB); + if (CallMBB->isLandingPad()) + AddCatchInfo(I, &MMI, CallMBB); else { #ifndef NDEBUG FuncInfo.CatchInfoLost.insert(&I); #endif // FIXME: Mark exception selector register as live in. Hack for PR1508. unsigned Reg = TLI.getExceptionSelectorRegister(); - if (Reg) CurMBB->addLiveIn(Reg); + if (Reg) FuncInfo.MBBMap[I.getParent()]->addLiveIn(Reg); } // Insert the EHSELECTION instruction. @@ -3977,7 +4041,7 @@ SelectionDAGBuilder::visitIntrinsicCall(CallInst &I, unsigned Intrinsic) { case Intrinsic::convertuu: Code = ISD::CVT_UU; break; } EVT DestVT = TLI.getValueType(I.getType()); - Value *Op1 = I.getOperand(1); + const Value *Op1 = I.getOperand(1); Res = DAG.getConvertRndSat(DestVT, getCurDebugLoc(), getValue(Op1), DAG.getValueType(DestVT), DAG.getValueType(getValue(Op1).getValueType()), @@ -4146,8 +4210,8 @@ SelectionDAGBuilder::visitIntrinsicCall(CallInst &I, unsigned Intrinsic) { } case Intrinsic::gcroot: if (GFI) { - Value *Alloca = I.getOperand(1); - Constant *TypeMap = cast<Constant>(I.getOperand(2)); + const Value *Alloca = I.getOperand(1); + const Constant *TypeMap = cast<Constant>(I.getOperand(2)); FrameIndexSDNode *FI = cast<FrameIndexSDNode>(getValue(Alloca).getNode()); GFI->addStackRoot(FI->getIndex(), TypeMap); @@ -4244,93 +4308,7 @@ SelectionDAGBuilder::visitIntrinsicCall(CallInst &I, unsigned Intrinsic) { } } -/// Test if the given instruction is in a position to be optimized -/// with a tail-call. This roughly means that it's in a block with -/// a return and there's nothing that needs to be scheduled -/// between it and the return. -/// -/// This function only tests target-independent requirements. -static bool -isInTailCallPosition(CallSite CS, Attributes CalleeRetAttr, - const TargetLowering &TLI) { - const Instruction *I = CS.getInstruction(); - const BasicBlock *ExitBB = I->getParent(); - const TerminatorInst *Term = ExitBB->getTerminator(); - const ReturnInst *Ret = dyn_cast<ReturnInst>(Term); - const Function *F = ExitBB->getParent(); - - // The block must end in a return statement or unreachable. - // - // FIXME: Decline tailcall if it's not guaranteed and if the block ends in - // an unreachable, for now. The way tailcall optimization is currently - // implemented means it will add an epilogue followed by a jump. That is - // not profitable. Also, if the callee is a special function (e.g. - // longjmp on x86), it can end up causing miscompilation that has not - // been fully understood. - if (!Ret && - (!GuaranteedTailCallOpt || !isa<UnreachableInst>(Term))) return false; - - // If I will have a chain, make sure no other instruction that will have a - // chain interposes between I and the return. - if (I->mayHaveSideEffects() || I->mayReadFromMemory() || - !I->isSafeToSpeculativelyExecute()) - for (BasicBlock::const_iterator BBI = prior(prior(ExitBB->end())); ; - --BBI) { - if (&*BBI == I) - break; - // Debug info intrinsics do not get in the way of tail call optimization. 
- if (isa<DbgInfoIntrinsic>(BBI)) - continue; - if (BBI->mayHaveSideEffects() || BBI->mayReadFromMemory() || - !BBI->isSafeToSpeculativelyExecute()) - return false; - } - - // If the block ends with a void return or unreachable, it doesn't matter - // what the call's return type is. - if (!Ret || Ret->getNumOperands() == 0) return true; - - // If the return value is undef, it doesn't matter what the call's - // return type is. - if (isa<UndefValue>(Ret->getOperand(0))) return true; - - // Conservatively require the attributes of the call to match those of - // the return. Ignore noalias because it doesn't affect the call sequence. - unsigned CallerRetAttr = F->getAttributes().getRetAttributes(); - if ((CalleeRetAttr ^ CallerRetAttr) & ~Attribute::NoAlias) - return false; - - // It's not safe to eliminate the sign / zero extension of the return value. - if ((CallerRetAttr & Attribute::ZExt) || (CallerRetAttr & Attribute::SExt)) - return false; - - // Otherwise, make sure the unmodified return value of I is the return value. - for (const Instruction *U = dyn_cast<Instruction>(Ret->getOperand(0)); ; - U = dyn_cast<Instruction>(U->getOperand(0))) { - if (!U) - return false; - if (!U->hasOneUse()) - return false; - if (U == I) - break; - // Check for a truly no-op truncate. - if (isa<TruncInst>(U) && - TLI.isTruncateFree(U->getOperand(0)->getType(), U->getType())) - continue; - // Check for a truly no-op bitcast. - if (isa<BitCastInst>(U) && - (U->getOperand(0)->getType() == U->getType() || - (U->getOperand(0)->getType()->isPointerTy() && - U->getType()->isPointerTy()))) - continue; - // Otherwise it's not a true no-op. - return false; - } - - return true; -} - -void SelectionDAGBuilder::LowerCallTo(CallSite CS, SDValue Callee, +void SelectionDAGBuilder::LowerCallTo(ImmutableCallSite CS, SDValue Callee, bool isTailCall, MachineBasicBlock *LandingPad) { const PointerType *PT = cast<PointerType>(CS.getCalledValue()->getType()); @@ -4378,7 +4356,7 @@ void SelectionDAGBuilder::LowerCallTo(CallSite CS, SDValue Callee, RetTy = Type::getVoidTy(FTy->getContext()); } - for (CallSite::arg_iterator i = CS.arg_begin(), e = CS.arg_end(); + for (ImmutableCallSite::arg_iterator i = CS.arg_begin(), e = CS.arg_end(); i != e; ++i) { SDValue ArgNode = getValue(*i); Entry.Node = ArgNode; Entry.Ty = (*i)->getType(); @@ -4509,12 +4487,12 @@ void SelectionDAGBuilder::LowerCallTo(CallSite CS, SDValue Callee, /// IsOnlyUsedInZeroEqualityComparison - Return true if it only matters that the /// value is equal or not-equal to zero. -static bool IsOnlyUsedInZeroEqualityComparison(Value *V) { - for (Value::use_iterator UI = V->use_begin(), E = V->use_end(); +static bool IsOnlyUsedInZeroEqualityComparison(const Value *V) { + for (Value::const_use_iterator UI = V->use_begin(), E = V->use_end(); UI != E; ++UI) { - if (ICmpInst *IC = dyn_cast<ICmpInst>(*UI)) + if (const ICmpInst *IC = dyn_cast<ICmpInst>(*UI)) if (IC->isEquality()) - if (Constant *C = dyn_cast<Constant>(IC->getOperand(1))) + if (const Constant *C = dyn_cast<Constant>(IC->getOperand(1))) if (C->isNullValue()) continue; // Unknown instruction. @@ -4523,17 +4501,20 @@ static bool IsOnlyUsedInZeroEqualityComparison(Value *V) { return true; } -static SDValue getMemCmpLoad(Value *PtrVal, MVT LoadVT, const Type *LoadTy, +static SDValue getMemCmpLoad(const Value *PtrVal, MVT LoadVT, + const Type *LoadTy, SelectionDAGBuilder &Builder) { // Check to see if this load can be trivially constant folded, e.g. if the // input is from a string literal. 
- if (Constant *LoadInput = dyn_cast<Constant>(PtrVal)) { + if (const Constant *LoadInput = dyn_cast<Constant>(PtrVal)) { // Cast pointer to the type we really want to load. - LoadInput = ConstantExpr::getBitCast(LoadInput, + LoadInput = ConstantExpr::getBitCast(const_cast<Constant *>(LoadInput), PointerType::getUnqual(LoadTy)); - if (Constant *LoadCst = ConstantFoldLoadFromConstPtr(LoadInput, Builder.TD)) + if (const Constant *LoadCst = + ConstantFoldLoadFromConstPtr(const_cast<Constant *>(LoadInput), + Builder.TD)) return Builder.getValue(LoadCst); } @@ -4566,18 +4547,18 @@ static SDValue getMemCmpLoad(Value *PtrVal, MVT LoadVT, const Type *LoadTy, /// visitMemCmpCall - See if we can lower a call to memcmp in an optimized form. /// If so, return true and lower it, otherwise return false and it will be /// lowered like a normal call. -bool SelectionDAGBuilder::visitMemCmpCall(CallInst &I) { +bool SelectionDAGBuilder::visitMemCmpCall(const CallInst &I) { // Verify that the prototype makes sense. int memcmp(void*,void*,size_t) if (I.getNumOperands() != 4) return false; - Value *LHS = I.getOperand(1), *RHS = I.getOperand(2); + const Value *LHS = I.getOperand(1), *RHS = I.getOperand(2); if (!LHS->getType()->isPointerTy() || !RHS->getType()->isPointerTy() || !I.getOperand(3)->getType()->isIntegerTy() || !I.getType()->isIntegerTy()) return false; - ConstantInt *Size = dyn_cast<ConstantInt>(I.getOperand(3)); + const ConstantInt *Size = dyn_cast<ConstantInt>(I.getOperand(3)); // memcmp(S1,S2,2) != 0 -> (*(short*)LHS != *(short*)RHS) != 0 // memcmp(S1,S2,4) != 0 -> (*(int*)LHS != *(int*)RHS) != 0 @@ -4643,11 +4624,11 @@ bool SelectionDAGBuilder::visitMemCmpCall(CallInst &I) { } -void SelectionDAGBuilder::visitCall(CallInst &I) { +void SelectionDAGBuilder::visitCall(const CallInst &I) { const char *RenameFn = 0; if (Function *F = I.getCalledFunction()) { if (F->isDeclaration()) { - const TargetIntrinsicInfo *II = TLI.getTargetMachine().getIntrinsicInfo(); + const TargetIntrinsicInfo *II = TM.getIntrinsicInfo(); if (II) { if (unsigned IID = II->getIntrinsicID(F)) { RenameFn = visitIntrinsicCall(I, IID); @@ -4871,14 +4852,13 @@ void RegsForValue::getCopyToRegs(SDValue Val, SelectionDAG &DAG, DebugLoc dl, /// AddInlineAsmOperands - Add this value to the specified inlineasm node /// operand list. This adds the code marker and includes the number of /// values added into it. -void RegsForValue::AddInlineAsmOperands(unsigned Code, - bool HasMatching,unsigned MatchingIdx, +void RegsForValue::AddInlineAsmOperands(unsigned Code, bool HasMatching, + unsigned MatchingIdx, SelectionDAG &DAG, std::vector<SDValue> &Ops) const { - assert(Regs.size() < (1 << 13) && "Too many inline asm outputs!"); - unsigned Flag = Code | (Regs.size() << 3); + unsigned Flag = InlineAsm::getFlagWord(Code, Regs.size()); if (HasMatching) - Flag |= 0x80000000 | (MatchingIdx << 16); + Flag = InlineAsm::getFlagWordForMatchingOp(Flag, MatchingIdx); SDValue Res = DAG.getTargetConstant(Flag, MVT::i32); Ops.push_back(Res); @@ -4994,7 +4974,7 @@ public: if (isIndirect) { const llvm::PointerType *PtrTy = dyn_cast<PointerType>(OpTy); if (!PtrTy) - llvm_report_error("Indirect operand for inline asm not a pointer!"); + report_fatal_error("Indirect operand for inline asm not a pointer!"); OpTy = PtrTy->getElementType(); } @@ -5214,31 +5194,10 @@ GetRegistersForValue(SDISelAsmOperandInfo &OpInfo, // Otherwise, we couldn't allocate enough registers for this. 
} -/// hasInlineAsmMemConstraint - Return true if the inline asm instruction being -/// processed uses a memory 'm' constraint. -static bool -hasInlineAsmMemConstraint(std::vector<InlineAsm::ConstraintInfo> &CInfos, - const TargetLowering &TLI) { - for (unsigned i = 0, e = CInfos.size(); i != e; ++i) { - InlineAsm::ConstraintInfo &CI = CInfos[i]; - for (unsigned j = 0, ee = CI.Codes.size(); j != ee; ++j) { - TargetLowering::ConstraintType CType = TLI.getConstraintType(CI.Codes[j]); - if (CType == TargetLowering::C_Memory) - return true; - } - - // Indirect operand accesses access memory. - if (CI.isIndirect) - return true; - } - - return false; -} - /// visitInlineAsm - Handle a call to an InlineAsm object. /// -void SelectionDAGBuilder::visitInlineAsm(CallSite CS) { - InlineAsm *IA = cast<InlineAsm>(CS.getCalledValue()); +void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { + const InlineAsm *IA = cast<InlineAsm>(CS.getCalledValue()); /// ConstraintOperands - Information about all of the constraints. std::vector<SDISelAsmOperandInfo> ConstraintOperands; @@ -5274,7 +5233,7 @@ void SelectionDAGBuilder::visitInlineAsm(CallSite CS) { case InlineAsm::isOutput: // Indirect outputs just consume an argument. if (OpInfo.isIndirect) { - OpInfo.CallOperandVal = CS.getArgument(ArgNo++); + OpInfo.CallOperandVal = const_cast<Value *>(CS.getArgument(ArgNo++)); break; } @@ -5291,7 +5250,7 @@ void SelectionDAGBuilder::visitInlineAsm(CallSite CS) { ++ResNo; break; case InlineAsm::isInput: - OpInfo.CallOperandVal = CS.getArgument(ArgNo++); + OpInfo.CallOperandVal = const_cast<Value *>(CS.getArgument(ArgNo++)); break; case InlineAsm::isClobber: // Nothing to do. @@ -5304,7 +5263,7 @@ void SelectionDAGBuilder::visitInlineAsm(CallSite CS) { // Strip bitcasts, if any. This mostly comes up for functions. OpInfo.CallOperandVal = OpInfo.CallOperandVal->stripPointerCasts(); - if (BasicBlock *BB = dyn_cast<BasicBlock>(OpInfo.CallOperandVal)) { + if (const BasicBlock *BB = dyn_cast<BasicBlock>(OpInfo.CallOperandVal)) { OpInfo.CallOperand = DAG.getBasicBlock(FuncInfo.MBBMap[BB]); } else { OpInfo.CallOperand = getValue(OpInfo.CallOperandVal); @@ -5327,14 +5286,15 @@ void SelectionDAGBuilder::visitInlineAsm(CallSite CS) { // error. if (OpInfo.hasMatchingInput()) { SDISelAsmOperandInfo &Input = ConstraintOperands[OpInfo.MatchingInput]; + if (OpInfo.ConstraintVT != Input.ConstraintVT) { if ((OpInfo.ConstraintVT.isInteger() != Input.ConstraintVT.isInteger()) || (OpInfo.ConstraintVT.getSizeInBits() != Input.ConstraintVT.getSizeInBits())) { - llvm_report_error("Unsupported asm: input constraint" - " with a matching output constraint of incompatible" - " type!"); + report_fatal_error("Unsupported asm: input constraint" + " with a matching output constraint of" + " incompatible type!"); } Input.ConstraintVT = OpInfo.ConstraintVT; } @@ -5356,7 +5316,7 @@ void SelectionDAGBuilder::visitInlineAsm(CallSite CS) { // If the operand is a float, integer, or vector constant, spill to a // constant pool entry to get its address. 
- Value *OpVal = OpInfo.CallOperandVal; + const Value *OpVal = OpInfo.CallOperandVal; if (isa<ConstantFP>(OpVal) || isa<ConstantInt>(OpVal) || isa<ConstantVector>(OpVal)) { OpInfo.CallOperand = DAG.getConstantPool(cast<Constant>(OpVal), @@ -5409,6 +5369,11 @@ void SelectionDAGBuilder::visitInlineAsm(CallSite CS) { DAG.getTargetExternalSymbol(IA->getAsmString().c_str(), TLI.getPointerTy())); + // If we have a !srcloc metadata node associated with it, we want to attach + // this to the ultimately generated inline asm machineinstr. To do this, we + // pass in the third operand as this (potentially null) inline asm MDNode. + const MDNode *SrcLoc = CS.getInstruction()->getMetadata("srcloc"); + AsmNodeOperands.push_back(DAG.getMDNode(SrcLoc)); // Loop over all of the inputs, copying the operand values into the // appropriate registers and processing the output regs. @@ -5428,8 +5393,8 @@ void SelectionDAGBuilder::visitInlineAsm(CallSite CS) { assert(OpInfo.isIndirect && "Memory output must be indirect operand"); // Add information to the INLINEASM node to know about this output. - unsigned ResOpType = 4/*MEM*/ | (1<<3); - AsmNodeOperands.push_back(DAG.getTargetConstant(ResOpType, + unsigned OpFlags = InlineAsm::getFlagWord(InlineAsm::Kind_Mem, 1); + AsmNodeOperands.push_back(DAG.getTargetConstant(OpFlags, TLI.getPointerTy())); AsmNodeOperands.push_back(OpInfo.CallOperand); break; @@ -5439,10 +5404,9 @@ void SelectionDAGBuilder::visitInlineAsm(CallSite CS) { // Copy the output from the appropriate register. Find a register that // we can use. - if (OpInfo.AssignedRegs.Regs.empty()) { - llvm_report_error("Couldn't allocate output reg for" - " constraint '" + OpInfo.ConstraintCode + "'!"); - } + if (OpInfo.AssignedRegs.Regs.empty()) + report_fatal_error("Couldn't allocate output reg for constraint '" + + Twine(OpInfo.ConstraintCode) + "'!"); // If this is an indirect operand, store through the pointer after the // asm. @@ -5459,8 +5423,8 @@ void SelectionDAGBuilder::visitInlineAsm(CallSite CS) { // Add information to the INLINEASM node to know that this register is // set. OpInfo.AssignedRegs.AddInlineAsmOperands(OpInfo.isEarlyClobber ? - 6 /* EARLYCLOBBER REGDEF */ : - 2 /* REGDEF */ , + InlineAsm::Kind_RegDefEarlyClobber : + InlineAsm::Kind_RegDef, false, 0, DAG, @@ -5477,27 +5441,30 @@ void SelectionDAGBuilder::visitInlineAsm(CallSite CS) { // Scan until we find the definition we already emitted of this operand. // When we find it, create a RegsForValue operand. - unsigned CurOp = 2; // The first operand. + unsigned CurOp = InlineAsm::Op_FirstOperand; for (; OperandNo; --OperandNo) { // Advance to the next operand. unsigned OpFlag = cast<ConstantSDNode>(AsmNodeOperands[CurOp])->getZExtValue(); - assert(((OpFlag & 7) == 2 /*REGDEF*/ || - (OpFlag & 7) == 6 /*EARLYCLOBBER REGDEF*/ || - (OpFlag & 7) == 4 /*MEM*/) && - "Skipped past definitions?"); + assert((InlineAsm::isRegDefKind(OpFlag) || + InlineAsm::isRegDefEarlyClobberKind(OpFlag) || + InlineAsm::isMemKind(OpFlag)) && "Skipped past definitions?"); CurOp += InlineAsm::getNumOperandRegisters(OpFlag)+1; } unsigned OpFlag = cast<ConstantSDNode>(AsmNodeOperands[CurOp])->getZExtValue(); - if ((OpFlag & 7) == 2 /*REGDEF*/ - || (OpFlag & 7) == 6 /* EARLYCLOBBER REGDEF */) { + if (InlineAsm::isRegDefKind(OpFlag) || + InlineAsm::isRegDefEarlyClobberKind(OpFlag)) { // Add (OpFlag&0xffff)>>3 registers to MatchedRegs. 
if (OpInfo.isIndirect) { - llvm_report_error("Don't know how to handle tied indirect " - "register inputs yet!"); + // This happens on gcc/testsuite/gcc.dg/pr8788-1.c + LLVMContext &Ctx = *DAG.getContext(); + Ctx.emitError(CS.getInstruction(), "inline asm not supported yet:" + " don't know how to handle tied " + "indirect register inputs"); } + RegsForValue MatchedRegs; MatchedRegs.TLI = &TLI; MatchedRegs.ValueVTs.push_back(InOperandVal.getValueType()); @@ -5512,22 +5479,23 @@ void SelectionDAGBuilder::visitInlineAsm(CallSite CS) { // Use the produced MatchedRegs object to MatchedRegs.getCopyToRegs(InOperandVal, DAG, getCurDebugLoc(), Chain, &Flag); - MatchedRegs.AddInlineAsmOperands(1 /*REGUSE*/, + MatchedRegs.AddInlineAsmOperands(InlineAsm::Kind_RegUse, true, OpInfo.getMatchedOperand(), DAG, AsmNodeOperands); break; - } else { - assert(((OpFlag & 7) == 4) && "Unknown matching constraint!"); - assert((InlineAsm::getNumOperandRegisters(OpFlag)) == 1 && - "Unexpected number of operands"); - // Add information to the INLINEASM node to know about this input. - // See InlineAsm.h isUseOperandTiedToDef. - OpFlag |= 0x80000000 | (OpInfo.getMatchedOperand() << 16); - AsmNodeOperands.push_back(DAG.getTargetConstant(OpFlag, - TLI.getPointerTy())); - AsmNodeOperands.push_back(AsmNodeOperands[CurOp+1]); - break; } + + assert(InlineAsm::isMemKind(OpFlag) && "Unknown matching constraint!"); + assert(InlineAsm::getNumOperandRegisters(OpFlag) == 1 && + "Unexpected number of operands"); + // Add information to the INLINEASM node to know about this input. + // See InlineAsm.h isUseOperandTiedToDef. + OpFlag = InlineAsm::getFlagWordForMatchingOp(OpFlag, + OpInfo.getMatchedOperand()); + AsmNodeOperands.push_back(DAG.getTargetConstant(OpFlag, + TLI.getPointerTy())); + AsmNodeOperands.push_back(AsmNodeOperands[CurOp+1]); + break; } if (OpInfo.ConstraintType == TargetLowering::C_Other) { @@ -5537,24 +5505,26 @@ void SelectionDAGBuilder::visitInlineAsm(CallSite CS) { std::vector<SDValue> Ops; TLI.LowerAsmOperandForConstraint(InOperandVal, OpInfo.ConstraintCode[0], hasMemory, Ops, DAG); - if (Ops.empty()) { - llvm_report_error("Invalid operand for inline asm" - " constraint '" + OpInfo.ConstraintCode + "'!"); - } + if (Ops.empty()) + report_fatal_error("Invalid operand for inline asm constraint '" + + Twine(OpInfo.ConstraintCode) + "'!"); // Add information to the INLINEASM node to know about this input. - unsigned ResOpType = 3 /*IMM*/ | (Ops.size() << 3); + unsigned ResOpType = + InlineAsm::getFlagWord(InlineAsm::Kind_Imm, Ops.size()); AsmNodeOperands.push_back(DAG.getTargetConstant(ResOpType, TLI.getPointerTy())); AsmNodeOperands.insert(AsmNodeOperands.end(), Ops.begin(), Ops.end()); break; - } else if (OpInfo.ConstraintType == TargetLowering::C_Memory) { + } + + if (OpInfo.ConstraintType == TargetLowering::C_Memory) { assert(OpInfo.isIndirect && "Operand must be indirect to be a mem!"); assert(InOperandVal.getValueType() == TLI.getPointerTy() && "Memory operands expect pointer values"); // Add information to the INLINEASM node to know about this input. - unsigned ResOpType = 4/*MEM*/ | (1<<3); + unsigned ResOpType = InlineAsm::getFlagWord(InlineAsm::Kind_Mem, 1); AsmNodeOperands.push_back(DAG.getTargetConstant(ResOpType, TLI.getPointerTy())); AsmNodeOperands.push_back(InOperandVal); @@ -5569,15 +5539,14 @@ void SelectionDAGBuilder::visitInlineAsm(CallSite CS) { // Copy the input into the appropriate registers. 
if (OpInfo.AssignedRegs.Regs.empty() || - !OpInfo.AssignedRegs.areValueTypesLegal()) { - llvm_report_error("Couldn't allocate input reg for" - " constraint '"+ OpInfo.ConstraintCode +"'!"); - } + !OpInfo.AssignedRegs.areValueTypesLegal()) + report_fatal_error("Couldn't allocate input reg for constraint '" + + Twine(OpInfo.ConstraintCode) + "'!"); OpInfo.AssignedRegs.getCopyToRegs(InOperandVal, DAG, getCurDebugLoc(), Chain, &Flag); - OpInfo.AssignedRegs.AddInlineAsmOperands(1/*REGUSE*/, false, 0, + OpInfo.AssignedRegs.AddInlineAsmOperands(InlineAsm::Kind_RegUse, false, 0, DAG, AsmNodeOperands); break; } @@ -5585,7 +5554,8 @@ void SelectionDAGBuilder::visitInlineAsm(CallSite CS) { // Add the clobbered value to the operand list, so that the register // allocator is aware that the physreg got clobbered. if (!OpInfo.AssignedRegs.Regs.empty()) - OpInfo.AssignedRegs.AddInlineAsmOperands(6 /* EARLYCLOBBER REGDEF */, + OpInfo.AssignedRegs.AddInlineAsmOperands( + InlineAsm::Kind_RegDefEarlyClobber, false, 0, DAG, AsmNodeOperands); break; @@ -5593,7 +5563,7 @@ void SelectionDAGBuilder::visitInlineAsm(CallSite CS) { } } - // Finish up input operands. + // Finish up input operands. Set the input chain and add the flag last. AsmNodeOperands[0] = Chain; if (Flag.getNode()) AsmNodeOperands.push_back(Flag); @@ -5638,17 +5608,16 @@ void SelectionDAGBuilder::visitInlineAsm(CallSite CS) { return; } - std::vector<std::pair<SDValue, Value*> > StoresToEmit; + std::vector<std::pair<SDValue, const Value *> > StoresToEmit; // Process indirect outputs, first output all of the flagged copies out of // physregs. for (unsigned i = 0, e = IndirectStoresToEmit.size(); i != e; ++i) { RegsForValue &OutRegs = IndirectStoresToEmit[i].first; - Value *Ptr = IndirectStoresToEmit[i].second; + const Value *Ptr = IndirectStoresToEmit[i].second; SDValue OutVal = OutRegs.getCopyFromRegs(DAG, getCurDebugLoc(), Chain, &Flag); StoresToEmit.push_back(std::make_pair(OutVal, Ptr)); - } // Emit the non-flagged stores from the physregs. @@ -5669,14 +5638,14 @@ void SelectionDAGBuilder::visitInlineAsm(CallSite CS) { DAG.setRoot(Chain); } -void SelectionDAGBuilder::visitVAStart(CallInst &I) { +void SelectionDAGBuilder::visitVAStart(const CallInst &I) { DAG.setRoot(DAG.getNode(ISD::VASTART, getCurDebugLoc(), MVT::Other, getRoot(), getValue(I.getOperand(1)), DAG.getSrcValue(I.getOperand(1)))); } -void SelectionDAGBuilder::visitVAArg(VAArgInst &I) { +void SelectionDAGBuilder::visitVAArg(const VAArgInst &I) { SDValue V = DAG.getVAArg(TLI.getValueType(I.getType()), getCurDebugLoc(), getRoot(), getValue(I.getOperand(0)), DAG.getSrcValue(I.getOperand(0))); @@ -5684,14 +5653,14 @@ void SelectionDAGBuilder::visitVAArg(VAArgInst &I) { DAG.setRoot(V.getValue(1)); } -void SelectionDAGBuilder::visitVAEnd(CallInst &I) { +void SelectionDAGBuilder::visitVAEnd(const CallInst &I) { DAG.setRoot(DAG.getNode(ISD::VAEND, getCurDebugLoc(), MVT::Other, getRoot(), getValue(I.getOperand(1)), DAG.getSrcValue(I.getOperand(1)))); } -void SelectionDAGBuilder::visitVACopy(CallInst &I) { +void SelectionDAGBuilder::visitVACopy(const CallInst &I) { DAG.setRoot(DAG.getNode(ISD::VACOPY, getCurDebugLoc(), MVT::Other, getRoot(), getValue(I.getOperand(1)), @@ -5711,7 +5680,8 @@ TargetLowering::LowerCallTo(SDValue Chain, const Type *RetTy, CallingConv::ID CallConv, bool isTailCall, bool isReturnValueUsed, SDValue Callee, - ArgListTy &Args, SelectionDAG &DAG, DebugLoc dl) { + ArgListTy &Args, SelectionDAG &DAG, + DebugLoc dl) const { // Handle all of the outgoing arguments. 
SmallVector<ISD::OutputArg, 32> Outs; for (unsigned i = 0, e = Args.size(); i != e; ++i) { @@ -5862,18 +5832,19 @@ TargetLowering::LowerCallTo(SDValue Chain, const Type *RetTy, void TargetLowering::LowerOperationWrapper(SDNode *N, SmallVectorImpl<SDValue> &Results, - SelectionDAG &DAG) { + SelectionDAG &DAG) const { SDValue Res = LowerOperation(SDValue(N, 0), DAG); if (Res.getNode()) Results.push_back(Res); } -SDValue TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) { +SDValue TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { llvm_unreachable("LowerOperation not implemented for this target!"); return SDValue(); } -void SelectionDAGBuilder::CopyValueToVirtualRegister(Value *V, unsigned Reg) { +void +SelectionDAGBuilder::CopyValueToVirtualRegister(const Value *V, unsigned Reg) { SDValue Op = getValue(V); assert((Op.getOpcode() != ISD::CopyFromReg || cast<RegisterSDNode>(Op.getOperand(1))->getReg() != Reg) && @@ -5888,9 +5859,9 @@ void SelectionDAGBuilder::CopyValueToVirtualRegister(Value *V, unsigned Reg) { #include "llvm/CodeGen/SelectionDAGISel.h" -void SelectionDAGISel::LowerArguments(BasicBlock *LLVMBB) { +void SelectionDAGISel::LowerArguments(const BasicBlock *LLVMBB) { // If this is the entry block, emit arguments. - Function &F = *LLVMBB->getParent(); + const Function &F = *LLVMBB->getParent(); SelectionDAG &DAG = SDB->DAG; SDValue OldRoot = DAG.getRoot(); DebugLoc dl = SDB->getCurDebugLoc(); @@ -5915,14 +5886,14 @@ void SelectionDAGISel::LowerArguments(BasicBlock *LLVMBB) { // or one register. ISD::ArgFlagsTy Flags; Flags.setSRet(); - EVT RegisterVT = TLI.getRegisterType(*CurDAG->getContext(), ValueVTs[0]); + EVT RegisterVT = TLI.getRegisterType(*DAG.getContext(), ValueVTs[0]); ISD::InputArg RetArg(Flags, RegisterVT, true); Ins.push_back(RetArg); } // Set up the incoming argument description vector. unsigned Idx = 1; - for (Function::arg_iterator I = F.arg_begin(), E = F.arg_end(); + for (Function::const_arg_iterator I = F.arg_begin(), E = F.arg_end(); I != E; ++I, ++Idx) { SmallVector<EVT, 4> ValueVTs; ComputeValueVTs(TLI, I->getType(), ValueVTs); @@ -6024,7 +5995,7 @@ void SelectionDAGISel::LowerArguments(BasicBlock *LLVMBB) { ++i; } - for (Function::arg_iterator I = F.arg_begin(), E = F.arg_end(); I != E; + for (Function::const_arg_iterator I = F.arg_begin(), E = F.arg_end(); I != E; ++I, ++Idx) { SmallVector<SDValue, 4> ArgValues; SmallVector<EVT, 4> ValueVTs; @@ -6067,7 +6038,7 @@ void SelectionDAGISel::LowerArguments(BasicBlock *LLVMBB) { // Finally, if the target has anything special to do, allow it to do so. // FIXME: this should insert code into the DAG! - EmitFunctionEntryCode(F, SDB->DAG.getMachineFunction()); + EmitFunctionEntryCode(); } /// Handle PHI nodes in successor blocks. Emit code into the SelectionDAG to @@ -6078,51 +6049,50 @@ void SelectionDAGISel::LowerArguments(BasicBlock *LLVMBB) { /// the end. /// void -SelectionDAGISel::HandlePHINodesInSuccessorBlocks(BasicBlock *LLVMBB) { - TerminatorInst *TI = LLVMBB->getTerminator(); +SelectionDAGBuilder::HandlePHINodesInSuccessorBlocks(const BasicBlock *LLVMBB) { + const TerminatorInst *TI = LLVMBB->getTerminator(); SmallPtrSet<MachineBasicBlock *, 4> SuccsHandled; // Check successor nodes' PHI nodes that expect a constant to be available // from this block. 
for (unsigned succ = 0, e = TI->getNumSuccessors(); succ != e; ++succ) { - BasicBlock *SuccBB = TI->getSuccessor(succ); + const BasicBlock *SuccBB = TI->getSuccessor(succ); if (!isa<PHINode>(SuccBB->begin())) continue; - MachineBasicBlock *SuccMBB = FuncInfo->MBBMap[SuccBB]; + MachineBasicBlock *SuccMBB = FuncInfo.MBBMap[SuccBB]; // If this terminator has multiple identical successors (common for // switches), only handle each succ once. if (!SuccsHandled.insert(SuccMBB)) continue; MachineBasicBlock::iterator MBBI = SuccMBB->begin(); - PHINode *PN; // At this point we know that there is a 1-1 correspondence between LLVM PHI // nodes and Machine PHI nodes, but the incoming operands have not been // emitted yet. - for (BasicBlock::iterator I = SuccBB->begin(); - (PN = dyn_cast<PHINode>(I)); ++I) { + for (BasicBlock::const_iterator I = SuccBB->begin(); + const PHINode *PN = dyn_cast<PHINode>(I); ++I) { // Ignore dead phi's. if (PN->use_empty()) continue; unsigned Reg; - Value *PHIOp = PN->getIncomingValueForBlock(LLVMBB); + const Value *PHIOp = PN->getIncomingValueForBlock(LLVMBB); - if (Constant *C = dyn_cast<Constant>(PHIOp)) { - unsigned &RegOut = SDB->ConstantsOut[C]; + if (const Constant *C = dyn_cast<Constant>(PHIOp)) { + unsigned &RegOut = ConstantsOut[C]; if (RegOut == 0) { - RegOut = FuncInfo->CreateRegForValue(C); - SDB->CopyValueToVirtualRegister(C, RegOut); + RegOut = FuncInfo.CreateRegForValue(C); + CopyValueToVirtualRegister(C, RegOut); } Reg = RegOut; } else { - Reg = FuncInfo->ValueMap[PHIOp]; + Reg = FuncInfo.ValueMap[PHIOp]; if (Reg == 0) { assert(isa<AllocaInst>(PHIOp) && - FuncInfo->StaticAllocaMap.count(cast<AllocaInst>(PHIOp)) && + FuncInfo.StaticAllocaMap.count(cast<AllocaInst>(PHIOp)) && "Didn't codegen value into a register!??"); - Reg = FuncInfo->CreateRegForValue(PHIOp); - SDB->CopyValueToVirtualRegister(PHIOp, Reg); + Reg = FuncInfo.CreateRegForValue(PHIOp); + CopyValueToVirtualRegister(PHIOp, Reg); } } @@ -6132,77 +6102,12 @@ SelectionDAGISel::HandlePHINodesInSuccessorBlocks(BasicBlock *LLVMBB) { ComputeValueVTs(TLI, PN->getType(), ValueVTs); for (unsigned vti = 0, vte = ValueVTs.size(); vti != vte; ++vti) { EVT VT = ValueVTs[vti]; - unsigned NumRegisters = TLI.getNumRegisters(*CurDAG->getContext(), VT); + unsigned NumRegisters = TLI.getNumRegisters(*DAG.getContext(), VT); for (unsigned i = 0, e = NumRegisters; i != e; ++i) - SDB->PHINodesToUpdate.push_back(std::make_pair(MBBI++, Reg+i)); + FuncInfo.PHINodesToUpdate.push_back(std::make_pair(MBBI++, Reg+i)); Reg += NumRegisters; } } } - SDB->ConstantsOut.clear(); -} - -/// This is the Fast-ISel version of HandlePHINodesInSuccessorBlocks. It only -/// supports legal types, and it emits MachineInstrs directly instead of -/// creating SelectionDAG nodes. -/// -bool -SelectionDAGISel::HandlePHINodesInSuccessorBlocksFast(BasicBlock *LLVMBB, - FastISel *F) { - TerminatorInst *TI = LLVMBB->getTerminator(); - - SmallPtrSet<MachineBasicBlock *, 4> SuccsHandled; - unsigned OrigNumPHINodesToUpdate = SDB->PHINodesToUpdate.size(); - - // Check successor nodes' PHI nodes that expect a constant to be available - // from this block. - for (unsigned succ = 0, e = TI->getNumSuccessors(); succ != e; ++succ) { - BasicBlock *SuccBB = TI->getSuccessor(succ); - if (!isa<PHINode>(SuccBB->begin())) continue; - MachineBasicBlock *SuccMBB = FuncInfo->MBBMap[SuccBB]; - - // If this terminator has multiple identical successors (common for - // switches), only handle each succ once. 
- if (!SuccsHandled.insert(SuccMBB)) continue; - - MachineBasicBlock::iterator MBBI = SuccMBB->begin(); - PHINode *PN; - - // At this point we know that there is a 1-1 correspondence between LLVM PHI - // nodes and Machine PHI nodes, but the incoming operands have not been - // emitted yet. - for (BasicBlock::iterator I = SuccBB->begin(); - (PN = dyn_cast<PHINode>(I)); ++I) { - // Ignore dead phi's. - if (PN->use_empty()) continue; - - // Only handle legal types. Two interesting things to note here. First, - // by bailing out early, we may leave behind some dead instructions, - // since SelectionDAG's HandlePHINodesInSuccessorBlocks will insert its - // own moves. Second, this check is necessary becuase FastISel doesn't - // use CreateRegForValue to create registers, so it always creates - // exactly one register for each non-void instruction. - EVT VT = TLI.getValueType(PN->getType(), /*AllowUnknown=*/true); - if (VT == MVT::Other || !TLI.isTypeLegal(VT)) { - // Promote MVT::i1. - if (VT == MVT::i1) - VT = TLI.getTypeToTransformTo(*CurDAG->getContext(), VT); - else { - SDB->PHINodesToUpdate.resize(OrigNumPHINodesToUpdate); - return false; - } - } - - Value *PHIOp = PN->getIncomingValueForBlock(LLVMBB); - - unsigned Reg = F->getRegForValue(PHIOp); - if (Reg == 0) { - SDB->PHINodesToUpdate.resize(OrigNumPHINodesToUpdate); - return false; - } - SDB->PHINodesToUpdate.push_back(std::make_pair(MBBI++, Reg)); - } - } - - return true; + ConstantsOut.clear(); } diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h index fdcba0f..3fcd4b9 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h +++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h @@ -36,6 +36,7 @@ class BasicBlock; class BitCastInst; class BranchInst; class CallInst; +class DbgValueInst; class ExtractElementInst; class ExtractValueInst; class FCmpInst; @@ -58,6 +59,7 @@ class LoadInst; class MachineBasicBlock; class MachineInstr; class MachineRegisterInfo; +class MDNode; class PHINode; class PtrToIntInst; class ReturnInst; @@ -80,11 +82,8 @@ class ZExtInst; //===----------------------------------------------------------------------===// /// SelectionDAGBuilder - This is the common target-independent lowering /// implementation that is parameterized by a TargetLowering object. -/// Also, targets can overload any lowering method. /// class SelectionDAGBuilder { - MachineBasicBlock *CurMBB; - /// CurDebugLoc - current file + line number. Changes as we build the DAG. DebugLoc CurDebugLoc; @@ -143,15 +142,16 @@ private: /// CaseRec - A struct with ctor used in lowering switches to a binary tree /// of conditional branches. struct CaseRec { - CaseRec(MachineBasicBlock *bb, Constant *lt, Constant *ge, CaseRange r) : + CaseRec(MachineBasicBlock *bb, const Constant *lt, const Constant *ge, + CaseRange r) : CaseBB(bb), LT(lt), GE(ge), Range(r) {} /// CaseBB - The MBB in which to emit the compare and branch MachineBasicBlock *CaseBB; /// LT, GE - If nonzero, we know the current case value must be less-than or /// greater-than-or-equal-to these Constants. - Constant *LT; - Constant *GE; + const Constant *LT; + const Constant *GE; /// Range - A pair of iterators representing the range of case values to be /// processed at this point in the binary search tree. CaseRange Range; @@ -182,7 +182,8 @@ private: /// SelectionDAGBuilder and SDISel for the code generation of additional basic /// blocks needed by multi-case switch statements. 
struct CaseBlock { - CaseBlock(ISD::CondCode cc, Value *cmplhs, Value *cmprhs, Value *cmpmiddle, + CaseBlock(ISD::CondCode cc, const Value *cmplhs, const Value *cmprhs, + const Value *cmpmiddle, MachineBasicBlock *truebb, MachineBasicBlock *falsebb, MachineBasicBlock *me) : CC(cc), CmpLHS(cmplhs), CmpMHS(cmpmiddle), CmpRHS(cmprhs), @@ -192,7 +193,7 @@ private: // CmpLHS/CmpRHS/CmpMHS - The LHS/MHS/RHS of the comparison to emit. // Emit by default LHS op RHS. MHS is used for range comparisons: // If MHS is not null: (LHS <= MHS) and (MHS <= RHS). - Value *CmpLHS, *CmpMHS, *CmpRHS; + const Value *CmpLHS, *CmpMHS, *CmpRHS; // TrueBB/FalseBB - the block to branch to if the setcc is true/false. MachineBasicBlock *TrueBB, *FalseBB; // ThisBB - the block into which to emit the code for the setcc and branches @@ -214,12 +215,12 @@ private: MachineBasicBlock *Default; }; struct JumpTableHeader { - JumpTableHeader(APInt F, APInt L, Value *SV, MachineBasicBlock *H, + JumpTableHeader(APInt F, APInt L, const Value *SV, MachineBasicBlock *H, bool E = false): First(F), Last(L), SValue(SV), HeaderBB(H), Emitted(E) {} APInt First; APInt Last; - Value *SValue; + const Value *SValue; MachineBasicBlock *HeaderBB; bool Emitted; }; @@ -236,7 +237,7 @@ private: typedef SmallVector<BitTestCase, 3> BitTestInfo; struct BitTestBlock { - BitTestBlock(APInt F, APInt R, Value* SV, + BitTestBlock(APInt F, APInt R, const Value* SV, unsigned Rg, bool E, MachineBasicBlock* P, MachineBasicBlock* D, const BitTestInfo& C): @@ -244,7 +245,7 @@ private: Parent(P), Default(D), Cases(C) { } APInt First; APInt Range; - Value *SValue; + const Value *SValue; unsigned Reg; bool Emitted; MachineBasicBlock *Parent; @@ -256,7 +257,8 @@ public: // TLI - This is information that describes the available target features we // need for lowering. This indicates when operations are unavailable, // implemented with a libcall, etc. - TargetLowering &TLI; + const TargetMachine &TM; + const TargetLowering &TLI; SelectionDAG &DAG; const TargetData *TD; AliasAnalysis *AA; @@ -271,17 +273,9 @@ public: /// SwitchInst code generation information. std::vector<BitTestBlock> BitTestCases; - /// PHINodesToUpdate - A list of phi instructions whose operand list will - /// be updated after processing the current basic block. - std::vector<std::pair<MachineInstr*, unsigned> > PHINodesToUpdate; - - /// EdgeMapping - If an edge from CurMBB to any MBB is changed (e.g. due to - /// scheduler custom lowering), track the change here. - DenseMap<MachineBasicBlock*, MachineBasicBlock*> EdgeMapping; - // Emit PHI-node-operand constants only once even if used by multiple // PHI nodes. - DenseMap<Constant*, unsigned> ConstantsOut; + DenseMap<const Constant *, unsigned> ConstantsOut; /// FuncInfo - Information about the function as a whole. 
/// @@ -302,16 +296,16 @@ public: LLVMContext *Context; - SelectionDAGBuilder(SelectionDAG &dag, TargetLowering &tli, - FunctionLoweringInfo &funcinfo, + SelectionDAGBuilder(SelectionDAG &dag, FunctionLoweringInfo &funcinfo, CodeGenOpt::Level ol) - : SDNodeOrder(0), TLI(tli), DAG(dag), FuncInfo(funcinfo), OptLevel(ol), + : SDNodeOrder(0), TM(dag.getTarget()), TLI(dag.getTargetLoweringInfo()), + DAG(dag), FuncInfo(funcinfo), OptLevel(ol), HasTailCall(false), Context(dag.getContext()) { } void init(GCFunctionInfo *gfi, AliasAnalysis &aa); - /// clear - Clear out the curret SelectionDAG and the associated + /// clear - Clear out the current SelectionDAG and the associated /// state and prepare this SelectionDAGBuilder object to be used /// for a new block. This doesn't clear out information about /// additional blocks that are needed to complete switch lowering @@ -333,22 +327,19 @@ public: SDValue getControlRoot(); DebugLoc getCurDebugLoc() const { return CurDebugLoc; } - void setCurDebugLoc(DebugLoc dl) { CurDebugLoc = dl; } unsigned getSDNodeOrder() const { return SDNodeOrder; } - void CopyValueToVirtualRegister(Value *V, unsigned Reg); + void CopyValueToVirtualRegister(const Value *V, unsigned Reg); /// AssignOrderingToNode - Assign an ordering to the node. The order is gotten /// from how the code appeared in the source. The ordering is used by the /// scheduler to effectively turn off scheduling. void AssignOrderingToNode(const SDNode *Node); - void visit(Instruction &I); - - void visit(unsigned Opcode, User &I); + void visit(const Instruction &I); - void setCurrentBasicBlock(MachineBasicBlock *MBB) { CurMBB = MBB; } + void visit(unsigned Opcode, const User &I); SDValue getValue(const Value *V); @@ -362,136 +353,154 @@ public: std::set<unsigned> &OutputRegs, std::set<unsigned> &InputRegs); - void FindMergedConditions(Value *Cond, MachineBasicBlock *TBB, + void FindMergedConditions(const Value *Cond, MachineBasicBlock *TBB, MachineBasicBlock *FBB, MachineBasicBlock *CurBB, - unsigned Opc); - void EmitBranchForMergedCondition(Value *Cond, MachineBasicBlock *TBB, + MachineBasicBlock *SwitchBB, unsigned Opc); + void EmitBranchForMergedCondition(const Value *Cond, MachineBasicBlock *TBB, MachineBasicBlock *FBB, - MachineBasicBlock *CurBB); + MachineBasicBlock *CurBB, + MachineBasicBlock *SwitchBB); bool ShouldEmitAsBranches(const std::vector<CaseBlock> &Cases); - bool isExportableFromCurrentBlock(Value *V, const BasicBlock *FromBB); - void CopyToExportRegsIfNeeded(Value *V); - void ExportFromCurrentBlock(Value *V); - void LowerCallTo(CallSite CS, SDValue Callee, bool IsTailCall, + bool isExportableFromCurrentBlock(const Value *V, const BasicBlock *FromBB); + void CopyToExportRegsIfNeeded(const Value *V); + void ExportFromCurrentBlock(const Value *V); + void LowerCallTo(ImmutableCallSite CS, SDValue Callee, bool IsTailCall, MachineBasicBlock *LandingPad = NULL); private: // Terminator instructions. 
- void visitRet(ReturnInst &I); - void visitBr(BranchInst &I); - void visitSwitch(SwitchInst &I); - void visitIndirectBr(IndirectBrInst &I); - void visitUnreachable(UnreachableInst &I) { /* noop */ } + void visitRet(const ReturnInst &I); + void visitBr(const BranchInst &I); + void visitSwitch(const SwitchInst &I); + void visitIndirectBr(const IndirectBrInst &I); + void visitUnreachable(const UnreachableInst &I) { /* noop */ } // Helpers for visitSwitch bool handleSmallSwitchRange(CaseRec& CR, CaseRecVector& WorkList, - Value* SV, - MachineBasicBlock* Default); + const Value* SV, + MachineBasicBlock* Default, + MachineBasicBlock *SwitchBB); bool handleJTSwitchCase(CaseRec& CR, CaseRecVector& WorkList, - Value* SV, - MachineBasicBlock* Default); + const Value* SV, + MachineBasicBlock* Default, + MachineBasicBlock *SwitchBB); bool handleBTSplitSwitchCase(CaseRec& CR, CaseRecVector& WorkList, - Value* SV, - MachineBasicBlock* Default); + const Value* SV, + MachineBasicBlock* Default, + MachineBasicBlock *SwitchBB); bool handleBitTestsSwitchCase(CaseRec& CR, CaseRecVector& WorkList, - Value* SV, - MachineBasicBlock* Default); + const Value* SV, + MachineBasicBlock* Default, + MachineBasicBlock *SwitchBB); public: - void visitSwitchCase(CaseBlock &CB); - void visitBitTestHeader(BitTestBlock &B); + void visitSwitchCase(CaseBlock &CB, + MachineBasicBlock *SwitchBB); + void visitBitTestHeader(BitTestBlock &B, MachineBasicBlock *SwitchBB); void visitBitTestCase(MachineBasicBlock* NextMBB, unsigned Reg, - BitTestCase &B); + BitTestCase &B, + MachineBasicBlock *SwitchBB); void visitJumpTable(JumpTable &JT); - void visitJumpTableHeader(JumpTable &JT, JumpTableHeader &JTH); + void visitJumpTableHeader(JumpTable &JT, JumpTableHeader &JTH, + MachineBasicBlock *SwitchBB); private: // These all get lowered before this pass. 
- void visitInvoke(InvokeInst &I); - void visitUnwind(UnwindInst &I); - - void visitBinary(User &I, unsigned OpCode); - void visitShift(User &I, unsigned Opcode); - void visitAdd(User &I) { visitBinary(I, ISD::ADD); } - void visitFAdd(User &I) { visitBinary(I, ISD::FADD); } - void visitSub(User &I) { visitBinary(I, ISD::SUB); } - void visitFSub(User &I); - void visitMul(User &I) { visitBinary(I, ISD::MUL); } - void visitFMul(User &I) { visitBinary(I, ISD::FMUL); } - void visitURem(User &I) { visitBinary(I, ISD::UREM); } - void visitSRem(User &I) { visitBinary(I, ISD::SREM); } - void visitFRem(User &I) { visitBinary(I, ISD::FREM); } - void visitUDiv(User &I) { visitBinary(I, ISD::UDIV); } - void visitSDiv(User &I) { visitBinary(I, ISD::SDIV); } - void visitFDiv(User &I) { visitBinary(I, ISD::FDIV); } - void visitAnd (User &I) { visitBinary(I, ISD::AND); } - void visitOr (User &I) { visitBinary(I, ISD::OR); } - void visitXor (User &I) { visitBinary(I, ISD::XOR); } - void visitShl (User &I) { visitShift(I, ISD::SHL); } - void visitLShr(User &I) { visitShift(I, ISD::SRL); } - void visitAShr(User &I) { visitShift(I, ISD::SRA); } - void visitICmp(User &I); - void visitFCmp(User &I); + void visitInvoke(const InvokeInst &I); + void visitUnwind(const UnwindInst &I); + + void visitBinary(const User &I, unsigned OpCode); + void visitShift(const User &I, unsigned Opcode); + void visitAdd(const User &I) { visitBinary(I, ISD::ADD); } + void visitFAdd(const User &I) { visitBinary(I, ISD::FADD); } + void visitSub(const User &I) { visitBinary(I, ISD::SUB); } + void visitFSub(const User &I); + void visitMul(const User &I) { visitBinary(I, ISD::MUL); } + void visitFMul(const User &I) { visitBinary(I, ISD::FMUL); } + void visitURem(const User &I) { visitBinary(I, ISD::UREM); } + void visitSRem(const User &I) { visitBinary(I, ISD::SREM); } + void visitFRem(const User &I) { visitBinary(I, ISD::FREM); } + void visitUDiv(const User &I) { visitBinary(I, ISD::UDIV); } + void visitSDiv(const User &I) { visitBinary(I, ISD::SDIV); } + void visitFDiv(const User &I) { visitBinary(I, ISD::FDIV); } + void visitAnd (const User &I) { visitBinary(I, ISD::AND); } + void visitOr (const User &I) { visitBinary(I, ISD::OR); } + void visitXor (const User &I) { visitBinary(I, ISD::XOR); } + void visitShl (const User &I) { visitShift(I, ISD::SHL); } + void visitLShr(const User &I) { visitShift(I, ISD::SRL); } + void visitAShr(const User &I) { visitShift(I, ISD::SRA); } + void visitICmp(const User &I); + void visitFCmp(const User &I); // Visit the conversion instructions - void visitTrunc(User &I); - void visitZExt(User &I); - void visitSExt(User &I); - void visitFPTrunc(User &I); - void visitFPExt(User &I); - void visitFPToUI(User &I); - void visitFPToSI(User &I); - void visitUIToFP(User &I); - void visitSIToFP(User &I); - void visitPtrToInt(User &I); - void visitIntToPtr(User &I); - void visitBitCast(User &I); - - void visitExtractElement(User &I); - void visitInsertElement(User &I); - void visitShuffleVector(User &I); - - void visitExtractValue(ExtractValueInst &I); - void visitInsertValue(InsertValueInst &I); - - void visitGetElementPtr(User &I); - void visitSelect(User &I); - - void visitAlloca(AllocaInst &I); - void visitLoad(LoadInst &I); - void visitStore(StoreInst &I); - void visitPHI(PHINode &I) { } // PHI nodes are handled specially. 
- void visitCall(CallInst &I); - bool visitMemCmpCall(CallInst &I); + void visitTrunc(const User &I); + void visitZExt(const User &I); + void visitSExt(const User &I); + void visitFPTrunc(const User &I); + void visitFPExt(const User &I); + void visitFPToUI(const User &I); + void visitFPToSI(const User &I); + void visitUIToFP(const User &I); + void visitSIToFP(const User &I); + void visitPtrToInt(const User &I); + void visitIntToPtr(const User &I); + void visitBitCast(const User &I); + + void visitExtractElement(const User &I); + void visitInsertElement(const User &I); + void visitShuffleVector(const User &I); + + void visitExtractValue(const ExtractValueInst &I); + void visitInsertValue(const InsertValueInst &I); + + void visitGetElementPtr(const User &I); + void visitSelect(const User &I); + + void visitAlloca(const AllocaInst &I); + void visitLoad(const LoadInst &I); + void visitStore(const StoreInst &I); + void visitPHI(const PHINode &I); + void visitCall(const CallInst &I); + bool visitMemCmpCall(const CallInst &I); - void visitInlineAsm(CallSite CS); - const char *visitIntrinsicCall(CallInst &I, unsigned Intrinsic); - void visitTargetIntrinsic(CallInst &I, unsigned Intrinsic); - - void visitPow(CallInst &I); - void visitExp2(CallInst &I); - void visitExp(CallInst &I); - void visitLog(CallInst &I); - void visitLog2(CallInst &I); - void visitLog10(CallInst &I); - - void visitVAStart(CallInst &I); - void visitVAArg(VAArgInst &I); - void visitVAEnd(CallInst &I); - void visitVACopy(CallInst &I); - - void visitUserOp1(Instruction &I) { + void visitInlineAsm(ImmutableCallSite CS); + const char *visitIntrinsicCall(const CallInst &I, unsigned Intrinsic); + void visitTargetIntrinsic(const CallInst &I, unsigned Intrinsic); + + void visitPow(const CallInst &I); + void visitExp2(const CallInst &I); + void visitExp(const CallInst &I); + void visitLog(const CallInst &I); + void visitLog2(const CallInst &I); + void visitLog10(const CallInst &I); + + void visitVAStart(const CallInst &I); + void visitVAArg(const VAArgInst &I); + void visitVAEnd(const CallInst &I); + void visitVACopy(const CallInst &I); + + void visitUserOp1(const Instruction &I) { llvm_unreachable("UserOp1 should not exist at instruction selection time!"); } - void visitUserOp2(Instruction &I) { + void visitUserOp2(const Instruction &I) { llvm_unreachable("UserOp2 should not exist at instruction selection time!"); } - const char *implVisitBinaryAtomic(CallInst& I, ISD::NodeType Op); - const char *implVisitAluOverflow(CallInst &I, ISD::NodeType Op); + const char *implVisitBinaryAtomic(const CallInst& I, ISD::NodeType Op); + const char *implVisitAluOverflow(const CallInst &I, ISD::NodeType Op); + + void HandlePHINodesInSuccessorBlocks(const BasicBlock *LLVMBB); + + /// EmitFuncArgumentDbgValue - If the DbgValueInst is a dbg_value of a + /// function argument, create the corresponding DBG_VALUE machine instruction + /// for it now. At the end of instruction selection, they will be inserted to + /// the entry BB. 
+ bool EmitFuncArgumentDbgValue(const DbgValueInst &DI, + const Value *V, MDNode *Variable, + uint64_t Offset, const SDValue &N); }; } // end namespace llvm diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp index 9b137a5..422cb7a 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp @@ -19,10 +19,7 @@ #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Analysis/DebugInfo.h" #include "llvm/Constants.h" -#include "llvm/CallingConv.h" -#include "llvm/DerivedTypes.h" #include "llvm/Function.h" -#include "llvm/GlobalVariable.h" #include "llvm/InlineAsm.h" #include "llvm/Instructions.h" #include "llvm/Intrinsics.h" @@ -32,18 +29,13 @@ #include "llvm/CodeGen/GCStrategy.h" #include "llvm/CodeGen/GCMetadata.h" #include "llvm/CodeGen/MachineFunction.h" -#include "llvm/CodeGen/MachineFunctionAnalysis.h" -#include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineInstrBuilder.h" -#include "llvm/CodeGen/MachineJumpTableInfo.h" #include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/ScheduleHazardRecognizer.h" #include "llvm/CodeGen/SchedulerRegistry.h" #include "llvm/CodeGen/SelectionDAG.h" #include "llvm/Target/TargetRegisterInfo.h" -#include "llvm/Target/TargetData.h" -#include "llvm/Target/TargetFrameInfo.h" #include "llvm/Target/TargetIntrinsicInfo.h" #include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetLowering.h" @@ -52,7 +44,6 @@ #include "llvm/Support/Compiler.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" -#include "llvm/Support/MathExtras.h" #include "llvm/Support/Timer.h" #include "llvm/Support/raw_ostream.h" #include "llvm/ADT/Statistic.h" @@ -69,10 +60,6 @@ EnableFastISelVerbose("fast-isel-verbose", cl::Hidden, static cl::opt<bool> EnableFastISelAbort("fast-isel-abort", cl::Hidden, cl::desc("Enable abort calls when \"fast\" instruction fails")); -static cl::opt<bool> -SchedLiveInCopies("schedule-livein-copies", cl::Hidden, - cl::desc("Schedule copies of livein registers"), - cl::init(false)); #ifndef NDEBUG static cl::opt<bool> @@ -161,9 +148,9 @@ namespace llvm { // When new basic blocks are inserted and the edges from MBB to its successors // are modified, the method should insert pairs of <OldSucc, NewSucc> into the // DenseMap. -MachineBasicBlock *TargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, - MachineBasicBlock *MBB, - DenseMap<MachineBasicBlock*, MachineBasicBlock*> *EM) const { +MachineBasicBlock * +TargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, + MachineBasicBlock *MBB) const { #ifndef NDEBUG dbgs() << "If a target marks an instruction with " "'usesCustomInserter', it must implement " @@ -173,115 +160,15 @@ MachineBasicBlock *TargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, return 0; } -/// EmitLiveInCopy - Emit a copy for a live in physical register. If the -/// physical register has only a single copy use, then coalesced the copy -/// if possible. 
-static void EmitLiveInCopy(MachineBasicBlock *MBB, - MachineBasicBlock::iterator &InsertPos, - unsigned VirtReg, unsigned PhysReg, - const TargetRegisterClass *RC, - DenseMap<MachineInstr*, unsigned> &CopyRegMap, - const MachineRegisterInfo &MRI, - const TargetRegisterInfo &TRI, - const TargetInstrInfo &TII) { - unsigned NumUses = 0; - MachineInstr *UseMI = NULL; - for (MachineRegisterInfo::use_iterator UI = MRI.use_begin(VirtReg), - UE = MRI.use_end(); UI != UE; ++UI) { - UseMI = &*UI; - if (++NumUses > 1) - break; - } - - // If the number of uses is not one, or the use is not a move instruction, - // don't coalesce. Also, only coalesce away a virtual register to virtual - // register copy. - bool Coalesced = false; - unsigned SrcReg, DstReg, SrcSubReg, DstSubReg; - if (NumUses == 1 && - TII.isMoveInstr(*UseMI, SrcReg, DstReg, SrcSubReg, DstSubReg) && - TargetRegisterInfo::isVirtualRegister(DstReg)) { - VirtReg = DstReg; - Coalesced = true; - } - - // Now find an ideal location to insert the copy. - MachineBasicBlock::iterator Pos = InsertPos; - while (Pos != MBB->begin()) { - MachineInstr *PrevMI = prior(Pos); - DenseMap<MachineInstr*, unsigned>::iterator RI = CopyRegMap.find(PrevMI); - // copyRegToReg might emit multiple instructions to do a copy. - unsigned CopyDstReg = (RI == CopyRegMap.end()) ? 0 : RI->second; - if (CopyDstReg && !TRI.regsOverlap(CopyDstReg, PhysReg)) - // This is what the BB looks like right now: - // r1024 = mov r0 - // ... - // r1 = mov r1024 - // - // We want to insert "r1025 = mov r1". Inserting this copy below the - // move to r1024 makes it impossible for that move to be coalesced. - // - // r1025 = mov r1 - // r1024 = mov r0 - // ... - // r1 = mov 1024 - // r2 = mov 1025 - break; // Woot! Found a good location. - --Pos; - } - - bool Emitted = TII.copyRegToReg(*MBB, Pos, VirtReg, PhysReg, RC, RC); - assert(Emitted && "Unable to issue a live-in copy instruction!\n"); - (void) Emitted; - - CopyRegMap.insert(std::make_pair(prior(Pos), VirtReg)); - if (Coalesced) { - if (&*InsertPos == UseMI) ++InsertPos; - MBB->erase(UseMI); - } -} - -/// EmitLiveInCopies - If this is the first basic block in the function, -/// and if it has live ins that need to be copied into vregs, emit the -/// copies into the block. -static void EmitLiveInCopies(MachineBasicBlock *EntryMBB, - const MachineRegisterInfo &MRI, - const TargetRegisterInfo &TRI, - const TargetInstrInfo &TII) { - if (SchedLiveInCopies) { - // Emit the copies at a heuristically-determined location in the block. - DenseMap<MachineInstr*, unsigned> CopyRegMap; - MachineBasicBlock::iterator InsertPos = EntryMBB->begin(); - for (MachineRegisterInfo::livein_iterator LI = MRI.livein_begin(), - E = MRI.livein_end(); LI != E; ++LI) - if (LI->second) { - const TargetRegisterClass *RC = MRI.getRegClass(LI->second); - EmitLiveInCopy(EntryMBB, InsertPos, LI->second, LI->first, - RC, CopyRegMap, MRI, TRI, TII); - } - } else { - // Emit the copies into the top of the block. 
- for (MachineRegisterInfo::livein_iterator LI = MRI.livein_begin(), - E = MRI.livein_end(); LI != E; ++LI) - if (LI->second) { - const TargetRegisterClass *RC = MRI.getRegClass(LI->second); - bool Emitted = TII.copyRegToReg(*EntryMBB, EntryMBB->begin(), - LI->second, LI->first, RC, RC); - assert(Emitted && "Unable to issue a live-in copy instruction!\n"); - (void) Emitted; - } - } -} - //===----------------------------------------------------------------------===// // SelectionDAGISel code //===----------------------------------------------------------------------===// -SelectionDAGISel::SelectionDAGISel(TargetMachine &tm, CodeGenOpt::Level OL) : +SelectionDAGISel::SelectionDAGISel(const TargetMachine &tm, CodeGenOpt::Level OL) : MachineFunctionPass(&ID), TM(tm), TLI(*tm.getTargetLowering()), FuncInfo(new FunctionLoweringInfo(TLI)), - CurDAG(new SelectionDAG(TLI, *FuncInfo)), - SDB(new SelectionDAGBuilder(*CurDAG, TLI, *FuncInfo, OL)), + CurDAG(new SelectionDAG(tm, *FuncInfo)), + SDB(new SelectionDAGBuilder(*CurDAG, *FuncInfo, OL)), GFI(), OptLevel(OL), DAGSize(0) @@ -293,10 +180,6 @@ SelectionDAGISel::~SelectionDAGISel() { delete FuncInfo; } -unsigned SelectionDAGISel::MakeReg(EVT VT) { - return RegInfo->createVirtualRegister(TLI.getRegClassFor(VT)); -} - void SelectionDAGISel::getAnalysisUsage(AnalysisUsage &AU) const { AU.addRequired<AliasAnalysis>(); AU.addPreserved<AliasAnalysis>(); @@ -306,129 +189,75 @@ void SelectionDAGISel::getAnalysisUsage(AnalysisUsage &AU) const { } bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) { - Function &Fn = *mf.getFunction(); - // Do some sanity-checking on the command-line options. assert((!EnableFastISelVerbose || EnableFastISel) && "-fast-isel-verbose requires -fast-isel"); assert((!EnableFastISelAbort || EnableFastISel) && "-fast-isel-abort requires -fast-isel"); - // Get alias analysis for load/store combining. - AA = &getAnalysis<AliasAnalysis>(); - - MF = &mf; + const Function &Fn = *mf.getFunction(); const TargetInstrInfo &TII = *TM.getInstrInfo(); const TargetRegisterInfo &TRI = *TM.getRegisterInfo(); - if (Fn.hasGC()) - GFI = &getAnalysis<GCModuleInfo>().getFunctionInfo(Fn); - else - GFI = 0; + MF = &mf; RegInfo = &MF->getRegInfo(); + AA = &getAnalysis<AliasAnalysis>(); + GFI = Fn.hasGC() ? &getAnalysis<GCModuleInfo>().getFunctionInfo(Fn) : 0; + DEBUG(dbgs() << "\n\n\n=== " << Fn.getName() << "\n"); CurDAG->init(*MF); FuncInfo->set(Fn, *MF, EnableFastISel); SDB->init(GFI, *AA); - for (Function::iterator I = Fn.begin(), E = Fn.end(); I != E; ++I) - if (InvokeInst *Invoke = dyn_cast<InvokeInst>(I->getTerminator())) - // Mark landing pad. - FuncInfo->MBBMap[Invoke->getSuccessor(1)]->setIsLandingPad(); - - SelectAllBasicBlocks(Fn, *MF, TII); + SelectAllBasicBlocks(Fn); // If the first basic block in the function has live ins that need to be // copied into vregs, emit the copies into the top of the block before // emitting the code for the block. - EmitLiveInCopies(MF->begin(), *RegInfo, TRI, TII); - - // Add function live-ins to entry block live-in set. - for (MachineRegisterInfo::livein_iterator I = RegInfo->livein_begin(), - E = RegInfo->livein_end(); I != E; ++I) - MF->begin()->addLiveIn(I->first); - -#ifndef NDEBUG - assert(FuncInfo->CatchInfoFound.size() == FuncInfo->CatchInfoLost.size() && - "Not all catch info was assigned to a landing pad!"); -#endif + MachineBasicBlock *EntryMBB = MF->begin(); + RegInfo->EmitLiveInCopies(EntryMBB, TRI, TII); + + // Insert DBG_VALUE instructions for function arguments to the entry block. 
+ for (unsigned i = 0, e = FuncInfo->ArgDbgValues.size(); i != e; ++i) { + MachineInstr *MI = FuncInfo->ArgDbgValues[e-i-1]; + unsigned Reg = MI->getOperand(0).getReg(); + if (TargetRegisterInfo::isPhysicalRegister(Reg)) + EntryMBB->insert(EntryMBB->begin(), MI); + else { + MachineInstr *Def = RegInfo->getVRegDef(Reg); + MachineBasicBlock::iterator InsertPos = Def; + // FIXME: VR def may not be in entry block. + Def->getParent()->insert(llvm::next(InsertPos), MI); + } + } + // Release function-specific state. SDB and CurDAG are already cleared + // at this point. FuncInfo->clear(); return true; } -/// SetDebugLoc - Update MF's and SDB's DebugLocs if debug information is -/// attached with this instruction. -static void SetDebugLoc(Instruction *I, SelectionDAGBuilder *SDB, - FastISel *FastIS, MachineFunction *MF) { - DebugLoc DL = I->getDebugLoc(); - if (DL.isUnknown()) return; - - SDB->setCurDebugLoc(DL); - - if (FastIS) - FastIS->setCurDebugLoc(DL); - - // If the function doesn't have a default debug location yet, set - // it. This is kind of a hack. - if (MF->getDefaultDebugLoc().isUnknown()) - MF->setDefaultDebugLoc(DL); -} - -/// ResetDebugLoc - Set MF's and SDB's DebugLocs to Unknown. -static void ResetDebugLoc(SelectionDAGBuilder *SDB, FastISel *FastIS) { - SDB->setCurDebugLoc(DebugLoc()); - if (FastIS) - FastIS->setCurDebugLoc(DebugLoc()); -} - -void SelectionDAGISel::SelectBasicBlock(BasicBlock *LLVMBB, - BasicBlock::iterator Begin, - BasicBlock::iterator End, - bool &HadTailCall) { - SDB->setCurrentBasicBlock(BB); - +MachineBasicBlock * +SelectionDAGISel::SelectBasicBlock(MachineBasicBlock *BB, + const BasicBlock *LLVMBB, + BasicBlock::const_iterator Begin, + BasicBlock::const_iterator End, + bool &HadTailCall) { // Lower all of the non-terminator instructions. If a call is emitted - // as a tail call, cease emitting nodes for this block. - for (BasicBlock::iterator I = Begin; I != End && !SDB->HasTailCall; ++I) { - SetDebugLoc(I, SDB, 0, MF); - - if (!isa<TerminatorInst>(I)) { - SDB->visit(*I); - - // Set the current debug location back to "unknown" so that it doesn't - // spuriously apply to subsequent instructions. - ResetDebugLoc(SDB, 0); - } - } - - if (!SDB->HasTailCall) { - // Ensure that all instructions which are used outside of their defining - // blocks are available as virtual registers. Invoke is handled elsewhere. - for (BasicBlock::iterator I = Begin; I != End; ++I) - if (!isa<PHINode>(I) && !isa<InvokeInst>(I)) - SDB->CopyToExportRegsIfNeeded(I); - - // Handle PHI nodes in successor blocks. - if (End == LLVMBB->end()) { - HandlePHINodesInSuccessorBlocks(LLVMBB); - - // Lower the terminator after the copies are emitted. - SetDebugLoc(LLVMBB->getTerminator(), SDB, 0, MF); - SDB->visit(*LLVMBB->getTerminator()); - ResetDebugLoc(SDB, 0); - } - } + // as a tail call, cease emitting nodes for this block. Terminators + // are handled below. + for (BasicBlock::const_iterator I = Begin; I != End && !SDB->HasTailCall; ++I) + SDB->visit(*I); // Make sure the root of the DAG is up-to-date. CurDAG->setRoot(SDB->getControlRoot()); - - // Final step, emit the lowered DAG as machine code. - CodeGenAndEmitDAG(); HadTailCall = SDB->HasTailCall; SDB->clear(); + + // Final step, emit the lowered DAG as machine code. 
+ return CodeGenAndEmitDAG(BB); } namespace { @@ -493,7 +322,7 @@ void SelectionDAGISel::ShrinkDemandedOps() { InWorklist.insert(I); } - TargetLowering::TargetLoweringOpt TLO(*CurDAG, true); + TargetLowering::TargetLoweringOpt TLO(*CurDAG, true, true, true); while (!Worklist.empty()) { SDNode *N = Worklist.pop_back_val(); InWorklist.erase(N); @@ -613,7 +442,7 @@ void SelectionDAGISel::ComputeLiveOutVRegInfo() { } while (!Worklist.empty()); } -void SelectionDAGISel::CodeGenAndEmitDAG() { +MachineBasicBlock *SelectionDAGISel::CodeGenAndEmitDAG(MachineBasicBlock *BB) { std::string GroupName; if (TimePassesIsEnabled) GroupName = "Instruction Selection and Scheduling"; @@ -763,9 +592,9 @@ void SelectionDAGISel::CodeGenAndEmitDAG() { // inserted into. if (TimePassesIsEnabled) { NamedRegionTimer T("Instruction Creation", GroupName); - BB = Scheduler->EmitSchedule(&SDB->EdgeMapping); + BB = Scheduler->EmitSchedule(); } else { - BB = Scheduler->EmitSchedule(&SDB->EdgeMapping); + BB = Scheduler->EmitSchedule(); } // Free the scheduler state. @@ -776,8 +605,10 @@ void SelectionDAGISel::CodeGenAndEmitDAG() { delete Scheduler; } - DEBUG(dbgs() << "Selected machine code:\n"); - DEBUG(BB->dump()); + // Free the SelectionDAG state, now that we're finished with it. + CurDAG->clear(); + + return BB; } void SelectionDAGISel::DoInstructionSelection() { @@ -836,128 +667,94 @@ void SelectionDAGISel::DoInstructionSelection() { PostprocessISelDAG(); } +/// PrepareEHLandingPad - Emit an EH_LABEL, set up live-in registers, and +/// do other setup for EH landing-pad blocks. +void SelectionDAGISel::PrepareEHLandingPad(MachineBasicBlock *BB) { + // Add a label to mark the beginning of the landing pad. Deletion of the + // landing pad can thus be detected via the MachineModuleInfo. + MCSymbol *Label = MF->getMMI().addLandingPad(BB); + + const TargetInstrDesc &II = TM.getInstrInfo()->get(TargetOpcode::EH_LABEL); + BuildMI(BB, SDB->getCurDebugLoc(), II).addSym(Label); + + // Mark exception register as live in. + unsigned Reg = TLI.getExceptionAddressRegister(); + if (Reg) BB->addLiveIn(Reg); + + // Mark exception selector register as live in. + Reg = TLI.getExceptionSelectorRegister(); + if (Reg) BB->addLiveIn(Reg); + + // FIXME: Hack around an exception handling flaw (PR1508): the personality + // function and list of typeids logically belong to the invoke (or, if you + // like, the basic block containing the invoke), and need to be associated + // with it in the dwarf exception handling tables. Currently however the + // information is provided by an intrinsic (eh.selector) that can be moved + // to unexpected places by the optimizers: if the unwind edge is critical, + // then breaking it can result in the intrinsics being in the successor of + // the landing pad, not the landing pad itself. This results + // in exceptions not being caught because no typeids are associated with + // the invoke. This may not be the only way things can go wrong, but it + // is the only way we try to work around for the moment. + const BasicBlock *LLVMBB = BB->getBasicBlock(); + const BranchInst *Br = dyn_cast<BranchInst>(LLVMBB->getTerminator()); + + if (Br && Br->isUnconditional()) { // Critical edge? + BasicBlock::const_iterator I, E; + for (I = LLVMBB->begin(), E = --LLVMBB->end(); I != E; ++I) + if (isa<EHSelectorInst>(I)) + break; -void SelectionDAGISel::SelectAllBasicBlocks(Function &Fn, - MachineFunction &MF, - const TargetInstrInfo &TII) { + if (I == E) + // No catch info found - try to extract some from the successor. 
+ CopyCatchInfo(Br->getSuccessor(0), LLVMBB, &MF->getMMI(), *FuncInfo); + } +} + +void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) { // Initialize the Fast-ISel state, if needed. FastISel *FastIS = 0; if (EnableFastISel) - FastIS = TLI.createFastISel(MF, FuncInfo->ValueMap, FuncInfo->MBBMap, - FuncInfo->StaticAllocaMap + FastIS = TLI.createFastISel(*MF, FuncInfo->ValueMap, FuncInfo->MBBMap, + FuncInfo->StaticAllocaMap, + FuncInfo->PHINodesToUpdate #ifndef NDEBUG , FuncInfo->CatchInfoLost #endif ); // Iterate over all basic blocks in the function. - for (Function::iterator I = Fn.begin(), E = Fn.end(); I != E; ++I) { - BasicBlock *LLVMBB = &*I; - BB = FuncInfo->MBBMap[LLVMBB]; + for (Function::const_iterator I = Fn.begin(), E = Fn.end(); I != E; ++I) { + const BasicBlock *LLVMBB = &*I; + MachineBasicBlock *BB = FuncInfo->MBBMap[LLVMBB]; - BasicBlock::iterator const Begin = LLVMBB->begin(); - BasicBlock::iterator const End = LLVMBB->end(); - BasicBlock::iterator BI = Begin; + BasicBlock::const_iterator const Begin = LLVMBB->getFirstNonPHI(); + BasicBlock::const_iterator const End = LLVMBB->end(); + BasicBlock::const_iterator BI = Begin; // Lower any arguments needed in this block if this is the entry block. - bool SuppressFastISel = false; - if (LLVMBB == &Fn.getEntryBlock()) { + if (LLVMBB == &Fn.getEntryBlock()) LowerArguments(LLVMBB); - // If any of the arguments has the byval attribute, forgo - // fast-isel in the entry block. - if (FastIS) { - unsigned j = 1; - for (Function::arg_iterator I = Fn.arg_begin(), E = Fn.arg_end(); - I != E; ++I, ++j) - if (Fn.paramHasAttr(j, Attribute::ByVal)) { - if (EnableFastISelVerbose || EnableFastISelAbort) - dbgs() << "FastISel skips entry block due to byval argument\n"; - SuppressFastISel = true; - break; - } - } - } - - if (BB->isLandingPad()) { - // Add a label to mark the beginning of the landing pad. Deletion of the - // landing pad can thus be detected via the MachineModuleInfo. - MCSymbol *Label = MF.getMMI().addLandingPad(BB); - - const TargetInstrDesc &II = TII.get(TargetOpcode::EH_LABEL); - BuildMI(BB, SDB->getCurDebugLoc(), II).addSym(Label); - - // Mark exception register as live in. - unsigned Reg = TLI.getExceptionAddressRegister(); - if (Reg) BB->addLiveIn(Reg); - - // Mark exception selector register as live in. - Reg = TLI.getExceptionSelectorRegister(); - if (Reg) BB->addLiveIn(Reg); - - // FIXME: Hack around an exception handling flaw (PR1508): the personality - // function and list of typeids logically belong to the invoke (or, if you - // like, the basic block containing the invoke), and need to be associated - // with it in the dwarf exception handling tables. Currently however the - // information is provided by an intrinsic (eh.selector) that can be moved - // to unexpected places by the optimizers: if the unwind edge is critical, - // then breaking it can result in the intrinsics being in the successor of - // the landing pad, not the landing pad itself. This results - // in exceptions not being caught because no typeids are associated with - // the invoke. This may not be the only way things can go wrong, but it - // is the only way we try to work around for the moment. - BranchInst *Br = dyn_cast<BranchInst>(LLVMBB->getTerminator()); - - if (Br && Br->isUnconditional()) { // Critical edge? - BasicBlock::iterator I, E; - for (I = LLVMBB->begin(), E = --LLVMBB->end(); I != E; ++I) - if (isa<EHSelectorInst>(I)) - break; - - if (I == E) - // No catch info found - try to extract some from the successor. 
- CopyCatchInfo(Br->getSuccessor(0), LLVMBB, &MF.getMMI(), *FuncInfo); - } - } - + // Setup an EH landing-pad block. + if (BB->isLandingPad()) + PrepareEHLandingPad(BB); + // Before doing SelectionDAG ISel, see if FastISel has been requested. - if (FastIS && !SuppressFastISel) { + if (FastIS) { // Emit code for any incoming arguments. This must happen before // beginning FastISel on the entry block. if (LLVMBB == &Fn.getEntryBlock()) { CurDAG->setRoot(SDB->getControlRoot()); - CodeGenAndEmitDAG(); SDB->clear(); + BB = CodeGenAndEmitDAG(BB); } FastIS->startNewBlock(BB); // Do FastISel on as many instructions as possible. for (; BI != End; ++BI) { - // Just before the terminator instruction, insert instructions to - // feed PHI nodes in successor blocks. - if (isa<TerminatorInst>(BI)) - if (!HandlePHINodesInSuccessorBlocksFast(LLVMBB, FastIS)) { - ++NumFastIselFailures; - ResetDebugLoc(SDB, FastIS); - if (EnableFastISelVerbose || EnableFastISelAbort) { - dbgs() << "FastISel miss: "; - BI->dump(); - } - assert(!EnableFastISelAbort && - "FastISel didn't handle a PHI in a successor"); - break; - } - - SetDebugLoc(BI, SDB, FastIS, &MF); - // Try to select the instruction with FastISel. - if (FastIS->SelectInstruction(BI)) { - ResetDebugLoc(SDB, FastIS); + if (FastIS->SelectInstruction(BI)) continue; - } - - // Clear out the debug location so that it doesn't carry over to - // unrelated instructions. - ResetDebugLoc(SDB, FastIS); // Then handle certain instructions as single-LLVM-Instruction blocks. if (isa<CallInst>(BI)) { @@ -967,14 +764,14 @@ void SelectionDAGISel::SelectAllBasicBlocks(Function &Fn, BI->dump(); } - if (!BI->getType()->isVoidTy()) { + if (!BI->getType()->isVoidTy() && !BI->use_empty()) { unsigned &R = FuncInfo->ValueMap[BI]; if (!R) R = FuncInfo->CreateRegForValue(BI); } bool HadTailCall = false; - SelectBasicBlock(LLVMBB, BI, llvm::next(BI), HadTailCall); + BB = SelectBasicBlock(BB, LLVMBB, BI, llvm::next(BI), HadTailCall); // If the call was emitted as a tail call, we're done with the block. if (HadTailCall) { @@ -1010,44 +807,41 @@ void SelectionDAGISel::SelectAllBasicBlocks(Function &Fn, // block. if (BI != End) { bool HadTailCall; - SelectBasicBlock(LLVMBB, BI, End, HadTailCall); + BB = SelectBasicBlock(BB, LLVMBB, BI, End, HadTailCall); } - FinishBasicBlock(); + FinishBasicBlock(BB); + FuncInfo->PHINodesToUpdate.clear(); } delete FastIS; } void -SelectionDAGISel::FinishBasicBlock() { - - DEBUG(dbgs() << "Target-post-processed machine code:\n"); - DEBUG(BB->dump()); +SelectionDAGISel::FinishBasicBlock(MachineBasicBlock *BB) { DEBUG(dbgs() << "Total amount of phi nodes to update: " - << SDB->PHINodesToUpdate.size() << "\n"); - DEBUG(for (unsigned i = 0, e = SDB->PHINodesToUpdate.size(); i != e; ++i) + << FuncInfo->PHINodesToUpdate.size() << "\n"); + DEBUG(for (unsigned i = 0, e = FuncInfo->PHINodesToUpdate.size(); i != e; ++i) dbgs() << "Node " << i << " : (" - << SDB->PHINodesToUpdate[i].first - << ", " << SDB->PHINodesToUpdate[i].second << ")\n"); + << FuncInfo->PHINodesToUpdate[i].first + << ", " << FuncInfo->PHINodesToUpdate[i].second << ")\n"); // Next, now that we know what the last MBB the LLVM BB expanded is, update // PHI nodes in successors. 
if (SDB->SwitchCases.empty() && SDB->JTCases.empty() && SDB->BitTestCases.empty()) { - for (unsigned i = 0, e = SDB->PHINodesToUpdate.size(); i != e; ++i) { - MachineInstr *PHI = SDB->PHINodesToUpdate[i].first; + for (unsigned i = 0, e = FuncInfo->PHINodesToUpdate.size(); i != e; ++i) { + MachineInstr *PHI = FuncInfo->PHINodesToUpdate[i].first; assert(PHI->isPHI() && "This is not a machine PHI node that we are updating!"); if (!BB->isSuccessor(PHI->getParent())) continue; - PHI->addOperand(MachineOperand::CreateReg(SDB->PHINodesToUpdate[i].second, - false)); + PHI->addOperand( + MachineOperand::CreateReg(FuncInfo->PHINodesToUpdate[i].second, false)); PHI->addOperand(MachineOperand::CreateMBB(BB)); } - SDB->PHINodesToUpdate.clear(); return; } @@ -1056,37 +850,38 @@ SelectionDAGISel::FinishBasicBlock() { if (!SDB->BitTestCases[i].Emitted) { // Set the current basic block to the mbb we wish to insert the code into BB = SDB->BitTestCases[i].Parent; - SDB->setCurrentBasicBlock(BB); // Emit the code - SDB->visitBitTestHeader(SDB->BitTestCases[i]); + SDB->visitBitTestHeader(SDB->BitTestCases[i], BB); CurDAG->setRoot(SDB->getRoot()); - CodeGenAndEmitDAG(); SDB->clear(); + BB = CodeGenAndEmitDAG(BB); } for (unsigned j = 0, ej = SDB->BitTestCases[i].Cases.size(); j != ej; ++j) { // Set the current basic block to the mbb we wish to insert the code into BB = SDB->BitTestCases[i].Cases[j].ThisBB; - SDB->setCurrentBasicBlock(BB); // Emit the code if (j+1 != ej) SDB->visitBitTestCase(SDB->BitTestCases[i].Cases[j+1].ThisBB, SDB->BitTestCases[i].Reg, - SDB->BitTestCases[i].Cases[j]); + SDB->BitTestCases[i].Cases[j], + BB); else SDB->visitBitTestCase(SDB->BitTestCases[i].Default, SDB->BitTestCases[i].Reg, - SDB->BitTestCases[i].Cases[j]); + SDB->BitTestCases[i].Cases[j], + BB); CurDAG->setRoot(SDB->getRoot()); - CodeGenAndEmitDAG(); SDB->clear(); + BB = CodeGenAndEmitDAG(BB); } // Update PHI Nodes - for (unsigned pi = 0, pe = SDB->PHINodesToUpdate.size(); pi != pe; ++pi) { - MachineInstr *PHI = SDB->PHINodesToUpdate[pi].first; + for (unsigned pi = 0, pe = FuncInfo->PHINodesToUpdate.size(); + pi != pe; ++pi) { + MachineInstr *PHI = FuncInfo->PHINodesToUpdate[pi].first; MachineBasicBlock *PHIBB = PHI->getParent(); assert(PHI->isPHI() && "This is not a machine PHI node that we are updating!"); @@ -1094,10 +889,12 @@ SelectionDAGISel::FinishBasicBlock() { // from last "case" BB. if (PHIBB == SDB->BitTestCases[i].Default) { PHI->addOperand(MachineOperand:: - CreateReg(SDB->PHINodesToUpdate[pi].second, false)); + CreateReg(FuncInfo->PHINodesToUpdate[pi].second, + false)); PHI->addOperand(MachineOperand::CreateMBB(SDB->BitTestCases[i].Parent)); PHI->addOperand(MachineOperand:: - CreateReg(SDB->PHINodesToUpdate[pi].second, false)); + CreateReg(FuncInfo->PHINodesToUpdate[pi].second, + false)); PHI->addOperand(MachineOperand::CreateMBB(SDB->BitTestCases[i].Cases. 
back().ThisBB)); } @@ -1107,7 +904,8 @@ SelectionDAGISel::FinishBasicBlock() { MachineBasicBlock* cBB = SDB->BitTestCases[i].Cases[j].ThisBB; if (cBB->isSuccessor(PHIBB)) { PHI->addOperand(MachineOperand:: - CreateReg(SDB->PHINodesToUpdate[pi].second, false)); + CreateReg(FuncInfo->PHINodesToUpdate[pi].second, + false)); PHI->addOperand(MachineOperand::CreateMBB(cBB)); } } @@ -1123,40 +921,42 @@ SelectionDAGISel::FinishBasicBlock() { if (!SDB->JTCases[i].first.Emitted) { // Set the current basic block to the mbb we wish to insert the code into BB = SDB->JTCases[i].first.HeaderBB; - SDB->setCurrentBasicBlock(BB); // Emit the code - SDB->visitJumpTableHeader(SDB->JTCases[i].second, SDB->JTCases[i].first); + SDB->visitJumpTableHeader(SDB->JTCases[i].second, SDB->JTCases[i].first, + BB); CurDAG->setRoot(SDB->getRoot()); - CodeGenAndEmitDAG(); SDB->clear(); + BB = CodeGenAndEmitDAG(BB); } // Set the current basic block to the mbb we wish to insert the code into BB = SDB->JTCases[i].second.MBB; - SDB->setCurrentBasicBlock(BB); // Emit the code SDB->visitJumpTable(SDB->JTCases[i].second); CurDAG->setRoot(SDB->getRoot()); - CodeGenAndEmitDAG(); SDB->clear(); + BB = CodeGenAndEmitDAG(BB); // Update PHI Nodes - for (unsigned pi = 0, pe = SDB->PHINodesToUpdate.size(); pi != pe; ++pi) { - MachineInstr *PHI = SDB->PHINodesToUpdate[pi].first; + for (unsigned pi = 0, pe = FuncInfo->PHINodesToUpdate.size(); + pi != pe; ++pi) { + MachineInstr *PHI = FuncInfo->PHINodesToUpdate[pi].first; MachineBasicBlock *PHIBB = PHI->getParent(); assert(PHI->isPHI() && "This is not a machine PHI node that we are updating!"); // "default" BB. We can go there only from header BB. if (PHIBB == SDB->JTCases[i].second.Default) { PHI->addOperand - (MachineOperand::CreateReg(SDB->PHINodesToUpdate[pi].second, false)); + (MachineOperand::CreateReg(FuncInfo->PHINodesToUpdate[pi].second, + false)); PHI->addOperand (MachineOperand::CreateMBB(SDB->JTCases[i].first.HeaderBB)); } // JT BB. Just iterate over successors here if (BB->isSuccessor(PHIBB)) { PHI->addOperand - (MachineOperand::CreateReg(SDB->PHINodesToUpdate[pi].second, false)); + (MachineOperand::CreateReg(FuncInfo->PHINodesToUpdate[pi].second, + false)); PHI->addOperand(MachineOperand::CreateMBB(BB)); } } @@ -1165,13 +965,13 @@ SelectionDAGISel::FinishBasicBlock() { // If the switch block involved a branch to one of the actual successors, we // need to update PHI nodes in that block. - for (unsigned i = 0, e = SDB->PHINodesToUpdate.size(); i != e; ++i) { - MachineInstr *PHI = SDB->PHINodesToUpdate[i].first; + for (unsigned i = 0, e = FuncInfo->PHINodesToUpdate.size(); i != e; ++i) { + MachineInstr *PHI = FuncInfo->PHINodesToUpdate[i].first; assert(PHI->isPHI() && "This is not a machine PHI node that we are updating!"); if (BB->isSuccessor(PHI->getParent())) { - PHI->addOperand(MachineOperand::CreateReg(SDB->PHINodesToUpdate[i].second, - false)); + PHI->addOperand( + MachineOperand::CreateReg(FuncInfo->PHINodesToUpdate[i].second, false)); PHI->addOperand(MachineOperand::CreateMBB(BB)); } } @@ -1181,26 +981,26 @@ SelectionDAGISel::FinishBasicBlock() { for (unsigned i = 0, e = SDB->SwitchCases.size(); i != e; ++i) { // Set the current basic block to the mbb we wish to insert the code into MachineBasicBlock *ThisBB = BB = SDB->SwitchCases[i].ThisBB; - SDB->setCurrentBasicBlock(BB); - // Emit the code - SDB->visitSwitchCase(SDB->SwitchCases[i]); + // Determine the unique successors. 
+ SmallVector<MachineBasicBlock *, 2> Succs; + Succs.push_back(SDB->SwitchCases[i].TrueBB); + if (SDB->SwitchCases[i].TrueBB != SDB->SwitchCases[i].FalseBB) + Succs.push_back(SDB->SwitchCases[i].FalseBB); + + // Emit the code. Note that this could result in ThisBB being split, so + // we need to check for updates. + SDB->visitSwitchCase(SDB->SwitchCases[i], BB); CurDAG->setRoot(SDB->getRoot()); - CodeGenAndEmitDAG(); + SDB->clear(); + ThisBB = CodeGenAndEmitDAG(BB); // Handle any PHI nodes in successors of this chunk, as if we were coming // from the original BB before switch expansion. Note that PHI nodes can // occur multiple times in PHINodesToUpdate. We have to be very careful to // handle them the right number of times. - while ((BB = SDB->SwitchCases[i].TrueBB)) { // Handle LHS and RHS. - // If new BB's are created during scheduling, the edges may have been - // updated. That is, the edge from ThisBB to BB may have been split and - // BB's predecessor is now another block. - DenseMap<MachineBasicBlock*, MachineBasicBlock*>::iterator EI = - SDB->EdgeMapping.find(BB); - if (EI != SDB->EdgeMapping.end()) - ThisBB = EI->second; - + for (unsigned i = 0, e = Succs.size(); i != e; ++i) { + BB = Succs[i]; // BB may have been removed from the CFG if a branch was constant folded. if (ThisBB->isSuccessor(BB)) { for (MachineBasicBlock::iterator Phi = BB->begin(); @@ -1208,11 +1008,11 @@ SelectionDAGISel::FinishBasicBlock() { ++Phi) { // This value for this PHI node is recorded in PHINodesToUpdate. for (unsigned pn = 0; ; ++pn) { - assert(pn != SDB->PHINodesToUpdate.size() && + assert(pn != FuncInfo->PHINodesToUpdate.size() && "Didn't find PHI entry!"); - if (SDB->PHINodesToUpdate[pn].first == Phi) { + if (FuncInfo->PHINodesToUpdate[pn].first == Phi) { Phi->addOperand(MachineOperand:: - CreateReg(SDB->PHINodesToUpdate[pn].second, + CreateReg(FuncInfo->PHINodesToUpdate[pn].second, false)); Phi->addOperand(MachineOperand::CreateMBB(ThisBB)); break; @@ -1220,21 +1020,9 @@ SelectionDAGISel::FinishBasicBlock() { } } } - - // Don't process RHS if same block as LHS. - if (BB == SDB->SwitchCases[i].FalseBB) - SDB->SwitchCases[i].FalseBB = 0; - - // If we haven't handled the RHS, do so now. Otherwise, we're done. - SDB->SwitchCases[i].TrueBB = SDB->SwitchCases[i].FalseBB; - SDB->SwitchCases[i].FalseBB = 0; } - assert(SDB->SwitchCases[i].TrueBB == 0 && SDB->SwitchCases[i].FalseBB == 0); - SDB->clear(); } SDB->SwitchCases.clear(); - - SDB->PHINodesToUpdate.clear(); } @@ -1333,16 +1121,17 @@ SelectInlineAsmMemoryOperands(std::vector<SDValue> &Ops) { std::vector<SDValue> InOps; std::swap(InOps, Ops); - Ops.push_back(InOps[0]); // input chain. - Ops.push_back(InOps[1]); // input asm string. + Ops.push_back(InOps[InlineAsm::Op_InputChain]); // 0 + Ops.push_back(InOps[InlineAsm::Op_AsmString]); // 1 + Ops.push_back(InOps[InlineAsm::Op_MDNode]); // 2, !srcloc - unsigned i = 2, e = InOps.size(); + unsigned i = InlineAsm::Op_FirstOperand, e = InOps.size(); if (InOps[e-1].getValueType() == MVT::Flag) --e; // Don't process a flag operand if it is here. while (i != e) { unsigned Flags = cast<ConstantSDNode>(InOps[i])->getZExtValue(); - if ((Flags & 7) != 4 /*MEM*/) { + if (!InlineAsm::isMemKind(Flags)) { // Just skip over this operand, copying the operands verbatim. 
Ops.insert(Ops.end(), InOps.begin()+i, InOps.begin()+i+InlineAsm::getNumOperandRegisters(Flags) + 1); @@ -1352,14 +1141,14 @@ SelectInlineAsmMemoryOperands(std::vector<SDValue> &Ops) { "Memory operand with multiple values?"); // Otherwise, this is a memory operand. Ask the target to select it. std::vector<SDValue> SelOps; - if (SelectInlineAsmMemoryOperand(InOps[i+1], 'm', SelOps)) { - llvm_report_error("Could not match memory address. Inline asm" - " failure!"); - } + if (SelectInlineAsmMemoryOperand(InOps[i+1], 'm', SelOps)) + report_fatal_error("Could not match memory address. Inline asm" + " failure!"); // Add this to the output node. - Ops.push_back(CurDAG->getTargetConstant(4/*MEM*/ | (SelOps.size()<< 3), - MVT::i32)); + unsigned NewFlags = + InlineAsm::getFlagWord(InlineAsm::Kind_Mem, SelOps.size()); + Ops.push_back(CurDAG->getTargetConstant(NewFlags, MVT::i32)); Ops.insert(Ops.end(), SelOps.begin(), SelOps.end()); i += 2; } @@ -1436,7 +1225,8 @@ bool SelectionDAGISel::IsProfitableToFold(SDValue N, SDNode *U, /// IsLegalToFold - Returns true if the specific operand node N of /// U can be folded during instruction selection that starts at Root. bool SelectionDAGISel::IsLegalToFold(SDValue N, SDNode *U, SDNode *Root, - bool IgnoreChains) const { + CodeGenOpt::Level OptLevel, + bool IgnoreChains) { if (OptLevel == CodeGenOpt::None) return false; // If Root use can somehow reach N through a path that that doesn't contain @@ -2011,6 +1801,7 @@ static unsigned IsPredicateKnownToFail(const unsigned char *Table, } } +namespace { struct MatchScope { /// FailIndex - If this match fails, this is the index to continue with. @@ -2032,6 +1823,8 @@ struct MatchScope { bool HasChainNodesMatched, HasFlagResultNodesMatched; }; +} + SDNode *SelectionDAGISel:: SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable, unsigned TableSize) { @@ -2045,6 +1838,7 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable, //case ISD::VALUETYPE: //case ISD::CONDCODE: case ISD::HANDLENODE: + case ISD::MDNODE_SDNODE: case ISD::TargetConstant: case ISD::TargetConstantFP: case ISD::TargetConstantPool: @@ -2383,7 +2177,8 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable, if (!IsProfitableToFold(N, NodeStack[NodeStack.size()-2].getNode(), NodeToMatch) || !IsLegalToFold(N, NodeStack[NodeStack.size()-2].getNode(), - NodeToMatch, true/*We validate our own chains*/)) + NodeToMatch, OptLevel, + true/*We validate our own chains*/)) break; continue; @@ -2776,7 +2571,7 @@ void SelectionDAGISel::CannotYetSelect(SDNode *N) { else Msg << "unknown intrinsic #" << iid; } - llvm_report_error(Msg.str()); + report_fatal_error(Msg.str()); } char SelectionDAGISel::ID = 0; diff --git a/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/lib/CodeGen/SelectionDAG/TargetLowering.cpp index ea2ff2f..8a4a1b1 100644 --- a/lib/CodeGen/SelectionDAG/TargetLowering.cpp +++ b/lib/CodeGen/SelectionDAG/TargetLowering.cpp @@ -480,7 +480,8 @@ static void InitCmpLibcallCCs(ISD::CondCode *CCs) { } /// NOTE: The constructor takes ownership of TLOF. -TargetLowering::TargetLowering(TargetMachine &tm,TargetLoweringObjectFile *tlof) +TargetLowering::TargetLowering(const TargetMachine &tm, + const TargetLoweringObjectFile *tlof) : TM(tm), TD(TM.getTargetData()), TLOF(*tlof) { // All operations default to being supported. 
memset(OpActions, 0, sizeof(OpActions)); @@ -720,7 +721,7 @@ void TargetLowering::computeRegisterProperties() { unsigned NElts = VT.getVectorNumElements(); for (unsigned nVT = i+1; nVT <= MVT::LAST_VECTOR_VALUETYPE; ++nVT) { EVT SVT = (MVT::SimpleValueType)nVT; - if (isTypeLegal(SVT) && SVT.getVectorElementType() == EltVT && + if (isTypeSynthesizable(SVT) && SVT.getVectorElementType() == EltVT && SVT.getVectorNumElements() > NElts && NElts != 1) { TransformToType[i] = SVT; ValueTypeActions.setTypeAction(VT, Promote); @@ -1279,8 +1280,9 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, // variable. The low bit of the shift cannot be an input sign bit unless // the shift amount is >= the size of the datatype, which is undefined. if (DemandedMask == 1) - return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SRL, dl, Op.getValueType(), - Op.getOperand(0), Op.getOperand(1))); + return TLO.CombineTo(Op, + TLO.DAG.getNode(ISD::SRL, dl, Op.getValueType(), + Op.getOperand(0), Op.getOperand(1))); if (ConstantSDNode *SA = dyn_cast<ConstantSDNode>(Op.getOperand(1))) { EVT VT = Op.getValueType(); @@ -1465,23 +1467,29 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, case ISD::SRL: // Shrink SRL by a constant if none of the high bits shifted in are // demanded. - if (ConstantSDNode *ShAmt = dyn_cast<ConstantSDNode>(In.getOperand(1))){ - APInt HighBits = APInt::getHighBitsSet(OperandBitWidth, - OperandBitWidth - BitWidth); - HighBits = HighBits.lshr(ShAmt->getZExtValue()); - HighBits.trunc(BitWidth); - - if (ShAmt->getZExtValue() < BitWidth && !(HighBits & NewMask)) { - // None of the shifted in bits are needed. Add a truncate of the - // shift input, then shift it. - SDValue NewTrunc = TLO.DAG.getNode(ISD::TRUNCATE, dl, - Op.getValueType(), - In.getOperand(0)); - return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SRL, dl, - Op.getValueType(), - NewTrunc, - In.getOperand(1))); - } + if (TLO.LegalTypes() && + !isTypeDesirableForOp(ISD::SRL, Op.getValueType())) + // Do not turn (vt1 truncate (vt2 srl)) into (vt1 srl) if vt1 is + // undesirable. + break; + ConstantSDNode *ShAmt = dyn_cast<ConstantSDNode>(In.getOperand(1)); + if (!ShAmt) + break; + APInt HighBits = APInt::getHighBitsSet(OperandBitWidth, + OperandBitWidth - BitWidth); + HighBits = HighBits.lshr(ShAmt->getZExtValue()); + HighBits.trunc(BitWidth); + + if (ShAmt->getZExtValue() < BitWidth && !(HighBits & NewMask)) { + // None of the shifted in bits are needed. Add a truncate of the + // shift input, then shift it. + SDValue NewTrunc = TLO.DAG.getNode(ISD::TRUNCATE, dl, + Op.getValueType(), + In.getOperand(0)); + return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SRL, dl, + Op.getValueType(), + NewTrunc, + In.getOperand(1))); } break; } @@ -1874,10 +1882,15 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, isa<ConstantSDNode>(Op0.getOperand(1)) && cast<ConstantSDNode>(Op0.getOperand(1))->getAPIntValue() == 1) { // If this is (X&1) == / != 1, normalize it to (X&1) != / == 0. - if (Op0.getValueType() != VT) + if (Op0.getValueType().bitsGT(VT)) Op0 = DAG.getNode(ISD::AND, dl, VT, DAG.getNode(ISD::TRUNCATE, dl, VT, Op0.getOperand(0)), DAG.getConstant(1, VT)); + else if (Op0.getValueType().bitsLT(VT)) + Op0 = DAG.getNode(ISD::AND, dl, VT, + DAG.getNode(ISD::ANY_EXTEND, dl, VT, Op0.getOperand(0)), + DAG.getConstant(1, VT)); + return DAG.getSetCC(dl, VT, Op0, DAG.getConstant(0, Op0.getValueType()), Cond == ISD::SETEQ ? 
ISD::SETNE : ISD::SETEQ); @@ -2245,7 +2258,7 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, /// isGAPlusOffset - Returns true (and the GlobalValue and the offset) if the /// node is a GlobalAddress + offset. -bool TargetLowering::isGAPlusOffset(SDNode *N, GlobalValue* &GA, +bool TargetLowering::isGAPlusOffset(SDNode *N, const GlobalValue* &GA, int64_t &Offset) const { if (isa<GlobalAddressSDNode>(N)) { GlobalAddressSDNode *GASD = cast<GlobalAddressSDNode>(N); diff --git a/lib/CodeGen/SelectionDAG/TargetSelectionDAGInfo.cpp b/lib/CodeGen/SelectionDAG/TargetSelectionDAGInfo.cpp new file mode 100644 index 0000000..d20477f --- /dev/null +++ b/lib/CodeGen/SelectionDAG/TargetSelectionDAGInfo.cpp @@ -0,0 +1,21 @@ +//===-- TargetSelectionDAGInfo.cpp - SelectionDAG Info --------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This implements the TargetSelectionDAGInfo class. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Target/TargetSelectionDAGInfo.h" +using namespace llvm; + +TargetSelectionDAGInfo::TargetSelectionDAGInfo() { +} + +TargetSelectionDAGInfo::~TargetSelectionDAGInfo() { +} diff --git a/lib/CodeGen/ShadowStackGC.cpp b/lib/CodeGen/ShadowStackGC.cpp index 0e6d479..5240bef 100644 --- a/lib/CodeGen/ShadowStackGC.cpp +++ b/lib/CodeGen/ShadowStackGC.cpp @@ -160,7 +160,7 @@ namespace { Args.clear(); Args.append(CI->op_begin() + 1, CI->op_end()); - InvokeInst *II = InvokeInst::Create(CI->getOperand(0), + InvokeInst *II = InvokeInst::Create(CI->getCalledValue(), NewBB, CleanupBB, Args.begin(), Args.end(), CI->getName(), CallBB); diff --git a/lib/CodeGen/SimpleRegisterCoalescing.cpp b/lib/CodeGen/SimpleRegisterCoalescing.cpp index 15ca374..1f68a6f 100644 --- a/lib/CodeGen/SimpleRegisterCoalescing.cpp +++ b/lib/CodeGen/SimpleRegisterCoalescing.cpp @@ -179,7 +179,7 @@ bool SimpleRegisterCoalescing::AdjustCopiesBackFrom(LiveInterval &IntA, for (const unsigned* SR = tri_->getSubRegisters(IntB.reg); *SR; ++SR) if (li_->hasInterval(*SR) && IntA.overlaps(li_->getInterval(*SR))) { DEBUG({ - dbgs() << "Interfere with sub-register "; + dbgs() << "\t\tInterfere with sub-register "; li_->getInterval(*SR).print(dbgs(), tri_); }); return false; @@ -187,7 +187,7 @@ bool SimpleRegisterCoalescing::AdjustCopiesBackFrom(LiveInterval &IntA, } DEBUG({ - dbgs() << "\nExtending: "; + dbgs() << "Extending: "; IntB.print(dbgs(), tri_); }); @@ -236,7 +236,7 @@ bool SimpleRegisterCoalescing::AdjustCopiesBackFrom(LiveInterval &IntA, // If the copy instruction was killing the destination register before the // merge, find the last use and trim the live range. That will also add the // isKill marker. - if (CopyMI->killsRegister(IntA.reg)) + if (ALR->valno->isKill(CopyIdx)) TrimLiveIntervalToLastUse(CopyUseIdx, CopyMI->getParent(), IntA, ALR); ++numExtends; @@ -259,6 +259,9 @@ bool SimpleRegisterCoalescing::HasOtherReachingDefs(LiveInterval &IntA, for (; BI != IntB.ranges.end() && AI->end >= BI->start; ++BI) { if (BI->valno == BValNo) continue; + // When BValNo is null, we're looking for a dummy clobber-value for a subreg. 
+ if (!BValNo && !BI->valno->isDefAccurate() && !BI->valno->getCopy()) + continue; if (BI->start <= AI->start && BI->end > AI->start) return true; if (BI->start > AI->start && BI->start < AI->end) @@ -369,6 +372,17 @@ bool SimpleRegisterCoalescing::RemoveCopyByCommutingDef(LiveInterval &IntA, if (HasOtherReachingDefs(IntA, IntB, AValNo, BValNo)) return false; + bool BHasSubRegs = false; + if (TargetRegisterInfo::isPhysicalRegister(IntB.reg)) + BHasSubRegs = *tri_->getSubRegisters(IntB.reg); + + // Abort if the subregisters of IntB.reg have values that are not simply the + // clobbers from the superreg. + if (BHasSubRegs) + for (const unsigned *SR = tri_->getSubRegisters(IntB.reg); *SR; ++SR) + if (HasOtherReachingDefs(IntA, li_->getInterval(*SR), AValNo, 0)) + return false; + // If some of the uses of IntA.reg is already coalesced away, return false. // It's not possible to determine whether it's safe to perform the coalescing. for (MachineRegisterInfo::use_nodbg_iterator UI = @@ -417,9 +431,6 @@ bool SimpleRegisterCoalescing::RemoveCopyByCommutingDef(LiveInterval &IntA, BExtend[ALR->end] = BLR->end; // Update uses of IntA of the specific Val# with IntB. - bool BHasSubRegs = false; - if (TargetRegisterInfo::isPhysicalRegister(IntB.reg)) - BHasSubRegs = *tri_->getSubRegisters(IntB.reg); for (MachineRegisterInfo::use_iterator UI = mri_->use_begin(IntA.reg), UE = mri_->use_end(); UI != UE;) { MachineOperand &UseMO = UI.getOperand(); @@ -470,7 +481,7 @@ bool SimpleRegisterCoalescing::RemoveCopyByCommutingDef(LiveInterval &IntA, // We need to insert a new liverange: [ALR.start, LastUse). It may be we can // simply extend BLR if CopyMI doesn't end the range. DEBUG({ - dbgs() << "\nExtending: "; + dbgs() << "Extending: "; IntB.print(dbgs(), tri_); }); @@ -523,7 +534,6 @@ bool SimpleRegisterCoalescing::RemoveCopyByCommutingDef(LiveInterval &IntA, DEBUG({ dbgs() << " result = "; IntB.print(dbgs(), tri_); - dbgs() << '\n'; dbgs() << "\nShortening: "; IntA.print(dbgs(), tri_); }); @@ -709,7 +719,7 @@ bool SimpleRegisterCoalescing::ReMaterializeTrivialDef(LiveInterval &SrcInt, // kill. bool checkForDeadDef = false; MachineBasicBlock *MBB = CopyMI->getParent(); - if (CopyMI->killsRegister(SrcInt.reg)) + if (SrcLR->valno->isKill(DefIdx)) if (!TrimLiveIntervalToLastUse(CopyIdx, MBB, SrcInt, SrcLR)) { checkForDeadDef = true; } @@ -804,7 +814,8 @@ SimpleRegisterCoalescing::UpdateRegDefsUses(unsigned SrcReg, unsigned DstReg, CopySrcReg == SrcReg && CopyDstReg != UseDstReg) { // If the use is a copy and it won't be coalesced away, and its source // is defined by a trivial computation, try to rematerialize it instead. - if (ReMaterializeTrivialDef(li_->getInterval(SrcReg), CopyDstReg, + if (!JoinedCopies.count(UseMI) && + ReMaterializeTrivialDef(li_->getInterval(SrcReg), CopyDstReg, CopyDstSubIdx, UseMI)) continue; } @@ -824,6 +835,8 @@ SimpleRegisterCoalescing::UpdateRegDefsUses(unsigned SrcReg, unsigned DstReg, UseMI->isRegTiedToDefOperand(&O-&UseMI->getOperand(0)))) UseMI->addRegisterKilled(DstReg, tri_, true); } + DEBUG(dbgs() << "\t\tupdated: " << li_->getInstructionIndex(UseMI) + << "\t" << *UseMI); continue; } @@ -836,11 +849,11 @@ SimpleRegisterCoalescing::UpdateRegDefsUses(unsigned SrcReg, unsigned DstReg, assert(OldSubIdx < SubIdx && "Conflicting sub-register index!"); else if (SubIdx) O.setSubReg(SubIdx); - // Remove would-be duplicated kill marker. 
- if (O.isKill() && UseMI->killsRegister(DstReg)) - O.setIsKill(false); O.setReg(DstReg); + DEBUG(dbgs() << "\t\tupdated: " << li_->getInstructionIndex(UseMI) + << "\t" << *UseMI); + // After updating the operand, check if the machine instruction has // become a copy. If so, update its val# information. if (JoinedCopies.count(UseMI)) @@ -865,38 +878,6 @@ SimpleRegisterCoalescing::UpdateRegDefsUses(unsigned SrcReg, unsigned DstReg, } } -/// RemoveUnnecessaryKills - Remove kill markers that are no longer accurate -/// due to live range lengthening as the result of coalescing. -void SimpleRegisterCoalescing::RemoveUnnecessaryKills(unsigned Reg, - LiveInterval &LI) { - for (MachineRegisterInfo::use_iterator UI = mri_->use_begin(Reg), - UE = mri_->use_end(); UI != UE; ++UI) { - MachineOperand &UseMO = UI.getOperand(); - if (!UseMO.isKill()) - continue; - MachineInstr *UseMI = UseMO.getParent(); - SlotIndex UseIdx = - li_->getInstructionIndex(UseMI).getUseIndex(); - const LiveRange *LR = LI.getLiveRangeContaining(UseIdx); - if (!LR || - (!LR->valno->isKill(UseIdx.getDefIndex()) && - LR->valno->def != UseIdx.getDefIndex())) { - // Interesting problem. After coalescing reg1027's def and kill are both - // at the same point: %reg1027,0.000000e+00 = [56,814:0) 0@70-(814) - // - // bb5: - // 60 %reg1027<def> = t2MOVr %reg1027, 14, %reg0, %reg0 - // 68 %reg1027<def> = t2LDRi12 %reg1027<kill>, 8, 14, %reg0 - // 76 t2CMPzri %reg1038<kill,undef>, 0, 14, %reg0, %CPSR<imp-def> - // 84 %reg1027<def> = t2MOVr %reg1027, 14, %reg0, %reg0 - // 96 t2Bcc mbb<bb5,0x2030910>, 1, %CPSR<kill> - // - // Do not remove the kill marker on t2LDRi12. - UseMO.setIsKill(false); - } - } -} - /// removeIntervalIfEmpty - Check if the live interval of a physical register /// is empty, if so remove it and also remove the empty intervals of its /// sub-registers. Return true if live interval is removed. @@ -1064,9 +1045,8 @@ SimpleRegisterCoalescing::isWinToJoinVRWithSrcPhysReg(MachineInstr *CopyMI, unsigned Threshold = allocatableRCRegs_[RC].count() * 2; unsigned Length = li_->getApproximateInstructionCount(DstInt); if (Length > Threshold && - (((float)std::distance(mri_->use_nodbg_begin(DstInt.reg), - mri_->use_nodbg_end()) / Length) < - (1.0 / Threshold))) + std::distance(mri_->use_nodbg_begin(DstInt.reg), + mri_->use_nodbg_end()) * Threshold < Length) return false; // If the virtual register live interval extends into a loop, turn down @@ -1122,9 +1102,8 @@ SimpleRegisterCoalescing::isWinToJoinVRWithDstPhysReg(MachineInstr *CopyMI, unsigned Threshold = allocatableRCRegs_[RC].count() * 2; unsigned Length = li_->getApproximateInstructionCount(SrcInt); if (Length > Threshold && - (((float)std::distance(mri_->use_nodbg_begin(SrcInt.reg), - mri_->use_nodbg_end()) / Length) < - (1.0 / Threshold))) + std::distance(mri_->use_nodbg_begin(SrcInt.reg), + mri_->use_nodbg_end()) * Threshold < Length) return false; if (SrcInt.empty()) @@ -1168,20 +1147,42 @@ SimpleRegisterCoalescing::isWinToJoinVRWithDstPhysReg(MachineInstr *CopyMI, /// isWinToJoinCrossClass - Return true if it's profitable to coalesce /// two virtual registers from different register classes. bool -SimpleRegisterCoalescing::isWinToJoinCrossClass(unsigned LargeReg, - unsigned SmallReg, - unsigned Threshold) { - // Then make sure the intervals are *short*. 
- LiveInterval &LargeInt = li_->getInterval(LargeReg); - LiveInterval &SmallInt = li_->getInterval(SmallReg); - unsigned LargeSize = li_->getApproximateInstructionCount(LargeInt); - unsigned SmallSize = li_->getApproximateInstructionCount(SmallInt); - if (LargeSize > Threshold) { - unsigned SmallUses = std::distance(mri_->use_nodbg_begin(SmallReg), - mri_->use_nodbg_end()); - unsigned LargeUses = std::distance(mri_->use_nodbg_begin(LargeReg), - mri_->use_nodbg_end()); - if (SmallUses*LargeSize < LargeUses*SmallSize) +SimpleRegisterCoalescing::isWinToJoinCrossClass(unsigned SrcReg, + unsigned DstReg, + const TargetRegisterClass *SrcRC, + const TargetRegisterClass *DstRC, + const TargetRegisterClass *NewRC) { + unsigned NewRCCount = allocatableRCRegs_[NewRC].count(); + // This heuristic is good enough in practice, but it's obviously not *right*. + // 4 is a magic number that works well enough for x86, ARM, etc. It filters + // out all but the most restrictive register classes. + if (NewRCCount > 4 || + // Early exit if the function is fairly small, coalesce aggressively if + // that's the case. For really special register classes with 3 or + // fewer registers, be a bit more careful. + (li_->getFuncInstructionCount() / NewRCCount) < 8) + return true; + LiveInterval &SrcInt = li_->getInterval(SrcReg); + LiveInterval &DstInt = li_->getInterval(DstReg); + unsigned SrcSize = li_->getApproximateInstructionCount(SrcInt); + unsigned DstSize = li_->getApproximateInstructionCount(DstInt); + if (SrcSize <= NewRCCount && DstSize <= NewRCCount) + return true; + // Estimate *register use density*. If it doubles or more, abort. + unsigned SrcUses = std::distance(mri_->use_nodbg_begin(SrcReg), + mri_->use_nodbg_end()); + unsigned DstUses = std::distance(mri_->use_nodbg_begin(DstReg), + mri_->use_nodbg_end()); + unsigned NewUses = SrcUses + DstUses; + unsigned NewSize = SrcSize + DstSize; + if (SrcRC != NewRC && SrcSize > NewRCCount) { + unsigned SrcRCCount = allocatableRCRegs_[SrcRC].count(); + if (NewUses*SrcSize*SrcRCCount > 2*SrcUses*NewSize*NewRCCount) + return false; + } + if (DstRC != NewRC && DstSize > NewRCCount) { + unsigned DstRCCount = allocatableRCRegs_[DstRC].count(); + if (NewUses*DstSize*DstRCCount > 2*DstUses*NewSize*NewRCCount) + return false; } return true; @@ -1263,7 +1264,7 @@ SimpleRegisterCoalescing::CanJoinExtractSubRegToPhysReg(unsigned DstReg, if (li_->hasInterval(RealDstReg) && RHS.overlaps(li_->getInterval(RealDstReg))) { DEBUG({ - dbgs() << "Interfere with register "; + dbgs() << "\t\tInterfere with register "; li_->getInterval(RealDstReg).print(dbgs(), tri_); }); return false; // Not coalescable @@ -1275,7 +1276,7 @@ SimpleRegisterCoalescing::CanJoinExtractSubRegToPhysReg(unsigned DstReg, !tri_->isSubRegister(DstReg, *SR) && li_->hasInterval(*SR) && RHS.overlaps(li_->getInterval(*SR))) { DEBUG({ - dbgs() << "Interfere with sub-register "; + dbgs() << "\t\tInterfere with sub-register "; li_->getInterval(*SR).print(dbgs(), tri_); }); return false; // Not coalescable @@ -1298,7 +1299,7 @@ SimpleRegisterCoalescing::CanJoinInsertSubRegToPhysReg(unsigned DstReg, if (li_->hasInterval(RealSrcReg) && LHS.overlaps(li_->getInterval(RealSrcReg))) { DEBUG({ - dbgs() << "Interfere with register "; + dbgs() << "\t\tInterfere with register "; li_->getInterval(RealSrcReg).print(dbgs(), tri_); }); return false; // Not coalescable @@ -1310,7 +1311,7 @@ SimpleRegisterCoalescing::CanJoinInsertSubRegToPhysReg(unsigned DstReg, !tri_->isSubRegister(SrcReg, *SR) && li_->hasInterval(*SR) &&
LHS.overlaps(li_->getInterval(*SR))) { DEBUG({ - dbgs() << "Interfere with sub-register "; + dbgs() << "\t\tInterfere with sub-register "; li_->getInterval(*SR).print(dbgs(), tri_); }); return false; // Not coalescable @@ -1400,6 +1401,13 @@ bool SimpleRegisterCoalescing::JoinCopy(CopyRec &TheCopy, bool &Again) { return false; // Not coalescable. } + // We cannot handle dual subreg indices and mismatched classes at the same + // time. + if (SrcSubIdx && DstSubIdx && differingRegisterClasses(SrcReg, DstReg)) { + DEBUG(dbgs() << "\tCannot handle subreg indices and mismatched classes.\n"); + return false; + } + // Check that a physical source register is compatible with dst regclass if (SrcIsPhys) { unsigned SrcSubReg = SrcSubIdx ? @@ -1517,10 +1525,11 @@ bool SimpleRegisterCoalescing::JoinCopy(CopyRec &TheCopy, bool &Again) { return false; // Not coalescable } - unsigned LargeReg = isExtSubReg ? SrcReg : DstReg; - unsigned SmallReg = isExtSubReg ? DstReg : SrcReg; - unsigned Limit= allocatableRCRegs_[mri_->getRegClass(SmallReg)].count(); - if (!isWinToJoinCrossClass(LargeReg, SmallReg, Limit)) { + if (!isWinToJoinCrossClass(SrcReg, DstReg, SrcRC, DstRC, NewRC)) { + DEBUG(dbgs() << "\tAvoid coalescing to constrained register class: " + << SrcRC->getName() << "/" + << DstRC->getName() << " -> " + << NewRC->getName() << ".\n"); Again = true; // May be possible to coalesce later. return false; } @@ -1568,49 +1577,40 @@ bool SimpleRegisterCoalescing::JoinCopy(CopyRec &TheCopy, bool &Again) { } } - unsigned LargeReg = SrcReg; - unsigned SmallReg = DstReg; - // Now determine the register class of the joined register. - if (isExtSubReg) { - if (SubIdx && DstRC && DstRC->isASubClass()) { - // This is a move to a sub-register class. However, the source is a - // sub-register of a larger register class. We don't know what should - // the register class be. FIXME. - Again = true; - return false; + if (!SrcIsPhys && !DstIsPhys) { + if (isExtSubReg) { + NewRC = + SubIdx ? tri_->getMatchingSuperRegClass(SrcRC, DstRC, SubIdx) : SrcRC; + } else if (isInsSubReg) { + NewRC = + SubIdx ? tri_->getMatchingSuperRegClass(DstRC, SrcRC, SubIdx) : DstRC; + } else { + NewRC = getCommonSubClass(SrcRC, DstRC); } - if (!DstIsPhys && !SrcIsPhys) - NewRC = SrcRC; - } else if (!SrcIsPhys && !DstIsPhys) { - NewRC = getCommonSubClass(SrcRC, DstRC); + if (!NewRC) { DEBUG(dbgs() << "\tDisjoint regclasses: " << SrcRC->getName() << ", " << DstRC->getName() << ".\n"); return false; // Not coalescable. } - if (DstRC->getSize() > SrcRC->getSize()) - std::swap(LargeReg, SmallReg); - } - // If we are joining two virtual registers and the resulting register - // class is more restrictive (fewer register, smaller size). Check if it's - // worth doing the merge. - if (!SrcIsPhys && !DstIsPhys && - (isExtSubReg || DstRC->isASubClass()) && - !isWinToJoinCrossClass(LargeReg, SmallReg, - allocatableRCRegs_[NewRC].count())) { - DEBUG(dbgs() << "\tSrc/Dest are different register classes: " - << SrcRC->getName() << "/" - << DstRC->getName() << " -> " - << NewRC->getName() << ".\n"); - // Allow the coalescer to try again in case either side gets coalesced to - // a physical register that's compatible with the other side. e.g. - // r1024 = MOV32to32_ r1025 - // But later r1024 is assigned EAX then r1025 may be coalesced with EAX. - Again = true; // May be possible to coalesce later. - return false; + // If we are joining two virtual registers and the resulting register + // class is more restrictive (fewer register, smaller size). 
Check if it's + // worth doing the merge. + if (!isWinToJoinCrossClass(SrcReg, DstReg, SrcRC, DstRC, NewRC)) { + DEBUG(dbgs() << "\tAvoid coalescing to constrained register class: " + << SrcRC->getName() << "/" + << DstRC->getName() << " -> " + << NewRC->getName() << ".\n"); + // Allow the coalescer to try again in case either side gets coalesced to + // a physical register that's compatible with the other side. e.g. + // r1024 = MOV32to32_ r1025 + // But later r1024 is assigned EAX then r1025 may be coalesced with EAX. + Again = true; // May be possible to coalesce later. + return false; + } } } @@ -1626,9 +1626,13 @@ bool SimpleRegisterCoalescing::JoinCopy(CopyRec &TheCopy, bool &Again) { "Register mapping is horribly broken!"); DEBUG({ - dbgs() << "\t\tInspecting "; SrcInt.print(dbgs(), tri_); - dbgs() << " and "; DstInt.print(dbgs(), tri_); - dbgs() << ": "; + dbgs() << "\t\tInspecting "; + if (SrcRC) dbgs() << SrcRC->getName() << ": "; + SrcInt.print(dbgs(), tri_); + dbgs() << "\n\t\t and "; + if (DstRC) dbgs() << DstRC->getName() << ": "; + DstInt.print(dbgs(), tri_); + dbgs() << "\n"; }); // Save a copy of the virtual register live interval. We'll manually @@ -1672,10 +1676,9 @@ bool SimpleRegisterCoalescing::JoinCopy(CopyRec &TheCopy, bool &Again) { const TargetRegisterClass *RC = mri_->getRegClass(JoinVReg); unsigned Threshold = allocatableRCRegs_[RC].count() * 2; unsigned Length = li_->getApproximateInstructionCount(JoinVInt); - float Ratio = 1.0 / Threshold; if (Length > Threshold && - (((float)std::distance(mri_->use_nodbg_begin(JoinVReg), - mri_->use_nodbg_end()) / Length) < Ratio)) { + std::distance(mri_->use_nodbg_begin(JoinVReg), + mri_->use_nodbg_end()) * Threshold < Length) { // Before giving up coalescing, if definition of source is defined by // trivial computation, try rematerializing it. if (ReMaterializeTrivialDef(SrcInt, DstReg, DstSubIdx, CopyMI)) @@ -1701,7 +1704,7 @@ bool SimpleRegisterCoalescing::JoinCopy(CopyRec &TheCopy, bool &Again) { // Only coalesce an empty interval (defined by implicit_def) with // another interval which has a valno defined by the CopyMI and the CopyMI // is a kill of the implicit def. - DEBUG(dbgs() << "Not profitable!\n"); + DEBUG(dbgs() << "\tNot profitable!\n"); return false; } } else if (!JoinIntervals(DstInt, SrcInt, Swapped)) { @@ -1718,12 +1721,12 @@ bool SimpleRegisterCoalescing::JoinCopy(CopyRec &TheCopy, bool &Again) { (AdjustCopiesBackFrom(SrcInt, DstInt, CopyMI) || RemoveCopyByCommutingDef(SrcInt, DstInt, CopyMI))) { JoinedCopies.insert(CopyMI); - DEBUG(dbgs() << "Trivial!\n"); + DEBUG(dbgs() << "\tTrivial!\n"); return true; } // Otherwise, we are unable to join the intervals. - DEBUG(dbgs() << "Interference!\n"); + DEBUG(dbgs() << "\tInterference!\n"); Again = true; // May be possible to coalesce later. return false; } @@ -1794,12 +1797,6 @@ bool SimpleRegisterCoalescing::JoinCopy(CopyRec &TheCopy, bool &Again) { // Remember to delete the copy instruction. JoinedCopies.insert(CopyMI); - // Some live range has been lengthened due to colaescing, eliminate the - // unnecessary kills. - RemoveUnnecessaryKills(SrcReg, *ResDstInt); - if (TargetRegisterInfo::isVirtualRegister(DstReg)) - RemoveUnnecessaryKills(DstReg, *ResDstInt); - UpdateRegDefsUses(SrcReg, DstReg, SubIdx); // If we have extended the live range of a physical register, make sure we @@ -1843,7 +1840,7 @@ bool SimpleRegisterCoalescing::JoinCopy(CopyRec &TheCopy, bool &Again) { } DEBUG({ - dbgs() << "\n\t\tJoined. Result = "; + dbgs() << "\t\tJoined. 
Result = "; ResDstInt->print(dbgs(), tri_); dbgs() << "\n"; }); @@ -2198,7 +2195,7 @@ SimpleRegisterCoalescing::JoinIntervals(LiveInterval &LHS, LiveInterval &RHS, for (const unsigned* SR = tri_->getSubRegisters(LHS.reg); *SR; ++SR) if (li_->hasInterval(*SR) && RHS.overlaps(li_->getInterval(*SR))) { DEBUG({ - dbgs() << "Interfere with sub-register "; + dbgs() << "\tInterfere with sub-register "; li_->getInterval(*SR).print(dbgs(), tri_); }); return false; @@ -2215,7 +2212,7 @@ SimpleRegisterCoalescing::JoinIntervals(LiveInterval &LHS, LiveInterval &RHS, for (const unsigned* SR = tri_->getSubRegisters(RHS.reg); *SR; ++SR) if (li_->hasInterval(*SR) && LHS.overlaps(li_->getInterval(*SR))) { DEBUG({ - dbgs() << "Interfere with sub-register "; + dbgs() << "\tInterfere with sub-register "; li_->getInterval(*SR).print(dbgs(), tri_); }); return false; @@ -2673,13 +2670,6 @@ SimpleRegisterCoalescing::lastRegisterUse(SlotIndex Start, return NULL; } -void SimpleRegisterCoalescing::printRegName(unsigned reg) const { - if (TargetRegisterInfo::isPhysicalRegister(reg)) - dbgs() << tri_->getName(reg); - else - dbgs() << "%reg" << reg; -} - void SimpleRegisterCoalescing::releaseMemory() { JoinedCopies.clear(); ReMatCopies.clear(); @@ -2744,7 +2734,7 @@ bool SimpleRegisterCoalescing::runOnMachineFunction(MachineFunction &fn) { // delete them later. DoDelete = false; } - if (MI->registerDefIsDead(DstReg)) { + if (MI->allDefsAreDead()) { LiveInterval &li = li_->getInterval(DstReg); if (!ShortenDeadCopySrcLiveRange(li, MI)) ShortenDeadCopyLiveRange(li, MI); @@ -2808,8 +2798,25 @@ bool SimpleRegisterCoalescing::runOnMachineFunction(MachineFunction &fn) { li_->RemoveMachineInstrFromMaps(MI); mii = mbbi->erase(mii); ++numPeep; - } else { - ++mii; + continue; + } + + ++mii; + + // Check for now unnecessary kill flags. + if (li_->isNotInMIMap(MI)) continue; + SlotIndex UseIdx = li_->getInstructionIndex(MI).getUseIndex(); + for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { + MachineOperand &MO = MI->getOperand(i); + if (!MO.isReg() || !MO.isKill()) continue; + unsigned reg = MO.getReg(); + if (!reg || !li_->hasInterval(reg)) continue; + LiveInterval &LI = li_->getInterval(reg); + const LiveRange *LR = LI.getLiveRangeContaining(UseIdx); + if (!LR || + (!LR->valno->isKill(UseIdx.getDefIndex()) && + LR->valno->def != UseIdx.getDefIndex())) + MO.setIsKill(false); } } } diff --git a/lib/CodeGen/SimpleRegisterCoalescing.h b/lib/CodeGen/SimpleRegisterCoalescing.h index f668064..1be04f3 100644 --- a/lib/CodeGen/SimpleRegisterCoalescing.h +++ b/lib/CodeGen/SimpleRegisterCoalescing.h @@ -179,8 +179,11 @@ namespace llvm { /// isWinToJoinCrossClass - Return true if it's profitable to coalesce /// two virtual registers from different register classes. - bool isWinToJoinCrossClass(unsigned LargeReg, unsigned SmallReg, - unsigned Threshold); + bool isWinToJoinCrossClass(unsigned SrcReg, + unsigned DstReg, + const TargetRegisterClass *SrcRC, + const TargetRegisterClass *DstRC, + const TargetRegisterClass *NewRC); /// HasIncompatibleSubRegDefUse - If we are trying to coalesce a virtual /// register with a physical register, check if any of the virtual register @@ -220,10 +223,6 @@ namespace llvm { /// subregister. void UpdateRegDefsUses(unsigned SrcReg, unsigned DstReg, unsigned SubIdx); - /// RemoveUnnecessaryKills - Remove kill markers that are no longer accurate - /// due to live range lengthening as the result of coalescing. 
- void RemoveUnnecessaryKills(unsigned Reg, LiveInterval &LI); - /// ShortenDeadCopyLiveRange - Shorten a live range defined by a dead copy. /// Return true if live interval is removed. bool ShortenDeadCopyLiveRange(LiveInterval &li, MachineInstr *CopyMI); @@ -243,8 +242,6 @@ namespace llvm { /// cycles Start and End or NULL if there are no uses. MachineOperand *lastRegisterUse(SlotIndex Start, SlotIndex End, unsigned Reg, SlotIndex &LastUseIdx) const; - - void printRegName(unsigned reg) const; }; } // End llvm namespace diff --git a/lib/CodeGen/Spiller.cpp b/lib/CodeGen/Spiller.cpp index 7ba4403..63c5554 100644 --- a/lib/CodeGen/Spiller.cpp +++ b/lib/CodeGen/Spiller.cpp @@ -46,7 +46,6 @@ namespace { /// Utility class for spillers. class SpillerBase : public Spiller { protected: - MachineFunction *mf; LiveIntervals *lis; MachineFrameInfo *mfi; @@ -160,9 +159,11 @@ protected: return added; } - }; +} // end anonymous namespace + +namespace { /// Spills any live range using the spill-everywhere method with no attempt at /// folding. @@ -178,9 +179,12 @@ public: // Ignore spillIs - we don't use it. return trivialSpillEverywhere(li); } - }; +} // end anonymous namespace + +namespace { + /// Falls back on LiveIntervals::addIntervalsForSpills. class StandardSpiller : public Spiller { protected: @@ -198,9 +202,12 @@ public: SlotIndex*) { return lis->addIntervalsForSpills(*li, spillIs, loopInfo, *vrm); } - }; +} // end anonymous namespace + +namespace { + /// When a call to spill is placed this spiller will first try to break the /// interval up into its component values (one new interval per value). /// If this fails, or if a call is placed to spill a previously split interval @@ -513,15 +520,16 @@ private: }; -} +} // end anonymous namespace + llvm::Spiller* llvm::createSpiller(MachineFunction *mf, LiveIntervals *lis, const MachineLoopInfo *loopInfo, VirtRegMap *vrm) { switch (spillerOpt) { - case trivial: return new TrivialSpiller(mf, lis, vrm); break; - case standard: return new StandardSpiller(lis, loopInfo, vrm); break; - case splitting: return new SplittingSpiller(mf, lis, loopInfo, vrm); break; - default: llvm_unreachable("Unreachable!"); break; + default: assert(0 && "unknown spiller"); + case trivial: return new TrivialSpiller(mf, lis, vrm); + case standard: return new StandardSpiller(lis, loopInfo, vrm); + case splitting: return new SplittingSpiller(mf, lis, loopInfo, vrm); } } diff --git a/lib/CodeGen/StackSlotColoring.cpp b/lib/CodeGen/StackSlotColoring.cpp index 12d38f0..42dfd7f 100644 --- a/lib/CodeGen/StackSlotColoring.cpp +++ b/lib/CodeGen/StackSlotColoring.cpp @@ -182,7 +182,8 @@ void StackSlotColoring::ScanForSpillSlotRefs(MachineFunction &MF) { if (!LS->hasInterval(FI)) continue; LiveInterval &li = LS->getInterval(FI); - li.weight += LiveIntervals::getSpillWeight(false, true, loopDepth); + if (!MI->isDebugValue()) + li.weight += LiveIntervals::getSpillWeight(false, true, loopDepth); SSRefs[FI].push_back(MI); } } diff --git a/lib/CodeGen/TargetInstrInfoImpl.cpp b/lib/CodeGen/TargetInstrInfoImpl.cpp index e9e998f..0ad6619 100644 --- a/lib/CodeGen/TargetInstrInfoImpl.cpp +++ b/lib/CodeGen/TargetInstrInfoImpl.cpp @@ -40,7 +40,7 @@ MachineInstr *TargetInstrInfoImpl::commuteInstruction(MachineInstr *MI, std::string msg; raw_string_ostream Msg(msg); Msg << "Don't know how to commute: " << *MI; - llvm_report_error(Msg.str()); + report_fatal_error(Msg.str()); } assert(MI->getOperand(Idx1).isReg() && MI->getOperand(Idx2).isReg() && diff --git a/lib/CodeGen/TargetLoweringObjectFileImpl.cpp 
b/lib/CodeGen/TargetLoweringObjectFileImpl.cpp index d6bdb10..9f95993 100644 --- a/lib/CodeGen/TargetLoweringObjectFileImpl.cpp +++ b/lib/CodeGen/TargetLoweringObjectFileImpl.cpp @@ -38,110 +38,88 @@ using namespace dwarf; //===----------------------------------------------------------------------===// // ELF //===----------------------------------------------------------------------===// -typedef StringMap<const MCSectionELF*> ELFUniqueMapTy; - -TargetLoweringObjectFileELF::~TargetLoweringObjectFileELF() { - // If we have the section uniquing map, free it. - delete (ELFUniqueMapTy*)UniquingMap; -} - -const MCSection *TargetLoweringObjectFileELF:: -getELFSection(StringRef Section, unsigned Type, unsigned Flags, - SectionKind Kind, bool IsExplicit) const { - if (UniquingMap == 0) - UniquingMap = new ELFUniqueMapTy(); - ELFUniqueMapTy &Map = *(ELFUniqueMapTy*)UniquingMap; - - // Do the lookup, if we have a hit, return it. - StringMapEntry<const MCSectionELF*> &Entry = Map.GetOrCreateValue(Section); - if (Entry.getValue()) return Entry.getValue(); - - MCSectionELF *Result = MCSectionELF::Create(Entry.getKey(), Type, Flags, Kind, - IsExplicit, getContext()); - Entry.setValue(Result); - return Result; -} void TargetLoweringObjectFileELF::Initialize(MCContext &Ctx, const TargetMachine &TM) { - if (UniquingMap != 0) - ((ELFUniqueMapTy*)UniquingMap)->clear(); TargetLoweringObjectFile::Initialize(Ctx, TM); BSSSection = - getELFSection(".bss", MCSectionELF::SHT_NOBITS, - MCSectionELF::SHF_WRITE | MCSectionELF::SHF_ALLOC, - SectionKind::getBSS()); + getContext().getELFSection(".bss", MCSectionELF::SHT_NOBITS, + MCSectionELF::SHF_WRITE |MCSectionELF::SHF_ALLOC, + SectionKind::getBSS()); TextSection = - getELFSection(".text", MCSectionELF::SHT_PROGBITS, - MCSectionELF::SHF_EXECINSTR | MCSectionELF::SHF_ALLOC, - SectionKind::getText()); + getContext().getELFSection(".text", MCSectionELF::SHT_PROGBITS, + MCSectionELF::SHF_EXECINSTR | + MCSectionELF::SHF_ALLOC, + SectionKind::getText()); DataSection = - getELFSection(".data", MCSectionELF::SHT_PROGBITS, - MCSectionELF::SHF_WRITE | MCSectionELF::SHF_ALLOC, - SectionKind::getDataRel()); + getContext().getELFSection(".data", MCSectionELF::SHT_PROGBITS, + MCSectionELF::SHF_WRITE |MCSectionELF::SHF_ALLOC, + SectionKind::getDataRel()); ReadOnlySection = - getELFSection(".rodata", MCSectionELF::SHT_PROGBITS, - MCSectionELF::SHF_ALLOC, - SectionKind::getReadOnly()); + getContext().getELFSection(".rodata", MCSectionELF::SHT_PROGBITS, + MCSectionELF::SHF_ALLOC, + SectionKind::getReadOnly()); TLSDataSection = - getELFSection(".tdata", MCSectionELF::SHT_PROGBITS, - MCSectionELF::SHF_ALLOC | MCSectionELF::SHF_TLS | - MCSectionELF::SHF_WRITE, SectionKind::getThreadData()); + getContext().getELFSection(".tdata", MCSectionELF::SHT_PROGBITS, + MCSectionELF::SHF_ALLOC | MCSectionELF::SHF_TLS | + MCSectionELF::SHF_WRITE, + SectionKind::getThreadData()); TLSBSSSection = - getELFSection(".tbss", MCSectionELF::SHT_NOBITS, - MCSectionELF::SHF_ALLOC | MCSectionELF::SHF_TLS | - MCSectionELF::SHF_WRITE, SectionKind::getThreadBSS()); + getContext().getELFSection(".tbss", MCSectionELF::SHT_NOBITS, + MCSectionELF::SHF_ALLOC | MCSectionELF::SHF_TLS | + MCSectionELF::SHF_WRITE, + SectionKind::getThreadBSS()); DataRelSection = - getELFSection(".data.rel", MCSectionELF::SHT_PROGBITS, - MCSectionELF::SHF_ALLOC | MCSectionELF::SHF_WRITE, - SectionKind::getDataRel()); + getContext().getELFSection(".data.rel", MCSectionELF::SHT_PROGBITS, + MCSectionELF::SHF_ALLOC |MCSectionELF::SHF_WRITE, + 
SectionKind::getDataRel()); DataRelLocalSection = - getELFSection(".data.rel.local", MCSectionELF::SHT_PROGBITS, - MCSectionELF::SHF_ALLOC | MCSectionELF::SHF_WRITE, - SectionKind::getDataRelLocal()); + getContext().getELFSection(".data.rel.local", MCSectionELF::SHT_PROGBITS, + MCSectionELF::SHF_ALLOC |MCSectionELF::SHF_WRITE, + SectionKind::getDataRelLocal()); DataRelROSection = - getELFSection(".data.rel.ro", MCSectionELF::SHT_PROGBITS, - MCSectionELF::SHF_ALLOC | MCSectionELF::SHF_WRITE, - SectionKind::getReadOnlyWithRel()); + getContext().getELFSection(".data.rel.ro", MCSectionELF::SHT_PROGBITS, + MCSectionELF::SHF_ALLOC |MCSectionELF::SHF_WRITE, + SectionKind::getReadOnlyWithRel()); DataRelROLocalSection = - getELFSection(".data.rel.ro.local", MCSectionELF::SHT_PROGBITS, - MCSectionELF::SHF_ALLOC | MCSectionELF::SHF_WRITE, - SectionKind::getReadOnlyWithRelLocal()); + getContext().getELFSection(".data.rel.ro.local", MCSectionELF::SHT_PROGBITS, + MCSectionELF::SHF_ALLOC |MCSectionELF::SHF_WRITE, + SectionKind::getReadOnlyWithRelLocal()); MergeableConst4Section = - getELFSection(".rodata.cst4", MCSectionELF::SHT_PROGBITS, - MCSectionELF::SHF_ALLOC | MCSectionELF::SHF_MERGE, - SectionKind::getMergeableConst4()); + getContext().getELFSection(".rodata.cst4", MCSectionELF::SHT_PROGBITS, + MCSectionELF::SHF_ALLOC |MCSectionELF::SHF_MERGE, + SectionKind::getMergeableConst4()); MergeableConst8Section = - getELFSection(".rodata.cst8", MCSectionELF::SHT_PROGBITS, - MCSectionELF::SHF_ALLOC | MCSectionELF::SHF_MERGE, - SectionKind::getMergeableConst8()); + getContext().getELFSection(".rodata.cst8", MCSectionELF::SHT_PROGBITS, + MCSectionELF::SHF_ALLOC |MCSectionELF::SHF_MERGE, + SectionKind::getMergeableConst8()); MergeableConst16Section = - getELFSection(".rodata.cst16", MCSectionELF::SHT_PROGBITS, - MCSectionELF::SHF_ALLOC | MCSectionELF::SHF_MERGE, - SectionKind::getMergeableConst16()); + getContext().getELFSection(".rodata.cst16", MCSectionELF::SHT_PROGBITS, + MCSectionELF::SHF_ALLOC |MCSectionELF::SHF_MERGE, + SectionKind::getMergeableConst16()); StaticCtorSection = - getELFSection(".ctors", MCSectionELF::SHT_PROGBITS, - MCSectionELF::SHF_ALLOC | MCSectionELF::SHF_WRITE, - SectionKind::getDataRel()); + getContext().getELFSection(".ctors", MCSectionELF::SHT_PROGBITS, + MCSectionELF::SHF_ALLOC |MCSectionELF::SHF_WRITE, + SectionKind::getDataRel()); StaticDtorSection = - getELFSection(".dtors", MCSectionELF::SHT_PROGBITS, - MCSectionELF::SHF_ALLOC | MCSectionELF::SHF_WRITE, - SectionKind::getDataRel()); + getContext().getELFSection(".dtors", MCSectionELF::SHT_PROGBITS, + MCSectionELF::SHF_ALLOC |MCSectionELF::SHF_WRITE, + SectionKind::getDataRel()); // Exception Handling Sections. @@ -150,47 +128,48 @@ void TargetLoweringObjectFileELF::Initialize(MCContext &Ctx, // runtime hit for C++ apps. Either the contents of the LSDA need to be // adjusted or this should be a data section. LSDASection = - getELFSection(".gcc_except_table", MCSectionELF::SHT_PROGBITS, - MCSectionELF::SHF_ALLOC, SectionKind::getReadOnly()); + getContext().getELFSection(".gcc_except_table", MCSectionELF::SHT_PROGBITS, + MCSectionELF::SHF_ALLOC, + SectionKind::getReadOnly()); EHFrameSection = - getELFSection(".eh_frame", MCSectionELF::SHT_PROGBITS, - MCSectionELF::SHF_ALLOC | MCSectionELF::SHF_WRITE, - SectionKind::getDataRel()); + getContext().getELFSection(".eh_frame", MCSectionELF::SHT_PROGBITS, + MCSectionELF::SHF_ALLOC |MCSectionELF::SHF_WRITE, + SectionKind::getDataRel()); // Debug Info Sections. 
DwarfAbbrevSection = - getELFSection(".debug_abbrev", MCSectionELF::SHT_PROGBITS, 0, - SectionKind::getMetadata()); + getContext().getELFSection(".debug_abbrev", MCSectionELF::SHT_PROGBITS, 0, + SectionKind::getMetadata()); DwarfInfoSection = - getELFSection(".debug_info", MCSectionELF::SHT_PROGBITS, 0, - SectionKind::getMetadata()); + getContext().getELFSection(".debug_info", MCSectionELF::SHT_PROGBITS, 0, + SectionKind::getMetadata()); DwarfLineSection = - getELFSection(".debug_line", MCSectionELF::SHT_PROGBITS, 0, - SectionKind::getMetadata()); + getContext().getELFSection(".debug_line", MCSectionELF::SHT_PROGBITS, 0, + SectionKind::getMetadata()); DwarfFrameSection = - getELFSection(".debug_frame", MCSectionELF::SHT_PROGBITS, 0, - SectionKind::getMetadata()); + getContext().getELFSection(".debug_frame", MCSectionELF::SHT_PROGBITS, 0, + SectionKind::getMetadata()); DwarfPubNamesSection = - getELFSection(".debug_pubnames", MCSectionELF::SHT_PROGBITS, 0, - SectionKind::getMetadata()); + getContext().getELFSection(".debug_pubnames", MCSectionELF::SHT_PROGBITS, 0, + SectionKind::getMetadata()); DwarfPubTypesSection = - getELFSection(".debug_pubtypes", MCSectionELF::SHT_PROGBITS, 0, - SectionKind::getMetadata()); + getContext().getELFSection(".debug_pubtypes", MCSectionELF::SHT_PROGBITS, 0, + SectionKind::getMetadata()); DwarfStrSection = - getELFSection(".debug_str", MCSectionELF::SHT_PROGBITS, 0, - SectionKind::getMetadata()); + getContext().getELFSection(".debug_str", MCSectionELF::SHT_PROGBITS, 0, + SectionKind::getMetadata()); DwarfLocSection = - getELFSection(".debug_loc", MCSectionELF::SHT_PROGBITS, 0, - SectionKind::getMetadata()); + getContext().getELFSection(".debug_loc", MCSectionELF::SHT_PROGBITS, 0, + SectionKind::getMetadata()); DwarfARangesSection = - getELFSection(".debug_aranges", MCSectionELF::SHT_PROGBITS, 0, - SectionKind::getMetadata()); + getContext().getELFSection(".debug_aranges", MCSectionELF::SHT_PROGBITS, 0, + SectionKind::getMetadata()); DwarfRangesSection = - getELFSection(".debug_ranges", MCSectionELF::SHT_PROGBITS, 0, - SectionKind::getMetadata()); + getContext().getELFSection(".debug_ranges", MCSectionELF::SHT_PROGBITS, 0, + SectionKind::getMetadata()); DwarfMacroInfoSection = - getELFSection(".debug_macinfo", MCSectionELF::SHT_PROGBITS, 0, - SectionKind::getMetadata()); + getContext().getELFSection(".debug_macinfo", MCSectionELF::SHT_PROGBITS, 0, + SectionKind::getMetadata()); } @@ -279,9 +258,9 @@ getExplicitSectionGlobal(const GlobalValue *GV, SectionKind Kind, // Infer section flags from the section name if we can. Kind = getELFKindForNamedSection(SectionName, Kind); - return getELFSection(SectionName, - getELFSectionType(SectionName, Kind), - getELFSectionFlags(Kind), Kind, true); + return getContext().getELFSection(SectionName, + getELFSectionType(SectionName, Kind), + getELFSectionFlags(Kind), Kind, true); } static const char *getSectionPrefixForUniqueGlobal(SectionKind Kind) { @@ -300,19 +279,54 @@ static const char *getSectionPrefixForUniqueGlobal(SectionKind Kind) { return ".gnu.linkonce.d.rel.ro."; } +/// getSectionPrefixForGlobal - Return the section prefix name used by options +/// FunctionsSections and DataSections. 
+static const char *getSectionPrefixForGlobal(SectionKind Kind) { + if (Kind.isText()) return ".text."; + if (Kind.isReadOnly()) return ".rodata."; + + if (Kind.isThreadData()) return ".tdata."; + if (Kind.isThreadBSS()) return ".tbss."; + + if (Kind.isDataNoRel()) return ".data."; + if (Kind.isDataRelLocal()) return ".data.rel.local."; + if (Kind.isDataRel()) return ".data.rel."; + if (Kind.isReadOnlyWithRelLocal()) return ".data.rel.ro.local."; + + assert(Kind.isReadOnlyWithRel() && "Unknown section kind"); + return ".data.rel.ro."; +} + + const MCSection *TargetLoweringObjectFileELF:: SelectSectionForGlobal(const GlobalValue *GV, SectionKind Kind, Mangler *Mang, const TargetMachine &TM) const { + // If we have -ffunction-section or -fdata-section then we should emit the + // global value to a uniqued section specifically for it. + bool EmitUniquedSection; + if (Kind.isText()) + EmitUniquedSection = TM.getFunctionSections(); + else + EmitUniquedSection = TM.getDataSections(); // If this global is linkonce/weak and the target handles this by emitting it // into a 'uniqued' section name, create and return the section now. - if (GV->isWeakForLinker() && !Kind.isCommon() && !Kind.isBSS()) { - const char *Prefix = getSectionPrefixForUniqueGlobal(Kind); + if ((GV->isWeakForLinker() || EmitUniquedSection) && + !Kind.isCommon() && !Kind.isBSS()) { + const char *Prefix; + if (GV->isWeakForLinker()) + Prefix = getSectionPrefixForUniqueGlobal(Kind); + else { + assert(EmitUniquedSection); + Prefix = getSectionPrefixForGlobal(Kind); + } + SmallString<128> Name(Prefix, Prefix+strlen(Prefix)); MCSymbol *Sym = Mang->getSymbol(GV); Name.append(Sym->getName().begin(), Sym->getName().end()); - return getELFSection(Name.str(), getELFSectionType(Name.str(), Kind), - getELFSectionFlags(Kind), Kind); + return getContext().getELFSection(Name.str(), + getELFSectionType(Name.str(), Kind), + getELFSectionFlags(Kind), Kind); } if (Kind.isText()) return TextSection; @@ -337,11 +351,11 @@ SelectSectionForGlobal(const GlobalValue *GV, SectionKind Kind, std::string Name = SizeSpec + utostr(Align); - return getELFSection(Name, MCSectionELF::SHT_PROGBITS, - MCSectionELF::SHF_ALLOC | - MCSectionELF::SHF_MERGE | - MCSectionELF::SHF_STRINGS, - Kind); + return getContext().getELFSection(Name, MCSectionELF::SHT_PROGBITS, + MCSectionELF::SHF_ALLOC | + MCSectionELF::SHF_MERGE | + MCSectionELF::SHF_STRINGS, + Kind); } if (Kind.isMergeableConst()) { @@ -426,43 +440,6 @@ getExprForDwarfGlobalReference(const GlobalValue *GV, Mangler *Mang, // MachO //===----------------------------------------------------------------------===// -typedef StringMap<const MCSectionMachO*> MachOUniqueMapTy; - -TargetLoweringObjectFileMachO::~TargetLoweringObjectFileMachO() { - // If we have the MachO uniquing map, free it. - delete (MachOUniqueMapTy*)UniquingMap; -} - - -const MCSectionMachO *TargetLoweringObjectFileMachO:: -getMachOSection(StringRef Segment, StringRef Section, - unsigned TypeAndAttributes, - unsigned Reserved2, SectionKind Kind) const { - // We unique sections by their segment/section pair. The returned section - // may not have the same flags as the requested section, if so this should be - // diagnosed by the client as an error. - - // Create the map if it doesn't already exist. - if (UniquingMap == 0) - UniquingMap = new MachOUniqueMapTy(); - MachOUniqueMapTy &Map = *(MachOUniqueMapTy*)UniquingMap; - - // Form the name to look up. 
- SmallString<64> Name; - Name += Segment; - Name.push_back(','); - Name += Section; - - // Do the lookup, if we have a hit, return it. - const MCSectionMachO *&Entry = Map[Name.str()]; - if (Entry) return Entry; - - // Otherwise, return a new section. - return Entry = MCSectionMachO::Create(Segment, Section, TypeAndAttributes, - Reserved2, Kind, getContext()); -} - - void TargetLoweringObjectFileMachO::Initialize(MCContext &Ctx, const TargetMachine &TM) { // _foo.eh symbols are currently always exported so that the linker knows @@ -473,29 +450,31 @@ void TargetLoweringObjectFileMachO::Initialize(MCContext &Ctx, IsFunctionEHFrameSymbolPrivate = false; SupportsWeakOmittedEHFrame = false; - if (UniquingMap != 0) - ((MachOUniqueMapTy*)UniquingMap)->clear(); TargetLoweringObjectFile::Initialize(Ctx, TM); TextSection // .text - = getMachOSection("__TEXT", "__text", - MCSectionMachO::S_ATTR_PURE_INSTRUCTIONS, - SectionKind::getText()); + = getContext().getMachOSection("__TEXT", "__text", + MCSectionMachO::S_ATTR_PURE_INSTRUCTIONS, + SectionKind::getText()); DataSection // .data - = getMachOSection("__DATA", "__data", 0, SectionKind::getDataRel()); + = getContext().getMachOSection("__DATA", "__data", 0, + SectionKind::getDataRel()); CStringSection // .cstring - = getMachOSection("__TEXT", "__cstring", MCSectionMachO::S_CSTRING_LITERALS, - SectionKind::getMergeable1ByteCString()); + = getContext().getMachOSection("__TEXT", "__cstring", + MCSectionMachO::S_CSTRING_LITERALS, + SectionKind::getMergeable1ByteCString()); UStringSection - = getMachOSection("__TEXT","__ustring", 0, - SectionKind::getMergeable2ByteCString()); + = getContext().getMachOSection("__TEXT","__ustring", 0, + SectionKind::getMergeable2ByteCString()); FourByteConstantSection // .literal4 - = getMachOSection("__TEXT", "__literal4", MCSectionMachO::S_4BYTE_LITERALS, - SectionKind::getMergeableConst4()); + = getContext().getMachOSection("__TEXT", "__literal4", + MCSectionMachO::S_4BYTE_LITERALS, + SectionKind::getMergeableConst4()); EightByteConstantSection // .literal8 - = getMachOSection("__TEXT", "__literal8", MCSectionMachO::S_8BYTE_LITERALS, - SectionKind::getMergeableConst8()); + = getContext().getMachOSection("__TEXT", "__literal8", + MCSectionMachO::S_8BYTE_LITERALS, + SectionKind::getMergeableConst8()); // ld_classic doesn't support .literal16 in 32-bit mode, and ld64 falls back // to using it in -static mode. 
@@ -503,110 +482,130 @@ void TargetLoweringObjectFileMachO::Initialize(MCContext &Ctx, if (TM.getRelocationModel() != Reloc::Static && TM.getTargetData()->getPointerSize() == 32) SixteenByteConstantSection = // .literal16 - getMachOSection("__TEXT", "__literal16",MCSectionMachO::S_16BYTE_LITERALS, - SectionKind::getMergeableConst16()); + getContext().getMachOSection("__TEXT", "__literal16", + MCSectionMachO::S_16BYTE_LITERALS, + SectionKind::getMergeableConst16()); ReadOnlySection // .const - = getMachOSection("__TEXT", "__const", 0, SectionKind::getReadOnly()); + = getContext().getMachOSection("__TEXT", "__const", 0, + SectionKind::getReadOnly()); TextCoalSection - = getMachOSection("__TEXT", "__textcoal_nt", - MCSectionMachO::S_COALESCED | - MCSectionMachO::S_ATTR_PURE_INSTRUCTIONS, - SectionKind::getText()); + = getContext().getMachOSection("__TEXT", "__textcoal_nt", + MCSectionMachO::S_COALESCED | + MCSectionMachO::S_ATTR_PURE_INSTRUCTIONS, + SectionKind::getText()); ConstTextCoalSection - = getMachOSection("__TEXT", "__const_coal", MCSectionMachO::S_COALESCED, - SectionKind::getText()); + = getContext().getMachOSection("__TEXT", "__const_coal", + MCSectionMachO::S_COALESCED, + SectionKind::getText()); ConstDataCoalSection - = getMachOSection("__DATA","__const_coal", MCSectionMachO::S_COALESCED, - SectionKind::getText()); + = getContext().getMachOSection("__DATA","__const_coal", + MCSectionMachO::S_COALESCED, + SectionKind::getText()); ConstDataSection // .const_data - = getMachOSection("__DATA", "__const", 0, - SectionKind::getReadOnlyWithRel()); + = getContext().getMachOSection("__DATA", "__const", 0, + SectionKind::getReadOnlyWithRel()); DataCoalSection - = getMachOSection("__DATA","__datacoal_nt", MCSectionMachO::S_COALESCED, - SectionKind::getDataRel()); + = getContext().getMachOSection("__DATA","__datacoal_nt", + MCSectionMachO::S_COALESCED, + SectionKind::getDataRel()); DataCommonSection - = getMachOSection("__DATA","__common", MCSectionMachO::S_ZEROFILL, - SectionKind::getBSS()); + = getContext().getMachOSection("__DATA","__common", + MCSectionMachO::S_ZEROFILL, + SectionKind::getBSS()); DataBSSSection - = getMachOSection("__DATA","__bss", MCSectionMachO::S_ZEROFILL, - SectionKind::getBSS()); + = getContext().getMachOSection("__DATA","__bss", MCSectionMachO::S_ZEROFILL, + SectionKind::getBSS()); LazySymbolPointerSection - = getMachOSection("__DATA", "__la_symbol_ptr", - MCSectionMachO::S_LAZY_SYMBOL_POINTERS, - SectionKind::getMetadata()); + = getContext().getMachOSection("__DATA", "__la_symbol_ptr", + MCSectionMachO::S_LAZY_SYMBOL_POINTERS, + SectionKind::getMetadata()); NonLazySymbolPointerSection - = getMachOSection("__DATA", "__nl_symbol_ptr", - MCSectionMachO::S_NON_LAZY_SYMBOL_POINTERS, - SectionKind::getMetadata()); + = getContext().getMachOSection("__DATA", "__nl_symbol_ptr", + MCSectionMachO::S_NON_LAZY_SYMBOL_POINTERS, + SectionKind::getMetadata()); if (TM.getRelocationModel() == Reloc::Static) { StaticCtorSection - = getMachOSection("__TEXT", "__constructor", 0,SectionKind::getDataRel()); + = getContext().getMachOSection("__TEXT", "__constructor", 0, + SectionKind::getDataRel()); StaticDtorSection - = getMachOSection("__TEXT", "__destructor", 0, SectionKind::getDataRel()); + = getContext().getMachOSection("__TEXT", "__destructor", 0, + SectionKind::getDataRel()); } else { StaticCtorSection - = getMachOSection("__DATA", "__mod_init_func", - MCSectionMachO::S_MOD_INIT_FUNC_POINTERS, - SectionKind::getDataRel()); + = getContext().getMachOSection("__DATA", 
"__mod_init_func", + MCSectionMachO::S_MOD_INIT_FUNC_POINTERS, + SectionKind::getDataRel()); StaticDtorSection - = getMachOSection("__DATA", "__mod_term_func", - MCSectionMachO::S_MOD_TERM_FUNC_POINTERS, - SectionKind::getDataRel()); + = getContext().getMachOSection("__DATA", "__mod_term_func", + MCSectionMachO::S_MOD_TERM_FUNC_POINTERS, + SectionKind::getDataRel()); } // Exception Handling. - LSDASection = getMachOSection("__TEXT", "__gcc_except_tab", 0, - SectionKind::getReadOnlyWithRel()); + LSDASection = getContext().getMachOSection("__TEXT", "__gcc_except_tab", 0, + SectionKind::getReadOnlyWithRel()); EHFrameSection = - getMachOSection("__TEXT", "__eh_frame", - MCSectionMachO::S_COALESCED | - MCSectionMachO::S_ATTR_NO_TOC | - MCSectionMachO::S_ATTR_STRIP_STATIC_SYMS | - MCSectionMachO::S_ATTR_LIVE_SUPPORT, - SectionKind::getReadOnly()); + getContext().getMachOSection("__TEXT", "__eh_frame", + MCSectionMachO::S_COALESCED | + MCSectionMachO::S_ATTR_NO_TOC | + MCSectionMachO::S_ATTR_STRIP_STATIC_SYMS | + MCSectionMachO::S_ATTR_LIVE_SUPPORT, + SectionKind::getReadOnly()); // Debug Information. DwarfAbbrevSection = - getMachOSection("__DWARF", "__debug_abbrev", MCSectionMachO::S_ATTR_DEBUG, - SectionKind::getMetadata()); + getContext().getMachOSection("__DWARF", "__debug_abbrev", + MCSectionMachO::S_ATTR_DEBUG, + SectionKind::getMetadata()); DwarfInfoSection = - getMachOSection("__DWARF", "__debug_info", MCSectionMachO::S_ATTR_DEBUG, - SectionKind::getMetadata()); + getContext().getMachOSection("__DWARF", "__debug_info", + MCSectionMachO::S_ATTR_DEBUG, + SectionKind::getMetadata()); DwarfLineSection = - getMachOSection("__DWARF", "__debug_line", MCSectionMachO::S_ATTR_DEBUG, - SectionKind::getMetadata()); + getContext().getMachOSection("__DWARF", "__debug_line", + MCSectionMachO::S_ATTR_DEBUG, + SectionKind::getMetadata()); DwarfFrameSection = - getMachOSection("__DWARF", "__debug_frame", MCSectionMachO::S_ATTR_DEBUG, - SectionKind::getMetadata()); + getContext().getMachOSection("__DWARF", "__debug_frame", + MCSectionMachO::S_ATTR_DEBUG, + SectionKind::getMetadata()); DwarfPubNamesSection = - getMachOSection("__DWARF", "__debug_pubnames", MCSectionMachO::S_ATTR_DEBUG, - SectionKind::getMetadata()); + getContext().getMachOSection("__DWARF", "__debug_pubnames", + MCSectionMachO::S_ATTR_DEBUG, + SectionKind::getMetadata()); DwarfPubTypesSection = - getMachOSection("__DWARF", "__debug_pubtypes", MCSectionMachO::S_ATTR_DEBUG, - SectionKind::getMetadata()); + getContext().getMachOSection("__DWARF", "__debug_pubtypes", + MCSectionMachO::S_ATTR_DEBUG, + SectionKind::getMetadata()); DwarfStrSection = - getMachOSection("__DWARF", "__debug_str", MCSectionMachO::S_ATTR_DEBUG, - SectionKind::getMetadata()); + getContext().getMachOSection("__DWARF", "__debug_str", + MCSectionMachO::S_ATTR_DEBUG, + SectionKind::getMetadata()); DwarfLocSection = - getMachOSection("__DWARF", "__debug_loc", MCSectionMachO::S_ATTR_DEBUG, - SectionKind::getMetadata()); + getContext().getMachOSection("__DWARF", "__debug_loc", + MCSectionMachO::S_ATTR_DEBUG, + SectionKind::getMetadata()); DwarfARangesSection = - getMachOSection("__DWARF", "__debug_aranges", MCSectionMachO::S_ATTR_DEBUG, - SectionKind::getMetadata()); + getContext().getMachOSection("__DWARF", "__debug_aranges", + MCSectionMachO::S_ATTR_DEBUG, + SectionKind::getMetadata()); DwarfRangesSection = - getMachOSection("__DWARF", "__debug_ranges", MCSectionMachO::S_ATTR_DEBUG, - SectionKind::getMetadata()); + getContext().getMachOSection("__DWARF", "__debug_ranges", + 
MCSectionMachO::S_ATTR_DEBUG, + SectionKind::getMetadata()); DwarfMacroInfoSection = - getMachOSection("__DWARF", "__debug_macinfo", MCSectionMachO::S_ATTR_DEBUG, - SectionKind::getMetadata()); + getContext().getMachOSection("__DWARF", "__debug_macinfo", + MCSectionMachO::S_ATTR_DEBUG, + SectionKind::getMetadata()); DwarfDebugInlineSection = - getMachOSection("__DWARF", "__debug_inlined", MCSectionMachO::S_ATTR_DEBUG, - SectionKind::getMetadata()); + getContext().getMachOSection("__DWARF", "__debug_inlined", + MCSectionMachO::S_ATTR_DEBUG, + SectionKind::getMetadata()); } const MCSection *TargetLoweringObjectFileMachO:: @@ -619,8 +618,8 @@ getExplicitSectionGlobal(const GlobalValue *GV, SectionKind Kind, MCSectionMachO::ParseSectionSpecifier(GV->getSection(), Segment, Section, TAA, StubSize); if (!ErrorCode.empty()) { - // If invalid, report the error with llvm_report_error. - llvm_report_error("Global variable '" + GV->getNameStr() + + // If invalid, report the error with report_fatal_error. + report_fatal_error("Global variable '" + GV->getNameStr() + "' has an invalid section specifier '" + GV->getSection()+ "': " + ErrorCode + "."); // Fall back to dropping it into the data section. @@ -629,14 +628,14 @@ getExplicitSectionGlobal(const GlobalValue *GV, SectionKind Kind, // Get the section. const MCSectionMachO *S = - getMachOSection(Segment, Section, TAA, StubSize, Kind); + getContext().getMachOSection(Segment, Section, TAA, StubSize, Kind); // Okay, now that we got the section, verify that the TAA & StubSize agree. // If the user declared multiple globals with different section flags, we need // to reject it here. if (S->getTypeAndAttributes() != TAA || S->getStubSize() != StubSize) { - // If invalid, report the error with llvm_report_error. - llvm_report_error("Global variable '" + GV->getNameStr() + + // If invalid, report the error with report_fatal_error. + report_fatal_error("Global variable '" + GV->getNameStr() + "' section type or attributes does not match previous" " section specifier"); } @@ -806,7 +805,7 @@ const MCSection *TargetLoweringObjectFileCOFF:: getCOFFSection(StringRef Name, bool isDirective, SectionKind Kind) const { // Create the map if it doesn't already exist. if (UniquingMap == 0) - UniquingMap = new MachOUniqueMapTy(); + UniquingMap = new COFFUniqueMapTy(); COFFUniqueMapTy &Map = *(COFFUniqueMapTy*)UniquingMap; // Do the lookup, if we have a hit, return it. diff --git a/lib/CodeGen/VirtRegRewriter.cpp b/lib/CodeGen/VirtRegRewriter.cpp index 0b7fde7..7f0412c 100644 --- a/lib/CodeGen/VirtRegRewriter.cpp +++ b/lib/CodeGen/VirtRegRewriter.cpp @@ -9,7 +9,9 @@ #define DEBUG_TYPE "virtregrewriter" #include "VirtRegRewriter.h" +#include "VirtRegMap.h" #include "llvm/Function.h" +#include "llvm/CodeGen/LiveIntervalAnalysis.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineRegisterInfo.h" @@ -893,7 +895,7 @@ unsigned ReuseInfo::GetRegForReload(const TargetRegisterClass *RC, bool DoReMat = NewOp.StackSlotOrReMat > VirtRegMap::MAX_STACK_SLOT; int SSorRMId = DoReMat - ? VRM.getReMatId(NewOp.VirtReg) : NewOp.StackSlotOrReMat; + ? VRM.getReMatId(NewOp.VirtReg) : (int) NewOp.StackSlotOrReMat; // Back-schedule reloads and remats. 
MachineBasicBlock::iterator InsertLoc = @@ -1064,6 +1066,7 @@ class LocalRewriter : public VirtRegRewriter { VirtRegMap *VRM; BitVector AllocatableRegs; DenseMap<MachineInstr*, unsigned> DistanceMap; + DenseMap<int, SmallVector<MachineInstr*,4> > Slot2DbgValues; MachineBasicBlock *MBB; // Basic block currently being processed. @@ -1188,12 +1191,24 @@ bool LocalRewriter::runOnMachineFunction(MachineFunction &MF, VirtRegMap &vrm, // Mark unused spill slots. MachineFrameInfo *MFI = MF.getFrameInfo(); int SS = VRM->getLowSpillSlot(); - if (SS != VirtRegMap::NO_STACK_SLOT) - for (int e = VRM->getHighSpillSlot(); SS <= e; ++SS) + if (SS != VirtRegMap::NO_STACK_SLOT) { + for (int e = VRM->getHighSpillSlot(); SS <= e; ++SS) { + SmallVector<MachineInstr*, 4> &DbgValues = Slot2DbgValues[SS]; if (!VRM->isSpillSlotUsed(SS)) { MFI->RemoveStackObject(SS); + for (unsigned j = 0, ee = DbgValues.size(); j != ee; ++j) { + MachineInstr *DVMI = DbgValues[j]; + MachineBasicBlock *DVMBB = DVMI->getParent(); + DEBUG(dbgs() << "Removing debug info referencing FI#" << SS << '\n'); + VRM->RemoveMachineInstrFromMaps(DVMI); + DVMBB->erase(DVMI); + } ++NumDSS; } + DbgValues.clear(); + } + } + Slot2DbgValues.clear(); return true; } @@ -1903,6 +1918,10 @@ LocalRewriter::RewriteMBB(LiveIntervals *LIs, bool BackTracked = false; MachineInstr &MI = *MII; + // Remember DbgValue's which reference stack slots. + if (MI.isDebugValue() && MI.getOperand(0).isFI()) + Slot2DbgValues[MI.getOperand(0).getIndex()].push_back(&MI); + /// ReusedOperands - Keep track of operand reuse in case we need to undo /// reuse. ReuseInfo ReusedOperands(MI, TRI); diff --git a/lib/CodeGen/VirtRegRewriter.h b/lib/CodeGen/VirtRegRewriter.h index 44f9df6..93474e0 100644 --- a/lib/CodeGen/VirtRegRewriter.h +++ b/lib/CodeGen/VirtRegRewriter.h @@ -10,11 +10,10 @@ #ifndef LLVM_CODEGEN_VIRTREGREWRITER_H #define LLVM_CODEGEN_VIRTREGREWRITER_H -#include "llvm/CodeGen/LiveIntervalAnalysis.h" -#include "llvm/CodeGen/MachineFunction.h" -#include "VirtRegMap.h" - namespace llvm { + class LiveIntervals; + class MachineFunction; + class VirtRegMap; /// VirtRegRewriter interface: Implementations of this interface assign /// spilled virtual registers to stack slots, rewriting the code. 
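
Note on the VirtRegRewriter.h hunk above: the header drops whole-header includes in favor of forward declarations inside namespace llvm, since the interface only mentions these types by pointer or reference. A minimal sketch of that pattern, with purely illustrative names (Rewriter.h and the Rewriter class are not part of this change):

    // Rewriter.h -- illustrative header using the same forward-declaration trick.
    #ifndef REWRITER_H
    #define REWRITER_H

    namespace llvm {
      // Declared, not included: clients of this header never need the full
      // definitions, only code in the implementing .cpp does.
      class LiveIntervals;
      class MachineFunction;
      class VirtRegMap;

      class Rewriter {
      public:
        virtual ~Rewriter() {}
        // Incomplete types are fine in a declaration; only the .cpp that
        // implements or calls this needs the real headers.
        virtual bool runOnMachineFunction(MachineFunction &MF, VirtRegMap &VRM,
                                          LiveIntervals *LIs) = 0;
      };
    } // end namespace llvm

    #endif

This keeps every file that includes the interface header from transitively pulling in LiveIntervalAnalysis.h, MachineFunction.h, and VirtRegMap.h.
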
diff --git a/lib/ExecutionEngine/ExecutionEngine.cpp b/lib/ExecutionEngine/ExecutionEngine.cpp index da21c2d..b17827e 100644 --- a/lib/ExecutionEngine/ExecutionEngine.cpp +++ b/lib/ExecutionEngine/ExecutionEngine.cpp @@ -379,27 +379,27 @@ int ExecutionEngine::runFunctionAsMain(Function *Fn, switch (NumArgs) { case 3: if (FTy->getParamType(2) != PPInt8Ty) { - llvm_report_error("Invalid type for third argument of main() supplied"); + report_fatal_error("Invalid type for third argument of main() supplied"); } // FALLS THROUGH case 2: if (FTy->getParamType(1) != PPInt8Ty) { - llvm_report_error("Invalid type for second argument of main() supplied"); + report_fatal_error("Invalid type for second argument of main() supplied"); } // FALLS THROUGH case 1: if (!FTy->getParamType(0)->isIntegerTy(32)) { - llvm_report_error("Invalid type for first argument of main() supplied"); + report_fatal_error("Invalid type for first argument of main() supplied"); } // FALLS THROUGH case 0: if (!FTy->getReturnType()->isIntegerTy() && !FTy->getReturnType()->isVoidTy()) { - llvm_report_error("Invalid return type of main() supplied"); + report_fatal_error("Invalid return type of main() supplied"); } break; default: - llvm_report_error("Invalid number of arguments of main() supplied"); + report_fatal_error("Invalid number of arguments of main() supplied"); } ArgvArray CArgv; @@ -771,7 +771,7 @@ GenericValue ExecutionEngine::getConstantValue(const Constant *C) { std::string msg; raw_string_ostream Msg(msg); Msg << "ConstantExpr not handled: " << *CE; - llvm_report_error(Msg.str()); + report_fatal_error(Msg.str()); } GenericValue Result; @@ -807,7 +807,7 @@ GenericValue ExecutionEngine::getConstantValue(const Constant *C) { std::string msg; raw_string_ostream Msg(msg); Msg << "ERROR: Constant unimplemented for type: " << *C->getType(); - llvm_report_error(Msg.str()); + report_fatal_error(Msg.str()); } return Result; } @@ -935,7 +935,7 @@ void ExecutionEngine::LoadValueFromMemory(GenericValue &Result, std::string msg; raw_string_ostream Msg(msg); Msg << "Cannot load value of type " << *Ty << "!"; - llvm_report_error(Msg.str()); + report_fatal_error(Msg.str()); } } @@ -1051,7 +1051,7 @@ void ExecutionEngine::emitGlobals() { sys::DynamicLibrary::SearchForAddressOfSymbol(I->getName())) addGlobalMapping(I, SymAddr); else { - llvm_report_error("Could not resolve external global address: " + report_fatal_error("Could not resolve external global address: " +I->getName()); } } diff --git a/lib/ExecutionEngine/Interpreter/Execution.cpp b/lib/ExecutionEngine/Interpreter/Execution.cpp index a2aad5a..0748b54 100644 --- a/lib/ExecutionEngine/Interpreter/Execution.cpp +++ b/lib/ExecutionEngine/Interpreter/Execution.cpp @@ -631,7 +631,7 @@ void Interpreter::visitUnwindInst(UnwindInst &I) { do { ECStack.pop_back(); if (ECStack.empty()) - llvm_report_error("Empty stack during unwind!"); + report_fatal_error("Empty stack during unwind!"); Inst = ECStack.back().Caller.getInstruction(); } while (!(Inst && isa<InvokeInst>(Inst))); @@ -644,7 +644,7 @@ void Interpreter::visitUnwindInst(UnwindInst &I) { } void Interpreter::visitUnreachableInst(UnreachableInst &I) { - llvm_report_error("Program executed an 'unreachable' instruction!"); + report_fatal_error("Program executed an 'unreachable' instruction!"); } void Interpreter::visitBranchInst(BranchInst &I) { diff --git a/lib/ExecutionEngine/Interpreter/ExternalFunctions.cpp b/lib/ExecutionEngine/Interpreter/ExternalFunctions.cpp index 3ba783b..26a53b5 100644 --- 
a/lib/ExecutionEngine/Interpreter/ExternalFunctions.cpp +++ b/lib/ExecutionEngine/Interpreter/ExternalFunctions.cpp @@ -126,7 +126,7 @@ static ffi_type *ffiTypeFor(const Type *Ty) { default: break; } // TODO: Support other types such as StructTyID, ArrayTyID, OpaqueTyID, etc. - llvm_report_error("Type could not be mapped for use with libffi."); + report_fatal_error("Type could not be mapped for use with libffi."); return NULL; } @@ -174,7 +174,7 @@ static void *ffiValueFor(const Type *Ty, const GenericValue &AV, default: break; } // TODO: Support other types such as StructTyID, ArrayTyID, OpaqueTyID, etc. - llvm_report_error("Type value could not be mapped for use with libffi."); + report_fatal_error("Type value could not be mapped for use with libffi."); return NULL; } @@ -188,7 +188,7 @@ static bool ffiInvoke(RawFunc Fn, Function *F, // TODO: We don't have type information about the remaining arguments, because // this information is never passed into ExecutionEngine::runFunction(). if (ArgVals.size() > NumArgs && F->isVarArg()) { - llvm_report_error("Calling external var arg function '" + F->getName() + report_fatal_error("Calling external var arg function '" + F->getName() + "' is not supported by the Interpreter."); } @@ -284,7 +284,7 @@ GenericValue Interpreter::callExternalFunction(Function *F, errs() << "Tried to execute an unknown external function: " << F->getType()->getDescription() << " __main\n"; else - llvm_report_error("Tried to execute an unknown external function: " + + report_fatal_error("Tried to execute an unknown external function: " + F->getType()->getDescription() + " " +F->getName()); #ifndef USE_LIBFFI errs() << "Recompiling LLVM with --enable-libffi might help.\n"; @@ -325,7 +325,7 @@ GenericValue lle_X_exit(const FunctionType *FT, GenericValue lle_X_abort(const FunctionType *FT, const std::vector<GenericValue> &Args) { //FIXME: should we report or raise here? - //llvm_report_error("Interpreted program raised SIGABRT"); + //report_fatal_error("Interpreted program raised SIGABRT"); raise (SIGABRT); return GenericValue(); } diff --git a/lib/ExecutionEngine/Interpreter/Interpreter.h b/lib/ExecutionEngine/Interpreter/Interpreter.h index bc4200b..564e9ab 100644 --- a/lib/ExecutionEngine/Interpreter/Interpreter.h +++ b/lib/ExecutionEngine/Interpreter/Interpreter.h @@ -190,12 +190,10 @@ public: return &(ECStack.back ().VarArgs[0]); } - //FIXME: private: -public: +private: // Helper functions GenericValue executeGEPOperation(Value *Ptr, gep_type_iterator I, gep_type_iterator E, ExecutionContext &SF); -private: // Helper functions // SwitchToNewBasicBlock - Start execution in a new basic block and run any // PHI nodes in the top of the block. This is used for intraprocedural // control flow. 
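
The ExecutionEngine and Interpreter hunks above are part of a mechanical rename of llvm_report_error to report_fatal_error. A small sketch of the calling pattern those sites use, assuming only what the hunks themselves show (ErrorHandling.h plus a raw_string_ostream-built message); the helper name is hypothetical:

    #include "llvm/Support/ErrorHandling.h"
    #include "llvm/Support/raw_ostream.h"
    #include <string>

    // Hypothetical helper mirroring the rewritten call sites: build the
    // message incrementally, then hand it to report_fatal_error, which
    // prints it and aborts -- it does not return.
    static void failOnUnhandled(const char *What) {
      std::string msg;
      llvm::raw_string_ostream Msg(msg);
      Msg << "not handled: " << What;
      llvm::report_fatal_error(Msg.str());
    }
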
diff --git a/lib/ExecutionEngine/JIT/Intercept.cpp b/lib/ExecutionEngine/JIT/Intercept.cpp index c00b60a..b367033 100644 --- a/lib/ExecutionEngine/JIT/Intercept.cpp +++ b/lib/ExecutionEngine/JIT/Intercept.cpp @@ -142,7 +142,7 @@ void *JIT::getPointerToNamedFunction(const std::string &Name, return RP; if (AbortOnFailure) { - llvm_report_error("Program used external function '"+Name+ + report_fatal_error("Program used external function '"+Name+ "' which could not be resolved!"); } return 0; diff --git a/lib/ExecutionEngine/JIT/JIT.cpp b/lib/ExecutionEngine/JIT/JIT.cpp index dd74d73..546d2b2 100644 --- a/lib/ExecutionEngine/JIT/JIT.cpp +++ b/lib/ExecutionEngine/JIT/JIT.cpp @@ -304,7 +304,7 @@ JIT::JIT(Module *M, TargetMachine &tm, TargetJITInfo &tji, // Turn the machine code intermediate representation into bytes in memory that // may be executed. if (TM.addPassesToEmitMachineCode(PM, *JCE, OptLevel)) { - llvm_report_error("Target does not support machine code emission!"); + report_fatal_error("Target does not support machine code emission!"); } // Register routine for informing unwinding runtime about new EH frames @@ -352,7 +352,7 @@ void JIT::addModule(Module *M) { // Turn the machine code intermediate representation into bytes in memory // that may be executed. if (TM.addPassesToEmitMachineCode(PM, *JCE, CodeGenOpt::Default)) { - llvm_report_error("Target does not support machine code emission!"); + report_fatal_error("Target does not support machine code emission!"); } // Initialize passes. @@ -383,7 +383,7 @@ bool JIT::removeModule(Module *M) { // Turn the machine code intermediate representation into bytes in memory // that may be executed. if (TM.addPassesToEmitMachineCode(PM, *JCE, CodeGenOpt::Default)) { - llvm_report_error("Target does not support machine code emission!"); + report_fatal_error("Target does not support machine code emission!"); } // Initialize passes. @@ -665,7 +665,7 @@ void *JIT::getPointerToFunction(Function *F) { // exists in this Module. std::string ErrorMsg; if (F->Materialize(&ErrorMsg)) { - llvm_report_error("Error reading function '" + F->getName()+ + report_fatal_error("Error reading function '" + F->getName()+ "' from bitcode file: " + ErrorMsg); } @@ -704,7 +704,7 @@ void *JIT::getOrEmitGlobalVariable(const GlobalVariable *GV) { #endif Ptr = sys::DynamicLibrary::SearchForAddressOfSymbol(GV->getName()); if (Ptr == 0) { - llvm_report_error("Could not resolve external global address: " + report_fatal_error("Could not resolve external global address: " +GV->getName()); } addGlobalMapping(GV, Ptr); @@ -754,7 +754,7 @@ char* JIT::getMemoryForGV(const GlobalVariable* GV) { // situation. It's returned in the same block of memory as code which may // not be writable. if (isGVCompilationDisabled() && !GV->isConstant()) { - llvm_report_error("Compilation of non-internal GlobalValue is disabled!"); + report_fatal_error("Compilation of non-internal GlobalValue is disabled!"); } // Some applications require globals and code to live together, so they may diff --git a/lib/ExecutionEngine/JIT/JITDebugRegisterer.cpp b/lib/ExecutionEngine/JIT/JITDebugRegisterer.cpp index 68471bd..749a57d 100644 --- a/lib/ExecutionEngine/JIT/JITDebugRegisterer.cpp +++ b/lib/ExecutionEngine/JIT/JITDebugRegisterer.cpp @@ -80,7 +80,7 @@ std::string JITDebugRegisterer::MakeELF(const Function *F, DebugInfo &I) { // Copy the binary into the .text section. This isn't necessary, but it's // useful to be able to disassemble the ELF by hand. 
- ELFSection &Text = EW.getTextSection((Function *)F); + ELFSection &Text = EW.getTextSection(const_cast<Function *>(F)); Text.Addr = (uint64_t)I.FnStart; // TODO: We could eliminate this copy if we somehow used a pointer/size pair // instead of a vector. diff --git a/lib/ExecutionEngine/JIT/JITDwarfEmitter.cpp b/lib/ExecutionEngine/JIT/JITDwarfEmitter.cpp index 2f42e6b..4b3ca87 100644 --- a/lib/ExecutionEngine/JIT/JITDwarfEmitter.cpp +++ b/lib/ExecutionEngine/JIT/JITDwarfEmitter.cpp @@ -52,7 +52,7 @@ unsigned char* JITDwarfEmitter::EmitDwarfTable(MachineFunction& F, unsigned char* Result = 0; - const std::vector<Function *> Personalities = MMI->getPersonalities(); + const std::vector<const Function *> Personalities = MMI->getPersonalities(); EHFramePtr = EmitCommonEHFrame(Personalities[MMI->getPersonalityIndex()]); Result = EmitEHFrame(Personalities[MMI->getPersonalityIndex()], EHFramePtr, @@ -75,7 +75,7 @@ JITDwarfEmitter::EmitFrameMoves(intptr_t BaseLabelPtr, MCSymbol *Label = Move.getLabel(); // Throw out move if the label is invalid. - if (Label && !Label->isDefined()) + if (Label && (*JCE->getLabelLocations())[Label] == 0) continue; intptr_t LabelPtr = 0; @@ -199,9 +199,9 @@ unsigned char* JITDwarfEmitter::EmitExceptionTable(MachineFunction* MF, assert(MMI && "MachineModuleInfo not registered!"); // Map all labels and get rid of any dead landing pads. - MMI->TidyLandingPads(); + MMI->TidyLandingPads(JCE->getLabelLocations()); - const std::vector<GlobalVariable *> &TypeInfos = MMI->getTypeInfos(); + const std::vector<const GlobalVariable *> &TypeInfos = MMI->getTypeInfos(); const std::vector<unsigned> &FilterIds = MMI->getFilterIds(); const std::vector<LandingPadInfo> &PadInfos = MMI->getLandingPads(); if (PadInfos.empty()) return 0; @@ -450,7 +450,7 @@ unsigned char* JITDwarfEmitter::EmitExceptionTable(MachineFunction* MF, // Emit the type ids. for (unsigned M = TypeInfos.size(); M; --M) { - GlobalVariable *GV = TypeInfos[M - 1]; + const GlobalVariable *GV = TypeInfos[M - 1]; if (GV) { if (TD->getPointerSize() == sizeof(int32_t)) @@ -609,7 +609,7 @@ unsigned JITDwarfEmitter::GetDwarfTableSizeInBytes(MachineFunction& F, FinalSize += GetExceptionTableSizeInBytes(&F); - const std::vector<Function *> Personalities = MMI->getPersonalities(); + const std::vector<const Function *> Personalities = MMI->getPersonalities(); FinalSize += GetCommonEHFrameSizeInBytes(Personalities[MMI->getPersonalityIndex()]); @@ -711,7 +711,7 @@ JITDwarfEmitter::GetFrameMovesSizeInBytes(intptr_t BaseLabelPtr, MCSymbol *Label = Move.getLabel(); // Throw out move if the label is invalid. - if (Label && !Label->isDefined()) + if (Label && (*JCE->getLabelLocations())[Label] == 0) continue; intptr_t LabelPtr = 0; @@ -780,9 +780,9 @@ JITDwarfEmitter::GetExceptionTableSizeInBytes(MachineFunction* MF) const { unsigned FinalSize = 0; // Map all labels and get rid of any dead landing pads. 
- MMI->TidyLandingPads(); + MMI->TidyLandingPads(JCE->getLabelLocations()); - const std::vector<GlobalVariable *> &TypeInfos = MMI->getTypeInfos(); + const std::vector<const GlobalVariable *> &TypeInfos = MMI->getTypeInfos(); const std::vector<unsigned> &FilterIds = MMI->getFilterIds(); const std::vector<LandingPadInfo> &PadInfos = MMI->getLandingPads(); if (PadInfos.empty()) return 0; diff --git a/lib/ExecutionEngine/JIT/JITEmitter.cpp b/lib/ExecutionEngine/JIT/JITEmitter.cpp index 7b8ab9e..e3855b2 100644 --- a/lib/ExecutionEngine/JIT/JITEmitter.cpp +++ b/lib/ExecutionEngine/JIT/JITEmitter.cpp @@ -23,6 +23,7 @@ #include "llvm/Analysis/DebugInfo.h" #include "llvm/CodeGen/JITCodeEmitter.h" #include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineCodeInfo.h" #include "llvm/CodeGen/MachineConstantPool.h" #include "llvm/CodeGen/MachineJumpTableInfo.h" #include "llvm/CodeGen/MachineModuleInfo.h" @@ -30,8 +31,8 @@ #include "llvm/ExecutionEngine/GenericValue.h" #include "llvm/ExecutionEngine/JITEventListener.h" #include "llvm/ExecutionEngine/JITMemoryManager.h" -#include "llvm/CodeGen/MachineCodeInfo.h" #include "llvm/Target/TargetData.h" +#include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetJITInfo.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetOptions.h" @@ -43,7 +44,6 @@ #include "llvm/Support/raw_ostream.h" #include "llvm/System/Disassembler.h" #include "llvm/System/Memory.h" -#include "llvm/Target/TargetInstrInfo.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallVector.h" @@ -369,7 +369,7 @@ namespace { ValueMap<const Function *, EmittedCode, EmittedFunctionConfig> EmittedFunctions; - DILocation PrevDLT; + DebugLoc PrevDL; /// Instance of the JIT JIT *TheJIT; @@ -377,14 +377,14 @@ namespace { public: JITEmitter(JIT &jit, JITMemoryManager *JMM, TargetMachine &TM) : SizeEstimate(0), Resolver(jit, *this), MMI(0), CurFn(0), - EmittedFunctions(this), PrevDLT(NULL), TheJIT(&jit) { + EmittedFunctions(this), TheJIT(&jit) { MemMgr = JMM ? JMM : JITMemoryManager::CreateDefaultMemManager(); if (jit.getJITInfo().needsGOT()) { MemMgr->AllocateGOT(); DEBUG(dbgs() << "JIT is managing a GOT\n"); } - if (DwarfExceptionHandling || JITEmitDebugInfo) { + if (JITExceptionHandling || JITEmitDebugInfo) { DE.reset(new JITDwarfEmitter(jit)); } if (JITEmitDebugInfo) { @@ -463,6 +463,10 @@ namespace { LabelLocations[Label] = getCurrentPCValue(); } + virtual DenseMap<MCSymbol*, uintptr_t> *getLabelLocations() { + return &LabelLocations; + } + virtual uintptr_t getLabelAddress(MCSymbol *Label) const { assert(LabelLocations.count(Label) && "Label not emitted!"); return LabelLocations.find(Label)->second; @@ -737,7 +741,7 @@ void *JITResolver::JITCompilerFn(void *Stub) { // If lazy compilation is disabled, emit a useful error message and abort. if (!JR->TheJIT->isCompilingLazily()) { - llvm_report_error("LLVM JIT requested to do lazy compilation of function '" + report_fatal_error("LLVM JIT requested to do lazy compilation of function '" + F->getName() + "' when lazy compiles are disabled!"); } @@ -823,19 +827,17 @@ void *JITEmitter::getPointerToGVIndirectSym(GlobalValue *V, void *Reference) { void JITEmitter::processDebugLoc(DebugLoc DL, bool BeforePrintingInsn) { if (DL.isUnknown()) return; if (!BeforePrintingInsn) return; - - // FIXME: This is horribly inefficient. 
- DILocation CurDLT(DL.getAsMDNode( - EmissionDetails.MF->getFunction()->getContext())); - if (CurDLT.getScope().getNode() != 0 && PrevDLT.getNode() !=CurDLT.getNode()){ + const LLVMContext& Context = EmissionDetails.MF->getFunction()->getContext(); + + if (DL.getScope(Context) != 0 && PrevDL != DL) { JITEvent_EmittedFunctionDetails::LineStart NextLine; NextLine.Address = getCurrentPCValue(); NextLine.Loc = DL; EmissionDetails.LineStarts.push_back(NextLine); } - PrevDLT = CurDLT; + PrevDL = DL; } static unsigned GetConstantPoolSizeInBytes(MachineConstantPool *MCP, @@ -945,7 +947,7 @@ unsigned JITEmitter::addSizeOfGlobalsInConstantVal( std::string msg; raw_string_ostream Msg(msg); Msg << "ConstantExpr not handled: " << *CE; - llvm_report_error(Msg.str()); + report_fatal_error(Msg.str()); } } } @@ -997,12 +999,13 @@ unsigned JITEmitter::GetSizeOfGlobalsInBytes(MachineFunction &MF) { for (unsigned CurOp = 0; CurOp < NumOps; CurOp++) { const MachineOperand &MO = MI.getOperand(CurOp); if (MO.isGlobal()) { - GlobalValue* V = MO.getGlobal(); + const GlobalValue* V = MO.getGlobal(); const GlobalVariable *GV = dyn_cast<const GlobalVariable>(V); if (!GV) continue; // If seen in previous function, it will have an entry here. - if (TheJIT->getPointerToGlobalIfAvailable(GV)) + if (TheJIT->getPointerToGlobalIfAvailable( + const_cast<GlobalVariable *>(GV))) continue; // If seen earlier in this function, it will have an entry here. // FIXME: it should be possible to combine these tables, by @@ -1212,6 +1215,9 @@ bool JITEmitter::finishFunction(MachineFunction &F) { TheJIT->NotifyFunctionEmitted(*F.getFunction(), FnStart, FnEnd-FnStart, EmissionDetails); + // Reset the previous debug location. + PrevDL = DebugLoc(); + DEBUG(dbgs() << "JIT: Finished CodeGen of [" << (void*)FnStart << "] Function: " << F.getFunction()->getName() << ": " << (FnEnd-FnStart) << " bytes of text, " @@ -1223,45 +1229,44 @@ bool JITEmitter::finishFunction(MachineFunction &F) { // Mark code region readable and executable if it's not so already. 
MemMgr->setMemoryExecutable(); - DEBUG( - if (sys::hasDisassembler()) { - dbgs() << "JIT: Disassembled code:\n"; - dbgs() << sys::disassembleBuffer(FnStart, FnEnd-FnStart, - (uintptr_t)FnStart); - } else { - dbgs() << "JIT: Binary code:\n"; - uint8_t* q = FnStart; - for (int i = 0; q < FnEnd; q += 4, ++i) { - if (i == 4) - i = 0; - if (i == 0) - dbgs() << "JIT: " << (long)(q - FnStart) << ": "; - bool Done = false; - for (int j = 3; j >= 0; --j) { - if (q + j >= FnEnd) - Done = true; - else - dbgs() << (unsigned short)q[j]; + DEBUG({ + if (sys::hasDisassembler()) { + dbgs() << "JIT: Disassembled code:\n"; + dbgs() << sys::disassembleBuffer(FnStart, FnEnd-FnStart, + (uintptr_t)FnStart); + } else { + dbgs() << "JIT: Binary code:\n"; + uint8_t* q = FnStart; + for (int i = 0; q < FnEnd; q += 4, ++i) { + if (i == 4) + i = 0; + if (i == 0) + dbgs() << "JIT: " << (long)(q - FnStart) << ": "; + bool Done = false; + for (int j = 3; j >= 0; --j) { + if (q + j >= FnEnd) + Done = true; + else + dbgs() << (unsigned short)q[j]; + } + if (Done) + break; + dbgs() << ' '; + if (i == 3) + dbgs() << '\n'; } - if (Done) - break; - dbgs() << ' '; - if (i == 3) - dbgs() << '\n'; + dbgs()<< '\n'; } - dbgs()<< '\n'; - } - ); + }); - if (DwarfExceptionHandling || JITEmitDebugInfo) { + if (JITExceptionHandling || JITEmitDebugInfo) { uintptr_t ActualSize = 0; SavedBufferBegin = BufferBegin; SavedBufferEnd = BufferEnd; SavedCurBufferPtr = CurBufferPtr; - if (MemMgr->NeedsExactSize()) { + if (MemMgr->NeedsExactSize()) ActualSize = DE->GetDwarfTableSizeInBytes(F, *this, FnStart, FnEnd); - } BufferBegin = CurBufferPtr = MemMgr->startExceptionTable(F.getFunction(), ActualSize); @@ -1277,7 +1282,7 @@ bool JITEmitter::finishFunction(MachineFunction &F) { BufferEnd = SavedBufferEnd; CurBufferPtr = SavedCurBufferPtr; - if (DwarfExceptionHandling) { + if (JITExceptionHandling) { TheJIT->RegisterTable(FrameRegister); } @@ -1375,7 +1380,7 @@ void JITEmitter::emitConstantPool(MachineConstantPool *MCP) { ConstPoolAddresses.push_back(CAddr); if (CPE.isMachineConstantPoolEntry()) { // FIXME: add support to lower machine constant pool values into bytes! - llvm_report_error("Initialize memory with machine specific constant pool" + report_fatal_error("Initialize memory with machine specific constant pool" "entry has not been implemented!"); } TheJIT->InitializeMemory(CPE.Val.ConstVal, (void*)CAddr); diff --git a/lib/ExecutionEngine/JIT/JITMemoryManager.cpp b/lib/ExecutionEngine/JIT/JITMemoryManager.cpp index a17caa1..653e6f1 100644 --- a/lib/ExecutionEngine/JIT/JITMemoryManager.cpp +++ b/lib/ExecutionEngine/JIT/JITMemoryManager.cpp @@ -15,6 +15,7 @@ #include "llvm/ExecutionEngine/JITMemoryManager.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/Statistic.h" +#include "llvm/ADT/Twine.h" #include "llvm/GlobalValue.h" #include "llvm/Support/Allocator.h" #include "llvm/Support/Compiler.h" @@ -22,12 +23,9 @@ #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" #include "llvm/System/Memory.h" -#include <map> #include <vector> #include <cassert> #include <climits> -#include <cstdio> -#include <cstdlib> #include <cstring> using namespace llvm; @@ -614,8 +612,8 @@ sys::MemoryBlock DefaultJITMemoryManager::allocateNewSlab(size_t size) { sys::MemoryBlock *LastSlabPtr = LastSlab.base() ? 
&LastSlab : 0; sys::MemoryBlock B = sys::Memory::AllocateRWX(size, LastSlabPtr, &ErrMsg); if (B.base() == 0) { - llvm_report_error("Allocation failed when allocating new memory in the" - " JIT\n" + ErrMsg); + report_fatal_error("Allocation failed when allocating new memory in the" + " JIT\n" + Twine(ErrMsg)); } LastSlab = B; ++NumSlabs; diff --git a/lib/ExecutionEngine/JIT/OProfileJITEventListener.cpp b/lib/ExecutionEngine/JIT/OProfileJITEventListener.cpp index 9c01b73..1ca084b 100644 --- a/lib/ExecutionEngine/JIT/OProfileJITEventListener.cpp +++ b/lib/ExecutionEngine/JIT/OProfileJITEventListener.cpp @@ -114,26 +114,43 @@ void OProfileJITEventListener::NotifyFunctionEmitted( return; } - // Now we convert the line number information from the address/DebugLoc format - // in Details to the address/filename/lineno format that OProfile expects. - // OProfile 0.9.4 (and maybe later versions) has a bug that causes it to - // ignore line numbers for addresses above 4G. - FilenameCache Filenames; - std::vector<debug_line_info> LineInfo; - LineInfo.reserve(1 + Details.LineStarts.size()); - if (!Details.MF->getDefaultDebugLoc().isUnknown()) { - LineInfo.push_back(LineStartToOProfileFormat( - *Details.MF, Filenames, - reinterpret_cast<uintptr_t>(FnStart), - Details.MF->getDefaultDebugLoc())); - } - for (std::vector<EmittedFunctionDetails::LineStart>::const_iterator + if (!Details.LineStarts.empty()) { + // Now we convert the line number information from the address/DebugLoc + // format in Details to the address/filename/lineno format that OProfile + // expects. Note that OProfile 0.9.4 has a bug that causes it to ignore + // line numbers for addresses above 4G. + FilenameCache Filenames; + std::vector<debug_line_info> LineInfo; + LineInfo.reserve(1 + Details.LineStarts.size()); + + DebugLoc FirstLoc = Details.LineStarts[0].Loc; + assert(!FirstLoc.isUnknown() + && "LineStarts should not contain unknown DebugLocs"); + MDNode *FirstLocScope = FirstLoc.getScope(F.getContext()); + DISubprogram FunctionDI = getDISubprogram(FirstLocScope); + if (FunctionDI.Verify()) { + // If we have debug info for the function itself, use that as the line + // number of the first several instructions. Otherwise, after filling + // LineInfo, we'll adjust the address of the first line number to point at + // the start of the function. + debug_line_info line_info; + line_info.vma = reinterpret_cast<uintptr_t>(FnStart); + line_info.lineno = FunctionDI.getLineNumber(); + line_info.filename = Filenames.getFilename(FirstLocScope); + LineInfo.push_back(line_info); + } + + for (std::vector<EmittedFunctionDetails::LineStart>::const_iterator I = Details.LineStarts.begin(), E = Details.LineStarts.end(); - I != E; ++I) { - LineInfo.push_back(LineStartToOProfileFormat( - *Details.MF, Filenames, I->Address, I->Loc)); - } - if (!LineInfo.empty()) { + I != E; ++I) { + LineInfo.push_back(LineStartToOProfileFormat( + *Details.MF, Filenames, I->Address, I->Loc)); + } + + // In case the function didn't have line info of its own, adjust the first + // line info's address to include the start of the function. 
+ LineInfo[0].vma = reinterpret_cast<uintptr_t>(FnStart); + if (op_write_debug_line_info(Agent, FnStart, LineInfo.size(), &*LineInfo.begin()) == -1) { DEBUG(dbgs() diff --git a/lib/MC/MCAsmInfo.cpp b/lib/MC/MCAsmInfo.cpp index f0da694..2b23994 100644 --- a/lib/MC/MCAsmInfo.cpp +++ b/lib/MC/MCAsmInfo.cpp @@ -35,6 +35,7 @@ MCAsmInfo::MCAsmInfo() { AssemblerDialect = 0; AllowQuotesInName = false; AllowNameToStartWithDigit = false; + AllowPeriodsInName = true; ZeroDirective = "\t.zero\t"; AsciiDirective = "\t.ascii\t"; AscizDirective = "\t.asciz\t"; diff --git a/lib/MC/MCAssembler.cpp b/lib/MC/MCAssembler.cpp index 03b8bd3..69afcc8 100644 --- a/lib/MC/MCAssembler.cpp +++ b/lib/MC/MCAssembler.cpp @@ -212,7 +212,7 @@ static bool isScatteredFixupFullyResolvedSimple(const MCAssembler &Asm, // resolved. This also works in conjunction with absolutized .set, which // requires the compiler to use .set to absolutize the differences between // symbols which the compiler knows to be assembly time constants, so we don't - // need to worry about consider symbol differences fully resolved. + // need to worry about considering symbol differences fully resolved. // Non-relative fixups are only resolved if constant. if (!BaseSection) @@ -342,7 +342,7 @@ bool MCAssembler::EvaluateFixup(const MCAsmLayout &Layout, ++stats::EvaluateFixup; if (!Fixup.Value->EvaluateAsRelocatable(Target, &Layout)) - llvm_report_error("expected relocatable expression"); + report_fatal_error("expected relocatable expression"); // FIXME: How do non-scattered symbols work in ELF? I presume the linker // doesn't support small relocations, but then under what criteria does the @@ -466,12 +466,12 @@ uint64_t MCAssembler::LayoutSection(MCSectionData &SD, int64_t TargetLocation; if (!OF.getOffset().EvaluateAsAbsolute(TargetLocation, &Layout)) - llvm_report_error("expected assembly-time absolute expression"); + report_fatal_error("expected assembly-time absolute expression"); // FIXME: We need a way to communicate this error. int64_t Offset = TargetLocation - FragmentOffset; if (Offset < 0) - llvm_report_error("invalid .org offset '" + Twine(TargetLocation) + + report_fatal_error("invalid .org offset '" + Twine(TargetLocation) + "' (at offset '" + Twine(FragmentOffset) + "'"); EffectiveSize = Offset; @@ -526,7 +526,7 @@ static void WriteFragmentData(const MCAssembler &Asm, const MCAsmLayout &Layout, // multiple .align directives to enforce the semantics it wants), but is // severe enough that we want to report it. How to handle this? if (Count * AF.getValueSize() != FragmentSize) - llvm_report_error("undefined .align directive, value size '" + + report_fatal_error("undefined .align directive, value size '" + Twine(AF.getValueSize()) + "' is not a divisor of padding size '" + Twine(FragmentSize) + "'"); @@ -537,7 +537,7 @@ static void WriteFragmentData(const MCAssembler &Asm, const MCAsmLayout &Layout, // If we are aligning with nops, ask that target to emit the right data. 
if (AF.getEmitNops()) { if (!Asm.getBackend().WriteNopData(Count, OW)) - llvm_report_error("unable to write nop sequence of " + + report_fatal_error("unable to write nop sequence of " + Twine(Count) + " bytes"); break; } @@ -662,7 +662,7 @@ void MCAssembler::Finish() { uint64_t StartOffset = OS.tell(); llvm::OwningPtr<MCObjectWriter> Writer(getBackend().createObjectWriter(OS)); if (!Writer) - llvm_report_error("unable to create object writer!"); + report_fatal_error("unable to create object writer!"); // Allow the object writer a chance to perform post-layout binding (for // example, to set the index fields in the symbol data). @@ -715,6 +715,8 @@ bool MCAssembler::FixupNeedsRelaxation(const MCAsmFixup &Fixup, return true; // Otherwise, relax if the value is too big for a (signed) i8. + // + // FIXME: This is target dependent! return int64_t(Value) != int64_t(int8_t(Value)); } diff --git a/lib/MC/MCContext.cpp b/lib/MC/MCContext.cpp index e02cbc7..dc757bb 100644 --- a/lib/MC/MCContext.cpp +++ b/lib/MC/MCContext.cpp @@ -9,20 +9,35 @@ #include "llvm/MC/MCContext.h" #include "llvm/MC/MCAsmInfo.h" -#include "llvm/MC/MCSection.h" +#include "llvm/MC/MCSectionMachO.h" +#include "llvm/MC/MCSectionELF.h" #include "llvm/MC/MCSymbol.h" #include "llvm/ADT/SmallString.h" #include "llvm/ADT/Twine.h" using namespace llvm; +typedef StringMap<const MCSectionMachO*> MachOUniqueMapTy; +typedef StringMap<const MCSectionELF*> ELFUniqueMapTy; + + MCContext::MCContext(const MCAsmInfo &mai) : MAI(mai), NextUniqueID(0) { + MachOUniquingMap = 0; + ELFUniquingMap = 0; } MCContext::~MCContext() { - // NOTE: The sections are all allocated out of a bump pointer allocator, + // NOTE: The symbols are all allocated out of a bump pointer allocator, // we don't need to free them here. + + // If we have the MachO uniquing map, free it. + delete (MachOUniqueMapTy*)MachOUniquingMap; + delete (ELFUniqueMapTy*)ELFUniquingMap; } +//===----------------------------------------------------------------------===// +// Symbol Manipulation +//===----------------------------------------------------------------------===// + MCSymbol *MCContext::GetOrCreateSymbol(StringRef Name) { assert(!Name.empty() && "Normal symbols cannot be unnamed!"); @@ -55,3 +70,56 @@ MCSymbol *MCContext::CreateTempSymbol() { MCSymbol *MCContext::LookupSymbol(StringRef Name) const { return Symbols.lookup(Name); } + +//===----------------------------------------------------------------------===// +// Section Management +//===----------------------------------------------------------------------===// + +const MCSectionMachO *MCContext:: +getMachOSection(StringRef Segment, StringRef Section, + unsigned TypeAndAttributes, + unsigned Reserved2, SectionKind Kind) { + + // We unique sections by their segment/section pair. The returned section + // may not have the same flags as the requested section, if so this should be + // diagnosed by the client as an error. + + // Create the map if it doesn't already exist. + if (MachOUniquingMap == 0) + MachOUniquingMap = new MachOUniqueMapTy(); + MachOUniqueMapTy &Map = *(MachOUniqueMapTy*)MachOUniquingMap; + + // Form the name to look up. + SmallString<64> Name; + Name += Segment; + Name.push_back(','); + Name += Section; + + // Do the lookup, if we have a hit, return it. + const MCSectionMachO *&Entry = Map[Name.str()]; + if (Entry) return Entry; + + // Otherwise, return a new section. 
+ return Entry = new (*this) MCSectionMachO(Segment, Section, TypeAndAttributes, + Reserved2, Kind); +} + + +const MCSection *MCContext:: +getELFSection(StringRef Section, unsigned Type, unsigned Flags, + SectionKind Kind, bool IsExplicit) { + if (ELFUniquingMap == 0) + ELFUniquingMap = new ELFUniqueMapTy(); + ELFUniqueMapTy &Map = *(ELFUniqueMapTy*)ELFUniquingMap; + + // Do the lookup, if we have a hit, return it. + StringMapEntry<const MCSectionELF*> &Entry = Map.GetOrCreateValue(Section); + if (Entry.getValue()) return Entry.getValue(); + + MCSectionELF *Result = new (*this) MCSectionELF(Entry.getKey(), Type, Flags, + Kind, IsExplicit); + Entry.setValue(Result); + return Result; +} + + diff --git a/lib/MC/MCParser/AsmLexer.cpp b/lib/MC/MCParser/AsmLexer.cpp index 22c8d76..1183312 100644 --- a/lib/MC/MCParser/AsmLexer.cpp +++ b/lib/MC/MCParser/AsmLexer.cpp @@ -74,6 +74,11 @@ AsmToken AsmLexer::LexIdentifier() { while (isalnum(*CurPtr) || *CurPtr == '_' || *CurPtr == '$' || *CurPtr == '.' || *CurPtr == '@') ++CurPtr; + + // Handle . as a special case. + if (CurPtr == TokStart+1 && TokStart[0] == '.') + return AsmToken(AsmToken::Dot, StringRef(TokStart, 1)); + return AsmToken(AsmToken::Identifier, StringRef(TokStart, CurPtr - TokStart)); } diff --git a/lib/MC/MCParser/AsmParser.cpp b/lib/MC/MCParser/AsmParser.cpp index 4e62689..a63d2e4 100644 --- a/lib/MC/MCParser/AsmParser.cpp +++ b/lib/MC/MCParser/AsmParser.cpp @@ -30,16 +30,10 @@ using namespace llvm; enum { DEFAULT_ADDRSPACE = 0 }; -// Mach-O section uniquing. -// -// FIXME: Figure out where this should live, it should be shared by -// TargetLoweringObjectFile. -typedef StringMap<const MCSectionMachO*> MachOUniqueMapTy; - AsmParser::AsmParser(SourceMgr &_SM, MCContext &_Ctx, MCStreamer &_Out, const MCAsmInfo &_MAI) : Lexer(_MAI), Ctx(_Ctx), Out(_Out), SrcMgr(_SM), TargetParser(0), - CurBuffer(0), SectionUniquingMap(0) { + CurBuffer(0) { Lexer.setBuffer(SrcMgr.getMemoryBuffer(CurBuffer)); // Debugging directives. @@ -51,39 +45,6 @@ AsmParser::AsmParser(SourceMgr &_SM, MCContext &_Ctx, MCStreamer &_Out, AsmParser::~AsmParser() { - // If we have the MachO uniquing map, free it. - delete (MachOUniqueMapTy*)SectionUniquingMap; -} - -const MCSection *AsmParser::getMachOSection(const StringRef &Segment, - const StringRef &Section, - unsigned TypeAndAttributes, - unsigned Reserved2, - SectionKind Kind) const { - // We unique sections by their segment/section pair. The returned section - // may not have the same flags as the requested section, if so this should be - // diagnosed by the client as an error. - - // Create the map if it doesn't already exist. - if (SectionUniquingMap == 0) - SectionUniquingMap = new MachOUniqueMapTy(); - MachOUniqueMapTy &Map = *(MachOUniqueMapTy*)SectionUniquingMap; - - // Form the name to look up. - SmallString<64> Name; - Name += Segment; - Name.push_back(','); - Name += Section; - - // Do the lookup, if we have a hit, return it. - const MCSectionMachO *&Entry = Map[Name.str()]; - - // FIXME: This should validate the type and attributes. - if (Entry) return Entry; - - // Otherwise, return a new section. - return Entry = MCSectionMachO::Create(Segment, Section, TypeAndAttributes, - Reserved2, Kind, Ctx); } void AsmParser::Warning(SMLoc L, const Twine &Msg) { @@ -143,7 +104,7 @@ bool AsmParser::Run(bool NoInitialTextSection, bool NoFinalize) { // // FIXME: Target hook & command line option for initial section. 
if (!NoInitialTextSection) - Out.SwitchSection(getMachOSection("__TEXT", "__text", + Out.SwitchSection(Ctx.getMachOSection("__TEXT", "__text", MCSectionMachO::S_ATTR_PURE_INSTRUCTIONS, 0, SectionKind::getText())); @@ -156,29 +117,6 @@ bool AsmParser::Run(bool NoInitialTextSection, bool NoFinalize) { // While we have input, parse each statement. while (Lexer.isNot(AsmToken::Eof)) { - // Handle conditional assembly here before calling ParseStatement() - if (Lexer.getKind() == AsmToken::Identifier) { - // If we have an identifier, handle it as the key symbol. - AsmToken ID = getTok(); - SMLoc IDLoc = ID.getLoc(); - StringRef IDVal = ID.getString(); - - if (IDVal == ".if" || - IDVal == ".elseif" || - IDVal == ".else" || - IDVal == ".endif") { - if (!ParseConditionalAssemblyDirectives(IDVal, IDLoc)) - continue; - HadError = true; - EatToEndOfStatement(); - continue; - } - } - if (TheCondState.Ignore) { - EatToEndOfStatement(); - continue; - } - if (!ParseStatement()) continue; // We had an error, remember it and recover by skipping to the next line. @@ -198,21 +136,6 @@ bool AsmParser::Run(bool NoInitialTextSection, bool NoFinalize) { return HadError; } -/// ParseConditionalAssemblyDirectives - parse the conditional assembly -/// directives -bool AsmParser::ParseConditionalAssemblyDirectives(StringRef Directive, - SMLoc DirectiveLoc) { - if (Directive == ".if") - return ParseDirectiveIf(DirectiveLoc); - if (Directive == ".elseif") - return ParseDirectiveElseIf(DirectiveLoc); - if (Directive == ".else") - return ParseDirectiveElse(DirectiveLoc); - if (Directive == ".endif") - return ParseDirectiveEndIf(DirectiveLoc); - return true; -} - /// EatToEndOfStatement - Throw away the rest of the line for testing purposes. void AsmParser::EatToEndOfStatement() { while (Lexer.isNot(AsmToken::EndOfStatement) && @@ -248,6 +171,7 @@ MCSymbol *AsmParser::CreateSymbol(StringRef Name) { /// primaryexpr ::= (parenexpr /// primaryexpr ::= symbol /// primaryexpr ::= number +/// primaryexpr ::= '.' /// primaryexpr ::= ~,+,- primaryexpr bool AsmParser::ParsePrimaryExpr(const MCExpr *&Res, SMLoc &EndLoc) { switch (Lexer.getKind()) { @@ -292,6 +216,17 @@ bool AsmParser::ParsePrimaryExpr(const MCExpr *&Res, SMLoc &EndLoc) { EndLoc = Lexer.getLoc(); Lex(); // Eat token. return false; + case AsmToken::Dot: { + // This is a '.' reference, which references the current PC. Emit a + // temporary label to the streamer and refer to it. + MCSymbol *Sym = Ctx.CreateTempSymbol(); + Out.EmitLabel(Sym); + Res = MCSymbolRefExpr::Create(Sym, MCSymbolRefExpr::VK_None, getContext()); + EndLoc = Lexer.getLoc(); + Lex(); // Eat identifier. + return false; + } + case AsmToken::LParen: Lex(); // Eat the '('. return ParseParenExpr(Res, EndLoc); @@ -484,9 +419,30 @@ bool AsmParser::ParseStatement() { AsmToken ID = getTok(); SMLoc IDLoc = ID.getLoc(); StringRef IDVal; - if (ParseIdentifier(IDVal)) - return TokError("unexpected token at start of statement"); + if (ParseIdentifier(IDVal)) { + if (!TheCondState.Ignore) + return TokError("unexpected token at start of statement"); + IDVal = ""; + } + // Handle conditional assembly here before checking for skipping. We + // have to do this so that .endif isn't skipped in a ".if 0" block for + // example. 
+ if (IDVal == ".if") + return ParseDirectiveIf(IDLoc); + if (IDVal == ".elseif") + return ParseDirectiveElseIf(IDLoc); + if (IDVal == ".else") + return ParseDirectiveElse(IDLoc); + if (IDVal == ".endif") + return ParseDirectiveEndIf(IDLoc); + + // If we are in a ".if 0" block, ignore this statement. + if (TheCondState.Ignore) { + EatToEndOfStatement(); + return false; + } + // FIXME: Recurse on local labels? // See what kind of statement we have. @@ -773,39 +729,38 @@ bool AsmParser::ParseStatement() { return false; } - SmallVector<MCParsedAsmOperand*, 8> ParsedOperands; - if (getTargetParser().ParseInstruction(IDVal, IDLoc, ParsedOperands)) - // FIXME: Leaking ParsedOperands on failure. - return true; - - if (Lexer.isNot(AsmToken::EndOfStatement)) - // FIXME: Leaking ParsedOperands on failure. - return TokError("unexpected token in argument list"); - - // Eat the end of statement marker. - Lex(); - - - MCInst Inst; + bool HadError = getTargetParser().ParseInstruction(IDVal, IDLoc, + ParsedOperands); + if (!HadError && Lexer.isNot(AsmToken::EndOfStatement)) + HadError = TokError("unexpected token in argument list"); + + // If parsing succeeded, match the instruction. + if (!HadError) { + MCInst Inst; + if (!getTargetParser().MatchInstruction(ParsedOperands, Inst)) { + // Emit the instruction on success. + Out.EmitInstruction(Inst); + } else { + // Otherwise emit a diagnostic about the match failure and set the error + // flag. + // + // FIXME: We should give nicer diagnostics about the exact failure. + Error(IDLoc, "unrecognized instruction"); + HadError = true; + } + } - bool MatchFail = getTargetParser().MatchInstruction(ParsedOperands, Inst); + // If there was no error, consume the end-of-statement token. Otherwise this + // will be done by our caller. + if (!HadError) + Lex(); // Free any parsed operands. for (unsigned i = 0, e = ParsedOperands.size(); i != e; ++i) delete ParsedOperands[i]; - if (MatchFail) { - // FIXME: We should give nicer diagnostics about the exact failure. - Error(IDLoc, "unrecognized instruction"); - return true; - } - - // Instruction is good, process it. - Out.EmitInstruction(Inst); - - // Skip to end of line for now. - return false; + return HadError; } bool AsmParser::ParseAssignment(const StringRef &Name) { @@ -919,9 +874,9 @@ bool AsmParser::ParseDirectiveDarwinSection() { // FIXME: Arch specific. bool isText = Segment == "__TEXT"; // FIXME: Hack. - Out.SwitchSection(getMachOSection(Segment, Section, TAA, StubSize, - isText ? SectionKind::getText() - : SectionKind::getDataRel())); + Out.SwitchSection(Ctx.getMachOSection(Segment, Section, TAA, StubSize, + isText ? SectionKind::getText() + : SectionKind::getDataRel())); return false; } @@ -936,9 +891,9 @@ bool AsmParser::ParseDirectiveSectionSwitch(const char *Segment, // FIXME: Arch specific. bool isText = StringRef(Segment) == "__TEXT"; // FIXME: Hack. - Out.SwitchSection(getMachOSection(Segment, Section, TAA, StubSize, - isText ? SectionKind::getText() - : SectionKind::getDataRel())); + Out.SwitchSection(Ctx.getMachOSection(Segment, Section, TAA, StubSize, + isText ? SectionKind::getText() + : SectionKind::getDataRel())); // Set the implicit alignment, if any. // @@ -1374,9 +1329,9 @@ bool AsmParser::ParseDirectiveComm(bool IsLocal) { // '.lcomm' is equivalent to '.zerofill'. 
// Create the Symbol as a common or local common with Size and Pow2Alignment if (IsLocal) { - Out.EmitZerofill(getMachOSection("__DATA", "__bss", - MCSectionMachO::S_ZEROFILL, 0, - SectionKind::getBSS()), + Out.EmitZerofill(Ctx.getMachOSection("__DATA", "__bss", + MCSectionMachO::S_ZEROFILL, 0, + SectionKind::getBSS()), Sym, Size, 1 << Pow2Alignment); return false; } @@ -1410,9 +1365,9 @@ bool AsmParser::ParseDirectiveDarwinZerofill() { // the section but with no symbol. if (Lexer.is(AsmToken::EndOfStatement)) { // Create the zerofill section but no symbol - Out.EmitZerofill(getMachOSection(Segment, Section, - MCSectionMachO::S_ZEROFILL, 0, - SectionKind::getBSS())); + Out.EmitZerofill(Ctx.getMachOSection(Segment, Section, + MCSectionMachO::S_ZEROFILL, 0, + SectionKind::getBSS())); return false; } @@ -1468,9 +1423,9 @@ bool AsmParser::ParseDirectiveDarwinZerofill() { // Create the zerofill Symbol with Size and Pow2Alignment // // FIXME: Arch specific. - Out.EmitZerofill(getMachOSection(Segment, Section, - MCSectionMachO::S_ZEROFILL, 0, - SectionKind::getBSS()), + Out.EmitZerofill(Ctx.getMachOSection(Segment, Section, + MCSectionMachO::S_ZEROFILL, 0, + SectionKind::getBSS()), Sym, Size, 1 << Pow2Alignment); return false; @@ -1604,9 +1559,6 @@ bool AsmParser::ParseDirectiveDarwinDumpOrLoad(SMLoc IDLoc, bool IsDump) { /// ParseDirectiveIf /// ::= .if expression bool AsmParser::ParseDirectiveIf(SMLoc DirectiveLoc) { - // Consume the identifier that was the .if directive - Lex(); - TheCondStack.push_back(TheCondState); TheCondState.TheCond = AsmCond::IfCond; if(TheCondState.Ignore) { @@ -1638,9 +1590,6 @@ bool AsmParser::ParseDirectiveElseIf(SMLoc DirectiveLoc) { " an .elseif"); TheCondState.TheCond = AsmCond::ElseIfCond; - // Consume the identifier that was the .elseif directive - Lex(); - bool LastIgnoreState = false; if (!TheCondStack.empty()) LastIgnoreState = TheCondStack.back().Ignore; @@ -1667,9 +1616,6 @@ bool AsmParser::ParseDirectiveElseIf(SMLoc DirectiveLoc) { /// ParseDirectiveElse /// ::= .else bool AsmParser::ParseDirectiveElse(SMLoc DirectiveLoc) { - // Consume the identifier that was the .else directive - Lex(); - if (Lexer.isNot(AsmToken::EndOfStatement)) return TokError("unexpected token in '.else' directive"); @@ -1694,9 +1640,6 @@ bool AsmParser::ParseDirectiveElse(SMLoc DirectiveLoc) { /// ParseDirectiveEndIf /// ::= .endif bool AsmParser::ParseDirectiveEndIf(SMLoc DirectiveLoc) { - // Consume the identifier that was the .endif directive - Lex(); - if (Lexer.isNot(AsmToken::EndOfStatement)) return TokError("unexpected token in '.endif' directive"); diff --git a/lib/MC/MCSectionELF.cpp b/lib/MC/MCSectionELF.cpp index ebfe269..a7599de 100644 --- a/lib/MC/MCSectionELF.cpp +++ b/lib/MC/MCSectionELF.cpp @@ -14,11 +14,7 @@ #include "llvm/Support/raw_ostream.h" using namespace llvm; -MCSectionELF *MCSectionELF:: -Create(StringRef Section, unsigned Type, unsigned Flags, - SectionKind K, bool isExplicit, MCContext &Ctx) { - return new (Ctx) MCSectionELF(Section, Type, Flags, K, isExplicit); -} +MCSectionELF::~MCSectionELF() {} // anchor. 
// ShouldOmitSectionDirective - Decides whether a '.section' directive // should be printed before the section name @@ -62,59 +58,63 @@ void MCSectionELF::PrintSwitchToSection(const MCAsmInfo &MAI, OS << ",#write"; if (Flags & MCSectionELF::SHF_TLS) OS << ",#tls"; - } else { - OS << ",\""; - if (Flags & MCSectionELF::SHF_ALLOC) - OS << 'a'; - if (Flags & MCSectionELF::SHF_EXECINSTR) - OS << 'x'; - if (Flags & MCSectionELF::SHF_WRITE) - OS << 'w'; - if (Flags & MCSectionELF::SHF_MERGE) - OS << 'M'; - if (Flags & MCSectionELF::SHF_STRINGS) - OS << 'S'; - if (Flags & MCSectionELF::SHF_TLS) - OS << 'T'; - - // If there are target-specific flags, print them. - if (Flags & ~MCSectionELF::TARGET_INDEP_SHF) - PrintTargetSpecificSectionFlags(MAI, OS); - - OS << '"'; + OS << '\n'; + return; + } + + OS << ",\""; + if (Flags & MCSectionELF::SHF_ALLOC) + OS << 'a'; + if (Flags & MCSectionELF::SHF_EXECINSTR) + OS << 'x'; + if (Flags & MCSectionELF::SHF_WRITE) + OS << 'w'; + if (Flags & MCSectionELF::SHF_MERGE) + OS << 'M'; + if (Flags & MCSectionELF::SHF_STRINGS) + OS << 'S'; + if (Flags & MCSectionELF::SHF_TLS) + OS << 'T'; + + // If there are target-specific flags, print them. + if (Flags & MCSectionELF::XCORE_SHF_CP_SECTION) + OS << 'c'; + if (Flags & MCSectionELF::XCORE_SHF_DP_SECTION) + OS << 'd'; + + OS << '"'; - if (ShouldPrintSectionType(Type)) { - OS << ','; - - // If comment string is '@', e.g. as on ARM - use '%' instead - if (MAI.getCommentString()[0] == '@') - OS << '%'; - else - OS << '@'; - - if (Type == MCSectionELF::SHT_INIT_ARRAY) - OS << "init_array"; - else if (Type == MCSectionELF::SHT_FINI_ARRAY) - OS << "fini_array"; - else if (Type == MCSectionELF::SHT_PREINIT_ARRAY) - OS << "preinit_array"; - else if (Type == MCSectionELF::SHT_NOBITS) - OS << "nobits"; - else if (Type == MCSectionELF::SHT_PROGBITS) - OS << "progbits"; - - if (getKind().isMergeable1ByteCString()) { - OS << ",1"; - } else if (getKind().isMergeable2ByteCString()) { - OS << ",2"; - } else if (getKind().isMergeable4ByteCString() || - getKind().isMergeableConst4()) { - OS << ",4"; - } else if (getKind().isMergeableConst8()) { - OS << ",8"; - } else if (getKind().isMergeableConst16()) { - OS << ",16"; - } + if (ShouldPrintSectionType(Type)) { + OS << ','; + + // If comment string is '@', e.g. 
as on ARM - use '%' instead + if (MAI.getCommentString()[0] == '@') + OS << '%'; + else + OS << '@'; + + if (Type == MCSectionELF::SHT_INIT_ARRAY) + OS << "init_array"; + else if (Type == MCSectionELF::SHT_FINI_ARRAY) + OS << "fini_array"; + else if (Type == MCSectionELF::SHT_PREINIT_ARRAY) + OS << "preinit_array"; + else if (Type == MCSectionELF::SHT_NOBITS) + OS << "nobits"; + else if (Type == MCSectionELF::SHT_PROGBITS) + OS << "progbits"; + + if (getKind().isMergeable1ByteCString()) { + OS << ",1"; + } else if (getKind().isMergeable2ByteCString()) { + OS << ",2"; + } else if (getKind().isMergeable4ByteCString() || + getKind().isMergeableConst4()) { + OS << ",4"; + } else if (getKind().isMergeableConst8()) { + OS << ",8"; + } else if (getKind().isMergeableConst16()) { + OS << ",16"; } } diff --git a/lib/MC/MCSectionMachO.cpp b/lib/MC/MCSectionMachO.cpp index 6cc67a2..3a18cee 100644 --- a/lib/MC/MCSectionMachO.cpp +++ b/lib/MC/MCSectionMachO.cpp @@ -64,14 +64,22 @@ ENTRY(0 /*FIXME*/, S_ATTR_LOC_RELOC) { AttrFlagEnd, 0, 0 } }; - -MCSectionMachO *MCSectionMachO:: -Create(StringRef Segment, StringRef Section, - unsigned TypeAndAttributes, unsigned Reserved2, - SectionKind K, MCContext &Ctx) { - // S_SYMBOL_STUBS must be set for Reserved2 to be non-zero. - return new (Ctx) MCSectionMachO(Segment, Section, TypeAndAttributes, - Reserved2, K); +MCSectionMachO::MCSectionMachO(StringRef Segment, StringRef Section, + unsigned TAA, unsigned reserved2, SectionKind K) + : MCSection(K), TypeAndAttributes(TAA), Reserved2(reserved2) { + assert(Segment.size() <= 16 && Section.size() <= 16 && + "Segment or section string too long"); + for (unsigned i = 0; i != 16; ++i) { + if (i < Segment.size()) + SegmentName[i] = Segment[i]; + else + SegmentName[i] = 0; + + if (i < Section.size()) + SectionName[i] = Section[i]; + else + SectionName[i] = 0; + } } void MCSectionMachO::PrintSwitchToSection(const MCAsmInfo &MAI, diff --git a/lib/MC/MachObjectWriter.cpp b/lib/MC/MachObjectWriter.cpp index e073eb5..a533ccf 100644 --- a/lib/MC/MachObjectWriter.cpp +++ b/lib/MC/MachObjectWriter.cpp @@ -418,7 +418,7 @@ public: unsigned Log2Size = Log2_32(Align); assert((1U << Log2Size) == Align && "Invalid 'common' alignment!"); if (Log2Size > 15) - llvm_report_error("invalid 'common' alignment '" + + report_fatal_error("invalid 'common' alignment '" + Twine(Align) + "'"); // FIXME: Keep this mask with the SymbolFlags enumeration. Flags = (Flags & 0xF0FF) | (Log2Size << 8); @@ -477,7 +477,7 @@ public: // actual expression addend without the PCrel bias. However, instructions // with data following the relocation are not accomodated for (see comment // below regarding SIGNED{1,2,4}), so it isn't exactly that either. - Value += 1 << Log2Size; + Value += 1LL << Log2Size; } if (Target.isAbsolute()) { // constant @@ -506,23 +506,23 @@ public: // Neither symbol can be modified. if (Target.getSymA()->getKind() != MCSymbolRefExpr::VK_None || Target.getSymB()->getKind() != MCSymbolRefExpr::VK_None) - llvm_report_error("unsupported relocation of modified symbol"); + report_fatal_error("unsupported relocation of modified symbol"); // We don't support PCrel relocations of differences. Darwin 'as' doesn't // implement most of these correctly. if (IsPCRel) - llvm_report_error("unsupported pc-relative relocation of difference"); + report_fatal_error("unsupported pc-relative relocation of difference"); // We don't currently support any situation where one or both of the // symbols would require a local relocation. 
This is almost certainly // unused and may not be possible to encode correctly. if (!A_Base || !B_Base) - llvm_report_error("unsupported local relocations in difference"); + report_fatal_error("unsupported local relocations in difference"); // Darwin 'as' doesn't emit correct relocations for this (it ends up with // a single SIGNED relocation); reject it for now. if (A_Base == B_Base) - llvm_report_error("unsupported relocation with identical base"); + report_fatal_error("unsupported relocation with identical base"); Value += Layout.getSymbolAddress(&A_SD) - Layout.getSymbolAddress(A_Base); Value -= Layout.getSymbolAddress(&B_SD) - Layout.getSymbolAddress(B_Base); @@ -580,12 +580,12 @@ public: else Type = RIT_X86_64_GOT; } else if (Modifier != MCSymbolRefExpr::VK_None) - llvm_report_error("unsupported symbol modifier in relocation"); + report_fatal_error("unsupported symbol modifier in relocation"); else Type = RIT_X86_64_Signed; } else { if (Modifier != MCSymbolRefExpr::VK_None) - llvm_report_error("unsupported symbol modifier in branch " + report_fatal_error("unsupported symbol modifier in branch " "relocation"); Type = RIT_X86_64_Branch; @@ -605,7 +605,7 @@ public: // well based on the actual encoded instruction (the additional bias), // but instead appear to just look at the final offset. if (IsRIPRel) { - switch (-(Target.getConstant() + (1 << Log2Size))) { + switch (-(Target.getConstant() + (1LL << Log2Size))) { case 1: Type = RIT_X86_64_Signed1; break; case 2: Type = RIT_X86_64_Signed2; break; case 4: Type = RIT_X86_64_Signed4; break; @@ -622,7 +622,7 @@ public: Type = RIT_X86_64_GOT; IsPCRel = 1; } else if (Modifier != MCSymbolRefExpr::VK_None) - llvm_report_error("unsupported symbol modifier in relocation"); + report_fatal_error("unsupported symbol modifier in relocation"); else Type = RIT_X86_64_Unsigned; } @@ -657,7 +657,7 @@ public: MCSymbolData *A_SD = &Asm.getSymbolData(*A); if (!A_SD->getFragment()) - llvm_report_error("symbol '" + A->getName() + + report_fatal_error("symbol '" + A->getName() + "' can not be undefined in a subtraction expression"); uint32_t Value = Layout.getSymbolAddress(A_SD); @@ -667,7 +667,7 @@ public: MCSymbolData *B_SD = &Asm.getSymbolData(B->getSymbol()); if (!B_SD->getFragment()) - llvm_report_error("symbol '" + B->getSymbol().getName() + + report_fatal_error("symbol '" + B->getSymbol().getName() + "' can not be undefined in a subtraction expression"); // Select the appropriate difference relocation type. diff --git a/lib/Support/Allocator.cpp b/lib/Support/Allocator.cpp index 31b45c8..90df262 100644 --- a/lib/Support/Allocator.cpp +++ b/lib/Support/Allocator.cpp @@ -23,9 +23,7 @@ namespace llvm { BumpPtrAllocator::BumpPtrAllocator(size_t size, size_t threshold, SlabAllocator &allocator) : SlabSize(size), SizeThreshold(threshold), Allocator(allocator), - CurSlab(0), BytesAllocated(0) { - StartNewSlab(); -} + CurSlab(0), BytesAllocated(0) { } BumpPtrAllocator::~BumpPtrAllocator() { DeallocateSlabs(CurSlab); @@ -72,6 +70,8 @@ void BumpPtrAllocator::DeallocateSlabs(MemSlab *Slab) { /// Reset - Deallocate all but the current slab and reset the current pointer /// to the beginning of it, freeing all memory allocated so far. void BumpPtrAllocator::Reset() { + if (!CurSlab) + return; DeallocateSlabs(CurSlab->NextPtr); CurSlab->NextPtr = 0; CurPtr = (char*)(CurSlab + 1); @@ -81,6 +81,9 @@ void BumpPtrAllocator::Reset() { /// Allocate - Allocate space at the specified alignment. 
/// void *BumpPtrAllocator::Allocate(size_t Size, size_t Alignment) { + if (!CurSlab) // Start a new slab if we haven't allocated one already. + StartNewSlab(); + // Keep track of how many bytes we've allocated. BytesAllocated += Size; diff --git a/lib/Support/Debug.cpp b/lib/Support/Debug.cpp index eccfa0b..7f48f8a 100644 --- a/lib/Support/Debug.cpp +++ b/lib/Support/Debug.cpp @@ -51,12 +51,19 @@ DebugBufferSize("debug-buffer-size", cl::init(0)); static std::string CurrentDebugType; -static struct DebugOnlyOpt { + +namespace { + +struct DebugOnlyOpt { void operator=(const std::string &Val) const { DebugFlag |= !Val.empty(); CurrentDebugType = Val; } -} DebugOnlyOptLoc; +}; + +} + +static DebugOnlyOpt DebugOnlyOptLoc; static cl::opt<DebugOnlyOpt, true, cl::parser<std::string> > DebugOnly("debug-only", cl::desc("Enable a specific type of debug output"), diff --git a/lib/Support/Dwarf.cpp b/lib/Support/Dwarf.cpp index d1230b9..c19c2d6 100644 --- a/lib/Support/Dwarf.cpp +++ b/lib/Support/Dwarf.cpp @@ -196,8 +196,9 @@ const char *llvm::dwarf::AttributeString(unsigned Attribute) { case DW_AT_APPLE_flags: return "DW_AT_APPLE_flags"; case DW_AT_APPLE_isa: return "DW_AT_APPLE_isa"; case DW_AT_APPLE_block: return "DW_AT_APPLE_block"; - case DW_AT_APPLE_major_runtime_vers: return "DW_AT_APPLE_major_runtime_vers"; + case DW_AT_APPLE_major_runtime_vers: return "DW_AT_APPLE_major_runtime_vers"; case DW_AT_APPLE_runtime_class: return "DW_AT_APPLE_runtime_class"; + case DW_AT_APPLE_omit_frame_ptr: return "DW_AT_APPLE_omit_frame_ptr"; } return 0; } diff --git a/lib/Support/ErrorHandling.cpp b/lib/Support/ErrorHandling.cpp index 4412cb2..56a171c 100644 --- a/lib/Support/ErrorHandling.cpp +++ b/lib/Support/ErrorHandling.cpp @@ -1,4 +1,4 @@ -//===- lib/Support/ErrorHandling.cpp - Callbacks for errors -----*- C++ -*-===// +//===- lib/Support/ErrorHandling.cpp - Callbacks for errors ---------------===// // // The LLVM Compiler Infrastructure // @@ -7,9 +7,9 @@ // //===----------------------------------------------------------------------===// // -// This file defines an API for error handling, it supersedes cerr+abort(), and -// cerr+exit() style error handling. -// Callbacks can be registered for these errors through this API. +// This file defines an API used to indicate fatal error conditions. Non-fatal +// errors (most of them) should be handled through LLVMContext. 
+// //===----------------------------------------------------------------------===// #include "llvm/ADT/Twine.h" @@ -19,16 +19,14 @@ #include "llvm/System/Threading.h" #include <cassert> #include <cstdlib> - using namespace llvm; using namespace std; -static llvm_error_handler_t ErrorHandler = 0; +static fatal_error_handler_t ErrorHandler = 0; static void *ErrorHandlerUserData = 0; -namespace llvm { -void llvm_install_error_handler(llvm_error_handler_t handler, - void *user_data) { +void llvm::install_fatal_error_handler(fatal_error_handler_t handler, + void *user_data) { assert(!llvm_is_multithreaded() && "Cannot register error handlers after starting multithreaded mode!\n"); assert(!ErrorHandler && "Error handler already registered!\n"); @@ -36,19 +34,19 @@ void llvm_install_error_handler(llvm_error_handler_t handler, ErrorHandlerUserData = user_data; } -void llvm_remove_error_handler() { +void llvm::remove_fatal_error_handler() { ErrorHandler = 0; } -void llvm_report_error(const char *reason) { - llvm_report_error(Twine(reason)); +void llvm::report_fatal_error(const char *reason) { + report_fatal_error(Twine(reason)); } -void llvm_report_error(const std::string &reason) { - llvm_report_error(Twine(reason)); +void llvm::report_fatal_error(const std::string &reason) { + report_fatal_error(Twine(reason)); } -void llvm_report_error(const Twine &reason) { +void llvm::report_fatal_error(const Twine &reason) { if (!ErrorHandler) { errs() << "LLVM ERROR: " << reason << "\n"; } else { @@ -57,8 +55,8 @@ void llvm_report_error(const Twine &reason) { exit(1); } -void llvm_unreachable_internal(const char *msg, const char *file, - unsigned line) { +void llvm::llvm_unreachable_internal(const char *msg, const char *file, + unsigned line) { // This code intentionally doesn't call the ErrorHandler callback, because // llvm_unreachable is intended to be used to indicate "impossible" // situations, and not legitimate runtime errors. @@ -70,4 +68,3 @@ void llvm_unreachable_internal(const char *msg, const char *file, dbgs() << "!\n"; abort(); } -} diff --git a/lib/Support/GraphWriter.cpp b/lib/Support/GraphWriter.cpp index ec84f9b..fdd6285 100644 --- a/lib/Support/GraphWriter.cpp +++ b/lib/Support/GraphWriter.cpp @@ -130,28 +130,28 @@ void llvm::DisplayGraph(const sys::Path &Filename, bool wait, if (sys::Program::ExecuteAndWait(prog, &args[0], 0, 0, 0, 0, &ErrMsg)) { errs() << "Error viewing graph " << Filename.str() << ": '" << ErrMsg << "\n"; - } else { - errs() << " done. \n"; + return; + } + errs() << " done. 
\n"; - sys::Path gv(LLVM_PATH_GV); - args.clear(); - args.push_back(gv.c_str()); - args.push_back(PSFilename.c_str()); - args.push_back("--spartan"); - args.push_back(0); - - ErrMsg.clear(); - if (wait) { - if (sys::Program::ExecuteAndWait(gv, &args[0],0,0,0,0,&ErrMsg)) - errs() << "Error viewing graph: " << ErrMsg << "\n"; - Filename.eraseFromDisk(); - PSFilename.eraseFromDisk(); - } - else { - sys::Program::ExecuteNoWait(gv, &args[0],0,0,0,&ErrMsg); - errs() << "Remember to erase graph files: " << Filename.str() << " " - << PSFilename.str() << "\n"; - } + sys::Path gv(LLVM_PATH_GV); + args.clear(); + args.push_back(gv.c_str()); + args.push_back(PSFilename.c_str()); + args.push_back("--spartan"); + args.push_back(0); + + ErrMsg.clear(); + if (wait) { + if (sys::Program::ExecuteAndWait(gv, &args[0],0,0,0,0,&ErrMsg)) + errs() << "Error viewing graph: " << ErrMsg << "\n"; + Filename.eraseFromDisk(); + PSFilename.eraseFromDisk(); + } + else { + sys::Program::ExecuteNoWait(gv, &args[0],0,0,0,&ErrMsg); + errs() << "Remember to erase graph files: " << Filename.str() << " " + << PSFilename.str() << "\n"; } #elif HAVE_DOTTY sys::Path dotty(LLVM_PATH_DOTTY); @@ -166,7 +166,8 @@ void llvm::DisplayGraph(const sys::Path &Filename, bool wait, errs() << "Error viewing graph " << Filename.str() << ": " << ErrMsg << "\n"; } else { -#ifdef __MINGW32__ // Dotty spawns another app and doesn't wait until it returns +// Dotty spawns another app and doesn't wait until it returns +#if defined (__MINGW32__) || defined (_WINDOWS) return; #endif Filename.eraseFromDisk(); diff --git a/lib/Support/SourceMgr.cpp b/lib/Support/SourceMgr.cpp index 4e7520c..da5681c 100644 --- a/lib/Support/SourceMgr.cpp +++ b/lib/Support/SourceMgr.cpp @@ -168,7 +168,7 @@ SMDiagnostic SourceMgr::GetMessage(SMLoc Loc, const std::string &Msg, } PrintedMsg += Msg; - return SMDiagnostic(Loc, + return SMDiagnostic(*this, Loc, CurMB->getBufferIdentifier(), FindLineNumber(Loc, CurBuf), Loc.getPointer()-LineStart, PrintedMsg, LineStr, ShowLine); diff --git a/lib/Support/Timer.cpp b/lib/Support/Timer.cpp index 4fac073..481f6ba 100644 --- a/lib/Support/Timer.cpp +++ b/lib/Support/Timer.cpp @@ -190,6 +190,8 @@ void TimeRecord::print(const TimeRecord &Total, raw_ostream &OS) const { // NamedRegionTimer Implementation //===----------------------------------------------------------------------===// +namespace { + typedef StringMap<Timer> Name2TimerMap; class Name2PairMap { @@ -216,6 +218,8 @@ public: } }; +} + static ManagedStatic<Name2TimerMap> NamedTimers; static ManagedStatic<Name2PairMap> NamedGroupedTimers; diff --git a/lib/Support/circular_raw_ostream.cpp b/lib/Support/circular_raw_ostream.cpp index e52996d..ca0d30d 100644 --- a/lib/Support/circular_raw_ostream.cpp +++ b/lib/Support/circular_raw_ostream.cpp @@ -1,4 +1,4 @@ -//===- circulat_raw_ostream.cpp - Implement the circular_raw_ostream class -===// +//===- circular_raw_ostream.cpp - Implement circular_raw_ostream ----------===// // // The LLVM Compiler Infrastructure // @@ -12,9 +12,7 @@ //===----------------------------------------------------------------------===// #include "llvm/Support/circular_raw_ostream.h" - #include <algorithm> - using namespace llvm; void circular_raw_ostream::write_impl(const char *Ptr, size_t Size) { @@ -25,7 +23,8 @@ void circular_raw_ostream::write_impl(const char *Ptr, size_t Size) { // Write into the buffer, wrapping if necessary. 
while (Size != 0) { - unsigned Bytes = std::min(Size, BufferSize - (Cur - BufferArray)); + unsigned Bytes = + std::min(unsigned(Size), unsigned(BufferSize - (Cur - BufferArray))); memcpy(Cur, Ptr, Bytes); Size -= Bytes; Cur += Bytes; @@ -37,11 +36,10 @@ void circular_raw_ostream::write_impl(const char *Ptr, size_t Size) { } } -void circular_raw_ostream::flushBufferWithBanner(void) { +void circular_raw_ostream::flushBufferWithBanner() { if (BufferSize != 0) { // Write out the buffer - int num = std::strlen(Banner); - TheStream->write(Banner, num); + TheStream->write(Banner, std::strlen(Banner)); flushBuffer(); } } diff --git a/lib/Support/raw_ostream.cpp b/lib/Support/raw_ostream.cpp index f59bd0d..0b05c54 100644 --- a/lib/Support/raw_ostream.cpp +++ b/lib/Support/raw_ostream.cpp @@ -57,11 +57,11 @@ raw_ostream::~raw_ostream() { delete [] OutBufStart; // If there are any pending errors, report them now. Clients wishing - // to avoid llvm_report_error calls should check for errors with + // to avoid report_fatal_error calls should check for errors with // has_error() and clear the error flag with clear_error() before // destructing raw_ostream objects which may have errors. if (Error) - llvm_report_error("IO failure on output stream."); + report_fatal_error("IO failure on output stream."); } // An out of line virtual method to provide a home for the class vtable. @@ -442,7 +442,8 @@ uint64_t raw_fd_ostream::seek(uint64_t off) { } size_t raw_fd_ostream::preferred_buffer_size() const { -#if !defined(_MSC_VER) && !defined(__MINGW32__) // Windows has no st_blksize. +#if !defined(_MSC_VER) && !defined(__MINGW32__) && !defined(_MINIX) + // Windows and Minix have no st_blksize. assert(FD >= 0 && "File not yet open!"); struct stat statbuf; if (fstat(FD, &statbuf) != 0) diff --git a/lib/Support/regengine.inc b/lib/Support/regengine.inc index bf55543..7e41f96 100644 --- a/lib/Support/regengine.inc +++ b/lib/Support/regengine.inc @@ -185,7 +185,7 @@ matcher(struct re_guts *g, const char *string, size_t nmatch, endp = fast(m, start, stop, gf, gl); if (endp == NULL) { /* a miss */ free(m->pmatch); - free(m->lastpos); + free((void*)m->lastpos); STATETEARDOWN(m); return(REG_NOMATCH); } diff --git a/lib/System/DynamicLibrary.cpp b/lib/System/DynamicLibrary.cpp index d6f3140..6f6890c 100644 --- a/lib/System/DynamicLibrary.cpp +++ b/lib/System/DynamicLibrary.cpp @@ -24,12 +24,18 @@ // Collection of symbol name/value pairs to be searched prior to any libraries. 
static std::map<std::string, void*> *ExplicitSymbols = 0; -static struct ExplicitSymbolsDeleter { +namespace { + +struct ExplicitSymbolsDeleter { ~ExplicitSymbolsDeleter() { if (ExplicitSymbols) delete ExplicitSymbols; } -} Dummy; +}; + +} + +static ExplicitSymbolsDeleter Dummy; void llvm::sys::DynamicLibrary::AddSymbol(const char* symbolName, void *symbolValue) { @@ -44,6 +50,7 @@ void llvm::sys::DynamicLibrary::AddSymbol(const char* symbolName, #else +#if HAVE_DLFCN_H #include <dlfcn.h> using namespace llvm; using namespace llvm::sys; @@ -68,6 +75,17 @@ bool DynamicLibrary::LoadLibraryPermanently(const char *Filename, OpenedHandles->push_back(H); return false; } +#else + +using namespace llvm; +using namespace llvm::sys; + +bool DynamicLibrary::LoadLibraryPermanently(const char *Filename, + std::string *ErrMsg) { + if (ErrMsg) *ErrMsg = "dlopen() not supported on this platform"; + return true; +} +#endif namespace llvm { void *SearchForAddressOfSpecialSymbol(const char* symbolName); @@ -84,6 +102,7 @@ void* DynamicLibrary::SearchForAddressOfSymbol(const char* symbolName) { return I->second; } +#if HAVE_DLFCN_H // Now search the libraries. if (OpenedHandles) { for (std::vector<void *>::iterator I = OpenedHandles->begin(), @@ -95,6 +114,7 @@ void* DynamicLibrary::SearchForAddressOfSymbol(const char* symbolName) { } } } +#endif if (void *Result = llvm::SearchForAddressOfSpecialSymbol(symbolName)) return Result; diff --git a/lib/System/Program.cpp b/lib/System/Program.cpp index a3049d4..cd58c2c 100644 --- a/lib/System/Program.cpp +++ b/lib/System/Program.cpp @@ -13,8 +13,7 @@ #include "llvm/System/Program.h" #include "llvm/Config/config.h" - -namespace llvm { +using namespace llvm; using namespace sys; //===----------------------------------------------------------------------===// @@ -48,9 +47,6 @@ Program::ExecuteNoWait(const Path& path, prg.Execute(path, args, envp, redirects, memoryLimit, ErrMsg); } - -} - // Include the platform-specific parts of this class. #ifdef LLVM_ON_UNIX #include "Unix/Program.inc" diff --git a/lib/System/Unix/Path.inc b/lib/System/Unix/Path.inc index 52253b3..74596dc 100644 --- a/lib/System/Unix/Path.inc +++ b/lib/System/Unix/Path.inc @@ -858,15 +858,20 @@ Path::makeUnique(bool reuse_current, std::string* ErrMsg) { // Append an XXXXXX pattern to the end of the file for use with mkstemp, // mktemp or our own implementation. - std::string Buf(path); + // This uses std::vector instead of SmallVector to avoid a dependence on + // libSupport. And performance isn't critical here. + std::vector<char> Buf; + Buf.resize(path.size()+8); + char *FNBuffer = &Buf[0]; + path.copy(FNBuffer,path.size()); if (isDirectory()) - Buf += "/XXXXXX"; + strcpy(FNBuffer+path.size(), "/XXXXXX"); else - Buf += "-XXXXXX"; + strcpy(FNBuffer+path.size(), "-XXXXXX"); #if defined(HAVE_MKSTEMP) int TempFD; - if ((TempFD = mkstemp((char*)Buf.c_str())) == -1) + if ((TempFD = mkstemp(FNBuffer)) == -1) return MakeErrMsg(ErrMsg, path + ": can't make unique filename"); // We don't need to hold the temp file descriptor... we will trust that no one @@ -874,21 +879,21 @@ Path::makeUnique(bool reuse_current, std::string* ErrMsg) { close(TempFD); // Save the name - path = Buf; + path = FNBuffer; #elif defined(HAVE_MKTEMP) // If we don't have mkstemp, use the old and obsolete mktemp function. 
- if (mktemp(Buf.c_str()) == 0) + if (mktemp(FNBuffer) == 0) return MakeErrMsg(ErrMsg, path + ": can't make unique filename"); // Save the name - path = Buf; + path = FNBuffer; #else // Okay, looks like we have to do it all by our lonesome. static unsigned FCounter = 0; unsigned offset = path.size() + 1; - while (FCounter < 999999 && exists()) { - sprintf(Buf.data()+offset, "%06u", ++FCounter); - path = Buf; + while ( FCounter < 999999 && exists()) { + sprintf(FNBuffer+offset,"%06u",++FCounter); + path = FNBuffer; } if (FCounter > 999999) return MakeErrMsg(ErrMsg, diff --git a/lib/System/Unix/Program.inc b/lib/System/Unix/Program.inc index b4cc875..358415f 100644 --- a/lib/System/Unix/Program.inc +++ b/lib/System/Unix/Program.inc @@ -30,6 +30,14 @@ #if HAVE_FCNTL_H #include <fcntl.h> #endif +#ifdef HAVE_POSIX_SPAWN +#include <spawn.h> +#if !defined(__APPLE__) + extern char **environ; +#else +#include <crt_externs.h> // _NSGetEnviron +#endif +#endif namespace llvm { using namespace sys; @@ -94,20 +102,19 @@ Program::FindProgramByName(const std::string& progName) { } static bool RedirectIO(const Path *Path, int FD, std::string* ErrMsg) { - if (Path == 0) - // Noop + if (Path == 0) // Noop return false; - std::string File; + const char *File; if (Path->isEmpty()) // Redirect empty paths to /dev/null File = "/dev/null"; else - File = Path->str(); + File = Path->c_str(); // Open the file - int InFD = open(File.c_str(), FD == 0 ? O_RDONLY : O_WRONLY|O_CREAT, 0666); + int InFD = open(File, FD == 0 ? O_RDONLY : O_WRONLY|O_CREAT, 0666); if (InFD == -1) { - MakeErrMsg(ErrMsg, "Cannot open file '" + File + "' for " + MakeErrMsg(ErrMsg, "Cannot open file '" + std::string(File) + "' for " + (FD == 0 ? "input" : "output")); return true; } @@ -122,6 +129,25 @@ static bool RedirectIO(const Path *Path, int FD, std::string* ErrMsg) { return false; } +#ifdef HAVE_POSIX_SPAWN +static bool RedirectIO_PS(const Path *Path, int FD, std::string *ErrMsg, + posix_spawn_file_actions_t &FileActions) { + if (Path == 0) // Noop + return false; + const char *File; + if (Path->isEmpty()) + // Redirect empty paths to /dev/null + File = "/dev/null"; + else + File = Path->c_str(); + + if (int Err = posix_spawn_file_actions_addopen(&FileActions, FD, + File, FD == 0 ? O_RDONLY : O_WRONLY|O_CREAT, 0666)) + return MakeErrMsg(ErrMsg, "Cannot dup2", Err); + return false; +} +#endif + static void TimeOutHandler(int Sig) { } @@ -151,13 +177,55 @@ static void SetMemoryLimits (unsigned size) } bool -Program::Execute(const Path& path, - const char** args, - const char** envp, - const Path** redirects, - unsigned memoryLimit, - std::string* ErrMsg) -{ +Program::Execute(const Path &path, const char **args, const char **envp, + const Path **redirects, unsigned memoryLimit, + std::string *ErrMsg) { + // If this OS has posix_spawn and there is no memory limit being implied, use + // posix_spawn. It is more efficient than fork/exec. +#ifdef HAVE_POSIX_SPAWN + if (memoryLimit == 0) { + posix_spawn_file_actions_t FileActions; + posix_spawn_file_actions_init(&FileActions); + + if (redirects) { + // Redirect stdin/stdout. 
+ if (RedirectIO_PS(redirects[0], 0, ErrMsg, FileActions) || + RedirectIO_PS(redirects[1], 1, ErrMsg, FileActions)) + return false; + if (redirects[1] == 0 || redirects[2] == 0 || + *redirects[1] != *redirects[2]) { + // Just redirect stderr + if (RedirectIO_PS(redirects[2], 2, ErrMsg, FileActions)) return false; + } else { + // If stdout and stderr should go to the same place, redirect stderr + // to the FD already open for stdout. + if (int Err = posix_spawn_file_actions_adddup2(&FileActions, 1, 2)) + return !MakeErrMsg(ErrMsg, "Can't redirect stderr to stdout", Err); + } + } + + if (!envp) +#if !defined(__APPLE__) + envp = const_cast<const char **>(environ); +#else + // environ is missing in dylibs. + envp = const_cast<const char **>(*_NSGetEnviron()); +#endif + + pid_t PID; + int Err = posix_spawn(&PID, path.c_str(), &FileActions, /*attrp*/0, + const_cast<char **>(args), const_cast<char **>(envp)); + + posix_spawn_file_actions_destroy(&FileActions); + + if (Err) + return !MakeErrMsg(ErrMsg, "posix_spawn failed", Err); + + Data_ = reinterpret_cast<void*>(PID); + return true; + } +#endif + if (!path.canExecute()) { if (ErrMsg) *ErrMsg = path.str() + " is not executable"; @@ -201,9 +269,12 @@ Program::Execute(const Path& path, // Execute! if (envp != 0) - execve(path.c_str(), (char**)args, (char**)envp); + execve(path.c_str(), + const_cast<char **>(args), + const_cast<char **>(envp)); else - execv(path.c_str(), (char**)args); + execv(path.c_str(), + const_cast<char **>(args)); // If the execve() failed, we should exit. Follow Unix protocol and // return 127 if the executable was not found, and 126 otherwise. // Use _exit rather than exit so that atexit functions and static @@ -239,7 +310,9 @@ Program::Wait(unsigned secondsToWait, // fact of having a handler at all causes the wait below to return with EINTR, // unlike if we used SIG_IGN. if (secondsToWait) { +#ifndef __HAIKU__ Act.sa_sigaction = 0; +#endif Act.sa_handler = TimeOutHandler; sigemptyset(&Act.sa_mask); Act.sa_flags = 0; diff --git a/lib/Target/ARM/ARM.td b/lib/Target/ARM/ARM.td index 8d9c622..b4dec0c 100644 --- a/lib/Target/ARM/ARM.td +++ b/lib/Target/ARM/ARM.td @@ -124,7 +124,8 @@ def : Processor<"arm1156t2f-s", ARMV6Itineraries, def : Processor<"cortex-a8", CortexA8Itineraries, [ArchV7A, FeatureThumb2, FeatureNEON, FeatureHasSlowVMLx, FeatureNEONForFP]>; -def : ProcNoItin<"cortex-a9", [ArchV7A, FeatureThumb2, FeatureNEON]>; +def : Processor<"cortex-a9", CortexA9Itineraries, + [ArchV7A, FeatureThumb2, FeatureNEON]>; //===----------------------------------------------------------------------===// // Register File Description diff --git a/lib/Target/ARM/ARMAddressingModes.h b/lib/Target/ARM/ARMAddressingModes.h index ea62c33..e68354a 100644 --- a/lib/Target/ARM/ARMAddressingModes.h +++ b/lib/Target/ARM/ARMAddressingModes.h @@ -151,22 +151,13 @@ namespace ARM_AM { if ((rotr32(Imm, RotAmt) & ~255U) == 0) return (32-RotAmt)&31; // HW rotates right, not left. - // For values like 0xF000000F, we should skip the first run of ones, then + // For values like 0xF000000F, we should ignore the low 6 bits, then // retry the hunt. - if (Imm & 1) { - unsigned TrailingOnes = CountTrailingZeros_32(~Imm); - if (TrailingOnes != 32) { // Avoid overflow on 0xFFFFFFFF - // Restart the search for a high-order bit after the initial seconds of - // ones. - unsigned TZ2 = CountTrailingZeros_32(Imm & ~((1 << TrailingOnes)-1)); - - // Rotate amount must be even. - unsigned RotAmt2 = TZ2 & ~1; - - // If this fits, use it. 
- if (RotAmt2 != 32 && (rotr32(Imm, RotAmt2) & ~255U) == 0) - return (32-RotAmt2)&31; // HW rotates right, not left. - } + if (Imm & 63U) { + unsigned TZ2 = CountTrailingZeros_32(Imm & ~63U); + unsigned RotAmt2 = TZ2 & ~1; + if ((rotr32(Imm, RotAmt2) & ~255U) == 0) + return (32-RotAmt2)&31; // HW rotates right, not left. } // Otherwise, we have no way to cover this span of bits with a single diff --git a/lib/Target/ARM/ARMBaseInstrInfo.cpp b/lib/Target/ARM/ARMBaseInstrInfo.cpp index 1995f79..a193858 100644 --- a/lib/Target/ARM/ARMBaseInstrInfo.cpp +++ b/lib/Target/ARM/ARMBaseInstrInfo.cpp @@ -467,6 +467,7 @@ unsigned ARMBaseInstrInfo::GetInstSizeInBytes(const MachineInstr *MI) const { case TargetOpcode::KILL: case TargetOpcode::DBG_LABEL: case TargetOpcode::EH_LABEL: + case TargetOpcode::DBG_VALUE: return 0; } break; @@ -481,10 +482,11 @@ unsigned ARMBaseInstrInfo::GetInstSizeInBytes(const MachineInstr *MI) const { // operand #2. return MI->getOperand(2).getImm(); case ARM::Int_eh_sjlj_setjmp: + case ARM::Int_eh_sjlj_setjmp_nofp: return 24; case ARM::tInt_eh_sjlj_setjmp: - return 14; case ARM::t2Int_eh_sjlj_setjmp: + case ARM::t2Int_eh_sjlj_setjmp_nofp: return 14; case ARM::BR_JTr: case ARM::BR_JTm: @@ -815,6 +817,16 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, } } +MachineInstr* +ARMBaseInstrInfo::emitFrameIndexDebugValue(MachineFunction &MF, + int FrameIx, uint64_t Offset, + const MDNode *MDPtr, + DebugLoc DL) const { + MachineInstrBuilder MIB = BuildMI(MF, DL, get(ARM::DBG_VALUE)) + .addFrameIndex(FrameIx).addImm(0).addImm(Offset).addMetadata(MDPtr); + return &*MIB; +} + MachineInstr *ARMBaseInstrInfo:: foldMemoryOperandImpl(MachineFunction &MF, MachineInstr *MI, const SmallVectorImpl<unsigned> &Ops, int FI) const { diff --git a/lib/Target/ARM/ARMBaseInstrInfo.h b/lib/Target/ARM/ARMBaseInstrInfo.h index 292c498..7a5630e 100644 --- a/lib/Target/ARM/ARMBaseInstrInfo.h +++ b/lib/Target/ARM/ARMBaseInstrInfo.h @@ -269,6 +269,12 @@ public: unsigned DestReg, int FrameIndex, const TargetRegisterClass *RC) const; + virtual MachineInstr *emitFrameIndexDebugValue(MachineFunction &MF, + int FrameIx, + uint64_t Offset, + const MDNode *MDPtr, + DebugLoc DL) const; + virtual bool canFoldMemoryOperand(const MachineInstr *MI, const SmallVectorImpl<unsigned> &Ops) const; diff --git a/lib/Target/ARM/ARMBaseRegisterInfo.cpp b/lib/Target/ARM/ARMBaseRegisterInfo.cpp index f162546..bc12187 100644 --- a/lib/Target/ARM/ARMBaseRegisterInfo.cpp +++ b/lib/Target/ARM/ARMBaseRegisterInfo.cpp @@ -38,11 +38,14 @@ #include "llvm/ADT/BitVector.h" #include "llvm/ADT/SmallVector.h" #include "llvm/Support/CommandLine.h" -using namespace llvm; +namespace llvm { cl::opt<bool> ReuseFrameIndexVals("arm-reuse-frame-index-vals", cl::Hidden, cl::init(true), cl::desc("Reuse repeated frame index values")); +} + +using namespace llvm; unsigned ARMBaseRegisterInfo::getRegisterNumbering(unsigned RegEnum, bool *isSPVFP) { @@ -478,7 +481,7 @@ ARMBaseRegisterInfo::UpdateRegAllocHint(unsigned Reg, unsigned NewReg, /// bool ARMBaseRegisterInfo::hasFP(const MachineFunction &MF) const { const MachineFrameInfo *MFI = MF.getFrameInfo(); - return ((NoFramePointerElim && MFI->hasCalls())|| + return ((DisableFramePointerElim(MF) && MFI->hasCalls())|| needsStackRealignment(MF) || MFI->hasVarSizedObjects() || MFI->isFrameAddressTaken()); @@ -506,7 +509,7 @@ needsStackRealignment(const MachineFunction &MF) const { bool ARMBaseRegisterInfo:: cannotEliminateFrame(const MachineFunction &MF) const { const 
MachineFrameInfo *MFI = MF.getFrameInfo(); - if (NoFramePointerElim && MFI->hasCalls()) + if (DisableFramePointerElim(MF) && MFI->hasCalls()) return true; return MFI->hasVarSizedObjects() || MFI->isFrameAddressTaken() || needsStackRealignment(MF); @@ -1050,7 +1053,7 @@ emitLoadConstPool(MachineBasicBlock &MBB, unsigned PredReg) const { MachineFunction &MF = *MBB.getParent(); MachineConstantPool *ConstantPool = MF.getConstantPool(); - Constant *C = + const Constant *C = ConstantInt::get(Type::getInt32Ty(MF.getFunction()->getContext()), Val); unsigned Idx = ConstantPool->getConstantPoolIndex(C, 4); @@ -1180,6 +1183,13 @@ ARMBaseRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, SPAdj = 0; Offset += SPAdj; + // Special handling of dbg_value instructions. + if (MI.isDebugValue()) { + MI.getOperand(i). ChangeToRegister(FrameReg, false /*isDef*/); + MI.getOperand(i+1).ChangeToImmediate(Offset); + return 0; + } + // Modify MI as necessary to handle as much of 'Offset' as possible bool Done = false; if (!AFI->isThumbFunction()) diff --git a/lib/Target/ARM/ARMCodeEmitter.cpp b/lib/Target/ARM/ARMCodeEmitter.cpp index e7aa0c8..f84f85a 100644 --- a/lib/Target/ARM/ARMCodeEmitter.cpp +++ b/lib/Target/ARM/ARMCodeEmitter.cpp @@ -64,7 +64,8 @@ namespace { static char ID; public: ARMCodeEmitter(TargetMachine &tm, JITCodeEmitter &mce) - : MachineFunctionPass(&ID), JTI(0), II((ARMInstrInfo*)tm.getInstrInfo()), + : MachineFunctionPass(&ID), JTI(0), + II((const ARMInstrInfo *)tm.getInstrInfo()), TD(tm.getTargetData()), TM(tm), MCE(mce), MCPEs(0), MJTEs(0), IsPIC(TM.getRelocationModel() == Reloc::PIC_) {} @@ -150,7 +151,7 @@ namespace { /// Routines that handle operands which add machine relocations which are /// fixed up by the relocation stage. - void emitGlobalAddress(GlobalValue *GV, unsigned Reloc, + void emitGlobalAddress(const GlobalValue *GV, unsigned Reloc, bool MayNeedFarStub, bool Indirect, intptr_t ACPV = 0); void emitExternalSymbolAddress(const char *ES, unsigned Reloc); @@ -174,9 +175,9 @@ bool ARMCodeEmitter::runOnMachineFunction(MachineFunction &MF) { assert((MF.getTarget().getRelocationModel() != Reloc::Default || MF.getTarget().getRelocationModel() != Reloc::Static) && "JIT relocation model must be set to static or default!"); - JTI = ((ARMTargetMachine&)MF.getTarget()).getJITInfo(); - II = ((ARMTargetMachine&)MF.getTarget()).getInstrInfo(); - TD = ((ARMTargetMachine&)MF.getTarget()).getTargetData(); + JTI = ((ARMTargetMachine &)MF.getTarget()).getJITInfo(); + II = ((const ARMTargetMachine &)MF.getTarget()).getInstrInfo(); + TD = ((const ARMTargetMachine &)MF.getTarget()).getTargetData(); Subtarget = &TM.getSubtarget<ARMSubtarget>(); MCPEs = &MF.getConstantPool()->getConstants(); MJTEs = 0; @@ -249,14 +250,16 @@ unsigned ARMCodeEmitter::getMachineOpValue(const MachineInstr &MI, /// emitGlobalAddress - Emit the specified address to the code stream. /// -void ARMCodeEmitter::emitGlobalAddress(GlobalValue *GV, unsigned Reloc, +void ARMCodeEmitter::emitGlobalAddress(const GlobalValue *GV, unsigned Reloc, bool MayNeedFarStub, bool Indirect, intptr_t ACPV) { MachineRelocation MR = Indirect ? 
MachineRelocation::getIndirectSymbol(MCE.getCurrentPCOffset(), Reloc, - GV, ACPV, MayNeedFarStub) + const_cast<GlobalValue *>(GV), + ACPV, MayNeedFarStub) : MachineRelocation::getGV(MCE.getCurrentPCOffset(), Reloc, - GV, ACPV, MayNeedFarStub); + const_cast<GlobalValue *>(GV), ACPV, + MayNeedFarStub); MCE.addRelocation(MR); } @@ -391,7 +394,7 @@ void ARMCodeEmitter::emitConstPoolInstruction(const MachineInstr &MI) { << (void*)MCE.getCurrentPCValue() << " " << *ACPV << '\n'); assert(ACPV->isGlobalValue() && "unsupported constant pool value"); - GlobalValue *GV = ACPV->getGV(); + const GlobalValue *GV = ACPV->getGV(); if (GV) { Reloc::Model RelocM = TM.getRelocationModel(); emitGlobalAddress(GV, ARM::reloc_arm_machine_cp_entry, @@ -403,7 +406,7 @@ void ARMCodeEmitter::emitConstPoolInstruction(const MachineInstr &MI) { } emitWordLE(0); } else { - Constant *CV = MCPE.Val.ConstVal; + const Constant *CV = MCPE.Val.ConstVal; DEBUG({ errs() << " ** Constant pool #" << CPI << " @ " @@ -415,7 +418,7 @@ void ARMCodeEmitter::emitConstPoolInstruction(const MachineInstr &MI) { errs() << '\n'; }); - if (GlobalValue *GV = dyn_cast<GlobalValue>(CV)) { + if (const GlobalValue *GV = dyn_cast<GlobalValue>(CV)) { emitGlobalAddress(GV, ARM::reloc_arm_absolute, isa<Function>(GV), false); emitWordLE(0); } else if (const ConstantInt *CI = dyn_cast<ConstantInt>(CV)) { @@ -559,7 +562,7 @@ void ARMCodeEmitter::emitPseudoInstruction(const MachineInstr &MI) { // We allow inline assembler nodes with empty bodies - they can // implicitly define registers, which is ok for JIT. if (MI.getOperand(0).getSymbolName()[0]) { - llvm_report_error("JIT does not support inline asm!"); + report_fatal_error("JIT does not support inline asm!"); } break; } @@ -704,7 +707,7 @@ void ARMCodeEmitter::emitDataProcessingInstruction(const MachineInstr &MI, const TargetInstrDesc &TID = MI.getDesc(); if (TID.Opcode == ARM::BFC) { - llvm_report_error("ARMv6t2 JIT is not yet supported."); + report_fatal_error("ARMv6t2 JIT is not yet supported."); } // Part of binary is determined by TableGn. 
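The lib/System/Unix/Program.inc hunks earlier in this diff switch Program::Execute over to posix_spawn (with posix_spawn_file_actions handling the I/O redirection) whenever no memory limit is requested, on the grounds that it is more efficient than fork/exec. A minimal standalone sketch of that same pattern, independent of the patch itself — the program path, output file name, and helper name below are illustrative, and a POSIX system providing <spawn.h> is assumed:

#include <spawn.h>
#include <fcntl.h>
#include <sys/wait.h>

extern char **environ;

// Spawn a program with stdout redirected to a file, mirroring the
// RedirectIO_PS + posix_spawn sequence used by Program::Execute.
static int SpawnWithRedirect(const char *Prog, char *const Argv[],
                             const char *OutFile) {
  posix_spawn_file_actions_t FileActions;
  posix_spawn_file_actions_init(&FileActions);

  // Equivalent of RedirectIO_PS: open OutFile and install it as fd 1.
  posix_spawn_file_actions_addopen(&FileActions, 1, OutFile,
                                   O_WRONLY | O_CREAT, 0666);
  // Send stderr to the same place, like the adddup2(1, 2) case in the patch.
  posix_spawn_file_actions_adddup2(&FileActions, 1, 2);

  pid_t PID;
  int Err = posix_spawn(&PID, Prog, &FileActions, /*attrp=*/0,
                        Argv, environ);
  posix_spawn_file_actions_destroy(&FileActions);
  if (Err != 0)
    return Err;               // posix_spawn reports failures directly.

  int Status = 0;
  waitpid(PID, &Status, 0);   // Program::Wait does this with a timeout.
  return Status;
}

int main() {
  char *const Args[] = { const_cast<char *>("/bin/ls"), 0 };
  return SpawnWithRedirect("/bin/ls", Args, "ls.out");
}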
diff --git a/lib/Target/ARM/ARMConstantPoolValue.cpp b/lib/Target/ARM/ARMConstantPoolValue.cpp index 90dd0c7..f13ccc6 100644 --- a/lib/Target/ARM/ARMConstantPoolValue.cpp +++ b/lib/Target/ARM/ARMConstantPoolValue.cpp @@ -21,7 +21,7 @@ #include <cstdlib> using namespace llvm; -ARMConstantPoolValue::ARMConstantPoolValue(Constant *cval, unsigned id, +ARMConstantPoolValue::ARMConstantPoolValue(const Constant *cval, unsigned id, ARMCP::ARMCPKind K, unsigned char PCAdj, const char *Modif, @@ -39,16 +39,17 @@ ARMConstantPoolValue::ARMConstantPoolValue(LLVMContext &C, CVal(NULL), S(strdup(s)), LabelId(id), Kind(ARMCP::CPExtSymbol), PCAdjust(PCAdj), Modifier(Modif), AddCurrentAddress(AddCA) {} -ARMConstantPoolValue::ARMConstantPoolValue(GlobalValue *gv, const char *Modif) +ARMConstantPoolValue::ARMConstantPoolValue(const GlobalValue *gv, + const char *Modif) : MachineConstantPoolValue((const Type*)Type::getInt32Ty(gv->getContext())), CVal(gv), S(NULL), LabelId(0), Kind(ARMCP::CPValue), PCAdjust(0), Modifier(Modif) {} -GlobalValue *ARMConstantPoolValue::getGV() const { +const GlobalValue *ARMConstantPoolValue::getGV() const { return dyn_cast_or_null<GlobalValue>(CVal); } -BlockAddress *ARMConstantPoolValue::getBlockAddress() const { +const BlockAddress *ARMConstantPoolValue::getBlockAddress() const { return dyn_cast_or_null<BlockAddress>(CVal); } diff --git a/lib/Target/ARM/ARMConstantPoolValue.h b/lib/Target/ARM/ARMConstantPoolValue.h index 741acde..6f4eddf 100644 --- a/lib/Target/ARM/ARMConstantPoolValue.h +++ b/lib/Target/ARM/ARMConstantPoolValue.h @@ -36,7 +36,7 @@ namespace ARMCP { /// represent PC-relative displacement between the address of the load /// instruction and the constant being loaded, i.e. (&GV-(LPIC+8)). class ARMConstantPoolValue : public MachineConstantPoolValue { - Constant *CVal; // Constant being loaded. + const Constant *CVal; // Constant being loaded. const char *S; // ExtSymbol being loaded. unsigned LabelId; // Label id of the load. ARMCP::ARMCPKind Kind; // Kind of constant. 
@@ -46,20 +46,20 @@ class ARMConstantPoolValue : public MachineConstantPoolValue { bool AddCurrentAddress; public: - ARMConstantPoolValue(Constant *cval, unsigned id, + ARMConstantPoolValue(const Constant *cval, unsigned id, ARMCP::ARMCPKind Kind = ARMCP::CPValue, unsigned char PCAdj = 0, const char *Modifier = NULL, bool AddCurrentAddress = false); ARMConstantPoolValue(LLVMContext &C, const char *s, unsigned id, unsigned char PCAdj = 0, const char *Modifier = NULL, bool AddCurrentAddress = false); - ARMConstantPoolValue(GlobalValue *GV, const char *Modifier); + ARMConstantPoolValue(const GlobalValue *GV, const char *Modifier); ARMConstantPoolValue(); ~ARMConstantPoolValue(); - GlobalValue *getGV() const; + const GlobalValue *getGV() const; const char *getSymbol() const { return S; } - BlockAddress *getBlockAddress() const; + const BlockAddress *getBlockAddress() const; const char *getModifier() const { return Modifier; } bool hasModifier() const { return Modifier != NULL; } bool mustAddCurrentAddress() const { return AddCurrentAddress; } diff --git a/lib/Target/ARM/ARMExpandPseudoInsts.cpp b/lib/Target/ARM/ARMExpandPseudoInsts.cpp index 1b8727d..845d088 100644 --- a/lib/Target/ARM/ARMExpandPseudoInsts.cpp +++ b/lib/Target/ARM/ARMExpandPseudoInsts.cpp @@ -91,7 +91,7 @@ bool ARMExpandPseudo::ExpandMBB(MachineBasicBlock &MBB) { LO16 = LO16.addImm(Lo16); HI16 = HI16.addImm(Hi16); } else { - GlobalValue *GV = MO.getGlobal(); + const GlobalValue *GV = MO.getGlobal(); unsigned TF = MO.getTargetFlags(); LO16 = LO16.addGlobalAddress(GV, MO.getOffset(), TF | ARMII::MO_LO16); HI16 = HI16.addGlobalAddress(GV, MO.getOffset(), TF | ARMII::MO_HI16); diff --git a/lib/Target/ARM/ARMISelDAGToDAG.cpp b/lib/Target/ARM/ARMISelDAGToDAG.cpp index 7d48663..36a1827 100644 --- a/lib/Target/ARM/ARMISelDAGToDAG.cpp +++ b/lib/Target/ARM/ARMISelDAGToDAG.cpp @@ -13,7 +13,6 @@ #include "ARM.h" #include "ARMAddressingModes.h" -#include "ARMISelLowering.h" #include "ARMTargetMachine.h" #include "llvm/CallingConv.h" #include "llvm/Constants.h" @@ -121,9 +120,6 @@ private: SDNode *SelectARMIndexedLoad(SDNode *N); SDNode *SelectT2IndexedLoad(SDNode *N); - /// SelectDYN_ALLOC - Select dynamic alloc for Thumb. - SDNode *SelectDYN_ALLOC(SDNode *N); - /// SelectVLD - Select NEON load intrinsics. NumVecs should be /// 1, 2, 3 or 4. The opcode arrays specify the instructions used for /// loads of D registers and even subregs and odd subregs of Q registers. @@ -146,7 +142,7 @@ private: unsigned *QOpcodes1); /// SelectV6T2BitfieldExtractOp - Select SBFX/UBFX instructions for ARM. - SDNode *SelectV6T2BitfieldExtractOp(SDNode *N, unsigned Opc); + SDNode *SelectV6T2BitfieldExtractOp(SDNode *N, bool isSigned); /// SelectCMOVOp - Select CMOV instructions for ARM. SDNode *SelectCMOVOp(SDNode *N); @@ -939,59 +935,6 @@ SDNode *ARMDAGToDAGISel::SelectT2IndexedLoad(SDNode *N) { return NULL; } -SDNode *ARMDAGToDAGISel::SelectDYN_ALLOC(SDNode *N) { - DebugLoc dl = N->getDebugLoc(); - EVT VT = N->getValueType(0); - SDValue Chain = N->getOperand(0); - SDValue Size = N->getOperand(1); - SDValue Align = N->getOperand(2); - SDValue SP = CurDAG->getRegister(ARM::SP, MVT::i32); - int32_t AlignVal = cast<ConstantSDNode>(Align)->getSExtValue(); - if (AlignVal < 0) - // We need to align the stack. Use Thumb1 tAND which is the only thumb - // instruction that can read and write SP. This matches to a pseudo - // instruction that has a chain to ensure the result is written back to - // the stack pointer. 
- SP = SDValue(CurDAG->getMachineNode(ARM::tANDsp, dl, VT, SP, Align), 0); - - bool isC = isa<ConstantSDNode>(Size); - uint32_t C = isC ? cast<ConstantSDNode>(Size)->getZExtValue() : ~0UL; - // Handle the most common case for both Thumb1 and Thumb2: - // tSUBspi - immediate is between 0 ... 508 inclusive. - if (C <= 508 && ((C & 3) == 0)) - // FIXME: tSUBspi encode scale 4 implicitly. - return CurDAG->SelectNodeTo(N, ARM::tSUBspi_, VT, MVT::Other, SP, - CurDAG->getTargetConstant(C/4, MVT::i32), - Chain); - - if (Subtarget->isThumb1Only()) { - // Use tADDspr since Thumb1 does not have a sub r, sp, r. ARMISelLowering - // should have negated the size operand already. FIXME: We can't insert - // new target independent node at this stage so we are forced to negate - // it earlier. Is there a better solution? - return CurDAG->SelectNodeTo(N, ARM::tADDspr_, VT, MVT::Other, SP, Size, - Chain); - } else if (Subtarget->isThumb2()) { - if (isC && Predicate_t2_so_imm(Size.getNode())) { - // t2SUBrSPi - SDValue Ops[] = { SP, CurDAG->getTargetConstant(C, MVT::i32), Chain }; - return CurDAG->SelectNodeTo(N, ARM::t2SUBrSPi_, VT, MVT::Other, Ops, 3); - } else if (isC && Predicate_imm0_4095(Size.getNode())) { - // t2SUBrSPi12 - SDValue Ops[] = { SP, CurDAG->getTargetConstant(C, MVT::i32), Chain }; - return CurDAG->SelectNodeTo(N, ARM::t2SUBrSPi12_, VT, MVT::Other, Ops, 3); - } else { - // t2SUBrSPs - SDValue Ops[] = { SP, Size, - getI32Imm(ARM_AM::getSORegOpc(ARM_AM::lsl,0)), Chain }; - return CurDAG->SelectNodeTo(N, ARM::t2SUBrSPs_, VT, MVT::Other, Ops, 4); - } - } - - // FIXME: Add ADD / SUB sp instructions for ARM. - return 0; -} - /// PairDRegs - Insert a pair of double registers into an implicit def to /// form a quad register. SDNode *ARMDAGToDAGISel::PairDRegs(EVT VT, SDValue V0, SDValue V1) { @@ -1052,7 +995,7 @@ SDNode *ARMDAGToDAGISel::SelectVLD(SDNode *N, unsigned NumVecs, break; } - SDValue Pred = CurDAG->getTargetConstant(14, MVT::i32); + SDValue Pred = getAL(CurDAG); SDValue Reg0 = CurDAG->getRegister(0, MVT::i32); if (is64BitVector) { unsigned Opc = DOpcodes[OpcodeIndex]; @@ -1142,7 +1085,7 @@ SDNode *ARMDAGToDAGISel::SelectVST(SDNode *N, unsigned NumVecs, break; } - SDValue Pred = CurDAG->getTargetConstant(14, MVT::i32); + SDValue Pred = getAL(CurDAG); SDValue Reg0 = CurDAG->getRegister(0, MVT::i32); SmallVector<SDValue, 10> Ops; @@ -1249,7 +1192,7 @@ SDNode *ARMDAGToDAGISel::SelectVLDSTLane(SDNode *N, bool IsLoad, case MVT::v4i32: OpcodeIndex = 1; break; } - SDValue Pred = CurDAG->getTargetConstant(14, MVT::i32); + SDValue Pred = getAL(CurDAG); SDValue Reg0 = CurDAG->getRegister(0, MVT::i32); SmallVector<SDValue, 10> Ops; @@ -1305,10 +1248,42 @@ SDNode *ARMDAGToDAGISel::SelectVLDSTLane(SDNode *N, bool IsLoad, } SDNode *ARMDAGToDAGISel::SelectV6T2BitfieldExtractOp(SDNode *N, - unsigned Opc) { + bool isSigned) { if (!Subtarget->hasV6T2Ops()) return NULL; + unsigned Opc = isSigned ? (Subtarget->isThumb() ? ARM::t2SBFX : ARM::SBFX) + : (Subtarget->isThumb() ? 
ARM::t2UBFX : ARM::UBFX); + + + // For unsigned extracts, check for a shift right and mask + unsigned And_imm = 0; + if (N->getOpcode() == ISD::AND) { + if (isOpcWithIntImmediate(N, ISD::AND, And_imm)) { + + // The immediate is a mask of the low bits iff imm & (imm+1) == 0 + if (And_imm & (And_imm + 1)) + return NULL; + + unsigned Srl_imm = 0; + if (isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::SRL, + Srl_imm)) { + assert(Srl_imm > 0 && Srl_imm < 32 && "bad amount in shift node!"); + + unsigned Width = CountTrailingOnes_32(And_imm); + unsigned LSB = Srl_imm; + SDValue Reg0 = CurDAG->getRegister(0, MVT::i32); + SDValue Ops[] = { N->getOperand(0).getOperand(0), + CurDAG->getTargetConstant(LSB, MVT::i32), + CurDAG->getTargetConstant(Width, MVT::i32), + getAL(CurDAG), Reg0 }; + return CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops, 5); + } + } + return NULL; + } + + // Otherwise, we're looking for a shift of a shift unsigned Shl_imm = 0; if (isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::SHL, Shl_imm)) { assert(Shl_imm > 0 && Shl_imm < 32 && "bad amount in shift node!"); @@ -1531,7 +1506,7 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) { SDNode *ResNode; if (Subtarget->isThumb1Only()) { - SDValue Pred = CurDAG->getTargetConstant(14, MVT::i32); + SDValue Pred = getAL(CurDAG); SDValue PredReg = CurDAG->getRegister(0, MVT::i32); SDValue Ops[] = { CPIdx, Pred, PredReg, CurDAG->getEntryNode() }; ResNode = CurDAG->getMachineNode(ARM::tLDRcp, dl, MVT::i32, MVT::Other, @@ -1571,16 +1546,12 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) { return CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops, 5); } } - case ARMISD::DYN_ALLOC: - return SelectDYN_ALLOC(N); case ISD::SRL: - if (SDNode *I = SelectV6T2BitfieldExtractOp(N, - Subtarget->isThumb() ? ARM::t2UBFX : ARM::UBFX)) + if (SDNode *I = SelectV6T2BitfieldExtractOp(N, false)) return I; break; case ISD::SRA: - if (SDNode *I = SelectV6T2BitfieldExtractOp(N, - Subtarget->isThumb() ? ARM::t2SBFX : ARM::SBFX)) + if (SDNode *I = SelectV6T2BitfieldExtractOp(N, true)) return I; break; case ISD::MUL: @@ -1624,6 +1595,10 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) { } break; case ISD::AND: { + // Check for unsigned bitfield extract + if (SDNode *I = SelectV6T2BitfieldExtractOp(N, false)) + return I; + // (and (or x, c2), c1) and top 16-bits of c1 and c2 match, lower 16-bits // of c1 are 0xffff, and lower 16-bit of c2 are 0. 
That is, the top 16-bits // are entirely contributed by c2 and lower 16-bits are entirely contributed @@ -1708,7 +1683,7 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) { SDValue Chain = N->getOperand(0); SDValue AM5Opc = CurDAG->getTargetConstant(ARM_AM::getAM5Opc(ARM_AM::ia, 4), MVT::i32); - SDValue Pred = CurDAG->getTargetConstant(14, MVT::i32); + SDValue Pred = getAL(CurDAG); SDValue PredReg = CurDAG->getRegister(0, MVT::i32); SDValue Ops[] = { N->getOperand(1), AM5Opc, Pred, PredReg, Chain }; return CurDAG->getMachineNode(ARM::VLDMQ, dl, MVT::v2f64, MVT::Other, @@ -1724,7 +1699,7 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) { SDValue Chain = N->getOperand(0); SDValue AM5Opc = CurDAG->getTargetConstant(ARM_AM::getAM5Opc(ARM_AM::ia, 4), MVT::i32); - SDValue Pred = CurDAG->getTargetConstant(14, MVT::i32); + SDValue Pred = getAL(CurDAG); SDValue PredReg = CurDAG->getRegister(0, MVT::i32); SDValue Ops[] = { N->getOperand(1), N->getOperand(2), AM5Opc, Pred, PredReg, Chain }; @@ -1816,7 +1791,7 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) { case MVT::v4f32: case MVT::v4i32: Opc = ARM::VZIPq32; break; } - SDValue Pred = CurDAG->getTargetConstant(14, MVT::i32); + SDValue Pred = getAL(CurDAG); SDValue PredReg = CurDAG->getRegister(0, MVT::i32); SDValue Ops[] = { N->getOperand(0), N->getOperand(1), Pred, PredReg }; return CurDAG->getMachineNode(Opc, dl, VT, VT, Ops, 4); @@ -1835,7 +1810,7 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) { case MVT::v4f32: case MVT::v4i32: Opc = ARM::VUZPq32; break; } - SDValue Pred = CurDAG->getTargetConstant(14, MVT::i32); + SDValue Pred = getAL(CurDAG); SDValue PredReg = CurDAG->getRegister(0, MVT::i32); SDValue Ops[] = { N->getOperand(0), N->getOperand(1), Pred, PredReg }; return CurDAG->getMachineNode(Opc, dl, VT, VT, Ops, 4); @@ -1854,7 +1829,7 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) { case MVT::v4f32: case MVT::v4i32: Opc = ARM::VTRNq32; break; } - SDValue Pred = CurDAG->getTargetConstant(14, MVT::i32); + SDValue Pred = getAL(CurDAG); SDValue PredReg = CurDAG->getRegister(0, MVT::i32); SDValue Ops[] = { N->getOperand(0), N->getOperand(1), Pred, PredReg }; return CurDAG->getMachineNode(Opc, dl, VT, VT, Ops, 4); diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp index 77fb0c3..d3842a6 100644 --- a/lib/Target/ARM/ARMISelLowering.cpp +++ b/lib/Target/ARM/ARMISelLowering.cpp @@ -40,12 +40,18 @@ #include "llvm/MC/MCSectionMachO.h" #include "llvm/Target/TargetOptions.h" #include "llvm/ADT/VectorExtras.h" +#include "llvm/Support/CommandLine.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/raw_ostream.h" #include <sstream> using namespace llvm; +static cl::opt<bool> +EnableARMLongCalls("arm-long-calls", cl::Hidden, + cl::desc("Generate calls via indirect call instructions."), + cl::init(false)); + static bool CC_ARM_APCS_Custom_f64(unsigned &ValNo, EVT &ValVT, EVT &LocVT, CCValAssign::LocInfo &LocInfo, ISD::ArgFlagsTy &ArgFlags, @@ -90,6 +96,8 @@ void ARMTargetLowering::addTypeForNEON(EVT VT, EVT PromotedLdStVT, setOperationAction(ISD::VECTOR_SHUFFLE, VT.getSimpleVT(), Custom); setOperationAction(ISD::CONCAT_VECTORS, VT.getSimpleVT(), Custom); setOperationAction(ISD::EXTRACT_SUBVECTOR, VT.getSimpleVT(), Expand); + setOperationAction(ISD::SELECT, VT.getSimpleVT(), Expand); + setOperationAction(ISD::SELECT_CC, VT.getSimpleVT(), Expand); if (VT.isInteger()) { setOperationAction(ISD::SHL, VT.getSimpleVT(), Custom); setOperationAction(ISD::SRA, VT.getSimpleVT(), Custom); @@ 
-376,10 +384,7 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM) // FIXME: Shouldn't need this, since no register is used, but the legalizer // doesn't yet know how to not do that for SjLj. setExceptionSelectorRegister(ARM::R0); - if (Subtarget->isThumb()) - setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Custom); - else - setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Expand); + setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Expand); setOperationAction(ISD::MEMBARRIER, MVT::Other, Custom); if (!Subtarget->hasV6Ops() && !Subtarget->isThumb2()) { @@ -783,7 +788,7 @@ ARMTargetLowering::LowerCallResult(SDValue Chain, SDValue InFlag, CallingConv::ID CallConv, bool isVarArg, const SmallVectorImpl<ISD::InputArg> &Ins, DebugLoc dl, SelectionDAG &DAG, - SmallVectorImpl<SDValue> &InVals) { + SmallVectorImpl<SDValue> &InVals) const { // Assign locations to each value returned by this call. SmallVector<CCValAssign, 16> RVLocs; @@ -871,7 +876,7 @@ ARMTargetLowering::LowerMemOpCallTo(SDValue Chain, SDValue StackPtr, SDValue Arg, DebugLoc dl, SelectionDAG &DAG, const CCValAssign &VA, - ISD::ArgFlagsTy Flags) { + ISD::ArgFlagsTy Flags) const { unsigned LocMemOffset = VA.getLocMemOffset(); SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset); PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(), StackPtr, PtrOff); @@ -889,7 +894,7 @@ void ARMTargetLowering::PassF64ArgInRegs(DebugLoc dl, SelectionDAG &DAG, CCValAssign &VA, CCValAssign &NextVA, SDValue &StackPtr, SmallVector<SDValue, 8> &MemOpChains, - ISD::ArgFlagsTy Flags) { + ISD::ArgFlagsTy Flags) const { SDValue fmrrd = DAG.getNode(ARMISD::VMOVRRD, dl, DAG.getVTList(MVT::i32, MVT::i32), Arg); @@ -918,7 +923,7 @@ ARMTargetLowering::LowerCall(SDValue Chain, SDValue Callee, const SmallVectorImpl<ISD::OutputArg> &Outs, const SmallVectorImpl<ISD::InputArg> &Ins, DebugLoc dl, SelectionDAG &DAG, - SmallVectorImpl<SDValue> &InVals) { + SmallVectorImpl<SDValue> &InVals) const { // ARM target does not yet support tail call optimization. isTailCall = false; @@ -1025,8 +1030,44 @@ ARMTargetLowering::LowerCall(SDValue Chain, SDValue Callee, bool isLocalARMFunc = false; MachineFunction &MF = DAG.getMachineFunction(); ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); - if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) { - GlobalValue *GV = G->getGlobal(); + + if (EnableARMLongCalls) { + assert (getTargetMachine().getRelocationModel() == Reloc::Static + && "long-calls with non-static relocation model!"); + // Handle a global address or an external symbol. If it's not one of + // those, the target's already in a register, so we don't need to do + // anything extra. 
+ if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) { + const GlobalValue *GV = G->getGlobal(); + // Create a constant pool entry for the callee address + unsigned ARMPCLabelIndex = AFI->createConstPoolEntryUId(); + ARMConstantPoolValue *CPV = new ARMConstantPoolValue(GV, + ARMPCLabelIndex, + ARMCP::CPValue, 0); + // Get the address of the callee into a register + SDValue CPAddr = DAG.getTargetConstantPool(CPV, getPointerTy(), 4); + CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr); + Callee = DAG.getLoad(getPointerTy(), dl, + DAG.getEntryNode(), CPAddr, + PseudoSourceValue::getConstantPool(), 0, + false, false, 0); + } else if (ExternalSymbolSDNode *S=dyn_cast<ExternalSymbolSDNode>(Callee)) { + const char *Sym = S->getSymbol(); + + // Create a constant pool entry for the callee address + unsigned ARMPCLabelIndex = AFI->createConstPoolEntryUId(); + ARMConstantPoolValue *CPV = new ARMConstantPoolValue(*DAG.getContext(), + Sym, ARMPCLabelIndex, 0); + // Get the address of the callee into a register + SDValue CPAddr = DAG.getTargetConstantPool(CPV, getPointerTy(), 4); + CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr); + Callee = DAG.getLoad(getPointerTy(), dl, + DAG.getEntryNode(), CPAddr, + PseudoSourceValue::getConstantPool(), 0, + false, false, 0); + } + } else if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) { + const GlobalValue *GV = G->getGlobal(); isDirect = true; bool isExt = GV->isDeclaration() || GV->isWeakForLinker(); bool isStub = (isExt && Subtarget->isTargetDarwin()) && @@ -1049,7 +1090,7 @@ ARMTargetLowering::LowerCall(SDValue Chain, SDValue Callee, SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, MVT::i32); Callee = DAG.getNode(ARMISD::PIC_ADD, dl, getPointerTy(), Callee, PICLabel); - } else + } else Callee = DAG.getTargetGlobalAddress(GV, getPointerTy()); } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) { isDirect = true; @@ -1125,7 +1166,7 @@ SDValue ARMTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool isVarArg, const SmallVectorImpl<ISD::OutputArg> &Outs, - DebugLoc dl, SelectionDAG &DAG) { + DebugLoc dl, SelectionDAG &DAG) const { // CCValAssign - represent the assignment of the return value to a location. SmallVector<CCValAssign, 16> RVLocs; @@ -1232,13 +1273,14 @@ static SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG) { return DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Res); } -SDValue ARMTargetLowering::LowerBlockAddress(SDValue Op, SelectionDAG &DAG) { +SDValue ARMTargetLowering::LowerBlockAddress(SDValue Op, + SelectionDAG &DAG) const { MachineFunction &MF = DAG.getMachineFunction(); ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); unsigned ARMPCLabelIndex = 0; DebugLoc DL = Op.getDebugLoc(); EVT PtrVT = getPointerTy(); - BlockAddress *BA = cast<BlockAddressSDNode>(Op)->getBlockAddress(); + const BlockAddress *BA = cast<BlockAddressSDNode>(Op)->getBlockAddress(); Reloc::Model RelocM = getTargetMachine().getRelocationModel(); SDValue CPAddr; if (RelocM == Reloc::Static) { @@ -1264,7 +1306,7 @@ SDValue ARMTargetLowering::LowerBlockAddress(SDValue Op, SelectionDAG &DAG) { // Lower ISD::GlobalTLSAddress using the "general dynamic" model SDValue ARMTargetLowering::LowerToTLSGeneralDynamicModel(GlobalAddressSDNode *GA, - SelectionDAG &DAG) { + SelectionDAG &DAG) const { DebugLoc dl = GA->getDebugLoc(); EVT PtrVT = getPointerTy(); unsigned char PCAdj = Subtarget->isThumb() ? 
4 : 8; @@ -1303,8 +1345,8 @@ ARMTargetLowering::LowerToTLSGeneralDynamicModel(GlobalAddressSDNode *GA, // "local exec" model. SDValue ARMTargetLowering::LowerToTLSExecModels(GlobalAddressSDNode *GA, - SelectionDAG &DAG) { - GlobalValue *GV = GA->getGlobal(); + SelectionDAG &DAG) const { + const GlobalValue *GV = GA->getGlobal(); DebugLoc dl = GA->getDebugLoc(); SDValue Offset; SDValue Chain = DAG.getEntryNode(); @@ -1350,7 +1392,7 @@ ARMTargetLowering::LowerToTLSExecModels(GlobalAddressSDNode *GA, } SDValue -ARMTargetLowering::LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) { +ARMTargetLowering::LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const { // TODO: implement the "local dynamic" model assert(Subtarget->isTargetELF() && "TLS not implemented for non-ELF targets"); @@ -1364,10 +1406,10 @@ ARMTargetLowering::LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) { } SDValue ARMTargetLowering::LowerGlobalAddressELF(SDValue Op, - SelectionDAG &DAG) { + SelectionDAG &DAG) const { EVT PtrVT = getPointerTy(); DebugLoc dl = Op.getDebugLoc(); - GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal(); + const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal(); Reloc::Model RelocM = getTargetMachine().getRelocationModel(); if (RelocM == Reloc::PIC_) { bool UseGOTOFF = GV->hasLocalLinkage() || GV->hasHiddenVisibility(); @@ -1404,13 +1446,13 @@ SDValue ARMTargetLowering::LowerGlobalAddressELF(SDValue Op, } SDValue ARMTargetLowering::LowerGlobalAddressDarwin(SDValue Op, - SelectionDAG &DAG) { + SelectionDAG &DAG) const { MachineFunction &MF = DAG.getMachineFunction(); ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); unsigned ARMPCLabelIndex = 0; EVT PtrVT = getPointerTy(); DebugLoc dl = Op.getDebugLoc(); - GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal(); + const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal(); Reloc::Model RelocM = getTargetMachine().getRelocationModel(); SDValue CPAddr; if (RelocM == Reloc::Static) @@ -1443,7 +1485,7 @@ SDValue ARMTargetLowering::LowerGlobalAddressDarwin(SDValue Op, } SDValue ARMTargetLowering::LowerGLOBAL_OFFSET_TABLE(SDValue Op, - SelectionDAG &DAG){ + SelectionDAG &DAG) const { assert(Subtarget->isTargetELF() && "GLOBAL OFFSET TABLE not implemented for non-ELF targets"); MachineFunction &MF = DAG.getMachineFunction(); @@ -1466,7 +1508,8 @@ SDValue ARMTargetLowering::LowerGLOBAL_OFFSET_TABLE(SDValue Op, SDValue ARMTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG, - const ARMSubtarget *Subtarget) { + const ARMSubtarget *Subtarget) + const { unsigned IntNo = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue(); DebugLoc dl = Op.getDebugLoc(); switch (IntNo) { @@ -1533,20 +1576,23 @@ static SDValue LowerMEMBARRIER(SDValue Op, SelectionDAG &DAG, return Res; } -static SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG, - unsigned VarArgsFrameIndex) { +static SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG) { + MachineFunction &MF = DAG.getMachineFunction(); + ARMFunctionInfo *FuncInfo = MF.getInfo<ARMFunctionInfo>(); + // vastart just stores the address of the VarArgsFrameIndex slot into the // memory location argument. 
DebugLoc dl = Op.getDebugLoc(); EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); - SDValue FR = DAG.getFrameIndex(VarArgsFrameIndex, PtrVT); + SDValue FR = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT); const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue(); return DAG.getStore(Op.getOperand(0), dl, FR, Op.getOperand(1), SV, 0, false, false, 0); } SDValue -ARMTargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) { +ARMTargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op, + SelectionDAG &DAG) const { SDNode *Node = Op.getNode(); DebugLoc dl = Node->getDebugLoc(); EVT VT = Node->getValueType(0); @@ -1595,7 +1641,7 @@ ARMTargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) { SDValue ARMTargetLowering::GetF64FormalArgument(CCValAssign &VA, CCValAssign &NextVA, SDValue &Root, SelectionDAG &DAG, - DebugLoc dl) { + DebugLoc dl) const { MachineFunction &MF = DAG.getMachineFunction(); ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); @@ -1611,10 +1657,8 @@ ARMTargetLowering::GetF64FormalArgument(CCValAssign &VA, CCValAssign &NextVA, SDValue ArgValue2; if (NextVA.isMemLoc()) { - unsigned ArgSize = NextVA.getLocVT().getSizeInBits()/8; MachineFrameInfo *MFI = MF.getFrameInfo(); - int FI = MFI->CreateFixedObject(ArgSize, NextVA.getLocMemOffset(), - true, false); + int FI = MFI->CreateFixedObject(4, NextVA.getLocMemOffset(), true, false); // Create load node to retrieve arguments from the stack. SDValue FIN = DAG.getFrameIndex(FI, getPointerTy()); @@ -1635,7 +1679,8 @@ ARMTargetLowering::LowerFormalArguments(SDValue Chain, const SmallVectorImpl<ISD::InputArg> &Ins, DebugLoc dl, SelectionDAG &DAG, - SmallVectorImpl<SDValue> &InVals) { + SmallVectorImpl<SDValue> &InVals) + const { MachineFunction &MF = DAG.getMachineFunction(); MachineFrameInfo *MFI = MF.getFrameInfo(); @@ -1663,14 +1708,22 @@ ARMTargetLowering::LowerFormalArguments(SDValue Chain, if (VA.needsCustom()) { // f64 and vector types are split up into multiple registers or // combinations of registers and stack slots. - RegVT = MVT::i32; - if (VA.getLocVT() == MVT::v2f64) { SDValue ArgValue1 = GetF64FormalArgument(VA, ArgLocs[++i], Chain, DAG, dl); VA = ArgLocs[++i]; // skip ahead to next loc - SDValue ArgValue2 = GetF64FormalArgument(VA, ArgLocs[++i], - Chain, DAG, dl); + SDValue ArgValue2; + if (VA.isMemLoc()) { + int FI = MFI->CreateFixedObject(8, VA.getLocMemOffset(), + true, false); + SDValue FIN = DAG.getFrameIndex(FI, getPointerTy()); + ArgValue2 = DAG.getLoad(MVT::f64, dl, Chain, FIN, + PseudoSourceValue::getFixedStack(FI), 0, + false, false, 0); + } else { + ArgValue2 = GetF64FormalArgument(VA, ArgLocs[++i], + Chain, DAG, dl); + } ArgValue = DAG.getNode(ISD::UNDEF, dl, MVT::v2f64); ArgValue = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64, ArgValue, ArgValue1, DAG.getIntPtrConstant(0)); @@ -1758,10 +1811,12 @@ ARMTargetLowering::LowerFormalArguments(SDValue Chain, // to their spots on the stack so that they may be loaded by deferencing // the result of va_next. 
AFI->setVarArgsRegSaveSize(VARegSaveSize); - VarArgsFrameIndex = MFI->CreateFixedObject(VARegSaveSize, ArgOffset + - VARegSaveSize - VARegSize, - true, false); - SDValue FIN = DAG.getFrameIndex(VarArgsFrameIndex, getPointerTy()); + AFI->setVarArgsFrameIndex( + MFI->CreateFixedObject(VARegSaveSize, + ArgOffset + VARegSaveSize - VARegSize, + true, false)); + SDValue FIN = DAG.getFrameIndex(AFI->getVarArgsFrameIndex(), + getPointerTy()); SmallVector<SDValue, 4> MemOps; for (; NumGPRs < 4; ++NumGPRs) { @@ -1773,9 +1828,10 @@ ARMTargetLowering::LowerFormalArguments(SDValue Chain, unsigned VReg = MF.addLiveIn(GPRArgRegs[NumGPRs], RC); SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i32); - SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN, - PseudoSourceValue::getFixedStack(VarArgsFrameIndex), 0, - false, false, 0); + SDValue Store = + DAG.getStore(Val.getValue(1), dl, Val, FIN, + PseudoSourceValue::getFixedStack(AFI->getVarArgsFrameIndex()), 0, + false, false, 0); MemOps.push_back(Store); FIN = DAG.getNode(ISD::ADD, dl, getPointerTy(), FIN, DAG.getConstant(4, getPointerTy())); @@ -1785,7 +1841,8 @@ ARMTargetLowering::LowerFormalArguments(SDValue Chain, &MemOps[0], MemOps.size()); } else // This will point to the next argument passed via stack. - VarArgsFrameIndex = MFI->CreateFixedObject(4, ArgOffset, true, false); + AFI->setVarArgsFrameIndex(MFI->CreateFixedObject(4, ArgOffset, + true, false)); } return Chain; @@ -1800,7 +1857,7 @@ static bool isFloatingPointZero(SDValue Op) { if (Op.getOperand(1).getOpcode() == ARMISD::Wrapper) { SDValue WrapperOp = Op.getOperand(1).getOperand(0); if (ConstantPoolSDNode *CP = dyn_cast<ConstantPoolSDNode>(WrapperOp)) - if (ConstantFP *CFP = dyn_cast<ConstantFP>(CP->getConstVal())) + if (const ConstantFP *CFP = dyn_cast<ConstantFP>(CP->getConstVal())) return CFP->getValueAPF().isPosZero(); } } @@ -1811,7 +1868,8 @@ static bool isFloatingPointZero(SDValue Op) { /// the given operands. 
SDValue ARMTargetLowering::getARMCmp(SDValue LHS, SDValue RHS, ISD::CondCode CC, - SDValue &ARMCC, SelectionDAG &DAG, DebugLoc dl) { + SDValue &ARMCC, SelectionDAG &DAG, + DebugLoc dl) const { if (ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(RHS.getNode())) { unsigned C = RHSC->getZExtValue(); if (!isLegalICmpImmediate(C)) { @@ -1877,7 +1935,7 @@ static SDValue getVFPCmp(SDValue LHS, SDValue RHS, SelectionDAG &DAG, return DAG.getNode(ARMISD::FMSTAT, dl, MVT::Flag, Cmp); } -SDValue ARMTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) { +SDValue ARMTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const { EVT VT = Op.getValueType(); SDValue LHS = Op.getOperand(0); SDValue RHS = Op.getOperand(1); @@ -1911,7 +1969,7 @@ SDValue ARMTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) { return Result; } -SDValue ARMTargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) { +SDValue ARMTargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const { SDValue Chain = Op.getOperand(0); ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(1))->get(); SDValue LHS = Op.getOperand(2); @@ -1945,7 +2003,7 @@ SDValue ARMTargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) { return Res; } -SDValue ARMTargetLowering::LowerBR_JT(SDValue Op, SelectionDAG &DAG) { +SDValue ARMTargetLowering::LowerBR_JT(SDValue Op, SelectionDAG &DAG) const { SDValue Chain = Op.getOperand(0); SDValue Table = Op.getOperand(1); SDValue Index = Op.getOperand(2); @@ -2034,7 +2092,7 @@ static SDValue LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) { return DAG.getNode(ARMISD::CNEG, dl, VT, AbsVal, AbsVal, ARMCC, CCR, Cmp); } -SDValue ARMTargetLowering::LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) { +SDValue ARMTargetLowering::LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const { MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo(); MFI->setFrameAddressIsTaken(true); EVT VT = Op.getValueType(); @@ -2055,8 +2113,10 @@ ARMTargetLowering::EmitTargetCodeForMemcpy(SelectionDAG &DAG, DebugLoc dl, SDValue Dst, SDValue Src, SDValue Size, unsigned Align, bool isVolatile, bool AlwaysInline, - const Value *DstSV, uint64_t DstSVOff, - const Value *SrcSV, uint64_t SrcSVOff){ + const Value *DstSV, + uint64_t DstSVOff, + const Value *SrcSV, + uint64_t SrcSVOff) const { // Do repeated 4-byte loads and stores. To be improved. // This requires 4-byte alignment. if ((Align & 3) != 0) @@ -2157,11 +2217,25 @@ ARMTargetLowering::EmitTargetCodeForMemcpy(SelectionDAG &DAG, DebugLoc dl, return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &TFOps[0], i); } +/// ExpandBIT_CONVERT - If the target supports VFP, this function is called to +/// expand a bit convert where either the source or destination type is i64 to +/// use a VMOVDRR or VMOVRRD node. This should not be done when the non-i64 +/// operand type is illegal (e.g., v2f32 for a target that doesn't support +/// vectors), since the legalizer won't know what to do with that. static SDValue ExpandBIT_CONVERT(SDNode *N, SelectionDAG &DAG) { - SDValue Op = N->getOperand(0); + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); DebugLoc dl = N->getDebugLoc(); - if (N->getValueType(0) == MVT::f64) { - // Turn i64->f64 into VMOVDRR. + SDValue Op = N->getOperand(0); + + // This function is only supposed to be called for i64 types, either as the + // source or destination of the bit convert. 
+ EVT SrcVT = Op.getValueType(); + EVT DstVT = N->getValueType(0); + assert((SrcVT == MVT::i64 || DstVT == MVT::i64) && + "ExpandBIT_CONVERT called for non-i64 type"); + + // Turn i64->f64 into VMOVDRR. + if (SrcVT == MVT::i64 && TLI.isTypeLegal(DstVT)) { SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, Op, DAG.getConstant(0, MVT::i32)); SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, Op, @@ -2170,11 +2244,14 @@ static SDValue ExpandBIT_CONVERT(SDNode *N, SelectionDAG &DAG) { } // Turn f64->i64 into VMOVRRD. - SDValue Cvt = DAG.getNode(ARMISD::VMOVRRD, dl, - DAG.getVTList(MVT::i32, MVT::i32), &Op, 1); + if (DstVT == MVT::i64 && TLI.isTypeLegal(SrcVT)) { + SDValue Cvt = DAG.getNode(ARMISD::VMOVRRD, dl, + DAG.getVTList(MVT::i32, MVT::i32), &Op, 1); + // Merge the pieces into a single i64 value. + return DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Cvt, Cvt.getValue(1)); + } - // Merge the pieces into a single i64 value. - return DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Cvt, Cvt.getValue(1)); + return SDValue(); } /// getZeroVector - Returns a vector of specified type with all zero elements. @@ -2227,7 +2304,8 @@ static SDValue getOnesVector(EVT VT, SelectionDAG &DAG, DebugLoc dl) { /// LowerShiftRightParts - Lower SRA_PARTS, which returns two /// i32 values and take a 2 x i32 value to shift plus a shift amount. -SDValue ARMTargetLowering::LowerShiftRightParts(SDValue Op, SelectionDAG &DAG) { +SDValue ARMTargetLowering::LowerShiftRightParts(SDValue Op, + SelectionDAG &DAG) const { assert(Op.getNumOperands() == 3 && "Not a double-shift!"); EVT VT = Op.getValueType(); unsigned VTBits = VT.getSizeInBits(); @@ -2262,7 +2340,8 @@ SDValue ARMTargetLowering::LowerShiftRightParts(SDValue Op, SelectionDAG &DAG) { /// LowerShiftLeftParts - Lower SHL_PARTS, which returns two /// i32 values and take a 2 x i32 value to shift plus a shift amount. -SDValue ARMTargetLowering::LowerShiftLeftParts(SDValue Op, SelectionDAG &DAG) { +SDValue ARMTargetLowering::LowerShiftLeftParts(SDValue Op, + SelectionDAG &DAG) const { assert(Op.getNumOperands() == 3 && "Not a double-shift!"); EVT VT = Op.getValueType(); unsigned VTBits = VT.getSizeInBits(); @@ -3059,7 +3138,7 @@ static SDValue LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) { return DAG.getNode(ISD::BIT_CONVERT, dl, Op.getValueType(), Val); } -SDValue ARMTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) { +SDValue ARMTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { switch (Op.getOpcode()) { default: llvm_unreachable("Don't know how to custom lower this!"); case ISD::ConstantPool: return LowerConstantPool(Op, DAG); @@ -3072,7 +3151,7 @@ SDValue ARMTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) { case ISD::BR_CC: return LowerBR_CC(Op, DAG); case ISD::BR_JT: return LowerBR_JT(Op, DAG); case ISD::DYNAMIC_STACKALLOC: return LowerDYNAMIC_STACKALLOC(Op, DAG); - case ISD::VASTART: return LowerVASTART(Op, DAG, VarArgsFrameIndex); + case ISD::VASTART: return LowerVASTART(Op, DAG); case ISD::MEMBARRIER: return LowerMEMBARRIER(Op, DAG, Subtarget); case ISD::SINT_TO_FP: case ISD::UINT_TO_FP: return LowerINT_TO_FP(Op, DAG); @@ -3105,22 +3184,22 @@ SDValue ARMTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) { /// type with new values built out of custom code. 
void ARMTargetLowering::ReplaceNodeResults(SDNode *N, SmallVectorImpl<SDValue>&Results, - SelectionDAG &DAG) { + SelectionDAG &DAG) const { + SDValue Res; switch (N->getOpcode()) { default: llvm_unreachable("Don't know how to custom expand this!"); - return; + break; case ISD::BIT_CONVERT: - Results.push_back(ExpandBIT_CONVERT(N, DAG)); - return; + Res = ExpandBIT_CONVERT(N, DAG); + break; case ISD::SRL: - case ISD::SRA: { - SDValue Res = LowerShift(N, DAG, Subtarget); - if (Res.getNode()) - Results.push_back(Res); - return; - } + case ISD::SRA: + Res = LowerShift(N, DAG, Subtarget); + break; } + if (Res.getNode()) + Results.push_back(Res); } //===----------------------------------------------------------------------===// @@ -3302,8 +3381,7 @@ ARMTargetLowering::EmitAtomicBinary(MachineInstr *MI, MachineBasicBlock *BB, MachineBasicBlock * ARMTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, - MachineBasicBlock *BB, - DenseMap<MachineBasicBlock*, MachineBasicBlock*> *EM) const { + MachineBasicBlock *BB) const { const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); DebugLoc dl = MI->getDebugLoc(); bool isThumb2 = Subtarget->isThumb2(); @@ -3387,12 +3465,9 @@ ARMTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, F->insert(It, sinkMBB); // Update machine-CFG edges by first adding all successors of the current // block to the new block which will contain the Phi node for the select. - // Also inform sdisel of the edge changes. for (MachineBasicBlock::succ_iterator I = BB->succ_begin(), - E = BB->succ_end(); I != E; ++I) { - EM->insert(std::make_pair(*I, sinkMBB)); + E = BB->succ_end(); I != E; ++I) sinkMBB->addSuccessor(*I); - } // Next, remove all successors of the current block, and add the true // and fallthrough blocks as its successors. while (!BB->succ_empty()) diff --git a/lib/Target/ARM/ARMISelLowering.h b/lib/Target/ARM/ARMISelLowering.h index fa33ad3..d8a230f 100644 --- a/lib/Target/ARM/ARMISelLowering.h +++ b/lib/Target/ARM/ARMISelLowering.h @@ -159,25 +159,24 @@ namespace llvm { // ARMTargetLowering - ARM Implementation of the TargetLowering interface class ARMTargetLowering : public TargetLowering { - int VarArgsFrameIndex; // FrameIndex for start of varargs area. public: explicit ARMTargetLowering(TargetMachine &TM); - virtual SDValue LowerOperation(SDValue Op, SelectionDAG &DAG); + virtual SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const; /// ReplaceNodeResults - Replace the results of node with an illegal result /// type with new values built out of custom code. /// virtual void ReplaceNodeResults(SDNode *N, SmallVectorImpl<SDValue>&Results, - SelectionDAG &DAG); + SelectionDAG &DAG) const; virtual SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const; virtual const char *getTargetNodeName(unsigned Opcode) const; - virtual MachineBasicBlock *EmitInstrWithCustomInserter(MachineInstr *MI, - MachineBasicBlock *MBB, - DenseMap<MachineBasicBlock*, MachineBasicBlock*>*) const; + virtual MachineBasicBlock * + EmitInstrWithCustomInserter(MachineInstr *MI, + MachineBasicBlock *MBB) const; /// allowsUnalignedMemoryAccesses - Returns true if the target allows /// unaligned memory accesses. of the specified type. 
@@ -237,7 +236,7 @@ namespace llvm { std::vector<SDValue> &Ops, SelectionDAG &DAG) const; - virtual const ARMSubtarget* getSubtarget() { + virtual const ARMSubtarget* getSubtarget() const { return Subtarget; } @@ -272,54 +271,57 @@ namespace llvm { CCValAssign &VA, CCValAssign &NextVA, SDValue &StackPtr, SmallVector<SDValue, 8> &MemOpChains, - ISD::ArgFlagsTy Flags); + ISD::ArgFlagsTy Flags) const; SDValue GetF64FormalArgument(CCValAssign &VA, CCValAssign &NextVA, - SDValue &Root, SelectionDAG &DAG, DebugLoc dl); + SDValue &Root, SelectionDAG &DAG, + DebugLoc dl) const; CCAssignFn *CCAssignFnForNode(CallingConv::ID CC, bool Return, bool isVarArg) const; SDValue LowerMemOpCallTo(SDValue Chain, SDValue StackPtr, SDValue Arg, DebugLoc dl, SelectionDAG &DAG, const CCValAssign &VA, - ISD::ArgFlagsTy Flags); - SDValue LowerINTRINSIC_W_CHAIN(SDValue Op, SelectionDAG &DAG); + ISD::ArgFlagsTy Flags) const; + SDValue LowerINTRINSIC_W_CHAIN(SDValue Op, SelectionDAG &DAG) const; SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG, - const ARMSubtarget *Subtarget); - SDValue LowerBlockAddress(SDValue Op, SelectionDAG &DAG); - SDValue LowerGlobalAddressDarwin(SDValue Op, SelectionDAG &DAG); - SDValue LowerGlobalAddressELF(SDValue Op, SelectionDAG &DAG); - SDValue LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG); + const ARMSubtarget *Subtarget) const; + SDValue LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerGlobalAddressDarwin(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerGlobalAddressELF(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const; SDValue LowerToTLSGeneralDynamicModel(GlobalAddressSDNode *GA, - SelectionDAG &DAG); + SelectionDAG &DAG) const; SDValue LowerToTLSExecModels(GlobalAddressSDNode *GA, - SelectionDAG &DAG); - SDValue LowerGLOBAL_OFFSET_TABLE(SDValue Op, SelectionDAG &DAG); - SDValue LowerBR_JT(SDValue Op, SelectionDAG &DAG); - SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG); - SDValue LowerBR_CC(SDValue Op, SelectionDAG &DAG); - SDValue LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG); - SDValue LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG); - SDValue LowerShiftRightParts(SDValue Op, SelectionDAG &DAG); - SDValue LowerShiftLeftParts(SDValue Op, SelectionDAG &DAG); + SelectionDAG &DAG) const; + SDValue LowerGLOBAL_OFFSET_TABLE(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerBR_JT(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerBR_CC(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerShiftRightParts(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerShiftLeftParts(SDValue Op, SelectionDAG &DAG) const; SDValue EmitTargetCodeForMemcpy(SelectionDAG &DAG, DebugLoc dl, SDValue Chain, SDValue Dst, SDValue Src, SDValue Size, unsigned Align, bool isVolatile, bool AlwaysInline, - const Value *DstSV, uint64_t DstSVOff, - const Value *SrcSV, uint64_t SrcSVOff); + const Value *DstSV, + uint64_t DstSVOff, + const Value *SrcSV, + uint64_t SrcSVOff) const; SDValue LowerCallResult(SDValue Chain, SDValue InFlag, CallingConv::ID CallConv, bool isVarArg, const SmallVectorImpl<ISD::InputArg> &Ins, DebugLoc dl, SelectionDAG &DAG, - SmallVectorImpl<SDValue> &InVals); + SmallVectorImpl<SDValue> &InVals) const; virtual SDValue LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, bool 
isVarArg, const SmallVectorImpl<ISD::InputArg> &Ins, DebugLoc dl, SelectionDAG &DAG, - SmallVectorImpl<SDValue> &InVals); + SmallVectorImpl<SDValue> &InVals) const; virtual SDValue LowerCall(SDValue Chain, SDValue Callee, @@ -328,16 +330,16 @@ namespace llvm { const SmallVectorImpl<ISD::OutputArg> &Outs, const SmallVectorImpl<ISD::InputArg> &Ins, DebugLoc dl, SelectionDAG &DAG, - SmallVectorImpl<SDValue> &InVals); + SmallVectorImpl<SDValue> &InVals) const; virtual SDValue LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool isVarArg, const SmallVectorImpl<ISD::OutputArg> &Outs, - DebugLoc dl, SelectionDAG &DAG); + DebugLoc dl, SelectionDAG &DAG) const; SDValue getARMCmp(SDValue LHS, SDValue RHS, ISD::CondCode CC, - SDValue &ARMCC, SelectionDAG &DAG, DebugLoc dl); + SDValue &ARMCC, SelectionDAG &DAG, DebugLoc dl) const; MachineBasicBlock *EmitAtomicCmpSwap(MachineInstr *MI, MachineBasicBlock *BB, diff --git a/lib/Target/ARM/ARMInstrInfo.td b/lib/Target/ARM/ARMInstrInfo.td index f2ab06f..ce5f2f8 100644 --- a/lib/Target/ARM/ARMInstrInfo.td +++ b/lib/Target/ARM/ARMInstrInfo.td @@ -124,6 +124,7 @@ def HasV6 : Predicate<"Subtarget->hasV6Ops()">; def HasV6T2 : Predicate<"Subtarget->hasV6T2Ops()">; def NoV6T2 : Predicate<"!Subtarget->hasV6T2Ops()">; def HasV7 : Predicate<"Subtarget->hasV7Ops()">; +def NoVFP : Predicate<"!Subtarget->hasVFP2()">; def HasVFP2 : Predicate<"Subtarget->hasVFP2()">; def HasVFP3 : Predicate<"Subtarget->hasVFP3()">; def HasNEON : Predicate<"Subtarget->hasNEON()">; @@ -1231,7 +1232,7 @@ def LDRBT : AI2ldbpo<(outs GPR:$dst, GPR:$base_wb), } def LDRSBT : AI3ldsbpo<(outs GPR:$dst, GPR:$base_wb), - (ins GPR:$base,am2offset:$offset), LdMiscFrm, IIC_iLoadru, + (ins GPR:$base,am3offset:$offset), LdMiscFrm, IIC_iLoadru, "ldrsbt", "\t$dst, [$base], $offset", "$base = $base_wb", []> { let Inst{21} = 1; // overwrite } @@ -2533,7 +2534,23 @@ let Defs = "mov\tr0, #0\n\t" "add\tpc, pc, #0\n\t" "mov\tr0, #1 @ eh_setjmp end", "", - [(set R0, (ARMeh_sjlj_setjmp GPR:$src, GPR:$val))]>; + [(set R0, (ARMeh_sjlj_setjmp GPR:$src, GPR:$val))]>, + Requires<[IsARM, HasVFP2]>; +} + +let Defs = + [ R0, R1, R2, R3, R4, R5, R6, R7, R8, R9, R10, R11, R12, LR ] in { + def Int_eh_sjlj_setjmp_nofp : XI<(outs), (ins GPR:$src, GPR:$val), + AddrModeNone, SizeSpecial, IndexModeNone, + Pseudo, NoItinerary, + "str\tsp, [$src, #+8] @ eh_setjmp begin\n\t" + "add\t$val, pc, #8\n\t" + "str\t$val, [$src, #+4]\n\t" + "mov\tr0, #0\n\t" + "add\tpc, pc, #0\n\t" + "mov\tr0, #1 @ eh_setjmp end", "", + [(set R0, (ARMeh_sjlj_setjmp GPR:$src, GPR:$val))]>, + Requires<[IsARM, NoVFP]>; } //===----------------------------------------------------------------------===// @@ -2747,7 +2764,7 @@ multiclass LdStCop<bits<4> op31_28, bit load, string opc> { def L_OFFSET : ACI<(outs), (ins nohash_imm:$cop, nohash_imm:$CRd, addrmode2:$addr), - opc, "l\tp$cop, cr$CRd, $addr"> { + !strconcat(opc, "l"), "\tp$cop, cr$CRd, $addr"> { let Inst{31-28} = op31_28; let Inst{24} = 1; // P = 1 let Inst{21} = 0; // W = 0 @@ -2757,7 +2774,7 @@ multiclass LdStCop<bits<4> op31_28, bit load, string opc> { def L_PRE : ACI<(outs), (ins nohash_imm:$cop, nohash_imm:$CRd, addrmode2:$addr), - opc, "l\tp$cop, cr$CRd, $addr!"> { + !strconcat(opc, "l"), "\tp$cop, cr$CRd, $addr!"> { let Inst{31-28} = op31_28; let Inst{24} = 1; // P = 1 let Inst{21} = 1; // W = 1 @@ -2767,7 +2784,7 @@ multiclass LdStCop<bits<4> op31_28, bit load, string opc> { def L_POST : ACI<(outs), (ins nohash_imm:$cop, nohash_imm:$CRd, GPR:$base, am2offset:$offset), - opc, "l\tp$cop, cr$CRd, 
[$base], $offset"> { + !strconcat(opc, "l"), "\tp$cop, cr$CRd, [$base], $offset"> { let Inst{31-28} = op31_28; let Inst{24} = 0; // P = 0 let Inst{21} = 1; // W = 1 @@ -2777,7 +2794,7 @@ multiclass LdStCop<bits<4> op31_28, bit load, string opc> { def L_OPTION : ACI<(outs), (ins nohash_imm:$cop, nohash_imm:$CRd, GPR:$base, nohash_imm:$option), - opc, "l\tp$cop, cr$CRd, [$base], $option"> { + !strconcat(opc, "l"), "\tp$cop, cr$CRd, [$base], $option"> { let Inst{31-28} = op31_28; let Inst{24} = 0; // P = 0 let Inst{23} = 1; // U = 1 diff --git a/lib/Target/ARM/ARMInstrNEON.td b/lib/Target/ARM/ARMInstrNEON.td index ed9d31d..d5ce2b8 100644 --- a/lib/Target/ARM/ARMInstrNEON.td +++ b/lib/Target/ARM/ARMInstrNEON.td @@ -1621,12 +1621,13 @@ multiclass N3VNInt_HSD<bit op24, bit op23, bits<4> op11_8, bit op4, // First with only element sizes of 16 and 32 bits: multiclass N3VLInt_HS<bit op24, bit op23, bits<4> op11_8, bit op4, - InstrItinClass itin, string OpcodeStr, string Dt, + InstrItinClass itin16, InstrItinClass itin32, + string OpcodeStr, string Dt, Intrinsic IntOp, bit Commutable = 0> { - def v4i32 : N3VLInt<op24, op23, 0b01, op11_8, op4, itin, + def v4i32 : N3VLInt<op24, op23, 0b01, op11_8, op4, itin16, OpcodeStr, !strconcat(Dt, "16"), v4i32, v4i16, IntOp, Commutable>; - def v2i64 : N3VLInt<op24, op23, 0b10, op11_8, op4, itin, + def v2i64 : N3VLInt<op24, op23, 0b10, op11_8, op4, itin32, OpcodeStr, !strconcat(Dt, "32"), v2i64, v2i32, IntOp, Commutable>; } @@ -1642,11 +1643,12 @@ multiclass N3VLIntSL_HS<bit op24, bits<4> op11_8, // ....then also with element size of 8 bits: multiclass N3VLInt_QHS<bit op24, bit op23, bits<4> op11_8, bit op4, - InstrItinClass itin, string OpcodeStr, string Dt, + InstrItinClass itin16, InstrItinClass itin32, + string OpcodeStr, string Dt, Intrinsic IntOp, bit Commutable = 0> - : N3VLInt_HS<op24, op23, op11_8, op4, itin, OpcodeStr, Dt, + : N3VLInt_HS<op24, op23, op11_8, op4, itin16, itin32, OpcodeStr, Dt, IntOp, Commutable> { - def v8i16 : N3VLInt<op24, op23, 0b00, op11_8, op4, itin, + def v8i16 : N3VLInt<op24, op23, 0b00, op11_8, op4, itin16, OpcodeStr, !strconcat(Dt, "8"), v8i16, v8i8, IntOp, Commutable>; } @@ -1711,21 +1713,22 @@ multiclass N3VMulOpSL_HS<bits<4> op11_8, // Neon 3-argument intrinsics, // element sizes of 8, 16 and 32 bits: multiclass N3VInt3_QHS<bit op24, bit op23, bits<4> op11_8, bit op4, + InstrItinClass itinD, InstrItinClass itinQ, string OpcodeStr, string Dt, Intrinsic IntOp> { // 64-bit vector types. - def v8i8 : N3VDInt3<op24, op23, 0b00, op11_8, op4, IIC_VMACi16D, + def v8i8 : N3VDInt3<op24, op23, 0b00, op11_8, op4, itinD, OpcodeStr, !strconcat(Dt, "8"), v8i8, v8i8, IntOp>; - def v4i16 : N3VDInt3<op24, op23, 0b01, op11_8, op4, IIC_VMACi16D, + def v4i16 : N3VDInt3<op24, op23, 0b01, op11_8, op4, itinD, OpcodeStr, !strconcat(Dt, "16"), v4i16, v4i16, IntOp>; - def v2i32 : N3VDInt3<op24, op23, 0b10, op11_8, op4, IIC_VMACi32D, + def v2i32 : N3VDInt3<op24, op23, 0b10, op11_8, op4, itinD, OpcodeStr, !strconcat(Dt, "32"), v2i32, v2i32, IntOp>; // 128-bit vector types. 
- def v16i8 : N3VQInt3<op24, op23, 0b00, op11_8, op4, IIC_VMACi16Q, + def v16i8 : N3VQInt3<op24, op23, 0b00, op11_8, op4, itinQ, OpcodeStr, !strconcat(Dt, "8"), v16i8, v16i8, IntOp>; - def v8i16 : N3VQInt3<op24, op23, 0b01, op11_8, op4, IIC_VMACi16Q, + def v8i16 : N3VQInt3<op24, op23, 0b01, op11_8, op4, itinQ, OpcodeStr, !strconcat(Dt, "16"), v8i16, v8i16, IntOp>; - def v4i32 : N3VQInt3<op24, op23, 0b10, op11_8, op4, IIC_VMACi32Q, + def v4i32 : N3VQInt3<op24, op23, 0b10, op11_8, op4, itinQ, OpcodeStr, !strconcat(Dt, "32"), v4i32, v4i32, IntOp>; } @@ -1734,10 +1737,11 @@ multiclass N3VInt3_QHS<bit op24, bit op23, bits<4> op11_8, bit op4, // First with only element sizes of 16 and 32 bits: multiclass N3VLInt3_HS<bit op24, bit op23, bits<4> op11_8, bit op4, + InstrItinClass itin16, InstrItinClass itin32, string OpcodeStr, string Dt, Intrinsic IntOp> { - def v4i32 : N3VLInt3<op24, op23, 0b01, op11_8, op4, IIC_VMACi16D, + def v4i32 : N3VLInt3<op24, op23, 0b01, op11_8, op4, itin16, OpcodeStr, !strconcat(Dt, "16"), v4i32, v4i16, IntOp>; - def v2i64 : N3VLInt3<op24, op23, 0b10, op11_8, op4, IIC_VMACi16D, + def v2i64 : N3VLInt3<op24, op23, 0b10, op11_8, op4, itin32, OpcodeStr, !strconcat(Dt, "32"), v2i64, v2i32, IntOp>; } @@ -1751,9 +1755,10 @@ multiclass N3VLInt3SL_HS<bit op24, bits<4> op11_8, // ....then also with element size of 8 bits: multiclass N3VLInt3_QHS<bit op24, bit op23, bits<4> op11_8, bit op4, + InstrItinClass itin16, InstrItinClass itin32, string OpcodeStr, string Dt, Intrinsic IntOp> - : N3VLInt3_HS<op24, op23, op11_8, op4, OpcodeStr, Dt, IntOp> { - def v8i16 : N3VLInt3<op24, op23, 0b00, op11_8, op4, IIC_VMACi16D, + : N3VLInt3_HS<op24, op23, op11_8, op4, itin16, itin32, OpcodeStr, Dt, IntOp> { + def v8i16 : N3VLInt3<op24, op23, 0b00, op11_8, op4, itin16, OpcodeStr, !strconcat(Dt, "8"), v8i16, v8i8, IntOp>; } @@ -2001,10 +2006,10 @@ def VADDfd : N3VD<0, 0, 0b00, 0b1101, 0, IIC_VBIND, "vadd", "f32", def VADDfq : N3VQ<0, 0, 0b00, 0b1101, 0, IIC_VBINQ, "vadd", "f32", v4f32, v4f32, fadd, 1>; // VADDL : Vector Add Long (Q = D + D) -defm VADDLs : N3VLInt_QHS<0,1,0b0000,0, IIC_VSHLiD, "vaddl", "s", - int_arm_neon_vaddls, 1>; -defm VADDLu : N3VLInt_QHS<1,1,0b0000,0, IIC_VSHLiD, "vaddl", "u", - int_arm_neon_vaddlu, 1>; +defm VADDLs : N3VLInt_QHS<0,1,0b0000,0, IIC_VSHLiD, IIC_VSHLiD, + "vaddl", "s", int_arm_neon_vaddls, 1>; +defm VADDLu : N3VLInt_QHS<1,1,0b0000,0, IIC_VSHLiD, IIC_VSHLiD, + "vaddl", "u", int_arm_neon_vaddlu, 1>; // VADDW : Vector Add Wide (Q = Q + D) defm VADDWs : N3VWInt_QHS<0,1,0b0001,0, "vaddw", "s", int_arm_neon_vaddws, 0>; defm VADDWu : N3VWInt_QHS<1,1,0b0001,0, "vaddw", "u", int_arm_neon_vaddwu, 0>; @@ -2118,10 +2123,10 @@ def : Pat<(v4i32 (int_arm_neon_vqrdmulh (v4i32 QPR:$src1), (SubReg_i32_lane imm:$lane)))>; // VMULL : Vector Multiply Long (integer and polynomial) (Q = D * D) -defm VMULLs : N3VLInt_QHS<0,1,0b1100,0, IIC_VMULi16D, "vmull", "s", - int_arm_neon_vmulls, 1>; -defm VMULLu : N3VLInt_QHS<1,1,0b1100,0, IIC_VMULi16D, "vmull", "u", - int_arm_neon_vmullu, 1>; +defm VMULLs : N3VLInt_QHS<0,1,0b1100,0, IIC_VMULi16D, IIC_VMULi32D, + "vmull", "s", int_arm_neon_vmulls, 1>; +defm VMULLu : N3VLInt_QHS<1,1,0b1100,0, IIC_VMULi16D, IIC_VMULi32D, + "vmull", "u", int_arm_neon_vmullu, 1>; def VMULLp : N3VLInt<0, 1, 0b00, 0b1110, 0, IIC_VMULi16D, "vmull", "p8", v8i16, v8i8, int_arm_neon_vmullp, 1>; defm VMULLsls : N3VLIntSL_HS<0, 0b1010, IIC_VMULi16D, "vmull", "s", @@ -2130,10 +2135,10 @@ defm VMULLslu : N3VLIntSL_HS<1, 0b1010, IIC_VMULi16D, "vmull", "u", int_arm_neon_vmullu>; 
// VQDMULL : Vector Saturating Doubling Multiply Long (Q = D * D) -defm VQDMULL : N3VLInt_HS<0,1,0b1101,0, IIC_VMULi16D, "vqdmull", "s", - int_arm_neon_vqdmull, 1>; -defm VQDMULLsl: N3VLIntSL_HS<0, 0b1011, IIC_VMULi16D, "vqdmull", "s", - int_arm_neon_vqdmull>; +defm VQDMULL : N3VLInt_HS<0,1,0b1101,0, IIC_VMULi16D, IIC_VMULi32D, + "vqdmull", "s", int_arm_neon_vqdmull, 1>; +defm VQDMULLsl: N3VLIntSL_HS<0, 0b1011, IIC_VMULi16D, + "vqdmull", "s", int_arm_neon_vqdmull>; // Vector Multiply-Accumulate and Multiply-Subtract Operations. @@ -2177,15 +2182,17 @@ def : Pat<(v4f32 (fadd (v4f32 QPR:$src1), (SubReg_i32_lane imm:$lane)))>; // VMLAL : Vector Multiply Accumulate Long (Q += D * D) -defm VMLALs : N3VLInt3_QHS<0,1,0b1000,0, "vmlal", "s", int_arm_neon_vmlals>; -defm VMLALu : N3VLInt3_QHS<1,1,0b1000,0, "vmlal", "u", int_arm_neon_vmlalu>; +defm VMLALs : N3VLInt3_QHS<0,1,0b1000,0, IIC_VMACi16D, IIC_VMACi32D, + "vmlal", "s", int_arm_neon_vmlals>; +defm VMLALu : N3VLInt3_QHS<1,1,0b1000,0, IIC_VMACi16D, IIC_VMACi32D, + "vmlal", "u", int_arm_neon_vmlalu>; defm VMLALsls : N3VLInt3SL_HS<0, 0b0010, "vmlal", "s", int_arm_neon_vmlals>; defm VMLALslu : N3VLInt3SL_HS<1, 0b0010, "vmlal", "u", int_arm_neon_vmlalu>; // VQDMLAL : Vector Saturating Doubling Multiply Accumulate Long (Q += D * D) -defm VQDMLAL : N3VLInt3_HS<0, 1, 0b1001, 0, "vqdmlal", "s", - int_arm_neon_vqdmlal>; +defm VQDMLAL : N3VLInt3_HS<0, 1, 0b1001, 0, IIC_VMACi16D, IIC_VMACi32D, + "vqdmlal", "s", int_arm_neon_vqdmlal>; defm VQDMLALsl: N3VLInt3SL_HS<0, 0b0011, "vqdmlal", "s", int_arm_neon_vqdmlal>; // VMLS : Vector Multiply Subtract (integer and floating-point) @@ -2227,15 +2234,17 @@ def : Pat<(v4f32 (fsub (v4f32 QPR:$src1), (SubReg_i32_lane imm:$lane)))>; // VMLSL : Vector Multiply Subtract Long (Q -= D * D) -defm VMLSLs : N3VLInt3_QHS<0,1,0b1010,0, "vmlsl", "s", int_arm_neon_vmlsls>; -defm VMLSLu : N3VLInt3_QHS<1,1,0b1010,0, "vmlsl", "u", int_arm_neon_vmlslu>; +defm VMLSLs : N3VLInt3_QHS<0,1,0b1010,0, IIC_VMACi16D, IIC_VMACi32D, + "vmlsl", "s", int_arm_neon_vmlsls>; +defm VMLSLu : N3VLInt3_QHS<1,1,0b1010,0, IIC_VMACi16D, IIC_VMACi32D, + "vmlsl", "u", int_arm_neon_vmlslu>; defm VMLSLsls : N3VLInt3SL_HS<0, 0b0110, "vmlsl", "s", int_arm_neon_vmlsls>; defm VMLSLslu : N3VLInt3SL_HS<1, 0b0110, "vmlsl", "u", int_arm_neon_vmlslu>; // VQDMLSL : Vector Saturating Doubling Multiply Subtract Long (Q -= D * D) -defm VQDMLSL : N3VLInt3_HS<0, 1, 0b1011, 0, "vqdmlsl", "s", - int_arm_neon_vqdmlsl>; +defm VQDMLSL : N3VLInt3_HS<0, 1, 0b1011, 0, IIC_VMACi16D, IIC_VMACi32D, + "vqdmlsl", "s", int_arm_neon_vqdmlsl>; defm VQDMLSLsl: N3VLInt3SL_HS<0, 0b111, "vqdmlsl", "s", int_arm_neon_vqdmlsl>; // Vector Subtract Operations. 
@@ -2248,26 +2257,26 @@ def VSUBfd : N3VD<0, 0, 0b10, 0b1101, 0, IIC_VBIND, "vsub", "f32", def VSUBfq : N3VQ<0, 0, 0b10, 0b1101, 0, IIC_VBINQ, "vsub", "f32", v4f32, v4f32, fsub, 0>; // VSUBL : Vector Subtract Long (Q = D - D) -defm VSUBLs : N3VLInt_QHS<0,1,0b0010,0, IIC_VSHLiD, "vsubl", "s", - int_arm_neon_vsubls, 1>; -defm VSUBLu : N3VLInt_QHS<1,1,0b0010,0, IIC_VSHLiD, "vsubl", "u", - int_arm_neon_vsublu, 1>; +defm VSUBLs : N3VLInt_QHS<0,1,0b0010,0, IIC_VSHLiD, IIC_VSHLiD, + "vsubl", "s", int_arm_neon_vsubls, 1>; +defm VSUBLu : N3VLInt_QHS<1,1,0b0010,0, IIC_VSHLiD, IIC_VSHLiD, + "vsubl", "u", int_arm_neon_vsublu, 1>; // VSUBW : Vector Subtract Wide (Q = Q - D) defm VSUBWs : N3VWInt_QHS<0,1,0b0011,0, "vsubw", "s", int_arm_neon_vsubws, 0>; defm VSUBWu : N3VWInt_QHS<1,1,0b0011,0, "vsubw", "u", int_arm_neon_vsubwu, 0>; // VHSUB : Vector Halving Subtract defm VHSUBs : N3VInt_QHS<0, 0, 0b0010, 0, N3RegFrm, - IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, IIC_VBINi4Q, + IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q, "vhsub", "s", int_arm_neon_vhsubs, 0>; defm VHSUBu : N3VInt_QHS<1, 0, 0b0010, 0, N3RegFrm, - IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, IIC_VBINi4Q, + IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q, "vhsub", "u", int_arm_neon_vhsubu, 0>; // VQSUB : Vector Saturing Subtract defm VQSUBs : N3VInt_QHSD<0, 0, 0b0010, 1, N3RegFrm, - IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, IIC_VBINi4Q, + IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q, "vqsub", "s", int_arm_neon_vqsubs, 0>; defm VQSUBu : N3VInt_QHSD<1, 0, 0b0010, 1, N3RegFrm, - IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, IIC_VBINi4Q, + IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q, "vqsub", "u", int_arm_neon_vqsubu, 0>; // VSUBHN : Vector Subtract and Narrow Returning High Half (D = Q - Q) defm VSUBHN : N3VNInt_HSD<0,1,0b0110,0, "vsubhn", "i", @@ -2279,8 +2288,8 @@ defm VRSUBHN : N3VNInt_HSD<1,1,0b0110,0, "vrsubhn", "i", // Vector Comparisons. 
// VCEQ : Vector Compare Equal -defm VCEQ : N3V_QHS<1, 0, 0b1000, 1, IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, - IIC_VBINi4Q, "vceq", "i", NEONvceq, 1>; +defm VCEQ : N3V_QHS<1, 0, 0b1000, 1, IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, + IIC_VSUBi4Q, "vceq", "i", NEONvceq, 1>; def VCEQfd : N3VD<0,0,0b00,0b1110,0, IIC_VBIND, "vceq", "f32", v2i32, v2f32, NEONvceq, 1>; def VCEQfq : N3VQ<0,0,0b00,0b1110,0, IIC_VBINQ, "vceq", "f32", v4i32, v4f32, @@ -2290,10 +2299,10 @@ defm VCEQz : N2V_QHS_cmp<0b11, 0b11, 0b01, 0b00010, 0, "vceq", "i", "$dst, $src, #0">; // VCGE : Vector Compare Greater Than or Equal -defm VCGEs : N3V_QHS<0, 0, 0b0011, 1, IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, - IIC_VBINi4Q, "vcge", "s", NEONvcge, 0>; -defm VCGEu : N3V_QHS<1, 0, 0b0011, 1, IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, - IIC_VBINi4Q, "vcge", "u", NEONvcgeu, 0>; +defm VCGEs : N3V_QHS<0, 0, 0b0011, 1, IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, + IIC_VSUBi4Q, "vcge", "s", NEONvcge, 0>; +defm VCGEu : N3V_QHS<1, 0, 0b0011, 1, IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, + IIC_VSUBi4Q, "vcge", "u", NEONvcgeu, 0>; def VCGEfd : N3VD<1,0,0b00,0b1110,0, IIC_VBIND, "vcge", "f32", v2i32, v2f32, NEONvcge, 0>; def VCGEfq : N3VQ<1,0,0b00,0b1110,0, IIC_VBINQ, "vcge", "f32", v4i32, v4f32, @@ -2306,10 +2315,10 @@ defm VCLEz : N2V_QHS_cmp<0b11, 0b11, 0b01, 0b00011, 0, "vcle", "s", "$dst, $src, #0">; // VCGT : Vector Compare Greater Than -defm VCGTs : N3V_QHS<0, 0, 0b0011, 0, IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, - IIC_VBINi4Q, "vcgt", "s", NEONvcgt, 0>; -defm VCGTu : N3V_QHS<1, 0, 0b0011, 0, IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, - IIC_VBINi4Q, "vcgt", "u", NEONvcgtu, 0>; +defm VCGTs : N3V_QHS<0, 0, 0b0011, 0, IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, + IIC_VSUBi4Q, "vcgt", "s", NEONvcgt, 0>; +defm VCGTu : N3V_QHS<1, 0, 0b0011, 0, IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, + IIC_VSUBi4Q, "vcgt", "u", NEONvcgtu, 0>; def VCGTfd : N3VD<1,0,0b10,0b1110,0, IIC_VBIND, "vcgt", "f32", v2i32, v2f32, NEONvcgt, 0>; def VCGTfq : N3VQ<1,0,0b10,0b1110,0, IIC_VBINQ, "vcgt", "f32", v4i32, v4f32, @@ -2387,11 +2396,11 @@ def VORNq : N3VX<0, 0, 0b11, 0b0001, 1, 1, (outs QPR:$dst), // VMVN : Vector Bitwise NOT def VMVNd : N2VX<0b11, 0b11, 0b00, 0b00, 0b01011, 0, 0, - (outs DPR:$dst), (ins DPR:$src), IIC_VSHLiD, + (outs DPR:$dst), (ins DPR:$src), IIC_VSUBiD, "vmvn", "$dst, $src", "", [(set DPR:$dst, (v2i32 (vnot8 DPR:$src)))]>; def VMVNq : N2VX<0b11, 0b11, 0b00, 0b00, 0b01011, 1, 0, - (outs QPR:$dst), (ins QPR:$src), IIC_VSHLiD, + (outs QPR:$dst), (ins QPR:$src), IIC_VSUBiD, "vmvn", "$dst, $src", "", [(set QPR:$dst, (v4i32 (vnot16 QPR:$src)))]>; def : Pat<(v2i32 (vnot8 DPR:$src)), (VMVNd DPR:$src)>; @@ -2447,10 +2456,10 @@ def VBITq : N3VX<1, 0, 0b10, 0b0001, 1, 1, // VABD : Vector Absolute Difference defm VABDs : N3VInt_QHS<0, 0, 0b0111, 0, N3RegFrm, - IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, IIC_VBINi4Q, + IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q, "vabd", "s", int_arm_neon_vabds, 0>; defm VABDu : N3VInt_QHS<1, 0, 0b0111, 0, N3RegFrm, - IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, IIC_VBINi4Q, + IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q, "vabd", "u", int_arm_neon_vabdu, 0>; def VABDfd : N3VDInt<1, 0, 0b10, 0b1101, 0, N3RegFrm, IIC_VBIND, "vabd", "f32", v2f32, v2f32, int_arm_neon_vabds, 0>; @@ -2458,56 +2467,68 @@ def VABDfq : N3VQInt<1, 0, 0b10, 0b1101, 0, N3RegFrm, IIC_VBINQ, "vabd", "f32", v4f32, v4f32, int_arm_neon_vabds, 0>; // VABDL : Vector Absolute Difference Long (Q = | D - D |) -defm VABDLs : N3VLInt_QHS<0,1,0b0111,0, IIC_VBINi4Q, +defm VABDLs : 
N3VLInt_QHS<0,1,0b0111,0, IIC_VSUBi4Q, IIC_VSUBi4Q, "vabdl", "s", int_arm_neon_vabdls, 0>; -defm VABDLu : N3VLInt_QHS<1,1,0b0111,0, IIC_VBINi4Q, +defm VABDLu : N3VLInt_QHS<1,1,0b0111,0, IIC_VSUBi4Q, IIC_VSUBi4Q, "vabdl", "u", int_arm_neon_vabdlu, 0>; // VABA : Vector Absolute Difference and Accumulate -defm VABAs : N3VInt3_QHS<0,0,0b0111,1, "vaba", "s", int_arm_neon_vabas>; -defm VABAu : N3VInt3_QHS<1,0,0b0111,1, "vaba", "u", int_arm_neon_vabau>; +defm VABAs : N3VInt3_QHS<0,0,0b0111,1, IIC_VABAD, IIC_VABAQ, + "vaba", "s", int_arm_neon_vabas>; +defm VABAu : N3VInt3_QHS<1,0,0b0111,1, IIC_VABAD, IIC_VABAQ, + "vaba", "u", int_arm_neon_vabau>; // VABAL : Vector Absolute Difference and Accumulate Long (Q += | D - D |) -defm VABALs : N3VLInt3_QHS<0,1,0b0101,0, "vabal", "s", int_arm_neon_vabals>; -defm VABALu : N3VLInt3_QHS<1,1,0b0101,0, "vabal", "u", int_arm_neon_vabalu>; +defm VABALs : N3VLInt3_QHS<0,1,0b0101,0, IIC_VABAD, IIC_VABAD, + "vabal", "s", int_arm_neon_vabals>; +defm VABALu : N3VLInt3_QHS<1,1,0b0101,0, IIC_VABAD, IIC_VABAD, + "vabal", "u", int_arm_neon_vabalu>; // Vector Maximum and Minimum. // VMAX : Vector Maximum defm VMAXs : N3VInt_QHS<0, 0, 0b0110, 0, N3RegFrm, - IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, IIC_VBINi4Q, + IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q, "vmax", "s", int_arm_neon_vmaxs, 1>; defm VMAXu : N3VInt_QHS<1, 0, 0b0110, 0, N3RegFrm, - IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, IIC_VBINi4Q, + IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q, "vmax", "u", int_arm_neon_vmaxu, 1>; -def VMAXfd : N3VDInt<0, 0, 0b00, 0b1111, 0, N3RegFrm, IIC_VBIND, "vmax", - "f32", v2f32, v2f32, int_arm_neon_vmaxs, 1>; -def VMAXfq : N3VQInt<0, 0, 0b00, 0b1111, 0, N3RegFrm, IIC_VBINQ, "vmax", - "f32", v4f32, v4f32, int_arm_neon_vmaxs, 1>; +def VMAXfd : N3VDInt<0, 0, 0b00, 0b1111, 0, N3RegFrm, IIC_VBIND, + "vmax", "f32", + v2f32, v2f32, int_arm_neon_vmaxs, 1>; +def VMAXfq : N3VQInt<0, 0, 0b00, 0b1111, 0, N3RegFrm, IIC_VBINQ, + "vmax", "f32", + v4f32, v4f32, int_arm_neon_vmaxs, 1>; // VMIN : Vector Minimum defm VMINs : N3VInt_QHS<0, 0, 0b0110, 1, N3RegFrm, - IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, IIC_VBINi4Q, + IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q, "vmin", "s", int_arm_neon_vmins, 1>; defm VMINu : N3VInt_QHS<1, 0, 0b0110, 1, N3RegFrm, - IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, IIC_VBINi4Q, + IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q, "vmin", "u", int_arm_neon_vminu, 1>; -def VMINfd : N3VDInt<0, 0, 0b10, 0b1111, 0, N3RegFrm, IIC_VBIND, "vmin", - "f32", v2f32, v2f32, int_arm_neon_vmins, 1>; -def VMINfq : N3VQInt<0, 0, 0b10, 0b1111, 0, N3RegFrm, IIC_VBINQ, "vmin", - "f32", v4f32, v4f32, int_arm_neon_vmins, 1>; +def VMINfd : N3VDInt<0, 0, 0b10, 0b1111, 0, N3RegFrm, IIC_VBIND, + "vmin", "f32", + v2f32, v2f32, int_arm_neon_vmins, 1>; +def VMINfq : N3VQInt<0, 0, 0b10, 0b1111, 0, N3RegFrm, IIC_VBINQ, + "vmin", "f32", + v4f32, v4f32, int_arm_neon_vmins, 1>; // Vector Pairwise Operations. 
// VPADD : Vector Pairwise Add -def VPADDi8 : N3VDInt<0, 0, 0b00, 0b1011, 1, N3RegFrm, IIC_VBINiD, "vpadd", - "i8", v8i8, v8i8, int_arm_neon_vpadd, 0>; -def VPADDi16 : N3VDInt<0, 0, 0b01, 0b1011, 1, N3RegFrm, IIC_VBINiD, "vpadd", - "i16", v4i16, v4i16, int_arm_neon_vpadd, 0>; -def VPADDi32 : N3VDInt<0, 0, 0b10, 0b1011, 1, N3RegFrm, IIC_VBINiD, "vpadd", - "i32", v2i32, v2i32, int_arm_neon_vpadd, 0>; -def VPADDf : N3VDInt<1, 0, 0b00, 0b1101, 0, N3RegFrm, IIC_VBIND, "vpadd", - "f32", v2f32, v2f32, int_arm_neon_vpadd, 0>; +def VPADDi8 : N3VDInt<0, 0, 0b00, 0b1011, 1, N3RegFrm, IIC_VSHLiD, + "vpadd", "i8", + v8i8, v8i8, int_arm_neon_vpadd, 0>; +def VPADDi16 : N3VDInt<0, 0, 0b01, 0b1011, 1, N3RegFrm, IIC_VSHLiD, + "vpadd", "i16", + v4i16, v4i16, int_arm_neon_vpadd, 0>; +def VPADDi32 : N3VDInt<0, 0, 0b10, 0b1011, 1, N3RegFrm, IIC_VSHLiD, + "vpadd", "i32", + v2i32, v2i32, int_arm_neon_vpadd, 0>; +def VPADDf : N3VDInt<1, 0, 0b00, 0b1101, 0, N3RegFrm, + IIC_VBIND, "vpadd", "f32", + v2f32, v2f32, int_arm_neon_vpadd, 0>; // VPADDL : Vector Pairwise Add Long defm VPADDLs : N2VPLInt_QHS<0b11, 0b11, 0b00, 0b00100, 0, "vpaddl", "s", @@ -2522,35 +2543,35 @@ defm VPADALu : N2VPLInt2_QHS<0b11, 0b11, 0b00, 0b01101, 0, "vpadal", "u", int_arm_neon_vpadalu>; // VPMAX : Vector Pairwise Maximum -def VPMAXs8 : N3VDInt<0, 0, 0b00, 0b1010, 0, N3RegFrm, IIC_VBINi4D, "vpmax", +def VPMAXs8 : N3VDInt<0, 0, 0b00, 0b1010, 0, N3RegFrm, IIC_VSUBi4D, "vpmax", "s8", v8i8, v8i8, int_arm_neon_vpmaxs, 0>; -def VPMAXs16 : N3VDInt<0, 0, 0b01, 0b1010, 0, N3RegFrm, IIC_VBINi4D, "vpmax", +def VPMAXs16 : N3VDInt<0, 0, 0b01, 0b1010, 0, N3RegFrm, IIC_VSUBi4D, "vpmax", "s16", v4i16, v4i16, int_arm_neon_vpmaxs, 0>; -def VPMAXs32 : N3VDInt<0, 0, 0b10, 0b1010, 0, N3RegFrm, IIC_VBINi4D, "vpmax", +def VPMAXs32 : N3VDInt<0, 0, 0b10, 0b1010, 0, N3RegFrm, IIC_VSUBi4D, "vpmax", "s32", v2i32, v2i32, int_arm_neon_vpmaxs, 0>; -def VPMAXu8 : N3VDInt<1, 0, 0b00, 0b1010, 0, N3RegFrm, IIC_VBINi4D, "vpmax", +def VPMAXu8 : N3VDInt<1, 0, 0b00, 0b1010, 0, N3RegFrm, IIC_VSUBi4D, "vpmax", "u8", v8i8, v8i8, int_arm_neon_vpmaxu, 0>; -def VPMAXu16 : N3VDInt<1, 0, 0b01, 0b1010, 0, N3RegFrm, IIC_VBINi4D, "vpmax", +def VPMAXu16 : N3VDInt<1, 0, 0b01, 0b1010, 0, N3RegFrm, IIC_VSUBi4D, "vpmax", "u16", v4i16, v4i16, int_arm_neon_vpmaxu, 0>; -def VPMAXu32 : N3VDInt<1, 0, 0b10, 0b1010, 0, N3RegFrm, IIC_VBINi4D, "vpmax", +def VPMAXu32 : N3VDInt<1, 0, 0b10, 0b1010, 0, N3RegFrm, IIC_VSUBi4D, "vpmax", "u32", v2i32, v2i32, int_arm_neon_vpmaxu, 0>; -def VPMAXf : N3VDInt<1, 0, 0b00, 0b1111, 0, N3RegFrm, IIC_VBINi4D, "vpmax", +def VPMAXf : N3VDInt<1, 0, 0b00, 0b1111, 0, N3RegFrm, IIC_VSUBi4D, "vpmax", "f32", v2f32, v2f32, int_arm_neon_vpmaxs, 0>; // VPMIN : Vector Pairwise Minimum -def VPMINs8 : N3VDInt<0, 0, 0b00, 0b1010, 1, N3RegFrm, IIC_VBINi4D, "vpmin", +def VPMINs8 : N3VDInt<0, 0, 0b00, 0b1010, 1, N3RegFrm, IIC_VSUBi4D, "vpmin", "s8", v8i8, v8i8, int_arm_neon_vpmins, 0>; -def VPMINs16 : N3VDInt<0, 0, 0b01, 0b1010, 1, N3RegFrm, IIC_VBINi4D, "vpmin", +def VPMINs16 : N3VDInt<0, 0, 0b01, 0b1010, 1, N3RegFrm, IIC_VSUBi4D, "vpmin", "s16", v4i16, v4i16, int_arm_neon_vpmins, 0>; -def VPMINs32 : N3VDInt<0, 0, 0b10, 0b1010, 1, N3RegFrm, IIC_VBINi4D, "vpmin", +def VPMINs32 : N3VDInt<0, 0, 0b10, 0b1010, 1, N3RegFrm, IIC_VSUBi4D, "vpmin", "s32", v2i32, v2i32, int_arm_neon_vpmins, 0>; -def VPMINu8 : N3VDInt<1, 0, 0b00, 0b1010, 1, N3RegFrm, IIC_VBINi4D, "vpmin", +def VPMINu8 : N3VDInt<1, 0, 0b00, 0b1010, 1, N3RegFrm, IIC_VSUBi4D, "vpmin", "u8", v8i8, v8i8, int_arm_neon_vpminu, 0>; -def 
VPMINu16 : N3VDInt<1, 0, 0b01, 0b1010, 1, N3RegFrm, IIC_VBINi4D, "vpmin", +def VPMINu16 : N3VDInt<1, 0, 0b01, 0b1010, 1, N3RegFrm, IIC_VSUBi4D, "vpmin", "u16", v4i16, v4i16, int_arm_neon_vpminu, 0>; -def VPMINu32 : N3VDInt<1, 0, 0b10, 0b1010, 1, N3RegFrm, IIC_VBINi4D, "vpmin", +def VPMINu32 : N3VDInt<1, 0, 0b10, 0b1010, 1, N3RegFrm, IIC_VSUBi4D, "vpmin", "u32", v2i32, v2i32, int_arm_neon_vpminu, 0>; -def VPMINf : N3VDInt<1, 0, 0b10, 0b1111, 0, N3RegFrm, IIC_VBINi4D, "vpmin", +def VPMINf : N3VDInt<1, 0, 0b10, 0b1111, 0, N3RegFrm, IIC_VSUBi4D, "vpmin", "f32", v2f32, v2f32, int_arm_neon_vpmins, 0>; // Vector Reciprocal and Reciprocal Square Root Estimate and Step. diff --git a/lib/Target/ARM/ARMInstrThumb2.td b/lib/Target/ARM/ARMInstrThumb2.td index 262aae4..742bd40 100644 --- a/lib/Target/ARM/ARMInstrThumb2.td +++ b/lib/Target/ARM/ARMInstrThumb2.td @@ -2386,9 +2386,25 @@ let Defs = "\tb\t1f\n" "\tmovs\tr0, #1\t@ end eh.setjmp\n" "1:", "", - [(set R0, (ARMeh_sjlj_setjmp GPR:$src, tGPR:$val))]>; + [(set R0, (ARMeh_sjlj_setjmp GPR:$src, tGPR:$val))]>, + Requires<[IsThumb2, HasVFP2]>; } +let Defs = + [ R0, R1, R2, R3, R4, R5, R6, R7, R8, R9, R10, R11, R12, LR ] in { + def t2Int_eh_sjlj_setjmp_nofp : Thumb2XI<(outs), (ins GPR:$src, tGPR:$val), + AddrModeNone, SizeSpecial, NoItinerary, + "str\t$val, [$src, #8]\t@ begin eh.setjmp\n" + "\tmov\t$val, pc\n" + "\tadds\t$val, #9\n" + "\tstr\t$val, [$src, #4]\n" + "\tmovs\tr0, #0\n" + "\tb\t1f\n" + "\tmovs\tr0, #1\t@ end eh.setjmp\n" + "1:", "", + [(set R0, (ARMeh_sjlj_setjmp GPR:$src, tGPR:$val))]>, + Requires<[IsThumb2, NoVFP]>; +} //===----------------------------------------------------------------------===// diff --git a/lib/Target/ARM/ARMInstrVFP.td b/lib/Target/ARM/ARMInstrVFP.td index 0458389..36fcaa1 100644 --- a/lib/Target/ARM/ARMInstrVFP.td +++ b/lib/Target/ARM/ARMInstrVFP.td @@ -256,25 +256,25 @@ def VCVTSD : VFPAI<(outs SPR:$dst), (ins DPR:$a), VFPUnaryFrm, // Between half-precision and single-precision. For disassembly only. 
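Returning briefly to the ARMInstrThumb2.td hunk above: the eh.setjmp pseudo is now duplicated into a VFP and a no-VFP flavor that differ in their Requires lists, so only the variant matching the current subtarget is ever selectable. A rough sketch of that pattern, assuming the surrounding Thumb2 definitions are in scope (the EXAMPLE_* names, empty asm strings, and minimal clobber list are placeholders, not part of the patch):

    let Defs = [ R0 ] in {
      def EXAMPLE_setjmp_vfp  : Thumb2XI<(outs), (ins GPR:$src, tGPR:$val),
                                         AddrModeNone, SizeSpecial, NoItinerary,
                                         "", "", []>,
                                Requires<[IsThumb2, HasVFP2]>;  // picked when VFP2 is available
      def EXAMPLE_setjmp_nofp : Thumb2XI<(outs), (ins GPR:$src, tGPR:$val),
                                         AddrModeNone, SizeSpecial, NoItinerary,
                                         "", "", []>,
                                Requires<[IsThumb2, NoVFP]>;    // mutually exclusive with the above
    }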
def VCVTBSH : ASuI<0b11101, 0b11, 0b0010, 0b01, 0, (outs SPR:$dst), (ins SPR:$a), - /* FIXME */ IIC_fpCVTDS, "vcvtb", ".f32.f16\t$dst, $a", + /* FIXME */ IIC_fpCVTSH, "vcvtb", ".f32.f16\t$dst, $a", [/* For disassembly only; pattern left blank */]>; def : ARMPat<(f32_to_f16 SPR:$a), (i32 (COPY_TO_REGCLASS (VCVTBSH SPR:$a), GPR))>; def VCVTBHS : ASuI<0b11101, 0b11, 0b0011, 0b01, 0, (outs SPR:$dst), (ins SPR:$a), - /* FIXME */ IIC_fpCVTDS, "vcvtb", ".f16.f32\t$dst, $a", + /* FIXME */ IIC_fpCVTHS, "vcvtb", ".f16.f32\t$dst, $a", [/* For disassembly only; pattern left blank */]>; def : ARMPat<(f16_to_f32 GPR:$a), (VCVTBHS (COPY_TO_REGCLASS GPR:$a, SPR))>; def VCVTTSH : ASuI<0b11101, 0b11, 0b0010, 0b11, 0, (outs SPR:$dst), (ins SPR:$a), - /* FIXME */ IIC_fpCVTDS, "vcvtt", ".f32.f16\t$dst, $a", + /* FIXME */ IIC_fpCVTSH, "vcvtt", ".f32.f16\t$dst, $a", [/* For disassembly only; pattern left blank */]>; def VCVTTHS : ASuI<0b11101, 0b11, 0b0011, 0b11, 0, (outs SPR:$dst), (ins SPR:$a), - /* FIXME */ IIC_fpCVTDS, "vcvtt", ".f16.f32\t$dst, $a", + /* FIXME */ IIC_fpCVTHS, "vcvtt", ".f16.f32\t$dst, $a", [/* For disassembly only; pattern left blank */]>; let neverHasSideEffects = 1 in { @@ -306,23 +306,23 @@ def VSQRTS : ASuI<0b11101, 0b11, 0b0001, 0b11, 0, (outs SPR:$dst), (ins SPR:$a), // def VMOVRS : AVConv2I<0b11100001, 0b1010, (outs GPR:$dst), (ins SPR:$src), - IIC_VMOVSI, "vmov", "\t$dst, $src", + IIC_fpMOVSI, "vmov", "\t$dst, $src", [(set GPR:$dst, (bitconvert SPR:$src))]>; def VMOVSR : AVConv4I<0b11100000, 0b1010, (outs SPR:$dst), (ins GPR:$src), - IIC_VMOVIS, "vmov", "\t$dst, $src", + IIC_fpMOVIS, "vmov", "\t$dst, $src", [(set SPR:$dst, (bitconvert GPR:$src))]>; def VMOVRRD : AVConv3I<0b11000101, 0b1011, (outs GPR:$wb, GPR:$dst2), (ins DPR:$src), - IIC_VMOVDI, "vmov", "\t$wb, $dst2, $src", + IIC_fpMOVDI, "vmov", "\t$wb, $dst2, $src", [/* FIXME: Can't write pattern for multiple result instr*/]> { let Inst{7-6} = 0b00; } def VMOVRRS : AVConv3I<0b11000101, 0b1010, (outs GPR:$wb, GPR:$dst2), (ins SPR:$src1, SPR:$src2), - IIC_VMOVDI, "vmov", "\t$wb, $dst2, $src1, $src2", + IIC_fpMOVDI, "vmov", "\t$wb, $dst2, $src1, $src2", [/* For disassembly only; pattern left blank */]> { let Inst{7-6} = 0b00; } @@ -332,14 +332,14 @@ def VMOVRRS : AVConv3I<0b11000101, 0b1010, def VMOVDRR : AVConv5I<0b11000100, 0b1011, (outs DPR:$dst), (ins GPR:$src1, GPR:$src2), - IIC_VMOVID, "vmov", "\t$dst, $src1, $src2", + IIC_fpMOVID, "vmov", "\t$dst, $src1, $src2", [(set DPR:$dst, (arm_fmdrr GPR:$src1, GPR:$src2))]> { let Inst{7-6} = 0b00; } def VMOVSRR : AVConv5I<0b11000100, 0b1010, (outs SPR:$dst1, SPR:$dst2), (ins GPR:$src1, GPR:$src2), - IIC_VMOVID, "vmov", "\t$dst1, $dst2, $src1, $src2", + IIC_fpMOVID, "vmov", "\t$dst1, $dst2, $src1, $src2", [/* For disassembly only; pattern left blank */]> { let Inst{7-6} = 0b00; } @@ -678,7 +678,7 @@ def VMSR : VFPAI<(outs), (ins GPR:$src), VFPMiscFrm, IIC_fpSTAT, "vmsr", // Materialize FP immediates. VFP3 only. 
let isReMaterializable = 1 in { def FCONSTD : VFPAI<(outs DPR:$dst), (ins vfp_f64imm:$imm), - VFPMiscFrm, IIC_VMOVImm, + VFPMiscFrm, IIC_fpUNA64, "vmov", ".f64\t$dst, $imm", [(set DPR:$dst, vfp_f64imm:$imm)]>, Requires<[HasVFP3]> { let Inst{27-23} = 0b11101; @@ -689,7 +689,7 @@ def FCONSTD : VFPAI<(outs DPR:$dst), (ins vfp_f64imm:$imm), } def FCONSTS : VFPAI<(outs SPR:$dst), (ins vfp_f32imm:$imm), - VFPMiscFrm, IIC_VMOVImm, + VFPMiscFrm, IIC_fpUNA32, "vmov", ".f32\t$dst, $imm", [(set SPR:$dst, vfp_f32imm:$imm)]>, Requires<[HasVFP3]> { let Inst{27-23} = 0b11101; diff --git a/lib/Target/ARM/ARMJITInfo.cpp b/lib/Target/ARM/ARMJITInfo.cpp index 8c0b720..b31a4fa 100644 --- a/lib/Target/ARM/ARMJITInfo.cpp +++ b/lib/Target/ARM/ARMJITInfo.cpp @@ -27,7 +27,7 @@ using namespace llvm; void ARMJITInfo::replaceMachineCodeForFunction(void *Old, void *New) { - llvm_report_error("ARMJITInfo::replaceMachineCodeForFunction"); + report_fatal_error("ARMJITInfo::replaceMachineCodeForFunction"); } /// JITCompilerFunction - This contains the address of the JIT function used to diff --git a/lib/Target/ARM/ARMLoadStoreOptimizer.cpp b/lib/Target/ARM/ARMLoadStoreOptimizer.cpp index cb762a4..8585c1e 100644 --- a/lib/Target/ARM/ARMLoadStoreOptimizer.cpp +++ b/lib/Target/ARM/ARMLoadStoreOptimizer.cpp @@ -1358,7 +1358,7 @@ ARMPreAllocLoadStoreOpt::CanFormLdStDWord(MachineInstr *Op0, MachineInstr *Op1, return false; unsigned Align = (*Op0->memoperands_begin())->getAlignment(); - Function *Func = MF->getFunction(); + const Function *Func = MF->getFunction(); unsigned ReqAlign = STI->hasV6Ops() ? TD->getPrefTypeAlignment(Type::getInt64Ty(Func->getContext())) : 8; // Pre-v6 need 8-byte align diff --git a/lib/Target/ARM/ARMMachineFunctionInfo.h b/lib/Target/ARM/ARMMachineFunctionInfo.h index c998ede..0134276 100644 --- a/lib/Target/ARM/ARMMachineFunctionInfo.h +++ b/lib/Target/ARM/ARMMachineFunctionInfo.h @@ -85,6 +85,9 @@ class ARMFunctionInfo : public MachineFunctionInfo { unsigned ConstPoolEntryUId; + /// VarArgsFrameIndex - FrameIndex for start of varargs area. 
+ int VarArgsFrameIndex; + public: ARMFunctionInfo() : isThumb(false), @@ -94,7 +97,7 @@ public: FramePtrSpillOffset(0), GPRCS1Offset(0), GPRCS2Offset(0), DPRCSOffset(0), GPRCS1Size(0), GPRCS2Size(0), DPRCSSize(0), GPRCS1Frames(0), GPRCS2Frames(0), DPRCSFrames(0), - JumpTableUId(0), ConstPoolEntryUId(0) {} + JumpTableUId(0), ConstPoolEntryUId(0), VarArgsFrameIndex(0) {} explicit ARMFunctionInfo(MachineFunction &MF) : isThumb(MF.getTarget().getSubtarget<ARMSubtarget>().isThumb()), @@ -105,7 +108,7 @@ public: GPRCS1Size(0), GPRCS2Size(0), DPRCSSize(0), GPRCS1Frames(32), GPRCS2Frames(32), DPRCSFrames(32), SpilledCSRegs(MF.getTarget().getRegisterInfo()->getNumRegs()), - JumpTableUId(0), ConstPoolEntryUId(0) {} + JumpTableUId(0), ConstPoolEntryUId(0), VarArgsFrameIndex(0) {} bool isThumbFunction() const { return isThumb; } bool isThumb1OnlyFunction() const { return isThumb && !hasThumb2; } @@ -223,6 +226,9 @@ public: unsigned createConstPoolEntryUId() { return ConstPoolEntryUId++; } + + int getVarArgsFrameIndex() const { return VarArgsFrameIndex; } + void setVarArgsFrameIndex(int Index) { VarArgsFrameIndex = Index; } }; } // End llvm namespace diff --git a/lib/Target/ARM/ARMSchedule.td b/lib/Target/ARM/ARMSchedule.td index fc4c5f5..b60ccca 100644 --- a/lib/Target/ARM/ARMSchedule.td +++ b/lib/Target/ARM/ARMSchedule.td @@ -8,17 +8,6 @@ //===----------------------------------------------------------------------===// //===----------------------------------------------------------------------===// -// Functional units across ARM processors -// -def FU_Issue : FuncUnit; // issue -def FU_Pipe0 : FuncUnit; // pipeline 0 -def FU_Pipe1 : FuncUnit; // pipeline 1 -def FU_LdSt0 : FuncUnit; // pipeline 0 load/store -def FU_LdSt1 : FuncUnit; // pipeline 1 load/store -def FU_NPipe : FuncUnit; // NEON ALU/MUL pipe -def FU_NLSPipe : FuncUnit; // NEON LS pipe - -//===----------------------------------------------------------------------===// // Instruction Itinerary classes used for ARM // def IIC_iALUx : InstrItinClass; @@ -69,10 +58,16 @@ def IIC_fpCMP32 : InstrItinClass; def IIC_fpCMP64 : InstrItinClass; def IIC_fpCVTSD : InstrItinClass; def IIC_fpCVTDS : InstrItinClass; +def IIC_fpCVTSH : InstrItinClass; +def IIC_fpCVTHS : InstrItinClass; def IIC_fpCVTIS : InstrItinClass; def IIC_fpCVTID : InstrItinClass; def IIC_fpCVTSI : InstrItinClass; def IIC_fpCVTDI : InstrItinClass; +def IIC_fpMOVIS : InstrItinClass; +def IIC_fpMOVID : InstrItinClass; +def IIC_fpMOVSI : InstrItinClass; +def IIC_fpMOVDI : InstrItinClass; def IIC_fpALU32 : InstrItinClass; def IIC_fpALU64 : InstrItinClass; def IIC_fpMUL32 : InstrItinClass; @@ -125,6 +120,10 @@ def IIC_VSUBiD : InstrItinClass; def IIC_VSUBiQ : InstrItinClass; def IIC_VBINi4D : InstrItinClass; def IIC_VBINi4Q : InstrItinClass; +def IIC_VSUBi4D : InstrItinClass; +def IIC_VSUBi4Q : InstrItinClass; +def IIC_VABAD : InstrItinClass; +def IIC_VABAQ : InstrItinClass; def IIC_VSHLiD : InstrItinClass; def IIC_VSHLiQ : InstrItinClass; def IIC_VSHLi4D : InstrItinClass; @@ -153,8 +152,8 @@ def IIC_VTBX4 : InstrItinClass; //===----------------------------------------------------------------------===// // Processor instruction itineraries. 
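The ARMSchedule.td hunk above only declares itinerary classes (IIC_fpCVTSH, IIC_fpMOVSI, IIC_VSUBi4D, IIC_VABAD, and so on) and moves the functional-unit definitions out into the per-CPU files added below; the actual timing is bound to those classes by ProcessorItineraries definitions such as CortexA8Itineraries. A minimal, self-contained sketch of how the pieces fit together (EX_Pipe, IIC_Example and ExampleItineraries are made-up names, not from the patch; the GenericItineraries definition just below shows the same two-list ProcessorItineraries form with both lists empty):

    // A functional unit and an itinerary class, normally declared in a
    // Schedule.td file.
    def EX_Pipe     : FuncUnit;
    def IIC_Example : InstrItinClass;

    // A processor itinerary binds pipeline stages and operand cycles to the
    // class: one cycle on EX_Pipe, the result (operand 0) ready in cycle 2,
    // both source operands read in cycle 1.
    def ExampleItineraries : ProcessorItineraries<[EX_Pipe], [
      InstrItinData<IIC_Example, [InstrStage<1, [EX_Pipe]>], [2, 1, 1]>
    ]>;

Instruction definitions then only name the class -- for example the NEON defs above now pass IIC_VSUBi4D / IIC_VSUBi4Q instead of IIC_VBINi4D / IIC_VBINi4Q -- and pick up whatever timing the selected processor itinerary supplies.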
-def GenericItineraries : ProcessorItineraries<[]>; - +def GenericItineraries : ProcessorItineraries<[], []>; include "ARMScheduleV6.td" -include "ARMScheduleV7.td" +include "ARMScheduleA8.td" +include "ARMScheduleA9.td" diff --git a/lib/Target/ARM/ARMScheduleA8.td b/lib/Target/ARM/ARMScheduleA8.td new file mode 100644 index 0000000..bbfc0b2 --- /dev/null +++ b/lib/Target/ARM/ARMScheduleA8.td @@ -0,0 +1,618 @@ +//=- ARMScheduleA8.td - ARM Cortex-A8 Scheduling Definitions -*- tablegen -*-=// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines the itinerary class data for the ARM Cortex A8 processors. +// +//===----------------------------------------------------------------------===// + +// +// Scheduling information derived from "Cortex-A8 Technical Reference Manual". +// Functional Units. +def A8_Issue : FuncUnit; // issue +def A8_Pipe0 : FuncUnit; // pipeline 0 +def A8_Pipe1 : FuncUnit; // pipeline 1 +def A8_LdSt0 : FuncUnit; // pipeline 0 load/store +def A8_LdSt1 : FuncUnit; // pipeline 1 load/store +def A8_NPipe : FuncUnit; // NEON ALU/MUL pipe +def A8_NLSPipe : FuncUnit; // NEON LS pipe +// +// Dual issue pipeline represented by A8_Pipe0 | A8_Pipe1 +// +def CortexA8Itineraries : ProcessorItineraries< + [A8_Issue, A8_Pipe0, A8_Pipe1, A8_LdSt0, A8_LdSt1, A8_NPipe, A8_NLSPipe], [ + // Two fully-pipelined integer ALU pipelines + // + // No operand cycles + InstrItinData<IIC_iALUx , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>]>, + // + // Binary Instructions that produce a result + InstrItinData<IIC_iALUi , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 2]>, + InstrItinData<IIC_iALUr , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 2, 2]>, + InstrItinData<IIC_iALUsi , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 2, 1]>, + InstrItinData<IIC_iALUsr , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 2, 1, 1]>, + // + // Unary Instructions that produce a result + InstrItinData<IIC_iUNAr , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 2]>, + InstrItinData<IIC_iUNAsi , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 1]>, + InstrItinData<IIC_iUNAsr , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 1, 1]>, + // + // Compare instructions + InstrItinData<IIC_iCMPi , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2]>, + InstrItinData<IIC_iCMPr , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 2]>, + InstrItinData<IIC_iCMPsi , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 1]>, + InstrItinData<IIC_iCMPsr , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 1, 1]>, + // + // Move instructions, unconditional + InstrItinData<IIC_iMOVi , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [1]>, + InstrItinData<IIC_iMOVr , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [1, 1]>, + InstrItinData<IIC_iMOVsi , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [1, 1]>, + InstrItinData<IIC_iMOVsr , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [1, 1, 1]>, + // + // Move instructions, conditional + InstrItinData<IIC_iCMOVi , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2]>, + InstrItinData<IIC_iCMOVr , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 1]>, + InstrItinData<IIC_iCMOVsi , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 1]>, + InstrItinData<IIC_iCMOVsr , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 1, 1]>, + + // Integer multiply pipeline + // Result written in E5, but that is relative to the last cycle of multicycle, + // so we use 6 for those cases + // + InstrItinData<IIC_iMUL16 , [InstrStage<1, 
[A8_Pipe0]>], [5, 1, 1]>, + InstrItinData<IIC_iMAC16 , [InstrStage<1, [A8_Pipe1], 0>, + InstrStage<2, [A8_Pipe0]>], [6, 1, 1, 4]>, + InstrItinData<IIC_iMUL32 , [InstrStage<1, [A8_Pipe1], 0>, + InstrStage<2, [A8_Pipe0]>], [6, 1, 1]>, + InstrItinData<IIC_iMAC32 , [InstrStage<1, [A8_Pipe1], 0>, + InstrStage<2, [A8_Pipe0]>], [6, 1, 1, 4]>, + InstrItinData<IIC_iMUL64 , [InstrStage<2, [A8_Pipe1], 0>, + InstrStage<3, [A8_Pipe0]>], [6, 6, 1, 1]>, + InstrItinData<IIC_iMAC64 , [InstrStage<2, [A8_Pipe1], 0>, + InstrStage<3, [A8_Pipe0]>], [6, 6, 1, 1]>, + + // Integer load pipeline + // + // loads have an extra cycle of latency, but are fully pipelined + // use A8_Issue to enforce the 1 load/store per cycle limit + // + // Immediate offset + InstrItinData<IIC_iLoadi , [InstrStage<1, [A8_Issue], 0>, + InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrStage<1, [A8_LdSt0]>], [3, 1]>, + // + // Register offset + InstrItinData<IIC_iLoadr , [InstrStage<1, [A8_Issue], 0>, + InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrStage<1, [A8_LdSt0]>], [3, 1, 1]>, + // + // Scaled register offset, issues over 2 cycles + InstrItinData<IIC_iLoadsi , [InstrStage<2, [A8_Issue], 0>, + InstrStage<1, [A8_Pipe0], 0>, + InstrStage<1, [A8_Pipe1]>, + InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrStage<1, [A8_LdSt0]>], [4, 1, 1]>, + // + // Immediate offset with update + InstrItinData<IIC_iLoadiu , [InstrStage<1, [A8_Issue], 0>, + InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrStage<1, [A8_LdSt0]>], [3, 2, 1]>, + // + // Register offset with update + InstrItinData<IIC_iLoadru , [InstrStage<1, [A8_Issue], 0>, + InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrStage<1, [A8_LdSt0]>], [3, 2, 1, 1]>, + // + // Scaled register offset with update, issues over 2 cycles + InstrItinData<IIC_iLoadsiu , [InstrStage<2, [A8_Issue], 0>, + InstrStage<1, [A8_Pipe0], 0>, + InstrStage<1, [A8_Pipe1]>, + InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrStage<1, [A8_LdSt0]>], [4, 3, 1, 1]>, + // + // Load multiple + InstrItinData<IIC_iLoadm , [InstrStage<2, [A8_Issue], 0>, + InstrStage<2, [A8_Pipe0], 0>, + InstrStage<2, [A8_Pipe1]>, + InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrStage<1, [A8_LdSt0]>]>, + + // Integer store pipeline + // + // use A8_Issue to enforce the 1 load/store per cycle limit + // + // Immediate offset + InstrItinData<IIC_iStorei , [InstrStage<1, [A8_Issue], 0>, + InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrStage<1, [A8_LdSt0]>], [3, 1]>, + // + // Register offset + InstrItinData<IIC_iStorer , [InstrStage<1, [A8_Issue], 0>, + InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrStage<1, [A8_LdSt0]>], [3, 1, 1]>, + // + // Scaled register offset, issues over 2 cycles + InstrItinData<IIC_iStoresi , [InstrStage<2, [A8_Issue], 0>, + InstrStage<1, [A8_Pipe0], 0>, + InstrStage<1, [A8_Pipe1]>, + InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrStage<1, [A8_LdSt0]>], [3, 1, 1]>, + // + // Immediate offset with update + InstrItinData<IIC_iStoreiu , [InstrStage<1, [A8_Issue], 0>, + InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrStage<1, [A8_LdSt0]>], [2, 3, 1]>, + // + // Register offset with update + InstrItinData<IIC_iStoreru , [InstrStage<1, [A8_Issue], 0>, + InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrStage<1, [A8_LdSt0]>], [2, 3, 1, 1]>, + // + // Scaled register offset with update, issues over 2 cycles + InstrItinData<IIC_iStoresiu, [InstrStage<2, [A8_Issue], 0>, + InstrStage<1, [A8_Pipe0], 0>, + InstrStage<1, [A8_Pipe1]>, + InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrStage<1, [A8_LdSt0]>], [3, 3, 1, 1]>, + // + // Store multiple + InstrItinData<IIC_iStorem , [InstrStage<2, 
[A8_Issue], 0>, + InstrStage<2, [A8_Pipe0], 0>, + InstrStage<2, [A8_Pipe1]>, + InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrStage<1, [A8_LdSt0]>]>, + + // Branch + // + // no delay slots, so the latency of a branch is unimportant + InstrItinData<IIC_Br , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>]>, + + // VFP + // Issue through integer pipeline, and execute in NEON unit. We assume + // RunFast mode so that NFP pipeline is used for single-precision when + // possible. + // + // FP Special Register to Integer Register File Move + InstrItinData<IIC_fpSTAT , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrStage<1, [A8_NLSPipe]>]>, + // + // Single-precision FP Unary + InstrItinData<IIC_fpUNA32 , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrStage<1, [A8_NPipe]>], [7, 1]>, + // + // Double-precision FP Unary + InstrItinData<IIC_fpUNA64 , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrStage<4, [A8_NPipe], 0>, + InstrStage<4, [A8_NLSPipe]>], [4, 1]>, + // + // Single-precision FP Compare + InstrItinData<IIC_fpCMP32 , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrStage<1, [A8_NPipe]>], [1, 1]>, + // + // Double-precision FP Compare + InstrItinData<IIC_fpCMP64 , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrStage<4, [A8_NPipe], 0>, + InstrStage<4, [A8_NLSPipe]>], [4, 1]>, + // + // Single to Double FP Convert + InstrItinData<IIC_fpCVTSD , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrStage<7, [A8_NPipe], 0>, + InstrStage<7, [A8_NLSPipe]>], [7, 1]>, + // + // Double to Single FP Convert + InstrItinData<IIC_fpCVTDS , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrStage<5, [A8_NPipe], 0>, + InstrStage<5, [A8_NLSPipe]>], [5, 1]>, + // + // Single-Precision FP to Integer Convert + InstrItinData<IIC_fpCVTSI , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrStage<1, [A8_NPipe]>], [7, 1]>, + // + // Double-Precision FP to Integer Convert + InstrItinData<IIC_fpCVTDI , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrStage<8, [A8_NPipe], 0>, + InstrStage<8, [A8_NLSPipe]>], [8, 1]>, + // + // Integer to Single-Precision FP Convert + InstrItinData<IIC_fpCVTIS , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrStage<1, [A8_NPipe]>], [7, 1]>, + // + // Integer to Double-Precision FP Convert + InstrItinData<IIC_fpCVTID , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrStage<8, [A8_NPipe], 0>, + InstrStage<8, [A8_NLSPipe]>], [8, 1]>, + // + // Single-precision FP ALU + InstrItinData<IIC_fpALU32 , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrStage<1, [A8_NPipe]>], [7, 1, 1]>, + // + // Double-precision FP ALU + InstrItinData<IIC_fpALU64 , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrStage<9, [A8_NPipe], 0>, + InstrStage<9, [A8_NLSPipe]>], [9, 1, 1]>, + // + // Single-precision FP Multiply + InstrItinData<IIC_fpMUL32 , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrStage<1, [A8_NPipe]>], [7, 1, 1]>, + // + // Double-precision FP Multiply + InstrItinData<IIC_fpMUL64 , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrStage<11, [A8_NPipe], 0>, + InstrStage<11, [A8_NLSPipe]>], [11, 1, 1]>, + // + // Single-precision FP MAC + InstrItinData<IIC_fpMAC32 , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrStage<1, [A8_NPipe]>], [7, 2, 1, 1]>, + // + // Double-precision FP MAC + InstrItinData<IIC_fpMAC64 , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrStage<19, [A8_NPipe], 0>, + InstrStage<19, [A8_NLSPipe]>], [19, 2, 1, 1]>, + // + // Single-precision FP DIV + InstrItinData<IIC_fpDIV32 , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrStage<20, [A8_NPipe], 0>, + InstrStage<20, [A8_NLSPipe]>], [20, 1, 1]>, + // + // Double-precision FP DIV + InstrItinData<IIC_fpDIV64 , 
[InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrStage<29, [A8_NPipe], 0>, + InstrStage<29, [A8_NLSPipe]>], [29, 1, 1]>, + // + // Single-precision FP SQRT + InstrItinData<IIC_fpSQRT32, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrStage<19, [A8_NPipe], 0>, + InstrStage<19, [A8_NLSPipe]>], [19, 1]>, + // + // Double-precision FP SQRT + InstrItinData<IIC_fpSQRT64, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrStage<29, [A8_NPipe], 0>, + InstrStage<29, [A8_NLSPipe]>], [29, 1]>, + // + // Single-precision FP Load + // use A8_Issue to enforce the 1 load/store per cycle limit + InstrItinData<IIC_fpLoad32, [InstrStage<1, [A8_Issue], 0>, + InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrStage<1, [A8_LdSt0], 0>, + InstrStage<1, [A8_NLSPipe]>]>, + // + // Double-precision FP Load + // use A8_Issue to enforce the 1 load/store per cycle limit + InstrItinData<IIC_fpLoad64, [InstrStage<2, [A8_Issue], 0>, + InstrStage<1, [A8_Pipe0], 0>, + InstrStage<1, [A8_Pipe1]>, + InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrStage<1, [A8_LdSt0], 0>, + InstrStage<1, [A8_NLSPipe]>]>, + // + // FP Load Multiple + // use A8_Issue to enforce the 1 load/store per cycle limit + InstrItinData<IIC_fpLoadm, [InstrStage<3, [A8_Issue], 0>, + InstrStage<2, [A8_Pipe0], 0>, + InstrStage<2, [A8_Pipe1]>, + InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrStage<1, [A8_LdSt0], 0>, + InstrStage<1, [A8_NLSPipe]>]>, + // + // Single-precision FP Store + // use A8_Issue to enforce the 1 load/store per cycle limit + InstrItinData<IIC_fpStore32,[InstrStage<1, [A8_Issue], 0>, + InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrStage<1, [A8_LdSt0], 0>, + InstrStage<1, [A8_NLSPipe]>]>, + // + // Double-precision FP Store + // use A8_Issue to enforce the 1 load/store per cycle limit + InstrItinData<IIC_fpStore64,[InstrStage<2, [A8_Issue], 0>, + InstrStage<1, [A8_Pipe0], 0>, + InstrStage<1, [A8_Pipe1]>, + InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrStage<1, [A8_LdSt0], 0>, + InstrStage<1, [A8_NLSPipe]>]>, + // + // FP Store Multiple + // use A8_Issue to enforce the 1 load/store per cycle limit + InstrItinData<IIC_fpStorem, [InstrStage<3, [A8_Issue], 0>, + InstrStage<2, [A8_Pipe0], 0>, + InstrStage<2, [A8_Pipe1]>, + InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrStage<1, [A8_LdSt0], 0>, + InstrStage<1, [A8_NLSPipe]>]>, + + // NEON + // Issue through integer pipeline, and execute in NEON unit. 
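Many of the A8 VFP entries above pair two stages where the first has a cycle increment of 0, which makes the second stage start in the same cycle, so both NEON units stay busy for the full duration of the operation. An annotated copy of the single-precision divide entry from above, for reference (the trailing comments are editorial and are not in the file):

    InstrItinData<IIC_fpDIV32 , [InstrStage<1,  [A8_Pipe0, A8_Pipe1]>, // one issue cycle in either ALU pipe
                                 InstrStage<20, [A8_NPipe], 0>,        // increment 0: next stage starts this cycle,
                                 InstrStage<20, [A8_NLSPipe]>],        // so both NEON pipes are held for 20 cycles
                                [20, 1, 1]>,                           // result ready in cycle 20; sources read in cycle 1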
+ // + // VLD1 + // FIXME: We don't model this instruction properly + InstrItinData<IIC_VLD1, [InstrStage<1, [A8_Issue], 0>, + InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrStage<1, [A8_LdSt0], 0>, + InstrStage<1, [A8_NLSPipe]>]>, + // + // VLD2 + // FIXME: We don't model this instruction properly + InstrItinData<IIC_VLD2, [InstrStage<1, [A8_Issue], 0>, + InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrStage<1, [A8_LdSt0], 0>, + InstrStage<1, [A8_NLSPipe]>], [2, 2, 1]>, + // + // VLD3 + // FIXME: We don't model this instruction properly + InstrItinData<IIC_VLD3, [InstrStage<1, [A8_Issue], 0>, + InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrStage<1, [A8_LdSt0], 0>, + InstrStage<1, [A8_NLSPipe]>], [2, 2, 2, 1]>, + // + // VLD4 + // FIXME: We don't model this instruction properly + InstrItinData<IIC_VLD4, [InstrStage<1, [A8_Issue], 0>, + InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrStage<1, [A8_LdSt0], 0>, + InstrStage<1, [A8_NLSPipe]>], [2, 2, 2, 2, 1]>, + // + // VST + // FIXME: We don't model this instruction properly + InstrItinData<IIC_VST, [InstrStage<1, [A8_Issue], 0>, + InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrStage<1, [A8_LdSt0], 0>, + InstrStage<1, [A8_NLSPipe]>]>, + // + // Double-register FP Unary + InstrItinData<IIC_VUNAD, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrStage<1, [A8_NPipe]>], [5, 2]>, + // + // Quad-register FP Unary + // Result written in N5, but that is relative to the last cycle of multicycle, + // so we use 6 for those cases + InstrItinData<IIC_VUNAQ, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrStage<2, [A8_NPipe]>], [6, 2]>, + // + // Double-register FP Binary + InstrItinData<IIC_VBIND, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrStage<1, [A8_NPipe]>], [5, 2, 2]>, + // + // Quad-register FP Binary + // Result written in N5, but that is relative to the last cycle of multicycle, + // so we use 6 for those cases + InstrItinData<IIC_VBINQ, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrStage<2, [A8_NPipe]>], [6, 2, 2]>, + // + // Move Immediate + InstrItinData<IIC_VMOVImm, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrStage<1, [A8_NPipe]>], [3]>, + // + // Double-register Permute Move + InstrItinData<IIC_VMOVD, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrStage<1, [A8_NLSPipe]>], [2, 1]>, + // + // Quad-register Permute Move + // Result written in N2, but that is relative to the last cycle of multicycle, + // so we use 3 for those cases + InstrItinData<IIC_VMOVQ, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrStage<2, [A8_NLSPipe]>], [3, 1]>, + // + // Integer to Single-precision Move + InstrItinData<IIC_VMOVIS , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrStage<1, [A8_NLSPipe]>], [2, 1]>, + // + // Integer to Double-precision Move + InstrItinData<IIC_VMOVID , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrStage<1, [A8_NLSPipe]>], [2, 1, 1]>, + // + // Single-precision to Integer Move + InstrItinData<IIC_VMOVSI , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrStage<1, [A8_NLSPipe]>], [20, 1]>, + // + // Double-precision to Integer Move + InstrItinData<IIC_VMOVDI , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrStage<1, [A8_NLSPipe]>], [20, 20, 1]>, + // + // Integer to Lane Move + InstrItinData<IIC_VMOVISL , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrStage<2, [A8_NLSPipe]>], [3, 1, 1]>, + // + // Double-register Permute + InstrItinData<IIC_VPERMD, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrStage<1, [A8_NLSPipe]>], [2, 2, 1, 1]>, + // + // Quad-register Permute + // Result written in N2, but that is relative to the last cycle of multicycle, + // so we use 3 for those cases + 
InstrItinData<IIC_VPERMQ, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrStage<2, [A8_NLSPipe]>], [3, 3, 1, 1]>, + // + // Quad-register Permute (3 cycle issue) + // Result written in N2, but that is relative to the last cycle of multicycle, + // so we use 4 for those cases + InstrItinData<IIC_VPERMQ3, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrStage<1, [A8_NLSPipe]>, + InstrStage<1, [A8_NPipe], 0>, + InstrStage<2, [A8_NLSPipe]>], [4, 4, 1, 1]>, + // + // Double-register FP Multiple-Accumulate + InstrItinData<IIC_VMACD, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrStage<1, [A8_NPipe]>], [9, 3, 2, 2]>, + // + // Quad-register FP Multiple-Accumulate + // Result written in N9, but that is relative to the last cycle of multicycle, + // so we use 10 for those cases + InstrItinData<IIC_VMACQ, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrStage<2, [A8_NPipe]>], [10, 3, 2, 2]>, + // + // Double-register Reciprical Step + InstrItinData<IIC_VRECSD, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrStage<1, [A8_NPipe]>], [9, 2, 2]>, + // + // Quad-register Reciprical Step + InstrItinData<IIC_VRECSQ, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrStage<2, [A8_NPipe]>], [10, 2, 2]>, + // + // Double-register Integer Count + InstrItinData<IIC_VCNTiD, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrStage<1, [A8_NPipe]>], [3, 2, 2]>, + // + // Quad-register Integer Count + // Result written in N3, but that is relative to the last cycle of multicycle, + // so we use 4 for those cases + InstrItinData<IIC_VCNTiQ, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrStage<2, [A8_NPipe]>], [4, 2, 2]>, + // + // Double-register Integer Unary + InstrItinData<IIC_VUNAiD, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrStage<1, [A8_NPipe]>], [4, 2]>, + // + // Quad-register Integer Unary + InstrItinData<IIC_VUNAiQ, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrStage<1, [A8_NPipe]>], [4, 2]>, + // + // Double-register Integer Q-Unary + InstrItinData<IIC_VQUNAiD, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrStage<1, [A8_NPipe]>], [4, 1]>, + // + // Quad-register Integer CountQ-Unary + InstrItinData<IIC_VQUNAiQ, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrStage<1, [A8_NPipe]>], [4, 1]>, + // + // Double-register Integer Binary + InstrItinData<IIC_VBINiD, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrStage<1, [A8_NPipe]>], [3, 2, 2]>, + // + // Quad-register Integer Binary + InstrItinData<IIC_VBINiQ, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrStage<1, [A8_NPipe]>], [3, 2, 2]>, + // + // Double-register Integer Binary (4 cycle) + InstrItinData<IIC_VBINi4D, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrStage<1, [A8_NPipe]>], [4, 2, 1]>, + // + // Quad-register Integer Binary (4 cycle) + InstrItinData<IIC_VBINi4Q, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrStage<1, [A8_NPipe]>], [4, 2, 1]>, + + // + // Double-register Integer Subtract + InstrItinData<IIC_VSUBiD, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrStage<1, [A8_NPipe]>], [3, 2, 1]>, + // + // Quad-register Integer Subtract + InstrItinData<IIC_VSUBiQ, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrStage<1, [A8_NPipe]>], [3, 2, 1]>, + // + // Double-register Integer Subtract + InstrItinData<IIC_VSUBi4D, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrStage<1, [A8_NPipe]>], [4, 2, 1]>, + // + // Quad-register Integer Subtract + InstrItinData<IIC_VSUBi4Q, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrStage<1, [A8_NPipe]>], [4, 2, 1]>, + // + // Double-register Integer Shift + InstrItinData<IIC_VSHLiD, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrStage<1, [A8_NPipe]>], [3, 1, 1]>, + // + // 
Quad-register Integer Shift + InstrItinData<IIC_VSHLiQ, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrStage<2, [A8_NPipe]>], [4, 1, 1]>, + // + // Double-register Integer Shift (4 cycle) + InstrItinData<IIC_VSHLi4D, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrStage<1, [A8_NPipe]>], [4, 1, 1]>, + // + // Quad-register Integer Shift (4 cycle) + InstrItinData<IIC_VSHLi4Q, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrStage<2, [A8_NPipe]>], [5, 1, 1]>, + // + // Double-register Integer Pair Add Long + InstrItinData<IIC_VPALiD, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrStage<1, [A8_NPipe]>], [6, 3, 1]>, + // + // Quad-register Integer Pair Add Long + InstrItinData<IIC_VPALiQ, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrStage<2, [A8_NPipe]>], [7, 3, 1]>, + // + // Double-register Absolute Difference and Accumulate + InstrItinData<IIC_VABAD, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrStage<1, [A8_NPipe]>], [6, 3, 2, 1]>, + // + // Quad-register Absolute Difference and Accumulate + InstrItinData<IIC_VABAQ, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrStage<2, [A8_NPipe]>], [6, 3, 2, 1]>, + + // + // Double-register Integer Multiply (.8, .16) + InstrItinData<IIC_VMULi16D, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrStage<1, [A8_NPipe]>], [6, 2, 2]>, + // + // Double-register Integer Multiply (.32) + InstrItinData<IIC_VMULi32D, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrStage<2, [A8_NPipe]>], [7, 2, 1]>, + // + // Quad-register Integer Multiply (.8, .16) + InstrItinData<IIC_VMULi16Q, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrStage<2, [A8_NPipe]>], [7, 2, 2]>, + // + // Quad-register Integer Multiply (.32) + InstrItinData<IIC_VMULi32Q, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrStage<1, [A8_NPipe]>, + InstrStage<2, [A8_NLSPipe], 0>, + InstrStage<3, [A8_NPipe]>], [9, 2, 1]>, + // + // Double-register Integer Multiply-Accumulate (.8, .16) + InstrItinData<IIC_VMACi16D, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrStage<1, [A8_NPipe]>], [6, 3, 2, 2]>, + // + // Double-register Integer Multiply-Accumulate (.32) + InstrItinData<IIC_VMACi32D, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrStage<2, [A8_NPipe]>], [7, 3, 2, 1]>, + // + // Quad-register Integer Multiply-Accumulate (.8, .16) + InstrItinData<IIC_VMACi16Q, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrStage<2, [A8_NPipe]>], [7, 3, 2, 2]>, + // + // Quad-register Integer Multiply-Accumulate (.32) + InstrItinData<IIC_VMACi32Q, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrStage<1, [A8_NPipe]>, + InstrStage<2, [A8_NLSPipe], 0>, + InstrStage<3, [A8_NPipe]>], [9, 3, 2, 1]>, + // + // Double-register VEXT + InstrItinData<IIC_VEXTD, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrStage<1, [A8_NLSPipe]>], [2, 1, 1]>, + // + // Quad-register VEXT + InstrItinData<IIC_VEXTQ, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrStage<2, [A8_NLSPipe]>], [3, 1, 1]>, + // + // VTB + InstrItinData<IIC_VTB1, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrStage<2, [A8_NLSPipe]>], [3, 2, 1]>, + InstrItinData<IIC_VTB2, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrStage<2, [A8_NLSPipe]>], [3, 2, 2, 1]>, + InstrItinData<IIC_VTB3, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrStage<1, [A8_NLSPipe]>, + InstrStage<1, [A8_NPipe], 0>, + InstrStage<2, [A8_NLSPipe]>], [4, 2, 2, 3, 1]>, + InstrItinData<IIC_VTB4, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrStage<1, [A8_NLSPipe]>, + InstrStage<1, [A8_NPipe], 0>, + InstrStage<2, [A8_NLSPipe]>], [4, 2, 2, 3, 3, 1]>, + // + // VTBX + InstrItinData<IIC_VTBX1, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrStage<2, [A8_NLSPipe]>], [3, 1, 
2, 1]>,
+  InstrItinData<IIC_VTBX2, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
+                            InstrStage<2, [A8_NLSPipe]>], [3, 1, 2, 2, 1]>,
+  InstrItinData<IIC_VTBX3, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
+                            InstrStage<1, [A8_NLSPipe]>,
+                            InstrStage<1, [A8_NPipe], 0>,
+                            InstrStage<2, [A8_NLSPipe]>], [4, 1, 2, 2, 3, 1]>,
+  InstrItinData<IIC_VTBX4, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
+                            InstrStage<1, [A8_NLSPipe]>,
+                            InstrStage<1, [A8_NPipe], 0>,
+                            InstrStage<2, [A8_NLSPipe]>], [4, 1, 2, 2, 3, 3, 1]>
+]>;
diff --git a/lib/Target/ARM/ARMScheduleA9.td b/lib/Target/ARM/ARMScheduleA9.td
new file mode 100644
index 0000000..75320d9
--- /dev/null
+++ b/lib/Target/ARM/ARMScheduleA9.td
@@ -0,0 +1,749 @@
+//=- ARMScheduleA9.td - ARM Cortex-A9 Scheduling Definitions -*- tablegen -*-=//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the itinerary class data for the ARM Cortex A9 processors.
+//
+//===----------------------------------------------------------------------===//
+
+//
+// Ad-hoc scheduling information derived from the rather vague "Cortex-A9
+// Technical Reference Manual".
+//
+// Functional units
+def A9_Issue   : FuncUnit; // issue
+def A9_Pipe0   : FuncUnit; // pipeline 0
+def A9_Pipe1   : FuncUnit; // pipeline 1
+def A9_LSPipe  : FuncUnit; // LS pipe
+def A9_NPipe   : FuncUnit; // NEON ALU/MUL pipe
+def A9_DRegsVFP: FuncUnit; // FP register set, VFP side
+def A9_DRegsN  : FuncUnit; // FP register set, NEON side
+
+// Dual issue pipeline represented by A9_Pipe0 | A9_Pipe1
+//
+def CortexA9Itineraries : ProcessorItineraries<
+  [A9_NPipe, A9_DRegsN, A9_DRegsVFP, A9_LSPipe, A9_Pipe0, A9_Pipe1, A9_Issue], [
+  // VFP and NEON share the same register file. This means that every VFP
+  // instruction should wait for full completion of the preceding NEON
+  // instruction and vice-versa. We model this behavior with two artificial FUs:
+  // DRegsVFP and DRegsN.
+  //
+  // Every VFP instruction:
+  //  - Acquires DRegsVFP resource for 1 cycle
+  //  - Reserves DRegsN resource for the whole duration (including time to
+  //    register file writeback!).
+  // Every NEON instruction does the same but with FUs swapped.
+  //
+  // Since the reserved FU cannot be acquired, this models "cross-domain"
+  // stalls precisely.
+
+  // VFP
+  // Issue through integer pipeline, and execute in NEON unit.
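To make the cross-domain rule above concrete, here is an annotated copy of the single-precision FP unary entry that appears a few lines below (the annotations are editorial; the entry itself is unchanged):

    InstrItinData<IIC_fpUNA32 , [InstrStage<1, [A9_DRegsVFP], 0, Required>, // must actually acquire the VFP side
                                 InstrStage<3, [A9_DRegsN],   0, Reserved>, // NEON side only held, for 3 cycles
                                                                            // (execute plus the 2-cycle writeback),
                                                                            // so a following NEON op stalls that long
                                 InstrStage<1, [A9_Pipe0, A9_Pipe1]>,
                                 InstrStage<1, [A9_NPipe]>], [1, 1]>,

The NEON entries further down mirror this with A9_DRegsN as Required and A9_DRegsVFP as Reserved, which is how the back-to-back VFP/NEON penalty is expressed without any special-casing in the scheduler.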
+ + // FP Special Register to Integer Register File Move + InstrItinData<IIC_fpSTAT , [InstrStage<1, [A9_DRegsVFP], 0, Required>, + InstrStage<2, [A9_DRegsN], 0, Reserved>, + InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<1, [A9_NPipe]>]>, + // + // Single-precision FP Unary + InstrItinData<IIC_fpUNA32 , [InstrStage<1, [A9_DRegsVFP], 0, Required>, + // Extra latency cycles since wbck is 2 cycles + InstrStage<3, [A9_DRegsN], 0, Reserved>, + InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<1, [A9_NPipe]>], [1, 1]>, + // + // Double-precision FP Unary + InstrItinData<IIC_fpUNA64 , [InstrStage<1, [A9_DRegsVFP], 0, Required>, + // Extra latency cycles since wbck is 2 cycles + InstrStage<3, [A9_DRegsN], 0, Reserved>, + InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<1, [A9_NPipe]>], [1, 1]>, + + // + // Single-precision FP Compare + InstrItinData<IIC_fpCMP32 , [InstrStage<1, [A9_DRegsVFP], 0, Required>, + // Extra latency cycles since wbck is 4 cycles + InstrStage<5, [A9_DRegsN], 0, Reserved>, + InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<1, [A9_NPipe]>], [1, 1]>, + // + // Double-precision FP Compare + InstrItinData<IIC_fpCMP64 , [InstrStage<1, [A9_DRegsVFP], 0, Required>, + // Extra latency cycles since wbck is 4 cycles + InstrStage<5, [A9_DRegsN], 0, Reserved>, + InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<1, [A9_NPipe]>], [1, 1]>, + // + // Single to Double FP Convert + InstrItinData<IIC_fpCVTSD , [InstrStage<1, [A9_DRegsVFP], 0, Required>, + InstrStage<5, [A9_DRegsN], 0, Reserved>, + InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<1, [A9_NPipe]>], [4, 1]>, + // + // Double to Single FP Convert + InstrItinData<IIC_fpCVTDS , [InstrStage<1, [A9_DRegsVFP], 0, Required>, + InstrStage<5, [A9_DRegsN], 0, Reserved>, + InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<1, [A9_NPipe]>], [4, 1]>, + + // + // Single to Half FP Convert + InstrItinData<IIC_fpCVTSH , [InstrStage<1, [A9_DRegsVFP], 0, Required>, + InstrStage<5, [A9_DRegsN], 0, Reserved>, + InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<1, [A9_NPipe]>], [4, 1]>, + // + // Half to Single FP Convert + InstrItinData<IIC_fpCVTHS , [InstrStage<1, [A9_DRegsVFP], 0, Required>, + InstrStage<3, [A9_DRegsN], 0, Reserved>, + InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<1, [A9_NPipe]>], [2, 1]>, + + // + // Single-Precision FP to Integer Convert + InstrItinData<IIC_fpCVTSI , [InstrStage<1, [A9_DRegsVFP], 0, Required>, + InstrStage<5, [A9_DRegsN], 0, Reserved>, + InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<1, [A9_NPipe]>], [4, 1]>, + // + // Double-Precision FP to Integer Convert + InstrItinData<IIC_fpCVTDI , [InstrStage<1, [A9_DRegsVFP], 0, Required>, + InstrStage<5, [A9_DRegsN], 0, Reserved>, + InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<1, [A9_NPipe]>], [4, 1]>, + // + // Integer to Single-Precision FP Convert + InstrItinData<IIC_fpCVTIS , [InstrStage<1, [A9_DRegsVFP], 0, Required>, + InstrStage<5, [A9_DRegsN], 0, Reserved>, + InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<1, [A9_NPipe]>], [4, 1]>, + // + // Integer to Double-Precision FP Convert + InstrItinData<IIC_fpCVTID , [InstrStage<1, [A9_DRegsVFP], 0, Required>, + InstrStage<5, [A9_DRegsN], 0, Reserved>, + InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<1, [A9_NPipe]>], [4, 1]>, + // + // Single-precision FP ALU + InstrItinData<IIC_fpALU32 , [InstrStage<1, [A9_DRegsVFP], 0, Required>, + InstrStage<5, [A9_DRegsN], 0, Reserved>, + InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<1, [A9_NPipe]>], [4, 1, 1]>, + // + // Double-precision FP ALU + 
InstrItinData<IIC_fpALU64 , [InstrStage<1, [A9_DRegsVFP], 0, Required>, + InstrStage<5, [A9_DRegsN], 0, Reserved>, + InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<1, [A9_NPipe]>], [4, 1, 1]>, + // + // Single-precision FP Multiply + InstrItinData<IIC_fpMUL32 , [InstrStage<1, [A9_DRegsVFP], 0, Required>, + InstrStage<6, [A9_DRegsN], 0, Reserved>, + InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<1, [A9_NPipe]>], [5, 1, 1]>, + // + // Double-precision FP Multiply + InstrItinData<IIC_fpMUL64 , [InstrStage<1, [A9_DRegsVFP], 0, Required>, + InstrStage<7, [A9_DRegsN], 0, Reserved>, + InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<2, [A9_NPipe]>], [6, 1, 1]>, + // + // Single-precision FP MAC + InstrItinData<IIC_fpMAC32 , [InstrStage<1, [A9_DRegsVFP], 0, Required>, + InstrStage<9, [A9_DRegsN], 0, Reserved>, + InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<1, [A9_NPipe]>], [8, 0, 1, 1]>, + // + // Double-precision FP MAC + InstrItinData<IIC_fpMAC64 , [InstrStage<1, [A9_DRegsVFP], 0, Required>, + InstrStage<10, [A9_DRegsN], 0, Reserved>, + InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<2, [A9_NPipe]>], [9, 0, 1, 1]>, + // + // Single-precision FP DIV + InstrItinData<IIC_fpDIV32 , [InstrStage<1, [A9_DRegsVFP], 0, Required>, + InstrStage<16, [A9_DRegsN], 0, Reserved>, + InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<10, [A9_NPipe]>], [15, 1, 1]>, + // + // Double-precision FP DIV + InstrItinData<IIC_fpDIV64 , [InstrStage<1, [A9_DRegsVFP], 0, Required>, + InstrStage<26, [A9_DRegsN], 0, Reserved>, + InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<20, [A9_NPipe]>], [25, 1, 1]>, + // + // Single-precision FP SQRT + InstrItinData<IIC_fpSQRT32, [InstrStage<1, [A9_DRegsVFP], 0, Required>, + InstrStage<18, [A9_DRegsN], 0, Reserved>, + InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<13, [A9_NPipe]>], [17, 1]>, + // + // Double-precision FP SQRT + InstrItinData<IIC_fpSQRT64, [InstrStage<1, [A9_DRegsVFP], 0, Required>, + InstrStage<33, [A9_DRegsN], 0, Reserved>, + InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<28, [A9_NPipe]>], [32, 1]>, + + // + // Integer to Single-precision Move + InstrItinData<IIC_fpMOVIS, [InstrStage<1, [A9_DRegsVFP], 0, Required>, + // Extra 1 latency cycle since wbck is 2 cycles + InstrStage<3, [A9_DRegsN], 0, Reserved>, + InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<1, [A9_NPipe]>], [1, 1]>, + // + // Integer to Double-precision Move + InstrItinData<IIC_fpMOVID, [InstrStage<1, [A9_DRegsVFP], 0, Required>, + // Extra 1 latency cycle since wbck is 2 cycles + InstrStage<3, [A9_DRegsN], 0, Reserved>, + InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<1, [A9_NPipe]>], [1, 1, 1]>, + // + // Single-precision to Integer Move + InstrItinData<IIC_fpMOVSI, [InstrStage<1, [A9_DRegsVFP], 0, Required>, + InstrStage<2, [A9_DRegsN], 0, Reserved>, + InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<1, [A9_NPipe]>], [1, 1]>, + // + // Double-precision to Integer Move + InstrItinData<IIC_fpMOVDI, [InstrStage<1, [A9_DRegsVFP], 0, Required>, + InstrStage<2, [A9_DRegsN], 0, Reserved>, + InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<1, [A9_NPipe]>], [1, 1, 1]>, + // + // Single-precision FP Load + // use A9_Issue to enforce the 1 load/store per cycle limit + InstrItinData<IIC_fpLoad32, [InstrStage<1, [A9_DRegsVFP], 0, Required>, + InstrStage<2, [A9_DRegsN], 0, Reserved>, + InstrStage<1, [A9_Issue], 0>, + InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<1, [A9_LSPipe], 0>, + InstrStage<1, [A9_NPipe]>]>, + // + // Double-precision FP Load + // use A9_Issue to enforce the 1 
load/store per cycle limit + InstrItinData<IIC_fpLoad64, [InstrStage<1, [A9_DRegsVFP], 0, Required>, + InstrStage<2, [A9_DRegsN], 0, Reserved>, + InstrStage<1, [A9_Issue], 0>, + InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<1, [A9_LSPipe], 0>, + InstrStage<1, [A9_NPipe]>]>, + // + // FP Load Multiple + // use A9_Issue to enforce the 1 load/store per cycle limit + InstrItinData<IIC_fpLoadm, [InstrStage<1, [A9_DRegsVFP], 0, Required>, + InstrStage<2, [A9_DRegsN], 0, Reserved>, + InstrStage<1, [A9_Issue], 0>, + InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<1, [A9_LSPipe], 0>, + InstrStage<1, [A9_NPipe]>]>, + // + // Single-precision FP Store + // use A9_Issue to enforce the 1 load/store per cycle limit + InstrItinData<IIC_fpStore32,[InstrStage<1, [A9_DRegsVFP], 0, Required>, + InstrStage<2, [A9_DRegsN], 0, Reserved>, + InstrStage<1, [A9_Issue], 0>, + InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<1, [A9_LSPipe], 0>, + InstrStage<1, [A9_NPipe]>]>, + // + // Double-precision FP Store + // use A9_Issue to enforce the 1 load/store per cycle limit + InstrItinData<IIC_fpStore64,[InstrStage<1, [A9_DRegsVFP], 0, Required>, + InstrStage<2, [A9_DRegsN], 0, Reserved>, + InstrStage<1, [A9_Issue], 0>, + InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<1, [A9_LSPipe], 0>, + InstrStage<1, [A9_NPipe]>]>, + // + // FP Store Multiple + // use A9_Issue to enforce the 1 load/store per cycle limit + InstrItinData<IIC_fpStorem, [InstrStage<1, [A9_DRegsVFP], 0, Required>, + InstrStage<2, [A9_DRegsN], 0, Reserved>, + InstrStage<1, [A9_Issue], 0>, + InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<1, [A9_LSPipe], 0>, + InstrStage<1, [A9_NPipe]>]>, + // NEON + // Issue through integer pipeline, and execute in NEON unit. + // FIXME: Neon pipeline and LdSt unit are multiplexed. + // Add some syntactic sugar to model this! 
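Before the NEON entries start, note that every FP load/store entry above also spends a zero-increment cycle on A9_Issue; since there is only one A9_Issue unit, at most one load or store can acquire it per cycle, which is how the "1 load/store per cycle limit" comments are enforced. An annotated copy of the single-precision load entry from above (comments editorial, not in the file):

    InstrItinData<IIC_fpLoad32, [InstrStage<1, [A9_DRegsVFP], 0, Required>,
                                 InstrStage<2, [A9_DRegsN],   0, Reserved>,
                                 InstrStage<1, [A9_Issue], 0>,            // single shared issue token: only one
                                                                          // memory op may acquire it per cycle
                                 InstrStage<1, [A9_Pipe0, A9_Pipe1]>,
                                 InstrStage<1, [A9_LSPipe], 0>,
                                 InstrStage<1, [A9_NPipe]>]>,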
+ // VLD1 + // FIXME: We don't model this instruction properly + InstrItinData<IIC_VLD1, [InstrStage<1, [A9_DRegsN], 0, Required>, + InstrStage<7, [A9_DRegsVFP], 0, Reserved>, + InstrStage<1, [A9_Issue], 0>, + InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<1, [A9_LSPipe], 0>, + InstrStage<1, [A9_NPipe]>]>, + // + // VLD2 + // FIXME: We don't model this instruction properly + InstrItinData<IIC_VLD2, [InstrStage<1, [A9_DRegsN], 0, Required>, + // Extra latency cycles since wbck is 6 cycles + InstrStage<7, [A9_DRegsVFP], 0, Reserved>, + InstrStage<1, [A9_Issue], 0>, + InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<1, [A9_LSPipe], 0>, + InstrStage<1, [A9_NPipe]>], [2, 2, 1]>, + // + // VLD3 + // FIXME: We don't model this instruction properly + InstrItinData<IIC_VLD3, [InstrStage<1, [A9_DRegsN], 0, Required>, + // Extra latency cycles since wbck is 6 cycles + InstrStage<7, [A9_DRegsVFP], 0, Reserved>, + InstrStage<1, [A9_Issue], 0>, + InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<1, [A9_LSPipe], 0>, + InstrStage<1, [A9_NPipe]>], [2, 2, 2, 1]>, + // + // VLD4 + // FIXME: We don't model this instruction properly + InstrItinData<IIC_VLD4, [InstrStage<1, [A9_DRegsN], 0, Required>, + // Extra latency cycles since wbck is 6 cycles + InstrStage<7, [A9_DRegsVFP], 0, Reserved>, + InstrStage<1, [A9_Issue], 0>, + InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<1, [A9_LSPipe], 0>, + InstrStage<1, [A9_NPipe]>], [2, 2, 2, 2, 1]>, + // + // VST + // FIXME: We don't model this instruction properly + InstrItinData<IIC_VST, [InstrStage<1, [A9_DRegsN], 0, Required>, + // Extra latency cycles since wbck is 6 cycles + InstrStage<7, [A9_DRegsVFP], 0, Reserved>, + InstrStage<1, [A9_Issue], 0>, + InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<1, [A9_LSPipe], 0>, + InstrStage<1, [A9_NPipe]>]>, + // + // Double-register Integer Unary + InstrItinData<IIC_VUNAiD, [InstrStage<1, [A9_DRegsN], 0, Required>, + // Extra latency cycles since wbck is 6 cycles + InstrStage<7, [A9_DRegsVFP], 0, Reserved>, + InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<1, [A9_NPipe]>], [4, 2]>, + // + // Quad-register Integer Unary + InstrItinData<IIC_VUNAiQ, [InstrStage<1, [A9_DRegsN], 0, Required>, + // Extra latency cycles since wbck is 6 cycles + InstrStage<7, [A9_DRegsVFP], 0, Reserved>, + InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<1, [A9_NPipe]>], [4, 2]>, + // + // Double-register Integer Q-Unary + InstrItinData<IIC_VQUNAiD, [InstrStage<1, [A9_DRegsN], 0, Required>, + // Extra latency cycles since wbck is 6 cycles + InstrStage<7, [A9_DRegsVFP], 0, Reserved>, + InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<1, [A9_NPipe]>], [4, 1]>, + // + // Quad-register Integer CountQ-Unary + InstrItinData<IIC_VQUNAiQ, [InstrStage<1, [A9_DRegsN], 0, Required>, + // Extra latency cycles since wbck is 6 cycles + InstrStage<7, [A9_DRegsVFP], 0, Reserved>, + InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<1, [A9_NPipe]>], [4, 1]>, + // + // Double-register Integer Binary + InstrItinData<IIC_VBINiD, [InstrStage<1, [A9_DRegsN], 0, Required>, + // Extra latency cycles since wbck is 6 cycles + InstrStage<7, [A9_DRegsVFP], 0, Reserved>, + InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<1, [A9_NPipe]>], [3, 2, 2]>, + // + // Quad-register Integer Binary + InstrItinData<IIC_VBINiQ, [InstrStage<1, [A9_DRegsN], 0, Required>, + // Extra latency cycles since wbck is 6 cycles + InstrStage<7, [A9_DRegsVFP], 0, Reserved>, + InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<1, [A9_NPipe]>], [3, 2, 2]>, + // + // Double-register Integer 
Subtract
+  InstrItinData<IIC_VSUBiD,   [InstrStage<1, [A9_DRegsN],   0, Required>,
+                               // Extra latency cycles since wbck is 6 cycles
+                               InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
+                               InstrStage<1, [A9_Pipe0, A9_Pipe1]>,
+                               InstrStage<1, [A9_NPipe]>], [3, 2, 1]>,
+  //
+  // Quad-register Integer Subtract
+  InstrItinData<IIC_VSUBiQ,   [InstrStage<1, [A9_DRegsN],   0, Required>,
+                               // Extra latency cycles since wbck is 6 cycles
+                               InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
+                               InstrStage<1, [A9_Pipe0, A9_Pipe1]>,
+                               InstrStage<1, [A9_NPipe]>], [3, 2, 1]>,
+  //
+  // Double-register Integer Shift
+  InstrItinData<IIC_VSHLiD,   [InstrStage<1, [A9_DRegsN],   0, Required>,
+                               // Extra latency cycles since wbck is 6 cycles
+                               InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
+                               InstrStage<1, [A9_Pipe0, A9_Pipe1]>,
+                               InstrStage<1, [A9_NPipe]>], [3, 1, 1]>,
+  //
+  // Quad-register Integer Shift
+  InstrItinData<IIC_VSHLiQ,   [InstrStage<1, [A9_DRegsN],   0, Required>,
+                               // Extra latency cycles since wbck is 6 cycles
+                               InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
+                               InstrStage<1, [A9_Pipe0, A9_Pipe1]>,
+                               InstrStage<1, [A9_NPipe]>], [3, 1, 1]>,
+  //
+  // Double-register Integer Shift (4 cycle)
+  InstrItinData<IIC_VSHLi4D,  [InstrStage<1, [A9_DRegsN],   0, Required>,
+                               // Extra latency cycles since wbck is 6 cycles
+                               InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
+                               InstrStage<1, [A9_Pipe0, A9_Pipe1]>,
+                               InstrStage<1, [A9_NPipe]>], [4, 1, 1]>,
+  //
+  // Quad-register Integer Shift (4 cycle)
+  InstrItinData<IIC_VSHLi4Q,  [InstrStage<1, [A9_DRegsN],   0, Required>,
+                               // Extra latency cycles since wbck is 6 cycles
+                               InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
+                               InstrStage<1, [A9_Pipe0, A9_Pipe1]>,
+                               InstrStage<1, [A9_NPipe]>], [4, 1, 1]>,
+  //
+  // Double-register Integer Binary (4 cycle)
+  InstrItinData<IIC_VBINi4D,  [InstrStage<1, [A9_DRegsN],   0, Required>,
+                               // Extra latency cycles since wbck is 6 cycles
+                               InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
+                               InstrStage<1, [A9_Pipe0, A9_Pipe1]>,
+                               InstrStage<1, [A9_NPipe]>], [4, 2, 2]>,
+  //
+  // Quad-register Integer Binary (4 cycle)
+  InstrItinData<IIC_VBINi4Q,  [InstrStage<1, [A9_DRegsN],   0, Required>,
+                               // Extra latency cycles since wbck is 6 cycles
+                               InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
+                               InstrStage<1, [A9_Pipe0, A9_Pipe1]>,
+                               InstrStage<1, [A9_NPipe]>], [4, 2, 2]>,
+  //
+  // Double-register Integer Subtract (4 cycle)
+  InstrItinData<IIC_VSUBi4D,  [InstrStage<1, [A9_DRegsN],   0, Required>,
+                               // Extra latency cycles since wbck is 6 cycles
+                               InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
+                               InstrStage<1, [A9_Pipe0, A9_Pipe1]>,
+                               InstrStage<1, [A9_NPipe]>], [4, 2, 1]>,
+  //
+  // Quad-register Integer Subtract (4 cycle)
+  InstrItinData<IIC_VSUBi4Q,  [InstrStage<1, [A9_DRegsN],   0, Required>,
+                               // Extra latency cycles since wbck is 6 cycles
+                               InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
+                               InstrStage<1, [A9_Pipe0, A9_Pipe1]>,
+                               InstrStage<1, [A9_NPipe]>], [4, 2, 1]>,
+
+  //
+  // Double-register Integer Count
+  InstrItinData<IIC_VCNTiD,   [InstrStage<1, [A9_DRegsN],   0, Required>,
+                               // Extra latency cycles since wbck is 6 cycles
+                               InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
+                               InstrStage<1, [A9_Pipe0, A9_Pipe1]>,
+                               InstrStage<1, [A9_NPipe]>], [3, 2, 2]>,
+  //
+  // Quad-register Integer Count
+  // Result written in N3, but that is relative to the last cycle of multicycle,
+  // so we use 4 for those cases
+  InstrItinData<IIC_VCNTiQ,   [InstrStage<1, [A9_DRegsN],   0, Required>,
+                               // Extra latency cycles since wbck is 7 cycles
+                               InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
+                               InstrStage<1, [A9_Pipe0, A9_Pipe1]>,
+                               InstrStage<2,
[A9_NPipe]>], [4, 2, 2]>, + // + // Double-register Absolute Difference and Accumulate + InstrItinData<IIC_VABAD, [InstrStage<1, [A9_DRegsN], 0, Required>, + // Extra latency cycles since wbck is 6 cycles + InstrStage<7, [A9_DRegsVFP], 0, Reserved>, + InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<1, [A9_NPipe]>], [6, 3, 2, 1]>, + // + // Quad-register Absolute Difference and Accumulate + InstrItinData<IIC_VABAQ, [InstrStage<1, [A9_DRegsN], 0, Required>, + // Extra latency cycles since wbck is 6 cycles + InstrStage<7, [A9_DRegsVFP], 0, Reserved>, + InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<2, [A9_NPipe]>], [6, 3, 2, 1]>, + // + // Double-register Integer Pair Add Long + InstrItinData<IIC_VPALiD, [InstrStage<1, [A9_DRegsN], 0, Required>, + // Extra latency cycles since wbck is 6 cycles + InstrStage<7, [A9_DRegsVFP], 0, Reserved>, + InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<1, [A9_NPipe]>], [6, 3, 1]>, + // + // Quad-register Integer Pair Add Long + InstrItinData<IIC_VPALiQ, [InstrStage<1, [A9_DRegsN], 0, Required>, + // Extra latency cycles since wbck is 6 cycles + InstrStage<7, [A9_DRegsVFP], 0, Reserved>, + InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<2, [A9_NPipe]>], [6, 3, 1]>, + + // + // Double-register Integer Multiply (.8, .16) + InstrItinData<IIC_VMULi16D, [InstrStage<1, [A9_DRegsN], 0, Required>, + // Extra latency cycles since wbck is 6 cycles + InstrStage<7, [A9_DRegsVFP], 0, Reserved>, + InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<1, [A9_NPipe]>], [6, 2, 2]>, + // + // Quad-register Integer Multiply (.8, .16) + InstrItinData<IIC_VMULi16Q, [InstrStage<1, [A9_DRegsN], 0, Required>, + // Extra latency cycles since wbck is 7 cycles + InstrStage<8, [A9_DRegsVFP], 0, Reserved>, + InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<2, [A9_NPipe]>], [7, 2, 2]>, + + // + // Double-register Integer Multiply (.32) + InstrItinData<IIC_VMULi32D, [InstrStage<1, [A9_DRegsN], 0, Required>, + // Extra latency cycles since wbck is 7 cycles + InstrStage<8, [A9_DRegsVFP], 0, Reserved>, + InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<2, [A9_NPipe]>], [7, 2, 1]>, + // + // Quad-register Integer Multiply (.32) + InstrItinData<IIC_VMULi32Q, [InstrStage<1, [A9_DRegsN], 0, Required>, + // Extra latency cycles since wbck is 9 cycles + InstrStage<10, [A9_DRegsVFP], 0, Reserved>, + InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<4, [A9_NPipe]>], [9, 2, 1]>, + // + // Double-register Integer Multiply-Accumulate (.8, .16) + InstrItinData<IIC_VMACi16D, [InstrStage<1, [A9_DRegsN], 0, Required>, + // Extra latency cycles since wbck is 6 cycles + InstrStage<7, [A9_DRegsVFP], 0, Reserved>, + InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<1, [A9_NPipe]>], [6, 3, 2, 2]>, + // + // Double-register Integer Multiply-Accumulate (.32) + InstrItinData<IIC_VMACi32D, [InstrStage<1, [A9_DRegsN], 0, Required>, + // Extra latency cycles since wbck is 7 cycles + InstrStage<8, [A9_DRegsVFP], 0, Reserved>, + InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<2, [A9_NPipe]>], [7, 3, 2, 1]>, + // + // Quad-register Integer Multiply-Accumulate (.8, .16) + InstrItinData<IIC_VMACi16Q, [InstrStage<1, [A9_DRegsN], 0, Required>, + // Extra latency cycles since wbck is 7 cycles + InstrStage<8, [A9_DRegsVFP], 0, Reserved>, + InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<2, [A9_NPipe]>], [7, 3, 2, 2]>, + // + // Quad-register Integer Multiply-Accumulate (.32) + InstrItinData<IIC_VMACi32Q, [InstrStage<1, [A9_DRegsN], 0, Required>, + // Extra latency cycles since wbck is 9 cycles + InstrStage<10, 
[A9_DRegsVFP], 0, Reserved>, + InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<4, [A9_NPipe]>], [9, 3, 2, 1]>, + // + // Move Immediate + InstrItinData<IIC_VMOVImm, [InstrStage<1, [A9_DRegsN], 0, Required>, + // Extra latency cycles since wbck is 6 cycles + InstrStage<7, [A9_DRegsVFP], 0, Reserved>, + InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<1, [A9_NPipe]>], [3]>, + // + // Double-register Permute Move + InstrItinData<IIC_VMOVD, [InstrStage<1, [A9_DRegsN], 0, Required>, + // FIXME: all latencies are arbitrary, no information is available + InstrStage<3, [A9_DRegsVFP], 0, Reserved>, + InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<1, [A9_LSPipe]>], [2, 1]>, + // + // Quad-register Permute Move + // Result written in N2, but that is relative to the last cycle of multicycle, + // so we use 3 for those cases + InstrItinData<IIC_VMOVQ, [InstrStage<1, [A9_DRegsN], 0, Required>, + // FIXME: all latencies are arbitrary, no information is available + InstrStage<4, [A9_DRegsVFP], 0, Reserved>, + InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<2, [A9_NPipe]>], [3, 1]>, + // + // Integer to Single-precision Move + InstrItinData<IIC_VMOVIS , [InstrStage<1, [A9_DRegsN], 0, Required>, + // FIXME: all latencies are arbitrary, no information is available + InstrStage<3, [A9_DRegsVFP], 0, Reserved>, + InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<1, [A9_NPipe]>], [2, 1]>, + // + // Integer to Double-precision Move + InstrItinData<IIC_VMOVID , [InstrStage<1, [A9_DRegsN], 0, Required>, + // FIXME: all latencies are arbitrary, no information is available + InstrStage<3, [A9_DRegsVFP], 0, Reserved>, + InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<1, [A9_NPipe]>], [2, 1, 1]>, + // + // Single-precision to Integer Move + InstrItinData<IIC_VMOVSI , [InstrStage<1, [A9_DRegsN], 0, Required>, + // FIXME: all latencies are arbitrary, no information is available + InstrStage<3, [A9_DRegsVFP], 0, Reserved>, + InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<1, [A9_NPipe]>], [2, 1]>, + // + // Double-precision to Integer Move + InstrItinData<IIC_VMOVDI , [InstrStage<1, [A9_DRegsN], 0, Required>, + // FIXME: all latencies are arbitrary, no information is available + InstrStage<3, [A9_DRegsVFP], 0, Reserved>, + InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<1, [A9_NPipe]>], [2, 2, 1]>, + // + // Integer to Lane Move + InstrItinData<IIC_VMOVISL , [InstrStage<1, [A9_DRegsN], 0, Required>, + // FIXME: all latencies are arbitrary, no information is available + InstrStage<4, [A9_DRegsVFP], 0, Reserved>, + InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<2, [A9_NPipe]>], [3, 1, 1]>, + + // + // Double-register FP Unary + InstrItinData<IIC_VUNAD, [InstrStage<1, [A9_DRegsN], 0, Required>, + // Extra latency cycles since wbck is 6 cycles + InstrStage<7, [A9_DRegsVFP], 0, Reserved>, + InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<1, [A9_NPipe]>], [5, 2]>, + // + // Quad-register FP Unary + // Result written in N5, but that is relative to the last cycle of multicycle, + // so we use 6 for those cases + InstrItinData<IIC_VUNAQ, [InstrStage<1, [A9_DRegsN], 0, Required>, + // Extra latency cycles since wbck is 7 cycles + InstrStage<8, [A9_DRegsVFP], 0, Reserved>, + InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<2, [A9_NPipe]>], [6, 2]>, + // + // Double-register FP Binary + // FIXME: We're using this itin for many instructions and [2, 2] here is too + // optimistic. 
+ InstrItinData<IIC_VBIND, [InstrStage<1, [A9_DRegsN], 0, Required>, + // Extra latency cycles since wbck is 7 cycles + InstrStage<7, [A9_DRegsVFP], 0, Reserved>, + InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<1, [A9_NPipe]>], [5, 2, 2]>, + // + // Quad-register FP Binary + // Result written in N5, but that is relative to the last cycle of multicycle, + // so we use 6 for those cases + // FIXME: We're using this itin for many instructions and [2, 2] here is too + // optimistic. + InstrItinData<IIC_VBINQ, [InstrStage<1, [A9_DRegsN], 0, Required>, + // Extra latency cycles since wbck is 8 cycles + InstrStage<8, [A9_DRegsVFP], 0, Reserved>, + InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<2, [A9_NPipe]>], [6, 2, 2]>, + // + // Double-register FP Multiple-Accumulate + InstrItinData<IIC_VMACD, [InstrStage<1, [A9_DRegsN], 0, Required>, + // Extra latency cycles since wbck is 7 cycles + InstrStage<8, [A9_DRegsVFP], 0, Reserved>, + InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<2, [A9_NPipe]>], [6, 3, 2, 1]>, + // + // Quad-register FP Multiple-Accumulate + // Result written in N9, but that is relative to the last cycle of multicycle, + // so we use 10 for those cases + InstrItinData<IIC_VMACQ, [InstrStage<1, [A9_DRegsN], 0, Required>, + // Extra latency cycles since wbck is 9 cycles + InstrStage<10, [A9_DRegsVFP], 0, Reserved>, + InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<4, [A9_NPipe]>], [8, 4, 2, 1]>, + // + // Double-register Reciprical Step + InstrItinData<IIC_VRECSD, [InstrStage<1, [A9_DRegsN], 0, Required>, + // Extra latency cycles since wbck is 7 cycles + InstrStage<8, [A9_DRegsVFP], 0, Reserved>, + InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<2, [A9_NPipe]>], [6, 2, 2]>, + // + // Quad-register Reciprical Step + InstrItinData<IIC_VRECSQ, [InstrStage<1, [A9_DRegsN], 0, Required>, + // Extra latency cycles since wbck is 9 cycles + InstrStage<10, [A9_DRegsVFP], 0, Reserved>, + InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<4, [A9_NPipe]>], [8, 2, 2]>, + // + // Double-register Permute + InstrItinData<IIC_VPERMD, [InstrStage<1, [A9_DRegsN], 0, Required>, + // Extra latency cycles since wbck is 6 cycles + InstrStage<7, [A9_DRegsVFP], 0, Reserved>, + InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<1, [A9_NPipe]>], [2, 2, 1, 1]>, + // + // Quad-register Permute + // Result written in N2, but that is relative to the last cycle of multicycle, + // so we use 3 for those cases + InstrItinData<IIC_VPERMQ, [InstrStage<1, [A9_DRegsN], 0, Required>, + // Extra latency cycles since wbck is 7 cycles + InstrStage<8, [A9_DRegsVFP], 0, Reserved>, + InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<2, [A9_NPipe]>], [3, 3, 1, 1]>, + // + // Quad-register Permute (3 cycle issue) + // Result written in N2, but that is relative to the last cycle of multicycle, + // so we use 4 for those cases + InstrItinData<IIC_VPERMQ3, [InstrStage<1, [A9_DRegsN], 0, Required>, + // Extra latency cycles since wbck is 8 cycles + InstrStage<9, [A9_DRegsVFP], 0, Reserved>, + InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<3, [A9_LSPipe]>], [4, 4, 1, 1]>, + + // + // Double-register VEXT + InstrItinData<IIC_VEXTD, [InstrStage<1, [A9_DRegsN], 0, Required>, + // Extra latency cycles since wbck is 7 cycles + InstrStage<7, [A9_DRegsVFP], 0, Reserved>, + InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<1, [A9_NPipe]>], [2, 1, 1]>, + // + // Quad-register VEXT + InstrItinData<IIC_VEXTQ, [InstrStage<1, [A9_DRegsN], 0, Required>, + // Extra latency cycles since wbck is 9 cycles + InstrStage<8, [A9_DRegsVFP], 
0, Reserved>, + InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<2, [A9_NPipe]>], [3, 1, 1]>, + // + // VTB + InstrItinData<IIC_VTB1, [InstrStage<1, [A9_DRegsN], 0, Required>, + // Extra latency cycles since wbck is 7 cycles + InstrStage<8, [A9_DRegsVFP], 0, Reserved>, + InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<2, [A9_NPipe]>], [3, 2, 1]>, + InstrItinData<IIC_VTB2, [InstrStage<2, [A9_DRegsN], 0, Required>, + // Extra latency cycles since wbck is 7 cycles + InstrStage<8, [A9_DRegsVFP], 0, Reserved>, + InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<2, [A9_NPipe]>], [3, 2, 2, 1]>, + InstrItinData<IIC_VTB3, [InstrStage<2, [A9_DRegsN], 0, Required>, + // Extra latency cycles since wbck is 8 cycles + InstrStage<9, [A9_DRegsVFP], 0, Reserved>, + InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<3, [A9_NPipe]>], [4, 2, 2, 3, 1]>, + InstrItinData<IIC_VTB4, [InstrStage<1, [A9_DRegsN], 0, Required>, + // Extra latency cycles since wbck is 8 cycles + InstrStage<9, [A9_DRegsVFP], 0, Reserved>, + InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<3, [A9_NPipe]>], [4, 2, 2, 3, 3, 1]>, + // + // VTBX + InstrItinData<IIC_VTBX1, [InstrStage<1, [A9_DRegsN], 0, Required>, + // Extra latency cycles since wbck is 7 cycles + InstrStage<8, [A9_DRegsVFP], 0, Reserved>, + InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<2, [A9_NPipe]>], [3, 1, 2, 1]>, + InstrItinData<IIC_VTBX2, [InstrStage<1, [A9_DRegsN], 0, Required>, + // Extra latency cycles since wbck is 7 cycles + InstrStage<8, [A9_DRegsVFP], 0, Reserved>, + InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<2, [A9_NPipe]>], [3, 1, 2, 2, 1]>, + InstrItinData<IIC_VTBX3, [InstrStage<1, [A9_DRegsN], 0, Required>, + // Extra latency cycles since wbck is 8 cycles + InstrStage<9, [A9_DRegsVFP], 0, Reserved>, + InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<3, [A9_NPipe]>], [4, 1, 2, 2, 3, 1]>, + InstrItinData<IIC_VTBX4, [InstrStage<1, [A9_DRegsN], 0, Required>, + // Extra latency cycles since wbck is 8 cycles + InstrStage<9, [A9_DRegsVFP], 0, Reserved>, + InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<2, [A9_NPipe]>], [4, 1, 2, 2, 3, 3, 1]> +]>; diff --git a/lib/Target/ARM/ARMScheduleV6.td b/lib/Target/ARM/ARMScheduleV6.td index 0fef466..f813022 100644 --- a/lib/Target/ARM/ARMScheduleV6.td +++ b/lib/Target/ARM/ARMScheduleV6.td @@ -13,103 +13,107 @@ // Model based on ARM1176 // +// Functional Units +def V6_Pipe : FuncUnit; // pipeline + // Scheduling information derived from "ARM1176JZF-S Technical Reference Manual". 
// -def ARMV6Itineraries : ProcessorItineraries<[ +def ARMV6Itineraries : ProcessorItineraries< + [V6_Pipe], [ // // No operand cycles - InstrItinData<IIC_iALUx , [InstrStage<1, [FU_Pipe0]>]>, + InstrItinData<IIC_iALUx , [InstrStage<1, [V6_Pipe]>]>, // // Binary Instructions that produce a result - InstrItinData<IIC_iALUi , [InstrStage<1, [FU_Pipe0]>], [2, 2]>, - InstrItinData<IIC_iALUr , [InstrStage<1, [FU_Pipe0]>], [2, 2, 2]>, - InstrItinData<IIC_iALUsi , [InstrStage<1, [FU_Pipe0]>], [2, 2, 1]>, - InstrItinData<IIC_iALUsr , [InstrStage<2, [FU_Pipe0]>], [3, 3, 2, 1]>, + InstrItinData<IIC_iALUi , [InstrStage<1, [V6_Pipe]>], [2, 2]>, + InstrItinData<IIC_iALUr , [InstrStage<1, [V6_Pipe]>], [2, 2, 2]>, + InstrItinData<IIC_iALUsi , [InstrStage<1, [V6_Pipe]>], [2, 2, 1]>, + InstrItinData<IIC_iALUsr , [InstrStage<2, [V6_Pipe]>], [3, 3, 2, 1]>, // // Unary Instructions that produce a result - InstrItinData<IIC_iUNAr , [InstrStage<1, [FU_Pipe0]>], [2, 2]>, - InstrItinData<IIC_iUNAsi , [InstrStage<1, [FU_Pipe0]>], [2, 1]>, - InstrItinData<IIC_iUNAsr , [InstrStage<2, [FU_Pipe0]>], [3, 2, 1]>, + InstrItinData<IIC_iUNAr , [InstrStage<1, [V6_Pipe]>], [2, 2]>, + InstrItinData<IIC_iUNAsi , [InstrStage<1, [V6_Pipe]>], [2, 1]>, + InstrItinData<IIC_iUNAsr , [InstrStage<2, [V6_Pipe]>], [3, 2, 1]>, // // Compare instructions - InstrItinData<IIC_iCMPi , [InstrStage<1, [FU_Pipe0]>], [2]>, - InstrItinData<IIC_iCMPr , [InstrStage<1, [FU_Pipe0]>], [2, 2]>, - InstrItinData<IIC_iCMPsi , [InstrStage<1, [FU_Pipe0]>], [2, 1]>, - InstrItinData<IIC_iCMPsr , [InstrStage<2, [FU_Pipe0]>], [3, 2, 1]>, + InstrItinData<IIC_iCMPi , [InstrStage<1, [V6_Pipe]>], [2]>, + InstrItinData<IIC_iCMPr , [InstrStage<1, [V6_Pipe]>], [2, 2]>, + InstrItinData<IIC_iCMPsi , [InstrStage<1, [V6_Pipe]>], [2, 1]>, + InstrItinData<IIC_iCMPsr , [InstrStage<2, [V6_Pipe]>], [3, 2, 1]>, // // Move instructions, unconditional - InstrItinData<IIC_iMOVi , [InstrStage<1, [FU_Pipe0]>], [2]>, - InstrItinData<IIC_iMOVr , [InstrStage<1, [FU_Pipe0]>], [2, 2]>, - InstrItinData<IIC_iMOVsi , [InstrStage<1, [FU_Pipe0]>], [2, 1]>, - InstrItinData<IIC_iMOVsr , [InstrStage<2, [FU_Pipe0]>], [3, 2, 1]>, + InstrItinData<IIC_iMOVi , [InstrStage<1, [V6_Pipe]>], [2]>, + InstrItinData<IIC_iMOVr , [InstrStage<1, [V6_Pipe]>], [2, 2]>, + InstrItinData<IIC_iMOVsi , [InstrStage<1, [V6_Pipe]>], [2, 1]>, + InstrItinData<IIC_iMOVsr , [InstrStage<2, [V6_Pipe]>], [3, 2, 1]>, // // Move instructions, conditional - InstrItinData<IIC_iCMOVi , [InstrStage<1, [FU_Pipe0]>], [3]>, - InstrItinData<IIC_iCMOVr , [InstrStage<1, [FU_Pipe0]>], [3, 2]>, - InstrItinData<IIC_iCMOVsi , [InstrStage<1, [FU_Pipe0]>], [3, 1]>, - InstrItinData<IIC_iCMOVsr , [InstrStage<1, [FU_Pipe0]>], [4, 2, 1]>, + InstrItinData<IIC_iCMOVi , [InstrStage<1, [V6_Pipe]>], [3]>, + InstrItinData<IIC_iCMOVr , [InstrStage<1, [V6_Pipe]>], [3, 2]>, + InstrItinData<IIC_iCMOVsi , [InstrStage<1, [V6_Pipe]>], [3, 1]>, + InstrItinData<IIC_iCMOVsr , [InstrStage<1, [V6_Pipe]>], [4, 2, 1]>, // Integer multiply pipeline // - InstrItinData<IIC_iMUL16 , [InstrStage<1, [FU_Pipe0]>], [4, 1, 1]>, - InstrItinData<IIC_iMAC16 , [InstrStage<1, [FU_Pipe0]>], [4, 1, 1, 2]>, - InstrItinData<IIC_iMUL32 , [InstrStage<2, [FU_Pipe0]>], [5, 1, 1]>, - InstrItinData<IIC_iMAC32 , [InstrStage<2, [FU_Pipe0]>], [5, 1, 1, 2]>, - InstrItinData<IIC_iMUL64 , [InstrStage<3, [FU_Pipe0]>], [6, 1, 1]>, - InstrItinData<IIC_iMAC64 , [InstrStage<3, [FU_Pipe0]>], [6, 1, 1, 2]>, + InstrItinData<IIC_iMUL16 , [InstrStage<1, [V6_Pipe]>], [4, 1, 1]>, + InstrItinData<IIC_iMAC16 , 
[InstrStage<1, [V6_Pipe]>], [4, 1, 1, 2]>, + InstrItinData<IIC_iMUL32 , [InstrStage<2, [V6_Pipe]>], [5, 1, 1]>, + InstrItinData<IIC_iMAC32 , [InstrStage<2, [V6_Pipe]>], [5, 1, 1, 2]>, + InstrItinData<IIC_iMUL64 , [InstrStage<3, [V6_Pipe]>], [6, 1, 1]>, + InstrItinData<IIC_iMAC64 , [InstrStage<3, [V6_Pipe]>], [6, 1, 1, 2]>, // Integer load pipeline // // Immediate offset - InstrItinData<IIC_iLoadi , [InstrStage<1, [FU_Pipe0]>], [4, 1]>, + InstrItinData<IIC_iLoadi , [InstrStage<1, [V6_Pipe]>], [4, 1]>, // // Register offset - InstrItinData<IIC_iLoadr , [InstrStage<1, [FU_Pipe0]>], [4, 1, 1]>, + InstrItinData<IIC_iLoadr , [InstrStage<1, [V6_Pipe]>], [4, 1, 1]>, // // Scaled register offset, issues over 2 cycles - InstrItinData<IIC_iLoadsi , [InstrStage<2, [FU_Pipe0]>], [5, 2, 1]>, + InstrItinData<IIC_iLoadsi , [InstrStage<2, [V6_Pipe]>], [5, 2, 1]>, // // Immediate offset with update - InstrItinData<IIC_iLoadiu , [InstrStage<1, [FU_Pipe0]>], [4, 2, 1]>, + InstrItinData<IIC_iLoadiu , [InstrStage<1, [V6_Pipe]>], [4, 2, 1]>, // // Register offset with update - InstrItinData<IIC_iLoadru , [InstrStage<1, [FU_Pipe0]>], [4, 2, 1, 1]>, + InstrItinData<IIC_iLoadru , [InstrStage<1, [V6_Pipe]>], [4, 2, 1, 1]>, // // Scaled register offset with update, issues over 2 cycles - InstrItinData<IIC_iLoadsiu , [InstrStage<2, [FU_Pipe0]>], [5, 2, 2, 1]>, + InstrItinData<IIC_iLoadsiu , [InstrStage<2, [V6_Pipe]>], [5, 2, 2, 1]>, // // Load multiple - InstrItinData<IIC_iLoadm , [InstrStage<3, [FU_Pipe0]>]>, + InstrItinData<IIC_iLoadm , [InstrStage<3, [V6_Pipe]>]>, // Integer store pipeline // // Immediate offset - InstrItinData<IIC_iStorei , [InstrStage<1, [FU_Pipe0]>], [2, 1]>, + InstrItinData<IIC_iStorei , [InstrStage<1, [V6_Pipe]>], [2, 1]>, // // Register offset - InstrItinData<IIC_iStorer , [InstrStage<1, [FU_Pipe0]>], [2, 1, 1]>, + InstrItinData<IIC_iStorer , [InstrStage<1, [V6_Pipe]>], [2, 1, 1]>, // // Scaled register offset, issues over 2 cycles - InstrItinData<IIC_iStoresi , [InstrStage<2, [FU_Pipe0]>], [2, 2, 1]>, + InstrItinData<IIC_iStoresi , [InstrStage<2, [V6_Pipe]>], [2, 2, 1]>, // // Immediate offset with update - InstrItinData<IIC_iStoreiu , [InstrStage<1, [FU_Pipe0]>], [2, 2, 1]>, + InstrItinData<IIC_iStoreiu , [InstrStage<1, [V6_Pipe]>], [2, 2, 1]>, // // Register offset with update - InstrItinData<IIC_iStoreru , [InstrStage<1, [FU_Pipe0]>], [2, 2, 1, 1]>, + InstrItinData<IIC_iStoreru , [InstrStage<1, [V6_Pipe]>], [2, 2, 1, 1]>, // // Scaled register offset with update, issues over 2 cycles - InstrItinData<IIC_iStoresiu, [InstrStage<2, [FU_Pipe0]>], [2, 2, 2, 1]>, + InstrItinData<IIC_iStoresiu, [InstrStage<2, [V6_Pipe]>], [2, 2, 2, 1]>, // // Store multiple - InstrItinData<IIC_iStorem , [InstrStage<3, [FU_Pipe0]>]>, + InstrItinData<IIC_iStorem , [InstrStage<3, [V6_Pipe]>]>, // Branch // // no delay slots, so the latency of a branch is unimportant - InstrItinData<IIC_Br , [InstrStage<1, [FU_Pipe0]>]>, + InstrItinData<IIC_Br , [InstrStage<1, [V6_Pipe]>]>, // VFP // Issue through integer pipeline, and execute in NEON unit. We assume @@ -117,84 +121,84 @@ def ARMV6Itineraries : ProcessorItineraries<[ // possible. 
// // FP Special Register to Integer Register File Move - InstrItinData<IIC_fpSTAT , [InstrStage<1, [FU_Pipe0]>], [3]>, + InstrItinData<IIC_fpSTAT , [InstrStage<1, [V6_Pipe]>], [3]>, // // Single-precision FP Unary - InstrItinData<IIC_fpUNA32 , [InstrStage<1, [FU_Pipe0]>], [5, 2]>, + InstrItinData<IIC_fpUNA32 , [InstrStage<1, [V6_Pipe]>], [5, 2]>, // // Double-precision FP Unary - InstrItinData<IIC_fpUNA64 , [InstrStage<1, [FU_Pipe0]>], [5, 2]>, + InstrItinData<IIC_fpUNA64 , [InstrStage<1, [V6_Pipe]>], [5, 2]>, // // Single-precision FP Compare - InstrItinData<IIC_fpCMP32 , [InstrStage<1, [FU_Pipe0]>], [2, 2]>, + InstrItinData<IIC_fpCMP32 , [InstrStage<1, [V6_Pipe]>], [2, 2]>, // // Double-precision FP Compare - InstrItinData<IIC_fpCMP64 , [InstrStage<1, [FU_Pipe0]>], [2, 2]>, + InstrItinData<IIC_fpCMP64 , [InstrStage<1, [V6_Pipe]>], [2, 2]>, // // Single to Double FP Convert - InstrItinData<IIC_fpCVTSD , [InstrStage<1, [FU_Pipe0]>], [5, 2]>, + InstrItinData<IIC_fpCVTSD , [InstrStage<1, [V6_Pipe]>], [5, 2]>, // // Double to Single FP Convert - InstrItinData<IIC_fpCVTDS , [InstrStage<1, [FU_Pipe0]>], [5, 2]>, + InstrItinData<IIC_fpCVTDS , [InstrStage<1, [V6_Pipe]>], [5, 2]>, // // Single-Precision FP to Integer Convert - InstrItinData<IIC_fpCVTSI , [InstrStage<1, [FU_Pipe0]>], [9, 2]>, + InstrItinData<IIC_fpCVTSI , [InstrStage<1, [V6_Pipe]>], [9, 2]>, // // Double-Precision FP to Integer Convert - InstrItinData<IIC_fpCVTDI , [InstrStage<1, [FU_Pipe0]>], [9, 2]>, + InstrItinData<IIC_fpCVTDI , [InstrStage<1, [V6_Pipe]>], [9, 2]>, // // Integer to Single-Precision FP Convert - InstrItinData<IIC_fpCVTIS , [InstrStage<1, [FU_Pipe0]>], [9, 2]>, + InstrItinData<IIC_fpCVTIS , [InstrStage<1, [V6_Pipe]>], [9, 2]>, // // Integer to Double-Precision FP Convert - InstrItinData<IIC_fpCVTID , [InstrStage<1, [FU_Pipe0]>], [9, 2]>, + InstrItinData<IIC_fpCVTID , [InstrStage<1, [V6_Pipe]>], [9, 2]>, // // Single-precision FP ALU - InstrItinData<IIC_fpALU32 , [InstrStage<1, [FU_Pipe0]>], [9, 2, 2]>, + InstrItinData<IIC_fpALU32 , [InstrStage<1, [V6_Pipe]>], [9, 2, 2]>, // // Double-precision FP ALU - InstrItinData<IIC_fpALU64 , [InstrStage<1, [FU_Pipe0]>], [9, 2, 2]>, + InstrItinData<IIC_fpALU64 , [InstrStage<1, [V6_Pipe]>], [9, 2, 2]>, // // Single-precision FP Multiply - InstrItinData<IIC_fpMUL32 , [InstrStage<1, [FU_Pipe0]>], [9, 2, 2]>, + InstrItinData<IIC_fpMUL32 , [InstrStage<1, [V6_Pipe]>], [9, 2, 2]>, // // Double-precision FP Multiply - InstrItinData<IIC_fpMUL64 , [InstrStage<2, [FU_Pipe0]>], [9, 2, 2]>, + InstrItinData<IIC_fpMUL64 , [InstrStage<2, [V6_Pipe]>], [9, 2, 2]>, // // Single-precision FP MAC - InstrItinData<IIC_fpMAC32 , [InstrStage<1, [FU_Pipe0]>], [9, 2, 2, 2]>, + InstrItinData<IIC_fpMAC32 , [InstrStage<1, [V6_Pipe]>], [9, 2, 2, 2]>, // // Double-precision FP MAC - InstrItinData<IIC_fpMAC64 , [InstrStage<2, [FU_Pipe0]>], [9, 2, 2, 2]>, + InstrItinData<IIC_fpMAC64 , [InstrStage<2, [V6_Pipe]>], [9, 2, 2, 2]>, // // Single-precision FP DIV - InstrItinData<IIC_fpDIV32 , [InstrStage<15, [FU_Pipe0]>], [20, 2, 2]>, + InstrItinData<IIC_fpDIV32 , [InstrStage<15, [V6_Pipe]>], [20, 2, 2]>, // // Double-precision FP DIV - InstrItinData<IIC_fpDIV64 , [InstrStage<29, [FU_Pipe0]>], [34, 2, 2]>, + InstrItinData<IIC_fpDIV64 , [InstrStage<29, [V6_Pipe]>], [34, 2, 2]>, // // Single-precision FP SQRT - InstrItinData<IIC_fpSQRT32 , [InstrStage<15, [FU_Pipe0]>], [20, 2, 2]>, + InstrItinData<IIC_fpSQRT32 , [InstrStage<15, [V6_Pipe]>], [20, 2, 2]>, // // Double-precision FP SQRT - InstrItinData<IIC_fpSQRT64 , 
[InstrStage<29, [FU_Pipe0]>], [34, 2, 2]>, + InstrItinData<IIC_fpSQRT64 , [InstrStage<29, [V6_Pipe]>], [34, 2, 2]>, // // Single-precision FP Load - InstrItinData<IIC_fpLoad32 , [InstrStage<1, [FU_Pipe0]>], [5, 2, 2]>, + InstrItinData<IIC_fpLoad32 , [InstrStage<1, [V6_Pipe]>], [5, 2, 2]>, // // Double-precision FP Load - InstrItinData<IIC_fpLoad64 , [InstrStage<1, [FU_Pipe0]>], [5, 2, 2]>, + InstrItinData<IIC_fpLoad64 , [InstrStage<1, [V6_Pipe]>], [5, 2, 2]>, // // FP Load Multiple - InstrItinData<IIC_fpLoadm , [InstrStage<3, [FU_Pipe0]>]>, + InstrItinData<IIC_fpLoadm , [InstrStage<3, [V6_Pipe]>]>, // // Single-precision FP Store - InstrItinData<IIC_fpStore32 , [InstrStage<1, [FU_Pipe0]>], [2, 2, 2]>, + InstrItinData<IIC_fpStore32 , [InstrStage<1, [V6_Pipe]>], [2, 2, 2]>, // // Double-precision FP Store // use FU_Issue to enforce the 1 load/store per cycle limit - InstrItinData<IIC_fpStore64 , [InstrStage<1, [FU_Pipe0]>], [2, 2, 2]>, + InstrItinData<IIC_fpStore64 , [InstrStage<1, [V6_Pipe]>], [2, 2, 2]>, // // FP Store Multiple - InstrItinData<IIC_fpStorem , [InstrStage<3, [FU_Pipe0]>]> + InstrItinData<IIC_fpStorem , [InstrStage<3, [V6_Pipe]>]> ]>; diff --git a/lib/Target/ARM/ARMScheduleV7.td b/lib/Target/ARM/ARMScheduleV7.td deleted file mode 100644 index bbbf413..0000000 --- a/lib/Target/ARM/ARMScheduleV7.td +++ /dev/null @@ -1,587 +0,0 @@ -//===- ARMScheduleV7.td - ARM v7 Scheduling Definitions ----*- tablegen -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file defines the itinerary class data for the ARM v7 processors. -// -//===----------------------------------------------------------------------===// - -// -// Scheduling information derived from "Cortex-A8 Technical Reference Manual". 
-// -// Dual issue pipeline represented by FU_Pipe0 | FU_Pipe1 -// -def CortexA8Itineraries : ProcessorItineraries<[ - - // Two fully-pipelined integer ALU pipelines - // - // No operand cycles - InstrItinData<IIC_iALUx , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>]>, - // - // Binary Instructions that produce a result - InstrItinData<IIC_iALUi , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>], [2, 2]>, - InstrItinData<IIC_iALUr , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>], [2, 2, 2]>, - InstrItinData<IIC_iALUsi , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>], [2, 2, 1]>, - InstrItinData<IIC_iALUsr , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>], [2, 2, 1, 1]>, - // - // Unary Instructions that produce a result - InstrItinData<IIC_iUNAr , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>], [2, 2]>, - InstrItinData<IIC_iUNAsi , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>], [2, 1]>, - InstrItinData<IIC_iUNAsr , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>], [2, 1, 1]>, - // - // Compare instructions - InstrItinData<IIC_iCMPi , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>], [2]>, - InstrItinData<IIC_iCMPr , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>], [2, 2]>, - InstrItinData<IIC_iCMPsi , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>], [2, 1]>, - InstrItinData<IIC_iCMPsr , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>], [2, 1, 1]>, - // - // Move instructions, unconditional - InstrItinData<IIC_iMOVi , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>], [1]>, - InstrItinData<IIC_iMOVr , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>], [1, 1]>, - InstrItinData<IIC_iMOVsi , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>], [1, 1]>, - InstrItinData<IIC_iMOVsr , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>], [1, 1, 1]>, - // - // Move instructions, conditional - InstrItinData<IIC_iCMOVi , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>], [2]>, - InstrItinData<IIC_iCMOVr , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>], [2, 1]>, - InstrItinData<IIC_iCMOVsi , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>], [2, 1]>, - InstrItinData<IIC_iCMOVsr , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>], [2, 1, 1]>, - - // Integer multiply pipeline - // Result written in E5, but that is relative to the last cycle of multicycle, - // so we use 6 for those cases - // - InstrItinData<IIC_iMUL16 , [InstrStage<1, [FU_Pipe0]>], [5, 1, 1]>, - InstrItinData<IIC_iMAC16 , [InstrStage<1, [FU_Pipe1], 0>, - InstrStage<2, [FU_Pipe0]>], [6, 1, 1, 4]>, - InstrItinData<IIC_iMUL32 , [InstrStage<1, [FU_Pipe1], 0>, - InstrStage<2, [FU_Pipe0]>], [6, 1, 1]>, - InstrItinData<IIC_iMAC32 , [InstrStage<1, [FU_Pipe1], 0>, - InstrStage<2, [FU_Pipe0]>], [6, 1, 1, 4]>, - InstrItinData<IIC_iMUL64 , [InstrStage<2, [FU_Pipe1], 0>, - InstrStage<3, [FU_Pipe0]>], [6, 6, 1, 1]>, - InstrItinData<IIC_iMAC64 , [InstrStage<2, [FU_Pipe1], 0>, - InstrStage<3, [FU_Pipe0]>], [6, 6, 1, 1]>, - - // Integer load pipeline - // - // loads have an extra cycle of latency, but are fully pipelined - // use FU_Issue to enforce the 1 load/store per cycle limit - // - // Immediate offset - InstrItinData<IIC_iLoadi , [InstrStage<1, [FU_Issue], 0>, - InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<1, [FU_LdSt0]>], [3, 1]>, - // - // Register offset - InstrItinData<IIC_iLoadr , [InstrStage<1, [FU_Issue], 0>, - InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<1, [FU_LdSt0]>], [3, 1, 1]>, - // - // Scaled register offset, issues over 2 cycles - InstrItinData<IIC_iLoadsi , [InstrStage<2, [FU_Issue], 0>, - InstrStage<1, [FU_Pipe0], 0>, - InstrStage<1, [FU_Pipe1]>, - InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<1, [FU_LdSt0]>], [4, 1, 1]>, - // - // Immediate offset with update - InstrItinData<IIC_iLoadiu , [InstrStage<1, [FU_Issue], 0>, - 
InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<1, [FU_LdSt0]>], [3, 2, 1]>, - // - // Register offset with update - InstrItinData<IIC_iLoadru , [InstrStage<1, [FU_Issue], 0>, - InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<1, [FU_LdSt0]>], [3, 2, 1, 1]>, - // - // Scaled register offset with update, issues over 2 cycles - InstrItinData<IIC_iLoadsiu , [InstrStage<2, [FU_Issue], 0>, - InstrStage<1, [FU_Pipe0], 0>, - InstrStage<1, [FU_Pipe1]>, - InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<1, [FU_LdSt0]>], [4, 3, 1, 1]>, - // - // Load multiple - InstrItinData<IIC_iLoadm , [InstrStage<2, [FU_Issue], 0>, - InstrStage<2, [FU_Pipe0], 0>, - InstrStage<2, [FU_Pipe1]>, - InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<1, [FU_LdSt0]>]>, - - // Integer store pipeline - // - // use FU_Issue to enforce the 1 load/store per cycle limit - // - // Immediate offset - InstrItinData<IIC_iStorei , [InstrStage<1, [FU_Issue], 0>, - InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<1, [FU_LdSt0]>], [3, 1]>, - // - // Register offset - InstrItinData<IIC_iStorer , [InstrStage<1, [FU_Issue], 0>, - InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<1, [FU_LdSt0]>], [3, 1, 1]>, - // - // Scaled register offset, issues over 2 cycles - InstrItinData<IIC_iStoresi , [InstrStage<2, [FU_Issue], 0>, - InstrStage<1, [FU_Pipe0], 0>, - InstrStage<1, [FU_Pipe1]>, - InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<1, [FU_LdSt0]>], [3, 1, 1]>, - // - // Immediate offset with update - InstrItinData<IIC_iStoreiu , [InstrStage<1, [FU_Issue], 0>, - InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<1, [FU_LdSt0]>], [2, 3, 1]>, - // - // Register offset with update - InstrItinData<IIC_iStoreru , [InstrStage<1, [FU_Issue], 0>, - InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<1, [FU_LdSt0]>], [2, 3, 1, 1]>, - // - // Scaled register offset with update, issues over 2 cycles - InstrItinData<IIC_iStoresiu, [InstrStage<2, [FU_Issue], 0>, - InstrStage<1, [FU_Pipe0], 0>, - InstrStage<1, [FU_Pipe1]>, - InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<1, [FU_LdSt0]>], [3, 3, 1, 1]>, - // - // Store multiple - InstrItinData<IIC_iStorem , [InstrStage<2, [FU_Issue], 0>, - InstrStage<2, [FU_Pipe0], 0>, - InstrStage<2, [FU_Pipe1]>, - InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<1, [FU_LdSt0]>]>, - - // Branch - // - // no delay slots, so the latency of a branch is unimportant - InstrItinData<IIC_Br , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>]>, - - // VFP - // Issue through integer pipeline, and execute in NEON unit. We assume - // RunFast mode so that NFP pipeline is used for single-precision when - // possible. 
- // - // FP Special Register to Integer Register File Move - InstrItinData<IIC_fpSTAT , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<1, [FU_NLSPipe]>]>, - // - // Single-precision FP Unary - InstrItinData<IIC_fpUNA32 , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<1, [FU_NPipe]>], [7, 1]>, - // - // Double-precision FP Unary - InstrItinData<IIC_fpUNA64 , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<4, [FU_NPipe], 0>, - InstrStage<4, [FU_NLSPipe]>], [4, 1]>, - // - // Single-precision FP Compare - InstrItinData<IIC_fpCMP32 , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<1, [FU_NPipe]>], [1, 1]>, - // - // Double-precision FP Compare - InstrItinData<IIC_fpCMP64 , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<4, [FU_NPipe], 0>, - InstrStage<4, [FU_NLSPipe]>], [4, 1]>, - // - // Single to Double FP Convert - InstrItinData<IIC_fpCVTSD , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<7, [FU_NPipe], 0>, - InstrStage<7, [FU_NLSPipe]>], [7, 1]>, - // - // Double to Single FP Convert - InstrItinData<IIC_fpCVTDS , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<5, [FU_NPipe], 0>, - InstrStage<5, [FU_NLSPipe]>], [5, 1]>, - // - // Single-Precision FP to Integer Convert - InstrItinData<IIC_fpCVTSI , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<1, [FU_NPipe]>], [7, 1]>, - // - // Double-Precision FP to Integer Convert - InstrItinData<IIC_fpCVTDI , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<8, [FU_NPipe], 0>, - InstrStage<8, [FU_NLSPipe]>], [8, 1]>, - // - // Integer to Single-Precision FP Convert - InstrItinData<IIC_fpCVTIS , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<1, [FU_NPipe]>], [7, 1]>, - // - // Integer to Double-Precision FP Convert - InstrItinData<IIC_fpCVTID , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<8, [FU_NPipe], 0>, - InstrStage<8, [FU_NLSPipe]>], [8, 1]>, - // - // Single-precision FP ALU - InstrItinData<IIC_fpALU32 , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<1, [FU_NPipe]>], [7, 1, 1]>, - // - // Double-precision FP ALU - InstrItinData<IIC_fpALU64 , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<9, [FU_NPipe], 0>, - InstrStage<9, [FU_NLSPipe]>], [9, 1, 1]>, - // - // Single-precision FP Multiply - InstrItinData<IIC_fpMUL32 , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<1, [FU_NPipe]>], [7, 1, 1]>, - // - // Double-precision FP Multiply - InstrItinData<IIC_fpMUL64 , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<11, [FU_NPipe], 0>, - InstrStage<11, [FU_NLSPipe]>], [11, 1, 1]>, - // - // Single-precision FP MAC - InstrItinData<IIC_fpMAC32 , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<1, [FU_NPipe]>], [7, 2, 1, 1]>, - // - // Double-precision FP MAC - InstrItinData<IIC_fpMAC64 , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<19, [FU_NPipe], 0>, - InstrStage<19, [FU_NLSPipe]>], [19, 2, 1, 1]>, - // - // Single-precision FP DIV - InstrItinData<IIC_fpDIV32 , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<20, [FU_NPipe], 0>, - InstrStage<20, [FU_NLSPipe]>], [20, 1, 1]>, - // - // Double-precision FP DIV - InstrItinData<IIC_fpDIV64 , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<29, [FU_NPipe], 0>, - InstrStage<29, [FU_NLSPipe]>], [29, 1, 1]>, - // - // Single-precision FP SQRT - InstrItinData<IIC_fpSQRT32, [InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<19, [FU_NPipe], 0>, - InstrStage<19, [FU_NLSPipe]>], [19, 1]>, - // - // Double-precision FP SQRT - InstrItinData<IIC_fpSQRT64, [InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<29, [FU_NPipe], 0>, - InstrStage<29, [FU_NLSPipe]>], 
[29, 1]>, - // - // Single-precision FP Load - // use FU_Issue to enforce the 1 load/store per cycle limit - InstrItinData<IIC_fpLoad32, [InstrStage<1, [FU_Issue], 0>, - InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<1, [FU_LdSt0], 0>, - InstrStage<1, [FU_NLSPipe]>]>, - // - // Double-precision FP Load - // use FU_Issue to enforce the 1 load/store per cycle limit - InstrItinData<IIC_fpLoad64, [InstrStage<2, [FU_Issue], 0>, - InstrStage<1, [FU_Pipe0], 0>, - InstrStage<1, [FU_Pipe1]>, - InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<1, [FU_LdSt0], 0>, - InstrStage<1, [FU_NLSPipe]>]>, - // - // FP Load Multiple - // use FU_Issue to enforce the 1 load/store per cycle limit - InstrItinData<IIC_fpLoadm, [InstrStage<3, [FU_Issue], 0>, - InstrStage<2, [FU_Pipe0], 0>, - InstrStage<2, [FU_Pipe1]>, - InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<1, [FU_LdSt0], 0>, - InstrStage<1, [FU_NLSPipe]>]>, - // - // Single-precision FP Store - // use FU_Issue to enforce the 1 load/store per cycle limit - InstrItinData<IIC_fpStore32,[InstrStage<1, [FU_Issue], 0>, - InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<1, [FU_LdSt0], 0>, - InstrStage<1, [FU_NLSPipe]>]>, - // - // Double-precision FP Store - // use FU_Issue to enforce the 1 load/store per cycle limit - InstrItinData<IIC_fpStore64,[InstrStage<2, [FU_Issue], 0>, - InstrStage<1, [FU_Pipe0], 0>, - InstrStage<1, [FU_Pipe1]>, - InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<1, [FU_LdSt0], 0>, - InstrStage<1, [FU_NLSPipe]>]>, - // - // FP Store Multiple - // use FU_Issue to enforce the 1 load/store per cycle limit - InstrItinData<IIC_fpStorem, [InstrStage<3, [FU_Issue], 0>, - InstrStage<2, [FU_Pipe0], 0>, - InstrStage<2, [FU_Pipe1]>, - InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<1, [FU_LdSt0], 0>, - InstrStage<1, [FU_NLSPipe]>]>, - - // NEON - // Issue through integer pipeline, and execute in NEON unit. 
- // - // VLD1 - InstrItinData<IIC_VLD1, [InstrStage<1, [FU_Issue], 0>, - InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<1, [FU_LdSt0], 0>, - InstrStage<1, [FU_NLSPipe]>]>, - // - // VLD2 - InstrItinData<IIC_VLD2, [InstrStage<1, [FU_Issue], 0>, - InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<1, [FU_LdSt0], 0>, - InstrStage<1, [FU_NLSPipe]>], [2, 2, 1]>, - // - // VLD3 - InstrItinData<IIC_VLD3, [InstrStage<1, [FU_Issue], 0>, - InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<1, [FU_LdSt0], 0>, - InstrStage<1, [FU_NLSPipe]>], [2, 2, 2, 1]>, - // - // VLD4 - InstrItinData<IIC_VLD4, [InstrStage<1, [FU_Issue], 0>, - InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<1, [FU_LdSt0], 0>, - InstrStage<1, [FU_NLSPipe]>], [2, 2, 2, 2, 1]>, - // - // VST - InstrItinData<IIC_VST, [InstrStage<1, [FU_Issue], 0>, - InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<1, [FU_LdSt0], 0>, - InstrStage<1, [FU_NLSPipe]>]>, - // - // Double-register FP Unary - InstrItinData<IIC_VUNAD, [InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<1, [FU_NPipe]>], [5, 2]>, - // - // Quad-register FP Unary - // Result written in N5, but that is relative to the last cycle of multicycle, - // so we use 6 for those cases - InstrItinData<IIC_VUNAQ, [InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<2, [FU_NPipe]>], [6, 2]>, - // - // Double-register FP Binary - InstrItinData<IIC_VBIND, [InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<1, [FU_NPipe]>], [5, 2, 2]>, - // - // Quad-register FP Binary - // Result written in N5, but that is relative to the last cycle of multicycle, - // so we use 6 for those cases - InstrItinData<IIC_VBINQ, [InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<2, [FU_NPipe]>], [6, 2, 2]>, - // - // Move Immediate - InstrItinData<IIC_VMOVImm, [InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<1, [FU_NPipe]>], [3]>, - // - // Double-register Permute Move - InstrItinData<IIC_VMOVD, [InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<1, [FU_NLSPipe]>], [2, 1]>, - // - // Quad-register Permute Move - // Result written in N2, but that is relative to the last cycle of multicycle, - // so we use 3 for those cases - InstrItinData<IIC_VMOVQ, [InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<2, [FU_NLSPipe]>], [3, 1]>, - // - // Integer to Single-precision Move - InstrItinData<IIC_VMOVIS , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<1, [FU_NLSPipe]>], [2, 1]>, - // - // Integer to Double-precision Move - InstrItinData<IIC_VMOVID , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<1, [FU_NLSPipe]>], [2, 1, 1]>, - // - // Single-precision to Integer Move - InstrItinData<IIC_VMOVSI , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<1, [FU_NLSPipe]>], [20, 1]>, - // - // Double-precision to Integer Move - InstrItinData<IIC_VMOVDI , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<1, [FU_NLSPipe]>], [20, 20, 1]>, - // - // Integer to Lane Move - InstrItinData<IIC_VMOVISL , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<2, [FU_NLSPipe]>], [3, 1, 1]>, - // - // Double-register Permute - InstrItinData<IIC_VPERMD, [InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<1, [FU_NLSPipe]>], [2, 2, 1, 1]>, - // - // Quad-register Permute - // Result written in N2, but that is relative to the last cycle of multicycle, - // so we use 3 for those cases - InstrItinData<IIC_VPERMQ, [InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<2, [FU_NLSPipe]>], [3, 3, 1, 1]>, - // - // Quad-register Permute (3 cycle issue) - // Result written in N2, but that is relative to the last cycle of multicycle, - // so we use 4 for 
those cases - InstrItinData<IIC_VPERMQ3, [InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<1, [FU_NLSPipe]>, - InstrStage<1, [FU_NPipe], 0>, - InstrStage<2, [FU_NLSPipe]>], [4, 4, 1, 1]>, - // - // Double-register FP Multiple-Accumulate - InstrItinData<IIC_VMACD, [InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<1, [FU_NPipe]>], [9, 2, 2, 3]>, - // - // Quad-register FP Multiple-Accumulate - // Result written in N9, but that is relative to the last cycle of multicycle, - // so we use 10 for those cases - InstrItinData<IIC_VMACQ, [InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<2, [FU_NPipe]>], [10, 2, 2, 3]>, - // - // Double-register Reciprical Step - InstrItinData<IIC_VRECSD, [InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<1, [FU_NPipe]>], [9, 2, 2]>, - // - // Quad-register Reciprical Step - InstrItinData<IIC_VRECSQ, [InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<2, [FU_NPipe]>], [10, 2, 2]>, - // - // Double-register Integer Count - InstrItinData<IIC_VCNTiD, [InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<1, [FU_NPipe]>], [3, 2, 2]>, - // - // Quad-register Integer Count - // Result written in N3, but that is relative to the last cycle of multicycle, - // so we use 4 for those cases - InstrItinData<IIC_VCNTiQ, [InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<2, [FU_NPipe]>], [4, 2, 2]>, - // - // Double-register Integer Unary - InstrItinData<IIC_VUNAiD, [InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<1, [FU_NPipe]>], [4, 2]>, - // - // Quad-register Integer Unary - InstrItinData<IIC_VUNAiQ, [InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<1, [FU_NPipe]>], [4, 2]>, - // - // Double-register Integer Q-Unary - InstrItinData<IIC_VQUNAiD, [InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<1, [FU_NPipe]>], [4, 1]>, - // - // Quad-register Integer CountQ-Unary - InstrItinData<IIC_VQUNAiQ, [InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<1, [FU_NPipe]>], [4, 1]>, - // - // Double-register Integer Binary - InstrItinData<IIC_VBINiD, [InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<1, [FU_NPipe]>], [3, 2, 2]>, - // - // Quad-register Integer Binary - InstrItinData<IIC_VBINiQ, [InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<1, [FU_NPipe]>], [3, 2, 2]>, - // - // Double-register Integer Binary (4 cycle) - InstrItinData<IIC_VBINi4D, [InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<1, [FU_NPipe]>], [4, 2, 1]>, - // - // Quad-register Integer Binary (4 cycle) - InstrItinData<IIC_VBINi4Q, [InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<1, [FU_NPipe]>], [4, 2, 1]>, - // - // Double-register Integer Subtract - InstrItinData<IIC_VSUBiD, [InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<1, [FU_NPipe]>], [3, 2, 1]>, - // - // Quad-register Integer Subtract - InstrItinData<IIC_VSUBiQ, [InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<1, [FU_NPipe]>], [3, 2, 1]>, - // - // Double-register Integer Shift - InstrItinData<IIC_VSHLiD, [InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<1, [FU_NPipe]>], [3, 1, 1]>, - // - // Quad-register Integer Shift - InstrItinData<IIC_VSHLiQ, [InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<2, [FU_NPipe]>], [4, 1, 1]>, - // - // Double-register Integer Shift (4 cycle) - InstrItinData<IIC_VSHLi4D, [InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<1, [FU_NPipe]>], [4, 1, 1]>, - // - // Quad-register Integer Shift (4 cycle) - InstrItinData<IIC_VSHLi4Q, [InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<2, [FU_NPipe]>], [5, 1, 1]>, - // - // Double-register Integer Pair Add Long - InstrItinData<IIC_VPALiD, [InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - 
InstrStage<1, [FU_NPipe]>], [6, 3, 2, 1]>, - // - // Quad-register Integer Pair Add Long - InstrItinData<IIC_VPALiQ, [InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<2, [FU_NPipe]>], [7, 3, 2, 1]>, - // - // Double-register Integer Multiply (.8, .16) - InstrItinData<IIC_VMULi16D, [InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<1, [FU_NPipe]>], [6, 2, 2]>, - // - // Double-register Integer Multiply (.32) - InstrItinData<IIC_VMULi32D, [InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<2, [FU_NPipe]>], [7, 2, 1]>, - // - // Quad-register Integer Multiply (.8, .16) - InstrItinData<IIC_VMULi16Q, [InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<2, [FU_NPipe]>], [7, 2, 2]>, - // - // Quad-register Integer Multiply (.32) - InstrItinData<IIC_VMULi32Q, [InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<1, [FU_NPipe]>, - InstrStage<2, [FU_NLSPipe], 0>, - InstrStage<3, [FU_NPipe]>], [9, 2, 1]>, - // - // Double-register Integer Multiply-Accumulate (.8, .16) - InstrItinData<IIC_VMACi16D, [InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<1, [FU_NPipe]>], [6, 2, 2, 3]>, - // - // Double-register Integer Multiply-Accumulate (.32) - InstrItinData<IIC_VMACi32D, [InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<2, [FU_NPipe]>], [7, 2, 1, 3]>, - // - // Quad-register Integer Multiply-Accumulate (.8, .16) - InstrItinData<IIC_VMACi16Q, [InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<2, [FU_NPipe]>], [7, 2, 2, 3]>, - // - // Quad-register Integer Multiply-Accumulate (.32) - InstrItinData<IIC_VMACi32Q, [InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<1, [FU_NPipe]>, - InstrStage<2, [FU_NLSPipe], 0>, - InstrStage<3, [FU_NPipe]>], [9, 2, 1, 3]>, - // - // Double-register VEXT - InstrItinData<IIC_VEXTD, [InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<1, [FU_NLSPipe]>], [2, 1, 1]>, - // - // Quad-register VEXT - InstrItinData<IIC_VEXTQ, [InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<2, [FU_NLSPipe]>], [3, 1, 1]>, - // - // VTB - InstrItinData<IIC_VTB1, [InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<2, [FU_NLSPipe]>], [3, 2, 1]>, - InstrItinData<IIC_VTB2, [InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<2, [FU_NLSPipe]>], [3, 2, 2, 1]>, - InstrItinData<IIC_VTB3, [InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<1, [FU_NLSPipe]>, - InstrStage<1, [FU_NPipe], 0>, - InstrStage<2, [FU_NLSPipe]>], [4, 2, 2, 3, 1]>, - InstrItinData<IIC_VTB4, [InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<1, [FU_NLSPipe]>, - InstrStage<1, [FU_NPipe], 0>, - InstrStage<2, [FU_NLSPipe]>], [4, 2, 2, 3, 3, 1]>, - // - // VTBX - InstrItinData<IIC_VTBX1, [InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<2, [FU_NLSPipe]>], [3, 1, 2, 1]>, - InstrItinData<IIC_VTBX2, [InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<2, [FU_NLSPipe]>], [3, 1, 2, 2, 1]>, - InstrItinData<IIC_VTBX3, [InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<1, [FU_NLSPipe]>, - InstrStage<1, [FU_NPipe], 0>, - InstrStage<2, [FU_NLSPipe]>], [4, 1, 2, 2, 3, 1]>, - InstrItinData<IIC_VTBX4, [InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<1, [FU_NLSPipe]>, - InstrStage<1, [FU_NPipe], 0>, - InstrStage<2, [FU_NLSPipe]>], [4, 1, 2, 2, 3, 3, 1]> -]>; diff --git a/lib/Target/ARM/ARMSelectionDAGInfo.cpp b/lib/Target/ARM/ARMSelectionDAGInfo.cpp new file mode 100644 index 0000000..c04ee38 --- /dev/null +++ b/lib/Target/ARM/ARMSelectionDAGInfo.cpp @@ -0,0 +1,22 @@ +//===-- ARMSelectionDAGInfo.cpp - ARM SelectionDAG Info -------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source 
+// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the ARMSelectionDAGInfo class. +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "arm-selectiondag-info" +#include "ARMSelectionDAGInfo.h" +using namespace llvm; + +ARMSelectionDAGInfo::ARMSelectionDAGInfo() { +} + +ARMSelectionDAGInfo::~ARMSelectionDAGInfo() { +} diff --git a/lib/Target/ARM/ARMSelectionDAGInfo.h b/lib/Target/ARM/ARMSelectionDAGInfo.h new file mode 100644 index 0000000..afe9a47 --- /dev/null +++ b/lib/Target/ARM/ARMSelectionDAGInfo.h @@ -0,0 +1,29 @@ +//===-- ARMSelectionDAGInfo.h - ARM SelectionDAG Info -----------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines the ARM subclass for TargetSelectionDAGInfo. +// +//===----------------------------------------------------------------------===// + +#ifndef ARMSELECTIONDAGINFO_H +#define ARMSELECTIONDAGINFO_H + +#include "llvm/Target/TargetSelectionDAGInfo.h" + +namespace llvm { + +class ARMSelectionDAGInfo : public TargetSelectionDAGInfo { +public: + ARMSelectionDAGInfo(); + ~ARMSelectionDAGInfo(); +}; + +} + +#endif diff --git a/lib/Target/ARM/ARMSubtarget.cpp b/lib/Target/ARM/ARMSubtarget.cpp index 9e55cd8..b11580a 100644 --- a/lib/Target/ARM/ARMSubtarget.cpp +++ b/lib/Target/ARM/ARMSubtarget.cpp @@ -116,7 +116,8 @@ ARMSubtarget::ARMSubtarget(const std::string &TT, const std::string &FS, /// GVIsIndirectSymbol - true if the GV will be accessed via an indirect symbol. bool -ARMSubtarget::GVIsIndirectSymbol(GlobalValue *GV, Reloc::Model RelocM) const { +ARMSubtarget::GVIsIndirectSymbol(const GlobalValue *GV, + Reloc::Model RelocM) const { if (RelocM == Reloc::Static) return false; diff --git a/lib/Target/ARM/ARMSubtarget.h b/lib/Target/ARM/ARMSubtarget.h index fa56a91..288a19a 100644 --- a/lib/Target/ARM/ARMSubtarget.h +++ b/lib/Target/ARM/ARMSubtarget.h @@ -160,7 +160,7 @@ protected: /// GVIsIndirectSymbol - true if the GV will be accessed via an indirect /// symbol. - bool GVIsIndirectSymbol(GlobalValue *GV, Reloc::Model RelocM) const; + bool GVIsIndirectSymbol(const GlobalValue *GV, Reloc::Model RelocM) const; }; } // End llvm namespace diff --git a/lib/Target/ARM/ARMTargetMachine.cpp b/lib/Target/ARM/ARMTargetMachine.cpp index 95f57b7..662e61e 100644 --- a/lib/Target/ARM/ARMTargetMachine.cpp +++ b/lib/Target/ARM/ARMTargetMachine.cpp @@ -102,8 +102,12 @@ bool ARMBaseTargetMachine::addPreRegAlloc(PassManagerBase &PM, bool ARMBaseTargetMachine::addPreSched2(PassManagerBase &PM, CodeGenOpt::Level OptLevel) { // FIXME: temporarily disabling load / store optimization pass for Thumb1. - if (OptLevel != CodeGenOpt::None && !Subtarget.isThumb1Only()) - PM.add(createARMLoadStoreOptimizationPass()); + if (OptLevel != CodeGenOpt::None) { + if (!Subtarget.isThumb1Only()) + PM.add(createARMLoadStoreOptimizationPass()); + if (Subtarget.hasNEON()) + PM.add(createNEONMoveFixPass()); + } // Expand some pseudo instructions into multiple instructions to allow // proper scheduling. 
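The two ARMTargetMachine.cpp hunks around this point move the NEON move-fix pass from the pre-emit stage into addPreSched2, next to the load/store optimizer. As a rough illustration of the gating logic only (placeholder strings, not the real PassManagerBase or pass-creation API), a minimal standalone sketch:

```cpp
// Standalone model of the addPreSched2 gating shown above: both passes are
// skipped at -O0, the load/store optimizer is skipped for Thumb1-only
// subtargets, and the NEON move fix is only queued when NEON is available.
// Pass names here are illustrative strings, not the real LLVM pass objects.
#include <iostream>
#include <string>
#include <vector>

struct Subtarget {
  bool Thumb1Only;
  bool NEON;
};

static void addPreSched2(std::vector<std::string> &PM, const Subtarget &ST,
                         bool Optimizing) {
  if (Optimizing) {
    if (!ST.Thumb1Only)
      PM.push_back("ARMLoadStoreOptimization");
    if (ST.NEON)
      PM.push_back("NEONMoveFix"); // moved here from addPreEmitPass below
  }
}

int main() {
  std::vector<std::string> PM;
  addPreSched2(PM, Subtarget{/*Thumb1Only=*/false, /*NEON=*/true},
               /*Optimizing=*/true);
  for (const std::string &P : PM)
    std::cout << P << '\n'; // ARMLoadStoreOptimization, then NEONMoveFix
}
```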
@@ -118,8 +122,6 @@ bool ARMBaseTargetMachine::addPreEmitPass(PassManagerBase &PM, if (OptLevel != CodeGenOpt::None) { if (!Subtarget.isThumb1Only()) PM.add(createIfConverterPass()); - if (Subtarget.hasNEON()) - PM.add(createNEONMoveFixPass()); } if (Subtarget.isThumb2()) { diff --git a/lib/Target/ARM/ARMTargetMachine.h b/lib/Target/ARM/ARMTargetMachine.h index c32f16c..4e205df 100644 --- a/lib/Target/ARM/ARMTargetMachine.h +++ b/lib/Target/ARM/ARMTargetMachine.h @@ -71,8 +71,8 @@ public: return &InstrInfo.getRegisterInfo(); } - virtual ARMTargetLowering *getTargetLowering() const { - return const_cast<ARMTargetLowering*>(&TLInfo); + virtual const ARMTargetLowering *getTargetLowering() const { + return &TLInfo; } virtual const ARMInstrInfo *getInstrInfo() const { return &InstrInfo; } @@ -97,8 +97,8 @@ public: return &InstrInfo->getRegisterInfo(); } - virtual ARMTargetLowering *getTargetLowering() const { - return const_cast<ARMTargetLowering*>(&TLInfo); + virtual const ARMTargetLowering *getTargetLowering() const { + return &TLInfo; } /// returns either Thumb1InstrInfo or Thumb2InstrInfo diff --git a/lib/Target/ARM/ARMTargetObjectFile.cpp b/lib/Target/ARM/ARMTargetObjectFile.cpp index 680d032..091a3b3 100644 --- a/lib/Target/ARM/ARMTargetObjectFile.cpp +++ b/lib/Target/ARM/ARMTargetObjectFile.cpp @@ -9,6 +9,7 @@ #include "ARMTargetObjectFile.h" #include "ARMSubtarget.h" +#include "llvm/MC/MCContext.h" #include "llvm/MC/MCSectionELF.h" #include "llvm/Support/Dwarf.h" #include "llvm/Target/TargetMachine.h" @@ -25,12 +26,14 @@ void ARMElfTargetObjectFile::Initialize(MCContext &Ctx, if (TM.getSubtarget<ARMSubtarget>().isAAPCS_ABI()) { StaticCtorSection = - getELFSection(".init_array", MCSectionELF::SHT_INIT_ARRAY, - MCSectionELF::SHF_WRITE | MCSectionELF::SHF_ALLOC, - SectionKind::getDataRel()); + getContext().getELFSection(".init_array", MCSectionELF::SHT_INIT_ARRAY, + MCSectionELF::SHF_WRITE | + MCSectionELF::SHF_ALLOC, + SectionKind::getDataRel()); StaticDtorSection = - getELFSection(".fini_array", MCSectionELF::SHT_FINI_ARRAY, - MCSectionELF::SHF_WRITE | MCSectionELF::SHF_ALLOC, - SectionKind::getDataRel()); + getContext().getELFSection(".fini_array", MCSectionELF::SHT_FINI_ARRAY, + MCSectionELF::SHF_WRITE | + MCSectionELF::SHF_ALLOC, + SectionKind::getDataRel()); } } diff --git a/lib/Target/ARM/AsmParser/ARMAsmLexer.cpp b/lib/Target/ARM/AsmParser/ARMAsmLexer.cpp new file mode 100644 index 0000000..f859d1b --- /dev/null +++ b/lib/Target/ARM/AsmParser/ARMAsmLexer.cpp @@ -0,0 +1,140 @@ +//===-- ARMAsmLexer.cpp - Tokenize ARM assembly to AsmTokens --------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +#include "ARM.h" +#include "ARMTargetMachine.h" + +#include "llvm/ADT/OwningPtr.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/StringExtras.h" + +#include "llvm/MC/MCAsmInfo.h" +#include "llvm/MC/MCParser/MCAsmLexer.h" +#include "llvm/MC/MCParser/MCParsedAsmOperand.h" + +#include "llvm/Target/TargetAsmLexer.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetRegistry.h" + +#include <string> +#include <map> + +using namespace llvm; + +namespace { + + class ARMBaseAsmLexer : public TargetAsmLexer { + const MCAsmInfo &AsmInfo; + + const AsmToken &lexDefinite() { + return getLexer()->Lex(); + } + + AsmToken LexTokenUAL(); + protected: + typedef std::map <std::string, unsigned> rmap_ty; + + rmap_ty RegisterMap; + + void InitRegisterMap(const TargetRegisterInfo *info) { + unsigned numRegs = info->getNumRegs(); + + for (unsigned i = 0; i < numRegs; ++i) { + const char *regName = info->getName(i); + if (regName) + RegisterMap[regName] = i; + } + } + + unsigned MatchRegisterName(StringRef Name) { + rmap_ty::iterator iter = RegisterMap.find(Name.str()); + if (iter != RegisterMap.end()) + return iter->second; + else + return 0; + } + + AsmToken LexToken() { + if (!Lexer) { + SetError(SMLoc(), "No MCAsmLexer installed"); + return AsmToken(AsmToken::Error, "", 0); + } + + switch (AsmInfo.getAssemblerDialect()) { + default: + SetError(SMLoc(), "Unhandled dialect"); + return AsmToken(AsmToken::Error, "", 0); + case 0: + return LexTokenUAL(); + } + } + public: + ARMBaseAsmLexer(const Target &T, const MCAsmInfo &MAI) + : TargetAsmLexer(T), AsmInfo(MAI) { + } + }; + + class ARMAsmLexer : public ARMBaseAsmLexer { + public: + ARMAsmLexer(const Target &T, const MCAsmInfo &MAI) + : ARMBaseAsmLexer(T, MAI) { + std::string tripleString("arm-unknown-unknown"); + std::string featureString; + OwningPtr<const TargetMachine> + targetMachine(T.createTargetMachine(tripleString, featureString)); + InitRegisterMap(targetMachine->getRegisterInfo()); + } + }; + + class ThumbAsmLexer : public ARMBaseAsmLexer { + public: + ThumbAsmLexer(const Target &T, const MCAsmInfo &MAI) + : ARMBaseAsmLexer(T, MAI) { + std::string tripleString("thumb-unknown-unknown"); + std::string featureString; + OwningPtr<const TargetMachine> + targetMachine(T.createTargetMachine(tripleString, featureString)); + InitRegisterMap(targetMachine->getRegisterInfo()); + } + }; +} + +AsmToken ARMBaseAsmLexer::LexTokenUAL() { + const AsmToken &lexedToken = lexDefinite(); + + switch (lexedToken.getKind()) { + default: + return AsmToken(lexedToken); + case AsmToken::Error: + SetError(Lexer->getErrLoc(), Lexer->getErr()); + return AsmToken(lexedToken); + case AsmToken::Identifier: + { + std::string upperCase = lexedToken.getString().str(); + std::string lowerCase = LowercaseString(upperCase); + StringRef lowerRef(lowerCase); + + unsigned regID = MatchRegisterName(lowerRef); + + if (regID) { + return AsmToken(AsmToken::Register, + lexedToken.getString(), + static_cast<int64_t>(regID)); + } else { + return AsmToken(lexedToken); + } + } + } +} + +extern "C" void LLVMInitializeARMAsmLexer() { + RegisterAsmLexer<ARMAsmLexer> X(TheARMTarget); + RegisterAsmLexer<ThumbAsmLexer> Y(TheThumbTarget); +} + diff --git a/lib/Target/ARM/AsmParser/ARMAsmParser.cpp b/lib/Target/ARM/AsmParser/ARMAsmParser.cpp index cf55377..bfa89c4 100644 --- a/lib/Target/ARM/AsmParser/ARMAsmParser.cpp +++ b/lib/Target/ARM/AsmParser/ARMAsmParser.cpp @@ -812,8 +812,11 @@ bool 
ARMAsmParser::ParseDirectiveCode(SMLoc L) { return false; } +extern "C" void LLVMInitializeARMAsmLexer(); + /// Force static initialization. extern "C" void LLVMInitializeARMAsmParser() { RegisterAsmParser<ARMAsmParser> X(TheARMTarget); RegisterAsmParser<ARMAsmParser> Y(TheThumbTarget); + LLVMInitializeARMAsmLexer(); } diff --git a/lib/Target/ARM/AsmParser/CMakeLists.txt b/lib/Target/ARM/AsmParser/CMakeLists.txt index 308c6cf..9ba7c01 100644 --- a/lib/Target/ARM/AsmParser/CMakeLists.txt +++ b/lib/Target/ARM/AsmParser/CMakeLists.txt @@ -1,6 +1,7 @@ include_directories( ${CMAKE_CURRENT_BINARY_DIR}/.. ${CMAKE_CURRENT_SOURCE_DIR}/.. ) add_llvm_library(LLVMARMAsmParser + ARMAsmLexer.cpp ARMAsmParser.cpp ) diff --git a/lib/Target/ARM/AsmPrinter/ARMAsmPrinter.cpp b/lib/Target/ARM/AsmPrinter/ARMAsmPrinter.cpp index 15c5294..80a9d2d 100644 --- a/lib/Target/ARM/AsmPrinter/ARMAsmPrinter.cpp +++ b/lib/Target/ARM/AsmPrinter/ARMAsmPrinter.cpp @@ -21,6 +21,7 @@ #include "ARMMachineFunctionInfo.h" #include "ARMMCInstLower.h" #include "ARMTargetMachine.h" +#include "llvm/Analysis/DebugInfo.h" #include "llvm/Constants.h" #include "llvm/Module.h" #include "llvm/Type.h" @@ -239,7 +240,7 @@ namespace { } else if (ACPV->isBlockAddress()) { O << *GetBlockAddressSymbol(ACPV->getBlockAddress()); } else if (ACPV->isGlobalValue()) { - GlobalValue *GV = ACPV->getGV(); + const GlobalValue *GV = ACPV->getGV(); bool isIndirect = Subtarget->isTargetDarwin() && Subtarget->GVIsIndirectSymbol(GV, TM.getRelocationModel()); if (!isIndirect) @@ -352,7 +353,7 @@ void ARMAsmPrinter::printOperand(const MachineInstr *MI, int OpNum, return; case MachineOperand::MO_GlobalAddress: { bool isCallOp = Modifier && !strcmp(Modifier, "call"); - GlobalValue *GV = MO.getGlobal(); + const GlobalValue *GV = MO.getGlobal(); if ((Modifier && strcmp(Modifier, "lo16") == 0) || (TF & ARMII::MO_LO16)) @@ -504,7 +505,6 @@ void ARMAsmPrinter::printAddrMode2OffsetOperand(const MachineInstr *MI, int Op, if (!MO1.getReg()) { unsigned ImmOffs = ARM_AM::getAM2Offset(MO2.getImm()); - assert(ImmOffs && "Malformed indexed load / store!"); O << "#" << ARM_AM::getAddrOpcStr(ARM_AM::getAM2Op(MO2.getImm())) << ImmOffs; @@ -556,7 +556,6 @@ void ARMAsmPrinter::printAddrMode3OffsetOperand(const MachineInstr *MI, int Op, } unsigned ImmOffs = ARM_AM::getAM3Offset(MO2.getImm()); - assert(ImmOffs && "Malformed indexed load / store!"); O << "#" << ARM_AM::getAddrOpcStr(ARM_AM::getAM3Op(MO2.getImm())) << ImmOffs; @@ -1110,6 +1109,24 @@ void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) { SmallString<128> Str; raw_svector_ostream OS(Str); + if (MI->getOpcode() == ARM::DBG_VALUE) { + unsigned NOps = MI->getNumOperands(); + assert(NOps==4); + OS << '\t' << MAI->getCommentString() << "DEBUG_VALUE: "; + // cast away const; DIetc do not take const operands for some reason. + DIVariable V(const_cast<MDNode *>(MI->getOperand(NOps-1).getMetadata())); + OS << V.getName(); + OS << " <- "; + // Frame address. Currently handles register +- offset only. 
+ assert(MI->getOperand(0).isReg() && MI->getOperand(1).isImm()); + OS << '['; printOperand(MI, 0, OS); OS << '+'; printOperand(MI, 1, OS); + OS << ']'; + OS << "+"; + printOperand(MI, NOps-2, OS); + OutStreamer.EmitRawText(OS.str()); + return; + } + printInstruction(MI, OS); OutStreamer.EmitRawText(OS.str()); @@ -1129,22 +1146,23 @@ void ARMAsmPrinter::EmitStartOfAsmFile(Module &M) { // avoid out-of-range branches that are due a fundamental limitation of // the way symbol offsets are encoded with the current Darwin ARM // relocations. - TargetLoweringObjectFileMachO &TLOFMacho = - static_cast<TargetLoweringObjectFileMachO &>(getObjFileLowering()); + const TargetLoweringObjectFileMachO &TLOFMacho = + static_cast<const TargetLoweringObjectFileMachO &>( + getObjFileLowering()); OutStreamer.SwitchSection(TLOFMacho.getTextSection()); OutStreamer.SwitchSection(TLOFMacho.getTextCoalSection()); OutStreamer.SwitchSection(TLOFMacho.getConstTextCoalSection()); if (RelocM == Reloc::DynamicNoPIC) { const MCSection *sect = - TLOFMacho.getMachOSection("__TEXT", "__symbol_stub4", - MCSectionMachO::S_SYMBOL_STUBS, - 12, SectionKind::getText()); + OutContext.getMachOSection("__TEXT", "__symbol_stub4", + MCSectionMachO::S_SYMBOL_STUBS, + 12, SectionKind::getText()); OutStreamer.SwitchSection(sect); } else { const MCSection *sect = - TLOFMacho.getMachOSection("__TEXT", "__picsymbolstub4", - MCSectionMachO::S_SYMBOL_STUBS, - 16, SectionKind::getText()); + OutContext.getMachOSection("__TEXT", "__picsymbolstub4", + MCSectionMachO::S_SYMBOL_STUBS, + 16, SectionKind::getText()); OutStreamer.SwitchSection(sect); } } @@ -1201,8 +1219,8 @@ void ARMAsmPrinter::EmitStartOfAsmFile(Module &M) { void ARMAsmPrinter::EmitEndOfAsmFile(Module &M) { if (Subtarget->isTargetDarwin()) { // All darwin targets use mach-o. 
- TargetLoweringObjectFileMachO &TLOFMacho = - static_cast<TargetLoweringObjectFileMachO &>(getObjFileLowering()); + const TargetLoweringObjectFileMachO &TLOFMacho = + static_cast<const TargetLoweringObjectFileMachO &>(getObjFileLowering()); MachineModuleInfoMachO &MMIMacho = MMI->getObjFileInfo<MachineModuleInfoMachO>(); diff --git a/lib/Target/ARM/AsmPrinter/ARMInstPrinter.cpp b/lib/Target/ARM/AsmPrinter/ARMInstPrinter.cpp index ef5ead6..ac6331f 100644 --- a/lib/Target/ARM/AsmPrinter/ARMInstPrinter.cpp +++ b/lib/Target/ARM/AsmPrinter/ARMInstPrinter.cpp @@ -18,6 +18,7 @@ #include "llvm/MC/MCInst.h" #include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCExpr.h" +#include "llvm/ADT/StringExtras.h" #include "llvm/Support/raw_ostream.h" using namespace llvm; @@ -330,7 +331,6 @@ void ARMInstPrinter::printAddrMode2OffsetOperand(const MCInst *MI, if (!MO1.getReg()) { unsigned ImmOffs = ARM_AM::getAM2Offset(MO2.getImm()); - assert(ImmOffs && "Malformed indexed load / store!"); O << '#' << ARM_AM::getAddrOpcStr(ARM_AM::getAM2Op(MO2.getImm())) << ImmOffs; @@ -380,7 +380,6 @@ void ARMInstPrinter::printAddrMode3OffsetOperand(const MCInst *MI, } unsigned ImmOffs = ARM_AM::getAM3Offset(MO2.getImm()); - assert(ImmOffs && "Malformed indexed load / store!"); O << '#' << ARM_AM::getAddrOpcStr(ARM_AM::getAM3Op(MO2.getImm())) << ImmOffs; @@ -779,3 +778,22 @@ void ARMInstPrinter::printVFPf64ImmOperand(const MCInst *MI, unsigned OpNum, O << '#' << MI->getOperand(OpNum).getImm(); } +void ARMInstPrinter::printHex8ImmOperand(const MCInst *MI, unsigned OpNum, + raw_ostream &O) { + O << "#0x" << utohexstr(MI->getOperand(OpNum).getImm() & 0xff); +} + +void ARMInstPrinter::printHex16ImmOperand(const MCInst *MI, unsigned OpNum, + raw_ostream &O) { + O << "#0x" << utohexstr(MI->getOperand(OpNum).getImm() & 0xffff); +} + +void ARMInstPrinter::printHex32ImmOperand(const MCInst *MI, unsigned OpNum, + raw_ostream &O) { + O << "#0x" << utohexstr(MI->getOperand(OpNum).getImm() & 0xffffffff); +} + +void ARMInstPrinter::printHex64ImmOperand(const MCInst *MI, unsigned OpNum, + raw_ostream &O) { + O << "#0x" << utohexstr(MI->getOperand(OpNum).getImm()); +} diff --git a/lib/Target/ARM/AsmPrinter/ARMInstPrinter.h b/lib/Target/ARM/AsmPrinter/ARMInstPrinter.h index dd006fc..be0b7c1 100644 --- a/lib/Target/ARM/AsmPrinter/ARMInstPrinter.h +++ b/lib/Target/ARM/AsmPrinter/ARMInstPrinter.h @@ -104,10 +104,10 @@ public: void printNoHashImmediate(const MCInst *MI, unsigned OpNum, raw_ostream &O); void printVFPf32ImmOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O); void printVFPf64ImmOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O); - void printHex8ImmOperand(const MCInst *MI, int OpNum, raw_ostream &O) {} - void printHex16ImmOperand(const MCInst *MI, int OpNum, raw_ostream &O) {} - void printHex32ImmOperand(const MCInst *MI, int OpNum, raw_ostream &O) {} - void printHex64ImmOperand(const MCInst *MI, int OpNum, raw_ostream &O) {} + void printHex8ImmOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O); + void printHex16ImmOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O); + void printHex32ImmOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O); + void printHex64ImmOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O); void printPCLabel(const MCInst *MI, unsigned OpNum, raw_ostream &O); // FIXME: Implement. 
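The new printHex8/16/32ImmOperand bodies mask the immediate before handing it to utohexstr because MCOperand::getImm() returns a full 64-bit signed value, so a small negative immediate would otherwise print with all upper bits set. Below is a minimal standalone sketch of that masking idea; it uses plain printf instead of the LLVM utohexstr helper purely to stay self-contained, and is not part of the patch.

// Illustrative sketch only. It mimics the masking done in the new
// printHex*ImmOperand implementations: truncate a (possibly sign-extended)
// 64-bit immediate to the operand width before formatting it as hex.
#include <cstdint>
#include <cstdio>

static void printHexImm(int64_t Imm, uint64_t Mask) {
  // Masking first keeps e.g. Imm = -1 from printing as 0xffffffffffffffff
  // when the operand is only 8 bits wide.
  std::printf("#0x%llx\n", (unsigned long long)((uint64_t)Imm & Mask));
}

int main() {
  printHexImm(-1, 0xffu);           // prints #0xff    (8-bit operand)
  printHexImm(-1, 0xffffu);         // prints #0xffff  (16-bit operand)
  printHexImm(0x1234, 0xffffffffu); // prints #0x1234  (32-bit operand)
  return 0;
}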
diff --git a/lib/Target/ARM/CMakeLists.txt b/lib/Target/ARM/CMakeLists.txt index bbc0095..29e66e1 100644 --- a/lib/Target/ARM/CMakeLists.txt +++ b/lib/Target/ARM/CMakeLists.txt @@ -10,6 +10,7 @@ tablegen(ARMGenAsmWriter.inc -gen-asm-writer) tablegen(ARMGenDAGISel.inc -gen-dag-isel) tablegen(ARMGenCallingConv.inc -gen-callingconv) tablegen(ARMGenSubtarget.inc -gen-subtarget) +tablegen(ARMGenEDInfo.inc -gen-enhanced-disassembly-info) add_llvm_target(ARMCodeGen ARMBaseInstrInfo.cpp @@ -36,6 +37,7 @@ add_llvm_target(ARMCodeGen Thumb2InstrInfo.cpp Thumb2RegisterInfo.cpp Thumb2SizeReduction.cpp + ARMSelectionDAGInfo.cpp ) target_link_libraries (LLVMARMCodeGen LLVMSelectionDAG) diff --git a/lib/Target/ARM/Disassembler/ARMDisassembler.cpp b/lib/Target/ARM/Disassembler/ARMDisassembler.cpp index 47c3104..4de697e 100644 --- a/lib/Target/ARM/Disassembler/ARMDisassembler.cpp +++ b/lib/Target/ARM/Disassembler/ARMDisassembler.cpp @@ -18,6 +18,7 @@ #include "ARMDisassembler.h" #include "ARMDisassemblerCore.h" +#include "llvm/MC/EDInstInfo.h" #include "llvm/MC/MCInst.h" #include "llvm/Target/TargetRegistry.h" #include "llvm/Support/Debug.h" @@ -38,7 +39,9 @@ /// #include "../ARMGenDecoderTables.inc" -namespace llvm { +#include "../ARMGenEDInfo.inc" + +using namespace llvm; /// showBitVector - Use the raw_ostream to log a diagnostic message describing /// the inidividual bits of the instruction. @@ -247,27 +250,27 @@ static unsigned T2Morph2LoadLiteral(unsigned Opcode) { case ARM::t2LDR_POST: case ARM::t2LDR_PRE: case ARM::t2LDRi12: case ARM::t2LDRi8: - case ARM::t2LDRs: + case ARM::t2LDRs: case ARM::t2LDRT: return ARM::t2LDRpci; case ARM::t2LDRB_POST: case ARM::t2LDRB_PRE: case ARM::t2LDRBi12: case ARM::t2LDRBi8: - case ARM::t2LDRBs: + case ARM::t2LDRBs: case ARM::t2LDRBT: return ARM::t2LDRBpci; case ARM::t2LDRH_POST: case ARM::t2LDRH_PRE: case ARM::t2LDRHi12: case ARM::t2LDRHi8: - case ARM::t2LDRHs: + case ARM::t2LDRHs: case ARM::t2LDRHT: return ARM::t2LDRHpci; case ARM::t2LDRSB_POST: case ARM::t2LDRSB_PRE: case ARM::t2LDRSBi12: case ARM::t2LDRSBi8: - case ARM::t2LDRSBs: + case ARM::t2LDRSBs: case ARM::t2LDRSBT: return ARM::t2LDRSBpci; case ARM::t2LDRSH_POST: case ARM::t2LDRSH_PRE: case ARM::t2LDRSHi12: case ARM::t2LDRSHi8: - case ARM::t2LDRSHs: + case ARM::t2LDRSHs: case ARM::t2LDRSHT: return ARM::t2LDRSHpci; } } @@ -404,7 +407,6 @@ bool ARMDisassembler::getInstruction(MCInst &MI, }); ARMBasicMCBuilder *Builder = CreateMCBuilder(Opcode, Format); - if (!Builder) return false; @@ -492,11 +494,11 @@ bool ThumbDisassembler::getInstruction(MCInst &MI, }); ARMBasicMCBuilder *Builder = CreateMCBuilder(Opcode, Format); - Builder->setSession(const_cast<Session *>(&SO)); - if (!Builder) return false; + Builder->SetSession(const_cast<Session *>(&SO)); + if (!Builder->Build(MI, insn)) return false; @@ -506,17 +508,37 @@ bool ThumbDisassembler::getInstruction(MCInst &MI, } // A8.6.50 +// Valid return values are {1, 2, 3, 4}, with 0 signifying an error condition. static unsigned short CountITSize(unsigned ITMask) { // First count the trailing zeros of the IT mask. unsigned TZ = CountTrailingZeros_32(ITMask); - assert(TZ <= 3 && "Encoding error"); + if (TZ > 3) { + DEBUG(errs() << "Encoding error: IT Mask '0000'"); + return 0; + } return (4 - TZ); } -/// Init ITState. -void Session::InitIT(unsigned short bits7_0) { +/// Init ITState. Note that at least one bit is always 1 in mask. 
+bool Session::InitIT(unsigned short bits7_0) { ITCounter = CountITSize(slice(bits7_0, 3, 0)); + if (ITCounter == 0) + return false; + + // A8.6.50 IT + unsigned short FirstCond = slice(bits7_0, 7, 4); + if (FirstCond == 0xF) { + DEBUG(errs() << "Encoding error: IT FirstCond '1111'"); + return false; + } + if (FirstCond == 0xE && ITCounter != 1) { + DEBUG(errs() << "Encoding error: IT FirstCond '1110' && Mask != '1000'"); + return false; + } + ITState = bits7_0; + + return true; } /// Update ITState if necessary. @@ -547,4 +569,10 @@ extern "C" void LLVMInitializeARMDisassembler() { createThumbDisassembler); } -} // namespace llvm +EDInstInfo *ARMDisassembler::getEDInfo() const { + return instInfoARM; +} + +EDInstInfo *ThumbDisassembler::getEDInfo() const { + return instInfoARM; +} diff --git a/lib/Target/ARM/Disassembler/ARMDisassembler.h b/lib/Target/ARM/Disassembler/ARMDisassembler.h index 44592e0..0a74a38 100644 --- a/lib/Target/ARM/Disassembler/ARMDisassembler.h +++ b/lib/Target/ARM/Disassembler/ARMDisassembler.h @@ -24,6 +24,8 @@ class MCInst; class MemoryObject; class raw_ostream; +struct EDInstInfo; + /// ARMDisassembler - ARM disassembler for all ARM platforms. class ARMDisassembler : public MCDisassembler { public: @@ -42,6 +44,9 @@ public: const MemoryObject ®ion, uint64_t address, raw_ostream &vStream) const; + + /// getEDInfo - See MCDisassembler. + EDInstInfo *getEDInfo() const; private: }; @@ -55,7 +60,7 @@ public: Session() : ITCounter(0), ITState(0) {} ~Session() {} /// InitIT - Initializes ITCounter/ITState. - void InitIT(unsigned short bits7_0); + bool InitIT(unsigned short bits7_0); /// UpdateIT - Updates ITCounter/ITState as IT Block progresses. void UpdateIT(); @@ -82,6 +87,9 @@ public: const MemoryObject ®ion, uint64_t address, raw_ostream &vStream) const; + + /// getEDInfo - See MCDisassembler. + EDInstInfo *getEDInfo() const; private: Session SO; }; diff --git a/lib/Target/ARM/Disassembler/ARMDisassemblerCore.cpp b/lib/Target/ARM/Disassembler/ARMDisassemblerCore.cpp index db921ef..adb7795 100644 --- a/lib/Target/ARM/Disassembler/ARMDisassemblerCore.cpp +++ b/lib/Target/ARM/Disassembler/ARMDisassemblerCore.cpp @@ -13,8 +13,12 @@ // //===----------------------------------------------------------------------===// +#define DEBUG_TYPE "arm-disassembler" + #include "ARMDisassemblerCore.h" #include "ARMAddressingModes.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" /// ARMGenInstrInfo.inc - ARMGenInstrInfo.inc contains the static const /// TargetInstrDesc ARMInsts[] definition and the TargetOperandInfo[]'s @@ -75,7 +79,7 @@ const char *ARMUtils::OpcodeName(unsigned Opcode) { // Return the register enum Based on RegClass and the raw register number. // For DRegPair, see comments below. // FIXME: Auto-gened? -static unsigned getRegisterEnum(unsigned RegClassID, unsigned RawRegister, +static unsigned getRegisterEnum(BO B, unsigned RegClassID, unsigned RawRegister, bool DRegPair = false) { if (DRegPair && RegClassID == ARM::QPRRegClassID) { @@ -345,7 +349,9 @@ static unsigned getRegisterEnum(unsigned RegClassID, unsigned RawRegister, } break; } - assert(0 && "Invalid (RegClassID, RawRegister) combination"); + DEBUG(errs() << "Invalid (RegClassID, RawRegister) combination\n"); + // Encoding error. Mark the builder with error code != 0. 
+ B->SetErr(-1); return 0; } @@ -509,7 +515,7 @@ static bool DisassemblePseudo(MCInst &MI, unsigned Opcode, uint32_t insn, // Inst{3-0} => Rm // Inst{11-8} => Rs static bool DisassembleMulFrm(MCInst &MI, unsigned Opcode, uint32_t insn, - unsigned short NumOps, unsigned &NumOpsAdded, BO) { + unsigned short NumOps, unsigned &NumOpsAdded, BO B) { const TargetInstrDesc &TID = ARMInsts[Opcode]; unsigned short NumDefs = TID.getNumDefs(); @@ -529,26 +535,26 @@ static bool DisassembleMulFrm(MCInst &MI, unsigned Opcode, uint32_t insn, if (NumDefs == 2) { assert(NumOps >= 4 && OpInfo[3].RegClass == ARM::GPRRegClassID && "Expect 4th register operand"); - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID, + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, decodeRd(insn)))); ++OpIdx; } // The destination register: RdHi{19-16} or Rd{19-16}. - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID, + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, decodeRn(insn)))); // The two src regsiters: Rn{3-0}, then Rm{11-8}. - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID, + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, decodeRm(insn)))); - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID, + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, decodeRs(insn)))); OpIdx += 3; // Many multiply instructions (e.g., MLA) have three src registers. // The third register operand is Ra{15-12}. if (OpIdx < NumOps && OpInfo[OpIdx].RegClass == ARM::GPRRegClassID) { - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID, + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, decodeRd(insn)))); ++OpIdx; } @@ -610,7 +616,7 @@ static inline unsigned GetCopOpc(uint32_t insn) { // and friends // static bool DisassembleCoprocessor(MCInst &MI, unsigned Opcode, uint32_t insn, - unsigned short NumOps, unsigned &NumOpsAdded) { + unsigned short NumOps, unsigned &NumOpsAdded, BO B) { assert(NumOps >= 5 && "Num of operands >= 5 for coprocessor instr"); @@ -631,7 +637,7 @@ static bool DisassembleCoprocessor(MCInst &MI, unsigned Opcode, uint32_t insn, MI.addOperand(MCOperand::CreateImm(decodeRd(insn))); - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID, + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, decodeRn(insn)))); if (PW) { @@ -651,11 +657,11 @@ static bool DisassembleCoprocessor(MCInst &MI, unsigned Opcode, uint32_t insn, MI.addOperand(NoGPR ? MCOperand::CreateImm(decodeRd(insn)) : MCOperand::CreateReg( - getRegisterEnum(ARM::GPRRegClassID, + getRegisterEnum(B, ARM::GPRRegClassID, decodeRd(insn)))); MI.addOperand(OneCopOpc ? 
MCOperand::CreateReg( - getRegisterEnum(ARM::GPRRegClassID, + getRegisterEnum(B, ARM::GPRRegClassID, decodeRn(insn))) : MCOperand::CreateImm(decodeRn(insn))); @@ -688,18 +694,19 @@ static bool DisassembleCoprocessor(MCInst &MI, unsigned Opcode, uint32_t insn, // SRSW/SRS: addrmode4:$addr mode_imm // RFEW/RFE: addrmode4:$addr Rn static bool DisassembleBrFrm(MCInst &MI, unsigned Opcode, uint32_t insn, - unsigned short NumOps, unsigned &NumOpsAdded, BO) { + unsigned short NumOps, unsigned &NumOpsAdded, BO B) { if (CoprocessorOpcode(Opcode)) - return DisassembleCoprocessor(MI, Opcode, insn, NumOps, NumOpsAdded); + return DisassembleCoprocessor(MI, Opcode, insn, NumOps, NumOpsAdded, B); const TargetOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo; + if (!OpInfo) return false; // MRS and MRSsys take one GPR reg Rd. if (Opcode == ARM::MRS || Opcode == ARM::MRSsys) { assert(NumOps >= 1 && OpInfo[0].RegClass == ARM::GPRRegClassID && "Reg operand expected"); - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID, + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, decodeRd(insn)))); NumOpsAdded = 1; return true; @@ -708,7 +715,7 @@ static bool DisassembleBrFrm(MCInst &MI, unsigned Opcode, uint32_t insn, if (Opcode == ARM::BXJ) { assert(NumOps >= 1 && OpInfo[0].RegClass == ARM::GPRRegClassID && "Reg operand expected"); - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID, + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, decodeRm(insn)))); NumOpsAdded = 1; return true; @@ -717,7 +724,7 @@ static bool DisassembleBrFrm(MCInst &MI, unsigned Opcode, uint32_t insn, if (Opcode == ARM::MSR || Opcode == ARM::MSRsys) { assert(NumOps >= 1 && OpInfo[0].RegClass == ARM::GPRRegClassID && "Reg operand expected"); - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID, + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, decodeRm(insn)))); MI.addOperand(MCOperand::CreateImm(slice(insn, 19, 16))); NumOpsAdded = 2; @@ -748,7 +755,7 @@ static bool DisassembleBrFrm(MCInst &MI, unsigned Opcode, uint32_t insn, if (Opcode == ARM::SRSW || Opcode == ARM::SRS) MI.addOperand(MCOperand::CreateImm(slice(insn, 4, 0))); else - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID, + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, decodeRn(insn)))); NumOpsAdded = 3; return true; @@ -791,9 +798,11 @@ static bool DisassembleBrFrm(MCInst &MI, unsigned Opcode, uint32_t insn, // BLXr9, BXr9 // BRIND, BX_RET static bool DisassembleBrMiscFrm(MCInst &MI, unsigned Opcode, uint32_t insn, - unsigned short NumOps, unsigned &NumOpsAdded, BO) { + unsigned short NumOps, unsigned &NumOpsAdded, BO B) { const TargetOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo; + if (!OpInfo) return false; + unsigned &OpIdx = NumOpsAdded; OpIdx = 0; @@ -806,7 +815,7 @@ static bool DisassembleBrMiscFrm(MCInst &MI, unsigned Opcode, uint32_t insn, if (Opcode == ARM::BLXr9 || Opcode == ARM::BRIND) { assert(NumOps >= 1 && OpInfo[OpIdx].RegClass == ARM::GPRRegClassID && "Reg operand expected"); - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID, + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, decodeRm(insn)))); OpIdx = 1; return true; @@ -817,9 +826,9 @@ static bool DisassembleBrMiscFrm(MCInst &MI, unsigned Opcode, uint32_t insn, // InOperandList with GPR:$target and GPR:$idx regs. 
assert(NumOps == 4 && "Expect 4 operands"); - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID, + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, decodeRn(insn)))); - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID, + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, decodeRm(insn)))); // Fill in the two remaining imm operands to signify build completion. @@ -835,7 +844,7 @@ static bool DisassembleBrMiscFrm(MCInst &MI, unsigned Opcode, uint32_t insn, // InOperandList with GPR::$target reg. assert(NumOps == 3 && "Expect 3 operands"); - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID, + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, decodeRm(insn)))); // Fill in the two remaining imm operands to signify build completion. @@ -852,13 +861,13 @@ static bool DisassembleBrMiscFrm(MCInst &MI, unsigned Opcode, uint32_t insn, // See also ARMAddressingModes.h (Addressing Mode #2). assert(NumOps == 5 && getIBit(insn) == 1 && "Expect 5 operands && I-bit=1"); - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID, + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, decodeRn(insn)))); ARM_AM::AddrOpc AddrOpcode = getUBit(insn) ? ARM_AM::add : ARM_AM::sub; // Disassemble the offset reg (Rm), shift type, and immediate shift length. - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID, + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, decodeRm(insn)))); // Inst{6-5} encodes the shift opcode. ARM_AM::ShiftOpc ShOp = getShiftOpcForBits(slice(insn, 6, 5)); @@ -882,14 +891,19 @@ static bool DisassembleBrMiscFrm(MCInst &MI, unsigned Opcode, uint32_t insn, return false; } -static inline uint32_t getBFCInvMask(uint32_t insn) { +static inline bool getBFCInvMask(uint32_t insn, uint32_t &mask) { uint32_t lsb = slice(insn, 11, 7); uint32_t msb = slice(insn, 20, 16); uint32_t Val = 0; - assert(lsb <= msb && "Encoding error: lsb > msb"); + if (msb < lsb) { + DEBUG(errs() << "Encoding error: msb < lsb\n"); + return false; + } + for (uint32_t i = lsb; i <= msb; ++i) Val |= (1 << i); - return ~Val; + mask = ~Val; + return true; } static inline bool SaturateOpcode(unsigned Opcode) { @@ -924,7 +938,7 @@ static inline unsigned decodeSaturatePos(unsigned Opcode, uint32_t insn) { // operations have Rd Rm Rn, instead of the "normal" Rd Rn Rm. // They are QADD, QDADD, QDSUB, and QSUB. static bool DisassembleDPFrm(MCInst &MI, unsigned Opcode, uint32_t insn, - unsigned short NumOps, unsigned &NumOpsAdded, BO) { + unsigned short NumOps, unsigned &NumOpsAdded, BO B) { const TargetInstrDesc &TID = ARMInsts[Opcode]; unsigned short NumDefs = TID.getNumDefs(); @@ -936,7 +950,7 @@ static bool DisassembleDPFrm(MCInst &MI, unsigned Opcode, uint32_t insn, // Disassemble register def if there is one. 
if (NumDefs && (OpInfo[OpIdx].RegClass == ARM::GPRRegClassID)) { - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID, + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, decodeRd(insn)))); ++OpIdx; } @@ -949,7 +963,7 @@ static bool DisassembleDPFrm(MCInst &MI, unsigned Opcode, uint32_t insn, if (SaturateOpcode(Opcode)) { MI.addOperand(MCOperand::CreateImm(decodeSaturatePos(Opcode, insn))); - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID, + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, decodeRm(insn)))); if (Opcode == ARM::SSAT16 || Opcode == ARM::USAT16) { @@ -977,14 +991,18 @@ static bool DisassembleDPFrm(MCInst &MI, unsigned Opcode, uint32_t insn, if (Opcode == ARM::BFC || Opcode == ARM::BFI) { // TIED_TO operand skipped for BFC and Inst{3-0} (Reg) for BFI. MI.addOperand(MCOperand::CreateReg(Opcode == ARM::BFC ? 0 - : getRegisterEnum(ARM::GPRRegClassID, + : getRegisterEnum(B, ARM::GPRRegClassID, decodeRm(insn)))); - MI.addOperand(MCOperand::CreateImm(getBFCInvMask(insn))); + uint32_t mask = 0; + if (!getBFCInvMask(insn, mask)) + return false; + + MI.addOperand(MCOperand::CreateImm(mask)); OpIdx += 2; return true; } if (Opcode == ARM::SBFX || Opcode == ARM::UBFX) { - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID, + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, decodeRm(insn)))); MI.addOperand(MCOperand::CreateImm(slice(insn, 11, 7))); MI.addOperand(MCOperand::CreateImm(slice(insn, 20, 16) + 1)); @@ -1000,7 +1018,7 @@ static bool DisassembleDPFrm(MCInst &MI, unsigned Opcode, uint32_t insn, assert(OpInfo[OpIdx].RegClass == ARM::GPRRegClassID && "Reg operand expected"); MI.addOperand(MCOperand::CreateReg( - getRegisterEnum(ARM::GPRRegClassID, + getRegisterEnum(B, ARM::GPRRegClassID, RmRn ? decodeRm(insn) : decodeRn(insn)))); ++OpIdx; } @@ -1021,7 +1039,7 @@ static bool DisassembleDPFrm(MCInst &MI, unsigned Opcode, uint32_t insn, // routed here as well. // assert(getIBit(insn) == 0 && "I_Bit != '0' reg/reg form"); MI.addOperand(MCOperand::CreateReg( - getRegisterEnum(ARM::GPRRegClassID, + getRegisterEnum(B, ARM::GPRRegClassID, RmRn? decodeRn(insn) : decodeRm(insn)))); ++OpIdx; } else if (Opcode == ARM::MOVi16 || Opcode == ARM::MOVTi16) { @@ -1046,7 +1064,7 @@ static bool DisassembleDPFrm(MCInst &MI, unsigned Opcode, uint32_t insn, } static bool DisassembleDPSoRegFrm(MCInst &MI, unsigned Opcode, uint32_t insn, - unsigned short NumOps, unsigned &NumOpsAdded, BO) { + unsigned short NumOps, unsigned &NumOpsAdded, BO B) { const TargetInstrDesc &TID = ARMInsts[Opcode]; unsigned short NumDefs = TID.getNumDefs(); @@ -1058,7 +1076,7 @@ static bool DisassembleDPSoRegFrm(MCInst &MI, unsigned Opcode, uint32_t insn, // Disassemble register def if there is one. 
if (NumDefs && (OpInfo[OpIdx].RegClass == ARM::GPRRegClassID)) { - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID, + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, decodeRd(insn)))); ++OpIdx; } @@ -1071,7 +1089,7 @@ static bool DisassembleDPSoRegFrm(MCInst &MI, unsigned Opcode, uint32_t insn, if (!isUnary) { assert(OpInfo[OpIdx].RegClass == ARM::GPRRegClassID && "Reg operand expected"); - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID, + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, decodeRn(insn)))); ++OpIdx; } @@ -1094,11 +1112,11 @@ static bool DisassembleDPSoRegFrm(MCInst &MI, unsigned Opcode, uint32_t insn, // Register-controlled shifts have Inst{7} = 0 and Inst{4} = 1. unsigned Rs = slice(insn, 4, 4); - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID, + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, decodeRm(insn)))); if (Rs) { // Register-controlled shifts: [Rm, Rs, shift]. - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID, + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, decodeRs(insn)))); // Inst{6-5} encodes the shift opcode. ARM_AM::ShiftOpc ShOp = getShiftOpcForBits(slice(insn, 6, 5)); @@ -1121,24 +1139,26 @@ static bool DisassembleDPSoRegFrm(MCInst &MI, unsigned Opcode, uint32_t insn, } static bool DisassembleLdStFrm(MCInst &MI, unsigned Opcode, uint32_t insn, - unsigned short NumOps, unsigned &NumOpsAdded, bool isStore) { + unsigned short NumOps, unsigned &NumOpsAdded, bool isStore, BO B) { const TargetInstrDesc &TID = ARMInsts[Opcode]; - unsigned short NumDefs = TID.getNumDefs(); bool isPrePost = isPrePostLdSt(TID.TSFlags); const TargetOperandInfo *OpInfo = TID.OpInfo; + if (!OpInfo) return false; + unsigned &OpIdx = NumOpsAdded; OpIdx = 0; - assert(((!isStore && NumDefs > 0) || (isStore && (NumDefs == 0 || isPrePost))) + assert(((!isStore && TID.getNumDefs() > 0) || + (isStore && (TID.getNumDefs() == 0 || isPrePost))) && "Invalid arguments"); // Operand 0 of a pre- and post-indexed store is the address base writeback. 
if (isPrePost && isStore) { assert(OpInfo[OpIdx].RegClass == ARM::GPRRegClassID && "Reg operand expected"); - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID, + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, decodeRn(insn)))); ++OpIdx; } @@ -1149,7 +1169,7 @@ static bool DisassembleLdStFrm(MCInst &MI, unsigned Opcode, uint32_t insn, assert(OpInfo[OpIdx].RegClass == ARM::GPRRegClassID && "Reg operand expected"); - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID, + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, decodeRd(insn)))); ++OpIdx; @@ -1157,7 +1177,7 @@ static bool DisassembleLdStFrm(MCInst &MI, unsigned Opcode, uint32_t insn, if (isPrePost && !isStore) { assert(OpInfo[OpIdx].RegClass == ARM::GPRRegClassID && "Reg operand expected"); - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID, + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, decodeRn(insn)))); ++OpIdx; } @@ -1170,7 +1190,7 @@ static bool DisassembleLdStFrm(MCInst &MI, unsigned Opcode, uint32_t insn, "Reg operand expected"); assert((!isPrePost || (TID.getOperandConstraint(OpIdx, TOI::TIED_TO) != -1)) && "Index mode or tied_to operand expected"); - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID, + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, decodeRn(insn)))); ++OpIdx; @@ -1194,7 +1214,7 @@ static bool DisassembleLdStFrm(MCInst &MI, unsigned Opcode, uint32_t insn, MI.addOperand(MCOperand::CreateImm(Offset)); } else { // Disassemble the offset reg (Rm), shift type, and immediate shift length. - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID, + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, decodeRm(insn)))); // Inst{6-5} encodes the shift opcode. 
ARM_AM::ShiftOpc ShOp = getShiftOpcForBits(slice(insn, 6, 5)); @@ -1212,13 +1232,13 @@ static bool DisassembleLdStFrm(MCInst &MI, unsigned Opcode, uint32_t insn, } static bool DisassembleLdFrm(MCInst &MI, unsigned Opcode, uint32_t insn, - unsigned short NumOps, unsigned &NumOpsAdded, BO) { - return DisassembleLdStFrm(MI, Opcode, insn, NumOps, NumOpsAdded, false); + unsigned short NumOps, unsigned &NumOpsAdded, BO B) { + return DisassembleLdStFrm(MI, Opcode, insn, NumOps, NumOpsAdded, false, B); } static bool DisassembleStFrm(MCInst &MI, unsigned Opcode, uint32_t insn, - unsigned short NumOps, unsigned &NumOpsAdded, BO) { - return DisassembleLdStFrm(MI, Opcode, insn, NumOps, NumOpsAdded, true); + unsigned short NumOps, unsigned &NumOpsAdded, BO B) { + return DisassembleLdStFrm(MI, Opcode, insn, NumOps, NumOpsAdded, true, B); } static bool HasDualReg(unsigned Opcode) { @@ -1232,24 +1252,26 @@ static bool HasDualReg(unsigned Opcode) { } static bool DisassembleLdStMiscFrm(MCInst &MI, unsigned Opcode, uint32_t insn, - unsigned short NumOps, unsigned &NumOpsAdded, bool isStore) { + unsigned short NumOps, unsigned &NumOpsAdded, bool isStore, BO B) { const TargetInstrDesc &TID = ARMInsts[Opcode]; - unsigned short NumDefs = TID.getNumDefs(); bool isPrePost = isPrePostLdSt(TID.TSFlags); const TargetOperandInfo *OpInfo = TID.OpInfo; + if (!OpInfo) return false; + unsigned &OpIdx = NumOpsAdded; OpIdx = 0; - assert(((!isStore && NumDefs > 0) || (isStore && (NumDefs == 0 || isPrePost))) + assert(((!isStore && TID.getNumDefs() > 0) || + (isStore && (TID.getNumDefs() == 0 || isPrePost))) && "Invalid arguments"); // Operand 0 of a pre- and post-indexed store is the address base writeback. if (isPrePost && isStore) { assert(OpInfo[OpIdx].RegClass == ARM::GPRRegClassID && "Reg operand expected"); - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID, + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, decodeRn(insn)))); ++OpIdx; } @@ -1262,13 +1284,13 @@ static bool DisassembleLdStMiscFrm(MCInst &MI, unsigned Opcode, uint32_t insn, assert(OpInfo[OpIdx].RegClass == ARM::GPRRegClassID && "Reg operand expected"); - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID, + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, decodeRd(insn)))); ++OpIdx; // Fill in LDRD and STRD's second operand. 
if (DualReg) { - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID, + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, decodeRd(insn) + 1))); ++OpIdx; } @@ -1277,7 +1299,7 @@ static bool DisassembleLdStMiscFrm(MCInst &MI, unsigned Opcode, uint32_t insn, if (isPrePost && !isStore) { assert(OpInfo[OpIdx].RegClass == ARM::GPRRegClassID && "Reg operand expected"); - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID, + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, decodeRn(insn)))); ++OpIdx; } @@ -1290,7 +1312,7 @@ static bool DisassembleLdStMiscFrm(MCInst &MI, unsigned Opcode, uint32_t insn, "Reg operand expected"); assert((!isPrePost || (TID.getOperandConstraint(OpIdx, TOI::TIED_TO) != -1)) && "Index mode or tied_to operand expected"); - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID, + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, decodeRn(insn)))); ++OpIdx; @@ -1315,7 +1337,7 @@ static bool DisassembleLdStMiscFrm(MCInst &MI, unsigned Opcode, uint32_t insn, MI.addOperand(MCOperand::CreateImm(Offset)); } else { // Disassemble the offset reg (Rm). - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID, + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, decodeRm(insn)))); unsigned Offset = ARM_AM::getAM3Opc(AddrOpcode, 0); MI.addOperand(MCOperand::CreateImm(Offset)); @@ -1326,13 +1348,14 @@ static bool DisassembleLdStMiscFrm(MCInst &MI, unsigned Opcode, uint32_t insn, } static bool DisassembleLdMiscFrm(MCInst &MI, unsigned Opcode, uint32_t insn, - unsigned short NumOps, unsigned &NumOpsAdded, BO) { - return DisassembleLdStMiscFrm(MI, Opcode, insn, NumOps, NumOpsAdded, false); + unsigned short NumOps, unsigned &NumOpsAdded, BO B) { + return DisassembleLdStMiscFrm(MI, Opcode, insn, NumOps, NumOpsAdded, false, + B); } static bool DisassembleStMiscFrm(MCInst &MI, unsigned Opcode, uint32_t insn, - unsigned short NumOps, unsigned &NumOpsAdded, BO) { - return DisassembleLdStMiscFrm(MI, Opcode, insn, NumOps, NumOpsAdded, true); + unsigned short NumOps, unsigned &NumOpsAdded, BO B) { + return DisassembleLdStMiscFrm(MI, Opcode, insn, NumOps, NumOpsAdded, true, B); } // The algorithm for disassembly of LdStMulFrm is different from others because @@ -1340,7 +1363,7 @@ static bool DisassembleStMiscFrm(MCInst &MI, unsigned Opcode, uint32_t insn, // and operand 1 (the AM4 mode imm). After operand 3, we need to populate the // reglist with each affected register encoded as an MCOperand. static bool DisassembleLdStMulFrm(MCInst &MI, unsigned Opcode, uint32_t insn, - unsigned short NumOps, unsigned &NumOpsAdded, BO) { + unsigned short NumOps, unsigned &NumOpsAdded, BO B) { assert(NumOps >= 5 && "LdStMulFrm expects NumOps >= 5"); @@ -1348,7 +1371,7 @@ static bool DisassembleLdStMulFrm(MCInst &MI, unsigned Opcode, uint32_t insn, OpIdx = 0; - unsigned Base = getRegisterEnum(ARM::GPRRegClassID, decodeRn(insn)); + unsigned Base = getRegisterEnum(B, ARM::GPRRegClassID, decodeRn(insn)); // Writeback to base, if necessary. 
if (Opcode == ARM::LDM_UPD || Opcode == ARM::STM_UPD) { @@ -1372,7 +1395,7 @@ static bool DisassembleLdStMulFrm(MCInst &MI, unsigned Opcode, uint32_t insn, unsigned RegListBits = insn & ((1 << 16) - 1); for (unsigned i = 0; i < 16; ++i) { if ((RegListBits >> i) & 1) { - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID, + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, i))); ++OpIdx; } @@ -1388,9 +1411,11 @@ static bool DisassembleLdStMulFrm(MCInst &MI, unsigned Opcode, uint32_t insn, // // SWP, SWPB: Rd Rm Rn static bool DisassembleLdStExFrm(MCInst &MI, unsigned Opcode, uint32_t insn, - unsigned short NumOps, unsigned &NumOpsAdded, BO) { + unsigned short NumOps, unsigned &NumOpsAdded, BO B) { const TargetOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo; + if (!OpInfo) return false; + unsigned &OpIdx = NumOpsAdded; OpIdx = 0; @@ -1404,29 +1429,29 @@ static bool DisassembleLdStExFrm(MCInst &MI, unsigned Opcode, uint32_t insn, bool isDW = (Opcode == ARM::LDREXD || Opcode == ARM::STREXD); // Add the destination operand. - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID, + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, decodeRd(insn)))); ++OpIdx; // Store register Exclusive needs a source operand. if (isStore) { - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID, + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, decodeRm(insn)))); ++OpIdx; if (isDW) { - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID, + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, decodeRm(insn)+1))); ++OpIdx; } } else if (isDW) { - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID, + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, decodeRd(insn)+1))); ++OpIdx; } // Finally add the pointer operand. - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID, + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, decodeRn(insn)))); ++OpIdx; @@ -1438,7 +1463,7 @@ static bool DisassembleLdStExFrm(MCInst &MI, unsigned Opcode, uint32_t insn, // PKHBT, PKHTB: Rd Rn Rm , LSL/ASR #imm5 // RBIT, REV, REV16, REVSH: Rd Rm static bool DisassembleArithMiscFrm(MCInst &MI, unsigned Opcode, uint32_t insn, - unsigned short NumOps, unsigned &NumOpsAdded, BO) { + unsigned short NumOps, unsigned &NumOpsAdded, BO B) { const TargetOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo; unsigned &OpIdx = NumOpsAdded; @@ -1452,18 +1477,18 @@ static bool DisassembleArithMiscFrm(MCInst &MI, unsigned Opcode, uint32_t insn, bool ThreeReg = NumOps > 2 && OpInfo[2].RegClass == ARM::GPRRegClassID; - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID, + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, decodeRd(insn)))); ++OpIdx; if (ThreeReg) { assert(NumOps >= 4 && "Expect >= 4 operands"); - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID, + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, decodeRn(insn)))); ++OpIdx; } - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID, + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, decodeRm(insn)))); ++OpIdx; @@ -1485,7 +1510,7 @@ static bool DisassembleArithMiscFrm(MCInst &MI, unsigned Opcode, uint32_t insn, // The 2nd operand register is Rn and the 3rd operand regsiter is Rm for the // three register operand form. 
Otherwise, Rn=0b1111 and only Rm is used. static bool DisassembleExtFrm(MCInst &MI, unsigned Opcode, uint32_t insn, - unsigned short NumOps, unsigned &NumOpsAdded, BO) { + unsigned short NumOps, unsigned &NumOpsAdded, BO B) { const TargetOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo; unsigned &OpIdx = NumOpsAdded; @@ -1499,17 +1524,17 @@ static bool DisassembleExtFrm(MCInst &MI, unsigned Opcode, uint32_t insn, bool ThreeReg = NumOps > 2 && OpInfo[2].RegClass == ARM::GPRRegClassID; - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID, + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, decodeRd(insn)))); ++OpIdx; if (ThreeReg) { - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID, + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, decodeRn(insn)))); ++OpIdx; } - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID, + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, decodeRm(insn)))); ++OpIdx; @@ -1591,7 +1616,7 @@ static uint64_t VFPExpandImm(unsigned char byte, unsigned N) { // VCVTDS, VCVTSD: converts between double-precision and single-precision // The rest of the instructions have homogeneous [VFP]Rd and [VFP]Rm registers. static bool DisassembleVFPUnaryFrm(MCInst &MI, unsigned Opcode, uint32_t insn, - unsigned short NumOps, unsigned &NumOpsAdded, BO) { + unsigned short NumOps, unsigned &NumOpsAdded, BO B) { assert(NumOps >= 1 && "VFPUnaryFrm expects NumOps >= 1"); @@ -1606,7 +1631,7 @@ static bool DisassembleVFPUnaryFrm(MCInst &MI, unsigned Opcode, uint32_t insn, bool isSP = (RegClass == ARM::SPRRegClassID); MI.addOperand(MCOperand::CreateReg( - getRegisterEnum(RegClass, decodeVFPRd(insn, isSP)))); + getRegisterEnum(B, RegClass, decodeVFPRd(insn, isSP)))); ++OpIdx; // Early return for compare with zero instructions. @@ -1620,7 +1645,7 @@ static bool DisassembleVFPUnaryFrm(MCInst &MI, unsigned Opcode, uint32_t insn, isSP = (RegClass == ARM::SPRRegClassID); MI.addOperand(MCOperand::CreateReg( - getRegisterEnum(RegClass, decodeVFPRm(insn, isSP)))); + getRegisterEnum(B, RegClass, decodeVFPRm(insn, isSP)))); ++OpIdx; return true; @@ -1631,7 +1656,7 @@ static bool DisassembleVFPUnaryFrm(MCInst &MI, unsigned Opcode, uint32_t insn, // InOperandList to that of the dst. As far as asm printing is concerned, this // tied_to operand is simply skipped. static bool DisassembleVFPBinaryFrm(MCInst &MI, unsigned Opcode, uint32_t insn, - unsigned short NumOps, unsigned &NumOpsAdded, BO) { + unsigned short NumOps, unsigned &NumOpsAdded, BO B) { assert(NumOps >= 3 && "VFPBinaryFrm expects NumOps >= 3"); @@ -1647,7 +1672,7 @@ static bool DisassembleVFPBinaryFrm(MCInst &MI, unsigned Opcode, uint32_t insn, bool isSP = (RegClass == ARM::SPRRegClassID); MI.addOperand(MCOperand::CreateReg( - getRegisterEnum(RegClass, decodeVFPRd(insn, isSP)))); + getRegisterEnum(B, RegClass, decodeVFPRd(insn, isSP)))); ++OpIdx; // Skip tied_to operand constraint. 
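The common thread in the hunks above is that getRegisterEnum now takes the builder pointer (BO B), so an invalid (RegClassID, RawRegister) combination can be recorded with B->SetErr(-1) and the instruction rejected later, rather than asserting inside the disassembler. The following is a rough sketch of that reporting pattern; only the SetErr idea comes from the patch, everything else (the Builder type, the toy register mapping) is invented for illustration and is not LLVM API.

// Simplified sketch of the error-reporting pattern used throughout these
// hunks: flag the builder on a bad encoding instead of asserting.
#include <cstdio>

struct Builder {
  int Err = 0;
  void SetErr(int Code) { Err = Code; }   // remember that decoding failed
};

// On a bad raw register number, mark the builder and return a harmless
// value; the caller checks Err before trusting the built MCInst.
static unsigned getRegisterEnumSketch(Builder *B, unsigned RawReg) {
  if (RawReg > 15) {          // pretend 0..15 are the only valid encodings
    B->SetErr(-1);
    return 0;
  }
  return RawReg + 1;          // dummy mapping standing in for the real table
}

int main() {
  Builder B;
  unsigned R = getRegisterEnumSketch(&B, 42);
  if (B.Err)
    std::printf("encoding error, instruction rejected\n");
  else
    std::printf("register enum %u\n", R);
  return 0;
}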
@@ -1658,11 +1683,11 @@ static bool DisassembleVFPBinaryFrm(MCInst &MI, unsigned Opcode, uint32_t insn, } MI.addOperand(MCOperand::CreateReg( - getRegisterEnum(RegClass, decodeVFPRn(insn, isSP)))); + getRegisterEnum(B, RegClass, decodeVFPRn(insn, isSP)))); ++OpIdx; MI.addOperand(MCOperand::CreateReg( - getRegisterEnum(RegClass, decodeVFPRm(insn, isSP)))); + getRegisterEnum(B, RegClass, decodeVFPRm(insn, isSP)))); ++OpIdx; return true; @@ -1675,12 +1700,13 @@ static bool DisassembleVFPBinaryFrm(MCInst &MI, unsigned Opcode, uint32_t insn, // A8.6.297 vcvt (floating-point and fixed-point) // Dd|Sd Dd|Sd(TIED_TO) #fbits(= 16|32 - UInt(imm4:i)) static bool DisassembleVFPConv1Frm(MCInst &MI, unsigned Opcode, uint32_t insn, - unsigned short NumOps, unsigned &NumOpsAdded, BO) { + unsigned short NumOps, unsigned &NumOpsAdded, BO B) { assert(NumOps >= 2 && "VFPConv1Frm expects NumOps >= 2"); const TargetInstrDesc &TID = ARMInsts[Opcode]; const TargetOperandInfo *OpInfo = TID.OpInfo; + if (!OpInfo) return false; bool SP = slice(insn, 8, 8) == 0; // A8.6.295 & A8.6.297 bool fixed_point = slice(insn, 17, 17) == 1; // A8.6.297 @@ -1692,7 +1718,7 @@ static bool DisassembleVFPConv1Frm(MCInst &MI, unsigned Opcode, uint32_t insn, int size = slice(insn, 7, 7) == 0 ? 16 : 32; int fbits = size - (slice(insn,3,0) << 1 | slice(insn,5,5)); MI.addOperand(MCOperand::CreateReg( - getRegisterEnum(RegClassID, + getRegisterEnum(B, RegClassID, decodeVFPRd(insn, SP)))); assert(TID.getOperandConstraint(1, TOI::TIED_TO) != -1 && @@ -1712,15 +1738,15 @@ static bool DisassembleVFPConv1Frm(MCInst &MI, unsigned Opcode, uint32_t insn, if (slice(insn, 18, 18) == 1) { // to_integer operation d = decodeVFPRd(insn, true /* Is Single Precision */); MI.addOperand(MCOperand::CreateReg( - getRegisterEnum(ARM::SPRRegClassID, d))); + getRegisterEnum(B, ARM::SPRRegClassID, d))); m = decodeVFPRm(insn, SP); - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(RegClassID, m))); + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, RegClassID, m))); } else { d = decodeVFPRd(insn, SP); - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(RegClassID, d))); + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, RegClassID, d))); m = decodeVFPRm(insn, true /* Is Single Precision */); MI.addOperand(MCOperand::CreateReg( - getRegisterEnum(ARM::SPRRegClassID, m))); + getRegisterEnum(B, ARM::SPRRegClassID, m))); } NumOpsAdded = 2; } @@ -1731,13 +1757,13 @@ static bool DisassembleVFPConv1Frm(MCInst &MI, unsigned Opcode, uint32_t insn, // VMOVRS - A8.6.330 // Rt => Rd; Sn => UInt(Vn:N) static bool DisassembleVFPConv2Frm(MCInst &MI, unsigned Opcode, uint32_t insn, - unsigned short NumOps, unsigned &NumOpsAdded, BO) { + unsigned short NumOps, unsigned &NumOpsAdded, BO B) { assert(NumOps >= 2 && "VFPConv2Frm expects NumOps >= 2"); - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID, + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, decodeRd(insn)))); - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::SPRRegClassID, + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::SPRRegClassID, decodeVFPRn(insn, true)))); NumOpsAdded = 2; return true; @@ -1749,29 +1775,29 @@ static bool DisassembleVFPConv2Frm(MCInst &MI, unsigned Opcode, uint32_t insn, // VMOVRRS - A8.6.331 // Rt => Rd; Rt2 => Rn; Sm => UInt(Vm:M); Sm1 = Sm+1 static bool DisassembleVFPConv3Frm(MCInst &MI, unsigned Opcode, uint32_t insn, - unsigned short NumOps, unsigned &NumOpsAdded, BO) { + unsigned short NumOps, unsigned &NumOpsAdded, 
BO B) { assert(NumOps >= 3 && "VFPConv3Frm expects NumOps >= 3"); const TargetOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo; unsigned &OpIdx = NumOpsAdded; - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID, + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, decodeRd(insn)))); - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID, + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, decodeRn(insn)))); OpIdx = 2; if (OpInfo[OpIdx].RegClass == ARM::SPRRegClassID) { unsigned Sm = decodeVFPRm(insn, true); - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::SPRRegClassID, + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::SPRRegClassID, Sm))); - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::SPRRegClassID, + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::SPRRegClassID, Sm+1))); OpIdx += 2; } else { MI.addOperand(MCOperand::CreateReg( - getRegisterEnum(ARM::DPRRegClassID, + getRegisterEnum(B, ARM::DPRRegClassID, decodeVFPRm(insn, false)))); ++OpIdx; } @@ -1781,13 +1807,13 @@ static bool DisassembleVFPConv3Frm(MCInst &MI, unsigned Opcode, uint32_t insn, // VMOVSR - A8.6.330 // Rt => Rd; Sn => UInt(Vn:N) static bool DisassembleVFPConv4Frm(MCInst &MI, unsigned Opcode, uint32_t insn, - unsigned short NumOps, unsigned &NumOpsAdded, BO) { + unsigned short NumOps, unsigned &NumOpsAdded, BO B) { assert(NumOps >= 2 && "VFPConv4Frm expects NumOps >= 2"); - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::SPRRegClassID, + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::SPRRegClassID, decodeVFPRn(insn, true)))); - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID, + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, decodeRd(insn)))); NumOpsAdded = 2; return true; @@ -1799,7 +1825,7 @@ static bool DisassembleVFPConv4Frm(MCInst &MI, unsigned Opcode, uint32_t insn, // VMOVRRS - A8.6.331 // Rt => Rd; Rt2 => Rn; Sm => UInt(Vm:M); Sm1 = Sm+1 static bool DisassembleVFPConv5Frm(MCInst &MI, unsigned Opcode, uint32_t insn, - unsigned short NumOps, unsigned &NumOpsAdded, BO) { + unsigned short NumOps, unsigned &NumOpsAdded, BO B) { assert(NumOps >= 3 && "VFPConv5Frm expects NumOps >= 3"); @@ -1810,21 +1836,21 @@ static bool DisassembleVFPConv5Frm(MCInst &MI, unsigned Opcode, uint32_t insn, if (OpInfo[OpIdx].RegClass == ARM::SPRRegClassID) { unsigned Sm = decodeVFPRm(insn, true); - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::SPRRegClassID, + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::SPRRegClassID, Sm))); - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::SPRRegClassID, + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::SPRRegClassID, Sm+1))); OpIdx += 2; } else { MI.addOperand(MCOperand::CreateReg( - getRegisterEnum(ARM::DPRRegClassID, + getRegisterEnum(B, ARM::DPRRegClassID, decodeVFPRm(insn, false)))); ++OpIdx; } - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID, + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, decodeRd(insn)))); - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID, + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, decodeRn(insn)))); OpIdx += 2; return true; @@ -1833,7 +1859,7 @@ static bool DisassembleVFPConv5Frm(MCInst &MI, unsigned Opcode, uint32_t insn, // VFP Load/Store Instructions. 
// VLDRD, VLDRS, VSTRD, VSTRS static bool DisassembleVFPLdStFrm(MCInst &MI, unsigned Opcode, uint32_t insn, - unsigned short NumOps, unsigned &NumOpsAdded, BO) { + unsigned short NumOps, unsigned &NumOpsAdded, BO B) { assert(NumOps >= 3 && "VFPLdStFrm expects NumOps >= 3"); @@ -1843,9 +1869,9 @@ static bool DisassembleVFPLdStFrm(MCInst &MI, unsigned Opcode, uint32_t insn, // Extract Dd/Sd for operand 0. unsigned RegD = decodeVFPRd(insn, isSPVFP); - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(RegClassID, RegD))); + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, RegClassID, RegD))); - unsigned Base = getRegisterEnum(ARM::GPRRegClassID, decodeRn(insn)); + unsigned Base = getRegisterEnum(B, ARM::GPRRegClassID, decodeRn(insn)); MI.addOperand(MCOperand::CreateReg(Base)); // Next comes the AM5 Opcode. @@ -1865,7 +1891,7 @@ static bool DisassembleVFPLdStFrm(MCInst &MI, unsigned Opcode, uint32_t insn, // // VLDMD[_UPD], VLDMS[_UPD], VSTMD[_UPD], VSTMS[_UPD] static bool DisassembleVFPLdStMulFrm(MCInst &MI, unsigned Opcode, uint32_t insn, - unsigned short NumOps, unsigned &NumOpsAdded, BO) { + unsigned short NumOps, unsigned &NumOpsAdded, BO B) { assert(NumOps >= 5 && "VFPLdStMulFrm expects NumOps >= 5"); @@ -1873,7 +1899,7 @@ static bool DisassembleVFPLdStMulFrm(MCInst &MI, unsigned Opcode, uint32_t insn, OpIdx = 0; - unsigned Base = getRegisterEnum(ARM::GPRRegClassID, decodeRn(insn)); + unsigned Base = getRegisterEnum(B, ARM::GPRRegClassID, decodeRn(insn)); // Writeback to base, if necessary. if (Opcode == ARM::VLDMD_UPD || Opcode == ARM::VLDMS_UPD || @@ -1886,6 +1912,12 @@ static bool DisassembleVFPLdStMulFrm(MCInst &MI, unsigned Opcode, uint32_t insn, // Next comes the AM5 Opcode. ARM_AM::AMSubMode SubMode = getAMSubModeForBits(getPUBits(insn)); + // Must be either "ia" or "db" submode. + if (SubMode != ARM_AM::ia && SubMode != ARM_AM::db) { + DEBUG(errs() << "Illegal addressing mode 5 sub-mode!\n"); + return false; + } + unsigned char Imm8 = insn & 0xFF; MI.addOperand(MCOperand::CreateImm(ARM_AM::getAM5Opc(SubMode, Imm8))); @@ -1906,7 +1938,7 @@ static bool DisassembleVFPLdStMulFrm(MCInst &MI, unsigned Opcode, uint32_t insn, // Fill the variadic part of reglist. unsigned Regs = isSPVFP ? 
Imm8 : Imm8/2; for (unsigned i = 0; i < Regs; ++i) { - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(RegClassID, + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, RegClassID, RegD + i))); ++OpIdx; } @@ -1920,7 +1952,7 @@ static bool DisassembleVFPLdStMulFrm(MCInst &MI, unsigned Opcode, uint32_t insn, // FCONSTS (SPR and a VFPf32Imm operand) // VMRS/VMSR (GPR operand) static bool DisassembleVFPMiscFrm(MCInst &MI, unsigned Opcode, uint32_t insn, - unsigned short NumOps, unsigned &NumOpsAdded, BO) { + unsigned short NumOps, unsigned &NumOpsAdded, BO B) { const TargetOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo; unsigned &OpIdx = NumOpsAdded; @@ -1935,13 +1967,13 @@ static bool DisassembleVFPMiscFrm(MCInst &MI, unsigned Opcode, uint32_t insn, unsigned RegEnum = 0; switch (OpInfo[0].RegClass) { case ARM::DPRRegClassID: - RegEnum = getRegisterEnum(ARM::DPRRegClassID, decodeVFPRd(insn, false)); + RegEnum = getRegisterEnum(B, ARM::DPRRegClassID, decodeVFPRd(insn, false)); break; case ARM::SPRRegClassID: - RegEnum = getRegisterEnum(ARM::SPRRegClassID, decodeVFPRd(insn, true)); + RegEnum = getRegisterEnum(B, ARM::SPRRegClassID, decodeVFPRd(insn, true)); break; case ARM::GPRRegClassID: - RegEnum = getRegisterEnum(ARM::GPRRegClassID, decodeRd(insn)); + RegEnum = getRegisterEnum(B, ARM::GPRRegClassID, decodeRd(insn)); break; default: assert(0 && "Invalid reg class id"); @@ -1986,7 +2018,7 @@ static bool DisassembleVFPMiscFrm(MCInst &MI, unsigned Opcode, uint32_t insn, // D = Inst{22}, Vd = Inst{15-12} static unsigned decodeNEONRd(uint32_t insn) { return ((insn >> ARMII::NEON_D_BitShift) & 1) << 4 - | (insn >> ARMII::NEON_RegRdShift) & ARMII::NEONRegMask; + | ((insn >> ARMII::NEON_RegRdShift) & ARMII::NEONRegMask); } // Extract/Decode NEON N/Vn: @@ -1997,7 +2029,7 @@ static unsigned decodeNEONRd(uint32_t insn) { // N = Inst{7}, Vn = Inst{19-16} static unsigned decodeNEONRn(uint32_t insn) { return ((insn >> ARMII::NEON_N_BitShift) & 1) << 4 - | (insn >> ARMII::NEON_RegRnShift) & ARMII::NEONRegMask; + | ((insn >> ARMII::NEON_RegRnShift) & ARMII::NEONRegMask); } // Extract/Decode NEON M/Vm: @@ -2008,7 +2040,7 @@ static unsigned decodeNEONRn(uint32_t insn) { // M = Inst{5}, Vm = Inst{3-0} static unsigned decodeNEONRm(uint32_t insn) { return ((insn >> ARMII::NEON_M_BitShift) & 1) << 4 - | (insn >> ARMII::NEON_RegRmShift) & ARMII::NEONRegMask; + | ((insn >> ARMII::NEON_RegRmShift) & ARMII::NEONRegMask); } namespace { @@ -2072,7 +2104,7 @@ static uint64_t decodeN1VImm(uint32_t insn, ElemSize esize) { case ESize64: { for (unsigned i = 0; i < 8; ++i) if ((Imm8 >> i) & 1) - Imm64 |= 0xFF << 8*i; + Imm64 |= (uint64_t)0xFF << 8*i; break; } default: @@ -2200,6 +2232,22 @@ static unsigned decodeN3VImm(uint32_t insn) { return (insn >> 8) & 0xF; } +static bool UseDRegPair(unsigned Opcode) { + switch (Opcode) { + default: + return false; + case ARM::VLD1q8_UPD: + case ARM::VLD1q16_UPD: + case ARM::VLD1q32_UPD: + case ARM::VLD1q64_UPD: + case ARM::VST1q8_UPD: + case ARM::VST1q16_UPD: + case ARM::VST1q32_UPD: + case ARM::VST1q64_UPD: + return true; + } +} + // VLD* // D[d] D[d2] ... Rn [TIED_TO Rn] align [Rm] // VLD*LN* @@ -2211,7 +2259,8 @@ static unsigned decodeN3VImm(uint32_t insn) { // // Correctly set VLD*/VST*'s TIED_TO GPR, as the asm printer needs it. 
static bool DisassembleNLdSt0(MCInst &MI, unsigned Opcode, uint32_t insn, - unsigned short NumOps, unsigned &NumOpsAdded, bool Store, bool DblSpaced) { + unsigned short NumOps, unsigned &NumOpsAdded, bool Store, bool DblSpaced, + BO B) { const TargetInstrDesc &TID = ARMInsts[Opcode]; const TargetOperandInfo *OpInfo = TID.OpInfo; @@ -2239,7 +2288,7 @@ static bool DisassembleNLdSt0(MCInst &MI, unsigned Opcode, uint32_t insn, // LLVM Addressing Mode #6. unsigned RmEnum = 0; if (WB && Rm != 13) - RmEnum = getRegisterEnum(ARM::GPRRegClassID, Rm); + RmEnum = getRegisterEnum(B, ARM::GPRRegClassID, Rm); if (Store) { // Consume possible WB, AddrMode6, possible increment reg, the DPR/QPR's, @@ -2248,14 +2297,14 @@ static bool DisassembleNLdSt0(MCInst &MI, unsigned Opcode, uint32_t insn, "Reg operand expected"); if (WB) { - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID, + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, Rn))); ++OpIdx; } assert((OpIdx+1) < NumOps && OpInfo[OpIdx].RegClass == ARM::GPRRegClassID && OpInfo[OpIdx + 1].RegClass == 0 && "Addrmode #6 Operands expected"); - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID, + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, Rn))); MI.addOperand(MCOperand::CreateImm(0)); // Alignment ignored? OpIdx += 2; @@ -2272,10 +2321,9 @@ static bool DisassembleNLdSt0(MCInst &MI, unsigned Opcode, uint32_t insn, RegClass = OpInfo[OpIdx].RegClass; while (OpIdx < NumOps && OpInfo[OpIdx].RegClass == RegClass) { - if (Opcode >= ARM::VST1q16 && Opcode <= ARM::VST1q8) - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(RegClass,Rd,true))); - else - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(RegClass,Rd))); + MI.addOperand(MCOperand::CreateReg( + getRegisterEnum(B, RegClass, Rd, + UseDRegPair(Opcode)))); Rd += Inc; ++OpIdx; } @@ -2293,23 +2341,22 @@ static bool DisassembleNLdSt0(MCInst &MI, unsigned Opcode, uint32_t insn, RegClass = OpInfo[0].RegClass; while (OpIdx < NumOps && OpInfo[OpIdx].RegClass == RegClass) { - if (Opcode >= ARM::VLD1q16 && Opcode <= ARM::VLD1q8) - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(RegClass,Rd,true))); - else - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(RegClass,Rd))); + MI.addOperand(MCOperand::CreateReg( + getRegisterEnum(B, RegClass, Rd, + UseDRegPair(Opcode)))); Rd += Inc; ++OpIdx; } if (WB) { - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID, + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, Rn))); ++OpIdx; } assert((OpIdx+1) < NumOps && OpInfo[OpIdx].RegClass == ARM::GPRRegClassID && OpInfo[OpIdx + 1].RegClass == 0 && "Addrmode #6 Operands expected"); - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID, + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, Rn))); MI.addOperand(MCOperand::CreateImm(0)); // Alignment ignored? OpIdx += 2; @@ -2342,7 +2389,7 @@ static bool DisassembleNLdSt0(MCInst &MI, unsigned Opcode, uint32_t insn, // Find out about double-spaced-ness of the Opcode and pass it on to // DisassembleNLdSt0(). 
static bool DisassembleNLdSt(MCInst &MI, unsigned Opcode, uint32_t insn, - unsigned short NumOps, unsigned &NumOpsAdded, BO) { + unsigned short NumOps, unsigned &NumOpsAdded, BO B) { const StringRef Name = ARMInsts[Opcode].Name; bool DblSpaced = false; @@ -2377,13 +2424,13 @@ static bool DisassembleNLdSt(MCInst &MI, unsigned Opcode, uint32_t insn, } return DisassembleNLdSt0(MI, Opcode, insn, NumOps, NumOpsAdded, - slice(insn, 21, 21) == 0, DblSpaced); + slice(insn, 21, 21) == 0, DblSpaced, B); } // VMOV (immediate) // Qd/Dd imm static bool DisassembleN1RegModImmFrm(MCInst &MI, unsigned Opcode, - uint32_t insn, unsigned short NumOps, unsigned &NumOpsAdded, BO) { + uint32_t insn, unsigned short NumOps, unsigned &NumOpsAdded, BO B) { const TargetInstrDesc &TID = ARMInsts[Opcode]; const TargetOperandInfo *OpInfo = TID.OpInfo; @@ -2395,7 +2442,7 @@ static bool DisassembleN1RegModImmFrm(MCInst &MI, unsigned Opcode, "Expect 1 reg operand followed by 1 imm operand"); // Qd/Dd = Inst{22:15-12} => NEON Rd - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(OpInfo[0].RegClass, + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, OpInfo[0].RegClass, decodeNEONRd(insn)))); ElemSize esize = ESizeNA; @@ -2415,6 +2462,7 @@ static bool DisassembleN1RegModImmFrm(MCInst &MI, unsigned Opcode, case ARM::VMOVv1i64: case ARM::VMOVv2i64: esize = ESize64; + break; default: assert(0 && "Unreachable code!"); return false; @@ -2451,7 +2499,7 @@ enum N2VFlag { // // Others static bool DisassembleNVdVmOptImm(MCInst &MI, unsigned Opc, uint32_t insn, - unsigned short NumOps, unsigned &NumOpsAdded, N2VFlag Flag = N2V_None) { + unsigned short NumOps, unsigned &NumOpsAdded, N2VFlag Flag, BO B) { const TargetInstrDesc &TID = ARMInsts[Opc]; const TargetOperandInfo *OpInfo = TID.OpInfo; @@ -2478,7 +2526,7 @@ static bool DisassembleNVdVmOptImm(MCInst &MI, unsigned Opc, uint32_t insn, } // Qd/Dd = Inst{22:15-12} => NEON Rd - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(OpInfo[OpIdx].RegClass, + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, OpInfo[OpIdx].RegClass, decodeNEONRd(insn)))); ++OpIdx; @@ -2490,7 +2538,7 @@ static bool DisassembleNVdVmOptImm(MCInst &MI, unsigned Opc, uint32_t insn, } // Dm = Inst{5:3-0} => NEON Rm - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(OpInfo[OpIdx].RegClass, + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, OpInfo[OpIdx].RegClass, decodeNEONRm(insn)))); ++OpIdx; @@ -2523,21 +2571,22 @@ static bool DisassembleNVdVmOptImm(MCInst &MI, unsigned Opc, uint32_t insn, } static bool DisassembleN2RegFrm(MCInst &MI, unsigned Opc, uint32_t insn, - unsigned short NumOps, unsigned &NumOpsAdded, BO) { + unsigned short NumOps, unsigned &NumOpsAdded, BO B) { - return DisassembleNVdVmOptImm(MI, Opc, insn, NumOps, NumOpsAdded); + return DisassembleNVdVmOptImm(MI, Opc, insn, NumOps, NumOpsAdded, + N2V_None, B); } static bool DisassembleNVCVTFrm(MCInst &MI, unsigned Opc, uint32_t insn, - unsigned short NumOps, unsigned &NumOpsAdded, BO) { + unsigned short NumOps, unsigned &NumOpsAdded, BO B) { return DisassembleNVdVmOptImm(MI, Opc, insn, NumOps, NumOpsAdded, - N2V_VectorConvert_Between_Float_Fixed); + N2V_VectorConvert_Between_Float_Fixed, B); } static bool DisassembleNVecDupLnFrm(MCInst &MI, unsigned Opc, uint32_t insn, - unsigned short NumOps, unsigned &NumOpsAdded, BO) { + unsigned short NumOps, unsigned &NumOpsAdded, BO B) { return DisassembleNVdVmOptImm(MI, Opc, insn, NumOps, NumOpsAdded, - N2V_VectorDupLane); + N2V_VectorDupLane, B); } // Vector Shift [Accumulate] Instructions. 
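One small but real fix above is the break added after the ARM::VMOVv1i64/VMOVv2i64 case in DisassembleN1RegModImmFrm: without it, control falls through into the default branch and the instruction is reported as unreachable even though esize was already set. Here is a minimal sketch of that fall-through hazard; the types and the withBreak flag are toy constructions for illustration only, not the patch's code.

// Minimal illustration of the switch fall-through fixed above: without the
// break, the 64-bit case keeps executing the default branch and the lookup
// is (wrongly) reported as a failure.
#include <cstdio>

enum ElemSize { ESizeNA = 0, ESize8 = 8, ESize64 = 64 };

static bool classify(int Kind, ElemSize &esize, bool withBreak) {
  switch (Kind) {
  case 0:
    esize = ESize8;
    break;
  case 1:
    esize = ESize64;
    if (withBreak) break;     // mirrors the one-line fix from the patch
    // falls through when withBreak is false
  default:
    esize = ESizeNA;
    return false;             // the "Unreachable code" rejection path
  }
  return true;
}

int main() {
  ElemSize E;
  std::printf("without break: %s\n", classify(1, E, false) ? "ok" : "rejected");
  std::printf("with break:    %s\n", classify(1, E, true)  ? "ok" : "rejected");
  return 0;
}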
@@ -2547,7 +2596,7 @@ static bool DisassembleNVecDupLnFrm(MCInst &MI, unsigned Opc, uint32_t insn, // VSHLLi16, VSHLLi32, VSHLLi8: Qd Dm imm (== size) // static bool DisassembleNVectorShift(MCInst &MI, unsigned Opcode, uint32_t insn, - unsigned short NumOps, unsigned &NumOpsAdded, bool LeftShift) { + unsigned short NumOps, unsigned &NumOpsAdded, bool LeftShift, BO B) { const TargetInstrDesc &TID = ARMInsts[Opcode]; const TargetOperandInfo *OpInfo = TID.OpInfo; @@ -2564,7 +2613,7 @@ static bool DisassembleNVectorShift(MCInst &MI, unsigned Opcode, uint32_t insn, OpIdx = 0; // Qd/Dd = Inst{22:15-12} => NEON Rd - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(OpInfo[OpIdx].RegClass, + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, OpInfo[OpIdx].RegClass, decodeNEONRd(insn)))); ++OpIdx; @@ -2579,7 +2628,7 @@ static bool DisassembleNVectorShift(MCInst &MI, unsigned Opcode, uint32_t insn, "Reg operand expected"); // Qm/Dm = Inst{5:3-0} => NEON Rm - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(OpInfo[OpIdx].RegClass, + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, OpInfo[OpIdx].RegClass, decodeNEONRm(insn)))); ++OpIdx; @@ -2611,15 +2660,17 @@ static bool DisassembleNVectorShift(MCInst &MI, unsigned Opcode, uint32_t insn, // Left shift instructions. static bool DisassembleN2RegVecShLFrm(MCInst &MI, unsigned Opcode, - uint32_t insn, unsigned short NumOps, unsigned &NumOpsAdded, BO) { + uint32_t insn, unsigned short NumOps, unsigned &NumOpsAdded, BO B) { - return DisassembleNVectorShift(MI, Opcode, insn, NumOps, NumOpsAdded, true); + return DisassembleNVectorShift(MI, Opcode, insn, NumOps, NumOpsAdded, true, + B); } // Right shift instructions have different shift amount interpretation. static bool DisassembleN2RegVecShRFrm(MCInst &MI, unsigned Opcode, - uint32_t insn, unsigned short NumOps, unsigned &NumOpsAdded, BO) { + uint32_t insn, unsigned short NumOps, unsigned &NumOpsAdded, BO B) { - return DisassembleNVectorShift(MI, Opcode, insn, NumOps, NumOpsAdded, false); + return DisassembleNVectorShift(MI, Opcode, insn, NumOps, NumOpsAdded, false, + B); } namespace { @@ -2644,7 +2695,7 @@ enum N3VFlag { // // Others static bool DisassembleNVdVnVmOptImm(MCInst &MI, unsigned Opcode, uint32_t insn, - unsigned short NumOps, unsigned &NumOpsAdded, N3VFlag Flag = N3V_None) { + unsigned short NumOps, unsigned &NumOpsAdded, N3VFlag Flag, BO B) { const TargetInstrDesc &TID = ARMInsts[Opcode]; const TargetOperandInfo *OpInfo = TID.OpInfo; @@ -2673,7 +2724,7 @@ static bool DisassembleNVdVnVmOptImm(MCInst &MI, unsigned Opcode, uint32_t insn, } // Qd/Dd = Inst{22:15-12} => NEON Rd - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(OpInfo[OpIdx].RegClass, + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, OpInfo[OpIdx].RegClass, decodeNEONRd(insn)))); ++OpIdx; @@ -2688,7 +2739,7 @@ static bool DisassembleNVdVnVmOptImm(MCInst &MI, unsigned Opcode, uint32_t insn, // or // Dm = Inst{5:3-0} => NEON Rm MI.addOperand(MCOperand::CreateReg( - getRegisterEnum(OpInfo[OpIdx].RegClass, + getRegisterEnum(B, OpInfo[OpIdx].RegClass, VdVnVm ? 
decodeNEONRn(insn) : decodeNEONRm(insn)))); ++OpIdx; @@ -2708,7 +2759,7 @@ static bool DisassembleNVdVnVmOptImm(MCInst &MI, unsigned Opcode, uint32_t insn, : decodeNEONRn(insn); MI.addOperand(MCOperand::CreateReg( - getRegisterEnum(OpInfo[OpIdx].RegClass, m))); + getRegisterEnum(B, OpInfo[OpIdx].RegClass, m))); ++OpIdx; if (OpIdx < NumOps && OpInfo[OpIdx].RegClass == 0 @@ -2732,27 +2783,28 @@ static bool DisassembleNVdVnVmOptImm(MCInst &MI, unsigned Opcode, uint32_t insn, } static bool DisassembleN3RegFrm(MCInst &MI, unsigned Opcode, uint32_t insn, - unsigned short NumOps, unsigned &NumOpsAdded, BO) { + unsigned short NumOps, unsigned &NumOpsAdded, BO B) { - return DisassembleNVdVnVmOptImm(MI, Opcode, insn, NumOps, NumOpsAdded); + return DisassembleNVdVnVmOptImm(MI, Opcode, insn, NumOps, NumOpsAdded, + N3V_None, B); } static bool DisassembleN3RegVecShFrm(MCInst &MI, unsigned Opcode, - uint32_t insn, unsigned short NumOps, unsigned &NumOpsAdded, BO) { + uint32_t insn, unsigned short NumOps, unsigned &NumOpsAdded, BO B) { return DisassembleNVdVnVmOptImm(MI, Opcode, insn, NumOps, NumOpsAdded, - N3V_VectorShift); + N3V_VectorShift, B); } static bool DisassembleNVecExtractFrm(MCInst &MI, unsigned Opcode, uint32_t insn, - unsigned short NumOps, unsigned &NumOpsAdded, BO) { + unsigned short NumOps, unsigned &NumOpsAdded, BO B) { return DisassembleNVdVnVmOptImm(MI, Opcode, insn, NumOps, NumOpsAdded, - N3V_VectorExtract); + N3V_VectorExtract, B); } static bool DisassembleNVecMulScalarFrm(MCInst &MI, unsigned Opcode, - uint32_t insn, unsigned short NumOps, unsigned &NumOpsAdded, BO) { + uint32_t insn, unsigned short NumOps, unsigned &NumOpsAdded, BO B) { return DisassembleNVdVnVmOptImm(MI, Opcode, insn, NumOps, NumOpsAdded, - N3V_Multiply_By_Scalar); + N3V_Multiply_By_Scalar, B); } // Vector Table Lookup @@ -2762,10 +2814,11 @@ static bool DisassembleNVecMulScalarFrm(MCInst &MI, unsigned Opcode, // VTBL3, VTBX3: Dd [Dd(TIED_TO)] Dn Dn+1 Dn+2 Dm // VTBL4, VTBX4: Dd [Dd(TIED_TO)] Dn Dn+1 Dn+2 Dn+3 Dm static bool DisassembleNVTBLFrm(MCInst &MI, unsigned Opcode, uint32_t insn, - unsigned short NumOps, unsigned &NumOpsAdded, BO) { + unsigned short NumOps, unsigned &NumOpsAdded, BO B) { const TargetInstrDesc &TID = ARMInsts[Opcode]; const TargetOperandInfo *OpInfo = TID.OpInfo; + if (!OpInfo) return false; assert(NumOps >= 3 && OpInfo[0].RegClass == ARM::DPRRegClassID && @@ -2786,7 +2839,7 @@ static bool DisassembleNVTBLFrm(MCInst &MI, unsigned Opcode, uint32_t insn, unsigned Len = slice(insn, 9, 8) + 1; // Dd (the destination vector) - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::DPRRegClassID, + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::DPRRegClassID, decodeNEONRd(insn)))); ++OpIdx; @@ -2801,7 +2854,7 @@ static bool DisassembleNVTBLFrm(MCInst &MI, unsigned Opcode, uint32_t insn, for (unsigned i = 0; i < Len; ++i) { assert(OpIdx < NumOps && OpInfo[OpIdx].RegClass == ARM::DPRRegClassID && "Reg operand expected"); - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::DPRRegClassID, + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::DPRRegClassID, Rn + i))); ++OpIdx; } @@ -2809,7 +2862,7 @@ static bool DisassembleNVTBLFrm(MCInst &MI, unsigned Opcode, uint32_t insn, // Dm (the index vector) assert(OpIdx < NumOps && OpInfo[OpIdx].RegClass == ARM::DPRRegClassID && "Reg operand (index vector) expected"); - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::DPRRegClassID, + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::DPRRegClassID, 
decodeNEONRm(insn)))); ++OpIdx; @@ -2825,13 +2878,13 @@ static bool DisassembleNEONFrm(MCInst &MI, unsigned Opcode, uint32_t insn, // Vector Get Lane (move scalar to ARM core register) Instructions. // VGETLNi32, VGETLNs16, VGETLNs8, VGETLNu16, VGETLNu8: Rt Dn index static bool DisassembleNEONGetLnFrm(MCInst &MI, unsigned Opcode, uint32_t insn, - unsigned short NumOps, unsigned &NumOpsAdded, BO) { + unsigned short NumOps, unsigned &NumOpsAdded, BO B) { const TargetInstrDesc &TID = ARMInsts[Opcode]; - unsigned short NumDefs = TID.getNumDefs(); const TargetOperandInfo *OpInfo = TID.OpInfo; + if (!OpInfo) return false; - assert(NumDefs == 1 && NumOps >= 3 && + assert(TID.getNumDefs() == 1 && NumOps >= 3 && OpInfo[0].RegClass == ARM::GPRRegClassID && OpInfo[1].RegClass == ARM::DPRRegClassID && OpInfo[2].RegClass == 0 && @@ -2843,11 +2896,11 @@ static bool DisassembleNEONGetLnFrm(MCInst &MI, unsigned Opcode, uint32_t insn, : ESize32); // Rt = Inst{15-12} => ARM Rd - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID, + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, decodeRd(insn)))); // Dn = Inst{7:19-16} => NEON Rn - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::DPRRegClassID, + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::DPRRegClassID, decodeNEONRn(insn)))); MI.addOperand(MCOperand::CreateImm(decodeNVLaneOpIndex(insn, esize))); @@ -2859,13 +2912,13 @@ static bool DisassembleNEONGetLnFrm(MCInst &MI, unsigned Opcode, uint32_t insn, // Vector Set Lane (move ARM core register to scalar) Instructions. // VSETLNi16, VSETLNi32, VSETLNi8: Dd Dd (TIED_TO) Rt index static bool DisassembleNEONSetLnFrm(MCInst &MI, unsigned Opcode, uint32_t insn, - unsigned short NumOps, unsigned &NumOpsAdded, BO) { + unsigned short NumOps, unsigned &NumOpsAdded, BO B) { const TargetInstrDesc &TID = ARMInsts[Opcode]; - unsigned short NumDefs = TID.getNumDefs(); const TargetOperandInfo *OpInfo = TID.OpInfo; + if (!OpInfo) return false; - assert(NumDefs == 1 && NumOps >= 3 && + assert(TID.getNumDefs() == 1 && NumOps >= 3 && OpInfo[0].RegClass == ARM::DPRRegClassID && OpInfo[1].RegClass == ARM::DPRRegClassID && TID.getOperandConstraint(1, TOI::TIED_TO) != -1 && @@ -2879,14 +2932,14 @@ static bool DisassembleNEONSetLnFrm(MCInst &MI, unsigned Opcode, uint32_t insn, : ESize32); // Dd = Inst{7:19-16} => NEON Rn - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::DPRRegClassID, + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::DPRRegClassID, decodeNEONRn(insn)))); // TIED_TO operand. MI.addOperand(MCOperand::CreateReg(0)); // Rt = Inst{15-12} => ARM Rd - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID, + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, decodeRd(insn)))); MI.addOperand(MCOperand::CreateImm(decodeNVLaneOpIndex(insn, esize))); @@ -2898,7 +2951,7 @@ static bool DisassembleNEONSetLnFrm(MCInst &MI, unsigned Opcode, uint32_t insn, // Vector Duplicate Instructions (from ARM core register to all elements). 
// VDUP8d, VDUP16d, VDUP32d, VDUP8q, VDUP16q, VDUP32q: Qd/Dd Rt static bool DisassembleNEONDupFrm(MCInst &MI, unsigned Opcode, uint32_t insn, - unsigned short NumOps, unsigned &NumOpsAdded, BO) { + unsigned short NumOps, unsigned &NumOpsAdded, BO B) { const TargetOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo; @@ -2911,11 +2964,11 @@ static bool DisassembleNEONDupFrm(MCInst &MI, unsigned Opcode, uint32_t insn, unsigned RegClass = OpInfo[0].RegClass; // Qd/Dd = Inst{7:19-16} => NEON Rn - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(RegClass, + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, RegClass, decodeNEONRn(insn)))); // Rt = Inst{15-12} => ARM Rd - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID, + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, decodeRd(insn)))); NumOpsAdded = 2; @@ -2945,13 +2998,13 @@ static inline bool PreLoadOpcode(unsigned Opcode) { } static bool DisassemblePreLoadFrm(MCInst &MI, unsigned Opcode, uint32_t insn, - unsigned short NumOps, unsigned &NumOpsAdded) { + unsigned short NumOps, unsigned &NumOpsAdded, BO B) { // Preload Data/Instruction requires either 2 or 4 operands. // PLDi, PLDWi, PLIi: Rn [+/-]imm12 add = (U == '1') // PLDr[a|m], PLDWr[a|m], PLIr[a|m]: Rn Rm addrmode2_opc - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID, + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, decodeRn(insn)))); if (Opcode == ARM::PLDi || Opcode == ARM::PLDWi || Opcode == ARM::PLIi) { @@ -2961,7 +3014,7 @@ static bool DisassemblePreLoadFrm(MCInst &MI, unsigned Opcode, uint32_t insn, MI.addOperand(MCOperand::CreateImm(Offset)); NumOpsAdded = 2; } else { - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID, + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, decodeRm(insn)))); ARM_AM::AddrOpc AddrOpcode = getUBit(insn) ? ARM_AM::add : ARM_AM::sub; @@ -2982,7 +3035,7 @@ static bool DisassemblePreLoadFrm(MCInst &MI, unsigned Opcode, uint32_t insn, } static bool DisassembleMiscFrm(MCInst &MI, unsigned Opcode, uint32_t insn, - unsigned short NumOps, unsigned &NumOpsAdded, BO) { + unsigned short NumOps, unsigned &NumOpsAdded, BO B) { if (MemBarrierInstr(insn)) return true; @@ -3031,7 +3084,7 @@ static bool DisassembleMiscFrm(MCInst &MI, unsigned Opcode, uint32_t insn, } if (PreLoadOpcode(Opcode)) - return DisassemblePreLoadFrm(MI, Opcode, insn, NumOps, NumOpsAdded); + return DisassemblePreLoadFrm(MI, Opcode, insn, NumOps, NumOpsAdded, B); assert(0 && "Unexpected misc instruction!"); return false; @@ -3147,7 +3200,7 @@ bool ARMBasicMCBuilder::BuildIt(MCInst &MI, uint32_t insn) { unsigned NumOpsAdded = 0; bool OK = (*Disasm)(MI, Opcode, insn, NumOps, NumOpsAdded, this); - if (!OK) return false; + if (!OK || this->Err != 0) return false; if (NumOpsAdded >= NumOps) return true; @@ -3156,6 +3209,49 @@ bool ARMBasicMCBuilder::BuildIt(MCInst &MI, uint32_t insn) { return TryPredicateAndSBitModifier(MI, Opcode, insn, NumOps - NumOpsAdded); } +// A8.3 Conditional execution +// A8.3.1 Pseudocode details of conditional execution +// Condition bits '111x' indicate the instruction is always executed. +static uint32_t CondCode(uint32_t CondField) { + if (CondField == 0xF) + return ARMCC::AL; + return CondField; +} + +/// DoPredicateOperands - DoPredicateOperands process the predicate operands +/// of some Thumb instructions which come before the reglist operands. It +/// returns true if the two predicate operands have been processed. 
+bool ARMBasicMCBuilder::DoPredicateOperands(MCInst& MI, unsigned Opcode, + uint32_t /* insn */, unsigned short NumOpsRemaining) { + + assert(NumOpsRemaining > 0 && "Invalid argument"); + + const TargetOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo; + unsigned Idx = MI.getNumOperands(); + + // First, we check whether this instr specifies the PredicateOperand through + // a pair of TargetOperandInfos with isPredicate() property. + if (NumOpsRemaining >= 2 && + OpInfo[Idx].isPredicate() && OpInfo[Idx+1].isPredicate() && + OpInfo[Idx].RegClass == 0 && OpInfo[Idx+1].RegClass == ARM::CCRRegClassID) + { + // If we are inside an IT block, get the IT condition bits maintained via + // ARMBasicMCBuilder::ITState[7:0], through ARMBasicMCBuilder::GetITCond(). + // See also A2.5.2. + if (InITBlock()) + MI.addOperand(MCOperand::CreateImm(GetITCond())); + else + MI.addOperand(MCOperand::CreateImm(ARMCC::AL)); + MI.addOperand(MCOperand::CreateReg(ARM::CPSR)); + return true; + } + + return false; +} + +/// TryPredicateAndSBitModifier - TryPredicateAndSBitModifier tries to process +/// the possible Predicate and SBitModifier, to build the remaining MCOperand +/// constituents. bool ARMBasicMCBuilder::TryPredicateAndSBitModifier(MCInst& MI, unsigned Opcode, uint32_t insn, unsigned short NumOpsRemaining) { @@ -3183,27 +3279,24 @@ bool ARMBasicMCBuilder::TryPredicateAndSBitModifier(MCInst& MI, unsigned Opcode, // // A8.6.16 B if (Name == "t2Bcc") - MI.addOperand(MCOperand::CreateImm(slice(insn, 25, 22))); + MI.addOperand(MCOperand::CreateImm(CondCode(slice(insn, 25, 22)))); else if (Name == "tBcc") - MI.addOperand(MCOperand::CreateImm(slice(insn, 11, 8))); + MI.addOperand(MCOperand::CreateImm(CondCode(slice(insn, 11, 8)))); else MI.addOperand(MCOperand::CreateImm(ARMCC::AL)); } else { - // ARM Instructions. Check condition field. - int64_t CondVal = getCondField(insn); - if (CondVal == 0xF) - MI.addOperand(MCOperand::CreateImm(ARMCC::AL)); - else - MI.addOperand(MCOperand::CreateImm(CondVal)); + // ARM instructions get their condition field from Inst{31-28}. + MI.addOperand(MCOperand::CreateImm(CondCode(getCondField(insn)))); } } MI.addOperand(MCOperand::CreateReg(ARM::CPSR)); Idx += 2; NumOpsRemaining -= 2; - if (NumOpsRemaining == 0) - return true; } + if (NumOpsRemaining == 0) + return true; + // Next, if OptionalDefOperand exists, we check whether the 'S' bit is set. if (OpInfo[Idx].isOptionalDef() && OpInfo[Idx].RegClass==ARM::CCRRegClassID) { MI.addOperand(MCOperand::CreateReg(getSBit(insn) == 1 ? ARM::CPSR : 0)); @@ -3224,7 +3317,7 @@ bool ARMBasicMCBuilder::RunBuildAfterHook(bool Status, MCInst &MI, if (!SP) return Status; if (Opcode == ARM::t2IT) - SP->InitIT(slice(insn, 7, 0)); + Status = SP->InitIT(slice(insn, 7, 0)) ? Status : false; else if (InITBlock()) SP->UpdateIT(); @@ -3234,7 +3327,7 @@ bool ARMBasicMCBuilder::RunBuildAfterHook(bool Status, MCInst &MI, /// Opcode, Format, and NumOperands make up an ARM Basic MCBuilder. ARMBasicMCBuilder::ARMBasicMCBuilder(unsigned opc, ARMFormat format, unsigned short num) - : Opcode(opc), Format(format), NumOps(num), SP(0) { + : Opcode(opc), Format(format), NumOps(num), SP(0), Err(0) { unsigned Idx = (unsigned)format; assert(Idx < (array_lengthof(FuncPtrs) - 1) && "Unknown format"); Disasm = FuncPtrs[Idx]; @@ -3246,6 +3339,11 @@ ARMBasicMCBuilder::ARMBasicMCBuilder(unsigned opc, ARMFormat format, /// are responsible for freeing up of the allocated memory. Cacheing can be /// performed by the API clients to improve performance. 
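Note: the new CondCode helper above folds the '1111' condition field into ARMCC::AL per A8.3.1, so downstream code only ever sees a valid predicate. A standalone sketch of the same normalization, assuming the usual ARM numbering in which AL is 0b1110 (the enum values below are borrowed for illustration only):

  #include <cassert>
  #include <cstdint>

  // Assumed condition-code values (as in the ARM ARM); only EQ and AL are
  // needed for the demonstration.
  enum CondCodes { EQ = 0, AL = 14 };

  // Condition bits '111x' mean "always executed" (A8.3.1), so a raw field of
  // 0b1111 is mapped to the AL predicate; everything else passes through.
  static uint32_t condCode(uint32_t condField) {
    return condField == 0xF ? AL : condField;
  }

  int main() {
    assert(condCode(0xF) == AL); // '1111' normalized to AL
    assert(condCode(0x0) == EQ); // ordinary condition codes unchanged
    return 0;
  }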
ARMBasicMCBuilder *llvm::CreateMCBuilder(unsigned Opcode, ARMFormat Format) { + // For "Unknown format", fail by returning a NULL pointer. + if ((unsigned)Format >= (array_lengthof(FuncPtrs) - 1)) { + DEBUG(errs() << "Unknown format\n"); + return 0; + } return new ARMBasicMCBuilder(Opcode, Format, ARMInsts[Opcode].getNumOperands()); diff --git a/lib/Target/ARM/Disassembler/ARMDisassemblerCore.h b/lib/Target/ARM/Disassembler/ARMDisassemblerCore.h index 3075230..b1d90df 100644 --- a/lib/Target/ARM/Disassembler/ARMDisassemblerCore.h +++ b/lib/Target/ARM/Disassembler/ARMDisassemblerCore.h @@ -171,30 +171,51 @@ typedef ARMBasicMCBuilder *BO; typedef bool (*DisassembleFP)(MCInst &MI, unsigned Opcode, uint32_t insn, unsigned short NumOps, unsigned &NumOpsAdded, BO Builder); +/// CreateMCBuilder - Return an ARMBasicMCBuilder that can build up the MC +/// infrastructure of an MCInst given the Opcode and Format of the instr. +/// Return NULL if it fails to create/return a proper builder. API clients +/// are responsible for freeing up of the allocated memory. Cacheing can be +/// performed by the API clients to improve performance. +extern ARMBasicMCBuilder *CreateMCBuilder(unsigned Opcode, ARMFormat Format); + /// ARMBasicMCBuilder - ARMBasicMCBuilder represents an ARM MCInst builder that /// knows how to build up the MCOperand list. class ARMBasicMCBuilder { + friend ARMBasicMCBuilder *CreateMCBuilder(unsigned Opcode, ARMFormat Format); unsigned Opcode; ARMFormat Format; unsigned short NumOps; DisassembleFP Disasm; Session *SP; + int Err; // !=0 if the builder encounters some error condition during build. + +private: + /// Opcode, Format, and NumOperands make up an ARM Basic MCBuilder. + ARMBasicMCBuilder(unsigned opc, ARMFormat format, unsigned short num); public: ARMBasicMCBuilder(ARMBasicMCBuilder &B) : Opcode(B.Opcode), Format(B.Format), NumOps(B.NumOps), Disasm(B.Disasm), - SP(B.SP) - {} - - /// Opcode, Format, and NumOperands make up an ARM Basic MCBuilder. - ARMBasicMCBuilder(unsigned opc, ARMFormat format, unsigned short num); + SP(B.SP) { + Err = 0; + } virtual ~ARMBasicMCBuilder() {} - void setSession(Session *sp) { + void SetSession(Session *sp) { SP = sp; } + void SetErr(int ErrCode) { + Err = ErrCode; + } + + /// DoPredicateOperands - DoPredicateOperands process the predicate operands + /// of some Thumb instructions which come before the reglist operands. It + /// returns true if the two predicate operands have been processed. + bool DoPredicateOperands(MCInst& MI, unsigned Opcode, + uint32_t insn, unsigned short NumOpsRemaning); + /// TryPredicateAndSBitModifier - TryPredicateAndSBitModifier tries to process /// the possible Predicate and SBitModifier, to build the remaining MCOperand /// constituents. @@ -236,13 +257,6 @@ private: } }; -/// CreateMCBuilder - Return an ARMBasicMCBuilder that can build up the MC -/// infrastructure of an MCInst given the Opcode and Format of the instr. -/// Return NULL if it fails to create/return a proper builder. API clients -/// are responsible for freeing up of the allocated memory. Cacheing can be -/// performed by the API clients to improve performance. 
-extern ARMBasicMCBuilder *CreateMCBuilder(unsigned Opcode, ARMFormat Format); - } // namespace llvm #endif diff --git a/lib/Target/ARM/Disassembler/Makefile b/lib/Target/ARM/Disassembler/Makefile new file mode 100644 index 0000000..031b6ac --- /dev/null +++ b/lib/Target/ARM/Disassembler/Makefile @@ -0,0 +1,16 @@ +##===- lib/Target/ARM/Disassembler/Makefile ----------------*- Makefile -*-===## +# +# The LLVM Compiler Infrastructure +# +# This file is distributed under the University of Illinois Open Source +# License. See LICENSE.TXT for details. +# +##===----------------------------------------------------------------------===## + +LEVEL = ../../../.. +LIBRARYNAME = LLVMARMDisassembler + +# Hack: we need to include 'main' arm target directory to grab private headers +CPPFLAGS = -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/.. + +include $(LEVEL)/Makefile.common diff --git a/lib/Target/ARM/Disassembler/ThumbDisassemblerCore.h b/lib/Target/ARM/Disassembler/ThumbDisassemblerCore.h index 481f25d..4b2e308 100644 --- a/lib/Target/ARM/Disassembler/ThumbDisassemblerCore.h +++ b/lib/Target/ARM/Disassembler/ThumbDisassemblerCore.h @@ -193,14 +193,18 @@ static inline unsigned getShiftAmtBits(uint32_t insn) { // A8.6.17 BFC // Encoding T1 ARMv6T2, ARMv7 // LLVM-specific encoding for #<lsb> and #<width> -static inline uint32_t getBitfieldInvMask(uint32_t insn) { +static inline bool getBitfieldInvMask(uint32_t insn, uint32_t &mask) { uint32_t lsb = getImm3(insn) << 2 | getImm2(insn); uint32_t msb = getMsb(insn); uint32_t Val = 0; - assert(lsb <= msb && "Encoding error: lsb > msb"); + if (msb < lsb) { + DEBUG(errs() << "Encoding error: msb < lsb\n"); + return false; + } for (uint32_t i = lsb; i <= msb; ++i) Val |= (1 << i); - return ~Val; + mask = ~Val; + return true; } // A8.4 Shifts applied to a register @@ -342,7 +346,7 @@ static inline unsigned decodeRotate(uint32_t insn) { // Special case: // tMOVSr: tRd tRn static bool DisassembleThumb1General(MCInst &MI, unsigned Opcode, uint32_t insn, - unsigned short NumOps, unsigned &NumOpsAdded, BO Builder) { + unsigned short NumOps, unsigned &NumOpsAdded, BO B) { const TargetOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo; unsigned &OpIdx = NumOpsAdded; @@ -360,14 +364,14 @@ static bool DisassembleThumb1General(MCInst &MI, unsigned Opcode, uint32_t insn, // Add the destination operand. MI.addOperand(MCOperand::CreateReg( - getRegisterEnum(ARM::tGPRRegClassID, + getRegisterEnum(B, ARM::tGPRRegClassID, UseRt ? getT1tRt(insn) : getT1tRd(insn)))); ++OpIdx; // Check whether the next operand to be added is a CCR Register. if (OpInfo[OpIdx].RegClass == ARM::CCRRegClassID) { assert(OpInfo[OpIdx].isOptionalDef() && "Optional def operand expected"); - MI.addOperand(MCOperand::CreateReg(Builder->InITBlock() ? 0 : ARM::CPSR)); + MI.addOperand(MCOperand::CreateReg(B->InITBlock() ? 0 : ARM::CPSR)); ++OpIdx; } @@ -376,7 +380,7 @@ static bool DisassembleThumb1General(MCInst &MI, unsigned Opcode, uint32_t insn, if (OpInfo[OpIdx].RegClass == ARM::tGPRRegClassID) { // For UseRt, the reg operand is tied to the first reg operand. MI.addOperand(MCOperand::CreateReg( - getRegisterEnum(ARM::tGPRRegClassID, + getRegisterEnum(B, ARM::tGPRRegClassID, UseRt ? getT1tRt(insn) : getT1tRn(insn)))); ++OpIdx; } @@ -388,7 +392,7 @@ static bool DisassembleThumb1General(MCInst &MI, unsigned Opcode, uint32_t insn, // The next available operand is either a reg operand or an imm operand. if (OpInfo[OpIdx].RegClass == ARM::tGPRRegClassID) { // Three register operand instructions. 
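Note: earlier in this chunk, getBitfieldInvMask is changed to report an encoding error instead of asserting when msb < lsb. A standalone sketch of the same computation for BFC's LLVM-specific <lsb>/<width> encoding: build the mask of bits lsb..msb, invert it, and reject msb < lsb.

  #include <cassert>
  #include <cstdint>

  // Compute the inverted mask used by BFC: bits lsb..msb form the field to be
  // cleared, so the operand is the complement of that contiguous run.
  // Returns false for the invalid encoding msb < lsb instead of asserting.
  static bool bitfieldInvMask(uint32_t lsb, uint32_t msb, uint32_t &mask) {
    if (msb < lsb)
      return false;
    uint32_t val = 0;
    for (uint32_t i = lsb; i <= msb; ++i)
      val |= (1u << i);
    mask = ~val;
    return true;
  }

  int main() {
    uint32_t mask = 0;
    bool ok = bitfieldInvMask(8, 15, mask);
    assert(ok && mask == 0xFFFF00FFu);    // clearing bits 8..15
    assert(!bitfieldInvMask(7, 3, mask)); // msb < lsb is rejected
    return 0;
  }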
- MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::tGPRRegClassID, + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::tGPRRegClassID, getT1tRm(insn)))); } else { assert(OpInfo[OpIdx].RegClass == 0 && @@ -409,7 +413,7 @@ static bool DisassembleThumb1General(MCInst &MI, unsigned Opcode, uint32_t insn, // tMVN, tRSB: tRd CPSR tRn // Others: tRd CPSR tRd(TIED_TO) tRn static bool DisassembleThumb1DP(MCInst &MI, unsigned Opcode, uint32_t insn, - unsigned short NumOps, unsigned &NumOpsAdded, BO Builder) { + unsigned short NumOps, unsigned &NumOpsAdded, BO B) { const TargetInstrDesc &TID = ARMInsts[Opcode]; const TargetOperandInfo *OpInfo = TID.OpInfo; @@ -423,14 +427,14 @@ static bool DisassembleThumb1DP(MCInst &MI, unsigned Opcode, uint32_t insn, && "Invalid arguments"); // Add the destination operand. - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::tGPRRegClassID, + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::tGPRRegClassID, getT1tRd(insn)))); ++OpIdx; // Check whether the next operand to be added is a CCR Register. if (OpInfo[OpIdx].RegClass == ARM::CCRRegClassID) { assert(OpInfo[OpIdx].isOptionalDef() && "Optional def operand expected"); - MI.addOperand(MCOperand::CreateReg(Builder->InITBlock() ? 0 : ARM::CPSR)); + MI.addOperand(MCOperand::CreateReg(B->InITBlock() ? 0 : ARM::CPSR)); ++OpIdx; } @@ -449,7 +453,7 @@ static bool DisassembleThumb1DP(MCInst &MI, unsigned Opcode, uint32_t insn, // Process possible next reg operand. if (OpIdx < NumOps && OpInfo[OpIdx].RegClass == ARM::tGPRRegClassID) { // Add tRn operand. - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::tGPRRegClassID, + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::tGPRRegClassID, getT1tRn(insn)))); ++OpIdx; } @@ -466,7 +470,7 @@ static bool DisassembleThumb1DP(MCInst &MI, unsigned Opcode, uint32_t insn, // tBX_RET_vararg: Rm // tBLXr_r9: Rm static bool DisassembleThumb1Special(MCInst &MI, unsigned Opcode, uint32_t insn, - unsigned short NumOps, unsigned &NumOpsAdded) { + unsigned short NumOps, unsigned &NumOpsAdded, BO B) { // tBX_RET has 0 operand. if (NumOps == 0) @@ -474,7 +478,7 @@ static bool DisassembleThumb1Special(MCInst &MI, unsigned Opcode, uint32_t insn, // BX/BLX has 1 reg operand: Rm. if (NumOps == 1) { - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID, + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, getT1Rm(insn)))); NumOpsAdded = 1; return true; @@ -489,7 +493,7 @@ static bool DisassembleThumb1Special(MCInst &MI, unsigned Opcode, uint32_t insn, // Add the destination operand. unsigned RegClass = OpInfo[OpIdx].RegClass; MI.addOperand(MCOperand::CreateReg( - getRegisterEnum(RegClass, + getRegisterEnum(B, RegClass, IsGPR(RegClass) ? getT1Rd(insn) : getT1tRd(insn)))); ++OpIdx; @@ -509,7 +513,7 @@ static bool DisassembleThumb1Special(MCInst &MI, unsigned Opcode, uint32_t insn, assert(OpIdx < NumOps && "More operands expected"); RegClass = OpInfo[OpIdx].RegClass; MI.addOperand(MCOperand::CreateReg( - getRegisterEnum(RegClass, + getRegisterEnum(B, RegClass, IsGPR(RegClass) ? 
getT1Rm(insn) : getT1tRn(insn)))); ++OpIdx; @@ -521,9 +525,10 @@ static bool DisassembleThumb1Special(MCInst &MI, unsigned Opcode, uint32_t insn, // // tLDRpci: tRt imm8*4 static bool DisassembleThumb1LdPC(MCInst &MI, unsigned Opcode, uint32_t insn, - unsigned short NumOps, unsigned &NumOpsAdded) { + unsigned short NumOps, unsigned &NumOpsAdded, BO B) { const TargetOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo; + if (!OpInfo) return false; assert(NumOps >= 2 && OpInfo[0].RegClass == ARM::tGPRRegClassID && (OpInfo[1].RegClass == 0 && @@ -532,7 +537,7 @@ static bool DisassembleThumb1LdPC(MCInst &MI, unsigned Opcode, uint32_t insn, && "Invalid arguments"); // Add the destination operand. - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::tGPRRegClassID, + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::tGPRRegClassID, getT1tRt(insn)))); // And the (imm8 << 2) operand. @@ -564,7 +569,7 @@ static bool DisassembleThumb1LdPC(MCInst &MI, unsigned Opcode, uint32_t insn, // Load/Store Register (reg|imm): tRd tRn imm5 tRm // Load Register Signed Byte|Halfword: tRd tRn tRm static bool DisassembleThumb1LdSt(unsigned opA, MCInst &MI, unsigned Opcode, - uint32_t insn, unsigned short NumOps, unsigned &NumOpsAdded) { + uint32_t insn, unsigned short NumOps, unsigned &NumOpsAdded, BO B) { const TargetInstrDesc &TID = ARMInsts[Opcode]; const TargetOperandInfo *OpInfo = TID.OpInfo; @@ -581,9 +586,9 @@ static bool DisassembleThumb1LdSt(unsigned opA, MCInst &MI, unsigned Opcode, && "Expect >= 2 operands and first two as thumb reg operands"); // Add the destination reg and the base reg. - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::tGPRRegClassID, + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::tGPRRegClassID, getT1tRd(insn)))); - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::tGPRRegClassID, + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::tGPRRegClassID, getT1tRn(insn)))); OpIdx = 2; @@ -603,9 +608,10 @@ static bool DisassembleThumb1LdSt(unsigned opA, MCInst &MI, unsigned Opcode, // The next reg operand is tRm, the offset. assert(OpIdx < NumOps && OpInfo[OpIdx].RegClass == ARM::tGPRRegClassID && "Thumb reg operand expected"); - MI.addOperand(MCOperand::CreateReg(Imm5 ? 0 - : getRegisterEnum(ARM::tGPRRegClassID, - getT1tRm(insn)))); + MI.addOperand(MCOperand::CreateReg( + Imm5 ? 
0 + : getRegisterEnum(B, ARM::tGPRRegClassID, + getT1tRm(insn)))); ++OpIdx; return true; @@ -615,12 +621,13 @@ static bool DisassembleThumb1LdSt(unsigned opA, MCInst &MI, unsigned Opcode, // // Load/Store Register SP relative: tRt ARM::SP imm8 static bool DisassembleThumb1LdStSP(MCInst &MI, unsigned Opcode, uint32_t insn, - unsigned short NumOps, unsigned &NumOpsAdded) { + unsigned short NumOps, unsigned &NumOpsAdded, BO B) { assert((Opcode == ARM::tLDRspi || Opcode == ARM::tSTRspi) - && "Invalid opcode"); + && "Unexpected opcode"); const TargetOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo; + if (!OpInfo) return false; assert(NumOps >= 3 && OpInfo[0].RegClass == ARM::tGPRRegClassID && @@ -630,7 +637,7 @@ static bool DisassembleThumb1LdStSP(MCInst &MI, unsigned Opcode, uint32_t insn, !OpInfo[2].isOptionalDef()) && "Invalid arguments"); - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::tGPRRegClassID, + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::tGPRRegClassID, getT1tRt(insn)))); MI.addOperand(MCOperand::CreateReg(ARM::SP)); MI.addOperand(MCOperand::CreateImm(getT1Imm8(insn))); @@ -643,11 +650,12 @@ static bool DisassembleThumb1LdStSP(MCInst &MI, unsigned Opcode, uint32_t insn, // // tADDrPCi: tRt imm8 static bool DisassembleThumb1AddPCi(MCInst &MI, unsigned Opcode, uint32_t insn, - unsigned short NumOps, unsigned &NumOpsAdded) { + unsigned short NumOps, unsigned &NumOpsAdded, BO B) { - assert(Opcode == ARM::tADDrPCi && "Invalid opcode"); + assert(Opcode == ARM::tADDrPCi && "Unexpected opcode"); const TargetOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo; + if (!OpInfo) return false; assert(NumOps >= 2 && OpInfo[0].RegClass == ARM::tGPRRegClassID && (OpInfo[1].RegClass == 0 && @@ -655,7 +663,7 @@ static bool DisassembleThumb1AddPCi(MCInst &MI, unsigned Opcode, uint32_t insn, !OpInfo[1].isOptionalDef()) && "Invalid arguments"); - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::tGPRRegClassID, + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::tGPRRegClassID, getT1tRt(insn)))); MI.addOperand(MCOperand::CreateImm(getT1Imm8(insn))); NumOpsAdded = 2; @@ -667,11 +675,12 @@ static bool DisassembleThumb1AddPCi(MCInst &MI, unsigned Opcode, uint32_t insn, // // tADDrSPi: tRt ARM::SP imm8 static bool DisassembleThumb1AddSPi(MCInst &MI, unsigned Opcode, uint32_t insn, - unsigned short NumOps, unsigned &NumOpsAdded) { + unsigned short NumOps, unsigned &NumOpsAdded, BO B) { - assert(Opcode == ARM::tADDrSPi && "Invalid opcode"); + assert(Opcode == ARM::tADDrSPi && "Unexpected opcode"); const TargetOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo; + if (!OpInfo) return false; assert(NumOps >= 3 && OpInfo[0].RegClass == ARM::tGPRRegClassID && @@ -681,7 +690,7 @@ static bool DisassembleThumb1AddSPi(MCInst &MI, unsigned Opcode, uint32_t insn, !OpInfo[2].isOptionalDef()) && "Invalid arguments"); - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::tGPRRegClassID, + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::tGPRRegClassID, getT1tRt(insn)))); MI.addOperand(MCOperand::CreateReg(ARM::SP)); MI.addOperand(MCOperand::CreateImm(getT1Imm8(insn))); @@ -697,23 +706,27 @@ static bool DisassembleThumb1AddSPi(MCInst &MI, unsigned Opcode, uint32_t insn, // "low registers" is specified by Inst{7-0} // lr|pc is specified by Inst{8} static bool DisassembleThumb1PushPop(MCInst &MI, unsigned Opcode, uint32_t insn, - unsigned short NumOps, unsigned &NumOpsAdded) { + unsigned short NumOps, unsigned &NumOpsAdded, BO B) { - assert((Opcode == ARM::tPUSH || Opcode == ARM::tPOP) 
&& "Invalid opcode"); + assert((Opcode == ARM::tPUSH || Opcode == ARM::tPOP) && "Unexpected opcode"); unsigned &OpIdx = NumOpsAdded; // Handling the two predicate operands before the reglist. - MI.addOperand(MCOperand::CreateImm(ARMCC::AL)); - MI.addOperand(MCOperand::CreateReg(ARM::CPSR)); - OpIdx = 2; + if (B->DoPredicateOperands(MI, Opcode, insn, NumOps)) + OpIdx += 2; + else { + DEBUG(errs() << "Expected predicate operands not found.\n"); + return false; + } - // Fill the variadic part of reglist. unsigned RegListBits = slice(insn, 8, 8) << (Opcode == ARM::tPUSH ? 14 : 15) | slice(insn, 7, 0); + + // Fill the variadic part of reglist. for (unsigned i = 0; i < 16; ++i) { if ((RegListBits >> i) & 1) { - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID, + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, i))); ++OpIdx; } @@ -735,13 +748,13 @@ static bool DisassembleThumb1PushPop(MCInst &MI, unsigned Opcode, uint32_t insn, // no operand // Others: tRd tRn static bool DisassembleThumb1Misc(MCInst &MI, unsigned Opcode, uint32_t insn, - unsigned short NumOps, unsigned &NumOpsAdded) { + unsigned short NumOps, unsigned &NumOpsAdded, BO B) { if (NumOps == 0) return true; if (Opcode == ARM::tPUSH || Opcode == ARM::tPOP) - return DisassembleThumb1PushPop(MI, Opcode, insn, NumOps, NumOpsAdded); + return DisassembleThumb1PushPop(MI, Opcode, insn, NumOps, NumOpsAdded, B); const TargetOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo; @@ -799,16 +812,16 @@ static bool DisassembleThumb1Misc(MCInst &MI, unsigned Opcode, uint32_t insn, && "Expect >=2 operands"); // Add the destination operand. - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::tGPRRegClassID, + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::tGPRRegClassID, getT1tRd(insn)))); if (OpInfo[1].RegClass == ARM::tGPRRegClassID) { // Two register instructions. - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::tGPRRegClassID, + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::tGPRRegClassID, getT1tRn(insn)))); } else { // CBNZ, CBZ - assert((Opcode == ARM::tCBNZ || Opcode == ARM::tCBZ) && "Invalid opcode"); + assert((Opcode == ARM::tCBNZ || Opcode == ARM::tCBZ) &&"Unexpected opcode"); MI.addOperand(MCOperand::CreateImm(getT1Imm6(insn) * 2)); } @@ -823,42 +836,47 @@ static bool DisassembleThumb1Misc(MCInst &MI, unsigned Opcode, uint32_t insn, // tLDM_UPD/tSTM_UPD: tRt tRt AM4ModeImm Pred-Imm Pred-CCR register_list // tLDM: tRt AM4ModeImm Pred-Imm Pred-CCR register_list static bool DisassembleThumb1LdStMul(bool Ld, MCInst &MI, unsigned Opcode, - uint32_t insn, unsigned short NumOps, unsigned &NumOpsAdded) { + uint32_t insn, unsigned short NumOps, unsigned &NumOpsAdded, BO B) { assert((Opcode == ARM::tLDM || Opcode == ARM::tLDM_UPD || - Opcode == ARM::tSTM_UPD) && "Invalid opcode"); + Opcode == ARM::tSTM_UPD) && "Unexpected opcode"); unsigned &OpIdx = NumOpsAdded; unsigned tRt = getT1tRt(insn); - unsigned RegListBits = slice(insn, 7, 0); OpIdx = 0; // WB register, if necessary. 
if (Opcode == ARM::tLDM_UPD || Opcode == ARM::tSTM_UPD) { - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID, + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, tRt))); ++OpIdx; } - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID, + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, tRt))); ++OpIdx; // A8.6.53 LDM / LDMIA / LDMFD - Encoding T1 + // A8.6.53 STM / STMIA / STMEA - Encoding T1 MI.addOperand(MCOperand::CreateImm(ARM_AM::getAM4ModeImm(ARM_AM::ia))); ++OpIdx; // Handling the two predicate operands before the reglist. - MI.addOperand(MCOperand::CreateImm(ARMCC::AL)); - MI.addOperand(MCOperand::CreateReg(ARM::CPSR)); - OpIdx += 2; + if (B->DoPredicateOperands(MI, Opcode, insn, NumOps)) + OpIdx += 2; + else { + DEBUG(errs() << "Expected predicate operands not found.\n"); + return false; + } + + unsigned RegListBits = slice(insn, 7, 0); // Fill the variadic part of reglist. for (unsigned i = 0; i < 8; ++i) { if ((RegListBits >> i) & 1) { - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::tGPRRegClassID, + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::tGPRRegClassID, i))); ++OpIdx; } @@ -868,13 +886,15 @@ static bool DisassembleThumb1LdStMul(bool Ld, MCInst &MI, unsigned Opcode, } static bool DisassembleThumb1LdMul(MCInst &MI, unsigned Opcode, uint32_t insn, - unsigned short NumOps, unsigned &NumOpsAdded) { - return DisassembleThumb1LdStMul(true, MI, Opcode, insn, NumOps, NumOpsAdded); + unsigned short NumOps, unsigned &NumOpsAdded, BO B) { + return DisassembleThumb1LdStMul(true, MI, Opcode, insn, NumOps, NumOpsAdded, + B); } static bool DisassembleThumb1StMul(MCInst &MI, unsigned Opcode, uint32_t insn, - unsigned short NumOps, unsigned &NumOpsAdded) { - return DisassembleThumb1LdStMul(false, MI, Opcode, insn, NumOps, NumOpsAdded); + unsigned short NumOps, unsigned &NumOpsAdded, BO B) { + return DisassembleThumb1LdStMul(false, MI, Opcode, insn, NumOps, NumOpsAdded, + B); } // A8.6.16 B Encoding T1 @@ -885,12 +905,14 @@ static bool DisassembleThumb1StMul(MCInst &MI, unsigned Opcode, uint32_t insn, // tSVC: imm8 Pred-Imm Pred-CCR // tTRAP: 0 operand (early return) static bool DisassembleThumb1CondBr(MCInst &MI, unsigned Opcode, uint32_t insn, - unsigned short NumOps, unsigned &NumOpsAdded) { + unsigned short NumOps, unsigned &NumOpsAdded, BO) { if (Opcode == ARM::tTRAP) return true; const TargetOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo; + if (!OpInfo) return false; + assert(NumOps == 3 && OpInfo[0].RegClass == 0 && OpInfo[1].isPredicate() && OpInfo[2].RegClass == ARM::CCRRegClassID && "Exactly 3 operands expected"); @@ -912,9 +934,11 @@ static bool DisassembleThumb1CondBr(MCInst &MI, unsigned Opcode, uint32_t insn, // // tB: offset static bool DisassembleThumb1Br(MCInst &MI, unsigned Opcode, uint32_t insn, - unsigned short NumOps, unsigned &NumOpsAdded) { + unsigned short NumOps, unsigned &NumOpsAdded, BO) { const TargetOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo; + if (!OpInfo) return false; + assert(NumOps == 1 && OpInfo[0].RegClass == 0 && "1 imm operand expected"); unsigned Imm11 = getT1Imm11(insn); @@ -952,9 +976,8 @@ static bool DisassembleThumb1Br(MCInst &MI, unsigned Opcode, uint32_t insn, // 1101xx Conditional branch, and Supervisor Call on page A6-13 // 11100x Unconditional Branch, see B on page A8-44 // -static bool DisassembleThumb1(uint16_t op, - MCInst &MI, unsigned Opcode, uint32_t insn, - unsigned short NumOps, unsigned &NumOpsAdded, BO Builder) { +static 
bool DisassembleThumb1(uint16_t op, MCInst &MI, unsigned Opcode, + uint32_t insn, unsigned short NumOps, unsigned &NumOpsAdded, BO B) { unsigned op1 = slice(op, 5, 4); unsigned op2 = slice(op, 3, 2); @@ -963,27 +986,27 @@ static bool DisassembleThumb1(uint16_t op, switch (op1) { case 0: // A6.2.1 Shift (immediate), add, subtract, move, and compare - return DisassembleThumb1General(MI, Opcode, insn, NumOps, NumOpsAdded, - Builder); + return DisassembleThumb1General(MI, Opcode, insn, NumOps, NumOpsAdded, B); case 1: switch (op2) { case 0: switch (op3) { case 0: // A6.2.2 Data-processing - return DisassembleThumb1DP(MI, Opcode, insn, NumOps, NumOpsAdded, - Builder); + return DisassembleThumb1DP(MI, Opcode, insn, NumOps, NumOpsAdded, B); case 1: // A6.2.3 Special data instructions and branch and exchange - return DisassembleThumb1Special(MI, Opcode, insn, NumOps, NumOpsAdded); + return DisassembleThumb1Special(MI, Opcode, insn, NumOps, NumOpsAdded, + B); default: // A8.6.59 LDR (literal) - return DisassembleThumb1LdPC(MI, Opcode, insn, NumOps, NumOpsAdded); + return DisassembleThumb1LdPC(MI, Opcode, insn, NumOps, NumOpsAdded, B); } break; default: // A6.2.4 Load/store single data item - return DisassembleThumb1LdSt(opA, MI, Opcode, insn, NumOps, NumOpsAdded); + return DisassembleThumb1LdSt(opA, MI, Opcode, insn, NumOps, NumOpsAdded, + B); break; } break; @@ -991,21 +1014,24 @@ static bool DisassembleThumb1(uint16_t op, switch (op2) { case 0: // A6.2.4 Load/store single data item - return DisassembleThumb1LdSt(opA, MI, Opcode, insn, NumOps, NumOpsAdded); + return DisassembleThumb1LdSt(opA, MI, Opcode, insn, NumOps, NumOpsAdded, + B); case 1: // A6.2.4 Load/store single data item - return DisassembleThumb1LdStSP(MI, Opcode, insn, NumOps, NumOpsAdded); + return DisassembleThumb1LdStSP(MI, Opcode, insn, NumOps, NumOpsAdded, B); case 2: if (op3 <= 1) { // A8.6.10 ADR - return DisassembleThumb1AddPCi(MI, Opcode, insn, NumOps, NumOpsAdded); + return DisassembleThumb1AddPCi(MI, Opcode, insn, NumOps, NumOpsAdded, + B); } else { // A8.6.8 ADD (SP plus immediate) - return DisassembleThumb1AddSPi(MI, Opcode, insn, NumOps, NumOpsAdded); + return DisassembleThumb1AddSPi(MI, Opcode, insn, NumOps, NumOpsAdded, + B); } default: // A6.2.5 Miscellaneous 16-bit instructions - return DisassembleThumb1Misc(MI, Opcode, insn, NumOps, NumOpsAdded); + return DisassembleThumb1Misc(MI, Opcode, insn, NumOps, NumOpsAdded, B); } break; case 3: @@ -1013,17 +1039,17 @@ static bool DisassembleThumb1(uint16_t op, case 0: if (op3 <= 1) { // A8.6.189 STM / STMIA / STMEA - return DisassembleThumb1StMul(MI, Opcode, insn, NumOps, NumOpsAdded); + return DisassembleThumb1StMul(MI, Opcode, insn, NumOps, NumOpsAdded, B); } else { // A8.6.53 LDM / LDMIA / LDMFD - return DisassembleThumb1LdMul(MI, Opcode, insn, NumOps, NumOpsAdded); + return DisassembleThumb1LdMul(MI, Opcode, insn, NumOps, NumOpsAdded, B); } case 1: // A6.2.6 Conditional branch, and Supervisor Call - return DisassembleThumb1CondBr(MI, Opcode, insn, NumOps, NumOpsAdded); + return DisassembleThumb1CondBr(MI, Opcode, insn, NumOps, NumOpsAdded, B); case 2: // Unconditional Branch, see B on page A8-44 - return DisassembleThumb1Br(MI, Opcode, insn, NumOps, NumOpsAdded); + return DisassembleThumb1Br(MI, Opcode, insn, NumOps, NumOpsAdded, B); default: assert(0 && "Unreachable code"); break; @@ -1079,32 +1105,32 @@ static bool DisassembleThumb2SRS(MCInst &MI, unsigned Opcode, uint32_t insn, // t2RFE[IA|DB]W/t2RFE[IA|DB]: Rn static bool DisassembleThumb2RFE(MCInst &MI, unsigned 
Opcode, uint32_t insn, - unsigned short NumOps, unsigned &NumOpsAdded) { - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID, + unsigned short NumOps, unsigned &NumOpsAdded, BO B) { + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, decodeRn(insn)))); NumOpsAdded = 1; return true; } static bool DisassembleThumb2LdStMul(MCInst &MI, unsigned Opcode, uint32_t insn, - unsigned short NumOps, unsigned &NumOpsAdded) { + unsigned short NumOps, unsigned &NumOpsAdded, BO B) { if (Thumb2SRSOpcode(Opcode)) return DisassembleThumb2SRS(MI, Opcode, insn, NumOps, NumOpsAdded); if (Thumb2RFEOpcode(Opcode)) - return DisassembleThumb2RFE(MI, Opcode, insn, NumOps, NumOpsAdded); + return DisassembleThumb2RFE(MI, Opcode, insn, NumOps, NumOpsAdded, B); assert((Opcode == ARM::t2LDM || Opcode == ARM::t2LDM_UPD || Opcode == ARM::t2STM || Opcode == ARM::t2STM_UPD) - && "Invalid opcode"); + && "Unexpected opcode"); assert(NumOps >= 5 && "Thumb2 LdStMul expects NumOps >= 5"); unsigned &OpIdx = NumOpsAdded; OpIdx = 0; - unsigned Base = getRegisterEnum(ARM::GPRRegClassID, decodeRn(insn)); + unsigned Base = getRegisterEnum(B, ARM::GPRRegClassID, decodeRn(insn)); // Writeback to base. if (Opcode == ARM::t2LDM_UPD || Opcode == ARM::t2STM_UPD) { @@ -1120,15 +1146,19 @@ static bool DisassembleThumb2LdStMul(MCInst &MI, unsigned Opcode, uint32_t insn, ++OpIdx; // Handling the two predicate operands before the reglist. - MI.addOperand(MCOperand::CreateImm(ARMCC::AL)); - MI.addOperand(MCOperand::CreateReg(ARM::CPSR)); - OpIdx += 2; + if (B->DoPredicateOperands(MI, Opcode, insn, NumOps)) + OpIdx += 2; + else { + DEBUG(errs() << "Expected predicate operands not found.\n"); + return false; + } - // Fill the variadic part of reglist. unsigned RegListBits = insn & ((1 << 16) - 1); + + // Fill the variadic part of reglist. for (unsigned i = 0; i < 16; ++i) { if ((RegListBits >> i) & 1) { - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID, + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, i))); ++OpIdx; } @@ -1144,9 +1174,11 @@ static bool DisassembleThumb2LdStMul(MCInst &MI, unsigned Opcode, uint32_t insn, // t2STREXD: Rm Rd Rs Rn // t2STREXB, t2STREXH: Rm Rd Rn static bool DisassembleThumb2LdStEx(MCInst &MI, unsigned Opcode, uint32_t insn, - unsigned short NumOps, unsigned &NumOpsAdded) { + unsigned short NumOps, unsigned &NumOpsAdded, BO B) { const TargetOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo; + if (!OpInfo) return false; + unsigned &OpIdx = NumOpsAdded; OpIdx = 0; @@ -1163,25 +1195,25 @@ static bool DisassembleThumb2LdStEx(MCInst &MI, unsigned Opcode, uint32_t insn, // Add the destination operand for store. if (isStore) { MI.addOperand(MCOperand::CreateReg( - getRegisterEnum(ARM::GPRRegClassID, + getRegisterEnum(B, ARM::GPRRegClassID, isSW ? decodeRs(insn) : decodeRm(insn)))); ++OpIdx; } // Source operand for store and destination operand for load. - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID, + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, decodeRd(insn)))); ++OpIdx; // Thumb2 doubleword complication: with an extra source/destination operand. if (isDW) { - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID, + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, decodeRs(insn)))); ++OpIdx; } // Finally add the pointer operand. 
- MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID, + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, decodeRn(insn)))); ++OpIdx; @@ -1198,9 +1230,10 @@ static bool DisassembleThumb2LdStEx(MCInst &MI, unsigned Opcode, uint32_t insn, // Ditto for t2LDRD_PRE, t2LDRD_POST, t2STRD_PRE, t2STRD_POST, which are for // disassembly only and do not have a tied_to writeback base register operand. static bool DisassembleThumb2LdStDual(MCInst &MI, unsigned Opcode, - uint32_t insn, unsigned short NumOps, unsigned &NumOpsAdded) { + uint32_t insn, unsigned short NumOps, unsigned &NumOpsAdded, BO B) { const TargetOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo; + if (!OpInfo) return false; assert(NumOps >= 4 && OpInfo[0].RegClass == ARM::GPRRegClassID @@ -1210,11 +1243,11 @@ static bool DisassembleThumb2LdStDual(MCInst &MI, unsigned Opcode, && "Expect >= 4 operands and first 3 as reg operands"); // Add the <Rt> <Rt2> operands. - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID, + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, decodeRd(insn)))); - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID, + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, decodeRs(insn)))); - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID, + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, decodeRn(insn)))); // Finally add (+/-)imm8*4, depending on the U bit. @@ -1235,15 +1268,15 @@ static bool DisassembleThumb2LdStDual(MCInst &MI, unsigned Opcode, // // t2TBBgen, t2TBHgen: Rn Rm Pred-Imm Pred-CCR static bool DisassembleThumb2TB(MCInst &MI, unsigned Opcode, - uint32_t insn, unsigned short NumOps, unsigned &NumOpsAdded) { + uint32_t insn, unsigned short NumOps, unsigned &NumOpsAdded, BO B) { assert(NumOps >= 2 && "Expect >= 2 operands"); // The generic version of TBB/TBH needs a base register. - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID, + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, decodeRn(insn)))); // Add the index register. - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID, + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, decodeRm(insn)))); NumOpsAdded = 2; @@ -1278,7 +1311,7 @@ static inline bool Thumb2ShiftOpcode(unsigned Opcode) { // nothing else, because the shift amount is already specified. // Similar case holds for t2MOVrx, t2ADDrr, ..., etc. static bool DisassembleThumb2DPSoReg(MCInst &MI, unsigned Opcode, uint32_t insn, - unsigned short NumOps, unsigned &NumOpsAdded) { + unsigned short NumOps, unsigned &NumOpsAdded, BO B) { const TargetInstrDesc &TID = ARMInsts[Opcode]; const TargetOperandInfo *OpInfo = TID.OpInfo; @@ -1293,7 +1326,7 @@ static bool DisassembleThumb2DPSoReg(MCInst &MI, unsigned Opcode, uint32_t insn, && OpInfo[3].RegClass == 0 && "Exactlt 4 operands expect and first two as reg operands"); // Only need to populate the src reg operand. - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID, + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, decodeRm(insn)))); MI.addOperand(MCOperand::CreateReg(0)); MI.addOperand(MCOperand::CreateImm(0)); @@ -1315,7 +1348,7 @@ static bool DisassembleThumb2DPSoReg(MCInst &MI, unsigned Opcode, uint32_t insn, // Build the register operands, followed by the constant shift specifier. 
MI.addOperand(MCOperand::CreateReg( - getRegisterEnum(ARM::GPRRegClassID, + getRegisterEnum(B, ARM::GPRRegClassID, NoDstReg ? decodeRn(insn) : decodeRs(insn)))); ++OpIdx; @@ -1324,15 +1357,18 @@ static bool DisassembleThumb2DPSoReg(MCInst &MI, unsigned Opcode, uint32_t insn, if ((Idx = TID.getOperandConstraint(OpIdx, TOI::TIED_TO)) != -1) { // Process tied_to operand constraint. MI.addOperand(MI.getOperand(Idx)); - } else { - assert(!NoDstReg && "Internal error"); - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID, + ++OpIdx; + } else if (!NoDstReg) { + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, decodeRn(insn)))); + ++OpIdx; + } else { + DEBUG(errs() << "Thumb2 encoding error: d==15 for three-reg operands.\n"); + return false; } - ++OpIdx; } - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID, + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, decodeRm(insn)))); ++OpIdx; @@ -1373,7 +1409,7 @@ static bool DisassembleThumb2DPSoReg(MCInst &MI, unsigned Opcode, uint32_t insn, // // ModImm = ThumbExpandImm(i:imm3:imm8) static bool DisassembleThumb2DPModImm(MCInst &MI, unsigned Opcode, - uint32_t insn, unsigned short NumOps, unsigned &NumOpsAdded) { + uint32_t insn, unsigned short NumOps, unsigned &NumOpsAdded, BO B) { const TargetOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo; unsigned &OpIdx = NumOpsAdded; @@ -1389,13 +1425,16 @@ static bool DisassembleThumb2DPModImm(MCInst &MI, unsigned Opcode, // Build the register operands, followed by the modified immediate. MI.addOperand(MCOperand::CreateReg( - getRegisterEnum(ARM::GPRRegClassID, + getRegisterEnum(B, ARM::GPRRegClassID, NoDstReg ? decodeRn(insn) : decodeRs(insn)))); ++OpIdx; if (TwoReg) { - assert(!NoDstReg && "Internal error"); - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID, + if (NoDstReg) { + DEBUG(errs()<<"Thumb2 encoding error: d==15 for DPModImm 2-reg instr.\n"); + return false; + } + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, decodeRn(insn)))); ++OpIdx; } @@ -1437,7 +1476,7 @@ static inline unsigned decodeThumb2SaturatePos(unsigned Opcode, uint32_t insn) { case ARM::t2USAT16: return slice(insn, 3, 0); default: - assert(0 && "Invalid opcode passed in"); + assert(0 && "Unexpected opcode"); return 0; } } @@ -1459,7 +1498,7 @@ static inline unsigned decodeThumb2SaturatePos(unsigned Opcode, uint32_t insn) { // o t2SSAT[lsl|asr], t2USAT[lsl|asr]: Rs sat_pos Rn shamt // o t2SSAT16, t2USAT16: Rs sat_pos Rn static bool DisassembleThumb2DPBinImm(MCInst &MI, unsigned Opcode, - uint32_t insn, unsigned short NumOps, unsigned &NumOpsAdded) { + uint32_t insn, unsigned short NumOps, unsigned &NumOpsAdded, BO B) { const TargetInstrDesc &TID = ARMInsts[Opcode]; const TargetOperandInfo *OpInfo = TID.OpInfo; @@ -1474,7 +1513,7 @@ static bool DisassembleThumb2DPBinImm(MCInst &MI, unsigned Opcode, // Build the register operand(s), followed by the immediate(s). 
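Note: the DPModImm comment above records that the modified immediate is ThumbExpandImm(i:imm3:imm8). The expansion itself is not part of this patch; the sketch below follows the A6.3.2 pseudocode for the 12-bit field (byte-repeat patterns when the top two bits are '00', otherwise an 8-bit value with a forced leading 1 rotated right):

  #include <cassert>
  #include <cstdint>

  static uint32_t ror32(uint32_t v, unsigned amt) {
    amt &= 31;
    return amt ? (v >> amt) | (v << (32 - amt)) : v;
  }

  // Sketch of ThumbExpandImm (ARM ARM A6.3.2) for imm12 = i:imm3:imm8.
  static uint32_t thumbExpandImm(uint32_t imm12) {
    uint32_t imm8 = imm12 & 0xFF;
    if (((imm12 >> 10) & 0x3) == 0) {
      switch ((imm12 >> 8) & 0x3) {
      case 0:  return imm8;                                   // 00 00 00 XY
      case 1:  return (imm8 << 16) | imm8;                    // 00 XY 00 XY
      case 2:  return (imm8 << 24) | (imm8 << 8);             // XY 00 XY 00
      default: return (imm8 << 24) | (imm8 << 16) | (imm8 << 8) | imm8;
      }
    }
    // Otherwise: an 8-bit value with bit 7 forced to 1, rotated right by
    // imm12<11:7> (always >= 8 on this path).
    uint32_t unrotated = 0x80 | (imm12 & 0x7F);
    return ror32(unrotated, (imm12 >> 7) & 0x1F);
  }

  int main() {
    assert(thumbExpandImm(0x0FF) == 0x000000FFu);
    assert(thumbExpandImm(0x355) == 0x55555555u);
    assert(thumbExpandImm(0x4FF) == 0x7F800000u); // 0xFF rotated right by 9
    return 0;
  }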
- MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID, + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, decodeRs(insn)))); ++OpIdx; @@ -1482,7 +1521,7 @@ static bool DisassembleThumb2DPBinImm(MCInst &MI, unsigned Opcode, if (Thumb2SaturateOpcode(Opcode)) { MI.addOperand(MCOperand::CreateImm(decodeThumb2SaturatePos(Opcode, insn))); - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID, + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, decodeRn(insn)))); if (Opcode == ARM::t2SSAT16 || Opcode == ARM::t2USAT16) { @@ -1510,7 +1549,7 @@ static bool DisassembleThumb2DPBinImm(MCInst &MI, unsigned Opcode, MI.addOperand(MI.getOperand(Idx)); } else { // Add src reg operand. - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID, + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, decodeRn(insn)))); } ++OpIdx; @@ -1528,15 +1567,22 @@ static bool DisassembleThumb2DPBinImm(MCInst &MI, unsigned Opcode, MI.addOperand(MCOperand::CreateImm(getIImm3Imm8(insn))); else if (Opcode == ARM::t2MOVi16 || Opcode == ARM::t2MOVTi16) MI.addOperand(MCOperand::CreateImm(getImm16(insn))); - else if (Opcode == ARM::t2BFC) - MI.addOperand(MCOperand::CreateImm(getBitfieldInvMask(insn))); - else { + else if (Opcode == ARM::t2BFC) { + uint32_t mask = 0; + if (getBitfieldInvMask(insn, mask)) + MI.addOperand(MCOperand::CreateImm(mask)); + else + return false; + } else { // Handle the case of: lsb width assert((Opcode == ARM::t2SBFX || Opcode == ARM::t2UBFX || - Opcode == ARM::t2BFI) && "Invalid opcode"); + Opcode == ARM::t2BFI) && "Unexpected opcode"); MI.addOperand(MCOperand::CreateImm(getLsb(insn))); if (Opcode == ARM::t2BFI) { - assert(getMsb(insn) >= getLsb(insn) && "Encoding error"); + if (getMsb(insn) < getLsb(insn)) { + DEBUG(errs() << "Encoding error: msb < lsb\n"); + return false; + } MI.addOperand(MCOperand::CreateImm(getMsb(insn) - getLsb(insn) + 1)); } else MI.addOperand(MCOperand::CreateImm(getWidthMinus1(insn) + 1)); @@ -1585,7 +1631,7 @@ static inline bool t2MiscCtrlInstr(uint32_t insn) { // t2MSR/t2MSRsys -> Rn mask=Inst{11-8} // t2SMC -> imm4 = Inst{19-16} static bool DisassembleThumb2BrMiscCtrl(MCInst &MI, unsigned Opcode, - uint32_t insn, unsigned short NumOps, unsigned &NumOpsAdded) { + uint32_t insn, unsigned short NumOps, unsigned &NumOpsAdded, BO B) { if (NumOps == 0) return true; @@ -1627,21 +1673,21 @@ static bool DisassembleThumb2BrMiscCtrl(MCInst &MI, unsigned Opcode, // MRS and MRSsys take one GPR reg Rs. if (Opcode == ARM::t2MRS || Opcode == ARM::t2MRSsys) { - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID, + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, decodeRs(insn)))); NumOpsAdded = 1; return true; } // BXJ takes one GPR reg Rn. if (Opcode == ARM::t2BXJ) { - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID, + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, decodeRn(insn)))); NumOpsAdded = 1; return true; } // MSR and MSRsys take one GPR reg Rn, followed by the mask. 
if (Opcode == ARM::t2MSR || Opcode == ARM::t2MSRsys || Opcode == ARM::t2BXJ) { - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID, + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, decodeRn(insn)))); MI.addOperand(MCOperand::CreateImm(slice(insn, 11, 8))); NumOpsAdded = 2; @@ -1659,7 +1705,7 @@ static bool DisassembleThumb2BrMiscCtrl(MCInst &MI, unsigned Opcode, switch (Opcode) { default: - assert(0 && "Unreachable code"); + assert(0 && "Unexpected opcode"); return false; case ARM::t2B: Offset = decodeImm32_B_EncodingT4(insn); @@ -1700,7 +1746,7 @@ static inline bool Thumb2PreloadOpcode(unsigned Opcode) { } static bool DisassembleThumb2PreLoad(MCInst &MI, unsigned Opcode, uint32_t insn, - unsigned short NumOps, unsigned &NumOpsAdded) { + unsigned short NumOps, unsigned &NumOpsAdded, BO B) { // Preload Data/Instruction requires either 2 or 3 operands. // t2PLDi12, t2PLDi8, t2PLDpci: Rn [+/-]imm12/imm8 @@ -1718,12 +1764,12 @@ static bool DisassembleThumb2PreLoad(MCInst &MI, unsigned Opcode, uint32_t insn, OpInfo[0].RegClass == ARM::GPRRegClassID && "Expect >= 2 operands and first one as reg operand"); - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID, + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, decodeRn(insn)))); ++OpIdx; if (OpInfo[OpIdx].RegClass == ARM::GPRRegClassID) { - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID, + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, decodeRm(insn)))); } else { assert(OpInfo[OpIdx].RegClass == 0 && !OpInfo[OpIdx].isPredicate() @@ -1765,9 +1811,10 @@ static bool DisassembleThumb2PreLoad(MCInst &MI, unsigned Opcode, uint32_t insn, // These instrs calculate an address from the PC value and an immediate offset. // Rd Rn=PC (+/-)imm12 (+ if Inst{23} == 0b1) static bool DisassembleThumb2Ldpci(MCInst &MI, unsigned Opcode, - uint32_t insn, unsigned short NumOps, unsigned &NumOpsAdded) { + uint32_t insn, unsigned short NumOps, unsigned &NumOpsAdded, BO B) { const TargetOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo; + if (!OpInfo) return false; assert(NumOps >= 2 && OpInfo[0].RegClass == ARM::GPRRegClassID && @@ -1776,7 +1823,7 @@ static bool DisassembleThumb2Ldpci(MCInst &MI, unsigned Opcode, // Build the register operand, followed by the (+/-)imm12 immediate. - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID, + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, decodeRd(insn)))); MI.addOperand(MCOperand::CreateImm(decodeImm12(insn))); @@ -1812,16 +1859,16 @@ static bool DisassembleThumb2Ldpci(MCInst &MI, unsigned Opcode, // Delegates to DisassembleThumb2PreLoad() for preload data/instruction. // Delegates to DisassembleThumb2Ldpci() for load * literal operations. static bool DisassembleThumb2LdSt(bool Load, MCInst &MI, unsigned Opcode, - uint32_t insn, unsigned short NumOps, unsigned &NumOpsAdded) { + uint32_t insn, unsigned short NumOps, unsigned &NumOpsAdded, BO B) { unsigned Rn = decodeRn(insn); if (Thumb2PreloadOpcode(Opcode)) - return DisassembleThumb2PreLoad(MI, Opcode, insn, NumOps, NumOpsAdded); + return DisassembleThumb2PreLoad(MI, Opcode, insn, NumOps, NumOpsAdded, B); // See, for example, A6.3.7 Load word: Table A6-18 Load word. 
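Note: the t2B case above calls decodeImm32_B_EncodingT4, which reconstructs the branch offset from the S, J1, J2, imm10 and imm11 fields (A8.6.16, encoding T4). The field extraction is not shown in this hunk, so the sketch below works on already-extracted fields and only demonstrates the I1/I2 inversion and the sign extension:

  #include <cassert>
  #include <cstdint>

  // Encoding T4 of B: imm32 = SignExtend(S:I1:I2:imm10:imm11:'0'), where
  // I1 = NOT(J1 EOR S) and I2 = NOT(J2 EOR S).
  static int32_t branchImm32T4(uint32_t S, uint32_t J1, uint32_t J2,
                               uint32_t imm10, uint32_t imm11) {
    uint32_t I1 = (~(J1 ^ S)) & 1;
    uint32_t I2 = (~(J2 ^ S)) & 1;
    uint32_t imm25 = (S << 24) | (I1 << 23) | (I2 << 22) |
                     (imm10 << 12) | (imm11 << 1);
    if (imm25 & (1u << 24))
      imm25 |= 0xFE000000u; // sign-extend from bit 24
    return (int32_t)imm25;
  }

  int main() {
    // All-zero fields except S: the most negative offset, -16 MiB.
    assert(branchImm32T4(1, 0, 0, 0, 0) == -(1 << 24));
    // S = 0 with J1 = J2 = 1 gives a small positive forward offset.
    assert(branchImm32T4(0, 1, 1, 0, 2) == 4);
    return 0;
  }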
if (Load && Rn == 15) - return DisassembleThumb2Ldpci(MI, Opcode, insn, NumOps, NumOpsAdded); + return DisassembleThumb2Ldpci(MI, Opcode, insn, NumOps, NumOpsAdded, B); const TargetInstrDesc &TID = ARMInsts[Opcode]; const TargetOperandInfo *OpInfo = TID.OpInfo; @@ -1870,13 +1917,16 @@ static bool DisassembleThumb2LdSt(bool Load, MCInst &MI, unsigned Opcode, Imm = decodeImm8(insn); } - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID, R0))); + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, + R0))); ++OpIdx; - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID, R1))); + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, + R1))); ++OpIdx; if (ThreeReg) { - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID,R2))); + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, + R2))); ++OpIdx; } @@ -1900,7 +1950,7 @@ static bool DisassembleThumb2LdSt(bool Load, MCInst &MI, unsigned Opcode, // // Miscellaneous operations: Rs [Rn] Rm static bool DisassembleThumb2DPReg(MCInst &MI, unsigned Opcode, uint32_t insn, - unsigned short NumOps, unsigned &NumOpsAdded) { + unsigned short NumOps, unsigned &NumOpsAdded, BO B) { const TargetInstrDesc &TID = ARMInsts[Opcode]; const TargetOperandInfo *OpInfo = TID.OpInfo; @@ -1917,17 +1967,17 @@ static bool DisassembleThumb2DPReg(MCInst &MI, unsigned Opcode, uint32_t insn, bool ThreeReg = NumOps > 2 && OpInfo[2].RegClass == ARM::GPRRegClassID; - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID, + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, decodeRs(insn)))); ++OpIdx; if (ThreeReg) { - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID, + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, decodeRn(insn)))); ++OpIdx; } - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID, + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, decodeRm(insn)))); ++OpIdx; @@ -1954,7 +2004,7 @@ static bool DisassembleThumb2DPReg(MCInst &MI, unsigned Opcode, uint32_t insn, // Unsigned Sum of Absolute Differences [and Accumulate] // Rs Rn Rm [Ra=Inst{15-12}] static bool DisassembleThumb2Mul(MCInst &MI, unsigned Opcode, uint32_t insn, - unsigned short NumOps, unsigned &NumOpsAdded) { + unsigned short NumOps, unsigned &NumOpsAdded, BO B) { const TargetOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo; @@ -1968,17 +2018,17 @@ static bool DisassembleThumb2Mul(MCInst &MI, unsigned Opcode, uint32_t insn, bool FourReg = NumOps > 3 && OpInfo[3].RegClass == ARM::GPRRegClassID; - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID, + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, decodeRs(insn)))); - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID, + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, decodeRn(insn)))); - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID, + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, decodeRm(insn)))); if (FourReg) - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID, + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, decodeRd(insn)))); NumOpsAdded = FourReg ? 
4 : 3; @@ -1999,7 +2049,7 @@ static bool DisassembleThumb2Mul(MCInst &MI, unsigned Opcode, uint32_t insn, // // Signed/Unsigned divide: t2SDIV, t2UDIV: Rs Rn Rm static bool DisassembleThumb2LongMul(MCInst &MI, unsigned Opcode, uint32_t insn, - unsigned short NumOps, unsigned &NumOpsAdded) { + unsigned short NumOps, unsigned &NumOpsAdded, BO B) { const TargetOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo; @@ -2014,16 +2064,16 @@ static bool DisassembleThumb2LongMul(MCInst &MI, unsigned Opcode, uint32_t insn, // Build the register operands. if (FourReg) - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID, + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, decodeRd(insn)))); - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID, + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, decodeRs(insn)))); - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID, + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, decodeRn(insn)))); - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID, + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, decodeRm(insn)))); if (FourReg) @@ -2059,38 +2109,41 @@ static bool DisassembleThumb2LongMul(MCInst &MI, unsigned Opcode, uint32_t insn, // 1xxxxxx - Coprocessor instructions on page A6-40 // static bool DisassembleThumb2(uint16_t op1, uint16_t op2, uint16_t op, - MCInst &MI, unsigned Opcode, uint32_t insn, - unsigned short NumOps, unsigned &NumOpsAdded) { + MCInst &MI, unsigned Opcode, uint32_t insn, unsigned short NumOps, + unsigned &NumOpsAdded, BO B) { switch (op1) { case 1: if (slice(op2, 6, 5) == 0) { if (slice(op2, 2, 2) == 0) { // Load/store multiple. - return DisassembleThumb2LdStMul(MI, Opcode, insn, NumOps, NumOpsAdded); + return DisassembleThumb2LdStMul(MI, Opcode, insn, NumOps, NumOpsAdded, + B); } // Load/store dual, load/store exclusive, table branch, otherwise. - assert(slice(op2, 2, 2) == 1 && "Encoding error"); + assert(slice(op2, 2, 2) == 1 && "Thumb2 encoding error!"); if ((ARM::t2LDREX <= Opcode && Opcode <= ARM::t2LDREXH) || (ARM::t2STREX <= Opcode && Opcode <= ARM::t2STREXH)) { // Load/store exclusive. - return DisassembleThumb2LdStEx(MI, Opcode, insn, NumOps, NumOpsAdded); + return DisassembleThumb2LdStEx(MI, Opcode, insn, NumOps, NumOpsAdded, + B); } if (Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2LDRD_PRE || Opcode == ARM::t2LDRD_POST || Opcode == ARM::t2STRDi8 || Opcode == ARM::t2STRD_PRE || Opcode == ARM::t2STRD_POST) { // Load/store dual. - return DisassembleThumb2LdStDual(MI, Opcode, insn, NumOps, NumOpsAdded); + return DisassembleThumb2LdStDual(MI, Opcode, insn, NumOps, NumOpsAdded, + B); } if (Opcode == ARM::t2TBBgen || Opcode == ARM::t2TBHgen) { // Table branch. - return DisassembleThumb2TB(MI, Opcode, insn, NumOps, NumOpsAdded); + return DisassembleThumb2TB(MI, Opcode, insn, NumOps, NumOpsAdded, B); } } else if (slice(op2, 6, 5) == 1) { // Data-processing (shifted register). 
- return DisassembleThumb2DPSoReg(MI, Opcode, insn, NumOps, NumOpsAdded); + return DisassembleThumb2DPSoReg(MI, Opcode, insn, NumOps, NumOpsAdded, B); } // FIXME: A6.3.18 Coprocessor instructions @@ -2101,14 +2154,17 @@ static bool DisassembleThumb2(uint16_t op1, uint16_t op2, uint16_t op, if (op == 0) { if (slice(op2, 5, 5) == 0) { // Data-processing (modified immediate) - return DisassembleThumb2DPModImm(MI, Opcode, insn, NumOps, NumOpsAdded); + return DisassembleThumb2DPModImm(MI, Opcode, insn, NumOps, NumOpsAdded, + B); } else { // Data-processing (plain binary immediate) - return DisassembleThumb2DPBinImm(MI, Opcode, insn, NumOps, NumOpsAdded); + return DisassembleThumb2DPBinImm(MI, Opcode, insn, NumOps, NumOpsAdded, + B); } } else { // Branches and miscellaneous control on page A6-20. - return DisassembleThumb2BrMiscCtrl(MI, Opcode, insn, NumOps, NumOpsAdded); + return DisassembleThumb2BrMiscCtrl(MI, Opcode, insn, NumOps, NumOpsAdded, + B); } break; @@ -2119,7 +2175,8 @@ static bool DisassembleThumb2(uint16_t op1, uint16_t op2, uint16_t op, if (slice(op2, 0, 0) == 0) { if (slice(op2, 4, 4) == 0) { // Store single data item on page A6-30 - return DisassembleThumb2LdSt(false, MI,Opcode,insn,NumOps,NumOpsAdded); + return DisassembleThumb2LdSt(false, MI,Opcode,insn,NumOps,NumOpsAdded, + B); } else { // FIXME: Advanced SIMD element or structure load/store instructions. // But see ThumbDisassembler::getInstruction(). @@ -2127,19 +2184,20 @@ static bool DisassembleThumb2(uint16_t op1, uint16_t op2, uint16_t op, } } else { // Table A6-9 32-bit Thumb instruction encoding: Load byte|halfword|word - return DisassembleThumb2LdSt(true, MI,Opcode,insn,NumOps,NumOpsAdded); + return DisassembleThumb2LdSt(true, MI,Opcode,insn,NumOps,NumOpsAdded, B); } break; case 1: if (slice(op2, 4, 4) == 0) { // A6.3.12 Data-processing (register) - return DisassembleThumb2DPReg(MI, Opcode, insn, NumOps, NumOpsAdded); + return DisassembleThumb2DPReg(MI, Opcode, insn, NumOps, NumOpsAdded, B); } else if (slice(op2, 3, 3) == 0) { // A6.3.16 Multiply, multiply accumulate, and absolute difference - return DisassembleThumb2Mul(MI, Opcode, insn, NumOps, NumOpsAdded); + return DisassembleThumb2Mul(MI, Opcode, insn, NumOps, NumOpsAdded, B); } else { // A6.3.17 Long multiply, long multiply accumulate, and divide - return DisassembleThumb2LongMul(MI, Opcode, insn, NumOps, NumOpsAdded); + return DisassembleThumb2LongMul(MI, Opcode, insn, NumOps, NumOpsAdded, + B); } break; default: @@ -2151,7 +2209,7 @@ static bool DisassembleThumb2(uint16_t op1, uint16_t op2, uint16_t op, break; default: - assert(0 && "Encoding error for Thumb2 instruction!"); + assert(0 && "Thumb2 encoding error!"); break; } @@ -2174,8 +2232,10 @@ static bool DisassembleThumbFrm(MCInst &MI, unsigned Opcode, uint32_t insn, unsigned bits15_11 = slice(HalfWord, 15, 11); // A6.1 Thumb instruction set encoding - assert((bits15_11 == 0x1D || bits15_11 == 0x1E || bits15_11 == 0x1F) && - "Bits [15:11] of first halfword of a Thumb2 instruction out of range"); + if (!(bits15_11 == 0x1D || bits15_11 == 0x1E || bits15_11 == 0x1F)) { + assert("Bits[15:11] first halfword of Thumb2 instruction is out of range"); + return false; + } // A6.3 32-bit Thumb instruction encoding @@ -2183,5 +2243,6 @@ static bool DisassembleThumbFrm(MCInst &MI, unsigned Opcode, uint32_t insn, uint16_t op2 = slice(HalfWord, 10, 4); uint16_t op = slice(insn, 15, 15); - return DisassembleThumb2(op1, op2, op, MI, Opcode, insn, NumOps, NumOpsAdded); + return DisassembleThumb2(op1, op2, op, MI, Opcode, 
insn, NumOps, NumOpsAdded, + Builder); } diff --git a/lib/Target/ARM/Makefile b/lib/Target/ARM/Makefile index a8dd38c..9e3ff29 100644 --- a/lib/Target/ARM/Makefile +++ b/lib/Target/ARM/Makefile @@ -16,8 +16,9 @@ BUILT_SOURCES = ARMGenRegisterInfo.h.inc ARMGenRegisterNames.inc \ ARMGenRegisterInfo.inc ARMGenInstrNames.inc \ ARMGenInstrInfo.inc ARMGenAsmWriter.inc \ ARMGenDAGISel.inc ARMGenSubtarget.inc \ - ARMGenCodeEmitter.inc ARMGenCallingConv.inc + ARMGenCodeEmitter.inc ARMGenCallingConv.inc \ + ARMGenDecoderTables.inc ARMGenEDInfo.inc -DIRS = AsmPrinter AsmParser TargetInfo +DIRS = AsmPrinter AsmParser Disassembler TargetInfo include $(LEVEL)/Makefile.common diff --git a/lib/Target/ARM/Thumb1RegisterInfo.cpp b/lib/Target/ARM/Thumb1RegisterInfo.cpp index a5dfcb3..2f635fe 100644 --- a/lib/Target/ARM/Thumb1RegisterInfo.cpp +++ b/lib/Target/ARM/Thumb1RegisterInfo.cpp @@ -36,9 +36,12 @@ #include "llvm/Support/CommandLine.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" -using namespace llvm; +namespace llvm { extern cl::opt<bool> ReuseFrameIndexVals; +} + +using namespace llvm; Thumb1RegisterInfo::Thumb1RegisterInfo(const ARMBaseInstrInfo &tii, const ARMSubtarget &sti) @@ -56,7 +59,7 @@ void Thumb1RegisterInfo::emitLoadConstPool(MachineBasicBlock &MBB, unsigned PredReg) const { MachineFunction &MF = *MBB.getParent(); MachineConstantPool *ConstantPool = MF.getConstantPool(); - Constant *C = ConstantInt::get( + const Constant *C = ConstantInt::get( Type::getInt32Ty(MBB.getParent()->getFunction()->getContext()), Val); unsigned Idx = ConstantPool->getConstantPoolIndex(C, 4); @@ -461,6 +464,13 @@ Thumb1RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, Offset -= AFI->getFramePtrSpillOffset(); } + // Special handling of dbg_value instructions. + if (MI.isDebugValue()) { + MI.getOperand(i). 
ChangeToRegister(FrameReg, false /*isDef*/); + MI.getOperand(i+1).ChangeToImmediate(Offset); + return 0; + } + unsigned Opcode = MI.getOpcode(); const TargetInstrDesc &Desc = MI.getDesc(); unsigned AddrMode = (Desc.TSFlags & ARMII::AddrModeMask); diff --git a/lib/Target/ARM/Thumb2InstrInfo.cpp b/lib/Target/ARM/Thumb2InstrInfo.cpp index de46056..b143bd9 100644 --- a/lib/Target/ARM/Thumb2InstrInfo.cpp +++ b/lib/Target/ARM/Thumb2InstrInfo.cpp @@ -44,18 +44,22 @@ Thumb2InstrInfo::copyRegToReg(MachineBasicBlock &MBB, DebugLoc DL; if (I != MBB.end()) DL = I->getDebugLoc(); - if (DestRC == ARM::GPRRegisterClass && - SrcRC == ARM::GPRRegisterClass) { - BuildMI(MBB, I, DL, get(ARM::tMOVgpr2gpr), DestReg).addReg(SrcReg); - return true; - } else if (DestRC == ARM::GPRRegisterClass && - SrcRC == ARM::tGPRRegisterClass) { - BuildMI(MBB, I, DL, get(ARM::tMOVtgpr2gpr), DestReg).addReg(SrcReg); - return true; - } else if (DestRC == ARM::tGPRRegisterClass && - SrcRC == ARM::GPRRegisterClass) { - BuildMI(MBB, I, DL, get(ARM::tMOVgpr2tgpr), DestReg).addReg(SrcReg); - return true; + if (DestRC == ARM::GPRRegisterClass) { + if (SrcRC == ARM::GPRRegisterClass) { + BuildMI(MBB, I, DL, get(ARM::tMOVgpr2gpr), DestReg).addReg(SrcReg); + return true; + } else if (SrcRC == ARM::tGPRRegisterClass) { + BuildMI(MBB, I, DL, get(ARM::tMOVtgpr2gpr), DestReg).addReg(SrcReg); + return true; + } + } else if (DestRC == ARM::tGPRRegisterClass) { + if (SrcRC == ARM::GPRRegisterClass) { + BuildMI(MBB, I, DL, get(ARM::tMOVgpr2tgpr), DestReg).addReg(SrcReg); + return true; + } else if (SrcRC == ARM::tGPRRegisterClass) { + BuildMI(MBB, I, DL, get(ARM::tMOVr), DestReg).addReg(SrcReg); + return true; + } } // Handle SPR, DPR, and QPR copies. diff --git a/lib/Target/ARM/Thumb2RegisterInfo.cpp b/lib/Target/ARM/Thumb2RegisterInfo.cpp index f24d3e2..07dd0be 100644 --- a/lib/Target/ARM/Thumb2RegisterInfo.cpp +++ b/lib/Target/ARM/Thumb2RegisterInfo.cpp @@ -52,7 +52,7 @@ void Thumb2RegisterInfo::emitLoadConstPool(MachineBasicBlock &MBB, unsigned PredReg) const { MachineFunction &MF = *MBB.getParent(); MachineConstantPool *ConstantPool = MF.getConstantPool(); - Constant *C = ConstantInt::get( + const Constant *C = ConstantInt::get( Type::getInt32Ty(MBB.getParent()->getFunction()->getContext()), Val); unsigned Idx = ConstantPool->getConstantPoolIndex(C, 4); diff --git a/lib/Target/ARM/Thumb2SizeReduction.cpp b/lib/Target/ARM/Thumb2SizeReduction.cpp index 2bc75f2..8fe2e42 100644 --- a/lib/Target/ARM/Thumb2SizeReduction.cpp +++ b/lib/Target/ARM/Thumb2SizeReduction.cpp @@ -656,15 +656,8 @@ static bool UpdateCPSRUse(MachineInstr &MI, bool LiveCPSR) { bool Thumb2SizeReduce::ReduceMBB(MachineBasicBlock &MBB) { bool Modified = false; - bool LiveCPSR = false; // Yes, CPSR could be livein. 
- for (MachineBasicBlock::const_livein_iterator I = MBB.livein_begin(), - E = MBB.livein_end(); I != E; ++I) { - if (*I == ARM::CPSR) { - LiveCPSR = true; - break; - } - } + bool LiveCPSR = MBB.isLiveIn(ARM::CPSR); MachineBasicBlock::iterator MII = MBB.begin(), E = MBB.end(); MachineBasicBlock::iterator NextMII; diff --git a/lib/Target/Alpha/AlphaCodeEmitter.cpp b/lib/Target/Alpha/AlphaCodeEmitter.cpp index eb5e429..a6c6f52 100644 --- a/lib/Target/Alpha/AlphaCodeEmitter.cpp +++ b/lib/Target/Alpha/AlphaCodeEmitter.cpp @@ -192,10 +192,13 @@ unsigned AlphaCodeEmitter::getMachineOpValue(const MachineInstr &MI, llvm_unreachable("unknown relocatable instruction"); } if (MO.isGlobal()) - MCE.addRelocation(MachineRelocation::getGV(MCE.getCurrentPCOffset(), - Reloc, MO.getGlobal(), Offset, - isa<Function>(MO.getGlobal()), - useGOT)); + MCE.addRelocation(MachineRelocation::getGV( + MCE.getCurrentPCOffset(), + Reloc, + const_cast<GlobalValue *>(MO.getGlobal()), + Offset, + isa<Function>(MO.getGlobal()), + useGOT)); else if (MO.isSymbol()) MCE.addRelocation(MachineRelocation::getExtSym(MCE.getCurrentPCOffset(), Reloc, MO.getSymbolName(), diff --git a/lib/Target/Alpha/AlphaISelDAGToDAG.cpp b/lib/Target/Alpha/AlphaISelDAGToDAG.cpp index 5303d85..d526dc0 100644 --- a/lib/Target/Alpha/AlphaISelDAGToDAG.cpp +++ b/lib/Target/Alpha/AlphaISelDAGToDAG.cpp @@ -14,7 +14,6 @@ #include "Alpha.h" #include "AlphaTargetMachine.h" -#include "AlphaISelLowering.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunction.h" @@ -309,7 +308,7 @@ SDNode *AlphaDAGToDAGISel::Select(SDNode *N) { T, CurDAG->getRegister(Alpha::F31, T), CurDAG->getRegister(Alpha::F31, T)); } else { - llvm_report_error("Unhandled FP constant type"); + report_fatal_error("Unhandled FP constant type"); } break; } diff --git a/lib/Target/Alpha/AlphaISelLowering.cpp b/lib/Target/Alpha/AlphaISelLowering.cpp index 94c6f80..1d85f12 100644 --- a/lib/Target/Alpha/AlphaISelLowering.cpp +++ b/lib/Target/Alpha/AlphaISelLowering.cpp @@ -13,6 +13,7 @@ #include "AlphaISelLowering.h" #include "AlphaTargetMachine.h" +#include "AlphaMachineFunctionInfo.h" #include "llvm/CodeGen/CallingConvLower.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunction.h" @@ -225,7 +226,7 @@ AlphaTargetLowering::LowerCall(SDValue Chain, SDValue Callee, const SmallVectorImpl<ISD::OutputArg> &Outs, const SmallVectorImpl<ISD::InputArg> &Ins, DebugLoc dl, SelectionDAG &DAG, - SmallVectorImpl<SDValue> &InVals) { + SmallVectorImpl<SDValue> &InVals) const { // Alpha target does not yet support tail call optimization. isTailCall = false; @@ -342,7 +343,7 @@ AlphaTargetLowering::LowerCallResult(SDValue Chain, SDValue InFlag, CallingConv::ID CallConv, bool isVarArg, const SmallVectorImpl<ISD::InputArg> &Ins, DebugLoc dl, SelectionDAG &DAG, - SmallVectorImpl<SDValue> &InVals) { + SmallVectorImpl<SDValue> &InVals) const { // Assign locations to each value returned by this call. 
SmallVector<CCValAssign, 16> RVLocs; @@ -385,10 +386,12 @@ AlphaTargetLowering::LowerFormalArguments(SDValue Chain, const SmallVectorImpl<ISD::InputArg> &Ins, DebugLoc dl, SelectionDAG &DAG, - SmallVectorImpl<SDValue> &InVals) { + SmallVectorImpl<SDValue> &InVals) + const { MachineFunction &MF = DAG.getMachineFunction(); MachineFrameInfo *MFI = MF.getFrameInfo(); + AlphaMachineFunctionInfo *FuncInfo = MF.getInfo<AlphaMachineFunctionInfo>(); unsigned args_int[] = { Alpha::R16, Alpha::R17, Alpha::R18, Alpha::R19, Alpha::R20, Alpha::R21}; @@ -435,14 +438,14 @@ AlphaTargetLowering::LowerFormalArguments(SDValue Chain, // If the functions takes variable number of arguments, copy all regs to stack if (isVarArg) { - VarArgsOffset = Ins.size() * 8; + FuncInfo->setVarArgsOffset(Ins.size() * 8); std::vector<SDValue> LS; for (int i = 0; i < 6; ++i) { if (TargetRegisterInfo::isPhysicalRegister(args_int[i])) args_int[i] = AddLiveIn(MF, args_int[i], &Alpha::GPRCRegClass); SDValue argt = DAG.getCopyFromReg(Chain, dl, args_int[i], MVT::i64); int FI = MFI->CreateFixedObject(8, -8 * (6 - i), true, false); - if (i == 0) VarArgsBase = FI; + if (i == 0) FuncInfo->setVarArgsBase(FI); SDValue SDFI = DAG.getFrameIndex(FI, MVT::i64); LS.push_back(DAG.getStore(Chain, dl, argt, SDFI, NULL, 0, false, false, 0)); @@ -467,7 +470,7 @@ SDValue AlphaTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool isVarArg, const SmallVectorImpl<ISD::OutputArg> &Outs, - DebugLoc dl, SelectionDAG &DAG) { + DebugLoc dl, SelectionDAG &DAG) const { SDValue Copy = DAG.getCopyToReg(Chain, dl, Alpha::R26, DAG.getNode(AlphaISD::GlobalRetAddr, @@ -525,7 +528,8 @@ AlphaTargetLowering::LowerReturn(SDValue Chain, } void AlphaTargetLowering::LowerVAARG(SDNode *N, SDValue &Chain, - SDValue &DataPtr, SelectionDAG &DAG) { + SDValue &DataPtr, + SelectionDAG &DAG) const { Chain = N->getOperand(0); SDValue VAListP = N->getOperand(1); const Value *VAListS = cast<SrcValueSDNode>(N->getOperand(2))->getValue(); @@ -556,7 +560,8 @@ void AlphaTargetLowering::LowerVAARG(SDNode *N, SDValue &Chain, /// LowerOperation - Provide custom lowering hooks for some operations. 
/// -SDValue AlphaTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) { +SDValue AlphaTargetLowering::LowerOperation(SDValue Op, + SelectionDAG &DAG) const { DebugLoc dl = Op.getDebugLoc(); switch (Op.getOpcode()) { default: llvm_unreachable("Wasn't expecting to be able to lower this!"); @@ -624,7 +629,7 @@ SDValue AlphaTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) { } case ISD::ConstantPool: { ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op); - Constant *C = CP->getConstVal(); + const Constant *C = CP->getConstVal(); SDValue CPI = DAG.getTargetConstantPool(C, MVT::i64, CP->getAlignment()); // FIXME there isn't really any debug info here @@ -637,7 +642,7 @@ SDValue AlphaTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) { llvm_unreachable("TLS not implemented for Alpha."); case ISD::GlobalAddress: { GlobalAddressSDNode *GSDN = cast<GlobalAddressSDNode>(Op); - GlobalValue *GV = GSDN->getGlobal(); + const GlobalValue *GV = GSDN->getGlobal(); SDValue GA = DAG.getTargetGlobalAddress(GV, MVT::i64, GSDN->getOffset()); // FIXME there isn't really any debug info here @@ -725,17 +730,22 @@ SDValue AlphaTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) { false, false, 0); } case ISD::VASTART: { + MachineFunction &MF = DAG.getMachineFunction(); + AlphaMachineFunctionInfo *FuncInfo = MF.getInfo<AlphaMachineFunctionInfo>(); + SDValue Chain = Op.getOperand(0); SDValue VAListP = Op.getOperand(1); const Value *VAListS = cast<SrcValueSDNode>(Op.getOperand(2))->getValue(); // vastart stores the address of the VarArgsBase and VarArgsOffset - SDValue FR = DAG.getFrameIndex(VarArgsBase, MVT::i64); + SDValue FR = DAG.getFrameIndex(FuncInfo->getVarArgsBase(), MVT::i64); SDValue S1 = DAG.getStore(Chain, dl, FR, VAListP, VAListS, 0, false, false, 0); SDValue SA2 = DAG.getNode(ISD::ADD, dl, MVT::i64, VAListP, DAG.getConstant(8, MVT::i64)); - return DAG.getTruncStore(S1, dl, DAG.getConstant(VarArgsOffset, MVT::i64), + return DAG.getTruncStore(S1, dl, + DAG.getConstant(FuncInfo->getVarArgsOffset(), + MVT::i64), SA2, NULL, 0, MVT::i32, false, false, 0); } case ISD::RETURNADDR: @@ -749,7 +759,7 @@ SDValue AlphaTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) { void AlphaTargetLowering::ReplaceNodeResults(SDNode *N, SmallVectorImpl<SDValue>&Results, - SelectionDAG &DAG) { + SelectionDAG &DAG) const { DebugLoc dl = N->getDebugLoc(); assert(N->getValueType(0) == MVT::i32 && N->getOpcode() == ISD::VAARG && @@ -822,8 +832,7 @@ getRegClassForInlineAsmConstraint(const std::string &Constraint, MachineBasicBlock * AlphaTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, - MachineBasicBlock *BB, - DenseMap<MachineBasicBlock*, MachineBasicBlock*> *EM) const { + MachineBasicBlock *BB) const { const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); assert((MI->getOpcode() == Alpha::CAS32 || MI->getOpcode() == Alpha::CAS64 || @@ -854,11 +863,6 @@ AlphaTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, MachineBasicBlock *llscMBB = F->CreateMachineBasicBlock(LLVM_BB); MachineBasicBlock *sinkMBB = F->CreateMachineBasicBlock(LLVM_BB); - // Inform sdisel of the edge changes. 
- for (MachineBasicBlock::succ_iterator I = BB->succ_begin(), - E = BB->succ_end(); I != E; ++I) - EM->insert(std::make_pair(*I, sinkMBB)); - sinkMBB->transferSuccessors(thisMBB); F->insert(It, llscMBB); diff --git a/lib/Target/Alpha/AlphaISelLowering.h b/lib/Target/Alpha/AlphaISelLowering.h index 0f17025..7ee823a 100644 --- a/lib/Target/Alpha/AlphaISelLowering.h +++ b/lib/Target/Alpha/AlphaISelLowering.h @@ -60,8 +60,6 @@ namespace llvm { } class AlphaTargetLowering : public TargetLowering { - int VarArgsOffset; // What is the offset to the first vaarg - int VarArgsBase; // What is the base FrameIndex public: explicit AlphaTargetLowering(TargetMachine &TM); @@ -70,13 +68,13 @@ namespace llvm { /// LowerOperation - Provide custom lowering hooks for some operations. /// - virtual SDValue LowerOperation(SDValue Op, SelectionDAG &DAG); + virtual SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const; /// ReplaceNodeResults - Replace the results of node with an illegal result /// type with new values built out of custom code. /// virtual void ReplaceNodeResults(SDNode *N, SmallVectorImpl<SDValue>&Results, - SelectionDAG &DAG); + SelectionDAG &DAG) const; // Friendly names for dumps const char *getTargetNodeName(unsigned Opcode) const; @@ -85,7 +83,7 @@ namespace llvm { CallingConv::ID CallConv, bool isVarArg, const SmallVectorImpl<ISD::InputArg> &Ins, DebugLoc dl, SelectionDAG &DAG, - SmallVectorImpl<SDValue> &InVals); + SmallVectorImpl<SDValue> &InVals) const; ConstraintType getConstraintType(const std::string &Constraint) const; @@ -93,9 +91,9 @@ namespace llvm { getRegClassForInlineAsmConstraint(const std::string &Constraint, EVT VT) const; - MachineBasicBlock *EmitInstrWithCustomInserter(MachineInstr *MI, - MachineBasicBlock *BB, - DenseMap<MachineBasicBlock*, MachineBasicBlock*> *EM) const; + MachineBasicBlock * + EmitInstrWithCustomInserter(MachineInstr *MI, + MachineBasicBlock *BB) const; virtual bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const; @@ -110,14 +108,14 @@ namespace llvm { private: // Helpers for custom lowering. 
void LowerVAARG(SDNode *N, SDValue &Chain, SDValue &DataPtr, - SelectionDAG &DAG); + SelectionDAG &DAG) const; virtual SDValue LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, bool isVarArg, const SmallVectorImpl<ISD::InputArg> &Ins, DebugLoc dl, SelectionDAG &DAG, - SmallVectorImpl<SDValue> &InVals); + SmallVectorImpl<SDValue> &InVals) const; virtual SDValue LowerCall(SDValue Chain, SDValue Callee, @@ -125,13 +123,13 @@ namespace llvm { const SmallVectorImpl<ISD::OutputArg> &Outs, const SmallVectorImpl<ISD::InputArg> &Ins, DebugLoc dl, SelectionDAG &DAG, - SmallVectorImpl<SDValue> &InVals); + SmallVectorImpl<SDValue> &InVals) const; virtual SDValue LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool isVarArg, const SmallVectorImpl<ISD::OutputArg> &Outs, - DebugLoc dl, SelectionDAG &DAG); + DebugLoc dl, SelectionDAG &DAG) const; }; } diff --git a/lib/Target/Alpha/AlphaJITInfo.cpp b/lib/Target/Alpha/AlphaJITInfo.cpp index cb8eb51..12685ed 100644 --- a/lib/Target/Alpha/AlphaJITInfo.cpp +++ b/lib/Target/Alpha/AlphaJITInfo.cpp @@ -103,7 +103,7 @@ extern "C" { asm( ".text\n" - ".globl AlphaComilationCallbackC\n" + ".globl AlphaCompilationCallbackC\n" ".align 4\n" ".globl AlphaCompilationCallback\n" ".ent AlphaCompilationCallback\n" diff --git a/lib/Target/Alpha/AlphaMachineFunctionInfo.h b/lib/Target/Alpha/AlphaMachineFunctionInfo.h index 8221fc7..186738c 100644 --- a/lib/Target/Alpha/AlphaMachineFunctionInfo.h +++ b/lib/Target/Alpha/AlphaMachineFunctionInfo.h @@ -30,17 +30,31 @@ class AlphaMachineFunctionInfo : public MachineFunctionInfo { /// the return address value. unsigned GlobalRetAddr; + /// VarArgsOffset - What is the offset to the first vaarg + int VarArgsOffset; + /// VarArgsBase - What is the base FrameIndex + int VarArgsBase; + public: - AlphaMachineFunctionInfo() : GlobalBaseReg(0), GlobalRetAddr(0) {} + AlphaMachineFunctionInfo() : GlobalBaseReg(0), GlobalRetAddr(0), + VarArgsOffset(0), VarArgsBase(0) {} explicit AlphaMachineFunctionInfo(MachineFunction &MF) : GlobalBaseReg(0), - GlobalRetAddr(0) {} + GlobalRetAddr(0), + VarArgsOffset(0), + VarArgsBase(0) {} unsigned getGlobalBaseReg() const { return GlobalBaseReg; } void setGlobalBaseReg(unsigned Reg) { GlobalBaseReg = Reg; } unsigned getGlobalRetAddr() const { return GlobalRetAddr; } void setGlobalRetAddr(unsigned Reg) { GlobalRetAddr = Reg; } + + int getVarArgsOffset() const { return VarArgsOffset; } + void setVarArgsOffset(int Offset) { VarArgsOffset = Offset; } + + int getVarArgsBase() const { return VarArgsBase; } + void setVarArgsBase(int Base) { VarArgsBase = Base; } }; } // End llvm namespace diff --git a/lib/Target/Alpha/AlphaRegisterInfo.cpp b/lib/Target/Alpha/AlphaRegisterInfo.cpp index 16a23cc..c083d8c 100644 --- a/lib/Target/Alpha/AlphaRegisterInfo.cpp +++ b/lib/Target/Alpha/AlphaRegisterInfo.cpp @@ -212,15 +212,14 @@ void AlphaRegisterInfo::emitPrologue(MachineFunction &MF) const { //handle GOP offset BuildMI(MBB, MBBI, dl, TII.get(Alpha::LDAHg), Alpha::R29) - .addGlobalAddress(const_cast<Function*>(MF.getFunction())) + .addGlobalAddress(MF.getFunction()) .addReg(Alpha::R27).addImm(++curgpdist); BuildMI(MBB, MBBI, dl, TII.get(Alpha::LDAg), Alpha::R29) - .addGlobalAddress(const_cast<Function*>(MF.getFunction())) + .addGlobalAddress(MF.getFunction()) .addReg(Alpha::R29).addImm(curgpdist); - //evil const_cast until MO stuff setup to handle const BuildMI(MBB, MBBI, dl, TII.get(Alpha::ALTENT)) - .addGlobalAddress(const_cast<Function*>(MF.getFunction())); + .addGlobalAddress(MF.getFunction()); // Get 
the number of bytes to allocate from the FrameInfo long NumBytes = MFI->getStackSize(); @@ -248,10 +247,7 @@ void AlphaRegisterInfo::emitPrologue(MachineFunction &MF) const { BuildMI(MBB, MBBI, dl, TII.get(Alpha::LDA), Alpha::R30) .addImm(getLower16(NumBytes)).addReg(Alpha::R30); } else { - std::string msg; - raw_string_ostream Msg(msg); - Msg << "Too big a stack frame at " << NumBytes; - llvm_report_error(Msg.str()); + report_fatal_error("Too big a stack frame at " + Twine(NumBytes)); } //now if we need to, save the old FP and set the new @@ -300,10 +296,7 @@ void AlphaRegisterInfo::emitEpilogue(MachineFunction &MF, BuildMI(MBB, MBBI, dl, TII.get(Alpha::LDA), Alpha::R30) .addImm(getLower16(NumBytes)).addReg(Alpha::R30); } else { - std::string msg; - raw_string_ostream Msg(msg); - Msg << "Too big a stack frame at " << NumBytes; - llvm_report_error(Msg.str()); + report_fatal_error("Too big a stack frame at " + Twine(NumBytes)); } } } diff --git a/lib/Target/Alpha/AlphaSchedule.td b/lib/Target/Alpha/AlphaSchedule.td index b7b4560..4dc04b8 100644 --- a/lib/Target/Alpha/AlphaSchedule.td +++ b/lib/Target/Alpha/AlphaSchedule.td @@ -53,7 +53,8 @@ def s_pseudo : InstrItinClass; //Table 24 Instruction Class Latency in Cycles //modified some -def Alpha21264Itineraries : ProcessorItineraries<[ +def Alpha21264Itineraries : ProcessorItineraries< + [L0, L1, FST0, FST1, U0, U1, FA, FM], [ InstrItinData<s_ild , [InstrStage<3, [L0, L1]>]>, InstrItinData<s_fld , [InstrStage<4, [L0, L1]>]>, InstrItinData<s_ist , [InstrStage<0, [L0, L1]>]>, diff --git a/lib/Target/Alpha/AlphaSelectionDAGInfo.cpp b/lib/Target/Alpha/AlphaSelectionDAGInfo.cpp new file mode 100644 index 0000000..0eb7b8f --- /dev/null +++ b/lib/Target/Alpha/AlphaSelectionDAGInfo.cpp @@ -0,0 +1,22 @@ +//===-- AlphaSelectionDAGInfo.cpp - Alpha SelectionDAG Info ---------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the AlphaSelectionDAGInfo class. +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "alpha-selectiondag-info" +#include "AlphaSelectionDAGInfo.h" +using namespace llvm; + +AlphaSelectionDAGInfo::AlphaSelectionDAGInfo() { +} + +AlphaSelectionDAGInfo::~AlphaSelectionDAGInfo() { +} diff --git a/lib/Target/Alpha/AlphaSelectionDAGInfo.h b/lib/Target/Alpha/AlphaSelectionDAGInfo.h new file mode 100644 index 0000000..70889ae --- /dev/null +++ b/lib/Target/Alpha/AlphaSelectionDAGInfo.h @@ -0,0 +1,29 @@ +//===-- AlphaSelectionDAGInfo.h - Alpha SelectionDAG Info -------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines the Alpha subclass for TargetSelectionDAGInfo. 
+// +//===----------------------------------------------------------------------===// + +#ifndef ALPHASELECTIONDAGINFO_H +#define ALPHASELECTIONDAGINFO_H + +#include "llvm/Target/TargetSelectionDAGInfo.h" + +namespace llvm { + +class AlphaSelectionDAGInfo : public TargetSelectionDAGInfo { +public: + AlphaSelectionDAGInfo(); + ~AlphaSelectionDAGInfo(); +}; + +} + +#endif diff --git a/lib/Target/Alpha/AlphaTargetMachine.h b/lib/Target/Alpha/AlphaTargetMachine.h index 6f3a774..0990f6d 100644 --- a/lib/Target/Alpha/AlphaTargetMachine.h +++ b/lib/Target/Alpha/AlphaTargetMachine.h @@ -44,8 +44,8 @@ public: virtual const AlphaRegisterInfo *getRegisterInfo() const { return &InstrInfo.getRegisterInfo(); } - virtual AlphaTargetLowering* getTargetLowering() const { - return const_cast<AlphaTargetLowering*>(&TLInfo); + virtual const AlphaTargetLowering* getTargetLowering() const { + return &TLInfo; } virtual const TargetData *getTargetData() const { return &DataLayout; } virtual AlphaJITInfo* getJITInfo() { diff --git a/lib/Target/Alpha/AsmPrinter/AlphaAsmPrinter.cpp b/lib/Target/Alpha/AsmPrinter/AlphaAsmPrinter.cpp index 2a1f5559..9f4aff6 100644 --- a/lib/Target/Alpha/AsmPrinter/AlphaAsmPrinter.cpp +++ b/lib/Target/Alpha/AsmPrinter/AlphaAsmPrinter.cpp @@ -73,7 +73,7 @@ namespace { void AlphaAsmPrinter::printOperand(const MachineInstr *MI, int opNum, raw_ostream &O) { const MachineOperand &MO = MI->getOperand(opNum); - if (MO.getType() == MachineOperand::MO_Register) { + if (MO.isReg()) { assert(TargetRegisterInfo::isPhysicalRegister(MO.getReg()) && "Not physreg??"); O << getRegisterName(MO.getReg()); diff --git a/lib/Target/Alpha/CMakeLists.txt b/lib/Target/Alpha/CMakeLists.txt index b4f41ae..fbf7f3a 100644 --- a/lib/Target/Alpha/CMakeLists.txt +++ b/lib/Target/Alpha/CMakeLists.txt @@ -23,6 +23,7 @@ add_llvm_target(AlphaCodeGen AlphaRegisterInfo.cpp AlphaSubtarget.cpp AlphaTargetMachine.cpp + AlphaSelectionDAGInfo.cpp ) target_link_libraries (LLVMAlphaCodeGen LLVMSelectionDAG) diff --git a/lib/Target/Blackfin/BlackfinISelDAGToDAG.cpp b/lib/Target/Blackfin/BlackfinISelDAGToDAG.cpp index c8d71aa..b4da96c 100644 --- a/lib/Target/Blackfin/BlackfinISelDAGToDAG.cpp +++ b/lib/Target/Blackfin/BlackfinISelDAGToDAG.cpp @@ -12,7 +12,6 @@ //===----------------------------------------------------------------------===// #include "Blackfin.h" -#include "BlackfinISelLowering.h" #include "BlackfinTargetMachine.h" #include "BlackfinRegisterInfo.h" #include "llvm/Intrinsics.h" diff --git a/lib/Target/Blackfin/BlackfinISelLowering.cpp b/lib/Target/Blackfin/BlackfinISelLowering.cpp index 5ce2013..adf2118 100644 --- a/lib/Target/Blackfin/BlackfinISelLowering.cpp +++ b/lib/Target/Blackfin/BlackfinISelLowering.cpp @@ -139,15 +139,16 @@ MVT::SimpleValueType BlackfinTargetLowering::getSetCCResultType(EVT VT) const { } SDValue BlackfinTargetLowering::LowerGlobalAddress(SDValue Op, - SelectionDAG &DAG) { + SelectionDAG &DAG) const { DebugLoc DL = Op.getDebugLoc(); - GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal(); + const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal(); Op = DAG.getTargetGlobalAddress(GV, MVT::i32); return DAG.getNode(BFISD::Wrapper, DL, MVT::i32, Op); } -SDValue BlackfinTargetLowering::LowerJumpTable(SDValue Op, SelectionDAG &DAG) { +SDValue BlackfinTargetLowering::LowerJumpTable(SDValue Op, + SelectionDAG &DAG) const { DebugLoc DL = Op.getDebugLoc(); int JTI = cast<JumpTableSDNode>(Op)->getIndex(); @@ -161,7 +162,8 @@ BlackfinTargetLowering::LowerFormalArguments(SDValue Chain, const 
SmallVectorImpl<ISD::InputArg> &Ins, DebugLoc dl, SelectionDAG &DAG, - SmallVectorImpl<SDValue> &InVals) { + SmallVectorImpl<SDValue> &InVals) + const { MachineFunction &MF = DAG.getMachineFunction(); MachineFrameInfo *MFI = MF.getFrameInfo(); @@ -218,7 +220,7 @@ SDValue BlackfinTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool isVarArg, const SmallVectorImpl<ISD::OutputArg> &Outs, - DebugLoc dl, SelectionDAG &DAG) { + DebugLoc dl, SelectionDAG &DAG) const { // CCValAssign - represent the assignment of the return value to locations. SmallVector<CCValAssign, 16> RVLocs; @@ -278,7 +280,7 @@ BlackfinTargetLowering::LowerCall(SDValue Chain, SDValue Callee, const SmallVectorImpl<ISD::OutputArg> &Outs, const SmallVectorImpl<ISD::InputArg> &Ins, DebugLoc dl, SelectionDAG &DAG, - SmallVectorImpl<SDValue> &InVals) { + SmallVectorImpl<SDValue> &InVals) const { // Blackfin target does not yet support tail call optimization. isTailCall = false; @@ -414,7 +416,7 @@ BlackfinTargetLowering::LowerCall(SDValue Chain, SDValue Callee, // Expansion of ADDE / SUBE. This is a bit involved since blackfin doesn't have // add-with-carry instructions. -SDValue BlackfinTargetLowering::LowerADDE(SDValue Op, SelectionDAG &DAG) { +SDValue BlackfinTargetLowering::LowerADDE(SDValue Op, SelectionDAG &DAG) const { // Operands: lhs, rhs, carry-in (AC0 flag) // Results: sum, carry-out (AC0 flag) DebugLoc dl = Op.getDebugLoc(); @@ -448,7 +450,8 @@ SDValue BlackfinTargetLowering::LowerADDE(SDValue Op, SelectionDAG &DAG) { return DAG.getMergeValues(ops, 2, dl); } -SDValue BlackfinTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) { +SDValue BlackfinTargetLowering::LowerOperation(SDValue Op, + SelectionDAG &DAG) const { switch (Op.getOpcode()) { default: Op.getNode()->dump(); @@ -468,7 +471,7 @@ SDValue BlackfinTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) { void BlackfinTargetLowering::ReplaceNodeResults(SDNode *N, SmallVectorImpl<SDValue> &Results, - SelectionDAG &DAG) { + SelectionDAG &DAG) const { DebugLoc dl = N->getDebugLoc(); switch (N->getOpcode()) { default: diff --git a/lib/Target/Blackfin/BlackfinISelLowering.h b/lib/Target/Blackfin/BlackfinISelLowering.h index 5f39910..a784248 100644 --- a/lib/Target/Blackfin/BlackfinISelLowering.h +++ b/lib/Target/Blackfin/BlackfinISelLowering.h @@ -30,16 +30,13 @@ namespace llvm { } class BlackfinTargetLowering : public TargetLowering { - int VarArgsFrameOffset; // Frame offset to start of varargs area. 
public: BlackfinTargetLowering(TargetMachine &TM); virtual MVT::SimpleValueType getSetCCResultType(EVT VT) const; - virtual SDValue LowerOperation(SDValue Op, SelectionDAG &DAG); + virtual SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const; virtual void ReplaceNodeResults(SDNode *N, SmallVectorImpl<SDValue> &Results, - SelectionDAG &DAG); - - int getVarArgsFrameOffset() const { return VarArgsFrameOffset; } + SelectionDAG &DAG) const; ConstraintType getConstraintType(const std::string &Constraint) const; std::pair<unsigned, const TargetRegisterClass*> @@ -52,29 +49,29 @@ namespace llvm { unsigned getFunctionAlignment(const Function *F) const; private: - SDValue LowerGlobalAddress(SDValue Op, SelectionDAG &DAG); - SDValue LowerJumpTable(SDValue Op, SelectionDAG &DAG); - SDValue LowerADDE(SDValue Op, SelectionDAG &DAG); + SDValue LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerJumpTable(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerADDE(SDValue Op, SelectionDAG &DAG) const; virtual SDValue LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, bool isVarArg, const SmallVectorImpl<ISD::InputArg> &Ins, DebugLoc dl, SelectionDAG &DAG, - SmallVectorImpl<SDValue> &InVals); + SmallVectorImpl<SDValue> &InVals) const; virtual SDValue LowerCall(SDValue Chain, SDValue Callee, CallingConv::ID CallConv, bool isVarArg, bool &isTailCall, const SmallVectorImpl<ISD::OutputArg> &Outs, const SmallVectorImpl<ISD::InputArg> &Ins, DebugLoc dl, SelectionDAG &DAG, - SmallVectorImpl<SDValue> &InVals); + SmallVectorImpl<SDValue> &InVals) const; virtual SDValue LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool isVarArg, const SmallVectorImpl<ISD::OutputArg> &Outs, - DebugLoc dl, SelectionDAG &DAG); + DebugLoc dl, SelectionDAG &DAG) const; }; } // end namespace llvm diff --git a/lib/Target/Blackfin/BlackfinRegisterInfo.cpp b/lib/Target/Blackfin/BlackfinRegisterInfo.cpp index 6fd610f..2512c9b 100644 --- a/lib/Target/Blackfin/BlackfinRegisterInfo.cpp +++ b/lib/Target/Blackfin/BlackfinRegisterInfo.cpp @@ -110,7 +110,8 @@ BlackfinRegisterInfo::getPhysicalRegisterRegClass(unsigned reg, EVT VT) const { // if frame pointer elimination is disabled. bool BlackfinRegisterInfo::hasFP(const MachineFunction &MF) const { const MachineFrameInfo *MFI = MF.getFrameInfo(); - return NoFramePointerElim || MFI->hasCalls() || MFI->hasVarSizedObjects(); + return DisableFramePointerElim(MF) || + MFI->hasCalls() || MFI->hasVarSizedObjects(); } bool BlackfinRegisterInfo:: diff --git a/lib/Target/Blackfin/BlackfinSelectionDAGInfo.cpp b/lib/Target/Blackfin/BlackfinSelectionDAGInfo.cpp new file mode 100644 index 0000000..f4bb25f --- /dev/null +++ b/lib/Target/Blackfin/BlackfinSelectionDAGInfo.cpp @@ -0,0 +1,22 @@ +//===-- BlackfinSelectionDAGInfo.cpp - Blackfin SelectionDAG Info ---------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the BlackfinSelectionDAGInfo class. 
+// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "blackfin-selectiondag-info" +#include "BlackfinSelectionDAGInfo.h" +using namespace llvm; + +BlackfinSelectionDAGInfo::BlackfinSelectionDAGInfo() { +} + +BlackfinSelectionDAGInfo::~BlackfinSelectionDAGInfo() { +} diff --git a/lib/Target/Blackfin/BlackfinSelectionDAGInfo.h b/lib/Target/Blackfin/BlackfinSelectionDAGInfo.h new file mode 100644 index 0000000..a620330 --- /dev/null +++ b/lib/Target/Blackfin/BlackfinSelectionDAGInfo.h @@ -0,0 +1,29 @@ +//===-- BlackfinSelectionDAGInfo.h - Blackfin SelectionDAG Info -*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines the Blackfin subclass for TargetSelectionDAGInfo. +// +//===----------------------------------------------------------------------===// + +#ifndef BLACKFINSELECTIONDAGINFO_H +#define BLACKFINSELECTIONDAGINFO_H + +#include "llvm/Target/TargetSelectionDAGInfo.h" + +namespace llvm { + +class BlackfinSelectionDAGInfo : public TargetSelectionDAGInfo { +public: + BlackfinSelectionDAGInfo(); + ~BlackfinSelectionDAGInfo(); +}; + +} + +#endif diff --git a/lib/Target/Blackfin/BlackfinTargetMachine.h b/lib/Target/Blackfin/BlackfinTargetMachine.h index a14052b..07e7394 100644 --- a/lib/Target/Blackfin/BlackfinTargetMachine.h +++ b/lib/Target/Blackfin/BlackfinTargetMachine.h @@ -43,8 +43,8 @@ namespace llvm { virtual const BlackfinRegisterInfo *getRegisterInfo() const { return &InstrInfo.getRegisterInfo(); } - virtual BlackfinTargetLowering* getTargetLowering() const { - return const_cast<BlackfinTargetLowering*>(&TLInfo); + virtual const BlackfinTargetLowering* getTargetLowering() const { + return &TLInfo; } virtual const TargetData *getTargetData() const { return &DataLayout; } virtual bool addInstSelector(PassManagerBase &PM, diff --git a/lib/Target/Blackfin/CMakeLists.txt b/lib/Target/Blackfin/CMakeLists.txt index deb005d..f8847d0 100644 --- a/lib/Target/Blackfin/CMakeLists.txt +++ b/lib/Target/Blackfin/CMakeLists.txt @@ -20,4 +20,5 @@ add_llvm_target(BlackfinCodeGen BlackfinRegisterInfo.cpp BlackfinSubtarget.cpp BlackfinTargetMachine.cpp + BlackfinSelectionDAGInfo.cpp ) diff --git a/lib/Target/CBackend/CBackend.cpp b/lib/Target/CBackend/CBackend.cpp index 0c265ad..67f513b 100644 --- a/lib/Target/CBackend/CBackend.cpp +++ b/lib/Target/CBackend/CBackend.cpp @@ -274,8 +274,8 @@ namespace { // isInlineAsm - Check if the instruction is a call to an inline asm chunk static bool isInlineAsm(const Instruction& I) { - if (isa<CallInst>(&I) && isa<InlineAsm>(I.getOperand(0))) - return true; + if (const CallInst *CI = dyn_cast<CallInst>(&I)) + return isa<InlineAsm>(CI->getCalledValue()); return false; } @@ -473,8 +473,9 @@ void CWriter::printStructReturnPointerFunctionType(raw_ostream &Out, PrintedType = true; } if (FTy->isVarArg()) { - if (PrintedType) - FunctionInnards << ", ..."; + if (!PrintedType) + FunctionInnards << " int"; //dummy argument for empty vararg functs + FunctionInnards << ", ..."; } else if (!PrintedType) { FunctionInnards << "void"; } @@ -568,8 +569,9 @@ raw_ostream &CWriter::printType(raw_ostream &Out, const Type *Ty, ++Idx; } if (FTy->isVarArg()) { - if (FTy->getNumParams()) - FunctionInnards << ", ..."; + if (!FTy->getNumParams()) + FunctionInnards << " int"; //dummy argument for empty 
vaarg functs + FunctionInnards << ", ..."; } else if (!FTy->getNumParams()) { FunctionInnards << "void"; } @@ -1344,7 +1346,7 @@ void CWriter::writeInstComputationInline(Instruction &I) { Ty!=Type::getInt16Ty(I.getContext()) && Ty!=Type::getInt32Ty(I.getContext()) && Ty!=Type::getInt64Ty(I.getContext()))) { - llvm_report_error("The C backend does not currently support integer " + report_fatal_error("The C backend does not currently support integer " "types of widths other than 1, 8, 16, 32, 64.\n" "This is being tracked as PR 4158."); } @@ -2237,12 +2239,16 @@ void CWriter::printFunctionSignature(const Function *F, bool Prototype) { } } + if (!PrintedArg && FT->isVarArg()) { + FunctionInnards << "int vararg_dummy_arg"; + PrintedArg = true; + } + // Finish printing arguments... if this is a vararg function, print the ..., // unless there are no known types, in which case, we just emit (). // if (FT->isVarArg() && PrintedArg) { - if (PrintedArg) FunctionInnards << ", "; - FunctionInnards << "..."; // Output varargs portion of signature! + FunctionInnards << ",..."; // Output varargs portion of signature! } else if (!FT->isVarArg() && !PrintedArg) { FunctionInnards << "void"; // ret() -> ret(void) in C. } @@ -2858,7 +2864,7 @@ void CWriter::lowerIntrinsics(Function &F) { } void CWriter::visitCallInst(CallInst &I) { - if (isa<InlineAsm>(I.getOperand(0))) + if (isa<InlineAsm>(I.getCalledValue())) return visitInlineAsm(I); bool WroteCallee = false; @@ -2928,6 +2934,12 @@ void CWriter::visitCallInst(CallInst &I) { Out << '('; + bool PrintedArg = false; + if(FTy->isVarArg() && !FTy->getNumParams()) { + Out << "0 /*dummy arg*/"; + PrintedArg = true; + } + unsigned NumDeclaredParams = FTy->getNumParams(); CallSite::arg_iterator AI = I.op_begin()+1, AE = I.op_end(); @@ -2937,7 +2949,7 @@ void CWriter::visitCallInst(CallInst &I) { ++ArgNo; } - bool PrintedArg = false; + for (; AI != AE; ++AI, ++ArgNo) { if (PrintedArg) Out << ", "; if (ArgNo < NumDeclaredParams && @@ -2987,15 +2999,10 @@ bool CWriter::visitBuiltinCall(CallInst &I, Intrinsic::ID ID, writeOperand(I.getOperand(1)); Out << ", "; // Output the last argument to the enclosing function. - if (I.getParent()->getParent()->arg_empty()) { - std::string msg; - raw_string_ostream Msg(msg); - Msg << "The C backend does not currently support zero " - << "argument varargs functions, such as '" - << I.getParent()->getParent()->getName() << "'!"; - llvm_report_error(Msg.str()); - } - writeOperand(--I.getParent()->getParent()->arg_end()); + if (I.getParent()->getParent()->arg_empty()) + Out << "vararg_dummy_arg"; + else + writeOperand(--I.getParent()->getParent()->arg_end()); Out << ')'; return true; case Intrinsic::vaend: @@ -3165,7 +3172,7 @@ static std::string gccifyAsm(std::string asmstr) { //TODO: assumptions about what consume arguments from the call are likely wrong // handle communitivity void CWriter::visitInlineAsm(CallInst &CI) { - InlineAsm* as = cast<InlineAsm>(CI.getOperand(0)); + InlineAsm* as = cast<InlineAsm>(CI.getCalledValue()); std::vector<InlineAsm::ConstraintInfo> Constraints = as->ParseConstraints(); std::vector<std::pair<Value*, int> > ResultVals; diff --git a/lib/Target/CellSPU/AsmPrinter/CMakeLists.txt b/lib/Target/CellSPU/AsmPrinter/CMakeLists.txt index 1e508fe..8a2b59a 100644 --- a/lib/Target/CellSPU/AsmPrinter/CMakeLists.txt +++ b/lib/Target/CellSPU/AsmPrinter/CMakeLists.txt @@ -1,9 +1,9 @@ -include_directories(
- ${CMAKE_CURRENT_BINARY_DIR}/..^M
- ${CMAKE_CURRENT_SOURCE_DIR}/..^M
- )^M
-^M
-add_llvm_library(LLVMCellSPUAsmPrinter^M
+include_directories( + ${CMAKE_CURRENT_BINARY_DIR}/.. + ${CMAKE_CURRENT_SOURCE_DIR}/.. + ) + +add_llvm_library(LLVMCellSPUAsmPrinter SPUAsmPrinter.cpp - )^M
-add_dependencies(LLVMCellSPUAsmPrinter CellSPUCodeGenTable_gen)^M
\ No newline at end of file + ) +add_dependencies(LLVMCellSPUAsmPrinter CellSPUCodeGenTable_gen) diff --git a/lib/Target/CellSPU/AsmPrinter/SPUAsmPrinter.cpp b/lib/Target/CellSPU/AsmPrinter/SPUAsmPrinter.cpp index 0ef36e5..3e95531 100644 --- a/lib/Target/CellSPU/AsmPrinter/SPUAsmPrinter.cpp +++ b/lib/Target/CellSPU/AsmPrinter/SPUAsmPrinter.cpp @@ -280,7 +280,7 @@ namespace { void SPUAsmPrinter::printOp(const MachineOperand &MO, raw_ostream &O) { switch (MO.getType()) { case MachineOperand::MO_Immediate: - llvm_report_error("printOp() does not handle immediate values"); + report_fatal_error("printOp() does not handle immediate values"); return; case MachineOperand::MO_MachineBasicBlock: @@ -307,7 +307,7 @@ void SPUAsmPrinter::printOp(const MachineOperand &MO, raw_ostream &O) { // External or weakly linked global variables need non-lazily-resolved // stubs if (TM.getRelocationModel() != Reloc::Static) { - GlobalValue *GV = MO.getGlobal(); + const GlobalValue *GV = MO.getGlobal(); if (((GV->isDeclaration() || GV->hasWeakLinkage() || GV->hasLinkOnceLinkage() || GV->hasCommonLinkage()))) { O << *GetSymbolWithGlobalValueBase(GV, "$non_lazy_ptr"); diff --git a/lib/Target/CellSPU/CMakeLists.txt b/lib/Target/CellSPU/CMakeLists.txt index 0cb6676..ddfca37 100644 --- a/lib/Target/CellSPU/CMakeLists.txt +++ b/lib/Target/CellSPU/CMakeLists.txt @@ -21,6 +21,7 @@ add_llvm_target(CellSPUCodeGen SPURegisterInfo.cpp SPUSubtarget.cpp SPUTargetMachine.cpp + SPUSelectionDAGInfo.cpp ) target_link_libraries (LLVMCellSPUCodeGen LLVMSelectionDAG) diff --git a/lib/Target/CellSPU/SPUISelDAGToDAG.cpp b/lib/Target/CellSPU/SPUISelDAGToDAG.cpp index 90f8310..c3c2b39 100644 --- a/lib/Target/CellSPU/SPUISelDAGToDAG.cpp +++ b/lib/Target/CellSPU/SPUISelDAGToDAG.cpp @@ -14,7 +14,6 @@ #include "SPU.h" #include "SPUTargetMachine.h" -#include "SPUISelLowering.h" #include "SPUHazardRecognizers.h" #include "SPUFrameInfo.h" #include "SPURegisterNames.h" @@ -194,11 +193,8 @@ namespace { #ifndef NDEBUG if (retval == 0) { - std::string msg; - raw_string_ostream Msg(msg); - Msg << "SPUISelDAGToDAG.cpp: getValueTypeMapEntry returns NULL for " - << VT.getEVTString(); - llvm_report_error(Msg.str()); + report_fatal_error("SPUISelDAGToDAG.cpp: getValueTypeMapEntry returns" + "NULL for " + Twine(VT.getEVTString())); } #endif @@ -242,8 +238,8 @@ namespace { class SPUDAGToDAGISel : public SelectionDAGISel { - SPUTargetMachine &TM; - SPUTargetLowering &SPUtli; + const SPUTargetMachine &TM; + const SPUTargetLowering &SPUtli; unsigned GlobalBaseReg; public: @@ -305,16 +301,15 @@ namespace { std::vector<Constant*> CV; for (size_t i = 0; i < bvNode->getNumOperands(); ++i) { - ConstantSDNode *V = dyn_cast<ConstantSDNode > (bvNode->getOperand(i)); + ConstantSDNode *V = cast<ConstantSDNode > (bvNode->getOperand(i)); CV.push_back(const_cast<ConstantInt *>(V->getConstantIntValue())); } - Constant *CP = ConstantVector::get(CV); + const Constant *CP = ConstantVector::get(CV); SDValue CPIdx = CurDAG->getConstantPool(CP, SPUtli.getPointerTy()); unsigned Alignment = cast<ConstantPoolSDNode>(CPIdx)->getAlignment(); SDValue CGPoolOffset = - SPU::LowerConstantPool(CPIdx, *CurDAG, - SPUtli.getSPUTargetMachine()); + SPU::LowerConstantPool(CPIdx, *CurDAG, TM); HandleSDNode Dummy(CurDAG->getLoad(vecVT, dl, CurDAG->getEntryNode(), CGPoolOffset, @@ -433,13 +428,13 @@ SPUDAGToDAGISel::SelectAFormAddr(SDNode *Op, SDValue N, SDValue &Base, case ISD::Constant: case ISD::ConstantPool: case ISD::GlobalAddress: - llvm_report_error("SPU SelectAFormAddr: Constant/Pool/Global 
not lowered."); + report_fatal_error("SPU SelectAFormAddr: Constant/Pool/Global not lowered."); /*NOTREACHED*/ case ISD::TargetConstant: case ISD::TargetGlobalAddress: case ISD::TargetJumpTable: - llvm_report_error("SPUSelectAFormAddr: Target Constant/Pool/Global " + report_fatal_error("SPUSelectAFormAddr: Target Constant/Pool/Global " "not wrapped as A-form address."); /*NOTREACHED*/ @@ -457,7 +452,7 @@ SPUDAGToDAGISel::SelectAFormAddr(SDNode *Op, SDValue N, SDValue &Base, case ISD::TargetGlobalAddress: { GlobalAddressSDNode *GSDN = cast<GlobalAddressSDNode>(Op0); - GlobalValue *GV = GSDN->getGlobal(); + const GlobalValue *GV = GSDN->getGlobal(); if (GV->getAlignment() == 16) { Base = Op0; Index = Zero; @@ -510,7 +505,7 @@ SPUDAGToDAGISel::DFormAddressPredicate(SDNode *Op, SDValue N, SDValue &Base, if (Opc == ISD::FrameIndex) { // Stack frame index must be less than 512 (divided by 16): - FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(N); + FrameIndexSDNode *FIN = cast<FrameIndexSDNode>(N); int FI = int(FIN->getIndex()); DEBUG(errs() << "SelectDFormAddr: ISD::FrameIndex = " << FI << "\n"); @@ -531,11 +526,11 @@ SPUDAGToDAGISel::DFormAddressPredicate(SDNode *Op, SDValue N, SDValue &Base, return true; } else if (Op1.getOpcode() == ISD::Constant || Op1.getOpcode() == ISD::TargetConstant) { - ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Op1); + ConstantSDNode *CN = cast<ConstantSDNode>(Op1); int32_t offset = int32_t(CN->getSExtValue()); if (Op0.getOpcode() == ISD::FrameIndex) { - FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Op0); + FrameIndexSDNode *FIN = cast<FrameIndexSDNode>(Op0); int FI = int(FIN->getIndex()); DEBUG(errs() << "SelectDFormAddr: ISD::ADD offset = " << offset << " frame index = " << FI << "\n"); @@ -552,11 +547,11 @@ SPUDAGToDAGISel::DFormAddressPredicate(SDNode *Op, SDValue N, SDValue &Base, } } else if (Op0.getOpcode() == ISD::Constant || Op0.getOpcode() == ISD::TargetConstant) { - ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Op0); + ConstantSDNode *CN = cast<ConstantSDNode>(Op0); int32_t offset = int32_t(CN->getSExtValue()); if (Op1.getOpcode() == ISD::FrameIndex) { - FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Op1); + FrameIndexSDNode *FIN = cast<FrameIndexSDNode>(Op1); int FI = int(FIN->getIndex()); DEBUG(errs() << "SelectDFormAddr: ISD::ADD offset = " << offset << " frame index = " << FI << "\n"); @@ -725,7 +720,7 @@ SPUDAGToDAGISel::Select(SDNode *N) { switch (Op0VT.getSimpleVT().SimpleTy) { default: - llvm_report_error("CellSPU Select: Unhandled zero/any extend EVT"); + report_fatal_error("CellSPU Select: Unhandled zero/any extend EVT"); /*NOTREACHED*/ case MVT::i32: shufMask = CurDAG->getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, @@ -915,11 +910,8 @@ SPUDAGToDAGISel::Select(SDNode *N) { const valtype_map_s *vtm = getValueTypeMapEntry(VT); if (vtm->ldresult_ins == 0) { - std::string msg; - raw_string_ostream Msg(msg); - Msg << "LDRESULT for unsupported type: " - << VT.getEVTString(); - llvm_report_error(Msg.str()); + report_fatal_error("LDRESULT for unsupported type: " + + Twine(VT.getEVTString())); } Opc = vtm->ldresult_ins; @@ -1252,7 +1244,7 @@ SDNode *SPUDAGToDAGISel::SelectI64Constant(uint64_t Value64, EVT OpVT, return CurDAG->getMachineNode(SPU::ORi64_v2i64, dl, OpVT, SDValue(emitBuildVector(i64vec.getNode()), 0)); } else { - llvm_report_error("SPUDAGToDAGISel::SelectI64Constant: Unhandled i64vec" + report_fatal_error("SPUDAGToDAGISel::SelectI64Constant: Unhandled i64vec" "condition"); } } diff --git a/lib/Target/CellSPU/SPUISelLowering.cpp 
b/lib/Target/CellSPU/SPUISelLowering.cpp index 4b0d442..5e04454 100644 --- a/lib/Target/CellSPU/SPUISelLowering.cpp +++ b/lib/Target/CellSPU/SPUISelLowering.cpp @@ -15,6 +15,7 @@ #include "SPUISelLowering.h" #include "SPUTargetMachine.h" #include "SPUFrameInfo.h" +#include "SPUMachineFunction.h" #include "llvm/Constants.h" #include "llvm/Function.h" #include "llvm/Intrinsics.h" @@ -71,11 +72,8 @@ namespace { #ifndef NDEBUG if (retval == 0) { - std::string msg; - raw_string_ostream Msg(msg); - Msg << "getValueTypeMapEntry returns NULL for " - << VT.getEVTString(); - llvm_report_error(Msg.str()); + report_fatal_error("getValueTypeMapEntry returns NULL for " + + Twine(VT.getEVTString())); } #endif @@ -91,7 +89,7 @@ namespace { SDValue ExpandLibCall(RTLIB::Libcall LC, SDValue Op, SelectionDAG &DAG, - bool isSigned, SDValue &Hi, SPUTargetLowering &TLI) { + bool isSigned, SDValue &Hi, const SPUTargetLowering &TLI) { // The input chain to this libcall is the entry node of the function. // Legalizing the call will automatically add the previous call to the // dependence. @@ -714,12 +712,9 @@ LowerLOAD(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) { case ISD::POST_DEC: case ISD::LAST_INDEXED_MODE: { - std::string msg; - raw_string_ostream Msg(msg); - Msg << "LowerLOAD: Got a LoadSDNode with an addr mode other than " - "UNINDEXED\n"; - Msg << (unsigned) LN->getAddressingMode(); - llvm_report_error(Msg.str()); + report_fatal_error("LowerLOAD: Got a LoadSDNode with an addr mode other " + "than UNINDEXED\n" + + Twine((unsigned)LN->getAddressingMode())); /*NOTREACHED*/ } } @@ -884,12 +879,9 @@ LowerSTORE(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) { case ISD::POST_DEC: case ISD::LAST_INDEXED_MODE: { - std::string msg; - raw_string_ostream Msg(msg); - Msg << "LowerLOAD: Got a LoadSDNode with an addr mode other than " - "UNINDEXED\n"; - Msg << (unsigned) SN->getAddressingMode(); - llvm_report_error(Msg.str()); + report_fatal_error("LowerLOAD: Got a LoadSDNode with an addr mode other " + "than UNINDEXED\n" + + Twine((unsigned)SN->getAddressingMode())); /*NOTREACHED*/ } } @@ -902,7 +894,7 @@ static SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) { EVT PtrVT = Op.getValueType(); ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op); - Constant *C = CP->getConstVal(); + const Constant *C = CP->getConstVal(); SDValue CPI = DAG.getTargetConstantPool(C, PtrVT, CP->getAlignment()); SDValue Zero = DAG.getConstant(0, PtrVT); const TargetMachine &TM = DAG.getTarget(); @@ -960,7 +952,7 @@ static SDValue LowerGlobalAddress(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) { EVT PtrVT = Op.getValueType(); GlobalAddressSDNode *GSDN = cast<GlobalAddressSDNode>(Op); - GlobalValue *GV = GSDN->getGlobal(); + const GlobalValue *GV = GSDN->getGlobal(); SDValue GA = DAG.getTargetGlobalAddress(GV, PtrVT, GSDN->getOffset()); const TargetMachine &TM = DAG.getTarget(); SDValue Zero = DAG.getConstant(0, PtrVT); @@ -976,7 +968,7 @@ LowerGlobalAddress(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) { return DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, Hi, Lo); } } else { - llvm_report_error("LowerGlobalAddress: Relocation model other than static" + report_fatal_error("LowerGlobalAddress: Relocation model other than static" "not supported."); /*NOTREACHED*/ } @@ -1013,11 +1005,13 @@ SPUTargetLowering::LowerFormalArguments(SDValue Chain, const SmallVectorImpl<ISD::InputArg> &Ins, DebugLoc dl, SelectionDAG &DAG, - SmallVectorImpl<SDValue> &InVals) { + 
SmallVectorImpl<SDValue> &InVals) + const { MachineFunction &MF = DAG.getMachineFunction(); MachineFrameInfo *MFI = MF.getFrameInfo(); MachineRegisterInfo &RegInfo = MF.getRegInfo(); + SPUFunctionInfo *FuncInfo = MF.getInfo<SPUFunctionInfo>(); const unsigned *ArgRegs = SPURegisterInfo::getArgRegs(); const unsigned NumArgRegs = SPURegisterInfo::getNumArgRegs(); @@ -1038,13 +1032,9 @@ SPUTargetLowering::LowerFormalArguments(SDValue Chain, const TargetRegisterClass *ArgRegClass; switch (ObjectVT.getSimpleVT().SimpleTy) { - default: { - std::string msg; - raw_string_ostream Msg(msg); - Msg << "LowerFormalArguments Unhandled argument type: " - << ObjectVT.getEVTString(); - llvm_report_error(Msg.str()); - } + default: + report_fatal_error("LowerFormalArguments Unhandled argument type: " + + Twine(ObjectVT.getEVTString())); case MVT::i8: ArgRegClass = &SPU::R8CRegClass; break; @@ -1104,9 +1094,10 @@ SPUTargetLowering::LowerFormalArguments(SDValue Chain, // Create the frame slot for (; ArgRegIdx != NumArgRegs; ++ArgRegIdx) { - VarArgsFrameIndex = MFI->CreateFixedObject(StackSlotSize, ArgOffset, - true, false); - SDValue FIN = DAG.getFrameIndex(VarArgsFrameIndex, PtrVT); + FuncInfo->setVarArgsFrameIndex( + MFI->CreateFixedObject(StackSlotSize, ArgOffset, + true, false)); + SDValue FIN = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT); unsigned VReg = MF.addLiveIn(ArgRegs[ArgRegIdx], &SPU::R32CRegClass); SDValue ArgVal = DAG.getRegister(VReg, MVT::v16i8); SDValue Store = DAG.getStore(Chain, dl, ArgVal, FIN, NULL, 0, @@ -1146,7 +1137,7 @@ SPUTargetLowering::LowerCall(SDValue Chain, SDValue Callee, const SmallVectorImpl<ISD::OutputArg> &Outs, const SmallVectorImpl<ISD::InputArg> &Ins, DebugLoc dl, SelectionDAG &DAG, - SmallVectorImpl<SDValue> &InVals) { + SmallVectorImpl<SDValue> &InVals) const { // CellSPU target does not yet support tail call optimization. isTailCall = false; @@ -1255,7 +1246,7 @@ SPUTargetLowering::LowerCall(SDValue Chain, SDValue Callee, // direct call is) turn it into a TargetGlobalAddress/TargetExternalSymbol // node so that legalize doesn't hack it. 
if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) { - GlobalValue *GV = G->getGlobal(); + const GlobalValue *GV = G->getGlobal(); EVT CalleeVT = Callee.getValueType(); SDValue Zero = DAG.getConstant(0, PtrVT); SDValue GA = DAG.getTargetGlobalAddress(GV, CalleeVT); @@ -1339,22 +1330,12 @@ SPUTargetLowering::LowerCall(SDValue Chain, SDValue Callee, InVals.push_back(Chain.getValue(0)); } break; + case MVT::i8: + case MVT::i16: case MVT::i64: - Chain = DAG.getCopyFromReg(Chain, dl, SPU::R3, MVT::i64, - InFlag).getValue(1); - InVals.push_back(Chain.getValue(0)); - break; case MVT::i128: - Chain = DAG.getCopyFromReg(Chain, dl, SPU::R3, MVT::i128, - InFlag).getValue(1); - InVals.push_back(Chain.getValue(0)); - break; case MVT::f32: case MVT::f64: - Chain = DAG.getCopyFromReg(Chain, dl, SPU::R3, Ins[0].VT, - InFlag).getValue(1); - InVals.push_back(Chain.getValue(0)); - break; case MVT::v2f64: case MVT::v2i64: case MVT::v4f32: @@ -1374,7 +1355,7 @@ SDValue SPUTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool isVarArg, const SmallVectorImpl<ISD::OutputArg> &Outs, - DebugLoc dl, SelectionDAG &DAG) { + DebugLoc dl, SelectionDAG &DAG) const { SmallVector<CCValAssign, 16> RVLocs; CCState CCInfo(CallConv, isVarArg, getTargetMachine(), @@ -1581,14 +1562,10 @@ LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) { uint64_t SplatBits = APSplatBits.getZExtValue(); switch (VT.getSimpleVT().SimpleTy) { - default: { - std::string msg; - raw_string_ostream Msg(msg); - Msg << "CellSPU: Unhandled VT in LowerBUILD_VECTOR, VT = " - << VT.getEVTString(); - llvm_report_error(Msg.str()); + default: + report_fatal_error("CellSPU: Unhandled VT in LowerBUILD_VECTOR, VT = " + + Twine(VT.getEVTString())); /*NOTREACHED*/ - } case MVT::v4f32: { uint32_t Value32 = uint32_t(SplatBits); assert(SplatBitSize == 32 @@ -2004,7 +1981,7 @@ static SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) { // slot 0 across the vector EVT VecVT = N.getValueType(); if (!VecVT.isSimple() || !VecVT.isVector() || !VecVT.is128BitVector()) { - llvm_report_error("LowerEXTRACT_VECTOR_ELT: Must have a simple, 128-bit" + report_fatal_error("LowerEXTRACT_VECTOR_ELT: Must have a simple, 128-bit" "vector type!"); } @@ -2032,7 +2009,7 @@ static SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) { switch (VT.getSimpleVT().SimpleTy) { default: - llvm_report_error("LowerEXTRACT_VECTOR_ELT(varable): Unhandled vector" + report_fatal_error("LowerEXTRACT_VECTOR_ELT(varable): Unhandled vector" "type"); /*NOTREACHED*/ case MVT::i8: { @@ -2368,7 +2345,7 @@ static SDValue LowerCTPOP(SDValue Op, SelectionDAG &DAG) { All conversions to i64 are expanded to a libcall. */ static SDValue LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG, - SPUTargetLowering &TLI) { + const SPUTargetLowering &TLI) { EVT OpVT = Op.getValueType(); SDValue Op0 = Op.getOperand(0); EVT Op0VT = Op0.getValueType(); @@ -2394,7 +2371,7 @@ static SDValue LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG, All conversions from i64 are expanded to a libcall. 
*/ static SDValue LowerINT_TO_FP(SDValue Op, SelectionDAG &DAG, - SPUTargetLowering &TLI) { + const SPUTargetLowering &TLI) { EVT OpVT = Op.getValueType(); SDValue Op0 = Op.getOperand(0); EVT Op0VT = Op0.getValueType(); @@ -2515,7 +2492,7 @@ static SDValue LowerSETCC(SDValue Op, SelectionDAG &DAG, case ISD::SETONE: compareOp = ISD::SETNE; break; default: - llvm_report_error("CellSPU ISel Select: unimplemented f64 condition"); + report_fatal_error("CellSPU ISel Select: unimplemented f64 condition"); } SDValue result = @@ -2670,7 +2647,7 @@ static SDValue LowerSIGN_EXTEND(SDValue Op, SelectionDAG &DAG) lowering of nodes. */ SDValue -SPUTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) +SPUTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { unsigned Opc = (unsigned) Op.getOpcode(); EVT VT = Op.getValueType(); @@ -2766,7 +2743,7 @@ SPUTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) void SPUTargetLowering::ReplaceNodeResults(SDNode *N, SmallVectorImpl<SDValue>&Results, - SelectionDAG &DAG) + SelectionDAG &DAG) const { #if 0 unsigned Opc = (unsigned) N->getOpcode(); diff --git a/lib/Target/CellSPU/SPUISelLowering.h b/lib/Target/CellSPU/SPUISelLowering.h index 3c51177..9ebd442 100644 --- a/lib/Target/CellSPU/SPUISelLowering.h +++ b/lib/Target/CellSPU/SPUISelLowering.h @@ -109,11 +109,11 @@ namespace llvm { virtual MVT::SimpleValueType getSetCCResultType(EVT VT) const; //! Custom lowering hooks - virtual SDValue LowerOperation(SDValue Op, SelectionDAG &DAG); + virtual SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const; //! Custom lowering hook for nodes with illegal result types. virtual void ReplaceNodeResults(SDNode *N, SmallVectorImpl<SDValue>&Results, - SelectionDAG &DAG); + SelectionDAG &DAG) const; virtual SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const; @@ -153,7 +153,7 @@ namespace llvm { CallingConv::ID CallConv, bool isVarArg, const SmallVectorImpl<ISD::InputArg> &Ins, DebugLoc dl, SelectionDAG &DAG, - SmallVectorImpl<SDValue> &InVals); + SmallVectorImpl<SDValue> &InVals) const; virtual SDValue LowerCall(SDValue Chain, SDValue Callee, @@ -162,13 +162,13 @@ namespace llvm { const SmallVectorImpl<ISD::OutputArg> &Outs, const SmallVectorImpl<ISD::InputArg> &Ins, DebugLoc dl, SelectionDAG &DAG, - SmallVectorImpl<SDValue> &InVals); + SmallVectorImpl<SDValue> &InVals) const; virtual SDValue LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool isVarArg, const SmallVectorImpl<ISD::OutputArg> &Outs, - DebugLoc dl, SelectionDAG &DAG); + DebugLoc dl, SelectionDAG &DAG) const; }; } diff --git a/lib/Target/CellSPU/SPUMCAsmInfo.cpp b/lib/Target/CellSPU/SPUMCAsmInfo.cpp index 3e17a51..68445cf 100644 --- a/lib/Target/CellSPU/SPUMCAsmInfo.cpp +++ b/lib/Target/CellSPU/SPUMCAsmInfo.cpp @@ -18,7 +18,6 @@ SPULinuxMCAsmInfo::SPULinuxMCAsmInfo(const Target &T, const StringRef &TT) { ZeroDirective = "\t.space\t"; Data64bitsDirective = "\t.quad\t"; AlignmentIsInBytes = false; - HasLCOMMDirective = true; PCSymbol = "."; CommentString = "#"; diff --git a/lib/Target/CellSPU/SPUMachineFunction.h b/lib/Target/CellSPU/SPUMachineFunction.h index 6a66967..3ef3ccb 100644 --- a/lib/Target/CellSPU/SPUMachineFunction.h +++ b/lib/Target/CellSPU/SPUMachineFunction.h @@ -26,14 +26,20 @@ private: /// bool UsesLR; + // VarArgsFrameIndex - FrameIndex for start of varargs area. 
+ int VarArgsFrameIndex; + public: SPUFunctionInfo(MachineFunction& MF) - : UsesLR(false) + : UsesLR(false), + VarArgsFrameIndex(0) {} void setUsesLR(bool U) { UsesLR = U; } bool usesLR() { return UsesLR; } + int getVarArgsFrameIndex() const { return VarArgsFrameIndex; } + void setVarArgsFrameIndex(int Index) { VarArgsFrameIndex = Index; } }; } // end of namespace llvm diff --git a/lib/Target/CellSPU/SPURegisterInfo.cpp b/lib/Target/CellSPU/SPURegisterInfo.cpp index ffac581..fdbe10f 100644 --- a/lib/Target/CellSPU/SPURegisterInfo.cpp +++ b/lib/Target/CellSPU/SPURegisterInfo.cpp @@ -179,7 +179,7 @@ unsigned SPURegisterInfo::getRegisterNumbering(unsigned RegEnum) { case SPU::R126: return 126; case SPU::R127: return 127; default: - llvm_report_error("Unhandled reg in SPURegisterInfo::getRegisterNumbering"); + report_fatal_error("Unhandled reg in SPURegisterInfo::getRegisterNumbering"); } } @@ -303,7 +303,7 @@ BitVector SPURegisterInfo::getReservedRegs(const MachineFunction &MF) const { // static bool needsFP(const MachineFunction &MF) { const MachineFrameInfo *MFI = MF.getFrameInfo(); - return NoFramePointerElim || MFI->hasVarSizedObjects(); + return DisableFramePointerElim(MF) || MFI->hasVarSizedObjects(); } //-------------------------------------------------------------------------- @@ -509,10 +509,7 @@ void SPURegisterInfo::emitPrologue(MachineFunction &MF) const .addReg(SPU::R2) .addReg(SPU::R1); } else { - std::string msg; - raw_string_ostream Msg(msg); - Msg << "Unhandled frame size: " << FrameSize; - llvm_report_error(Msg.str()); + report_fatal_error("Unhandled frame size: " + Twine(FrameSize)); } if (hasDebugInfo) { @@ -605,10 +602,7 @@ SPURegisterInfo::emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const .addReg(SPU::R2) .addReg(SPU::R1); } else { - std::string msg; - raw_string_ostream Msg(msg); - Msg << "Unhandled frame size: " << FrameSize; - llvm_report_error(Msg.str()); + report_fatal_error("Unhandled frame size: " + Twine(FrameSize)); } } } diff --git a/lib/Target/CellSPU/SPUSchedule.td b/lib/Target/CellSPU/SPUSchedule.td index 785dc46..a0b581f 100644 --- a/lib/Target/CellSPU/SPUSchedule.td +++ b/lib/Target/CellSPU/SPUSchedule.td @@ -36,7 +36,7 @@ def RotateShift : InstrItinClass; // EVEN_UNIT def ImmLoad : InstrItinClass; // EVEN_UNIT /* Note: The itinerary for the Cell SPU is somewhat contrived... */ -def SPUItineraries : ProcessorItineraries<[ +def SPUItineraries : ProcessorItineraries<[ODD_UNIT, EVEN_UNIT], [ InstrItinData<LoadStore , [InstrStage<6, [ODD_UNIT]>]>, InstrItinData<BranchHints , [InstrStage<6, [ODD_UNIT]>]>, InstrItinData<BranchResolv, [InstrStage<4, [ODD_UNIT]>]>, diff --git a/lib/Target/CellSPU/SPUSelectionDAGInfo.cpp b/lib/Target/CellSPU/SPUSelectionDAGInfo.cpp new file mode 100644 index 0000000..ca2a4bf --- /dev/null +++ b/lib/Target/CellSPU/SPUSelectionDAGInfo.cpp @@ -0,0 +1,22 @@ +//===-- SPUSelectionDAGInfo.cpp - CellSPU SelectionDAG Info ---------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the SPUSelectionDAGInfo class. 
+// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "cellspu-selectiondag-info" +#include "SPUSelectionDAGInfo.h" +using namespace llvm; + +SPUSelectionDAGInfo::SPUSelectionDAGInfo() { +} + +SPUSelectionDAGInfo::~SPUSelectionDAGInfo() { +} diff --git a/lib/Target/CellSPU/SPUSelectionDAGInfo.h b/lib/Target/CellSPU/SPUSelectionDAGInfo.h new file mode 100644 index 0000000..0a6b4c1 --- /dev/null +++ b/lib/Target/CellSPU/SPUSelectionDAGInfo.h @@ -0,0 +1,29 @@ +//===-- SPUSelectionDAGInfo.h - CellSPU SelectionDAG Info -------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines the CellSPU subclass for TargetSelectionDAGInfo. +// +//===----------------------------------------------------------------------===// + +#ifndef CELLSPUSELECTIONDAGINFO_H +#define CELLSPUSELECTIONDAGINFO_H + +#include "llvm/Target/TargetSelectionDAGInfo.h" + +namespace llvm { + +class SPUSelectionDAGInfo : public TargetSelectionDAGInfo { +public: + SPUSelectionDAGInfo(); + ~SPUSelectionDAGInfo(); +}; + +} + +#endif diff --git a/lib/Target/CellSPU/SPUTargetMachine.h b/lib/Target/CellSPU/SPUTargetMachine.h index 9fdcfe9..37e7cd2 100644 --- a/lib/Target/CellSPU/SPUTargetMachine.h +++ b/lib/Target/CellSPU/SPUTargetMachine.h @@ -57,8 +57,8 @@ public: return NULL; } - virtual SPUTargetLowering *getTargetLowering() const { - return const_cast<SPUTargetLowering*>(&TLInfo); + virtual const SPUTargetLowering *getTargetLowering() const { + return &TLInfo; } virtual const SPURegisterInfo *getRegisterInfo() const { diff --git a/lib/Target/CppBackend/CPPBackend.cpp b/lib/Target/CppBackend/CPPBackend.cpp index 9c5893c..e739b26 100644 --- a/lib/Target/CppBackend/CPPBackend.cpp +++ b/lib/Target/CppBackend/CPPBackend.cpp @@ -210,7 +210,7 @@ namespace { } void CppWriter::error(const std::string& msg) { - llvm_report_error(msg); + report_fatal_error(msg); } // printCFP - Print a floating point constant .. very carefully :) @@ -1082,8 +1082,9 @@ namespace { // Before we emit this instruction, we need to take care of generating any // forward references. 
So, we get the names of all the operands in advance - std::string* opNames = new std::string[I->getNumOperands()]; - for (unsigned i = 0; i < I->getNumOperands(); i++) { + const unsigned Ops(I->getNumOperands()); + std::string* opNames = new std::string[Ops]; + for (unsigned i = 0; i < Ops; i++) { opNames[i] = getOpName(I->getOperand(i)); } @@ -1144,15 +1145,15 @@ namespace { const InvokeInst* inv = cast<InvokeInst>(I); Out << "std::vector<Value*> " << iName << "_params;"; nl(Out); - for (unsigned i = 3; i < inv->getNumOperands(); ++i) { + for (unsigned i = 0; i < inv->getNumOperands() - 3; ++i) { Out << iName << "_params.push_back(" << opNames[i] << ");"; nl(Out); } Out << "InvokeInst *" << iName << " = InvokeInst::Create(" - << opNames[0] << ", " - << opNames[1] << ", " - << opNames[2] << ", " + << opNames[Ops - 3] << ", " + << opNames[Ops - 2] << ", " + << opNames[Ops - 1] << ", " << iName << "_params.begin(), " << iName << "_params.end(), \""; printEscapedString(inv->getName()); Out << "\", " << bbname << ");"; diff --git a/lib/Target/MBlaze/AsmPrinter/CMakeLists.txt b/lib/Target/MBlaze/AsmPrinter/CMakeLists.txt index cfb2fc8..fac2c19 100644 --- a/lib/Target/MBlaze/AsmPrinter/CMakeLists.txt +++ b/lib/Target/MBlaze/AsmPrinter/CMakeLists.txt @@ -1,9 +1,9 @@ -include_directories(
- ${CMAKE_CURRENT_BINARY_DIR}/..
- ${CMAKE_CURRENT_SOURCE_DIR}/..
- )
-
-add_llvm_library(LLVMMBlazeAsmPrinter
+include_directories( + ${CMAKE_CURRENT_BINARY_DIR}/.. + ${CMAKE_CURRENT_SOURCE_DIR}/.. + ) + +add_llvm_library(LLVMMBlazeAsmPrinter MBlazeAsmPrinter.cpp - )
-add_dependencies(LLVMMBlazeAsmPrinter MBlazeCodeGenTable_gen)
\ No newline at end of file + ) +add_dependencies(LLVMMBlazeAsmPrinter MBlazeCodeGenTable_gen) diff --git a/lib/Target/MBlaze/AsmPrinter/MBlazeAsmPrinter.cpp b/lib/Target/MBlaze/AsmPrinter/MBlazeAsmPrinter.cpp index b1df926..04dfb0a 100644 --- a/lib/Target/MBlaze/AsmPrinter/MBlazeAsmPrinter.cpp +++ b/lib/Target/MBlaze/AsmPrinter/MBlazeAsmPrinter.cpp @@ -268,7 +268,7 @@ void MBlazeAsmPrinter::printOperand(const MachineInstr *MI, int opNum, void MBlazeAsmPrinter::printUnsignedImm(const MachineInstr *MI, int opNum, raw_ostream &O) { const MachineOperand &MO = MI->getOperand(opNum); - if (MO.getType() == MachineOperand::MO_Immediate) + if (MO.isImm()) O << (unsigned int)MO.getImm(); else printOperand(MI, opNum, O); @@ -277,7 +277,7 @@ void MBlazeAsmPrinter::printUnsignedImm(const MachineInstr *MI, int opNum, void MBlazeAsmPrinter::printFSLImm(const MachineInstr *MI, int opNum, raw_ostream &O) { const MachineOperand &MO = MI->getOperand(opNum); - if (MO.getType() == MachineOperand::MO_Immediate) + if (MO.isImm()) O << "rfsl" << (unsigned int)MO.getImm(); else printOperand(MI, opNum, O); diff --git a/lib/Target/MBlaze/CMakeLists.txt b/lib/Target/MBlaze/CMakeLists.txt index c93e3df..7f85bf8 100644 --- a/lib/Target/MBlaze/CMakeLists.txt +++ b/lib/Target/MBlaze/CMakeLists.txt @@ -22,6 +22,7 @@ add_llvm_target(MBlazeCodeGen MBlazeTargetMachine.cpp MBlazeTargetObjectFile.cpp MBlazeIntrinsicInfo.cpp + MBlazeSelectionDAGInfo.cpp ) target_link_libraries (LLVMMBlazeCodeGen LLVMSelectionDAG) diff --git a/lib/Target/MBlaze/MBlazeISelDAGToDAG.cpp b/lib/Target/MBlaze/MBlazeISelDAGToDAG.cpp index 7e59c4a..c7cd5f4 100644 --- a/lib/Target/MBlaze/MBlazeISelDAGToDAG.cpp +++ b/lib/Target/MBlaze/MBlazeISelDAGToDAG.cpp @@ -13,7 +13,6 @@ #define DEBUG_TYPE "mblaze-isel" #include "MBlaze.h" -#include "MBlazeISelLowering.h" #include "MBlazeMachineFunction.h" #include "MBlazeRegisterInfo.h" #include "MBlazeSubtarget.h" diff --git a/lib/Target/MBlaze/MBlazeISelLowering.cpp b/lib/Target/MBlaze/MBlazeISelLowering.cpp index f0864d0..23889b1 100644 --- a/lib/Target/MBlaze/MBlazeISelLowering.cpp +++ b/lib/Target/MBlaze/MBlazeISelLowering.cpp @@ -185,7 +185,8 @@ unsigned MBlazeTargetLowering::getFunctionAlignment(const Function *) const { return 2; } -SDValue MBlazeTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) { +SDValue MBlazeTargetLowering::LowerOperation(SDValue Op, + SelectionDAG &DAG) const { switch (Op.getOpcode()) { case ISD::ConstantPool: return LowerConstantPool(Op, DAG); @@ -201,10 +202,9 @@ SDValue MBlazeTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) { //===----------------------------------------------------------------------===// // Lower helper functions //===----------------------------------------------------------------------===// -MachineBasicBlock* MBlazeTargetLowering:: -EmitInstrWithCustomInserter(MachineInstr *MI, MachineBasicBlock *BB, - DenseMap<MachineBasicBlock*, - MachineBasicBlock*> *EM) const { +MachineBasicBlock* +MBlazeTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, + MachineBasicBlock *BB) const { const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); DebugLoc dl = MI->getDebugLoc(); @@ -254,12 +254,9 @@ EmitInstrWithCustomInserter(MachineInstr *MI, MachineBasicBlock *BB, // Update machine-CFG edges by first adding all successors of the current // block to the new block which will contain the Phi node for the select. - // Also inform sdisel of the edge changes. 
for(MachineBasicBlock::succ_iterator i = BB->succ_begin(), - e = BB->succ_end(); i != e; ++i) { - EM->insert(std::make_pair(*i, finish)); + e = BB->succ_end(); i != e; ++i) finish->addSuccessor(*i); - } // Next, remove all successors of the current block, and add the true // and fallthrough blocks as its successors. @@ -350,12 +347,9 @@ EmitInstrWithCustomInserter(MachineInstr *MI, MachineBasicBlock *BB, // Update machine-CFG edges by first adding all successors of the current // block to the new block which will contain the Phi node for the select. - // Also inform sdisel of the edge changes. for(MachineBasicBlock::succ_iterator i = BB->succ_begin(), - e = BB->succ_end(); i != e; ++i) { - EM->insert(std::make_pair(*i, dneBB)); + e = BB->succ_end(); i != e; ++i) dneBB->addSuccessor(*i); - } // Next, remove all successors of the current block, and add the true // and fallthrough blocks as its successors. @@ -387,7 +381,8 @@ EmitInstrWithCustomInserter(MachineInstr *MI, MachineBasicBlock *BB, //===----------------------------------------------------------------------===// // -SDValue MBlazeTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) { +SDValue MBlazeTargetLowering::LowerSELECT_CC(SDValue Op, + SelectionDAG &DAG) const { SDValue LHS = Op.getOperand(0); SDValue RHS = Op.getOperand(1); SDValue TrueVal = Op.getOperand(2); @@ -409,23 +404,23 @@ SDValue MBlazeTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) { } SDValue MBlazeTargetLowering:: -LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) { +LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const { // FIXME there isn't actually debug info here DebugLoc dl = Op.getDebugLoc(); - GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal(); + const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal(); SDValue GA = DAG.getTargetGlobalAddress(GV, MVT::i32); return DAG.getNode(MBlazeISD::Wrap, dl, MVT::i32, GA); } SDValue MBlazeTargetLowering:: -LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) { +LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const { llvm_unreachable("TLS not implemented for MicroBlaze."); return SDValue(); // Not reached } SDValue MBlazeTargetLowering:: -LowerJumpTable(SDValue Op, SelectionDAG &DAG) { +LowerJumpTable(SDValue Op, SelectionDAG &DAG) const { SDValue ResNode; SDValue HiPart; // FIXME there isn't actually debug info here @@ -442,11 +437,11 @@ LowerJumpTable(SDValue Op, SelectionDAG &DAG) { } SDValue MBlazeTargetLowering:: -LowerConstantPool(SDValue Op, SelectionDAG &DAG) { +LowerConstantPool(SDValue Op, SelectionDAG &DAG) const { SDValue ResNode; EVT PtrVT = Op.getValueType(); ConstantPoolSDNode *N = cast<ConstantPoolSDNode>(Op); - Constant *C = N->getConstVal(); + const Constant *C = N->getConstVal(); SDValue Zero = DAG.getConstant(0, PtrVT); DebugLoc dl = Op.getDebugLoc(); @@ -455,9 +450,14 @@ LowerConstantPool(SDValue Op, SelectionDAG &DAG) { return DAG.getNode(MBlazeISD::Wrap, dl, MVT::i32, CP); } -SDValue MBlazeTargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG) { +SDValue MBlazeTargetLowering::LowerVASTART(SDValue Op, + SelectionDAG &DAG) const { + MachineFunction &MF = DAG.getMachineFunction(); + MBlazeFunctionInfo *FuncInfo = MF.getInfo<MBlazeFunctionInfo>(); + DebugLoc dl = Op.getDebugLoc(); - SDValue FI = DAG.getFrameIndex(VarArgsFrameIndex, getPointerTy()); + SDValue FI = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), + getPointerTy()); // vastart just stores the address of the VarArgsFrameIndex slot into the // memory location argument. 
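Note on the LowerVASTART/LowerFormalArguments hunks above: with the lowering hooks becoming const in this patch, per-function state such as the var-args frame index can no longer live as a mutable TargetLowering member, so it moves into the target's MachineFunctionInfo subclass (MBlazeFunctionInfo here, SPUFunctionInfo in the CellSPU hunks earlier). A minimal sketch of the resulting pattern, using only the accessors added in this diff:

    // Recorded once in LowerFormalArguments, read back in LowerVASTART.
    MachineFunction &MF = DAG.getMachineFunction();
    MBlazeFunctionInfo *FuncInfo = MF.getInfo<MBlazeFunctionInfo>();
    if (!FuncInfo->getVarArgsFrameIndex())
      FuncInfo->setVarArgsFrameIndex(FI);          // FI from CreateFixedObject
    SDValue Slot = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(),
                                     getPointerTy());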
@@ -533,7 +533,7 @@ LowerCall(SDValue Chain, SDValue Callee, CallingConv::ID CallConv, const SmallVectorImpl<ISD::OutputArg> &Outs, const SmallVectorImpl<ISD::InputArg> &Ins, DebugLoc dl, SelectionDAG &DAG, - SmallVectorImpl<SDValue> &InVals) { + SmallVectorImpl<SDValue> &InVals) const { // MBlaze does not yet support tail call optimization isTailCall = false; @@ -669,7 +669,7 @@ SDValue MBlazeTargetLowering:: LowerCallResult(SDValue Chain, SDValue InFlag, CallingConv::ID CallConv, bool isVarArg, const SmallVectorImpl<ISD::InputArg> &Ins, DebugLoc dl, SelectionDAG &DAG, - SmallVectorImpl<SDValue> &InVals) { + SmallVectorImpl<SDValue> &InVals) const { // Assign locations to each value returned by this call. SmallVector<CCValAssign, 16> RVLocs; CCState CCInfo(CallConv, isVarArg, getTargetMachine(), @@ -699,13 +699,13 @@ SDValue MBlazeTargetLowering:: LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, bool isVarArg, const SmallVectorImpl<ISD::InputArg> &Ins, DebugLoc dl, SelectionDAG &DAG, - SmallVectorImpl<SDValue> &InVals) { + SmallVectorImpl<SDValue> &InVals) const { MachineFunction &MF = DAG.getMachineFunction(); MachineFrameInfo *MFI = MF.getFrameInfo(); MBlazeFunctionInfo *MBlazeFI = MF.getInfo<MBlazeFunctionInfo>(); unsigned StackReg = MF.getTarget().getRegisterInfo()->getFrameRegister(MF); - VarArgsFrameIndex = 0; + MBlazeFI->setVarArgsFrameIndex(0); // Used with vargs to acumulate store chains. std::vector<SDValue> OutChains; @@ -818,8 +818,8 @@ LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, bool isVarArg, // Record the frame index of the first variable argument // which is a value necessary to VASTART. - if (!VarArgsFrameIndex) - VarArgsFrameIndex = FI; + if (!MBlazeFI->getVarArgsFrameIndex()) + MBlazeFI->setVarArgsFrameIndex(FI); } } @@ -841,7 +841,7 @@ LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, bool isVarArg, SDValue MBlazeTargetLowering:: LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool isVarArg, const SmallVectorImpl<ISD::OutputArg> &Outs, - DebugLoc dl, SelectionDAG &DAG) { + DebugLoc dl, SelectionDAG &DAG) const { // CCValAssign - represent the assignment of // the return value to a location SmallVector<CCValAssign, 16> RVLocs; diff --git a/lib/Target/MBlaze/MBlazeISelLowering.h b/lib/Target/MBlaze/MBlazeISelLowering.h index f8b1470..9f9ac89 100644 --- a/lib/Target/MBlaze/MBlazeISelLowering.h +++ b/lib/Target/MBlaze/MBlazeISelLowering.h @@ -63,14 +63,11 @@ namespace llvm { //===--------------------------------------------------------------------===// class MBlazeTargetLowering : public TargetLowering { - int VarArgsFrameIndex; // FrameIndex for start of varargs area. - public: - explicit MBlazeTargetLowering(MBlazeTargetMachine &TM); /// LowerOperation - Provide custom lowering hooks for some operations. - virtual SDValue LowerOperation(SDValue Op, SelectionDAG &DAG); + virtual SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const; /// getTargetNodeName - This method returns the name of a target specific // DAG node. 
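Note on the header hunk that follows: EmitInstrWithCustomInserter drops its sdisel edge-map parameter, matching the EM->insert removals in the .cpp hunks above; successor edges are now added directly on the new blocks rather than being recorded in a DenseMap for the selector. The MSP430 hunks further down make the same change. Sketch of the signature change as it appears in this patch:

    // Before:
    virtual MachineBasicBlock *
    EmitInstrWithCustomInserter(MachineInstr *MI, MachineBasicBlock *MBB,
        DenseMap<MachineBasicBlock*, MachineBasicBlock*> *EM) const;
    // After:
    virtual MachineBasicBlock *
    EmitInstrWithCustomInserter(MachineInstr *MI, MachineBasicBlock *MBB) const;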
@@ -90,22 +87,22 @@ namespace llvm { CallingConv::ID CallConv, bool isVarArg, const SmallVectorImpl<ISD::InputArg> &Ins, DebugLoc dl, SelectionDAG &DAG, - SmallVectorImpl<SDValue> &InVals); + SmallVectorImpl<SDValue> &InVals) const; // Lower Operand specifics - SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG); - SDValue LowerGlobalAddress(SDValue Op, SelectionDAG &DAG); - SDValue LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG); - SDValue LowerJumpTable(SDValue Op, SelectionDAG &DAG); - SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG); - SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG); + SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerJumpTable(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG) const; virtual SDValue LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, bool isVarArg, const SmallVectorImpl<ISD::InputArg> &Ins, DebugLoc dl, SelectionDAG &DAG, - SmallVectorImpl<SDValue> &InVals); + SmallVectorImpl<SDValue> &InVals) const; virtual SDValue LowerCall(SDValue Chain, SDValue Callee, @@ -114,17 +111,17 @@ namespace llvm { const SmallVectorImpl<ISD::OutputArg> &Outs, const SmallVectorImpl<ISD::InputArg> &Ins, DebugLoc dl, SelectionDAG &DAG, - SmallVectorImpl<SDValue> &InVals); + SmallVectorImpl<SDValue> &InVals) const; virtual SDValue LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool isVarArg, const SmallVectorImpl<ISD::OutputArg> &Outs, - DebugLoc dl, SelectionDAG &DAG); + DebugLoc dl, SelectionDAG &DAG) const; - virtual MachineBasicBlock *EmitInstrWithCustomInserter(MachineInstr *MI, - MachineBasicBlock *MBB, - DenseMap<MachineBasicBlock*, MachineBasicBlock*> *EM) const; + virtual MachineBasicBlock * + EmitInstrWithCustomInserter(MachineInstr *MI, + MachineBasicBlock *MBB) const; // Inline asm support ConstraintType getConstraintType(const std::string &Constraint) const; diff --git a/lib/Target/MBlaze/MBlazeMachineFunction.h b/lib/Target/MBlaze/MBlazeMachineFunction.h index 08d4dca..1f956c1 100644 --- a/lib/Target/MBlaze/MBlazeMachineFunction.h +++ b/lib/Target/MBlaze/MBlazeMachineFunction.h @@ -79,11 +79,14 @@ private: /// relocation models. unsigned GlobalBaseReg; + // VarArgsFrameIndex - FrameIndex for start of varargs area. + int VarArgsFrameIndex; + public: MBlazeFunctionInfo(MachineFunction& MF) : FPStackOffset(0), RAStackOffset(0), CPUTopSavedRegOff(0), GPHolder(-1,-1), HasLoadArgs(false), HasStoreVarArgs(false), - SRetReturnReg(0), GlobalBaseReg(0) + SRetReturnReg(0), GlobalBaseReg(0), VarArgsFrameIndex(0) {} int getFPStackOffset() const { return FPStackOffset; } @@ -129,6 +132,9 @@ public: unsigned getGlobalBaseReg() const { return GlobalBaseReg; } void setGlobalBaseReg(unsigned Reg) { GlobalBaseReg = Reg; } + + int getVarArgsFrameIndex() const { return VarArgsFrameIndex; } + void setVarArgsFrameIndex(int Index) { VarArgsFrameIndex = Index; } }; } // end of namespace llvm diff --git a/lib/Target/MBlaze/MBlazeRegisterInfo.cpp b/lib/Target/MBlaze/MBlazeRegisterInfo.cpp index a12310a..e15176e 100644 --- a/lib/Target/MBlaze/MBlazeRegisterInfo.cpp +++ b/lib/Target/MBlaze/MBlazeRegisterInfo.cpp @@ -243,7 +243,7 @@ void MBlazeRegisterInfo::adjustMBlazeStackFrame(MachineFunction &MF) const { // if frame pointer elimination is disabled. 
bool MBlazeRegisterInfo::hasFP(const MachineFunction &MF) const { const MachineFrameInfo *MFI = MF.getFrameInfo(); - return NoFramePointerElim || MFI->hasVarSizedObjects(); + return DisableFramePointerElim(MF) || MFI->hasVarSizedObjects(); } // This function eliminate ADJCALLSTACKDOWN, diff --git a/lib/Target/MBlaze/MBlazeSchedule.td b/lib/Target/MBlaze/MBlazeSchedule.td index 6a94491..1fec9e6 100644 --- a/lib/Target/MBlaze/MBlazeSchedule.td +++ b/lib/Target/MBlaze/MBlazeSchedule.td @@ -40,7 +40,8 @@ def IIPseudo : InstrItinClass; //===----------------------------------------------------------------------===// // MBlaze Generic instruction itineraries. //===----------------------------------------------------------------------===// -def MBlazeGenericItineraries : ProcessorItineraries<[ +def MBlazeGenericItineraries : ProcessorItineraries< + [ALU, IMULDIV], [ InstrItinData<IIAlu , [InstrStage<1, [ALU]>]>, InstrItinData<IILoad , [InstrStage<3, [ALU]>]>, InstrItinData<IIStore , [InstrStage<1, [ALU]>]>, diff --git a/lib/Target/MBlaze/MBlazeSelectionDAGInfo.cpp b/lib/Target/MBlaze/MBlazeSelectionDAGInfo.cpp new file mode 100644 index 0000000..105e42a --- /dev/null +++ b/lib/Target/MBlaze/MBlazeSelectionDAGInfo.cpp @@ -0,0 +1,22 @@ +//===-- MBlazeSelectionDAGInfo.cpp - MBlaze SelectionDAG Info -------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the MBlazeSelectionDAGInfo class. +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "mblaze-selectiondag-info" +#include "MBlazeSelectionDAGInfo.h" +using namespace llvm; + +MBlazeSelectionDAGInfo::MBlazeSelectionDAGInfo() { +} + +MBlazeSelectionDAGInfo::~MBlazeSelectionDAGInfo() { +} diff --git a/lib/Target/MBlaze/MBlazeSelectionDAGInfo.h b/lib/Target/MBlaze/MBlazeSelectionDAGInfo.h new file mode 100644 index 0000000..11e6879 --- /dev/null +++ b/lib/Target/MBlaze/MBlazeSelectionDAGInfo.h @@ -0,0 +1,29 @@ +//===-- MBlazeSelectionDAGInfo.h - MBlaze SelectionDAG Info -----*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines the MBlaze subclass for TargetSelectionDAGInfo. 
+// +//===----------------------------------------------------------------------===// + +#ifndef MBLAZESELECTIONDAGINFO_H +#define MBLAZESELECTIONDAGINFO_H + +#include "llvm/Target/TargetSelectionDAGInfo.h" + +namespace llvm { + +class MBlazeSelectionDAGInfo : public TargetSelectionDAGInfo { +public: + MBlazeSelectionDAGInfo(); + ~MBlazeSelectionDAGInfo(); +}; + +} + +#endif diff --git a/lib/Target/MBlaze/MBlazeTargetMachine.h b/lib/Target/MBlaze/MBlazeTargetMachine.h index 85c975c..9bf9898 100644 --- a/lib/Target/MBlaze/MBlazeTargetMachine.h +++ b/lib/Target/MBlaze/MBlazeTargetMachine.h @@ -51,8 +51,8 @@ namespace llvm { virtual const MBlazeRegisterInfo *getRegisterInfo() const { return &InstrInfo.getRegisterInfo(); } - virtual MBlazeTargetLowering *getTargetLowering() const - { return const_cast<MBlazeTargetLowering*>(&TLInfo); } + virtual const MBlazeTargetLowering *getTargetLowering() const + { return &TLInfo; } const TargetIntrinsicInfo *getIntrinsicInfo() const { return &IntrinsicInfo; } diff --git a/lib/Target/MBlaze/MBlazeTargetObjectFile.cpp b/lib/Target/MBlaze/MBlazeTargetObjectFile.cpp index 79c9494..05c01ef 100644 --- a/lib/Target/MBlaze/MBlazeTargetObjectFile.cpp +++ b/lib/Target/MBlaze/MBlazeTargetObjectFile.cpp @@ -11,6 +11,7 @@ #include "MBlazeSubtarget.h" #include "llvm/DerivedTypes.h" #include "llvm/GlobalVariable.h" +#include "llvm/MC/MCContext.h" #include "llvm/MC/MCSectionELF.h" #include "llvm/Target/TargetData.h" #include "llvm/Target/TargetMachine.h" @@ -22,14 +23,14 @@ Initialize(MCContext &Ctx, const TargetMachine &TM) { TargetLoweringObjectFileELF::Initialize(Ctx, TM); SmallDataSection = - getELFSection(".sdata", MCSectionELF::SHT_PROGBITS, - MCSectionELF::SHF_WRITE | MCSectionELF::SHF_ALLOC, - SectionKind::getDataRel()); + getContext().getELFSection(".sdata", MCSectionELF::SHT_PROGBITS, + MCSectionELF::SHF_WRITE |MCSectionELF::SHF_ALLOC, + SectionKind::getDataRel()); SmallBSSSection = - getELFSection(".sbss", MCSectionELF::SHT_NOBITS, - MCSectionELF::SHF_WRITE | MCSectionELF::SHF_ALLOC, - SectionKind::getBSS()); + getContext().getELFSection(".sbss", MCSectionELF::SHT_NOBITS, + MCSectionELF::SHF_WRITE |MCSectionELF::SHF_ALLOC, + SectionKind::getBSS()); } diff --git a/lib/Target/MSP430/AsmPrinter/MSP430MCInstLower.cpp b/lib/Target/MSP430/AsmPrinter/MSP430MCInstLower.cpp index f4d7d8a..d1d9a11 100644 --- a/lib/Target/MSP430/AsmPrinter/MSP430MCInstLower.cpp +++ b/lib/Target/MSP430/AsmPrinter/MSP430MCInstLower.cpp @@ -78,6 +78,16 @@ GetConstantPoolIndexSymbol(const MachineOperand &MO) const { return Ctx.GetOrCreateSymbol(Name.str()); } +MCSymbol *MSP430MCInstLower:: +GetBlockAddressSymbol(const MachineOperand &MO) const { + switch (MO.getTargetFlags()) { + default: assert(0 && "Unknown target flag on GV operand"); + case 0: break; + } + + return Printer.GetBlockAddressSymbol(MO.getBlockAddress()); +} + MCOperand MSP430MCInstLower:: LowerSymbolOperand(const MachineOperand &MO, MCSymbol *Sym) const { // FIXME: We would like an efficient form for this, so we don't have to do a @@ -131,6 +141,8 @@ void MSP430MCInstLower::Lower(const MachineInstr *MI, MCInst &OutMI) const { case MachineOperand::MO_ConstantPoolIndex: MCOp = LowerSymbolOperand(MO, GetConstantPoolIndexSymbol(MO)); break; + case MachineOperand::MO_BlockAddress: + MCOp = LowerSymbolOperand(MO, GetBlockAddressSymbol(MO)); } OutMI.addOperand(MCOp); diff --git a/lib/Target/MSP430/AsmPrinter/MSP430MCInstLower.h b/lib/Target/MSP430/AsmPrinter/MSP430MCInstLower.h index a2b99ae..f9620e8 100644 --- 
a/lib/Target/MSP430/AsmPrinter/MSP430MCInstLower.h +++ b/lib/Target/MSP430/AsmPrinter/MSP430MCInstLower.h @@ -42,6 +42,7 @@ public: MCSymbol *GetExternalSymbolSymbol(const MachineOperand &MO) const; MCSymbol *GetJumpTableSymbol(const MachineOperand &MO) const; MCSymbol *GetConstantPoolIndexSymbol(const MachineOperand &MO) const; + MCSymbol *GetBlockAddressSymbol(const MachineOperand &MO) const; }; } diff --git a/lib/Target/MSP430/CMakeLists.txt b/lib/Target/MSP430/CMakeLists.txt index 29abe46..a3f60d2 100644 --- a/lib/Target/MSP430/CMakeLists.txt +++ b/lib/Target/MSP430/CMakeLists.txt @@ -19,6 +19,7 @@ add_llvm_target(MSP430CodeGen MSP430RegisterInfo.cpp MSP430Subtarget.cpp MSP430TargetMachine.cpp + MSP430SelectionDAGInfo.cpp ) target_link_libraries (LLVMMSP430CodeGen LLVMSelectionDAG) diff --git a/lib/Target/MSP430/MSP430BranchSelector.cpp b/lib/Target/MSP430/MSP430BranchSelector.cpp index 836e425..68cb342 100644 --- a/lib/Target/MSP430/MSP430BranchSelector.cpp +++ b/lib/Target/MSP430/MSP430BranchSelector.cpp @@ -157,7 +157,7 @@ bool MSP430BSel::runOnMachineFunction(MachineFunction &Fn) { NewSize = 6; } // Uncond branch to the real destination. - I = BuildMI(MBB, I, dl, TII->get(MSP430::B)).addMBB(Dest); + I = BuildMI(MBB, I, dl, TII->get(MSP430::Bi)).addMBB(Dest); // Remove the old branch from the function. OldBranch->eraseFromParent(); diff --git a/lib/Target/MSP430/MSP430ISelDAGToDAG.cpp b/lib/Target/MSP430/MSP430ISelDAGToDAG.cpp index 911cfcb..7b328bb 100644 --- a/lib/Target/MSP430/MSP430ISelDAGToDAG.cpp +++ b/lib/Target/MSP430/MSP430ISelDAGToDAG.cpp @@ -12,7 +12,6 @@ //===----------------------------------------------------------------------===// #include "MSP430.h" -#include "MSP430ISelLowering.h" #include "MSP430TargetMachine.h" #include "llvm/DerivedTypes.h" #include "llvm/Function.h" @@ -45,9 +44,9 @@ namespace { } Base; int16_t Disp; - GlobalValue *GV; - Constant *CP; - BlockAddress *BlockAddr; + const GlobalValue *GV; + const Constant *CP; + const BlockAddress *BlockAddr; const char *ES; int JT; unsigned Align; // CP alignment. 
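Note on the MSP430ISelDAGToDAG hunk above: the cached GV/CP/BlockAddr pointers become const, which, together with the identical edits in the CellSPU and MBlaze hunks, suggests the SelectionDAG node accessors now hand out const pointers. A one-line sketch of the adjusted usage, taken from the pattern in this diff:

    const GlobalValue *GV = cast<GlobalAddressSDNode>(N)->getGlobal();
    const Constant    *C  = cast<ConstantPoolSDNode>(N)->getConstVal();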
@@ -100,7 +99,7 @@ namespace { /// namespace { class MSP430DAGToDAGISel : public SelectionDAGISel { - MSP430TargetLowering &Lowering; + const MSP430TargetLowering &Lowering; const MSP430Subtarget &Subtarget; public: @@ -364,7 +363,7 @@ SDNode *MSP430DAGToDAGISel::SelectIndexedBinOp(SDNode *Op, unsigned Opc8, unsigned Opc16) { if (N1.getOpcode() == ISD::LOAD && N1.hasOneUse() && - IsLegalToFold(N1, Op, Op)) { + IsLegalToFold(N1, Op, Op, OptLevel)) { LoadSDNode *LD = cast<LoadSDNode>(N1); if (!isValidIndexedLoad(LD)) return NULL; diff --git a/lib/Target/MSP430/MSP430ISelLowering.cpp b/lib/Target/MSP430/MSP430ISelLowering.cpp index e6c7e1e..c3e2bdf7 100644 --- a/lib/Target/MSP430/MSP430ISelLowering.cpp +++ b/lib/Target/MSP430/MSP430ISelLowering.cpp @@ -110,8 +110,8 @@ MSP430TargetLowering::MSP430TargetLowering(MSP430TargetMachine &tm) : setOperationAction(ISD::ROTR, MVT::i16, Expand); setOperationAction(ISD::GlobalAddress, MVT::i16, Custom); setOperationAction(ISD::ExternalSymbol, MVT::i16, Custom); + setOperationAction(ISD::BlockAddress, MVT::i16, Custom); setOperationAction(ISD::BR_JT, MVT::Other, Expand); - setOperationAction(ISD::BRIND, MVT::Other, Expand); setOperationAction(ISD::BR_CC, MVT::i8, Custom); setOperationAction(ISD::BR_CC, MVT::i16, Custom); setOperationAction(ISD::BRCOND, MVT::Other, Expand); @@ -176,12 +176,14 @@ MSP430TargetLowering::MSP430TargetLowering(MSP430TargetMachine &tm) : } } -SDValue MSP430TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) { +SDValue MSP430TargetLowering::LowerOperation(SDValue Op, + SelectionDAG &DAG) const { switch (Op.getOpcode()) { case ISD::SHL: // FALLTHROUGH case ISD::SRL: case ISD::SRA: return LowerShifts(Op, DAG); case ISD::GlobalAddress: return LowerGlobalAddress(Op, DAG); + case ISD::BlockAddress: return LowerBlockAddress(Op, DAG); case ISD::ExternalSymbol: return LowerExternalSymbol(Op, DAG); case ISD::SETCC: return LowerSETCC(Op, DAG); case ISD::BR_CC: return LowerBR_CC(Op, DAG); @@ -252,7 +254,8 @@ MSP430TargetLowering::LowerFormalArguments(SDValue Chain, &Ins, DebugLoc dl, SelectionDAG &DAG, - SmallVectorImpl<SDValue> &InVals) { + SmallVectorImpl<SDValue> &InVals) + const { switch (CallConv) { default: @@ -264,7 +267,7 @@ MSP430TargetLowering::LowerFormalArguments(SDValue Chain, if (Ins.empty()) return Chain; else { - llvm_report_error("ISRs cannot have arguments"); + report_fatal_error("ISRs cannot have arguments"); return SDValue(); } } @@ -277,7 +280,7 @@ MSP430TargetLowering::LowerCall(SDValue Chain, SDValue Callee, const SmallVectorImpl<ISD::OutputArg> &Outs, const SmallVectorImpl<ISD::InputArg> &Ins, DebugLoc dl, SelectionDAG &DAG, - SmallVectorImpl<SDValue> &InVals) { + SmallVectorImpl<SDValue> &InVals) const { // MSP430 target does not yet support tail call optimization. 
isTailCall = false; @@ -289,7 +292,7 @@ MSP430TargetLowering::LowerCall(SDValue Chain, SDValue Callee, return LowerCCCCallTo(Chain, Callee, CallConv, isVarArg, isTailCall, Outs, Ins, dl, DAG, InVals); case CallingConv::MSP430_INTR: - llvm_report_error("ISRs cannot be called directly"); + report_fatal_error("ISRs cannot be called directly"); return SDValue(); } } @@ -306,7 +309,8 @@ MSP430TargetLowering::LowerCCCArguments(SDValue Chain, &Ins, DebugLoc dl, SelectionDAG &DAG, - SmallVectorImpl<SDValue> &InVals) { + SmallVectorImpl<SDValue> &InVals) + const { MachineFunction &MF = DAG.getMachineFunction(); MachineFrameInfo *MFI = MF.getFrameInfo(); MachineRegisterInfo &RegInfo = MF.getRegInfo(); @@ -383,14 +387,14 @@ SDValue MSP430TargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool isVarArg, const SmallVectorImpl<ISD::OutputArg> &Outs, - DebugLoc dl, SelectionDAG &DAG) { + DebugLoc dl, SelectionDAG &DAG) const { // CCValAssign - represent the assignment of the return value to a location SmallVector<CCValAssign, 16> RVLocs; // ISRs cannot return any value. if (CallConv == CallingConv::MSP430_INTR && !Outs.empty()) { - llvm_report_error("ISRs cannot return any value"); + report_fatal_error("ISRs cannot return any value"); return SDValue(); } @@ -445,7 +449,7 @@ MSP430TargetLowering::LowerCCCCallTo(SDValue Chain, SDValue Callee, &Outs, const SmallVectorImpl<ISD::InputArg> &Ins, DebugLoc dl, SelectionDAG &DAG, - SmallVectorImpl<SDValue> &InVals) { + SmallVectorImpl<SDValue> &InVals) const { // Analyze operands of the call, assigning locations to each operand. SmallVector<CCValAssign, 16> ArgLocs; CCState CCInfo(CallConv, isVarArg, getTargetMachine(), @@ -568,7 +572,7 @@ MSP430TargetLowering::LowerCallResult(SDValue Chain, SDValue InFlag, CallingConv::ID CallConv, bool isVarArg, const SmallVectorImpl<ISD::InputArg> &Ins, DebugLoc dl, SelectionDAG &DAG, - SmallVectorImpl<SDValue> &InVals) { + SmallVectorImpl<SDValue> &InVals) const { // Assign locations to each value returned by this call. 
SmallVector<CCValAssign, 16> RVLocs; @@ -589,7 +593,7 @@ MSP430TargetLowering::LowerCallResult(SDValue Chain, SDValue InFlag, } SDValue MSP430TargetLowering::LowerShifts(SDValue Op, - SelectionDAG &DAG) { + SelectionDAG &DAG) const { unsigned Opc = Op.getOpcode(); SDNode* N = Op.getNode(); EVT VT = Op.getValueType(); @@ -632,7 +636,8 @@ SDValue MSP430TargetLowering::LowerShifts(SDValue Op, return Victim; } -SDValue MSP430TargetLowering::LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) { +SDValue MSP430TargetLowering::LowerGlobalAddress(SDValue Op, + SelectionDAG &DAG) const { const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal(); int64_t Offset = cast<GlobalAddressSDNode>(Op)->getOffset(); @@ -643,7 +648,7 @@ SDValue MSP430TargetLowering::LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) } SDValue MSP430TargetLowering::LowerExternalSymbol(SDValue Op, - SelectionDAG &DAG) { + SelectionDAG &DAG) const { DebugLoc dl = Op.getDebugLoc(); const char *Sym = cast<ExternalSymbolSDNode>(Op)->getSymbol(); SDValue Result = DAG.getTargetExternalSymbol(Sym, getPointerTy()); @@ -651,6 +656,15 @@ SDValue MSP430TargetLowering::LowerExternalSymbol(SDValue Op, return DAG.getNode(MSP430ISD::Wrapper, dl, getPointerTy(), Result);; } +SDValue MSP430TargetLowering::LowerBlockAddress(SDValue Op, + SelectionDAG &DAG) const { + DebugLoc dl = Op.getDebugLoc(); + const BlockAddress *BA = cast<BlockAddressSDNode>(Op)->getBlockAddress(); + SDValue Result = DAG.getBlockAddress(BA, getPointerTy(), /*isTarget=*/true); + + return DAG.getNode(MSP430ISD::Wrapper, dl, getPointerTy(), Result);; +} + static SDValue EmitCMP(SDValue &LHS, SDValue &RHS, SDValue &TargetCC, ISD::CondCode CC, DebugLoc dl, SelectionDAG &DAG) { @@ -734,7 +748,7 @@ static SDValue EmitCMP(SDValue &LHS, SDValue &RHS, SDValue &TargetCC, } -SDValue MSP430TargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) { +SDValue MSP430TargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const { SDValue Chain = Op.getOperand(0); ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(1))->get(); SDValue LHS = Op.getOperand(2); @@ -749,8 +763,7 @@ SDValue MSP430TargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) { Chain, Dest, TargetCC, Flag); } - -SDValue MSP430TargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) { +SDValue MSP430TargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const { SDValue LHS = Op.getOperand(0); SDValue RHS = Op.getOperand(1); DebugLoc dl = Op.getDebugLoc(); @@ -830,7 +843,8 @@ SDValue MSP430TargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) { } } -SDValue MSP430TargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) { +SDValue MSP430TargetLowering::LowerSELECT_CC(SDValue Op, + SelectionDAG &DAG) const { SDValue LHS = Op.getOperand(0); SDValue RHS = Op.getOperand(1); SDValue TrueV = Op.getOperand(2); @@ -852,7 +866,7 @@ SDValue MSP430TargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) { } SDValue MSP430TargetLowering::LowerSIGN_EXTEND(SDValue Op, - SelectionDAG &DAG) { + SelectionDAG &DAG) const { SDValue Val = Op.getOperand(0); EVT VT = Op.getValueType(); DebugLoc dl = Op.getDebugLoc(); @@ -864,7 +878,8 @@ SDValue MSP430TargetLowering::LowerSIGN_EXTEND(SDValue Op, DAG.getValueType(Val.getValueType())); } -SDValue MSP430TargetLowering::getReturnAddressFrameIndex(SelectionDAG &DAG) { +SDValue +MSP430TargetLowering::getReturnAddressFrameIndex(SelectionDAG &DAG) const { MachineFunction &MF = DAG.getMachineFunction(); MSP430MachineFunctionInfo *FuncInfo = 
MF.getInfo<MSP430MachineFunctionInfo>(); int ReturnAddrIndex = FuncInfo->getRAIndex(); @@ -880,7 +895,8 @@ SDValue MSP430TargetLowering::getReturnAddressFrameIndex(SelectionDAG &DAG) { return DAG.getFrameIndex(ReturnAddrIndex, getPointerTy()); } -SDValue MSP430TargetLowering::LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) { +SDValue MSP430TargetLowering::LowerRETURNADDR(SDValue Op, + SelectionDAG &DAG) const { unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue(); DebugLoc dl = Op.getDebugLoc(); @@ -900,7 +916,8 @@ SDValue MSP430TargetLowering::LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) { RetAddrFI, NULL, 0, false, false, 0); } -SDValue MSP430TargetLowering::LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) { +SDValue MSP430TargetLowering::LowerFRAMEADDR(SDValue Op, + SelectionDAG &DAG) const { MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo(); MFI->setFrameAddressIsTaken(true); EVT VT = Op.getValueType(); @@ -999,8 +1016,7 @@ bool MSP430TargetLowering::isZExtFree(EVT VT1, EVT VT2) const { MachineBasicBlock* MSP430TargetLowering::EmitShiftInstr(MachineInstr *MI, - MachineBasicBlock *BB, - DenseMap<MachineBasicBlock*, MachineBasicBlock*> *EM) const { + MachineBasicBlock *BB) const { MachineFunction *F = BB->getParent(); MachineRegisterInfo &RI = F->getRegInfo(); DebugLoc dl = MI->getDebugLoc(); @@ -1052,11 +1068,6 @@ MSP430TargetLowering::EmitShiftInstr(MachineInstr *MI, // block to the block containing instructions after shift. RemBB->transferSuccessors(BB); - // Inform sdisel of the edge changes. - for (MachineBasicBlock::succ_iterator SI = BB->succ_begin(), - SE = BB->succ_end(); SI != SE; ++SI) - EM->insert(std::make_pair(*SI, RemBB)); - // Add adges BB => LoopBB => RemBB, BB => RemBB, LoopBB => LoopBB BB->addSuccessor(LoopBB); BB->addSuccessor(RemBB); @@ -1111,14 +1122,13 @@ MSP430TargetLowering::EmitShiftInstr(MachineInstr *MI, MachineBasicBlock* MSP430TargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, - MachineBasicBlock *BB, - DenseMap<MachineBasicBlock*, MachineBasicBlock*> *EM) const { + MachineBasicBlock *BB) const { unsigned Opc = MI->getOpcode(); if (Opc == MSP430::Shl8 || Opc == MSP430::Shl16 || Opc == MSP430::Sra8 || Opc == MSP430::Sra16 || Opc == MSP430::Srl8 || Opc == MSP430::Srl16) - return EmitShiftInstr(MI, BB, EM); + return EmitShiftInstr(MI, BB); const TargetInstrInfo &TII = *getTargetMachine().getInstrInfo(); DebugLoc dl = MI->getDebugLoc(); @@ -1149,10 +1159,6 @@ MSP430TargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, .addImm(MI->getOperand(3).getImm()); F->insert(I, copy0MBB); F->insert(I, copy1MBB); - // Inform sdisel of the edge changes. - for (MachineBasicBlock::succ_iterator SI = BB->succ_begin(), - SE = BB->succ_end(); SI != SE; ++SI) - EM->insert(std::make_pair(*SI, copy1MBB)); // Update machine-CFG edges by transferring all successors of the current // block to the new block which will contain the Phi node for the select. copy1MBB->transferSuccessors(BB); diff --git a/lib/Target/MSP430/MSP430ISelLowering.h b/lib/Target/MSP430/MSP430ISelLowering.h index 87a790b..01c5071 100644 --- a/lib/Target/MSP430/MSP430ISelLowering.h +++ b/lib/Target/MSP430/MSP430ISelLowering.h @@ -74,7 +74,7 @@ namespace llvm { explicit MSP430TargetLowering(MSP430TargetMachine &TM); /// LowerOperation - Provide custom lowering hooks for some operations. 
- virtual SDValue LowerOperation(SDValue Op, SelectionDAG &DAG); + virtual SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const; /// getTargetNodeName - This method returns the name of a target specific /// DAG node. @@ -83,16 +83,17 @@ namespace llvm { /// getFunctionAlignment - Return the Log2 alignment of this function. virtual unsigned getFunctionAlignment(const Function *F) const; - SDValue LowerShifts(SDValue Op, SelectionDAG &DAG); - SDValue LowerGlobalAddress(SDValue Op, SelectionDAG &DAG); - SDValue LowerExternalSymbol(SDValue Op, SelectionDAG &DAG); - SDValue LowerBR_CC(SDValue Op, SelectionDAG &DAG); - SDValue LowerSETCC(SDValue Op, SelectionDAG &DAG); - SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG); - SDValue LowerSIGN_EXTEND(SDValue Op, SelectionDAG &DAG); - SDValue LowerRETURNADDR(SDValue Op, SelectionDAG &DAG); - SDValue LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG); - SDValue getReturnAddressFrameIndex(SelectionDAG &DAG); + SDValue LowerShifts(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerExternalSymbol(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerBR_CC(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerSETCC(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerSIGN_EXTEND(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const; + SDValue getReturnAddressFrameIndex(SelectionDAG &DAG) const; TargetLowering::ConstraintType getConstraintType(const std::string &Constraint) const; @@ -117,11 +118,9 @@ namespace llvm { virtual bool isZExtFree(EVT VT1, EVT VT2) const; MachineBasicBlock* EmitInstrWithCustomInserter(MachineInstr *MI, - MachineBasicBlock *BB, - DenseMap<MachineBasicBlock*, MachineBasicBlock*> *EM) const; + MachineBasicBlock *BB) const; MachineBasicBlock* EmitShiftInstr(MachineInstr *MI, - MachineBasicBlock *BB, - DenseMap<MachineBasicBlock*, MachineBasicBlock*> *EM) const; + MachineBasicBlock *BB) const; private: SDValue LowerCCCCallTo(SDValue Chain, SDValue Callee, @@ -130,7 +129,7 @@ namespace llvm { const SmallVectorImpl<ISD::OutputArg> &Outs, const SmallVectorImpl<ISD::InputArg> &Ins, DebugLoc dl, SelectionDAG &DAG, - SmallVectorImpl<SDValue> &InVals); + SmallVectorImpl<SDValue> &InVals) const; SDValue LowerCCCArguments(SDValue Chain, CallingConv::ID CallConv, @@ -138,33 +137,33 @@ namespace llvm { const SmallVectorImpl<ISD::InputArg> &Ins, DebugLoc dl, SelectionDAG &DAG, - SmallVectorImpl<SDValue> &InVals); + SmallVectorImpl<SDValue> &InVals) const; SDValue LowerCallResult(SDValue Chain, SDValue InFlag, CallingConv::ID CallConv, bool isVarArg, const SmallVectorImpl<ISD::InputArg> &Ins, DebugLoc dl, SelectionDAG &DAG, - SmallVectorImpl<SDValue> &InVals); + SmallVectorImpl<SDValue> &InVals) const; virtual SDValue LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, bool isVarArg, const SmallVectorImpl<ISD::InputArg> &Ins, DebugLoc dl, SelectionDAG &DAG, - SmallVectorImpl<SDValue> &InVals); + SmallVectorImpl<SDValue> &InVals) const; virtual SDValue LowerCall(SDValue Chain, SDValue Callee, CallingConv::ID CallConv, bool isVarArg, bool &isTailCall, const SmallVectorImpl<ISD::OutputArg> &Outs, const SmallVectorImpl<ISD::InputArg> &Ins, DebugLoc dl, SelectionDAG &DAG, - SmallVectorImpl<SDValue> &InVals); + SmallVectorImpl<SDValue> 
&InVals) const; virtual SDValue LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool isVarArg, const SmallVectorImpl<ISD::OutputArg> &Outs, - DebugLoc dl, SelectionDAG &DAG); + DebugLoc dl, SelectionDAG &DAG) const; virtual bool getPostIndexedAddressParts(SDNode *N, SDNode *Op, SDValue &Base, diff --git a/lib/Target/MSP430/MSP430InstrInfo.cpp b/lib/Target/MSP430/MSP430InstrInfo.cpp index 0381904..2b09b3d 100644 --- a/lib/Target/MSP430/MSP430InstrInfo.cpp +++ b/lib/Target/MSP430/MSP430InstrInfo.cpp @@ -176,7 +176,9 @@ unsigned MSP430InstrInfo::RemoveBranch(MachineBasicBlock &MBB) const { if (I->isDebugValue()) continue; if (I->getOpcode() != MSP430::JMP && - I->getOpcode() != MSP430::JCC) + I->getOpcode() != MSP430::JCC && + I->getOpcode() != MSP430::Br && + I->getOpcode() != MSP430::Bm) break; // Remove the branch. I->eraseFromParent(); @@ -256,6 +258,11 @@ bool MSP430InstrInfo::AnalyzeBranch(MachineBasicBlock &MBB, if (!I->getDesc().isBranch()) return true; + // Cannot handle indirect branches. + if (I->getOpcode() == MSP430::Br || + I->getOpcode() == MSP430::Bm) + return true; + // Handle unconditional branches. if (I->getOpcode() == MSP430::JMP) { if (!AllowModify) { @@ -365,6 +372,7 @@ unsigned MSP430InstrInfo::GetInstSizeInBytes(const MachineInstr *MI) const { case TargetOpcode::EH_LABEL: case TargetOpcode::IMPLICIT_DEF: case TargetOpcode::KILL: + case TargetOpcode::DBG_VALUE: return 0; case TargetOpcode::INLINEASM: { const MachineFunction *MF = MI->getParent()->getParent(); diff --git a/lib/Target/MSP430/MSP430InstrInfo.td b/lib/Target/MSP430/MSP430InstrInfo.td index 144ba26..6b9a2f2 100644 --- a/lib/Target/MSP430/MSP430InstrInfo.td +++ b/lib/Target/MSP430/MSP430InstrInfo.td @@ -76,8 +76,8 @@ def memdst : Operand<i16> { let MIOperandInfo = (ops GR16, i16imm); } -// Branch targets have OtherVT type. -def brtarget : Operand<OtherVT> { +// Short jump targets have OtherVT type and are printed as pcrel imm values. 
+def jmptarget : Operand<OtherVT> { let PrintMethod = "printPCRelImmOperand"; } @@ -169,21 +169,27 @@ let isBranch = 1, isTerminator = 1 in { // Direct branch let isBarrier = 1 in { // Short branch - def JMP : CJForm<0, 0, - (outs), (ins brtarget:$dst), + def JMP : CJForm<0, 0, (outs), (ins jmptarget:$dst), "jmp\t$dst", [(br bb:$dst)]>; - // Long branch - def B : I16ri<0, - (outs), (ins brtarget:$dst), - "br\t$dst", - []>; + let isIndirectBranch = 1 in { + // Long branches + def Bi : I16ri<0, (outs), (ins i16imm:$brdst), + "br\t$brdst", + [(brind tblockaddress:$brdst)]>; + def Br : I16rr<0, (outs), (ins GR16:$brdst), + "mov.w\t{$brdst, pc}", + [(brind GR16:$brdst)]>; + def Bm : I16rm<0, (outs), (ins memsrc:$brdst), + "mov.w\t{$brdst, pc}", + [(brind (load addr:$brdst))]>; + } } // Conditional branches let Uses = [SRW] in def JCC : CJForm<0, 0, - (outs), (ins brtarget:$dst, cc:$cc), + (outs), (ins jmptarget:$dst, cc:$cc), "j$cc\t$dst", [(MSP430brcc bb:$dst, imm:$cc)]>; } // isBranch, isTerminator @@ -1126,16 +1132,21 @@ def : Pat<(i8 (trunc GR16:$src)), // GlobalAddress, ExternalSymbol def : Pat<(i16 (MSP430Wrapper tglobaladdr:$dst)), (MOV16ri tglobaladdr:$dst)>; def : Pat<(i16 (MSP430Wrapper texternalsym:$dst)), (MOV16ri texternalsym:$dst)>; +def : Pat<(i16 (MSP430Wrapper tblockaddress:$dst)), (MOV16ri tblockaddress:$dst)>; def : Pat<(add GR16:$src1, (MSP430Wrapper tglobaladdr :$src2)), (ADD16ri GR16:$src1, tglobaladdr:$src2)>; def : Pat<(add GR16:$src1, (MSP430Wrapper texternalsym:$src2)), (ADD16ri GR16:$src1, texternalsym:$src2)>; +def : Pat<(add GR16:$src1, (MSP430Wrapper tblockaddress:$src2)), + (ADD16ri GR16:$src1, tblockaddress:$src2)>; def : Pat<(store (i16 (MSP430Wrapper tglobaladdr:$src)), addr:$dst), (MOV16mi addr:$dst, tglobaladdr:$src)>; def : Pat<(store (i16 (MSP430Wrapper texternalsym:$src)), addr:$dst), (MOV16mi addr:$dst, texternalsym:$src)>; +def : Pat<(store (i16 (MSP430Wrapper tblockaddress:$src)), addr:$dst), + (MOV16mi addr:$dst, tblockaddress:$src)>; // calls def : Pat<(MSP430call (i16 tglobaladdr:$dst)), diff --git a/lib/Target/MSP430/MSP430RegisterInfo.cpp b/lib/Target/MSP430/MSP430RegisterInfo.cpp index d91783a..0cae267 100644 --- a/lib/Target/MSP430/MSP430RegisterInfo.cpp +++ b/lib/Target/MSP430/MSP430RegisterInfo.cpp @@ -138,7 +138,7 @@ MSP430RegisterInfo::getPointerRegClass(unsigned Kind) const { bool MSP430RegisterInfo::hasFP(const MachineFunction &MF) const { const MachineFrameInfo *MFI = MF.getFrameInfo(); - return (NoFramePointerElim || + return (DisableFramePointerElim(MF) || MF.getFrameInfo()->hasVarSizedObjects() || MFI->isFrameAddressTaken()); } diff --git a/lib/Target/MSP430/MSP430SelectionDAGInfo.cpp b/lib/Target/MSP430/MSP430SelectionDAGInfo.cpp new file mode 100644 index 0000000..a54c929 --- /dev/null +++ b/lib/Target/MSP430/MSP430SelectionDAGInfo.cpp @@ -0,0 +1,22 @@ +//===-- MSP430SelectionDAGInfo.cpp - MSP430 SelectionDAG Info -------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the MSP430SelectionDAGInfo class. 
+// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "msp430-selectiondag-info" +#include "MSP430SelectionDAGInfo.h" +using namespace llvm; + +MSP430SelectionDAGInfo::MSP430SelectionDAGInfo() { +} + +MSP430SelectionDAGInfo::~MSP430SelectionDAGInfo() { +} diff --git a/lib/Target/MSP430/MSP430SelectionDAGInfo.h b/lib/Target/MSP430/MSP430SelectionDAGInfo.h new file mode 100644 index 0000000..c952ab7 --- /dev/null +++ b/lib/Target/MSP430/MSP430SelectionDAGInfo.h @@ -0,0 +1,29 @@ +//===-- MSP430SelectionDAGInfo.h - MSP430 SelectionDAG Info -----*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines the MSP430 subclass for TargetSelectionDAGInfo. +// +//===----------------------------------------------------------------------===// + +#ifndef MSP430SELECTIONDAGINFO_H +#define MSP430SELECTIONDAGINFO_H + +#include "llvm/Target/TargetSelectionDAGInfo.h" + +namespace llvm { + +class MSP430SelectionDAGInfo : public TargetSelectionDAGInfo { +public: + MSP430SelectionDAGInfo(); + ~MSP430SelectionDAGInfo(); +}; + +} + +#endif diff --git a/lib/Target/MSP430/MSP430TargetMachine.h b/lib/Target/MSP430/MSP430TargetMachine.h index d93ac5c..68bde9a 100644 --- a/lib/Target/MSP430/MSP430TargetMachine.h +++ b/lib/Target/MSP430/MSP430TargetMachine.h @@ -50,8 +50,8 @@ public: return &InstrInfo.getRegisterInfo(); } - virtual MSP430TargetLowering *getTargetLowering() const { - return const_cast<MSP430TargetLowering*>(&TLInfo); + virtual const MSP430TargetLowering *getTargetLowering() const { + return &TLInfo; } virtual bool addInstSelector(PassManagerBase &PM, CodeGenOpt::Level OptLevel); diff --git a/lib/Target/Mangler.cpp b/lib/Target/Mangler.cpp index 1d5c511..4ef017a 100644 --- a/lib/Target/Mangler.cpp +++ b/lib/Target/Mangler.cpp @@ -22,11 +22,12 @@ #include "llvm/ADT/Twine.h" using namespace llvm; -static bool isAcceptableChar(char C) { +static bool isAcceptableChar(char C, bool AllowPeriod) { if ((C < 'a' || C > 'z') && (C < 'A' || C > 'Z') && (C < '0' || C > '9') && - C != '_' && C != '$' && C != '.' && C != '@') + C != '_' && C != '$' && C != '@' && + !(AllowPeriod && C == '.')) return false; return true; } @@ -54,8 +55,9 @@ static bool NameNeedsEscaping(StringRef Str, const MCAsmInfo &MAI) { // If any of the characters in the string is an unacceptable character, force // quotes. + bool AllowPeriod = MAI.doesAllowPeriodsInName(); for (unsigned i = 0, e = Str.size(); i != e; ++i) - if (!isAcceptableChar(Str[i])) + if (!isAcceptableChar(Str[i], AllowPeriod)) return true; return false; } @@ -70,9 +72,10 @@ static void appendMangledName(SmallVectorImpl<char> &OutName, StringRef Str, MangleLetter(OutName, Str[0]); Str = Str.substr(1); } - + + bool AllowPeriod = MAI.doesAllowPeriodsInName(); for (unsigned i = 0, e = Str.size(); i != e; ++i) { - if (!isAcceptableChar(Str[i])) + if (!isAcceptableChar(Str[i], AllowPeriod)) MangleLetter(OutName, Str[i]); else OutName.push_back(Str[i]); diff --git a/lib/Target/Mips/AsmPrinter/CMakeLists.txt b/lib/Target/Mips/AsmPrinter/CMakeLists.txt index 56c68a6..d3099d2 100644 --- a/lib/Target/Mips/AsmPrinter/CMakeLists.txt +++ b/lib/Target/Mips/AsmPrinter/CMakeLists.txt @@ -1,9 +1,9 @@ -include_directories(
- ${CMAKE_CURRENT_BINARY_DIR}/..
- ${CMAKE_CURRENT_SOURCE_DIR}/..
- )
-
-add_llvm_library(LLVMMipsAsmPrinter
+include_directories( + ${CMAKE_CURRENT_BINARY_DIR}/.. + ${CMAKE_CURRENT_SOURCE_DIR}/.. + ) + +add_llvm_library(LLVMMipsAsmPrinter MipsAsmPrinter.cpp - )
-add_dependencies(LLVMMipsAsmPrinter MipsCodeGenTable_gen)
\ No newline at end of file + ) +add_dependencies(LLVMMipsAsmPrinter MipsCodeGenTable_gen) diff --git a/lib/Target/Mips/AsmPrinter/MipsAsmPrinter.cpp b/lib/Target/Mips/AsmPrinter/MipsAsmPrinter.cpp index 6974371..d269153 100644 --- a/lib/Target/Mips/AsmPrinter/MipsAsmPrinter.cpp +++ b/lib/Target/Mips/AsmPrinter/MipsAsmPrinter.cpp @@ -306,7 +306,7 @@ void MipsAsmPrinter::printOperand(const MachineInstr *MI, int opNum, void MipsAsmPrinter::printUnsignedImm(const MachineInstr *MI, int opNum, raw_ostream &O) { const MachineOperand &MO = MI->getOperand(opNum); - if (MO.getType() == MachineOperand::MO_Immediate) + if (MO.isImm()) O << (unsigned short int)MO.getImm(); else printOperand(MI, opNum, O); diff --git a/lib/Target/Mips/CMakeLists.txt b/lib/Target/Mips/CMakeLists.txt index 0e3bf5a..a77802a 100644 --- a/lib/Target/Mips/CMakeLists.txt +++ b/lib/Target/Mips/CMakeLists.txt @@ -20,6 +20,7 @@ add_llvm_target(MipsCodeGen MipsSubtarget.cpp MipsTargetMachine.cpp MipsTargetObjectFile.cpp + MipsSelectionDAGInfo.cpp ) target_link_libraries (LLVMMipsCodeGen LLVMSelectionDAG) diff --git a/lib/Target/Mips/MipsISelDAGToDAG.cpp b/lib/Target/Mips/MipsISelDAGToDAG.cpp index c4746db..ee85a3f3 100644 --- a/lib/Target/Mips/MipsISelDAGToDAG.cpp +++ b/lib/Target/Mips/MipsISelDAGToDAG.cpp @@ -13,7 +13,6 @@ #define DEBUG_TYPE "mips-isel" #include "Mips.h" -#include "MipsISelLowering.h" #include "MipsMachineFunction.h" #include "MipsRegisterInfo.h" #include "MipsSubtarget.h" diff --git a/lib/Target/Mips/MipsISelLowering.cpp b/lib/Target/Mips/MipsISelLowering.cpp index 584b887..e979c3f 100644 --- a/lib/Target/Mips/MipsISelLowering.cpp +++ b/lib/Target/Mips/MipsISelLowering.cpp @@ -166,7 +166,7 @@ unsigned MipsTargetLowering::getFunctionAlignment(const Function *) const { } SDValue MipsTargetLowering:: -LowerOperation(SDValue Op, SelectionDAG &DAG) +LowerOperation(SDValue Op, SelectionDAG &DAG) const { switch (Op.getOpcode()) { @@ -252,8 +252,7 @@ static Mips::CondCode FPCondCCodeToFCC(ISD::CondCode CC) { MachineBasicBlock * MipsTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, - MachineBasicBlock *BB, - DenseMap<MachineBasicBlock*, MachineBasicBlock*> *EM) const { + MachineBasicBlock *BB) const { const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); bool isFPCmp = false; DebugLoc dl = MI->getDebugLoc(); @@ -301,12 +300,9 @@ MipsTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, F->insert(It, sinkMBB); // Update machine-CFG edges by first adding all successors of the current // block to the new block which will contain the Phi node for the select. - // Also inform sdisel of the edge changes. for(MachineBasicBlock::succ_iterator i = BB->succ_begin(), - e = BB->succ_end(); i != e; ++i) { - EM->insert(std::make_pair(*i, sinkMBB)); + e = BB->succ_end(); i != e; ++i) sinkMBB->addSuccessor(*i); - } // Next, remove all successors of the current block, and add the true // and fallthrough blocks as its successors. 
while(!BB->succ_empty()) @@ -341,7 +337,7 @@ MipsTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, //===----------------------------------------------------------------------===// SDValue MipsTargetLowering:: -LowerFP_TO_SINT(SDValue Op, SelectionDAG &DAG) +LowerFP_TO_SINT(SDValue Op, SelectionDAG &DAG) const { if (!Subtarget->isMips1()) return Op; @@ -374,7 +370,7 @@ LowerFP_TO_SINT(SDValue Op, SelectionDAG &DAG) } SDValue MipsTargetLowering:: -LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) +LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const { SDValue Chain = Op.getOperand(0); SDValue Size = Op.getOperand(1); @@ -398,7 +394,7 @@ LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) } SDValue MipsTargetLowering:: -LowerANDOR(SDValue Op, SelectionDAG &DAG) +LowerANDOR(SDValue Op, SelectionDAG &DAG) const { SDValue LHS = Op.getOperand(0); SDValue RHS = Op.getOperand(1); @@ -419,7 +415,7 @@ LowerANDOR(SDValue Op, SelectionDAG &DAG) } SDValue MipsTargetLowering:: -LowerBRCOND(SDValue Op, SelectionDAG &DAG) +LowerBRCOND(SDValue Op, SelectionDAG &DAG) const { // The first operand is the chain, the second is the condition, the third is // the block to branch to if the condition is true. @@ -441,7 +437,7 @@ LowerBRCOND(SDValue Op, SelectionDAG &DAG) } SDValue MipsTargetLowering:: -LowerSETCC(SDValue Op, SelectionDAG &DAG) +LowerSETCC(SDValue Op, SelectionDAG &DAG) const { // The operands to this are the left and right operands to compare (ops #0, // and #1) and the condition code to compare them with (op #2) as a @@ -457,7 +453,7 @@ LowerSETCC(SDValue Op, SelectionDAG &DAG) } SDValue MipsTargetLowering:: -LowerSELECT(SDValue Op, SelectionDAG &DAG) +LowerSELECT(SDValue Op, SelectionDAG &DAG) const { SDValue Cond = Op.getOperand(0); SDValue True = Op.getOperand(1); @@ -481,10 +477,11 @@ LowerSELECT(SDValue Op, SelectionDAG &DAG) Cond, True, False, CCNode); } -SDValue MipsTargetLowering::LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) { +SDValue MipsTargetLowering::LowerGlobalAddress(SDValue Op, + SelectionDAG &DAG) const { // FIXME there isn't actually debug info here DebugLoc dl = Op.getDebugLoc(); - GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal(); + const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal(); if (getTargetMachine().getRelocationModel() != Reloc::PIC_) { SDVTList VTs = DAG.getVTList(MVT::i32); @@ -525,14 +522,14 @@ SDValue MipsTargetLowering::LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) { } SDValue MipsTargetLowering:: -LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) +LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const { llvm_unreachable("TLS not implemented for MIPS."); return SDValue(); // Not reached } SDValue MipsTargetLowering:: -LowerJumpTable(SDValue Op, SelectionDAG &DAG) +LowerJumpTable(SDValue Op, SelectionDAG &DAG) const { SDValue ResNode; SDValue HiPart; @@ -560,11 +557,11 @@ LowerJumpTable(SDValue Op, SelectionDAG &DAG) } SDValue MipsTargetLowering:: -LowerConstantPool(SDValue Op, SelectionDAG &DAG) +LowerConstantPool(SDValue Op, SelectionDAG &DAG) const { SDValue ResNode; ConstantPoolSDNode *N = cast<ConstantPoolSDNode>(Op); - Constant *C = N->getConstVal(); + const Constant *C = N->getConstVal(); // FIXME there isn't actually debug info here DebugLoc dl = Op.getDebugLoc(); @@ -596,9 +593,13 @@ LowerConstantPool(SDValue Op, SelectionDAG &DAG) return ResNode; } -SDValue MipsTargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG) { +SDValue MipsTargetLowering::LowerVASTART(SDValue Op, SelectionDAG 
&DAG) const { + MachineFunction &MF = DAG.getMachineFunction(); + MipsFunctionInfo *FuncInfo = MF.getInfo<MipsFunctionInfo>(); + DebugLoc dl = Op.getDebugLoc(); - SDValue FI = DAG.getFrameIndex(VarArgsFrameIndex, getPointerTy()); + SDValue FI = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), + getPointerTy()); // vastart just stores the address of the VarArgsFrameIndex slot into the // memory location argument. @@ -769,7 +770,7 @@ MipsTargetLowering::LowerCall(SDValue Chain, SDValue Callee, const SmallVectorImpl<ISD::OutputArg> &Outs, const SmallVectorImpl<ISD::InputArg> &Ins, DebugLoc dl, SelectionDAG &DAG, - SmallVectorImpl<SDValue> &InVals) { + SmallVectorImpl<SDValue> &InVals) const { // MIPs target does not yet support tail call optimization. isTailCall = false; @@ -963,7 +964,7 @@ MipsTargetLowering::LowerCallResult(SDValue Chain, SDValue InFlag, CallingConv::ID CallConv, bool isVarArg, const SmallVectorImpl<ISD::InputArg> &Ins, DebugLoc dl, SelectionDAG &DAG, - SmallVectorImpl<SDValue> &InVals) { + SmallVectorImpl<SDValue> &InVals) const { // Assign locations to each value returned by this call. SmallVector<CCValAssign, 16> RVLocs; @@ -995,14 +996,15 @@ MipsTargetLowering::LowerFormalArguments(SDValue Chain, const SmallVectorImpl<ISD::InputArg> &Ins, DebugLoc dl, SelectionDAG &DAG, - SmallVectorImpl<SDValue> &InVals) { + SmallVectorImpl<SDValue> &InVals) + const { MachineFunction &MF = DAG.getMachineFunction(); MachineFrameInfo *MFI = MF.getFrameInfo(); MipsFunctionInfo *MipsFI = MF.getInfo<MipsFunctionInfo>(); unsigned StackReg = MF.getTarget().getRegisterInfo()->getFrameRegister(MF); - VarArgsFrameIndex = 0; + MipsFI->setVarArgsFrameIndex(0); // Used with vargs to acumulate store chains. std::vector<SDValue> OutChains; @@ -1143,8 +1145,8 @@ MipsTargetLowering::LowerFormalArguments(SDValue Chain, // Record the frame index of the first variable argument // which is a value necessary to VASTART. - if (!VarArgsFrameIndex) - VarArgsFrameIndex = FI; + if (!MipsFI->getVarArgsFrameIndex()) + MipsFI->setVarArgsFrameIndex(FI); } } @@ -1167,7 +1169,7 @@ SDValue MipsTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool isVarArg, const SmallVectorImpl<ISD::OutputArg> &Outs, - DebugLoc dl, SelectionDAG &DAG) { + DebugLoc dl, SelectionDAG &DAG) const { // CCValAssign - represent the assignment of // the return value to a location diff --git a/lib/Target/Mips/MipsISelLowering.h b/lib/Target/Mips/MipsISelLowering.h index 7256617..f2de489 100644 --- a/lib/Target/Mips/MipsISelLowering.h +++ b/lib/Target/Mips/MipsISelLowering.h @@ -68,14 +68,11 @@ namespace llvm { //===--------------------------------------------------------------------===// class MipsTargetLowering : public TargetLowering { - int VarArgsFrameIndex; // FrameIndex for start of varargs area. - public: - explicit MipsTargetLowering(MipsTargetMachine &TM); /// LowerOperation - Provide custom lowering hooks for some operations. - virtual SDValue LowerOperation(SDValue Op, SelectionDAG &DAG); + virtual SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const; /// getTargetNodeName - This method returns the name of a target specific // DAG node. 
@@ -96,27 +93,27 @@ namespace llvm { CallingConv::ID CallConv, bool isVarArg, const SmallVectorImpl<ISD::InputArg> &Ins, DebugLoc dl, SelectionDAG &DAG, - SmallVectorImpl<SDValue> &InVals); + SmallVectorImpl<SDValue> &InVals) const; // Lower Operand specifics - SDValue LowerANDOR(SDValue Op, SelectionDAG &DAG); - SDValue LowerBRCOND(SDValue Op, SelectionDAG &DAG); - SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG); - SDValue LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG); - SDValue LowerFP_TO_SINT(SDValue Op, SelectionDAG &DAG); - SDValue LowerGlobalAddress(SDValue Op, SelectionDAG &DAG); - SDValue LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG); - SDValue LowerJumpTable(SDValue Op, SelectionDAG &DAG); - SDValue LowerSELECT(SDValue Op, SelectionDAG &DAG); - SDValue LowerSETCC(SDValue Op, SelectionDAG &DAG); - SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG); + SDValue LowerANDOR(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerBRCOND(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerFP_TO_SINT(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerJumpTable(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerSELECT(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerSETCC(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG) const; virtual SDValue LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, bool isVarArg, const SmallVectorImpl<ISD::InputArg> &Ins, DebugLoc dl, SelectionDAG &DAG, - SmallVectorImpl<SDValue> &InVals); + SmallVectorImpl<SDValue> &InVals) const; virtual SDValue LowerCall(SDValue Chain, SDValue Callee, @@ -125,17 +122,17 @@ namespace llvm { const SmallVectorImpl<ISD::OutputArg> &Outs, const SmallVectorImpl<ISD::InputArg> &Ins, DebugLoc dl, SelectionDAG &DAG, - SmallVectorImpl<SDValue> &InVals); + SmallVectorImpl<SDValue> &InVals) const; virtual SDValue LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool isVarArg, const SmallVectorImpl<ISD::OutputArg> &Outs, - DebugLoc dl, SelectionDAG &DAG); + DebugLoc dl, SelectionDAG &DAG) const; - virtual MachineBasicBlock *EmitInstrWithCustomInserter(MachineInstr *MI, - MachineBasicBlock *MBB, - DenseMap<MachineBasicBlock*, MachineBasicBlock*> *EM) const; + virtual MachineBasicBlock * + EmitInstrWithCustomInserter(MachineInstr *MI, + MachineBasicBlock *MBB) const; // Inline asm support ConstraintType getConstraintType(const std::string &Constraint) const; diff --git a/lib/Target/Mips/MipsMachineFunction.h b/lib/Target/Mips/MipsMachineFunction.h index a300f49..5723f9e 100644 --- a/lib/Target/Mips/MipsMachineFunction.h +++ b/lib/Target/Mips/MipsMachineFunction.h @@ -80,11 +80,15 @@ private: /// relocation models. unsigned GlobalBaseReg; + /// VarArgsFrameIndex - FrameIndex for start of varargs area. 
+ int VarArgsFrameIndex; + public: MipsFunctionInfo(MachineFunction& MF) : FPStackOffset(0), RAStackOffset(0), CPUTopSavedRegOff(0), FPUTopSavedRegOff(0), GPHolder(-1,-1), HasLoadArgs(false), - HasStoreVarArgs(false), SRetReturnReg(0), GlobalBaseReg(0) + HasStoreVarArgs(false), SRetReturnReg(0), GlobalBaseReg(0), + VarArgsFrameIndex(0) {} int getFPStackOffset() const { return FPStackOffset; } @@ -133,6 +137,9 @@ public: unsigned getGlobalBaseReg() const { return GlobalBaseReg; } void setGlobalBaseReg(unsigned Reg) { GlobalBaseReg = Reg; } + + int getVarArgsFrameIndex() const { return VarArgsFrameIndex; } + void setVarArgsFrameIndex(int Index) { VarArgsFrameIndex = Index; } }; } // end of namespace llvm diff --git a/lib/Target/Mips/MipsRegisterInfo.cpp b/lib/Target/Mips/MipsRegisterInfo.cpp index f43e69b..478da84 100644 --- a/lib/Target/Mips/MipsRegisterInfo.cpp +++ b/lib/Target/Mips/MipsRegisterInfo.cpp @@ -338,7 +338,7 @@ void MipsRegisterInfo::adjustMipsStackFrame(MachineFunction &MF) const bool MipsRegisterInfo:: hasFP(const MachineFunction &MF) const { const MachineFrameInfo *MFI = MF.getFrameInfo(); - return NoFramePointerElim || MFI->hasVarSizedObjects(); + return DisableFramePointerElim(MF) || MFI->hasVarSizedObjects(); } // This function eliminate ADJCALLSTACKDOWN, diff --git a/lib/Target/Mips/MipsSchedule.td b/lib/Target/Mips/MipsSchedule.td index 0c3ca573..616a79b 100644 --- a/lib/Target/Mips/MipsSchedule.td +++ b/lib/Target/Mips/MipsSchedule.td @@ -40,7 +40,7 @@ def IIPseudo : InstrItinClass; //===----------------------------------------------------------------------===// // Mips Generic instruction itineraries. //===----------------------------------------------------------------------===// -def MipsGenericItineraries : ProcessorItineraries<[ +def MipsGenericItineraries : ProcessorItineraries<[ALU, IMULDIV], [ InstrItinData<IIAlu , [InstrStage<1, [ALU]>]>, InstrItinData<IILoad , [InstrStage<3, [ALU]>]>, InstrItinData<IIStore , [InstrStage<1, [ALU]>]>, diff --git a/lib/Target/Mips/MipsSelectionDAGInfo.cpp b/lib/Target/Mips/MipsSelectionDAGInfo.cpp new file mode 100644 index 0000000..72c149d --- /dev/null +++ b/lib/Target/Mips/MipsSelectionDAGInfo.cpp @@ -0,0 +1,22 @@ +//===-- MipsSelectionDAGInfo.cpp - Mips SelectionDAG Info -----------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the MipsSelectionDAGInfo class. +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "mips-selectiondag-info" +#include "MipsSelectionDAGInfo.h" +using namespace llvm; + +MipsSelectionDAGInfo::MipsSelectionDAGInfo() { +} + +MipsSelectionDAGInfo::~MipsSelectionDAGInfo() { +} diff --git a/lib/Target/Mips/MipsSelectionDAGInfo.h b/lib/Target/Mips/MipsSelectionDAGInfo.h new file mode 100644 index 0000000..6eaf0c9 --- /dev/null +++ b/lib/Target/Mips/MipsSelectionDAGInfo.h @@ -0,0 +1,29 @@ +//===-- MipsSelectionDAGInfo.h - Mips SelectionDAG Info ---------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines the Mips subclass for TargetSelectionDAGInfo. 
+// +//===----------------------------------------------------------------------===// + +#ifndef MIPSSELECTIONDAGINFO_H +#define MIPSSELECTIONDAGINFO_H + +#include "llvm/Target/TargetSelectionDAGInfo.h" + +namespace llvm { + +class MipsSelectionDAGInfo : public TargetSelectionDAGInfo { +public: + MipsSelectionDAGInfo(); + ~MipsSelectionDAGInfo(); +}; + +} + +#endif diff --git a/lib/Target/Mips/MipsTargetMachine.h b/lib/Target/Mips/MipsTargetMachine.h index c3428be..cd671cf 100644 --- a/lib/Target/Mips/MipsTargetMachine.h +++ b/lib/Target/Mips/MipsTargetMachine.h @@ -47,8 +47,8 @@ namespace llvm { return &InstrInfo.getRegisterInfo(); } - virtual MipsTargetLowering *getTargetLowering() const { - return const_cast<MipsTargetLowering*>(&TLInfo); + virtual const MipsTargetLowering *getTargetLowering() const { + return &TLInfo; } // Pass Pipeline Configuration diff --git a/lib/Target/Mips/MipsTargetObjectFile.cpp b/lib/Target/Mips/MipsTargetObjectFile.cpp index 0fb423d..405f419 100644 --- a/lib/Target/Mips/MipsTargetObjectFile.cpp +++ b/lib/Target/Mips/MipsTargetObjectFile.cpp @@ -11,6 +11,7 @@ #include "MipsSubtarget.h" #include "llvm/DerivedTypes.h" #include "llvm/GlobalVariable.h" +#include "llvm/MC/MCContext.h" #include "llvm/MC/MCSectionELF.h" #include "llvm/Target/TargetData.h" #include "llvm/Target/TargetMachine.h" @@ -26,14 +27,14 @@ void MipsTargetObjectFile::Initialize(MCContext &Ctx, const TargetMachine &TM){ TargetLoweringObjectFileELF::Initialize(Ctx, TM); SmallDataSection = - getELFSection(".sdata", MCSectionELF::SHT_PROGBITS, - MCSectionELF::SHF_WRITE | MCSectionELF::SHF_ALLOC, - SectionKind::getDataRel()); + getContext().getELFSection(".sdata", MCSectionELF::SHT_PROGBITS, + MCSectionELF::SHF_WRITE |MCSectionELF::SHF_ALLOC, + SectionKind::getDataRel()); SmallBSSSection = - getELFSection(".sbss", MCSectionELF::SHT_NOBITS, - MCSectionELF::SHF_WRITE | MCSectionELF::SHF_ALLOC, - SectionKind::getBSS()); + getContext().getELFSection(".sbss", MCSectionELF::SHT_NOBITS, + MCSectionELF::SHF_WRITE |MCSectionELF::SHF_ALLOC, + SectionKind::getBSS()); } diff --git a/lib/Target/PIC16/AsmPrinter/CMakeLists.txt b/lib/Target/PIC16/AsmPrinter/CMakeLists.txt index 2e1b809..d36bb8e 100644 --- a/lib/Target/PIC16/AsmPrinter/CMakeLists.txt +++ b/lib/Target/PIC16/AsmPrinter/CMakeLists.txt @@ -1,9 +1,9 @@ -include_directories(
- ${CMAKE_CURRENT_BINARY_DIR}/..
- ${CMAKE_CURRENT_SOURCE_DIR}/..
- )
-
-add_llvm_library(LLVMPIC16AsmPrinter
+include_directories( + ${CMAKE_CURRENT_BINARY_DIR}/.. + ${CMAKE_CURRENT_SOURCE_DIR}/.. + ) + +add_llvm_library(LLVMPIC16AsmPrinter PIC16AsmPrinter.cpp - )
+ ) add_dependencies(LLVMPIC16AsmPrinter PIC16CodeGenTable_gen) diff --git a/lib/Target/PIC16/AsmPrinter/PIC16AsmPrinter.cpp b/lib/Target/PIC16/AsmPrinter/PIC16AsmPrinter.cpp index c46db46..b665817 100644 --- a/lib/Target/PIC16/AsmPrinter/PIC16AsmPrinter.cpp +++ b/lib/Target/PIC16/AsmPrinter/PIC16AsmPrinter.cpp @@ -16,6 +16,7 @@ #include "PIC16AsmPrinter.h" #include "PIC16Section.h" #include "PIC16MCAsmInfo.h" +#include "PIC16MachineFunctionInfo.h" #include "llvm/DerivedTypes.h" #include "llvm/Function.h" #include "llvm/Module.h" @@ -36,9 +37,8 @@ using namespace llvm; PIC16AsmPrinter::PIC16AsmPrinter(TargetMachine &TM, MCStreamer &Streamer) : AsmPrinter(TM, Streamer), DbgInfo(Streamer, TM.getMCAsmInfo()) { - PTLI = static_cast<PIC16TargetLowering*>(TM.getTargetLowering()); PMAI = static_cast<const PIC16MCAsmInfo*>(TM.getMCAsmInfo()); - PTOF = (PIC16TargetObjectFile *)&PTLI->getObjFileLowering(); + PTOF = &getObjFileLowering(); } void PIC16AsmPrinter::EmitInstruction(const MachineInstr *MI) { @@ -379,6 +379,8 @@ bool PIC16AsmPrinter::doFinalization(Module &M) { void PIC16AsmPrinter::EmitFunctionFrame(MachineFunction &MF) { const Function *F = MF.getFunction(); const TargetData *TD = TM.getTargetData(); + PIC16MachineFunctionInfo *FuncInfo = MF.getInfo<PIC16MachineFunctionInfo>(); + // Emit the data section name. PIC16Section *fPDataSection = @@ -420,7 +422,7 @@ void PIC16AsmPrinter::EmitFunctionFrame(MachineFunction &MF) { Twine(" RES ") + Twine(ArgSize)); // Emit temporary space - int TempSize = PTLI->GetTmpSize(); + int TempSize = FuncInfo->getTmpSize(); if (TempSize > 0) OutStreamer.EmitRawText(PAN::getTempdataLabel(CurrentFnSym->getName()) + Twine(" RES ") + Twine(TempSize)); diff --git a/lib/Target/PIC16/AsmPrinter/PIC16AsmPrinter.h b/lib/Target/PIC16/AsmPrinter/PIC16AsmPrinter.h index e27778f..a424c27 100644 --- a/lib/Target/PIC16/AsmPrinter/PIC16AsmPrinter.h +++ b/lib/Target/PIC16/AsmPrinter/PIC16AsmPrinter.h @@ -37,8 +37,8 @@ namespace llvm { return "PIC16 Assembly Printer"; } - PIC16TargetObjectFile &getObjFileLowering() const { - return (PIC16TargetObjectFile &)AsmPrinter::getObjFileLowering(); + const PIC16TargetObjectFile &getObjFileLowering() const { + return (const PIC16TargetObjectFile &)AsmPrinter::getObjFileLowering(); } bool runOnMachineFunction(MachineFunction &F); @@ -76,8 +76,7 @@ namespace llvm { } private: - PIC16TargetObjectFile *PTOF; - PIC16TargetLowering *PTLI; + const PIC16TargetObjectFile *PTOF; PIC16DbgInfo DbgInfo; const PIC16MCAsmInfo *PMAI; std::set<std::string> LibcallDecls; // Sorted & uniqued set of extern decls. 
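[Note] The MSP430, Mips and PIC16 hunks above all apply the same two-part refactoring: the TargetLowering hooks (LowerOperation, LowerFormalArguments, LowerCall, LowerReturn, EmitInstrWithCustomInserter, etc.) become const, and any per-function mutable state those hooks used to keep on the lowering object (Mips's VarArgsFrameIndex; PIC16's TmpSize, FiTmpOffsetMap and ReservedFrameCount) moves into the target's MachineFunctionInfo subclass, reached through MachineFunction::getInfo<>(). The sketch below only illustrates that pattern; the Foo* names and rememberVarArgsSlot are hypothetical stand-ins, not the actual LLVM classes or API.

// Minimal sketch (simplified, hypothetical names) of the pattern used in
// these hunks: per-function state moves out of the now-const lowering
// object and into a MachineFunctionInfo-style object owned by the
// MachineFunction.
#include <map>

namespace sketch {

// Stand-in for a target's MachineFunctionInfo subclass.
struct FooFunctionInfo {
  int VarArgsFrameIndex = 0;                   // cf. MipsFunctionInfo
  unsigned TmpSize = 0;                        // cf. PIC16MachineFunctionInfo
  std::map<unsigned, unsigned> FiTmpOffsetMap;

  int getVarArgsFrameIndex() const { return VarArgsFrameIndex; }
  void setVarArgsFrameIndex(int FI) { VarArgsFrameIndex = FI; }
};

// Stand-in for MachineFunction::getInfo<T>().
struct MachineFunction {
  FooFunctionInfo Info;
  template <typename T> T *getInfo() { return &Info; }
};

// A const lowering hook can still record per-function facts, because the
// state now lives on the MachineFunction rather than on the lowering object.
struct FooTargetLowering {
  void rememberVarArgsSlot(MachineFunction &MF, int FI) const {
    FooFunctionInfo *FuncInfo = MF.getInfo<FooFunctionInfo>();
    if (!FuncInfo->getVarArgsFrameIndex())
      FuncInfo->setVarArgsFrameIndex(FI);
  }
};

} // namespace sketch

This mirrors, for example, how the Mips LowerFormalArguments hunk records the first vararg slot via MipsFI->setVarArgsFrameIndex(FI) instead of assigning to a member of MipsTargetLowering, and how PIC16's GetTmpOffsetForFI now takes a MachineFunction so it can consult the per-function FiTmpOffsetMap.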
diff --git a/lib/Target/PIC16/CMakeLists.txt b/lib/Target/PIC16/CMakeLists.txt index 208b067..cd4afe8 100644 --- a/lib/Target/PIC16/CMakeLists.txt +++ b/lib/Target/PIC16/CMakeLists.txt @@ -22,4 +22,5 @@ add_llvm_target(PIC16 PIC16Subtarget.cpp PIC16TargetMachine.cpp PIC16TargetObjectFile.cpp + PIC16SelectionDAGInfo.cpp ) diff --git a/lib/Target/PIC16/PIC16.h b/lib/Target/PIC16/PIC16.h index 8d067de..cee55f4 100644 --- a/lib/Target/PIC16/PIC16.h +++ b/lib/Target/PIC16/PIC16.h @@ -21,6 +21,7 @@ #include <sstream> #include <cstring> #include <string> +#include <vector> namespace llvm { class PIC16TargetMachine; @@ -52,17 +53,34 @@ namespace PIC16CC { UDATA_SHR }; + class ESNames { + std::vector<char*> stk; + ESNames() {} + public: + ~ESNames() { + std::vector<char*>::iterator it = stk.end(); + it--; + while(stk.end() != stk.begin()) + { + char* p = *it; + delete [] p; + it--; + stk.pop_back(); + } + } - // External symbol names require memory to live till the program end. - // So we have to allocate it and keep. - // FIXME: Don't leak the allocated strings. - inline static const char *createESName (const std::string &name) { - char *tmpName = new char[name.size() + 1]; - memcpy(tmpName, name.c_str(), name.size() + 1); - return tmpName; - } - + // External symbol names require memory to live till the program end. + // So we have to allocate it and keep. Push all such allocations into a + // vector so that they get freed up on termination. + inline static const char *createESName (const std::string &name) { + static ESNames esn; + char *tmpName = new char[name.size() + 1]; + memcpy(tmpName, name.c_str(), name.size() + 1); + esn.stk.push_back(tmpName); + return tmpName; + } + }; inline static const char *PIC16CondCodeToString(PIC16CC::CondCodes CC) { switch (CC) { diff --git a/lib/Target/PIC16/PIC16ISelDAGToDAG.h b/lib/Target/PIC16/PIC16ISelDAGToDAG.h index 8ed5bf7..f1fcec5 100644 --- a/lib/Target/PIC16/PIC16ISelDAGToDAG.h +++ b/lib/Target/PIC16/PIC16ISelDAGToDAG.h @@ -14,9 +14,9 @@ #define DEBUG_TYPE "pic16-isel" #include "PIC16.h" -#include "PIC16ISelLowering.h" #include "PIC16RegisterInfo.h" #include "PIC16TargetMachine.h" +#include "PIC16MachineFunctionInfo.h" #include "llvm/CodeGen/SelectionDAGISel.h" #include "llvm/Support/Compiler.h" #include "llvm/Support/raw_ostream.h" @@ -29,19 +29,16 @@ namespace { class VISIBILITY_HIDDEN PIC16DAGToDAGISel : public SelectionDAGISel { /// TM - Keep a reference to PIC16TargetMachine. - PIC16TargetMachine &TM; + const PIC16TargetMachine &TM; /// PIC16Lowering - This object fully describes how to lower LLVM code to an /// PIC16-specific SelectionDAG. 
- PIC16TargetLowering &PIC16Lowering; + const PIC16TargetLowering &PIC16Lowering; public: explicit PIC16DAGToDAGISel(PIC16TargetMachine &tm) : SelectionDAGISel(tm), - TM(tm), PIC16Lowering(*TM.getTargetLowering()) { - // Keep PIC16 specific DAGISel to use during the lowering - PIC16Lowering.ISel = this; - } + TM(tm), PIC16Lowering(*TM.getTargetLowering()) {} // Pass Name virtual const char *getPassName() const { diff --git a/lib/Target/PIC16/PIC16ISelLowering.cpp b/lib/Target/PIC16/PIC16ISelLowering.cpp index d17abb9..f479f46 100644 --- a/lib/Target/PIC16/PIC16ISelLowering.cpp +++ b/lib/Target/PIC16/PIC16ISelLowering.cpp @@ -16,6 +16,7 @@ #include "PIC16ISelLowering.h" #include "PIC16TargetObjectFile.h" #include "PIC16TargetMachine.h" +#include "PIC16MachineFunctionInfo.h" #include "llvm/DerivedTypes.h" #include "llvm/GlobalValue.h" #include "llvm/Function.h" @@ -24,6 +25,7 @@ #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/SelectionDAGISel.h" #include "llvm/Support/ErrorHandling.h" @@ -116,7 +118,7 @@ static const char *getIntrinsicName(unsigned opcode) { std::string Fullname = prefix + tagname + Basename; // The name has to live through program life. - return createESName(Fullname); + return ESNames::createESName(Fullname); } // getStdLibCallName - Get the name for the standard library function. @@ -139,12 +141,12 @@ static const char *getStdLibCallName(unsigned opcode) { std::string LibCallName = prefix + BaseName; // The name has to live through program life. - return createESName(LibCallName); + return ESNames::createESName(LibCallName); } // PIC16TargetLowering Constructor. PIC16TargetLowering::PIC16TargetLowering(PIC16TargetMachine &TM) - : TargetLowering(TM, new PIC16TargetObjectFile()), TmpSize(0) { + : TargetLowering(TM, new PIC16TargetObjectFile()) { Subtarget = &TM.getSubtarget<PIC16Subtarget>(); @@ -321,18 +323,29 @@ static SDValue getOutFlag(SDValue &Op) { return Flag; } // Get the TmpOffset for FrameIndex -unsigned PIC16TargetLowering::GetTmpOffsetForFI(unsigned FI, unsigned size) { +unsigned PIC16TargetLowering::GetTmpOffsetForFI(unsigned FI, unsigned size, + MachineFunction &MF) const { + PIC16MachineFunctionInfo *FuncInfo = MF.getInfo<PIC16MachineFunctionInfo>(); + std::map<unsigned, unsigned> &FiTmpOffsetMap = FuncInfo->getFiTmpOffsetMap(); + std::map<unsigned, unsigned>::iterator MapIt = FiTmpOffsetMap.find(FI); if (MapIt != FiTmpOffsetMap.end()) return MapIt->second; // This FI (FrameIndex) is not yet mapped, so map it - FiTmpOffsetMap[FI] = TmpSize; - TmpSize += size; + FiTmpOffsetMap[FI] = FuncInfo->getTmpSize(); + FuncInfo->setTmpSize(FuncInfo->getTmpSize() + size); return FiTmpOffsetMap[FI]; } +void PIC16TargetLowering::ResetTmpOffsetMap(SelectionDAG &DAG) const { + MachineFunction &MF = DAG.getMachineFunction(); + PIC16MachineFunctionInfo *FuncInfo = MF.getInfo<PIC16MachineFunctionInfo>(); + FuncInfo->getFiTmpOffsetMap().clear(); + FuncInfo->setTmpSize(0); +} + // To extract chain value from the SDValue Nodes // This function will help to maintain the chain extracting // code at one place. 
In case of any change in future it will @@ -390,7 +403,7 @@ PIC16TargetLowering::setPIC16LibcallName(PIC16ISD::PIC16Libcall Call, } const char * -PIC16TargetLowering::getPIC16LibcallName(PIC16ISD::PIC16Libcall Call) { +PIC16TargetLowering::getPIC16LibcallName(PIC16ISD::PIC16Libcall Call) const { return PIC16LibcallNames[Call]; } @@ -398,7 +411,7 @@ SDValue PIC16TargetLowering::MakePIC16Libcall(PIC16ISD::PIC16Libcall Call, EVT RetVT, const SDValue *Ops, unsigned NumOps, bool isSigned, - SelectionDAG &DAG, DebugLoc dl) { + SelectionDAG &DAG, DebugLoc dl) const { TargetLowering::ArgListTy Args; Args.reserve(NumOps); @@ -456,7 +469,7 @@ const char *PIC16TargetLowering::getTargetNodeName(unsigned Opcode) const { void PIC16TargetLowering::ReplaceNodeResults(SDNode *N, SmallVectorImpl<SDValue>&Results, - SelectionDAG &DAG) { + SelectionDAG &DAG) const { switch (N->getOpcode()) { case ISD::GlobalAddress: @@ -483,7 +496,8 @@ void PIC16TargetLowering::ReplaceNodeResults(SDNode *N, } } -SDValue PIC16TargetLowering::ExpandFrameIndex(SDNode *N, SelectionDAG &DAG) { +SDValue PIC16TargetLowering::ExpandFrameIndex(SDNode *N, + SelectionDAG &DAG) const { // Currently handling FrameIndex of size MVT::i16 only // One example of this scenario is when return value is written on @@ -518,7 +532,7 @@ SDValue PIC16TargetLowering::ExpandFrameIndex(SDNode *N, SelectionDAG &DAG) { } -SDValue PIC16TargetLowering::ExpandStore(SDNode *N, SelectionDAG &DAG) { +SDValue PIC16TargetLowering::ExpandStore(SDNode *N, SelectionDAG &DAG) const { StoreSDNode *St = cast<StoreSDNode>(N); SDValue Chain = St->getChain(); SDValue Src = St->getValue(); @@ -636,8 +650,9 @@ SDValue PIC16TargetLowering::ExpandStore(SDNode *N, SelectionDAG &DAG) { } } -SDValue PIC16TargetLowering::ExpandExternalSymbol(SDNode *N, SelectionDAG &DAG) -{ +SDValue PIC16TargetLowering::ExpandExternalSymbol(SDNode *N, + SelectionDAG &DAG) + const { ExternalSymbolSDNode *ES = dyn_cast<ExternalSymbolSDNode>(SDValue(N, 0)); // FIXME there isn't really debug info here DebugLoc dl = ES->getDebugLoc(); @@ -651,7 +666,8 @@ SDValue PIC16TargetLowering::ExpandExternalSymbol(SDNode *N, SelectionDAG &DAG) } // ExpandGlobalAddress - -SDValue PIC16TargetLowering::ExpandGlobalAddress(SDNode *N, SelectionDAG &DAG) { +SDValue PIC16TargetLowering::ExpandGlobalAddress(SDNode *N, + SelectionDAG &DAG) const { GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(SDValue(N, 0)); // FIXME there isn't really debug info here DebugLoc dl = G->getDebugLoc(); @@ -666,7 +682,7 @@ SDValue PIC16TargetLowering::ExpandGlobalAddress(SDNode *N, SelectionDAG &DAG) { return DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i16, Lo, Hi); } -bool PIC16TargetLowering::isDirectAddress(const SDValue &Op) { +bool PIC16TargetLowering::isDirectAddress(const SDValue &Op) const { assert (Op.getNode() != NULL && "Can't operate on NULL SDNode!!"); if (Op.getOpcode() == ISD::BUILD_PAIR) { @@ -677,7 +693,7 @@ bool PIC16TargetLowering::isDirectAddress(const SDValue &Op) { } // Return true if DirectAddress is in ROM_SPACE -bool PIC16TargetLowering::isRomAddress(const SDValue &Op) { +bool PIC16TargetLowering::isRomAddress(const SDValue &Op) const { // RomAddress is a GlobalAddress in ROM_SPACE_ // If the Op is not a GlobalAddress return NULL without checking @@ -703,7 +719,7 @@ bool PIC16TargetLowering::isRomAddress(const SDValue &Op) { // parts of Op in Lo and Hi. 
void PIC16TargetLowering::GetExpandedParts(SDValue Op, SelectionDAG &DAG, - SDValue &Lo, SDValue &Hi) { + SDValue &Lo, SDValue &Hi) const { SDNode *N = Op.getNode(); DebugLoc dl = N->getDebugLoc(); EVT NewVT = getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); @@ -720,11 +736,12 @@ void PIC16TargetLowering::GetExpandedParts(SDValue Op, SelectionDAG &DAG, // Legalize FrameIndex into ExternalSymbol and offset. void PIC16TargetLowering::LegalizeFrameIndex(SDValue Op, SelectionDAG &DAG, - SDValue &ES, int &Offset) { + SDValue &ES, int &Offset) const { MachineFunction &MF = DAG.getMachineFunction(); const Function *Func = MF.getFunction(); MachineFrameInfo *MFI = MF.getFrameInfo(); + PIC16MachineFunctionInfo *FuncInfo = MF.getInfo<PIC16MachineFunctionInfo>(); const std::string Name = Func->getName(); FrameIndexSDNode *FR = dyn_cast<FrameIndexSDNode>(Op); @@ -736,8 +753,8 @@ PIC16TargetLowering::LegalizeFrameIndex(SDValue Op, SelectionDAG &DAG, // the list and add their requested size. unsigned FIndex = FR->getIndex(); const char *tmpName; - if (FIndex < ReservedFrameCount) { - tmpName = createESName(PAN::getFrameLabel(Name)); + if (FIndex < FuncInfo->getReservedFrameCount()) { + tmpName = ESNames::createESName(PAN::getFrameLabel(Name)); ES = DAG.getTargetExternalSymbol(tmpName, MVT::i8); Offset = 0; for (unsigned i=0; i<FIndex ; ++i) { @@ -745,9 +762,9 @@ PIC16TargetLowering::LegalizeFrameIndex(SDValue Op, SelectionDAG &DAG, } } else { // FrameIndex has been made for some temporary storage - tmpName = createESName(PAN::getTempdataLabel(Name)); + tmpName = ESNames::createESName(PAN::getTempdataLabel(Name)); ES = DAG.getTargetExternalSymbol(tmpName, MVT::i8); - Offset = GetTmpOffsetForFI(FIndex, MFI->getObjectSize(FIndex)); + Offset = GetTmpOffsetForFI(FIndex, MFI->getObjectSize(FIndex), MF); } return; @@ -767,7 +784,7 @@ PIC16TargetLowering::LegalizeFrameIndex(SDValue Op, SelectionDAG &DAG, void PIC16TargetLowering::LegalizeAddress(SDValue Ptr, SelectionDAG &DAG, SDValue &Lo, SDValue &Hi, - unsigned &Offset, DebugLoc dl) { + unsigned &Offset, DebugLoc dl) const { // Offset, by default, should be 0 Offset = 0; @@ -846,7 +863,7 @@ void PIC16TargetLowering::LegalizeAddress(SDValue Ptr, SelectionDAG &DAG, return; } -SDValue PIC16TargetLowering::ExpandLoad(SDNode *N, SelectionDAG &DAG) { +SDValue PIC16TargetLowering::ExpandLoad(SDNode *N, SelectionDAG &DAG) const { LoadSDNode *LD = dyn_cast<LoadSDNode>(SDValue(N, 0)); SDValue Chain = LD->getChain(); SDValue Ptr = LD->getBasePtr(); @@ -961,7 +978,7 @@ SDValue PIC16TargetLowering::ExpandLoad(SDNode *N, SelectionDAG &DAG) { return DAG.getNode(ISD::MERGE_VALUES, dl, Tys, BP, Chain); } -SDValue PIC16TargetLowering::LowerShift(SDValue Op, SelectionDAG &DAG) { +SDValue PIC16TargetLowering::LowerShift(SDValue Op, SelectionDAG &DAG) const { // We should have handled larger operands in type legalizer itself. assert (Op.getValueType() == MVT::i8 && "illegal shift to lower"); @@ -991,7 +1008,7 @@ SDValue PIC16TargetLowering::LowerShift(SDValue Op, SelectionDAG &DAG) { return Call; } -SDValue PIC16TargetLowering::LowerMUL(SDValue Op, SelectionDAG &DAG) { +SDValue PIC16TargetLowering::LowerMUL(SDValue Op, SelectionDAG &DAG) const { // We should have handled larger operands in type legalizer itself. 
assert (Op.getValueType() == MVT::i8 && "illegal multiply to lower"); @@ -1007,7 +1024,7 @@ SDValue PIC16TargetLowering::LowerMUL(SDValue Op, SelectionDAG &DAG) { void PIC16TargetLowering::LowerOperationWrapper(SDNode *N, SmallVectorImpl<SDValue>&Results, - SelectionDAG &DAG) { + SelectionDAG &DAG) const { SDValue Op = SDValue(N, 0); SDValue Res; unsigned i; @@ -1031,7 +1048,8 @@ PIC16TargetLowering::LowerOperationWrapper(SDNode *N, } } -SDValue PIC16TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) { +SDValue PIC16TargetLowering::LowerOperation(SDValue Op, + SelectionDAG &DAG) const { switch (Op.getOpcode()) { case ISD::ADD: case ISD::ADDC: @@ -1065,7 +1083,7 @@ SDValue PIC16TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) { SDValue PIC16TargetLowering::ConvertToMemOperand(SDValue Op, SelectionDAG &DAG, - DebugLoc dl) { + DebugLoc dl) const { assert (Op.getValueType() == MVT::i8 && "illegal value type to store on stack."); @@ -1077,7 +1095,7 @@ SDValue PIC16TargetLowering::ConvertToMemOperand(SDValue Op, // Put the value on stack. // Get a stack slot index and convert to es. int FI = MF.getFrameInfo()->CreateStackObject(1, 1, false); - const char *tmpName = createESName(PAN::getTempdataLabel(FuncName)); + const char *tmpName = ESNames::createESName(PAN::getTempdataLabel(FuncName)); SDValue ES = DAG.getTargetExternalSymbol(tmpName, MVT::i8); // Store the value to ES. @@ -1085,14 +1103,14 @@ SDValue PIC16TargetLowering::ConvertToMemOperand(SDValue Op, DAG.getEntryNode(), Op, ES, DAG.getConstant (1, MVT::i8), // Banksel. - DAG.getConstant (GetTmpOffsetForFI(FI, 1), + DAG.getConstant (GetTmpOffsetForFI(FI, 1, MF), MVT::i8)); // Load the value from ES. SDVTList Tys = DAG.getVTList(MVT::i8, MVT::Other); SDValue Load = DAG.getNode(PIC16ISD::PIC16Load, dl, Tys, Store, ES, DAG.getConstant (1, MVT::i8), - DAG.getConstant (GetTmpOffsetForFI(FI, 1), + DAG.getConstant (GetTmpOffsetForFI(FI, 1, MF), MVT::i8)); return Load.getValue(0); @@ -1103,7 +1121,7 @@ LowerIndirectCallArguments(SDValue Chain, SDValue InFlag, SDValue DataAddr_Lo, SDValue DataAddr_Hi, const SmallVectorImpl<ISD::OutputArg> &Outs, const SmallVectorImpl<ISD::InputArg> &Ins, - DebugLoc dl, SelectionDAG &DAG) { + DebugLoc dl, SelectionDAG &DAG) const { unsigned NumOps = Outs.size(); // If call has no arguments then do nothing and return. @@ -1140,7 +1158,7 @@ LowerIndirectCallArguments(SDValue Chain, SDValue InFlag, SDValue PIC16TargetLowering:: LowerDirectCallArguments(SDValue ArgLabel, SDValue Chain, SDValue InFlag, const SmallVectorImpl<ISD::OutputArg> &Outs, - DebugLoc dl, SelectionDAG &DAG) { + DebugLoc dl, SelectionDAG &DAG) const { unsigned NumOps = Outs.size(); std::string Name; SDValue Arg, StoreAt; @@ -1197,7 +1215,7 @@ LowerIndirectCallReturn(SDValue Chain, SDValue InFlag, SDValue DataAddr_Lo, SDValue DataAddr_Hi, const SmallVectorImpl<ISD::InputArg> &Ins, DebugLoc dl, SelectionDAG &DAG, - SmallVectorImpl<SDValue> &InVals) { + SmallVectorImpl<SDValue> &InVals) const { unsigned RetVals = Ins.size(); // If call does not have anything to return @@ -1224,7 +1242,7 @@ SDValue PIC16TargetLowering:: LowerDirectCallReturn(SDValue RetLabel, SDValue Chain, SDValue InFlag, const SmallVectorImpl<ISD::InputArg> &Ins, DebugLoc dl, SelectionDAG &DAG, - SmallVectorImpl<SDValue> &InVals) { + SmallVectorImpl<SDValue> &InVals) const { // Currently handling primitive types only. 
They will come in // i8 parts @@ -1264,7 +1282,7 @@ SDValue PIC16TargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool isVarArg, const SmallVectorImpl<ISD::OutputArg> &Outs, - DebugLoc dl, SelectionDAG &DAG) { + DebugLoc dl, SelectionDAG &DAG) const { // Number of values to return unsigned NumRet = Outs.size(); @@ -1275,7 +1293,7 @@ PIC16TargetLowering::LowerReturn(SDValue Chain, const Function *F = MF.getFunction(); std::string FuncName = F->getName(); - const char *tmpName = createESName(PAN::getFrameLabel(FuncName)); + const char *tmpName = ESNames::createESName(PAN::getFrameLabel(FuncName)); SDValue ES = DAG.getTargetExternalSymbol(tmpName, MVT::i8); SDValue BS = DAG.getConstant(1, MVT::i8); SDValue RetVal; @@ -1292,7 +1310,7 @@ PIC16TargetLowering::LowerReturn(SDValue Chain, void PIC16TargetLowering:: GetDataAddress(DebugLoc dl, SDValue Callee, SDValue &Chain, SDValue &DataAddr_Lo, SDValue &DataAddr_Hi, - SelectionDAG &DAG) { + SelectionDAG &DAG) const { assert (Callee.getOpcode() == PIC16ISD::PIC16Connect && "Don't know what to do of such callee!!"); SDValue ZeroOperand = DAG.getConstant(0, MVT::i8); @@ -1358,7 +1376,7 @@ PIC16TargetLowering::LowerCall(SDValue Chain, SDValue Callee, const SmallVectorImpl<ISD::OutputArg> &Outs, const SmallVectorImpl<ISD::InputArg> &Ins, DebugLoc dl, SelectionDAG &DAG, - SmallVectorImpl<SDValue> &InVals) { + SmallVectorImpl<SDValue> &InVals) const { // PIC16 target does not yet support tail call optimization. isTailCall = false; @@ -1409,7 +1427,7 @@ PIC16TargetLowering::LowerCall(SDValue Chain, SDValue Callee, if (IsDirectCall) { // Considering the GlobalAddressNode case here. if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) { - GlobalValue *GV = G->getGlobal(); + const GlobalValue *GV = G->getGlobal(); Callee = DAG.getTargetGlobalAddress(GV, MVT::i8); Name = G->getGlobal()->getName(); } else {// Considering the ExternalSymbol case here @@ -1419,11 +1437,11 @@ PIC16TargetLowering::LowerCall(SDValue Chain, SDValue Callee, } // Label for argument passing - const char *argFrame = createESName(PAN::getArgsLabel(Name)); + const char *argFrame = ESNames::createESName(PAN::getArgsLabel(Name)); ArgLabel = DAG.getTargetExternalSymbol(argFrame, MVT::i8); // Label for reading return value - const char *retName = createESName(PAN::getRetvalLabel(Name)); + const char *retName = ESNames::createESName(PAN::getRetvalLabel(Name)); RetLabel = DAG.getTargetExternalSymbol(retName, MVT::i8); } else { // if indirect call @@ -1476,7 +1494,7 @@ PIC16TargetLowering::LowerCall(SDValue Chain, SDValue Callee, DataAddr_Hi, Ins, dl, DAG, InVals); } -bool PIC16TargetLowering::isDirectLoad(const SDValue Op) { +bool PIC16TargetLowering::isDirectLoad(const SDValue Op) const { if (Op.getOpcode() == PIC16ISD::PIC16Load) if (Op.getOperand(1).getOpcode() == ISD::TargetGlobalAddress || Op.getOperand(1).getOpcode() == ISD::TargetExternalSymbol) @@ -1490,7 +1508,7 @@ bool PIC16TargetLowering::isDirectLoad(const SDValue Op) { // no instruction that can operation on two registers. Most insns take // one register and one memory operand (addwf) / Constant (addlw). bool PIC16TargetLowering::NeedToConvertToMemOp(SDValue Op, unsigned &MemOp, - SelectionDAG &DAG) { + SelectionDAG &DAG) const { // If one of the operand is a constant, return false. 
if (Op.getOperand(0).getOpcode() == ISD::Constant || Op.getOperand(1).getOpcode() == ISD::Constant) @@ -1512,7 +1530,9 @@ bool PIC16TargetLowering::NeedToConvertToMemOp(SDValue Op, unsigned &MemOp, // Direct load operands are folded in binary operations. But before folding // verify if this folding is legal. Fold only if it is legal otherwise // convert this direct load to a separate memory operation. - if(ISel->IsLegalToFold(Op.getOperand(0), Op.getNode(), Op.getNode())) + if (SelectionDAGISel::IsLegalToFold(Op.getOperand(0), + Op.getNode(), Op.getNode(), + CodeGenOpt::Default)) return false; else MemOp = 0; @@ -1539,7 +1559,9 @@ bool PIC16TargetLowering::NeedToConvertToMemOp(SDValue Op, unsigned &MemOp, // Direct load operands are folded in binary operations. But before folding // verify if this folding is legal. Fold only if it is legal otherwise // convert this direct load to a separate memory operation. - if(ISel->IsLegalToFold(Op.getOperand(1), Op.getNode(), Op.getNode())) + if (SelectionDAGISel::IsLegalToFold(Op.getOperand(1), + Op.getNode(), Op.getNode(), + CodeGenOpt::Default)) return false; else MemOp = 1; @@ -1550,7 +1572,7 @@ bool PIC16TargetLowering::NeedToConvertToMemOp(SDValue Op, unsigned &MemOp, // LowerBinOp - Lower a commutative binary operation that does not // affect status flag carry. -SDValue PIC16TargetLowering::LowerBinOp(SDValue Op, SelectionDAG &DAG) { +SDValue PIC16TargetLowering::LowerBinOp(SDValue Op, SelectionDAG &DAG) const { DebugLoc dl = Op.getDebugLoc(); // We should have handled larger operands in type legalizer itself. @@ -1571,7 +1593,7 @@ SDValue PIC16TargetLowering::LowerBinOp(SDValue Op, SelectionDAG &DAG) { // LowerADD - Lower all types of ADD operations including the ones // that affects carry. -SDValue PIC16TargetLowering::LowerADD(SDValue Op, SelectionDAG &DAG) { +SDValue PIC16TargetLowering::LowerADD(SDValue Op, SelectionDAG &DAG) const { // We should have handled larger operands in type legalizer itself. assert (Op.getValueType() == MVT::i8 && "illegal add to lower"); DebugLoc dl = Op.getDebugLoc(); @@ -1600,7 +1622,7 @@ SDValue PIC16TargetLowering::LowerADD(SDValue Op, SelectionDAG &DAG) { return Op; } -SDValue PIC16TargetLowering::LowerSUB(SDValue Op, SelectionDAG &DAG) { +SDValue PIC16TargetLowering::LowerSUB(SDValue Op, SelectionDAG &DAG) const { DebugLoc dl = Op.getDebugLoc(); // We should have handled larger operands in type legalizer itself. 
assert (Op.getValueType() == MVT::i8 && "illegal sub to lower"); @@ -1647,15 +1669,19 @@ SDValue PIC16TargetLowering::LowerSUB(SDValue Op, SelectionDAG &DAG) { return Op; } -void PIC16TargetLowering::InitReservedFrameCount(const Function *F) { +void PIC16TargetLowering::InitReservedFrameCount(const Function *F, + SelectionDAG &DAG) const { + MachineFunction &MF = DAG.getMachineFunction(); + PIC16MachineFunctionInfo *FuncInfo = MF.getInfo<PIC16MachineFunctionInfo>(); + unsigned NumArgs = F->arg_size(); bool isVoidFunc = (F->getReturnType()->getTypeID() == Type::VoidTyID); if (isVoidFunc) - ReservedFrameCount = NumArgs; + FuncInfo->setReservedFrameCount(NumArgs); else - ReservedFrameCount = NumArgs + 1; + FuncInfo->setReservedFrameCount(NumArgs + 1); } // LowerFormalArguments - Argument values are loaded from the @@ -1669,7 +1695,8 @@ PIC16TargetLowering::LowerFormalArguments(SDValue Chain, const SmallVectorImpl<ISD::InputArg> &Ins, DebugLoc dl, SelectionDAG &DAG, - SmallVectorImpl<SDValue> &InVals) { + SmallVectorImpl<SDValue> &InVals) + const { unsigned NumArgVals = Ins.size(); // Get the callee's name to create the <fname>.args label to pass args. @@ -1678,12 +1705,12 @@ PIC16TargetLowering::LowerFormalArguments(SDValue Chain, std::string FuncName = F->getName(); // Reset the map of FI and TmpOffset - ResetTmpOffsetMap(); + ResetTmpOffsetMap(DAG); // Initialize the ReserveFrameCount - InitReservedFrameCount(F); + InitReservedFrameCount(F, DAG); // Create the <fname>.args external symbol. - const char *tmpName = createESName(PAN::getArgsLabel(FuncName)); + const char *tmpName = ESNames::createESName(PAN::getArgsLabel(FuncName)); SDValue ES = DAG.getTargetExternalSymbol(tmpName, MVT::i8); // Load arg values from the label + offset. @@ -1782,7 +1809,7 @@ static void LookThroughSetCC(SDValue &LHS, SDValue &RHS, // Returns appropriate CMP insn and corresponding condition code in PIC16CC SDValue PIC16TargetLowering::getPIC16Cmp(SDValue LHS, SDValue RHS, unsigned CC, SDValue &PIC16CC, - SelectionDAG &DAG, DebugLoc dl) { + SelectionDAG &DAG, DebugLoc dl) const { PIC16CC::CondCodes CondCode = (PIC16CC::CondCodes) CC; // PIC16 sub is literal - W. So Swap the operands and condition if needed. @@ -1846,7 +1873,8 @@ SDValue PIC16TargetLowering::getPIC16Cmp(SDValue LHS, SDValue RHS, } -SDValue PIC16TargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) { +SDValue PIC16TargetLowering::LowerSELECT_CC(SDValue Op, + SelectionDAG &DAG) const { SDValue LHS = Op.getOperand(0); SDValue RHS = Op.getOperand(1); ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(4))->get(); @@ -1874,8 +1902,7 @@ SDValue PIC16TargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) { MachineBasicBlock * PIC16TargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, - MachineBasicBlock *BB, - DenseMap<MachineBasicBlock*, MachineBasicBlock*> *EM) const { + MachineBasicBlock *BB) const { const TargetInstrInfo &TII = *getTargetMachine().getInstrInfo(); unsigned CC = (PIC16CC::CondCodes)MI->getOperand(3).getImm(); DebugLoc dl = MI->getDebugLoc(); @@ -1903,12 +1930,9 @@ PIC16TargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, // Update machine-CFG edges by first adding all successors of the current // block to the new block which will contain the Phi node for the select. - // Also inform sdisel of the edge changes. 
for (MachineBasicBlock::succ_iterator I = BB->succ_begin(), - E = BB->succ_end(); I != E; ++I) { - EM->insert(std::make_pair(*I, sinkMBB)); + E = BB->succ_end(); I != E; ++I) sinkMBB->addSuccessor(*I); - } // Next, remove all successors of the current block, and add the true // and fallthrough blocks as its successors. while (!BB->succ_empty()) @@ -1938,7 +1962,7 @@ PIC16TargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, } -SDValue PIC16TargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) { +SDValue PIC16TargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const { SDValue Chain = Op.getOperand(0); ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(1))->get(); SDValue LHS = Op.getOperand(2); // LHS of the condition. diff --git a/lib/Target/PIC16/PIC16ISelLowering.h b/lib/Target/PIC16/PIC16ISelLowering.h index de14520..eea17f8 100644 --- a/lib/Target/PIC16/PIC16ISelLowering.h +++ b/lib/Target/PIC16/PIC16ISelLowering.h @@ -18,7 +18,6 @@ #include "PIC16.h" #include "PIC16Subtarget.h" #include "llvm/CodeGen/SelectionDAG.h" -#include "llvm/CodeGen/SelectionDAGISel.h" #include "llvm/Target/TargetLowering.h" #include <map> @@ -85,53 +84,52 @@ namespace llvm { /// getSetCCResultType - Return the ISD::SETCC ValueType virtual MVT::SimpleValueType getSetCCResultType(EVT ValType) const; virtual MVT::SimpleValueType getCmpLibcallReturnType() const; - SDValue LowerShift(SDValue Op, SelectionDAG &DAG); - SDValue LowerMUL(SDValue Op, SelectionDAG &DAG); - SDValue LowerADD(SDValue Op, SelectionDAG &DAG); - SDValue LowerSUB(SDValue Op, SelectionDAG &DAG); - SDValue LowerBinOp(SDValue Op, SelectionDAG &DAG); + SDValue LowerShift(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerMUL(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerADD(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerSUB(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerBinOp(SDValue Op, SelectionDAG &DAG) const; // Call returns SDValue LowerDirectCallReturn(SDValue RetLabel, SDValue Chain, SDValue InFlag, const SmallVectorImpl<ISD::InputArg> &Ins, DebugLoc dl, SelectionDAG &DAG, - SmallVectorImpl<SDValue> &InVals); + SmallVectorImpl<SDValue> &InVals) const; SDValue LowerIndirectCallReturn(SDValue Chain, SDValue InFlag, SDValue DataAddr_Lo, SDValue DataAddr_Hi, const SmallVectorImpl<ISD::InputArg> &Ins, DebugLoc dl, SelectionDAG &DAG, - SmallVectorImpl<SDValue> &InVals); + SmallVectorImpl<SDValue> &InVals) const; // Call arguments SDValue LowerDirectCallArguments(SDValue ArgLabel, SDValue Chain, SDValue InFlag, const SmallVectorImpl<ISD::OutputArg> &Outs, - DebugLoc dl, SelectionDAG &DAG); + DebugLoc dl, SelectionDAG &DAG) const; SDValue LowerIndirectCallArguments(SDValue Chain, SDValue InFlag, SDValue DataAddr_Lo, SDValue DataAddr_Hi, const SmallVectorImpl<ISD::OutputArg> &Outs, const SmallVectorImpl<ISD::InputArg> &Ins, - DebugLoc dl, SelectionDAG &DAG); + DebugLoc dl, SelectionDAG &DAG) const; - SDValue LowerBR_CC(SDValue Op, SelectionDAG &DAG); - SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG); + SDValue LowerBR_CC(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const; SDValue getPIC16Cmp(SDValue LHS, SDValue RHS, unsigned OrigCC, SDValue &CC, - SelectionDAG &DAG, DebugLoc dl); - virtual MachineBasicBlock *EmitInstrWithCustomInserter(MachineInstr *MI, - MachineBasicBlock *MBB, - DenseMap<MachineBasicBlock*, MachineBasicBlock*> *EM) const; + SelectionDAG &DAG, DebugLoc dl) const; + virtual MachineBasicBlock * + EmitInstrWithCustomInserter(MachineInstr 
*MI, + MachineBasicBlock *MBB) const; - - virtual SDValue LowerOperation(SDValue Op, SelectionDAG &DAG); + virtual SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const; virtual void ReplaceNodeResults(SDNode *N, SmallVectorImpl<SDValue> &Results, - SelectionDAG &DAG); + SelectionDAG &DAG) const; virtual void LowerOperationWrapper(SDNode *N, SmallVectorImpl<SDValue> &Results, - SelectionDAG &DAG); + SelectionDAG &DAG) const; virtual SDValue LowerFormalArguments(SDValue Chain, @@ -139,7 +137,7 @@ namespace llvm { bool isVarArg, const SmallVectorImpl<ISD::InputArg> &Ins, DebugLoc dl, SelectionDAG &DAG, - SmallVectorImpl<SDValue> &InVals); + SmallVectorImpl<SDValue> &InVals) const; virtual SDValue LowerCall(SDValue Chain, SDValue Callee, @@ -147,19 +145,19 @@ namespace llvm { const SmallVectorImpl<ISD::OutputArg> &Outs, const SmallVectorImpl<ISD::InputArg> &Ins, DebugLoc dl, SelectionDAG &DAG, - SmallVectorImpl<SDValue> &InVals); + SmallVectorImpl<SDValue> &InVals) const; virtual SDValue LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool isVarArg, const SmallVectorImpl<ISD::OutputArg> &Outs, - DebugLoc dl, SelectionDAG &DAG); + DebugLoc dl, SelectionDAG &DAG) const; - SDValue ExpandStore(SDNode *N, SelectionDAG &DAG); - SDValue ExpandLoad(SDNode *N, SelectionDAG &DAG); - SDValue ExpandGlobalAddress(SDNode *N, SelectionDAG &DAG); - SDValue ExpandExternalSymbol(SDNode *N, SelectionDAG &DAG); - SDValue ExpandFrameIndex(SDNode *N, SelectionDAG &DAG); + SDValue ExpandStore(SDNode *N, SelectionDAG &DAG) const; + SDValue ExpandLoad(SDNode *N, SelectionDAG &DAG) const; + SDValue ExpandGlobalAddress(SDNode *N, SelectionDAG &DAG) const; + SDValue ExpandExternalSymbol(SDNode *N, SelectionDAG &DAG) const; + SDValue ExpandFrameIndex(SDNode *N, SelectionDAG &DAG) const; SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const; SDValue PerformPIC16LoadCombine(SDNode *N, DAGCombinerInfo &DCI) const; @@ -168,13 +166,11 @@ namespace llvm { // This function returns the Tmp Offset for FrameIndex. If any TmpOffset // already exists for the FI then it returns the same else it creates the // new offset and returns. - unsigned GetTmpOffsetForFI(unsigned FI, unsigned slot_size); - void ResetTmpOffsetMap() { FiTmpOffsetMap.clear(); SetTmpSize(0); } - void InitReservedFrameCount(const Function *F); - - // Return the size of Tmp variable - unsigned GetTmpSize() { return TmpSize; } - void SetTmpSize(unsigned Size) { TmpSize = Size; } + unsigned GetTmpOffsetForFI(unsigned FI, unsigned slot_size, + MachineFunction &MF) const; + void ResetTmpOffsetMap(SelectionDAG &DAG) const; + void InitReservedFrameCount(const Function *F, + SelectionDAG &DAG) const; /// getFunctionAlignment - Return the Log2 alignment of this function. virtual unsigned getFunctionAlignment(const Function *) const { @@ -184,43 +180,45 @@ namespace llvm { private: // If the Node is a BUILD_PAIR representing a direct Address, // then this function will return true. - bool isDirectAddress(const SDValue &Op); + bool isDirectAddress(const SDValue &Op) const; // If the Node is a DirectAddress in ROM_SPACE then this // function will return true - bool isRomAddress(const SDValue &Op); + bool isRomAddress(const SDValue &Op) const; // Extract the Lo and Hi component of Op. void GetExpandedParts(SDValue Op, SelectionDAG &DAG, SDValue &Lo, - SDValue &Hi); + SDValue &Hi) const; // Load pointer can be a direct or indirect address. In PIC16 direct // addresses need Banksel and Indirect addresses need to be loaded to // FSR first. 
Handle address specific cases here. void LegalizeAddress(SDValue Ptr, SelectionDAG &DAG, SDValue &Chain, - SDValue &NewPtr, unsigned &Offset, DebugLoc dl); + SDValue &NewPtr, unsigned &Offset, DebugLoc dl) const; // FrameIndex should be broken down into ExternalSymbol and FrameOffset. void LegalizeFrameIndex(SDValue Op, SelectionDAG &DAG, SDValue &ES, - int &Offset); + int &Offset) const; // For indirect calls data address of the callee frame need to be // extracted. This function fills the arguments DataAddr_Lo and // DataAddr_Hi with the address of the callee frame. void GetDataAddress(DebugLoc dl, SDValue Callee, SDValue &Chain, SDValue &DataAddr_Lo, SDValue &DataAddr_Hi, - SelectionDAG &DAG); + SelectionDAG &DAG) const; // We can not have both operands of a binary operation in W. // This function is used to put one operand on stack and generate a load. - SDValue ConvertToMemOperand(SDValue Op, SelectionDAG &DAG, DebugLoc dl); + SDValue ConvertToMemOperand(SDValue Op, SelectionDAG &DAG, + DebugLoc dl) const; // This function checks if we need to put an operand of an operation on // stack and generate a load or not. // DAG parameter is required to access DAG information during // analysis. - bool NeedToConvertToMemOp(SDValue Op, unsigned &MemOp, SelectionDAG &DAG); + bool NeedToConvertToMemOp(SDValue Op, unsigned &MemOp, + SelectionDAG &DAG) const; /// Subtarget - Keep a pointer to the PIC16Subtarget around so that we can /// make the right decision when generating code for different targets. @@ -233,31 +231,15 @@ namespace llvm { // To set and retrieve the lib call names. void setPIC16LibcallName(PIC16ISD::PIC16Libcall Call, const char *Name); - const char *getPIC16LibcallName(PIC16ISD::PIC16Libcall Call); + const char *getPIC16LibcallName(PIC16ISD::PIC16Libcall Call) const; // Make PIC16 Libcall. SDValue MakePIC16Libcall(PIC16ISD::PIC16Libcall Call, EVT RetVT, const SDValue *Ops, unsigned NumOps, bool isSigned, - SelectionDAG &DAG, DebugLoc dl); + SelectionDAG &DAG, DebugLoc dl) const; // Check if operation has a direct load operand. - inline bool isDirectLoad(const SDValue Op); - - public: - // Keep a pointer to SelectionDAGISel to access its public - // interface (It is required during legalization) - SelectionDAGISel *ISel; - - private: - // The frameindexes generated for spill/reload are stack based. - // This maps maintain zero based indexes for these FIs. - std::map<unsigned, unsigned> FiTmpOffsetMap; - unsigned TmpSize; - - // These are the frames for return value and argument passing - // These FrameIndices will be expanded to foo.frame external symbol - // and all others will be expanded to foo.tmp external symbol. 
- unsigned ReservedFrameCount; + inline bool isDirectLoad(const SDValue Op) const; }; } // namespace llvm diff --git a/lib/Target/PIC16/PIC16InstrInfo.cpp b/lib/Target/PIC16/PIC16InstrInfo.cpp index 365e8b2..9e415e0 100644 --- a/lib/Target/PIC16/PIC16InstrInfo.cpp +++ b/lib/Target/PIC16/PIC16InstrInfo.cpp @@ -71,14 +71,14 @@ void PIC16InstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, unsigned SrcReg, bool isKill, int FI, const TargetRegisterClass *RC) const { - PIC16TargetLowering *PTLI = TM.getTargetLowering(); + const PIC16TargetLowering *PTLI = TM.getTargetLowering(); DebugLoc DL; if (I != MBB.end()) DL = I->getDebugLoc(); const Function *Func = MBB.getParent()->getFunction(); const std::string FuncName = Func->getName(); - const char *tmpName = createESName(PAN::getTempdataLabel(FuncName)); + const char *tmpName = ESNames::createESName(PAN::getTempdataLabel(FuncName)); // On the order of operands here: think "movwf SrcReg, tmp_slot, offset". if (RC == PIC16::GPRRegisterClass) { @@ -86,7 +86,7 @@ void PIC16InstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB, //MachineRegisterInfo &RI = MF.getRegInfo(); BuildMI(MBB, I, DL, get(PIC16::movwf)) .addReg(SrcReg, getKillRegState(isKill)) - .addImm(PTLI->GetTmpOffsetForFI(FI, 1)) + .addImm(PTLI->GetTmpOffsetForFI(FI, 1, *MBB.getParent())) .addExternalSymbol(tmpName) .addImm(1); // Emit banksel for it. } @@ -101,7 +101,7 @@ void PIC16InstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB, : PIC16::save_fsr1; BuildMI(MBB, I, DL, get(opcode)) .addReg(SrcReg, getKillRegState(isKill)) - .addImm(PTLI->GetTmpOffsetForFI(FI, 3)) + .addImm(PTLI->GetTmpOffsetForFI(FI, 3, *MBB.getParent())) .addExternalSymbol(tmpName) .addImm(1); // Emit banksel for it. } @@ -113,21 +113,21 @@ void PIC16InstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, unsigned DestReg, int FI, const TargetRegisterClass *RC) const { - PIC16TargetLowering *PTLI = TM.getTargetLowering(); + const PIC16TargetLowering *PTLI = TM.getTargetLowering(); DebugLoc DL; if (I != MBB.end()) DL = I->getDebugLoc(); const Function *Func = MBB.getParent()->getFunction(); const std::string FuncName = Func->getName(); - const char *tmpName = createESName(PAN::getTempdataLabel(FuncName)); + const char *tmpName = ESNames::createESName(PAN::getTempdataLabel(FuncName)); // On the order of operands here: think "movf FrameIndex, W". if (RC == PIC16::GPRRegisterClass) { //MachineFunction &MF = *MBB.getParent(); //MachineRegisterInfo &RI = MF.getRegInfo(); BuildMI(MBB, I, DL, get(PIC16::movf), DestReg) - .addImm(PTLI->GetTmpOffsetForFI(FI, 1)) + .addImm(PTLI->GetTmpOffsetForFI(FI, 1, *MBB.getParent())) .addExternalSymbol(tmpName) .addImm(1); // Emit banksel for it. } @@ -141,7 +141,7 @@ void PIC16InstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB, unsigned opcode = (DestReg == PIC16::FSR0) ? PIC16::restore_fsr0 : PIC16::restore_fsr1; BuildMI(MBB, I, DL, get(opcode), DestReg) - .addImm(PTLI->GetTmpOffsetForFI(FI, 3)) + .addImm(PTLI->GetTmpOffsetForFI(FI, 3, *MBB.getParent())) .addExternalSymbol(tmpName) .addImm(1); // Emit banksel for it. 
} diff --git a/lib/Target/PIC16/PIC16MachineFunctionInfo.h b/lib/Target/PIC16/PIC16MachineFunctionInfo.h new file mode 100644 index 0000000..bdf5086 --- /dev/null +++ b/lib/Target/PIC16/PIC16MachineFunctionInfo.h @@ -0,0 +1,52 @@ +//====- PIC16MachineFuctionInfo.h - PIC16 machine function info -*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file declares PIC16-specific per-machine-function information. +// +//===----------------------------------------------------------------------===// + +#ifndef PIC16MACHINEFUNCTIONINFO_H +#define PIC16MACHINEFUNCTIONINFO_H + +#include "llvm/CodeGen/MachineFunction.h" + +namespace llvm { + +/// PIC16MachineFunctionInfo - This class is derived from MachineFunction +/// private PIC16 target-specific information for each MachineFunction. +class PIC16MachineFunctionInfo : public MachineFunctionInfo { + // The frameindexes generated for spill/reload are stack based. + // This maps maintain zero based indexes for these FIs. + std::map<unsigned, unsigned> FiTmpOffsetMap; + unsigned TmpSize; + + // These are the frames for return value and argument passing + // These FrameIndices will be expanded to foo.frame external symbol + // and all others will be expanded to foo.tmp external symbol. + unsigned ReservedFrameCount; + +public: + PIC16MachineFunctionInfo() + : TmpSize(0), ReservedFrameCount(0) {} + + explicit PIC16MachineFunctionInfo(MachineFunction &MF) + : TmpSize(0), ReservedFrameCount(0) {} + + std::map<unsigned, unsigned> &getFiTmpOffsetMap() { return FiTmpOffsetMap; } + + unsigned getTmpSize() const { return TmpSize; } + void setTmpSize(unsigned Size) { TmpSize = Size; } + + unsigned getReservedFrameCount() const { return ReservedFrameCount; } + void setReservedFrameCount(unsigned Count) { ReservedFrameCount = Count; } +}; + +} // End llvm namespace + +#endif diff --git a/lib/Target/PIC16/PIC16Passes/PIC16Cloner.cpp b/lib/Target/PIC16/PIC16Passes/PIC16Cloner.cpp index 865da35..c282521 100644 --- a/lib/Target/PIC16/PIC16Passes/PIC16Cloner.cpp +++ b/lib/Target/PIC16/PIC16Passes/PIC16Cloner.cpp @@ -172,7 +172,7 @@ void PIC16Cloner::CloneAutos(Function *F) { VarName = I->getName().str(); if (PAN::isLocalToFunc(FnName, VarName)) { // Auto variable for current function found. Clone it. - GlobalVariable *GV = I; + const GlobalVariable *GV = I; const Type *InitTy = GV->getInitializer()->getType(); GlobalVariable *ClonedGV = diff --git a/lib/Target/PIC16/PIC16SelectionDAGInfo.cpp b/lib/Target/PIC16/PIC16SelectionDAGInfo.cpp new file mode 100644 index 0000000..76c6c60 --- /dev/null +++ b/lib/Target/PIC16/PIC16SelectionDAGInfo.cpp @@ -0,0 +1,22 @@ +//===-- PIC16SelectionDAGInfo.cpp - PIC16 SelectionDAG Info ---------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the PIC16SelectionDAGInfo class. 
+// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "pic16-selectiondag-info" +#include "PIC16SelectionDAGInfo.h" +using namespace llvm; + +PIC16SelectionDAGInfo::PIC16SelectionDAGInfo() { +} + +PIC16SelectionDAGInfo::~PIC16SelectionDAGInfo() { +} diff --git a/lib/Target/PIC16/PIC16SelectionDAGInfo.h b/lib/Target/PIC16/PIC16SelectionDAGInfo.h new file mode 100644 index 0000000..112480e5 --- /dev/null +++ b/lib/Target/PIC16/PIC16SelectionDAGInfo.h @@ -0,0 +1,29 @@ +//===-- PIC16SelectionDAGInfo.h - PIC16 SelectionDAG Info -------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines the PIC16 subclass for TargetSelectionDAGInfo. +// +//===----------------------------------------------------------------------===// + +#ifndef PIC16SELECTIONDAGINFO_H +#define PIC16SELECTIONDAGINFO_H + +#include "llvm/Target/TargetSelectionDAGInfo.h" + +namespace llvm { + +class PIC16SelectionDAGInfo : public TargetSelectionDAGInfo { +public: + PIC16SelectionDAGInfo(); + ~PIC16SelectionDAGInfo(); +}; + +} + +#endif diff --git a/lib/Target/PIC16/PIC16TargetMachine.h b/lib/Target/PIC16/PIC16TargetMachine.h index b11fdd5..849845a 100644 --- a/lib/Target/PIC16/PIC16TargetMachine.h +++ b/lib/Target/PIC16/PIC16TargetMachine.h @@ -50,8 +50,8 @@ public: return &(InstrInfo.getRegisterInfo()); } - virtual PIC16TargetLowering *getTargetLowering() const { - return const_cast<PIC16TargetLowering*>(&TLInfo); + virtual const PIC16TargetLowering *getTargetLowering() const { + return &TLInfo; } virtual bool addInstSelector(PassManagerBase &PM, diff --git a/lib/Target/PIC16/PIC16TargetObjectFile.cpp b/lib/Target/PIC16/PIC16TargetObjectFile.cpp index b891c18..ff0f971 100644 --- a/lib/Target/PIC16/PIC16TargetObjectFile.cpp +++ b/lib/Target/PIC16/PIC16TargetObjectFile.cpp @@ -8,7 +8,6 @@ //===----------------------------------------------------------------------===// #include "PIC16TargetObjectFile.h" -#include "PIC16ISelLowering.h" #include "PIC16TargetMachine.h" #include "PIC16Section.h" #include "llvm/DerivedTypes.h" @@ -27,7 +26,7 @@ PIC16TargetObjectFile::~PIC16TargetObjectFile() { /// Find a pic16 section. Return null if not found. Do not create one. PIC16Section *PIC16TargetObjectFile:: -findPIC16Section(const std::string &Name) { +findPIC16Section(const std::string &Name) const { /// Return if we have an already existing one. PIC16Section *Entry = SectionsByName[Name]; if (Entry) @@ -134,7 +133,7 @@ void PIC16TargetObjectFile::Initialize(MCContext &Ctx, const TargetMachine &tm){ const MCSection * PIC16TargetObjectFile::allocateUDATA(const GlobalVariable *GV) const { assert(GV->hasInitializer() && "This global doesn't need space"); - Constant *C = GV->getInitializer(); + const Constant *C = GV->getInitializer(); assert(C->isNullValue() && "Unitialized globals has non-zero initializer"); // Find how much space this global needs. 
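The PIC16 hunks above move the spill-slot bookkeeping (FiTmpOffsetMap, TmpSize, ReservedFrameCount) out of PIC16TargetLowering and into the new PIC16MachineFunctionInfo, which is why GetTmpOffsetForFI now takes a MachineFunction and why the PIC16InstrInfo callers pass *MBB.getParent(). The actual body of GetTmpOffsetForFI is not shown in this excerpt, so the following is only a sketch of the pattern, built from the accessors declared in PIC16MachineFunctionInfo.h above; the real implementation may differ in detail:

    unsigned PIC16TargetLowering::GetTmpOffsetForFI(unsigned FI, unsigned slot_size,
                                                    MachineFunction &MF) const {
      // Per-function state now lives on the MachineFunction, not on the shared,
      // const TargetLowering object.
      PIC16MachineFunctionInfo *FuncInfo = MF.getInfo<PIC16MachineFunctionInfo>();
      std::map<unsigned, unsigned> &FiTmpOffsetMap = FuncInfo->getFiTmpOffsetMap();

      // Reuse the slot if this frame index was already assigned one.
      std::map<unsigned, unsigned>::iterator It = FiTmpOffsetMap.find(FI);
      if (It != FiTmpOffsetMap.end())
        return It->second;

      // Otherwise carve a new zero-based slot out of the temp area and grow it.
      unsigned NewOffset = FuncInfo->getTmpSize();
      FiTmpOffsetMap[FI] = NewOffset;
      FuncInfo->setTmpSize(NewOffset + slot_size);
      return NewOffset;
    }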
@@ -169,7 +168,7 @@ PIC16TargetObjectFile::allocateUDATA(const GlobalVariable *GV) const { const MCSection * PIC16TargetObjectFile::allocateIDATA(const GlobalVariable *GV) const{ assert(GV->hasInitializer() && "This global doesn't need space"); - Constant *C = GV->getInitializer(); + const Constant *C = GV->getInitializer(); assert(!C->isNullValue() && "initialized globals has zero initializer"); assert(GV->getType()->getAddressSpace() == PIC16ISD::RAM_SPACE && "can allocate initialized RAM data only"); diff --git a/lib/Target/PIC16/PIC16TargetObjectFile.h b/lib/Target/PIC16/PIC16TargetObjectFile.h index cf8bf84..b1eb9f9 100644 --- a/lib/Target/PIC16/PIC16TargetObjectFile.h +++ b/lib/Target/PIC16/PIC16TargetObjectFile.h @@ -122,7 +122,7 @@ namespace llvm { void Initialize(MCContext &Ctx, const TargetMachine &TM); /// Return the section with the given Name. Null if not found. - PIC16Section *findPIC16Section(const std::string &Name); + PIC16Section *findPIC16Section(const std::string &Name) const; /// Override section allocations for user specified sections. virtual const MCSection * diff --git a/lib/Target/PowerPC/AsmPrinter/CMakeLists.txt b/lib/Target/PowerPC/AsmPrinter/CMakeLists.txt index 236b264..42cd486 100644 --- a/lib/Target/PowerPC/AsmPrinter/CMakeLists.txt +++ b/lib/Target/PowerPC/AsmPrinter/CMakeLists.txt @@ -3,4 +3,4 @@ include_directories( ${CMAKE_CURRENT_BINARY_DIR}/.. ${CMAKE_CURRENT_SOURCE_DIR}/ add_llvm_library(LLVMPowerPCAsmPrinter PPCAsmPrinter.cpp ) -add_dependencies(LLVMPowerPCAsmPrinter PowerPCCodeGenTable_gen)
\ No newline at end of file +add_dependencies(LLVMPowerPCAsmPrinter PowerPCCodeGenTable_gen) diff --git a/lib/Target/PowerPC/AsmPrinter/PPCAsmPrinter.cpp b/lib/Target/PowerPC/AsmPrinter/PPCAsmPrinter.cpp index 6056564..e35dc57 100644 --- a/lib/Target/PowerPC/AsmPrinter/PPCAsmPrinter.cpp +++ b/lib/Target/PowerPC/AsmPrinter/PPCAsmPrinter.cpp @@ -21,6 +21,7 @@ #include "PPCPredicates.h" #include "PPCTargetMachine.h" #include "PPCSubtarget.h" +#include "llvm/Analysis/DebugInfo.h" #include "llvm/Constants.h" #include "llvm/DerivedTypes.h" #include "llvm/Module.h" @@ -199,8 +200,8 @@ namespace { raw_ostream &O) { const MachineOperand &MO = MI->getOperand(OpNo); if (TM.getRelocationModel() != Reloc::Static) { - if (MO.getType() == MachineOperand::MO_GlobalAddress) { - GlobalValue *GV = MO.getGlobal(); + if (MO.isGlobal()) { + const GlobalValue *GV = MO.getGlobal(); if (GV->isDeclaration() || GV->isWeakForLinker()) { // Dynamically-resolved functions need a stub for the function. MCSymbol *Sym = GetSymbolWithGlobalValueBase(GV, "$stub"); @@ -213,7 +214,7 @@ namespace { return; } } - if (MO.getType() == MachineOperand::MO_ExternalSymbol) { + if (MO.isSymbol()) { SmallString<128> TempNameStr; TempNameStr += StringRef(MO.getSymbolName()); TempNameStr += StringRef("$stub"); @@ -311,7 +312,7 @@ namespace { void printTOCEntryLabel(const MachineInstr *MI, unsigned OpNo, raw_ostream &O) { const MachineOperand &MO = MI->getOperand(OpNo); - assert(MO.getType() == MachineOperand::MO_GlobalAddress); + assert(MO.isGlobal()); MCSymbol *Sym = Mang->getSymbol(MO.getGlobal()); // Map symbol -> label of TOC entry. @@ -405,7 +406,7 @@ void PPCAsmPrinter::printOp(const MachineOperand &MO, raw_ostream &O) { } case MachineOperand::MO_GlobalAddress: { // Computing the address of a global symbol, not calling it. - GlobalValue *GV = MO.getGlobal(); + const GlobalValue *GV = MO.getGlobal(); MCSymbol *SymToPrint; // External or weakly linked global variables need non-lazily-resolved stubs @@ -535,6 +536,23 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) { SmallString<128> Str; raw_svector_ostream O(Str); + if (MI->getOpcode() == TargetOpcode::DBG_VALUE) { + unsigned NOps = MI->getNumOperands(); + assert(NOps==4); + O << '\t' << MAI->getCommentString() << "DEBUG_VALUE: "; + // cast away const; DIetc do not take const operands for some reason. + DIVariable V(const_cast<MDNode *>(MI->getOperand(NOps-1).getMetadata())); + O << V.getName(); + O << " <- "; + // Frame address. Currently handles register +- offset only. + assert(MI->getOperand(0).isReg() && MI->getOperand(1).isImm()); + O << '['; printOperand(MI, 0, O); O << '+'; printOperand(MI, 1, O); + O << ']'; + O << "+"; + printOperand(MI, NOps-2, O); + OutStreamer.EmitRawText(O.str()); + return; + } // Check for slwi/srwi mnemonics. if (MI->getOpcode() == PPC::RLWINM) { unsigned char SH = MI->getOperand(2).getImm(); @@ -649,18 +667,18 @@ void PPCDarwinAsmPrinter::EmitStartOfAsmFile(Module &M) { // Prime text sections so they are adjacent. This reduces the likelihood a // large data or debug section causes a branch to exceed 16M limit. 
- TargetLoweringObjectFileMachO &TLOFMacho = - static_cast<TargetLoweringObjectFileMachO &>(getObjFileLowering()); + const TargetLoweringObjectFileMachO &TLOFMacho = + static_cast<const TargetLoweringObjectFileMachO &>(getObjFileLowering()); OutStreamer.SwitchSection(TLOFMacho.getTextCoalSection()); if (TM.getRelocationModel() == Reloc::PIC_) { OutStreamer.SwitchSection( - TLOFMacho.getMachOSection("__TEXT", "__picsymbolstub1", + OutContext.getMachOSection("__TEXT", "__picsymbolstub1", MCSectionMachO::S_SYMBOL_STUBS | MCSectionMachO::S_ATTR_PURE_INSTRUCTIONS, 32, SectionKind::getText())); } else if (TM.getRelocationModel() == Reloc::DynamicNoPIC) { OutStreamer.SwitchSection( - TLOFMacho.getMachOSection("__TEXT","__symbol_stub1", + OutContext.getMachOSection("__TEXT","__symbol_stub1", MCSectionMachO::S_SYMBOL_STUBS | MCSectionMachO::S_ATTR_PURE_INSTRUCTIONS, 16, SectionKind::getText())); @@ -686,8 +704,8 @@ void PPCDarwinAsmPrinter:: EmitFunctionStubs(const MachineModuleInfoMachO::SymbolListTy &Stubs) { bool isPPC64 = TM.getTargetData()->getPointerSizeInBits() == 64; - TargetLoweringObjectFileMachO &TLOFMacho = - static_cast<TargetLoweringObjectFileMachO &>(getObjFileLowering()); + const TargetLoweringObjectFileMachO &TLOFMacho = + static_cast<const TargetLoweringObjectFileMachO &>(getObjFileLowering()); // .lazy_symbol_pointer const MCSection *LSPSection = TLOFMacho.getLazySymbolPointerSection(); @@ -695,10 +713,10 @@ EmitFunctionStubs(const MachineModuleInfoMachO::SymbolListTy &Stubs) { // Output stubs for dynamically-linked functions if (TM.getRelocationModel() == Reloc::PIC_) { const MCSection *StubSection = - TLOFMacho.getMachOSection("__TEXT", "__picsymbolstub1", - MCSectionMachO::S_SYMBOL_STUBS | - MCSectionMachO::S_ATTR_PURE_INSTRUCTIONS, - 32, SectionKind::getText()); + OutContext.getMachOSection("__TEXT", "__picsymbolstub1", + MCSectionMachO::S_SYMBOL_STUBS | + MCSectionMachO::S_ATTR_PURE_INSTRUCTIONS, + 32, SectionKind::getText()); for (unsigned i = 0, e = Stubs.size(); i != e; ++i) { OutStreamer.SwitchSection(StubSection); EmitAlignment(4); @@ -742,10 +760,10 @@ EmitFunctionStubs(const MachineModuleInfoMachO::SymbolListTy &Stubs) { } const MCSection *StubSection = - TLOFMacho.getMachOSection("__TEXT","__symbol_stub1", - MCSectionMachO::S_SYMBOL_STUBS | - MCSectionMachO::S_ATTR_PURE_INSTRUCTIONS, - 16, SectionKind::getText()); + OutContext.getMachOSection("__TEXT","__symbol_stub1", + MCSectionMachO::S_SYMBOL_STUBS | + MCSectionMachO::S_ATTR_PURE_INSTRUCTIONS, + 16, SectionKind::getText()); for (unsigned i = 0, e = Stubs.size(); i != e; ++i) { MCSymbol *Stub = Stubs[i].first; MCSymbol *RawSym = Stubs[i].second.getPointer(); @@ -782,8 +800,8 @@ bool PPCDarwinAsmPrinter::doFinalization(Module &M) { bool isPPC64 = TM.getTargetData()->getPointerSizeInBits() == 64; // Darwin/PPC always uses mach-o. - TargetLoweringObjectFileMachO &TLOFMacho = - static_cast<TargetLoweringObjectFileMachO &>(getObjFileLowering()); + const TargetLoweringObjectFileMachO &TLOFMacho = + static_cast<const TargetLoweringObjectFileMachO &>(getObjFileLowering()); MachineModuleInfoMachO &MMIMacho = MMI->getObjFileInfo<MachineModuleInfoMachO>(); @@ -794,8 +812,8 @@ bool PPCDarwinAsmPrinter::doFinalization(Module &M) { if (MAI->doesSupportExceptionHandling() && MMI) { // Add the (possibly multiple) personalities to the set of global values. // Only referenced functions get into the Personalities list. 
- const std::vector<Function *> &Personalities = MMI->getPersonalities(); - for (std::vector<Function *>::const_iterator I = Personalities.begin(), + const std::vector<const Function*> &Personalities = MMI->getPersonalities(); + for (std::vector<const Function*>::const_iterator I = Personalities.begin(), E = Personalities.end(); I != E; ++I) { if (*I) { MCSymbol *NLPSym = GetSymbolWithGlobalValueBase(*I, "$non_lazy_ptr"); diff --git a/lib/Target/PowerPC/CMakeLists.txt b/lib/Target/PowerPC/CMakeLists.txt index c997c5c..7ffc5eb 100644 --- a/lib/Target/PowerPC/CMakeLists.txt +++ b/lib/Target/PowerPC/CMakeLists.txt @@ -24,6 +24,7 @@ add_llvm_target(PowerPCCodeGen PPCRegisterInfo.cpp PPCSubtarget.cpp PPCTargetMachine.cpp + PPCSelectionDAGInfo.cpp ) target_link_libraries (LLVMPowerPCCodeGen LLVMSelectionDAG) diff --git a/lib/Target/PowerPC/PPCCodeEmitter.cpp b/lib/Target/PowerPC/PPCCodeEmitter.cpp index f7c27d4..361fa70 100644 --- a/lib/Target/PowerPC/PPCCodeEmitter.cpp +++ b/lib/Target/PowerPC/PPCCodeEmitter.cpp @@ -202,7 +202,7 @@ unsigned PPCCodeEmitter::getMachineOpValue(const MachineInstr &MI, MachineRelocation R; if (MO.isGlobal()) { R = MachineRelocation::getGV(MCE.getCurrentPCOffset(), Reloc, - MO.getGlobal(), 0, + const_cast<GlobalValue *>(MO.getGlobal()), 0, isa<Function>(MO.getGlobal())); } else if (MO.isSymbol()) { R = MachineRelocation::getExtSym(MCE.getCurrentPCOffset(), diff --git a/lib/Target/PowerPC/PPCISelDAGToDAG.cpp b/lib/Target/PowerPC/PPCISelDAGToDAG.cpp index 1e32384..3d9f8aa 100644 --- a/lib/Target/PowerPC/PPCISelDAGToDAG.cpp +++ b/lib/Target/PowerPC/PPCISelDAGToDAG.cpp @@ -16,7 +16,6 @@ #include "PPC.h" #include "PPCPredicates.h" #include "PPCTargetMachine.h" -#include "PPCISelLowering.h" #include "PPCHazardRecognizers.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineFunction.h" @@ -41,8 +40,8 @@ namespace { /// instructions for SelectionDAG operations. /// class PPCDAGToDAGISel : public SelectionDAGISel { - PPCTargetMachine &TM; - PPCTargetLowering &PPCLowering; + const PPCTargetMachine &TM; + const PPCTargetLowering &PPCLowering; const PPCSubtarget &PPCSubTarget; unsigned GlobalBaseReg; public: diff --git a/lib/Target/PowerPC/PPCISelLowering.cpp b/lib/Target/PowerPC/PPCISelLowering.cpp index 9cd01be..6f11953 100644 --- a/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/lib/Target/PowerPC/PPCISelLowering.cpp @@ -397,7 +397,7 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM) /// getByValTypeAlignment - Return the desired alignment for ByVal aggregate /// function arguments in the caller parameter area. unsigned PPCTargetLowering::getByValTypeAlignment(const Type *Ty) const { - TargetMachine &TM = getTargetMachine(); + const TargetMachine &TM = getTargetMachine(); // Darwin passes everything on 4 byte boundary. if (TM.getSubtarget<PPCSubtarget>().isDarwin()) return 4; @@ -476,7 +476,7 @@ static bool isFloatingPointZero(SDValue Op) { else if (ISD::isEXTLoad(Op.getNode()) || ISD::isNON_EXTLoad(Op.getNode())) { // Maybe this has already been legalized into the constant pool? 
if (ConstantPoolSDNode *CP = dyn_cast<ConstantPoolSDNode>(Op.getOperand(1))) - if (ConstantFP *CFP = dyn_cast<ConstantFP>(CP->getConstVal())) + if (const ConstantFP *CFP = dyn_cast<ConstantFP>(CP->getConstVal())) return CFP->getValueAPF().isZero(); } return false; @@ -1095,10 +1095,10 @@ bool PPCTargetLowering::getPreIndexedAddressParts(SDNode *N, SDValue &Base, //===----------------------------------------------------------------------===// SDValue PPCTargetLowering::LowerConstantPool(SDValue Op, - SelectionDAG &DAG) { + SelectionDAG &DAG) const { EVT PtrVT = Op.getValueType(); ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op); - Constant *C = CP->getConstVal(); + const Constant *C = CP->getConstVal(); SDValue CPI = DAG.getTargetConstantPool(C, PtrVT, CP->getAlignment()); SDValue Zero = DAG.getConstant(0, PtrVT); // FIXME there isn't really any debug info here @@ -1129,7 +1129,7 @@ SDValue PPCTargetLowering::LowerConstantPool(SDValue Op, return Lo; } -SDValue PPCTargetLowering::LowerJumpTable(SDValue Op, SelectionDAG &DAG) { +SDValue PPCTargetLowering::LowerJumpTable(SDValue Op, SelectionDAG &DAG) const { EVT PtrVT = Op.getValueType(); JumpTableSDNode *JT = cast<JumpTableSDNode>(Op); SDValue JTI = DAG.getTargetJumpTable(JT->getIndex(), PtrVT); @@ -1163,16 +1163,17 @@ SDValue PPCTargetLowering::LowerJumpTable(SDValue Op, SelectionDAG &DAG) { } SDValue PPCTargetLowering::LowerGlobalTLSAddress(SDValue Op, - SelectionDAG &DAG) { + SelectionDAG &DAG) const { llvm_unreachable("TLS not implemented for PPC."); return SDValue(); // Not reached } -SDValue PPCTargetLowering::LowerBlockAddress(SDValue Op, SelectionDAG &DAG) { +SDValue PPCTargetLowering::LowerBlockAddress(SDValue Op, + SelectionDAG &DAG) const { EVT PtrVT = Op.getValueType(); DebugLoc DL = Op.getDebugLoc(); - BlockAddress *BA = cast<BlockAddressSDNode>(Op)->getBlockAddress(); + const BlockAddress *BA = cast<BlockAddressSDNode>(Op)->getBlockAddress(); SDValue TgtBA = DAG.getBlockAddress(BA, PtrVT, /*isTarget=*/true); SDValue Zero = DAG.getConstant(0, PtrVT); SDValue Hi = DAG.getNode(PPCISD::Hi, DL, PtrVT, TgtBA, Zero); @@ -1199,10 +1200,10 @@ SDValue PPCTargetLowering::LowerBlockAddress(SDValue Op, SelectionDAG &DAG) { } SDValue PPCTargetLowering::LowerGlobalAddress(SDValue Op, - SelectionDAG &DAG) { + SelectionDAG &DAG) const { EVT PtrVT = Op.getValueType(); GlobalAddressSDNode *GSDN = cast<GlobalAddressSDNode>(Op); - GlobalValue *GV = GSDN->getGlobal(); + const GlobalValue *GV = GSDN->getGlobal(); SDValue GA = DAG.getTargetGlobalAddress(GV, PtrVT, GSDN->getOffset()); SDValue Zero = DAG.getConstant(0, PtrVT); // FIXME there isn't really any debug info here @@ -1247,7 +1248,7 @@ SDValue PPCTargetLowering::LowerGlobalAddress(SDValue Op, false, false, 0); } -SDValue PPCTargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) { +SDValue PPCTargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const { ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get(); DebugLoc dl = Op.getDebugLoc(); @@ -1291,17 +1292,14 @@ SDValue PPCTargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) { } SDValue PPCTargetLowering::LowerVAARG(SDValue Op, SelectionDAG &DAG, - int VarArgsFrameIndex, - int VarArgsStackOffset, - unsigned VarArgsNumGPR, - unsigned VarArgsNumFPR, - const PPCSubtarget &Subtarget) { + const PPCSubtarget &Subtarget) const { llvm_unreachable("VAARG not yet implemented for the SVR4 ABI!"); return SDValue(); // Not reached } -SDValue PPCTargetLowering::LowerTRAMPOLINE(SDValue Op, SelectionDAG &DAG) { +SDValue 
PPCTargetLowering::LowerTRAMPOLINE(SDValue Op, + SelectionDAG &DAG) const { SDValue Chain = Op.getOperand(0); SDValue Trmp = Op.getOperand(1); // trampoline SDValue FPtr = Op.getOperand(2); // nested function @@ -1343,18 +1341,17 @@ SDValue PPCTargetLowering::LowerTRAMPOLINE(SDValue Op, SelectionDAG &DAG) { } SDValue PPCTargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG, - int VarArgsFrameIndex, - int VarArgsStackOffset, - unsigned VarArgsNumGPR, - unsigned VarArgsNumFPR, - const PPCSubtarget &Subtarget) { + const PPCSubtarget &Subtarget) const { + MachineFunction &MF = DAG.getMachineFunction(); + PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>(); + DebugLoc dl = Op.getDebugLoc(); if (Subtarget.isDarwinABI() || Subtarget.isPPC64()) { // vastart just stores the address of the VarArgsFrameIndex slot into the // memory location argument. EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); - SDValue FR = DAG.getFrameIndex(VarArgsFrameIndex, PtrVT); + SDValue FR = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT); const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue(); return DAG.getStore(Op.getOperand(0), dl, FR, Op.getOperand(1), SV, 0, false, false, 0); @@ -1385,14 +1382,16 @@ SDValue PPCTargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG, // } va_list[1]; - SDValue ArgGPR = DAG.getConstant(VarArgsNumGPR, MVT::i32); - SDValue ArgFPR = DAG.getConstant(VarArgsNumFPR, MVT::i32); + SDValue ArgGPR = DAG.getConstant(FuncInfo->getVarArgsNumGPR(), MVT::i32); + SDValue ArgFPR = DAG.getConstant(FuncInfo->getVarArgsNumFPR(), MVT::i32); EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); - SDValue StackOffsetFI = DAG.getFrameIndex(VarArgsStackOffset, PtrVT); - SDValue FR = DAG.getFrameIndex(VarArgsFrameIndex, PtrVT); + SDValue StackOffsetFI = DAG.getFrameIndex(FuncInfo->getVarArgsStackOffset(), + PtrVT); + SDValue FR = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), + PtrVT); uint64_t FrameOffset = PtrVT.getSizeInBits()/8; SDValue ConstFrameOffset = DAG.getConstant(FrameOffset, PtrVT); @@ -1525,7 +1524,8 @@ PPCTargetLowering::LowerFormalArguments(SDValue Chain, const SmallVectorImpl<ISD::InputArg> &Ins, DebugLoc dl, SelectionDAG &DAG, - SmallVectorImpl<SDValue> &InVals) { + SmallVectorImpl<SDValue> &InVals) + const { if (PPCSubTarget.isSVR4ABI() && !PPCSubTarget.isPPC64()) { return LowerFormalArguments_SVR4(Chain, CallConv, isVarArg, Ins, dl, DAG, InVals); @@ -1542,7 +1542,7 @@ PPCTargetLowering::LowerFormalArguments_SVR4( const SmallVectorImpl<ISD::InputArg> &Ins, DebugLoc dl, SelectionDAG &DAG, - SmallVectorImpl<SDValue> &InVals) { + SmallVectorImpl<SDValue> &InVals) const { // 32-bit SVR4 ABI Stack Frame Layout: // +-----------------------------------+ @@ -1575,6 +1575,7 @@ PPCTargetLowering::LowerFormalArguments_SVR4( MachineFunction &MF = DAG.getMachineFunction(); MachineFrameInfo *MFI = MF.getFrameInfo(); + PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>(); EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); // Potential tail calls could cause overwriting of argument stack slots. 
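Most of this patch is the mechanical part of making the SelectionDAG lowering entry points const. The practical consequence, visible in the VASTART/VAARG and LowerFormalArguments hunks around here, is that per-function data the hooks used to cache in PPCTargetLowering members (the VarArgs* fields deleted from PPCISelLowering.h further down) now lives in PPCFunctionInfo and is reached through the MachineFunction. A minimal sketch of that pattern (the helper name is invented for illustration; it uses only APIs and accessors that appear in this patch):

    // A const lowering hook can still record per-function facts: it goes
    // through the MachineFunction's target-specific info object.
    static SDValue rememberVarArgsArea(SelectionDAG &DAG, EVT PtrVT,
                                       int Depth /* bytes to reserve */) {
      MachineFunction &MF = DAG.getMachineFunction();
      PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();

      // Reserve the register-save area and remember where it is.
      int FI = MF.getFrameInfo()->CreateStackObject(Depth, 8, false);
      FuncInfo->setVarArgsFrameIndex(FI);

      // Later users (LowerVASTART, for example) read it back from the same place.
      return DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);
    }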
@@ -1688,24 +1689,27 @@ PPCTargetLowering::LowerFormalArguments_SVR4( }; const unsigned NumFPArgRegs = array_lengthof(FPArgRegs); - VarArgsNumGPR = CCInfo.getFirstUnallocated(GPArgRegs, NumGPArgRegs); - VarArgsNumFPR = CCInfo.getFirstUnallocated(FPArgRegs, NumFPArgRegs); + FuncInfo->setVarArgsNumGPR(CCInfo.getFirstUnallocated(GPArgRegs, + NumGPArgRegs)); + FuncInfo->setVarArgsNumFPR(CCInfo.getFirstUnallocated(FPArgRegs, + NumFPArgRegs)); // Make room for NumGPArgRegs and NumFPArgRegs. int Depth = NumGPArgRegs * PtrVT.getSizeInBits()/8 + NumFPArgRegs * EVT(MVT::f64).getSizeInBits()/8; - VarArgsStackOffset = MFI->CreateFixedObject(PtrVT.getSizeInBits()/8, - CCInfo.getNextStackOffset(), - true, false); + FuncInfo->setVarArgsStackOffset( + MFI->CreateFixedObject(PtrVT.getSizeInBits()/8, + CCInfo.getNextStackOffset(), + true, false)); - VarArgsFrameIndex = MFI->CreateStackObject(Depth, 8, false); - SDValue FIN = DAG.getFrameIndex(VarArgsFrameIndex, PtrVT); + FuncInfo->setVarArgsFrameIndex(MFI->CreateStackObject(Depth, 8, false)); + SDValue FIN = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT); // The fixed integer arguments of a variadic function are // stored to the VarArgsFrameIndex on the stack. unsigned GPRIndex = 0; - for (; GPRIndex != VarArgsNumGPR; ++GPRIndex) { + for (; GPRIndex != FuncInfo->getVarArgsNumGPR(); ++GPRIndex) { SDValue Val = DAG.getRegister(GPArgRegs[GPRIndex], PtrVT); SDValue Store = DAG.getStore(Chain, dl, Val, FIN, NULL, 0, false, false, 0); @@ -1736,7 +1740,7 @@ PPCTargetLowering::LowerFormalArguments_SVR4( // The double arguments are stored to the VarArgsFrameIndex // on the stack. unsigned FPRIndex = 0; - for (FPRIndex = 0; FPRIndex != VarArgsNumFPR; ++FPRIndex) { + for (FPRIndex = 0; FPRIndex != FuncInfo->getVarArgsNumFPR(); ++FPRIndex) { SDValue Val = DAG.getRegister(FPArgRegs[FPRIndex], MVT::f64); SDValue Store = DAG.getStore(Chain, dl, Val, FIN, NULL, 0, false, false, 0); @@ -1775,11 +1779,12 @@ PPCTargetLowering::LowerFormalArguments_Darwin( const SmallVectorImpl<ISD::InputArg> &Ins, DebugLoc dl, SelectionDAG &DAG, - SmallVectorImpl<SDValue> &InVals) { + SmallVectorImpl<SDValue> &InVals) const { // TODO: add description of PPC stack frame format, or at least some docs. // MachineFunction &MF = DAG.getMachineFunction(); MachineFrameInfo *MFI = MF.getFrameInfo(); + PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>(); EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); bool isPPC64 = PtrVT == MVT::i64; @@ -2090,9 +2095,10 @@ PPCTargetLowering::LowerFormalArguments_Darwin( if (isVarArg) { int Depth = ArgOffset; - VarArgsFrameIndex = MFI->CreateFixedObject(PtrVT.getSizeInBits()/8, - Depth, true, false); - SDValue FIN = DAG.getFrameIndex(VarArgsFrameIndex, PtrVT); + FuncInfo->setVarArgsFrameIndex( + MFI->CreateFixedObject(PtrVT.getSizeInBits()/8, + Depth, true, false)); + SDValue FIN = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT); // If this function is vararg, store any remaining integer argument regs // to their spots on the stack so that they may be loaded by deferencing the @@ -2359,7 +2365,7 @@ SDValue PPCTargetLowering::EmitTailCallLoadFPAndRetAddr(SelectionDAG & DAG, SDValue &LROpOut, SDValue &FPOpOut, bool isDarwinABI, - DebugLoc dl) { + DebugLoc dl) const { if (SPDiff) { // Load the LR and FP stack slot for later adjusting. EVT VT = PPCSubTarget.isPPC64() ? 
MVT::i64 : MVT::i32; @@ -2582,7 +2588,7 @@ PPCTargetLowering::LowerCallResult(SDValue Chain, SDValue InFlag, CallingConv::ID CallConv, bool isVarArg, const SmallVectorImpl<ISD::InputArg> &Ins, DebugLoc dl, SelectionDAG &DAG, - SmallVectorImpl<SDValue> &InVals) { + SmallVectorImpl<SDValue> &InVals) const { SmallVector<CCValAssign, 16> RVLocs; CCState CCRetInfo(CallConv, isVarArg, getTargetMachine(), @@ -2613,7 +2619,7 @@ PPCTargetLowering::FinishCall(CallingConv::ID CallConv, DebugLoc dl, SDValue &Callee, int SPDiff, unsigned NumBytes, const SmallVectorImpl<ISD::InputArg> &Ins, - SmallVectorImpl<SDValue> &InVals) { + SmallVectorImpl<SDValue> &InVals) const { std::vector<EVT> NodeTys; SmallVector<SDValue, 8> Ops; unsigned CallOpc = PrepareCall(DAG, Callee, InFlag, Chain, dl, SPDiff, @@ -2701,7 +2707,7 @@ PPCTargetLowering::LowerCall(SDValue Chain, SDValue Callee, const SmallVectorImpl<ISD::OutputArg> &Outs, const SmallVectorImpl<ISD::InputArg> &Ins, DebugLoc dl, SelectionDAG &DAG, - SmallVectorImpl<SDValue> &InVals) { + SmallVectorImpl<SDValue> &InVals) const { if (isTailCall) isTailCall = IsEligibleForTailCallOptimization(Callee, CallConv, isVarArg, Ins, DAG); @@ -2724,7 +2730,7 @@ PPCTargetLowering::LowerCall_SVR4(SDValue Chain, SDValue Callee, const SmallVectorImpl<ISD::OutputArg> &Outs, const SmallVectorImpl<ISD::InputArg> &Ins, DebugLoc dl, SelectionDAG &DAG, - SmallVectorImpl<SDValue> &InVals) { + SmallVectorImpl<SDValue> &InVals) const { // See PPCTargetLowering::LowerFormalArguments_SVR4() for a description // of the 32-bit SVR4 ABI stack frame layout. @@ -2930,7 +2936,7 @@ PPCTargetLowering::LowerCall_Darwin(SDValue Chain, SDValue Callee, const SmallVectorImpl<ISD::OutputArg> &Outs, const SmallVectorImpl<ISD::InputArg> &Ins, DebugLoc dl, SelectionDAG &DAG, - SmallVectorImpl<SDValue> &InVals) { + SmallVectorImpl<SDValue> &InVals) const { unsigned NumOps = Outs.size(); @@ -3291,7 +3297,7 @@ SDValue PPCTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool isVarArg, const SmallVectorImpl<ISD::OutputArg> &Outs, - DebugLoc dl, SelectionDAG &DAG) { + DebugLoc dl, SelectionDAG &DAG) const { SmallVector<CCValAssign, 16> RVLocs; CCState CCInfo(CallConv, isVarArg, getTargetMachine(), @@ -3323,7 +3329,7 @@ PPCTargetLowering::LowerReturn(SDValue Chain, } SDValue PPCTargetLowering::LowerSTACKRESTORE(SDValue Op, SelectionDAG &DAG, - const PPCSubtarget &Subtarget) { + const PPCSubtarget &Subtarget) const { // When we pop the dynamic allocation we need to restore the SP link. DebugLoc dl = Op.getDebugLoc(); @@ -3407,7 +3413,7 @@ PPCTargetLowering::getFramePointerFrameIndex(SelectionDAG & DAG) const { SDValue PPCTargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG, - const PPCSubtarget &Subtarget) { + const PPCSubtarget &Subtarget) const { // Get the inputs. SDValue Chain = Op.getOperand(0); SDValue Size = Op.getOperand(1); @@ -3428,7 +3434,7 @@ SDValue PPCTargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op, /// LowerSELECT_CC - Lower floating point select_cc's into fsel instruction when /// possible. -SDValue PPCTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) { +SDValue PPCTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const { // Not FP? Not a fsel. if (!Op.getOperand(0).getValueType().isFloatingPoint() || !Op.getOperand(2).getValueType().isFloatingPoint()) @@ -3502,7 +3508,7 @@ SDValue PPCTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) { // FIXME: Split this code up when LegalizeDAGTypes lands. 
SDValue PPCTargetLowering::LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG, - DebugLoc dl) { + DebugLoc dl) const { assert(Op.getOperand(0).getValueType().isFloatingPoint()); SDValue Src = Op.getOperand(0); if (Src.getValueType() == MVT::f32) @@ -3537,7 +3543,8 @@ SDValue PPCTargetLowering::LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG, false, false, 0); } -SDValue PPCTargetLowering::LowerSINT_TO_FP(SDValue Op, SelectionDAG &DAG) { +SDValue PPCTargetLowering::LowerSINT_TO_FP(SDValue Op, + SelectionDAG &DAG) const { DebugLoc dl = Op.getDebugLoc(); // Don't handle ppc_fp128 here; let it be lowered to a libcall. if (Op.getValueType() != MVT::f32 && Op.getValueType() != MVT::f64) @@ -3586,7 +3593,8 @@ SDValue PPCTargetLowering::LowerSINT_TO_FP(SDValue Op, SelectionDAG &DAG) { return FP; } -SDValue PPCTargetLowering::LowerFLT_ROUNDS_(SDValue Op, SelectionDAG &DAG) { +SDValue PPCTargetLowering::LowerFLT_ROUNDS_(SDValue Op, + SelectionDAG &DAG) const { DebugLoc dl = Op.getDebugLoc(); /* The rounding mode is in bits 30:31 of FPSR, and has the following @@ -3649,7 +3657,7 @@ SDValue PPCTargetLowering::LowerFLT_ROUNDS_(SDValue Op, SelectionDAG &DAG) { ISD::TRUNCATE : ISD::ZERO_EXTEND), dl, VT, RetVal); } -SDValue PPCTargetLowering::LowerSHL_PARTS(SDValue Op, SelectionDAG &DAG) { +SDValue PPCTargetLowering::LowerSHL_PARTS(SDValue Op, SelectionDAG &DAG) const { EVT VT = Op.getValueType(); unsigned BitWidth = VT.getSizeInBits(); DebugLoc dl = Op.getDebugLoc(); @@ -3678,7 +3686,7 @@ SDValue PPCTargetLowering::LowerSHL_PARTS(SDValue Op, SelectionDAG &DAG) { return DAG.getMergeValues(OutOps, 2, dl); } -SDValue PPCTargetLowering::LowerSRL_PARTS(SDValue Op, SelectionDAG &DAG) { +SDValue PPCTargetLowering::LowerSRL_PARTS(SDValue Op, SelectionDAG &DAG) const { EVT VT = Op.getValueType(); DebugLoc dl = Op.getDebugLoc(); unsigned BitWidth = VT.getSizeInBits(); @@ -3707,7 +3715,7 @@ SDValue PPCTargetLowering::LowerSRL_PARTS(SDValue Op, SelectionDAG &DAG) { return DAG.getMergeValues(OutOps, 2, dl); } -SDValue PPCTargetLowering::LowerSRA_PARTS(SDValue Op, SelectionDAG &DAG) { +SDValue PPCTargetLowering::LowerSRA_PARTS(SDValue Op, SelectionDAG &DAG) const { DebugLoc dl = Op.getDebugLoc(); EVT VT = Op.getValueType(); unsigned BitWidth = VT.getSizeInBits(); @@ -3808,7 +3816,8 @@ static SDValue BuildVSLDOI(SDValue LHS, SDValue RHS, unsigned Amt, // selects to a single instruction, return Op. Otherwise, if we can codegen // this case more efficiently than a constant pool load, lower it to the // sequence of ops that should be used. -SDValue PPCTargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) { +SDValue PPCTargetLowering::LowerBUILD_VECTOR(SDValue Op, + SelectionDAG &DAG) const { DebugLoc dl = Op.getDebugLoc(); BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(Op.getNode()); assert(BVN != 0 && "Expected a BuildVectorSDNode in LowerBUILD_VECTOR"); @@ -4050,7 +4059,7 @@ static SDValue GeneratePerfectShuffle(unsigned PFEntry, SDValue LHS, /// return the code it can be lowered into. Worst case, it can always be /// lowered into a vperm. SDValue PPCTargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, - SelectionDAG &DAG) { + SelectionDAG &DAG) const { DebugLoc dl = Op.getDebugLoc(); SDValue V1 = Op.getOperand(0); SDValue V2 = Op.getOperand(1); @@ -4216,7 +4225,7 @@ static bool getAltivecCompareInfo(SDValue Intrin, int &CompareOpc, /// LowerINTRINSIC_WO_CHAIN - If this is an intrinsic that we want to custom /// lower, do it, otherwise return null. 
SDValue PPCTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, - SelectionDAG &DAG) { + SelectionDAG &DAG) const { // If this is a lowered altivec predicate compare, CompareOpc is set to the // opcode number of the comparison. DebugLoc dl = Op.getDebugLoc(); @@ -4284,12 +4293,12 @@ SDValue PPCTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, } SDValue PPCTargetLowering::LowerSCALAR_TO_VECTOR(SDValue Op, - SelectionDAG &DAG) { + SelectionDAG &DAG) const { DebugLoc dl = Op.getDebugLoc(); // Create a stack slot that is 16-byte aligned. MachineFrameInfo *FrameInfo = DAG.getMachineFunction().getFrameInfo(); int FrameIdx = FrameInfo->CreateStackObject(16, 16, false); - EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); + EVT PtrVT = getPointerTy(); SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT); // Store the input value into Value#0 of the stack slot. @@ -4301,7 +4310,7 @@ SDValue PPCTargetLowering::LowerSCALAR_TO_VECTOR(SDValue Op, false, false, 0); } -SDValue PPCTargetLowering::LowerMUL(SDValue Op, SelectionDAG &DAG) { +SDValue PPCTargetLowering::LowerMUL(SDValue Op, SelectionDAG &DAG) const { DebugLoc dl = Op.getDebugLoc(); if (Op.getValueType() == MVT::v4i32) { SDValue LHS = Op.getOperand(0), RHS = Op.getOperand(1); @@ -4362,7 +4371,7 @@ SDValue PPCTargetLowering::LowerMUL(SDValue Op, SelectionDAG &DAG) { /// LowerOperation - Provide custom lowering hooks for some operations. /// -SDValue PPCTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) { +SDValue PPCTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { switch (Op.getOpcode()) { default: llvm_unreachable("Wasn't expecting to be able to lower this!"); case ISD::ConstantPool: return LowerConstantPool(Op, DAG); @@ -4373,12 +4382,10 @@ SDValue PPCTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) { case ISD::SETCC: return LowerSETCC(Op, DAG); case ISD::TRAMPOLINE: return LowerTRAMPOLINE(Op, DAG); case ISD::VASTART: - return LowerVASTART(Op, DAG, VarArgsFrameIndex, VarArgsStackOffset, - VarArgsNumGPR, VarArgsNumFPR, PPCSubTarget); + return LowerVASTART(Op, DAG, PPCSubTarget); case ISD::VAARG: - return LowerVAARG(Op, DAG, VarArgsFrameIndex, VarArgsStackOffset, - VarArgsNumGPR, VarArgsNumFPR, PPCSubTarget); + return LowerVAARG(Op, DAG, PPCSubTarget); case ISD::STACKRESTORE: return LowerSTACKRESTORE(Op, DAG, PPCSubTarget); case ISD::DYNAMIC_STACKALLOC: @@ -4412,7 +4419,7 @@ SDValue PPCTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) { void PPCTargetLowering::ReplaceNodeResults(SDNode *N, SmallVectorImpl<SDValue>&Results, - SelectionDAG &DAG) { + SelectionDAG &DAG) const { DebugLoc dl = N->getDebugLoc(); switch (N->getOpcode()) { default: @@ -4677,8 +4684,7 @@ PPCTargetLowering::EmitPartwordAtomicBinary(MachineInstr *MI, MachineBasicBlock * PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, - MachineBasicBlock *BB, - DenseMap<MachineBasicBlock*, MachineBasicBlock*> *EM) const { + MachineBasicBlock *BB) const { const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); // To "insert" these instructions we actually have to insert their @@ -4716,12 +4722,9 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, F->insert(It, sinkMBB); // Update machine-CFG edges by first adding all successors of the current // block to the new block which will contain the Phi node for the select. - // Also inform sdisel of the edge changes. 
for (MachineBasicBlock::succ_iterator I = BB->succ_begin(), - E = BB->succ_end(); I != E; ++I) { - EM->insert(std::make_pair(*I, sinkMBB)); + E = BB->succ_end(); I != E; ++I) sinkMBB->addSuccessor(*I); - } // Next, remove all successors of the current block, and add the true // and fallthrough blocks as its successors. while (!BB->succ_empty()) @@ -5032,7 +5035,7 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const { - TargetMachine &TM = getTargetMachine(); + const TargetMachine &TM = getTargetMachine(); SelectionDAG &DAG = DCI.DAG; DebugLoc dl = N->getDebugLoc(); switch (N->getOpcode()) { @@ -5491,46 +5494,59 @@ bool PPCTargetLowering::isLegalAddressImmediate(llvm::GlobalValue* GV) const { return false; } -SDValue PPCTargetLowering::LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) { +SDValue PPCTargetLowering::LowerRETURNADDR(SDValue Op, + SelectionDAG &DAG) const { DebugLoc dl = Op.getDebugLoc(); - // Depths > 0 not supported yet! - if (cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue() > 0) - return SDValue(); + unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue(); + // Make sure the function does not optimize away the store of the RA to + // the stack. MachineFunction &MF = DAG.getMachineFunction(); PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>(); + FuncInfo->setLRStoreRequired(); + bool isPPC64 = PPCSubTarget.isPPC64(); + bool isDarwinABI = PPCSubTarget.isDarwinABI(); + + if (Depth > 0) { + SDValue FrameAddr = LowerFRAMEADDR(Op, DAG); + SDValue Offset = + + DAG.getConstant(PPCFrameInfo::getReturnSaveOffset(isPPC64, isDarwinABI), + isPPC64? MVT::i64 : MVT::i32); + return DAG.getLoad(getPointerTy(), dl, DAG.getEntryNode(), + DAG.getNode(ISD::ADD, dl, getPointerTy(), + FrameAddr, Offset), + NULL, 0, false, false, 0); + } // Just load the return address off the stack. SDValue RetAddrFI = getReturnAddrFrameIndex(DAG); - - // Make sure the function really does not optimize away the store of the RA - // to the stack. - FuncInfo->setLRStoreRequired(); - return DAG.getLoad(getPointerTy(), dl, - DAG.getEntryNode(), RetAddrFI, NULL, 0, - false, false, 0); + return DAG.getLoad(getPointerTy(), dl, DAG.getEntryNode(), + RetAddrFI, NULL, 0, false, false, 0); } -SDValue PPCTargetLowering::LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) { +SDValue PPCTargetLowering::LowerFRAMEADDR(SDValue Op, + SelectionDAG &DAG) const { DebugLoc dl = Op.getDebugLoc(); - // Depths > 0 not supported yet! - if (cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue() > 0) - return SDValue(); + unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue(); EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); bool isPPC64 = PtrVT == MVT::i64; MachineFunction &MF = DAG.getMachineFunction(); MachineFrameInfo *MFI = MF.getFrameInfo(); - bool is31 = (NoFramePointerElim || MFI->hasVarSizedObjects()) - && MFI->getStackSize(); - - if (isPPC64) - return DAG.getCopyFromReg(DAG.getEntryNode(), dl, is31 ? PPC::X31 : PPC::X1, - MVT::i64); - else - return DAG.getCopyFromReg(DAG.getEntryNode(), dl, is31 ? PPC::R31 : PPC::R1, - MVT::i32); + MFI->setFrameAddressIsTaken(true); + bool is31 = (DisableFramePointerElim(MF) || MFI->hasVarSizedObjects()) && + MFI->getStackSize() && + !MF.getFunction()->hasFnAttr(Attribute::Naked); + unsigned FrameReg = isPPC64 ? (is31 ? PPC::X31 : PPC::X1) : + (is31 ? 
PPC::R31 : PPC::R1); + SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), dl, FrameReg, + PtrVT); + while (Depth--) + FrameAddr = DAG.getLoad(Op.getValueType(), dl, DAG.getEntryNode(), + FrameAddr, NULL, 0, false, false, 0); + return FrameAddr; } bool @@ -5547,12 +5563,15 @@ PPCTargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const { /// probably because the source does not need to be loaded. If /// 'NonScalarIntSafe' is true, that means it's safe to return a /// non-scalar-integer type, e.g. empty string source, constant, or loaded -/// from memory. It returns EVT::Other if SelectionDAG should be responsible -/// for determining it. +/// from memory. 'MemcpyStrSrc' indicates whether the memcpy source is +/// constant so it does not need to be loaded. +/// It returns EVT::Other if the type should be determined using generic +/// target-independent logic. EVT PPCTargetLowering::getOptimalMemOpType(uint64_t Size, unsigned DstAlign, unsigned SrcAlign, bool NonScalarIntSafe, - SelectionDAG &DAG) const { + bool MemcpyStrSrc, + MachineFunction &MF) const { if (this->PPCSubTarget.isPPC64()) { return MVT::i64; } else { diff --git a/lib/Target/PowerPC/PPCISelLowering.h b/lib/Target/PowerPC/PPCISelLowering.h index f816bdd..1d05f3d 100644 --- a/lib/Target/PowerPC/PPCISelLowering.h +++ b/lib/Target/PowerPC/PPCISelLowering.h @@ -233,14 +233,8 @@ namespace llvm { } class PPCTargetLowering : public TargetLowering { - int VarArgsFrameIndex; // FrameIndex for start of varargs area. - int VarArgsStackOffset; // StackOffset for start of stack - // arguments. - unsigned VarArgsNumGPR; // Index of the first unused integer - // register for parameter passing. - unsigned VarArgsNumFPR; // Index of the first unused double - // register for parameter passing. const PPCSubtarget &PPCSubTarget; + public: explicit PPCTargetLowering(PPCTargetMachine &TM); @@ -285,13 +279,13 @@ namespace llvm { /// LowerOperation - Provide custom lowering hooks for some operations. /// - virtual SDValue LowerOperation(SDValue Op, SelectionDAG &DAG); + virtual SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const; /// ReplaceNodeResults - Replace the results of node with an illegal result /// type with new values built out of custom code. /// virtual void ReplaceNodeResults(SDNode *N, SmallVectorImpl<SDValue>&Results, - SelectionDAG &DAG); + SelectionDAG &DAG) const; virtual SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const; @@ -302,9 +296,9 @@ namespace llvm { const SelectionDAG &DAG, unsigned Depth = 0) const; - virtual MachineBasicBlock *EmitInstrWithCustomInserter(MachineInstr *MI, - MachineBasicBlock *MBB, - DenseMap<MachineBasicBlock*, MachineBasicBlock*> *EM) const; + virtual MachineBasicBlock * + EmitInstrWithCustomInserter(MachineInstr *MI, + MachineBasicBlock *MBB) const; MachineBasicBlock *EmitAtomicBinary(MachineInstr *MI, MachineBasicBlock *MBB, bool is64Bit, unsigned BinOpcode) const; @@ -355,12 +349,14 @@ namespace llvm { /// probably because the source does not need to be loaded. If /// 'NonScalarIntSafe' is true, that means it's safe to return a /// non-scalar-integer type, e.g. empty string source, constant, or loaded - /// from memory. It returns EVT::Other if SelectionDAG should be responsible - /// for determining it. + /// from memory. 'MemcpyStrSrc' indicates whether the memcpy source is + /// constant so it does not need to be loaded. + /// It returns EVT::Other if the type should be determined using generic + /// target-independent logic. 
virtual EVT - getOptimalMemOpType(uint64_t Size, - unsigned DstAlign, unsigned SrcAlign, - bool NonScalarIntSafe, SelectionDAG &DAG) const; + getOptimalMemOpType(uint64_t Size, unsigned DstAlign, unsigned SrcAlign, + bool NonScalarIntSafe, bool MemcpyStrSrc, + MachineFunction &MF) const; /// getFunctionAlignment - Return the Log2 alignment of this function. virtual unsigned getFunctionAlignment(const Function *F) const; @@ -382,46 +378,43 @@ namespace llvm { SDValue &LROpOut, SDValue &FPOpOut, bool isDarwinABI, - DebugLoc dl); - - SDValue LowerRETURNADDR(SDValue Op, SelectionDAG &DAG); - SDValue LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG); - SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG); - SDValue LowerBlockAddress(SDValue Op, SelectionDAG &DAG); - SDValue LowerGlobalAddress(SDValue Op, SelectionDAG &DAG); - SDValue LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG); - SDValue LowerJumpTable(SDValue Op, SelectionDAG &DAG); - SDValue LowerSETCC(SDValue Op, SelectionDAG &DAG); - SDValue LowerTRAMPOLINE(SDValue Op, SelectionDAG &DAG); + DebugLoc dl) const; + + SDValue LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerJumpTable(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerSETCC(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerTRAMPOLINE(SDValue Op, SelectionDAG &DAG) const; SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG, - int VarArgsFrameIndex, int VarArgsStackOffset, - unsigned VarArgsNumGPR, unsigned VarArgsNumFPR, - const PPCSubtarget &Subtarget); - SDValue LowerVAARG(SDValue Op, SelectionDAG &DAG, int VarArgsFrameIndex, - int VarArgsStackOffset, unsigned VarArgsNumGPR, - unsigned VarArgsNumFPR, const PPCSubtarget &Subtarget); + const PPCSubtarget &Subtarget) const; + SDValue LowerVAARG(SDValue Op, SelectionDAG &DAG, + const PPCSubtarget &Subtarget) const; SDValue LowerSTACKRESTORE(SDValue Op, SelectionDAG &DAG, - const PPCSubtarget &Subtarget); + const PPCSubtarget &Subtarget) const; SDValue LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG, - const PPCSubtarget &Subtarget); - SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG); - SDValue LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG, DebugLoc dl); - SDValue LowerSINT_TO_FP(SDValue Op, SelectionDAG &DAG); - SDValue LowerFLT_ROUNDS_(SDValue Op, SelectionDAG &DAG); - SDValue LowerSHL_PARTS(SDValue Op, SelectionDAG &DAG); - SDValue LowerSRL_PARTS(SDValue Op, SelectionDAG &DAG); - SDValue LowerSRA_PARTS(SDValue Op, SelectionDAG &DAG); - SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG); - SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG); - SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG); - SDValue LowerSCALAR_TO_VECTOR(SDValue Op, SelectionDAG &DAG); - SDValue LowerMUL(SDValue Op, SelectionDAG &DAG); + const PPCSubtarget &Subtarget) const; + SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG, DebugLoc dl) const; + SDValue LowerSINT_TO_FP(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerFLT_ROUNDS_(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerSHL_PARTS(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerSRL_PARTS(SDValue Op, SelectionDAG &DAG) const; + 
SDValue LowerSRA_PARTS(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerSCALAR_TO_VECTOR(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerMUL(SDValue Op, SelectionDAG &DAG) const; SDValue LowerCallResult(SDValue Chain, SDValue InFlag, CallingConv::ID CallConv, bool isVarArg, const SmallVectorImpl<ISD::InputArg> &Ins, DebugLoc dl, SelectionDAG &DAG, - SmallVectorImpl<SDValue> &InVals); + SmallVectorImpl<SDValue> &InVals) const; SDValue FinishCall(CallingConv::ID CallConv, DebugLoc dl, bool isTailCall, bool isVarArg, SelectionDAG &DAG, @@ -431,14 +424,14 @@ namespace llvm { SDValue &Callee, int SPDiff, unsigned NumBytes, const SmallVectorImpl<ISD::InputArg> &Ins, - SmallVectorImpl<SDValue> &InVals); + SmallVectorImpl<SDValue> &InVals) const; virtual SDValue LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, bool isVarArg, const SmallVectorImpl<ISD::InputArg> &Ins, DebugLoc dl, SelectionDAG &DAG, - SmallVectorImpl<SDValue> &InVals); + SmallVectorImpl<SDValue> &InVals) const; virtual SDValue LowerCall(SDValue Chain, SDValue Callee, @@ -446,26 +439,26 @@ namespace llvm { const SmallVectorImpl<ISD::OutputArg> &Outs, const SmallVectorImpl<ISD::InputArg> &Ins, DebugLoc dl, SelectionDAG &DAG, - SmallVectorImpl<SDValue> &InVals); + SmallVectorImpl<SDValue> &InVals) const; virtual SDValue LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool isVarArg, const SmallVectorImpl<ISD::OutputArg> &Outs, - DebugLoc dl, SelectionDAG &DAG); + DebugLoc dl, SelectionDAG &DAG) const; SDValue LowerFormalArguments_Darwin(SDValue Chain, CallingConv::ID CallConv, bool isVarArg, const SmallVectorImpl<ISD::InputArg> &Ins, DebugLoc dl, SelectionDAG &DAG, - SmallVectorImpl<SDValue> &InVals); + SmallVectorImpl<SDValue> &InVals) const; SDValue LowerFormalArguments_SVR4(SDValue Chain, CallingConv::ID CallConv, bool isVarArg, const SmallVectorImpl<ISD::InputArg> &Ins, DebugLoc dl, SelectionDAG &DAG, - SmallVectorImpl<SDValue> &InVals); + SmallVectorImpl<SDValue> &InVals) const; SDValue LowerCall_Darwin(SDValue Chain, SDValue Callee, @@ -473,14 +466,14 @@ namespace llvm { const SmallVectorImpl<ISD::OutputArg> &Outs, const SmallVectorImpl<ISD::InputArg> &Ins, DebugLoc dl, SelectionDAG &DAG, - SmallVectorImpl<SDValue> &InVals); + SmallVectorImpl<SDValue> &InVals) const; SDValue LowerCall_SVR4(SDValue Chain, SDValue Callee, CallingConv::ID CallConv, bool isVarArg, bool isTailCall, const SmallVectorImpl<ISD::OutputArg> &Outs, const SmallVectorImpl<ISD::InputArg> &Ins, DebugLoc dl, SelectionDAG &DAG, - SmallVectorImpl<SDValue> &InVals); + SmallVectorImpl<SDValue> &InVals) const; }; } diff --git a/lib/Target/PowerPC/PPCInstrInfo.cpp b/lib/Target/PowerPC/PPCInstrInfo.cpp index 6b0a282..ae1fbd8 100644 --- a/lib/Target/PowerPC/PPCInstrInfo.cpp +++ b/lib/Target/PowerPC/PPCInstrInfo.cpp @@ -24,10 +24,13 @@ #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" #include "llvm/MC/MCAsmInfo.h" -using namespace llvm; +namespace llvm { extern cl::opt<bool> EnablePPC32RS; // FIXME (64-bit): See PPCRegisterInfo.cpp. extern cl::opt<bool> EnablePPC64RS; // FIXME (64-bit): See PPCRegisterInfo.cpp. 
+} + +using namespace llvm; PPCInstrInfo::PPCInstrInfo(PPCTargetMachine &tm) : TargetInstrInfoImpl(PPCInsts, array_lengthof(PPCInsts)), TM(tm), @@ -642,6 +645,16 @@ PPCInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB, MBB.insert(MI, NewMIs[i]); } +MachineInstr* +PPCInstrInfo::emitFrameIndexDebugValue(MachineFunction &MF, + int FrameIx, uint64_t Offset, + const MDNode *MDPtr, + DebugLoc DL) const { + MachineInstrBuilder MIB = BuildMI(MF, DL, get(PPC::DBG_VALUE)); + addFrameReference(MIB, FrameIx, 0, false).addImm(Offset).addMetadata(MDPtr); + return &*MIB; +} + /// foldMemoryOperand - PowerPC (like most RISC's) can only fold spills into /// copy instructions, turning them into load/store instructions. MachineInstr *PPCInstrInfo::foldMemoryOperandImpl(MachineFunction &MF, @@ -778,6 +791,7 @@ unsigned PPCInstrInfo::GetInstSizeInBytes(const MachineInstr *MI) const { case PPC::DBG_LABEL: case PPC::EH_LABEL: case PPC::GC_LABEL: + case PPC::DBG_VALUE: return 0; default: return 4; // PowerPC instructions are all 4 bytes diff --git a/lib/Target/PowerPC/PPCInstrInfo.h b/lib/Target/PowerPC/PPCInstrInfo.h index 57facac..9fb6e7d 100644 --- a/lib/Target/PowerPC/PPCInstrInfo.h +++ b/lib/Target/PowerPC/PPCInstrInfo.h @@ -126,6 +126,12 @@ public: unsigned DestReg, int FrameIndex, const TargetRegisterClass *RC) const; + virtual MachineInstr *emitFrameIndexDebugValue(MachineFunction &MF, + int FrameIx, + uint64_t Offset, + const MDNode *MDPtr, + DebugLoc DL) const; + /// foldMemoryOperand - PowerPC (like most RISC's) can only fold spills into /// copy instructions, turning them into load/store instructions. virtual MachineInstr* foldMemoryOperandImpl(MachineFunction &MF, diff --git a/lib/Target/PowerPC/PPCMachineFunctionInfo.h b/lib/Target/PowerPC/PPCMachineFunctionInfo.h index b359dd3..e2649c8 100644 --- a/lib/Target/PowerPC/PPCMachineFunctionInfo.h +++ b/lib/Target/PowerPC/PPCMachineFunctionInfo.h @@ -58,6 +58,18 @@ private: /// how the caller's stack pointer should be calculated (epilog/dynamicalloc). bool HasFastCall; + /// VarArgsFrameIndex - FrameIndex for start of varargs area. + int VarArgsFrameIndex; + /// VarArgsStackOffset - StackOffset for start of stack + /// arguments. + int VarArgsStackOffset; + /// VarArgsNumGPR - Index of the first unused integer + /// register for parameter passing. + unsigned VarArgsNumGPR; + /// VarArgsNumFPR - Index of the first unused double + /// register for parameter passing. 
+ unsigned VarArgsNumFPR; + public: explicit PPCFunctionInfo(MachineFunction &MF) : FramePointerSaveIndex(0), @@ -66,7 +78,11 @@ public: LRStoreRequired(false), MinReservedArea(0), TailCallSPDelta(0), - HasFastCall(false) {} + HasFastCall(false), + VarArgsFrameIndex(0), + VarArgsStackOffset(0), + VarArgsNumGPR(0), + VarArgsNumFPR(0) {} int getFramePointerSaveIndex() const { return FramePointerSaveIndex; } void setFramePointerSaveIndex(int Idx) { FramePointerSaveIndex = Idx; } @@ -96,6 +112,18 @@ public: void setHasFastCall() { HasFastCall = true; } bool hasFastCall() const { return HasFastCall;} + + int getVarArgsFrameIndex() const { return VarArgsFrameIndex; } + void setVarArgsFrameIndex(int Index) { VarArgsFrameIndex = Index; } + + int getVarArgsStackOffset() const { return VarArgsStackOffset; } + void setVarArgsStackOffset(int Offset) { VarArgsStackOffset = Offset; } + + unsigned getVarArgsNumGPR() const { return VarArgsNumGPR; } + void setVarArgsNumGPR(unsigned Num) { VarArgsNumGPR = Num; } + + unsigned getVarArgsNumFPR() const { return VarArgsNumFPR; } + void setVarArgsNumFPR(unsigned Num) { VarArgsNumFPR = Num; } }; } // end of namespace llvm diff --git a/lib/Target/PowerPC/PPCRegisterInfo.cpp b/lib/Target/PowerPC/PPCRegisterInfo.cpp index 52d87cd..5f1e04e 100644 --- a/lib/Target/PowerPC/PPCRegisterInfo.cpp +++ b/lib/Target/PowerPC/PPCRegisterInfo.cpp @@ -43,7 +43,6 @@ #include "llvm/ADT/BitVector.h" #include "llvm/ADT/STLExtras.h" #include <cstdlib> -using namespace llvm; // FIXME This disables some code that aligns the stack to a boundary // bigger than the default (16 bytes on Darwin) when there is a stack local @@ -56,14 +55,19 @@ using namespace llvm; #define ALIGN_STACK 0 // FIXME (64-bit): Eventually enable by default. +namespace llvm { cl::opt<bool> EnablePPC32RS("enable-ppc32-regscavenger", - cl::init(false), - cl::desc("Enable PPC32 register scavenger"), - cl::Hidden); + cl::init(false), + cl::desc("Enable PPC32 register scavenger"), + cl::Hidden); cl::opt<bool> EnablePPC64RS("enable-ppc64-regscavenger", - cl::init(false), - cl::desc("Enable PPC64 register scavenger"), - cl::Hidden); + cl::init(false), + cl::desc("Enable PPC64 register scavenger"), + cl::Hidden); +} + +using namespace llvm; + #define EnableRegisterScavenging \ ((EnablePPC32RS && !Subtarget.isPPC64()) || \ (EnablePPC64RS && Subtarget.isPPC64())) @@ -405,7 +409,10 @@ PPCRegisterInfo::getCalleeSavedRegClasses(const MachineFunction *MF) const { // static bool needsFP(const MachineFunction &MF) { const MachineFrameInfo *MFI = MF.getFrameInfo(); - return NoFramePointerElim || MFI->hasVarSizedObjects() || + // Naked functions have no stack frame pushed, so we don't have a frame pointer. + if (MF.getFunction()->hasFnAttr(Attribute::Naked)) + return false; + return DisableFramePointerElim(MF) || MFI->hasVarSizedObjects() || (GuaranteedTailCallOpt && MF.getInfo<PPCFunctionInfo>()->hasFastCall()); } @@ -790,7 +797,10 @@ PPCRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, // If we're not using a Frame Pointer that has been set to the value of the // SP before having the stack size subtracted from it, then add the stack size // to Offset to get the correct offset. - Offset += MFI->getStackSize(); + // Naked functions have stack size 0, although getStackSize may not reflect that + // because we didn't call all the pieces that compute it for naked functions. + if (!MF.getFunction()->hasFnAttr(Attribute::Naked)) + Offset += MFI->getStackSize(); // If we can, encode the offset directly into the instruction. 
If this is a // normal PPC "ri" instruction, any 16-bit value can be safely encoded. If diff --git a/lib/Target/PowerPC/PPCSchedule.td b/lib/Target/PowerPC/PPCSchedule.td index d589414..9664f14 100644 --- a/lib/Target/PowerPC/PPCSchedule.td +++ b/lib/Target/PowerPC/PPCSchedule.td @@ -15,8 +15,6 @@ def SLU : FuncUnit; // Store/load unit def SRU : FuncUnit; // special register unit def IU1 : FuncUnit; // integer unit 1 (simple) def IU2 : FuncUnit; // integer unit 2 (complex) -def IU3 : FuncUnit; // integer unit 3 (7450 simple) -def IU4 : FuncUnit; // integer unit 4 (7450 simple) def FPU1 : FuncUnit; // floating point unit 1 def FPU2 : FuncUnit; // floating point unit 2 def VPU : FuncUnit; // vector permutation unit @@ -24,7 +22,6 @@ def VIU1 : FuncUnit; // vector integer unit 1 (simple) def VIU2 : FuncUnit; // vector integer unit 2 (complex) def VFPU : FuncUnit; // vector floating point unit - //===----------------------------------------------------------------------===// // Instruction Itinerary classes used for PowerPC // diff --git a/lib/Target/PowerPC/PPCScheduleG3.td b/lib/Target/PowerPC/PPCScheduleG3.td index f72194d..7344763 100644 --- a/lib/Target/PowerPC/PPCScheduleG3.td +++ b/lib/Target/PowerPC/PPCScheduleG3.td @@ -12,7 +12,8 @@ //===----------------------------------------------------------------------===// -def G3Itineraries : ProcessorItineraries<[ +def G3Itineraries : ProcessorItineraries< + [IU1, IU2, FPU1, BPU, SRU, SLU], [ InstrItinData<IntGeneral , [InstrStage<1, [IU1, IU2]>]>, InstrItinData<IntCompare , [InstrStage<1, [IU1, IU2]>]>, InstrItinData<IntDivW , [InstrStage<19, [IU1]>]>, diff --git a/lib/Target/PowerPC/PPCScheduleG4.td b/lib/Target/PowerPC/PPCScheduleG4.td index 92ed20f..7efc693 100644 --- a/lib/Target/PowerPC/PPCScheduleG4.td +++ b/lib/Target/PowerPC/PPCScheduleG4.td @@ -11,7 +11,8 @@ // //===----------------------------------------------------------------------===// -def G4Itineraries : ProcessorItineraries<[ +def G4Itineraries : ProcessorItineraries< + [IU1, IU2, SLU, SRU, BPU, FPU1, VIU1, VIU2, VPU, VFPU], [ InstrItinData<IntGeneral , [InstrStage<1, [IU1, IU2]>]>, InstrItinData<IntCompare , [InstrStage<1, [IU1, IU2]>]>, InstrItinData<IntDivW , [InstrStage<19, [IU1]>]>, diff --git a/lib/Target/PowerPC/PPCScheduleG4Plus.td b/lib/Target/PowerPC/PPCScheduleG4Plus.td index 7474ba4..15056c0 100644 --- a/lib/Target/PowerPC/PPCScheduleG4Plus.td +++ b/lib/Target/PowerPC/PPCScheduleG4Plus.td @@ -11,7 +11,11 @@ // //===----------------------------------------------------------------------===// -def G4PlusItineraries : ProcessorItineraries<[ +def IU3 : FuncUnit; // integer unit 3 (7450 simple) +def IU4 : FuncUnit; // integer unit 4 (7450 simple) + +def G4PlusItineraries : ProcessorItineraries< + [IU1, IU2, IU3, IU4, BPU, SLU, FPU1, VFPU, VIU1, VIU2, VPU], [ InstrItinData<IntGeneral , [InstrStage<1, [IU1, IU2, IU3, IU4]>]>, InstrItinData<IntCompare , [InstrStage<1, [IU1, IU2, IU3, IU4]>]>, InstrItinData<IntDivW , [InstrStage<23, [IU2]>]>, diff --git a/lib/Target/PowerPC/PPCScheduleG5.td b/lib/Target/PowerPC/PPCScheduleG5.td index d282147..2dffc48 100644 --- a/lib/Target/PowerPC/PPCScheduleG5.td +++ b/lib/Target/PowerPC/PPCScheduleG5.td @@ -11,7 +11,8 @@ // //===----------------------------------------------------------------------===// -def G5Itineraries : ProcessorItineraries<[ +def G5Itineraries : ProcessorItineraries< + [IU1, IU2, SLU, BPU, FPU1, FPU2, VFPU, VIU1, VIU2, VPU], [ InstrItinData<IntGeneral , [InstrStage<2, [IU1, IU2]>]>, InstrItinData<IntCompare , 
[InstrStage<3, [IU1, IU2]>]>, InstrItinData<IntDivD , [InstrStage<68, [IU1]>]>, diff --git a/lib/Target/PowerPC/PPCSelectionDAGInfo.cpp b/lib/Target/PowerPC/PPCSelectionDAGInfo.cpp new file mode 100644 index 0000000..c0004a9 --- /dev/null +++ b/lib/Target/PowerPC/PPCSelectionDAGInfo.cpp @@ -0,0 +1,22 @@ +//===-- PPCSelectionDAGInfo.cpp - PowerPC SelectionDAG Info ---------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the PPCSelectionDAGInfo class. +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "powerpc-selectiondag-info" +#include "PPCSelectionDAGInfo.h" +using namespace llvm; + +PPCSelectionDAGInfo::PPCSelectionDAGInfo() { +} + +PPCSelectionDAGInfo::~PPCSelectionDAGInfo() { +} diff --git a/lib/Target/PowerPC/PPCSelectionDAGInfo.h b/lib/Target/PowerPC/PPCSelectionDAGInfo.h new file mode 100644 index 0000000..3ad3418 --- /dev/null +++ b/lib/Target/PowerPC/PPCSelectionDAGInfo.h @@ -0,0 +1,29 @@ +//===-- PPCSelectionDAGInfo.h - PowerPC SelectionDAG Info -------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines the PowerPC subclass for TargetSelectionDAGInfo. +// +//===----------------------------------------------------------------------===// + +#ifndef POWERPCCSELECTIONDAGINFO_H +#define POWERPCCSELECTIONDAGINFO_H + +#include "llvm/Target/TargetSelectionDAGInfo.h" + +namespace llvm { + +class PPCSelectionDAGInfo : public TargetSelectionDAGInfo { +public: + PPCSelectionDAGInfo(); + ~PPCSelectionDAGInfo(); +}; + +} + +#endif diff --git a/lib/Target/PowerPC/PPCTargetMachine.h b/lib/Target/PowerPC/PPCTargetMachine.h index ac9ae2b..35e33a2 100644 --- a/lib/Target/PowerPC/PPCTargetMachine.h +++ b/lib/Target/PowerPC/PPCTargetMachine.h @@ -44,8 +44,8 @@ public: virtual const PPCInstrInfo *getInstrInfo() const { return &InstrInfo; } virtual const PPCFrameInfo *getFrameInfo() const { return &FrameInfo; } virtual PPCJITInfo *getJITInfo() { return &JITInfo; } - virtual PPCTargetLowering *getTargetLowering() const { - return const_cast<PPCTargetLowering*>(&TLInfo); + virtual const PPCTargetLowering *getTargetLowering() const { + return &TLInfo; } virtual const PPCRegisterInfo *getRegisterInfo() const { return &InstrInfo.getRegisterInfo(); diff --git a/lib/Target/README.txt b/lib/Target/README.txt index 052a575..144bf5d 100644 --- a/lib/Target/README.txt +++ b/lib/Target/README.txt @@ -263,19 +263,6 @@ if anyone cared enough about sincos. 
//===---------------------------------------------------------------------===// -Turn this into a single byte store with no load (the other 3 bytes are -unmodified): - -define void @test(i32* %P) { - %tmp = load i32* %P - %tmp14 = or i32 %tmp, 3305111552 - %tmp15 = and i32 %tmp14, 3321888767 - store i32 %tmp15, i32* %P - ret void -} - -//===---------------------------------------------------------------------===// - quantum_sigma_x in 462.libquantum contains the following loop: for(i=0; i<reg->size; i++) @@ -1843,3 +1830,21 @@ entry: //===---------------------------------------------------------------------===// +We should use DSE + llvm.lifetime.end to delete dead vtable pointer updates. +See GCC PR34949 + +//===---------------------------------------------------------------------===// + +In this code: + +long foo(long x) { + return x > 1 ? x : 1; +} + +LLVM emits a comparison with 1 instead of 0. 0 would be equivalent +and cheaper on most targets. + +LLVM prefers comparisons with zero over non-zero in general, but in this +case it chooses instead to keep the max operation obvious. + +//===---------------------------------------------------------------------===// diff --git a/lib/Target/Sparc/AsmPrinter/CMakeLists.txt b/lib/Target/Sparc/AsmPrinter/CMakeLists.txt index e3ca18e..da629f6 100644 --- a/lib/Target/Sparc/AsmPrinter/CMakeLists.txt +++ b/lib/Target/Sparc/AsmPrinter/CMakeLists.txt @@ -3,4 +3,4 @@ include_directories( ${CMAKE_CURRENT_BINARY_DIR}/.. ${CMAKE_CURRENT_SOURCE_DIR}/ add_llvm_library(LLVMSparcAsmPrinter SparcAsmPrinter.cpp ) -add_dependencies(LLVMSparcAsmPrinter SparcCodeGenTable_gen)
\ No newline at end of file +add_dependencies(LLVMSparcAsmPrinter SparcCodeGenTable_gen) diff --git a/lib/Target/Sparc/CMakeLists.txt b/lib/Target/Sparc/CMakeLists.txt index 74f320a..684cadf 100644 --- a/lib/Target/Sparc/CMakeLists.txt +++ b/lib/Target/Sparc/CMakeLists.txt @@ -20,6 +20,7 @@ add_llvm_target(SparcCodeGen SparcRegisterInfo.cpp SparcSubtarget.cpp SparcTargetMachine.cpp + SparcSelectionDAGInfo.cpp ) target_link_libraries (LLVMSparcCodeGen LLVMSelectionDAG) diff --git a/lib/Target/Sparc/SparcISelDAGToDAG.cpp b/lib/Target/Sparc/SparcISelDAGToDAG.cpp index a7d1805..698923e 100644 --- a/lib/Target/Sparc/SparcISelDAGToDAG.cpp +++ b/lib/Target/Sparc/SparcISelDAGToDAG.cpp @@ -11,7 +11,6 @@ // //===----------------------------------------------------------------------===// -#include "SparcISelLowering.h" #include "SparcTargetMachine.h" #include "llvm/Intrinsics.h" #include "llvm/CodeGen/SelectionDAGISel.h" @@ -68,7 +67,6 @@ private: } // end anonymous namespace SDNode* SparcDAGToDAGISel::getGlobalBaseReg() { - MachineFunction *MF = BB->getParent(); unsigned GlobalBaseReg = TM.getInstrInfo()->getGlobalBaseReg(MF); return CurDAG->getRegister(GlobalBaseReg, TLI.getPointerTy()).getNode(); } diff --git a/lib/Target/Sparc/SparcISelLowering.cpp b/lib/Target/Sparc/SparcISelLowering.cpp index 4e93ef0..f47e53a 100644 --- a/lib/Target/Sparc/SparcISelLowering.cpp +++ b/lib/Target/Sparc/SparcISelLowering.cpp @@ -14,6 +14,7 @@ #include "SparcISelLowering.h" #include "SparcTargetMachine.h" +#include "SparcMachineFunctionInfo.h" #include "llvm/Function.h" #include "llvm/CodeGen/CallingConvLower.h" #include "llvm/CodeGen/MachineFrameInfo.h" @@ -37,7 +38,7 @@ SDValue SparcTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool isVarArg, const SmallVectorImpl<ISD::OutputArg> &Outs, - DebugLoc dl, SelectionDAG &DAG) { + DebugLoc dl, SelectionDAG &DAG) const { // CCValAssign - represent the assignment of the return value to locations. SmallVector<CCValAssign, 16> RVLocs; @@ -85,10 +86,12 @@ SparcTargetLowering::LowerFormalArguments(SDValue Chain, const SmallVectorImpl<ISD::InputArg> &Ins, DebugLoc dl, SelectionDAG &DAG, - SmallVectorImpl<SDValue> &InVals) { + SmallVectorImpl<SDValue> &InVals) + const { MachineFunction &MF = DAG.getMachineFunction(); MachineRegisterInfo &RegInfo = MF.getRegInfo(); + SparcMachineFunctionInfo *FuncInfo = MF.getInfo<SparcMachineFunctionInfo>(); // Assign locations to all of the incoming arguments. SmallVector<CCValAssign, 16> ArgLocs; @@ -226,7 +229,7 @@ SparcTargetLowering::LowerFormalArguments(SDValue Chain, // Store remaining ArgRegs to the stack if this is a varargs function. if (isVarArg) { // Remember the vararg offset for the va_start implementation. - VarArgsFrameOffset = ArgOffset; + FuncInfo->setVarArgsFrameOffset(ArgOffset); std::vector<SDValue> OutChains; @@ -261,7 +264,7 @@ SparcTargetLowering::LowerCall(SDValue Chain, SDValue Callee, const SmallVectorImpl<ISD::OutputArg> &Outs, const SmallVectorImpl<ISD::InputArg> &Ins, DebugLoc dl, SelectionDAG &DAG, - SmallVectorImpl<SDValue> &InVals) { + SmallVectorImpl<SDValue> &InVals) const { // Sparc target does not yet support tail call optimization. 
isTailCall = false; @@ -752,8 +755,8 @@ static void LookThroughSetCC(SDValue &LHS, SDValue &RHS, } SDValue SparcTargetLowering::LowerGlobalAddress(SDValue Op, - SelectionDAG &DAG) { - GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal(); + SelectionDAG &DAG) const { + const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal(); // FIXME there isn't really any debug info here DebugLoc dl = Op.getDebugLoc(); SDValue GA = DAG.getTargetGlobalAddress(GV, MVT::i32); @@ -773,11 +776,11 @@ SDValue SparcTargetLowering::LowerGlobalAddress(SDValue Op, } SDValue SparcTargetLowering::LowerConstantPool(SDValue Op, - SelectionDAG &DAG) { + SelectionDAG &DAG) const { ConstantPoolSDNode *N = cast<ConstantPoolSDNode>(Op); // FIXME there isn't really any debug info here DebugLoc dl = Op.getDebugLoc(); - Constant *C = N->getConstVal(); + const Constant *C = N->getConstVal(); SDValue CP = DAG.getTargetConstantPool(C, MVT::i32, N->getAlignment()); SDValue Hi = DAG.getNode(SPISD::Hi, dl, MVT::i32, CP); SDValue Lo = DAG.getNode(SPISD::Lo, dl, MVT::i32, CP); @@ -873,14 +876,18 @@ static SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) { } static SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG, - SparcTargetLowering &TLI) { + const SparcTargetLowering &TLI) { + MachineFunction &MF = DAG.getMachineFunction(); + SparcMachineFunctionInfo *FuncInfo = MF.getInfo<SparcMachineFunctionInfo>(); + // vastart just stores the address of the VarArgsFrameIndex slot into the // memory location argument. DebugLoc dl = Op.getDebugLoc(); - SDValue Offset = DAG.getNode(ISD::ADD, dl, MVT::i32, - DAG.getRegister(SP::I6, MVT::i32), - DAG.getConstant(TLI.getVarArgsFrameOffset(), - MVT::i32)); + SDValue Offset = + DAG.getNode(ISD::ADD, dl, MVT::i32, + DAG.getRegister(SP::I6, MVT::i32), + DAG.getConstant(FuncInfo->getVarArgsFrameOffset(), + MVT::i32)); const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue(); return DAG.getStore(Op.getOperand(0), dl, Offset, Op.getOperand(1), SV, 0, false, false, 0); @@ -939,7 +946,7 @@ static SDValue LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) { SDValue SparcTargetLowering:: -LowerOperation(SDValue Op, SelectionDAG &DAG) { +LowerOperation(SDValue Op, SelectionDAG &DAG) const { switch (Op.getOpcode()) { default: llvm_unreachable("Should not custom lower this!"); // Frame & Return address. Currently unimplemented @@ -961,8 +968,7 @@ LowerOperation(SDValue Op, SelectionDAG &DAG) { MachineBasicBlock * SparcTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, - MachineBasicBlock *BB, - DenseMap<MachineBasicBlock*, MachineBasicBlock*> *EM) const { + MachineBasicBlock *BB) const { const TargetInstrInfo &TII = *getTargetMachine().getInstrInfo(); unsigned BROpcode; unsigned CC; @@ -1006,12 +1012,9 @@ SparcTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, F->insert(It, sinkMBB); // Update machine-CFG edges by first adding all successors of the current // block to the new block which will contain the Phi node for the select. - // Also inform sdisel of the edge changes. for (MachineBasicBlock::succ_iterator I = BB->succ_begin(), - E = BB->succ_end(); I != E; ++I) { - EM->insert(std::make_pair(*I, sinkMBB)); + E = BB->succ_end(); I != E; ++I) sinkMBB->addSuccessor(*I); - } // Next, remove all successors of the current block, and add the true // and fallthrough blocks as its successors. 
while (!BB->succ_empty()) diff --git a/lib/Target/Sparc/SparcISelLowering.h b/lib/Target/Sparc/SparcISelLowering.h index 2ee73c1..5ebdcac 100644 --- a/lib/Target/Sparc/SparcISelLowering.h +++ b/lib/Target/Sparc/SparcISelLowering.h @@ -41,12 +41,9 @@ namespace llvm { } class SparcTargetLowering : public TargetLowering { - int VarArgsFrameOffset; // Frame offset to start of varargs area. public: SparcTargetLowering(TargetMachine &TM); - virtual SDValue LowerOperation(SDValue Op, SelectionDAG &DAG); - - int getVarArgsFrameOffset() const { return VarArgsFrameOffset; } + virtual SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const; /// computeMaskedBitsForTargetNode - Determine which of the bits specified /// in Mask are known to be either zero or one and return them in the @@ -58,9 +55,9 @@ namespace llvm { const SelectionDAG &DAG, unsigned Depth = 0) const; - virtual MachineBasicBlock *EmitInstrWithCustomInserter(MachineInstr *MI, - MachineBasicBlock *MBB, - DenseMap<MachineBasicBlock*, MachineBasicBlock*> *EM) const; + virtual MachineBasicBlock * + EmitInstrWithCustomInserter(MachineInstr *MI, + MachineBasicBlock *MBB) const; virtual const char *getTargetNodeName(unsigned Opcode) const; @@ -82,7 +79,7 @@ namespace llvm { bool isVarArg, const SmallVectorImpl<ISD::InputArg> &Ins, DebugLoc dl, SelectionDAG &DAG, - SmallVectorImpl<SDValue> &InVals); + SmallVectorImpl<SDValue> &InVals) const; virtual SDValue LowerCall(SDValue Chain, SDValue Callee, @@ -91,16 +88,16 @@ namespace llvm { const SmallVectorImpl<ISD::OutputArg> &Outs, const SmallVectorImpl<ISD::InputArg> &Ins, DebugLoc dl, SelectionDAG &DAG, - SmallVectorImpl<SDValue> &InVals); + SmallVectorImpl<SDValue> &InVals) const; virtual SDValue LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool isVarArg, const SmallVectorImpl<ISD::OutputArg> &Outs, - DebugLoc dl, SelectionDAG &DAG); + DebugLoc dl, SelectionDAG &DAG) const; - SDValue LowerGlobalAddress(SDValue Op, SelectionDAG &DAG); - SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG); + SDValue LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG) const; }; } // end namespace llvm diff --git a/lib/Target/Sparc/SparcMachineFunctionInfo.h b/lib/Target/Sparc/SparcMachineFunctionInfo.h index 56d8708..e34c131 100644 --- a/lib/Target/Sparc/SparcMachineFunctionInfo.h +++ b/lib/Target/Sparc/SparcMachineFunctionInfo.h @@ -20,12 +20,20 @@ namespace llvm { class SparcMachineFunctionInfo : public MachineFunctionInfo { private: unsigned GlobalBaseReg; + + /// VarArgsFrameOffset - Frame offset to start of varargs area. 
+ int VarArgsFrameOffset; + public: - SparcMachineFunctionInfo() : GlobalBaseReg(0) {} - explicit SparcMachineFunctionInfo(MachineFunction &MF) : GlobalBaseReg(0) {} + SparcMachineFunctionInfo() : GlobalBaseReg(0), VarArgsFrameOffset(0) {} + explicit SparcMachineFunctionInfo(MachineFunction &MF) + : GlobalBaseReg(0), VarArgsFrameOffset(0) {} unsigned getGlobalBaseReg() const { return GlobalBaseReg; } void setGlobalBaseReg(unsigned Reg) { GlobalBaseReg = Reg; } + + int getVarArgsFrameOffset() const { return VarArgsFrameOffset; } + void setVarArgsFrameOffset(int Offset) { VarArgsFrameOffset = Offset; } }; } diff --git a/lib/Target/Sparc/SparcSelectionDAGInfo.cpp b/lib/Target/Sparc/SparcSelectionDAGInfo.cpp new file mode 100644 index 0000000..4825aa9 --- /dev/null +++ b/lib/Target/Sparc/SparcSelectionDAGInfo.cpp @@ -0,0 +1,22 @@ +//===-- SparcSelectionDAGInfo.cpp - Sparc SelectionDAG Info ---------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the SparcSelectionDAGInfo class. +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "sparc-selectiondag-info" +#include "SparcSelectionDAGInfo.h" +using namespace llvm; + +SparcSelectionDAGInfo::SparcSelectionDAGInfo() { +} + +SparcSelectionDAGInfo::~SparcSelectionDAGInfo() { +} diff --git a/lib/Target/Sparc/SparcSelectionDAGInfo.h b/lib/Target/Sparc/SparcSelectionDAGInfo.h new file mode 100644 index 0000000..bc1b561 --- /dev/null +++ b/lib/Target/Sparc/SparcSelectionDAGInfo.h @@ -0,0 +1,29 @@ +//===-- SparcSelectionDAGInfo.h - Sparc SelectionDAG Info -------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines the Sparc subclass for TargetSelectionDAGInfo. 
+// +//===----------------------------------------------------------------------===// + +#ifndef SPARCSELECTIONDAGINFO_H +#define SPARCSELECTIONDAGINFO_H + +#include "llvm/Target/TargetSelectionDAGInfo.h" + +namespace llvm { + +class SparcSelectionDAGInfo : public TargetSelectionDAGInfo { +public: + SparcSelectionDAGInfo(); + ~SparcSelectionDAGInfo(); +}; + +} + +#endif diff --git a/lib/Target/Sparc/SparcTargetMachine.h b/lib/Target/Sparc/SparcTargetMachine.h index 5834d08..1367a31 100644 --- a/lib/Target/Sparc/SparcTargetMachine.h +++ b/lib/Target/Sparc/SparcTargetMachine.h @@ -39,8 +39,8 @@ public: virtual const SparcRegisterInfo *getRegisterInfo() const { return &InstrInfo.getRegisterInfo(); } - virtual SparcTargetLowering* getTargetLowering() const { - return const_cast<SparcTargetLowering*>(&TLInfo); + virtual const SparcTargetLowering* getTargetLowering() const { + return &TLInfo; } virtual const TargetData *getTargetData() const { return &DataLayout; } diff --git a/lib/Target/SystemZ/CMakeLists.txt b/lib/Target/SystemZ/CMakeLists.txt index 81e51d8..880e56f 100644 --- a/lib/Target/SystemZ/CMakeLists.txt +++ b/lib/Target/SystemZ/CMakeLists.txt @@ -18,6 +18,7 @@ add_llvm_target(SystemZCodeGen SystemZRegisterInfo.cpp SystemZSubtarget.cpp SystemZTargetMachine.cpp + SystemZSelectionDAGInfo.cpp ) target_link_libraries (LLVMSystemZCodeGen LLVMSelectionDAG) diff --git a/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp b/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp index 8152e1d..75d563b 100644 --- a/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp +++ b/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp @@ -12,7 +12,6 @@ //===----------------------------------------------------------------------===// #include "SystemZ.h" -#include "SystemZISelLowering.h" #include "SystemZTargetMachine.h" #include "llvm/DerivedTypes.h" #include "llvm/Function.h" @@ -85,7 +84,7 @@ namespace { /// namespace { class SystemZDAGToDAGISel : public SelectionDAGISel { - SystemZTargetLowering &Lowering; + const SystemZTargetLowering &Lowering; const SystemZSubtarget &Subtarget; void getAddressOperandsRI(const SystemZRRIAddressMode &AM, @@ -588,7 +587,7 @@ bool SystemZDAGToDAGISel::SelectLAAddr(SDNode *Op, SDValue Addr, bool SystemZDAGToDAGISel::TryFoldLoad(SDNode *P, SDValue N, SDValue &Base, SDValue &Disp, SDValue &Index) { if (ISD::isNON_EXTLoad(N.getNode()) && - IsLegalToFold(N, P, P)) + IsLegalToFold(N, P, P, OptLevel)) return SelectAddrRRI20(P, N.getOperand(1), Base, Disp, Index); return false; } diff --git a/lib/Target/SystemZ/SystemZISelLowering.cpp b/lib/Target/SystemZ/SystemZISelLowering.cpp index 6f4b30f..e98f18b 100644 --- a/lib/Target/SystemZ/SystemZISelLowering.cpp +++ b/lib/Target/SystemZ/SystemZISelLowering.cpp @@ -158,7 +158,8 @@ SystemZTargetLowering::SystemZTargetLowering(SystemZTargetMachine &tm) : setTruncStoreAction(MVT::f64, MVT::f32, Expand); } -SDValue SystemZTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) { +SDValue SystemZTargetLowering::LowerOperation(SDValue Op, + SelectionDAG &DAG) const { switch (Op.getOpcode()) { case ISD::BR_CC: return LowerBR_CC(Op, DAG); case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG); @@ -236,7 +237,8 @@ SystemZTargetLowering::LowerFormalArguments(SDValue Chain, &Ins, DebugLoc dl, SelectionDAG &DAG, - SmallVectorImpl<SDValue> &InVals) { + SmallVectorImpl<SDValue> &InVals) + const { switch (CallConv) { default: @@ -254,7 +256,7 @@ SystemZTargetLowering::LowerCall(SDValue Chain, SDValue Callee, const SmallVectorImpl<ISD::OutputArg> &Outs, const 
SmallVectorImpl<ISD::InputArg> &Ins, DebugLoc dl, SelectionDAG &DAG, - SmallVectorImpl<SDValue> &InVals) { + SmallVectorImpl<SDValue> &InVals) const { // SystemZ target does not yet support tail call optimization. isTailCall = false; @@ -280,7 +282,8 @@ SystemZTargetLowering::LowerCCCArguments(SDValue Chain, &Ins, DebugLoc dl, SelectionDAG &DAG, - SmallVectorImpl<SDValue> &InVals) { + SmallVectorImpl<SDValue> &InVals) + const { MachineFunction &MF = DAG.getMachineFunction(); MachineFrameInfo *MFI = MF.getFrameInfo(); @@ -293,7 +296,7 @@ SystemZTargetLowering::LowerCCCArguments(SDValue Chain, CCInfo.AnalyzeFormalArguments(Ins, CC_SystemZ); if (isVarArg) - llvm_report_error("Varargs not supported yet"); + report_fatal_error("Varargs not supported yet"); for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) { SDValue ArgValue; @@ -371,7 +374,7 @@ SystemZTargetLowering::LowerCCCCallTo(SDValue Chain, SDValue Callee, &Outs, const SmallVectorImpl<ISD::InputArg> &Ins, DebugLoc dl, SelectionDAG &DAG, - SmallVectorImpl<SDValue> &InVals) { + SmallVectorImpl<SDValue> &InVals) const { MachineFunction &MF = DAG.getMachineFunction(); @@ -505,7 +508,7 @@ SystemZTargetLowering::LowerCallResult(SDValue Chain, SDValue InFlag, const SmallVectorImpl<ISD::InputArg> &Ins, DebugLoc dl, SelectionDAG &DAG, - SmallVectorImpl<SDValue> &InVals) { + SmallVectorImpl<SDValue> &InVals) const { // Assign locations to each value returned by this call. SmallVector<CCValAssign, 16> RVLocs; @@ -547,7 +550,7 @@ SDValue SystemZTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool isVarArg, const SmallVectorImpl<ISD::OutputArg> &Outs, - DebugLoc dl, SelectionDAG &DAG) { + DebugLoc dl, SelectionDAG &DAG) const { // CCValAssign - represent the assignment of the return value to a location SmallVector<CCValAssign, 16> RVLocs; @@ -600,7 +603,7 @@ SystemZTargetLowering::LowerReturn(SDValue Chain, SDValue SystemZTargetLowering::EmitCmp(SDValue LHS, SDValue RHS, ISD::CondCode CC, SDValue &SystemZCC, - SelectionDAG &DAG) { + SelectionDAG &DAG) const { // FIXME: Emit a test if RHS is zero bool isUnsigned = false; @@ -678,7 +681,7 @@ SDValue SystemZTargetLowering::EmitCmp(SDValue LHS, SDValue RHS, } -SDValue SystemZTargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) { +SDValue SystemZTargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const { SDValue Chain = Op.getOperand(0); ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(1))->get(); SDValue LHS = Op.getOperand(2); @@ -692,7 +695,8 @@ SDValue SystemZTargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) { Chain, Dest, SystemZCC, Flag); } -SDValue SystemZTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) { +SDValue SystemZTargetLowering::LowerSELECT_CC(SDValue Op, + SelectionDAG &DAG) const { SDValue LHS = Op.getOperand(0); SDValue RHS = Op.getOperand(1); SDValue TrueV = Op.getOperand(2); @@ -714,9 +718,9 @@ SDValue SystemZTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) { } SDValue SystemZTargetLowering::LowerGlobalAddress(SDValue Op, - SelectionDAG &DAG) { + SelectionDAG &DAG) const { DebugLoc dl = Op.getDebugLoc(); - GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal(); + const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal(); int64_t Offset = cast<GlobalAddressSDNode>(Op)->getOffset(); bool IsPic = getTargetMachine().getRelocationModel() == Reloc::PIC_; @@ -753,7 +757,7 @@ SDValue SystemZTargetLowering::LowerGlobalAddress(SDValue Op, // FIXME: PIC here SDValue 
SystemZTargetLowering::LowerJumpTable(SDValue Op, - SelectionDAG &DAG) { + SelectionDAG &DAG) const { DebugLoc dl = Op.getDebugLoc(); JumpTableSDNode *JT = cast<JumpTableSDNode>(Op); SDValue Result = DAG.getTargetJumpTable(JT->getIndex(), getPointerTy()); @@ -765,7 +769,7 @@ SDValue SystemZTargetLowering::LowerJumpTable(SDValue Op, // FIXME: PIC here // FIXME: This is just dirty hack. We need to lower cpool properly SDValue SystemZTargetLowering::LowerConstantPool(SDValue Op, - SelectionDAG &DAG) { + SelectionDAG &DAG) const { DebugLoc dl = Op.getDebugLoc(); ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op); @@ -795,8 +799,7 @@ const char *SystemZTargetLowering::getTargetNodeName(unsigned Opcode) const { MachineBasicBlock* SystemZTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, - MachineBasicBlock *BB, - DenseMap<MachineBasicBlock*, MachineBasicBlock*> *EM) const { + MachineBasicBlock *BB) const { const SystemZInstrInfo &TII = *TM.getInstrInfo(); DebugLoc dl = MI->getDebugLoc(); assert((MI->getOpcode() == SystemZ::Select32 || @@ -827,10 +830,6 @@ SystemZTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, BuildMI(BB, dl, TII.getBrCond(CC)).addMBB(copy1MBB); F->insert(I, copy0MBB); F->insert(I, copy1MBB); - // Inform sdisel of the edge changes. - for (MachineBasicBlock::succ_iterator SI = BB->succ_begin(), - SE = BB->succ_end(); SI != SE; ++SI) - EM->insert(std::make_pair(*SI, copy1MBB)); // Update machine-CFG edges by transferring all successors of the current // block to the new block which will contain the Phi node for the select. copy1MBB->transferSuccessors(BB); diff --git a/lib/Target/SystemZ/SystemZISelLowering.h b/lib/Target/SystemZ/SystemZISelLowering.h index 36ff994..94bd906 100644 --- a/lib/Target/SystemZ/SystemZISelLowering.h +++ b/lib/Target/SystemZ/SystemZISelLowering.h @@ -58,7 +58,7 @@ namespace llvm { explicit SystemZTargetLowering(SystemZTargetMachine &TM); /// LowerOperation - Provide custom lowering hooks for some operations. - virtual SDValue LowerOperation(SDValue Op, SelectionDAG &DAG); + virtual SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const; /// getTargetNodeName - This method returns the name of a target specific /// DAG node. @@ -74,20 +74,19 @@ namespace llvm { TargetLowering::ConstraintType getConstraintType(const std::string &Constraint) const; - SDValue LowerBR_CC(SDValue Op, SelectionDAG &DAG); - SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG); - SDValue LowerGlobalAddress(SDValue Op, SelectionDAG &DAG); - SDValue LowerJumpTable(SDValue Op, SelectionDAG &DAG); - SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG); + SDValue LowerBR_CC(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerJumpTable(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG) const; SDValue EmitCmp(SDValue LHS, SDValue RHS, ISD::CondCode CC, SDValue &SystemZCC, - SelectionDAG &DAG); + SelectionDAG &DAG) const; MachineBasicBlock* EmitInstrWithCustomInserter(MachineInstr *MI, - MachineBasicBlock *BB, - DenseMap<MachineBasicBlock*, MachineBasicBlock*> *EM) const; + MachineBasicBlock *BB) const; /// isFPImmLegal - Returns true if the target can instruction select the /// specified FP immediate natively. 
If false, the legalizer will @@ -101,7 +100,7 @@ namespace llvm { const SmallVectorImpl<ISD::OutputArg> &Outs, const SmallVectorImpl<ISD::InputArg> &Ins, DebugLoc dl, SelectionDAG &DAG, - SmallVectorImpl<SDValue> &InVals); + SmallVectorImpl<SDValue> &InVals) const; SDValue LowerCCCArguments(SDValue Chain, CallingConv::ID CallConv, @@ -109,33 +108,33 @@ namespace llvm { const SmallVectorImpl<ISD::InputArg> &Ins, DebugLoc dl, SelectionDAG &DAG, - SmallVectorImpl<SDValue> &InVals); + SmallVectorImpl<SDValue> &InVals) const; SDValue LowerCallResult(SDValue Chain, SDValue InFlag, CallingConv::ID CallConv, bool isVarArg, const SmallVectorImpl<ISD::InputArg> &Ins, DebugLoc dl, SelectionDAG &DAG, - SmallVectorImpl<SDValue> &InVals); + SmallVectorImpl<SDValue> &InVals) const; virtual SDValue LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, bool isVarArg, const SmallVectorImpl<ISD::InputArg> &Ins, DebugLoc dl, SelectionDAG &DAG, - SmallVectorImpl<SDValue> &InVals); + SmallVectorImpl<SDValue> &InVals) const; virtual SDValue LowerCall(SDValue Chain, SDValue Callee, CallingConv::ID CallConv, bool isVarArg, bool &isTailCall, const SmallVectorImpl<ISD::OutputArg> &Outs, const SmallVectorImpl<ISD::InputArg> &Ins, DebugLoc dl, SelectionDAG &DAG, - SmallVectorImpl<SDValue> &InVals); + SmallVectorImpl<SDValue> &InVals) const; virtual SDValue LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool isVarArg, const SmallVectorImpl<ISD::OutputArg> &Outs, - DebugLoc dl, SelectionDAG &DAG); + DebugLoc dl, SelectionDAG &DAG) const; const SystemZSubtarget &Subtarget; const SystemZTargetMachine &TM; diff --git a/lib/Target/SystemZ/SystemZInstrBuilder.h b/lib/Target/SystemZ/SystemZInstrBuilder.h index b69d2f6..fa87061 100644 --- a/lib/Target/SystemZ/SystemZInstrBuilder.h +++ b/lib/Target/SystemZ/SystemZInstrBuilder.h @@ -44,7 +44,7 @@ struct SystemZAddressMode { unsigned IndexReg; int32_t Disp; - GlobalValue *GV; + const GlobalValue *GV; SystemZAddressMode() : BaseType(RegBase), IndexReg(0), Disp(0) { Base.Reg = 0; diff --git a/lib/Target/SystemZ/SystemZMCAsmInfo.cpp b/lib/Target/SystemZ/SystemZMCAsmInfo.cpp index 1a09206..f9ccc47 100644 --- a/lib/Target/SystemZ/SystemZMCAsmInfo.cpp +++ b/lib/Target/SystemZ/SystemZMCAsmInfo.cpp @@ -12,6 +12,7 @@ //===----------------------------------------------------------------------===// #include "SystemZMCAsmInfo.h" +#include "llvm/MC/MCContext.h" #include "llvm/MC/MCSectionELF.h" using namespace llvm; @@ -21,7 +22,8 @@ SystemZMCAsmInfo::SystemZMCAsmInfo(const Target &T, const StringRef &TT) { PCSymbol = "."; } -MCSection *SystemZMCAsmInfo::getNonexecutableStackSection(MCContext &Ctx) const{ - return MCSectionELF::Create(".note.GNU-stack", MCSectionELF::SHT_PROGBITS, - 0, SectionKind::getMetadata(), false, Ctx); +const MCSection *SystemZMCAsmInfo:: +getNonexecutableStackSection(MCContext &Ctx) const{ + return Ctx.getELFSection(".note.GNU-stack", MCSectionELF::SHT_PROGBITS, + 0, SectionKind::getMetadata(), false); } diff --git a/lib/Target/SystemZ/SystemZMCAsmInfo.h b/lib/Target/SystemZ/SystemZMCAsmInfo.h index 00cb99b..87908f2 100644 --- a/lib/Target/SystemZ/SystemZMCAsmInfo.h +++ b/lib/Target/SystemZ/SystemZMCAsmInfo.h @@ -22,7 +22,7 @@ namespace llvm { struct SystemZMCAsmInfo : public MCAsmInfo { explicit SystemZMCAsmInfo(const Target &T, const StringRef &TT); - virtual MCSection *getNonexecutableStackSection(MCContext &Ctx) const; + virtual const MCSection *getNonexecutableStackSection(MCContext &Ctx) const; }; } // namespace llvm diff --git 
a/lib/Target/SystemZ/SystemZRegisterInfo.cpp b/lib/Target/SystemZ/SystemZRegisterInfo.cpp index 302c418..638fd17 100644 --- a/lib/Target/SystemZ/SystemZRegisterInfo.cpp +++ b/lib/Target/SystemZ/SystemZRegisterInfo.cpp @@ -77,7 +77,7 @@ BitVector SystemZRegisterInfo::getReservedRegs(const MachineFunction &MF) const /// allocas or if frame pointer elimination is disabled. bool SystemZRegisterInfo::hasFP(const MachineFunction &MF) const { const MachineFrameInfo *MFI = MF.getFrameInfo(); - return NoFramePointerElim || MFI->hasVarSizedObjects(); + return DisableFramePointerElim(MF) || MFI->hasVarSizedObjects(); } void SystemZRegisterInfo:: @@ -200,7 +200,7 @@ void emitSPUpdate(MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI, uint64_t ThisVal = (Offset > Chunk) ? Chunk : Offset; MachineInstr *MI = BuildMI(MBB, MBBI, DL, TII.get(Opc), SystemZ::R15D) - .addReg(SystemZ::R15D).addImm((isSub ? -(int64_t)ThisVal : ThisVal)); + .addReg(SystemZ::R15D).addImm(isSub ? -ThisVal : ThisVal); // The PSW implicit def is dead. MI->getOperand(3).setIsDead(); Offset -= ThisVal; diff --git a/lib/Target/SystemZ/SystemZSelectionDAGInfo.cpp b/lib/Target/SystemZ/SystemZSelectionDAGInfo.cpp new file mode 100644 index 0000000..87c831b --- /dev/null +++ b/lib/Target/SystemZ/SystemZSelectionDAGInfo.cpp @@ -0,0 +1,22 @@ +//===-- SystemZSelectionDAGInfo.cpp - SystemZ SelectionDAG Info -----------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the SystemZSelectionDAGInfo class. +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "systemz-selectiondag-info" +#include "SystemZSelectionDAGInfo.h" +using namespace llvm; + +SystemZSelectionDAGInfo::SystemZSelectionDAGInfo() { +} + +SystemZSelectionDAGInfo::~SystemZSelectionDAGInfo() { +} diff --git a/lib/Target/SystemZ/SystemZSelectionDAGInfo.h b/lib/Target/SystemZ/SystemZSelectionDAGInfo.h new file mode 100644 index 0000000..5292de9 --- /dev/null +++ b/lib/Target/SystemZ/SystemZSelectionDAGInfo.h @@ -0,0 +1,29 @@ +//===-- SystemZSelectionDAGInfo.h - SystemZ SelectionDAG Info ---*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines the SystemZ subclass for TargetSelectionDAGInfo. 
+// +//===----------------------------------------------------------------------===// + +#ifndef SYSTEMZSELECTIONDAGINFO_H +#define SYSTEMZSELECTIONDAGINFO_H + +#include "llvm/Target/TargetSelectionDAGInfo.h" + +namespace llvm { + +class SystemZSelectionDAGInfo : public TargetSelectionDAGInfo { +public: + SystemZSelectionDAGInfo(); + ~SystemZSelectionDAGInfo(); +}; + +} + +#endif diff --git a/lib/Target/SystemZ/SystemZTargetMachine.h b/lib/Target/SystemZ/SystemZTargetMachine.h index 551aeb5..d3357cc 100644 --- a/lib/Target/SystemZ/SystemZTargetMachine.h +++ b/lib/Target/SystemZ/SystemZTargetMachine.h @@ -49,8 +49,8 @@ public: return &InstrInfo.getRegisterInfo(); } - virtual SystemZTargetLowering *getTargetLowering() const { - return const_cast<SystemZTargetLowering*>(&TLInfo); + virtual const SystemZTargetLowering *getTargetLowering() const { + return &TLInfo; } virtual bool addInstSelector(PassManagerBase &PM, CodeGenOpt::Level OptLevel); diff --git a/lib/Target/TargetData.cpp b/lib/Target/TargetData.cpp index 643b397..5870d8a 100644 --- a/lib/Target/TargetData.cpp +++ b/lib/Target/TargetData.cpp @@ -228,7 +228,7 @@ void TargetData::init(StringRef Desc) { /// @note This has to exist, because this is a pass, but it should never be /// used. TargetData::TargetData() : ImmutablePass(&ID) { - llvm_report_error("Bad TargetData ctor used. " + report_fatal_error("Bad TargetData ctor used. " "Tool did not specify a TargetData to use?"); } @@ -269,18 +269,8 @@ unsigned TargetData::getAlignmentInfo(AlignTypeEnum AlignType, return ABIInfo ? Alignments[i].ABIAlign : Alignments[i].PrefAlign; // The best match so far depends on what we're looking for. - if (AlignType == VECTOR_ALIGN && Alignments[i].AlignType == VECTOR_ALIGN) { - // If this is a specification for a smaller vector type, we will fall back - // to it. This happens because <128 x double> can be implemented in terms - // of 64 <2 x double>. - if (Alignments[i].TypeBitWidth < BitWidth) { - // Verify that we pick the biggest of the fallbacks. - if (BestMatchIdx == -1 || - Alignments[BestMatchIdx].TypeBitWidth < Alignments[i].TypeBitWidth) - BestMatchIdx = i; - } - } else if (AlignType == INTEGER_ALIGN && - Alignments[i].AlignType == INTEGER_ALIGN) { + if (AlignType == INTEGER_ALIGN && + Alignments[i].AlignType == INTEGER_ALIGN) { // The "best match" for integers is the smallest size that is larger than // the BitWidth requested. if (Alignments[i].TypeBitWidth > BitWidth && (BestMatchIdx == -1 || @@ -303,10 +293,15 @@ unsigned TargetData::getAlignmentInfo(AlignTypeEnum AlignType, } else { assert(AlignType == VECTOR_ALIGN && "Unknown alignment type!"); - // If we didn't find a vector size that is smaller or equal to this type, - // then we will end up scalarizing this to its element type. Just return - // the alignment of the element. - return getAlignment(cast<VectorType>(Ty)->getElementType(), ABIInfo); + // By default, use natural alignment for vector types. This is consistent + // with what clang and llvm-gcc do. + unsigned Align = getTypeAllocSize(cast<VectorType>(Ty)->getElementType()); + Align *= cast<VectorType>(Ty)->getNumElements(); + // If the alignment is not a power of 2, round up to the next power of 2. + // This happens for non-power-of-2 length vectors. 
+ if (Align & (Align-1)) + Align = llvm::NextPowerOf2(Align); + return Align; } } @@ -630,8 +625,8 @@ uint64_t TargetData::getIndexedOffset(const Type *ptrTy, Value* const* Indices, Ty = cast<SequentialType>(Ty)->getElementType(); // Get the array index and the size of each array element. - int64_t arrayIdx = cast<ConstantInt>(Indices[CurIDX])->getSExtValue(); - Result += arrayIdx * (int64_t)getTypeAllocSize(Ty); + if (int64_t arrayIdx = cast<ConstantInt>(Indices[CurIDX])->getSExtValue()) + Result += arrayIdx * (int64_t)getTypeAllocSize(Ty); } } diff --git a/lib/Target/TargetLoweringObjectFile.cpp b/lib/Target/TargetLoweringObjectFile.cpp index 44722b3..b9372d0 100644 --- a/lib/Target/TargetLoweringObjectFile.cpp +++ b/lib/Target/TargetLoweringObjectFile.cpp @@ -310,7 +310,7 @@ getExprForDwarfReference(const MCSymbol *Sym, Mangler *Mang, switch (Encoding & 0xF0) { default: - llvm_report_error("We do not support this DWARF encoding yet!"); + report_fatal_error("We do not support this DWARF encoding yet!"); case dwarf::DW_EH_PE_absptr: // Do nothing special return Res; diff --git a/lib/Target/TargetMachine.cpp b/lib/Target/TargetMachine.cpp index 88871e3..ac67c91 100644 --- a/lib/Target/TargetMachine.cpp +++ b/lib/Target/TargetMachine.cpp @@ -11,6 +11,8 @@ // //===----------------------------------------------------------------------===// +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/MC/MCAsmInfo.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetOptions.h" @@ -25,6 +27,7 @@ namespace llvm { bool LessPreciseFPMADOption; bool PrintMachineCode; bool NoFramePointerElim; + bool NoFramePointerElimNonLeaf; bool NoExcessFPPrecision; bool UnsafeFPMath; bool FiniteOnlyFPMathOption; @@ -33,8 +36,7 @@ namespace llvm { FloatABI::ABIType FloatABIType; bool NoImplicitFloat; bool NoZerosInBSS; - bool DwarfExceptionHandling; - bool SjLjExceptionHandling; + bool JITExceptionHandling; bool JITEmitDebugInfo; bool JITEmitDebugInfoToDisk; bool UnwindTablesMandatory; @@ -58,6 +60,11 @@ DisableFPElim("disable-fp-elim", cl::location(NoFramePointerElim), cl::init(false)); static cl::opt<bool, true> +DisableFPElimNonLeaf("disable-non-leaf-fp-elim", + cl::desc("Disable frame pointer elimination optimization for non-leaf funcs"), + cl::location(NoFramePointerElimNonLeaf), + cl::init(false)); +static cl::opt<bool, true> DisableExcessPrecision("disable-excess-fp-precision", cl::desc("Disable optimizations that may increase FP precision"), cl::location(NoExcessFPPrecision), @@ -107,14 +114,9 @@ DontPlaceZerosInBSS("nozero-initialized-in-bss", cl::location(NoZerosInBSS), cl::init(false)); static cl::opt<bool, true> -EnableDwarfExceptionHandling("enable-eh", - cl::desc("Emit DWARF exception handling (default if target supports)"), - cl::location(DwarfExceptionHandling), - cl::init(false)); -static cl::opt<bool, true> -EnableSjLjExceptionHandling("enable-sjlj-eh", - cl::desc("Emit SJLJ exception handling (default if target supports)"), - cl::location(SjLjExceptionHandling), +EnableJITExceptionHandling("jit-enable-eh", + cl::desc("Emit exception handling information"), + cl::location(JITExceptionHandling), cl::init(false)); // In debug builds, make this default to true. 
#ifdef NDEBUG @@ -197,7 +199,14 @@ EnableStrongPHIElim(cl::Hidden, "strong-phi-elim", cl::desc("Use strong PHI elimination."), cl::location(StrongPHIElim), cl::init(false)); - +static cl::opt<bool> +DataSections("fdata-sections", + cl::desc("Emit data into separate sections"), + cl::init(false)); +static cl::opt<bool> +FunctionSections("ffunction-sections", + cl::desc("Emit functions into separate sections"), + cl::init(false)); //--------------------------------------------------------------------------- // TargetMachine Class // @@ -244,7 +253,35 @@ void TargetMachine::setAsmVerbosityDefault(bool V) { AsmVerbosityDefault = V; } +bool TargetMachine::getFunctionSections() { + return FunctionSections; +} + +bool TargetMachine::getDataSections() { + return DataSections; +} + +void TargetMachine::setFunctionSections(bool V) { + FunctionSections = V; +} + +void TargetMachine::setDataSections(bool V) { + DataSections = V; +} + namespace llvm { + /// DisableFramePointerElim - This returns true if frame pointer elimination + /// optimization should be disabled for the given machine function. + bool DisableFramePointerElim(const MachineFunction &MF) { + if (NoFramePointerElim) + return true; + if (NoFramePointerElimNonLeaf) { + const MachineFrameInfo *MFI = MF.getFrameInfo(); + return MFI->hasCalls(); + } + return false; + } + /// LessPreciseFPMAD - This flag return true when -enable-fp-mad option /// is specified on the command line. When this flag is off(default), the /// code generator is not allowed to generate mad (multiply add) if the diff --git a/lib/Target/X86/AsmParser/X86AsmParser.cpp b/lib/Target/X86/AsmParser/X86AsmParser.cpp index 47873d1..da01350 100644 --- a/lib/Target/X86/AsmParser/X86AsmParser.cpp +++ b/lib/Target/X86/AsmParser/X86AsmParser.cpp @@ -44,7 +44,7 @@ private: bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc); X86Operand *ParseOperand(); - X86Operand *ParseMemOperand(); + X86Operand *ParseMemOperand(unsigned SegReg, SMLoc StartLoc); bool ParseDirectiveWord(unsigned Size, SMLoc L); @@ -368,14 +368,22 @@ bool X86ATTAsmParser::ParseRegister(unsigned &RegNo, X86Operand *X86ATTAsmParser::ParseOperand() { switch (getLexer().getKind()) { default: - return ParseMemOperand(); + // Parse a memory operand with no segment register. + return ParseMemOperand(0, Parser.getTok().getLoc()); case AsmToken::Percent: { - // FIXME: if a segment register, this could either be just the seg reg, or - // the start of a memory operand. + // Read the register. unsigned RegNo; SMLoc Start, End; if (ParseRegister(RegNo, Start, End)) return 0; - return X86Operand::CreateReg(RegNo, Start, End); + + // If this is a segment register followed by a ':', then this is the start + // of a memory reference, otherwise this is a normal register reference. + if (getLexer().isNot(AsmToken::Colon)) + return X86Operand::CreateReg(RegNo, Start, End); + + + getParser().Lex(); // Eat the colon. + return ParseMemOperand(RegNo, Start); } case AsmToken::Dollar: { // $42 -> immediate. @@ -389,13 +397,10 @@ X86Operand *X86ATTAsmParser::ParseOperand() { } } -/// ParseMemOperand: segment: disp(basereg, indexreg, scale) -X86Operand *X86ATTAsmParser::ParseMemOperand() { - SMLoc MemStart = Parser.getTok().getLoc(); - - // FIXME: If SegReg ':' (e.g. %gs:), eat and remember. - unsigned SegReg = 0; - +/// ParseMemOperand: segment: disp(basereg, indexreg, scale). The '%ds:' prefix +/// has already been parsed if present. 
+X86Operand *X86ATTAsmParser::ParseMemOperand(unsigned SegReg, SMLoc MemStart) { + // We have to disambiguate a parenthesized expression "(4+5)" from the start // of a memory operand with a missing displacement "(%ebx)" or "(,%eax)". The // only way to do this without lookahead is to eat the '(' and see what is diff --git a/lib/Target/X86/AsmPrinter/X86AsmPrinter.cpp b/lib/Target/X86/AsmPrinter/X86AsmPrinter.cpp index f592396..8b0ed1c 100644 --- a/lib/Target/X86/AsmPrinter/X86AsmPrinter.cpp +++ b/lib/Target/X86/AsmPrinter/X86AsmPrinter.cpp @@ -474,9 +474,6 @@ void X86AsmPrinter::EmitStartOfAsmFile(Module &M) { void X86AsmPrinter::EmitEndOfAsmFile(Module &M) { if (Subtarget->isTargetDarwin()) { // All darwin targets use mach-o. - TargetLoweringObjectFileMachO &TLOFMacho = - static_cast<TargetLoweringObjectFileMachO &>(getObjFileLowering()); - MachineModuleInfoMachO &MMIMacho = MMI->getObjFileInfo<MachineModuleInfoMachO>(); @@ -486,11 +483,11 @@ void X86AsmPrinter::EmitEndOfAsmFile(Module &M) { Stubs = MMIMacho.GetFnStubList(); if (!Stubs.empty()) { const MCSection *TheSection = - TLOFMacho.getMachOSection("__IMPORT", "__jump_table", - MCSectionMachO::S_SYMBOL_STUBS | - MCSectionMachO::S_ATTR_SELF_MODIFYING_CODE | - MCSectionMachO::S_ATTR_PURE_INSTRUCTIONS, - 5, SectionKind::getMetadata()); + OutContext.getMachOSection("__IMPORT", "__jump_table", + MCSectionMachO::S_SYMBOL_STUBS | + MCSectionMachO::S_ATTR_SELF_MODIFYING_CODE | + MCSectionMachO::S_ATTR_PURE_INSTRUCTIONS, + 5, SectionKind::getMetadata()); OutStreamer.SwitchSection(TheSection); for (unsigned i = 0, e = Stubs.size(); i != e; ++i) { @@ -512,9 +509,9 @@ void X86AsmPrinter::EmitEndOfAsmFile(Module &M) { Stubs = MMIMacho.GetGVStubList(); if (!Stubs.empty()) { const MCSection *TheSection = - TLOFMacho.getMachOSection("__IMPORT", "__pointers", - MCSectionMachO::S_NON_LAZY_SYMBOL_POINTERS, - SectionKind::getMetadata()); + OutContext.getMachOSection("__IMPORT", "__pointers", + MCSectionMachO::S_NON_LAZY_SYMBOL_POINTERS, + SectionKind::getMetadata()); OutStreamer.SwitchSection(TheSection); for (unsigned i = 0, e = Stubs.size(); i != e; ++i) { @@ -587,8 +584,8 @@ void X86AsmPrinter::EmitEndOfAsmFile(Module &M) { // Necessary for dllexport support std::vector<const MCSymbol*> DLLExportedFns, DLLExportedGlobals; - TargetLoweringObjectFileCOFF &TLOFCOFF = - static_cast<TargetLoweringObjectFileCOFF&>(getObjFileLowering()); + const TargetLoweringObjectFileCOFF &TLOFCOFF = + static_cast<const TargetLoweringObjectFileCOFF&>(getObjFileLowering()); for (Module::const_iterator I = M.begin(), E = M.end(); I != E; ++I) if (I->hasDLLExportLinkage()) @@ -617,8 +614,8 @@ void X86AsmPrinter::EmitEndOfAsmFile(Module &M) { } if (Subtarget->isTargetELF()) { - TargetLoweringObjectFileELF &TLOFELF = - static_cast<TargetLoweringObjectFileELF &>(getObjFileLowering()); + const TargetLoweringObjectFileELF &TLOFELF = + static_cast<const TargetLoweringObjectFileELF &>(getObjFileLowering()); MachineModuleInfoELF &MMIELF = MMI->getObjFileInfo<MachineModuleInfoELF>(); diff --git a/lib/Target/X86/AsmPrinter/X86AsmPrinter.h b/lib/Target/X86/AsmPrinter/X86AsmPrinter.h index ee59289..95984b2 100644 --- a/lib/Target/X86/AsmPrinter/X86AsmPrinter.h +++ b/lib/Target/X86/AsmPrinter/X86AsmPrinter.h @@ -80,6 +80,8 @@ class VISIBILITY_HIDDEN X86AsmPrinter : public AsmPrinter { bool runOnMachineFunction(MachineFunction &F); void PrintDebugValueComment(const MachineInstr *MI, raw_ostream &OS); + + MachineLocation getDebugValueLocation(const MachineInstr *MI) const; }; } // end 
namespace llvm diff --git a/lib/Target/X86/AsmPrinter/X86MCInstLower.cpp b/lib/Target/X86/AsmPrinter/X86MCInstLower.cpp index a290eb0..effc8ed 100644 --- a/lib/Target/X86/AsmPrinter/X86MCInstLower.cpp +++ b/lib/Target/X86/AsmPrinter/X86MCInstLower.cpp @@ -169,6 +169,15 @@ MCOperand X86MCInstLower::LowerSymbolOperand(const MachineOperand &MO, Expr = MCBinaryExpr::CreateSub(Expr, MCSymbolRefExpr::Create(GetPICBaseSymbol(), Ctx), Ctx); + if (MO.isJTI() && AsmPrinter.MAI->hasSetDirective()) { + // If .set directive is supported, use it to reduce the number of + // relocations the assembler will generate for differences between + // local labels. This is only safe when the symbols are in the same + // section so we are restricting it to jumptable references. + MCSymbol *Label = Ctx.CreateTempSymbol(); + AsmPrinter.OutStreamer.EmitAssignment(Label, Expr); + Expr = MCSymbolRefExpr::Create(Label, Ctx); + } break; } @@ -328,61 +337,36 @@ void X86MCInstLower::Lower(const MachineInstr *MI, MCInst &OutMI) const { void X86AsmPrinter::PrintDebugValueComment(const MachineInstr *MI, raw_ostream &O) { - // FIXME: if this is implemented for another target before it goes - // away completely, the common part should be moved into AsmPrinter. - O << '\t' << MAI->getCommentString() << "DEBUG_VALUE: "; + // Only the target-dependent form of DBG_VALUE should get here. + // Referencing the offset and metadata as NOps-2 and NOps-1 is + // probably portable to other targets; frame pointer location is not. unsigned NOps = MI->getNumOperands(); + assert(NOps==7); + O << '\t' << MAI->getCommentString() << "DEBUG_VALUE: "; // cast away const; DIetc do not take const operands for some reason. - DIVariable V((MDNode*)(MI->getOperand(NOps-1).getMetadata())); + DIVariable V(const_cast<MDNode *>(MI->getOperand(NOps-1).getMetadata())); + if (V.getContext().isSubprogram()) + O << DISubprogram(V.getContext().getNode()).getDisplayName() << ":"; O << V.getName(); O << " <- "; - if (NOps==3) { - // Register or immediate value. Register 0 means undef. - assert(MI->getOperand(0).isReg() || - MI->getOperand(0).isImm() || - MI->getOperand(0).isFPImm()); - if (MI->getOperand(0).isReg() && MI->getOperand(0).getReg() == 0) { - // Suppress offset in this case, it is not meaningful. - O << "undef"; - OutStreamer.AddBlankLine(); - return; - } - - if (MI->getOperand(0).isFPImm()) { - // This is more naturally done in printOperand, but since the only use - // of such an operand is in this comment and that is temporary (and it's - // ugly), we prefer to keep this localized. - // The include of Type.h may be removable when this code is. - if (MI->getOperand(0).getFPImm()->getType()->isFloatTy() || - MI->getOperand(0).getFPImm()->getType()->isDoubleTy()) - MI->getOperand(0).print(O, &TM); - else { - // There is no good way to print long double. Convert a copy to - // double. Ah well, it's only a comment. - bool ignored; - APFloat APF = APFloat(MI->getOperand(0).getFPImm()->getValueAPF()); - APF.convert(APFloat::IEEEdouble, APFloat::rmNearestTiesToEven, - &ignored); - O << "(long double) " << APF.convertToDouble(); - } - } else - printOperand(MI, 0, O); - } else { - if (MI->getOperand(0).isReg() && MI->getOperand(0).getReg() == 0) { - // Suppress offset in this case, it is not meaningful. - O << "undef"; - OutStreamer.AddBlankLine(); - return; - } - // Frame address. Currently handles register +- offset only. 
- assert(MI->getOperand(0).isReg() && MI->getOperand(3).isImm()); - O << '['; printOperand(MI, 0, O); O << '+'; printOperand(MI, 3, O); - O << ']'; - } + // Frame address. Currently handles register +- offset only. + assert(MI->getOperand(0).isReg() && MI->getOperand(3).isImm()); + O << '['; printOperand(MI, 0, O); O << '+'; printOperand(MI, 3, O); + O << ']'; O << "+"; printOperand(MI, NOps-2, O); } +MachineLocation +X86AsmPrinter::getDebugValueLocation(const MachineInstr *MI) const { + MachineLocation Location; + assert (MI->getNumOperands() == 7 && "Invalid no. of machine operands!"); + // Frame address. Currently handles register +- offset only. + assert(MI->getOperand(0).isReg() && MI->getOperand(3).isImm()); + Location.set(MI->getOperand(0).getReg(), MI->getOperand(3).getImm()); + return Location; +} + void X86AsmPrinter::EmitInstruction(const MachineInstr *MI) { X86MCInstLower MCInstLowering(OutContext, Mang, *this); @@ -395,7 +379,7 @@ void X86AsmPrinter::EmitInstruction(const MachineInstr *MI) { OutStreamer.EmitRawText(StringRef(OS.str())); } return; - + case X86::MOVPC32r: { MCInst TmpInst; // This is a pseudo op for a two instruction sequence with a label, which diff --git a/lib/Target/X86/CMakeLists.txt b/lib/Target/X86/CMakeLists.txt index 22285f1..da6bb91 100644 --- a/lib/Target/X86/CMakeLists.txt +++ b/lib/Target/X86/CMakeLists.txt @@ -13,6 +13,7 @@ tablegen(X86GenDAGISel.inc -gen-dag-isel) tablegen(X86GenFastISel.inc -gen-fast-isel) tablegen(X86GenCallingConv.inc -gen-callingconv) tablegen(X86GenSubtarget.inc -gen-subtarget) +tablegen(X86GenEDInfo.inc -gen-enhanced-disassembly-info) set(sources SSEDomainFix.cpp @@ -33,6 +34,7 @@ set(sources X86TargetMachine.cpp X86TargetObjectFile.cpp X86FastISel.cpp + X86SelectionDAGInfo.cpp ) if( CMAKE_CL_64 ) diff --git a/lib/Target/X86/Disassembler/CMakeLists.txt b/lib/Target/X86/Disassembler/CMakeLists.txt index 2a83a9c..9f91060 100644 --- a/lib/Target/X86/Disassembler/CMakeLists.txt +++ b/lib/Target/X86/Disassembler/CMakeLists.txt @@ -4,4 +4,11 @@ add_llvm_library(LLVMX86Disassembler X86Disassembler.cpp X86DisassemblerDecoder.c ) +# workaround for hanging compilation on MSVC9 +if( MSVC_VERSION EQUAL 1500 ) +set_property( + SOURCE X86Disassembler.cpp + PROPERTY COMPILE_FLAGS "/Od" + ) +endif() add_dependencies(LLVMX86Disassembler X86CodeGenTable_gen) diff --git a/lib/Target/X86/Disassembler/X86Disassembler.cpp b/lib/Target/X86/Disassembler/X86Disassembler.cpp index 7328dc0..62e7357 100644 --- a/lib/Target/X86/Disassembler/X86Disassembler.cpp +++ b/lib/Target/X86/Disassembler/X86Disassembler.cpp @@ -17,6 +17,7 @@ #include "X86Disassembler.h" #include "X86DisassemblerDecoder.h" +#include "llvm/MC/EDInstInfo.h" #include "llvm/MC/MCDisassembler.h" #include "llvm/MC/MCDisassembler.h" #include "llvm/MC/MCInst.h" @@ -26,6 +27,7 @@ #include "llvm/Support/raw_ostream.h" #include "X86GenRegisterNames.inc" +#include "X86GenEDInfo.inc" using namespace llvm; using namespace llvm::X86Disassembler; @@ -69,6 +71,10 @@ X86GenericDisassembler::X86GenericDisassembler(DisassemblerMode mode) : X86GenericDisassembler::~X86GenericDisassembler() { } +EDInstInfo *X86GenericDisassembler::getEDInfo() const { + return instInfoX86; +} + /// regionReader - a callback function that wraps the readByte method from /// MemoryObject. 
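An aside on the DBG_VALUE hunks above: both PrintDebugValueComment and the new getDebugValueLocation assume the target-dependent 7-operand form, with the frame register in operand 0 and the immediate offset in operand 3. A minimal standalone sketch of that extraction, using mock types rather than LLVM's MachineInstr/MachineLocation, might look like this:

// Standalone sketch of the operand layout the DBG_VALUE hunks above assume:
// a 7-operand target form where operand 0 is the frame register and
// operand 3 is the immediate offset. The types here are mocks, not LLVM's.
#include <cassert>
#include <cstdio>
#include <vector>

struct MockOperand {
  enum Kind { Reg, Imm, Other } kind;
  long long value; // register number or immediate, depending on kind
};

struct MockLocation {
  unsigned reg = 0;
  long long offset = 0;
};

// Mirrors the "register +- offset" case of getDebugValueLocation above.
MockLocation getDebugValueLocation(const std::vector<MockOperand> &ops) {
  assert(ops.size() == 7 && "Invalid no. of operands for this sketch");
  assert(ops[0].kind == MockOperand::Reg && ops[3].kind == MockOperand::Imm);
  MockLocation loc;
  loc.reg = static_cast<unsigned>(ops[0].value);
  loc.offset = ops[3].value;
  return loc;
}

int main() {
  // e.g. a variable living at [frame-reg - 8]
  std::vector<MockOperand> ops(7, MockOperand{MockOperand::Other, 0});
  ops[0] = MockOperand{MockOperand::Reg, 6};
  ops[3] = MockOperand{MockOperand::Imm, -8};
  MockLocation loc = getDebugValueLocation(ops);
  std::printf("reg %u, offset %lld\n", loc.reg, loc.offset);
  return 0;
}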
/// diff --git a/lib/Target/X86/Disassembler/X86Disassembler.h b/lib/Target/X86/Disassembler/X86Disassembler.h index 0e6e0b0..9c54262 100644 --- a/lib/Target/X86/Disassembler/X86Disassembler.h +++ b/lib/Target/X86/Disassembler/X86Disassembler.h @@ -94,6 +94,8 @@ namespace llvm { class MCInst; class MemoryObject; class raw_ostream; + +struct EDInstInfo; namespace X86Disassembler { @@ -115,6 +117,9 @@ public: const MemoryObject ®ion, uint64_t address, raw_ostream &vStream) const; + + /// getEDInfo - See MCDisassembler. + EDInstInfo *getEDInfo() const; private: DisassemblerMode fMode; }; diff --git a/lib/Target/X86/Disassembler/X86DisassemblerDecoder.c b/lib/Target/X86/Disassembler/X86DisassemblerDecoder.c index db694bc..64f6b2d 100644 --- a/lib/Target/X86/Disassembler/X86DisassemblerDecoder.c +++ b/lib/Target/X86/Disassembler/X86DisassemblerDecoder.c @@ -1277,6 +1277,9 @@ static int readOperands(struct InternalInstruction* insn) { case ENCODING_IB: if (readImmediate(insn, 1)) return -1; + if (insn->spec->operands[index].type == TYPE_IMM3 && + insn->immediates[insn->numImmediatesConsumed - 1] > 7) + return -1; break; case ENCODING_IW: if (readImmediate(insn, 2)) @@ -1293,6 +1296,7 @@ static int readOperands(struct InternalInstruction* insn) { case ENCODING_Iv: if (readImmediate(insn, insn->immediateSize)) return -1; + break; case ENCODING_Ia: if (readImmediate(insn, insn->addressSize)) return -1; diff --git a/lib/Target/X86/Disassembler/X86DisassemblerDecoderCommon.h b/lib/Target/X86/Disassembler/X86DisassemblerDecoderCommon.h index c213f89..4a7cd57 100644 --- a/lib/Target/X86/Disassembler/X86DisassemblerDecoderCommon.h +++ b/lib/Target/X86/Disassembler/X86DisassemblerDecoderCommon.h @@ -236,6 +236,7 @@ struct ContextDecision { ENUM_ENTRY(TYPE_IMM16, "2-byte") \ ENUM_ENTRY(TYPE_IMM32, "4-byte") \ ENUM_ENTRY(TYPE_IMM64, "8-byte") \ + ENUM_ENTRY(TYPE_IMM3, "1-byte immediate operand between 0 and 7") \ ENUM_ENTRY(TYPE_RM8, "1-byte register or memory operand") \ ENUM_ENTRY(TYPE_RM16, "2-byte") \ ENUM_ENTRY(TYPE_RM32, "4-byte") \ diff --git a/lib/Target/X86/SSEDomainFix.cpp b/lib/Target/X86/SSEDomainFix.cpp index 4b54676..5e80845 100644 --- a/lib/Target/X86/SSEDomainFix.cpp +++ b/lib/Target/X86/SSEDomainFix.cpp @@ -159,7 +159,7 @@ int SSEDomainFixPass::RegIndex(unsigned reg) { // We just need them to be consecutive, ordering doesn't matter. assert(X86::XMM9 == X86::XMM0+NumRegs-1 && "Unexpected sort"); reg -= X86::XMM0; - return reg < NumRegs ? reg : -1; + return reg < NumRegs ? (int) reg : -1; } DomainValue *SSEDomainFixPass::Alloc(int domain) { @@ -216,8 +216,15 @@ void SSEDomainFixPass::Force(int rx, unsigned domain) { if (LiveRegs && (dv = LiveRegs[rx])) { if (dv->isCollapsed()) dv->addDomain(domain); - else + else if (dv->hasDomain(domain)) Collapse(dv, domain); + else { + // This is an incompatible open DomainValue. Collapse it to whatever and force + // the new value into domain. This costs a domain crossing. + Collapse(dv, dv->getFirstDomain()); + assert(LiveRegs[rx] && "Not live after collapse?"); + LiveRegs[rx]->addDomain(domain); + } } else { // Set up basic collapsed DomainValue. SetLiveReg(rx, Alloc(domain)); @@ -263,7 +270,7 @@ bool SSEDomainFixPass::Merge(DomainValue *A, DomainValue *B) { void SSEDomainFixPass::enterBasicBlock() { // Try to coalesce live-out registers from predecessors. 
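Backing up to the X86DisassemblerDecoder hunk above: the new TYPE_IMM3 operand class only admits byte immediates in the range 0-7, and readOperands now rejects anything larger (the same hunk also adds the missing break after ENCODING_Iv). A hedged, self-contained sketch of that range check, with a toy enum standing in for the decoder's real tables:

// Minimal sketch (not the real decoder's data structures) of the TYPE_IMM3
// range check added above: a 3-bit immediate encoded in a full byte must
// still be in [0, 7], otherwise the byte sequence is not a valid instruction.
#include <cassert>
#include <cstdint>

enum ToyOperandType { TOY_IMM8, TOY_IMM3 };

// Returns 0 on success, -1 if the immediate is out of range for its type,
// mirroring the readOperands() convention in the hunk above.
int checkImmediate(ToyOperandType type, uint8_t byte) {
  if (type == TOY_IMM3 && byte > 7)
    return -1; // reject encodings whose 3-bit field carries a larger value
  return 0;
}

int main() {
  assert(checkImmediate(TOY_IMM3, 5) == 0);
  assert(checkImmediate(TOY_IMM3, 9) == -1);
  assert(checkImmediate(TOY_IMM8, 200) == 0);
  return 0;
}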
- for (MachineBasicBlock::const_livein_iterator i = MBB->livein_begin(), + for (MachineBasicBlock::livein_iterator i = MBB->livein_begin(), e = MBB->livein_end(); i != e; ++i) { int rx = RegIndex(*i); if (rx < 0) continue; @@ -281,8 +288,9 @@ void SSEDomainFixPass::enterBasicBlock() { // We have a live DomainValue from more than one predecessor. if (LiveRegs[rx]->isCollapsed()) { // We are already collapsed, but predecessor is not. Force him. - if (!pdv->isCollapsed()) - Collapse(pdv, LiveRegs[rx]->getFirstDomain()); + unsigned domain = LiveRegs[rx]->getFirstDomain(); + if (!pdv->isCollapsed() && pdv->hasDomain(domain)) + Collapse(pdv, domain); continue; } @@ -290,7 +298,7 @@ void SSEDomainFixPass::enterBasicBlock() { if (!pdv->isCollapsed()) Merge(LiveRegs[rx], pdv); else - Collapse(LiveRegs[rx], pdv->getFirstDomain()); + Force(rx, pdv->getFirstDomain()); } } } diff --git a/lib/Target/X86/X86.h b/lib/Target/X86/X86.h index 9be38a4..22e89a5 100644 --- a/lib/Target/X86/X86.h +++ b/lib/Target/X86/X86.h @@ -69,6 +69,12 @@ TargetAsmBackend *createX86_64AsmBackend(const Target &, const std::string &); /// FunctionPass *createEmitX86CodeToMemory(); +/// createX86MaxStackAlignmentHeuristicPass - This function returns a pass +/// which determines whether the frame pointer register should be +/// reserved in case dynamic stack alignment is later required. +/// +FunctionPass *createX86MaxStackAlignmentHeuristicPass(); + extern Target TheX86_32Target, TheX86_64Target; } // End llvm namespace diff --git a/lib/Target/X86/X86AsmBackend.cpp b/lib/Target/X86/X86AsmBackend.cpp index 8e2928c3..ba9c1d0 100644 --- a/lib/Target/X86/X86AsmBackend.cpp +++ b/lib/Target/X86/X86AsmBackend.cpp @@ -111,7 +111,7 @@ void X86AsmBackend::RelaxInstruction(const MCInstFragment *IF, SmallString<256> Tmp; raw_svector_ostream OS(Tmp); IF->getInst().dump_pretty(OS); - llvm_report_error("unexpected instruction to relax: " + OS.str()); + report_fatal_error("unexpected instruction to relax: " + OS.str()); } Res = IF->getInst(); diff --git a/lib/Target/X86/X86CodeEmitter.cpp b/lib/Target/X86/X86CodeEmitter.cpp index 6638e11..8f02604 100644 --- a/lib/Target/X86/X86CodeEmitter.cpp +++ b/lib/Target/X86/X86CodeEmitter.cpp @@ -79,7 +79,7 @@ namespace { private: void emitPCRelativeBlockAddress(MachineBasicBlock *MBB); - void emitGlobalAddress(GlobalValue *GV, unsigned Reloc, + void emitGlobalAddress(const GlobalValue *GV, unsigned Reloc, intptr_t Disp = 0, intptr_t PCAdj = 0, bool Indirect = false); void emitExternalSymbolAddress(const char *ES, unsigned Reloc); @@ -163,7 +163,8 @@ void Emitter<CodeEmitter>::emitPCRelativeBlockAddress(MachineBasicBlock *MBB) { /// this is part of a "take the address of a global" instruction. /// template<class CodeEmitter> -void Emitter<CodeEmitter>::emitGlobalAddress(GlobalValue *GV, unsigned Reloc, +void Emitter<CodeEmitter>::emitGlobalAddress(const GlobalValue *GV, + unsigned Reloc, intptr_t Disp /* = 0 */, intptr_t PCAdj /* = 0 */, bool Indirect /* = false */) { @@ -174,9 +175,10 @@ void Emitter<CodeEmitter>::emitGlobalAddress(GlobalValue *GV, unsigned Reloc, RelocCST = PCAdj; MachineRelocation MR = Indirect ? 
MachineRelocation::getIndirectSymbol(MCE.getCurrentPCOffset(), Reloc, - GV, RelocCST, false) + const_cast<GlobalValue *>(GV), + RelocCST, false) : MachineRelocation::getGV(MCE.getCurrentPCOffset(), Reloc, - GV, RelocCST, false); + const_cast<GlobalValue *>(GV), RelocCST, false); MCE.addRelocation(MR); // The relocated value will be added to the displacement if (Reloc == X86::reloc_absolute_dword) @@ -378,6 +380,16 @@ void Emitter<CodeEmitter>::emitMemModRMByte(const MachineInstr &MI, const MachineOperand &IndexReg = MI.getOperand(Op+2); unsigned BaseReg = Base.getReg(); + + // Handle %rip relative addressing. + if (BaseReg == X86::RIP || + (Is64BitMode && DispForReloc)) { // [disp32+RIP] in X86-64 mode + assert(IndexReg.getReg() == 0 && Is64BitMode && + "Invalid rip-relative address"); + MCE.emitByte(ModRMByte(0, RegOpcodeField, 5)); + emitDisplacementField(DispForReloc, DispVal, PCAdj, true); + return; + } // Indicate that the displacement will use an pcrel or absolute reference // by default. MCEs able to resolve addresses on-the-fly use pcrel by default @@ -445,7 +457,7 @@ void Emitter<CodeEmitter>::emitMemModRMByte(const MachineInstr &MI, // Emit the normal disp32 encoding. MCE.emitByte(ModRMByte(2, RegOpcodeField, 4)); ForceDisp32 = true; - } else if (DispVal == 0 && getX86RegNum(BaseReg) != N86::EBP) { + } else if (DispVal == 0 && BaseRegNo != N86::EBP) { // Emit no displacement ModR/M byte MCE.emitByte(ModRMByte(0, RegOpcodeField, 4)); } else if (isDisp8(DispVal)) { @@ -600,7 +612,7 @@ void Emitter<CodeEmitter>::emitInstruction(const MachineInstr &MI, // We allow inline assembler nodes with empty bodies - they can // implicitly define registers, which is ok for JIT. if (MI.getOperand(0).getSymbolName()[0]) - llvm_report_error("JIT does not support inline asm!"); + report_fatal_error("JIT does not support inline asm!"); break; case TargetOpcode::DBG_LABEL: case TargetOpcode::GC_LABEL: diff --git a/lib/Target/X86/X86ELFWriterInfo.cpp b/lib/Target/X86/X86ELFWriterInfo.cpp index 1597d2b3..f84995d 100644 --- a/lib/Target/X86/X86ELFWriterInfo.cpp +++ b/lib/Target/X86/X86ELFWriterInfo.cpp @@ -26,7 +26,6 @@ using namespace llvm; X86ELFWriterInfo::X86ELFWriterInfo(TargetMachine &TM) : TargetELFWriterInfo(TM) { - bool is64Bit = TM.getTargetData()->getPointerSizeInBits() == 64; EMachine = is64Bit ? 
EM_X86_64 : EM_386; } diff --git a/lib/Target/X86/X86FastISel.cpp b/lib/Target/X86/X86FastISel.cpp index 7849b51..ff9208c 100644 --- a/lib/Target/X86/X86FastISel.cpp +++ b/lib/Target/X86/X86FastISel.cpp @@ -15,7 +15,6 @@ #include "X86.h" #include "X86InstrBuilder.h" -#include "X86ISelLowering.h" #include "X86RegisterInfo.h" #include "X86Subtarget.h" #include "X86TargetMachine.h" @@ -56,12 +55,13 @@ public: explicit X86FastISel(MachineFunction &mf, DenseMap<const Value *, unsigned> &vm, DenseMap<const BasicBlock *, MachineBasicBlock *> &bm, - DenseMap<const AllocaInst *, int> &am + DenseMap<const AllocaInst *, int> &am, + std::vector<std::pair<MachineInstr*, unsigned> > &pn #ifndef NDEBUG - , SmallSet<Instruction*, 8> &cil + , SmallSet<const Instruction *, 8> &cil #endif ) - : FastISel(mf, vm, bm, am + : FastISel(mf, vm, bm, am, pn #ifndef NDEBUG , cil #endif @@ -72,16 +72,16 @@ public: X86ScalarSSEf32 = Subtarget->hasSSE1(); } - virtual bool TargetSelectInstruction(Instruction *I); + virtual bool TargetSelectInstruction(const Instruction *I); #include "X86GenFastISel.inc" private: - bool X86FastEmitCompare(Value *LHS, Value *RHS, EVT VT); + bool X86FastEmitCompare(const Value *LHS, const Value *RHS, EVT VT); bool X86FastEmitLoad(EVT VT, const X86AddressMode &AM, unsigned &RR); - bool X86FastEmitStore(EVT VT, Value *Val, + bool X86FastEmitStore(EVT VT, const Value *Val, const X86AddressMode &AM); bool X86FastEmitStore(EVT VT, unsigned Val, const X86AddressMode &AM); @@ -89,32 +89,32 @@ private: bool X86FastEmitExtend(ISD::NodeType Opc, EVT DstVT, unsigned Src, EVT SrcVT, unsigned &ResultReg); - bool X86SelectAddress(Value *V, X86AddressMode &AM); - bool X86SelectCallAddress(Value *V, X86AddressMode &AM); + bool X86SelectAddress(const Value *V, X86AddressMode &AM); + bool X86SelectCallAddress(const Value *V, X86AddressMode &AM); - bool X86SelectLoad(Instruction *I); + bool X86SelectLoad(const Instruction *I); - bool X86SelectStore(Instruction *I); + bool X86SelectStore(const Instruction *I); - bool X86SelectCmp(Instruction *I); + bool X86SelectCmp(const Instruction *I); - bool X86SelectZExt(Instruction *I); + bool X86SelectZExt(const Instruction *I); - bool X86SelectBranch(Instruction *I); + bool X86SelectBranch(const Instruction *I); - bool X86SelectShift(Instruction *I); + bool X86SelectShift(const Instruction *I); - bool X86SelectSelect(Instruction *I); + bool X86SelectSelect(const Instruction *I); - bool X86SelectTrunc(Instruction *I); + bool X86SelectTrunc(const Instruction *I); - bool X86SelectFPExt(Instruction *I); - bool X86SelectFPTrunc(Instruction *I); + bool X86SelectFPExt(const Instruction *I); + bool X86SelectFPTrunc(const Instruction *I); - bool X86SelectExtractValue(Instruction *I); + bool X86SelectExtractValue(const Instruction *I); - bool X86VisitIntrinsicCall(IntrinsicInst &I); - bool X86SelectCall(Instruction *I); + bool X86VisitIntrinsicCall(const IntrinsicInst &I); + bool X86SelectCall(const Instruction *I); CCAssignFn *CCAssignFnForCall(CallingConv::ID CC, bool isTailCall = false); @@ -125,9 +125,9 @@ private: return static_cast<const X86TargetMachine *>(&TM); } - unsigned TargetMaterializeConstant(Constant *C); + unsigned TargetMaterializeConstant(const Constant *C); - unsigned TargetMaterializeAlloca(AllocaInst *C); + unsigned TargetMaterializeAlloca(const AllocaInst *C); /// isScalarFPTypeInSSEReg - Return true if the specified scalar FP type is /// computed in an SSE register, not on the X87 floating point stack. 
@@ -280,14 +280,14 @@ X86FastISel::X86FastEmitStore(EVT VT, unsigned Val, return true; } -bool X86FastISel::X86FastEmitStore(EVT VT, Value *Val, +bool X86FastISel::X86FastEmitStore(EVT VT, const Value *Val, const X86AddressMode &AM) { // Handle 'null' like i32/i64 0. if (isa<ConstantPointerNull>(Val)) Val = Constant::getNullValue(TD.getIntPtrType(Val->getContext())); // If this is a store of a simple constant, fold the constant into the store. - if (ConstantInt *CI = dyn_cast<ConstantInt>(Val)) { + if (const ConstantInt *CI = dyn_cast<ConstantInt>(Val)) { unsigned Opc = 0; bool Signed = true; switch (VT.getSimpleVT().SimpleTy) { @@ -305,7 +305,7 @@ bool X86FastISel::X86FastEmitStore(EVT VT, Value *Val, if (Opc) { addFullAddress(BuildMI(MBB, DL, TII.get(Opc)), AM) - .addImm(Signed ? CI->getSExtValue() : + .addImm(Signed ? (uint64_t) CI->getSExtValue() : CI->getZExtValue()); return true; } @@ -335,13 +335,13 @@ bool X86FastISel::X86FastEmitExtend(ISD::NodeType Opc, EVT DstVT, /// X86SelectAddress - Attempt to fill in an address from the given value. /// -bool X86FastISel::X86SelectAddress(Value *V, X86AddressMode &AM) { - User *U = NULL; +bool X86FastISel::X86SelectAddress(const Value *V, X86AddressMode &AM) { + const User *U = NULL; unsigned Opcode = Instruction::UserOp1; - if (Instruction *I = dyn_cast<Instruction>(V)) { + if (const Instruction *I = dyn_cast<Instruction>(V)) { Opcode = I->getOpcode(); U = I; - } else if (ConstantExpr *C = dyn_cast<ConstantExpr>(V)) { + } else if (const ConstantExpr *C = dyn_cast<ConstantExpr>(V)) { Opcode = C->getOpcode(); U = C; } @@ -378,7 +378,7 @@ bool X86FastISel::X86SelectAddress(Value *V, X86AddressMode &AM) { case Instruction::Add: { // Adds of constants are common and easy enough. - if (ConstantInt *CI = dyn_cast<ConstantInt>(U->getOperand(1))) { + if (const ConstantInt *CI = dyn_cast<ConstantInt>(U->getOperand(1))) { uint64_t Disp = (int32_t)AM.Disp + (uint64_t)CI->getSExtValue(); // They have to fit in the 32-bit signed displacement field though. if (isInt<32>(Disp)) { @@ -399,16 +399,16 @@ bool X86FastISel::X86SelectAddress(Value *V, X86AddressMode &AM) { gep_type_iterator GTI = gep_type_begin(U); // Iterate through the indices, folding what we can. Constants can be // folded, and one dynamic index can be handled, if the scale is supported. - for (User::op_iterator i = U->op_begin() + 1, e = U->op_end(); + for (User::const_op_iterator i = U->op_begin() + 1, e = U->op_end(); i != e; ++i, ++GTI) { - Value *Op = *i; + const Value *Op = *i; if (const StructType *STy = dyn_cast<StructType>(*GTI)) { const StructLayout *SL = TD.getStructLayout(STy); unsigned Idx = cast<ConstantInt>(Op)->getZExtValue(); Disp += SL->getElementOffset(Idx); } else { uint64_t S = TD.getTypeAllocSize(GTI.getIndexedType()); - if (ConstantInt *CI = dyn_cast<ConstantInt>(Op)) { + if (const ConstantInt *CI = dyn_cast<ConstantInt>(Op)) { // Constant-offset addressing. Disp += CI->getSExtValue() * S; } else if (IndexReg == 0 && @@ -446,7 +446,7 @@ bool X86FastISel::X86SelectAddress(Value *V, X86AddressMode &AM) { } // Handle constant address. - if (GlobalValue *GV = dyn_cast<GlobalValue>(V)) { + if (const GlobalValue *GV = dyn_cast<GlobalValue>(V)) { // Can't handle alternate code models yet. if (TM.getCodeModel() != CodeModel::Small) return false; @@ -457,7 +457,7 @@ bool X86FastISel::X86SelectAddress(Value *V, X86AddressMode &AM) { return false; // Can't handle TLS yet. 
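As an illustration of the constant-GEP folding in X86SelectAddress above (struct field offsets and constant indices accumulate into the displacement, which must stay within a signed 32-bit field), here is a small standalone sketch with plain integers standing in for TargetData and StructLayout:

// Sketch of the constant-GEP folding above: constant indices fold into the
// displacement as index * element-size, and the running total must still
// fit in the 32-bit signed displacement field of an x86 addressing mode.
#include <cstdint>
#include <iostream>
#include <utility>
#include <vector>

bool fitsInSignedDisp32(int64_t v) {
  return v >= INT32_MIN && v <= INT32_MAX;
}

// Accumulate disp += index * size for each constant index; fail if the
// result no longer fits the displacement, as X86SelectAddress does.
bool foldConstantIndices(const std::vector<std::pair<int64_t, uint64_t> > &idx,
                         int64_t &disp) {
  for (size_t i = 0; i != idx.size(); ++i) {
    disp += idx[i].first * static_cast<int64_t>(idx[i].second);
    if (!fitsInSignedDisp32(disp))
      return false;
  }
  return true;
}

int main() {
  int64_t disp = 16;  // e.g. a struct field offset already folded in
  // index 3 into 8-byte elements, then index 2 into 4-byte elements
  std::vector<std::pair<int64_t, uint64_t> > indices;
  indices.push_back(std::make_pair(3, 8));
  indices.push_back(std::make_pair(2, 4));
  if (foldConstantIndices(indices, disp))
    std::cout << "folded displacement: " << disp << "\n";  // prints 48
  return 0;
}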
- if (GlobalVariable *GVar = dyn_cast<GlobalVariable>(GV)) + if (const GlobalVariable *GVar = dyn_cast<GlobalVariable>(GV)) if (GVar->isThreadLocal()) return false; @@ -544,13 +544,13 @@ bool X86FastISel::X86SelectAddress(Value *V, X86AddressMode &AM) { /// X86SelectCallAddress - Attempt to fill in an address from the given value. /// -bool X86FastISel::X86SelectCallAddress(Value *V, X86AddressMode &AM) { - User *U = NULL; +bool X86FastISel::X86SelectCallAddress(const Value *V, X86AddressMode &AM) { + const User *U = NULL; unsigned Opcode = Instruction::UserOp1; - if (Instruction *I = dyn_cast<Instruction>(V)) { + if (const Instruction *I = dyn_cast<Instruction>(V)) { Opcode = I->getOpcode(); U = I; - } else if (ConstantExpr *C = dyn_cast<ConstantExpr>(V)) { + } else if (const ConstantExpr *C = dyn_cast<ConstantExpr>(V)) { Opcode = C->getOpcode(); U = C; } @@ -575,7 +575,7 @@ bool X86FastISel::X86SelectCallAddress(Value *V, X86AddressMode &AM) { } // Handle constant address. - if (GlobalValue *GV = dyn_cast<GlobalValue>(V)) { + if (const GlobalValue *GV = dyn_cast<GlobalValue>(V)) { // Can't handle alternate code models yet. if (TM.getCodeModel() != CodeModel::Small) return false; @@ -586,7 +586,7 @@ bool X86FastISel::X86SelectCallAddress(Value *V, X86AddressMode &AM) { return false; // Can't handle TLS or DLLImport. - if (GlobalVariable *GVar = dyn_cast<GlobalVariable>(GV)) + if (const GlobalVariable *GVar = dyn_cast<GlobalVariable>(GV)) if (GVar->isThreadLocal() || GVar->hasDLLImportLinkage()) return false; @@ -627,7 +627,7 @@ bool X86FastISel::X86SelectCallAddress(Value *V, X86AddressMode &AM) { /// X86SelectStore - Select and emit code to implement store instructions. -bool X86FastISel::X86SelectStore(Instruction* I) { +bool X86FastISel::X86SelectStore(const Instruction *I) { EVT VT; if (!isTypeLegal(I->getOperand(0)->getType(), VT, /*AllowI1=*/true)) return false; @@ -641,7 +641,7 @@ bool X86FastISel::X86SelectStore(Instruction* I) { /// X86SelectLoad - Select and emit code to implement load instructions. /// -bool X86FastISel::X86SelectLoad(Instruction *I) { +bool X86FastISel::X86SelectLoad(const Instruction *I) { EVT VT; if (!isTypeLegal(I->getType(), VT, /*AllowI1=*/true)) return false; @@ -673,7 +673,7 @@ static unsigned X86ChooseCmpOpcode(EVT VT) { /// X86ChooseCmpImmediateOpcode - If we have a comparison with RHS as the RHS /// of the comparison, return an opcode that works for the compare (e.g. /// CMP32ri) otherwise return 0. -static unsigned X86ChooseCmpImmediateOpcode(EVT VT, ConstantInt *RHSC) { +static unsigned X86ChooseCmpImmediateOpcode(EVT VT, const ConstantInt *RHSC) { switch (VT.getSimpleVT().SimpleTy) { // Otherwise, we can't fold the immediate into this comparison. default: return 0; @@ -689,7 +689,8 @@ static unsigned X86ChooseCmpImmediateOpcode(EVT VT, ConstantInt *RHSC) { } } -bool X86FastISel::X86FastEmitCompare(Value *Op0, Value *Op1, EVT VT) { +bool X86FastISel::X86FastEmitCompare(const Value *Op0, const Value *Op1, + EVT VT) { unsigned Op0Reg = getRegForValue(Op0); if (Op0Reg == 0) return false; @@ -700,7 +701,7 @@ bool X86FastISel::X86FastEmitCompare(Value *Op0, Value *Op1, EVT VT) { // We have two options: compare with register or immediate. If the RHS of // the compare is an immediate that we can fold into this compare, use // CMPri, otherwise use CMPrr. 
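The compare lowering above chooses between an immediate form and a register-register compare depending on whether the RHS is a constant that can be folded. The sketch below mirrors that decision with a toy opcode enum; the 64-bit sign-extended-imm32 restriction shown here is an assumption, since the hunk only spells out the smaller widths:

// Hedged sketch of the "compare with register or immediate" choice in
// X86FastEmitCompare above, using a toy opcode enum rather than the real
// X86 instruction definitions.
#include <cstdint>
#include <iostream>
#include <optional>

enum class CmpOpc { CMP8ri, CMP16ri, CMP32ri, CMP64ri32, CMPrr };

// Pick an immediate form when the constant RHS fits; otherwise fall back
// to the register-register compare, as in the hunk above.
CmpOpc chooseCmp(unsigned bits, std::optional<int64_t> rhsImm) {
  if (rhsImm) {
    switch (bits) {
    case 8:  return CmpOpc::CMP8ri;
    case 16: return CmpOpc::CMP16ri;
    case 32: return CmpOpc::CMP32ri;
    case 64:
      // Assumed: 64-bit compares only take a sign-extended 32-bit immediate.
      if (*rhsImm >= INT32_MIN && *rhsImm <= INT32_MAX)
        return CmpOpc::CMP64ri32;
      break;
    }
  }
  return CmpOpc::CMPrr;
}

int main() {
  std::cout << (chooseCmp(32, 42) == CmpOpc::CMP32ri) << "\n";             // 1
  std::cout << (chooseCmp(64, int64_t(1) << 40) == CmpOpc::CMPrr) << "\n"; // 1
  return 0;
}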
- if (ConstantInt *Op1C = dyn_cast<ConstantInt>(Op1)) { + if (const ConstantInt *Op1C = dyn_cast<ConstantInt>(Op1)) { if (unsigned CompareImmOpc = X86ChooseCmpImmediateOpcode(VT, Op1C)) { BuildMI(MBB, DL, TII.get(CompareImmOpc)).addReg(Op0Reg) .addImm(Op1C->getSExtValue()); @@ -718,8 +719,8 @@ bool X86FastISel::X86FastEmitCompare(Value *Op0, Value *Op1, EVT VT) { return true; } -bool X86FastISel::X86SelectCmp(Instruction *I) { - CmpInst *CI = cast<CmpInst>(I); +bool X86FastISel::X86SelectCmp(const Instruction *I) { + const CmpInst *CI = cast<CmpInst>(I); EVT VT; if (!isTypeLegal(I->getOperand(0)->getType(), VT)) @@ -781,7 +782,7 @@ bool X86FastISel::X86SelectCmp(Instruction *I) { return false; } - Value *Op0 = CI->getOperand(0), *Op1 = CI->getOperand(1); + const Value *Op0 = CI->getOperand(0), *Op1 = CI->getOperand(1); if (SwapArgs) std::swap(Op0, Op1); @@ -794,7 +795,7 @@ bool X86FastISel::X86SelectCmp(Instruction *I) { return true; } -bool X86FastISel::X86SelectZExt(Instruction *I) { +bool X86FastISel::X86SelectZExt(const Instruction *I) { // Handle zero-extension from i1 to i8, which is common. if (I->getType()->isIntegerTy(8) && I->getOperand(0)->getType()->isIntegerTy(1)) { @@ -811,15 +812,15 @@ bool X86FastISel::X86SelectZExt(Instruction *I) { } -bool X86FastISel::X86SelectBranch(Instruction *I) { +bool X86FastISel::X86SelectBranch(const Instruction *I) { // Unconditional branches are selected by tablegen-generated code. // Handle a conditional branch. - BranchInst *BI = cast<BranchInst>(I); + const BranchInst *BI = cast<BranchInst>(I); MachineBasicBlock *TrueMBB = MBBMap[BI->getSuccessor(0)]; MachineBasicBlock *FalseMBB = MBBMap[BI->getSuccessor(1)]; // Fold the common case of a conditional branch with a comparison. - if (CmpInst *CI = dyn_cast<CmpInst>(BI->getCondition())) { + if (const CmpInst *CI = dyn_cast<CmpInst>(BI->getCondition())) { if (CI->hasOneUse()) { EVT VT = TLI.getValueType(CI->getOperand(0)->getType()); @@ -866,7 +867,7 @@ bool X86FastISel::X86SelectBranch(Instruction *I) { return false; } - Value *Op0 = CI->getOperand(0), *Op1 = CI->getOperand(1); + const Value *Op0 = CI->getOperand(0), *Op1 = CI->getOperand(1); if (SwapArgs) std::swap(Op0, Op1); @@ -901,7 +902,8 @@ bool X86FastISel::X86SelectBranch(Instruction *I) { // looking for the SETO/SETB instruction. If an instruction modifies the // EFLAGS register before we reach the SETO/SETB instruction, then we can't // convert the branch into a JO/JB instruction. - if (IntrinsicInst *CI = dyn_cast<IntrinsicInst>(EI->getAggregateOperand())){ + if (const IntrinsicInst *CI = + dyn_cast<IntrinsicInst>(EI->getAggregateOperand())){ if (CI->getIntrinsicID() == Intrinsic::sadd_with_overflow || CI->getIntrinsicID() == Intrinsic::uadd_with_overflow) { const MachineInstr *SetMI = 0; @@ -956,7 +958,7 @@ bool X86FastISel::X86SelectBranch(Instruction *I) { return true; } -bool X86FastISel::X86SelectShift(Instruction *I) { +bool X86FastISel::X86SelectShift(const Instruction *I) { unsigned CReg = 0, OpReg = 0, OpImm = 0; const TargetRegisterClass *RC = NULL; if (I->getType()->isIntegerTy(8)) { @@ -1007,7 +1009,7 @@ bool X86FastISel::X86SelectShift(Instruction *I) { if (Op0Reg == 0) return false; // Fold immediate in shl(x,3). 
- if (ConstantInt *CI = dyn_cast<ConstantInt>(I->getOperand(1))) { + if (const ConstantInt *CI = dyn_cast<ConstantInt>(I->getOperand(1))) { unsigned ResultReg = createResultReg(RC); BuildMI(MBB, DL, TII.get(OpImm), ResultReg).addReg(Op0Reg).addImm(CI->getZExtValue() & 0xff); @@ -1032,7 +1034,7 @@ bool X86FastISel::X86SelectShift(Instruction *I) { return true; } -bool X86FastISel::X86SelectSelect(Instruction *I) { +bool X86FastISel::X86SelectSelect(const Instruction *I) { EVT VT = TLI.getValueType(I->getType(), /*HandleUnknown=*/true); if (VT == MVT::Other || !isTypeLegal(I->getType(), VT)) return false; @@ -1066,11 +1068,11 @@ bool X86FastISel::X86SelectSelect(Instruction *I) { return true; } -bool X86FastISel::X86SelectFPExt(Instruction *I) { +bool X86FastISel::X86SelectFPExt(const Instruction *I) { // fpext from float to double. if (Subtarget->hasSSE2() && I->getType()->isDoubleTy()) { - Value *V = I->getOperand(0); + const Value *V = I->getOperand(0); if (V->getType()->isFloatTy()) { unsigned OpReg = getRegForValue(V); if (OpReg == 0) return false; @@ -1084,10 +1086,10 @@ bool X86FastISel::X86SelectFPExt(Instruction *I) { return false; } -bool X86FastISel::X86SelectFPTrunc(Instruction *I) { +bool X86FastISel::X86SelectFPTrunc(const Instruction *I) { if (Subtarget->hasSSE2()) { if (I->getType()->isFloatTy()) { - Value *V = I->getOperand(0); + const Value *V = I->getOperand(0); if (V->getType()->isDoubleTy()) { unsigned OpReg = getRegForValue(V); if (OpReg == 0) return false; @@ -1102,7 +1104,7 @@ bool X86FastISel::X86SelectFPTrunc(Instruction *I) { return false; } -bool X86FastISel::X86SelectTrunc(Instruction *I) { +bool X86FastISel::X86SelectTrunc(const Instruction *I) { if (Subtarget->is64Bit()) // All other cases should be handled by the tblgen generated code. return false; @@ -1139,11 +1141,11 @@ bool X86FastISel::X86SelectTrunc(Instruction *I) { return true; } -bool X86FastISel::X86SelectExtractValue(Instruction *I) { - ExtractValueInst *EI = cast<ExtractValueInst>(I); - Value *Agg = EI->getAggregateOperand(); +bool X86FastISel::X86SelectExtractValue(const Instruction *I) { + const ExtractValueInst *EI = cast<ExtractValueInst>(I); + const Value *Agg = EI->getAggregateOperand(); - if (IntrinsicInst *CI = dyn_cast<IntrinsicInst>(Agg)) { + if (const IntrinsicInst *CI = dyn_cast<IntrinsicInst>(Agg)) { switch (CI->getIntrinsicID()) { default: break; case Intrinsic::sadd_with_overflow: @@ -1160,7 +1162,7 @@ bool X86FastISel::X86SelectExtractValue(Instruction *I) { return false; } -bool X86FastISel::X86VisitIntrinsicCall(IntrinsicInst &I) { +bool X86FastISel::X86VisitIntrinsicCall(const IntrinsicInst &I) { // FIXME: Handle more intrinsics. switch (I.getIntrinsicID()) { default: return false; @@ -1168,8 +1170,8 @@ bool X86FastISel::X86VisitIntrinsicCall(IntrinsicInst &I) { // Emit code inline code to store the stack guard onto the stack. EVT PtrTy = TLI.getPointerTy(); - Value *Op1 = I.getOperand(1); // The guard's value. - AllocaInst *Slot = cast<AllocaInst>(I.getOperand(2)); + const Value *Op1 = I.getOperand(1); // The guard's value. + const AllocaInst *Slot = cast<AllocaInst>(I.getOperand(2)); // Grab the frame index. 
X86AddressMode AM; @@ -1204,7 +1206,7 @@ bool X86FastISel::X86VisitIntrinsicCall(IntrinsicInst &I) { return true; } case Intrinsic::dbg_declare: { - DbgDeclareInst *DI = cast<DbgDeclareInst>(&I); + const DbgDeclareInst *DI = cast<DbgDeclareInst>(&I); X86AddressMode AM; assert(DI->getAddress() && "Null address should be checked earlier!"); if (!X86SelectAddress(DI->getAddress(), AM)) @@ -1235,8 +1237,8 @@ bool X86FastISel::X86VisitIntrinsicCall(IntrinsicInst &I) { if (!isTypeLegal(RetTy, VT)) return false; - Value *Op1 = I.getOperand(1); - Value *Op2 = I.getOperand(2); + const Value *Op1 = I.getOperand(1); + const Value *Op2 = I.getOperand(2); unsigned Reg1 = getRegForValue(Op1); unsigned Reg2 = getRegForValue(Op2); @@ -1277,20 +1279,20 @@ bool X86FastISel::X86VisitIntrinsicCall(IntrinsicInst &I) { } } -bool X86FastISel::X86SelectCall(Instruction *I) { - CallInst *CI = cast<CallInst>(I); - Value *Callee = I->getOperand(0); +bool X86FastISel::X86SelectCall(const Instruction *I) { + const CallInst *CI = cast<CallInst>(I); + const Value *Callee = I->getOperand(0); // Can't handle inline asm yet. if (isa<InlineAsm>(Callee)) return false; // Handle intrinsic calls. - if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(CI)) + if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(CI)) return X86VisitIntrinsicCall(*II); // Handle only C and fastcc calling conventions for now. - CallSite CS(CI); + ImmutableCallSite CS(CI); CallingConv::ID CC = CS.getCallingConv(); if (CC != CallingConv::C && CC != CallingConv::Fast && @@ -1322,7 +1324,7 @@ bool X86FastISel::X86SelectCall(Instruction *I) { if (!X86SelectCallAddress(Callee, CalleeAM)) return false; unsigned CalleeOp = 0; - GlobalValue *GV = 0; + const GlobalValue *GV = 0; if (CalleeAM.GV != 0) { GV = CalleeAM.GV; } else if (CalleeAM.Base.Reg != 0) { @@ -1338,7 +1340,7 @@ bool X86FastISel::X86SelectCall(Instruction *I) { } // Deal with call operands first. - SmallVector<Value*, 8> ArgVals; + SmallVector<const Value *, 8> ArgVals; SmallVector<unsigned, 8> Args; SmallVector<EVT, 8> ArgVTs; SmallVector<ISD::ArgFlagsTy, 8> ArgFlags; @@ -1346,7 +1348,7 @@ bool X86FastISel::X86SelectCall(Instruction *I) { ArgVals.reserve(CS.arg_size()); ArgVTs.reserve(CS.arg_size()); ArgFlags.reserve(CS.arg_size()); - for (CallSite::arg_iterator i = CS.arg_begin(), e = CS.arg_end(); + for (ImmutableCallSite::arg_iterator i = CS.arg_begin(), e = CS.arg_end(); i != e; ++i) { unsigned Arg = getRegForValue(*i); if (Arg == 0) @@ -1454,7 +1456,7 @@ bool X86FastISel::X86SelectCall(Instruction *I) { X86AddressMode AM; AM.Base.Reg = StackPtr; AM.Disp = LocMemOffset; - Value *ArgVal = ArgVals[VA.getValNo()]; + const Value *ArgVal = ArgVals[VA.getValNo()]; // If this is a really simple value, emit this with the Value* version of // X86FastEmitStore. 
If it isn't simple, we don't want to do this, as it @@ -1585,7 +1587,7 @@ bool X86FastISel::X86SelectCall(Instruction *I) { bool -X86FastISel::TargetSelectInstruction(Instruction *I) { +X86FastISel::TargetSelectInstruction(const Instruction *I) { switch (I->getOpcode()) { default: break; case Instruction::Load: @@ -1633,7 +1635,7 @@ X86FastISel::TargetSelectInstruction(Instruction *I) { return false; } -unsigned X86FastISel::TargetMaterializeConstant(Constant *C) { +unsigned X86FastISel::TargetMaterializeConstant(const Constant *C) { EVT VT; if (!isTypeLegal(C->getType(), VT)) return false; @@ -1728,7 +1730,7 @@ unsigned X86FastISel::TargetMaterializeConstant(Constant *C) { return ResultReg; } -unsigned X86FastISel::TargetMaterializeAlloca(AllocaInst *C) { +unsigned X86FastISel::TargetMaterializeAlloca(const AllocaInst *C) { // Fail on dynamic allocas. At this point, getRegForValue has already // checked its CSE maps, so if we're here trying to handle a dynamic // alloca, we're not going to succeed. X86SelectAddress has a @@ -1753,12 +1755,13 @@ namespace llvm { llvm::FastISel *X86::createFastISel(MachineFunction &mf, DenseMap<const Value *, unsigned> &vm, DenseMap<const BasicBlock *, MachineBasicBlock *> &bm, - DenseMap<const AllocaInst *, int> &am + DenseMap<const AllocaInst *, int> &am, + std::vector<std::pair<MachineInstr*, unsigned> > &pn #ifndef NDEBUG - , SmallSet<Instruction*, 8> &cil + , SmallSet<const Instruction *, 8> &cil #endif ) { - return new X86FastISel(mf, vm, bm, am + return new X86FastISel(mf, vm, bm, am, pn #ifndef NDEBUG , cil #endif diff --git a/lib/Target/X86/X86FloatingPoint.cpp b/lib/Target/X86/X86FloatingPoint.cpp index 6d6fe77..93460ef 100644 --- a/lib/Target/X86/X86FloatingPoint.cpp +++ b/lib/Target/X86/X86FloatingPoint.cpp @@ -1088,8 +1088,7 @@ void FPS::handleSpecialFP(MachineBasicBlock::iterator &I) { // 'f' constraint. These should be turned into the current ST(x) register // in the machine instr. Also, any kills should be explicitly popped after // the inline asm. - unsigned Kills[7]; - unsigned NumKills = 0; + unsigned Kills = 0; for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { MachineOperand &Op = MI->getOperand(i); if (!Op.isReg() || Op.getReg() < X86::FP0 || Op.getReg() > X86::FP6) @@ -1103,7 +1102,7 @@ void FPS::handleSpecialFP(MachineBasicBlock::iterator &I) { // asm. We just remember it for now, and pop them all off at the end in // a batch. if (Op.isKill()) - Kills[NumKills++] = FPReg; + Kills |= 1U << FPReg; } // If this asm kills any FP registers (is the last use of them) we must @@ -1114,9 +1113,11 @@ void FPS::handleSpecialFP(MachineBasicBlock::iterator &I) { // Note: this might be a non-optimal pop sequence. We might be able to do // better by trying to pop in stack order or something. MachineBasicBlock::iterator InsertPt = MI; - while (NumKills) - freeStackSlotAfter(InsertPt, Kills[--NumKills]); - + while (Kills) { + unsigned FPReg = CountTrailingZeros_32(Kills); + freeStackSlotAfter(InsertPt, FPReg); + Kills &= ~(1U << FPReg); + } // Don't delete the inline asm! 
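The X86FloatingPoint change above replaces the fixed Kills[7] array with a bitmask and pops killed registers by peeling off the lowest set bit. A self-contained illustration of that loop, using __builtin_ctz (GCC/Clang) as a stand-in for LLVM's CountTrailingZeros_32:

// Self-contained sketch of the bitmask rewrite above: record killed FP
// registers as bits and pop them by repeatedly taking the lowest set bit.
#include <cstdio>

int main() {
  unsigned kills = 0;
  kills |= 1u << 2;   // FP2 killed by the inline asm
  kills |= 1u << 5;   // FP5 killed as well

  while (kills) {
    unsigned fpReg = static_cast<unsigned>(__builtin_ctz(kills)); // lowest set bit
    std::printf("freeing stack slot for FP%u\n", fpReg);
    kills &= ~(1u << fpReg); // clear it and continue with the next kill
  }
  return 0;
}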
return; } diff --git a/lib/Target/X86/X86ISelDAGToDAG.cpp b/lib/Target/X86/X86ISelDAGToDAG.cpp index da45dac..fd8bb1e 100644 --- a/lib/Target/X86/X86ISelDAGToDAG.cpp +++ b/lib/Target/X86/X86ISelDAGToDAG.cpp @@ -15,12 +15,10 @@ #define DEBUG_TYPE "x86-isel" #include "X86.h" #include "X86InstrBuilder.h" -#include "X86ISelLowering.h" #include "X86MachineFunctionInfo.h" #include "X86RegisterInfo.h" #include "X86Subtarget.h" #include "X86TargetMachine.h" -#include "llvm/GlobalValue.h" #include "llvm/Instructions.h" #include "llvm/Intrinsics.h" #include "llvm/Support/CFG.h" @@ -57,25 +55,24 @@ namespace { FrameIndexBase } BaseType; - struct { // This is really a union, discriminated by BaseType! - SDValue Reg; - int FrameIndex; - } Base; + // This is really a union, discriminated by BaseType! + SDValue Base_Reg; + int Base_FrameIndex; unsigned Scale; SDValue IndexReg; int32_t Disp; SDValue Segment; - GlobalValue *GV; - Constant *CP; - BlockAddress *BlockAddr; + const GlobalValue *GV; + const Constant *CP; + const BlockAddress *BlockAddr; const char *ES; int JT; unsigned Align; // CP alignment. unsigned char SymbolFlags; // X86II::MO_* X86ISelAddressMode() - : BaseType(RegBase), Scale(1), IndexReg(), Disp(0), + : BaseType(RegBase), Base_FrameIndex(0), Scale(1), IndexReg(), Disp(0), Segment(), GV(0), CP(0), BlockAddr(0), ES(0), JT(-1), Align(0), SymbolFlags(X86II::MO_NO_FLAG) { } @@ -85,7 +82,7 @@ namespace { } bool hasBaseOrIndexReg() const { - return IndexReg.getNode() != 0 || Base.Reg.getNode() != 0; + return IndexReg.getNode() != 0 || Base_Reg.getNode() != 0; } /// isRIPRelative - Return true if this addressing mode is already RIP @@ -93,24 +90,24 @@ namespace { bool isRIPRelative() const { if (BaseType != RegBase) return false; if (RegisterSDNode *RegNode = - dyn_cast_or_null<RegisterSDNode>(Base.Reg.getNode())) + dyn_cast_or_null<RegisterSDNode>(Base_Reg.getNode())) return RegNode->getReg() == X86::RIP; return false; } void setBaseReg(SDValue Reg) { BaseType = RegBase; - Base.Reg = Reg; + Base_Reg = Reg; } void dump() { dbgs() << "X86ISelAddressMode " << this << '\n'; - dbgs() << "Base.Reg "; - if (Base.Reg.getNode() != 0) - Base.Reg.getNode()->dump(); + dbgs() << "Base_Reg "; + if (Base_Reg.getNode() != 0) + Base_Reg.getNode()->dump(); else dbgs() << "nul"; - dbgs() << " Base.FrameIndex " << Base.FrameIndex << '\n' + dbgs() << " Base.FrameIndex " << Base_FrameIndex << '\n' << " Scale" << Scale << '\n' << "IndexReg "; if (IndexReg.getNode() != 0) @@ -162,7 +159,7 @@ namespace { class X86DAGToDAGISel : public SelectionDAGISel { /// X86Lowering - This object fully describes how to lower LLVM code to an /// X86-specific SelectionDAG. - X86TargetLowering &X86Lowering; + const X86TargetLowering &X86Lowering; /// Subtarget - Keep a pointer to the X86Subtarget around so that we can /// make the right decision when generating code for different targets. @@ -183,7 +180,7 @@ namespace { return "X86 DAG->DAG Instruction Selection"; } - virtual void EmitFunctionEntryCode(Function &Fn, MachineFunction &MF); + virtual void EmitFunctionEntryCode(); virtual bool IsProfitableToFold(SDValue N, SDNode *U, SDNode *Root) const; @@ -235,8 +232,8 @@ namespace { SDValue &Scale, SDValue &Index, SDValue &Disp, SDValue &Segment) { Base = (AM.BaseType == X86ISelAddressMode::FrameIndexBase) ? 
- CurDAG->getTargetFrameIndex(AM.Base.FrameIndex, TLI.getPointerTy()) : - AM.Base.Reg; + CurDAG->getTargetFrameIndex(AM.Base_FrameIndex, TLI.getPointerTy()) : + AM.Base_Reg; Scale = getI8Imm(AM.Scale); Index = AM.IndexReg; // These are 32-bit even in 64-bit mode since RIP relative offset @@ -546,11 +543,11 @@ void X86DAGToDAGISel::EmitSpecialCodeForMain(MachineBasicBlock *BB, TII->get(X86::CALLpcrel32)).addExternalSymbol("__main"); } -void X86DAGToDAGISel::EmitFunctionEntryCode(Function &Fn, MachineFunction &MF) { +void X86DAGToDAGISel::EmitFunctionEntryCode() { // If this is main, emit special code for main. - MachineBasicBlock *BB = MF.begin(); - if (Fn.hasExternalLinkage() && Fn.getName() == "main") - EmitSpecialCodeForMain(BB, MF.getFrameInfo()); + if (const Function *Fn = MF->getFunction()) + if (Fn->hasExternalLinkage() && Fn->getName() == "main") + EmitSpecialCodeForMain(MF->begin(), MF->getFrameInfo()); } @@ -675,8 +672,8 @@ bool X86DAGToDAGISel::MatchAddress(SDValue N, X86ISelAddressMode &AM) { // a smaller encoding and avoids a scaled-index. if (AM.Scale == 2 && AM.BaseType == X86ISelAddressMode::RegBase && - AM.Base.Reg.getNode() == 0) { - AM.Base.Reg = AM.IndexReg; + AM.Base_Reg.getNode() == 0) { + AM.Base_Reg = AM.IndexReg; AM.Scale = 1; } @@ -687,15 +684,34 @@ bool X86DAGToDAGISel::MatchAddress(SDValue N, X86ISelAddressMode &AM) { Subtarget->is64Bit() && AM.Scale == 1 && AM.BaseType == X86ISelAddressMode::RegBase && - AM.Base.Reg.getNode() == 0 && + AM.Base_Reg.getNode() == 0 && AM.IndexReg.getNode() == 0 && AM.SymbolFlags == X86II::MO_NO_FLAG && AM.hasSymbolicDisplacement()) - AM.Base.Reg = CurDAG->getRegister(X86::RIP, MVT::i64); + AM.Base_Reg = CurDAG->getRegister(X86::RIP, MVT::i64); return false; } +/// isLogicallyAddWithConstant - Return true if this node is semantically an +/// add of a value with a constantint. +static bool isLogicallyAddWithConstant(SDValue V, SelectionDAG *CurDAG) { + // Check for (add x, Cst) + if (V->getOpcode() == ISD::ADD) + return isa<ConstantSDNode>(V->getOperand(1)); + + // Check for (or x, Cst), where Cst & x == 0. + if (V->getOpcode() != ISD::OR || + !isa<ConstantSDNode>(V->getOperand(1))) + return false; + + // Handle "X | C" as "X + C" iff X is known to have C bits clear. + ConstantSDNode *CN = cast<ConstantSDNode>(V->getOperand(1)); + + // Check to see if the LHS & C is zero. + return CurDAG->MaskedValueIsZero(V->getOperand(0), CN->getAPIntValue()); +} + bool X86DAGToDAGISel::MatchAddressRecursively(SDValue N, X86ISelAddressMode &AM, X86ISelListener &DeadNodes, unsigned Depth) { @@ -762,9 +778,9 @@ bool X86DAGToDAGISel::MatchAddressRecursively(SDValue N, X86ISelAddressMode &AM, case ISD::FrameIndex: if (AM.BaseType == X86ISelAddressMode::RegBase - && AM.Base.Reg.getNode() == 0) { + && AM.Base_Reg.getNode() == 0) { AM.BaseType = X86ISelAddressMode::FrameIndexBase; - AM.Base.FrameIndex = cast<FrameIndexSDNode>(N)->getIndex(); + AM.Base_FrameIndex = cast<FrameIndexSDNode>(N)->getIndex(); return false; } break; @@ -787,8 +803,7 @@ bool X86DAGToDAGISel::MatchAddressRecursively(SDValue N, X86ISelAddressMode &AM, // Okay, we know that we have a scale by now. However, if the scaled // value is an add of something and a constant, we can fold the // constant into the disp field here. 
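The isLogicallyAddWithConstant helper added above lets address matching treat (or x, C) like (x + C) whenever MaskedValueIsZero proves x and C share no set bits. A quick numeric check of that invariant:

// Numeric illustration of the invariant isLogicallyAddWithConstant relies
// on: when no bits of x overlap the constant c (x & c == 0), "x | c" and
// "x + c" produce the same value, so the OR can fold into an addressing
// mode exactly like an ADD.
#include <cassert>
#include <cstdint>

bool orActsLikeAdd(uint64_t x, uint64_t c) {
  return (x & c) == 0; // the condition MaskedValueIsZero proves symbolically
}

int main() {
  uint64_t base = 0xFFF0;  // low bits known clear (e.g. 16-byte aligned)
  uint64_t cst  = 0x7;
  assert(orActsLikeAdd(base, cst));
  assert((base | cst) == (base + cst));

  uint64_t other = 0xFFF4; // overlaps bit 2, so the rewrite would be wrong
  assert(!orActsLikeAdd(other, cst));
  assert((other | cst) != (other + cst));
  return 0;
}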
- if (ShVal.getNode()->getOpcode() == ISD::ADD && - isa<ConstantSDNode>(ShVal.getNode()->getOperand(1))) { + if (isLogicallyAddWithConstant(ShVal, CurDAG)) { AM.IndexReg = ShVal.getNode()->getOperand(0); ConstantSDNode *AddVal = cast<ConstantSDNode>(ShVal.getNode()->getOperand(1)); @@ -816,7 +831,7 @@ bool X86DAGToDAGISel::MatchAddressRecursively(SDValue N, X86ISelAddressMode &AM, case X86ISD::MUL_IMM: // X*[3,5,9] -> X+X*[2,4,8] if (AM.BaseType == X86ISelAddressMode::RegBase && - AM.Base.Reg.getNode() == 0 && + AM.Base_Reg.getNode() == 0 && AM.IndexReg.getNode() == 0) { if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N.getNode()->getOperand(1))) @@ -847,7 +862,7 @@ bool X86DAGToDAGISel::MatchAddressRecursively(SDValue N, X86ISelAddressMode &AM, Reg = N.getNode()->getOperand(0); } - AM.IndexReg = AM.Base.Reg = Reg; + AM.IndexReg = AM.Base_Reg = Reg; return false; } } @@ -892,8 +907,8 @@ bool X86DAGToDAGISel::MatchAddressRecursively(SDValue N, X86ISelAddressMode &AM, // If the base is a register with multiple uses, this // transformation may save a mov. if ((AM.BaseType == X86ISelAddressMode::RegBase && - AM.Base.Reg.getNode() && - !AM.Base.Reg.getNode()->hasOneUse()) || + AM.Base_Reg.getNode() && + !AM.Base_Reg.getNode()->hasOneUse()) || AM.BaseType == X86ISelAddressMode::FrameIndexBase) --Cost; // If the folded LHS was interesting, this transformation saves @@ -963,9 +978,9 @@ bool X86DAGToDAGISel::MatchAddressRecursively(SDValue N, X86ISelAddressMode &AM, // see if we can just put each operand into a register and fold at least // the add. if (AM.BaseType == X86ISelAddressMode::RegBase && - !AM.Base.Reg.getNode() && + !AM.Base_Reg.getNode() && !AM.IndexReg.getNode()) { - AM.Base.Reg = N.getNode()->getOperand(0); + AM.Base_Reg = N.getNode()->getOperand(0); AM.IndexReg = N.getNode()->getOperand(1); AM.Scale = 1; return false; @@ -975,14 +990,11 @@ bool X86DAGToDAGISel::MatchAddressRecursively(SDValue N, X86ISelAddressMode &AM, case ISD::OR: // Handle "X | C" as "X + C" iff X is known to have C bits clear. - if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N.getOperand(1))) { + if (isLogicallyAddWithConstant(N, CurDAG)) { X86ISelAddressMode Backup = AM; + ConstantSDNode *CN = cast<ConstantSDNode>(N.getOperand(1)); uint64_t Offset = CN->getSExtValue(); - // Check to see if the LHS & C is zero. - if (!CurDAG->MaskedValueIsZero(N.getOperand(0), CN->getAPIntValue())) - break; - // Start with the LHS as an addr mode. if (!MatchAddressRecursively(N.getOperand(0), AM, DeadNodes, Depth+1) && // Address could not have picked a GV address for the displacement. @@ -1127,7 +1139,7 @@ bool X86DAGToDAGISel::MatchAddressRecursively(SDValue N, X86ISelAddressMode &AM, /// specified addressing mode without any further recursion. bool X86DAGToDAGISel::MatchAddressBase(SDValue N, X86ISelAddressMode &AM) { // Is the base register already occupied? - if (AM.BaseType != X86ISelAddressMode::RegBase || AM.Base.Reg.getNode()) { + if (AM.BaseType != X86ISelAddressMode::RegBase || AM.Base_Reg.getNode()) { // If so, check to see if the scale index register is set. if (AM.IndexReg.getNode() == 0) { AM.IndexReg = N; @@ -1141,7 +1153,7 @@ bool X86DAGToDAGISel::MatchAddressBase(SDValue N, X86ISelAddressMode &AM) { // Default, generate it as a register. 
AM.BaseType = X86ISelAddressMode::RegBase; - AM.Base.Reg = N; + AM.Base_Reg = N; return false; } @@ -1157,8 +1169,8 @@ bool X86DAGToDAGISel::SelectAddr(SDNode *Op, SDValue N, SDValue &Base, EVT VT = N.getValueType(); if (AM.BaseType == X86ISelAddressMode::RegBase) { - if (!AM.Base.Reg.getNode()) - AM.Base.Reg = CurDAG->getRegister(0, VT); + if (!AM.Base_Reg.getNode()) + AM.Base_Reg = CurDAG->getRegister(0, VT); } if (!AM.IndexReg.getNode()) @@ -1185,7 +1197,7 @@ bool X86DAGToDAGISel::SelectScalarSSELoad(SDNode *Root, if (ISD::isNON_EXTLoad(PatternNodeWithChain.getNode()) && PatternNodeWithChain.hasOneUse() && IsProfitableToFold(N.getOperand(0), N.getNode(), Root) && - IsLegalToFold(N.getOperand(0), N.getNode(), Root)) { + IsLegalToFold(N.getOperand(0), N.getNode(), Root, OptLevel)) { LoadSDNode *LD = cast<LoadSDNode>(PatternNodeWithChain); if (!SelectAddr(Root, LD->getBasePtr(), Base, Scale, Index, Disp,Segment)) return false; @@ -1202,7 +1214,7 @@ bool X86DAGToDAGISel::SelectScalarSSELoad(SDNode *Root, ISD::isNON_EXTLoad(N.getOperand(0).getOperand(0).getNode()) && N.getOperand(0).getOperand(0).hasOneUse() && IsProfitableToFold(N.getOperand(0), N.getNode(), Root) && - IsLegalToFold(N.getOperand(0), N.getNode(), Root)) { + IsLegalToFold(N.getOperand(0), N.getNode(), Root, OptLevel)) { // Okay, this is a zero extending load. Fold it. LoadSDNode *LD = cast<LoadSDNode>(N.getOperand(0).getOperand(0)); if (!SelectAddr(Root, LD->getBasePtr(), Base, Scale, Index, Disp, Segment)) @@ -1234,10 +1246,10 @@ bool X86DAGToDAGISel::SelectLEAAddr(SDNode *Op, SDValue N, EVT VT = N.getValueType(); unsigned Complexity = 0; if (AM.BaseType == X86ISelAddressMode::RegBase) - if (AM.Base.Reg.getNode()) + if (AM.Base_Reg.getNode()) Complexity = 1; else - AM.Base.Reg = CurDAG->getRegister(0, VT); + AM.Base_Reg = CurDAG->getRegister(0, VT); else if (AM.BaseType == X86ISelAddressMode::FrameIndexBase) Complexity = 4; @@ -1265,7 +1277,7 @@ bool X86DAGToDAGISel::SelectLEAAddr(SDNode *Op, SDValue N, Complexity += 2; } - if (AM.Disp && (AM.Base.Reg.getNode() || AM.IndexReg.getNode())) + if (AM.Disp && (AM.Base_Reg.getNode() || AM.IndexReg.getNode())) Complexity++; // If it isn't worth using an LEA, reject it. @@ -1287,7 +1299,7 @@ bool X86DAGToDAGISel::SelectTLSADDRAddr(SDNode *Op, SDValue N, SDValue &Base, X86ISelAddressMode AM; AM.GV = GA->getGlobal(); AM.Disp += GA->getOffset(); - AM.Base.Reg = CurDAG->getRegister(0, N.getValueType()); + AM.Base_Reg = CurDAG->getRegister(0, N.getValueType()); AM.SymbolFlags = GA->getTargetFlags(); if (N.getValueType() == MVT::i32) { @@ -1309,7 +1321,7 @@ bool X86DAGToDAGISel::TryFoldLoad(SDNode *P, SDValue N, SDValue &Segment) { if (!ISD::isNON_EXTLoad(N.getNode()) || !IsProfitableToFold(N, P, P) || - !IsLegalToFold(N, P, P)) + !IsLegalToFold(N, P, P, OptLevel)) return false; return SelectAddr(P, N.getOperand(1), Base, Scale, Index, Disp, Segment); @@ -1841,6 +1853,9 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) { // Look for (X86cmp (and $op, $imm), 0) and see if we can convert it to // use a smaller encoding. + if (N0.getOpcode() == ISD::TRUNCATE && N0.hasOneUse()) + // Look past the truncate if CMP is the only use of it. 
+ N0 = N0.getOperand(0); if (N0.getNode()->getOpcode() == ISD::AND && N0.getNode()->hasOneUse() && N0.getValueType() != MVT::i8 && X86::isZeroNode(N1)) { diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index 64702f1..6ce9ab7 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -57,14 +57,6 @@ STATISTIC(NumTailCalls, "Number of tail calls"); static cl::opt<bool> DisableMMX("disable-mmx", cl::Hidden, cl::desc("Disable use of MMX")); -// Disable16Bit - 16-bit operations typically have a larger encoding than -// corresponding 32-bit instructions, and 16-bit code is slow on some -// processors. This is an experimental flag to disable 16-bit operations -// (which forces them to be Legalized to 32-bit operations). -static cl::opt<bool> -Disable16Bit("disable-16bit", cl::Hidden, - cl::desc("Disable use of 16-bit instructions")); - // Forward declarations. static SDValue getMOVL(SelectionDAG &DAG, DebugLoc dl, EVT VT, SDValue V1, SDValue V2); @@ -120,8 +112,7 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM) // Set up the register classes. addRegisterClass(MVT::i8, X86::GR8RegisterClass); - if (!Disable16Bit) - addRegisterClass(MVT::i16, X86::GR16RegisterClass); + addRegisterClass(MVT::i16, X86::GR16RegisterClass); addRegisterClass(MVT::i32, X86::GR32RegisterClass); if (Subtarget->is64Bit()) addRegisterClass(MVT::i64, X86::GR64RegisterClass); @@ -130,11 +121,9 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM) // We don't accept any truncstore of integer registers. setTruncStoreAction(MVT::i64, MVT::i32, Expand); - if (!Disable16Bit) - setTruncStoreAction(MVT::i64, MVT::i16, Expand); + setTruncStoreAction(MVT::i64, MVT::i16, Expand); setTruncStoreAction(MVT::i64, MVT::i8 , Expand); - if (!Disable16Bit) - setTruncStoreAction(MVT::i32, MVT::i16, Expand); + setTruncStoreAction(MVT::i32, MVT::i16, Expand); setTruncStoreAction(MVT::i32, MVT::i8 , Expand); setTruncStoreAction(MVT::i16, MVT::i8, Expand); @@ -285,13 +274,8 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM) setOperationAction(ISD::CTTZ , MVT::i8 , Custom); setOperationAction(ISD::CTLZ , MVT::i8 , Custom); setOperationAction(ISD::CTPOP , MVT::i16 , Expand); - if (Disable16Bit) { - setOperationAction(ISD::CTTZ , MVT::i16 , Expand); - setOperationAction(ISD::CTLZ , MVT::i16 , Expand); - } else { - setOperationAction(ISD::CTTZ , MVT::i16 , Custom); - setOperationAction(ISD::CTLZ , MVT::i16 , Custom); - } + setOperationAction(ISD::CTTZ , MVT::i16 , Custom); + setOperationAction(ISD::CTLZ , MVT::i16 , Custom); setOperationAction(ISD::CTPOP , MVT::i32 , Expand); setOperationAction(ISD::CTTZ , MVT::i32 , Custom); setOperationAction(ISD::CTLZ , MVT::i32 , Custom); @@ -308,19 +292,13 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM) setOperationAction(ISD::SELECT , MVT::i1 , Promote); // X86 wants to expand cmov itself. 
setOperationAction(ISD::SELECT , MVT::i8 , Custom); - if (Disable16Bit) - setOperationAction(ISD::SELECT , MVT::i16 , Expand); - else - setOperationAction(ISD::SELECT , MVT::i16 , Custom); + setOperationAction(ISD::SELECT , MVT::i16 , Custom); setOperationAction(ISD::SELECT , MVT::i32 , Custom); setOperationAction(ISD::SELECT , MVT::f32 , Custom); setOperationAction(ISD::SELECT , MVT::f64 , Custom); setOperationAction(ISD::SELECT , MVT::f80 , Custom); setOperationAction(ISD::SETCC , MVT::i8 , Custom); - if (Disable16Bit) - setOperationAction(ISD::SETCC , MVT::i16 , Expand); - else - setOperationAction(ISD::SETCC , MVT::i16 , Custom); + setOperationAction(ISD::SETCC , MVT::i16 , Custom); setOperationAction(ISD::SETCC , MVT::i32 , Custom); setOperationAction(ISD::SETCC , MVT::f32 , Custom); setOperationAction(ISD::SETCC , MVT::f64 , Custom); @@ -623,11 +601,11 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM) // FIXME: In order to prevent SSE instructions being expanded to MMX ones // with -msoft-float, disable use of MMX as well. if (!UseSoftFloat && !DisableMMX && Subtarget->hasMMX()) { - addRegisterClass(MVT::v8i8, X86::VR64RegisterClass); - addRegisterClass(MVT::v4i16, X86::VR64RegisterClass); - addRegisterClass(MVT::v2i32, X86::VR64RegisterClass); - addRegisterClass(MVT::v2f32, X86::VR64RegisterClass); - addRegisterClass(MVT::v1i64, X86::VR64RegisterClass); + addRegisterClass(MVT::v8i8, X86::VR64RegisterClass, false); + addRegisterClass(MVT::v4i16, X86::VR64RegisterClass, false); + addRegisterClass(MVT::v2i32, X86::VR64RegisterClass, false); + addRegisterClass(MVT::v2f32, X86::VR64RegisterClass, false); + addRegisterClass(MVT::v1i64, X86::VR64RegisterClass, false); setOperationAction(ISD::ADD, MVT::v8i8, Legal); setOperationAction(ISD::ADD, MVT::v4i16, Legal); @@ -1067,23 +1045,27 @@ unsigned X86TargetLowering::getByValTypeAlignment(const Type *Ty) const { } /// getOptimalMemOpType - Returns the target specific optimal type for load -/// and store operations as a result of memset, memcpy, and memmove lowering. -/// If DstAlign is zero that means it's safe to destination alignment can -/// satisfy any constraint. Similarly if SrcAlign is zero it means there -/// isn't a need to check it against alignment requirement, probably because -/// the source does not need to be loaded. If 'NonScalarIntSafe' is true, that -/// means it's safe to return a non-scalar-integer type, e.g. constant string -/// source or loaded from memory. It returns EVT::Other if SelectionDAG should -/// be responsible for determining it. +/// and store operations as a result of memset, memcpy, and memmove +/// lowering. If DstAlign is zero that means it's safe to destination +/// alignment can satisfy any constraint. Similarly if SrcAlign is zero it +/// means there isn't a need to check it against alignment requirement, +/// probably because the source does not need to be loaded. If +/// 'NonScalarIntSafe' is true, that means it's safe to return a +/// non-scalar-integer type, e.g. empty string source, constant, or loaded +/// from memory. 'MemcpyStrSrc' indicates whether the memcpy source is +/// constant so it does not need to be loaded. +/// It returns EVT::Other if the type should be determined using generic +/// target-independent logic. 
EVT X86TargetLowering::getOptimalMemOpType(uint64_t Size, unsigned DstAlign, unsigned SrcAlign, bool NonScalarIntSafe, - SelectionDAG &DAG) const { + bool MemcpyStrSrc, + MachineFunction &MF) const { // FIXME: This turns off use of xmm stores for memset/memcpy on targets like // linux. This is because the stack realignment code can't handle certain // cases like PR2962. This should be removed when PR2962 is fixed. - const Function *F = DAG.getMachineFunction().getFunction(); + const Function *F = MF.getFunction(); if (NonScalarIntSafe && !F->hasFnAttr(Attribute::NoImplicitFloat)) { if (Size >= 16 && @@ -1095,11 +1077,14 @@ X86TargetLowering::getOptimalMemOpType(uint64_t Size, return MVT::v4i32; if (Subtarget->hasSSE1()) return MVT::v4f32; - } else if (Size >= 8 && + } else if (!MemcpyStrSrc && Size >= 8 && !Subtarget->is64Bit() && Subtarget->getStackAlignment() >= 8 && - Subtarget->hasSSE2()) + Subtarget->hasSSE2()) { + // Do not use f64 to lower memcpy if source is string constant. It's + // better to use i32 to avoid the loads. return MVT::f64; + } } if (Subtarget->is64Bit() && Size >= 8) return MVT::i64; @@ -1182,7 +1167,7 @@ bool X86TargetLowering::CanLowerReturn(CallingConv::ID CallConv, bool isVarArg, const SmallVectorImpl<EVT> &OutTys, const SmallVectorImpl<ISD::ArgFlagsTy> &ArgsFlags, - SelectionDAG &DAG) { + SelectionDAG &DAG) const { SmallVector<CCValAssign, 16> RVLocs; CCState CCInfo(CallConv, isVarArg, getTargetMachine(), RVLocs, *DAG.getContext()); @@ -1193,7 +1178,9 @@ SDValue X86TargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool isVarArg, const SmallVectorImpl<ISD::OutputArg> &Outs, - DebugLoc dl, SelectionDAG &DAG) { + DebugLoc dl, SelectionDAG &DAG) const { + MachineFunction &MF = DAG.getMachineFunction(); + X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>(); SmallVector<CCValAssign, 16> RVLocs; CCState CCInfo(CallConv, isVarArg, getTargetMachine(), @@ -1211,7 +1198,8 @@ X86TargetLowering::LowerReturn(SDValue Chain, SmallVector<SDValue, 6> RetOps; RetOps.push_back(Chain); // Operand #0 = Chain (updated below) // Operand #1 = Bytes To Pop - RetOps.push_back(DAG.getTargetConstant(getBytesToPopOnReturn(), MVT::i16)); + RetOps.push_back(DAG.getTargetConstant(FuncInfo->getBytesToPopOnReturn(), + MVT::i16)); // Copy the result values into the output registers. for (unsigned i = 0; i != RVLocs.size(); ++i) { @@ -1287,7 +1275,7 @@ X86TargetLowering::LowerCallResult(SDValue Chain, SDValue InFlag, CallingConv::ID CallConv, bool isVarArg, const SmallVectorImpl<ISD::InputArg> &Ins, DebugLoc dl, SelectionDAG &DAG, - SmallVectorImpl<SDValue> &InVals) { + SmallVectorImpl<SDValue> &InVals) const { // Assign locations to each value returned by this call. SmallVector<CCValAssign, 16> RVLocs; @@ -1304,7 +1292,7 @@ X86TargetLowering::LowerCallResult(SDValue Chain, SDValue InFlag, // If this is x86-64, and we disabled SSE, we can't return FP values if ((CopyVT == MVT::f32 || CopyVT == MVT::f64) && ((Is64Bit || Ins[i].Flags.isInReg()) && !Subtarget->hasSSE1())) { - llvm_report_error("SSE register return with SSE disabled"); + report_fatal_error("SSE register return with SSE disabled"); } // If this is a call to a function that returns an fp value on the floating @@ -1384,7 +1372,8 @@ ArgsAreStructReturn(const SmallVectorImpl<ISD::InputArg> &Ins) { /// IsCalleePop - Determines whether the callee is required to pop its /// own arguments. Callee pop is necessary to support tail calls. 
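To make the control flow of the getOptimalMemOpType hunk above easier to follow, here is a heavily simplified standalone mirror of its policy; the subtarget queries and the elided 16-byte preconditions are collapsed into plain boolean parameters, so this is a sketch of the shape of the decision, not the real API:

// Simplified mirror of the memop-type policy above. 'canUseXmm' stands in
// for the 16-byte-store preconditions elided from the hunk; the other
// flags replace the subtarget queries.
#include <cstdint>
#include <iostream>

enum class MemChunk { V4I32, F64, I64, I32 };

MemChunk chooseMemOpType(uint64_t size, bool canUseXmm, bool hasSSE2,
                         bool is64Bit, bool stackAlignedTo8,
                         bool memcpyStrSrc) {
  if (canUseXmm && size >= 16)
    return MemChunk::V4I32;  // one 16-byte store per chunk
  // Avoid f64 when the source is a constant string: i32 loads fold better.
  if (!memcpyStrSrc && size >= 8 && !is64Bit && stackAlignedTo8 && hasSSE2)
    return MemChunk::F64;
  if (is64Bit && size >= 8)
    return MemChunk::I64;
  return MemChunk::I32;
}

int main() {
  // 32-byte memcpy on a 32-bit SSE2 target with an 8-byte-aligned stack
  std::cout << int(chooseMemOpType(32, false, true, false, true, false))
            << "\n"; // 1 == F64
  return 0;
}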
-bool X86TargetLowering::IsCalleePop(bool IsVarArg, CallingConv::ID CallingConv){ +bool X86TargetLowering::IsCalleePop(bool IsVarArg, + CallingConv::ID CallingConv) const { if (IsVarArg) return false; @@ -1457,7 +1446,7 @@ X86TargetLowering::LowerMemArgument(SDValue Chain, DebugLoc dl, SelectionDAG &DAG, const CCValAssign &VA, MachineFrameInfo *MFI, - unsigned i) { + unsigned i) const { // Create the nodes corresponding to a load from this parameter slot. ISD::ArgFlagsTy Flags = Ins[i].Flags; bool AlwaysUseMutable = FuncIsMadeTailCallSafe(CallConv); @@ -1496,7 +1485,8 @@ X86TargetLowering::LowerFormalArguments(SDValue Chain, const SmallVectorImpl<ISD::InputArg> &Ins, DebugLoc dl, SelectionDAG &DAG, - SmallVectorImpl<SDValue> &InVals) { + SmallVectorImpl<SDValue> &InVals) + const { MachineFunction &MF = DAG.getMachineFunction(); X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>(); @@ -1607,7 +1597,8 @@ X86TargetLowering::LowerFormalArguments(SDValue Chain, // the start of the first vararg value... for expansion of llvm.va_start. if (isVarArg) { if (Is64Bit || CallConv != CallingConv::X86_FastCall) { - VarArgsFrameIndex = MFI->CreateFixedObject(1, StackSize, true, false); + FuncInfo->setVarArgsFrameIndex(MFI->CreateFixedObject(1, StackSize, + true, false)); } if (Is64Bit) { unsigned TotalNumIntRegs = 0, TotalNumXMMRegs = 0; @@ -1655,16 +1646,17 @@ X86TargetLowering::LowerFormalArguments(SDValue Chain, // For X86-64, if there are vararg parameters that are passed via // registers, then we must store them to their spots on the stack so they // may be loaded by deferencing the result of va_next. - VarArgsGPOffset = NumIntRegs * 8; - VarArgsFPOffset = TotalNumIntRegs * 8 + NumXMMRegs * 16; - RegSaveFrameIndex = MFI->CreateStackObject(TotalNumIntRegs * 8 + - TotalNumXMMRegs * 16, 16, - false); + FuncInfo->setVarArgsGPOffset(NumIntRegs * 8); + FuncInfo->setVarArgsFPOffset(TotalNumIntRegs * 8 + NumXMMRegs * 16); + FuncInfo->setRegSaveFrameIndex( + MFI->CreateStackObject(TotalNumIntRegs * 8 + TotalNumXMMRegs * 16, 16, + false)); // Store the integer parameter registers. 
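// A standalone illustration of the offsets being recorded just above.  Under
// the SysV x86-64 ABI there are 6 integer argument registers (8 bytes of save
// area each) and 8 XMM registers (16 bytes each); gp_offset/fp_offset record
// how much of each block the named arguments already consumed, so va_arg can
// start after them.  Sketch only, not the LLVM code.
#include <cassert>

struct RegSaveOffsets { unsigned GPOffset, FPOffset, AreaSize; };

static RegSaveOffsets computeRegSaveOffsets(unsigned NumIntRegsUsed,
                                            unsigned NumXMMRegsUsed) {
  const unsigned TotalIntRegs = 6, TotalXMMRegs = 8;
  assert(NumIntRegsUsed <= TotalIntRegs && NumXMMRegsUsed <= TotalXMMRegs);
  RegSaveOffsets R;
  R.GPOffset = NumIntRegsUsed * 8;                      // VarArgsGPOffset
  R.FPOffset = TotalIntRegs * 8 + NumXMMRegsUsed * 16;  // VarArgsFPOffset
  R.AreaSize = TotalIntRegs * 8 + TotalXMMRegs * 16;    // reg-save stack object
  return R;
}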
SmallVector<SDValue, 8> MemOps; - SDValue RSFIN = DAG.getFrameIndex(RegSaveFrameIndex, getPointerTy()); - unsigned Offset = VarArgsGPOffset; + SDValue RSFIN = DAG.getFrameIndex(FuncInfo->getRegSaveFrameIndex(), + getPointerTy()); + unsigned Offset = FuncInfo->getVarArgsGPOffset(); for (; NumIntRegs != TotalNumIntRegs; ++NumIntRegs) { SDValue FIN = DAG.getNode(ISD::ADD, dl, getPointerTy(), RSFIN, DAG.getIntPtrConstant(Offset)); @@ -1673,7 +1665,8 @@ X86TargetLowering::LowerFormalArguments(SDValue Chain, SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i64); SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN, - PseudoSourceValue::getFixedStack(RegSaveFrameIndex), + PseudoSourceValue::getFixedStack( + FuncInfo->getRegSaveFrameIndex()), Offset, false, false, 0); MemOps.push_back(Store); Offset += 8; @@ -1688,8 +1681,10 @@ X86TargetLowering::LowerFormalArguments(SDValue Chain, SDValue ALVal = DAG.getCopyFromReg(DAG.getEntryNode(), dl, AL, MVT::i8); SaveXMMOps.push_back(ALVal); - SaveXMMOps.push_back(DAG.getIntPtrConstant(RegSaveFrameIndex)); - SaveXMMOps.push_back(DAG.getIntPtrConstant(VarArgsFPOffset)); + SaveXMMOps.push_back(DAG.getIntPtrConstant( + FuncInfo->getRegSaveFrameIndex())); + SaveXMMOps.push_back(DAG.getIntPtrConstant( + FuncInfo->getVarArgsFPOffset())); for (; NumXMMRegs != TotalNumXMMRegs; ++NumXMMRegs) { unsigned VReg = MF.addLiveIn(XMMArgRegs[NumXMMRegs], @@ -1710,22 +1705,22 @@ X86TargetLowering::LowerFormalArguments(SDValue Chain, // Some CCs need callee pop. if (IsCalleePop(isVarArg, CallConv)) { - BytesToPopOnReturn = StackSize; // Callee pops everything. + FuncInfo->setBytesToPopOnReturn(StackSize); // Callee pops everything. } else { - BytesToPopOnReturn = 0; // Callee pops nothing. + FuncInfo->setBytesToPopOnReturn(0); // Callee pops nothing. // If this is an sret function, the return should pop the hidden pointer. if (!Is64Bit && !IsTailCallConvention(CallConv) && ArgsAreStructReturn(Ins)) - BytesToPopOnReturn = 4; + FuncInfo->setBytesToPopOnReturn(4); } if (!Is64Bit) { - RegSaveFrameIndex = 0xAAAAAAA; // RegSaveFrameIndex is X86-64 only. + // RegSaveFrameIndex is X86-64 only. + FuncInfo->setRegSaveFrameIndex(0xAAAAAAA); if (CallConv == CallingConv::X86_FastCall) - VarArgsFrameIndex = 0xAAAAAAA; // fastcc functions can't have varargs. + // fastcc functions can't have varargs. + FuncInfo->setVarArgsFrameIndex(0xAAAAAAA); } - FuncInfo->setBytesToPopOnReturn(BytesToPopOnReturn); - return Chain; } @@ -1734,7 +1729,7 @@ X86TargetLowering::LowerMemOpCallTo(SDValue Chain, SDValue StackPtr, SDValue Arg, DebugLoc dl, SelectionDAG &DAG, const CCValAssign &VA, - ISD::ArgFlagsTy Flags) { + ISD::ArgFlagsTy Flags) const { const unsigned FirstStackArgOffset = (Subtarget->isTargetWin64() ? 32 : 0); unsigned LocMemOffset = FirstStackArgOffset + VA.getLocMemOffset(); SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset); @@ -1753,7 +1748,7 @@ SDValue X86TargetLowering::EmitTailCallLoadRetAddr(SelectionDAG &DAG, SDValue &OutRetAddr, SDValue Chain, bool IsTailCall, bool Is64Bit, - int FPDiff, DebugLoc dl) { + int FPDiff, DebugLoc dl) const { // Adjust the Return address stack slot. 
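// The recurring change in these hunks is that per-function state (varargs
// frame index, register-save index, bytes to pop) now lives in
// X86MachineFunctionInfo, fetched via MF.getInfo<>(), rather than in mutable
// members of the lowering class; that is what allows the lowering methods to
// become const.  A minimal sketch of the pattern, with illustrative names
// rather than the real LLVM types:
struct PerFunctionInfoSketch {
  int VarArgsFrameIndex = 0;
  int RegSaveFrameIndex = 0;
  unsigned BytesToPopOnReturn = 0;

  void setBytesToPopOnReturn(unsigned N) { BytesToPopOnReturn = N; }
  unsigned getBytesToPopOnReturn() const { return BytesToPopOnReturn; }
};

struct LoweringSketch {
  // A const method can still record per-function facts, because they are
  // stored on the function-info object rather than on *this.
  void recordCalleePop(PerFunctionInfoSketch &FI, unsigned Bytes) const {
    FI.setBytesToPopOnReturn(Bytes);
  }
};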
EVT VT = getPointerTy(); OutRetAddr = getReturnAddressFrameIndex(DAG); @@ -1790,7 +1785,7 @@ X86TargetLowering::LowerCall(SDValue Chain, SDValue Callee, const SmallVectorImpl<ISD::OutputArg> &Outs, const SmallVectorImpl<ISD::InputArg> &Ins, DebugLoc dl, SelectionDAG &DAG, - SmallVectorImpl<SDValue> &InVals) { + SmallVectorImpl<SDValue> &InVals) const { MachineFunction &MF = DAG.getMachineFunction(); bool Is64Bit = Subtarget->is64Bit(); bool IsStructRet = CallIsStructReturn(Outs); @@ -2060,7 +2055,7 @@ X86TargetLowering::LowerCall(SDValue Chain, SDValue Callee, // We should use extra load for direct calls to dllimported functions in // non-JIT mode. - GlobalValue *GV = G->getGlobal(); + const GlobalValue *GV = G->getGlobal(); if (!GV->hasDLLImportLinkage()) { unsigned char OpFlags = 0; @@ -2219,8 +2214,9 @@ X86TargetLowering::LowerCall(SDValue Chain, SDValue Callee, /// GetAlignedArgumentStackSize - Make the stack size align e.g 16n + 12 aligned /// for a 16 byte align requirement. -unsigned X86TargetLowering::GetAlignedArgumentStackSize(unsigned StackSize, - SelectionDAG& DAG) { +unsigned +X86TargetLowering::GetAlignedArgumentStackSize(unsigned StackSize, + SelectionDAG& DAG) const { MachineFunction &MF = DAG.getMachineFunction(); const TargetMachine &TM = MF.getTarget(); const TargetFrameInfo &TFI = *TM.getFrameInfo(); @@ -2308,9 +2304,11 @@ X86TargetLowering::IsEligibleForTailCallOptimization(SDValue Callee, // If -tailcallopt is specified, make fastcc functions tail-callable. const MachineFunction &MF = DAG.getMachineFunction(); const Function *CallerF = DAG.getMachineFunction().getFunction(); + CallingConv::ID CallerCC = CallerF->getCallingConv(); + bool CCMatch = CallerCC == CalleeCC; + if (GuaranteedTailCallOpt) { - if (IsTailCallConvention(CalleeCC) && - CallerF->getCallingConv() == CalleeCC) + if (IsTailCallConvention(CalleeCC) && CCMatch) return true; return false; } @@ -2348,13 +2346,43 @@ X86TargetLowering::IsEligibleForTailCallOptimization(SDValue Callee, CCState CCInfo(CalleeCC, false, getTargetMachine(), RVLocs, *DAG.getContext()); CCInfo.AnalyzeCallResult(Ins, RetCC_X86); - for (unsigned i = 0; i != RVLocs.size(); ++i) { + for (unsigned i = 0, e = RVLocs.size(); i != e; ++i) { CCValAssign &VA = RVLocs[i]; if (VA.getLocReg() == X86::ST0 || VA.getLocReg() == X86::ST1) return false; } } + // If the calling conventions do not match, then we'd better make sure the + // results are returned in the same way as what the caller expects. + if (!CCMatch) { + SmallVector<CCValAssign, 16> RVLocs1; + CCState CCInfo1(CalleeCC, false, getTargetMachine(), + RVLocs1, *DAG.getContext()); + CCInfo1.AnalyzeCallResult(Ins, RetCC_X86); + + SmallVector<CCValAssign, 16> RVLocs2; + CCState CCInfo2(CallerCC, false, getTargetMachine(), + RVLocs2, *DAG.getContext()); + CCInfo2.AnalyzeCallResult(Ins, RetCC_X86); + + if (RVLocs1.size() != RVLocs2.size()) + return false; + for (unsigned i = 0, e = RVLocs1.size(); i != e; ++i) { + if (RVLocs1[i].isRegLoc() != RVLocs2[i].isRegLoc()) + return false; + if (RVLocs1[i].getLocInfo() != RVLocs2[i].getLocInfo()) + return false; + if (RVLocs1[i].isRegLoc()) { + if (RVLocs1[i].getLocReg() != RVLocs2[i].getLocReg()) + return false; + } else { + if (RVLocs1[i].getLocMemOffset() != RVLocs2[i].getLocMemOffset()) + return false; + } + } + } + // If the callee takes no arguments then go on to check the results of the // call. 
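// A sketch of the cross-convention check added above: a tail call between
// different calling conventions is only allowed when both conventions assign
// every return value to exactly the same place.  Loc is a stand-in for
// CCValAssign; the real code also compares the LocInfo kind.
#include <cstddef>
#include <vector>

struct Loc {
  bool InReg;
  unsigned Reg;        // valid when InReg
  unsigned MemOffset;  // valid when !InReg
};

static bool sameReturnLocations(const std::vector<Loc> &A,
                                const std::vector<Loc> &B) {
  if (A.size() != B.size())
    return false;
  for (std::size_t i = 0; i != A.size(); ++i) {
    if (A[i].InReg != B[i].InReg)
      return false;
    if (A[i].InReg ? A[i].Reg != B[i].Reg
                   : A[i].MemOffset != B[i].MemOffset)
      return false;
  }
  return true;
}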
if (!Outs.empty()) { @@ -2401,12 +2429,13 @@ FastISel * X86TargetLowering::createFastISel(MachineFunction &mf, DenseMap<const Value *, unsigned> &vm, DenseMap<const BasicBlock*, MachineBasicBlock*> &bm, - DenseMap<const AllocaInst *, int> &am + DenseMap<const AllocaInst *, int> &am, + std::vector<std::pair<MachineInstr*, unsigned> > &pn #ifndef NDEBUG - , SmallSet<Instruction*, 8> &cil + , SmallSet<const Instruction *, 8> &cil #endif - ) { - return X86::createFastISel(mf, vm, bm, am + ) const { + return X86::createFastISel(mf, vm, bm, am, pn #ifndef NDEBUG , cil #endif @@ -2419,7 +2448,7 @@ X86TargetLowering::createFastISel(MachineFunction &mf, //===----------------------------------------------------------------------===// -SDValue X86TargetLowering::getReturnAddressFrameIndex(SelectionDAG &DAG) { +SDValue X86TargetLowering::getReturnAddressFrameIndex(SelectionDAG &DAG) const { MachineFunction &MF = DAG.getMachineFunction(); X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>(); int ReturnAddrIndex = FuncInfo->getRAIndex(); @@ -3440,7 +3469,7 @@ unsigned getNumOfConsecutiveZeros(ShuffleVectorSDNode *SVOp, int NumElems, /// FIXME: split into pslldqi, psrldqi, palignr variants. static bool isVectorShift(ShuffleVectorSDNode *SVOp, SelectionDAG &DAG, bool &isLeft, SDValue &ShVal, unsigned &ShAmt) { - int NumElems = SVOp->getValueType(0).getVectorNumElements(); + unsigned NumElems = SVOp->getValueType(0).getVectorNumElements(); isLeft = true; unsigned NumZeros = getNumOfConsecutiveZeros(SVOp, NumElems, true, DAG); @@ -3452,11 +3481,12 @@ static bool isVectorShift(ShuffleVectorSDNode *SVOp, SelectionDAG &DAG, } bool SeenV1 = false; bool SeenV2 = false; - for (int i = NumZeros; i < NumElems; ++i) { - int Val = isLeft ? (i - NumZeros) : i; - int Idx = SVOp->getMaskElt(isLeft ? i : (i - NumZeros)); - if (Idx < 0) + for (unsigned i = NumZeros; i < NumElems; ++i) { + unsigned Val = isLeft ? (i - NumZeros) : i; + int Idx_ = SVOp->getMaskElt(isLeft ? i : (i - NumZeros)); + if (Idx_ < 0) continue; + unsigned Idx = (unsigned) Idx_; if (Idx < NumElems) SeenV1 = true; else { @@ -3479,7 +3509,8 @@ static bool isVectorShift(ShuffleVectorSDNode *SVOp, SelectionDAG &DAG, /// static SDValue LowerBuildVectorv16i8(SDValue Op, unsigned NonZeros, unsigned NumNonZero, unsigned NumZero, - SelectionDAG &DAG, TargetLowering &TLI) { + SelectionDAG &DAG, + const TargetLowering &TLI) { if (NumNonZero > 8) return SDValue(); @@ -3524,8 +3555,9 @@ static SDValue LowerBuildVectorv16i8(SDValue Op, unsigned NonZeros, /// LowerBuildVectorv8i16 - Custom lower build_vector of v8i16. /// static SDValue LowerBuildVectorv8i16(SDValue Op, unsigned NonZeros, - unsigned NumNonZero, unsigned NumZero, - SelectionDAG &DAG, TargetLowering &TLI) { + unsigned NumNonZero, unsigned NumZero, + SelectionDAG &DAG, + const TargetLowering &TLI) { if (NumNonZero > 4) return SDValue(); @@ -3567,7 +3599,7 @@ static SDValue getVShift(bool isLeft, EVT VT, SDValue SrcOp, SDValue X86TargetLowering::LowerAsSplatVectorLoad(SDValue SrcOp, EVT VT, DebugLoc dl, - SelectionDAG &DAG) { + SelectionDAG &DAG) const { // Check if the scalar load can be widened into a vector load. 
And if // the address is "base + cst" see if the cst can be "absorbed" into @@ -3699,7 +3731,7 @@ static SDValue EltsFromConsecutiveLoads(EVT VT, SmallVectorImpl<SDValue> &Elts, } SDValue -X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) { +X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const { DebugLoc dl = Op.getDebugLoc(); // All zero's are handled with pxor, all one's are handled with pcmpeqd. if (ISD::isBuildVectorAllZeros(Op.getNode()) @@ -3965,7 +3997,7 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) { } SDValue -X86TargetLowering::LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) { +X86TargetLowering::LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) const { // We support concatenate two MMX registers and place them in a MMX // register. This is better than doing a stack convert. DebugLoc dl = Op.getDebugLoc(); @@ -3998,7 +4030,8 @@ X86TargetLowering::LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) { // 4. [all] mov + pshuflw + pshufhw + N x (pextrw + pinsrw) static SDValue LowerVECTOR_SHUFFLEv8i16(ShuffleVectorSDNode *SVOp, - SelectionDAG &DAG, X86TargetLowering &TLI) { + SelectionDAG &DAG, + const X86TargetLowering &TLI) { SDValue V1 = SVOp->getOperand(0); SDValue V2 = SVOp->getOperand(1); DebugLoc dl = SVOp->getDebugLoc(); @@ -4241,7 +4274,8 @@ SDValue LowerVECTOR_SHUFFLEv8i16(ShuffleVectorSDNode *SVOp, // 3. [all] v8i16 shuffle + N x pextrw + rotate + pinsrw static SDValue LowerVECTOR_SHUFFLEv16i8(ShuffleVectorSDNode *SVOp, - SelectionDAG &DAG, X86TargetLowering &TLI) { + SelectionDAG &DAG, + const X86TargetLowering &TLI) { SDValue V1 = SVOp->getOperand(0); SDValue V2 = SVOp->getOperand(1); DebugLoc dl = SVOp->getDebugLoc(); @@ -4387,7 +4421,7 @@ SDValue LowerVECTOR_SHUFFLEv16i8(ShuffleVectorSDNode *SVOp, static SDValue RewriteAsNarrowerShuffle(ShuffleVectorSDNode *SVOp, SelectionDAG &DAG, - TargetLowering &TLI, DebugLoc dl) { + const TargetLowering &TLI, DebugLoc dl) { EVT VT = SVOp->getValueType(0); SDValue V1 = SVOp->getOperand(0); SDValue V2 = SVOp->getOperand(1); @@ -4619,7 +4653,7 @@ LowerVECTOR_SHUFFLE_4wide(ShuffleVectorSDNode *SVOp, SelectionDAG &DAG) { } SDValue -X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) { +X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const { ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Op); SDValue V1 = Op.getOperand(0); SDValue V2 = Op.getOperand(1); @@ -4806,7 +4840,7 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) { SDValue X86TargetLowering::LowerEXTRACT_VECTOR_ELT_SSE4(SDValue Op, - SelectionDAG &DAG) { + SelectionDAG &DAG) const { EVT VT = Op.getValueType(); DebugLoc dl = Op.getDebugLoc(); if (VT.getSizeInBits() == 8) { @@ -4860,7 +4894,8 @@ X86TargetLowering::LowerEXTRACT_VECTOR_ELT_SSE4(SDValue Op, SDValue -X86TargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) { +X86TargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op, + SelectionDAG &DAG) const { if (!isa<ConstantSDNode>(Op.getOperand(1))) return SDValue(); @@ -4924,7 +4959,8 @@ X86TargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) { } SDValue -X86TargetLowering::LowerINSERT_VECTOR_ELT_SSE4(SDValue Op, SelectionDAG &DAG){ +X86TargetLowering::LowerINSERT_VECTOR_ELT_SSE4(SDValue Op, + SelectionDAG &DAG) const { EVT VT = Op.getValueType(); EVT EltVT = VT.getVectorElementType(); DebugLoc dl = Op.getDebugLoc(); @@ -4973,7 +5009,7 @@ X86TargetLowering::LowerINSERT_VECTOR_ELT_SSE4(SDValue Op, SelectionDAG 
&DAG){ } SDValue -X86TargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) { +X86TargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const { EVT VT = Op.getValueType(); EVT EltVT = VT.getVectorElementType(); @@ -5002,7 +5038,7 @@ X86TargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) { } SDValue -X86TargetLowering::LowerSCALAR_TO_VECTOR(SDValue Op, SelectionDAG &DAG) { +X86TargetLowering::LowerSCALAR_TO_VECTOR(SDValue Op, SelectionDAG &DAG) const { DebugLoc dl = Op.getDebugLoc(); if (Op.getValueType() == MVT::v2f32) return DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v2f32, @@ -5033,7 +5069,7 @@ X86TargetLowering::LowerSCALAR_TO_VECTOR(SDValue Op, SelectionDAG &DAG) { // be used to form addressing mode. These wrapped nodes will be selected // into MOV32ri. SDValue -X86TargetLowering::LowerConstantPool(SDValue Op, SelectionDAG &DAG) { +X86TargetLowering::LowerConstantPool(SDValue Op, SelectionDAG &DAG) const { ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op); // In PIC mode (unless we're in RIPRel PIC mode) we add an offset to the @@ -5066,7 +5102,7 @@ X86TargetLowering::LowerConstantPool(SDValue Op, SelectionDAG &DAG) { return Result; } -SDValue X86TargetLowering::LowerJumpTable(SDValue Op, SelectionDAG &DAG) { +SDValue X86TargetLowering::LowerJumpTable(SDValue Op, SelectionDAG &DAG) const { JumpTableSDNode *JT = cast<JumpTableSDNode>(Op); // In PIC mode (unless we're in RIPRel PIC mode) we add an offset to the @@ -5100,7 +5136,7 @@ SDValue X86TargetLowering::LowerJumpTable(SDValue Op, SelectionDAG &DAG) { } SDValue -X86TargetLowering::LowerExternalSymbol(SDValue Op, SelectionDAG &DAG) { +X86TargetLowering::LowerExternalSymbol(SDValue Op, SelectionDAG &DAG) const { const char *Sym = cast<ExternalSymbolSDNode>(Op)->getSymbol(); // In PIC mode (unless we're in RIPRel PIC mode) we add an offset to the @@ -5136,12 +5172,12 @@ X86TargetLowering::LowerExternalSymbol(SDValue Op, SelectionDAG &DAG) { } SDValue -X86TargetLowering::LowerBlockAddress(SDValue Op, SelectionDAG &DAG) { +X86TargetLowering::LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const { // Create the TargetBlockAddressAddress node. 
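// A sketch of the shared recipe in the constant-pool, jump-table,
// external-symbol and block-address lowerings above: wrap the target node,
// use the RIP-relative wrapper on x86-64 small/kernel code models, and in
// 32-bit PIC mode add the PIC base register afterwards because the wrapped
// address alone is not position-independent.  Names are illustrative
// assumptions, not LLVM API.
struct WrappedAddr {
  bool RIPRelative;   // select the WrapperRIP form
  bool AddPICBase;    // follow with an ADD of the global base register
};

static WrappedAddr classifyWrappedAddress(bool PICEnabled, bool Is64Bit,
                                          bool SmallOrKernelCodeModel) {
  WrappedAddr A;
  A.RIPRelative = Is64Bit && SmallOrKernelCodeModel;
  A.AddPICBase  = PICEnabled && !Is64Bit;   // no RIP-relative form on ia32
  return A;
}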
unsigned char OpFlags = Subtarget->ClassifyBlockAddressReference(); CodeModel::Model M = getTargetMachine().getCodeModel(); - BlockAddress *BA = cast<BlockAddressSDNode>(Op)->getBlockAddress(); + const BlockAddress *BA = cast<BlockAddressSDNode>(Op)->getBlockAddress(); DebugLoc dl = Op.getDebugLoc(); SDValue Result = DAG.getBlockAddress(BA, getPointerTy(), /*isTarget=*/true, OpFlags); @@ -5210,7 +5246,7 @@ X86TargetLowering::LowerGlobalAddress(const GlobalValue *GV, DebugLoc dl, } SDValue -X86TargetLowering::LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) { +X86TargetLowering::LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const { const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal(); int64_t Offset = cast<GlobalAddressSDNode>(Op)->getOffset(); return LowerGlobalAddress(GV, Op.getDebugLoc(), Offset, DAG); @@ -5310,7 +5346,7 @@ static SDValue LowerToTLSExecModel(GlobalAddressSDNode *GA, SelectionDAG &DAG, } SDValue -X86TargetLowering::LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) { +X86TargetLowering::LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const { // TODO: implement the "local dynamic" model // TODO: implement the "initial exec"model for pic executables assert(Subtarget->isTargetELF() && @@ -5346,7 +5382,7 @@ X86TargetLowering::LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) { /// LowerShift - Lower SRA_PARTS and friends, which return two i32 values and /// take a 2 x i32 value to shift plus a shift amount. -SDValue X86TargetLowering::LowerShift(SDValue Op, SelectionDAG &DAG) { +SDValue X86TargetLowering::LowerShift(SDValue Op, SelectionDAG &DAG) const { assert(Op.getNumOperands() == 3 && "Not a double-shift!"); EVT VT = Op.getValueType(); unsigned VTBits = VT.getSizeInBits(); @@ -5390,7 +5426,8 @@ SDValue X86TargetLowering::LowerShift(SDValue Op, SelectionDAG &DAG) { return DAG.getMergeValues(Ops, 2, dl); } -SDValue X86TargetLowering::LowerSINT_TO_FP(SDValue Op, SelectionDAG &DAG) { +SDValue X86TargetLowering::LowerSINT_TO_FP(SDValue Op, + SelectionDAG &DAG) const { EVT SrcVT = Op.getOperand(0).getValueType(); if (SrcVT.isVector()) { @@ -5426,7 +5463,7 @@ SDValue X86TargetLowering::LowerSINT_TO_FP(SDValue Op, SelectionDAG &DAG) { SDValue X86TargetLowering::BuildFILD(SDValue Op, EVT SrcVT, SDValue Chain, SDValue StackSlot, - SelectionDAG &DAG) { + SelectionDAG &DAG) const { // Build the FILD DebugLoc dl = Op.getDebugLoc(); SDVTList Tys; @@ -5463,7 +5500,8 @@ SDValue X86TargetLowering::BuildFILD(SDValue Op, EVT SrcVT, SDValue Chain, } // LowerUINT_TO_FP_i64 - 64-bit unsigned integer to double expansion. -SDValue X86TargetLowering::LowerUINT_TO_FP_i64(SDValue Op, SelectionDAG &DAG) { +SDValue X86TargetLowering::LowerUINT_TO_FP_i64(SDValue Op, + SelectionDAG &DAG) const { // This algorithm is not obvious. Here it is in C code, more or less: /* double uint64_to_double( uint32_t hi, uint32_t lo ) { @@ -5547,7 +5585,8 @@ SDValue X86TargetLowering::LowerUINT_TO_FP_i64(SDValue Op, SelectionDAG &DAG) { } // LowerUINT_TO_FP_i32 - 32-bit unsigned integer to float expansion. -SDValue X86TargetLowering::LowerUINT_TO_FP_i32(SDValue Op, SelectionDAG &DAG) { +SDValue X86TargetLowering::LowerUINT_TO_FP_i32(SDValue Op, + SelectionDAG &DAG) const { DebugLoc dl = Op.getDebugLoc(); // FP constant to bias correct the final result. 
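// A note on the 0x4330000000000000 constant used below: it is the bit pattern
// of 2^52, so OR-ing a 32-bit value into the low mantissa bits yields exactly
// 2^52 + x, and subtracting 2^52 afterwards leaves x converted to double
// without any int-to-fp instruction.  Plain C++ demonstration of the trick:
#include <cstdint>
#include <cstring>

static double uint32ToDouble(uint32_t X) {
  uint64_t Bits = 0x4330000000000000ULL | X;  // exponent of 2^52, mantissa = X
  double D;
  std::memcpy(&D, &Bits, sizeof(D));          // reinterpret the bits
  return D - 4503599627370496.0;              // subtract 2^52 exactly
}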
SDValue Bias = DAG.getConstantFP(BitsToDouble(0x4330000000000000ULL), @@ -5592,7 +5631,8 @@ SDValue X86TargetLowering::LowerUINT_TO_FP_i32(SDValue Op, SelectionDAG &DAG) { return Sub; } -SDValue X86TargetLowering::LowerUINT_TO_FP(SDValue Op, SelectionDAG &DAG) { +SDValue X86TargetLowering::LowerUINT_TO_FP(SDValue Op, + SelectionDAG &DAG) const { SDValue N0 = Op.getOperand(0); DebugLoc dl = Op.getDebugLoc(); @@ -5628,7 +5668,7 @@ SDValue X86TargetLowering::LowerUINT_TO_FP(SDValue Op, SelectionDAG &DAG) { } std::pair<SDValue,SDValue> X86TargetLowering:: -FP_TO_INTHelper(SDValue Op, SelectionDAG &DAG, bool IsSigned) { +FP_TO_INTHelper(SDValue Op, SelectionDAG &DAG, bool IsSigned) const { DebugLoc dl = Op.getDebugLoc(); EVT DstTy = Op.getValueType(); @@ -5690,7 +5730,8 @@ FP_TO_INTHelper(SDValue Op, SelectionDAG &DAG, bool IsSigned) { return std::make_pair(FIST, StackSlot); } -SDValue X86TargetLowering::LowerFP_TO_SINT(SDValue Op, SelectionDAG &DAG) { +SDValue X86TargetLowering::LowerFP_TO_SINT(SDValue Op, + SelectionDAG &DAG) const { if (Op.getValueType().isVector()) { if (Op.getValueType() == MVT::v2i32 && Op.getOperand(0).getValueType() == MVT::v2f64) { @@ -5709,7 +5750,8 @@ SDValue X86TargetLowering::LowerFP_TO_SINT(SDValue Op, SelectionDAG &DAG) { FIST, StackSlot, NULL, 0, false, false, 0); } -SDValue X86TargetLowering::LowerFP_TO_UINT(SDValue Op, SelectionDAG &DAG) { +SDValue X86TargetLowering::LowerFP_TO_UINT(SDValue Op, + SelectionDAG &DAG) const { std::pair<SDValue,SDValue> Vals = FP_TO_INTHelper(Op, DAG, false); SDValue FIST = Vals.first, StackSlot = Vals.second; assert(FIST.getNode() && "Unexpected failure"); @@ -5719,7 +5761,8 @@ SDValue X86TargetLowering::LowerFP_TO_UINT(SDValue Op, SelectionDAG &DAG) { FIST, StackSlot, NULL, 0, false, false, 0); } -SDValue X86TargetLowering::LowerFABS(SDValue Op, SelectionDAG &DAG) { +SDValue X86TargetLowering::LowerFABS(SDValue Op, + SelectionDAG &DAG) const { LLVMContext *Context = DAG.getContext(); DebugLoc dl = Op.getDebugLoc(); EVT VT = Op.getValueType(); @@ -5746,7 +5789,7 @@ SDValue X86TargetLowering::LowerFABS(SDValue Op, SelectionDAG &DAG) { return DAG.getNode(X86ISD::FAND, dl, VT, Op.getOperand(0), Mask); } -SDValue X86TargetLowering::LowerFNEG(SDValue Op, SelectionDAG &DAG) { +SDValue X86TargetLowering::LowerFNEG(SDValue Op, SelectionDAG &DAG) const { LLVMContext *Context = DAG.getContext(); DebugLoc dl = Op.getDebugLoc(); EVT VT = Op.getValueType(); @@ -5781,7 +5824,7 @@ SDValue X86TargetLowering::LowerFNEG(SDValue Op, SelectionDAG &DAG) { } } -SDValue X86TargetLowering::LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) { +SDValue X86TargetLowering::LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) const { LLVMContext *Context = DAG.getContext(); SDValue Op0 = Op.getOperand(0); SDValue Op1 = Op.getOperand(1); @@ -5857,7 +5900,7 @@ SDValue X86TargetLowering::LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) { /// Emit nodes that will be selected as "test Op0,Op0", or something /// equivalent. SDValue X86TargetLowering::EmitTest(SDValue Op, unsigned X86CC, - SelectionDAG &DAG) { + SelectionDAG &DAG) const { DebugLoc dl = Op.getDebugLoc(); // CF and OF aren't always set the way we want. Determine which @@ -5885,15 +5928,20 @@ SDValue X86TargetLowering::EmitTest(SDValue Op, unsigned X86CC, unsigned NumOperands = 0; switch (Op.getNode()->getOpcode()) { case ISD::ADD: - // Due to an isel shortcoming, be conservative if this add is likely to - // be selected as part of a load-modify-store instruction. 
When the root - // node in a match is a store, isel doesn't know how to remap non-chain - // non-flag uses of other nodes in the match, such as the ADD in this - // case. This leads to the ADD being left around and reselected, with - // the result being two adds in the output. + // Due to an isel shortcoming, be conservative if this add is + // likely to be selected as part of a load-modify-store + // instruction. When the root node in a match is a store, isel + // doesn't know how to remap non-chain non-flag uses of other + // nodes in the match, such as the ADD in this case. This leads + // to the ADD being left around and reselected, with the result + // being two adds in the output. Alas, even if none our users + // are stores, that doesn't prove we're O.K. Ergo, if we have + // any parents that aren't CopyToReg or SETCC, eschew INC/DEC. + // A better fix seems to require climbing the DAG back to the + // root, and it doesn't seem to be worth the effort. for (SDNode::use_iterator UI = Op.getNode()->use_begin(), - UE = Op.getNode()->use_end(); UI != UE; ++UI) - if (UI->getOpcode() == ISD::STORE) + UE = Op.getNode()->use_end(); UI != UE; ++UI) + if (UI->getOpcode() != ISD::CopyToReg && UI->getOpcode() != ISD::SETCC) goto default_case; if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getNode()->getOperand(1))) { @@ -5988,7 +6036,7 @@ SDValue X86TargetLowering::EmitTest(SDValue Op, unsigned X86CC, /// Emit nodes that will be selected as "cmp Op0,Op1", or something /// equivalent. SDValue X86TargetLowering::EmitCmp(SDValue Op0, SDValue Op1, unsigned X86CC, - SelectionDAG &DAG) { + SelectionDAG &DAG) const { if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op1)) if (C->getAPIntValue() == 0) return EmitTest(Op0, X86CC, DAG); @@ -5999,8 +6047,8 @@ SDValue X86TargetLowering::EmitCmp(SDValue Op0, SDValue Op1, unsigned X86CC, /// LowerToBT - Result of 'and' is compared against zero. Turn it into a BT node /// if it's possible. -static SDValue LowerToBT(SDValue And, ISD::CondCode CC, - DebugLoc dl, SelectionDAG &DAG) { +SDValue X86TargetLowering::LowerToBT(SDValue And, ISD::CondCode CC, + DebugLoc dl, SelectionDAG &DAG) const { SDValue Op0 = And.getOperand(0); SDValue Op1 = And.getOperand(1); if (Op0.getOpcode() == ISD::TRUNCATE) @@ -6031,11 +6079,13 @@ static SDValue LowerToBT(SDValue And, ISD::CondCode CC, } if (LHS.getNode()) { - // If LHS is i8, promote it to i16 with any_extend. There is no i8 BT + // If LHS is i8, promote it to i32 with any_extend. There is no i8 BT // instruction. Since the shift amount is in-range-or-undefined, we know - // that doing a bittest on the i16 value is ok. We extend to i32 because + // that doing a bittest on the i32 value is ok. We extend to i32 because // the encoding for the i16 version is larger than the i32 version. - if (LHS.getValueType() == MVT::i8) + // Also promote i16 to i32 for performance / code size reason. + if (LHS.getValueType() == MVT::i8 || + LHS.getValueType() == MVT::i16) LHS = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i32, LHS); // If the operand types disagree, extend the shift amount to match. 
Since @@ -6052,7 +6102,7 @@ static SDValue LowerToBT(SDValue And, ISD::CondCode CC, return SDValue(); } -SDValue X86TargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) { +SDValue X86TargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const { assert(Op.getValueType() == MVT::i8 && "SetCC type must be 8-bit integer"); SDValue Op0 = Op.getOperand(0); SDValue Op1 = Op.getOperand(1); @@ -6088,7 +6138,7 @@ SDValue X86TargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) { DAG.getConstant(CCode, MVT::i8), Op0.getOperand(1)); } - bool isFP = Op.getOperand(1).getValueType().isFloatingPoint(); + bool isFP = Op1.getValueType().isFloatingPoint(); unsigned X86CC = TranslateX86CC(CC, isFP, Op0, Op1, DAG); if (X86CC == X86::COND_INVALID) return SDValue(); @@ -6106,7 +6156,7 @@ SDValue X86TargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) { DAG.getConstant(X86CC, MVT::i8), Cond); } -SDValue X86TargetLowering::LowerVSETCC(SDValue Op, SelectionDAG &DAG) { +SDValue X86TargetLowering::LowerVSETCC(SDValue Op, SelectionDAG &DAG) const { SDValue Cond; SDValue Op0 = Op.getOperand(0); SDValue Op1 = Op.getOperand(1); @@ -6242,7 +6292,7 @@ static bool isX86LogicalCmp(SDValue Op) { return false; } -SDValue X86TargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) { +SDValue X86TargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const { bool addTest = true; SDValue Cond = Op.getOperand(0); DebugLoc dl = Op.getDebugLoc(); @@ -6362,7 +6412,7 @@ static bool isXor1OfSetCC(SDValue Op) { return false; } -SDValue X86TargetLowering::LowerBRCOND(SDValue Op, SelectionDAG &DAG) { +SDValue X86TargetLowering::LowerBRCOND(SDValue Op, SelectionDAG &DAG) const { bool addTest = true; SDValue Chain = Op.getOperand(0); SDValue Cond = Op.getOperand(1); @@ -6514,7 +6564,7 @@ SDValue X86TargetLowering::LowerBRCOND(SDValue Op, SelectionDAG &DAG) { // correct sequence. SDValue X86TargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op, - SelectionDAG &DAG) { + SelectionDAG &DAG) const { assert(Subtarget->isTargetCygMing() && "This should be used only on Cygwin/Mingw targets"); DebugLoc dl = Op.getDebugLoc(); @@ -6550,7 +6600,7 @@ X86TargetLowering::EmitTargetCodeForMemset(SelectionDAG &DAG, DebugLoc dl, SDValue Size, unsigned Align, bool isVolatile, const Value *DstSV, - uint64_t DstSVOff) { + uint64_t DstSVOff) const { ConstantSDNode *ConstantSize = dyn_cast<ConstantSDNode>(Size); // If not DWORD aligned or size is more than the threshold, call the library. @@ -6689,8 +6739,10 @@ X86TargetLowering::EmitTargetCodeForMemcpy(SelectionDAG &DAG, DebugLoc dl, SDValue Chain, SDValue Dst, SDValue Src, SDValue Size, unsigned Align, bool isVolatile, bool AlwaysInline, - const Value *DstSV, uint64_t DstSVOff, - const Value *SrcSV, uint64_t SrcSVOff) { + const Value *DstSV, + uint64_t DstSVOff, + const Value *SrcSV, + uint64_t SrcSVOff) const { // This requires the copy size to be a constant, preferrably // within a subtarget-specific limit. ConstantSDNode *ConstantSize = dyn_cast<ConstantSDNode>(Size); @@ -6720,7 +6772,7 @@ X86TargetLowering::EmitTargetCodeForMemcpy(SelectionDAG &DAG, DebugLoc dl, Count, InFlag); InFlag = Chain.getValue(1); Chain = DAG.getCopyToReg(Chain, dl, Subtarget->is64Bit() ? X86::RDI : - X86::EDI, + X86::EDI, Dst, InFlag); InFlag = Chain.getValue(1); Chain = DAG.getCopyToReg(Chain, dl, Subtarget->is64Bit() ? 
X86::RSI : @@ -6756,14 +6808,18 @@ X86TargetLowering::EmitTargetCodeForMemcpy(SelectionDAG &DAG, DebugLoc dl, &Results[0], Results.size()); } -SDValue X86TargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG) { +SDValue X86TargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG) const { + MachineFunction &MF = DAG.getMachineFunction(); + X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>(); + const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue(); DebugLoc dl = Op.getDebugLoc(); if (!Subtarget->is64Bit()) { // vastart just stores the address of the VarArgsFrameIndex slot into the // memory location argument. - SDValue FR = DAG.getFrameIndex(VarArgsFrameIndex, getPointerTy()); + SDValue FR = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), + getPointerTy()); return DAG.getStore(Op.getOperand(0), dl, FR, Op.getOperand(1), SV, 0, false, false, 0); } @@ -6777,7 +6833,8 @@ SDValue X86TargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG) { SDValue FIN = Op.getOperand(1); // Store gp_offset SDValue Store = DAG.getStore(Op.getOperand(0), dl, - DAG.getConstant(VarArgsGPOffset, MVT::i32), + DAG.getConstant(FuncInfo->getVarArgsGPOffset(), + MVT::i32), FIN, SV, 0, false, false, 0); MemOps.push_back(Store); @@ -6785,14 +6842,16 @@ SDValue X86TargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG) { FIN = DAG.getNode(ISD::ADD, dl, getPointerTy(), FIN, DAG.getIntPtrConstant(4)); Store = DAG.getStore(Op.getOperand(0), dl, - DAG.getConstant(VarArgsFPOffset, MVT::i32), + DAG.getConstant(FuncInfo->getVarArgsFPOffset(), + MVT::i32), FIN, SV, 0, false, false, 0); MemOps.push_back(Store); // Store ptr to overflow_arg_area FIN = DAG.getNode(ISD::ADD, dl, getPointerTy(), FIN, DAG.getIntPtrConstant(4)); - SDValue OVFIN = DAG.getFrameIndex(VarArgsFrameIndex, getPointerTy()); + SDValue OVFIN = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), + getPointerTy()); Store = DAG.getStore(Op.getOperand(0), dl, OVFIN, FIN, SV, 0, false, false, 0); MemOps.push_back(Store); @@ -6800,7 +6859,8 @@ SDValue X86TargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG) { // Store ptr to reg_save_area. FIN = DAG.getNode(ISD::ADD, dl, getPointerTy(), FIN, DAG.getIntPtrConstant(8)); - SDValue RSFIN = DAG.getFrameIndex(RegSaveFrameIndex, getPointerTy()); + SDValue RSFIN = DAG.getFrameIndex(FuncInfo->getRegSaveFrameIndex(), + getPointerTy()); Store = DAG.getStore(Op.getOperand(0), dl, RSFIN, FIN, SV, 0, false, false, 0); MemOps.push_back(Store); @@ -6808,18 +6868,18 @@ SDValue X86TargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG) { &MemOps[0], MemOps.size()); } -SDValue X86TargetLowering::LowerVAARG(SDValue Op, SelectionDAG &DAG) { +SDValue X86TargetLowering::LowerVAARG(SDValue Op, SelectionDAG &DAG) const { // X86-64 va_list is a struct { i32, i32, i8*, i8* }. assert(Subtarget->is64Bit() && "This code only handles 64-bit va_arg!"); SDValue Chain = Op.getOperand(0); SDValue SrcPtr = Op.getOperand(1); SDValue SrcSV = Op.getOperand(2); - llvm_report_error("VAArgInst is not yet implemented for x86-64!"); + report_fatal_error("VAArgInst is not yet implemented for x86-64!"); return SDValue(); } -SDValue X86TargetLowering::LowerVACOPY(SDValue Op, SelectionDAG &DAG) { +SDValue X86TargetLowering::LowerVACOPY(SDValue Op, SelectionDAG &DAG) const { // X86-64 va_list is a struct { i32, i32, i8*, i8* }. 
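// A sketch of the SysV x86-64 va_list that the LowerVASTART stores above
// populate; the field offsets (0, 4, 8, 16) match the +4/+4/+8 pointer
// increments used when writing the four fields.
#include <cstdint>

struct VAListX86_64 {
  uint32_t gp_offset;          // offset 0:  next GP slot in the save area
  uint32_t fp_offset;          // offset 4:  next XMM slot in the save area
  void    *overflow_arg_area;  // offset 8:  next stack-passed argument
  void    *reg_save_area;      // offset 16: base of the register save area
};

static_assert(sizeof(VAListX86_64) == 24, "i32, i32, i8*, i8* layout");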
assert(Subtarget->is64Bit() && "This code only handles 64-bit va_copy!"); SDValue Chain = Op.getOperand(0); @@ -6835,7 +6895,7 @@ SDValue X86TargetLowering::LowerVACOPY(SDValue Op, SelectionDAG &DAG) { } SDValue -X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) { +X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const { DebugLoc dl = Op.getDebugLoc(); unsigned IntNo = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue(); switch (IntNo) { @@ -7076,7 +7136,8 @@ X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) { } } -SDValue X86TargetLowering::LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) { +SDValue X86TargetLowering::LowerRETURNADDR(SDValue Op, + SelectionDAG &DAG) const { unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue(); DebugLoc dl = Op.getDebugLoc(); @@ -7097,7 +7158,7 @@ SDValue X86TargetLowering::LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) { RetAddrFI, NULL, 0, false, false, 0); } -SDValue X86TargetLowering::LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) { +SDValue X86TargetLowering::LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const { MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo(); MFI->setFrameAddressIsTaken(true); EVT VT = Op.getValueType(); @@ -7112,12 +7173,11 @@ SDValue X86TargetLowering::LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) { } SDValue X86TargetLowering::LowerFRAME_TO_ARGS_OFFSET(SDValue Op, - SelectionDAG &DAG) { + SelectionDAG &DAG) const { return DAG.getIntPtrConstant(2*TD->getPointerSize()); } -SDValue X86TargetLowering::LowerEH_RETURN(SDValue Op, SelectionDAG &DAG) -{ +SDValue X86TargetLowering::LowerEH_RETURN(SDValue Op, SelectionDAG &DAG) const { MachineFunction &MF = DAG.getMachineFunction(); SDValue Chain = Op.getOperand(0); SDValue Offset = Op.getOperand(1); @@ -7141,7 +7201,7 @@ SDValue X86TargetLowering::LowerEH_RETURN(SDValue Op, SelectionDAG &DAG) } SDValue X86TargetLowering::LowerTRAMPOLINE(SDValue Op, - SelectionDAG &DAG) { + SelectionDAG &DAG) const { SDValue Root = Op.getOperand(0); SDValue Trmp = Op.getOperand(1); // trampoline SDValue FPtr = Op.getOperand(2); // nested function @@ -7232,7 +7292,7 @@ SDValue X86TargetLowering::LowerTRAMPOLINE(SDValue Op, InRegCount += (TD->getTypeSizeInBits(*I) + 31) / 32; if (InRegCount > 2) { - llvm_report_error("Nest register in use - reduce number of inreg parameters!"); + report_fatal_error("Nest register in use - reduce number of inreg parameters!"); } } break; @@ -7281,7 +7341,8 @@ SDValue X86TargetLowering::LowerTRAMPOLINE(SDValue Op, } } -SDValue X86TargetLowering::LowerFLT_ROUNDS_(SDValue Op, SelectionDAG &DAG) { +SDValue X86TargetLowering::LowerFLT_ROUNDS_(SDValue Op, + SelectionDAG &DAG) const { /* The rounding mode is in bits 11:10 of FPSR, and has the following settings: @@ -7343,7 +7404,7 @@ SDValue X86TargetLowering::LowerFLT_ROUNDS_(SDValue Op, SelectionDAG &DAG) { ISD::TRUNCATE : ISD::ZERO_EXTEND), dl, VT, RetVal); } -SDValue X86TargetLowering::LowerCTLZ(SDValue Op, SelectionDAG &DAG) { +SDValue X86TargetLowering::LowerCTLZ(SDValue Op, SelectionDAG &DAG) const { EVT VT = Op.getValueType(); EVT OpVT = VT; unsigned NumBits = VT.getSizeInBits(); @@ -7377,7 +7438,7 @@ SDValue X86TargetLowering::LowerCTLZ(SDValue Op, SelectionDAG &DAG) { return Op; } -SDValue X86TargetLowering::LowerCTTZ(SDValue Op, SelectionDAG &DAG) { +SDValue X86TargetLowering::LowerCTTZ(SDValue Op, SelectionDAG &DAG) const { EVT VT = Op.getValueType(); EVT OpVT = VT; unsigned NumBits = VT.getSizeInBits(); @@ -7407,7 
+7468,7 @@ SDValue X86TargetLowering::LowerCTTZ(SDValue Op, SelectionDAG &DAG) { return Op; } -SDValue X86TargetLowering::LowerMUL_V2I64(SDValue Op, SelectionDAG &DAG) { +SDValue X86TargetLowering::LowerMUL_V2I64(SDValue Op, SelectionDAG &DAG) const { EVT VT = Op.getValueType(); assert(VT == MVT::v2i64 && "Only know how to lower V2I64 multiply"); DebugLoc dl = Op.getDebugLoc(); @@ -7452,7 +7513,7 @@ SDValue X86TargetLowering::LowerMUL_V2I64(SDValue Op, SelectionDAG &DAG) { } -SDValue X86TargetLowering::LowerXALUO(SDValue Op, SelectionDAG &DAG) { +SDValue X86TargetLowering::LowerXALUO(SDValue Op, SelectionDAG &DAG) const { // Lower the "add/sub/mul with overflow" instruction into a regular ins plus // a "setcc" instruction that checks the overflow flag. The "brcond" lowering // looks for this combo and may remove the "setcc" instruction if the "setcc" @@ -7520,7 +7581,7 @@ SDValue X86TargetLowering::LowerXALUO(SDValue Op, SelectionDAG &DAG) { return Sum; } -SDValue X86TargetLowering::LowerCMP_SWAP(SDValue Op, SelectionDAG &DAG) { +SDValue X86TargetLowering::LowerCMP_SWAP(SDValue Op, SelectionDAG &DAG) const { EVT T = Op.getValueType(); DebugLoc dl = Op.getDebugLoc(); unsigned Reg = 0; @@ -7551,7 +7612,7 @@ SDValue X86TargetLowering::LowerCMP_SWAP(SDValue Op, SelectionDAG &DAG) { } SDValue X86TargetLowering::LowerREADCYCLECOUNTER(SDValue Op, - SelectionDAG &DAG) { + SelectionDAG &DAG) const { assert(Subtarget->is64Bit() && "Result not type legalized?"); SDVTList Tys = DAG.getVTList(MVT::Other, MVT::Flag); SDValue TheChain = Op.getOperand(0); @@ -7569,7 +7630,7 @@ SDValue X86TargetLowering::LowerREADCYCLECOUNTER(SDValue Op, return DAG.getMergeValues(Ops, 2, dl); } -SDValue X86TargetLowering::LowerLOAD_SUB(SDValue Op, SelectionDAG &DAG) { +SDValue X86TargetLowering::LowerLOAD_SUB(SDValue Op, SelectionDAG &DAG) const { SDNode *Node = Op.getNode(); DebugLoc dl = Node->getDebugLoc(); EVT T = Node->getValueType(0); @@ -7585,7 +7646,7 @@ SDValue X86TargetLowering::LowerLOAD_SUB(SDValue Op, SelectionDAG &DAG) { /// LowerOperation - Provide custom lowering hooks for some operations. /// -SDValue X86TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) { +SDValue X86TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { switch (Op.getOpcode()) { default: llvm_unreachable("Should not custom lower this!"); case ISD::ATOMIC_CMP_SWAP: return LowerCMP_SWAP(Op,DAG); @@ -7643,7 +7704,7 @@ SDValue X86TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) { void X86TargetLowering:: ReplaceATOMIC_BINARY_64(SDNode *Node, SmallVectorImpl<SDValue>&Results, - SelectionDAG &DAG, unsigned NewOp) { + SelectionDAG &DAG, unsigned NewOp) const { EVT T = Node->getValueType(0); DebugLoc dl = Node->getDebugLoc(); assert (T == MVT::i64 && "Only know how to expand i64 atomics"); @@ -7668,7 +7729,7 @@ ReplaceATOMIC_BINARY_64(SDNode *Node, SmallVectorImpl<SDValue>&Results, /// with a new node built out of custom code. void X86TargetLowering::ReplaceNodeResults(SDNode *N, SmallVectorImpl<SDValue>&Results, - SelectionDAG &DAG) { + SelectionDAG &DAG) const { DebugLoc dl = N->getDebugLoc(); switch (N->getOpcode()) { default: @@ -7941,9 +8002,9 @@ bool X86TargetLowering::isNarrowingProfitable(EVT VT1, EVT VT2) const { bool X86TargetLowering::isShuffleMaskLegal(const SmallVectorImpl<int> &M, EVT VT) const { - // Only do shuffles on 128-bit vector types for now. + // Very little shuffling can be done for 64-bit vectors right now. 
if (VT.getSizeInBits() == 64) - return false; + return isPALIGNRMask(M, VT, Subtarget->hasSSSE3()); // FIXME: pshufb, blends, shifts. return (VT.getVectorNumElements() == 2 || @@ -8448,8 +8509,7 @@ X86TargetLowering::EmitVAStartSaveXMMRegsWithCustomInserter( MachineBasicBlock * X86TargetLowering::EmitLoweredSelect(MachineInstr *MI, - MachineBasicBlock *BB, - DenseMap<MachineBasicBlock*, MachineBasicBlock*> *EM) const { + MachineBasicBlock *BB) const { const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); DebugLoc DL = MI->getDebugLoc(); @@ -8478,12 +8538,9 @@ X86TargetLowering::EmitLoweredSelect(MachineInstr *MI, F->insert(It, sinkMBB); // Update machine-CFG edges by first adding all successors of the current // block to the new block which will contain the Phi node for the select. - // Also inform sdisel of the edge changes. for (MachineBasicBlock::succ_iterator I = BB->succ_begin(), - E = BB->succ_end(); I != E; ++I) { - EM->insert(std::make_pair(*I, sinkMBB)); + E = BB->succ_end(); I != E; ++I) sinkMBB->addSuccessor(*I); - } // Next, remove all successors of the current block, and add the true // and fallthrough blocks as its successors. while (!BB->succ_empty()) @@ -8495,27 +8552,22 @@ X86TargetLowering::EmitLoweredSelect(MachineInstr *MI, // copy0MBB: // %FalseValue = ... // # fallthrough to sinkMBB - BB = copy0MBB; - - // Update machine-CFG edges - BB->addSuccessor(sinkMBB); + copy0MBB->addSuccessor(sinkMBB); // sinkMBB: // %Result = phi [ %FalseValue, copy0MBB ], [ %TrueValue, thisMBB ] // ... - BB = sinkMBB; - BuildMI(BB, DL, TII->get(X86::PHI), MI->getOperand(0).getReg()) + BuildMI(sinkMBB, DL, TII->get(X86::PHI), MI->getOperand(0).getReg()) .addReg(MI->getOperand(1).getReg()).addMBB(copy0MBB) .addReg(MI->getOperand(2).getReg()).addMBB(thisMBB); F->DeleteMachineInstr(MI); // The pseudo instruction is gone now. - return BB; + return sinkMBB; } MachineBasicBlock * X86TargetLowering::EmitLoweredMingwAlloca(MachineInstr *MI, - MachineBasicBlock *BB, - DenseMap<MachineBasicBlock*, MachineBasicBlock*> *EM) const { + MachineBasicBlock *BB) const { const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); DebugLoc DL = MI->getDebugLoc(); MachineFunction *F = BB->getParent(); @@ -8538,12 +8590,11 @@ X86TargetLowering::EmitLoweredMingwAlloca(MachineInstr *MI, MachineBasicBlock * X86TargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, - MachineBasicBlock *BB, - DenseMap<MachineBasicBlock*, MachineBasicBlock*> *EM) const { + MachineBasicBlock *BB) const { switch (MI->getOpcode()) { default: assert(false && "Unexpected instr type to insert"); case X86::MINGW_ALLOCA: - return EmitLoweredMingwAlloca(MI, BB, EM); + return EmitLoweredMingwAlloca(MI, BB); case X86::CMOV_GR8: case X86::CMOV_V1I64: case X86::CMOV_FR32: @@ -8556,7 +8607,7 @@ X86TargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, case X86::CMOV_RFP32: case X86::CMOV_RFP64: case X86::CMOV_RFP80: - return EmitLoweredSelect(MI, BB, EM); + return EmitLoweredSelect(MI, BB); case X86::FP32_TO_INT16_IN_MEM: case X86::FP32_TO_INT32_IN_MEM: @@ -8638,21 +8689,6 @@ X86TargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, F->DeleteMachineInstr(MI); // The pseudo instruction is gone now. return BB; } - // DBG_VALUE. Only the frame index case is done here. 
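// A sketch of what EmitLoweredSelect() above builds for a CMOV_* pseudo:
// since these register classes have no real conditional-move instruction, the
// pseudo "Result = Cond ? TrueVal : FalseVal" is expanded into a diamond of
// blocks with a PHI at the join, as its in-code comments describe:
//
//   thisMBB:   conditional branch to sinkMBB
//   copy0MBB:  compute FalseValue, fall through
//   sinkMBB:   Result = phi [FalseValue, copy0MBB], [TrueValue, thisMBB]
//
// The scalar semantics of that expansion, for reference:
template <typename T>
static T selectExpandedAsDiamond(bool Cond, T TrueVal, T FalseVal) {
  if (Cond)
    return TrueVal;   // reached sinkMBB directly from thisMBB
  return FalseVal;    // reached sinkMBB through copy0MBB
}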
- case X86::DBG_VALUE: { - const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); - DebugLoc DL = MI->getDebugLoc(); - X86AddressMode AM; - MachineFunction *F = BB->getParent(); - AM.BaseType = X86AddressMode::FrameIndexBase; - AM.Base.FrameIndex = MI->getOperand(0).getImm(); - addFullAddress(BuildMI(BB, DL, TII->get(X86::DBG_VALUE)), AM). - addImm(MI->getOperand(1).getImm()). - addMetadata(MI->getOperand(2).getMetadata()); - F->DeleteMachineInstr(MI); // Remove pseudo. - return BB; - } - // String/text processing lowering. case X86::PCMPISTRM128REG: return EmitPCMP(MI, BB, 3, false /* in-mem */); @@ -8874,7 +8910,8 @@ void X86TargetLowering::computeMaskedBitsForTargetNode(const SDValue Op, /// isGAPlusOffset - Returns true (and the GlobalValue and the offset) if the /// node is a GlobalAddress + offset. bool X86TargetLowering::isGAPlusOffset(SDNode *N, - GlobalValue* &GA, int64_t &Offset) const{ + const GlobalValue* &GA, + int64_t &Offset) const { if (N->getOpcode() == X86ISD::Wrapper) { if (isa<GlobalAddressSDNode>(N->getOperand(0))) { GA = cast<GlobalAddressSDNode>(N->getOperand(0))->getGlobal(); @@ -9558,9 +9595,13 @@ static SDValue PerformShiftCombine(SDNode* N, SelectionDAG &DAG, } static SDValue PerformOrCombine(SDNode *N, SelectionDAG &DAG, + TargetLowering::DAGCombinerInfo &DCI, const X86Subtarget *Subtarget) { + if (DCI.isBeforeLegalizeOps()) + return SDValue(); + EVT VT = N->getValueType(0); - if (VT != MVT::i64 || !Subtarget->is64Bit()) + if (VT != MVT::i16 && VT != MVT::i32 && VT != MVT::i64) return SDValue(); // fold (or (x << c) | (y >> (64 - c))) ==> (shld64 x, y, c) @@ -9570,6 +9611,8 @@ static SDValue PerformOrCombine(SDNode *N, SelectionDAG &DAG, std::swap(N0, N1); if (N0.getOpcode() != ISD::SHL || N1.getOpcode() != ISD::SRL) return SDValue(); + if (!N0.hasOneUse() || !N1.hasOneUse()) + return SDValue(); SDValue ShAmt0 = N0.getOperand(1); if (ShAmt0.getValueType() != MVT::i8) @@ -9592,10 +9635,11 @@ static SDValue PerformOrCombine(SDNode *N, SelectionDAG &DAG, std::swap(ShAmt0, ShAmt1); } + unsigned Bits = VT.getSizeInBits(); if (ShAmt1.getOpcode() == ISD::SUB) { SDValue Sum = ShAmt1.getOperand(0); if (ConstantSDNode *SumC = dyn_cast<ConstantSDNode>(Sum)) { - if (SumC->getSExtValue() == 64 && + if (SumC->getSExtValue() == Bits && ShAmt1.getOperand(1) == ShAmt0) return DAG.getNode(Opc, DL, VT, Op0, Op1, @@ -9605,7 +9649,7 @@ static SDValue PerformOrCombine(SDNode *N, SelectionDAG &DAG, } else if (ConstantSDNode *ShAmt1C = dyn_cast<ConstantSDNode>(ShAmt1)) { ConstantSDNode *ShAmt0C = dyn_cast<ConstantSDNode>(ShAmt0); if (ShAmt0C && - ShAmt0C->getSExtValue() + ShAmt1C->getSExtValue() == 64) + ShAmt0C->getSExtValue() + ShAmt1C->getSExtValue() == Bits) return DAG.getNode(Opc, DL, VT, N0.getOperand(0), N1.getOperand(0), DAG.getNode(ISD::TRUNCATE, DL, @@ -9769,8 +9813,9 @@ static SDValue PerformBTCombine(SDNode *N, unsigned BitWidth = Op1.getValueSizeInBits(); APInt DemandedMask = APInt::getLowBitsSet(BitWidth, Log2_32(BitWidth)); APInt KnownZero, KnownOne; - TargetLowering::TargetLoweringOpt TLO(DAG); - TargetLowering &TLI = DAG.getTargetLoweringInfo(); + TargetLowering::TargetLoweringOpt TLO(DAG, !DCI.isBeforeLegalize(), + !DCI.isBeforeLegalizeOps()); + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); if (TLO.ShrinkDemandedConstant(Op1, DemandedMask) || TLI.SimplifyDemandedBits(Op1, DemandedMask, KnownZero, KnownOne, TLO)) DCI.CommitTargetLoweringOpt(TLO); @@ -9883,7 +9928,7 @@ SDValue X86TargetLowering::PerformDAGCombine(SDNode *N, case ISD::SHL: case 
ISD::SRA: case ISD::SRL: return PerformShiftCombine(N, DAG, Subtarget); - case ISD::OR: return PerformOrCombine(N, DAG, Subtarget); + case ISD::OR: return PerformOrCombine(N, DAG, DCI, Subtarget); case ISD::STORE: return PerformSTORECombine(N, DAG, Subtarget); case X86ISD::FXOR: case X86ISD::FOR: return PerformFORCombine(N, DAG); @@ -9897,6 +9942,111 @@ SDValue X86TargetLowering::PerformDAGCombine(SDNode *N, return SDValue(); } +/// isTypeDesirableForOp - Return true if the target has native support for +/// the specified value type and it is 'desirable' to use the type for the +/// given node type. e.g. On x86 i16 is legal, but undesirable since i16 +/// instruction encodings are longer and some i16 instructions are slow. +bool X86TargetLowering::isTypeDesirableForOp(unsigned Opc, EVT VT) const { + if (!isTypeLegal(VT)) + return false; + if (VT != MVT::i16) + return true; + + switch (Opc) { + default: + return true; + case ISD::LOAD: + case ISD::SIGN_EXTEND: + case ISD::ZERO_EXTEND: + case ISD::ANY_EXTEND: + case ISD::SHL: + case ISD::SRL: + case ISD::SUB: + case ISD::ADD: + case ISD::MUL: + case ISD::AND: + case ISD::OR: + case ISD::XOR: + return false; + } +} + +static bool MayFoldLoad(SDValue Op) { + return Op.hasOneUse() && ISD::isNormalLoad(Op.getNode()); +} + +static bool MayFoldIntoStore(SDValue Op) { + return Op.hasOneUse() && ISD::isNormalStore(*Op.getNode()->use_begin()); +} + +/// IsDesirableToPromoteOp - This method query the target whether it is +/// beneficial for dag combiner to promote the specified node. If true, it +/// should return the desired promotion type by reference. +bool X86TargetLowering::IsDesirableToPromoteOp(SDValue Op, EVT &PVT) const { + EVT VT = Op.getValueType(); + if (VT != MVT::i16) + return false; + + bool Promote = false; + bool Commute = false; + switch (Op.getOpcode()) { + default: break; + case ISD::LOAD: { + LoadSDNode *LD = cast<LoadSDNode>(Op); + // If the non-extending load has a single use and it's not live out, then it + // might be folded. + if (LD->getExtensionType() == ISD::NON_EXTLOAD /*&& + Op.hasOneUse()*/) { + for (SDNode::use_iterator UI = Op.getNode()->use_begin(), + UE = Op.getNode()->use_end(); UI != UE; ++UI) { + // The only case where we'd want to promote LOAD (rather then it being + // promoted as an operand is when it's only use is liveout. + if (UI->getOpcode() != ISD::CopyToReg) + return false; + } + } + Promote = true; + break; + } + case ISD::SIGN_EXTEND: + case ISD::ZERO_EXTEND: + case ISD::ANY_EXTEND: + Promote = true; + break; + case ISD::SHL: + case ISD::SRL: { + SDValue N0 = Op.getOperand(0); + // Look out for (store (shl (load), x)). + if (MayFoldLoad(N0) && MayFoldIntoStore(Op)) + return false; + Promote = true; + break; + } + case ISD::ADD: + case ISD::MUL: + case ISD::AND: + case ISD::OR: + case ISD::XOR: + Commute = true; + // fallthrough + case ISD::SUB: { + SDValue N0 = Op.getOperand(0); + SDValue N1 = Op.getOperand(1); + if (!Commute && MayFoldLoad(N1)) + return false; + // Avoid disabling potential load folding opportunities. 
+ if (MayFoldLoad(N0) && (!isa<ConstantSDNode>(N1) || MayFoldIntoStore(Op))) + return false; + if (MayFoldLoad(N1) && (!isa<ConstantSDNode>(N0) || MayFoldIntoStore(Op))) + return false; + Promote = true; + } + } + + PVT = MVT::i32; + return Promote; +} + //===----------------------------------------------------------------------===// // X86 Inline Assembly Support //===----------------------------------------------------------------------===// @@ -10159,7 +10309,7 @@ void X86TargetLowering::LowerAsmOperandForConstraint(SDValue Op, return; } - GlobalValue *GV = GA->getGlobal(); + const GlobalValue *GV = GA->getGlobal(); // If we require an extra load to get this address, as in PIC mode, we // can't accept it. if (isGlobalStubReference(Subtarget->ClassifyGlobalReference(GV, diff --git a/lib/Target/X86/X86ISelLowering.h b/lib/Target/X86/X86ISelLowering.h index 1026480..440601f9 100644 --- a/lib/Target/X86/X86ISelLowering.h +++ b/lib/Target/X86/X86ISelLowering.h @@ -374,12 +374,6 @@ namespace llvm { //===--------------------------------------------------------------------===// // X86TargetLowering - X86 Implementation of the TargetLowering interface class X86TargetLowering : public TargetLowering { - int VarArgsFrameIndex; // FrameIndex for start of varargs area. - int RegSaveFrameIndex; // X86-64 vararg func register save area. - unsigned VarArgsGPOffset; // X86-64 vararg func int reg offset. - unsigned VarArgsFPOffset; // X86-64 vararg func fp reg offset. - int BytesToPopOnReturn; // Number of arg bytes ret should pop. - public: explicit X86TargetLowering(X86TargetMachine &TM); @@ -401,11 +395,6 @@ namespace llvm { getPICJumpTableRelocBaseExpr(const MachineFunction *MF, unsigned JTI, MCContext &Ctx) const; - // Return the number of bytes that a function should pop when it returns (in - // addition to the space used by the return address). - // - unsigned getBytesToPopOnReturn() const { return BytesToPopOnReturn; } - /// getStackPtrReg - Return the stack pointer register we are using: either /// ESP or RSP. unsigned getStackPtrReg() const { return X86StackPtr; } @@ -424,12 +413,14 @@ namespace llvm { /// probably because the source does not need to be loaded. If /// 'NonScalarIntSafe' is true, that means it's safe to return a /// non-scalar-integer type, e.g. empty string source, constant, or loaded - /// from memory. It returns EVT::Other if SelectionDAG should be responsible - /// for determining it. + /// from memory. 'MemcpyStrSrc' indicates whether the memcpy source is + /// constant so it does not need to be loaded. + /// It returns EVT::Other if the type should be determined using generic + /// target-independent logic. virtual EVT - getOptimalMemOpType(uint64_t Size, - unsigned DstAlign, unsigned SrcAlign, - bool NonScalarIntSafe, SelectionDAG &DAG) const; + getOptimalMemOpType(uint64_t Size, unsigned DstAlign, unsigned SrcAlign, + bool NonScalarIntSafe, bool MemcpyStrSrc, + MachineFunction &MF) const; /// allowsUnalignedMemoryAccesses - Returns true if the target allows /// unaligned memory accesses. of the specified type. @@ -439,20 +430,32 @@ namespace llvm { /// LowerOperation - Provide custom lowering hooks for some operations. /// - virtual SDValue LowerOperation(SDValue Op, SelectionDAG &DAG); + virtual SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const; /// ReplaceNodeResults - Replace the results of node with an illegal result /// type with new values built out of custom code. 
/// virtual void ReplaceNodeResults(SDNode *N, SmallVectorImpl<SDValue>&Results, - SelectionDAG &DAG); + SelectionDAG &DAG) const; virtual SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const; - virtual MachineBasicBlock *EmitInstrWithCustomInserter(MachineInstr *MI, - MachineBasicBlock *MBB, - DenseMap<MachineBasicBlock*, MachineBasicBlock*> *EM) const; + /// isTypeDesirableForOp - Return true if the target has native support for + /// the specified value type and it is 'desirable' to use the type for the + /// given node type. e.g. On x86 i16 is legal, but undesirable since i16 + /// instruction encodings are longer and some i16 instructions are slow. + virtual bool isTypeDesirableForOp(unsigned Opc, EVT VT) const; + + /// isTypeDesirable - Return true if the target has native support for the + /// specified value type and it is 'desirable' to use the type. e.g. On x86 + /// i16 is legal, but undesirable since i16 instruction encodings are longer + /// and some i16 instructions are slow. + virtual bool IsDesirableToPromoteOp(SDValue Op, EVT &PVT) const; + + virtual MachineBasicBlock * + EmitInstrWithCustomInserter(MachineInstr *MI, + MachineBasicBlock *MBB) const; /// getTargetNodeName - This method returns the name of a target specific @@ -473,9 +476,9 @@ namespace llvm { unsigned Depth = 0) const; virtual bool - isGAPlusOffset(SDNode *N, GlobalValue* &GA, int64_t &Offset) const; + isGAPlusOffset(SDNode *N, const GlobalValue* &GA, int64_t &Offset) const; - SDValue getReturnAddressFrameIndex(SelectionDAG &DAG); + SDValue getReturnAddressFrameIndex(SelectionDAG &DAG) const; virtual bool ExpandInlineAsm(CallInst *CI) const; @@ -560,7 +563,7 @@ namespace llvm { return !X86ScalarSSEf64 || VT == MVT::f80; } - virtual const X86Subtarget* getSubtarget() { + virtual const X86Subtarget* getSubtarget() const { return Subtarget; } @@ -577,11 +580,12 @@ namespace llvm { createFastISel(MachineFunction &mf, DenseMap<const Value *, unsigned> &, DenseMap<const BasicBlock *, MachineBasicBlock *> &, - DenseMap<const AllocaInst *, int> & + DenseMap<const AllocaInst *, int> &, + std::vector<std::pair<MachineInstr*, unsigned> > & #ifndef NDEBUG - , SmallSet<Instruction*, 8> & + , SmallSet<const Instruction *, 8> & #endif - ); + ) const; /// getFunctionAlignment - Return the Log2 alignment of this function. virtual unsigned getFunctionAlignment(const Function *F) const; @@ -616,17 +620,17 @@ namespace llvm { CallingConv::ID CallConv, bool isVarArg, const SmallVectorImpl<ISD::InputArg> &Ins, DebugLoc dl, SelectionDAG &DAG, - SmallVectorImpl<SDValue> &InVals); + SmallVectorImpl<SDValue> &InVals) const; SDValue LowerMemArgument(SDValue Chain, CallingConv::ID CallConv, const SmallVectorImpl<ISD::InputArg> &ArgInfo, DebugLoc dl, SelectionDAG &DAG, const CCValAssign &VA, MachineFrameInfo *MFI, - unsigned i); + unsigned i) const; SDValue LowerMemOpCallTo(SDValue Chain, SDValue StackPtr, SDValue Arg, DebugLoc dl, SelectionDAG &DAG, const CCValAssign &VA, - ISD::ArgFlagsTy Flags); + ISD::ArgFlagsTy Flags) const; // Call lowering helpers. 
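// A plain-C++ illustration of what the isTypeDesirableForOp /
// IsDesirableToPromoteOp hooks declared above enable: even though i16 is a
// legal type, 16-bit arithmetic carries an operand-size prefix and can be
// slower, so the DAG combiner is told to widen such operations to i32 and
// truncate the result.  Sketch of the equivalence being exploited.
#include <cstdint>

static uint16_t addPromotedToI32(uint16_t A, uint16_t B) {
  uint32_t Wide = uint32_t(A) + uint32_t(B);  // do the arithmetic in i32
  return uint16_t(Wide);                      // truncate back to i16
}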
@@ -641,114 +645,120 @@ namespace llvm { const SmallVectorImpl<ISD::OutputArg> &Outs, const SmallVectorImpl<ISD::InputArg> &Ins, SelectionDAG& DAG) const; - bool IsCalleePop(bool isVarArg, CallingConv::ID CallConv); + bool IsCalleePop(bool isVarArg, CallingConv::ID CallConv) const; SDValue EmitTailCallLoadRetAddr(SelectionDAG &DAG, SDValue &OutRetAddr, SDValue Chain, bool IsTailCall, bool Is64Bit, - int FPDiff, DebugLoc dl); + int FPDiff, DebugLoc dl) const; CCAssignFn *CCAssignFnForNode(CallingConv::ID CallConv) const; - unsigned GetAlignedArgumentStackSize(unsigned StackSize, SelectionDAG &DAG); + unsigned GetAlignedArgumentStackSize(unsigned StackSize, + SelectionDAG &DAG) const; std::pair<SDValue,SDValue> FP_TO_INTHelper(SDValue Op, SelectionDAG &DAG, - bool isSigned); + bool isSigned) const; SDValue LowerAsSplatVectorLoad(SDValue SrcOp, EVT VT, DebugLoc dl, - SelectionDAG &DAG); - SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG); - SDValue LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG); - SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG); - SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG); - SDValue LowerEXTRACT_VECTOR_ELT_SSE4(SDValue Op, SelectionDAG &DAG); - SDValue LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG); - SDValue LowerINSERT_VECTOR_ELT_SSE4(SDValue Op, SelectionDAG &DAG); - SDValue LowerSCALAR_TO_VECTOR(SDValue Op, SelectionDAG &DAG); - SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG); - SDValue LowerBlockAddress(SDValue Op, SelectionDAG &DAG); + SelectionDAG &DAG) const; + SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerEXTRACT_VECTOR_ELT_SSE4(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerINSERT_VECTOR_ELT_SSE4(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerSCALAR_TO_VECTOR(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const; SDValue LowerGlobalAddress(const GlobalValue *GV, DebugLoc dl, int64_t Offset, SelectionDAG &DAG) const; - SDValue LowerGlobalAddress(SDValue Op, SelectionDAG &DAG); - SDValue LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG); - SDValue LowerExternalSymbol(SDValue Op, SelectionDAG &DAG); - SDValue LowerShift(SDValue Op, SelectionDAG &DAG); + SDValue LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerExternalSymbol(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerShift(SDValue Op, SelectionDAG &DAG) const; SDValue BuildFILD(SDValue Op, EVT SrcVT, SDValue Chain, SDValue StackSlot, - SelectionDAG &DAG); - SDValue LowerSINT_TO_FP(SDValue Op, SelectionDAG &DAG); - SDValue LowerUINT_TO_FP(SDValue Op, SelectionDAG &DAG); - SDValue LowerUINT_TO_FP_i64(SDValue Op, SelectionDAG &DAG); - SDValue LowerUINT_TO_FP_i32(SDValue Op, SelectionDAG &DAG); - SDValue LowerFP_TO_SINT(SDValue Op, SelectionDAG &DAG); - SDValue LowerFP_TO_UINT(SDValue Op, SelectionDAG &DAG); - SDValue LowerFABS(SDValue Op, SelectionDAG &DAG); - SDValue LowerFNEG(SDValue Op, SelectionDAG &DAG); - SDValue LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG); - SDValue LowerSETCC(SDValue Op, SelectionDAG &DAG); - SDValue 
LowerVSETCC(SDValue Op, SelectionDAG &DAG); - SDValue LowerSELECT(SDValue Op, SelectionDAG &DAG); - SDValue LowerBRCOND(SDValue Op, SelectionDAG &DAG); - SDValue LowerMEMSET(SDValue Op, SelectionDAG &DAG); - SDValue LowerJumpTable(SDValue Op, SelectionDAG &DAG); - SDValue LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG); - SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG); - SDValue LowerVAARG(SDValue Op, SelectionDAG &DAG); - SDValue LowerVACOPY(SDValue Op, SelectionDAG &DAG); - SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG); - SDValue LowerRETURNADDR(SDValue Op, SelectionDAG &DAG); - SDValue LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG); - SDValue LowerFRAME_TO_ARGS_OFFSET(SDValue Op, SelectionDAG &DAG); - SDValue LowerEH_RETURN(SDValue Op, SelectionDAG &DAG); - SDValue LowerTRAMPOLINE(SDValue Op, SelectionDAG &DAG); - SDValue LowerFLT_ROUNDS_(SDValue Op, SelectionDAG &DAG); - SDValue LowerCTLZ(SDValue Op, SelectionDAG &DAG); - SDValue LowerCTTZ(SDValue Op, SelectionDAG &DAG); - SDValue LowerMUL_V2I64(SDValue Op, SelectionDAG &DAG); - SDValue LowerXALUO(SDValue Op, SelectionDAG &DAG); - - SDValue LowerCMP_SWAP(SDValue Op, SelectionDAG &DAG); - SDValue LowerLOAD_SUB(SDValue Op, SelectionDAG &DAG); - SDValue LowerREADCYCLECOUNTER(SDValue Op, SelectionDAG &DAG); + SelectionDAG &DAG) const; + SDValue LowerSINT_TO_FP(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerUINT_TO_FP(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerUINT_TO_FP_i64(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerUINT_TO_FP_i32(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerFP_TO_SINT(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerFP_TO_UINT(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerFABS(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerFNEG(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerToBT(SDValue And, ISD::CondCode CC, + DebugLoc dl, SelectionDAG &DAG) const; + SDValue LowerSETCC(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerVSETCC(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerSELECT(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerBRCOND(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerMEMSET(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerJumpTable(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerVAARG(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerVACOPY(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerFRAME_TO_ARGS_OFFSET(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerEH_RETURN(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerTRAMPOLINE(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerFLT_ROUNDS_(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerCTLZ(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerCTTZ(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerMUL_V2I64(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerXALUO(SDValue Op, SelectionDAG &DAG) const; + + SDValue LowerCMP_SWAP(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerLOAD_SUB(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerREADCYCLECOUNTER(SDValue Op, SelectionDAG &DAG) const; virtual SDValue LowerFormalArguments(SDValue 
Chain, CallingConv::ID CallConv, bool isVarArg, const SmallVectorImpl<ISD::InputArg> &Ins, DebugLoc dl, SelectionDAG &DAG, - SmallVectorImpl<SDValue> &InVals); + SmallVectorImpl<SDValue> &InVals) const; virtual SDValue LowerCall(SDValue Chain, SDValue Callee, CallingConv::ID CallConv, bool isVarArg, bool &isTailCall, const SmallVectorImpl<ISD::OutputArg> &Outs, const SmallVectorImpl<ISD::InputArg> &Ins, DebugLoc dl, SelectionDAG &DAG, - SmallVectorImpl<SDValue> &InVals); + SmallVectorImpl<SDValue> &InVals) const; virtual SDValue LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool isVarArg, const SmallVectorImpl<ISD::OutputArg> &Outs, - DebugLoc dl, SelectionDAG &DAG); + DebugLoc dl, SelectionDAG &DAG) const; virtual bool CanLowerReturn(CallingConv::ID CallConv, bool isVarArg, const SmallVectorImpl<EVT> &OutTys, const SmallVectorImpl<ISD::ArgFlagsTy> &ArgsFlags, - SelectionDAG &DAG); + SelectionDAG &DAG) const; void ReplaceATOMIC_BINARY_64(SDNode *N, SmallVectorImpl<SDValue> &Results, - SelectionDAG &DAG, unsigned NewOp); + SelectionDAG &DAG, unsigned NewOp) const; SDValue EmitTargetCodeForMemset(SelectionDAG &DAG, DebugLoc dl, SDValue Chain, SDValue Dst, SDValue Src, SDValue Size, unsigned Align, bool isVolatile, - const Value *DstSV, uint64_t DstSVOff); + const Value *DstSV, + uint64_t DstSVOff) const; SDValue EmitTargetCodeForMemcpy(SelectionDAG &DAG, DebugLoc dl, SDValue Chain, SDValue Dst, SDValue Src, SDValue Size, unsigned Align, bool isVolatile, bool AlwaysInline, - const Value *DstSV, uint64_t DstSVOff, - const Value *SrcSV, uint64_t SrcSVOff); + const Value *DstSV, + uint64_t DstSVOff, + const Value *SrcSV, + uint64_t SrcSVOff) const; /// Utility function to emit string processing sse4.2 instructions /// that return in xmm0. @@ -796,30 +806,29 @@ namespace llvm { MachineBasicBlock *BB) const; MachineBasicBlock *EmitLoweredSelect(MachineInstr *I, - MachineBasicBlock *BB, - DenseMap<MachineBasicBlock*, MachineBasicBlock*> *EM) const; + MachineBasicBlock *BB) const; MachineBasicBlock *EmitLoweredMingwAlloca(MachineInstr *MI, - MachineBasicBlock *BB, - DenseMap<MachineBasicBlock*, MachineBasicBlock*> *EM) const; + MachineBasicBlock *BB) const; /// Emit nodes that will be selected as "test Op0,Op0", or something /// equivalent, for use with the given x86 condition code. - SDValue EmitTest(SDValue Op0, unsigned X86CC, SelectionDAG &DAG); + SDValue EmitTest(SDValue Op0, unsigned X86CC, SelectionDAG &DAG) const; /// Emit nodes that will be selected as "cmp Op0,Op1", or something /// equivalent, for use with the given x86 condition code. 
SDValue EmitCmp(SDValue Op0, SDValue Op1, unsigned X86CC, - SelectionDAG &DAG); + SelectionDAG &DAG) const; }; namespace X86 { FastISel *createFastISel(MachineFunction &mf, DenseMap<const Value *, unsigned> &, DenseMap<const BasicBlock *, MachineBasicBlock *> &, - DenseMap<const AllocaInst *, int> & + DenseMap<const AllocaInst *, int> &, + std::vector<std::pair<MachineInstr*, unsigned> > & #ifndef NDEBUG - , SmallSet<Instruction*, 8> & + , SmallSet<const Instruction*, 8> & #endif ); } diff --git a/lib/Target/X86/X86Instr64bit.td b/lib/Target/X86/X86Instr64bit.td index eef2ca0..f5c3dbf 100644 --- a/lib/Target/X86/X86Instr64bit.td +++ b/lib/Target/X86/X86Instr64bit.td @@ -2086,6 +2086,11 @@ def : Pat<(and (srl_su GR32:$src, (i8 8)), (i32 255)), (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS GR32:$src, GR32_ABCD)), x86_subreg_8bit_hi))>, Requires<[In64BitMode]>; +def : Pat<(srl (and_su GR32:$src, 0xff00), (i8 8)), + (MOVZX32_NOREXrr8 (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS GR32:$src, + GR32_ABCD)), + x86_subreg_8bit_hi))>, + Requires<[In64BitMode]>; def : Pat<(srl GR16:$src, (i8 8)), (EXTRACT_SUBREG (MOVZX32_NOREXrr8 @@ -2156,21 +2161,6 @@ def : Pat<(sra GR64:$src1, (and CL, 63)), def : Pat<(store (sra (loadi64 addr:$dst), (and CL, 63)), addr:$dst), (SAR64mCL addr:$dst)>; -// Double shift patterns -def : Pat<(shrd GR64:$src1, (i8 imm:$amt1), GR64:$src2, (i8 imm)), - (SHRD64rri8 GR64:$src1, GR64:$src2, (i8 imm:$amt1))>; - -def : Pat<(store (shrd (loadi64 addr:$dst), (i8 imm:$amt1), - GR64:$src2, (i8 imm)), addr:$dst), - (SHRD64mri8 addr:$dst, GR64:$src2, (i8 imm:$amt1))>; - -def : Pat<(shld GR64:$src1, (i8 imm:$amt1), GR64:$src2, (i8 imm)), - (SHLD64rri8 GR64:$src1, GR64:$src2, (i8 imm:$amt1))>; - -def : Pat<(store (shld (loadi64 addr:$dst), (i8 imm:$amt1), - GR64:$src2, (i8 imm)), addr:$dst), - (SHLD64mri8 addr:$dst, GR64:$src2, (i8 imm:$amt1))>; - // (or x1, x2) -> (add x1, x2) if two operands are known not to share bits. let AddedComplexity = 5 in { // Try this before the selecting to OR def : Pat<(or_is_add GR64:$src1, i64immSExt8:$src2), @@ -2294,7 +2284,7 @@ def MOVPQIto64rr : RPDI<0x7E, MRMDestReg, (outs GR64:$dst), (ins VR128:$src), def MOV64toSDrr : RPDI<0x6E, MRMSrcReg, (outs FR64:$dst), (ins GR64:$src), "mov{d|q}\t{$src, $dst|$dst, $src}", [(set FR64:$dst, (bitconvert GR64:$src))]>; -def MOV64toSDrm : RPDI<0x6E, MRMSrcMem, (outs FR64:$dst), (ins i64mem:$src), +def MOV64toSDrm : S3SI<0x7E, MRMSrcMem, (outs FR64:$dst), (ins i64mem:$src), "movq\t{$src, $dst|$dst, $src}", [(set FR64:$dst, (bitconvert (loadi64 addr:$src)))]>; @@ -2347,15 +2337,3 @@ let isTwoAddress = 1 in { } defm PINSRQ : SS41I_insert64<0x22, "pinsrq">; - -// -disable-16bit support. 
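[Editor's note on the (srl (and_su GR32:$src, 0xff00), (i8 8)) pattern added above; illustrative sketch only, not part of the patch.] The existing h-register pattern matched the shift-then-mask form, (and (srl x, 8), 255); the new one catches the equivalent mask-then-shift form that the optimizer can also produce, e.g.:

    #include <cstdint>
    // Both forms extract the second-lowest byte; with these patterns either
    // one can be selected as a single zero-extending move from the high 8-bit
    // subregister (roughly "movzbl %ah, %eax") instead of an and plus a shift.
    uint32_t second_byte(uint32_t x) { return (x & 0xff00u) >> 8; }

MOVZX32_NOREXrr8 is used in the 64-bit-mode pattern because the high-byte registers (AH, BH, CH, DH) cannot be encoded in an instruction that carries a REX prefix.
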
-def : Pat<(truncstorei16 (i16 imm:$src), addr:$dst), - (MOV16mi addr:$dst, imm:$src)>; -def : Pat<(truncstorei16 GR64:$src, addr:$dst), - (MOV16mr addr:$dst, (EXTRACT_SUBREG GR64:$src, x86_subreg_16bit))>; -def : Pat<(i64 (sextloadi16 addr:$dst)), - (MOVSX64rm16 addr:$dst)>; -def : Pat<(i64 (zextloadi16 addr:$dst)), - (MOVZX64rm16 addr:$dst)>; -def : Pat<(i64 (extloadi16 addr:$dst)), - (MOVZX64rm16 addr:$dst)>; diff --git a/lib/Target/X86/X86InstrBuilder.h b/lib/Target/X86/X86InstrBuilder.h index c475b56..5a82a7b 100644 --- a/lib/Target/X86/X86InstrBuilder.h +++ b/lib/Target/X86/X86InstrBuilder.h @@ -49,7 +49,7 @@ struct X86AddressMode { unsigned Scale; unsigned IndexReg; int Disp; - GlobalValue *GV; + const GlobalValue *GV; unsigned GVOpFlags; X86AddressMode() diff --git a/lib/Target/X86/X86InstrFPStack.td b/lib/Target/X86/X86InstrFPStack.td index e6d1fee..0aae4a8 100644 --- a/lib/Target/X86/X86InstrFPStack.td +++ b/lib/Target/X86/X86InstrFPStack.td @@ -327,8 +327,8 @@ def TST_F : FPI<0xE4, RawFrm, (outs), (ins), "ftst">, D9; // Versions of FP instructions that take a single memory operand. Added for the // disassembler; remove as they are included with patterns elsewhere. -def FCOM32m : FPI<0xD8, MRM2m, (outs), (ins f32mem:$src), "fcom{l}\t$src">; -def FCOMP32m : FPI<0xD8, MRM3m, (outs), (ins f32mem:$src), "fcomp{l}\t$src">; +def FCOM32m : FPI<0xD8, MRM2m, (outs), (ins f32mem:$src), "fcom{s}\t$src">; +def FCOMP32m : FPI<0xD8, MRM3m, (outs), (ins f32mem:$src), "fcomp{s}\t$src">; def FLDENVm : FPI<0xD9, MRM4m, (outs), (ins f32mem:$src), "fldenv\t$src">; def FSTENVm : FPI<0xD9, MRM6m, (outs f32mem:$dst), (ins), "fnstenv\t$dst">; @@ -336,15 +336,15 @@ def FSTENVm : FPI<0xD9, MRM6m, (outs f32mem:$dst), (ins), "fnstenv\t$dst">; def FICOM32m : FPI<0xDA, MRM2m, (outs), (ins i32mem:$src), "ficom{l}\t$src">; def FICOMP32m: FPI<0xDA, MRM3m, (outs), (ins i32mem:$src), "ficomp{l}\t$src">; -def FCOM64m : FPI<0xDC, MRM2m, (outs), (ins f64mem:$src), "fcom{ll}\t$src">; -def FCOMP64m : FPI<0xDC, MRM3m, (outs), (ins f64mem:$src), "fcomp{ll}\t$src">; +def FCOM64m : FPI<0xDC, MRM2m, (outs), (ins f64mem:$src), "fcom{l}\t$src">; +def FCOMP64m : FPI<0xDC, MRM3m, (outs), (ins f64mem:$src), "fcomp{l}\t$src">; def FRSTORm : FPI<0xDD, MRM4m, (outs f32mem:$dst), (ins), "frstor\t$dst">; def FSAVEm : FPI<0xDD, MRM6m, (outs f32mem:$dst), (ins), "fnsave\t$dst">; def FNSTSWm : FPI<0xDD, MRM7m, (outs f32mem:$dst), (ins), "fnstsw\t$dst">; -def FICOM16m : FPI<0xDE, MRM2m, (outs), (ins i16mem:$src), "ficom{w}\t$src">; -def FICOMP16m: FPI<0xDE, MRM3m, (outs), (ins i16mem:$src), "ficomp{w}\t$src">; +def FICOM16m : FPI<0xDE, MRM2m, (outs), (ins i16mem:$src), "ficom{s}\t$src">; +def FICOMP16m: FPI<0xDE, MRM3m, (outs), (ins i16mem:$src), "ficomp{s}\t$src">; def FBLDm : FPI<0xDF, MRM4m, (outs), (ins f32mem:$src), "fbld\t$src">; def FBSTPm : FPI<0xDF, MRM6m, (outs f32mem:$dst), (ins), "fbstp\t$dst">; diff --git a/lib/Target/X86/X86InstrInfo.cpp b/lib/Target/X86/X86InstrInfo.cpp index ccb7b05..a21bfb9 100644 --- a/lib/Target/X86/X86InstrInfo.cpp +++ b/lib/Target/X86/X86InstrInfo.cpp @@ -27,6 +27,7 @@ #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/LiveVariables.h" #include "llvm/CodeGen/PseudoSourceValue.h" +#include "llvm/MC/MCInst.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" @@ -1684,6 +1685,7 @@ bool X86InstrInfo::AnalyzeBranch(MachineBasicBlock &MBB, // Start from the bottom of the block and work up, examining the // terminator 
instructions. MachineBasicBlock::iterator I = MBB.end(); + MachineBasicBlock::iterator UnCondBrIter = MBB.end(); while (I != MBB.begin()) { --I; if (I->isDebugValue()) @@ -1701,6 +1703,8 @@ bool X86InstrInfo::AnalyzeBranch(MachineBasicBlock &MBB, // Handle unconditional branches. if (I->getOpcode() == X86::JMP_4) { + UnCondBrIter = I; + if (!AllowModify) { TBB = I->getOperand(0).getMBB(); continue; @@ -1718,10 +1722,11 @@ bool X86InstrInfo::AnalyzeBranch(MachineBasicBlock &MBB, TBB = 0; I->eraseFromParent(); I = MBB.end(); + UnCondBrIter = MBB.end(); continue; } - // TBB is used to indicate the unconditinal destination. + // TBB is used to indicate the unconditional destination. TBB = I->getOperand(0).getMBB(); continue; } @@ -1733,6 +1738,45 @@ bool X86InstrInfo::AnalyzeBranch(MachineBasicBlock &MBB, // Working from the bottom, handle the first conditional branch. if (Cond.empty()) { + MachineBasicBlock *TargetBB = I->getOperand(0).getMBB(); + if (AllowModify && UnCondBrIter != MBB.end() && + MBB.isLayoutSuccessor(TargetBB)) { + // If we can modify the code and it ends in something like: + // + // jCC L1 + // jmp L2 + // L1: + // ... + // L2: + // + // Then we can change this to: + // + // jnCC L2 + // L1: + // ... + // L2: + // + // Which is a bit more efficient. + // We conditionally jump to the fall-through block. + BranchCode = GetOppositeBranchCondition(BranchCode); + unsigned JNCC = GetCondBranchFromCond(BranchCode); + MachineBasicBlock::iterator OldInst = I; + + BuildMI(MBB, UnCondBrIter, MBB.findDebugLoc(I), get(JNCC)) + .addMBB(UnCondBrIter->getOperand(0).getMBB()); + BuildMI(MBB, UnCondBrIter, MBB.findDebugLoc(I), get(X86::JMP_4)) + .addMBB(TargetBB); + MBB.addSuccessor(TargetBB); + + OldInst->eraseFromParent(); + UnCondBrIter->eraseFromParent(); + + // Restart the analysis. + UnCondBrIter = MBB.end(); + I = MBB.end(); + continue; + } + FBB = TBB; TBB = I->getOperand(0).getMBB(); Cond.push_back(MachineOperand::CreateImm(BranchCode)); @@ -2276,6 +2320,19 @@ bool X86InstrInfo::restoreCalleeSavedRegisters(MachineBasicBlock &MBB, return true; } +MachineInstr* +X86InstrInfo::emitFrameIndexDebugValue(MachineFunction &MF, + int FrameIx, uint64_t Offset, + const MDNode *MDPtr, + DebugLoc DL) const { + X86AddressMode AM; + AM.BaseType = X86AddressMode::FrameIndexBase; + AM.Base.FrameIndex = FrameIx; + MachineInstrBuilder MIB = BuildMI(MF, DL, get(X86::DBG_VALUE)); + addFullAddress(MIB, AM).addImm(Offset).addMetadata(MDPtr); + return &*MIB; +} + static MachineInstr *FuseTwoAddrInst(MachineFunction &MF, unsigned Opcode, const SmallVectorImpl<MachineOperand> &MOs, MachineInstr *MI, @@ -2586,7 +2643,7 @@ MachineInstr* X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF, Ty = Type::getDoubleTy(MF.getFunction()->getContext()); else Ty = VectorType::get(Type::getInt32Ty(MF.getFunction()->getContext()), 4); - Constant *C = LoadMI->getOpcode() == X86::V_SETALLONES ? + const Constant *C = LoadMI->getOpcode() == X86::V_SETALLONES ? 
Constant::getAllOnesValue(Ty) : Constant::getNullValue(Ty); unsigned CPI = MCP.getConstantPoolIndex(C, Alignment); @@ -3406,6 +3463,7 @@ static unsigned GetInstSizeWithDesc(const MachineInstr &MI, } case TargetOpcode::DBG_LABEL: case TargetOpcode::EH_LABEL: + case TargetOpcode::DBG_VALUE: break; case TargetOpcode::IMPLICIT_DEF: case TargetOpcode::KILL: @@ -3603,7 +3661,7 @@ static unsigned GetInstSizeWithDesc(const MachineInstr &MI, std::string msg; raw_string_ostream Msg(msg); Msg << "Cannot determine size: " << MI; - llvm_report_error(Msg.str()); + report_fatal_error(Msg.str()); } @@ -3709,3 +3767,9 @@ void X86InstrInfo::SetSSEDomain(MachineInstr *MI, unsigned Domain) const { assert(table && "Cannot change domain"); MI->setDesc(get(table[Domain-1])); } + +/// getNoopForMachoTarget - Return the noop instruction to use for a noop. +void X86InstrInfo::getNoopForMachoTarget(MCInst &NopInst) const { + NopInst.setOpcode(X86::NOOP); +} + diff --git a/lib/Target/X86/X86InstrInfo.h b/lib/Target/X86/X86InstrInfo.h index f0bdd06..df99c7f 100644 --- a/lib/Target/X86/X86InstrInfo.h +++ b/lib/Target/X86/X86InstrInfo.h @@ -623,6 +623,12 @@ public: MachineBasicBlock::iterator MI, const std::vector<CalleeSavedInfo> &CSI) const; + virtual + MachineInstr *emitFrameIndexDebugValue(MachineFunction &MF, + int FrameIx, uint64_t Offset, + const MDNode *MDPtr, + DebugLoc DL) const; + /// foldMemoryOperand - If this target supports it, fold a load or store of /// the specified stack slot into the specified machine instruction for the /// specified operand(s). If this is possible, the target should perform the @@ -687,6 +693,8 @@ public: int64_t Offset1, int64_t Offset2, unsigned NumLoads) const; + virtual void getNoopForMachoTarget(MCInst &NopInst) const; + virtual bool ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const; diff --git a/lib/Target/X86/X86InstrInfo.td b/lib/Target/X86/X86InstrInfo.td index 940b439..a2754ea 100644 --- a/lib/Target/X86/X86InstrInfo.td +++ b/lib/Target/X86/X86InstrInfo.td @@ -487,34 +487,6 @@ def or_is_add : PatFrag<(ops node:$lhs, node:$rhs), (or node:$lhs, node:$rhs),[{ return (~KnownZero0 & ~KnownZero1) == 0; }]>; -// 'shld' and 'shrd' instruction patterns. Note that even though these have -// the srl and shl in their patterns, the C++ code must still check for them, -// because predicates are tested before children nodes are explored. - -def shrd : PatFrag<(ops node:$src1, node:$amt1, node:$src2, node:$amt2), - (or (srl node:$src1, node:$amt1), - (shl node:$src2, node:$amt2)), [{ - assert(N->getOpcode() == ISD::OR); - return N->getOperand(0).getOpcode() == ISD::SRL && - N->getOperand(1).getOpcode() == ISD::SHL && - isa<ConstantSDNode>(N->getOperand(0).getOperand(1)) && - isa<ConstantSDNode>(N->getOperand(1).getOperand(1)) && - N->getOperand(0).getConstantOperandVal(1) == - N->getValueSizeInBits(0) - N->getOperand(1).getConstantOperandVal(1); -}]>; - -def shld : PatFrag<(ops node:$src1, node:$amt1, node:$src2, node:$amt2), - (or (shl node:$src1, node:$amt1), - (srl node:$src2, node:$amt2)), [{ - assert(N->getOpcode() == ISD::OR); - return N->getOperand(0).getOpcode() == ISD::SHL && - N->getOperand(1).getOpcode() == ISD::SRL && - isa<ConstantSDNode>(N->getOperand(0).getOperand(1)) && - isa<ConstantSDNode>(N->getOperand(1).getOperand(1)) && - N->getOperand(0).getConstantOperandVal(1) == - N->getValueSizeInBits(0) - N->getOperand(1).getConstantOperandVal(1); -}]>; - //===----------------------------------------------------------------------===// // Instruction list... 
// @@ -781,11 +753,11 @@ def PUSH32rmm: I<0xFF, MRM6m, (outs), (ins i32mem:$src), "push{l}\t$src",[]>; } let Defs = [ESP], Uses = [ESP], neverHasSideEffects = 1, mayStore = 1 in { -def PUSH32i8 : Ii8<0x6a, RawFrm, (outs), (ins i8imm:$imm), - "push{l}\t$imm", []>; -def PUSH32i16 : Ii16<0x68, RawFrm, (outs), (ins i16imm:$imm), +def PUSHi8 : Ii8<0x6a, RawFrm, (outs), (ins i32i8imm:$imm), "push{l}\t$imm", []>; -def PUSH32i32 : Ii32<0x68, RawFrm, (outs), (ins i32imm:$imm), +def PUSHi16 : Ii16<0x68, RawFrm, (outs), (ins i16imm:$imm), + "push{w}\t$imm", []>, OpSize; +def PUSHi32 : Ii32<0x68, RawFrm, (outs), (ins i32imm:$imm), "push{l}\t$imm", []>; } @@ -809,10 +781,11 @@ let isTwoAddress = 1 in // GR32 = bswap GR32 let Defs = [EFLAGS] in { def BSF16rr : I<0xBC, MRMSrcReg, (outs GR16:$dst), (ins GR16:$src), "bsf{w}\t{$src, $dst|$dst, $src}", - [(set GR16:$dst, EFLAGS, (X86bsf GR16:$src))]>, TB; + [(set GR16:$dst, EFLAGS, (X86bsf GR16:$src))]>, TB, OpSize; def BSF16rm : I<0xBC, MRMSrcMem, (outs GR16:$dst), (ins i16mem:$src), "bsf{w}\t{$src, $dst|$dst, $src}", - [(set GR16:$dst, EFLAGS, (X86bsf (loadi16 addr:$src)))]>, TB; + [(set GR16:$dst, EFLAGS, (X86bsf (loadi16 addr:$src)))]>, TB, + OpSize; def BSF32rr : I<0xBC, MRMSrcReg, (outs GR32:$dst), (ins GR32:$src), "bsf{l}\t{$src, $dst|$dst, $src}", [(set GR32:$dst, EFLAGS, (X86bsf GR32:$src))]>, TB; @@ -822,10 +795,11 @@ def BSF32rm : I<0xBC, MRMSrcMem, (outs GR32:$dst), (ins i32mem:$src), def BSR16rr : I<0xBD, MRMSrcReg, (outs GR16:$dst), (ins GR16:$src), "bsr{w}\t{$src, $dst|$dst, $src}", - [(set GR16:$dst, EFLAGS, (X86bsr GR16:$src))]>, TB; + [(set GR16:$dst, EFLAGS, (X86bsr GR16:$src))]>, TB, OpSize; def BSR16rm : I<0xBD, MRMSrcMem, (outs GR16:$dst), (ins i16mem:$src), "bsr{w}\t{$src, $dst|$dst, $src}", - [(set GR16:$dst, EFLAGS, (X86bsr (loadi16 addr:$src)))]>, TB; + [(set GR16:$dst, EFLAGS, (X86bsr (loadi16 addr:$src)))]>, TB, + OpSize; def BSR32rr : I<0xBD, MRMSrcReg, (outs GR32:$dst), (ins GR32:$src), "bsr{l}\t{$src, $dst|$dst, $src}", [(set GR32:$dst, EFLAGS, (X86bsr GR32:$src))]>, TB; @@ -4476,7 +4450,11 @@ def : Pat<(extloadi32i16 addr:$src), (MOVZX32rm16 addr:$src)>; // avoid partial-register updates. def : Pat<(i16 (anyext GR8 :$src)), (MOVZX16rr8 GR8 :$src)>; def : Pat<(i32 (anyext GR8 :$src)), (MOVZX32rr8 GR8 :$src)>; -def : Pat<(i32 (anyext GR16:$src)), (MOVZX32rr16 GR16:$src)>; + +// Except for i16 -> i32 since isel expect i16 ops to be promoted to i32. 
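[Editor's note; the pattern this comment introduces follows immediately below — aside for illustration only, not part of the patch.] Since instruction selection now expects i16 operations to already have been promoted to i32 (see the promotion hooks earlier in this diff), a plain (i32 (anyext GR16)) is selected as a subregister insert rather than a real zero-extension:

    // Before:  (i32 (anyext GR16:$src)) -> MOVZX32rr16
    //          => e.g. "movzwl %ax, %eax"
    // After:   (i32 (anyext GR16:$src)) -> INSERT_SUBREG(IMPLICIT_DEF, $src)
    //          => typically no code at all; the upper 16 bits are left
    //          undefined, which is exactly what anyext permits.
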
+def : Pat<(i32 (anyext GR16:$src)), + (INSERT_SUBREG (i32 (IMPLICIT_DEF)), GR16:$src, x86_subreg_16bit)>; + //===----------------------------------------------------------------------===// // Some peepholes @@ -4537,11 +4515,11 @@ def : Pat<(i8 (trunc GR16:$src)), // h-register tricks def : Pat<(i8 (trunc (srl_su GR16:$src, (i8 8)))), - (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS GR16:$src, GR16_ABCD)), + (EXTRACT_SUBREG (i16 (COPY_TO_REGCLASS GR16:$src, GR16_ABCD)), x86_subreg_8bit_hi)>, Requires<[In32BitMode]>; def : Pat<(i8 (trunc (srl_su GR32:$src, (i8 8)))), - (EXTRACT_SUBREG (i16 (COPY_TO_REGCLASS GR32:$src, GR32_ABCD)), + (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS GR32:$src, GR32_ABCD)), x86_subreg_8bit_hi)>, Requires<[In32BitMode]>; def : Pat<(srl GR16:$src, (i8 8)), @@ -4566,6 +4544,11 @@ def : Pat<(and (srl_su GR32:$src, (i8 8)), (i32 255)), GR32_ABCD)), x86_subreg_8bit_hi))>, Requires<[In32BitMode]>; +def : Pat<(srl (and_su GR32:$src, 0xff00), (i8 8)), + (MOVZX32rr8 (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS GR32:$src, + GR32_ABCD)), + x86_subreg_8bit_hi))>, + Requires<[In32BitMode]>; // (shl x, 1) ==> (add x, x) def : Pat<(shl GR8 :$src1, (i8 1)), (ADD8rr GR8 :$src1, GR8 :$src1)>; @@ -4612,111 +4595,13 @@ def : Pat<(store (sra (loadi16 addr:$dst), (and CL, 31)), addr:$dst), def : Pat<(store (sra (loadi32 addr:$dst), (and CL, 31)), addr:$dst), (SAR32mCL addr:$dst)>; -// (or (x >> c) | (y << (32 - c))) ==> (shrd32 x, y, c) -def : Pat<(or (srl GR32:$src1, CL:$amt), - (shl GR32:$src2, (sub 32, CL:$amt))), - (SHRD32rrCL GR32:$src1, GR32:$src2)>; - -def : Pat<(store (or (srl (loadi32 addr:$dst), CL:$amt), - (shl GR32:$src2, (sub 32, CL:$amt))), addr:$dst), - (SHRD32mrCL addr:$dst, GR32:$src2)>; - -def : Pat<(or (srl GR32:$src1, (i8 (trunc ECX:$amt))), - (shl GR32:$src2, (i8 (trunc (sub 32, ECX:$amt))))), - (SHRD32rrCL GR32:$src1, GR32:$src2)>; - -def : Pat<(store (or (srl (loadi32 addr:$dst), (i8 (trunc ECX:$amt))), - (shl GR32:$src2, (i8 (trunc (sub 32, ECX:$amt))))), - addr:$dst), - (SHRD32mrCL addr:$dst, GR32:$src2)>; - -def : Pat<(shrd GR32:$src1, (i8 imm:$amt1), GR32:$src2, (i8 imm/*:$amt2*/)), - (SHRD32rri8 GR32:$src1, GR32:$src2, (i8 imm:$amt1))>; - -def : Pat<(store (shrd (loadi32 addr:$dst), (i8 imm:$amt1), - GR32:$src2, (i8 imm/*:$amt2*/)), addr:$dst), - (SHRD32mri8 addr:$dst, GR32:$src2, (i8 imm:$amt1))>; - -// (or (x << c) | (y >> (32 - c))) ==> (shld32 x, y, c) -def : Pat<(or (shl GR32:$src1, CL:$amt), - (srl GR32:$src2, (sub 32, CL:$amt))), - (SHLD32rrCL GR32:$src1, GR32:$src2)>; - -def : Pat<(store (or (shl (loadi32 addr:$dst), CL:$amt), - (srl GR32:$src2, (sub 32, CL:$amt))), addr:$dst), - (SHLD32mrCL addr:$dst, GR32:$src2)>; - -def : Pat<(or (shl GR32:$src1, (i8 (trunc ECX:$amt))), - (srl GR32:$src2, (i8 (trunc (sub 32, ECX:$amt))))), - (SHLD32rrCL GR32:$src1, GR32:$src2)>; - -def : Pat<(store (or (shl (loadi32 addr:$dst), (i8 (trunc ECX:$amt))), - (srl GR32:$src2, (i8 (trunc (sub 32, ECX:$amt))))), - addr:$dst), - (SHLD32mrCL addr:$dst, GR32:$src2)>; - -def : Pat<(shld GR32:$src1, (i8 imm:$amt1), GR32:$src2, (i8 imm/*:$amt2*/)), - (SHLD32rri8 GR32:$src1, GR32:$src2, (i8 imm:$amt1))>; - -def : Pat<(store (shld (loadi32 addr:$dst), (i8 imm:$amt1), - GR32:$src2, (i8 imm/*:$amt2*/)), addr:$dst), - (SHLD32mri8 addr:$dst, GR32:$src2, (i8 imm:$amt1))>; - -// (or (x >> c) | (y << (16 - c))) ==> (shrd16 x, y, c) -def : Pat<(or (srl GR16:$src1, CL:$amt), - (shl GR16:$src2, (sub 16, CL:$amt))), - (SHRD16rrCL GR16:$src1, GR16:$src2)>; - -def : Pat<(store (or (srl (loadi16 addr:$dst), 
CL:$amt), - (shl GR16:$src2, (sub 16, CL:$amt))), addr:$dst), - (SHRD16mrCL addr:$dst, GR16:$src2)>; - -def : Pat<(or (srl GR16:$src1, (i8 (trunc CX:$amt))), - (shl GR16:$src2, (i8 (trunc (sub 16, CX:$amt))))), - (SHRD16rrCL GR16:$src1, GR16:$src2)>; - -def : Pat<(store (or (srl (loadi16 addr:$dst), (i8 (trunc CX:$amt))), - (shl GR16:$src2, (i8 (trunc (sub 16, CX:$amt))))), - addr:$dst), - (SHRD16mrCL addr:$dst, GR16:$src2)>; - -def : Pat<(shrd GR16:$src1, (i8 imm:$amt1), GR16:$src2, (i8 imm/*:$amt2*/)), - (SHRD16rri8 GR16:$src1, GR16:$src2, (i8 imm:$amt1))>; - -def : Pat<(store (shrd (loadi16 addr:$dst), (i8 imm:$amt1), - GR16:$src2, (i8 imm/*:$amt2*/)), addr:$dst), - (SHRD16mri8 addr:$dst, GR16:$src2, (i8 imm:$amt1))>; - -// (or (x << c) | (y >> (16 - c))) ==> (shld16 x, y, c) -def : Pat<(or (shl GR16:$src1, CL:$amt), - (srl GR16:$src2, (sub 16, CL:$amt))), - (SHLD16rrCL GR16:$src1, GR16:$src2)>; - -def : Pat<(store (or (shl (loadi16 addr:$dst), CL:$amt), - (srl GR16:$src2, (sub 16, CL:$amt))), addr:$dst), - (SHLD16mrCL addr:$dst, GR16:$src2)>; - -def : Pat<(or (shl GR16:$src1, (i8 (trunc CX:$amt))), - (srl GR16:$src2, (i8 (trunc (sub 16, CX:$amt))))), - (SHLD16rrCL GR16:$src1, GR16:$src2)>; - -def : Pat<(store (or (shl (loadi16 addr:$dst), (i8 (trunc CX:$amt))), - (srl GR16:$src2, (i8 (trunc (sub 16, CX:$amt))))), - addr:$dst), - (SHLD16mrCL addr:$dst, GR16:$src2)>; - -def : Pat<(shld GR16:$src1, (i8 imm:$amt1), GR16:$src2, (i8 imm/*:$amt2*/)), - (SHLD16rri8 GR16:$src1, GR16:$src2, (i8 imm:$amt1))>; - -def : Pat<(store (shld (loadi16 addr:$dst), (i8 imm:$amt1), - GR16:$src2, (i8 imm/*:$amt2*/)), addr:$dst), - (SHLD16mri8 addr:$dst, GR16:$src2, (i8 imm:$amt1))>; - // (anyext (setcc_carry)) -> (setcc_carry) def : Pat<(i16 (anyext (i8 (X86setcc_c X86_COND_B, EFLAGS)))), (SETB_C16r)>; def : Pat<(i32 (anyext (i8 (X86setcc_c X86_COND_B, EFLAGS)))), (SETB_C32r)>; +def : Pat<(i32 (anyext (i16 (X86setcc_c X86_COND_B, EFLAGS)))), + (SETB_C32r)>; // (or x1, x2) -> (add x1, x2) if two operands are known not to share bits. let AddedComplexity = 5 in { // Try this before the selecting to OR @@ -4907,18 +4792,6 @@ def : Pat<(and GR16:$src1, i16immSExt8:$src2), def : Pat<(and GR32:$src1, i32immSExt8:$src2), (AND32ri8 GR32:$src1, i32immSExt8:$src2)>; -// -disable-16bit support. 
-def : Pat<(truncstorei16 (i16 imm:$src), addr:$dst), - (MOV16mi addr:$dst, imm:$src)>; -def : Pat<(truncstorei16 GR32:$src, addr:$dst), - (MOV16mr addr:$dst, (EXTRACT_SUBREG GR32:$src, x86_subreg_16bit))>; -def : Pat<(i32 (sextloadi16 addr:$dst)), - (MOVSX32rm16 addr:$dst)>; -def : Pat<(i32 (zextloadi16 addr:$dst)), - (MOVZX32rm16 addr:$dst)>; -def : Pat<(i32 (extloadi16 addr:$dst)), - (MOVZX32rm16 addr:$dst)>; - //===----------------------------------------------------------------------===// // Floating Point Stack Support //===----------------------------------------------------------------------===// diff --git a/lib/Target/X86/X86InstrMMX.td b/lib/Target/X86/X86InstrMMX.td index 1c81c5e..744af50 100644 --- a/lib/Target/X86/X86InstrMMX.td +++ b/lib/Target/X86/X86InstrMMX.td @@ -117,7 +117,7 @@ def MMX_MOVD64mr : MMXI<0x7E, MRMDestMem, (outs), (ins i32mem:$dst, VR64:$src), "movd\t{$src, $dst|$dst, $src}", []>; def MMX_MOVD64grr : MMXI<0x7E, MRMDestReg, (outs), (ins GR32:$dst, VR64:$src), "movd\t{$src, $dst|$dst, $src}", []>; -def MMX_MOVQ64gmr : MMXRI<0x7E, MRMDestMem, (outs), +def MMX_MOVQ64gmr : MMXRI<0x7F, MRMDestMem, (outs), (ins i64mem:$dst, VR64:$src), "movq\t{$src, $dst|$dst, $src}", []>; @@ -167,6 +167,9 @@ let neverHasSideEffects = 1 in def MMX_MOVQ2FR64rr: SSDIi8<0xD6, MRMSrcReg, (outs FR64:$dst), (ins VR64:$src), "movq2dq\t{$src, $dst|$dst, $src}", []>; +def MMX_MOVFR642Qrr: SSDIi8<0xD6, MRMSrcReg, (outs VR64:$dst), (ins FR64:$src), + "movdq2q\t{$src, $dst|$dst, $src}", []>; + def MMX_MOVNTQmr : MMXI<0xE7, MRMDestMem, (outs), (ins i64mem:$dst, VR64:$src), "movntq\t{$src, $dst|$dst, $src}", [(int_x86_mmx_movnt_dq addr:$dst, VR64:$src)]>; @@ -569,6 +572,14 @@ def : Pat<(f64 (bitconvert (v4i16 VR64:$src))), (MMX_MOVQ2FR64rr VR64:$src)>; def : Pat<(f64 (bitconvert (v8i8 VR64:$src))), (MMX_MOVQ2FR64rr VR64:$src)>; +def : Pat<(v1i64 (bitconvert (f64 FR64:$src))), + (MMX_MOVFR642Qrr FR64:$src)>; +def : Pat<(v2i32 (bitconvert (f64 FR64:$src))), + (MMX_MOVFR642Qrr FR64:$src)>; +def : Pat<(v4i16 (bitconvert (f64 FR64:$src))), + (MMX_MOVFR642Qrr FR64:$src)>; +def : Pat<(v8i8 (bitconvert (f64 FR64:$src))), + (MMX_MOVFR642Qrr FR64:$src)>; let AddedComplexity = 20 in { def : Pat<(v2i32 (X86vzmovl (bc_v2i32 (load_mmx addr:$src)))), diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td index 11f7e27..2129580 100644 --- a/lib/Target/X86/X86InstrSSE.td +++ b/lib/Target/X86/X86InstrSSE.td @@ -2878,6 +2878,7 @@ let Constraints = "$src1 = $dst" in { } } +let ImmT = NoImm in { // None of these have i8 immediate fields. defm PHADDW : SS3I_binop_rm_int_16<0x01, "phaddw", int_x86_ssse3_phadd_w, int_x86_ssse3_phadd_w_128>; @@ -2902,6 +2903,7 @@ defm PMADDUBSW : SS3I_binop_rm_int_8 <0x04, "pmaddubsw", defm PMULHRSW : SS3I_binop_rm_int_16<0x0B, "pmulhrsw", int_x86_ssse3_pmul_hr_sw, int_x86_ssse3_pmul_hr_sw_128, 1>; + defm PSHUFB : SS3I_binop_rm_int_8 <0x00, "pshufb", int_x86_ssse3_pshuf_b, int_x86_ssse3_pshuf_b_128>; @@ -2914,7 +2916,9 @@ defm PSIGNW : SS3I_binop_rm_int_16<0x09, "psignw", defm PSIGND : SS3I_binop_rm_int_32<0x0A, "psignd", int_x86_ssse3_psign_d, int_x86_ssse3_psign_d_128>; +} +// palignr patterns. let Constraints = "$src1 = $dst" in { def PALIGNR64rr : SS3AI<0x0F, MRMSrcReg, (outs VR64:$dst), (ins VR64:$src1, VR64:$src2, i8imm:$src3), @@ -2935,26 +2939,29 @@ let Constraints = "$src1 = $dst" in { []>, OpSize; } -// palignr patterns. 
-def : Pat<(int_x86_ssse3_palign_r VR64:$src1, VR64:$src2, (i8 imm:$src3)), - (PALIGNR64rr VR64:$src1, VR64:$src2, (BYTE_imm imm:$src3))>, +let AddedComplexity = 5 in { + +def : Pat<(v1i64 (palign:$src3 VR64:$src1, VR64:$src2)), + (PALIGNR64rr VR64:$src2, VR64:$src1, + (SHUFFLE_get_palign_imm VR64:$src3))>, Requires<[HasSSSE3]>; -def : Pat<(int_x86_ssse3_palign_r VR64:$src1, - (memop64 addr:$src2), - (i8 imm:$src3)), - (PALIGNR64rm VR64:$src1, addr:$src2, (BYTE_imm imm:$src3))>, +def : Pat<(v2i32 (palign:$src3 VR64:$src1, VR64:$src2)), + (PALIGNR64rr VR64:$src2, VR64:$src1, + (SHUFFLE_get_palign_imm VR64:$src3))>, Requires<[HasSSSE3]>; - -def : Pat<(int_x86_ssse3_palign_r_128 VR128:$src1, VR128:$src2, (i8 imm:$src3)), - (PALIGNR128rr VR128:$src1, VR128:$src2, (BYTE_imm imm:$src3))>, +def : Pat<(v2f32 (palign:$src3 VR64:$src1, VR64:$src2)), + (PALIGNR64rr VR64:$src2, VR64:$src1, + (SHUFFLE_get_palign_imm VR64:$src3))>, Requires<[HasSSSE3]>; -def : Pat<(int_x86_ssse3_palign_r_128 VR128:$src1, - (memopv2i64 addr:$src2), - (i8 imm:$src3)), - (PALIGNR128rm VR128:$src1, addr:$src2, (BYTE_imm imm:$src3))>, +def : Pat<(v4i16 (palign:$src3 VR64:$src1, VR64:$src2)), + (PALIGNR64rr VR64:$src2, VR64:$src1, + (SHUFFLE_get_palign_imm VR64:$src3))>, + Requires<[HasSSSE3]>; +def : Pat<(v8i8 (palign:$src3 VR64:$src1, VR64:$src2)), + (PALIGNR64rr VR64:$src2, VR64:$src1, + (SHUFFLE_get_palign_imm VR64:$src3))>, Requires<[HasSSSE3]>; -let AddedComplexity = 5 in { def : Pat<(v4i32 (palign:$src3 VR128:$src1, VR128:$src2)), (PALIGNR128rr VR128:$src2, VR128:$src1, (SHUFFLE_get_palign_imm VR128:$src3))>, @@ -3510,7 +3517,7 @@ defm DPPS : SS41I_binop_rmi_int<0x40, "dpps", defm DPPD : SS41I_binop_rmi_int<0x41, "dppd", int_x86_sse41_dppd, 1>; defm MPSADBW : SS41I_binop_rmi_int<0x42, "mpsadbw", - int_x86_sse41_mpsadbw, 1>; + int_x86_sse41_mpsadbw, 0>; /// SS41I_ternary_int - SSE 4.1 ternary operator diff --git a/lib/Target/X86/X86MCAsmInfo.cpp b/lib/Target/X86/X86MCAsmInfo.cpp index d257ee3..2b8720b 100644 --- a/lib/Target/X86/X86MCAsmInfo.cpp +++ b/lib/Target/X86/X86MCAsmInfo.cpp @@ -14,6 +14,7 @@ #include "X86MCAsmInfo.h" #include "X86TargetMachine.h" #include "llvm/ADT/Triple.h" +#include "llvm/MC/MCContext.h" #include "llvm/MC/MCSectionELF.h" #include "llvm/Support/CommandLine.h" using namespace llvm; @@ -95,9 +96,10 @@ X86ELFMCAsmInfo::X86ELFMCAsmInfo(const Triple &T) { Data64bitsDirective = 0; } -MCSection *X86ELFMCAsmInfo::getNonexecutableStackSection(MCContext &Ctx) const { - return MCSectionELF::Create(".note.GNU-stack", MCSectionELF::SHT_PROGBITS, - 0, SectionKind::getMetadata(), false, Ctx); +const MCSection *X86ELFMCAsmInfo:: +getNonexecutableStackSection(MCContext &Ctx) const { + return Ctx.getELFSection(".note.GNU-stack", MCSectionELF::SHT_PROGBITS, + 0, SectionKind::getMetadata(), false); } X86MCAsmInfoCOFF::X86MCAsmInfoCOFF(const Triple &Triple) { diff --git a/lib/Target/X86/X86MCAsmInfo.h b/lib/Target/X86/X86MCAsmInfo.h index 69716bf..5815225 100644 --- a/lib/Target/X86/X86MCAsmInfo.h +++ b/lib/Target/X86/X86MCAsmInfo.h @@ -27,7 +27,7 @@ namespace llvm { struct X86ELFMCAsmInfo : public MCAsmInfo { explicit X86ELFMCAsmInfo(const Triple &Triple); - virtual MCSection *getNonexecutableStackSection(MCContext &Ctx) const; + virtual const MCSection *getNonexecutableStackSection(MCContext &Ctx) const; }; struct X86MCAsmInfoCOFF : public MCAsmInfoCOFF { diff --git a/lib/Target/X86/X86MachineFunctionInfo.h b/lib/Target/X86/X86MachineFunctionInfo.h index 4b2529b..06043ec 100644 --- 
a/lib/Target/X86/X86MachineFunctionInfo.h +++ b/lib/Target/X86/X86MachineFunctionInfo.h @@ -31,7 +31,8 @@ class X86MachineFunctionInfo : public MachineFunctionInfo { /// stack frame in bytes. unsigned CalleeSavedFrameSize; - /// BytesToPopOnReturn - Number of bytes function pops on return. + /// BytesToPopOnReturn - Number of bytes function pops on return (in addition + /// to the space used by the return address). /// Used on windows platform for stdcall & fastcall name decoration unsigned BytesToPopOnReturn; @@ -52,6 +53,19 @@ class X86MachineFunctionInfo : public MachineFunctionInfo { /// relocation models. unsigned GlobalBaseReg; + /// ReserveFP - whether the function should reserve the frame pointer + /// when allocating, even if there may not actually be a frame pointer used. + bool ReserveFP; + + /// VarArgsFrameIndex - FrameIndex for start of varargs area. + int VarArgsFrameIndex; + /// RegSaveFrameIndex - X86-64 vararg func register save area. + int RegSaveFrameIndex; + /// VarArgsGPOffset - X86-64 vararg func int reg offset. + unsigned VarArgsGPOffset; + /// VarArgsFPOffset - X86-64 vararg func fp reg offset. + unsigned VarArgsFPOffset; + public: X86MachineFunctionInfo() : ForceFramePointer(false), CalleeSavedFrameSize(0), @@ -59,7 +73,11 @@ public: ReturnAddrIndex(0), TailCallReturnAddrDelta(0), SRetReturnReg(0), - GlobalBaseReg(0) {} + GlobalBaseReg(0), + VarArgsFrameIndex(0), + RegSaveFrameIndex(0), + VarArgsGPOffset(0), + VarArgsFPOffset(0) {} explicit X86MachineFunctionInfo(MachineFunction &MF) : ForceFramePointer(false), @@ -68,7 +86,12 @@ public: ReturnAddrIndex(0), TailCallReturnAddrDelta(0), SRetReturnReg(0), - GlobalBaseReg(0) {} + GlobalBaseReg(0), + ReserveFP(false), + VarArgsFrameIndex(0), + RegSaveFrameIndex(0), + VarArgsGPOffset(0), + VarArgsFPOffset(0) {} bool getForceFramePointer() const { return ForceFramePointer;} void setForceFramePointer(bool forceFP) { ForceFramePointer = forceFP; } @@ -90,6 +113,21 @@ public: unsigned getGlobalBaseReg() const { return GlobalBaseReg; } void setGlobalBaseReg(unsigned Reg) { GlobalBaseReg = Reg; } + + bool getReserveFP() const { return ReserveFP; } + void setReserveFP(bool reserveFP) { ReserveFP = reserveFP; } + + int getVarArgsFrameIndex() const { return VarArgsFrameIndex; } + void setVarArgsFrameIndex(int Idx) { VarArgsFrameIndex = Idx; } + + int getRegSaveFrameIndex() const { return RegSaveFrameIndex; } + void setRegSaveFrameIndex(int Idx) { RegSaveFrameIndex = Idx; } + + unsigned getVarArgsGPOffset() const { return VarArgsGPOffset; } + void setVarArgsGPOffset(unsigned Offset) { VarArgsGPOffset = Offset; } + + unsigned getVarArgsFPOffset() const { return VarArgsFPOffset; } + void setVarArgsFPOffset(unsigned Offset) { VarArgsFPOffset = Offset; } }; } // End llvm namespace diff --git a/lib/Target/X86/X86RegisterInfo.cpp b/lib/Target/X86/X86RegisterInfo.cpp index 32f28a5..a3e04b0 100644 --- a/lib/Target/X86/X86RegisterInfo.cpp +++ b/lib/Target/X86/X86RegisterInfo.cpp @@ -439,7 +439,7 @@ bool X86RegisterInfo::hasFP(const MachineFunction &MF) const { const MachineFrameInfo *MFI = MF.getFrameInfo(); const MachineModuleInfo &MMI = MF.getMMI(); - return (NoFramePointerElim || + return (DisableFramePointerElim(MF) || needsStackRealignment(MF) || MFI->hasVarSizedObjects() || MFI->isFrameAddressTaken() || @@ -464,7 +464,7 @@ bool X86RegisterInfo::needsStackRealignment(const MachineFunction &MF) const { // variable-sized allocas. // FIXME: Temporary disable the error - it seems to be too conservative. 
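[Editor's note on the BytesToPopOnReturn comment in the X86MachineFunctionInfo.h hunk above; hypothetical example for illustration only.] For callee-pops conventions the value ends up as the immediate operand of the RET:

    // int __stdcall f(int a, int b);   // 8 bytes of stack arguments
    // The callee clears its own arguments on return:
    //     ret $8
    // A cdecl function would instead end in a plain "ret" and leave those
    // 8 bytes for the caller to pop.
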
if (0 && requiresRealignment && MFI->hasVarSizedObjects()) - llvm_report_error( + report_fatal_error( "Stack realignment in presense of dynamic allocas is not supported"); return (requiresRealignment && !MFI->hasVarSizedObjects()); @@ -608,8 +608,12 @@ X86RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, int FrameIndex = MI.getOperand(i).getIndex(); unsigned BasePtr; + unsigned Opc = MI.getOpcode(); + bool AfterFPPop = Opc == X86::TAILJMPm64 || Opc == X86::TAILJMPm; if (needsStackRealignment(MF)) BasePtr = (FrameIndex < 0 ? FramePtr : StackPtr); + else if (AfterFPPop) + BasePtr = StackPtr; else BasePtr = (hasFP(MF) ? FramePtr : StackPtr); @@ -618,16 +622,22 @@ X86RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, MI.getOperand(i).ChangeToRegister(BasePtr, false); // Now add the frame object offset to the offset from EBP. + int FIOffset; + if (AfterFPPop) { + // Tail call jmp happens after FP is popped. + const TargetFrameInfo &TFI = *MF.getTarget().getFrameInfo(); + const MachineFrameInfo *MFI = MF.getFrameInfo(); + FIOffset = MFI->getObjectOffset(FrameIndex) - TFI.getOffsetOfLocalArea(); + } else + FIOffset = getFrameIndexOffset(MF, FrameIndex); + if (MI.getOperand(i+3).isImm()) { // Offset is a 32-bit integer. - int Offset = getFrameIndexOffset(MF, FrameIndex) + - (int)(MI.getOperand(i + 3).getImm()); - + int Offset = FIOffset + (int)(MI.getOperand(i + 3).getImm()); MI.getOperand(i + 3).ChangeToImmediate(Offset); } else { // Offset is symbolic. This is extremely rare. - uint64_t Offset = getFrameIndexOffset(MF, FrameIndex) + - (uint64_t)MI.getOperand(i+3).getOffset(); + uint64_t Offset = FIOffset + (uint64_t)MI.getOperand(i+3).getOffset(); MI.getOperand(i+3).setOffset(Offset); } return 0; @@ -1487,3 +1497,46 @@ unsigned getX86SubSuperRegister(unsigned Reg, EVT VT, bool High) { } #include "X86GenRegisterInfo.inc" + +namespace { + struct MSAH : public MachineFunctionPass { + static char ID; + MSAH() : MachineFunctionPass(&ID) {} + + virtual bool runOnMachineFunction(MachineFunction &MF) { + const X86TargetMachine *TM = + static_cast<const X86TargetMachine *>(&MF.getTarget()); + const X86RegisterInfo *X86RI = TM->getRegisterInfo(); + MachineRegisterInfo &RI = MF.getRegInfo(); + X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>(); + unsigned StackAlignment = X86RI->getStackAlignment(); + + // Be over-conservative: scan over all vreg defs and find whether vector + // registers are used. If yes, there is a possibility that vector register + // will be spilled and thus require dynamic stack realignment. 
+ for (unsigned RegNum = TargetRegisterInfo::FirstVirtualRegister; + RegNum < RI.getLastVirtReg(); ++RegNum) + if (RI.getRegClass(RegNum)->getAlignment() > StackAlignment) { + FuncInfo->setReserveFP(true); + return true; + } + + // Nothing to do + return false; + } + + virtual const char *getPassName() const { + return "X86 Maximal Stack Alignment Check"; + } + + virtual void getAnalysisUsage(AnalysisUsage &AU) const { + AU.setPreservesCFG(); + MachineFunctionPass::getAnalysisUsage(AU); + } + }; + + char MSAH::ID = 0; +} + +FunctionPass* +llvm::createX86MaxStackAlignmentHeuristicPass() { return new MSAH(); } diff --git a/lib/Target/X86/X86RegisterInfo.td b/lib/Target/X86/X86RegisterInfo.td index 76b8f7a..49a6ca0 100644 --- a/lib/Target/X86/X86RegisterInfo.td +++ b/lib/Target/X86/X86RegisterInfo.td @@ -352,11 +352,12 @@ def GR8 : RegisterClass<"X86", [i8], 8, const TargetMachine &TM = MF.getTarget(); const TargetRegisterInfo *RI = TM.getRegisterInfo(); const X86Subtarget &Subtarget = TM.getSubtarget<X86Subtarget>(); + const X86MachineFunctionInfo *MFI = MF.getInfo<X86MachineFunctionInfo>(); // Does the function dedicate RBP / EBP to being a frame ptr? if (!Subtarget.is64Bit()) // In 32-mode, none of the 8-bit registers aliases EBP or ESP. return begin() + 8; - else if (RI->hasFP(MF)) + else if (RI->hasFP(MF) || MFI->getReserveFP()) // If so, don't allocate SPL or BPL. return array_endof(X86_GR8_AO_64) - 1; else @@ -396,9 +397,10 @@ def GR16 : RegisterClass<"X86", [i16], 16, const TargetMachine &TM = MF.getTarget(); const TargetRegisterInfo *RI = TM.getRegisterInfo(); const X86Subtarget &Subtarget = TM.getSubtarget<X86Subtarget>(); + const X86MachineFunctionInfo *MFI = MF.getInfo<X86MachineFunctionInfo>(); if (Subtarget.is64Bit()) { // Does the function dedicate RBP to being a frame ptr? - if (RI->hasFP(MF)) + if (RI->hasFP(MF) || MFI->getReserveFP()) // If so, don't allocate SP or BP. return array_endof(X86_GR16_AO_64) - 1; else @@ -406,7 +408,7 @@ def GR16 : RegisterClass<"X86", [i16], 16, return array_endof(X86_GR16_AO_64); } else { // Does the function dedicate EBP to being a frame ptr? - if (RI->hasFP(MF)) + if (RI->hasFP(MF) || MFI->getReserveFP()) // If so, don't allocate SP or BP. return begin() + 6; else @@ -447,9 +449,10 @@ def GR32 : RegisterClass<"X86", [i32], 32, const TargetMachine &TM = MF.getTarget(); const TargetRegisterInfo *RI = TM.getRegisterInfo(); const X86Subtarget &Subtarget = TM.getSubtarget<X86Subtarget>(); + const X86MachineFunctionInfo *MFI = MF.getInfo<X86MachineFunctionInfo>(); if (Subtarget.is64Bit()) { // Does the function dedicate RBP to being a frame ptr? - if (RI->hasFP(MF)) + if (RI->hasFP(MF) || MFI->getReserveFP()) // If so, don't allocate ESP or EBP. return array_endof(X86_GR32_AO_64) - 1; else @@ -457,7 +460,7 @@ def GR32 : RegisterClass<"X86", [i32], 32, return array_endof(X86_GR32_AO_64); } else { // Does the function dedicate EBP to being a frame ptr? - if (RI->hasFP(MF)) + if (RI->hasFP(MF) || MFI->getReserveFP()) // If so, don't allocate ESP or EBP. return begin() + 6; else @@ -484,9 +487,11 @@ def GR64 : RegisterClass<"X86", [i64], 64, const TargetMachine &TM = MF.getTarget(); const TargetRegisterInfo *RI = TM.getRegisterInfo(); const X86Subtarget &Subtarget = TM.getSubtarget<X86Subtarget>(); + const X86MachineFunctionInfo *MFI = MF.getInfo<X86MachineFunctionInfo>(); if (!Subtarget.is64Bit()) return begin(); // None of these are allocatable in 32-bit. - if (RI->hasFP(MF)) // Does the function dedicate RBP to being a frame ptr? 
+ // Does the function dedicate RBP to being a frame ptr? + if (RI->hasFP(MF) || MFI->getReserveFP()) return end()-3; // If so, don't allocate RIP, RSP or RBP else return end()-2; // If not, just don't allocate RIP or RSP @@ -589,8 +594,9 @@ def GR16_NOREX : RegisterClass<"X86", [i16], 16, GR16_NOREXClass::allocation_order_end(const MachineFunction &MF) const { const TargetMachine &TM = MF.getTarget(); const TargetRegisterInfo *RI = TM.getRegisterInfo(); + const X86MachineFunctionInfo *MFI = MF.getInfo<X86MachineFunctionInfo>(); // Does the function dedicate RBP / EBP to being a frame ptr? - if (RI->hasFP(MF)) + if (RI->hasFP(MF) || MFI->getReserveFP()) // If so, don't allocate SP or BP. return end() - 2; else @@ -611,8 +617,9 @@ def GR32_NOREX : RegisterClass<"X86", [i32], 32, GR32_NOREXClass::allocation_order_end(const MachineFunction &MF) const { const TargetMachine &TM = MF.getTarget(); const TargetRegisterInfo *RI = TM.getRegisterInfo(); + const X86MachineFunctionInfo *MFI = MF.getInfo<X86MachineFunctionInfo>(); // Does the function dedicate RBP / EBP to being a frame ptr? - if (RI->hasFP(MF)) + if (RI->hasFP(MF) || MFI->getReserveFP()) // If so, don't allocate ESP or EBP. return end() - 2; else @@ -633,8 +640,9 @@ def GR64_NOREX : RegisterClass<"X86", [i64], 64, GR64_NOREXClass::allocation_order_end(const MachineFunction &MF) const { const TargetMachine &TM = MF.getTarget(); const TargetRegisterInfo *RI = TM.getRegisterInfo(); + const X86MachineFunctionInfo *MFI = MF.getInfo<X86MachineFunctionInfo>(); // Does the function dedicate RBP to being a frame ptr? - if (RI->hasFP(MF)) + if (RI->hasFP(MF) || MFI->getReserveFP()) // If so, don't allocate RIP, RSP or RBP. return end() - 3; else @@ -675,9 +683,10 @@ def GR32_NOSP : RegisterClass<"X86", [i32], 32, const TargetMachine &TM = MF.getTarget(); const TargetRegisterInfo *RI = TM.getRegisterInfo(); const X86Subtarget &Subtarget = TM.getSubtarget<X86Subtarget>(); + const X86MachineFunctionInfo *MFI = MF.getInfo<X86MachineFunctionInfo>(); if (Subtarget.is64Bit()) { // Does the function dedicate RBP to being a frame ptr? - if (RI->hasFP(MF)) + if (RI->hasFP(MF) || MFI->getReserveFP()) // If so, don't allocate EBP. return array_endof(X86_GR32_NOSP_AO_64) - 1; else @@ -685,7 +694,7 @@ def GR32_NOSP : RegisterClass<"X86", [i32], 32, return array_endof(X86_GR32_NOSP_AO_64); } else { // Does the function dedicate EBP to being a frame ptr? - if (RI->hasFP(MF)) + if (RI->hasFP(MF) || MFI->getReserveFP()) // If so, don't allocate EBP. return begin() + 6; else @@ -710,9 +719,11 @@ def GR64_NOSP : RegisterClass<"X86", [i64], 64, const TargetMachine &TM = MF.getTarget(); const TargetRegisterInfo *RI = TM.getRegisterInfo(); const X86Subtarget &Subtarget = TM.getSubtarget<X86Subtarget>(); + const X86MachineFunctionInfo *MFI = MF.getInfo<X86MachineFunctionInfo>(); if (!Subtarget.is64Bit()) return begin(); // None of these are allocatable in 32-bit. - if (RI->hasFP(MF)) // Does the function dedicate RBP to being a frame ptr? + // Does the function dedicate RBP to being a frame ptr? + if (RI->hasFP(MF) || MFI->getReserveFP()) return end()-1; // If so, don't allocate RBP else return end(); // If not, any reg in this class is ok. @@ -733,8 +744,9 @@ def GR64_NOREX_NOSP : RegisterClass<"X86", [i64], 64, { const TargetMachine &TM = MF.getTarget(); const TargetRegisterInfo *RI = TM.getRegisterInfo(); + const X86MachineFunctionInfo *MFI = MF.getInfo<X86MachineFunctionInfo>(); // Does the function dedicate RBP to being a frame ptr? 
- if (RI->hasFP(MF)) + if (RI->hasFP(MF) || MFI->getReserveFP()) // If so, don't allocate RBP. return end() - 1; else diff --git a/lib/Target/X86/X86SelectionDAGInfo.cpp b/lib/Target/X86/X86SelectionDAGInfo.cpp new file mode 100644 index 0000000..cd87b82 --- /dev/null +++ b/lib/Target/X86/X86SelectionDAGInfo.cpp @@ -0,0 +1,22 @@ +//===-- X86SelectionDAGInfo.cpp - X86 SelectionDAG Info -------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the X86SelectionDAGInfo class. +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "x86-selectiondag-info" +#include "X86SelectionDAGInfo.h" +using namespace llvm; + +X86SelectionDAGInfo::X86SelectionDAGInfo() { +} + +X86SelectionDAGInfo::~X86SelectionDAGInfo() { +} diff --git a/lib/Target/X86/X86SelectionDAGInfo.h b/lib/Target/X86/X86SelectionDAGInfo.h new file mode 100644 index 0000000..9834754 --- /dev/null +++ b/lib/Target/X86/X86SelectionDAGInfo.h @@ -0,0 +1,29 @@ +//===-- X86SelectionDAGInfo.h - X86 SelectionDAG Info -----------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines the X86 subclass for TargetSelectionDAGInfo. +// +//===----------------------------------------------------------------------===// + +#ifndef X86SELECTIONDAGINFO_H +#define X86SELECTIONDAGINFO_H + +#include "llvm/Target/TargetSelectionDAGInfo.h" + +namespace llvm { + +class X86SelectionDAGInfo : public TargetSelectionDAGInfo { +public: + X86SelectionDAGInfo(); + ~X86SelectionDAGInfo(); +}; + +} + +#endif diff --git a/lib/Target/X86/X86Subtarget.h b/lib/Target/X86/X86Subtarget.h index 8a873f0..646af91 100644 --- a/lib/Target/X86/X86Subtarget.h +++ b/lib/Target/X86/X86Subtarget.h @@ -85,8 +85,7 @@ protected: bool IsUAMemFast; /// HasVectorUAMem - True if SIMD operations can have unaligned memory - /// operands. This may require setting a feature bit in the - /// processor. + /// operands. This may require setting a feature bit in the processor. 
bool HasVectorUAMem; /// DarwinVers - Nonzero if this is a darwin platform: the numeric diff --git a/lib/Target/X86/X86TargetMachine.cpp b/lib/Target/X86/X86TargetMachine.cpp index c608e56..f39904e 100644 --- a/lib/Target/X86/X86TargetMachine.cpp +++ b/lib/Target/X86/X86TargetMachine.cpp @@ -17,12 +17,17 @@ #include "llvm/PassManager.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/Passes.h" -#include "llvm/Support/CommandLine.h" #include "llvm/Support/FormattedStream.h" #include "llvm/Target/TargetOptions.h" #include "llvm/Target/TargetRegistry.h" +#include "llvm/Support/CommandLine.h" + using namespace llvm; +static cl::opt<bool> DisableSSEDomain("disable-sse-domain", + cl::init(false), cl::Hidden, + cl::desc("Disable SSE Domain Fixing")); + static MCAsmInfo *createMCAsmInfo(const Target &T, StringRef TT) { Triple TheTriple(TT); switch (TheTriple.getOS()) { @@ -161,6 +166,7 @@ bool X86TargetMachine::addInstSelector(PassManagerBase &PM, bool X86TargetMachine::addPreRegAlloc(PassManagerBase &PM, CodeGenOpt::Level OptLevel) { + PM.add(createX86MaxStackAlignmentHeuristicPass()); return false; // -print-machineinstr shouldn't print after this. } @@ -172,7 +178,8 @@ bool X86TargetMachine::addPostRegAlloc(PassManagerBase &PM, bool X86TargetMachine::addPreEmitPass(PassManagerBase &PM, CodeGenOpt::Level OptLevel) { - if (OptLevel != CodeGenOpt::None && Subtarget.hasSSE2()) { + if (OptLevel != CodeGenOpt::None && Subtarget.hasSSE2() && + !DisableSSEDomain) { PM.add(createSSEDomainFixPass()); return true; } diff --git a/lib/Target/X86/X86TargetMachine.h b/lib/Target/X86/X86TargetMachine.h index ae7b5b2..dc4234c 100644 --- a/lib/Target/X86/X86TargetMachine.h +++ b/lib/Target/X86/X86TargetMachine.h @@ -51,8 +51,8 @@ public: virtual const TargetFrameInfo *getFrameInfo() const { return &FrameInfo; } virtual X86JITInfo *getJITInfo() { return &JITInfo; } virtual const X86Subtarget *getSubtargetImpl() const{ return &Subtarget; } - virtual X86TargetLowering *getTargetLowering() const { - return const_cast<X86TargetLowering*>(&TLInfo); + virtual const X86TargetLowering *getTargetLowering() const { + return &TLInfo; } virtual const X86RegisterInfo *getRegisterInfo() const { return &InstrInfo.getRegisterInfo(); diff --git a/lib/Target/XCore/AsmPrinter/XCoreAsmPrinter.cpp b/lib/Target/XCore/AsmPrinter/XCoreAsmPrinter.cpp index 5801b40..c100c59 100644 --- a/lib/Target/XCore/AsmPrinter/XCoreAsmPrinter.cpp +++ b/lib/Target/XCore/AsmPrinter/XCoreAsmPrinter.cpp @@ -123,7 +123,7 @@ void XCoreAsmPrinter::EmitGlobalVariable(const GlobalVariable *GV) { switch (GV->getLinkage()) { case GlobalValue::AppendingLinkage: - llvm_report_error("AppendingLinkage is not supported by this target!"); + report_fatal_error("AppendingLinkage is not supported by this target!"); case GlobalValue::LinkOnceAnyLinkage: case GlobalValue::LinkOnceODRLinkage: case GlobalValue::WeakAnyLinkage: @@ -148,7 +148,7 @@ void XCoreAsmPrinter::EmitGlobalVariable(const GlobalVariable *GV) { llvm_unreachable("Unknown linkage type!"); } - EmitAlignment(Align, GV, 2); + EmitAlignment(Align > 2 ? 
Align : 2, GV); unsigned Size = TD->getTypeAllocSize(C->getType()); if (GV->isThreadLocal()) { diff --git a/lib/Target/XCore/CMakeLists.txt b/lib/Target/XCore/CMakeLists.txt index 0965323..1b8e7ed 100644 --- a/lib/Target/XCore/CMakeLists.txt +++ b/lib/Target/XCore/CMakeLists.txt @@ -11,7 +11,6 @@ tablegen(XCoreGenCallingConv.inc -gen-callingconv) tablegen(XCoreGenSubtarget.inc -gen-subtarget) add_llvm_target(XCore - MCSectionXCore.cpp XCoreFrameInfo.cpp XCoreInstrInfo.cpp XCoreISelDAGToDAG.cpp @@ -21,4 +20,5 @@ add_llvm_target(XCore XCoreSubtarget.cpp XCoreTargetMachine.cpp XCoreTargetObjectFile.cpp + XCoreSelectionDAGInfo.cpp ) diff --git a/lib/Target/XCore/MCSectionXCore.cpp b/lib/Target/XCore/MCSectionXCore.cpp deleted file mode 100644 index 5acceaf..0000000 --- a/lib/Target/XCore/MCSectionXCore.cpp +++ /dev/null @@ -1,35 +0,0 @@ -//===- MCSectionXCore.cpp - XCore-specific section representation ---------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file implements the MCSectionXCore class. -// -//===----------------------------------------------------------------------===// - -#include "MCSectionXCore.h" -#include "llvm/MC/MCContext.h" -#include "llvm/Support/raw_ostream.h" -using namespace llvm; - -MCSectionXCore * -MCSectionXCore::Create(const StringRef &Section, unsigned Type, - unsigned Flags, SectionKind K, - bool isExplicit, MCContext &Ctx) { - return new (Ctx) MCSectionXCore(Section, Type, Flags, K, isExplicit); -} - - -/// PrintTargetSpecificSectionFlags - This handles the XCore-specific cp/dp -/// section flags. -void MCSectionXCore::PrintTargetSpecificSectionFlags(const MCAsmInfo &MAI, - raw_ostream &OS) const { - if (getFlags() & MCSectionXCore::SHF_CP_SECTION) - OS << 'c'; - if (getFlags() & MCSectionXCore::SHF_DP_SECTION) - OS << 'd'; -} diff --git a/lib/Target/XCore/MCSectionXCore.h b/lib/Target/XCore/MCSectionXCore.h deleted file mode 100644 index 02f8f95..0000000 --- a/lib/Target/XCore/MCSectionXCore.h +++ /dev/null @@ -1,54 +0,0 @@ -//===- MCSectionXCore.h - XCore-specific section representation -*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file declares the MCSectionXCore class. -// -//===----------------------------------------------------------------------===// - -#ifndef LLVM_MCSECTION_XCORE_H -#define LLVM_MCSECTION_XCORE_H - -#include "llvm/MC/MCSectionELF.h" - -namespace llvm { - -class MCSectionXCore : public MCSectionELF { - MCSectionXCore(const StringRef &Section, unsigned Type, unsigned Flags, - SectionKind K, bool isExplicit) - : MCSectionELF(Section, Type, Flags, K, isExplicit) {} - -public: - - enum { - /// SHF_CP_SECTION - All sections with the "c" flag are grouped together - /// by the linker to form the constant pool and the cp register is set to - /// the start of the constant pool by the boot code. - SHF_CP_SECTION = FIRST_TARGET_DEP_FLAG, - - /// SHF_DP_SECTION - All sections with the "d" flag are grouped together - /// by the linker to form the data section and the dp register is set to - /// the start of the section by the boot code. 
- SHF_DP_SECTION = FIRST_TARGET_DEP_FLAG << 1 - }; - - static MCSectionXCore *Create(const StringRef &Section, unsigned Type, - unsigned Flags, SectionKind K, - bool isExplicit, MCContext &Ctx); - - - /// PrintTargetSpecificSectionFlags - This handles the XCore-specific cp/dp - /// section flags. - virtual void PrintTargetSpecificSectionFlags(const MCAsmInfo &MAI, - raw_ostream &OS) const; - -}; - -} // end namespace llvm - -#endif diff --git a/lib/Target/XCore/XCoreISelDAGToDAG.cpp b/lib/Target/XCore/XCoreISelDAGToDAG.cpp index 1615547..5564ddf 100644 --- a/lib/Target/XCore/XCoreISelDAGToDAG.cpp +++ b/lib/Target/XCore/XCoreISelDAGToDAG.cpp @@ -12,7 +12,6 @@ //===----------------------------------------------------------------------===// #include "XCore.h" -#include "XCoreISelLowering.h" #include "XCoreTargetMachine.h" #include "llvm/DerivedTypes.h" #include "llvm/Function.h" @@ -40,7 +39,7 @@ using namespace llvm; /// namespace { class XCoreDAGToDAGISel : public SelectionDAGISel { - XCoreTargetLowering &Lowering; + const XCoreTargetLowering &Lowering; const XCoreSubtarget &Subtarget; public: diff --git a/lib/Target/XCore/XCoreISelLowering.cpp b/lib/Target/XCore/XCoreISelLowering.cpp index 27e5233..3990b8b 100644 --- a/lib/Target/XCore/XCoreISelLowering.cpp +++ b/lib/Target/XCore/XCoreISelLowering.cpp @@ -158,7 +158,7 @@ XCoreTargetLowering::XCoreTargetLowering(XCoreTargetMachine &XTM) } SDValue XCoreTargetLowering:: -LowerOperation(SDValue Op, SelectionDAG &DAG) { +LowerOperation(SDValue Op, SelectionDAG &DAG) const { switch (Op.getOpcode()) { case ISD::GlobalAddress: return LowerGlobalAddress(Op, DAG); @@ -187,7 +187,7 @@ LowerOperation(SDValue Op, SelectionDAG &DAG) { /// type with new values built out of custom code. void XCoreTargetLowering::ReplaceNodeResults(SDNode *N, SmallVectorImpl<SDValue>&Results, - SelectionDAG &DAG) { + SelectionDAG &DAG) const { switch (N->getOpcode()) { default: llvm_unreachable("Don't know how to custom expand this!"); @@ -210,7 +210,7 @@ getFunctionAlignment(const Function *) const { //===----------------------------------------------------------------------===// SDValue XCoreTargetLowering:: -LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) +LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const { DebugLoc dl = Op.getDebugLoc(); SDValue Cond = DAG.getNode(ISD::SETCC, dl, MVT::i32, Op.getOperand(2), @@ -220,7 +220,8 @@ LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) } SDValue XCoreTargetLowering:: -getGlobalAddressWrapper(SDValue GA, GlobalValue *GV, SelectionDAG &DAG) +getGlobalAddressWrapper(SDValue GA, const GlobalValue *GV, + SelectionDAG &DAG) const { // FIXME there is no actual debug info here DebugLoc dl = GA.getDebugLoc(); @@ -241,9 +242,9 @@ getGlobalAddressWrapper(SDValue GA, GlobalValue *GV, SelectionDAG &DAG) } SDValue XCoreTargetLowering:: -LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) +LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const { - GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal(); + const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal(); SDValue GA = DAG.getTargetGlobalAddress(GV, MVT::i32); // If it's a debug information descriptor, don't mess with it. 
if (DAG.isVerifiedDebugInfoDesc(Op)) @@ -262,12 +263,12 @@ static inline bool isZeroLengthArray(const Type *Ty) { } SDValue XCoreTargetLowering:: -LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) +LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const { // FIXME there isn't really debug info here DebugLoc dl = Op.getDebugLoc(); // transform to label + getid() * size - GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal(); + const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal(); SDValue GA = DAG.getTargetGlobalAddress(GV, MVT::i32); const GlobalVariable *GVar = dyn_cast<GlobalVariable>(GV); if (!GVar) { @@ -296,18 +297,18 @@ LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) } SDValue XCoreTargetLowering:: -LowerBlockAddress(SDValue Op, SelectionDAG &DAG) +LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const { DebugLoc DL = Op.getDebugLoc(); - BlockAddress *BA = cast<BlockAddressSDNode>(Op)->getBlockAddress(); + const BlockAddress *BA = cast<BlockAddressSDNode>(Op)->getBlockAddress(); SDValue Result = DAG.getBlockAddress(BA, getPointerTy(), /*isTarget=*/true); return DAG.getNode(XCoreISD::PCRelativeWrapper, DL, getPointerTy(), Result); } SDValue XCoreTargetLowering:: -LowerConstantPool(SDValue Op, SelectionDAG &DAG) +LowerConstantPool(SDValue Op, SelectionDAG &DAG) const { ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op); // FIXME there isn't really debug info here @@ -329,7 +330,7 @@ unsigned XCoreTargetLowering::getJumpTableEncoding() const { } SDValue XCoreTargetLowering:: -LowerBR_JT(SDValue Op, SelectionDAG &DAG) +LowerBR_JT(SDValue Op, SelectionDAG &DAG) const { SDValue Chain = Op.getOperand(0); SDValue Table = Op.getOperand(1); @@ -391,7 +392,7 @@ IsWordAlignedBasePlusConstantOffset(SDValue Addr, SDValue &AlignedBase, } SDValue XCoreTargetLowering:: -LowerLOAD(SDValue Op, SelectionDAG &DAG) +LowerLOAD(SDValue Op, SelectionDAG &DAG) const { LoadSDNode *LD = cast<LoadSDNode>(Op); assert(LD->getExtensionType() == ISD::NON_EXTLOAD && @@ -494,7 +495,7 @@ LowerLOAD(SDValue Op, SelectionDAG &DAG) } SDValue XCoreTargetLowering:: -LowerSTORE(SDValue Op, SelectionDAG &DAG) +LowerSTORE(SDValue Op, SelectionDAG &DAG) const { StoreSDNode *ST = cast<StoreSDNode>(Op); assert(!ST->isTruncatingStore() && "Unexpected store type"); @@ -554,7 +555,7 @@ LowerSTORE(SDValue Op, SelectionDAG &DAG) } SDValue XCoreTargetLowering:: -LowerSMUL_LOHI(SDValue Op, SelectionDAG &DAG) +LowerSMUL_LOHI(SDValue Op, SelectionDAG &DAG) const { assert(Op.getValueType() == MVT::i32 && Op.getOpcode() == ISD::SMUL_LOHI && "Unexpected operand to lower!"); @@ -571,7 +572,7 @@ LowerSMUL_LOHI(SDValue Op, SelectionDAG &DAG) } SDValue XCoreTargetLowering:: -LowerUMUL_LOHI(SDValue Op, SelectionDAG &DAG) +LowerUMUL_LOHI(SDValue Op, SelectionDAG &DAG) const { assert(Op.getValueType() == MVT::i32 && Op.getOpcode() == ISD::UMUL_LOHI && "Unexpected operand to lower!"); @@ -647,7 +648,7 @@ isADDADDMUL(SDValue Op, SDValue &Mul0, SDValue &Mul1, SDValue &Addend0, } SDValue XCoreTargetLowering:: -TryExpandADDWithMul(SDNode *N, SelectionDAG &DAG) +TryExpandADDWithMul(SDNode *N, SelectionDAG &DAG) const { SDValue Mul; SDValue Other; @@ -707,7 +708,7 @@ TryExpandADDWithMul(SDNode *N, SelectionDAG &DAG) } SDValue XCoreTargetLowering:: -ExpandADDSUB(SDNode *N, SelectionDAG &DAG) +ExpandADDSUB(SDNode *N, SelectionDAG &DAG) const { assert(N->getValueType(0) == MVT::i64 && (N->getOpcode() == ISD::ADD || N->getOpcode() == ISD::SUB) && @@ -747,7 +748,7 @@ ExpandADDSUB(SDNode *N, SelectionDAG &DAG) } SDValue 
XCoreTargetLowering:: -LowerVAARG(SDValue Op, SelectionDAG &DAG) +LowerVAARG(SDValue Op, SelectionDAG &DAG) const { llvm_unreachable("unimplemented"); // FIX Arguments passed by reference need a extra dereference. @@ -769,7 +770,7 @@ LowerVAARG(SDValue Op, SelectionDAG &DAG) } SDValue XCoreTargetLowering:: -LowerVASTART(SDValue Op, SelectionDAG &DAG) +LowerVASTART(SDValue Op, SelectionDAG &DAG) const { DebugLoc dl = Op.getDebugLoc(); // vastart stores the address of the VarArgsFrameIndex slot into the @@ -782,7 +783,8 @@ LowerVASTART(SDValue Op, SelectionDAG &DAG) false, false, 0); } -SDValue XCoreTargetLowering::LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) { +SDValue XCoreTargetLowering::LowerFRAMEADDR(SDValue Op, + SelectionDAG &DAG) const { DebugLoc dl = Op.getDebugLoc(); // Depths > 0 not supported yet! if (cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue() > 0) @@ -812,7 +814,7 @@ XCoreTargetLowering::LowerCall(SDValue Chain, SDValue Callee, const SmallVectorImpl<ISD::OutputArg> &Outs, const SmallVectorImpl<ISD::InputArg> &Ins, DebugLoc dl, SelectionDAG &DAG, - SmallVectorImpl<SDValue> &InVals) { + SmallVectorImpl<SDValue> &InVals) const { // XCore target does not yet support tail call optimization. isTailCall = false; @@ -839,7 +841,7 @@ XCoreTargetLowering::LowerCCCCallTo(SDValue Chain, SDValue Callee, const SmallVectorImpl<ISD::OutputArg> &Outs, const SmallVectorImpl<ISD::InputArg> &Ins, DebugLoc dl, SelectionDAG &DAG, - SmallVectorImpl<SDValue> &InVals) { + SmallVectorImpl<SDValue> &InVals) const { // Analyze operands of the call, assigning locations to each operand. SmallVector<CCValAssign, 16> ArgLocs; @@ -962,7 +964,7 @@ XCoreTargetLowering::LowerCallResult(SDValue Chain, SDValue InFlag, CallingConv::ID CallConv, bool isVarArg, const SmallVectorImpl<ISD::InputArg> &Ins, DebugLoc dl, SelectionDAG &DAG, - SmallVectorImpl<SDValue> &InVals) { + SmallVectorImpl<SDValue> &InVals) const { // Assign locations to each value returned by this call. 
SmallVector<CCValAssign, 16> RVLocs; @@ -994,7 +996,8 @@ XCoreTargetLowering::LowerFormalArguments(SDValue Chain, const SmallVectorImpl<ISD::InputArg> &Ins, DebugLoc dl, SelectionDAG &DAG, - SmallVectorImpl<SDValue> &InVals) { + SmallVectorImpl<SDValue> &InVals) + const { switch (CallConv) { default: @@ -1018,7 +1021,7 @@ XCoreTargetLowering::LowerCCCArguments(SDValue Chain, &Ins, DebugLoc dl, SelectionDAG &DAG, - SmallVectorImpl<SDValue> &InVals) { + SmallVectorImpl<SDValue> &InVals) const { MachineFunction &MF = DAG.getMachineFunction(); MachineFrameInfo *MFI = MF.getFrameInfo(); MachineRegisterInfo &RegInfo = MF.getRegInfo(); @@ -1132,7 +1135,7 @@ bool XCoreTargetLowering:: CanLowerReturn(CallingConv::ID CallConv, bool isVarArg, const SmallVectorImpl<EVT> &OutTys, const SmallVectorImpl<ISD::ArgFlagsTy> &ArgsFlags, - SelectionDAG &DAG) { + SelectionDAG &DAG) const { SmallVector<CCValAssign, 16> RVLocs; CCState CCInfo(CallConv, isVarArg, getTargetMachine(), RVLocs, *DAG.getContext()); @@ -1143,7 +1146,7 @@ SDValue XCoreTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool isVarArg, const SmallVectorImpl<ISD::OutputArg> &Outs, - DebugLoc dl, SelectionDAG &DAG) { + DebugLoc dl, SelectionDAG &DAG) const { // CCValAssign - represent the assignment of // the return value to a location @@ -1194,8 +1197,7 @@ XCoreTargetLowering::LowerReturn(SDValue Chain, MachineBasicBlock * XCoreTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, - MachineBasicBlock *BB, - DenseMap<MachineBasicBlock*, MachineBasicBlock*> *EM) const { + MachineBasicBlock *BB) const { const TargetInstrInfo &TII = *getTargetMachine().getInstrInfo(); DebugLoc dl = MI->getDebugLoc(); assert((MI->getOpcode() == XCore::SELECT_CC) && @@ -1225,12 +1227,9 @@ XCoreTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, F->insert(It, sinkMBB); // Update machine-CFG edges by first adding all successors of the current // block to the new block which will contain the Phi node for the select. - // Also inform sdisel of the edge changes. for (MachineBasicBlock::succ_iterator I = BB->succ_begin(), - E = BB->succ_end(); I != E; ++I) { - EM->insert(std::make_pair(*I, sinkMBB)); + E = BB->succ_end(); I != E; ++I) sinkMBB->addSuccessor(*I); - } // Next, remove all successors of the current block, and add the true // and fallthrough blocks as its successors. while (!BB->succ_empty()) diff --git a/lib/Target/XCore/XCoreISelLowering.h b/lib/Target/XCore/XCoreISelLowering.h index 3ccdeec..d8d2a3a 100644 --- a/lib/Target/XCore/XCoreISelLowering.h +++ b/lib/Target/XCore/XCoreISelLowering.h @@ -83,21 +83,21 @@ namespace llvm { virtual unsigned getJumpTableEncoding() const; /// LowerOperation - Provide custom lowering hooks for some operations. - virtual SDValue LowerOperation(SDValue Op, SelectionDAG &DAG); + virtual SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const; /// ReplaceNodeResults - Replace the results of node with an illegal result /// type with new values built out of custom code. /// virtual void ReplaceNodeResults(SDNode *N, SmallVectorImpl<SDValue>&Results, - SelectionDAG &DAG); + SelectionDAG &DAG) const; /// getTargetNodeName - This method returns the name of a target specific // DAG node. 
virtual const char *getTargetNodeName(unsigned Opcode) const; - virtual MachineBasicBlock *EmitInstrWithCustomInserter(MachineInstr *MI, - MachineBasicBlock *MBB, - DenseMap<MachineBasicBlock*, MachineBasicBlock*> *EM) const; + virtual MachineBasicBlock * + EmitInstrWithCustomInserter(MachineInstr *MI, + MachineBasicBlock *MBB) const; virtual bool isLegalAddressingMode(const AddrMode &AM, const Type *Ty) const; @@ -115,37 +115,37 @@ namespace llvm { bool isVarArg, const SmallVectorImpl<ISD::InputArg> &Ins, DebugLoc dl, SelectionDAG &DAG, - SmallVectorImpl<SDValue> &InVals); + SmallVectorImpl<SDValue> &InVals) const; SDValue LowerCCCCallTo(SDValue Chain, SDValue Callee, CallingConv::ID CallConv, bool isVarArg, bool isTailCall, const SmallVectorImpl<ISD::OutputArg> &Outs, const SmallVectorImpl<ISD::InputArg> &Ins, DebugLoc dl, SelectionDAG &DAG, - SmallVectorImpl<SDValue> &InVals); + SmallVectorImpl<SDValue> &InVals) const; SDValue LowerCallResult(SDValue Chain, SDValue InFlag, CallingConv::ID CallConv, bool isVarArg, const SmallVectorImpl<ISD::InputArg> &Ins, DebugLoc dl, SelectionDAG &DAG, - SmallVectorImpl<SDValue> &InVals); - SDValue getReturnAddressFrameIndex(SelectionDAG &DAG); - SDValue getGlobalAddressWrapper(SDValue GA, GlobalValue *GV, - SelectionDAG &DAG); + SmallVectorImpl<SDValue> &InVals) const; + SDValue getReturnAddressFrameIndex(SelectionDAG &DAG) const; + SDValue getGlobalAddressWrapper(SDValue GA, const GlobalValue *GV, + SelectionDAG &DAG) const; // Lower Operand specifics - SDValue LowerLOAD(SDValue Op, SelectionDAG &DAG); - SDValue LowerSTORE(SDValue Op, SelectionDAG &DAG); - SDValue LowerGlobalAddress(SDValue Op, SelectionDAG &DAG); - SDValue LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG); - SDValue LowerBlockAddress(SDValue Op, SelectionDAG &DAG); - SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG); - SDValue LowerBR_JT(SDValue Op, SelectionDAG &DAG); - SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG); - SDValue LowerVAARG(SDValue Op, SelectionDAG &DAG); - SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG); - SDValue LowerUMUL_LOHI(SDValue Op, SelectionDAG &DAG); - SDValue LowerSMUL_LOHI(SDValue Op, SelectionDAG &DAG); - SDValue LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG); + SDValue LowerLOAD(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerSTORE(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerBR_JT(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerVAARG(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerUMUL_LOHI(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerSMUL_LOHI(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const; // Inline asm support std::vector<unsigned> @@ -153,8 +153,8 @@ namespace llvm { EVT VT) const; // Expand specifics - SDValue TryExpandADDWithMul(SDNode *Op, SelectionDAG &DAG); - SDValue ExpandADDSUB(SDNode *Op, SelectionDAG &DAG); + SDValue TryExpandADDWithMul(SDNode *Op, SelectionDAG &DAG) const; + SDValue ExpandADDSUB(SDNode *Op, SelectionDAG &DAG) const; virtual SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const; @@ -171,7 +171,7 @@ namespace llvm { bool 
isVarArg, const SmallVectorImpl<ISD::InputArg> &Ins, DebugLoc dl, SelectionDAG &DAG, - SmallVectorImpl<SDValue> &InVals); + SmallVectorImpl<SDValue> &InVals) const; virtual SDValue LowerCall(SDValue Chain, SDValue Callee, @@ -180,19 +180,19 @@ namespace llvm { const SmallVectorImpl<ISD::OutputArg> &Outs, const SmallVectorImpl<ISD::InputArg> &Ins, DebugLoc dl, SelectionDAG &DAG, - SmallVectorImpl<SDValue> &InVals); + SmallVectorImpl<SDValue> &InVals) const; virtual SDValue LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool isVarArg, const SmallVectorImpl<ISD::OutputArg> &Outs, - DebugLoc dl, SelectionDAG &DAG); + DebugLoc dl, SelectionDAG &DAG) const; virtual bool CanLowerReturn(CallingConv::ID CallConv, bool isVarArg, const SmallVectorImpl<EVT> &OutTys, const SmallVectorImpl<ISD::ArgFlagsTy> &ArgsFlags, - SelectionDAG &DAG); + SelectionDAG &DAG) const; }; } diff --git a/lib/Target/XCore/XCoreRegisterInfo.cpp b/lib/Target/XCore/XCoreRegisterInfo.cpp index ab71d05..0cfb358 100644 --- a/lib/Target/XCore/XCoreRegisterInfo.cpp +++ b/lib/Target/XCore/XCoreRegisterInfo.cpp @@ -113,7 +113,7 @@ XCoreRegisterInfo::requiresRegisterScavenging(const MachineFunction &MF) const { } bool XCoreRegisterInfo::hasFP(const MachineFunction &MF) const { - return NoFramePointerElim || MF.getFrameInfo()->hasVarSizedObjects(); + return DisableFramePointerElim(MF) || MF.getFrameInfo()->hasVarSizedObjects(); } // This function eliminates ADJCALLSTACKDOWN, @@ -225,12 +225,9 @@ XCoreRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, unsigned FramePtr = XCore::R10; if (!isUs) { - if (!RS) { - std::string msg; - raw_string_ostream Msg(msg); - Msg << "eliminateFrameIndex Frame size too big: " << Offset; - llvm_report_error(Msg.str()); - } + if (!RS) + report_fatal_error("eliminateFrameIndex Frame size too big: " + + Twine(Offset)); unsigned ScratchReg = RS->scavengeRegister(XCore::GRRegsRegisterClass, II, SPAdj); loadConstant(MBB, II, ScratchReg, Offset, dl); @@ -278,12 +275,9 @@ XCoreRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, } } else { bool isU6 = isImmU6(Offset); - if (!isU6 && !isImmU16(Offset)) { - std::string msg; - raw_string_ostream Msg(msg); - Msg << "eliminateFrameIndex Frame size too big: " << Offset; - llvm_report_error(Msg.str()); - } + if (!isU6 && !isImmU16(Offset)) + report_fatal_error("eliminateFrameIndex Frame size too big: " + + Twine(Offset)); switch (MI.getOpcode()) { int NewOpcode; @@ -360,10 +354,7 @@ loadConstant(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, // TODO use mkmsk if possible. if (!isImmU16(Value)) { // TODO use constant pool. - std::string msg; - raw_string_ostream Msg(msg); - Msg << "loadConstant value too big " << Value; - llvm_report_error(Msg.str()); + report_fatal_error("loadConstant value too big " + Twine(Value)); } int Opcode = isImmU6(Value) ? XCore::LDC_ru6 : XCore::LDC_lru6; BuildMI(MBB, I, dl, TII.get(Opcode), DstReg).addImm(Value); @@ -375,12 +366,8 @@ storeToStack(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, assert(Offset%4 == 0 && "Misaligned stack offset"); Offset/=4; bool isU6 = isImmU6(Offset); - if (!isU6 && !isImmU16(Offset)) { - std::string msg; - raw_string_ostream Msg(msg); - Msg << "storeToStack offset too big " << Offset; - llvm_report_error(Msg.str()); - } + if (!isU6 && !isImmU16(Offset)) + report_fatal_error("storeToStack offset too big " + Twine(Offset)); int Opcode = isU6 ? 
XCore::STWSP_ru6 : XCore::STWSP_lru6; BuildMI(MBB, I, dl, TII.get(Opcode)) .addReg(SrcReg) @@ -393,12 +380,8 @@ loadFromStack(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, assert(Offset%4 == 0 && "Misaligned stack offset"); Offset/=4; bool isU6 = isImmU6(Offset); - if (!isU6 && !isImmU16(Offset)) { - std::string msg; - raw_string_ostream Msg(msg); - Msg << "loadFromStack offset too big " << Offset; - llvm_report_error(Msg.str()); - } + if (!isU6 && !isImmU16(Offset)) + report_fatal_error("loadFromStack offset too big " + Twine(Offset)); int Opcode = isU6 ? XCore::LDWSP_ru6 : XCore::LDWSP_lru6; BuildMI(MBB, I, dl, TII.get(Opcode), DstReg) .addImm(Offset); @@ -425,10 +408,7 @@ void XCoreRegisterInfo::emitPrologue(MachineFunction &MF) const { if (!isU6 && !isImmU16(FrameSize)) { // FIXME could emit multiple instructions. - std::string msg; - raw_string_ostream Msg(msg); - Msg << "emitPrologue Frame size too big: " << FrameSize; - llvm_report_error(Msg.str()); + report_fatal_error("emitPrologue Frame size too big: " + Twine(FrameSize)); } bool emitFrameMoves = needsFrameMoves(MF); @@ -549,10 +529,7 @@ void XCoreRegisterInfo::emitEpilogue(MachineFunction &MF, if (!isU6 && !isImmU16(FrameSize)) { // FIXME could emit multiple instructions. - std::string msg; - raw_string_ostream Msg(msg); - Msg << "emitEpilogue Frame size too big: " << FrameSize; - llvm_report_error(Msg.str()); + report_fatal_error("emitEpilogue Frame size too big: " + Twine(FrameSize)); } if (FrameSize) { diff --git a/lib/Target/XCore/XCoreSelectionDAGInfo.cpp b/lib/Target/XCore/XCoreSelectionDAGInfo.cpp new file mode 100644 index 0000000..6aac237 --- /dev/null +++ b/lib/Target/XCore/XCoreSelectionDAGInfo.cpp @@ -0,0 +1,22 @@ +//===-- XCoreSelectionDAGInfo.cpp - XCore SelectionDAG Info ---------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the XCoreSelectionDAGInfo class. +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "xcore-selectiondag-info" +#include "XCoreSelectionDAGInfo.h" +using namespace llvm; + +XCoreSelectionDAGInfo::XCoreSelectionDAGInfo() { +} + +XCoreSelectionDAGInfo::~XCoreSelectionDAGInfo() { +} diff --git a/lib/Target/XCore/XCoreSelectionDAGInfo.h b/lib/Target/XCore/XCoreSelectionDAGInfo.h new file mode 100644 index 0000000..fd96716 --- /dev/null +++ b/lib/Target/XCore/XCoreSelectionDAGInfo.h @@ -0,0 +1,29 @@ +//===-- XCoreSelectionDAGInfo.h - XCore SelectionDAG Info -------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines the XCore subclass for TargetSelectionDAGInfo. 
+// +//===----------------------------------------------------------------------===// + +#ifndef XCORESELECTIONDAGINFO_H +#define XCORESELECTIONDAGINFO_H + +#include "llvm/Target/TargetSelectionDAGInfo.h" + +namespace llvm { + +class XCoreSelectionDAGInfo : public TargetSelectionDAGInfo { +public: + XCoreSelectionDAGInfo(); + ~XCoreSelectionDAGInfo(); +}; + +} + +#endif diff --git a/lib/Target/XCore/XCoreTargetMachine.h b/lib/Target/XCore/XCoreTargetMachine.h index b0b1464..701a6f1 100644 --- a/lib/Target/XCore/XCoreTargetMachine.h +++ b/lib/Target/XCore/XCoreTargetMachine.h @@ -36,8 +36,8 @@ public: virtual const XCoreInstrInfo *getInstrInfo() const { return &InstrInfo; } virtual const XCoreFrameInfo *getFrameInfo() const { return &FrameInfo; } virtual const XCoreSubtarget *getSubtargetImpl() const { return &Subtarget; } - virtual XCoreTargetLowering *getTargetLowering() const { - return const_cast<XCoreTargetLowering*>(&TLInfo); + virtual const XCoreTargetLowering *getTargetLowering() const { + return &TLInfo; } virtual const TargetRegisterInfo *getRegisterInfo() const { diff --git a/lib/Target/XCore/XCoreTargetObjectFile.cpp b/lib/Target/XCore/XCoreTargetObjectFile.cpp index 7de3b55..cdf5a53 100644 --- a/lib/Target/XCore/XCoreTargetObjectFile.cpp +++ b/lib/Target/XCore/XCoreTargetObjectFile.cpp @@ -9,7 +9,8 @@ #include "XCoreTargetObjectFile.h" #include "XCoreSubtarget.h" -#include "MCSectionXCore.h" +#include "llvm/MC/MCContext.h" +#include "llvm/MC/MCSectionELF.h" #include "llvm/Target/TargetMachine.h" using namespace llvm; @@ -18,34 +19,31 @@ void XCoreTargetObjectFile::Initialize(MCContext &Ctx, const TargetMachine &TM){ TargetLoweringObjectFileELF::Initialize(Ctx, TM); DataSection = - MCSectionXCore::Create(".dp.data", MCSectionELF::SHT_PROGBITS, - MCSectionELF::SHF_ALLOC | MCSectionELF::SHF_WRITE | - MCSectionXCore::SHF_DP_SECTION, - SectionKind::getDataRel(), false, getContext()); + Ctx.getELFSection(".dp.data", MCSectionELF::SHT_PROGBITS, + MCSectionELF::SHF_ALLOC | MCSectionELF::SHF_WRITE | + MCSectionELF::XCORE_SHF_DP_SECTION, + SectionKind::getDataRel(), false); BSSSection = - MCSectionXCore::Create(".dp.bss", MCSectionELF::SHT_NOBITS, - MCSectionELF::SHF_ALLOC | MCSectionELF::SHF_WRITE | - MCSectionXCore::SHF_DP_SECTION, - SectionKind::getBSS(), false, getContext()); + Ctx.getELFSection(".dp.bss", MCSectionELF::SHT_NOBITS, + MCSectionELF::SHF_ALLOC | MCSectionELF::SHF_WRITE | + MCSectionELF::XCORE_SHF_DP_SECTION, + SectionKind::getBSS(), false); MergeableConst4Section = - MCSectionXCore::Create(".cp.rodata.cst4", MCSectionELF::SHT_PROGBITS, - MCSectionELF::SHF_ALLOC | MCSectionELF::SHF_MERGE | - MCSectionXCore::SHF_CP_SECTION, - SectionKind::getMergeableConst4(), false, - getContext()); + Ctx.getELFSection(".cp.rodata.cst4", MCSectionELF::SHT_PROGBITS, + MCSectionELF::SHF_ALLOC | MCSectionELF::SHF_MERGE | + MCSectionELF::XCORE_SHF_CP_SECTION, + SectionKind::getMergeableConst4(), false); MergeableConst8Section = - MCSectionXCore::Create(".cp.rodata.cst8", MCSectionELF::SHT_PROGBITS, - MCSectionELF::SHF_ALLOC | MCSectionELF::SHF_MERGE | - MCSectionXCore::SHF_CP_SECTION, - SectionKind::getMergeableConst8(), false, - getContext()); + Ctx.getELFSection(".cp.rodata.cst8", MCSectionELF::SHT_PROGBITS, + MCSectionELF::SHF_ALLOC | MCSectionELF::SHF_MERGE | + MCSectionELF::XCORE_SHF_CP_SECTION, + SectionKind::getMergeableConst8(), false); MergeableConst16Section = - MCSectionXCore::Create(".cp.rodata.cst16", MCSectionELF::SHT_PROGBITS, - MCSectionELF::SHF_ALLOC | 
MCSectionELF::SHF_MERGE | - MCSectionXCore::SHF_CP_SECTION, - SectionKind::getMergeableConst16(), false, - getContext()); + Ctx.getELFSection(".cp.rodata.cst16", MCSectionELF::SHT_PROGBITS, + MCSectionELF::SHF_ALLOC | MCSectionELF::SHF_MERGE | + MCSectionELF::XCORE_SHF_CP_SECTION, + SectionKind::getMergeableConst16(), false); // TLS globals are lowered in the backend to arrays indexed by the current // thread id. After lowering they require no special handling by the linker @@ -54,11 +52,10 @@ void XCoreTargetObjectFile::Initialize(MCContext &Ctx, const TargetMachine &TM){ TLSBSSSection = BSSSection; ReadOnlySection = - MCSectionXCore::Create(".cp.rodata", MCSectionELF::SHT_PROGBITS, - MCSectionELF::SHF_ALLOC | - MCSectionXCore::SHF_CP_SECTION, - SectionKind::getReadOnlyWithRel(), false, - getContext()); + Ctx.getELFSection(".cp.rodata", MCSectionELF::SHT_PROGBITS, + MCSectionELF::SHF_ALLOC | + MCSectionELF::XCORE_SHF_CP_SECTION, + SectionKind::getReadOnlyWithRel(), false); // Dynamic linking is not supported. Data with relocations is placed in the // same section as data without relocations. diff --git a/lib/Transforms/IPO/ArgumentPromotion.cpp b/lib/Transforms/IPO/ArgumentPromotion.cpp index 40a87e8..89f213e 100644 --- a/lib/Transforms/IPO/ArgumentPromotion.cpp +++ b/lib/Transforms/IPO/ArgumentPromotion.cpp @@ -64,7 +64,7 @@ namespace { CallGraphSCCPass::getAnalysisUsage(AU); } - virtual bool runOnSCC(std::vector<CallGraphNode *> &SCC); + virtual bool runOnSCC(CallGraphSCC &SCC); static char ID; // Pass identification, replacement for typeid explicit ArgPromotion(unsigned maxElements = 3) : CallGraphSCCPass(&ID), maxElements(maxElements) {} @@ -91,20 +91,21 @@ Pass *llvm::createArgumentPromotionPass(unsigned maxElements) { return new ArgPromotion(maxElements); } -bool ArgPromotion::runOnSCC(std::vector<CallGraphNode *> &SCC) { +bool ArgPromotion::runOnSCC(CallGraphSCC &SCC) { bool Changed = false, LocalChange; do { // Iterate until we stop promoting from this SCC. LocalChange = false; // Attempt to promote arguments from all functions in this SCC. - for (unsigned i = 0, e = SCC.size(); i != e; ++i) - if (CallGraphNode *CGN = PromoteArguments(SCC[i])) { + for (CallGraphSCC::iterator I = SCC.begin(), E = SCC.end(); I != E; ++I) { + if (CallGraphNode *CGN = PromoteArguments(*I)) { LocalChange = true; - SCC[i] = CGN; + SCC.ReplaceNode(*I, CGN); } + } Changed |= LocalChange; // Remember that we changed something. } while (LocalChange); - + return Changed; } @@ -873,8 +874,14 @@ CallGraphNode *ArgPromotion::DoPromotion(Function *F, NF_CGN->stealCalledFunctionsFrom(CG[F]); - // Now that the old function is dead, delete it. - delete CG.removeFunctionFromModule(F); + // Now that the old function is dead, delete it. If there is a dangling + // reference to the CallgraphNode, just leave the dead function around for + // someone else to nuke. 
+ CallGraphNode *CGN = CG[F]; + if (CGN->getNumReferences() == 0) + delete CG.removeFunctionFromModule(CGN); + else + F->setLinkage(Function::ExternalLinkage); return NF_CGN; } diff --git a/lib/Transforms/IPO/CMakeLists.txt b/lib/Transforms/IPO/CMakeLists.txt index 92bef3b..65483e8 100644 --- a/lib/Transforms/IPO/CMakeLists.txt +++ b/lib/Transforms/IPO/CMakeLists.txt @@ -23,3 +23,5 @@ add_llvm_library(LLVMipo StripSymbols.cpp StructRetPromotion.cpp ) + +target_link_libraries (LLVMipo LLVMScalarOpts LLVMInstCombine) diff --git a/lib/Transforms/IPO/DeadArgumentElimination.cpp b/lib/Transforms/IPO/DeadArgumentElimination.cpp index 227602d..6443dd4 100644 --- a/lib/Transforms/IPO/DeadArgumentElimination.cpp +++ b/lib/Transforms/IPO/DeadArgumentElimination.cpp @@ -243,6 +243,9 @@ bool DAE::DeleteDeadVarargs(Function &Fn) { if (cast<CallInst>(Call)->isTailCall()) cast<CallInst>(New)->setTailCall(); } + if (MDNode *N = Call->getDbgMetadata()) + New->setDbgMetadata(N); + Args.clear(); if (!Call->use_empty()) @@ -694,18 +697,6 @@ bool DAE::RemoveDeadStuffFromFunction(Function *F) { AttrListPtr NewPAL = AttrListPtr::get(AttributesVec.begin(), AttributesVec.end()); - // Work around LLVM bug PR56: the CWriter cannot emit varargs functions which - // have zero fixed arguments. - // - // Note that we apply this hack for a vararg fuction that does not have any - // arguments anymore, but did have them before (so don't bother fixing - // functions that were already broken wrt CWriter). - bool ExtraArgHack = false; - if (Params.empty() && FTy->isVarArg() && FTy->getNumParams() != 0) { - ExtraArgHack = true; - Params.push_back(Type::getInt32Ty(F->getContext())); - } - // Create the new function type based on the recomputed parameters. FunctionType *NFTy = FunctionType::get(NRetTy, Params, FTy->isVarArg()); @@ -755,9 +746,6 @@ bool DAE::RemoveDeadStuffFromFunction(Function *F) { AttributesVec.push_back(AttributeWithIndex::get(Args.size(), Attrs)); } - if (ExtraArgHack) - Args.push_back(UndefValue::get(Type::getInt32Ty(F->getContext()))); - // Push any varargs arguments on the list. Don't forget their attributes. for (CallSite::arg_iterator E = CS.arg_end(); I != E; ++I, ++i) { Args.push_back(*I); @@ -785,6 +773,9 @@ bool DAE::RemoveDeadStuffFromFunction(Function *F) { if (cast<CallInst>(Call)->isTailCall()) cast<CallInst>(New)->setTailCall(); } + if (MDNode *N = Call->getDbgMetadata()) + New->setDbgMetadata(N); + Args.clear(); if (!Call->use_empty()) { diff --git a/lib/Transforms/IPO/FunctionAttrs.cpp b/lib/Transforms/IPO/FunctionAttrs.cpp index 298d5cf..9bd7af6 100644 --- a/lib/Transforms/IPO/FunctionAttrs.cpp +++ b/lib/Transforms/IPO/FunctionAttrs.cpp @@ -44,20 +44,20 @@ namespace { FunctionAttrs() : CallGraphSCCPass(&ID) {} // runOnSCC - Analyze the SCC, performing the transformation if possible. - bool runOnSCC(std::vector<CallGraphNode *> &SCC); + bool runOnSCC(CallGraphSCC &SCC); // AddReadAttrs - Deduce readonly/readnone attributes for the SCC. - bool AddReadAttrs(const std::vector<CallGraphNode *> &SCC); + bool AddReadAttrs(const CallGraphSCC &SCC); // AddNoCaptureAttrs - Deduce nocapture attributes for the SCC. - bool AddNoCaptureAttrs(const std::vector<CallGraphNode *> &SCC); + bool AddNoCaptureAttrs(const CallGraphSCC &SCC); // IsFunctionMallocLike - Does this function allocate new memory? bool IsFunctionMallocLike(Function *F, SmallPtrSet<Function*, 8> &) const; // AddNoAliasAttrs - Deduce noalias attributes for the SCC. 
- bool AddNoAliasAttrs(const std::vector<CallGraphNode *> &SCC); + bool AddNoAliasAttrs(const CallGraphSCC &SCC); virtual void getAnalysisUsage(AnalysisUsage &AU) const { AU.setPreservesCFG(); @@ -123,19 +123,19 @@ bool FunctionAttrs::PointsToLocalMemory(Value *V) { } /// AddReadAttrs - Deduce readonly/readnone attributes for the SCC. -bool FunctionAttrs::AddReadAttrs(const std::vector<CallGraphNode *> &SCC) { +bool FunctionAttrs::AddReadAttrs(const CallGraphSCC &SCC) { SmallPtrSet<Function*, 8> SCCNodes; // Fill SCCNodes with the elements of the SCC. Used for quickly // looking up whether a given CallGraphNode is in this SCC. - for (unsigned i = 0, e = SCC.size(); i != e; ++i) - SCCNodes.insert(SCC[i]->getFunction()); + for (CallGraphSCC::iterator I = SCC.begin(), E = SCC.end(); I != E; ++I) + SCCNodes.insert((*I)->getFunction()); // Check if any of the functions in the SCC read or write memory. If they // write memory then they can't be marked readnone or readonly. bool ReadsMemory = false; - for (unsigned i = 0, e = SCC.size(); i != e; ++i) { - Function *F = SCC[i]->getFunction(); + for (CallGraphSCC::iterator I = SCC.begin(), E = SCC.end(); I != E; ++I) { + Function *F = (*I)->getFunction(); if (F == 0) // External node - may write memory. Just give up. @@ -210,8 +210,8 @@ bool FunctionAttrs::AddReadAttrs(const std::vector<CallGraphNode *> &SCC) { // Success! Functions in this SCC do not access memory, or only read memory. // Give them the appropriate attribute. bool MadeChange = false; - for (unsigned i = 0, e = SCC.size(); i != e; ++i) { - Function *F = SCC[i]->getFunction(); + for (CallGraphSCC::iterator I = SCC.begin(), E = SCC.end(); I != E; ++I) { + Function *F = (*I)->getFunction(); if (F->doesNotAccessMemory()) // Already perfect! @@ -239,13 +239,13 @@ bool FunctionAttrs::AddReadAttrs(const std::vector<CallGraphNode *> &SCC) { } /// AddNoCaptureAttrs - Deduce nocapture attributes for the SCC. -bool FunctionAttrs::AddNoCaptureAttrs(const std::vector<CallGraphNode *> &SCC) { +bool FunctionAttrs::AddNoCaptureAttrs(const CallGraphSCC &SCC) { bool Changed = false; // Check each function in turn, determining which pointer arguments are not // captured. - for (unsigned i = 0, e = SCC.size(); i != e; ++i) { - Function *F = SCC[i]->getFunction(); + for (CallGraphSCC::iterator I = SCC.begin(), E = SCC.end(); I != E; ++I) { + Function *F = (*I)->getFunction(); if (F == 0) // External node - skip it; @@ -334,18 +334,18 @@ bool FunctionAttrs::IsFunctionMallocLike(Function *F, } /// AddNoAliasAttrs - Deduce noalias attributes for the SCC. -bool FunctionAttrs::AddNoAliasAttrs(const std::vector<CallGraphNode *> &SCC) { +bool FunctionAttrs::AddNoAliasAttrs(const CallGraphSCC &SCC) { SmallPtrSet<Function*, 8> SCCNodes; // Fill SCCNodes with the elements of the SCC. Used for quickly // looking up whether a given CallGraphNode is in this SCC. - for (unsigned i = 0, e = SCC.size(); i != e; ++i) - SCCNodes.insert(SCC[i]->getFunction()); + for (CallGraphSCC::iterator I = SCC.begin(), E = SCC.end(); I != E; ++I) + SCCNodes.insert((*I)->getFunction()); // Check each function in turn, determining which functions return noalias // pointers. 
- for (unsigned i = 0, e = SCC.size(); i != e; ++i) { - Function *F = SCC[i]->getFunction(); + for (CallGraphSCC::iterator I = SCC.begin(), E = SCC.end(); I != E; ++I) { + Function *F = (*I)->getFunction(); if (F == 0) // External node - skip it; @@ -370,8 +370,8 @@ bool FunctionAttrs::AddNoAliasAttrs(const std::vector<CallGraphNode *> &SCC) { } bool MadeChange = false; - for (unsigned i = 0, e = SCC.size(); i != e; ++i) { - Function *F = SCC[i]->getFunction(); + for (CallGraphSCC::iterator I = SCC.begin(), E = SCC.end(); I != E; ++I) { + Function *F = (*I)->getFunction(); if (F->doesNotAlias(0) || !F->getReturnType()->isPointerTy()) continue; @@ -383,7 +383,7 @@ bool FunctionAttrs::AddNoAliasAttrs(const std::vector<CallGraphNode *> &SCC) { return MadeChange; } -bool FunctionAttrs::runOnSCC(std::vector<CallGraphNode *> &SCC) { +bool FunctionAttrs::runOnSCC(CallGraphSCC &SCC) { bool Changed = AddReadAttrs(SCC); Changed |= AddNoCaptureAttrs(SCC); Changed |= AddNoAliasAttrs(SCC); diff --git a/lib/Transforms/IPO/GlobalOpt.cpp b/lib/Transforms/IPO/GlobalOpt.cpp index ddff5ef..b429213 100644 --- a/lib/Transforms/IPO/GlobalOpt.cpp +++ b/lib/Transforms/IPO/GlobalOpt.cpp @@ -143,7 +143,8 @@ struct GlobalStatus { static bool SafeToDestroyConstant(const Constant *C) { if (isa<GlobalValue>(C)) return false; - for (Value::const_use_iterator UI = C->use_begin(), E = C->use_end(); UI != E; ++UI) + for (Value::const_use_iterator UI = C->use_begin(), E = C->use_end(); UI != E; + ++UI) if (const Constant *CU = dyn_cast<Constant>(*UI)) { if (!SafeToDestroyConstant(CU)) return false; } else @@ -158,7 +159,8 @@ static bool SafeToDestroyConstant(const Constant *C) { /// static bool AnalyzeGlobal(const Value *V, GlobalStatus &GS, SmallPtrSet<const PHINode*, 16> &PHIUsers) { - for (Value::const_use_iterator UI = V->use_begin(), E = V->use_end(); UI != E; ++UI) + for (Value::const_use_iterator UI = V->use_begin(), E = V->use_end(); UI != E; + ++UI) if (const ConstantExpr *CE = dyn_cast<ConstantExpr>(*UI)) { GS.HasNonInstructionUser = true; @@ -185,7 +187,8 @@ static bool AnalyzeGlobal(const Value *V, GlobalStatus &GS, // value, not an aggregate), keep more specific information about // stores. if (GS.StoredType != GlobalStatus::isStored) { - if (const GlobalVariable *GV = dyn_cast<GlobalVariable>(SI->getOperand(1))){ + if (const GlobalVariable *GV = dyn_cast<GlobalVariable>( + SI->getOperand(1))) { Value *StoredVal = SI->getOperand(0); if (StoredVal == GV->getInitializer()) { if (GS.StoredType < GlobalStatus::isInitializerStored) @@ -610,62 +613,69 @@ static GlobalVariable *SRAGlobal(GlobalVariable *GV, const TargetData &TD) { /// AllUsesOfValueWillTrapIfNull - Return true if all users of the specified /// value will trap if the value is dynamically null. PHIs keeps track of any /// phi nodes we've seen to avoid reprocessing them. -static bool AllUsesOfValueWillTrapIfNull(Value *V, - SmallPtrSet<PHINode*, 8> &PHIs) { - for (Value::use_iterator UI = V->use_begin(), E = V->use_end(); UI != E; ++UI) - if (isa<LoadInst>(*UI)) { +static bool AllUsesOfValueWillTrapIfNull(const Value *V, + SmallPtrSet<const PHINode*, 8> &PHIs) { + for (Value::const_use_iterator UI = V->use_begin(), E = V->use_end(); UI != E; + ++UI) { + const User *U = *UI; + + if (isa<LoadInst>(U)) { // Will trap. 
- } else if (StoreInst *SI = dyn_cast<StoreInst>(*UI)) { + } else if (const StoreInst *SI = dyn_cast<StoreInst>(U)) { if (SI->getOperand(0) == V) { - //cerr << "NONTRAPPING USE: " << **UI; + //cerr << "NONTRAPPING USE: " << *U; return false; // Storing the value. } - } else if (CallInst *CI = dyn_cast<CallInst>(*UI)) { + } else if (const CallInst *CI = dyn_cast<CallInst>(U)) { if (CI->getCalledValue() != V) { - //cerr << "NONTRAPPING USE: " << **UI; + //cerr << "NONTRAPPING USE: " << *U; return false; // Not calling the ptr } - } else if (InvokeInst *II = dyn_cast<InvokeInst>(*UI)) { + } else if (const InvokeInst *II = dyn_cast<InvokeInst>(U)) { if (II->getCalledValue() != V) { - //cerr << "NONTRAPPING USE: " << **UI; + //cerr << "NONTRAPPING USE: " << *U; return false; // Not calling the ptr } - } else if (BitCastInst *CI = dyn_cast<BitCastInst>(*UI)) { + } else if (const BitCastInst *CI = dyn_cast<BitCastInst>(U)) { if (!AllUsesOfValueWillTrapIfNull(CI, PHIs)) return false; - } else if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(*UI)) { + } else if (const GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(U)) { if (!AllUsesOfValueWillTrapIfNull(GEPI, PHIs)) return false; - } else if (PHINode *PN = dyn_cast<PHINode>(*UI)) { + } else if (const PHINode *PN = dyn_cast<PHINode>(U)) { // If we've already seen this phi node, ignore it, it has already been // checked. if (PHIs.insert(PN) && !AllUsesOfValueWillTrapIfNull(PN, PHIs)) return false; - } else if (isa<ICmpInst>(*UI) && + } else if (isa<ICmpInst>(U) && isa<ConstantPointerNull>(UI->getOperand(1))) { // Ignore icmp X, null } else { - //cerr << "NONTRAPPING USE: " << **UI; + //cerr << "NONTRAPPING USE: " << *U; return false; } + } return true; } /// AllUsesOfLoadedValueWillTrapIfNull - Return true if all uses of any loads /// from GV will trap if the loaded value is null. Note that this also permits /// comparisons of the loaded value against null, as a special case. -static bool AllUsesOfLoadedValueWillTrapIfNull(GlobalVariable *GV) { - for (Value::use_iterator UI = GV->use_begin(), E = GV->use_end(); UI!=E; ++UI) - if (LoadInst *LI = dyn_cast<LoadInst>(*UI)) { - SmallPtrSet<PHINode*, 8> PHIs; +static bool AllUsesOfLoadedValueWillTrapIfNull(const GlobalVariable *GV) { + for (Value::const_use_iterator UI = GV->use_begin(), E = GV->use_end(); + UI != E; ++UI) { + const User *U = *UI; + + if (const LoadInst *LI = dyn_cast<LoadInst>(U)) { + SmallPtrSet<const PHINode*, 8> PHIs; if (!AllUsesOfValueWillTrapIfNull(LI, PHIs)) return false; - } else if (isa<StoreInst>(*UI)) { + } else if (isa<StoreInst>(U)) { // Ignore stores to the global. } else { // We don't know or understand this user, bail out. - //cerr << "UNKNOWN USER OF GLOBAL!: " << **UI; + //cerr << "UNKNOWN USER OF GLOBAL!: " << *U; return false; } - + } return true; } @@ -682,16 +692,17 @@ static bool OptimizeAwayTrappingUsesOfValue(Value *V, Constant *NewV) { Changed = true; } } else if (isa<CallInst>(I) || isa<InvokeInst>(I)) { - if (I->getOperand(0) == V) { + CallSite CS(I); + if (CS.getCalledValue() == V) { // Calling through the pointer! Turn into a direct call, but be careful // that the pointer is not also being passed as an argument. 
- I->setOperand(0, NewV); + CS.setCalledFunction(NewV); Changed = true; bool PassedAsArg = false; - for (unsigned i = 1, e = I->getNumOperands(); i != e; ++i) - if (I->getOperand(i) == V) { + for (unsigned i = 0, e = CS.arg_size(); i != e; ++i) + if (CS.getArgument(i) == V) { PassedAsArg = true; - I->setOperand(i, NewV); + CS.setArgument(i, NewV); } if (PassedAsArg) { @@ -938,29 +949,31 @@ static GlobalVariable *OptimizeGlobalAddressOfMalloc(GlobalVariable *GV, /// to make sure that there are no complex uses of V. We permit simple things /// like dereferencing the pointer, but not storing through the address, unless /// it is to the specified global. -static bool ValueIsOnlyUsedLocallyOrStoredToOneGlobal(Instruction *V, - GlobalVariable *GV, - SmallPtrSet<PHINode*, 8> &PHIs) { - for (Value::use_iterator UI = V->use_begin(), E = V->use_end(); UI != E;++UI){ - Instruction *Inst = cast<Instruction>(*UI); - +static bool ValueIsOnlyUsedLocallyOrStoredToOneGlobal(const Instruction *V, + const GlobalVariable *GV, + SmallPtrSet<const PHINode*, 8> &PHIs) { + for (Value::const_use_iterator UI = V->use_begin(), E = V->use_end(); + UI != E; ++UI) { + const Instruction *Inst = cast<Instruction>(*UI); + if (isa<LoadInst>(Inst) || isa<CmpInst>(Inst)) { continue; // Fine, ignore. } - if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) { + if (const StoreInst *SI = dyn_cast<StoreInst>(Inst)) { if (SI->getOperand(0) == V && SI->getOperand(1) != GV) return false; // Storing the pointer itself... bad. continue; // Otherwise, storing through it, or storing into GV... fine. } - if (isa<GetElementPtrInst>(Inst)) { + // Must index into the array and into the struct. + if (isa<GetElementPtrInst>(Inst) && Inst->getNumOperands() >= 3) { if (!ValueIsOnlyUsedLocallyOrStoredToOneGlobal(Inst, GV, PHIs)) return false; continue; } - if (PHINode *PN = dyn_cast<PHINode>(Inst)) { + if (const PHINode *PN = dyn_cast<PHINode>(Inst)) { // PHIs are ok if all uses are ok. Don't infinitely recurse through PHI // cycles. if (PHIs.insert(PN)) @@ -969,7 +982,7 @@ static bool ValueIsOnlyUsedLocallyOrStoredToOneGlobal(Instruction *V, continue; } - if (BitCastInst *BCI = dyn_cast<BitCastInst>(Inst)) { + if (const BitCastInst *BCI = dyn_cast<BitCastInst>(Inst)) { if (!ValueIsOnlyUsedLocallyOrStoredToOneGlobal(BCI, GV, PHIs)) return false; continue; @@ -1029,11 +1042,12 @@ static void ReplaceUsesOfMallocWithGlobal(Instruction *Alloc, /// of a load) are simple enough to perform heap SRA on. This permits GEP's /// that index through the array and struct field, icmps of null, and PHIs. static bool LoadUsesSimpleEnoughForHeapSRA(const Value *V, - SmallPtrSet<const PHINode*, 32> &LoadUsingPHIs, - SmallPtrSet<const PHINode*, 32> &LoadUsingPHIsPerLoad) { + SmallPtrSet<const PHINode*, 32> &LoadUsingPHIs, + SmallPtrSet<const PHINode*, 32> &LoadUsingPHIsPerLoad) { // We permit two users of the load: setcc comparing against the null // pointer, and a getelementptr of a specific form. - for (Value::const_use_iterator UI = V->use_begin(), E = V->use_end(); UI != E;++UI){ + for (Value::const_use_iterator UI = V->use_begin(), E = V->use_end(); UI != E; + ++UI) { const Instruction *User = cast<Instruction>(*UI); // Comparison against null is ok. 
@@ -1084,8 +1098,8 @@ static bool AllGlobalLoadUsesSimpleEnoughForHeapSRA(const GlobalVariable *GV, Instruction *StoredVal) { SmallPtrSet<const PHINode*, 32> LoadUsingPHIs; SmallPtrSet<const PHINode*, 32> LoadUsingPHIsPerLoad; - for (Value::const_use_iterator UI = GV->use_begin(), E = GV->use_end(); UI != E; - ++UI) + for (Value::const_use_iterator UI = GV->use_begin(), E = GV->use_end(); + UI != E; ++UI) if (const LoadInst *LI = dyn_cast<LoadInst>(*UI)) { if (!LoadUsesSimpleEnoughForHeapSRA(LI, LoadUsingPHIs, LoadUsingPHIsPerLoad)) @@ -1098,8 +1112,8 @@ static bool AllGlobalLoadUsesSimpleEnoughForHeapSRA(const GlobalVariable *GV, // that all inputs the to the PHI nodes are in the same equivalence sets. // Check to verify that all operands of the PHIs are either PHIS that can be // transformed, loads from GV, or MI itself. - for (SmallPtrSet<const PHINode*, 32>::const_iterator I = LoadUsingPHIs.begin(), - E = LoadUsingPHIs.end(); I != E; ++I) { + for (SmallPtrSet<const PHINode*, 32>::const_iterator I = LoadUsingPHIs.begin() + , E = LoadUsingPHIs.end(); I != E; ++I) { const PHINode *PN = *I; for (unsigned op = 0, e = PN->getNumIncomingValues(); op != e; ++op) { Value *InVal = PN->getIncomingValue(op); @@ -1448,6 +1462,9 @@ static bool TryToOptimizeStoreOfMallocToGlobal(GlobalVariable *GV, const Type *AllocTy, Module::global_iterator &GVI, TargetData *TD) { + if (!TD) + return false; + // If this is a malloc of an abstract type, don't touch it. if (!AllocTy->isSized()) return false; @@ -1466,66 +1483,66 @@ static bool TryToOptimizeStoreOfMallocToGlobal(GlobalVariable *GV, // malloc to be stored into the specified global, loaded setcc'd, and // GEP'd. These are all things we could transform to using the global // for. - { - SmallPtrSet<PHINode*, 8> PHIs; - if (!ValueIsOnlyUsedLocallyOrStoredToOneGlobal(CI, GV, PHIs)) - return false; - } + SmallPtrSet<const PHINode*, 8> PHIs; + if (!ValueIsOnlyUsedLocallyOrStoredToOneGlobal(CI, GV, PHIs)) + return false; // If we have a global that is only initialized with a fixed size malloc, // transform the program to use global memory instead of malloc'd memory. // This eliminates dynamic allocation, avoids an indirection accessing the // data, and exposes the resultant global to further GlobalOpt. // We cannot optimize the malloc if we cannot determine malloc array size. - if (Value *NElems = getMallocArraySize(CI, TD, true)) { - if (ConstantInt *NElements = dyn_cast<ConstantInt>(NElems)) - // Restrict this transformation to only working on small allocations - // (2048 bytes currently), as we don't want to introduce a 16M global or - // something. - if (TD && - NElements->getZExtValue() * TD->getTypeAllocSize(AllocTy) < 2048) { - GVI = OptimizeGlobalAddressOfMalloc(GV, CI, AllocTy, NElements, TD); - return true; - } + Value *NElems = getMallocArraySize(CI, TD, true); + if (!NElems) + return false; + + if (ConstantInt *NElements = dyn_cast<ConstantInt>(NElems)) + // Restrict this transformation to only working on small allocations + // (2048 bytes currently), as we don't want to introduce a 16M global or + // something. + if (NElements->getZExtValue() * TD->getTypeAllocSize(AllocTy) < 2048) { + GVI = OptimizeGlobalAddressOfMalloc(GV, CI, AllocTy, NElements, TD); + return true; + } - // If the allocation is an array of structures, consider transforming this - // into multiple malloc'd arrays, one for each field. This is basically - // SRoA for malloc'd memory. 
- - // If this is an allocation of a fixed size array of structs, analyze as a - // variable size array. malloc [100 x struct],1 -> malloc struct, 100 - if (NElems == ConstantInt::get(CI->getOperand(1)->getType(), 1)) - if (const ArrayType *AT = dyn_cast<ArrayType>(AllocTy)) - AllocTy = AT->getElementType(); + // If the allocation is an array of structures, consider transforming this + // into multiple malloc'd arrays, one for each field. This is basically + // SRoA for malloc'd memory. + + // If this is an allocation of a fixed size array of structs, analyze as a + // variable size array. malloc [100 x struct],1 -> malloc struct, 100 + if (NElems == ConstantInt::get(CI->getOperand(1)->getType(), 1)) + if (const ArrayType *AT = dyn_cast<ArrayType>(AllocTy)) + AllocTy = AT->getElementType(); - if (const StructType *AllocSTy = dyn_cast<StructType>(AllocTy)) { - // This the structure has an unreasonable number of fields, leave it - // alone. - if (AllocSTy->getNumElements() <= 16 && AllocSTy->getNumElements() != 0 && - AllGlobalLoadUsesSimpleEnoughForHeapSRA(GV, CI)) { - - // If this is a fixed size array, transform the Malloc to be an alloc of - // structs. malloc [100 x struct],1 -> malloc struct, 100 - if (const ArrayType *AT = - dyn_cast<ArrayType>(getMallocAllocatedType(CI))) { - const Type *IntPtrTy = TD->getIntPtrType(CI->getContext()); - unsigned TypeSize = TD->getStructLayout(AllocSTy)->getSizeInBytes(); - Value *AllocSize = ConstantInt::get(IntPtrTy, TypeSize); - Value *NumElements = ConstantInt::get(IntPtrTy, AT->getNumElements()); - Instruction *Malloc = CallInst::CreateMalloc(CI, IntPtrTy, AllocSTy, - AllocSize, NumElements, - CI->getName()); - Instruction *Cast = new BitCastInst(Malloc, CI->getType(), "tmp", CI); - CI->replaceAllUsesWith(Cast); - CI->eraseFromParent(); - CI = dyn_cast<BitCastInst>(Malloc) ? - extractMallocCallFromBitCast(Malloc) : cast<CallInst>(Malloc); - } - - GVI = PerformHeapAllocSRoA(GV, CI, getMallocArraySize(CI, TD, true),TD); - return true; - } + const StructType *AllocSTy = dyn_cast<StructType>(AllocTy); + if (!AllocSTy) + return false; + + // This the structure has an unreasonable number of fields, leave it + // alone. + if (AllocSTy->getNumElements() <= 16 && AllocSTy->getNumElements() != 0 && + AllGlobalLoadUsesSimpleEnoughForHeapSRA(GV, CI)) { + + // If this is a fixed size array, transform the Malloc to be an alloc of + // structs. malloc [100 x struct],1 -> malloc struct, 100 + if (const ArrayType *AT = dyn_cast<ArrayType>(getMallocAllocatedType(CI))) { + const Type *IntPtrTy = TD->getIntPtrType(CI->getContext()); + unsigned TypeSize = TD->getStructLayout(AllocSTy)->getSizeInBytes(); + Value *AllocSize = ConstantInt::get(IntPtrTy, TypeSize); + Value *NumElements = ConstantInt::get(IntPtrTy, AT->getNumElements()); + Instruction *Malloc = CallInst::CreateMalloc(CI, IntPtrTy, AllocSTy, + AllocSize, NumElements, + CI->getName()); + Instruction *Cast = new BitCastInst(Malloc, CI->getType(), "tmp", CI); + CI->replaceAllUsesWith(Cast); + CI->eraseFromParent(); + CI = dyn_cast<BitCastInst>(Malloc) ? 
+ extractMallocCallFromBitCast(Malloc) : cast<CallInst>(Malloc); } + + GVI = PerformHeapAllocSRoA(GV, CI, getMallocArraySize(CI, TD, true),TD); + return true; } return false; @@ -1689,8 +1706,8 @@ bool GlobalOpt::ProcessInternalGlobal(GlobalVariable *GV, if (GS.StoredType == GlobalStatus::isStoredOnce && GS.StoredOnceValue) DEBUG(dbgs() << " StoredOnceValue = " << *GS.StoredOnceValue << "\n"); if (GS.AccessingFunction && !GS.HasMultipleAccessingFunctions) - DEBUG(dbgs() << " AccessingFunction = " << GS.AccessingFunction->getName() - << "\n"); + DEBUG(dbgs() << " AccessingFunction = " + << GS.AccessingFunction->getName() << "\n"); DEBUG(dbgs() << " HasMultipleAccessingFunctions = " << GS.HasMultipleAccessingFunctions << "\n"); DEBUG(dbgs() << " HasNonInstructionUser = " @@ -2278,10 +2295,10 @@ static bool EvaluateFunction(Function *F, Constant *&RetVal, } // Cannot handle inline asm. - if (isa<InlineAsm>(CI->getOperand(0))) return false; + if (isa<InlineAsm>(CI->getCalledValue())) return false; // Resolve function pointers. - Function *Callee = dyn_cast<Function>(getVal(Values, CI->getOperand(0))); + Function *Callee = dyn_cast<Function>(getVal(Values, CI->getCalledValue())); if (!Callee) return false; // Cannot resolve. SmallVector<Constant*, 8> Formals; @@ -2500,7 +2517,7 @@ bool GlobalOpt::OptimizeGlobalAliases(Module &M) { continue; // Do not perform the transform if multiple aliases potentially target the - // aliasee. This check also ensures that it is safe to replace the section + // aliasee. This check also ensures that it is safe to replace the section // and other attributes of the aliasee with those of the alias. if (!hasOneUse) continue; diff --git a/lib/Transforms/IPO/IPO.cpp b/lib/Transforms/IPO/IPO.cpp index 83e8624..340b70e 100644 --- a/lib/Transforms/IPO/IPO.cpp +++ b/lib/Transforms/IPO/IPO.cpp @@ -62,6 +62,15 @@ void LLVMAddPruneEHPass(LLVMPassManagerRef PM) { unwrap(PM)->add(createPruneEHPass()); } +void LLVMAddIPSCCPPass(LLVMPassManagerRef PM) { + unwrap(PM)->add(createIPSCCPPass()); +} + +void LLVMAddInternalizePass(LLVMPassManagerRef PM, unsigned AllButMain) { + unwrap(PM)->add(createInternalizePass(AllButMain != 0)); +} + + void LLVMAddRaiseAllocationsPass(LLVMPassManagerRef PM) { // FIXME: Remove in LLVM 3.0. } diff --git a/lib/Transforms/IPO/Inliner.cpp b/lib/Transforms/IPO/Inliner.cpp index 03ec72c..b785bb0 100644 --- a/lib/Transforms/IPO/Inliner.cpp +++ b/lib/Transforms/IPO/Inliner.cpp @@ -73,16 +73,14 @@ InlinedArrayAllocasTy; /// available from other functions inlined into the caller. If we are able to /// inline this call site we attempt to reuse already available allocas or add /// any new allocas to the set if not possible. -static bool InlineCallIfPossible(CallSite CS, CallGraph &CG, - const TargetData *TD, +static bool InlineCallIfPossible(CallSite CS, InlineFunctionInfo &IFI, InlinedArrayAllocasTy &InlinedArrayAllocas) { Function *Callee = CS.getCalledFunction(); Function *Caller = CS.getCaller(); // Try to inline the function. Get the list of static allocas that were // inlined. - SmallVector<AllocaInst*, 16> StaticAllocas; - if (!InlineFunction(CS, &CG, TD, &StaticAllocas)) + if (!InlineFunction(CS, IFI)) return false; // If the inlined function had a higher stack protection level than the @@ -119,9 +117,9 @@ static bool InlineCallIfPossible(CallSite CS, CallGraph &CG, // Loop over all the allocas we have so far and see if they can be merged with // a previously inlined alloca. If not, remember that we had it. 
- for (unsigned AllocaNo = 0, e = StaticAllocas.size(); + for (unsigned AllocaNo = 0, e = IFI.StaticAllocas.size(); AllocaNo != e; ++AllocaNo) { - AllocaInst *AI = StaticAllocas[AllocaNo]; + AllocaInst *AI = IFI.StaticAllocas[AllocaNo]; // Don't bother trying to merge array allocations (they will usually be // canonicalized to be an allocation *of* an array), or allocations whose @@ -292,14 +290,29 @@ bool Inliner::shouldInline(CallSite CS) { return true; } -bool Inliner::runOnSCC(std::vector<CallGraphNode*> &SCC) { +/// InlineHistoryIncludes - Return true if the specified inline history ID +/// indicates an inline history that includes the specified function. +static bool InlineHistoryIncludes(Function *F, int InlineHistoryID, + const SmallVectorImpl<std::pair<Function*, int> > &InlineHistory) { + while (InlineHistoryID != -1) { + assert(unsigned(InlineHistoryID) < InlineHistory.size() && + "Invalid inline history ID"); + if (InlineHistory[InlineHistoryID].first == F) + return true; + InlineHistoryID = InlineHistory[InlineHistoryID].second; + } + return false; +} + + +bool Inliner::runOnSCC(CallGraphSCC &SCC) { CallGraph &CG = getAnalysis<CallGraph>(); const TargetData *TD = getAnalysisIfAvailable<TargetData>(); SmallPtrSet<Function*, 8> SCCFunctions; DEBUG(dbgs() << "Inliner visiting SCC:"); - for (unsigned i = 0, e = SCC.size(); i != e; ++i) { - Function *F = SCC[i]->getFunction(); + for (CallGraphSCC::iterator I = SCC.begin(), E = SCC.end(); I != E; ++I) { + Function *F = (*I)->getFunction(); if (F) SCCFunctions.insert(F); DEBUG(dbgs() << " " << (F ? F->getName() : "INDIRECTNODE")); } @@ -307,10 +320,16 @@ bool Inliner::runOnSCC(std::vector<CallGraphNode*> &SCC) { // Scan through and identify all call sites ahead of time so that we only // inline call sites in the original functions, not call sites that result // from inlining other functions. - SmallVector<CallSite, 16> CallSites; - - for (unsigned i = 0, e = SCC.size(); i != e; ++i) { - Function *F = SCC[i]->getFunction(); + SmallVector<std::pair<CallSite, int>, 16> CallSites; + + // When inlining a callee produces new call sites, we want to keep track of + // the fact that they were inlined from the callee. This allows us to avoid + // infinite inlining in some obscure cases. To represent this, we use an + // index into the InlineHistory vector. + SmallVector<std::pair<Function*, int>, 8> InlineHistory; + + for (CallGraphSCC::iterator I = SCC.begin(), E = SCC.end(); I != E; ++I) { + Function *F = (*I)->getFunction(); if (!F) continue; for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB) @@ -327,22 +346,27 @@ bool Inliner::runOnSCC(std::vector<CallGraphNode*> &SCC) { if (CS.getCalledFunction() && CS.getCalledFunction()->isDeclaration()) continue; - CallSites.push_back(CS); + CallSites.push_back(std::make_pair(CS, -1)); } } DEBUG(dbgs() << ": " << CallSites.size() << " call sites.\n"); + // If there are no calls in this function, exit early. + if (CallSites.empty()) + return false; + // Now that we have all of the call sites, move the ones to functions in the // current SCC to the end of the list. 
unsigned FirstCallInSCC = CallSites.size(); for (unsigned i = 0; i < FirstCallInSCC; ++i) - if (Function *F = CallSites[i].getCalledFunction()) + if (Function *F = CallSites[i].first.getCalledFunction()) if (SCCFunctions.count(F)) std::swap(CallSites[i--], CallSites[--FirstCallInSCC]); InlinedArrayAllocasTy InlinedArrayAllocas; + InlineFunctionInfo InlineInfo(&CG, TD); // Now that we have all of the call sites, loop over them and inline them if // it looks profitable to do so. @@ -353,7 +377,7 @@ bool Inliner::runOnSCC(std::vector<CallGraphNode*> &SCC) { // Iterate over the outer loop because inlining functions can cause indirect // calls to become direct calls. for (unsigned CSi = 0; CSi != CallSites.size(); ++CSi) { - CallSite CS = CallSites[CSi]; + CallSite CS = CallSites[CSi].first; Function *Caller = CS.getCaller(); Function *Callee = CS.getCalledFunction(); @@ -375,16 +399,42 @@ bool Inliner::runOnSCC(std::vector<CallGraphNode*> &SCC) { // We can only inline direct calls to non-declarations. if (Callee == 0 || Callee->isDeclaration()) continue; + // If this call sites was obtained by inlining another function, verify + // that the include path for the function did not include the callee + // itself. If so, we'd be recursively inlinling the same function, + // which would provide the same callsites, which would cause us to + // infinitely inline. + int InlineHistoryID = CallSites[CSi].second; + if (InlineHistoryID != -1 && + InlineHistoryIncludes(Callee, InlineHistoryID, InlineHistory)) + continue; + + // If the policy determines that we should inline this function, // try to do so. if (!shouldInline(CS)) continue; - // Attempt to inline the function... - if (!InlineCallIfPossible(CS, CG, TD, InlinedArrayAllocas)) + // Attempt to inline the function. + if (!InlineCallIfPossible(CS, InlineInfo, InlinedArrayAllocas)) continue; ++NumInlined; - + + // If inlining this function gave us any new call sites, throw them + // onto our worklist to process. They are useful inline candidates. + if (!InlineInfo.InlinedCalls.empty()) { + // Create a new inline history entry for this, so that we remember + // that these new callsites came about due to inlining Callee. + int NewHistoryID = InlineHistory.size(); + InlineHistory.push_back(std::make_pair(Callee, InlineHistoryID)); + + for (unsigned i = 0, e = InlineInfo.InlinedCalls.size(); + i != e; ++i) { + Value *Ptr = InlineInfo.InlinedCalls[i]; + CallSites.push_back(std::make_pair(CallSite(Ptr), NewHistoryID)); + } + } + // Update the cached cost info with the inlined call. growCachedCostInfo(Caller, Callee); } @@ -417,7 +467,7 @@ bool Inliner::runOnSCC(std::vector<CallGraphNode*> &SCC) { // swap/pop_back for efficiency, but do not use it if doing so would // move a call site to a function in this SCC before the // 'FirstCallInSCC' barrier. - if (SCC.size() == 1) { + if (SCC.isSingular()) { std::swap(CallSites[CSi], CallSites.back()); CallSites.pop_back(); } else { diff --git a/lib/Transforms/IPO/PartialInlining.cpp b/lib/Transforms/IPO/PartialInlining.cpp index f8ec722..07525ea 100644 --- a/lib/Transforms/IPO/PartialInlining.cpp +++ b/lib/Transforms/IPO/PartialInlining.cpp @@ -120,15 +120,17 @@ Function* PartialInliner::unswitchFunction(Function* F) { // Extract the body of the if. Function* extractedFunction = ExtractCodeRegion(DT, toExtract); + InlineFunctionInfo IFI; + // Inline the top-level if test into all callers. 
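The inline-history bookkeeping added to the Inliner above is a parent-pointer chain: each entry records which callee's inlining produced a batch of new call sites and the index of the entry it was itself derived from, with -1 marking an original call site. A minimal standalone sketch of the same ancestry walk, using std::string in place of Function* (all names here are illustrative, not the pass's types):

#include <cassert>
#include <string>
#include <utility>
#include <vector>

// Each entry: (function that was inlined, index of the parent entry, or -1).
typedef std::vector<std::pair<std::string, int> > InlineHistoryTy;

// Walk the chain rooted at ID and report whether F already appears in it.
static bool historyIncludes(const std::string &F, int ID,
                            const InlineHistoryTy &History) {
  while (ID != -1) {
    assert(unsigned(ID) < History.size() && "invalid inline history ID");
    if (History[ID].first == F)
      return true;
    ID = History[ID].second;   // hop to the entry we were inlined from
  }
  return false;
}

int main() {
  InlineHistoryTy History;
  // An original call site to "foo" carries history ID -1.  Inlining "foo"
  // creates new call sites; they get a fresh entry whose parent is -1.
  History.push_back(std::make_pair("foo", -1));   // ID 0
  // One of those new sites calls "bar"; inlining it chains onto entry 0.
  History.push_back(std::make_pair("bar", 0));    // ID 1
  // A call back to "foo" discovered under ID 1 must not be inlined again.
  assert(historyIncludes("foo", 1, History));
  assert(!historyIncludes("baz", 1, History));
  return 0;
}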
std::vector<User*> Users(duplicateFunction->use_begin(), duplicateFunction->use_end()); for (std::vector<User*>::iterator UI = Users.begin(), UE = Users.end(); UI != UE; ++UI) - if (CallInst* CI = dyn_cast<CallInst>(*UI)) - InlineFunction(CI); - else if (InvokeInst* II = dyn_cast<InvokeInst>(*UI)) - InlineFunction(II); + if (CallInst *CI = dyn_cast<CallInst>(*UI)) + InlineFunction(CI, IFI); + else if (InvokeInst *II = dyn_cast<InvokeInst>(*UI)) + InlineFunction(II, IFI); // Ditch the duplicate, since we're done with it, and rewrite all remaining // users (function pointers, etc.) back to the original function. diff --git a/lib/Transforms/IPO/PruneEH.cpp b/lib/Transforms/IPO/PruneEH.cpp index 161246b..de6099c 100644 --- a/lib/Transforms/IPO/PruneEH.cpp +++ b/lib/Transforms/IPO/PruneEH.cpp @@ -40,7 +40,7 @@ namespace { PruneEH() : CallGraphSCCPass(&ID) {} // runOnSCC - Analyze the SCC, performing the transformation if possible. - bool runOnSCC(std::vector<CallGraphNode *> &SCC); + bool runOnSCC(CallGraphSCC &SCC); bool SimplifyFunction(Function *F); void DeleteBasicBlock(BasicBlock *BB); @@ -54,20 +54,20 @@ X("prune-eh", "Remove unused exception handling info"); Pass *llvm::createPruneEHPass() { return new PruneEH(); } -bool PruneEH::runOnSCC(std::vector<CallGraphNode *> &SCC) { +bool PruneEH::runOnSCC(CallGraphSCC &SCC) { SmallPtrSet<CallGraphNode *, 8> SCCNodes; CallGraph &CG = getAnalysis<CallGraph>(); bool MadeChange = false; // Fill SCCNodes with the elements of the SCC. Used for quickly // looking up whether a given CallGraphNode is in this SCC. - for (unsigned i = 0, e = SCC.size(); i != e; ++i) - SCCNodes.insert(SCC[i]); + for (CallGraphSCC::iterator I = SCC.begin(), E = SCC.end(); I != E; ++I) + SCCNodes.insert(*I); // First pass, scan all of the functions in the SCC, simplifying them // according to what we know. - for (unsigned i = 0, e = SCC.size(); i != e; ++i) - if (Function *F = SCC[i]->getFunction()) + for (CallGraphSCC::iterator I = SCC.begin(), E = SCC.end(); I != E; ++I) + if (Function *F = (*I)->getFunction()) MadeChange |= SimplifyFunction(F); // Next, check to see if any callees might throw or if there are any external @@ -78,9 +78,9 @@ bool PruneEH::runOnSCC(std::vector<CallGraphNode *> &SCC) { // obviously the SCC might throw. // bool SCCMightUnwind = false, SCCMightReturn = false; - for (unsigned i = 0, e = SCC.size(); - (!SCCMightUnwind || !SCCMightReturn) && i != e; ++i) { - Function *F = SCC[i]->getFunction(); + for (CallGraphSCC::iterator I = SCC.begin(), E = SCC.end(); + (!SCCMightUnwind || !SCCMightReturn) && I != E; ++I) { + Function *F = (*I)->getFunction(); if (F == 0) { SCCMightUnwind = true; SCCMightReturn = true; @@ -132,7 +132,7 @@ bool PruneEH::runOnSCC(std::vector<CallGraphNode *> &SCC) { // If the SCC doesn't unwind or doesn't throw, note this fact. 
if (!SCCMightUnwind || !SCCMightReturn) - for (unsigned i = 0, e = SCC.size(); i != e; ++i) { + for (CallGraphSCC::iterator I = SCC.begin(), E = SCC.end(); I != E; ++I) { Attributes NewAttributes = Attribute::None; if (!SCCMightUnwind) @@ -140,19 +140,20 @@ bool PruneEH::runOnSCC(std::vector<CallGraphNode *> &SCC) { if (!SCCMightReturn) NewAttributes |= Attribute::NoReturn; - const AttrListPtr &PAL = SCC[i]->getFunction()->getAttributes(); + Function *F = (*I)->getFunction(); + const AttrListPtr &PAL = F->getAttributes(); const AttrListPtr &NPAL = PAL.addAttr(~0, NewAttributes); if (PAL != NPAL) { MadeChange = true; - SCC[i]->getFunction()->setAttributes(NPAL); + F->setAttributes(NPAL); } } - for (unsigned i = 0, e = SCC.size(); i != e; ++i) { + for (CallGraphSCC::iterator I = SCC.begin(), E = SCC.end(); I != E; ++I) { // Convert any invoke instructions to non-throwing functions in this node // into call instructions with a branch. This makes the exception blocks // dead. - if (Function *F = SCC[i]->getFunction()) + if (Function *F = (*I)->getFunction()) MadeChange |= SimplifyFunction(F); } diff --git a/lib/Transforms/IPO/StructRetPromotion.cpp b/lib/Transforms/IPO/StructRetPromotion.cpp index dda32d0..473e83c 100644 --- a/lib/Transforms/IPO/StructRetPromotion.cpp +++ b/lib/Transforms/IPO/StructRetPromotion.cpp @@ -48,7 +48,7 @@ namespace { CallGraphSCCPass::getAnalysisUsage(AU); } - virtual bool runOnSCC(std::vector<CallGraphNode *> &SCC); + virtual bool runOnSCC(CallGraphSCC &SCC); static char ID; // Pass identification, replacement for typeid SRETPromotion() : CallGraphSCCPass(&ID) {} @@ -69,12 +69,12 @@ Pass *llvm::createStructRetPromotionPass() { return new SRETPromotion(); } -bool SRETPromotion::runOnSCC(std::vector<CallGraphNode *> &SCC) { +bool SRETPromotion::runOnSCC(CallGraphSCC &SCC) { bool Changed = false; - for (unsigned i = 0, e = SCC.size(); i != e; ++i) - if (CallGraphNode *NewNode = PromoteReturn(SCC[i])) { - SCC[i] = NewNode; + for (CallGraphSCC::iterator I = SCC.begin(), E = SCC.end(); I != E; ++I) + if (CallGraphNode *NewNode = PromoteReturn(*I)) { + SCC.ReplaceNode(*I, NewNode); Changed = true; } diff --git a/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp b/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp index 3fb3de7..8586054 100644 --- a/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp +++ b/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp @@ -1735,16 +1735,12 @@ Instruction *InstCombiner::visitXor(BinaryOperator &I) { if (ConstantInt *RHS = dyn_cast<ConstantInt>(Op1)) { - if (RHS->isOne() && Op0->hasOneUse()) { + if (RHS->isOne() && Op0->hasOneUse()) // xor (cmp A, B), true = not (cmp A, B) = !cmp A, B - if (ICmpInst *ICI = dyn_cast<ICmpInst>(Op0)) - return new ICmpInst(ICI->getInversePredicate(), - ICI->getOperand(0), ICI->getOperand(1)); - - if (FCmpInst *FCI = dyn_cast<FCmpInst>(Op0)) - return new FCmpInst(FCI->getInversePredicate(), - FCI->getOperand(0), FCI->getOperand(1)); - } + if (CmpInst *CI = dyn_cast<CmpInst>(Op0)) + return CmpInst::Create(CI->getOpcode(), + CI->getInversePredicate(), + CI->getOperand(0), CI->getOperand(1)); // fold (xor(zext(cmp)), 1) and (xor(sext(cmp)), -1) to ext(!cmp). 
if (CastInst *Op0C = dyn_cast<CastInst>(Op0)) { diff --git a/lib/Transforms/InstCombine/InstCombineCalls.cpp b/lib/Transforms/InstCombine/InstCombineCalls.cpp index e025b05..38e7b6e 100644 --- a/lib/Transforms/InstCombine/InstCombineCalls.cpp +++ b/lib/Transforms/InstCombine/InstCombineCalls.cpp @@ -59,29 +59,32 @@ static unsigned EnforceKnownAlignment(Value *V, // Treat this like a bitcast. return EnforceKnownAlignment(U->getOperand(0), Align, PrefAlign); } - break; + return Align; + } + case Instruction::Alloca: { + AllocaInst *AI = cast<AllocaInst>(V); + // If there is a requested alignment and if this is an alloca, round up. + if (AI->getAlignment() >= PrefAlign) + return AI->getAlignment(); + AI->setAlignment(PrefAlign); + return PrefAlign; } } if (GlobalValue *GV = dyn_cast<GlobalValue>(V)) { // If there is a large requested alignment and we can, bump up the alignment // of the global. - if (!GV->isDeclaration()) { - if (GV->getAlignment() >= PrefAlign) - Align = GV->getAlignment(); - else { - GV->setAlignment(PrefAlign); - Align = PrefAlign; - } - } - } else if (AllocaInst *AI = dyn_cast<AllocaInst>(V)) { - // If there is a requested alignment and if this is an alloca, round up. - if (AI->getAlignment() >= PrefAlign) - Align = AI->getAlignment(); - else { - AI->setAlignment(PrefAlign); - Align = PrefAlign; - } + if (GV->isDeclaration()) return Align; + + if (GV->getAlignment() >= PrefAlign) + return GV->getAlignment(); + // We can only increase the alignment of the global if it has no alignment + // specified or if it is not assigned a section. If it is assigned a + // section, the global could be densely packed with other objects in the + // section, increasing the alignment could cause padding issues. + if (!GV->hasSection() || GV->getAlignment() == 0) + GV->setAlignment(PrefAlign); + return GV->getAlignment(); } return Align; @@ -287,7 +290,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { const Type *Tys[3] = { CI.getOperand(1)->getType(), CI.getOperand(2)->getType(), CI.getOperand(3)->getType() }; - CI.setOperand(0, + CI.setCalledFunction( Intrinsic::getDeclaration(M, MemCpyID, Tys, 3)); Changed = true; } @@ -526,7 +529,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { // X + 0 -> {X, false} if (RHS->isZero()) { Constant *V[] = { - UndefValue::get(II->getOperand(0)->getType()), + UndefValue::get(II->getCalledValue()->getType()), ConstantInt::getFalse(II->getContext()) }; Constant *Struct = ConstantStruct::get(II->getContext(), V, 2, false); diff --git a/lib/Transforms/InstCombine/InstCombineCasts.cpp b/lib/Transforms/InstCombine/InstCombineCasts.cpp index a68fc6d..eb7628e 100644 --- a/lib/Transforms/InstCombine/InstCombineCasts.cpp +++ b/lib/Transforms/InstCombine/InstCombineCasts.cpp @@ -1323,7 +1323,7 @@ Instruction *InstCombiner::visitBitCast(BitCastInst &CI) { if (ShuffleVectorInst *SVI = dyn_cast<ShuffleVectorInst>(Src)) { // Okay, we have (bitcast (shuffle ..)). Check to see if this is - // a bitconvert to a vector with the same # elts. + // a bitcast to a vector with the same # elts. 
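The reworked EnforceKnownAlignment above follows a small set of rules: declarations are never touched, an alloca can always be rounded up to the preferred alignment, and a global is bumped only when it has no explicit section or no alignment was specified, since a sectioned global may be packed tightly against its neighbours. A hedged sketch of just that decision, using a hypothetical enforceAlignment helper over plain flags rather than LLVM values:

#include <cassert>

// Decide what alignment a definition can be assumed to have after an
// optional bump to PrefAlign, mirroring the rules described above.
static unsigned enforceAlignment(unsigned CurAlign, unsigned PrefAlign,
                                 bool IsDeclaration, bool IsAlloca,
                                 bool HasSection) {
  if (IsDeclaration)
    return CurAlign;            // layout is owned by another module
  if (CurAlign >= PrefAlign)
    return CurAlign;            // already at least as aligned as requested
  if (IsAlloca || !HasSection || CurAlign == 0)
    return PrefAlign;           // safe to bump
  return CurAlign;              // sectioned global with explicit alignment
}

int main() {
  assert(enforceAlignment(4, 16, false, true,  false) == 16); // alloca: bump
  assert(enforceAlignment(4, 16, true,  false, false) == 4);  // declaration
  assert(enforceAlignment(4, 16, false, false, true)  == 4);  // sectioned global
  assert(enforceAlignment(0, 16, false, false, true)  == 16); // no align given
  return 0;
}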
if (SVI->hasOneUse() && DestTy->isVectorTy() && cast<VectorType>(DestTy)->getNumElements() == SVI->getType()->getNumElements() && diff --git a/lib/Transforms/InstCombine/InstCombineCompares.cpp b/lib/Transforms/InstCombine/InstCombineCompares.cpp index 72fd558..861cf92 100644 --- a/lib/Transforms/InstCombine/InstCombineCompares.cpp +++ b/lib/Transforms/InstCombine/InstCombineCompares.cpp @@ -1325,7 +1325,7 @@ Instruction *InstCombiner::visitICmpInstWithInstAndIntCst(ICmpInst &ICI, // If we have a signed (X % (2^c)) == 0, turn it into an unsigned one. if (RHSV == 0 && isa<ConstantInt>(BO->getOperand(1)) &&BO->hasOneUse()){ const APInt &V = cast<ConstantInt>(BO->getOperand(1))->getValue(); - if (V.sgt(APInt(V.getBitWidth(), 1)) && V.isPowerOf2()) { + if (V.sgt(1) && V.isPowerOf2()) { Value *NewRem = Builder->CreateURem(BO->getOperand(0), BO->getOperand(1), BO->getName()); diff --git a/lib/Transforms/InstCombine/InstCombineSelect.cpp b/lib/Transforms/InstCombine/InstCombineSelect.cpp index 2fc9325..c958cde 100644 --- a/lib/Transforms/InstCombine/InstCombineSelect.cpp +++ b/lib/Transforms/InstCombine/InstCombineSelect.cpp @@ -13,6 +13,7 @@ #include "InstCombine.h" #include "llvm/Support/PatternMatch.h" +#include "llvm/Analysis/InstructionSimplify.h" using namespace llvm; using namespace PatternMatch; @@ -421,49 +422,30 @@ Instruction *InstCombiner::visitSelectInst(SelectInst &SI) { Value *TrueVal = SI.getTrueValue(); Value *FalseVal = SI.getFalseValue(); - // select true, X, Y -> X - // select false, X, Y -> Y - if (ConstantInt *C = dyn_cast<ConstantInt>(CondVal)) - return ReplaceInstUsesWith(SI, C->getZExtValue() ? TrueVal : FalseVal); - - // select C, X, X -> X - if (TrueVal == FalseVal) - return ReplaceInstUsesWith(SI, TrueVal); - - if (isa<UndefValue>(TrueVal)) // select C, undef, X -> X - return ReplaceInstUsesWith(SI, FalseVal); - if (isa<UndefValue>(FalseVal)) // select C, X, undef -> X - return ReplaceInstUsesWith(SI, TrueVal); - if (isa<UndefValue>(CondVal)) { // select undef, X, Y -> X or Y - if (isa<Constant>(TrueVal)) - return ReplaceInstUsesWith(SI, TrueVal); - else - return ReplaceInstUsesWith(SI, FalseVal); - } + if (Value *V = SimplifySelectInst(CondVal, TrueVal, FalseVal, TD)) + return ReplaceInstUsesWith(SI, V); if (SI.getType()->isIntegerTy(1)) { if (ConstantInt *C = dyn_cast<ConstantInt>(TrueVal)) { if (C->getZExtValue()) { // Change: A = select B, true, C --> A = or B, C return BinaryOperator::CreateOr(CondVal, FalseVal); - } else { - // Change: A = select B, false, C --> A = and !B, C - Value *NotCond = - InsertNewInstBefore(BinaryOperator::CreateNot(CondVal, - "not."+CondVal->getName()), SI); - return BinaryOperator::CreateAnd(NotCond, FalseVal); } + // Change: A = select B, false, C --> A = and !B, C + Value *NotCond = + InsertNewInstBefore(BinaryOperator::CreateNot(CondVal, + "not."+CondVal->getName()), SI); + return BinaryOperator::CreateAnd(NotCond, FalseVal); } else if (ConstantInt *C = dyn_cast<ConstantInt>(FalseVal)) { if (C->getZExtValue() == false) { // Change: A = select B, C, false --> A = and B, C return BinaryOperator::CreateAnd(CondVal, TrueVal); - } else { - // Change: A = select B, C, true --> A = or !B, C - Value *NotCond = - InsertNewInstBefore(BinaryOperator::CreateNot(CondVal, - "not."+CondVal->getName()), SI); - return BinaryOperator::CreateOr(NotCond, TrueVal); } + // Change: A = select B, C, true --> A = or !B, C + Value *NotCond = + InsertNewInstBefore(BinaryOperator::CreateNot(CondVal, + "not."+CondVal->getName()), SI); + return 
BinaryOperator::CreateOr(NotCond, TrueVal); } // select a, b, a -> a&b diff --git a/lib/Transforms/Makefile b/lib/Transforms/Makefile index ea4a115..e527be2 100644 --- a/lib/Transforms/Makefile +++ b/lib/Transforms/Makefile @@ -13,7 +13,7 @@ PARALLEL_DIRS = Utils Instrumentation Scalar InstCombine IPO Hello include $(LEVEL)/Makefile.config # No support for plugins on windows targets -ifeq ($(HOST_OS), $(filter $(HOST_OS), Cygwin MingW)) +ifeq ($(HOST_OS), $(filter $(HOST_OS), Cygwin MingW Minix)) PARALLEL_DIRS := $(filter-out Hello, $(PARALLEL_DIRS)) endif diff --git a/lib/Transforms/Scalar/CMakeLists.txt b/lib/Transforms/Scalar/CMakeLists.txt index 683c1c2..5778864 100644 --- a/lib/Transforms/Scalar/CMakeLists.txt +++ b/lib/Transforms/Scalar/CMakeLists.txt @@ -21,7 +21,6 @@ add_llvm_library(LLVMScalarOpts Reassociate.cpp Reg2Mem.cpp SCCP.cpp - SCCVN.cpp Scalar.cpp ScalarReplAggregates.cpp SimplifyCFGPass.cpp diff --git a/lib/Transforms/Scalar/GVN.cpp b/lib/Transforms/Scalar/GVN.cpp index 642d59d..321def7 100644 --- a/lib/Transforms/Scalar/GVN.cpp +++ b/lib/Transforms/Scalar/GVN.cpp @@ -1217,7 +1217,7 @@ static Value *GetMemInstValueForLoad(MemIntrinsic *SrcInst, unsigned Offset, return ConstantFoldLoadFromConstPtr(Src, &TD); } - +namespace { struct AvailableValueInBlock { /// BB - The basic block in question. @@ -1291,6 +1291,8 @@ struct AvailableValueInBlock { } }; +} + /// ConstructSSAForLoadSet - Given a set of loads specified by ValuesPerBlock, /// construct SSA form, allowing us to eliminate LI. This returns the value /// that should be used at LI's definition site. @@ -1333,8 +1335,8 @@ static Value *ConstructSSAForLoadSet(LoadInst *LI, return V; } -static bool isLifetimeStart(Instruction *Inst) { - if (IntrinsicInst* II = dyn_cast<IntrinsicInst>(Inst)) +static bool isLifetimeStart(const Instruction *Inst) { + if (const IntrinsicInst* II = dyn_cast<IntrinsicInst>(Inst)) return II->getIntrinsicID() == Intrinsic::lifetime_start; return false; } diff --git a/lib/Transforms/Scalar/IndVarSimplify.cpp b/lib/Transforms/Scalar/IndVarSimplify.cpp index 6605666..36bea67 100644 --- a/lib/Transforms/Scalar/IndVarSimplify.cpp +++ b/lib/Transforms/Scalar/IndVarSimplify.cpp @@ -97,6 +97,8 @@ namespace { private: + void EliminateIVComparisons(); + void EliminateIVRemainders(); void RewriteNonIntegerIVs(Loop *L); ICmpInst *LinearFunctionTestReplace(Loop *L, const SCEV *BackedgeTakenCount, @@ -133,6 +135,24 @@ ICmpInst *IndVarSimplify::LinearFunctionTestReplace(Loop *L, BasicBlock *ExitingBlock, BranchInst *BI, SCEVExpander &Rewriter) { + // Special case: If the backedge-taken count is a UDiv, it's very likely a + // UDiv that ScalarEvolution produced in order to compute a precise + // expression, rather than a UDiv from the user's code. If we can't find a + // UDiv in the code with some simple searching, assume the former and forego + // rewriting the loop. + if (isa<SCEVUDivExpr>(BackedgeTakenCount)) { + ICmpInst *OrigCond = dyn_cast<ICmpInst>(BI->getCondition()); + if (!OrigCond) return 0; + const SCEV *R = SE->getSCEV(OrigCond->getOperand(1)); + R = SE->getMinusSCEV(R, SE->getConstant(R->getType(), 1)); + if (R != BackedgeTakenCount) { + const SCEV *L = SE->getSCEV(OrigCond->getOperand(0)); + L = SE->getMinusSCEV(L, SE->getConstant(L->getType(), 1)); + if (L != BackedgeTakenCount) + return 0; + } + } + // If the exiting block is not the same as the backedge block, we must compare // against the preincremented value, otherwise we prefer to compare against // the post-incremented value. 
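The boolean select folds in the InstCombineSelect hunk above (select B, true, C -> or B, C; select B, false, C -> and !B, C; select B, C, false -> and B, C; select B, C, true -> or !B, C) are ordinary i1 identities. A small exhaustive check over both operand values:

#include <cassert>

int main() {
  // Exhaustively verify the i1 select folds for every (B, C) combination.
  for (int b = 0; b <= 1; ++b)
    for (int c = 0; c <= 1; ++c) {
      bool B = b, C = c;
      assert((B ? true  : C) == (B | C));    // select B, true, C  -> or B, C
      assert((B ? false : C) == (!B & C));   // select B, false, C -> and !B, C
      assert((B ? C : false) == (B & C));    // select B, C, false -> and B, C
      assert((B ? C : true ) == (!B | C));   // select B, C, true  -> or !B, C
    }
  return 0;
}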
@@ -142,12 +162,12 @@ ICmpInst *IndVarSimplify::LinearFunctionTestReplace(Loop *L, // Add one to the "backedge-taken" count to get the trip count. // If this addition may overflow, we have to be more pessimistic and // cast the induction variable before doing the add. - const SCEV *Zero = SE->getIntegerSCEV(0, BackedgeTakenCount->getType()); + const SCEV *Zero = SE->getConstant(BackedgeTakenCount->getType(), 0); const SCEV *N = SE->getAddExpr(BackedgeTakenCount, - SE->getIntegerSCEV(1, BackedgeTakenCount->getType())); + SE->getConstant(BackedgeTakenCount->getType(), 1)); if ((isa<SCEVConstant>(N) && !N->isZero()) || - SE->isLoopGuardedByCond(L, ICmpInst::ICMP_NE, N, Zero)) { + SE->isLoopEntryGuardedByCond(L, ICmpInst::ICMP_NE, N, Zero)) { // No overflow. Cast the sum. RHS = SE->getTruncateOrZeroExtend(N, IndVar->getType()); } else { @@ -155,7 +175,7 @@ ICmpInst *IndVarSimplify::LinearFunctionTestReplace(Loop *L, RHS = SE->getTruncateOrZeroExtend(BackedgeTakenCount, IndVar->getType()); RHS = SE->getAddExpr(RHS, - SE->getIntegerSCEV(1, IndVar->getType())); + SE->getConstant(IndVar->getType(), 1)); } // The BackedgeTaken expression contains the number of times that the @@ -336,6 +356,116 @@ void IndVarSimplify::RewriteNonIntegerIVs(Loop *L) { SE->forgetLoop(L); } +void IndVarSimplify::EliminateIVComparisons() { + SmallVector<WeakVH, 16> DeadInsts; + + // Look for ICmp users. + for (IVUsers::iterator I = IU->begin(), E = IU->end(); I != E; ++I) { + IVStrideUse &UI = *I; + ICmpInst *ICmp = dyn_cast<ICmpInst>(UI.getUser()); + if (!ICmp) continue; + + bool Swapped = UI.getOperandValToReplace() == ICmp->getOperand(1); + ICmpInst::Predicate Pred = ICmp->getPredicate(); + if (Swapped) Pred = ICmpInst::getSwappedPredicate(Pred); + + // Get the SCEVs for the ICmp operands. + const SCEV *S = IU->getReplacementExpr(UI); + const SCEV *X = SE->getSCEV(ICmp->getOperand(!Swapped)); + + // Simplify unnecessary loops away. + const Loop *ICmpLoop = LI->getLoopFor(ICmp->getParent()); + S = SE->getSCEVAtScope(S, ICmpLoop); + X = SE->getSCEVAtScope(X, ICmpLoop); + + // If the condition is always true or always false, replace it with + // a constant value. + if (SE->isKnownPredicate(Pred, S, X)) + ICmp->replaceAllUsesWith(ConstantInt::getTrue(ICmp->getContext())); + else if (SE->isKnownPredicate(ICmpInst::getInversePredicate(Pred), S, X)) + ICmp->replaceAllUsesWith(ConstantInt::getFalse(ICmp->getContext())); + else + continue; + + DEBUG(dbgs() << "INDVARS: Eliminated comparison: " << *ICmp << '\n'); + DeadInsts.push_back(ICmp); + } + + // Now that we're done iterating through lists, clean up any instructions + // which are now dead. + while (!DeadInsts.empty()) + if (Instruction *Inst = + dyn_cast_or_null<Instruction>(DeadInsts.pop_back_val())) + RecursivelyDeleteTriviallyDeadInstructions(Inst); +} + +void IndVarSimplify::EliminateIVRemainders() { + SmallVector<WeakVH, 16> DeadInsts; + + // Look for SRem and URem users. + for (IVUsers::iterator I = IU->begin(), E = IU->end(); I != E; ++I) { + IVStrideUse &UI = *I; + BinaryOperator *Rem = dyn_cast<BinaryOperator>(UI.getUser()); + if (!Rem) continue; + + bool isSigned = Rem->getOpcode() == Instruction::SRem; + if (!isSigned && Rem->getOpcode() != Instruction::URem) + continue; + + // We're only interested in the case where we know something about + // the numerator. + if (UI.getOperandValToReplace() != Rem->getOperand(0)) + continue; + + // Get the SCEVs for the ICmp operands. 
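EliminateIVComparisons above leans on ScalarEvolution's isKnownPredicate: if the predicate provably holds (or provably fails) for every value the induction variable takes, the icmp is replaced outright by true or false. A standalone sketch of the underlying range reasoning with plain integers; classifySLT is a hypothetical helper, not part of the patch:

#include <cassert>

enum Tri { AlwaysFalse, AlwaysTrue, Unknown };

// Given that an induction variable only takes values in the half-open
// range [Lo, Hi), classify the comparison "iv < X" for a loop-invariant X.
static Tri classifySLT(long Lo, long Hi, long X) {
  if (Hi <= X)  return AlwaysTrue;   // every value the IV takes is < X
  if (Lo >= X)  return AlwaysFalse;  // no value the IV takes is < X
  return Unknown;
}

int main() {
  // for (i = 0; i != 10; ++i)  =>  i is in [0, 10)
  assert(classifySLT(0, 10, 10) == AlwaysTrue);   // "i < 10" folds to true
  assert(classifySLT(0, 10, 0)  == AlwaysFalse);  // "i < 0" folds to false
  assert(classifySLT(0, 10, 5)  == Unknown);      // genuinely varies
  return 0;
}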
+ const SCEV *S = SE->getSCEV(Rem->getOperand(0)); + const SCEV *X = SE->getSCEV(Rem->getOperand(1)); + + // Simplify unnecessary loops away. + const Loop *ICmpLoop = LI->getLoopFor(Rem->getParent()); + S = SE->getSCEVAtScope(S, ICmpLoop); + X = SE->getSCEVAtScope(X, ICmpLoop); + + // i % n --> i if i is in [0,n). + if ((!isSigned || SE->isKnownNonNegative(S)) && + SE->isKnownPredicate(isSigned ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT, + S, X)) + Rem->replaceAllUsesWith(Rem->getOperand(0)); + else { + // (i+1) % n --> (i+1)==n?0:(i+1) if i is in [0,n). + const SCEV *LessOne = + SE->getMinusSCEV(S, SE->getConstant(S->getType(), 1)); + if ((!isSigned || SE->isKnownNonNegative(LessOne)) && + SE->isKnownPredicate(isSigned ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT, + LessOne, X)) { + ICmpInst *ICmp = new ICmpInst(Rem, ICmpInst::ICMP_EQ, + Rem->getOperand(0), Rem->getOperand(1), + "tmp"); + SelectInst *Sel = + SelectInst::Create(ICmp, + ConstantInt::get(Rem->getType(), 0), + Rem->getOperand(0), "tmp", Rem); + Rem->replaceAllUsesWith(Sel); + } else + continue; + } + + // Inform IVUsers about the new users. + if (Instruction *I = dyn_cast<Instruction>(Rem->getOperand(0))) + IU->AddUsersIfInteresting(I); + + DEBUG(dbgs() << "INDVARS: Simplified rem: " << *Rem << '\n'); + DeadInsts.push_back(Rem); + } + + // Now that we're done iterating through lists, clean up any instructions + // which are now dead. + while (!DeadInsts.empty()) + if (Instruction *Inst = + dyn_cast_or_null<Instruction>(DeadInsts.pop_back_val())) + RecursivelyDeleteTriviallyDeadInstructions(Inst); +} + bool IndVarSimplify::runOnLoop(Loop *L, LPPassManager &LPM) { IU = &getAnalysis<IVUsers>(); LI = &getAnalysis<LoopInfo>(); @@ -362,6 +492,12 @@ bool IndVarSimplify::runOnLoop(Loop *L, LPPassManager &LPM) { if (!isa<SCEVCouldNotCompute>(BackedgeTakenCount)) RewriteLoopExitValues(L, Rewriter); + // Simplify ICmp IV users. + EliminateIVComparisons(); + + // Simplify SRem and URem IV users. + EliminateIVRemainders(); + // Compute the type of the largest recurrence expression, and decide whether // a canonical induction variable should be inserted. const Type *LargestType = 0; @@ -454,6 +590,46 @@ bool IndVarSimplify::runOnLoop(Loop *L, LPPassManager &LPM) { return Changed; } +// FIXME: It is an extremely bad idea to indvar substitute anything more +// complex than affine induction variables. Doing so will put expensive +// polynomial evaluations inside of the loop, and the str reduction pass +// currently can only reduce affine polynomials. For now just disable +// indvar subst on anything more complex than an affine addrec, unless +// it can be expanded to a trivial value. +static bool isSafe(const SCEV *S, const Loop *L) { + // Loop-invariant values are safe. + if (S->isLoopInvariant(L)) return true; + + // Affine addrecs are safe. Non-affine are not, because LSR doesn't know how + // to transform them into efficient code. + if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S)) + return AR->isAffine(); + + // An add is safe it all its operands are safe. + if (const SCEVCommutativeExpr *Commutative = dyn_cast<SCEVCommutativeExpr>(S)) { + for (SCEVCommutativeExpr::op_iterator I = Commutative->op_begin(), + E = Commutative->op_end(); I != E; ++I) + if (!isSafe(*I, L)) return false; + return true; + } + + // A cast is safe if its operand is. + if (const SCEVCastExpr *C = dyn_cast<SCEVCastExpr>(S)) + return isSafe(C->getOperand(), L); + + // A udiv is safe if its operands are. 
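The two remainder rewrites above rest on simple identities once ScalarEvolution proves the numerator lies in [0, n): i % n is just i, and (i+1) % n is (i+1)==n ? 0 : i+1. An exhaustive check over small values:

#include <cassert>

int main() {
  // Check both remainder rewrites for every i in [0, n).
  for (int n = 1; n <= 8; ++n)
    for (int i = 0; i < n; ++i) {
      // i % n --> i when i is known to lie in [0, n).
      assert(i % n == i);
      // (i+1) % n --> (i+1)==n ? 0 : (i+1) when i lies in [0, n).
      assert((i + 1) % n == ((i + 1) == n ? 0 : i + 1));
    }
  return 0;
}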
+ if (const SCEVUDivExpr *UD = dyn_cast<SCEVUDivExpr>(S)) + return isSafe(UD->getLHS(), L) && + isSafe(UD->getRHS(), L); + + // SCEVUnknown is always safe. + if (isa<SCEVUnknown>(S)) + return true; + + // Nothing else is safe. + return false; +} + void IndVarSimplify::RewriteIVExpressions(Loop *L, SCEVExpander &Rewriter) { SmallVector<WeakVH, 16> DeadInsts; @@ -465,7 +641,6 @@ void IndVarSimplify::RewriteIVExpressions(Loop *L, SCEVExpander &Rewriter) { // the need for the code evaluation methods to insert induction variables // of different sizes. for (IVUsers::iterator UI = IU->begin(), E = IU->end(); UI != E; ++UI) { - const SCEV *Stride = UI->getStride(); Value *Op = UI->getOperandValToReplace(); const Type *UseTy = Op->getType(); Instruction *User = UI->getUser(); @@ -486,7 +661,7 @@ void IndVarSimplify::RewriteIVExpressions(Loop *L, SCEVExpander &Rewriter) { // currently can only reduce affine polynomials. For now just disable // indvar subst on anything more complex than an affine addrec, unless // it can be expanded to a trivial value. - if (!AR->isLoopInvariant(L) && !Stride->isLoopInvariant(L)) + if (!isSafe(AR, L)) continue; // Determine the insertion point for this user. By default, insert diff --git a/lib/Transforms/Scalar/JumpThreading.cpp b/lib/Transforms/Scalar/JumpThreading.cpp index a6489ec..df05b71 100644 --- a/lib/Transforms/Scalar/JumpThreading.cpp +++ b/lib/Transforms/Scalar/JumpThreading.cpp @@ -670,8 +670,10 @@ bool JumpThreading::ProcessBranchOnDuplicateCond(BasicBlock *PredBB, Value *OldCond = DestBI->getCondition(); DestBI->setCondition(ConstantInt::get(Type::getInt1Ty(BB->getContext()), BranchDir)); - ConstantFoldTerminator(BB); + // Delete dead instructions before we fold the branch. Folding the branch + // can eliminate edges from the CFG which can end up deleting OldCond. RecursivelyDeleteTriviallyDeadInstructions(OldCond); + ConstantFoldTerminator(BB); return true; } diff --git a/lib/Transforms/Scalar/LICM.cpp b/lib/Transforms/Scalar/LICM.cpp index d7ace342..7347395 100644 --- a/lib/Transforms/Scalar/LICM.cpp +++ b/lib/Transforms/Scalar/LICM.cpp @@ -683,16 +683,18 @@ void LICM::PromoteValuesInLoop() { // to LI as we are loading or storing. Since we know that the value is // stored in this loop, this will always succeed. for (Value::use_iterator UI = Ptr->use_begin(), E = Ptr->use_end(); - UI != E; ++UI) - if (LoadInst *LI = dyn_cast<LoadInst>(*UI)) { + UI != E; ++UI) { + User *U = *UI; + if (LoadInst *LI = dyn_cast<LoadInst>(U)) { LoadValue = LI; break; - } else if (StoreInst *SI = dyn_cast<StoreInst>(*UI)) { + } else if (StoreInst *SI = dyn_cast<StoreInst>(U)) { if (SI->getOperand(1) == Ptr) { LoadValue = SI->getOperand(0); break; } } + } assert(LoadValue && "No store through the pointer found!"); PointerValueNumbers.push_back(LoadValue); // Remember this for later. } diff --git a/lib/Transforms/Scalar/LoopIndexSplit.cpp b/lib/Transforms/Scalar/LoopIndexSplit.cpp index 16d3f2f..101ff5b 100644 --- a/lib/Transforms/Scalar/LoopIndexSplit.cpp +++ b/lib/Transforms/Scalar/LoopIndexSplit.cpp @@ -948,6 +948,25 @@ bool LoopIndexSplit::splitLoop() { if (!IVBasedValues.count(SplitCondition->getOperand(!SVOpNum))) return false; + // Check for side effects. 
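The isSafe() guard above exists because only affine recurrences can be kept up to date cheaply inside the loop. A small illustration of that cost argument, under the assumption of a simple integer IV (the strength-reduced update is a single add per iteration, which is exactly what the current strength-reduction pass can produce):

#include <cassert>

int main() {
  // An affine IV expression {b,+,s} (i.e. b + i*s) needs only one add per
  // iteration to stay current, so expanding it inside the loop is cheap.
  const int b = 7, s = 3, n = 100;
  int affine = b;                      // running value of b + i*s
  for (int i = 0; i < n; ++i) {
    assert(affine == b + i * s);       // what the expanded expression denotes
    affine += s;                       // strength-reduced update: one add
  }
  // A non-affine expression such as i*i has no such single-add update that
  // the current strength-reduction pass knows how to form, which is why
  // isSafe() rejects it and leaves the IV use alone.
  return 0;
}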
+ for (Loop::block_iterator I = L->block_begin(), E = L->block_end(); + I != E; ++I) { + BasicBlock *BB = *I; + + assert(DT->dominates(Header, BB)); + if (DT->properlyDominates(SplitCondition->getParent(), BB)) + continue; + + for (BasicBlock::iterator BI = BB->begin(), BE = BB->end(); + BI != BE; ++BI) { + Instruction *Inst = BI; + + if (!Inst->isSafeToSpeculativelyExecute() && !isa<PHINode>(Inst) + && !isa<BranchInst>(Inst) && !isa<DbgInfoIntrinsic>(Inst)) + return false; + } + } + // Normalize loop conditions so that it is easier to calculate new loop // bounds. if (IVisGT(*ExitCondition) || IVisGE(*ExitCondition)) { diff --git a/lib/Transforms/Scalar/LoopStrengthReduce.cpp b/lib/Transforms/Scalar/LoopStrengthReduce.cpp index 625a75d..cf3d16f 100644 --- a/lib/Transforms/Scalar/LoopStrengthReduce.cpp +++ b/lib/Transforms/Scalar/LoopStrengthReduce.cpp @@ -221,7 +221,7 @@ static void DoInitialMatch(const SCEV *S, Loop *L, if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S)) if (!AR->getStart()->isZero()) { DoInitialMatch(AR->getStart(), L, Good, Bad, SE, DT); - DoInitialMatch(SE.getAddRecExpr(SE.getIntegerSCEV(0, AR->getType()), + DoInitialMatch(SE.getAddRecExpr(SE.getConstant(AR->getType(), 0), AR->getStepRecurrence(SE), AR->getLoop()), L, Good, Bad, SE, DT); @@ -262,11 +262,15 @@ void Formula::InitialMatch(const SCEV *S, Loop *L, SmallVector<const SCEV *, 4> Bad; DoInitialMatch(S, L, Good, Bad, SE, DT); if (!Good.empty()) { - BaseRegs.push_back(SE.getAddExpr(Good)); + const SCEV *Sum = SE.getAddExpr(Good); + if (!Sum->isZero()) + BaseRegs.push_back(Sum); AM.HasBaseReg = true; } if (!Bad.empty()) { - BaseRegs.push_back(SE.getAddExpr(Bad)); + const SCEV *Sum = SE.getAddExpr(Bad); + if (!Sum->isZero()) + BaseRegs.push_back(Sum); AM.HasBaseReg = true; } } @@ -375,7 +379,7 @@ static const SCEV *getExactSDiv(const SCEV *LHS, const SCEV *RHS, bool IgnoreSignificantBits = false) { // Handle the trivial case, which works for any SCEV type. if (LHS == RHS) - return SE.getIntegerSCEV(1, LHS->getType()); + return SE.getConstant(LHS->getType(), 1); // Handle x /s -1 as x * -1, to give ScalarEvolution a chance to do some // folding. @@ -450,7 +454,7 @@ static const SCEV *getExactSDiv(const SCEV *LHS, const SCEV *RHS, static int64_t ExtractImmediate(const SCEV *&S, ScalarEvolution &SE) { if (const SCEVConstant *C = dyn_cast<SCEVConstant>(S)) { if (C->getValue()->getValue().getMinSignedBits() <= 64) { - S = SE.getIntegerSCEV(0, C->getType()); + S = SE.getConstant(C->getType(), 0); return C->getValue()->getSExtValue(); } } else if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(S)) { @@ -473,7 +477,7 @@ static int64_t ExtractImmediate(const SCEV *&S, ScalarEvolution &SE) { static GlobalValue *ExtractSymbol(const SCEV *&S, ScalarEvolution &SE) { if (const SCEVUnknown *U = dyn_cast<SCEVUnknown>(S)) { if (GlobalValue *GV = dyn_cast<GlobalValue>(U->getValue())) { - S = SE.getIntegerSCEV(0, GV->getType()); + S = SE.getConstant(GV->getType(), 0); return GV; } } else if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(S)) { @@ -781,10 +785,10 @@ struct LSRFixup { /// will be replaced. Value *OperandValToReplace; - /// PostIncLoop - If this user is to use the post-incremented value of an + /// PostIncLoops - If this user is to use the post-incremented value of an /// induction variable, this variable is non-null and holds the loop /// associated with the induction variable. 
- const Loop *PostIncLoop; + PostIncLoopSet PostIncLoops; /// LUIdx - The index of the LSRUse describing the expression which /// this fixup needs, minus an offset (below). @@ -795,6 +799,8 @@ struct LSRFixup { /// offsets, for example in an unrolled loop. int64_t Offset; + bool isUseFullyOutsideLoop(const Loop *L) const; + LSRFixup(); void print(raw_ostream &OS) const; @@ -804,9 +810,24 @@ struct LSRFixup { } LSRFixup::LSRFixup() - : UserInst(0), OperandValToReplace(0), PostIncLoop(0), + : UserInst(0), OperandValToReplace(0), LUIdx(~size_t(0)), Offset(0) {} +/// isUseFullyOutsideLoop - Test whether this fixup always uses its +/// value outside of the given loop. +bool LSRFixup::isUseFullyOutsideLoop(const Loop *L) const { + // PHI nodes use their value in their incoming blocks. + if (const PHINode *PN = dyn_cast<PHINode>(UserInst)) { + for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) + if (PN->getIncomingValue(i) == OperandValToReplace && + L->contains(PN->getIncomingBlock(i))) + return false; + return true; + } + + return !L->contains(UserInst); +} + void LSRFixup::print(raw_ostream &OS) const { OS << "UserInst="; // Store is common and interesting enough to be worth special-casing. @@ -821,9 +842,10 @@ void LSRFixup::print(raw_ostream &OS) const { OS << ", OperandValToReplace="; WriteAsOperand(OS, OperandValToReplace, /*PrintType=*/false); - if (PostIncLoop) { + for (PostIncLoopSet::const_iterator I = PostIncLoops.begin(), + E = PostIncLoops.end(); I != E; ++I) { OS << ", PostIncLoop="; - WriteAsOperand(OS, PostIncLoop->getHeader(), /*PrintType=*/false); + WriteAsOperand(OS, (*I)->getHeader(), /*PrintType=*/false); } if (LUIdx != ~size_t(0)) @@ -1135,6 +1157,7 @@ class LSRInstance { IVUsers &IU; ScalarEvolution &SE; DominatorTree &DT; + LoopInfo &LI; const TargetLowering *const TLI; Loop *const L; bool Changed; @@ -1214,6 +1237,13 @@ public: DenseSet<const SCEV *> &VisitedRegs) const; void Solve(SmallVectorImpl<const Formula *> &Solution) const; + BasicBlock::iterator + HoistInsertPosition(BasicBlock::iterator IP, + const SmallVectorImpl<Instruction *> &Inputs) const; + BasicBlock::iterator AdjustInsertPositionForExpand(BasicBlock::iterator IP, + const LSRFixup &LF, + const LSRUse &LU) const; + Value *Expand(const LSRFixup &LF, const Formula &F, BasicBlock::iterator IP, @@ -1427,16 +1457,30 @@ ICmpInst *LSRInstance::OptimizeMax(ICmpInst *Cond, IVStrideUse* &CondUse) { const SCEV *BackedgeTakenCount = SE.getBackedgeTakenCount(L); if (isa<SCEVCouldNotCompute>(BackedgeTakenCount)) return Cond; - const SCEV *One = SE.getIntegerSCEV(1, BackedgeTakenCount->getType()); + const SCEV *One = SE.getConstant(BackedgeTakenCount->getType(), 1); // Add one to the backedge-taken count to get the trip count. const SCEV *IterationCount = SE.getAddExpr(BackedgeTakenCount, One); - - // Check for a max calculation that matches the pattern. - if (!isa<SCEVSMaxExpr>(IterationCount) && !isa<SCEVUMaxExpr>(IterationCount)) + if (IterationCount != SE.getSCEV(Sel)) return Cond; + + // Check for a max calculation that matches the pattern. There's no check + // for ICMP_ULE here because the comparison would be with zero, which + // isn't interesting. 
+ CmpInst::Predicate Pred = ICmpInst::BAD_ICMP_PREDICATE; + const SCEVNAryExpr *Max = 0; + if (const SCEVSMaxExpr *S = dyn_cast<SCEVSMaxExpr>(BackedgeTakenCount)) { + Pred = ICmpInst::ICMP_SLE; + Max = S; + } else if (const SCEVSMaxExpr *S = dyn_cast<SCEVSMaxExpr>(IterationCount)) { + Pred = ICmpInst::ICMP_SLT; + Max = S; + } else if (const SCEVUMaxExpr *U = dyn_cast<SCEVUMaxExpr>(IterationCount)) { + Pred = ICmpInst::ICMP_ULT; + Max = U; + } else { + // No match; bail. return Cond; - const SCEVNAryExpr *Max = cast<SCEVNAryExpr>(IterationCount); - if (Max != SE.getSCEV(Sel)) return Cond; + } // To handle a max with more than two operands, this optimization would // require additional checking and setup. @@ -1445,7 +1489,13 @@ ICmpInst *LSRInstance::OptimizeMax(ICmpInst *Cond, IVStrideUse* &CondUse) { const SCEV *MaxLHS = Max->getOperand(0); const SCEV *MaxRHS = Max->getOperand(1); - if (!MaxLHS || MaxLHS != One) return Cond; + + // ScalarEvolution canonicalizes constants to the left. For < and >, look + // for a comparison with 1. For <= and >=, a comparison with zero. + if (!MaxLHS || + (ICmpInst::isTrueWhenEqual(Pred) ? !MaxLHS->isZero() : (MaxLHS != One))) + return Cond; + // Check the relevant induction variable for conformance to // the pattern. const SCEV *IV = SE.getSCEV(Cond->getOperand(0)); @@ -1461,16 +1511,29 @@ ICmpInst *LSRInstance::OptimizeMax(ICmpInst *Cond, IVStrideUse* &CondUse) { // Check the right operand of the select, and remember it, as it will // be used in the new comparison instruction. Value *NewRHS = 0; - if (SE.getSCEV(Sel->getOperand(1)) == MaxRHS) + if (ICmpInst::isTrueWhenEqual(Pred)) { + // Look for n+1, and grab n. + if (AddOperator *BO = dyn_cast<AddOperator>(Sel->getOperand(1))) + if (isa<ConstantInt>(BO->getOperand(1)) && + cast<ConstantInt>(BO->getOperand(1))->isOne() && + SE.getSCEV(BO->getOperand(0)) == MaxRHS) + NewRHS = BO->getOperand(0); + if (AddOperator *BO = dyn_cast<AddOperator>(Sel->getOperand(2))) + if (isa<ConstantInt>(BO->getOperand(1)) && + cast<ConstantInt>(BO->getOperand(1))->isOne() && + SE.getSCEV(BO->getOperand(0)) == MaxRHS) + NewRHS = BO->getOperand(0); + if (!NewRHS) + return Cond; + } else if (SE.getSCEV(Sel->getOperand(1)) == MaxRHS) NewRHS = Sel->getOperand(1); else if (SE.getSCEV(Sel->getOperand(2)) == MaxRHS) NewRHS = Sel->getOperand(2); - if (!NewRHS) return Cond; + else + llvm_unreachable("Max doesn't match expected pattern!"); // Determine the new comparison opcode. It may be signed or unsigned, // and the original comparison may be either equality or inequality. - CmpInst::Predicate Pred = - isa<SCEVSMaxExpr>(Max) ? CmpInst::ICMP_SLT : CmpInst::ICMP_ULT; if (Cond->getPredicate() == CmpInst::ICMP_EQ) Pred = CmpInst::getInversePredicate(Pred); @@ -1545,8 +1608,9 @@ LSRInstance::OptimizeLoopTermCond() { !DT.properlyDominates(UI->getUser()->getParent(), ExitingBlock)) { // Conservatively assume there may be reuse if the quotient of their // strides could be a legal scale. - const SCEV *A = CondUse->getStride(); - const SCEV *B = UI->getStride(); + const SCEV *A = IU.getStride(*CondUse, L); + const SCEV *B = IU.getStride(*UI, L); + if (!A || !B) continue; if (SE.getTypeSizeInBits(A->getType()) != SE.getTypeSizeInBits(B->getType())) { if (SE.getTypeSizeInBits(A->getType()) > @@ -1598,8 +1662,7 @@ LSRInstance::OptimizeLoopTermCond() { ExitingBlock->getInstList().insert(TermBr, Cond); // Clone the IVUse, as the old use still exists! 
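The max-pattern handling above lets the exit test drop an smax(n, 1) trip count: comparing the incremented IV for inequality against the max can be replaced by a strict signed comparison against plain n, after which the max computation usually becomes dead. A standalone check that both exit tests run the body the same number of times, for the signed strict-inequality case only:

#include <algorithm>
#include <cassert>

// Count iterations of "do { body } while (++i != smax(n, 1))" versus the
// rewritten form "do { body } while (++i < n)", for an IV starting at 0.
static int tripWithMax(int n) {
  int i = 0, trips = 0;
  do { ++trips; } while (++i != std::max(n, 1));
  return trips;
}

static int tripWithSlt(int n) {
  int i = 0, trips = 0;
  do { ++trips; } while (++i < n);
  return trips;
}

int main() {
  for (int n = -3; n <= 10; ++n)
    assert(tripWithMax(n) == tripWithSlt(n));   // both execute max(n,1) times
  return 0;
}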
- CondUse = &IU.AddUser(CondUse->getStride(), CondUse->getOffset(), - Cond, CondUse->getOperandValToReplace()); + CondUse = &IU.AddUser(Cond, CondUse->getOperandValToReplace()); TermBr->replaceUsesOfWith(OldCond, Cond); } } @@ -1607,9 +1670,7 @@ LSRInstance::OptimizeLoopTermCond() { // If we get to here, we know that we can transform the setcc instruction to // use the post-incremented version of the IV, allowing us to coalesce the // live ranges for the IV correctly. - CondUse->setOffset(SE.getMinusSCEV(CondUse->getOffset(), - CondUse->getStride())); - CondUse->setIsUseOfPostIncrementedValue(true); + CondUse->transformToPostInc(L); Changed = true; PostIncs.insert(Cond); @@ -1717,19 +1778,24 @@ void LSRInstance::CollectInterestingTypesAndFactors() { SmallSetVector<const SCEV *, 4> Strides; // Collect interesting types and strides. + SmallVector<const SCEV *, 4> Worklist; for (IVUsers::const_iterator UI = IU.begin(), E = IU.end(); UI != E; ++UI) { - const SCEV *Stride = UI->getStride(); + const SCEV *Expr = IU.getExpr(*UI); // Collect interesting types. - Types.insert(SE.getEffectiveSCEVType(Stride->getType())); - - // Add the stride for this loop. - Strides.insert(Stride); - - // Add strides for other mentioned loops. - for (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(UI->getOffset()); - AR; AR = dyn_cast<SCEVAddRecExpr>(AR->getStart())) - Strides.insert(AR->getStepRecurrence(SE)); + Types.insert(SE.getEffectiveSCEVType(Expr->getType())); + + // Add strides for mentioned loops. + Worklist.push_back(Expr); + do { + const SCEV *S = Worklist.pop_back_val(); + if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S)) { + Strides.insert(AR->getStepRecurrence(SE)); + Worklist.push_back(AR->getStart()); + } else if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(S)) { + Worklist.insert(Worklist.end(), Add->op_begin(), Add->op_end()); + } + } while (!Worklist.empty()); } // Compute interesting factors from the set of interesting strides. @@ -1776,8 +1842,7 @@ void LSRInstance::CollectFixupsAndInitialFormulae() { LSRFixup &LF = getNewFixup(); LF.UserInst = UI->getUser(); LF.OperandValToReplace = UI->getOperandValToReplace(); - if (UI->isUseOfPostIncrementedValue()) - LF.PostIncLoop = L; + LF.PostIncLoops = UI->getPostIncLoops(); LSRUse::KindType Kind = LSRUse::Basic; const Type *AccessTy = 0; @@ -1786,7 +1851,7 @@ void LSRInstance::CollectFixupsAndInitialFormulae() { AccessTy = getAccessType(LF.UserInst); } - const SCEV *S = IU.getCanonicalExpr(*UI); + const SCEV *S = IU.getExpr(*UI); // Equality (== and !=) ICmps are special. We can rewrite (i == N) as // (N - i == 0), and this allows (N - i) to be the expression that we work @@ -1824,7 +1889,7 @@ void LSRInstance::CollectFixupsAndInitialFormulae() { LF.LUIdx = P.first; LF.Offset = P.second; LSRUse &LU = Uses[LF.LUIdx]; - LU.AllFixupsOutsideLoop &= !L->contains(LF.UserInst); + LU.AllFixupsOutsideLoop &= LF.isUseFullyOutsideLoop(L); // If this is the first use of this LSRUse, give it a formula. if (LU.Formulae.empty()) { @@ -1918,9 +1983,17 @@ LSRInstance::CollectLoopInvariantFixupsAndFormulae() { continue; // Ignore uses which are part of other SCEV expressions, to avoid // analyzing them multiple times. - if (SE.isSCEVable(UserInst->getType()) && - !isa<SCEVUnknown>(SE.getSCEV(const_cast<Instruction *>(UserInst)))) - continue; + if (SE.isSCEVable(UserInst->getType())) { + const SCEV *UserS = SE.getSCEV(const_cast<Instruction *>(UserInst)); + // If the user is a no-op, look through to its uses. 
+ if (!isa<SCEVUnknown>(UserS)) + continue; + if (UserS == U) { + Worklist.push_back( + SE.getUnknown(const_cast<Instruction *>(UserInst))); + continue; + } + } // Ignore icmp instructions which are already being analyzed. if (const ICmpInst *ICI = dyn_cast<ICmpInst>(UserInst)) { unsigned OtherIdx = !UI.getOperandNo(); @@ -1936,7 +2009,7 @@ LSRInstance::CollectLoopInvariantFixupsAndFormulae() { LF.LUIdx = P.first; LF.Offset = P.second; LSRUse &LU = Uses[LF.LUIdx]; - LU.AllFixupsOutsideLoop &= L->contains(LF.UserInst); + LU.AllFixupsOutsideLoop &= LF.isUseFullyOutsideLoop(L); InsertSupplementalFormula(U, LU, LF.LUIdx); CountRegisters(LU.Formulae.back(), Uses.size() - 1); break; @@ -1959,7 +2032,7 @@ static void CollectSubexprs(const SCEV *S, const SCEVConstant *C, } else if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S)) { // Split a non-zero base out of an addrec. if (!AR->getStart()->isZero()) { - CollectSubexprs(SE.getAddRecExpr(SE.getIntegerSCEV(0, AR->getType()), + CollectSubexprs(SE.getAddRecExpr(SE.getConstant(AR->getType(), 0), AR->getStepRecurrence(SE), AR->getLoop()), C, Ops, SE); CollectSubexprs(AR->getStart(), C, Ops, SE); @@ -2020,8 +2093,11 @@ void LSRInstance::GenerateReassociations(LSRUse &LU, unsigned LUIdx, LU.Kind, LU.AccessTy, TLI, SE)) continue; + const SCEV *InnerSum = SE.getAddExpr(InnerAddOps); + if (InnerSum->isZero()) + continue; Formula F = Base; - F.BaseRegs[i] = SE.getAddExpr(InnerAddOps); + F.BaseRegs[i] = InnerSum; F.BaseRegs.push_back(*J); if (InsertFormula(LU, LUIdx, F)) // If that formula hadn't been seen before, recurse to find more like @@ -2102,7 +2178,7 @@ void LSRInstance::GenerateConstantOffsets(LSRUse &LU, unsigned LUIdx, F.AM.BaseOffs = (uint64_t)Base.AM.BaseOffs - *I; if (isLegalUse(F.AM, LU.MinOffset - *I, LU.MaxOffset - *I, LU.Kind, LU.AccessTy, TLI)) { - F.BaseRegs[i] = SE.getAddExpr(G, SE.getIntegerSCEV(*I, G->getType())); + F.BaseRegs[i] = SE.getAddExpr(G, SE.getConstant(G->getType(), *I)); (void)InsertFormula(LU, LUIdx, F); } @@ -2165,7 +2241,7 @@ void LSRInstance::GenerateICmpZeroScales(LSRUse &LU, unsigned LUIdx, // Compensate for the use having MinOffset built into it. F.AM.BaseOffs = (uint64_t)F.AM.BaseOffs + Offset - LU.MinOffset; - const SCEV *FactorS = SE.getIntegerSCEV(Factor, IntTy); + const SCEV *FactorS = SE.getConstant(IntTy, Factor); // Check that multiplying with each base register doesn't overflow. for (size_t i = 0, e = F.BaseRegs.size(); i != e; ++i) { @@ -2227,7 +2303,7 @@ void LSRInstance::GenerateScales(LSRUse &LU, unsigned LUIdx, for (size_t i = 0, e = Base.BaseRegs.size(); i != e; ++i) if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(Base.BaseRegs[i])) { - const SCEV *FactorS = SE.getIntegerSCEV(Factor, IntTy); + const SCEV *FactorS = SE.getConstant(IntTy, Factor); if (FactorS->isZero()) continue; // Divide out the factor, ignoring high bits, since we'll be @@ -2426,7 +2502,7 @@ void LSRInstance::GenerateCrossUseConstantOffsets() { if (C->getValue()->getValue().isNegative() != (NewF.AM.BaseOffs < 0) && (C->getValue()->getValue().abs() * APInt(BitWidth, F.AM.Scale)) - .ule(APInt(BitWidth, NewF.AM.BaseOffs).abs())) + .ule(abs64(NewF.AM.BaseOffs))) continue; // OK, looks good. @@ -2454,7 +2530,7 @@ void LSRInstance::GenerateCrossUseConstantOffsets() { if (C->getValue()->getValue().isNegative() != (NewF.AM.BaseOffs < 0) && C->getValue()->getValue().abs() - .ule(APInt(BitWidth, NewF.AM.BaseOffs).abs())) + .ule(abs64(NewF.AM.BaseOffs))) goto skip_formula; // Ok, looks good. 
@@ -2776,37 +2852,33 @@ static BasicBlock *getImmediateDominator(BasicBlock *BB, DominatorTree &DT) { return Node->getBlock(); } -Value *LSRInstance::Expand(const LSRFixup &LF, - const Formula &F, - BasicBlock::iterator IP, - SCEVExpander &Rewriter, - SmallVectorImpl<WeakVH> &DeadInsts) const { - const LSRUse &LU = Uses[LF.LUIdx]; - - // Then, collect some instructions which we will remain dominated by when - // expanding the replacement. These must be dominated by any operands that - // will be required in the expansion. - SmallVector<Instruction *, 4> Inputs; - if (Instruction *I = dyn_cast<Instruction>(LF.OperandValToReplace)) - Inputs.push_back(I); - if (LU.Kind == LSRUse::ICmpZero) - if (Instruction *I = - dyn_cast<Instruction>(cast<ICmpInst>(LF.UserInst)->getOperand(1))) - Inputs.push_back(I); - if (LF.PostIncLoop) { - if (!L->contains(LF.UserInst)) - Inputs.push_back(L->getLoopLatch()->getTerminator()); - else - Inputs.push_back(IVIncInsertPos); - } - - // Then, climb up the immediate dominator tree as far as we can go while - // still being dominated by the input positions. +/// HoistInsertPosition - Helper for AdjustInsertPositionForExpand. Climb up +/// the dominator tree far as we can go while still being dominated by the +/// input positions. This helps canonicalize the insert position, which +/// encourages sharing. +BasicBlock::iterator +LSRInstance::HoistInsertPosition(BasicBlock::iterator IP, + const SmallVectorImpl<Instruction *> &Inputs) + const { for (;;) { + const Loop *IPLoop = LI.getLoopFor(IP->getParent()); + unsigned IPLoopDepth = IPLoop ? IPLoop->getLoopDepth() : 0; + + BasicBlock *IDom; + for (BasicBlock *Rung = IP->getParent(); ; Rung = IDom) { + IDom = getImmediateDominator(Rung, DT); + if (!IDom) return IP; + + // Don't climb into a loop though. + const Loop *IDomLoop = LI.getLoopFor(IDom); + unsigned IDomDepth = IDomLoop ? IDomLoop->getLoopDepth() : 0; + if (IDomDepth <= IPLoopDepth && + (IDomDepth != IPLoopDepth || IDomLoop == IPLoop)) + break; + } + bool AllDominate = true; Instruction *BetterPos = 0; - BasicBlock *IDom = getImmediateDominator(IP->getParent(), DT); - if (!IDom) break; Instruction *Tentative = IDom->getTerminator(); for (SmallVectorImpl<Instruction *>::const_iterator I = Inputs.begin(), E = Inputs.end(); I != E; ++I) { @@ -2815,6 +2887,8 @@ Value *LSRInstance::Expand(const LSRFixup &LF, AllDominate = false; break; } + // Attempt to find an insert position in the middle of the block, + // instead of at the end, so that it can be used for other expansions. if (IDom == Inst->getParent() && (!BetterPos || DT.dominates(BetterPos, Inst))) BetterPos = next(BasicBlock::iterator(Inst)); @@ -2826,12 +2900,77 @@ Value *LSRInstance::Expand(const LSRFixup &LF, else IP = Tentative; } + + return IP; +} + +/// AdjustInsertPositionForExpand - Determine an input position which will be +/// dominated by the operands and which will dominate the result. +BasicBlock::iterator +LSRInstance::AdjustInsertPositionForExpand(BasicBlock::iterator IP, + const LSRFixup &LF, + const LSRUse &LU) const { + // Collect some instructions which must be dominated by the + // expanding replacement. These must be dominated by any operands that + // will be required in the expansion. 
+ SmallVector<Instruction *, 4> Inputs; + if (Instruction *I = dyn_cast<Instruction>(LF.OperandValToReplace)) + Inputs.push_back(I); + if (LU.Kind == LSRUse::ICmpZero) + if (Instruction *I = + dyn_cast<Instruction>(cast<ICmpInst>(LF.UserInst)->getOperand(1))) + Inputs.push_back(I); + if (LF.PostIncLoops.count(L)) { + if (LF.isUseFullyOutsideLoop(L)) + Inputs.push_back(L->getLoopLatch()->getTerminator()); + else + Inputs.push_back(IVIncInsertPos); + } + // The expansion must also be dominated by the increment positions of any + // loops it for which it is using post-inc mode. + for (PostIncLoopSet::const_iterator I = LF.PostIncLoops.begin(), + E = LF.PostIncLoops.end(); I != E; ++I) { + const Loop *PIL = *I; + if (PIL == L) continue; + + // Be dominated by the loop exit. + SmallVector<BasicBlock *, 4> ExitingBlocks; + PIL->getExitingBlocks(ExitingBlocks); + if (!ExitingBlocks.empty()) { + BasicBlock *BB = ExitingBlocks[0]; + for (unsigned i = 1, e = ExitingBlocks.size(); i != e; ++i) + BB = DT.findNearestCommonDominator(BB, ExitingBlocks[i]); + Inputs.push_back(BB->getTerminator()); + } + } + + // Then, climb up the immediate dominator tree as far as we can go while + // still being dominated by the input positions. + IP = HoistInsertPosition(IP, Inputs); + + // Don't insert instructions before PHI nodes. while (isa<PHINode>(IP)) ++IP; + + // Ignore debug intrinsics. while (isa<DbgInfoIntrinsic>(IP)) ++IP; + return IP; +} + +Value *LSRInstance::Expand(const LSRFixup &LF, + const Formula &F, + BasicBlock::iterator IP, + SCEVExpander &Rewriter, + SmallVectorImpl<WeakVH> &DeadInsts) const { + const LSRUse &LU = Uses[LF.LUIdx]; + + // Determine an input position which will be dominated by the operands and + // which will dominate the result. + IP = AdjustInsertPositionForExpand(IP, LF, LU); + // Inform the Rewriter if we have a post-increment use, so that it can // perform an advantageous expansion. - Rewriter.setPostInc(LF.PostIncLoop); + Rewriter.setPostInc(LF.PostIncLoops); // This is the type that the user actually needs. const Type *OpTy = LF.OperandValToReplace->getType(); @@ -2855,24 +2994,11 @@ Value *LSRInstance::Expand(const LSRFixup &LF, const SCEV *Reg = *I; assert(!Reg->isZero() && "Zero allocated in a base register!"); - // If we're expanding for a post-inc user for the add-rec's loop, make the - // post-inc adjustment. - const SCEV *Start = Reg; - while (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(Start)) { - if (AR->getLoop() == LF.PostIncLoop) { - Reg = SE.getAddExpr(Reg, AR->getStepRecurrence(SE)); - // If the user is inside the loop, insert the code after the increment - // so that it is dominated by its operand. If the original insert point - // was already dominated by the increment, keep it, because there may - // be loop-variant operands that need to be respected also. - if (L->contains(LF.UserInst) && !DT.dominates(IVIncInsertPos, IP)) { - IP = IVIncInsertPos; - while (isa<DbgInfoIntrinsic>(IP)) ++IP; - } - break; - } - Start = AR->getStart(); - } + // If we're expanding for a post-inc user, make the post-inc adjustment. 
+ PostIncLoopSet &Loops = const_cast<PostIncLoopSet &>(LF.PostIncLoops); + Reg = TransformForPostIncUse(Denormalize, Reg, + LF.UserInst, LF.OperandValToReplace, + Loops, SE, DT); Ops.push_back(SE.getUnknown(Rewriter.expandCodeFor(Reg, 0, IP))); } @@ -2889,11 +3015,11 @@ Value *LSRInstance::Expand(const LSRFixup &LF, if (F.AM.Scale != 0) { const SCEV *ScaledS = F.ScaledReg; - // If we're expanding for a post-inc user for the add-rec's loop, make the - // post-inc adjustment. - if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(ScaledS)) - if (AR->getLoop() == LF.PostIncLoop) - ScaledS = SE.getAddExpr(ScaledS, AR->getStepRecurrence(SE)); + // If we're expanding for a post-inc user, make the post-inc adjustment. + PostIncLoopSet &Loops = const_cast<PostIncLoopSet &>(LF.PostIncLoops); + ScaledS = TransformForPostIncUse(Denormalize, ScaledS, + LF.UserInst, LF.OperandValToReplace, + Loops, SE, DT); if (LU.Kind == LSRUse::ICmpZero) { // An interesting way of "folding" with an icmp is to use a negated @@ -2907,8 +3033,7 @@ Value *LSRInstance::Expand(const LSRFixup &LF, // which is expected to be matched as part of the address. ScaledS = SE.getUnknown(Rewriter.expandCodeFor(ScaledS, 0, IP)); ScaledS = SE.getMulExpr(ScaledS, - SE.getIntegerSCEV(F.AM.Scale, - ScaledS->getType())); + SE.getConstant(ScaledS->getType(), F.AM.Scale)); Ops.push_back(ScaledS); // Flush the operand list to suppress SCEVExpander hoisting. @@ -2949,12 +3074,12 @@ Value *LSRInstance::Expand(const LSRFixup &LF, // Emit instructions summing all the operands. const SCEV *FullS = Ops.empty() ? - SE.getIntegerSCEV(0, IntTy) : + SE.getConstant(IntTy, 0) : SE.getAddExpr(Ops); Value *FullV = Rewriter.expandCodeFor(FullS, Ty, IP); // We're done expanding now, so reset the rewriter. - Rewriter.setPostInc(0); + Rewriter.clearPostInc(); // An ICmpZero Formula represents an ICmp which we're handling as a // comparison against zero. Now that we've expanded an expression for that @@ -3118,6 +3243,7 @@ LSRInstance::LSRInstance(const TargetLowering *tli, Loop *l, Pass *P) : IU(P->getAnalysis<IVUsers>()), SE(P->getAnalysis<ScalarEvolution>()), DT(P->getAnalysis<DominatorTree>()), + LI(P->getAnalysis<LoopInfo>()), TLI(tli), L(l), Changed(false), IVIncInsertPos(0) { // If LoopSimplify form is not available, stay out of trouble. @@ -3274,9 +3400,10 @@ void LoopStrengthReduce::getAnalysisUsage(AnalysisUsage &AU) const { // We split critical edges, so we change the CFG. However, we do update // many analyses if they are around. AU.addPreservedID(LoopSimplifyID); - AU.addPreserved<LoopInfo>(); AU.addPreserved("domfrontier"); + AU.addRequired<LoopInfo>(); + AU.addPreserved<LoopInfo>(); AU.addRequiredID(LoopSimplifyID); AU.addRequired<DominatorTree>(); AU.addPreserved<DominatorTree>(); diff --git a/lib/Transforms/Scalar/LoopUnswitch.cpp b/lib/Transforms/Scalar/LoopUnswitch.cpp index 3918738..ae7bf40 100644 --- a/lib/Transforms/Scalar/LoopUnswitch.cpp +++ b/lib/Transforms/Scalar/LoopUnswitch.cpp @@ -34,6 +34,7 @@ #include "llvm/Instructions.h" #include "llvm/Analysis/ConstantFolding.h" #include "llvm/Analysis/InlineCost.h" +#include "llvm/Analysis/InstructionSimplify.h" #include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/LoopPass.h" #include "llvm/Analysis/Dominators.h" @@ -677,15 +678,22 @@ void LoopUnswitch::UnswitchNontrivialCondition(Value *LIC, Constant *Val, LoopProcessWorklist.push_back(NewLoop); redoLoop = true; + // Keep a WeakVH holding onto LIC. 
If the first call to RewriteLoopBody + // deletes the instruction (for example by simplifying a PHI that feeds into + // the condition that we're unswitching on), we don't rewrite the second + // iteration. + WeakVH LICHandle(LIC); + // Now we rewrite the original code to know that the condition is true and the // new code to know that the condition is false. RewriteLoopBodyWithConditionConstant(L, LIC, Val, false); - - // It's possible that simplifying one loop could cause the other to be - // deleted. If so, don't simplify it. - if (!LoopProcessWorklist.empty() && LoopProcessWorklist.back() == NewLoop) - RewriteLoopBodyWithConditionConstant(NewLoop, LIC, Val, true); + // It's possible that simplifying one loop could cause the other to be + // changed to another value or a constant. If its a constant, don't simplify + // it. + if (!LoopProcessWorklist.empty() && LoopProcessWorklist.back() == NewLoop && + LICHandle && !isa<Constant>(LICHandle)) + RewriteLoopBodyWithConditionConstant(NewLoop, LICHandle, Val, true); } /// RemoveFromWorklist - Remove all instances of I from the worklist vector @@ -981,45 +989,16 @@ void LoopUnswitch::SimplifyCode(std::vector<Instruction*> &Worklist, Loop *L) { continue; } + // See if instruction simplification can hack this up. This is common for + // things like "select false, X, Y" after unswitching made the condition be + // 'false'. + if (Value *V = SimplifyInstruction(I)) { + ReplaceUsesOfWith(I, V, Worklist, L, LPM); + continue; + } + // Special case hacks that appear commonly in unswitched code. - switch (I->getOpcode()) { - case Instruction::Select: - if (ConstantInt *CB = dyn_cast<ConstantInt>(I->getOperand(0))) { - ReplaceUsesOfWith(I, I->getOperand(!CB->getZExtValue()+1), Worklist, L, - LPM); - continue; - } - break; - case Instruction::And: - if (isa<ConstantInt>(I->getOperand(0)) && - // constant -> RHS - I->getOperand(0)->getType()->isIntegerTy(1)) - cast<BinaryOperator>(I)->swapOperands(); - if (ConstantInt *CB = dyn_cast<ConstantInt>(I->getOperand(1))) - if (CB->getType()->isIntegerTy(1)) { - if (CB->isOne()) // X & 1 -> X - ReplaceUsesOfWith(I, I->getOperand(0), Worklist, L, LPM); - else // X & 0 -> 0 - ReplaceUsesOfWith(I, I->getOperand(1), Worklist, L, LPM); - continue; - } - break; - case Instruction::Or: - if (isa<ConstantInt>(I->getOperand(0)) && - // constant -> RHS - I->getOperand(0)->getType()->isIntegerTy(1)) - cast<BinaryOperator>(I)->swapOperands(); - if (ConstantInt *CB = dyn_cast<ConstantInt>(I->getOperand(1))) - if (CB->getType()->isIntegerTy(1)) { - if (CB->isOne()) // X | 1 -> 1 - ReplaceUsesOfWith(I, I->getOperand(1), Worklist, L, LPM); - else // X | 0 -> X - ReplaceUsesOfWith(I, I->getOperand(0), Worklist, L, LPM); - continue; - } - break; - case Instruction::Br: { - BranchInst *BI = cast<BranchInst>(I); + if (BranchInst *BI = dyn_cast<BranchInst>(I)) { if (BI->isUnconditional()) { // If BI's parent is the only pred of the successor, fold the two blocks // together. @@ -1052,13 +1031,13 @@ void LoopUnswitch::SimplifyCode(std::vector<Instruction*> &Worklist, Loop *L) { LPM->deleteSimpleAnalysisValue(Succ, L); Succ->eraseFromParent(); ++NumSimplify; - break; + continue; } if (ConstantInt *CB = dyn_cast<ConstantInt>(BI->getCondition())){ // Conditional branch. Turn it into an unconditional branch, then // remove dead blocks. - break; // FIXME: Enable. + continue; // FIXME: Enable. 
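The WeakVH guard above protects the second rewrite from touching a condition value that the first rewrite may have deleted or folded away. The same shape, sketched with std::weak_ptr standing in for WeakVH (an analogy only; WeakVH is an LLVM value handle, not a smart pointer, and this sketch uses C++11 for brevity):

#include <cassert>
#include <memory>

struct Cond { bool Val; };

// First rewrite: may end up destroying the condition object entirely
// (simulated here by releasing the only owning reference).
static void rewriteFirstLoop(std::shared_ptr<Cond> &C, bool simplifiesAway) {
  if (simplifiesAway)
    C.reset();            // the "instruction" is deleted
}

int main() {
  std::shared_ptr<Cond> LIC(new Cond());
  std::weak_ptr<Cond> Handle(LIC);   // plays the role of the WeakVH

  rewriteFirstLoop(LIC, /*simplifiesAway=*/true);

  // Run the second rewrite only if the handle still points at a live
  // object, mirroring the "LICHandle && !isa<Constant>(LICHandle)" test.
  if (std::shared_ptr<Cond> Live = Handle.lock()) {
    (void)Live;           // would call RewriteLoopBodyWithConditionConstant
    assert(false && "not reached: the condition was deleted");
  }
  return 0;
}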
DEBUG(dbgs() << "Folded branch: " << *BI); BasicBlock *DeadSucc = BI->getSuccessor(CB->getZExtValue()); @@ -1072,8 +1051,7 @@ void LoopUnswitch::SimplifyCode(std::vector<Instruction*> &Worklist, Loop *L) { RemoveBlockIfDead(DeadSucc, Worklist, L); } - break; - } + continue; } } } diff --git a/lib/Transforms/Scalar/MemCpyOptimizer.cpp b/lib/Transforms/Scalar/MemCpyOptimizer.cpp index 3b305ae..3611b8e 100644 --- a/lib/Transforms/Scalar/MemCpyOptimizer.cpp +++ b/lib/Transforms/Scalar/MemCpyOptimizer.cpp @@ -744,7 +744,7 @@ bool MemCpyOpt::processMemMove(MemMoveInst *M) { const Type *ArgTys[3] = { M->getRawDest()->getType(), M->getRawSource()->getType(), M->getLength()->getType() }; - M->setOperand(0,Intrinsic::getDeclaration(Mod, Intrinsic::memcpy, ArgTys, 3)); + M->setCalledFunction(Intrinsic::getDeclaration(Mod, Intrinsic::memcpy, ArgTys, 3)); // MemDep may have over conservative information about this instruction, just // conservatively flush it from the cache. diff --git a/lib/Transforms/Scalar/Reg2Mem.cpp b/lib/Transforms/Scalar/Reg2Mem.cpp index 7a6eec3..13222ac 100644 --- a/lib/Transforms/Scalar/Reg2Mem.cpp +++ b/lib/Transforms/Scalar/Reg2Mem.cpp @@ -46,10 +46,11 @@ namespace { bool valueEscapes(const Instruction *Inst) const { const BasicBlock *BB = Inst->getParent(); for (Value::const_use_iterator UI = Inst->use_begin(),E = Inst->use_end(); - UI != E; ++UI) - if (cast<Instruction>(*UI)->getParent() != BB || - isa<PHINode>(*UI)) + UI != E; ++UI) { + const Instruction *I = cast<Instruction>(*UI); + if (I->getParent() != BB || isa<PHINode>(I)) return true; + } return false; } diff --git a/lib/Transforms/Scalar/SCCP.cpp b/lib/Transforms/Scalar/SCCP.cpp index 4f09bee..907ece8 100644 --- a/lib/Transforms/Scalar/SCCP.cpp +++ b/lib/Transforms/Scalar/SCCP.cpp @@ -317,7 +317,10 @@ private: void markConstant(LatticeVal &IV, Value *V, Constant *C) { if (!IV.markConstant(C)) return; DEBUG(dbgs() << "markConstant: " << *C << ": " << *V << '\n'); - InstWorkList.push_back(V); + if (IV.isOverdefined()) + OverdefinedInstWorkList.push_back(V); + else + InstWorkList.push_back(V); } void markConstant(Value *V, Constant *C) { @@ -327,9 +330,13 @@ private: void markForcedConstant(Value *V, Constant *C) { assert(!V->getType()->isStructTy() && "Should use other method"); - ValueState[V].markForcedConstant(C); + LatticeVal &IV = ValueState[V]; + IV.markForcedConstant(C); DEBUG(dbgs() << "markForcedConstant: " << *C << ": " << *V << '\n'); - InstWorkList.push_back(V); + if (IV.isOverdefined()) + OverdefinedInstWorkList.push_back(V); + else + InstWorkList.push_back(V); } @@ -1445,6 +1452,8 @@ bool SCCPSolver::ResolvedUndefsIn(Function &F) { // After a zero extend, we know the top part is zero. SExt doesn't have // to be handled here, because we don't know whether the top part is 1's // or 0's. + case Instruction::SIToFP: // some FP values are not possible, just use 0. + case Instruction::UIToFP: // some FP values are not possible, just use 0. markForcedConstant(I, Constant::getNullValue(ITy)); return true; case Instruction::Mul: diff --git a/lib/Transforms/Scalar/SCCVN.cpp b/lib/Transforms/Scalar/SCCVN.cpp deleted file mode 100644 index 9685a29..0000000 --- a/lib/Transforms/Scalar/SCCVN.cpp +++ /dev/null @@ -1,716 +0,0 @@ -//===- SCCVN.cpp - Eliminate redundant values -----------------------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. 
-// -//===----------------------------------------------------------------------===// -// -// This pass performs global value numbering to eliminate fully redundant -// instructions. This is based on the paper "SCC-based Value Numbering" -// by Cooper, et al. -// -//===----------------------------------------------------------------------===// - -#define DEBUG_TYPE "sccvn" -#include "llvm/Transforms/Scalar.h" -#include "llvm/BasicBlock.h" -#include "llvm/Constants.h" -#include "llvm/DerivedTypes.h" -#include "llvm/Function.h" -#include "llvm/Operator.h" -#include "llvm/Value.h" -#include "llvm/ADT/DenseMap.h" -#include "llvm/ADT/DepthFirstIterator.h" -#include "llvm/ADT/PostOrderIterator.h" -#include "llvm/ADT/SmallPtrSet.h" -#include "llvm/ADT/SmallVector.h" -#include "llvm/ADT/SparseBitVector.h" -#include "llvm/ADT/Statistic.h" -#include "llvm/Analysis/Dominators.h" -#include "llvm/Support/CFG.h" -#include "llvm/Support/CommandLine.h" -#include "llvm/Support/Debug.h" -#include "llvm/Support/ErrorHandling.h" -#include "llvm/Transforms/Utils/SSAUpdater.h" -using namespace llvm; - -STATISTIC(NumSCCVNInstr, "Number of instructions deleted by SCCVN"); -STATISTIC(NumSCCVNPhi, "Number of phis deleted by SCCVN"); - -//===----------------------------------------------------------------------===// -// ValueTable Class -//===----------------------------------------------------------------------===// - -/// This class holds the mapping between values and value numbers. It is used -/// as an efficient mechanism to determine the expression-wise equivalence of -/// two values. -namespace { - struct Expression { - enum ExpressionOpcode { ADD, FADD, SUB, FSUB, MUL, FMUL, - UDIV, SDIV, FDIV, UREM, SREM, - FREM, SHL, LSHR, ASHR, AND, OR, XOR, ICMPEQ, - ICMPNE, ICMPUGT, ICMPUGE, ICMPULT, ICMPULE, - ICMPSGT, ICMPSGE, ICMPSLT, ICMPSLE, FCMPOEQ, - FCMPOGT, FCMPOGE, FCMPOLT, FCMPOLE, FCMPONE, - FCMPORD, FCMPUNO, FCMPUEQ, FCMPUGT, FCMPUGE, - FCMPULT, FCMPULE, FCMPUNE, EXTRACT, INSERT, - SHUFFLE, SELECT, TRUNC, ZEXT, SEXT, FPTOUI, - FPTOSI, UITOFP, SITOFP, FPTRUNC, FPEXT, - PTRTOINT, INTTOPTR, BITCAST, GEP, CALL, CONSTANT, - INSERTVALUE, EXTRACTVALUE, EMPTY, TOMBSTONE }; - - ExpressionOpcode opcode; - const Type* type; - SmallVector<uint32_t, 4> varargs; - - Expression() { } - Expression(ExpressionOpcode o) : opcode(o) { } - - bool operator==(const Expression &other) const { - if (opcode != other.opcode) - return false; - else if (opcode == EMPTY || opcode == TOMBSTONE) - return true; - else if (type != other.type) - return false; - else { - if (varargs.size() != other.varargs.size()) - return false; - - for (size_t i = 0; i < varargs.size(); ++i) - if (varargs[i] != other.varargs[i]) - return false; - - return true; - } - } - - bool operator!=(const Expression &other) const { - return !(*this == other); - } - }; - - class ValueTable { - private: - DenseMap<Value*, uint32_t> valueNumbering; - DenseMap<Expression, uint32_t> expressionNumbering; - DenseMap<Value*, uint32_t> constantsNumbering; - - uint32_t nextValueNumber; - - Expression::ExpressionOpcode getOpcode(BinaryOperator* BO); - Expression::ExpressionOpcode getOpcode(CmpInst* C); - Expression::ExpressionOpcode getOpcode(CastInst* C); - Expression create_expression(BinaryOperator* BO); - Expression create_expression(CmpInst* C); - Expression create_expression(ShuffleVectorInst* V); - Expression create_expression(ExtractElementInst* C); - Expression create_expression(InsertElementInst* V); - Expression create_expression(SelectInst* V); - Expression 
create_expression(CastInst* C); - Expression create_expression(GetElementPtrInst* G); - Expression create_expression(CallInst* C); - Expression create_expression(Constant* C); - Expression create_expression(ExtractValueInst* C); - Expression create_expression(InsertValueInst* C); - public: - ValueTable() : nextValueNumber(1) { } - uint32_t computeNumber(Value *V); - uint32_t lookup(Value *V); - void add(Value *V, uint32_t num); - void clear(); - void clearExpressions(); - void erase(Value *v); - unsigned size(); - void verifyRemoved(const Value *) const; - }; -} - -namespace llvm { -template <> struct DenseMapInfo<Expression> { - static inline Expression getEmptyKey() { - return Expression(Expression::EMPTY); - } - - static inline Expression getTombstoneKey() { - return Expression(Expression::TOMBSTONE); - } - - static unsigned getHashValue(const Expression e) { - unsigned hash = e.opcode; - - hash = ((unsigned)((uintptr_t)e.type >> 4) ^ - (unsigned)((uintptr_t)e.type >> 9)); - - for (SmallVector<uint32_t, 4>::const_iterator I = e.varargs.begin(), - E = e.varargs.end(); I != E; ++I) - hash = *I + hash * 37; - - return hash; - } - static bool isEqual(const Expression &LHS, const Expression &RHS) { - return LHS == RHS; - } -}; -template <> -struct isPodLike<Expression> { static const bool value = true; }; - -} - -//===----------------------------------------------------------------------===// -// ValueTable Internal Functions -//===----------------------------------------------------------------------===// -Expression::ExpressionOpcode ValueTable::getOpcode(BinaryOperator* BO) { - switch(BO->getOpcode()) { - default: // THIS SHOULD NEVER HAPPEN - llvm_unreachable("Binary operator with unknown opcode?"); - case Instruction::Add: return Expression::ADD; - case Instruction::FAdd: return Expression::FADD; - case Instruction::Sub: return Expression::SUB; - case Instruction::FSub: return Expression::FSUB; - case Instruction::Mul: return Expression::MUL; - case Instruction::FMul: return Expression::FMUL; - case Instruction::UDiv: return Expression::UDIV; - case Instruction::SDiv: return Expression::SDIV; - case Instruction::FDiv: return Expression::FDIV; - case Instruction::URem: return Expression::UREM; - case Instruction::SRem: return Expression::SREM; - case Instruction::FRem: return Expression::FREM; - case Instruction::Shl: return Expression::SHL; - case Instruction::LShr: return Expression::LSHR; - case Instruction::AShr: return Expression::ASHR; - case Instruction::And: return Expression::AND; - case Instruction::Or: return Expression::OR; - case Instruction::Xor: return Expression::XOR; - } -} - -Expression::ExpressionOpcode ValueTable::getOpcode(CmpInst* C) { - if (isa<ICmpInst>(C)) { - switch (C->getPredicate()) { - default: // THIS SHOULD NEVER HAPPEN - llvm_unreachable("Comparison with unknown predicate?"); - case ICmpInst::ICMP_EQ: return Expression::ICMPEQ; - case ICmpInst::ICMP_NE: return Expression::ICMPNE; - case ICmpInst::ICMP_UGT: return Expression::ICMPUGT; - case ICmpInst::ICMP_UGE: return Expression::ICMPUGE; - case ICmpInst::ICMP_ULT: return Expression::ICMPULT; - case ICmpInst::ICMP_ULE: return Expression::ICMPULE; - case ICmpInst::ICMP_SGT: return Expression::ICMPSGT; - case ICmpInst::ICMP_SGE: return Expression::ICMPSGE; - case ICmpInst::ICMP_SLT: return Expression::ICMPSLT; - case ICmpInst::ICMP_SLE: return Expression::ICMPSLE; - } - } else { - switch (C->getPredicate()) { - default: // THIS SHOULD NEVER HAPPEN - llvm_unreachable("Comparison with unknown predicate?"); - 
case FCmpInst::FCMP_OEQ: return Expression::FCMPOEQ; - case FCmpInst::FCMP_OGT: return Expression::FCMPOGT; - case FCmpInst::FCMP_OGE: return Expression::FCMPOGE; - case FCmpInst::FCMP_OLT: return Expression::FCMPOLT; - case FCmpInst::FCMP_OLE: return Expression::FCMPOLE; - case FCmpInst::FCMP_ONE: return Expression::FCMPONE; - case FCmpInst::FCMP_ORD: return Expression::FCMPORD; - case FCmpInst::FCMP_UNO: return Expression::FCMPUNO; - case FCmpInst::FCMP_UEQ: return Expression::FCMPUEQ; - case FCmpInst::FCMP_UGT: return Expression::FCMPUGT; - case FCmpInst::FCMP_UGE: return Expression::FCMPUGE; - case FCmpInst::FCMP_ULT: return Expression::FCMPULT; - case FCmpInst::FCMP_ULE: return Expression::FCMPULE; - case FCmpInst::FCMP_UNE: return Expression::FCMPUNE; - } - } -} - -Expression::ExpressionOpcode ValueTable::getOpcode(CastInst* C) { - switch(C->getOpcode()) { - default: // THIS SHOULD NEVER HAPPEN - llvm_unreachable("Cast operator with unknown opcode?"); - case Instruction::Trunc: return Expression::TRUNC; - case Instruction::ZExt: return Expression::ZEXT; - case Instruction::SExt: return Expression::SEXT; - case Instruction::FPToUI: return Expression::FPTOUI; - case Instruction::FPToSI: return Expression::FPTOSI; - case Instruction::UIToFP: return Expression::UITOFP; - case Instruction::SIToFP: return Expression::SITOFP; - case Instruction::FPTrunc: return Expression::FPTRUNC; - case Instruction::FPExt: return Expression::FPEXT; - case Instruction::PtrToInt: return Expression::PTRTOINT; - case Instruction::IntToPtr: return Expression::INTTOPTR; - case Instruction::BitCast: return Expression::BITCAST; - } -} - -Expression ValueTable::create_expression(CallInst* C) { - Expression e; - - e.type = C->getType(); - e.opcode = Expression::CALL; - - e.varargs.push_back(lookup(C->getCalledFunction())); - for (CallInst::op_iterator I = C->op_begin()+1, E = C->op_end(); - I != E; ++I) - e.varargs.push_back(lookup(*I)); - - return e; -} - -Expression ValueTable::create_expression(BinaryOperator* BO) { - Expression e; - e.varargs.push_back(lookup(BO->getOperand(0))); - e.varargs.push_back(lookup(BO->getOperand(1))); - e.type = BO->getType(); - e.opcode = getOpcode(BO); - - return e; -} - -Expression ValueTable::create_expression(CmpInst* C) { - Expression e; - - e.varargs.push_back(lookup(C->getOperand(0))); - e.varargs.push_back(lookup(C->getOperand(1))); - e.type = C->getType(); - e.opcode = getOpcode(C); - - return e; -} - -Expression ValueTable::create_expression(CastInst* C) { - Expression e; - - e.varargs.push_back(lookup(C->getOperand(0))); - e.type = C->getType(); - e.opcode = getOpcode(C); - - return e; -} - -Expression ValueTable::create_expression(ShuffleVectorInst* S) { - Expression e; - - e.varargs.push_back(lookup(S->getOperand(0))); - e.varargs.push_back(lookup(S->getOperand(1))); - e.varargs.push_back(lookup(S->getOperand(2))); - e.type = S->getType(); - e.opcode = Expression::SHUFFLE; - - return e; -} - -Expression ValueTable::create_expression(ExtractElementInst* E) { - Expression e; - - e.varargs.push_back(lookup(E->getOperand(0))); - e.varargs.push_back(lookup(E->getOperand(1))); - e.type = E->getType(); - e.opcode = Expression::EXTRACT; - - return e; -} - -Expression ValueTable::create_expression(InsertElementInst* I) { - Expression e; - - e.varargs.push_back(lookup(I->getOperand(0))); - e.varargs.push_back(lookup(I->getOperand(1))); - e.varargs.push_back(lookup(I->getOperand(2))); - e.type = I->getType(); - e.opcode = Expression::INSERT; - - return e; -} - -Expression 
ValueTable::create_expression(SelectInst* I) { - Expression e; - - e.varargs.push_back(lookup(I->getCondition())); - e.varargs.push_back(lookup(I->getTrueValue())); - e.varargs.push_back(lookup(I->getFalseValue())); - e.type = I->getType(); - e.opcode = Expression::SELECT; - - return e; -} - -Expression ValueTable::create_expression(GetElementPtrInst* G) { - Expression e; - - e.varargs.push_back(lookup(G->getPointerOperand())); - e.type = G->getType(); - e.opcode = Expression::GEP; - - for (GetElementPtrInst::op_iterator I = G->idx_begin(), E = G->idx_end(); - I != E; ++I) - e.varargs.push_back(lookup(*I)); - - return e; -} - -Expression ValueTable::create_expression(ExtractValueInst* E) { - Expression e; - - e.varargs.push_back(lookup(E->getAggregateOperand())); - for (ExtractValueInst::idx_iterator II = E->idx_begin(), IE = E->idx_end(); - II != IE; ++II) - e.varargs.push_back(*II); - e.type = E->getType(); - e.opcode = Expression::EXTRACTVALUE; - - return e; -} - -Expression ValueTable::create_expression(InsertValueInst* E) { - Expression e; - - e.varargs.push_back(lookup(E->getAggregateOperand())); - e.varargs.push_back(lookup(E->getInsertedValueOperand())); - for (InsertValueInst::idx_iterator II = E->idx_begin(), IE = E->idx_end(); - II != IE; ++II) - e.varargs.push_back(*II); - e.type = E->getType(); - e.opcode = Expression::INSERTVALUE; - - return e; -} - -//===----------------------------------------------------------------------===// -// ValueTable External Functions -//===----------------------------------------------------------------------===// - -/// add - Insert a value into the table with a specified value number. -void ValueTable::add(Value *V, uint32_t num) { - valueNumbering[V] = num; -} - -/// computeNumber - Returns the value number for the specified value, assigning -/// it a new number if it did not have one before. 
-uint32_t ValueTable::computeNumber(Value *V) { - if (uint32_t v = valueNumbering[V]) - return v; - else if (uint32_t v= constantsNumbering[V]) - return v; - - if (!isa<Instruction>(V)) { - constantsNumbering[V] = nextValueNumber; - return nextValueNumber++; - } - - Instruction* I = cast<Instruction>(V); - Expression exp; - switch (I->getOpcode()) { - case Instruction::Add: - case Instruction::FAdd: - case Instruction::Sub: - case Instruction::FSub: - case Instruction::Mul: - case Instruction::FMul: - case Instruction::UDiv: - case Instruction::SDiv: - case Instruction::FDiv: - case Instruction::URem: - case Instruction::SRem: - case Instruction::FRem: - case Instruction::Shl: - case Instruction::LShr: - case Instruction::AShr: - case Instruction::And: - case Instruction::Or : - case Instruction::Xor: - exp = create_expression(cast<BinaryOperator>(I)); - break; - case Instruction::ICmp: - case Instruction::FCmp: - exp = create_expression(cast<CmpInst>(I)); - break; - case Instruction::Trunc: - case Instruction::ZExt: - case Instruction::SExt: - case Instruction::FPToUI: - case Instruction::FPToSI: - case Instruction::UIToFP: - case Instruction::SIToFP: - case Instruction::FPTrunc: - case Instruction::FPExt: - case Instruction::PtrToInt: - case Instruction::IntToPtr: - case Instruction::BitCast: - exp = create_expression(cast<CastInst>(I)); - break; - case Instruction::Select: - exp = create_expression(cast<SelectInst>(I)); - break; - case Instruction::ExtractElement: - exp = create_expression(cast<ExtractElementInst>(I)); - break; - case Instruction::InsertElement: - exp = create_expression(cast<InsertElementInst>(I)); - break; - case Instruction::ShuffleVector: - exp = create_expression(cast<ShuffleVectorInst>(I)); - break; - case Instruction::ExtractValue: - exp = create_expression(cast<ExtractValueInst>(I)); - break; - case Instruction::InsertValue: - exp = create_expression(cast<InsertValueInst>(I)); - break; - case Instruction::GetElementPtr: - exp = create_expression(cast<GetElementPtrInst>(I)); - break; - default: - valueNumbering[V] = nextValueNumber; - return nextValueNumber++; - } - - uint32_t& e = expressionNumbering[exp]; - if (!e) e = nextValueNumber++; - valueNumbering[V] = e; - - return e; -} - -/// lookup - Returns the value number of the specified value. Returns 0 if -/// the value has not yet been numbered. -uint32_t ValueTable::lookup(Value *V) { - if (!isa<Instruction>(V)) { - if (!constantsNumbering.count(V)) - constantsNumbering[V] = nextValueNumber++; - return constantsNumbering[V]; - } - - return valueNumbering[V]; -} - -/// clear - Remove all entries from the ValueTable -void ValueTable::clear() { - valueNumbering.clear(); - expressionNumbering.clear(); - constantsNumbering.clear(); - nextValueNumber = 1; -} - -void ValueTable::clearExpressions() { - expressionNumbering.clear(); - constantsNumbering.clear(); - nextValueNumber = 1; -} - -/// erase - Remove a value from the value numbering -void ValueTable::erase(Value *V) { - valueNumbering.erase(V); -} - -/// verifyRemoved - Verify that the value is removed from all internal data -/// structures. 
-void ValueTable::verifyRemoved(const Value *V) const { - for (DenseMap<Value*, uint32_t>::const_iterator - I = valueNumbering.begin(), E = valueNumbering.end(); I != E; ++I) { - assert(I->first != V && "Inst still occurs in value numbering map!"); - } -} - -//===----------------------------------------------------------------------===// -// SCCVN Pass -//===----------------------------------------------------------------------===// - -namespace { - - struct ValueNumberScope { - ValueNumberScope* parent; - DenseMap<uint32_t, Value*> table; - SparseBitVector<128> availIn; - SparseBitVector<128> availOut; - - ValueNumberScope(ValueNumberScope* p) : parent(p) { } - }; - - class SCCVN : public FunctionPass { - bool runOnFunction(Function &F); - public: - static char ID; // Pass identification, replacement for typeid - SCCVN() : FunctionPass(&ID) { } - - private: - ValueTable VT; - DenseMap<BasicBlock*, ValueNumberScope*> BBMap; - - // This transformation requires dominator postdominator info - virtual void getAnalysisUsage(AnalysisUsage &AU) const { - AU.addRequired<DominatorTree>(); - - AU.addPreserved<DominatorTree>(); - AU.setPreservesCFG(); - } - }; - - char SCCVN::ID = 0; -} - -// createSCCVNPass - The public interface to this file... -FunctionPass *llvm::createSCCVNPass() { return new SCCVN(); } - -static RegisterPass<SCCVN> X("sccvn", - "SCC Value Numbering"); - -static Value *lookupNumber(ValueNumberScope *Locals, uint32_t num) { - while (Locals) { - DenseMap<uint32_t, Value*>::iterator I = Locals->table.find(num); - if (I != Locals->table.end()) - return I->second; - Locals = Locals->parent; - } - - return 0; -} - -bool SCCVN::runOnFunction(Function& F) { - // Implement the RPO version of the SCCVN algorithm. Conceptually, - // we optimisitically assume that all instructions with the same opcode have - // the same VN. Then we deepen our comparison by one level, to all - // instructions whose operands have the same opcodes get the same VN. We - // iterate this process until the partitioning stops changing, at which - // point we have computed a full numbering. - ReversePostOrderTraversal<Function*> RPOT(&F); - bool done = false; - while (!done) { - done = true; - VT.clearExpressions(); - for (ReversePostOrderTraversal<Function*>::rpo_iterator I = RPOT.begin(), - E = RPOT.end(); I != E; ++I) { - BasicBlock* BB = *I; - for (BasicBlock::iterator BI = BB->begin(), BE = BB->end(); - BI != BE; ++BI) { - uint32_t origVN = VT.lookup(BI); - uint32_t newVN = VT.computeNumber(BI); - if (origVN != newVN) - done = false; - } - } - } - - // Now, do a dominator walk, eliminating simple, dominated redundancies as we - // go. Also, build the ValueNumberScope structure that will be used for - // computing full availability. 
- DominatorTree& DT = getAnalysis<DominatorTree>(); - bool changed = false; - for (df_iterator<DomTreeNode*> DI = df_begin(DT.getRootNode()), - DE = df_end(DT.getRootNode()); DI != DE; ++DI) { - BasicBlock* BB = DI->getBlock(); - if (DI->getIDom()) - BBMap[BB] = new ValueNumberScope(BBMap[DI->getIDom()->getBlock()]); - else - BBMap[BB] = new ValueNumberScope(0); - - for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ) { - uint32_t num = VT.lookup(I); - Value* repl = lookupNumber(BBMap[BB], num); - - if (repl) { - if (isa<PHINode>(I)) - ++NumSCCVNPhi; - else - ++NumSCCVNInstr; - I->replaceAllUsesWith(repl); - Instruction* OldInst = I; - ++I; - BBMap[BB]->table[num] = repl; - OldInst->eraseFromParent(); - changed = true; - } else { - BBMap[BB]->table[num] = I; - BBMap[BB]->availOut.set(num); - - ++I; - } - } - } - - // Perform a forward data-flow to compute availability at all points on - // the CFG. - do { - changed = false; - for (ReversePostOrderTraversal<Function*>::rpo_iterator I = RPOT.begin(), - E = RPOT.end(); I != E; ++I) { - BasicBlock* BB = *I; - ValueNumberScope *VNS = BBMap[BB]; - - SparseBitVector<128> preds; - bool first = true; - for (pred_iterator PI = pred_begin(BB), PE = pred_end(BB); - PI != PE; ++PI) { - if (first) { - preds = BBMap[*PI]->availOut; - first = false; - } else { - preds &= BBMap[*PI]->availOut; - } - } - - changed |= (VNS->availIn |= preds); - changed |= (VNS->availOut |= preds); - } - } while (changed); - - // Use full availability information to perform non-dominated replacements. - SSAUpdater SSU; - for (Function::iterator FI = F.begin(), FE = F.end(); FI != FE; ++FI) { - if (!BBMap.count(FI)) continue; - for (BasicBlock::iterator BI = FI->begin(), BE = FI->end(); - BI != BE; ) { - uint32_t num = VT.lookup(BI); - if (!BBMap[FI]->availIn.test(num)) { - ++BI; - continue; - } - - SSU.Initialize(BI); - - SmallPtrSet<BasicBlock*, 8> visited; - SmallVector<BasicBlock*, 8> stack; - visited.insert(FI); - for (pred_iterator PI = pred_begin(FI), PE = pred_end(FI); - PI != PE; ++PI) - if (!visited.count(*PI)) - stack.push_back(*PI); - - while (!stack.empty()) { - BasicBlock* CurrBB = stack.pop_back_val(); - visited.insert(CurrBB); - - ValueNumberScope* S = BBMap[CurrBB]; - if (S->table.count(num)) { - SSU.AddAvailableValue(CurrBB, S->table[num]); - } else { - for (pred_iterator PI = pred_begin(CurrBB), PE = pred_end(CurrBB); - PI != PE; ++PI) - if (!visited.count(*PI)) - stack.push_back(*PI); - } - } - - Value* repl = SSU.GetValueInMiddleOfBlock(FI); - BI->replaceAllUsesWith(repl); - Instruction* CurInst = BI; - ++BI; - BBMap[FI]->table[num] = repl; - if (isa<PHINode>(CurInst)) - ++NumSCCVNPhi; - else - ++NumSCCVNInstr; - - CurInst->eraseFromParent(); - } - } - - VT.clear(); - for (DenseMap<BasicBlock*, ValueNumberScope*>::iterator - I = BBMap.begin(), E = BBMap.end(); I != E; ++I) - delete I->second; - BBMap.clear(); - - return changed; -} diff --git a/lib/Transforms/Scalar/ScalarReplAggregates.cpp b/lib/Transforms/Scalar/ScalarReplAggregates.cpp index 6211beb..5ca9ce3 100644 --- a/lib/Transforms/Scalar/ScalarReplAggregates.cpp +++ b/lib/Transforms/Scalar/ScalarReplAggregates.cpp @@ -130,14 +130,7 @@ namespace { void RewriteLoadUserOfWholeAlloca(LoadInst *LI, AllocaInst *AI, SmallVector<AllocaInst*, 32> &NewElts); - bool CanConvertToScalar(Value *V, bool &IsNotTrivial, const Type *&VecTy, - bool &SawVec, uint64_t Offset, unsigned AllocaSize); - void ConvertUsesToScalar(Value *Ptr, AllocaInst *NewAI, uint64_t Offset); - Value 
*ConvertScalar_ExtractValue(Value *NV, const Type *ToType, - uint64_t Offset, IRBuilder<> &Builder); - Value *ConvertScalar_InsertValue(Value *StoredVal, Value *ExistingVal, - uint64_t Offset, IRBuilder<> &Builder); - static Instruction *isOnlyCopiedFromConstantGlobal(AllocaInst *AI); + static MemTransferInst *isOnlyCopiedFromConstantGlobal(AllocaInst *AI); }; } @@ -150,6 +143,596 @@ FunctionPass *llvm::createScalarReplAggregatesPass(signed int Threshold) { } +//===----------------------------------------------------------------------===// +// Convert To Scalar Optimization. +//===----------------------------------------------------------------------===// + +namespace { +/// ConvertToScalarInfo - This class implements the "Convert To Scalar" +/// optimization, which scans the uses of an alloca and determines if it can +/// rewrite it in terms of a single new alloca that can be mem2reg'd. +class ConvertToScalarInfo { + /// AllocaSize - The size of the alloca being considered. + unsigned AllocaSize; + const TargetData &TD; + + /// IsNotTrivial - This is set to true if there is some access to the object + /// which means that mem2reg can't promote it. + bool IsNotTrivial; + + /// VectorTy - This tracks the type that we should promote the vector to if + /// it is possible to turn it into a vector. This starts out null, and if it + /// isn't possible to turn into a vector type, it gets set to VoidTy. + const Type *VectorTy; + + /// HadAVector - True if there is at least one vector access to the alloca. + /// We don't want to turn random arrays into vectors and use vector element + /// insert/extract, but if there are element accesses to something that is + /// also declared as a vector, we do want to promote to a vector. + bool HadAVector; + +public: + explicit ConvertToScalarInfo(unsigned Size, const TargetData &td) + : AllocaSize(Size), TD(td) { + IsNotTrivial = false; + VectorTy = 0; + HadAVector = false; + } + + AllocaInst *TryConvert(AllocaInst *AI); + +private: + bool CanConvertToScalar(Value *V, uint64_t Offset); + void MergeInType(const Type *In, uint64_t Offset); + void ConvertUsesToScalar(Value *Ptr, AllocaInst *NewAI, uint64_t Offset); + + Value *ConvertScalar_ExtractValue(Value *NV, const Type *ToType, + uint64_t Offset, IRBuilder<> &Builder); + Value *ConvertScalar_InsertValue(Value *StoredVal, Value *ExistingVal, + uint64_t Offset, IRBuilder<> &Builder); +}; +} // end anonymous namespace. + +/// TryConvert - Analyze the specified alloca, and if it is safe to do so, +/// rewrite it to be a new alloca which is mem2reg'able. This returns the new +/// alloca if possible or null if not. +AllocaInst *ConvertToScalarInfo::TryConvert(AllocaInst *AI) { + // If we can't convert this scalar, or if mem2reg can trivially do it, bail + // out. + if (!CanConvertToScalar(AI, 0) || !IsNotTrivial) + return 0; + + // If we were able to find a vector type that can handle this with + // insert/extract elements, and if there was at least one use that had + // a vector type, promote this to a vector. We don't want to promote + // random stuff that doesn't use vectors (e.g. <9 x double>) because then + // we just get a lot of insert/extracts. If at least one vector is + // involved, then we probably really do have a union of vector/array. + const Type *NewTy; + if (VectorTy && VectorTy->isVectorTy() && HadAVector) { + DEBUG(dbgs() << "CONVERT TO VECTOR: " << *AI << "\n TYPE = " + << *VectorTy << '\n'); + NewTy = VectorTy; // Use the vector type. 
+ } else { + DEBUG(dbgs() << "CONVERT TO SCALAR INTEGER: " << *AI << "\n"); + // Create and insert the integer alloca. + NewTy = IntegerType::get(AI->getContext(), AllocaSize*8); + } + AllocaInst *NewAI = new AllocaInst(NewTy, 0, "", AI->getParent()->begin()); + ConvertUsesToScalar(AI, NewAI, 0); + return NewAI; +} + +/// MergeInType - Add the 'In' type to the accumulated vector type (VectorTy) +/// so far at the offset specified by Offset (which is specified in bytes). +/// +/// There are two cases we handle here: +/// 1) A union of vector types of the same size and potentially its elements. +/// Here we turn element accesses into insert/extract element operations. +/// This promotes a <4 x float> with a store of float to the third element +/// into a <4 x float> that uses insert element. +/// 2) A fully general blob of memory, which we turn into some (potentially +/// large) integer type with extract and insert operations where the loads +/// and stores would mutate the memory. We mark this by setting VectorTy +/// to VoidTy. +void ConvertToScalarInfo::MergeInType(const Type *In, uint64_t Offset) { + // If we already decided to turn this into a blob of integer memory, there is + // nothing to be done. + if (VectorTy && VectorTy->isVoidTy()) + return; + + // If this could be contributing to a vector, analyze it. + + // If the In type is a vector that is the same size as the alloca, see if it + // matches the existing VectorTy. + if (const VectorType *VInTy = dyn_cast<VectorType>(In)) { + // Remember if we saw a vector type. + HadAVector = true; + + if (VInTy->getBitWidth()/8 == AllocaSize && Offset == 0) { + // If we're storing/loading a vector of the right size, allow it as a + // vector. If this is the first vector we see, remember the type so that + // we know the element size. If this is a subsequent access, ignore it + // even if it is a differing type but the same size. Worst case we can + // bitcast the resultant vectors. + if (VectorTy == 0) + VectorTy = VInTy; + return; + } + } else if (In->isFloatTy() || In->isDoubleTy() || + (In->isIntegerTy() && In->getPrimitiveSizeInBits() >= 8 && + isPowerOf2_32(In->getPrimitiveSizeInBits()))) { + // If we're accessing something that could be an element of a vector, see + // if the implied vector agrees with what we already have and if Offset is + // compatible with it. + unsigned EltSize = In->getPrimitiveSizeInBits()/8; + if (Offset % EltSize == 0 && AllocaSize % EltSize == 0 && + (VectorTy == 0 || + cast<VectorType>(VectorTy)->getElementType() + ->getPrimitiveSizeInBits()/8 == EltSize)) { + if (VectorTy == 0) + VectorTy = VectorType::get(In, AllocaSize/EltSize); + return; + } + } + + // Otherwise, we have a case that we can't handle with an optimized vector + // form. We can still turn this into a large integer. + VectorTy = Type::getVoidTy(In->getContext()); +} + +/// CanConvertToScalar - V is a pointer. If we can convert the pointee and all +/// its accesses to a single vector type, return true and set VectorTy to +/// the new type. If we could convert the alloca into a single promotable +/// integer, return true but set VectorTy to VoidTy. Further, if the use is not a +/// completely trivial use that mem2reg could promote, set IsNotTrivial. Offset +/// is the current offset from the base of the alloca being analyzed. +/// +/// If we see at least one access to the value that is of a vector type, set the +/// HadAVector flag.
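+/// +/// A rough illustration of these rules (an illustrative sketch, not text from +/// the patch itself): a 16-byte alloca whose uses are <4 x float> loads/stores +/// plus a float store at byte offset 8 settles on VectorTy = <4 x float> with +/// HadAVector set, and the float store is later rewritten into an +/// insertelement of element 2; if the same alloca also had an i16 access at +/// byte offset 1, that offset is not element-aligned, so VectorTy is forced to +/// VoidTy and the alloca is instead treated as a single i128 integer.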
+bool ConvertToScalarInfo::CanConvertToScalar(Value *V, uint64_t Offset) { + for (Value::use_iterator UI = V->use_begin(), E = V->use_end(); UI!=E; ++UI) { + Instruction *User = cast<Instruction>(*UI); + + if (LoadInst *LI = dyn_cast<LoadInst>(User)) { + // Don't break volatile loads. + if (LI->isVolatile()) + return false; + MergeInType(LI->getType(), Offset); + continue; + } + + if (StoreInst *SI = dyn_cast<StoreInst>(User)) { + // Storing the pointer, not into the value? + if (SI->getOperand(0) == V || SI->isVolatile()) return false; + MergeInType(SI->getOperand(0)->getType(), Offset); + continue; + } + + if (BitCastInst *BCI = dyn_cast<BitCastInst>(User)) { + IsNotTrivial = true; // Can't be mem2reg'd. + if (!CanConvertToScalar(BCI, Offset)) + return false; + continue; + } + + if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(User)) { + // If this is a GEP with variable indices, we can't handle it. + if (!GEP->hasAllConstantIndices()) + return false; + + // Compute the offset that this GEP adds to the pointer. + SmallVector<Value*, 8> Indices(GEP->op_begin()+1, GEP->op_end()); + uint64_t GEPOffset = TD.getIndexedOffset(GEP->getPointerOperandType(), + &Indices[0], Indices.size()); + // See if all uses can be converted. + if (!CanConvertToScalar(GEP, Offset+GEPOffset)) + return false; + IsNotTrivial = true; // Can't be mem2reg'd. + continue; + } + + // If this is a constant sized memset of a constant value (e.g. 0) we can + // handle it. + if (MemSetInst *MSI = dyn_cast<MemSetInst>(User)) { + // Store of constant value and constant size. + if (!isa<ConstantInt>(MSI->getValue()) || + !isa<ConstantInt>(MSI->getLength())) + return false; + IsNotTrivial = true; // Can't be mem2reg'd. + continue; + } + + // If this is a memcpy or memmove into or out of the whole allocation, we + // can handle it like a load or store of the scalar type. + if (MemTransferInst *MTI = dyn_cast<MemTransferInst>(User)) { + ConstantInt *Len = dyn_cast<ConstantInt>(MTI->getLength()); + if (Len == 0 || Len->getZExtValue() != AllocaSize || Offset != 0) + return false; + + IsNotTrivial = true; // Can't be mem2reg'd. + continue; + } + + // Otherwise, we cannot handle this! + return false; + } + + return true; +} + +/// ConvertUsesToScalar - Convert all of the users of Ptr to use the new alloca +/// directly. This happens when we are converting an "integer union" to a +/// single integer scalar, or when we are converting a "vector union" to a +/// vector with insert/extractelement instructions. +/// +/// Offset is an offset from the original alloca, in bits that need to be +/// shifted to the right. By the end of this, there should be no uses of Ptr. +void ConvertToScalarInfo::ConvertUsesToScalar(Value *Ptr, AllocaInst *NewAI, + uint64_t Offset) { + while (!Ptr->use_empty()) { + Instruction *User = cast<Instruction>(Ptr->use_back()); + + if (BitCastInst *CI = dyn_cast<BitCastInst>(User)) { + ConvertUsesToScalar(CI, NewAI, Offset); + CI->eraseFromParent(); + continue; + } + + if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(User)) { + // Compute the offset that this GEP adds to the pointer.
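+ // (Worked example of the unit conversion: getIndexedOffset returns a byte + // offset, while ConvertUsesToScalar tracks Offset in bits, hence the + // multiply by 8 just below. A GEP to element 2 of a [4 x i32] alloca yields + // GEPOffset == 8 bytes, i.e. an additional 64 bits.)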
+ SmallVector<Value*, 8> Indices(GEP->op_begin()+1, GEP->op_end()); + uint64_t GEPOffset = TD.getIndexedOffset(GEP->getPointerOperandType(), + &Indices[0], Indices.size()); + ConvertUsesToScalar(GEP, NewAI, Offset+GEPOffset*8); + GEP->eraseFromParent(); + continue; + } + + IRBuilder<> Builder(User->getParent(), User); + + if (LoadInst *LI = dyn_cast<LoadInst>(User)) { + // The load is a bit extract from NewAI shifted right by Offset bits. + Value *LoadedVal = Builder.CreateLoad(NewAI, "tmp"); + Value *NewLoadVal + = ConvertScalar_ExtractValue(LoadedVal, LI->getType(), Offset, Builder); + LI->replaceAllUsesWith(NewLoadVal); + LI->eraseFromParent(); + continue; + } + + if (StoreInst *SI = dyn_cast<StoreInst>(User)) { + assert(SI->getOperand(0) != Ptr && "Consistency error!"); + Instruction *Old = Builder.CreateLoad(NewAI, NewAI->getName()+".in"); + Value *New = ConvertScalar_InsertValue(SI->getOperand(0), Old, Offset, + Builder); + Builder.CreateStore(New, NewAI); + SI->eraseFromParent(); + + // If the load we just inserted is now dead, then the inserted store + // overwrote the entire thing. + if (Old->use_empty()) + Old->eraseFromParent(); + continue; + } + + // If this is a constant sized memset of a constant value (e.g. 0) we can + // transform it into a store of the expanded constant value. + if (MemSetInst *MSI = dyn_cast<MemSetInst>(User)) { + assert(MSI->getRawDest() == Ptr && "Consistency error!"); + unsigned NumBytes = cast<ConstantInt>(MSI->getLength())->getZExtValue(); + if (NumBytes != 0) { + unsigned Val = cast<ConstantInt>(MSI->getValue())->getZExtValue(); + + // Compute the value replicated the right number of times. + APInt APVal(NumBytes*8, Val); + + // Splat the value if non-zero. + if (Val) + for (unsigned i = 1; i != NumBytes; ++i) + APVal |= APVal << 8; + + Instruction *Old = Builder.CreateLoad(NewAI, NewAI->getName()+".in"); + Value *New = ConvertScalar_InsertValue( + ConstantInt::get(User->getContext(), APVal), + Old, Offset, Builder); + Builder.CreateStore(New, NewAI); + + // If the load we just inserted is now dead, then the memset overwrote + // the entire thing. + if (Old->use_empty()) + Old->eraseFromParent(); + } + MSI->eraseFromParent(); + continue; + } + + // If this is a memcpy or memmove into or out of the whole allocation, we + // can handle it like a load or store of the scalar type. + if (MemTransferInst *MTI = dyn_cast<MemTransferInst>(User)) { + assert(Offset == 0 && "must be store to start of alloca"); + + // If the source and destination are both to the same alloca, then this is + // a noop copy-to-self, just delete it. Otherwise, emit a load and store + // as appropriate. + AllocaInst *OrigAI = cast<AllocaInst>(Ptr->getUnderlyingObject(0)); + + if (MTI->getSource()->getUnderlyingObject(0) != OrigAI) { + // Dest must be OrigAI, change this to be a load from the original + // pointer (bitcasted), then a store to our new alloca. + assert(MTI->getRawDest() == Ptr && "Neither use is of pointer?"); + Value *SrcPtr = MTI->getSource(); + SrcPtr = Builder.CreateBitCast(SrcPtr, NewAI->getType()); + + LoadInst *SrcVal = Builder.CreateLoad(SrcPtr, "srcval"); + SrcVal->setAlignment(MTI->getAlignment()); + Builder.CreateStore(SrcVal, NewAI); + } else if (MTI->getDest()->getUnderlyingObject(0) != OrigAI) { + // Src must be OrigAI, change this to be a load from NewAI then a store + // through the original dest pointer (bitcasted). 
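+ // (Sketch of the intended result, with illustrative names, assuming NewAI + // has type <4 x float>*: a memcpy of the whole 16-byte alloca out to %dst + // becomes roughly %srcval = load <4 x float>* %NewAI, followed by + // %dst.v = bitcast i8* %dst to <4 x float>* and + // store <4 x float> %srcval, <4 x float>* %dst.v, with the store carrying + // the original memcpy's alignment.)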
+ assert(MTI->getRawSource() == Ptr && "Neither use is of pointer?"); + LoadInst *SrcVal = Builder.CreateLoad(NewAI, "srcval"); + + Value *DstPtr = Builder.CreateBitCast(MTI->getDest(), NewAI->getType()); + StoreInst *NewStore = Builder.CreateStore(SrcVal, DstPtr); + NewStore->setAlignment(MTI->getAlignment()); + } else { + // Noop transfer. Src == Dst + } + + MTI->eraseFromParent(); + continue; + } + + llvm_unreachable("Unsupported operation!"); + } +} + +/// ConvertScalar_ExtractValue - Extract a value of type ToType from an integer +/// or vector value FromVal, extracting the bits from the offset specified by +/// Offset. This returns the value, which is of type ToType. +/// +/// This happens when we are converting an "integer union" to a single +/// integer scalar, or when we are converting a "vector union" to a vector with +/// insert/extractelement instructions. +/// +/// Offset is an offset from the original alloca, in bits that need to be +/// shifted to the right. +Value *ConvertToScalarInfo:: +ConvertScalar_ExtractValue(Value *FromVal, const Type *ToType, + uint64_t Offset, IRBuilder<> &Builder) { + // If the load is of the whole new alloca, no conversion is needed. + if (FromVal->getType() == ToType && Offset == 0) + return FromVal; + + // If the result alloca is a vector type, this is either an element + // access or a bitcast to another vector type of the same size. + if (const VectorType *VTy = dyn_cast<VectorType>(FromVal->getType())) { + if (ToType->isVectorTy()) + return Builder.CreateBitCast(FromVal, ToType, "tmp"); + + // Otherwise it must be an element access. + unsigned Elt = 0; + if (Offset) { + unsigned EltSize = TD.getTypeAllocSizeInBits(VTy->getElementType()); + Elt = Offset/EltSize; + assert(EltSize*Elt == Offset && "Invalid modulus in validity checking"); + } + // Return the element extracted out of it. + Value *V = Builder.CreateExtractElement(FromVal, ConstantInt::get( + Type::getInt32Ty(FromVal->getContext()), Elt), "tmp"); + if (V->getType() != ToType) + V = Builder.CreateBitCast(V, ToType, "tmp"); + return V; + } + + // If ToType is a first class aggregate, extract out each of the pieces and + // use insertvalue's to form the FCA. + if (const StructType *ST = dyn_cast<StructType>(ToType)) { + const StructLayout &Layout = *TD.getStructLayout(ST); + Value *Res = UndefValue::get(ST); + for (unsigned i = 0, e = ST->getNumElements(); i != e; ++i) { + Value *Elt = ConvertScalar_ExtractValue(FromVal, ST->getElementType(i), + Offset+Layout.getElementOffsetInBits(i), + Builder); + Res = Builder.CreateInsertValue(Res, Elt, i, "tmp"); + } + return Res; + } + + if (const ArrayType *AT = dyn_cast<ArrayType>(ToType)) { + uint64_t EltSize = TD.getTypeAllocSizeInBits(AT->getElementType()); + Value *Res = UndefValue::get(AT); + for (unsigned i = 0, e = AT->getNumElements(); i != e; ++i) { + Value *Elt = ConvertScalar_ExtractValue(FromVal, AT->getElementType(), + Offset+i*EltSize, Builder); + Res = Builder.CreateInsertValue(Res, Elt, i, "tmp"); + } + return Res; + } + + // Otherwise, this must be a union that was converted to an integer value. + const IntegerType *NTy = cast<IntegerType>(FromVal->getType()); + + // If this is a big-endian system and the load is narrower than the + // full alloca type, we need to do a shift to get the right bits. + int ShAmt = 0; + if (TD.isBigEndian()) { + // On big-endian machines, the lowest bit is stored at the bit offset + // from the pointer given by getTypeStoreSizeInBits. 
This matters for + // integers with a bitwidth that is not a multiple of 8. + ShAmt = TD.getTypeStoreSizeInBits(NTy) - + TD.getTypeStoreSizeInBits(ToType) - Offset; + } else { + ShAmt = Offset; + } + + // Note: we support negative bitwidths (with shl) which are not defined. + // We do this to support (f.e.) loads off the end of a structure where + // only some bits are used. + if (ShAmt > 0 && (unsigned)ShAmt < NTy->getBitWidth()) + FromVal = Builder.CreateLShr(FromVal, + ConstantInt::get(FromVal->getType(), + ShAmt), "tmp"); + else if (ShAmt < 0 && (unsigned)-ShAmt < NTy->getBitWidth()) + FromVal = Builder.CreateShl(FromVal, + ConstantInt::get(FromVal->getType(), + -ShAmt), "tmp"); + + // Finally, unconditionally truncate the integer to the right width. + unsigned LIBitWidth = TD.getTypeSizeInBits(ToType); + if (LIBitWidth < NTy->getBitWidth()) + FromVal = + Builder.CreateTrunc(FromVal, IntegerType::get(FromVal->getContext(), + LIBitWidth), "tmp"); + else if (LIBitWidth > NTy->getBitWidth()) + FromVal = + Builder.CreateZExt(FromVal, IntegerType::get(FromVal->getContext(), + LIBitWidth), "tmp"); + + // If the result is an integer, this is a trunc or bitcast. + if (ToType->isIntegerTy()) { + // Should be done. + } else if (ToType->isFloatingPointTy() || ToType->isVectorTy()) { + // Just do a bitcast, we know the sizes match up. + FromVal = Builder.CreateBitCast(FromVal, ToType, "tmp"); + } else { + // Otherwise must be a pointer. + FromVal = Builder.CreateIntToPtr(FromVal, ToType, "tmp"); + } + assert(FromVal->getType() == ToType && "Didn't convert right?"); + return FromVal; +} + +/// ConvertScalar_InsertValue - Insert the value "SV" into the existing integer +/// or vector value "Old" at the offset specified by Offset. +/// +/// This happens when we are converting an "integer union" to a +/// single integer scalar, or when we are converting a "vector union" to a +/// vector with insert/extractelement instructions. +/// +/// Offset is an offset from the original alloca, in bits that need to be +/// shifted to the right. +Value *ConvertToScalarInfo:: +ConvertScalar_InsertValue(Value *SV, Value *Old, + uint64_t Offset, IRBuilder<> &Builder) { + // Convert the stored type to the actual type, shift it left to insert + // then 'or' into place. + const Type *AllocaType = Old->getType(); + LLVMContext &Context = Old->getContext(); + + if (const VectorType *VTy = dyn_cast<VectorType>(AllocaType)) { + uint64_t VecSize = TD.getTypeAllocSizeInBits(VTy); + uint64_t ValSize = TD.getTypeAllocSizeInBits(SV->getType()); + + // Changing the whole vector with memset or with an access of a different + // vector type? + if (ValSize == VecSize) + return Builder.CreateBitCast(SV, AllocaType, "tmp"); + + uint64_t EltSize = TD.getTypeAllocSizeInBits(VTy->getElementType()); + + // Must be an element insertion. + unsigned Elt = Offset/EltSize; + + if (SV->getType() != VTy->getElementType()) + SV = Builder.CreateBitCast(SV, VTy->getElementType(), "tmp"); + + SV = Builder.CreateInsertElement(Old, SV, + ConstantInt::get(Type::getInt32Ty(SV->getContext()), Elt), + "tmp"); + return SV; + } + + // If SV is a first-class aggregate value, insert each value recursively. 
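+ // (For example: storing a {float, i32} value into an alloca that was + // promoted to i64 extracts each field with extractvalue and re-inserts it + // at its StructLayout offset, 0 and 32 bits here, so the aggregate store + // becomes two scalar insertions.)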
+ if (const StructType *ST = dyn_cast<StructType>(SV->getType())) { + const StructLayout &Layout = *TD.getStructLayout(ST); + for (unsigned i = 0, e = ST->getNumElements(); i != e; ++i) { + Value *Elt = Builder.CreateExtractValue(SV, i, "tmp"); + Old = ConvertScalar_InsertValue(Elt, Old, + Offset+Layout.getElementOffsetInBits(i), + Builder); + } + return Old; + } + + if (const ArrayType *AT = dyn_cast<ArrayType>(SV->getType())) { + uint64_t EltSize = TD.getTypeAllocSizeInBits(AT->getElementType()); + for (unsigned i = 0, e = AT->getNumElements(); i != e; ++i) { + Value *Elt = Builder.CreateExtractValue(SV, i, "tmp"); + Old = ConvertScalar_InsertValue(Elt, Old, Offset+i*EltSize, Builder); + } + return Old; + } + + // If SV is a float, convert it to the appropriate integer type. + // If it is a pointer, do the same. + unsigned SrcWidth = TD.getTypeSizeInBits(SV->getType()); + unsigned DestWidth = TD.getTypeSizeInBits(AllocaType); + unsigned SrcStoreWidth = TD.getTypeStoreSizeInBits(SV->getType()); + unsigned DestStoreWidth = TD.getTypeStoreSizeInBits(AllocaType); + if (SV->getType()->isFloatingPointTy() || SV->getType()->isVectorTy()) + SV = Builder.CreateBitCast(SV, + IntegerType::get(SV->getContext(),SrcWidth), "tmp"); + else if (SV->getType()->isPointerTy()) + SV = Builder.CreatePtrToInt(SV, TD.getIntPtrType(SV->getContext()), "tmp"); + + // Zero extend or truncate the value if needed. + if (SV->getType() != AllocaType) { + if (SV->getType()->getPrimitiveSizeInBits() < + AllocaType->getPrimitiveSizeInBits()) + SV = Builder.CreateZExt(SV, AllocaType, "tmp"); + else { + // Truncation may be needed if storing more than the alloca can hold + // (undefined behavior). + SV = Builder.CreateTrunc(SV, AllocaType, "tmp"); + SrcWidth = DestWidth; + SrcStoreWidth = DestStoreWidth; + } + } + + // If this is a big-endian system and the store is narrower than the + // full alloca type, we need to do a shift to get the right bits. + int ShAmt = 0; + if (TD.isBigEndian()) { + // On big-endian machines, the lowest bit is stored at the bit offset + // from the pointer given by getTypeStoreSizeInBits. This matters for + // integers with a bitwidth that is not a multiple of 8. + ShAmt = DestStoreWidth - SrcStoreWidth - Offset; + } else { + ShAmt = Offset; + } + + // Note: we support negative bitwidths (with shr) which are not defined. + // We do this to support (f.e.) stores off the end of a structure where + // only some bits in the structure are set. + APInt Mask(APInt::getLowBitsSet(DestWidth, SrcWidth)); + if (ShAmt > 0 && (unsigned)ShAmt < DestWidth) { + SV = Builder.CreateShl(SV, ConstantInt::get(SV->getType(), + ShAmt), "tmp"); + Mask <<= ShAmt; + } else if (ShAmt < 0 && (unsigned)-ShAmt < DestWidth) { + SV = Builder.CreateLShr(SV, ConstantInt::get(SV->getType(), + -ShAmt), "tmp"); + Mask = Mask.lshr(-ShAmt); + } + + // Mask out the bits we are about to insert from the old value, and or + // in the new bits. 
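+ // (Worked example, little-endian: inserting an i8 at byte offset 3 of an + // i32 alloca has Offset == 24, so SV is zero-extended to i32 and shifted + // left by 24, Mask becomes 0xFF000000, and the combine below produces + // or (and Old, 0x00FFFFFF), <shifted SV>. On a big-endian target the shift + // amount would instead be DestStoreWidth - SrcStoreWidth - Offset, i.e. + // 32 - 8 - 24 = 0.)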
+ if (SrcWidth != DestWidth) { + assert(DestWidth > SrcWidth); + Old = Builder.CreateAnd(Old, ConstantInt::get(Context, ~Mask), "mask"); + SV = Builder.CreateOr(Old, SV, "ins"); + } + return SV; +} + + +//===----------------------------------------------------------------------===// +// SRoA Driver +//===----------------------------------------------------------------------===// + + bool SROA::runOnFunction(Function &F) { TD = getAnalysisIfAvailable<TargetData>(); @@ -202,6 +785,7 @@ bool SROA::performPromotion(Function &F) { return Changed; } + /// ShouldAttemptScalarRepl - Decide if an alloca is a good candidate for /// SROA. It must be a struct or array type with a small number of elements. static bool ShouldAttemptScalarRepl(AllocaInst *AI) { @@ -216,6 +800,7 @@ static bool ShouldAttemptScalarRepl(AllocaInst *AI) { return false; } + // performScalarRepl - This algorithm is a simple worklist driven algorithm, // which runs on all of the malloc/alloca instructions in the function, removing // them if they are only used by getelementptr instructions. @@ -223,7 +808,7 @@ static bool ShouldAttemptScalarRepl(AllocaInst *AI) { bool SROA::performScalarRepl(Function &F) { std::vector<AllocaInst*> WorkList; - // Scan the entry basic block, adding any alloca's and mallocs to the worklist + // Scan the entry basic block, adding allocas to the worklist. BasicBlock &BB = F.getEntryBlock(); for (BasicBlock::iterator I = BB.begin(), E = BB.end(); I != E; ++I) if (AllocaInst *A = dyn_cast<AllocaInst>(I)) @@ -239,6 +824,7 @@ bool SROA::performScalarRepl(Function &F) { // with unused elements. if (AI->use_empty()) { AI->eraseFromParent(); + Changed = true; continue; } @@ -251,10 +837,10 @@ bool SROA::performScalarRepl(Function &F) { // the constant global instead. This is commonly produced by the CFE by // constructs like "void foo() { int A[] = {1,2,3,4,5,6,7,8,9...}; }" if 'A' // is only subsequently read. - if (Instruction *TheCopy = isOnlyCopiedFromConstantGlobal(AI)) { + if (MemTransferInst *TheCopy = isOnlyCopiedFromConstantGlobal(AI)) { DEBUG(dbgs() << "Found alloca equal to global: " << *AI << '\n'); DEBUG(dbgs() << " memcpy = " << *TheCopy << '\n'); - Constant *TheSrc = cast<Constant>(TheCopy->getOperand(2)); + Constant *TheSrc = cast<Constant>(TheCopy->getSource()); AI->replaceAllUsesWith(ConstantExpr::getBitCast(TheSrc, AI->getType())); TheCopy->eraseFromParent(); // Don't mutate the global. AI->eraseFromParent(); @@ -271,7 +857,10 @@ bool SROA::performScalarRepl(Function &F) { // Do not promote [0 x %struct]. if (AllocaSize == 0) continue; - + + // Do not promote any struct whose size is too big. + if (AllocaSize > SRThreshold) continue; + // If the alloca looks like a good candidate for scalar replacement, and if // all its users can be transformed, then split up the aggregate into its // separate elements. @@ -281,48 +870,20 @@ bool SROA::performScalarRepl(Function &F) { continue; } - // Do not promote any struct whose size is too big. - if (AllocaSize > SRThreshold) continue; - // If we can turn this aggregate value (potentially with casts) into a // simple scalar value that can be mem2reg'd into a register value. // IsNotTrivial tracks whether this is something that mem2reg could have // promoted itself. If so, we don't want to transform it needlessly. Note // that we can't just check based on the type: the alloca may be of an i32 // but that has pointer arithmetic to set byte 3 of it or something. 
- bool IsNotTrivial = false; - const Type *VectorTy = 0; - bool HadAVector = false; - if (CanConvertToScalar(AI, IsNotTrivial, VectorTy, HadAVector, - 0, unsigned(AllocaSize)) && IsNotTrivial) { - AllocaInst *NewAI; - // If we were able to find a vector type that can handle this with - // insert/extract elements, and if there was at least one use that had - // a vector type, promote this to a vector. We don't want to promote - // random stuff that doesn't use vectors (e.g. <9 x double>) because then - // we just get a lot of insert/extracts. If at least one vector is - // involved, then we probably really do have a union of vector/array. - if (VectorTy && VectorTy->isVectorTy() && HadAVector) { - DEBUG(dbgs() << "CONVERT TO VECTOR: " << *AI << "\n TYPE = " - << *VectorTy << '\n'); - - // Create and insert the vector alloca. - NewAI = new AllocaInst(VectorTy, 0, "", AI->getParent()->begin()); - ConvertUsesToScalar(AI, NewAI, 0); - } else { - DEBUG(dbgs() << "CONVERT TO SCALAR INTEGER: " << *AI << "\n"); - - // Create and insert the integer alloca. - const Type *NewTy = IntegerType::get(AI->getContext(), AllocaSize*8); - NewAI = new AllocaInst(NewTy, 0, "", AI->getParent()->begin()); - ConvertUsesToScalar(AI, NewAI, 0); - } + if (AllocaInst *NewAI = + ConvertToScalarInfo((unsigned)AllocaSize, *TD).TryConvert(AI)) { NewAI->takeName(AI); AI->eraseFromParent(); ++NumConverted; Changed = true; continue; - } + } // Otherwise, couldn't process this alloca. } @@ -698,7 +1259,6 @@ void SROA::RewriteMemIntrinUserOfAlloca(MemIntrinsic *MI, Instruction *Inst, // that doesn't have anything to do with the alloca that we are promoting. For // memset, this Value* stays null. Value *OtherPtr = 0; - LLVMContext &Context = MI->getContext(); unsigned MemAlignment = MI->getAlignment(); if (MemTransferInst *MTI = dyn_cast<MemTransferInst>(MI)) { // memmove/memcopy if (Inst == MTI->getRawDest()) @@ -756,7 +1316,7 @@ void SROA::RewriteMemIntrinUserOfAlloca(MemIntrinsic *MI, Instruction *Inst, } // Process each element of the aggregate. - Value *TheFn = MI->getOperand(0); + Value *TheFn = MI->getCalledValue(); const Type *BytePtrTy = MI->getRawDest()->getType(); bool SROADest = MI->getRawDest() == Inst; @@ -775,12 +1335,11 @@ void SROA::RewriteMemIntrinUserOfAlloca(MemIntrinsic *MI, Instruction *Inst, MI); uint64_t EltOffset; const PointerType *OtherPtrTy = cast<PointerType>(OtherPtr->getType()); - if (const StructType *ST = - dyn_cast<StructType>(OtherPtrTy->getElementType())) { + const Type *OtherTy = OtherPtrTy->getElementType(); + if (const StructType *ST = dyn_cast<StructType>(OtherTy)) { EltOffset = TD->getStructLayout(ST)->getElementOffset(i); } else { - const Type *EltTy = - cast<SequentialType>(OtherPtr->getType())->getElementType(); + const Type *EltTy = cast<SequentialType>(OtherTy)->getElementType(); EltOffset = TD->getTypeAllocSize(EltTy)*i; } @@ -832,7 +1391,7 @@ void SROA::RewriteMemIntrinUserOfAlloca(MemIntrinsic *MI, Instruction *Inst, } // Convert the integer value to the appropriate type. - StoreVal = ConstantInt::get(Context, TotalVal); + StoreVal = ConstantInt::get(CI->getContext(), TotalVal); if (ValTy->isPointerTy()) StoreVal = ConstantExpr::getIntToPtr(StoreVal, ValTy); else if (ValTy->isFloatingPointTy()) @@ -1174,509 +1733,6 @@ bool SROA::isSafeAllocaToScalarRepl(AllocaInst *AI) { return true; } -/// MergeInType - Add the 'In' type to the accumulated type (Accum) so far at -/// the offset specified by Offset (which is specified in bytes). 
-/// -/// There are two cases we handle here: -/// 1) A union of vector types of the same size and potentially its elements. -/// Here we turn element accesses into insert/extract element operations. -/// This promotes a <4 x float> with a store of float to the third element -/// into a <4 x float> that uses insert element. -/// 2) A fully general blob of memory, which we turn into some (potentially -/// large) integer type with extract and insert operations where the loads -/// and stores would mutate the memory. -static void MergeInType(const Type *In, uint64_t Offset, const Type *&VecTy, - unsigned AllocaSize, const TargetData &TD, - LLVMContext &Context) { - // If this could be contributing to a vector, analyze it. - if (VecTy != Type::getVoidTy(Context)) { // either null or a vector type. - - // If the In type is a vector that is the same size as the alloca, see if it - // matches the existing VecTy. - if (const VectorType *VInTy = dyn_cast<VectorType>(In)) { - if (VInTy->getBitWidth()/8 == AllocaSize && Offset == 0) { - // If we're storing/loading a vector of the right size, allow it as a - // vector. If this the first vector we see, remember the type so that - // we know the element size. - if (VecTy == 0) - VecTy = VInTy; - return; - } - } else if (In->isFloatTy() || In->isDoubleTy() || - (In->isIntegerTy() && In->getPrimitiveSizeInBits() >= 8 && - isPowerOf2_32(In->getPrimitiveSizeInBits()))) { - // If we're accessing something that could be an element of a vector, see - // if the implied vector agrees with what we already have and if Offset is - // compatible with it. - unsigned EltSize = In->getPrimitiveSizeInBits()/8; - if (Offset % EltSize == 0 && - AllocaSize % EltSize == 0 && - (VecTy == 0 || - cast<VectorType>(VecTy)->getElementType() - ->getPrimitiveSizeInBits()/8 == EltSize)) { - if (VecTy == 0) - VecTy = VectorType::get(In, AllocaSize/EltSize); - return; - } - } - } - - // Otherwise, we have a case that we can't handle with an optimized vector - // form. We can still turn this into a large integer. - VecTy = Type::getVoidTy(Context); -} - -/// CanConvertToScalar - V is a pointer. If we can convert the pointee and all -/// its accesses to a single vector type, return true and set VecTy to -/// the new type. If we could convert the alloca into a single promotable -/// integer, return true but set VecTy to VoidTy. Further, if the use is not a -/// completely trivial use that mem2reg could promote, set IsNotTrivial. Offset -/// is the current offset from the base of the alloca being analyzed. -/// -/// If we see at least one access to the value that is as a vector type, set the -/// SawVec flag. -bool SROA::CanConvertToScalar(Value *V, bool &IsNotTrivial, const Type *&VecTy, - bool &SawVec, uint64_t Offset, - unsigned AllocaSize) { - for (Value::use_iterator UI = V->use_begin(), E = V->use_end(); UI!=E; ++UI) { - Instruction *User = cast<Instruction>(*UI); - - if (LoadInst *LI = dyn_cast<LoadInst>(User)) { - // Don't break volatile loads. - if (LI->isVolatile()) - return false; - MergeInType(LI->getType(), Offset, VecTy, - AllocaSize, *TD, V->getContext()); - SawVec |= LI->getType()->isVectorTy(); - continue; - } - - if (StoreInst *SI = dyn_cast<StoreInst>(User)) { - // Storing the pointer, not into the value? 
- if (SI->getOperand(0) == V || SI->isVolatile()) return 0; - MergeInType(SI->getOperand(0)->getType(), Offset, - VecTy, AllocaSize, *TD, V->getContext()); - SawVec |= SI->getOperand(0)->getType()->isVectorTy(); - continue; - } - - if (BitCastInst *BCI = dyn_cast<BitCastInst>(User)) { - if (!CanConvertToScalar(BCI, IsNotTrivial, VecTy, SawVec, Offset, - AllocaSize)) - return false; - IsNotTrivial = true; - continue; - } - - if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(User)) { - // If this is a GEP with a variable indices, we can't handle it. - if (!GEP->hasAllConstantIndices()) - return false; - - // Compute the offset that this GEP adds to the pointer. - SmallVector<Value*, 8> Indices(GEP->op_begin()+1, GEP->op_end()); - uint64_t GEPOffset = TD->getIndexedOffset(GEP->getPointerOperandType(), - &Indices[0], Indices.size()); - // See if all uses can be converted. - if (!CanConvertToScalar(GEP, IsNotTrivial, VecTy, SawVec,Offset+GEPOffset, - AllocaSize)) - return false; - IsNotTrivial = true; - continue; - } - - // If this is a constant sized memset of a constant value (e.g. 0) we can - // handle it. - if (MemSetInst *MSI = dyn_cast<MemSetInst>(User)) { - // Store of constant value and constant size. - if (isa<ConstantInt>(MSI->getValue()) && - isa<ConstantInt>(MSI->getLength())) { - IsNotTrivial = true; - continue; - } - } - - // If this is a memcpy or memmove into or out of the whole allocation, we - // can handle it like a load or store of the scalar type. - if (MemTransferInst *MTI = dyn_cast<MemTransferInst>(User)) { - if (ConstantInt *Len = dyn_cast<ConstantInt>(MTI->getLength())) - if (Len->getZExtValue() == AllocaSize && Offset == 0) { - IsNotTrivial = true; - continue; - } - } - - // Otherwise, we cannot handle this! - return false; - } - - return true; -} - -/// ConvertUsesToScalar - Convert all of the users of Ptr to use the new alloca -/// directly. This happens when we are converting an "integer union" to a -/// single integer scalar, or when we are converting a "vector union" to a -/// vector with insert/extractelement instructions. -/// -/// Offset is an offset from the original alloca, in bits that need to be -/// shifted to the right. By the end of this, there should be no uses of Ptr. -void SROA::ConvertUsesToScalar(Value *Ptr, AllocaInst *NewAI, uint64_t Offset) { - while (!Ptr->use_empty()) { - Instruction *User = cast<Instruction>(Ptr->use_back()); - - if (BitCastInst *CI = dyn_cast<BitCastInst>(User)) { - ConvertUsesToScalar(CI, NewAI, Offset); - CI->eraseFromParent(); - continue; - } - - if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(User)) { - // Compute the offset that this GEP adds to the pointer. - SmallVector<Value*, 8> Indices(GEP->op_begin()+1, GEP->op_end()); - uint64_t GEPOffset = TD->getIndexedOffset(GEP->getPointerOperandType(), - &Indices[0], Indices.size()); - ConvertUsesToScalar(GEP, NewAI, Offset+GEPOffset*8); - GEP->eraseFromParent(); - continue; - } - - IRBuilder<> Builder(User->getParent(), User); - - if (LoadInst *LI = dyn_cast<LoadInst>(User)) { - // The load is a bit extract from NewAI shifted right by Offset bits. 
- Value *LoadedVal = Builder.CreateLoad(NewAI, "tmp"); - Value *NewLoadVal - = ConvertScalar_ExtractValue(LoadedVal, LI->getType(), Offset, Builder); - LI->replaceAllUsesWith(NewLoadVal); - LI->eraseFromParent(); - continue; - } - - if (StoreInst *SI = dyn_cast<StoreInst>(User)) { - assert(SI->getOperand(0) != Ptr && "Consistency error!"); - Instruction *Old = Builder.CreateLoad(NewAI, NewAI->getName()+".in"); - Value *New = ConvertScalar_InsertValue(SI->getOperand(0), Old, Offset, - Builder); - Builder.CreateStore(New, NewAI); - SI->eraseFromParent(); - - // If the load we just inserted is now dead, then the inserted store - // overwrote the entire thing. - if (Old->use_empty()) - Old->eraseFromParent(); - continue; - } - - // If this is a constant sized memset of a constant value (e.g. 0) we can - // transform it into a store of the expanded constant value. - if (MemSetInst *MSI = dyn_cast<MemSetInst>(User)) { - assert(MSI->getRawDest() == Ptr && "Consistency error!"); - unsigned NumBytes = cast<ConstantInt>(MSI->getLength())->getZExtValue(); - if (NumBytes != 0) { - unsigned Val = cast<ConstantInt>(MSI->getValue())->getZExtValue(); - - // Compute the value replicated the right number of times. - APInt APVal(NumBytes*8, Val); - - // Splat the value if non-zero. - if (Val) - for (unsigned i = 1; i != NumBytes; ++i) - APVal |= APVal << 8; - - Instruction *Old = Builder.CreateLoad(NewAI, NewAI->getName()+".in"); - Value *New = ConvertScalar_InsertValue( - ConstantInt::get(User->getContext(), APVal), - Old, Offset, Builder); - Builder.CreateStore(New, NewAI); - - // If the load we just inserted is now dead, then the memset overwrote - // the entire thing. - if (Old->use_empty()) - Old->eraseFromParent(); - } - MSI->eraseFromParent(); - continue; - } - - // If this is a memcpy or memmove into or out of the whole allocation, we - // can handle it like a load or store of the scalar type. - if (MemTransferInst *MTI = dyn_cast<MemTransferInst>(User)) { - assert(Offset == 0 && "must be store to start of alloca"); - - // If the source and destination are both to the same alloca, then this is - // a noop copy-to-self, just delete it. Otherwise, emit a load and store - // as appropriate. - AllocaInst *OrigAI = cast<AllocaInst>(Ptr->getUnderlyingObject(0)); - - if (MTI->getSource()->getUnderlyingObject(0) != OrigAI) { - // Dest must be OrigAI, change this to be a load from the original - // pointer (bitcasted), then a store to our new alloca. - assert(MTI->getRawDest() == Ptr && "Neither use is of pointer?"); - Value *SrcPtr = MTI->getSource(); - SrcPtr = Builder.CreateBitCast(SrcPtr, NewAI->getType()); - - LoadInst *SrcVal = Builder.CreateLoad(SrcPtr, "srcval"); - SrcVal->setAlignment(MTI->getAlignment()); - Builder.CreateStore(SrcVal, NewAI); - } else if (MTI->getDest()->getUnderlyingObject(0) != OrigAI) { - // Src must be OrigAI, change this to be a load from NewAI then a store - // through the original dest pointer (bitcasted). - assert(MTI->getRawSource() == Ptr && "Neither use is of pointer?"); - LoadInst *SrcVal = Builder.CreateLoad(NewAI, "srcval"); - - Value *DstPtr = Builder.CreateBitCast(MTI->getDest(), NewAI->getType()); - StoreInst *NewStore = Builder.CreateStore(SrcVal, DstPtr); - NewStore->setAlignment(MTI->getAlignment()); - } else { - // Noop transfer. 
Src == Dst - } - - MTI->eraseFromParent(); - continue; - } - - llvm_unreachable("Unsupported operation!"); - } -} - -/// ConvertScalar_ExtractValue - Extract a value of type ToType from an integer -/// or vector value FromVal, extracting the bits from the offset specified by -/// Offset. This returns the value, which is of type ToType. -/// -/// This happens when we are converting an "integer union" to a single -/// integer scalar, or when we are converting a "vector union" to a vector with -/// insert/extractelement instructions. -/// -/// Offset is an offset from the original alloca, in bits that need to be -/// shifted to the right. -Value *SROA::ConvertScalar_ExtractValue(Value *FromVal, const Type *ToType, - uint64_t Offset, IRBuilder<> &Builder) { - // If the load is of the whole new alloca, no conversion is needed. - if (FromVal->getType() == ToType && Offset == 0) - return FromVal; - - // If the result alloca is a vector type, this is either an element - // access or a bitcast to another vector type of the same size. - if (const VectorType *VTy = dyn_cast<VectorType>(FromVal->getType())) { - if (ToType->isVectorTy()) - return Builder.CreateBitCast(FromVal, ToType, "tmp"); - - // Otherwise it must be an element access. - unsigned Elt = 0; - if (Offset) { - unsigned EltSize = TD->getTypeAllocSizeInBits(VTy->getElementType()); - Elt = Offset/EltSize; - assert(EltSize*Elt == Offset && "Invalid modulus in validity checking"); - } - // Return the element extracted out of it. - Value *V = Builder.CreateExtractElement(FromVal, ConstantInt::get( - Type::getInt32Ty(FromVal->getContext()), Elt), "tmp"); - if (V->getType() != ToType) - V = Builder.CreateBitCast(V, ToType, "tmp"); - return V; - } - - // If ToType is a first class aggregate, extract out each of the pieces and - // use insertvalue's to form the FCA. - if (const StructType *ST = dyn_cast<StructType>(ToType)) { - const StructLayout &Layout = *TD->getStructLayout(ST); - Value *Res = UndefValue::get(ST); - for (unsigned i = 0, e = ST->getNumElements(); i != e; ++i) { - Value *Elt = ConvertScalar_ExtractValue(FromVal, ST->getElementType(i), - Offset+Layout.getElementOffsetInBits(i), - Builder); - Res = Builder.CreateInsertValue(Res, Elt, i, "tmp"); - } - return Res; - } - - if (const ArrayType *AT = dyn_cast<ArrayType>(ToType)) { - uint64_t EltSize = TD->getTypeAllocSizeInBits(AT->getElementType()); - Value *Res = UndefValue::get(AT); - for (unsigned i = 0, e = AT->getNumElements(); i != e; ++i) { - Value *Elt = ConvertScalar_ExtractValue(FromVal, AT->getElementType(), - Offset+i*EltSize, Builder); - Res = Builder.CreateInsertValue(Res, Elt, i, "tmp"); - } - return Res; - } - - // Otherwise, this must be a union that was converted to an integer value. - const IntegerType *NTy = cast<IntegerType>(FromVal->getType()); - - // If this is a big-endian system and the load is narrower than the - // full alloca type, we need to do a shift to get the right bits. - int ShAmt = 0; - if (TD->isBigEndian()) { - // On big-endian machines, the lowest bit is stored at the bit offset - // from the pointer given by getTypeStoreSizeInBits. This matters for - // integers with a bitwidth that is not a multiple of 8. - ShAmt = TD->getTypeStoreSizeInBits(NTy) - - TD->getTypeStoreSizeInBits(ToType) - Offset; - } else { - ShAmt = Offset; - } - - // Note: we support negative bitwidths (with shl) which are not defined. - // We do this to support (f.e.) loads off the end of a structure where - // only some bits are used. 
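  // Worked example (illustrative, little-endian): loading a float that lives
  // at byte offset 4 of an i64-backed alloca reaches this point with
  // Offset == 32, so the value is materialized roughly as
  //   %t  = lshr i64 %FromVal, 32
  //   %t2 = trunc i64 %t to i32
  //   %f  = bitcast i32 %t2 to float
  // On big-endian targets ShAmt is instead derived from the store sizes
  // computed above, and a negative ShAmt selects the shl path.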
- if (ShAmt > 0 && (unsigned)ShAmt < NTy->getBitWidth()) - FromVal = Builder.CreateLShr(FromVal, - ConstantInt::get(FromVal->getType(), - ShAmt), "tmp"); - else if (ShAmt < 0 && (unsigned)-ShAmt < NTy->getBitWidth()) - FromVal = Builder.CreateShl(FromVal, - ConstantInt::get(FromVal->getType(), - -ShAmt), "tmp"); - - // Finally, unconditionally truncate the integer to the right width. - unsigned LIBitWidth = TD->getTypeSizeInBits(ToType); - if (LIBitWidth < NTy->getBitWidth()) - FromVal = - Builder.CreateTrunc(FromVal, IntegerType::get(FromVal->getContext(), - LIBitWidth), "tmp"); - else if (LIBitWidth > NTy->getBitWidth()) - FromVal = - Builder.CreateZExt(FromVal, IntegerType::get(FromVal->getContext(), - LIBitWidth), "tmp"); - - // If the result is an integer, this is a trunc or bitcast. - if (ToType->isIntegerTy()) { - // Should be done. - } else if (ToType->isFloatingPointTy() || ToType->isVectorTy()) { - // Just do a bitcast, we know the sizes match up. - FromVal = Builder.CreateBitCast(FromVal, ToType, "tmp"); - } else { - // Otherwise must be a pointer. - FromVal = Builder.CreateIntToPtr(FromVal, ToType, "tmp"); - } - assert(FromVal->getType() == ToType && "Didn't convert right?"); - return FromVal; -} - -/// ConvertScalar_InsertValue - Insert the value "SV" into the existing integer -/// or vector value "Old" at the offset specified by Offset. -/// -/// This happens when we are converting an "integer union" to a -/// single integer scalar, or when we are converting a "vector union" to a -/// vector with insert/extractelement instructions. -/// -/// Offset is an offset from the original alloca, in bits that need to be -/// shifted to the right. -Value *SROA::ConvertScalar_InsertValue(Value *SV, Value *Old, - uint64_t Offset, IRBuilder<> &Builder) { - - // Convert the stored type to the actual type, shift it left to insert - // then 'or' into place. - const Type *AllocaType = Old->getType(); - LLVMContext &Context = Old->getContext(); - - if (const VectorType *VTy = dyn_cast<VectorType>(AllocaType)) { - uint64_t VecSize = TD->getTypeAllocSizeInBits(VTy); - uint64_t ValSize = TD->getTypeAllocSizeInBits(SV->getType()); - - // Changing the whole vector with memset or with an access of a different - // vector type? - if (ValSize == VecSize) - return Builder.CreateBitCast(SV, AllocaType, "tmp"); - - uint64_t EltSize = TD->getTypeAllocSizeInBits(VTy->getElementType()); - - // Must be an element insertion. - unsigned Elt = Offset/EltSize; - - if (SV->getType() != VTy->getElementType()) - SV = Builder.CreateBitCast(SV, VTy->getElementType(), "tmp"); - - SV = Builder.CreateInsertElement(Old, SV, - ConstantInt::get(Type::getInt32Ty(SV->getContext()), Elt), - "tmp"); - return SV; - } - - // If SV is a first-class aggregate value, insert each value recursively. 
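  // For example (illustrative): storing a {i32, float} first-class aggregate
  // at Offset 0 is decomposed below into an extractvalue of each field
  // followed by a recursive ConvertScalar_InsertValue at bit offsets 0 and
  // Layout.getElementOffsetInBits(1) (32 bits under the usual layout of this
  // struct, though that is target-dependent).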
- if (const StructType *ST = dyn_cast<StructType>(SV->getType())) { - const StructLayout &Layout = *TD->getStructLayout(ST); - for (unsigned i = 0, e = ST->getNumElements(); i != e; ++i) { - Value *Elt = Builder.CreateExtractValue(SV, i, "tmp"); - Old = ConvertScalar_InsertValue(Elt, Old, - Offset+Layout.getElementOffsetInBits(i), - Builder); - } - return Old; - } - - if (const ArrayType *AT = dyn_cast<ArrayType>(SV->getType())) { - uint64_t EltSize = TD->getTypeAllocSizeInBits(AT->getElementType()); - for (unsigned i = 0, e = AT->getNumElements(); i != e; ++i) { - Value *Elt = Builder.CreateExtractValue(SV, i, "tmp"); - Old = ConvertScalar_InsertValue(Elt, Old, Offset+i*EltSize, Builder); - } - return Old; - } - - // If SV is a float, convert it to the appropriate integer type. - // If it is a pointer, do the same. - unsigned SrcWidth = TD->getTypeSizeInBits(SV->getType()); - unsigned DestWidth = TD->getTypeSizeInBits(AllocaType); - unsigned SrcStoreWidth = TD->getTypeStoreSizeInBits(SV->getType()); - unsigned DestStoreWidth = TD->getTypeStoreSizeInBits(AllocaType); - if (SV->getType()->isFloatingPointTy() || SV->getType()->isVectorTy()) - SV = Builder.CreateBitCast(SV, - IntegerType::get(SV->getContext(),SrcWidth), "tmp"); - else if (SV->getType()->isPointerTy()) - SV = Builder.CreatePtrToInt(SV, TD->getIntPtrType(SV->getContext()), "tmp"); - - // Zero extend or truncate the value if needed. - if (SV->getType() != AllocaType) { - if (SV->getType()->getPrimitiveSizeInBits() < - AllocaType->getPrimitiveSizeInBits()) - SV = Builder.CreateZExt(SV, AllocaType, "tmp"); - else { - // Truncation may be needed if storing more than the alloca can hold - // (undefined behavior). - SV = Builder.CreateTrunc(SV, AllocaType, "tmp"); - SrcWidth = DestWidth; - SrcStoreWidth = DestStoreWidth; - } - } - - // If this is a big-endian system and the store is narrower than the - // full alloca type, we need to do a shift to get the right bits. - int ShAmt = 0; - if (TD->isBigEndian()) { - // On big-endian machines, the lowest bit is stored at the bit offset - // from the pointer given by getTypeStoreSizeInBits. This matters for - // integers with a bitwidth that is not a multiple of 8. - ShAmt = DestStoreWidth - SrcStoreWidth - Offset; - } else { - ShAmt = Offset; - } - - // Note: we support negative bitwidths (with shr) which are not defined. - // We do this to support (f.e.) stores off the end of a structure where - // only some bits in the structure are set. - APInt Mask(APInt::getLowBitsSet(DestWidth, SrcWidth)); - if (ShAmt > 0 && (unsigned)ShAmt < DestWidth) { - SV = Builder.CreateShl(SV, ConstantInt::get(SV->getType(), - ShAmt), "tmp"); - Mask <<= ShAmt; - } else if (ShAmt < 0 && (unsigned)-ShAmt < DestWidth) { - SV = Builder.CreateLShr(SV, ConstantInt::get(SV->getType(), - -ShAmt), "tmp"); - Mask = Mask.lshr(-ShAmt); - } - - // Mask out the bits we are about to insert from the old value, and or - // in the new bits. - if (SrcWidth != DestWidth) { - assert(DestWidth > SrcWidth); - Old = Builder.CreateAnd(Old, ConstantInt::get(Context, ~Mask), "mask"); - SV = Builder.CreateOr(Old, SV, "ins"); - } - return SV; -} - /// PointsToConstantGlobal - Return true if V (possibly indirectly) points to @@ -1699,21 +1755,23 @@ static bool PointsToConstantGlobal(Value *V) { /// the uses. If we see a memcpy/memmove that targets an unoffseted pointer to /// the alloca, and if the source pointer is a pointer to a constant global, we /// can optimize this. 
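// A C-level illustration (not from this patch) of the pattern being detected:
//
//   static const int Table[8] = {1, 2, 3, 4, 5, 6, 7, 8};
//   int sum(void) {
//     int Buf[8];
//     memcpy(Buf, Table, sizeof(Buf));
//     int S = 0;
//     for (int i = 0; i != 8; ++i) S += Buf[i];
//     return S;
//   }
//
// The alloca for Buf is written only by the memcpy from the constant global
// Table and is otherwise only read, so every use of Buf can be rewritten to
// use Table directly and both the alloca and the copy become dead.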
-static bool isOnlyCopiedFromConstantGlobal(Value *V, Instruction *&TheCopy, +static bool isOnlyCopiedFromConstantGlobal(Value *V, MemTransferInst *&TheCopy, bool isOffset) { for (Value::use_iterator UI = V->use_begin(), E = V->use_end(); UI!=E; ++UI) { - if (LoadInst *LI = dyn_cast<LoadInst>(*UI)) + User *U = cast<Instruction>(*UI); + + if (LoadInst *LI = dyn_cast<LoadInst>(U)) // Ignore non-volatile loads, they are always ok. if (!LI->isVolatile()) continue; - if (BitCastInst *BCI = dyn_cast<BitCastInst>(*UI)) { + if (BitCastInst *BCI = dyn_cast<BitCastInst>(U)) { // If uses of the bitcast are ok, we are ok. if (!isOnlyCopiedFromConstantGlobal(BCI, TheCopy, isOffset)) return false; continue; } - if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(*UI)) { + if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(U)) { // If the GEP has all zero indices, it doesn't offset the pointer. If it // doesn't, it does. if (!isOnlyCopiedFromConstantGlobal(GEP, TheCopy, @@ -1724,7 +1782,8 @@ static bool isOnlyCopiedFromConstantGlobal(Value *V, Instruction *&TheCopy, // If this is isn't our memcpy/memmove, reject it as something we can't // handle. - if (!isa<MemTransferInst>(*UI)) + MemTransferInst *MI = dyn_cast<MemTransferInst>(U); + if (MI == 0) return false; // If we already have seen a copy, reject the second one. @@ -1737,10 +1796,8 @@ static bool isOnlyCopiedFromConstantGlobal(Value *V, Instruction *&TheCopy, // If the memintrinsic isn't using the alloca as the dest, reject it. if (UI.getOperandNo() != 1) return false; - MemIntrinsic *MI = cast<MemIntrinsic>(*UI); - // If the source of the memcpy/move is not a constant global, reject it. - if (!PointsToConstantGlobal(MI->getOperand(2))) + if (!PointsToConstantGlobal(MI->getSource())) return false; // Otherwise, the transform is safe. Remember the copy instruction. @@ -1752,8 +1809,8 @@ static bool isOnlyCopiedFromConstantGlobal(Value *V, Instruction *&TheCopy, /// isOnlyCopiedFromConstantGlobal - Return true if the specified alloca is only /// modified by a copy from a constant global. If we can prove this, we can /// replace any uses of the alloca with uses of the global directly. -Instruction *SROA::isOnlyCopiedFromConstantGlobal(AllocaInst *AI) { - Instruction *TheCopy = 0; +MemTransferInst *SROA::isOnlyCopiedFromConstantGlobal(AllocaInst *AI) { + MemTransferInst *TheCopy = 0; if (::isOnlyCopiedFromConstantGlobal(AI, TheCopy, false)) return TheCopy; return 0; diff --git a/lib/Transforms/Scalar/SimplifyHalfPowrLibCalls.cpp b/lib/Transforms/Scalar/SimplifyHalfPowrLibCalls.cpp index 4464961..c3408e7 100644 --- a/lib/Transforms/Scalar/SimplifyHalfPowrLibCalls.cpp +++ b/lib/Transforms/Scalar/SimplifyHalfPowrLibCalls.cpp @@ -93,7 +93,8 @@ InlineHalfPowrs(const std::vector<Instruction *> &HalfPowrs, // Inline the call, taking care of what code ends up where. 
NewBlock = SplitBlock(NextInst->getParent(), NextInst, this); - bool B = InlineFunction(Call, 0, TD); + InlineFunctionInfo IFI(0, TD); + bool B = InlineFunction(Call, IFI); assert(B && "half_powr didn't inline?"); B=B; BasicBlock *NewBody = NewBlock->getSinglePredecessor(); diff --git a/lib/Transforms/Scalar/TailRecursionElimination.cpp b/lib/Transforms/Scalar/TailRecursionElimination.cpp index 162d902..5ad5de2 100644 --- a/lib/Transforms/Scalar/TailRecursionElimination.cpp +++ b/lib/Transforms/Scalar/TailRecursionElimination.cpp @@ -59,6 +59,8 @@ #include "llvm/Instructions.h" #include "llvm/Pass.h" #include "llvm/Analysis/CaptureTracking.h" +#include "llvm/Analysis/InlineCost.h" +#include "llvm/Support/CallSite.h" #include "llvm/Support/CFG.h" #include "llvm/ADT/Statistic.h" using namespace llvm; @@ -328,15 +330,6 @@ bool TailCallElim::ProcessReturningBlock(ReturnInst *Ret, BasicBlock *&OldEntry, if (&BB->front() == Ret) // Make sure there is something before the ret... return false; - // If the return is in the entry block, then making this transformation would - // turn infinite recursion into an infinite loop. This transformation is ok - // in theory, but breaks some code like: - // double fabs(double f) { return __builtin_fabs(f); } // a 'fabs' call - // disable this xform in this case, because the code generator will lower the - // call to fabs into inline code. - if (BB == &F->getEntryBlock()) - return false; - // Scan backwards from the return, checking to see if there is a tail call in // this block. If so, set CI to it. CallInst *CI; @@ -356,6 +349,25 @@ bool TailCallElim::ProcessReturningBlock(ReturnInst *Ret, BasicBlock *&OldEntry, if (CI->isTailCall() && CannotTailCallElimCallsMarkedTail) return false; + // As a special case, detect code like this: + // double fabs(double f) { return __builtin_fabs(f); } // a 'fabs' call + // and disable this xform in this case, because the code generator will + // lower the call to fabs into inline code. + if (BB == &F->getEntryBlock() && + &BB->front() == CI && &*++BB->begin() == Ret && + callIsSmall(F)) { + // A single-block function with just a call and a return. Check that + // the arguments match. + CallSite::arg_iterator I = CallSite(CI).arg_begin(), + E = CallSite(CI).arg_end(); + Function::arg_iterator FI = F->arg_begin(), + FE = F->arg_end(); + for (; I != E && FI != FE; ++I, ++FI) + if (*I != &*FI) break; + if (I == E && FI == FE) + return false; + } + // If we are introducing accumulator recursion to eliminate associative // operations after the call instruction, this variable contains the initial // value for the accumulator. If this value is set, we actually perform diff --git a/lib/Transforms/Utils/AddrModeMatcher.cpp b/lib/Transforms/Utils/AddrModeMatcher.cpp index c70bab5..ea9d1c1 100644 --- a/lib/Transforms/Utils/AddrModeMatcher.cpp +++ b/lib/Transforms/Utils/AddrModeMatcher.cpp @@ -434,19 +434,21 @@ static bool FindAllMemoryUses(Instruction *I, // Loop over all the uses, recursively processing them. for (Value::use_iterator UI = I->use_begin(), E = I->use_end(); UI != E; ++UI) { - if (LoadInst *LI = dyn_cast<LoadInst>(*UI)) { + User *U = *UI; + + if (LoadInst *LI = dyn_cast<LoadInst>(U)) { MemoryUses.push_back(std::make_pair(LI, UI.getOperandNo())); continue; } - if (StoreInst *SI = dyn_cast<StoreInst>(*UI)) { + if (StoreInst *SI = dyn_cast<StoreInst>(U)) { unsigned opNo = UI.getOperandNo(); if (opNo == 0) return true; // Storing addr, not into addr. 
MemoryUses.push_back(std::make_pair(SI, opNo)); continue; } - if (CallInst *CI = dyn_cast<CallInst>(*UI)) { + if (CallInst *CI = dyn_cast<CallInst>(U)) { InlineAsm *IA = dyn_cast<InlineAsm>(CI->getCalledValue()); if (IA == 0) return true; @@ -456,7 +458,7 @@ static bool FindAllMemoryUses(Instruction *I, continue; } - if (FindAllMemoryUses(cast<Instruction>(*UI), MemoryUses, ConsideredInsts, + if (FindAllMemoryUses(cast<Instruction>(U), MemoryUses, ConsideredInsts, TLI)) return true; } diff --git a/lib/Transforms/Utils/BasicInliner.cpp b/lib/Transforms/Utils/BasicInliner.cpp index c580b8f..f0e31ef 100644 --- a/lib/Transforms/Utils/BasicInliner.cpp +++ b/lib/Transforms/Utils/BasicInliner.cpp @@ -129,7 +129,8 @@ void BasicInlinerImpl::inlineFunctions() { } // Inline - if (InlineFunction(CS, NULL, TD)) { + InlineFunctionInfo IFI(0, TD); + if (InlineFunction(CS, IFI)) { if (Callee->use_empty() && (Callee->hasLocalLinkage() || Callee->hasAvailableExternallyLinkage())) DeadFunctions.insert(Callee); diff --git a/lib/Transforms/Utils/BuildLibCalls.cpp b/lib/Transforms/Utils/BuildLibCalls.cpp index fff8179..767fa3a 100644 --- a/lib/Transforms/Utils/BuildLibCalls.cpp +++ b/lib/Transforms/Utils/BuildLibCalls.cpp @@ -90,15 +90,15 @@ Value *llvm::EmitStrCpy(Value *Dst, Value *Src, IRBuilder<> &B, /// EmitStrNCpy - Emit a call to the strncpy function to the builder, for the /// specified pointer arguments. Value *llvm::EmitStrNCpy(Value *Dst, Value *Src, Value *Len, - IRBuilder<> &B, const TargetData *TD) { + IRBuilder<> &B, const TargetData *TD, StringRef Name) { Module *M = B.GetInsertBlock()->getParent()->getParent(); AttributeWithIndex AWI[2]; AWI[0] = AttributeWithIndex::get(2, Attribute::NoCapture); AWI[1] = AttributeWithIndex::get(~0u, Attribute::NoUnwind); const Type *I8Ptr = B.getInt8PtrTy(); - Value *StrNCpy = M->getOrInsertFunction("strncpy", AttrListPtr::get(AWI, 2), - I8Ptr, I8Ptr, I8Ptr, - Len->getType(), NULL); + Value *StrNCpy = M->getOrInsertFunction(Name, AttrListPtr::get(AWI, 2), + I8Ptr, I8Ptr, I8Ptr, + Len->getType(), NULL); CallInst *CI = B.CreateCall3(StrNCpy, CastToCStr(Dst, B), CastToCStr(Src, B), Len, "strncpy"); if (const Function *F = dyn_cast<Function>(StrNCpy->stripPointerCasts())) @@ -373,15 +373,29 @@ void llvm::EmitFWrite(Value *Ptr, Value *Size, Value *File, SimplifyFortifiedLibCalls::~SimplifyFortifiedLibCalls() { } bool SimplifyFortifiedLibCalls::fold(CallInst *CI, const TargetData *TD) { + // We really need TargetData for later. + if (!TD) return false; + this->CI = CI; - StringRef Name = CI->getCalledFunction()->getName(); + Function *Callee = CI->getCalledFunction(); + StringRef Name = Callee->getName(); + const FunctionType *FT = Callee->getFunctionType(); BasicBlock *BB = CI->getParent(); - IRBuilder<> B(CI->getParent()->getContext()); + LLVMContext &Context = CI->getParent()->getContext(); + IRBuilder<> B(Context); // Set the builder to the instruction after the call. B.SetInsertPoint(BB, CI); if (Name == "__memcpy_chk") { + // Check if this has the right signature. 
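    // For reference, the signature enforced here matches the usual fortified
    // prototype (assumed, not introduced by this patch):
    //   void *__memcpy_chk(void *dst, const void *src, size_t len, size_t dstlen);
    // i.e. a pointer return equal to the first parameter's type, two pointer
    // parameters, and two intptr-sized size parameters.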
+ if (FT->getNumParams() != 4 || FT->getReturnType() != FT->getParamType(0) || + !FT->getParamType(0)->isPointerTy() || + !FT->getParamType(1)->isPointerTy() || + FT->getParamType(2) != TD->getIntPtrType(Context) || + FT->getParamType(3) != TD->getIntPtrType(Context)) + return false; + if (isFoldable(4, 3, false)) { EmitMemCpy(CI->getOperand(1), CI->getOperand(2), CI->getOperand(3), 1, false, B, TD); @@ -397,6 +411,14 @@ bool SimplifyFortifiedLibCalls::fold(CallInst *CI, const TargetData *TD) { } if (Name == "__memmove_chk") { + // Check if this has the right signature. + if (FT->getNumParams() != 4 || FT->getReturnType() != FT->getParamType(0) || + !FT->getParamType(0)->isPointerTy() || + !FT->getParamType(1)->isPointerTy() || + FT->getParamType(2) != TD->getIntPtrType(Context) || + FT->getParamType(3) != TD->getIntPtrType(Context)) + return false; + if (isFoldable(4, 3, false)) { EmitMemMove(CI->getOperand(1), CI->getOperand(2), CI->getOperand(3), 1, false, B, TD); @@ -407,6 +429,14 @@ bool SimplifyFortifiedLibCalls::fold(CallInst *CI, const TargetData *TD) { } if (Name == "__memset_chk") { + // Check if this has the right signature. + if (FT->getNumParams() != 4 || FT->getReturnType() != FT->getParamType(0) || + !FT->getParamType(0)->isPointerTy() || + !FT->getParamType(1)->isIntegerTy() || + FT->getParamType(2) != TD->getIntPtrType(Context) || + FT->getParamType(3) != TD->getIntPtrType(Context)) + return false; + if (isFoldable(4, 3, false)) { Value *Val = B.CreateIntCast(CI->getOperand(2), B.getInt8Ty(), false); @@ -418,6 +448,15 @@ bool SimplifyFortifiedLibCalls::fold(CallInst *CI, const TargetData *TD) { } if (Name == "__strcpy_chk" || Name == "__stpcpy_chk") { + // Check if this has the right signature. + if (FT->getNumParams() != 3 || + FT->getReturnType() != FT->getParamType(0) || + FT->getParamType(0) != FT->getParamType(1) || + FT->getParamType(0) != Type::getInt8PtrTy(Context) || + FT->getParamType(2) != TD->getIntPtrType(Context)) + return 0; + + // If a) we don't have any length information, or b) we know this will // fit then just lower to a plain st[rp]cpy. Otherwise we'll keep our // st[rp]cpy_chk call which may fail at runtime if the size is too long. @@ -432,10 +471,18 @@ bool SimplifyFortifiedLibCalls::fold(CallInst *CI, const TargetData *TD) { return false; } - if (Name == "__strncpy_chk") { + if (Name == "__strncpy_chk" || Name == "__stpncpy_chk") { + // Check if this has the right signature. 
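    // Assumed prototypes for the two entry points handled here:
    //   char *__strncpy_chk(char *dst, const char *src, size_t n, size_t dstlen);
    //   char *__stpncpy_chk(char *dst, const char *src, size_t n, size_t dstlen);
    // Name.substr(2, 7) below strips the "__" prefix and the "_chk" suffix,
    // so EmitStrNCpy is asked to emit either "strncpy" or "stpncpy".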
+ if (FT->getNumParams() != 4 || FT->getReturnType() != FT->getParamType(0) || + FT->getParamType(0) != FT->getParamType(1) || + FT->getParamType(0) != Type::getInt8PtrTy(Context) || + !FT->getParamType(2)->isIntegerTy() || + FT->getParamType(3) != TD->getIntPtrType(Context)) + return false; + if (isFoldable(4, 3, false)) { Value *Ret = EmitStrNCpy(CI->getOperand(1), CI->getOperand(2), - CI->getOperand(3), B, TD); + CI->getOperand(3), B, TD, Name.substr(2, 7)); replaceCall(Ret); return true; } diff --git a/lib/Transforms/Utils/CloneFunction.cpp b/lib/Transforms/Utils/CloneFunction.cpp index 62fc2ec..8ad66dd 100644 --- a/lib/Transforms/Utils/CloneFunction.cpp +++ b/lib/Transforms/Utils/CloneFunction.cpp @@ -23,7 +23,7 @@ #include "llvm/LLVMContext.h" #include "llvm/Metadata.h" #include "llvm/Support/CFG.h" -#include "llvm/Transforms/Utils/ValueMapper.h" +#include "ValueMapper.h" #include "llvm/Analysis/ConstantFolding.h" #include "llvm/Analysis/DebugInfo.h" #include "llvm/ADT/SmallVector.h" diff --git a/lib/Transforms/Utils/CloneModule.cpp b/lib/Transforms/Utils/CloneModule.cpp index a163f89..b87c082 100644 --- a/lib/Transforms/Utils/CloneModule.cpp +++ b/lib/Transforms/Utils/CloneModule.cpp @@ -17,7 +17,7 @@ #include "llvm/DerivedTypes.h" #include "llvm/TypeSymbolTable.h" #include "llvm/Constant.h" -#include "llvm/Transforms/Utils/ValueMapper.h" +#include "ValueMapper.h" using namespace llvm; /// CloneModule - Return an exact copy of the specified module. This is not as diff --git a/lib/Transforms/Utils/CodeExtractor.cpp b/lib/Transforms/Utils/CodeExtractor.cpp index b208494..b51f751 100644 --- a/lib/Transforms/Utils/CodeExtractor.cpp +++ b/lib/Transforms/Utils/CodeExtractor.cpp @@ -751,7 +751,7 @@ ExtractCodeRegion(const std::vector<BasicBlock*> &code) { // verifyFunction(*oldFunction); DEBUG(if (verifyFunction(*newFunction)) - llvm_report_error("verifyFunction failed!")); + report_fatal_error("verifyFunction failed!")); return newFunction; } diff --git a/lib/Transforms/Utils/InlineFunction.cpp b/lib/Transforms/Utils/InlineFunction.cpp index 75c9ccd..91390bc 100644 --- a/lib/Transforms/Utils/InlineFunction.cpp +++ b/lib/Transforms/Utils/InlineFunction.cpp @@ -15,7 +15,6 @@ #include "llvm/Transforms/Utils/Cloning.h" #include "llvm/Constants.h" #include "llvm/DerivedTypes.h" -#include "llvm/LLVMContext.h" #include "llvm/Module.h" #include "llvm/Instructions.h" #include "llvm/IntrinsicInst.h" @@ -29,13 +28,11 @@ #include "llvm/Support/CallSite.h" using namespace llvm; -bool llvm::InlineFunction(CallInst *CI, CallGraph *CG, const TargetData *TD, - SmallVectorImpl<AllocaInst*> *StaticAllocas) { - return InlineFunction(CallSite(CI), CG, TD, StaticAllocas); +bool llvm::InlineFunction(CallInst *CI, InlineFunctionInfo &IFI) { + return InlineFunction(CallSite(CI), IFI); } -bool llvm::InlineFunction(InvokeInst *II, CallGraph *CG, const TargetData *TD, - SmallVectorImpl<AllocaInst*> *StaticAllocas) { - return InlineFunction(CallSite(II), CG, TD, StaticAllocas); +bool llvm::InlineFunction(InvokeInst *II, InlineFunctionInfo &IFI) { + return InlineFunction(CallSite(II), IFI); } @@ -75,7 +72,7 @@ static void HandleCallsInBlockInlinedThroughInvoke(BasicBlock *BB, II->setAttributes(CI->getAttributes()); // Make sure that anything using the call now uses the invoke! This also - // updates the CallGraph if present. + // updates the CallGraph if present, because it uses a WeakVH. 
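    // (That is, the call graph's reference to the original CallInst is held
    // through a value handle, so the replaceAllUsesWith below retargets the
    // edge to the new InvokeInst without an explicit call-graph update.)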
CI->replaceAllUsesWith(II); // Delete the unconditional branch inserted by splitBasicBlock @@ -173,7 +170,8 @@ static void HandleInlinedInvoke(InvokeInst *II, BasicBlock *FirstNewBlock, static void UpdateCallGraphAfterInlining(CallSite CS, Function::iterator FirstNewBlock, DenseMap<const Value*, Value*> &ValueMap, - CallGraph &CG) { + InlineFunctionInfo &IFI) { + CallGraph &CG = *IFI.CG; const Function *Caller = CS.getInstruction()->getParent()->getParent(); const Function *Callee = CS.getCalledFunction(); CallGraphNode *CalleeNode = CG[Callee]; @@ -201,8 +199,27 @@ static void UpdateCallGraphAfterInlining(CallSite CS, // If the call was inlined, but then constant folded, there is no edge to // add. Check for this case. - if (Instruction *NewCall = dyn_cast<Instruction>(VMI->second)) - CallerNode->addCalledFunction(CallSite::get(NewCall), I->second); + Instruction *NewCall = dyn_cast<Instruction>(VMI->second); + if (NewCall == 0) continue; + + // Remember that this call site got inlined for the client of + // InlineFunction. + IFI.InlinedCalls.push_back(NewCall); + + // It's possible that inlining the callsite will cause it to go from an + // indirect to a direct call by resolving a function pointer. If this + // happens, set the callee of the new call site to a more precise + // destination. This can also happen if the call graph node of the caller + // was just unnecessarily imprecise. + if (I->second->getFunction() == 0) + if (Function *F = CallSite(NewCall).getCalledFunction()) { + // Indirect call site resolved to direct call. + CallerNode->addCalledFunction(CallSite::get(NewCall), CG[F]); + + continue; + } + + CallerNode->addCalledFunction(CallSite::get(NewCall), I->second); } // Update the call graph by deleting the edge from Callee to Caller. We must @@ -219,13 +236,15 @@ static void UpdateCallGraphAfterInlining(CallSite CS, // exists in the instruction stream. Similiarly this will inline a recursive // function by one level. // -bool llvm::InlineFunction(CallSite CS, CallGraph *CG, const TargetData *TD, - SmallVectorImpl<AllocaInst*> *StaticAllocas) { +bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI) { Instruction *TheCall = CS.getInstruction(); LLVMContext &Context = TheCall->getContext(); assert(TheCall->getParent() && TheCall->getParent()->getParent() && "Instruction not in function!"); + // If IFI has any state in it, zap it before we fill it in. + IFI.reset(); + const Function *CalledFunc = CS.getCalledFunction(); if (CalledFunc == 0 || // Can't inline external function or indirect CalledFunc->isDeclaration() || // call, or call to a vararg function! @@ -292,7 +311,7 @@ bool llvm::InlineFunction(CallSite CS, CallGraph *CG, const TargetData *TD, // Create the alloca. If we have TargetData, use nice alignment. unsigned Align = 1; - if (TD) Align = TD->getPrefTypeAlignment(AggTy); + if (IFI.TD) Align = IFI.TD->getPrefTypeAlignment(AggTy); Value *NewAlloca = new AllocaInst(AggTy, 0, Align, I->getName(), &*Caller->begin()->begin()); @@ -305,11 +324,11 @@ bool llvm::InlineFunction(CallSite CS, CallGraph *CG, const TargetData *TD, Value *SrcCast = new BitCastInst(*AI, VoidPtrTy, "tmp", TheCall); Value *Size; - if (TD == 0) + if (IFI.TD == 0) Size = ConstantExpr::getSizeOf(AggTy); else Size = ConstantInt::get(Type::getInt64Ty(Context), - TD->getTypeStoreSize(AggTy)); + IFI.TD->getTypeStoreSize(AggTy)); // Always generate a memcpy of alignment 1 here because we don't know // the alignment of the src pointer. 
Other optimizations can infer @@ -323,7 +342,7 @@ bool llvm::InlineFunction(CallSite CS, CallGraph *CG, const TargetData *TD, CallInst::Create(MemCpyFn, CallArgs, CallArgs+5, "", TheCall); // If we have a call graph, update it. - if (CG) { + if (CallGraph *CG = IFI.CG) { CallGraphNode *MemCpyCGN = CG->getOrInsertFunction(MemCpyFn); CallGraphNode *CallerNode = (*CG)[Caller]; CallerNode->addCalledFunction(TheMemCpy, MemCpyCGN); @@ -342,14 +361,14 @@ bool llvm::InlineFunction(CallSite CS, CallGraph *CG, const TargetData *TD, // (which can happen, e.g., because an argument was constant), but we'll be // happy with whatever the cloner can do. CloneAndPruneFunctionInto(Caller, CalledFunc, ValueMap, Returns, ".i", - &InlinedFunctionInfo, TD, TheCall); + &InlinedFunctionInfo, IFI.TD, TheCall); // Remember the first block that is newly cloned over. FirstNewBlock = LastBlock; ++FirstNewBlock; // Update the callgraph if requested. - if (CG) - UpdateCallGraphAfterInlining(CS, FirstNewBlock, ValueMap, *CG); + if (IFI.CG) + UpdateCallGraphAfterInlining(CS, FirstNewBlock, ValueMap, IFI); } // If there are any alloca instructions in the block that used to be the entry @@ -376,13 +395,13 @@ bool llvm::InlineFunction(CallSite CS, CallGraph *CG, const TargetData *TD, // Keep track of the static allocas that we inline into the caller if the // StaticAllocas pointer is non-null. - if (StaticAllocas) StaticAllocas->push_back(AI); + IFI.StaticAllocas.push_back(AI); // Scan for the block of allocas that we can move over, and move them // all at once. while (isa<AllocaInst>(I) && isa<Constant>(cast<AllocaInst>(I)->getArraySize())) { - if (StaticAllocas) StaticAllocas->push_back(cast<AllocaInst>(I)); + IFI.StaticAllocas.push_back(cast<AllocaInst>(I)); ++I; } @@ -406,7 +425,7 @@ bool llvm::InlineFunction(CallSite CS, CallGraph *CG, const TargetData *TD, // If we are preserving the callgraph, add edges to the stacksave/restore // functions for the calls we insert. CallGraphNode *StackSaveCGN = 0, *StackRestoreCGN = 0, *CallerNode = 0; - if (CG) { + if (CallGraph *CG = IFI.CG) { StackSaveCGN = CG->getOrInsertFunction(StackSave); StackRestoreCGN = CG->getOrInsertFunction(StackRestore); CallerNode = (*CG)[Caller]; @@ -415,13 +434,13 @@ bool llvm::InlineFunction(CallSite CS, CallGraph *CG, const TargetData *TD, // Insert the llvm.stacksave. CallInst *SavedPtr = CallInst::Create(StackSave, "savedstack", FirstNewBlock->begin()); - if (CG) CallerNode->addCalledFunction(SavedPtr, StackSaveCGN); + if (IFI.CG) CallerNode->addCalledFunction(SavedPtr, StackSaveCGN); // Insert a call to llvm.stackrestore before any return instructions in the // inlined function. for (unsigned i = 0, e = Returns.size(); i != e; ++i) { CallInst *CI = CallInst::Create(StackRestore, SavedPtr, "", Returns[i]); - if (CG) CallerNode->addCalledFunction(CI, StackRestoreCGN); + if (IFI.CG) CallerNode->addCalledFunction(CI, StackRestoreCGN); } // Count the number of StackRestore calls we insert. 
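// Illustrative sketch (not part of this patch) of driving the reworked
// interface from a client pass; the helper name is invented for the example,
// and it assumes the declarations from llvm/Transforms/Utils/Cloning.h and
// llvm/Support/CallSite.h as used by the call sites above.
static bool InlineAndCollectAllocas(CallSite CS, const TargetData *TD,
                                    SmallVectorImpl<AllocaInst*> &Allocas) {
  InlineFunctionInfo IFI(0, TD);     // no call graph in this example
  if (!InlineFunction(CS, IFI))
    return false;
  // Static allocas moved into the caller (and any inlined call sites) are now
  // reported back through IFI instead of through extra out-parameters.
  Allocas.append(IFI.StaticAllocas.begin(), IFI.StaticAllocas.end());
  return true;
}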
@@ -434,7 +453,7 @@ bool llvm::InlineFunction(CallSite CS, CallGraph *CG, const TargetData *TD, BB != E; ++BB) if (UnwindInst *UI = dyn_cast<UnwindInst>(BB->getTerminator())) { CallInst *CI = CallInst::Create(StackRestore, SavedPtr, "", UI); - if (CG) CallerNode->addCalledFunction(CI, StackRestoreCGN); + if (IFI.CG) CallerNode->addCalledFunction(CI, StackRestoreCGN); ++NumStackRestores; } } diff --git a/lib/Transforms/Utils/LoopUnroll.cpp b/lib/Transforms/Utils/LoopUnroll.cpp index ac59b4d..84fd1eb 100644 --- a/lib/Transforms/Utils/LoopUnroll.cpp +++ b/lib/Transforms/Utils/LoopUnroll.cpp @@ -183,8 +183,8 @@ bool llvm::UnrollLoop(Loop *L, unsigned Count, LoopInfo* LI, LPPassManager* LPM) // For the first iteration of the loop, we should use the precloned values for // PHI nodes. Insert associations now. - typedef DenseMap<const Value*, Value*> ValueMapTy; - ValueMapTy LastValueMap; + typedef DenseMap<const Value*, Value*> ValueToValueMapTy; + ValueToValueMapTy LastValueMap; std::vector<PHINode*> OrigPHINode; for (BasicBlock::iterator I = Header->begin(); isa<PHINode>(I); ++I) { PHINode *PN = cast<PHINode>(I); @@ -205,7 +205,7 @@ bool llvm::UnrollLoop(Loop *L, unsigned Count, LoopInfo* LI, LPPassManager* LPM) for (std::vector<BasicBlock*>::iterator BB = LoopBlocks.begin(), E = LoopBlocks.end(); BB != E; ++BB) { - ValueMapTy ValueMap; + ValueToValueMapTy ValueMap; BasicBlock *New = CloneBasicBlock(*BB, ValueMap, "." + Twine(It)); Header->getParent()->getBasicBlockList().push_back(New); @@ -224,7 +224,7 @@ bool llvm::UnrollLoop(Loop *L, unsigned Count, LoopInfo* LI, LPPassManager* LPM) // Update our running map of newest clones LastValueMap[*BB] = New; - for (ValueMapTy::iterator VI = ValueMap.begin(), VE = ValueMap.end(); + for (ValueToValueMapTy::iterator VI = ValueMap.begin(), VE = ValueMap.end(); VI != VE; ++VI) LastValueMap[VI->first] = VI->second; diff --git a/lib/Transforms/Utils/LowerInvoke.cpp b/lib/Transforms/Utils/LowerInvoke.cpp index bbbcc1a..0ed8c72 100644 --- a/lib/Transforms/Utils/LowerInvoke.cpp +++ b/lib/Transforms/Utils/LowerInvoke.cpp @@ -70,15 +70,18 @@ namespace { // Used for expensive EH support. const Type *JBLinkTy; GlobalVariable *JBListHead; - Constant *SetJmpFn, *LongJmpFn; + Constant *SetJmpFn, *LongJmpFn, *StackSaveFn, *StackRestoreFn; + bool useExpensiveEHSupport; // We peek in TLI to grab the target's jmp_buf size and alignment const TargetLowering *TLI; public: static char ID; // Pass identification, replacement for typeid - explicit LowerInvoke(const TargetLowering *tli = NULL) - : FunctionPass(&ID), TLI(tli) { } + explicit LowerInvoke(const TargetLowering *tli = NULL, + bool useExpensiveEHSupport = ExpensiveEHSupport) + : FunctionPass(&ID), useExpensiveEHSupport(useExpensiveEHSupport), + TLI(tli) { } bool doInitialization(Module &M); bool runOnFunction(Function &F); @@ -94,7 +97,8 @@ namespace { bool insertCheapEHSupport(Function &F); void splitLiveRangesLiveAcrossInvokes(std::vector<InvokeInst*> &Invokes); void rewriteExpensiveInvoke(InvokeInst *II, unsigned InvokeNo, - AllocaInst *InvokeNum, SwitchInst *CatchSwitch); + AllocaInst *InvokeNum, AllocaInst *StackPtr, + SwitchInst *CatchSwitch); bool insertExpensiveEHSupport(Function &F); }; } @@ -107,7 +111,11 @@ const PassInfo *const llvm::LowerInvokePassID = &X; // Public Interface To the LowerInvoke pass. 
FunctionPass *llvm::createLowerInvokePass(const TargetLowering *TLI) { - return new LowerInvoke(TLI); + return new LowerInvoke(TLI, ExpensiveEHSupport); +} +FunctionPass *llvm::createLowerInvokePass(const TargetLowering *TLI, + bool useExpensiveEHSupport) { + return new LowerInvoke(TLI, useExpensiveEHSupport); } // doInitialization - Make sure that there is a prototype for abort in the @@ -116,7 +124,7 @@ bool LowerInvoke::doInitialization(Module &M) { const Type *VoidPtrTy = Type::getInt8PtrTy(M.getContext()); AbortMessage = 0; - if (ExpensiveEHSupport) { + if (useExpensiveEHSupport) { // Insert a type for the linked list of jump buffers. unsigned JBSize = TLI ? TLI->getJumpBufSize() : 0; JBSize = JBSize ? JBSize : 200; @@ -160,6 +168,8 @@ bool LowerInvoke::doInitialization(Module &M) { #endif LongJmpFn = Intrinsic::getDeclaration(&M, Intrinsic::longjmp); + StackSaveFn = Intrinsic::getDeclaration(&M, Intrinsic::stacksave); + StackRestoreFn = Intrinsic::getDeclaration(&M, Intrinsic::stackrestore); } // We need the 'write' and 'abort' functions for both models. @@ -175,7 +185,7 @@ bool LowerInvoke::doInitialization(Module &M) { } void LowerInvoke::createAbortMessage(Module *M) { - if (ExpensiveEHSupport) { + if (useExpensiveEHSupport) { // The abort message for expensive EH support tells the user that the // program 'unwound' without an 'invoke' instruction. Constant *Msg = @@ -229,7 +239,8 @@ bool LowerInvoke::insertCheapEHSupport(Function &F) { std::vector<Value*> CallArgs(II->op_begin(), II->op_end() - 3); // Insert a normal call instruction... CallInst *NewCall = CallInst::Create(II->getCalledValue(), - CallArgs.begin(), CallArgs.end(), "",II); + CallArgs.begin(), CallArgs.end(), + "",II); NewCall->takeName(II); NewCall->setCallingConv(II->getCallingConv()); NewCall->setAttributes(II->getAttributes()); @@ -270,6 +281,7 @@ bool LowerInvoke::insertCheapEHSupport(Function &F) { /// specified invoke instruction with a call. void LowerInvoke::rewriteExpensiveInvoke(InvokeInst *II, unsigned InvokeNo, AllocaInst *InvokeNum, + AllocaInst *StackPtr, SwitchInst *CatchSwitch) { ConstantInt *InvokeNoC = ConstantInt::get(Type::getInt32Ty(II->getContext()), InvokeNo); @@ -288,12 +300,22 @@ void LowerInvoke::rewriteExpensiveInvoke(InvokeInst *II, unsigned InvokeNo, // Insert a store of the invoke num before the invoke and store zero into the // location afterward. new StoreInst(InvokeNoC, InvokeNum, true, II); // volatile + + // Insert a store of the stack ptr before the invoke, so we can restore it + // later in the exception case. + CallInst* StackSaveRet = CallInst::Create(StackSaveFn, "ssret", II); + new StoreInst(StackSaveRet, StackPtr, true, II); // volatile BasicBlock::iterator NI = II->getNormalDest()->getFirstNonPHI(); // nonvolatile. new StoreInst(Constant::getNullValue(Type::getInt32Ty(II->getContext())), InvokeNum, false, NI); + Instruction* StackPtrLoad = new LoadInst(StackPtr, "stackptr.restore", true, + II->getUnwindDest()->getFirstNonPHI() + ); + CallInst::Create(StackRestoreFn, StackPtrLoad, "")->insertAfter(StackPtrLoad); + // Add a switch case to our unwind block. CatchSwitch->addCase(InvokeNoC, II->getUnwindDest()); @@ -500,6 +522,12 @@ bool LowerInvoke::insertExpensiveEHSupport(Function &F) { BasicBlock *CatchBB = BasicBlock::Create(F.getContext(), "setjmp.catch", &F); + // Create an alloca which keeps track of the stack pointer before every + // invoke, this allows us to properly restore the stack pointer after + // long jumping. 
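  // Sketch of what rewriteExpensiveInvoke above now emits around each invoke
  // (illustrative; names match the ones it creates):
  //   %ssret = call i8* @llvm.stacksave()     ; before the invoke, stored
  //                                           ; (volatile) into %stackptr
  //   ...
  //   ; at the head of the unwind destination:
  //   %stackptr.restore = (volatile) load of %stackptr
  //   call void @llvm.stackrestore(i8* %stackptr.restore)
  // so dynamic stack allocations made after the save are released whenever
  // the exception path is taken.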
+ AllocaInst *StackPtr = new AllocaInst(Type::getInt8PtrTy(F.getContext()), 0, + "stackptr", EntryBB->begin()); + // Create an alloca which keeps track of which invoke is currently // executing. For normal calls it contains zero. AllocaInst *InvokeNum = new AllocaInst(Type::getInt32Ty(F.getContext()), 0, @@ -546,7 +574,7 @@ bool LowerInvoke::insertExpensiveEHSupport(Function &F) { // At this point, we are all set up, rewrite each invoke instruction. for (unsigned i = 0, e = Invokes.size(); i != e; ++i) - rewriteExpensiveInvoke(Invokes[i], i+1, InvokeNum, CatchSwitch); + rewriteExpensiveInvoke(Invokes[i], i+1, InvokeNum, StackPtr, CatchSwitch); } // We know that there is at least one unwind. @@ -622,7 +650,7 @@ bool LowerInvoke::insertExpensiveEHSupport(Function &F) { } bool LowerInvoke::runOnFunction(Function &F) { - if (ExpensiveEHSupport) + if (useExpensiveEHSupport) return insertExpensiveEHSupport(F); else return insertCheapEHSupport(F); diff --git a/lib/Transforms/Utils/SSAUpdater.cpp b/lib/Transforms/Utils/SSAUpdater.cpp index a31235a..25d50db 100644 --- a/lib/Transforms/Utils/SSAUpdater.cpp +++ b/lib/Transforms/Utils/SSAUpdater.cpp @@ -11,34 +11,52 @@ // //===----------------------------------------------------------------------===// +#define DEBUG_TYPE "ssaupdater" #include "llvm/Transforms/Utils/SSAUpdater.h" #include "llvm/Instructions.h" #include "llvm/ADT/DenseMap.h" +#include "llvm/Support/AlignOf.h" +#include "llvm/Support/Allocator.h" #include "llvm/Support/CFG.h" #include "llvm/Support/Debug.h" -#include "llvm/Support/ValueHandle.h" #include "llvm/Support/raw_ostream.h" using namespace llvm; -typedef DenseMap<BasicBlock*, TrackingVH<Value> > AvailableValsTy; -typedef std::vector<std::pair<BasicBlock*, TrackingVH<Value> > > - IncomingPredInfoTy; - +/// BBInfo - Per-basic block information used internally by SSAUpdater. +/// The predecessors of each block are cached here since pred_iterator is +/// slow and we need to iterate over the blocks at least a few times. +class SSAUpdater::BBInfo { +public: + BasicBlock *BB; // Back-pointer to the corresponding block. + Value *AvailableVal; // Value to use in this block. + BBInfo *DefBB; // Block that defines the available value. + int BlkNum; // Postorder number. + BBInfo *IDom; // Immediate dominator. + unsigned NumPreds; // Number of predecessor blocks. + BBInfo **Preds; // Array[NumPreds] of predecessor blocks. + PHINode *PHITag; // Marker for existing PHIs that match. + + BBInfo(BasicBlock *ThisBB, Value *V) + : BB(ThisBB), AvailableVal(V), DefBB(V ? 
this : 0), BlkNum(0), IDom(0), + NumPreds(0), Preds(0), PHITag(0) { } +}; + +typedef DenseMap<BasicBlock*, SSAUpdater::BBInfo*> BBMapTy; + +typedef DenseMap<BasicBlock*, Value*> AvailableValsTy; static AvailableValsTy &getAvailableVals(void *AV) { return *static_cast<AvailableValsTy*>(AV); } -static IncomingPredInfoTy &getIncomingPredInfo(void *IPI) { - return *static_cast<IncomingPredInfoTy*>(IPI); +static BBMapTy *getBBMap(void *BM) { + return static_cast<BBMapTy*>(BM); } - SSAUpdater::SSAUpdater(SmallVectorImpl<PHINode*> *NewPHI) - : AV(0), PrototypeValue(0), IPI(0), InsertedPHIs(NewPHI) {} + : AV(0), PrototypeValue(0), BM(0), InsertedPHIs(NewPHI) {} SSAUpdater::~SSAUpdater() { delete &getAvailableVals(AV); - delete &getIncomingPredInfo(IPI); } /// Initialize - Reset this object to get ready for a new set of SSA @@ -48,11 +66,6 @@ void SSAUpdater::Initialize(Value *ProtoValue) { AV = new AvailableValsTy(); else getAvailableVals(AV).clear(); - - if (IPI == 0) - IPI = new IncomingPredInfoTy(); - else - getIncomingPredInfo(IPI).clear(); PrototypeValue = ProtoValue; } @@ -73,7 +86,7 @@ void SSAUpdater::AddAvailableValue(BasicBlock *BB, Value *V) { /// IsEquivalentPHI - Check if PHI has the same incoming value as specified /// in ValueMapping for each predecessor block. -static bool IsEquivalentPHI(PHINode *PHI, +static bool IsEquivalentPHI(PHINode *PHI, DenseMap<BasicBlock*, Value*> &ValueMapping) { unsigned PHINumValues = PHI->getNumIncomingValues(); if (PHINumValues != ValueMapping.size()) @@ -89,38 +102,12 @@ static bool IsEquivalentPHI(PHINode *PHI, return true; } -/// GetExistingPHI - Check if BB already contains a phi node that is equivalent -/// to the specified mapping from predecessor blocks to incoming values. -static Value *GetExistingPHI(BasicBlock *BB, - DenseMap<BasicBlock*, Value*> &ValueMapping) { - PHINode *SomePHI; - for (BasicBlock::iterator It = BB->begin(); - (SomePHI = dyn_cast<PHINode>(It)); ++It) { - if (IsEquivalentPHI(SomePHI, ValueMapping)) - return SomePHI; - } - return 0; -} - -/// GetExistingPHI - Check if BB already contains an equivalent phi node. -/// The InputIt type must be an iterator over std::pair<BasicBlock*, Value*> -/// objects that specify the mapping from predecessor blocks to incoming values. -template<typename InputIt> -static Value *GetExistingPHI(BasicBlock *BB, const InputIt &I, - const InputIt &E) { - // Avoid create the mapping if BB has no phi nodes at all. - if (!isa<PHINode>(BB->begin())) - return 0; - DenseMap<BasicBlock*, Value*> ValueMapping(I, E); - return GetExistingPHI(BB, ValueMapping); -} - /// GetValueAtEndOfBlock - Construct SSA form, materializing a value that is /// live at the end of the specified block. Value *SSAUpdater::GetValueAtEndOfBlock(BasicBlock *BB) { - assert(getIncomingPredInfo(IPI).empty() && "Unexpected Internal State"); + assert(BM == 0 && "Unexpected Internal State"); Value *Res = GetValueAtEndOfBlockInternal(BB); - assert(getIncomingPredInfo(IPI).empty() && "Unexpected Internal State"); + assert(BM == 0 && "Unexpected Internal State"); return Res; } @@ -146,7 +133,7 @@ Value *SSAUpdater::GetValueAtEndOfBlock(BasicBlock *BB) { Value *SSAUpdater::GetValueInMiddleOfBlock(BasicBlock *BB) { // If there is no definition of the renamed variable in this block, just use // GetValueAtEndOfBlock to do our work. - if (!getAvailableVals(AV).count(BB)) + if (!HasValueForBlock(BB)) return GetValueAtEndOfBlock(BB); // Otherwise, we have the hard case. 
Get the live-in values for each @@ -193,10 +180,18 @@ Value *SSAUpdater::GetValueInMiddleOfBlock(BasicBlock *BB) { if (SingularValue != 0) return SingularValue; - // Otherwise, we do need a PHI. - if (Value *ExistingPHI = GetExistingPHI(BB, PredValues.begin(), - PredValues.end())) - return ExistingPHI; + // Otherwise, we do need a PHI: check to see if we already have one available + // in this block that produces the right value. + if (isa<PHINode>(BB->begin())) { + DenseMap<BasicBlock*, Value*> ValueMapping(PredValues.begin(), + PredValues.end()); + PHINode *SomePHI; + for (BasicBlock::iterator It = BB->begin(); + (SomePHI = dyn_cast<PHINode>(It)); ++It) { + if (IsEquivalentPHI(SomePHI, ValueMapping)) + return SomePHI; + } + } // Ok, we have no way out, insert a new one now. PHINode *InsertedPHI = PHINode::Create(PrototypeValue->getType(), @@ -226,7 +221,7 @@ Value *SSAUpdater::GetValueInMiddleOfBlock(BasicBlock *BB) { /// which use their value in the corresponding predecessor. void SSAUpdater::RewriteUse(Use &U) { Instruction *User = cast<Instruction>(U.getUser()); - + Value *V; if (PHINode *UserPN = dyn_cast<PHINode>(User)) V = GetValueAtEndOfBlock(UserPN->getIncomingBlock(U)); @@ -236,161 +231,427 @@ void SSAUpdater::RewriteUse(Use &U) { U.set(V); } - /// GetValueAtEndOfBlockInternal - Check to see if AvailableVals has an entry /// for the specified BB and if so, return it. If not, construct SSA form by -/// walking predecessors inserting PHI nodes as needed until we get to a block -/// where the value is available. -/// +/// first calculating the required placement of PHIs and then inserting new +/// PHIs where needed. Value *SSAUpdater::GetValueAtEndOfBlockInternal(BasicBlock *BB) { AvailableValsTy &AvailableVals = getAvailableVals(AV); + if (Value *V = AvailableVals[BB]) + return V; + + // Pool allocation used internally by GetValueAtEndOfBlock. + BumpPtrAllocator Allocator; + BBMapTy BBMapObj; + BM = &BBMapObj; + + SmallVector<BBInfo*, 100> BlockList; + BuildBlockList(BB, &BlockList, &Allocator); - // Query AvailableVals by doing an insertion of null. - std::pair<AvailableValsTy::iterator, bool> InsertRes = - AvailableVals.insert(std::make_pair(BB, TrackingVH<Value>())); - - // Handle the case when the insertion fails because we have already seen BB. - if (!InsertRes.second) { - // If the insertion failed, there are two cases. The first case is that the - // value is already available for the specified block. If we get this, just - // return the value. - if (InsertRes.first->second != 0) - return InsertRes.first->second; - - // Otherwise, if the value we find is null, then this is the value is not - // known but it is being computed elsewhere in our recursion. This means - // that we have a cycle. Handle this by inserting a PHI node and returning - // it. When we get back to the first instance of the recursion we will fill - // in the PHI node. - return InsertRes.first->second = - PHINode::Create(PrototypeValue->getType(), PrototypeValue->getName(), - &BB->front()); + // Special case: bail out if BB is unreachable. + if (BlockList.size() == 0) { + BM = 0; + return UndefValue::get(PrototypeValue->getType()); } - // Okay, the value isn't in the map and we just inserted a null in the entry - // to indicate that we're processing the block. Since we have no idea what - // value is in this block, we have to recurse through our predecessors. - // - // While we're walking our predecessors, we keep track of them in a vector, - // then insert a PHI node in the end if we actually need one. 
We could use a - // smallvector here, but that would take a lot of stack space for every level - // of the recursion, just use IncomingPredInfo as an explicit stack. - IncomingPredInfoTy &IncomingPredInfo = getIncomingPredInfo(IPI); - unsigned FirstPredInfoEntry = IncomingPredInfo.size(); - - // As we're walking the predecessors, keep track of whether they are all - // producing the same value. If so, this value will capture it, if not, it - // will get reset to null. We distinguish the no-predecessor case explicitly - // below. - TrackingVH<Value> ExistingValue; + FindDominators(&BlockList); + FindPHIPlacement(&BlockList); + FindAvailableVals(&BlockList); - // We can get our predecessor info by walking the pred_iterator list, but it - // is relatively slow. If we already have PHI nodes in this block, walk one - // of them to get the predecessor list instead. + BM = 0; + return BBMapObj[BB]->DefBB->AvailableVal; +} + +/// FindPredecessorBlocks - Put the predecessors of Info->BB into the Preds +/// vector, set Info->NumPreds, and allocate space in Info->Preds. +static void FindPredecessorBlocks(SSAUpdater::BBInfo *Info, + SmallVectorImpl<BasicBlock*> *Preds, + BumpPtrAllocator *Allocator) { + // We can get our predecessor info by walking the pred_iterator list, + // but it is relatively slow. If we already have PHI nodes in this + // block, walk one of them to get the predecessor list instead. + BasicBlock *BB = Info->BB; if (PHINode *SomePhi = dyn_cast<PHINode>(BB->begin())) { - for (unsigned i = 0, e = SomePhi->getNumIncomingValues(); i != e; ++i) { - BasicBlock *PredBB = SomePhi->getIncomingBlock(i); - Value *PredVal = GetValueAtEndOfBlockInternal(PredBB); - IncomingPredInfo.push_back(std::make_pair(PredBB, PredVal)); + for (unsigned PI = 0, E = SomePhi->getNumIncomingValues(); PI != E; ++PI) + Preds->push_back(SomePhi->getIncomingBlock(PI)); + } else { + for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI) + Preds->push_back(*PI); + } - // Set ExistingValue to singular value from all predecessors so far. - if (i == 0) - ExistingValue = PredVal; - else if (PredVal != ExistingValue) - ExistingValue = 0; + Info->NumPreds = Preds->size(); + Info->Preds = static_cast<SSAUpdater::BBInfo**> + (Allocator->Allocate(Info->NumPreds * sizeof(SSAUpdater::BBInfo*), + AlignOf<SSAUpdater::BBInfo*>::Alignment)); +} + +/// BuildBlockList - Starting from the specified basic block, traverse back +/// through its predecessors until reaching blocks with known values. Create +/// BBInfo structures for the blocks and append them to the block list. +void SSAUpdater::BuildBlockList(BasicBlock *BB, BlockListTy *BlockList, + BumpPtrAllocator *Allocator) { + AvailableValsTy &AvailableVals = getAvailableVals(AV); + BBMapTy *BBMap = getBBMap(BM); + SmallVector<BBInfo*, 10> RootList; + SmallVector<BBInfo*, 64> WorkList; + + BBInfo *Info = new (*Allocator) BBInfo(BB, 0); + (*BBMap)[BB] = Info; + WorkList.push_back(Info); + + // Search backward from BB, creating BBInfos along the way and stopping when + // reaching blocks that define the value. Record those defining blocks on + // the RootList. + SmallVector<BasicBlock*, 10> Preds; + while (!WorkList.empty()) { + Info = WorkList.pop_back_val(); + Preds.clear(); + FindPredecessorBlocks(Info, &Preds, Allocator); + + // Treat an unreachable predecessor as a definition with 'undef'. 
+ if (Info->NumPreds == 0) { + Info->AvailableVal = UndefValue::get(PrototypeValue->getType()); + Info->DefBB = Info; + RootList.push_back(Info); + continue; } - } else { - bool isFirstPred = true; - for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI) { - BasicBlock *PredBB = *PI; - Value *PredVal = GetValueAtEndOfBlockInternal(PredBB); - IncomingPredInfo.push_back(std::make_pair(PredBB, PredVal)); - // Set ExistingValue to singular value from all predecessors so far. - if (isFirstPred) { - ExistingValue = PredVal; - isFirstPred = false; - } else if (PredVal != ExistingValue) - ExistingValue = 0; + for (unsigned p = 0; p != Info->NumPreds; ++p) { + BasicBlock *Pred = Preds[p]; + // Check if BBMap already has a BBInfo for the predecessor block. + BBMapTy::value_type &BBMapBucket = BBMap->FindAndConstruct(Pred); + if (BBMapBucket.second) { + Info->Preds[p] = BBMapBucket.second; + continue; + } + + // Create a new BBInfo for the predecessor. + Value *PredVal = AvailableVals.lookup(Pred); + BBInfo *PredInfo = new (*Allocator) BBInfo(Pred, PredVal); + BBMapBucket.second = PredInfo; + Info->Preds[p] = PredInfo; + + if (PredInfo->AvailableVal) { + RootList.push_back(PredInfo); + continue; + } + WorkList.push_back(PredInfo); + } + } + + // Now that we know what blocks are backwards-reachable from the starting + // block, do a forward depth-first traversal to assign postorder numbers + // to those blocks. + BBInfo *PseudoEntry = new (*Allocator) BBInfo(0, 0); + unsigned BlkNum = 1; + + // Initialize the worklist with the roots from the backward traversal. + while (!RootList.empty()) { + Info = RootList.pop_back_val(); + Info->IDom = PseudoEntry; + Info->BlkNum = -1; + WorkList.push_back(Info); + } + + while (!WorkList.empty()) { + Info = WorkList.back(); + + if (Info->BlkNum == -2) { + // All the successors have been handled; assign the postorder number. + Info->BlkNum = BlkNum++; + // If not a root, put it on the BlockList. + if (!Info->AvailableVal) + BlockList->push_back(Info); + WorkList.pop_back(); + continue; + } + + // Leave this entry on the worklist, but set its BlkNum to mark that its + // successors have been put on the worklist. When it returns to the top + // the list, after handling its successors, it will be assigned a number. + Info->BlkNum = -2; + + // Add unvisited successors to the work list. + for (succ_iterator SI = succ_begin(Info->BB), E = succ_end(Info->BB); + SI != E; ++SI) { + BBInfo *SuccInfo = (*BBMap)[*SI]; + if (!SuccInfo || SuccInfo->BlkNum) + continue; + SuccInfo->BlkNum = -1; + WorkList.push_back(SuccInfo); } } + PseudoEntry->BlkNum = BlkNum; +} - // If there are no predecessors, then we must have found an unreachable block - // just return 'undef'. Since there are no predecessors, InsertRes must not - // be invalidated. - if (IncomingPredInfo.size() == FirstPredInfoEntry) - return InsertRes.first->second = UndefValue::get(PrototypeValue->getType()); - - /// Look up BB's entry in AvailableVals. 'InsertRes' may be invalidated. If - /// this block is involved in a loop, a no-entry PHI node will have been - /// inserted as InsertedVal. Otherwise, we'll still have the null we inserted - /// above. - TrackingVH<Value> &InsertedVal = AvailableVals[BB]; - - // If the predecessor values are not all the same, then check to see if there - // is an existing PHI that can be used. 
- if (!ExistingValue) - ExistingValue = GetExistingPHI(BB, - IncomingPredInfo.begin()+FirstPredInfoEntry, - IncomingPredInfo.end()); - - // If there is an existing value we can use, then we don't need to insert a - // PHI. This is the simple and common case. - if (ExistingValue) { - // If a PHI node got inserted, replace it with the existing value and delete - // it. - if (InsertedVal) { - PHINode *OldVal = cast<PHINode>(InsertedVal); - // Be careful about dead loops. These RAUW's also update InsertedVal. - if (InsertedVal != ExistingValue) - OldVal->replaceAllUsesWith(ExistingValue); - else - OldVal->replaceAllUsesWith(UndefValue::get(InsertedVal->getType())); - OldVal->eraseFromParent(); - } else { - InsertedVal = ExistingValue; +/// IntersectDominators - This is the dataflow lattice "meet" operation for +/// finding dominators. Given two basic blocks, it walks up the dominator +/// tree until it finds a common dominator of both. It uses the postorder +/// number of the blocks to determine how to do that. +static SSAUpdater::BBInfo *IntersectDominators(SSAUpdater::BBInfo *Blk1, + SSAUpdater::BBInfo *Blk2) { + while (Blk1 != Blk2) { + while (Blk1->BlkNum < Blk2->BlkNum) { + Blk1 = Blk1->IDom; + if (!Blk1) + return Blk2; } + while (Blk2->BlkNum < Blk1->BlkNum) { + Blk2 = Blk2->IDom; + if (!Blk2) + return Blk1; + } + } + return Blk1; +} - // Either path through the 'if' should have set InsertedVal -> ExistingVal. - assert((InsertedVal == ExistingValue || isa<UndefValue>(InsertedVal)) && - "RAUW didn't change InsertedVal to be ExistingValue"); +/// FindDominators - Calculate the dominator tree for the subset of the CFG +/// corresponding to the basic blocks on the BlockList. This uses the +/// algorithm from: "A Simple, Fast Dominance Algorithm" by Cooper, Harvey and +/// Kennedy, published in Software--Practice and Experience, 2001, 4:1-10. +/// Because the CFG subset does not include any edges leading into blocks that +/// define the value, the results are not the usual dominator tree. The CFG +/// subset has a single pseudo-entry node with edges to a set of root nodes +/// for blocks that define the value. The dominators for this subset CFG are +/// not the standard dominators but they are adequate for placing PHIs within +/// the subset CFG. +void SSAUpdater::FindDominators(BlockListTy *BlockList) { + bool Changed; + do { + Changed = false; + // Iterate over the list in reverse order, i.e., forward on CFG edges. + for (BlockListTy::reverse_iterator I = BlockList->rbegin(), + E = BlockList->rend(); I != E; ++I) { + BBInfo *Info = *I; + + // Start with the first predecessor. + assert(Info->NumPreds > 0 && "unreachable block"); + BBInfo *NewIDom = Info->Preds[0]; + + // Iterate through the block's other predecessors. + for (unsigned p = 1; p != Info->NumPreds; ++p) { + BBInfo *Pred = Info->Preds[p]; + NewIDom = IntersectDominators(NewIDom, Pred); + } + + // Check if the IDom value has changed. + if (NewIDom != Info->IDom) { + Info->IDom = NewIDom; + Changed = true; + } + } + } while (Changed); +} - // Drop the entries we added in IncomingPredInfo to restore the stack. - IncomingPredInfo.erase(IncomingPredInfo.begin()+FirstPredInfoEntry, - IncomingPredInfo.end()); - return ExistingValue; +/// IsDefInDomFrontier - Search up the dominator tree from Pred to IDom for +/// any blocks containing definitions of the value. If one is found, then the +/// successor of Pred is in the dominance frontier for the definition, and +/// this function returns true. 
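As a self-contained illustration of the intersection step used by FindDominators above (walk whichever block has the smaller postorder number up its tentative IDom chain until the two meet), here is a sketch on a four-node diamond; the Node struct and the hard-coded postorder numbers are assumptions, unrelated to BBInfo:

#include <cstdio>

// Simplified stand-in for BBInfo: a larger BlkNum means closer to the entry,
// matching the postorder numbering computed by BuildBlockList.
struct Node { const char *Name; unsigned BlkNum; Node *IDom; };

// The same "two-finger" meet as IntersectDominators, minus the null checks.
static Node *intersect(Node *A, Node *B) {
  while (A != B) {
    while (A->BlkNum < B->BlkNum) A = A->IDom;
    while (B->BlkNum < A->BlkNum) B = B->IDom;
  }
  return A;
}

int main() {
  // Diamond CFG: Entry -> {Left, Right} -> Exit.
  Node Entry = { "Entry", 4, 0 };
  Node Left  = { "Left",  3, &Entry };
  Node Right = { "Right", 2, &Entry };
  Node Exit  = { "Exit",  1, &Left };        // tentative IDom from the first pred
  Exit.IDom = intersect(Exit.IDom, &Right);  // meet with the second pred
  std::printf("idom(Exit) = %s\n", Exit.IDom->Name);  // prints "Entry"
  return 0;
}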
+static bool IsDefInDomFrontier(const SSAUpdater::BBInfo *Pred, + const SSAUpdater::BBInfo *IDom) { + for (; Pred != IDom; Pred = Pred->IDom) { + if (Pred->DefBB == Pred) + return true; } + return false; +} - // Otherwise, we do need a PHI: insert one now if we don't already have one. - if (InsertedVal == 0) - InsertedVal = PHINode::Create(PrototypeValue->getType(), - PrototypeValue->getName(), &BB->front()); +/// FindPHIPlacement - PHIs are needed in the iterated dominance frontiers of +/// the known definitions. Iteratively add PHIs in the dom frontiers until +/// nothing changes. Along the way, keep track of the nearest dominating +/// definitions for non-PHI blocks. +void SSAUpdater::FindPHIPlacement(BlockListTy *BlockList) { + bool Changed; + do { + Changed = false; + // Iterate over the list in reverse order, i.e., forward on CFG edges. + for (BlockListTy::reverse_iterator I = BlockList->rbegin(), + E = BlockList->rend(); I != E; ++I) { + BBInfo *Info = *I; + + // If this block already needs a PHI, there is nothing to do here. + if (Info->DefBB == Info) + continue; + + // Default to use the same def as the immediate dominator. + BBInfo *NewDefBB = Info->IDom->DefBB; + for (unsigned p = 0; p != Info->NumPreds; ++p) { + if (IsDefInDomFrontier(Info->Preds[p], Info->IDom)) { + // Need a PHI here. + NewDefBB = Info; + break; + } + } + + // Check if anything changed. + if (NewDefBB != Info->DefBB) { + Info->DefBB = NewDefBB; + Changed = true; + } + } + } while (Changed); +} - PHINode *InsertedPHI = cast<PHINode>(InsertedVal); - InsertedPHI->reserveOperandSpace(IncomingPredInfo.size()-FirstPredInfoEntry); +/// FindAvailableVal - If this block requires a PHI, first check if an existing +/// PHI matches the PHI placement and reaching definitions computed earlier, +/// and if not, create a new PHI. Visit all the block's predecessors to +/// calculate the available value for each one and fill in the incoming values +/// for a new PHI. +void SSAUpdater::FindAvailableVals(BlockListTy *BlockList) { + AvailableValsTy &AvailableVals = getAvailableVals(AV); - // Fill in all the predecessors of the PHI. - for (IncomingPredInfoTy::iterator I = - IncomingPredInfo.begin()+FirstPredInfoEntry, - E = IncomingPredInfo.end(); I != E; ++I) - InsertedPHI->addIncoming(I->second, I->first); + // Go through the worklist in forward order (i.e., backward through the CFG) + // and check if existing PHIs can be used. If not, create empty PHIs where + // they are needed. + for (BlockListTy::iterator I = BlockList->begin(), E = BlockList->end(); + I != E; ++I) { + BBInfo *Info = *I; + // Check if there needs to be a PHI in BB. + if (Info->DefBB != Info) + continue; + + // Look for an existing PHI. + FindExistingPHI(Info->BB, BlockList); + if (Info->AvailableVal) + continue; + + PHINode *PHI = PHINode::Create(PrototypeValue->getType(), + PrototypeValue->getName(), + &Info->BB->front()); + PHI->reserveOperandSpace(Info->NumPreds); + Info->AvailableVal = PHI; + AvailableVals[Info->BB] = PHI; + } - // Drop the entries we added in IncomingPredInfo to restore the stack. - IncomingPredInfo.erase(IncomingPredInfo.begin()+FirstPredInfoEntry, - IncomingPredInfo.end()); + // Now go back through the worklist in reverse order to fill in the arguments + // for any new PHIs added in the forward traversal. 
+ for (BlockListTy::reverse_iterator I = BlockList->rbegin(), + E = BlockList->rend(); I != E; ++I) { + BBInfo *Info = *I; + + if (Info->DefBB != Info) { + // Record the available value at join nodes to speed up subsequent + // uses of this SSAUpdater for the same value. + if (Info->NumPreds > 1) + AvailableVals[Info->BB] = Info->DefBB->AvailableVal; + continue; + } - // See if the PHI node can be merged to a single value. This can happen in - // loop cases when we get a PHI of itself and one other value. - if (Value *ConstVal = InsertedPHI->hasConstantValue()) { - InsertedPHI->replaceAllUsesWith(ConstVal); - InsertedPHI->eraseFromParent(); - InsertedVal = ConstVal; - } else { - DEBUG(dbgs() << " Inserted PHI: " << *InsertedPHI << "\n"); + // Check if this block contains a newly added PHI. + PHINode *PHI = dyn_cast<PHINode>(Info->AvailableVal); + if (!PHI || PHI->getNumIncomingValues() == Info->NumPreds) + continue; + + // Iterate through the block's predecessors. + for (unsigned p = 0; p != Info->NumPreds; ++p) { + BBInfo *PredInfo = Info->Preds[p]; + BasicBlock *Pred = PredInfo->BB; + // Skip to the nearest preceding definition. + if (PredInfo->DefBB != PredInfo) + PredInfo = PredInfo->DefBB; + PHI->addIncoming(PredInfo->AvailableVal, Pred); + } + + DEBUG(dbgs() << " Inserted PHI: " << *PHI << "\n"); // If the client wants to know about all new instructions, tell it. - if (InsertedPHIs) InsertedPHIs->push_back(InsertedPHI); + if (InsertedPHIs) InsertedPHIs->push_back(PHI); + } +} + +/// FindExistingPHI - Look through the PHI nodes in a block to see if any of +/// them match what is needed. +void SSAUpdater::FindExistingPHI(BasicBlock *BB, BlockListTy *BlockList) { + PHINode *SomePHI; + for (BasicBlock::iterator It = BB->begin(); + (SomePHI = dyn_cast<PHINode>(It)); ++It) { + if (CheckIfPHIMatches(SomePHI)) { + RecordMatchingPHI(SomePHI); + break; + } + // Match failed: clear all the PHITag values. + for (BlockListTy::iterator I = BlockList->begin(), E = BlockList->end(); + I != E; ++I) + (*I)->PHITag = 0; + } +} + +/// CheckIfPHIMatches - Check if a PHI node matches the placement and values +/// in the BBMap. +bool SSAUpdater::CheckIfPHIMatches(PHINode *PHI) { + BBMapTy *BBMap = getBBMap(BM); + SmallVector<PHINode*, 20> WorkList; + WorkList.push_back(PHI); + + // Mark that the block containing this PHI has been visited. + (*BBMap)[PHI->getParent()]->PHITag = PHI; + + while (!WorkList.empty()) { + PHI = WorkList.pop_back_val(); + + // Iterate through the PHI's incoming values. + for (unsigned i = 0, e = PHI->getNumIncomingValues(); i != e; ++i) { + Value *IncomingVal = PHI->getIncomingValue(i); + BBInfo *PredInfo = (*BBMap)[PHI->getIncomingBlock(i)]; + // Skip to the nearest preceding definition. + if (PredInfo->DefBB != PredInfo) + PredInfo = PredInfo->DefBB; + + // Check if it matches the expected value. + if (PredInfo->AvailableVal) { + if (IncomingVal == PredInfo->AvailableVal) + continue; + return false; + } + + // Check if the value is a PHI in the correct block. + PHINode *IncomingPHIVal = dyn_cast<PHINode>(IncomingVal); + if (!IncomingPHIVal || IncomingPHIVal->getParent() != PredInfo->BB) + return false; + + // If this block has already been visited, check if this PHI matches. 
+ if (PredInfo->PHITag) { + if (IncomingPHIVal == PredInfo->PHITag) + continue; + return false; + } + PredInfo->PHITag = IncomingPHIVal; + + WorkList.push_back(IncomingPHIVal); + } } + return true; +} - return InsertedVal; +/// RecordMatchingPHI - For a PHI node that matches, record it and its input +/// PHIs in both the BBMap and the AvailableVals mapping. +void SSAUpdater::RecordMatchingPHI(PHINode *PHI) { + BBMapTy *BBMap = getBBMap(BM); + AvailableValsTy &AvailableVals = getAvailableVals(AV); + SmallVector<PHINode*, 20> WorkList; + WorkList.push_back(PHI); + + // Record this PHI. + BasicBlock *BB = PHI->getParent(); + AvailableVals[BB] = PHI; + (*BBMap)[BB]->AvailableVal = PHI; + + while (!WorkList.empty()) { + PHI = WorkList.pop_back_val(); + + // Iterate through the PHI's incoming values. + for (unsigned i = 0, e = PHI->getNumIncomingValues(); i != e; ++i) { + PHINode *IncomingPHIVal = dyn_cast<PHINode>(PHI->getIncomingValue(i)); + if (!IncomingPHIVal) continue; + BB = IncomingPHIVal->getParent(); + BBInfo *Info = (*BBMap)[BB]; + if (!Info || Info->AvailableVal) + continue; + + // Record the PHI and add it to the worklist. + AvailableVals[BB] = IncomingPHIVal; + Info->AvailableVal = IncomingPHIVal; + WorkList.push_back(IncomingPHIVal); + } + } } diff --git a/lib/Transforms/Utils/ValueMapper.cpp b/lib/Transforms/Utils/ValueMapper.cpp index 6045048..87ce631 100644 --- a/lib/Transforms/Utils/ValueMapper.cpp +++ b/lib/Transforms/Utils/ValueMapper.cpp @@ -12,7 +12,7 @@ // //===----------------------------------------------------------------------===// -#include "llvm/Transforms/Utils/ValueMapper.h" +#include "ValueMapper.h" #include "llvm/Type.h" #include "llvm/Constants.h" #include "llvm/Function.h" @@ -20,7 +20,7 @@ #include "llvm/ADT/SmallVector.h" using namespace llvm; -Value *llvm::MapValue(const Value *V, ValueMapTy &VM) { +Value *llvm::MapValue(const Value *V, ValueToValueMapTy &VM) { Value *&VMSlot = VM[V]; if (VMSlot) return VMSlot; // Does it exist in the map yet? @@ -127,7 +127,7 @@ Value *llvm::MapValue(const Value *V, ValueMapTy &VM) { /// RemapInstruction - Convert the instruction operands from referencing the /// current values into those specified by ValueMap. /// -void llvm::RemapInstruction(Instruction *I, ValueMapTy &ValueMap) { +void llvm::RemapInstruction(Instruction *I, ValueToValueMapTy &ValueMap) { for (User::op_iterator op = I->op_begin(), E = I->op_end(); op != E; ++op) { Value *V = MapValue(*op, ValueMap); assert(V && "Referenced value not in value map!"); diff --git a/lib/Transforms/Utils/ValueMapper.h b/lib/Transforms/Utils/ValueMapper.h new file mode 100644 index 0000000..d61c24c --- /dev/null +++ b/lib/Transforms/Utils/ValueMapper.h @@ -0,0 +1,29 @@ +//===- ValueMapper.h - Interface shared by lib/Transforms/Utils -*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines the MapValue interface which is used by various parts of +// the Transforms/Utils library to implement cloning and linking facilities. 
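A brief sketch of how this interface is typically used by the cloning utilities it mentions; cloneAndRemap and its arguments are assumed names, shown for illustration only:

#include "ValueMapper.h"
#include "llvm/Instruction.h"
using namespace llvm;

// Hypothetical example: clone one instruction and retarget its operands using
// an already-populated old-value -> new-value map, as a cloning pass would.
static Instruction *cloneAndRemap(const Instruction *OldInst,
                                  ValueToValueMapTy &VMap) {
  Instruction *NewInst = OldInst->clone();  // operands still point at old values
  RemapInstruction(NewInst, VMap);          // rewrites each operand via MapValue
  return NewInst;
}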
+// +//===----------------------------------------------------------------------===// + +#ifndef VALUEMAPPER_H +#define VALUEMAPPER_H + +#include "llvm/ADT/DenseMap.h" + +namespace llvm { + class Value; + class Instruction; + typedef DenseMap<const Value *, Value *> ValueToValueMapTy; + + Value *MapValue(const Value *V, ValueToValueMapTy &VM); + void RemapInstruction(Instruction *I, ValueToValueMapTy &VM); +} // End llvm namespace + +#endif diff --git a/lib/VMCore/AsmWriter.cpp b/lib/VMCore/AsmWriter.cpp index f6a6076..6c1aa5e 100644 --- a/lib/VMCore/AsmWriter.cpp +++ b/lib/VMCore/AsmWriter.cpp @@ -227,13 +227,15 @@ void TypePrinting::CalcTypeName(const Type *Ty, const StructType *STy = cast<StructType>(Ty); if (STy->isPacked()) OS << '<'; - OS << "{ "; + OS << '{'; for (StructType::element_iterator I = STy->element_begin(), E = STy->element_end(); I != E; ++I) { + OS << ' '; CalcTypeName(*I, TypeStack, OS); - if (next(I) != STy->element_end()) + if (next(I) == STy->element_end()) + OS << ' '; + else OS << ','; - OS << ' '; } OS << '}'; if (STy->isPacked()) @@ -242,13 +244,15 @@ void TypePrinting::CalcTypeName(const Type *Ty, } case Type::UnionTyID: { const UnionType *UTy = cast<UnionType>(Ty); - OS << "union { "; + OS << "union {"; for (StructType::element_iterator I = UTy->element_begin(), E = UTy->element_end(); I != E; ++I) { + OS << ' '; CalcTypeName(*I, TypeStack, OS); - if (next(I) != UTy->element_end()) + if (next(I) == UTy->element_end()) + OS << ' '; + else OS << ','; - OS << ' '; } OS << '}'; break; diff --git a/lib/VMCore/AutoUpgrade.cpp b/lib/VMCore/AutoUpgrade.cpp index 4d06b66..0144210 100644 --- a/lib/VMCore/AutoUpgrade.cpp +++ b/lib/VMCore/AutoUpgrade.cpp @@ -19,6 +19,7 @@ #include "llvm/IntrinsicInst.h" #include "llvm/ADT/SmallVector.h" #include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/IRBuilder.h" #include <cstring> using namespace llvm; @@ -277,8 +278,13 @@ static bool UpgradeIntrinsicFunction1(Function *F, Function *&NewFn) { // Calls to these intrinsics are transformed into vector multiplies. NewFn = 0; return true; + } else if (Name.compare(5, 18, "x86.ssse3.palign.r", 18) == 0 || + Name.compare(5, 22, "x86.ssse3.palign.r.128", 22) == 0) { + // Calls to these intrinsics are transformed into vector shuffles, shifts, + // or 0. + NewFn = 0; + return true; } - break; } @@ -420,6 +426,118 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) { // Remove upgraded multiply. CI->eraseFromParent(); + } else if (F->getName() == "llvm.x86.ssse3.palign.r") { + Value *Op1 = CI->getOperand(1); + Value *Op2 = CI->getOperand(2); + Value *Op3 = CI->getOperand(3); + unsigned shiftVal = cast<ConstantInt>(Op3)->getZExtValue(); + Value *Rep; + IRBuilder<> Builder(C); + Builder.SetInsertPoint(CI->getParent(), CI); + + // If palignr is shifting the pair of input vectors less than 9 bytes, + // emit a shuffle instruction. 
+ if (shiftVal <= 8) { + const Type *IntTy = Type::getInt32Ty(C); + const Type *EltTy = Type::getInt8Ty(C); + const Type *VecTy = VectorType::get(EltTy, 8); + + Op2 = Builder.CreateBitCast(Op2, VecTy); + Op1 = Builder.CreateBitCast(Op1, VecTy); + + llvm::SmallVector<llvm::Constant*, 8> Indices; + for (unsigned i = 0; i != 8; ++i) + Indices.push_back(ConstantInt::get(IntTy, shiftVal + i)); + + Value *SV = ConstantVector::get(Indices.begin(), Indices.size()); + Rep = Builder.CreateShuffleVector(Op2, Op1, SV, "palignr"); + Rep = Builder.CreateBitCast(Rep, F->getReturnType()); + } + + // If palignr is shifting the pair of input vectors more than 8 but less + // than 16 bytes, emit a logical right shift of the destination. + else if (shiftVal < 16) { + // MMX has these as 1 x i64 vectors for some odd optimization reasons. + const Type *EltTy = Type::getInt64Ty(C); + const Type *VecTy = VectorType::get(EltTy, 1); + + Op1 = Builder.CreateBitCast(Op1, VecTy, "cast"); + Op2 = ConstantInt::get(VecTy, (shiftVal-8) * 8); + + // create i32 constant + Function *I = + Intrinsic::getDeclaration(F->getParent(), Intrinsic::x86_mmx_psrl_q); + Rep = Builder.CreateCall2(I, Op1, Op2, "palignr"); + } + + // If palignr is shifting the pair of vectors more than 32 bytes, emit zero. + else { + Rep = Constant::getNullValue(F->getReturnType()); + } + + // Replace any uses with our new instruction. + if (!CI->use_empty()) + CI->replaceAllUsesWith(Rep); + + // Remove upgraded instruction. + CI->eraseFromParent(); + + } else if (F->getName() == "llvm.x86.ssse3.palign.r.128") { + Value *Op1 = CI->getOperand(1); + Value *Op2 = CI->getOperand(2); + Value *Op3 = CI->getOperand(3); + unsigned shiftVal = cast<ConstantInt>(Op3)->getZExtValue(); + Value *Rep; + IRBuilder<> Builder(C); + Builder.SetInsertPoint(CI->getParent(), CI); + + // If palignr is shifting the pair of input vectors less than 17 bytes, + // emit a shuffle instruction. + if (shiftVal <= 16) { + const Type *IntTy = Type::getInt32Ty(C); + const Type *EltTy = Type::getInt8Ty(C); + const Type *VecTy = VectorType::get(EltTy, 16); + + Op2 = Builder.CreateBitCast(Op2, VecTy); + Op1 = Builder.CreateBitCast(Op1, VecTy); + + llvm::SmallVector<llvm::Constant*, 16> Indices; + for (unsigned i = 0; i != 16; ++i) + Indices.push_back(ConstantInt::get(IntTy, shiftVal + i)); + + Value *SV = ConstantVector::get(Indices.begin(), Indices.size()); + Rep = Builder.CreateShuffleVector(Op2, Op1, SV, "palignr"); + Rep = Builder.CreateBitCast(Rep, F->getReturnType()); + } + + // If palignr is shifting the pair of input vectors more than 16 but less + // than 32 bytes, emit a logical right shift of the destination. + else if (shiftVal < 32) { + const Type *EltTy = Type::getInt64Ty(C); + const Type *VecTy = VectorType::get(EltTy, 2); + const Type *IntTy = Type::getInt32Ty(C); + + Op1 = Builder.CreateBitCast(Op1, VecTy, "cast"); + Op2 = ConstantInt::get(IntTy, (shiftVal-16) * 8); + + // create i32 constant + Function *I = + Intrinsic::getDeclaration(F->getParent(), Intrinsic::x86_sse2_psrl_dq); + Rep = Builder.CreateCall2(I, Op1, Op2, "palignr"); + } + + // If palignr is shifting the pair of vectors more than 32 bytes, emit zero. + else { + Rep = Constant::getNullValue(F->getReturnType()); + } + + // Replace any uses with our new instruction. + if (!CI->use_empty()) + CI->replaceAllUsesWith(Rep); + + // Remove upgraded instruction. 
+ CI->eraseFromParent(); + } else { llvm_unreachable("Unknown function for CallInst upgrade."); } diff --git a/lib/VMCore/Constants.cpp b/lib/VMCore/Constants.cpp index 1553bd5..00b0094 100644 --- a/lib/VMCore/Constants.cpp +++ b/lib/VMCore/Constants.cpp @@ -1224,20 +1224,20 @@ Constant *ConstantExpr::getCast(unsigned oc, Constant *C, const Type *Ty) { Constant *ConstantExpr::getZExtOrBitCast(Constant *C, const Type *Ty) { if (C->getType()->getScalarSizeInBits() == Ty->getScalarSizeInBits()) - return getCast(Instruction::BitCast, C, Ty); - return getCast(Instruction::ZExt, C, Ty); + return getBitCast(C, Ty); + return getZExt(C, Ty); } Constant *ConstantExpr::getSExtOrBitCast(Constant *C, const Type *Ty) { if (C->getType()->getScalarSizeInBits() == Ty->getScalarSizeInBits()) - return getCast(Instruction::BitCast, C, Ty); - return getCast(Instruction::SExt, C, Ty); + return getBitCast(C, Ty); + return getSExt(C, Ty); } Constant *ConstantExpr::getTruncOrBitCast(Constant *C, const Type *Ty) { if (C->getType()->getScalarSizeInBits() == Ty->getScalarSizeInBits()) - return getCast(Instruction::BitCast, C, Ty); - return getCast(Instruction::Trunc, C, Ty); + return getBitCast(C, Ty); + return getTrunc(C, Ty); } Constant *ConstantExpr::getPointerCast(Constant *S, const Type *Ty) { @@ -1245,8 +1245,8 @@ Constant *ConstantExpr::getPointerCast(Constant *S, const Type *Ty) { assert((Ty->isIntegerTy() || Ty->isPointerTy()) && "Invalid cast"); if (Ty->isIntegerTy()) - return getCast(Instruction::PtrToInt, S, Ty); - return getCast(Instruction::BitCast, S, Ty); + return getPtrToInt(S, Ty); + return getBitCast(S, Ty); } Constant *ConstantExpr::getIntegerCast(Constant *C, const Type *Ty, @@ -1450,12 +1450,6 @@ Constant *ConstantExpr::getCompareTy(unsigned short predicate, Constant *ConstantExpr::get(unsigned Opcode, Constant *C1, Constant *C2, unsigned Flags) { - // API compatibility: Adjust integer opcodes to floating-point opcodes. 
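To make the shuffle-based lowering in the palignr upgrade above concrete, here is a standalone sketch that prints which source byte each shuffle mask element selects for an assumed immediate; it mirrors the ConstantInt::get(IntTy, shiftVal + i) mask construction in the 128-bit case but is not part of the patch:

#include <cstdio>

int main() {
  unsigned shiftVal = 5;  // assumed example immediate, within the shuffle range
  for (unsigned i = 0; i != 16; ++i) {
    // Mask element i is shiftVal + i, indexing the 32 bytes of <Op2, Op1>:
    // 0..15 select from Op2 (the first shuffle operand), 16..31 from Op1.
    unsigned Idx = shiftVal + i;
    std::printf("result[%2u] <- %s[%u]\n", i,
                Idx < 16 ? "Op2" : "Op1", Idx < 16 ? Idx : Idx - 16);
  }
  return 0;
}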
- if (C1->getType()->isFPOrFPVectorTy()) { - if (Opcode == Instruction::Add) Opcode = Instruction::FAdd; - else if (Opcode == Instruction::Sub) Opcode = Instruction::FSub; - else if (Opcode == Instruction::Mul) Opcode = Instruction::FMul; - } #ifndef NDEBUG switch (Opcode) { case Instruction::Add: @@ -1523,8 +1517,8 @@ Constant* ConstantExpr::getSizeOf(const Type* Ty) { Constant *GEPIdx = ConstantInt::get(Type::getInt32Ty(Ty->getContext()), 1); Constant *GEP = getGetElementPtr( Constant::getNullValue(PointerType::getUnqual(Ty)), &GEPIdx, 1); - return getCast(Instruction::PtrToInt, GEP, - Type::getInt64Ty(Ty->getContext())); + return getPtrToInt(GEP, + Type::getInt64Ty(Ty->getContext())); } Constant* ConstantExpr::getAlignOf(const Type* Ty) { @@ -1537,8 +1531,8 @@ Constant* ConstantExpr::getAlignOf(const Type* Ty) { Constant *One = ConstantInt::get(Type::getInt32Ty(Ty->getContext()), 1); Constant *Indices[2] = { Zero, One }; Constant *GEP = getGetElementPtr(NullPtr, Indices, 2); - return getCast(Instruction::PtrToInt, GEP, - Type::getInt64Ty(Ty->getContext())); + return getPtrToInt(GEP, + Type::getInt64Ty(Ty->getContext())); } Constant* ConstantExpr::getOffsetOf(const StructType* STy, unsigned FieldNo) { @@ -1555,8 +1549,8 @@ Constant* ConstantExpr::getOffsetOf(const Type* Ty, Constant *FieldNo) { }; Constant *GEP = getGetElementPtr( Constant::getNullValue(PointerType::getUnqual(Ty)), GEPIdx, 2); - return getCast(Instruction::PtrToInt, GEP, - Type::getInt64Ty(Ty->getContext())); + return getPtrToInt(GEP, + Type::getInt64Ty(Ty->getContext())); } Constant *ConstantExpr::getCompare(unsigned short pred, @@ -1840,9 +1834,6 @@ Constant *ConstantExpr::getExtractValue(Constant *Agg, } Constant* ConstantExpr::getNeg(Constant* C) { - // API compatibility: Adjust integer opcodes to floating-point opcodes. - if (C->getType()->isFPOrFPVectorTy()) - return getFNeg(C); assert(C->getType()->isIntOrIntVectorTy() && "Cannot NEG a nonintegral value!"); return get(Instruction::Sub, diff --git a/lib/VMCore/Core.cpp b/lib/VMCore/Core.cpp index 634407c..bbf1375 100644 --- a/lib/VMCore/Core.cpp +++ b/lib/VMCore/Core.cpp @@ -119,6 +119,11 @@ void LLVMDumpModule(LLVMModuleRef M) { unwrap(M)->dump(); } +/*--.. Operations on inline assembler ......................................--*/ +void LLVMSetModuleInlineAsm(LLVMModuleRef M, const char *Asm) { + unwrap(M)->setModuleInlineAsm(StringRef(Asm)); +} + /*===-- Operations on types -----------------------------------------------===*/ @@ -322,8 +327,7 @@ LLVMTypeRef LLVMUnionTypeInContext(LLVMContextRef C, LLVMTypeRef *ElementTypes, return wrap(UnionType::get(&Tys[0], Tys.size())); } -LLVMTypeRef LLVMUnionType(LLVMTypeRef *ElementTypes, - unsigned ElementCount, int Packed) { +LLVMTypeRef LLVMUnionType(LLVMTypeRef *ElementTypes, unsigned ElementCount) { return LLVMUnionTypeInContext(LLVMGetGlobalContext(), ElementTypes, ElementCount); } diff --git a/lib/VMCore/Dominators.cpp b/lib/VMCore/Dominators.cpp index 3441750..10a866f 100644 --- a/lib/VMCore/Dominators.cpp +++ b/lib/VMCore/Dominators.cpp @@ -30,9 +30,9 @@ using namespace llvm; // Always verify dominfo if expensive checking is enabled. 
#ifdef XDEBUG -bool VerifyDomInfo = true; +static bool VerifyDomInfo = true; #else -bool VerifyDomInfo = false; +static bool VerifyDomInfo = false; #endif static cl::opt<bool,true> VerifyDomInfoX("verify-dom-info", cl::location(VerifyDomInfo), @@ -119,7 +119,7 @@ void DominanceFrontier::verifyAnalysis() const { assert(!compare(OtherDF) && "Invalid DominanceFrontier info!"); } -// NewBB is split and now it has one successor. Update dominace frontier to +// NewBB is split and now it has one successor. Update dominance frontier to // reflect this change. void DominanceFrontier::splitBlock(BasicBlock *NewBB) { assert(NewBB->getTerminator()->getNumSuccessors() == 1 @@ -129,7 +129,7 @@ void DominanceFrontier::splitBlock(BasicBlock *NewBB) { SmallVector<BasicBlock*, 8> PredBlocks; for (pred_iterator PI = pred_begin(NewBB), PE = pred_end(NewBB); PI != PE; ++PI) - PredBlocks.push_back(*PI); + PredBlocks.push_back(*PI); if (PredBlocks.empty()) // If NewBB does not have any predecessors then it is a entry block. diff --git a/lib/VMCore/Instructions.cpp b/lib/VMCore/Instructions.cpp index 4609a64..f64b220 100644 --- a/lib/VMCore/Instructions.cpp +++ b/lib/VMCore/Instructions.cpp @@ -30,80 +30,6 @@ using namespace llvm; // CallSite Class //===----------------------------------------------------------------------===// -#define CALLSITE_DELEGATE_GETTER(METHOD) \ - Instruction *II = getInstruction(); \ - return isCall() \ - ? cast<CallInst>(II)->METHOD \ - : cast<InvokeInst>(II)->METHOD - -#define CALLSITE_DELEGATE_SETTER(METHOD) \ - Instruction *II = getInstruction(); \ - if (isCall()) \ - cast<CallInst>(II)->METHOD; \ - else \ - cast<InvokeInst>(II)->METHOD - -CallingConv::ID CallSite::getCallingConv() const { - CALLSITE_DELEGATE_GETTER(getCallingConv()); -} -void CallSite::setCallingConv(CallingConv::ID CC) { - CALLSITE_DELEGATE_SETTER(setCallingConv(CC)); -} -const AttrListPtr &CallSite::getAttributes() const { - CALLSITE_DELEGATE_GETTER(getAttributes()); -} -void CallSite::setAttributes(const AttrListPtr &PAL) { - CALLSITE_DELEGATE_SETTER(setAttributes(PAL)); -} -bool CallSite::paramHasAttr(uint16_t i, Attributes attr) const { - CALLSITE_DELEGATE_GETTER(paramHasAttr(i, attr)); -} -uint16_t CallSite::getParamAlignment(uint16_t i) const { - CALLSITE_DELEGATE_GETTER(getParamAlignment(i)); -} - -/// @brief Return true if the call should not be inlined. 
-bool CallSite::isNoInline() const { - CALLSITE_DELEGATE_GETTER(isNoInline()); -} - -void CallSite::setIsNoInline(bool Value) { - CALLSITE_DELEGATE_GETTER(setIsNoInline(Value)); -} - - -bool CallSite::doesNotAccessMemory() const { - CALLSITE_DELEGATE_GETTER(doesNotAccessMemory()); -} -void CallSite::setDoesNotAccessMemory(bool doesNotAccessMemory) { - CALLSITE_DELEGATE_SETTER(setDoesNotAccessMemory(doesNotAccessMemory)); -} -bool CallSite::onlyReadsMemory() const { - CALLSITE_DELEGATE_GETTER(onlyReadsMemory()); -} -void CallSite::setOnlyReadsMemory(bool onlyReadsMemory) { - CALLSITE_DELEGATE_SETTER(setOnlyReadsMemory(onlyReadsMemory)); -} -bool CallSite::doesNotReturn() const { - CALLSITE_DELEGATE_GETTER(doesNotReturn()); -} -void CallSite::setDoesNotReturn(bool doesNotReturn) { - CALLSITE_DELEGATE_SETTER(setDoesNotReturn(doesNotReturn)); -} -bool CallSite::doesNotThrow() const { - CALLSITE_DELEGATE_GETTER(doesNotThrow()); -} -void CallSite::setDoesNotThrow(bool doesNotThrow) { - CALLSITE_DELEGATE_SETTER(setDoesNotThrow(doesNotThrow)); -} - -bool CallSite::hasArgument(const Value *Arg) const { - for (arg_iterator AI = this->arg_begin(), E = this->arg_end(); AI != E; ++AI) - if (AI->get() == Arg) - return true; - return false; -} - User::op_iterator CallSite::getCallee() const { Instruction *II(getInstruction()); return isCall() @@ -111,9 +37,6 @@ User::op_iterator CallSite::getCallee() const { : cast<InvokeInst>(II)->op_end() - 3; // Skip BB, BB, Function } -#undef CALLSITE_DELEGATE_GETTER -#undef CALLSITE_DELEGATE_SETTER - //===----------------------------------------------------------------------===// // TerminatorInst Class //===----------------------------------------------------------------------===// @@ -1639,43 +1562,29 @@ const Type* ExtractValueInst::getIndexedType(const Type *Agg, // BinaryOperator Class //===----------------------------------------------------------------------===// -/// AdjustIType - Map Add, Sub, and Mul to FAdd, FSub, and FMul when the -/// type is floating-point, to help provide compatibility with an older API. -/// -static BinaryOperator::BinaryOps AdjustIType(BinaryOperator::BinaryOps iType, - const Type *Ty) { - // API compatibility: Adjust integer opcodes to floating-point opcodes. 
- if (Ty->isFPOrFPVectorTy()) { - if (iType == BinaryOperator::Add) iType = BinaryOperator::FAdd; - else if (iType == BinaryOperator::Sub) iType = BinaryOperator::FSub; - else if (iType == BinaryOperator::Mul) iType = BinaryOperator::FMul; - } - return iType; -} - BinaryOperator::BinaryOperator(BinaryOps iType, Value *S1, Value *S2, const Type *Ty, const Twine &Name, Instruction *InsertBefore) - : Instruction(Ty, AdjustIType(iType, Ty), + : Instruction(Ty, iType, OperandTraits<BinaryOperator>::op_begin(this), OperandTraits<BinaryOperator>::operands(this), InsertBefore) { Op<0>() = S1; Op<1>() = S2; - init(AdjustIType(iType, Ty)); + init(iType); setName(Name); } BinaryOperator::BinaryOperator(BinaryOps iType, Value *S1, Value *S2, const Type *Ty, const Twine &Name, BasicBlock *InsertAtEnd) - : Instruction(Ty, AdjustIType(iType, Ty), + : Instruction(Ty, iType, OperandTraits<BinaryOperator>::op_begin(this), OperandTraits<BinaryOperator>::operands(this), InsertAtEnd) { Op<0>() = S1; Op<1>() = S2; - init(AdjustIType(iType, Ty)); + init(iType); setName(Name); } @@ -2060,7 +1969,7 @@ unsigned CastInst::isEliminableCastPair( // FPEXT < FloatPt n/a FloatPt n/a // PTRTOINT n/a Pointer n/a Integral Unsigned // INTTOPTR n/a Integral Unsigned Pointer n/a - // BITCONVERT = FirstClass n/a FirstClass n/a + // BITCAST = FirstClass n/a FirstClass n/a // // NOTE: some transforms are safe, but we consider them to be non-profitable. // For example, we could merge "fptoui double to i32" + "zext i32 to i64", diff --git a/lib/VMCore/LLVMContext.cpp b/lib/VMCore/LLVMContext.cpp index 3244f28..4d61363 100644 --- a/lib/VMCore/LLVMContext.cpp +++ b/lib/VMCore/LLVMContext.cpp @@ -17,6 +17,7 @@ #include "llvm/Constants.h" #include "llvm/Instruction.h" #include "llvm/Support/ManagedStatic.h" +#include "llvm/Support/SourceMgr.h" #include "LLVMContextImpl.h" using namespace llvm; @@ -33,6 +34,10 @@ LLVMContext::LLVMContext() : pImpl(new LLVMContextImpl(*this)) { } LLVMContext::~LLVMContext() { delete pImpl; } +//===----------------------------------------------------------------------===// +// Recoverable Backend Errors +//===----------------------------------------------------------------------===// + void LLVMContext::setInlineAsmDiagnosticHandler(void *DiagHandler, void *DiagContext) { pImpl->InlineAsmDiagHandler = DiagHandler; @@ -51,6 +56,39 @@ void *LLVMContext::getInlineAsmDiagnosticContext() const { return pImpl->InlineAsmDiagContext; } +void LLVMContext::emitError(StringRef ErrorStr) { + emitError(0U, ErrorStr); +} + +void LLVMContext::emitError(const Instruction *I, StringRef ErrorStr) { + unsigned LocCookie = 0; + if (const MDNode *SrcLoc = I->getMetadata("srcloc")) { + if (SrcLoc->getNumOperands() != 0) + if (const ConstantInt *CI = dyn_cast<ConstantInt>(SrcLoc->getOperand(0))) + LocCookie = CI->getZExtValue(); + } + return emitError(LocCookie, ErrorStr); +} + +void LLVMContext::emitError(unsigned LocCookie, StringRef ErrorStr) { + // If there is no error handler installed, just print the error and exit. + if (pImpl->InlineAsmDiagHandler == 0) { + errs() << "error: " << ErrorStr << "\n"; + exit(1); + } + + // If we do have an error handler, we can report the error and keep going. 
+ SMDiagnostic Diag("", "error: " + ErrorStr.str()); + + ((SourceMgr::DiagHandlerTy)(intptr_t)pImpl->InlineAsmDiagHandler) + (Diag, pImpl->InlineAsmDiagContext, LocCookie); + +} + +//===----------------------------------------------------------------------===// +// Metadata Kind Uniquing +//===----------------------------------------------------------------------===// + #ifndef NDEBUG /// isValidName - Return true if Name is a valid custom metadata handler name. static bool isValidName(StringRef MDName) { diff --git a/lib/VMCore/LLVMContextImpl.cpp b/lib/VMCore/LLVMContextImpl.cpp index e71157f..9e41a08 100644 --- a/lib/VMCore/LLVMContextImpl.cpp +++ b/lib/VMCore/LLVMContextImpl.cpp @@ -13,6 +13,7 @@ #include "LLVMContextImpl.h" #include <algorithm> +using namespace llvm; LLVMContextImpl::LLVMContextImpl(LLVMContext &C) : TheTrueVal(0), TheFalseVal(0), diff --git a/lib/VMCore/LeaksContext.h b/lib/VMCore/LeaksContext.h index abff090..b9e59d4 100644 --- a/lib/VMCore/LeaksContext.h +++ b/lib/VMCore/LeaksContext.h @@ -14,7 +14,8 @@ #include "llvm/Value.h" #include "llvm/ADT/SmallPtrSet.h" -using namespace llvm; + +namespace llvm { template <class T> struct PrinterTrait { @@ -87,3 +88,5 @@ private: const T* Cache; const char* Name; }; + +} diff --git a/lib/VMCore/Makefile b/lib/VMCore/Makefile index 4395ecf..03a4fc7 100644 --- a/lib/VMCore/Makefile +++ b/lib/VMCore/Makefile @@ -1,4 +1,4 @@ -##===- lib/VMCore/Makefile ------------------------------*- Makefile -*-===## +##===- lib/VMCore/Makefile ---------------------------------*- Makefile -*-===## # # The LLVM Compiler Infrastructure # diff --git a/lib/VMCore/Metadata.cpp b/lib/VMCore/Metadata.cpp index 72de032..092fe00 100644 --- a/lib/VMCore/Metadata.cpp +++ b/lib/VMCore/Metadata.cpp @@ -178,6 +178,13 @@ void MDNode::destroy() { free(this); } +/// isFunctionLocalValue - Return true if this is a value that would require a +/// function-local MDNode. +static bool isFunctionLocalValue(Value *V) { + return isa<Instruction>(V) || isa<Argument>(V) || isa<BasicBlock>(V) || + (isa<MDNode>(V) && cast<MDNode>(V)->isFunctionLocal()); +} + MDNode *MDNode::getMDNode(LLVMContext &Context, Value *const *Vals, unsigned NumVals, FunctionLocalness FL, bool Insert) { @@ -188,8 +195,7 @@ MDNode *MDNode::getMDNode(LLVMContext &Context, Value *const *Vals, for (unsigned i = 0; i != NumVals; ++i) { Value *V = Vals[i]; if (!V) continue; - if (isa<Instruction>(V) || isa<Argument>(V) || isa<BasicBlock>(V) || - (isa<MDNode>(V) && cast<MDNode>(V)->isFunctionLocal())) { + if (isFunctionLocalValue(V)) { isFunctionLocal = true; break; } @@ -262,6 +268,13 @@ void MDNode::setIsNotUniqued() { void MDNode::replaceOperand(MDNodeOperand *Op, Value *To) { Value *From = *Op; + // If is possible that someone did GV->RAUW(inst), replacing a global variable + // with an instruction or some other function-local object. If this is a + // non-function-local MDNode, it can't point to a function-local object. + // Handle this case by implicitly dropping the MDNode reference to null. + if (!isFunctionLocal() && To && isFunctionLocalValue(To)) + To = 0; + if (From == To) return; diff --git a/lib/VMCore/Pass.cpp b/lib/VMCore/Pass.cpp index 6b941f3..a60877d 100644 --- a/lib/VMCore/Pass.cpp +++ b/lib/VMCore/Pass.cpp @@ -318,6 +318,8 @@ static PassRegistrar *getPassRegistrar() { return PassRegistrarObj; } +namespace { + // FIXME: We use ManagedCleanup to erase the pass registrar on shutdown. 
// Unfortunately, passes are registered with static ctors, and having // llvm_shutdown clear this map prevents successful ressurection after @@ -329,7 +331,9 @@ void cleanupPassRegistrar(void*) { PassRegistrarObj = 0; } } -ManagedCleanup<&cleanupPassRegistrar> registrarCleanup; +ManagedCleanup<&cleanupPassRegistrar> registrarCleanup ATTRIBUTE_USED; + +} // getPassInfo - Return the PassInfo data structure that corresponds to this // pass... diff --git a/lib/VMCore/PassManager.cpp b/lib/VMCore/PassManager.cpp index 6ca35ac..b28fdeb 100644 --- a/lib/VMCore/PassManager.cpp +++ b/lib/VMCore/PassManager.cpp @@ -1293,9 +1293,8 @@ void FunctionPassManager::add(Pass *P) { bool FunctionPassManager::run(Function &F) { if (F.isMaterializable()) { std::string errstr; - if (F.Materialize(&errstr)) { - llvm_report_error("Error reading bitcode file: " + errstr); - } + if (F.Materialize(&errstr)) + report_fatal_error("Error reading bitcode file: " + Twine(errstr)); } return FPM->run(F); } diff --git a/lib/VMCore/Type.cpp b/lib/VMCore/Type.cpp index 5f9c11f..845b523 100644 --- a/lib/VMCore/Type.cpp +++ b/lib/VMCore/Type.cpp @@ -380,6 +380,10 @@ const Type *Type::getPPC_FP128Ty(LLVMContext &C) { return &C.pImpl->PPC_FP128Ty; } +const IntegerType *Type::getIntNTy(LLVMContext &C, unsigned N) { + return IntegerType::get(C, N); +} + const IntegerType *Type::getInt1Ty(LLVMContext &C) { return &C.pImpl->Int1Ty; } @@ -420,6 +424,10 @@ const PointerType *Type::getPPC_FP128PtrTy(LLVMContext &C, unsigned AS) { return getPPC_FP128Ty(C)->getPointerTo(AS); } +const PointerType *Type::getIntNPtrTy(LLVMContext &C, unsigned N, unsigned AS) { + return getIntNTy(C, N)->getPointerTo(AS); +} + const PointerType *Type::getInt1PtrTy(LLVMContext &C, unsigned AS) { return getInt1Ty(C)->getPointerTo(AS); } diff --git a/lib/VMCore/TypeSymbolTable.cpp b/lib/VMCore/TypeSymbolTable.cpp index b4daf0f..d68a44b 100644 --- a/lib/VMCore/TypeSymbolTable.cpp +++ b/lib/VMCore/TypeSymbolTable.cpp @@ -126,13 +126,15 @@ void TypeSymbolTable::refineAbstractType(const DerivedType *OldType, // faster to remove them all in one pass. // for (iterator I = begin(), E = end(); I != E; ++I) { - if (I->second == (Type*)OldType) { // FIXME when Types aren't const. + // FIXME when Types aren't const. + if (I->second == const_cast<DerivedType *>(OldType)) { #if DEBUG_ABSTYPE dbgs() << "Removing type " << OldType->getDescription() << "\n"; #endif OldType->removeAbstractTypeUser(this); - I->second = (Type*)NewType; // TODO FIXME when types aren't const + // TODO FIXME when types aren't const + I->second = const_cast<Type *>(NewType); if (NewType->isAbstract()) { #if DEBUG_ABSTYPE dbgs() << "Added type " << NewType->getDescription() << "\n"; diff --git a/lib/VMCore/Verifier.cpp b/lib/VMCore/Verifier.cpp index c18168d..6ad4272 100644 --- a/lib/VMCore/Verifier.cpp +++ b/lib/VMCore/Verifier.cpp @@ -93,7 +93,7 @@ namespace { // Anonymous namespace for class } if (Broken) - llvm_report_error("Broken module, no Basic Block terminator!"); + report_fatal_error("Broken module, no Basic Block terminator!"); return false; } @@ -176,6 +176,10 @@ namespace { /// Types - keep track of the types that have been checked already. TypeSet Types; + /// MDNodes - keep track of the metadata nodes that have been checked + /// already. 
+ SmallPtrSet<MDNode *, 32> MDNodes; + Verifier() : FunctionPass(&ID), Broken(false), RealPass(true), action(AbortProcessAction), @@ -244,6 +248,10 @@ namespace { I != E; ++I) visitGlobalAlias(*I); + for (Module::named_metadata_iterator I = M.named_metadata_begin(), + E = M.named_metadata_end(); I != E; ++I) + visitNamedMDNode(*I); + // If the module is broken, abort at this time. return abortIfBroken(); } @@ -284,6 +292,8 @@ namespace { void visitGlobalValue(GlobalValue &GV); void visitGlobalVariable(GlobalVariable &GV); void visitGlobalAlias(GlobalAlias &GA); + void visitNamedMDNode(NamedMDNode &NMD); + void visitMDNode(MDNode &MD, Function *F); void visitFunction(Function &F); void visitBasicBlock(BasicBlock &BB); using InstVisitor<Verifier>::visit; @@ -333,8 +343,6 @@ namespace { int VT, unsigned ArgNo, std::string &Suffix); void VerifyIntrinsicPrototype(Intrinsic::ID ID, Function *F, unsigned RetNum, unsigned ParamNum, ...); - void VerifyFunctionLocalMetadata(MDNode *N, Function *F, - SmallPtrSet<MDNode *, 32> &Visited); void VerifyParameterAttrs(Attributes Attrs, const Type *Ty, bool isReturnValue, const Value *V); void VerifyFunctionAttrs(const FunctionType *FT, const AttrListPtr &Attrs, @@ -489,6 +497,54 @@ void Verifier::visitGlobalAlias(GlobalAlias &GA) { visitGlobalValue(GA); } +void Verifier::visitNamedMDNode(NamedMDNode &NMD) { + for (unsigned i = 0, e = NMD.getNumOperands(); i != e; ++i) { + MDNode *MD = NMD.getOperand(i); + if (!MD) + continue; + + Assert2(!MD->isFunctionLocal(), + "Named metadata operand cannot be function local!", &NMD, MD); + visitMDNode(*MD, 0); + } +} + +void Verifier::visitMDNode(MDNode &MD, Function *F) { + // Only visit each node once. Metadata can be mutually recursive, so this + // avoids infinite recursion here, as well as being an optimization. + if (!MDNodes.insert(&MD)) + return; + + for (unsigned i = 0, e = MD.getNumOperands(); i != e; ++i) { + Value *Op = MD.getOperand(i); + if (!Op) + continue; + if (isa<Constant>(Op) || isa<MDString>(Op) || isa<NamedMDNode>(Op)) + continue; + if (MDNode *N = dyn_cast<MDNode>(Op)) { + Assert2(MD.isFunctionLocal() || !N->isFunctionLocal(), + "Global metadata operand cannot be function local!", &MD, N); + visitMDNode(*N, F); + continue; + } + Assert2(MD.isFunctionLocal(), "Invalid operand for global metadata!", &MD, Op); + + // If this was an instruction, bb, or argument, verify that it is in the + // function that we expect. + Function *ActualF = 0; + if (Instruction *I = dyn_cast<Instruction>(Op)) + ActualF = I->getParent()->getParent(); + else if (BasicBlock *BB = dyn_cast<BasicBlock>(Op)) + ActualF = BB->getParent(); + else if (Argument *A = dyn_cast<Argument>(Op)) + ActualF = A->getParent(); + assert(ActualF && "Unimplemented function local metadata case!"); + + Assert2(ActualF == F, "function-local metadata used in wrong function", + &MD, Op); + } +} + void Verifier::verifyTypeSymbolTable(TypeSymbolTable &ST) { for (TypeSymbolTable::iterator I = ST.begin(), E = ST.end(); I != E; ++I) VerifyType(I->second); @@ -1553,38 +1609,6 @@ void Verifier::VerifyType(const Type *Ty) { } } -/// VerifyFunctionLocalMetadata - Verify that the specified MDNode is local to -/// specified Function. -void Verifier::VerifyFunctionLocalMetadata(MDNode *N, Function *F, - SmallPtrSet<MDNode *, 32> &Visited) { - assert(N->isFunctionLocal() && "Should only be called on function-local MD"); - - // Only visit each node once. 
- if (!Visited.insert(N)) - return; - - for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) { - Value *V = N->getOperand(i); - if (!V) continue; - - Function *ActualF = 0; - if (Instruction *I = dyn_cast<Instruction>(V)) - ActualF = I->getParent()->getParent(); - else if (BasicBlock *BB = dyn_cast<BasicBlock>(V)) - ActualF = BB->getParent(); - else if (Argument *A = dyn_cast<Argument>(V)) - ActualF = A->getParent(); - else if (MDNode *MD = dyn_cast<MDNode>(V)) - if (MD->isFunctionLocal()) - VerifyFunctionLocalMetadata(MD, F, Visited); - - // If this was an instruction, bb, or argument, verify that it is in the - // function that we expect. - Assert1(ActualF == 0 || ActualF == F, - "function-local metadata used in wrong function", N); - } -} - // Flags used by TableGen to mark intrinsic parameters with the // LLVMExtendedElementVectorType and LLVMTruncatedElementVectorType classes. static const unsigned ExtendedElementVectorType = 0x40000000; @@ -1604,11 +1628,8 @@ void Verifier::visitIntrinsicFunctionCall(Intrinsic::ID ID, CallInst &CI) { // If the intrinsic takes MDNode arguments, verify that they are either global // or are local to *this* function. for (unsigned i = 1, e = CI.getNumOperands(); i != e; ++i) - if (MDNode *MD = dyn_cast<MDNode>(CI.getOperand(i))) { - if (!MD->isFunctionLocal()) continue; - SmallPtrSet<MDNode *, 32> Visited; - VerifyFunctionLocalMetadata(MD, CI.getParent()->getParent(), Visited); - } + if (MDNode *MD = dyn_cast<MDNode>(CI.getOperand(i))) + visitMDNode(*MD, CI.getParent()->getParent()); switch (ID) { default: @@ -1933,7 +1954,9 @@ FunctionPass *llvm::createVerifierPass(VerifierFailureAction action) { } -// verifyFunction - Create +/// verifyFunction - Check a function for errors, printing messages on stderr. +/// Return true if the function is corrupt. +/// bool llvm::verifyFunction(const Function &f, VerifierFailureAction action) { Function &F = const_cast<Function&>(f); assert(!F.isDeclaration() && "Cannot verify external functions"); |
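A small sketch of how verifyFunction is commonly called from client code, assuming the ReturnStatusAction enumerator of VerifierFailureAction; checkFunction is a made-up wrapper:

#include "llvm/Analysis/Verifier.h"
using namespace llvm;

// Hypothetical wrapper: verify a freshly constructed function and report the
// result to the caller instead of aborting the process.
static bool checkFunction(const Function &F) {
  // verifyFunction returns true if it finds the function to be corrupt.
  return !verifyFunction(F, ReturnStatusAction);
}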