154 files changed, 4231 insertions, 2941 deletions
diff --git a/lib/Analysis/CFGPrinter.cpp b/lib/Analysis/CFGPrinter.cpp
index 143220c..8ada5a3 100644
--- a/lib/Analysis/CFGPrinter.cpp
+++ b/lib/Analysis/CFGPrinter.cpp
@@ -31,12 +31,6 @@
 #include <fstream>
 using namespace llvm;
 
-/// CFGOnly flag - This is used to control whether or not the CFG graph printer
-/// prints out the contents of basic blocks or not.  This is acceptable because
-/// this code is only really used for debugging purposes.
-///
-static bool CFGOnly = false;
-
 namespace llvm {
 template<>
 struct DOTGraphTraits<const Function*> : public DefaultDOTGraphTraits {
@@ -45,12 +39,13 @@ struct DOTGraphTraits<const Function*> : public DefaultDOTGraphTraits {
   }
 
   static std::string getNodeLabel(const BasicBlock *Node,
-                                  const Function *Graph) {
-    if (CFGOnly && !Node->getName().empty())
+                                  const Function *Graph,
+                                  bool ShortNames) {
+    if (ShortNames && !Node->getName().empty())
       return Node->getName() + ":";
 
     std::ostringstream Out;
-    if (CFGOnly) {
+    if (ShortNames) {
       WriteAsOperand(Out, Node, false);
       return Out.str();
     }
@@ -117,9 +112,7 @@ namespace {
     CFGOnlyViewer() : FunctionPass(&ID) {}
 
     virtual bool runOnFunction(Function &F) {
-      CFGOnly = true;
       F.viewCFG();
-      CFGOnly = false;
       return false;
     }
 
@@ -168,14 +161,20 @@ static RegisterPass<CFGPrinter>
 P1("dot-cfg", "Print CFG of function to 'dot' file", false, true);
 
 namespace {
-  struct VISIBILITY_HIDDEN CFGOnlyPrinter : public CFGPrinter {
+  struct VISIBILITY_HIDDEN CFGOnlyPrinter : public FunctionPass {
     static char ID; // Pass identification, replacement for typeid
-    CFGOnlyPrinter() : CFGPrinter(&ID) {}
+    CFGOnlyPrinter() : FunctionPass(&ID) {}
+    explicit CFGOnlyPrinter(void *pid) : FunctionPass(pid) {}
     virtual bool runOnFunction(Function &F) {
-      bool OldCFGOnly = CFGOnly;
-      CFGOnly = true;
-      CFGPrinter::runOnFunction(F);
-      CFGOnly = OldCFGOnly;
+      std::string Filename = "cfg." + F.getName() + ".dot";
+      cerr << "Writing '" << Filename << "'...";
+      std::ofstream File(Filename.c_str());
+
+      if (File.good())
+        WriteGraph(File, (const Function*)&F, true);
+      else
+        cerr << "  error opening file for writing!";
+      cerr << "\n";
       return false;
     }
     void print(std::ostream &OS, const Module* = 0) const {}
@@ -206,9 +205,7 @@ void Function::viewCFG() const {
 /// his can make the graph smaller.
 ///
 void Function::viewCFGOnly() const {
-  CFGOnly = true;
-  viewCFG();
-  CFGOnly = false;
+  ViewGraph(this, "cfg" + getName(), true);
 }
 
 FunctionPass *llvm::createCFGPrinterPass () {
diff --git a/lib/Analysis/CMakeLists.txt b/lib/Analysis/CMakeLists.txt
index 093aa69..6f2a06c 100644
--- a/lib/Analysis/CMakeLists.txt
+++ b/lib/Analysis/CMakeLists.txt
@@ -18,6 +18,7 @@ add_llvm_library(LLVMAnalysis
   LibCallAliasAnalysis.cpp
   LibCallSemantics.cpp
   LiveValues.cpp
+  LoopDependenceAnalysis.cpp
   LoopInfo.cpp
   LoopPass.cpp
   LoopVR.cpp
@@ -32,3 +33,5 @@ add_llvm_library(LLVMAnalysis
   Trace.cpp
   ValueTracking.cpp
   )
+
+target_link_libraries (LLVMAnalysis LLVMSupport)
diff --git a/lib/Analysis/DbgInfoPrinter.cpp b/lib/Analysis/DbgInfoPrinter.cpp
index d80d581..6c549e63 100644
--- a/lib/Analysis/DbgInfoPrinter.cpp
+++ b/lib/Analysis/DbgInfoPrinter.cpp
@@ -93,7 +93,7 @@ void PrintDbgInfo::printFuncStart(const DbgFuncStartInst *FS) {
   DISubprogram Subprogram(cast<GlobalVariable>(FS->getSubprogram()));
   std::string Res1, Res2;
   Out << "; fully qualified function name: " << Subprogram.getDisplayName(Res1)
-      << " return type: " << Subprogram.getType().getName(Res2)
+      << " return type: " << Subprogram.getReturnTypeName(Res2)
       << " at line " << Subprogram.getLineNumber()
       << "\n\n";
 }
diff --git a/lib/Analysis/DebugInfo.cpp b/lib/Analysis/DebugInfo.cpp
index adda5ee..6b27cf4 100644
--- a/lib/Analysis/DebugInfo.cpp
+++ b/lib/Analysis/DebugInfo.cpp
@@ -73,22 +73,22 @@ bool DIDescriptor::ValidDebugInfo(Value *V, CodeGenOpt::Level OptLevel) {
   return true;
 }
 
-DIDescriptor::DIDescriptor(GlobalVariable *gv, unsigned RequiredTag) {
-  GV = gv;
+DIDescriptor::DIDescriptor(GlobalVariable *GV, unsigned RequiredTag) {
+  DbgGV = GV;
   
   // If this is non-null, check to see if the Tag matches. If not, set to null.
   if (GV && getTag() != RequiredTag)
-    GV = 0;
+    DbgGV = 0;
 }
 
 const std::string &
 DIDescriptor::getStringField(unsigned Elt, std::string &Result) const {
-  if (GV == 0) {
+  if (DbgGV == 0) {
     Result.clear();
     return Result;
   }
 
-  Constant *C = GV->getInitializer();
+  Constant *C = DbgGV->getInitializer();
   if (C == 0 || Elt >= C->getNumOperands()) {
     Result.clear();
     return Result;
@@ -102,9 +102,9 @@ DIDescriptor::getStringField(unsigned Elt, std::string &Result) const {
 }
 
 uint64_t DIDescriptor::getUInt64Field(unsigned Elt) const {
-  if (GV == 0) return 0;
+  if (DbgGV == 0) return 0;
 
-  Constant *C = GV->getInitializer();
+  Constant *C = DbgGV->getInitializer();
   if (C == 0 || Elt >= C->getNumOperands())
     return 0;
 
@@ -114,9 +114,9 @@ uint64_t DIDescriptor::getUInt64Field(unsigned Elt) const {
 }
 
 DIDescriptor DIDescriptor::getDescriptorField(unsigned Elt) const {
-  if (GV == 0) return DIDescriptor();
+  if (DbgGV == 0) return DIDescriptor();
 
-  Constant *C = GV->getInitializer();
+  Constant *C = DbgGV->getInitializer();
   if (C == 0 || Elt >= C->getNumOperands())
     return DIDescriptor();
 
@@ -125,9 +125,9 @@ DIDescriptor DIDescriptor::getDescriptorField(unsigned Elt) const {
 }
 
 GlobalVariable *DIDescriptor::getGlobalVariableField(unsigned Elt) const {
-  if (GV == 0) return 0;
+  if (DbgGV == 0) return 0;
 
-  Constant *C = GV->getInitializer();
+  Constant *C = DbgGV->getInitializer();
   if (C == 0 || Elt >= C->getNumOperands())
     return 0;
 
@@ -140,12 +140,12 @@ GlobalVariable *DIDescriptor::getGlobalVariableField(unsigned Elt) const {
 //===----------------------------------------------------------------------===//
 
 // Needed by DIVariable::getType().
-DIType::DIType(GlobalVariable *gv) : DIDescriptor(gv) {
-  if (!gv) return;
+DIType::DIType(GlobalVariable *GV) : DIDescriptor(GV) {
+  if (!GV) return;
   unsigned tag = getTag();
   if (tag != dwarf::DW_TAG_base_type && !DIDerivedType::isDerivedType(tag) &&
       !DICompositeType::isCompositeType(tag))
-    GV = 0;
+    DbgGV = 0;
 }
 
 /// isDerivedType - Return true if the specified tag is legal for
@@ -198,8 +198,8 @@ bool DIVariable::isVariable(unsigned Tag) {
 }
 
 unsigned DIArray::getNumElements() const {
-  assert (GV && "Invalid DIArray");
-  Constant *C = GV->getInitializer();
+  assert (DbgGV && "Invalid DIArray");
+  Constant *C = DbgGV->getInitializer();
   assert (C && "Invalid DIArray initializer");
   return C->getNumOperands();
 }
@@ -367,71 +367,10 @@ Constant *DIFactory::GetStringConstant(const std::string &String) {
   return Slot = ConstantExpr::getBitCast(StrGV, DestTy);
 }
 
-/// GetOrCreateAnchor - Look up an anchor for the specified tag and name.  If it
-/// already exists, return it.  If not, create a new one and return it.
-DIAnchor DIFactory::GetOrCreateAnchor(unsigned TAG, const char *Name) {
-  const Type *EltTy = StructType::get(Type::Int32Ty, Type::Int32Ty, NULL);
-  
-  // Otherwise, create the global or return it if already in the module.
-  Constant *C = M.getOrInsertGlobal(Name, EltTy);
-  assert(isa<GlobalVariable>(C) && "Incorrectly typed anchor?");
-  GlobalVariable *GV = cast<GlobalVariable>(C);
-  
-  // If it has an initializer, it is already in the module.
-  if (GV->hasInitializer()) 
-    return SubProgramAnchor = DIAnchor(GV);
-  
-  GV->setLinkage(GlobalValue::LinkOnceAnyLinkage);
-  GV->setSection("llvm.metadata");
-  GV->setConstant(true);
-  M.addTypeName("llvm.dbg.anchor.type", EltTy);
-  
-  // Otherwise, set the initializer.
-  Constant *Elts[] = {
-    GetTagConstant(dwarf::DW_TAG_anchor),
-    ConstantInt::get(Type::Int32Ty, TAG)
-  };
-  
-  GV->setInitializer(ConstantStruct::get(Elts, 2));
-  return DIAnchor(GV);
-}
-
-
-
 //===----------------------------------------------------------------------===//
 // DIFactory: Primary Constructors
 //===----------------------------------------------------------------------===//
 
-/// GetOrCreateCompileUnitAnchor - Return the anchor for compile units,
-/// creating a new one if there isn't already one in the module.
-DIAnchor DIFactory::GetOrCreateCompileUnitAnchor() {
-  // If we already created one, just return it.
-  if (!CompileUnitAnchor.isNull())
-    return CompileUnitAnchor;
-  return CompileUnitAnchor = GetOrCreateAnchor(dwarf::DW_TAG_compile_unit,
-                                               "llvm.dbg.compile_units");
-}
-
-/// GetOrCreateSubprogramAnchor - Return the anchor for subprograms,
-/// creating a new one if there isn't already one in the module.
-DIAnchor DIFactory::GetOrCreateSubprogramAnchor() {
-  // If we already created one, just return it.
-  if (!SubProgramAnchor.isNull())
-    return SubProgramAnchor;
-  return SubProgramAnchor = GetOrCreateAnchor(dwarf::DW_TAG_subprogram,
-                                              "llvm.dbg.subprograms");
-}
-
-/// GetOrCreateGlobalVariableAnchor - Return the anchor for globals,
-/// creating a new one if there isn't already one in the module.
-DIAnchor DIFactory::GetOrCreateGlobalVariableAnchor() {
-  // If we already created one, just return it.
-  if (!GlobalVariableAnchor.isNull())
-    return GlobalVariableAnchor;
-  return GlobalVariableAnchor = GetOrCreateAnchor(dwarf::DW_TAG_variable,
-                                                  "llvm.dbg.global_variables");
-}
-
 /// GetOrCreateArray - Create an descriptor for an array of descriptors. 
 /// This implicitly uniques the arrays created.
 DIArray DIFactory::GetOrCreateArray(DIDescriptor *Tys, unsigned NumTys) {
@@ -494,7 +433,7 @@ DICompileUnit DIFactory::CreateCompileUnit(unsigned LangID,
                                            unsigned RunTimeVer) {
   Constant *Elts[] = {
     GetTagConstant(dwarf::DW_TAG_compile_unit),
-    getCastToEmpty(GetOrCreateCompileUnitAnchor()),
+    Constant::getNullValue(EmptyStructPtr),
     ConstantInt::get(Type::Int32Ty, LangID),
     GetStringConstant(Filename),
     GetStringConstant(Directory),
@@ -509,7 +448,7 @@ DICompileUnit DIFactory::CreateCompileUnit(unsigned LangID,
   
   M.addTypeName("llvm.dbg.compile_unit.type", Init->getType());
   GlobalVariable *GV = new GlobalVariable(Init->getType(), true,
-                                          GlobalValue::InternalLinkage,
+                                          GlobalValue::LinkOnceAnyLinkage,
                                           Init, "llvm.dbg.compile_unit", &M);
   GV->setSection("llvm.metadata");
   return DICompileUnit(GV);
@@ -655,7 +594,7 @@ DISubprogram DIFactory::CreateSubprogram(DIDescriptor Context,
 
   Constant *Elts[] = {
     GetTagConstant(dwarf::DW_TAG_subprogram),
-    getCastToEmpty(GetOrCreateSubprogramAnchor()),
+    Constant::getNullValue(EmptyStructPtr),
     getCastToEmpty(Context),
     GetStringConstant(Name),
     GetStringConstant(DisplayName),
@@ -671,7 +610,7 @@ DISubprogram DIFactory::CreateSubprogram(DIDescriptor Context,
   
   M.addTypeName("llvm.dbg.subprogram.type", Init->getType());
   GlobalVariable *GV = new GlobalVariable(Init->getType(), true,
-                                          GlobalValue::InternalLinkage,
+                                          GlobalValue::LinkOnceAnyLinkage,
                                           Init, "llvm.dbg.subprogram", &M);
   GV->setSection("llvm.metadata");
   return DISubprogram(GV);
@@ -687,7 +626,7 @@ DIFactory::CreateGlobalVariable(DIDescriptor Context, const std::string &Name,
                                 bool isDefinition, llvm::GlobalVariable *Val) {
   Constant *Elts[] = {
     GetTagConstant(dwarf::DW_TAG_variable),
-    getCastToEmpty(GetOrCreateGlobalVariableAnchor()),
+    Constant::getNullValue(EmptyStructPtr),
     getCastToEmpty(Context),
     GetStringConstant(Name),
     GetStringConstant(DisplayName),
@@ -704,7 +643,7 @@ DIFactory::CreateGlobalVariable(DIDescriptor Context, const std::string &Name,
   
   M.addTypeName("llvm.dbg.global_variable.type", Init->getType());
   GlobalVariable *GV = new GlobalVariable(Init->getType(), true,
-                                          GlobalValue::InternalLinkage,
+                                          GlobalValue::LinkOnceAnyLinkage,
                                           Init, "llvm.dbg.global_variable", &M);
   GV->setSection("llvm.metadata");
   return DIGlobalVariable(GV);
@@ -954,12 +893,42 @@ namespace llvm {
     Unit.getDirectory(Dir);
     return true;
   }
+
+  /// CollectDebugInfoAnchors - Split llvm.dbg* descriptors into CUs, GVs, SPs.
+  void CollectDebugInfoAnchors(Module &M,
+                               SmallVector<GlobalVariable *, 2> &CUs,
+                               SmallVector<GlobalVariable *, 4> &GVs,
+                               SmallVector<GlobalVariable *, 4> &SPs) {
+
+    for (Module::global_iterator GVI = M.global_begin(), E = M.global_end();
+       GVI != E; GVI++) {
+      GlobalVariable *GV = GVI;
+      if (GV->hasName() && strncmp(GV->getNameStart(), "llvm.dbg", 8) == 0
+          && GV->isConstant() && GV->hasInitializer()) {
+        DICompileUnit C(GV);
+        if (C.isNull() == false) {
+          CUs.push_back(GV);
+          continue;
+        }
+        DIGlobalVariable G(GV);
+        if (G.isNull() == false) {
+          GVs.push_back(GV);
+          continue;
+        }
+        DISubprogram S(GV);
+        if (S.isNull() == false) {
+          SPs.push_back(GV);
+          continue;
+        }
+      }
+    }
+  }
 }
 
 /// dump - Print descriptor.
 void DIDescriptor::dump() const {
   cerr << "[" << dwarf::TagString(getTag()) << "] ";
-  cerr << std::hex << "[GV:" << GV << "]" << std::dec;
+  cerr << std::hex << "[GV:" << DbgGV << "]" << std::dec;
 }
 
 /// dump - Print compile unit.
@@ -1000,11 +969,11 @@ void DIType::dump() const {
     cerr << " [fwd] ";
 
   if (isBasicType(Tag))
-    DIBasicType(GV).dump();
+    DIBasicType(DbgGV).dump();
   else if (isDerivedType(Tag))
-    DIDerivedType(GV).dump();
+    DIDerivedType(DbgGV).dump();
   else if (isCompositeType(Tag))
-    DICompositeType(GV).dump();
+    DICompositeType(DbgGV).dump();
   else {
     cerr << "Invalid DIType\n";
     return;
@@ -1051,7 +1020,7 @@ void DIGlobal::dump() const {
     cerr << " [def] ";
 
   if (isGlobalVariable(Tag))
-    DIGlobalVariable(GV).dump();
+    DIGlobalVariable(DbgGV).dump();
 
   cerr << "\n";
 }
@@ -1077,3 +1046,4 @@ void DIVariable::dump() const {
   getType().dump();
   cerr << "\n";
 }
+
diff --git a/lib/Analysis/IPA/Andersens.cpp b/lib/Analysis/IPA/Andersens.cpp
index 8584d06..4ace049 100644
--- a/lib/Analysis/IPA/Andersens.cpp
+++ b/lib/Analysis/IPA/Andersens.cpp
@@ -65,6 +65,7 @@
 #include "llvm/Analysis/AliasAnalysis.h"
 #include "llvm/Analysis/Passes.h"
 #include "llvm/Support/Debug.h"
+#include "llvm/System/Atomic.h"
 #include "llvm/ADT/Statistic.h"
 #include "llvm/ADT/SparseBitVector.h"
 #include "llvm/ADT/DenseSet.h"
@@ -284,7 +285,8 @@ namespace {
 
       // Timestamp a node (used for work list prioritization)
       void Stamp() {
-        Timestamp = Counter++;
+        Timestamp = sys::AtomicIncrement(&Counter);
+        --Timestamp;
       }
 
       bool isRep() const {
diff --git a/lib/Analysis/LoopDependenceAnalysis.cpp b/lib/Analysis/LoopDependenceAnalysis.cpp
new file mode 100644
index 0000000..172a2be
--- /dev/null
+++ b/lib/Analysis/LoopDependenceAnalysis.cpp
@@ -0,0 +1,47 @@
+//===- LoopDependenceAnalysis.cpp - LDA Implementation ----------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This is the beginning of an implementation of a loop dependence analysis
+// framework, which is used to detect dependences in memory accesses in loops.
+//
+// Please note that this is work in progress and the interface is subject to
+// change.
+//
+// TODO: adapt as implementation progresses.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "lda"
+#include "llvm/Analysis/LoopDependenceAnalysis.h"
+#include "llvm/Analysis/LoopPass.h"
+#include "llvm/Analysis/ScalarEvolution.h"
+using namespace llvm;
+
+LoopPass *llvm::createLoopDependenceAnalysisPass() {
+  return new LoopDependenceAnalysis();
+}
+
+static RegisterPass<LoopDependenceAnalysis>
+R("lda", "Loop Dependence Analysis", false, true);
+char LoopDependenceAnalysis::ID = 0;
+
+//===----------------------------------------------------------------------===//
+//                   LoopDependenceAnalysis Implementation
+//===----------------------------------------------------------------------===//
+
+bool LoopDependenceAnalysis::runOnLoop(Loop *L, LPPassManager &) {
+  this->L = L;
+  SE = &getAnalysis<ScalarEvolution>();
+  return false;
+}
+
+void LoopDependenceAnalysis::getAnalysisUsage(AnalysisUsage &AU) const {
+  AU.setPreservesAll();
+  AU.addRequired<ScalarEvolution>();
+}
diff --git a/lib/Analysis/ProfileInfoLoader.cpp b/lib/Analysis/ProfileInfoLoader.cpp
index 3a0a740..adb2bdc 100644
--- a/lib/Analysis/ProfileInfoLoader.cpp
+++ b/lib/Analysis/ProfileInfoLoader.cpp
@@ -73,7 +73,8 @@ static void ReadProfilingBlock(const char *ToolName, FILE *F,
 //
 ProfileInfoLoader::ProfileInfoLoader(const char *ToolName,
                                      const std::string &Filename,
-                                     Module &TheModule) : M(TheModule) {
+                                     Module &TheModule) : 
+                              M(TheModule), Warned(false) {
   FILE *F = fopen(Filename.c_str(), "r");
   if (F == 0) {
     cerr << ToolName << ": Error opening '" << Filename << "': ";
@@ -200,7 +201,6 @@ void ProfileInfoLoader::getBlockCounts(std::vector<std::pair<BasicBlock*,
         Counts.back().second += EdgeCounts[i].second;
         unsigned SuccNum = EdgeCounts[i].first.second;
         if (SuccNum >= TI->getNumSuccessors()) {
-          static bool Warned = false;
           if (!Warned) {
             cerr << "WARNING: profile info doesn't seem to match"
                  << " the program!\n";
diff --git a/lib/Analysis/ScalarEvolution.cpp b/lib/Analysis/ScalarEvolution.cpp
index 5cbb5fa..dcb179af 100644
--- a/lib/Analysis/ScalarEvolution.cpp
+++ b/lib/Analysis/ScalarEvolution.cpp
@@ -95,7 +95,8 @@ STATISTIC(NumBruteForceTripCountsComputed,
 static cl::opt<unsigned>
 MaxBruteForceIterations("scalar-evolution-max-iterations", cl::ReallyHidden,
                         cl::desc("Maximum number of iterations SCEV will "
-                                 "symbolically execute a constant derived loop"),
+                                 "symbolically execute a constant "
+                                 "derived loop"),
                         cl::init(100));
 
 static RegisterPass<ScalarEvolution>
@@ -132,6 +133,12 @@ bool SCEV::isOne() const {
   return false;
 }
 
+bool SCEV::isAllOnesValue() const {
+  if (const SCEVConstant *SC = dyn_cast<SCEVConstant>(this))
+    return SC->getValue()->isAllOnesValue();
+  return false;
+}
+
 SCEVCouldNotCompute::SCEVCouldNotCompute() :
   SCEV(scCouldNotCompute) {}
 
@@ -150,10 +157,11 @@ bool SCEVCouldNotCompute::hasComputableLoopEvolution(const Loop *L) const {
   return false;
 }
 
-const SCEV* SCEVCouldNotCompute::
-replaceSymbolicValuesWithConcrete(const SCEV* Sym,
-                                  const SCEV* Conc,
-                                  ScalarEvolution &SE) const {
+const SCEV *
+SCEVCouldNotCompute::replaceSymbolicValuesWithConcrete(
+                                                    const SCEV *Sym,
+                                                    const SCEV *Conc,
+                                                    ScalarEvolution &SE) const {
   return this;
 }
 
@@ -165,11 +173,6 @@ bool SCEVCouldNotCompute::classof(const SCEV *S) {
   return S->getSCEVType() == scCouldNotCompute;
 }
 
-
-// SCEVConstants - Only allow the creation of one SCEVConstant for any
-// particular value.  Don't use a const SCEV* here, or else the object will
-// never be deleted!
-
 const SCEV* ScalarEvolution::getConstant(ConstantInt *V) {
   SCEVConstant *&R = SCEVConstants[V];
   if (R == 0) R = new SCEVConstant(V);
@@ -199,10 +202,6 @@ bool SCEVCastExpr::dominates(BasicBlock *BB, DominatorTree *DT) const {
   return Op->dominates(BB, DT);
 }
 
-// SCEVTruncates - Only allow the creation of one SCEVTruncateExpr for any
-// particular input.  Don't use a const SCEV* here, or else the object will
-// never be deleted!
-
 SCEVTruncateExpr::SCEVTruncateExpr(const SCEV* op, const Type *ty)
   : SCEVCastExpr(scTruncate, op, ty) {
   assert((Op->getType()->isInteger() || isa<PointerType>(Op->getType())) &&
@@ -210,15 +209,10 @@ SCEVTruncateExpr::SCEVTruncateExpr(const SCEV* op, const Type *ty)
          "Cannot truncate non-integer value!");
 }
 
-
 void SCEVTruncateExpr::print(raw_ostream &OS) const {
   OS << "(trunc " << *Op->getType() << " " << *Op << " to " << *Ty << ")";
 }
 
-// SCEVZeroExtends - Only allow the creation of one SCEVZeroExtendExpr for any
-// particular input.  Don't use a const SCEV* here, or else the object will never
-// be deleted!
-
 SCEVZeroExtendExpr::SCEVZeroExtendExpr(const SCEV* op, const Type *ty)
   : SCEVCastExpr(scZeroExtend, op, ty) {
   assert((Op->getType()->isInteger() || isa<PointerType>(Op->getType())) &&
@@ -230,10 +224,6 @@ void SCEVZeroExtendExpr::print(raw_ostream &OS) const {
   OS << "(zext " << *Op->getType() << " " << *Op << " to " << *Ty << ")";
 }
 
-// SCEVSignExtends - Only allow the creation of one SCEVSignExtendExpr for any
-// particular input.  Don't use a const SCEV* here, or else the object will never
-// be deleted!
-
 SCEVSignExtendExpr::SCEVSignExtendExpr(const SCEV* op, const Type *ty)
   : SCEVCastExpr(scSignExtend, op, ty) {
   assert((Op->getType()->isInteger() || isa<PointerType>(Op->getType())) &&
@@ -245,10 +235,6 @@ void SCEVSignExtendExpr::print(raw_ostream &OS) const {
   OS << "(sext " << *Op->getType() << " " << *Op << " to " << *Ty << ")";
 }
 
-// SCEVCommExprs - Only allow the creation of one SCEVCommutativeExpr for any
-// particular input.  Don't use a const SCEV* here, or else the object will never
-// be deleted!
-
 void SCEVCommutativeExpr::print(raw_ostream &OS) const {
   assert(Operands.size() > 1 && "This plus expr shouldn't exist!");
   const char *OpStr = getOperationStr();
@@ -258,10 +244,11 @@ void SCEVCommutativeExpr::print(raw_ostream &OS) const {
   OS << ")";
 }
 
-const SCEV* SCEVCommutativeExpr::
-replaceSymbolicValuesWithConcrete(const SCEV* Sym,
-                                  const SCEV* Conc,
-                                  ScalarEvolution &SE) const {
+const SCEV *
+SCEVCommutativeExpr::replaceSymbolicValuesWithConcrete(
+                                                    const SCEV *Sym,
+                                                    const SCEV *Conc,
+                                                    ScalarEvolution &SE) const {
   for (unsigned i = 0, e = getNumOperands(); i != e; ++i) {
     const SCEV* H =
       getOperand(i)->replaceSymbolicValuesWithConcrete(Sym, Conc, SE);
@@ -298,11 +285,6 @@ bool SCEVNAryExpr::dominates(BasicBlock *BB, DominatorTree *DT) const {
   return true;
 }
 
-
-// SCEVUDivs - Only allow the creation of one SCEVUDivExpr for any particular
-// input.  Don't use a const SCEV* here, or else the object will never be
-// deleted!
-
 bool SCEVUDivExpr::dominates(BasicBlock *BB, DominatorTree *DT) const {
   return LHS->dominates(BB, DT) && RHS->dominates(BB, DT);
 }
@@ -320,14 +302,10 @@ const Type *SCEVUDivExpr::getType() const {
   return RHS->getType();
 }
 
-// SCEVAddRecExprs - Only allow the creation of one SCEVAddRecExpr for any
-// particular input.  Don't use a const SCEV* here, or else the object will never
-// be deleted!
-
-const SCEV* SCEVAddRecExpr::
-replaceSymbolicValuesWithConcrete(const SCEV* Sym,
-                                  const SCEV* Conc,
-                                  ScalarEvolution &SE) const {
+const SCEV *
+SCEVAddRecExpr::replaceSymbolicValuesWithConcrete(const SCEV *Sym,
+                                                  const SCEV *Conc,
+                                                  ScalarEvolution &SE) const {
   for (unsigned i = 0, e = getNumOperands(); i != e; ++i) {
     const SCEV* H =
       getOperand(i)->replaceSymbolicValuesWithConcrete(Sym, Conc, SE);
@@ -349,12 +327,22 @@ replaceSymbolicValuesWithConcrete(const SCEV* Sym,
 
 
 bool SCEVAddRecExpr::isLoopInvariant(const Loop *QueryLoop) const {
-  // This recurrence is invariant w.r.t to QueryLoop iff QueryLoop doesn't
-  // contain L and if the start is invariant.
   // Add recurrences are never invariant in the function-body (null loop).
-  return QueryLoop &&
-         !QueryLoop->contains(L->getHeader()) &&
-         getOperand(0)->isLoopInvariant(QueryLoop);
+  if (!QueryLoop)
+    return false;
+
+  // This recurrence is variant w.r.t. QueryLoop if QueryLoop contains L.
+  if (QueryLoop->contains(L->getHeader()))
+    return false;
+
+  // This recurrence is variant w.r.t. QueryLoop if any of its operands
+  // are variant.
+  for (unsigned i = 0, e = getNumOperands(); i != e; ++i)
+    if (!getOperand(i)->isLoopInvariant(QueryLoop))
+      return false;
+
+  // Otherwise it's loop-invariant.
+  return true;
 }
 
 
@@ -365,10 +353,6 @@ void SCEVAddRecExpr::print(raw_ostream &OS) const {
   OS << "}<" << L->getHeader()->getName() + ">";
 }
 
-// SCEVUnknowns - Only allow the creation of one SCEVUnknown for any particular
-// value.  Don't use a const SCEV* here, or else the object will never be
-// deleted!
-
 bool SCEVUnknown::isLoopInvariant(const Loop *L) const {
   // All non-instruction values are loop invariant.  All instructions are loop
   // invariant if they are not contained in the specified loop.
@@ -583,7 +567,7 @@ static const SCEV* BinomialCoefficient(const SCEV* It, unsigned K,
   // safe in modular arithmetic.
   //
   // However, this code doesn't use exactly that formula; the formula it uses
-  // is something like the following, where T is the number of factors of 2 in 
+  // is something like the following, where T is the number of factors of 2 in
   // K! (i.e. trailing zeros in the binary representation of K!), and ^ is
   // exponentiation:
   //
@@ -595,7 +579,7 @@ static const SCEV* BinomialCoefficient(const SCEV* It, unsigned K,
   // arithmetic.  To do exact division in modular arithmetic, all we have
   // to do is multiply by the inverse.  Therefore, this step can be done at
   // width W.
-  // 
+  //
   // The next issue is how to safely do the division by 2^T.  The way this
   // is done is by doing the multiplication step at a width of at least W + T
   // bits.  This way, the bottom W+T bits of the product are accurate. Then,
@@ -713,8 +697,8 @@ const SCEV* ScalarEvolution::getTruncateExpr(const SCEV* Op,
   Ty = getEffectiveSCEVType(Ty);
 
   if (const SCEVConstant *SC = dyn_cast<SCEVConstant>(Op))
-    return getUnknown(
-        ConstantExpr::getTrunc(SC->getValue(), Ty));
+    return getConstant(
+      cast<ConstantInt>(ConstantExpr::getTrunc(SC->getValue(), Ty)));
 
   // trunc(trunc(x)) --> trunc(x)
   if (const SCEVTruncateExpr *ST = dyn_cast<SCEVTruncateExpr>(Op))
@@ -753,7 +737,7 @@ const SCEV* ScalarEvolution::getZeroExtendExpr(const SCEV* Op,
     const Type *IntTy = getEffectiveSCEVType(Ty);
     Constant *C = ConstantExpr::getZExt(SC->getValue(), IntTy);
     if (IntTy != Ty) C = ConstantExpr::getIntToPtr(C, Ty);
-    return getUnknown(C);
+    return getConstant(cast<ConstantInt>(C));
   }
 
   // zext(zext(x)) --> zext(x)
@@ -841,7 +825,7 @@ const SCEV* ScalarEvolution::getSignExtendExpr(const SCEV* Op,
     const Type *IntTy = getEffectiveSCEVType(Ty);
     Constant *C = ConstantExpr::getSExt(SC->getValue(), IntTy);
     if (IntTy != Ty) C = ConstantExpr::getIntToPtr(C, Ty);
-    return getUnknown(C);
+    return getConstant(cast<ConstantInt>(C));
   }
 
   // sext(sext(x)) --> sext(x)
@@ -1199,10 +1183,11 @@ const SCEV* ScalarEvolution::getAddExpr(SmallVectorImpl<const SCEV*> &Ops) {
       Ops.clear();
       if (AccumulatedConstant != 0)
         Ops.push_back(getConstant(AccumulatedConstant));
-      for (std::map<APInt, SmallVector<const SCEV*, 4>, APIntCompare>::iterator I =
-           MulOpLists.begin(), E = MulOpLists.end(); I != E; ++I)
+      for (std::map<APInt, SmallVector<const SCEV *, 4>, APIntCompare>::iterator
+           I = MulOpLists.begin(), E = MulOpLists.end(); I != E; ++I)
         if (I->first != 0)
-          Ops.push_back(getMulExpr(getConstant(I->first), getAddExpr(I->second)));
+          Ops.push_back(getMulExpr(getConstant(I->first),
+                                   getAddExpr(I->second)));
       if (Ops.empty())
         return getIntegerSCEV(0, Ty);
       if (Ops.size() == 1)
@@ -1257,14 +1242,15 @@ const SCEV* ScalarEvolution::getAddExpr(SmallVectorImpl<const SCEV*> &Ops) {
             // Fold X + (A*B*C) + (A*D*E) --> X + (A*(B*C+D*E))
             const SCEV* InnerMul1 = Mul->getOperand(MulOp == 0);
             if (Mul->getNumOperands() != 2) {
-              SmallVector<const SCEV*, 4> MulOps(Mul->op_begin(), Mul->op_end());
+              SmallVector<const SCEV *, 4> MulOps(Mul->op_begin(),
+                                                  Mul->op_end());
               MulOps.erase(MulOps.begin()+MulOp);
               InnerMul1 = getMulExpr(MulOps);
             }
             const SCEV* InnerMul2 = OtherMul->getOperand(OMulOp == 0);
             if (OtherMul->getNumOperands() != 2) {
-              SmallVector<const SCEV*, 4> MulOps(OtherMul->op_begin(),
-                                             OtherMul->op_end());
+              SmallVector<const SCEV *, 4> MulOps(OtherMul->op_begin(),
+                                                  OtherMul->op_end());
               MulOps.erase(MulOps.begin()+OMulOp);
               InnerMul2 = getMulExpr(MulOps);
             }
@@ -1330,7 +1316,8 @@ const SCEV* ScalarEvolution::getAddExpr(SmallVectorImpl<const SCEV*> &Ops) {
         const SCEVAddRecExpr *OtherAddRec = cast<SCEVAddRecExpr>(Ops[OtherIdx]);
         if (AddRec->getLoop() == OtherAddRec->getLoop()) {
           // Other + {A,+,B} + {C,+,D}  -->  Other + {A+C,+,B+D}
-          SmallVector<const SCEV*, 4> NewOps(AddRec->op_begin(), AddRec->op_end());
+          SmallVector<const SCEV *, 4> NewOps(AddRec->op_begin(),
+                                              AddRec->op_end());
           for (unsigned i = 0, e = OtherAddRec->getNumOperands(); i != e; ++i) {
             if (i >= NewOps.size()) {
               NewOps.insert(NewOps.end(), OtherAddRec->op_begin()+i,
@@ -1394,7 +1381,7 @@ const SCEV* ScalarEvolution::getMulExpr(SmallVectorImpl<const SCEV*> &Ops) {
     ++Idx;
     while (const SCEVConstant *RHSC = dyn_cast<SCEVConstant>(Ops[Idx])) {
       // We found two constants, fold them together!
-      ConstantInt *Fold = ConstantInt::get(LHSC->getValue()->getValue() * 
+      ConstantInt *Fold = ConstantInt::get(LHSC->getValue()->getValue() *
                                            RHSC->getValue()->getValue());
       Ops[0] = getConstant(Fold);
       Ops.erase(Ops.begin()+1);  // Erase the folded element
@@ -1531,8 +1518,8 @@ const SCEV* ScalarEvolution::getMulExpr(SmallVectorImpl<const SCEV*> &Ops) {
 
 /// getUDivExpr - Get a canonical multiply expression, or something simpler if
 /// possible.
-const SCEV* ScalarEvolution::getUDivExpr(const SCEV* LHS,
-                                        const SCEV* RHS) {
+const SCEV *ScalarEvolution::getUDivExpr(const SCEV *LHS,
+                                         const SCEV *RHS) {
   assert(getEffectiveSCEVType(LHS->getType()) ==
          getEffectiveSCEVType(RHS->getType()) &&
          "SCEVUDivExpr operand types don't match!");
@@ -1611,7 +1598,8 @@ const SCEV* ScalarEvolution::getUDivExpr(const SCEV* LHS,
     if (const SCEVConstant *LHSC = dyn_cast<SCEVConstant>(LHS)) {
       Constant *LHSCV = LHSC->getValue();
       Constant *RHSCV = RHSC->getValue();
-      return getUnknown(ConstantExpr::getUDiv(LHSCV, RHSCV));
+      return getConstant(cast<ConstantInt>(ConstantExpr::getUDiv(LHSCV,
+                                                                 RHSCV)));
     }
   }
 
@@ -1640,8 +1628,9 @@ const SCEV* ScalarEvolution::getAddRecExpr(const SCEV* Start,
 
 /// getAddRecExpr - Get an add recurrence expression for the specified loop.
 /// Simplify the expression as much as possible.
-const SCEV* ScalarEvolution::getAddRecExpr(SmallVectorImpl<const SCEV*> &Operands,
-                                          const Loop *L) {
+const SCEV *
+ScalarEvolution::getAddRecExpr(SmallVectorImpl<const SCEV*> &Operands,
+                               const Loop *L) {
   if (Operands.size() == 1) return Operands[0];
 #ifndef NDEBUG
   for (unsigned i = 1, e = Operands.size(); i != e; ++i)
@@ -1662,8 +1651,29 @@ const SCEV* ScalarEvolution::getAddRecExpr(SmallVectorImpl<const SCEV*> &Operand
       SmallVector<const SCEV*, 4> NestedOperands(NestedAR->op_begin(),
                                                 NestedAR->op_end());
       Operands[0] = NestedAR->getStart();
-      NestedOperands[0] = getAddRecExpr(Operands, L);
-      return getAddRecExpr(NestedOperands, NestedLoop);
+      // AddRecs require their operands be loop-invariant with respect to their
+      // loops. Don't perform this transformation if it would break this
+      // requirement.
+      bool AllInvariant = true;
+      for (unsigned i = 0, e = Operands.size(); i != e; ++i)
+        if (!Operands[i]->isLoopInvariant(L)) {
+          AllInvariant = false;
+          break;
+        }
+      if (AllInvariant) {
+        NestedOperands[0] = getAddRecExpr(Operands, L);
+        AllInvariant = true;
+        for (unsigned i = 0, e = NestedOperands.size(); i != e; ++i)
+          if (!NestedOperands[i]->isLoopInvariant(NestedLoop)) {
+            AllInvariant = false;
+            break;
+          }
+        if (AllInvariant)
+          // Ok, both add recurrences are valid after the transformation.
+          return getAddRecExpr(NestedOperands, NestedLoop);
+      }
+      // Reset Operands to its original state.
+      Operands[0] = NestedAR;
     }
   }
 
@@ -1673,8 +1683,8 @@ const SCEV* ScalarEvolution::getAddRecExpr(SmallVectorImpl<const SCEV*> &Operand
   return Result;
 }
 
-const SCEV* ScalarEvolution::getSMaxExpr(const SCEV* LHS,
-                                        const SCEV* RHS) {
+const SCEV *ScalarEvolution::getSMaxExpr(const SCEV *LHS,
+                                         const SCEV *RHS) {
   SmallVector<const SCEV*, 2> Ops;
   Ops.push_back(LHS);
   Ops.push_back(RHS);
@@ -1711,10 +1721,14 @@ ScalarEvolution::getSMaxExpr(SmallVectorImpl<const SCEV*> &Ops) {
       LHSC = cast<SCEVConstant>(Ops[0]);
     }
 
-    // If we are left with a constant -inf, strip it off.
+    // If we are left with a constant minimum-int, strip it off.
     if (cast<SCEVConstant>(Ops[0])->getValue()->isMinValue(true)) {
       Ops.erase(Ops.begin());
       --Idx;
+    } else if (cast<SCEVConstant>(Ops[0])->getValue()->isMaxValue(true)) {
+      // If we have an smax with a constant maximum-int, it will always be
+      // maximum-int.
+      return Ops[0];
     }
   }
 
@@ -1760,8 +1774,8 @@ ScalarEvolution::getSMaxExpr(SmallVectorImpl<const SCEV*> &Ops) {
   return Result;
 }
 
-const SCEV* ScalarEvolution::getUMaxExpr(const SCEV* LHS,
-                                        const SCEV* RHS) {
+const SCEV *ScalarEvolution::getUMaxExpr(const SCEV *LHS,
+                                         const SCEV *RHS) {
   SmallVector<const SCEV*, 2> Ops;
   Ops.push_back(LHS);
   Ops.push_back(RHS);
@@ -1798,10 +1812,14 @@ ScalarEvolution::getUMaxExpr(SmallVectorImpl<const SCEV*> &Ops) {
       LHSC = cast<SCEVConstant>(Ops[0]);
     }
 
-    // If we are left with a constant zero, strip it off.
+    // If we are left with a constant minimum-int, strip it off.
     if (cast<SCEVConstant>(Ops[0])->getValue()->isMinValue(false)) {
       Ops.erase(Ops.begin());
       --Idx;
+    } else if (cast<SCEVConstant>(Ops[0])->getValue()->isMaxValue(false)) {
+      // If we have a umax with a constant maximum-int, it will always be
+      // maximum-int.
+      return Ops[0];
     }
   }
 
@@ -1847,23 +1865,24 @@ ScalarEvolution::getUMaxExpr(SmallVectorImpl<const SCEV*> &Ops) {
   return Result;
 }
 
-const SCEV* ScalarEvolution::getSMinExpr(const SCEV* LHS,
-                                        const SCEV* RHS) {
+const SCEV *ScalarEvolution::getSMinExpr(const SCEV *LHS,
+                                         const SCEV *RHS) {
   // ~smax(~x, ~y) == smin(x, y).
   return getNotSCEV(getSMaxExpr(getNotSCEV(LHS), getNotSCEV(RHS)));
 }
 
-const SCEV* ScalarEvolution::getUMinExpr(const SCEV* LHS,
-                                        const SCEV* RHS) {
+const SCEV *ScalarEvolution::getUMinExpr(const SCEV *LHS,
+                                         const SCEV *RHS) {
   // ~umax(~x, ~y) == umin(x, y)
   return getNotSCEV(getUMaxExpr(getNotSCEV(LHS), getNotSCEV(RHS)));
 }
 
 const SCEV* ScalarEvolution::getUnknown(Value *V) {
-  if (ConstantInt *CI = dyn_cast<ConstantInt>(V))
-    return getConstant(CI);
-  if (isa<ConstantPointerNull>(V))
-    return getIntegerSCEV(0, V->getType());
+  // Don't attempt to do anything other than create a SCEVUnknown object
+  // here.  createSCEV only calls getUnknown after checking for all other
+  // interesting possibilities, and any other code that calls getUnknown
+  // is doing so in order to hide a value from SCEV canonicalization.
+
   SCEVUnknown *&Result = SCEVUnknowns[V];
   if (Result == 0) Result = new SCEVUnknown(V);
   return Result;
@@ -1941,26 +1960,18 @@ const SCEV* ScalarEvolution::getSCEV(Value *V) {
   return S;
 }
 
-/// getIntegerSCEV - Given an integer or FP type, create a constant for the
+/// getIntegerSCEV - Given a SCEVable type, create a constant for the
 /// specified signed integer value and return a SCEV for the constant.
 const SCEV* ScalarEvolution::getIntegerSCEV(int Val, const Type *Ty) {
-  Ty = getEffectiveSCEVType(Ty);
-  Constant *C;
-  if (Val == 0)
-    C = Constant::getNullValue(Ty);
-  else if (Ty->isFloatingPoint())
-    C = ConstantFP::get(APFloat(Ty==Type::FloatTy ? APFloat::IEEEsingle :
-                                APFloat::IEEEdouble, Val));
-  else
-    C = ConstantInt::get(Ty, Val);
-  return getUnknown(C);
+  const IntegerType *ITy = cast<IntegerType>(getEffectiveSCEVType(Ty));
+  return getConstant(ConstantInt::get(ITy, Val));
 }
 
 /// getNegativeSCEV - Return a SCEV corresponding to -V = -1*V
 ///
 const SCEV* ScalarEvolution::getNegativeSCEV(const SCEV* V) {
   if (const SCEVConstant *VC = dyn_cast<SCEVConstant>(V))
-    return getUnknown(ConstantExpr::getNeg(VC->getValue()));
+    return getConstant(cast<ConstantInt>(ConstantExpr::getNeg(VC->getValue())));
 
   const Type *Ty = V->getType();
   Ty = getEffectiveSCEVType(Ty);
@@ -1970,7 +1981,7 @@ const SCEV* ScalarEvolution::getNegativeSCEV(const SCEV* V) {
 /// getNotSCEV - Return a SCEV corresponding to ~V = -1-V
 const SCEV* ScalarEvolution::getNotSCEV(const SCEV* V) {
   if (const SCEVConstant *VC = dyn_cast<SCEVConstant>(V))
-    return getUnknown(ConstantExpr::getNot(VC->getValue()));
+    return getConstant(cast<ConstantInt>(ConstantExpr::getNot(VC->getValue())));
 
   const Type *Ty = V->getType();
   Ty = getEffectiveSCEVType(Ty);
@@ -1980,8 +1991,8 @@ const SCEV* ScalarEvolution::getNotSCEV(const SCEV* V) {
 
 /// getMinusSCEV - Return a SCEV corresponding to LHS - RHS.
 ///
-const SCEV* ScalarEvolution::getMinusSCEV(const SCEV* LHS,
-                                         const SCEV* RHS) {
+const SCEV *ScalarEvolution::getMinusSCEV(const SCEV *LHS,
+                                          const SCEV *RHS) {
   // X - Y --> X + -Y
   return getAddExpr(LHS, getNegativeSCEV(RHS));
 }
@@ -2087,8 +2098,8 @@ ScalarEvolution::getTruncateOrNoop(const SCEV* V, const Type *Ty) {
 /// getUMaxFromMismatchedTypes - Promote the operands to the wider of
 /// the types using zero-extension, and then perform a umax operation
 /// with them.
-const SCEV* ScalarEvolution::getUMaxFromMismatchedTypes(const SCEV* LHS,
-                                                       const SCEV* RHS) {
+const SCEV *ScalarEvolution::getUMaxFromMismatchedTypes(const SCEV *LHS,
+                                                        const SCEV *RHS) {
   const SCEV* PromotedLHS = LHS;
   const SCEV* PromotedRHS = RHS;
 
@@ -2103,8 +2114,8 @@ const SCEV* ScalarEvolution::getUMaxFromMismatchedTypes(const SCEV* LHS,
 /// getUMinFromMismatchedTypes - Promote the operands to the wider of
 /// the types using zero-extension, and then perform a umin operation
 /// with them.
-const SCEV* ScalarEvolution::getUMinFromMismatchedTypes(const SCEV* LHS,
-                                                       const SCEV* RHS) {
+const SCEV *ScalarEvolution::getUMinFromMismatchedTypes(const SCEV *LHS,
+                                                        const SCEV *RHS) {
   const SCEV* PromotedLHS = LHS;
   const SCEV* PromotedRHS = RHS;
 
@@ -2119,9 +2130,10 @@ const SCEV* ScalarEvolution::getUMinFromMismatchedTypes(const SCEV* LHS,
 /// ReplaceSymbolicValueWithConcrete - This looks up the computed SCEV value for
 /// the specified instruction and replaces any references to the symbolic value
 /// SymName with the specified value.  This is used during PHI resolution.
-void ScalarEvolution::
-ReplaceSymbolicValueWithConcrete(Instruction *I, const SCEV* SymName,
-                                 const SCEV* NewVal) {
+void
+ScalarEvolution::ReplaceSymbolicValueWithConcrete(Instruction *I,
+                                                  const SCEV *SymName,
+                                                  const SCEV *NewVal) {
   std::map<SCEVCallbackVH, const SCEV*>::iterator SI =
     Scalars.find(SCEVCallbackVH(I, this));
   if (SI == Scalars.end()) return;
@@ -2190,8 +2202,10 @@ const SCEV* ScalarEvolution::createNodeForPHI(PHINode *PN) {
             if (Accum->isLoopInvariant(L) ||
                 (isa<SCEVAddRecExpr>(Accum) &&
                  cast<SCEVAddRecExpr>(Accum)->getLoop() == L)) {
-              const SCEV* StartVal = getSCEV(PN->getIncomingValue(IncomingEdge));
-              const SCEV* PHISCEV  = getAddRecExpr(StartVal, Accum, L);
+              const SCEV *StartVal =
+                getSCEV(PN->getIncomingValue(IncomingEdge));
+              const SCEV *PHISCEV =
+                getAddRecExpr(StartVal, Accum, L);
 
               // Okay, for the entire analysis of this edge we assumed the PHI
               // to be symbolic.  We now need to go back and update all of the
@@ -2216,7 +2230,7 @@ const SCEV* ScalarEvolution::createNodeForPHI(PHINode *PN) {
             // initial step of the addrec evolution.
             if (StartVal == getMinusSCEV(AddRec->getOperand(0),
                                             AddRec->getOperand(1))) {
-              const SCEV* PHISCEV = 
+              const SCEV* PHISCEV =
                  getAddRecExpr(StartVal, AddRec->getOperand(1), L);
 
               // Okay, for the entire analysis of this edge we assumed the PHI
@@ -2402,6 +2416,38 @@ ScalarEvolution::GetMinSignBits(const SCEV* S) {
             getTypeSizeInBits(C->getOperand()->getType()));
   }
 
+  if (const SCEVAddExpr *A = dyn_cast<SCEVAddExpr>(S)) {
+    unsigned BitWidth = getTypeSizeInBits(A->getType());
+
+    // Special case decrementing a value (ADD X, -1):
+    if (const SCEVConstant *CRHS = dyn_cast<SCEVConstant>(A->getOperand(0)))
+      if (CRHS->isAllOnesValue()) {
+        SmallVector<const SCEV *, 4> OtherOps(A->op_begin() + 1, A->op_end());
+        const SCEV *OtherOpsAdd = getAddExpr(OtherOps);
+        unsigned LZ = GetMinLeadingZeros(OtherOpsAdd);
+
+        // If the input is known to be 0 or 1, the output is 0/-1, which is all
+        // sign bits set.
+        if (LZ == BitWidth - 1)
+          return BitWidth;
+
+        // If we are subtracting one from a positive number, there is no carry
+        // out of the result.
+        if (LZ > 0)
+          return GetMinSignBits(OtherOpsAdd);
+      }
+
+    // Each addition can carry into at most one more bit, so every operand
+    // folded into the sum costs at most one known sign bit.
+    unsigned Min = BitWidth;
+    for (unsigned i = 0, e = A->getNumOperands(); i != e; ++i) {
+      unsigned N = GetMinSignBits(A->getOperand(i));
+      Min = std::min(Min, N) - 1;
+      if (Min == 0) return 1;
+    }
+    return Min;
+  }
+
   if (const SCEVUnknown *U = dyn_cast<SCEVUnknown>(S)) {
     // For a SCEVUnknown, ask ValueTracking.
     return ComputeNumSignBits(U->getValue(), TD);
@@ -2422,6 +2468,12 @@ const SCEV* ScalarEvolution::createSCEV(Value *V) {
     Opcode = I->getOpcode();
   else if (ConstantExpr *CE = dyn_cast<ConstantExpr>(V))
     Opcode = CE->getOpcode();
+  else if (ConstantInt *CI = dyn_cast<ConstantInt>(V))
+    return getConstant(CI);
+  else if (isa<ConstantPointerNull>(V))
+    return getIntegerSCEV(0, V->getType());
+  else if (isa<UndefValue>(V))
+    return getIntegerSCEV(0, V->getType());
   else
     return getUnknown(V);
 
@@ -2750,7 +2802,8 @@ void ScalarEvolution::forgetLoopPHIs(const Loop *L) {
   SmallVector<Instruction *, 16> Worklist;
   for (BasicBlock::iterator I = Header->begin();
        PHINode *PN = dyn_cast<PHINode>(I); ++I) {
-    std::map<SCEVCallbackVH, const SCEV*>::iterator It = Scalars.find((Value*)I);
+    std::map<SCEVCallbackVH, const SCEV*>::iterator It =
+      Scalars.find((Value*)I);
     if (It != Scalars.end() && !isa<SCEVUnknown>(It->second))
       Worklist.push_back(PN);
   }
@@ -2775,7 +2828,6 @@ ScalarEvolution::ComputeBackedgeTakenCount(const Loop *L) {
   const SCEV* BECount = CouldNotCompute;
   const SCEV* MaxBECount = CouldNotCompute;
   bool CouldNotComputeBECount = false;
-  bool CouldNotComputeMaxBECount = false;
   for (unsigned i = 0, e = ExitingBlocks.size(); i != e; ++i) {
     BackedgeTakenInfo NewBTI =
       ComputeBackedgeTakenCountFromExit(L, ExitingBlocks[i]);
@@ -2788,25 +2840,13 @@ ScalarEvolution::ComputeBackedgeTakenCount(const Loop *L) {
     } else if (!CouldNotComputeBECount) {
       if (BECount == CouldNotCompute)
         BECount = NewBTI.Exact;
-      else {
-        // TODO: More analysis could be done here. For example, a
-        // loop with a short-circuiting && operator has an exact count
-        // of the min of both sides.
-        CouldNotComputeBECount = true;
-        BECount = CouldNotCompute;
-      }
-    }
-    if (NewBTI.Max == CouldNotCompute) {
-      // We couldn't compute an maximum value for this exit, so
-      // we won't be able to compute an maximum value for the loop.
-      CouldNotComputeMaxBECount = true;
-      MaxBECount = CouldNotCompute;
-    } else if (!CouldNotComputeMaxBECount) {
-      if (MaxBECount == CouldNotCompute)
-        MaxBECount = NewBTI.Max;
       else
-        MaxBECount = getUMaxFromMismatchedTypes(MaxBECount, NewBTI.Max);
+        BECount = getUMinFromMismatchedTypes(BECount, NewBTI.Exact);
     }
+    if (MaxBECount == CouldNotCompute)
+      MaxBECount = NewBTI.Max;
+    else if (NewBTI.Max != CouldNotCompute)
+      MaxBECount = getUMinFromMismatchedTypes(MaxBECount, NewBTI.Max);
   }
 
   return BackedgeTakenInfo(BECount, MaxBECount);
@@ -2825,7 +2865,7 @@ ScalarEvolution::ComputeBackedgeTakenCountFromExit(const Loop *L,
   BranchInst *ExitBr = dyn_cast<BranchInst>(ExitingBlock->getTerminator());
   if (ExitBr == 0) return CouldNotCompute;
   assert(ExitBr->isConditional() && "If unconditional, it can't be in loop!");
-  
+
   // At this point, we know we have a conditional branch that determines whether
   // the loop is exited.  However, we don't know if the branch is executed each
   // time through the loop.  If not, then the execution count of the branch will
@@ -2887,9 +2927,7 @@ ScalarEvolution::ComputeBackedgeTakenCountFromExitCond(const Loop *L,
                                                        Value *ExitCond,
                                                        BasicBlock *TBB,
                                                        BasicBlock *FBB) {
-  // Check if the controlling expression for this loop is an and or or. In
-  // such cases, an exact backedge-taken count may be infeasible, but a
-  // maximum count may still be feasible.
+  // Check if the controlling expression for this loop is an And or Or.
   if (BinaryOperator *BO = dyn_cast<BinaryOperator>(ExitCond)) {
     if (BO->getOpcode() == Instruction::And) {
       // Recurse on the operands of the and.
@@ -3002,7 +3040,7 @@ ScalarEvolution::ComputeBackedgeTakenCountFromExitCondICmp(const Loop *L,
   LHS = getSCEVAtScope(LHS, L);
   RHS = getSCEVAtScope(RHS, L);
 
-  // At this point, we would like to compute how many iterations of the 
+  // At this point, we would like to compute how many iterations of the
   // loop the predicate will return true for these inputs.
   if (LHS->isLoopInvariant(L) && !RHS->isLoopInvariant(L)) {
     // If there is a loop-invariant, force it into the RHS.
@@ -3064,7 +3102,7 @@ ScalarEvolution::ComputeBackedgeTakenCountFromExitCondICmp(const Loop *L,
     if (ExitCond->getOperand(0)->getType()->isUnsigned())
       errs() << "[unsigned] ";
     errs() << *LHS << "   "
-         << Instruction::getOpcodeName(Instruction::ICmp) 
+         << Instruction::getOpcodeName(Instruction::ICmp)
          << "   " << *RHS << "\n";
 #endif
     break;
@@ -3120,10 +3158,12 @@ GetAddressedElementFromGlobal(GlobalVariable *GV,
 /// ComputeLoadConstantCompareBackedgeTakenCount - Given an exit condition of
 /// 'icmp op load X, cst', try to see if we can compute the backedge
 /// execution count.
-const SCEV* ScalarEvolution::
-ComputeLoadConstantCompareBackedgeTakenCount(LoadInst *LI, Constant *RHS,
-                                             const Loop *L,
-                                             ICmpInst::Predicate predicate) {
+const SCEV *
+ScalarEvolution::ComputeLoadConstantCompareBackedgeTakenCount(
+                                                LoadInst *LI,
+                                                Constant *RHS,
+                                                const Loop *L,
+                                                ICmpInst::Predicate predicate) {
   if (LI->isVolatile()) return CouldNotCompute;
 
   // Check to see if the loaded pointer is a getelementptr of a global.
@@ -3279,8 +3319,10 @@ static Constant *EvaluateExpression(Value *V, Constant *PHIVal) {
 /// in the header of its containing loop, we know the loop executes a
 /// constant number of times, and the PHI node is just a recurrence
 /// involving constants, fold it.
-Constant *ScalarEvolution::
-getConstantEvolutionLoopExitValue(PHINode *PN, const APInt& BEs, const Loop *L){
+Constant *
+ScalarEvolution::getConstantEvolutionLoopExitValue(PHINode *PN,
+                                                   const APInt& BEs,
+                                                   const Loop *L) {
   std::map<PHINode*, Constant*>::iterator I =
     ConstantEvolutionLoopExitValue.find(PN);
   if (I != ConstantEvolutionLoopExitValue.end())
@@ -3330,8 +3372,10 @@ getConstantEvolutionLoopExitValue(PHINode *PN, const APInt& BEs, const Loop *L){
 /// try to evaluate a few iterations of the loop until we get the exit
 /// condition gets a value of ExitWhen (true or false).  If we cannot
 /// evaluate the trip count of the loop, return CouldNotCompute.
-const SCEV* ScalarEvolution::
-ComputeBackedgeTakenCountExhaustively(const Loop *L, Value *Cond, bool ExitWhen) {
+const SCEV *
+ScalarEvolution::ComputeBackedgeTakenCountExhaustively(const Loop *L,
+                                                       Value *Cond,
+                                                       bool ExitWhen) {
   PHINode *PN = getConstantEvolvingPHI(Cond, L);
   if (PN == 0) return CouldNotCompute;
 
@@ -3467,7 +3511,7 @@ const SCEV* ScalarEvolution::getSCEVAtScope(const SCEV *V, const Loop *L) {
             }
           }
         }
-        
+
         Constant *C;
         if (const CmpInst *CI = dyn_cast<CmpInst>(I))
           C = ConstantFoldCompareInstOperands(CI->getPredicate(),
@@ -3492,7 +3536,8 @@ const SCEV* ScalarEvolution::getSCEVAtScope(const SCEV *V, const Loop *L) {
       if (OpAtScope != Comm->getOperand(i)) {
         // Okay, at least one of these operands is loop variant but might be
         // foldable.  Build a new instance of the folded commutative expression.
-        SmallVector<const SCEV*, 8> NewOps(Comm->op_begin(), Comm->op_begin()+i);
+        SmallVector<const SCEV *, 8> NewOps(Comm->op_begin(),
+                                            Comm->op_begin()+i);
         NewOps.push_back(OpAtScope);
 
         for (++i; i != e; ++i) {
@@ -3640,7 +3685,7 @@ SolveQuadraticEquation(const SCEVAddRecExpr *AddRec, ScalarEvolution &SE) {
   APInt Two(BitWidth, 2);
   APInt Four(BitWidth, 4);
 
-  { 
+  {
     using namespace APIntOps;
     const APInt& C = L;
     // Convert from chrec coefficients to polynomial coefficients AX^2+BX+C
@@ -3660,7 +3705,7 @@ SolveQuadraticEquation(const SCEVAddRecExpr *AddRec, ScalarEvolution &SE) {
     // integer value or else APInt::sqrt() will assert.
     APInt SqrtVal(SqrtTerm.sqrt());
 
-    // Compute the two solutions for the quadratic formula. 
+    // Compute the two solutions for the quadratic formula.
     // The divisions must be performed as signed divisions.
     APInt NegB(-B);
     APInt TwoA( A << 1 );
@@ -3672,7 +3717,7 @@ SolveQuadraticEquation(const SCEVAddRecExpr *AddRec, ScalarEvolution &SE) {
     ConstantInt *Solution1 = ConstantInt::get((NegB + SqrtVal).sdiv(TwoA));
     ConstantInt *Solution2 = ConstantInt::get((NegB - SqrtVal).sdiv(TwoA));
 
-    return std::make_pair(SE.getConstant(Solution1), 
+    return std::make_pair(SE.getConstant(Solution1),
                           SE.getConstant(Solution2));
     } // end APIntOps namespace
 }
@@ -3704,8 +3749,10 @@ const SCEV* ScalarEvolution::HowFarToZero(const SCEV *V, const Loop *L) {
     // where BW is the common bit width of Start and Step.
 
     // Get the initial value for the loop.
-    const SCEV* Start = getSCEVAtScope(AddRec->getStart(), L->getParentLoop());
-    const SCEV* Step = getSCEVAtScope(AddRec->getOperand(1), L->getParentLoop());
+    const SCEV *Start = getSCEVAtScope(AddRec->getStart(),
+                                       L->getParentLoop());
+    const SCEV *Step = getSCEVAtScope(AddRec->getOperand(1),
+                                      L->getParentLoop());
 
     if (const SCEVConstant *StepC = dyn_cast<SCEVConstant>(Step)) {
       // For now we handle only constant steps.
@@ -3736,7 +3783,7 @@ const SCEV* ScalarEvolution::HowFarToZero(const SCEV *V, const Loop *L) {
 #endif
       // Pick the smallest positive root value.
       if (ConstantInt *CB =
-          dyn_cast<ConstantInt>(ConstantExpr::getICmp(ICmpInst::ICMP_ULT, 
+          dyn_cast<ConstantInt>(ConstantExpr::getICmp(ICmpInst::ICMP_ULT,
                                    R1->getValue(), R2->getValue()))) {
         if (CB->getZExtValue() == false)
           std::swap(R1, R2);   // R1 is the minimum root now.
@@ -3861,88 +3908,111 @@ bool ScalarEvolution::isLoopGuardedByCond(const Loop *L,
         LoopEntryPredicate->isUnconditional())
       continue;
 
-    ICmpInst *ICI = dyn_cast<ICmpInst>(LoopEntryPredicate->getCondition());
-    if (!ICI) continue;
+    if (isNecessaryCond(LoopEntryPredicate->getCondition(), Pred, LHS, RHS,
+                        LoopEntryPredicate->getSuccessor(0) != PredecessorDest))
+      return true;
+  }
 
-    // Now that we found a conditional branch that dominates the loop, check to
-    // see if it is the comparison we are looking for.
-    Value *PreCondLHS = ICI->getOperand(0);
-    Value *PreCondRHS = ICI->getOperand(1);
-    ICmpInst::Predicate Cond;
-    if (LoopEntryPredicate->getSuccessor(0) == PredecessorDest)
-      Cond = ICI->getPredicate();
-    else
-      Cond = ICI->getInversePredicate();
+  return false;
+}
 
-    if (Cond == Pred)
-      ; // An exact match.
-    else if (!ICmpInst::isTrueWhenEqual(Cond) && Pred == ICmpInst::ICMP_NE)
-      ; // The actual condition is beyond sufficient.
-    else
-      // Check a few special cases.
-      switch (Cond) {
-      case ICmpInst::ICMP_UGT:
-        if (Pred == ICmpInst::ICMP_ULT) {
-          std::swap(PreCondLHS, PreCondRHS);
-          Cond = ICmpInst::ICMP_ULT;
-          break;
-        }
-        continue;
-      case ICmpInst::ICMP_SGT:
-        if (Pred == ICmpInst::ICMP_SLT) {
-          std::swap(PreCondLHS, PreCondRHS);
-          Cond = ICmpInst::ICMP_SLT;
+/// isNecessaryCond - Test whether the given CondValue value is a condition
+/// which is at least as strict as the one described by Pred, LHS, and RHS.
+bool ScalarEvolution::isNecessaryCond(Value *CondValue,
+                                      ICmpInst::Predicate Pred,
+                                      const SCEV *LHS, const SCEV *RHS,
+                                      bool Inverse) {
+  // Recursively handle And and Or conditions.
+  if (BinaryOperator *BO = dyn_cast<BinaryOperator>(CondValue)) {
+    if (BO->getOpcode() == Instruction::And) {
+      if (!Inverse)
+        return isNecessaryCond(BO->getOperand(0), Pred, LHS, RHS, Inverse) ||
+               isNecessaryCond(BO->getOperand(1), Pred, LHS, RHS, Inverse);
+    } else if (BO->getOpcode() == Instruction::Or) {
+      if (Inverse)
+        return isNecessaryCond(BO->getOperand(0), Pred, LHS, RHS, Inverse) ||
+               isNecessaryCond(BO->getOperand(1), Pred, LHS, RHS, Inverse);
+    }
+  }
+
+  ICmpInst *ICI = dyn_cast<ICmpInst>(CondValue);
+  if (!ICI) return false;
+
+  // Now that we found a conditional branch that dominates the loop, check to
+  // see if it is the comparison we are looking for.
+  Value *PreCondLHS = ICI->getOperand(0);
+  Value *PreCondRHS = ICI->getOperand(1);
+  ICmpInst::Predicate Cond;
+  if (Inverse)
+    Cond = ICI->getInversePredicate();
+  else
+    Cond = ICI->getPredicate();
+
+  if (Cond == Pred)
+    ; // An exact match.
+  else if (!ICmpInst::isTrueWhenEqual(Cond) && Pred == ICmpInst::ICMP_NE)
+    ; // The actual condition is beyond sufficient.
+  else
+    // Check a few special cases.
+    switch (Cond) {
+    case ICmpInst::ICMP_UGT:
+      if (Pred == ICmpInst::ICMP_ULT) {
+        std::swap(PreCondLHS, PreCondRHS);
+        Cond = ICmpInst::ICMP_ULT;
+        break;
+      }
+      return false;
+    case ICmpInst::ICMP_SGT:
+      if (Pred == ICmpInst::ICMP_SLT) {
+        std::swap(PreCondLHS, PreCondRHS);
+        Cond = ICmpInst::ICMP_SLT;
+        break;
+      }
+      return false;
+    case ICmpInst::ICMP_NE:
+      // Expressions like (x >u 0) are often canonicalized to (x != 0),
+      // so check for this case by checking if the NE is comparing against
+      // a minimum or maximum constant.
+      if (!ICmpInst::isTrueWhenEqual(Pred))
+        if (ConstantInt *CI = dyn_cast<ConstantInt>(PreCondRHS)) {
+          const APInt &A = CI->getValue();
+          switch (Pred) {
+          case ICmpInst::ICMP_SLT:
+            if (A.isMaxSignedValue()) break;
+            return false;
+          case ICmpInst::ICMP_SGT:
+            if (A.isMinSignedValue()) break;
+            return false;
+          case ICmpInst::ICMP_ULT:
+            if (A.isMaxValue()) break;
+            return false;
+          case ICmpInst::ICMP_UGT:
+            if (A.isMinValue()) break;
+            return false;
+          default:
+            return false;
+          }
+          Cond = ICmpInst::ICMP_NE;
+          // NE is symmetric but the original comparison may not be. Swap
+          // the operands if necessary so that they match below.
+          if (isa<SCEVConstant>(LHS))
+            std::swap(PreCondLHS, PreCondRHS);
           break;
         }
-        continue;
-      case ICmpInst::ICMP_NE:
-        // Expressions like (x >u 0) are often canonicalized to (x != 0),
-        // so check for this case by checking if the NE is comparing against
-        // a minimum or maximum constant.
-        if (!ICmpInst::isTrueWhenEqual(Pred))
-          if (ConstantInt *CI = dyn_cast<ConstantInt>(PreCondRHS)) {
-            const APInt &A = CI->getValue();
-            switch (Pred) {
-            case ICmpInst::ICMP_SLT:
-              if (A.isMaxSignedValue()) break;
-              continue;
-            case ICmpInst::ICMP_SGT:
-              if (A.isMinSignedValue()) break;
-              continue;
-            case ICmpInst::ICMP_ULT:
-              if (A.isMaxValue()) break;
-              continue;
-            case ICmpInst::ICMP_UGT:
-              if (A.isMinValue()) break;
-              continue;
-            default:
-              continue;
-            }
-            Cond = ICmpInst::ICMP_NE;
-            // NE is symmetric but the original comparison may not be. Swap
-            // the operands if necessary so that they match below.
-            if (isa<SCEVConstant>(LHS))
-              std::swap(PreCondLHS, PreCondRHS);
-            break;
-          }
-        continue;
-      default:
-        // We weren't able to reconcile the condition.
-        continue;
-      }
+      return false;
+    default:
+      // We weren't able to reconcile the condition.
+      return false;
+    }
 
-    if (!PreCondLHS->getType()->isInteger()) continue;
+  if (!PreCondLHS->getType()->isInteger()) return false;
 
-    const SCEV* PreCondLHSSCEV = getSCEV(PreCondLHS);
-    const SCEV* PreCondRHSSCEV = getSCEV(PreCondRHS);
-    if ((HasSameValue(LHS, PreCondLHSSCEV) &&
-         HasSameValue(RHS, PreCondRHSSCEV)) ||
-        (HasSameValue(LHS, getNotSCEV(PreCondRHSSCEV)) &&
-         HasSameValue(RHS, getNotSCEV(PreCondLHSSCEV))))
-      return true;
-  }
-
-  return false;
+  const SCEV *PreCondLHSSCEV = getSCEV(PreCondLHS);
+  const SCEV *PreCondRHSSCEV = getSCEV(PreCondRHS);
+  return (HasSameValue(LHS, PreCondLHSSCEV) &&
+          HasSameValue(RHS, PreCondRHSSCEV)) ||
+         (HasSameValue(LHS, getNotSCEV(PreCondRHSSCEV)) &&
+          HasSameValue(RHS, getNotSCEV(PreCondLHSSCEV)));
 }
 
 /// getBECount - Subtract the end and start values and divide by the step,
@@ -3975,9 +4045,9 @@ const SCEV* ScalarEvolution::getBECount(const SCEV* Start,
 /// HowManyLessThans - Return the number of times a backedge containing the
 /// specified less-than comparison will execute.  If not computable, return
 /// CouldNotCompute.
-ScalarEvolution::BackedgeTakenInfo ScalarEvolution::
-HowManyLessThans(const SCEV *LHS, const SCEV *RHS,
-                 const Loop *L, bool isSigned) {
+ScalarEvolution::BackedgeTakenInfo
+ScalarEvolution::HowManyLessThans(const SCEV *LHS, const SCEV *RHS,
+                                  const Loop *L, bool isSigned) {
   // Only handle:  "ADDREC < LoopInvariant".
   if (!RHS->isLoopInvariant(L)) return CouldNotCompute;
 
@@ -4027,7 +4097,7 @@ HowManyLessThans(const SCEV *LHS, const SCEV *RHS,
     const SCEV* Start = AddRec->getOperand(0);
 
     // Determine the minimum constant start value.
-    const SCEV* MinStart = isa<SCEVConstant>(Start) ? Start :
+    const SCEV *MinStart = isa<SCEVConstant>(Start) ? Start :
       getConstant(isSigned ? APInt::getSignedMinValue(BitWidth) :
                              APInt::getMinValue(BitWidth));
 
@@ -4070,7 +4140,7 @@ HowManyLessThans(const SCEV *LHS, const SCEV *RHS,
 /// the condition, thus computing the exit count. If the iteration count can't
 /// be computed, an instance of SCEVCouldNotCompute is returned.
 const SCEV* SCEVAddRecExpr::getNumIterationsInRange(ConstantRange Range,
-                                                   ScalarEvolution &SE) const {
+                                                    ScalarEvolution &SE) const {
   if (Range.isFullSet())  // Infinite loop.
     return SE.getCouldNotCompute();
 
@@ -4129,7 +4199,7 @@ const SCEV* SCEVAddRecExpr::getNumIterationsInRange(ConstantRange Range,
 
     // Ensure that the previous value is in the range.  This is a sanity check.
     assert(Range.contains(
-           EvaluateConstantChrecAtConstant(this, 
+           EvaluateConstantChrecAtConstant(this,
            ConstantInt::get(ExitVal - One), SE)->getValue()) &&
            "Linear scev computation is off in a bad way!");
     return SE.getConstant(ExitValue);
@@ -4150,7 +4220,7 @@ const SCEV* SCEVAddRecExpr::getNumIterationsInRange(ConstantRange Range,
     if (R1) {
       // Pick the smallest positive root value.
       if (ConstantInt *CB =
-          dyn_cast<ConstantInt>(ConstantExpr::getICmp(ICmpInst::ICMP_ULT, 
+          dyn_cast<ConstantInt>(ConstantExpr::getICmp(ICmpInst::ICMP_ULT,
                                    R1->getValue(), R2->getValue()))) {
         if (CB->getZExtValue() == false)
           std::swap(R1, R2);   // R1 is the minimum root now.
@@ -4264,7 +4334,7 @@ void ScalarEvolution::releaseMemory() {
   BackedgeTakenCounts.clear();
   ConstantEvolutionLoopExitValue.clear();
   ValuesAtScopes.clear();
-  
+
   for (std::map<ConstantInt*, SCEVConstant*>::iterator
        I = SCEVConstants.begin(), E = SCEVConstants.end(); I != E; ++I)
     delete I->second;
@@ -4294,7 +4364,7 @@ void ScalarEvolution::releaseMemory() {
   for (std::map<Value*, SCEVUnknown*>::iterator I = SCEVUnknowns.begin(),
        E = SCEVUnknowns.end(); I != E; ++I)
     delete I->second;
-  
+
   SCEVConstants.clear();
   SCEVTruncates.clear();
   SCEVZeroExtends.clear();
@@ -4334,6 +4404,15 @@ static void PrintLoopInfo(raw_ostream &OS, ScalarEvolution *SE,
   }
 
   OS << "\n";
+  OS << "Loop " << L->getHeader()->getName() << ": ";
+
+  if (!isa<SCEVCouldNotCompute>(SE->getMaxBackedgeTakenCount(L))) {
+    OS << "max backedge-taken count is " << *SE->getMaxBackedgeTakenCount(L);
+  } else {
+    OS << "Unpredictable max backedge-taken count. ";
+  }
+
+  OS << "\n";
 }
 
 void ScalarEvolution::print(raw_ostream &OS, const Module* ) const {
diff --git a/lib/Analysis/ScalarEvolutionExpander.cpp b/lib/Analysis/ScalarEvolutionExpander.cpp
index c5591d7..4cc5ebc 100644
--- a/lib/Analysis/ScalarEvolutionExpander.cpp
+++ b/lib/Analysis/ScalarEvolutionExpander.cpp
@@ -51,21 +51,26 @@ Value *SCEVExpander::InsertCastOfTo(Instruction::CastOps opcode, Value *V,
   if (Argument *A = dyn_cast<Argument>(V)) {
     // Check to see if there is already a cast!
     for (Value::use_iterator UI = A->use_begin(), E = A->use_end();
-         UI != E; ++UI) {
+         UI != E; ++UI)
       if ((*UI)->getType() == Ty)
         if (CastInst *CI = dyn_cast<CastInst>(cast<Instruction>(*UI)))
           if (CI->getOpcode() == opcode) {
             // If the cast isn't the first instruction of the function, move it.
-            if (BasicBlock::iterator(CI) != 
+            if (BasicBlock::iterator(CI) !=
                 A->getParent()->getEntryBlock().begin()) {
-              // If the CastInst is the insert point, change the insert point.
-              if (CI == InsertPt) ++InsertPt;
-              // Splice the cast at the beginning of the entry block.
-              CI->moveBefore(A->getParent()->getEntryBlock().begin());
+              // Recreate the cast at the beginning of the entry block.
+              // The old cast is left in place in case it is being used
+              // as an insert point.
+              Instruction *NewCI =
+                CastInst::Create(opcode, V, Ty, "",
+                                 A->getParent()->getEntryBlock().begin());
+              NewCI->takeName(CI);
+              CI->replaceAllUsesWith(NewCI);
+              return NewCI;
             }
             return CI;
           }
-    }
+
     Instruction *I = CastInst::Create(opcode, V, Ty, V->getName(),
                                       A->getParent()->getEntryBlock().begin());
     InsertedValues.insert(I);
@@ -85,10 +90,13 @@ Value *SCEVExpander::InsertCastOfTo(Instruction::CastOps opcode, Value *V,
             It = cast<InvokeInst>(I)->getNormalDest()->begin();
           while (isa<PHINode>(It)) ++It;
           if (It != BasicBlock::iterator(CI)) {
-            // If the CastInst is the insert point, change the insert point.
-            if (CI == InsertPt) ++InsertPt;
-            // Splice the cast immediately after the operand in question.
-            CI->moveBefore(It);
+            // Recreate the cast immediately after the operand in question.
+            // The old cast is left in place in case it is being used
+            // as an insert point.
+            Instruction *NewCI = CastInst::Create(opcode, V, Ty, "", It);
+            NewCI->takeName(CI);
+            CI->replaceAllUsesWith(NewCI);
+            return NewCI;
           }
           return CI;
         }
@@ -460,13 +468,13 @@ Value *SCEVExpander::visitAddRecExpr(const SCEVAddRecExpr *S) {
     const SCEV* Step = SE.getAnyExtendExpr(S->getStepRecurrence(SE),
                                           CanonicalIV->getType());
     Value *V = expand(SE.getAddRecExpr(Start, Step, S->getLoop()));
-    BasicBlock::iterator SaveInsertPt = getInsertionPoint();
+    BasicBlock::iterator SaveInsertPt = InsertPt;
     BasicBlock::iterator NewInsertPt =
       next(BasicBlock::iterator(cast<Instruction>(V)));
     while (isa<PHINode>(NewInsertPt)) ++NewInsertPt;
     V = expandCodeFor(SE.getTruncateExpr(SE.getUnknown(V), Ty), 0,
                       NewInsertPt);
-    setInsertionPoint(SaveInsertPt);
+    InsertPt = SaveInsertPt;
     return V;
   }
 
@@ -497,8 +505,9 @@ Value *SCEVExpander::visitAddRecExpr(const SCEVAddRecExpr *S) {
       }
     }
 
-    Value *RestV = expand(Rest);
-    return expand(SE.getAddExpr(S->getStart(), SE.getUnknown(RestV)));
+    // Just do a normal add. Pre-expand the operands to suppress folding.
+    return expand(SE.getAddExpr(SE.getUnknown(expand(S->getStart())),
+                                SE.getUnknown(expand(Rest))));
   }
 
   // {0,+,1} --> Insert a canonical induction variable into the loop!
@@ -546,36 +555,13 @@ Value *SCEVExpander::visitAddRecExpr(const SCEVAddRecExpr *S) {
              getOrInsertCanonicalInductionVariable(L, Ty);
 
   // If this is a simple linear addrec, emit it now as a special case.
-  if (S->isAffine()) {   // {0,+,F} --> i*F
-    Value *F = expandCodeFor(S->getOperand(1), Ty);
-
-    // If the insert point is directly inside of the loop, emit the multiply at
-    // the insert point.  Otherwise, L is a loop that is a parent of the insert
-    // point loop.  If we can, move the multiply to the outer most loop that it
-    // is safe to be in.
-    BasicBlock::iterator MulInsertPt = getInsertionPoint();
-    Loop *InsertPtLoop = SE.LI->getLoopFor(MulInsertPt->getParent());
-    if (InsertPtLoop != L && InsertPtLoop &&
-        L->contains(InsertPtLoop->getHeader())) {
-      do {
-        // If we cannot hoist the multiply out of this loop, don't.
-        if (!InsertPtLoop->isLoopInvariant(F)) break;
-
-        BasicBlock *InsertPtLoopPH = InsertPtLoop->getLoopPreheader();
-
-        // If this loop hasn't got a preheader, we aren't able to hoist the
-        // multiply.
-        if (!InsertPtLoopPH)
-          break;
-
-        // Otherwise, move the insert point to the preheader.
-        MulInsertPt = InsertPtLoopPH->getTerminator();
-        InsertPtLoop = InsertPtLoop->getParentLoop();
-      } while (InsertPtLoop != L);
-    }
-    
-    return InsertBinop(Instruction::Mul, I, F, MulInsertPt);
-  }
+  if (S->isAffine())    // {0,+,F} --> i*F
+    return
+      expand(SE.getTruncateOrNoop(
+        SE.getMulExpr(SE.getUnknown(I),
+                      SE.getNoopOrAnyExtend(S->getOperand(1),
+                                            I->getType())),
+        Ty));
 
   // If this is a chain of recurrences, turn it into a closed form, using the
   // folders, then expandCodeFor the closed form.  This allows the folders to
@@ -666,14 +652,42 @@ Value *SCEVExpander::expandCodeFor(const SCEV* SH, const Type *Ty) {
 }
 
 Value *SCEVExpander::expand(const SCEV *S) {
-  // Check to see if we already expanded this.
-  std::map<const SCEV*, AssertingVH<Value> >::iterator I =
-    InsertedExpressions.find(S);
-  if (I != InsertedExpressions.end())
+  BasicBlock::iterator SaveInsertPt = InsertPt;
+
+  // Compute an insertion point for this SCEV object. Hoist the instructions
+  // as far out in the loop nest as possible.
+  for (Loop *L = SE.LI->getLoopFor(InsertPt->getParent()); ;
+       L = L->getParentLoop())
+    if (S->isLoopInvariant(L)) {
+      if (!L) break;
+      if (BasicBlock *Preheader = L->getLoopPreheader())
+        InsertPt = Preheader->getTerminator();
+    } else {
+      // If the SCEV is computable at this level, insert it into the header
+      // after the PHIs (and after any other instructions that we've inserted
+      // there) so that it is guaranteed to dominate any user inside the loop.
+      if (L && S->hasComputableLoopEvolution(L))
+        InsertPt = L->getHeader()->getFirstNonPHI();
+      while (isInsertedInstruction(InsertPt)) ++InsertPt;
+      break;
+    }
+
+  // Check to see if we already expanded this here.
+  std::map<std::pair<const SCEV *, Instruction *>,
+           AssertingVH<Value> >::iterator I =
+    InsertedExpressions.find(std::make_pair(S, InsertPt));
+  if (I != InsertedExpressions.end()) {
+    InsertPt = SaveInsertPt;
     return I->second;
-  
+  }
+
+  // Expand the expression into instructions.
   Value *V = visit(S);
-  InsertedExpressions[S] = V;
+
+  // Remember the expanded value for this SCEV at this location.
+  InsertedExpressions[std::make_pair(S, InsertPt)] = V;
+
+  InsertPt = SaveInsertPt;
   return V;
 }
 
@@ -686,6 +700,9 @@ SCEVExpander::getOrInsertCanonicalInductionVariable(const Loop *L,
                                                     const Type *Ty) {
   assert(Ty->isInteger() && "Can only insert integer induction variables!");
   const SCEV* H = SE.getAddRecExpr(SE.getIntegerSCEV(0, Ty),
-                                  SE.getIntegerSCEV(1, Ty), L);
-  return expand(H);
+                                   SE.getIntegerSCEV(1, Ty), L);
+  BasicBlock::iterator SaveInsertPt = InsertPt;
+  Value *V = expandCodeFor(H, 0, L->getHeader()->begin());
+  InsertPt = SaveInsertPt;
+  return V;
 }
diff --git a/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
index e931904..bc3af9a 100644
--- a/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
+++ b/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
@@ -45,8 +45,9 @@ AsmPrinter::AsmPrinter(raw_ostream &o, TargetMachine &tm,
                        const TargetAsmInfo *T, CodeGenOpt::Level OL, bool VDef)
   : MachineFunctionPass(&ID), FunctionNumber(0), OptLevel(OL), O(o),
     TM(tm), TAI(T), TRI(tm.getRegisterInfo()),
-    IsInTextSection(false)
-{
+    IsInTextSection(false), LastMI(0), LastFn(0), Counter(~0U),
+    PrevDLT(0, ~0U, ~0U) {
+  DW = 0; MMI = 0;
   switch (AsmVerbose) {
   case cl::BOU_UNSET: VerboseAsm = VDef;  break;
   case cl::BOU_TRUE:  VerboseAsm = true;  break;
@@ -177,28 +178,44 @@ bool AsmPrinter::doInitialization(Module &M) {
 
   SwitchToDataSection("");   // Reset back to no section.
   
-  if (TAI->doesSupportDebugInformation() 
-      || TAI->doesSupportExceptionHandling()) {
-    MachineModuleInfo *MMI = getAnalysisIfAvailable<MachineModuleInfo>();
-    if (MMI) {
+  if (TAI->doesSupportDebugInformation() ||
+      TAI->doesSupportExceptionHandling()) {
+    MMI = getAnalysisIfAvailable<MachineModuleInfo>();
+    if (MMI)
       MMI->AnalyzeModule(M);
-      DW = getAnalysisIfAvailable<DwarfWriter>();
-      if (DW)
-        DW->BeginModule(&M, MMI, O, this, TAI);
-    }
+    DW = getAnalysisIfAvailable<DwarfWriter>();
+    if (DW)
+      DW->BeginModule(&M, MMI, O, this, TAI);
   }
 
   return false;
 }
 
 bool AsmPrinter::doFinalization(Module &M) {
+  // Emit final debug information.
+  if (TAI->doesSupportDebugInformation() || TAI->doesSupportExceptionHandling())
+    DW->EndModule();
+  
+  // If the target wants to know about weak references, print them all.
   if (TAI->getWeakRefDirective()) {
-    if (!ExtWeakSymbols.empty())
-      SwitchToDataSection("");
-
-    for (std::set<const GlobalValue*>::iterator i = ExtWeakSymbols.begin(),
-         e = ExtWeakSymbols.end(); i != e; ++i)
-      O << TAI->getWeakRefDirective() << Mang->getValueName(*i) << '\n';
+    // FIXME: This is not lazy, it would be nice to only print weak references
+    // to stuff that is actually used.  Note that doing so would require targets
+    // to notice uses in operands (due to constant exprs etc).  This should
+    // happen with the MC stuff eventually.
+    SwitchToDataSection("");
+
+    // Print out module-level global variables here.
+    for (Module::const_global_iterator I = M.global_begin(), E = M.global_end();
+         I != E; ++I) {
+      if (I->hasExternalWeakLinkage())
+        O << TAI->getWeakRefDirective() << Mang->getValueName(I) << '\n';
+    }
+    
+    for (Module::const_iterator I = M.begin(), E = M.end();
+         I != E; ++I) {
+      if (I->hasExternalWeakLinkage())
+        O << TAI->getWeakRefDirective() << Mang->getValueName(I) << '\n';
+    }
   }
 
   if (TAI->getSetDirective()) {
@@ -207,7 +224,7 @@ bool AsmPrinter::doFinalization(Module &M) {
 
     O << '\n';
     for (Module::const_alias_iterator I = M.alias_begin(), E = M.alias_end();
-         I!=E; ++I) {
+         I != E; ++I) {
       std::string Name = Mang->getValueName(I);
       std::string Target;
 
@@ -235,12 +252,13 @@ bool AsmPrinter::doFinalization(Module &M) {
 
   // If we don't have any trampolines, then we don't require stack memory
   // to be executable. Some targets have a directive to declare this.
-  Function* InitTrampolineIntrinsic = M.getFunction("llvm.init.trampoline");
+  Function *InitTrampolineIntrinsic = M.getFunction("llvm.init.trampoline");
   if (!InitTrampolineIntrinsic || InitTrampolineIntrinsic->use_empty())
     if (TAI->getNonexecutableStackDirective())
       O << TAI->getNonexecutableStackDirective() << '\n';
 
   delete Mang; Mang = 0;
+  DW = 0; MMI = 0;
   return false;
 }
 
@@ -1298,20 +1316,15 @@ void AsmPrinter::PrintSpecial(const MachineInstr *MI, const char *Code) const {
     if (VerboseAsm)
       O << TAI->getCommentString();
   } else if (!strcmp(Code, "uid")) {
-    // Assign a unique ID to this machine instruction.
-    static const MachineInstr *LastMI = 0;
-    static const Function *F = 0;
-    static unsigned Counter = 0U-1;
-
     // Comparing the address of MI isn't sufficient, because machineinstrs may
     // be allocated to the same address across functions.
     const Function *ThisF = MI->getParent()->getParent()->getFunction();
     
-    // If this is a new machine instruction, bump the counter.
-    if (LastMI != MI || F != ThisF) {
+    // If this is a new machine instruction, bump the counter.
+    if (LastMI != MI || LastFn != ThisF) {
       ++Counter;
       LastMI = MI;
-      F = ThisF;
+      LastFn = ThisF;
     }
     O << Counter;
   } else {
@@ -1326,7 +1339,6 @@ void AsmPrinter::PrintSpecial(const MachineInstr *MI, const char *Code) const {
 void AsmPrinter::processDebugLoc(DebugLoc DL) {
   if (TAI->doesSupportDebugInformation() && DW->ShouldEmitDwarfDebug()) {
     if (!DL.isUnknown()) {
-      static DebugLocTuple PrevDLT(0, ~0U, ~0U);
       DebugLocTuple CurDLT = MF->getDebugLocTuple(DL);
 
       if (CurDLT.CompileUnit != 0 && PrevDLT != CurDLT)
diff --git a/lib/CodeGen/AsmPrinter/DIE.cpp b/lib/CodeGen/AsmPrinter/DIE.cpp
index dc149cf..01c431c 100644
--- a/lib/CodeGen/AsmPrinter/DIE.cpp
+++ b/lib/CodeGen/AsmPrinter/DIE.cpp
@@ -126,7 +126,6 @@ void DIE::Profile(FoldingSetNodeID &ID) {
 
 #ifndef NDEBUG
 void DIE::print(std::ostream &O, unsigned IncIndent) {
-  static unsigned IndentCount = 0;
   IndentCount += IncIndent;
   const std::string Indent(IndentCount, ' ');
   bool isBlock = Abbrev.getTag() == 0;
diff --git a/lib/CodeGen/AsmPrinter/DIE.h b/lib/CodeGen/AsmPrinter/DIE.h
index b14d91c..5b60327 100644
--- a/lib/CodeGen/AsmPrinter/DIE.h
+++ b/lib/CodeGen/AsmPrinter/DIE.h
@@ -141,9 +141,13 @@ namespace llvm {
 
     /// Abstract compile unit.
     CompileUnit *AbstractCU;
+    
+    // Private data for print()
+    mutable unsigned IndentCount;
   public:
     explicit DIE(unsigned Tag)
-      : Abbrev(Tag, dwarf::DW_CHILDREN_no), Offset(0), Size(0) {}
+      : Abbrev(Tag, dwarf::DW_CHILDREN_no), Offset(0),
+        Size(0), IndentCount(0) {}
     virtual ~DIE();
 
     // Accessors.
diff --git a/lib/CodeGen/AsmPrinter/DwarfDebug.cpp b/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
index 9d340e3..cbe542b 100644
--- a/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
+++ b/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
@@ -141,9 +141,12 @@ class VISIBILITY_HIDDEN DbgScope {
   SmallVector<DbgScope *, 4> Scopes;  // Scopes defined in scope.
   SmallVector<DbgVariable *, 8> Variables;// Variables declared in scope.
   SmallVector<DbgConcreteScope *, 8> ConcreteInsts;// Concrete insts of funcs.
+  
+  // Private state for dump()
+  mutable unsigned IndentLevel;
 public:
   DbgScope(DbgScope *P, DIDescriptor D)
-    : Parent(P), Desc(D), StartLabelID(0), EndLabelID(0) {}
+    : Parent(P), Desc(D), StartLabelID(0), EndLabelID(0), IndentLevel(0) {}
   virtual ~DbgScope();
 
   // Accessors.
@@ -176,7 +179,6 @@ public:
 
 #ifndef NDEBUG
 void DbgScope::dump() const {
-  static unsigned IndentLevel = 0;
   std::string Indent(IndentLevel, ' ');
 
   cerr << Indent; Desc.dump();
@@ -1240,27 +1242,7 @@ void DwarfDebug::ConstructCompileUnit(GlobalVariable *GV) {
   CompileUnits.push_back(Unit);
 }
 
-/// ConstructCompileUnits - Create a compile unit DIEs.
-void DwarfDebug::ConstructCompileUnits() {
-  GlobalVariable *Root = M->getGlobalVariable("llvm.dbg.compile_units");
-  if (!Root)
-    return;
-  assert(Root->hasLinkOnceLinkage() && Root->hasOneUse() &&
-         "Malformed compile unit descriptor anchor type");
-  Constant *RootC = cast<Constant>(*Root->use_begin());
-  assert(RootC->hasNUsesOrMore(1) &&
-         "Malformed compile unit descriptor anchor type");
-
-  for (Value::use_iterator UI = RootC->use_begin(), UE = Root->use_end();
-       UI != UE; ++UI)
-    for (Value::use_iterator UUI = UI->use_begin(), UUE = UI->use_end();
-         UUI != UUE; ++UUI) {
-      GlobalVariable *GV = cast<GlobalVariable>(*UUI);
-      ConstructCompileUnit(GV);
-    }
-}
-
-bool DwarfDebug::ConstructGlobalVariableDIE(GlobalVariable *GV) {
+void DwarfDebug::ConstructGlobalVariableDIE(GlobalVariable *GV) {
   DIGlobalVariable DI_GV(GV);
   CompileUnit *DW_Unit = MainCU;
   if (!DW_Unit)
@@ -1269,7 +1251,7 @@ bool DwarfDebug::ConstructGlobalVariableDIE(GlobalVariable *GV) {
   // Check for pre-existence.
   DIE *&Slot = DW_Unit->getDieMapSlotFor(DI_GV.getGV());
   if (Slot)
-    return false;
+    return;
 
   DIE *VariableDie = CreateGlobalVariableDIE(DW_Unit, DI_GV);
 
@@ -1290,33 +1272,10 @@ bool DwarfDebug::ConstructGlobalVariableDIE(GlobalVariable *GV) {
   // Expose as global. FIXME - need to check external flag.
   std::string Name;
   DW_Unit->AddGlobal(DI_GV.getName(Name), VariableDie);
-  return true;
+  return;
 }
 
-/// ConstructGlobalVariableDIEs - Create DIEs for each of the externally visible
-/// global variables. Return true if at least one global DIE is created.
-bool DwarfDebug::ConstructGlobalVariableDIEs() {
-  GlobalVariable *Root = M->getGlobalVariable("llvm.dbg.global_variables");
-  if (!Root)
-    return false;
-
-  assert(Root->hasLinkOnceLinkage() && Root->hasOneUse() &&
-         "Malformed global variable descriptor anchor type");
-  Constant *RootC = cast<Constant>(*Root->use_begin());
-  assert(RootC->hasNUsesOrMore(1) &&
-         "Malformed global variable descriptor anchor type");
-
-  bool Result = false;
-  for (Value::use_iterator UI = RootC->use_begin(), UE = Root->use_end();
-       UI != UE; ++UI)
-    for (Value::use_iterator UUI = UI->use_begin(), UUE = UI->use_end();
-         UUI != UUE; ++UUI)
-      Result |= ConstructGlobalVariableDIE(cast<GlobalVariable>(*UUI));
-
-  return Result;
-}
-
-bool DwarfDebug::ConstructSubprogram(GlobalVariable *GV) {
+void DwarfDebug::ConstructSubprogram(GlobalVariable *GV) {
   DISubprogram SP(GV);
   CompileUnit *Unit = MainCU;
   if (!Unit)
@@ -1325,12 +1284,12 @@ bool DwarfDebug::ConstructSubprogram(GlobalVariable *GV) {
   // Check for pre-existence.
   DIE *&Slot = Unit->getDieMapSlotFor(GV);
   if (Slot)
-    return false;
+    return;
 
   if (!SP.isDefinition())
     // This is a method declaration which will be handled while constructing
     // class type.
-    return false;
+    return;
 
   DIE *SubprogramDie = CreateSubprogramDIE(Unit, SP);
 
@@ -1343,40 +1302,27 @@ bool DwarfDebug::ConstructSubprogram(GlobalVariable *GV) {
   // Expose as global.
   std::string Name;
   Unit->AddGlobal(SP.getName(Name), SubprogramDie);
-  return true;
+  return;
 }
 
-/// ConstructSubprograms - Create DIEs for each of the externally visible
-/// subprograms. Return true if at least one subprogram DIE is created.
-bool DwarfDebug::ConstructSubprograms() {
-  GlobalVariable *Root = M->getGlobalVariable("llvm.dbg.subprograms");
-  if (!Root)
-    return false;
-
-  assert(Root->hasLinkOnceLinkage() && Root->hasOneUse() &&
-         "Malformed subprogram descriptor anchor type");
-  Constant *RootC = cast<Constant>(*Root->use_begin());
-  assert(RootC->hasNUsesOrMore(1) &&
-         "Malformed subprogram descriptor anchor type");
+  /// BeginModule - Emit all Dwarf sections that should come prior to the
+  /// content. Create global DIEs and emit initial debug info sections.
+  /// This is invoked by the target AsmPrinter.
+void DwarfDebug::BeginModule(Module *M, MachineModuleInfo *mmi) {
+  this->M = M;
 
-  bool Result = false;
-  for (Value::use_iterator UI = RootC->use_begin(), UE = Root->use_end();
-       UI != UE; ++UI)
-    for (Value::use_iterator UUI = UI->use_begin(), UUE = UI->use_end();
-         UUI != UUE; ++UUI)
-      Result |= ConstructSubprogram(cast<GlobalVariable>(*UUI));
-
-  return Result;
-}
-
-/// SetDebugInfo - Create global DIEs and emit initial debug info sections.
-/// This is inovked by the target AsmPrinter.
-void DwarfDebug::SetDebugInfo(MachineModuleInfo *mmi) {
   if (TimePassesIsEnabled)
     DebugTimer->startTimer();
 
+  SmallVector<GlobalVariable *, 2> CUs;
+  SmallVector<GlobalVariable *, 4> GVs;
+  SmallVector<GlobalVariable *, 4> SPs;
+  CollectDebugInfoAnchors(*M, CUs, GVs, SPs);
+
   // Create all the compile unit DIEs.
-  ConstructCompileUnits();
+  for (SmallVector<GlobalVariable *, 2>::iterator I = CUs.begin(),
+         E = CUs.end(); I != E; ++I) 
+    ConstructCompileUnit(*I);
 
   if (CompileUnits.empty()) {
     if (TimePassesIsEnabled)
@@ -1385,21 +1331,25 @@ void DwarfDebug::SetDebugInfo(MachineModuleInfo *mmi) {
     return;
   }
 
-  // Create DIEs for each of the externally visible global variables.
-  bool globalDIEs = ConstructGlobalVariableDIEs();
-
-  // Create DIEs for each of the externally visible subprograms.
-  bool subprogramDIEs = ConstructSubprograms();
-
   // If there is not any debug info available for any global variables and any
   // subprograms then there is not any debug info to emit.
-  if (!globalDIEs && !subprogramDIEs) {
+  if (GVs.empty() && SPs.empty()) {
     if (TimePassesIsEnabled)
       DebugTimer->stopTimer();
 
     return;
   }
 
+  // Create DIEs for each of the externally visible global variables.
+  for (SmallVector<GlobalVariable *, 4>::iterator I = GVs.begin(),
+         E = GVs.end(); I != E; ++I) 
+    ConstructGlobalVariableDIE(*I);
+
+  // Create DIEs for each of the externally visible subprograms.
+  for (SmallVector<GlobalVariable *, 4>::iterator I = SPs.begin(),
+         E = SPs.end(); I != E; ++I) 
+    ConstructSubprogram(*I);
+
   MMI = mmi;
   shouldEmit = true;
   MMI->setDebugInfoAvailability(true);
diff --git a/lib/CodeGen/AsmPrinter/DwarfDebug.h b/lib/CodeGen/AsmPrinter/DwarfDebug.h
index 9824566..111ec33 100644
--- a/lib/CodeGen/AsmPrinter/DwarfDebug.h
+++ b/lib/CodeGen/AsmPrinter/DwarfDebug.h
@@ -460,21 +460,10 @@ class VISIBILITY_HIDDEN DwarfDebug : public Dwarf {
 
   void ConstructCompileUnit(GlobalVariable *GV);
 
-  /// ConstructCompileUnits - Create a compile unit DIEs.
-  void ConstructCompileUnits();
+  void ConstructGlobalVariableDIE(GlobalVariable *GV);
 
-  bool ConstructGlobalVariableDIE(GlobalVariable *GV);
+  void ConstructSubprogram(GlobalVariable *GV);
 
-  /// ConstructGlobalVariableDIEs - Create DIEs for each of the externally 
-  /// visible global variables. Return true if at least one global DIE is
-  /// created.
-  bool ConstructGlobalVariableDIEs();
-
-  bool ConstructSubprogram(GlobalVariable *GV);
-
-  /// ConstructSubprograms - Create DIEs for each of the externally visible
-  /// subprograms. Return true if at least one subprogram DIE is created.
-  bool ConstructSubprograms();
 public:
   //===--------------------------------------------------------------------===//
   // Main entry points.
@@ -486,15 +475,9 @@ public:
   /// be emitted.
   bool ShouldEmitDwarfDebug() const { return shouldEmit; }
 
-  /// SetDebugInfo - Create global DIEs and emit initial debug info sections.
-  /// This is inovked by the target AsmPrinter.
-  void SetDebugInfo(MachineModuleInfo *mmi);
-
   /// BeginModule - Emit all Dwarf sections that should come prior to the
   /// content.
-  void BeginModule(Module *M) {
-    this->M = M;
-  }
+  void BeginModule(Module *M, MachineModuleInfo *MMI);
 
   /// EndModule - Emit all Dwarf sections that should come after the content.
   ///
diff --git a/lib/CodeGen/AsmPrinter/DwarfException.h b/lib/CodeGen/AsmPrinter/DwarfException.h
index 4479af2..f1c3e56 100644
--- a/lib/CodeGen/AsmPrinter/DwarfException.h
+++ b/lib/CodeGen/AsmPrinter/DwarfException.h
@@ -149,16 +149,11 @@ public:
   DwarfException(raw_ostream &OS, AsmPrinter *A, const TargetAsmInfo *T);
   virtual ~DwarfException();
 
-  /// SetModuleInfo - Set machine module information when it's known that pass
-  /// manager has created it.  Set by the target AsmPrinter.
-  void SetModuleInfo(MachineModuleInfo *mmi) {
-    MMI = mmi;
-  }
-
   /// BeginModule - Emit all exception information that should come prior to the
   /// content.
-  void BeginModule(Module *M) {
-    this->M = M;
+  void BeginModule(Module *m, MachineModuleInfo *mmi) {
+    this->M = m;
+    this->MMI = mmi;
   }
 
   /// EndModule - Emit all exception information that should come after the
diff --git a/lib/CodeGen/AsmPrinter/DwarfWriter.cpp b/lib/CodeGen/AsmPrinter/DwarfWriter.cpp
index 483ee559..89084989 100644
--- a/lib/CodeGen/AsmPrinter/DwarfWriter.cpp
+++ b/lib/CodeGen/AsmPrinter/DwarfWriter.cpp
@@ -42,10 +42,8 @@ void DwarfWriter::BeginModule(Module *M,
                               const TargetAsmInfo *T) {
   DE = new DwarfException(OS, A, T);
   DD = new DwarfDebug(OS, A, T);
-  DE->BeginModule(M);
-  DD->BeginModule(M);
-  DD->SetDebugInfo(MMI);
-  DE->SetModuleInfo(MMI);
+  DE->BeginModule(M, MMI);
+  DD->BeginModule(M, MMI);
 }
 
 /// EndModule - Emit all Dwarf sections that should come after the content.
diff --git a/lib/CodeGen/AsmPrinter/Makefile b/lib/CodeGen/AsmPrinter/Makefile
index cb5b3f6..8f65d8d 100644
--- a/lib/CodeGen/AsmPrinter/Makefile
+++ b/lib/CodeGen/AsmPrinter/Makefile
@@ -9,7 +9,5 @@
 LEVEL = ../../..
 LIBRARYNAME = LLVMAsmPrinter
 PARALLEL_DIRS =
-BUILD_ARCHIVE = 1
-DONT_BUILD_RELINKED = 1
 
 include $(LEVEL)/Makefile.common
diff --git a/lib/CodeGen/CMakeLists.txt b/lib/CodeGen/CMakeLists.txt
index 5ba8b3c..eeefe31 100644
--- a/lib/CodeGen/CMakeLists.txt
+++ b/lib/CodeGen/CMakeLists.txt
@@ -63,3 +63,5 @@ add_llvm_library(LLVMCodeGen
   VirtRegMap.cpp
   VirtRegRewriter.cpp
   )
+
+target_link_libraries (LLVMCodeGen LLVMCore)
diff --git a/lib/CodeGen/ELF.h b/lib/CodeGen/ELF.h
index 28b6be8..8d92373 100644
--- a/lib/CodeGen/ELF.h
+++ b/lib/CodeGen/ELF.h
@@ -144,6 +144,9 @@ namespace llvm {
     uint8_t Other;
     unsigned short SectionIdx;
 
+    // Symbol index into the Symbol table
+    unsigned SymTabIdx;
+
     enum { 
       STB_LOCAL = 0,
       STB_GLOBAL = 1,
@@ -168,7 +171,8 @@ namespace llvm {
     ELFSym(const GlobalValue *gv) : GV(gv), IsCommon(false), IsBss(false),
                                     IsConstant(false), NameIdx(0), Value(0),
                                     Size(0), Info(0), Other(STV_DEFAULT),
-                                    SectionIdx(ELFSection::SHN_UNDEF) {
+                                    SectionIdx(ELFSection::SHN_UNDEF),
+                                    SymTabIdx(0) {
       if (!GV)
         return;
 
@@ -191,6 +195,10 @@ namespace llvm {
       return (Info >> 4) & 0xf;
     }
 
+    unsigned getType() {
+      return Info & 0xf;
+    }
+
     void setBind(unsigned X) {
       assert(X == (X & 0xF) && "Bind value out of range!");
       Info = (Info & 0x0F) | (X << 4);
diff --git a/lib/CodeGen/ELFCodeEmitter.cpp b/lib/CodeGen/ELFCodeEmitter.cpp
index 8cb7c94..168fed5 100644
--- a/lib/CodeGen/ELFCodeEmitter.cpp
+++ b/lib/CodeGen/ELFCodeEmitter.cpp
@@ -16,6 +16,7 @@
 #include "llvm/CodeGen/BinaryObject.h"
 #include "llvm/CodeGen/MachineConstantPool.h"
 #include "llvm/CodeGen/MachineJumpTableInfo.h"
+#include "llvm/Target/TargetData.h"
 #include "llvm/Target/TargetMachine.h"
 #include "llvm/Support/Debug.h"
 
@@ -103,21 +104,28 @@ bool ELFCodeEmitter::finishFunction(MachineFunction &MF) {
     break;
   }
 
+  // Emit constant pool to appropriate section(s)
+  emitConstantPool(MF.getConstantPool());
+
   // Relocations
   // -----------
-  // If we have emitted any relocations to function-specific objects such as 
+  // If we have emitted any relocations to function-specific objects such as
   // basic blocks, constant pools entries, or jump tables, record their
   // addresses now so that we can rewrite them with the correct addresses
   // later.
   for (unsigned i = 0, e = Relocations.size(); i != e; ++i) {
     MachineRelocation &MR = Relocations[i];
     intptr_t Addr;
-    if (MR.isBasicBlock()) {
+    if (MR.isGlobalValue()) {
+      EW.PendingGlobals.insert(MR.getGlobalValue());
+    } else if (MR.isBasicBlock()) {
       Addr = getMachineBasicBlockAddress(MR.getBasicBlock());
       MR.setConstantVal(ES->SectionIdx);
       MR.setResultPointer((void*)Addr);
-    } else if (MR.isGlobalValue()) {
-      EW.PendingGlobals.insert(MR.getGlobalValue());
+    } else if (MR.isConstantPoolIndex()) {
+      Addr = getConstantPoolEntryAddress(MR.getConstantPoolIndex());
+      MR.setConstantVal(CPSections[MR.getConstantPoolIndex()]);
+      MR.setResultPointer((void*)Addr);
     } else {
       assert(0 && "Unhandled relocation type");
     }
@@ -128,4 +136,36 @@ bool ELFCodeEmitter::finishFunction(MachineFunction &MF) {
   return false;
 }
 
+/// emitConstantPool - For each constant pool entry, figure out which section
+/// the constant should live in and emit the constant.
+void ELFCodeEmitter::emitConstantPool(MachineConstantPool *MCP) {
+  const std::vector<MachineConstantPoolEntry> &CP = MCP->getConstants();
+  if (CP.empty()) return;
+
+  // TODO: handle PIC codegen
+  assert(TM.getRelocationModel() != Reloc::PIC_ &&
+         "PIC codegen not yet handled for elf constant pools!");
+
+  const TargetAsmInfo *TAI = TM.getTargetAsmInfo();
+  for (unsigned i = 0, e = CP.size(); i != e; ++i) {
+    MachineConstantPoolEntry CPE = CP[i];
+
+    // Get the right ELF Section for this constant pool entry
+    std::string CstPoolName =
+      TAI->SelectSectionForMachineConst(CPE.getType())->getName();
+    ELFSection &CstPoolSection =
+      EW.getConstantPoolSection(CstPoolName, CPE.getAlignment());
+
+    // Record the entry's offset and section index, in constant pool order
+    CPLocations.push_back(CstPoolSection.size());
+    CPSections.push_back(CstPoolSection.SectionIdx);
+
+    // Emit the constant to its constant pool section. Entries flagged by
+    // isMachineConstantPoolEntry() are not supported yet.
+    assert(!CPE.isMachineConstantPoolEntry() &&
+           "CPE.isMachineConstantPoolEntry not supported yet");
+    EW.EmitGlobalConstant(CPE.Val.ConstVal, CstPoolSection);
+  }
+}
+
 } // end namespace llvm
diff --git a/lib/CodeGen/ELFCodeEmitter.h b/lib/CodeGen/ELFCodeEmitter.h
index c0289da..c309ef7 100644
--- a/lib/CodeGen/ELFCodeEmitter.h
+++ b/lib/CodeGen/ELFCodeEmitter.h
@@ -31,6 +31,14 @@ namespace llvm {
     /// emitted.
     std::vector<MachineRelocation> Relocations;
 
+    /// CPLocations - This is a map of constant pool indices to offsets from the
+    /// start of the section for that constant pool index.
+    std::vector<uintptr_t> CPLocations;
+
+    /// CPSections - This is a map of constant pool indices to the index of the
+    /// ELF section containing the constant pool entry for that index.
+    std::vector<unsigned> CPSections;
+
     /// MBBLocations - This vector is a mapping from MBB ID's to their address.
     /// It is filled in by the StartMachineBasicBlock callback and queried by
     /// the getMachineBasicBlockAddress callback.
@@ -62,9 +70,10 @@ namespace llvm {
     }
 
     virtual uintptr_t getConstantPoolEntryAddress(unsigned Index) const {
-      assert(0 && "CP not implementated yet!");
-      return 0;
+      assert(CPLocations.size() > Index && "CP not emitted!");
+      return CPLocations[Index];
     }
+
     virtual uintptr_t getJumpTableEntryAddress(unsigned Index) const {
       assert(0 && "JT not implementated yet!");
       return 0;
@@ -86,6 +95,10 @@ namespace llvm {
       abort();
     }
 
+    /// emitConstantPool - For each constant pool entry, figure out which section
+    /// the constant should live in and emit the constant.
+    void emitConstantPool(MachineConstantPool *MCP);
+
     virtual void setModuleInfo(llvm::MachineModuleInfo* MMI) { }
 
     /// JIT SPECIFIC FUNCTIONS - DO NOT IMPLEMENT THESE HERE!
diff --git a/lib/CodeGen/ELFWriter.cpp b/lib/CodeGen/ELFWriter.cpp
index 03db656..041defa 100644
--- a/lib/CodeGen/ELFWriter.cpp
+++ b/lib/CodeGen/ELFWriter.cpp
@@ -389,6 +389,24 @@ bool ELFWriter::doFinalization(Module &M) {
   if (TAI->getNonexecutableStackDirective())
     getNonExecStackSection();
 
+  // Emit a symbol for each section created until now
+  for (std::map<std::string, ELFSection*>::iterator I = SectionLookup.begin(),
+       E = SectionLookup.end(); I != E; ++I) {
+    ELFSection *ES = I->second;
+
+    // Skip null section
+    if (ES->SectionIdx == 0) continue;
+
+    ELFSym SectionSym(0);
+    SectionSym.SectionIdx = ES->SectionIdx;
+    SectionSym.Size = 0;
+    SectionSym.setBind(ELFSym::STB_LOCAL);
+    SectionSym.setType(ELFSym::STT_SECTION);
+
+    // Local symbols go at the front of the list
+    SymbolList.push_front(SectionSym);
+  }
+
   // Emit string table
   EmitStringTable();
 
@@ -451,15 +469,25 @@ void ELFWriter::EmitRelocations() {
 
       // Constant addend used to compute the value to be stored 
       // into the relocatable field
-      int64_t Addend = TEW->getAddendForRelTy(RelType);
+      int64_t Addend = 0;
 
       // There are several machine relocations types, and each one of
       // them needs a different approach to retrieve the symbol table index.
       if (MR.isGlobalValue()) {
         const GlobalValue *G = MR.getGlobalValue();
         SymIdx = GblSymLookup[G];
+        Addend = TEW->getAddendForRelTy(RelType);
       } else {
-        assert(0 && "dunno how to handle other relocation types");
+        unsigned SectionIdx = MR.getConstantVal();
+        // TODO: use a map for this.
+        for (std::list<ELFSym>::iterator I = SymbolList.begin(),
+             E = SymbolList.end(); I != E; ++I)
+          if ((SectionIdx == I->SectionIdx) &&
+              (I->getType() == ELFSym::STT_SECTION)) {
+            SymIdx = I->SymTabIdx;
+            break;
+          }
+        Addend = (uint64_t)MR.getResultPointer();
       }
 
       // Get the relocation entry and emit to the relocation section
@@ -540,7 +568,8 @@ void ELFWriter::EmitStringTable() {
        E = SymbolList.end(); I != E; ++I) {
 
     // Use the name mangler to uniquify the LLVM symbol.
-    std::string Name = Mang->getValueName(I->GV);
+    std::string Name;
+    if (I->GV) Name.append(Mang->getValueName(I->GV));
 
     if (Name.empty()) {
       I->NameIdx = 0;
@@ -589,7 +618,11 @@ void ELFWriter::EmitSymbolTable() {
     EmitSymbol(SymTab, *I);
 
     // Record the symbol table index for each global value
-    GblSymLookup[I->GV] = Index;
+    if (I->GV)
+      GblSymLookup[I->GV] = Index;
+
+    // Keep track of the symbol's index in the symbol table
+    I->SymTabIdx = Index;
   }
 
   SymTab.Info = FirstNonLocalSymbol;
diff --git a/lib/CodeGen/ELFWriter.h b/lib/CodeGen/ELFWriter.h
index 39577d9..e0e71d0 100644
--- a/lib/CodeGen/ELFWriter.h
+++ b/lib/CodeGen/ELFWriter.h
@@ -147,6 +147,12 @@ namespace llvm {
                         ELFSection::SHF_EXECINSTR | ELFSection::SHF_ALLOC);
     }
 
+    /// Get a constant pool section based on the section name returned by TAI
+    ELFSection &getConstantPoolSection(std::string SName, unsigned Align) {
+      return getSection(SName, ELFSection::SHT_PROGBITS,
+                        ELFSection::SHF_MERGE | ELFSection::SHF_ALLOC, Align);
+    }
+
     /// Return the relocation section of section 'S'. 'RelA' is true
     /// if the relocation section contains entries with addends.
     ELFSection &getRelocSection(std::string SName, bool RelA) {
diff --git a/lib/CodeGen/IfConversion.cpp b/lib/CodeGen/IfConversion.cpp
index 4d5c3c2..d5e7ea5 100644
--- a/lib/CodeGen/IfConversion.cpp
+++ b/lib/CodeGen/IfConversion.cpp
@@ -144,9 +144,10 @@ namespace {
     const TargetLowering *TLI;
     const TargetInstrInfo *TII;
     bool MadeChange;
+    int FnNum;
   public:
     static char ID;
-    IfConverter() : MachineFunctionPass(&ID) {}
+    IfConverter() : MachineFunctionPass(&ID), FnNum(-1) {}
 
     virtual bool runOnMachineFunction(MachineFunction &MF);
     virtual const char *getPassName() const { return "If Converter"; }
@@ -225,7 +226,6 @@ bool IfConverter::runOnMachineFunction(MachineFunction &MF) {
   TII = MF.getTarget().getInstrInfo();
   if (!TII) return false;
 
-  static int FnNum = -1;
   DOUT << "\nIfcvt: function (" << ++FnNum <<  ") \'"
        << MF.getFunction()->getName() << "\'";
 
diff --git a/lib/CodeGen/IntrinsicLowering.cpp b/lib/CodeGen/IntrinsicLowering.cpp
index e6912b8..052334a 100644
--- a/lib/CodeGen/IntrinsicLowering.cpp
+++ b/lib/CodeGen/IntrinsicLowering.cpp
@@ -61,18 +61,16 @@ static void EnsureFPIntrinsicsExist(Module &M, Function *Fn,
 template <class ArgIt>
 static CallInst *ReplaceCallWith(const char *NewFn, CallInst *CI,
                                  ArgIt ArgBegin, ArgIt ArgEnd,
-                                 const Type *RetTy, Constant *&FCache) {
-  if (!FCache) {
-    // If we haven't already looked up this function, check to see if the
-    // program already contains a function with this name.
-    Module *M = CI->getParent()->getParent()->getParent();
-    // Get or insert the definition now.
-    std::vector<const Type *> ParamTys;
-    for (ArgIt I = ArgBegin; I != ArgEnd; ++I)
-      ParamTys.push_back((*I)->getType());
-    FCache = M->getOrInsertFunction(NewFn,
-                                    FunctionType::get(RetTy, ParamTys, false));
-  }
+                                 const Type *RetTy) {
+  // Find the module containing this call; the callee is looked up (or
+  // declared) there on every invocation, with no caching between calls.
+  Module *M = CI->getParent()->getParent()->getParent();
+  // Get or insert the definition now.
+  std::vector<const Type *> ParamTys;
+  for (ArgIt I = ArgBegin; I != ArgEnd; ++I)
+    ParamTys.push_back((*I)->getType());
+  Constant* FCache = M->getOrInsertFunction(NewFn,
+                                  FunctionType::get(RetTy, ParamTys, false));
 
   IRBuilder<> Builder(CI->getParent(), CI);
   SmallVector<Value *, 8> Args(ArgBegin, ArgEnd);
@@ -624,25 +622,24 @@ static Instruction *LowerPartSet(CallInst *CI) {
   return NewCI;
 }
 
-static void ReplaceFPIntrinsicWithCall(CallInst *CI, Constant *FCache,
-                                       Constant *DCache, Constant *LDCache,
-                                       const char *Fname, const char *Dname,
+static void ReplaceFPIntrinsicWithCall(CallInst *CI, const char *Fname,
+                                       const char *Dname,
                                        const char *LDname) {
   switch (CI->getOperand(1)->getType()->getTypeID()) {
   default: assert(0 && "Invalid type in intrinsic"); abort();
   case Type::FloatTyID:
     ReplaceCallWith(Fname, CI, CI->op_begin() + 1, CI->op_end(),
-                  Type::FloatTy, FCache);
+                  Type::FloatTy);
     break;
   case Type::DoubleTyID:
     ReplaceCallWith(Dname, CI, CI->op_begin() + 1, CI->op_end(),
-                  Type::DoubleTy, DCache);
+                  Type::DoubleTy);
     break;
   case Type::X86_FP80TyID:
   case Type::FP128TyID:
   case Type::PPC_FP128TyID:
     ReplaceCallWith(LDname, CI, CI->op_begin() + 1, CI->op_end(),
-                  CI->getOperand(1)->getType(), LDCache);
+                  CI->getOperand(1)->getType());
     break;
   }
 }
@@ -668,9 +665,8 @@ void IntrinsicLowering::LowerIntrinsicCall(CallInst *CI) {
     // by the lowerinvoke pass.  In both cases, the right thing to do is to
     // convert the call to an explicit setjmp or longjmp call.
   case Intrinsic::setjmp: {
-    static Constant *SetjmpFCache = 0;
     Value *V = ReplaceCallWith("setjmp", CI, CI->op_begin() + 1, CI->op_end(),
-                               Type::Int32Ty, SetjmpFCache);
+                               Type::Int32Ty);
     if (CI->getType() != Type::VoidTy)
       CI->replaceAllUsesWith(V);
     break;
@@ -681,17 +677,15 @@ void IntrinsicLowering::LowerIntrinsicCall(CallInst *CI) {
      break;
 
   case Intrinsic::longjmp: {
-    static Constant *LongjmpFCache = 0;
     ReplaceCallWith("longjmp", CI, CI->op_begin() + 1, CI->op_end(),
-                    Type::VoidTy, LongjmpFCache);
+                    Type::VoidTy);
     break;
   }
 
   case Intrinsic::siglongjmp: {
     // Insert the call to abort
-    static Constant *AbortFCache = 0;
     ReplaceCallWith("abort", CI, CI->op_end(), CI->op_end(), 
-                    Type::VoidTy, AbortFCache);
+                    Type::VoidTy);
     break;
   }
   case Intrinsic::ctpop:
@@ -728,7 +722,6 @@ void IntrinsicLowering::LowerIntrinsicCall(CallInst *CI) {
 
   case Intrinsic::stacksave:
   case Intrinsic::stackrestore: {
-    static bool Warned = false;
     if (!Warned)
       cerr << "WARNING: this target does not support the llvm.stack"
            << (Callee->getIntrinsicID() == Intrinsic::stacksave ?
@@ -783,7 +776,6 @@ void IntrinsicLowering::LowerIntrinsicCall(CallInst *CI) {
     break;   // Strip out annotate intrinsic
     
   case Intrinsic::memcpy: {
-    static Constant *MemcpyFCache = 0;
     const IntegerType *IntPtr = TD.getIntPtrType();
     Value *Size = Builder.CreateIntCast(CI->getOperand(3), IntPtr,
                                         /* isSigned */ false);
@@ -791,12 +783,10 @@ void IntrinsicLowering::LowerIntrinsicCall(CallInst *CI) {
     Ops[0] = CI->getOperand(1);
     Ops[1] = CI->getOperand(2);
     Ops[2] = Size;
-    ReplaceCallWith("memcpy", CI, Ops, Ops+3, CI->getOperand(1)->getType(),
-                    MemcpyFCache);
+    ReplaceCallWith("memcpy", CI, Ops, Ops+3, CI->getOperand(1)->getType());
     break;
   }
   case Intrinsic::memmove: {
-    static Constant *MemmoveFCache = 0;
     const IntegerType *IntPtr = TD.getIntPtrType();
     Value *Size = Builder.CreateIntCast(CI->getOperand(3), IntPtr,
                                         /* isSigned */ false);
@@ -804,12 +794,10 @@ void IntrinsicLowering::LowerIntrinsicCall(CallInst *CI) {
     Ops[0] = CI->getOperand(1);
     Ops[1] = CI->getOperand(2);
     Ops[2] = Size;
-    ReplaceCallWith("memmove", CI, Ops, Ops+3, CI->getOperand(1)->getType(),
-                    MemmoveFCache);
+    ReplaceCallWith("memmove", CI, Ops, Ops+3, CI->getOperand(1)->getType());
     break;
   }
   case Intrinsic::memset: {
-    static Constant *MemsetFCache = 0;
     const IntegerType *IntPtr = TD.getIntPtrType();
     Value *Size = Builder.CreateIntCast(CI->getOperand(3), IntPtr,
                                         /* isSigned */ false);
@@ -819,64 +807,35 @@ void IntrinsicLowering::LowerIntrinsicCall(CallInst *CI) {
     Ops[1] = Builder.CreateIntCast(CI->getOperand(2), Type::Int32Ty,
                                    /* isSigned */ false);
     Ops[2] = Size;
-    ReplaceCallWith("memset", CI, Ops, Ops+3, CI->getOperand(1)->getType(),
-                    MemsetFCache);
+    ReplaceCallWith("memset", CI, Ops, Ops+3, CI->getOperand(1)->getType());
     break;
   }
   case Intrinsic::sqrt: {
-    static Constant *sqrtFCache = 0;
-    static Constant *sqrtDCache = 0;
-    static Constant *sqrtLDCache = 0;
-    ReplaceFPIntrinsicWithCall(CI, sqrtFCache, sqrtDCache, sqrtLDCache,
-                               "sqrtf", "sqrt", "sqrtl");
+    ReplaceFPIntrinsicWithCall(CI, "sqrtf", "sqrt", "sqrtl");
     break;
   }
   case Intrinsic::log: {
-    static Constant *logFCache = 0;
-    static Constant *logDCache = 0;
-    static Constant *logLDCache = 0;
-    ReplaceFPIntrinsicWithCall(CI, logFCache, logDCache, logLDCache,
-                               "logf", "log", "logl");
+    ReplaceFPIntrinsicWithCall(CI, "logf", "log", "logl");
     break;
   }
   case Intrinsic::log2: {
-    static Constant *log2FCache = 0;
-    static Constant *log2DCache = 0;
-    static Constant *log2LDCache = 0;
-    ReplaceFPIntrinsicWithCall(CI, log2FCache, log2DCache, log2LDCache,
-                               "log2f", "log2", "log2l");
+    ReplaceFPIntrinsicWithCall(CI, "log2f", "log2", "log2l");
     break;
   }
   case Intrinsic::log10: {
-    static Constant *log10FCache = 0;
-    static Constant *log10DCache = 0;
-    static Constant *log10LDCache = 0;
-    ReplaceFPIntrinsicWithCall(CI, log10FCache, log10DCache, log10LDCache,
-                               "log10f", "log10", "log10l");
+    ReplaceFPIntrinsicWithCall(CI, "log10f", "log10", "log10l");
     break;
   }
   case Intrinsic::exp: {
-    static Constant *expFCache = 0;
-    static Constant *expDCache = 0;
-    static Constant *expLDCache = 0;
-    ReplaceFPIntrinsicWithCall(CI, expFCache, expDCache, expLDCache,
-                               "expf", "exp", "expl");
+    ReplaceFPIntrinsicWithCall(CI, "expf", "exp", "expl");
     break;
   }
   case Intrinsic::exp2: {
-    static Constant *exp2FCache = 0;
-    static Constant *exp2DCache = 0;
-    static Constant *exp2LDCache = 0;
-    ReplaceFPIntrinsicWithCall(CI, exp2FCache, exp2DCache, exp2LDCache,
-                               "exp2f", "exp2", "exp2l");
+    ReplaceFPIntrinsicWithCall(CI, "exp2f", "exp2", "exp2l");
     break;
   }
   case Intrinsic::pow: {
-    static Constant *powFCache = 0;
-    static Constant *powDCache = 0;
-    static Constant *powLDCache = 0;
-    ReplaceFPIntrinsicWithCall(CI, powFCache, powDCache, powLDCache,
-                               "powf", "pow", "powl");
+    ReplaceFPIntrinsicWithCall(CI, "powf", "pow", "powl");
     break;
   }
   case Intrinsic::flt_rounds:
diff --git a/lib/CodeGen/LiveInterval.cpp b/lib/CodeGen/LiveInterval.cpp
index cac9253..26722a3 100644
--- a/lib/CodeGen/LiveInterval.cpp
+++ b/lib/CodeGen/LiveInterval.cpp
@@ -373,7 +373,8 @@ void LiveInterval::scaleNumbering(unsigned factor) {
   for (vni_iterator VNI = vni_begin(), VNIE = vni_end(); VNI != VNIE; ++VNI) {
     VNInfo *vni = *VNI;
 
-    vni->def = InstrSlots::scale(vni->def, factor);
+    if (vni->isDefAccurate())
+      vni->def = InstrSlots::scale(vni->def, factor);
 
     for (unsigned i = 0; i < vni->kills.size(); ++i) {
       if (vni->kills[i] != 0)
diff --git a/lib/CodeGen/LiveIntervalAnalysis.cpp b/lib/CodeGen/LiveIntervalAnalysis.cpp
index d6931df..21bb5dc 100644
--- a/lib/CodeGen/LiveIntervalAnalysis.cpp
+++ b/lib/CodeGen/LiveIntervalAnalysis.cpp
@@ -947,6 +947,10 @@ unsigned LiveIntervals::getReMatImplicitUse(const LiveInterval &li,
     unsigned Reg = MO.getReg();
     if (Reg == 0 || Reg == li.reg)
       continue;
+    
+    if (TargetRegisterInfo::isPhysicalRegister(Reg) &&
+        !allocatableRegs_[Reg])
+      continue;
     // FIXME: For now, only remat MI with at most one register operand.
     assert(!RegOp &&
            "Can't rematerialize instruction with multiple register operand!");
diff --git a/lib/CodeGen/MachineFunction.cpp b/lib/CodeGen/MachineFunction.cpp
index cacfed1..2d2b59e 100644
--- a/lib/CodeGen/MachineFunction.cpp
+++ b/lib/CodeGen/MachineFunction.cpp
@@ -124,25 +124,28 @@ MachineFunction::MachineFunction(const Function *F,
                   MachineFrameInfo(*TM.getFrameInfo());
   ConstantPool = new (Allocator.Allocate<MachineConstantPool>())
                      MachineConstantPool(TM.getTargetData());
-  
+
   // Set up jump table.
   const TargetData &TD = *TM.getTargetData();
   bool IsPic = TM.getRelocationModel() == Reloc::PIC_;
   unsigned EntrySize = IsPic ? 4 : TD.getPointerSize();
-  unsigned Alignment = IsPic ? TD.getABITypeAlignment(Type::Int32Ty)
-                             : TD.getPointerABIAlignment();
+  unsigned TyAlignment = IsPic ? TD.getABITypeAlignment(Type::Int32Ty)
+                               : TD.getPointerABIAlignment();
   JumpTableInfo = new (Allocator.Allocate<MachineJumpTableInfo>())
-                      MachineJumpTableInfo(EntrySize, Alignment);
+                      MachineJumpTableInfo(EntrySize, TyAlignment);
 }
 
 MachineFunction::~MachineFunction() {
   BasicBlocks.clear();
   InstructionRecycler.clear(Allocator);
   BasicBlockRecycler.clear(Allocator);
-  if (RegInfo)
-    RegInfo->~MachineRegisterInfo();        Allocator.Deallocate(RegInfo);
+  if (RegInfo) {
+    RegInfo->~MachineRegisterInfo();
+    Allocator.Deallocate(RegInfo);
+  }
   if (MFInfo) {
-    MFInfo->~MachineFunctionInfo();       Allocator.Deallocate(MFInfo);
+    MFInfo->~MachineFunctionInfo();
+    Allocator.Deallocate(MFInfo);
   }
   FrameInfo->~MachineFrameInfo();         Allocator.Deallocate(FrameInfo);
   ConstantPool->~MachineConstantPool();   Allocator.Deallocate(ConstantPool);
@@ -295,12 +298,6 @@ void MachineFunction::print(std::ostream &OS) const {
   OS << "\n# End machine code for " << Fn->getName () << "().\n\n";
 }
 
-/// CFGOnly flag - This is used to control whether or not the CFG graph printer
-/// prints out the contents of basic blocks or not.  This is acceptable because
-/// this code is only really used for debugging purposes.
-///
-static bool CFGOnly = false;
-
 namespace llvm {
   template<>
   struct DOTGraphTraits<const MachineFunction*> : public DefaultDOTGraphTraits {
@@ -309,13 +306,14 @@ namespace llvm {
     }
 
     static std::string getNodeLabel(const MachineBasicBlock *Node,
-                                    const MachineFunction *Graph) {
-      if (CFGOnly && Node->getBasicBlock() &&
+                                    const MachineFunction *Graph,
+                                    bool ShortNames) {
+      if (ShortNames && Node->getBasicBlock() &&
           !Node->getBasicBlock()->getName().empty())
         return Node->getBasicBlock()->getName() + ":";
 
       std::ostringstream Out;
-      if (CFGOnly) {
+      if (ShortNames) {
         Out << Node->getNumber() << ':';
         return Out.str();
       }
@@ -348,9 +346,12 @@ void MachineFunction::viewCFG() const
 
 void MachineFunction::viewCFGOnly() const
 {
-  CFGOnly = true;
-  viewCFG();
-  CFGOnly = false;
+#ifndef NDEBUG
+  ViewGraph(this, "mf" + getFunction()->getName(), /*ShortNames=*/true);
+#else // graph viewing is compiled out of NDEBUG builds; just report it
+  cerr << "MachineFunction::viewCFGOnly is only available in debug builds on "
+       << "systems with Graphviz or gv!\n";
+#endif // NDEBUG
 }
 
 // The next two methods are used to construct and to retrieve
diff --git a/lib/CodeGen/MachineInstr.cpp b/lib/CodeGen/MachineInstr.cpp
index c351593..c977508 100644
--- a/lib/CodeGen/MachineInstr.cpp
+++ b/lib/CodeGen/MachineInstr.cpp
@@ -150,7 +150,9 @@ void MachineOperand::ChangeToRegister(unsigned Reg, bool isDef, bool isImp,
 /// isIdenticalTo - Return true if this operand is identical to the specified
 /// operand.
 bool MachineOperand::isIdenticalTo(const MachineOperand &Other) const {
-  if (getType() != Other.getType()) return false;
+  if (getType() != Other.getType() ||
+      getTargetFlags() != Other.getTargetFlags())
+    return false;
   
   switch (getType()) {
   default: assert(0 && "Unrecognized operand type");
@@ -205,70 +207,72 @@ void MachineOperand::print(raw_ostream &OS, const TargetMachine *TM) const {
     }
 
     if (getSubReg() != 0) {
-      OS << ":" << getSubReg();
+      OS << ':' << getSubReg();
     }
 
     if (isDef() || isKill() || isDead() || isImplicit() || isEarlyClobber()) {
-      OS << "<";
+      OS << '<';
       bool NeedComma = false;
       if (isImplicit()) {
-        if (NeedComma) OS << ",";
+        if (NeedComma) OS << ',';
         OS << (isDef() ? "imp-def" : "imp-use");
         NeedComma = true;
       } else if (isDef()) {
-        if (NeedComma) OS << ",";
+        if (NeedComma) OS << ',';
         if (isEarlyClobber())
           OS << "earlyclobber,";
         OS << "def";
         NeedComma = true;
       }
       if (isKill() || isDead()) {
-        if (NeedComma) OS << ",";
+        if (NeedComma) OS << ',';
         if (isKill())  OS << "kill";
         if (isDead())  OS << "dead";
       }
-      OS << ">";
+      OS << '>';
     }
     break;
   case MachineOperand::MO_Immediate:
     OS << getImm();
     break;
   case MachineOperand::MO_FPImmediate:
-    if (getFPImm()->getType() == Type::FloatTy) {
+    if (getFPImm()->getType() == Type::FloatTy)
       OS << getFPImm()->getValueAPF().convertToFloat();
-    } else {
+    else
       OS << getFPImm()->getValueAPF().convertToDouble();
-    }
     break;
   case MachineOperand::MO_MachineBasicBlock:
     OS << "mbb<"
        << ((Value*)getMBB()->getBasicBlock())->getName()
-       << "," << (void*)getMBB() << ">";
+       << "," << (void*)getMBB() << '>';
     break;
   case MachineOperand::MO_FrameIndex:
-    OS << "<fi#" << getIndex() << ">";
+    OS << "<fi#" << getIndex() << '>';
     break;
   case MachineOperand::MO_ConstantPoolIndex:
     OS << "<cp#" << getIndex();
     if (getOffset()) OS << "+" << getOffset();
-    OS << ">";
+    OS << '>';
     break;
   case MachineOperand::MO_JumpTableIndex:
-    OS << "<jt#" << getIndex() << ">";
+    OS << "<jt#" << getIndex() << '>';
     break;
   case MachineOperand::MO_GlobalAddress:
     OS << "<ga:" << ((Value*)getGlobal())->getName();
     if (getOffset()) OS << "+" << getOffset();
-    OS << ">";
+    OS << '>';
     break;
   case MachineOperand::MO_ExternalSymbol:
     OS << "<es:" << getSymbolName();
     if (getOffset()) OS << "+" << getOffset();
-    OS << ">";
+    OS << '>';
     break;
   default:
     assert(0 && "Unrecognized operand type");
   }
+  
+  if (unsigned TF = getTargetFlags())
+    OS << "[TF=" << TF << ']';
 }
 
 //===----------------------------------------------------------------------===//
@@ -716,31 +720,37 @@ isRegTiedToUseOperand(unsigned DefOpIdx, unsigned *UseOpIdx) const {
     const MachineOperand &MO = getOperand(DefOpIdx);
     if (!MO.isReg() || !MO.isDef() || MO.getReg() == 0)
       return false;
-    // Determine the actual operand no corresponding to this index.
+    // Determine the actual operand index that corresponds to this index.
     unsigned DefNo = 0;
+    unsigned DefPart = 0;
     for (unsigned i = 1, e = getNumOperands(); i < e; ) {
       const MachineOperand &FMO = getOperand(i);
       assert(FMO.isImm());
       // Skip over this def.
-      i += InlineAsm::getNumOperandRegisters(FMO.getImm()) + 1;
-      if (i > DefOpIdx)
+      unsigned NumOps = InlineAsm::getNumOperandRegisters(FMO.getImm());
+      unsigned PrevDef = i + 1;
+      i = PrevDef + NumOps;
+      if (i > DefOpIdx) {
+        DefPart = DefOpIdx - PrevDef;
         break;
+      }
       ++DefNo;
     }
-    for (unsigned i = 0, e = getNumOperands(); i != e; ++i) {
+    for (unsigned i = 1, e = getNumOperands(); i != e; ++i) {
       const MachineOperand &FMO = getOperand(i);
       if (!FMO.isImm())
         continue;
       if (i+1 >= e || !getOperand(i+1).isReg() || !getOperand(i+1).isUse())
         continue;
       unsigned Idx;
-      if (InlineAsm::isUseOperandTiedToDef(FMO.getImm(), Idx) && 
+      if (InlineAsm::isUseOperandTiedToDef(FMO.getImm(), Idx) &&
           Idx == DefNo) {
         if (UseOpIdx)
-          *UseOpIdx = (unsigned)i + 1;
+          *UseOpIdx = (unsigned)i + 1 + DefPart;
         return true;
       }
     }
+    return false;
   }
 
   assert(getOperand(DefOpIdx).isDef() && "DefOpIdx is not a def!");
@@ -766,10 +776,16 @@ isRegTiedToDefOperand(unsigned UseOpIdx, unsigned *DefOpIdx) const {
     const MachineOperand &MO = getOperand(UseOpIdx);
     if (!MO.isReg() || !MO.isUse() || MO.getReg() == 0)
       return false;
-    assert(UseOpIdx > 0);
-    const MachineOperand &UFMO = getOperand(UseOpIdx-1);
-    if (!UFMO.isImm())
-      return false;  // Must be physreg uses.
+    int FlagIdx = UseOpIdx - 1;
+    if (FlagIdx < 1)
+      return false;
+    while (!getOperand(FlagIdx).isImm()) {
+      if (--FlagIdx == 0)
+        return false;
+    }
+    const MachineOperand &UFMO = getOperand(FlagIdx);
+    if (FlagIdx + InlineAsm::getNumOperandRegisters(UFMO.getImm()) < UseOpIdx)
+      return false;
     unsigned DefNo;
     if (InlineAsm::isUseOperandTiedToDef(UFMO.getImm(), DefNo)) {
       if (!DefOpIdx)
@@ -785,7 +801,7 @@ isRegTiedToDefOperand(unsigned UseOpIdx, unsigned *DefOpIdx) const {
         DefIdx += InlineAsm::getNumOperandRegisters(FMO.getImm()) + 1;
         --DefNo;
       }
-      *DefOpIdx = DefIdx+1;
+      *DefOpIdx = DefIdx + UseOpIdx - FlagIdx;
       return true;
     }
     return false;
@@ -1092,13 +1108,13 @@ bool MachineInstr::addRegisterDead(unsigned IncomingReg,
 
   // If not found, this means an alias of one of the operands is dead. Add a
   // new implicit operand if required.
-  if (!Found && AddIfNotFound) {
-    addOperand(MachineOperand::CreateReg(IncomingReg,
-                                         true  /*IsDef*/,
-                                         true  /*IsImp*/,
-                                         false /*IsKill*/,
-                                         true  /*IsDead*/));
-    return true;
-  }
-  return Found;
+  if (Found || !AddIfNotFound)
+    return Found;
+    
+  addOperand(MachineOperand::CreateReg(IncomingReg,
+                                       true  /*IsDef*/,
+                                       true  /*IsImp*/,
+                                       false /*IsKill*/,
+                                       true  /*IsDead*/));
+  return true;
 }
diff --git a/lib/CodeGen/ScheduleDAGPrinter.cpp b/lib/CodeGen/ScheduleDAGPrinter.cpp
index 594c24d..5efd274 100644
--- a/lib/CodeGen/ScheduleDAGPrinter.cpp
+++ b/lib/CodeGen/ScheduleDAGPrinter.cpp
@@ -59,7 +59,8 @@ namespace llvm {
     
 
     static std::string getNodeLabel(const SUnit *Node,
-                                    const ScheduleDAG *Graph);
+                                    const ScheduleDAG *Graph,
+                                    bool ShortNames);
     static std::string getNodeAttributes(const SUnit *N,
                                          const ScheduleDAG *Graph) {
       return "shape=Mrecord";
@@ -73,7 +74,8 @@ namespace llvm {
 }
 
 std::string DOTGraphTraits<ScheduleDAG*>::getNodeLabel(const SUnit *SU,
-                                                       const ScheduleDAG *G) {
+                                                       const ScheduleDAG *G,
+                                                       bool ShortNames) {
   return G->getGraphNodeLabel(SU);
 }
 
@@ -84,11 +86,11 @@ void ScheduleDAG::viewGraph() {
 // This code is only for debugging!
 #ifndef NDEBUG
   if (BB->getBasicBlock())
-    ViewGraph(this, "dag." + MF.getFunction()->getName(),
+    ViewGraph(this, "dag." + MF.getFunction()->getName(), false,
               "Scheduling-Units Graph for " + MF.getFunction()->getName() + ':' +
               BB->getBasicBlock()->getName());
   else
-    ViewGraph(this, "dag." + MF.getFunction()->getName(),
+    ViewGraph(this, "dag." + MF.getFunction()->getName(), false,
               "Scheduling-Units Graph for " + MF.getFunction()->getName());
 #else
   cerr << "ScheduleDAG::viewGraph is only available in debug builds on "
diff --git a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
index 1bb8090..ef365e6 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
@@ -211,7 +211,7 @@ SelectionDAGLegalize::SelectionDAGLegalize(SelectionDAG &dag,
                                            CodeGenOpt::Level ol)
   : TLI(dag.getTargetLoweringInfo()), DAG(dag), OptLevel(ol),
     ValueTypeActions(TLI.getValueTypeActions()) {
-  assert(MVT::LAST_VALUETYPE <= 32 &&
+  assert(MVT::LAST_VALUETYPE <= MVT::MAX_ALLOWED_VALUETYPE &&
          "Too many value types for ValueTypeActions to hold!");
 }
 
diff --git a/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/lib/CodeGen/SelectionDAG/LegalizeTypes.h
index 75c8924..02b0732 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeTypes.h
+++ b/lib/CodeGen/SelectionDAG/LegalizeTypes.h
@@ -159,7 +159,7 @@ public:
   explicit DAGTypeLegalizer(SelectionDAG &dag)
     : TLI(dag.getTargetLoweringInfo()), DAG(dag),
     ValueTypeActions(TLI.getValueTypeActions()) {
-    assert(MVT::LAST_VALUETYPE <= 32 &&
+    assert(MVT::LAST_VALUETYPE <= MVT::MAX_ALLOWED_VALUETYPE &&
            "Too many value types for ValueTypeActions to hold!");
   }
 
diff --git a/lib/CodeGen/SelectionDAG/Makefile b/lib/CodeGen/SelectionDAG/Makefile
index 185222a..73f0b5d 100644
--- a/lib/CodeGen/SelectionDAG/Makefile
+++ b/lib/CodeGen/SelectionDAG/Makefile
@@ -9,7 +9,5 @@
 LEVEL = ../../..
 LIBRARYNAME = LLVMSelectionDAG
 PARALLEL_DIRS =
-BUILD_ARCHIVE = 1
-DONT_BUILD_RELINKED = 1
 
 include $(LEVEL)/Makefile.common
diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodesEmit.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodesEmit.cpp
index fb5e207..e372b5b 100644
--- a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodesEmit.cpp
+++ b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodesEmit.cpp
@@ -30,10 +30,9 @@ using namespace llvm;
 
 /// EmitCopyFromReg - Generate machine code for an CopyFromReg node or an
 /// implicit physical register output.
-void ScheduleDAGSDNodes::EmitCopyFromReg(SDNode *Node, unsigned ResNo,
-                                         bool IsClone, bool IsCloned,
-                                         unsigned SrcReg,
-                                         DenseMap<SDValue, unsigned> &VRBaseMap) {
+void ScheduleDAGSDNodes::
+EmitCopyFromReg(SDNode *Node, unsigned ResNo, bool IsClone, bool IsCloned,
+                unsigned SrcReg, DenseMap<SDValue, unsigned> &VRBaseMap) {
   unsigned VRBase = 0;
   if (TargetRegisterInfo::isVirtualRegister(SrcReg)) {
     // Just use the input register directly!
@@ -281,13 +280,15 @@ void ScheduleDAGSDNodes::AddOperand(MachineInstr *MI, SDValue Op,
   } else if (RegisterSDNode *R = dyn_cast<RegisterSDNode>(Op)) {
     MI->addOperand(MachineOperand::CreateReg(R->getReg(), false));
   } else if (GlobalAddressSDNode *TGA = dyn_cast<GlobalAddressSDNode>(Op)) {
-    MI->addOperand(MachineOperand::CreateGA(TGA->getGlobal(),TGA->getOffset()));
+    MI->addOperand(MachineOperand::CreateGA(TGA->getGlobal(), TGA->getOffset(),
+                                            TGA->getTargetFlags()));
   } else if (BasicBlockSDNode *BBNode = dyn_cast<BasicBlockSDNode>(Op)) {
     MI->addOperand(MachineOperand::CreateMBB(BBNode->getBasicBlock()));
   } else if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Op)) {
     MI->addOperand(MachineOperand::CreateFI(FI->getIndex()));
   } else if (JumpTableSDNode *JT = dyn_cast<JumpTableSDNode>(Op)) {
-    MI->addOperand(MachineOperand::CreateJTI(JT->getIndex()));
+    MI->addOperand(MachineOperand::CreateJTI(JT->getIndex(),
+                                             JT->getTargetFlags()));
   } else if (ConstantPoolSDNode *CP = dyn_cast<ConstantPoolSDNode>(Op)) {
     int Offset = CP->getOffset();
     unsigned Align = CP->getAlignment();
@@ -306,9 +307,11 @@ void ScheduleDAGSDNodes::AddOperand(MachineInstr *MI, SDValue Op,
       Idx = ConstPool->getConstantPoolIndex(CP->getMachineCPVal(), Align);
     else
       Idx = ConstPool->getConstantPoolIndex(CP->getConstVal(), Align);
-    MI->addOperand(MachineOperand::CreateCPI(Idx, Offset));
+    MI->addOperand(MachineOperand::CreateCPI(Idx, Offset,
+                                             CP->getTargetFlags()));
   } else if (ExternalSymbolSDNode *ES = dyn_cast<ExternalSymbolSDNode>(Op)) {
-    MI->addOperand(MachineOperand::CreateES(ES->getSymbol()));
+    MI->addOperand(MachineOperand::CreateES(ES->getSymbol(), 0,
+                                            ES->getTargetFlags()));
   } else {
     assert(Op.getValueType() != MVT::Other &&
            Op.getValueType() != MVT::Flag &&
@@ -335,7 +338,7 @@ getSuperRegisterRegClass(const TargetRegisterClass *TRC,
 /// EmitSubregNode - Generate machine code for subreg nodes.
 ///
 void ScheduleDAGSDNodes::EmitSubregNode(SDNode *Node, 
-                                        DenseMap<SDValue, unsigned> &VRBaseMap) {
+                                        DenseMap<SDValue, unsigned> &VRBaseMap){
   unsigned VRBase = 0;
   unsigned Opc = Node->getMachineOpcode();
   
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index ce01d53..0342f67 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -31,8 +31,10 @@
 #include "llvm/Target/TargetInstrInfo.h"
 #include "llvm/Target/TargetMachine.h"
 #include "llvm/Support/CommandLine.h"
+#include "llvm/Support/ManagedStatic.h"
 #include "llvm/Support/MathExtras.h"
 #include "llvm/Support/raw_ostream.h"
+#include "llvm/System/Mutex.h"
 #include "llvm/ADT/SetVector.h"
 #include "llvm/ADT/SmallPtrSet.h"
 #include "llvm/ADT/SmallSet.h"
@@ -359,6 +361,9 @@ static void AddNodeIDNode(FoldingSetNodeID &ID,
 /// the NodeID data.
 static void AddNodeIDCustom(FoldingSetNodeID &ID, const SDNode *N) {
   switch (N->getOpcode()) {
+  case ISD::TargetExternalSymbol:
+  case ISD::ExternalSymbol:
+    assert(0 && "Should only be used on nodes with operands");
   default: break;  // Normal nodes don't need extra info.
   case ISD::ARG_FLAGS:
     ID.AddInteger(cast<ARG_FLAGSSDNode>(N)->getArgFlags().getRawBits());
@@ -379,6 +384,7 @@ static void AddNodeIDCustom(FoldingSetNodeID &ID, const SDNode *N) {
     const GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(N);
     ID.AddPointer(GA->getGlobal());
     ID.AddInteger(GA->getOffset());
+    ID.AddInteger(GA->getTargetFlags());
     break;
   }
   case ISD::BasicBlock:
@@ -409,6 +415,7 @@ static void AddNodeIDCustom(FoldingSetNodeID &ID, const SDNode *N) {
   case ISD::JumpTable:
   case ISD::TargetJumpTable:
     ID.AddInteger(cast<JumpTableSDNode>(N)->getIndex());
+    ID.AddInteger(cast<JumpTableSDNode>(N)->getTargetFlags());
     break;
   case ISD::ConstantPool:
   case ISD::TargetConstantPool: {
@@ -419,6 +426,7 @@ static void AddNodeIDCustom(FoldingSetNodeID &ID, const SDNode *N) {
       CP->getMachineCPVal()->AddSelectionDAGCSEId(ID);
     else
       ID.AddPointer(CP->getConstVal());
+    ID.AddInteger(CP->getTargetFlags());
     break;
   }
   case ISD::CALL: {
@@ -630,10 +638,13 @@ bool SelectionDAG::RemoveNodeFromCSEMaps(SDNode *N) {
   case ISD::ExternalSymbol:
     Erased = ExternalSymbols.erase(cast<ExternalSymbolSDNode>(N)->getSymbol());
     break;
-  case ISD::TargetExternalSymbol:
-    Erased =
-      TargetExternalSymbols.erase(cast<ExternalSymbolSDNode>(N)->getSymbol());
+  case ISD::TargetExternalSymbol: {
+    ExternalSymbolSDNode *ESN = cast<ExternalSymbolSDNode>(N);
+    Erased = TargetExternalSymbols.erase(
+               std::pair<std::string,unsigned char>(ESN->getSymbol(),
+                                                    ESN->getTargetFlags()));
     break;
+  }
   case ISD::VALUETYPE: {
     MVT VT = cast<VTSDNode>(N)->getVT();
     if (VT.isExtended()) {
@@ -953,9 +964,11 @@ SDValue SelectionDAG::getConstantFP(double Val, MVT VT, bool isTarget) {
 
 SDValue SelectionDAG::getGlobalAddress(const GlobalValue *GV,
                                        MVT VT, int64_t Offset,
-                                       bool isTargetGA) {
-  unsigned Opc;
-
+                                       bool isTargetGA,
+                                       unsigned char TargetFlags) {
+  assert((TargetFlags == 0 || isTargetGA) &&
+         "Cannot set target flags on target-independent globals");
+  
   // Truncate (with sign-extension) the offset value to the pointer size.
   unsigned BitWidth = TLI.getPointerTy().getSizeInBits();
   if (BitWidth < 64)
@@ -968,6 +981,7 @@ SDValue SelectionDAG::getGlobalAddress(const GlobalValue *GV,
       GVar = dyn_cast_or_null<GlobalVariable>(GA->resolveAliasedGlobal(false));
   }
 
+  unsigned Opc;
   if (GVar && GVar->isThreadLocal())
     Opc = isTargetGA ? ISD::TargetGlobalTLSAddress : ISD::GlobalTLSAddress;
   else
@@ -977,11 +991,12 @@ SDValue SelectionDAG::getGlobalAddress(const GlobalValue *GV,
   AddNodeIDNode(ID, Opc, getVTList(VT), 0, 0);
   ID.AddPointer(GV);
   ID.AddInteger(Offset);
+  ID.AddInteger(TargetFlags);
   void *IP = 0;
   if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP))
     return SDValue(E, 0);
   SDNode *N = NodeAllocator.Allocate<GlobalAddressSDNode>();
-  new (N) GlobalAddressSDNode(isTargetGA, GV, VT, Offset);
+  new (N) GlobalAddressSDNode(Opc, GV, VT, Offset, TargetFlags);
   CSEMap.InsertNode(N, IP);
   AllNodes.push_back(N);
   return SDValue(N, 0);
@@ -1002,16 +1017,20 @@ SDValue SelectionDAG::getFrameIndex(int FI, MVT VT, bool isTarget) {
   return SDValue(N, 0);
 }
 
-SDValue SelectionDAG::getJumpTable(int JTI, MVT VT, bool isTarget){
+SDValue SelectionDAG::getJumpTable(int JTI, MVT VT, bool isTarget,
+                                   unsigned char TargetFlags) {
+  assert((TargetFlags == 0 || isTarget) &&
+         "Cannot set target flags on target-independent jump tables");
   unsigned Opc = isTarget ? ISD::TargetJumpTable : ISD::JumpTable;
   FoldingSetNodeID ID;
   AddNodeIDNode(ID, Opc, getVTList(VT), 0, 0);
   ID.AddInteger(JTI);
+  ID.AddInteger(TargetFlags);
   void *IP = 0;
   if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP))
     return SDValue(E, 0);
   SDNode *N = NodeAllocator.Allocate<JumpTableSDNode>();
-  new (N) JumpTableSDNode(JTI, VT, isTarget);
+  new (N) JumpTableSDNode(JTI, VT, isTarget, TargetFlags);
   CSEMap.InsertNode(N, IP);
   AllNodes.push_back(N);
   return SDValue(N, 0);
@@ -1019,7 +1038,10 @@ SDValue SelectionDAG::getJumpTable(int JTI, MVT VT, bool isTarget){
 
 SDValue SelectionDAG::getConstantPool(Constant *C, MVT VT,
                                       unsigned Alignment, int Offset,
-                                      bool isTarget) {
+                                      bool isTarget, 
+                                      unsigned char TargetFlags) {
+  assert((TargetFlags == 0 || isTarget) &&
+         "Cannot set target flags on target-independent globals");
   if (Alignment == 0)
     Alignment = TLI.getTargetData()->getPrefTypeAlignment(C->getType());
   unsigned Opc = isTarget ? ISD::TargetConstantPool : ISD::ConstantPool;
@@ -1028,11 +1050,12 @@ SDValue SelectionDAG::getConstantPool(Constant *C, MVT VT,
   ID.AddInteger(Alignment);
   ID.AddInteger(Offset);
   ID.AddPointer(C);
+  ID.AddInteger(TargetFlags);
   void *IP = 0;
   if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP))
     return SDValue(E, 0);
   SDNode *N = NodeAllocator.Allocate<ConstantPoolSDNode>();
-  new (N) ConstantPoolSDNode(isTarget, C, VT, Offset, Alignment);
+  new (N) ConstantPoolSDNode(isTarget, C, VT, Offset, Alignment, TargetFlags);
   CSEMap.InsertNode(N, IP);
   AllNodes.push_back(N);
   return SDValue(N, 0);
@@ -1041,7 +1064,10 @@ SDValue SelectionDAG::getConstantPool(Constant *C, MVT VT,
 
 SDValue SelectionDAG::getConstantPool(MachineConstantPoolValue *C, MVT VT,
                                       unsigned Alignment, int Offset,
-                                      bool isTarget) {
+                                      bool isTarget,
+                                      unsigned char TargetFlags) {
+  assert((TargetFlags == 0 || isTarget) &&
+         "Cannot set target flags on target-independent globals");
   if (Alignment == 0)
     Alignment = TLI.getTargetData()->getPrefTypeAlignment(C->getType());
   unsigned Opc = isTarget ? ISD::TargetConstantPool : ISD::ConstantPool;
@@ -1050,11 +1076,12 @@ SDValue SelectionDAG::getConstantPool(MachineConstantPoolValue *C, MVT VT,
   ID.AddInteger(Alignment);
   ID.AddInteger(Offset);
   C->AddSelectionDAGCSEId(ID);
+  ID.AddInteger(TargetFlags);
   void *IP = 0;
   if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP))
     return SDValue(E, 0);
   SDNode *N = NodeAllocator.Allocate<ConstantPoolSDNode>();
-  new (N) ConstantPoolSDNode(isTarget, C, VT, Offset, Alignment);
+  new (N) ConstantPoolSDNode(isTarget, C, VT, Offset, Alignment, TargetFlags);
   CSEMap.InsertNode(N, IP);
   AllNodes.push_back(N);
   return SDValue(N, 0);
@@ -1106,16 +1133,19 @@ SDValue SelectionDAG::getExternalSymbol(const char *Sym, MVT VT) {
   SDNode *&N = ExternalSymbols[Sym];
   if (N) return SDValue(N, 0);
   N = NodeAllocator.Allocate<ExternalSymbolSDNode>();
-  new (N) ExternalSymbolSDNode(false, Sym, VT);
+  new (N) ExternalSymbolSDNode(false, Sym, 0, VT);
   AllNodes.push_back(N);
   return SDValue(N, 0);
 }
 
-SDValue SelectionDAG::getTargetExternalSymbol(const char *Sym, MVT VT) {
-  SDNode *&N = TargetExternalSymbols[Sym];
+SDValue SelectionDAG::getTargetExternalSymbol(const char *Sym, MVT VT,
+                                              unsigned char TargetFlags) {
+  SDNode *&N =
+    TargetExternalSymbols[std::pair<std::string,unsigned char>(Sym,
+                                                               TargetFlags)];
   if (N) return SDValue(N, 0);
   N = NodeAllocator.Allocate<ExternalSymbolSDNode>();
-  new (N) ExternalSymbolSDNode(true, Sym, VT);
+  new (N) ExternalSymbolSDNode(true, Sym, TargetFlags, VT);
   AllNodes.push_back(N);
   return SDValue(N, 0);
 }
@@ -3181,27 +3211,17 @@ static SDValue getMemcpyLoadsAndStores(SelectionDAG &DAG, DebugLoc dl,
     } else {
       // The type might not be legal for the target.  This should only happen
       // if the type is smaller than a legal type, as on PPC, so the right
-      // thing to do is generate a LoadExt/StoreTrunc pair.
+      // thing to do is generate a LoadExt/StoreTrunc pair.  These simplify
+      // to Load/Store if NVT==VT.
       // FIXME does the case above also need this?
-      if (TLI.isTypeLegal(VT)) {
-        Value = DAG.getLoad(VT, dl, Chain,
-                            getMemBasePlusOffset(Src, SrcOff, DAG),
-                            SrcSV, SrcSVOff + SrcOff, false, Align);
-        Store = DAG.getStore(Chain, dl, Value,
+      MVT NVT = TLI.getTypeToTransformTo(VT);
+      assert(NVT.bitsGE(VT));
+      Value = DAG.getExtLoad(ISD::EXTLOAD, dl, NVT, Chain,
+                             getMemBasePlusOffset(Src, SrcOff, DAG),
+                             SrcSV, SrcSVOff + SrcOff, VT, false, Align);
+      Store = DAG.getTruncStore(Chain, dl, Value,
                              getMemBasePlusOffset(Dst, DstOff, DAG),
-                             DstSV, DstSVOff + DstOff, false, DstAlign);
-      } else {
-        MVT NVT = VT;
-        while (!TLI.isTypeLegal(NVT)) {
-          NVT = (MVT::SimpleValueType(NVT.getSimpleVT() + 1));
-        }
-        Value = DAG.getExtLoad(ISD::EXTLOAD, dl, NVT, Chain,
-                               getMemBasePlusOffset(Src, SrcOff, DAG),
-                               SrcSV, SrcSVOff + SrcOff, VT, false, Align);
-        Store = DAG.getTruncStore(Chain, dl, Value,
-                               getMemBasePlusOffset(Dst, DstOff, DAG),
-                               DstSV, DstSVOff + DstOff, VT, false, DstAlign);
-      }
+                             DstSV, DstSVOff + DstOff, VT, false, DstAlign);
     }
     OutChains.push_back(Store);
     SrcOff += VTSize;
@@ -4915,15 +4935,10 @@ HandleSDNode::~HandleSDNode() {
   DropOperands();
 }
 
-GlobalAddressSDNode::GlobalAddressSDNode(bool isTarget, const GlobalValue *GA,
-                                         MVT VT, int64_t o)
-  : SDNode(isa<GlobalVariable>(GA) &&
-           cast<GlobalVariable>(GA)->isThreadLocal() ?
-           // Thread Local
-           (isTarget ? ISD::TargetGlobalTLSAddress : ISD::GlobalTLSAddress) :
-           // Non Thread Local
-           (isTarget ? ISD::TargetGlobalAddress : ISD::GlobalAddress),
-           DebugLoc::getUnknownLoc(), getSDVTList(VT)), Offset(o) {
+GlobalAddressSDNode::GlobalAddressSDNode(unsigned Opc, const GlobalValue *GA,
+                                         MVT VT, int64_t o, unsigned char TF)
+  : SDNode(Opc, DebugLoc::getUnknownLoc(), getSDVTList(VT)),
+    Offset(o), TargetFlags(TF) {
   TheGlobal = const_cast<GlobalValue*>(GA);
 }
 
@@ -4987,14 +5002,17 @@ void SDNode::Profile(FoldingSetNodeID &ID) const {
   AddNodeIDNode(ID, this);
 }
 
+static ManagedStatic<std::set<MVT, MVT::compareRawBits> > EVTs;
+static MVT VTs[MVT::LAST_VALUETYPE];
+static ManagedStatic<sys::SmartMutex<true> > VTMutex;
+
 /// getValueTypeList - Return a pointer to the specified value type.
 ///
 const MVT *SDNode::getValueTypeList(MVT VT) {
+  sys::SmartScopedLock<true> Lock(&*VTMutex);
   if (VT.isExtended()) {
-    static std::set<MVT, MVT::compareRawBits> EVTs;
-    return &(*EVTs.insert(VT).first);
+    return &(*EVTs->insert(VT).first);
   } else {
-    static MVT VTs[MVT::LAST_VALUETYPE];
     VTs[VT.getSimpleVT()] = VT;
     return &VTs[VT.getSimpleVT()];
   }
@@ -5486,10 +5504,14 @@ void SDNode::print_details(raw_ostream &OS, const SelectionDAG *G) const {
       OS << " + " << offset;
     else
       OS << " " << offset;
+    if (unsigned TF = GADN->getTargetFlags())  // unsigned: print as a number
+      OS << " [TF=" << TF << ']';
   } else if (const FrameIndexSDNode *FIDN = dyn_cast<FrameIndexSDNode>(this)) {
     OS << "<" << FIDN->getIndex() << ">";
   } else if (const JumpTableSDNode *JTDN = dyn_cast<JumpTableSDNode>(this)) {
     OS << "<" << JTDN->getIndex() << ">";
+    if (unsigned TF = JTDN->getTargetFlags())  // unsigned: print as a number
+      OS << " [TF=" << TF << ']';
   } else if (const ConstantPoolSDNode *CP = dyn_cast<ConstantPoolSDNode>(this)){
     int offset = CP->getOffset();
     if (CP->isMachineConstantPoolEntry())
@@ -5500,6 +5522,8 @@ void SDNode::print_details(raw_ostream &OS, const SelectionDAG *G) const {
       OS << " + " << offset;
     else
       OS << " " << offset;
+    if (unsigned TF = CP->getTargetFlags())  // unsigned: print as a number
+      OS << " [TF=" << TF << ']';
   } else if (const BasicBlockSDNode *BBDN = dyn_cast<BasicBlockSDNode>(this)) {
     OS << "<";
     const Value *LBB = (const Value*)BBDN->getBasicBlock()->getBasicBlock();
@@ -5516,6 +5540,8 @@ void SDNode::print_details(raw_ostream &OS, const SelectionDAG *G) const {
   } else if (const ExternalSymbolSDNode *ES =
              dyn_cast<ExternalSymbolSDNode>(this)) {
     OS << "'" << ES->getSymbol() << "'";
+    if (unsigned TF = ES->getTargetFlags())  // unsigned: print as a number
+      OS << " [TF=" << TF << ']';
   } else if (const SrcValueSDNode *M = dyn_cast<SrcValueSDNode>(this)) {
     if (M->getValue())
       OS << "<" << M->getValue() << ">";
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp
index 3eec684..6fd5df2 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp
@@ -94,7 +94,8 @@ namespace llvm {
     
 
     static std::string getNodeLabel(const SDNode *Node,
-                                    const SelectionDAG *Graph);
+                                    const SelectionDAG *Graph,
+                                    bool ShortNames);
     static std::string getNodeAttributes(const SDNode *N,
                                          const SelectionDAG *Graph) {
 #ifndef NDEBUG
@@ -120,139 +121,14 @@ namespace llvm {
 }
 
 std::string DOTGraphTraits<SelectionDAG*>::getNodeLabel(const SDNode *Node,
-                                                        const SelectionDAG *G) {
-  std::string Op = Node->getOperationName(G);
-
-  if (const ConstantSDNode *CSDN = dyn_cast<ConstantSDNode>(Node)) {
-    Op += ": " + utostr(CSDN->getZExtValue());
-  } else if (const ConstantFPSDNode *CSDN = dyn_cast<ConstantFPSDNode>(Node)) {
-    Op += ": " + ftostr(CSDN->getValueAPF());
-  } else if (const GlobalAddressSDNode *GADN =
-             dyn_cast<GlobalAddressSDNode>(Node)) {
-    Op += ": " + GADN->getGlobal()->getName();
-    if (int64_t Offset = GADN->getOffset()) {
-      if (Offset > 0)
-        Op += "+" + itostr(Offset);
-      else
-        Op += itostr(Offset);
-    }
-  } else if (const FrameIndexSDNode *FIDN = dyn_cast<FrameIndexSDNode>(Node)) {
-    Op += " " + itostr(FIDN->getIndex());
-  } else if (const JumpTableSDNode *JTDN = dyn_cast<JumpTableSDNode>(Node)) {
-    Op += " " + itostr(JTDN->getIndex());
-  } else if (const ConstantPoolSDNode *CP = dyn_cast<ConstantPoolSDNode>(Node)){
-    if (CP->isMachineConstantPoolEntry()) {
-      Op += '<';
-      {
-        raw_string_ostream OSS(Op);
-        OSS << *CP->getMachineCPVal();
-      }
-      Op += '>';
-    } else {
-      if (ConstantFP *CFP = dyn_cast<ConstantFP>(CP->getConstVal()))
-        Op += "<" + ftostr(CFP->getValueAPF()) + ">";
-      else if (ConstantInt *CI = dyn_cast<ConstantInt>(CP->getConstVal()))
-        Op += "<" + utostr(CI->getZExtValue()) + ">";
-      else {
-        Op += '<';
-        {
-          raw_string_ostream OSS(Op);
-          WriteAsOperand(OSS, CP->getConstVal(), false);
-        }
-        Op += '>';
-      }
-    }
-    Op += " A=" + itostr(CP->getAlignment());
-  } else if (const BasicBlockSDNode *BBDN = dyn_cast<BasicBlockSDNode>(Node)) {
-    Op = "BB: ";
-    const Value *LBB = (const Value*)BBDN->getBasicBlock()->getBasicBlock();
-    if (LBB)
-      Op += LBB->getName();
-    //Op += " " + (const void*)BBDN->getBasicBlock();
-  } else if (const RegisterSDNode *R = dyn_cast<RegisterSDNode>(Node)) {
-    if (G && R->getReg() != 0 &&
-        TargetRegisterInfo::isPhysicalRegister(R->getReg())) {
-      Op = Op + " " +
-        G->getTarget().getRegisterInfo()->getName(R->getReg());
-    } else {
-      Op += " #" + utostr(R->getReg());
-    }
-  } else if (const DbgStopPointSDNode *D = dyn_cast<DbgStopPointSDNode>(Node)) {
-    DICompileUnit CU(cast<GlobalVariable>(D->getCompileUnit()));
-    std::string FN;
-    Op += ": " + CU.getFilename(FN);
-    Op += ":" + utostr(D->getLine());
-    if (D->getColumn() != 0)
-      Op += ":" + utostr(D->getColumn());
-  } else if (const LabelSDNode *L = dyn_cast<LabelSDNode>(Node)) {
-    Op += ": LabelID=" + utostr(L->getLabelID());
-  } else if (const CallSDNode *C = dyn_cast<CallSDNode>(Node)) {
-    Op += ": CallingConv=" + utostr(C->getCallingConv());
-    if (C->isVarArg())
-      Op += ", isVarArg";
-    if (C->isTailCall())
-      Op += ", isTailCall";
-  } else if (const ExternalSymbolSDNode *ES =
-             dyn_cast<ExternalSymbolSDNode>(Node)) {
-    Op += "'" + std::string(ES->getSymbol()) + "'";
-  } else if (const SrcValueSDNode *M = dyn_cast<SrcValueSDNode>(Node)) {
-    if (M->getValue())
-      Op += "<" + M->getValue()->getName() + ">";
-    else
-      Op += "<null>";
-  } else if (const MemOperandSDNode *M = dyn_cast<MemOperandSDNode>(Node)) {
-    const Value *V = M->MO.getValue();
-    Op += '<';
-    if (!V) {
-      Op += "(unknown)";
-    } else if (const PseudoSourceValue *PSV = dyn_cast<PseudoSourceValue>(V)) {
-      // PseudoSourceValues don't have names, so use their print method.
-      raw_string_ostream OSS(Op);
-      PSV->print(OSS);
-    } else {
-      Op += V->getName();
-    }
-    Op += '+' + itostr(M->MO.getOffset()) + '>';
-  } else if (const ARG_FLAGSSDNode *N = dyn_cast<ARG_FLAGSSDNode>(Node)) {
-    Op = Op + " AF=" + N->getArgFlags().getArgFlagsString();
-  } else if (const VTSDNode *N = dyn_cast<VTSDNode>(Node)) {
-    Op = Op + " VT=" + N->getVT().getMVTString();
-  } else if (const LoadSDNode *LD = dyn_cast<LoadSDNode>(Node)) {
-    bool doExt = true;
-    switch (LD->getExtensionType()) {
-    default: doExt = false; break;
-    case ISD::EXTLOAD:
-      Op = Op + "<anyext ";
-      break;
-    case ISD::SEXTLOAD:
-      Op = Op + " <sext ";
-      break;
-    case ISD::ZEXTLOAD:
-      Op = Op + " <zext ";
-      break;
-    }
-    if (doExt)
-      Op += LD->getMemoryVT().getMVTString() + ">";
-    if (LD->isVolatile())
-      Op += "<V>";
-    Op += LD->getIndexedModeName(LD->getAddressingMode());
-    if (LD->getAlignment() > 1)
-      Op += " A=" + utostr(LD->getAlignment());
-  } else if (const StoreSDNode *ST = dyn_cast<StoreSDNode>(Node)) {
-    if (ST->isTruncatingStore())
-      Op += "<trunc " + ST->getMemoryVT().getMVTString() + ">";
-    if (ST->isVolatile())
-      Op += "<V>";
-    Op += ST->getIndexedModeName(ST->getAddressingMode());
-    if (ST->getAlignment() > 1)
-      Op += " A=" + utostr(ST->getAlignment());
+                                                        const SelectionDAG *G,
+                                                        bool ShortNames) {
+  std::string Result = Node->getOperationName(G);
+  {
+    raw_string_ostream OS(Result);
+    Node->print_details(OS, G);
   }
-
-#if 0
-  Op += " Id=" + itostr(Node->getNodeId());
-#endif
-  
-  return Op;
+  return Result;
 }
 
 
@@ -262,7 +138,7 @@ std::string DOTGraphTraits<SelectionDAG*>::getNodeLabel(const SDNode *Node,
 void SelectionDAG::viewGraph(const std::string &Title) {
 // This code is only for debugging!
 #ifndef NDEBUG
-  ViewGraph(this, "dag." + getMachineFunction().getFunction()->getName(),
+  ViewGraph(this, "dag." + getMachineFunction().getFunction()->getName(), false,
             Title);
 #else
   cerr << "SelectionDAG::viewGraph is only available in debug builds on "
@@ -393,7 +269,8 @@ std::string ScheduleDAGSDNodes::getGraphNodeLabel(const SUnit *SU) const {
     for (SDNode *N = SU->getNode(); N; N = N->getFlaggedNode())
       FlaggedNodes.push_back(N);
     while (!FlaggedNodes.empty()) {
-      O << DOTGraphTraits<SelectionDAG*>::getNodeLabel(FlaggedNodes.back(), DAG);
+      O << DOTGraphTraits<SelectionDAG*>::getNodeLabel(FlaggedNodes.back(),
+                                                       DAG, false);
       FlaggedNodes.pop_back();
       if (!FlaggedNodes.empty())
         O << "\n    ";
diff --git a/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/lib/CodeGen/SelectionDAG/TargetLowering.cpp
index a771d46..83357e0 100644
--- a/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -527,7 +527,7 @@ TargetLowering::~TargetLowering() {}
 /// computeRegisterProperties - Once all of the register classes are added,
 /// this allows us to compute derived properties we expose.
 void TargetLowering::computeRegisterProperties() {
-  assert(MVT::LAST_VALUETYPE <= 32 &&
+  assert(MVT::LAST_VALUETYPE <= MVT::MAX_ALLOWED_VALUETYPE &&
          "Too many value types for ValueTypeActions to hold!");
 
   // Everything defaults to needing one register.
diff --git a/lib/CodeGen/Spiller.cpp b/lib/CodeGen/Spiller.cpp
index 919a0ce..405cd80 100644
--- a/lib/CodeGen/Spiller.cpp
+++ b/lib/CodeGen/Spiller.cpp
@@ -78,24 +78,21 @@ protected:
     return miIdx;
   }  
 
-
   /// Insert a store of the given vreg to the given stack slot immediately
   /// after the given instruction. Returns the base index of the inserted
   /// instruction. The caller is responsible for adding an appropriate
   /// LiveInterval to the LiveIntervals analysis.
-  unsigned insertStoreFor(MachineInstr *mi, unsigned ss,
+  unsigned insertStoreAfter(MachineInstr *mi, unsigned ss,
                           unsigned vreg,
                           const TargetRegisterClass *trc) {
 
-    MachineBasicBlock::iterator nextInstItr(mi); 
-    ++nextInstItr;
+    MachineBasicBlock::iterator nextInstItr(next(mi)); 
 
     unsigned miIdx = makeSpaceAfter(mi);
 
     tii->storeRegToStackSlot(*mi->getParent(), nextInstItr, vreg,
                              true, ss, trc);
-    MachineBasicBlock::iterator storeInstItr(mi);
-    ++storeInstItr;
+    MachineBasicBlock::iterator storeInstItr(next(mi));
     MachineInstr *storeInst = &*storeInstItr;
     unsigned storeInstIdx = miIdx + LiveInterval::InstrSlots::NUM;
 
@@ -107,37 +104,81 @@ protected:
     return storeInstIdx;
   }
 
-  void insertStoreOnInterval(LiveInterval *li,
-                             MachineInstr *mi, unsigned ss,
-                             unsigned vreg,
-                             const TargetRegisterClass *trc) {
+  /// Insert a store of the given vreg to the given stack slot immediately
+  /// before the given instruction. Returns the base index of the inserted
+  /// instruction.
+  unsigned insertStoreBefore(MachineInstr *mi, unsigned ss,
+                            unsigned vreg,
+                            const TargetRegisterClass *trc) {
+    unsigned miIdx = makeSpaceBefore(mi);
+  
+    tii->storeRegToStackSlot(*mi->getParent(), mi, vreg, true, ss, trc);
+    MachineBasicBlock::iterator storeInstItr(prior(mi));
+    MachineInstr *storeInst = &*storeInstItr;
+    unsigned storeInstIdx = miIdx - LiveInterval::InstrSlots::NUM;
+
+    assert(lis->getInstructionFromIndex(storeInstIdx) == 0 &&
+           "Store inst index already in use.");
+
+    lis->InsertMachineInstrInMaps(storeInst, storeInstIdx);
+
+    return storeInstIdx;
+  }
+
+  void insertStoreAfterInstOnInterval(LiveInterval *li,
+                                      MachineInstr *mi, unsigned ss,
+                                      unsigned vreg,
+                                      const TargetRegisterClass *trc) {
 
-    unsigned storeInstIdx = insertStoreFor(mi, ss, vreg, trc);
+    unsigned storeInstIdx = insertStoreAfter(mi, ss, vreg, trc);
     unsigned start = lis->getDefIndex(lis->getInstructionIndex(mi)),
              end = lis->getUseIndex(storeInstIdx);
 
     VNInfo *vni =
       li->getNextValue(storeInstIdx, 0, true, lis->getVNInfoAllocator());
     vni->kills.push_back(storeInstIdx);
+    DOUT << "    Inserting store range: [" << start << ", " << end << ")\n";
     LiveRange lr(start, end, vni);
       
     li->addRange(lr);
   }
 
-  /// Insert a load of the given veg from the given stack slot immediately
+  /// Insert a load of the given vreg from the given stack slot immediately
+  /// after the given instruction. Returns the base index of the inserted
+  /// instruction. The caller is responsible for adding/removing an appropriate
+  /// range on vreg's LiveInterval.
+  unsigned insertLoadAfter(MachineInstr *mi, unsigned ss,
+                          unsigned vreg,
+                          const TargetRegisterClass *trc) {
+    // The load goes in front of mi's current successor, i.e. right after mi.
+    MachineBasicBlock::iterator nextInstItr(next(mi));
+
+    unsigned miIdx = makeSpaceAfter(mi);
+
+    tii->loadRegFromStackSlot(*mi->getParent(), nextInstItr, vreg, ss, trc);
+    MachineBasicBlock::iterator loadInstItr(next(mi));
+    MachineInstr *loadInst = &*loadInstItr;
+    unsigned loadInstIdx = miIdx + LiveInterval::InstrSlots::NUM;
+    // The index one instruction past mi must still be unassigned.
+    assert(lis->getInstructionFromIndex(loadInstIdx) == 0 &&
+           "Load inst index already in use.");
+
+    lis->InsertMachineInstrInMaps(loadInst, loadInstIdx);
+
+    return loadInstIdx;
+  }
+
+  /// Insert a load of the given vreg from the given stack slot immediately
   /// before the given instruction. Returns the base index of the inserted
   /// instruction. The caller is responsible for adding an appropriate
   /// LiveInterval to the LiveIntervals analysis.
-  unsigned insertLoadFor(MachineInstr *mi, unsigned ss,
-                         unsigned vreg,
-                         const TargetRegisterClass *trc) {
-    MachineBasicBlock::iterator useInstItr(mi);
-  
+  unsigned insertLoadBefore(MachineInstr *mi, unsigned ss,
+                            unsigned vreg,
+                            const TargetRegisterClass *trc) {  
     unsigned miIdx = makeSpaceBefore(mi);
   
-    tii->loadRegFromStackSlot(*mi->getParent(), useInstItr, vreg, ss, trc);
-    MachineBasicBlock::iterator loadInstItr(mi);
-    --loadInstItr;
+    tii->loadRegFromStackSlot(*mi->getParent(), mi, vreg, ss, trc);
+    MachineBasicBlock::iterator loadInstItr(prior(mi));
     MachineInstr *loadInst = &*loadInstItr;
     unsigned loadInstIdx = miIdx - LiveInterval::InstrSlots::NUM;
 
@@ -149,18 +190,19 @@ protected:
     return loadInstIdx;
   }
 
-  void insertLoadOnInterval(LiveInterval *li,
-                            MachineInstr *mi, unsigned ss, 
-                            unsigned vreg,
-                            const TargetRegisterClass *trc) {
+  void insertLoadBeforeInstOnInterval(LiveInterval *li,
+                                      MachineInstr *mi, unsigned ss, 
+                                      unsigned vreg,
+                                      const TargetRegisterClass *trc) {
 
-    unsigned loadInstIdx = insertLoadFor(mi, ss, vreg, trc);
+    unsigned loadInstIdx = insertLoadBefore(mi, ss, vreg, trc);
     unsigned start = lis->getDefIndex(loadInstIdx),
              end = lis->getUseIndex(lis->getInstructionIndex(mi));
 
     VNInfo *vni =
       li->getNextValue(loadInstIdx, 0, true, lis->getVNInfoAllocator());
     vni->kills.push_back(lis->getInstructionIndex(mi));
+    DOUT << "    Inserting load range: [" << start << ", " << end << ")\n";
     LiveRange lr(start, end, vni);
 
     li->addRange(lr);
@@ -180,6 +222,8 @@ protected:
     assert(!li->isStackSlot() &&
            "Trying to spill a stack slot.");
 
+    DOUT << "Trivial spill everywhere of reg" << li->reg << "\n";
+
     std::vector<LiveInterval*> added;
     
     const TargetRegisterClass *trc = mri->getRegClass(li->reg);
@@ -189,6 +233,9 @@ protected:
          regItr = mri->reg_begin(li->reg); regItr != mri->reg_end();) {
 
       MachineInstr *mi = &*regItr;
+
+      DOUT << "  Processing " << *mi;
+
       do {
         ++regItr;
       } while (regItr != mri->reg_end() && (&*regItr == mi));
@@ -227,11 +274,11 @@ protected:
       assert(hasUse || hasDef);
 
       if (hasUse) {
-        insertLoadOnInterval(newLI, mi, ss, newVReg, trc);
+        insertLoadBeforeInstOnInterval(newLI, mi, ss, newVReg, trc);
       }
 
       if (hasDef) {
-        insertStoreOnInterval(newLI, mi, ss, newVReg, trc);
+        insertStoreAfterInstOnInterval(newLI, mi, ss, newVReg, trc);
       }
 
       added.push_back(newLI);
@@ -258,29 +305,53 @@ public:
 
   std::vector<LiveInterval*> intraBlockSplit(LiveInterval *li, VNInfo *valno)  {
     std::vector<LiveInterval*> spillIntervals;
-    MachineBasicBlock::iterator storeInsertPoint;
+
+    if (!valno->isDefAccurate() && !valno->isPHIDef()) {
+      // Early out for values which have no well defined def point.
+      return spillIntervals;
+    }
+
+    // Ok.. we should be able to proceed...
+    const TargetRegisterClass *trc = mri->getRegClass(li->reg);
+    unsigned ss = vrm->assignVirt2StackSlot(li->reg);    
+    vrm->grow();
+    vrm->assignVirt2StackSlot(li->reg, ss);
+
+    MachineInstr *mi = 0;
+    unsigned storeIdx = 0;
 
     if (valno->isDefAccurate()) {
       // If we have an accurate def we can just grab an iterator to the instr
       // after the def.
-      storeInsertPoint =
-        next(MachineBasicBlock::iterator(lis->getInstructionFromIndex(valno->def)));
+      mi = lis->getInstructionFromIndex(valno->def);
+      storeIdx = insertStoreAfter(mi, ss, li->reg, trc) +
+        LiveInterval::InstrSlots::DEF;
     } else {
-      // If the def info isn't accurate we check if this is a PHI def.
-      // If it is then def holds the index of the defining Basic Block, and we
-      // can use that to get an insertion point.
-      if (valno->isPHIDef()) {
-
-      } else {
-        // We have no usable def info. We can't split this value sensibly.
-        // FIXME: Need sensible feedback for "failure to split", an empty
-        // set of spill intervals could be reasonably returned from a
-        // split where both the store and load are folded.
-        return spillIntervals;
-      }
+      // if we get here we have a PHI def.
+      mi = &lis->getMBBFromIndex(valno->def)->front();
+      storeIdx = insertStoreBefore(mi, ss, li->reg, trc) +
+        LiveInterval::InstrSlots::DEF;
+    }
+
+    MachineBasicBlock *defBlock = mi->getParent();
+    unsigned loadIdx = 0;
+
+    // Find the load point: the first reader of the reg, or block end if none.
+    MachineBasicBlock::iterator useItr(mi), useEnd(defBlock->end());
+    for (; useItr != useEnd && !useItr->readsRegister(li->reg); ++useItr) {}
+
+    if (useItr != defBlock->end()) {
+      MachineInstr *loadInst = useItr;
+      loadIdx = insertLoadBefore(loadInst, ss, li->reg, trc) +
+        LiveInterval::InstrSlots::USE;
+    }
+    else {
+      MachineInstr *loadInst = &defBlock->back();
+      loadIdx = insertLoadAfter(loadInst, ss, li->reg, trc) +
+        LiveInterval::InstrSlots::USE;
     }
 
-        
+    li->removeRange(storeIdx, loadIdx, true);
 
     return spillIntervals;
   }
diff --git a/lib/CodeGen/VirtRegRewriter.h b/lib/CodeGen/VirtRegRewriter.h
index bc830f7..f9d7fbb 100644
--- a/lib/CodeGen/VirtRegRewriter.h
+++ b/lib/CodeGen/VirtRegRewriter.h
@@ -32,11 +32,6 @@
 #include "VirtRegMap.h"
 #include <map>
 
-// TODO:
-//       - Finish renaming Spiller -> Rewriter
-//         - SimpleSpiller
-//         - LocalSpiller
-
 namespace llvm {
   
   /// VirtRegRewriter interface: Implementations of this interface assign
diff --git a/lib/CompilerDriver/Action.cpp b/lib/CompilerDriver/Action.cpp
index c0a1b84..816f793 100644
--- a/lib/CompilerDriver/Action.cpp
+++ b/lib/CompilerDriver/Action.cpp
@@ -12,8 +12,8 @@
 //===----------------------------------------------------------------------===//
 
 #include "llvm/CompilerDriver/Action.h"
+#include "llvm/CompilerDriver/BuiltinOptions.h"
 
-#include "llvm/Support/CommandLine.h"
 #include "llvm/System/Program.h"
 
 #include <iostream>
@@ -22,9 +22,6 @@
 using namespace llvm;
 using namespace llvmc;
 
-extern cl::opt<bool> DryRun;
-extern cl::opt<bool> VerboseMode;
-
 namespace {
   int ExecuteProgram(const std::string& name,
                      const StrVector& args) {
diff --git a/lib/CompilerDriver/CompilationGraph.cpp b/lib/CompilerDriver/CompilationGraph.cpp
index dece4e8..1212a21 100644
--- a/lib/CompilerDriver/CompilationGraph.cpp
+++ b/lib/CompilerDriver/CompilationGraph.cpp
@@ -11,11 +11,11 @@
 //
 //===----------------------------------------------------------------------===//
 
+#include "llvm/CompilerDriver/BuiltinOptions.h"
 #include "llvm/CompilerDriver/CompilationGraph.h"
 #include "llvm/CompilerDriver/Error.h"
 
 #include "llvm/ADT/STLExtras.h"
-#include "llvm/Support/CommandLine.h"
 #include "llvm/Support/DOTGraphTraits.h"
 #include "llvm/Support/GraphWriter.h"
 
@@ -30,9 +30,6 @@
 using namespace llvm;
 using namespace llvmc;
 
-extern cl::list<std::string> InputFilenames;
-extern cl::list<std::string> Languages;
-
 namespace llvmc {
 
   const std::string& LanguageMap::GetLanguage(const sys::Path& File) const {
@@ -477,7 +474,8 @@ namespace llvm {
   {
 
     template<typename GraphType>
-    static std::string getNodeLabel(const Node* N, const GraphType&)
+    static std::string getNodeLabel(const Node* N, const GraphType&,
+                                    bool ShortNames)
     {
       if (N->ToolPtr)
         if (N->ToolPtr->IsJoin())
diff --git a/lib/CompilerDriver/Plugin.cpp b/lib/CompilerDriver/Plugin.cpp
index 75abbd0..cb3c7be 100644
--- a/lib/CompilerDriver/Plugin.cpp
+++ b/lib/CompilerDriver/Plugin.cpp
@@ -12,7 +12,8 @@
 //===----------------------------------------------------------------------===//
 
 #include "llvm/CompilerDriver/Plugin.h"
-
+#include "llvm/Support/ManagedStatic.h"
+#include "llvm/System/Mutex.h"
 #include <algorithm>
 #include <vector>
 
@@ -28,6 +29,7 @@ namespace {
   static bool pluginListInitialized = false;
   typedef std::vector<const llvmc::BasePlugin*> PluginList;
   static PluginList Plugins;
+  static llvm::ManagedStatic<llvm::sys::SmartMutex<true> > PluginMutex;
 
   struct ByPriority {
     bool operator()(const llvmc::BasePlugin* lhs,
@@ -40,6 +42,7 @@ namespace {
 namespace llvmc {
 
   PluginLoader::PluginLoader() {
+    llvm::sys::SmartScopedLock<true> Lock(&*PluginMutex);
     if (!pluginListInitialized) {
       for (PluginRegistry::iterator B = PluginRegistry::begin(),
              E = PluginRegistry::end(); B != E; ++B)
@@ -50,6 +53,7 @@ namespace llvmc {
   }
 
   PluginLoader::~PluginLoader() {
+    llvm::sys::SmartScopedLock<true> Lock(&*PluginMutex);
     if (pluginListInitialized) {
       for (PluginList::iterator B = Plugins.begin(), E = Plugins.end();
            B != E; ++B)
@@ -59,12 +63,14 @@ namespace llvmc {
   }
 
   void PluginLoader::PopulateLanguageMap(LanguageMap& langMap) {
+    llvm::sys::SmartScopedLock<true> Lock(&*PluginMutex);
     for (PluginList::iterator B = Plugins.begin(), E = Plugins.end();
          B != E; ++B)
       (*B)->PopulateLanguageMap(langMap);
   }
 
   void PluginLoader::PopulateCompilationGraph(CompilationGraph& graph) {
+    llvm::sys::SmartScopedLock<true> Lock(&*PluginMutex);
     for (PluginList::iterator B = Plugins.begin(), E = Plugins.end();
          B != E; ++B)
       (*B)->PopulateCompilationGraph(graph);
diff --git a/lib/CompilerDriver/Tool.cpp b/lib/CompilerDriver/Tool.cpp
index 886b26b..e704dd9 100644
--- a/lib/CompilerDriver/Tool.cpp
+++ b/lib/CompilerDriver/Tool.cpp
@@ -11,16 +11,14 @@
 //
 //===----------------------------------------------------------------------===//
 
+#include "llvm/CompilerDriver/BuiltinOptions.h"
 #include "llvm/CompilerDriver/Tool.h"
 
 #include "llvm/System/Path.h"
-#include "llvm/Support/CommandLine.h"
 
 using namespace llvm;
 using namespace llvmc;
 
-extern cl::opt<std::string> OutputFilename;
-
 namespace {
   sys::Path MakeTempFile(const sys::Path& TempDir, const std::string& BaseName,
                          const std::string& Suffix) {
@@ -39,7 +37,7 @@ namespace {
     // NOTE: makeUnique always *creates* a unique temporary file,
     // which is good, since there will be no races. However, some
     // tools do not like it when the output file already exists, so
-    // they have to be placated with -f or something like that.
+    // they need to be placated with -f or something like that.
     Out.makeUnique(true, NULL);
     return Out;
   }
@@ -52,7 +50,7 @@ sys::Path Tool::OutFilename(const sys::Path& In,
   sys::Path Out;
 
   if (StopCompilation) {
-    if (!OutputFilename.empty()) {
+    if (!OutputFilename.empty() && SaveTemps != SaveTempsEnum::Obj ) {
       Out.set(OutputFilename);
     }
     else if (IsJoin()) {
diff --git a/lib/ExecutionEngine/Interpreter/Execution.cpp b/lib/ExecutionEngine/Interpreter/Execution.cpp
index 7dfeae0..bb3f64e 100644
--- a/lib/ExecutionEngine/Interpreter/Execution.cpp
+++ b/lib/ExecutionEngine/Interpreter/Execution.cpp
@@ -29,7 +29,6 @@
 using namespace llvm;
 
 STATISTIC(NumDynamicInsts, "Number of dynamic instructions executed");
-static Interpreter *TheEE = 0;
 
 static cl::opt<bool> PrintVolatile("interpreter-print-volatile", cl::Hidden,
           cl::desc("make the interpreter print every volatile load and store"));
@@ -51,10 +50,6 @@ static void SetValue(Value *V, GenericValue Val, ExecutionContext &SF) {
   SF.Values[V] = Val;
 }
 
-void Interpreter::initializeExecutionEngine() {
-  TheEE = this;
-}
-
 //===----------------------------------------------------------------------===//
 //                    Binary Instruction Implementations
 //===----------------------------------------------------------------------===//
@@ -815,7 +810,7 @@ GenericValue Interpreter::executeGEPOperation(Value *Ptr, gep_type_iterator I,
 
 void Interpreter::visitGetElementPtrInst(GetElementPtrInst &I) {
   ExecutionContext &SF = ECStack.back();
-  SetValue(&I, TheEE->executeGEPOperation(I.getPointerOperand(),
+  SetValue(&I, executeGEPOperation(I.getPointerOperand(),
                                    gep_type_begin(I), gep_type_end(I), SF), SF);
 }
 
diff --git a/lib/ExecutionEngine/Interpreter/Interpreter.cpp b/lib/ExecutionEngine/Interpreter/Interpreter.cpp
index ded65d5..d7f38ef5 100644
--- a/lib/ExecutionEngine/Interpreter/Interpreter.cpp
+++ b/lib/ExecutionEngine/Interpreter/Interpreter.cpp
@@ -29,10 +29,7 @@ static struct RegisterInterp {
 
 }
 
-namespace llvm {
-  void LinkInInterpreter() {
-  }
-}
+extern "C" void LLVMLinkInInterpreter() { }
 
 /// create - Create a new interpreter object.  This can never fail.
 ///
diff --git a/lib/ExecutionEngine/Interpreter/Interpreter.h b/lib/ExecutionEngine/Interpreter/Interpreter.h
index 8a285ec..6b13c90 100644
--- a/lib/ExecutionEngine/Interpreter/Interpreter.h
+++ b/lib/ExecutionEngine/Interpreter/Interpreter.h
@@ -202,7 +202,7 @@ private:  // Helper functions
 
   void *getPointerToFunction(Function *F) { return (void*)F; }
 
-  void initializeExecutionEngine();
+  void initializeExecutionEngine() { }
   void initializeExternalFunctions();
   GenericValue getConstantExprValue(ConstantExpr *CE, ExecutionContext &SF);
   GenericValue getOperandValue(Value *V, ExecutionContext &SF);
diff --git a/lib/ExecutionEngine/JIT/CMakeLists.txt b/lib/ExecutionEngine/JIT/CMakeLists.txt
index e0c13a1..bf915f7 100644
--- a/lib/ExecutionEngine/JIT/CMakeLists.txt
+++ b/lib/ExecutionEngine/JIT/CMakeLists.txt
@@ -7,5 +7,6 @@ add_llvm_library(LLVMJIT
   JITDwarfEmitter.cpp
   JITEmitter.cpp
   JITMemoryManager.cpp
+  MacOSJITEventListener.cpp
   TargetSelect.cpp
   )
diff --git a/lib/ExecutionEngine/JIT/JIT.cpp b/lib/ExecutionEngine/JIT/JIT.cpp
index 43995cb..db5a306 100644
--- a/lib/ExecutionEngine/JIT/JIT.cpp
+++ b/lib/ExecutionEngine/JIT/JIT.cpp
@@ -20,8 +20,9 @@
 #include "llvm/Instructions.h"
 #include "llvm/ModuleProvider.h"
 #include "llvm/CodeGen/JITCodeEmitter.h"
-#include "llvm/ExecutionEngine/GenericValue.h"
 #include "llvm/CodeGen/MachineCodeInfo.h"
+#include "llvm/ExecutionEngine/GenericValue.h"
+#include "llvm/ExecutionEngine/JITEventListener.h"
 #include "llvm/Target/TargetData.h"
 #include "llvm/Target/TargetMachine.h"
 #include "llvm/Target/TargetJITInfo.h"
@@ -60,9 +61,7 @@ static struct RegisterJIT {
 
 }
 
-namespace llvm {
-  void LinkInJIT() {
-  }
+extern "C" void LLVMLinkInJIT() {
 }
 
 
@@ -509,6 +508,40 @@ GenericValue JIT::runFunction(Function *F,
   return runFunction(Stub, std::vector<GenericValue>());
 }
 
+void JIT::RegisterJITEventListener(JITEventListener *L) {
+  if (L == NULL)
+    return;
+  MutexGuard locked(lock);
+  EventListeners.push_back(L);
+}
+void JIT::UnregisterJITEventListener(JITEventListener *L) {
+  if (L == NULL)
+    return;
+  MutexGuard locked(lock);
+  std::vector<JITEventListener*>::reverse_iterator I=
+      std::find(EventListeners.rbegin(), EventListeners.rend(), L);
+  if (I != EventListeners.rend()) {
+    std::swap(*I, EventListeners.back());
+    EventListeners.pop_back();
+  }
+}
+void JIT::NotifyFunctionEmitted(
+    const Function &F,
+    void *Code, size_t Size,
+    const JITEvent_EmittedFunctionDetails &Details) {
+  MutexGuard locked(lock);
+  for (unsigned I = 0, S = EventListeners.size(); I < S; ++I) {
+    EventListeners[I]->NotifyFunctionEmitted(F, Code, Size, Details);
+  }
+}
+
+void JIT::NotifyFreeingMachineCode(const Function &F, void *OldPtr) {
+  MutexGuard locked(lock);
+  for (unsigned I = 0, S = EventListeners.size(); I < S; ++I) {
+    EventListeners[I]->NotifyFreeingMachineCode(F, OldPtr);
+  }
+}
+
 /// runJITOnFunction - Run the FunctionPassManager full of
 /// just-in-time compilation passes on F, hopefully filling in
 /// GlobalAddress[F] with the address of F's machine code.
@@ -516,11 +549,23 @@ GenericValue JIT::runFunction(Function *F,
 void JIT::runJITOnFunction(Function *F, MachineCodeInfo *MCI) {
   MutexGuard locked(lock);
 
-  registerMachineCodeInfo(MCI);
+  class MCIListener : public JITEventListener {
+    MachineCodeInfo *const MCI;
+   public:
+    MCIListener(MachineCodeInfo *mci) : MCI(mci) {}
+    virtual void NotifyFunctionEmitted(const Function &,
+                                       void *Code, size_t Size,
+                                       const EmittedFunctionDetails &) {
+      MCI->setAddress(Code);
+      MCI->setSize(Size);
+    }
+  };
+  MCIListener MCIL(MCI);
+  if (MCI) RegisterJITEventListener(&MCIL);  // MCI is 0 unless info is wanted.
 
   runJITOnFunctionUnlocked(F, locked);
 
-  registerMachineCodeInfo(0);
+  UnregisterJITEventListener(&MCIL);
 }
 
 void JIT::runJITOnFunctionUnlocked(Function *F, const MutexGuard &locked) {
@@ -711,3 +756,6 @@ void JIT::addPendingFunction(Function *F) {
   MutexGuard locked(lock);
   jitstate->getPendingFunctions(locked).push_back(F);
 }
+
+
+JITEventListener::~JITEventListener() {}
diff --git a/lib/ExecutionEngine/JIT/JIT.h b/lib/ExecutionEngine/JIT/JIT.h
index 3ccb2dd..66417a7 100644
--- a/lib/ExecutionEngine/JIT/JIT.h
+++ b/lib/ExecutionEngine/JIT/JIT.h
@@ -20,10 +20,11 @@
 namespace llvm {
 
 class Function;
-class TargetMachine;
-class TargetJITInfo;
+class JITEvent_EmittedFunctionDetails;
 class MachineCodeEmitter;
 class MachineCodeInfo;
+class TargetJITInfo;
+class TargetMachine;
 
 class JITState {
 private:
@@ -52,6 +53,7 @@ class JIT : public ExecutionEngine {
   TargetMachine &TM;       // The current target we are compiling to
   TargetJITInfo &TJI;      // The JITInfo for the target we are compiling to
   JITCodeEmitter *JCE;     // JCE object
+  std::vector<JITEventListener*> EventListeners;
 
   JITState *jitstate;
 
@@ -157,9 +159,18 @@ public:
   // Run the JIT on F and return information about the generated code
   void runJITOnFunction(Function *F, MachineCodeInfo *MCI = 0);
 
+  virtual void RegisterJITEventListener(JITEventListener *L);
+  virtual void UnregisterJITEventListener(JITEventListener *L);
+  /// These functions correspond to the methods on JITEventListener.  They
+  /// iterate over the registered listeners and call the corresponding method on
+  /// each.
+  void NotifyFunctionEmitted(
+      const Function &F, void *Code, size_t Size,
+      const JITEvent_EmittedFunctionDetails &Details);
+  void NotifyFreeingMachineCode(const Function &F, void *OldPtr);
+
 private:
   static JITCodeEmitter *createEmitter(JIT &J, JITMemoryManager *JMM);
-  void registerMachineCodeInfo(MachineCodeInfo *MCI);
   void runJITOnFunctionUnlocked(Function *F, const MutexGuard &locked);
   void updateFunctionStub(Function *F);
   void updateDlsymStubTable();
diff --git a/lib/ExecutionEngine/JIT/JITEmitter.cpp b/lib/ExecutionEngine/JIT/JITEmitter.cpp
index 43f23e4..8fe7ab8 100644
--- a/lib/ExecutionEngine/JIT/JITEmitter.cpp
+++ b/lib/ExecutionEngine/JIT/JITEmitter.cpp
@@ -24,8 +24,9 @@
 #include "llvm/CodeGen/MachineJumpTableInfo.h"
 #include "llvm/CodeGen/MachineModuleInfo.h"
 #include "llvm/CodeGen/MachineRelocation.h"
-#include "llvm/ExecutionEngine/JITMemoryManager.h"
 #include "llvm/ExecutionEngine/GenericValue.h"
+#include "llvm/ExecutionEngine/JITEventListener.h"
+#include "llvm/ExecutionEngine/JITMemoryManager.h"
 #include "llvm/CodeGen/MachineCodeInfo.h"
 #include "llvm/Target/TargetData.h"
 #include "llvm/Target/TargetJITInfo.h"
@@ -411,136 +412,6 @@ void *JITResolver::JITCompilerFn(void *Stub) {
 }
 
 //===----------------------------------------------------------------------===//
-// Function Index Support
-
-// On MacOS we generate an index of currently JIT'd functions so that
-// performance tools can determine a symbol name and accurate code range for a
-// PC value.  Because performance tools are generally asynchronous, the code
-// below is written with the hope that it could be interrupted at any time and
-// have useful answers.  However, we don't go crazy with atomic operations, we
-// just do a "reasonable effort".
-#ifdef __APPLE__ 
-#define ENABLE_JIT_SYMBOL_TABLE 0
-#endif
-
-/// JitSymbolEntry - Each function that is JIT compiled results in one of these
-/// being added to an array of symbols.  This indicates the name of the function
-/// as well as the address range it occupies.  This allows the client to map
-/// from a PC value to the name of the function.
-struct JitSymbolEntry {
-  const char *FnName;   // FnName - a strdup'd string.
-  void *FnStart;
-  intptr_t FnSize;
-};
-
-
-struct JitSymbolTable {
-  /// NextPtr - This forms a linked list of JitSymbolTable entries.  This
-  /// pointer is not used right now, but might be used in the future.  Consider
-  /// it reserved for future use.
-  JitSymbolTable *NextPtr;
-  
-  /// Symbols - This is an array of JitSymbolEntry entries.  Only the first
-  /// 'NumSymbols' symbols are valid.
-  JitSymbolEntry *Symbols;
-  
-  /// NumSymbols - This indicates the number entries in the Symbols array that
-  /// are valid.
-  unsigned NumSymbols;
-  
-  /// NumAllocated - This indicates the amount of space we have in the Symbols
-  /// array.  This is a private field that should not be read by external tools.
-  unsigned NumAllocated;
-};
-
-#if ENABLE_JIT_SYMBOL_TABLE 
-JitSymbolTable *__jitSymbolTable;
-#endif
-
-static void AddFunctionToSymbolTable(const char *FnName, 
-                                     void *FnStart, intptr_t FnSize) {
-  assert(FnName != 0 && FnStart != 0 && "Bad symbol to add");
-  JitSymbolTable **SymTabPtrPtr = 0;
-#if !ENABLE_JIT_SYMBOL_TABLE
-  return;
-#else
-  SymTabPtrPtr = &__jitSymbolTable;
-#endif
-  
-  // If this is the first entry in the symbol table, add the JitSymbolTable
-  // index.
-  if (*SymTabPtrPtr == 0) {
-    JitSymbolTable *New = new JitSymbolTable();
-    New->NextPtr = 0;
-    New->Symbols = 0;
-    New->NumSymbols = 0;
-    New->NumAllocated = 0;
-    *SymTabPtrPtr = New;
-  }
-  
-  JitSymbolTable *SymTabPtr = *SymTabPtrPtr;
-  
-  // If we have space in the table, reallocate the table.
-  if (SymTabPtr->NumSymbols >= SymTabPtr->NumAllocated) {
-    // If we don't have space, reallocate the table.
-    unsigned NewSize = std::max(64U, SymTabPtr->NumAllocated*2);
-    JitSymbolEntry *NewSymbols = new JitSymbolEntry[NewSize];
-    JitSymbolEntry *OldSymbols = SymTabPtr->Symbols;
-    
-    // Copy the old entries over.
-    memcpy(NewSymbols, OldSymbols, SymTabPtr->NumSymbols*sizeof(OldSymbols[0]));
-    
-    // Swap the new symbols in, delete the old ones.
-    SymTabPtr->Symbols = NewSymbols;
-    SymTabPtr->NumAllocated = NewSize;
-    delete [] OldSymbols;
-  }
-  
-  // Otherwise, we have enough space, just tack it onto the end of the array.
-  JitSymbolEntry &Entry = SymTabPtr->Symbols[SymTabPtr->NumSymbols];
-  Entry.FnName = strdup(FnName);
-  Entry.FnStart = FnStart;
-  Entry.FnSize = FnSize;
-  ++SymTabPtr->NumSymbols;
-}
-
-static void RemoveFunctionFromSymbolTable(void *FnStart) {
-  assert(FnStart && "Invalid function pointer");
-  JitSymbolTable **SymTabPtrPtr = 0;
-#if !ENABLE_JIT_SYMBOL_TABLE
-  return;
-#else
-  SymTabPtrPtr = &__jitSymbolTable;
-#endif
-  
-  JitSymbolTable *SymTabPtr = *SymTabPtrPtr;
-  JitSymbolEntry *Symbols = SymTabPtr->Symbols;
-  
-  // Scan the table to find its index.  The table is not sorted, so do a linear
-  // scan.
-  unsigned Index;
-  for (Index = 0; Symbols[Index].FnStart != FnStart; ++Index)
-    assert(Index != SymTabPtr->NumSymbols && "Didn't find function!");
-  
-  // Once we have an index, we know to nuke this entry, overwrite it with the
-  // entry at the end of the array, making the last entry redundant.
-  const char *OldName = Symbols[Index].FnName;
-  Symbols[Index] = Symbols[SymTabPtr->NumSymbols-1];
-  free((void*)OldName);
-  
-  // Drop the number of symbols in the table.
-  --SymTabPtr->NumSymbols;
-
-  // Finally, if we deleted the final symbol, deallocate the table itself.
-  if (SymTabPtr->NumSymbols != 0) 
-    return;
-  
-  *SymTabPtrPtr = 0;
-  delete [] Symbols;
-  delete SymTabPtr;
-}
-
-//===----------------------------------------------------------------------===//
 // JITEmitter code.
 //
 namespace {
@@ -616,11 +487,8 @@ namespace {
     // in the JITResolver's ExternalFnToStubMap.
     StringMap<void *> ExtFnStubs;
 
-    // MCI - A pointer to a MachineCodeInfo object to update with information.
-    MachineCodeInfo *MCI;
-
   public:
-    JITEmitter(JIT &jit, JITMemoryManager *JMM) : Resolver(jit), CurFn(0), MCI(0) {
+    JITEmitter(JIT &jit, JITMemoryManager *JMM) : Resolver(jit), CurFn(0) {
       MemMgr = JMM ? JMM : JITMemoryManager::CreateDefaultMemManager();
       if (jit.getJITInfo().needsGOT()) {
         MemMgr->AllocateGOT();
@@ -716,10 +584,6 @@ namespace {
     
     JITMemoryManager *getMemMgr(void) const { return MemMgr; }
 
-    void setMachineCodeInfo(MachineCodeInfo *mci) {
-      MCI = mci;
-    }
-
   private:
     void *getPointerToGlobal(GlobalValue *GV, void *Reference, bool NoNeedStub);
     void *getPointerToGVIndirectSym(GlobalValue *V, void *Reference,
@@ -1157,21 +1021,16 @@ bool JITEmitter::finishFunction(MachineFunction &F) {
 
   // Invalidate the icache if necessary.
   sys::Memory::InvalidateInstructionCache(FnStart, FnEnd-FnStart);
-  
-  // Add it to the JIT symbol table if the host wants it.
-  AddFunctionToSymbolTable(F.getFunction()->getNameStart(),
-                           FnStart, FnEnd-FnStart);
+
+  JITEvent_EmittedFunctionDetails Details;
+  TheJIT->NotifyFunctionEmitted(*F.getFunction(), FnStart, FnEnd-FnStart,
+                                Details);
 
   DOUT << "JIT: Finished CodeGen of [" << (void*)FnStart
        << "] Function: " << F.getFunction()->getName()
        << ": " << (FnEnd-FnStart) << " bytes of text, "
        << Relocations.size() << " relocations\n";
 
-  if (MCI) {
-    MCI->setAddress(FnStart);
-    MCI->setSize(FnEnd-FnStart);
-  }
-
   Relocations.clear();
   ConstPoolAddresses.clear();
 
@@ -1495,13 +1354,6 @@ void *JIT::getPointerToFunctionOrStub(Function *F) {
   return JE->getJITResolver().getFunctionStub(F);
 }
 
-void JIT::registerMachineCodeInfo(MachineCodeInfo *mc) {
-  assert(isa<JITEmitter>(JCE) && "Unexpected MCE?");
-  JITEmitter *JE = cast<JITEmitter>(getCodeEmitter());
-
-  JE->setMachineCodeInfo(mc);
-}
-
 void JIT::updateFunctionStub(Function *F) {
   // Get the empty stub we generated earlier.
   assert(isa<JITEmitter>(JCE) && "Unexpected MCE?");
@@ -1609,10 +1461,9 @@ void JIT::freeMachineCodeForFunction(Function *F) {
   void *OldPtr = updateGlobalMapping(F, 0);
 
   if (OldPtr)
-    RemoveFunctionFromSymbolTable(OldPtr);
+    TheJIT->NotifyFreeingMachineCode(*F, OldPtr);
 
   // Free the actual memory for the function body and related stuff.
   assert(isa<JITEmitter>(JCE) && "Unexpected MCE?");
   cast<JITEmitter>(JCE)->deallocateMemForFunction(F);
 }
-
diff --git a/lib/ExecutionEngine/JIT/MacOSJITEventListener.cpp b/lib/ExecutionEngine/JIT/MacOSJITEventListener.cpp
new file mode 100644
index 0000000..3b8b84c
--- /dev/null
+++ b/lib/ExecutionEngine/JIT/MacOSJITEventListener.cpp
@@ -0,0 +1,173 @@
+//===-- MacOSJITEventListener.cpp - Save symbol table for OSX perf tools --===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines a JITEventListener object that records JITted functions to
+// a global __jitSymbolTable linked list.  Apple's performance tools use this to
+// determine a symbol name and accurate code range for a PC value.  Because
+// performance tools are generally asynchronous, the code below is written with
+// the hope that it could be interrupted at any time and have useful answers.
+// However, we don't go crazy with atomic operations, we just do a "reasonable
+// effort".
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "macos-jit-event-listener"
+#include "llvm/Function.h"
+#include "llvm/ExecutionEngine/JITEventListener.h"
+#include <stddef.h>
+using namespace llvm;
+
+#ifdef __APPLE__
+#define ENABLE_JIT_SYMBOL_TABLE 0
+#endif
+
+#if ENABLE_JIT_SYMBOL_TABLE
+
+namespace {
+
+/// JITSymbolEntry - Each function that is JIT compiled results in one of these
+/// being added to an array of symbols.  This indicates the name of the function
+/// as well as the address range it occupies.  This allows the client to map
+/// from a PC value to the name of the function.
+struct JITSymbolEntry {
+  const char *FnName;   // FnName - a strdup'd string.
+  void *FnStart;
+  intptr_t FnSize;
+};
+
+
+struct JITSymbolTable {
+  /// NextPtr - This forms a linked list of JITSymbolTable entries.  This
+  /// pointer is not used right now, but might be used in the future.  Consider
+  /// it reserved for future use.
+  JITSymbolTable *NextPtr;
+  
+  /// Symbols - This is an array of JITSymbolEntry entries.  Only the first
+  /// 'NumSymbols' symbols are valid.
+  JITSymbolEntry *Symbols;
+  
+  /// NumSymbols - This indicates the number of entries in the Symbols array
+  /// that are valid.
+  unsigned NumSymbols;
+  
+  /// NumAllocated - This indicates the amount of space we have in the Symbols
+  /// array.  This is a private field that should not be read by external tools.
+  unsigned NumAllocated;
+};
+
+class MacOSJITEventListener : public JITEventListener {
+public:
+  virtual void NotifyFunctionEmitted(const Function &F,
+                                     void *FnStart, size_t FnSize,
+                                     const EmittedFunctionDetails &Details);
+  virtual void NotifyFreeingMachineCode(const Function &F, void *OldPtr);
+};
+
+}  // anonymous namespace.
+
+// This is a public symbol so the performance tools can find it.
+JITSymbolTable *__jitSymbolTable;
+
+namespace llvm {
+JITEventListener *createMacOSJITEventListener() {
+  return new MacOSJITEventListener;
+}
+}
+
+// Adds the just-emitted function to the symbol table.
+void MacOSJITEventListener::NotifyFunctionEmitted(
+    const Function &F, void *FnStart, size_t FnSize,
+    const EmittedFunctionDetails &) {
+  const char *const FnName = F.getNameStart();
+  assert(FnName != 0 && FnStart != 0 && "Bad symbol to add");
+  JITSymbolTable **SymTabPtrPtr = 0;
+  SymTabPtrPtr = &__jitSymbolTable;
+
+  // If this is the first entry in the symbol table, add the JITSymbolTable
+  // index.
+  if (*SymTabPtrPtr == 0) {
+    JITSymbolTable *New = new JITSymbolTable();
+    New->NextPtr = 0;
+    New->Symbols = 0;
+    New->NumSymbols = 0;
+    New->NumAllocated = 0;
+    *SymTabPtrPtr = New;
+  }
+
+  JITSymbolTable *SymTabPtr = *SymTabPtrPtr;
+
+  // If the table is full (or has not been allocated yet), grow it.
+  if (SymTabPtr->NumSymbols >= SymTabPtr->NumAllocated) {
+    // If we don't have space, reallocate the table.
+    unsigned NewSize = std::max(64U, SymTabPtr->NumAllocated*2);
+    JITSymbolEntry *NewSymbols = new JITSymbolEntry[NewSize];
+    JITSymbolEntry *OldSymbols = SymTabPtr->Symbols;
+
+    // Copy the old entries over.
+    memcpy(NewSymbols, OldSymbols, SymTabPtr->NumSymbols*sizeof(OldSymbols[0]));
+
+    // Swap the new symbols in, delete the old ones.
+    SymTabPtr->Symbols = NewSymbols;
+    SymTabPtr->NumAllocated = NewSize;
+    delete [] OldSymbols;
+  }
+
+  // Otherwise, we have enough space, just tack it onto the end of the array.
+  JITSymbolEntry &Entry = SymTabPtr->Symbols[SymTabPtr->NumSymbols];
+  Entry.FnName = strdup(FnName);
+  Entry.FnStart = FnStart;
+  Entry.FnSize = FnSize;
+  ++SymTabPtr->NumSymbols;
+}
+
+// Removes the to-be-deleted function from the symbol table.
+void MacOSJITEventListener::NotifyFreeingMachineCode(
+    const Function &, void *FnStart) {
+  assert(FnStart && "Invalid function pointer");
+  JITSymbolTable **SymTabPtrPtr = 0;
+  SymTabPtrPtr = &__jitSymbolTable;
+
+  JITSymbolTable *SymTabPtr = *SymTabPtrPtr;
+  JITSymbolEntry *Symbols = SymTabPtr->Symbols;
+
+  // Scan the table to find its index.  The table is not sorted, so do a linear
+  // scan.
+  unsigned Index;
+  for (Index = 0; Symbols[Index].FnStart != FnStart; ++Index)
+    assert(Index != SymTabPtr->NumSymbols && "Didn't find function!");
+
+  // Once we have an index, we know to nuke this entry, overwrite it with the
+  // entry at the end of the array, making the last entry redundant.
+  const char *OldName = Symbols[Index].FnName;
+  Symbols[Index] = Symbols[SymTabPtr->NumSymbols-1];
+  free((void*)OldName);
+
+  // Drop the number of symbols in the table.
+  --SymTabPtr->NumSymbols;
+
+  // Finally, if we deleted the final symbol, deallocate the table itself.
+  if (SymTabPtr->NumSymbols != 0)
+    return;
+
+  *SymTabPtrPtr = 0;
+  delete [] Symbols;
+  delete SymTabPtr;
+}
+
+#else  // !ENABLE_JIT_SYMBOL_TABLE
+
+namespace llvm {
+// By defining this to return NULL, we can let clients call it unconditionally,
+// even if they aren't on an Apple system.
+JITEventListener *createMacOSJITEventListener() {
+  return NULL;
+}
+}  // namespace llvm
+
+#endif  // ENABLE_JIT_SYMBOL_TABLE
diff --git a/lib/MC/CMakeLists.txt b/lib/MC/CMakeLists.txt
new file mode 100644
index 0000000..6307ffe
--- /dev/null
+++ b/lib/MC/CMakeLists.txt
@@ -0,0 +1,5 @@
+add_llvm_library(LLVMMC
+  MCAsmStreamer.cpp
+  MCContext.cpp
+  MCStreamer.cpp
+  )
diff --git a/lib/MC/MCAsmStreamer.cpp b/lib/MC/MCAsmStreamer.cpp
new file mode 100644
index 0000000..e38f2b3
--- /dev/null
+++ b/lib/MC/MCAsmStreamer.cpp
@@ -0,0 +1,206 @@
+//===- lib/MC/MCAsmStreamer.cpp - Text Assembly Output --------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/MC/MCStreamer.h"
+
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCSection.h"
+#include "llvm/MC/MCSymbol.h"
+#include "llvm/MC/MCValue.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+namespace {
+
+  class MCAsmStreamer : public MCStreamer {
+    raw_ostream &OS;  // destination of the textual assembly output
+    // Section last passed to SwitchSection; null until the first call.
+    MCSection *CurSection;
+
+  public:
+    MCAsmStreamer(MCContext &Context, raw_ostream &_OS)
+      : MCStreamer(Context), OS(_OS), CurSection(0) {}
+    ~MCAsmStreamer() {}
+
+    /// @name MCStreamer Interface
+    /// @{
+
+    virtual void SwitchSection(MCSection *Section);
+
+    virtual void EmitLabel(MCSymbol *Symbol);
+
+    virtual void EmitAssignment(MCSymbol *Symbol, const MCValue &Value,
+                                bool MakeAbsolute = false);
+
+    virtual void EmitSymbolAttribute(MCSymbol *Symbol, SymbolAttr Attribute);
+
+    virtual void EmitBytes(const char *Data, unsigned Length);
+
+    virtual void EmitValue(const MCValue &Value, unsigned Size);
+
+    virtual void EmitValueToAlignment(unsigned ByteAlignment, int64_t Value = 0,
+                                      unsigned ValueSize = 1,
+                                      unsigned MaxBytesToEmit = 0);
+
+    virtual void EmitValueToOffset(const MCValue &Offset,
+                                   unsigned char Value = 0);
+
+    virtual void EmitInstruction(const MCInst &Inst);
+
+    virtual void Finish();
+
+    /// @}
+  };
+
+}
+
+/// Stream an MCValue as "SymA[ - SymB][ + Cst]", or as the bare constant when
+/// no symbol is present.
+static inline raw_ostream &operator<<(raw_ostream &os, const MCValue &Value) {
+  if (!Value.getSymA()) {
+    assert(!Value.getSymB() && "Invalid machine code value!");
+    return os << Value.getCst();
+  }
+
+  os << Value.getSymA()->getName();
+  if (Value.getSymB())
+    os << " - " << Value.getSymB()->getName();
+  if (Value.getCst())
+    os << " + " << Value.getCst();
+  return os;
+}
+
+static inline int64_t truncateToSize(int64_t Value, unsigned Bytes) {
+  assert(Bytes && "Invalid size!");
+  return Value & (~uint64_t(0) >> (64 - Bytes * 8));  // keep low Bytes*8 bits
+}
+
+static inline MCValue truncateToSize(const MCValue &Value, unsigned Bytes) {
+  int64_t Cst = truncateToSize(Value.getCst(), Bytes);  // narrow addend only
+  return MCValue::get(Value.getSymA(), Value.getSymB(), Cst);
+}
+
+void MCAsmStreamer::SwitchSection(MCSection *Section) {
+  // Re-selecting the current section emits nothing.
+  if (Section == CurSection)
+    return;
+  // FIXME: Really we would like the segment, flags, etc. to be separate
+  // values instead of embedded in the name. Not all assemblers understand all
+  // this stuff though.
+  CurSection = Section;
+  OS << ".section " << CurSection->getName() << "\n";
+}
+
+void MCAsmStreamer::EmitLabel(MCSymbol *Symbol) {
+  assert(!Symbol->getSection() && "Cannot emit a symbol twice!");
+  assert(CurSection && "Cannot emit before setting section!");
+  assert(getContext().GetSymbolValue(Symbol) == 0 &&
+         "Cannot emit symbol which was directly assigned to!");
+  // Print the label, then record the section that now defines the symbol.
+  OS << Symbol->getName() << ":\n";
+  Symbol->setSection(CurSection);
+}
+
+void MCAsmStreamer::EmitAssignment(MCSymbol *Symbol, const MCValue &Value,
+                                   bool MakeAbsolute) {
+  assert(!Symbol->getSection() && "Cannot assign to a label!");
+
+  // Absolute assignments use the ".set" directive, others the "=" form.
+  if (MakeAbsolute)
+    OS << ".set " << Symbol->getName() << ", ";
+  else
+    OS << Symbol->getName() << " = ";
+  OS << Value << '\n';
+  getContext().SetSymbolValue(Symbol, Value);
+}
+
+void MCAsmStreamer::EmitSymbolAttribute(MCSymbol *Symbol,
+                                        SymbolAttr Attribute) {
+  const char *Directive = "";  // stays empty for an unlisted attribute
+  switch (Attribute) {
+  case Global:         Directive = ".globl"; break;
+  case Hidden:         Directive = ".hidden"; break;
+  case IndirectSymbol: Directive = ".indirect_symbol"; break;
+  case Internal:       Directive = ".internal"; break;
+  case LazyReference:  Directive = ".lazy_reference"; break;
+  case NoDeadStrip:    Directive = ".no_dead_strip"; break;
+  case PrivateExtern:  Directive = ".private_extern"; break;
+  case Protected:      Directive = ".protected"; break;
+  case Reference:      Directive = ".reference"; break;
+  case Weak:           Directive = ".weak"; break;
+  case WeakDefinition: Directive = ".weak_definition"; break;
+  case WeakReference:  Directive = ".weak_reference"; break;
+  }
+  OS << Directive << ' ' << Symbol->getName() << '\n';
+}
+
+void MCAsmStreamer::EmitBytes(const char *Data, unsigned Length) {
+  assert(CurSection && "Cannot emit contents before setting section!");
+  for (unsigned i = 0; i != Length; ++i)  // avoid sign-extending bytes
+    OS << ".byte " << (unsigned) (unsigned char) Data[i] << '\n';
+}
+
+void MCAsmStreamer::EmitValue(const MCValue &Value, unsigned Size) {
+  assert(CurSection && "Cannot emit contents before setting section!");
+  // Need target hooks to know how to print this.
+  const char *Directive;
+  switch (Size) {
+  default:  // with asserts off, falls through and emits .byte
+    assert(0 && "Invalid size for machine code value!");
+  case 1: Directive = ".byte"; break;
+  case 2: Directive = ".short"; break;
+  case 4: Directive = ".long"; break;
+  case 8: Directive = ".quad"; break;
+  }
+  OS << Directive << ' ' << truncateToSize(Value, Size) << '\n';
+}
+
+void MCAsmStreamer::EmitValueToAlignment(unsigned ByteAlignment, int64_t Value,
+                                         unsigned ValueSize,
+                                         unsigned MaxBytesToEmit) {
+  // The directives take the alignment as a power-of-two exponent.
+  const unsigned Log2Align = Log2_32(ByteAlignment);
+  assert((1U << Log2Align) == ByteAlignment && "Invalid alignment!");
+
+  const char *Directive;
+  switch (ValueSize) {
+  default:
+    assert(0 && "Invalid size for machine code value!");
+  case 8:  // no 8-byte fill form; with asserts off this emits .p2align
+    assert(0 && "Unsupported alignment size!");
+  case 1: Directive = ".p2align"; break;
+  case 2: Directive = ".p2alignw"; break;
+  case 4: Directive = ".p2alignl"; break;
+  }
+  OS << Directive << ' ' << Log2Align;
+  OS << ", " << truncateToSize(Value, ValueSize);
+  if (MaxBytesToEmit != 0)  // the optional third operand
+    OS << ", " << MaxBytesToEmit;
+  OS << '\n';
+}
+
+void MCAsmStreamer::EmitValueToOffset(const MCValue &Offset,
+                                      unsigned char Value) {
+  // FIXME: Verify that Offset is associated with the current section.
+  OS << ".org " << Offset << ", " << unsigned(Value) << '\n';
+}
+
+void MCAsmStreamer::EmitInstruction(const MCInst &Inst) {
+  assert(CurSection && "Cannot emit contents before setting section!");
+  // FIXME: Implement.  Until then only a placeholder comment line is written.
+  OS << "# FIXME: Implement instruction printing!\n";
+}
+
+void MCAsmStreamer::Finish() {
+  OS.flush();  // the only end-of-stream work done here is flushing OS
+}
+    
+MCStreamer *llvm::createAsmStreamer(MCContext &Context, raw_ostream &OS) {
+  return new MCAsmStreamer(Context, OS);  // allocated with plain new
+}
diff --git a/lib/MC/MCContext.cpp b/lib/MC/MCContext.cpp
new file mode 100644
index 0000000..6c6019c
--- /dev/null
+++ b/lib/MC/MCContext.cpp
@@ -0,0 +1,80 @@
+//===- lib/MC/MCContext.cpp - Machine Code Context ------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/MC/MCContext.h"
+
+#include "llvm/MC/MCSection.h"
+#include "llvm/MC/MCSymbol.h"
+#include "llvm/MC/MCValue.h"
+using namespace llvm;
+
+MCContext::MCContext() {
+  // Nothing is set up explicitly here.
+}
+
+MCContext::~MCContext() {  // no explicit cleanup is performed here
+}
+
+MCSection *MCContext::GetSection(const char *Name) {
+  // Sections are uniqued by name; the slot is filled on first request.
+  MCSection *&Slot = Sections[Name];
+  if (Slot)
+    return Slot;
+  Slot = new (*this) MCSection(Name);
+  return Slot;
+}
+
+MCSymbol *MCContext::CreateSymbol(const char *Name) {
+  assert(Name[0] != '\0' && "Normal symbols cannot be unnamed!");
+  // The name must be new; bind the freshly created symbol to it.
+  MCSymbol *&Slot = Symbols[Name];
+  assert(Slot == 0 && "Duplicate symbol definition!");
+  Slot = new (*this) MCSymbol(Name, false);
+  return Slot;
+}
+
+MCSymbol *MCContext::GetOrCreateSymbol(const char *Name) {
+  MCSymbol *&Slot = Symbols[Name];  // existing binding, if any
+  if (!Slot)
+    Slot = new (*this) MCSymbol(Name, false);
+  return Slot;
+}
+
+
+MCSymbol *MCContext::CreateTemporarySymbol(const char *Name) {
+  // If unnamed, just create a symbol; it is not entered in Symbols.
+  if (Name[0] == '\0')
+    return new (*this) MCSymbol("", true);
+
+  // Otherwise create as usual, ensuring the name is unique.
+  MCSymbol *&Entry = Symbols[Name];
+  assert(!Entry && "Duplicate symbol definition!");
+  return Entry = new (*this) MCSymbol(Name, true);
+}
+
+MCSymbol *MCContext::LookupSymbol(const char *Name) const {
+  return Symbols.lookup(Name);  // const query; no symbol is created here
+}
+
+void MCContext::ClearSymbolValue(MCSymbol *Sym) {
+  SymbolValues.erase(Sym);  // drop whatever value was recorded for Sym
+}
+
+void MCContext::SetSymbolValue(MCSymbol *Sym, const MCValue &Value) {
+  SymbolValues[Sym] = Value;  // records Value as the entry for Sym
+}
+
+const MCValue *MCContext::GetSymbolValue(MCSymbol *Sym) const {
+  DenseMap<MCSymbol*, MCValue>::const_iterator it = SymbolValues.find(Sym);
+  // Symbols with no recorded value have no entry; report null for them.
+  if (it == SymbolValues.end())
+    return 0;
+
+  return &it->second;
+}
diff --git a/lib/MC/MCStreamer.cpp b/lib/MC/MCStreamer.cpp
new file mode 100644
index 0000000..a634f33
--- /dev/null
+++ b/lib/MC/MCStreamer.cpp
@@ -0,0 +1,18 @@
+//===- lib/MC/MCStreamer.cpp - Streaming Machine Code Output --------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/MC/MCStreamer.h"
+
+using namespace llvm;
+
+MCStreamer::MCStreamer(MCContext &_Context)
+  : Context(_Context) {}
+
+MCStreamer::~MCStreamer() {  // no explicit cleanup is performed here
+}
diff --git a/lib/MC/Makefile b/lib/MC/Makefile
new file mode 100644
index 0000000..314a5b1
--- /dev/null
+++ b/lib/MC/Makefile
@@ -0,0 +1,15 @@
+##===- lib/MC/Makefile -------------------------------------*- Makefile -*-===##
+#
+#                     The LLVM Compiler Infrastructure
+#
+# This file is distributed under the University of Illinois Open Source
+# License. See LICENSE.TXT for details.
+#
+##===----------------------------------------------------------------------===##
+
+LEVEL = ../..
+LIBRARYNAME = LLVMMC
+BUILD_ARCHIVE := 1
+
+include $(LEVEL)/Makefile.common
+
diff --git a/lib/Makefile b/lib/Makefile
index 8dd67d9..7199da5 100644
--- a/lib/Makefile
+++ b/lib/Makefile
@@ -9,7 +9,7 @@
 LEVEL = ..
 
 PARALLEL_DIRS = VMCore AsmParser Bitcode Archive Analysis Transforms CodeGen \
-                Target ExecutionEngine Debugger Linker CompilerDriver
+                Target ExecutionEngine Debugger Linker CompilerDriver MC
 
 include $(LEVEL)/Makefile.common
 
diff --git a/lib/Support/Annotation.cpp b/lib/Support/Annotation.cpp
index 9c3efa3..b778043 100644
--- a/lib/Support/Annotation.cpp
+++ b/lib/Support/Annotation.cpp
@@ -68,9 +68,12 @@ AnnotationID AnnotationManager::getID(const char *Name) {  // Name -> ID
   if (I == E) {
     sys::SmartScopedWriter<true> Writer(&*AnnotationsLock);
     I = IDMap->find(Name);
-    if (I == IDMap->end())
-      (*IDMap)[Name] = IDCounter++;   // Add a new element
-    return AnnotationID(IDCounter-1);
+    if (I == IDMap->end()) {
+      unsigned newCount = sys::AtomicIncrement(&IDCounter);
+      (*IDMap)[Name] = newCount-1;   // Add a new element
+      return AnnotationID(newCount-1);
+    } else
+      return AnnotationID(I->second);
   }
   return AnnotationID(I->second);
 }
diff --git a/lib/Support/CMakeLists.txt b/lib/Support/CMakeLists.txt
index e7a76cc..f26c2c0 100644
--- a/lib/Support/CMakeLists.txt
+++ b/lib/Support/CMakeLists.txt
@@ -30,3 +30,5 @@ add_llvm_library(LLVMSupport
   Triple.cpp
   raw_ostream.cpp
   )
+
+target_link_libraries (LLVMSupport LLVMSystem)
diff --git a/lib/Support/PrettyStackTrace.cpp b/lib/Support/PrettyStackTrace.cpp
index c111c5e..14290a1 100644
--- a/lib/Support/PrettyStackTrace.cpp
+++ b/lib/Support/PrettyStackTrace.cpp
@@ -15,11 +15,12 @@
 #include "llvm/Support/PrettyStackTrace.h"
 #include "llvm/Support/raw_ostream.h"
 #include "llvm/System/Signals.h"
+#include "llvm/System/ThreadLocal.h"
 #include "llvm/ADT/SmallString.h"
 using namespace llvm;
 
 // FIXME: This should be thread local when llvm supports threads.
-static const PrettyStackTraceEntry *PrettyStackTraceHead = 0;
+static sys::ThreadLocal<const PrettyStackTraceEntry> PrettyStackTraceHead;
 
 static unsigned PrintStack(const PrettyStackTraceEntry *Entry, raw_ostream &OS){
   unsigned NextID = 0;
@@ -34,12 +35,12 @@ static unsigned PrintStack(const PrettyStackTraceEntry *Entry, raw_ostream &OS){
 /// PrintCurStackTrace - Print the current stack trace to the specified stream.
 static void PrintCurStackTrace(raw_ostream &OS) {
   // Don't print an empty trace.
-  if (PrettyStackTraceHead == 0) return;
+  if (PrettyStackTraceHead.get() == 0) return;
   
   // If there are pretty stack frames registered, walk and emit them.
   OS << "Stack dump:\n";
   
-  PrintStack(PrettyStackTraceHead, OS);
+  PrintStack(PrettyStackTraceHead.get(), OS);
   OS.flush();
 }
 
@@ -84,14 +85,14 @@ PrettyStackTraceEntry::PrettyStackTraceEntry() {
   HandlerRegistered = HandlerRegistered;
     
   // Link ourselves.
-  NextEntry = PrettyStackTraceHead;
-  PrettyStackTraceHead = this;
+  NextEntry = PrettyStackTraceHead.get();
+  PrettyStackTraceHead.set(this);
 }
 
 PrettyStackTraceEntry::~PrettyStackTraceEntry() {
-  assert(PrettyStackTraceHead == this &&
+  assert(PrettyStackTraceHead.get() == this &&
          "Pretty stack trace entry destruction is out of order");
-  PrettyStackTraceHead = getNextEntry();
+  PrettyStackTraceHead.set(getNextEntry());
 }
 
 void PrettyStackTraceString::print(raw_ostream &OS) const {
diff --git a/lib/Support/Statistic.cpp b/lib/Support/Statistic.cpp
index 6c652f8..33570b0 100644
--- a/lib/Support/Statistic.cpp
+++ b/lib/Support/Statistic.cpp
@@ -66,10 +66,14 @@ void Statistic::RegisterStatistic() {
   // If stats are enabled, inform StatInfo that this statistic should be
   // printed.
   sys::ScopedLock Writer(&*StatLock);
-  if (Enabled)
-    StatInfo->addStatistic(this);
-  // Remember we have been registered.
-  Initialized = true;
+  if (!Initialized) {
+    if (Enabled)
+      StatInfo->addStatistic(this);
+    
+    sys::MemoryFence();
+    // Remember we have been registered.
+    Initialized = true;
+  }
 }
 
 namespace {
diff --git a/lib/Support/Timer.cpp b/lib/Support/Timer.cpp
index 69f967c..ede1dc9 100644
--- a/lib/Support/Timer.cpp
+++ b/lib/Support/Timer.cpp
@@ -38,6 +38,8 @@ static std::string &getLibSupportInfoOutputFilename() {
   return *LibSupportInfoOutputFilename;
 }
 
+static ManagedStatic<sys::SmartMutex<true> > TimerLock;
+
 namespace {
   static cl::opt<bool>
   TrackSpace("track-memory", cl::desc("Enable -time-passes memory "
@@ -112,7 +114,8 @@ static inline size_t getMemUsage() {
 }
 
 struct TimeRecord {
-  int64_t Elapsed, UserTime, SystemTime, MemUsed;
+  double Elapsed, UserTime, SystemTime;
+  ssize_t MemUsed;
 };
 
 static TimeRecord getTimeRecord(bool Start) {
@@ -122,7 +125,7 @@ static TimeRecord getTimeRecord(bool Start) {
   sys::TimeValue user(0,0);
   sys::TimeValue sys(0,0);
 
-  int64_t MemUsed = 0;
+  ssize_t MemUsed = 0;
   if (Start) {
     MemUsed = getMemUsage();
     sys::Process::GetTimeUsage(now,user,sys);
@@ -131,9 +134,9 @@ static TimeRecord getTimeRecord(bool Start) {
     MemUsed = getMemUsage();
   }
 
-  Result.Elapsed  = now.seconds() * 1000000 + now.microseconds();
-  Result.UserTime = user.seconds() * 1000000 + user.microseconds();
-  Result.SystemTime  = sys.seconds() * 1000000 + sys.microseconds();
+  Result.Elapsed  = now.seconds()  + now.microseconds()  / 1000000.0;
+  Result.UserTime = user.seconds() + user.microseconds() / 1000000.0;
+  Result.SystemTime  = sys.seconds()  + sys.microseconds()  / 1000000.0;
   Result.MemUsed  = MemUsed;
 
   return Result;
@@ -142,6 +145,7 @@ static TimeRecord getTimeRecord(bool Start) {
 static ManagedStatic<std::vector<Timer*> > ActiveTimers;
 
 void Timer::startTimer() {
+  sys::SmartScopedLock<true> L(&Lock);
   Started = true;
   ActiveTimers->push_back(this);
   TimeRecord TR = getTimeRecord(true);
@@ -153,6 +157,7 @@ void Timer::startTimer() {
 }
 
 void Timer::stopTimer() {
+  sys::SmartScopedLock<true> L(&Lock);
   TimeRecord TR = getTimeRecord(false);
   Elapsed    += TR.Elapsed;
   UserTime   += TR.UserTime;
@@ -170,11 +175,27 @@ void Timer::stopTimer() {
 }
 
 void Timer::sum(const Timer &T) {
+  if (&T < this) {
+    T.Lock.acquire();
+    Lock.acquire();
+  } else {
+    Lock.acquire();
+    T.Lock.acquire();
+  }
+  
   Elapsed    += T.Elapsed;
   UserTime   += T.UserTime;
   SystemTime += T.SystemTime;
   MemUsed    += T.MemUsed;
   PeakMem    += T.PeakMem;
+  
+  if (&T < this) {
+    T.Lock.release();
+    Lock.release();
+  } else {
+    Lock.release();
+    T.Lock.release();
+  }
 }
 
 /// addPeakMemoryMeasurement - This method should be called whenever memory
@@ -185,8 +206,11 @@ void Timer::addPeakMemoryMeasurement() {
   size_t MemUsed = getMemUsage();
 
   for (std::vector<Timer*>::iterator I = ActiveTimers->begin(),
-         E = ActiveTimers->end(); I != E; ++I)
+         E = ActiveTimers->end(); I != E; ++I) {
+    (*I)->Lock.acquire();
     (*I)->PeakMem = std::max((*I)->PeakMem, MemUsed-(*I)->PeakMemBase);
+    (*I)->Lock.release();
+  }
 }
 
 //===----------------------------------------------------------------------===//
@@ -205,6 +229,7 @@ static ManagedStatic<Name2Timer> NamedTimers;
 static ManagedStatic<Name2Pair> NamedGroupedTimers;
 
 static Timer &getNamedRegionTimer(const std::string &Name) {
+  sys::SmartScopedLock<true> L(&*TimerLock);
   Name2Timer::iterator I = NamedTimers->find(Name);
   if (I != NamedTimers->end())
     return I->second;
@@ -214,6 +239,7 @@ static Timer &getNamedRegionTimer(const std::string &Name) {
 
 static Timer &getNamedRegionTimer(const std::string &Name,
                                   const std::string &GroupName) {
+  sys::SmartScopedLock<true> L(&*TimerLock);
 
   Name2Pair::iterator I = NamedGroupedTimers->find(GroupName);
   if (I == NamedGroupedTimers->end()) {
@@ -275,14 +301,21 @@ static void printVal(double Val, double Total, std::ostream &OS) {
 }
 
 void Timer::print(const Timer &Total, std::ostream &OS) {
+  if (&Total < this) {
+    Total.Lock.acquire();
+    Lock.acquire();
+  } else {
+    Lock.acquire();
+    Total.Lock.acquire();
+  }
+  
   if (Total.UserTime)
-    printVal(UserTime / 1000000.0, Total.UserTime / 1000000.0, OS);
+    printVal(UserTime, Total.UserTime, OS);
   if (Total.SystemTime)
-    printVal(SystemTime / 1000000.0, Total.SystemTime / 1000000.0, OS);
+    printVal(SystemTime, Total.SystemTime, OS);
   if (Total.getProcessTime())
-    printVal(getProcessTime() / 1000000.0,
-             Total.getProcessTime() / 1000000.0, OS);
-  printVal(Elapsed / 1000000.0, Total.Elapsed / 1000000.0, OS);
+    printVal(getProcessTime(), Total.getProcessTime(), OS);
+  printVal(Elapsed, Total.Elapsed, OS);
 
   OS << "  ";
 
@@ -300,6 +333,14 @@ void Timer::print(const Timer &Total, std::ostream &OS) {
   OS << Name << "\n";
 
   Started = false;  // Once printed, don't print again
+  
+  if (&Total < this) {
+    Total.Lock.release();
+    Lock.release();
+  } else {
+    Lock.release();
+    Total.Lock.release();
+  }
 }
 
 // GetLibSupportInfoOutputFile - Return a file stream to print our output on...
@@ -324,6 +365,7 @@ llvm::GetLibSupportInfoOutputFile() {
 
 
 void TimerGroup::removeTimer() {
+  sys::SmartScopedLock<true> L(&*TimerLock);
   if (--NumTimers == 0 && !TimersToPrint.empty()) { // Print timing report...
     // Sort the timers in descending order by amount of time taken...
     std::sort(TimersToPrint.begin(), TimersToPrint.end(),
@@ -355,23 +397,23 @@ void TimerGroup::removeTimer() {
       if (this != DefaultTimerGroup) {
         *OutStream << "  Total Execution Time: ";
 
-        printAlignedFP(Total.getProcessTime() / 1000000.0, 4, 5, *OutStream);
+        printAlignedFP(Total.getProcessTime(), 4, 5, *OutStream);
         *OutStream << " seconds (";
-        printAlignedFP(Total.getWallTime() / 1000000.0, 4, 5, *OutStream);
+        printAlignedFP(Total.getWallTime(), 4, 5, *OutStream);
         *OutStream << " wall clock)\n";
       }
       *OutStream << "\n";
 
-      if (Total.UserTime / 1000000.0)
+      if (Total.UserTime)
         *OutStream << "   ---User Time---";
-      if (Total.SystemTime / 1000000.0)
+      if (Total.SystemTime)
         *OutStream << "   --System Time--";
-      if (Total.getProcessTime() / 1000000.0)
+      if (Total.getProcessTime())
         *OutStream << "   --User+System--";
       *OutStream << "   ---Wall Time---";
-      if (Total.getMemUsed() / 1000000.0)
+      if (Total.getMemUsed())
         *OutStream << "  ---Mem---";
-      if (Total.getPeakMem() / 1000000.0)
+      if (Total.getPeakMem())
         *OutStream << "  -PeakMem-";
       *OutStream << "  --- Name ---\n";
 
@@ -391,3 +433,13 @@ void TimerGroup::removeTimer() {
   }
 }
 
+void TimerGroup::addTimer() {
+  sys::SmartScopedLock<true> L(&*TimerLock);
+  ++NumTimers;
+}
+
+void TimerGroup::addTimerToPrint(const Timer &T) {
+  sys::SmartScopedLock<true> L(&*TimerLock);
+  TimersToPrint.push_back(Timer(true, T));
+}
+
diff --git a/lib/System/Atomic.cpp b/lib/System/Atomic.cpp
index fda2708..f9b55a1 100644
--- a/lib/System/Atomic.cpp
+++ b/lib/System/Atomic.cpp
@@ -35,11 +35,11 @@ void sys::MemoryFence() {
 #endif
 }
 
-uint32_t sys::CompareAndSwap32(volatile uint32_t* ptr,
-                               uint32_t new_value,
-                               uint32_t old_value) {
+sys::cas_flag sys::CompareAndSwap(volatile sys::cas_flag* ptr,
+                                  sys::cas_flag new_value,
+                                  sys::cas_flag old_value) {
 #if LLVM_MULTITHREADED==0
-  uint32_t result = *ptr;
+  sys::cas_flag result = *ptr;
   if (result == old_value)
     *ptr = new_value;
   return result;
@@ -52,7 +52,7 @@ uint32_t sys::CompareAndSwap32(volatile uint32_t* ptr,
 #endif
 }
 
-int32_t sys::AtomicIncrement32(volatile int32_t* ptr) {
+sys::cas_flag sys::AtomicIncrement(volatile sys::cas_flag* ptr) {
 #if LLVM_MULTITHREADED==0
   ++(*ptr);
   return *ptr;
@@ -65,7 +65,7 @@ int32_t sys::AtomicIncrement32(volatile int32_t* ptr) {
 #endif
 }
 
-int32_t sys::AtomicDecrement32(volatile int32_t* ptr) {
+sys::cas_flag sys::AtomicDecrement(volatile sys::cas_flag* ptr) {
 #if LLVM_MULTITHREADED==0
   --(*ptr);
   return *ptr;
@@ -78,7 +78,7 @@ int32_t sys::AtomicDecrement32(volatile int32_t* ptr) {
 #endif
 }
 
-int32_t sys::AtomicAdd32(volatile int32_t* ptr, int32_t val) {
+sys::cas_flag sys::AtomicAdd(volatile sys::cas_flag* ptr, sys::cas_flag val) {
 #if LLVM_MULTITHREADED==0
   *ptr += val;
   return *ptr;
@@ -91,16 +91,22 @@ int32_t sys::AtomicAdd32(volatile int32_t* ptr, int32_t val) {
 #endif
 }
 
-int64_t sys::AtomicAdd64(volatile int64_t* ptr, int64_t val) {
-#if LLVM_MULTITHREADED==0
-  *ptr += val;
-  return *ptr;
-#elif defined(__GNUC__)
-  return __sync_add_and_fetch(ptr, val);
-#elif defined(_MSC_VER)
-  return InterlockedAdd64(ptr, val);
-#else
-#  error No atomic add implementation for your platform!
-#endif
+sys::cas_flag sys::AtomicMul(volatile sys::cas_flag* ptr, sys::cas_flag val) {
+  sys::cas_flag original, result;
+  do {
+    original = *ptr;
+    result = original * val;
+  } while (sys::CompareAndSwap(ptr, result, original) != original);
+
+  return result;
 }
 
+sys::cas_flag sys::AtomicDiv(volatile sys::cas_flag* ptr, sys::cas_flag val) {
+  sys::cas_flag original, result;
+  do {
+    original = *ptr;
+    result = original / val;
+  } while (sys::CompareAndSwap(ptr, result, original) != original);
+
+  return result;
+}
diff --git a/lib/System/CMakeLists.txt b/lib/System/CMakeLists.txt
index a5a56e8..431629a 100644
--- a/lib/System/CMakeLists.txt
+++ b/lib/System/CMakeLists.txt
@@ -14,6 +14,7 @@ add_llvm_library(LLVMSystem
   Signals.cpp
   Threading.cpp
   TimeValue.cpp
+  ThreadLocal.cpp
   )
 
 if( BUILD_SHARED_LIBS AND NOT WIN32 )
diff --git a/lib/System/DynamicLibrary.cpp b/lib/System/DynamicLibrary.cpp
index 3bf172c..ef5c9e6 100644
--- a/lib/System/DynamicLibrary.cpp
+++ b/lib/System/DynamicLibrary.cpp
@@ -12,20 +12,22 @@
 //===----------------------------------------------------------------------===//
 
 #include "llvm/System/DynamicLibrary.h"
+#include "llvm/Support/ManagedStatic.h"
+#include "llvm/System/RWMutex.h"
 #include "llvm/Config/config.h"
 #include <cstdio>
 #include <cstring>
 #include <map>
 
 // Collection of symbol name/value pairs to be searched prior to any libraries.
-std::map<std::string, void *> &g_symbols() {
-  static std::map<std::string, void *> symbols;
-  return symbols;
-}
+static std::map<std::string, void*> symbols;
+static llvm::sys::SmartRWMutex<true> SymbolsLock;
+
 
 void llvm::sys::DynamicLibrary::AddSymbol(const char* symbolName,
                                           void *symbolValue) {
-  g_symbols()[symbolName] = symbolValue;
+  llvm::sys::SmartScopedWriter<true> Writer(&SymbolsLock);
+  symbols[symbolName] = symbolValue;
 }
 
 // It is not possible to use ltdl.c on VC++ builds as the terms of its LGPL
@@ -57,6 +59,7 @@ static std::vector<void *> OpenedHandles;
 DynamicLibrary::DynamicLibrary() {}
 
 DynamicLibrary::~DynamicLibrary() {
+  SmartScopedWriter<true> Writer(&SymbolsLock);
   while(!OpenedHandles.empty()) {
     void *H = OpenedHandles.back();   OpenedHandles.pop_back(); 
     dlclose(H);
@@ -65,6 +68,7 @@ DynamicLibrary::~DynamicLibrary() {
 
 bool DynamicLibrary::LoadLibraryPermanently(const char *Filename,
                                             std::string *ErrMsg) {
+  SmartScopedWriter<true> Writer(&SymbolsLock);                                              
   void *H = dlopen(Filename, RTLD_LAZY|RTLD_GLOBAL);
   if (H == 0) {
     if (ErrMsg)
@@ -77,20 +81,28 @@ bool DynamicLibrary::LoadLibraryPermanently(const char *Filename,
 
 void* DynamicLibrary::SearchForAddressOfSymbol(const char* symbolName) {
   //  check_ltdl_initialization();
-
+  
   // First check symbols added via AddSymbol().
-  std::map<std::string, void *>::iterator I = g_symbols().find(symbolName);
-  if (I != g_symbols().end())
+  SymbolsLock.reader_acquire();
+  std::map<std::string, void *>::iterator I = symbols.find(symbolName);
+  std::map<std::string, void *>::iterator E = symbols.end();
+  SymbolsLock.reader_release();
+  
+  if (I != E)
     return I->second;
 
+  SymbolsLock.writer_acquire();
   // Now search the libraries.
   for (std::vector<void *>::iterator I = OpenedHandles.begin(),
        E = OpenedHandles.end(); I != E; ++I) {
     //lt_ptr ptr = lt_dlsym(*I, symbolName);
     void *ptr = dlsym(*I, symbolName);
-    if (ptr)
+    if (ptr) {
+      SymbolsLock.writer_release();
       return ptr;
+    }
   }
+  SymbolsLock.writer_release();
 
 #define EXPLICIT_SYMBOL(SYM) \
    extern void *SYM; if (!strcmp(symbolName, #SYM)) return &SYM
diff --git a/lib/System/ThreadLocal.cpp b/lib/System/ThreadLocal.cpp
new file mode 100644
index 0000000..8884e79
--- /dev/null
+++ b/lib/System/ThreadLocal.cpp
@@ -0,0 +1,80 @@
+//===- ThreadLocal.cpp - Thread Local Data ----------------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the llvm::sys::ThreadLocal class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Config/config.h"
+#include "llvm/System/ThreadLocal.h"
+
+//===----------------------------------------------------------------------===//
+//=== WARNING: Implementation here must contain only TRULY operating system
+//===          independent code.
+//===----------------------------------------------------------------------===//
+
+#if !defined(ENABLE_THREADS) || ENABLE_THREADS == 0
+// Threads disabled: keep the value in 'data' itself, which starts out null.
+namespace llvm {
+using namespace sys;
+ThreadLocalImpl::ThreadLocalImpl() : data(0) { }
+ThreadLocalImpl::~ThreadLocalImpl() { }
+void ThreadLocalImpl::setInstance(const void* d) { data = const_cast<void*>(d);}
+const void* ThreadLocalImpl::getInstance() { return data; }
+}
+#else
+
+#if defined(HAVE_PTHREAD_H) && defined(HAVE_PTHREAD_GETSPECIFIC)
+
+#include <cassert>
+#include <pthread.h>
+#include <stdlib.h>
+
+namespace llvm {
+using namespace sys;
+
+ThreadLocalImpl::ThreadLocalImpl() : data(0) {
+  pthread_key_t *Key = new pthread_key_t;  // owned; freed by the destructor
+  const int Err = pthread_key_create(Key, NULL);
+  assert(Err == 0);
+  (void) Err;
+  data = Key;
+}
+
+ThreadLocalImpl::~ThreadLocalImpl() {
+  pthread_key_t *Key = static_cast<pthread_key_t*>(data);
+  const int Err = pthread_key_delete(*Key);
+  assert(Err == 0);
+  (void) Err;
+  delete Key;
+}
+
+void ThreadLocalImpl::setInstance(const void* d) {
+  // 'data' points at the pthread key; the value itself is stored under it.
+  const int Err = pthread_setspecific(*static_cast<pthread_key_t*>(data), d);
+  assert(Err == 0);
+  (void) Err;
+}
+
+const void* ThreadLocalImpl::getInstance() {
+  // Fetch the value stored under the key that 'data' points at.
+  return pthread_getspecific(*static_cast<pthread_key_t*>(data));
+}
+
+}
+
+#elif defined(LLVM_ON_UNIX)
+#include "Unix/ThreadLocal.inc"
+#elif defined( LLVM_ON_WIN32)
+#include "Win32/ThreadLocal.inc"
+#else
+#warning Neither LLVM_ON_UNIX nor LLVM_ON_WIN32 was set in System/ThreadLocal.cpp
+#endif
+#endif
+
diff --git a/lib/System/Unix/ThreadLocal.inc b/lib/System/Unix/ThreadLocal.inc
new file mode 100644
index 0000000..83d554d3
--- /dev/null
+++ b/lib/System/Unix/ThreadLocal.inc
@@ -0,0 +1,25 @@
+//=== llvm/System/Unix/ThreadLocal.inc - Unix Thread Local Data -*- C++ -*-===//
+// 
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+// 
+//===----------------------------------------------------------------------===//
+//
+// This file implements the Unix specific (non-pthread) ThreadLocal class.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+//=== WARNING: Implementation here must contain only generic UNIX code that
+//===          is guaranteed to work on *all* UNIX variants.
+//===----------------------------------------------------------------------===//
+
+namespace llvm {
+using namespace sys;
+ThreadLocalImpl::ThreadLocalImpl() : data(0) { }  // null until first set
+ThreadLocalImpl::~ThreadLocalImpl() { }
+void ThreadLocalImpl::setInstance(const void* d) { data = const_cast<void*>(d);}
+const void* ThreadLocalImpl::getInstance() { return data; }
+}
diff --git a/lib/System/Win32/DynamicLibrary.inc b/lib/System/Win32/DynamicLibrary.inc
index 1ddf6ce..aa04268 100644
--- a/lib/System/Win32/DynamicLibrary.inc
+++ b/lib/System/Win32/DynamicLibrary.inc
@@ -67,6 +67,7 @@ extern "C" {
                                     PVOID UserContext)
 #endif
   {
+    llvm::sys::SmartScopedWriter<true> Writer(&SymbolsLock);
     // Ignore VC++ runtimes prior to 7.1.  Somehow some of them get loaded
     // into the process.
     if (stricmp(ModuleName, "msvci70") != 0 &&
@@ -89,11 +90,13 @@ extern "C" {
 }
 
 DynamicLibrary::DynamicLibrary() : handle(0) {
+  SmartScopedWriter<true> Writer(&SymbolsLock);
   handle = GetModuleHandle(NULL);
   OpenedHandles.push_back((HMODULE)handle);
 }
 
 DynamicLibrary::~DynamicLibrary() {
+  llvm::sys::SmartScopedWriter<true> Writer(&SymbolsLock);
   if (handle == 0)
     return;
 
@@ -113,8 +116,9 @@ DynamicLibrary::~DynamicLibrary() {
 }
  
 bool DynamicLibrary::LoadLibraryPermanently(const char *filename,
-                                            std::string *ErrMsg) {
+                                            std::string *ErrMsg) {                                            
   if (filename) {
+    llvm::sys::SmartScopedWriter<true> Writer(&SymbolsLock);
     HMODULE a_handle = LoadLibrary(filename);
 
     if (a_handle == 0)
@@ -166,17 +170,24 @@ bool DynamicLibrary::LoadLibraryPermanently(const char *filename,
 
 void* DynamicLibrary::SearchForAddressOfSymbol(const char* symbolName) {
   // First check symbols added via AddSymbol().
-  std::map<std::string, void *>::iterator I = g_symbols().find(symbolName);
-  if (I != g_symbols().end())
+  SymbolsLock.reader_acquire();
+  std::map<std::string, void *>::iterator I = symbols.find(symbolName);
+  std::map<std::string, void *>::iterator E = symbols.end();
+  SymbolsLock.reader_release();
+  if (I != E)
     return I->second;
 
   // Now search the libraries.
+  SymbolsLock.writer_acquire();
   for (std::vector<HMODULE>::iterator I = OpenedHandles.begin(),
        E = OpenedHandles.end(); I != E; ++I) {
     FARPROC ptr = GetProcAddress((HMODULE)*I, symbolName);
-    if (ptr)
+    if (ptr) {
+      SymbolsLock.writer_release();
       return (void *) ptr;
+    }
   }
+  SymbolsLock.writer_release();
 
 #if defined(__MINGW32__)
   {
diff --git a/lib/System/Win32/ThreadLocal.inc b/lib/System/Win32/ThreadLocal.inc
new file mode 100644
index 0000000..8ab37d9
--- /dev/null
+++ b/lib/System/Win32/ThreadLocal.inc
@@ -0,0 +1,49 @@
+//= llvm/System/Win32/ThreadLocal.inc - Win32 Thread Local Data -*- C++ -*-===//
+// 
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+// 
+//===----------------------------------------------------------------------===//
+//
+// This file implements the Win32 specific ThreadLocal class.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+//=== WARNING: Implementation here must contain only generic Win32 code that
+//===          is guaranteed to work on *all* Win32 variants.
+//===----------------------------------------------------------------------===//
+
+#include "Win32.h"
+#include "llvm/System/ThreadLocal.h"
+
+namespace llvm {
+using namespace sys;
+
+ThreadLocalImpl::ThreadLocalImpl() {
+  DWORD* slot = new DWORD;   // heap cell holding the TLS index, kept in 'data'
+  *slot = TlsAlloc();
+  assert(*slot != TLS_OUT_OF_INDEXES);
+  data = slot;
+}
+
+ThreadLocalImpl::~ThreadLocalImpl() {
+  DWORD* slot = static_cast<DWORD*>(data);
+  TlsFree(*slot);   // release the TLS index first, then the cell that held it
+  delete slot;
+}
+
+const void* ThreadLocalImpl::getInstance() {
+  // 'data' points at the stored TLS index; look up the value in that slot.
+  return TlsGetValue(*static_cast<DWORD*>(data));
+}
+
+void ThreadLocalImpl::setInstance(const void* d){
+  // TlsSetValue returns nonzero on success and zero on failure.
+  BOOL ok = TlsSetValue(*static_cast<DWORD*>(data), const_cast<void*>(d));
+  assert(ok != 0 && "TlsSetValue failed"); (void)ok; // unused under NDEBUG
+}
+
+}
diff --git a/lib/Target/ARM/ARM.h b/lib/Target/ARM/ARM.h
index 7edd118..8bf1b7c 100644
--- a/lib/Target/ARM/ARM.h
+++ b/lib/Target/ARM/ARM.h
@@ -20,7 +20,7 @@
 
 namespace llvm {
 
-class ARMTargetMachine;
+class ARMBaseTargetMachine;
 class FunctionPass;
 class MachineCodeEmitter;
 class JITCodeEmitter;
@@ -28,8 +28,8 @@ class raw_ostream;
 
 // Enums corresponding to ARM condition codes
 namespace ARMCC {
-  // The CondCodes constants map directly to the 4-bit encoding of the 
-  // condition field for predicated instructions. 
+  // The CondCodes constants map directly to the 4-bit encoding of the
+  // condition field for predicated instructions.
   enum CondCodes {
     EQ,
     NE,
@@ -47,7 +47,7 @@ namespace ARMCC {
     LE,
     AL
   };
-  
+
   inline static CondCodes getOppositeCondition(CondCodes CC){
     switch (CC) {
     default: assert(0 && "Unknown condition code");
@@ -90,17 +90,17 @@ inline static const char *ARMCondCodeToString(ARMCC::CondCodes CC) {
   }
 }
 
-FunctionPass *createARMISelDag(ARMTargetMachine &TM);
+FunctionPass *createARMISelDag(ARMBaseTargetMachine &TM);
 FunctionPass *createARMCodePrinterPass(raw_ostream &O,
-                                       ARMTargetMachine &TM,
+                                       ARMBaseTargetMachine &TM,
                                        CodeGenOpt::Level OptLevel,
                                        bool Verbose);
-FunctionPass *createARMCodeEmitterPass(ARMTargetMachine &TM,
+FunctionPass *createARMCodeEmitterPass(ARMBaseTargetMachine &TM,
                                        MachineCodeEmitter &MCE);
 
-FunctionPass *createARMCodeEmitterPass(ARMTargetMachine &TM,
+FunctionPass *createARMCodeEmitterPass(ARMBaseTargetMachine &TM,
                                        MachineCodeEmitter &MCE);
-FunctionPass *createARMJITCodeEmitterPass(ARMTargetMachine &TM, 
+FunctionPass *createARMJITCodeEmitterPass(ARMBaseTargetMachine &TM,
                                           JITCodeEmitter &JCE);
 
 FunctionPass *createARMLoadStoreOptimizationPass(bool PreAlloc = false);
diff --git a/lib/Target/ARM/ARMCodeEmitter.cpp b/lib/Target/ARM/ARMCodeEmitter.cpp
index f6629fe..8424c2e 100644
--- a/lib/Target/ARM/ARMCodeEmitter.cpp
+++ b/lib/Target/ARM/ARMCodeEmitter.cpp
@@ -176,11 +176,11 @@ namespace {
 
 namespace llvm {
 
-FunctionPass *createARMCodeEmitterPass(ARMTargetMachine &TM,
+FunctionPass *createARMCodeEmitterPass(ARMBaseTargetMachine &TM,
                                        MachineCodeEmitter &MCE) {
   return new Emitter<MachineCodeEmitter>(TM, MCE);
 }
-FunctionPass *createARMJITCodeEmitterPass(ARMTargetMachine &TM,
+FunctionPass *createARMJITCodeEmitterPass(ARMBaseTargetMachine &TM,
                                           JITCodeEmitter &JCE) {
   return new Emitter<JITCodeEmitter>(TM, JCE);
 }
diff --git a/lib/Target/ARM/ARMISelDAGToDAG.cpp b/lib/Target/ARM/ARMISelDAGToDAG.cpp
index be543a9..200371b 100644
--- a/lib/Target/ARM/ARMISelDAGToDAG.cpp
+++ b/lib/Target/ARM/ARMISelDAGToDAG.cpp
@@ -41,14 +41,14 @@ static const unsigned arm_dsubreg_1 = 6;
 ///
 namespace {
 class ARMDAGToDAGISel : public SelectionDAGISel {
-  ARMTargetMachine &TM;
+  ARMBaseTargetMachine &TM;
 
   /// Subtarget - Keep a pointer to the ARMSubtarget around so that we can
   /// make the right decision when generating code for different targets.
   const ARMSubtarget *Subtarget;
 
 public:
-  explicit ARMDAGToDAGISel(ARMTargetMachine &tm)
+  explicit ARMDAGToDAGISel(ARMBaseTargetMachine &tm)
     : SelectionDAGISel(tm), TM(tm),
     Subtarget(&TM.getSubtarget<ARMSubtarget>()) {
   }
@@ -92,11 +92,10 @@ public:
   bool SelectThumbAddrModeSP(SDValue Op, SDValue N, SDValue &Base,
                              SDValue &OffImm);
 
-  bool SelectThumb2ShifterOperandReg(SDValue Op, SDValue N,
-                                     SDValue &BaseReg, SDValue &Opc);
-
   bool SelectShifterOperandReg(SDValue Op, SDValue N, SDValue &A,
                                SDValue &B, SDValue &C);
+  bool SelectT2ShifterOperandReg(SDValue Op, SDValue N,
+                                 SDValue &BaseReg, SDValue &Opc);
   
   // Include the pieces autogenerated from the target description.
 #include "ARMGenDAGISel.inc"
@@ -520,28 +519,6 @@ bool ARMDAGToDAGISel::SelectThumbAddrModeSP(SDValue Op, SDValue N,
   return false;
 }
 
-bool ARMDAGToDAGISel::SelectThumb2ShifterOperandReg(SDValue Op,
-                                                    SDValue N,
-                                                    SDValue &BaseReg,
-                                                    SDValue &Opc) {
-  ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(N);
-
-  // Don't match base register only case. That is matched to a separate
-  // lower complexity pattern with explicit register operand.
-  if (ShOpcVal == ARM_AM::no_shift) return false;
-
-  BaseReg = N.getOperand(0);
-  unsigned ShImmVal = 0;
-  if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1)))
-    ShImmVal = RHS->getZExtValue() & 31;
-  else
-    return false;
-
-  Opc = getI32Imm(ARM_AM::getSORegOpc(ShOpcVal, ShImmVal));
-
-  return true;
-}
-
 bool ARMDAGToDAGISel::SelectShifterOperandReg(SDValue Op,
                                               SDValue N,
                                               SDValue &BaseReg,
@@ -566,6 +543,26 @@ bool ARMDAGToDAGISel::SelectShifterOperandReg(SDValue Op,
   return true;
 }
 
+bool ARMDAGToDAGISel::SelectT2ShifterOperandReg(SDValue Op, SDValue N,
+                                                SDValue &BaseReg,
+                                                SDValue &Opc) {
+  // A bare register (no shift) is rejected here; it is matched by a separate,
+  // lower complexity pattern with an explicit register operand.
+  const ARM_AM::ShiftOpc ShiftKind = ARM_AM::getShiftOpcForNode(N);
+  if (ShiftKind == ARM_AM::no_shift)
+    return false;
+
+  // Note: BaseReg is assigned even when the shift amount is rejected below.
+  BaseReg = N.getOperand(0);
+  // Only a constant shift amount is accepted; it is masked to its low 5 bits.
+  ConstantSDNode *Amt = dyn_cast<ConstantSDNode>(N.getOperand(1));
+  if (!Amt)
+    return false;
+  const unsigned ShImm = Amt->getZExtValue() & 31;
+  Opc = getI32Imm(ARM_AM::getSORegOpc(ShiftKind, ShImm));
+  return true;
+}
+
 /// getAL - Returns a ARMCC::AL immediate node.
 static inline SDValue getAL(SelectionDAG *CurDAG) {
   return CurDAG->getTargetConstant((uint64_t)ARMCC::AL, MVT::i32);
@@ -1003,6 +1000,6 @@ SelectInlineAsmMemoryOperand(const SDValue &Op, char ConstraintCode,
 /// createARMISelDag - This pass converts a legalized DAG into a
 /// ARM-specific DAG, ready for instruction scheduling.
 ///
-FunctionPass *llvm::createARMISelDag(ARMTargetMachine &TM) {
+FunctionPass *llvm::createARMISelDag(ARMBaseTargetMachine &TM) {
   return new ARMDAGToDAGISel(TM);
 }
diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp
index 29d3da2..c24bb2e 100644
--- a/lib/Target/ARM/ARMISelLowering.cpp
+++ b/lib/Target/ARM/ARMISelLowering.cpp
@@ -266,7 +266,7 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM)
   setOperationAction(ISD::ROTL,  MVT::i32, Expand);
   setOperationAction(ISD::CTTZ,  MVT::i32, Expand);
   setOperationAction(ISD::CTPOP, MVT::i32, Expand);
-  if (!Subtarget->hasV5TOps() || Subtarget->isThumb())
+  if (!Subtarget->hasV5TOps() || Subtarget->isThumb1Only())
     setOperationAction(ISD::CTLZ, MVT::i32, Expand);
 
   // Only ARMv6 has BSWAP.
diff --git a/lib/Target/ARM/ARMInstrFormats.td b/lib/Target/ARM/ARMInstrFormats.td
index d70d2e2..d7371b0 100644
--- a/lib/Target/ARM/ARMInstrFormats.td
+++ b/lib/Target/ARM/ARMInstrFormats.td
@@ -742,32 +742,85 @@ class TIx2<dag outs, dag ins, string asm, list<dag> pattern>
 class TJTI<dag outs, dag ins, string asm, list<dag> pattern>
   : ThumbI<outs, ins, AddrModeNone, SizeSpecial, asm, "", pattern>;
 
-// ThumbPat - Same as Pat<>, but requires that the compiler be in Thumb mode.
-class ThumbPat<dag pattern, dag result> : Pat<pattern, result> {
+// TPat - Same as Pat<>, but requires that the compiler be in Thumb mode.
+class TPat<dag pattern, dag result> : Pat<pattern, result> {
   list<Predicate> Predicates = [IsThumb];
 }
 
-class ThumbV5Pat<dag pattern, dag result> : Pat<pattern, result> {
+class Tv5Pat<dag pattern, dag result> : Pat<pattern, result> {
   list<Predicate> Predicates = [IsThumb, HasV5T];
 }
 
-// T2I - Thumb2 instruction.
-
-class Thumb2I<dag outs, dag ins, AddrMode am, SizeFlagVal sz,
+// Thumb1 only
+class Thumb1I<dag outs, dag ins, AddrMode am, SizeFlagVal sz,
              string asm, string cstr, list<dag> pattern>
   : InstARM<am, sz, IndexModeNone, ThumbFrm, cstr> {
   let OutOperandList = outs;
   let InOperandList = ins;
   let AsmString   = asm;
   let Pattern = pattern;
+  list<Predicate> Predicates = [IsThumb1Only];
+}
+
+class T1I<dag outs, dag ins, string asm, list<dag> pattern>
+  : Thumb1I<outs, ins, AddrModeNone, Size2Bytes, asm, "", pattern>;
+
+// Two-address instructions
+class T1It<dag outs, dag ins, string asm, list<dag> pattern>
+  : Thumb1I<outs, ins, AddrModeNone, Size2Bytes, asm, "$lhs = $dst", pattern>;
+
+class T1Pat<dag pattern, dag result> : Pat<pattern, result> {
+  list<Predicate> Predicates = [IsThumb1Only];
+}
+
+// Thumb2I - Thumb2 instruction. Almost all Thumb2 instructions are predicable.
+class Thumb2I<dag oops, dag iops, AddrMode am, SizeFlagVal sz,
+              string opc, string asm, string cstr, list<dag> pattern>
+  : InstARM<am, sz, IndexModeNone, ThumbFrm, cstr> {
+  let OutOperandList = oops;
+  let InOperandList = !con(iops, (ops pred:$p)); // pred:$p goes after iops
+  let AsmString = !strconcat(opc, !strconcat("${p}", asm)); // opc, ${p}, asm
+  let Pattern = pattern;
+  list<Predicate> Predicates = [IsThumb, HasThumb2];
+}
+
+// Same as Thumb2I except it can optionally modify CPSR. Note the optional CPSR
+// def (cc_out:$s) is modeled as an input operand since by default it's a zero
+// register. It will become an implicit def once it's "flipped".
+// FIXME: This uses unified syntax so {s} comes before {p}. We should make it
+// more consistent.
+class Thumb2sI<dag oops, dag iops, AddrMode am, SizeFlagVal sz,
+               string opc, string asm, string cstr, list<dag> pattern>
+  : InstARM<am, sz, IndexModeNone, ThumbFrm, cstr> {
+  let OutOperandList = oops;
+  let InOperandList = !con(iops, (ops pred:$p, cc_out:$s)); // pred, then cc_out
+  let AsmString   = !strconcat(opc, !strconcat("${s}${p}", asm));
+  let Pattern = pattern;
+  list<Predicate> Predicates = [IsThumb, HasThumb2];
+}
+
+// Special cases
+class Thumb2XI<dag oops, dag iops, AddrMode am, SizeFlagVal sz,
+               string asm, string cstr, list<dag> pattern>
+  : InstARM<am, sz, IndexModeNone, ThumbFrm, cstr> {
+  let OutOperandList = oops;
+  let InOperandList = iops;
+  let AsmString   = asm;
+  let Pattern = pattern;
   list<Predicate> Predicates = [IsThumb, HasThumb2];
 }
 
-class T2I<dag outs, dag ins, string asm, list<dag> pattern>
-  : Thumb2I<outs, ins, AddrModeNone, Size4Bytes, asm, "", pattern>;
+class T2I<dag oops, dag iops, string opc, string asm, list<dag> pattern>
+  : Thumb2I<oops, iops, AddrModeNone, Size4Bytes, opc, asm, "", pattern>;
+
+class T2sI<dag oops, dag iops, string opc, string asm, list<dag> pattern>
+  : Thumb2sI<oops, iops, AddrModeNone, Size4Bytes, opc, asm, "", pattern>;
+
+class T2XI<dag oops, dag iops, string asm, list<dag> pattern>
+  : Thumb2XI<oops, iops, AddrModeNone, Size4Bytes, asm, "", pattern>;
 
-// Thumb2Pat - Same as Pat<>, but requires that the compiler be in Thumb2 mode.
-class Thumb2Pat<dag pattern, dag result> : Pat<pattern, result> {
+// T2Pat - Same as Pat<>, but requires that the compiler be in Thumb2 mode.
+class T2Pat<dag pattern, dag result> : Pat<pattern, result> {
   list<Predicate> Predicates = [IsThumb, HasThumb2];
 }
 
diff --git a/lib/Target/ARM/ARMInstrInfo.cpp b/lib/Target/ARM/ARMInstrInfo.cpp
index e8da927..d95089d 100644
--- a/lib/Target/ARM/ARMInstrInfo.cpp
+++ b/lib/Target/ARM/ARMInstrInfo.cpp
@@ -39,11 +39,14 @@ const MachineInstrBuilder &AddDefaultCC(const MachineInstrBuilder &MIB) {
   return MIB.addReg(0);
 }
 
-ARMInstrInfo::ARMInstrInfo(const ARMSubtarget &STI)
+ARMBaseInstrInfo::ARMBaseInstrInfo(const ARMSubtarget &STI)
   : TargetInstrInfoImpl(ARMInsts, array_lengthof(ARMInsts)),
     RI(*this, STI) {
 }
 
+// All construction work is forwarded to the ARMBaseInstrInfo base class.
+ARMInstrInfo::ARMInstrInfo(const ARMSubtarget &STI)
+  : ARMBaseInstrInfo(STI) {}
 
 /// Return true if the instruction is a register to register move and
 /// leave the source and dest operands in the passed parameters.
@@ -65,10 +68,6 @@ bool ARMInstrInfo::isMoveInstr(const MachineInstr &MI,
     DstReg = MI.getOperand(0).getReg();
     return true;
   case ARM::MOVr:
-  case ARM::tMOVr:
-  case ARM::tMOVhir2lor:
-  case ARM::tMOVlor2hir:
-  case ARM::tMOVhir2hir:
     assert(MI.getDesc().getNumOperands() >= 2 &&
            MI.getOperand(0).isReg() &&
            MI.getOperand(1).isReg() &&
@@ -102,14 +101,6 @@ unsigned ARMInstrInfo::isLoadFromStackSlot(const MachineInstr *MI,
       return MI->getOperand(0).getReg();
     }
     break;
-  case ARM::tRestore:
-    if (MI->getOperand(1).isFI() &&
-        MI->getOperand(2).isImm() &&
-        MI->getOperand(2).getImm() == 0) {
-      FrameIndex = MI->getOperand(1).getIndex();
-      return MI->getOperand(0).getReg();
-    }
-    break;
   }
   return 0;
 }
@@ -137,22 +128,15 @@ unsigned ARMInstrInfo::isStoreToStackSlot(const MachineInstr *MI,
       return MI->getOperand(0).getReg();
     }
     break;
-  case ARM::tSpill:
-    if (MI->getOperand(1).isFI() &&
-        MI->getOperand(2).isImm() &&
-        MI->getOperand(2).getImm() == 0) {
-      FrameIndex = MI->getOperand(1).getIndex();
-      return MI->getOperand(0).getReg();
-    }
-    break;
   }
+
   return 0;
 }
 
-void ARMInstrInfo::reMaterialize(MachineBasicBlock &MBB,
-                                 MachineBasicBlock::iterator I,
-                                 unsigned DestReg,
-                                 const MachineInstr *Orig) const {
+void ARMBaseInstrInfo::reMaterialize(MachineBasicBlock &MBB,
+                                     MachineBasicBlock::iterator I,
+                                     unsigned DestReg,
+                                     const MachineInstr *Orig) const {
   DebugLoc dl = Orig->getDebugLoc();
   if (Orig->getOpcode() == ARM::MOVi2pieces) {
     RI.emitLoadConstPool(MBB, I, DestReg, Orig->getOperand(1).getImm(),
@@ -198,9 +182,9 @@ static unsigned getUnindexedOpcode(unsigned Opc) {
 }
 
 MachineInstr *
-ARMInstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI,
-                                    MachineBasicBlock::iterator &MBBI,
-                                    LiveVariables *LV) const {
+ARMBaseInstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI,
+                                        MachineBasicBlock::iterator &MBBI,
+                                        LiveVariables *LV) const {
   if (!EnableARM3Addr)
     return NULL;
 
@@ -261,7 +245,7 @@ ARMInstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI,
                          get(isSub ? ARM::SUBrs : ARM::ADDrs), WBReg)
         .addReg(BaseReg).addReg(OffReg).addReg(0).addImm(SOOpc)
         .addImm(Pred).addReg(0).addReg(0);
-    } else 
+    } else
       UpdateMI = BuildMI(MF, MI->getDebugLoc(),
                          get(isSub ? ARM::SUBrr : ARM::ADDrr), WBReg)
         .addReg(BaseReg).addReg(OffReg)
@@ -312,7 +296,7 @@ ARMInstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI,
     NewMIs.push_back(UpdateMI);
     NewMIs.push_back(MemMI);
   }
-  
+
   // Transfer LiveVariables states, kill / dead info.
   if (LV) {
     for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
@@ -320,7 +304,7 @@ ARMInstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI,
       if (MO.isReg() && MO.getReg() &&
           TargetRegisterInfo::isVirtualRegister(MO.getReg())) {
         unsigned Reg = MO.getReg();
-      
+
         LiveVariables::VarInfo &VI = LV->getVarInfo(Reg);
         if (MO.isDef()) {
           MachineInstr *NewMI = (Reg == WBReg) ? UpdateMI : MemMI;
@@ -349,18 +333,19 @@ ARMInstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI,
 }
 
 // Branch analysis.
-bool ARMInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,MachineBasicBlock *&TBB,
-                                 MachineBasicBlock *&FBB,
-                                 SmallVectorImpl<MachineOperand> &Cond,
-                                 bool AllowModify) const {
+bool
+  ARMBaseInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,MachineBasicBlock *&TBB,
+                                  MachineBasicBlock *&FBB,
+                                  SmallVectorImpl<MachineOperand> &Cond,
+                                  bool AllowModify) const {
   // If the block has no terminators, it just falls into the block after it.
   MachineBasicBlock::iterator I = MBB.end();
   if (I == MBB.begin() || !isUnpredicatedTerminator(--I))
     return false;
-  
+
   // Get the last instruction in the block.
   MachineInstr *LastInst = I;
-  
+
   // If there is only one terminator instruction, process it.
   unsigned LastOpc = LastInst->getOpcode();
   if (I == MBB.begin() || !isUnpredicatedTerminator(--I)) {
@@ -377,14 +362,14 @@ bool ARMInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,MachineBasicBlock *&TBB,
     }
     return true;  // Can't handle indirect branch.
   }
-  
+
   // Get the instruction before it if it is a terminator.
   MachineInstr *SecondLastInst = I;
-  
+
   // If there are three terminators, we don't know what sort of block this is.
   if (SecondLastInst && I != MBB.begin() && isUnpredicatedTerminator(--I))
     return true;
-  
+
   // If the block ends with ARM::B/ARM::tB and a ARM::Bcc/ARM::tBcc, handle it.
   unsigned SecondLastOpc = SecondLastInst->getOpcode();
   if ((SecondLastOpc == ARM::Bcc && LastOpc == ARM::B) ||
@@ -395,8 +380,8 @@ bool ARMInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,MachineBasicBlock *&TBB,
     FBB = LastInst->getOperand(0).getMBB();
     return false;
   }
-  
-  // If the block ends with two unconditional branches, handle it.  The second 
+
+  // If the block ends with two unconditional branches, handle it.  The second
   // one is not executed, so remove it.
   if ((SecondLastOpc == ARM::B || SecondLastOpc==ARM::tB) &&
       (LastOpc == ARM::B || LastOpc == ARM::tB)) {
@@ -417,14 +402,14 @@ bool ARMInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,MachineBasicBlock *&TBB,
     if (AllowModify)
       I->eraseFromParent();
     return true;
-  } 
+  }
 
   // Otherwise, can't handle this.
   return true;
 }
 
 
-unsigned ARMInstrInfo::RemoveBranch(MachineBasicBlock &MBB) const {
+unsigned ARMBaseInstrInfo::RemoveBranch(MachineBasicBlock &MBB) const {
   MachineFunction &MF = *MBB.getParent();
   ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
   int BOpc   = AFI->isThumbFunction() ? ARM::tB : ARM::B;
@@ -435,26 +420,26 @@ unsigned ARMInstrInfo::RemoveBranch(MachineBasicBlock &MBB) const {
   --I;
   if (I->getOpcode() != BOpc && I->getOpcode() != BccOpc)
     return 0;
-  
+
   // Remove the branch.
   I->eraseFromParent();
-  
+
   I = MBB.end();
-  
+
   if (I == MBB.begin()) return 1;
   --I;
   if (I->getOpcode() != BccOpc)
     return 1;
-  
+
   // Remove the branch.
   I->eraseFromParent();
   return 2;
 }
 
 unsigned
-ARMInstrInfo::InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
-                           MachineBasicBlock *FBB,
-                           const SmallVectorImpl<MachineOperand> &Cond) const {
+ARMBaseInstrInfo::InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
+                               MachineBasicBlock *FBB,
+                             const SmallVectorImpl<MachineOperand> &Cond) const {
   // FIXME this should probably have a DebugLoc argument
   DebugLoc dl = DebugLoc::getUnknownLoc();
   MachineFunction &MF = *MBB.getParent();
@@ -466,7 +451,7 @@ ARMInstrInfo::InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
   assert(TBB && "InsertBranch must not be told to insert a fallthrough");
   assert((Cond.size() == 2 || Cond.size() == 0) &&
          "ARM branch conditions have two components!");
-  
+
   if (FBB == 0) {
     if (Cond.empty()) // Unconditional branch?
       BuildMI(&MBB, dl, get(BOpc)).addMBB(TBB);
@@ -475,7 +460,7 @@ ARMInstrInfo::InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
         .addImm(Cond[0].getImm()).addReg(Cond[1].getReg());
     return 1;
   }
-  
+
   // Two-way conditional branch.
   BuildMI(&MBB, dl, get(BccOpc)).addMBB(TBB)
     .addImm(Cond[0].getImm()).addReg(Cond[1].getReg());
@@ -488,43 +473,18 @@ bool ARMInstrInfo::copyRegToReg(MachineBasicBlock &MBB,
                                 unsigned DestReg, unsigned SrcReg,
                                 const TargetRegisterClass *DestRC,
                                 const TargetRegisterClass *SrcRC) const {
-  MachineFunction &MF = *MBB.getParent();
-  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
   DebugLoc DL = DebugLoc::getUnknownLoc();
   if (I != MBB.end()) DL = I->getDebugLoc();
 
-  if (!AFI->isThumbFunction()) {
-    if (DestRC == ARM::GPRRegisterClass) {
-      AddDefaultCC(AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::MOVr), DestReg)
-                                  .addReg(SrcReg)));
-      return true;
-    }
-  } else {
-    if (DestRC == ARM::GPRRegisterClass) {
-      if (SrcRC == ARM::GPRRegisterClass) {
-        BuildMI(MBB, I, DL, get(ARM::tMOVhir2hir), DestReg).addReg(SrcReg);
-        return true;
-      } else if (SrcRC == ARM::tGPRRegisterClass) {
-        BuildMI(MBB, I, DL, get(ARM::tMOVlor2hir), DestReg).addReg(SrcReg);
-        return true;
-      }
-    } else if (DestRC == ARM::tGPRRegisterClass) {
-      if (SrcRC == ARM::GPRRegisterClass) {
-        BuildMI(MBB, I, DL, get(ARM::tMOVhir2lor), DestReg).addReg(SrcReg);
-        return true;
-      } else if (SrcRC == ARM::tGPRRegisterClass) {
-        BuildMI(MBB, I, DL, get(ARM::tMOVr), DestReg).addReg(SrcReg);
-        return true;
-      }
-    }
-  }
   if (DestRC != SrcRC) {
     // Not yet supported!
     return false;
   }
 
-
-  if (DestRC == ARM::SPRRegisterClass)
+  if (DestRC == ARM::GPRRegisterClass)
+    AddDefaultCC(AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::MOVr), DestReg)
+                                .addReg(SrcReg)));
+  else if (DestRC == ARM::SPRRegisterClass)
     AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::FCPYS), DestReg)
                    .addReg(SrcReg));
   else if (DestRC == ARM::DPRRegisterClass)
@@ -534,7 +494,7 @@ bool ARMInstrInfo::copyRegToReg(MachineBasicBlock &MBB,
     BuildMI(MBB, I, DL, get(ARM::VMOVQ), DestReg).addReg(SrcReg);
   else
     return false;
-  
+
   return true;
 }
 
@@ -546,19 +506,9 @@ storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
   if (I != MBB.end()) DL = I->getDebugLoc();
 
   if (RC == ARM::GPRRegisterClass) {
-    MachineFunction &MF = *MBB.getParent();
-    ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
-    assert (!AFI->isThumbFunction());
     AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::STR))
                    .addReg(SrcReg, getKillRegState(isKill))
                    .addFrameIndex(FI).addReg(0).addImm(0));
-  } else if (RC == ARM::tGPRRegisterClass) {
-    MachineFunction &MF = *MBB.getParent();
-    ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
-    assert (AFI->isThumbFunction());
-    BuildMI(MBB, I, DL, get(ARM::tSpill))
-      .addReg(SrcReg, getKillRegState(isKill))
-      .addFrameIndex(FI).addImm(0);
   } else if (RC == ARM::DPRRegisterClass) {
     AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::FSTD))
                    .addReg(SrcReg, getKillRegState(isKill))
@@ -579,16 +529,6 @@ void ARMInstrInfo::storeRegToAddr(MachineFunction &MF, unsigned SrcReg,
   DebugLoc DL = DebugLoc::getUnknownLoc();
   unsigned Opc = 0;
   if (RC == ARM::GPRRegisterClass) {
-    ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
-    if (AFI->isThumbFunction()) {
-      Opc = Addr[0].isFI() ? ARM::tSpill : ARM::tSTR;
-      MachineInstrBuilder MIB = 
-        BuildMI(MF, DL,  get(Opc)).addReg(SrcReg, getKillRegState(isKill));
-      for (unsigned i = 0, e = Addr.size(); i != e; ++i)
-        MIB.addOperand(Addr[i]);
-      NewMIs.push_back(MIB);
-      return;
-    }
     Opc = ARM::STR;
   } else if (RC == ARM::DPRRegisterClass) {
     Opc = ARM::FSTD;
@@ -597,7 +537,7 @@ void ARMInstrInfo::storeRegToAddr(MachineFunction &MF, unsigned SrcReg,
     Opc = ARM::FSTS;
   }
 
-  MachineInstrBuilder MIB = 
+  MachineInstrBuilder MIB =
     BuildMI(MF, DL, get(Opc)).addReg(SrcReg, getKillRegState(isKill));
   for (unsigned i = 0, e = Addr.size(); i != e; ++i)
     MIB.addOperand(Addr[i]);
@@ -614,17 +554,8 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
   if (I != MBB.end()) DL = I->getDebugLoc();
 
   if (RC == ARM::GPRRegisterClass) {
-    MachineFunction &MF = *MBB.getParent();
-    ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
-    assert (!AFI->isThumbFunction());
     AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::LDR), DestReg)
                    .addFrameIndex(FI).addReg(0).addImm(0));
-  } else if (RC == ARM::tGPRRegisterClass) {
-    MachineFunction &MF = *MBB.getParent();
-    ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
-    assert (AFI->isThumbFunction());
-    BuildMI(MBB, I, DL, get(ARM::tRestore), DestReg)
-      .addFrameIndex(FI).addImm(0);
   } else if (RC == ARM::DPRRegisterClass) {
     AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::FLDD), DestReg)
                    .addFrameIndex(FI).addImm(0));
@@ -643,15 +574,6 @@ loadRegFromAddr(MachineFunction &MF, unsigned DestReg,
   DebugLoc DL = DebugLoc::getUnknownLoc();
   unsigned Opc = 0;
   if (RC == ARM::GPRRegisterClass) {
-    ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
-    if (AFI->isThumbFunction()) {
-      Opc = Addr[0].isFI() ? ARM::tRestore : ARM::tLDR;
-      MachineInstrBuilder MIB = BuildMI(MF, DL, get(Opc), DestReg);
-      for (unsigned i = 0, e = Addr.size(); i != e; ++i)
-        MIB.addOperand(Addr[i]);
-      NewMIs.push_back(MIB);
-      return;
-    }
     Opc = ARM::LDR;
   } else if (RC == ARM::DPRRegisterClass) {
     Opc = ARM::FLDD;
@@ -668,59 +590,6 @@ loadRegFromAddr(MachineFunction &MF, unsigned DestReg,
   return;
 }
 
-bool ARMInstrInfo::
-spillCalleeSavedRegisters(MachineBasicBlock &MBB,
-                          MachineBasicBlock::iterator MI,
-                          const std::vector<CalleeSavedInfo> &CSI) const {
-  MachineFunction &MF = *MBB.getParent();
-  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
-  if (!AFI->isThumbFunction() || CSI.empty())
-    return false;
-
-  DebugLoc DL = DebugLoc::getUnknownLoc();
-  if (MI != MBB.end()) DL = MI->getDebugLoc();
-
-  MachineInstrBuilder MIB = BuildMI(MBB, MI, DL, get(ARM::tPUSH));
-  for (unsigned i = CSI.size(); i != 0; --i) {
-    unsigned Reg = CSI[i-1].getReg();
-    // Add the callee-saved register as live-in. It's killed at the spill.
-    MBB.addLiveIn(Reg);
-    MIB.addReg(Reg, RegState::Kill);
-  }
-  return true;
-}
-
-bool ARMInstrInfo::
-restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
-                            MachineBasicBlock::iterator MI,
-                            const std::vector<CalleeSavedInfo> &CSI) const {
-  MachineFunction &MF = *MBB.getParent();
-  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
-  if (!AFI->isThumbFunction() || CSI.empty())
-    return false;
-
-  bool isVarArg = AFI->getVarArgsRegSaveSize() > 0;
-  MachineInstr *PopMI = MF.CreateMachineInstr(get(ARM::tPOP),MI->getDebugLoc());
-  for (unsigned i = CSI.size(); i != 0; --i) {
-    unsigned Reg = CSI[i-1].getReg();
-    if (Reg == ARM::LR) {
-      // Special epilogue for vararg functions. See emitEpilogue
-      if (isVarArg)
-        continue;
-      Reg = ARM::PC;
-      PopMI->setDesc(get(ARM::tPOP_RET));
-      MI = MBB.erase(MI);
-    }
-    PopMI->addOperand(MachineOperand::CreateReg(Reg, true));
-  }
-
-  // It's illegal to emit pop instruction without operands.
-  if (PopMI->getNumOperands() > 0)
-    MBB.insert(MI, PopMI);
-
-  return true;
-}
-
 MachineInstr *ARMInstrInfo::
 foldMemoryOperandImpl(MachineFunction &MF, MachineInstr *MI,
                       const SmallVectorImpl<unsigned> &Ops, int FI) const {
@@ -752,31 +621,6 @@ foldMemoryOperandImpl(MachineFunction &MF, MachineInstr *MI,
     }
     break;
   }
-  case ARM::tMOVr:
-  case ARM::tMOVlor2hir:
-  case ARM::tMOVhir2lor:
-  case ARM::tMOVhir2hir: {
-    if (OpNum == 0) { // move -> store
-      unsigned SrcReg = MI->getOperand(1).getReg();
-      bool isKill = MI->getOperand(1).isKill();
-      if (RI.isPhysicalRegister(SrcReg) && !RI.isLowRegister(SrcReg))
-        // tSpill cannot take a high register operand.
-        break;
-      NewMI = BuildMI(MF, MI->getDebugLoc(), get(ARM::tSpill))
-        .addReg(SrcReg, getKillRegState(isKill))
-        .addFrameIndex(FI).addImm(0);
-    } else {          // move -> load
-      unsigned DstReg = MI->getOperand(0).getReg();
-      if (RI.isPhysicalRegister(DstReg) && !RI.isLowRegister(DstReg))
-        // tRestore cannot target a high register operand.
-        break;
-      bool isDead = MI->getOperand(0).isDead();
-      NewMI = BuildMI(MF, MI->getDebugLoc(), get(ARM::tRestore))
-        .addReg(DstReg, RegState::Define | getDeadRegState(isDead))
-        .addFrameIndex(FI).addImm(0);
-    }
-    break;
-  }
   case ARM::FCPYS: {
     unsigned Pred = MI->getOperand(2).getImm();
     unsigned PredReg = MI->getOperand(3).getReg();
@@ -816,7 +660,7 @@ foldMemoryOperandImpl(MachineFunction &MF, MachineInstr *MI,
   return NewMI;
 }
 
-bool ARMInstrInfo::
+bool ARMBaseInstrInfo::
 canFoldMemoryOperand(const MachineInstr *MI,
                      const SmallVectorImpl<unsigned> &Ops) const {
   if (Ops.size() != 1) return false;
@@ -857,9 +701,10 @@ canFoldMemoryOperand(const MachineInstr *MI,
   return false;
 }
 
-bool ARMInstrInfo::BlockHasNoFallThrough(const MachineBasicBlock &MBB) const {
+bool
+  ARMBaseInstrInfo::BlockHasNoFallThrough(const MachineBasicBlock &MBB) const {
   if (MBB.empty()) return false;
-  
+
   switch (MBB.back().getOpcode()) {
   case ARM::BX_RET:   // Return.
   case ARM::LDM_RET:
@@ -877,19 +722,19 @@ bool ARMInstrInfo::BlockHasNoFallThrough(const MachineBasicBlock &MBB) const {
   }
 }
 
-bool ARMInstrInfo::
+bool ARMBaseInstrInfo::
 ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const {
   ARMCC::CondCodes CC = (ARMCC::CondCodes)(int)Cond[0].getImm();
   Cond[0].setImm(ARMCC::getOppositeCondition(CC));
   return false;
 }
 
-bool ARMInstrInfo::isPredicated(const MachineInstr *MI) const {
+bool ARMBaseInstrInfo::isPredicated(const MachineInstr *MI) const {
   int PIdx = MI->findFirstPredOperandIdx();
   return PIdx != -1 && MI->getOperand(PIdx).getImm() != ARMCC::AL;
 }
 
-bool ARMInstrInfo::
+bool ARMBaseInstrInfo::
 PredicateInstruction(MachineInstr *MI,
                      const SmallVectorImpl<MachineOperand> &Pred) const {
   unsigned Opc = MI->getOpcode();
@@ -910,7 +755,7 @@ PredicateInstruction(MachineInstr *MI,
   return false;
 }
 
-bool ARMInstrInfo::
+bool ARMBaseInstrInfo::
 SubsumesPredicate(const SmallVectorImpl<MachineOperand> &Pred1,
                   const SmallVectorImpl<MachineOperand> &Pred2) const {
   if (Pred1.size() > 2 || Pred2.size() > 2)
@@ -937,7 +782,7 @@ SubsumesPredicate(const SmallVectorImpl<MachineOperand> &Pred1,
   }
 }
 
-bool ARMInstrInfo::DefinesPredicate(MachineInstr *MI,
+bool ARMBaseInstrInfo::DefinesPredicate(MachineInstr *MI,
                                     std::vector<MachineOperand> &Pred) const {
   const TargetInstrDesc &TID = MI->getDesc();
   if (!TID.getImplicitDefs() && !TID.hasOptionalDef())
@@ -966,7 +811,7 @@ static unsigned getNumJTEntries(const std::vector<MachineJumpTableEntry> &JT,
 
 /// GetInstSize - Return the size of the specified MachineInstr.
 ///
-unsigned ARMInstrInfo::GetInstSizeInBytes(const MachineInstr *MI) const {
+unsigned ARMBaseInstrInfo::GetInstSizeInBytes(const MachineInstr *MI) const {
   const MachineBasicBlock &MBB = *MI->getParent();
   const MachineFunction *MF = MBB.getParent();
   const TargetAsmInfo *TAI = MF->getTarget().getTargetAsmInfo();
@@ -974,7 +819,7 @@ unsigned ARMInstrInfo::GetInstSizeInBytes(const MachineInstr *MI) const {
   // Basic size info comes from the TSFlags field.
   const TargetInstrDesc &TID = MI->getDesc();
   unsigned TSFlags = TID.TSFlags;
-  
+
   switch ((TSFlags & ARMII::SizeMask) >> ARMII::SizeShift) {
   default: {
     // If this machine instr is an inline asm, measure it.
@@ -1024,7 +869,7 @@ unsigned ARMInstrInfo::GetInstSizeInBytes(const MachineInstr *MI) const {
       // FIXME: If we know the size of the function is less than (1 << 16) *2
       // bytes, we can use 16-bit entries instead. Then there won't be an
       // alignment issue.
-      return getNumJTEntries(JT, JTI) * 4 + 
+      return getNumJTEntries(JT, JTI) * 4 +
              (MI->getOpcode()==ARM::tBR_JTr ? 2 : 4);
     }
     default:
diff --git a/lib/Target/ARM/ARMInstrInfo.h b/lib/Target/ARM/ARMInstrInfo.h
index 9658f3b..131960b 100644
--- a/lib/Target/ARM/ARMInstrInfo.h
+++ b/lib/Target/ARM/ARMInstrInfo.h
@@ -51,14 +51,14 @@ namespace ARMII {
     Size8Bytes    = 2,
     Size4Bytes    = 3,
     Size2Bytes    = 4,
-    
+
     // IndexMode - Unindex, pre-indexed, or post-indexed. Only valid for load
-    // and store ops 
+    // and store ops
     IndexModeShift = 7,
     IndexModeMask  = 3 << IndexModeShift,
     IndexModePre   = 1,
     IndexModePost  = 2,
-    
+
     //===------------------------------------------------------------------===//
     // Misc flags.
 
@@ -146,10 +146,12 @@ namespace ARMII {
   };
 }
 
-class ARMInstrInfo : public TargetInstrInfoImpl {
+class ARMBaseInstrInfo : public TargetInstrInfoImpl {
   const ARMRegisterInfo RI;
+protected:
+  // Can only be subclassed.
+  explicit ARMBaseInstrInfo(const ARMSubtarget &STI);
 public:
-  explicit ARMInstrInfo(const ARMSubtarget &STI);
 
   /// getRegisterInfo - TargetInstrInfo is a superset of MRegister info.  As
   /// such, whenever a client has an instance of instruction info, it should
@@ -157,17 +159,6 @@ public:
   ///
   virtual const ARMRegisterInfo &getRegisterInfo() const { return RI; }
 
-  /// Return true if the instruction is a register to register move and return
-  /// the source and dest operands and their sub-register indices by reference.
-  virtual bool isMoveInstr(const MachineInstr &MI,
-                           unsigned &SrcReg, unsigned &DstReg,
-                           unsigned &SrcSubIdx, unsigned &DstSubIdx) const;
-
-  virtual unsigned isLoadFromStackSlot(const MachineInstr *MI,
-                                       int &FrameIndex) const;
-  virtual unsigned isStoreToStackSlot(const MachineInstr *MI,
-                                      int &FrameIndex) const;
-  
   void reMaterialize(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
                      unsigned DestReg, const MachineInstr *Orig) const;
 
@@ -184,6 +175,54 @@ public:
   virtual unsigned InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
                                 MachineBasicBlock *FBB,
                             const SmallVectorImpl<MachineOperand> &Cond) const;
+
+  virtual bool canFoldMemoryOperand(const MachineInstr *MI,
+                                    const SmallVectorImpl<unsigned> &Ops) const;
+
+  virtual bool BlockHasNoFallThrough(const MachineBasicBlock &MBB) const;
+  virtual
+  bool ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const;
+
+  // Predication support.
+  virtual bool isPredicated(const MachineInstr *MI) const;
+
+  ARMCC::CondCodes getPredicate(const MachineInstr *MI) const {
+    int PIdx = MI->findFirstPredOperandIdx();
+    return PIdx != -1 ? (ARMCC::CondCodes)MI->getOperand(PIdx).getImm()
+                      : ARMCC::AL;
+  }
+
+  virtual
+  bool PredicateInstruction(MachineInstr *MI,
+                            const SmallVectorImpl<MachineOperand> &Pred) const;
+
+  virtual
+  bool SubsumesPredicate(const SmallVectorImpl<MachineOperand> &Pred1,
+                         const SmallVectorImpl<MachineOperand> &Pred2) const;
+
+  virtual bool DefinesPredicate(MachineInstr *MI,
+                                std::vector<MachineOperand> &Pred) const;
+
+  /// GetInstSize - Returns the size of the specified MachineInstr.
+  ///
+  virtual unsigned GetInstSizeInBytes(const MachineInstr* MI) const;
+};
+
+class ARMInstrInfo : public ARMBaseInstrInfo {
+public:
+  explicit ARMInstrInfo(const ARMSubtarget &STI);
+
+  /// Return true if the instruction is a register to register move and return
+  /// the source and dest operands and their sub-register indices by reference.
+  virtual bool isMoveInstr(const MachineInstr &MI,
+                           unsigned &SrcReg, unsigned &DstReg,
+                           unsigned &SrcSubIdx, unsigned &DstSubIdx) const;
+
+  virtual unsigned isLoadFromStackSlot(const MachineInstr *MI,
+                                       int &FrameIndex) const;
+  virtual unsigned isStoreToStackSlot(const MachineInstr *MI,
+                                      int &FrameIndex) const;
+
   virtual bool copyRegToReg(MachineBasicBlock &MBB,
                             MachineBasicBlock::iterator I,
                             unsigned DestReg, unsigned SrcReg,
@@ -208,13 +247,7 @@ public:
                                SmallVectorImpl<MachineOperand> &Addr,
                                const TargetRegisterClass *RC,
                                SmallVectorImpl<MachineInstr*> &NewMIs) const;
-  virtual bool spillCalleeSavedRegisters(MachineBasicBlock &MBB,
-                                         MachineBasicBlock::iterator MI,
-                                 const std::vector<CalleeSavedInfo> &CSI) const;
-  virtual bool restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
-                                           MachineBasicBlock::iterator MI,
-                                 const std::vector<CalleeSavedInfo> &CSI) const;
-  
+
   virtual MachineInstr* foldMemoryOperandImpl(MachineFunction &MF,
                                               MachineInstr* MI,
                                            const SmallVectorImpl<unsigned> &Ops,
@@ -226,37 +259,6 @@ public:
                                               MachineInstr* LoadMI) const {
     return 0;
   }
-
-  virtual bool canFoldMemoryOperand(const MachineInstr *MI,
-                                    const SmallVectorImpl<unsigned> &Ops) const;
-  
-  virtual bool BlockHasNoFallThrough(const MachineBasicBlock &MBB) const;
-  virtual
-  bool ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const;
-
-  // Predication support.
-  virtual bool isPredicated(const MachineInstr *MI) const;
-
-  ARMCC::CondCodes getPredicate(const MachineInstr *MI) const {
-    int PIdx = MI->findFirstPredOperandIdx();
-    return PIdx != -1 ? (ARMCC::CondCodes)MI->getOperand(PIdx).getImm() 
-                      : ARMCC::AL;
-  }
-
-  virtual
-  bool PredicateInstruction(MachineInstr *MI,
-                            const SmallVectorImpl<MachineOperand> &Pred) const;
-
-  virtual
-  bool SubsumesPredicate(const SmallVectorImpl<MachineOperand> &Pred1,
-                         const SmallVectorImpl<MachineOperand> &Pred2) const;
-
-  virtual bool DefinesPredicate(MachineInstr *MI,
-                                std::vector<MachineOperand> &Pred) const;
-    
-  /// GetInstSize - Returns the size of the specified MachineInstr.
-  ///
-  virtual unsigned GetInstSizeInBytes(const MachineInstr* MI) const;
 };
 
 }
diff --git a/lib/Target/ARM/ARMInstrInfo.td b/lib/Target/ARM/ARMInstrInfo.td
index 7003a65..cb7b7b9 100644
--- a/lib/Target/ARM/ARMInstrInfo.td
+++ b/lib/Target/ARM/ARMInstrInfo.td
@@ -103,6 +103,8 @@ def HasThumb2 : Predicate<"Subtarget->hasThumb2()">;
 def IsARM     : Predicate<"!Subtarget->isThumb()">;
 def IsDarwin    : Predicate<"Subtarget->isTargetDarwin()">;
 def IsNotDarwin : Predicate<"!Subtarget->isTargetDarwin()">;
+def CarryDefIsUnused : Predicate<"!N.getNode()->hasAnyUseOfValue(1)">;
+def CarryDefIsUsed   : Predicate<"N.getNode()->hasAnyUseOfValue(1)">;
 
 //===----------------------------------------------------------------------===//
 // ARM Flag Definitions.
@@ -353,28 +355,34 @@ include "ARMInstrFormats.td"
 
 /// AsI1_bin_irs - Defines a set of (op r, {so_imm|r|so_reg}) patterns for a
 /// binop that produces a value.
-multiclass AsI1_bin_irs<bits<4> opcod, string opc, PatFrag opnode> {
+multiclass AsI1_bin_irs<bits<4> opcod, string opc, PatFrag opnode,
+                        bit Commutable = 0> {
   def ri : AsI1<opcod, (outs GPR:$dst), (ins GPR:$a, so_imm:$b), DPFrm,
                opc, " $dst, $a, $b",
                [(set GPR:$dst, (opnode GPR:$a, so_imm:$b))]>;
   def rr : AsI1<opcod, (outs GPR:$dst), (ins GPR:$a, GPR:$b), DPFrm,
                opc, " $dst, $a, $b",
-               [(set GPR:$dst, (opnode GPR:$a, GPR:$b))]>;
+               [(set GPR:$dst, (opnode GPR:$a, GPR:$b))]> {
+    let isCommutable = Commutable;
+  }
   def rs : AsI1<opcod, (outs GPR:$dst), (ins GPR:$a, so_reg:$b), DPSoRegFrm,
                opc, " $dst, $a, $b",
                [(set GPR:$dst, (opnode GPR:$a, so_reg:$b))]>;
 }
 
-/// ASI1_bin_s_irs - Similar to AsI1_bin_irs except it sets the 's' bit so the
+/// AI1_bin_s_irs - Similar to AsI1_bin_irs except it sets the 's' bit so the
 /// instruction modifies the CSPR register.
 let Defs = [CPSR] in {
-multiclass ASI1_bin_s_irs<bits<4> opcod, string opc, PatFrag opnode> {
+multiclass AI1_bin_s_irs<bits<4> opcod, string opc, PatFrag opnode,
+                         bit Commutable = 0> {
   def ri : AI1<opcod, (outs GPR:$dst), (ins GPR:$a, so_imm:$b), DPFrm,
                opc, "s $dst, $a, $b",
                [(set GPR:$dst, (opnode GPR:$a, so_imm:$b))]>;
   def rr : AI1<opcod, (outs GPR:$dst), (ins GPR:$a, GPR:$b), DPFrm,
                opc, "s $dst, $a, $b",
-               [(set GPR:$dst, (opnode GPR:$a, GPR:$b))]>;
+               [(set GPR:$dst, (opnode GPR:$a, GPR:$b))]> {
+    let isCommutable = Commutable;
+  }
   def rs : AI1<opcod, (outs GPR:$dst), (ins GPR:$a, so_reg:$b), DPSoRegFrm,
                opc, "s $dst, $a, $b",
                [(set GPR:$dst, (opnode GPR:$a, so_reg:$b))]>;
@@ -385,13 +393,16 @@ multiclass ASI1_bin_s_irs<bits<4> opcod, string opc, PatFrag opnode> {
 /// patterns. Similar to AsI1_bin_irs except the instruction does not produce
 /// a explicit result, only implicitly set CPSR.
 let Defs = [CPSR] in {
-multiclass AI1_cmp_irs<bits<4> opcod, string opc, PatFrag opnode> {
+multiclass AI1_cmp_irs<bits<4> opcod, string opc, PatFrag opnode,
+                       bit Commutable = 0> {
   def ri : AI1<opcod, (outs), (ins GPR:$a, so_imm:$b), DPFrm,
                opc, " $a, $b",
                [(opnode GPR:$a, so_imm:$b)]>;
   def rr : AI1<opcod, (outs), (ins GPR:$a, GPR:$b), DPFrm,
                opc, " $a, $b",
-               [(opnode GPR:$a, GPR:$b)]>;
+               [(opnode GPR:$a, GPR:$b)]> {
+    let isCommutable = Commutable;
+  }
   def rs : AI1<opcod, (outs), (ins GPR:$a, so_reg:$b), DPSoRegFrm,
                opc, " $a, $b",
                [(opnode GPR:$a, so_reg:$b)]>;
@@ -430,19 +441,43 @@ multiclass AI_bin_rrot<bits<8> opcod, string opc, PatFrag opnode> {
                   Requires<[IsARM, HasV6]>;
 }
 
-/// AsXI1_bin_c_irs - Same as AsI1_bin_irs but without the predicate operand and
-/// setting carry bit. But it can optionally set CPSR.
+/// AI1_adde_sube_irs - Define instructions and patterns for adde and sube.
 let Uses = [CPSR] in {
-multiclass AsXI1_bin_c_irs<bits<4> opcod, string opc, PatFrag opnode> {
-  def ri : AXI1<opcod, (outs GPR:$dst), (ins GPR:$a, so_imm:$b, cc_out:$s),
-                DPFrm, !strconcat(opc, "${s} $dst, $a, $b"),
-               [(set GPR:$dst, (opnode GPR:$a, so_imm:$b))]>;
-  def rr : AXI1<opcod, (outs GPR:$dst), (ins GPR:$a, GPR:$b, cc_out:$s),
-                DPFrm, !strconcat(opc, "${s} $dst, $a, $b"),
-               [(set GPR:$dst, (opnode GPR:$a, GPR:$b))]>;
-  def rs : AXI1<opcod, (outs GPR:$dst), (ins GPR:$a, so_reg:$b, cc_out:$s),
-                DPSoRegFrm, !strconcat(opc, "${s} $dst, $a, $b"),
-               [(set GPR:$dst, (opnode GPR:$a, so_reg:$b))]>;
+multiclass AI1_adde_sube_irs<bits<4> opcod, string opc, PatFrag opnode,
+                             bit Commutable = 0> {
+  def ri : AsI1<opcod, (outs GPR:$dst), (ins GPR:$a, so_imm:$b),
+                DPFrm, opc, " $dst, $a, $b",
+               [(set GPR:$dst, (opnode GPR:$a, so_imm:$b))]>,
+               Requires<[IsARM, CarryDefIsUnused]>;
+  def rr : AsI1<opcod, (outs GPR:$dst), (ins GPR:$a, GPR:$b),
+                DPFrm, opc, " $dst, $a, $b",
+               [(set GPR:$dst, (opnode GPR:$a, GPR:$b))]>,
+               Requires<[IsARM, CarryDefIsUnused]> {
+    let isCommutable = Commutable;
+  }
+  def rs : AsI1<opcod, (outs GPR:$dst), (ins GPR:$a, so_reg:$b),
+                DPSoRegFrm, opc, " $dst, $a, $b",
+               [(set GPR:$dst, (opnode GPR:$a, so_reg:$b))]>,
+               Requires<[IsARM, CarryDefIsUnused]>;
+  // Carry setting variants
+  def Sri : AXI1<opcod, (outs GPR:$dst), (ins GPR:$a, so_imm:$b),
+                DPFrm, !strconcat(opc, "s $dst, $a, $b"),
+               [(set GPR:$dst, (opnode GPR:$a, so_imm:$b))]>,
+               Requires<[IsARM, CarryDefIsUsed]> {
+                 let Defs = [CPSR];
+  }
+  def Srr : AXI1<opcod, (outs GPR:$dst), (ins GPR:$a, GPR:$b),
+                DPFrm, !strconcat(opc, "s $dst, $a, $b"),
+               [(set GPR:$dst, (opnode GPR:$a, GPR:$b))]>,
+               Requires<[IsARM, CarryDefIsUsed]> {
+                 let Defs = [CPSR];
+  }
+  def Srs : AXI1<opcod, (outs GPR:$dst), (ins GPR:$a, so_reg:$b),
+                DPSoRegFrm, !strconcat(opc, "s $dst, $a, $b"),
+               [(set GPR:$dst, (opnode GPR:$a, so_reg:$b))]>,
+               Requires<[IsARM, CarryDefIsUsed]> {
+                 let Defs = [CPSR];
+  }
 }
 }
 
@@ -535,7 +570,8 @@ def LEApcrel : AXI1<0x0, (outs GPR:$dst), (ins i32imm:$label, pred:$p), Pseudo,
                                          "add$p $dst, pc, #PCRELV${:uid}")),
                    []>;
 
-def LEApcrelJT : AXI1<0x0, (outs GPR:$dst), (ins i32imm:$label, i32imm:$id, pred:$p),
+def LEApcrelJT : AXI1<0x0, (outs GPR:$dst),
+                           (ins i32imm:$label, i32imm:$id, pred:$p),
           Pseudo,
           !strconcat(!strconcat(".set PCRELV${:uid}, (${label}_${id:no_hash}-(",
                                          "${:private}PCRELL${:uid}+8))\n"),
@@ -899,21 +935,20 @@ defm UXTAH : AI_bin_rrot<0b01101111, "uxtah",
 //
 
 defm ADD  : AsI1_bin_irs<0b0100, "add",
-                         BinOpFrag<(add  node:$LHS, node:$RHS)>>;
+                         BinOpFrag<(add  node:$LHS, node:$RHS)>, 1>;
 defm SUB  : AsI1_bin_irs<0b0010, "sub",
                          BinOpFrag<(sub  node:$LHS, node:$RHS)>>;
 
 // ADD and SUB with 's' bit set.
-defm ADDS : ASI1_bin_s_irs<0b0100, "add",
-                           BinOpFrag<(addc node:$LHS, node:$RHS)>>;
-defm SUBS : ASI1_bin_s_irs<0b0010, "sub",
-                           BinOpFrag<(subc node:$LHS, node:$RHS)>>;
+defm ADDS : AI1_bin_s_irs<0b0100, "add",
+                          BinOpFrag<(addc node:$LHS, node:$RHS)>>;
+defm SUBS : AI1_bin_s_irs<0b0010, "sub",
+                          BinOpFrag<(subc node:$LHS, node:$RHS)>>;
 
-// FIXME: Do not allow ADC / SBC to be predicated for now.
-defm ADC  : AsXI1_bin_c_irs<0b0101, "adc",
-                            BinOpFrag<(adde node:$LHS, node:$RHS)>>;
-defm SBC  : AsXI1_bin_c_irs<0b0110, "sbc",
-                            BinOpFrag<(sube node:$LHS, node:$RHS)>>;
+defm ADC : AI1_adde_sube_irs<0b0101, "adc",
+                             BinOpFrag<(adde node:$LHS, node:$RHS)>, 1>;
+defm SBC : AI1_adde_sube_irs<0b0110, "sbc",
+                             BinOpFrag<(sube node:$LHS, node:$RHS)>>;
 
 // These don't define reg/reg forms, because they are handled above.
 def RSBri : AsI1<0b0011, (outs GPR:$dst), (ins GPR:$a, so_imm:$b), DPFrm,
@@ -934,14 +969,27 @@ def RSBSrs : AI1<0b0011, (outs GPR:$dst), (ins GPR:$a, so_reg:$b), DPSoRegFrm,
                  [(set GPR:$dst, (subc so_reg:$b, GPR:$a))]>;
 }
 
-// FIXME: Do not allow RSC to be predicated for now. But they can set CPSR.
 let Uses = [CPSR] in {
-def RSCri : AXI1<0b0111, (outs GPR:$dst), (ins GPR:$a, so_imm:$b, cc_out:$s),
-                 DPFrm, "rsc${s} $dst, $a, $b",
-                 [(set GPR:$dst, (sube so_imm:$b, GPR:$a))]>;
-def RSCrs : AXI1<0b0111, (outs GPR:$dst), (ins GPR:$a, so_reg:$b, cc_out:$s),
-                 DPSoRegFrm, "rsc${s} $dst, $a, $b",
-                 [(set GPR:$dst, (sube so_reg:$b, GPR:$a))]>;
+def RSCri : AsI1<0b0111, (outs GPR:$dst), (ins GPR:$a, so_imm:$b),
+                 DPFrm, "rsc", " $dst, $a, $b",
+                 [(set GPR:$dst, (sube so_imm:$b, GPR:$a))]>,
+                 Requires<[IsARM, CarryDefIsUnused]>;
+def RSCrs : AsI1<0b0111, (outs GPR:$dst), (ins GPR:$a, so_reg:$b),
+                 DPSoRegFrm, "rsc", " $dst, $a, $b",
+                 [(set GPR:$dst, (sube so_reg:$b, GPR:$a))]>,
+                 Requires<[IsARM, CarryDefIsUnused]>;
+}
+
+// Carry setting variants. FIXME: Allow these to be predicated.
+let Defs = [CPSR], Uses = [CPSR] in {
+def RSCSri : AXI1<0b0111, (outs GPR:$dst), (ins GPR:$a, so_imm:$b),
+                  DPFrm, "rscs $dst, $a, $b",
+                  [(set GPR:$dst, (sube so_imm:$b, GPR:$a))]>,
+                  Requires<[IsARM, CarryDefIsUsed]>;
+def RSCSrs : AXI1<0b0111, (outs GPR:$dst), (ins GPR:$a, so_reg:$b),
+                  DPSoRegFrm, "rscs $dst, $a, $b",
+                  [(set GPR:$dst, (sube so_reg:$b, GPR:$a))]>,
+                  Requires<[IsARM, CarryDefIsUsed]>;
+}
 
 // (sub X, imm) gets canonicalized to (add X, -imm).  Match this form.
@@ -965,11 +1013,11 @@ def : ARMPat<(add    GPR:$src, so_imm_neg:$imm),
 //
 
 defm AND   : AsI1_bin_irs<0b0000, "and",
-                          BinOpFrag<(and node:$LHS, node:$RHS)>>;
+                          BinOpFrag<(and node:$LHS, node:$RHS)>, 1>;
 defm ORR   : AsI1_bin_irs<0b1100, "orr",
-                          BinOpFrag<(or  node:$LHS, node:$RHS)>>;
+                          BinOpFrag<(or  node:$LHS, node:$RHS)>, 1>;
 defm EOR   : AsI1_bin_irs<0b0001, "eor",
-                          BinOpFrag<(xor node:$LHS, node:$RHS)>>;
+                          BinOpFrag<(xor node:$LHS, node:$RHS)>, 1>;
 defm BIC   : AsI1_bin_irs<0b1110, "bic",
                           BinOpFrag<(and node:$LHS, (not node:$RHS))>>;
 
@@ -991,6 +1039,7 @@ def : ARMPat<(and   GPR:$src, so_imm_not:$imm),
 //  Multiply Instructions.
 //
 
+let isCommutable = 1 in
 def MUL   : AsMul1I<0b0000000, (outs GPR:$dst), (ins GPR:$a, GPR:$b),
                     "mul", " $dst, $a, $b",
                    [(set GPR:$dst, (mul GPR:$a, GPR:$b))]>;
@@ -1001,6 +1050,7 @@ def MLA   : AsMul1I<0b0000001, (outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$c),
 
 // Extra precision multiplies with low / high results
 let neverHasSideEffects = 1 in {
+let isCommutable = 1 in {
 def SMULL : AsMul1I<0b0000110, (outs GPR:$ldst, GPR:$hdst),
                                (ins GPR:$a, GPR:$b),
                     "smull", " $ldst, $hdst, $a, $b", []>;
@@ -1008,6 +1058,7 @@ def SMULL : AsMul1I<0b0000110, (outs GPR:$ldst, GPR:$hdst),
 def UMULL : AsMul1I<0b0000100, (outs GPR:$ldst, GPR:$hdst),
                                (ins GPR:$a, GPR:$b),
                     "umull", " $ldst, $hdst, $a, $b", []>;
+}
 
 // Multiply + accumulate
 def SMLAL : AsMul1I<0b0000111, (outs GPR:$ldst, GPR:$hdst),
@@ -1258,9 +1309,9 @@ defm CMN  : AI1_cmp_irs<0b1011, "cmn",
 
 // Note that TST/TEQ don't set all the same flags that CMP does!
 defm TST  : AI1_cmp_irs<0b1000, "tst",
-                        BinOpFrag<(ARMcmpNZ (and node:$LHS, node:$RHS), 0)>>;
+                        BinOpFrag<(ARMcmpNZ (and node:$LHS, node:$RHS), 0)>, 1>;
 defm TEQ  : AI1_cmp_irs<0b1001, "teq",
-                        BinOpFrag<(ARMcmpNZ (xor node:$LHS, node:$RHS), 0)>>;
+                        BinOpFrag<(ARMcmpNZ (xor node:$LHS, node:$RHS), 0)>, 1>;
 
 defm CMPnz : AI1_cmp_irs<0b1010, "cmp",
                          BinOpFrag<(ARMcmpNZ node:$LHS, node:$RHS)>>;
diff --git a/lib/Target/ARM/ARMInstrThumb.td b/lib/Target/ARM/ARMInstrThumb.td
index 1def093..7927ca5 100644
--- a/lib/Target/ARM/ARMInstrThumb.td
+++ b/lib/Target/ARM/ARMInstrThumb.td
@@ -128,10 +128,28 @@ PseudoInst<(outs), (ins i32imm:$amt),
 }
 
 let isNotDuplicable = 1 in
-def tPICADD : TIt<(outs tGPR:$dst), (ins tGPR:$lhs, pclabel:$cp),
+def tPICADD : T1It<(outs tGPR:$dst), (ins tGPR:$lhs, pclabel:$cp),
                   "$cp:\n\tadd $dst, pc",
                   [(set tGPR:$dst, (ARMpic_add tGPR:$lhs, imm:$cp))]>;
 
+// PC relative add.
+def tADDrPCi : T1I<(outs tGPR:$dst), (ins i32imm:$rhs),
+                  "add $dst, pc, $rhs * 4", []>;
+
+// ADD rd, sp, #imm8
+// FIXME: hard code sp?
+def tADDrSPi : T1I<(outs tGPR:$dst), (ins GPR:$sp, i32imm:$rhs),
+                  "add $dst, $sp, $rhs * 4 @ addrspi", []>;
+
+// ADD sp, sp, #imm7
+// FIXME: hard code sp?
+def tADDspi : T1It<(outs GPR:$dst), (ins GPR:$lhs, i32imm:$rhs),
+                  "add $dst, $rhs * 4", []>;
+
+// FIXME: Make use of the following?
+// ADD rm, sp, rm
+// ADD sp, rm
+
 //===----------------------------------------------------------------------===//
 //  Control Flow Instructions.
 //
@@ -276,113 +294,135 @@ def tPUSH : TI<(outs), (ins reglist:$src1, variable_ops),
 //  Arithmetic Instructions.
 //
 
-// Add with carry
-let isCommutable = 1 in
-def tADC : TIt<(outs tGPR:$dst), (ins tGPR:$lhs, tGPR:$rhs),
-               "adc $dst, $rhs",
-               [(set tGPR:$dst, (adde tGPR:$lhs, tGPR:$rhs))]>;
+// Add with carry register
+let isCommutable = 1, Defs = [CPSR], Uses = [CPSR] in
+def tADCS : T1It<(outs tGPR:$dst), (ins tGPR:$lhs, tGPR:$rhs),
+                "adc $dst, $rhs",
+                [(set tGPR:$dst, (adde tGPR:$lhs, tGPR:$rhs))]>;
 
-def tADDS : TI<(outs tGPR:$dst), (ins tGPR:$lhs, tGPR:$rhs),
-               "add $dst, $lhs, $rhs",
-               [(set tGPR:$dst, (addc tGPR:$lhs, tGPR:$rhs))]>;
-
-
-def tADDi3 : TI<(outs tGPR:$dst), (ins tGPR:$lhs, i32imm:$rhs),
+// Add immediate
+let Defs = [CPSR] in {
+def tADDi3 : T1I<(outs tGPR:$dst), (ins tGPR:$lhs, i32imm:$rhs),
                 "add $dst, $lhs, $rhs",
                 [(set tGPR:$dst, (add tGPR:$lhs, imm0_7:$rhs))]>;
+def tADDSi3 : T1I<(outs tGPR:$dst), (ins tGPR:$lhs, i32imm:$rhs),
+                 "add $dst, $lhs, $rhs",
+                 [(set tGPR:$dst, (addc tGPR:$lhs, imm0_7:$rhs))]>;
+}
 
-def tADDi8 : TIt<(outs tGPR:$dst), (ins tGPR:$lhs, i32imm:$rhs),
+let Defs = [CPSR] in {
+def tADDi8 : T1It<(outs tGPR:$dst), (ins tGPR:$lhs, i32imm:$rhs),
                  "add $dst, $rhs",
                  [(set tGPR:$dst, (add tGPR:$lhs, imm8_255:$rhs))]>;
+def tADDSi8 : T1It<(outs tGPR:$dst), (ins tGPR:$lhs, i32imm:$rhs),
+                  "add $dst, $rhs",
+                  [(set tGPR:$dst, (addc tGPR:$lhs, imm8_255:$rhs))]>;
+}
 
-def tADDrr : TI<(outs tGPR:$dst), (ins tGPR:$lhs, tGPR:$rhs),
+// Add register
+let isCommutable = 1, Defs = [CPSR] in {
+def tADDrr : T1I<(outs tGPR:$dst), (ins tGPR:$lhs, tGPR:$rhs),
                 "add $dst, $lhs, $rhs",
                 [(set tGPR:$dst, (add tGPR:$lhs, tGPR:$rhs))]>;
+def tADDSrr : T1I<(outs tGPR:$dst), (ins tGPR:$lhs, tGPR:$rhs),
+                 "add $dst, $lhs, $rhs",
+                 [(set tGPR:$dst, (addc tGPR:$lhs, tGPR:$rhs))]>;
+}
 
 let neverHasSideEffects = 1 in
-def tADDhirr : TIt<(outs tGPR:$dst), (ins GPR:$lhs, GPR:$rhs),
+def tADDhirr : T1It<(outs tGPR:$dst), (ins GPR:$lhs, GPR:$rhs),
                    "add $dst, $rhs @ addhirr", []>;
 
-def tADDrPCi : TI<(outs tGPR:$dst), (ins i32imm:$rhs),
-                  "add $dst, pc, $rhs * 4", []>;
-
-def tADDrSPi : TI<(outs tGPR:$dst), (ins GPR:$sp, i32imm:$rhs),
-                  "add $dst, $sp, $rhs * 4 @ addrspi", []>;
-
-def tADDspi : TIt<(outs GPR:$dst), (ins GPR:$lhs, i32imm:$rhs),
-                  "add $dst, $rhs * 4", []>;
-
-let isCommutable = 1 in
-def tAND : TIt<(outs tGPR:$dst), (ins tGPR:$lhs, tGPR:$rhs),
+// And register
+let isCommutable = 1, Defs = [CPSR] in
+def tAND : T1It<(outs tGPR:$dst), (ins tGPR:$lhs, tGPR:$rhs),
                 "and $dst, $rhs",
                 [(set tGPR:$dst, (and tGPR:$lhs, tGPR:$rhs))]>;
 
-def tASRri : TI<(outs tGPR:$dst), (ins tGPR:$lhs, i32imm:$rhs),
+// ASR immediate
+let Defs = [CPSR] in
+def tASRri : T1I<(outs tGPR:$dst), (ins tGPR:$lhs, i32imm:$rhs),
                 "asr $dst, $lhs, $rhs",
                 [(set tGPR:$dst, (sra tGPR:$lhs, (i32 imm:$rhs)))]>;
 
-def tASRrr : TIt<(outs tGPR:$dst), (ins tGPR:$lhs, tGPR:$rhs),
+// ASR register
+let Defs = [CPSR] in
+def tASRrr : T1It<(outs tGPR:$dst), (ins tGPR:$lhs, tGPR:$rhs),
                  "asr $dst, $rhs",
                  [(set tGPR:$dst, (sra tGPR:$lhs, tGPR:$rhs))]>;
 
-def tBIC : TIt<(outs tGPR:$dst), (ins tGPR:$lhs, tGPR:$rhs),
+// BIC register
+let Defs = [CPSR] in
+def tBIC : T1It<(outs tGPR:$dst), (ins tGPR:$lhs, tGPR:$rhs),
                "bic $dst, $rhs",
                [(set tGPR:$dst, (and tGPR:$lhs, (not tGPR:$rhs)))]>;
 
-
-def tCMN : TI<(outs), (ins tGPR:$lhs, tGPR:$rhs),
+// CMN register
+let Defs = [CPSR] in {
+def tCMN : T1I<(outs), (ins tGPR:$lhs, tGPR:$rhs),
               "cmn $lhs, $rhs",
               [(ARMcmp tGPR:$lhs, (ineg tGPR:$rhs))]>;
-
-def tCMPi8 : TI<(outs), (ins tGPR:$lhs, i32imm:$rhs),
-               "cmp $lhs, $rhs",
-               [(ARMcmp tGPR:$lhs, imm0_255:$rhs)]>;
-
-def tCMPr : TI<(outs), (ins tGPR:$lhs, tGPR:$rhs),
-               "cmp $lhs, $rhs",
-               [(ARMcmp tGPR:$lhs, tGPR:$rhs)]>;
-
-def tTST  : TI<(outs), (ins tGPR:$lhs, tGPR:$rhs),
-               "tst $lhs, $rhs",
-               [(ARMcmpNZ (and tGPR:$lhs, tGPR:$rhs), 0)]>;
-
-def tCMNNZ : TI<(outs), (ins tGPR:$lhs, tGPR:$rhs),
+def tCMNNZ : T1I<(outs), (ins tGPR:$lhs, tGPR:$rhs),
                 "cmn $lhs, $rhs",
                 [(ARMcmpNZ tGPR:$lhs, (ineg tGPR:$rhs))]>;
+}
 
-def tCMPNZi8 : TI<(outs), (ins tGPR:$lhs, i32imm:$rhs),
+// CMP immediate
+let Defs = [CPSR] in {
+def tCMPi8 : T1I<(outs), (ins tGPR:$lhs, i32imm:$rhs),
+               "cmp $lhs, $rhs",
+               [(ARMcmp tGPR:$lhs, imm0_255:$rhs)]>;
+def tCMPNZi8 : T1I<(outs), (ins tGPR:$lhs, i32imm:$rhs),
                  "cmp $lhs, $rhs",
                  [(ARMcmpNZ tGPR:$lhs, imm0_255:$rhs)]>;
 
-def tCMPNZr : TI<(outs), (ins tGPR:$lhs, tGPR:$rhs),
+}
+
+// CMP register
+let Defs = [CPSR] in {
+def tCMPr : T1I<(outs), (ins tGPR:$lhs, tGPR:$rhs),
+               "cmp $lhs, $rhs",
+               [(ARMcmp tGPR:$lhs, tGPR:$rhs)]>;
+def tCMPNZr : T1I<(outs), (ins tGPR:$lhs, tGPR:$rhs),
                  "cmp $lhs, $rhs",
                  [(ARMcmpNZ tGPR:$lhs, tGPR:$rhs)]>;
+}
 
 // TODO: A7-37: CMP(3) - cmp hi regs
 
-let isCommutable = 1 in
-def tEOR : TIt<(outs tGPR:$dst), (ins tGPR:$lhs, tGPR:$rhs),
+// XOR register
+let isCommutable = 1, Defs = [CPSR] in
+def tEOR : T1It<(outs tGPR:$dst), (ins tGPR:$lhs, tGPR:$rhs),
                "eor $dst, $rhs",
                [(set tGPR:$dst, (xor tGPR:$lhs, tGPR:$rhs))]>;
 
-def tLSLri : TI<(outs tGPR:$dst), (ins tGPR:$lhs, i32imm:$rhs),
+// LSL immediate
+let Defs = [CPSR] in
+def tLSLri : T1I<(outs tGPR:$dst), (ins tGPR:$lhs, i32imm:$rhs),
                 "lsl $dst, $lhs, $rhs",
                 [(set tGPR:$dst, (shl tGPR:$lhs, (i32 imm:$rhs)))]>;
 
-def tLSLrr : TIt<(outs tGPR:$dst), (ins tGPR:$lhs, tGPR:$rhs),
+// LSL register
+let Defs = [CPSR] in
+def tLSLrr : T1It<(outs tGPR:$dst), (ins tGPR:$lhs, tGPR:$rhs),
                  "lsl $dst, $rhs",
                  [(set tGPR:$dst, (shl tGPR:$lhs, tGPR:$rhs))]>;
 
-def tLSRri : TI<(outs tGPR:$dst), (ins tGPR:$lhs, i32imm:$rhs),
+// LSR immediate
+let Defs = [CPSR] in
+def tLSRri : T1I<(outs tGPR:$dst), (ins tGPR:$lhs, i32imm:$rhs),
                 "lsr $dst, $lhs, $rhs",
                 [(set tGPR:$dst, (srl tGPR:$lhs, (i32 imm:$rhs)))]>;
 
-def tLSRrr : TIt<(outs tGPR:$dst), (ins tGPR:$lhs, tGPR:$rhs),
+// LSR register
+let Defs = [CPSR] in
+def tLSRrr : T1It<(outs tGPR:$dst), (ins tGPR:$lhs, tGPR:$rhs),
                  "lsr $dst, $rhs",
                  [(set tGPR:$dst, (srl tGPR:$lhs, tGPR:$rhs))]>;
 
-// FIXME: This is not rematerializable because mov changes the condition code.
-def tMOVi8 : TI<(outs tGPR:$dst), (ins i32imm:$src),
+// move immediate
+let Defs = [CPSR] in
+def tMOVi8 : T1I<(outs tGPR:$dst), (ins i32imm:$src),
                  "mov $dst, $src",
                  [(set tGPR:$dst, imm0_255:$src)]>;
 
@@ -392,41 +432,47 @@ def tMOVi8 : TI<(outs tGPR:$dst), (ins i32imm:$src),
 // Note: MOV(2) of two low regs updates the flags, so we emit this as 'cpy',
 // which is MOV(3).  This also supports high registers.
 let neverHasSideEffects = 1 in {
-def tMOVr       : TI<(outs tGPR:$dst), (ins tGPR:$src),
+def tMOVr       : T1I<(outs tGPR:$dst), (ins tGPR:$src),
                       "cpy $dst, $src", []>;
-def tMOVhir2lor : TI<(outs tGPR:$dst), (ins GPR:$src),
+def tMOVhir2lor : T1I<(outs tGPR:$dst), (ins GPR:$src),
                       "cpy $dst, $src\t@ hir2lor", []>;
-def tMOVlor2hir : TI<(outs GPR:$dst), (ins tGPR:$src),
+def tMOVlor2hir : T1I<(outs GPR:$dst), (ins tGPR:$src),
                       "cpy $dst, $src\t@ lor2hir", []>;
-def tMOVhir2hir : TI<(outs GPR:$dst), (ins GPR:$src),
+def tMOVhir2hir : T1I<(outs GPR:$dst), (ins GPR:$src),
                       "cpy $dst, $src\t@ hir2hir", []>;
 } // neverHasSideEffects
 
-let isCommutable = 1 in
-def tMUL : TIt<(outs tGPR:$dst), (ins tGPR:$lhs, tGPR:$rhs),
+// multiply register
+let isCommutable = 1, Defs = [CPSR] in
+def tMUL : T1It<(outs tGPR:$dst), (ins tGPR:$lhs, tGPR:$rhs),
                "mul $dst, $rhs",
                [(set tGPR:$dst, (mul tGPR:$lhs, tGPR:$rhs))]>;
 
-def tMVN : TI<(outs tGPR:$dst), (ins tGPR:$src),
+// move inverse register
+let Defs = [CPSR] in
+def tMVN : T1I<(outs tGPR:$dst), (ins tGPR:$src),
               "mvn $dst, $src",
               [(set tGPR:$dst, (not tGPR:$src))]>;
 
-def tNEG : TI<(outs tGPR:$dst), (ins tGPR:$src),
+// negate register
+let Defs = [CPSR] in
+def tNEG : T1I<(outs tGPR:$dst), (ins tGPR:$src),
               "neg $dst, $src",
               [(set tGPR:$dst, (ineg tGPR:$src))]>;
 
-let isCommutable = 1 in
-def tORR : TIt<(outs tGPR:$dst), (ins tGPR:$lhs, tGPR:$rhs),
+// bitwise or register
+let isCommutable = 1, Defs = [CPSR] in
+def tORR : T1It<(outs tGPR:$dst), (ins tGPR:$lhs, tGPR:$rhs),
                "orr $dst, $rhs",
                [(set tGPR:$dst, (or tGPR:$lhs, tGPR:$rhs))]>;
 
-
-def tREV : TI<(outs tGPR:$dst), (ins tGPR:$src),
+// swaps
+def tREV : T1I<(outs tGPR:$dst), (ins tGPR:$src),
               "rev $dst, $src",
               [(set tGPR:$dst, (bswap tGPR:$src))]>,
               Requires<[IsThumb, HasV6]>;
 
-def tREV16 : TI<(outs tGPR:$dst), (ins tGPR:$src),
+def tREV16 : T1I<(outs tGPR:$dst), (ins tGPR:$src),
                 "rev16 $dst, $src",
                 [(set tGPR:$dst,
                     (or (and (srl tGPR:$src, (i32 8)), 0xFF),
@@ -435,7 +481,7 @@ def tREV16 : TI<(outs tGPR:$dst), (ins tGPR:$src),
                                 (and (shl tGPR:$src, (i32 8)), 0xFF000000)))))]>,
                 Requires<[IsThumb, HasV6]>;
 
-def tREVSH : TI<(outs tGPR:$dst), (ins tGPR:$src),
+def tREVSH : T1I<(outs tGPR:$dst), (ins tGPR:$src),
                 "revsh $dst, $src",
                 [(set tGPR:$dst,
                    (sext_inreg
@@ -443,53 +489,78 @@ def tREVSH : TI<(outs tGPR:$dst), (ins tGPR:$src),
                          (shl tGPR:$src, (i32 8))), i16))]>,
                 Requires<[IsThumb, HasV6]>;
 
-def tROR : TIt<(outs tGPR:$dst), (ins tGPR:$lhs, tGPR:$rhs),
+// rotate right register
+let Defs = [CPSR] in
+def tROR : T1It<(outs tGPR:$dst), (ins tGPR:$lhs, tGPR:$rhs),
                 "ror $dst, $rhs",
                 [(set tGPR:$dst, (rotr tGPR:$lhs, tGPR:$rhs))]>;
 
-
-// Subtract with carry
-def tSBC : TIt<(outs tGPR:$dst), (ins tGPR:$lhs, tGPR:$rhs),
+// Subtract with carry register
+let Defs = [CPSR], Uses = [CPSR] in
+def tSBCS : T1It<(outs tGPR:$dst), (ins tGPR:$lhs, tGPR:$rhs),
                 "sbc $dst, $rhs",
                 [(set tGPR:$dst, (sube tGPR:$lhs, tGPR:$rhs))]>;
 
-def tSUBS : TI<(outs tGPR:$dst), (ins tGPR:$lhs, tGPR:$rhs),
-                "sub $dst, $lhs, $rhs",
-               [(set tGPR:$dst, (subc tGPR:$lhs, tGPR:$rhs))]>;
-
-
-// TODO: A7-96: STMIA - store multiple.
-
-def tSUBi3 : TI<(outs tGPR:$dst), (ins tGPR:$lhs, i32imm:$rhs),
+// Subtract immediate
+let Defs = [CPSR] in {
+def tSUBi3 : T1I<(outs tGPR:$dst), (ins tGPR:$lhs, i32imm:$rhs),
                 "sub $dst, $lhs, $rhs",
                 [(set tGPR:$dst, (add tGPR:$lhs, imm0_7_neg:$rhs))]>;
+def tSUBSi3 : T1I<(outs tGPR:$dst), (ins tGPR:$lhs, i32imm:$rhs),
+                 "sub $dst, $lhs, $rhs",
+                 [(set tGPR:$dst, (addc tGPR:$lhs, imm0_7_neg:$rhs))]>;
+}
 
-def tSUBi8 : TIt<(outs tGPR:$dst), (ins tGPR:$lhs, i32imm:$rhs),
+let Defs = [CPSR] in {
+def tSUBi8 : T1It<(outs tGPR:$dst), (ins tGPR:$lhs, i32imm:$rhs),
                   "sub $dst, $rhs",
                   [(set tGPR:$dst, (add tGPR:$lhs, imm8_255_neg:$rhs))]>;
+def tSUBSi8 : T1It<(outs tGPR:$dst), (ins tGPR:$lhs, i32imm:$rhs),
+                   "sub $dst, $rhs",
+                   [(set tGPR:$dst, (addc tGPR:$lhs, imm8_255_neg:$rhs))]>;
+}
 
-def tSUBrr : TI<(outs tGPR:$dst), (ins tGPR:$lhs, tGPR:$rhs),
+// subtract register
+let Defs = [CPSR] in {
+def tSUBrr : T1I<(outs tGPR:$dst), (ins tGPR:$lhs, tGPR:$rhs),
                 "sub $dst, $lhs, $rhs",
                 [(set tGPR:$dst, (sub tGPR:$lhs, tGPR:$rhs))]>;
+def tSUBSrr : T1I<(outs tGPR:$dst), (ins tGPR:$lhs, tGPR:$rhs),
+                "sub $dst, $lhs, $rhs",
+                [(set tGPR:$dst, (subc tGPR:$lhs, tGPR:$rhs))]>;
+}
 
-def tSUBspi : TIt<(outs tGPR:$dst), (ins tGPR:$lhs, i32imm:$rhs),
+// TODO: A7-96: STMIA - store multiple.
+
+def tSUBspi : T1It<(outs tGPR:$dst), (ins tGPR:$lhs, i32imm:$rhs),
                   "sub $dst, $rhs * 4", []>;
 
-def tSXTB  : TI<(outs tGPR:$dst), (ins tGPR:$src),
+// sign-extend byte
+def tSXTB  : T1I<(outs tGPR:$dst), (ins tGPR:$src),
                 "sxtb $dst, $src",
                 [(set tGPR:$dst, (sext_inreg tGPR:$src, i8))]>,
                 Requires<[IsThumb, HasV6]>;
-def tSXTH  : TI<(outs tGPR:$dst), (ins tGPR:$src),
+
+// sign-extend short
+def tSXTH  : T1I<(outs tGPR:$dst), (ins tGPR:$src),
                 "sxth $dst, $src",
                 [(set tGPR:$dst, (sext_inreg tGPR:$src, i16))]>,
                 Requires<[IsThumb, HasV6]>;
 
+// test
+let isCommutable = 1, Defs = [CPSR] in
+def tTST  : T1I<(outs), (ins tGPR:$lhs, tGPR:$rhs),
+               "tst $lhs, $rhs",
+               [(ARMcmpNZ (and tGPR:$lhs, tGPR:$rhs), 0)]>;
 
-def tUXTB  : TI<(outs tGPR:$dst), (ins tGPR:$src),
+// zero-extend byte
+def tUXTB  : T1I<(outs tGPR:$dst), (ins tGPR:$src),
                 "uxtb $dst, $src",
                 [(set tGPR:$dst, (and tGPR:$src, 0xFF))]>,
                 Requires<[IsThumb, HasV6]>;
-def tUXTH  : TI<(outs tGPR:$dst), (ins tGPR:$src),
+
+// zero-extend short
+def tUXTH  : T1I<(outs tGPR:$dst), (ins tGPR:$src),
                 "uxth $dst, $src",
                 [(set tGPR:$dst, (and tGPR:$src, 0xFFFF))]>,
                 Requires<[IsThumb, HasV6]>;
@@ -536,35 +607,35 @@ let isCall = 1,
 //
 
 // ConstantPool, GlobalAddress
-def : ThumbPat<(ARMWrapper  tglobaladdr :$dst), (tLEApcrel tglobaladdr :$dst)>;
-def : ThumbPat<(ARMWrapper  tconstpool  :$dst), (tLEApcrel tconstpool  :$dst)>;
+def : TPat<(ARMWrapper  tglobaladdr :$dst), (tLEApcrel tglobaladdr :$dst)>;
+def : TPat<(ARMWrapper  tconstpool  :$dst), (tLEApcrel tconstpool  :$dst)>;
 
 // JumpTable
-def : ThumbPat<(ARMWrapperJT tjumptable:$dst, imm:$id),
-               (tLEApcrelJT tjumptable:$dst, imm:$id)>;
+def : TPat<(ARMWrapperJT tjumptable:$dst, imm:$id),
+           (tLEApcrelJT tjumptable:$dst, imm:$id)>;
 
 // Direct calls
-def : ThumbPat<(ARMtcall texternalsym:$func), (tBL texternalsym:$func)>;
-def : ThumbV5Pat<(ARMcall texternalsym:$func), (tBLXi texternalsym:$func)>;
+def : TPat<(ARMtcall texternalsym:$func), (tBL texternalsym:$func)>;
+def : Tv5Pat<(ARMcall texternalsym:$func), (tBLXi texternalsym:$func)>;
 
 // Indirect calls to ARM routines
-def : ThumbV5Pat<(ARMcall tGPR:$dst), (tBLXr tGPR:$dst)>;
+def : Tv5Pat<(ARMcall tGPR:$dst), (tBLXr tGPR:$dst)>;
 
 // zextload i1 -> zextload i8
-def : ThumbPat<(zextloadi1 t_addrmode_s1:$addr),
-               (tLDRB t_addrmode_s1:$addr)>;
+def : TPat<(zextloadi1 t_addrmode_s1:$addr),
+           (tLDRB t_addrmode_s1:$addr)>;
 
 // extload -> zextload
-def : ThumbPat<(extloadi1  t_addrmode_s1:$addr),  (tLDRB t_addrmode_s1:$addr)>;
-def : ThumbPat<(extloadi8  t_addrmode_s1:$addr),  (tLDRB t_addrmode_s1:$addr)>;
-def : ThumbPat<(extloadi16 t_addrmode_s2:$addr),  (tLDRH t_addrmode_s2:$addr)>;
+def : TPat<(extloadi1  t_addrmode_s1:$addr),  (tLDRB t_addrmode_s1:$addr)>;
+def : TPat<(extloadi8  t_addrmode_s1:$addr),  (tLDRB t_addrmode_s1:$addr)>;
+def : TPat<(extloadi16 t_addrmode_s2:$addr),  (tLDRH t_addrmode_s2:$addr)>;
 
 // Large immediate handling.
 
 // Two piece imms.
-def : ThumbPat<(i32 thumb_immshifted:$src),
-               (tLSLri (tMOVi8 (thumb_immshifted_val imm:$src)),
-                       (thumb_immshifted_shamt imm:$src))>;
+def : T1Pat<(i32 thumb_immshifted:$src),
+            (tLSLri (tMOVi8 (thumb_immshifted_val imm:$src)),
+                    (thumb_immshifted_shamt imm:$src))>;
 
-def : ThumbPat<(i32 imm0_255_comp:$src),
-               (tMVN (tMOVi8 (imm_comp_XFORM imm:$src)))>;
+def : T1Pat<(i32 imm0_255_comp:$src),
+            (tMVN (tMOVi8 (imm_comp_XFORM imm:$src)))>;
diff --git a/lib/Target/ARM/ARMInstrThumb2.td b/lib/Target/ARM/ARMInstrThumb2.td
index e0617e4..bfdf719 100644
--- a/lib/Target/ARM/ARMInstrThumb2.td
+++ b/lib/Target/ARM/ARMInstrThumb2.td
@@ -14,9 +14,9 @@
 // Shifted operands. No register controlled shifts for Thumb2.
 // Note: We do not support rrx shifted operands yet.
 def t2_so_reg : Operand<i32>,    // reg imm
-                ComplexPattern<i32, 2, "SelectThumb2ShifterOperandReg",
+                ComplexPattern<i32, 2, "SelectT2ShifterOperandReg",
                                [shl,srl,sra,rotr]> {
-  let PrintMethod = "printSOOperand";
+  let PrintMethod = "printT2SOOperand";
   let MIOperandInfo = (ops GPR, i32imm);
 }
 
@@ -69,6 +69,11 @@ def t2_so_imm_neg : Operand<i32>,
   let PrintMethod = "printT2SOImmOperand";
 }
 
+/// imm1_31 predicate - True if the 32-bit immediate is in the range [1,31].
+def imm1_31 : PatLeaf<(i32 imm), [{
+  return (int32_t)N->getZExtValue() >= 1 && (int32_t)N->getZExtValue() < 32;
+}]>;
+
 /// imm0_4095 predicate - True if the 32-bit immediate is in the range [0.4095].
 def imm0_4095 : PatLeaf<(i32 imm), [{
   return (uint32_t)N->getZExtValue() < 4096;
@@ -121,137 +126,287 @@ def t2_lo16AllZero : PatLeaf<(i32 imm), [{
   return (((uint32_t)N->getZExtValue()) & 0xFFFFUL) == 0;
   }], t2_hi16>;
 
+
 //===----------------------------------------------------------------------===//
-//  Thumb2 to cover the functionality of the ARM instruction set.
+// Multiclass helpers...
 //
 
-/// T2I_bin_is - Defines a set of (op reg, {so_imm|so_reg}) patterns for a
-//  binary operation that produces a value.
-multiclass T2I_bin_is<string opc, PatFrag opnode> {
+/// T2I_un_irs - Defines a set of (op reg, {so_imm|r|so_reg}) patterns for a
+/// unary operation that produces a value. These are predicable and can be
+/// changed to modify CPSR.
+multiclass T2I_un_irs<string opc, PatFrag opnode, bit Cheap = 0, bit ReMat = 0>{
    // shifted imm
-   def ri : T2I<(outs GPR:$dst), (ins GPR:$lhs, t2_so_imm:$rhs),
-                !strconcat(opc, " $dst, $lhs, $rhs"),
-                [(set GPR:$dst, (opnode GPR:$lhs, t2_so_imm:$rhs))]>;
+   def i : T2sI<(outs GPR:$dst), (ins t2_so_imm:$src),
+                opc, " $dst, $src",
+                [(set GPR:$dst, (opnode t2_so_imm:$src))]> {
+     let isAsCheapAsAMove = Cheap;
+     let isReMaterializable = ReMat;
+   }
+   // register
+   def r : T2I<(outs GPR:$dst), (ins GPR:$src),
+               opc, " $dst, $src",
+                [(set GPR:$dst, (opnode GPR:$src))]>;
    // shifted register
-   def rs : T2I<(outs GPR:$dst), (ins GPR:$lhs, t2_so_reg:$rhs),
-                !strconcat(opc, " $dst, $lhs, $rhs"),
-                [(set GPR:$dst, (opnode GPR:$lhs, t2_so_reg:$rhs))]>;
+   def s : T2I<(outs GPR:$dst), (ins t2_so_reg:$src),
+               opc, " $dst, $src",
+               [(set GPR:$dst, (opnode t2_so_reg:$src))]>;
 }
 
-/// T2I_2bin_is - Same as T2I_bin_is except the order of operands are reversed.
+/// T2I_bin_irs - Defines a set of (op reg, {so_imm|r|so_reg}) patterns for a
+/// binary operation that produces a value. These are predicable and can be
+/// changed to modify CPSR.
+multiclass T2I_bin_irs<string opc, PatFrag opnode, bit Commutable = 0> {
+   // shifted imm
+   def ri : T2sI<(outs GPR:$dst), (ins GPR:$lhs, t2_so_imm:$rhs),
+                 opc, " $dst, $lhs, $rhs",
+                 [(set GPR:$dst, (opnode GPR:$lhs, t2_so_imm:$rhs))]>;
+   // register
+   def rr : T2sI<(outs GPR:$dst), (ins GPR:$lhs, GPR:$rhs),
+                 opc, " $dst, $lhs, $rhs",
+                 [(set GPR:$dst, (opnode GPR:$lhs, GPR:$rhs))]> {
+     let isCommutable = Commutable;
+   }
+   // shifted register
+   def rs : T2sI<(outs GPR:$dst), (ins GPR:$lhs, t2_so_reg:$rhs),
+                 opc, " $dst, $lhs, $rhs",
+                 [(set GPR:$dst, (opnode GPR:$lhs, t2_so_reg:$rhs))]>;
+}
+
+/// T2I_rbin_is - Same as T2I_bin_irs except the order of operands is
+/// reversed. It doesn't define the 'rr' form since it's handled by its
+/// T2I_bin_irs counterpart.
 multiclass T2I_rbin_is<string opc, PatFrag opnode> {
    // shifted imm
    def ri : T2I<(outs GPR:$dst), (ins GPR:$rhs, t2_so_imm:$lhs),
-                !strconcat(opc, " $dst, $lhs, $rhs"),
+                opc, " $dst, $rhs, $lhs",
                 [(set GPR:$dst, (opnode t2_so_imm:$lhs, GPR:$rhs))]>;
    // shifted register
    def rs : T2I<(outs GPR:$dst), (ins GPR:$rhs, t2_so_reg:$lhs),
-                !strconcat(opc, " $dst, $lhs, $rhs"),
+                opc, " $dst, $rhs, $lhs",
                 [(set GPR:$dst, (opnode t2_so_reg:$lhs, GPR:$rhs))]>;
 }
 
-/// T2I_bin_s_is - Similar to T2I_bin_is except it sets the 's' bit so the
+/// T2I_bin_s_irs - Similar to T2I_bin_irs except it sets the 's' bit so the
 /// instruction modifies the CPSR register.
 let Defs = [CPSR] in {
-multiclass T2I_bin_s_is<string opc, PatFrag opnode> {
+multiclass T2I_bin_s_irs<string opc, PatFrag opnode, bit Commutable = 0> {
    // shifted imm
    def ri : T2I<(outs GPR:$dst), (ins GPR:$lhs, t2_so_imm:$rhs),
-                !strconcat(opc, "s $dst, $lhs, $rhs"),
+                !strconcat(opc, "s"), " $dst, $lhs, $rhs",
                 [(set GPR:$dst, (opnode GPR:$lhs, t2_so_imm:$rhs))]>;
-
+   // register
+   def rr : T2I<(outs GPR:$dst), (ins GPR:$lhs, GPR:$rhs),
+                !strconcat(opc, "s"), " $dst, $lhs, $rhs",
+                [(set GPR:$dst, (opnode GPR:$lhs, GPR:$rhs))]> {
+     let isCommutable = Commutable;
+   }
    // shifted register
    def rs : T2I<(outs GPR:$dst), (ins GPR:$lhs, t2_so_reg:$rhs),
-                !strconcat(opc, "s $dst, $lhs, $rhs"),
+                !strconcat(opc, "s"), " $dst, $lhs, $rhs",
                 [(set GPR:$dst, (opnode GPR:$lhs, t2_so_reg:$rhs))]>;
 }
 }
 
-/// T2I_rbin_s_is - Same as T2I_bin_s_is except the order of operands are
-/// reversed.
-let Defs = [CPSR] in {
-multiclass T2I_rbin_s_is<string opc, PatFrag opnode> {
+/// T2I_bin_ii12rs - Defines a set of (op reg, {so_imm|imm0_4095|r|so_reg})
+/// patterns for a binary operation that produces a value.
+multiclass T2I_bin_ii12rs<string opc, PatFrag opnode, bit Commutable = 0> {
    // shifted imm
-   def ri : T2I<(outs GPR:$dst), (ins GPR:$rhs, t2_so_imm:$lhs),
-                !strconcat(opc, "s $dst, $lhs, $rhs"),
-                [(set GPR:$dst, (opnode t2_so_imm:$lhs, GPR:$rhs))]>;
-
+   def ri : T2sI<(outs GPR:$dst), (ins GPR:$lhs, t2_so_imm:$rhs),
+                 opc, " $dst, $lhs, $rhs",
+                 [(set GPR:$dst, (opnode GPR:$lhs, t2_so_imm:$rhs))]>;
+   // 12-bit imm
+   def ri12 : T2sI<(outs GPR:$dst), (ins GPR:$lhs, i32imm:$rhs),
+                   !strconcat(opc, "w"), " $dst, $lhs, $rhs",
+                   [(set GPR:$dst, (opnode GPR:$lhs, imm0_4095:$rhs))]>;
+   // register
+   def rr : T2sI<(outs GPR:$dst), (ins GPR:$lhs, GPR:$rhs),
+                 opc, " $dst, $lhs, $rhs",
+                 [(set GPR:$dst, (opnode GPR:$lhs, GPR:$rhs))]> {
+     let isCommutable = Commutable;
+   }
    // shifted register
-   def rs : T2I<(outs GPR:$dst), (ins GPR:$rhs, t2_so_reg:$lhs),
-                !strconcat(opc, "s $dst, $lhs, $rhs"),
-                [(set GPR:$dst, (opnode t2_so_reg:$lhs, GPR:$rhs))]>;
-}
+   def rs : T2sI<(outs GPR:$dst), (ins GPR:$lhs, t2_so_reg:$rhs),
+                 opc, " $dst, $lhs, $rhs",
+                 [(set GPR:$dst, (opnode GPR:$lhs, t2_so_reg:$rhs))]>;
 }
 
-/// T2I_bin_ii12s - Defines a set of (op reg, {so_imm|imm0_4095|so_reg}) patterns
-/// for a binary operation that produces a value.
-multiclass T2I_bin_ii12s<string opc, PatFrag opnode> {
+/// T2I_adde_sube_irs - Defines a set of (op reg, {so_imm|r|so_reg}) patterns for a
+/// binary operation that produces a value and uses and defines the carry bit.
+/// It's not predicable.
+let Uses = [CPSR] in {
+multiclass T2I_adde_sube_irs<string opc, PatFrag opnode, bit Commutable = 0> {
    // shifted imm
-   def ri : T2I<(outs GPR:$dst), (ins GPR:$lhs, t2_so_imm:$rhs),
-                !strconcat(opc, " $dst, $lhs, $rhs"),
-                [(set GPR:$dst, (opnode GPR:$lhs, t2_so_imm:$rhs))]>;
-   // 12-bit imm
-   def ri12 : T2I<(outs GPR:$dst), (ins GPR:$lhs, i32imm:$rhs),
-                !strconcat(opc, "w $dst, $lhs, $rhs"),
-                [(set GPR:$dst, (opnode GPR:$lhs, imm0_4095:$rhs))]>;
+   def ri : T2sI<(outs GPR:$dst), (ins GPR:$lhs, t2_so_imm:$rhs),
+                 opc, " $dst, $lhs, $rhs",
+                 [(set GPR:$dst, (opnode GPR:$lhs, t2_so_imm:$rhs))]>,
+                 Requires<[IsThumb, HasThumb2, CarryDefIsUnused]>;
+   // register
+   def rr : T2sI<(outs GPR:$dst), (ins GPR:$lhs, GPR:$rhs),
+                 opc, " $dst, $lhs, $rhs",
+                 [(set GPR:$dst, (opnode GPR:$lhs, GPR:$rhs))]>,
+                 Requires<[IsThumb, HasThumb2, CarryDefIsUnused]> {
+     let isCommutable = Commutable;
+   }
    // shifted register
-   def rs : T2I<(outs GPR:$dst), (ins GPR:$lhs, t2_so_reg:$rhs),
-                !strconcat(opc, " $dst, $lhs, $rhs"),
-                [(set GPR:$dst, (opnode GPR:$lhs, t2_so_reg:$rhs))]>;
+   def rs : T2sI<(outs GPR:$dst), (ins GPR:$lhs, t2_so_reg:$rhs),
+                 opc, " $dst, $lhs, $rhs",
+                 [(set GPR:$dst, (opnode GPR:$lhs, t2_so_reg:$rhs))]>,
+                 Requires<[IsThumb, HasThumb2, CarryDefIsUnused]>;
+   // Carry setting variants
+   // shifted imm
+   def Sri : T2XI<(outs GPR:$dst), (ins GPR:$lhs, t2_so_imm:$rhs),
+                  !strconcat(opc, "s $dst, $lhs, $rhs"),
+                  [(set GPR:$dst, (opnode GPR:$lhs, t2_so_imm:$rhs))]>,
+                  Requires<[IsThumb, HasThumb2, CarryDefIsUsed]> {
+                    let Defs = [CPSR];
+                  }
+   // register
+   def Srr : T2XI<(outs GPR:$dst), (ins GPR:$lhs, GPR:$rhs),
+                  !strconcat(opc, "s $dst, $lhs, $rhs"),
+                  [(set GPR:$dst, (opnode GPR:$lhs, GPR:$rhs))]>,
+                  Requires<[IsThumb, HasThumb2, CarryDefIsUsed]> {
+                    let Defs = [CPSR];
+                    let isCommutable = Commutable;
+   }
+   // shifted register
+   def Srs : T2XI<(outs GPR:$dst), (ins GPR:$lhs, t2_so_reg:$rhs),
+                  !strconcat(opc, "s $dst, $lhs, $rhs"),
+                  [(set GPR:$dst, (opnode GPR:$lhs, t2_so_reg:$rhs))]>,
+                  Requires<[IsThumb, HasThumb2, CarryDefIsUsed]> {
+                    let Defs = [CPSR];
+   }
+}
 }
 
-/// T2I_bin_c_is - Defines a set of (op reg, {so_imm|reg}) patterns for a
-//  binary operation that produces a value and set the carry bit. It can also
-/// optionally set CPSR.
-let Uses = [CPSR] in {
-multiclass T2I_bin_c_is<string opc, PatFrag opnode> {
+/// T2I_rsc_is - Same as T2I_adde_sube_irs except the order of operands is
+/// reversed. It doesn't define the 'rr' form since it's handled by its
+/// T2I_adde_sube_irs counterpart.
+let Defs = [CPSR], Uses = [CPSR] in {
+multiclass T2I_rsc_is<string opc, PatFrag opnode> {
    // shifted imm
-   def ri : T2I<(outs GPR:$dst), (ins GPR:$lhs, t2_so_imm:$rhs, cc_out:$s),
-                !strconcat(opc, "${s} $dst, $lhs, $rhs"),
-                [(set GPR:$dst, (opnode GPR:$lhs, t2_so_imm:$rhs))]>;
-
+   def ri : T2sI<(outs GPR:$dst), (ins GPR:$rhs, t2_so_imm:$lhs),
+                 opc, " $dst, $rhs, $lhs",
+                 [(set GPR:$dst, (opnode t2_so_imm:$lhs, GPR:$rhs))]>,
+                 Requires<[IsThumb, HasThumb2, CarryDefIsUnused]>;
    // shifted register
-   def rs : T2I<(outs GPR:$dst), (ins GPR:$lhs, t2_so_reg:$rhs, cc_out:$s),
-                !strconcat(opc, "${s} $dst, $lhs, $rhs"),
-                [(set GPR:$dst, (opnode GPR:$lhs, t2_so_reg:$rhs))]>;
+   def rs : T2sI<(outs GPR:$dst), (ins GPR:$rhs, t2_so_reg:$lhs),
+                 opc, " $dst, $rhs, $lhs",
+                 [(set GPR:$dst, (opnode t2_so_reg:$lhs, GPR:$rhs))]>,
+                 Requires<[IsThumb, HasThumb2, CarryDefIsUnused]>;
+   // shifted imm
+   def Sri : T2XI<(outs GPR:$dst), (ins GPR:$rhs, t2_so_imm:$lhs),
+                 !strconcat(opc, "s $dst, $rhs, $lhs"),
+                 [(set GPR:$dst, (opnode t2_so_imm:$lhs, GPR:$rhs))]>,
+                 Requires<[IsThumb, HasThumb2, CarryDefIsUsed]> {
+                   let Defs = [CPSR];
+   }
+   // shifted register
+   def Srs : T2XI<(outs GPR:$dst), (ins GPR:$rhs, t2_so_reg:$lhs),
+                 !strconcat(opc, "s $dst, $rhs, $lhs"),
+                 [(set GPR:$dst, (opnode t2_so_reg:$lhs, GPR:$rhs))]>,
+                 Requires<[IsThumb, HasThumb2, CarryDefIsUsed]> {
+                   let Defs = [CPSR];
+   }
 }
 }
 
-/// T2I_rbin_c_is - Same as T2I_bin_c_is except the order of operands are
-/// reversed.
-let Uses = [CPSR] in {
-multiclass T2I_rbin_c_is<string opc, PatFrag opnode> {
+/// T2I_rbin_s_is - Same as T2I_bin_s_irs except the order of operands is
+/// reversed. It doesn't define the 'rr' form since it's handled by its
+/// T2I_bin_s_irs counterpart.
+let Defs = [CPSR] in {
+multiclass T2I_rbin_s_is<string opc, PatFrag opnode> {
    // shifted imm
-   def ri : T2I<(outs GPR:$dst), (ins GPR:$rhs, t2_so_imm:$lhs, cc_out:$s),
-                !strconcat(opc, "${s} $dst, $lhs, $rhs"),
-                [(set GPR:$dst, (opnode t2_so_imm:$lhs, GPR:$rhs))]>;
-
+   def ri : T2XI<(outs GPR:$dst), (ins GPR:$rhs, t2_so_imm:$lhs, cc_out:$s),
+                 !strconcat(opc, "${s} $dst, $rhs, $lhs"),
+                 [(set GPR:$dst, (opnode t2_so_imm:$lhs, GPR:$rhs))]>;
    // shifted register
-   def rs : T2I<(outs GPR:$dst), (ins GPR:$rhs, t2_so_reg:$lhs, cc_out:$s),
-                !strconcat(opc, "${s} $dst, $lhs, $rhs"),
-                [(set GPR:$dst, (opnode t2_so_reg:$lhs, GPR:$rhs))]>;
+   def rs : T2XI<(outs GPR:$dst), (ins GPR:$rhs, t2_so_reg:$lhs, cc_out:$s),
+                 !strconcat(opc, "${s} $dst, $rhs, $lhs"),
+                 [(set GPR:$dst, (opnode t2_so_reg:$lhs, GPR:$rhs))]>;
 }
 }
 
+/// T2I_sh_ir - Defines a set of (op reg, {so_imm|r}) patterns for a shift /
+/// rotate operation that produces a value.
+multiclass T2I_sh_ir<string opc, PatFrag opnode> {
+   // 5-bit imm
+   def ri : T2sI<(outs GPR:$dst), (ins GPR:$lhs, i32imm:$rhs),
+                 opc, " $dst, $lhs, $rhs",
+                 [(set GPR:$dst, (opnode GPR:$lhs, imm1_31:$rhs))]>;
+   // register
+   def rr : T2sI<(outs GPR:$dst), (ins GPR:$lhs, GPR:$rhs),
+                 opc, " $dst, $lhs, $rhs",
+                 [(set GPR:$dst, (opnode GPR:$lhs, GPR:$rhs))]>;
+}
 
-/// T21_cmp_irs - Defines a set of (op r, {so_imm|so_reg}) cmp / test
-/// patterns. Similar to T2I_bin_is except the instruction does not produce
+/// T2I_cmp_is - Defines a set of (op r, {so_imm|r|so_reg}) cmp / test
+/// patterns. Similar to T2I_bin_irs except the instruction does not produce
 /// a explicit result, only implicitly set CPSR.
 let Uses = [CPSR] in {
 multiclass T2I_cmp_is<string opc, PatFrag opnode> {
    // shifted imm
    def ri : T2I<(outs), (ins GPR:$lhs, t2_so_imm:$rhs),
-                !strconcat(opc, " $lhs, $rhs"),
+                opc, " $lhs, $rhs",
                 [(opnode GPR:$lhs, t2_so_imm:$rhs)]>;
-
+   // register
+   def rr : T2I<(outs), (ins GPR:$lhs, GPR:$rhs),
+                opc, " $lhs, $rhs",
+                [(opnode GPR:$lhs, GPR:$rhs)]>;
    // shifted register
    def rs : T2I<(outs), (ins GPR:$lhs, t2_so_reg:$rhs),
-                !strconcat(opc, " $lhs, $rhs"),
+                opc, " $lhs, $rhs",
                 [(opnode GPR:$lhs, t2_so_reg:$rhs)]>;
 }
 }
 
 //===----------------------------------------------------------------------===//
-//  Arithmetic Instructions.
+// Instructions
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+//  Miscellaneous Instructions.
+//
+
+let isNotDuplicable = 1 in
+def t2PICADD : T2XI<(outs tGPR:$dst), (ins tGPR:$lhs, pclabel:$cp),
+                    "$cp:\n\tadd $dst, pc",
+                    [(set tGPR:$dst, (ARMpic_add tGPR:$lhs, imm:$cp))]>;
+
+
+// LEApcrel - Load a pc-relative address into a register without offending the
+// assembler.
+def t2LEApcrel : T2XI<(outs GPR:$dst), (ins i32imm:$label, pred:$p),
+                   !strconcat(!strconcat(".set PCRELV${:uid}, ($label-(",
+                                         "${:private}PCRELL${:uid}+8))\n"),
+                              !strconcat("${:private}PCRELL${:uid}:\n\t",
+                                         "add$p $dst, pc, #PCRELV${:uid}")),
+                   []>;
+
+def t2LEApcrelJT : T2XI<(outs GPR:$dst),
+                       (ins i32imm:$label, i32imm:$id, pred:$p),
+          !strconcat(!strconcat(".set PCRELV${:uid}, (${label}_${id:no_hash}-(",
+                                         "${:private}PCRELL${:uid}+8))\n"),
+                              !strconcat("${:private}PCRELL${:uid}:\n\t",
+                                         "add$p $dst, pc, #PCRELV${:uid}")),
+                   []>;
+
+// ADD rd, sp, #so_imm
+def t2ADDrSPi : T2XI<(outs GPR:$dst), (ins GPR:$sp, t2_so_imm:$imm),
+                     "add $dst, $sp, $imm",
+                     []>;
+
+// ADD rd, sp, #imm12
+def t2ADDrSPi12 : T2XI<(outs GPR:$dst), (ins GPR:$sp, i32imm:$imm),
+                       "addw $dst, $sp, $imm",
+                       []>;
+
+// ADD rd, sp, so_reg ('addw' is only the 12-bit immediate form)
+def t2ADDrSPs : T2XI<(outs GPR:$dst), (ins GPR:$sp, t2_so_reg:$rhs),
+                     "add $dst, $sp, $rhs", []>;
+
+
+//===----------------------------------------------------------------------===//
+//  Load / store Instructions.
 //
 
 //===----------------------------------------------------------------------===//
@@ -259,90 +414,95 @@ multiclass T2I_cmp_is<string opc, PatFrag opnode> {
 //
 
 let neverHasSideEffects = 1 in
-def t2MOVr : T2I<(outs GPR:$dst), (ins GPR:$src),
-                  "mov $dst, $src", []>;
+def t2MOVr : T2sI<(outs GPR:$dst), (ins GPR:$src),
+                   "mov", " $dst, $src", []>;
+
+let isReMaterializable = 1, isAsCheapAsAMove = 1 in
+def t2MOVi : T2sI<(outs GPR:$dst), (ins t2_so_imm:$src),
+                   "mov", " $dst, $src",
+                   [(set GPR:$dst, t2_so_imm:$src)]>;
 
+let isReMaterializable = 1, isAsCheapAsAMove = 1 in
 def t2MOVi16 : T2I<(outs GPR:$dst), (ins i32imm:$src),
-                   "movw $dst, $src",
+                   "movw", " $dst, $src",
                    [(set GPR:$dst, imm0_65535:$src)]>;
 
-
-// FIXME: Move (shifted register) is a pseudo-instruction for ASR, LSL, LSR,
-// ROR, and RRX. Consider splitting into multiple instructions.
-def t2MOVs  : T2I<(outs GPR:$dst), (ins t2_so_reg:$src),
-                  "mov $dst, $src",
-                  [(set GPR:$dst, t2_so_reg:$src)]>;
-def t2MOVrx : T2I<(outs GPR:$dst), (ins GPR:$src),
-                  "mov $dst, $src, rrx",
-                  [(set GPR:$dst, (ARMrrx GPR:$src))]>;
-
-
 // FIXME: Also available in ARM mode.
 let Constraints = "$src = $dst" in
-def t2MOVTi16 : T2I<(outs GPR:$dst), (ins GPR:$src, i32imm:$imm),
-                   "movt $dst, $imm",
-                   [(set GPR:$dst,
-                         (or (and GPR:$src, 0xffff), t2_lo16AllZero:$imm))]>;
+def t2MOVTi16 : T2sI<(outs GPR:$dst), (ins GPR:$src, i32imm:$imm),
+                     "movt", " $dst, $imm",
+                     [(set GPR:$dst,
+                           (or (and GPR:$src, 0xffff), t2_lo16AllZero:$imm))]>;
 
 //===----------------------------------------------------------------------===//
 //  Arithmetic Instructions.
 //
 
-defm t2ADD  : T2I_bin_ii12s<"add", BinOpFrag<(add  node:$LHS, node:$RHS)>>;
-defm t2SUB  : T2I_bin_ii12s<"sub", BinOpFrag<(sub  node:$LHS, node:$RHS)>>;
+defm t2ADD  : T2I_bin_ii12rs<"add", BinOpFrag<(add  node:$LHS, node:$RHS)>, 1>;
+defm t2SUB  : T2I_bin_ii12rs<"sub", BinOpFrag<(sub  node:$LHS, node:$RHS)>>;
 
 // ADD and SUB with 's' bit set. No 12-bit immediate (T4) variants.
-defm t2ADDS : T2I_bin_s_is<"add", BinOpFrag<(addc node:$LHS, node:$RHS)>>;
-defm t2SUBS : T2I_bin_s_is<"sub", BinOpFrag<(subc node:$LHS, node:$RHS)>>;
+defm t2ADDS : T2I_bin_s_irs <"add",  BinOpFrag<(addc node:$LHS, node:$RHS)>, 1>;
+defm t2SUBS : T2I_bin_s_irs <"sub",  BinOpFrag<(subc node:$LHS, node:$RHS)>>;
 
-// FIXME: predication support
-defm t2ADC  : T2I_bin_c_is<"adc", BinOpFrag<(adde node:$LHS, node:$RHS)>>;
-defm t2SBC  : T2I_bin_c_is<"sbc", BinOpFrag<(sube node:$LHS, node:$RHS)>>;
+defm t2ADC  : T2I_adde_sube_irs<"adc",BinOpFrag<(adde node:$LHS, node:$RHS)>,1>;
+defm t2SBC  : T2I_adde_sube_irs<"sbc",BinOpFrag<(sube node:$LHS, node:$RHS)>>;
 
 // RSB, RSC
-defm t2RSB  : T2I_rbin_is  <"rsb", BinOpFrag<(sub  node:$LHS, node:$RHS)>>;
-defm t2RSBS : T2I_rbin_c_is<"rsb", BinOpFrag<(subc node:$LHS, node:$RHS)>>;
-defm t2RSC  : T2I_rbin_s_is<"rsc", BinOpFrag<(sube node:$LHS, node:$RHS)>>;
+defm t2RSB  : T2I_rbin_is   <"rsb", BinOpFrag<(sub  node:$LHS, node:$RHS)>>;
+defm t2RSBS : T2I_rbin_s_is <"rsb", BinOpFrag<(subc node:$LHS, node:$RHS)>>;
+defm t2RSC  : T2I_rsc_is    <"rsc", BinOpFrag<(sube node:$LHS, node:$RHS)>>;
 
 // (sub X, imm) gets canonicalized to (add X, -imm).  Match this form.
-def : Thumb2Pat<(add       GPR:$src, t2_so_imm_neg:$imm),
-                (t2SUBri   GPR:$src, t2_so_imm_neg:$imm)>;
-def : Thumb2Pat<(add       GPR:$src, imm0_4095_neg:$imm),
-                (t2SUBri12 GPR:$src, imm0_4095_neg:$imm)>;
+def : T2Pat<(add       GPR:$src, t2_so_imm_neg:$imm),
+            (t2SUBri   GPR:$src, t2_so_imm_neg:$imm)>;
+def : T2Pat<(add       GPR:$src, imm0_4095_neg:$imm),
+            (t2SUBri12 GPR:$src, imm0_4095_neg:$imm)>;
 
 
 //===----------------------------------------------------------------------===//
+//  Shift and rotate Instructions.
+//
+
+defm t2LSL  : T2I_sh_ir<"lsl", BinOpFrag<(shl  node:$LHS, node:$RHS)>>;
+defm t2LSR  : T2I_sh_ir<"lsr", BinOpFrag<(srl  node:$LHS, node:$RHS)>>;
+defm t2ASR  : T2I_sh_ir<"asr", BinOpFrag<(sra  node:$LHS, node:$RHS)>>;
+defm t2ROR  : T2I_sh_ir<"ror", BinOpFrag<(rotr node:$LHS, node:$RHS)>>;
+
+def t2MOVrx : T2sI<(outs GPR:$dst), (ins GPR:$src),
+                   "mov", " $dst, $src, rrx",
+                   [(set GPR:$dst, (ARMrrx GPR:$src))]>;
+
+//===----------------------------------------------------------------------===//
 //  Bitwise Instructions.
 //
 
-defm t2AND  : T2I_bin_is  <"and", BinOpFrag<(and node:$LHS, node:$RHS)>>;
-defm t2ORR  : T2I_bin_is  <"orr", BinOpFrag<(or  node:$LHS, node:$RHS)>>;
-defm t2EOR  : T2I_bin_is  <"eor", BinOpFrag<(xor node:$LHS, node:$RHS)>>;
+defm t2AND  : T2I_bin_irs<"and", BinOpFrag<(and node:$LHS, node:$RHS)>, 1>;
+defm t2ORR  : T2I_bin_irs<"orr", BinOpFrag<(or  node:$LHS, node:$RHS)>, 1>;
+defm t2EOR  : T2I_bin_irs<"eor", BinOpFrag<(xor node:$LHS, node:$RHS)>, 1>;
 
-defm t2BIC  : T2I_bin_is  <"bic", BinOpFrag<(and node:$LHS, (not node:$RHS))>>;
+defm t2BIC  : T2I_bin_irs<"bic", BinOpFrag<(and node:$LHS, (not node:$RHS))>>;
 
-def : Thumb2Pat<(and     GPR:$src, t2_so_imm_not:$imm),
-                (t2BICri GPR:$src, t2_so_imm_not:$imm)>;
+def : T2Pat<(and     GPR:$src, t2_so_imm_not:$imm),
+            (t2BICri GPR:$src, t2_so_imm_not:$imm)>;
 
-defm t2ORN  : T2I_bin_is  <"orn", BinOpFrag<(or  node:$LHS, (not node:$RHS))>>;
+defm t2ORN  : T2I_bin_irs<"orn", BinOpFrag<(or  node:$LHS, (not node:$RHS))>>;
 
-def : Thumb2Pat<(or      GPR:$src, t2_so_imm_not:$imm),
-                (t2ORNri GPR:$src, t2_so_imm_not:$imm)>;
+def : T2Pat<(or      GPR:$src, t2_so_imm_not:$imm),
+            (t2ORNri GPR:$src, t2_so_imm_not:$imm)>;
 
+// Prefer this over t2EORri ra, rb, -1 because mvn has a 16-bit version
+let AddedComplexity = 1 in
+defm t2MVN  : T2I_un_irs  <"mvn", UnOpFrag<(not node:$Src)>, 1, 1>;
 
-def t2MVNr : T2I<(outs GPR:$dst), (ins t2_so_reg:$rhs),
-                  "mvn $dst, $rhs",
-                 [(set GPR:$dst, (not t2_so_reg:$rhs))]>;
-let isReMaterializable = 1, isAsCheapAsAMove = 1 in
-def t2MVNi : T2I<(outs GPR:$dst), (ins t2_so_imm_not:$rhs),
-                  "mvn $dst, $rhs",
-                 [(set GPR:$dst, t2_so_imm_not:$rhs)]>;
+def : T2Pat<(t2_so_imm_not:$src),
+            (t2MVNi t2_so_imm_not:$src)>;
 
 // A8.6.17  BFC - Bitfield clear
 // FIXME: Also available in ARM mode.
 let Constraints = "$src = $dst" in
 def t2BFC : T2I<(outs GPR:$dst), (ins GPR:$src, bf_inv_mask_imm:$imm),
-                "bfc $dst, $imm",
+                "bfc", " $dst, $imm",
                 [(set GPR:$dst, (and GPR:$src, bf_inv_mask_imm:$imm))]>;
 
 // FIXME: A8.6.18  BFI - Bitfield insert (Encoding T1)
@@ -350,16 +510,17 @@ def t2BFC : T2I<(outs GPR:$dst), (ins GPR:$src, bf_inv_mask_imm:$imm),
 //===----------------------------------------------------------------------===//
 //  Multiply Instructions.
 //
+let isCommutable = 1 in
 def t2MUL: T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b),
-                "mul $dst, $a, $b",
+                "mul", " $dst, $a, $b",
                 [(set GPR:$dst, (mul GPR:$a, GPR:$b))]>;
 
 def t2MLA: T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$c),
-		"mla $dst, $a, $b, $c",
+		"mla", " $dst, $a, $b, $c",
 		[(set GPR:$dst, (add (mul GPR:$a, GPR:$b), GPR:$c))]>;
 
 def t2MLS: T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$c),
-		"mls $dst, $a, $b, $c",
+		"mls", " $dst, $a, $b, $c",
                 [(set GPR:$dst, (sub GPR:$c, (mul GPR:$a, GPR:$b)))]>;
 
 // FIXME: SMULL, etc.
@@ -368,20 +529,16 @@ def t2MLS: T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$c),
 //  Misc. Arithmetic Instructions.
 //
 
-/////
-/// A8.6.31  CLZ
-/////
-// FIXME not firing? but ARM version does...
 def t2CLZ : T2I<(outs GPR:$dst), (ins GPR:$src),
-                "clz $dst, $src",
+                "clz", " $dst, $src",
                 [(set GPR:$dst, (ctlz GPR:$src))]>;
 
 def t2REV : T2I<(outs GPR:$dst), (ins GPR:$src),
-                "rev $dst, $src",
+                "rev", " $dst, $src",
                 [(set GPR:$dst, (bswap GPR:$src))]>;
 
 def t2REV16 : T2I<(outs GPR:$dst), (ins GPR:$src),
-                "rev16 $dst, $src",
+                "rev16", " $dst, $src",
                 [(set GPR:$dst,
                     (or (and (srl GPR:$src, (i32 8)), 0xFF),
                         (or (and (shl GPR:$src, (i32 8)), 0xFF00),
@@ -392,7 +549,7 @@ def t2REV16 : T2I<(outs GPR:$dst), (ins GPR:$src),
 /// A8.6.137  REVSH
 /////
 def t2REVSH : T2I<(outs GPR:$dst), (ins GPR:$src),
-                 "revsh $dst, $src",
+                 "revsh", " $dst, $src",
                  [(set GPR:$dst,
                     (sext_inreg
                       (or (srl (and GPR:$src, 0xFFFF), (i32 8)),
@@ -414,11 +571,11 @@ defm t2CMN   : T2I_cmp_is<"cmn",
 defm t2CMNnz : T2I_cmp_is<"cmn",
                           BinOpFrag<(ARMcmpNZ node:$LHS,(ineg node:$RHS))>>;
 
-def : Thumb2Pat<(ARMcmp  GPR:$src, t2_so_imm_neg:$imm),
-                (t2CMNri GPR:$src, t2_so_imm_neg:$imm)>;
+def : T2Pat<(ARMcmp  GPR:$src, t2_so_imm_neg:$imm),
+            (t2CMNri GPR:$src, t2_so_imm_neg:$imm)>;
 
-def : Thumb2Pat<(ARMcmpNZ  GPR:$src, t2_so_imm_neg:$imm),
-                (t2CMNri   GPR:$src, t2_so_imm_neg:$imm)>;
+def : T2Pat<(ARMcmpNZ  GPR:$src, t2_so_imm_neg:$imm),
+            (t2CMNri   GPR:$src, t2_so_imm_neg:$imm)>;
 
 // FIXME: TST, TEQ, etc.
 
@@ -433,8 +590,13 @@ def : Thumb2Pat<(ARMcmpNZ  GPR:$src, t2_so_imm_neg:$imm),
 // Non-Instruction Patterns
 //
 
+// ConstantPool, GlobalAddress, and JumpTable
+def : T2Pat<(ARMWrapper  tglobaladdr :$dst), (t2LEApcrel tglobaladdr :$dst)>;
+def : T2Pat<(ARMWrapper  tconstpool  :$dst), (t2LEApcrel tconstpool  :$dst)>;
+def : T2Pat<(ARMWrapperJT tjumptable:$dst, imm:$id),
+            (t2LEApcrelJT tjumptable:$dst, imm:$id)>;
+
 // Large immediate handling.
 
-def : Thumb2Pat<(i32 imm:$src),
-                (t2MOVTi16 (t2MOVi16 (t2_lo16 imm:$src)),
-                           (t2_hi16 imm:$src))>;
+def : T2Pat<(i32 imm:$src),
+            (t2MOVTi16 (t2MOVi16 (t2_lo16 imm:$src)), (t2_hi16 imm:$src))>;
diff --git a/lib/Target/ARM/ARMTargetAsmInfo.cpp b/lib/Target/ARM/ARMTargetAsmInfo.cpp
index 42b8eae..bf2c14e 100644
--- a/lib/Target/ARM/ARMTargetAsmInfo.cpp
+++ b/lib/Target/ARM/ARMTargetAsmInfo.cpp
@@ -43,7 +43,7 @@ const char *const llvm::arm_asm_table[] = {
   0,0
 };
 
-ARMDarwinTargetAsmInfo::ARMDarwinTargetAsmInfo(const ARMTargetMachine &TM):
+ARMDarwinTargetAsmInfo::ARMDarwinTargetAsmInfo(const ARMBaseTargetMachine &TM):
   ARMTargetAsmInfo<DarwinTargetAsmInfo>(TM) {
   Subtarget = &TM.getSubtarget<ARMSubtarget>();
 
@@ -55,7 +55,7 @@ ARMDarwinTargetAsmInfo::ARMDarwinTargetAsmInfo(const ARMTargetMachine &TM):
   SupportsDebugInformation = true;
 }
 
-ARMELFTargetAsmInfo::ARMELFTargetAsmInfo(const ARMTargetMachine &TM):
+ARMELFTargetAsmInfo::ARMELFTargetAsmInfo(const ARMBaseTargetMachine &TM):
   ARMTargetAsmInfo<ELFTargetAsmInfo>(TM) {
   Subtarget = &TM.getSubtarget<ARMSubtarget>();
 
diff --git a/lib/Target/ARM/ARMTargetAsmInfo.h b/lib/Target/ARM/ARMTargetAsmInfo.h
index 683692f..d3f2da0 100644
--- a/lib/Target/ARM/ARMTargetAsmInfo.h
+++ b/lib/Target/ARM/ARMTargetAsmInfo.h
@@ -26,7 +26,7 @@ namespace llvm {
 
   template <class BaseTAI>
   struct ARMTargetAsmInfo : public BaseTAI {
-    explicit ARMTargetAsmInfo(const ARMTargetMachine &TM) : BaseTAI(TM) {
+    explicit ARMTargetAsmInfo(const ARMBaseTargetMachine &TM) : BaseTAI(TM) {
       BaseTAI::AsmTransCBE = arm_asm_table;
 
       BaseTAI::AlignmentIsInBytes = false;
@@ -51,11 +51,11 @@ namespace llvm {
   EXTERN_TEMPLATE_INSTANTIATION(class ARMTargetAsmInfo<TargetAsmInfo>);
 
   struct ARMDarwinTargetAsmInfo : public ARMTargetAsmInfo<DarwinTargetAsmInfo> {
-    explicit ARMDarwinTargetAsmInfo(const ARMTargetMachine &TM);
+    explicit ARMDarwinTargetAsmInfo(const ARMBaseTargetMachine &TM);
   };
 
   struct ARMELFTargetAsmInfo : public ARMTargetAsmInfo<ELFTargetAsmInfo> {
-    explicit ARMELFTargetAsmInfo(const ARMTargetMachine &TM);
+    explicit ARMELFTargetAsmInfo(const ARMBaseTargetMachine &TM);
   };
 
 } // namespace llvm
diff --git a/lib/Target/ARM/ARMTargetMachine.cpp b/lib/Target/ARM/ARMTargetMachine.cpp
index 8006b9b..f7b8215 100644
--- a/lib/Target/ARM/ARMTargetMachine.cpp
+++ b/lib/Target/ARM/ARMTargetMachine.cpp
@@ -39,13 +39,11 @@ int ARMTargetMachineModule = 0;
 static RegisterTarget<ARMTargetMachine>   X("arm",   "ARM");
 static RegisterTarget<ThumbTargetMachine> Y("thumb", "Thumb");
 
-// Force static initialization when called from llvm/InitializeAllTargets.h
-namespace llvm {
-  void InitializeARMTarget() { }
-}
+// Force static initialization.
+extern "C" void LLVMInitializeARMTarget() { }
 
 // No assembler printer by default
-ARMTargetMachine::AsmPrinterCtorFn ARMTargetMachine::AsmPrinterCtor = 0;
+ARMBaseTargetMachine::AsmPrinterCtorFn ARMBaseTargetMachine::AsmPrinterCtor = 0;
 
 /// ThumbTargetMachine - Create an Thumb architecture model.
 ///
@@ -76,34 +74,36 @@ unsigned ThumbTargetMachine::getModuleMatchQuality(const Module &M) {
   return getJITMatchQuality()/2;
 }
 
-ThumbTargetMachine::ThumbTargetMachine(const Module &M, const std::string &FS)
-  : ARMTargetMachine(M, FS, true) {
-}
-
 /// TargetMachine ctor - Create an ARM architecture model.
 ///
-ARMTargetMachine::ARMTargetMachine(const Module &M, const std::string &FS,
-                                   bool isThumb)
+ARMBaseTargetMachine::ARMBaseTargetMachine(const Module &M,
+                                           const std::string &FS,
+                                           bool isThumb)
   : Subtarget(M, FS, isThumb),
-    DataLayout(Subtarget.isAPCS_ABI() ?
-               // APCS ABI
-          (isThumb ?
-           std::string("e-p:32:32-f64:32:32-i64:32:32-"
-                       "i16:16:32-i8:8:32-i1:8:32-a:0:32") :
-           std::string("e-p:32:32-f64:32:32-i64:32:32")) :
-               // AAPCS ABI
-          (isThumb ?
-           std::string("e-p:32:32-f64:64:64-i64:64:64-"
-                       "i16:16:32-i8:8:32-i1:8:32-a:0:32") :
-           std::string("e-p:32:32-f64:64:64-i64:64:64"))),
-    InstrInfo(Subtarget),
     FrameInfo(Subtarget),
     JITInfo(),
-    TLInfo(*this),
     InstrItins(Subtarget.getInstrItineraryData()) {
   DefRelocModel = getRelocationModel();
 }
 
+ARMTargetMachine::ARMTargetMachine(const Module &M, const std::string &FS)
+  : ARMBaseTargetMachine(M, FS, false), InstrInfo(Subtarget),
+    DataLayout(Subtarget.isAPCS_ABI() ?
+               std::string("e-p:32:32-f64:32:32-i64:32:32") :
+               std::string("e-p:32:32-f64:64:64-i64:64:64")),
+    TLInfo(*this) {
+}
+
+ThumbTargetMachine::ThumbTargetMachine(const Module &M, const std::string &FS)
+  : ARMBaseTargetMachine(M, FS, true), InstrInfo(Subtarget),
+    DataLayout(Subtarget.isAPCS_ABI() ?
+               std::string("e-p:32:32-f64:32:32-i64:32:32-"
+                           "i16:16:32-i8:8:32-i1:8:32-a:0:32") :
+               std::string("e-p:32:32-f64:64:64-i64:64:64-"
+                           "i16:16:32-i8:8:32-i1:8:32-a:0:32")),
+    TLInfo(*this) {
+}
+
 unsigned ARMTargetMachine::getJITMatchQuality() {
 #if defined(__arm__)
   return 10;
@@ -131,7 +131,7 @@ unsigned ARMTargetMachine::getModuleMatchQuality(const Module &M) {
 }
 
 
-const TargetAsmInfo *ARMTargetMachine::createTargetAsmInfo() const {
+const TargetAsmInfo *ARMBaseTargetMachine::createTargetAsmInfo() const {
   switch (Subtarget.TargetType) {
    case ARMSubtarget::isDarwin:
     return new ARMDarwinTargetAsmInfo(*this);
@@ -144,22 +144,22 @@ const TargetAsmInfo *ARMTargetMachine::createTargetAsmInfo() const {
 
 
 // Pass Pipeline Configuration
-bool ARMTargetMachine::addInstSelector(PassManagerBase &PM,
-                                       CodeGenOpt::Level OptLevel) {
+bool ARMBaseTargetMachine::addInstSelector(PassManagerBase &PM,
+                                           CodeGenOpt::Level OptLevel) {
   PM.add(createARMISelDag(*this));
   return false;
 }
 
-bool ARMTargetMachine::addPreRegAlloc(PassManagerBase &PM,
-                                      CodeGenOpt::Level OptLevel) {
+bool ARMBaseTargetMachine::addPreRegAlloc(PassManagerBase &PM,
+                                          CodeGenOpt::Level OptLevel) {
   // FIXME: temporarily disabling load / store optimization pass for Thumb mode.
   if (OptLevel != CodeGenOpt::None && !DisableLdStOpti && !Subtarget.isThumb())
     PM.add(createARMLoadStoreOptimizationPass(true));
   return true;
 }
 
-bool ARMTargetMachine::addPreEmitPass(PassManagerBase &PM,
-                                      CodeGenOpt::Level OptLevel) {
+bool ARMBaseTargetMachine::addPreEmitPass(PassManagerBase &PM,
+                                          CodeGenOpt::Level OptLevel) {
   // FIXME: temporarily disabling load / store optimization pass for Thumb mode.
   if (OptLevel != CodeGenOpt::None && !DisableLdStOpti && !Subtarget.isThumb())
     PM.add(createARMLoadStoreOptimizationPass());
@@ -172,10 +172,10 @@ bool ARMTargetMachine::addPreEmitPass(PassManagerBase &PM,
   return true;
 }
 
-bool ARMTargetMachine::addAssemblyEmitter(PassManagerBase &PM,
-                                          CodeGenOpt::Level OptLevel,
-                                          bool Verbose,
-                                          raw_ostream &Out) {
+bool ARMBaseTargetMachine::addAssemblyEmitter(PassManagerBase &PM,
+                                              CodeGenOpt::Level OptLevel,
+                                              bool Verbose,
+                                              raw_ostream &Out) {
   // Output assembly language.
   assert(AsmPrinterCtor && "AsmPrinter was not linked in");
   if (AsmPrinterCtor)
@@ -185,10 +185,10 @@ bool ARMTargetMachine::addAssemblyEmitter(PassManagerBase &PM,
 }
 
 
-bool ARMTargetMachine::addCodeEmitter(PassManagerBase &PM,
-                                      CodeGenOpt::Level OptLevel,
-                                      bool DumpAsm,
-                                      MachineCodeEmitter &MCE) {
+bool ARMBaseTargetMachine::addCodeEmitter(PassManagerBase &PM,
+                                          CodeGenOpt::Level OptLevel,
+                                          bool DumpAsm,
+                                          MachineCodeEmitter &MCE) {
   // FIXME: Move this to TargetJITInfo!
   if (DefRelocModel == Reloc::Default)
     setRelocationModel(Reloc::Static);
@@ -204,10 +204,10 @@ bool ARMTargetMachine::addCodeEmitter(PassManagerBase &PM,
   return false;
 }
 
-bool ARMTargetMachine::addCodeEmitter(PassManagerBase &PM,
-                                      CodeGenOpt::Level OptLevel,
-                                      bool DumpAsm,
-                                      JITCodeEmitter &JCE) {
+bool ARMBaseTargetMachine::addCodeEmitter(PassManagerBase &PM,
+                                          CodeGenOpt::Level OptLevel,
+                                          bool DumpAsm,
+                                          JITCodeEmitter &JCE) {
   // FIXME: Move this to TargetJITInfo!
   if (DefRelocModel == Reloc::Default)
     setRelocationModel(Reloc::Static);
@@ -223,10 +223,10 @@ bool ARMTargetMachine::addCodeEmitter(PassManagerBase &PM,
   return false;
 }
 
-bool ARMTargetMachine::addSimpleCodeEmitter(PassManagerBase &PM,
-                                            CodeGenOpt::Level OptLevel,
-                                            bool DumpAsm,
-                                            MachineCodeEmitter &MCE) {
+bool ARMBaseTargetMachine::addSimpleCodeEmitter(PassManagerBase &PM,
+                                                CodeGenOpt::Level OptLevel,
+                                                bool DumpAsm,
+                                                MachineCodeEmitter &MCE) {
   // Machine code emitter pass for ARM.
   PM.add(createARMCodeEmitterPass(*this, MCE));
   if (DumpAsm) {
@@ -238,10 +238,10 @@ bool ARMTargetMachine::addSimpleCodeEmitter(PassManagerBase &PM,
   return false;
 }
 
-bool ARMTargetMachine::addSimpleCodeEmitter(PassManagerBase &PM,
-                                            CodeGenOpt::Level OptLevel,
-                                            bool DumpAsm,
-                                            JITCodeEmitter &JCE) {
+bool ARMBaseTargetMachine::addSimpleCodeEmitter(PassManagerBase &PM,
+                                                CodeGenOpt::Level OptLevel,
+                                                bool DumpAsm,
+                                                JITCodeEmitter &JCE) {
   // Machine code emitter pass for ARM.
   PM.add(createARMJITCodeEmitterPass(*this, JCE));
   if (DumpAsm) {
diff --git a/lib/Target/ARM/ARMTargetMachine.h b/lib/Target/ARM/ARMTargetMachine.h
index c4c8e6c..0b49b92 100644
--- a/lib/Target/ARM/ARMTargetMachine.h
+++ b/lib/Target/ARM/ARMTargetMachine.h
@@ -22,18 +22,19 @@
 #include "ARMJITInfo.h"
 #include "ARMSubtarget.h"
 #include "ARMISelLowering.h"
+#include "ThumbInstrInfo.h"
 
 namespace llvm {
 
 class Module;
 
-class ARMTargetMachine : public LLVMTargetMachine {
+class ARMBaseTargetMachine : public LLVMTargetMachine {
+protected:
   ARMSubtarget        Subtarget;
-  const TargetData    DataLayout;       // Calculates type size & alignment
-  ARMInstrInfo        InstrInfo;
+
+private:
   ARMFrameInfo        FrameInfo;
   ARMJITInfo          JITInfo;
-  ARMTargetLowering   TLInfo;
   InstrItineraryData  InstrItins;
   Reloc::Model        DefRelocModel;    // Reloc model before it's overridden.
 
@@ -41,26 +42,18 @@ protected:
   // To avoid having target depend on the asmprinter stuff libraries, asmprinter
   // set this functions to ctor pointer at startup time if they are linked in.
   typedef FunctionPass *(*AsmPrinterCtorFn)(raw_ostream &o,
-                                            ARMTargetMachine &tm,
+                                            ARMBaseTargetMachine &tm,
                                             CodeGenOpt::Level OptLevel,
                                             bool verbose);
   static AsmPrinterCtorFn AsmPrinterCtor;
 
 public:
-  ARMTargetMachine(const Module &M, const std::string &FS, bool isThumb = false);
+  ARMBaseTargetMachine(const Module &M, const std::string &FS, bool isThumb);
 
-  virtual const ARMInstrInfo     *getInstrInfo() const { return &InstrInfo; }
   virtual const ARMFrameInfo     *getFrameInfo() const { return &FrameInfo; }
   virtual       ARMJITInfo       *getJITInfo()         { return &JITInfo; }
-  virtual const ARMRegisterInfo  *getRegisterInfo() const {
-    return &InstrInfo.getRegisterInfo();
-  }
-  virtual const TargetData       *getTargetData() const { return &DataLayout; }
   virtual const ARMSubtarget  *getSubtargetImpl() const { return &Subtarget; }
-  virtual       ARMTargetLowering *getTargetLowering() const {
-    return const_cast<ARMTargetLowering*>(&TLInfo);
-  }
-  virtual const InstrItineraryData getInstrItineraryData() const {  
+  virtual const InstrItineraryData getInstrItineraryData() const {
     return InstrItins;
   }
 
@@ -94,12 +87,50 @@ public:
                                     JITCodeEmitter &MCE);
 };
 
+/// ARMTargetMachine - ARM target machine.
+///
+class ARMTargetMachine : public ARMBaseTargetMachine {
+  ARMInstrInfo        InstrInfo;
+  const TargetData    DataLayout;       // Calculates type size & alignment
+  ARMTargetLowering   TLInfo;
+public:
+  ARMTargetMachine(const Module &M, const std::string &FS);
+
+  virtual const ARMRegisterInfo  *getRegisterInfo() const {
+    return &InstrInfo.getRegisterInfo();
+  }
+
+  virtual       ARMTargetLowering *getTargetLowering() const {
+    return const_cast<ARMTargetLowering*>(&TLInfo);
+  }
+
+  virtual const ARMInstrInfo     *getInstrInfo() const { return &InstrInfo; }
+  virtual const TargetData       *getTargetData() const { return &DataLayout; }
+
+  static unsigned getJITMatchQuality();
+  static unsigned getModuleMatchQuality(const Module &M);
+};
+
 /// ThumbTargetMachine - Thumb target machine.
 ///
-class ThumbTargetMachine : public ARMTargetMachine {
+class ThumbTargetMachine : public ARMBaseTargetMachine {
+  ThumbInstrInfo      InstrInfo;
+  const TargetData    DataLayout;       // Calculates type size & alignment
+  ARMTargetLowering   TLInfo;
 public:
   ThumbTargetMachine(const Module &M, const std::string &FS);
 
+  virtual const ARMRegisterInfo  *getRegisterInfo() const {
+    return &InstrInfo.getRegisterInfo();
+  }
+
+  virtual       ARMTargetLowering *getTargetLowering() const {
+    return const_cast<ARMTargetLowering*>(&TLInfo);
+  }
+
+  virtual const ThumbInstrInfo   *getInstrInfo() const { return &InstrInfo; }
+  virtual const TargetData       *getTargetData() const { return &DataLayout; }
+
   static unsigned getJITMatchQuality();
   static unsigned getModuleMatchQuality(const Module &M);
 };
diff --git a/lib/Target/ARM/AsmPrinter/ARMAsmPrinter.cpp b/lib/Target/ARM/AsmPrinter/ARMAsmPrinter.cpp
index fe1c980..400f628a 100644
--- a/lib/Target/ARM/AsmPrinter/ARMAsmPrinter.cpp
+++ b/lib/Target/ARM/AsmPrinter/ARMAsmPrinter.cpp
@@ -21,6 +21,7 @@
 #include "ARMMachineFunctionInfo.h"
 #include "llvm/Constants.h"
 #include "llvm/Module.h"
+#include "llvm/MDNode.h"
 #include "llvm/CodeGen/AsmPrinter.h"
 #include "llvm/CodeGen/DwarfWriter.h"
 #include "llvm/CodeGen/MachineModuleInfo.h"
@@ -96,9 +97,7 @@ namespace {
                       const char *Modifier = 0);
     void printSOImmOperand(const MachineInstr *MI, int opNum);
     void printSOImm2PartOperand(const MachineInstr *MI, int opNum);
-    void printSOOperand(const MachineInstr *MI, int OpNum);
     void printSORegOperand(const MachineInstr *MI, int opNum);
-    void printT2SOImmOperand(const MachineInstr *MI, int opNum);
     void printAddrMode2Operand(const MachineInstr *MI, int OpNo);
     void printAddrMode2OffsetOperand(const MachineInstr *MI, int OpNo);
     void printAddrMode3Operand(const MachineInstr *MI, int OpNo);
@@ -110,6 +109,7 @@ namespace {
     void printAddrModePCOperand(const MachineInstr *MI, int OpNo,
                                 const char *Modifier = 0);
     void printBitfieldInvMaskImmOperand (const MachineInstr *MI, int OpNo);
+
     void printThumbAddrModeRROperand(const MachineInstr *MI, int OpNo);
     void printThumbAddrModeRI5Operand(const MachineInstr *MI, int OpNo,
                                       unsigned Scale);
@@ -117,6 +117,10 @@ namespace {
     void printThumbAddrModeS2Operand(const MachineInstr *MI, int OpNo);
     void printThumbAddrModeS4Operand(const MachineInstr *MI, int OpNo);
     void printThumbAddrModeSPOperand(const MachineInstr *MI, int OpNo);
+
+    void printT2SOImmOperand(const MachineInstr *MI, int opNum);
+    void printT2SOOperand(const MachineInstr *MI, int OpNum);
+
     void printPredicateOperand(const MachineInstr *MI, int opNum);
     void printSBitModifierOperand(const MachineInstr *MI, int opNum);
     void printPCLabel(const MachineInstr *MI, int opNum);
@@ -169,11 +173,6 @@ namespace {
          O << ")";
       }
       O << "\n";
-
-      // If the constant pool value is a extern weak symbol, remember to emit
-      // the weak reference.
-      if (GV && GV->hasExternalWeakLinkage())
-        ExtWeakSymbols.insert(GV);
     }
     
     void getAnalysisUsage(AnalysisUsage &AU) const {
@@ -331,8 +330,6 @@ void ARMAsmPrinter::printOperand(const MachineInstr *MI, int opNum,
     if (isCallOp && Subtarget->isTargetELF() &&
         TM.getRelocationModel() == Reloc::PIC_)
       O << "(PLT)";
-    if (GV->hasExternalWeakLinkage())
-      ExtWeakSymbols.insert(GV);
     break;
   }
   case MachineOperand::MO_ExternalSymbol: {
@@ -408,32 +405,10 @@ void ARMAsmPrinter::printSOImm2PartOperand(const MachineInstr *MI, int OpNum) {
   printSOImm(O, ARM_AM::getSOImmVal(V2), VerboseAsm, TAI);
 }
 
-// Constant shifts so_reg is a 3-operand unit corresponding to register forms of
-// the A5.1 "Addressing Mode 1 - Data-processing operands" forms.  This
-// includes:
-// REG 0 - e.g. R5
-// REG IMM, SH_OPC - e.g. R5, LSL #3
-void ARMAsmPrinter::printSOOperand(const MachineInstr *MI, int OpNum) {
-  const MachineOperand &MO1 = MI->getOperand(OpNum);
-  const MachineOperand &MO2 = MI->getOperand(OpNum+1);
-
-  unsigned Reg = MO1.getReg();
-  assert(TargetRegisterInfo::isPhysicalRegister(Reg));
-  O << TM.getRegisterInfo()->getAsmName(Reg);
-
-  // Print the shift opc.
-  O << ", "
-    << ARM_AM::getShiftOpcStr(ARM_AM::getSORegShOp(MO2.getImm()))
-    << " ";
-
-  assert(MO2.isImm() && "Not a valid t2_so_reg value!");
-  O << "#" << ARM_AM::getSORegOffset(MO2.getImm());
-}
-
 // so_reg is a 4-operand unit corresponding to register forms of the A5.1
 // "Addressing Mode 1 - Data-processing operands" forms.  This includes:
-//    REG 0   0    - e.g. R5
-//    REG REG 0,SH_OPC     - e.g. R5, ROR R3
+//    REG 0   0           - e.g. R5
+//    REG REG 0,SH_OPC    - e.g. R5, ROR R3
 //    REG 0   IMM,SH_OPC  - e.g. R5, LSL #3
 void ARMAsmPrinter::printSORegOperand(const MachineInstr *MI, int Op) {
   const MachineOperand &MO1 = MI->getOperand(Op);
@@ -457,24 +432,6 @@ void ARMAsmPrinter::printSORegOperand(const MachineInstr *MI, int Op) {
   }
 }
 
-static void printT2SOImm(raw_ostream &O, int64_t V) {
-  unsigned Imm = ARM_AM::getT2SOImmValDecode(V);
-  
-  // Always print the immediate directly, as the "rotate" form
-  // is deprecated in some contexts.
-  O << "#" << Imm;
-}
-
-/// printT2SOImmOperand - T2SOImm is:
-///  1. a 4-bit splat control value and 8 bit immediate value
-///  2. a 5-bit rotate amount and a non-zero 8-bit immediate value
-///     represented by a normalizedin 7-bit value (msb is always 1)
-void ARMAsmPrinter::printT2SOImmOperand(const MachineInstr *MI, int OpNum) {
-  const MachineOperand &MO = MI->getOperand(OpNum);
-  assert(MO.isImm() && "Not a valid so_imm value!");
-  printT2SOImm(O, MO.getImm());
-}
-
 void ARMAsmPrinter::printAddrMode2Operand(const MachineInstr *MI, int Op) {
   const MachineOperand &MO1 = MI->getOperand(Op);
   const MachineOperand &MO2 = MI->getOperand(Op+1);
@@ -643,8 +600,8 @@ void
 ARMAsmPrinter::printBitfieldInvMaskImmOperand(const MachineInstr *MI, int Op) {
   const MachineOperand &MO = MI->getOperand(Op);
   uint32_t v = ~MO.getImm();
-  int32_t lsb = ffs (v) - 1;
-  int32_t width = fls (v) - lsb;
+  int32_t lsb = CountTrailingZeros_32(v);
+  int32_t width = (32 - CountLeadingZeros_32 (v)) - lsb;
   assert(MO.isImm() && "Not a valid bf_inv_mask_imm value!");
   O << "#" << lsb << ", #" << width;
 }
@@ -702,6 +659,42 @@ void ARMAsmPrinter::printThumbAddrModeSPOperand(const MachineInstr *MI,int Op) {
   O << "]";
 }
 
+/// printT2SOImmOperand - T2SOImm is:
+///  1. a 4-bit splat control value and 8 bit immediate value
+///  2. a 5-bit rotate amount and a non-zero 8-bit immediate value
+///     represented by a normalized 7-bit value (msb is always 1)
+void ARMAsmPrinter::printT2SOImmOperand(const MachineInstr *MI, int OpNum) {
+  const MachineOperand &MO = MI->getOperand(OpNum);
+  assert(MO.isImm() && "Not a valid so_imm value!");
+
+  unsigned Imm = ARM_AM::getT2SOImmValDecode(MO.getImm());  
+  // Always print the immediate directly, as the "rotate" form
+  // is deprecated in some contexts.
+  O << "#" << Imm;
+}
+
+// printT2SOOperand - Print a t2_so_reg, the 2-operand unit (register plus
+// shift immediate) corresponding to the Thumb2 register with shift forms.
+// REG 0   0           - e.g. R5
+// REG IMM, SH_OPC     - e.g. R5, LSL #3
+void ARMAsmPrinter::printT2SOOperand(const MachineInstr *MI, int OpNum) {
+  const MachineOperand &MO1 = MI->getOperand(OpNum);
+  const MachineOperand &MO2 = MI->getOperand(OpNum+1);
+  // Check the operand kind before the first getImm() call, not after it.
+  assert(MO2.isImm() && "Not a valid t2_so_reg value!");
+
+  unsigned Reg = MO1.getReg();
+  assert(TargetRegisterInfo::isPhysicalRegister(Reg));
+  O << TM.getRegisterInfo()->getAsmName(Reg);
+
+  // Print the shift opcode and amount, both decoded from MO2's immediate.
+  O << ", "
+    << ARM_AM::getShiftOpcStr(ARM_AM::getSORegShOp(MO2.getImm()))
+    << " ";
+  O << "#" << ARM_AM::getSORegOffset(MO2.getImm());
+}
+
+
 void ARMAsmPrinter::printPredicateOperand(const MachineInstr *MI, int opNum) {
   ARMCC::CondCodes CC = (ARMCC::CondCodes)MI->getOperand(opNum).getImm();
   if (CC != ARMCC::AL)
@@ -749,10 +742,6 @@ void ARMAsmPrinter::printCPInstOperand(const MachineInstr *MI, int OpNo,
       EmitMachineConstantPoolValue(MCPE.Val.MachineCPVal);
     } else {
       EmitGlobalConstant(MCPE.Val.ConstVal);
-      // remember to emit the weak reference
-      if (const GlobalValue *GV = dyn_cast<GlobalValue>(MCPE.Val.ConstVal))
-        if (GV->hasExternalWeakLinkage())
-          ExtWeakSymbols.insert(GV);
     }
   }
 }
@@ -934,6 +923,8 @@ void ARMAsmPrinter::printModuleLevelGV(const GlobalVariable* GVar) {
 
   std::string name = Mang->getValueName(GVar);
   Constant *C = GVar->getInitializer();
+  if (isa<MDNode>(C) || isa<MDString>(C))
+    return;
   const Type *Type = C->getType();
   unsigned Size = TD->getTypeAllocSize(Type);
   unsigned Align = TD->getPreferredAlignmentLog(GVar);
@@ -1046,12 +1037,6 @@ void ARMAsmPrinter::printModuleLevelGV(const GlobalVariable* GVar) {
   if (TAI->hasDotTypeDotSizeDirective())
     O << "\t.size " << name << ", " << Size << "\n";
 
-  // If the initializer is a extern weak symbol, remember to emit the weak
-  // reference!
-  if (const GlobalValue *GV = dyn_cast<GlobalValue>(C))
-    if (GV->hasExternalWeakLinkage())
-      ExtWeakSymbols.insert(GV);
-
   EmitGlobalConstant(C);
   O << '\n';
 }
@@ -1135,18 +1120,12 @@ bool ARMAsmPrinter::doFinalization(Module &M) {
     }
 
 
-    // Emit initial debug information.
-    DW->EndModule();
-
     // Funny Darwin hack: This flag tells the linker that no global symbols
     // contain code that falls through to other global symbols (e.g. the obvious
     // implementation of multiple entry points).  If this doesn't occur, the
     // linker can safely perform dead code stripping.  Since LLVM never
     // generates code that does this, it is always safe to set.
     O << "\t.subsections_via_symbols\n";
-  } else {
-    // Emit final debug information for ELF.
-    DW->EndModule();
   }
 
   return AsmPrinter::doFinalization(M);
@@ -1158,7 +1137,7 @@ bool ARMAsmPrinter::doFinalization(Module &M) {
 /// regardless of whether the function is in SSA form.
 ///
 FunctionPass *llvm::createARMCodePrinterPass(raw_ostream &o,
-                                             ARMTargetMachine &tm,
+                                             ARMBaseTargetMachine &tm,
                                              CodeGenOpt::Level OptLevel,
                                              bool verbose) {
   return new ARMAsmPrinter(o, tm, tm.getTargetAsmInfo(), OptLevel, verbose);
@@ -1167,13 +1146,10 @@ FunctionPass *llvm::createARMCodePrinterPass(raw_ostream &o,
 namespace {
   static struct Register {
     Register() {
-      ARMTargetMachine::registerAsmPrinter(createARMCodePrinterPass);
+      ARMBaseTargetMachine::registerAsmPrinter(createARMCodePrinterPass);
     }
   } Registrator;
 }
 
-// Force static initialization when called from
-// llvm/InitializeAllAsmPrinters.h
-namespace llvm {
-  void InitializeARMAsmPrinter() { }
-}
+// Force static initialization.
+extern "C" void LLVMInitializeARMAsmPrinter() { }
diff --git a/lib/Target/ARM/AsmPrinter/CMakeLists.txt b/lib/Target/ARM/AsmPrinter/CMakeLists.txt
index c22964f..a67fc84 100644
--- a/lib/Target/ARM/AsmPrinter/CMakeLists.txt
+++ b/lib/Target/ARM/AsmPrinter/CMakeLists.txt
@@ -3,3 +3,4 @@ include_directories( ${CMAKE_CURRENT_BINARY_DIR}/.. ${CMAKE_CURRENT_SOURCE_DIR}/
 add_llvm_library(LLVMARMAsmPrinter
   ARMAsmPrinter.cpp
   )
+add_dependencies(LLVMARMAsmPrinter ARMCodeGenTable_gen)
+\ No newline at end of file
diff --git a/lib/Target/ARM/CMakeLists.txt b/lib/Target/ARM/CMakeLists.txt
index 2ac40f5..e665ed9 100644
--- a/lib/Target/ARM/CMakeLists.txt
+++ b/lib/Target/ARM/CMakeLists.txt
@@ -24,4 +24,5 @@ add_llvm_target(ARMCodeGen
   ARMSubtarget.cpp
   ARMTargetAsmInfo.cpp
   ARMTargetMachine.cpp
+  ThumbInstrInfo.cpp
   )
diff --git a/lib/Target/ARM/README.txt b/lib/Target/ARM/README.txt
index 0252a4a..4223699 100644
--- a/lib/Target/ARM/README.txt
+++ b/lib/Target/ARM/README.txt
@@ -96,20 +96,7 @@ Which would be better.  This occurs in png decode.
 //===---------------------------------------------------------------------===//
 
 More load / store optimizations:
-1) Look past instructions without side-effects (not load, store, branch, etc.)
-   when forming the list of loads / stores to optimize.
-
-2) Smarter register allocation?
-We are probably missing some opportunities to use ldm / stm. Consider:
-
-ldr r5, [r0]
-ldr r4, [r0, #4]
-
-This cannot be merged into a ldm. Perhaps we will need to do the transformation
-before register allocation. Then teach the register allocator to allocate a
-chunk of consecutive registers.
-
-3) Better representation for block transfer? This is from Olden/power:
+1) Better representation for block transfer? This is from Olden/power:
 
 	fldd d0, [r4]
 	fstd d0, [r4, #+32]
@@ -123,7 +110,7 @@ chunk of consecutive registers.
 If we can spare the registers, it would be better to use fldm and fstm here.
 Need major register allocator enhancement though.
 
-4) Can we recognize the relative position of constantpool entries? i.e. Treat
+2) Can we recognize the relative position of constantpool entries? i.e. Treat
 
 	ldr r0, LCPI17_3
 	ldr r1, LCPI17_4
@@ -147,13 +134,7 @@ L6:
 	.long	-858993459
 	.long	1074318540
 
-5) Can we make use of ldrd and strd? Instead of generating ldm / stm, use
-ldrd/strd instead if there are only two destination registers that form an
-odd/even pair. However, we probably would pay a penalty if the address is not
-aligned on 8-byte boundary. This requires more information on load / store
-nodes (and MI's?) then we currently carry.
-
-6) struct copies appear to be done field by field 
+3) struct copies appear to be done field by field 
 instead of by words, at least sometimes:
 
 struct foo { int x; short s; char c1; char c2; };
@@ -313,11 +294,6 @@ See McCat/18-imp/ComputeBoundingBoxes for an example.
 
 //===---------------------------------------------------------------------===//
 
-Register scavenging is now implemented.  The example in the previous version
-of this document produces optimal code at -O2.
-
-//===---------------------------------------------------------------------===//
-
 Pre-/post- indexed load / stores:
 
 1) We should not make the pre/post- indexed load/store transform if the base ptr
@@ -353,20 +329,6 @@ time.
 
 //===---------------------------------------------------------------------===//
 
-We should add i64 support to take advantage of the 64-bit load / stores.
-We can add a pseudo i64 register class containing pseudo registers that are
-register pairs. All other ops (e.g. add, sub) would be expanded as usual.
-
-We need to add pseudo instructions (i.e. gethi / getlo) to extract i32 registers
-from the i64 register. These are single moves which can be eliminated if the
-destination register is a sub-register of the source. We should implement proper
-subreg support in the register allocator to coalesce these away.
-
-There are other minor issues such as multiple instructions for a spill / restore
-/ move.
-
-//===---------------------------------------------------------------------===//
-
 Implement support for some more tricky ways to materialize immediates.  For
 example, to get 0xffff8000, we can use:
 
@@ -465,12 +427,6 @@ More register scavenging work:
 1. Use the register scavenger to track frame index materialized into registers
    (those that do not fit in addressing modes) to allow reuse in the same BB.
 2. Finish scavenging for Thumb.
-3. We know some spills and restores are unnecessary. The issue is once live
-   intervals are merged, they are not never split. So every def is spilled
-   and every use requires a restore if the register allocator decides the
-   resulting live interval is not assigned a physical register. It may be
-   possible (with the help of the scavenger) to turn some spill / restore
-   pairs into register copies.
 
 //===---------------------------------------------------------------------===//
 
@@ -572,3 +528,5 @@ those operations and the ARMv6 scalar versions.
 
 //===---------------------------------------------------------------------===//
 
+ARM::MOVCCr is commutable (by flipping the condition). But we need to implement
+ARMInstrInfo::commuteInstruction() to support it.
diff --git a/lib/Target/ARM/ThumbInstrInfo.cpp b/lib/Target/ARM/ThumbInstrInfo.cpp
new file mode 100644
index 0000000..075d940
--- /dev/null
+++ b/lib/Target/ARM/ThumbInstrInfo.cpp
@@ -0,0 +1,282 @@
+//===- ThumbInstrInfo.cpp - Thumb Instruction Information --------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the Thumb implementation of the TargetInstrInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "ARMInstrInfo.h"
+#include "ARM.h"
+#include "ARMGenInstrInfo.inc"
+#include "ARMMachineFunctionInfo.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/ADT/SmallVector.h"
+#include "ThumbInstrInfo.h"
+
+using namespace llvm;
+
+ThumbInstrInfo::ThumbInstrInfo(const ARMSubtarget &STI)
+  : ARMBaseInstrInfo(STI) { // Only forwards the subtarget to the base class.
+}
+
+bool ThumbInstrInfo::isMoveInstr(const MachineInstr &MI,
+                                 unsigned &SrcReg, unsigned &DstReg,
+                                 unsigned& SrcSubIdx, unsigned& DstSubIdx) const {
+  SrcSubIdx = DstSubIdx = 0; // No sub-registers: both indices are always 0.
+
+  unsigned oc = MI.getOpcode();
+  switch (oc) {
+  default:
+    return false; // Not one of the Thumb register-move opcodes below.
+  // FIXME: Thumb2
+  case ARM::tMOVr:
+  case ARM::tMOVhir2lor:
+  case ARM::tMOVlor2hir:
+  case ARM::tMOVhir2hir:
+    assert(MI.getDesc().getNumOperands() >= 2 &&
+           MI.getOperand(0).isReg() &&
+           MI.getOperand(1).isReg() &&
+           "Invalid Thumb MOV instruction");
+    SrcReg = MI.getOperand(1).getReg(); // Operand 1 is the source...
+    DstReg = MI.getOperand(0).getReg(); // ...and operand 0 the destination.
+    return true;
+  }
+}
+
+unsigned ThumbInstrInfo::isLoadFromStackSlot(const MachineInstr *MI,
+                                             int &FrameIndex) const {
+  switch (MI->getOpcode()) {
+  default: break; // Any other opcode falls through to "return 0".
+  // FIXME: Thumb2
+  case ARM::tRestore:
+    if (MI->getOperand(1).isFI() &&
+        MI->getOperand(2).isImm() &&
+        MI->getOperand(2).getImm() == 0) { // Only a zero immediate matches.
+      FrameIndex = MI->getOperand(1).getIndex();
+      return MI->getOperand(0).getReg(); // Operand 0: the register loaded.
+    }
+    break;
+  }
+  return 0; // Not a tRestore of the form <reg>, <frame index>, 0.
+}
+
+unsigned ThumbInstrInfo::isStoreToStackSlot(const MachineInstr *MI,
+                                            int &FrameIndex) const {
+  switch (MI->getOpcode()) {
+  default: break; // Any other opcode falls through to "return 0".
+  // FIXME: Thumb2
+  case ARM::tSpill:
+    if (MI->getOperand(1).isFI() &&
+        MI->getOperand(2).isImm() &&
+        MI->getOperand(2).getImm() == 0) { // Only a zero immediate matches.
+      FrameIndex = MI->getOperand(1).getIndex();
+      return MI->getOperand(0).getReg(); // Operand 0: the register stored.
+    }
+    break;
+  }
+  return 0; // Not a tSpill of the form <reg>, <frame index>, 0.
+}
+
+bool ThumbInstrInfo::copyRegToReg(MachineBasicBlock &MBB,
+                                  MachineBasicBlock::iterator I,
+                                  unsigned DestReg, unsigned SrcReg,
+                                  const TargetRegisterClass *DestRC,
+                                  const TargetRegisterClass *SrcRC) const {
+  DebugLoc DL = DebugLoc::getUnknownLoc();
+  if (I != MBB.end()) DL = I->getDebugLoc(); // Borrow I's location if any.
+
+  // FIXME: Thumb2. The move opcode is chosen from the (DestRC, SrcRC) pair.
+  if (DestRC == ARM::GPRRegisterClass) {
+    if (SrcRC == ARM::GPRRegisterClass) {
+      BuildMI(MBB, I, DL, get(ARM::tMOVhir2hir), DestReg).addReg(SrcReg);
+      return true;
+    } else if (SrcRC == ARM::tGPRRegisterClass) {
+      BuildMI(MBB, I, DL, get(ARM::tMOVlor2hir), DestReg).addReg(SrcReg);
+      return true;
+    }
+  } else if (DestRC == ARM::tGPRRegisterClass) {
+    if (SrcRC == ARM::GPRRegisterClass) {
+      BuildMI(MBB, I, DL, get(ARM::tMOVhir2lor), DestReg).addReg(SrcReg);
+      return true;
+    } else if (SrcRC == ARM::tGPRRegisterClass) {
+      BuildMI(MBB, I, DL, get(ARM::tMOVr), DestReg).addReg(SrcReg);
+      return true;
+    }
+  }
+
+  return false; // Unhandled register-class pair: no copy was emitted.
+}
+
+void ThumbInstrInfo::
+storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
+                    unsigned SrcReg, bool isKill, int FI,
+                    const TargetRegisterClass *RC) const {
+  DebugLoc DL = DebugLoc::getUnknownLoc();
+  if (I != MBB.end()) DL = I->getDebugLoc();
+
+  assert(RC == ARM::tGPRRegisterClass && "Unknown regclass!");
+
+  // FIXME: Thumb2. Emits tSpill <SrcReg>, <FI>, 0 before I (tGPR only).
+  if (RC == ARM::tGPRRegisterClass) {
+    BuildMI(MBB, I, DL, get(ARM::tSpill))
+      .addReg(SrcReg, getKillRegState(isKill))
+      .addFrameIndex(FI).addImm(0);
+  }
+}
+
+void ThumbInstrInfo::storeRegToAddr(MachineFunction &MF, unsigned SrcReg,
+                                    bool isKill,
+                                    SmallVectorImpl<MachineOperand> &Addr,
+                                    const TargetRegisterClass *RC,
+                                   SmallVectorImpl<MachineInstr*> &NewMIs) const{
+  DebugLoc DL = DebugLoc::getUnknownLoc();
+  unsigned Opc = 0;
+
+  // FIXME: Thumb2. Is GPRRegClass here correct?
+  assert(RC == ARM::GPRRegisterClass && "Unknown regclass!");
+  if (RC == ARM::GPRRegisterClass) {
+    Opc = Addr[0].isFI() ? ARM::tSpill : ARM::tSTR; // Frame index => tSpill.
+  }
+
+  MachineInstrBuilder MIB =
+    BuildMI(MF, DL,  get(Opc)).addReg(SrcReg, getKillRegState(isKill));
+  for (unsigned i = 0, e = Addr.size(); i != e; ++i)
+    MIB.addOperand(Addr[i]); // Address operands are appended unchanged.
+  NewMIs.push_back(MIB); // The store is handed back through NewMIs.
+  return;
+}
+
+void ThumbInstrInfo::
+loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
+                     unsigned DestReg, int FI,
+                     const TargetRegisterClass *RC) const {
+  DebugLoc DL = DebugLoc::getUnknownLoc();
+  if (I != MBB.end()) DL = I->getDebugLoc();
+
+  // FIXME: Thumb2. Emits tRestore <DestReg>, <FI>, 0 before I (tGPR only).
+  assert(RC == ARM::tGPRRegisterClass && "Unknown regclass!");
+
+  if (RC == ARM::tGPRRegisterClass) {
+    BuildMI(MBB, I, DL, get(ARM::tRestore), DestReg)
+      .addFrameIndex(FI).addImm(0);
+  }
+}
+
+void ThumbInstrInfo::
+loadRegFromAddr(MachineFunction &MF, unsigned DestReg,
+                SmallVectorImpl<MachineOperand> &Addr,
+                const TargetRegisterClass *RC,
+                SmallVectorImpl<MachineInstr*> &NewMIs) const {
+  DebugLoc DL = DebugLoc::getUnknownLoc();
+  unsigned Opc = 0;
+
+  // FIXME: Thumb2. Is GPRRegClass ok here? Any other RC leaves Opc at 0.
+  if (RC == ARM::GPRRegisterClass) {
+    Opc = Addr[0].isFI() ? ARM::tRestore : ARM::tLDR; // Frame index => tRestore.
+  }
+
+  MachineInstrBuilder MIB = BuildMI(MF, DL, get(Opc), DestReg);
+  for (unsigned i = 0, e = Addr.size(); i != e; ++i)
+    MIB.addOperand(Addr[i]); // Address operands are appended unchanged.
+  NewMIs.push_back(MIB); // The load is handed back through NewMIs.
+  return;
+}
+
+bool ThumbInstrInfo::
+spillCalleeSavedRegisters(MachineBasicBlock &MBB,
+                          MachineBasicBlock::iterator MI,
+                          const std::vector<CalleeSavedInfo> &CSI) const {
+  if (CSI.empty())
+    return false; // Nothing to push.
+
+  DebugLoc DL = DebugLoc::getUnknownLoc();
+  if (MI != MBB.end()) DL = MI->getDebugLoc();
+
+  MachineInstrBuilder MIB = BuildMI(MBB, MI, DL, get(ARM::tPUSH));
+  for (unsigned i = CSI.size(); i != 0; --i) { // Walk CSI from last to first.
+    unsigned Reg = CSI[i-1].getReg();
+    // Add the callee-saved register as live-in. It's killed at the spill.
+    MBB.addLiveIn(Reg);
+    MIB.addReg(Reg, RegState::Kill);
+  }
+  return true; // All of CSI went into the single tPUSH built above.
+}
+
+/// Pop callee-saved regs before MI (LR as PC unless vararg); false if no CSI.
+bool ThumbInstrInfo::
+restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
+                            MachineBasicBlock::iterator MI,
+                            const std::vector<CalleeSavedInfo> &CSI) const {
+  MachineFunction &MF = *MBB.getParent();
+  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
+  if (CSI.empty()) return false;
+
+  bool isVarArg = AFI->getVarArgsRegSaveSize() > 0;
+  MachineInstr *PopMI = MF.CreateMachineInstr(get(ARM::tPOP),MI->getDebugLoc());
+  for (unsigned i = CSI.size(); i != 0; --i) {
+    unsigned Reg = CSI[i-1].getReg();
+    if (Reg == ARM::LR) {
+      // Special epilogue for vararg functions. See emitEpilogue
+      if (isVarArg)
+        continue;
+      Reg = ARM::PC;
+      PopMI->setDesc(get(ARM::tPOP_RET));
+      MI = MBB.erase(MI); // The pop that writes PC takes MI's place.
+    }
+    PopMI->addOperand(MachineOperand::CreateReg(Reg, true));
+  }
+  // It's illegal to emit pop instruction without operands.
+  if (PopMI->getNumOperands() > 0)
+    MBB.insert(MI, PopMI);
+  else
+    MF.DeleteMachineInstr(PopMI); // Never inserted: give it back to MF.
+  return true;
+}
+
+MachineInstr *ThumbInstrInfo::
+foldMemoryOperandImpl(MachineFunction &MF, MachineInstr *MI,
+                      const SmallVectorImpl<unsigned> &Ops, int FI) const {
+  if (Ops.size() != 1) return NULL; // Only single-operand folds are handled.
+  const ARMRegisterInfo &RI = getRegisterInfo();
+
+  unsigned OpNum = Ops[0];
+  unsigned Opc = MI->getOpcode();
+  MachineInstr *NewMI = NULL;
+  switch (Opc) {
+  default: break; // Only the register moves below are folded.
+  case ARM::tMOVr:
+  case ARM::tMOVlor2hir:
+  case ARM::tMOVhir2lor:
+  case ARM::tMOVhir2hir: {
+    if (OpNum == 0) { // move -> store
+      unsigned SrcReg = MI->getOperand(1).getReg();
+      bool isKill = MI->getOperand(1).isKill();
+      if (RI.isPhysicalRegister(SrcReg) && !RI.isLowRegister(SrcReg))
+        // tSpill cannot take a high register operand.
+        break;
+      NewMI = BuildMI(MF, MI->getDebugLoc(), get(ARM::tSpill))
+        .addReg(SrcReg, getKillRegState(isKill))
+        .addFrameIndex(FI).addImm(0);
+    } else {          // move -> load
+      unsigned DstReg = MI->getOperand(0).getReg();
+      if (RI.isPhysicalRegister(DstReg) && !RI.isLowRegister(DstReg))
+        // tRestore cannot target a high register operand.
+        break;
+      bool isDead = MI->getOperand(0).isDead();
+      NewMI = BuildMI(MF, MI->getDebugLoc(), get(ARM::tRestore))
+        .addReg(DstReg, RegState::Define | getDeadRegState(isDead))
+        .addFrameIndex(FI).addImm(0);
+    }
+    break;
+  }
+  }
+
+  return NewMI; // Still NULL if nothing was folded.
+}
diff --git a/lib/Target/ARM/ThumbInstrInfo.h b/lib/Target/ARM/ThumbInstrInfo.h
new file mode 100644
index 0000000..dcf1095
--- /dev/null
+++ b/lib/Target/ARM/ThumbInstrInfo.h
@@ -0,0 +1,85 @@
+//===- ThumbInstrInfo.h - Thumb Instruction Information ----------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the Thumb implementation of the TargetInstrInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef THUMBINSTRUCTIONINFO_H
+#define THUMBINSTRUCTIONINFO_H
+
+#include "llvm/Target/TargetInstrInfo.h"
+#include "ARMRegisterInfo.h"
+#include "ARM.h"
+#include "ARMInstrInfo.h"
+
+namespace llvm {
+  class ARMSubtarget;
+
+class ThumbInstrInfo : public ARMBaseInstrInfo { // Thumb instruction hooks.
+public:
+  explicit ThumbInstrInfo(const ARMSubtarget &STI);
+
+  /// Return true if the instruction is a register to register move and return
+  /// the source and dest operands and their sub-register indices by reference.
+  virtual bool isMoveInstr(const MachineInstr &MI,
+                           unsigned &SrcReg, unsigned &DstReg,
+                           unsigned &SrcSubIdx, unsigned &DstSubIdx) const;
+
+  virtual unsigned isLoadFromStackSlot(const MachineInstr *MI,
+                                       int &FrameIndex) const;
+  virtual unsigned isStoreToStackSlot(const MachineInstr *MI,
+                                      int &FrameIndex) const;
+
+  virtual bool copyRegToReg(MachineBasicBlock &MBB,
+                            MachineBasicBlock::iterator I,
+                            unsigned DestReg, unsigned SrcReg,
+                            const TargetRegisterClass *DestRC,
+                            const TargetRegisterClass *SrcRC) const;
+  virtual void storeRegToStackSlot(MachineBasicBlock &MBB,
+                                   MachineBasicBlock::iterator MBBI,
+                                   unsigned SrcReg, bool isKill, int FrameIndex,
+                                   const TargetRegisterClass *RC) const;
+
+  virtual void storeRegToAddr(MachineFunction &MF, unsigned SrcReg, bool isKill,
+                              SmallVectorImpl<MachineOperand> &Addr,
+                              const TargetRegisterClass *RC,
+                              SmallVectorImpl<MachineInstr*> &NewMIs) const;
+
+  virtual void loadRegFromStackSlot(MachineBasicBlock &MBB,
+                                    MachineBasicBlock::iterator MBBI,
+                                    unsigned DestReg, int FrameIndex,
+                                    const TargetRegisterClass *RC) const;
+
+  virtual void loadRegFromAddr(MachineFunction &MF, unsigned DestReg,
+                               SmallVectorImpl<MachineOperand> &Addr,
+                               const TargetRegisterClass *RC,
+                               SmallVectorImpl<MachineInstr*> &NewMIs) const;
+  virtual bool spillCalleeSavedRegisters(MachineBasicBlock &MBB,
+                                         MachineBasicBlock::iterator MI,
+                                 const std::vector<CalleeSavedInfo> &CSI) const;
+  virtual bool restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
+                                           MachineBasicBlock::iterator MI,
+                                 const std::vector<CalleeSavedInfo> &CSI) const;
+
+  virtual MachineInstr* foldMemoryOperandImpl(MachineFunction &MF,
+                                              MachineInstr* MI,
+                                           const SmallVectorImpl<unsigned> &Ops,
+                                              MachineInstr* LoadMI) const {
+    return 0; // This LoadMI overload never folds; it always returns null.
+  }
+
+  virtual MachineInstr* foldMemoryOperandImpl(MachineFunction &MF,
+                                              MachineInstr* MI,
+                                           const SmallVectorImpl<unsigned> &Ops,
+                                              int FrameIndex) const;
+};
+}
+
+#endif // THUMBINSTRUCTIONINFO_H
diff --git a/lib/Target/Alpha/AlphaJITInfo.cpp b/lib/Target/Alpha/AlphaJITInfo.cpp
index 3fecb19..ba7478e 100644
--- a/lib/Target/Alpha/AlphaJITInfo.cpp
+++ b/lib/Target/Alpha/AlphaJITInfo.cpp
@@ -19,7 +19,6 @@
 #include "llvm/Config/alloca.h"
 #include "llvm/Support/Debug.h"
 #include <cstdlib>
-#include <map>
 using namespace llvm;
 
 #define BUILD_OFormatI(Op, RA, LIT, FUN, RC) \
@@ -237,11 +236,6 @@ static long getLower16(long l)
 
 void AlphaJITInfo::relocate(void *Function, MachineRelocation *MR,
                             unsigned NumRelocs, unsigned char* GOTBase) {
-  //because gpdist are paired and relative to the pc of the first inst,
-  //we need to have some state
-
-  static std::map<std::pair<void*, int>, void*> gpdistmap;
-
   for (unsigned i = 0; i != NumRelocs; ++i, ++MR) {
     unsigned *RelocPos = (unsigned*)Function + MR->getMachineCodeOffset()/4;
     long idx = 0;
diff --git a/lib/Target/Alpha/AlphaJITInfo.h b/lib/Target/Alpha/AlphaJITInfo.h
index edff990..ecb467f 100644
--- a/lib/Target/Alpha/AlphaJITInfo.h
+++ b/lib/Target/Alpha/AlphaJITInfo.h
@@ -15,6 +15,7 @@
 #define ALPHA_JITINFO_H
 
 #include "llvm/Target/TargetJITInfo.h"
+#include <map>
 
 namespace llvm {
   class TargetMachine;
@@ -22,6 +23,10 @@ namespace llvm {
   class AlphaJITInfo : public TargetJITInfo {
   protected:
     TargetMachine &TM;
+    
+    //because gpdist are paired and relative to the pc of the first inst,
+    //we need to have some state
+    std::map<std::pair<void*, int>, void*> gpdistmap;
   public:
     explicit AlphaJITInfo(TargetMachine &tm) : TM(tm)
     { useGOT = true; }
diff --git a/lib/Target/Alpha/AlphaRegisterInfo.cpp b/lib/Target/Alpha/AlphaRegisterInfo.cpp
index feee6e4..0ff53c7 100644
--- a/lib/Target/Alpha/AlphaRegisterInfo.cpp
+++ b/lib/Target/Alpha/AlphaRegisterInfo.cpp
@@ -54,7 +54,7 @@ static long getLower16(long l)
 
 AlphaRegisterInfo::AlphaRegisterInfo(const TargetInstrInfo &tii)
   : AlphaGenRegisterInfo(Alpha::ADJUSTSTACKDOWN, Alpha::ADJUSTSTACKUP),
-    TII(tii)
+    TII(tii), curgpdist(0)
 {
 }
 
@@ -206,8 +206,6 @@ void AlphaRegisterInfo::emitPrologue(MachineFunction &MF) const {
                  MBBI->getDebugLoc() : DebugLoc::getUnknownLoc());
   bool FP = hasFP(MF);
 
-  static int curgpdist = 0;
-
   //handle GOP offset
   BuildMI(MBB, MBBI, dl, TII.get(Alpha::LDAHg), Alpha::R29)
     .addGlobalAddress(const_cast<Function*>(MF.getFunction()))
diff --git a/lib/Target/Alpha/AlphaRegisterInfo.h b/lib/Target/Alpha/AlphaRegisterInfo.h
index c4f5f7b..5012fe8 100644
--- a/lib/Target/Alpha/AlphaRegisterInfo.h
+++ b/lib/Target/Alpha/AlphaRegisterInfo.h
@@ -60,6 +60,9 @@ struct AlphaRegisterInfo : public AlphaGenRegisterInfo {
   int getDwarfRegNum(unsigned RegNum, bool isEH) const;
 
   static std::string getPrettyName(unsigned reg);
+  
+private:
+  mutable int curgpdist;
 };
 
 } // end namespace llvm
diff --git a/lib/Target/Alpha/AlphaTargetMachine.cpp b/lib/Target/Alpha/AlphaTargetMachine.cpp
index cdd4fa4..10952eb 100644
--- a/lib/Target/Alpha/AlphaTargetMachine.cpp
+++ b/lib/Target/Alpha/AlphaTargetMachine.cpp
@@ -27,10 +27,8 @@ static RegisterTarget<AlphaTargetMachine> X("alpha", "Alpha [experimental]");
 // No assembler printer by default
 AlphaTargetMachine::AsmPrinterCtorFn AlphaTargetMachine::AsmPrinterCtor = 0;
 
-// Force static initialization when called from llvm/InitializeAllTargets.h
-namespace llvm {
-  void InitializeAlphaTarget() { }
-}
+// Force static initialization.
+extern "C" void LLVMInitializeAlphaTarget() { }
 
 const TargetAsmInfo *AlphaTargetMachine::createTargetAsmInfo() const {
   return new AlphaTargetAsmInfo(*this);
diff --git a/lib/Target/Alpha/AsmPrinter/AlphaAsmPrinter.cpp b/lib/Target/Alpha/AsmPrinter/AlphaAsmPrinter.cpp
index 7b73bb3..e0c0a64 100644
--- a/lib/Target/Alpha/AsmPrinter/AlphaAsmPrinter.cpp
+++ b/lib/Target/Alpha/AsmPrinter/AlphaAsmPrinter.cpp
@@ -17,6 +17,7 @@
 #include "AlphaInstrInfo.h"
 #include "AlphaTargetMachine.h"
 #include "llvm/Module.h"
+#include "llvm/MDNode.h"
 #include "llvm/Type.h"
 #include "llvm/Assembly/Writer.h"
 #include "llvm/CodeGen/AsmPrinter.h"
@@ -121,8 +122,6 @@ void AlphaAsmPrinter::printOp(const MachineOperand &MO, bool IsCallOp) {
   case MachineOperand::MO_GlobalAddress: {
     GlobalValue *GV = MO.getGlobal();
     O << Mang->getValueName(GV);
-    if (GV->isDeclaration() && GV->hasExternalWeakLinkage())
-      ExtWeakSymbols.insert(GV);
     return;
   }
 
@@ -224,6 +223,8 @@ void AlphaAsmPrinter::printModuleLevelGV(const GlobalVariable* GVar) {
 
   std::string name = Mang->getValueName(GVar);
   Constant *C = GVar->getInitializer();
+  if (isa<MDNode>(C) || isa<MDString>(C))
+    return;
   unsigned Size = TD->getTypeAllocSize(C->getType());
   unsigned Align = TD->getPreferredAlignmentLog(GVar);
 
@@ -265,12 +266,6 @@ void AlphaAsmPrinter::printModuleLevelGV(const GlobalVariable* GVar) {
 
   O << name << ":\n";
 
-  // If the initializer is a extern weak symbol, remember to emit the weak
-  // reference!
-  if (const GlobalValue *GV = dyn_cast<GlobalValue>(C))
-    if (GV->hasExternalWeakLinkage())
-      ExtWeakSymbols.insert(GV);
-
   EmitGlobalConstant(C);
   O << '\n';
 }
@@ -304,11 +299,8 @@ bool AlphaAsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI,
   return false;
 }
 
-// Force static initialization when called from
-// llvm/InitializeAllAsmPrinters.h
-namespace llvm {
-  void InitializeAlphaAsmPrinter() { }
-}
+// Force static initialization.
+extern "C" void LLVMInitializeAlphaAsmPrinter() { }
 
 namespace {
   static struct Register {
diff --git a/lib/Target/Alpha/AsmPrinter/CMakeLists.txt b/lib/Target/Alpha/AsmPrinter/CMakeLists.txt
index bf04762..992c218 100644
--- a/lib/Target/Alpha/AsmPrinter/CMakeLists.txt
+++ b/lib/Target/Alpha/AsmPrinter/CMakeLists.txt
@@ -3,3 +3,4 @@ include_directories( ${CMAKE_CURRENT_BINARY_DIR}/.. ${CMAKE_CURRENT_SOURCE_DIR}/
 add_llvm_library(LLVMAlphaAsmPrinter
   AlphaAsmPrinter.cpp
   )
+add_dependencies(LLVMAlphaAsmPrinter AlphaCodeGenTable_gen)
diff --git a/lib/Target/CBackend/CBackend.cpp b/lib/Target/CBackend/CBackend.cpp
index c3554f6..294c6d3 100644
--- a/lib/Target/CBackend/CBackend.cpp
+++ b/lib/Target/CBackend/CBackend.cpp
@@ -59,10 +59,8 @@ int CBackendTargetMachineModule = 0;
 // Register the target.
 static RegisterTarget<CTargetMachine> X("c", "C backend");
 
-// Force static initialization when called from llvm/InitializeAllTargets.h
-namespace llvm {
-  void InitializeCBackendTarget() { }
-}
+// Force static initialization.
+extern "C" void LLVMInitializeCBackendTarget() { }
 
 namespace {
   /// CBackendNameAllUsedStructsAndMergeFunctions - This pass inserts names for
@@ -102,12 +100,13 @@ namespace {
     std::set<Function*> intrinsicPrototypesAlreadyGenerated;
     std::set<const Argument*> ByValParams;
     unsigned FPCounter;
+    unsigned OpaqueCounter;
 
   public:
     static char ID;
     explicit CWriter(raw_ostream &o)
       : FunctionPass(&ID), Out(o), IL(0), Mang(0), LI(0), 
-        TheModule(0), TAsm(0), TD(0) {
+        TheModule(0), TAsm(0), TD(0), OpaqueCounter(0) {
       FPCounter = 0;
     }
 
@@ -647,8 +646,7 @@ raw_ostream &CWriter::printType(raw_ostream &Out, const Type *Ty,
   }
 
   case Type::OpaqueTyID: {
-    static int Count = 0;
-    std::string TyName = "struct opaque_" + itostr(Count++);
+    std::string TyName = "struct opaque_" + itostr(OpaqueCounter++);
     assert(TypeNames.find(Ty) == TypeNames.end());
     TypeNames[Ty] = TyName;
     return Out << TyName << ' ' << NameSoFar;
@@ -752,8 +750,7 @@ std::ostream &CWriter::printType(std::ostream &Out, const Type *Ty,
   }
 
   case Type::OpaqueTyID: {
-    static int Count = 0;
-    std::string TyName = "struct opaque_" + itostr(Count++);
+    std::string TyName = "struct opaque_" + itostr(OpaqueCounter++);
     assert(TypeNames.find(Ty) == TypeNames.end());
     TypeNames[Ty] = TyName;
     return Out << TyName << ' ' << NameSoFar;
diff --git a/lib/Target/CellSPU/AsmPrinter/CMakeLists.txt b/lib/Target/CellSPU/AsmPrinter/CMakeLists.txt
index 0dad083..9684e63 100644
--- a/lib/Target/CellSPU/AsmPrinter/CMakeLists.txt
+++ b/lib/Target/CellSPU/AsmPrinter/CMakeLists.txt
@@ -6,3 +6,4 @@ include_directories(
 add_llvm_library(LLVMCellSPUAsmPrinter
   SPUAsmPrinter.cpp
   )
+add_dependencies(LLVMCellSPUAsmPrinter CellSPUCodeGenTable_gen)
+\ No newline at end of file
diff --git a/lib/Target/CellSPU/AsmPrinter/SPUAsmPrinter.cpp b/lib/Target/CellSPU/AsmPrinter/SPUAsmPrinter.cpp
index 26a8ece..02b625b 100644
--- a/lib/Target/CellSPU/AsmPrinter/SPUAsmPrinter.cpp
+++ b/lib/Target/CellSPU/AsmPrinter/SPUAsmPrinter.cpp
@@ -19,6 +19,7 @@
 #include "llvm/Constants.h"
 #include "llvm/DerivedTypes.h"
 #include "llvm/Module.h"
+#include "llvm/MDNode.h"
 #include "llvm/Assembly/Writer.h"
 #include "llvm/CodeGen/AsmPrinter.h"
 #include "llvm/CodeGen/DwarfWriter.h"
@@ -361,9 +362,6 @@ void SPUAsmPrinter::printOp(const MachineOperand &MO) {
       }
     }
     O << Name;
-
-    if (GV->hasExternalWeakLinkage())
-      ExtWeakSymbols.insert(GV);
     return;
   }
 
@@ -524,6 +522,8 @@ void LinuxAsmPrinter::printModuleLevelGV(const GlobalVariable* GVar) {
   printVisibility(name, GVar->getVisibility());
 
   Constant *C = GVar->getInitializer();
+  if (isa<MDNode>(C) || isa<MDString>(C))
+    return;
   const Type *Type = C->getType();
   unsigned Size = TD->getTypeAllocSize(Type);
   unsigned Align = TD->getPreferredAlignmentLog(GVar);
@@ -584,12 +584,6 @@ void LinuxAsmPrinter::printModuleLevelGV(const GlobalVariable* GVar) {
   PrintUnmangledNameSafely(GVar, O);
   O << "'\n";
 
-  // If the initializer is a extern weak symbol, remember to emit the weak
-  // reference!
-  if (const GlobalValue *GV = dyn_cast<GlobalValue>(C))
-    if (GV->hasExternalWeakLinkage())
-      ExtWeakSymbols.insert(GV);
-
   EmitGlobalConstant(C);
   O << '\n';
 }
@@ -600,9 +594,6 @@ bool LinuxAsmPrinter::doFinalization(Module &M) {
        I != E; ++I)
     printModuleLevelGV(I);
 
-  // Emit initial debug information.
-  DW->EndModule();
-
   return AsmPrinter::doFinalization(M);
 }
 
@@ -617,11 +608,8 @@ FunctionPass *llvm::createSPUAsmPrinterPass(raw_ostream &o,
   return new LinuxAsmPrinter(o, tm, tm.getTargetAsmInfo(), OptLevel, verbose);
 }
 
-// Force static initialization when called from
-// llvm/InitializeAllAsmPrinters.h
-namespace llvm {
-  void InitializeCellSPUAsmPrinter() { }
-}
+// Force static initialization.
+extern "C" void LLVMInitializeCellSPUAsmPrinter() { }
 
 namespace {
   static struct Register {
diff --git a/lib/Target/CellSPU/SPUTargetMachine.cpp b/lib/Target/CellSPU/SPUTargetMachine.cpp
index c675ebb..256d63d 100644
--- a/lib/Target/CellSPU/SPUTargetMachine.cpp
+++ b/lib/Target/CellSPU/SPUTargetMachine.cpp
@@ -32,10 +32,8 @@ namespace {
 // No assembler printer by default
 SPUTargetMachine::AsmPrinterCtorFn SPUTargetMachine::AsmPrinterCtor = 0;
 
-// Force static initialization when called from llvm/InitializeAllTargets.h
-namespace llvm {
-  void InitializeCellSPUTarget() { }
-}
+// Force static initialization.
+extern "C" void LLVMInitializeCellSPUTarget() { }
 
 const std::pair<unsigned, int> *
 SPUFrameInfo::getCalleeSaveSpillSlots(unsigned &NumEntries) const {
diff --git a/lib/Target/CppBackend/CPPBackend.cpp b/lib/Target/CppBackend/CPPBackend.cpp
index 1feea96..28f58e8 100644
--- a/lib/Target/CppBackend/CPPBackend.cpp
+++ b/lib/Target/CppBackend/CPPBackend.cpp
@@ -82,10 +82,8 @@ int CppBackendTargetMachineModule = 0;
 // Register the target.
 static RegisterTarget<CPPTargetMachine> X("cpp", "C++ backend");
 
-// Force static initialization when called from llvm/InitializeAllTargets.h
-namespace llvm {
-  void InitializeCppBackendTarget() { }
-}
+// Force static initialization.
+extern "C" void LLVMInitializeCppBackendTarget() { }
 
 namespace {
   typedef std::vector<const Type*> TypeList;
@@ -1836,7 +1834,9 @@ namespace {
                               const std::string& mName) {
     nl(Out) << "Module* " << fname << "() {";
     nl(Out,1) << "// Module Construction";
-    nl(Out) << "Module* mod = new Module(\"" << mName << "\");";
+    nl(Out) << "Module* mod = new Module(\"";
+    printEscapedString(mName);
+    Out << "\");";
     if (!TheModule->getTargetTriple().empty()) {
       nl(Out) << "mod->setDataLayout(\"" << TheModule->getDataLayout() << "\");";
     }
@@ -1869,7 +1869,9 @@ namespace {
   void CppWriter::printContents(const std::string& fname,
                                 const std::string& mName) {
     Out << "\nModule* " << fname << "(Module *mod) {\n";
-    Out << "\nmod->setModuleIdentifier(\"" << mName << "\");\n";
+    Out << "\nmod->setModuleIdentifier(\"";
+    printEscapedString(mName);
+    Out << "\");\n";
     printModuleBody();
     Out << "\nreturn mod;\n";
     Out << "\n}\n";
diff --git a/lib/Target/IA64/AsmPrinter/CMakeLists.txt b/lib/Target/IA64/AsmPrinter/CMakeLists.txt
index b81ed4a..ffe0eed 100644
--- a/lib/Target/IA64/AsmPrinter/CMakeLists.txt
+++ b/lib/Target/IA64/AsmPrinter/CMakeLists.txt
@@ -6,3 +6,4 @@ include_directories(
 add_llvm_library(LLVMIA64AsmPrinter
   IA64AsmPrinter.cpp
   )
+add_dependencies(LLVMIA64AsmPrinter IA64CodeGenTable_gen)
+\ No newline at end of file
diff --git a/lib/Target/IA64/AsmPrinter/IA64AsmPrinter.cpp b/lib/Target/IA64/AsmPrinter/IA64AsmPrinter.cpp
index 662c667..6b34a4e 100644
--- a/lib/Target/IA64/AsmPrinter/IA64AsmPrinter.cpp
+++ b/lib/Target/IA64/AsmPrinter/IA64AsmPrinter.cpp
@@ -20,6 +20,7 @@
 #include "IA64.h"
 #include "IA64TargetMachine.h"
 #include "llvm/Module.h"
+#include "llvm/MDNode.h"
 #include "llvm/Type.h"
 #include "llvm/CodeGen/AsmPrinter.h"
 #include "llvm/CodeGen/DwarfWriter.h"
@@ -269,6 +270,8 @@ void IA64AsmPrinter::printModuleLevelGV(const GlobalVariable* GVar) {
   O << "\n\n";
   std::string name = Mang->getValueName(GVar);
   Constant *C = GVar->getInitializer();
+  if (isa<MDNode>(C) || isa<MDString>(C))
+    return;
   unsigned Size = TD->getTypeAllocSize(C->getType());
   unsigned Align = TD->getPreferredAlignmentLog(GVar);
 
@@ -384,8 +387,5 @@ namespace {
 }
 
 
-// Force static initialization when called from
-// llvm/InitializeAllAsmPrinters.h
-namespace llvm {
-  void InitializeIA64AsmPrinter() { }
-}
+// Force static initialization.
+extern "C" void LLVMInitializeIA64AsmPrinter() { }
diff --git a/lib/Target/IA64/IA64TargetMachine.cpp b/lib/Target/IA64/IA64TargetMachine.cpp
index 0b93ee5..4b05e1d 100644
--- a/lib/Target/IA64/IA64TargetMachine.cpp
+++ b/lib/Target/IA64/IA64TargetMachine.cpp
@@ -26,10 +26,8 @@ static RegisterTarget<IA64TargetMachine> X("ia64",
 // No assembler printer by default
 IA64TargetMachine::AsmPrinterCtorFn IA64TargetMachine::AsmPrinterCtor = 0;
 
-// Force static initialization when called from llvm/InitializeAllTargets.h
-namespace llvm {
-  void InitializeIA64Target() { }
-}
+// Force static initialization.
+extern "C" void LLVMInitializeIA64Target() { }
 
 const TargetAsmInfo *IA64TargetMachine::createTargetAsmInfo() const {
   return new IA64TargetAsmInfo(*this);
diff --git a/lib/Target/MSIL/MSILWriter.cpp b/lib/Target/MSIL/MSILWriter.cpp
index 0aff14f..ee73c38 100644
--- a/lib/Target/MSIL/MSILWriter.cpp
+++ b/lib/Target/MSIL/MSILWriter.cpp
@@ -55,10 +55,8 @@ int MSILTargetMachineModule = 0;
 
 static RegisterTarget<MSILTarget> X("msil", "MSIL backend");
 
-// Force static initialization when called from llvm/InitializeAllTargets.h
-namespace llvm {
-  void InitializeMSILTarget() { }
-}
+// Force static initialization.
+extern "C" void LLVMInitializeMSILTarget() { }
 
 bool MSILModule::runOnModule(Module &M) {
   ModulePtr = &M;
diff --git a/lib/Target/MSP430/MSP430TargetMachine.cpp b/lib/Target/MSP430/MSP430TargetMachine.cpp
index 0f5244d..b1fe758 100644
--- a/lib/Target/MSP430/MSP430TargetMachine.cpp
+++ b/lib/Target/MSP430/MSP430TargetMachine.cpp
@@ -35,10 +35,8 @@ int MSP430TargetMachineModule = 0;
 static RegisterTarget<MSP430TargetMachine>
 X("msp430", "MSP430 [experimental]");
 
-// Force static initialization when called from llvm/InitializeAllTargets.h
-namespace llvm {
-  void InitializeMSP430Target() { }
-}
+// Force static initialization.
+extern "C" void LLVMInitializeMSP430Target() { }
 
 MSP430TargetMachine::MSP430TargetMachine(const Module &M,
                                          const std::string &FS) :
diff --git a/lib/Target/Mips/AsmPrinter/CMakeLists.txt b/lib/Target/Mips/AsmPrinter/CMakeLists.txt
index 942548d..197cc29 100644
--- a/lib/Target/Mips/AsmPrinter/CMakeLists.txt
+++ b/lib/Target/Mips/AsmPrinter/CMakeLists.txt
@@ -6,3 +6,4 @@ include_directories(
 add_llvm_library(LLVMMipsAsmPrinter
   MipsAsmPrinter.cpp
   )
+add_dependencies(LLVMMipsAsmPrinter MipsCodeGenTable_gen)
\ No newline at end of file
diff --git a/lib/Target/Mips/AsmPrinter/MipsAsmPrinter.cpp b/lib/Target/Mips/AsmPrinter/MipsAsmPrinter.cpp
index 077ec96..431630b 100644
--- a/lib/Target/Mips/AsmPrinter/MipsAsmPrinter.cpp
+++ b/lib/Target/Mips/AsmPrinter/MipsAsmPrinter.cpp
@@ -22,6 +22,7 @@
 #include "llvm/Constants.h"
 #include "llvm/DerivedTypes.h"
 #include "llvm/Module.h"
+#include "llvm/MDNode.h"
 #include "llvm/CodeGen/AsmPrinter.h"
 #include "llvm/CodeGen/DwarfWriter.h"
 #include "llvm/CodeGen/MachineFunctionPass.h"
@@ -483,6 +484,8 @@ printModuleLevelGV(const GlobalVariable* GVar) {
   O << "\n\n";
   std::string name = Mang->getValueName(GVar);
   Constant *C = GVar->getInitializer();
+  if (isa<MDNode>(C) || isa<MDString>(C))
+    return;
   const Type *CTy = C->getType();
   unsigned Size = TD->getTypeAllocSize(CTy);
   const ConstantArray *CVA = dyn_cast<ConstantArray>(C);
@@ -587,8 +590,5 @@ namespace {
   } Registrator;
 }
 
-// Force static initialization when called from
-// llvm/InitializeAllAsmPrinters.h
-namespace llvm {
-  void InitializeMipsAsmPrinter() { }
-}
+// Force static initialization.
+extern "C" void LLVMInitializeMipsAsmPrinter() { }
diff --git a/lib/Target/Mips/MipsTargetMachine.cpp b/lib/Target/Mips/MipsTargetMachine.cpp
index 83b9b62..c5f117b 100644
--- a/lib/Target/Mips/MipsTargetMachine.cpp
+++ b/lib/Target/Mips/MipsTargetMachine.cpp
@@ -34,10 +34,8 @@ static RegisterTarget<MipselTargetMachine>  Y("mipsel", "Mipsel");
 MipsTargetMachine::AsmPrinterCtorFn MipsTargetMachine::AsmPrinterCtor = 0;
 
 
-// Force static initialization when called from llvm/InitializeAllTargets.h
-namespace llvm {
-  void InitializeMipsTarget() { }
-}
+// Force static initialization.
+extern "C" void LLVMInitializeMipsTarget() { }
 
 const TargetAsmInfo *MipsTargetMachine::
 createTargetAsmInfo() const 
diff --git a/lib/Target/PIC16/PIC16DebugInfo.cpp b/lib/Target/PIC16/PIC16DebugInfo.cpp
index 27551cd..4300588 100644
--- a/lib/Target/PIC16/PIC16DebugInfo.cpp
+++ b/lib/Target/PIC16/PIC16DebugInfo.cpp
@@ -300,7 +300,7 @@ void PIC16DbgInfo::EmitCompositeTypeElements (DICompositeType CTy,
     // Get mangleddd name for this structure/union  element.
     std::string MangMemName = ElementName + UniqueSuffix;
     PopulateDebugInfo(DITy, TypeNo, HasAux, ElementAux, TagName);
-    short Class;
+    short Class = 0;
     if( CTy.getTag() == dwarf::DW_TAG_union_type)
       Class = PIC16Dbg::C_MOU;
     else if  (CTy.getTag() == dwarf::DW_TAG_structure_type)
diff --git a/lib/Target/PIC16/PIC16ISelLowering.cpp b/lib/Target/PIC16/PIC16ISelLowering.cpp
index 122af70..ec1db90 100644
--- a/lib/Target/PIC16/PIC16ISelLowering.cpp
+++ b/lib/Target/PIC16/PIC16ISelLowering.cpp
@@ -97,6 +97,16 @@ static const char *getIntrinsicName(unsigned opcode) {
   case RTLIB::SUB_F32: Basename = "sub.f32"; break;
   case RTLIB::MUL_F32: Basename = "mul.f32"; break;
   case RTLIB::DIV_F32: Basename = "div.f32"; break;
+
+  // Floating point comparison
+  case RTLIB::O_F32: Basename = "unordered.f32"; break;
+  case RTLIB::UO_F32: Basename = "unordered.f32"; break;
+  case RTLIB::OLE_F32: Basename = "le.f32"; break;
+  case RTLIB::OGE_F32: Basename = "ge.f32"; break;
+  case RTLIB::OLT_F32: Basename = "lt.f32"; break;
+  case RTLIB::OGT_F32: Basename = "gt.f32"; break;
+  case RTLIB::OEQ_F32: Basename = "eq.f32"; break;
+  case RTLIB::UNE_F32: Basename = "neq.f32"; break;
   }
   
   std::string prefix = PAN::getTagName(PAN::PREFIX_SYMBOL);
@@ -187,6 +197,27 @@ PIC16TargetLowering::PIC16TargetLowering(PIC16TargetMachine &TM)
   setLibcallName(RTLIB::MUL_F32, getIntrinsicName(RTLIB::MUL_F32));
   setLibcallName(RTLIB::DIV_F32, getIntrinsicName(RTLIB::DIV_F32));
 
+  // Floating point comparison. O_F32 and UO_F32 are both lowered to the
+  // "unordered.f32" routine; only the test applied to its result differs.
+  setLibcallName(RTLIB::O_F32, getIntrinsicName(RTLIB::O_F32));
+  setLibcallName(RTLIB::UO_F32, getIntrinsicName(RTLIB::UO_F32));
+  setLibcallName(RTLIB::OLE_F32, getIntrinsicName(RTLIB::OLE_F32));
+  setLibcallName(RTLIB::OGE_F32, getIntrinsicName(RTLIB::OGE_F32));
+  setLibcallName(RTLIB::OLT_F32, getIntrinsicName(RTLIB::OLT_F32));
+  setLibcallName(RTLIB::OGT_F32, getIntrinsicName(RTLIB::OGT_F32));
+  setLibcallName(RTLIB::OEQ_F32, getIntrinsicName(RTLIB::OEQ_F32));
+  setLibcallName(RTLIB::UNE_F32, getIntrinsicName(RTLIB::UNE_F32));
+
+  // Return value comparisons of floating point calls.
+  setCmpLibcallCC(RTLIB::OEQ_F32, ISD::SETNE);
+  setCmpLibcallCC(RTLIB::UNE_F32, ISD::SETNE);
+  setCmpLibcallCC(RTLIB::OLT_F32, ISD::SETNE);
+  setCmpLibcallCC(RTLIB::OLE_F32, ISD::SETNE);
+  setCmpLibcallCC(RTLIB::OGE_F32, ISD::SETNE);
+  setCmpLibcallCC(RTLIB::OGT_F32, ISD::SETNE);
+  setCmpLibcallCC(RTLIB::UO_F32, ISD::SETNE);
+  setCmpLibcallCC(RTLIB::O_F32, ISD::SETEQ);
+
   setOperationAction(ISD::GlobalAddress, MVT::i16, Custom);
   setOperationAction(ISD::ExternalSymbol, MVT::i16, Custom);
 
diff --git a/lib/Target/PIC16/PIC16MemSelOpt.cpp b/lib/Target/PIC16/PIC16MemSelOpt.cpp
index 20f926d..43d47ae 100644
--- a/lib/Target/PIC16/PIC16MemSelOpt.cpp
+++ b/lib/Target/PIC16/PIC16MemSelOpt.cpp
@@ -109,7 +109,7 @@ bool MemSelOpt::processInstruction(MachineInstr *MI) {
 
   // If this insn is not going to access any memory, return.
   const TargetInstrDesc &TID = TII->get(MI->getOpcode());
-  if (! (TID.isCall() || TID.mayLoad() || TID.mayStore()))
+  if (!(TID.isBranch() || TID.isCall() || TID.mayLoad() || TID.mayStore()))
     return false;
 
   // Scan for the memory address operand.
@@ -119,8 +119,9 @@ bool MemSelOpt::processInstruction(MachineInstr *MI) {
   for (unsigned i = 0; i < NumOperands; i++) {
     MachineOperand Op = MI->getOperand(i);
     if (Op.getType() ==  MachineOperand::MO_GlobalAddress ||
-        Op.getType() ==  MachineOperand::MO_ExternalSymbol) {
-      // We found one mem operand. Next one should be BS.
+        Op.getType() ==  MachineOperand::MO_ExternalSymbol || 
+        Op.getType() ==  MachineOperand::MO_MachineBasicBlock) {
+      // We found one mem operand. Next one may be BS.
       MemOpPos = i;
       break;
     }
@@ -133,7 +134,8 @@ bool MemSelOpt::processInstruction(MachineInstr *MI) {
   MachineOperand &Op = MI->getOperand(MemOpPos);
 
   // If this is a pagesel material, handle it first.
-  if (MI->getOpcode() == PIC16::CALL) {
+  if (MI->getOpcode() == PIC16::CALL ||
+      MI->getOpcode() == PIC16::br_uncond) {
     DebugLoc dl = MI->getDebugLoc();
     BuildMI(*MBB, MI, dl, TII->get(PIC16::pagesel)).
       addOperand(Op);
diff --git a/lib/Target/PIC16/PIC16TargetMachine.cpp b/lib/Target/PIC16/PIC16TargetMachine.cpp
index d4f46a4..4304732 100644
--- a/lib/Target/PIC16/PIC16TargetMachine.cpp
+++ b/lib/Target/PIC16/PIC16TargetMachine.cpp
@@ -37,10 +37,8 @@ X("pic16", "PIC16 14-bit [experimental].");
 static RegisterTarget<CooperTargetMachine> 
 Y("cooper", "PIC16 Cooper [experimental].");
 
-// Force static initialization when called from llvm/InitializeAllTargets.h
-namespace llvm {
-  void InitializePIC16Target() { }
-}
+// Force static initialization.
+extern "C" void LLVMInitializePIC16Target() { }
 
 // PIC16TargetMachine - Traditional PIC16 Machine.
 PIC16TargetMachine::PIC16TargetMachine(const Module &M, const std::string &FS,
diff --git a/lib/Target/PowerPC/AsmPrinter/CMakeLists.txt b/lib/Target/PowerPC/AsmPrinter/CMakeLists.txt
index 1f80b17..236b264 100644
--- a/lib/Target/PowerPC/AsmPrinter/CMakeLists.txt
+++ b/lib/Target/PowerPC/AsmPrinter/CMakeLists.txt
@@ -3,3 +3,4 @@ include_directories( ${CMAKE_CURRENT_BINARY_DIR}/.. ${CMAKE_CURRENT_SOURCE_DIR}/
 add_llvm_library(LLVMPowerPCAsmPrinter
   PPCAsmPrinter.cpp
   )
+add_dependencies(LLVMPowerPCAsmPrinter PowerPCCodeGenTable_gen)
\ No newline at end of file
diff --git a/lib/Target/PowerPC/AsmPrinter/PPCAsmPrinter.cpp b/lib/Target/PowerPC/AsmPrinter/PPCAsmPrinter.cpp
index c7bfb6d..c5aa6ae 100644
--- a/lib/Target/PowerPC/AsmPrinter/PPCAsmPrinter.cpp
+++ b/lib/Target/PowerPC/AsmPrinter/PPCAsmPrinter.cpp
@@ -24,6 +24,7 @@
 #include "llvm/Constants.h"
 #include "llvm/DerivedTypes.h"
 #include "llvm/Module.h"
+#include "llvm/MDNode.h"
 #include "llvm/Assembly/Writer.h"
 #include "llvm/CodeGen/AsmPrinter.h"
 #include "llvm/CodeGen/DwarfWriter.h"
@@ -194,8 +195,6 @@ namespace {
             std::string Name = Mang->getValueName(GV);
             FnStubs.insert(Name);
             printSuffixedName(Name, "$stub");
-            if (GV->hasExternalWeakLinkage())
-              ExtWeakSymbols.insert(GV);
             return;
           }
         }
@@ -295,20 +294,17 @@ namespace {
 
   /// PPCLinuxAsmPrinter - PowerPC assembly printer, customized for Linux
   class VISIBILITY_HIDDEN PPCLinuxAsmPrinter : public PPCAsmPrinter {
-    DwarfWriter *DW;
-    MachineModuleInfo *MMI;
   public:
     explicit PPCLinuxAsmPrinter(raw_ostream &O, PPCTargetMachine &TM,
                                 const TargetAsmInfo *T, CodeGenOpt::Level OL,
                                 bool V)
-      : PPCAsmPrinter(O, TM, T, OL, V), DW(0), MMI(0) {}
+      : PPCAsmPrinter(O, TM, T, OL, V){}
 
     virtual const char *getPassName() const {
       return "Linux PPC Assembly Printer";
     }
 
     bool runOnMachineFunction(MachineFunction &F);
-    bool doInitialization(Module &M);
     bool doFinalization(Module &M);
 
     void getAnalysisUsage(AnalysisUsage &AU) const {
@@ -324,14 +320,12 @@ namespace {
   /// PPCDarwinAsmPrinter - PowerPC assembly printer, customized for Darwin/Mac
   /// OS X
   class VISIBILITY_HIDDEN PPCDarwinAsmPrinter : public PPCAsmPrinter {
-    DwarfWriter *DW;
-    MachineModuleInfo *MMI;
     raw_ostream &OS;
   public:
     explicit PPCDarwinAsmPrinter(raw_ostream &O, PPCTargetMachine &TM,
                                  const TargetAsmInfo *T, CodeGenOpt::Level OL,
                                  bool V)
-      : PPCAsmPrinter(O, TM, T, OL, V), DW(0), MMI(0), OS(O) {}
+      : PPCAsmPrinter(O, TM, T, OL, V), OS(O) {}
 
     virtual const char *getPassName() const {
       return "Darwin PPC Assembly Printer";
@@ -403,17 +397,12 @@ void PPCAsmPrinter::printOp(const MachineOperand &MO) {
           GVStubs.insert(Name);
           printSuffixedName(Name, "$non_lazy_ptr");
         }
-        if (GV->hasExternalWeakLinkage())
-          ExtWeakSymbols.insert(GV);
         return;
       }
     }
     O << Name;
 
     printOffset(MO.getOffset());
-
-    if (GV->hasExternalWeakLinkage())
-      ExtWeakSymbols.insert(GV);
     return;
   }
 
@@ -644,15 +633,6 @@ bool PPCLinuxAsmPrinter::runOnMachineFunction(MachineFunction &MF) {
   return false;
 }
 
-bool PPCLinuxAsmPrinter::doInitialization(Module &M) {
-  bool Result = AsmPrinter::doInitialization(M);
-  DW = getAnalysisIfAvailable<DwarfWriter>();
-  MMI = getAnalysisIfAvailable<MachineModuleInfo>();
-  assert(MMI);
-  SwitchToSection(TAI->getTextSection());
-  return Result;
-}
-
 /// PrintUnmangledNameSafely - Print out the printable characters in the name.
 /// Don't print things like \\n or \\0.
 static void PrintUnmangledNameSafely(const Value *V, raw_ostream &OS) {
@@ -677,6 +657,8 @@ void PPCLinuxAsmPrinter::printModuleLevelGV(const GlobalVariable* GVar) {
   printVisibility(name, GVar->getVisibility());
 
   Constant *C = GVar->getInitializer();
+  if (isa<MDNode>(C) || isa<MDString>(C))
+    return;
   const Type *Type = C->getType();
   unsigned Size = TD->getTypeAllocSize(Type);
   unsigned Align = TD->getPreferredAlignmentLog(GVar);
@@ -743,12 +725,6 @@ void PPCLinuxAsmPrinter::printModuleLevelGV(const GlobalVariable* GVar) {
   }
   O << '\n';
 
-  // If the initializer is a extern weak symbol, remember to emit the weak
-  // reference!
-  if (const GlobalValue *GV = dyn_cast<GlobalValue>(C))
-    if (GV->hasExternalWeakLinkage())
-      ExtWeakSymbols.insert(GV);
-
   EmitGlobalConstant(C);
   O << '\n';
 }
@@ -759,11 +735,6 @@ bool PPCLinuxAsmPrinter::doFinalization(Module &M) {
        I != E; ++I)
     printModuleLevelGV(I);
 
-  // TODO
-
-  // Emit initial debug information.
-  DW->EndModule();
-
   return AsmPrinter::doFinalization(M);
 }
 
@@ -866,8 +837,6 @@ bool PPCDarwinAsmPrinter::doInitialization(Module &M) {
   O << "\t.machine " << CPUDirectives[Directive] << '\n';
 
   bool Result = AsmPrinter::doInitialization(M);
-  DW = getAnalysisIfAvailable<DwarfWriter>();
-  MMI = getAnalysisIfAvailable<MachineModuleInfo>();
   assert(MMI);
 
   // Prime text sections so they are adjacent.  This reduces the likelihood a
@@ -987,12 +956,6 @@ void PPCDarwinAsmPrinter::printModuleLevelGV(const GlobalVariable* GVar) {
   }
   O << '\n';
 
-  // If the initializer is a extern weak symbol, remember to emit the weak
-  // reference!
-  if (const GlobalValue *GV = dyn_cast<GlobalValue>(C))
-    if (GV->hasExternalWeakLinkage())
-      ExtWeakSymbols.insert(GV);
-
   EmitGlobalConstant(C);
   O << '\n';
 }
@@ -1100,8 +1063,7 @@ bool PPCDarwinAsmPrinter::doFinalization(Module &M) {
   if (TAI->doesSupportExceptionHandling() && MMI) {
     // Add the (possibly multiple) personalities to the set of global values.
     // Only referenced functions get into the Personalities list.
-    const std::vector<Function *>& Personalities = MMI->getPersonalities();
-
+    const std::vector<Function *> &Personalities = MMI->getPersonalities();
     for (std::vector<Function *>::const_iterator I = Personalities.begin(),
            E = Personalities.end(); I != E; ++I)
       if (*I) GVStubs.insert("_" + (*I)->getName());
@@ -1139,10 +1101,6 @@ bool PPCDarwinAsmPrinter::doFinalization(Module &M) {
     }
   }
 
-
-  // Emit initial debug information.
-  DW->EndModule();
-
   // Funny Darwin hack: This flag tells the linker that no global symbols
   // contain code that falls through to other global symbols (e.g. the obvious
   // implementation of multiple entry points).  If this doesn't occur, the
@@ -1185,8 +1143,5 @@ namespace {
 extern "C" int PowerPCAsmPrinterForceLink;
 int PowerPCAsmPrinterForceLink = 0;
 
-// Force static initialization when called from
-// llvm/InitializeAllAsmPrinters.h
-namespace llvm {
-  void InitializePowerPCAsmPrinter() { }
-}
+// Force static initialization.
+extern "C" void LLVMInitializePowerPCAsmPrinter() { }
diff --git a/lib/Target/PowerPC/PPCTargetMachine.cpp b/lib/Target/PowerPC/PPCTargetMachine.cpp
index 3e89885..2f95d7e 100644
--- a/lib/Target/PowerPC/PPCTargetMachine.cpp
+++ b/lib/Target/PowerPC/PPCTargetMachine.cpp
@@ -35,10 +35,8 @@ X("ppc32", "PowerPC 32");
 static RegisterTarget<PPC64TargetMachine>
 Y("ppc64", "PowerPC 64");
 
-// Force static initialization when called from llvm/InitializeAllTargets.h
-namespace llvm {
-  void InitializePowerPCTarget() { }
-}
+// Force static initialization.
+extern "C" void LLVMInitializePowerPCTarget() { }
 
 // No assembler printer by default
 PPCTargetMachine::AsmPrinterCtorFn PPCTargetMachine::AsmPrinterCtor = 0;
diff --git a/lib/Target/Sparc/AsmPrinter/CMakeLists.txt b/lib/Target/Sparc/AsmPrinter/CMakeLists.txt
index de905a9..e3ca18e 100644
--- a/lib/Target/Sparc/AsmPrinter/CMakeLists.txt
+++ b/lib/Target/Sparc/AsmPrinter/CMakeLists.txt
@@ -3,3 +3,4 @@ include_directories( ${CMAKE_CURRENT_BINARY_DIR}/.. ${CMAKE_CURRENT_SOURCE_DIR}/
 add_llvm_library(LLVMSparcAsmPrinter
   SparcAsmPrinter.cpp
   )
+add_dependencies(LLVMSparcAsmPrinter SparcCodeGenTable_gen)
\ No newline at end of file
diff --git a/lib/Target/Sparc/AsmPrinter/SparcAsmPrinter.cpp b/lib/Target/Sparc/AsmPrinter/SparcAsmPrinter.cpp
index 6a2fdca..cb23f62 100644
--- a/lib/Target/Sparc/AsmPrinter/SparcAsmPrinter.cpp
+++ b/lib/Target/Sparc/AsmPrinter/SparcAsmPrinter.cpp
@@ -19,6 +19,7 @@
 #include "llvm/Constants.h"
 #include "llvm/DerivedTypes.h"
 #include "llvm/Module.h"
+#include "llvm/MDNode.h"
 #include "llvm/CodeGen/AsmPrinter.h"
 #include "llvm/CodeGen/DwarfWriter.h"
 #include "llvm/CodeGen/MachineFunctionPass.h"
@@ -46,11 +47,12 @@ namespace {
     ///
     typedef std::map<const Value *, unsigned> ValueMapTy;
     ValueMapTy NumberForBB;
+    unsigned BBNumber;
   public:
     explicit SparcAsmPrinter(raw_ostream &O, TargetMachine &TM,
                              const TargetAsmInfo *T, CodeGenOpt::Level OL,
                              bool V)
-      : AsmPrinter(O, TM, T, OL, V) {}
+      : AsmPrinter(O, TM, T, OL, V), BBNumber(0) {}
 
     virtual const char *getPassName() const {
       return "Sparc Assembly Printer";
@@ -101,7 +103,6 @@ bool SparcAsmPrinter::runOnMachineFunction(MachineFunction &MF) {
 
   // BBNumber is used here so that a given Printer will never give two
   // BBs the same name. (If you have a better way, please let me know!)
-  static unsigned BBNumber = 0;
 
   O << "\n\n";
 
@@ -253,6 +254,8 @@ void SparcAsmPrinter::printModuleLevelGV(const GlobalVariable* GVar) {
   O << "\n\n";
   std::string name = Mang->getValueName(GVar);
   Constant *C = GVar->getInitializer();
+  if (isa<MDNode>(C) || isa<MDString>(C))
+    return;
   unsigned Size = TD->getTypeAllocSize(C->getType());
   unsigned Align = TD->getPreferredAlignment(GVar);
 
@@ -362,8 +365,5 @@ namespace {
   } Registrator;
 }
 
-// Force static initialization when called from
-// llvm/InitializeAllAsmPrinters.h
-namespace llvm {
-  void InitializeSparcAsmPrinter() { }
-}
+// Force static initialization.
+extern "C" void LLVMInitializeSparcAsmPrinter() { }
diff --git a/lib/Target/Sparc/SparcTargetMachine.cpp b/lib/Target/Sparc/SparcTargetMachine.cpp
index fd0f124..aef238d 100644
--- a/lib/Target/Sparc/SparcTargetMachine.cpp
+++ b/lib/Target/Sparc/SparcTargetMachine.cpp
@@ -25,10 +25,8 @@ static RegisterTarget<SparcTargetMachine> X("sparc", "SPARC");
 SparcTargetMachine::AsmPrinterCtorFn SparcTargetMachine::AsmPrinterCtor = 0;
 
 
-// Force static initialization when called from llvm/InitializeAllTargets.h
-namespace llvm {
-  void InitializeSparcTarget() { }
-}
+// Force static initialization.
+extern "C" void LLVMInitializeSparcTarget() { }
 
 const TargetAsmInfo *SparcTargetMachine::createTargetAsmInfo() const {
   // FIXME: Handle Solaris subtarget someday :)
diff --git a/lib/Target/X86/AsmPrinter/CMakeLists.txt b/lib/Target/X86/AsmPrinter/CMakeLists.txt
index 2079a9f..a28c826 100644
--- a/lib/Target/X86/AsmPrinter/CMakeLists.txt
+++ b/lib/Target/X86/AsmPrinter/CMakeLists.txt
@@ -6,3 +6,4 @@ add_llvm_library(LLVMX86AsmPrinter
   X86AsmPrinter.cpp
   X86IntelAsmPrinter.cpp
   )
+add_dependencies(LLVMX86AsmPrinter X86CodeGenTable_gen)
\ No newline at end of file
diff --git a/lib/Target/X86/AsmPrinter/X86ATTAsmPrinter.cpp b/lib/Target/X86/AsmPrinter/X86ATTAsmPrinter.cpp
index 60ed4f0..e75cfc5 100644
--- a/lib/Target/X86/AsmPrinter/X86ATTAsmPrinter.cpp
+++ b/lib/Target/X86/AsmPrinter/X86ATTAsmPrinter.cpp
@@ -23,10 +23,13 @@
 #include "llvm/CallingConv.h"
 #include "llvm/DerivedTypes.h"
 #include "llvm/Module.h"
+#include "llvm/MDNode.h"
 #include "llvm/Type.h"
 #include "llvm/ADT/Statistic.h"
 #include "llvm/ADT/StringExtras.h"
+#include "llvm/MC/MCContext.h"
 #include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCStreamer.h"
 #include "llvm/CodeGen/DwarfWriter.h"
 #include "llvm/CodeGen/MachineJumpTableInfo.h"
 #include "llvm/Support/CommandLine.h"
@@ -41,18 +44,26 @@ STATISTIC(EmittedInsts, "Number of machine instrs printed");
 static cl::opt<bool> NewAsmPrinter("experimental-asm-printer",
                                    cl::Hidden);
 
-static std::string getPICLabelString(unsigned FnNum,
-                                     const TargetAsmInfo *TAI,
-                                     const X86Subtarget* Subtarget) {
-  std::string label;
+//===----------------------------------------------------------------------===//
+// Primitive Helper Functions.
+//===----------------------------------------------------------------------===//
+
+void X86ATTAsmPrinter::PrintPICBaseSymbol() const {
   if (Subtarget->isTargetDarwin())
-    label =  "\"L" + utostr_32(FnNum) + "$pb\"";
+    O << "\"L" << getFunctionNumber() << "$pb\"";
   else if (Subtarget->isTargetELF())
-    label = ".Lllvm$" + utostr_32(FnNum) + "." "$piclabel";
+    O << ".Lllvm$" << getFunctionNumber() << "." "$piclabel";
   else
     assert(0 && "Don't know how to print PIC label!\n");
+}
 
-  return label;
+/// PrintUnmangledNameSafely - Print out the printable characters in the name.
+/// Don't print things like \\n or \\0.
+static void PrintUnmangledNameSafely(const Value *V, raw_ostream &OS) {
+  for (const char *Name = V->getNameStart(), *E = Name+V->getNameLen();
+       Name != E; ++Name)
+    if (isprint(*Name))
+      OS << *Name;
 }
 
 static X86MachineFunctionInfo calculateFunctionInfo(const Function *F,
@@ -89,15 +100,6 @@ static X86MachineFunctionInfo calculateFunctionInfo(const Function *F,
   return Info;
 }
 
-/// PrintUnmangledNameSafely - Print out the printable characters in the name.
-/// Don't print things like \\n or \\0.
-static void PrintUnmangledNameSafely(const Value *V, raw_ostream &OS) {
-  for (const char *Name = V->getNameStart(), *E = Name+V->getNameLen();
-       Name != E; ++Name)
-    if (isprint(*Name))
-      OS << *Name;
-}
-
 /// decorateName - Query FunctionInfoMap and use this information for various
 /// name decoration.
 void X86ATTAsmPrinter::decorateName(std::string &Name,
@@ -152,6 +154,8 @@ void X86ATTAsmPrinter::decorateName(std::string &Name,
   }
 }
 
+
+
 void X86ATTAsmPrinter::emitFunctionHeader(const MachineFunction &MF) {
   const Function *F = MF.getFunction();
 
@@ -159,9 +163,12 @@ void X86ATTAsmPrinter::emitFunctionHeader(const MachineFunction &MF) {
 
   SwitchToSection(TAI->SectionForGlobal(F));
 
+  // FIXME: A function's alignment should be part of MachineFunction.  There
+  // shouldn't be a policy decision here.
   unsigned FnAlign = 4;
   if (F->hasFnAttr(Attribute::OptimizeForSize))
     FnAlign = 1;
+  
   switch (F->getLinkage()) {
   default: assert(0 && "Unknown linkage type!");
   case Function::InternalLinkage:  // Symbols default to internal.
@@ -283,13 +290,8 @@ bool X86ATTAsmPrinter::runOnMachineFunction(MachineFunction &MF) {
   return false;
 }
 
-static inline bool shouldPrintGOT(TargetMachine &TM, const X86Subtarget* ST) {
-  return ST->isPICStyleGOT() && TM.getRelocationModel() == Reloc::PIC_;
-}
-
 static inline bool shouldPrintPLT(TargetMachine &TM, const X86Subtarget* ST) {
-  return ST->isTargetELF() && TM.getRelocationModel() == Reloc::PIC_ &&
-      (ST->isPICStyleRIPRel() || ST->isPICStyleGOT());
+  return ST->isTargetELF() && TM.getRelocationModel() == Reloc::PIC_;
 }
 
 static inline bool shouldPrintStub(TargetMachine &TM, const X86Subtarget* ST) {
@@ -324,6 +326,8 @@ void X86ATTAsmPrinter::print_pcrel_imm(const MachineInstr *MI, unsigned OpNo) {
     }
     
     if (shouldPrintStub(TM, Subtarget)) {
+      // DARWIN/X86-32 in != static mode.
+      
       // Link-once, declaration, or Weakly-linked global variables need
       // non-lazily-resolved stubs
       if (GV->isDeclaration() || GV->isWeakForLinker()) {
@@ -354,9 +358,8 @@ void X86ATTAsmPrinter::print_pcrel_imm(const MachineInstr *MI, unsigned OpNo) {
         O << Name;
       }
     } else {
-      if (GV->hasDLLImportLinkage()) {
+      if (GV->hasDLLImportLinkage())
         O << "__imp_";
-      }
       O << Name;
       
       if (shouldPrintPLT(TM, Subtarget)) {
@@ -370,9 +373,6 @@ void X86ATTAsmPrinter::print_pcrel_imm(const MachineInstr *MI, unsigned OpNo) {
         FnStubs.insert(Name);
     }
     
-    if (GV->hasExternalWeakLinkage())
-      ExtWeakSymbols.insert(GV);
-    
     printOffset(MO.getOffset());
     
     if (needCloseParen)
@@ -386,7 +386,9 @@ void X86ATTAsmPrinter::print_pcrel_imm(const MachineInstr *MI, unsigned OpNo) {
     Name += MO.getSymbolName();
     // Print function stub suffix unless it's Mac OS X 10.5 and up.
     if (shouldPrintStub(TM, Subtarget) && 
+        // DARWIN/X86-32 in != static mode.
         !(Subtarget->isTargetDarwin() && Subtarget->getDarwinVers() >= 9)) {
+      
       FnStubs.insert(Name);
       printSuffixedName(Name, "$stub");
       return;
@@ -401,23 +403,15 @@ void X86ATTAsmPrinter::print_pcrel_imm(const MachineInstr *MI, unsigned OpNo) {
     
     O << Name;
     
-    if (shouldPrintPLT(TM, Subtarget)) {
-      std::string GOTName(TAI->getGlobalPrefix());
-      GOTName+="_GLOBAL_OFFSET_TABLE_";
-      if (Name == GOTName)
-        // HACK! Emit extra offset to PC during printing GOT offset to
-        // compensate for the size of popl instruction. The resulting code
-        // should look like:
-        //   call .piclabel
-        // piclabel:
-        //   popl %some_register
-        //   addl $_GLOBAL_ADDRESS_TABLE_ + [.-piclabel], %some_register
-        O << " + [.-"
-          << getPICLabelString(getFunctionNumber(), TAI, Subtarget) << ']';
-      
-      O << "@PLT";
+    if (MO.getTargetFlags() == X86II::MO_GOT_ABSOLUTE_ADDRESS) {
+      O << " + [.-";
+      PrintPICBaseSymbol();
+      O << ']';
     }
     
+    if (shouldPrintPLT(TM, Subtarget))
+      O << "@PLT";
+    
     if (needCloseParen)
       O << ')';
     
@@ -427,9 +421,10 @@ void X86ATTAsmPrinter::print_pcrel_imm(const MachineInstr *MI, unsigned OpNo) {
 }
 
 void X86ATTAsmPrinter::printOperand(const MachineInstr *MI, unsigned OpNo,
-                                    const char *Modifier, bool NotRIPRel) {
+                                    const char *Modifier) {
   const MachineOperand &MO = MI->getOperand(OpNo);
   switch (MO.getType()) {
+  default: assert(0 && "unknown operand type!");
   case MachineOperand::MO_Register: {
     assert(TargetRegisterInfo::isPhysicalRegister(MO.getReg()) &&
            "Virtual registers should not make it this far!");
@@ -456,18 +451,7 @@ void X86ATTAsmPrinter::printOperand(const MachineInstr *MI, unsigned OpNo,
     if (!isMemOp) O << '$';
     O << TAI->getPrivateGlobalPrefix() << "JTI" << getFunctionNumber() << '_'
       << MO.getIndex();
-
-    if (TM.getRelocationModel() == Reloc::PIC_) {
-      if (Subtarget->isPICStyleStub())
-        O << "-\"" << TAI->getPrivateGlobalPrefix() << getFunctionNumber()
-          << "$pb\"";
-      else if (Subtarget->isPICStyleGOT())
-        O << "@GOTOFF";
-    }
-
-    if (isMemOp && Subtarget->isPICStyleRIPRel() && !NotRIPRel)
-      O << "(%rip)";
-    return;
+    break;
   }
   case MachineOperand::MO_ConstantPoolIndex: {
     bool isMemOp  = Modifier && !strcmp(Modifier, "mem");
@@ -475,38 +459,17 @@ void X86ATTAsmPrinter::printOperand(const MachineInstr *MI, unsigned OpNo,
     O << TAI->getPrivateGlobalPrefix() << "CPI" << getFunctionNumber() << '_'
       << MO.getIndex();
 
-    if (TM.getRelocationModel() == Reloc::PIC_) {
-      if (Subtarget->isPICStyleStub())
-        O << "-\"" << TAI->getPrivateGlobalPrefix() << getFunctionNumber()
-          << "$pb\"";
-      else if (Subtarget->isPICStyleGOT())
-        O << "@GOTOFF";
-    }
-
     printOffset(MO.getOffset());
-
-    if (isMemOp && Subtarget->isPICStyleRIPRel() && !NotRIPRel)
-      O << "(%rip)";
-    return;
+    break;
   }
   case MachineOperand::MO_GlobalAddress: {
     bool isMemOp = Modifier && !strcmp(Modifier, "mem");
-    bool needCloseParen = false;
 
     const GlobalValue *GV = MO.getGlobal();
-    const GlobalVariable *GVar = dyn_cast<GlobalVariable>(GV);
-    if (!GVar) {
-      // If GV is an alias then use the aliasee for determining
-      // thread-localness.
-      if (const GlobalAlias *GA = dyn_cast<GlobalAlias>(GV))
-        GVar =dyn_cast_or_null<GlobalVariable>(GA->resolveAliasedGlobal(false));
-    }
-
-    bool isThreadLocal = GVar && GVar->isThreadLocal();
-
     std::string Name = Mang->getValueName(GV);
     decorateName(Name, GV);
 
+    bool needCloseParen = false;
     if (!isMemOp)
       O << '$';
     else if (Name[0] == '$') {
@@ -517,6 +480,8 @@ void X86ATTAsmPrinter::printOperand(const MachineInstr *MI, unsigned OpNo,
     }
 
     if (shouldPrintStub(TM, Subtarget)) {
+      // DARWIN/X86-32 in != static mode.
+
       // Link-once, declaration, or Weakly-linked global variables need
       // non-lazily-resolved stubs
       if (GV->isDeclaration() || GV->isWeakForLinker()) {
@@ -539,118 +504,59 @@ void X86ATTAsmPrinter::printOperand(const MachineInstr *MI, unsigned OpNo,
         O << Name;
       }
 
-      if (TM.getRelocationModel() == Reloc::PIC_)
-        O << '-' << getPICLabelString(getFunctionNumber(), TAI, Subtarget);
+      if (TM.getRelocationModel() == Reloc::PIC_) {
+        O << '-';
+        PrintPICBaseSymbol();
+      }        
     } else {
       if (GV->hasDLLImportLinkage())
         O << "__imp_";
       O << Name;
     }
 
-    if (GV->hasExternalWeakLinkage())
-      ExtWeakSymbols.insert(GV);
-
     printOffset(MO.getOffset());
 
     if (needCloseParen)
       O << ')';
     
-    bool isRIPRelative = false;
-    if (isThreadLocal) {
-      TLSModel::Model model = getTLSModel(GVar, TM.getRelocationModel());
-      switch (model) {
-      case TLSModel::GeneralDynamic:
-        O << "@TLSGD";
-        break;
-      case TLSModel::LocalDynamic:
-        // O << "@TLSLD"; // local dynamic not implemented
-        O << "@TLSGD";
-        break;
-      case TLSModel::InitialExec:
-        if (Subtarget->is64Bit()) {
-          assert (!NotRIPRel);
-          O << "@GOTTPOFF";
-          isRIPRelative = true;
-        } else {
-          O << "@INDNTPOFF";
-        }
-        break;
-      case TLSModel::LocalExec:
-        if (Subtarget->is64Bit())
-          O << "@TPOFF";
-        else
-          O << "@NTPOFF";
-        break;
-      default:
-        assert (0 && "Unknown TLS model");
-      }
-    } else if (isMemOp) {
-      if (shouldPrintGOT(TM, Subtarget)) {
-        if (Subtarget->GVRequiresExtraLoad(GV, TM, false))
-          O << "@GOT";
-        else
-          O << "@GOTOFF";
-      } else if (Subtarget->isPICStyleRIPRel() &&
-                 !NotRIPRel) {
-        if (TM.getRelocationModel() != Reloc::Static) {
-          if (Subtarget->GVRequiresExtraLoad(GV, TM, false))
-            O << "@GOTPCREL";
-        }
-        
-        isRIPRelative = true;
-      }
-    }
-
-    // Use rip when possible to reduce code size, except when
-    // index or base register are also part of the address. e.g.
-    // foo(%rip)(%rcx,%rax,4) is not legal.
-    if (isRIPRelative)
-      O << "(%rip)";
-    
-    return;
+    break;
   }
-  case MachineOperand::MO_ExternalSymbol: {
-    bool isMemOp  = Modifier && !strcmp(Modifier, "mem");
-    bool needCloseParen = false;
-    std::string Name(TAI->getGlobalPrefix());
-    Name += MO.getSymbolName();
-
-    // Print function stub suffix unless it's Mac OS X 10.5 and up.
-    if (!isMemOp)
-      O << '$';
-    else if (Name[0] == '$') {
-      // The name begins with a dollar-sign. In order to avoid having it look
-      // like an integer immediate to the assembler, enclose it in parens.
-      O << '(';
-      needCloseParen = true;
-    }
-
-    O << Name;
-
-    if (shouldPrintPLT(TM, Subtarget)) {
-      std::string GOTName(TAI->getGlobalPrefix());
-      GOTName+="_GLOBAL_OFFSET_TABLE_";
-      if (Name == GOTName)
-        // HACK! Emit extra offset to PC during printing GOT offset to
-        // compensate for the size of popl instruction. The resulting code
-        // should look like:
-        //   call .piclabel
-        // piclabel:
-        //   popl %some_register
-        //   addl $_GLOBAL_ADDRESS_TABLE_ + [.-piclabel], %some_register
-        O << " + [.-"
-          << getPICLabelString(getFunctionNumber(), TAI, Subtarget) << ']';
-    }
-
-    if (needCloseParen)
-      O << ')';
-
-    if (Subtarget->isPICStyleRIPRel())
-      O << "(%rip)";
-    return;
+  case MachineOperand::MO_ExternalSymbol:
+    /// NOTE: MO_ExternalSymbol in a non-pcrel_imm context is *only* generated
+    /// by _GLOBAL_OFFSET_TABLE_ on X86-32.  All others are call operands, which
+    /// are pcrel_imm's.
+    assert(!Subtarget->is64Bit() && !Subtarget->isPICStyleRIPRel());
+    // These are never used as memory operands.
+    assert(!(Modifier && !strcmp(Modifier, "mem")));
+    
+    O << '$';
+    O << TAI->getGlobalPrefix();
+    O << MO.getSymbolName();
+    break;
   }
+  
+  switch (MO.getTargetFlags()) {
   default:
-    O << "<unknown operand type>"; return;
+    assert(0 && "Unknown target flag on GV operand");
+  case X86II::MO_NO_FLAG:
+    break;
+  case X86II::MO_GOT_ABSOLUTE_ADDRESS:
+    O << " + [.-";
+    PrintPICBaseSymbol();
+    O << ']';
+    break;      
+  case X86II::MO_PIC_BASE_OFFSET:
+    O << '-';
+    PrintPICBaseSymbol();
+    break;
+  case X86II::MO_TLSGD:     O << "@TLSGD";     break;
+  case X86II::MO_GOTTPOFF:  O << "@GOTTPOFF";  break;
+  case X86II::MO_INDNTPOFF: O << "@INDNTPOFF"; break;
+  case X86II::MO_TPOFF:     O << "@TPOFF";     break;
+  case X86II::MO_NTPOFF:    O << "@NTPOFF";    break;
+  case X86II::MO_GOTPCREL:  O << "@GOTPCREL";  break;
+  case X86II::MO_GOT:       O << "@GOT";       break;
+  case X86II::MO_GOTOFF:    O << "@GOTOFF";    break;
   }
 }
 
@@ -670,25 +576,24 @@ void X86ATTAsmPrinter::printSSECC(const MachineInstr *MI, unsigned Op) {
 }
 
 void X86ATTAsmPrinter::printLeaMemReference(const MachineInstr *MI, unsigned Op,
-                                            const char *Modifier,
-                                            bool NotRIPRel) {
+                                            const char *Modifier) {
   MachineOperand BaseReg  = MI->getOperand(Op);
   MachineOperand IndexReg = MI->getOperand(Op+2);
   const MachineOperand &DispSpec = MI->getOperand(Op+3);
 
-  NotRIPRel |= IndexReg.getReg() || BaseReg.getReg();
   if (DispSpec.isGlobal() ||
       DispSpec.isCPI() ||
       DispSpec.isJTI() ||
       DispSpec.isSymbol()) {
-    printOperand(MI, Op+3, "mem", NotRIPRel);
+    printOperand(MI, Op+3, "mem");
   } else {
     int DispVal = DispSpec.getImm();
     if (DispVal || (!IndexReg.getReg() && !BaseReg.getReg()))
       O << DispVal;
   }
 
-  if (IndexReg.getReg() || BaseReg.getReg()) {
+  if ((IndexReg.getReg() || BaseReg.getReg()) &&
+      (Modifier == 0 || strcmp(Modifier, "no-rip"))) {
     unsigned ScaleVal = MI->getOperand(Op+1).getImm();
     unsigned BaseRegOperand = 0, IndexRegOperand = 2;
 
@@ -716,14 +621,14 @@ void X86ATTAsmPrinter::printLeaMemReference(const MachineInstr *MI, unsigned Op,
 }
 
 void X86ATTAsmPrinter::printMemReference(const MachineInstr *MI, unsigned Op,
-                                         const char *Modifier, bool NotRIPRel){
+                                         const char *Modifier) {
   assert(isMem(MI, Op) && "Invalid memory reference!");
   MachineOperand Segment = MI->getOperand(Op+4);
   if (Segment.getReg()) {
       printOperand(MI, Op+4, Modifier);
       O << ':';
     }
-  printLeaMemReference(MI, Op, Modifier, NotRIPRel);
+  printLeaMemReference(MI, Op, Modifier);
 }
 
 void X86ATTAsmPrinter::printPICJumpTableSetLabel(unsigned uid,
@@ -741,13 +646,19 @@ void X86ATTAsmPrinter::printPICJumpTableSetLabel(unsigned uid,
   if (Subtarget->isPICStyleRIPRel())
     O << '-' << TAI->getPrivateGlobalPrefix() << "JTI" << getFunctionNumber()
       << '_' << uid << '\n';
-  else
-    O << '-' << getPICLabelString(getFunctionNumber(), TAI, Subtarget) << '\n';
+  else {
+    O << '-';
+    PrintPICBaseSymbol();
+    O << '\n';
+  }
 }
 
+
 void X86ATTAsmPrinter::printPICLabel(const MachineInstr *MI, unsigned Op) {
-  std::string label = getPICLabelString(getFunctionNumber(), TAI, Subtarget);
-  O << label << '\n' << label << ':';
+  PrintPICBaseSymbol();
+  O << '\n';
+  PrintPICBaseSymbol();
+  O << ':';
 }
 
 
@@ -810,7 +721,7 @@ bool X86ATTAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
     switch (ExtraCode[0]) {
     default: return true;  // Unknown modifier.
     case 'c': // Don't print "$" before a global var name or constant.
-      printOperand(MI, OpNo, "mem", /*NotRIPRel=*/true);
+      printOperand(MI, OpNo, "mem");
       return false;
     case 'b': // Print QImode register
     case 'h': // Print QImode high register
@@ -823,8 +734,19 @@ bool X86ATTAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
       return false;
 
     case 'P': // Don't print @PLT, but do print as memory.
-      printOperand(MI, OpNo, "mem", /*NotRIPRel=*/true);
+      printOperand(MI, OpNo, "mem");
       return false;
+
+      case 'n': { // Negate the immediate or print a '-' before the operand.
+      // Note: this is a temporary solution. It should be handled in a
+      // target-independent way as part of the 'MC' work.
+      const MachineOperand &MO = MI->getOperand(OpNo);
+      if (MO.isImm()) {
+        O << -MO.getImm();
+        return false;
+      }
+      O << '-';
+    }
     }
   }
 
@@ -849,7 +771,7 @@ bool X86ATTAsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI,
       // These only apply to registers, ignore on mem.
       break;
     case 'P': // Don't print @PLT, but do print as memory.
-      printMemReference(MI, OpNo, "mem", /*NotRIPRel=*/true);
+      printMemReference(MI, OpNo, "no-rip");
       return false;
     }
   }
@@ -931,8 +853,13 @@ void X86ATTAsmPrinter::printMachineInstruction(const MachineInstr *MI) {
 
 /// doInitialization
 bool X86ATTAsmPrinter::doInitialization(Module &M) {
-  if (TAI->doesSupportDebugInformation() || TAI->doesSupportExceptionHandling()) 
-    MMI = getAnalysisIfAvailable<MachineModuleInfo>();
+  if (NewAsmPrinter) {
+    Context = new MCContext();
+    // FIXME: Send this to "O" instead of outs().  For now, we force it to
+    // stdout to make it easy to compare.
+    Streamer = createAsmStreamer(*Context, outs());
+  }
+  
   return AsmPrinter::doInitialization(M);
 }
 
@@ -956,6 +883,8 @@ void X86ATTAsmPrinter::printModuleLevelGV(const GlobalVariable* GVar) {
 
   std::string name = Mang->getValueName(GVar);
   Constant *C = GVar->getInitializer();
+  if (isa<MDNode>(C) || isa<MDString>(C))
+    return;
   const Type *Type = C->getType();
   unsigned Size = TD->getTypeAllocSize(Type);
   unsigned Align = TD->getPreferredAlignmentLog(GVar);
@@ -1068,25 +997,6 @@ void X86ATTAsmPrinter::printModuleLevelGV(const GlobalVariable* GVar) {
   EmitGlobalConstant(C);
 }
 
-/// printGVStub - Print stub for a global value.
-///
-void X86ATTAsmPrinter::printGVStub(const char *GV, const char *Prefix) {
-  printSuffixedName(GV, "$non_lazy_ptr", Prefix);
-  O << ":\n\t.indirect_symbol ";
-  if (Prefix) O << Prefix;
-  O << GV << "\n\t.long\t0\n";
-}
-
-/// printHiddenGVStub - Print stub for a hidden global value.
-///
-void X86ATTAsmPrinter::printHiddenGVStub(const char *GV, const char *Prefix) {
-  EmitAlignment(2);
-  printSuffixedName(GV, "$non_lazy_ptr", Prefix);
-  if (Prefix) O << Prefix;
-  O << ":\n" << TAI->getData32bitsDirective() << GV << '\n';
-}
-
-
 bool X86ATTAsmPrinter::doFinalization(Module &M) {
   // Print out module-level global variables here.
   for (Module::const_global_iterator I = M.global_begin(), E = M.global_end();
@@ -1095,100 +1005,62 @@ bool X86ATTAsmPrinter::doFinalization(Module &M) {
 
     if (I->hasDLLExportLinkage())
       DLLExportedGVs.insert(Mang->makeNameProper(I->getName(),""));
-
-    // If the global is a extern weak symbol, remember to emit the weak
-    // reference!
-    // FIXME: This is rather hacky, since we'll emit references to ALL weak
-    // stuff, not used. But currently it's the only way to deal with extern weak
-    // initializers hidden deep inside constant expressions.
-    if (I->hasExternalWeakLinkage())
-      ExtWeakSymbols.insert(I);
-  }
-
-  for (Module::const_iterator I = M.begin(), E = M.end();
-       I != E; ++I) {
-    // If the global is a extern weak symbol, remember to emit the weak
-    // reference!
-    // FIXME: This is rather hacky, since we'll emit references to ALL weak
-    // stuff, not used. But currently it's the only way to deal with extern weak
-    // initializers hidden deep inside constant expressions.
-    if (I->hasExternalWeakLinkage())
-      ExtWeakSymbols.insert(I);
   }
 
-  // Output linker support code for dllexported globals
-  if (!DLLExportedGVs.empty())
-    SwitchToDataSection(".section .drectve");
-
-  for (StringSet<>::iterator i = DLLExportedGVs.begin(),
-         e = DLLExportedGVs.end();
-         i != e; ++i)
-    O << "\t.ascii \" -export:" << i->getKeyData() << ",data\"\n";
-
-  if (!DLLExportedFns.empty()) {
-    SwitchToDataSection(".section .drectve");
-  }
-
-  for (StringSet<>::iterator i = DLLExportedFns.begin(),
-         e = DLLExportedFns.end();
-         i != e; ++i)
-    O << "\t.ascii \" -export:" << i->getKeyData() << "\"\n";
-
   if (Subtarget->isTargetDarwin()) {
     SwitchToDataSection("");
-
-    // Output stubs for dynamically-linked functions
-    for (StringSet<>::iterator i = FnStubs.begin(), e = FnStubs.end();
-         i != e; ++i) {
-      SwitchToDataSection("\t.section __IMPORT,__jump_table,symbol_stubs,"
-                          "self_modifying_code+pure_instructions,5", 0);
-      const char *p = i->getKeyData();
-      printSuffixedName(p, "$stub");
-      O << ":\n"
-           "\t.indirect_symbol " << p << "\n"
-           "\thlt ; hlt ; hlt ; hlt ; hlt\n";
-    }
-
-    O << '\n';
-
-    // Print global value stubs.
-    bool InStubSection = false;
+    
+    // Add the (possibly multiple) personalities to the set of global value
+    // stubs.  Only referenced functions get into the Personalities list.
     if (TAI->doesSupportExceptionHandling() && MMI && !Subtarget->is64Bit()) {
-      // Add the (possibly multiple) personalities to the set of global values.
-      // Only referenced functions get into the Personalities list.
-      const std::vector<Function *>& Personalities = MMI->getPersonalities();
-      for (std::vector<Function *>::const_iterator I = Personalities.begin(),
-             E = Personalities.end(); I != E; ++I) {
-        if (!*I)
+      const std::vector<Function*> &Personalities = MMI->getPersonalities();
+      for (unsigned i = 0, e = Personalities.size(); i != e; ++i) {
+        if (Personalities[i] == 0)
           continue;
-        if (!InStubSection) {
-          SwitchToDataSection(
-                     "\t.section __IMPORT,__pointers,non_lazy_symbol_pointers");
-          InStubSection = true;
-        }
-        printGVStub((*I)->getNameStart(), "_");
+        std::string Name = Mang->getValueName(Personalities[i]);
+        decorateName(Name, Personalities[i]);
+        GVStubs.insert(Name);
       }
     }
 
+    // Output stubs for dynamically-linked functions
+    if (!FnStubs.empty()) {
+      for (StringSet<>::iterator I = FnStubs.begin(), E = FnStubs.end();
+           I != E; ++I) {
+        SwitchToDataSection("\t.section __IMPORT,__jump_table,symbol_stubs,"
+                            "self_modifying_code+pure_instructions,5", 0);
+        const char *Name = I->getKeyData();
+        printSuffixedName(Name, "$stub");
+        O << ":\n"
+             "\t.indirect_symbol " << Name << "\n"
+             "\thlt ; hlt ; hlt ; hlt ; hlt\n";
+      }
+      O << '\n';
+    }
+
     // Output stubs for external and common global variables.
-    if (!InStubSection && !GVStubs.empty())
+    if (!GVStubs.empty()) {
       SwitchToDataSection(
                     "\t.section __IMPORT,__pointers,non_lazy_symbol_pointers");
-    for (StringSet<>::iterator i = GVStubs.begin(), e = GVStubs.end();
-         i != e; ++i)
-      printGVStub(i->getKeyData());
+      for (StringSet<>::iterator I = GVStubs.begin(), E = GVStubs.end();
+           I != E; ++I) {
+        const char *Name = I->getKeyData();
+        printSuffixedName(Name, "$non_lazy_ptr");
+        O << ":\n\t.indirect_symbol " << Name << "\n\t.long\t0\n";
+      }
+    }
 
     if (!HiddenGVStubs.empty()) {
       SwitchToSection(TAI->getDataSection());
-      for (StringSet<>::iterator i = HiddenGVStubs.begin(), e = HiddenGVStubs.end();
-           i != e; ++i)
-        printHiddenGVStub(i->getKeyData());
+      EmitAlignment(2);
+      for (StringSet<>::iterator I = HiddenGVStubs.begin(),
+           E = HiddenGVStubs.end(); I != E; ++I) {
+        const char *Name = I->getKeyData();
+        printSuffixedName(Name, "$non_lazy_ptr");
+        O << ":\n" << TAI->getData32bitsDirective() << Name << '\n';
+      }
     }
 
-    // Emit final debug information.
-    if (TAI->doesSupportDebugInformation() || TAI->doesSupportExceptionHandling())
-      DW->EndModule();
-
     // Funny Darwin hack: This flag tells the linker that no global symbols
     // contain code that falls through to other global symbols (e.g. the obvious
     // implementation of multiple entry points).  If this doesn't occur, the
@@ -1204,17 +1076,40 @@ bool X86ATTAsmPrinter::doFinalization(Module &M) {
         << ";\t.type\t" << (COFF::DT_FCN << COFF::N_BTSHFT)
         << ";\t.endef\n";
     }
-
-    // Emit final debug information.
-    if (TAI->doesSupportDebugInformation() || TAI->doesSupportExceptionHandling())
-      DW->EndModule();
-  } else if (Subtarget->isTargetELF()) {
-    // Emit final debug information.
-    if (TAI->doesSupportDebugInformation() || TAI->doesSupportExceptionHandling())
-      DW->EndModule();
   }
-
-  return AsmPrinter::doFinalization(M);
+  
+  
+  // Output linker support code for dllexported globals on windows.
+  if (!DLLExportedGVs.empty()) {
+    SwitchToDataSection(".section .drectve");
+  
+    for (StringSet<>::iterator i = DLLExportedGVs.begin(),
+         e = DLLExportedGVs.end(); i != e; ++i)
+      O << "\t.ascii \" -export:" << i->getKeyData() << ",data\"\n";
+  }
+  
+  if (!DLLExportedFns.empty()) {
+    SwitchToDataSection(".section .drectve");
+  
+    for (StringSet<>::iterator i = DLLExportedFns.begin(),
+         e = DLLExportedFns.end();
+         i != e; ++i)
+      O << "\t.ascii \" -export:" << i->getKeyData() << "\"\n";
+  }
+  
+  // Do common shutdown.
+  bool Changed = AsmPrinter::doFinalization(M);
+  
+  if (NewAsmPrinter) {
+    Streamer->Finish();
+    
+    delete Streamer;
+    delete Context;
+    Streamer = 0;
+    Context = 0;
+  }
+  
+  return Changed;
 }
 
 // Include the auto-generated portion of the assembly writer.
diff --git a/lib/Target/X86/AsmPrinter/X86ATTAsmPrinter.h b/lib/Target/X86/AsmPrinter/X86ATTAsmPrinter.h
index 68a6bc8..bd96115 100644
--- a/lib/Target/X86/AsmPrinter/X86ATTAsmPrinter.h
+++ b/lib/Target/X86/AsmPrinter/X86ATTAsmPrinter.h
@@ -27,17 +27,23 @@
 namespace llvm {
 
 class MachineJumpTableInfo;
+class MCContext;
 class MCInst;
+class MCStreamer;
 
 class VISIBILITY_HIDDEN X86ATTAsmPrinter : public AsmPrinter {
-  MachineModuleInfo *MMI;
   const X86Subtarget *Subtarget;
+  
+  MCContext *Context;
+  MCStreamer *Streamer;
  public:
   explicit X86ATTAsmPrinter(raw_ostream &O, X86TargetMachine &TM,
                             const TargetAsmInfo *T, CodeGenOpt::Level OL,
                             bool V)
-    : AsmPrinter(O, TM, T, OL, V), MMI(0) {
+    : AsmPrinter(O, TM, T, OL, V) {
     Subtarget = &TM.getSubtarget<X86Subtarget>();
+    Context = 0;
+    Streamer = 0;
   }
 
   virtual const char *getPassName() const {
@@ -69,7 +75,7 @@ class VISIBILITY_HIDDEN X86ATTAsmPrinter : public AsmPrinter {
   bool printInstruction(const MCInst *MI);
 
   void printOperand(const MCInst *MI, unsigned OpNo,
-                    const char *Modifier = 0, bool NotRIPRel = false);
+                    const char *Modifier = 0);
   void printMemReference(const MCInst *MI, unsigned Op);
   void printLeaMemReference(const MCInst *MI, unsigned Op);
   void printSSECC(const MCInst *MI, unsigned Op);
@@ -117,7 +123,7 @@ class VISIBILITY_HIDDEN X86ATTAsmPrinter : public AsmPrinter {
 
   // These methods are used by the tablegen'erated instruction printer.
   void printOperand(const MachineInstr *MI, unsigned OpNo,
-                    const char *Modifier = 0, bool NotRIPRel = false);
+                    const char *Modifier = 0);
   void print_pcrel_imm(const MachineInstr *MI, unsigned OpNo);
   void printi8mem(const MachineInstr *MI, unsigned OpNo) {
     printMemReference(MI, OpNo);
@@ -165,9 +171,9 @@ class VISIBILITY_HIDDEN X86ATTAsmPrinter : public AsmPrinter {
   void printMachineInstruction(const MachineInstr *MI);
   void printSSECC(const MachineInstr *MI, unsigned Op);
   void printMemReference(const MachineInstr *MI, unsigned Op,
-                         const char *Modifier=NULL, bool NotRIPRel = false);
+                         const char *Modifier=NULL);
   void printLeaMemReference(const MachineInstr *MI, unsigned Op,
-                            const char *Modifier=NULL, bool NotRIPRel = false);
+                            const char *Modifier=NULL);
   void printPICJumpTableSetLabel(unsigned uid,
                                  const MachineBasicBlock *MBB) const;
   void printPICJumpTableSetLabel(unsigned uid, unsigned uid2,
@@ -181,9 +187,8 @@ class VISIBILITY_HIDDEN X86ATTAsmPrinter : public AsmPrinter {
   void printPICLabel(const MachineInstr *MI, unsigned Op);
   void printModuleLevelGV(const GlobalVariable* GVar);
 
-  void printGVStub(const char *GV, const char *Prefix = NULL);
-  void printHiddenGVStub(const char *GV, const char *Prefix = NULL);
-
+  void PrintPICBaseSymbol() const;
+  
   bool runOnMachineFunction(MachineFunction &F);
 
   void emitFunctionHeader(const MachineFunction &MF);
diff --git a/lib/Target/X86/AsmPrinter/X86ATTInstPrinter.cpp b/lib/Target/X86/AsmPrinter/X86ATTInstPrinter.cpp
index 9d50edc..fa0ee75 100644
--- a/lib/Target/X86/AsmPrinter/X86ATTInstPrinter.cpp
+++ b/lib/Target/X86/AsmPrinter/X86ATTInstPrinter.cpp
@@ -65,7 +65,7 @@ void X86ATTAsmPrinter::print_pcrel_imm(const MCInst *MI, unsigned OpNo) {
 
 
 void X86ATTAsmPrinter::printOperand(const MCInst *MI, unsigned OpNo,
-                                    const char *Modifier, bool NotRIPRel) {
+                                    const char *Modifier) {
   assert(Modifier == 0 && "Modifiers should not be used");
   
   const MCOperand &Op = MI->getOperand(OpNo);
@@ -93,13 +93,11 @@ void X86ATTAsmPrinter::printOperand(const MCInst *MI, unsigned OpNo,
 }
 
 void X86ATTAsmPrinter::printLeaMemReference(const MCInst *MI, unsigned Op) {
-  bool NotRIPRel = false;
 
   const MCOperand &BaseReg  = MI->getOperand(Op);
   const MCOperand &IndexReg = MI->getOperand(Op+2);
   const MCOperand &DispSpec = MI->getOperand(Op+3);
   
-  NotRIPRel |= IndexReg.getReg() || BaseReg.getReg();
   if (DispSpec.isImm()) {
     int64_t DispVal = DispSpec.getImm();
     if (DispVal || (!IndexReg.getReg() && !BaseReg.getReg()))
@@ -108,7 +106,7 @@ void X86ATTAsmPrinter::printLeaMemReference(const MCInst *MI, unsigned Op) {
     abort();
     //assert(DispSpec.isGlobal() || DispSpec.isCPI() ||
     //       DispSpec.isJTI() || DispSpec.isSymbol());
-    //printOperand(MI, Op+3, "mem", NotRIPRel);
+    //printOperand(MI, Op+3, "mem");
   }
   
   if (IndexReg.getReg() || BaseReg.getReg()) {
diff --git a/lib/Target/X86/AsmPrinter/X86AsmPrinter.cpp b/lib/Target/X86/AsmPrinter/X86AsmPrinter.cpp
index a39203b..d1623d6 100644
--- a/lib/Target/X86/AsmPrinter/X86AsmPrinter.cpp
+++ b/lib/Target/X86/AsmPrinter/X86AsmPrinter.cpp
@@ -47,8 +47,5 @@ namespace {
 extern "C" int X86AsmPrinterForceLink;
 int X86AsmPrinterForceLink = 0;
 
-// Force static initialization when called from
-// llvm/InitializeAllAsmPrinters.h
-namespace llvm {
-  void InitializeX86AsmPrinter() { }
-}
+// Force static initialization.
+extern "C" void LLVMInitializeX86AsmPrinter() { }
diff --git a/lib/Target/X86/X86.td b/lib/Target/X86/X86.td
index 8df138d..4d26364 100644
--- a/lib/Target/X86/X86.td
+++ b/lib/Target/X86/X86.td
@@ -55,6 +55,13 @@ def FeatureSlowBTMem : SubtargetFeature<"slow-bt-mem", "IsBTMemSlow", "true",
 def FeatureSSE4A   : SubtargetFeature<"sse4a", "HasSSE4A", "true",
                                       "Support SSE 4a instructions">;
 
+def FeatureAVX     : SubtargetFeature<"avx", "HasAVX", "true",
+                                      "Enable AVX instructions">;
+def FeatureFMA3    : SubtargetFeature<"fma3", "HasFMA3", "true",
+                                      "Enable three-operand fused multiple-add">;
+def FeatureFMA4    : SubtargetFeature<"fma4", "HasFMA4", "true",
+                                      "Enable four-operand fused multiple-add">;
+
 //===----------------------------------------------------------------------===//
 // X86 processors supported.
 //===----------------------------------------------------------------------===//
@@ -82,6 +89,9 @@ def : Proc<"core2",           [FeatureSSSE3,  Feature64Bit, FeatureSlowBTMem]>;
 def : Proc<"penryn",          [FeatureSSE41,  Feature64Bit, FeatureSlowBTMem]>;
 def : Proc<"atom",            [FeatureSSE3,   Feature64Bit, FeatureSlowBTMem]>;
 def : Proc<"corei7",          [FeatureSSE42,  Feature64Bit, FeatureSlowBTMem]>;
+def : Proc<"nehalem",         [FeatureSSE42,  Feature64Bit, FeatureSlowBTMem]>;
+// Sandy Bridge does not have FMA
+def : Proc<"sandybridge",     [FeatureSSE42,  FeatureAVX,   Feature64Bit]>;
 
 def : Proc<"k6",              [FeatureMMX]>;
 def : Proc<"k6-2",            [FeatureMMX,    Feature3DNow]>;
diff --git a/lib/Target/X86/X86CodeEmitter.cpp b/lib/Target/X86/X86CodeEmitter.cpp
index e988a5c..d5846a0 100644
--- a/lib/Target/X86/X86CodeEmitter.cpp
+++ b/lib/Target/X86/X86CodeEmitter.cpp
@@ -301,7 +301,7 @@ bool Emitter<CodeEmitter>::gvNeedsNonLazyPtr(const GlobalValue *GV) {
 
 template<class CodeEmitter>
 void Emitter<CodeEmitter>::emitDisplacementField(const MachineOperand *RelocOp,
-                                    int DispVal, intptr_t PCAdj) {
+                                                 int DispVal, intptr_t PCAdj) {
   // If this is a simple integer displacement that doesn't require a relocation,
   // emit it now.
   if (!RelocOp) {
@@ -371,8 +371,10 @@ void Emitter<CodeEmitter>::emitMemModRMByte(const MachineInstr &MI,
   // Is a SIB byte needed?
   if ((!Is64BitMode || DispForReloc || BaseReg != 0) &&
       IndexReg.getReg() == 0 &&
-      (BaseReg == 0 || getX86RegNum(BaseReg) != N86::ESP)) {
-    if (BaseReg == 0) {  // Just a displacement?
+      (BaseReg == 0 || BaseReg == X86::RIP ||
+       getX86RegNum(BaseReg) != N86::ESP)) {
+    if (BaseReg == 0 ||
+        BaseReg == X86::RIP) {  // Just a displacement?
       // Emit special case [disp32] encoding
       MCE.emitByte(ModRMByte(0, RegOpcodeField, 5));
       
diff --git a/lib/Target/X86/X86FastISel.cpp b/lib/Target/X86/X86FastISel.cpp
index 2bcfd76..8a21b35 100644
--- a/lib/Target/X86/X86FastISel.cpp
+++ b/lib/Target/X86/X86FastISel.cpp
@@ -396,8 +396,7 @@ bool X86FastISel::X86SelectAddress(Value *V, X86AddressMode &AM, bool isCall) {
           // Constant-offset addressing.
           Disp += CI->getSExtValue() * S;
         } else if (IndexReg == 0 &&
-                   (!AM.GV ||
-                    !getTargetMachine()->symbolicAddressesAreRIPRel()) &&
+                   (!AM.GV || !Subtarget->isPICStyleRIPRel()) &&
                    (S == 1 || S == 2 || S == 4 || S == 8)) {
           // Scaled-index addressing.
           Scale = S;
@@ -432,7 +431,7 @@ bool X86FastISel::X86SelectAddress(Value *V, X86AddressMode &AM, bool isCall) {
       return false;
 
     // RIP-relative addresses can't have additional register operands.
-    if (getTargetMachine()->symbolicAddressesAreRIPRel() &&
+    if (Subtarget->isPICStyleRIPRel() &&
         (AM.Base.Reg != 0 || AM.IndexReg != 0))
       return false;
 
@@ -443,6 +442,7 @@ bool X86FastISel::X86SelectAddress(Value *V, X86AddressMode &AM, bool isCall) {
 
     // Set up the basic address.
     AM.GV = GV;
+    
     if (!isCall &&
         TM.getRelocationModel() == Reloc::PIC_ &&
         !Subtarget->is64Bit())
@@ -481,12 +481,16 @@ bool X86FastISel::X86SelectAddress(Value *V, X86AddressMode &AM, bool isCall) {
 
       // Prevent loading GV stub multiple times in same MBB.
       LocalValueMap[V] = AM.Base.Reg;
+    } else if (Subtarget->isPICStyleRIPRel()) {
+      // Use rip-relative addressing if we can.
+      AM.Base.Reg = X86::RIP;
     }
+    
     return true;
   }
 
   // If all else fails, try to materialize the value in a register.
-  if (!AM.GV || !getTargetMachine()->symbolicAddressesAreRIPRel()) {
+  if (!AM.GV || !Subtarget->isPICStyleRIPRel()) {
     if (AM.Base.Reg == 0) {
       AM.Base.Reg = getRegForValue(V);
       return AM.Base.Reg != 0;
@@ -1140,12 +1144,10 @@ bool X86FastISel::X86SelectCall(Instruction *I) {
     return false;
   unsigned CalleeOp = 0;
   GlobalValue *GV = 0;
-  if (CalleeAM.Base.Reg != 0) {
-    assert(CalleeAM.GV == 0);
-    CalleeOp = CalleeAM.Base.Reg;
-  } else if (CalleeAM.GV != 0) {
-    assert(CalleeAM.GV != 0);
+  if (CalleeAM.GV != 0) {
     GV = CalleeAM.GV;
+  } else if (CalleeAM.Base.Reg != 0) {
+    CalleeOp = CalleeAM.Base.Reg;
   } else
     return false;
 
@@ -1493,15 +1495,22 @@ unsigned X86FastISel::TargetMaterializeConstant(Constant *C) {
   
   // x86-32 PIC requires a PIC base register for constant pools.
   unsigned PICBase = 0;
-  if (TM.getRelocationModel() == Reloc::PIC_ &&
-      !Subtarget->is64Bit())
-    PICBase = getInstrInfo()->getGlobalBaseReg(&MF);
+  unsigned char OpFlag = 0;
+  if (TM.getRelocationModel() == Reloc::PIC_) {
+    if (Subtarget->isPICStyleStub()) {
+      OpFlag = X86II::MO_PIC_BASE_OFFSET;
+      PICBase = getInstrInfo()->getGlobalBaseReg(&MF);
+    } else if (Subtarget->isPICStyleGOT()) {
+      OpFlag = X86II::MO_GOTOFF;
+      PICBase = getInstrInfo()->getGlobalBaseReg(&MF);
+    }
+  }
 
   // Create the load from the constant pool.
   unsigned MCPOffset = MCP.getConstantPoolIndex(C, Align);
   unsigned ResultReg = createResultReg(RC);
-  addConstantPoolReference(BuildMI(MBB, DL, TII.get(Opc), ResultReg), MCPOffset,
-                           PICBase);
+  addConstantPoolReference(BuildMI(MBB, DL, TII.get(Opc), ResultReg),
+                           MCPOffset, PICBase, OpFlag);
 
   return ResultReg;
 }
diff --git a/lib/Target/X86/X86ISelDAGToDAG.cpp b/lib/Target/X86/X86ISelDAGToDAG.cpp
index 9cedafc..1336177 100644
--- a/lib/Target/X86/X86ISelDAGToDAG.cpp
+++ b/lib/Target/X86/X86ISelDAGToDAG.cpp
@@ -65,7 +65,6 @@ namespace {
       int FrameIndex;
     } Base;
 
-    bool isRIPRel;     // RIP as base?
     unsigned Scale;
     SDValue IndexReg; 
     int32_t Disp;
@@ -75,15 +74,35 @@ namespace {
     const char *ES;
     int JT;
     unsigned Align;    // CP alignment.
+    unsigned char SymbolFlags;  // X86II::MO_*
 
     X86ISelAddressMode()
-      : BaseType(RegBase), isRIPRel(false), Scale(1), IndexReg(), Disp(0),
-        Segment(), GV(0), CP(0), ES(0), JT(-1), Align(0) {
+      : BaseType(RegBase), Scale(1), IndexReg(), Disp(0),
+        Segment(), GV(0), CP(0), ES(0), JT(-1), Align(0), SymbolFlags(0) {
     }
 
     bool hasSymbolicDisplacement() const {
       return GV != 0 || CP != 0 || ES != 0 || JT != -1;
     }
+    
+    bool hasBaseOrIndexReg() const {
+      return IndexReg.getNode() != 0 || Base.Reg.getNode() != 0;
+    }
+    
+    /// isRIPRelative - Return true if this addressing mode is already RIP
+    /// relative.
+    bool isRIPRelative() const {
+      if (BaseType != RegBase) return false;
+      if (RegisterSDNode *RegNode =
+            dyn_cast_or_null<RegisterSDNode>(Base.Reg.getNode()))
+        return RegNode->getReg() == X86::RIP;
+      return false;
+    }
+    
+    void setBaseReg(SDValue Reg) {
+      BaseType = RegBase;
+      Base.Reg = Reg;
+    }
 
     void dump() {
       cerr << "X86ISelAddressMode " << this << "\n";
@@ -91,7 +110,7 @@ namespace {
               if (Base.Reg.getNode() != 0) Base.Reg.getNode()->dump(); 
               else cerr << "nul";
       cerr << " Base.FrameIndex " << Base.FrameIndex << "\n";
-      cerr << "isRIPRel " << isRIPRel << " Scale" << Scale << "\n";
+      cerr << " Scale" << Scale << "\n";
       cerr << "IndexReg ";
               if (IndexReg.getNode() != 0) IndexReg.getNode()->dump();
               else cerr << "nul"; 
@@ -200,14 +219,15 @@ namespace {
       // These are 32-bit even in 64-bit mode since RIP relative offset
       // is 32-bit.
       if (AM.GV)
-        Disp = CurDAG->getTargetGlobalAddress(AM.GV, MVT::i32, AM.Disp);
+        Disp = CurDAG->getTargetGlobalAddress(AM.GV, MVT::i32, AM.Disp,
+                                              AM.SymbolFlags);
       else if (AM.CP)
         Disp = CurDAG->getTargetConstantPool(AM.CP, MVT::i32,
-                                             AM.Align, AM.Disp);
+                                             AM.Align, AM.Disp, AM.SymbolFlags);
       else if (AM.ES)
-        Disp = CurDAG->getTargetExternalSymbol(AM.ES, MVT::i32);
+        Disp = CurDAG->getTargetExternalSymbol(AM.ES, MVT::i32, AM.SymbolFlags);
       else if (AM.JT != -1)
-        Disp = CurDAG->getTargetJumpTable(AM.JT, MVT::i32);
+        Disp = CurDAG->getTargetJumpTable(AM.JT, MVT::i32, AM.SymbolFlags);
       else
         Disp = CurDAG->getTargetConstant(AM.Disp, MVT::i32);
 
@@ -683,61 +703,80 @@ bool X86DAGToDAGISel::MatchLoad(SDValue N, X86ISelAddressMode &AM) {
   return true;
 }
 
+/// MatchWrapper - Try to match X86ISD::Wrapper and X86ISD::WrapperRIP nodes
+/// into an addressing mode.  These wrap things that will resolve down into a
+/// symbol reference.  If no match is possible, this returns true; otherwise it
+/// returns false.
 bool X86DAGToDAGISel::MatchWrapper(SDValue N, X86ISelAddressMode &AM) {
-  bool SymbolicAddressesAreRIPRel =
-    getTargetMachine().symbolicAddressesAreRIPRel();
-  bool is64Bit = Subtarget->is64Bit();
-  DOUT << "Wrapper: 64bit " << is64Bit;
-  DOUT << " AM "; DEBUG(AM.dump()); DOUT << "\n";
-
-  // Under X86-64 non-small code model, GV (and friends) are 64-bits.
-  if (is64Bit && (TM.getCodeModel() != CodeModel::Small))
-    return true;
-
-  // Base and index reg must be 0 in order to use rip as base.
-  bool canUsePICRel = !AM.Base.Reg.getNode() && !AM.IndexReg.getNode();
-  if (is64Bit && !canUsePICRel && SymbolicAddressesAreRIPRel)
-    return true;
-
+  // If the addressing mode already has a symbol as the displacement, we can
+  // never match another symbol.
   if (AM.hasSymbolicDisplacement())
     return true;
-  // If value is available in a register both base and index components have
-  // been picked, we can't fit the result available in the register in the
-  // addressing mode. Duplicate GlobalAddress or ConstantPool as displacement.
 
   SDValue N0 = N.getOperand(0);
-  if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(N0)) {
-    uint64_t Offset = G->getOffset();
-    if (!is64Bit || isInt32(AM.Disp + Offset)) {
-      GlobalValue *GV = G->getGlobal();
-      bool isRIPRel = SymbolicAddressesAreRIPRel;
-      if (N0.getOpcode() == llvm::ISD::TargetGlobalTLSAddress) {
-        TLSModel::Model model =
-          getTLSModel (GV, TM.getRelocationModel());
-        if (is64Bit && model == TLSModel::InitialExec)
-          isRIPRel = true;
-      }
-      AM.GV = GV;
-      AM.Disp += Offset;
-      AM.isRIPRel = isRIPRel;
-      return false;
-    }
-  } else if (ConstantPoolSDNode *CP = dyn_cast<ConstantPoolSDNode>(N0)) {
-    uint64_t Offset = CP->getOffset();
-    if (!is64Bit || isInt32(AM.Disp + Offset)) {
+  
+  // Handle X86-64 rip-relative addresses.  We check this before checking direct
+  // folding because RIP is preferable to non-RIP accesses.
+  if (Subtarget->is64Bit() &&
+      // Under X86-64 non-small code model, GV (and friends) are 64-bits, so
+      // they cannot be folded into immediate fields.
+      // FIXME: This can be improved for kernel and other models?
+      TM.getCodeModel() == CodeModel::Small &&
+      
+      // Base and index reg must be 0 in order to use %rip as base and lowering
+      // must allow RIP.
+      !AM.hasBaseOrIndexReg() && N.getOpcode() == X86ISD::WrapperRIP) {
+  
+    if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(N0)) {
+      int64_t Offset = AM.Disp + G->getOffset();
+      if (!isInt32(Offset)) return true;
+      AM.GV = G->getGlobal();
+      AM.Disp = Offset;
+      AM.SymbolFlags = G->getTargetFlags();
+    } else if (ConstantPoolSDNode *CP = dyn_cast<ConstantPoolSDNode>(N0)) {
+      int64_t Offset = AM.Disp + CP->getOffset();
+      if (!isInt32(Offset)) return true;
       AM.CP = CP->getConstVal();
       AM.Align = CP->getAlignment();
-      AM.Disp += Offset;
-      AM.isRIPRel = SymbolicAddressesAreRIPRel;
-      return false;
+      AM.Disp = Offset;
+      AM.SymbolFlags = CP->getTargetFlags();
+    } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(N0)) {
+      AM.ES = S->getSymbol();
+      AM.SymbolFlags = S->getTargetFlags();
+    } else {
+      JumpTableSDNode *J = cast<JumpTableSDNode>(N0);
+      AM.JT = J->getIndex();
+      AM.SymbolFlags = J->getTargetFlags();
     }
-  } else if (ExternalSymbolSDNode *S =dyn_cast<ExternalSymbolSDNode>(N0)) {
-    AM.ES = S->getSymbol();
-    AM.isRIPRel = SymbolicAddressesAreRIPRel;
+  
+    if (N.getOpcode() == X86ISD::WrapperRIP)
+      AM.setBaseReg(CurDAG->getRegister(X86::RIP, MVT::i64));
     return false;
-  } else if (JumpTableSDNode *J = dyn_cast<JumpTableSDNode>(N0)) {
-    AM.JT = J->getIndex();
-    AM.isRIPRel = SymbolicAddressesAreRIPRel;
+  }
+
+  // Handle the case when globals fit in our immediate field: This is true for
+  // X86-32 always and X86-64 when in -static -mcmodel=small mode.  In 64-bit
+  // mode, this results in a non-RIP-relative computation.
+  if (!Subtarget->is64Bit() ||
+      (TM.getCodeModel() == CodeModel::Small &&
+       TM.getRelocationModel() == Reloc::Static)) {
+    if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(N0)) {
+      AM.GV = G->getGlobal();
+      AM.Disp += G->getOffset();
+      AM.SymbolFlags = G->getTargetFlags();
+    } else if (ConstantPoolSDNode *CP = dyn_cast<ConstantPoolSDNode>(N0)) {
+      AM.CP = CP->getConstVal();
+      AM.Align = CP->getAlignment();
+      AM.Disp += CP->getOffset();
+      AM.SymbolFlags = CP->getTargetFlags();
+    } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(N0)) {
+      AM.ES = S->getSymbol();
+      AM.SymbolFlags = S->getTargetFlags();
+    } else {
+      JumpTableSDNode *J = cast<JumpTableSDNode>(N0);
+      AM.JT = J->getIndex();
+      AM.SymbolFlags = J->getTargetFlags();
+    }
     return false;
   }
 
@@ -756,12 +795,19 @@ bool X86DAGToDAGISel::MatchAddress(SDValue N, X86ISelAddressMode &AM,
   if (Depth > 5)
     return MatchAddressBase(N, AM);
   
+  // If this is already a %rip relative address, we can only merge immediates
+  // into it.  Instead of handling this in every case, we handle it here.
   // RIP relative addressing: %rip + 32-bit displacement!
-  if (AM.isRIPRel) {
-    if (!AM.ES && AM.JT != -1 && N.getOpcode() == ISD::Constant) {
-      uint64_t Val = cast<ConstantSDNode>(N)->getSExtValue();
-      if (!is64Bit || isInt32(AM.Disp + Val)) {
-        AM.Disp += Val;
+  if (AM.isRIPRelative()) {
+    // FIXME: JumpTable and ExternalSymbol addresses currently don't like
+    // displacements, so refuse to fold anything into either of them.  It isn't
+    // very important, but this should be fixed for consistency.
+    if (AM.ES || AM.JT != -1) return true;
+    
+    if (ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(N)) {
+      int64_t Val = AM.Disp + Cst->getSExtValue();
+      if (isInt32(Val)) {
+        AM.Disp = Val;
         return false;
       }
     }
@@ -785,6 +831,7 @@ bool X86DAGToDAGISel::MatchAddress(SDValue N, X86ISelAddressMode &AM,
     break;
 
   case X86ISD::Wrapper:
+  case X86ISD::WrapperRIP:
     if (!MatchWrapper(N, AM))
       return false;
     break;
@@ -804,7 +851,7 @@ bool X86DAGToDAGISel::MatchAddress(SDValue N, X86ISelAddressMode &AM,
     break;
 
   case ISD::SHL:
-    if (AM.IndexReg.getNode() != 0 || AM.Scale != 1 || AM.isRIPRel)
+    if (AM.IndexReg.getNode() != 0 || AM.Scale != 1)
       break;
       
     if (ConstantSDNode
@@ -845,8 +892,7 @@ bool X86DAGToDAGISel::MatchAddress(SDValue N, X86ISelAddressMode &AM,
     // X*[3,5,9] -> X+X*[2,4,8]
     if (AM.BaseType == X86ISelAddressMode::RegBase &&
         AM.Base.Reg.getNode() == 0 &&
-        AM.IndexReg.getNode() == 0 &&
-        !AM.isRIPRel) {
+        AM.IndexReg.getNode() == 0) {
       if (ConstantSDNode
             *CN = dyn_cast<ConstantSDNode>(N.getNode()->getOperand(1)))
         if (CN->getZExtValue() == 3 || CN->getZExtValue() == 5 ||
@@ -895,7 +941,7 @@ bool X86DAGToDAGISel::MatchAddress(SDValue N, X86ISelAddressMode &AM,
       break;
     }
     // Test if the index field is free for use.
-    if (AM.IndexReg.getNode() || AM.isRIPRel) {
+    if (AM.IndexReg.getNode() || AM.isRIPRelative()) {
       AM = Backup;
       break;
     }
@@ -966,8 +1012,7 @@ bool X86DAGToDAGISel::MatchAddress(SDValue N, X86ISelAddressMode &AM,
     // the add.
     if (AM.BaseType == X86ISelAddressMode::RegBase &&
         !AM.Base.Reg.getNode() &&
-        !AM.IndexReg.getNode() &&
-        !AM.isRIPRel) {
+        !AM.IndexReg.getNode()) {
       AM.Base.Reg = N.getNode()->getOperand(0);
       AM.IndexReg = N.getNode()->getOperand(1);
       AM.Scale = 1;
@@ -1006,9 +1051,6 @@ bool X86DAGToDAGISel::MatchAddress(SDValue N, X86ISelAddressMode &AM,
     // Scale must not be used already.
     if (AM.IndexReg.getNode() != 0 || AM.Scale != 1) break;
 
-    // Not when RIP is used as the base.
-    if (AM.isRIPRel) break;
-
     SDValue X = Shift.getOperand(0);
     ConstantSDNode *C2 = dyn_cast<ConstantSDNode>(N.getOperand(1));
     ConstantSDNode *C1 = dyn_cast<ConstantSDNode>(Shift.getOperand(1));
@@ -1130,7 +1172,7 @@ bool X86DAGToDAGISel::MatchAddressBase(SDValue N, X86ISelAddressMode &AM) {
   // Is the base register already occupied?
   if (AM.BaseType != X86ISelAddressMode::RegBase || AM.Base.Reg.getNode()) {
     // If so, check to see if the scale index register is set.
-    if (AM.IndexReg.getNode() == 0 && !AM.isRIPRel) {
+    if (AM.IndexReg.getNode() == 0) {
       AM.IndexReg = N;
       AM.Scale = 1;
       return false;
@@ -1157,7 +1199,7 @@ bool X86DAGToDAGISel::SelectAddr(SDValue Op, SDValue N, SDValue &Base,
   if (AvoidDupAddrCompute && !N.hasOneUse()) {
     unsigned Opcode = N.getOpcode();
     if (Opcode != ISD::Constant && Opcode != ISD::FrameIndex &&
-        Opcode != X86ISD::Wrapper) {
+        Opcode != X86ISD::Wrapper && Opcode != X86ISD::WrapperRIP) {
       // If we are able to fold N into addressing mode, then we'll allow it even
       // if N has multiple uses. In general, addressing computation is used as
       // addresses by all of its uses. But watch out for CopyToReg uses, that
@@ -1307,7 +1349,8 @@ bool X86DAGToDAGISel::SelectTLSADDRAddr(SDValue Op, SDValue N, SDValue &Base,
   AM.GV = GA->getGlobal();
   AM.Disp += GA->getOffset();
   AM.Base.Reg = CurDAG->getRegister(0, N.getValueType());
-  
+  AM.SymbolFlags = GA->getTargetFlags();
+
   if (N.getValueType() == MVT::i32) {
     AM.Scale = 1;
     AM.IndexReg = CurDAG->getRegister(X86::EBX, MVT::i32);
@@ -1687,7 +1730,8 @@ SDNode *X86DAGToDAGISel::Select(SDValue N) {
       
       // If N2 is not Wrapper(decriptor) then the llvm.declare is mangled
       // somehow, just ignore it.
-      if (N2.getOpcode() != X86ISD::Wrapper) {
+      if (N2.getOpcode() != X86ISD::Wrapper &&
+          N2.getOpcode() != X86ISD::WrapperRIP) {
         ReplaceUses(N.getValue(0), Chain);
         return NULL;
       }
diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp
index 8d0ea66..9614e69 100644
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -19,6 +19,7 @@
 #include "llvm/CallingConv.h"
 #include "llvm/Constants.h"
 #include "llvm/DerivedTypes.h"
+#include "llvm/GlobalAlias.h"
 #include "llvm/GlobalVariable.h"
 #include "llvm/Function.h"
 #include "llvm/Intrinsics.h"
@@ -4311,21 +4312,102 @@ X86TargetLowering::LowerSCALAR_TO_VECTOR(SDValue Op, SelectionDAG &DAG) {
 SDValue
 X86TargetLowering::LowerConstantPool(SDValue Op, SelectionDAG &DAG) {
   ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
-  // FIXME there isn't really any debug info here, should come from the parent
-  DebugLoc dl = CP->getDebugLoc();
+  
+  // In PIC mode (unless we're in RIPRel PIC mode) we add an offset to the
+  // global base reg.
+  unsigned char OpFlag = 0;
+  unsigned WrapperKind = X86ISD::Wrapper;
+  if (getTargetMachine().getRelocationModel() == Reloc::PIC_) {
+    if (Subtarget->isPICStyleStub())
+      OpFlag = X86II::MO_PIC_BASE_OFFSET;
+    else if (Subtarget->isPICStyleGOT())
+      OpFlag = X86II::MO_GOTOFF;
+    else if (Subtarget->isPICStyleRIPRel() &&
+             getTargetMachine().getCodeModel() == CodeModel::Small)
+      WrapperKind = X86ISD::WrapperRIP;
+  }
+  
   SDValue Result = DAG.getTargetConstantPool(CP->getConstVal(), getPointerTy(),
-                                             CP->getAlignment());
-  Result = DAG.getNode(X86ISD::Wrapper, dl, getPointerTy(), Result);
+                                             CP->getAlignment(),
+                                             CP->getOffset(), OpFlag);
+  DebugLoc DL = CP->getDebugLoc();
+  Result = DAG.getNode(WrapperKind, DL, getPointerTy(), Result);
+  // With PIC, the address is actually $g + Offset.
+  if (OpFlag) {
+    Result = DAG.getNode(ISD::ADD, DL, getPointerTy(),
+                         DAG.getNode(X86ISD::GlobalBaseReg,
+                                     DebugLoc::getUnknownLoc(), getPointerTy()),
+                         Result);
+  }
+
+  return Result;
+}
+
+SDValue X86TargetLowering::LowerJumpTable(SDValue Op, SelectionDAG &DAG) {
+  JumpTableSDNode *JT = cast<JumpTableSDNode>(Op);
+  
+  // In PIC mode (unless we're in RIPRel PIC mode) we add an offset to the
+  // global base reg.
+  unsigned char OpFlag = 0;
+  unsigned WrapperKind = X86ISD::Wrapper;
+  if (getTargetMachine().getRelocationModel() == Reloc::PIC_) {
+    if (Subtarget->isPICStyleStub())
+      OpFlag = X86II::MO_PIC_BASE_OFFSET;
+    else if (Subtarget->isPICStyleGOT())
+      OpFlag = X86II::MO_GOTOFF;
+    else if (Subtarget->isPICStyleRIPRel())
+      WrapperKind = X86ISD::WrapperRIP;
+  }
+  
+  SDValue Result = DAG.getTargetJumpTable(JT->getIndex(), getPointerTy(),
+                                          OpFlag);
+  DebugLoc DL = JT->getDebugLoc();
+  Result = DAG.getNode(WrapperKind, DL, getPointerTy(), Result);
+  
+  // With PIC, the address is actually $g + Offset.
+  if (OpFlag) {
+    Result = DAG.getNode(ISD::ADD, DL, getPointerTy(),
+                         DAG.getNode(X86ISD::GlobalBaseReg,
+                                     DebugLoc::getUnknownLoc(), getPointerTy()),
+                         Result);
+  }
+  
+  return Result;
+}
+
+SDValue
+X86TargetLowering::LowerExternalSymbol(SDValue Op, SelectionDAG &DAG) {
+  const char *Sym = cast<ExternalSymbolSDNode>(Op)->getSymbol();
+  
+  // In PIC mode (unless we're in RIPRel PIC mode) we add an offset to the
+  // global base reg.
+  unsigned char OpFlag = 0;
+  unsigned WrapperKind = X86ISD::Wrapper;
+  if (getTargetMachine().getRelocationModel() == Reloc::PIC_) {
+    if (Subtarget->isPICStyleStub())
+      OpFlag = X86II::MO_PIC_BASE_OFFSET;
+    else if (Subtarget->isPICStyleGOT())
+      OpFlag = X86II::MO_GOTOFF;
+    else if (Subtarget->isPICStyleRIPRel())
+      WrapperKind = X86ISD::WrapperRIP;
+  }
+  
+  SDValue Result = DAG.getTargetExternalSymbol(Sym, getPointerTy(), OpFlag);
+  
+  DebugLoc DL = Op.getDebugLoc();
+  Result = DAG.getNode(WrapperKind, DL, getPointerTy(), Result);
+  
+  
   // With PIC, the address is actually $g + Offset.
   if (getTargetMachine().getRelocationModel() == Reloc::PIC_ &&
       !Subtarget->isPICStyleRIPRel()) {
-    Result = DAG.getNode(ISD::ADD, dl, getPointerTy(),
+    Result = DAG.getNode(ISD::ADD, DL, getPointerTy(),
                          DAG.getNode(X86ISD::GlobalBaseReg,
                                      DebugLoc::getUnknownLoc(),
                                      getPointerTy()),
                          Result);
   }
-
+  
   return Result;
 }
 
@@ -4343,9 +4425,29 @@ X86TargetLowering::LowerGlobalAddress(const GlobalValue *GV, DebugLoc dl,
   if (!IsPic && !ExtraLoadRequired && isInt32(Offset)) {
     Result = DAG.getTargetGlobalAddress(GV, getPointerTy(), Offset);
     Offset = 0;
-  } else
-    Result = DAG.getTargetGlobalAddress(GV, getPointerTy(), 0);
-  Result = DAG.getNode(X86ISD::Wrapper, dl, getPointerTy(), Result);
+  } else {
+    unsigned char OpFlags = 0;
+    
+    if (Subtarget->isPICStyleRIPRel() &&
+        getTargetMachine().getRelocationModel() != Reloc::Static) {
+      if (ExtraLoadRequired)
+        OpFlags = X86II::MO_GOTPCREL;
+    } else if (Subtarget->isPICStyleGOT() &&
+               getTargetMachine().getRelocationModel() == Reloc::PIC_) {
+      if (ExtraLoadRequired)
+        OpFlags = X86II::MO_GOT;
+      else
+        OpFlags = X86II::MO_GOTOFF;
+    }
+    
+    Result = DAG.getTargetGlobalAddress(GV, getPointerTy(), 0, OpFlags);
+  }
+  
+  if (Subtarget->isPICStyleRIPRel() &&
+      getTargetMachine().getCodeModel() == CodeModel::Small)
+    Result = DAG.getNode(X86ISD::WrapperRIP, dl, getPointerTy(), Result);
+  else
+    Result = DAG.getNode(X86ISD::Wrapper, dl, getPointerTy(), Result);
 
   // With PIC, the address is actually $g + Offset.
   if (IsPic && !Subtarget->isPICStyleRIPRel()) {
@@ -4381,12 +4483,14 @@ X86TargetLowering::LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) {
 
 static SDValue
 GetTLSADDR(SelectionDAG &DAG, SDValue Chain, GlobalAddressSDNode *GA,
-           SDValue *InFlag, const MVT PtrVT, unsigned ReturnReg) {
+           SDValue *InFlag, const MVT PtrVT, unsigned ReturnReg,
+           unsigned char OperandFlags) {
   SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Flag);
   DebugLoc dl = GA->getDebugLoc();
   SDValue TGA = DAG.getTargetGlobalAddress(GA->getGlobal(),
                                            GA->getValueType(0),
-                                           GA->getOffset());
+                                           GA->getOffset(),
+                                           OperandFlags);
   if (InFlag) {
     SDValue Ops[] = { Chain,  TGA, *InFlag };
     Chain = DAG.getNode(X86ISD::TLSADDR, dl, NodeTys, Ops, 3);
@@ -4410,14 +4514,15 @@ LowerToTLSGeneralDynamicModel32(GlobalAddressSDNode *GA, SelectionDAG &DAG,
                                                  PtrVT), InFlag);
   InFlag = Chain.getValue(1);
 
-  return GetTLSADDR(DAG, Chain, GA, &InFlag, PtrVT, X86::EAX);
+  return GetTLSADDR(DAG, Chain, GA, &InFlag, PtrVT, X86::EAX, X86II::MO_TLSGD);
 }
 
 // Lower ISD::GlobalTLSAddress using the "general dynamic" model, 64 bit
 static SDValue
 LowerToTLSGeneralDynamicModel64(GlobalAddressSDNode *GA, SelectionDAG &DAG,
                                 const MVT PtrVT) {
-  return GetTLSADDR(DAG, DAG.getEntryNode(), GA, NULL, PtrVT, X86::RAX);
+  return GetTLSADDR(DAG, DAG.getEntryNode(), GA, NULL, PtrVT,
+                    X86::RAX, X86II::MO_TLSGD);
 }
 
 // Lower ISD::GlobalTLSAddress using the "initial exec" (for no-pic) or
@@ -4435,11 +4540,26 @@ static SDValue LowerToTLSExecModel(GlobalAddressSDNode *GA, SelectionDAG &DAG,
   SDValue ThreadPointer = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), Base,
                                       NULL, 0);
 
+  unsigned char OperandFlags = 0;
+  // Most TLS accesses are not RIP relative, even on x86-64.  One exception is
+  // initialexec.
+  unsigned WrapperKind = X86ISD::Wrapper;
+  if (model == TLSModel::LocalExec) {
+    OperandFlags = is64Bit ? X86II::MO_TPOFF : X86II::MO_NTPOFF;
+  } else if (is64Bit) {
+    assert(model == TLSModel::InitialExec);
+    OperandFlags = X86II::MO_GOTTPOFF;
+    WrapperKind = X86ISD::WrapperRIP;
+  } else {
+    assert(model == TLSModel::InitialExec);
+    OperandFlags = X86II::MO_INDNTPOFF;
+  }
+  
   // emit "addl x@ntpoff,%eax" (local exec) or "addl x@indntpoff,%eax" (initial
   // exec)
   SDValue TGA = DAG.getTargetGlobalAddress(GA->getGlobal(), GA->getValueType(0),
-                                           GA->getOffset());
-  SDValue Offset = DAG.getNode(X86ISD::Wrapper, dl, PtrVT, TGA);
+                                           GA->getOffset(), OperandFlags);
+  SDValue Offset = DAG.getNode(WrapperKind, dl, PtrVT, TGA);
 
   if (model == TLSModel::InitialExec)
     Offset = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), Offset,
@@ -4457,72 +4577,33 @@ X86TargetLowering::LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) {
   assert(Subtarget->isTargetELF() &&
          "TLS not implemented for non-ELF targets");
   GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Op);
-  GlobalValue *GV = GA->getGlobal();
-  TLSModel::Model model =
-    getTLSModel (GV, getTargetMachine().getRelocationModel());
-  if (Subtarget->is64Bit()) {
-    switch (model) {
-    case TLSModel::GeneralDynamic:
-    case TLSModel::LocalDynamic: // not implemented
+  const GlobalValue *GV = GA->getGlobal();
+  
+  // If GV is an alias then use the aliasee for determining
+  // thread-localness.
+  if (const GlobalAlias *Alias = dyn_cast<GlobalAlias>(GV))
+    GV = Alias->resolveAliasedGlobal(false);
+  
+  TLSModel::Model model = getTLSModel(GV,
+                                      getTargetMachine().getRelocationModel());
+  
+  switch (model) {
+  case TLSModel::GeneralDynamic:
+  case TLSModel::LocalDynamic: // not implemented
+    if (Subtarget->is64Bit())
       return LowerToTLSGeneralDynamicModel64(GA, DAG, getPointerTy());
-
-    case TLSModel::InitialExec:
-    case TLSModel::LocalExec:
-      return LowerToTLSExecModel(GA, DAG, getPointerTy(), model, true);
-    }
-  } else {
-    switch (model) {
-    case TLSModel::GeneralDynamic:
-    case TLSModel::LocalDynamic: // not implemented
-      return LowerToTLSGeneralDynamicModel32(GA, DAG, getPointerTy());
-
-    case TLSModel::InitialExec:
-    case TLSModel::LocalExec:
-      return LowerToTLSExecModel(GA, DAG, getPointerTy(), model, false);
-    }
+    return LowerToTLSGeneralDynamicModel32(GA, DAG, getPointerTy());
+    
+  case TLSModel::InitialExec:
+  case TLSModel::LocalExec:
+    return LowerToTLSExecModel(GA, DAG, getPointerTy(), model,
+                               Subtarget->is64Bit());
   }
+  
   assert(0 && "Unreachable");
   return SDValue();
 }
 
-SDValue
-X86TargetLowering::LowerExternalSymbol(SDValue Op, SelectionDAG &DAG) {
-  // FIXME there isn't really any debug info here
-  DebugLoc dl = Op.getDebugLoc();
-  const char *Sym = cast<ExternalSymbolSDNode>(Op)->getSymbol();
-  SDValue Result = DAG.getTargetExternalSymbol(Sym, getPointerTy());
-  Result = DAG.getNode(X86ISD::Wrapper, dl, getPointerTy(), Result);
-  // With PIC, the address is actually $g + Offset.
-  if (getTargetMachine().getRelocationModel() == Reloc::PIC_ &&
-      !Subtarget->isPICStyleRIPRel()) {
-    Result = DAG.getNode(ISD::ADD, dl, getPointerTy(),
-                         DAG.getNode(X86ISD::GlobalBaseReg,
-                                     DebugLoc::getUnknownLoc(),
-                                     getPointerTy()),
-                         Result);
-  }
-
-  return Result;
-}
-
-SDValue X86TargetLowering::LowerJumpTable(SDValue Op, SelectionDAG &DAG) {
-  JumpTableSDNode *JT = cast<JumpTableSDNode>(Op);
-  // FIXME there isn't really any debug into here
-  DebugLoc dl = JT->getDebugLoc();
-  SDValue Result = DAG.getTargetJumpTable(JT->getIndex(), getPointerTy());
-  Result = DAG.getNode(X86ISD::Wrapper, dl, getPointerTy(), Result);
-  // With PIC, the address is actually $g + Offset.
-  if (getTargetMachine().getRelocationModel() == Reloc::PIC_ &&
-      !Subtarget->isPICStyleRIPRel()) {
-    Result = DAG.getNode(ISD::ADD, dl, getPointerTy(),
-                         DAG.getNode(X86ISD::GlobalBaseReg,
-                                     DebugLoc::getUnknownLoc(),
-                                     getPointerTy()),
-                         Result);
-  }
-
-  return Result;
-}
 
 /// LowerShift - Lower SRA_PARTS and friends, which return two i32 values and
 /// take a 2 x i32 value to shift plus a shift amount.
@@ -6779,6 +6860,7 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
   case X86ISD::REP_MOVS:           return "X86ISD::REP_MOVS";
   case X86ISD::GlobalBaseReg:      return "X86ISD::GlobalBaseReg";
   case X86ISD::Wrapper:            return "X86ISD::Wrapper";
+  case X86ISD::WrapperRIP:         return "X86ISD::WrapperRIP";
   case X86ISD::PEXTRB:             return "X86ISD::PEXTRB";
   case X86ISD::PEXTRW:             return "X86ISD::PEXTRW";
   case X86ISD::INSERTPS:           return "X86ISD::INSERTPS";
diff --git a/lib/Target/X86/X86Instr64bit.td b/lib/Target/X86/X86Instr64bit.td
index 063913f..472ba4c 100644
--- a/lib/Target/X86/X86Instr64bit.td
+++ b/lib/Target/X86/X86Instr64bit.td
@@ -45,7 +45,8 @@ def lea64_32mem : Operand<i32> {
 // Complex Pattern Definitions.
 //
 def lea64addr : ComplexPattern<i64, 4, "SelectLEAAddr",
-                        [add, mul, X86mul_imm, shl, or, frameindex, X86Wrapper],
+                        [add, mul, X86mul_imm, shl, or, frameindex, X86Wrapper,
+                         X86WrapperRIP],
                         []>;
 
 def tls64addr : ComplexPattern<i64, 4, "SelectTLSADDRAddr",
@@ -1418,6 +1419,9 @@ def : Pat<(i64 (X86Wrapper tglobaladdr :$dst)),
 def : Pat<(i64 (X86Wrapper texternalsym:$dst)),
           (MOV64ri texternalsym:$dst)>, Requires<[NotSmallCode]>;
 
+// If we have small model and -static mode, it is safe to store global addresses
+// directly as immediates.  FIXME: This is really a hack, the 'imm' predicate
+// should handle this sort of thing.
 def : Pat<(store (i64 (X86Wrapper tconstpool:$src)), addr:$dst),
           (MOV64mi32 addr:$dst, tconstpool:$src)>,
           Requires<[SmallCode, IsStatic]>;
@@ -1431,6 +1435,23 @@ def : Pat<(store (i64 (X86Wrapper texternalsym:$src)), addr:$dst),
           (MOV64mi32 addr:$dst, texternalsym:$src)>,
           Requires<[SmallCode, IsStatic]>;
 
+// If we have small model and -static mode, it is safe to store global addresses
+// directly as immediates.  FIXME: This is really a hack, the 'imm' predicate
+// should handle this sort of thing.
+def : Pat<(store (i64 (X86WrapperRIP tconstpool:$src)), addr:$dst),
+          (MOV64mi32 addr:$dst, tconstpool:$src)>,
+          Requires<[SmallCode, IsStatic]>;
+def : Pat<(store (i64 (X86WrapperRIP tjumptable:$src)), addr:$dst),
+          (MOV64mi32 addr:$dst, tjumptable:$src)>,
+          Requires<[SmallCode, IsStatic]>;
+def : Pat<(store (i64 (X86WrapperRIP tglobaladdr:$src)), addr:$dst),
+          (MOV64mi32 addr:$dst, tglobaladdr:$src)>,
+          Requires<[SmallCode, IsStatic]>;
+def : Pat<(store (i64 (X86WrapperRIP texternalsym:$src)), addr:$dst),
+          (MOV64mi32 addr:$dst, texternalsym:$src)>,
+          Requires<[SmallCode, IsStatic]>;
+
+
 // Calls
 // Direct PC relative function call for small code model. 32-bit displacement
 // sign extended to 64-bit.
diff --git a/lib/Target/X86/X86InstrBuilder.h b/lib/Target/X86/X86InstrBuilder.h
index 39504cd..b50dd65 100644
--- a/lib/Target/X86/X86InstrBuilder.h
+++ b/lib/Target/X86/X86InstrBuilder.h
@@ -157,10 +157,10 @@ addFrameReference(const MachineInstrBuilder &MIB, int FI, int Offset = 0) {
 ///
 inline const MachineInstrBuilder &
 addConstantPoolReference(const MachineInstrBuilder &MIB, unsigned CPI,
-                         unsigned GlobalBaseReg = 0) {
+                         unsigned GlobalBaseReg, unsigned char OpFlags) {
   //FIXME: factor this
   return MIB.addReg(GlobalBaseReg).addImm(1).addReg(0)
-    .addConstantPoolIndex(CPI).addReg(0);
+    .addConstantPoolIndex(CPI, 0, OpFlags).addReg(0);
 }
 
 } // End llvm namespace
diff --git a/lib/Target/X86/X86InstrInfo.cpp b/lib/Target/X86/X86InstrInfo.cpp
index 8a9b7c9..21f71ec 100644
--- a/lib/Target/X86/X86InstrInfo.cpp
+++ b/lib/Target/X86/X86InstrInfo.cpp
@@ -18,6 +18,7 @@
 #include "X86MachineFunctionInfo.h"
 #include "X86Subtarget.h"
 #include "X86TargetMachine.h"
+#include "llvm/GlobalVariable.h"
 #include "llvm/DerivedTypes.h"
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/CodeGen/MachineConstantPool.h"
@@ -28,7 +29,6 @@
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Target/TargetOptions.h"
 #include "llvm/Target/TargetAsmInfo.h"
-
 using namespace llvm;
 
 namespace {
@@ -781,6 +781,29 @@ static bool regIsPICBase(unsigned BaseReg, const MachineRegisterInfo &MRI) {
 static inline bool isGVStub(GlobalValue *GV, X86TargetMachine &TM) {
   return TM.getSubtarget<X86Subtarget>().GVRequiresExtraLoad(GV, TM, false);
 }
+
+/// CanRematLoadWithDispOperand - Return true if a load with the specified
+/// operand is a candidate for remat: for this to be true we need to know that
+/// the load will always return the same value, even if moved.
+static bool CanRematLoadWithDispOperand(const MachineOperand &MO,
+                                        X86TargetMachine &TM) {
+  // Loads from constant pool entries can be remat'd.
+  if (MO.isCPI()) return true;
+  
+  // We can remat globals in some cases.
+  if (MO.isGlobal()) {
+    // If this is a load of a stub, not of the global, we can remat it.  This
+    // access will always return the address of the global.
+    if (isGVStub(MO.getGlobal(), TM))
+      return true;
+    
+    // If the global itself is constant, we can remat the load.
+    if (GlobalVariable *GV = dyn_cast<GlobalVariable>(MO.getGlobal()))
+      if (GV->isConstant())
+        return true;
+  }
+  return false;
+}
  
 bool
 X86InstrInfo::isReallyTriviallyReMaterializable(const MachineInstr *MI) const {
@@ -802,11 +825,9 @@ X86InstrInfo::isReallyTriviallyReMaterializable(const MachineInstr *MI) const {
       if (MI->getOperand(1).isReg() &&
           MI->getOperand(2).isImm() &&
           MI->getOperand(3).isReg() && MI->getOperand(3).getReg() == 0 &&
-          (MI->getOperand(4).isCPI() ||
-           (MI->getOperand(4).isGlobal() &&
-            isGVStub(MI->getOperand(4).getGlobal(), TM)))) {
+          CanRematLoadWithDispOperand(MI->getOperand(4), TM)) {
         unsigned BaseReg = MI->getOperand(1).getReg();
-        if (BaseReg == 0)
+        if (BaseReg == 0 || BaseReg == X86::RIP)
           return true;
         // Allow re-materialization of PIC load.
         if (!ReMatPICStubLoad && MI->getOperand(4).isGlobal())
@@ -3190,9 +3211,8 @@ unsigned X86InstrInfo::GetInstSizeInBytes(const MachineInstr *MI) const {
   bool IsPIC = (TM.getRelocationModel() == Reloc::PIC_);
   bool Is64BitMode = TM.getSubtargetImpl()->is64Bit();
   unsigned Size = GetInstSizeWithDesc(*MI, &Desc, IsPIC, Is64BitMode);
-  if (Desc.getOpcode() == X86::MOVPC32r) {
+  if (Desc.getOpcode() == X86::MOVPC32r)
     Size += GetInstSizeWithDesc(*MI, &get(X86::POP32r), IsPIC, Is64BitMode);
-  }
   return Size;
 }
 
@@ -3220,17 +3240,17 @@ unsigned X86InstrInfo::getGlobalBaseReg(MachineFunction *MF) const {
   const TargetInstrInfo *TII = TM.getInstrInfo();
   // Operand of MovePCtoStack is completely ignored by asm printer. It's
   // only used in JIT code emission as displacement to pc.
-  BuildMI(FirstMBB, MBBI, DL, TII->get(X86::MOVPC32r), PC)
-    .addImm(0);
+  BuildMI(FirstMBB, MBBI, DL, TII->get(X86::MOVPC32r), PC).addImm(0);
   
   // If we're using vanilla 'GOT' PIC style, we should use relative addressing
-  // not to pc, but to _GLOBAL_ADDRESS_TABLE_ external
+  // not to pc, but to _GLOBAL_OFFSET_TABLE_ external.
   if (TM.getRelocationModel() == Reloc::PIC_ &&
       TM.getSubtarget<X86Subtarget>().isPICStyleGOT()) {
-    GlobalBaseReg =
-      RegInfo.createVirtualRegister(X86::GR32RegisterClass);
+    GlobalBaseReg = RegInfo.createVirtualRegister(X86::GR32RegisterClass);
+    // Generate addl $_GLOBAL_OFFSET_TABLE_ + [.-piclabel], %some_register
     BuildMI(FirstMBB, MBBI, DL, TII->get(X86::ADD32ri), GlobalBaseReg)
-      .addReg(PC).addExternalSymbol("_GLOBAL_OFFSET_TABLE_");
+      .addReg(PC).addExternalSymbol("_GLOBAL_OFFSET_TABLE_", 0,
+                                    X86II::MO_GOT_ABSOLUTE_ADDRESS);
   } else {
     GlobalBaseReg = PC;
   }
diff --git a/lib/Target/X86/X86InstrInfo.h b/lib/Target/X86/X86InstrInfo.h
index e09769e..83f0194 100644
--- a/lib/Target/X86/X86InstrInfo.h
+++ b/lib/Target/X86/X86InstrInfo.h
@@ -71,7 +71,86 @@ namespace X86 {
 namespace X86II {
   enum {
     //===------------------------------------------------------------------===//
-    // Instruction types.  These are the standard/most common forms for X86
+    // X86 Specific MachineOperand flags.
+    
+    MO_NO_FLAG = 0,
+    
+    /// MO_GOT_ABSOLUTE_ADDRESS - On a symbol operand, this represents a
+    /// relocation of:
+    ///    SYMBOL_LABEL + [. - PICBASELABEL]
+    MO_GOT_ABSOLUTE_ADDRESS = 1,
+    
+    /// MO_PIC_BASE_OFFSET - On a symbol operand this indicates that the
+    /// immediate should get the value of the symbol minus the PIC base label:
+    ///    SYMBOL_LABEL - PICBASELABEL
+    MO_PIC_BASE_OFFSET = 2,
+
+    /// MO_GOT - On a symbol operand this indicates that the immediate is the
+    /// offset to the GOT entry for the symbol name from the base of the GOT.
+    ///
+    /// See the X86-64 ELF ABI supplement for more details. 
+    ///    SYMBOL_LABEL @GOT
+    MO_GOT = 3,
+    
+    /// MO_GOTOFF - On a symbol operand this indicates that the immediate is
+    /// the offset to the location of the symbol name from the base of the GOT. 
+    ///
+    /// See the X86-64 ELF ABI supplement for more details. 
+    ///    SYMBOL_LABEL @GOTOFF
+    MO_GOTOFF = 4,
+    
+    /// MO_GOTPCREL - On a symbol operand this indicates that the immediate is
+    /// the offset to the GOT entry for the symbol name from the current code
+    /// location.
+    ///
+    /// See the X86-64 ELF ABI supplement for more details. 
+    ///    SYMBOL_LABEL @GOTPCREL
+    MO_GOTPCREL = 5,
+    
+    /// MO_PLT - On a symbol operand this indicates that the immediate is the
+    /// offset to the symbol's PLT entry from the current code location.
+    ///
+    /// See the X86-64 ELF ABI supplement for more details. 
+    ///    SYMBOL_LABEL @PLT
+    MO_PLT = 6,
+    
+    /// MO_TLSGD - On a symbol operand this indicates that the immediate is
+    /// some TLS offset.
+    ///
+    /// See 'ELF Handling for Thread-Local Storage' for more details. 
+    ///    SYMBOL_LABEL @TLSGD
+    MO_TLSGD = 7,
+    
+    /// MO_GOTTPOFF - On a symbol operand this indicates that the immediate is
+    /// some TLS offset.
+    ///
+    /// See 'ELF Handling for Thread-Local Storage' for more details. 
+    ///    SYMBOL_LABEL @GOTTPOFF
+    MO_GOTTPOFF = 8,
+   
+    /// MO_INDNTPOFF - On a symbol operand this indicates that the immediate is
+    /// some TLS offset.
+    ///
+    /// See 'ELF Handling for Thread-Local Storage' for more details. 
+    ///    SYMBOL_LABEL @INDNTPOFF
+    MO_INDNTPOFF = 9,
+    
+    /// MO_TPOFF - On a symbol operand this indicates that the immediate is
+    /// some TLS offset.
+    ///
+    /// See 'ELF Handling for Thread-Local Storage' for more details. 
+    ///    SYMBOL_LABEL @TPOFF
+    MO_TPOFF = 10,
+    
+    /// MO_NTPOFF - On a symbol operand this indicates that the immediate is
+    /// some TLS offset.
+    ///
+    /// See 'ELF Handling for Thread-Local Storage' for more details. 
+    ///    SYMBOL_LABEL @NTPOFF
+    MO_NTPOFF = 11,
+    
+    //===------------------------------------------------------------------===//
+    // Instruction encodings.  These are the standard/most common forms for X86
     // instructions.
     //
 
diff --git a/lib/Target/X86/X86InstrInfo.td b/lib/Target/X86/X86InstrInfo.td
index 2d8f55f..a6b0880 100644
--- a/lib/Target/X86/X86InstrInfo.td
+++ b/lib/Target/X86/X86InstrInfo.td
@@ -50,9 +50,9 @@ def SDTX86atomicBinary : SDTypeProfile<2, 3, [SDTCisInt<0>, SDTCisInt<1>,
                                 SDTCisPtrTy<2>, SDTCisInt<3>,SDTCisInt<4>]>;
 def SDTX86Ret     : SDTypeProfile<0, -1, [SDTCisVT<0, i16>]>;
 
-def SDT_X86CallSeqStart : SDCallSeqStart<[ SDTCisVT<0, i32> ]>;
-def SDT_X86CallSeqEnd   : SDCallSeqEnd<[ SDTCisVT<0, i32>,
-                                         SDTCisVT<1, i32> ]>;
+def SDT_X86CallSeqStart : SDCallSeqStart<[SDTCisVT<0, i32>]>;
+def SDT_X86CallSeqEnd   : SDCallSeqEnd<[SDTCisVT<0, i32>,
+                                        SDTCisVT<1, i32>]>;
 
 def SDT_X86Call   : SDTypeProfile<0, -1, [SDTCisVT<0, iPTR>]>;
 
@@ -236,6 +236,10 @@ def HasSSE3      : Predicate<"Subtarget->hasSSE3()">;
 def HasSSSE3     : Predicate<"Subtarget->hasSSSE3()">;
 def HasSSE41     : Predicate<"Subtarget->hasSSE41()">;
 def HasSSE42     : Predicate<"Subtarget->hasSSE42()">;
+def HasSSE4A     : Predicate<"Subtarget->hasSSE4A()">;
+def HasAVX       : Predicate<"Subtarget->hasAVX()">;
+def HasFMA3      : Predicate<"Subtarget->hasFMA3()">;
+def HasFMA4      : Predicate<"Subtarget->hasFMA4()">;
 def FPStackf32   : Predicate<"!Subtarget->hasSSE1()">;
 def FPStackf64   : Predicate<"!Subtarget->hasSSE2()">;
 def In32BitMode  : Predicate<"!Subtarget->is64Bit()">;
diff --git a/lib/Target/X86/X86InstrMMX.td b/lib/Target/X86/X86InstrMMX.td
index 43fadc2..b79a006 100644
--- a/lib/Target/X86/X86InstrMMX.td
+++ b/lib/Target/X86/X86InstrMMX.td
@@ -481,11 +481,11 @@ def MMX_PMOVMSKBrr : MMXI<0xD7, MRMSrcReg, (outs GR32:$dst), (ins VR64:$src),
 
 // Misc.
 let Uses = [EDI] in
-def MMX_MASKMOVQ : MMXI<0xF7, MRMDestMem, (outs), (ins VR64:$src, VR64:$mask),
+def MMX_MASKMOVQ : MMXI<0xF7, MRMSrcReg, (outs), (ins VR64:$src, VR64:$mask),
                         "maskmovq\t{$mask, $src|$src, $mask}",
                         [(int_x86_mmx_maskmovq VR64:$src, VR64:$mask, EDI)]>;
 let Uses = [RDI] in
-def MMX_MASKMOVQ64: MMXI64<0xF7, MRMDestMem, (outs), (ins VR64:$src, VR64:$mask),
+def MMX_MASKMOVQ64: MMXI64<0xF7, MRMSrcReg, (outs), (ins VR64:$src, VR64:$mask),
                            "maskmovq\t{$mask, $src|$src, $mask}",
                            [(int_x86_mmx_maskmovq VR64:$src, VR64:$mask, RDI)]>;
 
diff --git a/lib/Target/X86/X86RegisterInfo.td b/lib/Target/X86/X86RegisterInfo.td
index d552cb3..996baa0 100644
--- a/lib/Target/X86/X86RegisterInfo.td
+++ b/lib/Target/X86/X86RegisterInfo.td
@@ -439,7 +439,7 @@ def GR32 : RegisterClass<"X86", [i32], 32,
 
 def GR64 : RegisterClass<"X86", [i64], 64, 
                          [RAX, RCX, RDX, RSI, RDI, R8, R9, R10, R11,
-                          RBX, R14, R15, R12, R13, RBP, RSP]> {
+                          RBX, R14, R15, R12, R13, RBP, RSP, RIP]> {
   let SubRegClassList = [GR8, GR8, GR16, GR32];
   let MethodProtos = [{
     iterator allocation_order_end(const MachineFunction &MF) const;
@@ -453,9 +453,9 @@ def GR64 : RegisterClass<"X86", [i64], 64,
       if (!Subtarget.is64Bit())
         return begin();  // None of these are allocatable in 32-bit.
       if (RI->hasFP(MF)) // Does the function dedicate RBP to being a frame ptr?
-        return end()-2;  // If so, don't allocate RSP or RBP
+        return end()-3;  // If so, don't allocate RIP, RSP or RBP
       else
-        return end()-1;  // If not, just don't allocate RSP
+        return end()-2;  // If not, just don't allocate RIP or RSP
     }
   }];
 }
diff --git a/lib/Target/X86/X86Subtarget.cpp b/lib/Target/X86/X86Subtarget.cpp
index 56983ce..8506fa6 100644
--- a/lib/Target/X86/X86Subtarget.cpp
+++ b/lib/Target/X86/X86Subtarget.cpp
@@ -207,6 +207,10 @@ void X86Subtarget::AutoDetectSubtargetFeatures() {
 
   bool IsIntel = memcmp(text.c, "GenuineIntel", 12) == 0;
   bool IsAMD   = !IsIntel && memcmp(text.c, "AuthenticAMD", 12) == 0;
+
+  HasFMA3 = IsIntel && ((ECX >> 12) & 0x1);
+  HasAVX = ((ECX >> 28) & 0x1);
+
   if (IsIntel || IsAMD) {
     // Determine if bit test memory instructions are slow.
     unsigned Family = 0;
@@ -217,6 +221,7 @@ void X86Subtarget::AutoDetectSubtargetFeatures() {
     X86::GetCpuIDAndInfo(0x80000001, &EAX, &EBX, &ECX, &EDX);
     HasX86_64 = (EDX >> 29) & 0x1;
     HasSSE4A = IsAMD && ((ECX >> 6) & 0x1);
+    HasFMA4 = IsAMD && ((ECX >> 16) & 0x1);
   }
 }
 
@@ -342,6 +347,10 @@ X86Subtarget::X86Subtarget(const Module &M, const std::string &FS, bool is64Bit)
   , X86SSELevel(NoMMXSSE)
   , X863DNowLevel(NoThreeDNow)
   , HasX86_64(false)
+  , HasSSE4A(false)
+  , HasAVX(false)
+  , HasFMA3(false)
+  , HasFMA4(false)
   , IsBTMemSlow(false)
   , DarwinVers(0)
   , IsLinux(false)
diff --git a/lib/Target/X86/X86Subtarget.h b/lib/Target/X86/X86Subtarget.h
index 694b0eb..f4f6cce 100644
--- a/lib/Target/X86/X86Subtarget.h
+++ b/lib/Target/X86/X86Subtarget.h
@@ -64,12 +64,21 @@ protected:
   ///
   bool HasX86_64;
 
-  /// IsBTMemSlow - True if BT (bit test) of memory instructions are slow.
-  bool IsBTMemSlow;
-  
   /// HasSSE4A - True if the processor supports SSE4A instructions.
   bool HasSSE4A;
 
+  /// HasAVX - Target has AVX instructions
+  bool HasAVX;
+
+  /// HasFMA3 - Target has 3-operand fused multiply-add
+  bool HasFMA3;
+
+  /// HasFMA4 - Target has 4-operand fused multiply-add
+  bool HasFMA4;
+
+  /// IsBTMemSlow - True if BT (bit test) of memory instructions are slow.
+  bool IsBTMemSlow;
+  
   /// DarwinVers - Nonzero if this is a darwin platform: the numeric
   /// version of the platform, e.g. 8 = 10.4 (Tiger), 9 = 10.5 (Leopard), etc.
   unsigned char DarwinVers; // Is any darwin-x86 platform.
@@ -133,6 +142,9 @@ public:
   bool hasSSE4A() const { return HasSSE4A; }
   bool has3DNow() const { return X863DNowLevel >= ThreeDNow; }
   bool has3DNowA() const { return X863DNowLevel >= ThreeDNowA; }
+  bool hasAVX() const { return HasAVX; }
+  bool hasFMA3() const { return HasFMA3; }
+  bool hasFMA4() const { return HasFMA4; }
 
   bool isBTMemSlow() const { return IsBTMemSlow; }
 
diff --git a/lib/Target/X86/X86TargetMachine.cpp b/lib/Target/X86/X86TargetMachine.cpp
index 53c46c3..67dcd01 100644
--- a/lib/Target/X86/X86TargetMachine.cpp
+++ b/lib/Target/X86/X86TargetMachine.cpp
@@ -36,10 +36,8 @@ X("x86",    "32-bit X86: Pentium-Pro and above");
 static RegisterTarget<X86_64TargetMachine>
 Y("x86-64", "64-bit X86: EM64T and AMD64");
 
-// Force static initialization when called from llvm/InitializeAllTargets.h
-namespace llvm {
-  void InitializeX86Target() { }
-}
+// Force static initialization.
+extern "C" void LLVMInitializeX86Target() { }
 
 // No assembler printer by default
 X86TargetMachine::AsmPrinterCtorFn X86TargetMachine::AsmPrinterCtor = 0;
@@ -222,7 +220,8 @@ bool X86TargetMachine::addAssemblyEmitter(PassManagerBase &PM,
   // On Darwin, override 64-bit static relocation to pic_ since the
   // assembler doesn't support it.
   if (DefRelocModel == Reloc::Static &&
-      Subtarget.isTargetDarwin() && Subtarget.is64Bit())
+      Subtarget.isTargetDarwin() && Subtarget.is64Bit() &&
+      getCodeModel() == CodeModel::Small)
     setRelocationModel(Reloc::PIC_);
 
   assert(AsmPrinterCtor && "AsmPrinter was not linked in");
@@ -319,11 +318,3 @@ bool X86TargetMachine::addSimpleCodeEmitter(PassManagerBase &PM,
   return false;
 }
 
-/// symbolicAddressesAreRIPRel - Return true if symbolic addresses are
-/// RIP-relative on this machine, taking into consideration the relocation
-/// model and subtarget. RIP-relative addresses cannot have a separate
-/// base or index register.
-bool X86TargetMachine::symbolicAddressesAreRIPRel() const {
-  return getRelocationModel() != Reloc::Static &&
-         Subtarget.isPICStyleRIPRel();
-}
diff --git a/lib/Target/X86/X86TargetMachine.h b/lib/Target/X86/X86TargetMachine.h
index ecc1d39..ba73ca8 100644
--- a/lib/Target/X86/X86TargetMachine.h
+++ b/lib/Target/X86/X86TargetMachine.h
@@ -91,12 +91,6 @@ public:
   virtual bool addSimpleCodeEmitter(PassManagerBase &PM,
                                     CodeGenOpt::Level OptLevel,
                                     bool DumpAsm, JITCodeEmitter &JCE);
-
-  /// symbolicAddressesAreRIPRel - Return true if symbolic addresses are
-  /// RIP-relative on this machine, taking into consideration the relocation
-  /// model and subtarget. RIP-relative addresses cannot have a separate
-  /// base or index register.
-  bool symbolicAddressesAreRIPRel() const;
 };
 
 /// X86_32TargetMachine - X86 32-bit target machine.
diff --git a/lib/Target/XCore/XCoreAsmPrinter.cpp b/lib/Target/XCore/XCoreAsmPrinter.cpp
index ed4c101..4ab5d75 100644
--- a/lib/Target/XCore/XCoreAsmPrinter.cpp
+++ b/lib/Target/XCore/XCoreAsmPrinter.cpp
@@ -244,9 +244,6 @@ emitGlobal(const GlobalVariable *GV)
     
     // Mark the end of the global
     O << "\t.cc_bottom " << name << ".data\n";
-  } else {
-    if (GV->hasExternalWeakLinkage())
-      ExtWeakSymbols.insert(GV);
   }
 }
 
@@ -375,12 +372,7 @@ void XCoreAsmPrinter::printOperand(const MachineInstr *MI, int opNum) {
     printBasicBlockLabel(MO.getMBB());
     break;
   case MachineOperand::MO_GlobalAddress:
-    {
-      const GlobalValue *GV = MO.getGlobal();
-      O << Mang->getValueName(GV);
-      if (GV->hasExternalWeakLinkage())
-        ExtWeakSymbols.insert(GV);
-    }
+    O << Mang->getValueName(MO.getGlobal());
     break;
   case MachineOperand::MO_ExternalSymbol:
     O << MO.getSymbolName();
@@ -430,25 +422,8 @@ bool XCoreAsmPrinter::doInitialization(Module &M) {
   bool Result = AsmPrinter::doInitialization(M);
   DW = getAnalysisIfAvailable<DwarfWriter>();
   
-  if (!FileDirective.empty()) {
+  if (!FileDirective.empty())
     emitFileDirective(FileDirective);
-  }
-  
-  // Print out type strings for external functions here
-  for (Module::const_iterator I = M.begin(), E = M.end();
-       I != E; ++I) {
-    if (I->isDeclaration() && !I->isIntrinsic()) {
-      switch (I->getLinkage()) {
-      default:
-        assert(0 && "Unexpected linkage");
-      case Function::ExternalWeakLinkage:
-        ExtWeakSymbols.insert(I);
-        // fallthrough
-      case Function::ExternalLinkage:
-        break;
-      }
-    }
-  }
 
   return Result;
 }
@@ -461,8 +436,5 @@ bool XCoreAsmPrinter::doFinalization(Module &M) {
     emitGlobal(I);
   }
   
-  // Emit final debug information.
-  DW->EndModule();
-
   return AsmPrinter::doFinalization(M);
 }
diff --git a/lib/Target/XCore/XCoreTargetMachine.cpp b/lib/Target/XCore/XCoreTargetMachine.cpp
index cfd3cd3..09227d9 100644
--- a/lib/Target/XCore/XCoreTargetMachine.cpp
+++ b/lib/Target/XCore/XCoreTargetMachine.cpp
@@ -31,10 +31,8 @@ namespace {
   RegisterTarget<XCoreTargetMachine> X("xcore", "XCore");
 }
 
-// Force static initialization when called from llvm/InitializeAllTargets.h
-namespace llvm {
-  void InitializeXCoreTarget() { }
-}
+// Force static initialization.
+extern "C" void LLVMInitializeXCoreTarget() { }
 
 const TargetAsmInfo *XCoreTargetMachine::createTargetAsmInfo() const {
   return new XCoreTargetAsmInfo(*this);
diff --git a/lib/Transforms/IPO/StripSymbols.cpp b/lib/Transforms/IPO/StripSymbols.cpp
index ab8fe5f..046e044 100644
--- a/lib/Transforms/IPO/StripSymbols.cpp
+++ b/lib/Transforms/IPO/StripSymbols.cpp
@@ -26,6 +26,7 @@
 #include "llvm/Instructions.h"
 #include "llvm/Module.h"
 #include "llvm/Pass.h"
+#include "llvm/Analysis/DebugInfo.h"
 #include "llvm/ValueSymbolTable.h"
 #include "llvm/TypeSymbolTable.h"
 #include "llvm/Transforms/Utils/Local.h"
@@ -210,7 +211,25 @@ bool StripDebugInfo(Module &M) {
   SmallPtrSet<const GlobalValue*, 8> llvmUsedValues;
   findUsedValues(M, llvmUsedValues);
 
-  // Delete all dbg variables.
+  SmallVector<GlobalVariable *, 2> CUs;
+  SmallVector<GlobalVariable *, 4> GVs;
+  SmallVector<GlobalVariable *, 4> SPs;
+  CollectDebugInfoAnchors(M, CUs, GVs, SPs);
+  // These anchors use LinkOnce linkage so that the optimizer does not
+  // remove them accidentally. Set InternalLinkage for all these debug
+  // info anchors.
+  for (SmallVector<GlobalVariable *, 2>::iterator I = CUs.begin(),
+         E = CUs.end(); I != E; ++I)
+    (*I)->setLinkage(GlobalValue::InternalLinkage);
+  for (SmallVector<GlobalVariable *, 4>::iterator I = GVs.begin(),
+         E = GVs.end(); I != E; ++I)
+    (*I)->setLinkage(GlobalValue::InternalLinkage);
+  for (SmallVector<GlobalVariable *, 4>::iterator I = SPs.begin(),
+         E = SPs.end(); I != E; ++I)
+    (*I)->setLinkage(GlobalValue::InternalLinkage);
+
+
+  // Delete all dbg variables.
   for (Module::global_iterator I = M.global_begin(), E = M.global_end(); 
        I != E; ++I) {
     GlobalVariable *GV = dyn_cast<GlobalVariable>(I);
diff --git a/lib/Transforms/Scalar/CMakeLists.txt b/lib/Transforms/Scalar/CMakeLists.txt
index 7a7c48b..8a8f83f 100644
--- a/lib/Transforms/Scalar/CMakeLists.txt
+++ b/lib/Transforms/Scalar/CMakeLists.txt
@@ -31,3 +31,5 @@ add_llvm_library(LLVMScalarOpts
   TailDuplication.cpp
   TailRecursionElimination.cpp
   )
+
+target_link_libraries (LLVMScalarOpts LLVMTransformUtils)
diff --git a/lib/Transforms/Scalar/IndVarSimplify.cpp b/lib/Transforms/Scalar/IndVarSimplify.cpp
index 6c20e7d..27e377f 100644
--- a/lib/Transforms/Scalar/IndVarSimplify.cpp
+++ b/lib/Transforms/Scalar/IndVarSimplify.cpp
@@ -70,6 +70,7 @@ namespace {
     IVUsers         *IU;
     LoopInfo        *LI;
     ScalarEvolution *SE;
+    DominatorTree   *DT;
     bool Changed;
   public:
 
@@ -101,14 +102,13 @@ namespace {
                                    BasicBlock *ExitingBlock,
                                    BranchInst *BI,
                                    SCEVExpander &Rewriter);
-    void RewriteLoopExitValues(Loop *L, const SCEV *BackedgeTakenCount);
+    void RewriteLoopExitValues(Loop *L, const SCEV *BackedgeTakenCount,
+                               SCEVExpander &Rewriter);
 
     void RewriteIVExpressions(Loop *L, const Type *LargestType,
                               SCEVExpander &Rewriter);
 
-    void SinkUnusedInvariants(Loop *L, SCEVExpander &Rewriter);
-
-    void FixUsesBeforeDefs(Loop *L, SCEVExpander &Rewriter);
+    void SinkUnusedInvariants(Loop *L);
 
     void HandleFloatingPointIV(Loop *L, PHINode *PH);
   };
@@ -169,10 +169,10 @@ ICmpInst *IndVarSimplify::LinearFunctionTestReplace(Loop *L,
     CmpIndVar = IndVar;
   }
 
-  // Expand the code for the iteration count into the preheader of the loop.
-  BasicBlock *Preheader = L->getLoopPreheader();
-  Value *ExitCnt = Rewriter.expandCodeFor(RHS, IndVar->getType(),
-                                          Preheader->getTerminator());
+  // Expand the code for the iteration count.
+  assert(RHS->isLoopInvariant(L) &&
+         "Computed iteration count is not loop invariant!");
+  Value *ExitCnt = Rewriter.expandCodeFor(RHS, IndVar->getType(), BI);
 
   // Insert a new icmp_ne or icmp_eq instruction before the branch.
   ICmpInst::Predicate Opcode;
@@ -214,28 +214,13 @@ ICmpInst *IndVarSimplify::LinearFunctionTestReplace(Loop *L,
 /// able to brute-force evaluate arbitrary instructions as long as they have
 /// constant operands at the beginning of the loop.
 void IndVarSimplify::RewriteLoopExitValues(Loop *L,
-                                           const SCEV *BackedgeTakenCount) {
+                                           const SCEV *BackedgeTakenCount,
+                                           SCEVExpander &Rewriter) {
   // Verify the input to the pass in already in LCSSA form.
   assert(L->isLCSSAForm());
 
-  BasicBlock *Preheader = L->getLoopPreheader();
-
-  // Scan all of the instructions in the loop, looking at those that have
-  // extra-loop users and which are recurrences.
-  SCEVExpander Rewriter(*SE);
-
-  // We insert the code into the preheader of the loop if the loop contains
-  // multiple exit blocks, or in the exit block if there is exactly one.
-  BasicBlock *BlockToInsertInto;
   SmallVector<BasicBlock*, 8> ExitBlocks;
   L->getUniqueExitBlocks(ExitBlocks);
-  if (ExitBlocks.size() == 1)
-    BlockToInsertInto = ExitBlocks[0];
-  else
-    BlockToInsertInto = Preheader;
-  BasicBlock::iterator InsertPt = BlockToInsertInto->getFirstNonPHI();
-
-  std::map<Instruction*, Value*> ExitValues;
 
   // Find all values that are computed inside the loop, but used outside of it.
   // Because of LCSSA, these values will only occur in LCSSA PHI Nodes.  Scan
@@ -285,11 +270,7 @@ void IndVarSimplify::RewriteLoopExitValues(Loop *L,
         Changed = true;
         ++NumReplaced;
 
-        // See if we already computed the exit value for the instruction, if so,
-        // just reuse it.
-        Value *&ExitVal = ExitValues[Inst];
-        if (!ExitVal)
-          ExitVal = Rewriter.expandCodeFor(ExitValue, PN->getType(), InsertPt);
+        Value *ExitVal = Rewriter.expandCodeFor(ExitValue, PN->getType(), Inst);
 
         DOUT << "INDVARS: RLEV: AfterLoopVal = " << *ExitVal
              << "  LoopVal = " << *Inst << "\n";
@@ -309,6 +290,15 @@ void IndVarSimplify::RewriteLoopExitValues(Loop *L,
           break;
         }
       }
+      if (ExitBlocks.size() != 1) {
+        // Clone the PHI and delete the original one. This lets IVUsers and
+        // any other maps purge the original user from their records.
+        PHINode *NewPN = PN->clone();
+        NewPN->takeName(PN);
+        NewPN->insertBefore(PN);
+        PN->replaceAllUsesWith(NewPN);
+        PN->eraseFromParent();
+      }
     }
   }
 }
@@ -340,16 +330,19 @@ bool IndVarSimplify::runOnLoop(Loop *L, LPPassManager &LPM) {
   IU = &getAnalysis<IVUsers>();
   LI = &getAnalysis<LoopInfo>();
   SE = &getAnalysis<ScalarEvolution>();
+  DT = &getAnalysis<DominatorTree>();
   Changed = false;
 
   // If there are any floating-point recurrences, attempt to
   // transform them to use integer recurrences.
   RewriteNonIntegerIVs(L);
 
-  BasicBlock *Header       = L->getHeader();
   BasicBlock *ExitingBlock = L->getExitingBlock(); // may be null
   const SCEV* BackedgeTakenCount = SE->getBackedgeTakenCount(L);
 
+  // Create a rewriter object which we'll use to transform the code with.
+  SCEVExpander Rewriter(*SE);
+
   // Check to see if this loop has a computable loop-invariant execution count.
   // If so, this means that we can compute the final value of any expressions
   // that are recurrent in the loop, and substitute the exit values from the
@@ -357,7 +350,7 @@ bool IndVarSimplify::runOnLoop(Loop *L, LPPassManager &LPM) {
   // the current expressions.
   //
   if (!isa<SCEVCouldNotCompute>(BackedgeTakenCount))
-    RewriteLoopExitValues(L, BackedgeTakenCount);
+    RewriteLoopExitValues(L, BackedgeTakenCount, Rewriter);
 
   // Compute the type of the largest recurrence expression, and decide whether
   // a canonical induction variable should be inserted.
@@ -388,9 +381,6 @@ bool IndVarSimplify::runOnLoop(Loop *L, LPPassManager &LPM) {
       NeedCannIV = true;
   }
 
-  // Create a rewriter object which we'll use to transform the code with.
-  SCEVExpander Rewriter(*SE);
-
   // Now that we know the largest of of the induction variable expressions
   // in this loop, insert a canonical induction variable of the largest size.
   Value *IndVar = 0;
@@ -408,7 +398,7 @@ bool IndVarSimplify::runOnLoop(Loop *L, LPPassManager &LPM) {
         OldCannIV = 0;
     }
 
-    IndVar = Rewriter.getOrInsertCanonicalInductionVariable(L,LargestType);
+    IndVar = Rewriter.getOrInsertCanonicalInductionVariable(L, LargestType);
 
     ++NumInserted;
     Changed = true;
@@ -434,20 +424,14 @@ bool IndVarSimplify::runOnLoop(Loop *L, LPPassManager &LPM) {
                                           ExitingBlock, BI, Rewriter);
   }
 
-  Rewriter.setInsertionPoint(Header->getFirstNonPHI());
-
   // Rewrite IV-derived expressions. Clears the rewriter cache.
   RewriteIVExpressions(L, LargestType, Rewriter);
 
-  // The Rewriter may only be used for isInsertedInstruction queries from this
-  // point on.
+  // The Rewriter may not be used from this point on.
 
   // Loop-invariant instructions in the preheader that aren't used in the
   // loop may be sunk below the loop to reduce register pressure.
-  SinkUnusedInvariants(L, Rewriter);
-
-  // Reorder instructions to avoid use-before-def conditions.
-  FixUsesBeforeDefs(L, Rewriter);
+  SinkUnusedInvariants(L);
 
   // For completeness, inform IVUsers of the IV use in the newly-created
   // loop exit test instruction.
@@ -488,29 +472,35 @@ void IndVarSimplify::RewriteIVExpressions(Loop *L, const Type *LargestType,
       // Compute the final addrec to expand into code.
       const SCEV* AR = IU->getReplacementExpr(*UI);
 
-      Value *NewVal = 0;
-      if (AR->isLoopInvariant(L)) {
-        BasicBlock::iterator I = Rewriter.getInsertionPoint();
-        // Expand loop-invariant values in the loop preheader. They will
-        // be sunk to the exit block later, if possible.
-        NewVal =
-          Rewriter.expandCodeFor(AR, UseTy,
-                                 L->getLoopPreheader()->getTerminator());
-        Rewriter.setInsertionPoint(I);
-        ++NumReplaced;
-      } else {
-        // FIXME: It is an extremely bad idea to indvar substitute anything more
-        // complex than affine induction variables.  Doing so will put expensive
-        // polynomial evaluations inside of the loop, and the str reduction pass
-        // currently can only reduce affine polynomials.  For now just disable
-        // indvar subst on anything more complex than an affine addrec, unless
-        // it can be expanded to a trivial value.
-        if (!Stride->isLoopInvariant(L))
-          continue;
-
-        // Now expand it into actual Instructions and patch it into place.
-        NewVal = Rewriter.expandCodeFor(AR, UseTy);
-      }
+      // FIXME: It is an extremely bad idea to indvar substitute anything more
+      // complex than affine induction variables.  Doing so will put expensive
+      // polynomial evaluations inside of the loop, and the str reduction pass
+      // currently can only reduce affine polynomials.  For now just disable
+      // indvar subst on anything more complex than an affine addrec, unless
+      // it can be expanded to a trivial value.
+      if (!AR->isLoopInvariant(L) && !Stride->isLoopInvariant(L))
+        continue;
+
+      // Determine the insertion point for this user. By default, insert
+      // immediately before the user. The SCEVExpander class will automatically
+      // hoist loop invariants out of the loop. For PHI nodes, there may be
+      // multiple uses, so compute the nearest common dominator for the
+      // incoming blocks.
+      Instruction *InsertPt = User;
+      if (PHINode *PHI = dyn_cast<PHINode>(InsertPt))
+        for (unsigned i = 0, e = PHI->getNumIncomingValues(); i != e; ++i)
+          if (PHI->getIncomingValue(i) == Op) {
+            if (InsertPt == User)
+              InsertPt = PHI->getIncomingBlock(i)->getTerminator();
+            else
+              InsertPt =
+                DT->findNearestCommonDominator(InsertPt->getParent(),
+                                               PHI->getIncomingBlock(i))
+                      ->getTerminator();
+          }
+
+      // Now expand it into actual Instructions and patch it into place.
+      Value *NewVal = Rewriter.expandCodeFor(AR, UseTy, InsertPt);
 
       // Patch the new value into place.
       if (Op->hasName())
@@ -543,19 +533,20 @@ void IndVarSimplify::RewriteIVExpressions(Loop *L, const Type *LargestType,
 /// If there's a single exit block, sink any loop-invariant values that
 /// were defined in the preheader but not used inside the loop into the
 /// exit block to reduce register pressure in the loop.
-void IndVarSimplify::SinkUnusedInvariants(Loop *L, SCEVExpander &Rewriter) {
+void IndVarSimplify::SinkUnusedInvariants(Loop *L) {
   BasicBlock *ExitBlock = L->getExitBlock();
   if (!ExitBlock) return;
 
-  Instruction *NonPHI = ExitBlock->getFirstNonPHI();
+  Instruction *InsertPt = ExitBlock->getFirstNonPHI();
   BasicBlock *Preheader = L->getLoopPreheader();
   BasicBlock::iterator I = Preheader->getTerminator();
   while (I != Preheader->begin()) {
     --I;
-    // New instructions were inserted at the end of the preheader. Only
-    // consider those new instructions.
-    if (!Rewriter.isInsertedInstruction(I))
+    // New instructions were inserted at the end of the preheader.
+    if (isa<PHINode>(I))
       break;
+    if (I->isTrapping())
+      continue;
     // Determine if there is a use in or before the loop (direct or
     // otherwise).
     bool UsedInLoop = false;
@@ -582,75 +573,13 @@ void IndVarSimplify::SinkUnusedInvariants(Loop *L, SCEVExpander &Rewriter) {
       --I;
     else
       Done = true;
-    ToMove->moveBefore(NonPHI);
+    ToMove->moveBefore(InsertPt);
     if (Done)
       break;
+    InsertPt = ToMove;
   }
 }
 
-/// Re-schedule the inserted instructions to put defs before uses. This
-/// fixes problems that arrise when SCEV expressions contain loop-variant
-/// values unrelated to the induction variable which are defined inside the
-/// loop. FIXME: It would be better to insert instructions in the right
-/// place so that this step isn't needed.
-void IndVarSimplify::FixUsesBeforeDefs(Loop *L, SCEVExpander &Rewriter) {
-  // Visit all the blocks in the loop in pre-order dom-tree dfs order.
-  DominatorTree *DT = &getAnalysis<DominatorTree>();
-  std::map<Instruction *, unsigned> NumPredsLeft;
-  SmallVector<DomTreeNode *, 16> Worklist;
-  Worklist.push_back(DT->getNode(L->getHeader()));
-  do {
-    DomTreeNode *Node = Worklist.pop_back_val();
-    for (DomTreeNode::iterator I = Node->begin(), E = Node->end(); I != E; ++I)
-      if (L->contains((*I)->getBlock()))
-        Worklist.push_back(*I);
-    BasicBlock *BB = Node->getBlock();
-    // Visit all the instructions in the block top down.
-    for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I) {
-      // Count the number of operands that aren't properly dominating.
-      unsigned NumPreds = 0;
-      if (Rewriter.isInsertedInstruction(I) && !isa<PHINode>(I))
-        for (User::op_iterator OI = I->op_begin(), OE = I->op_end();
-             OI != OE; ++OI)
-          if (Instruction *Inst = dyn_cast<Instruction>(OI))
-            if (L->contains(Inst->getParent()) && !NumPredsLeft.count(Inst))
-              ++NumPreds;
-      NumPredsLeft[I] = NumPreds;
-      // Notify uses of the position of this instruction, and move the
-      // users (and their dependents, recursively) into place after this
-      // instruction if it is their last outstanding operand.
-      for (Value::use_iterator UI = I->use_begin(), UE = I->use_end();
-           UI != UE; ++UI) {
-        Instruction *Inst = cast<Instruction>(UI);
-        std::map<Instruction *, unsigned>::iterator Z = NumPredsLeft.find(Inst);
-        if (Z != NumPredsLeft.end() && Z->second != 0 && --Z->second == 0) {
-          SmallVector<Instruction *, 4> UseWorkList;
-          UseWorkList.push_back(Inst);
-          BasicBlock::iterator InsertPt = I;
-          if (InvokeInst *II = dyn_cast<InvokeInst>(InsertPt))
-            InsertPt = II->getNormalDest()->begin();
-          else
-            ++InsertPt;
-          while (isa<PHINode>(InsertPt)) ++InsertPt;
-          do {
-            Instruction *Use = UseWorkList.pop_back_val();
-            Use->moveBefore(InsertPt);
-            NumPredsLeft.erase(Use);
-            for (Value::use_iterator IUI = Use->use_begin(),
-                 IUE = Use->use_end(); IUI != IUE; ++IUI) {
-              Instruction *IUIInst = cast<Instruction>(IUI);
-              if (L->contains(IUIInst->getParent()) &&
-                  Rewriter.isInsertedInstruction(IUIInst) &&
-                  !isa<PHINode>(IUIInst))
-                UseWorkList.push_back(IUIInst);
-            }
-          } while (!UseWorkList.empty());
-        }
-      }
-    }
-  } while (!Worklist.empty());
-}
-
 /// Return true if it is OK to use SIToFPInst for an inducation variable
 /// with given inital and exit values.
 static bool useSIToFPInst(ConstantFP &InitV, ConstantFP &ExitV,
diff --git a/lib/Transforms/Scalar/LoopRotation.cpp b/lib/Transforms/Scalar/LoopRotation.cpp
index a088230..7a24b35 100644
--- a/lib/Transforms/Scalar/LoopRotation.cpp
+++ b/lib/Transforms/Scalar/LoopRotation.cpp
@@ -108,7 +108,7 @@ static RegisterPass<LoopRotate> X("loop-rotate", "Rotate Loops");
 Pass *llvm::createLoopRotatePass() { return new LoopRotate(); }
 
 /// Rotate Loop L as many times as possible. Return true if
-/// loop is rotated at least once.
+/// the loop is rotated at least once.
 bool LoopRotate::runOnLoop(Loop *Lp, LPPassManager &LPM) {
 
   bool RotatedOneLoop = false;
@@ -132,15 +132,15 @@ bool LoopRotate::rotateLoop(Loop *Lp, LPPassManager &LPM) {
   OrigPreHeader = L->getLoopPreheader();
   OrigLatch = L->getLoopLatch();
 
-  // If loop has only one block then there is not much to rotate.
+  // If the loop has only one block then there is not much to rotate.
   if (L->getBlocks().size() == 1)
     return false;
 
   assert(OrigHeader && OrigLatch && OrigPreHeader &&
          "Loop is not in canonical form");
 
-  // If loop header is not one of the loop exit block then
-  // either this loop is already rotated or it is not 
+  // If the loop header is not one of the loop exiting blocks then
+  // either this loop is already rotated or it is not
   // suitable for loop rotation transformations.
   if (!L->isLoopExit(OrigHeader))
     return false;
@@ -189,19 +189,19 @@ bool LoopRotate::rotateLoop(Loop *Lp, LPPassManager &LPM) {
   assert(L->contains(NewHeader) && !L->contains(Exit) && 
          "Unable to determine loop header and exit blocks");
   
-  // This code assumes that new header has exactly one predecessor.  Remove any
-  // single entry PHI nodes in it.
+  // This code assumes that the new header has exactly one predecessor.
+  // Remove any single-entry PHI nodes in it.
   assert(NewHeader->getSinglePredecessor() &&
          "New header doesn't have one pred!");
   FoldSingleEntryPHINodes(NewHeader);
 
-  // Copy PHI nodes and other instructions from original header
-  // into original pre-header. Unlike original header, original pre-header is
-  // not a member of loop. 
+  // Copy PHI nodes and other instructions from the original header
+  // into the original pre-header. Unlike the original header, the original
+  // pre-header is not a member of the loop.
   //
-  // New loop header is one and only successor of original header that 
+  // The new loop header is the one and only successor of original header that
   // is inside the loop. All other original header successors are outside 
-  // the loop. Copy PHI Nodes from original header into new loop header. 
+  // the loop. Copy PHI Nodes from the original header into the new loop header.
   // Add second incoming value, from original loop pre-header into these phi 
   // nodes. If a value defined in original header is used outside original 
   // header then new loop header will need new phi nodes with two incoming 
@@ -218,8 +218,8 @@ bool LoopRotate::rotateLoop(Loop *Lp, LPPassManager &LPM) {
     // are directly propagated.
     Value *NPV = PN->getIncomingValueForBlock(OrigPreHeader);
 
-    // Create new PHI node with two incoming values for NewHeader.
-    // One incoming value is from OrigLatch (through OrigHeader) and 
+    // Create a new PHI node with two incoming values for NewHeader.
+    // One incoming value is from OrigLatch (through OrigHeader) and the
     // second incoming value is from original pre-header.
     PHINode *NH = PHINode::Create(PN->getType(), PN->getName(),
                                   NewHeader->begin());
@@ -334,8 +334,8 @@ bool LoopRotate::rotateLoop(Loop *Lp, LPPassManager &LPM) {
         // Add second incoming argument from new Pre header.
         UPhi->addIncoming(ILoopHeaderInfo.PreHeader, OrigPreHeader);
       } else {
-        // Used outside Exit block. Create a new PHI node from exit block
-        // to receive value from ne new header ane pre header.
+        // Used outside Exit block. Create a new PHI node in the exit block
+        // to receive the value from the new header and pre-header.
         PHINode *PN = PHINode::Create(U->getType(), U->getName(),
                                       Exit->begin());
         PN->addIncoming(ILoopHeaderInfo.PreHeader, OrigPreHeader);
@@ -367,16 +367,13 @@ bool LoopRotate::rotateLoop(Loop *Lp, LPPassManager &LPM) {
 }
 
 /// Make sure all Exit block PHINodes have required incoming values.
-/// If incoming value is constant or defined outside the loop then
-/// PHINode may not have an entry for original pre-header. 
+/// If an incoming value is constant or defined outside the loop then
+/// PHINode may not have an entry for the original pre-header.
 void LoopRotate::updateExitBlock() {
 
-  for (BasicBlock::iterator I = Exit->begin(), E = Exit->end();
-       I != E; ++I) {
-
-    PHINode *PN = dyn_cast<PHINode>(I);
-    if (!PN)
-      break;
+  PHINode *PN;
+  for (BasicBlock::iterator I = Exit->begin();
+       (PN = dyn_cast<PHINode>(I)); ++I) {
 
     // There is already one incoming value from original pre-header block.
     if (PN->getBasicBlockIndex(OrigPreHeader) != -1)
@@ -384,7 +381,7 @@ void LoopRotate::updateExitBlock() {
 
     const RenameData *ILoopHeaderInfo;
     Value *V = PN->getIncomingValueForBlock(OrigHeader);
-    if (isa<Instruction>(V) && 
+    if (isa<Instruction>(V) &&
         (ILoopHeaderInfo = findReplacementData(cast<Instruction>(V)))) {
       assert(ILoopHeaderInfo->PreHeader && "Missing New Preheader Instruction");
       PN->addIncoming(ILoopHeaderInfo->PreHeader, OrigPreHeader);
diff --git a/lib/Transforms/Scalar/LoopStrengthReduce.cpp b/lib/Transforms/Scalar/LoopStrengthReduce.cpp
index ba60058..a877c4e 100644
--- a/lib/Transforms/Scalar/LoopStrengthReduce.cpp
+++ b/lib/Transforms/Scalar/LoopStrengthReduce.cpp
@@ -409,16 +409,8 @@ Value *BasedUser::InsertCodeForBaseAtPosition(const SCEV* const &NewBase,
 
   const SCEV* NewValSCEV = SE->getUnknown(Base);
 
-  // If there is no immediate value, skip the next part.
-  if (!Imm->isZero()) {
-    // If we are inserting the base and imm values in the same block, make sure
-    // to adjust the IP position if insertion reused a result.
-    if (IP == BaseInsertPt)
-      IP = Rewriter.getInsertionPoint();
-
-    // Always emit the immediate (if non-zero) into the same block as the user.
-    NewValSCEV = SE->getAddExpr(NewValSCEV, Imm);
-  }
+  // Always emit the immediate into the same block as the user.
+  NewValSCEV = SE->getAddExpr(NewValSCEV, Imm);
 
   return Rewriter.expandCodeFor(NewValSCEV, Ty, IP);
 }
@@ -1642,7 +1634,8 @@ void LoopStrengthReduce::StrengthReduceStridedIVUsers(const SCEV* const &Stride,
       // the preheader, instead of being forward substituted into the uses.  We
       // do this by forcing a BitCast (noop cast) to be inserted into the
       // preheader in this case.
-      if (!fitsInAddressMode(Base, getAccessType(Inst), TLI, false)) {
+      if (!fitsInAddressMode(Base, getAccessType(Inst), TLI, false) &&
+          !isa<Instruction>(BaseV)) {
         // We want this constant emitted into the preheader! This is just
         // using cast as a copy so BitCast (no-op cast) is appropriate
         BaseV = new BitCastInst(BaseV, BaseV->getType(), "preheaderinsert",
diff --git a/lib/Transforms/Scalar/PredicateSimplifier.cpp b/lib/Transforms/Scalar/PredicateSimplifier.cpp
index b9b5688..a3cb751 100644
--- a/lib/Transforms/Scalar/PredicateSimplifier.cpp
+++ b/lib/Transforms/Scalar/PredicateSimplifier.cpp
@@ -110,6 +110,8 @@ STATISTIC(NumSimple      , "Number of simple replacements");
 STATISTIC(NumBlocks      , "Number of blocks marked unreachable");
 STATISTIC(NumSnuggle     , "Number of comparisons snuggled");
 
+static const ConstantRange empty(1, false);
+
 namespace {
   class DomTreeDFS {
   public:
@@ -939,7 +941,6 @@ namespace {
       const_iterator end()   const { return RangeList.end(); }
 
       iterator find(DomTreeDFS::Node *Subtree) {
-        static ConstantRange empty(1, false);
         iterator E = end();
         iterator I = std::lower_bound(begin(), E,
                                       std::make_pair(Subtree, empty), swo);
@@ -949,7 +950,6 @@ namespace {
       }
 
       const_iterator find(DomTreeDFS::Node *Subtree) const {
-        static const ConstantRange empty(1, false);
         const_iterator E = end();
         const_iterator I = std::lower_bound(begin(), E,
                                             std::make_pair(Subtree, empty), swo);
@@ -962,7 +962,6 @@ namespace {
         assert(!CR.isEmptySet() && "Empty ConstantRange.");
         assert(!CR.isSingleElement() && "Refusing to store single element.");
 
-        static ConstantRange empty(1, false);
         iterator E = end();
         iterator I =
             std::lower_bound(begin(), E, std::make_pair(Subtree, empty), swo);
diff --git a/lib/Transforms/Utils/CMakeLists.txt b/lib/Transforms/Utils/CMakeLists.txt
index 6628b4b..d68bf02 100644
--- a/lib/Transforms/Utils/CMakeLists.txt
+++ b/lib/Transforms/Utils/CMakeLists.txt
@@ -25,3 +25,5 @@ add_llvm_library(LLVMTransformUtils
   ValueMapper.cpp
   InstructionNamer.cpp
   )
+
+target_link_libraries (LLVMTransformUtils LLVMSupport)
diff --git a/lib/Transforms/Utils/LCSSA.cpp b/lib/Transforms/Utils/LCSSA.cpp
index 7d4f3a3..d5e7303 100644
--- a/lib/Transforms/Utils/LCSSA.cpp
+++ b/lib/Transforms/Utils/LCSSA.cpp
@@ -149,7 +149,16 @@ void LCSSA::ProcessInstruction(Instruction *Instr,
   // Keep track of the blocks that have the value available already.
   DenseMap<DomTreeNode*, Value*> Phis;
 
-  DomTreeNode *InstrNode = DT->getNode(Instr->getParent());
+  BasicBlock *DomBB = Instr->getParent();
+
+  // Invoke instructions are special in that their result value is not available
+  // along their unwind edge. The code below tests whether DomBB dominates each
+  // exit block, so adjust DomBB to the normal destination block, which is
+  // effectively where the value is first usable.
+  if (InvokeInst *Inv = dyn_cast<InvokeInst>(Instr))
+    DomBB = Inv->getNormalDest();
+
+  DomTreeNode *DomNode = DT->getNode(DomBB);
 
   // Insert the LCSSA phi's into the exit blocks (dominated by the value), and
   // add them to the Phi's map.
@@ -158,7 +167,7 @@ void LCSSA::ProcessInstruction(Instruction *Instr,
     BasicBlock *BB = *BBI;
     DomTreeNode *ExitBBNode = DT->getNode(BB);
     Value *&Phi = Phis[ExitBBNode];
-    if (!Phi && DT->dominates(InstrNode, ExitBBNode)) {
+    if (!Phi && DT->dominates(DomNode, ExitBBNode)) {
       PHINode *PN = PHINode::Create(Instr->getType(), Instr->getName()+".lcssa",
                                     BB->begin());
       PN->reserveOperandSpace(PredCache.GetNumPreds(BB));
diff --git a/lib/Transforms/Utils/Local.cpp b/lib/Transforms/Utils/Local.cpp
index c7fff54..8c08638 100644
--- a/lib/Transforms/Utils/Local.cpp
+++ b/lib/Transforms/Utils/Local.cpp
@@ -340,44 +340,3 @@ bool llvm::OnlyUsedByDbgInfoIntrinsics(Instruction *I,
   return true;
 }
 
-/// UserIsDebugInfo - Return true if U is a constant expr used by 
-/// llvm.dbg.variable or llvm.dbg.global_variable
-bool llvm::UserIsDebugInfo(User *U) {
-  ConstantExpr *CE = dyn_cast<ConstantExpr>(U);
-
-  if (!CE || CE->getNumUses() != 1)
-    return false;
-
-  Constant *Init = dyn_cast<Constant>(CE->use_back());
-  if (!Init || Init->getNumUses() != 1)
-    return false;
-
-  GlobalVariable *GV = dyn_cast<GlobalVariable>(Init->use_back());
-  if (!GV || !GV->hasInitializer() || GV->getInitializer() != Init)
-    return false;
-
-  DIVariable DV(GV);
-  if (!DV.isNull()) 
-    return true; // User is llvm.dbg.variable
-
-  DIGlobalVariable DGV(GV);
-  if (!DGV.isNull())
-    return true; // User is llvm.dbg.global_variable
-
-  return false;
-}
-
-/// RemoveDbgInfoUser - Remove an User which is representing debug info.
-void llvm::RemoveDbgInfoUser(User *U) {
-  assert (UserIsDebugInfo(U) && "Unexpected User!");
-  ConstantExpr *CE = cast<ConstantExpr>(U);
-  while (!CE->use_empty()) {
-    Constant *C = cast<Constant>(CE->use_back());
-    while (!C->use_empty()) {
-      GlobalVariable *GV = cast<GlobalVariable>(C->use_back());
-      GV->eraseFromParent();
-    }
-    C->destroyConstant();
-  }
-  CE->destroyConstant();
-}
diff --git a/lib/VMCore/Constants.cpp b/lib/VMCore/Constants.cpp
index c164a3b..a9e4e78 100644
--- a/lib/VMCore/Constants.cpp
+++ b/lib/VMCore/Constants.cpp
@@ -128,8 +128,8 @@ bool Constant::ContainsRelocations(unsigned Kind) const {
 }
 
 // Static constructor to create a '0' constant of arbitrary type...
+static const uint64_t zero[2] = {0, 0};
 Constant *Constant::getNullValue(const Type *Ty) {
-  static uint64_t zero[2] = {0, 0};
   switch (Ty->getTypeID()) {
   case Type::IntegerTyID:
     return ConstantInt::get(Ty, 0);
@@ -1803,6 +1803,17 @@ MDString *MDString::get(const char *StrBegin, const char *StrEnd) {
   return S;
 }
 
+/// get - Return the uniqued MDString for the bytes of Str (embedded NULs
+/// included, since the range is taken from data() and size()).
+///
+/// This overload only adapts the argument: it forwards to the
+/// (StrBegin, StrEnd) overload so that the locking and MDStringCache
+/// lookup/insert logic is written once instead of being duplicated here.
+/// NOTE(review): relies on that overload taking ConstantsLock itself.
+MDString *MDString::get(const std::string &Str) {
+  return get(Str.data(), Str.data() + Str.size());
+}
+
 void MDString::destroyConstant() {
   sys::SmartScopedWriter<true> Writer(&*ConstantsLock);
   MDStringCache->erase(MDStringCache->find(StrBegin, StrEnd));
diff --git a/lib/VMCore/Mangler.cpp b/lib/VMCore/Mangler.cpp
index 6be06d22..1a68b89 100644
--- a/lib/VMCore/Mangler.cpp
+++ b/lib/VMCore/Mangler.cpp
@@ -165,10 +165,10 @@ std::string Mangler::getValueName(const GlobalValue *GV, const char * Suffix) {
   } else if (!GV->hasName()) {
     // Must mangle the global into a unique ID.
     unsigned TypeUniqueID = getTypeID(GV->getType());
-    static int32_t GlobalID = 0;
+    static uint32_t GlobalID = 0;
     
-    int32_t OldID = GlobalID;
-    sys::AtomicIncrement32(&GlobalID);
+    // Derive the ID from the atomic op's result; a separate read can race.
+    unsigned OldID = sys::AtomicIncrement(&GlobalID) - 1;
     
     Name = "__unnamed_" + utostr(TypeUniqueID) + "_" + utostr(OldID);
   } else {
diff --git a/lib/VMCore/Pass.cpp b/lib/VMCore/Pass.cpp
index e943e31..b037994 100644
--- a/lib/VMCore/Pass.cpp
+++ b/lib/VMCore/Pass.cpp
@@ -20,6 +20,7 @@
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/Support/ManagedStatic.h"
 #include "llvm/System/Atomic.h"
+#include "llvm/System/Mutex.h"
 #include "llvm/System/Threading.h"
 #include <algorithm>
 #include <map>
@@ -187,6 +188,7 @@ public:
 }
 
 static std::vector<PassRegistrationListener*> *Listeners = 0;
+static sys::SmartMutex<true> ListenersLock;
 
 // FIXME: This should use ManagedStatic to manage the pass registrar.
 // Unfortunately, we can't do this, because passes are registered with static
@@ -231,6 +233,7 @@ void PassInfo::registerPass() {
   getPassRegistrar()->RegisterPass(*this);
 
   // Notify any listeners.
+  sys::SmartScopedLock<true> Lock(&ListenersLock);
   if (Listeners)
     for (std::vector<PassRegistrationListener*>::iterator
            I = Listeners->begin(), E = Listeners->end(); I != E; ++I)
@@ -283,12 +286,14 @@ RegisterAGBase::RegisterAGBase(const char *Name, intptr_t InterfaceID,
 // PassRegistrationListener ctor - Add the current object to the list of
 // PassRegistrationListeners...
 PassRegistrationListener::PassRegistrationListener() {
+  sys::SmartScopedLock<true> Lock(&ListenersLock);
   if (!Listeners) Listeners = new std::vector<PassRegistrationListener*>();
   Listeners->push_back(this);
 }
 
 // dtor - Remove object from list of listeners...
 PassRegistrationListener::~PassRegistrationListener() {
+  sys::SmartScopedLock<true> Lock(&ListenersLock);
   std::vector<PassRegistrationListener*>::iterator I =
     std::find(Listeners->begin(), Listeners->end(), this);
   assert(Listeners && I != Listeners->end() &&