Diffstat (limited to 'lib')
236 files changed, 10145 insertions(+), 4430 deletions(-)
diff --git a/lib/Analysis/DebugInfo.cpp b/lib/Analysis/DebugInfo.cpp index 6b27cf4..9eecc33 100644 --- a/lib/Analysis/DebugInfo.cpp +++ b/lib/Analysis/DebugInfo.cpp @@ -21,6 +21,7 @@ #include "llvm/Module.h" #include "llvm/Analysis/ValueTracking.h" #include "llvm/Support/Dwarf.h" +#include "llvm/Support/DebugLoc.h" #include "llvm/Support/Streams.h" using namespace llvm; @@ -321,13 +322,140 @@ bool DISubprogram::describes(const Function *F) { } //===----------------------------------------------------------------------===// +// DIDescriptor: dump routines for all descriptors. +//===----------------------------------------------------------------------===// + + +/// dump - Print descriptor. +void DIDescriptor::dump() const { + cerr << "[" << dwarf::TagString(getTag()) << "] "; + cerr << std::hex << "[GV:" << DbgGV << "]" << std::dec; +} + +/// dump - Print compile unit. +void DICompileUnit::dump() const { + if (getLanguage()) + cerr << " [" << dwarf::LanguageString(getLanguage()) << "] "; + + std::string Res1, Res2; + cerr << " [" << getDirectory(Res1) << "/" << getFilename(Res2) << " ]"; +} + +/// dump - Print type. +void DIType::dump() const { + if (isNull()) return; + + std::string Res; + if (!getName(Res).empty()) + cerr << " [" << Res << "] "; + + unsigned Tag = getTag(); + cerr << " [" << dwarf::TagString(Tag) << "] "; + + // TODO : Print context + getCompileUnit().dump(); + cerr << " [" + << getLineNumber() << ", " + << getSizeInBits() << ", " + << getAlignInBits() << ", " + << getOffsetInBits() + << "] "; + + if (isPrivate()) + cerr << " [private] "; + else if (isProtected()) + cerr << " [protected] "; + + if (isForwardDecl()) + cerr << " [fwd] "; + + if (isBasicType(Tag)) + DIBasicType(DbgGV).dump(); + else if (isDerivedType(Tag)) + DIDerivedType(DbgGV).dump(); + else if (isCompositeType(Tag)) + DICompositeType(DbgGV).dump(); + else { + cerr << "Invalid DIType\n"; + return; + } + + cerr << "\n"; +} + +/// dump - Print basic type. +void DIBasicType::dump() const { + cerr << " [" << dwarf::AttributeEncodingString(getEncoding()) << "] "; +} + +/// dump - Print derived type. +void DIDerivedType::dump() const { + cerr << "\n\t Derived From: "; getTypeDerivedFrom().dump(); +} + +/// dump - Print composite type. +void DICompositeType::dump() const { + DIArray A = getTypeArray(); + if (A.isNull()) + return; + cerr << " [" << A.getNumElements() << " elements]"; +} + +/// dump - Print global. +void DIGlobal::dump() const { + std::string Res; + if (!getName(Res).empty()) + cerr << " [" << Res << "] "; + + unsigned Tag = getTag(); + cerr << " [" << dwarf::TagString(Tag) << "] "; + + // TODO : Print context + getCompileUnit().dump(); + cerr << " [" << getLineNumber() << "] "; + + if (isLocalToUnit()) + cerr << " [local] "; + + if (isDefinition()) + cerr << " [def] "; + + if (isGlobalVariable(Tag)) + DIGlobalVariable(DbgGV).dump(); + + cerr << "\n"; +} + +/// dump - Print subprogram. +void DISubprogram::dump() const { + DIGlobal::dump(); +} + +/// dump - Print global variable. +void DIGlobalVariable::dump() const { + cerr << " ["; getGlobal()->dump(); cerr << "] "; +} + +/// dump - Print variable. 
+void DIVariable::dump() const { + std::string Res; + if (!getName(Res).empty()) + cerr << " [" << Res << "] "; + + getCompileUnit().dump(); + cerr << " [" << getLineNumber() << "] "; + getType().dump(); + cerr << "\n"; +} + +//===----------------------------------------------------------------------===// // DIFactory: Basic Helpers //===----------------------------------------------------------------------===// DIFactory::DIFactory(Module &m) : M(m), StopPointFn(0), FuncStartFn(0), RegionStartFn(0), RegionEndFn(0), DeclareFn(0) { - EmptyStructPtr = PointerType::getUnqual(StructType::get(NULL, NULL)); + EmptyStructPtr = PointerType::getUnqual(StructType::get()); } /// getCastToEmpty - Return this descriptor as a Constant* with type '{}*'. @@ -923,127 +1051,108 @@ namespace llvm { } } } -} - -/// dump - Print descriptor. -void DIDescriptor::dump() const { - cerr << "[" << dwarf::TagString(getTag()) << "] "; - cerr << std::hex << "[GV:" << DbgGV << "]" << std::dec; -} - -/// dump - Print compile unit. -void DICompileUnit::dump() const { - if (getLanguage()) - cerr << " [" << dwarf::LanguageString(getLanguage()) << "] "; - - std::string Res1, Res2; - cerr << " [" << getDirectory(Res1) << "/" << getFilename(Res2) << " ]"; -} - -/// dump - Print type. -void DIType::dump() const { - if (isNull()) return; - - std::string Res; - if (!getName(Res).empty()) - cerr << " [" << Res << "] "; - - unsigned Tag = getTag(); - cerr << " [" << dwarf::TagString(Tag) << "] "; - - // TODO : Print context - getCompileUnit().dump(); - cerr << " [" - << getLineNumber() << ", " - << getSizeInBits() << ", " - << getAlignInBits() << ", " - << getOffsetInBits() - << "] "; - - if (isPrivate()) - cerr << " [private] "; - else if (isProtected()) - cerr << " [protected] "; - - if (isForwardDecl()) - cerr << " [fwd] "; - if (isBasicType(Tag)) - DIBasicType(DbgGV).dump(); - else if (isDerivedType(Tag)) - DIDerivedType(DbgGV).dump(); - else if (isCompositeType(Tag)) - DICompositeType(DbgGV).dump(); - else { - cerr << "Invalid DIType\n"; - return; + /// isValidDebugInfoIntrinsic - Return true if SPI is a valid debug + /// info intrinsic. + bool isValidDebugInfoIntrinsic(DbgStopPointInst &SPI, + CodeGenOpt::Level OptLev) { + return DIDescriptor::ValidDebugInfo(SPI.getContext(), OptLev); } - cerr << "\n"; -} - -/// dump - Print basic type. -void DIBasicType::dump() const { - cerr << " [" << dwarf::AttributeEncodingString(getEncoding()) << "] "; -} - -/// dump - Print derived type. -void DIDerivedType::dump() const { - cerr << "\n\t Derived From: "; getTypeDerivedFrom().dump(); -} - -/// dump - Print composite type. -void DICompositeType::dump() const { - DIArray A = getTypeArray(); - if (A.isNull()) - return; - cerr << " [" << A.getNumElements() << " elements]"; -} + /// isValidDebugInfoIntrinsic - Return true if FSI is a valid debug + /// info intrinsic. + bool isValidDebugInfoIntrinsic(DbgFuncStartInst &FSI, + CodeGenOpt::Level OptLev) { + return DIDescriptor::ValidDebugInfo(FSI.getSubprogram(), OptLev); + } -/// dump - Print global. -void DIGlobal::dump() const { - std::string Res; - if (!getName(Res).empty()) - cerr << " [" << Res << "] "; + /// isValidDebugInfoIntrinsic - Return true if RSI is a valid debug + /// info intrinsic. 
+ bool isValidDebugInfoIntrinsic(DbgRegionStartInst &RSI, + CodeGenOpt::Level OptLev) { + return DIDescriptor::ValidDebugInfo(RSI.getContext(), OptLev); + } - unsigned Tag = getTag(); - cerr << " [" << dwarf::TagString(Tag) << "] "; + /// isValidDebugInfoIntrinsic - Return true if REI is a valid debug + /// info intrinsic. + bool isValidDebugInfoIntrinsic(DbgRegionEndInst &REI, + CodeGenOpt::Level OptLev) { + return DIDescriptor::ValidDebugInfo(REI.getContext(), OptLev); + } - // TODO : Print context - getCompileUnit().dump(); - cerr << " [" << getLineNumber() << "] "; - if (isLocalToUnit()) - cerr << " [local] "; + /// isValidDebugInfoIntrinsic - Return true if DI is a valid debug + /// info intrinsic. + bool isValidDebugInfoIntrinsic(DbgDeclareInst &DI, + CodeGenOpt::Level OptLev) { + return DIDescriptor::ValidDebugInfo(DI.getVariable(), OptLev); + } - if (isDefinition()) - cerr << " [def] "; + /// ExtractDebugLocation - Extract debug location information + /// from llvm.dbg.stoppoint intrinsic. + DebugLoc ExtractDebugLocation(DbgStopPointInst &SPI, + DebugLocTracker &DebugLocInfo) { + DebugLoc DL; + Value *Context = SPI.getContext(); + + // If this location is already tracked then use it. + DebugLocTuple Tuple(cast<GlobalVariable>(Context), SPI.getLine(), + SPI.getColumn()); + DenseMap<DebugLocTuple, unsigned>::iterator II + = DebugLocInfo.DebugIdMap.find(Tuple); + if (II != DebugLocInfo.DebugIdMap.end()) + return DebugLoc::get(II->second); + + // Add a new location entry. + unsigned Id = DebugLocInfo.DebugLocations.size(); + DebugLocInfo.DebugLocations.push_back(Tuple); + DebugLocInfo.DebugIdMap[Tuple] = Id; + + return DebugLoc::get(Id); + } - if (isGlobalVariable(Tag)) - DIGlobalVariable(DbgGV).dump(); + /// ExtractDebugLocation - Extract debug location information + /// from llvm.dbg.func_start intrinsic. + DebugLoc ExtractDebugLocation(DbgFuncStartInst &FSI, + DebugLocTracker &DebugLocInfo) { + DebugLoc DL; + Value *SP = FSI.getSubprogram(); + + DISubprogram Subprogram(cast<GlobalVariable>(SP)); + unsigned Line = Subprogram.getLineNumber(); + DICompileUnit CU(Subprogram.getCompileUnit()); + + // If this location is already tracked then use it. + DebugLocTuple Tuple(CU.getGV(), Line, /* Column */ 0); + DenseMap<DebugLocTuple, unsigned>::iterator II + = DebugLocInfo.DebugIdMap.find(Tuple); + if (II != DebugLocInfo.DebugIdMap.end()) + return DebugLoc::get(II->second); + + // Add a new location entry. + unsigned Id = DebugLocInfo.DebugLocations.size(); + DebugLocInfo.DebugLocations.push_back(Tuple); + DebugLocInfo.DebugIdMap[Tuple] = Id; + + return DebugLoc::get(Id); + } - cerr << "\n"; -} + /// isInlinedFnStart - Return true if FSI is starting an inlined function. + bool isInlinedFnStart(DbgFuncStartInst &FSI, const Function *CurrentFn) { + DISubprogram Subprogram(cast<GlobalVariable>(FSI.getSubprogram())); + if (Subprogram.describes(CurrentFn)) + return false; -/// dump - Print subprogram. -void DISubprogram::dump() const { - DIGlobal::dump(); -} + return true; + } -/// dump - Print global variable. -void DIGlobalVariable::dump() const { - cerr << " ["; getGlobal()->dump(); cerr << "] "; -} + /// isInlinedFnEnd - Return true if REI is ending an inlined function. + bool isInlinedFnEnd(DbgRegionEndInst &REI, const Function *CurrentFn) { + DISubprogram Subprogram(cast<GlobalVariable>(REI.getContext())); + if (Subprogram.isNull() || Subprogram.describes(CurrentFn)) + return false; -/// dump - Print variable. 
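Note on the hunk above: both ExtractDebugLocation overloads share one idea — each distinct (scope, line, column) tuple is assigned a small integer id exactly once, and repeated lookups return the existing id, so a DebugLoc stays a cheap index rather than a full tuple. Below is a minimal standalone sketch of that interning pattern using standard containers; LocTuple, LocTracker, and InternLocation are illustrative stand-ins, not the real DebugLocTuple/DebugLocTracker/DenseMap API.

#include <map>
#include <vector>

// Hypothetical stand-in for DebugLocTuple.
struct LocTuple {
  const void *Scope;   // compile unit or subprogram descriptor
  unsigned Line, Col;
  bool operator<(const LocTuple &O) const {
    if (Scope != O.Scope) return Scope < O.Scope;
    if (Line != O.Line) return Line < O.Line;
    return Col < O.Col;
  }
};

// Hypothetical stand-in for DebugLocTracker.
struct LocTracker {
  std::vector<LocTuple> Locations;      // id -> tuple
  std::map<LocTuple, unsigned> IdMap;   // tuple -> id
};

// Return the existing id for Tup, or assign the next free one.
unsigned InternLocation(LocTracker &T, const LocTuple &Tup) {
  std::map<LocTuple, unsigned>::iterator I = T.IdMap.find(Tup);
  if (I != T.IdMap.end())
    return I->second;                   // already tracked: reuse the id
  unsigned Id = T.Locations.size();     // new entry gets the next index
  T.Locations.push_back(Tup);
  T.IdMap[Tup] = Id;
  return Id;
}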
-void DIVariable::dump() const { - std::string Res; - if (!getName(Res).empty()) - cerr << " [" << Res << "] "; + return true; + } - getCompileUnit().dump(); - cerr << " [" << getLineNumber() << "] "; - getType().dump(); - cerr << "\n"; } - diff --git a/lib/Analysis/IPA/Andersens.cpp b/lib/Analysis/IPA/Andersens.cpp index 4ace049..3fb6526 100644 --- a/lib/Analysis/IPA/Andersens.cpp +++ b/lib/Analysis/IPA/Andersens.cpp @@ -211,7 +211,7 @@ namespace { // for each location equivalent Node. struct Node { private: - static unsigned Counter; + static volatile sys::cas_flag Counter; public: Value *Val; @@ -618,7 +618,7 @@ X("anders-aa", "Andersen's Interprocedural Alias Analysis", false, true); static RegisterAnalysisGroup<AliasAnalysis> Y(X); // Initialize Timestamp Counter (static). -unsigned Andersens::Node::Counter = 0; +volatile llvm::sys::cas_flag Andersens::Node::Counter = 0; ModulePass *llvm::createAndersensPass() { return new Andersens(); } diff --git a/lib/Analysis/LoopDependenceAnalysis.cpp b/lib/Analysis/LoopDependenceAnalysis.cpp index 172a2be..f605783 100644 --- a/lib/Analysis/LoopDependenceAnalysis.cpp +++ b/lib/Analysis/LoopDependenceAnalysis.cpp @@ -18,9 +18,13 @@ //===----------------------------------------------------------------------===// #define DEBUG_TYPE "lda" +#include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Analysis/LoopDependenceAnalysis.h" #include "llvm/Analysis/LoopPass.h" #include "llvm/Analysis/ScalarEvolution.h" +#include "llvm/Instructions.h" +#include "llvm/Support/Debug.h" +#include "llvm/Target/TargetData.h" using namespace llvm; LoopPass *llvm::createLoopDependenceAnalysisPass() { @@ -32,16 +36,132 @@ R("lda", "Loop Dependence Analysis", false, true); char LoopDependenceAnalysis::ID = 0; //===----------------------------------------------------------------------===// +// Utility Functions +//===----------------------------------------------------------------------===// + +static inline bool IsMemRefInstr(const Value *V) { + const Instruction *I = dyn_cast<const Instruction>(V); + return I && (I->mayReadFromMemory() || I->mayWriteToMemory()); +} + +static void GetMemRefInstrs( + const Loop *L, SmallVectorImpl<Instruction*> &memrefs) { + for (Loop::block_iterator b = L->block_begin(), be = L->block_end(); + b != be; ++b) + for (BasicBlock::iterator i = (*b)->begin(), ie = (*b)->end(); + i != ie; ++i) + if (IsMemRefInstr(i)) + memrefs.push_back(i); +} + +static bool IsLoadOrStoreInst(Value *I) { + return isa<LoadInst>(I) || isa<StoreInst>(I); +} + +static Value *GetPointerOperand(Value *I) { + if (LoadInst *i = dyn_cast<LoadInst>(I)) + return i->getPointerOperand(); + if (StoreInst *i = dyn_cast<StoreInst>(I)) + return i->getPointerOperand(); + assert(0 && "Value is no load or store instruction!"); + // Never reached. + return 0; +} + +//===----------------------------------------------------------------------===// +// Dependence Testing +//===----------------------------------------------------------------------===// + +bool LoopDependenceAnalysis::isDependencePair(const Value *x, + const Value *y) const { + return IsMemRefInstr(x) && + IsMemRefInstr(y) && + (cast<const Instruction>(x)->mayWriteToMemory() || + cast<const Instruction>(y)->mayWriteToMemory()); +} + +bool LoopDependenceAnalysis::depends(Value *src, Value *dst) { + assert(isDependencePair(src, dst) && "Values form no dependence pair!"); + DOUT << "== LDA test ==\n" << *src << *dst; + + // We only analyse loads and stores; for possible memory accesses by e.g. 
+ // free, call, or invoke instructions we conservatively assume dependence. + if (!IsLoadOrStoreInst(src) || !IsLoadOrStoreInst(dst)) + return true; + + Value *srcPtr = GetPointerOperand(src); + Value *dstPtr = GetPointerOperand(dst); + const Value *srcObj = srcPtr->getUnderlyingObject(); + const Value *dstObj = dstPtr->getUnderlyingObject(); + AliasAnalysis::AliasResult alias = AA->alias( + srcObj, AA->getTargetData().getTypeStoreSize(srcObj->getType()), + dstObj, AA->getTargetData().getTypeStoreSize(dstObj->getType())); + + // If we don't know whether or not the two objects alias, assume dependence. + if (alias == AliasAnalysis::MayAlias) + return true; + + // If the objects noalias, they are distinct, accesses are independent. + if (alias == AliasAnalysis::NoAlias) + return false; + + // TODO: the underlying objects MustAlias, test for dependence + + // We couldn't establish a more precise result, so we have to conservatively + // assume full dependence. + return true; +} + +//===----------------------------------------------------------------------===// // LoopDependenceAnalysis Implementation //===----------------------------------------------------------------------===// bool LoopDependenceAnalysis::runOnLoop(Loop *L, LPPassManager &) { this->L = L; + AA = &getAnalysis<AliasAnalysis>(); SE = &getAnalysis<ScalarEvolution>(); return false; } void LoopDependenceAnalysis::getAnalysisUsage(AnalysisUsage &AU) const { AU.setPreservesAll(); - AU.addRequired<ScalarEvolution>(); + AU.addRequiredTransitive<AliasAnalysis>(); + AU.addRequiredTransitive<ScalarEvolution>(); +} + +static void PrintLoopInfo( + raw_ostream &OS, LoopDependenceAnalysis *LDA, const Loop *L) { + if (!L->empty()) return; // ignore non-innermost loops + + SmallVector<Instruction*, 8> memrefs; + GetMemRefInstrs(L, memrefs); + + OS << "Loop at depth " << L->getLoopDepth() << ", header block: "; + WriteAsOperand(OS, L->getHeader(), false); + OS << "\n"; + + OS << " Load/store instructions: " << memrefs.size() << "\n"; + for (SmallVector<Instruction*, 8>::const_iterator x = memrefs.begin(), + end = memrefs.end(); x != end; ++x) + OS << "\t" << (x - memrefs.begin()) << ": " << **x; + + OS << " Pairwise dependence results:\n"; + for (SmallVector<Instruction*, 8>::const_iterator x = memrefs.begin(), + end = memrefs.end(); x != end; ++x) + for (SmallVector<Instruction*, 8>::const_iterator y = x + 1; + y != end; ++y) + if (LDA->isDependencePair(*x, *y)) + OS << "\t" << (x - memrefs.begin()) << "," << (y - memrefs.begin()) + << ": " << (LDA->depends(*x, *y) ? 
"dependent" : "independent") + << "\n"; +} + +void LoopDependenceAnalysis::print(raw_ostream &OS, const Module*) const { + // TODO: doc why const_cast is safe + PrintLoopInfo(OS, const_cast<LoopDependenceAnalysis*>(this), this->L); +} + +void LoopDependenceAnalysis::print(std::ostream &OS, const Module *M) const { + raw_os_ostream os(OS); + print(os, M); } diff --git a/lib/Analysis/LoopInfo.cpp b/lib/Analysis/LoopInfo.cpp index a0d3974..bb53589 100644 --- a/lib/Analysis/LoopInfo.cpp +++ b/lib/Analysis/LoopInfo.cpp @@ -39,7 +39,7 @@ X("loops", "Natural Loop Information", true, true); // bool LoopInfo::runOnFunction(Function &) { releaseMemory(); - LI->Calculate(getAnalysis<DominatorTree>().getBase()); // Update + LI.Calculate(getAnalysis<DominatorTree>().getBase()); // Update return false; } diff --git a/lib/Analysis/LoopPass.cpp b/lib/Analysis/LoopPass.cpp index 08c25f4..ee03556 100644 --- a/lib/Analysis/LoopPass.cpp +++ b/lib/Analysis/LoopPass.cpp @@ -195,6 +195,9 @@ bool LPPassManager::runOnFunction(Function &F) { for (LoopInfo::iterator I = LI->begin(), E = LI->end(); I != E; ++I) addLoopIntoQueue(*I, LQ); + if (LQ.empty()) // No loops, skip calling finalizers + return false; + // Initialization for (std::deque<Loop *>::const_iterator I = LQ.begin(), E = LQ.end(); I != E; ++I) { diff --git a/lib/Analysis/ScalarEvolution.cpp b/lib/Analysis/ScalarEvolution.cpp index dcb179af..4081562 100644 --- a/lib/Analysis/ScalarEvolution.cpp +++ b/lib/Analysis/ScalarEvolution.cpp @@ -110,7 +110,9 @@ char ScalarEvolution::ID = 0; //===----------------------------------------------------------------------===// // Implementation of the SCEV class. // + SCEV::~SCEV() {} + void SCEV::dump() const { print(errs()); errs() << '\n'; @@ -142,6 +144,10 @@ bool SCEV::isAllOnesValue() const { SCEVCouldNotCompute::SCEVCouldNotCompute() : SCEV(scCouldNotCompute) {} +void SCEVCouldNotCompute::Profile(FoldingSetNodeID &ID) const { + assert(0 && "Attempt to use a SCEVCouldNotCompute object!"); +} + bool SCEVCouldNotCompute::isLoopInvariant(const Loop *L) const { assert(0 && "Attempt to use a SCEVCouldNotCompute object!"); return false; @@ -174,9 +180,15 @@ bool SCEVCouldNotCompute::classof(const SCEV *S) { } const SCEV* ScalarEvolution::getConstant(ConstantInt *V) { - SCEVConstant *&R = SCEVConstants[V]; - if (R == 0) R = new SCEVConstant(V); - return R; + FoldingSetNodeID ID; + ID.AddInteger(scConstant); + ID.AddPointer(V); + void *IP = 0; + if (const SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP)) return S; + SCEV *S = SCEVAllocator.Allocate<SCEVConstant>(); + new (S) SCEVConstant(V); + UniqueSCEVs.InsertNode(S, IP); + return S; } const SCEV* ScalarEvolution::getConstant(const APInt& Val) { @@ -188,6 +200,11 @@ ScalarEvolution::getConstant(const Type *Ty, uint64_t V, bool isSigned) { return getConstant(ConstantInt::get(cast<IntegerType>(Ty), V, isSigned)); } +void SCEVConstant::Profile(FoldingSetNodeID &ID) const { + ID.AddInteger(scConstant); + ID.AddPointer(V); +} + const Type *SCEVConstant::getType() const { return V->getType(); } void SCEVConstant::print(raw_ostream &OS) const { @@ -198,6 +215,12 @@ SCEVCastExpr::SCEVCastExpr(unsigned SCEVTy, const SCEV* op, const Type *ty) : SCEV(SCEVTy), Op(op), Ty(ty) {} +void SCEVCastExpr::Profile(FoldingSetNodeID &ID) const { + ID.AddInteger(getSCEVType()); + ID.AddPointer(Op); + ID.AddPointer(Ty); +} + bool SCEVCastExpr::dominates(BasicBlock *BB, DominatorTree *DT) const { return Op->dominates(BB, DT); } @@ -277,6 +300,13 @@ 
SCEVCommutativeExpr::replaceSymbolicValuesWithConcrete( return this; } +void SCEVNAryExpr::Profile(FoldingSetNodeID &ID) const { + ID.AddInteger(getSCEVType()); + ID.AddInteger(Operands.size()); + for (unsigned i = 0, e = Operands.size(); i != e; ++i) + ID.AddPointer(Operands[i]); +} + bool SCEVNAryExpr::dominates(BasicBlock *BB, DominatorTree *DT) const { for (unsigned i = 0, e = getNumOperands(); i != e; ++i) { if (!getOperand(i)->dominates(BB, DT)) @@ -285,6 +315,12 @@ bool SCEVNAryExpr::dominates(BasicBlock *BB, DominatorTree *DT) const { return true; } +void SCEVUDivExpr::Profile(FoldingSetNodeID &ID) const { + ID.AddInteger(scUDivExpr); + ID.AddPointer(LHS); + ID.AddPointer(RHS); +} + bool SCEVUDivExpr::dominates(BasicBlock *BB, DominatorTree *DT) const { return LHS->dominates(BB, DT) && RHS->dominates(BB, DT); } @@ -302,6 +338,14 @@ const Type *SCEVUDivExpr::getType() const { return RHS->getType(); } +void SCEVAddRecExpr::Profile(FoldingSetNodeID &ID) const { + ID.AddInteger(scAddRecExpr); + ID.AddInteger(Operands.size()); + for (unsigned i = 0, e = Operands.size(); i != e; ++i) + ID.AddPointer(Operands[i]); + ID.AddPointer(L); +} + const SCEV * SCEVAddRecExpr::replaceSymbolicValuesWithConcrete(const SCEV *Sym, const SCEV *Conc, @@ -345,7 +389,6 @@ bool SCEVAddRecExpr::isLoopInvariant(const Loop *QueryLoop) const { return true; } - void SCEVAddRecExpr::print(raw_ostream &OS) const { OS << "{" << *Operands[0]; for (unsigned i = 1, e = Operands.size(); i != e; ++i) @@ -353,6 +396,11 @@ void SCEVAddRecExpr::print(raw_ostream &OS) const { OS << "}<" << L->getHeader()->getName() + ">"; } +void SCEVUnknown::Profile(FoldingSetNodeID &ID) const { + ID.AddInteger(scUnknown); + ID.AddPointer(V); +} + bool SCEVUnknown::isLoopInvariant(const Loop *L) const { // All non-instruction values are loop invariant. All instructions are loop // invariant if they are not contained in the specified loop. @@ -696,6 +744,7 @@ const SCEV* ScalarEvolution::getTruncateExpr(const SCEV* Op, "This is not a conversion to a SCEVable type!"); Ty = getEffectiveSCEVType(Ty); + // Fold if the operand is constant. if (const SCEVConstant *SC = dyn_cast<SCEVConstant>(Op)) return getConstant( cast<ConstantInt>(ConstantExpr::getTrunc(SC->getValue(), Ty))); @@ -720,9 +769,16 @@ const SCEV* ScalarEvolution::getTruncateExpr(const SCEV* Op, return getAddRecExpr(Operands, AddRec->getLoop()); } - SCEVTruncateExpr *&Result = SCEVTruncates[std::make_pair(Op, Ty)]; - if (Result == 0) Result = new SCEVTruncateExpr(Op, Ty); - return Result; + FoldingSetNodeID ID; + ID.AddInteger(scTruncate); + ID.AddPointer(Op); + ID.AddPointer(Ty); + void *IP = 0; + if (const SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP)) return S; + SCEV *S = SCEVAllocator.Allocate<SCEVTruncateExpr>(); + new (S) SCEVTruncateExpr(Op, Ty); + UniqueSCEVs.InsertNode(S, IP); + return S; } const SCEV* ScalarEvolution::getZeroExtendExpr(const SCEV* Op, @@ -733,6 +789,7 @@ const SCEV* ScalarEvolution::getZeroExtendExpr(const SCEV* Op, "This is not a conversion to a SCEVable type!"); Ty = getEffectiveSCEVType(Ty); + // Fold if the operand is constant. 
if (const SCEVConstant *SC = dyn_cast<SCEVConstant>(Op)) { const Type *IntTy = getEffectiveSCEVType(Ty); Constant *C = ConstantExpr::getZExt(SC->getValue(), IntTy); @@ -808,9 +865,16 @@ const SCEV* ScalarEvolution::getZeroExtendExpr(const SCEV* Op, } } - SCEVZeroExtendExpr *&Result = SCEVZeroExtends[std::make_pair(Op, Ty)]; - if (Result == 0) Result = new SCEVZeroExtendExpr(Op, Ty); - return Result; + FoldingSetNodeID ID; + ID.AddInteger(scZeroExtend); + ID.AddPointer(Op); + ID.AddPointer(Ty); + void *IP = 0; + if (const SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP)) return S; + SCEV *S = SCEVAllocator.Allocate<SCEVZeroExtendExpr>(); + new (S) SCEVZeroExtendExpr(Op, Ty); + UniqueSCEVs.InsertNode(S, IP); + return S; } const SCEV* ScalarEvolution::getSignExtendExpr(const SCEV* Op, @@ -821,6 +885,7 @@ const SCEV* ScalarEvolution::getSignExtendExpr(const SCEV* Op, "This is not a conversion to a SCEVable type!"); Ty = getEffectiveSCEVType(Ty); + // Fold if the operand is constant. if (const SCEVConstant *SC = dyn_cast<SCEVConstant>(Op)) { const Type *IntTy = getEffectiveSCEVType(Ty); Constant *C = ConstantExpr::getSExt(SC->getValue(), IntTy); @@ -880,9 +945,16 @@ const SCEV* ScalarEvolution::getSignExtendExpr(const SCEV* Op, } } - SCEVSignExtendExpr *&Result = SCEVSignExtends[std::make_pair(Op, Ty)]; - if (Result == 0) Result = new SCEVSignExtendExpr(Op, Ty); - return Result; + FoldingSetNodeID ID; + ID.AddInteger(scSignExtend); + ID.AddPointer(Op); + ID.AddPointer(Ty); + void *IP = 0; + if (const SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP)) return S; + SCEV *S = SCEVAllocator.Allocate<SCEVSignExtendExpr>(); + new (S) SCEVSignExtendExpr(Op, Ty); + UniqueSCEVs.InsertNode(S, IP); + return S; } /// getAnyExtendExpr - Return a SCEV for the given operand extended with @@ -980,9 +1052,8 @@ CollectAddOperandsWithScales(DenseMap<const SCEV*, APInt> &M, SmallVector<const SCEV*, 4> MulOps(Mul->op_begin()+1, Mul->op_end()); const SCEV* Key = SE.getMulExpr(MulOps); std::pair<DenseMap<const SCEV*, APInt>::iterator, bool> Pair = - M.insert(std::make_pair(Key, APInt())); + M.insert(std::make_pair(Key, NewScale)); if (Pair.second) { - Pair.first->second = NewScale; NewOps.push_back(Pair.first->first); } else { Pair.first->second += NewScale; @@ -999,9 +1070,8 @@ CollectAddOperandsWithScales(DenseMap<const SCEV*, APInt> &M, } else { // An ordinary operand. Update the map. std::pair<DenseMap<const SCEV*, APInt>::iterator, bool> Pair = - M.insert(std::make_pair(Ops[i], APInt())); + M.insert(std::make_pair(Ops[i], Scale)); if (Pair.second) { - Pair.first->second = Scale; NewOps.push_back(Pair.first->first); } else { Pair.first->second += Scale; @@ -1343,11 +1413,17 @@ const SCEV* ScalarEvolution::getAddExpr(SmallVectorImpl<const SCEV*> &Ops) { // Okay, it looks like we really DO need an add expr. Check to see if we // already have one, otherwise create a new one. 
- std::vector<const SCEV*> SCEVOps(Ops.begin(), Ops.end()); - SCEVCommutativeExpr *&Result = SCEVCommExprs[std::make_pair(scAddExpr, - SCEVOps)]; - if (Result == 0) Result = new SCEVAddExpr(Ops); - return Result; + FoldingSetNodeID ID; + ID.AddInteger(scAddExpr); + ID.AddInteger(Ops.size()); + for (unsigned i = 0, e = Ops.size(); i != e; ++i) + ID.AddPointer(Ops[i]); + void *IP = 0; + if (const SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP)) return S; + SCEV *S = SCEVAllocator.Allocate<SCEVAddExpr>(); + new (S) SCEVAddExpr(Ops); + UniqueSCEVs.InsertNode(S, IP); + return S; } @@ -1508,12 +1584,17 @@ const SCEV* ScalarEvolution::getMulExpr(SmallVectorImpl<const SCEV*> &Ops) { // Okay, it looks like we really DO need an mul expr. Check to see if we // already have one, otherwise create a new one. - std::vector<const SCEV*> SCEVOps(Ops.begin(), Ops.end()); - SCEVCommutativeExpr *&Result = SCEVCommExprs[std::make_pair(scMulExpr, - SCEVOps)]; - if (Result == 0) - Result = new SCEVMulExpr(Ops); - return Result; + FoldingSetNodeID ID; + ID.AddInteger(scMulExpr); + ID.AddInteger(Ops.size()); + for (unsigned i = 0, e = Ops.size(); i != e; ++i) + ID.AddPointer(Ops[i]); + void *IP = 0; + if (const SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP)) return S; + SCEV *S = SCEVAllocator.Allocate<SCEVMulExpr>(); + new (S) SCEVMulExpr(Ops); + UniqueSCEVs.InsertNode(S, IP); + return S; } /// getUDivExpr - Get a canonical multiply expression, or something simpler if @@ -1603,9 +1684,16 @@ const SCEV *ScalarEvolution::getUDivExpr(const SCEV *LHS, } } - SCEVUDivExpr *&Result = SCEVUDivs[std::make_pair(LHS, RHS)]; - if (Result == 0) Result = new SCEVUDivExpr(LHS, RHS); - return Result; + FoldingSetNodeID ID; + ID.AddInteger(scUDivExpr); + ID.AddPointer(LHS); + ID.AddPointer(RHS); + void *IP = 0; + if (const SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP)) return S; + SCEV *S = SCEVAllocator.Allocate<SCEVUDivExpr>(); + new (S) SCEVUDivExpr(LHS, RHS); + UniqueSCEVs.InsertNode(S, IP); + return S; } @@ -1677,10 +1765,18 @@ ScalarEvolution::getAddRecExpr(SmallVectorImpl<const SCEV*> &Operands, } } - std::vector<const SCEV*> SCEVOps(Operands.begin(), Operands.end()); - SCEVAddRecExpr *&Result = SCEVAddRecExprs[std::make_pair(L, SCEVOps)]; - if (Result == 0) Result = new SCEVAddRecExpr(Operands, L); - return Result; + FoldingSetNodeID ID; + ID.AddInteger(scAddRecExpr); + ID.AddInteger(Operands.size()); + for (unsigned i = 0, e = Operands.size(); i != e; ++i) + ID.AddPointer(Operands[i]); + ID.AddPointer(L); + void *IP = 0; + if (const SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP)) return S; + SCEV *S = SCEVAllocator.Allocate<SCEVAddRecExpr>(); + new (S) SCEVAddRecExpr(Operands, L); + UniqueSCEVs.InsertNode(S, IP); + return S; } const SCEV *ScalarEvolution::getSMaxExpr(const SCEV *LHS, @@ -1767,11 +1863,17 @@ ScalarEvolution::getSMaxExpr(SmallVectorImpl<const SCEV*> &Ops) { // Okay, it looks like we really DO need an smax expr. Check to see if we // already have one, otherwise create a new one. 
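Note on the ScalarEvolution hunks: every getter follows the same replacement. The old per-kind std::map caches (SCEVConstants, SCEVTruncates, SCEVCommExprs, and so on) give way to a single uniquing table — a FoldingSet keyed by a FoldingSetNodeID — with nodes placement-allocated from a BumpPtrAllocator so releaseMemory() can drop them all at once. The following compilable sketch shows the lookup-or-create shape, with a std::map standing in for the FoldingSet and plain new standing in for the allocator; Expr and getOrCreate are illustrative names, not the real API.

#include <map>
#include <utility>
#include <vector>

struct Expr {
  unsigned Kind;
  std::vector<const Expr*> Ops;
};

typedef std::pair<unsigned, std::vector<const Expr*> > ExprKey;

// Return the unique node for (Kind, Ops), creating it on first request.
const Expr *getOrCreate(std::map<ExprKey, Expr*> &Unique,
                        unsigned Kind,
                        const std::vector<const Expr*> &Ops) {
  ExprKey Key(Kind, Ops);
  std::map<ExprKey, Expr*>::iterator I = Unique.find(Key);
  if (I != Unique.end())
    return I->second;             // structurally equal node already exists
  Expr *E = new Expr();           // the real code bump-allocates and
  E->Kind = Kind;                 // placement-news the node, then frees
  E->Ops = Ops;                   // the whole pool in releaseMemory()
  Unique[Key] = E;
  return E;
}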
- std::vector<const SCEV*> SCEVOps(Ops.begin(), Ops.end()); - SCEVCommutativeExpr *&Result = SCEVCommExprs[std::make_pair(scSMaxExpr, - SCEVOps)]; - if (Result == 0) Result = new SCEVSMaxExpr(Ops); - return Result; + FoldingSetNodeID ID; + ID.AddInteger(scSMaxExpr); + ID.AddInteger(Ops.size()); + for (unsigned i = 0, e = Ops.size(); i != e; ++i) + ID.AddPointer(Ops[i]); + void *IP = 0; + if (const SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP)) return S; + SCEV *S = SCEVAllocator.Allocate<SCEVSMaxExpr>(); + new (S) SCEVSMaxExpr(Ops); + UniqueSCEVs.InsertNode(S, IP); + return S; } const SCEV *ScalarEvolution::getUMaxExpr(const SCEV *LHS, @@ -1858,11 +1960,17 @@ ScalarEvolution::getUMaxExpr(SmallVectorImpl<const SCEV*> &Ops) { // Okay, it looks like we really DO need a umax expr. Check to see if we // already have one, otherwise create a new one. - std::vector<const SCEV*> SCEVOps(Ops.begin(), Ops.end()); - SCEVCommutativeExpr *&Result = SCEVCommExprs[std::make_pair(scUMaxExpr, - SCEVOps)]; - if (Result == 0) Result = new SCEVUMaxExpr(Ops); - return Result; + FoldingSetNodeID ID; + ID.AddInteger(scUMaxExpr); + ID.AddInteger(Ops.size()); + for (unsigned i = 0, e = Ops.size(); i != e; ++i) + ID.AddPointer(Ops[i]); + void *IP = 0; + if (const SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP)) return S; + SCEV *S = SCEVAllocator.Allocate<SCEVUMaxExpr>(); + new (S) SCEVUMaxExpr(Ops); + UniqueSCEVs.InsertNode(S, IP); + return S; } const SCEV *ScalarEvolution::getSMinExpr(const SCEV *LHS, @@ -1883,9 +1991,15 @@ const SCEV* ScalarEvolution::getUnknown(Value *V) { // interesting possibilities, and any other code that calls getUnknown // is doing so in order to hide a value from SCEV canonicalization. - SCEVUnknown *&Result = SCEVUnknowns[V]; - if (Result == 0) Result = new SCEVUnknown(V); - return Result; + FoldingSetNodeID ID; + ID.AddInteger(scUnknown); + ID.AddPointer(V); + void *IP = 0; + if (const SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP)) return S; + SCEV *S = SCEVAllocator.Allocate<SCEVUnknown>(); + new (S) SCEVUnknown(V); + UniqueSCEVs.InsertNode(S, IP); + return S; } //===----------------------------------------------------------------------===// @@ -1939,7 +2053,7 @@ const Type *ScalarEvolution::getEffectiveSCEVType(const Type *Ty) const { } const SCEV* ScalarEvolution::getCouldNotCompute() { - return CouldNotCompute; + return &CouldNotCompute; } /// hasSCEV - Return true if the SCEV for this value has already been @@ -2750,7 +2864,7 @@ ScalarEvolution::getBackedgeTakenInfo(const Loop *L) { BackedgeTakenCounts.insert(std::make_pair(L, getCouldNotCompute())); if (Pair.second) { BackedgeTakenInfo ItCount = ComputeBackedgeTakenCount(L); - if (ItCount.Exact != CouldNotCompute) { + if (ItCount.Exact != getCouldNotCompute()) { assert(ItCount.Exact->isLoopInvariant(L) && ItCount.Max->isLoopInvariant(L) && "Computed trip count isn't loop invariant for loop!"); @@ -2759,7 +2873,7 @@ ScalarEvolution::getBackedgeTakenInfo(const Loop *L) { // Update the value in the map. Pair.first->second = ItCount; } else { - if (ItCount.Max != CouldNotCompute) + if (ItCount.Max != getCouldNotCompute()) // Update the value in the map. Pair.first->second = ItCount; if (isa<PHINode>(L->getHeader()->begin())) @@ -2825,27 +2939,27 @@ ScalarEvolution::ComputeBackedgeTakenCount(const Loop *L) { L->getExitingBlocks(ExitingBlocks); // Examine all exits and pick the most conservative values. 
- const SCEV* BECount = CouldNotCompute; - const SCEV* MaxBECount = CouldNotCompute; + const SCEV* BECount = getCouldNotCompute(); + const SCEV* MaxBECount = getCouldNotCompute(); bool CouldNotComputeBECount = false; for (unsigned i = 0, e = ExitingBlocks.size(); i != e; ++i) { BackedgeTakenInfo NewBTI = ComputeBackedgeTakenCountFromExit(L, ExitingBlocks[i]); - if (NewBTI.Exact == CouldNotCompute) { + if (NewBTI.Exact == getCouldNotCompute()) { // We couldn't compute an exact value for this exit, so // we won't be able to compute an exact value for the loop. CouldNotComputeBECount = true; - BECount = CouldNotCompute; + BECount = getCouldNotCompute(); } else if (!CouldNotComputeBECount) { - if (BECount == CouldNotCompute) + if (BECount == getCouldNotCompute()) BECount = NewBTI.Exact; else BECount = getUMinFromMismatchedTypes(BECount, NewBTI.Exact); } - if (MaxBECount == CouldNotCompute) + if (MaxBECount == getCouldNotCompute()) MaxBECount = NewBTI.Max; - else if (NewBTI.Max != CouldNotCompute) + else if (NewBTI.Max != getCouldNotCompute()) MaxBECount = getUMinFromMismatchedTypes(MaxBECount, NewBTI.Max); } @@ -2863,7 +2977,7 @@ ScalarEvolution::ComputeBackedgeTakenCountFromExit(const Loop *L, // // FIXME: we should be able to handle switch instructions (with a single exit) BranchInst *ExitBr = dyn_cast<BranchInst>(ExitingBlock->getTerminator()); - if (ExitBr == 0) return CouldNotCompute; + if (ExitBr == 0) return getCouldNotCompute(); assert(ExitBr->isConditional() && "If unconditional, it can't be in loop!"); // At this point, we know we have a conditional branch that determines whether @@ -2892,7 +3006,7 @@ ScalarEvolution::ComputeBackedgeTakenCountFromExit(const Loop *L, for (BasicBlock *BB = ExitBr->getParent(); BB; ) { BasicBlock *Pred = BB->getUniquePredecessor(); if (!Pred) - return CouldNotCompute; + return getCouldNotCompute(); TerminatorInst *PredTerm = Pred->getTerminator(); for (unsigned i = 0, e = PredTerm->getNumSuccessors(); i != e; ++i) { BasicBlock *PredSucc = PredTerm->getSuccessor(i); @@ -2901,7 +3015,7 @@ ScalarEvolution::ComputeBackedgeTakenCountFromExit(const Loop *L, // If the predecessor has a successor that isn't BB and isn't // outside the loop, assume the worst. if (L->contains(PredSucc)) - return CouldNotCompute; + return getCouldNotCompute(); } if (Pred == L->getHeader()) { Ok = true; @@ -2910,7 +3024,7 @@ ScalarEvolution::ComputeBackedgeTakenCountFromExit(const Loop *L, BB = Pred; } if (!Ok) - return CouldNotCompute; + return getCouldNotCompute(); } // Procede to the next level to examine the exit condition expression. @@ -2935,27 +3049,30 @@ ScalarEvolution::ComputeBackedgeTakenCountFromExitCond(const Loop *L, ComputeBackedgeTakenCountFromExitCond(L, BO->getOperand(0), TBB, FBB); BackedgeTakenInfo BTI1 = ComputeBackedgeTakenCountFromExitCond(L, BO->getOperand(1), TBB, FBB); - const SCEV* BECount = CouldNotCompute; - const SCEV* MaxBECount = CouldNotCompute; + const SCEV* BECount = getCouldNotCompute(); + const SCEV* MaxBECount = getCouldNotCompute(); if (L->contains(TBB)) { // Both conditions must be true for the loop to continue executing. // Choose the less conservative count. 
- if (BTI0.Exact == CouldNotCompute || BTI1.Exact == CouldNotCompute) - BECount = CouldNotCompute; + if (BTI0.Exact == getCouldNotCompute() || + BTI1.Exact == getCouldNotCompute()) + BECount = getCouldNotCompute(); else BECount = getUMinFromMismatchedTypes(BTI0.Exact, BTI1.Exact); - if (BTI0.Max == CouldNotCompute) + if (BTI0.Max == getCouldNotCompute()) MaxBECount = BTI1.Max; - else if (BTI1.Max == CouldNotCompute) + else if (BTI1.Max == getCouldNotCompute()) MaxBECount = BTI0.Max; else MaxBECount = getUMinFromMismatchedTypes(BTI0.Max, BTI1.Max); } else { // Both conditions must be true for the loop to exit. assert(L->contains(FBB) && "Loop block has no successor in loop!"); - if (BTI0.Exact != CouldNotCompute && BTI1.Exact != CouldNotCompute) + if (BTI0.Exact != getCouldNotCompute() && + BTI1.Exact != getCouldNotCompute()) BECount = getUMaxFromMismatchedTypes(BTI0.Exact, BTI1.Exact); - if (BTI0.Max != CouldNotCompute && BTI1.Max != CouldNotCompute) + if (BTI0.Max != getCouldNotCompute() && + BTI1.Max != getCouldNotCompute()) MaxBECount = getUMaxFromMismatchedTypes(BTI0.Max, BTI1.Max); } @@ -2967,27 +3084,30 @@ ScalarEvolution::ComputeBackedgeTakenCountFromExitCond(const Loop *L, ComputeBackedgeTakenCountFromExitCond(L, BO->getOperand(0), TBB, FBB); BackedgeTakenInfo BTI1 = ComputeBackedgeTakenCountFromExitCond(L, BO->getOperand(1), TBB, FBB); - const SCEV* BECount = CouldNotCompute; - const SCEV* MaxBECount = CouldNotCompute; + const SCEV* BECount = getCouldNotCompute(); + const SCEV* MaxBECount = getCouldNotCompute(); if (L->contains(FBB)) { // Both conditions must be false for the loop to continue executing. // Choose the less conservative count. - if (BTI0.Exact == CouldNotCompute || BTI1.Exact == CouldNotCompute) - BECount = CouldNotCompute; + if (BTI0.Exact == getCouldNotCompute() || + BTI1.Exact == getCouldNotCompute()) + BECount = getCouldNotCompute(); else BECount = getUMinFromMismatchedTypes(BTI0.Exact, BTI1.Exact); - if (BTI0.Max == CouldNotCompute) + if (BTI0.Max == getCouldNotCompute()) MaxBECount = BTI1.Max; - else if (BTI1.Max == CouldNotCompute) + else if (BTI1.Max == getCouldNotCompute()) MaxBECount = BTI0.Max; else MaxBECount = getUMinFromMismatchedTypes(BTI0.Max, BTI1.Max); } else { // Both conditions must be false for the loop to exit. assert(L->contains(TBB) && "Loop block has no successor in loop!"); - if (BTI0.Exact != CouldNotCompute && BTI1.Exact != CouldNotCompute) + if (BTI0.Exact != getCouldNotCompute() && + BTI1.Exact != getCouldNotCompute()) BECount = getUMaxFromMismatchedTypes(BTI0.Exact, BTI1.Exact); - if (BTI0.Max != CouldNotCompute && BTI1.Max != CouldNotCompute) + if (BTI0.Max != getCouldNotCompute() && + BTI1.Max != getCouldNotCompute()) MaxBECount = getUMaxFromMismatchedTypes(BTI0.Max, BTI1.Max); } @@ -3164,11 +3284,11 @@ ScalarEvolution::ComputeLoadConstantCompareBackedgeTakenCount( Constant *RHS, const Loop *L, ICmpInst::Predicate predicate) { - if (LI->isVolatile()) return CouldNotCompute; + if (LI->isVolatile()) return getCouldNotCompute(); // Check to see if the loaded pointer is a getelementptr of a global. GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(LI->getOperand(0)); - if (!GEP) return CouldNotCompute; + if (!GEP) return getCouldNotCompute(); // Make sure that it is really a constant global we are gepping, with an // initializer, and make sure the first IDX is really 0. 
@@ -3176,7 +3296,7 @@ ScalarEvolution::ComputeLoadConstantCompareBackedgeTakenCount( if (!GV || !GV->isConstant() || !GV->hasInitializer() || GEP->getNumOperands() < 3 || !isa<Constant>(GEP->getOperand(1)) || !cast<Constant>(GEP->getOperand(1))->isNullValue()) - return CouldNotCompute; + return getCouldNotCompute(); // Okay, we allow one non-constant index into the GEP instruction. Value *VarIdx = 0; @@ -3186,7 +3306,7 @@ ScalarEvolution::ComputeLoadConstantCompareBackedgeTakenCount( if (ConstantInt *CI = dyn_cast<ConstantInt>(GEP->getOperand(i))) { Indexes.push_back(CI); } else if (!isa<ConstantInt>(GEP->getOperand(i))) { - if (VarIdx) return CouldNotCompute; // Multiple non-constant idx's. + if (VarIdx) return getCouldNotCompute(); // Multiple non-constant idx's. VarIdx = GEP->getOperand(i); VarIdxNum = i-2; Indexes.push_back(0); @@ -3203,7 +3323,7 @@ ScalarEvolution::ComputeLoadConstantCompareBackedgeTakenCount( if (!IdxExpr || !IdxExpr->isAffine() || IdxExpr->isLoopInvariant(L) || !isa<SCEVConstant>(IdxExpr->getOperand(0)) || !isa<SCEVConstant>(IdxExpr->getOperand(1))) - return CouldNotCompute; + return getCouldNotCompute(); unsigned MaxSteps = MaxBruteForceIterations; for (unsigned IterationNum = 0; IterationNum != MaxSteps; ++IterationNum) { @@ -3230,7 +3350,7 @@ ScalarEvolution::ComputeLoadConstantCompareBackedgeTakenCount( return getConstant(ItCst); // Found terminating iteration! } } - return CouldNotCompute; + return getCouldNotCompute(); } @@ -3371,13 +3491,13 @@ ScalarEvolution::getConstantEvolutionLoopExitValue(PHINode *PN, /// constant number of times (the condition evolves only from constants), /// try to evaluate a few iterations of the loop until we get the exit /// condition gets a value of ExitWhen (true or false). If we cannot -/// evaluate the trip count of the loop, return CouldNotCompute. +/// evaluate the trip count of the loop, return getCouldNotCompute(). const SCEV * ScalarEvolution::ComputeBackedgeTakenCountExhaustively(const Loop *L, Value *Cond, bool ExitWhen) { PHINode *PN = getConstantEvolvingPHI(Cond, L); - if (PN == 0) return CouldNotCompute; + if (PN == 0) return getCouldNotCompute(); // Since the loop is canonicalized, the PHI node must have two entries. One // entry must be a constant (coming in from outside of the loop), and the @@ -3385,11 +3505,11 @@ ScalarEvolution::ComputeBackedgeTakenCountExhaustively(const Loop *L, bool SecondIsBackedge = L->contains(PN->getIncomingBlock(1)); Constant *StartCST = dyn_cast<Constant>(PN->getIncomingValue(!SecondIsBackedge)); - if (StartCST == 0) return CouldNotCompute; // Must be a constant. + if (StartCST == 0) return getCouldNotCompute(); // Must be a constant. Value *BEValue = PN->getIncomingValue(SecondIsBackedge); PHINode *PN2 = getConstantEvolvingPHI(BEValue, L); - if (PN2 != PN) return CouldNotCompute; // Not derived from same PHI. + if (PN2 != PN) return getCouldNotCompute(); // Not derived from same PHI. // Okay, we find a PHI node that defines the trip count of this loop. Execute // the loop symbolically to determine when the condition gets a value of @@ -3402,10 +3522,9 @@ ScalarEvolution::ComputeBackedgeTakenCountExhaustively(const Loop *L, dyn_cast_or_null<ConstantInt>(EvaluateExpression(Cond, PHIVal)); // Couldn't symbolically evaluate. 
- if (!CondVal) return CouldNotCompute; + if (!CondVal) return getCouldNotCompute(); if (CondVal->getValue() == uint64_t(ExitWhen)) { - ConstantEvolutionLoopExitValue[PN] = PHIVal; ++NumBruteForceTripCountsComputed; return getConstant(Type::Int32Ty, IterationNum); } @@ -3413,12 +3532,12 @@ ScalarEvolution::ComputeBackedgeTakenCountExhaustively(const Loop *L, // Compute the value of the PHI node for the next iteration. Constant *NextPHI = EvaluateExpression(BEValue, PHIVal); if (NextPHI == 0 || NextPHI == PHIVal) - return CouldNotCompute; // Couldn't evaluate or not making progress... + return getCouldNotCompute();// Couldn't evaluate or not making progress... PHIVal = NextPHI; } // Too many iterations were needed to evaluate. - return CouldNotCompute; + return getCouldNotCompute(); } /// getSCEVAtScope - Return a SCEV expression handle for the specified value @@ -3457,7 +3576,7 @@ const SCEV* ScalarEvolution::getSCEVAtScope(const SCEV *V, const Loop *L) { Constant *RV = getConstantEvolutionLoopExitValue(PN, BTCC->getValue()->getValue(), LI); - if (RV) return getUnknown(RV); + if (RV) return getSCEV(RV); } } @@ -3471,7 +3590,7 @@ const SCEV* ScalarEvolution::getSCEVAtScope(const SCEV *V, const Loop *L) { std::pair<std::map<const Loop *, Constant *>::iterator, bool> Pair = Values.insert(std::make_pair(L, static_cast<Constant *>(0))); if (!Pair.second) - return Pair.first->second ? &*getUnknown(Pair.first->second) : V; + return Pair.first->second ? &*getSCEV(Pair.first->second) : V; std::vector<Constant*> Operands; Operands.reserve(I->getNumOperands()); @@ -3520,7 +3639,7 @@ const SCEV* ScalarEvolution::getSCEVAtScope(const SCEV *V, const Loop *L) { C = ConstantFoldInstOperands(I->getOpcode(), I->getType(), &Operands[0], Operands.size()); Pair.first->second = C; - return getUnknown(C); + return getSCEV(C); } } @@ -3574,7 +3693,7 @@ const SCEV* ScalarEvolution::getSCEVAtScope(const SCEV *V, const Loop *L) { // To evaluate this recurrence, we need to know how many times the AddRec // loop iterates. Compute this now. const SCEV* BackedgeTakenCount = getBackedgeTakenCount(AddRec->getLoop()); - if (BackedgeTakenCount == CouldNotCompute) return AddRec; + if (BackedgeTakenCount == getCouldNotCompute()) return AddRec; // Then, evaluate the AddRec. return AddRec->evaluateAtIteration(BackedgeTakenCount, *this); @@ -3729,12 +3848,12 @@ const SCEV* ScalarEvolution::HowFarToZero(const SCEV *V, const Loop *L) { if (const SCEVConstant *C = dyn_cast<SCEVConstant>(V)) { // If the value is already zero, the branch will execute zero times. if (C->getValue()->isZero()) return C; - return CouldNotCompute; // Otherwise it will loop infinitely. + return getCouldNotCompute(); // Otherwise it will loop infinitely. } const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(V); if (!AddRec || AddRec->getLoop() != L) - return CouldNotCompute; + return getCouldNotCompute(); if (AddRec->isAffine()) { // If this is an affine expression, the execution count of this branch is @@ -3798,7 +3917,7 @@ const SCEV* ScalarEvolution::HowFarToZero(const SCEV *V, const Loop *L) { } } - return CouldNotCompute; + return getCouldNotCompute(); } /// HowFarToNonZero - Return the number of times a backedge checking the @@ -3814,12 +3933,12 @@ const SCEV* ScalarEvolution::HowFarToNonZero(const SCEV *V, const Loop *L) { if (const SCEVConstant *C = dyn_cast<SCEVConstant>(V)) { if (!C->getValue()->isNullValue()) return getIntegerSCEV(0, C->getType()); - return CouldNotCompute; // Otherwise it will loop infinitely. 
+ return getCouldNotCompute(); // Otherwise it will loop infinitely. } // We could implement others, but I really doubt anyone writes loops like // this, and if they did, they would already be constant folded. - return CouldNotCompute; + return getCouldNotCompute(); } /// getLoopPredecessor - If the given loop's header has exactly one unique @@ -4037,7 +4156,7 @@ const SCEV* ScalarEvolution::getBECount(const SCEV* Start, getAddExpr(getZeroExtendExpr(Diff, WideTy), getZeroExtendExpr(RoundUp, WideTy)); if (getZeroExtendExpr(Add, WideTy) != OperandExtendedAdd) - return CouldNotCompute; + return getCouldNotCompute(); return getUDivExpr(Add, Step); } @@ -4049,11 +4168,11 @@ ScalarEvolution::BackedgeTakenInfo ScalarEvolution::HowManyLessThans(const SCEV *LHS, const SCEV *RHS, const Loop *L, bool isSigned) { // Only handle: "ADDREC < LoopInvariant". - if (!RHS->isLoopInvariant(L)) return CouldNotCompute; + if (!RHS->isLoopInvariant(L)) return getCouldNotCompute(); const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(LHS); if (!AddRec || AddRec->getLoop() != L) - return CouldNotCompute; + return getCouldNotCompute(); if (AddRec->isAffine()) { // FORNOW: We only support unit strides. @@ -4063,7 +4182,7 @@ ScalarEvolution::HowManyLessThans(const SCEV *LHS, const SCEV *RHS, // TODO: handle non-constant strides. const SCEVConstant *CStep = dyn_cast<SCEVConstant>(Step); if (!CStep || CStep->isZero()) - return CouldNotCompute; + return getCouldNotCompute(); if (CStep->isOne()) { // With unit stride, the iteration never steps past the limit value. } else if (CStep->getValue()->getValue().isStrictlyPositive()) { @@ -4074,19 +4193,19 @@ ScalarEvolution::HowManyLessThans(const SCEV *LHS, const SCEV *RHS, APInt Max = APInt::getSignedMaxValue(BitWidth); if ((Max - CStep->getValue()->getValue()) .slt(CLimit->getValue()->getValue())) - return CouldNotCompute; + return getCouldNotCompute(); } else { APInt Max = APInt::getMaxValue(BitWidth); if ((Max - CStep->getValue()->getValue()) .ult(CLimit->getValue()->getValue())) - return CouldNotCompute; + return getCouldNotCompute(); } } else // TODO: handle non-constant limit values below. - return CouldNotCompute; + return getCouldNotCompute(); } else // TODO: handle negative strides below. - return CouldNotCompute; + return getCouldNotCompute(); // We know the LHS is of the form {n,+,s} and the RHS is some loop-invariant // m. So, we count the number of iterations in which {n,+,s} < m is true. @@ -4126,12 +4245,12 @@ ScalarEvolution::HowManyLessThans(const SCEV *LHS, const SCEV *RHS, // The maximum backedge count is similar, except using the minimum start // value and the maximum end value. 
- const SCEV* MaxBECount = getBECount(MinStart, MaxEnd, Step);; + const SCEV* MaxBECount = getBECount(MinStart, MaxEnd, Step); return BackedgeTakenInfo(BECount, MaxBECount); } - return CouldNotCompute; + return getCouldNotCompute(); } /// getNumIterationsInRange - Return the number of iterations of this loop that @@ -4319,7 +4438,7 @@ ScalarEvolution::SCEVCallbackVH::SCEVCallbackVH(Value *V, ScalarEvolution *se) //===----------------------------------------------------------------------===// ScalarEvolution::ScalarEvolution() - : FunctionPass(&ID), CouldNotCompute(new SCEVCouldNotCompute()) { + : FunctionPass(&ID) { } bool ScalarEvolution::runOnFunction(Function &F) { @@ -4334,45 +4453,8 @@ void ScalarEvolution::releaseMemory() { BackedgeTakenCounts.clear(); ConstantEvolutionLoopExitValue.clear(); ValuesAtScopes.clear(); - - for (std::map<ConstantInt*, SCEVConstant*>::iterator - I = SCEVConstants.begin(), E = SCEVConstants.end(); I != E; ++I) - delete I->second; - for (std::map<std::pair<const SCEV*, const Type*>, - SCEVTruncateExpr*>::iterator I = SCEVTruncates.begin(), - E = SCEVTruncates.end(); I != E; ++I) - delete I->second; - for (std::map<std::pair<const SCEV*, const Type*>, - SCEVZeroExtendExpr*>::iterator I = SCEVZeroExtends.begin(), - E = SCEVZeroExtends.end(); I != E; ++I) - delete I->second; - for (std::map<std::pair<unsigned, std::vector<const SCEV*> >, - SCEVCommutativeExpr*>::iterator I = SCEVCommExprs.begin(), - E = SCEVCommExprs.end(); I != E; ++I) - delete I->second; - for (std::map<std::pair<const SCEV*, const SCEV*>, SCEVUDivExpr*>::iterator - I = SCEVUDivs.begin(), E = SCEVUDivs.end(); I != E; ++I) - delete I->second; - for (std::map<std::pair<const SCEV*, const Type*>, - SCEVSignExtendExpr*>::iterator I = SCEVSignExtends.begin(), - E = SCEVSignExtends.end(); I != E; ++I) - delete I->second; - for (std::map<std::pair<const Loop *, std::vector<const SCEV*> >, - SCEVAddRecExpr*>::iterator I = SCEVAddRecExprs.begin(), - E = SCEVAddRecExprs.end(); I != E; ++I) - delete I->second; - for (std::map<Value*, SCEVUnknown*>::iterator I = SCEVUnknowns.begin(), - E = SCEVUnknowns.end(); I != E; ++I) - delete I->second; - - SCEVConstants.clear(); - SCEVTruncates.clear(); - SCEVZeroExtends.clear(); - SCEVCommExprs.clear(); - SCEVUDivs.clear(); - SCEVSignExtends.clear(); - SCEVAddRecExprs.clear(); - SCEVUnknowns.clear(); + UniqueSCEVs.clear(); + SCEVAllocator.Reset(); } void ScalarEvolution::getAnalysisUsage(AnalysisUsage &AU) const { diff --git a/lib/Analysis/ScalarEvolutionExpander.cpp b/lib/Analysis/ScalarEvolutionExpander.cpp index 4cc5ebc..729a0c3 100644 --- a/lib/Analysis/ScalarEvolutionExpander.cpp +++ b/lib/Analysis/ScalarEvolutionExpander.cpp @@ -19,16 +19,24 @@ #include "llvm/ADT/STLExtras.h" using namespace llvm; -/// InsertCastOfTo - Insert a cast of V to the specified type, doing what -/// we can to share the casts. -Value *SCEVExpander::InsertCastOfTo(Instruction::CastOps opcode, Value *V, - const Type *Ty) { +/// InsertNoopCastOfTo - Insert a cast of V to the specified type, +/// which must be possible with a noop cast, doing what we can to share +/// the casts. 
+Value *SCEVExpander::InsertNoopCastOfTo(Value *V, const Type *Ty) { + Instruction::CastOps Op = CastInst::getCastOpcode(V, false, Ty, false); + assert((Op == Instruction::BitCast || + Op == Instruction::PtrToInt || + Op == Instruction::IntToPtr) && + "InsertNoopCastOfTo cannot perform non-noop casts!"); + assert(SE.getTypeSizeInBits(V->getType()) == SE.getTypeSizeInBits(Ty) && + "InsertNoopCastOfTo cannot change sizes!"); + // Short-circuit unnecessary bitcasts. - if (opcode == Instruction::BitCast && V->getType() == Ty) + if (Op == Instruction::BitCast && V->getType() == Ty) return V; // Short-circuit unnecessary inttoptr<->ptrtoint casts. - if ((opcode == Instruction::PtrToInt || opcode == Instruction::IntToPtr) && + if ((Op == Instruction::PtrToInt || Op == Instruction::IntToPtr) && SE.getTypeSizeInBits(Ty) == SE.getTypeSizeInBits(V->getType())) { if (CastInst *CI = dyn_cast<CastInst>(V)) if ((CI->getOpcode() == Instruction::PtrToInt || @@ -46,7 +54,7 @@ Value *SCEVExpander::InsertCastOfTo(Instruction::CastOps opcode, Value *V, // FIXME: keep track of the cast instruction. if (Constant *C = dyn_cast<Constant>(V)) - return ConstantExpr::getCast(opcode, C, Ty); + return ConstantExpr::getCast(Op, C, Ty); if (Argument *A = dyn_cast<Argument>(V)) { // Check to see if there is already a cast! @@ -54,7 +62,7 @@ Value *SCEVExpander::InsertCastOfTo(Instruction::CastOps opcode, Value *V, UI != E; ++UI) if ((*UI)->getType() == Ty) if (CastInst *CI = dyn_cast<CastInst>(cast<Instruction>(*UI))) - if (CI->getOpcode() == opcode) { + if (CI->getOpcode() == Op) { // If the cast isn't the first instruction of the function, move it. if (BasicBlock::iterator(CI) != A->getParent()->getEntryBlock().begin()) { @@ -62,7 +70,7 @@ Value *SCEVExpander::InsertCastOfTo(Instruction::CastOps opcode, Value *V, // The old cast is left in place in case it is being used // as an insert point. Instruction *NewCI = - CastInst::Create(opcode, V, Ty, "", + CastInst::Create(Op, V, Ty, "", A->getParent()->getEntryBlock().begin()); NewCI->takeName(CI); CI->replaceAllUsesWith(NewCI); @@ -71,7 +79,7 @@ Value *SCEVExpander::InsertCastOfTo(Instruction::CastOps opcode, Value *V, return CI; } - Instruction *I = CastInst::Create(opcode, V, Ty, V->getName(), + Instruction *I = CastInst::Create(Op, V, Ty, V->getName(), A->getParent()->getEntryBlock().begin()); InsertedValues.insert(I); return I; @@ -84,7 +92,7 @@ Value *SCEVExpander::InsertCastOfTo(Instruction::CastOps opcode, Value *V, UI != E; ++UI) { if ((*UI)->getType() == Ty) if (CastInst *CI = dyn_cast<CastInst>(cast<Instruction>(*UI))) - if (CI->getOpcode() == opcode) { + if (CI->getOpcode() == Op) { BasicBlock::iterator It = I; ++It; if (isa<InvokeInst>(I)) It = cast<InvokeInst>(I)->getNormalDest()->begin(); @@ -93,7 +101,7 @@ Value *SCEVExpander::InsertCastOfTo(Instruction::CastOps opcode, Value *V, // Recreate the cast at the beginning of the entry block. // The old cast is left in place in case it is being used // as an insert point. 
- Instruction *NewCI = CastInst::Create(opcode, V, Ty, "", It); + Instruction *NewCI = CastInst::Create(Op, V, Ty, "", It); NewCI->takeName(CI); CI->replaceAllUsesWith(NewCI); return NewCI; @@ -105,28 +113,15 @@ Value *SCEVExpander::InsertCastOfTo(Instruction::CastOps opcode, Value *V, if (InvokeInst *II = dyn_cast<InvokeInst>(I)) IP = II->getNormalDest()->begin(); while (isa<PHINode>(IP)) ++IP; - Instruction *CI = CastInst::Create(opcode, V, Ty, V->getName(), IP); + Instruction *CI = CastInst::Create(Op, V, Ty, V->getName(), IP); InsertedValues.insert(CI); return CI; } -/// InsertNoopCastOfTo - Insert a cast of V to the specified type, -/// which must be possible with a noop cast. -Value *SCEVExpander::InsertNoopCastOfTo(Value *V, const Type *Ty) { - Instruction::CastOps Op = CastInst::getCastOpcode(V, false, Ty, false); - assert((Op == Instruction::BitCast || - Op == Instruction::PtrToInt || - Op == Instruction::IntToPtr) && - "InsertNoopCastOfTo cannot perform non-noop casts!"); - assert(SE.getTypeSizeInBits(V->getType()) == SE.getTypeSizeInBits(Ty) && - "InsertNoopCastOfTo cannot change sizes!"); - return InsertCastOfTo(Op, V, Ty); -} - /// InsertBinop - Insert the specified binary operator, doing a small amount /// of work to avoid inserting an obviously redundant operation. -Value *SCEVExpander::InsertBinop(Instruction::BinaryOps Opcode, Value *LHS, - Value *RHS, BasicBlock::iterator InsertPt) { +Value *SCEVExpander::InsertBinop(Instruction::BinaryOps Opcode, + Value *LHS, Value *RHS) { // Fold a binop with constant operands. if (Constant *CLHS = dyn_cast<Constant>(LHS)) if (Constant *CRHS = dyn_cast<Constant>(RHS)) @@ -134,10 +129,10 @@ Value *SCEVExpander::InsertBinop(Instruction::BinaryOps Opcode, Value *LHS, // Do a quick scan to see if we have this binop nearby. If so, reuse it. unsigned ScanLimit = 6; - BasicBlock::iterator BlockBegin = InsertPt->getParent()->begin(); - if (InsertPt != BlockBegin) { - // Scanning starts from the last instruction before InsertPt. - BasicBlock::iterator IP = InsertPt; + BasicBlock::iterator BlockBegin = Builder.GetInsertBlock()->begin(); + // Scanning starts from the last instruction before the insertion point. + BasicBlock::iterator IP = Builder.GetInsertPoint(); + if (IP != BlockBegin) { --IP; for (; ScanLimit; --IP, --ScanLimit) { if (IP->getOpcode() == (unsigned)Opcode && IP->getOperand(0) == LHS && @@ -146,9 +141,9 @@ Value *SCEVExpander::InsertBinop(Instruction::BinaryOps Opcode, Value *LHS, if (IP == BlockBegin) break; } } - + // If we haven't found this binop, insert it. - Instruction *BO = BinaryOperator::Create(Opcode, LHS, RHS, "tmp", InsertPt); + Value *BO = Builder.CreateBinOp(Opcode, LHS, RHS, "tmp"); InsertedValues.insert(BO); return BO; } @@ -190,8 +185,9 @@ static bool FactorOutConstant(const SCEV* &S, if (const SCEVMulExpr *M = dyn_cast<SCEVMulExpr>(S)) if (const SCEVConstant *C = dyn_cast<SCEVConstant>(M->getOperand(0))) if (!C->getValue()->getValue().srem(Factor)) { - const SmallVectorImpl<const SCEV*> &MOperands = M->getOperands(); - SmallVector<const SCEV*, 4> NewMulOps(MOperands.begin(), MOperands.end()); + const SmallVectorImpl<const SCEV *> &MOperands = M->getOperands(); + SmallVector<const SCEV *, 4> NewMulOps(MOperands.begin(), + MOperands.end()); NewMulOps[0] = SE.getConstant(C->getValue()->getValue().sdiv(Factor)); S = SE.getMulExpr(NewMulOps); @@ -336,10 +332,10 @@ Value *SCEVExpander::expandAddToGEP(const SCEV* const *op_begin, // Do a quick scan to see if we have this GEP nearby. If so, reuse it. 
unsigned ScanLimit = 6; - BasicBlock::iterator BlockBegin = InsertPt->getParent()->begin(); - if (InsertPt != BlockBegin) { - // Scanning starts from the last instruction before InsertPt. - BasicBlock::iterator IP = InsertPt; + BasicBlock::iterator BlockBegin = Builder.GetInsertBlock()->begin(); + // Scanning starts from the last instruction before the insertion point. + BasicBlock::iterator IP = Builder.GetInsertPoint(); + if (IP != BlockBegin) { --IP; for (; ScanLimit; --IP, --ScanLimit) { if (IP->getOpcode() == Instruction::GetElementPtr && @@ -349,16 +345,16 @@ Value *SCEVExpander::expandAddToGEP(const SCEV* const *op_begin, } } - Value *GEP = GetElementPtrInst::Create(V, Idx, "scevgep", InsertPt); + Value *GEP = Builder.CreateGEP(V, Idx, "scevgep"); InsertedValues.insert(GEP); return GEP; } // Insert a pretty getelementptr. - Value *GEP = GetElementPtrInst::Create(V, - GepIndices.begin(), - GepIndices.end(), - "scevgep", InsertPt); + Value *GEP = Builder.CreateGEP(V, + GepIndices.begin(), + GepIndices.end(), + "scevgep"); Ops.push_back(SE.getUnknown(GEP)); InsertedValues.insert(GEP); return expand(SE.getAddExpr(Ops)); @@ -373,8 +369,7 @@ Value *SCEVExpander::visitAddExpr(const SCEVAddExpr *S) { if (SE.TD) if (const PointerType *PTy = dyn_cast<PointerType>(V->getType())) { const SmallVectorImpl<const SCEV*> &Ops = S->getOperands(); - return expandAddToGEP(&Ops[0], &Ops[Ops.size() - 1], - PTy, Ty, V); + return expandAddToGEP(&Ops[0], &Ops[Ops.size() - 1], PTy, Ty, V); } V = InsertNoopCastOfTo(V, Ty); @@ -382,7 +377,7 @@ Value *SCEVExpander::visitAddExpr(const SCEVAddExpr *S) { // Emit a bunch of add instructions for (int i = S->getNumOperands()-2; i >= 0; --i) { Value *W = expandCodeFor(S->getOperand(i), Ty); - V = InsertBinop(Instruction::Add, V, W, InsertPt); + V = InsertBinop(Instruction::Add, V, W); } return V; } @@ -400,12 +395,12 @@ Value *SCEVExpander::visitMulExpr(const SCEVMulExpr *S) { // Emit a bunch of multiply instructions for (; i >= FirstOp; --i) { Value *W = expandCodeFor(S->getOperand(i), Ty); - V = InsertBinop(Instruction::Mul, V, W, InsertPt); + V = InsertBinop(Instruction::Mul, V, W); } // -1 * ... ---> 0 - ... 
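// A multiply whose leading operand is the constant -1 is strength-reduced to
// a subtraction from zero: for S = -1 * %v the expander emits (sub 0, %v)
// through the FirstOp == 1 case below instead of materializing the multiply.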
if (FirstOp == 1) - V = InsertBinop(Instruction::Sub, Constant::getNullValue(Ty), V, InsertPt); + V = InsertBinop(Instruction::Sub, Constant::getNullValue(Ty), V); return V; } @@ -417,12 +412,11 @@ Value *SCEVExpander::visitUDivExpr(const SCEVUDivExpr *S) { const APInt &RHS = SC->getValue()->getValue(); if (RHS.isPowerOf2()) return InsertBinop(Instruction::LShr, LHS, - ConstantInt::get(Ty, RHS.logBase2()), - InsertPt); + ConstantInt::get(Ty, RHS.logBase2())); } Value *RHS = expandCodeFor(S->getRHS(), Ty); - return InsertBinop(Instruction::UDiv, LHS, RHS, InsertPt); + return InsertBinop(Instruction::UDiv, LHS, RHS); } /// Move parts of Base into Rest to leave Base with the minimal @@ -463,18 +457,19 @@ Value *SCEVExpander::visitAddRecExpr(const SCEVAddRecExpr *S) { if (CanonicalIV && SE.getTypeSizeInBits(CanonicalIV->getType()) > SE.getTypeSizeInBits(Ty)) { - const SCEV* Start = SE.getAnyExtendExpr(S->getStart(), + const SCEV *Start = SE.getAnyExtendExpr(S->getStart(), + CanonicalIV->getType()); + const SCEV *Step = SE.getAnyExtendExpr(S->getStepRecurrence(SE), CanonicalIV->getType()); - const SCEV* Step = SE.getAnyExtendExpr(S->getStepRecurrence(SE), - CanonicalIV->getType()); Value *V = expand(SE.getAddRecExpr(Start, Step, S->getLoop())); - BasicBlock::iterator SaveInsertPt = InsertPt; + BasicBlock *SaveInsertBB = Builder.GetInsertBlock(); + BasicBlock::iterator SaveInsertPt = Builder.GetInsertPoint(); BasicBlock::iterator NewInsertPt = next(BasicBlock::iterator(cast<Instruction>(V))); while (isa<PHINode>(NewInsertPt)) ++NewInsertPt; V = expandCodeFor(SE.getTruncateExpr(SE.getUnknown(V), Ty), 0, NewInsertPt); - InsertPt = SaveInsertPt; + Builder.SetInsertPoint(SaveInsertBB, SaveInsertPt); return V; } @@ -524,9 +519,10 @@ Value *SCEVExpander::visitAddRecExpr(const SCEVAddRecExpr *S) { // Create and insert the PHI node for the induction variable in the // specified loop. 
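// In IR terms the canonical induction variable built below has the shape
// (value names illustrative):
//
//   header:
//     %indvar      = phi iN [ 0, %preheader ], [ %indvar.next, %backedge ]
//     ...
//     %indvar.next = add iN %indvar, 1
//
// i.e. zero on entry from the preheader and an increment of one along the
// backedge.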
BasicBlock *Header = L->getHeader(); + BasicBlock *Preheader = L->getLoopPreheader(); PHINode *PN = PHINode::Create(Ty, "indvar", Header->begin()); InsertedValues.insert(PN); - PN->addIncoming(Constant::getNullValue(Ty), L->getLoopPreheader()); + PN->addIncoming(Constant::getNullValue(Ty), Preheader); pred_iterator HPI = pred_begin(Header); assert(HPI != pred_end(Header) && "Loop with zero preds???"); @@ -542,7 +538,7 @@ Value *SCEVExpander::visitAddRecExpr(const SCEVAddRecExpr *S) { InsertedValues.insert(Add); pred_iterator PI = pred_begin(Header); - if (*PI == L->getLoopPreheader()) + if (*PI == Preheader) ++PI; PN->addIncoming(Add, *PI); return PN; @@ -587,7 +583,7 @@ Value *SCEVExpander::visitTruncateExpr(const SCEVTruncateExpr *S) { const Type *Ty = SE.getEffectiveSCEVType(S->getType()); Value *V = expandCodeFor(S->getOperand(), SE.getEffectiveSCEVType(S->getOperand()->getType())); - Instruction *I = new TruncInst(V, Ty, "tmp.", InsertPt); + Value *I = Builder.CreateTrunc(V, Ty, "tmp"); InsertedValues.insert(I); return I; } @@ -596,7 +592,7 @@ Value *SCEVExpander::visitZeroExtendExpr(const SCEVZeroExtendExpr *S) { const Type *Ty = SE.getEffectiveSCEVType(S->getType()); Value *V = expandCodeFor(S->getOperand(), SE.getEffectiveSCEVType(S->getOperand()->getType())); - Instruction *I = new ZExtInst(V, Ty, "tmp.", InsertPt); + Value *I = Builder.CreateZExt(V, Ty, "tmp"); InsertedValues.insert(I); return I; } @@ -605,7 +601,7 @@ Value *SCEVExpander::visitSignExtendExpr(const SCEVSignExtendExpr *S) { const Type *Ty = SE.getEffectiveSCEVType(S->getType()); Value *V = expandCodeFor(S->getOperand(), SE.getEffectiveSCEVType(S->getOperand()->getType())); - Instruction *I = new SExtInst(V, Ty, "tmp.", InsertPt); + Value *I = Builder.CreateSExt(V, Ty, "tmp"); InsertedValues.insert(I); return I; } @@ -615,10 +611,9 @@ Value *SCEVExpander::visitSMaxExpr(const SCEVSMaxExpr *S) { Value *LHS = expandCodeFor(S->getOperand(0), Ty); for (unsigned i = 1; i < S->getNumOperands(); ++i) { Value *RHS = expandCodeFor(S->getOperand(i), Ty); - Instruction *ICmp = - new ICmpInst(ICmpInst::ICMP_SGT, LHS, RHS, "tmp", InsertPt); + Value *ICmp = Builder.CreateICmpSGT(LHS, RHS, "tmp"); InsertedValues.insert(ICmp); - Instruction *Sel = SelectInst::Create(ICmp, LHS, RHS, "smax", InsertPt); + Value *Sel = Builder.CreateSelect(ICmp, LHS, RHS, "smax"); InsertedValues.insert(Sel); LHS = Sel; } @@ -630,10 +625,9 @@ Value *SCEVExpander::visitUMaxExpr(const SCEVUMaxExpr *S) { Value *LHS = expandCodeFor(S->getOperand(0), Ty); for (unsigned i = 1; i < S->getNumOperands(); ++i) { Value *RHS = expandCodeFor(S->getOperand(i), Ty); - Instruction *ICmp = - new ICmpInst(ICmpInst::ICMP_UGT, LHS, RHS, "tmp", InsertPt); + Value *ICmp = Builder.CreateICmpUGT(LHS, RHS, "tmp"); InsertedValues.insert(ICmp); - Instruction *Sel = SelectInst::Create(ICmp, LHS, RHS, "umax", InsertPt); + Value *Sel = Builder.CreateSelect(ICmp, LHS, RHS, "umax"); InsertedValues.insert(Sel); LHS = Sel; } @@ -652,11 +646,10 @@ Value *SCEVExpander::expandCodeFor(const SCEV* SH, const Type *Ty) { } Value *SCEVExpander::expand(const SCEV *S) { - BasicBlock::iterator SaveInsertPt = InsertPt; - // Compute an insertion point for this SCEV object. Hoist the instructions // as far out in the loop nest as possible. 
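// With the move to IRBuilder, "save and restore the insertion point" becomes
// the recurring idiom of this rewrite. Whenever expansion must temporarily
// emit code elsewhere (for example at the outer insertion point chosen by
// the hoisting loop below), the pattern is:
//
//   BasicBlock *SaveBB = Builder.GetInsertBlock();
//   BasicBlock::iterator SavePt = Builder.GetInsertPoint();
//   Builder.SetInsertPoint(OtherBB, OtherPt);  // emit the hoisted code
//   ...
//   Builder.SetInsertPoint(SaveBB, SavePt);    // resume where we left off
//
// replacing the old scheme of saving and reassigning a raw InsertPt member.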
- for (Loop *L = SE.LI->getLoopFor(InsertPt->getParent()); ; + Instruction *InsertPt = Builder.GetInsertPoint(); + for (Loop *L = SE.LI->getLoopFor(Builder.GetInsertBlock()); ; L = L->getParentLoop()) if (S->isLoopInvariant(L)) { if (!L) break; @@ -668,7 +661,8 @@ Value *SCEVExpander::expand(const SCEV *S) { // there) so that it is guaranteed to dominate any user inside the loop. if (L && S->hasComputableLoopEvolution(L)) InsertPt = L->getHeader()->getFirstNonPHI(); - while (isInsertedInstruction(InsertPt)) ++InsertPt; + while (isInsertedInstruction(InsertPt)) + InsertPt = next(BasicBlock::iterator(InsertPt)); break; } @@ -676,10 +670,12 @@ Value *SCEVExpander::expand(const SCEV *S) { std::map<std::pair<const SCEV *, Instruction *>, AssertingVH<Value> >::iterator I = InsertedExpressions.find(std::make_pair(S, InsertPt)); - if (I != InsertedExpressions.end()) { - InsertPt = SaveInsertPt; + if (I != InsertedExpressions.end()) return I->second; - } + + BasicBlock *SaveInsertBB = Builder.GetInsertBlock(); + BasicBlock::iterator SaveInsertPt = Builder.GetInsertPoint(); + Builder.SetInsertPoint(InsertPt->getParent(), InsertPt); // Expand the expression into instructions. Value *V = visit(S); @@ -687,7 +683,7 @@ Value *SCEVExpander::expand(const SCEV *S) { // Remember the expanded value for this SCEV at this location. InsertedExpressions[std::make_pair(S, InsertPt)] = V; - InsertPt = SaveInsertPt; + Builder.SetInsertPoint(SaveInsertBB, SaveInsertPt); return V; } @@ -701,8 +697,10 @@ SCEVExpander::getOrInsertCanonicalInductionVariable(const Loop *L, assert(Ty->isInteger() && "Can only insert integer induction variables!"); const SCEV* H = SE.getAddRecExpr(SE.getIntegerSCEV(0, Ty), SE.getIntegerSCEV(1, Ty), L); - BasicBlock::iterator SaveInsertPt = InsertPt; + BasicBlock *SaveInsertBB = Builder.GetInsertBlock(); + BasicBlock::iterator SaveInsertPt = Builder.GetInsertPoint(); Value *V = expandCodeFor(H, 0, L->getHeader()->begin()); - InsertPt = SaveInsertPt; + if (SaveInsertBB) + Builder.SetInsertPoint(SaveInsertBB, SaveInsertPt); return V; } diff --git a/lib/Analysis/ValueTracking.cpp b/lib/Analysis/ValueTracking.cpp index 7509e91..07a18fe 100644 --- a/lib/Analysis/ValueTracking.cpp +++ b/lib/Analysis/ValueTracking.cpp @@ -249,7 +249,10 @@ void llvm::ComputeMaskedBits(Value *V, const APInt &Mask, } case Instruction::BitCast: { const Type *SrcTy = I->getOperand(0)->getType(); - if (SrcTy->isInteger() || isa<PointerType>(SrcTy)) { + if ((SrcTy->isInteger() || isa<PointerType>(SrcTy)) && + // TODO: For now, not handling conversions like: + // (bitcast i64 %x to <2 x i32>) + !isa<VectorType>(I->getType())) { ComputeMaskedBits(I->getOperand(0), Mask, KnownZero, KnownOne, TD, Depth+1); return; diff --git a/lib/Archive/Archive.cpp b/lib/Archive/Archive.cpp index c6c89d2..bb57262 100644 --- a/lib/Archive/Archive.cpp +++ b/lib/Archive/Archive.cpp @@ -138,9 +138,9 @@ bool ArchiveMember::replaceWith(const sys::Path& newFile, std::string* ErrMsg) { // Archive constructor - this is the only constructor that gets used for the // Archive class. Everything else (default,copy) is deprecated. This just // initializes and maps the file into memory, if requested. 
-Archive::Archive(const sys::Path& filename) +Archive::Archive(const sys::Path& filename, LLVMContext& C) : archPath(filename), members(), mapfile(0), base(0), symTab(), strtab(), - symTabSize(0), firstFileOffset(0), modules(), foreignST(0) { + symTabSize(0), firstFileOffset(0), modules(), foreignST(0), Context(C) { } bool @@ -208,6 +208,7 @@ static void getSymbols(Module*M, std::vector<std::string>& symbols) { // Get just the externally visible defined symbols from the bitcode bool llvm::GetBitcodeSymbols(const sys::Path& fName, + LLVMContext& Context, std::vector<std::string>& symbols, std::string* ErrMsg) { std::auto_ptr<MemoryBuffer> Buffer( @@ -217,7 +218,7 @@ bool llvm::GetBitcodeSymbols(const sys::Path& fName, return true; } - ModuleProvider *MP = getBitcodeModuleProvider(Buffer.get(), ErrMsg); + ModuleProvider *MP = getBitcodeModuleProvider(Buffer.get(), Context, ErrMsg); if (!MP) return true; @@ -239,13 +240,14 @@ bool llvm::GetBitcodeSymbols(const sys::Path& fName, ModuleProvider* llvm::GetBitcodeSymbols(const unsigned char *BufPtr, unsigned Length, const std::string& ModuleID, + LLVMContext& Context, std::vector<std::string>& symbols, std::string* ErrMsg) { // Get the module provider MemoryBuffer *Buffer =MemoryBuffer::getNewMemBuffer(Length, ModuleID.c_str()); memcpy((char*)Buffer->getBufferStart(), BufPtr, Length); - ModuleProvider *MP = getBitcodeModuleProvider(Buffer, ErrMsg); + ModuleProvider *MP = getBitcodeModuleProvider(Buffer, Context, ErrMsg); if (!MP) return 0; diff --git a/lib/Archive/ArchiveInternals.h b/lib/Archive/ArchiveInternals.h index 7ba3024..d187ed9 100644 --- a/lib/Archive/ArchiveInternals.h +++ b/lib/Archive/ArchiveInternals.h @@ -31,6 +31,8 @@ namespace llvm { + class LLVMContext; + /// The ArchiveMemberHeader structure is used internally for bitcode /// archives. /// The header precedes each file member in the archive. This structure is @@ -71,11 +73,13 @@ namespace llvm { // Get just the externally visible defined symbols from the bitcode bool GetBitcodeSymbols(const sys::Path& fName, + LLVMContext& Context, std::vector<std::string>& symbols, std::string* ErrMsg); ModuleProvider* GetBitcodeSymbols(const unsigned char*Buffer,unsigned Length, const std::string& ModuleID, + LLVMContext& Context, std::vector<std::string>& symbols, std::string* ErrMsg); } diff --git a/lib/Archive/ArchiveReader.cpp b/lib/Archive/ArchiveReader.cpp index b07e884..718d446 100644 --- a/lib/Archive/ArchiveReader.cpp +++ b/lib/Archive/ArchiveReader.cpp @@ -327,9 +327,9 @@ Archive::loadArchive(std::string* error) { // Open and completely load the archive file. 
Archive* -Archive::OpenAndLoad(const sys::Path& file, std::string* ErrorMessage) -{ - std::auto_ptr<Archive> result ( new Archive(file)); +Archive::OpenAndLoad(const sys::Path& file, LLVMContext& C, + std::string* ErrorMessage) { + std::auto_ptr<Archive> result ( new Archive(file, C)); if (result->mapToMemory(ErrorMessage)) return 0; if (!result->loadArchive(ErrorMessage)) @@ -339,7 +339,8 @@ Archive::OpenAndLoad(const sys::Path& file, std::string* ErrorMessage) // Get all the bitcode modules from the archive bool -Archive::getAllModules(std::vector<Module*>& Modules, std::string* ErrMessage) { +Archive::getAllModules(std::vector<Module*>& Modules, + std::string* ErrMessage) { for (iterator I=begin(), E=end(); I != E; ++I) { if (I->isBitcode()) { @@ -349,7 +350,7 @@ Archive::getAllModules(std::vector<Module*>& Modules, std::string* ErrMessage) { MemoryBuffer::getNewMemBuffer(I->getSize(), FullMemberName.c_str()); memcpy((char*)Buffer->getBufferStart(), I->getData(), I->getSize()); - Module *M = ParseBitcodeFile(Buffer, ErrMessage); + Module *M = ParseBitcodeFile(Buffer, Context, ErrMessage); delete Buffer; if (!M) return true; @@ -440,9 +441,10 @@ Archive::loadSymbolTable(std::string* ErrorMsg) { } // Open the archive and load just the symbol tables -Archive* -Archive::OpenAndLoadSymbols(const sys::Path& file, std::string* ErrorMessage) { - std::auto_ptr<Archive> result ( new Archive(file) ); +Archive* Archive::OpenAndLoadSymbols(const sys::Path& file, + LLVMContext& C, + std::string* ErrorMessage) { + std::auto_ptr<Archive> result ( new Archive(file, C) ); if (result->mapToMemory(ErrorMessage)) return 0; if (!result->loadSymbolTable(ErrorMessage)) @@ -488,7 +490,7 @@ Archive::findModuleDefiningSymbol(const std::string& symbol, FullMemberName.c_str()); memcpy((char*)Buffer->getBufferStart(), mbr->getData(), mbr->getSize()); - ModuleProvider *mp = getBitcodeModuleProvider(Buffer, ErrMsg); + ModuleProvider *mp = getBitcodeModuleProvider(Buffer, Context, ErrMsg); if (!mp) return 0; @@ -536,7 +538,7 @@ Archive::findModulesDefiningSymbols(std::set<std::string>& symbols, mbr->getPath().toString() + ")"; ModuleProvider* MP = GetBitcodeSymbols((const unsigned char*)At, mbr->getSize(), - FullMemberName, symbols, error); + FullMemberName, Context, symbols, error); if (MP) { // Insert the module's symbols into the symbol table @@ -615,7 +617,7 @@ bool Archive::isBitcodeArchive() { MemoryBuffer *Buffer = MemoryBuffer::getNewMemBuffer(I->getSize(), FullMemberName.c_str()); memcpy((char*)Buffer->getBufferStart(), I->getData(), I->getSize()); - Module *M = ParseBitcodeFile(Buffer); + Module *M = ParseBitcodeFile(Buffer, Context); delete Buffer; if (!M) return false; // Couldn't parse bitcode, not a bitcode archive. diff --git a/lib/Archive/ArchiveWriter.cpp b/lib/Archive/ArchiveWriter.cpp index cebb087..881d75b 100644 --- a/lib/Archive/ArchiveWriter.cpp +++ b/lib/Archive/ArchiveWriter.cpp @@ -64,9 +64,8 @@ static inline unsigned numVbrBytes(unsigned num) { } // Create an empty archive. 
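// Every Archive entry point that can parse bitcode now carries the
// LLVMContext that will own the resulting Modules. A typical caller after
// this change (archive name, helper name, and locals are illustrative; error
// handling abbreviated, and the global context is used as in BitReader.cpp):
static Module *FirstModuleFromArchive(std::string &Err) {
  LLVMContext &Ctx = getGlobalContext();
  Archive *A = Archive::OpenAndLoad(sys::Path("libfoo.a"), Ctx, &Err);
  if (!A)
    return 0;
  std::vector<Module*> Modules;
  if (A->getAllModules(Modules, &Err)) // returns true on error
    return 0;
  return Modules.empty() ? 0 : Modules[0]; // Modules live in Ctx
}
// CreateEmpty below takes the same parameter so that members parsed later on
// the write path also land in a known context.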
-Archive* -Archive::CreateEmpty(const sys::Path& FilePath ) { - Archive* result = new Archive(FilePath); +Archive* Archive::CreateEmpty(const sys::Path& FilePath, LLVMContext& C) { + Archive* result = new Archive(FilePath, C); return result; } @@ -229,7 +228,7 @@ Archive::writeMember( + ")"; ModuleProvider* MP = GetBitcodeSymbols((const unsigned char*)data,fSize, - FullMemberName, symbols, ErrMsg); + FullMemberName, Context, symbols, ErrMsg); // If the bitcode parsed successfully if ( MP ) { diff --git a/lib/AsmParser/LLLexer.cpp b/lib/AsmParser/LLLexer.cpp index b3f7cdb..741c538 100644 --- a/lib/AsmParser/LLLexer.cpp +++ b/lib/AsmParser/LLLexer.cpp @@ -16,6 +16,7 @@ #include "llvm/Instruction.h" #include "llvm/Support/MemoryBuffer.h" #include "llvm/Support/MathExtras.h" +#include "llvm/Support/SourceMgr.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Assembly/Parser.h" #include <cstdlib> @@ -23,23 +24,7 @@ using namespace llvm; bool LLLexer::Error(LocTy ErrorLoc, const std::string &Msg) const { - // Scan backward to find the start of the line. - const char *LineStart = ErrorLoc; - while (LineStart != CurBuf->getBufferStart() && - LineStart[-1] != '\n' && LineStart[-1] != '\r') - --LineStart; - // Get the end of the line. - const char *LineEnd = ErrorLoc; - while (LineEnd != CurBuf->getBufferEnd() && - LineEnd[0] != '\n' && LineEnd[0] != '\r') - ++LineEnd; - - unsigned LineNo = 1; - for (const char *FP = CurBuf->getBufferStart(); FP != ErrorLoc; ++FP) - if (*FP == '\n') ++LineNo; - - std::string LineContents(LineStart, LineEnd); - ErrorInfo.setError(Msg, LineNo, ErrorLoc-LineStart, LineContents); + ErrorInfo = SM.GetMessage(ErrorLoc, Msg, "error"); return true; } @@ -195,8 +180,8 @@ static const char *isLabelTail(const char *CurPtr) { // Lexer definition. //===----------------------------------------------------------------------===// -LLLexer::LLLexer(MemoryBuffer *StartBuf, ParseError &Err) - : CurBuf(StartBuf), ErrorInfo(Err), APFloatVal(0.0) { +LLLexer::LLLexer(MemoryBuffer *StartBuf, SourceMgr &sm, SMDiagnostic &Err) + : CurBuf(StartBuf), ErrorInfo(Err), SM(sm), APFloatVal(0.0) { CurPtr = CurBuf->getBufferStart(); } diff --git a/lib/AsmParser/LLLexer.h b/lib/AsmParser/LLLexer.h index 995aa4e..b5e58f1 100644 --- a/lib/AsmParser/LLLexer.h +++ b/lib/AsmParser/LLLexer.h @@ -17,17 +17,19 @@ #include "LLToken.h" #include "llvm/ADT/APSInt.h" #include "llvm/ADT/APFloat.h" +#include "llvm/Support/SourceMgr.h" #include <string> namespace llvm { class MemoryBuffer; class Type; - class ParseError; + class SMDiagnostic; class LLLexer { const char *CurPtr; MemoryBuffer *CurBuf; - ParseError &ErrorInfo; + SMDiagnostic &ErrorInfo; + SourceMgr &SM; // Information about the current token. 
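// Error reporting is now delegated wholesale to SourceMgr: the lexer keeps
// only a raw pointer per token and converts it on demand, letting SourceMgr
// compute line and column and render the caret line that ParseError used to
// assemble by hand. The whole flow reduces to two calls, taken from the
// hunk above:
//
//   SMLoc Loc = SMLoc::getFromPointer(TokStart); // LocTy is now SMLoc
//   ErrorInfo = SM.GetMessage(Loc, Msg, "error"); // caret diagnostic
//
// The members that follow are that per-token state.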
const char *TokStart; @@ -40,15 +42,15 @@ namespace llvm { std::string TheError; public: - explicit LLLexer(MemoryBuffer *StartBuf, ParseError &); + explicit LLLexer(MemoryBuffer *StartBuf, SourceMgr &SM, SMDiagnostic &); ~LLLexer() {} lltok::Kind Lex() { return CurKind = LexToken(); } - typedef const char* LocTy; - LocTy getLoc() const { return TokStart; } + typedef SMLoc LocTy; + LocTy getLoc() const { return SMLoc::getFromPointer(TokStart); } lltok::Kind getKind() const { return CurKind; } const std::string getStrVal() const { return StrVal; } const Type *getTyVal() const { return TyVal; } @@ -58,7 +60,7 @@ namespace llvm { bool Error(LocTy L, const std::string &Msg) const; - bool Error(const std::string &Msg) const { return Error(CurPtr, Msg); } + bool Error(const std::string &Msg) const { return Error(getLoc(), Msg); } std::string getFilename() const; private: diff --git a/lib/AsmParser/LLParser.cpp b/lib/AsmParser/LLParser.cpp index 909370c..3966ab3 100644 --- a/lib/AsmParser/LLParser.cpp +++ b/lib/AsmParser/LLParser.cpp @@ -18,6 +18,7 @@ #include "llvm/DerivedTypes.h" #include "llvm/InlineAsm.h" #include "llvm/Instructions.h" +#include "llvm/LLVMContext.h" #include "llvm/MDNode.h" #include "llvm/Module.h" #include "llvm/ValueSymbolTable.h" @@ -109,6 +110,7 @@ bool LLParser::ParseTopLevelEntities() { case lltok::StringConstant: // FIXME: REMOVE IN LLVM 3.0 case lltok::LocalVar: if (ParseNamedType()) return true; break; case lltok::GlobalVar: if (ParseNamedGlobal()) return true; break; + case lltok::Metadata: if (ParseStandaloneMetadata()) return true; break; // The Global variable production with no name can have many different // optional leading prefixes, the production is: @@ -129,7 +131,7 @@ bool LLParser::ParseTopLevelEntities() { unsigned Linkage, Visibility; if (ParseOptionalLinkage(Linkage) || ParseOptionalVisibility(Visibility) || - ParseGlobal("", 0, Linkage, true, Visibility)) + ParseGlobal("", SMLoc(), Linkage, true, Visibility)) return true; break; } @@ -138,7 +140,7 @@ bool LLParser::ParseTopLevelEntities() { case lltok::kw_protected: { // OptionalVisibility unsigned Visibility; if (ParseOptionalVisibility(Visibility) || - ParseGlobal("", 0, 0, false, Visibility)) + ParseGlobal("", SMLoc(), 0, false, Visibility)) return true; break; } @@ -147,7 +149,7 @@ bool LLParser::ParseTopLevelEntities() { case lltok::kw_addrspace: // OptionalAddrSpace case lltok::kw_constant: // GlobalType case lltok::kw_global: // GlobalType - if (ParseGlobal("", 0, 0, false, 0)) return true; + if (ParseGlobal("", SMLoc(), 0, false, 0)) return true; break; } } @@ -355,6 +357,34 @@ bool LLParser::ParseNamedGlobal() { return ParseAlias(Name, NameLoc, Visibility); } +/// ParseStandaloneMetadata: +/// !42 = !{...} +bool LLParser::ParseStandaloneMetadata() { + assert(Lex.getKind() == lltok::Metadata); + Lex.Lex(); + unsigned MetadataID = 0; + if (ParseUInt32(MetadataID)) + return true; + if (MetadataCache.find(MetadataID) != MetadataCache.end()) + return TokError("Metadata id is already used"); + if (ParseToken(lltok::equal, "expected '=' here")) + return true; + + LocTy TyLoc; + bool IsConstant; + PATypeHolder Ty(Type::VoidTy); + if (ParseGlobalType(IsConstant) || + ParseType(Ty, TyLoc)) + return true; + + Constant *Init = 0; + if (ParseGlobalValue(Ty, Init)) + return true; + + MetadataCache[MetadataID] = Init; + return false; +} + /// ParseAlias: /// ::= GlobalVar '=' OptionalVisibility 'alias' OptionalLinkage Aliasee /// Aliasee @@ -968,7 +998,7 @@ bool LLParser::ParseTypeRec(PATypeHolder &Result) 
{ break; case lltok::kw_opaque: // TypeRec ::= 'opaque' - Result = OpaqueType::get(); + Result = Context.getOpaqueType(); Lex.Lex(); break; case lltok::lbrace: @@ -998,7 +1028,7 @@ bool LLParser::ParseTypeRec(PATypeHolder &Result) { if (const Type *T = M->getTypeByName(Lex.getStrVal())) { Result = T; } else { - Result = OpaqueType::get(); + Result = Context.getOpaqueType(); ForwardRefTypes.insert(std::make_pair(Lex.getStrVal(), std::make_pair(Result, Lex.getLoc()))); @@ -1017,7 +1047,7 @@ bool LLParser::ParseTypeRec(PATypeHolder &Result) { if (I != ForwardRefTypeIDs.end()) Result = I->second.first; else { - Result = OpaqueType::get(); + Result = Context.getOpaqueType(); ForwardRefTypeIDs.insert(std::make_pair(Lex.getUIntVal(), std::make_pair(Result, Lex.getLoc()))); @@ -1030,7 +1060,7 @@ bool LLParser::ParseTypeRec(PATypeHolder &Result) { Lex.Lex(); unsigned Val; if (ParseUInt32(Val)) return true; - OpaqueType *OT = OpaqueType::get(); // Use temporary placeholder. + OpaqueType *OT = Context.getOpaqueType(); //Use temporary placeholder. UpRefs.push_back(UpRefRecord(Lex.getLoc(), Val, OT)); Result = OT; break; @@ -1051,7 +1081,7 @@ bool LLParser::ParseTypeRec(PATypeHolder &Result) { return TokError("pointers to void are invalid; use i8* instead"); if (!PointerType::isValidElementType(Result.get())) return TokError("pointer to this type is invalid"); - Result = HandleUpRefs(PointerType::getUnqual(Result.get())); + Result = HandleUpRefs(Context.getPointerTypeUnqual(Result.get())); Lex.Lex(); break; @@ -1068,7 +1098,7 @@ bool LLParser::ParseTypeRec(PATypeHolder &Result) { ParseToken(lltok::star, "expected '*' in address space")) return true; - Result = HandleUpRefs(PointerType::get(Result.get(), AddrSpace)); + Result = HandleUpRefs(Context.getPointerType(Result.get(), AddrSpace)); break; } @@ -1229,7 +1259,8 @@ bool LLParser::ParseFunctionType(PATypeHolder &Result) { for (unsigned i = 0, e = ArgList.size(); i != e; ++i) ArgListTy.push_back(ArgList[i].Type); - Result = HandleUpRefs(FunctionType::get(Result.get(), ArgListTy, isVarArg)); + Result = HandleUpRefs(Context.getFunctionType(Result.get(), + ArgListTy, isVarArg)); return false; } @@ -1244,7 +1275,7 @@ bool LLParser::ParseStructType(PATypeHolder &Result, bool Packed) { Lex.Lex(); // Consume the '{' if (EatIfPresent(lltok::rbrace)) { - Result = StructType::get(std::vector<const Type*>(), Packed); + Result = Context.getStructType(Packed); return false; } @@ -1276,7 +1307,7 @@ bool LLParser::ParseStructType(PATypeHolder &Result, bool Packed) { std::vector<const Type*> ParamsListTy; for (unsigned i = 0, e = ParamsList.size(); i != e; ++i) ParamsListTy.push_back(ParamsList[i].get()); - Result = HandleUpRefs(StructType::get(ParamsListTy, Packed)); + Result = HandleUpRefs(Context.getStructType(ParamsListTy, Packed)); return false; } @@ -1315,11 +1346,11 @@ bool LLParser::ParseArrayVectorType(PATypeHolder &Result, bool isVector) { return Error(SizeLoc, "size too large for vector"); if (!VectorType::isValidElementType(EltTy)) return Error(TypeLoc, "vector element type must be fp or integer"); - Result = VectorType::get(EltTy, unsigned(Size)); + Result = Context.getVectorType(EltTy, unsigned(Size)); } else { if (!ArrayType::isValidElementType(EltTy)) return Error(TypeLoc, "invalid array element type"); - Result = HandleUpRefs(ArrayType::get(EltTy, Size)); + Result = HandleUpRefs(Context.getArrayType(EltTy, Size)); } return false; } @@ -1343,8 +1374,8 @@ LLParser::PerFunctionState::~PerFunctionState() { for (std::map<std::string, std::pair<Value*, 
LocTy> >::iterator I = ForwardRefVals.begin(), E = ForwardRefVals.end(); I != E; ++I) if (!isa<BasicBlock>(I->second.first)) { - I->second.first->replaceAllUsesWith(UndefValue::get(I->second.first - ->getType())); + I->second.first->replaceAllUsesWith( + P.getContext().getUndef(I->second.first->getType())); delete I->second.first; I->second.first = 0; } @@ -1352,8 +1383,8 @@ LLParser::PerFunctionState::~PerFunctionState() { for (std::map<unsigned, std::pair<Value*, LocTy> >::iterator I = ForwardRefValIDs.begin(), E = ForwardRefValIDs.end(); I != E; ++I) if (!isa<BasicBlock>(I->second.first)) { - I->second.first->replaceAllUsesWith(UndefValue::get(I->second.first - ->getType())); + I->second.first->replaceAllUsesWith( + P.getContext().getUndef(I->second.first->getType())); delete I->second.first; I->second.first = 0; } @@ -1592,16 +1623,27 @@ bool LLParser::ParseValID(ValID &ID) { ParseToken(lltok::rbrace, "expected end of metadata node")) return true; - ID.ConstantVal = MDNode::get(Elts.data(), Elts.size()); + ID.ConstantVal = Context.getMDNode(Elts.data(), Elts.size()); return false; } + // Standalone metadata reference + // !{ ..., !42, ... } + unsigned MID = 0; + if (!ParseUInt32(MID)) { + std::map<unsigned, Constant *>::iterator I = MetadataCache.find(MID); + if (I == MetadataCache.end()) + return TokError("Unknown metadata reference"); + ID.ConstantVal = I->second; + return false; + } + // MDString: // ::= '!' STRINGCONSTANT std::string Str; if (ParseStringConstant(Str)) return true; - ID.ConstantVal = MDString::get(Str.data(), Str.data() + Str.size()); + ID.ConstantVal = Context.getMDString(Str.data(), Str.data() + Str.size()); return false; } case lltok::APSInt: @@ -1613,11 +1655,11 @@ bool LLParser::ParseValID(ValID &ID) { ID.Kind = ValID::t_APFloat; break; case lltok::kw_true: - ID.ConstantVal = ConstantInt::getTrue(); + ID.ConstantVal = Context.getConstantIntTrue(); ID.Kind = ValID::t_Constant; break; case lltok::kw_false: - ID.ConstantVal = ConstantInt::getFalse(); + ID.ConstantVal = Context.getConstantIntFalse(); ID.Kind = ValID::t_Constant; break; case lltok::kw_null: ID.Kind = ValID::t_Null; break; @@ -1632,7 +1674,7 @@ bool LLParser::ParseValID(ValID &ID) { ParseToken(lltok::rbrace, "expected end of struct constant")) return true; - ID.ConstantVal = ConstantStruct::get(Elts.data(), Elts.size(), false); + ID.ConstantVal = Context.getConstantStruct(Elts.data(), Elts.size(), false); ID.Kind = ValID::t_Constant; return false; } @@ -1651,7 +1693,8 @@ bool LLParser::ParseValID(ValID &ID) { return true; if (isPackedStruct) { - ID.ConstantVal = ConstantStruct::get(Elts.data(), Elts.size(), true); + ID.ConstantVal = + Context.getConstantStruct(Elts.data(), Elts.size(), true); ID.Kind = ValID::t_Constant; return false; } @@ -1671,7 +1714,7 @@ bool LLParser::ParseValID(ValID &ID) { "vector element #" + utostr(i) + " is not of type '" + Elts[0]->getType()->getDescription()); - ID.ConstantVal = ConstantVector::get(Elts.data(), Elts.size()); + ID.ConstantVal = Context.getConstantVector(Elts.data(), Elts.size()); ID.Kind = ValID::t_Constant; return false; } @@ -1695,7 +1738,7 @@ bool LLParser::ParseValID(ValID &ID) { return Error(FirstEltLoc, "invalid array element type: " + Elts[0]->getType()->getDescription()); - ArrayType *ATy = ArrayType::get(Elts[0]->getType(), Elts.size()); + ArrayType *ATy = Context.getArrayType(Elts[0]->getType(), Elts.size()); // Verify all elements are correct type! 
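// The rewrite throughout ParseValID is mechanical: each static constant
// factory (MDNode::get, MDString::get, ConstantInt::getTrue/getFalse,
// ConstantStruct::get, ConstantVector::get, ConstantArray::get) becomes the
// equivalent accessor on the parser's LLVMContext (Context.getMDNode,
// Context.getMDString, Context.getConstantIntTrue/False, and so on), so the
// constants are uniqued per context instead of process-wide. The element
// verification loop below is unchanged by this.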
for (unsigned i = 0, e = Elts.size(); i != e; ++i) { @@ -1705,13 +1748,13 @@ bool LLParser::ParseValID(ValID &ID) { " is not of type '" +Elts[0]->getType()->getDescription()); } - ID.ConstantVal = ConstantArray::get(ATy, Elts.data(), Elts.size()); + ID.ConstantVal = Context.getConstantArray(ATy, Elts.data(), Elts.size()); ID.Kind = ValID::t_Constant; return false; } case lltok::kw_c: // c "foo" Lex.Lex(); - ID.ConstantVal = ConstantArray::get(Lex.getStrVal(), false); + ID.ConstantVal = Context.getConstantArray(Lex.getStrVal(), false); if (ParseToken(lltok::StringConstant, "expected string")) return true; ID.Kind = ValID::t_Constant; return false; @@ -1757,8 +1800,8 @@ bool LLParser::ParseValID(ValID &ID) { return Error(ID.Loc, "invalid cast opcode for cast from '" + SrcVal->getType()->getDescription() + "' to '" + DestTy->getDescription() + "'"); - ID.ConstantVal = ConstantExpr::getCast((Instruction::CastOps)Opc, SrcVal, - DestTy); + ID.ConstantVal = Context.getConstantExprCast((Instruction::CastOps)Opc, + SrcVal, DestTy); ID.Kind = ValID::t_Constant; return false; } @@ -1777,7 +1820,7 @@ bool LLParser::ParseValID(ValID &ID) { Indices.end())) return Error(ID.Loc, "invalid indices for extractvalue"); ID.ConstantVal = - ConstantExpr::getExtractValue(Val, Indices.data(), Indices.size()); + Context.getConstantExprExtractValue(Val, Indices.data(), Indices.size()); ID.Kind = ValID::t_Constant; return false; } @@ -1797,8 +1840,8 @@ bool LLParser::ParseValID(ValID &ID) { if (!ExtractValueInst::getIndexedType(Val0->getType(), Indices.begin(), Indices.end())) return Error(ID.Loc, "invalid indices for insertvalue"); - ID.ConstantVal = - ConstantExpr::getInsertValue(Val0, Val1, Indices.data(), Indices.size()); + ID.ConstantVal = Context.getConstantExprInsertValue(Val0, Val1, + Indices.data(), Indices.size()); ID.Kind = ValID::t_Constant; return false; } @@ -1825,24 +1868,24 @@ bool LLParser::ParseValID(ValID &ID) { if (Opc == Instruction::FCmp) { if (!Val0->getType()->isFPOrFPVector()) return Error(ID.Loc, "fcmp requires floating point operands"); - ID.ConstantVal = ConstantExpr::getFCmp(Pred, Val0, Val1); + ID.ConstantVal = Context.getConstantExprFCmp(Pred, Val0, Val1); } else if (Opc == Instruction::ICmp) { if (!Val0->getType()->isIntOrIntVector() && !isa<PointerType>(Val0->getType())) return Error(ID.Loc, "icmp requires pointer or integer operands"); - ID.ConstantVal = ConstantExpr::getICmp(Pred, Val0, Val1); + ID.ConstantVal = Context.getConstantExprICmp(Pred, Val0, Val1); } else if (Opc == Instruction::VFCmp) { // FIXME: REMOVE VFCMP Support if (!Val0->getType()->isFPOrFPVector() || !isa<VectorType>(Val0->getType())) return Error(ID.Loc, "vfcmp requires vector floating point operands"); - ID.ConstantVal = ConstantExpr::getVFCmp(Pred, Val0, Val1); + ID.ConstantVal = Context.getConstantExprVFCmp(Pred, Val0, Val1); } else if (Opc == Instruction::VICmp) { // FIXME: REMOVE VICMP Support if (!Val0->getType()->isIntOrIntVector() || !isa<VectorType>(Val0->getType())) return Error(ID.Loc, "vicmp requires vector floating point operands"); - ID.ConstantVal = ConstantExpr::getVICmp(Pred, Val0, Val1); + ID.ConstantVal = Context.getConstantExprVICmp(Pred, Val0, Val1); } ID.Kind = ValID::t_Constant; return false; @@ -1875,7 +1918,7 @@ bool LLParser::ParseValID(ValID &ID) { if (!Val0->getType()->isIntOrIntVector() && !Val0->getType()->isFPOrFPVector()) return Error(ID.Loc,"constexpr requires integer, fp, or vector operands"); - ID.ConstantVal = ConstantExpr::get(Opc, Val0, Val1); + ID.ConstantVal = 
Context.getConstantExpr(Opc, Val0, Val1); ID.Kind = ValID::t_Constant; return false; } @@ -1901,7 +1944,7 @@ bool LLParser::ParseValID(ValID &ID) { if (!Val0->getType()->isIntOrIntVector()) return Error(ID.Loc, "constexpr requires integer or integer vector operands"); - ID.ConstantVal = ConstantExpr::get(Opc, Val0, Val1); + ID.ConstantVal = Context.getConstantExpr(Opc, Val0, Val1); ID.Kind = ValID::t_Constant; return false; } @@ -1926,7 +1969,7 @@ bool LLParser::ParseValID(ValID &ID) { if (!GetElementPtrInst::getIndexedType(Elts[0]->getType(), (Value**)&Elts[1], Elts.size()-1)) return Error(ID.Loc, "invalid indices for getelementptr"); - ID.ConstantVal = ConstantExpr::getGetElementPtr(Elts[0], + ID.ConstantVal = Context.getConstantExprGetElementPtr(Elts[0], &Elts[1], Elts.size()-1); } else if (Opc == Instruction::Select) { if (Elts.size() != 3) @@ -1934,26 +1977,28 @@ bool LLParser::ParseValID(ValID &ID) { if (const char *Reason = SelectInst::areInvalidOperands(Elts[0], Elts[1], Elts[2])) return Error(ID.Loc, Reason); - ID.ConstantVal = ConstantExpr::getSelect(Elts[0], Elts[1], Elts[2]); + ID.ConstantVal = Context.getConstantExprSelect(Elts[0], Elts[1], Elts[2]); } else if (Opc == Instruction::ShuffleVector) { if (Elts.size() != 3) return Error(ID.Loc, "expected three operands to shufflevector"); if (!ShuffleVectorInst::isValidOperands(Elts[0], Elts[1], Elts[2])) return Error(ID.Loc, "invalid operands to shufflevector"); - ID.ConstantVal = ConstantExpr::getShuffleVector(Elts[0], Elts[1],Elts[2]); + ID.ConstantVal = + Context.getConstantExprShuffleVector(Elts[0], Elts[1],Elts[2]); } else if (Opc == Instruction::ExtractElement) { if (Elts.size() != 2) return Error(ID.Loc, "expected two operands to extractelement"); if (!ExtractElementInst::isValidOperands(Elts[0], Elts[1])) return Error(ID.Loc, "invalid extractelement operands"); - ID.ConstantVal = ConstantExpr::getExtractElement(Elts[0], Elts[1]); + ID.ConstantVal = Context.getConstantExprExtractElement(Elts[0], Elts[1]); } else { assert(Opc == Instruction::InsertElement && "Unknown opcode"); if (Elts.size() != 3) return Error(ID.Loc, "expected three operands to insertelement"); if (!InsertElementInst::isValidOperands(Elts[0], Elts[1], Elts[2])) return Error(ID.Loc, "invalid insertelement operands"); - ID.ConstantVal = ConstantExpr::getInsertElement(Elts[0], Elts[1],Elts[2]); + ID.ConstantVal = + Context.getConstantExprInsertElement(Elts[0], Elts[1],Elts[2]); } ID.Kind = ValID::t_Constant; @@ -1997,7 +2042,7 @@ bool LLParser::ConvertGlobalValIDToValue(const Type *Ty, ValID &ID, if (!isa<IntegerType>(Ty)) return Error(ID.Loc, "integer constant must have integer type"); ID.APSIntVal.extOrTrunc(Ty->getPrimitiveSizeInBits()); - V = ConstantInt::get(ID.APSIntVal); + V = Context.getConstantInt(ID.APSIntVal); return false; case ValID::t_APFloat: if (!Ty->isFloatingPoint() || @@ -2012,7 +2057,7 @@ bool LLParser::ConvertGlobalValIDToValue(const Type *Ty, ValID &ID, ID.APFloatVal.convert(APFloat::IEEEsingle, APFloat::rmNearestTiesToEven, &Ignored); } - V = ConstantFP::get(ID.APFloatVal); + V = Context.getConstantFP(ID.APFloatVal); if (V->getType() != Ty) return Error(ID.Loc, "floating point constant does not have type '" + @@ -2022,25 +2067,25 @@ bool LLParser::ConvertGlobalValIDToValue(const Type *Ty, ValID &ID, case ValID::t_Null: if (!isa<PointerType>(Ty)) return Error(ID.Loc, "null must be a pointer type"); - V = ConstantPointerNull::get(cast<PointerType>(Ty)); + V = Context.getConstantPointerNull(cast<PointerType>(Ty)); return false; case 
ValID::t_Undef: // FIXME: LabelTy should not be a first-class type. if ((!Ty->isFirstClassType() || Ty == Type::LabelTy) && !isa<OpaqueType>(Ty)) return Error(ID.Loc, "invalid type for undef constant"); - V = UndefValue::get(Ty); + V = Context.getUndef(Ty); return false; case ValID::t_EmptyArray: if (!isa<ArrayType>(Ty) || cast<ArrayType>(Ty)->getNumElements() != 0) return Error(ID.Loc, "invalid empty array initializer"); - V = UndefValue::get(Ty); + V = Context.getUndef(Ty); return false; case ValID::t_Zero: // FIXME: LabelTy should not be a first-class type. if (!Ty->isFirstClassType() || Ty == Type::LabelTy) return Error(ID.Loc, "invalid type for null constant"); - V = Constant::getNullValue(Ty); + V = Context.getNullValue(Ty); return false; case ValID::t_Constant: if (ID.ConstantVal->getType() != Ty) @@ -2242,8 +2287,9 @@ bool LLParser::ParseFunctionHeader(Function *&Fn, bool isDefine) { RetType != Type::VoidTy) return Error(RetTypeLoc, "functions with 'sret' argument must return void"); - const FunctionType *FT = FunctionType::get(RetType, ParamTypeList, isVarArg); - const PointerType *PFT = PointerType::getUnqual(FT); + const FunctionType *FT = + Context.getFunctionType(RetType, ParamTypeList, isVarArg); + const PointerType *PFT = Context.getPointerTypeUnqual(FT); Fn = 0; if (!FunctionName.empty()) { @@ -2558,7 +2604,7 @@ bool LLParser::ParseRet(Instruction *&Inst, BasicBlock *BB, RVs.push_back(RV); } - RV = UndefValue::get(PFS.getFunction().getReturnType()); + RV = Context.getUndef(PFS.getFunction().getReturnType()); for (unsigned i = 0, e = RVs.size(); i != e; ++i) { Instruction *I = InsertValueInst::Create(RV, RVs[i], i, "mrv"); BB->getInstList().push_back(I); @@ -2696,8 +2742,8 @@ bool LLParser::ParseInvoke(Instruction *&Inst, PerFunctionState &PFS) { if (!FunctionType::isValidReturnType(RetType)) return Error(RetTypeLoc, "Invalid result type for LLVM function"); - Ty = FunctionType::get(RetType, ParamTypes, false); - PFTy = PointerType::getUnqual(Ty); + Ty = Context.getFunctionType(RetType, ParamTypes, false); + PFTy = Context.getPointerTypeUnqual(Ty); } // Look up the callee. @@ -3045,8 +3091,8 @@ bool LLParser::ParseCall(Instruction *&Inst, PerFunctionState &PFS, if (!FunctionType::isValidReturnType(RetType)) return Error(RetTypeLoc, "Invalid result type for LLVM function"); - Ty = FunctionType::get(RetType, ParamTypes, false); - PFTy = PointerType::getUnqual(Ty); + Ty = Context.getFunctionType(RetType, ParamTypes, false); + PFTy = Context.getPointerTypeUnqual(Ty); } // Look up the callee. @@ -3116,7 +3162,7 @@ bool LLParser::ParseAlloc(Instruction *&Inst, PerFunctionState &PFS, unsigned Opc) { PATypeHolder Ty(Type::VoidTy); Value *Size = 0; - LocTy SizeLoc = 0; + LocTy SizeLoc; unsigned Alignment = 0; if (ParseType(Ty)) return true; diff --git a/lib/AsmParser/LLParser.h b/lib/AsmParser/LLParser.h index 7106689..6659620 100644 --- a/lib/AsmParser/LLParser.h +++ b/lib/AsmParser/LLParser.h @@ -15,6 +15,7 @@ #define LLVM_ASMPARSER_LLPARSER_H #include "LLLexer.h" +#include "llvm/Module.h" #include "llvm/Type.h" #include <map> @@ -35,7 +36,7 @@ namespace llvm { public: typedef LLLexer::LocTy LocTy; private: - + LLVMContext& Context; LLLexer Lex; Module *M; @@ -43,7 +44,8 @@ namespace llvm { std::map<std::string, std::pair<PATypeHolder, LocTy> > ForwardRefTypes; std::map<unsigned, std::pair<PATypeHolder, LocTy> > ForwardRefTypeIDs; std::vector<PATypeHolder> NumberedTypes; - + /// MetadataCache - This map keeps track of parsed metadata constants. 
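// Parsing and resolution both go through this map: ParseStandaloneMetadata
// records a top-level "!42 = ..." definition under its numeric id, and
// ParseValID later resolves a bare "!42" operand by looking that id up here,
// reporting "Unknown metadata reference" for an id never defined and
// "Metadata id is already used" for a duplicate definition.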
+ std::map<unsigned, Constant *> MetadataCache; struct UpRefRecord { /// Loc - This is the location of the upref. LocTy Loc; @@ -71,9 +73,12 @@ namespace llvm { std::map<unsigned, std::pair<GlobalValue*, LocTy> > ForwardRefValIDs; std::vector<GlobalValue*> NumberedVals; public: - LLParser(MemoryBuffer *F, ParseError &Err, Module *m) : Lex(F, Err), M(m) {} + LLParser(MemoryBuffer *F, SourceMgr &SM, SMDiagnostic &Err, Module *m) : + Context(m->getContext()), Lex(F, SM, Err), M(m) {} bool Run(); + LLVMContext& getContext() { return Context; } + private: bool Error(LocTy L, const std::string &Msg) const { @@ -139,6 +144,7 @@ namespace llvm { bool ParseGlobal(const std::string &Name, LocTy Loc, unsigned Linkage, bool HasLinkage, unsigned Visibility); bool ParseAlias(const std::string &Name, LocTy Loc, unsigned Visibility); + bool ParseStandaloneMetadata(); // Type Parsing. bool ParseType(PATypeHolder &Result, bool AllowVoid = false); diff --git a/lib/AsmParser/Parser.cpp b/lib/AsmParser/Parser.cpp index 759e00e..d66c13d 100644 --- a/lib/AsmParser/Parser.cpp +++ b/lib/AsmParser/Parser.cpp @@ -15,73 +15,47 @@ #include "LLParser.h" #include "llvm/Module.h" #include "llvm/ADT/OwningPtr.h" +#include "llvm/Support/SourceMgr.h" #include "llvm/Support/MemoryBuffer.h" #include "llvm/Support/raw_ostream.h" #include <cstring> using namespace llvm; -Module *llvm::ParseAssemblyFile(const std::string &Filename, ParseError &Err) { - Err.setFilename(Filename); - +Module *llvm::ParseAssemblyFile(const std::string &Filename, SMDiagnostic &Err, + LLVMContext &Context) { std::string ErrorStr; - OwningPtr<MemoryBuffer> - F(MemoryBuffer::getFileOrSTDIN(Filename.c_str(), &ErrorStr)); + MemoryBuffer *F = MemoryBuffer::getFileOrSTDIN(Filename.c_str(), &ErrorStr); if (F == 0) { - Err.setError("Could not open input file '" + Filename + "'"); + Err = SMDiagnostic("", -1, -1, + "Could not open input file '" + Filename + "'", ""); return 0; } - OwningPtr<Module> M(new Module(Filename)); - if (LLParser(F.get(), Err, M.get()).Run()) + SourceMgr SM; + SM.AddNewSourceBuffer(F, SMLoc()); + + OwningPtr<Module> M(new Module(Filename, Context)); + if (LLParser(F, SM, Err, M.get()).Run()) return 0; return M.take(); } Module *llvm::ParseAssemblyString(const char *AsmString, Module *M, - ParseError &Err) { - Err.setFilename("<string>"); - - OwningPtr<MemoryBuffer> - F(MemoryBuffer::getMemBuffer(AsmString, AsmString+strlen(AsmString), - "<string>")); + SMDiagnostic &Err, LLVMContext &Context) { + MemoryBuffer *F = + MemoryBuffer::getMemBuffer(AsmString, AsmString+strlen(AsmString), + "<string>"); + SourceMgr SM; + SM.AddNewSourceBuffer(F, SMLoc()); + // If we are parsing into an existing module, do it. if (M) - return LLParser(F.get(), Err, M).Run() ? 0 : M; + return LLParser(F, SM, Err, M).Run() ? 0 : M; // Otherwise create a new module. 
- OwningPtr<Module> M2(new Module("<string>")); - if (LLParser(F.get(), Err, M2.get()).Run()) + OwningPtr<Module> M2(new Module("<string>", Context)); + if (LLParser(F, SM, Err, M2.get()).Run()) return 0; return M2.take(); } - - -//===------------------------------------------------------------------------=== -// ParseError Class -//===------------------------------------------------------------------------=== - -void ParseError::PrintError(const char *ProgName, raw_ostream &S) { - errs() << ProgName << ": "; - if (Filename == "-") - errs() << "<stdin>"; - else - errs() << Filename; - - if (LineNo != -1) { - errs() << ':' << LineNo; - if (ColumnNo != -1) - errs() << ':' << (ColumnNo+1); - } - - errs() << ": " << Message << '\n'; - - if (LineNo != -1 && ColumnNo != -1) { - errs() << LineContents << '\n'; - - // Print out spaces/tabs before the caret. - for (unsigned i = 0; i != unsigned(ColumnNo); ++i) - errs() << (LineContents[i] == '\t' ? '\t' : ' '); - errs() << "^\n"; - } -} diff --git a/lib/Bitcode/Reader/BitReader.cpp b/lib/Bitcode/Reader/BitReader.cpp index 52851cd..e5b8f7c 100644 --- a/lib/Bitcode/Reader/BitReader.cpp +++ b/lib/Bitcode/Reader/BitReader.cpp @@ -9,6 +9,7 @@ #include "llvm-c/BitReader.h" #include "llvm/Bitcode/ReaderWriter.h" +#include "llvm/LLVMContext.h" #include "llvm/Support/MemoryBuffer.h" #include <string> #include <cstring> @@ -22,7 +23,24 @@ int LLVMParseBitcode(LLVMMemoryBufferRef MemBuf, LLVMModuleRef *OutModule, char **OutMessage) { std::string Message; - *OutModule = wrap(ParseBitcodeFile(unwrap(MemBuf), &Message)); + *OutModule = wrap(ParseBitcodeFile(unwrap(MemBuf), getGlobalContext(), + &Message)); + if (!*OutModule) { + if (OutMessage) + *OutMessage = strdup(Message.c_str()); + return 1; + } + + return 0; +} + +int LLVMParseBitcodeInContext(LLVMMemoryBufferRef MemBuf, + LLVMContextRef ContextRef, + LLVMModuleRef *OutModule, char **OutMessage) { + std::string Message; + + *OutModule = wrap(ParseBitcodeFile(unwrap(MemBuf), *unwrap(ContextRef), + &Message)); if (!*OutModule) { if (OutMessage) *OutMessage = strdup(Message.c_str()); @@ -37,10 +55,29 @@ int LLVMParseBitcode(LLVMMemoryBufferRef MemBuf, Optionally returns a human-readable error message via OutMessage. */ int LLVMGetBitcodeModuleProvider(LLVMMemoryBufferRef MemBuf, LLVMModuleProviderRef *OutMP, - char **OutMessage) { + char **OutMessage) { + std::string Message; + + *OutMP = wrap(getBitcodeModuleProvider(unwrap(MemBuf), getGlobalContext(), + &Message)); + + if (!*OutMP) { + if (OutMessage) + *OutMessage = strdup(Message.c_str()); + return 1; + } + + return 0; +} + +int LLVMGetBitcodeModuleProviderInContext(LLVMMemoryBufferRef MemBuf, + LLVMContextRef ContextRef, + LLVMModuleProviderRef *OutMP, + char **OutMessage) { std::string Message; - *OutMP = wrap(getBitcodeModuleProvider(unwrap(MemBuf), &Message)); + *OutMP = wrap(getBitcodeModuleProvider(unwrap(MemBuf), *unwrap(ContextRef), + &Message)); if (!*OutMP) { if (OutMessage) *OutMessage = strdup(Message.c_str()); diff --git a/lib/Bitcode/Reader/BitcodeReader.cpp b/lib/Bitcode/Reader/BitcodeReader.cpp index 6b9606c..5943de2 100644 --- a/lib/Bitcode/Reader/BitcodeReader.cpp +++ b/lib/Bitcode/Reader/BitcodeReader.cpp @@ -1087,7 +1087,7 @@ bool BitcodeReader::ParseModule(const std::string &ModuleID) { return Error("Malformed block record"); // Otherwise, create the module. 
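// The old C entry points keep their signatures and now simply delegate to
// getGlobalContext(), while the new *InContext variants parse into a
// caller-owned context. A minimal sketch of the new path (file name
// illustrative; assumes the LLVMContextCreate binding from llvm-c/Core.h;
// error handling elided):
static LLVMModuleRef ParseInOwnContext(void) {
  LLVMMemoryBufferRef Buf;
  LLVMModuleRef M = 0;
  char *Msg = 0;
  if (LLVMCreateMemoryBufferWithContentsOfFile("foo.bc", &Buf, &Msg))
    return 0; // could not read the file
  LLVMContextRef Ctx = LLVMContextCreate();
  if (LLVMParseBitcodeInContext(Buf, Ctx, &M, &Msg))
    return 0; // Msg holds the parse error
  return M;   // M lives in Ctx, not in the global context
}
// The C++ reader below stores its LLVMContext the same way and passes it to
// the Module it constructs.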
- TheModule = new Module(ModuleID); + TheModule = new Module(ModuleID, Context); SmallVector<uint64_t, 64> Record; std::vector<std::string> SectionTable; @@ -2090,8 +2090,9 @@ Module *BitcodeReader::releaseModule(std::string *ErrInfo) { /// getBitcodeModuleProvider - lazy function-at-a-time loading from a file. /// ModuleProvider *llvm::getBitcodeModuleProvider(MemoryBuffer *Buffer, + LLVMContext& Context, std::string *ErrMsg) { - BitcodeReader *R = new BitcodeReader(Buffer); + BitcodeReader *R = new BitcodeReader(Buffer, Context); if (R->ParseBitcode()) { if (ErrMsg) *ErrMsg = R->getErrorString(); @@ -2106,9 +2107,11 @@ ModuleProvider *llvm::getBitcodeModuleProvider(MemoryBuffer *Buffer, /// ParseBitcodeFile - Read the specified bitcode file, returning the module. /// If an error occurs, return null and fill in *ErrMsg if non-null. -Module *llvm::ParseBitcodeFile(MemoryBuffer *Buffer, std::string *ErrMsg){ +Module *llvm::ParseBitcodeFile(MemoryBuffer *Buffer, LLVMContext& Context, + std::string *ErrMsg){ BitcodeReader *R; - R = static_cast<BitcodeReader*>(getBitcodeModuleProvider(Buffer, ErrMsg)); + R = static_cast<BitcodeReader*>(getBitcodeModuleProvider(Buffer, Context, + ErrMsg)); if (!R) return 0; // Read in the entire module. diff --git a/lib/Bitcode/Reader/BitcodeReader.h b/lib/Bitcode/Reader/BitcodeReader.h index 0dc470b..662631b 100644 --- a/lib/Bitcode/Reader/BitcodeReader.h +++ b/lib/Bitcode/Reader/BitcodeReader.h @@ -26,6 +26,7 @@ namespace llvm { class MemoryBuffer; + class LLVMContext; //===----------------------------------------------------------------------===// // BitcodeReaderValueList Class @@ -85,6 +86,7 @@ public: }; class BitcodeReader : public ModuleProvider { + LLVMContext& Context; MemoryBuffer *Buffer; BitstreamReader StreamFile; BitstreamCursor Stream; @@ -123,8 +125,8 @@ class BitcodeReader : public ModuleProvider { /// stream) and what linkage the original function had. 
DenseMap<Function*, std::pair<uint64_t, unsigned> > DeferredFunctionInfo; public: - explicit BitcodeReader(MemoryBuffer *buffer) - : Buffer(buffer), ErrorString(0) { + explicit BitcodeReader(MemoryBuffer *buffer, LLVMContext& C) + : Context(C), Buffer(buffer), ErrorString(0) { HasReversedFunctionsWithBodies = false; } ~BitcodeReader() { diff --git a/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/lib/CodeGen/AsmPrinter/AsmPrinter.cpp index bc3af9a..6d12581 100644 --- a/lib/CodeGen/AsmPrinter/AsmPrinter.cpp +++ b/lib/CodeGen/AsmPrinter/AsmPrinter.cpp @@ -42,8 +42,8 @@ AsmVerbose("asm-verbose", cl::desc("Add comments to directives."), char AsmPrinter::ID = 0; AsmPrinter::AsmPrinter(raw_ostream &o, TargetMachine &tm, - const TargetAsmInfo *T, CodeGenOpt::Level OL, bool VDef) - : MachineFunctionPass(&ID), FunctionNumber(0), OptLevel(OL), O(o), + const TargetAsmInfo *T, bool VDef) + : MachineFunctionPass(&ID), FunctionNumber(0), O(o), TM(tm), TAI(T), TRI(tm.getRegisterInfo()), IsInTextSection(false), LastMI(0), LastFn(0), Counter(~0U), PrevDLT(0, ~0U, ~0U) { diff --git a/lib/CodeGen/AsmPrinter/DwarfDebug.cpp b/lib/CodeGen/AsmPrinter/DwarfDebug.cpp index cbe542b..547140f 100644 --- a/lib/CodeGen/AsmPrinter/DwarfDebug.cpp +++ b/lib/CodeGen/AsmPrinter/DwarfDebug.cpp @@ -221,7 +221,7 @@ DbgScope::~DbgScope() { } // end llvm namespace DwarfDebug::DwarfDebug(raw_ostream &OS, AsmPrinter *A, const TargetAsmInfo *T) - : Dwarf(OS, A, T, "dbg"), MainCU(0), + : Dwarf(OS, A, T, "dbg"), ModuleCU(0), AbbreviationsSet(InitAbbreviationsSetSize), Abbreviations(), ValuesSet(InitValuesSetSize), Values(), StringPool(), SectionMap(), SectionSourceLines(), didInitial(false), shouldEmit(false), @@ -678,9 +678,6 @@ void DwarfDebug::ConstructTypeDIE(CompileUnit *DW_Unit, DIE &Buffer, if (Element.getTag() == dwarf::DW_TAG_subprogram) ElemDie = CreateSubprogramDIE(DW_Unit, DISubprogram(Element.getGV())); - else if (Element.getTag() == dwarf::DW_TAG_variable) // ?? - ElemDie = CreateGlobalVariableDIE(DW_Unit, - DIGlobalVariable(Element.getGV())); else ElemDie = CreateMemberDIE(DW_Unit, DIDerivedType(Element.getGV())); @@ -1093,13 +1090,8 @@ void DwarfDebug::ConstructFunctionDbgScope(DbgScope *RootScope, // Get the subprogram debug information entry. DISubprogram SPD(Desc.getGV()); - // Get the compile unit context. - CompileUnit *Unit = MainCU; - if (!Unit) - Unit = &FindCompileUnit(SPD.getCompileUnit()); - // Get the subprogram die. - DIE *SPDie = Unit->getDieMapSlotFor(SPD.getGV()); + DIE *SPDie = ModuleCU->getDieMapSlotFor(SPD.getGV()); assert(SPDie && "Missing subprogram descriptor"); if (!AbstractScope) { @@ -1112,55 +1104,27 @@ void DwarfDebug::ConstructFunctionDbgScope(DbgScope *RootScope, AddAddress(SPDie, dwarf::DW_AT_frame_base, Location); } - ConstructDbgScope(RootScope, 0, 0, SPDie, Unit); + ConstructDbgScope(RootScope, 0, 0, SPDie, ModuleCU); } /// ConstructDefaultDbgScope - Construct a default scope for the subprogram. /// void DwarfDebug::ConstructDefaultDbgScope(MachineFunction *MF) { const char *FnName = MF->getFunction()->getNameStart(); - if (MainCU) { - StringMap<DIE*> &Globals = MainCU->getGlobals(); - StringMap<DIE*>::iterator GI = Globals.find(FnName); - if (GI != Globals.end()) { - DIE *SPDie = GI->second; - - // Add the function bounds. 
- AddLabel(SPDie, dwarf::DW_AT_low_pc, dwarf::DW_FORM_addr, - DWLabel("func_begin", SubprogramCount)); - AddLabel(SPDie, dwarf::DW_AT_high_pc, dwarf::DW_FORM_addr, - DWLabel("func_end", SubprogramCount)); - - MachineLocation Location(RI->getFrameRegister(*MF)); - AddAddress(SPDie, dwarf::DW_AT_frame_base, Location); - return; - } - } else { - for (unsigned i = 0, e = CompileUnits.size(); i != e; ++i) { - CompileUnit *Unit = CompileUnits[i]; - StringMap<DIE*> &Globals = Unit->getGlobals(); - StringMap<DIE*>::iterator GI = Globals.find(FnName); - if (GI != Globals.end()) { - DIE *SPDie = GI->second; - - // Add the function bounds. - AddLabel(SPDie, dwarf::DW_AT_low_pc, dwarf::DW_FORM_addr, - DWLabel("func_begin", SubprogramCount)); - AddLabel(SPDie, dwarf::DW_AT_high_pc, dwarf::DW_FORM_addr, - DWLabel("func_end", SubprogramCount)); - - MachineLocation Location(RI->getFrameRegister(*MF)); - AddAddress(SPDie, dwarf::DW_AT_frame_base, Location); - return; - } - } + StringMap<DIE*> &Globals = ModuleCU->getGlobals(); + StringMap<DIE*>::iterator GI = Globals.find(FnName); + if (GI != Globals.end()) { + DIE *SPDie = GI->second; + + // Add the function bounds. + AddLabel(SPDie, dwarf::DW_AT_low_pc, dwarf::DW_FORM_addr, + DWLabel("func_begin", SubprogramCount)); + AddLabel(SPDie, dwarf::DW_AT_high_pc, dwarf::DW_FORM_addr, + DWLabel("func_end", SubprogramCount)); + + MachineLocation Location(RI->getFrameRegister(*MF)); + AddAddress(SPDie, dwarf::DW_AT_frame_base, Location); } - -#if 0 - // FIXME: This is causing an abort because C++ mangled names are compared with - // their unmangled counterparts. See PR2885. Don't do this assert. - assert(0 && "Couldn't find DIE for machine function!"); -#endif } /// GetOrCreateSourceID - Look up the source id with the given directory and @@ -1233,10 +1197,11 @@ void DwarfDebug::ConstructCompileUnit(GlobalVariable *GV) { dwarf::DW_FORM_data1, RVer); CompileUnit *Unit = new CompileUnit(ID, Die); - if (DIUnit.isMain()) { - assert(!MainCU && "Multiple main compile units are found!"); - MainCU = Unit; - } + if (!ModuleCU && DIUnit.isMain()) { + // Use first compile unit marked as isMain as the compile unit + // for this module. + ModuleCU = Unit; + } CompileUnitMap[DIUnit.getGV()] = Unit; CompileUnits.push_back(Unit); @@ -1244,16 +1209,13 @@ void DwarfDebug::ConstructCompileUnit(GlobalVariable *GV) { void DwarfDebug::ConstructGlobalVariableDIE(GlobalVariable *GV) { DIGlobalVariable DI_GV(GV); - CompileUnit *DW_Unit = MainCU; - if (!DW_Unit) - DW_Unit = &FindCompileUnit(DI_GV.getCompileUnit()); // Check for pre-existence. - DIE *&Slot = DW_Unit->getDieMapSlotFor(DI_GV.getGV()); + DIE *&Slot = ModuleCU->getDieMapSlotFor(DI_GV.getGV()); if (Slot) return; - DIE *VariableDie = CreateGlobalVariableDIE(DW_Unit, DI_GV); + DIE *VariableDie = CreateGlobalVariableDIE(ModuleCU, DI_GV); // Add address. DIEBlock *Block = new DIEBlock(); @@ -1267,22 +1229,19 @@ void DwarfDebug::ConstructGlobalVariableDIE(GlobalVariable *GV) { Slot = VariableDie; // Add to context owner. - DW_Unit->getDie()->AddChild(VariableDie); + ModuleCU->getDie()->AddChild(VariableDie); // Expose as global. FIXME - need to check external flag. std::string Name; - DW_Unit->AddGlobal(DI_GV.getName(Name), VariableDie); + ModuleCU->AddGlobal(DI_GV.getName(Name), VariableDie); return; } void DwarfDebug::ConstructSubprogram(GlobalVariable *GV) { DISubprogram SP(GV); - CompileUnit *Unit = MainCU; - if (!Unit) - Unit = &FindCompileUnit(SP.getCompileUnit()); // Check for pre-existence. 
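// The same consolidation continues here: lookups that previously had to
// locate the defining compile unit (MainCU when set, otherwise
// FindCompileUnit(...)) now go straight to ModuleCU's DIE map, since every
// DIE for the module is emitted into that single unit.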
- DIE *&Slot = Unit->getDieMapSlotFor(GV); + DIE *&Slot = ModuleCU->getDieMapSlotFor(GV); if (Slot) return; @@ -1291,17 +1250,17 @@ void DwarfDebug::ConstructSubprogram(GlobalVariable *GV) { // class type. return; - DIE *SubprogramDie = CreateSubprogramDIE(Unit, SP); + DIE *SubprogramDie = CreateSubprogramDIE(ModuleCU, SP); // Add to map. Slot = SubprogramDie; // Add to context owner. - Unit->getDie()->AddChild(SubprogramDie); + ModuleCU->getDie()->AddChild(SubprogramDie); // Expose as global. std::string Name; - Unit->AddGlobal(SP.getName(Name), SubprogramDie); + ModuleCU->AddGlobal(SP.getName(Name), SubprogramDie); return; } @@ -1331,6 +1290,11 @@ void DwarfDebug::BeginModule(Module *M, MachineModuleInfo *mmi) { return; } + // If no compile unit marked as main was seen for this module, + // arbitrarily select the first compile unit. + if (!ModuleCU) + ModuleCU = CompileUnits[0]; + // If there is not any debug info available for any global variables and any // subprograms then there is not any debug info to emit. if (GVs.empty() && SPs.empty()) { @@ -1707,9 +1671,6 @@ unsigned DwarfDebug::RecordInlinedFnStart(DISubprogram &SP, DICompileUnit CU, if (TimePassesIsEnabled) DebugTimer->startTimer(); - CompileUnit *Unit = MainCU; - if (!Unit) - Unit = &FindCompileUnit(SP.getCompileUnit()); GlobalVariable *GV = SP.getGV(); DenseMap<const GlobalVariable *, DbgScope *>::iterator II = AbstractInstanceRootMap.find(GV); if (II == AbstractInstanceRootMap.end()) { DbgScope *Scope = new DbgScope(NULL, DIDescriptor(GV)); // Get the compile unit context. - DIE *SPDie = Unit->getDieMapSlotFor(GV); + DIE *SPDie = ModuleCU->getDieMapSlotFor(GV); if (!SPDie) - SPDie = CreateSubprogramDIE(Unit, SP, false, true); + SPDie = CreateSubprogramDIE(ModuleCU, SP, false, true); // Mark as being inlined. This makes this subprogram entry an abstract // instance root. @@ -1741,12 +1702,12 @@ unsigned DwarfDebug::RecordInlinedFnStart(DISubprogram &SP, DICompileUnit CU, // Create a concrete inlined instance for this inlined function. DbgConcreteScope *ConcreteScope = new DbgConcreteScope(DIDescriptor(GV)); DIE *ScopeDie = new DIE(dwarf::DW_TAG_inlined_subroutine); - ScopeDie->setAbstractCompileUnit(Unit); + ScopeDie->setAbstractCompileUnit(ModuleCU); - DIE *Origin = Unit->getDieMapSlotFor(GV); + DIE *Origin = ModuleCU->getDieMapSlotFor(GV); AddDIEEntry(ScopeDie, dwarf::DW_AT_abstract_origin, dwarf::DW_FORM_ref4, Origin); - AddUInt(ScopeDie, dwarf::DW_AT_call_file, 0, Unit->getID()); + AddUInt(ScopeDie, dwarf::DW_AT_call_file, 0, ModuleCU->getID()); AddUInt(ScopeDie, dwarf::DW_AT_call_line, 0, Line); AddUInt(ScopeDie, dwarf::DW_AT_call_column, 0, Col); @@ -1907,22 +1868,8 @@ void DwarfDebug::SizeAndOffsets() { sizeof(int32_t) + // Offset Into Abbrev. Section sizeof(int8_t); // Pointer Size (in bytes) - // Process base compile unit. - if (MainCU) { - SizeAndOffsetDie(MainCU->getDie(), Offset, true); - CompileUnitOffsets[MainCU] = 0; - return; - } - - // Process all compile units. - unsigned PrevOffset = 0; - - for (unsigned i = 0, e = CompileUnits.size(); i != e; ++i) { - CompileUnit *Unit = CompileUnits[i]; - CompileUnitOffsets[Unit] = PrevOffset; - PrevOffset += SizeAndOffsetDie(Unit->getDie(), Offset, true) - + sizeof(int32_t); // FIXME - extra pad for gdb bug. - } + SizeAndOffsetDie(ModuleCU->getDie(), Offset, true); + CompileUnitOffsets[ModuleCU] = 0; } /// EmitInitial - Emit initial Dwarf declarations.
This is necessary for cc @@ -2067,13 +2014,7 @@ void DwarfDebug::EmitDebugInfo() { // Start debug info section. Asm->SwitchToDataSection(TAI->getDwarfInfoSection()); - if (MainCU) { - EmitDebugInfoPerCU(MainCU); - return; - } - - for (unsigned i = 0, e = CompileUnits.size(); i != e; ++i) - EmitDebugInfoPerCU(CompileUnits[i]); + EmitDebugInfoPerCU(ModuleCU); } /// EmitAbbreviations - Emit the abbreviation section. @@ -2405,13 +2346,7 @@ void DwarfDebug::EmitDebugPubNames() { // Start the dwarf pubnames section. Asm->SwitchToDataSection(TAI->getDwarfPubNamesSection()); - if (MainCU) { - EmitDebugPubNamesPerCU(MainCU); - return; - } - - for (unsigned i = 0, e = CompileUnits.size(); i != e; ++i) - EmitDebugPubNamesPerCU(CompileUnits[i]); + EmitDebugPubNamesPerCU(ModuleCU); } /// EmitDebugStr - Emit visible names into a debug str section. @@ -2521,7 +2456,7 @@ void DwarfDebug::EmitDebugInlineInfo() { if (!TAI->doesDwarfUsesInlineInfoSection()) return; - if (!MainCU) + if (!ModuleCU) return; Asm->SwitchToDataSection(TAI->getDwarfDebugInlineSection()); @@ -2555,7 +2490,7 @@ void DwarfDebug::EmitDebugInlineInfo() { for (SmallVector<unsigned, 4>::iterator LI = Labels.begin(), LE = Labels.end(); LI != LE; ++LI) { - DIE *SP = MainCU->getDieMapSlotFor(GV); + DIE *SP = ModuleCU->getDieMapSlotFor(GV); Asm->EmitInt32(SP->getOffset()); Asm->EOL("DIE offset"); if (TD->getPointerSize() == sizeof(int32_t)) diff --git a/lib/CodeGen/AsmPrinter/DwarfDebug.h b/lib/CodeGen/AsmPrinter/DwarfDebug.h index 111ec33..101dc70 100644 --- a/lib/CodeGen/AsmPrinter/DwarfDebug.h +++ b/lib/CodeGen/AsmPrinter/DwarfDebug.h @@ -70,9 +70,8 @@ class VISIBILITY_HIDDEN DwarfDebug : public Dwarf { /// SmallVector<CompileUnit *, 8> CompileUnits; - /// MainCU - Some platform prefers one compile unit per .o file. In such - /// cases, all dies are inserted in MainCU. - CompileUnit *MainCU; + /// ModuleCU - All DIEs are inserted in ModuleCU. + CompileUnit *ModuleCU; /// AbbreviationsSet - Used to uniquely define abbreviations. 
   ///
diff --git a/lib/CodeGen/CMakeLists.txt b/lib/CodeGen/CMakeLists.txt
index eeefe31..48f17d0 100644
--- a/lib/CodeGen/CMakeLists.txt
+++ b/lib/CodeGen/CMakeLists.txt
@@ -64,4 +64,4 @@ add_llvm_library(LLVMCodeGen
   VirtRegRewriter.cpp
   )
 
-target_link_libraries (LLVMCodeGen LLVMCore)
+target_link_libraries (LLVMCodeGen LLVMCore LLVMScalarOpts)
diff --git a/lib/CodeGen/ELF.h b/lib/CodeGen/ELF.h
index 8d92373..7e983a4 100644
--- a/lib/CodeGen/ELF.h
+++ b/lib/CodeGen/ELF.h
@@ -20,14 +20,12 @@
 #ifndef CODEGEN_ELF_H
 #define CODEGEN_ELF_H
 
-#include "llvm/GlobalVariable.h"
 #include "llvm/CodeGen/BinaryObject.h"
 #include "llvm/CodeGen/MachineRelocation.h"
 #include "llvm/Support/DataTypes.h"
-#include <cstring>
 
 namespace llvm {
-  class BinaryObject;
+  class GlobalValue;
 
   // Identification Indexes
   enum {
@@ -172,41 +170,25 @@ namespace llvm {
       IsConstant(false), NameIdx(0), Value(0), Size(0), Info(0),
       Other(STV_DEFAULT), SectionIdx(ELFSection::SHN_UNDEF),
-      SymTabIdx(0) {
-      if (!GV)
-        return;
-
-      switch (GV->getVisibility()) {
-      default:
-        assert(0 && "unknown visibility type");
-      case GlobalValue::DefaultVisibility:
-        Other = STV_DEFAULT;
-        break;
-      case GlobalValue::HiddenVisibility:
-        Other = STV_HIDDEN;
-        break;
-      case GlobalValue::ProtectedVisibility:
-        Other = STV_PROTECTED;
-        break;
-      }
-    }
-
-    unsigned getBind() {
-      return (Info >> 4) & 0xf;
-    }
+      SymTabIdx(0) {}
 
-    unsigned getType() {
-      return Info & 0xf;
-    }
+    unsigned getBind() { return (Info >> 4) & 0xf; }
+    unsigned getType() { return Info & 0xf; }
 
     void setBind(unsigned X) {
       assert(X == (X & 0xF) && "Bind value out of range!");
       Info = (Info & 0x0F) | (X << 4);
     }
+
     void setType(unsigned X) {
       assert(X == (X & 0xF) && "Type value out of range!");
       Info = (Info & 0xF0) | X;
     }
+
+    void setVisibility(unsigned V) {
+      assert(V == (V & 0x3) && "Visibility value out of range!");
+      Other = V;
+    }
   };
 
   /// ELFRelocation - This class contains all the information necessary to
diff --git a/lib/CodeGen/ELFCodeEmitter.cpp b/lib/CodeGen/ELFCodeEmitter.cpp
index 168fed5..691f194 100644
--- a/lib/CodeGen/ELFCodeEmitter.cpp
+++ b/lib/CodeGen/ELFCodeEmitter.cpp
@@ -9,6 +9,8 @@
 
 #define DEBUG_TYPE "elfce"
 
+#include "ELF.h"
+#include "ELFWriter.h"
 #include "ELFCodeEmitter.h"
 #include "llvm/Constants.h"
 #include "llvm/DerivedTypes.h"
@@ -16,8 +18,10 @@
 #include "llvm/CodeGen/BinaryObject.h"
 #include "llvm/CodeGen/MachineConstantPool.h"
 #include "llvm/CodeGen/MachineJumpTableInfo.h"
+#include "llvm/CodeGen/MachineRelocation.h"
 #include "llvm/Target/TargetData.h"
 #include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetAsmInfo.h"
 #include "llvm/Support/Debug.h"
 
 //===----------------------------------------------------------------------===//
@@ -40,10 +44,11 @@ void ELFCodeEmitter::startFunction(MachineFunction &MF) {
   BufferBegin = &BD[0];
   BufferEnd = BufferBegin + BD.capacity();
 
-  // Align the output buffer with function alignment, and
-  // upgrade the section alignment if required
-  unsigned Align =
-    TM.getELFWriterInfo()->getFunctionAlignment(MF.getFunction());
+  // Get the function alignment in bytes
+  unsigned Align = (1 << MF.getAlignment());
+
+  // Align the section size to the function alignment, so the function can
+  // start at an aligned offset; also update the section alignment if needed.
   if (ES->Align < Align) ES->Align = Align;
   ES->Size = (ES->Size + (Align-1)) & (-Align);
@@ -66,47 +71,35 @@ void ELFCodeEmitter::startFunction(MachineFunction &MF) {
 
 /// finishFunction - This callback is invoked after the function is completely
 /// finished.
bool ELFCodeEmitter::finishFunction(MachineFunction &MF) { - // Add a symbol to represent the function. - ELFSym FnSym(MF.getFunction()); - // Update Section Size ES->Size = CurBufferPtr - BufferBegin; - // Set the symbol type as a function + // Add a symbol to represent the function. + const Function *F = MF.getFunction(); + ELFSym FnSym(F); FnSym.setType(ELFSym::STT_FUNC); + FnSym.setBind(EW.getGlobalELFLinkage(F)); + FnSym.setVisibility(EW.getGlobalELFVisibility(F)); FnSym.SectionIdx = ES->SectionIdx; FnSym.Size = CurBufferPtr-FnStartPtr; // Offset from start of Section FnSym.Value = FnStartPtr-BufferBegin; - // Figure out the binding (linkage) of the symbol. - switch (MF.getFunction()->getLinkage()) { - default: - // appending linkage is illegal for functions. - assert(0 && "Unknown linkage type!"); - case GlobalValue::ExternalLinkage: - FnSym.setBind(ELFSym::STB_GLOBAL); - EW.SymbolList.push_back(FnSym); - break; - case GlobalValue::LinkOnceAnyLinkage: - case GlobalValue::LinkOnceODRLinkage: - case GlobalValue::WeakAnyLinkage: - case GlobalValue::WeakODRLinkage: - FnSym.setBind(ELFSym::STB_WEAK); - EW.SymbolList.push_back(FnSym); - break; - case GlobalValue::PrivateLinkage: - assert (0 && "PrivateLinkage should not be in the symbol table."); - case GlobalValue::InternalLinkage: - FnSym.setBind(ELFSym::STB_LOCAL); - EW.SymbolList.push_front(FnSym); - break; + // Locals should go on the symbol list front + if (!F->hasPrivateLinkage()) { + if (FnSym.getBind() == ELFSym::STB_LOCAL) + EW.SymbolList.push_front(FnSym); + else + EW.SymbolList.push_back(FnSym); } // Emit constant pool to appropriate section(s) emitConstantPool(MF.getConstantPool()); + // Emit jump tables to appropriate section + emitJumpTables(MF.getJumpTableInfo()); + // Relocations // ----------- // If we have emitted any relocations to function-specific objects such as @@ -126,13 +119,22 @@ bool ELFCodeEmitter::finishFunction(MachineFunction &MF) { Addr = getConstantPoolEntryAddress(MR.getConstantPoolIndex()); MR.setConstantVal(CPSections[MR.getConstantPoolIndex()]); MR.setResultPointer((void*)Addr); + } else if (MR.isJumpTableIndex()) { + Addr = getJumpTableEntryAddress(MR.getJumpTableIndex()); + MR.setResultPointer((void*)Addr); + MR.setConstantVal(JumpTableSectionIdx); } else { assert(0 && "Unhandled relocation type"); } ES->addRelocation(MR); } - Relocations.clear(); + // Clear per-function data structures. + Relocations.clear(); + CPLocations.clear(); + CPSections.clear(); + JTLocations.clear(); + MBBLocations.clear(); return false; } @@ -168,4 +170,56 @@ void ELFCodeEmitter::emitConstantPool(MachineConstantPool *MCP) { } } +/// emitJumpTables - Emit all the jump tables for a given jump table info +/// record to the appropriate section. 
+void ELFCodeEmitter::emitJumpTables(MachineJumpTableInfo *MJTI) { + const std::vector<MachineJumpTableEntry> &JT = MJTI->getJumpTables(); + if (JT.empty()) return; + + // FIXME: handle PIC codegen + assert(TM.getRelocationModel() != Reloc::PIC_ && + "PIC codegen not yet handled for elf jump tables!"); + + const TargetAsmInfo *TAI = TM.getTargetAsmInfo(); + + // Get the ELF Section to emit the jump table + unsigned Align = TM.getTargetData()->getPointerABIAlignment(); + std::string JTName(TAI->getJumpTableDataSection()); + ELFSection &JTSection = EW.getJumpTableSection(JTName, Align); + JumpTableSectionIdx = JTSection.SectionIdx; + + // Entries in the JT Section are relocated against the text section + ELFSection &TextSection = EW.getTextSection(); + + // For each JT, record its offset from the start of the section + for (unsigned i = 0, e = JT.size(); i != e; ++i) { + const std::vector<MachineBasicBlock*> &MBBs = JT[i].MBBs; + + DOUT << "JTSection.size(): " << JTSection.size() << "\n"; + DOUT << "JTLocations.size: " << JTLocations.size() << "\n"; + + // Record JT 'i' offset in the JT section + JTLocations.push_back(JTSection.size()); + + // Each MBB entry in the Jump table section has a relocation entry + // against the current text section. + for (unsigned mi = 0, me = MBBs.size(); mi != me; ++mi) { + MachineRelocation MR = + MachineRelocation::getBB(JTSection.size(), + MachineRelocation::VANILLA, + MBBs[mi]); + + // Offset of JT 'i' in JT section + MR.setResultPointer((void*)getMachineBasicBlockAddress(MBBs[mi])); + MR.setConstantVal(TextSection.SectionIdx); + + // Add the relocation to the Jump Table section + JTSection.addRelocation(MR); + + // Output placeholder for MBB in the JT section + JTSection.emitWord(0); + } + } +} + } // end namespace llvm diff --git a/lib/CodeGen/ELFCodeEmitter.h b/lib/CodeGen/ELFCodeEmitter.h index c309ef7..982aebf 100644 --- a/lib/CodeGen/ELFCodeEmitter.h +++ b/lib/CodeGen/ELFCodeEmitter.h @@ -10,11 +10,12 @@ #ifndef ELFCODEEMITTER_H #define ELFCODEEMITTER_H -#include "ELFWriter.h" #include "llvm/CodeGen/MachineCodeEmitter.h" #include <vector> namespace llvm { + class ELFWriter; + class ELFSection; /// ELFCodeEmitter - This class is used by the ELFWriter to /// emit the code for functions to the ELF file. @@ -39,6 +40,10 @@ namespace llvm { /// containing the constant pool entry for that index. std::vector<unsigned> CPSections; + /// JTLocations - This is a map of jump table indices to offsets from the + /// start of the section for that jump table index. + std::vector<uintptr_t> JTLocations; + /// MBBLocations - This vector is a mapping from MBB ID's to their address. /// It is filled in by the StartMachineBasicBlock callback and queried by /// the getMachineBasicBlockAddress callback. 
@@ -47,8 +52,12 @@ namespace llvm { /// FnStartPtr - Pointer to the start location of the current function /// in the buffer uint8_t *FnStartPtr; + + /// JumpTableSectionIdx - Holds the index of the Jump Table Section + unsigned JumpTableSectionIdx; public: - explicit ELFCodeEmitter(ELFWriter &ew) : EW(ew), TM(EW.TM) {} + explicit ELFCodeEmitter(ELFWriter &ew) : EW(ew), TM(EW.TM), + JumpTableSectionIdx(0) {} void startFunction(MachineFunction &F); bool finishFunction(MachineFunction &F); @@ -63,25 +72,20 @@ namespace llvm { MBBLocations[MBB->getNumber()] = getCurrentPCOffset(); } - virtual uintptr_t getMachineBasicBlockAddress(MachineBasicBlock *MBB) { - assert(MBBLocations.size() > (unsigned)MBB->getNumber() && - MBBLocations[MBB->getNumber()] && "MBB not emitted!"); - return MBBLocations[MBB->getNumber()]; - } - virtual uintptr_t getConstantPoolEntryAddress(unsigned Index) const { assert(CPLocations.size() > Index && "CP not emitted!"); return CPLocations[Index]; } virtual uintptr_t getJumpTableEntryAddress(unsigned Index) const { - assert(0 && "JT not implementated yet!"); - return 0; + assert(JTLocations.size() > Index && "JT not emitted!"); + return JTLocations[Index]; } virtual uintptr_t getMachineBasicBlockAddress(MachineBasicBlock *MBB) const { - assert(0 && "JT not implementated yet!"); - return 0; + assert(MBBLocations.size() > (unsigned)MBB->getNumber() && + MBBLocations[MBB->getNumber()] && "MBB not emitted!"); + return MBBLocations[MBB->getNumber()]; } virtual uintptr_t getLabelAddress(uint64_t Label) const { @@ -99,7 +103,11 @@ namespace llvm { /// the constant should live in and emit the constant. void emitConstantPool(MachineConstantPool *MCP); - virtual void setModuleInfo(llvm::MachineModuleInfo* MMI) { } + /// emitJumpTables - Emit all the jump tables for a given jump table info + /// record to the appropriate section. + void emitJumpTables(MachineJumpTableInfo *MJTI); + + virtual void setModuleInfo(llvm::MachineModuleInfo* MMI) {} /// JIT SPECIFIC FUNCTIONS - DO NOT IMPLEMENT THESE HERE! 
void startGVStub(const GlobalValue* F, unsigned StubSize, diff --git a/lib/CodeGen/ELFWriter.cpp b/lib/CodeGen/ELFWriter.cpp index 041defa..9e91524 100644 --- a/lib/CodeGen/ELFWriter.cpp +++ b/lib/CodeGen/ELFWriter.cpp @@ -30,9 +30,9 @@ #define DEBUG_TYPE "elfwriter" +#include "ELF.h" #include "ELFWriter.h" #include "ELFCodeEmitter.h" -#include "ELF.h" #include "llvm/Constants.h" #include "llvm/Module.h" #include "llvm/PassManager.h" @@ -41,14 +41,14 @@ #include "llvm/CodeGen/FileWriters.h" #include "llvm/CodeGen/MachineCodeEmitter.h" #include "llvm/CodeGen/MachineConstantPool.h" -#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/Target/TargetAsmInfo.h" #include "llvm/Target/TargetData.h" +#include "llvm/Target/TargetELFWriterInfo.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Support/Mangler.h" #include "llvm/Support/Streams.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Support/Debug.h" -#include <list> using namespace llvm; char ELFWriter::ID = 0; @@ -141,7 +141,22 @@ bool ELFWriter::doInitialization(Module &M) { return false; } -unsigned ELFWriter::getGlobalELFLinkage(const GlobalVariable *GV) { +unsigned ELFWriter::getGlobalELFVisibility(const GlobalValue *GV) { + switch (GV->getVisibility()) { + default: + assert(0 && "unknown visibility type"); + case GlobalValue::DefaultVisibility: + return ELFSym::STV_DEFAULT; + case GlobalValue::HiddenVisibility: + return ELFSym::STV_HIDDEN; + case GlobalValue::ProtectedVisibility: + return ELFSym::STV_PROTECTED; + } + + return 0; +} + +unsigned ELFWriter::getGlobalELFLinkage(const GlobalValue *GV) { if (GV->hasInternalLinkage()) return ELFSym::STB_LOCAL; @@ -151,43 +166,51 @@ unsigned ELFWriter::getGlobalELFLinkage(const GlobalVariable *GV) { return ELFSym::STB_GLOBAL; } +// getElfSectionFlags - Get the ELF Section Header based on the +// flags defined in ELFTargetAsmInfo. +unsigned ELFWriter::getElfSectionFlags(unsigned Flags) { + unsigned ElfSectionFlags = ELFSection::SHF_ALLOC; + + if (Flags & SectionFlags::Code) + ElfSectionFlags |= ELFSection::SHF_EXECINSTR; + if (Flags & SectionFlags::Writeable) + ElfSectionFlags |= ELFSection::SHF_WRITE; + if (Flags & SectionFlags::Mergeable) + ElfSectionFlags |= ELFSection::SHF_MERGE; + if (Flags & SectionFlags::TLS) + ElfSectionFlags |= ELFSection::SHF_TLS; + if (Flags & SectionFlags::Strings) + ElfSectionFlags |= ELFSection::SHF_STRINGS; + + return ElfSectionFlags; +} + // For global symbols without a section, return the Null section as a // placeholder ELFSection &ELFWriter::getGlobalSymELFSection(const GlobalVariable *GV, ELFSym &Sym) { - const Section *S = TAI->SectionForGlobal(GV); - unsigned Flags = S->getFlags(); - unsigned SectionType = ELFSection::SHT_PROGBITS; - unsigned SHdrFlags = ELFSection::SHF_ALLOC; - DOUT << "Section " << S->getName() << " for global " << GV->getName() << "\n"; - - // If this is an external global, the symbol does not have a section. + // If this is a declaration, the symbol does not have a section. 
if (!GV->hasInitializer()) { Sym.SectionIdx = ELFSection::SHN_UNDEF; return getNullSection(); } + // Get the name and flags of the section for the global + const Section *S = TAI->SectionForGlobal(GV); + unsigned SectionType = ELFSection::SHT_PROGBITS; + unsigned SectionFlags = getElfSectionFlags(S->getFlags()); + DOUT << "Section " << S->getName() << " for global " << GV->getName() << "\n"; + const TargetData *TD = TM.getTargetData(); unsigned Align = TD->getPreferredAlignment(GV); Constant *CV = GV->getInitializer(); - if (Flags & SectionFlags::Code) - SHdrFlags |= ELFSection::SHF_EXECINSTR; - if (Flags & SectionFlags::Writeable) - SHdrFlags |= ELFSection::SHF_WRITE; - if (Flags & SectionFlags::Mergeable) - SHdrFlags |= ELFSection::SHF_MERGE; - if (Flags & SectionFlags::TLS) - SHdrFlags |= ELFSection::SHF_TLS; - if (Flags & SectionFlags::Strings) - SHdrFlags |= ELFSection::SHF_STRINGS; - // If this global has a zero initializer, go to .bss or common section. // Variables are part of the common block if they are zero initialized // and allowed to be merged with other symbols. if (CV->isNullValue() || isa<UndefValue>(CV)) { SectionType = ELFSection::SHT_NOBITS; - ELFSection &ElfS = getSection(S->getName(), SectionType, SHdrFlags); + ELFSection &ElfS = getSection(S->getName(), SectionType, SectionFlags); if (GV->hasLinkOnceLinkage() || GV->hasWeakLinkage() || GV->hasCommonLinkage()) { Sym.SectionIdx = ELFSection::SHN_COMMON; @@ -203,7 +226,7 @@ ELFSection &ELFWriter::getGlobalSymELFSection(const GlobalVariable *GV, } Sym.IsConstant = true; - ELFSection &ElfS = getSection(S->getName(), SectionType, SHdrFlags); + ELFSection &ElfS = getSection(S->getName(), SectionType, SectionFlags); Sym.SectionIdx = ElfS.SectionIdx; ElfS.Align = std::max(ElfS.Align, Align); return ElfS; @@ -213,6 +236,7 @@ void ELFWriter::EmitFunctionDeclaration(const Function *F) { ELFSym GblSym(F); GblSym.setBind(ELFSym::STB_GLOBAL); GblSym.setType(ELFSym::STT_NOTYPE); + GblSym.setVisibility(ELFSym::STV_DEFAULT); GblSym.SectionIdx = ELFSection::SHN_UNDEF; SymbolList.push_back(GblSym); } @@ -222,6 +246,7 @@ void ELFWriter::EmitGlobalVar(const GlobalVariable *GV) { unsigned Align=0, Size=0; ELFSym GblSym(GV); GblSym.setBind(SymBind); + GblSym.setVisibility(getGlobalELFVisibility(GV)); if (GV->hasInitializer()) { GblSym.setType(ELFSym::STT_OBJECT); @@ -402,6 +427,7 @@ bool ELFWriter::doFinalization(Module &M) { SectionSym.Size = 0; SectionSym.setBind(ELFSym::STB_LOCAL); SectionSym.setType(ELFSym::STT_SECTION); + SectionSym.setVisibility(ELFSym::STV_DEFAULT); // Local symbols go in the list front SymbolList.push_front(SectionSym); @@ -443,7 +469,8 @@ void ELFWriter::EmitRelocations() { // Get the relocation section for section 'I' bool HasRelA = TEW->hasRelocationAddend(); - ELFSection &RelSec = getRelocSection(I->getName(), HasRelA); + ELFSection &RelSec = getRelocSection(I->getName(), HasRelA, + TEW->getPrefELFAlignment()); // 'Link' - Section hdr idx of the associated symbol table // 'Info' - Section hdr idx of the section to which the relocation applies diff --git a/lib/CodeGen/ELFWriter.h b/lib/CodeGen/ELFWriter.h index e0e71d0..bab118c 100644 --- a/lib/CodeGen/ELFWriter.h +++ b/lib/CodeGen/ELFWriter.h @@ -16,21 +16,23 @@ #include "llvm/ADT/SetVector.h" #include "llvm/CodeGen/MachineFunctionPass.h" -#include "llvm/Support/Debug.h" -#include "llvm/Target/TargetAsmInfo.h" -#include "llvm/Target/TargetELFWriterInfo.h" -#include "ELF.h" #include <list> #include <map> namespace llvm { class BinaryObject; + class Constant; 
  class ConstantStruct;
   class ELFCodeEmitter;
   class GlobalVariable;
   class Mangler;
   class MachineCodeEmitter;
+  class TargetAsmInfo;
+  class TargetELFWriterInfo;
   class raw_ostream;
+  class ELFSection;
+  class ELFSym;
+  class ELFRelocation;
 
   /// ELFWriter - This class implements the common target-independent code for
   /// writing ELF files. Targets should derive a class from this to
@@ -116,19 +118,33 @@ namespace llvm {
     /// is seen, the symbol will move from this list to the SymbolList.
     SetVector<GlobalValue*> PendingGlobals;
 
+    // Remove tab from section name prefix. This is necessary because TAI
+    // sometimes returns a section name prefixed with a "\t" char. This is
+    // a little bit dirty. FIXME: find a better approach, maybe add more
+    // methods to TAI to get the clean name?
+    void fixNameForSection(std::string &Name) {
+      size_t Pos = Name.find("\t");
+      if (Pos != std::string::npos)
+        Name.erase(Pos, 1);
+
+      Pos = Name.find(".section ");
+      if (Pos != std::string::npos)
+        Name.erase(Pos, 9);
+
+      Pos = Name.find("\n");
+      if (Pos != std::string::npos)
+        Name.erase(Pos, 1);
+    }
+
     /// getSection - Return the section with the specified name, creating a new
     /// section if one does not already exist.
     ELFSection &getSection(const std::string &Name, unsigned Type,
                            unsigned Flags = 0, unsigned Align = 0) {
-      ELFSection *&SN = SectionLookup[Name];
-      if (SN) return *SN;
-
-      // Remove tab from section name prefix. This is necessary becase TAI
-      // sometimes return a section name prefixed with a "\t" char.
       std::string SectionName(Name);
-      size_t Pos = SectionName.find("\t");
-      if (Pos != std::string::npos)
-        SectionName.erase(Pos, 1);
+      fixNameForSection(SectionName);
+
+      ELFSection *&SN = SectionLookup[SectionName];
+      if (SN) return *SN;
 
       SectionList.push_back(ELFSection(SectionName, isLittleEndian, is64Bit));
       SN = &SectionList.back();
@@ -147,6 +163,12 @@ namespace llvm {
                         ELFSection::SHF_EXECINSTR | ELFSection::SHF_ALLOC);
     }
 
+    /// Get a jump table section based on the section name returned by TAI
+    ELFSection &getJumpTableSection(std::string SName, unsigned Align) {
+      return getSection(SName, ELFSection::SHT_PROGBITS,
+                        ELFSection::SHF_ALLOC, Align);
+    }
+
     /// Get a constant pool section based on the section name returned by TAI
     ELFSection &getConstantPoolSection(std::string SName, unsigned Align) {
       return getSection(SName, ELFSection::SHT_PROGBITS,
@@ -155,14 +177,14 @@ namespace llvm {
 
     /// Return the relocation section of section 'S'. 'RelA' is true
     /// if the relocation section contains entries with addends.
-    ELFSection &getRelocSection(std::string SName, bool RelA) {
+    ELFSection &getRelocSection(std::string SName, bool RelA, unsigned Align) {
       std::string RelSName(".rel");
       unsigned SHdrTy = RelA ? ELFSection::SHT_RELA : ELFSection::SHT_REL;
 
       if (RelA) RelSName.append("a");
       RelSName.append(SName);
 
-      return getSection(RelSName, SHdrTy, 0, TEW->getPrefELFAlignment());
+      return getSection(RelSName, SHdrTy, 0, Align);
     }
 
     ELFSection &getNonExecStackSection() {
@@ -195,6 +217,11 @@ namespace llvm {
       return getSection("", ELFSection::SHT_NULL, 0);
     }
 
+    // Helpers for obtaining ELF specific info.
+    unsigned getGlobalELFLinkage(const GlobalValue *GV);
+    unsigned getGlobalELFVisibility(const GlobalValue *GV);
+    unsigned getElfSectionFlags(unsigned Flags);
+
    // As we complete the ELF file, we need to update fields in the ELF header
    // (e.g. the location of the section table).
These members keep track of // the offset in ELFHeader of these various pieces to update and other @@ -209,7 +236,6 @@ namespace llvm { void EmitGlobalConstant(const Constant *C, ELFSection &GblS); void EmitGlobalConstantStruct(const ConstantStruct *CVS, ELFSection &GblS); - unsigned getGlobalELFLinkage(const GlobalVariable *GV); ELFSection &getGlobalSymELFSection(const GlobalVariable *GV, ELFSym &Sym); void EmitRelocations(); void EmitRelocation(BinaryObject &RelSec, ELFRelocation &Rel, bool HasRelA); diff --git a/lib/CodeGen/LiveIntervalAnalysis.cpp b/lib/CodeGen/LiveIntervalAnalysis.cpp index 21bb5dc..52a30bc 100644 --- a/lib/CodeGen/LiveIntervalAnalysis.cpp +++ b/lib/CodeGen/LiveIntervalAnalysis.cpp @@ -23,6 +23,7 @@ #include "llvm/CodeGen/LiveVariables.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineInstr.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineLoopInfo.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/Passes.h" @@ -33,6 +34,8 @@ #include "llvm/Target/TargetOptions.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" +#include "llvm/ADT/DepthFirstIterator.h" +#include "llvm/ADT/SmallSet.h" #include "llvm/ADT/Statistic.h" #include "llvm/ADT/STLExtras.h" #include <algorithm> @@ -98,6 +101,120 @@ void LiveIntervals::releaseMemory() { } } +/// processImplicitDefs - Process IMPLICIT_DEF instructions and make sure +/// there is one implicit_def for each use. Add isUndef marker to +/// implicit_def defs and their uses. +void LiveIntervals::processImplicitDefs() { + SmallSet<unsigned, 8> ImpDefRegs; + SmallVector<MachineInstr*, 8> ImpDefMIs; + MachineBasicBlock *Entry = mf_->begin(); + SmallPtrSet<MachineBasicBlock*,16> Visited; + for (df_ext_iterator<MachineBasicBlock*, SmallPtrSet<MachineBasicBlock*,16> > + DFI = df_ext_begin(Entry, Visited), E = df_ext_end(Entry, Visited); + DFI != E; ++DFI) { + MachineBasicBlock *MBB = *DFI; + for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end(); + I != E; ) { + MachineInstr *MI = &*I; + ++I; + if (MI->getOpcode() == TargetInstrInfo::IMPLICIT_DEF) { + unsigned Reg = MI->getOperand(0).getReg(); + MI->getOperand(0).setIsUndef(); + ImpDefRegs.insert(Reg); + ImpDefMIs.push_back(MI); + continue; + } + + bool ChangedToImpDef = false; + for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { + MachineOperand& MO = MI->getOperand(i); + if (!MO.isReg() || !MO.isUse()) + continue; + unsigned Reg = MO.getReg(); + if (!Reg) + continue; + if (!ImpDefRegs.count(Reg)) + continue; + // Use is a copy, just turn it into an implicit_def. + unsigned SrcReg, DstReg, SrcSubReg, DstSubReg; + if (tii_->isMoveInstr(*MI, SrcReg, DstReg, SrcSubReg, DstSubReg) && + Reg == SrcReg) { + bool isKill = MO.isKill(); + MI->setDesc(tii_->get(TargetInstrInfo::IMPLICIT_DEF)); + for (int j = MI->getNumOperands() - 1, ee = 0; j > ee; --j) + MI->RemoveOperand(j); + if (isKill) + ImpDefRegs.erase(Reg); + ChangedToImpDef = true; + break; + } + + MO.setIsUndef(); + if (MO.isKill() || MI->isRegTiedToDefOperand(i)) + ImpDefRegs.erase(Reg); + } + + if (ChangedToImpDef) { + // Backtrack to process this new implicit_def. + --I; + } else { + for (unsigned i = 0; i != MI->getNumOperands(); ++i) { + MachineOperand& MO = MI->getOperand(i); + if (!MO.isReg() || !MO.isDef()) + continue; + ImpDefRegs.erase(MO.getReg()); + } + } + } + + // Any outstanding liveout implicit_def's? 
+ for (unsigned i = 0, e = ImpDefMIs.size(); i != e; ++i) { + MachineInstr *MI = ImpDefMIs[i]; + unsigned Reg = MI->getOperand(0).getReg(); + if (TargetRegisterInfo::isPhysicalRegister(Reg)) + // Physical registers are not liveout (yet). + continue; + if (!ImpDefRegs.count(Reg)) + continue; + + // If there are multiple defs of the same register and at least one + // is not an implicit_def, do not insert implicit_def's before the + // uses. + bool Skip = false; + for (MachineRegisterInfo::def_iterator DI = mri_->def_begin(Reg), + DE = mri_->def_end(); DI != DE; ++DI) { + if (DI->getOpcode() != TargetInstrInfo::IMPLICIT_DEF) { + Skip = true; + break; + } + } + if (Skip) + continue; + + for (MachineRegisterInfo::use_iterator UI = mri_->use_begin(Reg), + UE = mri_->use_end(); UI != UE; ) { + MachineOperand &RMO = UI.getOperand(); + MachineInstr *RMI = &*UI; + ++UI; + MachineBasicBlock *RMBB = RMI->getParent(); + if (RMBB == MBB) + continue; + const TargetRegisterClass* RC = mri_->getRegClass(Reg); + unsigned NewVReg = mri_->createVirtualRegister(RC); + MachineInstrBuilder MIB = + BuildMI(*RMBB, RMI, RMI->getDebugLoc(), + tii_->get(TargetInstrInfo::IMPLICIT_DEF), NewVReg); + (*MIB).getOperand(0).setIsUndef(); + RMO.setReg(NewVReg); + RMO.setIsUndef(); + RMO.setIsKill(); + } + } + ImpDefRegs.clear(); + ImpDefMIs.clear(); + } +} + void LiveIntervals::computeNumbering() { Index2MiMap OldI2MI = i2miMap_; std::vector<IdxMBBPair> OldI2MBB = Idx2MBBMap; @@ -299,6 +416,7 @@ bool LiveIntervals::runOnMachineFunction(MachineFunction &fn) { lv_ = &getAnalysis<LiveVariables>(); allocatableRegs_ = tri_->getAllocatableSet(fn); + processImplicitDefs(); computeNumbering(); computeIntervals(); @@ -1782,8 +1900,10 @@ LiveIntervals::handleSpilledImpDefs(const LiveInterval &li, VirtRegMap &vrm, NewLIs.push_back(&getOrCreateInterval(NewVReg)); for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { MachineOperand &MO = MI->getOperand(i); - if (MO.isReg() && MO.getReg() == li.reg) + if (MO.isReg() && MO.getReg() == li.reg) { MO.setReg(NewVReg); + MO.setIsUndef(); + } } } } diff --git a/lib/CodeGen/MachineFunction.cpp b/lib/CodeGen/MachineFunction.cpp index 2d2b59e..599efb8 100644 --- a/lib/CodeGen/MachineFunction.cpp +++ b/lib/CodeGen/MachineFunction.cpp @@ -14,6 +14,10 @@ //===----------------------------------------------------------------------===// #include "llvm/DerivedTypes.h" +#include "llvm/Function.h" +#include "llvm/Instructions.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/Config/config.h" #include "llvm/CodeGen/MachineConstantPool.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineFrameInfo.h" @@ -22,15 +26,12 @@ #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/Passes.h" #include "llvm/Target/TargetData.h" +#include "llvm/Target/TargetLowering.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetFrameInfo.h" -#include "llvm/Function.h" -#include "llvm/Instructions.h" #include "llvm/Support/Compiler.h" #include "llvm/Support/GraphWriter.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/ADT/STLExtras.h" -#include "llvm/Config/config.h" #include <fstream> #include <sstream> using namespace llvm; @@ -124,6 +125,7 @@ MachineFunction::MachineFunction(const Function *F, MachineFrameInfo(*TM.getFrameInfo()); ConstantPool = new (Allocator.Allocate<MachineConstantPool>()) MachineConstantPool(TM.getTargetData()); + Alignment = TM.getTargetLowering()->getFunctionAlignment(F); // Set up jump table. 
const TargetData &TD = *TM.getTargetData(); diff --git a/lib/CodeGen/MachineInstr.cpp b/lib/CodeGen/MachineInstr.cpp index c977508..d44305f 100644 --- a/lib/CodeGen/MachineInstr.cpp +++ b/lib/CodeGen/MachineInstr.cpp @@ -120,7 +120,7 @@ void MachineOperand::ChangeToImmediate(int64_t ImmVal) { /// the specified value. If an operand is known to be an register already, /// the setReg method should be used. void MachineOperand::ChangeToRegister(unsigned Reg, bool isDef, bool isImp, - bool isKill, bool isDead) { + bool isKill, bool isDead, bool isUndef) { // If this operand is already a register operand, use setReg to update the // register's use/def lists. if (isReg()) { @@ -143,6 +143,7 @@ void MachineOperand::ChangeToRegister(unsigned Reg, bool isDef, bool isImp, IsImp = isImp; IsKill = isKill; IsDead = isDead; + IsUndef = isUndef; IsEarlyClobber = false; SubReg = 0; } @@ -206,11 +207,11 @@ void MachineOperand::print(raw_ostream &OS, const TargetMachine *TM) const { OS << "%mreg" << getReg(); } - if (getSubReg() != 0) { + if (getSubReg() != 0) OS << ':' << getSubReg(); - } - if (isDef() || isKill() || isDead() || isImplicit() || isEarlyClobber()) { + if (isDef() || isKill() || isDead() || isImplicit() || isUndef() || + isEarlyClobber()) { OS << '<'; bool NeedComma = false; if (isImplicit()) { @@ -224,10 +225,15 @@ void MachineOperand::print(raw_ostream &OS, const TargetMachine *TM) const { OS << "def"; NeedComma = true; } - if (isKill() || isDead()) { + if (isKill() || isDead() || isUndef()) { if (NeedComma) OS << ','; if (isKill()) OS << "kill"; if (isDead()) OS << "dead"; + if (isUndef()) { + if (isKill() || isDead()) + OS << ','; + OS << "undef"; + } } OS << '>'; } diff --git a/lib/CodeGen/MachineLoopInfo.cpp b/lib/CodeGen/MachineLoopInfo.cpp index 68ddb7b..ff56f4d 100644 --- a/lib/CodeGen/MachineLoopInfo.cpp +++ b/lib/CodeGen/MachineLoopInfo.cpp @@ -30,7 +30,7 @@ const PassInfo *const llvm::MachineLoopInfoID = &X; bool MachineLoopInfo::runOnMachineFunction(MachineFunction &) { releaseMemory(); - LI->Calculate(getAnalysis<MachineDominatorTree>().getBase()); // Update + LI.Calculate(getAnalysis<MachineDominatorTree>().getBase()); // Update return false; } diff --git a/lib/CodeGen/RegAllocLinearScan.cpp b/lib/CodeGen/RegAllocLinearScan.cpp index 41a42fd..904b4cb 100644 --- a/lib/CodeGen/RegAllocLinearScan.cpp +++ b/lib/CodeGen/RegAllocLinearScan.cpp @@ -545,26 +545,6 @@ void RALinScan::linearScan() if (!isPhys && vrm_->getPreSplitReg(cur.reg)) continue; - // A register defined by an implicit_def can be liveout the def BB and livein - // to a use BB. Add it to the livein set of the use BB's. 
- if (!isPhys && cur.empty()) { - if (MachineInstr *DefMI = mri_->getVRegDef(cur.reg)) { - assert(DefMI->getOpcode() == TargetInstrInfo::IMPLICIT_DEF); - MachineBasicBlock *DefMBB = DefMI->getParent(); - SmallPtrSet<MachineBasicBlock*, 4> Seen; - Seen.insert(DefMBB); - for (MachineRegisterInfo::reg_iterator ri = mri_->reg_begin(cur.reg), - re = mri_->reg_end(); ri != re; ++ri) { - MachineInstr *UseMI = &*ri; - MachineBasicBlock *UseMBB = UseMI->getParent(); - if (Seen.insert(UseMBB)) { - assert(TargetRegisterInfo::isPhysicalRegister(Reg) && - "Adding a virtual register to livein set?"); - UseMBB->addLiveIn(Reg); - } - } - } - } for (LiveInterval::Ranges::const_iterator I = cur.begin(), E = cur.end(); I != E; ++I) { const LiveRange &LR = *I; diff --git a/lib/CodeGen/RegisterScavenging.cpp b/lib/CodeGen/RegisterScavenging.cpp index 3feb92f..d7fe7a2 100644 --- a/lib/CodeGen/RegisterScavenging.cpp +++ b/lib/CodeGen/RegisterScavenging.cpp @@ -36,7 +36,7 @@ static bool RedefinesSuperRegPart(const MachineInstr *MI, unsigned SubReg, bool SeenSuperDef = false; for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { const MachineOperand &MO = MI->getOperand(i); - if (!MO.isReg()) + if (!MO.isReg() || MO.isUndef()) continue; if (TRI->isSuperRegister(SubReg, MO.getReg())) { if (MO.isUse()) @@ -57,28 +57,22 @@ static bool RedefinesSuperRegPart(const MachineInstr *MI, } /// setUsed - Set the register and its sub-registers as being used. -void RegScavenger::setUsed(unsigned Reg, bool ImpDef) { +void RegScavenger::setUsed(unsigned Reg) { RegsAvailable.reset(Reg); - ImplicitDefed[Reg] = ImpDef; for (const unsigned *SubRegs = TRI->getSubRegisters(Reg); - unsigned SubReg = *SubRegs; ++SubRegs) { + unsigned SubReg = *SubRegs; ++SubRegs) RegsAvailable.reset(SubReg); - ImplicitDefed[SubReg] = ImpDef; - } } /// setUnused - Set the register and its sub-registers as being unused. void RegScavenger::setUnused(unsigned Reg, const MachineInstr *MI) { RegsAvailable.set(Reg); - ImplicitDefed.reset(Reg); for (const unsigned *SubRegs = TRI->getSubRegisters(Reg); unsigned SubReg = *SubRegs; ++SubRegs) - if (!RedefinesSuperRegPart(MI, Reg, TRI)) { + if (!RedefinesSuperRegPart(MI, Reg, TRI)) RegsAvailable.set(SubReg); - ImplicitDefed.reset(SubReg); - } } void RegScavenger::enterBasicBlock(MachineBasicBlock *mbb) { @@ -94,7 +88,6 @@ void RegScavenger::enterBasicBlock(MachineBasicBlock *mbb) { if (!MBB) { NumPhysRegs = TRI->getNumRegs(); RegsAvailable.resize(NumPhysRegs); - ImplicitDefed.resize(NumPhysRegs); // Create reserved registers bitvector. ReservedRegs = TRI->getReservedRegs(MF); @@ -113,7 +106,6 @@ void RegScavenger::enterBasicBlock(MachineBasicBlock *mbb) { ScavengeRestore = NULL; CurrDist = 0; DistanceMap.clear(); - ImplicitDefed.reset(); // All registers started out unused. RegsAvailable.set(); @@ -195,7 +187,10 @@ void RegScavenger::forward() { ScavengeRestore = NULL; } - bool IsImpDef = MI->getOpcode() == TargetInstrInfo::IMPLICIT_DEF; +#if 0 + if (MI->getOpcode() == TargetInstrInfo::IMPLICIT_DEF) + return; +#endif // Separate register operands into 3 classes: uses, defs, earlyclobbers. 
SmallVector<std::pair<const MachineOperand*,unsigned>, 4> UseMOs; @@ -203,7 +198,7 @@ void RegScavenger::forward() { SmallVector<std::pair<const MachineOperand*,unsigned>, 4> EarlyClobberMOs; for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { const MachineOperand &MO = MI->getOperand(i); - if (!MO.isReg() || MO.getReg() == 0) + if (!MO.isReg() || MO.getReg() == 0 || MO.isUndef()) continue; if (MO.isUse()) UseMOs.push_back(std::make_pair(&MO,i)); @@ -221,14 +216,7 @@ void RegScavenger::forward() { assert(isUsed(Reg) && "Using an undefined register!"); - // Kill of implicit_def defined registers are ignored. e.g. - // entry: 0x2029ab8, LLVM BB @0x1b06080, ID#0: - // Live Ins: %R0 - // %R0<def> = IMPLICIT_DEF - // %R0<def> = IMPLICIT_DEF - // STR %R0<kill>, %R0, %reg0, 0, 14, %reg0, Mem:ST(4,4) [0x1b06510 + 0] - // %R1<def> = LDR %R0, %reg0, 24, 14, %reg0, Mem:LD(4,4) [0x1b065bc + 0] - if (MO.isKill() && !isReserved(Reg) && !isImplicitlyDefined(Reg)) { + if (MO.isKill() && !isReserved(Reg)) { KillRegs.set(Reg); // Mark sub-registers as used. @@ -254,6 +242,8 @@ void RegScavenger::forward() { unsigned Idx = (i < NumECs) ? EarlyClobberMOs[i].second : DefMOs[i-NumECs].second; unsigned Reg = MO.getReg(); + if (MO.isUndef()) + continue; // If it's dead upon def, then it is now free. if (MO.isDead()) { @@ -262,7 +252,9 @@ void RegScavenger::forward() { } // Skip two-address destination operand. - if (MI->isRegTiedToUseOperand(Idx)) { + unsigned UseIdx; + if (MI->isRegTiedToUseOperand(Idx, &UseIdx) && + !MI->getOperand(UseIdx).isUndef()) { assert(isUsed(Reg) && "Using an undefined register!"); continue; } @@ -274,10 +266,9 @@ void RegScavenger::forward() { // Implicit def is allowed to "re-define" any register. Similarly, // implicitly defined registers can be clobbered. assert((isReserved(Reg) || isUnused(Reg) || - IsImpDef || isImplicitlyDefined(Reg) || isLiveInButUnusedBefore(Reg, MI, MBB, TRI, MRI)) && "Re-defining a live register!"); - setUsed(Reg, IsImpDef); + setUsed(Reg); } } @@ -297,7 +288,7 @@ void RegScavenger::backward() { SmallVector<std::pair<const MachineOperand*,unsigned>, 4> EarlyClobberMOs; for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { const MachineOperand &MO = MI->getOperand(i); - if (!MO.isReg() || MO.getReg() == 0) + if (!MO.isReg() || MO.getReg() == 0 || MO.isUndef()) continue; if (MO.isUse()) UseMOs.push_back(std::make_pair(&MO,i)); @@ -316,6 +307,8 @@ void RegScavenger::backward() { ? *DefMOs[i].first : *EarlyClobberMOs[i-NumDefs].first; unsigned Idx = (i < NumECs) ? DefMOs[i].second : EarlyClobberMOs[i-NumDefs].second; + if (MO.isUndef()) + continue; // Skip two-address destination operand. 
if (MI->isRegTiedToUseOperand(Idx)) diff --git a/lib/CodeGen/SelectionDAG/CMakeLists.txt b/lib/CodeGen/SelectionDAG/CMakeLists.txt index 9ea59ea..4ffe88f 100644 --- a/lib/CodeGen/SelectionDAG/CMakeLists.txt +++ b/lib/CodeGen/SelectionDAG/CMakeLists.txt @@ -20,3 +20,5 @@ add_llvm_library(LLVMSelectionDAG SelectionDAGPrinter.cpp TargetLowering.cpp ) + +target_link_libraries (LLVMSelectionDAG LLVMAnalysis LLVMAsmPrinter LLVMCodeGen) diff --git a/lib/CodeGen/SelectionDAG/FastISel.cpp b/lib/CodeGen/SelectionDAG/FastISel.cpp index 24fccf0..cd2d5ac 100644 --- a/lib/CodeGen/SelectionDAG/FastISel.cpp +++ b/lib/CodeGen/SelectionDAG/FastISel.cpp @@ -326,19 +326,14 @@ bool FastISel::SelectCall(User *I) { default: break; case Intrinsic::dbg_stoppoint: { DbgStopPointInst *SPI = cast<DbgStopPointInst>(I); - if (DIDescriptor::ValidDebugInfo(SPI->getContext(), CodeGenOpt::None)) { - DICompileUnit CU(cast<GlobalVariable>(SPI->getContext())); - unsigned Line = SPI->getLine(); - unsigned Col = SPI->getColumn(); - unsigned Idx = MF.getOrCreateDebugLocID(CU.getGV(), Line, Col); - setCurDebugLoc(DebugLoc::get(Idx)); - } + if (isValidDebugInfoIntrinsic(*SPI, CodeGenOpt::None)) + setCurDebugLoc(ExtractDebugLocation(*SPI, MF.getDebugLocInfo())); return true; } case Intrinsic::dbg_region_start: { DbgRegionStartInst *RSI = cast<DbgRegionStartInst>(I); - if (DIDescriptor::ValidDebugInfo(RSI->getContext(), CodeGenOpt::None) && - DW && DW->ShouldEmitDwarfDebug()) { + if (isValidDebugInfoIntrinsic(*RSI, CodeGenOpt::None) && DW + && DW->ShouldEmitDwarfDebug()) { unsigned ID = DW->RecordRegionStart(cast<GlobalVariable>(RSI->getContext())); const TargetInstrDesc &II = TII.get(TargetInstrInfo::DBG_LABEL); @@ -348,11 +343,11 @@ bool FastISel::SelectCall(User *I) { } case Intrinsic::dbg_region_end: { DbgRegionEndInst *REI = cast<DbgRegionEndInst>(I); - if (DIDescriptor::ValidDebugInfo(REI->getContext(), CodeGenOpt::None) && - DW && DW->ShouldEmitDwarfDebug()) { + if (isValidDebugInfoIntrinsic(*REI, CodeGenOpt::None) && DW + && DW->ShouldEmitDwarfDebug()) { unsigned ID = 0; DISubprogram Subprogram(cast<GlobalVariable>(REI->getContext())); - if (!Subprogram.isNull() && !Subprogram.describes(MF.getFunction())) { + if (isInlinedFnEnd(*REI, MF.getFunction())) { // This is end of an inlined function. const TargetInstrDesc &II = TII.get(TargetInstrInfo::DBG_LABEL); ID = DW->RecordInlinedFnEnd(Subprogram); @@ -372,81 +367,67 @@ bool FastISel::SelectCall(User *I) { } case Intrinsic::dbg_func_start: { DbgFuncStartInst *FSI = cast<DbgFuncStartInst>(I); - Value *SP = FSI->getSubprogram(); - if (!DIDescriptor::ValidDebugInfo(SP, CodeGenOpt::None)) + if (!isValidDebugInfoIntrinsic(*FSI, CodeGenOpt::None) || !DW + || !DW->ShouldEmitDwarfDebug()) return true; - // llvm.dbg.func.start implicitly defines a dbg_stoppoint which is what - // (most?) gdb expects. - DebugLoc PrevLoc = DL; - DISubprogram Subprogram(cast<GlobalVariable>(SP)); - DICompileUnit CompileUnit = Subprogram.getCompileUnit(); - - if (!Subprogram.describes(MF.getFunction())) { + if (isInlinedFnStart(*FSI, MF.getFunction())) { // This is a beginning of an inlined function. - + // If llvm.dbg.func.start is seen in a new block before any // llvm.dbg.stoppoint intrinsic then the location info is unknown. // FIXME : Why DebugLoc is reset at the beginning of each block ? + DebugLoc PrevLoc = DL; if (PrevLoc.isUnknown()) return true; // Record the source line. 
- unsigned Line = Subprogram.getLineNumber(); - setCurDebugLoc(DebugLoc::get(MF.getOrCreateDebugLocID( - CompileUnit.getGV(), Line, 0))); - - if (DW && DW->ShouldEmitDwarfDebug()) { - DebugLocTuple PrevLocTpl = MF.getDebugLocTuple(PrevLoc); - unsigned LabelID = DW->RecordInlinedFnStart(Subprogram, - DICompileUnit(PrevLocTpl.CompileUnit), - PrevLocTpl.Line, - PrevLocTpl.Col); - const TargetInstrDesc &II = TII.get(TargetInstrInfo::DBG_LABEL); - BuildMI(MBB, DL, II).addImm(LabelID); - } - } else { - // Record the source line. - unsigned Line = Subprogram.getLineNumber(); - MF.setDefaultDebugLoc(DebugLoc::get(MF.getOrCreateDebugLocID( - CompileUnit.getGV(), Line, 0))); - if (DW && DW->ShouldEmitDwarfDebug()) { - // llvm.dbg.func_start also defines beginning of function scope. - DW->RecordRegionStart(cast<GlobalVariable>(FSI->getSubprogram())); - } + setCurDebugLoc(ExtractDebugLocation(*FSI, MF.getDebugLocInfo())); + + DebugLocTuple PrevLocTpl = MF.getDebugLocTuple(PrevLoc); + DISubprogram SP(cast<GlobalVariable>(FSI->getSubprogram())); + unsigned LabelID = DW->RecordInlinedFnStart(SP, + DICompileUnit(PrevLocTpl.CompileUnit), + PrevLocTpl.Line, + PrevLocTpl.Col); + const TargetInstrDesc &II = TII.get(TargetInstrInfo::DBG_LABEL); + BuildMI(MBB, DL, II).addImm(LabelID); + return true; } - + + // This is a beginning of a new function. + MF.setDefaultDebugLoc(ExtractDebugLocation(*FSI, MF.getDebugLocInfo())); + + // llvm.dbg.func_start also defines beginning of function scope. + DW->RecordRegionStart(cast<GlobalVariable>(FSI->getSubprogram())); return true; } case Intrinsic::dbg_declare: { DbgDeclareInst *DI = cast<DbgDeclareInst>(I); + if (!isValidDebugInfoIntrinsic(*DI, CodeGenOpt::None) || !DW + || !DW->ShouldEmitDwarfDebug()) + return true; + Value *Variable = DI->getVariable(); - if (DIDescriptor::ValidDebugInfo(Variable, CodeGenOpt::None) && - DW && DW->ShouldEmitDwarfDebug()) { - // Determine the address of the declared object. - Value *Address = DI->getAddress(); - if (BitCastInst *BCI = dyn_cast<BitCastInst>(Address)) - Address = BCI->getOperand(0); - AllocaInst *AI = dyn_cast<AllocaInst>(Address); - // Don't handle byval struct arguments or VLAs, for example. - if (!AI) break; - DenseMap<const AllocaInst*, int>::iterator SI = - StaticAllocaMap.find(AI); - if (SI == StaticAllocaMap.end()) break; // VLAs. - int FI = SI->second; - - // Determine the debug globalvariable. - GlobalValue *GV = cast<GlobalVariable>(Variable); - - // Build the DECLARE instruction. - const TargetInstrDesc &II = TII.get(TargetInstrInfo::DECLARE); - MachineInstr *DeclareMI - = BuildMI(MBB, DL, II).addFrameIndex(FI).addGlobalAddress(GV); - DIVariable DV(cast<GlobalVariable>(GV)); - if (!DV.isNull()) { - // This is a local variable - DW->RecordVariableScope(DV, DeclareMI); - } - } + Value *Address = DI->getAddress(); + if (BitCastInst *BCI = dyn_cast<BitCastInst>(Address)) + Address = BCI->getOperand(0); + AllocaInst *AI = dyn_cast<AllocaInst>(Address); + // Don't handle byval struct arguments or VLAs, for example. + if (!AI) break; + DenseMap<const AllocaInst*, int>::iterator SI = + StaticAllocaMap.find(AI); + if (SI == StaticAllocaMap.end()) break; // VLAs. + int FI = SI->second; + + // Determine the debug globalvariable. + GlobalValue *GV = cast<GlobalVariable>(Variable); + + // Build the DECLARE instruction. 
+ const TargetInstrDesc &II = TII.get(TargetInstrInfo::DECLARE); + MachineInstr *DeclareMI + = BuildMI(MBB, DL, II).addFrameIndex(FI).addGlobalAddress(GV); + DIVariable DV(cast<GlobalVariable>(GV)); + DW->RecordVariableScope(DV, DeclareMI); return true; } case Intrinsic::eh_exception: { diff --git a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp index ef365e6..1413d95 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp @@ -1900,7 +1900,7 @@ SDValue SelectionDAGLegalize::ExpandLibCall(RTLIB::Libcall LC, SDNode *Node, const Type *RetTy = Node->getValueType(0).getTypeForMVT(); std::pair<SDValue, SDValue> CallInfo = TLI.LowerCallTo(InChain, RetTy, isSigned, !isSigned, false, false, - CallingConv::C, false, Callee, Args, DAG, + 0, CallingConv::C, false, Callee, Args, DAG, Node->getDebugLoc()); // Legalize the call sequence, starting with the chain. This will advance @@ -2305,7 +2305,7 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node, TargetLowering::ArgListTy Args; std::pair<SDValue, SDValue> CallResult = TLI.LowerCallTo(Node->getOperand(0), Type::VoidTy, - false, false, false, false, CallingConv::C, false, + false, false, false, false, 0, CallingConv::C, false, DAG.getExternalSymbol("abort", TLI.getPointerTy()), Args, DAG, dl); Results.push_back(CallResult.second); diff --git a/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp index 00d71e1..3135a44 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp @@ -1006,7 +1006,7 @@ SDValue DAGTypeLegalizer::MakeLibCall(RTLIB::Libcall LC, MVT RetVT, const Type *RetTy = RetVT.getTypeForMVT(); std::pair<SDValue,SDValue> CallInfo = TLI.LowerCallTo(DAG.getEntryNode(), RetTy, isSigned, !isSigned, false, - false, CallingConv::C, false, Callee, Args, DAG, dl); + false, 0, CallingConv::C, false, Callee, Args, DAG, dl); return CallInfo.first; } diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodesEmit.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodesEmit.cpp index e372b5b..7926339 100644 --- a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodesEmit.cpp +++ b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodesEmit.cpp @@ -617,7 +617,7 @@ void ScheduleDAGSDNodes::EmitNode(SDNode *Node, bool IsClone, bool IsCloned, for (; NumVals; --NumVals, ++i) { unsigned Reg = cast<RegisterSDNode>(Node->getOperand(i))->getReg(); MI->addOperand(MachineOperand::CreateReg(Reg, true, false, false, - false, 0, true)); + false, false, true)); } break; case 1: // Use of register. 
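A note on the LowerCallTo changes threaded through LegalizeDAG.cpp, LegalizeTypes.cpp above and the SelectionDAG files below: the patch adds one new parameter, NumFixedArgs, so that targets can tell the fixed arguments of a variadic call apart from the ones passed through the ellipsis. A minimal sketch of the two calling patterns, using only the updated signature visible in this patch (the surrounding variables are illustrative, taken from the call sites shown in the hunks, not additional patch content):

    // Legalizer libcall helpers lower non-variadic runtime calls, so they
    // pass 0 for NumFixedArgs and false for isVarArg:
    std::pair<SDValue, SDValue> CallInfo =
      TLI.LowerCallTo(DAG.getEntryNode(), RetTy, isSigned, !isSigned,
                      /*isVarArg=*/false, /*isInreg=*/false,
                      /*NumFixedArgs=*/0, CallingConv::C,
                      /*isTailCall=*/false, Callee, Args, DAG, dl);

    // An ordinary call site passes the parameter count from the callee's
    // function type; any argument beyond FTy->getNumParams() is variadic:
    TLI.LowerCallTo(getRoot(), CS.getType(),
                    CS.paramHasAttr(0, Attribute::SExt),
                    CS.paramHasAttr(0, Attribute::ZExt),
                    FTy->isVarArg(), CS.paramHasAttr(0, Attribute::InReg),
                    /*NumFixedArgs=*/FTy->getNumParams(), CS.getCallingConv(),
                    IsTailCall && PerformTailCallOpt, Callee, Args, DAG,
                    getCurDebugLoc());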
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp index 0342f67..c8f4b52 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -3375,7 +3375,7 @@ SDValue SelectionDAG::getMemcpy(SDValue Chain, DebugLoc dl, SDValue Dst, // FIXME: pass in DebugLoc std::pair<SDValue,SDValue> CallResult = TLI.LowerCallTo(Chain, Type::VoidTy, - false, false, false, false, CallingConv::C, false, + false, false, false, false, 0, CallingConv::C, false, getExternalSymbol("memcpy", TLI.getPointerTy()), Args, *this, dl); return CallResult.second; @@ -3421,7 +3421,7 @@ SDValue SelectionDAG::getMemmove(SDValue Chain, DebugLoc dl, SDValue Dst, // FIXME: pass in DebugLoc std::pair<SDValue,SDValue> CallResult = TLI.LowerCallTo(Chain, Type::VoidTy, - false, false, false, false, CallingConv::C, false, + false, false, false, false, 0, CallingConv::C, false, getExternalSymbol("memmove", TLI.getPointerTy()), Args, *this, dl); return CallResult.second; @@ -3473,7 +3473,7 @@ SDValue SelectionDAG::getMemset(SDValue Chain, DebugLoc dl, SDValue Dst, // FIXME: pass in DebugLoc std::pair<SDValue,SDValue> CallResult = TLI.LowerCallTo(Chain, Type::VoidTy, - false, false, false, false, CallingConv::C, false, + false, false, false, false, 0, CallingConv::C, false, getExternalSymbol("memset", TLI.getPointerTy()), Args, *this, dl); return CallResult.second; @@ -3605,7 +3605,8 @@ SelectionDAG::getMemIntrinsicNode(unsigned Opcode, DebugLoc dl, SDVTList VTList, SDValue SelectionDAG::getCall(unsigned CallingConv, DebugLoc dl, bool IsVarArgs, bool IsTailCall, bool IsInreg, SDVTList VTs, - const SDValue *Operands, unsigned NumOperands) { + const SDValue *Operands, unsigned NumOperands, + unsigned NumFixedArgs) { // Do not include isTailCall in the folding set profile. 
FoldingSetNodeID ID; AddNodeIDNode(ID, ISD::CALL, VTs, Operands, NumOperands); @@ -3621,7 +3622,7 @@ SelectionDAG::getCall(unsigned CallingConv, DebugLoc dl, bool IsVarArgs, } SDNode *N = NodeAllocator.Allocate<CallSDNode>(); new (N) CallSDNode(CallingConv, dl, IsVarArgs, IsTailCall, IsInreg, - VTs, Operands, NumOperands); + VTs, Operands, NumOperands, NumFixedArgs); CSEMap.InsertNode(N, IP); AllNodes.push_back(N); return SDValue(N, 0); diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuild.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGBuild.cpp index 48ebd0f..260911e 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGBuild.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuild.cpp @@ -332,30 +332,14 @@ void FunctionLoweringInfo::set(Function &fn, MachineFunction &mf, default: break; case Intrinsic::dbg_stoppoint: { DbgStopPointInst *SPI = cast<DbgStopPointInst>(I); - - if (DIDescriptor::ValidDebugInfo(SPI->getContext(), - CodeGenOpt::Default)) { - DICompileUnit CU(cast<GlobalVariable>(SPI->getContext())); - unsigned idx = MF->getOrCreateDebugLocID(CU.getGV(), - SPI->getLine(), - SPI->getColumn()); - DL = DebugLoc::get(idx); - } - + if (isValidDebugInfoIntrinsic(*SPI, CodeGenOpt::Default)) + DL = ExtractDebugLocation(*SPI, MF->getDebugLocInfo()); break; } case Intrinsic::dbg_func_start: { DbgFuncStartInst *FSI = cast<DbgFuncStartInst>(I); - Value *SP = FSI->getSubprogram(); - - if (DIDescriptor::ValidDebugInfo(SP, CodeGenOpt::Default)) { - DISubprogram Subprogram(cast<GlobalVariable>(SP)); - DICompileUnit CU(Subprogram.getCompileUnit()); - unsigned Line = Subprogram.getLineNumber(); - DL = DebugLoc::get(MF->getOrCreateDebugLocID(CU.getGV(), - Line, 0)); - } - + if (isValidDebugInfoIntrinsic(*FSI, CodeGenOpt::Default)) + DL = ExtractDebugLocation(*FSI, MF->getDebugLocInfo()); break; } } @@ -3887,13 +3871,11 @@ SelectionDAGLowering::visitIntrinsicCall(CallInst &I, unsigned Intrinsic) { } case Intrinsic::dbg_stoppoint: { DbgStopPointInst &SPI = cast<DbgStopPointInst>(I); - if (DIDescriptor::ValidDebugInfo(SPI.getContext(), OptLevel)) { + if (isValidDebugInfoIntrinsic(SPI, CodeGenOpt::Default)) { MachineFunction &MF = DAG.getMachineFunction(); - DICompileUnit CU(cast<GlobalVariable>(SPI.getContext())); - DebugLoc Loc = DebugLoc::get(MF.getOrCreateDebugLocID(CU.getGV(), - SPI.getLine(), SPI.getColumn())); + DebugLoc Loc = ExtractDebugLocation(SPI, MF.getDebugLocInfo()); setCurDebugLoc(Loc); - + if (OptLevel == CodeGenOpt::None) DAG.setRoot(DAG.getDbgStopPoint(Loc, getRoot(), SPI.getLine(), @@ -3905,135 +3887,103 @@ SelectionDAGLowering::visitIntrinsicCall(CallInst &I, unsigned Intrinsic) { case Intrinsic::dbg_region_start: { DwarfWriter *DW = DAG.getDwarfWriter(); DbgRegionStartInst &RSI = cast<DbgRegionStartInst>(I); - - if (DIDescriptor::ValidDebugInfo(RSI.getContext(), OptLevel) && - DW && DW->ShouldEmitDwarfDebug()) { + if (isValidDebugInfoIntrinsic(RSI, OptLevel) && DW + && DW->ShouldEmitDwarfDebug()) { unsigned LabelID = DW->RecordRegionStart(cast<GlobalVariable>(RSI.getContext())); DAG.setRoot(DAG.getLabel(ISD::DBG_LABEL, getCurDebugLoc(), getRoot(), LabelID)); } - return 0; } case Intrinsic::dbg_region_end: { DwarfWriter *DW = DAG.getDwarfWriter(); DbgRegionEndInst &REI = cast<DbgRegionEndInst>(I); - if (DIDescriptor::ValidDebugInfo(REI.getContext(), OptLevel) && - DW && DW->ShouldEmitDwarfDebug()) { - MachineFunction &MF = DAG.getMachineFunction(); - DISubprogram Subprogram(cast<GlobalVariable>(REI.getContext())); + if (!isValidDebugInfoIntrinsic(REI, OptLevel) || !DW + || 
!DW->ShouldEmitDwarfDebug())
+      return 0;
 
-    if (Subprogram.isNull() || Subprogram.describes(MF.getFunction())) {
-      unsigned LabelID =
-        DW->RecordRegionEnd(cast<GlobalVariable>(REI.getContext()));
-      DAG.setRoot(DAG.getLabel(ISD::DBG_LABEL, getCurDebugLoc(),
-                               getRoot(), LabelID));
-    } else {
-      // This is end of inlined function. Debugging information for inlined
-      // function is not handled yet (only supported by FastISel).
-      if (OptLevel == CodeGenOpt::None) {
-        unsigned ID = DW->RecordInlinedFnEnd(Subprogram);
-        if (ID != 0)
-          // Returned ID is 0 if this is unbalanced "end of inlined
-          // scope". This could happen if optimizer eats dbg intrinsics or
-          // "beginning of inlined scope" is not recoginized due to missing
-          // location info. In such cases, do ignore this region.end.
-          DAG.setRoot(DAG.getLabel(ISD::DBG_LABEL, getCurDebugLoc(),
-                                   getRoot(), ID));
-      }
+    MachineFunction &MF = DAG.getMachineFunction();
+    DISubprogram Subprogram(cast<GlobalVariable>(REI.getContext()));
+
+    if (isInlinedFnEnd(REI, MF.getFunction())) {
+      // This is the end of an inlined function. Debugging information for
+      // inlined functions is not handled yet (only supported by FastISel).
+      if (OptLevel == CodeGenOpt::None) {
+        unsigned ID = DW->RecordInlinedFnEnd(Subprogram);
+        if (ID != 0)
+          // The returned ID is 0 if this is an unbalanced "end of inlined
+          // scope". This could happen if the optimizer eats dbg intrinsics
+          // or "beginning of inlined scope" is not recognized due to missing
+          // location info. In such cases, ignore this region.end.
+          DAG.setRoot(DAG.getLabel(ISD::DBG_LABEL, getCurDebugLoc(),
+                                   getRoot(), ID));
       }
-    }
+      return 0;
+    }
+    unsigned LabelID =
+      DW->RecordRegionEnd(cast<GlobalVariable>(REI.getContext()));
+    DAG.setRoot(DAG.getLabel(ISD::DBG_LABEL, getCurDebugLoc(),
+                             getRoot(), LabelID));
     return 0;
   }
   case Intrinsic::dbg_func_start: {
     DwarfWriter *DW = DAG.getDwarfWriter();
     DbgFuncStartInst &FSI = cast<DbgFuncStartInst>(I);
-    Value *SP = FSI.getSubprogram();
-    if (!DIDescriptor::ValidDebugInfo(SP, OptLevel))
+    if (!isValidDebugInfoIntrinsic(FSI, CodeGenOpt::None) || !DW
+        || !DW->ShouldEmitDwarfDebug())
       return 0;
 
     MachineFunction &MF = DAG.getMachineFunction();
-    if (OptLevel == CodeGenOpt::None) {
-      // llvm.dbg.func.start implicitly defines a dbg_stoppoint which is what
-      // (most?) gdb expects.
+    // This is the beginning of an inlined function.
+    if (isInlinedFnStart(FSI, MF.getFunction())) {
+      if (OptLevel != CodeGenOpt::None)
+        // FIXME: Debugging information for inlined functions is only
+        // supported at CodeGenOpt::None.
+        return 0;
+
       DebugLoc PrevLoc = CurDebugLoc;
-      DISubprogram Subprogram(cast<GlobalVariable>(SP));
-      DICompileUnit CompileUnit = Subprogram.getCompileUnit();
-
-      if (!Subprogram.describes(MF.getFunction())) {
-        // This is a beginning of an inlined function.
-
-        // If llvm.dbg.func.start is seen in a new block before any
-        // llvm.dbg.stoppoint intrinsic then the location info is unknown.
-        // FIXME : Why DebugLoc is reset at the beginning of each block ?
-        if (PrevLoc.isUnknown())
-          return 0;
-
-        // Record the source line.
- unsigned Line = Subprogram.getLineNumber(); - setCurDebugLoc(DebugLoc::get( - MF.getOrCreateDebugLocID(CompileUnit.getGV(), Line, 0))); - - if (DW && DW->ShouldEmitDwarfDebug()) { - DebugLocTuple PrevLocTpl = MF.getDebugLocTuple(PrevLoc); - unsigned LabelID = DW->RecordInlinedFnStart(Subprogram, - DICompileUnit(PrevLocTpl.CompileUnit), - PrevLocTpl.Line, - PrevLocTpl.Col); - DAG.setRoot(DAG.getLabel(ISD::DBG_LABEL, getCurDebugLoc(), - getRoot(), LabelID)); - } - } else { - // Record the source line. - unsigned Line = Subprogram.getLineNumber(); - MF.setDefaultDebugLoc(DebugLoc::get( - MF.getOrCreateDebugLocID(CompileUnit.getGV(), Line, 0))); - if (DW && DW->ShouldEmitDwarfDebug()) { - // llvm.dbg.func_start also defines beginning of function scope. - DW->RecordRegionStart(cast<GlobalVariable>(FSI.getSubprogram())); - } - } - } else { - DISubprogram Subprogram(cast<GlobalVariable>(SP)); - - std::string SPName; - Subprogram.getLinkageName(SPName); - if (!SPName.empty() - && strcmp(SPName.c_str(), MF.getFunction()->getNameStart())) { - // This is beginning of inlined function. Debugging information for - // inlined function is not handled yet (only supported by FastISel). + // If llvm.dbg.func.start is seen in a new block before any + // llvm.dbg.stoppoint intrinsic then the location info is unknown. + // FIXME : Why DebugLoc is reset at the beginning of each block ? + if (PrevLoc.isUnknown()) return 0; - } - - // llvm.dbg.func.start implicitly defines a dbg_stoppoint which is - // what (most?) gdb expects. - DICompileUnit CompileUnit = Subprogram.getCompileUnit(); - - // Record the source line but does not create a label for the normal - // function start. It will be emitted at asm emission time. However, - // create a label if this is a beginning of inlined function. - unsigned Line = Subprogram.getLineNumber(); - setCurDebugLoc(DebugLoc::get( - MF.getOrCreateDebugLocID(CompileUnit.getGV(), Line, 0))); - // FIXME - Start new region because llvm.dbg.func_start also defines - // beginning of function scope. + + // Record the source line. + setCurDebugLoc(ExtractDebugLocation(FSI, MF.getDebugLocInfo())); + + DebugLocTuple PrevLocTpl = MF.getDebugLocTuple(PrevLoc); + DISubprogram SP(cast<GlobalVariable>(FSI.getSubprogram())); + DICompileUnit CU(PrevLocTpl.CompileUnit); + unsigned LabelID = DW->RecordInlinedFnStart(SP, CU, + PrevLocTpl.Line, + PrevLocTpl.Col); + DAG.setRoot(DAG.getLabel(ISD::DBG_LABEL, getCurDebugLoc(), + getRoot(), LabelID)); + return 0; } + // This is a beginning of a new function. + MF.setDefaultDebugLoc(ExtractDebugLocation(FSI, MF.getDebugLocInfo())); + + // llvm.dbg.func_start also defines beginning of function scope. + DW->RecordRegionStart(cast<GlobalVariable>(FSI.getSubprogram())); return 0; } case Intrinsic::dbg_declare: { - if (OptLevel == CodeGenOpt::None) { - DbgDeclareInst &DI = cast<DbgDeclareInst>(I); - Value *Variable = DI.getVariable(); - if (DIDescriptor::ValidDebugInfo(Variable, OptLevel)) - DAG.setRoot(DAG.getNode(ISD::DECLARE, dl, MVT::Other, getRoot(), - getValue(DI.getAddress()), getValue(Variable))); - } else { - // FIXME: Do something sensible here when we support debug declare. - } + if (OptLevel != CodeGenOpt::None) + // FIXME: Variable debug info is not supported here. 
+ return 0; + + DbgDeclareInst &DI = cast<DbgDeclareInst>(I); + if (!isValidDebugInfoIntrinsic(DI, CodeGenOpt::None)) + return 0; + + Value *Variable = DI.getVariable(); + DAG.setRoot(DAG.getNode(ISD::DECLARE, dl, MVT::Other, getRoot(), + getValue(DI.getAddress()), getValue(Variable))); return 0; } case Intrinsic::eh_exception: { @@ -4466,7 +4416,7 @@ void SelectionDAGLowering::LowerCallTo(CallSite CS, SDValue Callee, TLI.LowerCallTo(getRoot(), CS.getType(), CS.paramHasAttr(0, Attribute::SExt), CS.paramHasAttr(0, Attribute::ZExt), FTy->isVarArg(), - CS.paramHasAttr(0, Attribute::InReg), + CS.paramHasAttr(0, Attribute::InReg), FTy->getNumParams(), CS.getCallingConv(), IsTailCall && PerformTailCallOpt, Callee, Args, DAG, getCurDebugLoc()); @@ -5518,7 +5468,7 @@ void SelectionDAGLowering::visitMalloc(MallocInst &I) { std::pair<SDValue,SDValue> Result = TLI.LowerCallTo(getRoot(), I.getType(), false, false, false, false, - CallingConv::C, PerformTailCallOpt, + 0, CallingConv::C, PerformTailCallOpt, DAG.getExternalSymbol("malloc", IntPtr), Args, DAG, getCurDebugLoc()); setValue(&I, Result.first); // Pointers always fit in registers @@ -5534,7 +5484,7 @@ void SelectionDAGLowering::visitFree(FreeInst &I) { MVT IntPtr = TLI.getPointerTy(); std::pair<SDValue,SDValue> Result = TLI.LowerCallTo(getRoot(), Type::VoidTy, false, false, false, false, - CallingConv::C, PerformTailCallOpt, + 0, CallingConv::C, PerformTailCallOpt, DAG.getExternalSymbol("free", IntPtr), Args, DAG, getCurDebugLoc()); DAG.setRoot(Result.second); @@ -5707,7 +5657,7 @@ void TargetLowering::LowerArguments(Function &F, SelectionDAG &DAG, std::pair<SDValue, SDValue> TargetLowering::LowerCallTo(SDValue Chain, const Type *RetTy, bool RetSExt, bool RetZExt, bool isVarArg, - bool isInreg, + bool isInreg, unsigned NumFixedArgs, unsigned CallingConv, bool isTailCall, SDValue Callee, ArgListTy &Args, SelectionDAG &DAG, DebugLoc dl) { @@ -5805,7 +5755,7 @@ TargetLowering::LowerCallTo(SDValue Chain, const Type *RetTy, isVarArg, isTailCall, isInreg, DAG.getVTList(&LoweredRetTys[0], LoweredRetTys.size()), - &Ops[0], Ops.size() + &Ops[0], Ops.size(), NumFixedArgs ); Chain = Res.getValue(LoweredRetTys.size() - 1); diff --git a/lib/CodeGen/SimpleRegisterCoalescing.cpp b/lib/CodeGen/SimpleRegisterCoalescing.cpp index c2105e6..7e7d6b8 100644 --- a/lib/CodeGen/SimpleRegisterCoalescing.cpp +++ b/lib/CodeGen/SimpleRegisterCoalescing.cpp @@ -2669,19 +2669,28 @@ SimpleRegisterCoalescing::TurnCopyIntoImpDef(MachineBasicBlock::iterator &I, CopyMI->setDesc(tii_->get(TargetInstrInfo::IMPLICIT_DEF)); for (int i = CopyMI->getNumOperands() - 1, e = 0; i > e; --i) CopyMI->RemoveOperand(i); + CopyMI->getOperand(0).setIsUndef(); bool NoUse = mri_->use_empty(SrcReg); if (NoUse) { - for (MachineRegisterInfo::reg_iterator I = mri_->reg_begin(SrcReg), - E = mri_->reg_end(); I != E; ) { - assert(I.getOperand().isDef()); - MachineInstr *DefMI = &*I; - ++I; + for (MachineRegisterInfo::reg_iterator RI = mri_->reg_begin(SrcReg), + RE = mri_->reg_end(); RI != RE; ) { + assert(RI.getOperand().isDef()); + MachineInstr *DefMI = &*RI; + ++RI; // The implicit_def source has no other uses, delete it. assert(DefMI->getOpcode() == TargetInstrInfo::IMPLICIT_DEF); li_->RemoveMachineInstrFromMaps(DefMI); DefMI->eraseFromParent(); } } + + // Mark uses of implicit_def isUndef. 
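+  // A sketch of the effect, with made-up registers (illustrative only):
+  //   %reg1024<def> = IMPLICIT_DEF
+  //   %reg1025<def> = ADD %reg1024<undef>, %reg1026
+  // The <undef> marker on the read of %reg1024 tells later passes that the
+  // value is not a real dependence.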
+  for (MachineRegisterInfo::use_iterator RI = mri_->use_begin(DstReg),
+         RE = mri_->use_end(); RI != RE; ++RI) {
+    assert((*RI).getParent() == MBB);
+    RI.getOperand().setIsUndef();
+  }
+
   ++I;
   return true;
 }
diff --git a/lib/CodeGen/TargetInstrInfoImpl.cpp b/lib/CodeGen/TargetInstrInfoImpl.cpp
index a5e1ee4..b759599 100644
--- a/lib/CodeGen/TargetInstrInfoImpl.cpp
+++ b/lib/CodeGen/TargetInstrInfoImpl.cpp
@@ -24,14 +24,19 @@ using namespace llvm;
 // operand 1 and 2.
 MachineInstr *TargetInstrInfoImpl::commuteInstruction(MachineInstr *MI,
                                                       bool NewMI) const {
-  assert(MI->getOperand(1).isReg() && MI->getOperand(2).isReg() &&
+  const TargetInstrDesc &TID = MI->getDesc();
+  bool HasDef = TID.getNumDefs();
+  unsigned Idx1 = HasDef ? 1 : 0;
+  unsigned Idx2 = HasDef ? 2 : 1;
+
+  assert(MI->getOperand(Idx1).isReg() && MI->getOperand(Idx2).isReg() &&
          "This only knows how to commute register operands so far");
-  unsigned Reg1 = MI->getOperand(1).getReg();
-  unsigned Reg2 = MI->getOperand(2).getReg();
-  bool Reg1IsKill = MI->getOperand(1).isKill();
-  bool Reg2IsKill = MI->getOperand(2).isKill();
+  unsigned Reg1 = MI->getOperand(Idx1).getReg();
+  unsigned Reg2 = MI->getOperand(Idx2).getReg();
+  bool Reg1IsKill = MI->getOperand(Idx1).isKill();
+  bool Reg2IsKill = MI->getOperand(Idx2).isKill();
   bool ChangeReg0 = false;
-  if (MI->getOperand(0).getReg() == Reg1) {
+  if (HasDef && MI->getOperand(0).getReg() == Reg1) {
     // Must be two address instruction!
     assert(MI->getDesc().getOperandConstraint(0, TOI::TIED_TO) &&
            "Expecting a two-address instruction!");
@@ -41,21 +46,27 @@ MachineInstr *TargetInstrInfoImpl::commuteInstruction(MachineInstr *MI,
 
   if (NewMI) {
     // Create a new instruction.
-    unsigned Reg0 = ChangeReg0 ? Reg2 : MI->getOperand(0).getReg();
-    bool Reg0IsDead = MI->getOperand(0).isDead();
+    unsigned Reg0 = HasDef
+      ? (ChangeReg0 ? Reg2 : MI->getOperand(0).getReg()) : 0;
+    bool Reg0IsDead = HasDef ? MI->getOperand(0).isDead() : false;
     MachineFunction &MF = *MI->getParent()->getParent();
-    return BuildMI(MF, MI->getDebugLoc(), MI->getDesc())
-      .addReg(Reg0, RegState::Define | getDeadRegState(Reg0IsDead))
-      .addReg(Reg2, getKillRegState(Reg2IsKill))
-      .addReg(Reg1, getKillRegState(Reg2IsKill));
+    if (HasDef)
+      return BuildMI(MF, MI->getDebugLoc(), MI->getDesc())
+        .addReg(Reg0, RegState::Define | getDeadRegState(Reg0IsDead))
+        .addReg(Reg2, getKillRegState(Reg2IsKill))
+        .addReg(Reg1, getKillRegState(Reg1IsKill));
+    else
+      return BuildMI(MF, MI->getDebugLoc(), MI->getDesc())
+        .addReg(Reg2, getKillRegState(Reg2IsKill))
+        .addReg(Reg1, getKillRegState(Reg1IsKill));
   }
 
   if (ChangeReg0)
     MI->getOperand(0).setReg(Reg2);
-  MI->getOperand(2).setReg(Reg1);
-  MI->getOperand(1).setReg(Reg2);
-  MI->getOperand(2).setIsKill(Reg1IsKill);
-  MI->getOperand(1).setIsKill(Reg2IsKill);
+  MI->getOperand(Idx2).setReg(Reg1);
+  MI->getOperand(Idx1).setReg(Reg2);
+  MI->getOperand(Idx2).setIsKill(Reg1IsKill);
+  MI->getOperand(Idx1).setIsKill(Reg2IsKill);
   return MI;
 }
 
@@ -66,6 +77,9 @@ MachineInstr *TargetInstrInfoImpl::commuteInstruction(MachineInstr *MI,
 /// two-address instruction.
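 /// For illustration (hypothetical operands, not from this patch): commuting
 ///   %r0<def> = ADD %r0<tied>, %r1
 /// must rewrite the destination as well, so the query answers true; a
 /// three-address form with an independent destination commutes freely and
 /// answers false.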
bool TargetInstrInfoImpl::CommuteChangesDestination(MachineInstr *MI,
                                                    unsigned &OpIdx) const{
+  const TargetInstrDesc &TID = MI->getDesc();
+  if (!TID.getNumDefs())
+    return false;
   assert(MI->getOperand(1).isReg() && MI->getOperand(2).isReg() &&
          "This only knows how to commute register operands so far");
   if (MI->getOperand(0).getReg() == MI->getOperand(1).getReg()) {
diff --git a/lib/CodeGen/VirtRegRewriter.cpp b/lib/CodeGen/VirtRegRewriter.cpp
index bd6584a..be0b016 100644
--- a/lib/CodeGen/VirtRegRewriter.cpp
+++ b/lib/CodeGen/VirtRegRewriter.cpp
@@ -356,7 +356,7 @@ static void InvalidateKills(MachineInstr &MI,
                             SmallVector<unsigned, 2> *KillRegs = NULL) {
   for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
     MachineOperand &MO = MI.getOperand(i);
-    if (!MO.isReg() || !MO.isUse() || !MO.isKill())
+    if (!MO.isReg() || !MO.isUse() || !MO.isKill() || MO.isUndef())
       continue;
     unsigned Reg = MO.getReg();
     if (TargetRegisterInfo::isVirtualRegister(Reg))
@@ -390,12 +390,12 @@ static bool InvalidateRegDef(MachineBasicBlock::iterator I,
   MachineOperand *DefOp = NULL;
   for (unsigned i = 0, e = DefMI->getNumOperands(); i != e; ++i) {
     MachineOperand &MO = DefMI->getOperand(i);
-    if (MO.isReg() && MO.isDef()) {
-      if (MO.getReg() == Reg)
-        DefOp = &MO;
-      else if (!MO.isDead())
-        HasLiveDef = true;
-    }
+    if (!MO.isReg() || !MO.isDef() || MO.isUndef())
+      continue;
+    if (MO.getReg() == Reg)
+      DefOp = &MO;
+    else if (!MO.isDead())
+      HasLiveDef = true;
   }
   if (!DefOp)
     return false;
@@ -430,7 +430,7 @@ static void UpdateKills(MachineInstr &MI, const TargetRegisterInfo* TRI,
                         std::vector<MachineOperand*> &KillOps) {
   for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
     MachineOperand &MO = MI.getOperand(i);
-    if (!MO.isReg() || !MO.isUse())
+    if (!MO.isReg() || !MO.isUse() || MO.isUndef())
       continue;
     unsigned Reg = MO.getReg();
     if (Reg == 0)
@@ -1289,8 +1289,7 @@ private:
             if (InvalidateRegDef(PrevMII, *MII, KillRegs[j], HasOtherDef)) {
               MachineInstr *DeadDef = PrevMII;
               if (ReMatDefs.count(DeadDef) && !HasOtherDef) {
-                // FIXME: This assumes a remat def does not have side
-                // effects.
+                // FIXME: This assumes a remat def does not have side effects.
                 VRM.RemoveMachineInstrFromMaps(DeadDef);
                 MBB.erase(DeadDef);
                 ++NumDRM;
@@ -1569,6 +1568,8 @@ private:
         if (MO.isImplicit())
           // If the virtual register is implicitly defined, emit an implicit_def
           // before so scavenger knows it's "defined".
+          // FIXME: This is a horrible hack done by the register allocator to
+          // remat a definition with virtual register operand.
           VirtUseOps.insert(VirtUseOps.begin(), i);
         else
           VirtUseOps.push_back(i);
@@ -1595,6 +1596,7 @@ private:
         MI.getOperand(i).setReg(RReg);
         MI.getOperand(i).setSubReg(0);
         if (VRM.isImplicitlyDefined(VirtReg))
+          // FIXME: Is this needed?
           BuildMI(MBB, &MI, MI.getDebugLoc(),
                   TII->get(TargetInstrInfo::IMPLICIT_DEF), RReg);
         continue;
@@ -1604,22 +1606,16 @@ private:
       if (!MO.isUse())
         continue;  // Handle defs in the loop below (handle use&def here though)
 
-      bool AvoidReload = false;
-      if (LIs->hasInterval(VirtReg)) {
-        LiveInterval &LI = LIs->getInterval(VirtReg);
-        if (!LI.liveAt(LIs->getUseIndex(LI.beginNumber())))
-          // Must be defined by an implicit def. It should not be spilled. Note,
-          // this is for correctness reason. e.g.
-          // 8   %reg1024<def> = IMPLICIT_DEF
-          // 12  %reg1024<def> = INSERT_SUBREG %reg1024<kill>, %reg1025, 2
-          // The live range [12, 14) are not part of the r1024 live interval since
-          // it's defined by an implicit def. It will not conflicts with live
-          // interval of r1025. Now suppose both registers are spilled, you can
-          // easily see a situation where both registers are reloaded before
-          // the INSERT_SUBREG and both target registers that would overlap.
-          AvoidReload = true;
-      }
-
+      bool AvoidReload = MO.isUndef();
+      // Check if it is defined by an implicit def. It should not be spilled.
+      // Note, this is for correctness reasons, e.g.
+      // 8   %reg1024<def> = IMPLICIT_DEF
+      // 12  %reg1024<def> = INSERT_SUBREG %reg1024<kill>, %reg1025, 2
+      // The live range [12, 14) is not part of the r1024 live interval since
+      // it's defined by an implicit def. It will not conflict with the live
+      // interval of r1025. Now suppose both registers are spilled; you can
+      // easily see a situation where both registers are reloaded before
+      // the INSERT_SUBREG and both target registers that would overlap.
       bool DoReMat = VRM.isReMaterialized(VirtReg);
       int SSorRMId = DoReMat
         ? VRM.getReMatId(VirtReg) : VRM.getStackSlot(VirtReg);
@@ -2033,8 +2029,12 @@ private:
       if (!TargetRegisterInfo::isVirtualRegister(VirtReg)) {
         // Check to see if this is a noop copy. If so, eliminate the
         // instruction before considering the dest reg to be changed.
+        // Also check if it's copying from an "undef"; if so, we can't
+        // eliminate this or else the undef marker is lost and it will
+        // confuse the scavenger. This is extremely rare.
         unsigned Src, Dst, SrcSR, DstSR;
-        if (TII->isMoveInstr(MI, Src, Dst, SrcSR, DstSR) && Src == Dst) {
+        if (TII->isMoveInstr(MI, Src, Dst, SrcSR, DstSR) && Src == Dst &&
+            !MI.findRegisterUseOperand(Src)->isUndef()) {
           ++NumDCE;
           DOUT << "Removing now-noop copy: " << MI;
           SmallVector<unsigned, 2> KillRegs;
@@ -2053,7 +2053,7 @@ private:
           Spills.disallowClobberPhysReg(VirtReg);
           goto ProcessNextInst;
         }
-
+
         // If it's not a no-op copy, it clobbers the value in the destreg.
         Spills.ClobberPhysReg(VirtReg);
         ReusedOperands.markClobbered(VirtReg);
diff --git a/lib/CompilerDriver/Action.cpp b/lib/CompilerDriver/Action.cpp
index 816f793..5fd63ee 100644
--- a/lib/CompilerDriver/Action.cpp
+++ b/lib/CompilerDriver/Action.cpp
@@ -13,10 +13,8 @@
 
 #include "llvm/CompilerDriver/Action.h"
 #include "llvm/CompilerDriver/BuiltinOptions.h"
-
+#include "llvm/Support/raw_ostream.h"
 #include "llvm/System/Program.h"
-
-#include <iostream>
 #include <stdexcept>
 
 using namespace llvm;
@@ -58,15 +56,15 @@ namespace {
   }
 
   void print_string (const std::string& str) {
-    std::cerr << str << ' ';
+    errs() << str << ' ';
  }
 }
 
 int llvmc::Action::Execute() const {
   if (DryRun || VerboseMode) {
-    std::cerr << Command_ << " ";
+    errs() << Command_ << " ";
     std::for_each(Args_.begin(), Args_.end(), print_string);
-    std::cerr << '\n';
+    errs() << '\n';
   }
   if (DryRun)
     return 0;
diff --git a/lib/CompilerDriver/BuiltinOptions.cpp b/lib/CompilerDriver/BuiltinOptions.cpp
new file mode 100644
index 0000000..a3364e8
--- /dev/null
+++ b/lib/CompilerDriver/BuiltinOptions.cpp
@@ -0,0 +1,55 @@
+//===--- BuiltinOptions.cpp - The LLVM Compiler Driver ----------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open
+// Source License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Definitions of all global command-line option variables.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CompilerDriver/BuiltinOptions.h"
+
+#ifdef ENABLE_LLVMC_DYNAMIC_PLUGINS
+#include "llvm/Support/PluginLoader.h"
+#endif
+
+namespace cl = llvm::cl;
+
+// External linkage here is intentional.
+
+cl::list<std::string> InputFilenames(cl::Positional, cl::desc("<input file>"),
+                                     cl::ZeroOrMore);
+cl::opt<std::string> OutputFilename("o", cl::desc("Output file name"),
+                                    cl::value_desc("file"), cl::Prefix);
+cl::list<std::string> Languages("x",
+          cl::desc("Specify the language of the following input files"),
+          cl::ZeroOrMore);
+cl::opt<bool> DryRun("dry-run",
+                     cl::desc("Only pretend to run commands"));
+cl::opt<bool> VerboseMode("v",
+                          cl::desc("Enable verbose mode"));
+
+cl::opt<bool> CheckGraph("check-graph",
+                         cl::desc("Check the compilation graph for errors"),
+                         cl::Hidden);
+cl::opt<bool> WriteGraph("write-graph",
+                         cl::desc("Write compilation-graph.dot file"),
+                         cl::Hidden);
+cl::opt<bool> ViewGraph("view-graph",
+                        cl::desc("Show compilation graph in GhostView"),
+                        cl::Hidden);
+
+cl::opt<SaveTempsEnum::Values> SaveTemps
+("save-temps", cl::desc("Keep temporary files"),
+ cl::init(SaveTempsEnum::Unset),
+ cl::values(clEnumValN(SaveTempsEnum::Obj, "obj",
+                       "Save files in the directory specified with -o"),
+            clEnumValN(SaveTempsEnum::Cwd, "cwd",
+                       "Use current working directory"),
+            clEnumValN(SaveTempsEnum::Cwd, "", "Same as 'cwd'"),
+            clEnumValEnd),
+ cl::ValueOptional);
diff --git a/lib/CompilerDriver/CompilationGraph.cpp b/lib/CompilerDriver/CompilationGraph.cpp
index 1212a21..f303943 100644
--- a/lib/CompilerDriver/CompilationGraph.cpp
+++ b/lib/CompilerDriver/CompilationGraph.cpp
@@ -18,10 +18,10 @@
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/Support/DOTGraphTraits.h"
 #include "llvm/Support/GraphWriter.h"
+#include "llvm/Support/raw_ostream.h"
 
 #include <algorithm>
 #include <cstring>
-#include <iostream>
 #include <iterator>
 #include <limits>
 #include <queue>
@@ -346,8 +346,8 @@ int CompilationGraph::CheckLanguageNames() const {
 
       if (!N2.ToolPtr) {
         ++ret;
-        std::cerr << "Error: there is an edge from '" << N1.ToolPtr->Name()
-                  << "' back to the root!\n\n";
+        errs() << "Error: there is an edge from '" << N1.ToolPtr->Name()
+               << "' back to the root!\n\n";
         continue;
       }
 
@@ -363,17 +363,17 @@ int CompilationGraph::CheckLanguageNames() const {
 
       if (!eq) {
         ++ret;
-        std::cerr << "Error: Output->input language mismatch in the edge '" <<
-          N1.ToolPtr->Name() << "' -> '" << N2.ToolPtr->Name() << "'!\n";
-
-        std::cerr << "Expected one of { ";
+        errs() << "Error: Output->input language mismatch in the edge '"
+               << N1.ToolPtr->Name() << "' -> '" << N2.ToolPtr->Name()
+               << "'!\n"
+               << "Expected one of { ";
 
         InLangs = N2.ToolPtr->InputLanguages();
 
         for (;*InLangs; ++InLangs) {
-          std::cerr << '\'' << *InLangs << (*(InLangs+1) ? "', " : "'");
+          errs() << '\'' << *InLangs << (*(InLangs+1) ?
"', " : "'"); } - std::cerr << " }, but got '" << OutLang << "'!\n\n"; + errs() << " }, but got '" << OutLang << "'!\n\n"; } } @@ -406,9 +406,8 @@ int CompilationGraph::CheckMultipleDefaultEdges() const { } else if (EdgeWeight == MaxWeight) { ++ret; - std::cerr - << "Error: there are multiple maximal edges stemming from the '" - << N.ToolPtr->Name() << "' node!\n\n"; + errs() << "Error: there are multiple maximal edges stemming from the '" + << N.ToolPtr->Name() << "' node!\n\n"; break; } } @@ -440,9 +439,9 @@ int CompilationGraph::CheckCycles() { } if (deleted != NodesMap.size()) { - std::cerr << "Error: there are cycles in the compilation graph!\n" - << "Try inspecting the diagram produced by " - "'llvmc --view-graph'.\n\n"; + errs() << "Error: there are cycles in the compilation graph!\n" + << "Try inspecting the diagram produced by " + << "'llvmc --view-graph'.\n\n"; return 1; } @@ -518,9 +517,9 @@ void CompilationGraph::writeGraph(const std::string& OutputFilename) { std::ofstream O(OutputFilename.c_str()); if (O.good()) { - std::cerr << "Writing '"<< OutputFilename << "' file..."; + errs() << "Writing '"<< OutputFilename << "' file..."; llvm::WriteGraph(O, this); - std::cerr << "done.\n"; + errs() << "done.\n"; O.close(); } else { diff --git a/lib/CompilerDriver/Main.cpp b/lib/CompilerDriver/Main.cpp new file mode 100644 index 0000000..c9c0413 --- /dev/null +++ b/lib/CompilerDriver/Main.cpp @@ -0,0 +1,130 @@ +//===--- Main.cpp - The LLVM Compiler Driver --------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open +// Source License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// llvmc::Main function - driver entry point. +// +//===----------------------------------------------------------------------===// + +#include "llvm/CompilerDriver/BuiltinOptions.h" +#include "llvm/CompilerDriver/CompilationGraph.h" +#include "llvm/CompilerDriver/Error.h" +#include "llvm/CompilerDriver/Plugin.h" + +#include "llvm/Support/raw_ostream.h" +#include "llvm/System/Path.h" + +#include <stdexcept> +#include <string> + +namespace cl = llvm::cl; +namespace sys = llvm::sys; +using namespace llvmc; + +namespace { + + sys::Path getTempDir() { + sys::Path tempDir; + + // GCC 4.5-style -save-temps handling. + if (SaveTemps == SaveTempsEnum::Unset) { + tempDir = sys::Path::GetTemporaryDirectory(); + } + else if (SaveTemps == SaveTempsEnum::Obj && !OutputFilename.empty()) { + tempDir = OutputFilename; + + if (!tempDir.exists()) { + std::string ErrMsg; + if (tempDir.createDirectoryOnDisk(true, &ErrMsg)) + throw std::runtime_error(ErrMsg); + } + } + // else if (SaveTemps == Cwd) -> use current dir (leave tempDir empty) + + return tempDir; + } + + /// BuildTargets - A small wrapper for CompilationGraph::Build. + int BuildTargets(CompilationGraph& graph, const LanguageMap& langMap) { + int ret; + const sys::Path& tempDir = getTempDir(); + + try { + ret = graph.Build(tempDir, langMap); + } + catch(...) { + if (SaveTemps == SaveTempsEnum::Unset) + tempDir.eraseFromDisk(true); + throw; + } + + if (SaveTemps == SaveTempsEnum::Unset) + tempDir.eraseFromDisk(true); + return ret; + } +} + +namespace llvmc { + +// Sometimes plugins want to condition on the value in argv[0]. 
+const char* ProgramName; + +int Main(int argc, char** argv) { + try { + LanguageMap langMap; + CompilationGraph graph; + + ProgramName = argv[0]; + + cl::ParseCommandLineOptions + (argc, argv, "LLVM Compiler Driver (Work In Progress)", true); + + PluginLoader Plugins; + Plugins.PopulateLanguageMap(langMap); + Plugins.PopulateCompilationGraph(graph); + + if (CheckGraph) { + int ret = graph.Check(); + if (!ret) + llvm::errs() << "check-graph: no errors found.\n"; + + return ret; + } + + if (ViewGraph) { + graph.viewGraph(); + if (!WriteGraph) + return 0; + } + + if (WriteGraph) { + graph.writeGraph(OutputFilename.empty() + ? std::string("compilation-graph.dot") + : OutputFilename); + return 0; + } + + if (InputFilenames.empty()) { + throw std::runtime_error("no input files"); + } + + return BuildTargets(graph, langMap); + } + catch(llvmc::error_code& ec) { + return ec.code(); + } + catch(const std::exception& ex) { + llvm::errs() << argv[0] << ": " << ex.what() << '\n'; + } + catch(...) { + llvm::errs() << argv[0] << ": unknown error!\n"; + } + return 1; +} + +} // end namespace llvmc diff --git a/lib/CompilerDriver/Makefile b/lib/CompilerDriver/Makefile index e5bf3e1..a5ecfd5 100644 --- a/lib/CompilerDriver/Makefile +++ b/lib/CompilerDriver/Makefile @@ -12,8 +12,36 @@ LEVEL = ../.. # We don't want this library to appear in `llvm-config --libs` output, so its # name doesn't start with "LLVM". -LIBRARYNAME = CompilerDriver -LINK_COMPONENTS = support system +ifeq ($(ENABLE_LLVMC_DYNAMIC),1) + LIBRARYNAME = libCompilerDriver + LLVMLIBS = LLVMSupport.a LLVMSystem.a + LOADABLE_MODULE := 1 +else + LIBRARYNAME = CompilerDriver + LINK_COMPONENTS = support system +endif + REQUIRES_EH := 1 include $(LEVEL)/Makefile.common + +ifeq ($(ENABLE_LLVMC_DYNAMIC_PLUGINS), 1) + CPP.Flags += -DENABLE_LLVMC_DYNAMIC_PLUGINS +endif + +# Copy libCompilerDriver to the bin dir so that llvmc can find it. +ifeq ($(ENABLE_LLVMC_DYNAMIC),1) + +FullLibName = $(LIBRARYNAME)$(SHLIBEXT) + +all-local:: $(ToolDir)/$(FullLibName) + +$(ToolDir)/$(FullLibName): $(LibDir)/$(FullLibName) $(ToolDir)/.dir + $(Echo) Copying $(BuildMode) Shared Library $(FullLibName) to $@ + -$(Verb) $(CP) $< $@ + +clean-local:: + $(Echo) Removing $(BuildMode) Shared Library $(FullLibName) \ + from $(ToolDir) + -$(Verb) $(RM) -f $(ToolDir)/$(FullLibName) +endif diff --git a/lib/CompilerDriver/Tool.cpp b/lib/CompilerDriver/Tool.cpp index e704dd9..7953dd2 100644 --- a/lib/CompilerDriver/Tool.cpp +++ b/lib/CompilerDriver/Tool.cpp @@ -14,11 +14,17 @@ #include "llvm/CompilerDriver/BuiltinOptions.h" #include "llvm/CompilerDriver/Tool.h" +#include "llvm/ADT/StringExtras.h" #include "llvm/System/Path.h" using namespace llvm; using namespace llvmc; +// SplitString is used by derived Tool classes. 
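+// (Presumed rationale, inferred rather than stated in the patch: taking the
+// address of llvm::SplitString below pins the symbol into this library so
+// that dynamically loaded plugins can call it, even though Tool.cpp itself
+// never does.)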
+typedef void (*SplitStringFunPtr)(const std::string&, + std::vector<std::string>&, const char*); +SplitStringFunPtr ForceLinkageSplitString = &llvm::SplitString; + namespace { sys::Path MakeTempFile(const sys::Path& TempDir, const std::string& BaseName, const std::string& Suffix) { diff --git a/lib/Debugger/Debugger.cpp b/lib/Debugger/Debugger.cpp index b12d90a..77fd2ac 100644 --- a/lib/Debugger/Debugger.cpp +++ b/lib/Debugger/Debugger.cpp @@ -46,11 +46,12 @@ std::string Debugger::getProgramPath() const { } static Module * -getMaterializedModuleProvider(const std::string &Filename) { +getMaterializedModuleProvider(const std::string &Filename, + LLVMContext& C) { std::auto_ptr<MemoryBuffer> Buffer; Buffer.reset(MemoryBuffer::getFileOrSTDIN(Filename.c_str())); if (Buffer.get()) - return ParseBitcodeFile(Buffer.get()); + return ParseBitcodeFile(Buffer.get(), C); return 0; } @@ -58,9 +59,9 @@ getMaterializedModuleProvider(const std::string &Filename) { /// the PATH for the specified program, loading it when found. If the /// specified program cannot be found, an exception is thrown to indicate the /// error. -void Debugger::loadProgram(const std::string &Filename) { - if ((Program = getMaterializedModuleProvider(Filename)) || - (Program = getMaterializedModuleProvider(Filename+".bc"))) +void Debugger::loadProgram(const std::string &Filename, LLVMContext& C) { + if ((Program = getMaterializedModuleProvider(Filename, C)) || + (Program = getMaterializedModuleProvider(Filename+".bc", C))) return; // Successfully loaded the program. // Search the program path for the file... @@ -69,9 +70,9 @@ void Debugger::loadProgram(const std::string &Filename) { std::string Directory = getToken(Path, ":"); while (!Directory.empty()) { - if ((Program = getMaterializedModuleProvider(Directory +"/"+ Filename)) || - (Program = getMaterializedModuleProvider(Directory +"/"+ Filename - + ".bc"))) + if ((Program = getMaterializedModuleProvider(Directory +"/"+ Filename, C)) + || (Program = getMaterializedModuleProvider(Directory +"/"+ Filename + + ".bc", C))) return; // Successfully loaded the program. Directory = getToken(Path, ":"); diff --git a/lib/Debugger/ProgramInfo.cpp b/lib/Debugger/ProgramInfo.cpp index 125ff55..e58b3d5 100644 --- a/lib/Debugger/ProgramInfo.cpp +++ b/lib/Debugger/ProgramInfo.cpp @@ -271,8 +271,7 @@ ProgramInfo::getSourceFiles(bool RequiresCompleteMap) { // should be on the use list of the llvm.dbg.translation_units global. // GlobalVariable *Units = - M->getGlobalVariable("llvm.dbg.translation_units", - StructType::get(std::vector<const Type*>())); + M->getGlobalVariable("llvm.dbg.translation_units", StructType::get()); if (Units == 0) throw "Program contains no debugging information!"; @@ -354,8 +353,7 @@ ProgramInfo::getSourceFunctions(bool RequiresCompleteMap) { // should be on the use list of the llvm.dbg.translation_units global. // GlobalVariable *Units = - M->getGlobalVariable("llvm.dbg.globals", - StructType::get(std::vector<const Type*>())); + M->getGlobalVariable("llvm.dbg.globals", StructType::get()); if (Units == 0) throw "Program contains no debugging information!"; diff --git a/lib/ExecutionEngine/JIT/JIT.cpp b/lib/ExecutionEngine/JIT/JIT.cpp index db5a306..1d8312f 100644 --- a/lib/ExecutionEngine/JIT/JIT.cpp +++ b/lib/ExecutionEngine/JIT/JIT.cpp @@ -453,7 +453,7 @@ GenericValue JIT::runFunction(Function *F, // arguments. Make this function and return. // First, create the function. 
- FunctionType *STy=FunctionType::get(RetTy, std::vector<const Type*>(), false); + FunctionType *STy=FunctionType::get(RetTy, false); Function *Stub = Function::Create(STy, Function::InternalLinkage, "", F->getParent()); diff --git a/lib/Linker/LinkArchives.cpp b/lib/Linker/LinkArchives.cpp index 551cc8c..faf01af 100644 --- a/lib/Linker/LinkArchives.cpp +++ b/lib/Linker/LinkArchives.cpp @@ -115,7 +115,7 @@ Linker::LinkInArchive(const sys::Path &Filename, bool &is_native) { std::string ErrMsg; std::auto_ptr<Archive> AutoArch ( - Archive::OpenAndLoadSymbols(Filename,&ErrMsg)); + Archive::OpenAndLoadSymbols(Filename, Context, &ErrMsg)); Archive* arch = AutoArch.get(); diff --git a/lib/Linker/LinkItems.cpp b/lib/Linker/LinkItems.cpp index 7c888aa..dc0f7c1 100644 --- a/lib/Linker/LinkItems.cpp +++ b/lib/Linker/LinkItems.cpp @@ -160,7 +160,7 @@ bool Linker::LinkInFile(const sys::Path &File, bool &is_native) { if (File.toString() == "-") { std::auto_ptr<Module> M; if (MemoryBuffer *Buffer = MemoryBuffer::getSTDIN()) { - M.reset(ParseBitcodeFile(Buffer, &Error)); + M.reset(ParseBitcodeFile(Buffer, Context, &Error)); delete Buffer; if (M.get()) if (!LinkInModule(M.get(), &Error)) diff --git a/lib/Linker/Linker.cpp b/lib/Linker/Linker.cpp index d673772..6e0b760 100644 --- a/lib/Linker/Linker.cpp +++ b/lib/Linker/Linker.cpp @@ -20,24 +20,21 @@ using namespace llvm; Linker::Linker(const std::string& progname, const std::string& modname, - unsigned flags) - : Composite(0) - , LibPaths() - , Flags(flags) - , Error() - , ProgramName(progname) -{ - Composite = new Module(modname); -} - -Linker::Linker(const std::string& progname, Module* aModule, unsigned flags) - : Composite(aModule) - , LibPaths() - , Flags(flags) - , Error() - , ProgramName(progname) -{ -} + LLVMContext& C, unsigned flags): + Context(C), + Composite(new Module(modname, C)), + LibPaths(), + Flags(flags), + Error(), + ProgramName(progname) { } + +Linker::Linker(const std::string& progname, Module* aModule, unsigned flags) : + Context(aModule->getContext()), + Composite(aModule), + LibPaths(), + Flags(flags), + Error(), + ProgramName(progname) { } Linker::~Linker() { delete Composite; @@ -106,7 +103,7 @@ Linker::LoadObject(const sys::Path &FN) { const std::string &FNS = FN.toString(); std::auto_ptr<MemoryBuffer> Buffer(MemoryBuffer::getFileOrSTDIN(FNS.c_str())); if (Buffer.get()) - Result = ParseBitcodeFile(Buffer.get(), &ParseErrorMessage); + Result = ParseBitcodeFile(Buffer.get(), Context, &ParseErrorMessage); else ParseErrorMessage = "Error reading file '" + FNS + "'"; diff --git a/lib/MC/MCAsmStreamer.cpp b/lib/MC/MCAsmStreamer.cpp index e38f2b3..7d94464 100644 --- a/lib/MC/MCAsmStreamer.cpp +++ b/lib/MC/MCAsmStreamer.cpp @@ -10,6 +10,7 @@ #include "llvm/MC/MCStreamer.h" #include "llvm/MC/MCContext.h" +#include "llvm/MC/MCInst.h" #include "llvm/MC/MCSection.h" #include "llvm/MC/MCSymbol.h" #include "llvm/MC/MCValue.h" @@ -25,7 +26,7 @@ namespace { public: MCAsmStreamer(MCContext &Context, raw_ostream &_OS) - : MCStreamer(Context), OS(_OS) {} + : MCStreamer(Context), OS(_OS), CurSection(0) {} ~MCAsmStreamer() {} /// @name MCStreamer Interface @@ -66,11 +67,11 @@ static inline raw_ostream &operator<<(raw_ostream &os, const MCValue &Value) { os << Value.getSymA()->getName(); if (Value.getSymB()) os << " - " << Value.getSymB()->getName(); - if (Value.getCst()) - os << " + " << Value.getCst(); + if (Value.getConstant()) + os << " + " << Value.getConstant(); } else { assert(!Value.getSymB() && "Invalid machine code value!"); - os << 
Value.getCst(); + os << Value.getConstant(); } return os; @@ -83,7 +84,7 @@ static inline int64_t truncateToSize(int64_t Value, unsigned Bytes) { static inline MCValue truncateToSize(const MCValue &Value, unsigned Bytes) { return MCValue::get(Value.getSymA(), Value.getSymB(), - truncateToSize(Value.getCst(), Bytes)); + truncateToSize(Value.getConstant(), Bytes)); } void MCAsmStreamer::SwitchSection(MCSection *Section) { @@ -105,6 +106,7 @@ void MCAsmStreamer::EmitLabel(MCSymbol *Symbol) { OS << Symbol->getName() << ":\n"; Symbol->setSection(CurSection); + Symbol->setExternal(false); } void MCAsmStreamer::EmitAssignment(MCSymbol *Symbol, const MCValue &Value, @@ -164,20 +166,23 @@ void MCAsmStreamer::EmitValue(const MCValue &Value, unsigned Size) { void MCAsmStreamer::EmitValueToAlignment(unsigned ByteAlignment, int64_t Value, unsigned ValueSize, unsigned MaxBytesToEmit) { + // Some assemblers don't support .balign, so we always emit as .p2align if + // this is a power of two. Otherwise we assume the client knows the target + // supports .balign and use that. unsigned Pow2 = Log2_32(ByteAlignment); - assert((1U << Pow2) == ByteAlignment && "Invalid alignment!"); + bool IsPow2 = (1U << Pow2) == ByteAlignment; switch (ValueSize) { default: assert(0 && "Invalid size for machine code value!"); case 8: assert(0 && "Unsupported alignment size!"); - case 1: OS << ".p2align"; break; - case 2: OS << ".p2alignw"; break; - case 4: OS << ".p2alignl"; break; + case 1: OS << (IsPow2 ? ".p2align" : ".balign"); break; + case 2: OS << (IsPow2 ? ".p2alignw" : ".balignw"); break; + case 4: OS << (IsPow2 ? ".p2alignl" : ".balignl"); break; } - OS << ' ' << Pow2; + OS << ' ' << (IsPow2 ? Pow2 : ByteAlignment); OS << ", " << truncateToSize(Value, ValueSize); if (MaxBytesToEmit) @@ -191,10 +196,30 @@ void MCAsmStreamer::EmitValueToOffset(const MCValue &Offset, OS << ".org " << Offset << ", " << (unsigned) Value << '\n'; } +static raw_ostream &operator<<(raw_ostream &OS, const MCOperand &Op) { + if (Op.isReg()) + return OS << "reg:" << Op.getReg(); + if (Op.isImm()) + return OS << "imm:" << Op.getImm(); + if (Op.isMBBLabel()) + return OS << "mbblabel:(" + << Op.getMBBLabelFunction() << ", " << Op.getMBBLabelBlock(); + assert(Op.isMCValue() && "Invalid operand!"); + return OS << "val:" << Op.getMCValue(); +} + void MCAsmStreamer::EmitInstruction(const MCInst &Inst) { assert(CurSection && "Cannot emit contents before setting section!"); - // FIXME: Implement. - OS << "# FIXME: Implement instruction printing!\n"; + // FIXME: Implement proper printing. + OS << "MCInst(" + << "opcode=" << Inst.getOpcode() << ", " + << "operands=["; + for (unsigned i = 0, e = Inst.getNumOperands(); i != e; ++i) { + if (i) + OS << ", "; + OS << Inst.getOperand(i); + } + OS << "])\n"; } void MCAsmStreamer::Finish() { diff --git a/lib/Makefile b/lib/Makefile index 7199da5..1e87d9e 100644 --- a/lib/Makefile +++ b/lib/Makefile @@ -8,8 +8,10 @@ ##===----------------------------------------------------------------------===## LEVEL = .. 
-PARALLEL_DIRS = VMCore AsmParser Bitcode Archive Analysis Transforms CodeGen \
-                Target ExecutionEngine Debugger Linker CompilerDriver MC
+include $(LEVEL)/Makefile.config
+
+PARALLEL_DIRS := VMCore AsmParser Bitcode Archive Analysis Transforms CodeGen \
+                 Target ExecutionEngine Debugger Linker MC CompilerDriver
 
 include $(LEVEL)/Makefile.common
diff --git a/lib/Support/APInt.cpp b/lib/Support/APInt.cpp
index 73bf774..30dc352 100644
--- a/lib/Support/APInt.cpp
+++ b/lib/Support/APInt.cpp
@@ -2178,6 +2178,12 @@ void APInt::print(raw_ostream &OS, bool isSigned) const {
   OS << S.c_str();
 }
 
+std::ostream &llvm::operator<<(std::ostream &o, const APInt &I) {
+  raw_os_ostream OS(o);
+  OS << I;
+  return o;
+}
+
 // This implements a variety of operations on a representation of
 // arbitrary precision, two's-complement, bignum integer values.
diff --git a/lib/Support/Annotation.cpp b/lib/Support/Annotation.cpp
index b778043..4b5b97e 100644
--- a/lib/Support/Annotation.cpp
+++ b/lib/Support/Annotation.cpp
@@ -39,7 +39,7 @@ namespace {
 }
 
 typedef std::map<const char*, unsigned, StrCmp> IDMapType;
-static unsigned IDCounter = 0;  // Unique ID counter
+static volatile sys::cas_flag IDCounter = 0;  // Unique ID counter
 
 // Static member to ensure initialization on demand.
 static ManagedStatic<IDMapType> IDMap;
diff --git a/lib/Support/SourceMgr.cpp b/lib/Support/SourceMgr.cpp
index d789f10..6b0d55c1 100644
--- a/lib/Support/SourceMgr.cpp
+++ b/lib/Support/SourceMgr.cpp
@@ -76,38 +76,36 @@ unsigned SourceMgr::FindLineNumber(SMLoc Loc, int BufferID) const {
   return LineNo;
 }
 
-void SourceMgr::PrintIncludeStack(SMLoc IncludeLoc) const {
+void SourceMgr::PrintIncludeStack(SMLoc IncludeLoc, raw_ostream &OS) const {
   if (IncludeLoc == SMLoc()) return;  // Top of stack.
 
   int CurBuf = FindBufferContainingLoc(IncludeLoc);
   assert(CurBuf != -1 && "Invalid or unspecified location!");
 
-  PrintIncludeStack(getBufferInfo(CurBuf).IncludeLoc);
+  PrintIncludeStack(getBufferInfo(CurBuf).IncludeLoc, OS);
 
-  errs() << "Included from "
-         << getBufferInfo(CurBuf).Buffer->getBufferIdentifier()
-         << ":" << FindLineNumber(IncludeLoc, CurBuf) << ":\n";
+  OS << "Included from "
+     << getBufferInfo(CurBuf).Buffer->getBufferIdentifier()
+     << ":" << FindLineNumber(IncludeLoc, CurBuf) << ":\n";
 }
 
-void SourceMgr::PrintMessage(SMLoc Loc, const std::string &Msg) const {
-  raw_ostream &OS = errs();
+/// GetMessage - Return an SMDiagnostic at the specified location with the
+/// specified string.
+///
+/// @param Type - If non-null, the kind of message (e.g., "error") which is
+/// prefixed to the message.
+SMDiagnostic SourceMgr::GetMessage(SMLoc Loc, const std::string &Msg,
+                                   const char *Type) const {
   // First thing to do: find the current buffer containing the specified
   // location.
   int CurBuf = FindBufferContainingLoc(Loc);
   assert(CurBuf != -1 && "Invalid or unspecified location!");
 
-  PrintIncludeStack(getBufferInfo(CurBuf).IncludeLoc);
-
   MemoryBuffer *CurMB = getBufferInfo(CurBuf).Buffer;
 
-  OS << "Parsing " << CurMB->getBufferIdentifier() << ":"
-     << FindLineNumber(Loc, CurBuf) << ": ";
-
-  OS << Msg << "\n";
-
   // Scan backward to find the start of the line.
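   // (Illustrative sketch of the eventual rendering, file and message made
   // up: a diagnostic at test.ll:3:8 comes out of SMDiagnostic::Print
   // roughly as
   //     test.ll:3:8: error: expected value token
   //         %x = add i32 %y, ???
   //                          ^
   // and the scan below recovers the enclosing source line for that purpose.)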
const char *LineStart = Loc.getPointer(); while (LineStart != CurMB->getBufferStart() && @@ -118,10 +116,60 @@ void SourceMgr::PrintMessage(SMLoc Loc, const std::string &Msg) const { while (LineEnd != CurMB->getBufferEnd() && LineEnd[0] != '\n' && LineEnd[0] != '\r') ++LineEnd; + + std::string PrintedMsg; + if (Type) { + PrintedMsg = Type; + PrintedMsg += ": "; + } + PrintedMsg += Msg; + // Print out the line. - OS << std::string(LineStart, LineEnd) << "\n"; - // Print out spaces before the caret. - for (const char *Pos = LineStart; Pos != Loc.getPointer(); ++Pos) - OS << (*Pos == '\t' ? '\t' : ' '); - OS << "^\n"; + return SMDiagnostic(CurMB->getBufferIdentifier(), FindLineNumber(Loc, CurBuf), + Loc.getPointer()-LineStart, PrintedMsg, + std::string(LineStart, LineEnd)); +} + +void SourceMgr::PrintMessage(SMLoc Loc, const std::string &Msg, + const char *Type) const { + raw_ostream &OS = errs(); + + int CurBuf = FindBufferContainingLoc(Loc); + assert(CurBuf != -1 && "Invalid or unspecified location!"); + PrintIncludeStack(getBufferInfo(CurBuf).IncludeLoc, OS); + + GetMessage(Loc, Msg, Type).Print(0, OS); } + +//===----------------------------------------------------------------------===// +// SMDiagnostic Implementation +//===----------------------------------------------------------------------===// + +void SMDiagnostic::Print(const char *ProgName, raw_ostream &S) { + if (ProgName && ProgName[0]) + S << ProgName << ": "; + + if (Filename == "-") + S << "<stdin>"; + else + S << Filename; + + if (LineNo != -1) { + S << ':' << LineNo; + if (ColumnNo != -1) + S << ':' << (ColumnNo+1); + } + + S << ": " << Message << '\n'; + + if (LineNo != -1 && ColumnNo != -1) { + S << LineContents << '\n'; + + // Print out spaces/tabs before the caret. + for (unsigned i = 0; i != unsigned(ColumnNo); ++i) + S << (LineContents[i] == '\t' ? '\t' : ' '); + S << "^\n"; + } +} + + diff --git a/lib/Support/SystemUtils.cpp b/lib/Support/SystemUtils.cpp index 80d6e4c..c8c3238 100644 --- a/lib/Support/SystemUtils.cpp +++ b/lib/Support/SystemUtils.cpp @@ -38,15 +38,21 @@ bool llvm::CheckBitcodeOutputToConsole(std::ostream* stream_to_check, /// being executed. This allows us to find another LLVM tool if it is built /// into the same directory, but that directory is neither the current /// directory, nor in the PATH. If the executable cannot be found, return an -/// empty string. +/// empty string. Return the input string if given a full path to an executable. /// #undef FindExecutable // needed on windows :( sys::Path llvm::FindExecutable(const std::string &ExeName, const std::string &ProgramPath) { - // First check the directory that the calling program is in. We can do this - // if ProgramPath contains at least one / character, indicating that it is a - // relative path to bugpoint itself. - sys::Path Result ( ProgramPath ); + // First check if the given name is already a valid path to an executable. + sys::Path Result(ExeName); + Result.makeAbsolute(); + if (Result.canExecute()) + return Result; + + // Otherwise check the directory that the calling program is in. We can do + // this if ProgramPath contains at least one / character, indicating that it + // is a relative path to bugpoint itself. 
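+  // (Illustrative sketch with hypothetical arguments: a call such as
+  // FindExecutable("llvm-as", "/foo/bin/bugpoint") now tries "llvm-as" as a
+  // path in its own right first, and only then falls back to
+  // "/foo/bin/llvm-as" through the code below.)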
+ Result = ProgramPath; Result.eraseComponent(); if (!Result.isEmpty()) { Result.appendComponent(ExeName); diff --git a/lib/Support/Triple.cpp b/lib/Support/Triple.cpp index dd5c3d6..279bd43 100644 --- a/lib/Support/Triple.cpp +++ b/lib/Support/Triple.cpp @@ -48,6 +48,7 @@ const char *Triple::getOSTypeName(OSType Kind) { case DragonFly: return "dragonfly"; case FreeBSD: return "freebsd"; case Linux: return "linux"; + case OpenBSD: return "openbsd"; } return "<invalid>"; @@ -90,6 +91,8 @@ void Triple::Parse() const { OS = FreeBSD; else if (memcmp(&OSName[0], "linux", 5) == 0) OS = Linux; + else if (memcmp(&OSName[0], "openbsd", 7) == 0) + OS = OpenBSD; else OS = UnknownOS; diff --git a/lib/System/CMakeLists.txt b/lib/System/CMakeLists.txt index 431629a..bf7a0c6 100644 --- a/lib/System/CMakeLists.txt +++ b/lib/System/CMakeLists.txt @@ -3,6 +3,7 @@ add_llvm_library(LLVMSystem Atomic.cpp Disassembler.cpp DynamicLibrary.cpp + Errno.cpp Host.cpp IncludeFile.cpp Memory.cpp diff --git a/lib/System/Errno.cpp b/lib/System/Errno.cpp new file mode 100644 index 0000000..d046aba --- /dev/null +++ b/lib/System/Errno.cpp @@ -0,0 +1,71 @@ +//===- Errno.cpp - errno support --------------------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the errno wrappers. +// +//===----------------------------------------------------------------------===// + +#include "llvm/System/Errno.h" +#include "llvm/Config/config.h" // Get autoconf configuration settings + +#if HAVE_STRING_H +#include <string.h> + +//===----------------------------------------------------------------------===// +//=== WARNING: Implementation here must contain only TRULY operating system +//=== independent code. +//===----------------------------------------------------------------------===// + +namespace llvm { +namespace sys { + +#if HAVE_ERRNO_H +#include <errno.h> +std::string StrError() { + return StrError(errno); +} +#endif // HAVE_ERRNO_H + +std::string StrError(int errnum) { + const int MaxErrStrLen = 2000; + char buffer[MaxErrStrLen]; + buffer[0] = '\0'; + char* str = buffer; +#ifdef HAVE_STRERROR_R + // strerror_r is thread-safe. + if (errnum) +# if defined(__GLIBC__) && defined(_GNU_SOURCE) + // glibc defines its own incompatible version of strerror_r + // which may not use the buffer supplied. + str = strerror_r(errnum,buffer,MaxErrStrLen-1); +# else + strerror_r(errnum,buffer,MaxErrStrLen-1); +# endif +#elif defined(HAVE_STRERROR_S) // Windows. + if (errnum) + strerror_s(buffer, errnum); +#elif defined(HAVE_STRERROR) + // Copy the thread un-safe result of strerror into + // the buffer as fast as possible to minimize impact + // of collision of strerror in multiple threads. 
+ if (errnum) + strncpy(buffer,strerror(errnum),MaxErrStrLen-1); + buffer[MaxErrStrLen-1] = '\0'; +#else + // Strange that this system doesn't even have strerror + // but, oh well, just use a generic message + sprintf(buffer, "Error #%d", errnum); +#endif + return str; +} + +} // namespace sys +} // namespace llvm + +#endif // HAVE_STRING_H diff --git a/lib/System/ThreadLocal.cpp b/lib/System/ThreadLocal.cpp index 8884e79..e7054b5 100644 --- a/lib/System/ThreadLocal.cpp +++ b/lib/System/ThreadLocal.cpp @@ -44,7 +44,7 @@ ThreadLocalImpl::ThreadLocalImpl() : data(0) { int errorcode = pthread_key_create(key, NULL); assert(errorcode == 0); (void) errorcode; - data = key; + data = (void*)key; } ThreadLocalImpl::~ThreadLocalImpl() { diff --git a/lib/System/Unix/Unix.h b/lib/System/Unix/Unix.h index c2c06dd..c15866f 100644 --- a/lib/System/Unix/Unix.h +++ b/lib/System/Unix/Unix.h @@ -20,6 +20,7 @@ //===----------------------------------------------------------------------===// #include "llvm/Config/config.h" // Get autoconf configuration settings +#include "llvm/System/Errno.h" #include <cstdlib> #include <cstdio> #include <cstring> @@ -77,34 +78,9 @@ static inline bool MakeErrMsg( std::string* ErrMsg, const std::string& prefix, int errnum = -1) { if (!ErrMsg) return true; - char buffer[MAXPATHLEN]; - buffer[0] = 0; - char* str = buffer; if (errnum == -1) errnum = errno; -#ifdef HAVE_STRERROR_R - // strerror_r is thread-safe. - if (errnum) -# if defined(__GLIBC__) && defined(_GNU_SOURCE) - // glibc defines its own incompatible version of strerror_r - // which may not use the buffer supplied. - str = strerror_r(errnum,buffer,MAXPATHLEN-1); -# else - strerror_r(errnum,buffer,MAXPATHLEN-1); -# endif -#elif HAVE_STRERROR - // Copy the thread un-safe result of strerror into - // the buffer as fast as possible to minimize impact - // of collision of strerror in multiple threads. - if (errnum) - strncpy(buffer,strerror(errnum),MAXPATHLEN-1); - buffer[MAXPATHLEN-1] = 0; -#else - // Strange that this system doesn't even have strerror - // but, oh well, just use a generic message - sprintf(buffer, "Error #%d", errnum); -#endif - *ErrMsg = prefix + ": " + str; + *ErrMsg = prefix + ": " + llvm::sys::StrError(errnum); return true; } diff --git a/lib/System/Win32/ThreadLocal.inc b/lib/System/Win32/ThreadLocal.inc index 8ab37d9..c8f7840 100644 --- a/lib/System/Win32/ThreadLocal.inc +++ b/lib/System/Win32/ThreadLocal.inc @@ -43,7 +43,7 @@ const void* ThreadLocalImpl::getInstance() { void ThreadLocalImpl::setInstance(const void* d){ DWORD* tls = static_cast<DWORD*>(data); int errorcode = TlsSetValue(*tls, const_cast<void*>(d)); - assert(errorcode == 0); + assert(errorcode != 0); } } diff --git a/lib/Target/ARM/ARM.h b/lib/Target/ARM/ARM.h index 8bf1b7c..08dc07c 100644 --- a/lib/Target/ARM/ARM.h +++ b/lib/Target/ARM/ARM.h @@ -93,7 +93,6 @@ inline static const char *ARMCondCodeToString(ARMCC::CondCodes CC) { FunctionPass *createARMISelDag(ARMBaseTargetMachine &TM); FunctionPass *createARMCodePrinterPass(raw_ostream &O, ARMBaseTargetMachine &TM, - CodeGenOpt::Level OptLevel, bool Verbose); FunctionPass *createARMCodeEmitterPass(ARMBaseTargetMachine &TM, MachineCodeEmitter &MCE); diff --git a/lib/Target/ARM/ARMAddressingModes.h b/lib/Target/ARM/ARMAddressingModes.h index 005eb7a..15c9ec1 100644 --- a/lib/Target/ARM/ARMAddressingModes.h +++ b/lib/Target/ARM/ARMAddressingModes.h @@ -305,7 +305,7 @@ namespace ARM_AM { /// abcdefgh abcdefgh abcdefgh abcdefgh control = 3 /// Return -1 if none of the above apply. 
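 /// For example (values chosen for illustration): 0x00AB00AB matches the
 /// control = 1 form and 0xABABABAB the control = 3 form, while 0x00AB0000
 /// matches none of the forms and yields -1.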
/// See ARM Reference Manual A6.3.2. - static inline int getT2SOImmValSplat (unsigned V) { + static inline int getT2SOImmValSplat(unsigned V) { unsigned u, Vs, Imm; // control = 0 if ((V & 0xffffff00) == 0) @@ -459,13 +459,13 @@ namespace ARM_AM { // // addrmode5 := reg +/- imm8*4 // - // The first operand is always a Reg. The third field encodes the operation - // in bit 8, the immediate in bits 0-7. + // The first operand is always a Reg. The second operand encodes the + // operation in bit 8 and the immediate in bits 0-7. // - // This can also be used for FP load/store multiple ops. The third field encodes - // writeback mode in bit 8, the number of registers (or 2 times the number of - // registers for DPR ops) in bits 0-7. In addition, bit 9-11 encodes one of the - // following two sub-modes: + // This is also used for FP load/store multiple ops. The second operand + // encodes the writeback mode in bit 8 and the number of registers (or 2 + // times the number of registers for DPR ops) in bits 0-7. In addition, + // bits 9-11 encode one of the following two sub-modes: // // IA - Increment after // DB - Decrement before @@ -496,7 +496,29 @@ namespace ARM_AM { static inline bool getAM5WBFlag(unsigned AM5Opc) { return ((AM5Opc >> 8) & 1); } - + + //===--------------------------------------------------------------------===// + // Addressing Mode #6 + //===--------------------------------------------------------------------===// + // + // This is used for NEON load / store instructions. + // + // addrmode6 := reg with optional writeback + // + // This is stored in three operands [regaddr, regupdate, opc]. The first is + // the address register. The second register holds the value of a post-access + // increment for writeback or reg0 if no writeback or if the writeback + // increment is the size of the memory access. The third operand encodes + // whether there is writeback to the address register. + + static inline unsigned getAM6Opc(bool WB = false) { + return (int)WB; + } + + static inline bool getAM6WBFlag(unsigned Mode) { + return Mode & 1; + } + } // end namespace ARM_AM } // end namespace llvm diff --git a/lib/Target/ARM/ARMCodeEmitter.cpp b/lib/Target/ARM/ARMCodeEmitter.cpp index 8424c2e..f295761 100644 --- a/lib/Target/ARM/ARMCodeEmitter.cpp +++ b/lib/Target/ARM/ARMCodeEmitter.cpp @@ -1155,16 +1155,17 @@ void Emitter<CodeEmitter>::emitMiscBranchInstruction(const MachineInstr &MI) { const TargetInstrDesc &TID = MI.getDesc(); // Handle jump tables. - if (TID.Opcode == ARM::BR_JTr || TID.Opcode == ARM::BR_JTadd) { + if (TID.Opcode == ARM::BR_JTr || TID.Opcode == ARM::BR_JTadd || + TID.Opcode == ARM::t2BR_JTr || TID.Opcode == ARM::t2BR_JTadd) { // First emit a ldr pc, [] instruction. emitDataProcessingInstruction(MI, ARM::PC); // Then emit the inline jump table. - unsigned JTIndex = (TID.Opcode == ARM::BR_JTr) + unsigned JTIndex = (TID.Opcode == ARM::BR_JTr || TID.Opcode == ARM::t2BR_JTr) ? MI.getOperand(1).getIndex() : MI.getOperand(2).getIndex(); emitInlineJumpTable(JTIndex); return; - } else if (TID.Opcode == ARM::BR_JTm) { + } else if (TID.Opcode == ARM::BR_JTm || TID.Opcode == ARM::t2BR_JTm) { // First emit a ldr pc, [] instruction. 
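     // (Illustrative assembly sketch only: the load produced here has the
     // form "ldr pc, [pc, +Rm, lsl #2]", transferring control through the
     // inline jump table emitted immediately afterwards.)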
emitLoadStoreInstruction(MI, ARM::PC); diff --git a/lib/Target/ARM/ARMConstantIslandPass.cpp b/lib/Target/ARM/ARMConstantIslandPass.cpp index db723fe..9fedaa4 100644 --- a/lib/Target/ARM/ARMConstantIslandPass.cpp +++ b/lib/Target/ARM/ARMConstantIslandPass.cpp @@ -124,6 +124,7 @@ namespace { const TargetInstrInfo *TII; ARMFunctionInfo *AFI; bool isThumb; + bool isThumb2; public: static char ID; ARMConstantIslands() : MachineFunctionPass(&ID) {} @@ -213,6 +214,7 @@ bool ARMConstantIslands::runOnMachineFunction(MachineFunction &Fn) { TII = Fn.getTarget().getInstrInfo(); AFI = Fn.getInfo<ARMFunctionInfo>(); isThumb = AFI->isThumbFunction(); + isThumb2 = AFI->isThumb2Function(); HasFarJump = false; @@ -376,6 +378,9 @@ void ARMConstantIslands::InitialFunctionScan(MachineFunction &Fn, int UOpc = Opc; switch (Opc) { case ARM::tBR_JTr: + case ARM::t2BR_JTr: + case ARM::t2BR_JTm: + case ARM::t2BR_JTadd: // A Thumb table jump may involve padding; for the offsets to // be right, functions containing these must be 4-byte aligned. AFI->setAlign(2U); @@ -402,6 +407,16 @@ void ARMConstantIslands::InitialFunctionScan(MachineFunction &Fn, Bits = 11; Scale = 2; break; + case ARM::t2Bcc: + isCond = true; + UOpc = ARM::t2B; + Bits = 20; + Scale = 2; + break; + case ARM::t2B: + Bits = 24; + Scale = 2; + break; } // Record this immediate branch. @@ -447,21 +462,25 @@ void ARMConstantIslands::InitialFunctionScan(MachineFunction &Fn, Bits = 8; Scale = 4; // +-(offset_8*4) break; - case ARMII::AddrModeT1: + // addrmode6 has no immediate offset. + case ARMII::AddrModeT1_1: Bits = 5; // +offset_5 break; - case ARMII::AddrModeT2: + case ARMII::AddrModeT1_2: Bits = 5; Scale = 2; // +(offset_5*2) break; - case ARMII::AddrModeT4: + case ARMII::AddrModeT1_4: Bits = 5; Scale = 4; // +(offset_5*4) break; - case ARMII::AddrModeTs: + case ARMII::AddrModeT1_s: Bits = 8; Scale = 4; // +(offset_8*4) break; + case ARMII::AddrModeT2_pc: + Bits = 12; // +-offset_12 + break; } // Remember that this is a user of a CP entry. @@ -572,7 +591,7 @@ MachineBasicBlock *ARMConstantIslands::SplitBlockBeforeInstr(MachineInstr *MI) { // There doesn't seem to be meaningful DebugInfo available; this doesn't // correspond to anything in the source. BuildMI(OrigBB, DebugLoc::getUnknownLoc(), - TII->get(isThumb ? ARM::tB : ARM::B)).addMBB(NewBB); + TII->get(isThumb ? ((isThumb2) ? ARM::t2B : ARM::tB) : ARM::B)).addMBB(NewBB); NumSplit++; // Update the CFG. All succs of OrigBB are now succs of NewBB. @@ -716,7 +735,8 @@ static bool BBIsJumpedOver(MachineBasicBlock *MBB) { MachineBasicBlock *Succ = *MBB->succ_begin(); MachineBasicBlock *Pred = *MBB->pred_begin(); MachineInstr *PredMI = &Pred->back(); - if (PredMI->getOpcode() == ARM::B || PredMI->getOpcode() == ARM::tB) + if (PredMI->getOpcode() == ARM::B || PredMI->getOpcode() == ARM::tB + || PredMI->getOpcode() == ARM::t2B) return PredMI->getOperand(0).getMBB() == Succ; return false; } @@ -748,7 +768,10 @@ void ARMConstantIslands::AdjustBBOffsetsAfter(MachineBasicBlock *BB, // Thumb jump tables require padding. They should be at the end; // following unconditional branches are removed by AnalyzeBranch. 
MachineInstr *ThumbJTMI = NULL; - if (prior(MBB->end())->getOpcode() == ARM::tBR_JTr) + if ((prior(MBB->end())->getOpcode() == ARM::tBR_JTr) + || (prior(MBB->end())->getOpcode() == ARM::t2BR_JTr) + || (prior(MBB->end())->getOpcode() == ARM::t2BR_JTm) + || (prior(MBB->end())->getOpcode() == ARM::t2BR_JTadd)) ThumbJTMI = prior(MBB->end()); if (ThumbJTMI) { unsigned newMIOffset = GetOffsetOf(ThumbJTMI); @@ -839,7 +862,16 @@ int ARMConstantIslands::LookForExistingCPEntry(CPUser& U, unsigned UserOffset) /// getUnconditionalBrDisp - Returns the maximum displacement that can fit in /// the specific unconditional branch instruction. static inline unsigned getUnconditionalBrDisp(int Opc) { - return (Opc == ARM::tB) ? ((1<<10)-1)*2 : ((1<<23)-1)*4; + switch (Opc) { + case ARM::tB: + return ((1<<10)-1)*2; + case ARM::t2B: + return ((1<<23)-1)*2; + default: + break; + } + + return ((1<<23)-1)*4; } /// AcceptWater - Small amount of common code factored out of the following. @@ -935,7 +967,7 @@ void ARMConstantIslands::CreateNewWater(unsigned CPUserIndex, // range, but if the preceding conditional branch is out of range, the // targets will be exchanged, and the altered branch may be out of // range, so the machinery has to know about it. - int UncondBr = isThumb ? ARM::tB : ARM::B; + int UncondBr = isThumb ? ((isThumb2) ? ARM::t2B : ARM::tB) : ARM::B; BuildMI(UserMBB, DebugLoc::getUnknownLoc(), TII->get(UncondBr)).addMBB(*NewMBB); unsigned MaxDisp = getUnconditionalBrDisp(UncondBr); @@ -1165,7 +1197,7 @@ bool ARMConstantIslands::FixUpUnconditionalBr(MachineFunction &Fn, ImmBranch &Br) { MachineInstr *MI = Br.MI; MachineBasicBlock *MBB = MI->getParent(); - assert(isThumb && "Expected a Thumb function!"); + assert(isThumb && !isThumb2 && "Expected a Thumb-1 function!"); // Use BL to implement far jump. 
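   // (Arithmetic note: Thumb BL carries a signed 22-bit halfword offset,
   // split across two 11-bit fields, so the maximum displacement set below
   // is (1 << 21) * 2 bytes, i.e. about +/-4MB.)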
Br.MaxDisp = (1 << 21) * 2; diff --git a/lib/Target/ARM/ARMISelDAGToDAG.cpp b/lib/Target/ARM/ARMISelDAGToDAG.cpp index 200371b..6485fc1 100644 --- a/lib/Target/ARM/ARMISelDAGToDAG.cpp +++ b/lib/Target/ARM/ARMISelDAGToDAG.cpp @@ -64,6 +64,8 @@ public: SDNode *Select(SDValue Op); virtual void InstructionSelect(); + bool SelectShifterOperandReg(SDValue Op, SDValue N, SDValue &A, + SDValue &B, SDValue &C); bool SelectAddrMode2(SDValue Op, SDValue N, SDValue &Base, SDValue &Offset, SDValue &Opc); bool SelectAddrMode2Offset(SDValue Op, SDValue N, @@ -74,9 +76,11 @@ public: SDValue &Offset, SDValue &Opc); bool SelectAddrMode5(SDValue Op, SDValue N, SDValue &Base, SDValue &Offset); + bool SelectAddrMode6(SDValue Op, SDValue N, SDValue &Addr, SDValue &Update, + SDValue &Opc); bool SelectAddrModePC(SDValue Op, SDValue N, SDValue &Offset, - SDValue &Label); + SDValue &Label); bool SelectThumbAddrModeRR(SDValue Op, SDValue N, SDValue &Base, SDValue &Offset); @@ -92,20 +96,34 @@ public: bool SelectThumbAddrModeSP(SDValue Op, SDValue N, SDValue &Base, SDValue &OffImm); - bool SelectShifterOperandReg(SDValue Op, SDValue N, SDValue &A, - SDValue &B, SDValue &C); bool SelectT2ShifterOperandReg(SDValue Op, SDValue N, SDValue &BaseReg, SDValue &Opc); - + bool SelectT2AddrModeImm12(SDValue Op, SDValue N, SDValue &Base, + SDValue &OffImm); + bool SelectT2AddrModeImm8(SDValue Op, SDValue N, SDValue &Base, + SDValue &OffImm); + bool SelectT2AddrModeImm8Offset(SDValue Op, SDValue N, + SDValue &OffImm); + bool SelectT2AddrModeImm8s4(SDValue Op, SDValue N, SDValue &Base, + SDValue &OffImm); + bool SelectT2AddrModeSoReg(SDValue Op, SDValue N, SDValue &Base, + SDValue &OffReg, SDValue &ShImm); + // Include the pieces autogenerated from the target description. #include "ARMGenDAGISel.inc" private: - /// SelectInlineAsmMemoryOperand - Implement addressing mode selection for - /// inline asm expressions. - virtual bool SelectInlineAsmMemoryOperand(const SDValue &Op, - char ConstraintCode, - std::vector<SDValue> &OutOps); + /// SelectARMIndexedLoad - Indexed (pre/post inc/dec) load matching code for + /// ARM. + SDNode *SelectARMIndexedLoad(SDValue Op); + SDNode *SelectT2IndexedLoad(SDValue Op); + + + /// SelectInlineAsmMemoryOperand - Implement addressing mode selection for + /// inline asm expressions. + virtual bool SelectInlineAsmMemoryOperand(const SDValue &Op, + char ConstraintCode, + std::vector<SDValue> &OutOps); }; } @@ -116,6 +134,30 @@ void ARMDAGToDAGISel::InstructionSelect() { CurDAG->RemoveDeadNodes(); } +bool ARMDAGToDAGISel::SelectShifterOperandReg(SDValue Op, + SDValue N, + SDValue &BaseReg, + SDValue &ShReg, + SDValue &Opc) { + ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(N); + + // Don't match base register only case. That is matched to a separate + // lower complexity pattern with explicit register operand. 
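+  // (Worked example on a hypothetical node: for N = (shl y, 7) this returns
+  // BaseReg = y, ShReg = reg0 and an Opc encoding {lsl, #7}; for a variable
+  // amount N = (shl y, z) it returns ShReg = z with a zero immediate.)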
+ if (ShOpcVal == ARM_AM::no_shift) return false; + + BaseReg = N.getOperand(0); + unsigned ShImmVal = 0; + if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) { + ShReg = CurDAG->getRegister(0, MVT::i32); + ShImmVal = RHS->getZExtValue() & 31; + } else { + ShReg = N.getOperand(1); + } + Opc = CurDAG->getTargetConstant(ARM_AM::getSORegOpc(ShOpcVal, ShImmVal), + MVT::i32); + return true; +} + bool ARMDAGToDAGISel::SelectAddrMode2(SDValue Op, SDValue N, SDValue &Base, SDValue &Offset, SDValue &Opc) { @@ -382,6 +424,16 @@ bool ARMDAGToDAGISel::SelectAddrMode5(SDValue Op, SDValue N, return true; } +bool ARMDAGToDAGISel::SelectAddrMode6(SDValue Op, SDValue N, + SDValue &Addr, SDValue &Update, + SDValue &Opc) { + Addr = N; + // The optional writeback is handled in ARMLoadStoreOpt. + Update = CurDAG->getRegister(0, MVT::i32); + Opc = CurDAG->getTargetConstant(ARM_AM::getAM6Opc(false), MVT::i32); + return true; +} + bool ARMDAGToDAGISel::SelectAddrModePC(SDValue Op, SDValue N, SDValue &Offset, SDValue &Label) { if (N.getOpcode() == ARMISD::PIC_ADD && N.hasOneUse()) { @@ -519,30 +571,6 @@ bool ARMDAGToDAGISel::SelectThumbAddrModeSP(SDValue Op, SDValue N, return false; } -bool ARMDAGToDAGISel::SelectShifterOperandReg(SDValue Op, - SDValue N, - SDValue &BaseReg, - SDValue &ShReg, - SDValue &Opc) { - ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(N); - - // Don't match base register only case. That is matched to a separate - // lower complexity pattern with explicit register operand. - if (ShOpcVal == ARM_AM::no_shift) return false; - - BaseReg = N.getOperand(0); - unsigned ShImmVal = 0; - if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) { - ShReg = CurDAG->getRegister(0, MVT::i32); - ShImmVal = RHS->getZExtValue() & 31; - } else { - ShReg = N.getOperand(1); - } - Opc = CurDAG->getTargetConstant(ARM_AM::getSORegOpc(ShOpcVal, ShImmVal), - MVT::i32); - return true; -} - bool ARMDAGToDAGISel::SelectT2ShifterOperandReg(SDValue Op, SDValue N, SDValue &BaseReg, SDValue &Opc) { @@ -563,11 +591,250 @@ bool ARMDAGToDAGISel::SelectT2ShifterOperandReg(SDValue Op, SDValue N, return false; } +bool ARMDAGToDAGISel::SelectT2AddrModeImm12(SDValue Op, SDValue N, + SDValue &Base, SDValue &OffImm) { + // Match simple R + imm12 operands. + if (N.getOpcode() != ISD::ADD) + return false; + + if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) { + int RHSC = (int)RHS->getZExtValue(); + if (RHSC >= 0 && RHSC < 0x1000) { // 12 bits. + Base = N.getOperand(0); + OffImm = CurDAG->getTargetConstant(RHSC, MVT::i32); + return true; + } + } + + return false; +} + +bool ARMDAGToDAGISel::SelectT2AddrModeImm8(SDValue Op, SDValue N, + SDValue &Base, SDValue &OffImm) { + if (N.getOpcode() == ISD::ADD) { + if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) { + int RHSC = (int)RHS->getZExtValue(); + if (RHSC < 0 && RHSC > -0x100) { // 8 bits. + Base = N.getOperand(0); + OffImm = CurDAG->getTargetConstant(RHSC, MVT::i32); + return true; + } + } + } else if (N.getOpcode() == ISD::SUB) { + if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) { + int RHSC = (int)RHS->getZExtValue(); + if (RHSC >= 0 && RHSC < 0x100) { // 8 bits. + Base = N.getOperand(0); + OffImm = CurDAG->getTargetConstant(-RHSC, MVT::i32); + return true; + } + } + } + + return false; +} + +bool ARMDAGToDAGISel::SelectT2AddrModeImm8Offset(SDValue Op, SDValue N, + SDValue &OffImm){ + unsigned Opcode = Op.getOpcode(); + ISD::MemIndexedMode AM = (Opcode == ISD::LOAD) + ? 
cast<LoadSDNode>(Op)->getAddressingMode() + : cast<StoreSDNode>(Op)->getAddressingMode(); + if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N)) { + int RHSC = (int)RHS->getZExtValue(); + if (RHSC >= 0 && RHSC < 0x100) { // 8 bits. + OffImm = (AM == ISD::PRE_INC) + ? CurDAG->getTargetConstant(RHSC, MVT::i32) + : CurDAG->getTargetConstant(-RHSC, MVT::i32); + return true; + } + } + + return false; +} + +bool ARMDAGToDAGISel::SelectT2AddrModeImm8s4(SDValue Op, SDValue N, + SDValue &Base, SDValue &OffImm) { + if (N.getOpcode() == ISD::ADD) { + if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) { + int RHSC = (int)RHS->getZExtValue(); + if (((RHSC & 0x3) == 0) && (RHSC < 0 && RHSC > -0x400)) { // 8 bits, scaled by 4. + Base = N.getOperand(0); + OffImm = CurDAG->getTargetConstant(RHSC, MVT::i32); + return true; + } + } + } else if (N.getOpcode() == ISD::SUB) { + if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) { + int RHSC = (int)RHS->getZExtValue(); + if (((RHSC & 0x3) == 0) && (RHSC >= 0 && RHSC < 0x400)) { // 8 bits, scaled by 4. + Base = N.getOperand(0); + OffImm = CurDAG->getTargetConstant(-RHSC, MVT::i32); + return true; + } + } + } + + return false; +} + +bool ARMDAGToDAGISel::SelectT2AddrModeSoReg(SDValue Op, SDValue N, + SDValue &Base, + SDValue &OffReg, SDValue &ShImm) { + // Base only. + if (N.getOpcode() != ISD::ADD && N.getOpcode() != ISD::SUB) { + Base = N; + if (N.getOpcode() == ISD::FrameIndex) { + int FI = cast<FrameIndexSDNode>(N)->getIndex(); + Base = CurDAG->getTargetFrameIndex(FI, TLI.getPointerTy()); + } else if (N.getOpcode() == ARMISD::Wrapper) { + Base = N.getOperand(0); + if (Base.getOpcode() == ISD::TargetConstantPool) + return false; // We want to select t2LDRpci instead. + } + OffReg = CurDAG->getRegister(0, MVT::i32); + ShImm = CurDAG->getTargetConstant(0, MVT::i32); + return true; + } + + // Look for (R + R) or (R + (R << [1,2,3])). + unsigned ShAmt = 0; + Base = N.getOperand(0); + OffReg = N.getOperand(1); + + // Swap if it is ((R << c) + R). + ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(OffReg); + if (ShOpcVal != ARM_AM::lsl) { + ShOpcVal = ARM_AM::getShiftOpcForNode(Base); + if (ShOpcVal == ARM_AM::lsl) + std::swap(Base, OffReg); + } + + if (ShOpcVal == ARM_AM::lsl) { + // Check to see if the RHS of the shift is a constant; if not, we can't + // fold it. + if (ConstantSDNode *Sh = dyn_cast<ConstantSDNode>(OffReg.getOperand(1))) { + ShAmt = Sh->getZExtValue(); + if (ShAmt >= 4) { + ShAmt = 0; + ShOpcVal = ARM_AM::no_shift; + } else + OffReg = OffReg.getOperand(0); + } else { + ShOpcVal = ARM_AM::no_shift; + } + } else if (SelectT2AddrModeImm12(Op, N, Base, ShImm) || + SelectT2AddrModeImm8 (Op, N, Base, ShImm)) + // Don't match if it's possible to match to one of the r +/- imm cases. + return false; + + ShImm = CurDAG->getTargetConstant(ShAmt, MVT::i32); + + return true; +} + +//===--------------------------------------------------------------------===// + /// getAL - Returns an ARMCC::AL immediate node.
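 /// (It is paired below with a zero predicate register to form the default
 /// "execute always" predicate operand on newly created target nodes.)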
static inline SDValue getAL(SelectionDAG *CurDAG) { return CurDAG->getTargetConstant((uint64_t)ARMCC::AL, MVT::i32); } +SDNode *ARMDAGToDAGISel::SelectARMIndexedLoad(SDValue Op) { + LoadSDNode *LD = cast<LoadSDNode>(Op); + ISD::MemIndexedMode AM = LD->getAddressingMode(); + if (AM == ISD::UNINDEXED) + return NULL; + + MVT LoadedVT = LD->getMemoryVT(); + SDValue Offset, AMOpc; + bool isPre = (AM == ISD::PRE_INC) || (AM == ISD::PRE_DEC); + unsigned Opcode = 0; + bool Match = false; + if (LoadedVT == MVT::i32 && + SelectAddrMode2Offset(Op, LD->getOffset(), Offset, AMOpc)) { + Opcode = isPre ? ARM::LDR_PRE : ARM::LDR_POST; + Match = true; + } else if (LoadedVT == MVT::i16 && + SelectAddrMode3Offset(Op, LD->getOffset(), Offset, AMOpc)) { + Match = true; + Opcode = (LD->getExtensionType() == ISD::SEXTLOAD) + ? (isPre ? ARM::LDRSH_PRE : ARM::LDRSH_POST) + : (isPre ? ARM::LDRH_PRE : ARM::LDRH_POST); + } else if (LoadedVT == MVT::i8 || LoadedVT == MVT::i1) { + if (LD->getExtensionType() == ISD::SEXTLOAD) { + if (SelectAddrMode3Offset(Op, LD->getOffset(), Offset, AMOpc)) { + Match = true; + Opcode = isPre ? ARM::LDRSB_PRE : ARM::LDRSB_POST; + } + } else { + if (SelectAddrMode2Offset(Op, LD->getOffset(), Offset, AMOpc)) { + Match = true; + Opcode = isPre ? ARM::LDRB_PRE : ARM::LDRB_POST; + } + } + } + + if (Match) { + SDValue Chain = LD->getChain(); + SDValue Base = LD->getBasePtr(); + SDValue Ops[]= { Base, Offset, AMOpc, getAL(CurDAG), + CurDAG->getRegister(0, MVT::i32), Chain }; + return CurDAG->getTargetNode(Opcode, Op.getDebugLoc(), MVT::i32, MVT::i32, + MVT::Other, Ops, 6); + } + + return NULL; +} + +SDNode *ARMDAGToDAGISel::SelectT2IndexedLoad(SDValue Op) { + LoadSDNode *LD = cast<LoadSDNode>(Op); + ISD::MemIndexedMode AM = LD->getAddressingMode(); + if (AM == ISD::UNINDEXED) + return NULL; + + MVT LoadedVT = LD->getMemoryVT(); + bool isSExtLd = LD->getExtensionType() == ISD::SEXTLOAD; + SDValue Offset; + bool isPre = (AM == ISD::PRE_INC) || (AM == ISD::PRE_DEC); + unsigned Opcode = 0; + bool Match = false; + if (SelectT2AddrModeImm8Offset(Op, LD->getOffset(), Offset)) { + switch (LoadedVT.getSimpleVT()) { + case MVT::i32: + Opcode = isPre ? ARM::t2LDR_PRE : ARM::t2LDR_POST; + break; + case MVT::i16: + if (isSExtLd) + Opcode = isPre ? ARM::t2LDRSH_PRE : ARM::t2LDRSH_POST; + else + Opcode = isPre ? ARM::t2LDRH_PRE : ARM::t2LDRH_POST; + break; + case MVT::i8: + case MVT::i1: + if (isSExtLd) + Opcode = isPre ? ARM::t2LDRSB_PRE : ARM::t2LDRSB_POST; + else + Opcode = isPre ? ARM::t2LDRB_PRE : ARM::t2LDRB_POST; + break; + default: + return NULL; + } + Match = true; + } + + if (Match) { + SDValue Chain = LD->getChain(); + SDValue Base = LD->getBasePtr(); + SDValue Ops[]= { Base, Offset, getAL(CurDAG), + CurDAG->getRegister(0, MVT::i32), Chain }; + return CurDAG->getTargetNode(Opcode, Op.getDebugLoc(), MVT::i32, MVT::i32, + MVT::Other, Ops, 5); + } + + return NULL; +} + SDNode *ARMDAGToDAGISel::Select(SDValue Op) { SDNode *N = Op.getNode(); @@ -698,47 +965,13 @@ SDNode *ARMDAGToDAGISel::Select(SDValue Op) { return CurDAG->getTargetNode(ARM::SMULL, dl, MVT::i32, MVT::i32, Ops, 5); } case ISD::LOAD: { - LoadSDNode *LD = cast<LoadSDNode>(Op); - ISD::MemIndexedMode AM = LD->getAddressingMode(); - MVT LoadedVT = LD->getMemoryVT(); - if (AM != ISD::UNINDEXED) { - SDValue Offset, AMOpc; - bool isPre = (AM == ISD::PRE_INC) || (AM == ISD::PRE_DEC); - unsigned Opcode = 0; - bool Match = false; - if (LoadedVT == MVT::i32 && - SelectAddrMode2Offset(Op, LD->getOffset(), Offset, AMOpc)) { - Opcode = isPre ? 
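+    // (A note on the split implemented in this function: i32 loads and
+    // zero-extending i8 loads use addrmode2 (LDR/LDRB), while i16 loads and
+    // sign-extending i8 loads need addrmode3 (LDRH/LDRSH/LDRSB), which
+    // carries the signed/halfword forms that addrmode2 lacks.)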
ARM::LDR_PRE : ARM::LDR_POST; - Match = true; - } else if (LoadedVT == MVT::i16 && - SelectAddrMode3Offset(Op, LD->getOffset(), Offset, AMOpc)) { - Match = true; - Opcode = (LD->getExtensionType() == ISD::SEXTLOAD) - ? (isPre ? ARM::LDRSH_PRE : ARM::LDRSH_POST) - : (isPre ? ARM::LDRH_PRE : ARM::LDRH_POST); - } else if (LoadedVT == MVT::i8 || LoadedVT == MVT::i1) { - if (LD->getExtensionType() == ISD::SEXTLOAD) { - if (SelectAddrMode3Offset(Op, LD->getOffset(), Offset, AMOpc)) { - Match = true; - Opcode = isPre ? ARM::LDRSB_PRE : ARM::LDRSB_POST; - } - } else { - if (SelectAddrMode2Offset(Op, LD->getOffset(), Offset, AMOpc)) { - Match = true; - Opcode = isPre ? ARM::LDRB_PRE : ARM::LDRB_POST; - } - } - } - - if (Match) { - SDValue Chain = LD->getChain(); - SDValue Base = LD->getBasePtr(); - SDValue Ops[]= { Base, Offset, AMOpc, getAL(CurDAG), - CurDAG->getRegister(0, MVT::i32), Chain }; - return CurDAG->getTargetNode(Opcode, dl, MVT::i32, MVT::i32, - MVT::Other, Ops, 6); - } - } + SDNode *ResNode = 0; + if (Subtarget->isThumb2()) + ResNode = SelectT2IndexedLoad(Op); + else + ResNode = SelectARMIndexedLoad(Op); + if (ResNode) + return ResNode; // Other cases are autogenerated. break; } @@ -751,7 +984,12 @@ SDNode *ARMDAGToDAGISel::Select(SDValue Op) { // Emits: (tBcc:void (bb:Other):$dst, (imm:i32):$cc) // Pattern complexity = 6 cost = 1 size = 0 - unsigned Opc = Subtarget->isThumb() ? ARM::tBcc : ARM::Bcc; + // Pattern: (ARMbrcond:void (bb:Other):$dst, (imm:i32):$cc) + // Emits: (t2Bcc:void (bb:Other):$dst, (imm:i32):$cc) + // Pattern complexity = 6 cost = 1 size = 0 + + unsigned Opc = Subtarget->isThumb() ? + ((Subtarget->hasThumb2()) ? ARM::t2Bcc : ARM::tBcc) : ARM::Bcc; SDValue Chain = Op.getOperand(0); SDValue N1 = Op.getOperand(1); SDValue N2 = Op.getOperand(2); diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp index c24bb2e..41c9ecc 100644 --- a/lib/Target/ARM/ARMISelLowering.cpp +++ b/lib/Target/ARM/ARMISelLowering.cpp @@ -231,16 +231,18 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM) setLoadExtAction(ISD::SEXTLOAD, MVT::i1, Promote); // ARM supports all 4 flavors of integer indexed load / store. - for (unsigned im = (unsigned)ISD::PRE_INC; - im != (unsigned)ISD::LAST_INDEXED_MODE; ++im) { - setIndexedLoadAction(im, MVT::i1, Legal); - setIndexedLoadAction(im, MVT::i8, Legal); - setIndexedLoadAction(im, MVT::i16, Legal); - setIndexedLoadAction(im, MVT::i32, Legal); - setIndexedStoreAction(im, MVT::i1, Legal); - setIndexedStoreAction(im, MVT::i8, Legal); - setIndexedStoreAction(im, MVT::i16, Legal); - setIndexedStoreAction(im, MVT::i32, Legal); + if (!Subtarget->isThumb1Only()) { + for (unsigned im = (unsigned)ISD::PRE_INC; + im != (unsigned)ISD::LAST_INDEXED_MODE; ++im) { + setIndexedLoadAction(im, MVT::i1, Legal); + setIndexedLoadAction(im, MVT::i8, Legal); + setIndexedLoadAction(im, MVT::i16, Legal); + setIndexedLoadAction(im, MVT::i32, Legal); + setIndexedStoreAction(im, MVT::i1, Legal); + setIndexedStoreAction(im, MVT::i8, Legal); + setIndexedStoreAction(im, MVT::i16, Legal); + setIndexedStoreAction(im, MVT::i32, Legal); + } } // i64 operation support. 
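A quick illustration of what the legality flags in the hunk above enable: when the indexed actions are Legal, the DAG combiner folds a load or store together with the adjacent pointer update into one pre/post-indexed operation. A minimal C++ sketch of the kind of code that benefits (hypothetical function, not part of the patch):

// Hypothetical example: each iteration performs a load and a pointer update,
// which the combiner can fold into a single post-indexed ldr on ARM and
// Thumb-2; on Thumb-1 the indexed actions keep their default Expand setting
// and the two operations stay separate.
int sumWords(const int *p, int n) {
  int s = 0;
  for (int i = 0; i != n; ++i)
    s += *p++;
  return s;
}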
@@ -301,7 +303,7 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM) setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Expand); setOperationAction(ISD::MEMBARRIER, MVT::Other, Expand); - if (!Subtarget->hasV6Ops()) { + if (!Subtarget->hasV6Ops() && !Subtarget->isThumb2()) { setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i16, Expand); setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i8, Expand); } @@ -402,7 +404,7 @@ const char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const { case ARMISD::RET_FLAG: return "ARMISD::RET_FLAG"; case ARMISD::PIC_ADD: return "ARMISD::PIC_ADD"; case ARMISD::CMP: return "ARMISD::CMP"; - case ARMISD::CMPNZ: return "ARMISD::CMPNZ"; + case ARMISD::CMPZ: return "ARMISD::CMPZ"; case ARMISD::CMPFP: return "ARMISD::CMPFP"; case ARMISD::CMPFPw0: return "ARMISD::CMPFPw0"; case ARMISD::FMSTAT: return "ARMISD::FMSTAT"; @@ -455,6 +457,11 @@ const char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const { } } +/// getFunctionAlignment - Return the Log2 alignment of this function. +unsigned ARMTargetLowering::getFunctionAlignment(const Function *F) const { + return getTargetMachine().getSubtarget<ARMSubtarget>().isThumb() ? 1 : 2; +} + //===----------------------------------------------------------------------===// // Lowering Code //===----------------------------------------------------------------------===// @@ -1152,7 +1159,7 @@ ARMTargetLowering::LowerToTLSGeneralDynamicModel(GlobalAddressSDNode *GA, // FIXME: is there useful debug info available here? std::pair<SDValue, SDValue> CallResult = LowerCallTo(Chain, (const Type *) Type::Int32Ty, false, false, false, false, - CallingConv::C, false, + 0, CallingConv::C, false, DAG.getExternalSymbol("__tls_get_addr", PtrVT), Args, DAG, dl); return CallResult.first; } @@ -1592,10 +1599,8 @@ static SDValue getARMCmp(SDValue LHS, SDValue RHS, ISD::CondCode CC, break; case ARMCC::EQ: case ARMCC::NE: - case ARMCC::MI: - case ARMCC::PL: - // Uses only N and Z Flags - CompareType = ARMISD::CMPNZ; + // Uses only Z Flag + CompareType = ARMISD::CMPZ; break; } ARMCC = DAG.getConstant(CondCode, MVT::i32); @@ -2920,10 +2925,10 @@ bool ARMTargetLowering::isLegalAddressingMode(const AddrMode &AM, return true; } -static bool getIndexedAddressParts(SDNode *Ptr, MVT VT, - bool isSEXTLoad, SDValue &Base, - SDValue &Offset, bool &isInc, - SelectionDAG &DAG) { +static bool getARMIndexedAddressParts(SDNode *Ptr, MVT VT, + bool isSEXTLoad, SDValue &Base, + SDValue &Offset, bool &isInc, + SelectionDAG &DAG) { if (Ptr->getOpcode() != ISD::ADD && Ptr->getOpcode() != ISD::SUB) return false; @@ -2933,6 +2938,7 @@ static bool getIndexedAddressParts(SDNode *Ptr, MVT VT, if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(Ptr->getOperand(1))) { int RHSC = (int)RHS->getZExtValue(); if (RHSC < 0 && RHSC > -256) { + assert(Ptr->getOpcode() == ISD::ADD); isInc = false; Offset = DAG.getConstant(-RHSC, RHS->getValueType(0)); return true; @@ -2946,6 +2952,7 @@ static bool getIndexedAddressParts(SDNode *Ptr, MVT VT, if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(Ptr->getOperand(1))) { int RHSC = (int)RHS->getZExtValue(); if (RHSC < 0 && RHSC > -0x1000) { + assert(Ptr->getOpcode() == ISD::ADD); isInc = false; Offset = DAG.getConstant(-RHSC, RHS->getValueType(0)); Base = Ptr->getOperand(0); @@ -2976,6 +2983,31 @@ static bool getIndexedAddressParts(SDNode *Ptr, MVT VT, return false; } +static bool getT2IndexedAddressParts(SDNode *Ptr, MVT VT, + bool isSEXTLoad, SDValue &Base, + SDValue &Offset, bool &isInc, + SelectionDAG &DAG) { + if 
(Ptr->getOpcode() != ISD::ADD && Ptr->getOpcode() != ISD::SUB) + return false; + + Base = Ptr->getOperand(0); + if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(Ptr->getOperand(1))) { + int RHSC = (int)RHS->getZExtValue(); + if (RHSC < 0 && RHSC > -0x100) { // 8 bits. + assert(Ptr->getOpcode() == ISD::ADD); + isInc = false; + Offset = DAG.getConstant(-RHSC, RHS->getValueType(0)); + return true; + } else if (RHSC > 0 && RHSC < 0x100) { // 8 bit, no zero. + isInc = Ptr->getOpcode() == ISD::ADD; + Offset = DAG.getConstant(RHSC, RHS->getValueType(0)); + return true; + } + } + + return false; +} + /// getPreIndexedAddressParts - returns true by value, base pointer and /// offset pointer and addressing mode by reference if the node's address /// can be legally represented as pre-indexed load / store address. @@ -2984,7 +3016,7 @@ ARMTargetLowering::getPreIndexedAddressParts(SDNode *N, SDValue &Base, SDValue &Offset, ISD::MemIndexedMode &AM, SelectionDAG &DAG) const { - if (Subtarget->isThumb()) + if (Subtarget->isThumb1Only()) return false; MVT VT; @@ -3001,13 +3033,18 @@ ARMTargetLowering::getPreIndexedAddressParts(SDNode *N, SDValue &Base, return false; bool isInc; - bool isLegal = getIndexedAddressParts(Ptr.getNode(), VT, isSEXTLoad, Base, Offset, - isInc, DAG); - if (isLegal) { - AM = isInc ? ISD::PRE_INC : ISD::PRE_DEC; - return true; - } - return false; + bool isLegal = false; + if (Subtarget->isThumb2()) + isLegal = getT2IndexedAddressParts(Ptr.getNode(), VT, isSEXTLoad, Base, + Offset, isInc, DAG); + else + isLegal = getARMIndexedAddressParts(Ptr.getNode(), VT, isSEXTLoad, Base, + Offset, isInc, DAG); + if (!isLegal) + return false; + + AM = isInc ? ISD::PRE_INC : ISD::PRE_DEC; + return true; } /// getPostIndexedAddressParts - returns true by value, base pointer and @@ -3018,7 +3055,7 @@ bool ARMTargetLowering::getPostIndexedAddressParts(SDNode *N, SDNode *Op, SDValue &Offset, ISD::MemIndexedMode &AM, SelectionDAG &DAG) const { - if (Subtarget->isThumb()) + if (Subtarget->isThumb1Only()) return false; MVT VT; @@ -3033,13 +3070,18 @@ bool ARMTargetLowering::getPostIndexedAddressParts(SDNode *N, SDNode *Op, return false; bool isInc; - bool isLegal = getIndexedAddressParts(Op, VT, isSEXTLoad, Base, Offset, + bool isLegal = false; + if (Subtarget->isThumb2()) + isLegal = getT2IndexedAddressParts(Op, VT, isSEXTLoad, Base, Offset, isInc, DAG); - if (isLegal) { - AM = isInc ? ISD::POST_INC : ISD::POST_DEC; - return true; - } - return false; + else + isLegal = getARMIndexedAddressParts(Op, VT, isSEXTLoad, Base, Offset, + isInc, DAG); + if (!isLegal) + return false; + + AM = isInc ? ISD::POST_INC : ISD::POST_DEC; + return true; } void ARMTargetLowering::computeMaskedBitsForTargetNode(const SDValue Op, diff --git a/lib/Target/ARM/ARMISelLowering.h b/lib/Target/ARM/ARMISelLowering.h index 631e37f..553a86d 100644 --- a/lib/Target/ARM/ARMISelLowering.h +++ b/lib/Target/ARM/ARMISelLowering.h @@ -45,7 +45,7 @@ namespace llvm { PIC_ADD, // Add with a PC operand and a PIC label. CMP, // ARM compare instructions. - CMPNZ, // ARM compare that uses only N or Z flags. + CMPZ, // ARM compare that sets only Z flag. CMPFP, // ARM VFP compare instruction, sets FPSCR. CMPFPw0, // ARM VFP compare against zero instruction, sets FPSCR. FMSTAT, // ARM fmstat instruction. @@ -197,6 +197,9 @@ namespace llvm { return Subtarget; } + /// getFunctionAlignment - Return the Log2 alignment of this function. 
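+  /// (The value is a Log2: the 1 returned for Thumb in the definition above
+  /// means 2-byte alignment, the 2 for ARM means 4-byte alignment.)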
+ virtual unsigned getFunctionAlignment(const Function *F) const; + private: /// Subtarget - Keep a pointer to the ARMSubtarget around so that we can /// make the right decision when generating code for different targets. diff --git a/lib/Target/ARM/ARMInstrFormats.td b/lib/Target/ARM/ARMInstrFormats.td index d7371b0..301a6c1 100644 --- a/lib/Target/ARM/ARMInstrFormats.td +++ b/lib/Target/ARM/ARMInstrFormats.td @@ -59,6 +59,49 @@ def NEONDupFrm : Format<27>; class UnaryDP { bit isUnaryDataProc = 1; } //===----------------------------------------------------------------------===// +// ARM Instruction flags. These need to match ARMInstrInfo.h. +// + +// Addressing mode. +class AddrMode<bits<4> val> { + bits<4> Value = val; +} +def AddrModeNone : AddrMode<0>; +def AddrMode1 : AddrMode<1>; +def AddrMode2 : AddrMode<2>; +def AddrMode3 : AddrMode<3>; +def AddrMode4 : AddrMode<4>; +def AddrMode5 : AddrMode<5>; +def AddrMode6 : AddrMode<6>; +def AddrModeT1_1 : AddrMode<7>; +def AddrModeT1_2 : AddrMode<8>; +def AddrModeT1_4 : AddrMode<9>; +def AddrModeT1_s : AddrMode<10>; +def AddrModeT2_i12: AddrMode<11>; +def AddrModeT2_i8 : AddrMode<12>; +def AddrModeT2_so : AddrMode<13>; +def AddrModeT2_pc : AddrMode<14>; +def AddrModeT2_i8s4 : AddrMode<15>; + +// Instruction size. +class SizeFlagVal<bits<3> val> { + bits<3> Value = val; +} +def SizeInvalid : SizeFlagVal<0>; // Unset. +def SizeSpecial : SizeFlagVal<1>; // Pseudo or special. +def Size8Bytes : SizeFlagVal<2>; +def Size4Bytes : SizeFlagVal<3>; +def Size2Bytes : SizeFlagVal<4>; + +// Load / store index mode. +class IndexMode<bits<2> val> { + bits<2> Value = val; +} +def IndexModeNone : IndexMode<0>; +def IndexModePre : IndexMode<1>; +def IndexModePost : IndexMode<2>; + +//===----------------------------------------------------------------------===// // ARM Instruction templates. // @@ -706,7 +749,6 @@ class ARMV6Pat<dag pattern, dag result> : Pat<pattern, result> { // Thumb Instruction Format Definitions. // - // TI - Thumb instruction.
class ThumbI<dag outs, dag ins, AddrMode am, SizeFlagVal sz, @@ -721,18 +763,6 @@ class ThumbI<dag outs, dag ins, AddrMode am, SizeFlagVal sz, class TI<dag outs, dag ins, string asm, list<dag> pattern> : ThumbI<outs, ins, AddrModeNone, Size2Bytes, asm, "", pattern>; -class TI1<dag outs, dag ins, string asm, list<dag> pattern> - : ThumbI<outs, ins, AddrModeT1, Size2Bytes, asm, "", pattern>; -class TI2<dag outs, dag ins, string asm, list<dag> pattern> - : ThumbI<outs, ins, AddrModeT2, Size2Bytes, asm, "", pattern>; -class TI4<dag outs, dag ins, string asm, list<dag> pattern> - : ThumbI<outs, ins, AddrModeT4, Size2Bytes, asm, "", pattern>; -class TIs<dag outs, dag ins, string asm, list<dag> pattern> - : ThumbI<outs, ins, AddrModeTs, Size2Bytes, asm, "", pattern>; - -// Two-address instructions -class TIt<dag outs, dag ins, string asm, list<dag> pattern> - : ThumbI<outs, ins, AddrModeNone, Size2Bytes, asm, "$lhs = $dst", pattern>; // BL, BLX(1) are translated by assembler into two instructions class TIx2<dag outs, dag ins, string asm, list<dag> pattern> @@ -764,6 +794,18 @@ class Thumb1I<dag outs, dag ins, AddrMode am, SizeFlagVal sz, class T1I<dag outs, dag ins, string asm, list<dag> pattern> : Thumb1I<outs, ins, AddrModeNone, Size2Bytes, asm, "", pattern>; +class T1I1<dag outs, dag ins, string asm, list<dag> pattern> + : Thumb1I<outs, ins, AddrModeT1_1, Size2Bytes, asm, "", pattern>; +class T1I2<dag outs, dag ins, string asm, list<dag> pattern> + : Thumb1I<outs, ins, AddrModeT1_2, Size2Bytes, asm, "", pattern>; +class T1I4<dag outs, dag ins, string asm, list<dag> pattern> + : Thumb1I<outs, ins, AddrModeT1_4, Size2Bytes, asm, "", pattern>; +class T1Is<dag outs, dag ins, string asm, list<dag> pattern> + : Thumb1I<outs, ins, AddrModeT1_s, Size2Bytes, asm, "", pattern>; +class T1Ix2<dag outs, dag ins, string asm, list<dag> pattern> + : Thumb1I<outs, ins, AddrModeNone, Size4Bytes, asm, "", pattern>; +class T1JTI<dag outs, dag ins, string asm, list<dag> pattern> + : Thumb1I<outs, ins, AddrModeNone, SizeSpecial, asm, "", pattern>; // Two-address instructions class T1It<dag outs, dag ins, string asm, list<dag> pattern> @@ -781,7 +823,7 @@ class Thumb2I<dag oops, dag iops, AddrMode am, SizeFlagVal sz, let InOperandList = !con(iops, (ops pred:$p)); let AsmString = !strconcat(opc, !strconcat("${p}", asm)); let Pattern = pattern; - list<Predicate> Predicates = [IsThumb, HasThumb2]; + list<Predicate> Predicates = [IsThumb2]; } // Same as Thumb2I except it can optionally modify CPSR. 
Note it's modeled as @@ -796,7 +838,7 @@ class Thumb2sI<dag oops, dag iops, AddrMode am, SizeFlagVal sz, let InOperandList = !con(iops, (ops pred:$p, cc_out:$s)); let AsmString = !strconcat(opc, !strconcat("${s}${p}", asm)); let Pattern = pattern; - list<Predicate> Predicates = [IsThumb, HasThumb2]; + list<Predicate> Predicates = [IsThumb2]; } // Special cases @@ -807,21 +849,45 @@ class Thumb2XI<dag oops, dag iops, AddrMode am, SizeFlagVal sz, let InOperandList = iops; let AsmString = asm; let Pattern = pattern; - list<Predicate> Predicates = [IsThumb, HasThumb2]; + list<Predicate> Predicates = [IsThumb2]; } class T2I<dag oops, dag iops, string opc, string asm, list<dag> pattern> : Thumb2I<oops, iops, AddrModeNone, Size4Bytes, opc, asm, "", pattern>; +class T2Ii12<dag oops, dag iops, string opc, string asm, list<dag> pattern> + : Thumb2I<oops, iops, AddrModeT2_i12, Size4Bytes, opc, asm, "", pattern>; +class T2Ii8<dag oops, dag iops, string opc, string asm, list<dag> pattern> + : Thumb2I<oops, iops, AddrModeT2_i8, Size4Bytes, opc, asm, "", pattern>; +class T2Iso<dag oops, dag iops, string opc, string asm, list<dag> pattern> + : Thumb2I<oops, iops, AddrModeT2_so, Size4Bytes, opc, asm, "", pattern>; +class T2Ipc<dag oops, dag iops, string opc, string asm, list<dag> pattern> + : Thumb2I<oops, iops, AddrModeT2_pc, Size4Bytes, opc, asm, "", pattern>; +class T2Ii8s4<dag oops, dag iops, string opc, string asm, list<dag> pattern> + : Thumb2I<oops, iops, AddrModeT2_i8s4, Size4Bytes, opc, asm, "", pattern>; class T2sI<dag oops, dag iops, string opc, string asm, list<dag> pattern> : Thumb2sI<oops, iops, AddrModeNone, Size4Bytes, opc, asm, "", pattern>; class T2XI<dag oops, dag iops, string asm, list<dag> pattern> : Thumb2XI<oops, iops, AddrModeNone, Size4Bytes, asm, "", pattern>; +class T2JTI<dag oops, dag iops, string asm, list<dag> pattern> + : Thumb2XI<oops, iops, AddrModeNone, SizeSpecial, asm, "", pattern>; + +// T2Iidxldst - Thumb2 indexed load / store instructions. +class T2Iidxldst<dag oops, dag iops, AddrMode am, IndexMode im, + string opc, string asm, string cstr, list<dag> pattern> + : InstARM<am, Size4Bytes, im, ThumbFrm, cstr> { + let OutOperandList = oops; + let InOperandList = !con(iops, (ops pred:$p)); + let AsmString = !strconcat(opc, !strconcat("${p}", asm)); + let Pattern = pattern; + list<Predicate> Predicates = [IsThumb2]; +} + // T2Pat - Same as Pat<>, but requires that the compiler be in Thumb2 mode. class T2Pat<dag pattern, dag result> : Pat<pattern, result> { - list<Predicate> Predicates = [IsThumb, HasThumb2]; + list<Predicate> Predicates = [IsThumb2]; } //===----------------------------------------------------------------------===// diff --git a/lib/Target/ARM/ARMInstrInfo.cpp b/lib/Target/ARM/ARMInstrInfo.cpp index d95089d..443fdc7 100644 --- a/lib/Target/ARM/ARMInstrInfo.cpp +++ b/lib/Target/ARM/ARMInstrInfo.cpp @@ -40,108 +40,24 @@ const MachineInstrBuilder &AddDefaultCC(const MachineInstrBuilder &MIB) { } ARMBaseInstrInfo::ARMBaseInstrInfo(const ARMSubtarget &STI) - : TargetInstrInfoImpl(ARMInsts, array_lengthof(ARMInsts)), - RI(*this, STI) { + : TargetInstrInfoImpl(ARMInsts, array_lengthof(ARMInsts)) { } ARMInstrInfo::ARMInstrInfo(const ARMSubtarget &STI) - : ARMBaseInstrInfo(STI) { + : ARMBaseInstrInfo(STI), RI(*this, STI) { } -/// Return true if the instruction is a register to register move and -/// leave the source and dest operands in the passed parameters. 
-/// -bool ARMInstrInfo::isMoveInstr(const MachineInstr &MI, - unsigned &SrcReg, unsigned &DstReg, - unsigned& SrcSubIdx, unsigned& DstSubIdx) const { - SrcSubIdx = DstSubIdx = 0; // No sub-registers. - - unsigned oc = MI.getOpcode(); - switch (oc) { - default: - return false; - case ARM::FCPYS: - case ARM::FCPYD: - case ARM::VMOVD: - case ARM::VMOVQ: - SrcReg = MI.getOperand(1).getReg(); - DstReg = MI.getOperand(0).getReg(); - return true; - case ARM::MOVr: - assert(MI.getDesc().getNumOperands() >= 2 && - MI.getOperand(0).isReg() && - MI.getOperand(1).isReg() && - "Invalid ARM MOV instruction"); - SrcReg = MI.getOperand(1).getReg(); - DstReg = MI.getOperand(0).getReg(); - return true; - } -} - -unsigned ARMInstrInfo::isLoadFromStackSlot(const MachineInstr *MI, - int &FrameIndex) const { - switch (MI->getOpcode()) { - default: break; - case ARM::LDR: - if (MI->getOperand(1).isFI() && - MI->getOperand(2).isReg() && - MI->getOperand(3).isImm() && - MI->getOperand(2).getReg() == 0 && - MI->getOperand(3).getImm() == 0) { - FrameIndex = MI->getOperand(1).getIndex(); - return MI->getOperand(0).getReg(); - } - break; - case ARM::FLDD: - case ARM::FLDS: - if (MI->getOperand(1).isFI() && - MI->getOperand(2).isImm() && - MI->getOperand(2).getImm() == 0) { - FrameIndex = MI->getOperand(1).getIndex(); - return MI->getOperand(0).getReg(); - } - break; - } - return 0; -} - -unsigned ARMInstrInfo::isStoreToStackSlot(const MachineInstr *MI, - int &FrameIndex) const { - switch (MI->getOpcode()) { - default: break; - case ARM::STR: - if (MI->getOperand(1).isFI() && - MI->getOperand(2).isReg() && - MI->getOperand(3).isImm() && - MI->getOperand(2).getReg() == 0 && - MI->getOperand(3).getImm() == 0) { - FrameIndex = MI->getOperand(1).getIndex(); - return MI->getOperand(0).getReg(); - } - break; - case ARM::FSTD: - case ARM::FSTS: - if (MI->getOperand(1).isFI() && - MI->getOperand(2).isImm() && - MI->getOperand(2).getImm() == 0) { - FrameIndex = MI->getOperand(1).getIndex(); - return MI->getOperand(0).getReg(); - } - break; - } - - return 0; -} - -void ARMBaseInstrInfo::reMaterialize(MachineBasicBlock &MBB, - MachineBasicBlock::iterator I, - unsigned DestReg, - const MachineInstr *Orig) const { +void ARMInstrInfo::reMaterialize(MachineBasicBlock &MBB, + MachineBasicBlock::iterator I, + unsigned DestReg, + const MachineInstr *Orig) const { DebugLoc dl = Orig->getDebugLoc(); if (Orig->getOpcode() == ARM::MOVi2pieces) { - RI.emitLoadConstPool(MBB, I, DestReg, Orig->getOperand(1).getImm(), - Orig->getOperand(2).getImm(), - Orig->getOperand(3).getReg(), this, false, dl); + RI.emitLoadConstPool(MBB, I, this, dl, + DestReg, + Orig->getOperand(1).getImm(), + (ARMCC::CondCodes)Orig->getOperand(2).getImm(), + Orig->getOperand(3).getReg()); return; } @@ -334,10 +250,10 @@ ARMBaseInstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI, // Branch analysis. bool - ARMBaseInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,MachineBasicBlock *&TBB, - MachineBasicBlock *&FBB, - SmallVectorImpl<MachineOperand> &Cond, - bool AllowModify) const { +ARMBaseInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,MachineBasicBlock *&TBB, + MachineBasicBlock *&FBB, + SmallVectorImpl<MachineOperand> &Cond, + bool AllowModify) const { // If the block has no terminators, it just falls into the block after it. MachineBasicBlock::iterator I = MBB.end(); if (I == MBB.begin() || !isUnpredicatedTerminator(--I)) @@ -349,11 +265,11 @@ bool // If there is only one terminator instruction, process it. 
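  // (Throughout the analysis below, the Thumb-2 branches t2B and t2Bcc are
  // accepted alongside the ARM and Thumb-1 forms B/tB and Bcc/tBcc.)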
unsigned LastOpc = LastInst->getOpcode(); if (I == MBB.begin() || !isUnpredicatedTerminator(--I)) { - if (LastOpc == ARM::B || LastOpc == ARM::tB) { + if (LastOpc == ARM::B || LastOpc == ARM::tB || LastOpc == ARM::t2B) { TBB = LastInst->getOperand(0).getMBB(); return false; } - if (LastOpc == ARM::Bcc || LastOpc == ARM::tBcc) { + if (LastOpc == ARM::Bcc || LastOpc == ARM::tBcc || LastOpc == ARM::t2Bcc) { // Block ends with fall-through condbranch. TBB = LastInst->getOperand(0).getMBB(); Cond.push_back(LastInst->getOperand(1)); @@ -370,10 +286,12 @@ bool if (SecondLastInst && I != MBB.begin() && isUnpredicatedTerminator(--I)) return true; - // If the block ends with ARM::B/ARM::tB and a ARM::Bcc/ARM::tBcc, handle it. + // If the block ends with ARM::B/ARM::tB/ARM::t2B and a + // ARM::Bcc/ARM::tBcc/ARM::t2Bcc, handle it. unsigned SecondLastOpc = SecondLastInst->getOpcode(); if ((SecondLastOpc == ARM::Bcc && LastOpc == ARM::B) || - (SecondLastOpc == ARM::tBcc && LastOpc == ARM::tB)) { + (SecondLastOpc == ARM::tBcc && LastOpc == ARM::tB) || + (SecondLastOpc == ARM::t2Bcc && LastOpc == ARM::t2B)) { TBB = SecondLastInst->getOperand(0).getMBB(); Cond.push_back(SecondLastInst->getOperand(1)); Cond.push_back(SecondLastInst->getOperand(2)); @@ -383,8 +301,9 @@ bool // If the block ends with two unconditional branches, handle it. The second // one is not executed, so remove it. - if ((SecondLastOpc == ARM::B || SecondLastOpc==ARM::tB) && - (LastOpc == ARM::B || LastOpc == ARM::tB)) { + if ((SecondLastOpc == ARM::B || SecondLastOpc==ARM::tB || + SecondLastOpc==ARM::t2B) && + (LastOpc == ARM::B || LastOpc == ARM::tB || LastOpc == ARM::t2B)) { TBB = SecondLastInst->getOperand(0).getMBB(); I = LastInst; if (AllowModify) @@ -396,8 +315,10 @@ bool // branch. The branch folder can create these, and we must get rid of them for // correctness of Thumb constant islands. if ((SecondLastOpc == ARM::BR_JTr || SecondLastOpc==ARM::BR_JTm || - SecondLastOpc == ARM::BR_JTadd || SecondLastOpc==ARM::tBR_JTr) && - (LastOpc == ARM::B || LastOpc == ARM::tB)) { + SecondLastOpc == ARM::BR_JTadd || SecondLastOpc==ARM::tBR_JTr || + SecondLastOpc == ARM::t2BR_JTr || SecondLastOpc==ARM::t2BR_JTm || + SecondLastOpc == ARM::t2BR_JTadd) && + (LastOpc == ARM::B || LastOpc == ARM::tB || LastOpc == ARM::t2B)) { I = LastInst; if (AllowModify) I->eraseFromParent(); @@ -412,8 +333,10 @@ bool unsigned ARMBaseInstrInfo::RemoveBranch(MachineBasicBlock &MBB) const { MachineFunction &MF = *MBB.getParent(); ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); - int BOpc = AFI->isThumbFunction() ? ARM::tB : ARM::B; - int BccOpc = AFI->isThumbFunction() ? ARM::tBcc : ARM::Bcc; + int BOpc = AFI->isThumbFunction() ? + (AFI->isThumb2Function() ? ARM::t2B : ARM::tB) : ARM::B; + int BccOpc = AFI->isThumbFunction() ? + (AFI->isThumb2Function() ? ARM::t2Bcc : ARM::tBcc) : ARM::Bcc; MachineBasicBlock::iterator I = MBB.end(); if (I == MBB.begin()) return 0; @@ -444,8 +367,10 @@ ARMBaseInstrInfo::InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, DebugLoc dl = DebugLoc::getUnknownLoc(); MachineFunction &MF = *MBB.getParent(); ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); - int BOpc = AFI->isThumbFunction() ? ARM::tB : ARM::B; - int BccOpc = AFI->isThumbFunction() ? ARM::tBcc : ARM::Bcc; + int BOpc = AFI->isThumbFunction() ? + (AFI->isThumb2Function() ? ARM::t2B : ARM::tB) : ARM::B; + int BccOpc = AFI->isThumbFunction() ? + (AFI->isThumb2Function() ? ARM::t2Bcc : ARM::tBcc) : ARM::Bcc; // Shouldn't be a fall through. 
assert(TBB && "InsertBranch must not be told to insert a fallthrough"); @@ -468,11 +393,288 @@ ARMBaseInstrInfo::InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, return 2; } -bool ARMInstrInfo::copyRegToReg(MachineBasicBlock &MBB, - MachineBasicBlock::iterator I, - unsigned DestReg, unsigned SrcReg, - const TargetRegisterClass *DestRC, - const TargetRegisterClass *SrcRC) const { +bool +ARMBaseInstrInfo::BlockHasNoFallThrough(const MachineBasicBlock &MBB) const { + if (MBB.empty()) return false; + + switch (MBB.back().getOpcode()) { + case ARM::BX_RET: // Return. + case ARM::LDM_RET: + case ARM::tBX_RET: + case ARM::tBX_RET_vararg: + case ARM::tPOP_RET: + case ARM::B: + case ARM::tB: + case ARM::t2B: // Uncond branch. + case ARM::tBR_JTr: + case ARM::t2BR_JTr: + case ARM::BR_JTr: // Jumptable branch. + case ARM::t2BR_JTm: + case ARM::BR_JTm: // Jumptable branch through mem. + case ARM::t2BR_JTadd: + case ARM::BR_JTadd: // Jumptable branch add to pc. + return true; + default: return false; + } +} + +bool ARMBaseInstrInfo:: +ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const { + ARMCC::CondCodes CC = (ARMCC::CondCodes)(int)Cond[0].getImm(); + Cond[0].setImm(ARMCC::getOppositeCondition(CC)); + return false; +} + +bool ARMBaseInstrInfo::isPredicated(const MachineInstr *MI) const { + int PIdx = MI->findFirstPredOperandIdx(); + return PIdx != -1 && MI->getOperand(PIdx).getImm() != ARMCC::AL; +} + +bool ARMBaseInstrInfo:: +PredicateInstruction(MachineInstr *MI, + const SmallVectorImpl<MachineOperand> &Pred) const { + unsigned Opc = MI->getOpcode(); + if (Opc == ARM::B || Opc == ARM::tB || Opc == ARM::t2B) { + MI->setDesc(get((Opc == ARM::B) ? ARM::Bcc : + ((Opc == ARM::tB) ? ARM::tBcc : ARM::t2Bcc))); + MI->addOperand(MachineOperand::CreateImm(Pred[0].getImm())); + MI->addOperand(MachineOperand::CreateReg(Pred[1].getReg(), false)); + return true; + } + + int PIdx = MI->findFirstPredOperandIdx(); + if (PIdx != -1) { + MachineOperand &PMO = MI->getOperand(PIdx); + PMO.setImm(Pred[0].getImm()); + MI->getOperand(PIdx+1).setReg(Pred[1].getReg()); + return true; + } + return false; +} + +bool ARMBaseInstrInfo:: +SubsumesPredicate(const SmallVectorImpl<MachineOperand> &Pred1, + const SmallVectorImpl<MachineOperand> &Pred2) const { + if (Pred1.size() > 2 || Pred2.size() > 2) + return false; + + ARMCC::CondCodes CC1 = (ARMCC::CondCodes)Pred1[0].getImm(); + ARMCC::CondCodes CC2 = (ARMCC::CondCodes)Pred2[0].getImm(); + if (CC1 == CC2) + return true; + + switch (CC1) { + default: + return false; + case ARMCC::AL: + return true; + case ARMCC::HS: + return CC2 == ARMCC::HI; + case ARMCC::LS: + return CC2 == ARMCC::LO || CC2 == ARMCC::EQ; + case ARMCC::GE: + return CC2 == ARMCC::GT; + case ARMCC::LE: + return CC2 == ARMCC::LT; + } +} + +bool ARMBaseInstrInfo::DefinesPredicate(MachineInstr *MI, + std::vector<MachineOperand> &Pred) const { + const TargetInstrDesc &TID = MI->getDesc(); + if (!TID.getImplicitDefs() && !TID.hasOptionalDef()) + return false; + + bool Found = false; + for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { + const MachineOperand &MO = MI->getOperand(i); + if (MO.isReg() && MO.getReg() == ARM::CPSR) { + Pred.push_back(MO); + Found = true; + } + } + + return Found; +} + + +/// FIXME: Works around a gcc miscompilation with -fstrict-aliasing +static unsigned getNumJTEntries(const std::vector<MachineJumpTableEntry> &JT, + unsigned JTI) DISABLE_INLINE; +static unsigned getNumJTEntries(const std::vector<MachineJumpTableEntry> &JT, + unsigned JTI) { + return 
JT[JTI].MBBs.size(); +} + +/// GetInstSize - Return the size of the specified MachineInstr. +/// +unsigned ARMBaseInstrInfo::GetInstSizeInBytes(const MachineInstr *MI) const { + const MachineBasicBlock &MBB = *MI->getParent(); + const MachineFunction *MF = MBB.getParent(); + const TargetAsmInfo *TAI = MF->getTarget().getTargetAsmInfo(); + + // Basic size info comes from the TSFlags field. + const TargetInstrDesc &TID = MI->getDesc(); + unsigned TSFlags = TID.TSFlags; + + switch ((TSFlags & ARMII::SizeMask) >> ARMII::SizeShift) { + default: { + // If this machine instr is an inline asm, measure it. + if (MI->getOpcode() == ARM::INLINEASM) + return TAI->getInlineAsmLength(MI->getOperand(0).getSymbolName()); + if (MI->isLabel()) + return 0; + switch (MI->getOpcode()) { + default: + assert(0 && "Unknown or unset size field for instr!"); + break; + case TargetInstrInfo::IMPLICIT_DEF: + case TargetInstrInfo::DECLARE: + case TargetInstrInfo::DBG_LABEL: + case TargetInstrInfo::EH_LABEL: + return 0; + } + break; + } + case ARMII::Size8Bytes: return 8; // Arm instruction x 2. + case ARMII::Size4Bytes: return 4; // Arm instruction. + case ARMII::Size2Bytes: return 2; // Thumb instruction. + case ARMII::SizeSpecial: { + switch (MI->getOpcode()) { + case ARM::CONSTPOOL_ENTRY: + // If this machine instr is a constant pool entry, its size is recorded as + // operand #2. + return MI->getOperand(2).getImm(); + case ARM::Int_eh_sjlj_setjmp: return 12; + case ARM::BR_JTr: + case ARM::BR_JTm: + case ARM::BR_JTadd: + case ARM::t2BR_JTr: + case ARM::t2BR_JTm: + case ARM::t2BR_JTadd: + case ARM::tBR_JTr: { + // These are jumptable branches, i.e. a branch followed by an inlined + // jumptable. The size is 4 + 4 * number of entries. + unsigned NumOps = TID.getNumOperands(); + MachineOperand JTOP = + MI->getOperand(NumOps - (TID.isPredicable() ? 3 : 2)); + unsigned JTI = JTOP.getIndex(); + const MachineJumpTableInfo *MJTI = MF->getJumpTableInfo(); + const std::vector<MachineJumpTableEntry> &JT = MJTI->getJumpTables(); + assert(JTI < JT.size()); + // Thumb instructions are 2-byte aligned, but JT entries are 4-byte + // aligned. The assembler / linker may add 2-byte padding just before + // the JT entries. The size does not include this padding; the + // constant islands pass does separate bookkeeping for it. + // FIXME: If we know the size of the function is less than (1 << 16) * 2 + // bytes, we can use 16-bit entries instead. Then there won't be an + // alignment issue. + return getNumJTEntries(JT, JTI) * 4 + + ((MI->getOpcode()==ARM::tBR_JTr) ? 2 : 4); + } + default: + // Otherwise, pseudo-instruction sizes are zero. + return 0; + } + } + } + return 0; // Not reached +} + +/// Return true if the instruction is a register to register move and +/// leave the source and dest operands in the passed parameters. +/// +bool +ARMBaseInstrInfo::isMoveInstr(const MachineInstr &MI, + unsigned &SrcReg, unsigned &DstReg, + unsigned& SrcSubIdx, unsigned& DstSubIdx) const { + SrcSubIdx = DstSubIdx = 0; // No sub-registers.
+ + unsigned oc = MI.getOpcode(); + switch (oc) { + default: + return false; + case ARM::FCPYS: + case ARM::FCPYD: + case ARM::VMOVD: + case ARM::VMOVQ: + SrcReg = MI.getOperand(1).getReg(); + DstReg = MI.getOperand(0).getReg(); + return true; + case ARM::MOVr: + assert(MI.getDesc().getNumOperands() >= 2 && + MI.getOperand(0).isReg() && + MI.getOperand(1).isReg() && + "Invalid ARM MOV instruction"); + SrcReg = MI.getOperand(1).getReg(); + DstReg = MI.getOperand(0).getReg(); + return true; + } +} + +unsigned +ARMBaseInstrInfo::isLoadFromStackSlot(const MachineInstr *MI, + int &FrameIndex) const { + switch (MI->getOpcode()) { + default: break; + case ARM::LDR: + if (MI->getOperand(1).isFI() && + MI->getOperand(2).isReg() && + MI->getOperand(3).isImm() && + MI->getOperand(2).getReg() == 0 && + MI->getOperand(3).getImm() == 0) { + FrameIndex = MI->getOperand(1).getIndex(); + return MI->getOperand(0).getReg(); + } + break; + case ARM::FLDD: + case ARM::FLDS: + if (MI->getOperand(1).isFI() && + MI->getOperand(2).isImm() && + MI->getOperand(2).getImm() == 0) { + FrameIndex = MI->getOperand(1).getIndex(); + return MI->getOperand(0).getReg(); + } + break; + } + return 0; +} + +unsigned +ARMBaseInstrInfo::isStoreToStackSlot(const MachineInstr *MI, + int &FrameIndex) const { + switch (MI->getOpcode()) { + default: break; + case ARM::STR: + if (MI->getOperand(1).isFI() && + MI->getOperand(2).isReg() && + MI->getOperand(3).isImm() && + MI->getOperand(2).getReg() == 0 && + MI->getOperand(3).getImm() == 0) { + FrameIndex = MI->getOperand(1).getIndex(); + return MI->getOperand(0).getReg(); + } + break; + case ARM::FSTD: + case ARM::FSTS: + if (MI->getOperand(1).isFI() && + MI->getOperand(2).isImm() && + MI->getOperand(2).getImm() == 0) { + FrameIndex = MI->getOperand(1).getIndex(); + return MI->getOperand(0).getReg(); + } + break; + } + + return 0; +} + +bool +ARMBaseInstrInfo::copyRegToReg(MachineBasicBlock &MBB, + MachineBasicBlock::iterator I, + unsigned DestReg, unsigned SrcReg, + const TargetRegisterClass *DestRC, + const TargetRegisterClass *SrcRC) const { DebugLoc DL = DebugLoc::getUnknownLoc(); if (I != MBB.end()) DL = I->getDebugLoc(); @@ -498,7 +700,7 @@ bool ARMInstrInfo::copyRegToReg(MachineBasicBlock &MBB, return true; } -void ARMInstrInfo:: +void ARMBaseInstrInfo:: storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, unsigned SrcReg, bool isKill, int FI, const TargetRegisterClass *RC) const { @@ -521,11 +723,12 @@ storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, } } -void ARMInstrInfo::storeRegToAddr(MachineFunction &MF, unsigned SrcReg, - bool isKill, - SmallVectorImpl<MachineOperand> &Addr, - const TargetRegisterClass *RC, - SmallVectorImpl<MachineInstr*> &NewMIs) const{ +void +ARMBaseInstrInfo::storeRegToAddr(MachineFunction &MF, unsigned SrcReg, + bool isKill, + SmallVectorImpl<MachineOperand> &Addr, + const TargetRegisterClass *RC, + SmallVectorImpl<MachineInstr*> &NewMIs) const{ DebugLoc DL = DebugLoc::getUnknownLoc(); unsigned Opc = 0; if (RC == ARM::GPRRegisterClass) { @@ -546,7 +749,7 @@ void ARMInstrInfo::storeRegToAddr(MachineFunction &MF, unsigned SrcReg, return; } -void ARMInstrInfo:: +void ARMBaseInstrInfo:: loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, unsigned DestReg, int FI, const TargetRegisterClass *RC) const { @@ -566,7 +769,7 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, } } -void ARMInstrInfo:: +void ARMBaseInstrInfo:: loadRegFromAddr(MachineFunction &MF, 
unsigned DestReg, SmallVectorImpl<MachineOperand> &Addr, const TargetRegisterClass *RC, @@ -590,7 +793,7 @@ loadRegFromAddr(MachineFunction &MF, unsigned DestReg, return; } -MachineInstr *ARMInstrInfo:: +MachineInstr *ARMBaseInstrInfo:: foldMemoryOperandImpl(MachineFunction &MF, MachineInstr *MI, const SmallVectorImpl<unsigned> &Ops, int FI) const { if (Ops.size() != 1) return NULL; @@ -609,14 +812,19 @@ foldMemoryOperandImpl(MachineFunction &MF, MachineInstr *MI, if (OpNum == 0) { // move -> store unsigned SrcReg = MI->getOperand(1).getReg(); bool isKill = MI->getOperand(1).isKill(); + bool isUndef = MI->getOperand(1).isUndef(); NewMI = BuildMI(MF, MI->getDebugLoc(), get(ARM::STR)) - .addReg(SrcReg, getKillRegState(isKill)) + .addReg(SrcReg, getKillRegState(isKill) | getUndefRegState(isUndef)) .addFrameIndex(FI).addReg(0).addImm(0).addImm(Pred).addReg(PredReg); } else { // move -> load unsigned DstReg = MI->getOperand(0).getReg(); bool isDead = MI->getOperand(0).isDead(); + bool isUndef = MI->getOperand(0).isUndef(); NewMI = BuildMI(MF, MI->getDebugLoc(), get(ARM::LDR)) - .addReg(DstReg, RegState::Define | getDeadRegState(isDead)) + .addReg(DstReg, + RegState::Define | + getDeadRegState(isDead) | + getUndefRegState(isUndef)) .addFrameIndex(FI).addReg(0).addImm(0).addImm(Pred).addReg(PredReg); } break; @@ -626,14 +834,22 @@ foldMemoryOperandImpl(MachineFunction &MF, MachineInstr *MI, unsigned PredReg = MI->getOperand(3).getReg(); if (OpNum == 0) { // move -> store unsigned SrcReg = MI->getOperand(1).getReg(); + bool isKill = MI->getOperand(1).isKill(); + bool isUndef = MI->getOperand(1).isUndef(); NewMI = BuildMI(MF, MI->getDebugLoc(), get(ARM::FSTS)) - .addReg(SrcReg).addFrameIndex(FI) + .addReg(SrcReg, getKillRegState(isKill) | getUndefRegState(isUndef)) + .addFrameIndex(FI) .addImm(0).addImm(Pred).addReg(PredReg); } else { // move -> load unsigned DstReg = MI->getOperand(0).getReg(); - NewMI = BuildMI(MF, MI->getDebugLoc(), get(ARM::FLDS), DstReg) - .addFrameIndex(FI) - .addImm(0).addImm(Pred).addReg(PredReg); + bool isDead = MI->getOperand(0).isDead(); + bool isUndef = MI->getOperand(0).isUndef(); + NewMI = BuildMI(MF, MI->getDebugLoc(), get(ARM::FLDS)) + .addReg(DstReg, + RegState::Define | + getDeadRegState(isDead) | + getUndefRegState(isUndef)) + .addFrameIndex(FI).addImm(0).addImm(Pred).addReg(PredReg); } break; } @@ -643,14 +859,19 @@ foldMemoryOperandImpl(MachineFunction &MF, MachineInstr *MI, if (OpNum == 0) { // move -> store unsigned SrcReg = MI->getOperand(1).getReg(); bool isKill = MI->getOperand(1).isKill(); + bool isUndef = MI->getOperand(1).isUndef(); NewMI = BuildMI(MF, MI->getDebugLoc(), get(ARM::FSTD)) - .addReg(SrcReg, getKillRegState(isKill)) + .addReg(SrcReg, getKillRegState(isKill) | getUndefRegState(isUndef)) .addFrameIndex(FI).addImm(0).addImm(Pred).addReg(PredReg); } else { // move -> load unsigned DstReg = MI->getOperand(0).getReg(); bool isDead = MI->getOperand(0).isDead(); + bool isUndef = MI->getOperand(0).isUndef(); NewMI = BuildMI(MF, MI->getDebugLoc(), get(ARM::FLDD)) - .addReg(DstReg, RegState::Define | getDeadRegState(isDead)) + .addReg(DstReg, + RegState::Define | + getDeadRegState(isDead) | + getUndefRegState(isUndef)) .addFrameIndex(FI).addImm(0).addImm(Pred).addReg(PredReg); } break; @@ -660,35 +881,25 @@ foldMemoryOperandImpl(MachineFunction &MF, MachineInstr *MI, return NewMI; } -bool ARMBaseInstrInfo:: -canFoldMemoryOperand(const MachineInstr *MI, - const SmallVectorImpl<unsigned> &Ops) const { +MachineInstr* 
+ARMBaseInstrInfo::foldMemoryOperandImpl(MachineFunction &MF, + MachineInstr* MI, + const SmallVectorImpl<unsigned> &Ops, + MachineInstr* LoadMI) const { + return 0; +} + +bool +ARMBaseInstrInfo::canFoldMemoryOperand(const MachineInstr *MI, + const SmallVectorImpl<unsigned> &Ops) const { if (Ops.size() != 1) return false; - unsigned OpNum = Ops[0]; unsigned Opc = MI->getOpcode(); switch (Opc) { default: break; case ARM::MOVr: // If it is updating CPSR, then it cannot be folded. return MI->getOperand(4).getReg() != ARM::CPSR; - case ARM::tMOVr: - case ARM::tMOVlor2hir: - case ARM::tMOVhir2lor: - case ARM::tMOVhir2hir: { - if (OpNum == 0) { // move -> store - unsigned SrcReg = MI->getOperand(1).getReg(); - if (RI.isPhysicalRegister(SrcReg) && !RI.isLowRegister(SrcReg)) - // tSpill cannot take a high register operand. - return false; - } else { // move -> load - unsigned DstReg = MI->getOperand(0).getReg(); - if (RI.isPhysicalRegister(DstReg) && !RI.isLowRegister(DstReg)) - // tRestore cannot target a high register operand. - return false; - } - return true; - } case ARM::FCPYS: case ARM::FCPYD: return true; @@ -700,183 +911,3 @@ canFoldMemoryOperand(const MachineInstr *MI, return false; } - -bool - ARMBaseInstrInfo::BlockHasNoFallThrough(const MachineBasicBlock &MBB) const { - if (MBB.empty()) return false; - - switch (MBB.back().getOpcode()) { - case ARM::BX_RET: // Return. - case ARM::LDM_RET: - case ARM::tBX_RET: - case ARM::tBX_RET_vararg: - case ARM::tPOP_RET: - case ARM::B: - case ARM::tB: // Uncond branch. - case ARM::tBR_JTr: - case ARM::BR_JTr: // Jumptable branch. - case ARM::BR_JTm: // Jumptable branch through mem. - case ARM::BR_JTadd: // Jumptable branch add to pc. - return true; - default: return false; - } -} - -bool ARMBaseInstrInfo:: -ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const { - ARMCC::CondCodes CC = (ARMCC::CondCodes)(int)Cond[0].getImm(); - Cond[0].setImm(ARMCC::getOppositeCondition(CC)); - return false; -} - -bool ARMBaseInstrInfo::isPredicated(const MachineInstr *MI) const { - int PIdx = MI->findFirstPredOperandIdx(); - return PIdx != -1 && MI->getOperand(PIdx).getImm() != ARMCC::AL; -} - -bool ARMBaseInstrInfo:: -PredicateInstruction(MachineInstr *MI, - const SmallVectorImpl<MachineOperand> &Pred) const { - unsigned Opc = MI->getOpcode(); - if (Opc == ARM::B || Opc == ARM::tB) { - MI->setDesc(get(Opc == ARM::B ? 
ARM::Bcc : ARM::tBcc)); - MI->addOperand(MachineOperand::CreateImm(Pred[0].getImm())); - MI->addOperand(MachineOperand::CreateReg(Pred[1].getReg(), false)); - return true; - } - - int PIdx = MI->findFirstPredOperandIdx(); - if (PIdx != -1) { - MachineOperand &PMO = MI->getOperand(PIdx); - PMO.setImm(Pred[0].getImm()); - MI->getOperand(PIdx+1).setReg(Pred[1].getReg()); - return true; - } - return false; -} - -bool ARMBaseInstrInfo:: -SubsumesPredicate(const SmallVectorImpl<MachineOperand> &Pred1, - const SmallVectorImpl<MachineOperand> &Pred2) const { - if (Pred1.size() > 2 || Pred2.size() > 2) - return false; - - ARMCC::CondCodes CC1 = (ARMCC::CondCodes)Pred1[0].getImm(); - ARMCC::CondCodes CC2 = (ARMCC::CondCodes)Pred2[0].getImm(); - if (CC1 == CC2) - return true; - - switch (CC1) { - default: - return false; - case ARMCC::AL: - return true; - case ARMCC::HS: - return CC2 == ARMCC::HI; - case ARMCC::LS: - return CC2 == ARMCC::LO || CC2 == ARMCC::EQ; - case ARMCC::GE: - return CC2 == ARMCC::GT; - case ARMCC::LE: - return CC2 == ARMCC::LT; - } -} - -bool ARMBaseInstrInfo::DefinesPredicate(MachineInstr *MI, - std::vector<MachineOperand> &Pred) const { - const TargetInstrDesc &TID = MI->getDesc(); - if (!TID.getImplicitDefs() && !TID.hasOptionalDef()) - return false; - - bool Found = false; - for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { - const MachineOperand &MO = MI->getOperand(i); - if (MO.isReg() && MO.getReg() == ARM::CPSR) { - Pred.push_back(MO); - Found = true; - } - } - - return Found; -} - - -/// FIXME: Works around a gcc miscompilation with -fstrict-aliasing -static unsigned getNumJTEntries(const std::vector<MachineJumpTableEntry> &JT, - unsigned JTI) DISABLE_INLINE; -static unsigned getNumJTEntries(const std::vector<MachineJumpTableEntry> &JT, - unsigned JTI) { - return JT[JTI].MBBs.size(); -} - -/// GetInstSize - Return the size of the specified MachineInstr. -/// -unsigned ARMBaseInstrInfo::GetInstSizeInBytes(const MachineInstr *MI) const { - const MachineBasicBlock &MBB = *MI->getParent(); - const MachineFunction *MF = MBB.getParent(); - const TargetAsmInfo *TAI = MF->getTarget().getTargetAsmInfo(); - - // Basic size info comes from the TSFlags field. - const TargetInstrDesc &TID = MI->getDesc(); - unsigned TSFlags = TID.TSFlags; - - switch ((TSFlags & ARMII::SizeMask) >> ARMII::SizeShift) { - default: { - // If this machine instr is an inline asm, measure it. - if (MI->getOpcode() == ARM::INLINEASM) - return TAI->getInlineAsmLength(MI->getOperand(0).getSymbolName()); - if (MI->isLabel()) - return 0; - switch (MI->getOpcode()) { - default: - assert(0 && "Unknown or unset size field for instr!"); - break; - case TargetInstrInfo::IMPLICIT_DEF: - case TargetInstrInfo::DECLARE: - case TargetInstrInfo::DBG_LABEL: - case TargetInstrInfo::EH_LABEL: - return 0; - } - break; - } - case ARMII::Size8Bytes: return 8; // Arm instruction x 2. - case ARMII::Size4Bytes: return 4; // Arm instruction. - case ARMII::Size2Bytes: return 2; // Thumb instruction. - case ARMII::SizeSpecial: { - switch (MI->getOpcode()) { - case ARM::CONSTPOOL_ENTRY: - // If this machine instr is a constant pool entry, its size is recorded as - // operand #2. - return MI->getOperand(2).getImm(); - case ARM::Int_eh_sjlj_setjmp: return 12; - case ARM::BR_JTr: - case ARM::BR_JTm: - case ARM::BR_JTadd: - case ARM::tBR_JTr: { - // These are jumptable branches, i.e. a branch followed by an inlined - // jumptable. The size is 4 + 4 * number of entries. 
- unsigned NumOps = TID.getNumOperands(); - MachineOperand JTOP = - MI->getOperand(NumOps - (TID.isPredicable() ? 3 : 2)); - unsigned JTI = JTOP.getIndex(); - const MachineJumpTableInfo *MJTI = MF->getJumpTableInfo(); - const std::vector<MachineJumpTableEntry> &JT = MJTI->getJumpTables(); - assert(JTI < JT.size()); - // Thumb instructions are 2 byte aligned, but JT entries are 4 byte - // 4 aligned. The assembler / linker may add 2 byte padding just before - // the JT entries. The size does not include this padding; the - // constant islands pass does separate bookkeeping for it. - // FIXME: If we know the size of the function is less than (1 << 16) *2 - // bytes, we can use 16-bit entries instead. Then there won't be an - // alignment issue. - return getNumJTEntries(JT, JTI) * 4 + - (MI->getOpcode()==ARM::tBR_JTr ? 2 : 4); - } - default: - // Otherwise, pseudo-instruction sizes are zero. - return 0; - } - } - } - return 0; // Not reached -} diff --git a/lib/Target/ARM/ARMInstrInfo.h b/lib/Target/ARM/ARMInstrInfo.h index 131960b..8c8f788 100644 --- a/lib/Target/ARM/ARMInstrInfo.h +++ b/lib/Target/ARM/ARMInstrInfo.h @@ -33,16 +33,22 @@ namespace ARMII { // This four-bit field describes the addressing mode used. AddrModeMask = 0xf, - AddrModeNone = 0, - AddrMode1 = 1, - AddrMode2 = 2, - AddrMode3 = 3, - AddrMode4 = 4, - AddrMode5 = 5, - AddrModeT1 = 6, - AddrModeT2 = 7, - AddrModeT4 = 8, - AddrModeTs = 9, // i8 * 4 for pc and sp relative data + AddrModeNone = 0, + AddrMode1 = 1, + AddrMode2 = 2, + AddrMode3 = 3, + AddrMode4 = 4, + AddrMode5 = 5, + AddrMode6 = 6, + AddrModeT1_1 = 7, + AddrModeT1_2 = 8, + AddrModeT1_4 = 9, + AddrModeT1_s = 10, // i8 * 4 for pc and sp relative data + AddrModeT2_i12 = 11, + AddrModeT2_i8 = 12, + AddrModeT2_so = 13, + AddrModeT2_pc = 14, // +/- i12 for pc relative data + AddrModeT2_i8s4 = 15, // i8 * 4 // Size* - Flags to keep track of the size of an instruction. SizeShift = 4, @@ -147,25 +153,16 @@ namespace ARMII { } class ARMBaseInstrInfo : public TargetInstrInfoImpl { - const ARMRegisterInfo RI; protected: // Can be only subclassed. explicit ARMBaseInstrInfo(const ARMSubtarget &STI); public: - - /// getRegisterInfo - TargetInstrInfo is a superset of MRegister info. As - /// such, whenever a client has an instance of instruction info, it should - /// always be able to get register info as well (through this method). - /// - virtual const ARMRegisterInfo &getRegisterInfo() const { return RI; } - - void reMaterialize(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, - unsigned DestReg, const MachineInstr *Orig) const; - virtual MachineInstr *convertToThreeAddress(MachineFunction::iterator &MFI, MachineBasicBlock::iterator &MBBI, LiveVariables *LV) const; + virtual const ARMBaseRegisterInfo &getRegisterInfo() const =0; + // Branch analysis. virtual bool AnalyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB, MachineBasicBlock *&FBB, @@ -176,9 +173,6 @@ public: MachineBasicBlock *FBB, const SmallVectorImpl<MachineOperand> &Cond) const; - virtual bool canFoldMemoryOperand(const MachineInstr *MI, - const SmallVectorImpl<unsigned> &Ops) const; - virtual bool BlockHasNoFallThrough(const MachineBasicBlock &MBB) const; virtual bool ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const; @@ -206,11 +200,6 @@ public: /// GetInstSize - Returns the size of the specified MachineInstr. 
/// virtual unsigned GetInstSizeInBytes(const MachineInstr* MI) const; -}; - -class ARMInstrInfo : public ARMBaseInstrInfo { -public: - explicit ARMInstrInfo(const ARMSubtarget &STI); /// Return true if the instruction is a register to register move and return /// the source and dest operands and their sub-register indices by reference. @@ -248,17 +237,33 @@ public: const TargetRegisterClass *RC, SmallVectorImpl<MachineInstr*> &NewMIs) const; + virtual bool canFoldMemoryOperand(const MachineInstr *MI, + const SmallVectorImpl<unsigned> &Ops) const; + virtual MachineInstr* foldMemoryOperandImpl(MachineFunction &MF, MachineInstr* MI, - const SmallVectorImpl<unsigned> &Ops, + const SmallVectorImpl<unsigned> &Ops, int FrameIndex) const; virtual MachineInstr* foldMemoryOperandImpl(MachineFunction &MF, MachineInstr* MI, - const SmallVectorImpl<unsigned> &Ops, - MachineInstr* LoadMI) const { - return 0; - } + const SmallVectorImpl<unsigned> &Ops, + MachineInstr* LoadMI) const; +}; + +class ARMInstrInfo : public ARMBaseInstrInfo { + ARMRegisterInfo RI; +public: + explicit ARMInstrInfo(const ARMSubtarget &STI); + + /// getRegisterInfo - TargetInstrInfo is a superset of MRegister info. As + /// such, whenever a client has an instance of instruction info, it should + /// always be able to get register info as well (through this method). + /// + const ARMRegisterInfo &getRegisterInfo() const { return RI; } + + void reMaterialize(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, + unsigned DestReg, const MachineInstr *Orig) const; }; } diff --git a/lib/Target/ARM/ARMInstrInfo.td b/lib/Target/ARM/ARMInstrInfo.td index cb7b7b9..408f47a 100644 --- a/lib/Target/ARM/ARMInstrInfo.td +++ b/lib/Target/ARM/ARMInstrInfo.td @@ -75,8 +75,8 @@ def ARMbrjt : SDNode<"ARMISD::BR_JT", SDT_ARMBrJT, def ARMcmp : SDNode<"ARMISD::CMP", SDT_ARMCmp, [SDNPOutFlag]>; -def ARMcmpNZ : SDNode<"ARMISD::CMPNZ", SDT_ARMCmp, - [SDNPOutFlag]>; +def ARMcmpZ : SDNode<"ARMISD::CMPZ", SDT_ARMCmp, + [SDNPOutFlag,SDNPCommutative]>; def ARMpic_add : SDNode<"ARMISD::PIC_ADD", SDT_ARMPICAdd>; @@ -99,7 +99,7 @@ def HasVFP3 : Predicate<"Subtarget->hasVFP3()">; def HasNEON : Predicate<"Subtarget->hasNEON()">; def IsThumb : Predicate<"Subtarget->isThumb()">; def IsThumb1Only : Predicate<"Subtarget->isThumb1Only()">; -def HasThumb2 : Predicate<"Subtarget->hasThumb2()">; +def IsThumb2 : Predicate<"Subtarget->isThumb2()">; def IsARM : Predicate<"!Subtarget->isThumb()">; def IsDarwin : Predicate<"Subtarget->isTargetDarwin()">; def IsNotDarwin : Predicate<"!Subtarget->isTargetDarwin()">; @@ -287,6 +287,14 @@ def addrmode5 : Operand<i32>, let MIOperandInfo = (ops GPR, i32imm); } +// addrmode6 := reg with optional writeback +// +def addrmode6 : Operand<i32>, + ComplexPattern<i32, 3, "SelectAddrMode6", []> { + let PrintMethod = "printAddrMode6Operand"; + let MIOperandInfo = (ops GPR:$addr, GPR:$upd, i32imm); +} + // addrmodepc := pc + reg // def addrmodepc : Operand<i32>, @@ -309,43 +317,6 @@ def cc_out : OptionalDefOperand<OtherVT, (ops CCR), (ops (i32 zero_reg))> { } //===----------------------------------------------------------------------===// -// ARM Instruction flags. These need to match ARMInstrInfo.h. -// - -// Addressing mode. 
-class AddrMode<bits<4> val> { - bits<4> Value = val; -} -def AddrModeNone : AddrMode<0>; -def AddrMode1 : AddrMode<1>; -def AddrMode2 : AddrMode<2>; -def AddrMode3 : AddrMode<3>; -def AddrMode4 : AddrMode<4>; -def AddrMode5 : AddrMode<5>; -def AddrModeT1 : AddrMode<6>; -def AddrModeT2 : AddrMode<7>; -def AddrModeT4 : AddrMode<8>; -def AddrModeTs : AddrMode<9>; - -// Instruction size. -class SizeFlagVal<bits<3> val> { - bits<3> Value = val; -} -def SizeInvalid : SizeFlagVal<0>; // Unset. -def SizeSpecial : SizeFlagVal<1>; // Pseudo or special. -def Size8Bytes : SizeFlagVal<2>; -def Size4Bytes : SizeFlagVal<3>; -def Size2Bytes : SizeFlagVal<4>; - -// Load / store index mode. -class IndexMode<bits<2> val> { - bits<2> Value = val; -} -def IndexModeNone : IndexMode<0>; -def IndexModePre : IndexMode<1>; -def IndexModePost : IndexMode<2>; - -//===----------------------------------------------------------------------===// include "ARMInstrFormats.td" @@ -780,7 +751,7 @@ def LDRSB_PRE : AI3ldsbpr<(outs GPR:$dst, GPR:$base_wb), def LDRSB_POST: AI3ldsbpo<(outs GPR:$dst, GPR:$base_wb), (ins GPR:$base,am3offset:$offset), LdMiscFrm, - "ldr", "sb $dst, [$base], $offset", "$base = $base_wb", []>; + "ldr", "sb $dst, [$base], $offset", "$base = $base_wb", []>; } // Store @@ -1309,19 +1280,19 @@ defm CMN : AI1_cmp_irs<0b1011, "cmn", // Note that TST/TEQ don't set all the same flags that CMP does! defm TST : AI1_cmp_irs<0b1000, "tst", - BinOpFrag<(ARMcmpNZ (and node:$LHS, node:$RHS), 0)>, 1>; + BinOpFrag<(ARMcmpZ (and node:$LHS, node:$RHS), 0)>, 1>; defm TEQ : AI1_cmp_irs<0b1001, "teq", - BinOpFrag<(ARMcmpNZ (xor node:$LHS, node:$RHS), 0)>, 1>; + BinOpFrag<(ARMcmpZ (xor node:$LHS, node:$RHS), 0)>, 1>; -defm CMPnz : AI1_cmp_irs<0b1010, "cmp", - BinOpFrag<(ARMcmpNZ node:$LHS, node:$RHS)>>; -defm CMNnz : AI1_cmp_irs<0b1011, "cmn", - BinOpFrag<(ARMcmpNZ node:$LHS,(ineg node:$RHS))>>; +defm CMPz : AI1_cmp_irs<0b1010, "cmp", + BinOpFrag<(ARMcmpZ node:$LHS, node:$RHS)>>; +defm CMNz : AI1_cmp_irs<0b1011, "cmn", + BinOpFrag<(ARMcmpZ node:$LHS,(ineg node:$RHS))>>; def : ARMPat<(ARMcmp GPR:$src, so_imm_neg:$imm), (CMNri GPR:$src, so_imm_neg:$imm)>; -def : ARMPat<(ARMcmpNZ GPR:$src, so_imm_neg:$imm), +def : ARMPat<(ARMcmpZ GPR:$src, so_imm_neg:$imm), (CMNri GPR:$src, so_imm_neg:$imm)>; diff --git a/lib/Target/ARM/ARMInstrThumb.td b/lib/Target/ARM/ARMInstrThumb.td index 7927ca5..904d9b1 100644 --- a/lib/Target/ARM/ARMInstrThumb.td +++ b/lib/Target/ARM/ARMInstrThumb.td @@ -187,23 +187,24 @@ let isCall = 1, let isBranch = 1, isTerminator = 1 in { let isBarrier = 1 in { let isPredicable = 1 in - def tB : TI<(outs), (ins brtarget:$target), "b $target", - [(br bb:$target)]>; + def tB : T1I<(outs), (ins brtarget:$target), "b $target", + [(br bb:$target)]>; // Far jump - def tBfar : TIx2<(outs), (ins brtarget:$target), "bl $target\t@ far jump",[]>; + def tBfar : T1Ix2<(outs), (ins brtarget:$target), + "bl $target\t@ far jump",[]>; - def tBR_JTr : TJTI<(outs), - (ins tGPR:$target, jtblock_operand:$jt, i32imm:$id), - "cpy pc, $target \n\t.align\t2\n$jt", - [(ARMbrjt tGPR:$target, tjumptable:$jt, imm:$id)]>; + def tBR_JTr : T1JTI<(outs), + (ins tGPR:$target, jtblock_operand:$jt, i32imm:$id), + "cpy pc, $target \n\t.align\t2\n$jt", + [(ARMbrjt tGPR:$target, tjumptable:$jt, imm:$id)]>; } } // FIXME: should be able to write a pattern for ARMBrcond, but can't use // a two-value operand where a dag node expects two operands. 
:( let isBranch = 1, isTerminator = 1 in - def tBcc : TI<(outs), (ins brtarget:$target, pred:$cc), "b$cc $target", + def tBcc : T1I<(outs), (ins brtarget:$target, pred:$cc), "b$cc $target", [/*(ARMbrcond bb:$target, imm:$cc)*/]>; //===----------------------------------------------------------------------===// @@ -211,68 +212,68 @@ let isBranch = 1, isTerminator = 1 in // let canFoldAsLoad = 1 in -def tLDR : TI4<(outs tGPR:$dst), (ins t_addrmode_s4:$addr), +def tLDR : T1I4<(outs tGPR:$dst), (ins t_addrmode_s4:$addr), "ldr $dst, $addr", [(set tGPR:$dst, (load t_addrmode_s4:$addr))]>; -def tLDRB : TI1<(outs tGPR:$dst), (ins t_addrmode_s1:$addr), +def tLDRB : T1I1<(outs tGPR:$dst), (ins t_addrmode_s1:$addr), "ldrb $dst, $addr", [(set tGPR:$dst, (zextloadi8 t_addrmode_s1:$addr))]>; -def tLDRH : TI2<(outs tGPR:$dst), (ins t_addrmode_s2:$addr), +def tLDRH : T1I2<(outs tGPR:$dst), (ins t_addrmode_s2:$addr), "ldrh $dst, $addr", [(set tGPR:$dst, (zextloadi16 t_addrmode_s2:$addr))]>; -def tLDRSB : TI1<(outs tGPR:$dst), (ins t_addrmode_rr:$addr), +def tLDRSB : T1I1<(outs tGPR:$dst), (ins t_addrmode_rr:$addr), "ldrsb $dst, $addr", [(set tGPR:$dst, (sextloadi8 t_addrmode_rr:$addr))]>; -def tLDRSH : TI2<(outs tGPR:$dst), (ins t_addrmode_rr:$addr), +def tLDRSH : T1I2<(outs tGPR:$dst), (ins t_addrmode_rr:$addr), "ldrsh $dst, $addr", [(set tGPR:$dst, (sextloadi16 t_addrmode_rr:$addr))]>; let canFoldAsLoad = 1 in -def tLDRspi : TIs<(outs tGPR:$dst), (ins t_addrmode_sp:$addr), +def tLDRspi : T1Is<(outs tGPR:$dst), (ins t_addrmode_sp:$addr), "ldr $dst, $addr", [(set tGPR:$dst, (load t_addrmode_sp:$addr))]>; // Special instruction for restore. It cannot clobber condition register // when it's expanded by eliminateCallFramePseudoInstr(). let canFoldAsLoad = 1, mayLoad = 1 in -def tRestore : TIs<(outs tGPR:$dst), (ins t_addrmode_sp:$addr), +def tRestore : T1Is<(outs tGPR:$dst), (ins t_addrmode_sp:$addr), "ldr $dst, $addr", []>; // Load tconstpool let canFoldAsLoad = 1 in -def tLDRpci : TIs<(outs tGPR:$dst), (ins i32imm:$addr), +def tLDRpci : T1Is<(outs tGPR:$dst), (ins i32imm:$addr), "ldr $dst, $addr", [(set tGPR:$dst, (load (ARMWrapper tconstpool:$addr)))]>; // Special LDR for loads from non-pc-relative constpools. let canFoldAsLoad = 1, mayLoad = 1, isReMaterializable = 1 in -def tLDRcp : TIs<(outs tGPR:$dst), (ins i32imm:$addr), +def tLDRcp : T1Is<(outs tGPR:$dst), (ins i32imm:$addr), "ldr $dst, $addr", []>; -def tSTR : TI4<(outs), (ins tGPR:$src, t_addrmode_s4:$addr), +def tSTR : T1I4<(outs), (ins tGPR:$src, t_addrmode_s4:$addr), "str $src, $addr", [(store tGPR:$src, t_addrmode_s4:$addr)]>; -def tSTRB : TI1<(outs), (ins tGPR:$src, t_addrmode_s1:$addr), +def tSTRB : T1I1<(outs), (ins tGPR:$src, t_addrmode_s1:$addr), "strb $src, $addr", [(truncstorei8 tGPR:$src, t_addrmode_s1:$addr)]>; -def tSTRH : TI2<(outs), (ins tGPR:$src, t_addrmode_s2:$addr), +def tSTRH : T1I2<(outs), (ins tGPR:$src, t_addrmode_s2:$addr), "strh $src, $addr", [(truncstorei16 tGPR:$src, t_addrmode_s2:$addr)]>; -def tSTRspi : TIs<(outs), (ins tGPR:$src, t_addrmode_sp:$addr), +def tSTRspi : T1Is<(outs), (ins tGPR:$src, t_addrmode_sp:$addr), "str $src, $addr", [(store tGPR:$src, t_addrmode_sp:$addr)]>; let mayStore = 1 in { // Special instruction for spill. It cannot clobber condition register // when it's expanded by eliminateCallFramePseudoInstr(). 
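For context on how this spill pseudo gets created: the Thumb path of storeRegToStackSlot builds a tSpill whose SP-relative address is only resolved at frame-index elimination time. A rough sketch of that call as it looks before the base-class split in this patch; the operand order follows t_addrmode_sp, but treat the details as illustrative:

    // Illustrative only: emit the Thumb-1 spill pseudo for a low register.
    // The frame index is rewritten later by eliminateFrameIndex.
    BuildMI(MBB, I, DL, get(ARM::tSpill))
      .addReg(SrcReg, getKillRegState(isKill))
      .addFrameIndex(FI).addImm(0);
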
-def tSpill : TIs<(outs), (ins tGPR:$src, t_addrmode_sp:$addr), +def tSpill : T1Is<(outs), (ins tGPR:$src, t_addrmode_sp:$addr), "str $src, $addr", []>; } @@ -362,9 +363,9 @@ let Defs = [CPSR] in { def tCMN : T1I<(outs), (ins tGPR:$lhs, tGPR:$rhs), "cmn $lhs, $rhs", [(ARMcmp tGPR:$lhs, (ineg tGPR:$rhs))]>; -def tCMNNZ : T1I<(outs), (ins tGPR:$lhs, tGPR:$rhs), - "cmn $lhs, $rhs", - [(ARMcmpNZ tGPR:$lhs, (ineg tGPR:$rhs))]>; +def tCMNZ : T1I<(outs), (ins tGPR:$lhs, tGPR:$rhs), + "cmn $lhs, $rhs", + [(ARMcmpZ tGPR:$lhs, (ineg tGPR:$rhs))]>; } // CMP immediate @@ -372,9 +373,9 @@ let Defs = [CPSR] in { def tCMPi8 : T1I<(outs), (ins tGPR:$lhs, i32imm:$rhs), "cmp $lhs, $rhs", [(ARMcmp tGPR:$lhs, imm0_255:$rhs)]>; -def tCMPNZi8 : T1I<(outs), (ins tGPR:$lhs, i32imm:$rhs), - "cmp $lhs, $rhs", - [(ARMcmpNZ tGPR:$lhs, imm0_255:$rhs)]>; +def tCMPZi8 : T1I<(outs), (ins tGPR:$lhs, i32imm:$rhs), + "cmp $lhs, $rhs", + [(ARMcmpZ tGPR:$lhs, imm0_255:$rhs)]>; } @@ -383,9 +384,9 @@ let Defs = [CPSR] in { def tCMPr : T1I<(outs), (ins tGPR:$lhs, tGPR:$rhs), "cmp $lhs, $rhs", [(ARMcmp tGPR:$lhs, tGPR:$rhs)]>; -def tCMPNZr : T1I<(outs), (ins tGPR:$lhs, tGPR:$rhs), - "cmp $lhs, $rhs", - [(ARMcmpNZ tGPR:$lhs, tGPR:$rhs)]>; +def tCMPZr : T1I<(outs), (ins tGPR:$lhs, tGPR:$rhs), + "cmp $lhs, $rhs", + [(ARMcmpZ tGPR:$lhs, tGPR:$rhs)]>; } // TODO: A7-37: CMP(3) - cmp hi regs @@ -551,7 +552,7 @@ def tSXTH : T1I<(outs tGPR:$dst), (ins tGPR:$src), let isCommutable = 1, Defs = [CPSR] in def tTST : T1I<(outs), (ins tGPR:$lhs, tGPR:$rhs), "tst $lhs, $rhs", - [(ARMcmpNZ (and tGPR:$lhs, tGPR:$rhs), 0)]>; + [(ARMcmpZ (and tGPR:$lhs, tGPR:$rhs), 0)]>; // zero-extend byte def tUXTB : T1I<(outs tGPR:$dst), (ins tGPR:$src), @@ -622,13 +623,13 @@ def : Tv5Pat<(ARMcall texternalsym:$func), (tBLXi texternalsym:$func)>; def : Tv5Pat<(ARMcall tGPR:$dst), (tBLXr tGPR:$dst)>; // zextload i1 -> zextload i8 -def : TPat<(zextloadi1 t_addrmode_s1:$addr), - (tLDRB t_addrmode_s1:$addr)>; +def : T1Pat<(zextloadi1 t_addrmode_s1:$addr), + (tLDRB t_addrmode_s1:$addr)>; // extload -> zextload -def : TPat<(extloadi1 t_addrmode_s1:$addr), (tLDRB t_addrmode_s1:$addr)>; -def : TPat<(extloadi8 t_addrmode_s1:$addr), (tLDRB t_addrmode_s1:$addr)>; -def : TPat<(extloadi16 t_addrmode_s2:$addr), (tLDRH t_addrmode_s2:$addr)>; +def : T1Pat<(extloadi1 t_addrmode_s1:$addr), (tLDRB t_addrmode_s1:$addr)>; +def : T1Pat<(extloadi8 t_addrmode_s1:$addr), (tLDRB t_addrmode_s1:$addr)>; +def : T1Pat<(extloadi16 t_addrmode_s2:$addr), (tLDRH t_addrmode_s2:$addr)>; // Large immediate handling. diff --git a/lib/Target/ARM/ARMInstrThumb2.td b/lib/Target/ARM/ARMInstrThumb2.td index bfdf719..50345a6 100644 --- a/lib/Target/ARM/ARMInstrThumb2.td +++ b/lib/Target/ARM/ARMInstrThumb2.td @@ -89,7 +89,6 @@ def imm0_65535 : PatLeaf<(i32 imm), [{ return (uint32_t)N->getZExtValue() < 65536; }]>; - /// bf_inv_mask_imm predicate - An AND mask to clear an arbitrary width bitfield /// e.g., 0xf000ffff def bf_inv_mask_imm : Operand<i32>, @@ -127,6 +126,42 @@ def t2_lo16AllZero : PatLeaf<(i32 imm), [{ }], t2_hi16>; +// Define Thumb2 specific addressing modes. 
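The operand classes defined below encode quite different offset ranges, which is worth keeping in mind when reading the load/store multiclasses that use them. A small sketch of the implied range checks; the fitsT2* helpers are invented for illustration and are not functions in this patch:

    // Rough offset-range checks implied by the t2addrmode_* operands below.
    static bool fitsT2Imm12(int Offset) {          // t2addrmode_imm12
      return Offset >= 0 && Offset < 4096;         // add-only, 12 bits
    }
    static bool fitsT2Imm8(int Offset) {           // t2addrmode_imm8
      return Offset > -256 && Offset < 256;        // signed 8-bit magnitude
    }
    static bool fitsT2Imm8s4(int Offset) {         // t2addrmode_imm8s4
      return (Offset & 3) == 0 &&                  // scaled by 4 (ldrd/strd)
             Offset > -1024 && Offset < 1024;
    }
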
+ +// t2addrmode_imm12 := reg + imm12 +def t2addrmode_imm12 : Operand<i32>, + ComplexPattern<i32, 2, "SelectT2AddrModeImm12", []> { + let PrintMethod = "printT2AddrModeImm12Operand"; + let MIOperandInfo = (ops GPR:$base, i32imm:$offsimm); +} + +// t2addrmode_imm8 := reg - imm8 +def t2addrmode_imm8 : Operand<i32>, + ComplexPattern<i32, 2, "SelectT2AddrModeImm8", []> { + let PrintMethod = "printT2AddrModeImm8Operand"; + let MIOperandInfo = (ops GPR:$base, i32imm:$offsimm); +} + +def t2am_imm8_offset : Operand<i32>, + ComplexPattern<i32, 1, "SelectT2AddrModeImm8Offset", []>{ + let PrintMethod = "printT2AddrModeImm8OffsetOperand"; +} + +// t2addrmode_imm8s4 := reg + (imm8 << 2) +def t2addrmode_imm8s4 : Operand<i32>, + ComplexPattern<i32, 2, "SelectT2AddrModeImm8s4", []> { + let PrintMethod = "printT2AddrModeImm8Operand"; + let MIOperandInfo = (ops GPR:$base, i32imm:$offsimm); +} + +// t2addrmode_so_reg := reg + reg << imm2 +def t2addrmode_so_reg : Operand<i32>, + ComplexPattern<i32, 3, "SelectT2AddrModeSoReg", []> { + let PrintMethod = "printT2AddrModeSoRegOperand"; + let MIOperandInfo = (ops GPR:$base, GPR:$offsreg, i32imm:$offsimm); +} + + //===----------------------------------------------------------------------===// // Multiclass helpers... // @@ -239,32 +274,32 @@ multiclass T2I_adde_sube_irs<string opc, PatFrag opnode, bit Commutable = 0> { def ri : T2sI<(outs GPR:$dst), (ins GPR:$lhs, t2_so_imm:$rhs), opc, " $dst, $lhs, $rhs", [(set GPR:$dst, (opnode GPR:$lhs, t2_so_imm:$rhs))]>, - Requires<[IsThumb, HasThumb2, CarryDefIsUnused]>; + Requires<[IsThumb2, CarryDefIsUnused]>; // register def rr : T2sI<(outs GPR:$dst), (ins GPR:$lhs, GPR:$rhs), opc, " $dst, $lhs, $rhs", [(set GPR:$dst, (opnode GPR:$lhs, GPR:$rhs))]>, - Requires<[IsThumb, HasThumb2, CarryDefIsUnused]> { + Requires<[IsThumb2, CarryDefIsUnused]> { let isCommutable = Commutable; } // shifted register def rs : T2sI<(outs GPR:$dst), (ins GPR:$lhs, t2_so_reg:$rhs), opc, " $dst, $lhs, $rhs", [(set GPR:$dst, (opnode GPR:$lhs, t2_so_reg:$rhs))]>, - Requires<[IsThumb, HasThumb2, CarryDefIsUnused]>; + Requires<[IsThumb2, CarryDefIsUnused]>; // Carry setting variants // shifted imm def Sri : T2XI<(outs GPR:$dst), (ins GPR:$lhs, t2_so_imm:$rhs), !strconcat(opc, "s $dst, $lhs, $rhs"), [(set GPR:$dst, (opnode GPR:$lhs, t2_so_imm:$rhs))]>, - Requires<[IsThumb, HasThumb2, CarryDefIsUsed]> { + Requires<[IsThumb2, CarryDefIsUsed]> { let Defs = [CPSR]; } // register def Srr : T2XI<(outs GPR:$dst), (ins GPR:$lhs, GPR:$rhs), !strconcat(opc, "s $dst, $lhs, $rhs"), [(set GPR:$dst, (opnode GPR:$lhs, GPR:$rhs))]>, - Requires<[IsThumb, HasThumb2, CarryDefIsUsed]> { + Requires<[IsThumb2, CarryDefIsUsed]> { let Defs = [CPSR]; let isCommutable = Commutable; } @@ -272,7 +307,7 @@ multiclass T2I_adde_sube_irs<string opc, PatFrag opnode, bit Commutable = 0> { def Srs : T2XI<(outs GPR:$dst), (ins GPR:$lhs, t2_so_reg:$rhs), !strconcat(opc, "s $dst, $lhs, $rhs"), [(set GPR:$dst, (opnode GPR:$lhs, t2_so_reg:$rhs))]>, - Requires<[IsThumb, HasThumb2, CarryDefIsUsed]> { + Requires<[IsThumb2, CarryDefIsUsed]> { let Defs = [CPSR]; } } @@ -287,24 +322,24 @@ multiclass T2I_rsc_is<string opc, PatFrag opnode> { def ri : T2sI<(outs GPR:$dst), (ins GPR:$rhs, t2_so_imm:$lhs), opc, " $dst, $rhs, $lhs", [(set GPR:$dst, (opnode t2_so_imm:$lhs, GPR:$rhs))]>, - Requires<[IsThumb, HasThumb2, CarryDefIsUnused]>; + Requires<[IsThumb2, CarryDefIsUnused]>; // shifted register def rs : T2sI<(outs GPR:$dst), (ins GPR:$rhs, t2_so_reg:$lhs), opc, " $dst, $rhs, $lhs", [(set GPR:$dst, 
(opnode t2_so_reg:$lhs, GPR:$rhs))]>, - Requires<[IsThumb, HasThumb2, CarryDefIsUnused]>; + Requires<[IsThumb2, CarryDefIsUnused]>; // shifted imm def Sri : T2XI<(outs GPR:$dst), (ins GPR:$rhs, t2_so_imm:$lhs), !strconcat(opc, "s $dst, $rhs, $lhs"), [(set GPR:$dst, (opnode t2_so_imm:$lhs, GPR:$rhs))]>, - Requires<[IsThumb, HasThumb2, CarryDefIsUsed]> { + Requires<[IsThumb2, CarryDefIsUsed]> { let Defs = [CPSR]; } // shifted register def Srs : T2XI<(outs GPR:$dst), (ins GPR:$rhs, t2_so_reg:$lhs), !strconcat(opc, "s $dst, $rhs, $lhs"), [(set GPR:$dst, (opnode t2_so_reg:$lhs, GPR:$rhs))]>, - Requires<[IsThumb, HasThumb2, CarryDefIsUsed]> { + Requires<[IsThumb2, CarryDefIsUsed]> { let Defs = [CPSR]; } } @@ -359,6 +394,71 @@ multiclass T2I_cmp_is<string opc, PatFrag opnode> { } } +/// T2I_ld - Defines a set of (op r, {imm12|imm8|so_reg}) load patterns. +multiclass T2I_ld<string opc, PatFrag opnode> { + def i12 : T2Ii12<(outs GPR:$dst), (ins t2addrmode_imm12:$addr), + opc, " $dst, $addr", + [(set GPR:$dst, (opnode t2addrmode_imm12:$addr))]>; + def i8 : T2Ii8 <(outs GPR:$dst), (ins t2addrmode_imm8:$addr), + opc, " $dst, $addr", + [(set GPR:$dst, (opnode t2addrmode_imm8:$addr))]>; + def s : T2Iso <(outs GPR:$dst), (ins t2addrmode_so_reg:$addr), + opc, " $dst, $addr", + [(set GPR:$dst, (opnode t2addrmode_so_reg:$addr))]>; + def pci : T2Ipc <(outs GPR:$dst), (ins i32imm:$addr), + opc, " $dst, $addr", + [(set GPR:$dst, (opnode (ARMWrapper tconstpool:$addr)))]>; +} + +/// T2I_st - Defines a set of (op r, {imm12|imm8|so_reg}) store patterns. +multiclass T2I_st<string opc, PatFrag opnode> { + def i12 : T2Ii12<(outs), (ins GPR:$src, t2addrmode_imm12:$addr), + opc, " $src, $addr", + [(opnode GPR:$src, t2addrmode_imm12:$addr)]>; + def i8 : T2Ii8 <(outs), (ins GPR:$src, t2addrmode_imm8:$addr), + opc, " $src, $addr", + [(opnode GPR:$src, t2addrmode_imm8:$addr)]>; + def s : T2Iso <(outs), (ins GPR:$src, t2addrmode_so_reg:$addr), + opc, " $src, $addr", + [(opnode GPR:$src, t2addrmode_so_reg:$addr)]>; +} + +/// T2I_picld - Defines the PIC load pattern. +class T2I_picld<string opc, PatFrag opnode> : + T2I<(outs GPR:$dst), (ins addrmodepc:$addr), + !strconcat("${addr:label}:\n\t", opc), " $dst, $addr", + [(set GPR:$dst, (opnode addrmodepc:$addr))]>; + +/// T2I_picst - Defines the PIC store pattern. +class T2I_picst<string opc, PatFrag opnode> : + T2I<(outs), (ins GPR:$src, addrmodepc:$addr), + !strconcat("${addr:label}:\n\t", opc), " $src, $addr", + [(opnode GPR:$src, addrmodepc:$addr)]>; + + +/// T2I_unary_rrot - A unary operation with two forms: one whose operand is a +/// register and one whose operand is a register rotated by 8/16/24. +multiclass T2I_unary_rrot<string opc, PatFrag opnode> { + def r : T2I<(outs GPR:$dst), (ins GPR:$Src), + opc, " $dst, $Src", + [(set GPR:$dst, (opnode GPR:$Src))]>; + def r_rot : T2I<(outs GPR:$dst), (ins GPR:$Src, i32imm:$rot), + opc, " $dst, $Src, ror $rot", + [(set GPR:$dst, (opnode (rotr GPR:$Src, rot_imm:$rot)))]>; +} + +/// T2I_bin_rrot - A binary operation with two forms: one whose operand is a +/// register and one whose operand is a register rotated by 8/16/24. 
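In scalar terms, the rotated form matched by the multiclass that follows adds a byte (or halfword) picked out of the second operand by an 8/16/24-bit rotation. A self-contained C++ model of the DAG semantics for the byte case with rotation 16, written out only to make the PatFrag readable:

    #include <cstdint>
    // Scalar model of (add LHS, (sext_inreg (rotr RHS, 16), i8)),
    // i.e. what "sxtab $dst, $LHS, $RHS, ror 16" computes.
    static int32_t sxtab_ror16(uint32_t LHS, uint32_t RHS) {
      uint32_t Rot = (RHS >> 16) | (RHS << 16);    // rotate right by 16
      return (int32_t)LHS + (int8_t)(Rot & 0xFF);  // add sign-extended byte
    }
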
+multiclass T2I_bin_rrot<string opc, PatFrag opnode> { + def rr : T2I<(outs GPR:$dst), (ins GPR:$LHS, GPR:$RHS), + opc, " $dst, $LHS, $RHS", + [(set GPR:$dst, (opnode GPR:$LHS, GPR:$RHS))]>; + def rr_rot : T2I<(outs GPR:$dst), (ins GPR:$LHS, GPR:$RHS, i32imm:$rot), + opc, " $dst, $LHS, $RHS, ror $rot", + [(set GPR:$dst, (opnode GPR:$LHS, + (rotr GPR:$RHS, rot_imm:$rot)))]>; +} + //===----------------------------------------------------------------------===// // Instructions //===----------------------------------------------------------------------===// @@ -409,6 +509,209 @@ def t2ADDrSPs : T2XI<(outs GPR:$dst), (ins GPR:$sp, t2_so_reg:$rhs), // Load / store Instructions. // +// Load +let canFoldAsLoad = 1 in +defm t2LDR : T2I_ld<"ldr", UnOpFrag<(load node:$Src)>>; + +// Loads with zero extension +defm t2LDRH : T2I_ld<"ldrh", UnOpFrag<(zextloadi16 node:$Src)>>; +defm t2LDRB : T2I_ld<"ldrb", UnOpFrag<(zextloadi8 node:$Src)>>; + +// Loads with sign extension +defm t2LDRSH : T2I_ld<"ldrsh", UnOpFrag<(sextloadi16 node:$Src)>>; +defm t2LDRSB : T2I_ld<"ldrsb", UnOpFrag<(sextloadi8 node:$Src)>>; + +let mayLoad = 1 in { +// Load doubleword +def t2LDRDi8 : T2Ii8s4<(outs GPR:$dst), (ins t2addrmode_imm8s4:$addr), + "ldrd", " $dst, $addr", []>; +def t2LDRDpci : T2Ii8s4<(outs GPR:$dst), (ins i32imm:$addr), + "ldrd", " $dst, $addr", []>; +} + +// zextload i1 -> zextload i8 +def : T2Pat<(zextloadi1 t2addrmode_imm12:$addr), + (t2LDRBi12 t2addrmode_imm12:$addr)>; +def : T2Pat<(zextloadi1 t2addrmode_imm8:$addr), + (t2LDRBi8 t2addrmode_imm8:$addr)>; +def : T2Pat<(zextloadi1 t2addrmode_so_reg:$addr), + (t2LDRBs t2addrmode_so_reg:$addr)>; +def : T2Pat<(zextloadi1 (ARMWrapper tconstpool:$addr)), + (t2LDRBpci tconstpool:$addr)>; + +// extload -> zextload +// FIXME: Reduce the number of patterns by legalizing extload to zextload +// earlier? 
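The FIXME above suggests handling this during legalization instead of enumerating one pattern per addressing mode. A hedged sketch of what that might look like; this call is not part of the patch, and the exact action choice would need verifying against the legalizer, which typically rewrites an expanded extload as a zero-extending load since the high bits are undefined:

    // Hypothetical: in ARMTargetLowering's constructor, ask the legalizer
    // to expand i1 extending loads, removing the need for the
    // per-addressing-mode extloadi1 patterns below.
    setLoadExtAction(ISD::EXTLOAD, MVT::i1, Expand);
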
+def : T2Pat<(extloadi1 t2addrmode_imm12:$addr), + (t2LDRBi12 t2addrmode_imm12:$addr)>; +def : T2Pat<(extloadi1 t2addrmode_imm8:$addr), + (t2LDRBi8 t2addrmode_imm8:$addr)>; +def : T2Pat<(extloadi1 t2addrmode_so_reg:$addr), + (t2LDRBs t2addrmode_so_reg:$addr)>; +def : T2Pat<(extloadi1 (ARMWrapper tconstpool:$addr)), + (t2LDRBpci tconstpool:$addr)>; + +def : T2Pat<(extloadi8 t2addrmode_imm12:$addr), + (t2LDRBi12 t2addrmode_imm12:$addr)>; +def : T2Pat<(extloadi8 t2addrmode_imm8:$addr), + (t2LDRBi8 t2addrmode_imm8:$addr)>; +def : T2Pat<(extloadi8 t2addrmode_so_reg:$addr), + (t2LDRBs t2addrmode_so_reg:$addr)>; +def : T2Pat<(extloadi8 (ARMWrapper tconstpool:$addr)), + (t2LDRBpci tconstpool:$addr)>; + +def : T2Pat<(extloadi16 t2addrmode_imm12:$addr), + (t2LDRHi12 t2addrmode_imm12:$addr)>; +def : T2Pat<(extloadi16 t2addrmode_imm8:$addr), + (t2LDRHi8 t2addrmode_imm8:$addr)>; +def : T2Pat<(extloadi16 t2addrmode_so_reg:$addr), + (t2LDRHs t2addrmode_so_reg:$addr)>; +def : T2Pat<(extloadi16 (ARMWrapper tconstpool:$addr)), + (t2LDRHpci tconstpool:$addr)>; + +// Indexed loads +let mayLoad = 1 in { +def t2LDR_PRE : T2Iidxldst<(outs GPR:$dst, GPR:$base_wb), + (ins t2addrmode_imm8:$addr), + AddrModeT2_i8, IndexModePre, + "ldr", " $dst, $addr!", "$addr.base = $base_wb", + []>; + +def t2LDR_POST : T2Iidxldst<(outs GPR:$dst, GPR:$base_wb), + (ins GPR:$base, t2am_imm8_offset:$offset), + AddrModeT2_i8, IndexModePost, + "ldr", " $dst, [$base], $offset", "$base = $base_wb", + []>; + +def t2LDRB_PRE : T2Iidxldst<(outs GPR:$dst, GPR:$base_wb), + (ins t2addrmode_imm8:$addr), + AddrModeT2_i8, IndexModePre, + "ldrb", " $dst, $addr!", "$addr.base = $base_wb", + []>; +def t2LDRB_POST : T2Iidxldst<(outs GPR:$dst, GPR:$base_wb), + (ins GPR:$base, t2am_imm8_offset:$offset), + AddrModeT2_i8, IndexModePost, + "ldrb", " $dst, [$base], $offset", "$base = $base_wb", + []>; + +def t2LDRH_PRE : T2Iidxldst<(outs GPR:$dst, GPR:$base_wb), + (ins t2addrmode_imm8:$addr), + AddrModeT2_i8, IndexModePre, + "ldrh", " $dst, $addr!", "$addr.base = $base_wb", + []>; +def t2LDRH_POST : T2Iidxldst<(outs GPR:$dst, GPR:$base_wb), + (ins GPR:$base, t2am_imm8_offset:$offset), + AddrModeT2_i8, IndexModePost, + "ldrh", " $dst, [$base], $offset", "$base = $base_wb", + []>; + +def t2LDRSB_PRE : T2Iidxldst<(outs GPR:$dst, GPR:$base_wb), + (ins t2addrmode_imm8:$addr), + AddrModeT2_i8, IndexModePre, + "ldrsb", " $dst, $addr!", "$addr.base = $base_wb", + []>; +def t2LDRSB_POST : T2Iidxldst<(outs GPR:$dst, GPR:$base_wb), + (ins GPR:$base, t2am_imm8_offset:$offset), + AddrModeT2_i8, IndexModePost, + "ldrsb", " $dst, [$base], $offset", "$base = $base_wb", + []>; + +def t2LDRSH_PRE : T2Iidxldst<(outs GPR:$dst, GPR:$base_wb), + (ins t2addrmode_imm8:$addr), + AddrModeT2_i8, IndexModePre, + "ldrsh", " $dst, $addr!", "$addr.base = $base_wb", + []>; +def t2LDRSH_POST : T2Iidxldst<(outs GPR:$dst, GPR:$base_wb), + (ins GPR:$base, t2am_imm8_offset:$offset), + AddrModeT2_i8, IndexModePost, + "ldrsh", " $dst, [$base], $offset", "$base = $base_wb", + []>; +} + +// Store +defm t2STR : T2I_st<"str", BinOpFrag<(store node:$LHS, node:$RHS)>>; +defm t2STRB : T2I_st<"strb", BinOpFrag<(truncstorei8 node:$LHS, node:$RHS)>>; +defm t2STRH : T2I_st<"strh", BinOpFrag<(truncstorei16 node:$LHS, node:$RHS)>>; + +// Store doubleword +let mayLoad = 1 in +def t2STRDi8 : T2Ii8s4<(outs), (ins GPR:$src, t2addrmode_imm8s4:$addr), + "strd", " $src, $addr", []>; + +// Indexed stores +def t2STR_PRE : T2Iidxldst<(outs GPR:$base_wb), + (ins GPR:$src, GPR:$base, t2am_imm8_offset:$offset), + 
AddrModeT2_i8, IndexModePre, + "str", " $src, [$base, $offset]!", "$base = $base_wb", + [(set GPR:$base_wb, + (pre_store GPR:$src, GPR:$base, t2am_imm8_offset:$offset))]>; + +def t2STR_POST : T2Iidxldst<(outs GPR:$base_wb), + (ins GPR:$src, GPR:$base, t2am_imm8_offset:$offset), + AddrModeT2_i8, IndexModePost, + "str", " $src, [$base], $offset", "$base = $base_wb", + [(set GPR:$base_wb, + (post_store GPR:$src, GPR:$base, t2am_imm8_offset:$offset))]>; + +def t2STRH_PRE : T2Iidxldst<(outs GPR:$base_wb), + (ins GPR:$src, GPR:$base, t2am_imm8_offset:$offset), + AddrModeT2_i8, IndexModePre, + "strh", " $src, [$base, $offset]!", "$base = $base_wb", + [(set GPR:$base_wb, + (pre_truncsti16 GPR:$src, GPR:$base, t2am_imm8_offset:$offset))]>; + +def t2STRH_POST : T2Iidxldst<(outs GPR:$base_wb), + (ins GPR:$src, GPR:$base, t2am_imm8_offset:$offset), + AddrModeT2_i8, IndexModePost, + "strh", " $src, [$base], $offset", "$base = $base_wb", + [(set GPR:$base_wb, + (post_truncsti16 GPR:$src, GPR:$base, t2am_imm8_offset:$offset))]>; + +def t2STRB_PRE : T2Iidxldst<(outs GPR:$base_wb), + (ins GPR:$src, GPR:$base, t2am_imm8_offset:$offset), + AddrModeT2_i8, IndexModePre, + "strb", " $src, [$base, $offset]!", "$base = $base_wb", + [(set GPR:$base_wb, + (pre_truncsti8 GPR:$src, GPR:$base, t2am_imm8_offset:$offset))]>; + +def t2STRB_POST : T2Iidxldst<(outs GPR:$base_wb), + (ins GPR:$src, GPR:$base, t2am_imm8_offset:$offset), + AddrModeT2_i8, IndexModePost, + "strb", " $src, [$base], $offset", "$base = $base_wb", + [(set GPR:$base_wb, + (post_truncsti8 GPR:$src, GPR:$base, t2am_imm8_offset:$offset))]>; + + +// Address computation and loads and stores in PIC mode. +let isNotDuplicable = 1, AddedComplexity = 10 in { +let canFoldAsLoad = 1 in +def t2PICLDR : T2I_picld<"ldr", UnOpFrag<(load node:$Src)>>; + +def t2PICLDRH : T2I_picld<"ldrh", UnOpFrag<(zextloadi16 node:$Src)>>; +def t2PICLDRB : T2I_picld<"ldrb", UnOpFrag<(zextloadi8 node:$Src)>>; +def t2PICLDRSH : T2I_picld<"ldrsh", UnOpFrag<(sextloadi16 node:$Src)>>; +def t2PICLDRSB : T2I_picld<"ldrsb", UnOpFrag<(sextloadi8 node:$Src)>>; + +def t2PICSTR : T2I_picst<"str", BinOpFrag<(store node:$LHS, node:$RHS)>>; +def t2PICSTRH : T2I_picst<"strh", BinOpFrag<(truncstorei16 node:$LHS, node:$RHS)>>; +def t2PICSTRB : T2I_picst<"strb", BinOpFrag<(truncstorei8 node:$LHS, node:$RHS)>>; +} // isNotDuplicable = 1, AddedComplexity = 10 + + +//===----------------------------------------------------------------------===// +// Load / store multiple Instructions. +// + +let mayLoad = 1 in +def t2LDM : T2XI<(outs), + (ins addrmode4:$addr, pred:$p, reglist:$dst1, variable_ops), + "ldm${p}${addr:submode} $addr, $dst1", []>; + +let mayStore = 1 in +def t2STM : T2XI<(outs), + (ins addrmode4:$addr, pred:$p, reglist:$src1, variable_ops), + "stm${p}${addr:submode} $addr, $src1", []>; + //===----------------------------------------------------------------------===// // Move Instructions. // @@ -435,6 +738,40 @@ def t2MOVTi16 : T2sI<(outs GPR:$dst), (ins GPR:$src, i32imm:$imm), (or (and GPR:$src, 0xffff), t2_lo16AllZero:$imm))]>; //===----------------------------------------------------------------------===// +// Extend Instructions. 
+// + +// Sign extenders + +defm t2SXTB : T2I_unary_rrot<"sxtb", UnOpFrag<(sext_inreg node:$Src, i8)>>; +defm t2SXTH : T2I_unary_rrot<"sxth", UnOpFrag<(sext_inreg node:$Src, i16)>>; + +defm t2SXTAB : T2I_bin_rrot<"sxtab", + BinOpFrag<(add node:$LHS, (sext_inreg node:$RHS, i8))>>; +defm t2SXTAH : T2I_bin_rrot<"sxtah", + BinOpFrag<(add node:$LHS, (sext_inreg node:$RHS,i16))>>; + +// TODO: SXT(A){B|H}16 + +// Zero extenders + +let AddedComplexity = 16 in { +defm t2UXTB : T2I_unary_rrot<"uxtb" , UnOpFrag<(and node:$Src, 0x000000FF)>>; +defm t2UXTH : T2I_unary_rrot<"uxth" , UnOpFrag<(and node:$Src, 0x0000FFFF)>>; +defm t2UXTB16 : T2I_unary_rrot<"uxtb16", UnOpFrag<(and node:$Src, 0x00FF00FF)>>; + +def : T2Pat<(and (shl GPR:$Src, (i32 8)), 0xFF00FF), + (t2UXTB16r_rot GPR:$Src, 24)>; +def : T2Pat<(and (srl GPR:$Src, (i32 8)), 0xFF00FF), + (t2UXTB16r_rot GPR:$Src, 8)>; + +defm t2UXTAB : T2I_bin_rrot<"uxtab", + BinOpFrag<(add node:$LHS, (and node:$RHS, 0x00FF))>>; +defm t2UXTAH : T2I_bin_rrot<"uxtah", + BinOpFrag<(add node:$LHS, (and node:$RHS, 0xFFFF))>>; +} + +//===----------------------------------------------------------------------===// // Arithmetic Instructions. // @@ -563,21 +900,24 @@ def t2REVSH : T2I<(outs GPR:$dst), (ins GPR:$src), defm t2CMP : T2I_cmp_is<"cmp", BinOpFrag<(ARMcmp node:$LHS, node:$RHS)>>; -defm t2CMPnz : T2I_cmp_is<"cmp", - BinOpFrag<(ARMcmpNZ node:$LHS, node:$RHS)>>; +defm t2CMPz : T2I_cmp_is<"cmp", + BinOpFrag<(ARMcmpZ node:$LHS, node:$RHS)>>; defm t2CMN : T2I_cmp_is<"cmn", BinOpFrag<(ARMcmp node:$LHS,(ineg node:$RHS))>>; -defm t2CMNnz : T2I_cmp_is<"cmn", - BinOpFrag<(ARMcmpNZ node:$LHS,(ineg node:$RHS))>>; +defm t2CMNz : T2I_cmp_is<"cmn", + BinOpFrag<(ARMcmpZ node:$LHS,(ineg node:$RHS))>>; def : T2Pat<(ARMcmp GPR:$src, t2_so_imm_neg:$imm), (t2CMNri GPR:$src, t2_so_imm_neg:$imm)>; -def : T2Pat<(ARMcmpNZ GPR:$src, t2_so_imm_neg:$imm), +def : T2Pat<(ARMcmpZ GPR:$src, t2_so_imm_neg:$imm), (t2CMNri GPR:$src, t2_so_imm_neg:$imm)>; -// FIXME: TST, TEQ, etc. +defm t2TST : T2I_cmp_is<"tst", + BinOpFrag<(ARMcmpZ (and node:$LHS, node:$RHS), 0)>>; +defm t2TEQ : T2I_cmp_is<"teq", + BinOpFrag<(ARMcmpZ (xor node:$LHS, node:$RHS), 0)>>; // A8.6.27 CBNZ, CBZ - Compare and branch on (non)zero. // Short range conditional branch. Looks awesome for loops. Need to figure @@ -585,6 +925,42 @@ def : T2Pat<(ARMcmpNZ GPR:$src, t2_so_imm_neg:$imm), // FIXME: Conditional moves +//===----------------------------------------------------------------------===// +// Control-Flow Instructions +// + +let isBranch = 1, isTerminator = 1, isBarrier = 1 in { +let isPredicable = 1 in +def t2B : T2XI<(outs), (ins brtarget:$target), + "b $target", + [(br bb:$target)]>; + +let isNotDuplicable = 1, isIndirectBranch = 1 in { +def t2BR_JTr : T2JTI<(outs), (ins GPR:$target, jtblock_operand:$jt, i32imm:$id), + "mov pc, $target \n$jt", + [(ARMbrjt GPR:$target, tjumptable:$jt, imm:$id)]>; + +def t2BR_JTm : + T2JTI<(outs), + (ins t2addrmode_so_reg:$target, jtblock_operand:$jt, i32imm:$id), + "ldr pc, $target \n$jt", + [(ARMbrjt (i32 (load t2addrmode_so_reg:$target)), tjumptable:$jt, + imm:$id)]>; + +def t2BR_JTadd : + T2JTI<(outs), + (ins GPR:$target, GPR:$idx, jtblock_operand:$jt, i32imm:$id), + "add pc, $target, $idx \n$jt", + [(ARMbrjt (add GPR:$target, GPR:$idx), tjumptable:$jt, imm:$id)]>; +} // isNotDuplicate, isIndirectBranch +} // isBranch, isTerminator, isBarrier + +// FIXME: should be able to write a pattern for ARMBrcond, but can't use +// a two-value operand where a dag node expects two operands. 
:( +let isBranch = 1, isTerminator = 1 in +def t2Bcc : T2I<(outs), (ins brtarget:$target), + "b", " $target", + [/*(ARMbrcond bb:$target, imm:$cc)*/]>; //===----------------------------------------------------------------------===// // Non-Instruction Patterns diff --git a/lib/Target/ARM/ARMMachineFunctionInfo.h b/lib/Target/ARM/ARMMachineFunctionInfo.h index 0b0e289..66d3df6 100644 --- a/lib/Target/ARM/ARMMachineFunctionInfo.h +++ b/lib/Target/ARM/ARMMachineFunctionInfo.h @@ -30,6 +30,11 @@ class ARMFunctionInfo : public MachineFunctionInfo { /// Used to initialized Align, so must precede it. bool isThumb; + /// hasThumb2 - True if the target architecture supports Thumb2. Do not use + /// to determine if function is compiled under Thumb mode, for that use + /// 'isThumb'. + bool hasThumb2; + /// Align - required alignment. ARM functions and Thumb functions with /// constant pools require 4-byte alignment; other Thumb functions /// require only 2-byte alignment. @@ -91,7 +96,8 @@ class ARMFunctionInfo : public MachineFunctionInfo { public: ARMFunctionInfo() : - isThumb(false), + isThumb(false), + hasThumb2(false), Align(2U), VarArgsRegSaveSize(0), HasStackFrame(false), LRSpilledForFarJump(false), R3IsLiveIn(false), @@ -102,6 +108,7 @@ public: explicit ARMFunctionInfo(MachineFunction &MF) : isThumb(MF.getTarget().getSubtarget<ARMSubtarget>().isThumb()), + hasThumb2(MF.getTarget().getSubtarget<ARMSubtarget>().hasThumb2()), Align(isThumb ? 1U : 2U), VarArgsRegSaveSize(0), HasStackFrame(false), LRSpilledForFarJump(false), R3IsLiveIn(false), @@ -112,6 +119,7 @@ public: JumpTableUId(0), ConstPoolEntryUId(0) {} bool isThumbFunction() const { return isThumb; } + bool isThumb2Function() const { return isThumb && hasThumb2; } unsigned getAlign() const { return Align; } void setAlign(unsigned a) { Align = a; } diff --git a/lib/Target/ARM/ARMRegisterInfo.cpp b/lib/Target/ARM/ARMRegisterInfo.cpp index bb0cc8f..f809f37 100644 --- a/lib/Target/ARM/ARMRegisterInfo.cpp +++ b/lib/Target/ARM/ARMRegisterInfo.cpp @@ -31,16 +31,9 @@ #include "llvm/Target/TargetOptions.h" #include "llvm/ADT/BitVector.h" #include "llvm/ADT/SmallVector.h" -#include "llvm/ADT/STLExtras.h" -#include "llvm/Support/CommandLine.h" -#include <algorithm> using namespace llvm; -static cl::opt<bool> ThumbRegScavenging("enable-thumb-reg-scavenging", - cl::Hidden, - cl::desc("Enable register scavenging on Thumb")); - -unsigned ARMRegisterInfo::getRegisterNumbering(unsigned RegEnum) { +unsigned ARMBaseRegisterInfo::getRegisterNumbering(unsigned RegEnum) { using namespace ARM; switch (RegEnum) { case R0: case S0: case D0: return 0; @@ -81,8 +74,8 @@ unsigned ARMRegisterInfo::getRegisterNumbering(unsigned RegEnum) { } } -unsigned ARMRegisterInfo::getRegisterNumbering(unsigned RegEnum, - bool &isSPVFP) { +unsigned ARMBaseRegisterInfo::getRegisterNumbering(unsigned RegEnum, + bool &isSPVFP) { isSPVFP = false; using namespace ARM; @@ -108,12 +101,12 @@ unsigned ARMRegisterInfo::getRegisterNumbering(unsigned RegEnum, case PC: case D15: return 15; case S0: case S1: case S2: case S3: - case S4: case S5: case S6: case S7: - case S8: case S9: case S10: case S11: - case S12: case S13: case S14: case S15: - case S16: case S17: case S18: case S19: - case S20: case S21: case S22: case S23: - case S24: case S25: case S26: case S27: + case S4: case S5: case S6: case S7: + case S8: case S9: case S10: case S11: + case S12: case S13: case S14: case S15: + case S16: case S17: case S18: case S19: + case S20: case S21: case S22: case S23: + case S24: case S25: 
case S26: case S27: case S28: case S29: case S30: case S31: { isSPVFP = true; switch (RegEnum) { @@ -155,13 +148,18 @@ unsigned ARMRegisterInfo::getRegisterNumbering(unsigned RegEnum, } } -ARMRegisterInfo::ARMRegisterInfo(const TargetInstrInfo &tii, - const ARMSubtarget &sti) +ARMBaseRegisterInfo::ARMBaseRegisterInfo(const TargetInstrInfo &tii, + const ARMSubtarget &sti) : ARMGenRegisterInfo(ARM::ADJCALLSTACKDOWN, ARM::ADJCALLSTACKUP), TII(tii), STI(sti), FramePtr((STI.isTargetDarwin() || STI.isThumb()) ? ARM::R7 : ARM::R11) { } +ARMRegisterInfo::ARMRegisterInfo(const TargetInstrInfo &tii, + const ARMSubtarget &sti) + : ARMBaseRegisterInfo(tii, sti) { +} + static inline const MachineInstrBuilder &AddDefaultPred(const MachineInstrBuilder &MIB) { return MIB.addImm((int64_t)ARMCC::AL).addReg(0); @@ -176,55 +174,22 @@ const MachineInstrBuilder &AddDefaultCC(const MachineInstrBuilder &MIB) { /// specified immediate. void ARMRegisterInfo::emitLoadConstPool(MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI, + const TargetInstrInfo *TII, DebugLoc dl, unsigned DestReg, int Val, - unsigned Pred, unsigned PredReg, - const TargetInstrInfo *TII, - bool isThumb, - DebugLoc dl) const { + ARMCC::CondCodes Pred, + unsigned PredReg) const { MachineFunction &MF = *MBB.getParent(); MachineConstantPool *ConstantPool = MF.getConstantPool(); Constant *C = ConstantInt::get(Type::Int32Ty, Val); unsigned Idx = ConstantPool->getConstantPoolIndex(C, 4); - if (isThumb) - BuildMI(MBB, MBBI, dl, - TII->get(ARM::tLDRcp),DestReg).addConstantPoolIndex(Idx); - else - BuildMI(MBB, MBBI, dl, TII->get(ARM::LDRcp), DestReg) - .addConstantPoolIndex(Idx) - .addReg(0).addImm(0).addImm(Pred).addReg(PredReg); -} -/// isLowRegister - Returns true if the register is low register r0-r7. -/// -bool ARMRegisterInfo::isLowRegister(unsigned Reg) const { - using namespace ARM; - switch (Reg) { - case R0: case R1: case R2: case R3: - case R4: case R5: case R6: case R7: - return true; - default: - return false; - } -} - -const TargetRegisterClass* -ARMRegisterInfo::getPhysicalRegisterRegClass(unsigned Reg, MVT VT) const { - if (STI.isThumb()) { - if (isLowRegister(Reg)) - return ARM::tGPRRegisterClass; - switch (Reg) { - default: - break; - case ARM::R8: case ARM::R9: case ARM::R10: case ARM::R11: - case ARM::R12: case ARM::SP: case ARM::LR: case ARM::PC: - return ARM::GPRRegisterClass; - } - } - return TargetRegisterInfo::getPhysicalRegisterRegClass(Reg, VT); + BuildMI(MBB, MBBI, dl, TII->get(ARM::LDRcp), DestReg) + .addConstantPoolIndex(Idx) + .addReg(0).addImm(0).addImm(Pred).addReg(PredReg); } const unsigned* -ARMRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const { +ARMBaseRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const { static const unsigned CalleeSavedRegs[] = { ARM::LR, ARM::R11, ARM::R10, ARM::R9, ARM::R8, ARM::R7, ARM::R6, ARM::R5, ARM::R4, @@ -248,7 +213,7 @@ ARMRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const { } const TargetRegisterClass* const * -ARMRegisterInfo::getCalleeSavedRegClasses(const MachineFunction *MF) const { +ARMBaseRegisterInfo::getCalleeSavedRegClasses(const MachineFunction *MF) const { static const TargetRegisterClass * const CalleeSavedRegClasses[] = { &ARM::GPRRegClass, &ARM::GPRRegClass, &ARM::GPRRegClass, &ARM::GPRRegClass, &ARM::GPRRegClass, &ARM::GPRRegClass, @@ -297,7 +262,7 @@ ARMRegisterInfo::getCalleeSavedRegClasses(const MachineFunction *MF) const { ? 
DarwinCalleeSavedRegClasses : CalleeSavedRegClasses; } -BitVector ARMRegisterInfo::getReservedRegs(const MachineFunction &MF) const { +BitVector ARMBaseRegisterInfo::getReservedRegs(const MachineFunction &MF) const { // FIXME: avoid re-calculating this everytime. BitVector Reserved(getNumRegs()); Reserved.set(ARM::SP); @@ -311,7 +276,7 @@ BitVector ARMRegisterInfo::getReservedRegs(const MachineFunction &MF) const { } bool -ARMRegisterInfo::isReservedReg(const MachineFunction &MF, unsigned Reg) const { +ARMBaseRegisterInfo::isReservedReg(const MachineFunction &MF, unsigned Reg) const { switch (Reg) { default: break; case ARM::SP: @@ -329,16 +294,16 @@ ARMRegisterInfo::isReservedReg(const MachineFunction &MF, unsigned Reg) const { return false; } -const TargetRegisterClass *ARMRegisterInfo::getPointerRegClass() const { +const TargetRegisterClass *ARMBaseRegisterInfo::getPointerRegClass() const { return &ARM::GPRRegClass; } /// getAllocationOrder - Returns the register allocation order for a specified /// register class in the form of a pair of TargetRegisterClass iterators. std::pair<TargetRegisterClass::iterator,TargetRegisterClass::iterator> -ARMRegisterInfo::getAllocationOrder(const TargetRegisterClass *RC, - unsigned HintType, unsigned HintReg, - const MachineFunction &MF) const { +ARMBaseRegisterInfo::getAllocationOrder(const TargetRegisterClass *RC, + unsigned HintType, unsigned HintReg, + const MachineFunction &MF) const { // Alternative register allocation orders when favoring even / odd registers // of register pairs. @@ -479,8 +444,8 @@ ARMRegisterInfo::getAllocationOrder(const TargetRegisterClass *RC, /// ResolveRegAllocHint - Resolves the specified register allocation hint /// to a physical register. Returns the physical register if it is successful. unsigned -ARMRegisterInfo::ResolveRegAllocHint(unsigned Type, unsigned Reg, - const MachineFunction &MF) const { +ARMBaseRegisterInfo::ResolveRegAllocHint(unsigned Type, unsigned Reg, + const MachineFunction &MF) const { if (Reg == 0 || !isPhysicalRegister(Reg)) return 0; if (Type == 0) @@ -495,8 +460,8 @@ ARMRegisterInfo::ResolveRegAllocHint(unsigned Type, unsigned Reg, } void -ARMRegisterInfo::UpdateRegAllocHint(unsigned Reg, unsigned NewReg, - MachineFunction &MF) const { +ARMBaseRegisterInfo::UpdateRegAllocHint(unsigned Reg, unsigned NewReg, + MachineFunction &MF) const { MachineRegisterInfo *MRI = &MF.getRegInfo(); std::pair<unsigned, unsigned> Hint = MRI->getRegAllocationHint(Reg); if ((Hint.first == (unsigned)ARMRI::RegPairOdd || @@ -516,15 +481,14 @@ ARMRegisterInfo::UpdateRegAllocHint(unsigned Reg, unsigned NewReg, bool ARMRegisterInfo::requiresRegisterScavenging(const MachineFunction &MF) const { - const ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); - return ThumbRegScavenging || !AFI->isThumbFunction(); + return true; } /// hasFP - Return true if the specified function should have a dedicated frame /// pointer register. This is true if the function has variable sized allocas /// or if frame pointer elimination is disabled. 
/// -bool ARMRegisterInfo::hasFP(const MachineFunction &MF) const { +bool ARMBaseRegisterInfo::hasFP(const MachineFunction &MF) const { const MachineFrameInfo *MFI = MF.getFrameInfo(); return (NoFramePointerElim || MFI->hasVarSizedObjects() || @@ -539,18 +503,13 @@ bool ARMRegisterInfo::hasFP(const MachineFunction &MF) const { bool ARMRegisterInfo::hasReservedCallFrame(MachineFunction &MF) const { const MachineFrameInfo *FFI = MF.getFrameInfo(); unsigned CFSize = FFI->getMaxCallFrameSize(); - ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); // It's not always a good idea to include the call frame as part of the // stack frame. ARM (especially Thumb) has small immediate offset to // address the stack frame. So a large call frame can cause poor codegen // and may even makes it impossible to scavenge a register. - if (AFI->isThumbFunction()) { - if (CFSize >= ((1 << 8) - 1) * 4 / 2) // Half of imm8 * 4 - return false; - } else { - if (CFSize >= ((1 << 12) - 1) / 2) // Half of imm12 - return false; - } + if (CFSize >= ((1 << 12) - 1) / 2) // Half of imm12 + return false; + return !MF.getFrameInfo()->hasVarSizedObjects(); } @@ -570,14 +529,14 @@ void emitARMRegPlusImmediate(MachineBasicBlock &MBB, unsigned RotAmt = ARM_AM::getSOImmValRotate(NumBytes); unsigned ThisVal = NumBytes & ARM_AM::rotr32(0xFF, RotAmt); assert(ThisVal && "Didn't extract field correctly"); - + // We will handle these bits from offset, clear them. NumBytes &= ~ThisVal; - + // Get the properly encoded SOImmVal field. int SOImmVal = ARM_AM::getSOImmVal(ThisVal); assert(SOImmVal != -1 && "Bit extraction didn't work?"); - + // Build the new ADD / SUB. BuildMI(MBB, MBBI, dl, TII.get(isSub ? ARM::SUBri : ARM::ADDri), DestReg) .addReg(BaseReg, RegState::Kill).addImm(SOImmVal) @@ -586,204 +545,13 @@ void emitARMRegPlusImmediate(MachineBasicBlock &MBB, } } -/// calcNumMI - Returns the number of instructions required to materialize -/// the specific add / sub r, c instruction. -static unsigned calcNumMI(int Opc, int ExtraOpc, unsigned Bytes, - unsigned NumBits, unsigned Scale) { - unsigned NumMIs = 0; - unsigned Chunk = ((1 << NumBits) - 1) * Scale; - - if (Opc == ARM::tADDrSPi) { - unsigned ThisVal = (Bytes > Chunk) ? Chunk : Bytes; - Bytes -= ThisVal; - NumMIs++; - NumBits = 8; - Scale = 1; // Followed by a number of tADDi8. - Chunk = ((1 << NumBits) - 1) * Scale; - } - - NumMIs += Bytes / Chunk; - if ((Bytes % Chunk) != 0) - NumMIs++; - if (ExtraOpc) - NumMIs++; - return NumMIs; -} - -/// emitThumbRegPlusImmInReg - Emits a series of instructions to materialize -/// a destreg = basereg + immediate in Thumb code. Materialize the immediate -/// in a register using mov / mvn sequences or load the immediate from a -/// constpool entry. -static -void emitThumbRegPlusImmInReg(MachineBasicBlock &MBB, - MachineBasicBlock::iterator &MBBI, - unsigned DestReg, unsigned BaseReg, - int NumBytes, bool CanChangeCC, - const TargetInstrInfo &TII, - const ARMRegisterInfo& MRI, - DebugLoc dl) { - bool isHigh = !MRI.isLowRegister(DestReg) || - (BaseReg != 0 && !MRI.isLowRegister(BaseReg)); - bool isSub = false; - // Subtract doesn't have high register version. Load the negative value - // if either base or dest register is a high register. Also, if do not - // issue sub as part of the sequence if condition register is to be - // preserved. 
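A quick worked example of the rotated-immediate chunking in emitARMRegPlusImmediate above, since the surviving ARM-only path relies on it once the Thumb helpers below are removed. The helpers are the existing ones from ARMAddressingModes.h; the arithmetic is checked by hand:

    // NumBytes = 0x404 cannot be encoded as one 8-bit rotated immediate
    // (the set bits span 9 positions), so the loop peels off one
    // encodable chunk per iteration:
    unsigned NumBytes = 0x404;
    unsigned RotAmt   = ARM_AM::getSOImmValRotate(NumBytes);
    unsigned ThisVal  = NumBytes & ARM_AM::rotr32(0xFF, RotAmt); // 0x400
    NumBytes &= ~ThisVal;                                        // 0x004 left
    // Iteration 1 emits SUB sp, sp, #0x400; iteration 2 emits SUB sp, sp, #4.
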
- if (NumBytes < 0 && !isHigh && CanChangeCC) { - isSub = true; - NumBytes = -NumBytes; - } - unsigned LdReg = DestReg; - if (DestReg == ARM::SP) { - assert(BaseReg == ARM::SP && "Unexpected!"); - LdReg = ARM::R3; - BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVlor2hir), ARM::R12) - .addReg(ARM::R3, RegState::Kill); - } - - if (NumBytes <= 255 && NumBytes >= 0) - BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVi8), LdReg).addImm(NumBytes); - else if (NumBytes < 0 && NumBytes >= -255) { - BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVi8), LdReg).addImm(NumBytes); - BuildMI(MBB, MBBI, dl, TII.get(ARM::tNEG), LdReg) - .addReg(LdReg, RegState::Kill); - } else - MRI.emitLoadConstPool(MBB, MBBI, LdReg, NumBytes, ARMCC::AL, 0, &TII, - true, dl); - - // Emit add / sub. - int Opc = (isSub) ? ARM::tSUBrr : (isHigh ? ARM::tADDhirr : ARM::tADDrr); - const MachineInstrBuilder MIB = BuildMI(MBB, MBBI, dl, - TII.get(Opc), DestReg); - if (DestReg == ARM::SP || isSub) - MIB.addReg(BaseReg).addReg(LdReg, RegState::Kill); - else - MIB.addReg(LdReg).addReg(BaseReg, RegState::Kill); - if (DestReg == ARM::SP) - BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVhir2lor), ARM::R3) - .addReg(ARM::R12, RegState::Kill); -} - -/// emitThumbRegPlusImmediate - Emits a series of instructions to materialize -/// a destreg = basereg + immediate in Thumb code. -static -void emitThumbRegPlusImmediate(MachineBasicBlock &MBB, - MachineBasicBlock::iterator &MBBI, - unsigned DestReg, unsigned BaseReg, - int NumBytes, const TargetInstrInfo &TII, - const ARMRegisterInfo& MRI, - DebugLoc dl) { - bool isSub = NumBytes < 0; - unsigned Bytes = (unsigned)NumBytes; - if (isSub) Bytes = -NumBytes; - bool isMul4 = (Bytes & 3) == 0; - bool isTwoAddr = false; - bool DstNotEqBase = false; - unsigned NumBits = 1; - unsigned Scale = 1; - int Opc = 0; - int ExtraOpc = 0; - - if (DestReg == BaseReg && BaseReg == ARM::SP) { - assert(isMul4 && "Thumb sp inc / dec size must be multiple of 4!"); - NumBits = 7; - Scale = 4; - Opc = isSub ? ARM::tSUBspi : ARM::tADDspi; - isTwoAddr = true; - } else if (!isSub && BaseReg == ARM::SP) { - // r1 = add sp, 403 - // => - // r1 = add sp, 100 * 4 - // r1 = add r1, 3 - if (!isMul4) { - Bytes &= ~3; - ExtraOpc = ARM::tADDi3; - } - NumBits = 8; - Scale = 4; - Opc = ARM::tADDrSPi; - } else { - // sp = sub sp, c - // r1 = sub sp, c - // r8 = sub sp, c - if (DestReg != BaseReg) - DstNotEqBase = true; - NumBits = 8; - Opc = isSub ? ARM::tSUBi8 : ARM::tADDi8; - isTwoAddr = true; - } - - unsigned NumMIs = calcNumMI(Opc, ExtraOpc, Bytes, NumBits, Scale); - unsigned Threshold = (DestReg == ARM::SP) ? 3 : 2; - if (NumMIs > Threshold) { - // This will expand into too many instructions. Load the immediate from a - // constpool entry. - emitThumbRegPlusImmInReg(MBB, MBBI, DestReg, BaseReg, NumBytes, true, TII, - MRI, dl); - return; - } - - if (DstNotEqBase) { - if (MRI.isLowRegister(DestReg) && MRI.isLowRegister(BaseReg)) { - // If both are low registers, emit DestReg = add BaseReg, max(Imm, 7) - unsigned Chunk = (1 << 3) - 1; - unsigned ThisVal = (Bytes > Chunk) ? Chunk : Bytes; - Bytes -= ThisVal; - BuildMI(MBB, MBBI, dl,TII.get(isSub ? ARM::tSUBi3 : ARM::tADDi3), DestReg) - .addReg(BaseReg, RegState::Kill).addImm(ThisVal); - } else { - BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr), DestReg) - .addReg(BaseReg, RegState::Kill); - } - BaseReg = DestReg; - } - - unsigned Chunk = ((1 << NumBits) - 1) * Scale; - while (Bytes) { - unsigned ThisVal = (Bytes > Chunk) ? Chunk : Bytes; - Bytes -= ThisVal; - ThisVal /= Scale; - // Build the new tADD / tSUB. 
- if (isTwoAddr) - BuildMI(MBB, MBBI, dl, TII.get(Opc), DestReg) - .addReg(DestReg).addImm(ThisVal); - else { - bool isKill = BaseReg != ARM::SP; - BuildMI(MBB, MBBI, dl, TII.get(Opc), DestReg) - .addReg(BaseReg, getKillRegState(isKill)).addImm(ThisVal); - BaseReg = DestReg; - - if (Opc == ARM::tADDrSPi) { - // r4 = add sp, imm - // r4 = add r4, imm - // ... - NumBits = 8; - Scale = 1; - Chunk = ((1 << NumBits) - 1) * Scale; - Opc = isSub ? ARM::tSUBi8 : ARM::tADDi8; - isTwoAddr = true; - } - } - } - - if (ExtraOpc) - BuildMI(MBB, MBBI, dl, TII.get(ExtraOpc), DestReg) - .addReg(DestReg, RegState::Kill) - .addImm(((unsigned)NumBytes) & 3); -} - -static -void emitSPUpdate(MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI, - int NumBytes, ARMCC::CondCodes Pred, unsigned PredReg, - bool isThumb, const TargetInstrInfo &TII, - const ARMRegisterInfo& MRI, - DebugLoc dl) { - if (isThumb) - emitThumbRegPlusImmediate(MBB, MBBI, ARM::SP, ARM::SP, NumBytes, TII, - MRI, dl); - else - emitARMRegPlusImmediate(MBB, MBBI, ARM::SP, ARM::SP, NumBytes, - Pred, PredReg, TII, dl); +static void +emitSPUpdate(MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI, + const TargetInstrInfo &TII, DebugLoc dl, + int NumBytes, + ARMCC::CondCodes Pred = ARMCC::AL, unsigned PredReg = 0) { + emitARMRegPlusImmediate(MBB, MBBI, ARM::SP, ARM::SP, NumBytes, + Pred, PredReg, TII, dl); } void ARMRegisterInfo:: @@ -797,7 +565,6 @@ eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB, DebugLoc dl = Old->getDebugLoc(); unsigned Amount = Old->getOperand(0).getImm(); if (Amount != 0) { - ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); // We need to keep the stack aligned properly. To do this, we round the // amount of space needed for the outgoing arguments up to the next // alignment boundary. @@ -806,46 +573,22 @@ eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB, // Replace the pseudo instruction with a new instruction... unsigned Opc = Old->getOpcode(); - bool isThumb = AFI->isThumbFunction(); - ARMCC::CondCodes Pred = isThumb - ? ARMCC::AL : (ARMCC::CondCodes)Old->getOperand(1).getImm(); + ARMCC::CondCodes Pred = (ARMCC::CondCodes)Old->getOperand(1).getImm(); if (Opc == ARM::ADJCALLSTACKDOWN || Opc == ARM::tADJCALLSTACKDOWN) { // Note: PredReg is operand 2 for ADJCALLSTACKDOWN. - unsigned PredReg = isThumb ? 0 : Old->getOperand(2).getReg(); - emitSPUpdate(MBB, I, -Amount, Pred, PredReg, isThumb, TII, *this, dl); + unsigned PredReg = Old->getOperand(2).getReg(); + emitSPUpdate(MBB, I, TII, dl, -Amount, Pred, PredReg); } else { // Note: PredReg is operand 3 for ADJCALLSTACKUP. - unsigned PredReg = isThumb ? 0 : Old->getOperand(3).getReg(); + unsigned PredReg = Old->getOperand(3).getReg(); assert(Opc == ARM::ADJCALLSTACKUP || Opc == ARM::tADJCALLSTACKUP); - emitSPUpdate(MBB, I, Amount, Pred, PredReg, isThumb, TII, *this, dl); + emitSPUpdate(MBB, I, TII, dl, Amount, Pred, PredReg); } } } MBB.erase(I); } -/// emitThumbConstant - Emit a series of instructions to materialize a -/// constant. -static void emitThumbConstant(MachineBasicBlock &MBB, - MachineBasicBlock::iterator &MBBI, - unsigned DestReg, int Imm, - const TargetInstrInfo &TII, - const ARMRegisterInfo& MRI, - DebugLoc dl) { - bool isSub = Imm < 0; - if (isSub) Imm = -Imm; - - int Chunk = (1 << 8) - 1; - int ThisVal = (Imm > Chunk) ? 
Chunk : Imm; - Imm -= ThisVal; - BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVi8), DestReg).addImm(ThisVal); - if (Imm > 0) - emitThumbRegPlusImmediate(MBB, MBBI, DestReg, DestReg, Imm, TII, MRI, dl); - if (isSub) - BuildMI(MBB, MBBI, dl, TII.get(ARM::tNEG), DestReg) - .addReg(DestReg, RegState::Kill); -} - /// findScratchRegister - Find a 'free' ARM register. If register scavenger /// is not being used, R12 is available. Otherwise, try for a call-clobbered /// register first and then a spilled callee-saved register if that fails. @@ -868,17 +611,16 @@ void ARMRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, MachineBasicBlock &MBB = *MI.getParent(); MachineFunction &MF = *MBB.getParent(); ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); - bool isThumb = AFI->isThumbFunction(); DebugLoc dl = MI.getDebugLoc(); while (!MI.getOperand(i).isFI()) { ++i; assert(i < MI.getNumOperands() && "Instr doesn't have FrameIndex operand!"); } - + unsigned FrameReg = ARM::SP; int FrameIndex = MI.getOperand(i).getIndex(); - int Offset = MF.getFrameInfo()->getObjectOffset(FrameIndex) + + int Offset = MF.getFrameInfo()->getObjectOffset(FrameIndex) + MF.getFrameInfo()->getStackSize() + SPAdj; if (AFI->isGPRCalleeSavedArea1Frame(FrameIndex)) @@ -926,86 +668,20 @@ void ARMRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, MI.getOperand(i+1).ChangeToImmediate(ImmedOffset); return; } - + // Otherwise, we fallback to common code below to form the imm offset with // a sequence of ADDri instructions. First though, pull as much of the imm // into this ADDri as possible. unsigned RotAmt = ARM_AM::getSOImmValRotate(Offset); unsigned ThisImmVal = Offset & ARM_AM::rotr32(0xFF, RotAmt); - + // We will handle these bits from offset, clear them. Offset &= ~ThisImmVal; - + // Get the properly encoded SOImmVal field. int ThisSOImmVal = ARM_AM::getSOImmVal(ThisImmVal); - assert(ThisSOImmVal != -1 && "Bit extraction didn't work?"); + assert(ThisSOImmVal != -1 && "Bit extraction didn't work?"); MI.getOperand(i+1).ChangeToImmediate(ThisSOImmVal); - } else if (Opcode == ARM::tADDrSPi) { - Offset += MI.getOperand(i+1).getImm(); - - // Can't use tADDrSPi if it's based off the frame pointer. - unsigned NumBits = 0; - unsigned Scale = 1; - if (FrameReg != ARM::SP) { - Opcode = ARM::tADDi3; - MI.setDesc(TII.get(ARM::tADDi3)); - NumBits = 3; - } else { - NumBits = 8; - Scale = 4; - assert((Offset & 3) == 0 && - "Thumb add/sub sp, #imm immediate must be multiple of 4!"); - } - - if (Offset == 0) { - // Turn it into a move. - MI.setDesc(TII.get(ARM::tMOVhir2lor)); - MI.getOperand(i).ChangeToRegister(FrameReg, false); - MI.RemoveOperand(i+1); - return; - } - - // Common case: small offset, fits into instruction. - unsigned Mask = (1 << NumBits) - 1; - if (((Offset / Scale) & ~Mask) == 0) { - // Replace the FrameIndex with sp / fp - MI.getOperand(i).ChangeToRegister(FrameReg, false); - MI.getOperand(i+1).ChangeToImmediate(Offset / Scale); - return; - } - - unsigned DestReg = MI.getOperand(0).getReg(); - unsigned Bytes = (Offset > 0) ? Offset : -Offset; - unsigned NumMIs = calcNumMI(Opcode, 0, Bytes, NumBits, Scale); - // MI would expand into a large number of instructions. Don't try to - // simplify the immediate. 
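With the Thumb-only tADDrSPi handling above removed, what remains in eliminateFrameIndex for an offset that does not fit is the generic fold retained further down in this hunk. Distilled from that surviving code path, not new logic:

    // Keep what fits in the instruction's immediate field and flag
    // subtraction one bit above it; the leftover goes to a scratch reg.
    unsigned Mask = (1 << NumBits) - 1;     // e.g. NumBits = 12, AddrMode2
    ImmedOffset &= Mask;                    // portion encoded in this MI
    if (isSub)
      ImmedOffset |= 1 << NumBits;         // mark the offset as subtracted
    ImmOp.ChangeToImmediate(ImmedOffset);
    Offset &= ~(Mask * Scale);             // remainder handled afterwards
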
- if (NumMIs > 2) { - emitThumbRegPlusImmediate(MBB, II, DestReg, FrameReg, Offset, TII, - *this, dl); - MBB.erase(II); - return; - } - - if (Offset > 0) { - // Translate r0 = add sp, imm to - // r0 = add sp, 255*4 - // r0 = add r0, (imm - 255*4) - MI.getOperand(i).ChangeToRegister(FrameReg, false); - MI.getOperand(i+1).ChangeToImmediate(Mask); - Offset = (Offset - Mask * Scale); - MachineBasicBlock::iterator NII = next(II); - emitThumbRegPlusImmediate(MBB, NII, DestReg, DestReg, Offset, TII, - *this, dl); - } else { - // Translate r0 = add sp, -imm to - // r0 = -imm (this is then translated into a series of instructons) - // r0 = add r0, sp - emitThumbConstant(MBB, II, DestReg, Offset, TII, *this, dl); - MI.setDesc(TII.get(ARM::tADDhirr)); - MI.getOperand(i).ChangeToRegister(DestReg, false, false, true); - MI.getOperand(i+1).ChangeToRegister(FrameReg, false); - } - return; } else { unsigned ImmIdx = 0; int InstrOffs = 0; @@ -1037,13 +713,6 @@ void ARMRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, Scale = 4; break; } - case ARMII::AddrModeTs: { - ImmIdx = i+1; - InstrOffs = MI.getOperand(ImmIdx).getImm(); - NumBits = (FrameReg == ARM::SP) ? 8 : 5; - Scale = 4; - break; - } default: assert(0 && "Unsupported addressing mode!"); abort(); @@ -1052,7 +721,7 @@ void ARMRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, Offset += InstrOffs * Scale; assert((Offset & (Scale-1)) == 0 && "Can't encode this offset!"); - if (Offset < 0 && !isThumb) { + if (Offset < 0) { Offset = -Offset; isSub = true; } @@ -1070,121 +739,34 @@ void ARMRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, return; } - bool isThumSpillRestore = Opcode == ARM::tRestore || Opcode == ARM::tSpill; - if (AddrMode == ARMII::AddrModeTs) { - // Thumb tLDRspi, tSTRspi. These will change to instructions that use - // a different base register. - NumBits = 5; - Mask = (1 << NumBits) - 1; - } - // If this is a thumb spill / restore, we will be using a constpool load to - // materialize the offset. - if (AddrMode == ARMII::AddrModeTs && isThumSpillRestore) - ImmOp.ChangeToImmediate(0); - else { - // Otherwise, it didn't fit. Pull in what we can to simplify the immed. - ImmedOffset = ImmedOffset & Mask; - if (isSub) - ImmedOffset |= 1 << NumBits; - ImmOp.ChangeToImmediate(ImmedOffset); - Offset &= ~(Mask*Scale); - } + // Otherwise, it didn't fit. Pull in what we can to simplify the immed. + ImmedOffset = ImmedOffset & Mask; + if (isSub) + ImmedOffset |= 1 << NumBits; + ImmOp.ChangeToImmediate(ImmedOffset); + Offset &= ~(Mask*Scale); } - + // If we get here, the immediate doesn't fit into the instruction. We folded // as much as possible above, handle the rest, providing a register that is // SP+LargeImm. assert(Offset && "This code isn't needed if offset already handled!"); - if (isThumb) { - if (Desc.mayLoad()) { - // Use the destination register to materialize sp + offset. - unsigned TmpReg = MI.getOperand(0).getReg(); - bool UseRR = false; - if (Opcode == ARM::tRestore) { - if (FrameReg == ARM::SP) - emitThumbRegPlusImmInReg(MBB, II, TmpReg, FrameReg, - Offset, false, TII, *this, dl); - else { - emitLoadConstPool(MBB, II, TmpReg, Offset, ARMCC::AL, 0, &TII, - true, dl); - UseRR = true; - } - } else - emitThumbRegPlusImmediate(MBB, II, TmpReg, FrameReg, Offset, TII, - *this, dl); - MI.setDesc(TII.get(ARM::tLDR)); - MI.getOperand(i).ChangeToRegister(TmpReg, false, false, true); - if (UseRR) - // Use [reg, reg] addrmode. 
- MI.addOperand(MachineOperand::CreateReg(FrameReg, false)); - else // tLDR has an extra register operand. - MI.addOperand(MachineOperand::CreateReg(0, false)); - } else if (Desc.mayStore()) { - // FIXME! This is horrific!!! We need register scavenging. - // Our temporary workaround has marked r3 unavailable. Of course, r3 is - // also a ABI register so it's possible that is is the register that is - // being storing here. If that's the case, we do the following: - // r12 = r2 - // Use r2 to materialize sp + offset - // str r3, r2 - // r2 = r12 - unsigned ValReg = MI.getOperand(0).getReg(); - unsigned TmpReg = ARM::R3; - bool UseRR = false; - if (ValReg == ARM::R3) { - BuildMI(MBB, II, dl, TII.get(ARM::tMOVlor2hir), ARM::R12) - .addReg(ARM::R2, RegState::Kill); - TmpReg = ARM::R2; - } - if (TmpReg == ARM::R3 && AFI->isR3LiveIn()) - BuildMI(MBB, II, dl, TII.get(ARM::tMOVlor2hir), ARM::R12) - .addReg(ARM::R3, RegState::Kill); - if (Opcode == ARM::tSpill) { - if (FrameReg == ARM::SP) - emitThumbRegPlusImmInReg(MBB, II, TmpReg, FrameReg, - Offset, false, TII, *this, dl); - else { - emitLoadConstPool(MBB, II, TmpReg, Offset, ARMCC::AL, 0, &TII, - true, dl); - UseRR = true; - } - } else - emitThumbRegPlusImmediate(MBB, II, TmpReg, FrameReg, Offset, TII, - *this, dl); - MI.setDesc(TII.get(ARM::tSTR)); - MI.getOperand(i).ChangeToRegister(TmpReg, false, false, true); - if (UseRR) // Use [reg, reg] addrmode. - MI.addOperand(MachineOperand::CreateReg(FrameReg, false)); - else // tSTR has an extra register operand. - MI.addOperand(MachineOperand::CreateReg(0, false)); - - MachineBasicBlock::iterator NII = next(II); - if (ValReg == ARM::R3) - BuildMI(MBB, NII, dl, TII.get(ARM::tMOVhir2lor), ARM::R2) - .addReg(ARM::R12, RegState::Kill); - if (TmpReg == ARM::R3 && AFI->isR3LiveIn()) - BuildMI(MBB, NII, dl, TII.get(ARM::tMOVhir2lor), ARM::R3) - .addReg(ARM::R12, RegState::Kill); - } else - assert(false && "Unexpected opcode!"); - } else { - // Insert a set of r12 with the full address: r12 = sp + offset - // If the offset we have is too large to fit into the instruction, we need - // to form it with a series of ADDri's. Do this by taking 8-bit chunks - // out of 'Offset'. - unsigned ScratchReg = findScratchRegister(RS, &ARM::GPRRegClass, AFI); - if (ScratchReg == 0) - // No register is "free". Scavenge a register. - ScratchReg = RS->scavengeRegister(&ARM::GPRRegClass, II, SPAdj); - int PIdx = MI.findFirstPredOperandIdx(); - ARMCC::CondCodes Pred = (PIdx == -1) - ? ARMCC::AL : (ARMCC::CondCodes)MI.getOperand(PIdx).getImm(); - unsigned PredReg = (PIdx == -1) ? 0 : MI.getOperand(PIdx+1).getReg(); - emitARMRegPlusImmediate(MBB, II, ScratchReg, FrameReg, - isSub ? -Offset : Offset, Pred, PredReg, TII, dl); - MI.getOperand(i).ChangeToRegister(ScratchReg, false, false, true); - } + // Insert a set of r12 with the full address: r12 = sp + offset + // If the offset we have is too large to fit into the instruction, we need + // to form it with a series of ADDri's. Do this by taking 8-bit chunks + // out of 'Offset'. + unsigned ScratchReg = findScratchRegister(RS, &ARM::GPRRegClass, AFI); + if (ScratchReg == 0) + // No register is "free". Scavenge a register. + ScratchReg = RS->scavengeRegister(&ARM::GPRRegClass, II, SPAdj); + int PIdx = MI.findFirstPredOperandIdx(); + ARMCC::CondCodes Pred = (PIdx == -1) + ? ARMCC::AL : (ARMCC::CondCodes)MI.getOperand(PIdx).getImm(); + unsigned PredReg = (PIdx == -1) ? 0 : MI.getOperand(PIdx+1).getReg(); + emitARMRegPlusImmediate(MBB, II, ScratchReg, FrameReg, + isSub ? 
-Offset : Offset, Pred, PredReg, TII, dl); + MI.getOperand(i).ChangeToRegister(ScratchReg, false, false, true); } static unsigned estimateStackSize(MachineFunction &MF, MachineFrameInfo *MFI) { @@ -1206,8 +788,8 @@ static unsigned estimateStackSize(MachineFunction &MF, MachineFrameInfo *MFI) { } void -ARMRegisterInfo::processFunctionBeforeCalleeSavedScan(MachineFunction &MF, - RegScavenger *RS) const { +ARMBaseRegisterInfo::processFunctionBeforeCalleeSavedScan(MachineFunction &MF, + RegScavenger *RS) const { // This tells PEI to spill the FP as if it is any other callee-save register // to take advantage the eliminateFrameIndex machinery. This also ensures it // is spilled in the order specified by getCalleeSavedRegs() to make it easier @@ -1266,7 +848,7 @@ ARMRegisterInfo::processFunctionBeforeCalleeSavedScan(MachineFunction &MF, default: break; } - } else { + } else { if (!STI.isTargetDarwin()) { UnspilledCS1GPRs.push_back(Reg); continue; @@ -1332,7 +914,8 @@ ARMRegisterInfo::processFunctionBeforeCalleeSavedScan(MachineFunction &MF, for (unsigned i = 0, e = UnspilledCS1GPRs.size(); i != e; ++i) { unsigned Reg = UnspilledCS1GPRs[i]; // Don't spiil high register if the function is thumb - if (!AFI->isThumbFunction() || isLowRegister(Reg) || Reg == ARM::LR) { + if (!AFI->isThumbFunction() || + isARMLowRegister(Reg) || Reg == ARM::LR) { MF.getRegInfo().setPhysRegUsed(Reg); AFI->setCSRegisterIsSpilled(Reg); if (!isReservedReg(MF, Reg)) @@ -1351,7 +934,7 @@ ARMRegisterInfo::processFunctionBeforeCalleeSavedScan(MachineFunction &MF, } // Estimate if we might need to scavenge a register at some point in order - // to materialize a stack offset. If so, either spill one additiona + // to materialize a stack offset. If so, either spill one additional // callee-saved register or reserve a special spill slot to facilitate // register scavenging. if (RS && !ExtraCSSpill && !AFI->isThumbFunction()) { @@ -1460,40 +1043,23 @@ void ARMRegisterInfo::emitPrologue(MachineFunction &MF) const { MachineBasicBlock::iterator MBBI = MBB.begin(); MachineFrameInfo *MFI = MF.getFrameInfo(); ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); - bool isThumb = AFI->isThumbFunction(); unsigned VARegSaveSize = AFI->getVarArgsRegSaveSize(); unsigned NumBytes = MFI->getStackSize(); const std::vector<CalleeSavedInfo> &CSI = MFI->getCalleeSavedInfo(); DebugLoc dl = (MBBI != MBB.end() ? MBBI->getDebugLoc() : DebugLoc::getUnknownLoc()); - if (isThumb) { - // Check if R3 is live in. It might have to be used as a scratch register. - for (MachineRegisterInfo::livein_iterator I =MF.getRegInfo().livein_begin(), - E = MF.getRegInfo().livein_end(); I != E; ++I) { - if (I->first == ARM::R3) { - AFI->setR3IsLiveIn(true); - break; - } - } - - // Thumb add/sub sp, imm8 instructions implicitly multiply the offset by 4. - NumBytes = (NumBytes + 3) & ~3; - MFI->setStackSize(NumBytes); - } - // Determine the sizes of each callee-save spill areas and record which frame // belongs to which callee-save spill areas. 
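A note on the eliminateFrameIndex hunks above: with the Thumb paths removed, the surviving ARM logic is a two-step scheme, fold as much of the frame offset as the instruction's immediate field can hold, then materialize whatever is left through a scavenged scratch register. A minimal standalone sketch of that folding arithmetic, using an assumed 12-bit unscaled field rather than the full addressing-mode table:

#include <cassert>
#include <cstdio>

// Split a frame offset into the part that fits a NumBits-wide, Scale-d
// immediate field, and the remainder that must be built in a scratch reg.
static void foldFrameOffset(int Offset, unsigned NumBits, unsigned Scale,
                            unsigned &ImmedOffset, int &Remainder) {
  bool isSub = Offset < 0;
  unsigned Bytes = isSub ? -Offset : Offset;
  assert((Bytes & (Scale - 1)) == 0 && "Can't encode this offset!");
  unsigned Mask = (1u << NumBits) - 1;
  ImmedOffset = (Bytes / Scale) & Mask;   // low bits: encoded directly
  if (isSub)
    ImmedOffset |= 1u << NumBits;         // sub bit, as in the patch
  Remainder = (int)(Bytes - ((Bytes / Scale) & Mask) * Scale);
  if (isSub)
    Remainder = -Remainder;               // still to be added via scratch reg
}

int main() {
  unsigned Imm; int Rest;
  foldFrameOffset(4100, 12, 1, Imm, Rest);
  std::printf("imm=%u rest=%d\n", Imm, Rest); // imm=4 rest=4096
}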
unsigned GPRCS1Size = 0, GPRCS2Size = 0, DPRCSSize = 0; int FramePtrSpillFI = 0; if (VARegSaveSize) - emitSPUpdate(MBB, MBBI, -VARegSaveSize, ARMCC::AL, 0, isThumb, TII, - *this, dl); + emitSPUpdate(MBB, MBBI, TII, dl, -VARegSaveSize); if (!AFI->hasStackFrame()) { if (NumBytes != 0) - emitSPUpdate(MBB, MBBI, -NumBytes, ARMCC::AL, 0, isThumb, TII, *this, dl); + emitSPUpdate(MBB, MBBI, TII, dl, -NumBytes); return; } @@ -1531,34 +1097,25 @@ void ARMRegisterInfo::emitPrologue(MachineFunction &MF) const { } } - if (!isThumb) { - // Build the new SUBri to adjust SP for integer callee-save spill area 1. - emitSPUpdate(MBB, MBBI, -GPRCS1Size, ARMCC::AL, 0, isThumb, TII, *this, dl); - movePastCSLoadStoreOps(MBB, MBBI, ARM::STR, 1, STI); - } else if (MBBI != MBB.end() && MBBI->getOpcode() == ARM::tPUSH) { - ++MBBI; - if (MBBI != MBB.end()) - dl = MBBI->getDebugLoc(); - } + // Build the new SUBri to adjust SP for integer callee-save spill area 1. + emitSPUpdate(MBB, MBBI, TII, dl, -GPRCS1Size); + movePastCSLoadStoreOps(MBB, MBBI, ARM::STR, 1, STI); // Darwin ABI requires FP to point to the stack slot that contains the // previous FP. if (STI.isTargetDarwin() || hasFP(MF)) { MachineInstrBuilder MIB = - BuildMI(MBB, MBBI, dl, TII.get(isThumb ? ARM::tADDrSPi : ARM::ADDri), - FramePtr) + BuildMI(MBB, MBBI, dl, TII.get(ARM::ADDri), FramePtr) .addFrameIndex(FramePtrSpillFI).addImm(0); - if (!isThumb) AddDefaultCC(AddDefaultPred(MIB)); + AddDefaultCC(AddDefaultPred(MIB)); } - if (!isThumb) { - // Build the new SUBri to adjust SP for integer callee-save spill area 2. - emitSPUpdate(MBB, MBBI, -GPRCS2Size, ARMCC::AL, 0, false, TII, *this, dl); + // Build the new SUBri to adjust SP for integer callee-save spill area 2. + emitSPUpdate(MBB, MBBI, TII, dl, -GPRCS2Size); - // Build the new SUBri to adjust SP for FP callee-save spill area. - movePastCSLoadStoreOps(MBB, MBBI, ARM::STR, 2, STI); - emitSPUpdate(MBB, MBBI, -DPRCSSize, ARMCC::AL, 0, false, TII, *this, dl); - } + // Build the new SUBri to adjust SP for FP callee-save spill area. + movePastCSLoadStoreOps(MBB, MBBI, ARM::STR, 2, STI); + emitSPUpdate(MBB, MBBI, TII, dl, -DPRCSSize); // Determine starting offsets of spill areas. unsigned DPRCSOffset = NumBytes - (GPRCS1Size + GPRCS2Size + DPRCSSize); @@ -1568,16 +1125,15 @@ void ARMRegisterInfo::emitPrologue(MachineFunction &MF) const { AFI->setGPRCalleeSavedArea1Offset(GPRCS1Offset); AFI->setGPRCalleeSavedArea2Offset(GPRCS2Offset); AFI->setDPRCalleeSavedAreaOffset(DPRCSOffset); - + NumBytes = DPRCSOffset; if (NumBytes) { // Insert it after all the callee-save spills. 
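The prologue above sizes three callee-save spill areas (integer area 1, integer area 2, DPRs) and derives their starting offsets from the total frame size. A worked example with assumed sizes; the two GPRCS*Offset formulas are implied by the setters in the surrounding hunk, not quoted from it:

#include <cstdio>

int main() {
  // Assumed frame: 64 bytes total; 16B GPR area 1, 8B GPR area 2, 16B DPRs.
  unsigned NumBytes = 64, GPRCS1Size = 16, GPRCS2Size = 8, DPRCSSize = 16;

  unsigned DPRCSOffset  = NumBytes - (GPRCS1Size + GPRCS2Size + DPRCSSize);
  unsigned GPRCS2Offset = DPRCSOffset + DPRCSSize;
  unsigned GPRCS1Offset = GPRCS2Offset + GPRCS2Size;

  // The trailing emitSPUpdate for locals is then -DPRCSOffset bytes.
  std::printf("GPRCS1@%u GPRCS2@%u DPRCS@%u locals=%u\n",
              GPRCS1Offset, GPRCS2Offset, DPRCSOffset, DPRCSOffset);
  // Prints: GPRCS1@48 GPRCS2@40 DPRCS@24 locals=24
}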
- if (!isThumb) - movePastCSLoadStoreOps(MBB, MBBI, ARM::FSTD, 3, STI); - emitSPUpdate(MBB, MBBI, -NumBytes, ARMCC::AL, 0, isThumb, TII, *this, dl); + movePastCSLoadStoreOps(MBB, MBBI, ARM::FSTD, 3, STI); + emitSPUpdate(MBB, MBBI, TII, dl, -NumBytes); } - if(STI.isTargetELF() && hasFP(MF)) { + if (STI.isTargetELF() && hasFP(MF)) { MFI->setOffsetAdjustment(MFI->getOffsetAdjustment() - AFI->getFramePtrSpillOffset()); } @@ -1596,8 +1152,7 @@ static bool isCalleeSavedRegister(unsigned Reg, const unsigned *CSRegs) { static bool isCSRestore(MachineInstr *MI, const unsigned *CSRegs) { return ((MI->getOpcode() == ARM::FLDD || - MI->getOpcode() == ARM::LDR || - MI->getOpcode() == ARM::tRestore) && + MI->getOpcode() == ARM::LDR) && MI->getOperand(1).isFI() && isCalleeSavedRegister(MI->getOperand(0).getReg(), CSRegs)); } @@ -1605,20 +1160,17 @@ static bool isCSRestore(MachineInstr *MI, const unsigned *CSRegs) { void ARMRegisterInfo::emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const { MachineBasicBlock::iterator MBBI = prior(MBB.end()); - assert((MBBI->getOpcode() == ARM::BX_RET || - MBBI->getOpcode() == ARM::tBX_RET || - MBBI->getOpcode() == ARM::tPOP_RET) && + assert(MBBI->getOpcode() == ARM::BX_RET && "Can only insert epilog into returning blocks"); DebugLoc dl = MBBI->getDebugLoc(); MachineFrameInfo *MFI = MF.getFrameInfo(); ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); - bool isThumb = AFI->isThumbFunction(); unsigned VARegSaveSize = AFI->getVarArgsRegSaveSize(); int NumBytes = (int)MFI->getStackSize(); if (!AFI->hasStackFrame()) { if (NumBytes != 0) - emitSPUpdate(MBB, MBBI, NumBytes, ARMCC::AL, 0, isThumb, TII, *this, dl); + emitSPUpdate(MBB, MBBI, TII, dl, NumBytes); } else { // Unwind MBBI to point to first LDR / FLDD. const unsigned *CSRegs = getCalleeSavedRegs(); @@ -1634,110 +1186,73 @@ void ARMRegisterInfo::emitEpilogue(MachineFunction &MF, NumBytes -= (AFI->getGPRCalleeSavedArea1Size() + AFI->getGPRCalleeSavedArea2Size() + AFI->getDPRCalleeSavedAreaSize()); - if (isThumb) { - if (hasFP(MF)) { - NumBytes = AFI->getFramePtrSpillOffset() - NumBytes; - // Reset SP based on frame pointer only if the stack frame extends beyond - // frame pointer stack slot or target is ELF and the function has FP. + + // Darwin ABI requires FP to point to the stack slot that contains the + // previous FP. + if ((STI.isTargetDarwin() && NumBytes) || hasFP(MF)) { + NumBytes = AFI->getFramePtrSpillOffset() - NumBytes; + // Reset SP based on frame pointer only if the stack frame extends beyond + // frame pointer stack slot or target is ELF and the function has FP. + if (AFI->getGPRCalleeSavedArea2Size() || + AFI->getDPRCalleeSavedAreaSize() || + AFI->getDPRCalleeSavedAreaOffset()|| + hasFP(MF)) { if (NumBytes) - emitThumbRegPlusImmediate(MBB, MBBI, ARM::SP, FramePtr, -NumBytes, - TII, *this, dl); + BuildMI(MBB, MBBI, dl, TII.get(ARM::SUBri), ARM::SP).addReg(FramePtr) + .addImm(NumBytes) + .addImm((unsigned)ARMCC::AL).addReg(0).addReg(0); else - BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVlor2hir), ARM::SP) - .addReg(FramePtr); - } else { - if (MBBI->getOpcode() == ARM::tBX_RET && - &MBB.front() != MBBI && - prior(MBBI)->getOpcode() == ARM::tPOP) { - MachineBasicBlock::iterator PMBBI = prior(MBBI); - emitSPUpdate(MBB, PMBBI, NumBytes, ARMCC::AL, 0, isThumb, TII, - *this, dl); - } else - emitSPUpdate(MBB, MBBI, NumBytes, ARMCC::AL, 0, isThumb, TII, - *this, dl); - } - } else { - // Darwin ABI requires FP to point to the stack slot that contains the - // previous FP. 
- if ((STI.isTargetDarwin() && NumBytes) || hasFP(MF)) { - NumBytes = AFI->getFramePtrSpillOffset() - NumBytes; - // Reset SP based on frame pointer only if the stack frame extends beyond - // frame pointer stack slot or target is ELF and the function has FP. - if (AFI->getGPRCalleeSavedArea2Size() || - AFI->getDPRCalleeSavedAreaSize() || - AFI->getDPRCalleeSavedAreaOffset()|| - hasFP(MF)) { - if (NumBytes) - BuildMI(MBB, MBBI, dl, TII.get(ARM::SUBri), ARM::SP).addReg(FramePtr) - .addImm(NumBytes) - .addImm((unsigned)ARMCC::AL).addReg(0).addReg(0); - else - BuildMI(MBB, MBBI, dl, TII.get(ARM::MOVr), ARM::SP).addReg(FramePtr) - .addImm((unsigned)ARMCC::AL).addReg(0).addReg(0); - } - } else if (NumBytes) { - emitSPUpdate(MBB, MBBI, NumBytes, ARMCC::AL, 0, false, TII, *this, dl); + BuildMI(MBB, MBBI, dl, TII.get(ARM::MOVr), ARM::SP).addReg(FramePtr) + .addImm((unsigned)ARMCC::AL).addReg(0).addReg(0); } + } else if (NumBytes) { + emitSPUpdate(MBB, MBBI, TII, dl, NumBytes); + } - // Move SP to start of integer callee save spill area 2. - movePastCSLoadStoreOps(MBB, MBBI, ARM::FLDD, 3, STI); - emitSPUpdate(MBB, MBBI, AFI->getDPRCalleeSavedAreaSize(), ARMCC::AL, 0, - false, TII, *this, dl); + // Move SP to start of integer callee save spill area 2. + movePastCSLoadStoreOps(MBB, MBBI, ARM::FLDD, 3, STI); + emitSPUpdate(MBB, MBBI, TII, dl, AFI->getDPRCalleeSavedAreaSize()); - // Move SP to start of integer callee save spill area 1. - movePastCSLoadStoreOps(MBB, MBBI, ARM::LDR, 2, STI); - emitSPUpdate(MBB, MBBI, AFI->getGPRCalleeSavedArea2Size(), ARMCC::AL, 0, - false, TII, *this, dl); + // Move SP to start of integer callee save spill area 1. + movePastCSLoadStoreOps(MBB, MBBI, ARM::LDR, 2, STI); + emitSPUpdate(MBB, MBBI, TII, dl, AFI->getGPRCalleeSavedArea2Size()); - // Move SP to SP upon entry to the function. - movePastCSLoadStoreOps(MBB, MBBI, ARM::LDR, 1, STI); - emitSPUpdate(MBB, MBBI, AFI->getGPRCalleeSavedArea1Size(), ARMCC::AL, 0, - false, TII, *this, dl); - } + // Move SP to SP upon entry to the function. + movePastCSLoadStoreOps(MBB, MBBI, ARM::LDR, 1, STI); + emitSPUpdate(MBB, MBBI, TII, dl, AFI->getGPRCalleeSavedArea1Size()); } - if (VARegSaveSize) { - if (isThumb) - // Epilogue for vararg functions: pop LR to R3 and branch off it. - // FIXME: Verify this is still ok when R3 is no longer being reserved. 
- BuildMI(MBB, MBBI, dl, TII.get(ARM::tPOP)).addReg(ARM::R3); - - emitSPUpdate(MBB, MBBI, VARegSaveSize, ARMCC::AL, 0, isThumb, TII, - *this, dl); + if (VARegSaveSize) + emitSPUpdate(MBB, MBBI, TII, dl, VARegSaveSize); - if (isThumb) { - BuildMI(MBB, MBBI, dl, TII.get(ARM::tBX_RET_vararg)).addReg(ARM::R3); - MBB.erase(MBBI); - } - } } -unsigned ARMRegisterInfo::getRARegister() const { +unsigned ARMBaseRegisterInfo::getRARegister() const { return ARM::LR; } -unsigned ARMRegisterInfo::getFrameRegister(MachineFunction &MF) const { +unsigned ARMBaseRegisterInfo::getFrameRegister(MachineFunction &MF) const { if (STI.isTargetDarwin() || hasFP(MF)) return FramePtr; return ARM::SP; } -unsigned ARMRegisterInfo::getEHExceptionRegister() const { +unsigned ARMBaseRegisterInfo::getEHExceptionRegister() const { assert(0 && "What is the exception register"); return 0; } -unsigned ARMRegisterInfo::getEHHandlerRegister() const { +unsigned ARMBaseRegisterInfo::getEHHandlerRegister() const { assert(0 && "What is the exception handler register"); return 0; } -int ARMRegisterInfo::getDwarfRegNum(unsigned RegNum, bool isEH) const { +int ARMBaseRegisterInfo::getDwarfRegNum(unsigned RegNum, bool isEH) const { return ARMGenRegisterInfo::getDwarfRegNumFull(RegNum, 0); } -unsigned ARMRegisterInfo::getRegisterPairEven(unsigned Reg, - const MachineFunction &MF) const { +unsigned ARMBaseRegisterInfo::getRegisterPairEven(unsigned Reg, + const MachineFunction &MF) const { switch (Reg) { default: break; // Return 0 if either register of the pair is a special register. @@ -1810,7 +1325,7 @@ unsigned ARMRegisterInfo::getRegisterPairEven(unsigned Reg, return 0; } -unsigned ARMRegisterInfo::getRegisterPairOdd(unsigned Reg, +unsigned ARMBaseRegisterInfo::getRegisterPairOdd(unsigned Reg, const MachineFunction &MF) const { switch (Reg) { default: break; diff --git a/lib/Target/ARM/ARMRegisterInfo.h b/lib/Target/ARM/ARMRegisterInfo.h index e8f4fd8..7fe075a 100644 --- a/lib/Target/ARM/ARMRegisterInfo.h +++ b/lib/Target/ARM/ARMRegisterInfo.h @@ -14,6 +14,7 @@ #ifndef ARMREGISTERINFO_H #define ARMREGISTERINFO_H +#include "ARM.h" #include "llvm/Target/TargetRegisterInfo.h" #include "ARMGenRegisterInfo.h.inc" @@ -30,21 +31,28 @@ namespace ARMRI { }; } -struct ARMRegisterInfo : public ARMGenRegisterInfo { +/// isARMLowRegister - Returns true if the register is low register r0-r7. +/// +static inline bool isARMLowRegister(unsigned Reg) { + using namespace ARM; + switch (Reg) { + case R0: case R1: case R2: case R3: + case R4: case R5: case R6: case R7: + return true; + default: + return false; + } +} + +struct ARMBaseRegisterInfo : public ARMGenRegisterInfo { +protected: const TargetInstrInfo &TII; const ARMSubtarget &STI; + /// FramePtr - ARM physical register used as frame ptr. + unsigned FramePtr; public: - ARMRegisterInfo(const TargetInstrInfo &tii, const ARMSubtarget &STI); - - /// emitLoadConstPool - Emits a load from constpool to materialize the - /// specified immediate. - void emitLoadConstPool(MachineBasicBlock &MBB, - MachineBasicBlock::iterator &MBBI, - unsigned DestReg, int Val, - unsigned Pred, unsigned PredReg, - const TargetInstrInfo *TII, bool isThumb, - DebugLoc dl) const; + ARMBaseRegisterInfo(const TargetInstrInfo &tii, const ARMSubtarget &STI); /// getRegisterNumbering - Given the enum value for some register, e.g. /// ARM::LR, return the number that it corresponds to (e.g. 14). @@ -55,8 +63,6 @@ public: static unsigned getRegisterNumbering(unsigned RegEnum, bool &isSPVFP); /// Code Generation virtual methods... 
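isARMLowRegister, newly hoisted into the header above, is the predicate behind most of the Thumb-1 opcode choices later in this patch. A self-contained sketch of the dispatch it enables; raw register numbers and the local enum are stand-ins for the real ARM register and opcode enums, and the mapping mirrors the class-based logic in Thumb1InstrInfo::copyRegToReg further down:

#include <cassert>

enum ThumbMoveOpc { tMOVr, tMOVlor2hir, tMOVhir2lor, tMOVhir2hir };

static bool isLowRegNum(unsigned RegNum) {  // r0-r7
  return RegNum < 8;
}

static ThumbMoveOpc pickThumbMove(unsigned DstReg, unsigned SrcReg) {
  bool DstLow = isLowRegNum(DstReg), SrcLow = isLowRegNum(SrcReg);
  if (DstLow && SrcLow) return tMOVr;        // low  -> low
  if (DstLow)           return tMOVhir2lor;  // high -> low
  if (SrcLow)           return tMOVlor2hir;  // low  -> high
  return tMOVhir2hir;                        // high -> high
}

int main() {
  assert(pickThumbMove(0, 12) == tMOVhir2lor); // r0 <- r12
  assert(pickThumbMove(9, 1)  == tMOVlor2hir); // r9 <- r1
}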
- const TargetRegisterClass * - getPhysicalRegisterRegClass(unsigned Reg, MVT VT = MVT::Other) const; const unsigned *getCalleeSavedRegs(const MachineFunction *MF = 0) const; const TargetRegisterClass* const* @@ -79,25 +85,11 @@ public: void UpdateRegAllocHint(unsigned Reg, unsigned NewReg, MachineFunction &MF) const; - bool requiresRegisterScavenging(const MachineFunction &MF) const; - bool hasFP(const MachineFunction &MF) const; - bool hasReservedCallFrame(MachineFunction &MF) const; - - void eliminateCallFramePseudoInstr(MachineFunction &MF, - MachineBasicBlock &MBB, - MachineBasicBlock::iterator I) const; - - void eliminateFrameIndex(MachineBasicBlock::iterator II, - int SPAdj, RegScavenger *RS = NULL) const; - void processFunctionBeforeCalleeSavedScan(MachineFunction &MF, RegScavenger *RS = NULL) const; - void emitPrologue(MachineFunction &MF) const; - void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const; - // Debug information queries. unsigned getRARegister() const; unsigned getFrameRegister(MachineFunction &MF) const; @@ -107,17 +99,44 @@ public: unsigned getEHHandlerRegister() const; int getDwarfRegNum(unsigned RegNum, bool isEH) const; - + bool isLowRegister(unsigned Reg) const; private: - /// FramePtr - ARM physical register used as frame ptr. - unsigned FramePtr; - unsigned getRegisterPairEven(unsigned Reg, const MachineFunction &MF) const; unsigned getRegisterPairOdd(unsigned Reg, const MachineFunction &MF) const; +}; + +struct ARMRegisterInfo : public ARMBaseRegisterInfo { +public: + ARMRegisterInfo(const TargetInstrInfo &tii, const ARMSubtarget &STI); + + /// emitLoadConstPool - Emits a load from constpool to materialize the + /// specified immediate. + void emitLoadConstPool(MachineBasicBlock &MBB, + MachineBasicBlock::iterator &MBBI, + const TargetInstrInfo *TII, DebugLoc dl, + unsigned DestReg, int Val, + ARMCC::CondCodes Pred = ARMCC::AL, + unsigned PredReg = 0) const; + + /// Code Generation virtual methods... 
+  bool isReservedReg(const MachineFunction &MF, unsigned Reg) const;
+  bool requiresRegisterScavenging(const MachineFunction &MF) const;
+
+  bool hasReservedCallFrame(MachineFunction &MF) const;
+
+  void eliminateCallFramePseudoInstr(MachineFunction &MF,
+                                     MachineBasicBlock &MBB,
+                                     MachineBasicBlock::iterator I) const;
+
+  void eliminateFrameIndex(MachineBasicBlock::iterator II,
+                           int SPAdj, RegScavenger *RS = NULL) const;
+
+  void emitPrologue(MachineFunction &MF) const;
+  void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const;
 };

 } // end namespace llvm
diff --git a/lib/Target/ARM/ARMSubtarget.h b/lib/Target/ARM/ARMSubtarget.h
index c3cc7ff..5110b31 100644
--- a/lib/Target/ARM/ARMSubtarget.h
+++ b/lib/Target/ARM/ARMSubtarget.h
@@ -108,7 +108,8 @@ protected:
   bool isThumb() const { return IsThumb; }
   bool isThumb1Only() const { return IsThumb && (ThumbMode == Thumb1); }
-  bool hasThumb2() const { return IsThumb && (ThumbMode >= Thumb2); }
+  bool isThumb2() const { return IsThumb && (ThumbMode >= Thumb2); }
+  bool hasThumb2() const { return ThumbMode >= Thumb2; }

   bool isR9Reserved() const { return IsR9Reserved; }
diff --git a/lib/Target/ARM/ARMTargetMachine.cpp b/lib/Target/ARM/ARMTargetMachine.cpp
index f7b8215..2344733 100644
--- a/lib/Target/ARM/ARMTargetMachine.cpp
+++ b/lib/Target/ARM/ARMTargetMachine.cpp
@@ -95,13 +95,18 @@ ARMTargetMachine::ARMTargetMachine(const Module &M, const std::string &FS)
 }

 ThumbTargetMachine::ThumbTargetMachine(const Module &M, const std::string &FS)
-  : ARMBaseTargetMachine(M, FS, true), InstrInfo(Subtarget),
+  : ARMBaseTargetMachine(M, FS, true),
     DataLayout(Subtarget.isAPCS_ABI() ?
                std::string("e-p:32:32-f64:32:32-i64:32:32-"
                            "i16:16:32-i8:8:32-i1:8:32-a:0:32") :
                std::string("e-p:32:32-f64:64:64-i64:64:64-"
                            "i16:16:32-i8:8:32-i1:8:32-a:0:32")),
     TLInfo(*this) {
+  // Create the appropriate type of Thumb InstrInfo
+  if (Subtarget.hasThumb2())
+    InstrInfo = new Thumb2InstrInfo(Subtarget);
+  else
+    InstrInfo = new Thumb1InstrInfo(Subtarget);
 }

 unsigned ARMTargetMachine::getJITMatchQuality() {
@@ -179,7 +184,7 @@ bool ARMBaseTargetMachine::addAssemblyEmitter(PassManagerBase &PM,
   // Output assembly language.
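Two details worth noting in the hunks above: hasThumb2() no longer requires Thumb mode, a new isThumb2() keeps the old meaning, and the ThumbTargetMachine constructor then keys off hasThumb2() to instantiate either Thumb2InstrInfo or Thumb1InstrInfo. A simplified, runnable model of the predicate split; field and enumerator names follow ARMSubtarget, everything else is pared down:

#include <cassert>

enum ThumbTypeEnum { Thumb1, Thumb2 };

struct Subtarget {
  bool IsThumb;            // compiling Thumb code at all?
  ThumbTypeEnum ThumbMode; // architectural Thumb support level
  bool isThumb2()  const { return IsThumb && ThumbMode >= Thumb2; }
  bool hasThumb2() const { return ThumbMode >= Thumb2; }
};

int main() {
  Subtarget ARMOnV7 = { false, Thumb2 }; // ARM-mode code on a Thumb-2 core
  assert(ARMOnV7.hasThumb2() && !ARMOnV7.isThumb2());
  Subtarget ThumbV7 = { true, Thumb2 };  // Thumb-mode code, same core
  assert(ThumbV7.hasThumb2() && ThumbV7.isThumb2());
}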
assert(AsmPrinterCtor && "AsmPrinter was not linked in"); if (AsmPrinterCtor) - PM.add(AsmPrinterCtor(Out, *this, OptLevel, Verbose)); + PM.add(AsmPrinterCtor(Out, *this, Verbose)); return false; } @@ -198,7 +203,7 @@ bool ARMBaseTargetMachine::addCodeEmitter(PassManagerBase &PM, if (DumpAsm) { assert(AsmPrinterCtor && "AsmPrinter was not linked in"); if (AsmPrinterCtor) - PM.add(AsmPrinterCtor(errs(), *this, OptLevel, true)); + PM.add(AsmPrinterCtor(errs(), *this, true)); } return false; @@ -217,7 +222,7 @@ bool ARMBaseTargetMachine::addCodeEmitter(PassManagerBase &PM, if (DumpAsm) { assert(AsmPrinterCtor && "AsmPrinter was not linked in"); if (AsmPrinterCtor) - PM.add(AsmPrinterCtor(errs(), *this, OptLevel, true)); + PM.add(AsmPrinterCtor(errs(), *this, true)); } return false; @@ -232,7 +237,7 @@ bool ARMBaseTargetMachine::addSimpleCodeEmitter(PassManagerBase &PM, if (DumpAsm) { assert(AsmPrinterCtor && "AsmPrinter was not linked in"); if (AsmPrinterCtor) - PM.add(AsmPrinterCtor(errs(), *this, OptLevel, true)); + PM.add(AsmPrinterCtor(errs(), *this, true)); } return false; @@ -247,7 +252,7 @@ bool ARMBaseTargetMachine::addSimpleCodeEmitter(PassManagerBase &PM, if (DumpAsm) { assert(AsmPrinterCtor && "AsmPrinter was not linked in"); if (AsmPrinterCtor) - PM.add(AsmPrinterCtor(errs(), *this, OptLevel, true)); + PM.add(AsmPrinterCtor(errs(), *this, true)); } return false; diff --git a/lib/Target/ARM/ARMTargetMachine.h b/lib/Target/ARM/ARMTargetMachine.h index 0b49b92..a0df54d 100644 --- a/lib/Target/ARM/ARMTargetMachine.h +++ b/lib/Target/ARM/ARMTargetMachine.h @@ -22,7 +22,8 @@ #include "ARMJITInfo.h" #include "ARMSubtarget.h" #include "ARMISelLowering.h" -#include "ThumbInstrInfo.h" +#include "Thumb1InstrInfo.h" +#include "Thumb2InstrInfo.h" namespace llvm { @@ -43,7 +44,6 @@ protected: // set this functions to ctor pointer at startup time if they are linked in. typedef FunctionPass *(*AsmPrinterCtorFn)(raw_ostream &o, ARMBaseTargetMachine &tm, - CodeGenOpt::Level OptLevel, bool verbose); static AsmPrinterCtorFn AsmPrinterCtor; @@ -112,23 +112,27 @@ public: }; /// ThumbTargetMachine - Thumb target machine. +/// Due to the way architectures are handled, this represents both +/// Thumb-1 and Thumb-2. 
/// class ThumbTargetMachine : public ARMBaseTargetMachine { - ThumbInstrInfo InstrInfo; - const TargetData DataLayout; // Calculates type size & alignment + ARMBaseInstrInfo *InstrInfo; // either Thumb1InstrInfo or Thumb2InstrInfo + const TargetData DataLayout; // Calculates type size & alignment ARMTargetLowering TLInfo; public: ThumbTargetMachine(const Module &M, const std::string &FS); - virtual const ARMRegisterInfo *getRegisterInfo() const { - return &InstrInfo.getRegisterInfo(); + /// returns either Thumb1RegisterInfo of Thumb2RegisterInfo + virtual const ARMBaseRegisterInfo *getRegisterInfo() const { + return &InstrInfo->getRegisterInfo(); } - virtual ARMTargetLowering *getTargetLowering() const { + virtual ARMTargetLowering *getTargetLowering() const { return const_cast<ARMTargetLowering*>(&TLInfo); } - virtual const ThumbInstrInfo *getInstrInfo() const { return &InstrInfo; } + /// returns either Thumb1InstrInfo or Thumb2InstrInfo + virtual const ARMBaseInstrInfo *getInstrInfo() const { return InstrInfo; } virtual const TargetData *getTargetData() const { return &DataLayout; } static unsigned getJITMatchQuality(); diff --git a/lib/Target/ARM/AsmPrinter/ARMAsmPrinter.cpp b/lib/Target/ARM/AsmPrinter/ARMAsmPrinter.cpp index 400f628a..434a19a 100644 --- a/lib/Target/ARM/AsmPrinter/ARMAsmPrinter.cpp +++ b/lib/Target/ARM/AsmPrinter/ARMAsmPrinter.cpp @@ -82,9 +82,8 @@ namespace { bool InCPMode; public: explicit ARMAsmPrinter(raw_ostream &O, TargetMachine &TM, - const TargetAsmInfo *T, CodeGenOpt::Level OL, - bool V) - : AsmPrinter(O, TM, T, OL, V), DW(0), AFI(NULL), MCP(NULL), + const TargetAsmInfo *T, bool V) + : AsmPrinter(O, TM, T, V), DW(0), AFI(NULL), MCP(NULL), InCPMode(false) { Subtarget = &TM.getSubtarget<ARMSubtarget>(); } @@ -93,45 +92,50 @@ namespace { return "ARM Assembly Printer"; } - void printOperand(const MachineInstr *MI, int opNum, + void printOperand(const MachineInstr *MI, int OpNum, const char *Modifier = 0); - void printSOImmOperand(const MachineInstr *MI, int opNum); - void printSOImm2PartOperand(const MachineInstr *MI, int opNum); - void printSORegOperand(const MachineInstr *MI, int opNum); - void printAddrMode2Operand(const MachineInstr *MI, int OpNo); - void printAddrMode2OffsetOperand(const MachineInstr *MI, int OpNo); - void printAddrMode3Operand(const MachineInstr *MI, int OpNo); - void printAddrMode3OffsetOperand(const MachineInstr *MI, int OpNo); - void printAddrMode4Operand(const MachineInstr *MI, int OpNo, + void printSOImmOperand(const MachineInstr *MI, int OpNum); + void printSOImm2PartOperand(const MachineInstr *MI, int OpNum); + void printSORegOperand(const MachineInstr *MI, int OpNum); + void printAddrMode2Operand(const MachineInstr *MI, int OpNum); + void printAddrMode2OffsetOperand(const MachineInstr *MI, int OpNum); + void printAddrMode3Operand(const MachineInstr *MI, int OpNum); + void printAddrMode3OffsetOperand(const MachineInstr *MI, int OpNum); + void printAddrMode4Operand(const MachineInstr *MI, int OpNum, const char *Modifier = 0); - void printAddrMode5Operand(const MachineInstr *MI, int OpNo, + void printAddrMode5Operand(const MachineInstr *MI, int OpNum, const char *Modifier = 0); - void printAddrModePCOperand(const MachineInstr *MI, int OpNo, + void printAddrMode6Operand(const MachineInstr *MI, int OpNum); + void printAddrModePCOperand(const MachineInstr *MI, int OpNum, const char *Modifier = 0); - void printBitfieldInvMaskImmOperand (const MachineInstr *MI, int OpNo); + void printBitfieldInvMaskImmOperand (const MachineInstr *MI, int 
OpNum); - void printThumbAddrModeRROperand(const MachineInstr *MI, int OpNo); - void printThumbAddrModeRI5Operand(const MachineInstr *MI, int OpNo, + void printThumbAddrModeRROperand(const MachineInstr *MI, int OpNum); + void printThumbAddrModeRI5Operand(const MachineInstr *MI, int OpNum, unsigned Scale); - void printThumbAddrModeS1Operand(const MachineInstr *MI, int OpNo); - void printThumbAddrModeS2Operand(const MachineInstr *MI, int OpNo); - void printThumbAddrModeS4Operand(const MachineInstr *MI, int OpNo); - void printThumbAddrModeSPOperand(const MachineInstr *MI, int OpNo); + void printThumbAddrModeS1Operand(const MachineInstr *MI, int OpNum); + void printThumbAddrModeS2Operand(const MachineInstr *MI, int OpNum); + void printThumbAddrModeS4Operand(const MachineInstr *MI, int OpNum); + void printThumbAddrModeSPOperand(const MachineInstr *MI, int OpNum); - void printT2SOImmOperand(const MachineInstr *MI, int opNum); + void printT2SOImmOperand(const MachineInstr *MI, int OpNum); void printT2SOOperand(const MachineInstr *MI, int OpNum); - - void printPredicateOperand(const MachineInstr *MI, int opNum); - void printSBitModifierOperand(const MachineInstr *MI, int opNum); - void printPCLabel(const MachineInstr *MI, int opNum); - void printRegisterList(const MachineInstr *MI, int opNum); - void printCPInstOperand(const MachineInstr *MI, int opNum, + void printT2AddrModeImm12Operand(const MachineInstr *MI, int OpNum); + void printT2AddrModeImm8Operand(const MachineInstr *MI, int OpNum); + void printT2AddrModeImm8OffsetOperand(const MachineInstr *MI, int OpNum); + void printT2AddrModeSoRegOperand(const MachineInstr *MI, int OpNum); + + void printPredicateOperand(const MachineInstr *MI, int OpNum); + void printSBitModifierOperand(const MachineInstr *MI, int OpNum); + void printPCLabel(const MachineInstr *MI, int OpNum); + void printRegisterList(const MachineInstr *MI, int OpNum); + void printCPInstOperand(const MachineInstr *MI, int OpNum, const char *Modifier); - void printJTBlockOperand(const MachineInstr *MI, int opNum); + void printJTBlockOperand(const MachineInstr *MI, int OpNum); - virtual bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNo, + virtual bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNum, unsigned AsmVariant, const char *ExtraCode); - virtual bool PrintAsmMemoryOperand(const MachineInstr *MI, unsigned OpNo, + virtual bool PrintAsmMemoryOperand(const MachineInstr *MI, unsigned OpNum, unsigned AsmVariant, const char *ExtraCode); @@ -232,15 +236,16 @@ bool ARMAsmPrinter::runOnMachineFunction(MachineFunction &MF) { printVisibility(CurrentFnName, F->getVisibility()); if (AFI->isThumbFunction()) { - EmitAlignment(1, F, AFI->getAlign()); + EmitAlignment(MF.getAlignment(), F, AFI->getAlign()); O << "\t.code\t16\n"; O << "\t.thumb_func"; if (Subtarget->isTargetDarwin()) O << "\t" << CurrentFnName; O << "\n"; InCPMode = false; - } else - EmitAlignment(2, F); + } else { + EmitAlignment(MF.getAlignment(), F); + } O << CurrentFnName << ":\n"; // Emit pre-function debug information. 
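One behavioral change in the runOnMachineFunction hunk above: function alignment now comes from MF.getAlignment() instead of hard-coded constants. The argument is a log2 quantity, so the old values amounted to:

EmitAlignment(1, F);  // log2 == 1 -> 2-byte alignment (old Thumb default)
EmitAlignment(2, F);  // log2 == 2 -> 4-byte alignment (old ARM default)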
@@ -282,9 +287,9 @@ bool ARMAsmPrinter::runOnMachineFunction(MachineFunction &MF) { return false; } -void ARMAsmPrinter::printOperand(const MachineInstr *MI, int opNum, +void ARMAsmPrinter::printOperand(const MachineInstr *MI, int OpNum, const char *Modifier) { - const MachineOperand &MO = MI->getOperand(opNum); + const MachineOperand &MO = MI->getOperand(OpNum); switch (MO.getType()) { case MachineOperand::MO_Register: { unsigned Reg = MO.getReg(); @@ -584,6 +589,22 @@ void ARMAsmPrinter::printAddrMode5Operand(const MachineInstr *MI, int Op, O << "]"; } +void ARMAsmPrinter::printAddrMode6Operand(const MachineInstr *MI, int Op) { + const MachineOperand &MO1 = MI->getOperand(Op); + const MachineOperand &MO2 = MI->getOperand(Op+1); + const MachineOperand &MO3 = MI->getOperand(Op+2); + + // FIXME: No support yet for specifying alignment. + O << "[" << TM.getRegisterInfo()->get(MO1.getReg()).AsmName << "]"; + + if (ARM_AM::getAM6WBFlag(MO3.getImm())) { + if (MO2.getReg() == 0) + O << "!"; + else + O << ", " << TM.getRegisterInfo()->get(MO2.getReg()).AsmName; + } +} + void ARMAsmPrinter::printAddrModePCOperand(const MachineInstr *MI, int Op, const char *Modifier) { if (Modifier && strcmp(Modifier, "label") == 0) { @@ -606,6 +627,8 @@ ARMAsmPrinter::printBitfieldInvMaskImmOperand(const MachineInstr *MI, int Op) { O << "#" << lsb << ", #" << width; } +//===--------------------------------------------------------------------===// + void ARMAsmPrinter::printThumbAddrModeRROperand(const MachineInstr *MI, int Op) { const MachineOperand &MO1 = MI->getOperand(Op); @@ -659,6 +682,8 @@ void ARMAsmPrinter::printThumbAddrModeSPOperand(const MachineInstr *MI,int Op) { O << "]"; } +//===--------------------------------------------------------------------===// + /// printT2SOImmOperand - T2SOImm is: /// 1. a 4-bit splat control value and 8 bit immediate value /// 2. a 5-bit rotate amount and a non-zero 8-bit immediate value @@ -694,47 +719,110 @@ void ARMAsmPrinter::printT2SOOperand(const MachineInstr *MI, int OpNum) { O << "#" << ARM_AM::getSORegOffset(MO2.getImm()); } +void ARMAsmPrinter::printT2AddrModeImm12Operand(const MachineInstr *MI, + int OpNum) { + const MachineOperand &MO1 = MI->getOperand(OpNum); + const MachineOperand &MO2 = MI->getOperand(OpNum+1); + + O << "[" << TM.getRegisterInfo()->get(MO1.getReg()).AsmName; + + unsigned OffImm = MO2.getImm(); + if (OffImm) // Don't print +0. + O << ", #+" << OffImm; + O << "]"; +} -void ARMAsmPrinter::printPredicateOperand(const MachineInstr *MI, int opNum) { - ARMCC::CondCodes CC = (ARMCC::CondCodes)MI->getOperand(opNum).getImm(); +void ARMAsmPrinter::printT2AddrModeImm8Operand(const MachineInstr *MI, + int OpNum) { + const MachineOperand &MO1 = MI->getOperand(OpNum); + const MachineOperand &MO2 = MI->getOperand(OpNum+1); + + O << "[" << TM.getRegisterInfo()->get(MO1.getReg()).AsmName; + + int32_t OffImm = (int32_t)MO2.getImm(); + // Don't print +0. + if (OffImm < 0) + O << ", #-" << -OffImm; + else if (OffImm > 0) + O << ", #+" << OffImm; + O << "]"; +} + +void ARMAsmPrinter::printT2AddrModeImm8OffsetOperand(const MachineInstr *MI, + int OpNum) { + const MachineOperand &MO1 = MI->getOperand(OpNum); + int32_t OffImm = (int32_t)MO1.getImm(); + // Don't print +0. 
+ if (OffImm < 0) + O << "#-" << -OffImm; + else if (OffImm > 0) + O << "#+" << OffImm; +} + +void ARMAsmPrinter::printT2AddrModeSoRegOperand(const MachineInstr *MI, + int OpNum) { + const MachineOperand &MO1 = MI->getOperand(OpNum); + const MachineOperand &MO2 = MI->getOperand(OpNum+1); + const MachineOperand &MO3 = MI->getOperand(OpNum+2); + + O << "[" << TM.getRegisterInfo()->get(MO1.getReg()).AsmName; + + if (MO2.getReg()) { + O << ", +" + << TM.getRegisterInfo()->get(MO2.getReg()).AsmName; + + unsigned ShAmt = MO3.getImm(); + if (ShAmt) { + assert(ShAmt <= 3 && "Not a valid Thumb2 addressing mode!"); + O << ", lsl #" << ShAmt; + } + } + O << "]"; +} + + +//===--------------------------------------------------------------------===// + +void ARMAsmPrinter::printPredicateOperand(const MachineInstr *MI, int OpNum) { + ARMCC::CondCodes CC = (ARMCC::CondCodes)MI->getOperand(OpNum).getImm(); if (CC != ARMCC::AL) O << ARMCondCodeToString(CC); } -void ARMAsmPrinter::printSBitModifierOperand(const MachineInstr *MI, int opNum){ - unsigned Reg = MI->getOperand(opNum).getReg(); +void ARMAsmPrinter::printSBitModifierOperand(const MachineInstr *MI, int OpNum){ + unsigned Reg = MI->getOperand(OpNum).getReg(); if (Reg) { assert(Reg == ARM::CPSR && "Expect ARM CPSR register!"); O << 's'; } } -void ARMAsmPrinter::printPCLabel(const MachineInstr *MI, int opNum) { - int Id = (int)MI->getOperand(opNum).getImm(); +void ARMAsmPrinter::printPCLabel(const MachineInstr *MI, int OpNum) { + int Id = (int)MI->getOperand(OpNum).getImm(); O << TAI->getPrivateGlobalPrefix() << "PC" << Id; } -void ARMAsmPrinter::printRegisterList(const MachineInstr *MI, int opNum) { +void ARMAsmPrinter::printRegisterList(const MachineInstr *MI, int OpNum) { O << "{"; - for (unsigned i = opNum, e = MI->getNumOperands(); i != e; ++i) { + for (unsigned i = OpNum, e = MI->getNumOperands(); i != e; ++i) { printOperand(MI, i); if (i != e-1) O << ", "; } O << "}"; } -void ARMAsmPrinter::printCPInstOperand(const MachineInstr *MI, int OpNo, +void ARMAsmPrinter::printCPInstOperand(const MachineInstr *MI, int OpNum, const char *Modifier) { assert(Modifier && "This operand only works with a modifier!"); // There are two aspects to a CONSTANTPOOL_ENTRY operand, the label and the // data itself. 
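For orientation, the assembler text the four new Thumb-2 addressing-mode printers above produce looks like this (register and offset choices are arbitrary):

  printT2AddrModeImm12Operand:      [r0, #+4080]   ([r0] when the offset is 0)
  printT2AddrModeImm8Operand:       [r1, #-8]
  printT2AddrModeImm8OffsetOperand: #+4  or  #-4
  printT2AddrModeSoRegOperand:      [r2, +r3, lsl #2]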
if (!strcmp(Modifier, "label")) { - unsigned ID = MI->getOperand(OpNo).getImm(); + unsigned ID = MI->getOperand(OpNum).getImm(); O << TAI->getPrivateGlobalPrefix() << "CPI" << getFunctionNumber() << '_' << ID << ":\n"; } else { assert(!strcmp(Modifier, "cpentry") && "Unknown modifier for CPE"); - unsigned CPI = MI->getOperand(OpNo).getIndex(); + unsigned CPI = MI->getOperand(OpNum).getIndex(); const MachineConstantPoolEntry &MCPE = MCP->getConstants()[CPI]; @@ -746,9 +834,9 @@ void ARMAsmPrinter::printCPInstOperand(const MachineInstr *MI, int OpNo, } } -void ARMAsmPrinter::printJTBlockOperand(const MachineInstr *MI, int OpNo) { - const MachineOperand &MO1 = MI->getOperand(OpNo); - const MachineOperand &MO2 = MI->getOperand(OpNo+1); // Unique Id +void ARMAsmPrinter::printJTBlockOperand(const MachineInstr *MI, int OpNum) { + const MachineOperand &MO1 = MI->getOperand(OpNum); + const MachineOperand &MO2 = MI->getOperand(OpNum+1); // Unique Id unsigned JTI = MO1.getIndex(); O << TAI->getPrivateGlobalPrefix() << "JTI" << getFunctionNumber() << '_' << JTI << '_' << MO2.getImm() << ":\n"; @@ -787,7 +875,7 @@ void ARMAsmPrinter::printJTBlockOperand(const MachineInstr *MI, int OpNo) { } -bool ARMAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo, +bool ARMAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNum, unsigned AsmVariant, const char *ExtraCode){ // Does this asm operand have a single letter operand modifier? if (ExtraCode && ExtraCode[0]) { @@ -797,10 +885,10 @@ bool ARMAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo, default: return true; // Unknown modifier. case 'a': // Don't print "#" before a global var name or constant. case 'c': // Don't print "$" before a global var name or constant. - printOperand(MI, OpNo, "no_hash"); + printOperand(MI, OpNum, "no_hash"); return false; case 'P': // Print a VFP double precision register. - printOperand(MI, OpNo); + printOperand(MI, OpNum); return false; case 'Q': if (TM.getTargetData()->isLittleEndian()) @@ -812,24 +900,24 @@ bool ARMAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo, // Fallthrough case 'H': // Write second word of DI / DF reference. // Verify that this operand has two consecutive registers. - if (!MI->getOperand(OpNo).isReg() || - OpNo+1 == MI->getNumOperands() || - !MI->getOperand(OpNo+1).isReg()) + if (!MI->getOperand(OpNum).isReg() || + OpNum+1 == MI->getNumOperands() || + !MI->getOperand(OpNum+1).isReg()) return true; - ++OpNo; // Return the high-part. + ++OpNum; // Return the high-part. } } - printOperand(MI, OpNo); + printOperand(MI, OpNum); return false; } bool ARMAsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI, - unsigned OpNo, unsigned AsmVariant, + unsigned OpNum, unsigned AsmVariant, const char *ExtraCode) { if (ExtraCode && ExtraCode[0]) return true; // Unknown modifier. 
- printAddrMode2Operand(MI, OpNo); + printAddrMode2Operand(MI, OpNum); return false; } @@ -1138,9 +1226,8 @@ bool ARMAsmPrinter::doFinalization(Module &M) { /// FunctionPass *llvm::createARMCodePrinterPass(raw_ostream &o, ARMBaseTargetMachine &tm, - CodeGenOpt::Level OptLevel, bool verbose) { - return new ARMAsmPrinter(o, tm, tm.getTargetAsmInfo(), OptLevel, verbose); + return new ARMAsmPrinter(o, tm, tm.getTargetAsmInfo(), verbose); } namespace { diff --git a/lib/Target/ARM/CMakeLists.txt b/lib/Target/ARM/CMakeLists.txt index e665ed9..9c46fe0 100644 --- a/lib/Target/ARM/CMakeLists.txt +++ b/lib/Target/ARM/CMakeLists.txt @@ -24,5 +24,10 @@ add_llvm_target(ARMCodeGen ARMSubtarget.cpp ARMTargetAsmInfo.cpp ARMTargetMachine.cpp - ThumbInstrInfo.cpp + Thumb1InstrInfo.cpp + Thumb1RegisterInfo.cpp + Thumb2InstrInfo.cpp + Thumb2RegisterInfo.cpp ) + +target_link_libraries (LLVMARMCodeGen LLVMSelectionDAG) diff --git a/lib/Target/ARM/README.txt b/lib/Target/ARM/README.txt index 4223699..f3377f9 100644 --- a/lib/Target/ARM/README.txt +++ b/lib/Target/ARM/README.txt @@ -530,3 +530,10 @@ those operations and the ARMv6 scalar versions. ARM::MOVCCr is commutable (by flipping the condition). But we need to implement ARMInstrInfo::commuteInstruction() to support it. + +//===---------------------------------------------------------------------===// + +Split out LDR (literal) from normal ARM LDR instruction. Also consider spliting +LDR into imm12 and so_reg forms. This allows us to clean up some code. e.g. +ARMLoadStoreOptimizer does not need to look at LDR (literal) and LDR (so_reg) +while ARMConstantIslandPass only need to worry about LDR (literal). diff --git a/lib/Target/ARM/Thumb1InstrInfo.cpp b/lib/Target/ARM/Thumb1InstrInfo.cpp new file mode 100644 index 0000000..e13a811 --- /dev/null +++ b/lib/Target/ARM/Thumb1InstrInfo.cpp @@ -0,0 +1,304 @@ +//===- Thumb1InstrInfo.cpp - Thumb-1 Instruction Information --------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains the Thumb-1 implementation of the TargetInstrInfo class. +// +//===----------------------------------------------------------------------===// + +#include "ARMInstrInfo.h" +#include "ARM.h" +#include "ARMGenInstrInfo.inc" +#include "ARMMachineFunctionInfo.h" +#include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/ADT/SmallVector.h" +#include "Thumb1InstrInfo.h" + +using namespace llvm; + +Thumb1InstrInfo::Thumb1InstrInfo(const ARMSubtarget &STI) + : ARMBaseInstrInfo(STI), RI(*this, STI) { +} + +bool Thumb1InstrInfo::isMoveInstr(const MachineInstr &MI, + unsigned &SrcReg, unsigned &DstReg, + unsigned& SrcSubIdx, unsigned& DstSubIdx) const { + SrcSubIdx = DstSubIdx = 0; // No sub-registers. 
+ + unsigned oc = MI.getOpcode(); + switch (oc) { + default: + return false; + case ARM::tMOVr: + case ARM::tMOVhir2lor: + case ARM::tMOVlor2hir: + case ARM::tMOVhir2hir: + assert(MI.getDesc().getNumOperands() >= 2 && + MI.getOperand(0).isReg() && + MI.getOperand(1).isReg() && + "Invalid Thumb MOV instruction"); + SrcReg = MI.getOperand(1).getReg(); + DstReg = MI.getOperand(0).getReg(); + return true; + } +} + +unsigned Thumb1InstrInfo::isLoadFromStackSlot(const MachineInstr *MI, + int &FrameIndex) const { + switch (MI->getOpcode()) { + default: break; + case ARM::tRestore: + if (MI->getOperand(1).isFI() && + MI->getOperand(2).isImm() && + MI->getOperand(2).getImm() == 0) { + FrameIndex = MI->getOperand(1).getIndex(); + return MI->getOperand(0).getReg(); + } + break; + } + return 0; +} + +unsigned Thumb1InstrInfo::isStoreToStackSlot(const MachineInstr *MI, + int &FrameIndex) const { + switch (MI->getOpcode()) { + default: break; + case ARM::tSpill: + if (MI->getOperand(1).isFI() && + MI->getOperand(2).isImm() && + MI->getOperand(2).getImm() == 0) { + FrameIndex = MI->getOperand(1).getIndex(); + return MI->getOperand(0).getReg(); + } + break; + } + return 0; +} + +bool Thumb1InstrInfo::copyRegToReg(MachineBasicBlock &MBB, + MachineBasicBlock::iterator I, + unsigned DestReg, unsigned SrcReg, + const TargetRegisterClass *DestRC, + const TargetRegisterClass *SrcRC) const { + DebugLoc DL = DebugLoc::getUnknownLoc(); + if (I != MBB.end()) DL = I->getDebugLoc(); + + if (DestRC == ARM::GPRRegisterClass) { + if (SrcRC == ARM::GPRRegisterClass) { + BuildMI(MBB, I, DL, get(ARM::tMOVhir2hir), DestReg).addReg(SrcReg); + return true; + } else if (SrcRC == ARM::tGPRRegisterClass) { + BuildMI(MBB, I, DL, get(ARM::tMOVlor2hir), DestReg).addReg(SrcReg); + return true; + } + } else if (DestRC == ARM::tGPRRegisterClass) { + if (SrcRC == ARM::GPRRegisterClass) { + BuildMI(MBB, I, DL, get(ARM::tMOVhir2lor), DestReg).addReg(SrcReg); + return true; + } else if (SrcRC == ARM::tGPRRegisterClass) { + BuildMI(MBB, I, DL, get(ARM::tMOVr), DestReg).addReg(SrcReg); + return true; + } + } + + return false; +} + +bool Thumb1InstrInfo:: +canFoldMemoryOperand(const MachineInstr *MI, + const SmallVectorImpl<unsigned> &Ops) const { + if (Ops.size() != 1) return false; + + unsigned OpNum = Ops[0]; + unsigned Opc = MI->getOpcode(); + switch (Opc) { + default: break; + case ARM::tMOVr: + case ARM::tMOVlor2hir: + case ARM::tMOVhir2lor: + case ARM::tMOVhir2hir: { + if (OpNum == 0) { // move -> store + unsigned SrcReg = MI->getOperand(1).getReg(); + if (RI.isPhysicalRegister(SrcReg) && !isARMLowRegister(SrcReg)) + // tSpill cannot take a high register operand. + return false; + } else { // move -> load + unsigned DstReg = MI->getOperand(0).getReg(); + if (RI.isPhysicalRegister(DstReg) && !isARMLowRegister(DstReg)) + // tRestore cannot target a high register operand. 
+ return false; + } + return true; + } + } + + return false; +} + +void Thumb1InstrInfo:: +storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, + unsigned SrcReg, bool isKill, int FI, + const TargetRegisterClass *RC) const { + DebugLoc DL = DebugLoc::getUnknownLoc(); + if (I != MBB.end()) DL = I->getDebugLoc(); + + assert(RC == ARM::tGPRRegisterClass && "Unknown regclass!"); + + if (RC == ARM::tGPRRegisterClass) { + BuildMI(MBB, I, DL, get(ARM::tSpill)) + .addReg(SrcReg, getKillRegState(isKill)) + .addFrameIndex(FI).addImm(0); + } +} + +void Thumb1InstrInfo::storeRegToAddr(MachineFunction &MF, unsigned SrcReg, + bool isKill, + SmallVectorImpl<MachineOperand> &Addr, + const TargetRegisterClass *RC, + SmallVectorImpl<MachineInstr*> &NewMIs) const{ + DebugLoc DL = DebugLoc::getUnknownLoc(); + unsigned Opc = 0; + + assert(RC == ARM::GPRRegisterClass && "Unknown regclass!"); + if (RC == ARM::GPRRegisterClass) { + Opc = Addr[0].isFI() ? ARM::tSpill : ARM::tSTR; + } + + MachineInstrBuilder MIB = + BuildMI(MF, DL, get(Opc)).addReg(SrcReg, getKillRegState(isKill)); + for (unsigned i = 0, e = Addr.size(); i != e; ++i) + MIB.addOperand(Addr[i]); + NewMIs.push_back(MIB); + return; +} + +void Thumb1InstrInfo:: +loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, + unsigned DestReg, int FI, + const TargetRegisterClass *RC) const { + DebugLoc DL = DebugLoc::getUnknownLoc(); + if (I != MBB.end()) DL = I->getDebugLoc(); + + assert(RC == ARM::tGPRRegisterClass && "Unknown regclass!"); + + if (RC == ARM::tGPRRegisterClass) { + BuildMI(MBB, I, DL, get(ARM::tRestore), DestReg) + .addFrameIndex(FI).addImm(0); + } +} + +void Thumb1InstrInfo:: +loadRegFromAddr(MachineFunction &MF, unsigned DestReg, + SmallVectorImpl<MachineOperand> &Addr, + const TargetRegisterClass *RC, + SmallVectorImpl<MachineInstr*> &NewMIs) const { + DebugLoc DL = DebugLoc::getUnknownLoc(); + unsigned Opc = 0; + + if (RC == ARM::GPRRegisterClass) { + Opc = Addr[0].isFI() ? ARM::tRestore : ARM::tLDR; + } + + MachineInstrBuilder MIB = BuildMI(MF, DL, get(Opc), DestReg); + for (unsigned i = 0, e = Addr.size(); i != e; ++i) + MIB.addOperand(Addr[i]); + NewMIs.push_back(MIB); + return; +} + +bool Thumb1InstrInfo:: +spillCalleeSavedRegisters(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MI, + const std::vector<CalleeSavedInfo> &CSI) const { + if (CSI.empty()) + return false; + + DebugLoc DL = DebugLoc::getUnknownLoc(); + if (MI != MBB.end()) DL = MI->getDebugLoc(); + + MachineInstrBuilder MIB = BuildMI(MBB, MI, DL, get(ARM::tPUSH)); + for (unsigned i = CSI.size(); i != 0; --i) { + unsigned Reg = CSI[i-1].getReg(); + // Add the callee-saved register as live-in. It's killed at the spill. + MBB.addLiveIn(Reg); + MIB.addReg(Reg, RegState::Kill); + } + return true; +} + +bool Thumb1InstrInfo:: +restoreCalleeSavedRegisters(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MI, + const std::vector<CalleeSavedInfo> &CSI) const { + MachineFunction &MF = *MBB.getParent(); + ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); + if (CSI.empty()) + return false; + + bool isVarArg = AFI->getVarArgsRegSaveSize() > 0; + MachineInstr *PopMI = MF.CreateMachineInstr(get(ARM::tPOP),MI->getDebugLoc()); + for (unsigned i = CSI.size(); i != 0; --i) { + unsigned Reg = CSI[i-1].getReg(); + if (Reg == ARM::LR) { + // Special epilogue for vararg functions. 
See emitEpilogue + if (isVarArg) + continue; + Reg = ARM::PC; + PopMI->setDesc(get(ARM::tPOP_RET)); + MI = MBB.erase(MI); + } + PopMI->addOperand(MachineOperand::CreateReg(Reg, true)); + } + + // It's illegal to emit pop instruction without operands. + if (PopMI->getNumOperands() > 0) + MBB.insert(MI, PopMI); + + return true; +} + +MachineInstr *Thumb1InstrInfo:: +foldMemoryOperandImpl(MachineFunction &MF, MachineInstr *MI, + const SmallVectorImpl<unsigned> &Ops, int FI) const { + if (Ops.size() != 1) return NULL; + + unsigned OpNum = Ops[0]; + unsigned Opc = MI->getOpcode(); + MachineInstr *NewMI = NULL; + switch (Opc) { + default: break; + case ARM::tMOVr: + case ARM::tMOVlor2hir: + case ARM::tMOVhir2lor: + case ARM::tMOVhir2hir: { + if (OpNum == 0) { // move -> store + unsigned SrcReg = MI->getOperand(1).getReg(); + bool isKill = MI->getOperand(1).isKill(); + if (RI.isPhysicalRegister(SrcReg) && !isARMLowRegister(SrcReg)) + // tSpill cannot take a high register operand. + break; + NewMI = BuildMI(MF, MI->getDebugLoc(), get(ARM::tSpill)) + .addReg(SrcReg, getKillRegState(isKill)) + .addFrameIndex(FI).addImm(0); + } else { // move -> load + unsigned DstReg = MI->getOperand(0).getReg(); + if (RI.isPhysicalRegister(DstReg) && !isARMLowRegister(DstReg)) + // tRestore cannot target a high register operand. + break; + bool isDead = MI->getOperand(0).isDead(); + NewMI = BuildMI(MF, MI->getDebugLoc(), get(ARM::tRestore)) + .addReg(DstReg, RegState::Define | getDeadRegState(isDead)) + .addFrameIndex(FI).addImm(0); + } + break; + } + } + + return NewMI; +} diff --git a/lib/Target/ARM/Thumb1InstrInfo.h b/lib/Target/ARM/Thumb1InstrInfo.h new file mode 100644 index 0000000..1bfa1d0 --- /dev/null +++ b/lib/Target/ARM/Thumb1InstrInfo.h @@ -0,0 +1,93 @@ +//===- Thumb1InstrInfo.h - Thumb-1 Instruction Information ----------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains the Thumb-1 implementation of the TargetInstrInfo class. +// +//===----------------------------------------------------------------------===// + +#ifndef THUMB1INSTRUCTIONINFO_H +#define THUMB1INSTRUCTIONINFO_H + +#include "llvm/Target/TargetInstrInfo.h" +#include "ARM.h" +#include "ARMInstrInfo.h" +#include "Thumb1RegisterInfo.h" + +namespace llvm { + class ARMSubtarget; + +class Thumb1InstrInfo : public ARMBaseInstrInfo { + Thumb1RegisterInfo RI; +public: + explicit Thumb1InstrInfo(const ARMSubtarget &STI); + + /// getRegisterInfo - TargetInstrInfo is a superset of MRegister info. As + /// such, whenever a client has an instance of instruction info, it should + /// always be able to get register info as well (through this method). 
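The restoreCalleeSavedRegisters implementation above folds the function return into the restore: when the saved LR comes off the stack it is popped directly into PC and the tPOP is retagged as tPOP_RET. A typical non-vararg Thumb-1 function therefore pairs up as follows (vararg functions skip the fold, as the comment notes):

  prologue:  push {r4, r5, lr}    ; spillCalleeSavedRegisters (tPUSH)
  epilogue:  pop  {r4, r5, pc}    ; tPOP_RET, replacing a separate return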
+ /// + const Thumb1RegisterInfo &getRegisterInfo() const { return RI; } + + bool spillCalleeSavedRegisters(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MI, + const std::vector<CalleeSavedInfo> &CSI) const; + bool restoreCalleeSavedRegisters(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MI, + const std::vector<CalleeSavedInfo> &CSI) const; + + bool isMoveInstr(const MachineInstr &MI, + unsigned &SrcReg, unsigned &DstReg, + unsigned &SrcSubIdx, unsigned &DstSubIdx) const; + unsigned isLoadFromStackSlot(const MachineInstr *MI, + int &FrameIndex) const; + unsigned isStoreToStackSlot(const MachineInstr *MI, + int &FrameIndex) const; + + bool copyRegToReg(MachineBasicBlock &MBB, + MachineBasicBlock::iterator I, + unsigned DestReg, unsigned SrcReg, + const TargetRegisterClass *DestRC, + const TargetRegisterClass *SrcRC) const; + void storeRegToStackSlot(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBI, + unsigned SrcReg, bool isKill, int FrameIndex, + const TargetRegisterClass *RC) const; + + void storeRegToAddr(MachineFunction &MF, unsigned SrcReg, bool isKill, + SmallVectorImpl<MachineOperand> &Addr, + const TargetRegisterClass *RC, + SmallVectorImpl<MachineInstr*> &NewMIs) const; + + void loadRegFromStackSlot(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBI, + unsigned DestReg, int FrameIndex, + const TargetRegisterClass *RC) const; + + void loadRegFromAddr(MachineFunction &MF, unsigned DestReg, + SmallVectorImpl<MachineOperand> &Addr, + const TargetRegisterClass *RC, + SmallVectorImpl<MachineInstr*> &NewMIs) const; + + bool canFoldMemoryOperand(const MachineInstr *MI, + const SmallVectorImpl<unsigned> &Ops) const; + + MachineInstr* foldMemoryOperandImpl(MachineFunction &MF, + MachineInstr* MI, + const SmallVectorImpl<unsigned> &Ops, + int FrameIndex) const; + + MachineInstr* foldMemoryOperandImpl(MachineFunction &MF, + MachineInstr* MI, + const SmallVectorImpl<unsigned> &Ops, + MachineInstr* LoadMI) const { + return 0; + } +}; +} + +#endif // THUMB1INSTRUCTIONINFO_H diff --git a/lib/Target/ARM/Thumb1RegisterInfo.cpp b/lib/Target/ARM/Thumb1RegisterInfo.cpp new file mode 100644 index 0000000..92f01d1 --- /dev/null +++ b/lib/Target/ARM/Thumb1RegisterInfo.cpp @@ -0,0 +1,755 @@ +//===- Thumb1RegisterInfo.cpp - Thumb-1 Register Information -------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains the Thumb-1 implementation of the TargetRegisterInfo class. 
+// +//===----------------------------------------------------------------------===// + +#include "ARM.h" +#include "ARMAddressingModes.h" +#include "ARMMachineFunctionInfo.h" +#include "ARMSubtarget.h" +#include "Thumb1InstrInfo.h" +#include "Thumb1RegisterInfo.h" +#include "llvm/Constants.h" +#include "llvm/DerivedTypes.h" +#include "llvm/CodeGen/MachineConstantPool.h" +#include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineLocation.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/Target/TargetFrameInfo.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/ADT/BitVector.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/Support/CommandLine.h" +using namespace llvm; + +static cl::opt<bool> +ThumbRegScavenging("enable-thumb-reg-scavenging", + cl::Hidden, + cl::desc("Enable register scavenging on Thumb")); + +Thumb1RegisterInfo::Thumb1RegisterInfo(const TargetInstrInfo &tii, + const ARMSubtarget &sti) + : ARMBaseRegisterInfo(tii, sti) { +} + +/// emitLoadConstPool - Emits a load from constpool to materialize the +/// specified immediate. +void Thumb1RegisterInfo::emitLoadConstPool(MachineBasicBlock &MBB, + MachineBasicBlock::iterator &MBBI, + unsigned DestReg, int Val, + const TargetInstrInfo *TII, + DebugLoc dl) const { + MachineFunction &MF = *MBB.getParent(); + MachineConstantPool *ConstantPool = MF.getConstantPool(); + Constant *C = ConstantInt::get(Type::Int32Ty, Val); + unsigned Idx = ConstantPool->getConstantPoolIndex(C, 4); + + BuildMI(MBB, MBBI, dl, TII->get(ARM::tLDRcp), DestReg) + .addConstantPoolIndex(Idx); +} + +const TargetRegisterClass* +Thumb1RegisterInfo::getPhysicalRegisterRegClass(unsigned Reg, MVT VT) const { + if (isARMLowRegister(Reg)) + return ARM::tGPRRegisterClass; + switch (Reg) { + default: + break; + case ARM::R8: case ARM::R9: case ARM::R10: case ARM::R11: + case ARM::R12: case ARM::SP: case ARM::LR: case ARM::PC: + return ARM::GPRRegisterClass; + } + + return TargetRegisterInfo::getPhysicalRegisterRegClass(Reg, VT); +} + +bool +Thumb1RegisterInfo::requiresRegisterScavenging(const MachineFunction &MF) const { + return ThumbRegScavenging; +} + +bool Thumb1RegisterInfo::hasReservedCallFrame(MachineFunction &MF) const { + const MachineFrameInfo *FFI = MF.getFrameInfo(); + unsigned CFSize = FFI->getMaxCallFrameSize(); + // It's not always a good idea to include the call frame as part of the + // stack frame. ARM (especially Thumb) has small immediate offset to + // address the stack frame. So a large call frame can cause poor codegen + // and may even makes it impossible to scavenge a register. + if (CFSize >= ((1 << 8) - 1) * 4 / 2) // Half of imm8 * 4 + return false; + + return !MF.getFrameInfo()->hasVarSizedObjects(); +} + +/// emitThumbRegPlusImmInReg - Emits a series of instructions to materialize +/// a destreg = basereg + immediate in Thumb code. Materialize the immediate +/// in a register using mov / mvn sequences or load the immediate from a +/// constpool entry. +static +void emitThumbRegPlusImmInReg(MachineBasicBlock &MBB, + MachineBasicBlock::iterator &MBBI, + unsigned DestReg, unsigned BaseReg, + int NumBytes, bool CanChangeCC, + const TargetInstrInfo &TII, + const Thumb1RegisterInfo& MRI, + DebugLoc dl) { + bool isHigh = !isARMLowRegister(DestReg) || + (BaseReg != 0 && !isARMLowRegister(BaseReg)); + bool isSub = false; + // Subtract doesn't have high register version. 
Load the negative value + // if either base or dest register is a high register. Also, if do not + // issue sub as part of the sequence if condition register is to be + // preserved. + if (NumBytes < 0 && !isHigh && CanChangeCC) { + isSub = true; + NumBytes = -NumBytes; + } + unsigned LdReg = DestReg; + if (DestReg == ARM::SP) { + assert(BaseReg == ARM::SP && "Unexpected!"); + LdReg = ARM::R3; + BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVlor2hir), ARM::R12) + .addReg(ARM::R3, RegState::Kill); + } + + if (NumBytes <= 255 && NumBytes >= 0) + BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVi8), LdReg).addImm(NumBytes); + else if (NumBytes < 0 && NumBytes >= -255) { + BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVi8), LdReg).addImm(NumBytes); + BuildMI(MBB, MBBI, dl, TII.get(ARM::tNEG), LdReg) + .addReg(LdReg, RegState::Kill); + } else + MRI.emitLoadConstPool(MBB, MBBI, LdReg, NumBytes, &TII, dl); + + // Emit add / sub. + int Opc = (isSub) ? ARM::tSUBrr : (isHigh ? ARM::tADDhirr : ARM::tADDrr); + const MachineInstrBuilder MIB = BuildMI(MBB, MBBI, dl, + TII.get(Opc), DestReg); + if (DestReg == ARM::SP || isSub) + MIB.addReg(BaseReg).addReg(LdReg, RegState::Kill); + else + MIB.addReg(LdReg).addReg(BaseReg, RegState::Kill); + if (DestReg == ARM::SP) + BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVhir2lor), ARM::R3) + .addReg(ARM::R12, RegState::Kill); +} + +/// calcNumMI - Returns the number of instructions required to materialize +/// the specific add / sub r, c instruction. +static unsigned calcNumMI(int Opc, int ExtraOpc, unsigned Bytes, + unsigned NumBits, unsigned Scale) { + unsigned NumMIs = 0; + unsigned Chunk = ((1 << NumBits) - 1) * Scale; + + if (Opc == ARM::tADDrSPi) { + unsigned ThisVal = (Bytes > Chunk) ? Chunk : Bytes; + Bytes -= ThisVal; + NumMIs++; + NumBits = 8; + Scale = 1; // Followed by a number of tADDi8. + Chunk = ((1 << NumBits) - 1) * Scale; + } + + NumMIs += Bytes / Chunk; + if ((Bytes % Chunk) != 0) + NumMIs++; + if (ExtraOpc) + NumMIs++; + return NumMIs; +} + +/// emitThumbRegPlusImmediate - Emits a series of instructions to materialize +/// a destreg = basereg + immediate in Thumb code. +static +void emitThumbRegPlusImmediate(MachineBasicBlock &MBB, + MachineBasicBlock::iterator &MBBI, + unsigned DestReg, unsigned BaseReg, + int NumBytes, const TargetInstrInfo &TII, + const Thumb1RegisterInfo& MRI, + DebugLoc dl) { + bool isSub = NumBytes < 0; + unsigned Bytes = (unsigned)NumBytes; + if (isSub) Bytes = -NumBytes; + bool isMul4 = (Bytes & 3) == 0; + bool isTwoAddr = false; + bool DstNotEqBase = false; + unsigned NumBits = 1; + unsigned Scale = 1; + int Opc = 0; + int ExtraOpc = 0; + + if (DestReg == BaseReg && BaseReg == ARM::SP) { + assert(isMul4 && "Thumb sp inc / dec size must be multiple of 4!"); + NumBits = 7; + Scale = 4; + Opc = isSub ? ARM::tSUBspi : ARM::tADDspi; + isTwoAddr = true; + } else if (!isSub && BaseReg == ARM::SP) { + // r1 = add sp, 403 + // => + // r1 = add sp, 100 * 4 + // r1 = add r1, 3 + if (!isMul4) { + Bytes &= ~3; + ExtraOpc = ARM::tADDi3; + } + NumBits = 8; + Scale = 4; + Opc = ARM::tADDrSPi; + } else { + // sp = sub sp, c + // r1 = sub sp, c + // r8 = sub sp, c + if (DestReg != BaseReg) + DstNotEqBase = true; + NumBits = 8; + Opc = isSub ? ARM::tSUBi8 : ARM::tADDi8; + isTwoAddr = true; + } + + unsigned NumMIs = calcNumMI(Opc, ExtraOpc, Bytes, NumBits, Scale); + unsigned Threshold = (DestReg == ARM::SP) ? 3 : 2; + if (NumMIs > Threshold) { + // This will expand into too many instructions. Load the immediate from a + // constpool entry. 
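calcNumMI feeds the inline-versus-constpool decision made just above. Worked through for r1 = add sp, #1100 (tADDrSPi takes an 8-bit immediate scaled by 4, followed by tADDi8 chunks of up to 255):

  step 1: tADDrSPi covers min(1100, 255*4) = 1020 bytes, leaving 80
  step 2: tADDi8 covers the remaining 80 bytes
  NumMIs = 2; the threshold for a non-SP destination is 2, and 2 > 2 is
  false, so the addition is expanded inline instead of loading 1100 from
  a constant-pool entry.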
+    emitThumbRegPlusImmInReg(MBB, MBBI, DestReg, BaseReg, NumBytes, true, TII,
+                             MRI, dl);
+    return;
+  }
+
+  if (DstNotEqBase) {
+    if (isARMLowRegister(DestReg) && isARMLowRegister(BaseReg)) {
+      // If both are low registers, emit DestReg = add BaseReg, min(Imm, 7)
+      unsigned Chunk = (1 << 3) - 1;
+      unsigned ThisVal = (Bytes > Chunk) ? Chunk : Bytes;
+      Bytes -= ThisVal;
+      BuildMI(MBB, MBBI, dl, TII.get(isSub ? ARM::tSUBi3 : ARM::tADDi3), DestReg)
+        .addReg(BaseReg, RegState::Kill).addImm(ThisVal);
+    } else {
+      BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr), DestReg)
+        .addReg(BaseReg, RegState::Kill);
+    }
+    BaseReg = DestReg;
+  }
+
+  unsigned Chunk = ((1 << NumBits) - 1) * Scale;
+  while (Bytes) {
+    unsigned ThisVal = (Bytes > Chunk) ? Chunk : Bytes;
+    Bytes -= ThisVal;
+    ThisVal /= Scale;
+    // Build the new tADD / tSUB.
+    if (isTwoAddr)
+      BuildMI(MBB, MBBI, dl, TII.get(Opc), DestReg)
+        .addReg(DestReg).addImm(ThisVal);
+    else {
+      bool isKill = BaseReg != ARM::SP;
+      BuildMI(MBB, MBBI, dl, TII.get(Opc), DestReg)
+        .addReg(BaseReg, getKillRegState(isKill)).addImm(ThisVal);
+      BaseReg = DestReg;
+
+      if (Opc == ARM::tADDrSPi) {
+        // r4 = add sp, imm
+        // r4 = add r4, imm
+        // ...
+        NumBits = 8;
+        Scale = 1;
+        Chunk = ((1 << NumBits) - 1) * Scale;
+        Opc = isSub ? ARM::tSUBi8 : ARM::tADDi8;
+        isTwoAddr = true;
+      }
+    }
+  }
+
+  if (ExtraOpc)
+    BuildMI(MBB, MBBI, dl, TII.get(ExtraOpc), DestReg)
+      .addReg(DestReg, RegState::Kill)
+      .addImm(((unsigned)NumBytes) & 3);
+}
+
+static void emitSPUpdate(MachineBasicBlock &MBB,
+                         MachineBasicBlock::iterator &MBBI,
+                         const TargetInstrInfo &TII, DebugLoc dl,
+                         const Thumb1RegisterInfo &MRI,
+                         int NumBytes) {
+  emitThumbRegPlusImmediate(MBB, MBBI, ARM::SP, ARM::SP, NumBytes, TII,
+                            MRI, dl);
+}
+
+void Thumb1RegisterInfo::
+eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
+                              MachineBasicBlock::iterator I) const {
+  if (!hasReservedCallFrame(MF)) {
+    // If we have alloca, convert as follows:
+    // ADJCALLSTACKDOWN -> sub sp, sp, amount
+    // ADJCALLSTACKUP   -> add sp, sp, amount
+    MachineInstr *Old = I;
+    DebugLoc dl = Old->getDebugLoc();
+    unsigned Amount = Old->getOperand(0).getImm();
+    if (Amount != 0) {
+      // We need to keep the stack aligned properly. To do this, we round the
+      // amount of space needed for the outgoing arguments up to the next
+      // alignment boundary.
+      unsigned Align = MF.getTarget().getFrameInfo()->getStackAlignment();
+      Amount = (Amount+Align-1)/Align*Align;
+
+      // Replace the pseudo instruction with a new instruction...
+      unsigned Opc = Old->getOpcode();
+      if (Opc == ARM::ADJCALLSTACKDOWN || Opc == ARM::tADJCALLSTACKDOWN) {
+        emitSPUpdate(MBB, I, TII, dl, *this, -Amount);
+      } else {
+        assert(Opc == ARM::ADJCALLSTACKUP || Opc == ARM::tADJCALLSTACKUP);
+        emitSPUpdate(MBB, I, TII, dl, *this, Amount);
+      }
+    }
+  }
+  MBB.erase(I);
+}
+
+/// emitThumbConstant - Emit a series of instructions to materialize a
+/// constant.
+static void emitThumbConstant(MachineBasicBlock &MBB,
+                              MachineBasicBlock::iterator &MBBI,
+                              unsigned DestReg, int Imm,
+                              const TargetInstrInfo &TII,
+                              const Thumb1RegisterInfo& MRI,
+                              DebugLoc dl) {
+  bool isSub = Imm < 0;
+  if (isSub) Imm = -Imm;
+
+  int Chunk = (1 << 8) - 1;
+  int ThisVal = (Imm > Chunk) ? Chunk : Imm;
+  Imm -= ThisVal;
+  BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVi8), DestReg).addImm(ThisVal);
+  if (Imm > 0)
+    emitThumbRegPlusImmediate(MBB, MBBI, DestReg, DestReg, Imm, TII, MRI, dl);
+  if (isSub)
+    BuildMI(MBB, MBBI, dl, TII.get(ARM::tNEG), DestReg)
+      .addReg(DestReg, RegState::Kill);
+}
+
+void Thumb1RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
+                                             int SPAdj, RegScavenger *RS) const {
+  unsigned i = 0;
+  MachineInstr &MI = *II;
+  MachineBasicBlock &MBB = *MI.getParent();
+  MachineFunction &MF = *MBB.getParent();
+  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
+  DebugLoc dl = MI.getDebugLoc();
+
+  while (!MI.getOperand(i).isFI()) {
+    ++i;
+    assert(i < MI.getNumOperands() && "Instr doesn't have FrameIndex operand!");
+  }
+
+  unsigned FrameReg = ARM::SP;
+  int FrameIndex = MI.getOperand(i).getIndex();
+  int Offset = MF.getFrameInfo()->getObjectOffset(FrameIndex) +
+               MF.getFrameInfo()->getStackSize() + SPAdj;
+
+  if (AFI->isGPRCalleeSavedArea1Frame(FrameIndex))
+    Offset -= AFI->getGPRCalleeSavedArea1Offset();
+  else if (AFI->isGPRCalleeSavedArea2Frame(FrameIndex))
+    Offset -= AFI->getGPRCalleeSavedArea2Offset();
+  else if (hasFP(MF)) {
+    assert(SPAdj == 0 && "Unexpected");
+    // There are alloca()s in this function; we must reference off the frame
+    // pointer instead.
+    FrameReg = getFrameRegister(MF);
+    Offset -= AFI->getFramePtrSpillOffset();
+  }
+
+  unsigned Opcode = MI.getOpcode();
+  const TargetInstrDesc &Desc = MI.getDesc();
+  unsigned AddrMode = (Desc.TSFlags & ARMII::AddrModeMask);
+
+  if (Opcode == ARM::tADDrSPi) {
+    Offset += MI.getOperand(i+1).getImm();
+
+    // Can't use tADDrSPi if it's based off the frame pointer.
+    unsigned NumBits = 0;
+    unsigned Scale = 1;
+    if (FrameReg != ARM::SP) {
+      Opcode = ARM::tADDi3;
+      MI.setDesc(TII.get(ARM::tADDi3));
+      NumBits = 3;
+    } else {
+      NumBits = 8;
+      Scale = 4;
+      assert((Offset & 3) == 0 &&
+             "Thumb add/sub sp, #imm immediate must be multiple of 4!");
+    }
+
+    if (Offset == 0) {
+      // Turn it into a move.
+      MI.setDesc(TII.get(ARM::tMOVhir2lor));
+      MI.getOperand(i).ChangeToRegister(FrameReg, false);
+      MI.RemoveOperand(i+1);
+      return;
+    }
+
+    // Common case: small offset, fits into instruction.
+    unsigned Mask = (1 << NumBits) - 1;
+    if (((Offset / Scale) & ~Mask) == 0) {
+      // Replace the FrameIndex with sp / fp
+      MI.getOperand(i).ChangeToRegister(FrameReg, false);
+      MI.getOperand(i+1).ChangeToImmediate(Offset / Scale);
+      return;
+    }
+
+    unsigned DestReg = MI.getOperand(0).getReg();
+    unsigned Bytes = (Offset > 0) ? Offset : -Offset;
+    unsigned NumMIs = calcNumMI(Opcode, 0, Bytes, NumBits, Scale);
+    // MI would expand into a large number of instructions. Don't try to
+    // simplify the immediate.
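+    // Illustrative worked example (editorial; offsets are hypothetical):
+    // for tADDrSPi, NumBits = 8 and Scale = 4, so the largest encodable
+    // offset is Mask * Scale = 255 * 4 = 1020. An offset of 1024 expands to
+    // only two instructions (add #1020, then add #4) and is handled inline
+    // below, while an offset of 4096 would need 14 and is handed off to
+    // emitThumbRegPlusImmediate instead.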
+    if (NumMIs > 2) {
+      emitThumbRegPlusImmediate(MBB, II, DestReg, FrameReg, Offset, TII,
+                                *this, dl);
+      MBB.erase(II);
+      return;
+    }
+
+    if (Offset > 0) {
+      // Translate r0 = add sp, imm to
+      // r0 = add sp, 255*4
+      // r0 = add r0, (imm - 255*4)
+      MI.getOperand(i).ChangeToRegister(FrameReg, false);
+      MI.getOperand(i+1).ChangeToImmediate(Mask);
+      Offset = (Offset - Mask * Scale);
+      MachineBasicBlock::iterator NII = next(II);
+      emitThumbRegPlusImmediate(MBB, NII, DestReg, DestReg, Offset, TII,
+                                *this, dl);
+    } else {
+      // Translate r0 = add sp, -imm to
+      // r0 = -imm (this is then translated into a series of instructions)
+      // r0 = add r0, sp
+      emitThumbConstant(MBB, II, DestReg, Offset, TII, *this, dl);
+      MI.setDesc(TII.get(ARM::tADDhirr));
+      MI.getOperand(i).ChangeToRegister(DestReg, false, false, true);
+      MI.getOperand(i+1).ChangeToRegister(FrameReg, false);
+    }
+    return;
+  } else {
+    unsigned ImmIdx = 0;
+    int InstrOffs = 0;
+    unsigned NumBits = 0;
+    unsigned Scale = 1;
+    switch (AddrMode) {
+    case ARMII::AddrModeT1_s: {
+      ImmIdx = i+1;
+      InstrOffs = MI.getOperand(ImmIdx).getImm();
+      NumBits = (FrameReg == ARM::SP) ? 8 : 5;
+      Scale = 4;
+      break;
+    }
+    default:
+      assert(0 && "Unsupported addressing mode!");
+      abort();
+      break;
+    }
+
+    Offset += InstrOffs * Scale;
+    assert((Offset & (Scale-1)) == 0 && "Can't encode this offset!");
+
+    // Common case: small offset, fits into instruction.
+    MachineOperand &ImmOp = MI.getOperand(ImmIdx);
+    int ImmedOffset = Offset / Scale;
+    unsigned Mask = (1 << NumBits) - 1;
+    if ((unsigned)Offset <= Mask * Scale) {
+      // Replace the FrameIndex with sp
+      MI.getOperand(i).ChangeToRegister(FrameReg, false);
+      ImmOp.ChangeToImmediate(ImmedOffset);
+      return;
+    }
+
+    bool isThumbSpillRestore = Opcode == ARM::tRestore || Opcode == ARM::tSpill;
+    if (AddrMode == ARMII::AddrModeT1_s) {
+      // Thumb tLDRspi, tSTRspi. These will change to instructions that use
+      // a different base register.
+      NumBits = 5;
+      Mask = (1 << NumBits) - 1;
+    }
+    // If this is a Thumb spill / restore, we will be using a constpool load
+    // to materialize the offset.
+    if (AddrMode == ARMII::AddrModeT1_s && isThumbSpillRestore)
+      ImmOp.ChangeToImmediate(0);
+    else {
+      // Otherwise, it didn't fit. Pull in what we can to simplify the immed.
+      ImmedOffset = ImmedOffset & Mask;
+      ImmOp.ChangeToImmediate(ImmedOffset);
+      Offset &= ~(Mask*Scale);
+    }
+  }
+
+  // If we get here, the immediate doesn't fit into the instruction. We folded
+  // as much as possible above, handle the rest, providing a register that is
+  // SP+LargeImm.
+  assert(Offset && "This code isn't needed if offset already handled!");
+
+  if (Desc.mayLoad()) {
+    // Use the destination register to materialize sp + offset.
+    unsigned TmpReg = MI.getOperand(0).getReg();
+    bool UseRR = false;
+    if (Opcode == ARM::tRestore) {
+      if (FrameReg == ARM::SP)
+        emitThumbRegPlusImmInReg(MBB, II, TmpReg, FrameReg,
+                                 Offset, false, TII, *this, dl);
+      else {
+        emitLoadConstPool(MBB, II, TmpReg, Offset, &TII, dl);
+        UseRR = true;
+      }
+    } else
+      emitThumbRegPlusImmediate(MBB, II, TmpReg, FrameReg, Offset, TII,
+                                *this, dl);
+    MI.setDesc(TII.get(ARM::tLDR));
+    MI.getOperand(i).ChangeToRegister(TmpReg, false, false, true);
+    if (UseRR)
+      // Use [reg, reg] addrmode.
+      MI.addOperand(MachineOperand::CreateReg(FrameReg, false));
+    else  // tLDR has an extra register operand.
+      MI.addOperand(MachineOperand::CreateReg(0, false));
+  } else if (Desc.mayStore()) {
+    // FIXME! This is horrific!!! We need register scavenging.
+    // Our temporary workaround has marked r3 unavailable. Of course, r3 is
+    // also an ABI register, so it's possible that it is the register being
+    // stored here. If that's the case, we do the following:
+    //   r12 = r2
+    //   Use r2 to materialize sp + offset
+    //   str r3, r2
+    //   r2 = r12
+    unsigned ValReg = MI.getOperand(0).getReg();
+    unsigned TmpReg = ARM::R3;
+    bool UseRR = false;
+    if (ValReg == ARM::R3) {
+      BuildMI(MBB, II, dl, TII.get(ARM::tMOVlor2hir), ARM::R12)
+        .addReg(ARM::R2, RegState::Kill);
+      TmpReg = ARM::R2;
+    }
+    if (TmpReg == ARM::R3 && AFI->isR3LiveIn())
+      BuildMI(MBB, II, dl, TII.get(ARM::tMOVlor2hir), ARM::R12)
+        .addReg(ARM::R3, RegState::Kill);
+    if (Opcode == ARM::tSpill) {
+      if (FrameReg == ARM::SP)
+        emitThumbRegPlusImmInReg(MBB, II, TmpReg, FrameReg,
+                                 Offset, false, TII, *this, dl);
+      else {
+        emitLoadConstPool(MBB, II, TmpReg, Offset, &TII, dl);
+        UseRR = true;
+      }
+    } else
+      emitThumbRegPlusImmediate(MBB, II, TmpReg, FrameReg, Offset, TII,
+                                *this, dl);
+    MI.setDesc(TII.get(ARM::tSTR));
+    MI.getOperand(i).ChangeToRegister(TmpReg, false, false, true);
+    if (UseRR)  // Use [reg, reg] addrmode.
+      MI.addOperand(MachineOperand::CreateReg(FrameReg, false));
+    else  // tSTR has an extra register operand.
+      MI.addOperand(MachineOperand::CreateReg(0, false));
+
+    MachineBasicBlock::iterator NII = next(II);
+    if (ValReg == ARM::R3)
+      BuildMI(MBB, NII, dl, TII.get(ARM::tMOVhir2lor), ARM::R2)
+        .addReg(ARM::R12, RegState::Kill);
+    if (TmpReg == ARM::R3 && AFI->isR3LiveIn())
+      BuildMI(MBB, NII, dl, TII.get(ARM::tMOVhir2lor), ARM::R3)
+        .addReg(ARM::R12, RegState::Kill);
+  } else
+    assert(false && "Unexpected opcode!");
+}
+
+void Thumb1RegisterInfo::emitPrologue(MachineFunction &MF) const {
+  MachineBasicBlock &MBB = MF.front();
+  MachineBasicBlock::iterator MBBI = MBB.begin();
+  MachineFrameInfo *MFI = MF.getFrameInfo();
+  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
+  unsigned VARegSaveSize = AFI->getVarArgsRegSaveSize();
+  unsigned NumBytes = MFI->getStackSize();
+  const std::vector<CalleeSavedInfo> &CSI = MFI->getCalleeSavedInfo();
+  DebugLoc dl = (MBBI != MBB.end() ?
+                 MBBI->getDebugLoc() : DebugLoc::getUnknownLoc());
+
+  // Check if R3 is live in. It might have to be used as a scratch register.
+  for (MachineRegisterInfo::livein_iterator I = MF.getRegInfo().livein_begin(),
+       E = MF.getRegInfo().livein_end(); I != E; ++I) {
+    if (I->first == ARM::R3) {
+      AFI->setR3IsLiveIn(true);
+      break;
+    }
+  }
+
+  // Thumb add/sub sp, imm8 instructions implicitly multiply the offset by 4.
+  NumBytes = (NumBytes + 3) & ~3;
+  MFI->setStackSize(NumBytes);
+
+  // Determine the sizes of the callee-save spill areas and record which
+  // frame index belongs to which area.
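+  // Illustrative layout (editorial; the sizes are hypothetical): with
+  // GPRCS1Size = 20 (r4-r7, lr), GPRCS2Size = 16 (r8-r11 on Darwin),
+  // DPRCSSize = 0 and a 64-byte frame, the offsets computed below come out as
+  //   DPRCSOffset  = 64 - (20 + 16 + 0) = 28
+  //   GPRCS2Offset = 28 + 0  = 28
+  //   GPRCS1Offset = 28 + 16 = 44
+  // so area 1 sits nearest the incoming SP, with area 2 and then the locals
+  // below it.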
+ unsigned GPRCS1Size = 0, GPRCS2Size = 0, DPRCSSize = 0; + int FramePtrSpillFI = 0; + + if (VARegSaveSize) + emitSPUpdate(MBB, MBBI, TII, dl, *this, -VARegSaveSize); + + if (!AFI->hasStackFrame()) { + if (NumBytes != 0) + emitSPUpdate(MBB, MBBI, TII, dl, *this, -NumBytes); + return; + } + + for (unsigned i = 0, e = CSI.size(); i != e; ++i) { + unsigned Reg = CSI[i].getReg(); + int FI = CSI[i].getFrameIdx(); + switch (Reg) { + case ARM::R4: + case ARM::R5: + case ARM::R6: + case ARM::R7: + case ARM::LR: + if (Reg == FramePtr) + FramePtrSpillFI = FI; + AFI->addGPRCalleeSavedArea1Frame(FI); + GPRCS1Size += 4; + break; + case ARM::R8: + case ARM::R9: + case ARM::R10: + case ARM::R11: + if (Reg == FramePtr) + FramePtrSpillFI = FI; + if (STI.isTargetDarwin()) { + AFI->addGPRCalleeSavedArea2Frame(FI); + GPRCS2Size += 4; + } else { + AFI->addGPRCalleeSavedArea1Frame(FI); + GPRCS1Size += 4; + } + break; + default: + AFI->addDPRCalleeSavedAreaFrame(FI); + DPRCSSize += 8; + } + } + + if (MBBI != MBB.end() && MBBI->getOpcode() == ARM::tPUSH) { + ++MBBI; + if (MBBI != MBB.end()) + dl = MBBI->getDebugLoc(); + } + + // Darwin ABI requires FP to point to the stack slot that contains the + // previous FP. + if (STI.isTargetDarwin() || hasFP(MF)) { + MachineInstrBuilder MIB = + BuildMI(MBB, MBBI, dl, TII.get(ARM::tADDrSPi), FramePtr) + .addFrameIndex(FramePtrSpillFI).addImm(0); + } + + // Determine starting offsets of spill areas. + unsigned DPRCSOffset = NumBytes - (GPRCS1Size + GPRCS2Size + DPRCSSize); + unsigned GPRCS2Offset = DPRCSOffset + DPRCSSize; + unsigned GPRCS1Offset = GPRCS2Offset + GPRCS2Size; + AFI->setFramePtrSpillOffset(MFI->getObjectOffset(FramePtrSpillFI) + NumBytes); + AFI->setGPRCalleeSavedArea1Offset(GPRCS1Offset); + AFI->setGPRCalleeSavedArea2Offset(GPRCS2Offset); + AFI->setDPRCalleeSavedAreaOffset(DPRCSOffset); + + NumBytes = DPRCSOffset; + if (NumBytes) { + // Insert it after all the callee-save spills. + emitSPUpdate(MBB, MBBI, TII, dl, *this, -NumBytes); + } + + if (STI.isTargetELF() && hasFP(MF)) { + MFI->setOffsetAdjustment(MFI->getOffsetAdjustment() - + AFI->getFramePtrSpillOffset()); + } + + AFI->setGPRCalleeSavedArea1Size(GPRCS1Size); + AFI->setGPRCalleeSavedArea2Size(GPRCS2Size); + AFI->setDPRCalleeSavedAreaSize(DPRCSSize); +} + +static bool isCalleeSavedRegister(unsigned Reg, const unsigned *CSRegs) { + for (unsigned i = 0; CSRegs[i]; ++i) + if (Reg == CSRegs[i]) + return true; + return false; +} + +static bool isCSRestore(MachineInstr *MI, const unsigned *CSRegs) { + return (MI->getOpcode() == ARM::tRestore && + MI->getOperand(1).isFI() && + isCalleeSavedRegister(MI->getOperand(0).getReg(), CSRegs)); +} + +void Thumb1RegisterInfo::emitEpilogue(MachineFunction &MF, + MachineBasicBlock &MBB) const { + MachineBasicBlock::iterator MBBI = prior(MBB.end()); + assert((MBBI->getOpcode() == ARM::tBX_RET || + MBBI->getOpcode() == ARM::tPOP_RET) && + "Can only insert epilog into returning blocks"); + DebugLoc dl = MBBI->getDebugLoc(); + MachineFrameInfo *MFI = MF.getFrameInfo(); + ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); + unsigned VARegSaveSize = AFI->getVarArgsRegSaveSize(); + int NumBytes = (int)MFI->getStackSize(); + + if (!AFI->hasStackFrame()) { + if (NumBytes != 0) + emitSPUpdate(MBB, MBBI, TII, dl, *this, NumBytes); + } else { + // Unwind MBBI to point to first LDR / FLDD. 
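+    // Illustrative worked example (editorial; hypothetical sizes): with a
+    // 32-byte frame, 12 bytes of GPR callee saves and a frame-pointer spill
+    // offset of 28, NumBytes becomes 32 - 12 = 20; with a frame pointer it
+    // is then re-derived as 28 - 20 = 8 and SP is reset to FP - 8, otherwise
+    // SP is simply bumped by 20.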
+ const unsigned *CSRegs = getCalleeSavedRegs(); + if (MBBI != MBB.begin()) { + do + --MBBI; + while (MBBI != MBB.begin() && isCSRestore(MBBI, CSRegs)); + if (!isCSRestore(MBBI, CSRegs)) + ++MBBI; + } + + // Move SP to start of FP callee save spill area. + NumBytes -= (AFI->getGPRCalleeSavedArea1Size() + + AFI->getGPRCalleeSavedArea2Size() + + AFI->getDPRCalleeSavedAreaSize()); + + if (hasFP(MF)) { + NumBytes = AFI->getFramePtrSpillOffset() - NumBytes; + // Reset SP based on frame pointer only if the stack frame extends beyond + // frame pointer stack slot or target is ELF and the function has FP. + if (NumBytes) + emitThumbRegPlusImmediate(MBB, MBBI, ARM::SP, FramePtr, -NumBytes, + TII, *this, dl); + else + BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVlor2hir), ARM::SP) + .addReg(FramePtr); + } else { + if (MBBI->getOpcode() == ARM::tBX_RET && + &MBB.front() != MBBI && + prior(MBBI)->getOpcode() == ARM::tPOP) { + MachineBasicBlock::iterator PMBBI = prior(MBBI); + emitSPUpdate(MBB, PMBBI, TII, dl, *this, NumBytes); + } else + emitSPUpdate(MBB, MBBI, TII, dl, *this, NumBytes); + } + } + + if (VARegSaveSize) { + // Epilogue for vararg functions: pop LR to R3 and branch off it. + // FIXME: Verify this is still ok when R3 is no longer being reserved. + BuildMI(MBB, MBBI, dl, TII.get(ARM::tPOP)).addReg(ARM::R3); + + emitSPUpdate(MBB, MBBI, TII, dl, *this, VARegSaveSize); + + BuildMI(MBB, MBBI, dl, TII.get(ARM::tBX_RET_vararg)).addReg(ARM::R3); + MBB.erase(MBBI); + } +} diff --git a/lib/Target/ARM/Thumb1RegisterInfo.h b/lib/Target/ARM/Thumb1RegisterInfo.h new file mode 100644 index 0000000..6d4f1f0 --- /dev/null +++ b/lib/Target/ARM/Thumb1RegisterInfo.h @@ -0,0 +1,60 @@ +//===- Thumb1RegisterInfo.h - Thumb-1 Register Information Impl ----*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains the Thumb-1 implementation of the TargetRegisterInfo class. +// +//===----------------------------------------------------------------------===// + +#ifndef THUMB1REGISTERINFO_H +#define THUMB1REGISTERINFO_H + +#include "ARM.h" +#include "ARMRegisterInfo.h" +#include "llvm/Target/TargetRegisterInfo.h" + +namespace llvm { + class ARMSubtarget; + class TargetInstrInfo; + class Type; + +struct Thumb1RegisterInfo : public ARMBaseRegisterInfo { +public: + Thumb1RegisterInfo(const TargetInstrInfo &tii, const ARMSubtarget &STI); + + /// emitLoadConstPool - Emits a load from constpool to materialize the + /// specified immediate. + void emitLoadConstPool(MachineBasicBlock &MBB, + MachineBasicBlock::iterator &MBBI, + unsigned DestReg, int Val, + const TargetInstrInfo *TII, + DebugLoc dl) const; + + /// Code Generation virtual methods... 
+ const TargetRegisterClass * + getPhysicalRegisterRegClass(unsigned Reg, MVT VT = MVT::Other) const; + + bool isReservedReg(const MachineFunction &MF, unsigned Reg) const; + + bool requiresRegisterScavenging(const MachineFunction &MF) const; + + bool hasReservedCallFrame(MachineFunction &MF) const; + + void eliminateCallFramePseudoInstr(MachineFunction &MF, + MachineBasicBlock &MBB, + MachineBasicBlock::iterator I) const; + + void eliminateFrameIndex(MachineBasicBlock::iterator II, + int SPAdj, RegScavenger *RS = NULL) const; + + void emitPrologue(MachineFunction &MF) const; + void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const; +}; +} + +#endif // THUMB1REGISTERINFO_H diff --git a/lib/Target/ARM/ThumbInstrInfo.cpp b/lib/Target/ARM/Thumb2InstrInfo.cpp index 075d940..35d09fd 100644 --- a/lib/Target/ARM/ThumbInstrInfo.cpp +++ b/lib/Target/ARM/Thumb2InstrInfo.cpp @@ -1,4 +1,4 @@ -//===- ThumbInstrInfo.cpp - Thumb Instruction Information --------*- C++ -*-===// +//===- Thumb2InstrInfo.cpp - Thumb-2 Instruction Information --------*- C++ -*-===// // // The LLVM Compiler Infrastructure // @@ -7,7 +7,7 @@ // //===----------------------------------------------------------------------===// // -// This file contains the Thumb implementation of the TargetInstrInfo class. +// This file contains the Thumb-2 implementation of the TargetInstrInfo class. // //===----------------------------------------------------------------------===// @@ -18,17 +18,17 @@ #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/ADT/SmallVector.h" -#include "ThumbInstrInfo.h" +#include "Thumb2InstrInfo.h" using namespace llvm; -ThumbInstrInfo::ThumbInstrInfo(const ARMSubtarget &STI) - : ARMBaseInstrInfo(STI) { +Thumb2InstrInfo::Thumb2InstrInfo(const ARMSubtarget &STI) + : ARMBaseInstrInfo(STI), RI(*this, STI) { } -bool ThumbInstrInfo::isMoveInstr(const MachineInstr &MI, - unsigned &SrcReg, unsigned &DstReg, - unsigned& SrcSubIdx, unsigned& DstSubIdx) const { +bool Thumb2InstrInfo::isMoveInstr(const MachineInstr &MI, + unsigned &SrcReg, unsigned &DstReg, + unsigned& SrcSubIdx, unsigned& DstSubIdx) const { SrcSubIdx = DstSubIdx = 0; // No sub-registers. 
unsigned oc = MI.getOpcode(); @@ -50,8 +50,8 @@ bool ThumbInstrInfo::isMoveInstr(const MachineInstr &MI, } } -unsigned ThumbInstrInfo::isLoadFromStackSlot(const MachineInstr *MI, - int &FrameIndex) const { +unsigned Thumb2InstrInfo::isLoadFromStackSlot(const MachineInstr *MI, + int &FrameIndex) const { switch (MI->getOpcode()) { default: break; // FIXME: Thumb2 @@ -67,8 +67,8 @@ unsigned ThumbInstrInfo::isLoadFromStackSlot(const MachineInstr *MI, return 0; } -unsigned ThumbInstrInfo::isStoreToStackSlot(const MachineInstr *MI, - int &FrameIndex) const { +unsigned Thumb2InstrInfo::isStoreToStackSlot(const MachineInstr *MI, + int &FrameIndex) const { switch (MI->getOpcode()) { default: break; // FIXME: Thumb2 @@ -84,11 +84,11 @@ unsigned ThumbInstrInfo::isStoreToStackSlot(const MachineInstr *MI, return 0; } -bool ThumbInstrInfo::copyRegToReg(MachineBasicBlock &MBB, - MachineBasicBlock::iterator I, - unsigned DestReg, unsigned SrcReg, - const TargetRegisterClass *DestRC, - const TargetRegisterClass *SrcRC) const { +bool Thumb2InstrInfo::copyRegToReg(MachineBasicBlock &MBB, + MachineBasicBlock::iterator I, + unsigned DestReg, unsigned SrcReg, + const TargetRegisterClass *DestRC, + const TargetRegisterClass *SrcRC) const { DebugLoc DL = DebugLoc::getUnknownLoc(); if (I != MBB.end()) DL = I->getDebugLoc(); @@ -114,7 +114,38 @@ bool ThumbInstrInfo::copyRegToReg(MachineBasicBlock &MBB, return false; } -void ThumbInstrInfo:: +bool Thumb2InstrInfo:: +canFoldMemoryOperand(const MachineInstr *MI, + const SmallVectorImpl<unsigned> &Ops) const { + if (Ops.size() != 1) return false; + + unsigned OpNum = Ops[0]; + unsigned Opc = MI->getOpcode(); + switch (Opc) { + default: break; + case ARM::tMOVr: + case ARM::tMOVlor2hir: + case ARM::tMOVhir2lor: + case ARM::tMOVhir2hir: { + if (OpNum == 0) { // move -> store + unsigned SrcReg = MI->getOperand(1).getReg(); + if (RI.isPhysicalRegister(SrcReg) && !isARMLowRegister(SrcReg)) + // tSpill cannot take a high register operand. + return false; + } else { // move -> load + unsigned DstReg = MI->getOperand(0).getReg(); + if (RI.isPhysicalRegister(DstReg) && !isARMLowRegister(DstReg)) + // tRestore cannot target a high register operand. 
+ return false; + } + return true; + } + } + + return false; +} + +void Thumb2InstrInfo:: storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, unsigned SrcReg, bool isKill, int FI, const TargetRegisterClass *RC) const { @@ -131,11 +162,11 @@ storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, } } -void ThumbInstrInfo::storeRegToAddr(MachineFunction &MF, unsigned SrcReg, - bool isKill, - SmallVectorImpl<MachineOperand> &Addr, - const TargetRegisterClass *RC, - SmallVectorImpl<MachineInstr*> &NewMIs) const{ +void Thumb2InstrInfo::storeRegToAddr(MachineFunction &MF, unsigned SrcReg, + bool isKill, + SmallVectorImpl<MachineOperand> &Addr, + const TargetRegisterClass *RC, + SmallVectorImpl<MachineInstr*> &NewMIs) const{ DebugLoc DL = DebugLoc::getUnknownLoc(); unsigned Opc = 0; @@ -153,7 +184,7 @@ void ThumbInstrInfo::storeRegToAddr(MachineFunction &MF, unsigned SrcReg, return; } -void ThumbInstrInfo:: +void Thumb2InstrInfo:: loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, unsigned DestReg, int FI, const TargetRegisterClass *RC) const { @@ -169,7 +200,7 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, } } -void ThumbInstrInfo:: +void Thumb2InstrInfo:: loadRegFromAddr(MachineFunction &MF, unsigned DestReg, SmallVectorImpl<MachineOperand> &Addr, const TargetRegisterClass *RC, @@ -189,7 +220,7 @@ loadRegFromAddr(MachineFunction &MF, unsigned DestReg, return; } -bool ThumbInstrInfo:: +bool Thumb2InstrInfo:: spillCalleeSavedRegisters(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, const std::vector<CalleeSavedInfo> &CSI) const { @@ -209,7 +240,7 @@ spillCalleeSavedRegisters(MachineBasicBlock &MBB, return true; } -bool ThumbInstrInfo:: +bool Thumb2InstrInfo:: restoreCalleeSavedRegisters(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, const std::vector<CalleeSavedInfo> &CSI) const { @@ -240,11 +271,10 @@ restoreCalleeSavedRegisters(MachineBasicBlock &MBB, return true; } -MachineInstr *ThumbInstrInfo:: +MachineInstr *Thumb2InstrInfo:: foldMemoryOperandImpl(MachineFunction &MF, MachineInstr *MI, const SmallVectorImpl<unsigned> &Ops, int FI) const { if (Ops.size() != 1) return NULL; - const ARMRegisterInfo &RI = getRegisterInfo(); unsigned OpNum = Ops[0]; unsigned Opc = MI->getOpcode(); @@ -258,7 +288,7 @@ foldMemoryOperandImpl(MachineFunction &MF, MachineInstr *MI, if (OpNum == 0) { // move -> store unsigned SrcReg = MI->getOperand(1).getReg(); bool isKill = MI->getOperand(1).isKill(); - if (RI.isPhysicalRegister(SrcReg) && !RI.isLowRegister(SrcReg)) + if (RI.isPhysicalRegister(SrcReg) && !isARMLowRegister(SrcReg)) // tSpill cannot take a high register operand. break; NewMI = BuildMI(MF, MI->getDebugLoc(), get(ARM::tSpill)) @@ -266,7 +296,7 @@ foldMemoryOperandImpl(MachineFunction &MF, MachineInstr *MI, .addFrameIndex(FI).addImm(0); } else { // move -> load unsigned DstReg = MI->getOperand(0).getReg(); - if (RI.isPhysicalRegister(DstReg) && !RI.isLowRegister(DstReg)) + if (RI.isPhysicalRegister(DstReg) && !isARMLowRegister(DstReg)) // tRestore cannot target a high register operand. 
break; bool isDead = MI->getOperand(0).isDead(); diff --git a/lib/Target/ARM/Thumb2InstrInfo.h b/lib/Target/ARM/Thumb2InstrInfo.h new file mode 100644 index 0000000..84dcb49 --- /dev/null +++ b/lib/Target/ARM/Thumb2InstrInfo.h @@ -0,0 +1,93 @@ +//===- Thumb2InstrInfo.h - Thumb-2 Instruction Information ----------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains the Thumb-2 implementation of the TargetInstrInfo class. +// +//===----------------------------------------------------------------------===// + +#ifndef THUMB2INSTRUCTIONINFO_H +#define THUMB2INSTRUCTIONINFO_H + +#include "llvm/Target/TargetInstrInfo.h" +#include "ARM.h" +#include "ARMInstrInfo.h" +#include "Thumb2RegisterInfo.h" + +namespace llvm { + class ARMSubtarget; + +class Thumb2InstrInfo : public ARMBaseInstrInfo { + Thumb2RegisterInfo RI; +public: + explicit Thumb2InstrInfo(const ARMSubtarget &STI); + + /// getRegisterInfo - TargetInstrInfo is a superset of MRegister info. As + /// such, whenever a client has an instance of instruction info, it should + /// always be able to get register info as well (through this method). + /// + const Thumb2RegisterInfo &getRegisterInfo() const { return RI; } + + bool spillCalleeSavedRegisters(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MI, + const std::vector<CalleeSavedInfo> &CSI) const; + bool restoreCalleeSavedRegisters(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MI, + const std::vector<CalleeSavedInfo> &CSI) const; + + bool isMoveInstr(const MachineInstr &MI, + unsigned &SrcReg, unsigned &DstReg, + unsigned &SrcSubIdx, unsigned &DstSubIdx) const; + unsigned isLoadFromStackSlot(const MachineInstr *MI, + int &FrameIndex) const; + unsigned isStoreToStackSlot(const MachineInstr *MI, + int &FrameIndex) const; + + bool copyRegToReg(MachineBasicBlock &MBB, + MachineBasicBlock::iterator I, + unsigned DestReg, unsigned SrcReg, + const TargetRegisterClass *DestRC, + const TargetRegisterClass *SrcRC) const; + void storeRegToStackSlot(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBI, + unsigned SrcReg, bool isKill, int FrameIndex, + const TargetRegisterClass *RC) const; + + void storeRegToAddr(MachineFunction &MF, unsigned SrcReg, bool isKill, + SmallVectorImpl<MachineOperand> &Addr, + const TargetRegisterClass *RC, + SmallVectorImpl<MachineInstr*> &NewMIs) const; + + void loadRegFromStackSlot(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBI, + unsigned DestReg, int FrameIndex, + const TargetRegisterClass *RC) const; + + void loadRegFromAddr(MachineFunction &MF, unsigned DestReg, + SmallVectorImpl<MachineOperand> &Addr, + const TargetRegisterClass *RC, + SmallVectorImpl<MachineInstr*> &NewMIs) const; + + bool canFoldMemoryOperand(const MachineInstr *MI, + const SmallVectorImpl<unsigned> &Ops) const; + + MachineInstr* foldMemoryOperandImpl(MachineFunction &MF, + MachineInstr* MI, + const SmallVectorImpl<unsigned> &Ops, + int FrameIndex) const; + + MachineInstr* foldMemoryOperandImpl(MachineFunction &MF, + MachineInstr* MI, + const SmallVectorImpl<unsigned> &Ops, + MachineInstr* LoadMI) const { + return 0; + } +}; +} + +#endif // THUMB2INSTRUCTIONINFO_H diff --git a/lib/Target/ARM/Thumb2RegisterInfo.cpp b/lib/Target/ARM/Thumb2RegisterInfo.cpp new file mode 100644 index 0000000..0f0c0e4 --- /dev/null +++ 
b/lib/Target/ARM/Thumb2RegisterInfo.cpp
@@ -0,0 +1,755 @@
+//===- Thumb2RegisterInfo.cpp - Thumb-2 Register Information -------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the Thumb-2 implementation of the TargetRegisterInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "ARM.h"
+#include "ARMAddressingModes.h"
+#include "ARMMachineFunctionInfo.h"
+#include "ARMSubtarget.h"
+#include "Thumb2InstrInfo.h"
+#include "Thumb2RegisterInfo.h"
+#include "llvm/Constants.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/CodeGen/MachineConstantPool.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineLocation.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/Target/TargetFrameInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/ADT/BitVector.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/Support/CommandLine.h"
+using namespace llvm;
+
+static cl::opt<bool>
+Thumb2RegScavenging("enable-thumb2-reg-scavenging",
+                    cl::Hidden,
+                    cl::desc("Enable register scavenging on Thumb-2"));
+
+Thumb2RegisterInfo::Thumb2RegisterInfo(const TargetInstrInfo &tii,
+                                       const ARMSubtarget &sti)
+  : ARMBaseRegisterInfo(tii, sti) {
+}
+
+/// emitLoadConstPool - Emits a load from constpool to materialize the
+/// specified immediate.
+void Thumb2RegisterInfo::emitLoadConstPool(MachineBasicBlock &MBB,
+                                           MachineBasicBlock::iterator &MBBI,
+                                           unsigned DestReg, int Val,
+                                           const TargetInstrInfo *TII,
+                                           DebugLoc dl) const {
+  MachineFunction &MF = *MBB.getParent();
+  MachineConstantPool *ConstantPool = MF.getConstantPool();
+  Constant *C = ConstantInt::get(Type::Int32Ty, Val);
+  unsigned Idx = ConstantPool->getConstantPoolIndex(C, 4);
+
+  BuildMI(MBB, MBBI, dl, TII->get(ARM::tLDRcp), DestReg)
+    .addConstantPoolIndex(Idx);
+}
+
+const TargetRegisterClass*
+Thumb2RegisterInfo::getPhysicalRegisterRegClass(unsigned Reg, MVT VT) const {
+  if (isARMLowRegister(Reg))
+    return ARM::tGPRRegisterClass;
+  switch (Reg) {
+  default:
+    break;
+  case ARM::R8:  case ARM::R9:  case ARM::R10: case ARM::R11:
+  case ARM::R12: case ARM::SP:  case ARM::LR:  case ARM::PC:
+    return ARM::GPRRegisterClass;
+  }
+
+  return TargetRegisterInfo::getPhysicalRegisterRegClass(Reg, VT);
+}
+
+bool
+Thumb2RegisterInfo::requiresRegisterScavenging(const MachineFunction &MF) const {
+  return Thumb2RegScavenging;
+}
+
+bool Thumb2RegisterInfo::hasReservedCallFrame(MachineFunction &MF) const {
+  const MachineFrameInfo *FFI = MF.getFrameInfo();
+  unsigned CFSize = FFI->getMaxCallFrameSize();
+  // It's not always a good idea to include the call frame as part of the
+  // stack frame. ARM (especially Thumb) has only small immediate offsets
+  // with which to address the stack frame, so a large call frame can cause
+  // poor codegen and may even make it impossible to scavenge a register.
+  if (CFSize >= ((1 << 8) - 1) * 4 / 2) // Half of imm8 * 4
+    return false;
+
+  return !MF.getFrameInfo()->hasVarSizedObjects();
+}
+
+/// emitThumbRegPlusImmInReg - Emits a series of instructions to materialize
+/// a destreg = basereg + immediate in Thumb code. Materialize the immediate
+/// in a register using mov / mvn sequences or load the immediate from a
+/// constpool entry.
+static
+void emitThumbRegPlusImmInReg(MachineBasicBlock &MBB,
+                              MachineBasicBlock::iterator &MBBI,
+                              unsigned DestReg, unsigned BaseReg,
+                              int NumBytes, bool CanChangeCC,
+                              const TargetInstrInfo &TII,
+                              const Thumb2RegisterInfo& MRI,
+                              DebugLoc dl) {
+  bool isHigh = !isARMLowRegister(DestReg) ||
+                (BaseReg != 0 && !isARMLowRegister(BaseReg));
+  bool isSub = false;
+  // Subtract doesn't have a high register version. Load the negative value
+  // if either base or dest register is a high register. Also, do not issue
+  // sub as part of the sequence if the condition register needs to be
+  // preserved.
+  if (NumBytes < 0 && !isHigh && CanChangeCC) {
+    isSub = true;
+    NumBytes = -NumBytes;
+  }
+  unsigned LdReg = DestReg;
+  if (DestReg == ARM::SP) {
+    assert(BaseReg == ARM::SP && "Unexpected!");
+    LdReg = ARM::R3;
+    BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVlor2hir), ARM::R12)
+      .addReg(ARM::R3, RegState::Kill);
+  }
+
+  if (NumBytes <= 255 && NumBytes >= 0)
+    BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVi8), LdReg).addImm(NumBytes);
+  else if (NumBytes < 0 && NumBytes >= -255) {
+    BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVi8), LdReg).addImm(NumBytes);
+    BuildMI(MBB, MBBI, dl, TII.get(ARM::tNEG), LdReg)
+      .addReg(LdReg, RegState::Kill);
+  } else
+    MRI.emitLoadConstPool(MBB, MBBI, LdReg, NumBytes, &TII, dl);
+
+  // Emit add / sub.
+  int Opc = (isSub) ? ARM::tSUBrr : (isHigh ? ARM::tADDhirr : ARM::tADDrr);
+  const MachineInstrBuilder MIB = BuildMI(MBB, MBBI, dl,
+                                          TII.get(Opc), DestReg);
+  if (DestReg == ARM::SP || isSub)
+    MIB.addReg(BaseReg).addReg(LdReg, RegState::Kill);
+  else
+    MIB.addReg(LdReg).addReg(BaseReg, RegState::Kill);
+  if (DestReg == ARM::SP)
+    BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVhir2lor), ARM::R3)
+      .addReg(ARM::R12, RegState::Kill);
+}
+
+/// calcNumMI - Returns the number of instructions required to materialize
+/// the specified add / sub r, c instruction.
+static unsigned calcNumMI(int Opc, int ExtraOpc, unsigned Bytes,
+                          unsigned NumBits, unsigned Scale) {
+  unsigned NumMIs = 0;
+  unsigned Chunk = ((1 << NumBits) - 1) * Scale;
+
+  if (Opc == ARM::tADDrSPi) {
+    unsigned ThisVal = (Bytes > Chunk) ? Chunk : Bytes;
+    Bytes -= ThisVal;
+    NumMIs++;
+    NumBits = 8;
+    Scale = 1;  // Followed by a number of tADDi8.
+    Chunk = ((1 << NumBits) - 1) * Scale;
+  }
+
+  NumMIs += Bytes / Chunk;
+  if ((Bytes % Chunk) != 0)
+    NumMIs++;
+  if (ExtraOpc)
+    NumMIs++;
+  return NumMIs;
+}
+
+/// emitThumbRegPlusImmediate - Emits a series of instructions to materialize
+/// a destreg = basereg + immediate in Thumb code.
+static
+void emitThumbRegPlusImmediate(MachineBasicBlock &MBB,
+                               MachineBasicBlock::iterator &MBBI,
+                               unsigned DestReg, unsigned BaseReg,
+                               int NumBytes, const TargetInstrInfo &TII,
+                               const Thumb2RegisterInfo& MRI,
+                               DebugLoc dl) {
+  bool isSub = NumBytes < 0;
+  unsigned Bytes = (unsigned)NumBytes;
+  if (isSub) Bytes = -NumBytes;
+  bool isMul4 = (Bytes & 3) == 0;
+  bool isTwoAddr = false;
+  bool DstNotEqBase = false;
+  unsigned NumBits = 1;
+  unsigned Scale = 1;
+  int Opc = 0;
+  int ExtraOpc = 0;
+
+  if (DestReg == BaseReg && BaseReg == ARM::SP) {
+    assert(isMul4 && "Thumb sp inc / dec size must be multiple of 4!");
+    NumBits = 7;
+    Scale = 4;
+    Opc = isSub ? ARM::tSUBspi : ARM::tADDspi;
+    isTwoAddr = true;
+  } else if (!isSub && BaseReg == ARM::SP) {
+    // r1 = add sp, 403
+    // =>
+    // r1 = add sp, 100 * 4
+    // r1 = add r1, 3
+    if (!isMul4) {
+      Bytes &= ~3;
+      ExtraOpc = ARM::tADDi3;
+    }
+    NumBits = 8;
+    Scale = 4;
+    Opc = ARM::tADDrSPi;
+  } else {
+    // sp = sub sp, c
+    // r1 = sub sp, c
+    // r8 = sub sp, c
+    if (DestReg != BaseReg)
+      DstNotEqBase = true;
+    NumBits = 8;
+    Opc = isSub ? ARM::tSUBi8 : ARM::tADDi8;
+    isTwoAddr = true;
+  }
+
+  unsigned NumMIs = calcNumMI(Opc, ExtraOpc, Bytes, NumBits, Scale);
+  unsigned Threshold = (DestReg == ARM::SP) ? 3 : 2;
+  if (NumMIs > Threshold) {
+    // This will expand into too many instructions. Load the immediate from a
+    // constpool entry.
+    emitThumbRegPlusImmInReg(MBB, MBBI, DestReg, BaseReg, NumBytes, true, TII,
+                             MRI, dl);
+    return;
+  }
+
+  if (DstNotEqBase) {
+    if (isARMLowRegister(DestReg) && isARMLowRegister(BaseReg)) {
+      // If both are low registers, emit DestReg = add BaseReg, min(Imm, 7)
+      unsigned Chunk = (1 << 3) - 1;
+      unsigned ThisVal = (Bytes > Chunk) ? Chunk : Bytes;
+      Bytes -= ThisVal;
+      BuildMI(MBB, MBBI, dl, TII.get(isSub ? ARM::tSUBi3 : ARM::tADDi3), DestReg)
+        .addReg(BaseReg, RegState::Kill).addImm(ThisVal);
+    } else {
+      BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr), DestReg)
+        .addReg(BaseReg, RegState::Kill);
+    }
+    BaseReg = DestReg;
+  }
+
+  unsigned Chunk = ((1 << NumBits) - 1) * Scale;
+  while (Bytes) {
+    unsigned ThisVal = (Bytes > Chunk) ? Chunk : Bytes;
+    Bytes -= ThisVal;
+    ThisVal /= Scale;
+    // Build the new tADD / tSUB.
+    if (isTwoAddr)
+      BuildMI(MBB, MBBI, dl, TII.get(Opc), DestReg)
+        .addReg(DestReg).addImm(ThisVal);
+    else {
+      bool isKill = BaseReg != ARM::SP;
+      BuildMI(MBB, MBBI, dl, TII.get(Opc), DestReg)
+        .addReg(BaseReg, getKillRegState(isKill)).addImm(ThisVal);
+      BaseReg = DestReg;
+
+      if (Opc == ARM::tADDrSPi) {
+        // r4 = add sp, imm
+        // r4 = add r4, imm
+        // ...
+        NumBits = 8;
+        Scale = 1;
+        Chunk = ((1 << NumBits) - 1) * Scale;
+        Opc = isSub ? ARM::tSUBi8 : ARM::tADDi8;
+        isTwoAddr = true;
+      }
+    }
+  }
+
+  if (ExtraOpc)
+    BuildMI(MBB, MBBI, dl, TII.get(ExtraOpc), DestReg)
+      .addReg(DestReg, RegState::Kill)
+      .addImm(((unsigned)NumBytes) & 3);
+}
+
+static void emitSPUpdate(MachineBasicBlock &MBB,
+                         MachineBasicBlock::iterator &MBBI,
+                         const TargetInstrInfo &TII, DebugLoc dl,
+                         const Thumb2RegisterInfo &MRI,
+                         int NumBytes) {
+  emitThumbRegPlusImmediate(MBB, MBBI, ARM::SP, ARM::SP, NumBytes, TII,
+                            MRI, dl);
+}
+
+void Thumb2RegisterInfo::
+eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
+                              MachineBasicBlock::iterator I) const {
+  if (!hasReservedCallFrame(MF)) {
+    // If we have alloca, convert as follows:
+    // ADJCALLSTACKDOWN -> sub sp, sp, amount
+    // ADJCALLSTACKUP   -> add sp, sp, amount
+    MachineInstr *Old = I;
+    DebugLoc dl = Old->getDebugLoc();
+    unsigned Amount = Old->getOperand(0).getImm();
+    if (Amount != 0) {
+      // We need to keep the stack aligned properly. To do this, we round the
+      // amount of space needed for the outgoing arguments up to the next
+      // alignment boundary.
+      unsigned Align = MF.getTarget().getFrameInfo()->getStackAlignment();
+      Amount = (Amount+Align-1)/Align*Align;
+
+      // Replace the pseudo instruction with a new instruction...
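+      // Illustrative worked example (editorial; hypothetical numbers): with
+      // Align = 8 and a 20-byte outgoing-argument area, the rounding above
+      // yields (20 + 7) / 8 * 8 = 24, so the ADJCALLSTACKDOWN below becomes
+      // "sp -= 24".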
+      unsigned Opc = Old->getOpcode();
+      if (Opc == ARM::ADJCALLSTACKDOWN || Opc == ARM::tADJCALLSTACKDOWN) {
+        emitSPUpdate(MBB, I, TII, dl, *this, -Amount);
+      } else {
+        assert(Opc == ARM::ADJCALLSTACKUP || Opc == ARM::tADJCALLSTACKUP);
+        emitSPUpdate(MBB, I, TII, dl, *this, Amount);
+      }
+    }
+  }
+  MBB.erase(I);
+}
+
+/// emitThumbConstant - Emit a series of instructions to materialize a
+/// constant.
+static void emitThumbConstant(MachineBasicBlock &MBB,
+                              MachineBasicBlock::iterator &MBBI,
+                              unsigned DestReg, int Imm,
+                              const TargetInstrInfo &TII,
+                              const Thumb2RegisterInfo& MRI,
+                              DebugLoc dl) {
+  bool isSub = Imm < 0;
+  if (isSub) Imm = -Imm;
+
+  int Chunk = (1 << 8) - 1;
+  int ThisVal = (Imm > Chunk) ? Chunk : Imm;
+  Imm -= ThisVal;
+  BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVi8), DestReg).addImm(ThisVal);
+  if (Imm > 0)
+    emitThumbRegPlusImmediate(MBB, MBBI, DestReg, DestReg, Imm, TII, MRI, dl);
+  if (isSub)
+    BuildMI(MBB, MBBI, dl, TII.get(ARM::tNEG), DestReg)
+      .addReg(DestReg, RegState::Kill);
+}
+
+void Thumb2RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
+                                             int SPAdj, RegScavenger *RS) const {
+  unsigned i = 0;
+  MachineInstr &MI = *II;
+  MachineBasicBlock &MBB = *MI.getParent();
+  MachineFunction &MF = *MBB.getParent();
+  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
+  DebugLoc dl = MI.getDebugLoc();
+
+  while (!MI.getOperand(i).isFI()) {
+    ++i;
+    assert(i < MI.getNumOperands() && "Instr doesn't have FrameIndex operand!");
+  }
+
+  unsigned FrameReg = ARM::SP;
+  int FrameIndex = MI.getOperand(i).getIndex();
+  int Offset = MF.getFrameInfo()->getObjectOffset(FrameIndex) +
+               MF.getFrameInfo()->getStackSize() + SPAdj;
+
+  if (AFI->isGPRCalleeSavedArea1Frame(FrameIndex))
+    Offset -= AFI->getGPRCalleeSavedArea1Offset();
+  else if (AFI->isGPRCalleeSavedArea2Frame(FrameIndex))
+    Offset -= AFI->getGPRCalleeSavedArea2Offset();
+  else if (hasFP(MF)) {
+    assert(SPAdj == 0 && "Unexpected");
+    // There are alloca()s in this function; we must reference off the frame
+    // pointer instead.
+    FrameReg = getFrameRegister(MF);
+    Offset -= AFI->getFramePtrSpillOffset();
+  }
+
+  unsigned Opcode = MI.getOpcode();
+  const TargetInstrDesc &Desc = MI.getDesc();
+  unsigned AddrMode = (Desc.TSFlags & ARMII::AddrModeMask);
+
+  if (Opcode == ARM::tADDrSPi) {
+    Offset += MI.getOperand(i+1).getImm();
+
+    // Can't use tADDrSPi if it's based off the frame pointer.
+    unsigned NumBits = 0;
+    unsigned Scale = 1;
+    if (FrameReg != ARM::SP) {
+      Opcode = ARM::tADDi3;
+      MI.setDesc(TII.get(ARM::tADDi3));
+      NumBits = 3;
+    } else {
+      NumBits = 8;
+      Scale = 4;
+      assert((Offset & 3) == 0 &&
+             "Thumb add/sub sp, #imm immediate must be multiple of 4!");
+    }
+
+    if (Offset == 0) {
+      // Turn it into a move.
+      MI.setDesc(TII.get(ARM::tMOVhir2lor));
+      MI.getOperand(i).ChangeToRegister(FrameReg, false);
+      MI.RemoveOperand(i+1);
+      return;
+    }
+
+    // Common case: small offset, fits into instruction.
+    unsigned Mask = (1 << NumBits) - 1;
+    if (((Offset / Scale) & ~Mask) == 0) {
+      // Replace the FrameIndex with sp / fp
+      MI.getOperand(i).ChangeToRegister(FrameReg, false);
+      MI.getOperand(i+1).ChangeToImmediate(Offset / Scale);
+      return;
+    }
+
+    unsigned DestReg = MI.getOperand(0).getReg();
+    unsigned Bytes = (Offset > 0) ? Offset : -Offset;
+    unsigned NumMIs = calcNumMI(Opcode, 0, Bytes, NumBits, Scale);
+    // MI would expand into a large number of instructions. Don't try to
+    // simplify the immediate.
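+    // Illustrative sketch of the negative-offset path below (editorial;
+    // register and offset are hypothetical): "r0 = add sp, #-8" becomes
+    // "r0 = mov #8; r0 = neg r0" via emitThumbConstant, after which the
+    // original instruction is retargeted to "r0 = add r0, sp" (tADDhirr).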
+    if (NumMIs > 2) {
+      emitThumbRegPlusImmediate(MBB, II, DestReg, FrameReg, Offset, TII,
+                                *this, dl);
+      MBB.erase(II);
+      return;
+    }
+
+    if (Offset > 0) {
+      // Translate r0 = add sp, imm to
+      // r0 = add sp, 255*4
+      // r0 = add r0, (imm - 255*4)
+      MI.getOperand(i).ChangeToRegister(FrameReg, false);
+      MI.getOperand(i+1).ChangeToImmediate(Mask);
+      Offset = (Offset - Mask * Scale);
+      MachineBasicBlock::iterator NII = next(II);
+      emitThumbRegPlusImmediate(MBB, NII, DestReg, DestReg, Offset, TII,
+                                *this, dl);
+    } else {
+      // Translate r0 = add sp, -imm to
+      // r0 = -imm (this is then translated into a series of instructions)
+      // r0 = add r0, sp
+      emitThumbConstant(MBB, II, DestReg, Offset, TII, *this, dl);
+      MI.setDesc(TII.get(ARM::tADDhirr));
+      MI.getOperand(i).ChangeToRegister(DestReg, false, false, true);
+      MI.getOperand(i+1).ChangeToRegister(FrameReg, false);
+    }
+    return;
+  } else {
+    unsigned ImmIdx = 0;
+    int InstrOffs = 0;
+    unsigned NumBits = 0;
+    unsigned Scale = 1;
+    switch (AddrMode) {
+    case ARMII::AddrModeT1_s: {
+      ImmIdx = i+1;
+      InstrOffs = MI.getOperand(ImmIdx).getImm();
+      NumBits = (FrameReg == ARM::SP) ? 8 : 5;
+      Scale = 4;
+      break;
+    }
+    default:
+      assert(0 && "Unsupported addressing mode!");
+      abort();
+      break;
+    }
+
+    Offset += InstrOffs * Scale;
+    assert((Offset & (Scale-1)) == 0 && "Can't encode this offset!");
+
+    // Common case: small offset, fits into instruction.
+    MachineOperand &ImmOp = MI.getOperand(ImmIdx);
+    int ImmedOffset = Offset / Scale;
+    unsigned Mask = (1 << NumBits) - 1;
+    if ((unsigned)Offset <= Mask * Scale) {
+      // Replace the FrameIndex with sp
+      MI.getOperand(i).ChangeToRegister(FrameReg, false);
+      ImmOp.ChangeToImmediate(ImmedOffset);
+      return;
+    }
+
+    bool isThumbSpillRestore = Opcode == ARM::tRestore || Opcode == ARM::tSpill;
+    if (AddrMode == ARMII::AddrModeT1_s) {
+      // Thumb tLDRspi, tSTRspi. These will change to instructions that use
+      // a different base register.
+      NumBits = 5;
+      Mask = (1 << NumBits) - 1;
+    }
+    // If this is a Thumb spill / restore, we will be using a constpool load
+    // to materialize the offset.
+    if (AddrMode == ARMII::AddrModeT1_s && isThumbSpillRestore)
+      ImmOp.ChangeToImmediate(0);
+    else {
+      // Otherwise, it didn't fit. Pull in what we can to simplify the immed.
+      ImmedOffset = ImmedOffset & Mask;
+      ImmOp.ChangeToImmediate(ImmedOffset);
+      Offset &= ~(Mask*Scale);
+    }
+  }
+
+  // If we get here, the immediate doesn't fit into the instruction. We folded
+  // as much as possible above, handle the rest, providing a register that is
+  // SP+LargeImm.
+  assert(Offset && "This code isn't needed if offset already handled!");
+
+  if (Desc.mayLoad()) {
+    // Use the destination register to materialize sp + offset.
+    unsigned TmpReg = MI.getOperand(0).getReg();
+    bool UseRR = false;
+    if (Opcode == ARM::tRestore) {
+      if (FrameReg == ARM::SP)
+        emitThumbRegPlusImmInReg(MBB, II, TmpReg, FrameReg,
+                                 Offset, false, TII, *this, dl);
+      else {
+        emitLoadConstPool(MBB, II, TmpReg, Offset, &TII, dl);
+        UseRR = true;
+      }
+    } else
+      emitThumbRegPlusImmediate(MBB, II, TmpReg, FrameReg, Offset, TII,
+                                *this, dl);
+    MI.setDesc(TII.get(ARM::tLDR));
+    MI.getOperand(i).ChangeToRegister(TmpReg, false, false, true);
+    if (UseRR)
+      // Use [reg, reg] addrmode.
+      MI.addOperand(MachineOperand::CreateReg(FrameReg, false));
+    else  // tLDR has an extra register operand.
+      MI.addOperand(MachineOperand::CreateReg(0, false));
+  } else if (Desc.mayStore()) {
+    // FIXME! This is horrific!!! We need register scavenging.
+    // Our temporary workaround has marked r3 unavailable. Of course, r3 is
+    // also an ABI register, so it's possible that it is the register being
+    // stored here. If that's the case, we do the following:
+    //   r12 = r2
+    //   Use r2 to materialize sp + offset
+    //   str r3, r2
+    //   r2 = r12
+    unsigned ValReg = MI.getOperand(0).getReg();
+    unsigned TmpReg = ARM::R3;
+    bool UseRR = false;
+    if (ValReg == ARM::R3) {
+      BuildMI(MBB, II, dl, TII.get(ARM::tMOVlor2hir), ARM::R12)
+        .addReg(ARM::R2, RegState::Kill);
+      TmpReg = ARM::R2;
+    }
+    if (TmpReg == ARM::R3 && AFI->isR3LiveIn())
+      BuildMI(MBB, II, dl, TII.get(ARM::tMOVlor2hir), ARM::R12)
+        .addReg(ARM::R3, RegState::Kill);
+    if (Opcode == ARM::tSpill) {
+      if (FrameReg == ARM::SP)
+        emitThumbRegPlusImmInReg(MBB, II, TmpReg, FrameReg,
+                                 Offset, false, TII, *this, dl);
+      else {
+        emitLoadConstPool(MBB, II, TmpReg, Offset, &TII, dl);
+        UseRR = true;
+      }
+    } else
+      emitThumbRegPlusImmediate(MBB, II, TmpReg, FrameReg, Offset, TII,
+                                *this, dl);
+    MI.setDesc(TII.get(ARM::tSTR));
+    MI.getOperand(i).ChangeToRegister(TmpReg, false, false, true);
+    if (UseRR)  // Use [reg, reg] addrmode.
+      MI.addOperand(MachineOperand::CreateReg(FrameReg, false));
+    else  // tSTR has an extra register operand.
+      MI.addOperand(MachineOperand::CreateReg(0, false));
+
+    MachineBasicBlock::iterator NII = next(II);
+    if (ValReg == ARM::R3)
+      BuildMI(MBB, NII, dl, TII.get(ARM::tMOVhir2lor), ARM::R2)
+        .addReg(ARM::R12, RegState::Kill);
+    if (TmpReg == ARM::R3 && AFI->isR3LiveIn())
+      BuildMI(MBB, NII, dl, TII.get(ARM::tMOVhir2lor), ARM::R3)
+        .addReg(ARM::R12, RegState::Kill);
+  } else
+    assert(false && "Unexpected opcode!");
+}
+
+void Thumb2RegisterInfo::emitPrologue(MachineFunction &MF) const {
+  MachineBasicBlock &MBB = MF.front();
+  MachineBasicBlock::iterator MBBI = MBB.begin();
+  MachineFrameInfo *MFI = MF.getFrameInfo();
+  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
+  unsigned VARegSaveSize = AFI->getVarArgsRegSaveSize();
+  unsigned NumBytes = MFI->getStackSize();
+  const std::vector<CalleeSavedInfo> &CSI = MFI->getCalleeSavedInfo();
+  DebugLoc dl = (MBBI != MBB.end() ?
+                 MBBI->getDebugLoc() : DebugLoc::getUnknownLoc());
+
+  // Check if R3 is live in. It might have to be used as a scratch register.
+  for (MachineRegisterInfo::livein_iterator I = MF.getRegInfo().livein_begin(),
+       E = MF.getRegInfo().livein_end(); I != E; ++I) {
+    if (I->first == ARM::R3) {
+      AFI->setR3IsLiveIn(true);
+      break;
+    }
+  }
+
+  // Thumb add/sub sp, imm8 instructions implicitly multiply the offset by 4.
+  NumBytes = (NumBytes + 3) & ~3;
+  MFI->setStackSize(NumBytes);
+
+  // Determine the sizes of the callee-save spill areas and record which
+  // frame index belongs to which area.
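+  // Editorial note on the classification below: r4-r7 and lr always go to
+  // area 1; r8-r11 go to area 2 only on Darwin (elsewhere they also land in
+  // area 1); everything else is assumed to be a D-register and gets 8 bytes
+  // in the DPR area.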
+ unsigned GPRCS1Size = 0, GPRCS2Size = 0, DPRCSSize = 0; + int FramePtrSpillFI = 0; + + if (VARegSaveSize) + emitSPUpdate(MBB, MBBI, TII, dl, *this, -VARegSaveSize); + + if (!AFI->hasStackFrame()) { + if (NumBytes != 0) + emitSPUpdate(MBB, MBBI, TII, dl, *this, -NumBytes); + return; + } + + for (unsigned i = 0, e = CSI.size(); i != e; ++i) { + unsigned Reg = CSI[i].getReg(); + int FI = CSI[i].getFrameIdx(); + switch (Reg) { + case ARM::R4: + case ARM::R5: + case ARM::R6: + case ARM::R7: + case ARM::LR: + if (Reg == FramePtr) + FramePtrSpillFI = FI; + AFI->addGPRCalleeSavedArea1Frame(FI); + GPRCS1Size += 4; + break; + case ARM::R8: + case ARM::R9: + case ARM::R10: + case ARM::R11: + if (Reg == FramePtr) + FramePtrSpillFI = FI; + if (STI.isTargetDarwin()) { + AFI->addGPRCalleeSavedArea2Frame(FI); + GPRCS2Size += 4; + } else { + AFI->addGPRCalleeSavedArea1Frame(FI); + GPRCS1Size += 4; + } + break; + default: + AFI->addDPRCalleeSavedAreaFrame(FI); + DPRCSSize += 8; + } + } + + if (MBBI != MBB.end() && MBBI->getOpcode() == ARM::tPUSH) { + ++MBBI; + if (MBBI != MBB.end()) + dl = MBBI->getDebugLoc(); + } + + // Darwin ABI requires FP to point to the stack slot that contains the + // previous FP. + if (STI.isTargetDarwin() || hasFP(MF)) { + MachineInstrBuilder MIB = + BuildMI(MBB, MBBI, dl, TII.get(ARM::tADDrSPi), FramePtr) + .addFrameIndex(FramePtrSpillFI).addImm(0); + } + + // Determine starting offsets of spill areas. + unsigned DPRCSOffset = NumBytes - (GPRCS1Size + GPRCS2Size + DPRCSSize); + unsigned GPRCS2Offset = DPRCSOffset + DPRCSSize; + unsigned GPRCS1Offset = GPRCS2Offset + GPRCS2Size; + AFI->setFramePtrSpillOffset(MFI->getObjectOffset(FramePtrSpillFI) + NumBytes); + AFI->setGPRCalleeSavedArea1Offset(GPRCS1Offset); + AFI->setGPRCalleeSavedArea2Offset(GPRCS2Offset); + AFI->setDPRCalleeSavedAreaOffset(DPRCSOffset); + + NumBytes = DPRCSOffset; + if (NumBytes) { + // Insert it after all the callee-save spills. + emitSPUpdate(MBB, MBBI, TII, dl, *this, -NumBytes); + } + + if (STI.isTargetELF() && hasFP(MF)) { + MFI->setOffsetAdjustment(MFI->getOffsetAdjustment() - + AFI->getFramePtrSpillOffset()); + } + + AFI->setGPRCalleeSavedArea1Size(GPRCS1Size); + AFI->setGPRCalleeSavedArea2Size(GPRCS2Size); + AFI->setDPRCalleeSavedAreaSize(DPRCSSize); +} + +static bool isCalleeSavedRegister(unsigned Reg, const unsigned *CSRegs) { + for (unsigned i = 0; CSRegs[i]; ++i) + if (Reg == CSRegs[i]) + return true; + return false; +} + +static bool isCSRestore(MachineInstr *MI, const unsigned *CSRegs) { + return (MI->getOpcode() == ARM::tRestore && + MI->getOperand(1).isFI() && + isCalleeSavedRegister(MI->getOperand(0).getReg(), CSRegs)); +} + +void Thumb2RegisterInfo::emitEpilogue(MachineFunction &MF, + MachineBasicBlock &MBB) const { + MachineBasicBlock::iterator MBBI = prior(MBB.end()); + assert((MBBI->getOpcode() == ARM::tBX_RET || + MBBI->getOpcode() == ARM::tPOP_RET) && + "Can only insert epilog into returning blocks"); + DebugLoc dl = MBBI->getDebugLoc(); + MachineFrameInfo *MFI = MF.getFrameInfo(); + ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); + unsigned VARegSaveSize = AFI->getVarArgsRegSaveSize(); + int NumBytes = (int)MFI->getStackSize(); + + if (!AFI->hasStackFrame()) { + if (NumBytes != 0) + emitSPUpdate(MBB, MBBI, TII, dl, *this, NumBytes); + } else { + // Unwind MBBI to point to first LDR / FLDD. 
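+    // Editorial note: the loop below backs MBBI up over the trailing
+    // tRestore instructions that reload callee-saved registers, so that the
+    // SP adjustment computed afterwards is inserted before the first reload.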
+ const unsigned *CSRegs = getCalleeSavedRegs(); + if (MBBI != MBB.begin()) { + do + --MBBI; + while (MBBI != MBB.begin() && isCSRestore(MBBI, CSRegs)); + if (!isCSRestore(MBBI, CSRegs)) + ++MBBI; + } + + // Move SP to start of FP callee save spill area. + NumBytes -= (AFI->getGPRCalleeSavedArea1Size() + + AFI->getGPRCalleeSavedArea2Size() + + AFI->getDPRCalleeSavedAreaSize()); + + if (hasFP(MF)) { + NumBytes = AFI->getFramePtrSpillOffset() - NumBytes; + // Reset SP based on frame pointer only if the stack frame extends beyond + // frame pointer stack slot or target is ELF and the function has FP. + if (NumBytes) + emitThumbRegPlusImmediate(MBB, MBBI, ARM::SP, FramePtr, -NumBytes, + TII, *this, dl); + else + BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVlor2hir), ARM::SP) + .addReg(FramePtr); + } else { + if (MBBI->getOpcode() == ARM::tBX_RET && + &MBB.front() != MBBI && + prior(MBBI)->getOpcode() == ARM::tPOP) { + MachineBasicBlock::iterator PMBBI = prior(MBBI); + emitSPUpdate(MBB, PMBBI, TII, dl, *this, NumBytes); + } else + emitSPUpdate(MBB, MBBI, TII, dl, *this, NumBytes); + } + } + + if (VARegSaveSize) { + // Epilogue for vararg functions: pop LR to R3 and branch off it. + // FIXME: Verify this is still ok when R3 is no longer being reserved. + BuildMI(MBB, MBBI, dl, TII.get(ARM::tPOP)).addReg(ARM::R3); + + emitSPUpdate(MBB, MBBI, TII, dl, *this, VARegSaveSize); + + BuildMI(MBB, MBBI, dl, TII.get(ARM::tBX_RET_vararg)).addReg(ARM::R3); + MBB.erase(MBBI); + } +} diff --git a/lib/Target/ARM/Thumb2RegisterInfo.h b/lib/Target/ARM/Thumb2RegisterInfo.h new file mode 100644 index 0000000..d379c31 --- /dev/null +++ b/lib/Target/ARM/Thumb2RegisterInfo.h @@ -0,0 +1,60 @@ +//===- Thumb2RegisterInfo.h - Thumb-2 Register Information Impl ----*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains the Thumb-2 implementation of the TargetRegisterInfo class. +// +//===----------------------------------------------------------------------===// + +#ifndef THUMB2REGISTERINFO_H +#define THUMB2REGISTERINFO_H + +#include "ARM.h" +#include "ARMRegisterInfo.h" +#include "llvm/Target/TargetRegisterInfo.h" + +namespace llvm { + class ARMSubtarget; + class TargetInstrInfo; + class Type; + +struct Thumb2RegisterInfo : public ARMBaseRegisterInfo { +public: + Thumb2RegisterInfo(const TargetInstrInfo &tii, const ARMSubtarget &STI); + + /// emitLoadConstPool - Emits a load from constpool to materialize the + /// specified immediate. + void emitLoadConstPool(MachineBasicBlock &MBB, + MachineBasicBlock::iterator &MBBI, + unsigned DestReg, int Val, + const TargetInstrInfo *TII, + DebugLoc dl) const; + + /// Code Generation virtual methods... 
+ const TargetRegisterClass * + getPhysicalRegisterRegClass(unsigned Reg, MVT VT = MVT::Other) const; + + bool isReservedReg(const MachineFunction &MF, unsigned Reg) const; + + bool requiresRegisterScavenging(const MachineFunction &MF) const; + + bool hasReservedCallFrame(MachineFunction &MF) const; + + void eliminateCallFramePseudoInstr(MachineFunction &MF, + MachineBasicBlock &MBB, + MachineBasicBlock::iterator I) const; + + void eliminateFrameIndex(MachineBasicBlock::iterator II, + int SPAdj, RegScavenger *RS = NULL) const; + + void emitPrologue(MachineFunction &MF) const; + void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const; +}; +} + +#endif // THUMB2REGISTERINFO_H diff --git a/lib/Target/ARM/ThumbInstrInfo.h b/lib/Target/ARM/ThumbInstrInfo.h deleted file mode 100644 index dcf1095..0000000 --- a/lib/Target/ARM/ThumbInstrInfo.h +++ /dev/null @@ -1,85 +0,0 @@ -//===- ThumbInstrInfo.h - Thumb Instruction Information ----------*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file contains the ARM implementation of the TargetInstrInfo class. -// -//===----------------------------------------------------------------------===// - -#ifndef THUMBINSTRUCTIONINFO_H -#define THUMBINSTRUCTIONINFO_H - -#include "llvm/Target/TargetInstrInfo.h" -#include "ARMRegisterInfo.h" -#include "ARM.h" -#include "ARMInstrInfo.h" - -namespace llvm { - class ARMSubtarget; - -class ThumbInstrInfo : public ARMBaseInstrInfo { -public: - explicit ThumbInstrInfo(const ARMSubtarget &STI); - - /// Return true if the instruction is a register to register move and return - /// the source and dest operands and their sub-register indices by reference. 
- virtual bool isMoveInstr(const MachineInstr &MI, - unsigned &SrcReg, unsigned &DstReg, - unsigned &SrcSubIdx, unsigned &DstSubIdx) const; - - virtual unsigned isLoadFromStackSlot(const MachineInstr *MI, - int &FrameIndex) const; - virtual unsigned isStoreToStackSlot(const MachineInstr *MI, - int &FrameIndex) const; - - virtual bool copyRegToReg(MachineBasicBlock &MBB, - MachineBasicBlock::iterator I, - unsigned DestReg, unsigned SrcReg, - const TargetRegisterClass *DestRC, - const TargetRegisterClass *SrcRC) const; - virtual void storeRegToStackSlot(MachineBasicBlock &MBB, - MachineBasicBlock::iterator MBBI, - unsigned SrcReg, bool isKill, int FrameIndex, - const TargetRegisterClass *RC) const; - - virtual void storeRegToAddr(MachineFunction &MF, unsigned SrcReg, bool isKill, - SmallVectorImpl<MachineOperand> &Addr, - const TargetRegisterClass *RC, - SmallVectorImpl<MachineInstr*> &NewMIs) const; - - virtual void loadRegFromStackSlot(MachineBasicBlock &MBB, - MachineBasicBlock::iterator MBBI, - unsigned DestReg, int FrameIndex, - const TargetRegisterClass *RC) const; - - virtual void loadRegFromAddr(MachineFunction &MF, unsigned DestReg, - SmallVectorImpl<MachineOperand> &Addr, - const TargetRegisterClass *RC, - SmallVectorImpl<MachineInstr*> &NewMIs) const; - virtual bool spillCalleeSavedRegisters(MachineBasicBlock &MBB, - MachineBasicBlock::iterator MI, - const std::vector<CalleeSavedInfo> &CSI) const; - virtual bool restoreCalleeSavedRegisters(MachineBasicBlock &MBB, - MachineBasicBlock::iterator MI, - const std::vector<CalleeSavedInfo> &CSI) const; - - virtual MachineInstr* foldMemoryOperandImpl(MachineFunction &MF, - MachineInstr* MI, - const SmallVectorImpl<unsigned> &Ops, - MachineInstr* LoadMI) const { - return 0; - } - - virtual MachineInstr* foldMemoryOperandImpl(MachineFunction &MF, - MachineInstr* MI, - const SmallVectorImpl<unsigned> &Ops, - int FrameIndex) const; -}; -} - -#endif // THUMBINSTRUCTIONINFO_H diff --git a/lib/Target/Alpha/Alpha.h b/lib/Target/Alpha/Alpha.h index 2815176..0818e25 100644 --- a/lib/Target/Alpha/Alpha.h +++ b/lib/Target/Alpha/Alpha.h @@ -27,7 +27,6 @@ namespace llvm { FunctionPass *createAlphaISelDag(AlphaTargetMachine &TM); FunctionPass *createAlphaCodePrinterPass(raw_ostream &OS, TargetMachine &TM, - CodeGenOpt::Level OptLevel, bool Verbose); FunctionPass *createAlphaPatternInstructionSelector(TargetMachine &TM); FunctionPass *createAlphaCodeEmitterPass(AlphaTargetMachine &TM, diff --git a/lib/Target/Alpha/AlphaISelLowering.cpp b/lib/Target/Alpha/AlphaISelLowering.cpp index 1be1713..fa0b656 100644 --- a/lib/Target/Alpha/AlphaISelLowering.cpp +++ b/lib/Target/Alpha/AlphaISelLowering.cpp @@ -181,6 +181,11 @@ const char *AlphaTargetLowering::getTargetNodeName(unsigned Opcode) const { } } +/// getFunctionAlignment - Return the Log2 alignment of this function. 
+unsigned AlphaTargetLowering::getFunctionAlignment(const Function *F) const { + return 4; +} + static SDValue LowerJumpTable(SDValue Op, SelectionDAG &DAG) { MVT PtrVT = Op.getValueType(); JumpTableSDNode *JT = cast<JumpTableSDNode>(Op); @@ -360,7 +365,8 @@ static SDValue LowerRET(SDValue Op, SelectionDAG &DAG) { std::pair<SDValue, SDValue> AlphaTargetLowering::LowerCallTo(SDValue Chain, const Type *RetTy, bool RetSExt, bool RetZExt, bool isVarArg, - bool isInreg, unsigned CallingConv, + bool isInreg, unsigned NumFixedArgs, + unsigned CallingConv, bool isTailCall, SDValue Callee, ArgListTy &Args, SelectionDAG &DAG, DebugLoc dl) { diff --git a/lib/Target/Alpha/AlphaISelLowering.h b/lib/Target/Alpha/AlphaISelLowering.h index fdd817c..4925367 100644 --- a/lib/Target/Alpha/AlphaISelLowering.h +++ b/lib/Target/Alpha/AlphaISelLowering.h @@ -86,9 +86,9 @@ namespace llvm { /// actual call. virtual std::pair<SDValue, SDValue> LowerCallTo(SDValue Chain, const Type *RetTy, bool RetSExt, bool RetZExt, - bool isVarArg, bool isInreg, unsigned CC, bool isTailCall, - SDValue Callee, ArgListTy &Args, SelectionDAG &DAG, - DebugLoc dl); + bool isVarArg, bool isInreg, unsigned NumFixedArgs, unsigned CC, + bool isTailCall, SDValue Callee, ArgListTy &Args, + SelectionDAG &DAG, DebugLoc dl); ConstraintType getConstraintType(const std::string &Constraint) const; @@ -103,6 +103,9 @@ namespace llvm { virtual bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const; + /// getFunctionAlignment - Return the Log2 alignment of this function. + virtual unsigned getFunctionAlignment(const Function *F) const; + private: // Helpers for custom lowering. void LowerVAARG(SDNode *N, SDValue &Chain, SDValue &DataPtr, diff --git a/lib/Target/Alpha/AlphaInstrInfo.cpp b/lib/Target/Alpha/AlphaInstrInfo.cpp index 229f9d4..76a594f 100644 --- a/lib/Target/Alpha/AlphaInstrInfo.cpp +++ b/lib/Target/Alpha/AlphaInstrInfo.cpp @@ -289,19 +289,22 @@ MachineInstr *AlphaInstrInfo::foldMemoryOperandImpl(MachineFunction &MF, if (Ops[0] == 0) { // move -> store unsigned InReg = MI->getOperand(1).getReg(); bool isKill = MI->getOperand(1).isKill(); + bool isUndef = MI->getOperand(1).isUndef(); Opc = (Opc == Alpha::BISr) ? Alpha::STQ : ((Opc == Alpha::CPYSS) ? Alpha::STS : Alpha::STT); NewMI = BuildMI(MF, MI->getDebugLoc(), get(Opc)) - .addReg(InReg, getKillRegState(isKill)) + .addReg(InReg, getKillRegState(isKill) | getUndefRegState(isUndef)) .addFrameIndex(FrameIndex) .addReg(Alpha::F31); } else { // load -> move unsigned OutReg = MI->getOperand(0).getReg(); bool isDead = MI->getOperand(0).isDead(); + bool isUndef = MI->getOperand(0).isUndef(); Opc = (Opc == Alpha::BISr) ? Alpha::LDQ : ((Opc == Alpha::CPYSS) ? Alpha::LDS : Alpha::LDT); NewMI = BuildMI(MF, MI->getDebugLoc(), get(Opc)) - .addReg(OutReg, RegState::Define | getDeadRegState(isDead)) + .addReg(OutReg, RegState::Define | getDeadRegState(isDead) | + getUndefRegState(isUndef)) .addFrameIndex(FrameIndex) .addReg(Alpha::F31); } @@ -470,6 +473,7 @@ unsigned AlphaInstrInfo::getGlobalBaseReg(MachineFunction *MF) const { bool Ok = TII->copyRegToReg(FirstMBB, MBBI, GlobalBaseReg, Alpha::R29, &Alpha::GPRCRegClass, &Alpha::GPRCRegClass); assert(Ok && "Couldn't assign to global base register!"); + Ok = Ok; // Silence warning when assertions are turned off. 
RegInfo.addLiveIn(Alpha::R29); AlphaFI->setGlobalBaseReg(GlobalBaseReg); @@ -496,6 +500,7 @@ unsigned AlphaInstrInfo::getGlobalRetAddr(MachineFunction *MF) const { bool Ok = TII->copyRegToReg(FirstMBB, MBBI, GlobalRetAddr, Alpha::R26, &Alpha::GPRCRegClass, &Alpha::GPRCRegClass); assert(Ok && "Couldn't assign to global return address register!"); + Ok = Ok; // Silence warning when assertions are turned off. RegInfo.addLiveIn(Alpha::R26); AlphaFI->setGlobalRetAddr(GlobalRetAddr); diff --git a/lib/Target/Alpha/AlphaTargetMachine.cpp b/lib/Target/Alpha/AlphaTargetMachine.cpp index 10952eb..060089c 100644 --- a/lib/Target/Alpha/AlphaTargetMachine.cpp +++ b/lib/Target/Alpha/AlphaTargetMachine.cpp @@ -94,7 +94,7 @@ bool AlphaTargetMachine::addAssemblyEmitter(PassManagerBase &PM, // Output assembly language. assert(AsmPrinterCtor && "AsmPrinter was not linked in"); if (AsmPrinterCtor) - PM.add(AsmPrinterCtor(Out, *this, OptLevel, Verbose)); + PM.add(AsmPrinterCtor(Out, *this, Verbose)); return false; } bool AlphaTargetMachine::addCodeEmitter(PassManagerBase &PM, @@ -104,7 +104,7 @@ bool AlphaTargetMachine::addCodeEmitter(PassManagerBase &PM, if (DumpAsm) { assert(AsmPrinterCtor && "AsmPrinter was not linked in"); if (AsmPrinterCtor) - PM.add(AsmPrinterCtor(errs(), *this, OptLevel, true)); + PM.add(AsmPrinterCtor(errs(), *this, true)); } return false; } @@ -115,7 +115,7 @@ bool AlphaTargetMachine::addCodeEmitter(PassManagerBase &PM, if (DumpAsm) { assert(AsmPrinterCtor && "AsmPrinter was not linked in"); if (AsmPrinterCtor) - PM.add(AsmPrinterCtor(errs(), *this, OptLevel, true)); + PM.add(AsmPrinterCtor(errs(), *this, true)); } return false; } diff --git a/lib/Target/Alpha/AlphaTargetMachine.h b/lib/Target/Alpha/AlphaTargetMachine.h index 946ca55..26684c7 100644 --- a/lib/Target/Alpha/AlphaTargetMachine.h +++ b/lib/Target/Alpha/AlphaTargetMachine.h @@ -41,7 +41,6 @@ protected: // set this functions to ctor pointer at startup time if they are linked in. typedef FunctionPass *(*AsmPrinterCtorFn)(raw_ostream &o, TargetMachine &tm, - CodeGenOpt::Level OptLevel, bool verbose); static AsmPrinterCtorFn AsmPrinterCtor; diff --git a/lib/Target/Alpha/AsmPrinter/AlphaAsmPrinter.cpp b/lib/Target/Alpha/AsmPrinter/AlphaAsmPrinter.cpp index e0c0a64..982ef5e 100644 --- a/lib/Target/Alpha/AsmPrinter/AlphaAsmPrinter.cpp +++ b/lib/Target/Alpha/AsmPrinter/AlphaAsmPrinter.cpp @@ -38,9 +38,8 @@ namespace { /// explicit AlphaAsmPrinter(raw_ostream &o, TargetMachine &tm, - const TargetAsmInfo *T, CodeGenOpt::Level OL, - bool V) - : AsmPrinter(o, tm, T, OL, V) {} + const TargetAsmInfo *T, bool V) + : AsmPrinter(o, tm, T, V) {} virtual const char *getPassName() const { return "Alpha Assembly Printer"; @@ -70,9 +69,8 @@ namespace { /// FunctionPass *llvm::createAlphaCodePrinterPass(raw_ostream &o, TargetMachine &tm, - CodeGenOpt::Level OptLevel, bool verbose) { - return new AlphaAsmPrinter(o, tm, tm.getTargetAsmInfo(), OptLevel, verbose); + return new AlphaAsmPrinter(o, tm, tm.getTargetAsmInfo(), verbose); } #include "AlphaGenAsmWriter.inc" @@ -155,7 +153,7 @@ bool AlphaAsmPrinter::runOnMachineFunction(MachineFunction &MF) { const Function *F = MF.getFunction(); SwitchToSection(TAI->SectionForGlobal(F)); - EmitAlignment(4, F); + EmitAlignment(MF.getAlignment(), F); switch (F->getLinkage()) { default: assert(0 && "Unknown linkage type!"); case Function::InternalLinkage: // Symbols default to internal. 
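The Alpha hunks above are the first instance of a pattern this patch repeats for every target: the hard-coded alignment argument to EmitAlignment is replaced by a per-function Log2 value reported through a new TargetLowering::getFunctionAlignment hook and consumed via MF.getAlignment(). A minimal standalone sketch of that shape, using hypothetical stand-in types rather than the real llvm::TargetLowering, llvm::Function, or AsmPrinter APIs:

// Hypothetical model of the getFunctionAlignment hook; none of these types
// are the real LLVM classes.
#include <cassert>

struct Function {
  bool OptForSize; // stand-in for F->hasFnAttr(Attribute::OptimizeForSize)
};

struct TargetLoweringModel {
  virtual ~TargetLoweringModel() {}
  // Return the Log2 alignment of this function: 4 means 1 << 4 = 16 bytes.
  virtual unsigned getFunctionAlignment(const Function *F) const = 0;
};

// Mirrors the Alpha override in the hunk above: always 16-byte aligned.
struct AlphaModel : TargetLoweringModel {
  unsigned getFunctionAlignment(const Function *) const { return 4; }
};

// Mirrors the MSP430/Darwin-PPC style further down: relax the alignment
// when the function is marked optimize-for-size.
struct SizeAwareModel : TargetLoweringModel {
  unsigned getFunctionAlignment(const Function *F) const {
    return F->OptForSize ? 1 : 4;
  }
};

int main() {
  Function F = { /*OptForSize=*/true };
  SizeAwareModel TLI;
  // The asm printer now asks the target instead of hard-coding a constant,
  // roughly: EmitAlignment(MF.getAlignment(), F).
  unsigned Log2Align = TLI.getFunctionAlignment(&F);
  assert((1u << Log2Align) == 2); // small functions get 2-byte alignment
  return 0;
}

Because the value is a Log2, the rewritten EmitAlignment calls keep their old meaning: the previous EmitAlignment(4, F) on Alpha and the new MF.getAlignment() both denote a 16-byte boundary.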
diff --git a/lib/Target/Alpha/CMakeLists.txt b/lib/Target/Alpha/CMakeLists.txt index 1e535f7..2a382d5 100644 --- a/lib/Target/Alpha/CMakeLists.txt +++ b/lib/Target/Alpha/CMakeLists.txt @@ -23,3 +23,5 @@ add_llvm_target(AlphaCodeGen AlphaTargetAsmInfo.cpp AlphaTargetMachine.cpp ) + +target_link_libraries (LLVMAlphaCodeGen LLVMSelectionDAG) diff --git a/lib/Target/CellSPU/AsmPrinter/SPUAsmPrinter.cpp b/lib/Target/CellSPU/AsmPrinter/SPUAsmPrinter.cpp index 02b625b..2847d0b 100644 --- a/lib/Target/CellSPU/AsmPrinter/SPUAsmPrinter.cpp +++ b/lib/Target/CellSPU/AsmPrinter/SPUAsmPrinter.cpp @@ -50,9 +50,8 @@ namespace { std::set<std::string> FnStubs, GVStubs; public: explicit SPUAsmPrinter(raw_ostream &O, TargetMachine &TM, - const TargetAsmInfo *T, CodeGenOpt::Level OL, - bool V) : - AsmPrinter(O, TM, T, OL, V) {} + const TargetAsmInfo *T, bool V) : + AsmPrinter(O, TM, T, V) {} virtual const char *getPassName() const { return "STI CBEA SPU Assembly Printer"; @@ -290,9 +289,8 @@ namespace { DwarfWriter *DW; public: explicit LinuxAsmPrinter(raw_ostream &O, SPUTargetMachine &TM, - const TargetAsmInfo *T, CodeGenOpt::Level F, - bool V) - : SPUAsmPrinter(O, TM, T, F, V), DW(0) {} + const TargetAsmInfo *T, bool V) + : SPUAsmPrinter(O, TM, T, V), DW(0) {} virtual const char *getPassName() const { return "STI CBEA SPU Assembly Printer"; @@ -433,7 +431,7 @@ LinuxAsmPrinter::runOnMachineFunction(MachineFunction &MF) const Function *F = MF.getFunction(); SwitchToSection(TAI->SectionForGlobal(F)); - EmitAlignment(3, F); + EmitAlignment(MF.getAlignment(), F); switch (F->getLinkage()) { default: assert(0 && "Unknown linkage type!"); @@ -603,9 +601,8 @@ bool LinuxAsmPrinter::doFinalization(Module &M) { /// FunctionPass *llvm::createSPUAsmPrinterPass(raw_ostream &o, SPUTargetMachine &tm, - CodeGenOpt::Level OptLevel, bool verbose) { - return new LinuxAsmPrinter(o, tm, tm.getTargetAsmInfo(), OptLevel, verbose); + return new LinuxAsmPrinter(o, tm, tm.getTargetAsmInfo(), verbose); } // Force static initialization. diff --git a/lib/Target/CellSPU/CMakeLists.txt b/lib/Target/CellSPU/CMakeLists.txt index e3e12ac..8a55845 100644 --- a/lib/Target/CellSPU/CMakeLists.txt +++ b/lib/Target/CellSPU/CMakeLists.txt @@ -22,3 +22,5 @@ add_llvm_target(CellSPUCodeGen SPUTargetAsmInfo.cpp SPUTargetMachine.cpp ) + +target_link_libraries (LLVMCellSPUCodeGen LLVMSelectionDAG) diff --git a/lib/Target/CellSPU/SPU.h b/lib/Target/CellSPU/SPU.h index 77a062e..10d1110 100644 --- a/lib/Target/CellSPU/SPU.h +++ b/lib/Target/CellSPU/SPU.h @@ -26,7 +26,6 @@ namespace llvm { FunctionPass *createSPUISelDag(SPUTargetMachine &TM); FunctionPass *createSPUAsmPrinterPass(raw_ostream &o, SPUTargetMachine &tm, - CodeGenOpt::Level OptLevel, bool verbose); /*--== Utility functions/predicates/etc used all over the place: --==*/ diff --git a/lib/Target/CellSPU/SPUISelLowering.cpp b/lib/Target/CellSPU/SPUISelLowering.cpp index b286443..d8a7776 100644 --- a/lib/Target/CellSPU/SPUISelLowering.cpp +++ b/lib/Target/CellSPU/SPUISelLowering.cpp @@ -113,7 +113,7 @@ namespace { const Type *RetTy = Op.getNode()->getValueType(0).getTypeForMVT(); std::pair<SDValue, SDValue> CallInfo = TLI.LowerCallTo(InChain, RetTy, isSigned, !isSigned, false, false, - CallingConv::C, false, Callee, Args, DAG, + 0, CallingConv::C, false, Callee, Args, DAG, Op.getDebugLoc()); return CallInfo.first; @@ -481,6 +481,11 @@ SPUTargetLowering::getTargetNodeName(unsigned Opcode) const return ((i != node_names.end()) ? 
i->second : 0); } +/// getFunctionAlignment - Return the Log2 alignment of this function. +unsigned SPUTargetLowering::getFunctionAlignment(const Function *) const { + return 3; +} + //===----------------------------------------------------------------------===// // Return the Cell SPU's SETCC result type //===----------------------------------------------------------------------===// diff --git a/lib/Target/CellSPU/SPUISelLowering.h b/lib/Target/CellSPU/SPUISelLowering.h index 866c632..b1583f4 100644 --- a/lib/Target/CellSPU/SPUISelLowering.h +++ b/lib/Target/CellSPU/SPUISelLowering.h @@ -89,7 +89,6 @@ namespace llvm { public TargetLowering { int VarArgsFrameIndex; // FrameIndex for start of varargs area. - int ReturnAddrIndex; // FrameIndex for return slot. SPUTargetMachine &SPUTM; public: @@ -148,6 +147,9 @@ namespace llvm { virtual bool isLegalAddressImmediate(GlobalValue *) const; virtual bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const; + + /// getFunctionAlignment - Return the Log2 alignment of this function. + virtual unsigned getFunctionAlignment(const Function *F) const; }; } diff --git a/lib/Target/CellSPU/SPUInstrInfo.cpp b/lib/Target/CellSPU/SPUInstrInfo.cpp index 4af995a..e629c8d 100644 --- a/lib/Target/CellSPU/SPUInstrInfo.cpp +++ b/lib/Target/CellSPU/SPUInstrInfo.cpp @@ -491,19 +491,22 @@ SPUInstrInfo::foldMemoryOperandImpl(MachineFunction &MF, if (OpNum == 0) { // move -> store unsigned InReg = MI->getOperand(1).getReg(); bool isKill = MI->getOperand(1).isKill(); + bool isUndef = MI->getOperand(1).isUndef(); if (FrameIndex < SPUFrameInfo::maxFrameOffset()) { MachineInstrBuilder MIB = BuildMI(MF, MI->getDebugLoc(), get(SPU::STQDr32)); - MIB.addReg(InReg, getKillRegState(isKill)); + MIB.addReg(InReg, getKillRegState(isKill) | getUndefRegState(isUndef)); NewMI = addFrameReference(MIB, FrameIndex); } } else { // move -> load unsigned OutReg = MI->getOperand(0).getReg(); bool isDead = MI->getOperand(0).isDead(); + bool isUndef = MI->getOperand(0).isUndef(); MachineInstrBuilder MIB = BuildMI(MF, MI->getDebugLoc(), get(Opc)); - MIB.addReg(OutReg, RegState::Define | getDeadRegState(isDead)); + MIB.addReg(OutReg, RegState::Define | getDeadRegState(isDead) | + getUndefRegState(isUndef)); Opc = (FrameIndex < SPUFrameInfo::maxFrameOffset()) ? SPU::STQDr32 : SPU::STQXr32; NewMI = addFrameReference(MIB, FrameIndex); diff --git a/lib/Target/CellSPU/SPUTargetMachine.cpp b/lib/Target/CellSPU/SPUTargetMachine.cpp index 256d63d..2470972 100644 --- a/lib/Target/CellSPU/SPUTargetMachine.cpp +++ b/lib/Target/CellSPU/SPUTargetMachine.cpp @@ -94,6 +94,6 @@ bool SPUTargetMachine::addAssemblyEmitter(PassManagerBase &PM, // Output assembly language. assert(AsmPrinterCtor && "AsmPrinter was not linked in"); if (AsmPrinterCtor) - PM.add(AsmPrinterCtor(Out, *this, OptLevel, Verbose)); + PM.add(AsmPrinterCtor(Out, *this, Verbose)); return false; } diff --git a/lib/Target/CellSPU/SPUTargetMachine.h b/lib/Target/CellSPU/SPUTargetMachine.h index d8fe300..4c28521 100644 --- a/lib/Target/CellSPU/SPUTargetMachine.h +++ b/lib/Target/CellSPU/SPUTargetMachine.h @@ -43,7 +43,6 @@ protected: // set this functions to ctor pointer at startup time if they are linked in. 
typedef FunctionPass *(*AsmPrinterCtorFn)(raw_ostream &o, SPUTargetMachine &tm, - CodeGenOpt::Level OptLevel, bool verbose); static AsmPrinterCtorFn AsmPrinterCtor; diff --git a/lib/Target/IA64/AsmPrinter/IA64AsmPrinter.cpp b/lib/Target/IA64/AsmPrinter/IA64AsmPrinter.cpp index 6b34a4e..d85c0ea 100644 --- a/lib/Target/IA64/AsmPrinter/IA64AsmPrinter.cpp +++ b/lib/Target/IA64/AsmPrinter/IA64AsmPrinter.cpp @@ -38,9 +38,8 @@ namespace { std::set<std::string> ExternalFunctionNames, ExternalObjectNames; public: explicit IA64AsmPrinter(raw_ostream &O, TargetMachine &TM, - const TargetAsmInfo *T, CodeGenOpt::Level OL, - bool V) - : AsmPrinter(O, TM, T, OL, V) {} + const TargetAsmInfo *T, bool V) + : AsmPrinter(O, TM, T, V) {} virtual const char *getPassName() const { return "IA64 Assembly Printer"; @@ -137,7 +136,7 @@ bool IA64AsmPrinter::runOnMachineFunction(MachineFunction &MF) { SwitchToSection(TAI->SectionForGlobal(F)); // Print out labels for the function. - EmitAlignment(5); + EmitAlignment(MF.getAlignment()); O << "\t.global\t" << CurrentFnName << '\n'; printVisibility(CurrentFnName, F->getVisibility()); @@ -373,9 +372,8 @@ bool IA64AsmPrinter::doFinalization(Module &M) { /// FunctionPass *llvm::createIA64CodePrinterPass(raw_ostream &o, IA64TargetMachine &tm, - CodeGenOpt::Level OptLevel, bool verbose) { - return new IA64AsmPrinter(o, tm, tm.getTargetAsmInfo(), OptLevel, verbose); + return new IA64AsmPrinter(o, tm, tm.getTargetAsmInfo(), verbose); } namespace { diff --git a/lib/Target/IA64/CMakeLists.txt b/lib/Target/IA64/CMakeLists.txt index 26f86ca..638ed2e 100644 --- a/lib/Target/IA64/CMakeLists.txt +++ b/lib/Target/IA64/CMakeLists.txt @@ -18,3 +18,5 @@ add_llvm_target(IA64CodeGen IA64TargetAsmInfo.cpp IA64TargetMachine.cpp ) + +target_link_libraries (LLVMIA64CodeGen LLVMSelectionDAG) diff --git a/lib/Target/IA64/IA64.h b/lib/Target/IA64/IA64.h index ec8e3d6..9c758fd 100644 --- a/lib/Target/IA64/IA64.h +++ b/lib/Target/IA64/IA64.h @@ -39,7 +39,6 @@ FunctionPass *createIA64BundlingPass(IA64TargetMachine &TM); /// FunctionPass *createIA64CodePrinterPass(raw_ostream &o, IA64TargetMachine &tm, - CodeGenOpt::Level OptLevel, bool verbose); } // End llvm namespace diff --git a/lib/Target/IA64/IA64ISelLowering.cpp b/lib/Target/IA64/IA64ISelLowering.cpp index c545b9c..c622345 100644 --- a/lib/Target/IA64/IA64ISelLowering.cpp +++ b/lib/Target/IA64/IA64ISelLowering.cpp @@ -148,6 +148,11 @@ MVT IA64TargetLowering::getSetCCResultType(MVT VT) const { return MVT::i1; } +/// getFunctionAlignment - Return the Log2 alignment of this function. 
+unsigned IA64TargetLowering::getFunctionAlignment(const Function *) const { + return 5; +} + void IA64TargetLowering::LowerArguments(Function &F, SelectionDAG &DAG, SmallVectorImpl<SDValue> &ArgValues, DebugLoc dl) { @@ -310,7 +315,8 @@ void IA64TargetLowering::LowerArguments(Function &F, SelectionDAG &DAG, std::pair<SDValue, SDValue> IA64TargetLowering::LowerCallTo(SDValue Chain, const Type *RetTy, bool RetSExt, bool RetZExt, bool isVarArg, - bool isInreg, unsigned CallingConv, + bool isInreg, unsigned NumFixedArgs, + unsigned CallingConv, bool isTailCall, SDValue Callee, ArgListTy &Args, SelectionDAG &DAG, DebugLoc dl) { diff --git a/lib/Target/IA64/IA64ISelLowering.h b/lib/Target/IA64/IA64ISelLowering.h index edf7eb8..b9c8bf2 100644 --- a/lib/Target/IA64/IA64ISelLowering.h +++ b/lib/Target/IA64/IA64ISelLowering.h @@ -62,7 +62,7 @@ namespace llvm { virtual std::pair<SDValue, SDValue> LowerCallTo(SDValue Chain, const Type *RetTy, bool RetSExt, bool RetZExt, bool isVarArg, bool isInreg, - unsigned CC, bool isTailCall, + unsigned NumFixedArgs, unsigned CC, bool isTailCall, SDValue Callee, ArgListTy &Args, SelectionDAG &DAG, DebugLoc dl); @@ -70,6 +70,8 @@ namespace llvm { /// (currently, only "ret void") virtual SDValue LowerOperation(SDValue Op, SelectionDAG &DAG); + /// getFunctionAlignment - Return the Log2 alignment of this function. + virtual unsigned getFunctionAlignment(const Function *F) const; }; } diff --git a/lib/Target/IA64/IA64TargetMachine.cpp b/lib/Target/IA64/IA64TargetMachine.cpp index 4b05e1d..71a0a98 100644 --- a/lib/Target/IA64/IA64TargetMachine.cpp +++ b/lib/Target/IA64/IA64TargetMachine.cpp @@ -73,7 +73,7 @@ IA64TargetMachine::IA64TargetMachine(const Module &M, const std::string &FS) //===----------------------------------------------------------------------===// bool IA64TargetMachine::addInstSelector(PassManagerBase &PM, - CodeGenOpt::Level OptLevel){ + CodeGenOpt::Level OptLevel) { PM.add(createIA64DAGToDAGInstructionSelector(*this)); return false; } @@ -91,7 +91,7 @@ bool IA64TargetMachine::addAssemblyEmitter(PassManagerBase &PM, // Output assembly language. assert(AsmPrinterCtor && "AsmPrinter was not linked in"); if (AsmPrinterCtor) - PM.add(AsmPrinterCtor(Out, *this, OptLevel, Verbose)); + PM.add(AsmPrinterCtor(Out, *this, Verbose)); return false; } diff --git a/lib/Target/IA64/IA64TargetMachine.h b/lib/Target/IA64/IA64TargetMachine.h index a64da9f..402d833 100644 --- a/lib/Target/IA64/IA64TargetMachine.h +++ b/lib/Target/IA64/IA64TargetMachine.h @@ -38,7 +38,6 @@ protected: // set this functions to ctor pointer at startup time if they are linked in. 
typedef FunctionPass *(*AsmPrinterCtorFn)(raw_ostream &o, IA64TargetMachine &tm, - CodeGenOpt::Level OptLevel, bool verbose); static AsmPrinterCtorFn AsmPrinterCtor; diff --git a/lib/Target/MSP430/MSP430.h b/lib/Target/MSP430/MSP430.h index ed0cd04..fc13c9e 100644 --- a/lib/Target/MSP430/MSP430.h +++ b/lib/Target/MSP430/MSP430.h @@ -26,7 +26,6 @@ namespace llvm { CodeGenOpt::Level OptLevel); FunctionPass *createMSP430CodePrinterPass(raw_ostream &o, MSP430TargetMachine &tm, - CodeGenOpt::Level OptLevel, bool verbose); } // end namespace llvm; diff --git a/lib/Target/MSP430/MSP430AsmPrinter.cpp b/lib/Target/MSP430/MSP430AsmPrinter.cpp index 71b785b..b1fa3f0 100644 --- a/lib/Target/MSP430/MSP430AsmPrinter.cpp +++ b/lib/Target/MSP430/MSP430AsmPrinter.cpp @@ -40,9 +40,8 @@ namespace { class VISIBILITY_HIDDEN MSP430AsmPrinter : public AsmPrinter { public: MSP430AsmPrinter(raw_ostream &O, MSP430TargetMachine &TM, - const TargetAsmInfo *TAI, - CodeGenOpt::Level OL, bool V) - : AsmPrinter(O, TM, TAI, OL, V) {} + const TargetAsmInfo *TAI, bool V) + : AsmPrinter(O, TM, TAI, V) {} virtual const char *getPassName() const { return "MSP430 Assembly Printer"; @@ -77,9 +76,8 @@ namespace { /// FunctionPass *llvm::createMSP430CodePrinterPass(raw_ostream &o, MSP430TargetMachine &tm, - CodeGenOpt::Level OptLevel, bool verbose) { - return new MSP430AsmPrinter(o, tm, tm.getTargetAsmInfo(), OptLevel, verbose); + return new MSP430AsmPrinter(o, tm, tm.getTargetAsmInfo(), verbose); } bool MSP430AsmPrinter::doInitialization(Module &M) { @@ -97,10 +95,7 @@ void MSP430AsmPrinter::emitFunctionHeader(const MachineFunction &MF) { SwitchToSection(TAI->SectionForGlobal(F)); - unsigned FnAlign = 4; - if (F->hasFnAttr(Attribute::OptimizeForSize)) - FnAlign = 1; - + unsigned FnAlign = MF.getAlignment(); EmitAlignment(FnAlign, F); switch (F->getLinkage()) { diff --git a/lib/Target/MSP430/MSP430ISelLowering.cpp b/lib/Target/MSP430/MSP430ISelLowering.cpp index 14db20e..91a8663 100644 --- a/lib/Target/MSP430/MSP430ISelLowering.cpp +++ b/lib/Target/MSP430/MSP430ISelLowering.cpp @@ -127,6 +127,11 @@ SDValue MSP430TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) { } } +/// getFunctionAlignment - Return the Log2 alignment of this function. +unsigned MSP430TargetLowering::getFunctionAlignment(const Function *F) const { + return F->hasFnAttr(Attribute::OptimizeForSize) ? 1 : 4; +} + //===----------------------------------------------------------------------===// // Calling Convention Implementation //===----------------------------------------------------------------------===// diff --git a/lib/Target/MSP430/MSP430ISelLowering.h b/lib/Target/MSP430/MSP430ISelLowering.h index 404534d..4a90a0e 100644 --- a/lib/Target/MSP430/MSP430ISelLowering.h +++ b/lib/Target/MSP430/MSP430ISelLowering.h @@ -74,6 +74,9 @@ namespace llvm { /// DAG node. virtual const char *getTargetNodeName(unsigned Opcode) const; + /// getFunctionAlignment - Return the Log2 alignment of this function. 
+ virtual unsigned getFunctionAlignment(const Function *F) const; + SDValue LowerFORMAL_ARGUMENTS(SDValue Op, SelectionDAG &DAG); SDValue LowerCALL(SDValue Op, SelectionDAG &DAG); SDValue LowerRET(SDValue Op, SelectionDAG &DAG); diff --git a/lib/Target/MSP430/MSP430RegisterInfo.cpp b/lib/Target/MSP430/MSP430RegisterInfo.cpp index ef6f997..d40bac7 100644 --- a/lib/Target/MSP430/MSP430RegisterInfo.cpp +++ b/lib/Target/MSP430/MSP430RegisterInfo.cpp @@ -350,6 +350,7 @@ unsigned MSP430RegisterInfo::getFrameRegister(MachineFunction &MF) const { int MSP430RegisterInfo::getDwarfRegNum(unsigned RegNum, bool isEH) const { assert(0 && "Not implemented yet!"); + return 0; } #include "MSP430GenRegisterInfo.inc" diff --git a/lib/Target/MSP430/MSP430TargetMachine.cpp b/lib/Target/MSP430/MSP430TargetMachine.cpp index b1fe758..dd09d43 100644 --- a/lib/Target/MSP430/MSP430TargetMachine.cpp +++ b/lib/Target/MSP430/MSP430TargetMachine.cpp @@ -62,7 +62,7 @@ bool MSP430TargetMachine::addAssemblyEmitter(PassManagerBase &PM, bool Verbose, raw_ostream &Out) { // Output assembly language. - PM.add(createMSP430CodePrinterPass(Out, *this, OptLevel, Verbose)); + PM.add(createMSP430CodePrinterPass(Out, *this, Verbose)); return false; } diff --git a/lib/Target/Mips/AsmPrinter/MipsAsmPrinter.cpp b/lib/Target/Mips/AsmPrinter/MipsAsmPrinter.cpp index 431630b..cb40479 100644 --- a/lib/Target/Mips/AsmPrinter/MipsAsmPrinter.cpp +++ b/lib/Target/Mips/AsmPrinter/MipsAsmPrinter.cpp @@ -51,9 +51,8 @@ namespace { const MipsSubtarget *Subtarget; public: explicit MipsAsmPrinter(raw_ostream &O, MipsTargetMachine &TM, - const TargetAsmInfo *T, CodeGenOpt::Level OL, - bool V) - : AsmPrinter(O, TM, T, OL, V) { + const TargetAsmInfo *T, bool V) + : AsmPrinter(O, TM, T, V) { Subtarget = &TM.getSubtarget<MipsSubtarget>(); } @@ -93,9 +92,8 @@ namespace { /// regardless of whether the function is in SSA form. 
FunctionPass *llvm::createMipsCodePrinterPass(raw_ostream &o, MipsTargetMachine &tm, - CodeGenOpt::Level OptLevel, bool verbose) { - return new MipsAsmPrinter(o, tm, tm.getTargetAsmInfo(), OptLevel, verbose); + return new MipsAsmPrinter(o, tm, tm.getTargetAsmInfo(), verbose); } //===----------------------------------------------------------------------===// @@ -230,7 +228,7 @@ emitFunctionStart(MachineFunction &MF) SwitchToSection(TAI->SectionForGlobal(F)); // 2 bits aligned - EmitAlignment(2, F); + EmitAlignment(MF.getAlignment(), F); O << "\t.globl\t" << CurrentFnName << '\n'; O << "\t.ent\t" << CurrentFnName << '\n'; diff --git a/lib/Target/Mips/CMakeLists.txt b/lib/Target/Mips/CMakeLists.txt index 70c7a51..d27e6f1 100644 --- a/lib/Target/Mips/CMakeLists.txt +++ b/lib/Target/Mips/CMakeLists.txt @@ -20,3 +20,5 @@ add_llvm_target(MipsCodeGen MipsTargetAsmInfo.cpp MipsTargetMachine.cpp ) + +target_link_libraries (LLVMMipsCodeGen LLVMSelectionDAG) diff --git a/lib/Target/Mips/Mips.h b/lib/Target/Mips/Mips.h index 0accb4e..9b22a91 100644 --- a/lib/Target/Mips/Mips.h +++ b/lib/Target/Mips/Mips.h @@ -27,7 +27,6 @@ namespace llvm { FunctionPass *createMipsDelaySlotFillerPass(MipsTargetMachine &TM); FunctionPass *createMipsCodePrinterPass(raw_ostream &OS, MipsTargetMachine &TM, - CodeGenOpt::Level OptLevel, bool Verbose); } // end namespace llvm; diff --git a/lib/Target/Mips/MipsISelLowering.cpp b/lib/Target/Mips/MipsISelLowering.cpp index 42afceb..3d2e2b7 100644 --- a/lib/Target/Mips/MipsISelLowering.cpp +++ b/lib/Target/Mips/MipsISelLowering.cpp @@ -154,11 +154,14 @@ MipsTargetLowering(MipsTargetMachine &TM): TargetLowering(TM) computeRegisterProperties(); } - MVT MipsTargetLowering::getSetCCResultType(MVT VT) const { return MVT::i32; } +/// getFunctionAlignment - Return the Log2 alignment of this function. +unsigned MipsTargetLowering::getFunctionAlignment(const Function *) const { + return 2; +} SDValue MipsTargetLowering:: LowerOperation(SDValue Op, SelectionDAG &DAG) diff --git a/lib/Target/Mips/MipsISelLowering.h b/lib/Target/Mips/MipsISelLowering.h index 55cd6ea..9ad4895 100644 --- a/lib/Target/Mips/MipsISelLowering.h +++ b/lib/Target/Mips/MipsISelLowering.h @@ -68,8 +68,6 @@ namespace llvm { //===--------------------------------------------------------------------===// class MipsTargetLowering : public TargetLowering { - // FrameIndex for return slot. - int ReturnAddrIndex; public: explicit MipsTargetLowering(MipsTargetMachine &TM); @@ -84,6 +82,8 @@ namespace llvm { /// getSetCCResultType - get the ISD::SETCC result ValueType MVT getSetCCResultType(MVT VT) const; + /// getFunctionAlignment - Return the Log2 alignment of this function. 
+ virtual unsigned getFunctionAlignment(const Function *F) const;
 private:
 // Subtarget Info
 const MipsSubtarget *Subtarget;
diff --git a/lib/Target/Mips/MipsInstrInfo.cpp b/lib/Target/Mips/MipsInstrInfo.cpp
index 92af973..e16fd8e 100644
--- a/lib/Target/Mips/MipsInstrInfo.cpp
+++ b/lib/Target/Mips/MipsInstrInfo.cpp
@@ -291,14 +291,17 @@ foldMemoryOperandImpl(MachineFunction &MF,
 if (Ops[0] == 0) { // COPY -> STORE
 unsigned SrcReg = MI->getOperand(2).getReg();
 bool isKill = MI->getOperand(2).isKill();
+ bool isUndef = MI->getOperand(2).isUndef();
 NewMI = BuildMI(MF, MI->getDebugLoc(), get(Mips::SW))
- .addReg(SrcReg, getKillRegState(isKill))
+ .addReg(SrcReg, getKillRegState(isKill) | getUndefRegState(isUndef))
 .addImm(0).addFrameIndex(FI);
 } else { // COPY -> LOAD
 unsigned DstReg = MI->getOperand(0).getReg();
 bool isDead = MI->getOperand(0).isDead();
+ bool isUndef = MI->getOperand(0).isUndef();
 NewMI = BuildMI(MF, MI->getDebugLoc(), get(Mips::LW))
- .addReg(DstReg, RegState::Define | getDeadRegState(isDead))
+ .addReg(DstReg, RegState::Define | getDeadRegState(isDead) |
+ getUndefRegState(isUndef))
 .addImm(0).addFrameIndex(FI);
 }
 }
@@ -321,14 +324,17 @@ foldMemoryOperandImpl(MachineFunction &MF,
 if (Ops[0] == 0) { // COPY -> STORE
 unsigned SrcReg = MI->getOperand(1).getReg();
 bool isKill = MI->getOperand(1).isKill();
+ bool isUndef = MI->getOperand(1).isUndef();
 NewMI = BuildMI(MF, MI->getDebugLoc(), get(StoreOpc))
- .addReg(SrcReg, getKillRegState(isKill))
+ .addReg(SrcReg, getKillRegState(isKill) | getUndefRegState(isUndef))
 .addImm(0).addFrameIndex(FI) ;
 } else { // COPY -> LOAD
 unsigned DstReg = MI->getOperand(0).getReg();
 bool isDead = MI->getOperand(0).isDead();
+ bool isUndef = MI->getOperand(0).isUndef();
 NewMI = BuildMI(MF, MI->getDebugLoc(), get(LoadOpc))
- .addReg(DstReg, RegState::Define | getDeadRegState(isDead))
+ .addReg(DstReg, RegState::Define | getDeadRegState(isDead) |
+ getUndefRegState(isUndef))
 .addImm(0).addFrameIndex(FI);
 }
 }
@@ -645,6 +651,7 @@ unsigned MipsInstrInfo::getGlobalBaseReg(MachineFunction *MF) const {
 Mips::CPURegsRegisterClass, Mips::CPURegsRegisterClass);
 assert(Ok && "Couldn't assign to global base register!");
+ Ok = Ok; // Silence warning when assertions are turned off.
 RegInfo.addLiveIn(Mips::GP);
 MipsFI->setGlobalBaseReg(GlobalBaseReg);
diff --git a/lib/Target/Mips/MipsTargetMachine.cpp b/lib/Target/Mips/MipsTargetMachine.cpp
index c5f117b..4675536 100644
--- a/lib/Target/Mips/MipsTargetMachine.cpp
+++ b/lib/Target/Mips/MipsTargetMachine.cpp
@@ -134,6 +134,6 @@ addAssemblyEmitter(PassManagerBase &PM, CodeGenOpt::Level OptLevel,
 bool Verbose, raw_ostream &Out) {
 // Output assembly language.
 assert(AsmPrinterCtor && "AsmPrinter was not linked in");
- PM.add(AsmPrinterCtor(Out, *this, OptLevel, Verbose));
+ PM.add(AsmPrinterCtor(Out, *this, Verbose));
 return false;
 }
diff --git a/lib/Target/Mips/MipsTargetMachine.h b/lib/Target/Mips/MipsTargetMachine.h
index 85fafad..95e5be4 100644
--- a/lib/Target/Mips/MipsTargetMachine.h
+++ b/lib/Target/Mips/MipsTargetMachine.h
@@ -39,7 +39,6 @@ namespace llvm {
 // linked in.
typedef FunctionPass *(*AsmPrinterCtorFn)(raw_ostream &o, MipsTargetMachine &tm, - CodeGenOpt::Level OptLevel, bool verbose); static AsmPrinterCtorFn AsmPrinterCtor; diff --git a/lib/Target/PIC16/PIC16.h b/lib/Target/PIC16/PIC16.h index cf0f9db..7940648 100644 --- a/lib/Target/PIC16/PIC16.h +++ b/lib/Target/PIC16/PIC16.h @@ -151,6 +151,7 @@ namespace PIC16CC { return STATIC_LOCAL; assert (0 && "Could not determine Symbol's tag"); + return PREFIX_SYMBOL; // Silence warning when assertions are turned off. } // addPrefix - add prefix symbol to a name if there isn't one already. @@ -331,7 +332,6 @@ namespace PIC16CC { FunctionPass *createPIC16ISelDag(PIC16TargetMachine &TM); FunctionPass *createPIC16CodePrinterPass(raw_ostream &OS, PIC16TargetMachine &TM, - CodeGenOpt::Level OptLevel, bool Verbose); // Banksel optimzer pass. FunctionPass *createPIC16MemSelOptimizerPass(); diff --git a/lib/Target/PIC16/PIC16AsmPrinter.cpp b/lib/Target/PIC16/PIC16AsmPrinter.cpp index ca1089b..1fc1cc1 100644 --- a/lib/Target/PIC16/PIC16AsmPrinter.cpp +++ b/lib/Target/PIC16/PIC16AsmPrinter.cpp @@ -113,9 +113,8 @@ bool PIC16AsmPrinter::runOnMachineFunction(MachineFunction &MF) { /// FunctionPass *llvm::createPIC16CodePrinterPass(raw_ostream &o, PIC16TargetMachine &tm, - CodeGenOpt::Level OptLevel, bool verbose) { - return new PIC16AsmPrinter(o, tm, tm.getTargetAsmInfo(), OptLevel, verbose); + return new PIC16AsmPrinter(o, tm, tm.getTargetAsmInfo(), verbose); } diff --git a/lib/Target/PIC16/PIC16AsmPrinter.h b/lib/Target/PIC16/PIC16AsmPrinter.h index 3ec5659..cb11687 100644 --- a/lib/Target/PIC16/PIC16AsmPrinter.h +++ b/lib/Target/PIC16/PIC16AsmPrinter.h @@ -30,9 +30,8 @@ namespace llvm { struct VISIBILITY_HIDDEN PIC16AsmPrinter : public AsmPrinter { explicit PIC16AsmPrinter(raw_ostream &O, PIC16TargetMachine &TM, - const TargetAsmInfo *T, CodeGenOpt::Level OL, - bool V) - : AsmPrinter(O, TM, T, OL, V), DbgInfo(O, T) { + const TargetAsmInfo *T, bool V) + : AsmPrinter(O, TM, T, V), DbgInfo(O, T) { PTLI = TM.getTargetLowering(); PTAI = static_cast<const PIC16TargetAsmInfo *> (T); } diff --git a/lib/Target/PIC16/PIC16ISelLowering.cpp b/lib/Target/PIC16/PIC16ISelLowering.cpp index ec1db90..0d24f61 100644 --- a/lib/Target/PIC16/PIC16ISelLowering.cpp +++ b/lib/Target/PIC16/PIC16ISelLowering.cpp @@ -399,7 +399,7 @@ PIC16TargetLowering::MakePIC16Libcall(PIC16ISD::PIC16Libcall Call, const Type *RetTy = RetVT.getTypeForMVT(); std::pair<SDValue,SDValue> CallInfo = LowerCallTo(DAG.getEntryNode(), RetTy, isSigned, !isSigned, false, - false, CallingConv::C, false, Callee, Args, DAG, dl); + false, 0, CallingConv::C, false, Callee, Args, DAG, dl); return CallInfo.first; } @@ -841,12 +841,16 @@ SDValue PIC16TargetLowering::ExpandLoad(SDNode *N, SelectionDAG &DAG) { // i.e. 
without any extension MVT MemVT = LD->getMemoryVT(); unsigned MemBytes = MemVT.getSizeInBits() / 8; + // if MVT::i1 is extended to MVT::i8 then MemBytes will be zero + // So set it to one + if (MemBytes == 0) MemBytes = 1; + unsigned ExtdBytes = VT.getSizeInBits() / 8; Offset = DAG.getConstant(LoadOffset, MVT::i8); Tys = DAG.getVTList(MVT::i8, MVT::Other); // For MemBytes generate PIC16Load with proper offset - for (iter=0; iter<MemBytes; ++iter) { + for (iter=0; iter < MemBytes; ++iter) { // Add the pointer offset if any Offset = DAG.getConstant(iter + LoadOffset, MVT::i8); Load = DAG.getNode(PIC16ISD::PIC16Load, dl, Tys, Chain, PtrLo, PtrHi, @@ -1302,7 +1306,8 @@ SDValue PIC16TargetLowering::LegalizeCALL(SDValue Op, SelectionDAG &DAG) { // Generate new call with all the operands legal return DAG.getCall(TheCall->getCallingConv(), dl, TheCall->isVarArg(), TheCall->isTailCall(), - TheCall->isInreg(), VTs, &Ops[0], Ops.size()); + TheCall->isInreg(), VTs, &Ops[0], Ops.size(), + TheCall->getNumFixedArgs()); } void PIC16TargetLowering:: diff --git a/lib/Target/PIC16/PIC16ISelLowering.h b/lib/Target/PIC16/PIC16ISelLowering.h index ca9650d..b40ea12 100644 --- a/lib/Target/PIC16/PIC16ISelLowering.h +++ b/lib/Target/PIC16/PIC16ISelLowering.h @@ -145,6 +145,11 @@ namespace llvm { unsigned GetTmpSize() { return TmpSize; } void SetTmpSize(unsigned Size) { TmpSize = Size; } + /// getFunctionAlignment - Return the Log2 alignment of this function. + virtual unsigned getFunctionAlignment(const Function *) const { + // FIXME: The function never seems to be aligned. + return 1; + } private: // If the Node is a BUILD_PAIR representing a direct Address, // then this function will return true. diff --git a/lib/Target/PIC16/PIC16InstrInfo.td b/lib/Target/PIC16/PIC16InstrInfo.td index 7557716..a054bdc 100644 --- a/lib/Target/PIC16/PIC16InstrInfo.td +++ b/lib/Target/PIC16/PIC16InstrInfo.td @@ -299,7 +299,7 @@ def store_indirect : // Direct load. // Input Operands are: ptrlo = GA, offset = offset, ptrhi = banksel. // Output: dst = W -let mayLoad = 1 in +let Defs = [STATUS], mayLoad = 1 in class MOVF_INSN<bits<6> OpCode, SDNode OpNodeSrc, SDNode Op>: ByteFormat<0, (outs GPR:$dst), (ins i8imm:$offset, i8mem:$ptrlo, i8imm:$ptrhi), diff --git a/lib/Target/PIC16/PIC16TargetMachine.cpp b/lib/Target/PIC16/PIC16TargetMachine.cpp index 4304732..77ad188 100644 --- a/lib/Target/PIC16/PIC16TargetMachine.cpp +++ b/lib/Target/PIC16/PIC16TargetMachine.cpp @@ -65,11 +65,11 @@ bool PIC16TargetMachine::addInstSelector(PassManagerBase &PM, return false; } -bool PIC16TargetMachine:: -addAssemblyEmitter(PassManagerBase &PM, CodeGenOpt::Level OptLevel, - bool Verbose, raw_ostream &Out) { +bool PIC16TargetMachine::addAssemblyEmitter(PassManagerBase &PM, + CodeGenOpt::Level OptLevel, + bool Verbose, raw_ostream &Out) { // Output assembly language. 
- PM.add(createPIC16CodePrinterPass(Out, *this, OptLevel, Verbose)); + PM.add(createPIC16CodePrinterPass(Out, *this, Verbose)); return false; } diff --git a/lib/Target/PowerPC/AsmPrinter/PPCAsmPrinter.cpp b/lib/Target/PowerPC/AsmPrinter/PPCAsmPrinter.cpp index c5aa6ae..7f1673c 100644 --- a/lib/Target/PowerPC/AsmPrinter/PPCAsmPrinter.cpp +++ b/lib/Target/PowerPC/AsmPrinter/PPCAsmPrinter.cpp @@ -56,9 +56,8 @@ namespace { const PPCSubtarget &Subtarget; public: explicit PPCAsmPrinter(raw_ostream &O, TargetMachine &TM, - const TargetAsmInfo *T, CodeGenOpt::Level OL, - bool V) - : AsmPrinter(O, TM, T, OL, V), + const TargetAsmInfo *T, bool V) + : AsmPrinter(O, TM, T, V), Subtarget(TM.getSubtarget<PPCSubtarget>()) {} virtual const char *getPassName() const { @@ -189,8 +188,7 @@ namespace { if (TM.getRelocationModel() != Reloc::Static) { if (MO.getType() == MachineOperand::MO_GlobalAddress) { GlobalValue *GV = MO.getGlobal(); - if (((GV->isDeclaration() || GV->hasWeakLinkage() || - GV->hasLinkOnceLinkage() || GV->hasCommonLinkage()))) { + if (GV->isDeclaration() || GV->isWeakForLinker()) { // Dynamically-resolved functions need a stub for the function. std::string Name = Mang->getValueName(GV); FnStubs.insert(Name); @@ -296,9 +294,8 @@ namespace { class VISIBILITY_HIDDEN PPCLinuxAsmPrinter : public PPCAsmPrinter { public: explicit PPCLinuxAsmPrinter(raw_ostream &O, PPCTargetMachine &TM, - const TargetAsmInfo *T, CodeGenOpt::Level OL, - bool V) - : PPCAsmPrinter(O, TM, T, OL, V){} + const TargetAsmInfo *T, bool V) + : PPCAsmPrinter(O, TM, T, V){} virtual const char *getPassName() const { return "Linux PPC Assembly Printer"; @@ -323,9 +320,8 @@ namespace { raw_ostream &OS; public: explicit PPCDarwinAsmPrinter(raw_ostream &O, PPCTargetMachine &TM, - const TargetAsmInfo *T, CodeGenOpt::Level OL, - bool V) - : PPCAsmPrinter(O, TM, T, OL, V), OS(O) {} + const TargetAsmInfo *T, bool V) + : PPCAsmPrinter(O, TM, T, V), OS(O) {} virtual const char *getPassName() const { return "Darwin PPC Assembly Printer"; @@ -387,11 +383,12 @@ void PPCAsmPrinter::printOp(const MachineOperand &MO) { if (TM.getRelocationModel() != Reloc::Static) { if (GV->isDeclaration() || GV->isWeakForLinker()) { if (GV->hasHiddenVisibility()) { - if (!GV->isDeclaration() && !GV->hasCommonLinkage()) - O << Name; - else { + if (GV->isDeclaration() || GV->hasCommonLinkage() || + GV->hasAvailableExternallyLinkage()) { HiddenGVStubs.insert(Name); printSuffixedName(Name, "$non_lazy_ptr"); + } else { + O << Name; } } else { GVStubs.insert(Name); @@ -596,7 +593,7 @@ bool PPCLinuxAsmPrinter::runOnMachineFunction(MachineFunction &MF) { printVisibility(CurrentFnName, F->getVisibility()); - EmitAlignment(2, F); + EmitAlignment(MF.getAlignment(), F); O << CurrentFnName << ":\n"; // Emit pre-function debug information. @@ -773,7 +770,7 @@ bool PPCDarwinAsmPrinter::runOnMachineFunction(MachineFunction &MF) { printVisibility(CurrentFnName, F->getVisibility()); - EmitAlignment(F->hasFnAttr(Attribute::OptimizeForSize) ? 2 : 4, F); + EmitAlignment(MF.getAlignment(), F); O << CurrentFnName << ":\n"; // Emit pre-function debug information. 
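The printOp hunk above rewrites the hidden-visibility stub test: the old negative form emitted the name directly unless the value was a definition with non-common linkage, while the new form lists the stub cases positively and adds available_externally to them. A small self-checking sketch of that inversion, with plain booleans standing in for the GlobalValue predicates (hypothetical names, not the LLVM API):

#include <cassert>

// Plain booleans standing in for the GlobalValue predicates used above.
struct GVModel {
  bool isDeclaration;
  bool hasCommonLinkage;
  bool hasAvailableExternallyLinkage;
};

// Old shape: print the name directly for definitions with non-common
// linkage, otherwise fall through to the stub path.
static bool needsStubOld(const GVModel &GV) {
  return !(!GV.isDeclaration && !GV.hasCommonLinkage);
}

// New shape: the stub cases are listed positively, with
// available_externally added as a new stub case.
static bool needsStubNew(const GVModel &GV) {
  return GV.isDeclaration || GV.hasCommonLinkage ||
         GV.hasAvailableExternallyLinkage;
}

int main() {
  // Exhaustive check: the two forms agree everywhere except
  // available_externally definitions, which now take the stub path.
  for (int d = 0; d < 2; ++d)
    for (int c = 0; c < 2; ++c)
      for (int a = 0; a < 2; ++a) {
        GVModel GV = { d != 0, c != 0, a != 0 };
        bool Old = needsStubOld(GV), New = needsStubNew(GV);
        assert(Old == New ||
               (GV.hasAvailableExternallyLinkage && !Old && New));
      }
  return 0;
}

The loop confirms the rewrite is a De Morgan inversion plus one deliberate behavior change: available_externally definitions are now routed through $non_lazy_ptr stubs.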
@@ -1119,16 +1116,13 @@ bool PPCDarwinAsmPrinter::doFinalization(Module &M) {
 ///
FunctionPass *llvm::createPPCAsmPrinterPass(raw_ostream &o,
 PPCTargetMachine &tm,
- CodeGenOpt::Level OptLevel,
 bool verbose) {
 const PPCSubtarget *Subtarget = &tm.getSubtarget<PPCSubtarget>();
 if (Subtarget->isDarwin()) {
- return new PPCDarwinAsmPrinter(o, tm, tm.getTargetAsmInfo(),
- OptLevel, verbose);
+ return new PPCDarwinAsmPrinter(o, tm, tm.getTargetAsmInfo(), verbose);
 } else {
- return new PPCLinuxAsmPrinter(o, tm, tm.getTargetAsmInfo(),
- OptLevel, verbose);
+ return new PPCLinuxAsmPrinter(o, tm, tm.getTargetAsmInfo(), verbose);
 }
 }
diff --git a/lib/Target/PowerPC/CMakeLists.txt b/lib/Target/PowerPC/CMakeLists.txt
index 0b67aff..a6479d8 100644
--- a/lib/Target/PowerPC/CMakeLists.txt
+++ b/lib/Target/PowerPC/CMakeLists.txt
@@ -26,3 +26,5 @@ add_llvm_target(PowerPCCodeGen
 PPCTargetAsmInfo.cpp
 PPCTargetMachine.cpp
 )
+
+target_link_libraries (LLVMPowerPCCodeGen LLVMSelectionDAG)
diff --git a/lib/Target/PowerPC/PPC.h b/lib/Target/PowerPC/PPC.h
index c844e21..f6c3469 100644
--- a/lib/Target/PowerPC/PPC.h
+++ b/lib/Target/PowerPC/PPC.h
@@ -28,9 +28,8 @@ namespace llvm {
 FunctionPass *createPPCBranchSelectionPass();
 FunctionPass *createPPCISelDag(PPCTargetMachine &TM);
-FunctionPass *createPPCAsmPrinterPass(raw_ostream &OS,
- PPCTargetMachine &TM,
- CodeGenOpt::Level OptLevel, bool Verbose);
+FunctionPass *createPPCAsmPrinterPass(raw_ostream &OS, PPCTargetMachine &TM,
+ bool Verbose);
 FunctionPass *createPPCCodeEmitterPass(PPCTargetMachine &TM,
 MachineCodeEmitter &MCE);
 FunctionPass *createPPCJITCodeEmitterPass(PPCTargetMachine &TM,
diff --git a/lib/Target/PowerPC/PPCCallingConv.td b/lib/Target/PowerPC/PPCCallingConv.td
index 9f916f3..c7ce171 100644
--- a/lib/Target/PowerPC/PPCCallingConv.td
+++ b/lib/Target/PowerPC/PPCCallingConv.td
@@ -43,10 +43,8 @@ def CC_PPC : CallingConv<[
 CCIfType<[i64], CCAssignToReg<[X3, X4, X5, X6, X7, X8, X9, X10]>>,
 // Common sub-targets passes FP values in F1 - F13
- CCIfType<[f32, f64], CCIfSubtarget<"isMachoABI()",
- CCAssignToReg<[F1, F2, F3, F4, F5, F6, F7, F8,F9,F10,F11,F12,F13]>>>,
- // ELF32 sub-target pass FP values in F1 - F8.
- CCIfType<[f32, f64], CCAssignToReg<[F1, F2, F3, F4, F5, F6, F7, F8]>>,
+ CCIfType<[f32, f64],
+ CCAssignToReg<[F1, F2, F3, F4, F5, F6, F7, F8,F9,F10,F11,F12,F13]>>,
 // The first 12 Vector arguments are passed in altivec registers.
 CCIfType<[v16i8, v8i16, v4i32, v4f32],
@@ -64,3 +62,86 @@
 */
+//===----------------------------------------------------------------------===//
+// PowerPC System V Release 4 ABI
+//===----------------------------------------------------------------------===//
+
+// _Complex arguments are never split, thus their two scalars are either
+// passed both in argument registers or both on the stack. Also _Complex
+// arguments are always passed in general purpose registers, never in
+// Floating-point registers or vector registers. Arguments which should go
+// on the stack are marked with the inreg parameter attribute.
+// Giving inreg this target-dependent (and counter-intuitive) meaning
+// simplifies things, because function calls are not always coming from the
+// frontend but are also created implicitly e.g. for libcalls. If inreg would
+// actually mean that the argument is passed in a register, then all places
+// which create function calls/function definitions implicitly would need to
+// be aware of this fact and would need to mark arguments accordingly. With
+// inreg meaning that the argument is passed on the stack, this is not an
+// issue, except for calls which involve _Complex types.
+
+def CC_PPC_SVR4_Common : CallingConv<[
+ // The ABI requires i64 to be passed in two adjacent registers with the first
+ // register having an odd register number.
+ CCIfType<[i32], CCIfSplit<CCCustom<"CC_PPC_SVR4_Custom_AlignArgRegs">>>,
+
+ // The first 8 integer arguments are passed in integer registers.
+ CCIfType<[i32], CCIf<"!ArgFlags.isInReg()",
+ CCAssignToReg<[R3, R4, R5, R6, R7, R8, R9, R10]>>>,
+
+ // Make sure the i64 words from a long double are either both passed in
+ // registers or both passed on the stack.
+ CCIfType<[f64], CCIfSplit<CCCustom<"CC_PPC_SVR4_Custom_AlignFPArgRegs">>>,
+
+ // FP values are passed in F1 - F8.
+ CCIfType<[f32, f64], CCAssignToReg<[F1, F2, F3, F4, F5, F6, F7, F8]>>,
+
+ // Split arguments have an alignment of 8 bytes on the stack.
+ CCIfType<[i32], CCIfSplit<CCAssignToStack<4, 8>>>,
+
+ CCIfType<[i32], CCAssignToStack<4, 4>>,
+
+ // Floats are stored in double precision format, thus they have the same
+ // alignment and size as doubles.
+ CCIfType<[f32,f64], CCAssignToStack<8, 8>>,
+
+ // Vectors get 16-byte stack slots that are 16-byte aligned.
+ CCIfType<[v16i8, v8i16, v4i32, v4f32], CCAssignToStack<16, 16>>
+]>;
+
+// This calling convention always puts vector arguments on the stack. It is used
+// to assign vector arguments which belong to the variable portion of the
+// parameter list of a variable argument function.
+def CC_PPC_SVR4_VarArg : CallingConv<[
+ CCDelegateTo<CC_PPC_SVR4_Common>
+]>;
+
+// In contrast to CC_PPC_SVR4_VarArg, this calling convention first tries to put
+// vector arguments in vector registers before putting them on the stack.
+def CC_PPC_SVR4 : CallingConv<[
+ // The first 12 Vector arguments are passed in AltiVec registers.
+ CCIfType<[v16i8, v8i16, v4i32, v4f32],
+ CCAssignToReg<[V2, V3, V4, V5, V6, V7, V8, V9, V10, V11, V12, V13]>>,
+
+ CCDelegateTo<CC_PPC_SVR4_Common>
+]>;
+
+// Helper "calling convention" to handle aggregate by value arguments.
+// Aggregate by value arguments are always placed in the local variable space
+// of the caller. This calling convention is only used to assign those stack
+// offsets in the caller's stack frame.
+//
+// Still, the address of the aggregate copy in the caller's stack frame is passed
+// in a GPR (or in the parameter list area if all GPRs are allocated) from the
+// caller to the callee. The location for the address argument is assigned by
+// the CC_PPC_SVR4 calling convention.
+//
+// The only purpose of CC_PPC_SVR4_Custom_Dummy is to skip arguments which are
+// not passed by value.
+ +def CC_PPC_SVR4_ByVal : CallingConv<[ + CCIfByVal<CCPassByVal<4, 4>>, + + CCCustom<"CC_PPC_SVR4_Custom_Dummy"> +]>; + diff --git a/lib/Target/PowerPC/PPCCodeEmitter.cpp b/lib/Target/PowerPC/PPCCodeEmitter.cpp index aa3dce1..cd6018d 100644 --- a/lib/Target/PowerPC/PPCCodeEmitter.cpp +++ b/lib/Target/PowerPC/PPCCodeEmitter.cpp @@ -163,8 +163,8 @@ unsigned PPCCodeEmitter::getMachineOpValue(const MachineInstr &MI, } else if (MO.isGlobal() || MO.isSymbol() || MO.isCPI() || MO.isJTI()) { unsigned Reloc = 0; - if (MI.getOpcode() == PPC::BL_Macho || MI.getOpcode() == PPC::BL8_Macho || - MI.getOpcode() == PPC::BL_ELF || MI.getOpcode() == PPC::BL8_ELF || + if (MI.getOpcode() == PPC::BL_Darwin || MI.getOpcode() == PPC::BL8_Darwin || + MI.getOpcode() == PPC::BL_SVR4 || MI.getOpcode() == PPC::BL8_ELF || MI.getOpcode() == PPC::TAILB || MI.getOpcode() == PPC::TAILB8) Reloc = PPC::reloc_pcrel_bx; else { @@ -246,9 +246,9 @@ unsigned PPCCodeEmitter::getMachineOpValue(const MachineInstr &MI, } else if (MO.isMBB()) { unsigned Reloc = 0; unsigned Opcode = MI.getOpcode(); - if (Opcode == PPC::B || Opcode == PPC::BL_Macho || - Opcode == PPC::BLA_Macho || Opcode == PPC::BL_ELF || - Opcode == PPC::BLA_ELF) + if (Opcode == PPC::B || Opcode == PPC::BL_Darwin || + Opcode == PPC::BLA_Darwin|| Opcode == PPC::BL_SVR4 || + Opcode == PPC::BLA_SVR4) Reloc = PPC::reloc_pcrel_bx; else // BCC instruction Reloc = PPC::reloc_pcrel_bcx; diff --git a/lib/Target/PowerPC/PPCFrameInfo.h b/lib/Target/PowerPC/PPCFrameInfo.h index 1b5893d..770a560 100644 --- a/lib/Target/PowerPC/PPCFrameInfo.h +++ b/lib/Target/PowerPC/PPCFrameInfo.h @@ -14,8 +14,10 @@ #define POWERPC_FRAMEINFO_H #include "PPC.h" +#include "PPCSubtarget.h" #include "llvm/Target/TargetFrameInfo.h" #include "llvm/Target/TargetMachine.h" +#include "llvm/ADT/STLExtras.h" namespace llvm { @@ -29,63 +31,153 @@ public: /// getReturnSaveOffset - Return the previous frame offset to save the /// return address. - static unsigned getReturnSaveOffset(bool LP64, bool isMacho) { - if (isMacho) + static unsigned getReturnSaveOffset(bool LP64, bool isDarwinABI) { + if (isDarwinABI) return LP64 ? 16 : 8; - // For ELF 32 ABI: + // SVR4 ABI: return 4; } /// getFramePointerSaveOffset - Return the previous frame offset to save the /// frame pointer. - static unsigned getFramePointerSaveOffset(bool LP64, bool isMacho) { - // For MachO ABI: + static unsigned getFramePointerSaveOffset(bool LP64, bool isDarwinABI) { + // For the Darwin ABI: // Use the TOC save slot in the PowerPC linkage area for saving the frame // pointer (if needed.) LLVM does not generate code that uses the TOC (R2 // is treated as a caller saved register.) - if (isMacho) + if (isDarwinABI) return LP64 ? 40 : 20; - // For ELF 32 ABI: + // SVR4 ABI: // Save it right before the link register return -4U; } /// getLinkageSize - Return the size of the PowerPC ABI linkage area. /// - static unsigned getLinkageSize(bool LP64, bool isMacho) { - if (isMacho) + static unsigned getLinkageSize(bool LP64, bool isDarwinABI) { + if (isDarwinABI) return 6 * (LP64 ? 8 : 4); - // For ELF 32 ABI: + // SVR4 ABI: return 8; } /// getMinCallArgumentsSize - Return the size of the minium PowerPC ABI /// argument area. 
- static unsigned getMinCallArgumentsSize(bool LP64, bool isMacho) { - // For Macho ABI: + static unsigned getMinCallArgumentsSize(bool LP64, bool isDarwinABI) { + // For the Darwin ABI: // The prolog code of the callee may store up to 8 GPR argument registers to // the stack, allowing va_start to index over them in memory if its varargs. // Because we cannot tell if this is needed on the caller side, we have to // conservatively assume that it is needed. As such, make sure we have at // least enough stack space for the caller to store the 8 GPRs. - if (isMacho) + if (isDarwinABI) return 8 * (LP64 ? 8 : 4); - // For ELF 32 ABI: + // SVR4 ABI: // There is no default stack allocated for the 8 first GPR arguments. return 0; } /// getMinCallFrameSize - Return the minimum size a call frame can be using /// the PowerPC ABI. - static unsigned getMinCallFrameSize(bool LP64, bool isMacho) { + static unsigned getMinCallFrameSize(bool LP64, bool isDarwinABI) { // The call frame needs to be at least big enough for linkage and 8 args. - return getLinkageSize(LP64, isMacho) + - getMinCallArgumentsSize(LP64, isMacho); + return getLinkageSize(LP64, isDarwinABI) + + getMinCallArgumentsSize(LP64, isDarwinABI); + } + + // With the SVR4 ABI, callee-saved registers have fixed offsets on the stack. + const std::pair<unsigned, int> * + getCalleeSavedSpillSlots(unsigned &NumEntries) const { + // Early exit if not using the SVR4 ABI. + if (!TM.getSubtarget<PPCSubtarget>().isSVR4ABI()) { + NumEntries = 0; + return 0; + } + + static const std::pair<unsigned, int> Offsets[] = { + // Floating-point register save area offsets. + std::pair<unsigned, int>(PPC::F31, -8), + std::pair<unsigned, int>(PPC::F30, -16), + std::pair<unsigned, int>(PPC::F29, -24), + std::pair<unsigned, int>(PPC::F28, -32), + std::pair<unsigned, int>(PPC::F27, -40), + std::pair<unsigned, int>(PPC::F26, -48), + std::pair<unsigned, int>(PPC::F25, -56), + std::pair<unsigned, int>(PPC::F24, -64), + std::pair<unsigned, int>(PPC::F23, -72), + std::pair<unsigned, int>(PPC::F22, -80), + std::pair<unsigned, int>(PPC::F21, -88), + std::pair<unsigned, int>(PPC::F20, -96), + std::pair<unsigned, int>(PPC::F19, -104), + std::pair<unsigned, int>(PPC::F18, -112), + std::pair<unsigned, int>(PPC::F17, -120), + std::pair<unsigned, int>(PPC::F16, -128), + std::pair<unsigned, int>(PPC::F15, -136), + std::pair<unsigned, int>(PPC::F14, -144), + + // General register save area offsets. + std::pair<unsigned, int>(PPC::R31, -4), + std::pair<unsigned, int>(PPC::R30, -8), + std::pair<unsigned, int>(PPC::R29, -12), + std::pair<unsigned, int>(PPC::R28, -16), + std::pair<unsigned, int>(PPC::R27, -20), + std::pair<unsigned, int>(PPC::R26, -24), + std::pair<unsigned, int>(PPC::R25, -28), + std::pair<unsigned, int>(PPC::R24, -32), + std::pair<unsigned, int>(PPC::R23, -36), + std::pair<unsigned, int>(PPC::R22, -40), + std::pair<unsigned, int>(PPC::R21, -44), + std::pair<unsigned, int>(PPC::R20, -48), + std::pair<unsigned, int>(PPC::R19, -52), + std::pair<unsigned, int>(PPC::R18, -56), + std::pair<unsigned, int>(PPC::R17, -60), + std::pair<unsigned, int>(PPC::R16, -64), + std::pair<unsigned, int>(PPC::R15, -68), + std::pair<unsigned, int>(PPC::R14, -72), + + // CR save area offset. + // FIXME SVR4: Disable CR save area for now. 
+// std::pair<unsigned, int>(PPC::CR2, -4), +// std::pair<unsigned, int>(PPC::CR3, -4), +// std::pair<unsigned, int>(PPC::CR4, -4), +// std::pair<unsigned, int>(PPC::CR2LT, -4), +// std::pair<unsigned, int>(PPC::CR2GT, -4), +// std::pair<unsigned, int>(PPC::CR2EQ, -4), +// std::pair<unsigned, int>(PPC::CR2UN, -4), +// std::pair<unsigned, int>(PPC::CR3LT, -4), +// std::pair<unsigned, int>(PPC::CR3GT, -4), +// std::pair<unsigned, int>(PPC::CR3EQ, -4), +// std::pair<unsigned, int>(PPC::CR3UN, -4), +// std::pair<unsigned, int>(PPC::CR4LT, -4), +// std::pair<unsigned, int>(PPC::CR4GT, -4), +// std::pair<unsigned, int>(PPC::CR4EQ, -4), +// std::pair<unsigned, int>(PPC::CR4UN, -4), + + // VRSAVE save area offset. + std::pair<unsigned, int>(PPC::VRSAVE, -4), + + // Vector register save area + std::pair<unsigned, int>(PPC::V31, -16), + std::pair<unsigned, int>(PPC::V30, -32), + std::pair<unsigned, int>(PPC::V29, -48), + std::pair<unsigned, int>(PPC::V28, -64), + std::pair<unsigned, int>(PPC::V27, -80), + std::pair<unsigned, int>(PPC::V26, -96), + std::pair<unsigned, int>(PPC::V25, -112), + std::pair<unsigned, int>(PPC::V24, -128), + std::pair<unsigned, int>(PPC::V23, -144), + std::pair<unsigned, int>(PPC::V22, -160), + std::pair<unsigned, int>(PPC::V21, -176), + std::pair<unsigned, int>(PPC::V20, -192) + }; + + NumEntries = array_lengthof(Offsets); + + return Offsets; } - }; } // End llvm namespace diff --git a/lib/Target/PowerPC/PPCHazardRecognizers.cpp b/lib/Target/PowerPC/PPCHazardRecognizers.cpp index e7658fc..ec3e757 100644 --- a/lib/Target/PowerPC/PPCHazardRecognizers.cpp +++ b/lib/Target/PowerPC/PPCHazardRecognizers.cpp @@ -159,7 +159,7 @@ getHazardType(SUnit *SU) { } // Do not allow MTCTR and BCTRL to be in the same dispatch group. - if (HasCTRSet && (Opcode == PPC::BCTRL_Macho || Opcode == PPC::BCTRL_ELF)) + if (HasCTRSet && (Opcode == PPC::BCTRL_Darwin || Opcode == PPC::BCTRL_SVR4)) return NoopHazard; // If this is a load following a store, make sure it's not to the same or diff --git a/lib/Target/PowerPC/PPCISelLowering.cpp b/lib/Target/PowerPC/PPCISelLowering.cpp index 87f8fb0b4..1c6b287 100644 --- a/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/lib/Target/PowerPC/PPCISelLowering.cpp @@ -35,6 +35,21 @@ #include "llvm/DerivedTypes.h" using namespace llvm; +static bool CC_PPC_SVR4_Custom_Dummy(unsigned &ValNo, MVT &ValVT, MVT &LocVT, + CCValAssign::LocInfo &LocInfo, + ISD::ArgFlagsTy &ArgFlags, + CCState &State); +static bool CC_PPC_SVR4_Custom_AlignArgRegs(unsigned &ValNo, MVT &ValVT, + MVT &LocVT, + CCValAssign::LocInfo &LocInfo, + ISD::ArgFlagsTy &ArgFlags, + CCState &State); +static bool CC_PPC_SVR4_Custom_AlignFPArgRegs(unsigned &ValNo, MVT &ValVT, + MVT &LocVT, + CCValAssign::LocInfo &LocInfo, + ISD::ArgFlagsTy &ArgFlags, + CCState &State); + static cl::opt<bool> EnablePPCPreinc("enable-ppc-preinc", cl::desc("enable preincrement load/store generation on PPC (experimental)"), cl::Hidden); @@ -190,8 +205,8 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM) // VASTART needs to be custom lowered to use the VarArgsFrameIndex setOperationAction(ISD::VASTART , MVT::Other, Custom); - // VAARG is custom lowered with ELF 32 ABI - if (TM.getSubtarget<PPCSubtarget>().isELF32_ABI()) + // VAARG is custom lowered with the SVR4 ABI + if (TM.getSubtarget<PPCSubtarget>().isSVR4ABI()) setOperationAction(ISD::VAARG, MVT::Other, Custom); else setOperationAction(ISD::VAARG, MVT::Other, Expand); @@ -380,7 +395,7 @@ unsigned PPCTargetLowering::getByValTypeAlignment(const Type *Ty) const { // 
Darwin passes everything on 4 byte boundary. if (TM.getSubtarget<PPCSubtarget>().isDarwin()) return 4; - // FIXME Elf TBD + // FIXME SVR4 TBD return 4; } @@ -404,11 +419,11 @@ const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const { case PPCISD::SHL: return "PPCISD::SHL"; case PPCISD::EXTSW_32: return "PPCISD::EXTSW_32"; case PPCISD::STD_32: return "PPCISD::STD_32"; - case PPCISD::CALL_ELF: return "PPCISD::CALL_ELF"; - case PPCISD::CALL_Macho: return "PPCISD::CALL_Macho"; + case PPCISD::CALL_SVR4: return "PPCISD::CALL_SVR4"; + case PPCISD::CALL_Darwin: return "PPCISD::CALL_Darwin"; case PPCISD::MTCTR: return "PPCISD::MTCTR"; - case PPCISD::BCTRL_Macho: return "PPCISD::BCTRL_Macho"; - case PPCISD::BCTRL_ELF: return "PPCISD::BCTRL_ELF"; + case PPCISD::BCTRL_Darwin: return "PPCISD::BCTRL_Darwin"; + case PPCISD::BCTRL_SVR4: return "PPCISD::BCTRL_SVR4"; case PPCISD::RET_FLAG: return "PPCISD::RET_FLAG"; case PPCISD::MFCR: return "PPCISD::MFCR"; case PPCISD::VCMP: return "PPCISD::VCMP"; @@ -428,11 +443,17 @@ const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const { } } - MVT PPCTargetLowering::getSetCCResultType(MVT VT) const { return MVT::i32; } +/// getFunctionAlignment - Return the Log2 alignment of this function. +unsigned PPCTargetLowering::getFunctionAlignment(const Function *F) const { + if (getTargetMachine().getSubtarget<PPCSubtarget>().isDarwin()) + return F->hasFnAttr(Attribute::OptimizeForSize) ? 2 : 4; + else + return 2; +} //===----------------------------------------------------------------------===// // Node matching predicates, for use by the tblgen matching code. @@ -1228,7 +1249,7 @@ SDValue PPCTargetLowering::LowerVAARG(SDValue Op, SelectionDAG &DAG, unsigned VarArgsNumFPR, const PPCSubtarget &Subtarget) { - assert(0 && "VAARG in ELF32 ABI not implemented yet!"); + assert(0 && "VAARG not yet implemented for the SVR4 ABI!"); return SDValue(); // Not reached } @@ -1261,7 +1282,7 @@ SDValue PPCTargetLowering::LowerTRAMPOLINE(SDValue Op, SelectionDAG &DAG) { // Lower to a call to __trampoline_setup(Trmp, TrampSize, FPtr, ctx_reg) std::pair<SDValue, SDValue> CallResult = LowerCallTo(Chain, Op.getValueType().getTypeForMVT(), false, false, - false, false, CallingConv::C, false, + false, false, 0, CallingConv::C, false, DAG.getExternalSymbol("__trampoline_setup", PtrVT), Args, DAG, dl); @@ -1279,7 +1300,7 @@ SDValue PPCTargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG, const PPCSubtarget &Subtarget) { DebugLoc dl = Op.getDebugLoc(); - if (Subtarget.isMachoABI()) { + if (Subtarget.isDarwinABI()) { // vastart just stores the address of the VarArgsFrameIndex slot into the // memory location argument. MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); @@ -1288,7 +1309,7 @@ SDValue PPCTargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG, return DAG.getStore(Op.getOperand(0), dl, FR, Op.getOperand(1), SV, 0); } - // For ELF 32 ABI we follow the layout of the va_list struct. + // For the SVR4 ABI we follow the layout of the va_list struct. // We suppose the given va_list is already allocated. 
// // typedef struct { @@ -1313,8 +1334,8 @@ SDValue PPCTargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG, // } va_list[1]; - SDValue ArgGPR = DAG.getConstant(VarArgsNumGPR, MVT::i8); - SDValue ArgFPR = DAG.getConstant(VarArgsNumFPR, MVT::i8); + SDValue ArgGPR = DAG.getConstant(VarArgsNumGPR, MVT::i32); + SDValue ArgFPR = DAG.getConstant(VarArgsNumFPR, MVT::i32); MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); @@ -1334,15 +1355,15 @@ SDValue PPCTargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG, const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue(); // Store first byte : number of int regs - SDValue firstStore = DAG.getStore(Op.getOperand(0), dl, ArgGPR, - Op.getOperand(1), SV, 0); + SDValue firstStore = DAG.getTruncStore(Op.getOperand(0), dl, ArgGPR, + Op.getOperand(1), SV, 0, MVT::i8); uint64_t nextOffset = FPROffset; SDValue nextPtr = DAG.getNode(ISD::ADD, dl, PtrVT, Op.getOperand(1), ConstFPROffset); // Store second byte : number of float regs SDValue secondStore = - DAG.getStore(firstStore, dl, ArgFPR, nextPtr, SV, nextOffset); + DAG.getTruncStore(firstStore, dl, ArgFPR, nextPtr, SV, nextOffset, MVT::i8); nextOffset += StackOffset; nextPtr = DAG.getNode(ISD::ADD, dl, PtrVT, nextPtr, ConstStackOffset); @@ -1359,10 +1380,71 @@ SDValue PPCTargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG, #include "PPCGenCallingConv.inc" +static bool CC_PPC_SVR4_Custom_Dummy(unsigned &ValNo, MVT &ValVT, MVT &LocVT, + CCValAssign::LocInfo &LocInfo, + ISD::ArgFlagsTy &ArgFlags, + CCState &State) { + return true; +} + +static bool CC_PPC_SVR4_Custom_AlignArgRegs(unsigned &ValNo, MVT &ValVT, + MVT &LocVT, + CCValAssign::LocInfo &LocInfo, + ISD::ArgFlagsTy &ArgFlags, + CCState &State) { + static const unsigned ArgRegs[] = { + PPC::R3, PPC::R4, PPC::R5, PPC::R6, + PPC::R7, PPC::R8, PPC::R9, PPC::R10, + }; + const unsigned NumArgRegs = array_lengthof(ArgRegs); + + unsigned RegNum = State.getFirstUnallocated(ArgRegs, NumArgRegs); + + // Skip one register if the first unallocated register has an even register + // number and there are still argument registers available which have not been + // allocated yet. RegNum is actually an index into ArgRegs, which means we + // need to skip a register if RegNum is odd. + if (RegNum != NumArgRegs && RegNum % 2 == 1) { + State.AllocateReg(ArgRegs[RegNum]); + } + + // Always return false here, as this function only makes sure that the first + // unallocated register has an odd register number and does not actually + // allocate a register for the current argument. + return false; +} + +static bool CC_PPC_SVR4_Custom_AlignFPArgRegs(unsigned &ValNo, MVT &ValVT, + MVT &LocVT, + CCValAssign::LocInfo &LocInfo, + ISD::ArgFlagsTy &ArgFlags, + CCState &State) { + static const unsigned ArgRegs[] = { + PPC::F1, PPC::F2, PPC::F3, PPC::F4, PPC::F5, PPC::F6, PPC::F7, + PPC::F8 + }; + + const unsigned NumArgRegs = array_lengthof(ArgRegs); + + unsigned RegNum = State.getFirstUnallocated(ArgRegs, NumArgRegs); + + // If there is only one Floating-point register left we need to put both f64 + // values of a split ppc_fp128 value on the stack. + if (RegNum != NumArgRegs && ArgRegs[RegNum] == PPC::F8) { + State.AllocateReg(ArgRegs[RegNum]); + } + + // Always return false here, as this function only makes sure that the two f64 + // values a ppc_fp128 value is split into are both passed in registers or both + // passed on the stack and does not actually allocate a register for the + // current argument. 
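The three custom calling-convention hooks above never bind the current argument themselves; the hunk resumes below with their return false, which hands the argument back to the generated CC_PPC_SVR4 tables. A standalone sketch of the even/odd rule in CC_PPC_SVR4_Custom_AlignArgRegs, with a hypothetical RegAllocState standing in for CCState (assumption: first-fit allocation over R3..R10, so an odd index is an even-numbered register):

#include <cassert>

// Hypothetical stand-in for CCState's first-fit register bookkeeping.
struct RegAllocState {
  bool Used[8] = {};
  unsigned firstUnallocated() const {
    for (unsigned i = 0; i != 8; ++i) if (!Used[i]) return i;
    return 8;
  }
  void allocate(unsigned i) { Used[i] = true; }
};

// The skip rule: if the next free slot has an odd index (register R4, R6,
// R8 or R10), burn it so a 64-bit pair starts on an odd-numbered register.
static void alignPairStart(RegAllocState &S) {
  unsigned RegNum = S.firstUnallocated();
  if (RegNum != 8 && RegNum % 2 == 1)
    S.allocate(RegNum); // skipped, never bound to an argument
}

int main() {
  RegAllocState S;
  S.allocate(0);      // pretend R3 already carries an i32 argument
  alignPairStart(S);  // skips R4
  assert(S.firstUnallocated() == 2); // the i64 pair will take R5/R6
}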
+ return false; +} + /// GetFPR - Get the set of FP registers that should be allocated for arguments, /// depending on which subtarget is selected. static const unsigned *GetFPR(const PPCSubtarget &Subtarget) { - if (Subtarget.isMachoABI()) { + if (Subtarget.isDarwinABI()) { static const unsigned FPR[] = { PPC::F1, PPC::F2, PPC::F3, PPC::F4, PPC::F5, PPC::F6, PPC::F7, PPC::F8, PPC::F9, PPC::F10, PPC::F11, PPC::F12, PPC::F13 @@ -1381,9 +1463,9 @@ static const unsigned *GetFPR(const PPCSubtarget &Subtarget) { /// CalculateStackSlotSize - Calculates the size reserved for this argument on /// the stack. static unsigned CalculateStackSlotSize(SDValue Arg, ISD::ArgFlagsTy Flags, - bool isVarArg, unsigned PtrByteSize) { + unsigned PtrByteSize) { MVT ArgVT = Arg.getValueType(); - unsigned ArgSize =ArgVT.getSizeInBits()/8; + unsigned ArgSize = ArgVT.getSizeInBits()/8; if (Flags.isByVal()) ArgSize = Flags.getByValSize(); ArgSize = ((ArgSize + PtrByteSize - 1)/PtrByteSize) * PtrByteSize; @@ -1392,18 +1474,248 @@ static unsigned CalculateStackSlotSize(SDValue Arg, ISD::ArgFlagsTy Flags, } SDValue -PPCTargetLowering::LowerFORMAL_ARGUMENTS(SDValue Op, - SelectionDAG &DAG, - int &VarArgsFrameIndex, - int &VarArgsStackOffset, - unsigned &VarArgsNumGPR, - unsigned &VarArgsNumFPR, - const PPCSubtarget &Subtarget) { +PPCTargetLowering::LowerFORMAL_ARGUMENTS_SVR4(SDValue Op, + SelectionDAG &DAG, + int &VarArgsFrameIndex, + int &VarArgsStackOffset, + unsigned &VarArgsNumGPR, + unsigned &VarArgsNumFPR, + const PPCSubtarget &Subtarget) { + // SVR4 ABI Stack Frame Layout: + // +-----------------------------------+ + // +--> | Back chain | + // | +-----------------------------------+ + // | | Floating-point register save area | + // | +-----------------------------------+ + // | | General register save area | + // | +-----------------------------------+ + // | | CR save word | + // | +-----------------------------------+ + // | | VRSAVE save word | + // | +-----------------------------------+ + // | | Alignment padding | + // | +-----------------------------------+ + // | | Vector register save area | + // | +-----------------------------------+ + // | | Local variable space | + // | +-----------------------------------+ + // | | Parameter list area | + // | +-----------------------------------+ + // | | LR save word | + // | +-----------------------------------+ + // SP--> +--- | Back chain | + // +-----------------------------------+ + // + // Specifications: + // System V Application Binary Interface PowerPC Processor Supplement + // AltiVec Technology Programming Interface Manual + + MachineFunction &MF = DAG.getMachineFunction(); + MachineFrameInfo *MFI = MF.getFrameInfo(); + SmallVector<SDValue, 8> ArgValues; + SDValue Root = Op.getOperand(0); + bool isVarArg = cast<ConstantSDNode>(Op.getOperand(2))->getZExtValue() != 0; + DebugLoc dl = Op.getDebugLoc(); + + MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); + // Potential tail calls could cause overwriting of argument stack slots. + unsigned CC = MF.getFunction()->getCallingConv(); + bool isImmutable = !(PerformTailCallOpt && (CC==CallingConv::Fast)); + unsigned PtrByteSize = 4; + + // Assign locations to all of the incoming arguments. + SmallVector<CCValAssign, 16> ArgLocs; + CCState CCInfo(CC, isVarArg, getTargetMachine(), ArgLocs); + + // Reserve space for the linkage area on the stack. 
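The code resumes below with the CCInfo.AllocateStack call that reserves this linkage area before any argument is assigned. A sketch of the sizes being threaded through the new (LP64, isDarwinABI) parameters; the 24/48-byte Darwin figures match the [SP][CR][LR][3 x unused] comment later in this patch, while the 8-byte SVR4 figure is an assumption read off the back chain plus LR save word at the bottom of the frame layout drawn above:

#include <cassert>

// Sketch, not the PPCFrameInfo header: linkage-area bytes per ABI.
static unsigned linkageSize(bool LP64, bool isDarwinABI) {
  if (isDarwinABI || LP64)
    return 6 * (LP64 ? 8 : 4); // [SP][CR][LR][3 x unused]
  return 8;                    // SVR4: back chain + LR save word (assumed)
}

int main() {
  assert(linkageSize(false, true)  == 24); // Darwin ppc32
  assert(linkageSize(true,  true)  == 48); // Darwin ppc64
  assert(linkageSize(false, false) == 8);  // SVR4 ppc32
}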
+ CCInfo.AllocateStack(PPCFrameInfo::getLinkageSize(false, false), PtrByteSize); + + CCInfo.AnalyzeFormalArguments(Op.getNode(), CC_PPC_SVR4); + + for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) { + CCValAssign &VA = ArgLocs[i]; + + // Arguments stored in registers. + if (VA.isRegLoc()) { + TargetRegisterClass *RC; + MVT ValVT = VA.getValVT(); + + switch (ValVT.getSimpleVT()) { + default: + assert(0 && "ValVT not supported by FORMAL_ARGUMENTS Lowering"); + case MVT::i32: + RC = PPC::GPRCRegisterClass; + break; + case MVT::f32: + RC = PPC::F4RCRegisterClass; + break; + case MVT::f64: + RC = PPC::F8RCRegisterClass; + break; + case MVT::v16i8: + case MVT::v8i16: + case MVT::v4i32: + case MVT::v4f32: + RC = PPC::VRRCRegisterClass; + break; + } + + // Transform the arguments stored in physical registers into virtual ones. + unsigned Reg = MF.addLiveIn(VA.getLocReg(), RC); + SDValue ArgValue = DAG.getCopyFromReg(Root, dl, Reg, ValVT); + + ArgValues.push_back(ArgValue); + } else { + // Argument stored in memory. + assert(VA.isMemLoc()); + + unsigned ArgSize = VA.getLocVT().getSizeInBits() / 8; + int FI = MFI->CreateFixedObject(ArgSize, VA.getLocMemOffset(), + isImmutable); + + // Create load nodes to retrieve arguments from the stack. + SDValue FIN = DAG.getFrameIndex(FI, PtrVT); + ArgValues.push_back(DAG.getLoad(VA.getValVT(), dl, Root, FIN, NULL, 0)); + } + } + + // Assign locations to all of the incoming aggregate by value arguments. + // Aggregates passed by value are stored in the local variable space of the + // caller's stack frame, right above the parameter list area. + SmallVector<CCValAssign, 16> ByValArgLocs; + CCState CCByValInfo(CC, isVarArg, getTargetMachine(), ByValArgLocs); + + // Reserve stack space for the allocations in CCInfo. + CCByValInfo.AllocateStack(CCInfo.getNextStackOffset(), PtrByteSize); + + CCByValInfo.AnalyzeFormalArguments(Op.getNode(), CC_PPC_SVR4_ByVal); + + // Area that is at least reserved in the caller of this function. + unsigned MinReservedArea = CCByValInfo.getNextStackOffset(); + + // Set the size that is at least reserved in caller of this function. Tail + // call optimized function's reserved stack space needs to be aligned so that + // taking the difference between two stack areas will result in an aligned + // stack. + PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>(); + + MinReservedArea = + std::max(MinReservedArea, + PPCFrameInfo::getMinCallFrameSize(false, false)); + + unsigned TargetAlign = DAG.getMachineFunction().getTarget().getFrameInfo()-> + getStackAlignment(); + unsigned AlignMask = TargetAlign-1; + MinReservedArea = (MinReservedArea + AlignMask) & ~AlignMask; + + FI->setMinReservedArea(MinReservedArea); + + SmallVector<SDValue, 8> MemOps; + + // If the function takes variable number of arguments, make a frame index for + // the start of the first vararg value... for expansion of llvm.va_start. + if (isVarArg) { + static const unsigned GPArgRegs[] = { + PPC::R3, PPC::R4, PPC::R5, PPC::R6, + PPC::R7, PPC::R8, PPC::R9, PPC::R10, + }; + const unsigned NumGPArgRegs = array_lengthof(GPArgRegs); + + static const unsigned FPArgRegs[] = { + PPC::F1, PPC::F2, PPC::F3, PPC::F4, PPC::F5, PPC::F6, PPC::F7, + PPC::F8 + }; + const unsigned NumFPArgRegs = array_lengthof(FPArgRegs); + + VarArgsNumGPR = CCInfo.getFirstUnallocated(GPArgRegs, NumGPArgRegs); + VarArgsNumFPR = CCInfo.getFirstUnallocated(FPArgRegs, NumFPArgRegs); + + // Make room for NumGPArgRegs and NumFPArgRegs. 
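The Depth computation that follows sizes this register save area. With 4-byte pointers it works out to 8 GPRs x 4 bytes + 8 FPRs x 8 bytes = 96 bytes; the same arithmetic, checked in isolation:

#include <cassert>

int main() {
  const int NumGPArgRegs = 8, NumFPArgRegs = 8;      // R3-R10, F1-F8
  int Depth = NumGPArgRegs * 4 + NumFPArgRegs * 8;   // ppc32: i32 GPRs, f64 FPRs
  assert(Depth == 96); // bytes backing the SVR4 varargs register save area
}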
+ int Depth = NumGPArgRegs * PtrVT.getSizeInBits()/8 + + NumFPArgRegs * MVT(MVT::f64).getSizeInBits()/8; + + VarArgsStackOffset = MFI->CreateFixedObject(PtrVT.getSizeInBits()/8, + CCInfo.getNextStackOffset()); + + VarArgsFrameIndex = MFI->CreateStackObject(Depth, 8); + SDValue FIN = DAG.getFrameIndex(VarArgsFrameIndex, PtrVT); + + // The fixed integer arguments of a variadic function are + // stored to the VarArgsFrameIndex on the stack. + unsigned GPRIndex = 0; + for (; GPRIndex != VarArgsNumGPR; ++GPRIndex) { + SDValue Val = DAG.getRegister(GPArgRegs[GPRIndex], PtrVT); + SDValue Store = DAG.getStore(Root, dl, Val, FIN, NULL, 0); + MemOps.push_back(Store); + // Increment the address by four for the next argument to store + SDValue PtrOff = DAG.getConstant(PtrVT.getSizeInBits()/8, PtrVT); + FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff); + } + + // If this function is vararg, store any remaining integer argument regs + // to their spots on the stack so that they may be loaded by deferencing the + // result of va_next. + for (; GPRIndex != NumGPArgRegs; ++GPRIndex) { + unsigned VReg = MF.addLiveIn(GPArgRegs[GPRIndex], &PPC::GPRCRegClass); + + SDValue Val = DAG.getCopyFromReg(Root, dl, VReg, PtrVT); + SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN, NULL, 0); + MemOps.push_back(Store); + // Increment the address by four for the next argument to store + SDValue PtrOff = DAG.getConstant(PtrVT.getSizeInBits()/8, PtrVT); + FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff); + } + + // FIXME SVR4: We only need to save FP argument registers if CR bit 6 is + // set. + + // The double arguments are stored to the VarArgsFrameIndex + // on the stack. + unsigned FPRIndex = 0; + for (FPRIndex = 0; FPRIndex != VarArgsNumFPR; ++FPRIndex) { + SDValue Val = DAG.getRegister(FPArgRegs[FPRIndex], MVT::f64); + SDValue Store = DAG.getStore(Root, dl, Val, FIN, NULL, 0); + MemOps.push_back(Store); + // Increment the address by eight for the next argument to store + SDValue PtrOff = DAG.getConstant(MVT(MVT::f64).getSizeInBits()/8, + PtrVT); + FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff); + } + + for (; FPRIndex != NumFPArgRegs; ++FPRIndex) { + unsigned VReg = MF.addLiveIn(FPArgRegs[FPRIndex], &PPC::F8RCRegClass); + + SDValue Val = DAG.getCopyFromReg(Root, dl, VReg, MVT::f64); + SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN, NULL, 0); + MemOps.push_back(Store); + // Increment the address by eight for the next argument to store + SDValue PtrOff = DAG.getConstant(MVT(MVT::f64).getSizeInBits()/8, + PtrVT); + FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff); + } + } + + if (!MemOps.empty()) + Root = DAG.getNode(ISD::TokenFactor, dl, + MVT::Other, &MemOps[0], MemOps.size()); + + + ArgValues.push_back(Root); + + // Return the new list of results. + return DAG.getNode(ISD::MERGE_VALUES, dl, Op.getNode()->getVTList(), + &ArgValues[0], ArgValues.size()).getValue(Op.getResNo()); +} + +SDValue +PPCTargetLowering::LowerFORMAL_ARGUMENTS_Darwin(SDValue Op, + SelectionDAG &DAG, + int &VarArgsFrameIndex, + const PPCSubtarget &Subtarget) { // TODO: add description of PPC stack frame format, or at least some docs. 
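The Darwin lowering begins above; before following it, it helps to see what the SVR4 varargs code just completed builds at the C level. A sketch of the va_list record that the LowerVASTART comment earlier refers to; the field layout follows the SVR4 PowerPC supplement and should be treated as an assumption here, since the diff elides the middle of that comment:

// C-level view of the SVR4 PowerPC va_list filled in by LowerVASTART:
struct PPCSVR4VaList {
  unsigned char gpr;       // count of fixed args already passed in R3-R10
  unsigned char fpr;       // count of fixed args already passed in F1-F8
  char *overflow_arg_area; // next stack-passed variadic argument
  char *reg_save_area;     // base of the register spill block built above
};
// va_arg for an i32 reads reg_save_area + 4*gpr++ until gpr reaches 8, then
// falls over to overflow_arg_area (the path LowerVAARG above still asserts on).
int main() { PPCSVR4VaList v = {}; return v.gpr; }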
// MachineFunction &MF = DAG.getMachineFunction(); MachineFrameInfo *MFI = MF.getFrameInfo(); - MachineRegisterInfo &RegInfo = MF.getRegInfo(); SmallVector<SDValue, 8> ArgValues; SDValue Root = Op.getOperand(0); bool isVarArg = cast<ConstantSDNode>(Op.getOperand(2))->getZExtValue() != 0; @@ -1411,14 +1723,12 @@ PPCTargetLowering::LowerFORMAL_ARGUMENTS(SDValue Op, MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); bool isPPC64 = PtrVT == MVT::i64; - bool isMachoABI = Subtarget.isMachoABI(); - bool isELF32_ABI = Subtarget.isELF32_ABI(); // Potential tail calls could cause overwriting of argument stack slots. unsigned CC = MF.getFunction()->getCallingConv(); bool isImmutable = !(PerformTailCallOpt && (CC==CallingConv::Fast)); unsigned PtrByteSize = isPPC64 ? 8 : 4; - unsigned ArgOffset = PPCFrameInfo::getLinkageSize(isPPC64, isMachoABI); + unsigned ArgOffset = PPCFrameInfo::getLinkageSize(isPPC64, true); // Area that is at least reserved in caller of this function. unsigned MinReservedArea = ArgOffset; @@ -1439,7 +1749,7 @@ PPCTargetLowering::LowerFORMAL_ARGUMENTS(SDValue Op, }; const unsigned Num_GPR_Regs = array_lengthof(GPR_32); - const unsigned Num_FPR_Regs = isMachoABI ? 13 : 8; + const unsigned Num_FPR_Regs = 13; const unsigned Num_VR_Regs = array_lengthof( VR); unsigned GPR_idx = 0, FPR_idx = 0, VR_idx = 0; @@ -1453,8 +1763,6 @@ PPCTargetLowering::LowerFORMAL_ARGUMENTS(SDValue Op, // that out...for the pathological case, compute VecArgOffset as the // start of the vector parameter area. Computing VecArgOffset is the // entire point of the following loop. - // Altivec is not mentioned in the ppc32 Elf Supplement, so I'm not trying - // to handle Elf here. unsigned VecArgOffset = ArgOffset; if (!isVarArg && !isPPC64) { for (unsigned ArgNo = 0, e = Op.getNode()->getNumValues()-1; ArgNo != e; @@ -1500,10 +1808,6 @@ PPCTargetLowering::LowerFORMAL_ARGUMENTS(SDValue Op, // Add DAG nodes to load the arguments or copy them out of registers. On // entry to a function on PPC, the arguments start after the linkage area, // although the first ones are often in registers. - // - // In the ELF 32 ABI, GPRs and stack are double word align: an argument - // represented with two words (long long or double) must be copied to an - // even GPR_idx value or to an even ArgOffset value. SmallVector<SDValue, 8> MemOps; unsigned nAltivecParamsAtEnd = 0; @@ -1516,8 +1820,6 @@ PPCTargetLowering::LowerFORMAL_ARGUMENTS(SDValue Op, unsigned ArgSize = ObjSize; ISD::ArgFlagsTy Flags = cast<ARG_FLAGSSDNode>(Op.getOperand(ArgNo+3))->getArgFlags(); - // See if next argument requires stack alignment in ELF - bool Align = Flags.isSplit(); unsigned CurArgOffset = ArgOffset; @@ -1528,25 +1830,20 @@ PPCTargetLowering::LowerFORMAL_ARGUMENTS(SDValue Op, MinReservedArea = ((MinReservedArea+15)/16)*16; MinReservedArea += CalculateStackSlotSize(Op.getValue(ArgNo), Flags, - isVarArg, PtrByteSize); } else nAltivecParamsAtEnd++; } else // Calculate min reserved area. MinReservedArea += CalculateStackSlotSize(Op.getValue(ArgNo), Flags, - isVarArg, PtrByteSize); - // FIXME alignment for ELF may not be right // FIXME the codegen can be much improved in some cases. // We do not have to keep everything in memory. if (Flags.isByVal()) { // ObjSize is the true size, ArgSize rounded up to multiple of registers. 
ObjSize = Flags.getByValSize(); ArgSize = ((ObjSize + PtrByteSize - 1)/PtrByteSize) * PtrByteSize; - // Double word align in ELF - if (Align && isELF32_ABI) GPR_idx += (GPR_idx % 2); // Objects of size 1 and 2 are right justified, everything else is // left justified. This means the memory address is adjusted forwards. if (ObjSize==1 || ObjSize==2) { @@ -1558,17 +1855,16 @@ PPCTargetLowering::LowerFORMAL_ARGUMENTS(SDValue Op, ArgValues.push_back(FIN); if (ObjSize==1 || ObjSize==2) { if (GPR_idx != Num_GPR_Regs) { - unsigned VReg = RegInfo.createVirtualRegister(&PPC::GPRCRegClass); - RegInfo.addLiveIn(GPR[GPR_idx], VReg); + unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::GPRCRegClass); SDValue Val = DAG.getCopyFromReg(Root, dl, VReg, PtrVT); SDValue Store = DAG.getTruncStore(Val.getValue(1), dl, Val, FIN, NULL, 0, ObjSize==1 ? MVT::i8 : MVT::i16 ); MemOps.push_back(Store); ++GPR_idx; - if (isMachoABI) ArgOffset += PtrByteSize; - } else { - ArgOffset += PtrByteSize; } + + ArgOffset += PtrByteSize; + continue; } for (unsigned j = 0; j < ArgSize; j += PtrByteSize) { @@ -1576,15 +1872,14 @@ PPCTargetLowering::LowerFORMAL_ARGUMENTS(SDValue Op, // to memory. ArgVal will be address of the beginning of // the object. if (GPR_idx != Num_GPR_Regs) { - unsigned VReg = RegInfo.createVirtualRegister(&PPC::GPRCRegClass); - RegInfo.addLiveIn(GPR[GPR_idx], VReg); + unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::GPRCRegClass); int FI = MFI->CreateFixedObject(PtrByteSize, ArgOffset); SDValue FIN = DAG.getFrameIndex(FI, PtrVT); SDValue Val = DAG.getCopyFromReg(Root, dl, VReg, PtrVT); SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN, NULL, 0); MemOps.push_back(Store); ++GPR_idx; - if (isMachoABI) ArgOffset += PtrByteSize; + ArgOffset += PtrByteSize; } else { ArgOffset += ArgSize - (ArgOffset-CurArgOffset); break; @@ -1597,30 +1892,22 @@ PPCTargetLowering::LowerFORMAL_ARGUMENTS(SDValue Op, default: assert(0 && "Unhandled argument type!"); case MVT::i32: if (!isPPC64) { - // Double word align in ELF - if (Align && isELF32_ABI) GPR_idx += (GPR_idx % 2); - if (GPR_idx != Num_GPR_Regs) { - unsigned VReg = RegInfo.createVirtualRegister(&PPC::GPRCRegClass); - RegInfo.addLiveIn(GPR[GPR_idx], VReg); + unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::GPRCRegClass); ArgVal = DAG.getCopyFromReg(Root, dl, VReg, MVT::i32); ++GPR_idx; } else { needsLoad = true; ArgSize = PtrByteSize; } - // Stack align in ELF - if (needsLoad && Align && isELF32_ABI) - ArgOffset += ((ArgOffset/4) % 2) * PtrByteSize; - // All int arguments reserve stack space in Macho ABI. - if (isMachoABI || needsLoad) ArgOffset += PtrByteSize; + // All int arguments reserve stack space in the Darwin ABI. + ArgOffset += PtrByteSize; break; } // FALLTHROUGH case MVT::i64: // PPC64 if (GPR_idx != Num_GPR_Regs) { - unsigned VReg = RegInfo.createVirtualRegister(&PPC::G8RCRegClass); - RegInfo.addLiveIn(GPR[GPR_idx], VReg); + unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass); ArgVal = DAG.getCopyFromReg(Root, dl, VReg, MVT::i64); if (ObjectVT == MVT::i32) { @@ -1641,37 +1928,35 @@ PPCTargetLowering::LowerFORMAL_ARGUMENTS(SDValue Op, needsLoad = true; ArgSize = PtrByteSize; } - // All int arguments reserve stack space in Macho ABI. - if (isMachoABI || needsLoad) ArgOffset += 8; + // All int arguments reserve stack space in the Darwin ABI. + ArgOffset += 8; break; case MVT::f32: case MVT::f64: // Every 4 bytes of argument space consumes one of the GPRs available for // argument passing. 
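The hunk continues below with that bookkeeping, now unconditional since only the Darwin path keeps it. A worked example of the shadowing rule (a sketch, not the lowering code itself):

#include <cassert>

// Darwin rule: an FP argument occupies an FPR but still shadows GPRs, one
// per 4 bytes of its slot on ppc32 (one per 8-byte slot on ppc64).
static unsigned gprsShadowedByFP(unsigned FPByteSize, bool isPPC64) {
  return isPPC64 ? 1 : FPByteSize / 4;
}

int main() {
  // void f(double a, int b) on ppc32 Darwin: 'a' lands in F1 but shadows
  // R3 and R4, so 'b' is passed in R5.
  unsigned GPR_idx = 0;
  GPR_idx += gprsShadowedByFP(8, /*isPPC64=*/false);
  assert(GPR_idx == 2); // next integer argument takes the third GPR, R5
}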
- if (GPR_idx != Num_GPR_Regs && isMachoABI) { + if (GPR_idx != Num_GPR_Regs) { ++GPR_idx; if (ObjSize == 8 && GPR_idx != Num_GPR_Regs && !isPPC64) ++GPR_idx; } if (FPR_idx != Num_FPR_Regs) { unsigned VReg; + if (ObjectVT == MVT::f32) - VReg = RegInfo.createVirtualRegister(&PPC::F4RCRegClass); + VReg = MF.addLiveIn(FPR[FPR_idx], &PPC::F4RCRegClass); else - VReg = RegInfo.createVirtualRegister(&PPC::F8RCRegClass); - RegInfo.addLiveIn(FPR[FPR_idx], VReg); + VReg = MF.addLiveIn(FPR[FPR_idx], &PPC::F8RCRegClass); + ArgVal = DAG.getCopyFromReg(Root, dl, VReg, ObjectVT); ++FPR_idx; } else { needsLoad = true; } - // Stack align in ELF - if (needsLoad && Align && isELF32_ABI) - ArgOffset += ((ArgOffset/4) % 2) * PtrByteSize; - // All FP arguments reserve stack space in Macho ABI. - if (isMachoABI || needsLoad) ArgOffset += isPPC64 ? 8 : ObjSize; + // All FP arguments reserve stack space in the Darwin ABI. + ArgOffset += isPPC64 ? 8 : ObjSize; break; case MVT::v4f32: case MVT::v4i32: @@ -1680,8 +1965,7 @@ PPCTargetLowering::LowerFORMAL_ARGUMENTS(SDValue Op, // Note that vector arguments in registers don't reserve stack space, // except in varargs functions. if (VR_idx != Num_VR_Regs) { - unsigned VReg = RegInfo.createVirtualRegister(&PPC::VRRCRegClass); - RegInfo.addLiveIn(VR[VR_idx], VReg); + unsigned VReg = MF.addLiveIn(VR[VR_idx], &PPC::VRRCRegClass); ArgVal = DAG.getCopyFromReg(Root, dl, VReg, ObjectVT); if (isVarArg) { while ((ArgOffset % 16) != 0) { @@ -1734,7 +2018,7 @@ PPCTargetLowering::LowerFORMAL_ARGUMENTS(SDValue Op, } MinReservedArea = std::max(MinReservedArea, - PPCFrameInfo::getMinCallFrameSize(isPPC64, isMachoABI)); + PPCFrameInfo::getMinCallFrameSize(isPPC64, true)); unsigned TargetAlign = DAG.getMachineFunction().getTarget().getFrameInfo()-> getStackAlignment(); unsigned AlignMask = TargetAlign-1; @@ -1744,53 +2028,23 @@ PPCTargetLowering::LowerFORMAL_ARGUMENTS(SDValue Op, // If the function takes variable number of arguments, make a frame index for // the start of the first vararg value... for expansion of llvm.va_start. if (isVarArg) { - - int depth; - if (isELF32_ABI) { - VarArgsNumGPR = GPR_idx; - VarArgsNumFPR = FPR_idx; - - // Make room for Num_GPR_Regs, Num_FPR_Regs and for a possible frame - // pointer. - depth = -(Num_GPR_Regs * PtrVT.getSizeInBits()/8 + - Num_FPR_Regs * MVT(MVT::f64).getSizeInBits()/8 + - PtrVT.getSizeInBits()/8); - - VarArgsStackOffset = MFI->CreateFixedObject(PtrVT.getSizeInBits()/8, - ArgOffset); - - } - else - depth = ArgOffset; + int Depth = ArgOffset; VarArgsFrameIndex = MFI->CreateFixedObject(PtrVT.getSizeInBits()/8, - depth); + Depth); SDValue FIN = DAG.getFrameIndex(VarArgsFrameIndex, PtrVT); - // In ELF 32 ABI, the fixed integer arguments of a variadic function are - // stored to the VarArgsFrameIndex on the stack. - if (isELF32_ABI) { - for (GPR_idx = 0; GPR_idx != VarArgsNumGPR; ++GPR_idx) { - SDValue Val = DAG.getRegister(GPR[GPR_idx], PtrVT); - SDValue Store = DAG.getStore(Root, dl, Val, FIN, NULL, 0); - MemOps.push_back(Store); - // Increment the address by four for the next argument to store - SDValue PtrOff = DAG.getConstant(PtrVT.getSizeInBits()/8, PtrVT); - FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff); - } - } - // If this function is vararg, store any remaining integer argument regs // to their spots on the stack so that they may be loaded by deferencing the // result of va_next. 
for (; GPR_idx != Num_GPR_Regs; ++GPR_idx) { unsigned VReg; + if (isPPC64) - VReg = RegInfo.createVirtualRegister(&PPC::G8RCRegClass); + VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass); else - VReg = RegInfo.createVirtualRegister(&PPC::GPRCRegClass); + VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::GPRCRegClass); - RegInfo.addLiveIn(GPR[GPR_idx], VReg); SDValue Val = DAG.getCopyFromReg(Root, dl, VReg, PtrVT); SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN, NULL, 0); MemOps.push_back(Store); @@ -1798,34 +2052,6 @@ PPCTargetLowering::LowerFORMAL_ARGUMENTS(SDValue Op, SDValue PtrOff = DAG.getConstant(PtrVT.getSizeInBits()/8, PtrVT); FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff); } - - // In ELF 32 ABI, the double arguments are stored to the VarArgsFrameIndex - // on the stack. - if (isELF32_ABI) { - for (FPR_idx = 0; FPR_idx != VarArgsNumFPR; ++FPR_idx) { - SDValue Val = DAG.getRegister(FPR[FPR_idx], MVT::f64); - SDValue Store = DAG.getStore(Root, dl, Val, FIN, NULL, 0); - MemOps.push_back(Store); - // Increment the address by eight for the next argument to store - SDValue PtrOff = DAG.getConstant(MVT(MVT::f64).getSizeInBits()/8, - PtrVT); - FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff); - } - - for (; FPR_idx != Num_FPR_Regs; ++FPR_idx) { - unsigned VReg; - VReg = RegInfo.createVirtualRegister(&PPC::F8RCRegClass); - - RegInfo.addLiveIn(FPR[FPR_idx], VReg); - SDValue Val = DAG.getCopyFromReg(Root, dl, VReg, MVT::f64); - SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN, NULL, 0); - MemOps.push_back(Store); - // Increment the address by eight for the next argument to store - SDValue PtrOff = DAG.getConstant(MVT(MVT::f64).getSizeInBits()/8, - PtrVT); - FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff); - } - } } if (!MemOps.empty()) @@ -1840,11 +2066,10 @@ PPCTargetLowering::LowerFORMAL_ARGUMENTS(SDValue Op, } /// CalculateParameterAndLinkageAreaSize - Get the size of the parameter plus -/// linkage area. +/// linkage area for the Darwin ABI. static unsigned CalculateParameterAndLinkageAreaSize(SelectionDAG &DAG, bool isPPC64, - bool isMachoABI, bool isVarArg, unsigned CC, CallSDNode *TheCall, @@ -1852,7 +2077,7 @@ CalculateParameterAndLinkageAreaSize(SelectionDAG &DAG, // Count how many bytes are to be pushed on the stack, including the linkage // area, and parameter passing area. We start with 24/48 bytes, which is // prereserved space for [SP][CR][LR][3 x unused]. - unsigned NumBytes = PPCFrameInfo::getLinkageSize(isPPC64, isMachoABI); + unsigned NumBytes = PPCFrameInfo::getLinkageSize(isPPC64, true); unsigned NumOps = TheCall->getNumArgs(); unsigned PtrByteSize = isPPC64 ? 8 : 4; @@ -1879,7 +2104,7 @@ CalculateParameterAndLinkageAreaSize(SelectionDAG &DAG, // Varargs and 64-bit Altivec parameters are padded to 16 byte boundary. NumBytes = ((NumBytes+15)/16)*16; } - NumBytes += CalculateStackSlotSize(Arg, Flags, isVarArg, PtrByteSize); + NumBytes += CalculateStackSlotSize(Arg, Flags, PtrByteSize); } // Allow for Altivec parameters at the end, if needed. @@ -1894,7 +2119,7 @@ CalculateParameterAndLinkageAreaSize(SelectionDAG &DAG, // conservatively assume that it is needed. As such, make sure we have at // least enough stack space for the caller to store the 8 GPRs. NumBytes = std::max(NumBytes, - PPCFrameInfo::getMinCallFrameSize(isPPC64, isMachoABI)); + PPCFrameInfo::getMinCallFrameSize(isPPC64, true)); // Tail call needs the stack to be aligned. 
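The alignment check resumes below. CalculateStackSlotSize, simplified above to drop its unused isVarArg parameter, rounds every argument up to whole pointer-sized stack words before it is added to NumBytes; the same rounding expression, checked in isolation:

#include <cassert>

// Round an argument's memory footprint up to a multiple of the pointer
// size, as CalculateStackSlotSize does (byval arguments first substitute
// their byval size).
static unsigned roundToSlot(unsigned ArgSize, unsigned PtrByteSize) {
  return ((ArgSize + PtrByteSize - 1) / PtrByteSize) * PtrByteSize;
}

int main() {
  assert(roundToSlot(1, 4)  == 4);  // an i8 still consumes a full word
  assert(roundToSlot(8, 4)  == 8);  // f64 on ppc32
  assert(roundToSlot(12, 8) == 16); // 12-byte byval on ppc64
}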
if (CC==CallingConv::Fast && PerformTailCallOpt) { @@ -2017,26 +2242,30 @@ static SDValue EmitTailCallStoreFPAndRetAddr(SelectionDAG &DAG, SDValue OldFP, int SPDiff, bool isPPC64, - bool isMachoABI, + bool isDarwinABI, DebugLoc dl) { if (SPDiff) { // Calculate the new stack slot for the return address. int SlotSize = isPPC64 ? 8 : 4; int NewRetAddrLoc = SPDiff + PPCFrameInfo::getReturnSaveOffset(isPPC64, - isMachoABI); + isDarwinABI); int NewRetAddr = MF.getFrameInfo()->CreateFixedObject(SlotSize, NewRetAddrLoc); - int NewFPLoc = SPDiff + PPCFrameInfo::getFramePointerSaveOffset(isPPC64, - isMachoABI); - int NewFPIdx = MF.getFrameInfo()->CreateFixedObject(SlotSize, NewFPLoc); - MVT VT = isPPC64 ? MVT::i64 : MVT::i32; SDValue NewRetAddrFrIdx = DAG.getFrameIndex(NewRetAddr, VT); Chain = DAG.getStore(Chain, dl, OldRetAddr, NewRetAddrFrIdx, PseudoSourceValue::getFixedStack(NewRetAddr), 0); - SDValue NewFramePtrIdx = DAG.getFrameIndex(NewFPIdx, VT); - Chain = DAG.getStore(Chain, dl, OldFP, NewFramePtrIdx, - PseudoSourceValue::getFixedStack(NewFPIdx), 0); + + // When using the SVR4 ABI there is no need to move the FP stack slot + // as the FP is never overwritten. + if (isDarwinABI) { + int NewFPLoc = + SPDiff + PPCFrameInfo::getFramePointerSaveOffset(isPPC64, isDarwinABI); + int NewFPIdx = MF.getFrameInfo()->CreateFixedObject(SlotSize, NewFPLoc); + SDValue NewFramePtrIdx = DAG.getFrameIndex(NewFPIdx, VT); + Chain = DAG.getStore(Chain, dl, OldFP, NewFramePtrIdx, + PseudoSourceValue::getFixedStack(NewFPIdx), 0); + } } return Chain; } @@ -2067,6 +2296,7 @@ SDValue PPCTargetLowering::EmitTailCallLoadFPAndRetAddr(SelectionDAG & DAG, SDValue Chain, SDValue &LROpOut, SDValue &FPOpOut, + bool isDarwinABI, DebugLoc dl) { if (SPDiff) { // Load the LR and FP stack slot for later adjusting. @@ -2074,9 +2304,14 @@ SDValue PPCTargetLowering::EmitTailCallLoadFPAndRetAddr(SelectionDAG & DAG, LROpOut = getReturnAddrFrameIndex(DAG); LROpOut = DAG.getLoad(VT, dl, Chain, LROpOut, NULL, 0); Chain = SDValue(LROpOut.getNode(), 1); - FPOpOut = getFramePointerFrameIndex(DAG); - FPOpOut = DAG.getLoad(VT, dl, Chain, FPOpOut, NULL, 0); - Chain = SDValue(FPOpOut.getNode(), 1); + + // When using the SVR4 ABI there is no need to load the FP stack slot + // as the FP is never overwritten. 
+ if (isDarwinABI) { + FPOpOut = getFramePointerFrameIndex(DAG); + FPOpOut = DAG.getLoad(VT, dl, Chain, FPOpOut, NULL, 0); + Chain = SDValue(FPOpOut.getNode(), 1); + } } return Chain; } @@ -2090,8 +2325,8 @@ SDValue PPCTargetLowering::EmitTailCallLoadFPAndRetAddr(SelectionDAG & DAG, static SDValue CreateCopyOfByValArgument(SDValue Src, SDValue Dst, SDValue Chain, ISD::ArgFlagsTy Flags, SelectionDAG &DAG, - unsigned Size, DebugLoc dl) { - SDValue SizeNode = DAG.getConstant(Size, MVT::i32); + DebugLoc dl) { + SDValue SizeNode = DAG.getConstant(Flags.getByValSize(), MVT::i32); return DAG.getMemcpy(Chain, dl, Dst, Src, SizeNode, Flags.getByValAlign(), false, NULL, 0, NULL, 0); } @@ -2122,21 +2357,387 @@ LowerMemOpCallTo(SelectionDAG &DAG, MachineFunction &MF, SDValue Chain, TailCallArguments); } -SDValue PPCTargetLowering::LowerCALL(SDValue Op, SelectionDAG &DAG, - const PPCSubtarget &Subtarget, - TargetMachine &TM) { +static +void PrepareTailCall(SelectionDAG &DAG, SDValue &InFlag, SDValue &Chain, + DebugLoc dl, bool isPPC64, int SPDiff, unsigned NumBytes, + SDValue LROp, SDValue FPOp, bool isDarwinABI, + SmallVector<TailCallArgumentInfo, 8> &TailCallArguments) { + MachineFunction &MF = DAG.getMachineFunction(); + + // Emit a sequence of copyto/copyfrom virtual registers for arguments that + // might overwrite each other in case of tail call optimization. + SmallVector<SDValue, 8> MemOpChains2; + // Do not flag preceding copytoreg stuff together with the following stuff. + InFlag = SDValue(); + StoreTailCallArgumentsToStackSlot(DAG, Chain, TailCallArguments, + MemOpChains2, dl); + if (!MemOpChains2.empty()) + Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, + &MemOpChains2[0], MemOpChains2.size()); + + // Store the return address to the appropriate stack slot. + Chain = EmitTailCallStoreFPAndRetAddr(DAG, MF, Chain, LROp, FPOp, SPDiff, + isPPC64, isDarwinABI, dl); + + // Emit callseq_end just before tailcall node. + Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, true), + DAG.getIntPtrConstant(0, true), InFlag); + InFlag = Chain.getValue(1); +} + +static +unsigned PrepareCall(SelectionDAG &DAG, SDValue &Callee, SDValue &InFlag, + SDValue &Chain, DebugLoc dl, int SPDiff, bool isTailCall, + SmallVector<std::pair<unsigned, SDValue>, 8> &RegsToPass, + SmallVector<SDValue, 8> &Ops, std::vector<MVT> &NodeTys, + bool isSVR4ABI) { + MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); + NodeTys.push_back(MVT::Other); // Returns a chain + NodeTys.push_back(MVT::Flag); // Returns a flag for retval copy to use. + + unsigned CallOpc = isSVR4ABI ? PPCISD::CALL_SVR4 : PPCISD::CALL_Darwin; + + // If the callee is a GlobalAddress/ExternalSymbol node (quite common, every + // direct call is) turn it into a TargetGlobalAddress/TargetExternalSymbol + // node so that legalize doesn't hack it. + if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) + Callee = DAG.getTargetGlobalAddress(G->getGlobal(), Callee.getValueType()); + else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) + Callee = DAG.getTargetExternalSymbol(S->getSymbol(), Callee.getValueType()); + else if (SDNode *Dest = isBLACompatibleAddress(Callee, DAG)) + // If this is an absolute destination address, use the munged value. + Callee = SDValue(Dest, 0); + else { + // Otherwise, this is an indirect call. We have to use a MTCTR/BCTRL pair + // to do the call, we can't use PPCISD::CALL. 
+ SDValue MTCTROps[] = {Chain, Callee, InFlag}; + Chain = DAG.getNode(PPCISD::MTCTR, dl, NodeTys, MTCTROps, + 2 + (InFlag.getNode() != 0)); + InFlag = Chain.getValue(1); + + NodeTys.clear(); + NodeTys.push_back(MVT::Other); + NodeTys.push_back(MVT::Flag); + Ops.push_back(Chain); + CallOpc = isSVR4ABI ? PPCISD::BCTRL_SVR4 : PPCISD::BCTRL_Darwin; + Callee.setNode(0); + // Add CTR register as callee so a bctr can be emitted later. + if (isTailCall) + Ops.push_back(DAG.getRegister(PPC::CTR, PtrVT)); + } + + // If this is a direct call, pass the chain and the callee. + if (Callee.getNode()) { + Ops.push_back(Chain); + Ops.push_back(Callee); + } + // If this is a tail call add stack pointer delta. + if (isTailCall) + Ops.push_back(DAG.getConstant(SPDiff, MVT::i32)); + + // Add argument registers to the end of the list so that they are known live + // into the call. + for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) + Ops.push_back(DAG.getRegister(RegsToPass[i].first, + RegsToPass[i].second.getValueType())); + + return CallOpc; +} + +static SDValue LowerCallReturn(SDValue Op, SelectionDAG &DAG, TargetMachine &TM, + CallSDNode *TheCall, SDValue Chain, + SDValue InFlag) { + bool isVarArg = TheCall->isVarArg(); + DebugLoc dl = TheCall->getDebugLoc(); + SmallVector<SDValue, 16> ResultVals; + SmallVector<CCValAssign, 16> RVLocs; + unsigned CallerCC = DAG.getMachineFunction().getFunction()->getCallingConv(); + CCState CCRetInfo(CallerCC, isVarArg, TM, RVLocs); + CCRetInfo.AnalyzeCallResult(TheCall, RetCC_PPC); + + // Copy all of the result registers out of their specified physreg. + for (unsigned i = 0, e = RVLocs.size(); i != e; ++i) { + CCValAssign &VA = RVLocs[i]; + MVT VT = VA.getValVT(); + assert(VA.isRegLoc() && "Can only return in registers!"); + Chain = DAG.getCopyFromReg(Chain, dl, + VA.getLocReg(), VT, InFlag).getValue(1); + ResultVals.push_back(Chain.getValue(0)); + InFlag = Chain.getValue(2); + } + + // If the function returns void, just return the chain. + if (RVLocs.empty()) + return Chain; + + // Otherwise, merge everything together with a MERGE_VALUES node. + ResultVals.push_back(Chain); + SDValue Res = DAG.getNode(ISD::MERGE_VALUES, dl, TheCall->getVTList(), + &ResultVals[0], ResultVals.size()); + return Res.getValue(Op.getResNo()); +} + +static +SDValue FinishCall(SelectionDAG &DAG, CallSDNode *TheCall, TargetMachine &TM, + SmallVector<std::pair<unsigned, SDValue>, 8> &RegsToPass, + SDValue Op, SDValue InFlag, SDValue Chain, SDValue &Callee, + int SPDiff, unsigned NumBytes) { + unsigned CC = TheCall->getCallingConv(); + DebugLoc dl = TheCall->getDebugLoc(); + bool isTailCall = TheCall->isTailCall() + && CC == CallingConv::Fast && PerformTailCallOpt; + + std::vector<MVT> NodeTys; + SmallVector<SDValue, 8> Ops; + unsigned CallOpc = PrepareCall(DAG, Callee, InFlag, Chain, dl, SPDiff, + isTailCall, RegsToPass, Ops, NodeTys, + TM.getSubtarget<PPCSubtarget>().isSVR4ABI()); + + // When performing tail call optimization the callee pops its arguments off + // the stack. Account for this here so these bytes can be pushed back on in + // PPCRegisterInfo::eliminateCallFramePseudoInstr. + int BytesCalleePops = + (CC==CallingConv::Fast && PerformTailCallOpt) ? NumBytes : 0; + + if (InFlag.getNode()) + Ops.push_back(InFlag); + + // Emit tail call. + if (isTailCall) { + assert(InFlag.getNode() && + "Flag must be set. 
Depend on flag being set in LowerRET"); + Chain = DAG.getNode(PPCISD::TAILCALL, dl, + TheCall->getVTList(), &Ops[0], Ops.size()); + return SDValue(Chain.getNode(), Op.getResNo()); + } + + Chain = DAG.getNode(CallOpc, dl, NodeTys, &Ops[0], Ops.size()); + InFlag = Chain.getValue(1); + + Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, true), + DAG.getIntPtrConstant(BytesCalleePops, true), + InFlag); + if (TheCall->getValueType(0) != MVT::Other) + InFlag = Chain.getValue(1); + + return LowerCallReturn(Op, DAG, TM, TheCall, Chain, InFlag); +} + +SDValue PPCTargetLowering::LowerCALL_SVR4(SDValue Op, SelectionDAG &DAG, + const PPCSubtarget &Subtarget, + TargetMachine &TM) { + // See PPCTargetLowering::LowerFORMAL_ARGUMENTS_SVR4() for a description + // of the SVR4 ABI stack frame layout. CallSDNode *TheCall = cast<CallSDNode>(Op.getNode()); SDValue Chain = TheCall->getChain(); bool isVarArg = TheCall->isVarArg(); unsigned CC = TheCall->getCallingConv(); + assert((CC == CallingConv::C || + CC == CallingConv::Fast) && "Unknown calling convention!"); bool isTailCall = TheCall->isTailCall() && CC == CallingConv::Fast && PerformTailCallOpt; SDValue Callee = TheCall->getCallee(); - unsigned NumOps = TheCall->getNumArgs(); DebugLoc dl = TheCall->getDebugLoc(); - bool isMachoABI = Subtarget.isMachoABI(); - bool isELF32_ABI = Subtarget.isELF32_ABI(); + MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); + unsigned PtrByteSize = 4; + + MachineFunction &MF = DAG.getMachineFunction(); + + // Mark this function as potentially containing a function that contains a + // tail call. As a consequence the frame pointer will be used for dynamic alloca + // and for restoring the caller's stack pointer in this function's epilog. This is + // done because, by tail calling, the called function might overwrite the value + // in this function's (MF) stack pointer stack slot 0(SP). + if (PerformTailCallOpt && CC==CallingConv::Fast) + MF.getInfo<PPCFunctionInfo>()->setHasFastCall(); + + // Count how many bytes are to be pushed on the stack, including the linkage + // area, parameter list area and the part of the local variable space which + // contains copies of aggregates which are passed by value. + + // Assign locations to all of the outgoing arguments. + SmallVector<CCValAssign, 16> ArgLocs; + CCState CCInfo(CC, isVarArg, getTargetMachine(), ArgLocs); + + // Reserve space for the linkage area on the stack. + CCInfo.AllocateStack(PPCFrameInfo::getLinkageSize(false, false), PtrByteSize); + + if (isVarArg) { + // Handle fixed and variable vector arguments differently. + // Fixed vector arguments go into registers as long as registers are + // available. Variable vector arguments always go into memory. + unsigned NumArgs = TheCall->getNumArgs(); + unsigned NumFixedArgs = TheCall->getNumFixedArgs(); + + for (unsigned i = 0; i != NumArgs; ++i) { + MVT ArgVT = TheCall->getArg(i).getValueType(); + ISD::ArgFlagsTy ArgFlags = TheCall->getArgFlags(i); + bool Result; + + if (i < NumFixedArgs) { + Result = CC_PPC_SVR4(i, ArgVT, ArgVT, CCValAssign::Full, ArgFlags, + CCInfo); + } else { + Result = CC_PPC_SVR4_VarArg(i, ArgVT, ArgVT, CCValAssign::Full, + ArgFlags, CCInfo); + } + + if (Result) { + cerr << "Call operand #" << i << " has unhandled type " + << ArgVT.getMVTString() << "\n"; + abort(); + } + } + } else { + // All arguments are treated the same. + CCInfo.AnalyzeCallOperands(TheCall, CC_PPC_SVR4); + } + + // Assign locations to all of the outgoing aggregate by value arguments. 
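(The by-value location assignment continues in the next hunk.) The fixed-versus-variadic split above exists because SVR4 passes fixed vector arguments in VRs, while variadic ones must go to memory where va_arg can find them. The dispatch shape, reduced to a sketch with stand-in tables:

#include <cstdio>

// Stand-ins for the generated CC_PPC_SVR4 / CC_PPC_SVR4_VarArg tables: the
// vararg table never hands a vector argument a register.
enum Loc { InReg, InMem };
static Loc ccFixed(bool /*isVector*/) { return InReg; } // simplified
static Loc ccVarArg(bool isVector)    { return isVector ? InMem : InReg; }

int main() {
  const bool isVec[] = {false, true, true}; // f(int, v4i32, ...) plus one v4i32 vararg
  const unsigned NumFixedArgs = 2;
  for (unsigned i = 0; i != 3; ++i) {
    Loc L = i < NumFixedArgs ? ccFixed(isVec[i]) : ccVarArg(isVec[i]);
    std::printf("arg %u -> %s\n", i, L == InReg ? "register" : "memory");
  }
}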
+ SmallVector<CCValAssign, 16> ByValArgLocs; + CCState CCByValInfo(CC, isVarArg, getTargetMachine(), ByValArgLocs); + + // Reserve stack space for the allocations in CCInfo. + CCByValInfo.AllocateStack(CCInfo.getNextStackOffset(), PtrByteSize); + + CCByValInfo.AnalyzeCallOperands(TheCall, CC_PPC_SVR4_ByVal); + + // Size of the linkage area, parameter list area and the part of the local + // variable space where copies of aggregates which are passed by value are + // stored. + unsigned NumBytes = CCByValInfo.getNextStackOffset(); + + // Calculate by how many bytes the stack has to be adjusted in case of tail + // call optimization. + int SPDiff = CalculateTailCallSPDiff(DAG, isTailCall, NumBytes); + + // Adjust the stack pointer for the new arguments... + // These operations are automatically eliminated by the prolog/epilog pass + Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumBytes, true)); + SDValue CallSeqStart = Chain; + + // Load the return address and frame pointer so it can be moved somewhere else + // later. + SDValue LROp, FPOp; + Chain = EmitTailCallLoadFPAndRetAddr(DAG, SPDiff, Chain, LROp, FPOp, false, + dl); + + // Set up a copy of the stack pointer for use loading and storing any + // arguments that may not fit in the registers available for argument + // passing. + SDValue StackPtr = DAG.getRegister(PPC::R1, MVT::i32); + + SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass; + SmallVector<TailCallArgumentInfo, 8> TailCallArguments; + SmallVector<SDValue, 8> MemOpChains; + + // Walk the register/memloc assignments, inserting copies/loads. + for (unsigned i = 0, j = 0, e = ArgLocs.size(); + i != e; + ++i) { + CCValAssign &VA = ArgLocs[i]; + SDValue Arg = TheCall->getArg(i); + ISD::ArgFlagsTy Flags = TheCall->getArgFlags(i); + + if (Flags.isByVal()) { + // Argument is an aggregate which is passed by value, thus we need to + // create a copy of it in the local variable space of the current stack + // frame (which is the stack frame of the caller) and pass the address of + // this copy to the callee. + assert((j < ByValArgLocs.size()) && "Index out of bounds!"); + CCValAssign &ByValVA = ByValArgLocs[j++]; + assert((VA.getValNo() == ByValVA.getValNo()) && "ValNo mismatch!"); + + // Memory reserved in the local variable space of the caller's stack frame. + unsigned LocMemOffset = ByValVA.getLocMemOffset(); + + SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset); + PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(), StackPtr, PtrOff); + + // Create a copy of the argument in the local area of the current + // stack frame. + SDValue MemcpyCall = + CreateCopyOfByValArgument(Arg, PtrOff, + CallSeqStart.getNode()->getOperand(0), + Flags, DAG, dl); + + // This must go outside the CALLSEQ_START..END. + SDValue NewCallSeqStart = DAG.getCALLSEQ_START(MemcpyCall, + CallSeqStart.getNode()->getOperand(1)); + DAG.ReplaceAllUsesWith(CallSeqStart.getNode(), + NewCallSeqStart.getNode()); + Chain = CallSeqStart = NewCallSeqStart; + + // Pass the address of the aggregate copy on the stack either in a + // physical register or in the parameter list area of the current stack + // frame to the callee. + Arg = PtrOff; + } + + if (VA.isRegLoc()) { + // Put argument in a physical register. + RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg)); + } else { + // Put argument in the parameter list area of the current stack frame. 
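(The else branch resumes in the next hunk with the store into that slot.) At the C level, the copy created for a by-value aggregate is what gives the callee its own private instance; a small illustration of the semantics the memcpy above preserves:

// Passing struct S by value: under the lowering above, the caller copies s
// into its parameter area and the callee only ever sees that copy, so
// writes inside the callee never reach the caller's s.
struct S { int a[4]; };
static int bump(S s) { s.a[0] += 1; return s.a[0]; }

int main() {
  S s = {{1, 2, 3, 4}};
  int r = bump(s);                       // callee mutates its private copy
  return (r == 2 && s.a[0] == 1) ? 0 : 1;
}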
+ assert(VA.isMemLoc()); + unsigned LocMemOffset = VA.getLocMemOffset(); + + if (!isTailCall) { + SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset); + PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(), StackPtr, PtrOff); + + MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, PtrOff, + PseudoSourceValue::getStack(), LocMemOffset)); + } else { + // Calculate and remember argument location. + CalculateTailCallArgDest(DAG, MF, false, Arg, SPDiff, LocMemOffset, + TailCallArguments); + } + } + } + + if (!MemOpChains.empty()) + Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, + &MemOpChains[0], MemOpChains.size()); + + // Build a sequence of copy-to-reg nodes chained together with token chain + // and flag operands which copy the outgoing args into the appropriate regs. + SDValue InFlag; + for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) { + Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first, + RegsToPass[i].second, InFlag); + InFlag = Chain.getValue(1); + } + + // Set CR6 to true if this is a vararg call. + if (isVarArg) { + SDValue SetCR(DAG.getTargetNode(PPC::CRSET, dl, MVT::i32), 0); + Chain = DAG.getCopyToReg(Chain, dl, PPC::CR1EQ, SetCR, InFlag); + InFlag = Chain.getValue(1); + } + + if (isTailCall) { + PrepareTailCall(DAG, InFlag, Chain, dl, false, SPDiff, NumBytes, LROp, FPOp, + false, TailCallArguments); + } + + return FinishCall(DAG, TheCall, TM, RegsToPass, Op, InFlag, Chain, Callee, + SPDiff, NumBytes); +} + +SDValue PPCTargetLowering::LowerCALL_Darwin(SDValue Op, SelectionDAG &DAG, + const PPCSubtarget &Subtarget, + TargetMachine &TM) { + CallSDNode *TheCall = cast<CallSDNode>(Op.getNode()); + SDValue Chain = TheCall->getChain(); + bool isVarArg = TheCall->isVarArg(); + unsigned CC = TheCall->getCallingConv(); + bool isTailCall = TheCall->isTailCall() + && CC == CallingConv::Fast && PerformTailCallOpt; + SDValue Callee = TheCall->getCallee(); + unsigned NumOps = TheCall->getNumArgs(); + DebugLoc dl = TheCall->getDebugLoc(); MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); bool isPPC64 = PtrVT == MVT::i64; @@ -2144,10 +2745,6 @@ SDValue PPCTargetLowering::LowerCALL(SDValue Op, SelectionDAG &DAG, MachineFunction &MF = DAG.getMachineFunction(); - // args_to_use will accumulate outgoing args for the PPCISD::CALL case in - // SelectExpr to use to put the arguments in the appropriate registers. - std::vector<SDValue> args_to_use; - // Mark this function as potentially containing a function that contains a // tail call. As a consequence the frame pointer will be used for dynamic alloca // and for restoring the caller's stack pointer in this function's epilog. This is @@ -2162,8 +2759,8 @@ SDValue PPCTargetLowering::LowerCALL(SDValue Op, SelectionDAG &DAG, // area, and parameter passing area. We start with 24/48 bytes, which is // prereserved space for [SP][CR][LR][3 x unused]. unsigned NumBytes = - CalculateParameterAndLinkageAreaSize(DAG, isPPC64, isMachoABI, isVarArg, CC, - TheCall, nAltivecParamsAtEnd); + CalculateParameterAndLinkageAreaSize(DAG, isPPC64, isVarArg, CC, TheCall, + nAltivecParamsAtEnd); // Calculate by how many bytes the stack has to be adjusted in case of tail // call optimization. @@ -2177,7 +2774,8 @@ SDValue PPCTargetLowering::LowerCALL(SDValue Op, SelectionDAG &DAG, // Load the return address and frame pointer so it can be moved somewhere else // later. 
SDValue LROp, FPOp; - Chain = EmitTailCallLoadFPAndRetAddr(DAG, SPDiff, Chain, LROp, FPOp, dl); + Chain = EmitTailCallLoadFPAndRetAddr(DAG, SPDiff, Chain, LROp, FPOp, true, + dl); // Set up a copy of the stack pointer for use loading and storing any // arguments that may not fit in the registers available for argument @@ -2192,7 +2790,7 @@ SDValue PPCTargetLowering::LowerCALL(SDValue Op, SelectionDAG &DAG, // memory. Also, if this is a vararg function, floating point operations // must be stored to our stack, and loaded into integer regs as well, if // any integer regs are available for argument passing. - unsigned ArgOffset = PPCFrameInfo::getLinkageSize(isPPC64, isMachoABI); + unsigned ArgOffset = PPCFrameInfo::getLinkageSize(isPPC64, true); unsigned GPR_idx = 0, FPR_idx = 0, VR_idx = 0; static const unsigned GPR_32[] = { // 32-bit registers. @@ -2210,12 +2808,12 @@ SDValue PPCTargetLowering::LowerCALL(SDValue Op, SelectionDAG &DAG, PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13 }; const unsigned NumGPRs = array_lengthof(GPR_32); - const unsigned NumFPRs = isMachoABI ? 13 : 8; - const unsigned NumVRs = array_lengthof( VR); + const unsigned NumFPRs = 13; + const unsigned NumVRs = array_lengthof(VR); const unsigned *GPR = isPPC64 ? GPR_64 : GPR_32; - std::vector<std::pair<unsigned, SDValue> > RegsToPass; + SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass; SmallVector<TailCallArgumentInfo, 8> TailCallArguments; SmallVector<SDValue, 8> MemOpChains; @@ -2223,19 +2821,12 @@ SDValue PPCTargetLowering::LowerCALL(SDValue Op, SelectionDAG &DAG, bool inMem = false; SDValue Arg = TheCall->getArg(i); ISD::ArgFlagsTy Flags = TheCall->getArgFlags(i); - // See if next argument requires stack alignment in ELF - bool Align = Flags.isSplit(); // PtrOff will be used to store the current argument to the stack if a // register cannot be found for it. SDValue PtrOff; - // Stack align in ELF 32 - if (isELF32_ABI && Align) - PtrOff = DAG.getConstant(ArgOffset + ((ArgOffset/4) % 2) * PtrByteSize, - StackPtr.getValueType()); - else - PtrOff = DAG.getConstant(ArgOffset, StackPtr.getValueType()); + PtrOff = DAG.getConstant(ArgOffset, StackPtr.getValueType()); PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff); @@ -2246,11 +2837,9 @@ SDValue PPCTargetLowering::LowerCALL(SDValue Op, SelectionDAG &DAG, Arg = DAG.getNode(ExtOp, dl, MVT::i64, Arg); } - // FIXME Elf untested, what are alignment rules? // FIXME memcpy is used way more than necessary. Correctness first. if (Flags.isByVal()) { unsigned Size = Flags.getByValSize(); - if (isELF32_ABI && Align) GPR_idx += (GPR_idx % 2); if (Size==1 || Size==2) { // Very small objects are passed right-justified. // Everything else is passed left-justified. @@ -2260,14 +2849,14 @@ SDValue PPCTargetLowering::LowerCALL(SDValue Op, SelectionDAG &DAG, NULL, 0, VT); MemOpChains.push_back(Load.getValue(1)); RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load)); - if (isMachoABI) - ArgOffset += PtrByteSize; + + ArgOffset += PtrByteSize; } else { SDValue Const = DAG.getConstant(4 - Size, PtrOff.getValueType()); SDValue AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, Const); SDValue MemcpyCall = CreateCopyOfByValArgument(Arg, AddPtr, CallSeqStart.getNode()->getOperand(0), - Flags, DAG, Size, dl); + Flags, DAG, dl); // This must go outside the CALLSEQ_START..END. SDValue NewCallSeqStart = DAG.getCALLSEQ_START(MemcpyCall, CallSeqStart.getNode()->getOperand(1)); @@ -2283,7 +2872,7 @@ SDValue PPCTargetLowering::LowerCALL(SDValue Op, SelectionDAG &DAG, // registers. 
(This is not what the doc says.) SDValue MemcpyCall = CreateCopyOfByValArgument(Arg, PtrOff, CallSeqStart.getNode()->getOperand(0), - Flags, DAG, Size, dl); + Flags, DAG, dl); // This must go outside the CALLSEQ_START..END. SDValue NewCallSeqStart = DAG.getCALLSEQ_START(MemcpyCall, CallSeqStart.getNode()->getOperand(1)); @@ -2297,8 +2886,7 @@ SDValue PPCTargetLowering::LowerCALL(SDValue Op, SelectionDAG &DAG, SDValue Load = DAG.getLoad(PtrVT, dl, Chain, AddArg, NULL, 0); MemOpChains.push_back(Load.getValue(1)); RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load)); - if (isMachoABI) - ArgOffset += PtrByteSize; + ArgOffset += PtrByteSize; } else { ArgOffset += ((Size - j + PtrByteSize-1)/PtrByteSize)*PtrByteSize; break; @@ -2311,8 +2899,6 @@ SDValue PPCTargetLowering::LowerCALL(SDValue Op, SelectionDAG &DAG, default: assert(0 && "Unexpected ValueType for argument!"); case MVT::i32: case MVT::i64: - // Double word align in ELF - if (isELF32_ABI && Align) GPR_idx += (GPR_idx % 2); if (GPR_idx != NumGPRs) { RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Arg)); } else { @@ -2321,13 +2907,7 @@ SDValue PPCTargetLowering::LowerCALL(SDValue Op, SelectionDAG &DAG, TailCallArguments, dl); inMem = true; } - if (inMem || isMachoABI) { - // Stack align in ELF - if (isELF32_ABI && Align) - ArgOffset += ((ArgOffset/4) % 2) * PtrByteSize; - - ArgOffset += PtrByteSize; - } + ArgOffset += PtrByteSize; break; case MVT::f32: case MVT::f64: @@ -2342,28 +2922,24 @@ SDValue PPCTargetLowering::LowerCALL(SDValue Op, SelectionDAG &DAG, if (GPR_idx != NumGPRs) { SDValue Load = DAG.getLoad(PtrVT, dl, Store, PtrOff, NULL, 0); MemOpChains.push_back(Load.getValue(1)); - if (isMachoABI) RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], - Load)); + RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load)); } if (GPR_idx != NumGPRs && Arg.getValueType() == MVT::f64 && !isPPC64){ SDValue ConstFour = DAG.getConstant(4, PtrOff.getValueType()); PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, ConstFour); SDValue Load = DAG.getLoad(PtrVT, dl, Store, PtrOff, NULL, 0); MemOpChains.push_back(Load.getValue(1)); - if (isMachoABI) RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], - Load)); + RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load)); } } else { // If we have any FPRs remaining, we may also have GPRs remaining. // Args passed in FPRs consume either 1 (f32) or 2 (f64) available // GPRs. - if (isMachoABI) { - if (GPR_idx != NumGPRs) - ++GPR_idx; - if (GPR_idx != NumGPRs && Arg.getValueType() == MVT::f64 && - !isPPC64) // PPC64 has 64-bit GPR's obviously :) - ++GPR_idx; - } + if (GPR_idx != NumGPRs) + ++GPR_idx; + if (GPR_idx != NumGPRs && Arg.getValueType() == MVT::f64 && + !isPPC64) // PPC64 has 64-bit GPR's obviously :) + ++GPR_idx; } } else { LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset, @@ -2371,15 +2947,10 @@ SDValue PPCTargetLowering::LowerCALL(SDValue Op, SelectionDAG &DAG, TailCallArguments, dl); inMem = true; } - if (inMem || isMachoABI) { - // Stack align in ELF - if (isELF32_ABI && Align) - ArgOffset += ((ArgOffset/4) % 2) * PtrByteSize; - if (isPPC64) - ArgOffset += 8; - else - ArgOffset += Arg.getValueType() == MVT::f32 ? 4 : 8; - } + if (isPPC64) + ArgOffset += 8; + else + ArgOffset += Arg.getValueType() == MVT::f32 ? 4 : 8; break; case MVT::v4f32: case MVT::v4i32: @@ -2475,148 +3046,13 @@ SDValue PPCTargetLowering::LowerCALL(SDValue Op, SelectionDAG &DAG, InFlag = Chain.getValue(1); } - // With the ELF 32 ABI, set CR6 to true if this is a vararg call. 
- if (isVarArg && isELF32_ABI) { - SDValue SetCR(DAG.getTargetNode(PPC::CRSET, dl, MVT::i32), 0); - Chain = DAG.getCopyToReg(Chain, dl, PPC::CR1EQ, SetCR, InFlag); - InFlag = Chain.getValue(1); - } - - // Emit a sequence of copyto/copyfrom virtual registers for arguments that - // might overwrite each other in case of tail call optimization. if (isTailCall) { - SmallVector<SDValue, 8> MemOpChains2; - // Do not flag preceeding copytoreg stuff together with the following stuff. - InFlag = SDValue(); - StoreTailCallArgumentsToStackSlot(DAG, Chain, TailCallArguments, - MemOpChains2, dl); - if (!MemOpChains2.empty()) - Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, - &MemOpChains2[0], MemOpChains2.size()); - - // Store the return address to the appropriate stack slot. - Chain = EmitTailCallStoreFPAndRetAddr(DAG, MF, Chain, LROp, FPOp, SPDiff, - isPPC64, isMachoABI, dl); - } - - // Emit callseq_end just before tailcall node. - if (isTailCall) { - Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, true), - DAG.getIntPtrConstant(0, true), InFlag); - InFlag = Chain.getValue(1); - } - - std::vector<MVT> NodeTys; - NodeTys.push_back(MVT::Other); // Returns a chain - NodeTys.push_back(MVT::Flag); // Returns a flag for retval copy to use. - - SmallVector<SDValue, 8> Ops; - unsigned CallOpc = isMachoABI? PPCISD::CALL_Macho : PPCISD::CALL_ELF; - - // If the callee is a GlobalAddress/ExternalSymbol node (quite common, every - // direct call is) turn it into a TargetGlobalAddress/TargetExternalSymbol - // node so that legalize doesn't hack it. - if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) - Callee = DAG.getTargetGlobalAddress(G->getGlobal(), Callee.getValueType()); - else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) - Callee = DAG.getTargetExternalSymbol(S->getSymbol(), Callee.getValueType()); - else if (SDNode *Dest = isBLACompatibleAddress(Callee, DAG)) - // If this is an absolute destination address, use the munged value. - Callee = SDValue(Dest, 0); - else { - // Otherwise, this is an indirect call. We have to use a MTCTR/BCTRL pair - // to do the call, we can't use PPCISD::CALL. - SDValue MTCTROps[] = {Chain, Callee, InFlag}; - Chain = DAG.getNode(PPCISD::MTCTR, dl, NodeTys, MTCTROps, - 2 + (InFlag.getNode() != 0)); - InFlag = Chain.getValue(1); - - // Copy the callee address into R12/X12 on darwin. - if (isMachoABI) { - unsigned Reg = Callee.getValueType() == MVT::i32 ? PPC::R12 : PPC::X12; - Chain = DAG.getCopyToReg(Chain, dl, Reg, Callee, InFlag); - InFlag = Chain.getValue(1); - } - - NodeTys.clear(); - NodeTys.push_back(MVT::Other); - NodeTys.push_back(MVT::Flag); - Ops.push_back(Chain); - CallOpc = isMachoABI ? PPCISD::BCTRL_Macho : PPCISD::BCTRL_ELF; - Callee.setNode(0); - // Add CTR register as callee so a bctr can be emitted later. - if (isTailCall) - Ops.push_back(DAG.getRegister(PPC::CTR, getPointerTy())); - } - - // If this is a direct call, pass the chain and the callee. - if (Callee.getNode()) { - Ops.push_back(Chain); - Ops.push_back(Callee); + PrepareTailCall(DAG, InFlag, Chain, dl, isPPC64, SPDiff, NumBytes, LROp, + FPOp, true, TailCallArguments); } - // If this is a tail call add stack pointer delta. - if (isTailCall) - Ops.push_back(DAG.getConstant(SPDiff, MVT::i32)); - // Add argument registers to the end of the list so that they are known live - // into the call. 
- for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) - Ops.push_back(DAG.getRegister(RegsToPass[i].first, - RegsToPass[i].second.getValueType())); - - // When performing tail call optimization the callee pops its arguments off - // the stack. Account for this here so these bytes can be pushed back on in - // PPCRegisterInfo::eliminateCallFramePseudoInstr. - int BytesCalleePops = - (CC==CallingConv::Fast && PerformTailCallOpt) ? NumBytes : 0; - - if (InFlag.getNode()) - Ops.push_back(InFlag); - - // Emit tail call. - if (isTailCall) { - assert(InFlag.getNode() && - "Flag must be set. Depend on flag being set in LowerRET"); - Chain = DAG.getNode(PPCISD::TAILCALL, dl, - TheCall->getVTList(), &Ops[0], Ops.size()); - return SDValue(Chain.getNode(), Op.getResNo()); - } - - Chain = DAG.getNode(CallOpc, dl, NodeTys, &Ops[0], Ops.size()); - InFlag = Chain.getValue(1); - - Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, true), - DAG.getIntPtrConstant(BytesCalleePops, true), - InFlag); - if (TheCall->getValueType(0) != MVT::Other) - InFlag = Chain.getValue(1); - - SmallVector<SDValue, 16> ResultVals; - SmallVector<CCValAssign, 16> RVLocs; - unsigned CallerCC = DAG.getMachineFunction().getFunction()->getCallingConv(); - CCState CCInfo(CallerCC, isVarArg, TM, RVLocs); - CCInfo.AnalyzeCallResult(TheCall, RetCC_PPC); - - // Copy all of the result registers out of their specified physreg. - for (unsigned i = 0, e = RVLocs.size(); i != e; ++i) { - CCValAssign &VA = RVLocs[i]; - MVT VT = VA.getValVT(); - assert(VA.isRegLoc() && "Can only return in registers!"); - Chain = DAG.getCopyFromReg(Chain, dl, - VA.getLocReg(), VT, InFlag).getValue(1); - ResultVals.push_back(Chain.getValue(0)); - InFlag = Chain.getValue(2); - } - - // If the function returns void, just return the chain. - if (RVLocs.empty()) - return Chain; - - // Otherwise, merge everything together with a MERGE_VALUES node. - ResultVals.push_back(Chain); - SDValue Res = DAG.getNode(ISD::MERGE_VALUES, dl, TheCall->getVTList(), - &ResultVals[0], ResultVals.size()); - return Res.getValue(Op.getResNo()); + return FinishCall(DAG, TheCall, TM, RegsToPass, Op, InFlag, Chain, Callee, + SPDiff, NumBytes); } SDValue PPCTargetLowering::LowerRET(SDValue Op, SelectionDAG &DAG, @@ -2716,7 +3152,7 @@ SDValue PPCTargetLowering::getReturnAddrFrameIndex(SelectionDAG & DAG) const { MachineFunction &MF = DAG.getMachineFunction(); bool IsPPC64 = PPCSubTarget.isPPC64(); - bool isMachoABI = PPCSubTarget.isMachoABI(); + bool isDarwinABI = PPCSubTarget.isDarwinABI(); MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); // Get current frame pointer save index. The users of this index will be @@ -2727,7 +3163,7 @@ PPCTargetLowering::getReturnAddrFrameIndex(SelectionDAG & DAG) const { // If the frame pointer save index hasn't been defined yet. if (!RASI) { // Find out what the fix offset of the frame pointer save area. - int LROffset = PPCFrameInfo::getReturnSaveOffset(IsPPC64, isMachoABI); + int LROffset = PPCFrameInfo::getReturnSaveOffset(IsPPC64, isDarwinABI); // Allocate the frame index for frame pointer save area. RASI = MF.getFrameInfo()->CreateFixedObject(IsPPC64? 8 : 4, LROffset); // Save the result. 
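The getReturnAddrFrameIndex hunk above (and the matching getFramePointerFrameIndex hunk just below) follow the same lazy-allocation idiom: the fixed frame object for the save slot is created on first query and cached, so every later user gets the same index. A condensed sketch of that idiom follows; the PPCFunctionInfo accessor names and the offset constants are illustrative assumptions, not lifted from this patch.

#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
// PPCMachineFunctionInfo.h would provide PPCFunctionInfo.

// Illustrative stand-in for PPCFrameInfo::getReturnSaveOffset; the exact
// constants here are assumptions about the two ABIs' linkage-area layouts.
static int getReturnSaveOffsetSketch(bool isPPC64, bool isDarwinABI) {
  if (isDarwinABI)
    return isPPC64 ? 16 : 8; // LR slot in the caller's linkage area.
  return 4;                  // 32-bit SVR4: right above the back chain word.
}

int getOrCreateRASaveSlot(MachineFunction &MF, bool isPPC64,
                          bool isDarwinABI) {
  PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
  int RASI = FI->getReturnAddrSaveIndex();   // assumed accessor name
  if (!RASI) {                               // index 0 == "not allocated yet"
    int Offset = getReturnSaveOffsetSketch(isPPC64, isDarwinABI);
    RASI = MF.getFrameInfo()->CreateFixedObject(isPPC64 ? 8 : 4, Offset);
    FI->setReturnAddrSaveIndex(RASI);        // assumed accessor name
  }
  return RASI;
}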
@@ -2740,7 +3176,7 @@ SDValue PPCTargetLowering::getFramePointerFrameIndex(SelectionDAG & DAG) const { MachineFunction &MF = DAG.getMachineFunction(); bool IsPPC64 = PPCSubTarget.isPPC64(); - bool isMachoABI = PPCSubTarget.isMachoABI(); + bool isDarwinABI = PPCSubTarget.isDarwinABI(); MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); // Get current frame pointer save index. The users of this index will be @@ -2751,7 +3187,8 @@ PPCTargetLowering::getFramePointerFrameIndex(SelectionDAG & DAG) const { // If the frame pointer save index hasn't been defined yet. if (!FPSI) { // Find out what the fix offset of the frame pointer save area. - int FPOffset = PPCFrameInfo::getFramePointerSaveOffset(IsPPC64, isMachoABI); + int FPOffset = PPCFrameInfo::getFramePointerSaveOffset(IsPPC64, + isDarwinABI); // Allocate the frame index for frame pointer save area. FPSI = MF.getFrameInfo()->CreateFixedObject(IsPPC64? 8 : 4, FPOffset); @@ -3729,12 +4166,22 @@ SDValue PPCTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) { VarArgsNumGPR, VarArgsNumFPR, PPCSubTarget); case ISD::FORMAL_ARGUMENTS: - return LowerFORMAL_ARGUMENTS(Op, DAG, VarArgsFrameIndex, - VarArgsStackOffset, VarArgsNumGPR, - VarArgsNumFPR, PPCSubTarget); + if (PPCSubTarget.isSVR4ABI()) { + return LowerFORMAL_ARGUMENTS_SVR4(Op, DAG, VarArgsFrameIndex, + VarArgsStackOffset, VarArgsNumGPR, + VarArgsNumFPR, PPCSubTarget); + } else { + return LowerFORMAL_ARGUMENTS_Darwin(Op, DAG, VarArgsFrameIndex, + PPCSubTarget); + } - case ISD::CALL: return LowerCALL(Op, DAG, PPCSubTarget, - getTargetMachine()); + case ISD::CALL: + if (PPCSubTarget.isSVR4ABI()) { + return LowerCALL_SVR4(Op, DAG, PPCSubTarget, getTargetMachine()); + } else { + return LowerCALL_Darwin(Op, DAG, PPCSubTarget, getTargetMachine()); + } + case ISD::RET: return LowerRET(Op, DAG, getTargetMachine()); case ISD::STACKRESTORE: return LowerSTACKRESTORE(Op, DAG, PPCSubTarget); case ISD::DYNAMIC_STACKALLOC: @@ -4878,3 +5325,13 @@ PPCTargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const { // The PowerPC target isn't yet aware of offsets. return false; } + +MVT PPCTargetLowering::getOptimalMemOpType(uint64_t Size, unsigned Align, + bool isSrcConst, bool isSrcStr, + SelectionDAG &DAG) const { + if (this->PPCSubTarget.isPPC64()) { + return MVT::i64; + } else { + return MVT::i32; + } +} diff --git a/lib/Target/PowerPC/PPCISelLowering.h b/lib/Target/PowerPC/PPCISelLowering.h index b6d046f..962bbb1 100644 --- a/lib/Target/PowerPC/PPCISelLowering.h +++ b/lib/Target/PowerPC/PPCISelLowering.h @@ -82,7 +82,7 @@ namespace llvm { STD_32, /// CALL - A direct function call. - CALL_Macho, CALL_ELF, + CALL_Darwin, CALL_SVR4, /// CHAIN,FLAG = MTCTR(VAL, CHAIN[, INFLAG]) - Directly corresponds to a /// MTCTR instruction. @@ -90,7 +90,7 @@ namespace llvm { /// CHAIN,FLAG = BCTRL(CHAIN, INFLAG) - Directly corresponds to a /// BCTRL instruction. - BCTRL_Macho, BCTRL_ELF, + BCTRL_Darwin, BCTRL_SVR4, /// Return with a flag operand, matched by 'blr' RET_FLAG, @@ -223,7 +223,6 @@ namespace llvm { // register for parameter passing. unsigned VarArgsNumFPR; // Index of the first unused double // register for parameter passing. - int ReturnAddrIndex; // FrameIndex for return slot. 
const PPCSubtarget &PPCSubTarget; public: explicit PPCTargetLowering(PPCTargetMachine &TM); @@ -336,6 +335,13 @@ namespace llvm { SelectionDAG &DAG) const; virtual bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const; + + virtual MVT getOptimalMemOpType(uint64_t Size, unsigned Align, + bool isSrcConst, bool isSrcStr, + SelectionDAG &DAG) const; + + /// getFunctionAlignment - Return the Log2 alignment of this function. + virtual unsigned getFunctionAlignment(const Function *F) const; private: SDValue getFramePointerFrameIndex(SelectionDAG & DAG) const; @@ -346,6 +352,7 @@ namespace llvm { SDValue Chain, SDValue &LROpOut, SDValue &FPOpOut, + bool isDarwinABI, DebugLoc dl); SDValue LowerRETURNADDR(SDValue Op, SelectionDAG &DAG); @@ -363,14 +370,19 @@ namespace llvm { SDValue LowerVAARG(SDValue Op, SelectionDAG &DAG, int VarArgsFrameIndex, int VarArgsStackOffset, unsigned VarArgsNumGPR, unsigned VarArgsNumFPR, const PPCSubtarget &Subtarget); - SDValue LowerFORMAL_ARGUMENTS(SDValue Op, SelectionDAG &DAG, - int &VarArgsFrameIndex, - int &VarArgsStackOffset, - unsigned &VarArgsNumGPR, - unsigned &VarArgsNumFPR, - const PPCSubtarget &Subtarget); - SDValue LowerCALL(SDValue Op, SelectionDAG &DAG, - const PPCSubtarget &Subtarget, TargetMachine &TM); + SDValue LowerFORMAL_ARGUMENTS_SVR4(SDValue Op, SelectionDAG &DAG, + int &VarArgsFrameIndex, + int &VarArgsStackOffset, + unsigned &VarArgsNumGPR, + unsigned &VarArgsNumFPR, + const PPCSubtarget &Subtarget); + SDValue LowerFORMAL_ARGUMENTS_Darwin(SDValue Op, SelectionDAG &DAG, + int &VarArgsFrameIndex, + const PPCSubtarget &Subtarget); + SDValue LowerCALL_Darwin(SDValue Op, SelectionDAG &DAG, + const PPCSubtarget &Subtarget, TargetMachine &TM); + SDValue LowerCALL_SVR4(SDValue Op, SelectionDAG &DAG, + const PPCSubtarget &Subtarget, TargetMachine &TM); SDValue LowerRET(SDValue Op, SelectionDAG &DAG, TargetMachine &TM); SDValue LowerSTACKRESTORE(SDValue Op, SelectionDAG &DAG, const PPCSubtarget &Subtarget); diff --git a/lib/Target/PowerPC/PPCInstr64Bit.td b/lib/Target/PowerPC/PPCInstr64Bit.td index 417c8ed..3823e53 100644 --- a/lib/Target/PowerPC/PPCInstr64Bit.td +++ b/lib/Target/PowerPC/PPCInstr64Bit.td @@ -61,7 +61,7 @@ let Defs = [LR8] in def MovePCtoLR8 : Pseudo<(outs), (ins piclabel:$label), "bl $label", []>, PPC970_Unit_BRU; -// Macho ABI Calls. +// Darwin ABI Calls. let isCall = 1, PPC970_Unit = 7, // All calls clobber the PPC64 non-callee saved registers. Defs = [X0,X2,X3,X4,X5,X6,X7,X8,X9,X10,X11,X12, @@ -71,22 +71,22 @@ let isCall = 1, PPC970_Unit = 7, CR0,CR1,CR5,CR6,CR7] in { // Convenient aliases for call instructions let Uses = [RM] in { - def BL8_Macho : IForm<18, 0, 1, - (outs), (ins calltarget:$func, variable_ops), - "bl $func", BrB, []>; // See Pat patterns below. - def BLA8_Macho : IForm<18, 1, 1, - (outs), (ins aaddr:$func, variable_ops), - "bla $func", BrB, [(PPCcall_Macho (i64 imm:$func))]>; + def BL8_Darwin : IForm<18, 0, 1, + (outs), (ins calltarget:$func, variable_ops), + "bl $func", BrB, []>; // See Pat patterns below. 
+ def BLA8_Darwin : IForm<18, 1, 1, + (outs), (ins aaddr:$func, variable_ops), + "bla $func", BrB, [(PPCcall_Darwin (i64 imm:$func))]>; } let Uses = [CTR8, RM] in { - def BCTRL8_Macho : XLForm_2_ext<19, 528, 20, 0, 1, - (outs), (ins variable_ops), - "bctrl", BrB, - [(PPCbctrl_Macho)]>, Requires<[In64BitMode]>; + def BCTRL8_Darwin : XLForm_2_ext<19, 528, 20, 0, 1, + (outs), (ins variable_ops), + "bctrl", BrB, + [(PPCbctrl_Darwin)]>, Requires<[In64BitMode]>; } } -// ELF 64 ABI Calls = Macho ABI Calls +// ELF 64 ABI Calls = Darwin ABI Calls // Used to define BL8_ELF and BLA8_ELF let isCall = 1, PPC970_Unit = 7, // All calls clobber the PPC64 non-callee saved registers. @@ -102,26 +102,26 @@ let isCall = 1, PPC970_Unit = 7, "bl $func", BrB, []>; // See Pat patterns below. def BLA8_ELF : IForm<18, 1, 1, (outs), (ins aaddr:$func, variable_ops), - "bla $func", BrB, [(PPCcall_ELF (i64 imm:$func))]>; + "bla $func", BrB, [(PPCcall_SVR4 (i64 imm:$func))]>; } let Uses = [CTR8, RM] in { def BCTRL8_ELF : XLForm_2_ext<19, 528, 20, 0, 1, (outs), (ins variable_ops), "bctrl", BrB, - [(PPCbctrl_ELF)]>, Requires<[In64BitMode]>; + [(PPCbctrl_SVR4)]>, Requires<[In64BitMode]>; } } // Calls -def : Pat<(PPCcall_Macho (i64 tglobaladdr:$dst)), - (BL8_Macho tglobaladdr:$dst)>; -def : Pat<(PPCcall_Macho (i64 texternalsym:$dst)), - (BL8_Macho texternalsym:$dst)>; +def : Pat<(PPCcall_Darwin (i64 tglobaladdr:$dst)), + (BL8_Darwin tglobaladdr:$dst)>; +def : Pat<(PPCcall_Darwin (i64 texternalsym:$dst)), + (BL8_Darwin texternalsym:$dst)>; -def : Pat<(PPCcall_ELF (i64 tglobaladdr:$dst)), +def : Pat<(PPCcall_SVR4 (i64 tglobaladdr:$dst)), (BL8_ELF tglobaladdr:$dst)>; -def : Pat<(PPCcall_ELF (i64 texternalsym:$dst)), +def : Pat<(PPCcall_SVR4 (i64 texternalsym:$dst)), (BL8_ELF texternalsym:$dst)>; // Atomic operations diff --git a/lib/Target/PowerPC/PPCInstrInfo.cpp b/lib/Target/PowerPC/PPCInstrInfo.cpp index 778f034..87c612a 100644 --- a/lib/Target/PowerPC/PPCInstrInfo.cpp +++ b/lib/Target/PowerPC/PPCInstrInfo.cpp @@ -444,21 +444,29 @@ PPCInstrInfo::StoreRegToStackSlot(MachineFunction &MF, // not cause any bug. If we need other uses of CR bits, the following // code may be invalid. 
unsigned Reg = 0; - if (SrcReg >= PPC::CR0LT || SrcReg <= PPC::CR0UN) + if (SrcReg == PPC::CR0LT || SrcReg == PPC::CR0GT || + SrcReg == PPC::CR0EQ || SrcReg == PPC::CR0UN) Reg = PPC::CR0; - else if (SrcReg >= PPC::CR1LT || SrcReg <= PPC::CR1UN) + else if (SrcReg == PPC::CR1LT || SrcReg == PPC::CR1GT || + SrcReg == PPC::CR1EQ || SrcReg == PPC::CR1UN) Reg = PPC::CR1; - else if (SrcReg >= PPC::CR2LT || SrcReg <= PPC::CR2UN) + else if (SrcReg == PPC::CR2LT || SrcReg == PPC::CR2GT || + SrcReg == PPC::CR2EQ || SrcReg == PPC::CR2UN) Reg = PPC::CR2; - else if (SrcReg >= PPC::CR3LT || SrcReg <= PPC::CR3UN) + else if (SrcReg == PPC::CR3LT || SrcReg == PPC::CR3GT || + SrcReg == PPC::CR3EQ || SrcReg == PPC::CR3UN) Reg = PPC::CR3; - else if (SrcReg >= PPC::CR4LT || SrcReg <= PPC::CR4UN) + else if (SrcReg == PPC::CR4LT || SrcReg == PPC::CR4GT || + SrcReg == PPC::CR4EQ || SrcReg == PPC::CR4UN) Reg = PPC::CR4; - else if (SrcReg >= PPC::CR5LT || SrcReg <= PPC::CR5UN) + else if (SrcReg == PPC::CR5LT || SrcReg == PPC::CR5GT || + SrcReg == PPC::CR5EQ || SrcReg == PPC::CR5UN) Reg = PPC::CR5; - else if (SrcReg >= PPC::CR6LT || SrcReg <= PPC::CR6UN) + else if (SrcReg == PPC::CR6LT || SrcReg == PPC::CR6GT || + SrcReg == PPC::CR6EQ || SrcReg == PPC::CR6UN) Reg = PPC::CR6; - else if (SrcReg >= PPC::CR7LT || SrcReg <= PPC::CR7UN) + else if (SrcReg == PPC::CR7LT || SrcReg == PPC::CR7GT || + SrcReg == PPC::CR7EQ || SrcReg == PPC::CR7UN) Reg = PPC::CR7; return StoreRegToStackSlot(MF, Reg, isKill, FrameIdx, @@ -587,21 +595,29 @@ PPCInstrInfo::LoadRegFromStackSlot(MachineFunction &MF, DebugLoc DL, } else if (RC == PPC::CRBITRCRegisterClass) { unsigned Reg = 0; - if (DestReg >= PPC::CR0LT || DestReg <= PPC::CR0UN) + if (DestReg == PPC::CR0LT || DestReg == PPC::CR0GT || + DestReg == PPC::CR0EQ || DestReg == PPC::CR0UN) Reg = PPC::CR0; - else if (DestReg >= PPC::CR1LT || DestReg <= PPC::CR1UN) + else if (DestReg == PPC::CR1LT || DestReg == PPC::CR1GT || + DestReg == PPC::CR1EQ || DestReg == PPC::CR1UN) Reg = PPC::CR1; - else if (DestReg >= PPC::CR2LT || DestReg <= PPC::CR2UN) + else if (DestReg == PPC::CR2LT || DestReg == PPC::CR2GT || + DestReg == PPC::CR2EQ || DestReg == PPC::CR2UN) Reg = PPC::CR2; - else if (DestReg >= PPC::CR3LT || DestReg <= PPC::CR3UN) + else if (DestReg == PPC::CR3LT || DestReg == PPC::CR3GT || + DestReg == PPC::CR3EQ || DestReg == PPC::CR3UN) Reg = PPC::CR3; - else if (DestReg >= PPC::CR4LT || DestReg <= PPC::CR4UN) + else if (DestReg == PPC::CR4LT || DestReg == PPC::CR4GT || + DestReg == PPC::CR4EQ || DestReg == PPC::CR4UN) Reg = PPC::CR4; - else if (DestReg >= PPC::CR5LT || DestReg <= PPC::CR5UN) + else if (DestReg == PPC::CR5LT || DestReg == PPC::CR5GT || + DestReg == PPC::CR5EQ || DestReg == PPC::CR5UN) Reg = PPC::CR5; - else if (DestReg >= PPC::CR6LT || DestReg <= PPC::CR6UN) + else if (DestReg == PPC::CR6LT || DestReg == PPC::CR6GT || + DestReg == PPC::CR6EQ || DestReg == PPC::CR6UN) Reg = PPC::CR6; - else if (DestReg >= PPC::CR7LT || DestReg <= PPC::CR7UN) + else if (DestReg == PPC::CR7LT || DestReg == PPC::CR7GT || + DestReg == PPC::CR7EQ || DestReg == PPC::CR7UN) Reg = PPC::CR7; return LoadRegFromStackSlot(MF, DL, Reg, FrameIdx, @@ -691,16 +707,21 @@ MachineInstr *PPCInstrInfo::foldMemoryOperandImpl(MachineFunction &MF, if (OpNum == 0) { // move -> store unsigned InReg = MI->getOperand(1).getReg(); bool isKill = MI->getOperand(1).isKill(); + bool isUndef = MI->getOperand(1).isUndef(); NewMI = addFrameReference(BuildMI(MF, MI->getDebugLoc(), get(PPC::STW)) - .addReg(InReg, 
getKillRegState(isKill)), + .addReg(InReg, + getKillRegState(isKill) | + getUndefRegState(isUndef)), FrameIndex); } else { // move -> load unsigned OutReg = MI->getOperand(0).getReg(); bool isDead = MI->getOperand(0).isDead(); + bool isUndef = MI->getOperand(0).isUndef(); NewMI = addFrameReference(BuildMI(MF, MI->getDebugLoc(), get(PPC::LWZ)) .addReg(OutReg, RegState::Define | - getDeadRegState(isDead)), + getDeadRegState(isDead) | + getUndefRegState(isUndef)), FrameIndex); } } else if ((Opc == PPC::OR8 && @@ -708,48 +729,63 @@ MachineInstr *PPCInstrInfo::foldMemoryOperandImpl(MachineFunction &MF, if (OpNum == 0) { // move -> store unsigned InReg = MI->getOperand(1).getReg(); bool isKill = MI->getOperand(1).isKill(); + bool isUndef = MI->getOperand(1).isUndef(); NewMI = addFrameReference(BuildMI(MF, MI->getDebugLoc(), get(PPC::STD)) - .addReg(InReg, getKillRegState(isKill)), + .addReg(InReg, + getKillRegState(isKill) | + getUndefRegState(isUndef)), FrameIndex); } else { // move -> load unsigned OutReg = MI->getOperand(0).getReg(); bool isDead = MI->getOperand(0).isDead(); + bool isUndef = MI->getOperand(0).isUndef(); NewMI = addFrameReference(BuildMI(MF, MI->getDebugLoc(), get(PPC::LD)) .addReg(OutReg, RegState::Define | - getDeadRegState(isDead)), + getDeadRegState(isDead) | + getUndefRegState(isUndef)), FrameIndex); } } else if (Opc == PPC::FMRD) { if (OpNum == 0) { // move -> store unsigned InReg = MI->getOperand(1).getReg(); bool isKill = MI->getOperand(1).isKill(); + bool isUndef = MI->getOperand(1).isUndef(); NewMI = addFrameReference(BuildMI(MF, MI->getDebugLoc(), get(PPC::STFD)) - .addReg(InReg, getKillRegState(isKill)), + .addReg(InReg, + getKillRegState(isKill) | + getUndefRegState(isUndef)), FrameIndex); } else { // move -> load unsigned OutReg = MI->getOperand(0).getReg(); bool isDead = MI->getOperand(0).isDead(); + bool isUndef = MI->getOperand(0).isUndef(); NewMI = addFrameReference(BuildMI(MF, MI->getDebugLoc(), get(PPC::LFD)) .addReg(OutReg, RegState::Define | - getDeadRegState(isDead)), + getDeadRegState(isDead) | + getUndefRegState(isUndef)), FrameIndex); } } else if (Opc == PPC::FMRS) { if (OpNum == 0) { // move -> store unsigned InReg = MI->getOperand(1).getReg(); bool isKill = MI->getOperand(1).isKill(); + bool isUndef = MI->getOperand(1).isUndef(); NewMI = addFrameReference(BuildMI(MF, MI->getDebugLoc(), get(PPC::STFS)) - .addReg(InReg, getKillRegState(isKill)), + .addReg(InReg, + getKillRegState(isKill) | + getUndefRegState(isUndef)), FrameIndex); } else { // move -> load unsigned OutReg = MI->getOperand(0).getReg(); bool isDead = MI->getOperand(0).isDead(); + bool isUndef = MI->getOperand(0).isUndef(); NewMI = addFrameReference(BuildMI(MF, MI->getDebugLoc(), get(PPC::LFS)) .addReg(OutReg, RegState::Define | - getDeadRegState(isDead)), + getDeadRegState(isDead) | + getUndefRegState(isUndef)), FrameIndex); } } diff --git a/lib/Target/PowerPC/PPCInstrInfo.td b/lib/Target/PowerPC/PPCInstrInfo.td index 772e25a..7af59a2 100644 --- a/lib/Target/PowerPC/PPCInstrInfo.td +++ b/lib/Target/PowerPC/PPCInstrInfo.td @@ -107,17 +107,17 @@ def callseq_end : SDNode<"ISD::CALLSEQ_END", SDT_PPCCallSeqEnd, [SDNPHasChain, SDNPOptInFlag, SDNPOutFlag]>; def SDT_PPCCall : SDTypeProfile<0, -1, [SDTCisInt<0>]>; -def PPCcall_Macho : SDNode<"PPCISD::CALL_Macho", SDT_PPCCall, - [SDNPHasChain, SDNPOptInFlag, SDNPOutFlag]>; -def PPCcall_ELF : SDNode<"PPCISD::CALL_ELF", SDT_PPCCall, +def PPCcall_Darwin : SDNode<"PPCISD::CALL_Darwin", SDT_PPCCall, + [SDNPHasChain, SDNPOptInFlag, SDNPOutFlag]>; +def 
PPCcall_SVR4 : SDNode<"PPCISD::CALL_SVR4", SDT_PPCCall, [SDNPHasChain, SDNPOptInFlag, SDNPOutFlag]>; def PPCmtctr : SDNode<"PPCISD::MTCTR", SDT_PPCCall, [SDNPHasChain, SDNPOptInFlag, SDNPOutFlag]>; -def PPCbctrl_Macho : SDNode<"PPCISD::BCTRL_Macho", SDTNone, - [SDNPHasChain, SDNPOptInFlag, SDNPOutFlag]>; +def PPCbctrl_Darwin : SDNode<"PPCISD::BCTRL_Darwin", SDTNone, + [SDNPHasChain, SDNPOptInFlag, SDNPOutFlag]>; -def PPCbctrl_ELF : SDNode<"PPCISD::BCTRL_ELF", SDTNone, - [SDNPHasChain, SDNPOptInFlag, SDNPOutFlag]>; +def PPCbctrl_SVR4 : SDNode<"PPCISD::BCTRL_SVR4", SDTNone, + [SDNPHasChain, SDNPOptInFlag, SDNPOutFlag]>; def retflag : SDNode<"PPCISD::RET_FLAG", SDTNone, [SDNPHasChain, SDNPOptInFlag]>; @@ -412,7 +412,7 @@ let isBranch = 1, isTerminator = 1, hasCtrlDep = 1, PPC970_Unit = 7 in { /*[(PPCcondbranch CRRC:$crS, imm:$opc, bb:$dst)]*/>; } -// Macho ABI Calls. +// Darwin ABI Calls. let isCall = 1, PPC970_Unit = 7, // All calls clobber the non-callee saved registers... Defs = [R0,R2,R3,R4,R5,R6,R7,R8,R9,R10,R11,R12, @@ -424,26 +424,26 @@ let isCall = 1, PPC970_Unit = 7, CR5UN,CR6LT,CR6GT,CR6EQ,CR6UN,CR7LT,CR7GT,CR7EQ,CR7UN] in { // Convenient aliases for call instructions let Uses = [RM] in { - def BL_Macho : IForm<18, 0, 1, - (outs), (ins calltarget:$func, variable_ops), - "bl $func", BrB, []>; // See Pat patterns below. - def BLA_Macho : IForm<18, 1, 1, + def BL_Darwin : IForm<18, 0, 1, + (outs), (ins calltarget:$func, variable_ops), + "bl $func", BrB, []>; // See Pat patterns below. + def BLA_Darwin : IForm<18, 1, 1, (outs), (ins aaddr:$func, variable_ops), - "bla $func", BrB, [(PPCcall_Macho (i32 imm:$func))]>; + "bla $func", BrB, [(PPCcall_Darwin (i32 imm:$func))]>; } let Uses = [CTR, RM] in { - def BCTRL_Macho : XLForm_2_ext<19, 528, 20, 0, 1, - (outs), (ins variable_ops), - "bctrl", BrB, - [(PPCbctrl_Macho)]>, Requires<[In32BitMode]>; + def BCTRL_Darwin : XLForm_2_ext<19, 528, 20, 0, 1, + (outs), (ins variable_ops), + "bctrl", BrB, + [(PPCbctrl_Darwin)]>, Requires<[In32BitMode]>; } } -// ELF ABI Calls. +// SVR4 ABI Calls. let isCall = 1, PPC970_Unit = 7, // All calls clobber the non-callee saved registers... - Defs = [R0,R2,R3,R4,R5,R6,R7,R8,R9,R10,R11,R12, - F0,F1,F2,F3,F4,F5,F6,F7,F8, + Defs = [R0,R3,R4,R5,R6,R7,R8,R9,R10,R11,R12, + F0,F1,F2,F3,F4,F5,F6,F7,F8,F9,F10,F11,F12,F13, V0,V1,V2,V3,V4,V5,V6,V7,V8,V9,V10,V11,V12,V13,V14,V15,V16,V17,V18,V19, LR,CTR, CR0,CR1,CR5,CR6,CR7, @@ -451,19 +451,19 @@ let isCall = 1, PPC970_Unit = 7, CR5UN,CR6LT,CR6GT,CR6EQ,CR6UN,CR7LT,CR7GT,CR7EQ,CR7UN] in { // Convenient aliases for call instructions let Uses = [RM] in { - def BL_ELF : IForm<18, 0, 1, + def BL_SVR4 : IForm<18, 0, 1, (outs), (ins calltarget:$func, variable_ops), "bl $func", BrB, []>; // See Pat patterns below. 
- def BLA_ELF : IForm<18, 1, 1, + def BLA_SVR4 : IForm<18, 1, 1, (outs), (ins aaddr:$func, variable_ops), "bla $func", BrB, - [(PPCcall_ELF (i32 imm:$func))]>; + [(PPCcall_SVR4 (i32 imm:$func))]>; } let Uses = [CTR, RM] in { - def BCTRL_ELF : XLForm_2_ext<19, 528, 20, 0, 1, - (outs), (ins variable_ops), - "bctrl", BrB, - [(PPCbctrl_ELF)]>, Requires<[In32BitMode]>; + def BCTRL_SVR4 : XLForm_2_ext<19, 528, 20, 0, 1, + (outs), (ins variable_ops), + "bctrl", BrB, + [(PPCbctrl_SVR4)]>, Requires<[In32BitMode]>; } } @@ -1389,14 +1389,14 @@ def : Pat<(and (rotl GPRC:$in, GPRC:$sh), maskimm32:$imm), (RLWNM GPRC:$in, GPRC:$sh, (MB maskimm32:$imm), (ME maskimm32:$imm))>; // Calls -def : Pat<(PPCcall_Macho (i32 tglobaladdr:$dst)), - (BL_Macho tglobaladdr:$dst)>; -def : Pat<(PPCcall_Macho (i32 texternalsym:$dst)), - (BL_Macho texternalsym:$dst)>; -def : Pat<(PPCcall_ELF (i32 tglobaladdr:$dst)), - (BL_ELF tglobaladdr:$dst)>; -def : Pat<(PPCcall_ELF (i32 texternalsym:$dst)), - (BL_ELF texternalsym:$dst)>; +def : Pat<(PPCcall_Darwin (i32 tglobaladdr:$dst)), + (BL_Darwin tglobaladdr:$dst)>; +def : Pat<(PPCcall_Darwin (i32 texternalsym:$dst)), + (BL_Darwin texternalsym:$dst)>; +def : Pat<(PPCcall_SVR4 (i32 tglobaladdr:$dst)), + (BL_SVR4 tglobaladdr:$dst)>; +def : Pat<(PPCcall_SVR4 (i32 texternalsym:$dst)), + (BL_SVR4 texternalsym:$dst)>; def : Pat<(PPCtc_return (i32 tglobaladdr:$dst), imm:$imm), diff --git a/lib/Target/PowerPC/PPCJITInfo.cpp b/lib/Target/PowerPC/PPCJITInfo.cpp index 035647e..7486d74 100644 --- a/lib/Target/PowerPC/PPCJITInfo.cpp +++ b/lib/Target/PowerPC/PPCJITInfo.cpp @@ -354,7 +354,7 @@ void *PPCJITInfo::emitFunctionStub(const Function* F, void *Fn, JCE.emitWordBE(0xf821ffb1); // stdu r1,-80(r1) JCE.emitWordBE(0x7d6802a6); // mflr r11 JCE.emitWordBE(0xf9610060); // std r11, 96(r1) - } else if (TM.getSubtargetImpl()->isMachoABI()){ + } else if (TM.getSubtargetImpl()->isDarwinABI()){ JCE.emitWordBE(0x9421ffe0); // stwu r1,-32(r1) JCE.emitWordBE(0x7d6802a6); // mflr r11 JCE.emitWordBE(0x91610028); // stw r11, 40(r1) diff --git a/lib/Target/PowerPC/PPCRegisterInfo.cpp b/lib/Target/PowerPC/PPCRegisterInfo.cpp index cb31506..97b1c57 100644 --- a/lib/Target/PowerPC/PPCRegisterInfo.cpp +++ b/lib/Target/PowerPC/PPCRegisterInfo.cpp @@ -149,7 +149,7 @@ const TargetRegisterClass *PPCRegisterInfo::getPointerRegClass() const { const unsigned* PPCRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const { // 32-bit Darwin calling convention. 
- static const unsigned Macho32_CalleeSavedRegs[] = { + static const unsigned Darwin32_CalleeSavedRegs[] = { PPC::R13, PPC::R14, PPC::R15, PPC::R16, PPC::R17, PPC::R18, PPC::R19, PPC::R20, PPC::R21, PPC::R22, PPC::R23, @@ -174,15 +174,13 @@ PPCRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const { PPC::LR, 0 }; - static const unsigned ELF32_CalleeSavedRegs[] = { - PPC::R13, PPC::R14, PPC::R15, + static const unsigned SVR4_CalleeSavedRegs[] = { + PPC::R14, PPC::R15, PPC::R16, PPC::R17, PPC::R18, PPC::R19, PPC::R20, PPC::R21, PPC::R22, PPC::R23, PPC::R24, PPC::R25, PPC::R26, PPC::R27, PPC::R28, PPC::R29, PPC::R30, PPC::R31, - PPC::F9, - PPC::F10, PPC::F11, PPC::F12, PPC::F13, PPC::F14, PPC::F15, PPC::F16, PPC::F17, PPC::F18, PPC::F19, PPC::F20, PPC::F21, PPC::F22, PPC::F23, PPC::F24, PPC::F25, @@ -190,6 +188,9 @@ PPCRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const { PPC::F30, PPC::F31, PPC::CR2, PPC::CR3, PPC::CR4, + + PPC::VRSAVE, + PPC::V20, PPC::V21, PPC::V22, PPC::V23, PPC::V24, PPC::V25, PPC::V26, PPC::V27, PPC::V28, PPC::V29, PPC::V30, PPC::V31, @@ -201,7 +202,7 @@ PPCRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const { PPC::LR, 0 }; // 64-bit Darwin calling convention. - static const unsigned Macho64_CalleeSavedRegs[] = { + static const unsigned Darwin64_CalleeSavedRegs[] = { PPC::X14, PPC::X15, PPC::X16, PPC::X17, PPC::X18, PPC::X19, PPC::X20, PPC::X21, PPC::X22, PPC::X23, @@ -226,18 +227,17 @@ PPCRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const { PPC::LR8, 0 }; - if (Subtarget.isMachoABI()) - return Subtarget.isPPC64() ? Macho64_CalleeSavedRegs : - Macho32_CalleeSavedRegs; + if (Subtarget.isDarwinABI()) + return Subtarget.isPPC64() ? Darwin64_CalleeSavedRegs : + Darwin32_CalleeSavedRegs; - // ELF 32. - return ELF32_CalleeSavedRegs; + return SVR4_CalleeSavedRegs; } const TargetRegisterClass* const* PPCRegisterInfo::getCalleeSavedRegClasses(const MachineFunction *MF) const { - // 32-bit Macho calling convention. - static const TargetRegisterClass * const Macho32_CalleeSavedRegClasses[] = { + // 32-bit Darwin calling convention. 
+ static const TargetRegisterClass * const Darwin32_CalleeSavedRegClasses[] = { &PPC::GPRCRegClass,&PPC::GPRCRegClass,&PPC::GPRCRegClass, &PPC::GPRCRegClass,&PPC::GPRCRegClass,&PPC::GPRCRegClass,&PPC::GPRCRegClass, &PPC::GPRCRegClass,&PPC::GPRCRegClass,&PPC::GPRCRegClass,&PPC::GPRCRegClass, @@ -266,15 +266,13 @@ PPCRegisterInfo::getCalleeSavedRegClasses(const MachineFunction *MF) const { &PPC::GPRCRegClass, 0 }; - static const TargetRegisterClass * const ELF32_CalleeSavedRegClasses[] = { - &PPC::GPRCRegClass,&PPC::GPRCRegClass,&PPC::GPRCRegClass, + static const TargetRegisterClass * const SVR4_CalleeSavedRegClasses[] = { + &PPC::GPRCRegClass,&PPC::GPRCRegClass, &PPC::GPRCRegClass,&PPC::GPRCRegClass,&PPC::GPRCRegClass,&PPC::GPRCRegClass, &PPC::GPRCRegClass,&PPC::GPRCRegClass,&PPC::GPRCRegClass,&PPC::GPRCRegClass, &PPC::GPRCRegClass,&PPC::GPRCRegClass,&PPC::GPRCRegClass,&PPC::GPRCRegClass, &PPC::GPRCRegClass,&PPC::GPRCRegClass,&PPC::GPRCRegClass,&PPC::GPRCRegClass, - &PPC::F8RCRegClass, - &PPC::F8RCRegClass,&PPC::F8RCRegClass,&PPC::F8RCRegClass,&PPC::F8RCRegClass, &PPC::F8RCRegClass,&PPC::F8RCRegClass,&PPC::F8RCRegClass,&PPC::F8RCRegClass, &PPC::F8RCRegClass,&PPC::F8RCRegClass,&PPC::F8RCRegClass,&PPC::F8RCRegClass, &PPC::F8RCRegClass,&PPC::F8RCRegClass,&PPC::F8RCRegClass,&PPC::F8RCRegClass, @@ -283,6 +281,8 @@ PPCRegisterInfo::getCalleeSavedRegClasses(const MachineFunction *MF) const { &PPC::CRRCRegClass,&PPC::CRRCRegClass,&PPC::CRRCRegClass, + &PPC::VRSAVERCRegClass, + &PPC::VRRCRegClass,&PPC::VRRCRegClass,&PPC::VRRCRegClass,&PPC::VRRCRegClass, &PPC::VRRCRegClass,&PPC::VRRCRegClass,&PPC::VRRCRegClass,&PPC::VRRCRegClass, &PPC::VRRCRegClass,&PPC::VRRCRegClass,&PPC::VRRCRegClass,&PPC::VRRCRegClass, @@ -297,8 +297,8 @@ PPCRegisterInfo::getCalleeSavedRegClasses(const MachineFunction *MF) const { &PPC::GPRCRegClass, 0 }; - // 64-bit Macho calling convention. - static const TargetRegisterClass * const Macho64_CalleeSavedRegClasses[] = { + // 64-bit Darwin calling convention. + static const TargetRegisterClass * const Darwin64_CalleeSavedRegClasses[] = { &PPC::G8RCRegClass,&PPC::G8RCRegClass, &PPC::G8RCRegClass,&PPC::G8RCRegClass,&PPC::G8RCRegClass,&PPC::G8RCRegClass, &PPC::G8RCRegClass,&PPC::G8RCRegClass,&PPC::G8RCRegClass,&PPC::G8RCRegClass, @@ -327,12 +327,11 @@ PPCRegisterInfo::getCalleeSavedRegClasses(const MachineFunction *MF) const { &PPC::G8RCRegClass, 0 }; - if (Subtarget.isMachoABI()) - return Subtarget.isPPC64() ? Macho64_CalleeSavedRegClasses : - Macho32_CalleeSavedRegClasses; + if (Subtarget.isDarwinABI()) + return Subtarget.isPPC64() ? Darwin64_CalleeSavedRegClasses : + Darwin32_CalleeSavedRegClasses; - // ELF 32. - return ELF32_CalleeSavedRegClasses; + return SVR4_CalleeSavedRegClasses; } // needsFP - Return true if the specified function should have a dedicated frame @@ -358,10 +357,12 @@ BitVector PPCRegisterInfo::getReservedRegs(const MachineFunction &MF) const { Reserved.set(PPC::LR8); Reserved.set(PPC::RM); - // In Linux, r2 is reserved for the OS. - if (!Subtarget.isDarwin()) - Reserved.set(PPC::R2); - + // The SVR4 ABI reserves r2 and r13 + if (Subtarget.isSVR4ABI()) { + Reserved.set(PPC::R2); // System-reserved register + Reserved.set(PPC::R13); // Small Data Area pointer register + } + // On PPC64, r13 is the thread pointer. Never allocate this register. Note // that this is over conservative, as it also prevents allocation of R31 when // the FP is not needed. 
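Note that the SVR4 list edits above (R13 and F9-F13 dropped, VRSAVE and the vector registers added) have to be made twice, because getCalleeSavedRegs and getCalleeSavedRegClasses return parallel zero-terminated arrays whose entries must describe the same register at the same index. A debug-only consistency check one could add; the helper below is hypothetical and not part of this patch.

#include <cassert>
#include "llvm/Target/TargetRegisterInfo.h"

// Hypothetical sanity check: entry i of the register list and entry i of
// the class list describe one callee-saved register, and both lists must
// hit their 0/NULL terminator at the same position.
static void verifyCalleeSavedLists(const unsigned *Regs,
                                   const TargetRegisterClass *const *Classes) {
  unsigned i = 0;
  while (Regs[i] != 0 && Classes[i] != 0)
    ++i;
  assert(Regs[i] == 0 && Classes[i] == 0 &&
         "Callee-saved register and register-class lists out of sync!");
}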
@@ -909,6 +910,7 @@ void PPCRegisterInfo::determineFrameLayout(MachineFunction &MF) const { // don't have a frame pointer, calls, or dynamic alloca then we do not need // to adjust the stack pointer (we fit in the Red Zone). bool DisableRedZone = MF.getFunction()->hasFnAttr(Attribute::NoRedZone); + // FIXME SVR4 The SVR4 ABI has no red zone. if (!DisableRedZone && FrameSize <= 224 && // Fits in red zone. !MFI->hasVarSizedObjects() && // No dynamic alloca. @@ -925,7 +927,7 @@ void PPCRegisterInfo::determineFrameLayout(MachineFunction &MF) const { // Maximum call frame needs to be at least big enough for linkage and 8 args. unsigned minCallFrameSize = PPCFrameInfo::getMinCallFrameSize(Subtarget.isPPC64(), - Subtarget.isMachoABI()); + Subtarget.isDarwinABI()); maxCallFrameSize = std::max(maxCallFrameSize, minCallFrameSize); // If we have dynamic alloca then maxCallFrameSize needs to be aligned so @@ -958,16 +960,15 @@ PPCRegisterInfo::processFunctionBeforeCalleeSavedScan(MachineFunction &MF, // Save R31 if necessary int FPSI = FI->getFramePointerSaveIndex(); bool IsPPC64 = Subtarget.isPPC64(); - bool IsELF32_ABI = Subtarget.isELF32_ABI(); - bool IsMachoABI = Subtarget.isMachoABI(); + bool IsSVR4ABI = Subtarget.isSVR4ABI(); + bool isDarwinABI = Subtarget.isDarwinABI(); MachineFrameInfo *MFI = MF.getFrameInfo(); // If the frame pointer save index hasn't been defined yet. - if (!FPSI && (NoFramePointerElim || MFI->hasVarSizedObjects()) && - IsELF32_ABI) { + if (!FPSI && needsFP(MF) && IsSVR4ABI) { // Find out what the fix offset of the frame pointer save area. int FPOffset = PPCFrameInfo::getFramePointerSaveOffset(IsPPC64, - IsMachoABI); + isDarwinABI); // Allocate the frame index for frame pointer save area. FPSI = MF.getFrameInfo()->CreateFixedObject(IsPPC64? 8 : 4, FPOffset); // Save the result. @@ -976,11 +977,10 @@ PPCRegisterInfo::processFunctionBeforeCalleeSavedScan(MachineFunction &MF, // Reserve stack space to move the linkage area to in case of a tail call. int TCSPDelta = 0; - if (PerformTailCallOpt && (TCSPDelta=FI->getTailCallSPDelta()) < 0) { - int AddFPOffsetAmount = IsELF32_ABI ? -4 : 0; - MF.getFrameInfo()->CreateFixedObject( -1 * TCSPDelta, - AddFPOffsetAmount + TCSPDelta); + if (PerformTailCallOpt && (TCSPDelta = FI->getTailCallSPDelta()) < 0) { + MF.getFrameInfo()->CreateFixedObject(-1 * TCSPDelta, TCSPDelta); } + // Reserve a slot closest to SP or frame pointer if we have a dynalloc or // a large stack, which will require scavenging a register to materialize a // large offset. @@ -999,6 +999,170 @@ PPCRegisterInfo::processFunctionBeforeCalleeSavedScan(MachineFunction &MF, } void +PPCRegisterInfo::processFunctionBeforeFrameFinalized(MachineFunction &MF) + const { + // Early exit if not using the SVR4 ABI. + if (!Subtarget.isSVR4ABI()) { + return; + } + + // Get callee saved register information. + MachineFrameInfo *FFI = MF.getFrameInfo(); + const std::vector<CalleeSavedInfo> &CSI = FFI->getCalleeSavedInfo(); + + // Early exit if no callee saved registers are modified! 
+ if (CSI.empty() && !needsFP(MF)) { + return; + } + + unsigned MinGPR = PPC::R31; + unsigned MinFPR = PPC::F31; + unsigned MinVR = PPC::V31; + + bool HasGPSaveArea = false; + bool HasFPSaveArea = false; + bool HasCRSaveArea = false; + bool HasVRSAVESaveArea = false; + bool HasVRSaveArea = false; + + SmallVector<CalleeSavedInfo, 18> GPRegs; + SmallVector<CalleeSavedInfo, 18> FPRegs; + SmallVector<CalleeSavedInfo, 18> VRegs; + + for (unsigned i = 0, e = CSI.size(); i != e; ++i) { + unsigned Reg = CSI[i].getReg(); + const TargetRegisterClass *RC = CSI[i].getRegClass(); + + if (RC == PPC::GPRCRegisterClass) { + HasGPSaveArea = true; + + GPRegs.push_back(CSI[i]); + + if (Reg < MinGPR) { + MinGPR = Reg; + } + } else if (RC == PPC::F8RCRegisterClass) { + HasFPSaveArea = true; + + FPRegs.push_back(CSI[i]); + + if (Reg < MinFPR) { + MinFPR = Reg; + } +// FIXME SVR4: Disable CR save area for now. + } else if ( RC == PPC::CRBITRCRegisterClass + || RC == PPC::CRRCRegisterClass) { +// HasCRSaveArea = true; + } else if (RC == PPC::VRSAVERCRegisterClass) { + HasVRSAVESaveArea = true; + } else if (RC == PPC::VRRCRegisterClass) { + HasVRSaveArea = true; + + VRegs.push_back(CSI[i]); + + if (Reg < MinVR) { + MinVR = Reg; + } + } else { + assert(0 && "Unknown RegisterClass!"); + } + } + + PPCFunctionInfo *PFI = MF.getInfo<PPCFunctionInfo>(); + + int64_t LowerBound = 0; + + // Take into account stack space reserved for tail calls. + int TCSPDelta = 0; + if (PerformTailCallOpt && (TCSPDelta = PFI->getTailCallSPDelta()) < 0) { + LowerBound = TCSPDelta; + } + + // The Floating-point register save area is right below the back chain word + // of the previous stack frame. + if (HasFPSaveArea) { + for (unsigned i = 0, e = FPRegs.size(); i != e; ++i) { + int FI = FPRegs[i].getFrameIdx(); + + FFI->setObjectOffset(FI, LowerBound + FFI->getObjectOffset(FI)); + } + + LowerBound -= (31 - getRegisterNumbering(MinFPR) + 1) * 8; + } + + // Check whether the frame pointer register is allocated. If so, make sure it + // is spilled to the correct offset. + if (needsFP(MF)) { + HasGPSaveArea = true; + + int FI = PFI->getFramePointerSaveIndex(); + assert(FI && "No Frame Pointer Save Slot!"); + + FFI->setObjectOffset(FI, LowerBound + FFI->getObjectOffset(FI)); + } + + // General register save area starts right below the Floating-point + // register save area. + if (HasGPSaveArea) { + // Move general register save area spill slots down, taking into account + // the size of the Floating-point register save area. + for (unsigned i = 0, e = GPRegs.size(); i != e; ++i) { + int FI = GPRegs[i].getFrameIdx(); + + FFI->setObjectOffset(FI, LowerBound + FFI->getObjectOffset(FI)); + } + + LowerBound -= (31 - getRegisterNumbering(MinGPR) + 1) * 4; + } + + // The CR save area is below the general register save area. + if (HasCRSaveArea) { + // FIXME SVR4: Is it actually possible to have multiple elements in CSI + // which have the CR/CRBIT register class? + // Adjust the frame index of the CR spill slot. + for (unsigned i = 0, e = CSI.size(); i != e; ++i) { + const TargetRegisterClass *RC = CSI[i].getRegClass(); + + if (RC == PPC::CRBITRCRegisterClass || RC == PPC::CRRCRegisterClass) { + int FI = CSI[i].getFrameIdx(); + + FFI->setObjectOffset(FI, LowerBound + FFI->getObjectOffset(FI)); + } + } + + LowerBound -= 4; // The CR save area is always 4 bytes long. + } + + if (HasVRSAVESaveArea) { + // FIXME SVR4: Is it actually possible to have multiple elements in CSI + // which have the VRSAVE register class? 
+ // Adjust the frame index of the VRSAVE spill slot. + for (unsigned i = 0, e = CSI.size(); i != e; ++i) { + const TargetRegisterClass *RC = CSI[i].getRegClass(); + + if (RC == PPC::VRSAVERCRegisterClass) { + int FI = CSI[i].getFrameIdx(); + + FFI->setObjectOffset(FI, LowerBound + FFI->getObjectOffset(FI)); + } + } + + LowerBound -= 4; // The VRSAVE save area is always 4 bytes long. + } + + if (HasVRSaveArea) { + // Insert alignment padding, we need 16-byte alignment. + LowerBound = (LowerBound - 15) & ~(15); + + for (unsigned i = 0, e = VRegs.size(); i != e; ++i) { + int FI = VRegs[i].getFrameIdx(); + + FFI->setObjectOffset(FI, LowerBound + FFI->getObjectOffset(FI)); + } + } +} + +void PPCRegisterInfo::emitPrologue(MachineFunction &MF) const { MachineBasicBlock &MBB = MF.front(); // Prolog goes in entry BB MachineBasicBlock::iterator MBBI = MBB.begin(); @@ -1033,15 +1197,26 @@ PPCRegisterInfo::emitPrologue(MachineFunction &MF) const { // Get processor type. bool IsPPC64 = Subtarget.isPPC64(); // Get operating system - bool IsMachoABI = Subtarget.isMachoABI(); + bool isDarwinABI = Subtarget.isDarwinABI(); // Check if the link register (LR) must be saved. PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>(); bool MustSaveLR = FI->mustSaveLR(); // Do we have a frame pointer for this function? bool HasFP = hasFP(MF) && FrameSize; - int LROffset = PPCFrameInfo::getReturnSaveOffset(IsPPC64, IsMachoABI); - int FPOffset = PPCFrameInfo::getFramePointerSaveOffset(IsPPC64, IsMachoABI); + int LROffset = PPCFrameInfo::getReturnSaveOffset(IsPPC64, isDarwinABI); + + int FPOffset = 0; + if (HasFP) { + if (Subtarget.isSVR4ABI()) { + MachineFrameInfo *FFI = MF.getFrameInfo(); + int FPIndex = FI->getFramePointerSaveIndex(); + assert(FPIndex && "No Frame Pointer Save Slot!"); + FPOffset = FFI->getObjectOffset(FPIndex); + } else { + FPOffset = PPCFrameInfo::getFramePointerSaveOffset(IsPPC64, isDarwinABI); + } + } if (IsPPC64) { if (MustSaveLR) @@ -1242,15 +1417,26 @@ void PPCRegisterInfo::emitEpilogue(MachineFunction &MF, // Get processor type. bool IsPPC64 = Subtarget.isPPC64(); // Get operating system - bool IsMachoABI = Subtarget.isMachoABI(); + bool isDarwinABI = Subtarget.isDarwinABI(); // Check if the link register (LR) has been saved. PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>(); bool MustSaveLR = FI->mustSaveLR(); // Do we have a frame pointer for this function? 
bool HasFP = hasFP(MF) && FrameSize; - int LROffset = PPCFrameInfo::getReturnSaveOffset(IsPPC64, IsMachoABI); - int FPOffset = PPCFrameInfo::getFramePointerSaveOffset(IsPPC64, IsMachoABI); + int LROffset = PPCFrameInfo::getReturnSaveOffset(IsPPC64, isDarwinABI); + + int FPOffset = 0; + if (HasFP) { + if (Subtarget.isSVR4ABI()) { + MachineFrameInfo *FFI = MF.getFrameInfo(); + int FPIndex = FI->getFramePointerSaveIndex(); + assert(FPIndex && "No Frame Pointer Save Slot!"); + FPOffset = FFI->getObjectOffset(FPIndex); + } else { + FPOffset = PPCFrameInfo::getFramePointerSaveOffset(IsPPC64, isDarwinABI); + } + } bool UsesTCRet = RetOpcode == PPC::TCRETURNri || RetOpcode == PPC::TCRETURNdi || diff --git a/lib/Target/PowerPC/PPCRegisterInfo.h b/lib/Target/PowerPC/PPCRegisterInfo.h index 9506b65..ddaefdd 100644 --- a/lib/Target/PowerPC/PPCRegisterInfo.h +++ b/lib/Target/PowerPC/PPCRegisterInfo.h @@ -75,6 +75,8 @@ public: void processFunctionBeforeCalleeSavedScan(MachineFunction &MF, RegScavenger *RS = NULL) const; + void processFunctionBeforeFrameFinalized(MachineFunction &MF) const; + void emitPrologue(MachineFunction &MF) const; void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const; diff --git a/lib/Target/PowerPC/PPCRegisterInfo.td b/lib/Target/PowerPC/PPCRegisterInfo.td index 9e15a55..bac8e3a 100644 --- a/lib/Target/PowerPC/PPCRegisterInfo.td +++ b/lib/Target/PowerPC/PPCRegisterInfo.td @@ -43,8 +43,9 @@ class VR<bits<5> num, string n> : PPCReg<n> { } // CR - One of the 8 4-bit condition registers -class CR<bits<3> num, string n> : PPCReg<n> { +class CR<bits<3> num, string n, list<Register> subregs> : PPCReg<n> { field bits<3> Num = num; + let SubRegs = subregs; } // CRBIT - One of the 32 1-bit condition register fields @@ -189,16 +190,6 @@ def V29 : VR<29, "v29">, DwarfRegNum<[106]>; def V30 : VR<30, "v30">, DwarfRegNum<[107]>; def V31 : VR<31, "v31">, DwarfRegNum<[108]>; -// Condition registers -def CR0 : CR<0, "cr0">, DwarfRegNum<[68]>; -def CR1 : CR<1, "cr1">, DwarfRegNum<[69]>; -def CR2 : CR<2, "cr2">, DwarfRegNum<[70]>; -def CR3 : CR<3, "cr3">, DwarfRegNum<[71]>; -def CR4 : CR<4, "cr4">, DwarfRegNum<[72]>; -def CR5 : CR<5, "cr5">, DwarfRegNum<[73]>; -def CR6 : CR<6, "cr6">, DwarfRegNum<[74]>; -def CR7 : CR<7, "cr7">, DwarfRegNum<[75]>; - // Condition register bits def CR0LT : CRBIT< 0, "0">, DwarfRegNum<[0]>; def CR0GT : CRBIT< 1, "1">, DwarfRegNum<[0]>; @@ -233,6 +224,16 @@ def CR7GT : CRBIT<29, "29">, DwarfRegNum<[0]>; def CR7EQ : CRBIT<30, "30">, DwarfRegNum<[0]>; def CR7UN : CRBIT<31, "31">, DwarfRegNum<[0]>; +// Condition registers +def CR0 : CR<0, "cr0", [CR0LT, CR0GT, CR0EQ, CR0UN]>, DwarfRegNum<[68]>; +def CR1 : CR<1, "cr1", [CR1LT, CR1GT, CR1EQ, CR1UN]>, DwarfRegNum<[69]>; +def CR2 : CR<2, "cr2", [CR2LT, CR2GT, CR2EQ, CR2UN]>, DwarfRegNum<[70]>; +def CR3 : CR<3, "cr3", [CR3LT, CR3GT, CR3EQ, CR3UN]>, DwarfRegNum<[71]>; +def CR4 : CR<4, "cr4", [CR4LT, CR4GT, CR4EQ, CR4UN]>, DwarfRegNum<[72]>; +def CR5 : CR<5, "cr5", [CR5LT, CR5GT, CR5EQ, CR5UN]>, DwarfRegNum<[73]>; +def CR6 : CR<6, "cr6", [CR6LT, CR6GT, CR6EQ, CR6UN]>, DwarfRegNum<[74]>; +def CR7 : CR<7, "cr7", [CR7LT, CR7GT, CR7EQ, CR7UN]>, DwarfRegNum<[75]>; + def : SubRegSet<1, [CR0, CR1, CR2, CR3, CR4, CR5, CR6, CR7], [CR0LT, CR1LT, CR2LT, CR3LT, CR4LT, CR5LT, CR6LT, CR7LT]>; def : SubRegSet<2, [CR0, CR1, CR2, CR3, CR4, CR5, CR6, CR7], @@ -290,7 +291,12 @@ def GPRC : RegisterClass<"PPC", [i32], 32, // On PPC64, r13 is the thread pointer. Never allocate this register. 
// Note that this is overconservative, as it also prevents allocation of // R31 when the FP is not needed. - if (MF.getTarget().getSubtarget<PPCSubtarget>().isPPC64()) + // When using the SVR4 ABI, r13 is reserved for the Small Data Area + // pointer. + const PPCSubtarget &Subtarget + = MF.getTarget().getSubtarget<PPCSubtarget>(); + + if (Subtarget.isPPC64() || Subtarget.isSVR4ABI()) return end()-5; // don't allocate R13, R31, R0, R1, LR if (needsFP(MF)) @@ -324,22 +330,24 @@ def G8RC : RegisterClass<"PPC", [i64], 64, }]; } - - +// Allocate volatiles first, then non-volatiles in reverse order. With the SVR4 +// ABI the size of the Floating-point register save area is determined by the +// allocated non-volatile register with the lowest register number, as FP +// register N is spilled to offset 8 * (32 - N) below the back chain word of the +// previous stack frame. By allocating non-volatiles in reverse order we make +// sure that the Floating-point register save area is always as small as +// possible because there aren't any unused spill slots. def F8RC : RegisterClass<"PPC", [f64], 64, [F0, F1, F2, F3, F4, F5, F6, F7, - F8, F9, F10, F11, F12, F13, F14, F15, F16, F17, F18, F19, F20, F21, - F22, F23, F24, F25, F26, F27, F28, F29, F30, F31]>; + F8, F9, F10, F11, F12, F13, F31, F30, F29, F28, F27, F26, F25, F24, F23, + F22, F21, F20, F19, F18, F17, F16, F15, F14]>; def F4RC : RegisterClass<"PPC", [f32], 32, [F0, F1, F2, F3, F4, F5, F6, F7, - F8, F9, F10, F11, F12, F13, F14, F15, F16, F17, F18, F19, F20, F21, - F22, F23, F24, F25, F26, F27, F28, F29, F30, F31]>; + F8, F9, F10, F11, F12, F13, F31, F30, F29, F28, F27, F26, F25, F24, F23, + F22, F21, F20, F19, F18, F17, F16, F15, F14]>; def VRRC : RegisterClass<"PPC", [v16i8,v8i16,v4i32,v4f32], 128, [V2, V3, V4, V5, V0, V1, - V6, V7, V8, V9, V10, V11, V12, V13, V14, V15, V16, V17, V18, V19, V20, V21, - V22, V23, V24, V25, V26, V27, V28, V29, V30, V31]>; - -def CRRC : RegisterClass<"PPC", [i32], 32, [CR0, CR1, CR5, CR6, CR7, CR2, - CR3, CR4]>; + V6, V7, V8, V9, V10, V11, V12, V13, V14, V15, V16, V17, V18, V19, V31, V30, + V29, V28, V27, V26, V25, V24, V23, V22, V21, V20]>; def CRBITRC : RegisterClass<"PPC", [i32], 32, [CR0LT, CR0GT, CR0EQ, CR0UN, @@ -355,6 +363,13 @@ def CRBITRC : RegisterClass<"PPC", [i32], 32, let CopyCost = -1; } +def CRRC : RegisterClass<"PPC", [i32], 32, [CR0, CR1, CR5, CR6, CR7, CR2, + CR3, CR4]> +{ + let SubRegClassList = [CRBITRC, CRBITRC, CRBITRC, CRBITRC]; +} def CTRRC : RegisterClass<"PPC", [i32], 32, [CTR]>; def CTRRC8 : RegisterClass<"PPC", [i64], 64, [CTR8]>; +def VRSAVERC : RegisterClass<"PPC", [i32], 32, [VRSAVE]>; + diff --git a/lib/Target/PowerPC/PPCSubtarget.h b/lib/Target/PowerPC/PPCSubtarget.h index 176f3e1..f633cc6 100644 --- a/lib/Target/PowerPC/PPCSubtarget.h +++ b/lib/Target/PowerPC/PPCSubtarget.h @@ -148,8 +148,8 @@ public: /// getDarwinVers - Return the darwin version number, 8 = tiger, 9 = leopard. unsigned getDarwinVers() const { return DarwinVers; } - bool isMachoABI() const { return isDarwin() || IsPPC64; } - bool isELF32_ABI() const { return !isDarwin() && !IsPPC64; } + bool isDarwinABI() const { return isDarwin() || IsPPC64; } + bool isSVR4ABI() const { return !isDarwin() && !IsPPC64; } unsigned getAsmFlavor() const { return AsmFlavor != Unset ? 
unsigned(AsmFlavor) : 0; diff --git a/lib/Target/PowerPC/PPCTargetMachine.cpp b/lib/Target/PowerPC/PPCTargetMachine.cpp index 2f95d7e..e9073d6 100644 --- a/lib/Target/PowerPC/PPCTargetMachine.cpp +++ b/lib/Target/PowerPC/PPCTargetMachine.cpp @@ -152,7 +152,7 @@ bool PPCTargetMachine::addAssemblyEmitter(PassManagerBase &PM, raw_ostream &Out) { assert(AsmPrinterCtor && "AsmPrinter was not linked in"); if (AsmPrinterCtor) - PM.add(AsmPrinterCtor(Out, *this, OptLevel, Verbose)); + PM.add(AsmPrinterCtor(Out, *this, Verbose)); return false; } @@ -183,7 +183,7 @@ bool PPCTargetMachine::addCodeEmitter(PassManagerBase &PM, if (DumpAsm) { assert(AsmPrinterCtor && "AsmPrinter was not linked in"); if (AsmPrinterCtor) - PM.add(AsmPrinterCtor(errs(), *this, OptLevel, true)); + PM.add(AsmPrinterCtor(errs(), *this, true)); } return false; @@ -215,7 +215,7 @@ bool PPCTargetMachine::addCodeEmitter(PassManagerBase &PM, if (DumpAsm) { assert(AsmPrinterCtor && "AsmPrinter was not linked in"); if (AsmPrinterCtor) - PM.add(AsmPrinterCtor(errs(), *this, OptLevel, true)); + PM.add(AsmPrinterCtor(errs(), *this, true)); } return false; @@ -230,7 +230,7 @@ bool PPCTargetMachine::addSimpleCodeEmitter(PassManagerBase &PM, if (DumpAsm) { assert(AsmPrinterCtor && "AsmPrinter was not linked in"); if (AsmPrinterCtor) - PM.add(AsmPrinterCtor(errs(), *this, OptLevel, true)); + PM.add(AsmPrinterCtor(errs(), *this, true)); } return false; @@ -245,7 +245,7 @@ bool PPCTargetMachine::addSimpleCodeEmitter(PassManagerBase &PM, if (DumpAsm) { assert(AsmPrinterCtor && "AsmPrinter was not linked in"); if (AsmPrinterCtor) - PM.add(AsmPrinterCtor(errs(), *this, OptLevel, true)); + PM.add(AsmPrinterCtor(errs(), *this, true)); } return false; diff --git a/lib/Target/PowerPC/PPCTargetMachine.h b/lib/Target/PowerPC/PPCTargetMachine.h index 086d2f4..c693bf4 100644 --- a/lib/Target/PowerPC/PPCTargetMachine.h +++ b/lib/Target/PowerPC/PPCTargetMachine.h @@ -46,7 +46,6 @@ protected: // set this functions to ctor pointer at startup time if they are linked in. typedef FunctionPass *(*AsmPrinterCtorFn)(raw_ostream &o, PPCTargetMachine &tm, - CodeGenOpt::Level OptLevel, bool verbose); static AsmPrinterCtorFn AsmPrinterCtor; diff --git a/lib/Target/PowerPC/README.txt b/lib/Target/PowerPC/README.txt index 688fb30..6e9e6c7 100644 --- a/lib/Target/PowerPC/README.txt +++ b/lib/Target/PowerPC/README.txt @@ -180,6 +180,16 @@ void bar() { struct foo R = { 1.0, 2.0 }; xxx(R); } ===-------------------------------------------------------------------------=== +Darwin Stub removal: + +We still generate calls to foo$stub, and stubs, on Darwin. This is not +necessary when building with the Leopard (10.5) or later linker, as stubs are +generated by ld when necessary. Parameterizing this based on the deployment +target (-mmacosx-version-min) is probably enough. x86-32 does this right, see +its logic. 
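The stub-removal note above reduces to a deployment-target check: stubs are only required when targeting a pre-Leopard linker. A sketch of such a gate, reusing the getDarwinVers() accessor visible in the PPCSubtarget.h hunk above (8 = Tiger, 9 = Leopard); the predicate itself is hypothetical.

#include "PPCSubtarget.h"

// Hypothetical gate for the README suggestion: on 10.5+ the static linker
// synthesizes stubs itself, so the compiler only needs them before that.
static bool needsDarwinStubs(const PPCSubtarget &ST) {
  return ST.isDarwin() && ST.getDarwinVers() < 9; // 9 == Leopard (10.5)
}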
+ +===-------------------------------------------------------------------------=== + Darwin Stub LICM optimization: Loops like this: diff --git a/lib/Target/Sparc/AsmPrinter/SparcAsmPrinter.cpp b/lib/Target/Sparc/AsmPrinter/SparcAsmPrinter.cpp index cb23f62..71bd0de 100644 --- a/lib/Target/Sparc/AsmPrinter/SparcAsmPrinter.cpp +++ b/lib/Target/Sparc/AsmPrinter/SparcAsmPrinter.cpp @@ -50,9 +50,8 @@ namespace { unsigned BBNumber; public: explicit SparcAsmPrinter(raw_ostream &O, TargetMachine &TM, - const TargetAsmInfo *T, CodeGenOpt::Level OL, - bool V) - : AsmPrinter(O, TM, T, OL, V), BBNumber(0) {} + const TargetAsmInfo *T, bool V) + : AsmPrinter(O, TM, T, V), BBNumber(0) {} virtual const char *getPassName() const { return "Sparc Assembly Printer"; @@ -84,9 +83,8 @@ namespace { /// FunctionPass *llvm::createSparcCodePrinterPass(raw_ostream &o, TargetMachine &tm, - CodeGenOpt::Level OptLevel, bool verbose) { - return new SparcAsmPrinter(o, tm, tm.getTargetAsmInfo(), OptLevel, verbose); + return new SparcAsmPrinter(o, tm, tm.getTargetAsmInfo(), verbose); } @@ -109,7 +107,7 @@ bool SparcAsmPrinter::runOnMachineFunction(MachineFunction &MF) { // Print out the label for the function. const Function *F = MF.getFunction(); SwitchToSection(TAI->SectionForGlobal(F)); - EmitAlignment(4, F); + EmitAlignment(MF.getAlignment(), F); O << "\t.globl\t" << CurrentFnName << '\n'; printVisibility(CurrentFnName, F->getVisibility()); diff --git a/lib/Target/Sparc/CMakeLists.txt b/lib/Target/Sparc/CMakeLists.txt index eefa7e8..eb045e2 100644 --- a/lib/Target/Sparc/CMakeLists.txt +++ b/lib/Target/Sparc/CMakeLists.txt @@ -21,3 +21,5 @@ add_llvm_target(SparcCodeGen SparcTargetAsmInfo.cpp SparcTargetMachine.cpp ) + +target_link_libraries (LLVMSparcCodeGen LLVMSelectionDAG) diff --git a/lib/Target/Sparc/Sparc.h b/lib/Target/Sparc/Sparc.h index bb03f30..c7d0ca8 100644 --- a/lib/Target/Sparc/Sparc.h +++ b/lib/Target/Sparc/Sparc.h @@ -25,7 +25,6 @@ namespace llvm { FunctionPass *createSparcISelDag(SparcTargetMachine &TM); FunctionPass *createSparcCodePrinterPass(raw_ostream &OS, TargetMachine &TM, - CodeGenOpt::Level OptLevel, bool Verbose); FunctionPass *createSparcDelaySlotFillerPass(TargetMachine &TM); FunctionPass *createSparcFPMoverPass(TargetMachine &TM); diff --git a/lib/Target/Sparc/SparcISelLowering.cpp b/lib/Target/Sparc/SparcISelLowering.cpp index 3ec7e06..4c3efde 100644 --- a/lib/Target/Sparc/SparcISelLowering.cpp +++ b/lib/Target/Sparc/SparcISelLowering.cpp @@ -1047,3 +1047,8 @@ SparcTargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const { // The Sparc target isn't yet aware of offsets. return false; } + +/// getFunctionAlignment - Return the Log2 alignment of this function. +unsigned SparcTargetLowering::getFunctionAlignment(const Function *) const { + return 4; +} diff --git a/lib/Target/Sparc/SparcISelLowering.h b/lib/Target/Sparc/SparcISelLowering.h index fe6811f..27ce1b7 100644 --- a/lib/Target/Sparc/SparcISelLowering.h +++ b/lib/Target/Sparc/SparcISelLowering.h @@ -73,6 +73,9 @@ namespace llvm { MVT VT) const; virtual bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const; + + /// getFunctionAlignment - Return the Log2 alignment of this function. 
+ virtual unsigned getFunctionAlignment(const Function *F) const; }; } // end namespace llvm diff --git a/lib/Target/Sparc/SparcInstrInfo.cpp b/lib/Target/Sparc/SparcInstrInfo.cpp index d2f6b9b..12c286a 100644 --- a/lib/Target/Sparc/SparcInstrInfo.cpp +++ b/lib/Target/Sparc/SparcInstrInfo.cpp @@ -256,17 +256,20 @@ MachineInstr *SparcInstrInfo::foldMemoryOperandImpl(MachineFunction &MF, if (OpNum == 0) { // COPY -> STORE unsigned SrcReg = MI->getOperand(1).getReg(); bool isKill = MI->getOperand(1).isKill(); + bool isUndef = MI->getOperand(1).isUndef(); NewMI = BuildMI(MF, MI->getDebugLoc(), get(isFloat ? SP::STFri : SP::STDFri)) .addFrameIndex(FI) .addImm(0) - .addReg(SrcReg, getKillRegState(isKill)); + .addReg(SrcReg, getKillRegState(isKill) | getUndefRegState(isUndef)); } else { // COPY -> LOAD unsigned DstReg = MI->getOperand(0).getReg(); bool isDead = MI->getOperand(0).isDead(); + bool isUndef = MI->getOperand(0).isUndef(); NewMI = BuildMI(MF, MI->getDebugLoc(), get(isFloat ? SP::LDFri : SP::LDDFri)) - .addReg(DstReg, RegState::Define | getDeadRegState(isDead)) + .addReg(DstReg, RegState::Define | + getDeadRegState(isDead) | getUndefRegState(isUndef)) .addFrameIndex(FI) .addImm(0); } diff --git a/lib/Target/Sparc/SparcTargetMachine.cpp b/lib/Target/Sparc/SparcTargetMachine.cpp index aef238d..1343bcc 100644 --- a/lib/Target/Sparc/SparcTargetMachine.cpp +++ b/lib/Target/Sparc/SparcTargetMachine.cpp @@ -90,6 +90,6 @@ bool SparcTargetMachine::addAssemblyEmitter(PassManagerBase &PM, // Output assembly language. assert(AsmPrinterCtor && "AsmPrinter was not linked in"); if (AsmPrinterCtor) - PM.add(AsmPrinterCtor(Out, *this, OptLevel, Verbose)); + PM.add(AsmPrinterCtor(Out, *this, Verbose)); return false; } diff --git a/lib/Target/Sparc/SparcTargetMachine.h b/lib/Target/Sparc/SparcTargetMachine.h index 8afcc73..ee55d3c 100644 --- a/lib/Target/Sparc/SparcTargetMachine.h +++ b/lib/Target/Sparc/SparcTargetMachine.h @@ -39,7 +39,6 @@ protected: // set this functions to ctor pointer at startup time if they are linked in. 
typedef FunctionPass *(*AsmPrinterCtorFn)(raw_ostream &o, TargetMachine &tm, - CodeGenOpt::Level OptLevel, bool verbose); static AsmPrinterCtorFn AsmPrinterCtor; diff --git a/lib/Target/TargetELFWriterInfo.cpp b/lib/Target/TargetELFWriterInfo.cpp index 9651e65..3631b35 100644 --- a/lib/Target/TargetELFWriterInfo.cpp +++ b/lib/Target/TargetELFWriterInfo.cpp @@ -24,13 +24,3 @@ TargetELFWriterInfo::TargetELFWriterInfo(TargetMachine &tm) : TM(tm) { TargetELFWriterInfo::~TargetELFWriterInfo() {} -/// getFunctionAlignment - Returns the alignment for function 'F', targets -/// with different alignment constraints should overload this method -unsigned TargetELFWriterInfo::getFunctionAlignment(const Function *F) const { - const TargetData *TD = TM.getTargetData(); - unsigned FnAlign = F->getAlignment(); - unsigned TDAlign = TD->getPointerABIAlignment(); - unsigned Align = std::max(FnAlign, TDAlign); - assert(!(Align & (Align-1)) && "Alignment is not a power of two!"); - return Align; -} diff --git a/lib/Target/X86/AsmPrinter/X86ATTAsmPrinter.cpp b/lib/Target/X86/AsmPrinter/X86ATTAsmPrinter.cpp index e75cfc5..127f228 100644 --- a/lib/Target/X86/AsmPrinter/X86ATTAsmPrinter.cpp +++ b/lib/Target/X86/AsmPrinter/X86ATTAsmPrinter.cpp @@ -154,21 +154,13 @@ void X86ATTAsmPrinter::decorateName(std::string &Name, } } - - void X86ATTAsmPrinter::emitFunctionHeader(const MachineFunction &MF) { + unsigned FnAlign = MF.getAlignment(); const Function *F = MF.getFunction(); decorateName(CurrentFnName, F); SwitchToSection(TAI->SectionForGlobal(F)); - - // FIXME: A function's alignment should be part of MachineFunction. There - // shouldn't be a policy decision here. - unsigned FnAlign = 4; - if (F->hasFnAttr(Attribute::OptimizeForSize)) - FnAlign = 1; - switch (F->getLinkage()) { default: assert(0 && "Unknown linkage type!"); case Function::InternalLinkage: // Symbols default to internal. 
diff --git a/lib/Target/X86/AsmPrinter/X86ATTAsmPrinter.h b/lib/Target/X86/AsmPrinter/X86ATTAsmPrinter.h index bd96115..f47daf1 100644 --- a/lib/Target/X86/AsmPrinter/X86ATTAsmPrinter.h +++ b/lib/Target/X86/AsmPrinter/X86ATTAsmPrinter.h @@ -38,9 +38,8 @@ class VISIBILITY_HIDDEN X86ATTAsmPrinter : public AsmPrinter { MCStreamer *Streamer; public: explicit X86ATTAsmPrinter(raw_ostream &O, X86TargetMachine &TM, - const TargetAsmInfo *T, CodeGenOpt::Level OL, - bool V) - : AsmPrinter(O, TM, T, OL, V) { + const TargetAsmInfo *T, bool V) + : AsmPrinter(O, TM, T, V) { Subtarget = &TM.getSubtarget<X86Subtarget>(); Context = 0; Streamer = 0; @@ -140,6 +139,9 @@ class VISIBILITY_HIDDEN X86ATTAsmPrinter : public AsmPrinter { void printi128mem(const MachineInstr *MI, unsigned OpNo) { printMemReference(MI, OpNo); } + void printi256mem(const MachineInstr *MI, unsigned OpNo) { + printMemReference(MI, OpNo); + } void printf32mem(const MachineInstr *MI, unsigned OpNo) { printMemReference(MI, OpNo); } @@ -152,6 +154,9 @@ class VISIBILITY_HIDDEN X86ATTAsmPrinter : public AsmPrinter { void printf128mem(const MachineInstr *MI, unsigned OpNo) { printMemReference(MI, OpNo); } + void printf256mem(const MachineInstr *MI, unsigned OpNo) { + printMemReference(MI, OpNo); + } void printlea32mem(const MachineInstr *MI, unsigned OpNo) { printLeaMemReference(MI, OpNo); } diff --git a/lib/Target/X86/AsmPrinter/X86AsmPrinter.cpp b/lib/Target/X86/AsmPrinter/X86AsmPrinter.cpp index d1623d6..e5d80a4 100644 --- a/lib/Target/X86/AsmPrinter/X86AsmPrinter.cpp +++ b/lib/Target/X86/AsmPrinter/X86AsmPrinter.cpp @@ -25,15 +25,12 @@ using namespace llvm; /// FunctionPass *llvm::createX86CodePrinterPass(raw_ostream &o, X86TargetMachine &tm, - CodeGenOpt::Level OptLevel, bool verbose) { const X86Subtarget *Subtarget = &tm.getSubtarget<X86Subtarget>(); if (Subtarget->isFlavorIntel()) - return new X86IntelAsmPrinter(o, tm, tm.getTargetAsmInfo(), - OptLevel, verbose); - return new X86ATTAsmPrinter(o, tm, tm.getTargetAsmInfo(), - OptLevel, verbose); + return new X86IntelAsmPrinter(o, tm, tm.getTargetAsmInfo(), verbose); + return new X86ATTAsmPrinter(o, tm, tm.getTargetAsmInfo(), verbose); } namespace { diff --git a/lib/Target/X86/AsmPrinter/X86IntelAsmPrinter.cpp b/lib/Target/X86/AsmPrinter/X86IntelAsmPrinter.cpp index ceae7be..9d4df93 100644 --- a/lib/Target/X86/AsmPrinter/X86IntelAsmPrinter.cpp +++ b/lib/Target/X86/AsmPrinter/X86IntelAsmPrinter.cpp @@ -132,6 +132,7 @@ bool X86IntelAsmPrinter::runOnMachineFunction(MachineFunction &MF) { // Print out labels for the function. const Function *F = MF.getFunction(); unsigned CC = F->getCallingConv(); + unsigned FnAlign = MF.getAlignment(); // Populate function information map. Actually, We don't want to populate // non-stdcall or non-fastcall functions' information right now. 
@@ -141,10 +142,6 @@ bool X86IntelAsmPrinter::runOnMachineFunction(MachineFunction &MF) { decorateName(CurrentFnName, F); SwitchToTextSection("_text", F); - - unsigned FnAlign = 4; - if (F->hasFnAttr(Attribute::OptimizeForSize)) - FnAlign = 1; switch (F->getLinkage()) { default: assert(0 && "Unsupported linkage type!"); case Function::PrivateLinkage: diff --git a/lib/Target/X86/AsmPrinter/X86IntelAsmPrinter.h b/lib/Target/X86/AsmPrinter/X86IntelAsmPrinter.h index 04f2595..a724c3c 100644 --- a/lib/Target/X86/AsmPrinter/X86IntelAsmPrinter.h +++ b/lib/Target/X86/AsmPrinter/X86IntelAsmPrinter.h @@ -26,9 +26,8 @@ namespace llvm { struct VISIBILITY_HIDDEN X86IntelAsmPrinter : public AsmPrinter { explicit X86IntelAsmPrinter(raw_ostream &O, X86TargetMachine &TM, - const TargetAsmInfo *T, CodeGenOpt::Level OL, - bool V) - : AsmPrinter(O, TM, T, OL, V) {} + const TargetAsmInfo *T, bool V) + : AsmPrinter(O, TM, T, V) {} virtual const char *getPassName() const { return "X86 Intel-Style Assembly Printer"; @@ -76,6 +75,10 @@ struct VISIBILITY_HIDDEN X86IntelAsmPrinter : public AsmPrinter { O << "XMMWORD PTR "; printMemReference(MI, OpNo); } + void printi256mem(const MachineInstr *MI, unsigned OpNo) { + O << "YMMWORD PTR "; + printMemReference(MI, OpNo); + } void printf32mem(const MachineInstr *MI, unsigned OpNo) { O << "DWORD PTR "; printMemReference(MI, OpNo); @@ -92,6 +95,10 @@ struct VISIBILITY_HIDDEN X86IntelAsmPrinter : public AsmPrinter { O << "XMMWORD PTR "; printMemReference(MI, OpNo); } + void printf256mem(const MachineInstr *MI, unsigned OpNo) { + O << "YMMWORD PTR "; + printMemReference(MI, OpNo); + } void printlea32mem(const MachineInstr *MI, unsigned OpNo) { O << "DWORD PTR "; printLeaMemReference(MI, OpNo); diff --git a/lib/Target/X86/CMakeLists.txt b/lib/Target/X86/CMakeLists.txt index d982990..7ea0e51 100644 --- a/lib/Target/X86/CMakeLists.txt +++ b/lib/Target/X86/CMakeLists.txt @@ -27,3 +27,5 @@ add_llvm_target(X86CodeGen X86TargetMachine.cpp X86FastISel.cpp ) + +target_link_libraries (LLVMX86CodeGen LLVMSelectionDAG) diff --git a/lib/Target/X86/X86.h b/lib/Target/X86/X86.h index fd13b02..22de3f6 100644 --- a/lib/Target/X86/X86.h +++ b/lib/Target/X86/X86.h @@ -46,9 +46,7 @@ FunctionPass *createX87FPRegKillInserterPass(); /// assembly code for a MachineFunction to the given output stream, /// using the given target machine description. 
/// -FunctionPass *createX86CodePrinterPass(raw_ostream &o, - X86TargetMachine &tm, - CodeGenOpt::Level OptLevel, +FunctionPass *createX86CodePrinterPass(raw_ostream &o, X86TargetMachine &tm, bool Verbose); /// createX86CodeEmitterPass - Return a pass that emits the collected X86 code diff --git a/lib/Target/X86/X86.td b/lib/Target/X86/X86.td index 4d26364..47861d5 100644 --- a/lib/Target/X86/X86.td +++ b/lib/Target/X86/X86.td @@ -119,6 +119,10 @@ def : Proc<"amdfam10", [FeatureSSE3, FeatureSSE4A, Feature3DNowA, Feature64Bit, FeatureSlowBTMem]>; def : Proc<"barcelona", [FeatureSSE3, FeatureSSE4A, Feature3DNowA, Feature64Bit, FeatureSlowBTMem]>; +def : Proc<"istanbul", [Feature3DNowA, Feature64Bit, FeatureSSE4A]>; +def : Proc<"shanghai", [Feature3DNowA, Feature64Bit, FeatureSSE4A]>; def : Proc<"winchip-c6", [FeatureMMX]>; def : Proc<"winchip2", [FeatureMMX, Feature3DNow]>; diff --git a/lib/Target/X86/X86ELFWriterInfo.cpp b/lib/Target/X86/X86ELFWriterInfo.cpp index 315118f..912ab0e 100644 --- a/lib/Target/X86/X86ELFWriterInfo.cpp +++ b/lib/Target/X86/X86ELFWriterInfo.cpp @@ -59,18 +59,6 @@ unsigned X86ELFWriterInfo::getRelocationType(unsigned MachineRelTy) const { return 0; } -unsigned X86ELFWriterInfo::getFunctionAlignment(const Function *F) const { - unsigned FnAlign = 4; - - if (F->hasFnAttr(Attribute::OptimizeForSize)) - FnAlign = 1; - - if (F->getAlignment()) - FnAlign = Log2_32(F->getAlignment()); - - return (1 << FnAlign); -} - long int X86ELFWriterInfo::getAddendForRelTy(unsigned RelTy) const { if (is64Bit) { switch(RelTy) { diff --git a/lib/Target/X86/X86ELFWriterInfo.h b/lib/Target/X86/X86ELFWriterInfo.h index 96485b8..2ba1a0b 100644 --- a/lib/Target/X86/X86ELFWriterInfo.h +++ b/lib/Target/X86/X86ELFWriterInfo.h @@ -41,10 +41,6 @@ namespace llvm { X86ELFWriterInfo(TargetMachine &TM); virtual ~X86ELFWriterInfo(); - /// getFunctionAlignment - Returns the alignment for function 'F', targets - /// with different alignment constraints should overload this method - virtual unsigned getFunctionAlignment(const Function *F) const; - /// getRelocationType - Returns the target specific ELF Relocation type. /// 'MachineRelTy' contains the object code independent relocation type virtual unsigned getRelocationType(unsigned MachineRelTy) const; diff --git a/lib/Target/X86/X86FastISel.cpp b/lib/Target/X86/X86FastISel.cpp index 8a21b35..b336d78 100644 --- a/lib/Target/X86/X86FastISel.cpp +++ b/lib/Target/X86/X86FastISel.cpp @@ -452,25 +452,38 @@ bool X86FastISel::X86SelectAddress(Value *V, X86AddressMode &AM, bool isCall) { if (Subtarget->GVRequiresExtraLoad(GV, TM, isCall)) { // Check to see if we've already materialized this // value in a register in this block. - if (unsigned Reg = LocalValueMap[V]) { - AM.Base.Reg = Reg; + DenseMap<const Value *, unsigned>::iterator I = LocalValueMap.find(V); + if (I != LocalValueMap.end() && I->second != 0) { + AM.Base.Reg = I->second; AM.GV = 0; return true; } - // Issue load from stub if necessary. + + // Issue load from stub.
unsigned Opc = 0; const TargetRegisterClass *RC = NULL; + X86AddressMode StubAM; + StubAM.Base.Reg = AM.Base.Reg; + StubAM.GV = AM.GV; + if (TLI.getPointerTy() == MVT::i32) { Opc = X86::MOV32rm; RC = X86::GR32RegisterClass; + + if (Subtarget->isPICStyleGOT() && + TM.getRelocationModel() == Reloc::PIC_) + StubAM.GVOpFlags = X86II::MO_GOT; + } else { Opc = X86::MOV64rm; RC = X86::GR64RegisterClass; + + if (TM.getRelocationModel() != Reloc::Static) { + StubAM.GVOpFlags = X86II::MO_GOTPCREL; + StubAM.Base.Reg = X86::RIP; + } } - X86AddressMode StubAM; - StubAM.Base.Reg = AM.Base.Reg; - StubAM.GV = AM.GV; unsigned ResultReg = createResultReg(RC); addFullAddress(BuildMI(MBB, DL, TII.get(Opc), ResultReg), StubAM); @@ -1503,7 +1516,9 @@ unsigned X86FastISel::TargetMaterializeConstant(Constant *C) { } else if (Subtarget->isPICStyleGOT()) { OpFlag = X86II::MO_GOTOFF; PICBase = getInstrInfo()->getGlobalBaseReg(&MF); - } + } else if (Subtarget->isPICStyleRIPRel() && + TM.getCodeModel() == CodeModel::Small) + PICBase = X86::RIP; } // Create the load from the constant pool. diff --git a/lib/Target/X86/X86FloatingPoint.cpp b/lib/Target/X86/X86FloatingPoint.cpp index ed4eb44..37027ee 100644 --- a/lib/Target/X86/X86FloatingPoint.cpp +++ b/lib/Target/X86/X86FloatingPoint.cpp @@ -990,16 +990,21 @@ void FPS::handleSpecialFP(MachineBasicBlock::iterator &I) { } case X86::FpSET_ST0_32: case X86::FpSET_ST0_64: - case X86::FpSET_ST0_80: + case X86::FpSET_ST0_80: { + unsigned Op0 = getFPReg(MI->getOperand(0)); + // FpSET_ST0_80 is generated by copyRegToReg for both function return // and inline assembly with the "st" constrain. In the latter case, - // it is possible for FP0 to be alive after this instruction. - if (!MI->killsRegister(X86::FP0)) { - // Duplicate ST0 - duplicateToTop(0, 0, I); + // it is possible for ST(0) to be alive after this instruction. + if (!MI->killsRegister(X86::FP0 + Op0)) { + // Duplicate Op0 + duplicateToTop(0, 7 /*temp register*/, I); + } else { + moveToTop(Op0, I); } --StackTop; // "Forget" we have something on the top of stack! break; + } case X86::FpSET_ST1_32: case X86::FpSET_ST1_64: case X86::FpSET_ST1_80: diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index 9614e69..5a6294a 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -700,6 +700,9 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM) // Do not attempt to custom lower non-power-of-2 vectors if (!isPowerOf2_32(VT.getVectorNumElements())) continue; + // Do not attempt to custom lower non-128-bit vectors + if (!VT.is128BitVector()) + continue; setOperationAction(ISD::BUILD_VECTOR, VT, Custom); setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom); setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom); @@ -718,17 +721,23 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM) } // Promote v16i8, v8i16, v4i32 load, select, and, or, xor to v2i64. 
- for (unsigned VT = (unsigned)MVT::v16i8; VT != (unsigned)MVT::v2i64; VT++) { - setOperationAction(ISD::AND, (MVT::SimpleValueType)VT, Promote); - AddPromotedToType (ISD::AND, (MVT::SimpleValueType)VT, MVT::v2i64); - setOperationAction(ISD::OR, (MVT::SimpleValueType)VT, Promote); - AddPromotedToType (ISD::OR, (MVT::SimpleValueType)VT, MVT::v2i64); - setOperationAction(ISD::XOR, (MVT::SimpleValueType)VT, Promote); - AddPromotedToType (ISD::XOR, (MVT::SimpleValueType)VT, MVT::v2i64); - setOperationAction(ISD::LOAD, (MVT::SimpleValueType)VT, Promote); - AddPromotedToType (ISD::LOAD, (MVT::SimpleValueType)VT, MVT::v2i64); - setOperationAction(ISD::SELECT, (MVT::SimpleValueType)VT, Promote); - AddPromotedToType (ISD::SELECT, (MVT::SimpleValueType)VT, MVT::v2i64); + for (unsigned i = (unsigned)MVT::v16i8; i != (unsigned)MVT::v2i64; i++) { + MVT VT = (MVT::SimpleValueType)i; + + // Do not attempt to promote non-128-bit vectors + if (!VT.is128BitVector()) { + continue; + } + setOperationAction(ISD::AND, VT, Promote); + AddPromotedToType (ISD::AND, VT, MVT::v2i64); + setOperationAction(ISD::OR, VT, Promote); + AddPromotedToType (ISD::OR, VT, MVT::v2i64); + setOperationAction(ISD::XOR, VT, Promote); + AddPromotedToType (ISD::XOR, VT, MVT::v2i64); + setOperationAction(ISD::LOAD, VT, Promote); + AddPromotedToType (ISD::LOAD, VT, MVT::v2i64); + setOperationAction(ISD::SELECT, VT, Promote); + AddPromotedToType (ISD::SELECT, VT, MVT::v2i64); } setTruncStoreAction(MVT::f64, MVT::f32, Expand); @@ -775,6 +784,114 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM) setOperationAction(ISD::VSETCC, MVT::v2i64, Custom); } + if (!UseSoftFloat && Subtarget->hasAVX()) { + addRegisterClass(MVT::v8f32, X86::VR256RegisterClass); + addRegisterClass(MVT::v4f64, X86::VR256RegisterClass); + addRegisterClass(MVT::v8i32, X86::VR256RegisterClass); + addRegisterClass(MVT::v4i64, X86::VR256RegisterClass); + + setOperationAction(ISD::LOAD, MVT::v8f32, Legal); + setOperationAction(ISD::LOAD, MVT::v8i32, Legal); + setOperationAction(ISD::LOAD, MVT::v4f64, Legal); + setOperationAction(ISD::LOAD, MVT::v4i64, Legal); + setOperationAction(ISD::FADD, MVT::v8f32, Legal); + setOperationAction(ISD::FSUB, MVT::v8f32, Legal); + setOperationAction(ISD::FMUL, MVT::v8f32, Legal); + setOperationAction(ISD::FDIV, MVT::v8f32, Legal); + setOperationAction(ISD::FSQRT, MVT::v8f32, Legal); + setOperationAction(ISD::FNEG, MVT::v8f32, Custom); + //setOperationAction(ISD::BUILD_VECTOR, MVT::v8f32, Custom); + //setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v8f32, Custom); + //setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v8f32, Custom); + //setOperationAction(ISD::SELECT, MVT::v8f32, Custom); + //setOperationAction(ISD::VSETCC, MVT::v8f32, Custom); + + // Operations to consider commented out -v16i16 v32i8 + //setOperationAction(ISD::ADD, MVT::v16i16, Legal); + setOperationAction(ISD::ADD, MVT::v8i32, Custom); + setOperationAction(ISD::ADD, MVT::v4i64, Custom); + //setOperationAction(ISD::SUB, MVT::v32i8, Legal); + //setOperationAction(ISD::SUB, MVT::v16i16, Legal); + setOperationAction(ISD::SUB, MVT::v8i32, Custom); + setOperationAction(ISD::SUB, MVT::v4i64, Custom); + //setOperationAction(ISD::MUL, MVT::v16i16, Legal); + setOperationAction(ISD::FADD, MVT::v4f64, Legal); + setOperationAction(ISD::FSUB, MVT::v4f64, Legal); + setOperationAction(ISD::FMUL, MVT::v4f64, Legal); + setOperationAction(ISD::FDIV, MVT::v4f64, Legal); + setOperationAction(ISD::FSQRT, MVT::v4f64, Legal); + setOperationAction(ISD::FNEG, MVT::v4f64, Custom); + + 
setOperationAction(ISD::VSETCC, MVT::v4f64, Custom); + // setOperationAction(ISD::VSETCC, MVT::v32i8, Custom); + // setOperationAction(ISD::VSETCC, MVT::v16i16, Custom); + setOperationAction(ISD::VSETCC, MVT::v8i32, Custom); + + // setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v32i8, Custom); + // setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v16i16, Custom); + // setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v16i16, Custom); + setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v8i32, Custom); + setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v8f32, Custom); + + setOperationAction(ISD::BUILD_VECTOR, MVT::v4f64, Custom); + setOperationAction(ISD::BUILD_VECTOR, MVT::v4i64, Custom); + setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v4f64, Custom); + setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v4i64, Custom); + setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4f64, Custom); + setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4f64, Custom); + +#if 0 + // Not sure we want to do this since there are no 256-bit integer + // operations in AVX + + // Custom lower build_vector, vector_shuffle, and extract_vector_elt. + // This includes 256-bit vectors + for (unsigned i = (unsigned)MVT::v16i8; i != (unsigned)MVT::v4i64; ++i) { + MVT VT = (MVT::SimpleValueType)i; + + // Do not attempt to custom lower non-power-of-2 vectors + if (!isPowerOf2_32(VT.getVectorNumElements())) + continue; + + setOperationAction(ISD::BUILD_VECTOR, VT, Custom); + setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom); + setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom); + } + + if (Subtarget->is64Bit()) { + setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4i64, Custom); + setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4i64, Custom); + } +#endif + +#if 0 + // Not sure we want to do this since there are no 256-bit integer + // operations in AVX + + // Promote v32i8, v16i16, v8i32 load, select, and, or, xor to v4i64. + // Including 256-bit vectors + for (unsigned i = (unsigned)MVT::v16i8; i != (unsigned)MVT::v4i64; i++) { + MVT VT = (MVT::SimpleValueType)i; + + if (!VT.is256BitVector()) { + continue; + } + setOperationAction(ISD::AND, VT, Promote); + AddPromotedToType (ISD::AND, VT, MVT::v4i64); + setOperationAction(ISD::OR, VT, Promote); + AddPromotedToType (ISD::OR, VT, MVT::v4i64); + setOperationAction(ISD::XOR, VT, Promote); + AddPromotedToType (ISD::XOR, VT, MVT::v4i64); + setOperationAction(ISD::LOAD, VT, Promote); + AddPromotedToType (ISD::LOAD, VT, MVT::v4i64); + setOperationAction(ISD::SELECT, VT, Promote); + AddPromotedToType (ISD::SELECT, VT, MVT::v4i64); + } + + setTruncStoreAction(MVT::f64, MVT::f32, Expand); +#endif + } + // We want to custom lower some of our intrinsics. setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom); @@ -805,6 +922,7 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM) setTargetDAGCombine(ISD::SRA); setTargetDAGCombine(ISD::SRL); setTargetDAGCombine(ISD::STORE); + setTargetDAGCombine(ISD::MEMBARRIER); if (Subtarget->is64Bit()) setTargetDAGCombine(ISD::MUL); @@ -909,6 +1027,11 @@ SDValue X86TargetLowering::getPICJumpTableRelocBase(SDValue Table, return Table; } +/// getFunctionAlignment - Return the Log2 alignment of this function. +unsigned X86TargetLowering::getFunctionAlignment(const Function *F) const { + return F->hasFnAttr(Attribute::OptimizeForSize) ? 
1 : 4; +} + //===----------------------------------------------------------------------===// // Return Value Calling Convention Implementation //===----------------------------------------------------------------------===// @@ -5690,7 +5813,7 @@ X86TargetLowering::EmitTargetCodeForMemset(SelectionDAG &DAG, DebugLoc dl, Args.push_back(Entry); std::pair<SDValue,SDValue> CallResult = LowerCallTo(Chain, Type::VoidTy, false, false, false, false, - CallingConv::C, false, + 0, CallingConv::C, false, DAG.getExternalSymbol(bzeroEntry, IntPtr), Args, DAG, dl); return CallResult.second; } @@ -8454,6 +8577,58 @@ static SDValue PerformVZEXT_MOVLCombine(SDNode *N, SelectionDAG &DAG) { return SDValue(); } +// On X86 and X86-64, atomic operations are lowered to locked instructions. +// Locked instructions, in turn, have implicit fence semantics (all memory +// operations are flushed before issuing the locked instruction, and they +// are not buffered), so we can fold away the common pattern of +// fence-atomic-fence. +static SDValue PerformMEMBARRIERCombine(SDNode* N, SelectionDAG &DAG) { + SDValue atomic = N->getOperand(0); + switch (atomic.getOpcode()) { + case ISD::ATOMIC_CMP_SWAP: + case ISD::ATOMIC_SWAP: + case ISD::ATOMIC_LOAD_ADD: + case ISD::ATOMIC_LOAD_SUB: + case ISD::ATOMIC_LOAD_AND: + case ISD::ATOMIC_LOAD_OR: + case ISD::ATOMIC_LOAD_XOR: + case ISD::ATOMIC_LOAD_NAND: + case ISD::ATOMIC_LOAD_MIN: + case ISD::ATOMIC_LOAD_MAX: + case ISD::ATOMIC_LOAD_UMIN: + case ISD::ATOMIC_LOAD_UMAX: + break; + default: + return SDValue(); + } + + SDValue fence = atomic.getOperand(0); + if (fence.getOpcode() != ISD::MEMBARRIER) + return SDValue(); + + switch (atomic.getOpcode()) { + case ISD::ATOMIC_CMP_SWAP: + return DAG.UpdateNodeOperands(atomic, fence.getOperand(0), + atomic.getOperand(1), atomic.getOperand(2), + atomic.getOperand(3)); + case ISD::ATOMIC_SWAP: + case ISD::ATOMIC_LOAD_ADD: + case ISD::ATOMIC_LOAD_SUB: + case ISD::ATOMIC_LOAD_AND: + case ISD::ATOMIC_LOAD_OR: + case ISD::ATOMIC_LOAD_XOR: + case ISD::ATOMIC_LOAD_NAND: + case ISD::ATOMIC_LOAD_MIN: + case ISD::ATOMIC_LOAD_MAX: + case ISD::ATOMIC_LOAD_UMIN: + case ISD::ATOMIC_LOAD_UMAX: + return DAG.UpdateNodeOperands(atomic, fence.getOperand(0), + atomic.getOperand(1), atomic.getOperand(2)); + default: + return SDValue(); + } +} + SDValue X86TargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const { SelectionDAG &DAG = DCI.DAG; @@ -8472,6 +8647,7 @@ SDValue X86TargetLowering::PerformDAGCombine(SDNode *N, case X86ISD::FAND: return PerformFANDCombine(N, DAG); case X86ISD::BT: return PerformBTCombine(N, DAG, DCI); case X86ISD::VZEXT_MOVL: return PerformVZEXT_MOVLCombine(N, DAG); + case ISD::MEMBARRIER: return PerformMEMBARRIERCombine(N, DAG); } return SDValue(); diff --git a/lib/Target/X86/X86ISelLowering.h b/lib/Target/X86/X86ISelLowering.h index fb4eb68..ffed46c 100644 --- a/lib/Target/X86/X86ISelLowering.h +++ b/lib/Target/X86/X86ISelLowering.h @@ -380,7 +380,7 @@ namespace llvm { MVT getOptimalMemOpType(uint64_t Size, unsigned Align, bool isSrcConst, bool isSrcStr, SelectionDAG &DAG) const; - + /// LowerOperation - Provide custom lowering hooks for some operations. /// virtual SDValue LowerOperation(SDValue Op, SelectionDAG &DAG); @@ -533,7 +533,10 @@ namespace llvm { , SmallSet<Instruction*, 8> & #endif ); - + + /// getFunctionAlignment - Return the Log2 alignment of this function.
+ virtual unsigned getFunctionAlignment(const Function *F) const; + private: /// Subtarget - Keep a pointer to the X86Subtarget around so that we can /// make the right decision when generating code for different targets. diff --git a/lib/Target/X86/X86InstrBuilder.h b/lib/Target/X86/X86InstrBuilder.h index b50dd65..6359542 100644 --- a/lib/Target/X86/X86InstrBuilder.h +++ b/lib/Target/X86/X86InstrBuilder.h @@ -49,8 +49,10 @@ struct X86AddressMode { unsigned IndexReg; unsigned Disp; GlobalValue *GV; + unsigned GVOpFlags; - X86AddressMode() : BaseType(RegBase), Scale(1), IndexReg(0), Disp(0), GV(0) { + X86AddressMode() + : BaseType(RegBase), Scale(1), IndexReg(0), Disp(0), GV(0), GVOpFlags(0) { Base.Reg = 0; } }; @@ -113,7 +115,7 @@ inline const MachineInstrBuilder &addLeaAddress(const MachineInstrBuilder &MIB, assert (0); MIB.addImm(AM.Scale).addReg(AM.IndexReg); if (AM.GV) - return MIB.addGlobalAddress(AM.GV, AM.Disp); + return MIB.addGlobalAddress(AM.GV, AM.Disp, AM.GVOpFlags); else return MIB.addImm(AM.Disp); } diff --git a/lib/Target/X86/X86InstrInfo.cpp b/lib/Target/X86/X86InstrInfo.cpp index 21f71ec..e5d84c5 100644 --- a/lib/Target/X86/X86InstrInfo.cpp +++ b/lib/Target/X86/X86InstrInfo.cpp @@ -2459,7 +2459,8 @@ bool X86InstrInfo::unfoldMemoryOperand(MachineFunction &MF, MachineInstr *MI, getDefRegState(MO.isDef()) | RegState::Implicit | getKillRegState(MO.isKill()) | - getDeadRegState(MO.isDead())); + getDeadRegState(MO.isDead()) | + getUndefRegState(MO.isUndef())); } // Change CMP32ri r, 0 back to TEST32rr r, r, etc. unsigned NewOpc = 0; diff --git a/lib/Target/X86/X86InstrInfo.td b/lib/Target/X86/X86InstrInfo.td index a6b0880..03df10d 100644 --- a/lib/Target/X86/X86InstrInfo.td +++ b/lib/Target/X86/X86InstrInfo.td @@ -180,10 +180,12 @@ def i16mem : X86MemOperand<"printi16mem">; def i32mem : X86MemOperand<"printi32mem">; def i64mem : X86MemOperand<"printi64mem">; def i128mem : X86MemOperand<"printi128mem">; +def i256mem : X86MemOperand<"printi256mem">; def f32mem : X86MemOperand<"printf32mem">; def f64mem : X86MemOperand<"printf64mem">; def f80mem : X86MemOperand<"printf80mem">; def f128mem : X86MemOperand<"printf128mem">; +def f256mem : X86MemOperand<"printf256mem">; // A version of i8mem for use on x86-64 that uses GR64_NOREX instead of // plain GR64, so that it doesn't potentially require a REX prefix. 
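[Editor's note] The recurring change across the Sparc, X86, and XCore hunks in this patch is one refactor: function alignment stops being an ad-hoc policy in each AsmPrinter (and in TargetELFWriterInfo, whose copy is deleted outright) and becomes a getFunctionAlignment hook on the target lowering, with the Log2 value cached so emitters simply call EmitAlignment(MF.getAlignment(), F). Below is a minimal standalone sketch of that shape; the types and the lowercase emitAlignment helper are simplified stand-ins, not the real LLVM interfaces, while the alignment values mirror the hunks (X86: 4, or 1 under OptimizeForSize; Sparc: 4).

#include <cstdio>

struct Function {              // stand-in: only the attribute the hook reads
  bool OptimizeForSize;
};

// Hook on the lowering object: returns Log2 of the required alignment.
struct TargetLowering {
  virtual ~TargetLowering() {}
  virtual unsigned getFunctionAlignment(const Function *F) const = 0;
};

struct X86Lowering : TargetLowering {
  // Mirrors the X86 hunk: 2-byte alignment when optimizing for size, else 16.
  unsigned getFunctionAlignment(const Function *F) const override {
    return F->OptimizeForSize ? 1 : 4;
  }
};

struct SparcLowering : TargetLowering {
  unsigned getFunctionAlignment(const Function *) const override { return 4; }
};

// The MachineFunction caches the answer once; emitters no longer decide policy.
struct MachineFunction {
  const Function *F;
  unsigned Alignment; // Log2, filled in from the target hook
  MachineFunction(const Function *Fn, const TargetLowering &TLI)
      : F(Fn), Alignment(TLI.getFunctionAlignment(Fn)) {}
  unsigned getAlignment() const { return Alignment; }
};

// What each AsmPrinter hunk reduces to: emit the cached Log2 alignment.
void emitAlignment(unsigned Log2Align) {
  std::printf("\t.align\t%u\n", 1u << Log2Align);
}

int main() {
  Function F{/*OptimizeForSize=*/false};
  X86Lowering TLI;
  MachineFunction MF(&F, TLI);
  emitAlignment(MF.getAlignment()); // prints ".align 16"
}

This is also why the ELF writer's private copy of the logic can be deleted: the object-code path and the assembly path now read the same cached value instead of recomputing the policy independently.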
diff --git a/lib/Target/X86/X86RegisterInfo.td b/lib/Target/X86/X86RegisterInfo.td index 996baa0..2e6f017 100644 --- a/lib/Target/X86/X86RegisterInfo.td +++ b/lib/Target/X86/X86RegisterInfo.td @@ -157,6 +157,24 @@ let Namespace = "X86" in { def XMM14: Register<"xmm14">, DwarfRegNum<[31, -2, -2]>; def XMM15: Register<"xmm15">, DwarfRegNum<[32, -2, -2]>; + // YMM Registers, used by AVX instructions + def YMM0: Register<"ymm0">, DwarfRegNum<[17, 21, 21]>; + def YMM1: Register<"ymm1">, DwarfRegNum<[18, 22, 22]>; + def YMM2: Register<"ymm2">, DwarfRegNum<[19, 23, 23]>; + def YMM3: Register<"ymm3">, DwarfRegNum<[20, 24, 24]>; + def YMM4: Register<"ymm4">, DwarfRegNum<[21, 25, 25]>; + def YMM5: Register<"ymm5">, DwarfRegNum<[22, 26, 26]>; + def YMM6: Register<"ymm6">, DwarfRegNum<[23, 27, 27]>; + def YMM7: Register<"ymm7">, DwarfRegNum<[24, 28, 28]>; + def YMM8: Register<"ymm8">, DwarfRegNum<[25, -2, -2]>; + def YMM9: Register<"ymm9">, DwarfRegNum<[26, -2, -2]>; + def YMM10: Register<"ymm10">, DwarfRegNum<[27, -2, -2]>; + def YMM11: Register<"ymm11">, DwarfRegNum<[28, -2, -2]>; + def YMM12: Register<"ymm12">, DwarfRegNum<[29, -2, -2]>; + def YMM13: Register<"ymm13">, DwarfRegNum<[30, -2, -2]>; + def YMM14: Register<"ymm14">, DwarfRegNum<[31, -2, -2]>; + def YMM15: Register<"ymm15">, DwarfRegNum<[32, -2, -2]>; + // Floating point stack registers def ST0 : Register<"st(0)">, DwarfRegNum<[33, 12, 11]>; def ST1 : Register<"st(1)">, DwarfRegNum<[34, 13, 12]>; @@ -229,6 +247,11 @@ def : SubRegSet<4, [RAX, RCX, RDX, RBX, RSP, RBP, RSI, RDI, [EAX, ECX, EDX, EBX, ESP, EBP, ESI, EDI, R8D, R9D, R10D, R11D, R12D, R13D, R14D, R15D]>; +def : SubRegSet<1, [YMM0, YMM1, YMM2, YMM3, YMM4, YMM5, YMM6, YMM7, + YMM8, YMM9, YMM10, YMM11, YMM12, YMM13, YMM14, YMM15], + [XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7, + XMM8, XMM9, XMM10, XMM11, XMM12, XMM13, XMM14, XMM15]>; + //===----------------------------------------------------------------------===// // Register Class Definitions... now that we have all of the pieces, define the // top-level register classes. The order specified in the register list is @@ -755,6 +778,10 @@ def VR128 : RegisterClass<"X86", [v16i8, v8i16, v4i32, v2i64, v4f32, v2f64],128, } }]; } +def VR256 : RegisterClass<"X86", [ v8i32, v4i64, v8f32, v4f64],256, + [YMM0, YMM1, YMM2, YMM3, YMM4, YMM5, YMM6, YMM7, + YMM8, YMM9, YMM10, YMM11, + YMM12, YMM13, YMM14, YMM15]>; // Status flags registers. 
def CCR : RegisterClass<"X86", [i32], 32, [EFLAGS]> { diff --git a/lib/Target/X86/X86Subtarget.h b/lib/Target/X86/X86Subtarget.h index f4f6cce..0d1434f 100644 --- a/lib/Target/X86/X86Subtarget.h +++ b/lib/Target/X86/X86Subtarget.h @@ -142,7 +142,7 @@ public: bool hasSSE4A() const { return HasSSE4A; } bool has3DNow() const { return X863DNowLevel >= ThreeDNow; } bool has3DNowA() const { return X863DNowLevel >= ThreeDNowA; } - bool hasAVX() const { return hasAVX(); } + bool hasAVX() const { return HasAVX; } bool hasFMA3() const { return HasFMA3; } bool hasFMA4() const { return HasFMA4; } diff --git a/lib/Target/X86/X86TargetMachine.cpp b/lib/Target/X86/X86TargetMachine.cpp index 67dcd01..b000914 100644 --- a/lib/Target/X86/X86TargetMachine.cpp +++ b/lib/Target/X86/X86TargetMachine.cpp @@ -226,7 +226,7 @@ bool X86TargetMachine::addAssemblyEmitter(PassManagerBase &PM, assert(AsmPrinterCtor && "AsmPrinter was not linked in"); if (AsmPrinterCtor) - PM.add(AsmPrinterCtor(Out, *this, OptLevel, Verbose)); + PM.add(AsmPrinterCtor(Out, *this, Verbose)); return false; } @@ -254,7 +254,7 @@ bool X86TargetMachine::addCodeEmitter(PassManagerBase &PM, if (DumpAsm) { assert(AsmPrinterCtor && "AsmPrinter was not linked in"); if (AsmPrinterCtor) - PM.add(AsmPrinterCtor(errs(), *this, OptLevel, true)); + PM.add(AsmPrinterCtor(errs(), *this, true)); } return false; @@ -284,7 +284,7 @@ bool X86TargetMachine::addCodeEmitter(PassManagerBase &PM, if (DumpAsm) { assert(AsmPrinterCtor && "AsmPrinter was not linked in"); if (AsmPrinterCtor) - PM.add(AsmPrinterCtor(errs(), *this, OptLevel, true)); + PM.add(AsmPrinterCtor(errs(), *this, true)); } return false; @@ -298,7 +298,7 @@ bool X86TargetMachine::addSimpleCodeEmitter(PassManagerBase &PM, if (DumpAsm) { assert(AsmPrinterCtor && "AsmPrinter was not linked in"); if (AsmPrinterCtor) - PM.add(AsmPrinterCtor(errs(), *this, OptLevel, true)); + PM.add(AsmPrinterCtor(errs(), *this, true)); } return false; @@ -312,7 +312,7 @@ bool X86TargetMachine::addSimpleCodeEmitter(PassManagerBase &PM, if (DumpAsm) { assert(AsmPrinterCtor && "AsmPrinter was not linked in"); if (AsmPrinterCtor) - PM.add(AsmPrinterCtor(errs(), *this, OptLevel, true)); + PM.add(AsmPrinterCtor(errs(), *this, true)); } return false; diff --git a/lib/Target/X86/X86TargetMachine.h b/lib/Target/X86/X86TargetMachine.h index ba73ca8..90a5cc2 100644 --- a/lib/Target/X86/X86TargetMachine.h +++ b/lib/Target/X86/X86TargetMachine.h @@ -45,7 +45,6 @@ protected: // set this functions to ctor pointer at startup time if they are linked in. 
typedef FunctionPass *(*AsmPrinterCtorFn)(raw_ostream &o, X86TargetMachine &tm, - CodeGenOpt::Level OptLevel, bool verbose); static AsmPrinterCtorFn AsmPrinterCtor; diff --git a/lib/Target/XCore/XCore.h b/lib/Target/XCore/XCore.h index 5722b87..d95aab3 100644 --- a/lib/Target/XCore/XCore.h +++ b/lib/Target/XCore/XCore.h @@ -26,7 +26,6 @@ namespace llvm { FunctionPass *createXCoreISelDag(XCoreTargetMachine &TM); FunctionPass *createXCoreCodePrinterPass(raw_ostream &OS, XCoreTargetMachine &TM, - CodeGenOpt::Level OptLevel, bool Verbose); } // end namespace llvm; diff --git a/lib/Target/XCore/XCoreAsmPrinter.cpp b/lib/Target/XCore/XCoreAsmPrinter.cpp index 4ab5d75..67cb0c8 100644 --- a/lib/Target/XCore/XCoreAsmPrinter.cpp +++ b/lib/Target/XCore/XCoreAsmPrinter.cpp @@ -58,9 +58,8 @@ namespace { const XCoreSubtarget &Subtarget; public: explicit XCoreAsmPrinter(raw_ostream &O, XCoreTargetMachine &TM, - const TargetAsmInfo *T, CodeGenOpt::Level OL, - bool V) - : AsmPrinter(O, TM, T, OL, V), DW(0), + const TargetAsmInfo *T, bool V) + : AsmPrinter(O, TM, T, V), DW(0), Subtarget(*TM.getSubtargetImpl()) {} virtual const char *getPassName() const { @@ -106,9 +105,8 @@ namespace { /// FunctionPass *llvm::createXCoreCodePrinterPass(raw_ostream &o, XCoreTargetMachine &tm, - CodeGenOpt::Level OptLevel, bool verbose) { - return new XCoreAsmPrinter(o, tm, tm.getTargetAsmInfo(), OptLevel, verbose); + return new XCoreAsmPrinter(o, tm, tm.getTargetAsmInfo(), verbose); } // PrintEscapedString - Print each character of the specified string, escaping @@ -277,7 +275,7 @@ emitFunctionStart(MachineFunction &MF) break; } // (1 << 1) byte aligned - EmitAlignment(1, F, 1); + EmitAlignment(MF.getAlignment(), F, 1); if (TAI->hasDotTypeDotSizeDirective()) { O << "\t.type " << CurrentFnName << ",@function\n"; } diff --git a/lib/Target/XCore/XCoreISelLowering.cpp b/lib/Target/XCore/XCoreISelLowering.cpp index 93c5f59..cc11d32 100644 --- a/lib/Target/XCore/XCoreISelLowering.cpp +++ b/lib/Target/XCore/XCoreISelLowering.cpp @@ -187,6 +187,12 @@ void XCoreTargetLowering::ReplaceNodeResults(SDNode *N, } } +/// getFunctionAlignment - Return the Log2 alignment of this function. +unsigned XCoreTargetLowering:: +getFunctionAlignment(const Function *) const { + return 1; +} + //===----------------------------------------------------------------------===// // Misc Lower Operation implementation //===----------------------------------------------------------------------===// diff --git a/lib/Target/XCore/XCoreISelLowering.h b/lib/Target/XCore/XCoreISelLowering.h index 993ecbd..753ea81 100644 --- a/lib/Target/XCore/XCoreISelLowering.h +++ b/lib/Target/XCore/XCoreISelLowering.h @@ -84,6 +84,9 @@ namespace llvm { virtual bool isLegalAddressingMode(const AddrMode &AM, const Type *Ty) const; + /// getFunctionAlignment - Return the Log2 alignment of this function. + virtual unsigned getFunctionAlignment(const Function *F) const; + private: const XCoreTargetMachine &TM; const XCoreSubtarget &Subtarget; diff --git a/lib/Target/XCore/XCoreTargetMachine.cpp b/lib/Target/XCore/XCoreTargetMachine.cpp index 09227d9..b72225f 100644 --- a/lib/Target/XCore/XCoreTargetMachine.cpp +++ b/lib/Target/XCore/XCoreTargetMachine.cpp @@ -69,6 +69,6 @@ bool XCoreTargetMachine::addAssemblyEmitter(PassManagerBase &PM, bool Verbose, raw_ostream &Out) { // Output assembly language. 
- PM.add(createXCoreCodePrinterPass(Out, *this, OptLevel, Verbose)); + PM.add(createXCoreCodePrinterPass(Out, *this, Verbose)); return false; } diff --git a/lib/Transforms/IPO/GlobalOpt.cpp b/lib/Transforms/IPO/GlobalOpt.cpp index cbf3a1d..7fe097c 100644 --- a/lib/Transforms/IPO/GlobalOpt.cpp +++ b/lib/Transforms/IPO/GlobalOpt.cpp @@ -1928,8 +1928,7 @@ static GlobalVariable *InstallGlobalCtors(GlobalVariable *GCL, if (Ctors[i]) { CSVals[1] = Ctors[i]; } else { - const Type *FTy = FunctionType::get(Type::VoidTy, - std::vector<const Type*>(), false); + const Type *FTy = FunctionType::get(Type::VoidTy, false); const PointerType *PFTy = PointerType::getUnqual(FTy); CSVals[1] = Constant::getNullValue(PFTy); CSVals[0] = ConstantInt::get(Type::Int32Ty, 2147483647); diff --git a/lib/Transforms/IPO/PartialInlining.cpp b/lib/Transforms/IPO/PartialInlining.cpp index 0b975ae..73ec9c1 100644 --- a/lib/Transforms/IPO/PartialInlining.cpp +++ b/lib/Transforms/IPO/PartialInlining.cpp @@ -173,4 +173,4 @@ bool PartialInliner::runOnModule(Module& M) { } return changed; -}
\ No newline at end of file +} diff --git a/lib/Transforms/IPO/RaiseAllocations.cpp b/lib/Transforms/IPO/RaiseAllocations.cpp index 8c97b5d..9900368 100644 --- a/lib/Transforms/IPO/RaiseAllocations.cpp +++ b/lib/Transforms/IPO/RaiseAllocations.cpp @@ -92,8 +92,7 @@ void RaiseAllocations::doInitialization(Module &M) { // i8*(...) * malloc // This handles the common declaration of: 'void *malloc();' const FunctionType *Malloc3Type = - FunctionType::get(PointerType::getUnqual(Type::Int8Ty), - std::vector<const Type*>(), true); + FunctionType::get(PointerType::getUnqual(Type::Int8Ty), true); if (TyWeHave != Malloc3Type) // Give up MallocFunc = 0; @@ -113,14 +112,12 @@ void RaiseAllocations::doInitialization(Module &M) { // Check to see if the prototype was forgotten, giving us // void (...) * free // This handles the common forward declaration of: 'void free();' - const FunctionType* Free2Type = FunctionType::get(Type::VoidTy, - std::vector<const Type*>(),true); + const FunctionType* Free2Type = FunctionType::get(Type::VoidTy, true); if (TyWeHave != Free2Type) { // One last try, check to see if we can find free as // int (...)* free. This handles the case where NOTHING was declared. - const FunctionType* Free3Type = FunctionType::get(Type::Int32Ty, - std::vector<const Type*>(),true); + const FunctionType* Free3Type = FunctionType::get(Type::Int32Ty, true); if (TyWeHave != Free3Type) { // Give up. diff --git a/lib/Transforms/Scalar/CodeGenPrepare.cpp b/lib/Transforms/Scalar/CodeGenPrepare.cpp index e9bee64..85e9243 100644 --- a/lib/Transforms/Scalar/CodeGenPrepare.cpp +++ b/lib/Transforms/Scalar/CodeGenPrepare.cpp @@ -21,6 +21,7 @@ #include "llvm/InlineAsm.h" #include "llvm/Instructions.h" #include "llvm/IntrinsicInst.h" +#include "llvm/LLVMContext.h" #include "llvm/Pass.h" #include "llvm/Target/TargetAsmInfo.h" #include "llvm/Target/TargetData.h" @@ -615,8 +616,8 @@ bool CodeGenPrepare::OptimizeMemoryInst(Instruction *MemoryInst, Value *Addr, V = new SExtInst(V, IntPtrTy, "sunkaddr", InsertPt); } if (AddrMode.Scale != 1) - V = BinaryOperator::CreateMul(V, ConstantInt::get(IntPtrTy, - AddrMode.Scale), + V = BinaryOperator::CreateMul(V, Context->getConstantInt(IntPtrTy, + AddrMode.Scale), "sunkaddr", InsertPt); Result = V; } @@ -647,7 +648,7 @@ bool CodeGenPrepare::OptimizeMemoryInst(Instruction *MemoryInst, Value *Addr, // Add in the Base Offset if present. if (AddrMode.BaseOffs) { - Value *V = ConstantInt::get(IntPtrTy, AddrMode.BaseOffs); + Value *V = Context->getConstantInt(IntPtrTy, AddrMode.BaseOffs); if (Result) Result = BinaryOperator::CreateAdd(Result, V, "sunkaddr", InsertPt); else @@ -655,7 +656,7 @@ bool CodeGenPrepare::OptimizeMemoryInst(Instruction *MemoryInst, Value *Addr, } if (Result == 0) - SunkAddr = Constant::getNullValue(Addr->getType()); + SunkAddr = Context->getNullValue(Addr->getType()); else SunkAddr = new IntToPtrInst(Result, Addr->getType(), "sunkaddr",InsertPt); } diff --git a/lib/Transforms/Scalar/GVN.cpp b/lib/Transforms/Scalar/GVN.cpp index f4a9898..f4fe15e 100644 --- a/lib/Transforms/Scalar/GVN.cpp +++ b/lib/Transforms/Scalar/GVN.cpp @@ -22,6 +22,7 @@ #include "llvm/DerivedTypes.h" #include "llvm/Function.h" #include "llvm/IntrinsicInst.h" +#include "llvm/LLVMContext.h" #include "llvm/Value.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/DepthFirstIterator.h" @@ -795,7 +796,7 @@ Value *GVN::GetValueForBlock(BasicBlock *BB, Instruction* orig, // If the block is unreachable, just return undef, since this path // can't actually occur at runtime. 
if (!DT->isReachableFromEntry(BB)) - return Phis[BB] = UndefValue::get(orig->getType()); + return Phis[BB] = Context->getUndef(orig->getType()); if (BasicBlock *Pred = BB->getSinglePredecessor()) { Value *ret = GetValueForBlock(Pred, orig, Phis); @@ -983,7 +984,7 @@ bool GVN::processNonLocalLoad(LoadInst *LI, // Loading the allocation -> undef. if (isa<AllocationInst>(DepInst)) { ValuesPerBlock.push_back(std::make_pair(DepBB, - UndefValue::get(LI->getType()))); + Context->getUndef(LI->getType()))); continue; } @@ -1270,7 +1271,7 @@ bool GVN::processLoad(LoadInst *L, SmallVectorImpl<Instruction*> &toErase) { // undef value. This can happen when loading for a fresh allocation with no // intervening stores, for example. if (isa<AllocationInst>(DepInst)) { - L->replaceAllUsesWith(UndefValue::get(L->getType())); + L->replaceAllUsesWith(Context->getUndef(L->getType())); toErase.push_back(L); NumGVNLoad++; return true; @@ -1382,9 +1383,9 @@ bool GVN::processInstruction(Instruction *I, BasicBlock* falseSucc = BI->getSuccessor(1); if (trueSucc->getSinglePredecessor()) - localAvail[trueSucc]->table[condVN] = ConstantInt::getTrue(); + localAvail[trueSucc]->table[condVN] = Context->getConstantIntTrue(); if (falseSucc->getSinglePredecessor()) - localAvail[falseSucc]->table[condVN] = ConstantInt::getFalse(); + localAvail[falseSucc]->table[condVN] = Context->getConstantIntFalse(); return false; diff --git a/lib/Transforms/Scalar/IndVarSimplify.cpp b/lib/Transforms/Scalar/IndVarSimplify.cpp index 27e377f..88cf60e 100644 --- a/lib/Transforms/Scalar/IndVarSimplify.cpp +++ b/lib/Transforms/Scalar/IndVarSimplify.cpp @@ -43,6 +43,7 @@ #include "llvm/BasicBlock.h" #include "llvm/Constants.h" #include "llvm/Instructions.h" +#include "llvm/LLVMContext.h" #include "llvm/Type.h" #include "llvm/Analysis/Dominators.h" #include "llvm/Analysis/IVUsers.h" @@ -82,10 +83,10 @@ namespace { virtual void getAnalysisUsage(AnalysisUsage &AU) const { AU.addRequired<DominatorTree>(); AU.addRequired<ScalarEvolution>(); - AU.addRequiredID(LCSSAID); AU.addRequiredID(LoopSimplifyID); AU.addRequired<LoopInfo>(); AU.addRequired<IVUsers>(); + AU.addRequiredID(LCSSAID); AU.addPreserved<ScalarEvolution>(); AU.addPreservedID(LoopSimplifyID); AU.addPreserved<IVUsers>(); @@ -711,18 +712,18 @@ void IndVarSimplify::HandleFloatingPointIV(Loop *L, PHINode *PH) { // Insert new integer induction variable. PHINode *NewPHI = PHINode::Create(Type::Int32Ty, PH->getName()+".int", PH); - NewPHI->addIncoming(ConstantInt::get(Type::Int32Ty, newInitValue), + NewPHI->addIncoming(Context->getConstantInt(Type::Int32Ty, newInitValue), PH->getIncomingBlock(IncomingEdge)); Value *NewAdd = BinaryOperator::CreateAdd(NewPHI, - ConstantInt::get(Type::Int32Ty, + Context->getConstantInt(Type::Int32Ty, newIncrValue), Incr->getName()+".int", Incr); NewPHI->addIncoming(NewAdd, PH->getIncomingBlock(BackEdge)); // The back edge is edge 1 of newPHI, whatever it may have been in the // original PHI. - ConstantInt *NewEV = ConstantInt::get(Type::Int32Ty, intEV); + ConstantInt *NewEV = Context->getConstantInt(Type::Int32Ty, intEV); Value *LHS = (EVIndex == 1 ? NewPHI->getIncomingValue(1) : NewEV); Value *RHS = (EVIndex == 1 ? NewEV : NewPHI->getIncomingValue(1)); ICmpInst *NewEC = new ICmpInst(NewPred, LHS, RHS, EC->getNameStart(), @@ -738,7 +739,7 @@ void IndVarSimplify::HandleFloatingPointIV(Loop *L, PHINode *PH) { RecursivelyDeleteTriviallyDeadInstructions(EC); // Delete old, floating point, increment instruction. 
- Incr->replaceAllUsesWith(UndefValue::get(Incr->getType())); + Incr->replaceAllUsesWith(Context->getUndef(Incr->getType())); RecursivelyDeleteTriviallyDeadInstructions(Incr); // Replace floating induction variable, if it isn't already deleted. diff --git a/lib/Transforms/Scalar/InstructionCombining.cpp b/lib/Transforms/Scalar/InstructionCombining.cpp index 5bd17e0..59fbd39 100644 --- a/lib/Transforms/Scalar/InstructionCombining.cpp +++ b/lib/Transforms/Scalar/InstructionCombining.cpp @@ -36,6 +36,7 @@ #define DEBUG_TYPE "instcombine" #include "llvm/Transforms/Scalar.h" #include "llvm/IntrinsicInst.h" +#include "llvm/LLVMContext.h" #include "llvm/Pass.h" #include "llvm/DerivedTypes.h" #include "llvm/GlobalVariable.h" @@ -82,6 +83,8 @@ namespace { static char ID; // Pass identification, replacement for typeid InstCombiner() : FunctionPass(&ID) {} + LLVMContext* getContext() { return Context; } + /// AddToWorkList - Add the specified instruction to the worklist if it /// isn't already in it. void AddToWorkList(Instruction *I) { @@ -140,7 +143,7 @@ namespace { if (Instruction *Op = dyn_cast<Instruction>(*i)) { AddToWorkList(Op); // Set the operand to undef to drop the use. - *i = UndefValue::get(Op->getType()); + *i = Context->getUndef(Op->getType()); } return R; @@ -278,7 +281,7 @@ namespace { if (V->getType() == Ty) return V; if (Constant *CV = dyn_cast<Constant>(V)) - return ConstantExpr::getCast(opc, CV, Ty); + return Context->getConstantExprCast(opc, CV, Ty); Instruction *C = CastInst::Create(opc, V, Ty, V->getName(), &Pos); AddToWorkList(C); @@ -304,7 +307,7 @@ namespace { } else { // If we are replacing the instruction with itself, this must be in a // segment of unreachable code, so just clobber the instruction. - I.replaceAllUsesWith(UndefValue::get(I.getType())); + I.replaceAllUsesWith(Context->getUndef(I.getType())); return &I; } } @@ -525,7 +528,7 @@ bool InstCombiner::SimplifyCommutative(BinaryOperator &I) { if (BinaryOperator *Op = dyn_cast<BinaryOperator>(I.getOperand(0))) if (Op->getOpcode() == Opcode && isa<Constant>(Op->getOperand(1))) { if (isa<Constant>(I.getOperand(1))) { - Constant *Folded = ConstantExpr::get(I.getOpcode(), + Constant *Folded = Context->getConstantExpr(I.getOpcode(), cast<Constant>(I.getOperand(1)), cast<Constant>(Op->getOperand(1))); I.setOperand(0, Op->getOperand(0)); @@ -538,7 +541,7 @@ bool InstCombiner::SimplifyCommutative(BinaryOperator &I) { Constant *C2 = cast<Constant>(Op1->getOperand(1)); // Fold (op (op V1, C1), (op V2, C2)) ==> (op (op V1, V2), (op C1,C2)) - Constant *Folded = ConstantExpr::get(I.getOpcode(), C1, C2); + Constant *Folded = Context->getConstantExpr(I.getOpcode(), C1, C2); Instruction *New = BinaryOperator::Create(Opcode, Op->getOperand(0), Op1->getOperand(0), Op1->getName(), &I); @@ -565,17 +568,17 @@ bool InstCombiner::SimplifyCompare(CmpInst &I) { // dyn_castNegVal - Given a 'sub' instruction, return the RHS of the instruction // if the LHS is a constant zero (which is the 'negate' form). // -static inline Value *dyn_castNegVal(Value *V) { +static inline Value *dyn_castNegVal(Value *V, LLVMContext* Context) { if (BinaryOperator::isNeg(V)) return BinaryOperator::getNegArgument(V); // Constants can be considered to be negated values if they can be folded. 
if (ConstantInt *C = dyn_cast<ConstantInt>(V)) - return ConstantExpr::getNeg(C); + return Context->getConstantExprNeg(C); if (ConstantVector *C = dyn_cast<ConstantVector>(V)) if (C->getType()->getElementType()->isInteger()) - return ConstantExpr::getNeg(C); + return Context->getConstantExprNeg(C); return 0; } @@ -584,28 +587,28 @@ static inline Value *dyn_castNegVal(Value *V) { // instruction if the LHS is a constant negative zero (which is the 'negate' // form). // -static inline Value *dyn_castFNegVal(Value *V) { +static inline Value *dyn_castFNegVal(Value *V, LLVMContext* Context) { if (BinaryOperator::isFNeg(V)) return BinaryOperator::getFNegArgument(V); // Constants can be considered to be negated values if they can be folded. if (ConstantFP *C = dyn_cast<ConstantFP>(V)) - return ConstantExpr::getFNeg(C); + return Context->getConstantExprFNeg(C); if (ConstantVector *C = dyn_cast<ConstantVector>(V)) if (C->getType()->getElementType()->isFloatingPoint()) - return ConstantExpr::getFNeg(C); + return Context->getConstantExprFNeg(C); return 0; } -static inline Value *dyn_castNotVal(Value *V) { +static inline Value *dyn_castNotVal(Value *V, LLVMContext* Context) { if (BinaryOperator::isNot(V)) return BinaryOperator::getNotArgument(V); // Constants can be considered to be not'ed values... if (ConstantInt *C = dyn_cast<ConstantInt>(V)) - return ConstantInt::get(~C->getValue()); + return Context->getConstantInt(~C->getValue()); return 0; } @@ -614,7 +617,8 @@ static inline Value *dyn_castNotVal(Value *V) { // non-constant operand of the multiply, and set CST to point to the multiplier. // Otherwise, return null. // -static inline Value *dyn_castFoldableMul(Value *V, ConstantInt *&CST) { +static inline Value *dyn_castFoldableMul(Value *V, ConstantInt *&CST, + LLVMContext* Context) { if (V->hasOneUse() && V->getType()->isInteger()) if (Instruction *I = dyn_cast<Instruction>(V)) { if (I->getOpcode() == Instruction::Mul) @@ -625,7 +629,7 @@ static inline Value *dyn_castFoldableMul(Value *V, ConstantInt *&CST) { // The multiplier is really 1 << CST. uint32_t BitWidth = cast<IntegerType>(V->getType())->getBitWidth(); uint32_t CSTVal = CST->getLimitedValue(BitWidth); - CST = ConstantInt::get(APInt(BitWidth, 1).shl(CSTVal)); + CST = Context->getConstantInt(APInt(BitWidth, 1).shl(CSTVal)); return I->getOperand(0); } } @@ -654,16 +658,19 @@ static unsigned getOpcode(const Value *V) { } /// AddOne - Add one to a ConstantInt -static Constant *AddOne(Constant *C) { - return ConstantExpr::getAdd(C, ConstantInt::get(C->getType(), 1)); +static Constant *AddOne(Constant *C, LLVMContext* Context) { + return Context->getConstantExprAdd(C, + Context->getConstantInt(C->getType(), 1)); } /// SubOne - Subtract one from a ConstantInt -static Constant *SubOne(ConstantInt *C) { - return ConstantExpr::getSub(C, ConstantInt::get(C->getType(), 1)); +static Constant *SubOne(ConstantInt *C, LLVMContext* Context) { + return Context->getConstantExprSub(C, + Context->getConstantInt(C->getType(), 1)); } /// MultiplyOverflows - True if the multiply can not be expressed in an int /// this size. -static bool MultiplyOverflows(ConstantInt *C1, ConstantInt *C2, bool sign) { +static bool MultiplyOverflows(ConstantInt *C1, ConstantInt *C2, bool sign, + LLVMContext* Context) { uint32_t W = C1->getBitWidth(); APInt LHSExt = C1->getValue(), RHSExt = C2->getValue(); if (sign) { @@ -690,7 +697,7 @@ static bool MultiplyOverflows(ConstantInt *C1, ConstantInt *C2, bool sign) { /// are any bits set in the constant that are not demanded. 
If so, shrink the /// constant and return true. static bool ShrinkDemandedConstant(Instruction *I, unsigned OpNo, - APInt Demanded) { + APInt Demanded, LLVMContext* Context) { assert(I && "No instruction?"); assert(OpNo < I->getNumOperands() && "Operand index too large"); @@ -705,7 +712,7 @@ static bool ShrinkDemandedConstant(Instruction *I, unsigned OpNo, // This instruction is producing bits that are not demanded. Shrink the RHS. Demanded &= OpC->getValue(); - I->setOperand(OpNo, ConstantInt::get(Demanded)); + I->setOperand(OpNo, Context->getConstantInt(Demanded)); return true; } @@ -837,7 +844,7 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask, if (DemandedMask == 0) { // Not demanding any bits from V. if (isa<UndefValue>(V)) return 0; - return UndefValue::get(VTy); + return Context->getUndef(VTy); } if (Depth == 6) // Limit search depth. @@ -879,7 +886,7 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask, // If all of the demanded bits in the inputs are known zeros, return zero. if ((DemandedMask & (RHSKnownZero|LHSKnownZero)) == DemandedMask) - return Constant::getNullValue(VTy); + return Context->getNullValue(VTy); } else if (I->getOpcode() == Instruction::Or) { // We can simplify (X|Y) -> X or Y in the user's context if we know that @@ -948,10 +955,10 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask, // If all of the demanded bits in the inputs are known zeros, return zero. if ((DemandedMask & (RHSKnownZero|LHSKnownZero)) == DemandedMask) - return Constant::getNullValue(VTy); + return Context->getNullValue(VTy); // If the RHS is a constant, see if we can simplify it. - if (ShrinkDemandedConstant(I, 1, DemandedMask & ~LHSKnownZero)) + if (ShrinkDemandedConstant(I, 1, DemandedMask & ~LHSKnownZero, Context)) return I; // Output known-1 bits are only known if set in both the LHS & RHS. @@ -988,7 +995,7 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask, return I->getOperand(1); // If the RHS is a constant, see if we can simplify it. - if (ShrinkDemandedConstant(I, 1, DemandedMask)) + if (ShrinkDemandedConstant(I, 1, DemandedMask, Context)) return I; // Output known-0 bits are only known if clear in both the LHS & RHS. @@ -1036,7 +1043,7 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask, if ((DemandedMask & (RHSKnownZero|RHSKnownOne)) == DemandedMask) { // all known if ((RHSKnownOne & LHSKnownOne) == RHSKnownOne) { - Constant *AndC = ConstantInt::get(~RHSKnownOne & DemandedMask); + Constant *AndC = Context->getConstantInt(~RHSKnownOne & DemandedMask); Instruction *And = BinaryOperator::CreateAnd(I->getOperand(0), AndC, "tmp"); return InsertNewInstBefore(And, *I); @@ -1045,7 +1052,7 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask, // If the RHS is a constant, see if we can simplify it. // FIXME: for XOR, we prefer to force bits to 1 if they will make a -1. - if (ShrinkDemandedConstant(I, 1, DemandedMask)) + if (ShrinkDemandedConstant(I, 1, DemandedMask, Context)) return I; RHSKnownZero = KnownZeroOut; @@ -1062,8 +1069,8 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask, assert(!(LHSKnownZero & LHSKnownOne) && "Bits known to be one AND zero?"); // If the operands are constants, see if we can simplify them. 
- if (ShrinkDemandedConstant(I, 1, DemandedMask) || - ShrinkDemandedConstant(I, 2, DemandedMask)) + if (ShrinkDemandedConstant(I, 1, DemandedMask, Context) || + ShrinkDemandedConstant(I, 2, DemandedMask, Context)) return I; // Only known if known in both the LHS and RHS. @@ -1085,8 +1092,22 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask, break; } case Instruction::BitCast: - if (!I->getOperand(0)->getType()->isInteger()) + if (!I->getOperand(0)->getType()->isIntOrIntVector()) return false; // vector->int or fp->int? + + if (const VectorType *DstVTy = dyn_cast<VectorType>(I->getType())) { + if (const VectorType *SrcVTy = + dyn_cast<VectorType>(I->getOperand(0)->getType())) { + if (DstVTy->getNumElements() != SrcVTy->getNumElements()) + // Don't touch a bitcast between vectors of different element counts. + return false; + } else + // Don't touch a scalar-to-vector bitcast. + return false; + } else if (isa<VectorType>(I->getOperand(0)->getType())) + // Don't touch a vector-to-scalar bitcast. + return false; + if (SimplifyDemandedBits(I->getOperandUse(0), DemandedMask, RHSKnownZero, RHSKnownOne, Depth+1)) return I; @@ -1173,7 +1194,7 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask, // If the RHS of the add has bits set that can't affect the input, reduce // the constant. - if (ShrinkDemandedConstant(I, 1, InDemandedBits)) + if (ShrinkDemandedConstant(I, 1, InDemandedBits, Context)) return I; // Avoid excess work. @@ -1394,10 +1415,10 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask, Instruction *NewVal; if (InputBit > ResultBit) NewVal = BinaryOperator::CreateLShr(I->getOperand(1), - ConstantInt::get(I->getType(), InputBit-ResultBit)); + Context->getConstantInt(I->getType(), InputBit-ResultBit)); else NewVal = BinaryOperator::CreateShl(I->getOperand(1), - ConstantInt::get(I->getType(), ResultBit-InputBit)); + Context->getConstantInt(I->getType(), ResultBit-InputBit)); NewVal->takeName(I); return InsertNewInstBefore(NewVal, *I); } @@ -1414,9 +1435,9 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask, // If the client is only demanding bits that we know, return the known // constant. if ((DemandedMask & (RHSKnownZero|RHSKnownOne)) == DemandedMask) { - Constant *C = ConstantInt::get(RHSKnownOne); + Constant *C = Context->getConstantInt(RHSKnownOne); if (isa<PointerType>(V->getType())) - C = ConstantExpr::getIntToPtr(C, V->getType()); + C = Context->getConstantExprIntToPtr(C, V->getType()); return C; } return false; @@ -1444,13 +1465,13 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts, return 0; } else if (DemandedElts == 0) { // If nothing is demanded, provide undef. UndefElts = EltMask; - return UndefValue::get(V->getType()); + return Context->getUndef(V->getType()); } UndefElts = 0; if (ConstantVector *CP = dyn_cast<ConstantVector>(V)) { const Type *EltTy = cast<VectorType>(V->getType())->getElementType(); - Constant *Undef = UndefValue::get(EltTy); + Constant *Undef = Context->getUndef(EltTy); std::vector<Constant*> Elts; for (unsigned i = 0; i != VWidth; ++i) @@ -1465,7 +1486,7 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts, } // If we changed the constant, return it. - Constant *NewCP = ConstantVector::get(Elts); + Constant *NewCP = Context->getConstantVector(Elts); return NewCP != CP ? 
NewCP : 0; } else if (isa<ConstantAggregateZero>(V)) { // Simplify the CAZ to a ConstantVector where the non-demanded elements are @@ -1477,15 +1498,15 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts, return 0; const Type *EltTy = cast<VectorType>(V->getType())->getElementType(); - Constant *Zero = Constant::getNullValue(EltTy); - Constant *Undef = UndefValue::get(EltTy); + Constant *Zero = Context->getNullValue(EltTy); + Constant *Undef = Context->getUndef(EltTy); std::vector<Constant*> Elts; for (unsigned i = 0; i != VWidth; ++i) { Constant *Elt = DemandedElts[i] ? Zero : Undef; Elts.push_back(Elt); } UndefElts = DemandedElts ^ EltMask; - return ConstantVector::get(Elts); + return Context->getConstantVector(Elts); } // Limit search depth. @@ -1599,12 +1620,12 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts, std::vector<Constant*> Elts; for (unsigned i = 0; i < VWidth; ++i) { if (UndefElts[i]) - Elts.push_back(UndefValue::get(Type::Int32Ty)); + Elts.push_back(Context->getUndef(Type::Int32Ty)); else - Elts.push_back(ConstantInt::get(Type::Int32Ty, + Elts.push_back(Context->getConstantInt(Type::Int32Ty, Shuffle->getMaskValue(i))); } - I->setOperand(2, ConstantVector::get(Elts)); + I->setOperand(2, Context->getConstantVector(Elts)); MadeChange = true; } break; @@ -1749,8 +1770,8 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts, } Instruction *New = - InsertElementInst::Create(UndefValue::get(II->getType()), TmpV, 0U, - II->getName()); + InsertElementInst::Create( + Context->getUndef(II->getType()), TmpV, 0U, II->getName()); InsertNewInstBefore(New, *II); AddSoonDeadInstToWorklist(*II, 0); return New; @@ -1778,7 +1799,8 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts, /// 'shouldApply' and 'apply' methods. /// template<typename Functor> -static Instruction *AssociativeOpt(BinaryOperator &Root, const Functor &F) { +static Instruction *AssociativeOpt(BinaryOperator &Root, const Functor &F, + LLVMContext* Context) { unsigned Opcode = Root.getOpcode(); Value *LHS = Root.getOperand(0); @@ -1811,7 +1833,7 @@ static Instruction *AssociativeOpt(BinaryOperator &Root, const Functor &F) { // Make what used to be the LHS of the root be the user of the root... 
@@ -1778,7 +1799,8 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts,
 /// 'shouldApply' and 'apply' methods.
 ///
 template<typename Functor>
-static Instruction *AssociativeOpt(BinaryOperator &Root, const Functor &F) {
+static Instruction *AssociativeOpt(BinaryOperator &Root, const Functor &F,
+                                   LLVMContext* Context) {
   unsigned Opcode = Root.getOpcode();
   Value *LHS = Root.getOperand(0);
 
@@ -1811,7 +1833,7 @@ static Instruction *AssociativeOpt(BinaryOperator &Root, const Functor &F) {
     // Make what used to be the LHS of the root be the user of the root...
     Value *ExtraOperand = TmpLHSI->getOperand(1);
     if (&Root == TmpLHSI) {
-      Root.replaceAllUsesWith(Constant::getNullValue(TmpLHSI->getType()));
+      Root.replaceAllUsesWith(Context->getNullValue(TmpLHSI->getType()));
       return 0;
     }
     Root.replaceAllUsesWith(TmpLHSI);          // Users now use TmpLHSI
@@ -1850,11 +1872,12 @@ namespace {
 
 // AddRHS - Implements: X + X --> X << 1
 struct AddRHS {
   Value *RHS;
-  AddRHS(Value *rhs) : RHS(rhs) {}
+  LLVMContext* Context;
+  AddRHS(Value *rhs, LLVMContext* C) : RHS(rhs), Context(C) {}
   bool shouldApply(Value *LHS) const { return LHS == RHS; }
   Instruction *apply(BinaryOperator &Add) const {
     return BinaryOperator::CreateShl(Add.getOperand(0),
-                                     ConstantInt::get(Add.getType(), 1));
+                                     Context->getConstantInt(Add.getType(), 1));
   }
 };
 
@@ -1862,11 +1885,12 @@ struct AddRHS {
 // iff C1&C2 == 0
 struct AddMaskingAnd {
   Constant *C2;
-  AddMaskingAnd(Constant *c) : C2(c) {}
+  LLVMContext* Context;
+  AddMaskingAnd(Constant *c, LLVMContext* C) : C2(c), Context(C) {}
   bool shouldApply(Value *LHS) const {
     ConstantInt *C1;
     return match(LHS, m_And(m_Value(), m_ConstantInt(C1))) &&
-           ConstantExpr::getAnd(C1, C2)->isNullValue();
+           Context->getConstantExprAnd(C1, C2)->isNullValue();
   }
   Instruction *apply(BinaryOperator &Add) const {
     return BinaryOperator::CreateOr(Add.getOperand(0), Add.getOperand(1));
@@ -1877,6 +1901,8 @@ struct AddMaskingAnd {
 
 static Value *FoldOperationIntoSelectOperand(Instruction &I, Value *SO,
                                              InstCombiner *IC) {
+  LLVMContext* Context = IC->getContext();
+
   if (CastInst *CI = dyn_cast<CastInst>(&I)) {
     return IC->InsertCastBefore(CI->getOpcode(), SO, I.getType(), I);
   }
@@ -1887,8 +1913,8 @@ static Value *FoldOperationIntoSelectOperand(Instruction &I, Value *SO,
 
   if (Constant *SOC = dyn_cast<Constant>(SO)) {
     if (ConstIsRHS)
-      return ConstantExpr::get(I.getOpcode(), SOC, ConstOperand);
-    return ConstantExpr::get(I.getOpcode(), ConstOperand, SOC);
+      return Context->getConstantExpr(I.getOpcode(), SOC, ConstOperand);
+    return Context->getConstantExpr(I.getOpcode(), ConstOperand, SOC);
   }
 
   Value *Op0 = SO, *Op1 = ConstOperand;
@@ -1978,9 +2004,9 @@ Instruction *InstCombiner::FoldOpIntoPhi(Instruction &I) {
       Value *InV = 0;
       if (Constant *InC = dyn_cast<Constant>(PN->getIncomingValue(i))) {
         if (CmpInst *CI = dyn_cast<CmpInst>(&I))
-          InV = ConstantExpr::getCompare(CI->getPredicate(), InC, C);
+          InV = Context->getConstantExprCompare(CI->getPredicate(), InC, C);
         else
-          InV = ConstantExpr::get(I.getOpcode(), InC, C);
+          InV = Context->getConstantExpr(I.getOpcode(), InC, C);
       } else {
         assert(PN->getIncomingBlock(i) == NonConstBB);
         if (BinaryOperator *BO = dyn_cast<BinaryOperator>(&I))
@@ -2005,7 +2031,7 @@ Instruction *InstCombiner::FoldOpIntoPhi(Instruction &I) {
     for (unsigned i = 0; i != NumPHIValues; ++i) {
       Value *InV;
       if (Constant *InC = dyn_cast<Constant>(PN->getIncomingValue(i))) {
-        InV = ConstantExpr::getCast(CI->getOpcode(), InC, RetTy);
+        InV = Context->getConstantExprCast(CI->getOpcode(), InC, RetTy);
       } else {
         assert(PN->getIncomingBlock(i) == NonConstBB);
         InV = CastInst::Create(CI->getOpcode(), PN->getIncomingValue(i),
@@ -2077,8 +2103,8 @@ Instruction *InstCombiner::visitAdd(BinaryOperator &I) {
       if (CI->isAllOnesValue() &&
          ZI->getOperand(0)->getType() == Type::Int1Ty)
         return SelectInst::Create(ZI->getOperand(0),
-                                  Constant::getNullValue(I.getType()),
-                                  ConstantInt::getAllOnesValue(I.getType()));
+                                  Context->getNullValue(I.getType()),
+                                  Context->getConstantIntAllOnesValue(I.getType()));
     }
 
     if (isa<PHINode>(LHS))
@@ -2137,7 +2163,8 @@ Instruction *InstCombiner::visitAdd(BinaryOperator &I) {
 
   // X + X --> X << 1
   if (I.getType()->isInteger()) {
-    if (Instruction *Result = AssociativeOpt(I, AddRHS(RHS))) return Result;
+    if (Instruction *Result = AssociativeOpt(I, AddRHS(RHS, Context), Context))
+      return Result;
 
     if (Instruction *RHSI = dyn_cast<Instruction>(RHS)) {
       if (RHSI->getOpcode() == Instruction::Sub)
@@ -2153,9 +2180,9 @@ Instruction *InstCombiner::visitAdd(BinaryOperator &I) {
 
   // -A + B  -->  B - A
   // -A + -B  -->  -(A + B)
-  if (Value *LHSV = dyn_castNegVal(LHS)) {
+  if (Value *LHSV = dyn_castNegVal(LHS, Context)) {
    if (LHS->getType()->isIntOrIntVector()) {
-      if (Value *RHSV = dyn_castNegVal(RHS)) {
+      if (Value *RHSV = dyn_castNegVal(RHS, Context)) {
         Instruction *NewAdd = BinaryOperator::CreateAdd(LHSV, RHSV, "sum");
         InsertNewInstBefore(NewAdd, I);
         return BinaryOperator::CreateNeg(NewAdd);
@@ -2167,33 +2194,34 @@ Instruction *InstCombiner::visitAdd(BinaryOperator &I) {
 
   // A + -B  -->  A - B
   if (!isa<Constant>(RHS))
-    if (Value *V = dyn_castNegVal(RHS))
+    if (Value *V = dyn_castNegVal(RHS, Context))
      return BinaryOperator::CreateSub(LHS, V);
 
   ConstantInt *C2;
-  if (Value *X = dyn_castFoldableMul(LHS, C2)) {
+  if (Value *X = dyn_castFoldableMul(LHS, C2, Context)) {
    if (X == RHS)   // X*C + X --> X * (C+1)
-      return BinaryOperator::CreateMul(RHS, AddOne(C2));
+      return BinaryOperator::CreateMul(RHS, AddOne(C2, Context));
 
     // X*C1 + X*C2 --> X * (C1+C2)
     ConstantInt *C1;
-    if (X == dyn_castFoldableMul(RHS, C1))
-      return BinaryOperator::CreateMul(X, ConstantExpr::getAdd(C1, C2));
+    if (X == dyn_castFoldableMul(RHS, C1, Context))
+      return BinaryOperator::CreateMul(X, Context->getConstantExprAdd(C1, C2));
   }
 
   // X + X*C --> X * (C+1)
-  if (dyn_castFoldableMul(RHS, C2) == LHS)
-    return BinaryOperator::CreateMul(LHS, AddOne(C2));
+  if (dyn_castFoldableMul(RHS, C2, Context) == LHS)
+    return BinaryOperator::CreateMul(LHS, AddOne(C2, Context));
 
   // X + ~X --> -1   since   ~X = -X-1
-  if (dyn_castNotVal(LHS) == RHS || dyn_castNotVal(RHS) == LHS)
-    return ReplaceInstUsesWith(I, Constant::getAllOnesValue(I.getType()));
+  if (dyn_castNotVal(LHS, Context) == RHS ||
+      dyn_castNotVal(RHS, Context) == LHS)
+    return ReplaceInstUsesWith(I, Context->getAllOnesValue(I.getType()));
 
   // (A & C1)+(B & C2) --> (A & C1)|(B & C2) iff C1&C2 == 0
   if (match(RHS, m_And(m_Value(), m_ConstantInt(C2))))
-    if (Instruction *R = AssociativeOpt(I, AddMaskingAnd(C2)))
+    if (Instruction *R = AssociativeOpt(I, AddMaskingAnd(C2, Context), Context))
       return R;
 
   // A+B --> A|B iff A and B have no bits set in common.
@@ -2240,11 +2268,11 @@ Instruction *InstCombiner::visitAdd(BinaryOperator &I) {
   if (ConstantInt *CRHS = dyn_cast<ConstantInt>(RHS)) {
     Value *X = 0;
     if (match(LHS, m_Not(m_Value(X))))    // ~X + C --> (C-1) - X
-      return BinaryOperator::CreateSub(SubOne(CRHS), X);
+      return BinaryOperator::CreateSub(SubOne(CRHS, Context), X);
 
     // (X & FF00) + xx00  -> (X+xx00) & FF00
     if (LHS->hasOneUse() && match(LHS, m_And(m_Value(X), m_ConstantInt(C2)))) {
-      Constant *Anded = ConstantExpr::getAnd(CRHS, C2);
+      Constant *Anded = Context->getConstantExprAnd(CRHS, C2);
       if (Anded == CRHS) {
         // See if all bits from the first bit set in the Add RHS up are included
         // in the mask.  First, get the rightmost bit.
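AddRHS and AddMaskingAnd above are consumed by the AssociativeOpt template through an informal shouldApply/apply protocol, which is why this patch has to thread LLVMContext through their constructors. A toy sketch of that duck-typed protocol, using stand-in types rather than LLVM's classes:

    #include <iostream>

    struct Expr { int op0, op1; };        // toy stand-in for a BinaryOperator

    // X + X --> X << 1, in the same shape as the AddRHS functor above.
    struct AddSelf {
      int RHS;
      explicit AddSelf(int rhs) : RHS(rhs) {}
      bool shouldApply(int LHS) const { return LHS == RHS; }
      Expr apply(const Expr &Add) const { return Expr{Add.op0, 1 /*shl amt*/}; }
    };

    template <typename Functor>
    bool associativeOpt(const Expr &Root, const Functor &F, Expr &Out) {
      if (!F.shouldApply(Root.op0))       // pattern check supplied by functor
        return false;
      Out = F.apply(Root);                // rewrite supplied by functor
      return true;
    }

    int main() {
      Expr Root{7, 7}, Res{};
      std::cout << associativeOpt(Root, AddSelf(7), Res) << '\n';  // prints 1
    }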
@@ -2287,7 +2315,7 @@ Instruction *InstCombiner::visitAdd(BinaryOperator &I) {
         unsigned AS =
           cast<PointerType>(CI->getOperand(0)->getType())->getAddressSpace();
         Value *I2 = InsertBitCastBefore(CI->getOperand(0),
-                                        PointerType::get(Type::Int8Ty, AS), I);
+                                  Context->getPointerType(Type::Int8Ty, AS), I);
         I2 = InsertNewInstBefore(GetElementPtrInst::Create(I2, Other, "ctg2"), I);
         return new PtrToIntInst(I2, CI->getType());
       }
@@ -2323,9 +2351,9 @@ Instruction *InstCombiner::visitAdd(BinaryOperator &I) {
     // (add (sext x), cst) --> (sext (add x, cst'))
     if (ConstantInt *RHSC = dyn_cast<ConstantInt>(RHS)) {
       Constant *CI =
-        ConstantExpr::getTrunc(RHSC, LHSConv->getOperand(0)->getType());
+        Context->getConstantExprTrunc(RHSC, LHSConv->getOperand(0)->getType());
       if (LHSConv->hasOneUse() &&
-          ConstantExpr::getSExt(CI, I.getType()) == RHSC &&
+          Context->getConstantExprSExt(CI, I.getType()) == RHSC &&
           WillNotOverflowSignedAdd(LHSConv->getOperand(0), CI)) {
         // Insert the new, smaller add.
         Instruction *NewAdd = BinaryOperator::CreateAdd(LHSConv->getOperand(0),
@@ -2364,7 +2392,7 @@ Instruction *InstCombiner::visitFAdd(BinaryOperator &I) {
   if (Constant *RHSC = dyn_cast<Constant>(RHS)) {
     // X + 0 --> X
     if (ConstantFP *CFP = dyn_cast<ConstantFP>(RHSC)) {
-      if (CFP->isExactlyValue(ConstantFP::getNegativeZero
+      if (CFP->isExactlyValue(Context->getConstantFPNegativeZero
                               (I.getType())->getValueAPF()))
         return ReplaceInstUsesWith(I, LHS);
     }
@@ -2376,12 +2404,12 @@ Instruction *InstCombiner::visitFAdd(BinaryOperator &I) {
 
   // -A + B  -->  B - A
   // -A + -B  -->  -(A + B)
-  if (Value *LHSV = dyn_castFNegVal(LHS))
+  if (Value *LHSV = dyn_castFNegVal(LHS, Context))
     return BinaryOperator::CreateFSub(RHS, LHSV);
 
   // A + -B  -->  A - B
   if (!isa<Constant>(RHS))
-    if (Value *V = dyn_castFNegVal(RHS))
+    if (Value *V = dyn_castFNegVal(RHS, Context))
       return BinaryOperator::CreateFSub(LHS, V);
 
   // Check for X+0.0.  Simplify it to X if we know X is not -0.0.
@@ -2399,9 +2427,9 @@ Instruction *InstCombiner::visitFAdd(BinaryOperator &I) {
     // instcombined.
     if (ConstantFP *CFP = dyn_cast<ConstantFP>(RHS)) {
       Constant *CI =
-        ConstantExpr::getFPToSI(CFP, LHSConv->getOperand(0)->getType());
+        Context->getConstantExprFPToSI(CFP, LHSConv->getOperand(0)->getType());
       if (LHSConv->hasOneUse() &&
-          ConstantExpr::getSIToFP(CI, I.getType()) == CFP &&
+          Context->getConstantExprSIToFP(CI, I.getType()) == CFP &&
          WillNotOverflowSignedAdd(LHSConv->getOperand(0), CI)) {
         // Insert the new integer add.
         Instruction *NewAdd = BinaryOperator::CreateAdd(LHSConv->getOperand(0),
@@ -2437,10 +2465,10 @@ Instruction *InstCombiner::visitSub(BinaryOperator &I) {
   Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
 
   if (Op0 == Op1)                        // sub X, X  -> 0
-    return ReplaceInstUsesWith(I, Constant::getNullValue(I.getType()));
+    return ReplaceInstUsesWith(I, Context->getNullValue(I.getType()));
 
   // If this is a 'B = x-(-A)', change to B = x+A...
-  if (Value *V = dyn_castNegVal(Op1))
+  if (Value *V = dyn_castNegVal(Op1, Context))
     return BinaryOperator::CreateAdd(Op0, V);
 
   if (isa<UndefValue>(Op0))
@@ -2456,7 +2484,7 @@ Instruction *InstCombiner::visitSub(BinaryOperator &I) {
       // C - ~X == X + (1+C)
       Value *X = 0;
       if (match(Op1, m_Not(m_Value(X))))
-        return BinaryOperator::CreateAdd(X, AddOne(C));
+        return BinaryOperator::CreateAdd(X, AddOne(C, Context));
 
       // -(X >>u 31) -> (X >>s 31)
       // -(X >>s 31) -> (X >>u 31)
@@ -2505,8 +2533,8 @@ Instruction *InstCombiner::visitSub(BinaryOperator &I) {
       else if (ConstantInt *CI1 = dyn_cast<ConstantInt>(I.getOperand(0))) {
         if (ConstantInt *CI2 = dyn_cast<ConstantInt>(Op1I->getOperand(1)))
           // C1-(X+C2) --> (C1-C2)-X
-          return BinaryOperator::CreateSub(ConstantExpr::getSub(CI1, CI2),
-                                           Op1I->getOperand(0));
+          return BinaryOperator::CreateSub(
+            Context->getConstantExprSub(CI1, CI2), Op1I->getOperand(0));
       }
     }
 
@@ -2541,12 +2569,13 @@ Instruction *InstCombiner::visitSub(BinaryOperator &I) {
         if (CSI->isZero())
          if (Constant *DivRHS = dyn_cast<Constant>(Op1I->getOperand(1)))
            return BinaryOperator::CreateSDiv(Op1I->getOperand(0),
-                                              ConstantExpr::getNeg(DivRHS));
+                                          Context->getConstantExprNeg(DivRHS));
 
       // X - X*C --> X * (1-C)
       ConstantInt *C2 = 0;
-      if (dyn_castFoldableMul(Op1I, C2) == Op0) {
-        Constant *CP1 = ConstantExpr::getSub(ConstantInt::get(I.getType(), 1),
+      if (dyn_castFoldableMul(Op1I, C2, Context) == Op0) {
+        Constant *CP1 =
+          Context->getConstantExprSub(Context->getConstantInt(I.getType(), 1),
                                              C2);
         return BinaryOperator::CreateMul(Op0, CP1);
       }
@@ -2566,13 +2595,13 @@ Instruction *InstCombiner::visitSub(BinaryOperator &I) {
   }
 
   ConstantInt *C1;
-  if (Value *X = dyn_castFoldableMul(Op0, C1)) {
+  if (Value *X = dyn_castFoldableMul(Op0, C1, Context)) {
    if (X == Op1)  // X*C - X --> X * (C-1)
-      return BinaryOperator::CreateMul(Op1, SubOne(C1));
+      return BinaryOperator::CreateMul(Op1, SubOne(C1, Context));
 
     ConstantInt *C2;   // X*C1 - X*C2 -> X * (C1-C2)
-    if (X == dyn_castFoldableMul(Op1, C2))
-      return BinaryOperator::CreateMul(X, ConstantExpr::getSub(C1, C2));
+    if (X == dyn_castFoldableMul(Op1, C2, Context))
+      return BinaryOperator::CreateMul(X, Context->getConstantExprSub(C1, C2));
   }
   return 0;
 }
@@ -2581,7 +2610,7 @@ Instruction *InstCombiner::visitFSub(BinaryOperator &I) {
   Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
 
   // If this is a 'B = x-(-A)', change to B = x+A...
-  if (Value *V = dyn_castFNegVal(Op1))
+  if (Value *V = dyn_castFNegVal(Op1, Context))
    return BinaryOperator::CreateFAdd(Op0, V);
 
   if (BinaryOperator *Op1I = dyn_cast<BinaryOperator>(Op1)) {
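Folds like X*C1 - X*C2 --> X * (C1-C2) above are justified by modular arithmetic: the identity holds bit-for-bit under wraparound, even when C1-C2 "underflows". A quick standalone check (uint32_t has the same wraparound behavior as an i32; the constants are arbitrary):

    #include <cassert>
    #include <cstdint>

    int main() {
      const uint32_t C1 = 13, C2 = 250;      // C1 - C2 wraps mod 2^32
      for (uint32_t X : {0u, 1u, 0x7fffffffu, 0xdeadbeefu})
        assert(X * C1 - X * C2 == X * (C1 - C2));
    }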
@@ -2633,7 +2662,7 @@ Instruction *InstCombiner::visitMul(BinaryOperator &I) {
   // TODO: If Op1 is undef and Op0 is finite, return zero.
   if (!I.getType()->isFPOrFPVector() &&
       isa<UndefValue>(I.getOperand(1)))              // undef * X -> 0
-    return ReplaceInstUsesWith(I, Constant::getNullValue(I.getType()));
+    return ReplaceInstUsesWith(I, Context->getNullValue(I.getType()));
 
   // Simplify mul instructions with a constant RHS...
   if (Constant *Op1 = dyn_cast<Constant>(I.getOperand(1))) {
@@ -2644,7 +2673,7 @@ Instruction *InstCombiner::visitMul(BinaryOperator &I) {
       if (SI->getOpcode() == Instruction::Shl)
        if (Constant *ShOp = dyn_cast<Constant>(SI->getOperand(1)))
          return BinaryOperator::CreateMul(SI->getOperand(0),
-                                           ConstantExpr::getShl(CI, ShOp));
+                                       Context->getConstantExprShl(CI, ShOp));
 
       if (CI->isZero())
         return ReplaceInstUsesWith(I, Op1);  // X * 0  == 0
@@ -2656,7 +2685,7 @@ Instruction *InstCombiner::visitMul(BinaryOperator &I) {
       const APInt& Val = cast<ConstantInt>(CI)->getValue();
       if (Val.isPowerOf2()) {          // Replace X*(2^C) with X << C
         return BinaryOperator::CreateShl(Op0,
-                 ConstantInt::get(Op0->getType(), Val.logBase2()));
+                 Context->getConstantInt(Op0->getType(), Val.logBase2()));
       }
     } else if (isa<VectorType>(Op1->getType())) {
       // TODO: If Op1 is all zeros and Op0 is all finite, return all zeros.
@@ -2681,7 +2710,7 @@ Instruction *InstCombiner::visitMul(BinaryOperator &I) {
           Instruction *Add = BinaryOperator::CreateMul(Op0I->getOperand(0),
                                                        Op1, "tmp");
           InsertNewInstBefore(Add, I);
-          Value *C1C2 = ConstantExpr::getMul(Op1,
+          Value *C1C2 = Context->getConstantExprMul(Op1,
                                            cast<Constant>(Op0I->getOperand(1)));
           return BinaryOperator::CreateAdd(Add, C1C2);
 
@@ -2697,8 +2726,8 @@ Instruction *InstCombiner::visitMul(BinaryOperator &I) {
       return NV;
   }
 
-  if (Value *Op0v = dyn_castNegVal(Op0))     // -X * -Y = X*Y
-    if (Value *Op1v = dyn_castNegVal(I.getOperand(1)))
+  if (Value *Op0v = dyn_castNegVal(Op0, Context))     // -X * -Y = X*Y
+    if (Value *Op1v = dyn_castNegVal(I.getOperand(1), Context))
      return BinaryOperator::CreateMul(Op0v, Op1v);
 
   // (X / Y) *  Y = X - (X % Y)
@@ -2712,7 +2741,7 @@ Instruction *InstCombiner::visitMul(BinaryOperator &I) {
     Op1 = Op0;
     BO = dyn_cast<BinaryOperator>(I.getOperand(1));
   }
-  Value *Neg = dyn_castNegVal(Op1);
+  Value *Neg = dyn_castNegVal(Op1, Context);
   if (BO && BO->hasOneUse() &&
       (BO->getOperand(1) == Op1 || BO->getOperand(1) == Neg) &&
       (BO->getOpcode() == Instruction::UDiv ||
@@ -2762,7 +2791,7 @@ Instruction *InstCombiner::visitMul(BinaryOperator &I) {
         isSignBitCheck(SCI->getPredicate(), cast<ConstantInt>(SCIOp1), TIS) &&
         TIS) {
       // Shift the X value right to turn it into "all signbits".
-      Constant *Amt = ConstantInt::get(SCIOp0->getType(),
+      Constant *Amt = Context->getConstantInt(SCIOp0->getType(),
                                        SCOpTy->getPrimitiveSizeInBits()-1);
       Value *V =
         InsertNewInstBefore(
@@ -2822,8 +2851,8 @@ Instruction *InstCombiner::visitFMul(BinaryOperator &I) {
      return NV;
   }
 
-  if (Value *Op0v = dyn_castFNegVal(Op0))     // -X * -Y = X*Y
-    if (Value *Op1v = dyn_castFNegVal(I.getOperand(1)))
+  if (Value *Op0v = dyn_castFNegVal(Op0, Context))     // -X * -Y = X*Y
+    if (Value *Op1v = dyn_castFNegVal(I.getOperand(1), Context))
      return BinaryOperator::CreateFMul(Op0v, Op1v);
 
   return Changed ? &I : 0;
@@ -2880,8 +2909,8 @@ bool InstCombiner::SimplifyDivRemOfSelect(BinaryOperator &I) {
         *I = SI->getOperand(NonNullOperand);
         AddToWorkList(BBI);
       } else if (*I == SelectCond) {
-        *I = NonNullOperand == 1 ? ConstantInt::getTrue() :
-                                   ConstantInt::getFalse();
+        *I = NonNullOperand == 1 ? Context->getConstantIntTrue() :
+                                   Context->getConstantIntFalse();
         AddToWorkList(BBI);
       }
     }
@@ -2913,7 +2942,7 @@ Instruction *InstCombiner::commonDivTransforms(BinaryOperator &I) {
   if (isa<UndefValue>(Op0)) {
     if (Op0->getType()->isFPOrFPVector())
       return ReplaceInstUsesWith(I, Op0);
-    return ReplaceInstUsesWith(I, Constant::getNullValue(I.getType()));
+    return ReplaceInstUsesWith(I, Context->getNullValue(I.getType()));
   }
 
   // X / undef -> undef
@@ -2933,12 +2962,12 @@ Instruction *InstCombiner::commonIDivTransforms(BinaryOperator &I) {
   // (sdiv X, X) --> 1     (udiv X, X) --> 1
   if (Op0 == Op1) {
    if (const VectorType *Ty = dyn_cast<VectorType>(I.getType())) {
-      Constant *CI = ConstantInt::get(Ty->getElementType(), 1);
+      Constant *CI = Context->getConstantInt(Ty->getElementType(), 1);
       std::vector<Constant*> Elts(Ty->getNumElements(), CI);
-      return ReplaceInstUsesWith(I, ConstantVector::get(Elts));
+      return ReplaceInstUsesWith(I, Context->getConstantVector(Elts));
     }
 
-    Constant *CI = ConstantInt::get(I.getType(), 1);
+    Constant *CI = Context->getConstantInt(I.getType(), 1);
     return ReplaceInstUsesWith(I, CI);
   }
 
@@ -2959,11 +2988,12 @@ Instruction *InstCombiner::commonIDivTransforms(BinaryOperator &I) {
     if (Instruction *LHS = dyn_cast<Instruction>(Op0))
       if (Instruction::BinaryOps(LHS->getOpcode()) == I.getOpcode())
        if (ConstantInt *LHSRHS = dyn_cast<ConstantInt>(LHS->getOperand(1))) {
-          if (MultiplyOverflows(RHS, LHSRHS, I.getOpcode()==Instruction::SDiv))
-            return ReplaceInstUsesWith(I, Constant::getNullValue(I.getType()));
+          if (MultiplyOverflows(RHS, LHSRHS,
+                                I.getOpcode()==Instruction::SDiv, Context))
+            return ReplaceInstUsesWith(I, Context->getNullValue(I.getType()));
          else
            return BinaryOperator::Create(I.getOpcode(), LHS->getOperand(0),
-                                          ConstantExpr::getMul(RHS, LHSRHS));
+                                      Context->getConstantExprMul(RHS, LHSRHS));
        }
 
     if (!RHS->isZero()) { // avoid X udiv 0
@@ -2979,7 +3009,7 @@ Instruction *InstCombiner::commonIDivTransforms(BinaryOperator &I) {
   // 0 / X == 0, we don't need to preserve faults!
   if (ConstantInt *LHS = dyn_cast<ConstantInt>(Op0))
     if (LHS->equalsInt(0))
-      return ReplaceInstUsesWith(I, Constant::getNullValue(I.getType()));
+      return ReplaceInstUsesWith(I, Context->getNullValue(I.getType()));
 
   // It can't be division by zero, hence it must be division by one.
   if (I.getType() == Type::Int1Ty)
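The MultiplyOverflows guard above protects the (X / C1) / C2 --> X / (C1*C2) fold: when C1*C2 wraps, the divisor exceeds any value X can take and the quotient is simply known to be zero. A standalone sketch of the same guard, assuming unsigned 32-bit operands (illustrative names, not the LLVM helpers):

    #include <cassert>
    #include <cstdint>

    bool mulOverflows(uint32_t a, uint32_t b) {
      return b != 0 && a > UINT32_MAX / b;   // unsigned overflow test
    }

    uint32_t foldNestedUDiv(uint32_t X, uint32_t C1, uint32_t C2) {
      if (mulOverflows(C1, C2))
        return 0;                  // divisor exceeds any 32-bit X
      return X / (C1 * C2);
    }

    int main() {
      assert(foldNestedUDiv(1000, 10, 5) == 1000 / 10 / 5);
      assert(foldNestedUDiv(1000, 1u << 20, 1u << 20) == 0);
    }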
@@ -3008,14 +3038,14 @@ Instruction *InstCombiner::visitUDiv(BinaryOperator &I) {
     // if so, convert to a right shift.
     if (C->getValue().isPowerOf2())  // 0 not included in isPowerOf2
       return BinaryOperator::CreateLShr(Op0,
-               ConstantInt::get(Op0->getType(), C->getValue().logBase2()));
+        Context->getConstantInt(Op0->getType(), C->getValue().logBase2()));
 
     // X udiv C, where C >= signbit
     if (C->getValue().isNegative()) {
       Value *IC = InsertNewInstBefore(new ICmpInst(ICmpInst::ICMP_ULT, Op0, C),
                                       I);
-      return SelectInst::Create(IC, Constant::getNullValue(I.getType()),
-                                ConstantInt::get(I.getType(), 1));
+      return SelectInst::Create(IC, Context->getNullValue(I.getType()),
+                                Context->getConstantInt(I.getType(), 1));
     }
   }
 
@@ -3028,7 +3058,7 @@ Instruction *InstCombiner::visitUDiv(BinaryOperator &I) {
       Value *N = RHSI->getOperand(1);
       const Type *NTy = N->getType();
      if (uint32_t C2 = C1.logBase2()) {
-        Constant *C2V = ConstantInt::get(NTy, C2);
+        Constant *C2V = Context->getConstantInt(NTy, C2);
         N = InsertNewInstBefore(BinaryOperator::CreateAdd(N, C2V, "tmp"), I);
       }
       return BinaryOperator::CreateLShr(Op0, N);
@@ -3046,13 +3076,13 @@ Instruction *InstCombiner::visitUDiv(BinaryOperator &I) {
         // Compute the shift amounts
         uint32_t TSA = TVA.logBase2(), FSA = FVA.logBase2();
         // Construct the "on true" case of the select
-        Constant *TC = ConstantInt::get(Op0->getType(), TSA);
+        Constant *TC = Context->getConstantInt(Op0->getType(), TSA);
         Instruction *TSI = BinaryOperator::CreateLShr(
                                                  Op0, TC, SI->getName()+".t");
         TSI = InsertNewInstBefore(TSI, I);
 
         // Construct the "on false" case of the select
-        Constant *FC = ConstantInt::get(Op0->getType(), FSA);
+        Constant *FC = Context->getConstantInt(Op0->getType(), FSA);
         Instruction *FSI = BinaryOperator::CreateLShr(
                                                  Op0, FC, SI->getName()+".f");
         FSI = InsertNewInstBefore(FSI, I);
@@ -3104,7 +3134,7 @@ Instruction *InstCombiner::commonRemTransforms(BinaryOperator &I) {
   if (isa<UndefValue>(Op0)) {             // undef % X -> 0
     if (I.getType()->isFPOrFPVector())
       return ReplaceInstUsesWith(I, Op0);  // X % undef -> undef (could be SNaN)
-    return ReplaceInstUsesWith(I, Constant::getNullValue(I.getType()));
+    return ReplaceInstUsesWith(I, Context->getNullValue(I.getType()));
   }
   if (isa<UndefValue>(Op1))
     return ReplaceInstUsesWith(I, Op1);  // X % undef -> undef
@@ -3129,15 +3159,15 @@ Instruction *InstCombiner::commonIRemTransforms(BinaryOperator &I) {
   // 0 % X == 0 for integer, we don't need to preserve faults!
   if (Constant *LHS = dyn_cast<Constant>(Op0))
    if (LHS->isNullValue())
-      return ReplaceInstUsesWith(I, Constant::getNullValue(I.getType()));
+      return ReplaceInstUsesWith(I, Context->getNullValue(I.getType()));
 
   if (ConstantInt *RHS = dyn_cast<ConstantInt>(Op1)) {
     // X % 0 == undef, we don't need to preserve faults!
     if (RHS->equalsInt(0))
-      return ReplaceInstUsesWith(I, UndefValue::get(I.getType()));
+      return ReplaceInstUsesWith(I, Context->getUndef(I.getType()));
 
     if (RHS->equalsInt(1))  // X % 1 == 0
-      return ReplaceInstUsesWith(I, Constant::getNullValue(I.getType()));
+      return ReplaceInstUsesWith(I, Context->getNullValue(I.getType()));
 
     if (Instruction *Op0I = dyn_cast<Instruction>(Op0)) {
       if (SelectInst *SI = dyn_cast<SelectInst>(Op0I)) {
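The visitUDiv hunks above turn division by a power of two into a right shift, including the case where the divisor is a power of two shifted left by a variable amount. The arithmetic behind it, checked standalone (logBase2 here is a plain loop, not APInt's; the % fold is the matching urem rewrite):

    #include <cassert>
    #include <cstdint>

    unsigned logBase2(uint32_t pow2) {   // pow2 is assumed a power of two
      unsigned k = 0;
      while ((pow2 & 1) == 0) { pow2 >>= 1; ++k; }
      return k;
    }

    int main() {
      for (uint32_t x : {0u, 5u, 1024u, 0xffffffffu}) {
        assert(x / 64 == x >> logBase2(64));          // udiv 2^k -> lshr k
        assert(x % 64 == (x & (64 - 1)));             // urem 2^k -> and (2^k-1)
        for (uint32_t n = 0; n < 4; ++n)              // udiv (4 << n) -> lshr (2+n)
          assert(x / (4u << n) == x >> (2 + n));
      }
    }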
@@ -3169,7 +3199,7 @@ Instruction *InstCombiner::visitURem(BinaryOperator &I) {
     // if so, convert to a bitwise and.
     if (ConstantInt *C = dyn_cast<ConstantInt>(RHS))
       if (C->getValue().isPowerOf2())
-        return BinaryOperator::CreateAnd(Op0, SubOne(C));
+        return BinaryOperator::CreateAnd(Op0, SubOne(C, Context));
   }
 
   if (Instruction *RHSI = dyn_cast<Instruction>(I.getOperand(1))) {
@@ -3177,7 +3207,7 @@ Instruction *InstCombiner::visitURem(BinaryOperator &I) {
     if (RHSI->getOpcode() == Instruction::Shl &&
        isa<ConstantInt>(RHSI->getOperand(0))) {
       if (cast<ConstantInt>(RHSI->getOperand(0))->getValue().isPowerOf2()) {
-        Constant *N1 = ConstantInt::getAllOnesValue(I.getType());
+        Constant *N1 = Context->getConstantIntAllOnesValue(I.getType());
         Value *Add = InsertNewInstBefore(BinaryOperator::CreateAdd(RHSI, N1,
                                                                    "tmp"), I);
         return BinaryOperator::CreateAnd(Op0, Add);
@@ -3194,9 +3224,11 @@ Instruction *InstCombiner::visitURem(BinaryOperator &I) {
       if ((STO->getValue().isPowerOf2()) && (SFO->getValue().isPowerOf2())) {
         Value *TrueAnd = InsertNewInstBefore(
-          BinaryOperator::CreateAnd(Op0, SubOne(STO), SI->getName()+".t"), I);
+          BinaryOperator::CreateAnd(Op0, SubOne(STO, Context),
+                                    SI->getName()+".t"), I);
         Value *FalseAnd = InsertNewInstBefore(
-          BinaryOperator::CreateAnd(Op0, SubOne(SFO), SI->getName()+".f"), I);
+          BinaryOperator::CreateAnd(Op0, SubOne(SFO, Context),
+                                    SI->getName()+".f"), I);
         return SelectInst::Create(SI->getOperand(0), TrueAnd, FalseAnd);
       }
     }
@@ -3212,7 +3244,7 @@ Instruction *InstCombiner::visitSRem(BinaryOperator &I) {
   if (Instruction *common = commonIRemTransforms(I))
     return common;
 
-  if (Value *RHSNeg = dyn_castNegVal(Op1))
+  if (Value *RHSNeg = dyn_castNegVal(Op1, Context))
     if (!isa<Constant>(RHSNeg) ||
         (isa<ConstantInt>(RHSNeg) &&
          cast<ConstantInt>(RHSNeg)->getValue().isStrictlyPositive())) {
@@ -3247,13 +3279,13 @@ Instruction *InstCombiner::visitSRem(BinaryOperator &I) {
     for (unsigned i = 0; i != VWidth; ++i) {
      if (ConstantInt *RHS = dyn_cast<ConstantInt>(RHSV->getOperand(i))) {
         if (RHS->getValue().isNegative())
-          Elts[i] = cast<ConstantInt>(ConstantExpr::getNeg(RHS));
+          Elts[i] = cast<ConstantInt>(Context->getConstantExprNeg(RHS));
         else
           Elts[i] = RHS;
       }
     }
 
-    Constant *NewRHSV = ConstantVector::get(Elts);
+    Constant *NewRHSV = Context->getConstantVector(Elts);
     if (NewRHSV != RHSV) {
       AddUsesToWorkList(I);
       I.setOperand(1, NewRHSV);
@@ -3356,10 +3388,11 @@ static unsigned getFCmpCode(FCmpInst::Predicate CC, bool &isOrdered) {
 /// opcode and two operands into either a constant true or false, or a brand
 /// new ICmp instruction. The sign is passed in to determine which kind
 /// of predicate to use in the new icmp instruction.
-static Value *getICmpValue(bool sign, unsigned code, Value *LHS, Value *RHS) {
+static Value *getICmpValue(bool sign, unsigned code, Value *LHS, Value *RHS,
+                           LLVMContext* Context) {
   switch (code) {
   default: assert(0 && "Illegal ICmp code!");
-  case 0: return ConstantInt::getFalse();
+  case 0: return Context->getConstantIntFalse();
   case 1:
     if (sign)
       return new ICmpInst(ICmpInst::ICMP_SGT, LHS, RHS);
@@ -3382,7 +3415,7 @@ static Value *getICmpValue(bool sign, unsigned code, Value *LHS, Value *RHS) {
       return new ICmpInst(ICmpInst::ICMP_SLE, LHS, RHS);
     else
       return new ICmpInst(ICmpInst::ICMP_ULE, LHS, RHS);
-  case 7: return ConstantInt::getTrue();
+  case 7: return Context->getConstantIntTrue();
   }
 }
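getICmpCode/getICmpValue above encode a predicate as three bits, one per possible ordering of the operands: bit 0 for "greater", bit 1 for "equal", bit 2 for "less". That makes and/or of two compares reduce to and/or of their codes, which is what FoldICmpLogical relies on. A standalone model of the encoding (plain ints in place of Values):

    #include <cassert>

    unsigned icmpCode(int lhs, int rhs) {
      unsigned code = 0;
      if (lhs > rhs)  code |= 1;   // greater
      if (lhs == rhs) code |= 2;   // equal
      if (lhs < rhs)  code |= 4;   // less
      return code;
    }

    bool evalCode(unsigned code, int lhs, int rhs) {
      return (code & icmpCode(lhs, rhs)) != 0;   // code 0 = false, 7 = true
    }

    int main() {
      // (X <= Y) is code 6: the or of "less" (4) and "equal" (2).
      for (int x = -2; x <= 2; ++x)
        for (int y = -2; y <= 2; ++y)
          assert(evalCode(4 | 2, x, y) == (x <= y));
    }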
@@ -3390,7 +3423,7 @@ static Value *getICmpValue(bool sign, unsigned code, Value *LHS, Value *RHS) {
 /// opcode and two operands into either a FCmp instruction. isordered is passed
 /// in to determine which kind of predicate to use in the new fcmp instruction.
 static Value *getFCmpValue(bool isordered, unsigned code,
-                           Value *LHS, Value *RHS) {
+                           Value *LHS, Value *RHS, LLVMContext* Context) {
   switch (code) {
   default: assert(0 && "Illegal FCmp code!");
   case 0:
@@ -3428,7 +3461,7 @@ static Value *getFCmpValue(bool isordered, unsigned code,
       return new FCmpInst(FCmpInst::FCMP_OLE, LHS, RHS);
     else
       return new FCmpInst(FCmpInst::FCMP_ULE, LHS, RHS);
-  case 7: return ConstantInt::getTrue();
+  case 7: return Context->getConstantIntTrue();
   }
 }
@@ -3477,7 +3510,7 @@ struct FoldICmpLogical {
     bool isSigned = ICmpInst::isSignedPredicate(RHSICI->getPredicate()) ||
                     ICmpInst::isSignedPredicate(ICI->getPredicate());
-    Value *RV = getICmpValue(isSigned, Code, LHS, RHS);
+    Value *RV = getICmpValue(isSigned, Code, LHS, RHS, IC.getContext());
     if (Instruction *I = dyn_cast<Instruction>(RV))
       return I;
     // Otherwise, it's a constant boolean value...
@@ -3496,7 +3529,7 @@ Instruction *InstCombiner::OptAndOp(Instruction *Op,
   Value *X = Op->getOperand(0);
   Constant *Together = 0;
   if (!Op->isShift())
-    Together = ConstantExpr::getAnd(AndRHS, OpRHS);
+    Together = Context->getConstantExprAnd(AndRHS, OpRHS);
 
   switch (Op->getOpcode()) {
   case Instruction::Xor:
@@ -3562,7 +3595,7 @@ Instruction *InstCombiner::OptAndOp(Instruction *Op,
       uint32_t BitWidth = AndRHS->getType()->getBitWidth();
       uint32_t OpRHSVal = OpRHS->getLimitedValue(BitWidth);
       APInt ShlMask(APInt::getHighBitsSet(BitWidth, BitWidth-OpRHSVal));
-      ConstantInt *CI = ConstantInt::get(AndRHS->getValue() & ShlMask);
+      ConstantInt *CI = Context->getConstantInt(AndRHS->getValue() & ShlMask);
 
       if (CI->getValue() == ShlMask) {
       // Masking out bits that the shift already masks
@@ -3582,7 +3615,7 @@ Instruction *InstCombiner::OptAndOp(Instruction *Op,
       uint32_t BitWidth = AndRHS->getType()->getBitWidth();
       uint32_t OpRHSVal = OpRHS->getLimitedValue(BitWidth);
       APInt ShrMask(APInt::getLowBitsSet(BitWidth, BitWidth - OpRHSVal));
-      ConstantInt *CI = ConstantInt::get(AndRHS->getValue() & ShrMask);
+      ConstantInt *CI = Context->getConstantInt(AndRHS->getValue() & ShrMask);
 
       if (CI->getValue() == ShrMask) {
       // Masking out bits that the shift already masks.
@@ -3601,7 +3634,7 @@ Instruction *InstCombiner::OptAndOp(Instruction *Op,
       uint32_t BitWidth = AndRHS->getType()->getBitWidth();
       uint32_t OpRHSVal = OpRHS->getLimitedValue(BitWidth);
       APInt ShrMask(APInt::getLowBitsSet(BitWidth, BitWidth - OpRHSVal));
-      Constant *C = ConstantInt::get(AndRHS->getValue() & ShrMask);
+      Constant *C = Context->getConstantInt(AndRHS->getValue() & ShrMask);
       if (C == AndRHS) {          // Masking out bits shifted in.
         // (Val ashr C1) & C2 -> (Val lshr C1) & C2
         // Make the argument unsigned.
@@ -3626,7 +3659,7 @@ Instruction *InstCombiner::OptAndOp(Instruction *Op,
 Instruction *InstCombiner::InsertRangeTest(Value *V, Constant *Lo, Constant *Hi,
                                            bool isSigned, bool Inside,
                                            Instruction &IB) {
-  assert(cast<ConstantInt>(ConstantExpr::getICmp((isSigned ?
+  assert(cast<ConstantInt>(Context->getConstantExprICmp((isSigned ?
             ICmpInst::ICMP_SLE:ICmpInst::ICMP_ULE), Lo, Hi))->getZExtValue() &&
          "Lo is not <= Hi in range emission code!");
 
@@ -3642,10 +3675,10 @@ Instruction *InstCombiner::InsertRangeTest(Value *V, Constant *Lo, Constant *Hi,
     }
 
     // Emit V-Lo <u Hi-Lo
-    Constant *NegLo = ConstantExpr::getNeg(Lo);
+    Constant *NegLo = Context->getConstantExprNeg(Lo);
     Instruction *Add = BinaryOperator::CreateAdd(V, NegLo, V->getName()+".off");
     InsertNewInstBefore(Add, IB);
-    Constant *UpperBound = ConstantExpr::getAdd(NegLo, Hi);
+    Constant *UpperBound = Context->getConstantExprAdd(NegLo, Hi);
     return new ICmpInst(ICmpInst::ICMP_ULT, Add, UpperBound);
   }
 
@@ -3653,7 +3686,7 @@ Instruction *InstCombiner::InsertRangeTest(Value *V, Constant *Lo, Constant *Hi,
     return new ICmpInst(ICmpInst::ICMP_EQ, V, V);
 
   // V < Min || V >= Hi -> V > Hi-1
-  Hi = SubOne(cast<ConstantInt>(Hi));
+  Hi = SubOne(cast<ConstantInt>(Hi), Context);
   if (cast<ConstantInt>(Lo)->isMinValue(isSigned)) {
     ICmpInst::Predicate pred = (isSigned ?
         ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT);
@@ -3662,10 +3695,10 @@ Instruction *InstCombiner::InsertRangeTest(Value *V, Constant *Lo, Constant *Hi,
 
   // Emit V-Lo >u Hi-1-Lo
   // Note that Hi has already had one subtracted from it, above.
-  ConstantInt *NegLo = cast<ConstantInt>(ConstantExpr::getNeg(Lo));
+  ConstantInt *NegLo = cast<ConstantInt>(Context->getConstantExprNeg(Lo));
   Instruction *Add = BinaryOperator::CreateAdd(V, NegLo, V->getName()+".off");
   InsertNewInstBefore(Add, IB);
-  Constant *LowerBound = ConstantExpr::getAdd(NegLo, Hi);
+  Constant *LowerBound = Context->getConstantExprAdd(NegLo, Hi);
   return new ICmpInst(ICmpInst::ICMP_UGT, Add, LowerBound);
 }
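InsertRangeTest above emits the classic trick: "Lo <= V && V < Hi" collapses into the single unsigned compare (V - Lo) u< (Hi - Lo), because subtracting Lo rotates the range down to start at zero. Checked standalone:

    #include <cassert>
    #include <cstdint>

    bool inRange(uint32_t V, uint32_t Lo, uint32_t Hi) {  // assumes Lo <= Hi
      return V - Lo < Hi - Lo;            // one sub + one compare
    }

    int main() {
      for (uint64_t v = 0; v <= 40; ++v)
        assert(inRange((uint32_t)v, 10, 30) == (10 <= v && v < 30));
    }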
@@ -3707,7 +3740,7 @@ Value *InstCombiner::FoldLogicalPlusAnd(Value *LHS, Value *RHS,
   switch (LHSI->getOpcode()) {
   default: return 0;
   case Instruction::And:
-    if (ConstantExpr::getAnd(N, Mask) == Mask) {
+    if (Context->getConstantExprAnd(N, Mask) == Mask) {
       // If the AndRHS is a power of two minus one (0+1+), this is simple.
       if ((Mask->getValue().countLeadingZeros() +
            Mask->getValue().countPopulation()) ==
@@ -3731,7 +3764,7 @@ Value *InstCombiner::FoldLogicalPlusAnd(Value *LHS, Value *RHS,
     // If the AndRHS is a power of two minus one (0+1+), and N&Mask == 0
     if ((Mask->getValue().countLeadingZeros() +
          Mask->getValue().countPopulation()) == Mask->getValue().getBitWidth()
-        && ConstantExpr::getAnd(N, Mask)->isNullValue())
+        && Context->getConstantExprAnd(N, Mask)->isNullValue())
       break;
     return 0;
   }
@@ -3811,7 +3844,7 @@ Instruction *InstCombiner::FoldAndOfICmps(Instruction &I,
     case ICmpInst::ICMP_EQ:         // (X == 13 & X == 15) -> false
     case ICmpInst::ICMP_UGT:        // (X == 13 & X >  15) -> false
     case ICmpInst::ICMP_SGT:        // (X == 13 & X >  15) -> false
-      return ReplaceInstUsesWith(I, ConstantInt::getFalse());
+      return ReplaceInstUsesWith(I, Context->getConstantIntFalse());
     case ICmpInst::ICMP_NE:         // (X == 13 & X != 15) -> X == 13
     case ICmpInst::ICMP_ULT:        // (X == 13 & X <  15) -> X == 13
     case ICmpInst::ICMP_SLT:        // (X == 13 & X <  15) -> X == 13
@@ -3821,11 +3854,11 @@ Instruction *InstCombiner::FoldAndOfICmps(Instruction &I,
     switch (RHSCC) {
     default: assert(0 && "Unknown integer condition code!");
     case ICmpInst::ICMP_ULT:
-      if (LHSCst == SubOne(RHSCst)) // (X != 13 & X u< 14) -> X < 13
+      if (LHSCst == SubOne(RHSCst, Context)) // (X != 13 & X u< 14) -> X < 13
        return new ICmpInst(ICmpInst::ICMP_ULT, Val, LHSCst);
       break;                        // (X != 13 & X u< 15) -> no change
     case ICmpInst::ICMP_SLT:
-      if (LHSCst == SubOne(RHSCst)) // (X != 13 & X s< 14) -> X < 13
+      if (LHSCst == SubOne(RHSCst, Context)) // (X != 13 & X s< 14) -> X < 13
        return new ICmpInst(ICmpInst::ICMP_SLT, Val, LHSCst);
       break;                        // (X != 13 & X s< 15) -> no change
     case ICmpInst::ICMP_EQ:         // (X != 13 & X == 15) -> X == 15
@@ -3833,13 +3866,13 @@ Instruction *InstCombiner::FoldAndOfICmps(Instruction &I,
     case ICmpInst::ICMP_SGT:        // (X != 13 & X s> 15) -> X s> 15
       return ReplaceInstUsesWith(I, RHS);
     case ICmpInst::ICMP_NE:
-      if (LHSCst == SubOne(RHSCst)){// (X != 13 & X != 14) -> X-13 >u 1
-        Constant *AddCST = ConstantExpr::getNeg(LHSCst);
+      if (LHSCst == SubOne(RHSCst, Context)){// (X != 13 & X != 14) -> X-13 >u 1
+        Constant *AddCST = Context->getConstantExprNeg(LHSCst);
         Instruction *Add = BinaryOperator::CreateAdd(Val, AddCST,
                                                      Val->getName()+".off");
         InsertNewInstBefore(Add, I);
         return new ICmpInst(ICmpInst::ICMP_UGT, Add,
-                            ConstantInt::get(Add->getType(), 1));
+                            Context->getConstantInt(Add->getType(), 1));
       }
       break;                        // (X != 13 & X != 15) -> no change
     }
@@ -3849,7 +3882,7 @@ Instruction *InstCombiner::FoldAndOfICmps(Instruction &I,
     default: assert(0 && "Unknown integer condition code!");
     case ICmpInst::ICMP_EQ:         // (X u< 13 & X == 15) -> false
     case ICmpInst::ICMP_UGT:        // (X u< 13 & X u> 15) -> false
-      return ReplaceInstUsesWith(I, ConstantInt::getFalse());
+      return ReplaceInstUsesWith(I, Context->getConstantIntFalse());
     case ICmpInst::ICMP_SGT:        // (X u< 13 & X s> 15) -> no change
       break;
     case ICmpInst::ICMP_NE:         // (X u< 13 & X != 15) -> X u< 13
@@ -3864,7 +3897,7 @@ Instruction *InstCombiner::FoldAndOfICmps(Instruction &I,
     default: assert(0 && "Unknown integer condition code!");
     case ICmpInst::ICMP_EQ:         // (X s< 13 & X == 15) -> false
     case ICmpInst::ICMP_SGT:        // (X s< 13 & X s> 15) -> false
-      return ReplaceInstUsesWith(I, ConstantInt::getFalse());
+      return ReplaceInstUsesWith(I, Context->getConstantIntFalse());
     case ICmpInst::ICMP_UGT:        // (X s< 13 & X u> 15) -> no change
       break;
     case ICmpInst::ICMP_NE:         // (X s< 13 & X != 15) -> X < 13
@@ -3883,11 +3916,12 @@ Instruction *InstCombiner::FoldAndOfICmps(Instruction &I,
     case ICmpInst::ICMP_SGT:        // (X u> 13 & X s> 15) -> no change
       break;
     case ICmpInst::ICMP_NE:
-      if (RHSCst == AddOne(LHSCst)) // (X u> 13 & X != 14) -> X u> 14
+      if (RHSCst == AddOne(LHSCst, Context)) // (X u> 13 & X != 14) -> X u> 14
        return new ICmpInst(LHSCC, Val, RHSCst);
       break;                        // (X u> 13 & X != 15) -> no change
     case ICmpInst::ICMP_ULT:        // (X u> 13 & X u< 15) -> (X-14) <u 1
-      return InsertRangeTest(Val, AddOne(LHSCst), RHSCst, false, true, I);
+      return InsertRangeTest(Val, AddOne(LHSCst, Context),
+                             RHSCst, false, true, I);
     case ICmpInst::ICMP_SLT:        // (X u> 13 & X s< 15) -> no change
       break;
     }
@@ -3901,11 +3935,12 @@ Instruction *InstCombiner::FoldAndOfICmps(Instruction &I,
     case ICmpInst::ICMP_UGT:        // (X s> 13 & X u> 15) -> no change
       break;
     case ICmpInst::ICMP_NE:
-      if (RHSCst == AddOne(LHSCst)) // (X s> 13 & X != 14) -> X s> 14
+      if (RHSCst == AddOne(LHSCst, Context)) // (X s> 13 & X != 14) -> X s> 14
        return new ICmpInst(LHSCC, Val, RHSCst);
       break;                        // (X s> 13 & X != 15) -> no change
     case ICmpInst::ICMP_SLT:        // (X s> 13 & X s< 15) -> (X-14) s< 1
-      return InsertRangeTest(Val, AddOne(LHSCst), RHSCst, true, true, I);
+      return InsertRangeTest(Val, AddOne(LHSCst, Context),
+                             RHSCst, true, true, I);
     case ICmpInst::ICMP_ULT:        // (X s> 13 & X u< 15) -> no change
       break;
     }
@@ -3921,7 +3956,7 @@ Instruction *InstCombiner::visitAnd(BinaryOperator &I) {
   Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
 
   if (isa<UndefValue>(Op1))                         // X & undef -> 0
-    return ReplaceInstUsesWith(I, Constant::getNullValue(I.getType()));
+    return ReplaceInstUsesWith(I, Context->getNullValue(I.getType()));
 
   // and X, X = X
   if (Op0 == Op1)
@@ -4014,7 +4049,7 @@ Instruction *InstCombiner::visitAnd(BinaryOperator &I) {
         // (1 >> x) & 1 --> zext(x == 0)
         if (AndRHSMask == 1 && Op0LHS == AndRHS) {
           Instruction *NewICmp = new ICmpInst(ICmpInst::ICMP_EQ, Op0RHS,
-                                              Constant::getNullValue(I.getType()));
+                                              Context->getNullValue(I.getType()));
           InsertNewInstBefore(NewICmp, I);
           return new ZExtInst(NewICmp, I.getType());
         }
@@ -4042,14 +4077,17 @@ Instruction *InstCombiner::visitAnd(BinaryOperator &I) {
                                CastOp->getName()+".shrunk");
           NewCast = InsertNewInstBefore(NewCast, I);
           // trunc_or_bitcast(C1)&C2
-          Constant *C3 = ConstantExpr::getTruncOrBitCast(AndCI,I.getType());
-          C3 = ConstantExpr::getAnd(C3, AndRHS);
+          Constant *C3 =
+            Context->getConstantExprTruncOrBitCast(AndCI,I.getType());
+          C3 = Context->getConstantExprAnd(C3, AndRHS);
           return BinaryOperator::CreateAnd(NewCast, C3);
         } else if (CastOp->getOpcode() == Instruction::Or) {
           // Change: and (cast (or X, C1) to T), C2
          // into  : trunc(C1)&C2 iff trunc(C1)&C2 == C2
-          Constant *C3 = ConstantExpr::getTruncOrBitCast(AndCI,I.getType());
-          if (ConstantExpr::getAnd(C3, AndRHS) == AndRHS)   // trunc(C1)&C2
+          Constant *C3 =
+            Context->getConstantExprTruncOrBitCast(AndCI,I.getType());
+          if (Context->getConstantExprAnd(C3, AndRHS) == AndRHS)
+            // trunc(C1)&C2
            return ReplaceInstUsesWith(I, AndRHS);
        }
      }
@@ -4065,11 +4103,11 @@ Instruction *InstCombiner::visitAnd(BinaryOperator &I) {
      return NV;
   }
 
-  Value *Op0NotVal = dyn_castNotVal(Op0);
-  Value *Op1NotVal = dyn_castNotVal(Op1);
+  Value *Op0NotVal = dyn_castNotVal(Op0, Context);
+  Value *Op1NotVal = dyn_castNotVal(Op1, Context);
 
   if (Op0NotVal == Op1 || Op1NotVal == Op0)  // A & ~A  == ~A & A == 0
-    return ReplaceInstUsesWith(I, Constant::getNullValue(I.getType()));
+    return ReplaceInstUsesWith(I, Context->getNullValue(I.getType()));
 
   // (~A & ~B) == (~(A | B)) - De Morgan's Law
   if (Op0NotVal && Op1NotVal && isOnlyUse(Op0) && isOnlyUse(Op1)) {
@@ -4139,7 +4177,7 @@ Instruction *InstCombiner::visitAnd(BinaryOperator &I) {
 
   if (ICmpInst *RHS = dyn_cast<ICmpInst>(Op1)) {
     // (icmp1 A, B) & (icmp2 A, B) --> (icmp3 A, B)
-    if (Instruction *R = AssociativeOpt(I, FoldICmpLogical(*this, RHS)))
+    if (Instruction *R = AssociativeOpt(I, FoldICmpLogical(*this, RHS),Context))
      return R;
 
     if (ICmpInst *LHS = dyn_cast<ICmpInst>(Op0))
@@ -4192,7 +4230,7 @@ Instruction *InstCombiner::visitAnd(BinaryOperator &I) {
         // If either of the constants are nans, then the whole thing returns
         // false.
         if (LHSC->getValueAPF().isNaN() || RHSC->getValueAPF().isNaN())
-          return ReplaceInstUsesWith(I, ConstantInt::getFalse());
+          return ReplaceInstUsesWith(I, Context->getConstantIntFalse());
         return new FCmpInst(FCmpInst::FCMP_ORD, LHS->getOperand(0),
                             RHS->getOperand(0));
       }
@@ -4212,7 +4250,7 @@ Instruction *InstCombiner::visitAnd(BinaryOperator &I) {
            return new FCmpInst((FCmpInst::Predicate)Op0CC, Op0LHS, Op0RHS);
          else if (Op0CC == FCmpInst::FCMP_FALSE ||
                   Op1CC == FCmpInst::FCMP_FALSE)
-            return ReplaceInstUsesWith(I, ConstantInt::getFalse());
+            return ReplaceInstUsesWith(I, Context->getConstantIntFalse());
          else if (Op0CC == FCmpInst::FCMP_TRUE)
            return ReplaceInstUsesWith(I, Op1);
          else if (Op1CC == FCmpInst::FCMP_TRUE)
@@ -4234,10 +4272,10 @@ Instruction *InstCombiner::visitAnd(BinaryOperator &I) {
            // uno && oeq -> uno && (ord && eq) -> false
            // uno && ord -> false
            if (!Op0Ordered)
-              return ReplaceInstUsesWith(I, ConstantInt::getFalse());
+              return ReplaceInstUsesWith(I, Context->getConstantIntFalse());
            // ord && ueq -> ord && (uno || eq) -> oeq
            return cast<Instruction>(getFCmpValue(true, Op1Pred,
-                                                  Op0LHS, Op0RHS));
+                                                  Op0LHS, Op0RHS, Context));
          }
        }
      }
@@ -4487,12 +4525,13 @@ Instruction *InstCombiner::FoldOrOfICmps(Instruction &I,
     switch (RHSCC) {
     default: assert(0 && "Unknown integer condition code!");
     case ICmpInst::ICMP_EQ:
-      if (LHSCst == SubOne(RHSCst)) { // (X == 13 | X == 14) -> X-13 <u 2
-        Constant *AddCST = ConstantExpr::getNeg(LHSCst);
+      if (LHSCst == SubOne(RHSCst, Context)) {
+        // (X == 13 | X == 14) -> X-13 <u 2
+        Constant *AddCST = Context->getConstantExprNeg(LHSCst);
         Instruction *Add = BinaryOperator::CreateAdd(Val, AddCST,
                                                      Val->getName()+".off");
         InsertNewInstBefore(Add, I);
-        AddCST = ConstantExpr::getSub(AddOne(RHSCst), LHSCst);
+        AddCST = Context->getConstantExprSub(AddOne(RHSCst, Context), LHSCst);
         return new ICmpInst(ICmpInst::ICMP_ULT, Add, AddCST);
       }
       break;                         // (X == 13 | X == 15) -> no change
@@ -4515,7 +4554,7 @@ Instruction *InstCombiner::FoldOrOfICmps(Instruction &I,
     case ICmpInst::ICMP_NE:          // (X != 13 | X != 15) -> true
     case ICmpInst::ICMP_ULT:         // (X != 13 | X u< 15) -> true
     case ICmpInst::ICMP_SLT:         // (X != 13 | X s< 15) -> true
-      return ReplaceInstUsesWith(I, ConstantInt::getTrue());
+      return ReplaceInstUsesWith(I, Context->getConstantIntTrue());
    }
    break;
  case ICmpInst::ICMP_ULT:
@@ -4528,7 +4567,8 @@ Instruction *InstCombiner::FoldOrOfICmps(Instruction &I,
       // this can cause overflow.
       if (RHSCst->isMaxValue(false))
         return ReplaceInstUsesWith(I, LHS);
-      return InsertRangeTest(Val, LHSCst, AddOne(RHSCst), false, false, I);
+      return InsertRangeTest(Val, LHSCst, AddOne(RHSCst, Context),
+                             false, false, I);
     case ICmpInst::ICMP_SGT:         // (X u< 13 | X s> 15) -> no change
       break;
     case ICmpInst::ICMP_NE:          // (X u< 13 | X != 15) -> X != 15
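The adjacent-constant merge above, (X == 13 | X == 14) -> X-13 <u 2 (with the and-counterpart (X != 13 & X != 14) -> X-13 >u 1 earlier), is the same unsigned-rotation trick applied to a two-element set. Brute-force check:

    #include <cassert>
    #include <cstdint>

    int main() {
      for (uint32_t x = 0; x < 32; ++x) {
        assert(((x == 13) || (x == 14)) == (x - 13u < 2u));
        assert(((x != 13) && (x != 14)) == (x - 13u > 1u));
      }
    }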
@@ -4548,7 +4588,8 @@ Instruction *InstCombiner::FoldOrOfICmps(Instruction &I,
       // this can cause overflow.
       if (RHSCst->isMaxValue(true))
         return ReplaceInstUsesWith(I, LHS);
-      return InsertRangeTest(Val, LHSCst, AddOne(RHSCst), true, false, I);
+      return InsertRangeTest(Val, LHSCst, AddOne(RHSCst, Context),
+                             true, false, I);
     case ICmpInst::ICMP_UGT:         // (X s< 13 | X u> 15) -> no change
       break;
     case ICmpInst::ICMP_NE:          // (X s< 13 | X != 15) -> X != 15
@@ -4568,7 +4609,7 @@ Instruction *InstCombiner::FoldOrOfICmps(Instruction &I,
       break;
     case ICmpInst::ICMP_NE:          // (X u> 13 | X != 15) -> true
     case ICmpInst::ICMP_ULT:         // (X u> 13 | X u< 15) -> true
-      return ReplaceInstUsesWith(I, ConstantInt::getTrue());
+      return ReplaceInstUsesWith(I, Context->getConstantIntTrue());
     case ICmpInst::ICMP_SLT:         // (X u> 13 | X s< 15) -> no change
       break;
     }
@@ -4583,7 +4624,7 @@ Instruction *InstCombiner::FoldOrOfICmps(Instruction &I,
       break;
     case ICmpInst::ICMP_NE:          // (X s> 13 | X != 15) -> true
     case ICmpInst::ICMP_SLT:         // (X s> 13 | X s< 15) -> true
-      return ReplaceInstUsesWith(I, ConstantInt::getTrue());
+      return ReplaceInstUsesWith(I, Context->getConstantIntTrue());
     case ICmpInst::ICMP_ULT:         // (X s> 13 | X u< 15) -> no change
       break;
     }
@@ -4627,7 +4668,7 @@ Instruction *InstCombiner::visitOr(BinaryOperator &I) {
   Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
 
   if (isa<UndefValue>(Op1))                       // X | undef -> -1
-    return ReplaceInstUsesWith(I, Constant::getAllOnesValue(I.getType()));
+    return ReplaceInstUsesWith(I, Context->getAllOnesValue(I.getType()));
 
   // or X, X = X
   if (Op0 == Op1)
@@ -4655,7 +4696,7 @@ Instruction *InstCombiner::visitOr(BinaryOperator &I) {
       InsertNewInstBefore(Or, I);
       Or->takeName(Op0);
       return BinaryOperator::CreateAnd(Or,
-               ConstantInt::get(RHS->getValue() | C1->getValue()));
+               Context->getConstantInt(RHS->getValue() | C1->getValue()));
     }
 
     // (X ^ C1) | C2 --> (X | C2) ^ (C1&~C2)
@@ -4664,7 +4705,7 @@ Instruction *InstCombiner::visitOr(BinaryOperator &I) {
       InsertNewInstBefore(Or, I);
       Or->takeName(Op0);
       return BinaryOperator::CreateXor(Or,
-                ConstantInt::get(C1->getValue() & ~RHS->getValue()));
+                Context->getConstantInt(C1->getValue() & ~RHS->getValue()));
     }
 
     // Try to fold constant and into select arguments.
@@ -4824,14 +4865,14 @@ Instruction *InstCombiner::visitOr(BinaryOperator &I) {
 
   if (match(Op0, m_Not(m_Value(A)))) {   // ~A | Op1
     if (A == Op1)   // ~A | A == -1
-      return ReplaceInstUsesWith(I, Constant::getAllOnesValue(I.getType()));
+      return ReplaceInstUsesWith(I, Context->getAllOnesValue(I.getType()));
   } else {
     A = 0;
   }
   // Note, A is still live here!
   if (match(Op1, m_Not(m_Value(B)))) {   // Op0 | ~B
     if (Op0 == B)
-      return ReplaceInstUsesWith(I, Constant::getAllOnesValue(I.getType()));
+      return ReplaceInstUsesWith(I, Context->getAllOnesValue(I.getType()));
 
     // (~A | ~B) == (~(A & B)) - De Morgan's Law
     if (A && isOnlyUse(Op0) && isOnlyUse(Op1)) {
@@ -4843,7 +4884,7 @@ Instruction *InstCombiner::visitOr(BinaryOperator &I) {
 
   // (icmp1 A, B) | (icmp2 A, B) --> (icmp3 A, B)
   if (ICmpInst *RHS = dyn_cast<ICmpInst>(I.getOperand(1))) {
-    if (Instruction *R = AssociativeOpt(I, FoldICmpLogical(*this, RHS)))
+    if (Instruction *R = AssociativeOpt(I, FoldICmpLogical(*this, RHS),Context))
      return R;
 
     if (ICmpInst *LHS = dyn_cast<ICmpInst>(I.getOperand(0)))
@@ -4887,7 +4928,7 @@ Instruction *InstCombiner::visitOr(BinaryOperator &I) {
         // If either of the constants are nans, then the whole thing returns
         // true.
         if (LHSC->getValueAPF().isNaN() || RHSC->getValueAPF().isNaN())
-          return ReplaceInstUsesWith(I, ConstantInt::getTrue());
+          return ReplaceInstUsesWith(I, Context->getConstantIntTrue());
 
         // Otherwise, no need to compare the two constants, compare the
         // rest.
@@ -4910,7 +4951,7 @@ Instruction *InstCombiner::visitOr(BinaryOperator &I) {
            return new FCmpInst((FCmpInst::Predicate)Op0CC, Op0LHS, Op0RHS);
          else if (Op0CC == FCmpInst::FCMP_TRUE ||
                   Op1CC == FCmpInst::FCMP_TRUE)
-            return ReplaceInstUsesWith(I, ConstantInt::getTrue());
+            return ReplaceInstUsesWith(I, Context->getConstantIntTrue());
          else if (Op0CC == FCmpInst::FCMP_FALSE)
            return ReplaceInstUsesWith(I, Op1);
          else if (Op1CC == FCmpInst::FCMP_FALSE)
@@ -4923,7 +4964,7 @@ Instruction *InstCombiner::visitOr(BinaryOperator &I) {
            // If both are ordered or unordered, return a new fcmp with
            // or'ed predicates.
            Value *RV = getFCmpValue(Op0Ordered, Op0Pred|Op1Pred,
-                                     Op0LHS, Op0RHS);
+                                     Op0LHS, Op0RHS, Context);
            if (Instruction *I = dyn_cast<Instruction>(RV))
              return I;
            // Otherwise, it's a constant boolean value...
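One of the visitOr folds above, (X ^ C1) | C2 --> (X | C2) ^ (C1&~C2), works because the or forces every C2 bit to one, so the xor remains visible only outside C2. A brute-force check over all 8-bit values (the constants are arbitrary):

    #include <cassert>
    #include <cstdint>

    int main() {
      const uint8_t C1 = 0x5a, C2 = 0x0f;
      for (unsigned x = 0; x < 256; ++x) {
        uint8_t X = (uint8_t)x;
        assert((uint8_t)((X ^ C1) | C2) ==
               (uint8_t)((X | C2) ^ (C1 & (uint8_t)~C2)));
      }
    }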
@@ -4960,14 +5001,14 @@ Instruction *InstCombiner::visitXor(BinaryOperator &I) {
     if (isa<UndefValue>(Op0))
       // Handle undef ^ undef -> 0 special case. This is a common
       // idiom (misuse).
-      return ReplaceInstUsesWith(I, Constant::getNullValue(I.getType()));
+      return ReplaceInstUsesWith(I, Context->getNullValue(I.getType()));
     return ReplaceInstUsesWith(I, Op1);  // X ^ undef -> undef
   }
 
   // xor X, X = 0, even if X is nested in a sequence of Xor's.
-  if (Instruction *Result = AssociativeOpt(I, XorSelf(Op1))) {
+  if (Instruction *Result = AssociativeOpt(I, XorSelf(Op1), Context)) {
     assert(Result == &I && "AssociativeOpt didn't work?"); Result=Result;
-    return ReplaceInstUsesWith(I, Constant::getNullValue(I.getType()));
+    return ReplaceInstUsesWith(I, Context->getNullValue(I.getType()));
   }
 
   // See if we can simplify any instructions used by the instruction whose sole
@@ -4979,14 +5020,14 @@ Instruction *InstCombiner::visitXor(BinaryOperator &I) {
      return ReplaceInstUsesWith(I, Op0);  // X ^ <0,0> -> X
 
   // Is this a ~ operation?
-  if (Value *NotOp = dyn_castNotVal(&I)) {
+  if (Value *NotOp = dyn_castNotVal(&I, Context)) {
     // ~(~X & Y) --> (X | ~Y) - De Morgan's Law
     // ~(~X | Y) === (X & ~Y) - De Morgan's Law
     if (BinaryOperator *Op0I = dyn_cast<BinaryOperator>(NotOp)) {
       if (Op0I->getOpcode() == Instruction::And ||
           Op0I->getOpcode() == Instruction::Or) {
-        if (dyn_castNotVal(Op0I->getOperand(1))) Op0I->swapOperands();
-        if (Value *Op0NotVal = dyn_castNotVal(Op0I->getOperand(0))) {
+        if (dyn_castNotVal(Op0I->getOperand(1), Context)) Op0I->swapOperands();
+        if (Value *Op0NotVal = dyn_castNotVal(Op0I->getOperand(0), Context)) {
           Instruction *NotY =
             BinaryOperator::CreateNot(Op0I->getOperand(1),
                                       Op0I->getOperand(1)->getName()+".not");
@@ -5002,7 +5043,7 @@ Instruction *InstCombiner::visitXor(BinaryOperator &I) {
 
   if (ConstantInt *RHS = dyn_cast<ConstantInt>(Op1)) {
-    if (RHS == ConstantInt::getTrue() && Op0->hasOneUse()) {
+    if (RHS == Context->getConstantIntTrue() && Op0->hasOneUse()) {
       // xor (cmp A, B), true = not (cmp A, B) = !cmp A, B
       if (ICmpInst *ICI = dyn_cast<ICmpInst>(Op0))
         return new ICmpInst(ICI->getInversePredicate(),
@@ -5019,7 +5060,8 @@ Instruction *InstCombiner::visitXor(BinaryOperator &I) {
         if (CI->hasOneUse() && Op0C->hasOneUse()) {
           Instruction::CastOps Opcode = Op0C->getOpcode();
          if (Opcode == Instruction::ZExt || Opcode == Instruction::SExt) {
-            if (RHS == ConstantExpr::getCast(Opcode, ConstantInt::getTrue(),
+            if (RHS == Context->getConstantExprCast(Opcode,
+                                             Context->getConstantIntTrue(),
                                              Op0C->getDestTy())) {
               Instruction *NewCI = InsertNewInstBefore(CmpInst::Create(
                                      CI->getOpcode(), CI->getInversePredicate(),
@@ -5036,9 +5078,9 @@ Instruction *InstCombiner::visitXor(BinaryOperator &I) {
       // ~(c-X) == X-c-1 == X+(-c-1)
      if (Op0I->getOpcode() == Instruction::Sub && RHS->isAllOnesValue())
        if (Constant *Op0I0C = dyn_cast<Constant>(Op0I->getOperand(0))) {
-          Constant *NegOp0I0C = ConstantExpr::getNeg(Op0I0C);
-          Constant *ConstantRHS = ConstantExpr::getSub(NegOp0I0C,
-                                             ConstantInt::get(I.getType(), 1));
+          Constant *NegOp0I0C = Context->getConstantExprNeg(Op0I0C);
+          Constant *ConstantRHS = Context->getConstantExprSub(NegOp0I0C,
+                                       Context->getConstantInt(I.getType(), 1));
          return BinaryOperator::CreateAdd(Op0I->getOperand(1), ConstantRHS);
        }
 
@@ -5046,26 +5088,27 @@ Instruction *InstCombiner::visitXor(BinaryOperator &I) {
        if (Op0I->getOpcode() == Instruction::Add) {
          // ~(X-c) --> (-c-1)-X
          if (RHS->isAllOnesValue()) {
-            Constant *NegOp0CI = ConstantExpr::getNeg(Op0CI);
+            Constant *NegOp0CI = Context->getConstantExprNeg(Op0CI);
            return BinaryOperator::CreateSub(
-                           ConstantExpr::getSub(NegOp0CI,
-                                             ConstantInt::get(I.getType(), 1)),
-                           Op0I->getOperand(0));
+                      Context->getConstantExprSub(NegOp0CI,
+                                      Context->getConstantInt(I.getType(), 1)),
+                      Op0I->getOperand(0));
          } else if (RHS->getValue().isSignBit()) {
            // (X + C) ^ signbit -> (X + C + signbit)
-            Constant *C = ConstantInt::get(RHS->getValue() + Op0CI->getValue());
+            Constant *C =
+              Context->getConstantInt(RHS->getValue() + Op0CI->getValue());
            return BinaryOperator::CreateAdd(Op0I->getOperand(0), C);
          }
        } else if (Op0I->getOpcode() == Instruction::Or) {
          // (X|C1)^C2 -> X^(C1|C2) iff X&~C1 == 0
          if (MaskedValueIsZero(Op0I->getOperand(0), Op0CI->getValue())) {
-            Constant *NewRHS = ConstantExpr::getOr(Op0CI, RHS);
+            Constant *NewRHS = Context->getConstantExprOr(Op0CI, RHS);
            // Anything in both C1 and C2 is known to be zero, remove it from
            // NewRHS.
-            Constant *CommonBits = ConstantExpr::getAnd(Op0CI, RHS);
-            NewRHS = ConstantExpr::getAnd(NewRHS,
-                                          ConstantExpr::getNot(CommonBits));
+            Constant *CommonBits = Context->getConstantExprAnd(Op0CI, RHS);
+            NewRHS = Context->getConstantExprAnd(NewRHS,
+                                       Context->getConstantExprNot(CommonBits));
            AddToWorkList(Op0I);
            I.setOperand(0, Op0I->getOperand(0));
            I.setOperand(1, NewRHS);
@@ -5084,13 +5127,13 @@ Instruction *InstCombiner::visitXor(BinaryOperator &I) {
      return NV;
   }
 
-  if (Value *X = dyn_castNotVal(Op0))   // ~A ^ A == -1
+  if (Value *X = dyn_castNotVal(Op0, Context))   // ~A ^ A == -1
    if (X == Op1)
-      return ReplaceInstUsesWith(I, Constant::getAllOnesValue(I.getType()));
+      return ReplaceInstUsesWith(I, Context->getAllOnesValue(I.getType()));
 
-  if (Value *X = dyn_castNotVal(Op1))   // A ^ ~A == -1
+  if (Value *X = dyn_castNotVal(Op1, Context))   // A ^ ~A == -1
    if (X == Op0)
-      return ReplaceInstUsesWith(I, Constant::getAllOnesValue(I.getType()));
+      return ReplaceInstUsesWith(I, Context->getAllOnesValue(I.getType()));
 
   BinaryOperator *Op1I = dyn_cast<BinaryOperator>(Op1);
 
@@ -5201,7 +5244,7 @@ Instruction *InstCombiner::visitXor(BinaryOperator &I) {
 
   // (icmp1 A, B) ^ (icmp2 A, B) --> (icmp3 A, B)
   if (ICmpInst *RHS = dyn_cast<ICmpInst>(I.getOperand(1)))
-    if (Instruction *R = AssociativeOpt(I, FoldICmpLogical(*this, RHS)))
+    if (Instruction *R = AssociativeOpt(I, FoldICmpLogical(*this, RHS),Context))
      return R;
 
   // fold (xor (cast A), (cast B)) -> (cast (xor A, B))
@@ -5227,8 +5270,9 @@ Instruction *InstCombiner::visitXor(BinaryOperator &I) {
   return Changed ? &I : 0;
 }
 
-static ConstantInt *ExtractElement(Constant *V, Constant *Idx) {
-  return cast<ConstantInt>(ConstantExpr::getExtractElement(V, Idx));
+static ConstantInt *ExtractElement(Constant *V, Constant *Idx,
+                                   LLVMContext* Context) {
+  return cast<ConstantInt>(Context->getConstantExprExtractElement(V, Idx));
 }
 
 static bool HasAddOverflow(ConstantInt *Result,
@@ -5246,15 +5290,16 @@ static bool HasAddOverflow(ConstantInt *Result,
 /// AddWithOverflow - Compute Result = In1+In2, returning true if the result
 /// overflowed for this type.
 static bool AddWithOverflow(Constant *&Result, Constant *In1,
-                            Constant *In2, bool IsSigned = false) {
-  Result = ConstantExpr::getAdd(In1, In2);
+                            Constant *In2, LLVMContext* Context,
+                            bool IsSigned = false) {
+  Result = Context->getConstantExprAdd(In1, In2);
 
   if (const VectorType *VTy = dyn_cast<VectorType>(In1->getType())) {
     for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i) {
-      Constant *Idx = ConstantInt::get(Type::Int32Ty, i);
-      if (HasAddOverflow(ExtractElement(Result, Idx),
-                         ExtractElement(In1, Idx),
-                         ExtractElement(In2, Idx),
+      Constant *Idx = Context->getConstantInt(Type::Int32Ty, i);
+      if (HasAddOverflow(ExtractElement(Result, Idx, Context),
+                         ExtractElement(In1, Idx, Context),
+                         ExtractElement(In2, Idx, Context),
                          IsSigned))
        return true;
    }
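HasAddOverflow/AddWithOverflow above detect wraparound after computing the constant result: adding a non-negative value must not decrease the sum, and adding a negative value must not increase it. A standalone equivalent for 32-bit values (two's-complement wraparound assumed, as in LLVM's integer model):

    #include <cassert>
    #include <cstdint>

    bool hasAddOverflow(int32_t result, int32_t in1, int32_t in2) {
      if (in2 >= 0)
        return result < in1;    // adding non-negative must not decrease
      return result > in1;      // adding negative must not increase
    }

    int main() {
      int32_t wrapped = (int32_t)((uint32_t)INT32_MAX + 1u);  // INT32_MIN
      assert(hasAddOverflow(wrapped, INT32_MAX, 1));
      assert(!hasAddOverflow(INT32_MAX - 1 + 1, INT32_MAX - 1, 1));
    }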
@@ -5281,15 +5326,16 @@ static bool HasSubOverflow(ConstantInt *Result,
 /// SubWithOverflow - Compute Result = In1-In2, returning true if the result
 /// overflowed for this type.
 static bool SubWithOverflow(Constant *&Result, Constant *In1,
-                            Constant *In2, bool IsSigned = false) {
-  Result = ConstantExpr::getSub(In1, In2);
+                            Constant *In2, LLVMContext* Context,
+                            bool IsSigned = false) {
+  Result = Context->getConstantExprSub(In1, In2);
 
   if (const VectorType *VTy = dyn_cast<VectorType>(In1->getType())) {
     for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i) {
-      Constant *Idx = ConstantInt::get(Type::Int32Ty, i);
-      if (HasSubOverflow(ExtractElement(Result, Idx),
-                         ExtractElement(In1, Idx),
-                         ExtractElement(In2, Idx),
+      Constant *Idx = Context->getConstantInt(Type::Int32Ty, i);
+      if (HasSubOverflow(ExtractElement(Result, Idx, Context),
+                         ExtractElement(In1, Idx, Context),
+                         ExtractElement(In2, Idx, Context),
                          IsSigned))
        return true;
    }
@@ -5308,7 +5354,8 @@ static Value *EmitGEPOffset(User *GEP, Instruction &I, InstCombiner &IC) {
   TargetData &TD = IC.getTargetData();
   gep_type_iterator GTI = gep_type_begin(GEP);
   const Type *IntPtrTy = TD.getIntPtrType();
-  Value *Result = Constant::getNullValue(IntPtrTy);
+  LLVMContext* Context = IC.getContext();
+  Value *Result = Context->getNullValue(IntPtrTy);
 
   // Build a mask for high order bits.
   unsigned IntPtrWidth = TD.getPointerSizeInBits();
@@ -5326,20 +5373,22 @@ static Value *EmitGEPOffset(User *GEP, Instruction &I, InstCombiner &IC) {
         Size = TD.getStructLayout(STy)->getElementOffset(OpC->getZExtValue());
 
         if (ConstantInt *RC = dyn_cast<ConstantInt>(Result))
-          Result = ConstantInt::get(RC->getValue() + APInt(IntPtrWidth, Size));
+          Result =
+            Context->getConstantInt(RC->getValue() + APInt(IntPtrWidth, Size));
        else
          Result = IC.InsertNewInstBefore(
                   BinaryOperator::CreateAdd(Result,
-                                             ConstantInt::get(IntPtrTy, Size),
+                                        Context->getConstantInt(IntPtrTy, Size),
                                             GEP->getName()+".offs"), I);
        continue;
      }
 
-      Constant *Scale = ConstantInt::get(IntPtrTy, Size);
-      Constant *OC = ConstantExpr::getIntegerCast(OpC, IntPtrTy, true /*SExt*/);
-      Scale = ConstantExpr::getMul(OC, Scale);
+      Constant *Scale = Context->getConstantInt(IntPtrTy, Size);
+      Constant *OC =
+        Context->getConstantExprIntegerCast(OpC, IntPtrTy, true /*SExt*/);
+      Scale = Context->getConstantExprMul(OC, Scale);
      if (Constant *RC = dyn_cast<Constant>(Result))
-        Result = ConstantExpr::getAdd(RC, Scale);
+        Result = Context->getConstantExprAdd(RC, Scale);
      else {
        // Emit an add instruction.
        Result = IC.InsertNewInstBefore(
@@ -5351,16 +5400,16 @@ static Value *EmitGEPOffset(User *GEP, Instruction &I, InstCombiner &IC) {
     // Convert to correct type.
     if (Op->getType() != IntPtrTy) {
      if (Constant *OpC = dyn_cast<Constant>(Op))
-        Op = ConstantExpr::getIntegerCast(OpC, IntPtrTy, true);
+        Op = Context->getConstantExprIntegerCast(OpC, IntPtrTy, true);
      else
        Op = IC.InsertNewInstBefore(CastInst::CreateIntegerCast(Op, IntPtrTy,
                                                                true,
                                                                Op->getName()+".c"), I);
     }
     if (Size != 1) {
-      Constant *Scale = ConstantInt::get(IntPtrTy, Size);
+      Constant *Scale = Context->getConstantInt(IntPtrTy, Size);
      if (Constant *OpC = dyn_cast<Constant>(Op))
-        Op = ConstantExpr::getMul(OpC, Scale);
+        Op = Context->getConstantExprMul(OpC, Scale);
      else    // We'll let instcombine(mul) convert this to a shl if possible.
        Op = IC.InsertNewInstBefore(BinaryOperator::CreateMul(Op, Scale,
                                                  GEP->getName()+".idx"), I);
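EmitGEPOffset above lowers a GEP into explicit arithmetic: every struct field contributes a fixed byte offset, and every array index contributes index times the element size, with constant steps folded eagerly and variable ones emitted as mul+add. A standalone model of that sum (the sizes below are made-up assumptions, not from any real TargetData):

    #include <cassert>
    #include <cstdint>
    #include <vector>

    struct Step { int64_t scale; int64_t index; };  // offset += scale * index

    int64_t gepOffset(const std::vector<Step> &steps) {
      int64_t off = 0;
      for (const Step &s : steps)
        off += s.scale * s.index;   // constant steps fold; others emit mul+add
      return off;
    }

    int main() {
      // e.g. p[2].field, with a 24-byte struct and the field at byte 8:
      // 2*24 + 1*8 = 56.
      assert(gepOffset({{24, 2}, {8, 1}}) == 56);
    }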
if (isa<Constant>(Op) && isa<Constant>(Result)) - Result = ConstantExpr::getAdd(cast<Constant>(Op), + Result = Context->getConstantExprAdd(cast<Constant>(Op), cast<Constant>(Result)); else Result = IC.InsertNewInstBefore(BinaryOperator::CreateAdd(Op, Result, @@ -5479,7 +5528,7 @@ static Value *EvaluateGEPOffsetExpression(User *GEP, Instruction &I, VariableIdx = CastInst::CreateIntegerCast(VariableIdx, IntPtrTy, true /*SExt*/, VariableIdx->getNameStart(), &I); - Constant *OffsetVal = ConstantInt::get(IntPtrTy, NewOffs); + Constant *OffsetVal = IC.getContext()->getConstantInt(IntPtrTy, NewOffs); return BinaryOperator::CreateAdd(VariableIdx, OffsetVal, "offset", &I); } @@ -5506,7 +5555,7 @@ Instruction *InstCombiner::FoldGEPICmp(User *GEPLHS, Value *RHS, if (Offset == 0) Offset = EmitGEPOffset(GEPLHS, I, *this); return new ICmpInst(ICmpInst::getSignedPredicate(Cond), Offset, - Constant::getNullValue(Offset->getType())); + Context->getNullValue(Offset->getType())); } else if (User *GEPRHS = dyn_castGetElementPtr(RHS)) { // If the base pointers are different, but the indices are the same, just // compare the base pointer. @@ -5573,7 +5622,7 @@ Instruction *InstCombiner::FoldGEPICmp(User *GEPLHS, Value *RHS, if (NumDifferences == 0) // SAME GEP? return ReplaceInstUsesWith(I, // No comparison is needed here. - ConstantInt::get(Type::Int1Ty, + Context->getConstantInt(Type::Int1Ty, ICmpInst::isTrueWhenEqual(Cond))); else if (NumDifferences == 1) { @@ -5657,9 +5706,9 @@ Instruction *InstCombiner::FoldFCmp_IntToFP_Cst(FCmpInst &I, Pred = ICmpInst::ICMP_NE; break; case FCmpInst::FCMP_ORD: - return ReplaceInstUsesWith(I, ConstantInt::getTrue()); + return ReplaceInstUsesWith(I, Context->getConstantIntTrue()); case FCmpInst::FCMP_UNO: - return ReplaceInstUsesWith(I, ConstantInt::getFalse()); + return ReplaceInstUsesWith(I, Context->getConstantIntFalse()); } const IntegerType *IntTy = cast<IntegerType>(LHSI->getOperand(0)->getType()); @@ -5679,8 +5728,8 @@ Instruction *InstCombiner::FoldFCmp_IntToFP_Cst(FCmpInst &I, if (SMax.compare(RHS) == APFloat::cmpLessThan) { // smax < 13123.0 if (Pred == ICmpInst::ICMP_NE || Pred == ICmpInst::ICMP_SLT || Pred == ICmpInst::ICMP_SLE) - return ReplaceInstUsesWith(I, ConstantInt::getTrue()); - return ReplaceInstUsesWith(I, ConstantInt::getFalse()); + return ReplaceInstUsesWith(I, Context->getConstantIntTrue()); + return ReplaceInstUsesWith(I, Context->getConstantIntFalse()); } } else { // If the RHS value is > UnsignedMax, fold the comparison. 
@@ -5691,8 +5740,8 @@ Instruction *InstCombiner::FoldFCmp_IntToFP_Cst(FCmpInst &I,
     if (UMax.compare(RHS) == APFloat::cmpLessThan) {  // umax < 13123.0
       if (Pred == ICmpInst::ICMP_NE  || Pred == ICmpInst::ICMP_ULT ||
           Pred == ICmpInst::ICMP_ULE)
-        return ReplaceInstUsesWith(I, ConstantInt::getTrue());
-      return ReplaceInstUsesWith(I, ConstantInt::getFalse());
+        return ReplaceInstUsesWith(I, Context->getConstantIntTrue());
+      return ReplaceInstUsesWith(I, Context->getConstantIntFalse());
     }
   }
 
@@ -5704,8 +5753,8 @@ Instruction *InstCombiner::FoldFCmp_IntToFP_Cst(FCmpInst &I,
     if (SMin.compare(RHS) == APFloat::cmpGreaterThan) { // smin > 12312.0
       if (Pred == ICmpInst::ICMP_NE || Pred == ICmpInst::ICMP_SGT ||
           Pred == ICmpInst::ICMP_SGE)
-        return ReplaceInstUsesWith(I,ConstantInt::getTrue());
-      return ReplaceInstUsesWith(I, ConstantInt::getFalse());
+        return ReplaceInstUsesWith(I, Context->getConstantIntTrue());
+      return ReplaceInstUsesWith(I, Context->getConstantIntFalse());
     }
   }
 
@@ -5714,12 +5763,12 @@ Instruction *InstCombiner::FoldFCmp_IntToFP_Cst(FCmpInst &I,
   // casting the FP value to the integer value and back, checking for equality.
   // Don't do this for zero, because -0.0 is not fractional.
   Constant *RHSInt = LHSUnsigned
-    ? ConstantExpr::getFPToUI(RHSC, IntTy)
-    : ConstantExpr::getFPToSI(RHSC, IntTy);
+    ? Context->getConstantExprFPToUI(RHSC, IntTy)
+    : Context->getConstantExprFPToSI(RHSC, IntTy);
   if (!RHS.isZero()) {
     bool Equal = LHSUnsigned
-      ? ConstantExpr::getUIToFP(RHSInt, RHSC->getType()) == RHSC
-      : ConstantExpr::getSIToFP(RHSInt, RHSC->getType()) == RHSC;
+      ? Context->getConstantExprUIToFP(RHSInt, RHSC->getType()) == RHSC
+      : Context->getConstantExprSIToFP(RHSInt, RHSC->getType()) == RHSC;
     if (!Equal) {
       // If we had a comparison against a fractional value, we have to adjust
      // the compare predicate and sometimes the value.  RHSC is rounded towards
@@ -5727,14 +5776,14 @@ Instruction *InstCombiner::FoldFCmp_IntToFP_Cst(FCmpInst &I,
       switch (Pred) {
       default: assert(0 && "Unexpected integer comparison!");
       case ICmpInst::ICMP_NE:  // (float)int != 4.4   --> true
-        return ReplaceInstUsesWith(I, ConstantInt::getTrue());
+        return ReplaceInstUsesWith(I, Context->getConstantIntTrue());
       case ICmpInst::ICMP_EQ:  // (float)int == 4.4   --> false
-        return ReplaceInstUsesWith(I, ConstantInt::getFalse());
+        return ReplaceInstUsesWith(I, Context->getConstantIntFalse());
       case ICmpInst::ICMP_ULE:
         // (float)int <= 4.4   --> int <= 4
         // (float)int <= -4.4  --> false
         if (RHS.isNegative())
-          return ReplaceInstUsesWith(I, ConstantInt::getFalse());
+          return ReplaceInstUsesWith(I, Context->getConstantIntFalse());
         break;
       case ICmpInst::ICMP_SLE:
         // (float)int <= 4.4   --> int <= 4
@@ -5746,7 +5795,7 @@ Instruction *InstCombiner::FoldFCmp_IntToFP_Cst(FCmpInst &I,
         // (float)int < -4.4   --> false
         // (float)int < 4.4    --> int <= 4
         if (RHS.isNegative())
-          return ReplaceInstUsesWith(I, ConstantInt::getFalse());
+          return ReplaceInstUsesWith(I, Context->getConstantIntFalse());
         Pred = ICmpInst::ICMP_ULE;
         break;
       case ICmpInst::ICMP_SLT:
@@ -5759,7 +5808,7 @@ Instruction *InstCombiner::FoldFCmp_IntToFP_Cst(FCmpInst &I,
         // (float)int > 4.4    --> int > 4
         // (float)int > -4.4   --> true
         if (RHS.isNegative())
-          return ReplaceInstUsesWith(I, ConstantInt::getTrue());
+          return ReplaceInstUsesWith(I, Context->getConstantIntTrue());
         break;
       case ICmpInst::ICMP_SGT:
         // (float)int > 4.4    --> int > 4
@@ -5771,7 +5820,7 @@ Instruction *InstCombiner::FoldFCmp_IntToFP_Cst(FCmpInst &I,
         // (float)int >= -4.4   --> true
         // (float)int >= 4.4    --> int > 4
         if (!RHS.isNegative())
-          return ReplaceInstUsesWith(I, ConstantInt::getTrue());
+          return ReplaceInstUsesWith(I, Context->getConstantIntTrue());
         Pred = ICmpInst::ICMP_UGT;
         break;
       case ICmpInst::ICMP_SGE:
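The predicate table above is easier to follow with concrete numbers; a hedged sketch of the underlying reasoning in standalone C++ (the 4.4 constant is illustrative):

    #include <cassert>

    // Why (float)i < 4.4 can become i <= 4 for a signed integer i:
    // 4.4 is not exactly representable, and (float)i is an integer value,
    // so comparing against the rounded bound is equivalent.
    int main() {
      for (int i = -10; i <= 10; ++i)
        assert(((double)i < 4.4) == (i <= 4));
    }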
@@ -5795,9 +5844,9 @@ Instruction *InstCombiner::visitFCmpInst(FCmpInst &I) {
 
   // Fold trivial predicates.
   if (I.getPredicate() == FCmpInst::FCMP_FALSE)
-    return ReplaceInstUsesWith(I, ConstantInt::getFalse());
+    return ReplaceInstUsesWith(I, Context->getConstantIntFalse());
   if (I.getPredicate() == FCmpInst::FCMP_TRUE)
-    return ReplaceInstUsesWith(I, ConstantInt::getTrue());
+    return ReplaceInstUsesWith(I, Context->getConstantIntTrue());
 
   // Simplify 'fcmp pred X, X'
   if (Op0 == Op1) {
@@ -5806,11 +5855,11 @@ Instruction *InstCombiner::visitFCmpInst(FCmpInst &I) {
     case FCmpInst::FCMP_UEQ:    // True if unordered or equal
     case FCmpInst::FCMP_UGE:    // True if unordered, greater than, or equal
     case FCmpInst::FCMP_ULE:    // True if unordered, less than, or equal
-      return ReplaceInstUsesWith(I, ConstantInt::getTrue());
+      return ReplaceInstUsesWith(I, Context->getConstantIntTrue());
     case FCmpInst::FCMP_OGT:    // True if ordered and greater than
     case FCmpInst::FCMP_OLT:    // True if ordered and less than
     case FCmpInst::FCMP_ONE:    // True if ordered and operands are unequal
-      return ReplaceInstUsesWith(I, ConstantInt::getFalse());
+      return ReplaceInstUsesWith(I, Context->getConstantIntFalse());
 
     case FCmpInst::FCMP_UNO:    // True if unordered: isnan(X) | isnan(Y)
     case FCmpInst::FCMP_ULT:    // True if unordered or less than
@@ -5818,7 +5867,7 @@ Instruction *InstCombiner::visitFCmpInst(FCmpInst &I) {
     case FCmpInst::FCMP_UNE:    // True if unordered or not equal
       // Canonicalize these to be 'fcmp uno %X, 0.0'.
       I.setPredicate(FCmpInst::FCMP_UNO);
-      I.setOperand(1, Constant::getNullValue(Op0->getType()));
+      I.setOperand(1, Context->getNullValue(Op0->getType()));
       return &I;
 
     case FCmpInst::FCMP_ORD:    // True if ordered (no nans)
@@ -5827,13 +5876,13 @@ Instruction *InstCombiner::visitFCmpInst(FCmpInst &I) {
     case FCmpInst::FCMP_OLE:    // True if ordered and less than or equal
       // Canonicalize these to be 'fcmp ord %X, 0.0'.
       I.setPredicate(FCmpInst::FCMP_ORD);
-      I.setOperand(1, Constant::getNullValue(Op0->getType()));
+      I.setOperand(1, Context->getNullValue(Op0->getType()));
       return &I;
     }
   }
 
   if (isa<UndefValue>(Op1))                  // fcmp pred X, undef -> undef
-    return ReplaceInstUsesWith(I, UndefValue::get(Type::Int1Ty));
+    return ReplaceInstUsesWith(I, Context->getUndef(Type::Int1Ty));
 
   // Handle fcmp with constant RHS
   if (Constant *RHSC = dyn_cast<Constant>(Op1)) {
@@ -5841,11 +5890,11 @@ Instruction *InstCombiner::visitFCmpInst(FCmpInst &I) {
     if (ConstantFP *CFP = dyn_cast<ConstantFP>(RHSC)) {
       if (CFP->getValueAPF().isNaN()) {
         if (FCmpInst::isOrdered(I.getPredicate()))   // True if ordered and...
-          return ReplaceInstUsesWith(I, ConstantInt::getFalse());
+          return ReplaceInstUsesWith(I, Context->getConstantIntFalse());
         assert(FCmpInst::isUnordered(I.getPredicate()) &&
                "Comparison must be either ordered or unordered!");
         // True if unordered.
-        return ReplaceInstUsesWith(I, ConstantInt::getTrue());
+        return ReplaceInstUsesWith(I, Context->getConstantIntTrue());
       }
     }
 
@@ -5872,14 +5921,14 @@ Instruction *InstCombiner::visitFCmpInst(FCmpInst &I) {
         if (LHSI->hasOneUse()) {
           if (Constant *C = dyn_cast<Constant>(LHSI->getOperand(1))) {
             // Fold the known value into the constant operand.
-            Op1 = ConstantExpr::getCompare(I.getPredicate(), C, RHSC);
+            Op1 = Context->getConstantExprCompare(I.getPredicate(), C, RHSC);
             // Insert a new FCmp of the other select operand.
             Op2 = InsertNewInstBefore(new FCmpInst(I.getPredicate(),
                                                    LHSI->getOperand(2), RHSC,
                                                    I.getName()), I);
           } else if (Constant *C = dyn_cast<Constant>(LHSI->getOperand(2))) {
             // Fold the known value into the constant operand.
-            Op2 = ConstantExpr::getCompare(I.getPredicate(), C, RHSC);
+            Op2 = Context->getConstantExprCompare(I.getPredicate(), C, RHSC);
             // Insert a new FCmp of the other select operand.
             Op1 = InsertNewInstBefore(new FCmpInst(I.getPredicate(),
                                                    LHSI->getOperand(1), RHSC,
@@ -5903,11 +5952,11 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) {
 
   // icmp X, X
   if (Op0 == Op1)
-    return ReplaceInstUsesWith(I, ConstantInt::get(Type::Int1Ty,
+    return ReplaceInstUsesWith(I, Context->getConstantInt(Type::Int1Ty,
                                                    I.isTrueWhenEqual()));
 
   if (isa<UndefValue>(Op1))                  // X icmp undef -> undef
-    return ReplaceInstUsesWith(I, UndefValue::get(Type::Int1Ty));
+    return ReplaceInstUsesWith(I, Context->getUndef(Type::Int1Ty));
 
   // icmp <global/alloca*/null>, <global/alloca*/null> - Global/Stack value
   // addresses never equal each other!  We already know that Op0 != Op1.
@@ -5915,7 +5964,7 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) {
        isa<ConstantPointerNull>(Op0)) &&
       (isa<GlobalValue>(Op1) || isa<AllocaInst>(Op1) ||
        isa<ConstantPointerNull>(Op1)))
-    return ReplaceInstUsesWith(I, ConstantInt::get(Type::Int1Ty,
+    return ReplaceInstUsesWith(I, Context->getConstantInt(Type::Int1Ty,
                                                    !I.isTrueWhenEqual()));
 
   // icmp's with boolean values can always be turned into bitwise operations
@@ -5991,20 +6040,20 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) {
       default: break;
       case ICmpInst::ICMP_ULE:
         if (CI->isMaxValue(false))                 // A <=u MAX -> TRUE
-          return ReplaceInstUsesWith(I, ConstantInt::getTrue());
-        return new ICmpInst(ICmpInst::ICMP_ULT, Op0, AddOne(CI));
+          return ReplaceInstUsesWith(I, Context->getConstantIntTrue());
+        return new ICmpInst(ICmpInst::ICMP_ULT, Op0, AddOne(CI, Context));
       case ICmpInst::ICMP_SLE:
         if (CI->isMaxValue(true))                  // A <=s MAX -> TRUE
-          return ReplaceInstUsesWith(I, ConstantInt::getTrue());
-        return new ICmpInst(ICmpInst::ICMP_SLT, Op0, AddOne(CI));
+          return ReplaceInstUsesWith(I, Context->getConstantIntTrue());
+        return new ICmpInst(ICmpInst::ICMP_SLT, Op0, AddOne(CI, Context));
       case ICmpInst::ICMP_UGE:
         if (CI->isMinValue(false))                 // A >=u MIN -> TRUE
-          return ReplaceInstUsesWith(I, ConstantInt::getTrue());
-        return new ICmpInst( ICmpInst::ICMP_UGT, Op0, SubOne(CI));
+          return ReplaceInstUsesWith(I, Context->getConstantIntTrue());
+        return new ICmpInst( ICmpInst::ICMP_UGT, Op0, SubOne(CI, Context));
       case ICmpInst::ICMP_SGE:
         if (CI->isMinValue(true))                  // A >=s MIN -> TRUE
-          return ReplaceInstUsesWith(I, ConstantInt::getTrue());
-        return new ICmpInst(ICmpInst::ICMP_SGT, Op0, SubOne(CI));
+          return ReplaceInstUsesWith(I, Context->getConstantIntTrue());
+        return new ICmpInst(ICmpInst::ICMP_SGT, Op0, SubOne(CI, Context));
       }
 
       // If this comparison is a normal comparison, it demands all
@@ -6050,9 +6099,11 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) {
   // figured out that the LHS is a constant.  Just constant fold this now so
   // that code below can assume that Min != Max.
   if (!isa<Constant>(Op0) && Op0Min == Op0Max)
-    return new ICmpInst(I.getPredicate(), ConstantInt::get(Op0Min), Op1);
+    return new ICmpInst(I.getPredicate(),
+                        Context->getConstantInt(Op0Min), Op1);
   if (!isa<Constant>(Op1) && Op1Min == Op1Max)
-    return new ICmpInst(I.getPredicate(), Op0, ConstantInt::get(Op1Min));
+    return new ICmpInst(I.getPredicate(), Op0,
+                        Context->getConstantInt(Op1Min));
 
   // Based on the range information we know about the LHS, see if we can
   // simplify this comparison.  For example, (x&4) < 8 is always true.
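The AddOne/SubOne rewrites above turn a non-strict comparison into a strict one whenever the constant is not at the end of its range; a small self-check in standalone C++ (the constant 100 is illustrative):

    #include <cassert>
    #include <cstdint>

    int main() {
      // A <=u C  ->  A <u C+1, valid whenever C != UINT32_MAX.
      const uint32_t C = 100;
      for (uint32_t a = 0; a < 300; ++a)
        assert((a <= C) == (a < C + 1));
      // A <=u MAX is simply true, matching the "A <=u MAX -> TRUE" fold.
      assert((uint32_t)0xdeadbeef <= UINT32_MAX);
    }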
@@ -6060,99 +6111,99 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) {
   default: assert(0 && "Unknown icmp opcode!");
   case ICmpInst::ICMP_EQ:
     if (Op0Max.ult(Op1Min) || Op0Min.ugt(Op1Max))
-      return ReplaceInstUsesWith(I, ConstantInt::getFalse());
+      return ReplaceInstUsesWith(I, Context->getConstantIntFalse());
     break;
   case ICmpInst::ICMP_NE:
     if (Op0Max.ult(Op1Min) || Op0Min.ugt(Op1Max))
-      return ReplaceInstUsesWith(I, ConstantInt::getTrue());
+      return ReplaceInstUsesWith(I, Context->getConstantIntTrue());
     break;
   case ICmpInst::ICMP_ULT:
     if (Op0Max.ult(Op1Min))          // A <u B -> true if max(A) < min(B)
-      return ReplaceInstUsesWith(I, ConstantInt::getTrue());
+      return ReplaceInstUsesWith(I, Context->getConstantIntTrue());
     if (Op0Min.uge(Op1Max))          // A <u B -> false if min(A) >= max(B)
-      return ReplaceInstUsesWith(I, ConstantInt::getFalse());
+      return ReplaceInstUsesWith(I, Context->getConstantIntFalse());
     if (Op1Min == Op0Max)            // A <u B -> A != B if max(A) == min(B)
       return new ICmpInst(ICmpInst::ICMP_NE, Op0, Op1);
     if (ConstantInt *CI = dyn_cast<ConstantInt>(Op1)) {
       if (Op1Max == Op0Min+1)        // A <u C -> A == C-1 if min(A)+1 == C
-        return new ICmpInst(ICmpInst::ICMP_EQ, Op0, SubOne(CI));
+        return new ICmpInst(ICmpInst::ICMP_EQ, Op0, SubOne(CI, Context));
 
       // (x <u 2147483648) -> (x >s -1)  -> true if sign bit clear
       if (CI->isMinValue(true))
         return new ICmpInst(ICmpInst::ICMP_SGT, Op0,
-                            ConstantInt::getAllOnesValue(Op0->getType()));
+                     Context->getConstantIntAllOnesValue(Op0->getType()));
     }
     break;
   case ICmpInst::ICMP_UGT:
     if (Op0Min.ugt(Op1Max))          // A >u B -> true if min(A) > max(B)
-      return ReplaceInstUsesWith(I, ConstantInt::getTrue());
+      return ReplaceInstUsesWith(I, Context->getConstantIntTrue());
     if (Op0Max.ule(Op1Min))          // A >u B -> false if max(A) <= max(B)
-      return ReplaceInstUsesWith(I, ConstantInt::getFalse());
+      return ReplaceInstUsesWith(I, Context->getConstantIntFalse());
 
     if (Op1Max == Op0Min)            // A >u B -> A != B if min(A) == max(B)
       return new ICmpInst(ICmpInst::ICMP_NE, Op0, Op1);
     if (ConstantInt *CI = dyn_cast<ConstantInt>(Op1)) {
       if (Op1Min == Op0Max-1)        // A >u C -> A == C+1 if max(a)-1 == C
-        return new ICmpInst(ICmpInst::ICMP_EQ, Op0, AddOne(CI));
+        return new ICmpInst(ICmpInst::ICMP_EQ, Op0, AddOne(CI, Context));
 
       // (x >u 2147483647) -> (x <s 0)  -> true if sign bit set
       if (CI->isMaxValue(true))
         return new ICmpInst(ICmpInst::ICMP_SLT, Op0,
-                            ConstantInt::getNullValue(Op0->getType()));
+                            Context->getNullValue(Op0->getType()));
     }
     break;
   case ICmpInst::ICMP_SLT:
     if (Op0Max.slt(Op1Min))          // A <s B -> true if max(A) < min(C)
-      return ReplaceInstUsesWith(I, ConstantInt::getTrue());
+      return ReplaceInstUsesWith(I, Context->getConstantIntTrue());
     if (Op0Min.sge(Op1Max))          // A <s B -> false if min(A) >= max(C)
-      return ReplaceInstUsesWith(I, ConstantInt::getFalse());
+      return ReplaceInstUsesWith(I, Context->getConstantIntFalse());
     if (Op1Min == Op0Max)            // A <s B -> A != B if max(A) == min(B)
       return new ICmpInst(ICmpInst::ICMP_NE, Op0, Op1);
     if (ConstantInt *CI = dyn_cast<ConstantInt>(Op1)) {
       if (Op1Max == Op0Min+1)        // A <s C -> A == C-1 if min(A)+1 == C
-        return new ICmpInst(ICmpInst::ICMP_EQ, Op0, SubOne(CI));
+        return new ICmpInst(ICmpInst::ICMP_EQ, Op0, SubOne(CI, Context));
     }
     break;
   case ICmpInst::ICMP_SGT:
     if (Op0Min.sgt(Op1Max))          // A >s B -> true if min(A) > max(B)
-      return ReplaceInstUsesWith(I, ConstantInt::getTrue());
+      return ReplaceInstUsesWith(I, Context->getConstantIntTrue());
     if (Op0Max.sle(Op1Min))          // A >s B -> false if max(A) <= min(B)
-      return ReplaceInstUsesWith(I, ConstantInt::getFalse());
+      return ReplaceInstUsesWith(I, Context->getConstantIntFalse());
 
     if (Op1Max == Op0Min)            // A >s B -> A != B if min(A) == max(B)
       return new ICmpInst(ICmpInst::ICMP_NE, Op0, Op1);
     if (ConstantInt *CI = dyn_cast<ConstantInt>(Op1)) {
       if (Op1Min == Op0Max-1)        // A >s C -> A == C+1 if max(A)-1 == C
-        return new ICmpInst(ICmpInst::ICMP_EQ, Op0, AddOne(CI));
+        return new ICmpInst(ICmpInst::ICMP_EQ, Op0, AddOne(CI, Context));
     }
     break;
   case ICmpInst::ICMP_SGE:
     assert(!isa<ConstantInt>(Op1) && "ICMP_SGE with ConstantInt not folded!");
     if (Op0Min.sge(Op1Max))          // A >=s B -> true if min(A) >= max(B)
-      return ReplaceInstUsesWith(I, ConstantInt::getTrue());
+      return ReplaceInstUsesWith(I, Context->getConstantIntTrue());
     if (Op0Max.slt(Op1Min))          // A >=s B -> false if max(A) < min(B)
-      return ReplaceInstUsesWith(I, ConstantInt::getFalse());
+      return ReplaceInstUsesWith(I, Context->getConstantIntFalse());
     break;
   case ICmpInst::ICMP_SLE:
     assert(!isa<ConstantInt>(Op1) && "ICMP_SLE with ConstantInt not folded!");
     if (Op0Max.sle(Op1Min))          // A <=s B -> true if max(A) <= min(B)
-      return ReplaceInstUsesWith(I, ConstantInt::getTrue());
+      return ReplaceInstUsesWith(I, Context->getConstantIntTrue());
     if (Op0Min.sgt(Op1Max))          // A <=s B -> false if min(A) > max(B)
-      return ReplaceInstUsesWith(I, ConstantInt::getFalse());
+      return ReplaceInstUsesWith(I, Context->getConstantIntFalse());
     break;
   case ICmpInst::ICMP_UGE:
     assert(!isa<ConstantInt>(Op1) && "ICMP_UGE with ConstantInt not folded!");
     if (Op0Min.uge(Op1Max))          // A >=u B -> true if min(A) >= max(B)
-      return ReplaceInstUsesWith(I, ConstantInt::getTrue());
+      return ReplaceInstUsesWith(I, Context->getConstantIntTrue());
     if (Op0Max.ult(Op1Min))          // A >=u B -> false if max(A) < min(B)
-      return ReplaceInstUsesWith(I, ConstantInt::getFalse());
+      return ReplaceInstUsesWith(I, Context->getConstantIntFalse());
     break;
   case ICmpInst::ICMP_ULE:
     assert(!isa<ConstantInt>(Op1) && "ICMP_ULE with ConstantInt not folded!");
     if (Op0Max.ule(Op1Min))          // A <=u B -> true if max(A) <= min(B)
-      return ReplaceInstUsesWith(I, ConstantInt::getTrue());
+      return ReplaceInstUsesWith(I, Context->getConstantIntTrue());
     if (Op0Min.ugt(Op1Max))          // A <=u B -> false if min(A) > max(B)
-      return ReplaceInstUsesWith(I, ConstantInt::getFalse());
+      return ReplaceInstUsesWith(I, Context->getConstantIntFalse());
     break;
   }
 
@@ -6204,7 +6255,7 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) {
         }
         if (isAllZeros)
           return new ICmpInst(I.getPredicate(), LHSI->getOperand(0),
-                  Constant::getNullValue(LHSI->getOperand(0)->getType()));
+                  Context->getNullValue(LHSI->getOperand(0)->getType()));
       }
       break;
 
@@ -6224,14 +6275,14 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) {
       if (LHSI->hasOneUse()) {
         if (Constant *C = dyn_cast<Constant>(LHSI->getOperand(1))) {
           // Fold the known value into the constant operand.
-          Op1 = ConstantExpr::getICmp(I.getPredicate(), C, RHSC);
+          Op1 = Context->getConstantExprICmp(I.getPredicate(), C, RHSC);
           // Insert a new ICmp of the other select operand.
           Op2 = InsertNewInstBefore(new ICmpInst(I.getPredicate(),
                                                  LHSI->getOperand(2), RHSC,
                                                  I.getName()), I);
         } else if (Constant *C = dyn_cast<Constant>(LHSI->getOperand(2))) {
           // Fold the known value into the constant operand.
-          Op2 = ConstantExpr::getICmp(I.getPredicate(), C, RHSC);
+          Op2 = Context->getConstantExprICmp(I.getPredicate(), C, RHSC);
           // Insert a new ICmp of the other select operand.
           Op1 = InsertNewInstBefore(new ICmpInst(I.getPredicate(),
                                                  LHSI->getOperand(1), RHSC,
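The select fold above pushes the comparison into each arm so that the constant arm folds away; the equivalence it relies on, as a standalone C++ sketch (names are illustrative):

    #include <cassert>

    // icmp pred (select c, C1, X), RHS  ->  select c, (C1 pred RHS), (X pred RHS)
    // The constant arm folds at compile time; one new compare remains.
    bool original(bool c, int C1, int x, int rhs) { return (c ? C1 : x) < rhs; }
    bool folded(bool c, int C1, int x, int rhs)   { return c ? (C1 < rhs) : (x < rhs); }

    int main() {
      for (int x = -5; x < 5; ++x)
        for (int c = 0; c < 2; ++c)
          assert(original(c, 7, x, 3) == folded(c, 7, x, 3));
    }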
@@ -6248,7 +6299,7 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) {
       // can assume it is successful and remove the malloc.
       if (LHSI->hasOneUse() && isa<ConstantPointerNull>(RHSC)) {
         AddToWorkList(LHSI);
-        return ReplaceInstUsesWith(I, ConstantInt::get(Type::Int1Ty,
+        return ReplaceInstUsesWith(I, Context->getConstantInt(Type::Int1Ty,
                                                        !I.isTrueWhenEqual()));
       }
       break;
@@ -6282,7 +6333,7 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) {
     // If Op1 is a constant, we can fold the cast into the constant.
     if (Op0->getType() != Op1->getType()) {
       if (Constant *Op1C = dyn_cast<Constant>(Op1)) {
-        Op1 = ConstantExpr::getBitCast(Op1C, Op0->getType());
+        Op1 = Context->getConstantExprBitCast(Op1C, Op0->getType());
       } else {
         // Otherwise, cast the RHS right before the icmp
         Op1 = InsertBitCastBefore(Op1, Op0->getType(), I);
@@ -6346,7 +6397,7 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) {
         // Mask = -1 >> count-trailing-zeros(Cst).
         if (!CI->isZero() && !CI->isOne()) {
           const APInt &AP = CI->getValue();
-          ConstantInt *Mask = ConstantInt::get(
+          ConstantInt *Mask = Context->getConstantInt(
                                     APInt::getLowBitsSet(AP.getBitWidth(),
                                                          AP.getBitWidth() -
                                                     AP.countTrailingZeros()));
@@ -6384,7 +6435,7 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) {
       if (A == Op1 || B == Op1) {    // (A^B) == A  ->  B == 0
         Value *OtherVal = A == Op1 ? B : A;
         return new ICmpInst(I.getPredicate(), OtherVal,
-                            Constant::getNullValue(A->getType()));
+                            Context->getNullValue(A->getType()));
       }
 
       if (match(Op1, m_Xor(m_Value(C), m_Value(D)))) {
@@ -6392,7 +6443,8 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) {
         ConstantInt *C1, *C2;
         if (match(B, m_ConstantInt(C1)) &&
             match(D, m_ConstantInt(C2)) && Op1->hasOneUse()) {
-          Constant *NC = ConstantInt::get(C1->getValue() ^ C2->getValue());
+          Constant *NC =
+                   Context->getConstantInt(C1->getValue() ^ C2->getValue());
           Instruction *Xor = BinaryOperator::CreateXor(C, NC, "tmp");
           return new ICmpInst(I.getPredicate(), A,
                               InsertNewInstBefore(Xor, I));
@@ -6411,18 +6463,18 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) {
         // A == (A^B)  ->  B == 0
         Value *OtherVal = A == Op0 ? B : A;
         return new ICmpInst(I.getPredicate(), OtherVal,
-                            Constant::getNullValue(A->getType()));
+                            Context->getNullValue(A->getType()));
       }
 
     // (A-B) == A  ->  B == 0
     if (match(Op0, m_Sub(m_Specific(Op1), m_Value(B))))
       return new ICmpInst(I.getPredicate(), B,
-                          Constant::getNullValue(B->getType()));
+                          Context->getNullValue(B->getType()));
 
     // A == (A-B)  ->  B == 0
     if (match(Op1, m_Sub(m_Specific(Op0), m_Value(B))))
       return new ICmpInst(I.getPredicate(), B,
-                          Constant::getNullValue(B->getType()));
+                          Context->getNullValue(B->getType()));
 
     // (X&Z) == (Y&Z) -> (X^Y) & Z == 0
     if (Op0->hasOneUse() && Op1->hasOneUse() &&
@@ -6444,7 +6496,7 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) {
         Op1 = InsertNewInstBefore(BinaryOperator::CreateXor(X, Y, "tmp"), I);
         Op1 = InsertNewInstBefore(BinaryOperator::CreateAnd(Op1, Z, "tmp"), I);
         I.setOperand(0, Op1);
-        I.setOperand(1, Constant::getNullValue(Op1->getType()));
+        I.setOperand(1, Context->getNullValue(Op1->getType()));
         return &I;
       }
     }
@@ -6483,13 +6535,13 @@ Instruction *InstCombiner::FoldICmpDivCst(ICmpInst &ICI, BinaryOperator *DivI,
   // of form X/C1=C2. We solve for X by multiplying C1 (DivRHS) and
   // C2 (CI). By solving for X we can turn this into a range check
   // instead of computing a divide.
-  Constant *Prod = ConstantExpr::getMul(CmpRHS, DivRHS);
+  Constant *Prod = Context->getConstantExprMul(CmpRHS, DivRHS);
 
   // Determine if the product overflows by seeing if the product is
   // not equal to the divide. Make sure we do the same kind of divide
   // as in the LHS instruction that we're folding.
-  bool ProdOV = (DivIsSigned ? ConstantExpr::getSDiv(Prod, DivRHS) :
-                 ConstantExpr::getUDiv(Prod, DivRHS)) != CmpRHS;
+  bool ProdOV = (DivIsSigned ? Context->getConstantExprSDiv(Prod, DivRHS) :
+                 Context->getConstantExprUDiv(Prod, DivRHS)) != CmpRHS;
 
   // Get the ICmp opcode
   ICmpInst::Predicate Pred = ICI.getPredicate();
@@ -6509,47 +6561,50 @@ Instruction *InstCombiner::FoldICmpDivCst(ICmpInst &ICI, BinaryOperator *DivI,
     LoBound = Prod;
     HiOverflow = LoOverflow = ProdOV;
     if (!HiOverflow)
-      HiOverflow = AddWithOverflow(HiBound, LoBound, DivRHS, false);
+      HiOverflow = AddWithOverflow(HiBound, LoBound, DivRHS, Context, false);
   } else if (DivRHS->getValue().isStrictlyPositive()) { // Divisor is > 0.
     if (CmpRHSV == 0) {       // (X / pos) op 0
       // Can't overflow.  e.g.  X/2 op 0 --> [-1, 2)
-      LoBound = cast<ConstantInt>(ConstantExpr::getNeg(SubOne(DivRHS)));
+      LoBound = cast<ConstantInt>(Context->getConstantExprNeg(SubOne(DivRHS,
+                                                                     Context)));
       HiBound = DivRHS;
     } else if (CmpRHSV.isStrictlyPositive()) {   // (X / pos) op pos
       LoBound = Prod;     // e.g.   X/5 op 3 --> [15, 20)
       HiOverflow = LoOverflow = ProdOV;
       if (!HiOverflow)
-        HiOverflow = AddWithOverflow(HiBound, Prod, DivRHS, true);
+        HiOverflow = AddWithOverflow(HiBound, Prod, DivRHS, Context, true);
     } else {                       // (X / pos) op neg
       // e.g. X/5 op -3  --> [-15-4, -15+1) --> [-19, -14)
-      HiBound = AddOne(Prod);
+      HiBound = AddOne(Prod, Context);
       LoOverflow = HiOverflow = ProdOV ? -1 : 0;
       if (!LoOverflow) {
-        ConstantInt* DivNeg = cast<ConstantInt>(ConstantExpr::getNeg(DivRHS));
-        LoOverflow = AddWithOverflow(LoBound, HiBound, DivNeg,
+        ConstantInt* DivNeg =
+                  cast<ConstantInt>(Context->getConstantExprNeg(DivRHS));
+        LoOverflow = AddWithOverflow(LoBound, HiBound, DivNeg, Context,
                                      true) ? -1 : 0;
      }
    }
  } else if (DivRHS->getValue().isNegative()) { // Divisor is < 0.
    if (CmpRHSV == 0) {       // (X / neg) op 0
      // e.g. X/-5 op 0  --> [-4, 5)
-      LoBound = AddOne(DivRHS);
-      HiBound = cast<ConstantInt>(ConstantExpr::getNeg(DivRHS));
+      LoBound = AddOne(DivRHS, Context);
+      HiBound = cast<ConstantInt>(Context->getConstantExprNeg(DivRHS));
       if (HiBound == DivRHS) {     // -INTMIN = INTMIN
         HiOverflow = 1;            // [INTMIN+1, overflow)
         HiBound = 0;               // e.g. X/INTMIN = 0 --> X > INTMIN
       }
     } else if (CmpRHSV.isStrictlyPositive()) {   // (X / neg) op pos
       // e.g. X/-5 op 3  --> [-19, -14)
-      HiBound = AddOne(Prod);
+      HiBound = AddOne(Prod, Context);
       HiOverflow = LoOverflow = ProdOV ? -1 : 0;
       if (!LoOverflow)
-        LoOverflow = AddWithOverflow(LoBound, HiBound, DivRHS, true) ? -1 : 0;
+        LoOverflow = AddWithOverflow(LoBound, HiBound,
                                      DivRHS, Context, true) ? -1 : 0;
     } else {                       // (X / neg) op neg
       LoBound = Prod;       // e.g. X/-5 op -3  --> [15, 20)
       LoOverflow = HiOverflow = ProdOV;
       if (!HiOverflow)
-        HiOverflow = SubWithOverflow(HiBound, Prod, DivRHS, true);
+        HiOverflow = SubWithOverflow(HiBound, Prod, DivRHS, Context, true);
     }
 
     // Dividing by a negative swaps the condition.  LT <-> GT
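FoldICmpDivCst's interval logic is easiest to sanity-check numerically; a hedged sketch for the positive-divisor, positive-quotient case in standalone C++ (the constants are illustrative, matching the "X/5 op 3 --> [15, 20)" comment):

    #include <cassert>

    int main() {
      // X /s 5 == 3  <=>  X in [15, 20): the preimage of quotient 3 under
      // truncating division by 5, i.e. the half-open range [Prod, Prod+DivRHS).
      for (int x = -40; x < 40; ++x)
        assert((x / 5 == 3) == (x >= 15 && x < 20));
    }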
@@ -6561,7 +6616,7 @@ Instruction *InstCombiner::FoldICmpDivCst(ICmpInst &ICI, BinaryOperator *DivI,
   default: assert(0 && "Unhandled icmp opcode!");
   case ICmpInst::ICMP_EQ:
     if (LoOverflow && HiOverflow)
-      return ReplaceInstUsesWith(ICI, ConstantInt::getFalse());
+      return ReplaceInstUsesWith(ICI, Context->getConstantIntFalse());
     else if (HiOverflow)
       return new ICmpInst(DivIsSigned ? ICmpInst::ICMP_SGE :
                           ICmpInst::ICMP_UGE, X, LoBound);
@@ -6572,7 +6627,7 @@ Instruction *InstCombiner::FoldICmpDivCst(ICmpInst &ICI, BinaryOperator *DivI,
       return InsertRangeTest(X, LoBound, HiBound, DivIsSigned, true, ICI);
   case ICmpInst::ICMP_NE:
     if (LoOverflow && HiOverflow)
-      return ReplaceInstUsesWith(ICI, ConstantInt::getTrue());
+      return ReplaceInstUsesWith(ICI, Context->getConstantIntTrue());
     else if (HiOverflow)
       return new ICmpInst(DivIsSigned ? ICmpInst::ICMP_SLT :
                           ICmpInst::ICMP_ULT, X, LoBound);
@@ -6584,16 +6639,16 @@ Instruction *InstCombiner::FoldICmpDivCst(ICmpInst &ICI, BinaryOperator *DivI,
   case ICmpInst::ICMP_ULT:
   case ICmpInst::ICMP_SLT:
     if (LoOverflow == +1)   // Low bound is greater than input range.
-      return ReplaceInstUsesWith(ICI, ConstantInt::getTrue());
+      return ReplaceInstUsesWith(ICI, Context->getConstantIntTrue());
     if (LoOverflow == -1)   // Low bound is less than input range.
-      return ReplaceInstUsesWith(ICI, ConstantInt::getFalse());
+      return ReplaceInstUsesWith(ICI, Context->getConstantIntFalse());
     return new ICmpInst(Pred, X, LoBound);
   case ICmpInst::ICMP_UGT:
   case ICmpInst::ICMP_SGT:
     if (HiOverflow == +1)       // High bound greater than input range.
-      return ReplaceInstUsesWith(ICI, ConstantInt::getFalse());
+      return ReplaceInstUsesWith(ICI, Context->getConstantIntFalse());
     else if (HiOverflow == -1)  // High bound less than input range.
-      return ReplaceInstUsesWith(ICI, ConstantInt::getTrue());
+      return ReplaceInstUsesWith(ICI, Context->getConstantIntTrue());
     if (Pred == ICmpInst::ICMP_UGT)
       return new ICmpInst(ICmpInst::ICMP_UGE, X, HiBound);
     else
@@ -6627,7 +6682,7 @@ Instruction *InstCombiner::visitICmpInstWithInstAndIntCst(ICmpInst &ICI,
         NewRHS.zext(SrcBits);
         NewRHS |= KnownOne;
         return new ICmpInst(ICI.getPredicate(), LHSI->getOperand(0),
-                            ConstantInt::get(NewRHS));
+                            Context->getConstantInt(NewRHS));
       }
     }
     break;
@@ -6655,9 +6710,11 @@ Instruction *InstCombiner::visitICmpInstWithInstAndIntCst(ICmpInst &ICI,
         isTrueIfPositive ^= true;
 
       if (isTrueIfPositive)
-        return new ICmpInst(ICmpInst::ICMP_SGT, CompareVal, SubOne(RHS));
+        return new ICmpInst(ICmpInst::ICMP_SGT, CompareVal,
+                            SubOne(RHS, Context));
       else
-        return new ICmpInst(ICmpInst::ICMP_SLT, CompareVal, AddOne(RHS));
+        return new ICmpInst(ICmpInst::ICMP_SLT, CompareVal,
+                            AddOne(RHS, Context));
     }
 
     if (LHSI->hasOneUse()) {
@@ -6668,7 +6725,7 @@ Instruction *InstCombiner::visitICmpInstWithInstAndIntCst(ICmpInst &ICI,
         ICmpInst::Predicate Pred = ICI.isSignedPredicate()
                                        ? ICI.getUnsignedPredicate()
                                        : ICI.getSignedPredicate();
         return new ICmpInst(Pred, LHSI->getOperand(0),
-                            ConstantInt::get(RHSV ^ SignBit));
+                            Context->getConstantInt(RHSV ^ SignBit));
       }
 
       // (icmp u/s (xor A ~SignBit), C) -> (icmp s/u (xor C ~SignBit), A)
@@ -6679,7 +6736,7 @@ Instruction *InstCombiner::visitICmpInstWithInstAndIntCst(ICmpInst &ICI,
                                : ICI.getSignedPredicate();
         Pred = ICI.getSwappedPredicate(Pred);
         return new ICmpInst(Pred, LHSI->getOperand(0),
-                            ConstantInt::get(RHSV ^ NotSignBit));
+                            Context->getConstantInt(RHSV ^ NotSignBit));
       }
     }
   }
@@ -6708,10 +6765,10 @@ Instruction *InstCombiner::visitICmpInstWithInstAndIntCst(ICmpInst &ICI,
           NewCI.zext(BitWidth);
           Instruction *NewAnd =
             BinaryOperator::CreateAnd(Cast->getOperand(0),
-                                      ConstantInt::get(NewCST),LHSI->getName());
+                               Context->getConstantInt(NewCST),LHSI->getName());
           InsertNewInstBefore(NewAnd, ICI);
           return new ICmpInst(ICI.getPredicate(), NewAnd,
-                              ConstantInt::get(NewCI));
+                              Context->getConstantInt(NewCI));
         }
       }
 
@@ -6748,27 +6805,28 @@ Instruction *InstCombiner::visitICmpInstWithInstAndIntCst(ICmpInst &ICI,
         if (CanFold) {
           Constant *NewCst;
           if (Shift->getOpcode() == Instruction::Shl)
-            NewCst = ConstantExpr::getLShr(RHS, ShAmt);
+            NewCst = Context->getConstantExprLShr(RHS, ShAmt);
           else
-            NewCst = ConstantExpr::getShl(RHS, ShAmt);
+            NewCst = Context->getConstantExprShl(RHS, ShAmt);
 
           // Check to see if we are shifting out any of the bits being
           // compared.
-          if (ConstantExpr::get(Shift->getOpcode(), NewCst, ShAmt) != RHS) {
+          if (Context->getConstantExpr(Shift->getOpcode(),
+                                       NewCst, ShAmt) != RHS) {
             // If we shifted bits out, the fold is not going to work out.
             // As a special case, check to see if this means that the
             // result is always true or false now.
             if (ICI.getPredicate() == ICmpInst::ICMP_EQ)
-              return ReplaceInstUsesWith(ICI, ConstantInt::getFalse());
+              return ReplaceInstUsesWith(ICI, Context->getConstantIntFalse());
             if (ICI.getPredicate() == ICmpInst::ICMP_NE)
-              return ReplaceInstUsesWith(ICI, ConstantInt::getTrue());
+              return ReplaceInstUsesWith(ICI, Context->getConstantIntTrue());
           } else {
             ICI.setOperand(1, NewCst);
             Constant *NewAndCST;
             if (Shift->getOpcode() == Instruction::Shl)
-              NewAndCST = ConstantExpr::getLShr(AndCST, ShAmt);
+              NewAndCST = Context->getConstantExprLShr(AndCST, ShAmt);
             else
-              NewAndCST = ConstantExpr::getShl(AndCST, ShAmt);
+              NewAndCST = Context->getConstantExprShl(AndCST, ShAmt);
             LHSI->setOperand(1, NewAndCST);
             LHSI->setOperand(0, Shift->getOperand(0));
             AddToWorkList(Shift); // Shift is dead.
@@ -6823,10 +6881,11 @@ Instruction *InstCombiner::visitICmpInstWithInstAndIntCst(ICmpInst &ICI,
       // If we are comparing against bits always shifted out, the
       // comparison cannot succeed.
       Constant *Comp =
-        ConstantExpr::getShl(ConstantExpr::getLShr(RHS, ShAmt), ShAmt);
+        Context->getConstantExprShl(Context->getConstantExprLShr(RHS, ShAmt),
+                                    ShAmt);
       if (Comp != RHS) {// Comparing against a bit that we know is zero.
         bool IsICMP_NE = ICI.getPredicate() == ICmpInst::ICMP_NE;
-        Constant *Cst = ConstantInt::get(Type::Int1Ty, IsICMP_NE);
+        Constant *Cst = Context->getConstantInt(Type::Int1Ty, IsICMP_NE);
         return ReplaceInstUsesWith(ICI, Cst);
       }
 
@@ -6834,14 +6893,15 @@ Instruction *InstCombiner::visitICmpInstWithInstAndIntCst(ICmpInst &ICI,
         // Otherwise strength reduce the shift into an and.
         uint32_t ShAmtVal = (uint32_t)ShAmt->getLimitedValue(TypeBits);
         Constant *Mask =
-          ConstantInt::get(APInt::getLowBitsSet(TypeBits, TypeBits-ShAmtVal));
+          Context->getConstantInt(APInt::getLowBitsSet(TypeBits,
                                                        TypeBits-ShAmtVal));
 
         Instruction *AndI =
           BinaryOperator::CreateAnd(LHSI->getOperand(0),
                                     Mask, LHSI->getName()+".mask");
         Value *And = InsertNewInstBefore(AndI, ICI);
         return new ICmpInst(ICI.getPredicate(), And,
-                            ConstantInt::get(RHSV.lshr(ShAmtVal)));
+                            Context->getConstantInt(RHSV.lshr(ShAmtVal)));
       }
     }
 
@@ -6850,7 +6910,7 @@ Instruction *InstCombiner::visitICmpInstWithInstAndIntCst(ICmpInst &ICI,
     if (LHSI->hasOneUse() &&
         isSignBitCheck(ICI.getPredicate(), RHS, TrueIfSigned)) {
       // (X << 31) <s 0  --> (X&1) != 0
-      Constant *Mask = ConstantInt::get(APInt(TypeBits, 1) <<
+      Constant *Mask = Context->getConstantInt(APInt(TypeBits, 1) <<
                                            (TypeBits-ShAmt->getZExtValue()-1));
       Instruction *AndI =
         BinaryOperator::CreateAnd(LHSI->getOperand(0),
@@ -6858,7 +6918,7 @@ Instruction *InstCombiner::visitICmpInstWithInstAndIntCst(ICmpInst &ICI,
       Value *And = InsertNewInstBefore(AndI, ICI);
       return new ICmpInst(TrueIfSigned ? ICmpInst::ICMP_NE :
                                          ICmpInst::ICMP_EQ,
-                          And, Constant::getNullValue(And->getType()));
+                          And, Context->getNullValue(And->getType()));
     }
     break;
   }
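The shl sign-bit fold above tests the one source bit that survives the shift; a quick numeric check in standalone C++ (32-bit lanes assumed):

    #include <cassert>
    #include <cstdint>

    int main() {
      // (X << 31) <s 0  <=>  (X & 1) != 0: only bit 0 of X reaches the sign bit.
      for (uint32_t x = 0; x < 64; ++x)
        assert(((int32_t)(x << 31) < 0) == ((x & 1) != 0));
    }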
@@ -6888,7 +6948,7 @@ Instruction *InstCombiner::visitICmpInstWithInstAndIntCst(ICmpInst &ICI,
       if (Comp != RHSV) { // Comparing against a bit that we know is zero.
         bool IsICMP_NE = ICI.getPredicate() == ICmpInst::ICMP_NE;
-        Constant *Cst = ConstantInt::get(Type::Int1Ty, IsICMP_NE);
+        Constant *Cst = Context->getConstantInt(Type::Int1Ty, IsICMP_NE);
         return ReplaceInstUsesWith(ICI, Cst);
       }
 
@@ -6899,20 +6959,20 @@ Instruction *InstCombiner::visitICmpInstWithInstAndIntCst(ICmpInst &ICI,
           MaskedValueIsZero(LHSI->getOperand(0),
                       APInt::getLowBitsSet(Comp.getBitWidth(), ShAmtVal))) {
         return new ICmpInst(ICI.getPredicate(), LHSI->getOperand(0),
-                            ConstantExpr::getShl(RHS, ShAmt));
+                            Context->getConstantExprShl(RHS, ShAmt));
       }
 
       if (LHSI->hasOneUse()) {
         // Otherwise strength reduce the shift into an and.
         APInt Val(APInt::getHighBitsSet(TypeBits, TypeBits - ShAmtVal));
-        Constant *Mask = ConstantInt::get(Val);
+        Constant *Mask = Context->getConstantInt(Val);
 
         Instruction *AndI =
           BinaryOperator::CreateAnd(LHSI->getOperand(0),
                                     Mask, LHSI->getName()+".mask");
         Value *And = InsertNewInstBefore(AndI, ICI);
         return new ICmpInst(ICI.getPredicate(), And,
-                            ConstantExpr::getShl(RHS, ShAmt));
+                            Context->getConstantExprShl(RHS, ShAmt));
       }
       break;
     }
@@ -6945,18 +7005,18 @@ Instruction *InstCombiner::visitICmpInstWithInstAndIntCst(ICmpInst &ICI,
       if (ICI.isSignedPredicate()) {
         if (CR.getLower().isSignBit()) {
           return new ICmpInst(ICmpInst::ICMP_SLT, LHSI->getOperand(0),
-                              ConstantInt::get(CR.getUpper()));
+                              Context->getConstantInt(CR.getUpper()));
         } else if (CR.getUpper().isSignBit()) {
           return new ICmpInst(ICmpInst::ICMP_SGE, LHSI->getOperand(0),
-                              ConstantInt::get(CR.getLower()));
+                              Context->getConstantInt(CR.getLower()));
         }
       } else {
         if (CR.getLower().isMinValue()) {
           return new ICmpInst(ICmpInst::ICMP_ULT, LHSI->getOperand(0),
-                              ConstantInt::get(CR.getUpper()));
+                              Context->getConstantInt(CR.getUpper()));
         } else if (CR.getUpper().isMinValue()) {
           return new ICmpInst(ICmpInst::ICMP_UGE, LHSI->getOperand(0),
-                              ConstantInt::get(CR.getLower()));
+                              Context->getConstantInt(CR.getLower()));
         }
       }
     }
@@ -6981,7 +7041,7 @@ Instruction *InstCombiner::visitICmpInstWithInstAndIntCst(ICmpInst &ICI,
                                                     BO->getName());
         InsertNewInstBefore(NewRem, ICI);
         return new ICmpInst(ICI.getPredicate(), NewRem,
-                            Constant::getNullValue(BO->getType()));
+                            Context->getNullValue(BO->getType()));
       }
     }
     break;
@@ -6990,15 +7050,15 @@ Instruction *InstCombiner::visitICmpInstWithInstAndIntCst(ICmpInst &ICI,
     if (ConstantInt *BOp1C = dyn_cast<ConstantInt>(BO->getOperand(1))) {
       if (BO->hasOneUse())
         return new ICmpInst(ICI.getPredicate(), BO->getOperand(0),
-                            ConstantExpr::getSub(RHS, BOp1C));
+                            Context->getConstantExprSub(RHS, BOp1C));
     } else if (RHSV == 0) {
       // Replace ((add A, B) != 0) with (A != -B) if A or B is
       // efficiently invertible, or if the add has just this one use.
       Value *BOp0 = BO->getOperand(0), *BOp1 = BO->getOperand(1);
 
-      if (Value *NegVal = dyn_castNegVal(BOp1))
+      if (Value *NegVal = dyn_castNegVal(BOp1, Context))
        return new ICmpInst(ICI.getPredicate(), BOp0, NegVal);
-      else if (Value *NegVal = dyn_castNegVal(BOp0))
+      else if (Value *NegVal = dyn_castNegVal(BOp0, Context))
        return new ICmpInst(ICI.getPredicate(), NegVal, BOp1);
      else if (BO->hasOneUse()) {
        Instruction *Neg = BinaryOperator::CreateNeg(BOp1);
@@ -7013,7 +7073,7 @@ Instruction *InstCombiner::visitICmpInstWithInstAndIntCst(ICmpInst &ICI,
       // the explicit xor.
       if (Constant *BOC = dyn_cast<Constant>(BO->getOperand(1)))
         return new ICmpInst(ICI.getPredicate(), BO->getOperand(0),
-                            ConstantExpr::getXor(RHS, BOC));
+                            Context->getConstantExprXor(RHS, BOC));
 
       // FALLTHROUGH
     case Instruction::Sub:
@@ -7027,10 +7087,11 @@ Instruction *InstCombiner::visitICmpInstWithInstAndIntCst(ICmpInst &ICI,
       // If bits are being or'd in that are not present in the constant we
       // are comparing against, then the comparison could never succeed!
       if (Constant *BOC = dyn_cast<Constant>(BO->getOperand(1))) {
-        Constant *NotCI = ConstantExpr::getNot(RHS);
-        if (!ConstantExpr::getAnd(BOC, NotCI)->isNullValue())
-          return ReplaceInstUsesWith(ICI, ConstantInt::get(Type::Int1Ty,
-                                                           isICMP_NE));
+        Constant *NotCI = Context->getConstantExprNot(RHS);
+        if (!Context->getConstantExprAnd(BOC, NotCI)->isNullValue())
+          return ReplaceInstUsesWith(ICI,
+                                     Context->getConstantInt(Type::Int1Ty,
                                                              isICMP_NE));
       }
       break;
 
@@ -7039,19 +7100,20 @@ Instruction *InstCombiner::visitICmpInstWithInstAndIntCst(ICmpInst &ICI,
         // If bits are being compared against that are and'd out, then the
         // comparison can never succeed!
         if ((RHSV & ~BOC->getValue()) != 0)
-          return ReplaceInstUsesWith(ICI, ConstantInt::get(Type::Int1Ty,
-                                                           isICMP_NE));
+          return ReplaceInstUsesWith(ICI,
                                      Context->getConstantInt(Type::Int1Ty,
                                                              isICMP_NE));
 
         // If we have ((X & C) == C), turn it into ((X & C) != 0).
         if (RHS == BOC && RHSV.isPowerOf2())
           return new ICmpInst(isICMP_NE ? ICmpInst::ICMP_EQ :
                               ICmpInst::ICMP_NE, LHSI,
-                              Constant::getNullValue(RHS->getType()));
+                              Context->getNullValue(RHS->getType()));
 
         // Replace (and X, (1 << size(X)-1) != 0) with x s< 0
         if (BOC->getValue().isSignBit()) {
           Value *X = BO->getOperand(0);
-          Constant *Zero = Constant::getNullValue(X->getType());
+          Constant *Zero = Context->getNullValue(X->getType());
           ICmpInst::Predicate pred = isICMP_NE ?
             ICmpInst::ICMP_SLT : ICmpInst::ICMP_SGE;
           return new ICmpInst(pred, X, Zero);
@@ -7060,7 +7122,7 @@ Instruction *InstCombiner::visitICmpInstWithInstAndIntCst(ICmpInst &ICI,
         // ((X & ~7) == 0) --> X < 8
         if (RHSV == 0 && isHighOnes(BOC)) {
           Value *X = BO->getOperand(0);
-          Constant *NegX = ConstantExpr::getNeg(BOC);
+          Constant *NegX = Context->getConstantExprNeg(BOC);
           ICmpInst::Predicate pred = isICMP_NE ?
             ICmpInst::ICMP_UGE : ICmpInst::ICMP_ULT;
           return new ICmpInst(pred, X, NegX);
@@ -7073,7 +7135,7 @@ Instruction *InstCombiner::visitICmpInstWithInstAndIntCst(ICmpInst &ICI,
     if (II->getIntrinsicID() == Intrinsic::bswap) {
       AddToWorkList(II);
       ICI.setOperand(0, II->getOperand(1));
-      ICI.setOperand(1, ConstantInt::get(RHSV.byteSwap()));
+      ICI.setOperand(1, Context->getConstantInt(RHSV.byteSwap()));
       return &ICI;
     }
   }
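The "((X & ~7) == 0) --> X < 8" fold above works because negating a high-ones mask in two's complement yields the power-of-two bound; a small self-check in standalone C++:

    #include <cassert>
    #include <cstdint>

    int main() {
      // ((X & ~7u) == 0)  <=>  X <u 8: clearing the low bits leaves zero
      // exactly when no higher bit was set, and -(~7u) == 8 (mod 2^32).
      for (uint32_t x = 0; x < 100; ++x)
        assert(((x & ~7u) == 0) == (x < 8u));
    }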
@@ -7098,7 +7160,7 @@ Instruction *InstCombiner::visitICmpInstWithCastAndCast(ICmpInst &ICI) {
         cast<IntegerType>(DestTy)->getBitWidth()) {
       Value *RHSOp = 0;
       if (Constant *RHSC = dyn_cast<Constant>(ICI.getOperand(1))) {
-        RHSOp = ConstantExpr::getIntToPtr(RHSC, SrcTy);
+        RHSOp = Context->getConstantExprIntToPtr(RHSC, SrcTy);
       } else if (PtrToIntInst *RHSC = dyn_cast<PtrToIntInst>(ICI.getOperand(1))) {
         RHSOp = RHSC->getOperand(0);
         // If the pointer types don't match, insert a bitcast.
@@ -7150,8 +7212,9 @@ Instruction *InstCombiner::visitICmpInstWithCastAndCast(ICmpInst &ICI) {
 
   // Compute the constant that would happen if we truncated to SrcTy then
   // reextended to DestTy.
-  Constant *Res1 = ConstantExpr::getTrunc(CI, SrcTy);
-  Constant *Res2 = ConstantExpr::getCast(LHSCI->getOpcode(), Res1, DestTy);
+  Constant *Res1 = Context->getConstantExprTrunc(CI, SrcTy);
+  Constant *Res2 = Context->getConstantExprCast(LHSCI->getOpcode(),
+                                                Res1, DestTy);
 
   // If the re-extended constant didn't change...
   if (Res2 == CI) {
@@ -7176,9 +7239,9 @@ Instruction *InstCombiner::visitICmpInstWithCastAndCast(ICmpInst &ICI) {
   // First, handle some easy cases. We know the result cannot be equal at this
   // point so handle the ICI.isEquality() cases
   if (ICI.getPredicate() == ICmpInst::ICMP_EQ)
-    return ReplaceInstUsesWith(ICI, ConstantInt::getFalse());
+    return ReplaceInstUsesWith(ICI, Context->getConstantIntFalse());
   if (ICI.getPredicate() == ICmpInst::ICMP_NE)
-    return ReplaceInstUsesWith(ICI, ConstantInt::getTrue());
+    return ReplaceInstUsesWith(ICI, Context->getConstantIntTrue());
 
   // Evaluate the comparison for LT (we invert for GT below). LE and GE cases
   // should have been folded away previously and not enter in here.
@@ -7186,20 +7249,20 @@ Instruction *InstCombiner::visitICmpInstWithCastAndCast(ICmpInst &ICI) {
   if (isSignedCmp) {
     // We're performing a signed comparison.
     if (cast<ConstantInt>(CI)->getValue().isNegative())
-      Result = ConstantInt::getFalse();          // X < (small) --> false
+      Result = Context->getConstantIntFalse();   // X < (small) --> false
     else
-      Result = ConstantInt::getTrue();           // X < (large) --> true
+      Result = Context->getConstantIntTrue();    // X < (large) --> true
   } else {
     // We're performing an unsigned comparison.
     if (isSignedExt) {
       // We're performing an unsigned comp with a sign extended value.
       // This is true if the input is >= 0. [aka >s -1]
-      Constant *NegOne = ConstantInt::getAllOnesValue(SrcTy);
+      Constant *NegOne = Context->getConstantIntAllOnesValue(SrcTy);
       Result = InsertNewInstBefore(new ICmpInst(ICmpInst::ICMP_SGT, LHSCIOp,
                                                 NegOne, ICI.getName()), ICI);
     } else {
       // Unsigned extend & unsigned compare -> always true.
-      Result = ConstantInt::getTrue();
+      Result = Context->getConstantIntTrue();
     }
   }
 
@@ -7212,7 +7275,7 @@ Instruction *InstCombiner::visitICmpInstWithCastAndCast(ICmpInst &ICI) {
          ICI.getPredicate()==ICmpInst::ICMP_SGT) &&
         "ICmp should be folded!");
   if (Constant *CI = dyn_cast<Constant>(Result))
-    return ReplaceInstUsesWith(ICI, ConstantExpr::getNot(CI));
+    return ReplaceInstUsesWith(ICI, Context->getConstantExprNot(CI));
   return BinaryOperator::CreateNot(Result);
 }
 
@@ -7254,21 +7317,21 @@ Instruction *InstCombiner::commonShiftTransforms(BinaryOperator &I) {
 
   // shl X, 0 == X and shr X, 0 == X
   // shl 0, X == 0 and shr 0, X == 0
-  if (Op1 == Constant::getNullValue(Op1->getType()) ||
-      Op0 == Constant::getNullValue(Op0->getType()))
+  if (Op1 == Context->getNullValue(Op1->getType()) ||
+      Op0 == Context->getNullValue(Op0->getType()))
     return ReplaceInstUsesWith(I, Op0);
 
   if (isa<UndefValue>(Op0)) {
     if (I.getOpcode() == Instruction::AShr) // undef >>s X -> undef
       return ReplaceInstUsesWith(I, Op0);
     else                                    // undef << X -> 0, undef >>u X -> 0
-      return ReplaceInstUsesWith(I, Constant::getNullValue(I.getType()));
+      return ReplaceInstUsesWith(I, Context->getNullValue(I.getType()));
   }
   if (isa<UndefValue>(Op1)) {
     if (I.getOpcode() == Instruction::AShr)  // X >>s undef -> X
       return ReplaceInstUsesWith(I, Op0);
     else                                     // X << undef, X >>u undef -> 0
-      return ReplaceInstUsesWith(I, Constant::getNullValue(I.getType()));
+      return ReplaceInstUsesWith(I, Context->getNullValue(I.getType()));
   }
 
   // See if we can fold away this shift.
@@ -7300,9 +7363,9 @@ Instruction *InstCombiner::FoldShiftByConstant(Value *Op0, ConstantInt *Op1,
   //
   if (Op1->uge(TypeBits)) {
     if (I.getOpcode() != Instruction::AShr)
-      return ReplaceInstUsesWith(I, Constant::getNullValue(Op0->getType()));
+      return ReplaceInstUsesWith(I, Context->getNullValue(Op0->getType()));
     else {
-      I.setOperand(1, ConstantInt::get(I.getType(), TypeBits-1));
+      I.setOperand(1, Context->getConstantInt(I.getType(), TypeBits-1));
       return &I;
     }
   }
@@ -7312,7 +7375,7 @@ Instruction *InstCombiner::FoldShiftByConstant(Value *Op0, ConstantInt *Op1,
     if (BO->getOpcode() == Instruction::Mul && isLeftShift)
       if (Constant *BOOp = dyn_cast<Constant>(BO->getOperand(1)))
         return BinaryOperator::CreateMul(BO->getOperand(0),
-                                         ConstantExpr::getShl(BOOp, Op1));
+                                        Context->getConstantExprShl(BOOp, Op1));
 
   // Try to fold constant and into select arguments.
   if (SelectInst *SI = dyn_cast<SelectInst>(Op0))
@@ -7333,7 +7396,7 @@ Instruction *InstCombiner::FoldShiftByConstant(Value *Op0, ConstantInt *Op1,
     if (TrOp && I.isLogicalShift() && TrOp->isShift() &&
         isa<ConstantInt>(TrOp->getOperand(1))) {
       // Okay, we'll do this xform.  Make the shift of shift.
-      Constant *ShAmt = ConstantExpr::getZExt(Op1, TrOp->getType());
+      Constant *ShAmt = Context->getConstantExprZExt(Op1, TrOp->getType());
       Instruction *NSh = BinaryOperator::Create(I.getOpcode(), TrOp, ShAmt,
                                                 I.getName());
       InsertNewInstBefore(NSh, I); // (shift2 (shift1 & 0x00FF), c2)
@@ -7357,8 +7420,9 @@ Instruction *InstCombiner::FoldShiftByConstant(Value *Op0, ConstantInt *Op1,
         MaskV = MaskV.lshr(Op1->getZExtValue());
       }
 
-      Instruction *And = BinaryOperator::CreateAnd(NSh, ConstantInt::get(MaskV),
-                                                   TI->getName());
+      Instruction *And =
+        BinaryOperator::CreateAnd(NSh, Context->getConstantInt(MaskV),
                                   TI->getName());
       InsertNewInstBefore(And, I); // shift1 & 0x00FF
 
       // Return the value truncated to the interesting size.
@@ -7390,7 +7454,7 @@ Instruction *InstCombiner::FoldShiftByConstant(Value *Op0, ConstantInt *Op1,
                                           Op0BO->getOperand(1)->getName());
           InsertNewInstBefore(X, I);  // (X + (Y << C))
           uint32_t Op1Val = Op1->getLimitedValue(TypeBits);
-          return BinaryOperator::CreateAnd(X, ConstantInt::get(
+          return BinaryOperator::CreateAnd(X, Context->getConstantInt(
                      APInt::getHighBitsSet(TypeBits, TypeBits-Op1Val)));
         }
 
@@ -7406,7 +7470,8 @@ Instruction *InstCombiner::FoldShiftByConstant(Value *Op0, ConstantInt *Op1,
                                            Op0BO->getName());
           InsertNewInstBefore(YS, I); // (Y << C)
           Instruction *XM =
-            BinaryOperator::CreateAnd(V1, ConstantExpr::getShl(CC, Op1),
+            BinaryOperator::CreateAnd(V1,
+                                      Context->getConstantExprShl(CC, Op1),
                                       V1->getName()+".mask");
           InsertNewInstBefore(XM, I); // X & (CC << C)
 
@@ -7428,7 +7493,7 @@ Instruction *InstCombiner::FoldShiftByConstant(Value *Op0, ConstantInt *Op1,
                                           Op0BO->getOperand(0)->getName());
           InsertNewInstBefore(X, I);  // (X + (Y << C))
           uint32_t Op1Val = Op1->getLimitedValue(TypeBits);
-          return BinaryOperator::CreateAnd(X, ConstantInt::get(
+          return BinaryOperator::CreateAnd(X, Context->getConstantInt(
                      APInt::getHighBitsSet(TypeBits, TypeBits-Op1Val)));
         }
 
@@ -7444,7 +7509,8 @@ Instruction *InstCombiner::FoldShiftByConstant(Value *Op0, ConstantInt *Op1,
                                            Op0BO->getName());
           InsertNewInstBefore(YS, I); // (Y << C)
           Instruction *XM =
-            BinaryOperator::CreateAnd(V1, ConstantExpr::getShl(CC, Op1),
+            BinaryOperator::CreateAnd(V1,
+                                      Context->getConstantExprShl(CC, Op1),
                                       V1->getName()+".mask");
           InsertNewInstBefore(XM, I); // X & (CC << C)
 
@@ -7486,7 +7552,7 @@ Instruction *InstCombiner::FoldShiftByConstant(Value *Op0, ConstantInt *Op1,
           isValid = Op0C->getValue()[TypeBits-1] == highBitSet;
 
         if (isValid) {
-          Constant *NewRHS = ConstantExpr::get(I.getOpcode(), Op0C, Op1);
+          Constant *NewRHS = Context->getConstantExpr(I.getOpcode(), Op0C, Op1);
 
           Instruction *NewShift =
             BinaryOperator::Create(I.getOpcode(), Op0BO->getOperand(0), Op1);
@@ -7523,19 +7589,19 @@ Instruction *InstCombiner::FoldShiftByConstant(Value *Op0, ConstantInt *Op1,
     // saturates.
     if (AmtSum >= TypeBits) {
       if (I.getOpcode() != Instruction::AShr)
-        return ReplaceInstUsesWith(I, Constant::getNullValue(I.getType()));
+        return ReplaceInstUsesWith(I, Context->getNullValue(I.getType()));
       AmtSum = TypeBits-1;  // Saturate to 31 for i32 ashr.
     }
 
     return BinaryOperator::Create(I.getOpcode(), X,
-                                  ConstantInt::get(Ty, AmtSum));
+                                  Context->getConstantInt(Ty, AmtSum));
   } else if (ShiftOp->getOpcode() == Instruction::LShr &&
             I.getOpcode() == Instruction::AShr) {
    if (AmtSum >= TypeBits)
-      return ReplaceInstUsesWith(I, Constant::getNullValue(I.getType()));
+      return ReplaceInstUsesWith(I, Context->getNullValue(I.getType()));
 
    // ((X >>u C1) >>s C2) -> (X >>u (C1+C2))  since C1 != 0.
-    return BinaryOperator::CreateLShr(X, ConstantInt::get(Ty, AmtSum));
+    return BinaryOperator::CreateLShr(X, Context->getConstantInt(Ty, AmtSum));
  } else if (ShiftOp->getOpcode() == Instruction::AShr &&
             I.getOpcode() == Instruction::LShr) {
    // ((X >>s C1) >>u C2) -> ((X >>s (C1+C2)) & mask) since C1 != 0.
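The two-shift combine above relies on the first unsigned shift clearing the sign bit, so the following signed shift behaves logically; a numeric spot-check in standalone C++ (C1 and C2 are illustrative):

    #include <cassert>
    #include <cstdint>

    int main() {
      // ((X >>u C1) >>s C2) == X >>u (C1+C2) when C1 != 0: after the first
      // unsigned shift the value is non-negative, so ashr cannot smear ones.
      const unsigned C1 = 3, C2 = 5;
      for (uint32_t x = 0xF0000000u; x < 0xF0000040u; ++x)
        assert((uint32_t)((int32_t)(x >> C1) >> C2) == (x >> (C1 + C2)));
    }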
@@ -7543,11 +7609,11 @@ Instruction *InstCombiner::FoldShiftByConstant(Value *Op0, ConstantInt *Op1,
       AmtSum = TypeBits-1;
 
     Instruction *Shift =
-      BinaryOperator::CreateAShr(X, ConstantInt::get(Ty, AmtSum));
+      BinaryOperator::CreateAShr(X, Context->getConstantInt(Ty, AmtSum));
     InsertNewInstBefore(Shift, I);
 
     APInt Mask(APInt::getLowBitsSet(TypeBits, TypeBits - ShiftAmt2));
-    return BinaryOperator::CreateAnd(Shift, ConstantInt::get(Mask));
+    return BinaryOperator::CreateAnd(Shift, Context->getConstantInt(Mask));
   }
 
   // Okay, if we get here, one shift must be left, and the other shift must be
@@ -7556,12 +7622,12 @@ Instruction *InstCombiner::FoldShiftByConstant(Value *Op0, ConstantInt *Op1,
     // If we have ((X >>? C) << C), turn this into X & (-1 << C).
     if (I.getOpcode() == Instruction::Shl) {
       APInt Mask(APInt::getHighBitsSet(TypeBits, TypeBits - ShiftAmt1));
-      return BinaryOperator::CreateAnd(X, ConstantInt::get(Mask));
+      return BinaryOperator::CreateAnd(X, Context->getConstantInt(Mask));
     }
     // If we have ((X << C) >>u C), turn this into X & (-1 >>u C).
     if (I.getOpcode() == Instruction::LShr) {
       APInt Mask(APInt::getLowBitsSet(TypeBits, TypeBits - ShiftAmt1));
-      return BinaryOperator::CreateAnd(X, ConstantInt::get(Mask));
+      return BinaryOperator::CreateAnd(X, Context->getConstantInt(Mask));
     }
     // We can simplify ((X << C) >>s C) into a trunc + sext.
     // NOTE: we could do this for any C, but that would make 'unusual' integer
@@ -7575,7 +7641,7 @@ Instruction *InstCombiner::FoldShiftByConstant(Value *Op0, ConstantInt *Op1,
     case 32 :
     case 64 :
     case 128:
-      SExtType = IntegerType::get(Ty->getBitWidth() - ShiftAmt1);
+      SExtType = Context->getIntegerType(Ty->getBitWidth() - ShiftAmt1);
       break;
     default: break;
     }
@@ -7593,22 +7659,22 @@ Instruction *InstCombiner::FoldShiftByConstant(Value *Op0, ConstantInt *Op1,
       assert(ShiftOp->getOpcode() == Instruction::LShr ||
              ShiftOp->getOpcode() == Instruction::AShr);
       Instruction *Shift =
-        BinaryOperator::CreateShl(X, ConstantInt::get(Ty, ShiftDiff));
+        BinaryOperator::CreateShl(X, Context->getConstantInt(Ty, ShiftDiff));
       InsertNewInstBefore(Shift, I);
 
       APInt Mask(APInt::getHighBitsSet(TypeBits, TypeBits - ShiftAmt2));
-      return BinaryOperator::CreateAnd(Shift, ConstantInt::get(Mask));
+      return BinaryOperator::CreateAnd(Shift, Context->getConstantInt(Mask));
     }
 
     // (X << C1) >>u C2  --> X >>u (C2-C1) & (-1 >> C2)
     if (I.getOpcode() == Instruction::LShr) {
       assert(ShiftOp->getOpcode() == Instruction::Shl);
       Instruction *Shift =
-        BinaryOperator::CreateLShr(X, ConstantInt::get(Ty, ShiftDiff));
+        BinaryOperator::CreateLShr(X, Context->getConstantInt(Ty, ShiftDiff));
       InsertNewInstBefore(Shift, I);
 
       APInt Mask(APInt::getLowBitsSet(TypeBits, TypeBits - ShiftAmt2));
-      return BinaryOperator::CreateAnd(Shift, ConstantInt::get(Mask));
+      return BinaryOperator::CreateAnd(Shift, Context->getConstantInt(Mask));
     }
 
     // We can't handle (X << C1) >>s C2, it shifts arbitrary bits in.
@@ -7622,22 +7688,22 @@ Instruction *InstCombiner::FoldShiftByConstant(Value *Op0, ConstantInt *Op1,
              ShiftOp->getOpcode() == Instruction::AShr);
       Instruction *Shift =
         BinaryOperator::Create(ShiftOp->getOpcode(), X,
-                               ConstantInt::get(Ty, ShiftDiff));
+                               Context->getConstantInt(Ty, ShiftDiff));
       InsertNewInstBefore(Shift, I);
 
       APInt Mask(APInt::getHighBitsSet(TypeBits, TypeBits - ShiftAmt2));
-      return BinaryOperator::CreateAnd(Shift, ConstantInt::get(Mask));
+      return BinaryOperator::CreateAnd(Shift, Context->getConstantInt(Mask));
     }
 
     // (X << C1) >>u C2  --> X << (C1-C2) & (-1 >> C2)
     if (I.getOpcode() == Instruction::LShr) {
       assert(ShiftOp->getOpcode() == Instruction::Shl);
       Instruction *Shift =
-        BinaryOperator::CreateShl(X, ConstantInt::get(Ty, ShiftDiff));
+        BinaryOperator::CreateShl(X, Context->getConstantInt(Ty, ShiftDiff));
       InsertNewInstBefore(Shift, I);
 
       APInt Mask(APInt::getLowBitsSet(TypeBits, TypeBits - ShiftAmt2));
-      return BinaryOperator::CreateAnd(Shift, ConstantInt::get(Mask));
+      return BinaryOperator::CreateAnd(Shift, Context->getConstantInt(Mask));
     }
 
     // We can't handle (X << C1) >>a C2, it shifts arbitrary bits in.
@@ -7652,12 +7718,12 @@ Instruction *InstCombiner::FoldShiftByConstant(Value *Op0, ConstantInt *Op1,
 /// X*Scale+Offset.
 ///
 static Value *DecomposeSimpleLinearExpr(Value *Val, unsigned &Scale,
-                                        int &Offset) {
+                                        int &Offset, LLVMContext* Context) {
   assert(Val->getType() == Type::Int32Ty && "Unexpected allocation size type!");
   if (ConstantInt *CI = dyn_cast<ConstantInt>(Val)) {
     Offset = CI->getZExtValue();
     Scale  = 0;
-    return ConstantInt::get(Type::Int32Ty, 0);
+    return Context->getConstantInt(Type::Int32Ty, 0);
   } else if (BinaryOperator *I = dyn_cast<BinaryOperator>(Val)) {
     if (ConstantInt *RHS = dyn_cast<ConstantInt>(I->getOperand(1))) {
       if (I->getOpcode() == Instruction::Shl) {
@@ -7675,7 +7741,8 @@ static Value *DecomposeSimpleLinearExpr(Value *Val, unsigned &Scale,
         // where C1 is divisible by C2.
         unsigned SubScale;
         Value *SubVal =
-          DecomposeSimpleLinearExpr(I->getOperand(0), SubScale, Offset);
+          DecomposeSimpleLinearExpr(I->getOperand(0), SubScale,
                                     Offset, Context);
         Offset += RHS->getZExtValue();
         Scale = SubScale;
         return SubVal;
       }
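DecomposeSimpleLinearExpr's contract is to rewrite a shl/mul/add chain as Base*Scale+Offset; the identity it exploits, as a standalone C++ sketch (the shift amount and offset are illustrative; the real code walks the IR):

    #include <cassert>

    // (Base << 4) + 2 decomposes to Base*Scale + Offset with Scale=16, Offset=2.
    int main() {
      const unsigned Scale = 1u << 4;
      const int Offset = 2;
      for (int base = 0; base < 8; ++base)
        assert(((base << 4) + 2) == base * (int)Scale + Offset);
    }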
@@ -7736,7 +7803,8 @@ Instruction *InstCombiner::PromoteCastOfAllocation(BitCastInst &CI,
   unsigned ArraySizeScale;
   int ArrayOffset;
   Value *NumElements = // See if the array size is a decomposable linear expr.
-    DecomposeSimpleLinearExpr(AI.getOperand(0), ArraySizeScale, ArrayOffset);
+    DecomposeSimpleLinearExpr(AI.getOperand(0), ArraySizeScale,
                               ArrayOffset, Context);
 
   // If we can now satisfy the modulus, by using a non-1 scale, we really can
   // do the xform.
@@ -7749,9 +7817,9 @@ Instruction *InstCombiner::PromoteCastOfAllocation(BitCastInst &CI,
     Amt = NumElements;
   } else {
     // If the allocation size is constant, form a constant mul expression
-    Amt = ConstantInt::get(Type::Int32Ty, Scale);
+    Amt = Context->getConstantInt(Type::Int32Ty, Scale);
     if (isa<ConstantInt>(NumElements))
-      Amt = ConstantExpr::getMul(cast<ConstantInt>(NumElements),
+      Amt = Context->getConstantExprMul(cast<ConstantInt>(NumElements),
                                  cast<ConstantInt>(Amt));
     // otherwise multiply the amount and the number of elements
     else {
@@ -7761,7 +7829,7 @@ Instruction *InstCombiner::PromoteCastOfAllocation(BitCastInst &CI,
   }
 
   if (int Offset = (AllocElTySize*ArrayOffset)/CastElTySize) {
-    Value *Off = ConstantInt::get(Type::Int32Ty, Offset, true);
+    Value *Off = Context->getConstantInt(Type::Int32Ty, Offset, true);
     Instruction *Tmp = BinaryOperator::CreateAdd(Amt, Off, "tmp");
     Amt = InsertNewInstBefore(Tmp, AI);
   }
@@ -7925,7 +7993,8 @@ bool InstCombiner::CanEvaluateInDifferentType(Value *V, const Type *Ty,
 Value *InstCombiner::EvaluateInDifferentType(Value *V, const Type *Ty,
                                              bool isSigned) {
   if (Constant *C = dyn_cast<Constant>(V))
-    return ConstantExpr::getIntegerCast(C, Ty, isSigned /*Sext or ZExt*/);
+    return Context->getConstantExprIntegerCast(C, Ty,
                                                isSigned /*Sext or ZExt*/);
 
   // Otherwise, it must be an instruction.
   Instruction *I = cast<Instruction>(V);
@@ -8019,7 +8088,8 @@ Instruction *InstCombiner::commonCastTransforms(CastInst &CI) {
 /// resultant element type, otherwise return null.
 static const Type *FindElementAtOffset(const Type *Ty, int64_t Offset,
                                        SmallVectorImpl<Value*> &NewIndices,
-                                       const TargetData *TD) {
+                                       const TargetData *TD,
+                                       LLVMContext* Context) {
   if (!Ty->isSized()) return 0;
 
   // Start with the index over the outer type.  Note that the type size
@@ -8040,7 +8110,7 @@ static const Type *FindElementAtOffset(const Type *Ty, int64_t Offset,
     assert((uint64_t)Offset < (uint64_t)TySize && "Out of range offset");
   }
 
-  NewIndices.push_back(ConstantInt::get(IntPtrTy, FirstIdx));
+  NewIndices.push_back(Context->getConstantInt(IntPtrTy, FirstIdx));
 
   // Index into the types.  If we fail, set OrigBase to null.
   while (Offset) {
@@ -8054,14 +8124,14 @@ static const Type *FindElementAtOffset(const Type *Ty, int64_t Offset,
              "Offset must stay within the indexed type");
 
       unsigned Elt = SL->getElementContainingOffset(Offset);
-      NewIndices.push_back(ConstantInt::get(Type::Int32Ty, Elt));
+      NewIndices.push_back(Context->getConstantInt(Type::Int32Ty, Elt));
 
       Offset -= SL->getElementOffset(Elt);
       Ty = STy->getElementType(Elt);
     } else if (const ArrayType *AT = dyn_cast<ArrayType>(Ty)) {
       uint64_t EltSize = TD->getTypeAllocSize(AT->getElementType());
       assert(EltSize && "Cannot index into a zero-sized array");
-      NewIndices.push_back(ConstantInt::get(IntPtrTy,Offset/EltSize));
+      NewIndices.push_back(Context->getConstantInt(IntPtrTy,Offset/EltSize));
       Offset %= EltSize;
       Ty = AT->getElementType();
     } else {
@@ -8096,7 +8166,8 @@ Instruction *InstCombiner::commonPointerCastTransforms(CastInst &CI) {
     if (GEP->hasOneUse() && isa<BitCastInst>(GEP->getOperand(0))) {
       if (GEP->hasAllConstantIndices()) {
         // We are guaranteed to get a constant from EmitGEPOffset.
-        ConstantInt *OffsetV = cast<ConstantInt>(EmitGEPOffset(GEP, CI, *this));
+        ConstantInt *OffsetV =
                              cast<ConstantInt>(EmitGEPOffset(GEP, CI, *this));
         int64_t Offset = OffsetV->getSExtValue();
 
         // Get the base pointer input of the bitcast, and the type it points to.
@@ -8104,7 +8175,7 @@ Instruction *InstCombiner::commonPointerCastTransforms(CastInst &CI) {
         const Type *GEPIdxTy =
           cast<PointerType>(OrigBase->getType())->getElementType();
         SmallVector<Value*, 8> NewIndices;
-        if (FindElementAtOffset(GEPIdxTy, Offset, NewIndices, TD)) {
+        if (FindElementAtOffset(GEPIdxTy, Offset, NewIndices, TD, Context)) {
           // If we were able to index down into an element, create the GEP
           // and bitcast the result.  This eliminates one bitcast, potentially
           // two.
@@ -8261,7 +8332,7 @@ Instruction *InstCombiner::commonIntCastTransforms(CastInst &CI) {
           return ReplaceInstUsesWith(CI, Res);
 
         // We need to emit an AND to clear the high bits.
-        Constant *C = ConstantInt::get(APInt::getLowBitsSet(DestBitSize,
+        Constant *C = Context->getConstantInt(APInt::getLowBitsSet(DestBitSize,
                                                             SrcBitSize));
         return BinaryOperator::CreateAnd(Res, C);
       }
@@ -8309,10 +8380,11 @@ Instruction *InstCombiner::commonIntCastTransforms(CastInst &CI) {
     // cast (xor bool X, true) to int  --> xor (cast bool X to int), 1
     if (isa<ZExtInst>(CI) && SrcBitSize == 1 &&
         SrcI->getOpcode() == Instruction::Xor &&
-        Op1 == ConstantInt::getTrue() &&
+        Op1 == Context->getConstantIntTrue() &&
        (!Op0->hasOneUse() || !isa<CmpInst>(Op0))) {
      Value *New = InsertCastBefore(Instruction::ZExt, Op0, DestTy, CI);
-      return BinaryOperator::CreateXor(New, ConstantInt::get(CI.getType(), 1));
+      return BinaryOperator::CreateXor(New,
                                       Context->getConstantInt(CI.getType(), 1));
    }
    break;
  case Instruction::SDiv:
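The "cast (xor bool X, true) to int" rewrite above is just the observation that widening commutes with boolean negation; a one-liner check in standalone C++:

    #include <cassert>
    #include <cstdint>

    int main() {
      // zext(xor bool X, true)  ==  xor (zext bool X), 1
      for (int b = 0; b <= 1; ++b)
        assert((uint32_t)(!(bool)b) == ((uint32_t)(bool)b ^ 1u));
    }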
Value *V1 = InsertCastBefore(Instruction::Trunc, ShiftOp, Ty, CI); - Value *V2 = ConstantExpr::getTrunc(ShAmtV, Ty); + Value *V2 = Context->getConstantExprTrunc(ShAmtV, Ty); return BinaryOperator::CreateLShr(V1, V2); } } @@ -8427,7 +8499,7 @@ Instruction *InstCombiner::transformZExtICmp(ICmpInst *ICI, Instruction &CI, if (!DoXform) return ICI; Value *In = ICI->getOperand(0); - Value *Sh = ConstantInt::get(In->getType(), + Value *Sh = Context->getConstantInt(In->getType(), In->getType()->getScalarSizeInBits()-1); In = InsertNewInstBefore(BinaryOperator::CreateLShr(In, Sh, In->getName()+".lobit"), @@ -8437,7 +8509,7 @@ Instruction *InstCombiner::transformZExtICmp(ICmpInst *ICI, Instruction &CI, false/*ZExt*/, "tmp", &CI); if (ICI->getPredicate() == ICmpInst::ICMP_SGT) { - Constant *One = ConstantInt::get(In->getType(), 1); + Constant *One = Context->getConstantInt(In->getType(), 1); In = InsertNewInstBefore(BinaryOperator::CreateXor(In, One, In->getName()+".not"), CI); @@ -8473,8 +8545,8 @@ Instruction *InstCombiner::transformZExtICmp(ICmpInst *ICI, Instruction &CI, if (Op1CV != 0 && (Op1CV != KnownZeroMask)) { // (X&4) == 2 --> false // (X&4) != 2 --> true - Constant *Res = ConstantInt::get(Type::Int1Ty, isNE); - Res = ConstantExpr::getZExt(Res, CI.getType()); + Constant *Res = Context->getConstantInt(Type::Int1Ty, isNE); + Res = Context->getConstantExprZExt(Res, CI.getType()); return ReplaceInstUsesWith(CI, Res); } @@ -8484,12 +8556,12 @@ Instruction *InstCombiner::transformZExtICmp(ICmpInst *ICI, Instruction &CI, // Perform a logical shr by shiftamt. // Insert the shift to put the result in the low bit. In = InsertNewInstBefore(BinaryOperator::CreateLShr(In, - ConstantInt::get(In->getType(), ShiftAmt), + Context->getConstantInt(In->getType(), ShiftAmt), In->getName()+".lobit"), CI); } if ((Op1CV != 0) == isNE) { // Toggle the low bit. - Constant *One = ConstantInt::get(In->getType(), 1); + Constant *One = Context->getConstantInt(In->getType(), 1); In = BinaryOperator::CreateXor(In, One, "tmp"); InsertNewInstBefore(cast<Instruction>(In), CI); } @@ -8528,20 +8600,21 @@ Instruction *InstCombiner::visitZExt(ZExtInst &CI) { // SrcSize > DstSize: trunc(a) & mask if (SrcSize < DstSize) { APInt AndValue(APInt::getLowBitsSet(SrcSize, MidSize)); - Constant *AndConst = ConstantInt::get(A->getType(), AndValue); + Constant *AndConst = Context->getConstantInt(A->getType(), AndValue); Instruction *And = BinaryOperator::CreateAnd(A, AndConst, CSrc->getName()+".mask"); InsertNewInstBefore(And, CI); return new ZExtInst(And, CI.getType()); } else if (SrcSize == DstSize) { APInt AndValue(APInt::getLowBitsSet(SrcSize, MidSize)); - return BinaryOperator::CreateAnd(A, ConstantInt::get(A->getType(), + return BinaryOperator::CreateAnd(A, Context->getConstantInt(A->getType(), AndValue)); } else if (SrcSize > DstSize) { Instruction *Trunc = new TruncInst(A, CI.getType(), "tmp"); InsertNewInstBefore(Trunc, CI); APInt AndValue(APInt::getLowBitsSet(DstSize, MidSize)); - return BinaryOperator::CreateAnd(Trunc, ConstantInt::get(Trunc->getType(), + return BinaryOperator::CreateAnd(Trunc, + Context->getConstantInt(Trunc->getType(), AndValue)); } } @@ -8572,7 +8645,7 @@ Instruction *InstCombiner::visitZExt(ZExtInst &CI) { if (TI0->getType() == CI.getType()) return BinaryOperator::CreateAnd(TI0, - ConstantExpr::getZExt(C, CI.getType())); + Context->getConstantExprZExt(C, CI.getType())); } // zext((trunc(t) & C) ^ C) -> ((t & zext(C)) ^ zext(C)). 
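The .lobit/.not sequence that transformZExtICmp emits above is plain bit arithmetic: zext(icmp sgt X, -1) is the inverted sign bit, produced by one logical shift right and one xor with 1 (the slt-0 form skips the xor). A standalone illustration of the identity for i32, independent of LLVM:

    #include <cassert>
    #include <cstdint>

    // zext(X >s -1) == ((uint32_t)X >> 31) ^ 1: the shift isolates the
    // sign bit in bit 0, and the xor inverts it so nonnegative yields 1.
    static uint32_t zextSgtMinusOne(int32_t X) {
      return (static_cast<uint32_t>(X) >> 31) ^ 1u;
    }

    int main() {
      assert(zextSgtMinusOne(42) == 1);
      assert(zextSgtMinusOne(0) == 1);
      assert(zextSgtMinusOne(-7) == 0);
      return 0;
    }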
@@ -8584,7 +8657,7 @@ Instruction *InstCombiner::visitZExt(ZExtInst &CI) { if (TruncInst *TI = dyn_cast<TruncInst>(And->getOperand(0))) { Value *TI0 = TI->getOperand(0); if (TI0->getType() == CI.getType()) { - Constant *ZC = ConstantExpr::getZExt(C, CI.getType()); + Constant *ZC = Context->getConstantExprZExt(C, CI.getType()); Instruction *NewAnd = BinaryOperator::CreateAnd(TI0, ZC, "tmp"); InsertNewInstBefore(NewAnd, *And); return BinaryOperator::CreateXor(NewAnd, ZC); @@ -8603,8 +8676,8 @@ Instruction *InstCombiner::visitSExt(SExtInst &CI) { // Canonicalize sign-extend from i1 to a select. if (Src->getType() == Type::Int1Ty) return SelectInst::Create(Src, - ConstantInt::getAllOnesValue(CI.getType()), - Constant::getNullValue(CI.getType())); + Context->getConstantIntAllOnesValue(CI.getType()), + Context->getNullValue(CI.getType())); // See if the value being truncated is already sign extended. If so, just // eliminate the trunc/sext pair. @@ -8656,7 +8729,7 @@ Instruction *InstCombiner::visitSExt(SExtInst &CI) { unsigned MidSize = Src->getType()->getScalarSizeInBits(); unsigned SrcDstSize = CI.getType()->getScalarSizeInBits(); unsigned ShAmt = CA->getZExtValue()+SrcDstSize-MidSize; - Constant *ShAmtV = ConstantInt::get(CI.getType(), ShAmt); + Constant *ShAmtV = Context->getConstantInt(CI.getType(), ShAmt); I = InsertNewInstBefore(BinaryOperator::CreateShl(I, ShAmtV, CI.getName()), CI); return BinaryOperator::CreateAShr(I, ShAmtV); @@ -8668,21 +8741,22 @@ Instruction *InstCombiner::visitSExt(SExtInst &CI) { /// FitsInFPType - Return a Constant* for the specified FP constant if it fits /// in the specified FP type without changing its value. -static Constant *FitsInFPType(ConstantFP *CFP, const fltSemantics &Sem) { +static Constant *FitsInFPType(ConstantFP *CFP, const fltSemantics &Sem, + LLVMContext* Context) { bool losesInfo; APFloat F = CFP->getValueAPF(); (void)F.convert(Sem, APFloat::rmNearestTiesToEven, &losesInfo); if (!losesInfo) - return ConstantFP::get(F); + return Context->getConstantFP(F); return 0; } /// LookThroughFPExtensions - If this is an fp extension instruction, look /// through it until we get the source value. -static Value *LookThroughFPExtensions(Value *V) { +static Value *LookThroughFPExtensions(Value *V, LLVMContext* Context) { if (Instruction *I = dyn_cast<Instruction>(V)) if (I->getOpcode() == Instruction::FPExt) - return LookThroughFPExtensions(I->getOperand(0)); + return LookThroughFPExtensions(I->getOperand(0), Context); // If this value is a constant, return the constant in the smallest FP type // that can accurately represent it. This allows us to turn @@ -8691,11 +8765,11 @@ static Value *LookThroughFPExtensions(Value *V) { if (CFP->getType() == Type::PPC_FP128Ty) return V; // No constant folding of this. // See if the value can be truncated to float and then reextended. - if (Value *V = FitsInFPType(CFP, APFloat::IEEEsingle)) + if (Value *V = FitsInFPType(CFP, APFloat::IEEEsingle, Context)) return V; if (CFP->getType() == Type::DoubleTy) return V; // Won't shrink. - if (Value *V = FitsInFPType(CFP, APFloat::IEEEdouble)) + if (Value *V = FitsInFPType(CFP, APFloat::IEEEdouble, Context)) return V; // Don't try to shrink to various long double types. 
} @@ -8721,8 +8795,8 @@ Instruction *InstCombiner::visitFPTrunc(FPTruncInst &CI) { case Instruction::FDiv: case Instruction::FRem: const Type *SrcTy = OpI->getType(); - Value *LHSTrunc = LookThroughFPExtensions(OpI->getOperand(0)); - Value *RHSTrunc = LookThroughFPExtensions(OpI->getOperand(1)); + Value *LHSTrunc = LookThroughFPExtensions(OpI->getOperand(0), Context); + Value *RHSTrunc = LookThroughFPExtensions(OpI->getOperand(1), Context); if (LHSTrunc->getType() != SrcTy && RHSTrunc->getType() != SrcTy) { unsigned DstSize = CI.getType()->getScalarSizeInBits(); @@ -8849,7 +8923,7 @@ Instruction *InstCombiner::visitIntToPtr(IntToPtrInst &CI) { // If Offset is evenly divisible by Size, we can do this xform. if (Size && !APIntOps::srem(Offset, APInt(Offset.getBitWidth(), Size))){ Offset = APIntOps::sdiv(Offset, APInt(Offset.getBitWidth(), Size)); - return GetElementPtrInst::Create(X, ConstantInt::get(Offset)); + return GetElementPtrInst::Create(X, Context->getConstantInt(Offset)); } } // TODO: Could handle other cases, e.g. where add is indexing into field of @@ -8872,7 +8946,8 @@ Instruction *InstCombiner::visitIntToPtr(IntToPtrInst &CI) { Instruction *P = InsertNewInstBefore(new IntToPtrInst(X, CI.getType(), "tmp"), CI); - return GetElementPtrInst::Create(P, ConstantInt::get(Offset), "tmp"); + return GetElementPtrInst::Create(P, + Context->getConstantInt(Offset), "tmp"); } } return 0; @@ -8921,7 +8996,7 @@ Instruction *InstCombiner::visitBitCast(BitCastInst &CI) { // If the source and destination are pointers, and this cast is equivalent // to a getelementptr X, 0, 0, 0... turn it into the appropriate gep. // This can enhance SROA and other transforms that want type-safe pointers. - Constant *ZeroUInt = Constant::getNullValue(Type::Int32Ty); + Constant *ZeroUInt = Context->getNullValue(Type::Int32Ty); unsigned NumZeros = 0; while (SrcElTy != DstElTy && isa<CompositeType>(SrcElTy) && !isa<PointerType>(SrcElTy) && @@ -9000,7 +9075,8 @@ static unsigned GetSelectFoldableOperands(Instruction *I) { /// GetSelectFoldableConstant - For the same transformation as the previous /// function, return the identity constant that goes into the select. -static Constant *GetSelectFoldableConstant(Instruction *I) { +static Constant *GetSelectFoldableConstant(Instruction *I, + LLVMContext* Context) { switch (I->getOpcode()) { default: assert(0 && "This cannot happen!"); abort(); case Instruction::Add: @@ -9010,11 +9086,11 @@ static Constant *GetSelectFoldableConstant(Instruction *I) { case Instruction::Shl: case Instruction::LShr: case Instruction::AShr: - return Constant::getNullValue(I->getType()); + return Context->getNullValue(I->getType()); case Instruction::And: - return Constant::getAllOnesValue(I->getType()); + return Context->getAllOnesValue(I->getType()); case Instruction::Mul: - return ConstantInt::get(I->getType(), 1); + return Context->getConstantInt(I->getType(), 1); } } @@ -9116,7 +9192,7 @@ Instruction *InstCombiner::FoldSelectIntoOp(SelectInst &SI, Value *TrueVal, } if (OpToFold) { - Constant *C = GetSelectFoldableConstant(TVI); + Constant *C = GetSelectFoldableConstant(TVI, Context); Value *OOp = TVI->getOperand(2-OpToFold); // Avoid creating select between 2 constants unless it's selecting // between 0 and 1. 
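GetSelectFoldableConstant above returns the identity element of each foldable opcode; that is what lets FoldSelectIntoOp rewrite select C, (op X, Y), X as op X, (select C, Y, Identity) without disturbing the untaken arm. The identities it hands back, checked outside LLVM:

    #include <cassert>
    #include <cstdint>

    int main() {
      uint32_t X = 0xDEADBEEFu;
      assert((X + 0u) == X && (X - 0u) == X);  // Add/Sub: identity 0
      assert((X | 0u) == X && (X ^ 0u) == X);  // Or/Xor: identity 0
      assert((X << 0) == X && (X >> 0) == X);  // Shl/LShr/AShr: identity 0
      assert((X & ~0u) == X);                  // And: identity all-ones
      assert((X * 1u) == X);                   // Mul: identity 1
      return 0;
    }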
@@ -9145,7 +9221,7 @@ Instruction *InstCombiner::FoldSelectIntoOp(SelectInst &SI, Value *TrueVal, } if (OpToFold) { - Constant *C = GetSelectFoldableConstant(FVI); + Constant *C = GetSelectFoldableConstant(FVI, Context); Value *OOp = FVI->getOperand(2-OpToFold); // Avoid creating select between 2 constants unless it's selecting // between 0 and 1. @@ -9190,7 +9266,7 @@ Instruction *InstCombiner::visitSelectInstWithICmp(SelectInst &SI, if (CI->isMinValue(Pred == ICmpInst::ICMP_SLT)) return ReplaceInstUsesWith(SI, FalseVal); // X < C ? X : C-1 --> X > C-1 ? C-1 : X - Constant *AdjustedRHS = SubOne(CI); + Constant *AdjustedRHS = SubOne(CI, Context); if ((CmpLHS == TrueVal && AdjustedRHS == FalseVal) || (CmpLHS == FalseVal && AdjustedRHS == TrueVal)) { Pred = ICmpInst::getSwappedPredicate(Pred); @@ -9210,7 +9286,7 @@ Instruction *InstCombiner::visitSelectInstWithICmp(SelectInst &SI, if (CI->isMaxValue(Pred == ICmpInst::ICMP_SGT)) return ReplaceInstUsesWith(SI, FalseVal); // X > C ? X : C+1 --> X < C+1 ? C+1 : X - Constant *AdjustedRHS = AddOne(CI); + Constant *AdjustedRHS = AddOne(CI, Context); if ((CmpLHS == TrueVal && AdjustedRHS == FalseVal) || (CmpLHS == FalseVal && AdjustedRHS == TrueVal)) { Pred = ICmpInst::getSwappedPredicate(Pred); @@ -9247,7 +9323,7 @@ Instruction *InstCombiner::visitSelectInstWithICmp(SelectInst &SI, if ((Pred == ICmpInst::ICMP_SLT && Op1CV == 0) || (Pred == ICmpInst::ICMP_SGT && Op1CV.isAllOnesValue())) { Value *In = ICI->getOperand(0); - Value *Sh = ConstantInt::get(In->getType(), + Value *Sh = Context->getConstantInt(In->getType(), In->getType()->getScalarSizeInBits()-1); In = InsertNewInstBefore(BinaryOperator::CreateAShr(In, Sh, In->getName()+".lobit"), @@ -9371,7 +9447,7 @@ Instruction *InstCombiner::visitSelectInst(SelectInst &SI) { // same width. Make an all-ones value by inserting a AShr. Value *X = IC->getOperand(0); uint32_t Bits = X->getType()->getScalarSizeInBits(); - Constant *ShAmt = ConstantInt::get(X->getType(), Bits-1); + Constant *ShAmt = Context->getConstantInt(X->getType(), Bits-1); Instruction *SRA = BinaryOperator::Create(Instruction::AShr, X, ShAmt, "ones"); InsertNewInstBefore(SRA, SI); @@ -9492,7 +9568,7 @@ Instruction *InstCombiner::visitSelectInst(SelectInst &SI) { // select C, (add X, Y), (sub X, Z) Value *NegVal; // Compute -Z if (Constant *C = dyn_cast<Constant>(SubOp->getOperand(1))) { - NegVal = ConstantExpr::getNeg(C); + NegVal = Context->getConstantExprNeg(C); } else { NegVal = InsertNewInstBefore( BinaryOperator::CreateNeg(SubOp->getOperand(1), "tmp"), SI); @@ -9638,7 +9714,8 @@ Instruction *InstCombiner::SimplifyMemTransfer(MemIntrinsic *MI) { return 0; // If not 1/2/4/8 bytes, exit. // Use an integer load+store unless we can find something better. - Type *NewPtrTy = PointerType::getUnqual(IntegerType::get(Size<<3)); + Type *NewPtrTy = + Context->getPointerTypeUnqual(Context->getIntegerType(Size<<3)); // Memcpy forces the use of i8* for the source and destination. That means // that if you're using memcpy to move one double around, you'll get a cast @@ -9667,7 +9744,7 @@ Instruction *InstCombiner::SimplifyMemTransfer(MemIntrinsic *MI) { } if (SrcETy->isSingleValueType()) - NewPtrTy = PointerType::getUnqual(SrcETy); + NewPtrTy = Context->getPointerTypeUnqual(SrcETy); } } @@ -9684,7 +9761,7 @@ Instruction *InstCombiner::SimplifyMemTransfer(MemIntrinsic *MI) { InsertNewInstBefore(new StoreInst(L, Dest, false, DstAlign), *MI); // Set the size of the copy to 0, it will be deleted on the next iteration. 
- MI->setOperand(3, Constant::getNullValue(MemOpLength->getType())); + MI->setOperand(3, Context->getNullValue(MemOpLength->getType())); return MI; } @@ -9708,21 +9785,21 @@ Instruction *InstCombiner::SimplifyMemSet(MemSetInst *MI) { // memset(s,c,n) -> store s, c (for n=1,2,4,8) if (Len <= 8 && isPowerOf2_32((uint32_t)Len)) { - const Type *ITy = IntegerType::get(Len*8); // n=1 -> i8. + const Type *ITy = Context->getIntegerType(Len*8); // n=1 -> i8. Value *Dest = MI->getDest(); - Dest = InsertBitCastBefore(Dest, PointerType::getUnqual(ITy), *MI); + Dest = InsertBitCastBefore(Dest, Context->getPointerTypeUnqual(ITy), *MI); // Alignment 0 is identity for alignment 1 for memset, but not store. if (Alignment == 0) Alignment = 1; // Extract the fill value and store. uint64_t Fill = FillC->getZExtValue()*0x0101010101010101ULL; - InsertNewInstBefore(new StoreInst(ConstantInt::get(ITy, Fill), Dest, false, - Alignment), *MI); + InsertNewInstBefore(new StoreInst(Context->getConstantInt(ITy, Fill), + Dest, false, Alignment), *MI); // Set the size of the copy to 0, it will be deleted on the next iteration. - MI->setLength(Constant::getNullValue(LenC->getType())); + MI->setLength(Context->getNullValue(LenC->getType())); return MI; } @@ -9815,7 +9892,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { // Turn X86 loadups -> load if the pointer is known aligned. if (GetOrEnforceKnownAlignment(II->getOperand(1), 16) >= 16) { Value *Ptr = InsertBitCastBefore(II->getOperand(1), - PointerType::getUnqual(II->getType()), + Context->getPointerTypeUnqual(II->getType()), CI); return new LoadInst(Ptr); } @@ -9825,7 +9902,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { // Turn stvx -> store if the pointer is known aligned. if (GetOrEnforceKnownAlignment(II->getOperand(2), 16) >= 16) { const Type *OpPtrTy = - PointerType::getUnqual(II->getOperand(1)->getType()); + Context->getPointerTypeUnqual(II->getOperand(1)->getType()); Value *Ptr = InsertBitCastBefore(II->getOperand(2), OpPtrTy, CI); return new StoreInst(II->getOperand(1), Ptr); } @@ -9836,7 +9913,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { // Turn X86 storeu -> store if the pointer is known aligned. if (GetOrEnforceKnownAlignment(II->getOperand(1), 16) >= 16) { const Type *OpPtrTy = - PointerType::getUnqual(II->getOperand(2)->getType()); + Context->getPointerTypeUnqual(II->getOperand(2)->getType()); Value *Ptr = InsertBitCastBefore(II->getOperand(1), OpPtrTy, CI); return new StoreInst(II->getOperand(2), Ptr); } @@ -9876,7 +9953,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { // Cast the input vectors to byte vectors. Value *Op0 =InsertBitCastBefore(II->getOperand(1),Mask->getType(),CI); Value *Op1 =InsertBitCastBefore(II->getOperand(2),Mask->getType(),CI); - Value *Result = UndefValue::get(Op0->getType()); + Value *Result = Context->getUndef(Op0->getType()); // Only extract each element once. Value *ExtractedElts[32]; @@ -9999,11 +10076,11 @@ Instruction *InstCombiner::visitCallSite(CallSite CS) { Instruction *OldCall = CS.getInstruction(); // If the call and callee calling conventions don't match, this call must // be unreachable, as the call is undefined. 
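    // Note (annotation, not in the original commit): the "store i1 true
    // to an undef pointer" emitted below is InstCombine's standard marker
    // for unreachable code -- a combine cannot rewrite the CFG, so it
    // plants the store and lets later passes fold it to "unreachable",
    // exactly as the comment in the next hunk spells out.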
- new StoreInst(ConstantInt::getTrue(), - UndefValue::get(PointerType::getUnqual(Type::Int1Ty)), - OldCall); + new StoreInst(Context->getConstantIntTrue(), + Context->getUndef(Context->getPointerTypeUnqual(Type::Int1Ty)), + OldCall); if (!OldCall->use_empty()) - OldCall->replaceAllUsesWith(UndefValue::get(OldCall->getType())); + OldCall->replaceAllUsesWith(Context->getUndef(OldCall->getType())); if (isa<CallInst>(OldCall)) // Not worth removing an invoke here. return EraseInstFromFunction(*OldCall); return 0; @@ -10013,18 +10090,18 @@ Instruction *InstCombiner::visitCallSite(CallSite CS) { // This instruction is not reachable, just remove it. We insert a store to // undef so that we know that this code is not reachable, despite the fact // that we can't modify the CFG here. - new StoreInst(ConstantInt::getTrue(), - UndefValue::get(PointerType::getUnqual(Type::Int1Ty)), + new StoreInst(Context->getConstantIntTrue(), + Context->getUndef(Context->getPointerTypeUnqual(Type::Int1Ty)), CS.getInstruction()); if (!CS.getInstruction()->use_empty()) CS.getInstruction()-> - replaceAllUsesWith(UndefValue::get(CS.getInstruction()->getType())); + replaceAllUsesWith(Context->getUndef(CS.getInstruction()->getType())); if (InvokeInst *II = dyn_cast<InvokeInst>(CS.getInstruction())) { // Don't break the CFG, insert a dummy cond branch. BranchInst::Create(II->getNormalDest(), II->getUnwindDest(), - ConstantInt::getTrue(), II); + Context->getConstantIntTrue(), II); } return EraseInstFromFunction(*CS.getInstruction()); } @@ -10195,7 +10272,7 @@ bool InstCombiner::transformConstExprCastCall(CallSite CS) { // If the function takes more arguments than the call was taking, add them // now... for (unsigned i = NumCommonArgs; i != FT->getNumParams(); ++i) - Args.push_back(Constant::getNullValue(FT->getParamType(i))); + Args.push_back(Context->getNullValue(FT->getParamType(i))); // If we are removing arguments to the function, emit an obnoxious warning... if (FT->getNumParams() < NumActualArgs) { @@ -10268,7 +10345,7 @@ bool InstCombiner::transformConstExprCastCall(CallSite CS) { } AddUsersToWorkList(*Caller); } else { - NV = UndefValue::get(Caller->getType()); + NV = Context->getUndef(Caller->getType()); } } @@ -10393,9 +10470,12 @@ Instruction *InstCombiner::transformCallThroughTrampoline(CallSite CS) { // Replace the trampoline call with a direct call. Let the generic // code sort out any function type mismatches. FunctionType *NewFTy = - FunctionType::get(FTy->getReturnType(), NewTypes, FTy->isVarArg()); - Constant *NewCallee = NestF->getType() == PointerType::getUnqual(NewFTy) ? - NestF : ConstantExpr::getBitCast(NestF, PointerType::getUnqual(NewFTy)); + Context->getFunctionType(FTy->getReturnType(), NewTypes, + FTy->isVarArg()); + Constant *NewCallee = + NestF->getType() == Context->getPointerTypeUnqual(NewFTy) ? + NestF : Context->getConstantExprBitCast(NestF, + Context->getPointerTypeUnqual(NewFTy)); const AttrListPtr &NewPAL = AttrListPtr::get(NewAttrs.begin(),NewAttrs.end()); Instruction *NewCaller; @@ -10427,7 +10507,8 @@ Instruction *InstCombiner::transformCallThroughTrampoline(CallSite CS) { // parameter, there is no need to adjust the argument list. Let the generic // code sort out any function type mismatches. Constant *NewCallee = - NestF->getType() == PTy ? NestF : ConstantExpr::getBitCast(NestF, PTy); + NestF->getType() == PTy ? 
NestF : + Context->getConstantExprBitCast(NestF, PTy); CS.setCalledFunction(NewCallee); return CS.getInstruction(); } @@ -10848,7 +10929,7 @@ Instruction *InstCombiner::visitPHINode(PHINode &PN) { SmallPtrSet<PHINode*, 16> PotentiallyDeadPHIs; PotentiallyDeadPHIs.insert(&PN); if (DeadPHICycle(PU, PotentiallyDeadPHIs)) - return ReplaceInstUsesWith(PN, UndefValue::get(PN.getType())); + return ReplaceInstUsesWith(PN, Context->getUndef(PN.getType())); } // If this phi has a single use, and if that use just computes a value for @@ -10860,7 +10941,7 @@ Instruction *InstCombiner::visitPHINode(PHINode &PN) { if (PHIUser->hasOneUse() && (isa<BinaryOperator>(PHIUser) || isa<GetElementPtrInst>(PHIUser)) && PHIUser->use_back() == &PN) { - return ReplaceInstUsesWith(PN, UndefValue::get(PN.getType())); + return ReplaceInstUsesWith(PN, Context->getUndef(PN.getType())); } } @@ -10924,7 +11005,7 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) { return ReplaceInstUsesWith(GEP, PtrOp); if (isa<UndefValue>(GEP.getOperand(0))) - return ReplaceInstUsesWith(GEP, UndefValue::get(GEP.getType())); + return ReplaceInstUsesWith(GEP, Context->getUndef(GEP.getType())); bool HasZeroPointerIndex = false; if (Constant *C = dyn_cast<Constant>(GEP.getOperand(1))) @@ -10960,7 +11041,7 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) { Value *Op = *i; if (TD->getTypeSizeInBits(Op->getType()) > TD->getPointerSizeInBits()) { if (Constant *C = dyn_cast<Constant>(Op)) { - *i = ConstantExpr::getTrunc(C, TD->getIntPtrType()); + *i = Context->getConstantExprTrunc(C, TD->getIntPtrType()); MadeChange = true; } else { Op = InsertCastBefore(Instruction::Trunc, Op, TD->getIntPtrType(), @@ -10970,7 +11051,7 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) { } } else if (TD->getTypeSizeInBits(Op->getType()) < TD->getPointerSizeInBits()) { if (Constant *C = dyn_cast<Constant>(Op)) { - *i = ConstantExpr::getSExt(C, TD->getIntPtrType()); + *i = Context->getConstantExprSExt(C, TD->getIntPtrType()); MadeChange = true; } else { Op = InsertCastBefore(Instruction::SExt, Op, TD->getIntPtrType(), @@ -11014,18 +11095,20 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) { // With: T = long A+B; gep %P, T, ... // Value *Sum, *SO1 = SrcGEPOperands.back(), *GO1 = GEP.getOperand(1); - if (SO1 == Constant::getNullValue(SO1->getType())) { + if (SO1 == Context->getNullValue(SO1->getType())) { Sum = GO1; - } else if (GO1 == Constant::getNullValue(GO1->getType())) { + } else if (GO1 == Context->getNullValue(GO1->getType())) { Sum = SO1; } else { // If they aren't the same type, convert both to an integer of the // target's pointer size. 
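    // Note (annotation, not in the original commit): the trailing 'true'
    // in the getConstantExprIntegerCast calls below requests a signed
    // cast. GEP indices are signed offsets, so widening SO1/GO1 to a
    // common width must sign-extend to keep negative indices negative.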
if (SO1->getType() != GO1->getType()) { if (Constant *SO1C = dyn_cast<Constant>(SO1)) { - SO1 = ConstantExpr::getIntegerCast(SO1C, GO1->getType(), true); + SO1 = + Context->getConstantExprIntegerCast(SO1C, GO1->getType(), true); } else if (Constant *GO1C = dyn_cast<Constant>(GO1)) { - GO1 = ConstantExpr::getIntegerCast(GO1C, SO1->getType(), true); + GO1 = + Context->getConstantExprIntegerCast(GO1C, SO1->getType(), true); } else { unsigned PS = TD->getPointerSizeInBits(); if (TD->getTypeSizeInBits(SO1->getType()) == PS) { @@ -11043,7 +11126,8 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) { } } if (isa<Constant>(SO1) && isa<Constant>(GO1)) - Sum = ConstantExpr::getAdd(cast<Constant>(SO1), cast<Constant>(GO1)); + Sum = Context->getConstantExprAdd(cast<Constant>(SO1), + cast<Constant>(GO1)); else { Sum = BinaryOperator::CreateAdd(SO1, GO1, PtrOp->getName()+".sum"); InsertNewInstBefore(cast<Instruction>(Sum), GEP); @@ -11085,7 +11169,7 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) { Indices.push_back(cast<Constant>(*I)); if (I == E) { // If they are all constants... - Constant *CE = ConstantExpr::getGetElementPtr(GV, + Constant *CE = Context->getConstantExprGetElementPtr(GV, &Indices[0],Indices.size()); // Replace all uses of the GEP with the new constexpr... @@ -11136,7 +11220,7 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) { TD->getTypeAllocSize(cast<ArrayType>(SrcElTy)->getElementType()) == TD->getTypeAllocSize(ResElTy)) { Value *Idx[2]; - Idx[0] = Constant::getNullValue(Type::Int32Ty); + Idx[0] = Context->getNullValue(Type::Int32Ty); Idx[1] = GEP.getOperand(1); Value *V = InsertNewInstBefore( GetElementPtrInst::Create(X, Idx, Idx + 2, GEP.getName()), GEP); @@ -11159,16 +11243,17 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) { ConstantInt *Scale = 0; if (ArrayEltSize == 1) { NewIdx = GEP.getOperand(1); - Scale = ConstantInt::get(cast<IntegerType>(NewIdx->getType()), 1); + Scale = + Context->getConstantInt(cast<IntegerType>(NewIdx->getType()), 1); } else if (ConstantInt *CI = dyn_cast<ConstantInt>(GEP.getOperand(1))) { - NewIdx = ConstantInt::get(CI->getType(), 1); + NewIdx = Context->getConstantInt(CI->getType(), 1); Scale = CI; } else if (Instruction *Inst =dyn_cast<Instruction>(GEP.getOperand(1))){ if (Inst->getOpcode() == Instruction::Shl && isa<ConstantInt>(Inst->getOperand(1))) { ConstantInt *ShAmt = cast<ConstantInt>(Inst->getOperand(1)); uint32_t ShAmtVal = ShAmt->getLimitedValue(64); - Scale = ConstantInt::get(cast<IntegerType>(Inst->getType()), + Scale = Context->getConstantInt(cast<IntegerType>(Inst->getType()), 1ULL << ShAmtVal); NewIdx = Inst->getOperand(0); } else if (Inst->getOpcode() == Instruction::Mul && @@ -11184,10 +11269,11 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) { // operation after making sure Scale doesn't have the sign bit set. 
if (ArrayEltSize && Scale && Scale->getSExtValue() >= 0LL && Scale->getZExtValue() % ArrayEltSize == 0) { - Scale = ConstantInt::get(Scale->getType(), + Scale = Context->getConstantInt(Scale->getType(), Scale->getZExtValue() / ArrayEltSize); if (Scale->getZExtValue() != 1) { - Constant *C = ConstantExpr::getIntegerCast(Scale, NewIdx->getType(), + Constant *C = + Context->getConstantExprIntegerCast(Scale, NewIdx->getType(), false /*ZExt*/); Instruction *Sc = BinaryOperator::CreateMul(NewIdx, C, "idxscale"); NewIdx = InsertNewInstBefore(Sc, GEP); @@ -11195,7 +11281,7 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) { // Insert the new GEP instruction. Value *Idx[2]; - Idx[0] = Constant::getNullValue(Type::Int32Ty); + Idx[0] = Context->getNullValue(Type::Int32Ty); Idx[1] = NewIdx; Instruction *NewGEP = GetElementPtrInst::Create(X, Idx, Idx + 2, GEP.getName()); @@ -11216,7 +11302,8 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) { if (!isa<BitCastInst>(BCI->getOperand(0)) && GEP.hasAllConstantIndices()) { // Determine how much the GEP moves the pointer. We are guaranteed to get // a constant back from EmitGEPOffset. - ConstantInt *OffsetV = cast<ConstantInt>(EmitGEPOffset(&GEP, GEP, *this)); + ConstantInt *OffsetV = + cast<ConstantInt>(EmitGEPOffset(&GEP, GEP, *this)); int64_t Offset = OffsetV->getSExtValue(); // If this GEP instruction doesn't move the pointer, just replace the GEP @@ -11244,7 +11331,7 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) { SmallVector<Value*, 8> NewIndices; const Type *InTy = cast<PointerType>(BCI->getOperand(0)->getType())->getElementType(); - if (FindElementAtOffset(InTy, Offset, NewIndices, TD)) { + if (FindElementAtOffset(InTy, Offset, NewIndices, TD, Context)) { Instruction *NGEP = GetElementPtrInst::Create(BCI->getOperand(0), NewIndices.begin(), NewIndices.end()); @@ -11264,7 +11351,7 @@ Instruction *InstCombiner::visitAllocationInst(AllocationInst &AI) { if (AI.isArrayAllocation()) { // Check C != 1 if (const ConstantInt *C = dyn_cast<ConstantInt>(AI.getArraySize())) { const Type *NewTy = - ArrayType::get(AI.getAllocatedType(), C->getZExtValue()); + Context->getArrayType(AI.getAllocatedType(), C->getZExtValue()); AllocationInst *New = 0; // Create and insert the replacement instruction... @@ -11286,7 +11373,7 @@ Instruction *InstCombiner::visitAllocationInst(AllocationInst &AI) { // Now that I is pointing to the first non-allocation-inst in the block, // insert our getelementptr instruction... // - Value *NullIdx = Constant::getNullValue(Type::Int32Ty); + Value *NullIdx = Context->getNullValue(Type::Int32Ty); Value *Idx[2]; Idx[0] = NullIdx; Idx[1] = NullIdx; @@ -11297,7 +11384,7 @@ Instruction *InstCombiner::visitAllocationInst(AllocationInst &AI) { // allocation. return ReplaceInstUsesWith(AI, V); } else if (isa<UndefValue>(AI.getArraySize())) { - return ReplaceInstUsesWith(AI, Constant::getNullValue(AI.getType())); + return ReplaceInstUsesWith(AI, Context->getNullValue(AI.getType())); } } @@ -11306,7 +11393,7 @@ Instruction *InstCombiner::visitAllocationInst(AllocationInst &AI) { // Note that we only do this for alloca's, because malloc should allocate // and return a unique pointer, even for a zero byte allocation. 
if (TD->getTypeAllocSize(AI.getAllocatedType()) == 0) - return ReplaceInstUsesWith(AI, Constant::getNullValue(AI.getType())); + return ReplaceInstUsesWith(AI, Context->getNullValue(AI.getType())); // If the alignment is 0 (unspecified), assign it the preferred alignment. if (AI.getAlignment() == 0) @@ -11322,8 +11409,8 @@ Instruction *InstCombiner::visitFreeInst(FreeInst &FI) { // free undef -> unreachable. if (isa<UndefValue>(Op)) { // Insert a new store to null because we cannot modify the CFG here. - new StoreInst(ConstantInt::getTrue(), - UndefValue::get(PointerType::getUnqual(Type::Int1Ty)), &FI); + new StoreInst(Context->getConstantIntTrue(), + Context->getUndef(Context->getPointerTypeUnqual(Type::Int1Ty)), &FI); return EraseInstFromFunction(FI); } @@ -11363,6 +11450,7 @@ static Instruction *InstCombineLoadCast(InstCombiner &IC, LoadInst &LI, const TargetData *TD) { User *CI = cast<User>(LI.getOperand(0)); Value *CastOp = CI->getOperand(0); + LLVMContext* Context = IC.getContext(); if (TD) { if (ConstantExpr *CE = dyn_cast<ConstantExpr>(CI)) { @@ -11391,7 +11479,7 @@ static Instruction *InstCombineLoadCast(InstCombiner &IC, LoadInst &LI, SingleChar = 0; StrVal = (StrVal << 8) | SingleChar; } - Value *NL = ConstantInt::get(StrVal); + Value *NL = Context->getConstantInt(StrVal); return IC.ReplaceInstUsesWith(LI, NL); } } @@ -11417,8 +11505,8 @@ static Instruction *InstCombineLoadCast(InstCombiner &IC, LoadInst &LI, if (Constant *CSrc = dyn_cast<Constant>(CastOp)) if (ASrcTy->getNumElements() != 0) { Value *Idxs[2]; - Idxs[0] = Idxs[1] = Constant::getNullValue(Type::Int32Ty); - CastOp = ConstantExpr::getGetElementPtr(CSrc, Idxs, 2); + Idxs[0] = Idxs[1] = Context->getNullValue(Type::Int32Ty); + CastOp = Context->getConstantExprGetElementPtr(CSrc, Idxs, 2); SrcTy = cast<PointerType>(CastOp->getType()); SrcPTy = SrcTy->getElementType(); } @@ -11480,9 +11568,9 @@ Instruction *InstCombiner::visitLoadInst(LoadInst &LI) { // that this code is not reachable. We do this instead of inserting // an unreachable instruction directly because we cannot modify the // CFG. - new StoreInst(UndefValue::get(LI.getType()), - Constant::getNullValue(Op->getType()), &LI); - return ReplaceInstUsesWith(LI, UndefValue::get(LI.getType())); + new StoreInst(Context->getUndef(LI.getType()), + Context->getNullValue(Op->getType()), &LI); + return ReplaceInstUsesWith(LI, Context->getUndef(LI.getType())); } } @@ -11494,9 +11582,9 @@ Instruction *InstCombiner::visitLoadInst(LoadInst &LI) { // Insert a new store to null instruction before the load to indicate that // this code is not reachable. We do this instead of inserting an // unreachable instruction directly because we cannot modify the CFG. - new StoreInst(UndefValue::get(LI.getType()), - Constant::getNullValue(Op->getType()), &LI); - return ReplaceInstUsesWith(LI, UndefValue::get(LI.getType())); + new StoreInst(Context->getUndef(LI.getType()), + Context->getNullValue(Op->getType()), &LI); + return ReplaceInstUsesWith(LI, Context->getUndef(LI.getType())); } // Instcombine load (constant global) into the value loaded. @@ -11517,9 +11605,9 @@ Instruction *InstCombiner::visitLoadInst(LoadInst &LI) { // that this code is not reachable. We do this instead of inserting // an unreachable instruction directly because we cannot modify the // CFG. 
- new StoreInst(UndefValue::get(LI.getType()), - Constant::getNullValue(Op->getType()), &LI); - return ReplaceInstUsesWith(LI, UndefValue::get(LI.getType())); + new StoreInst(Context->getUndef(LI.getType()), + Context->getNullValue(Op->getType()), &LI); + return ReplaceInstUsesWith(LI, Context->getUndef(LI.getType())); } } else if (CE->isCast()) { @@ -11534,9 +11622,9 @@ Instruction *InstCombiner::visitLoadInst(LoadInst &LI) { if (GlobalVariable *GV = dyn_cast<GlobalVariable>(Op->getUnderlyingObject())){ if (GV->isConstant() && GV->hasDefinitiveInitializer()) { if (GV->getInitializer()->isNullValue()) - return ReplaceInstUsesWith(LI, Constant::getNullValue(LI.getType())); + return ReplaceInstUsesWith(LI, Context->getNullValue(LI.getType())); else if (isa<UndefValue>(GV->getInitializer())) - return ReplaceInstUsesWith(LI, UndefValue::get(LI.getType())); + return ReplaceInstUsesWith(LI, Context->getUndef(LI.getType())); } } @@ -11586,6 +11674,7 @@ Instruction *InstCombiner::visitLoadInst(LoadInst &LI) { static Instruction *InstCombineStoreToCast(InstCombiner &IC, StoreInst &SI) { User *CI = cast<User>(SI.getOperand(1)); Value *CastOp = CI->getOperand(0); + LLVMContext* Context = IC.getContext(); const Type *DestPTy = cast<PointerType>(CI->getType())->getElementType(); const PointerType *SrcTy = dyn_cast<PointerType>(CastOp->getType()); @@ -11607,7 +11696,7 @@ static Instruction *InstCombineStoreToCast(InstCombiner &IC, StoreInst &SI) { // constants. if (isa<ArrayType>(SrcPTy) || isa<StructType>(SrcPTy)) { // Index through pointer. - Constant *Zero = Constant::getNullValue(Type::Int32Ty); + Constant *Zero = Context->getNullValue(Type::Int32Ty); NewGEPIndices.push_back(Zero); while (1) { @@ -11624,7 +11713,7 @@ static Instruction *InstCombineStoreToCast(InstCombiner &IC, StoreInst &SI) { } } - SrcTy = PointerType::get(SrcPTy, SrcTy->getAddressSpace()); + SrcTy = Context->getPointerType(SrcPTy, SrcTy->getAddressSpace()); } if (!SrcPTy->isInteger() && !isa<PointerType>(SrcPTy)) @@ -11658,7 +11747,7 @@ static Instruction *InstCombineStoreToCast(InstCombiner &IC, StoreInst &SI) { // emit a GEP to index into its first field. if (!NewGEPIndices.empty()) { if (Constant *C = dyn_cast<Constant>(CastOp)) - CastOp = ConstantExpr::getGetElementPtr(C, &NewGEPIndices[0], + CastOp = Context->getConstantExprGetElementPtr(C, &NewGEPIndices[0], NewGEPIndices.size()); else CastOp = IC.InsertNewInstBefore( @@ -11667,7 +11756,7 @@ static Instruction *InstCombineStoreToCast(InstCombiner &IC, StoreInst &SI) { } if (Constant *C = dyn_cast<Constant>(SIOp0)) - NewCast = ConstantExpr::getCast(opcode, C, CastDstTy); + NewCast = Context->getConstantExprCast(opcode, C, CastDstTy); else NewCast = IC.InsertNewInstBefore( CastInst::Create(opcode, SIOp0, CastDstTy, SIOp0->getName()+".c"), @@ -11828,7 +11917,7 @@ Instruction *InstCombiner::visitStoreInst(StoreInst &SI) { if (isa<ConstantPointerNull>(Ptr) && cast<PointerType>(Ptr->getType())->getAddressSpace() == 0) { if (!isa<UndefValue>(Val)) { - SI.setOperand(0, UndefValue::get(Val->getType())); + SI.setOperand(0, Context->getUndef(Val->getType())); if (Instruction *U = dyn_cast<Instruction>(Val)) AddToWorkList(U); // Dropped a use. 
++NumCombined; @@ -12055,7 +12144,8 @@ Instruction *InstCombiner::visitSwitchInst(SwitchInst &SI) { if (ConstantInt *AddRHS = dyn_cast<ConstantInt>(I->getOperand(1))) { // change 'switch (X+4) case 1:' into 'switch (X) case -3' for (unsigned i = 2, e = SI.getNumOperands(); i != e; i += 2) - SI.setOperand(i,ConstantExpr::getSub(cast<Constant>(SI.getOperand(i)), + SI.setOperand(i, + Context->getConstantExprSub(cast<Constant>(SI.getOperand(i)), AddRHS)); SI.setOperand(0, I->getOperand(0)); AddToWorkList(I); @@ -12073,10 +12163,10 @@ Instruction *InstCombiner::visitExtractValueInst(ExtractValueInst &EV) { if (Constant *C = dyn_cast<Constant>(Agg)) { if (isa<UndefValue>(C)) - return ReplaceInstUsesWith(EV, UndefValue::get(EV.getType())); + return ReplaceInstUsesWith(EV, Context->getUndef(EV.getType())); if (isa<ConstantAggregateZero>(C)) - return ReplaceInstUsesWith(EV, Constant::getNullValue(EV.getType())); + return ReplaceInstUsesWith(EV, Context->getNullValue(EV.getType())); if (isa<ConstantArray>(C) || isa<ConstantStruct>(C)) { // Extract the element indexed by the first index out of the constant @@ -12212,17 +12302,18 @@ static std::vector<unsigned> getShuffleMask(const ShuffleVectorInst *SVI) { /// FindScalarElement - Given a vector and an element number, see if the scalar /// value is already around as a register, for example if it were inserted then /// extracted from the vector. -static Value *FindScalarElement(Value *V, unsigned EltNo) { +static Value *FindScalarElement(Value *V, unsigned EltNo, + LLVMContext* Context) { assert(isa<VectorType>(V->getType()) && "Not looking at a vector?"); const VectorType *PTy = cast<VectorType>(V->getType()); unsigned Width = PTy->getNumElements(); if (EltNo >= Width) // Out of range access. - return UndefValue::get(PTy->getElementType()); + return Context->getUndef(PTy->getElementType()); if (isa<UndefValue>(V)) - return UndefValue::get(PTy->getElementType()); + return Context->getUndef(PTy->getElementType()); else if (isa<ConstantAggregateZero>(V)) - return Constant::getNullValue(PTy->getElementType()); + return Context->getNullValue(PTy->getElementType()); else if (ConstantVector *CP = dyn_cast<ConstantVector>(V)) return CP->getOperand(EltNo); else if (InsertElementInst *III = dyn_cast<InsertElementInst>(V)) { @@ -12238,17 +12329,17 @@ static Value *FindScalarElement(Value *V, unsigned EltNo) { // Otherwise, the insertelement doesn't modify the value, recurse on its // vector input. - return FindScalarElement(III->getOperand(0), EltNo); + return FindScalarElement(III->getOperand(0), EltNo, Context); } else if (ShuffleVectorInst *SVI = dyn_cast<ShuffleVectorInst>(V)) { unsigned LHSWidth = cast<VectorType>(SVI->getOperand(0)->getType())->getNumElements(); unsigned InEl = getShuffleMask(SVI)[EltNo]; if (InEl < LHSWidth) - return FindScalarElement(SVI->getOperand(0), InEl); + return FindScalarElement(SVI->getOperand(0), InEl, Context); else if (InEl < LHSWidth*2) - return FindScalarElement(SVI->getOperand(1), InEl - LHSWidth); + return FindScalarElement(SVI->getOperand(1), InEl - LHSWidth, Context); else - return UndefValue::get(PTy->getElementType()); + return Context->getUndef(PTy->getElementType()); } // Otherwise, we don't know. @@ -12258,11 +12349,11 @@ static Value *FindScalarElement(Value *V, unsigned EltNo) { Instruction *InstCombiner::visitExtractElementInst(ExtractElementInst &EI) { // If vector val is undef, replace extract with scalar undef. 
if (isa<UndefValue>(EI.getOperand(0))) - return ReplaceInstUsesWith(EI, UndefValue::get(EI.getType())); + return ReplaceInstUsesWith(EI, Context->getUndef(EI.getType())); // If vector val is constant 0, replace extract with scalar 0. if (isa<ConstantAggregateZero>(EI.getOperand(0))) - return ReplaceInstUsesWith(EI, Constant::getNullValue(EI.getType())); + return ReplaceInstUsesWith(EI, Context->getNullValue(EI.getType())); if (ConstantVector *C = dyn_cast<ConstantVector>(EI.getOperand(0))) { // If vector val is constant with all elements the same, replace EI with @@ -12288,7 +12379,7 @@ Instruction *InstCombiner::visitExtractElementInst(ExtractElementInst &EI) { // If this is extracting an invalid index, turn this into undef, to avoid // crashing the code below. if (IndexVal >= VectorWidth) - return ReplaceInstUsesWith(EI, UndefValue::get(EI.getType())); + return ReplaceInstUsesWith(EI, Context->getUndef(EI.getType())); // This instruction only demands the single element from the input vector. // If the input vector has a single use, simplify it based on this use @@ -12303,7 +12394,7 @@ Instruction *InstCombiner::visitExtractElementInst(ExtractElementInst &EI) { } } - if (Value *Elt = FindScalarElement(EI.getOperand(0), IndexVal)) + if (Value *Elt = FindScalarElement(EI.getOperand(0), IndexVal, Context)) return ReplaceInstUsesWith(EI, Elt); // If the this extractelement is directly using a bitcast from a vector of @@ -12313,7 +12404,8 @@ Instruction *InstCombiner::visitExtractElementInst(ExtractElementInst &EI) { if (const VectorType *VT = dyn_cast<VectorType>(BCI->getOperand(0)->getType())) if (VT->getNumElements() == VectorWidth) - if (Value *Elt = FindScalarElement(BCI->getOperand(0), IndexVal)) + if (Value *Elt = FindScalarElement(BCI->getOperand(0), + IndexVal, Context)) return new BitCastInst(Elt, EI.getType()); } } @@ -12339,7 +12431,7 @@ Instruction *InstCombiner::visitExtractElementInst(ExtractElementInst &EI) { unsigned AS = cast<PointerType>(I->getOperand(0)->getType())->getAddressSpace(); Value *Ptr = InsertBitCastBefore(I->getOperand(0), - PointerType::get(EI.getType(), AS),EI); + Context->getPointerType(EI.getType(), AS),EI); GetElementPtrInst *GEP = GetElementPtrInst::Create(Ptr, EI.getOperand(1), I->getName()+".gep"); InsertNewInstBefore(GEP, EI); @@ -12373,7 +12465,7 @@ Instruction *InstCombiner::visitExtractElementInst(ExtractElementInst &EI) { SrcIdx -= LHSWidth; Src = SVI->getOperand(1); } else { - return ReplaceInstUsesWith(EI, UndefValue::get(EI.getType())); + return ReplaceInstUsesWith(EI, Context->getUndef(EI.getType())); } return new ExtractElementInst(Src, SrcIdx); } @@ -12386,21 +12478,22 @@ Instruction *InstCombiner::visitExtractElementInst(ExtractElementInst &EI) { /// elements from either LHS or RHS, return the shuffle mask and true. /// Otherwise, return false. 
static bool CollectSingleShuffleElements(Value *V, Value *LHS, Value *RHS, - std::vector<Constant*> &Mask) { + std::vector<Constant*> &Mask, + LLVMContext* Context) { assert(V->getType() == LHS->getType() && V->getType() == RHS->getType() && "Invalid CollectSingleShuffleElements"); unsigned NumElts = cast<VectorType>(V->getType())->getNumElements(); if (isa<UndefValue>(V)) { - Mask.assign(NumElts, UndefValue::get(Type::Int32Ty)); + Mask.assign(NumElts, Context->getUndef(Type::Int32Ty)); return true; } else if (V == LHS) { for (unsigned i = 0; i != NumElts; ++i) - Mask.push_back(ConstantInt::get(Type::Int32Ty, i)); + Mask.push_back(Context->getConstantInt(Type::Int32Ty, i)); return true; } else if (V == RHS) { for (unsigned i = 0; i != NumElts; ++i) - Mask.push_back(ConstantInt::get(Type::Int32Ty, i+NumElts)); + Mask.push_back(Context->getConstantInt(Type::Int32Ty, i+NumElts)); return true; } else if (InsertElementInst *IEI = dyn_cast<InsertElementInst>(V)) { // If this is an insert of an extract from some other vector, include it. @@ -12415,9 +12508,9 @@ static bool CollectSingleShuffleElements(Value *V, Value *LHS, Value *RHS, if (isa<UndefValue>(ScalarOp)) { // inserting undef into vector. // Okay, we can handle this if the vector we are insertinting into is // transitively ok. - if (CollectSingleShuffleElements(VecOp, LHS, RHS, Mask)) { + if (CollectSingleShuffleElements(VecOp, LHS, RHS, Mask, Context)) { // If so, update the mask to reflect the inserted undef. - Mask[InsertedIdx] = UndefValue::get(Type::Int32Ty); + Mask[InsertedIdx] = Context->getUndef(Type::Int32Ty); return true; } } else if (ExtractElementInst *EI = dyn_cast<ExtractElementInst>(ScalarOp)){ @@ -12430,15 +12523,15 @@ static bool CollectSingleShuffleElements(Value *V, Value *LHS, Value *RHS, if (EI->getOperand(0) == LHS || EI->getOperand(0) == RHS) { // Okay, we can handle this if the vector we are insertinting into is // transitively ok. - if (CollectSingleShuffleElements(VecOp, LHS, RHS, Mask)) { + if (CollectSingleShuffleElements(VecOp, LHS, RHS, Mask, Context)) { // If so, update the mask to reflect the inserted value. if (EI->getOperand(0) == LHS) { Mask[InsertedIdx % NumElts] = - ConstantInt::get(Type::Int32Ty, ExtractedIdx); + Context->getConstantInt(Type::Int32Ty, ExtractedIdx); } else { assert(EI->getOperand(0) == RHS); Mask[InsertedIdx % NumElts] = - ConstantInt::get(Type::Int32Ty, ExtractedIdx+NumElts); + Context->getConstantInt(Type::Int32Ty, ExtractedIdx+NumElts); } return true; @@ -12456,17 +12549,17 @@ static bool CollectSingleShuffleElements(Value *V, Value *LHS, Value *RHS, /// RHS of the shuffle instruction, if it is not null. Return a shuffle mask /// that computes V and the LHS value of the shuffle. static Value *CollectShuffleElements(Value *V, std::vector<Constant*> &Mask, - Value *&RHS) { + Value *&RHS, LLVMContext* Context) { assert(isa<VectorType>(V->getType()) && (RHS == 0 || V->getType() == RHS->getType()) && "Invalid shuffle!"); unsigned NumElts = cast<VectorType>(V->getType())->getNumElements(); if (isa<UndefValue>(V)) { - Mask.assign(NumElts, UndefValue::get(Type::Int32Ty)); + Mask.assign(NumElts, Context->getUndef(Type::Int32Ty)); return V; } else if (isa<ConstantAggregateZero>(V)) { - Mask.assign(NumElts, ConstantInt::get(Type::Int32Ty, 0)); + Mask.assign(NumElts, Context->getConstantInt(Type::Int32Ty, 0)); return V; } else if (InsertElementInst *IEI = dyn_cast<InsertElementInst>(V)) { // If this is an insert of an extract from some other vector, include it. 
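Both CollectSingleShuffleElements and CollectShuffleElements build masks in the shufflevector convention used above: result lane i reads lane Mask[i] from the concatenation of the two sources, so entries below NumElts select LHS lanes, entries from NumElts up select RHS lanes, and undef entries are don't-cares. A scalar model of that indexing (the helper name is hypothetical):

    #include <cassert>
    #include <vector>

    // Scalar model of shufflevector lane selection for two equal-width
    // vectors; MaskElt plays the role of one ConstantInt mask entry.
    static int shuffleLane(const std::vector<int> &LHS,
                           const std::vector<int> &RHS, unsigned MaskElt) {
      const unsigned NumElts = static_cast<unsigned>(LHS.size());
      return MaskElt < NumElts ? LHS[MaskElt] : RHS[MaskElt - NumElts];
    }

    int main() {
      std::vector<int> L = {10, 11, 12, 13};
      std::vector<int> R = {20, 21, 22, 23};
      assert(shuffleLane(L, R, 2) == 12);  // Mask[i] <  NumElts: LHS lane 2
      assert(shuffleLane(L, R, 5) == 21);  // Mask[i] >= NumElts: RHS lane 1
      return 0;
    }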
@@ -12485,25 +12578,27 @@ static Value *CollectShuffleElements(Value *V, std::vector<Constant*> &Mask, // otherwise we'd end up with a shuffle of three inputs. if (EI->getOperand(0) == RHS || RHS == 0) { RHS = EI->getOperand(0); - Value *V = CollectShuffleElements(VecOp, Mask, RHS); + Value *V = CollectShuffleElements(VecOp, Mask, RHS, Context); Mask[InsertedIdx % NumElts] = - ConstantInt::get(Type::Int32Ty, NumElts+ExtractedIdx); + Context->getConstantInt(Type::Int32Ty, NumElts+ExtractedIdx); return V; } if (VecOp == RHS) { - Value *V = CollectShuffleElements(EI->getOperand(0), Mask, RHS); + Value *V = CollectShuffleElements(EI->getOperand(0), Mask, + RHS, Context); // Everything but the extracted element is replaced with the RHS. for (unsigned i = 0; i != NumElts; ++i) { if (i != InsertedIdx) - Mask[i] = ConstantInt::get(Type::Int32Ty, NumElts+i); + Mask[i] = Context->getConstantInt(Type::Int32Ty, NumElts+i); } return V; } // If this insertelement is a chain that comes from exactly these two // vectors, return the vector and the effective shuffle. - if (CollectSingleShuffleElements(IEI, EI->getOperand(0), RHS, Mask)) + if (CollectSingleShuffleElements(IEI, EI->getOperand(0), RHS, Mask, + Context)) return EI->getOperand(0); } @@ -12513,7 +12608,7 @@ static Value *CollectShuffleElements(Value *V, std::vector<Constant*> &Mask, // Otherwise, can't do anything fancy. Return an identity vector. for (unsigned i = 0; i != NumElts; ++i) - Mask.push_back(ConstantInt::get(Type::Int32Ty, i)); + Mask.push_back(Context->getConstantInt(Type::Int32Ty, i)); return V; } @@ -12540,7 +12635,7 @@ Instruction *InstCombiner::visitInsertElementInst(InsertElementInst &IE) { return ReplaceInstUsesWith(IE, VecOp); if (InsertedIdx >= NumVectorElts) // Out of range insert. - return ReplaceInstUsesWith(IE, UndefValue::get(IE.getType())); + return ReplaceInstUsesWith(IE, Context->getUndef(IE.getType())); // If we are extracting a value from a vector, then inserting it right // back into the same place, just use the input vector. @@ -12557,15 +12652,16 @@ Instruction *InstCombiner::visitInsertElementInst(InsertElementInst &IE) { // Build a new shuffle mask. std::vector<Constant*> Mask; if (isa<UndefValue>(VecOp)) - Mask.assign(NumVectorElts, UndefValue::get(Type::Int32Ty)); + Mask.assign(NumVectorElts, Context->getUndef(Type::Int32Ty)); else { assert(isa<ConstantAggregateZero>(VecOp) && "Unknown thing"); - Mask.assign(NumVectorElts, ConstantInt::get(Type::Int32Ty, + Mask.assign(NumVectorElts, Context->getConstantInt(Type::Int32Ty, NumVectorElts)); } - Mask[InsertedIdx] = ConstantInt::get(Type::Int32Ty, ExtractedIdx); + Mask[InsertedIdx] = + Context->getConstantInt(Type::Int32Ty, ExtractedIdx); return new ShuffleVectorInst(EI->getOperand(0), VecOp, - ConstantVector::get(Mask)); + Context->getConstantVector(Mask)); } // If this insertelement isn't used by some other insertelement, turn it @@ -12573,10 +12669,11 @@ Instruction *InstCombiner::visitInsertElementInst(InsertElementInst &IE) { if (!IE.hasOneUse() || !isa<InsertElementInst>(IE.use_back())) { std::vector<Constant*> Mask; Value *RHS = 0; - Value *LHS = CollectShuffleElements(&IE, Mask, RHS); - if (RHS == 0) RHS = UndefValue::get(LHS->getType()); + Value *LHS = CollectShuffleElements(&IE, Mask, RHS, Context); + if (RHS == 0) RHS = Context->getUndef(LHS->getType()); // We now have a shuffle of LHS, RHS, Mask. 
- return new ShuffleVectorInst(LHS, RHS, ConstantVector::get(Mask)); + return new ShuffleVectorInst(LHS, RHS, + Context->getConstantVector(Mask)); } } } @@ -12600,7 +12697,7 @@ Instruction *InstCombiner::visitShuffleVectorInst(ShuffleVectorInst &SVI) { // Undefined shuffle mask -> undefined value. if (isa<UndefValue>(SVI.getOperand(2))) - return ReplaceInstUsesWith(SVI, UndefValue::get(SVI.getType())); + return ReplaceInstUsesWith(SVI, Context->getUndef(SVI.getType())); unsigned VWidth = cast<VectorType>(SVI.getType())->getNumElements(); @@ -12627,21 +12724,21 @@ Instruction *InstCombiner::visitShuffleVectorInst(ShuffleVectorInst &SVI) { std::vector<Constant*> Elts; for (unsigned i = 0, e = Mask.size(); i != e; ++i) { if (Mask[i] >= 2*e) - Elts.push_back(UndefValue::get(Type::Int32Ty)); + Elts.push_back(Context->getUndef(Type::Int32Ty)); else { if ((Mask[i] >= e && isa<UndefValue>(RHS)) || (Mask[i] < e && isa<UndefValue>(LHS))) { Mask[i] = 2*e; // Turn into undef. - Elts.push_back(UndefValue::get(Type::Int32Ty)); + Elts.push_back(Context->getUndef(Type::Int32Ty)); } else { Mask[i] = Mask[i] % e; // Force to LHS. - Elts.push_back(ConstantInt::get(Type::Int32Ty, Mask[i])); + Elts.push_back(Context->getConstantInt(Type::Int32Ty, Mask[i])); } } } SVI.setOperand(0, SVI.getOperand(1)); - SVI.setOperand(1, UndefValue::get(RHS->getType())); - SVI.setOperand(2, ConstantVector::get(Elts)); + SVI.setOperand(1, Context->getUndef(RHS->getType())); + SVI.setOperand(2, Context->getConstantVector(Elts)); LHS = SVI.getOperand(0); RHS = SVI.getOperand(1); MadeChange = true; @@ -12691,14 +12788,14 @@ Instruction *InstCombiner::visitShuffleVectorInst(ShuffleVectorInst &SVI) { std::vector<Constant*> Elts; for (unsigned i = 0, e = NewMask.size(); i != e; ++i) { if (NewMask[i] >= LHSInNElts*2) { - Elts.push_back(UndefValue::get(Type::Int32Ty)); + Elts.push_back(Context->getUndef(Type::Int32Ty)); } else { - Elts.push_back(ConstantInt::get(Type::Int32Ty, NewMask[i])); + Elts.push_back(Context->getConstantInt(Type::Int32Ty, NewMask[i])); } } return new ShuffleVectorInst(LHSSVI->getOperand(0), LHSSVI->getOperand(1), - ConstantVector::get(Elts)); + Context->getConstantVector(Elts)); } } } @@ -12868,7 +12965,7 @@ bool InstCombiner::DoOneIteration(Function &F, unsigned Iteration) { Changed = true; } if (!I->use_empty()) - I->replaceAllUsesWith(UndefValue::get(I->getType())); + I->replaceAllUsesWith(Context->getUndef(I->getType())); I->eraseFromParent(); } } diff --git a/lib/Transforms/Scalar/JumpThreading.cpp b/lib/Transforms/Scalar/JumpThreading.cpp index 5a70fc3..dee7bfb 100644 --- a/lib/Transforms/Scalar/JumpThreading.cpp +++ b/lib/Transforms/Scalar/JumpThreading.cpp @@ -14,6 +14,7 @@ #define DEBUG_TYPE "jump-threading" #include "llvm/Transforms/Scalar.h" #include "llvm/IntrinsicInst.h" +#include "llvm/LLVMContext.h" #include "llvm/Pass.h" #include "llvm/Analysis/ConstantFolding.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" @@ -207,7 +208,7 @@ static unsigned getJumpThreadDuplicationCost(const BasicBlock *BB) { if (const CallInst *CI = dyn_cast<CallInst>(I)) { if (!isa<IntrinsicInst>(CI)) Size += 3; - else if (isa<VectorType>(CI->getType())) + else if (!isa<VectorType>(CI->getType())) Size += 1; } } @@ -434,7 +435,7 @@ bool JumpThreading::ProcessBranchOnDuplicateCond(BasicBlock *PredBB, << "' folding condition to '" << BranchDir << "': " << *BB->getTerminator(); ++NumFolds; - DestBI->setCondition(ConstantInt::get(Type::Int1Ty, BranchDir)); + DestBI->setCondition(Context->getConstantInt(Type::Int1Ty, 
BranchDir)); ConstantFoldTerminator(BB); return true; } @@ -563,7 +564,7 @@ bool JumpThreading::SimplifyPartiallyRedundantLoad(LoadInst *LI) { // If the returned value is the load itself, replace with an undef. This can // only happen in dead loops. - if (AvailableVal == LI) AvailableVal = UndefValue::get(LI->getType()); + if (AvailableVal == LI) AvailableVal = Context->getUndef(LI->getType()); LI->replaceAllUsesWith(AvailableVal); LI->eraseFromParent(); return true; @@ -717,7 +718,7 @@ bool JumpThreading::ProcessJumpOnPHI(PHINode *PN) { // Next, figure out which successor we are threading to. BasicBlock *SuccBB; if (BranchInst *BI = dyn_cast<BranchInst>(BB->getTerminator())) - SuccBB = BI->getSuccessor(PredCst == ConstantInt::getFalse()); + SuccBB = BI->getSuccessor(PredCst == Context->getConstantIntFalse()); else { SwitchInst *SI = cast<SwitchInst>(BB->getTerminator()); SuccBB = SI->getSuccessor(SI->findCaseValue(PredCst)); @@ -755,7 +756,7 @@ bool JumpThreading::ProcessBranchOnLogical(Value *V, BasicBlock *BB, // We can only do the simplification for phi nodes of 'false' with AND or // 'true' with OR. See if we have any entries in the phi for this. unsigned PredNo = ~0U; - ConstantInt *PredCst = ConstantInt::get(Type::Int1Ty, !isAnd); + ConstantInt *PredCst = Context->getConstantInt(Type::Int1Ty, !isAnd); for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) { if (PN->getIncomingValue(i) == PredCst) { PredNo = i; @@ -793,15 +794,16 @@ bool JumpThreading::ProcessBranchOnLogical(Value *V, BasicBlock *BB, /// hand sides of the compare instruction, try to determine the result. If the /// result can not be determined, a null pointer is returned. static Constant *GetResultOfComparison(CmpInst::Predicate pred, - Value *LHS, Value *RHS) { + Value *LHS, Value *RHS, + LLVMContext* Context) { if (Constant *CLHS = dyn_cast<Constant>(LHS)) if (Constant *CRHS = dyn_cast<Constant>(RHS)) - return ConstantExpr::getCompare(pred, CLHS, CRHS); + return Context->getConstantExprCompare(pred, CLHS, CRHS); if (LHS == RHS) if (isa<IntegerType>(LHS->getType()) || isa<PointerType>(LHS->getType())) return ICmpInst::isTrueWhenEqual(pred) ? - ConstantInt::getTrue() : ConstantInt::getFalse(); + Context->getConstantIntTrue() : Context->getConstantIntFalse(); return 0; } @@ -826,7 +828,8 @@ bool JumpThreading::ProcessBranchOnCompare(CmpInst *Cmp, BasicBlock *BB) { for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) { PredVal = PN->getIncomingValue(i); - Constant *Res = GetResultOfComparison(Cmp->getPredicate(), PredVal, RHS); + Constant *Res = GetResultOfComparison(Cmp->getPredicate(), PredVal, + RHS, Context); if (!Res) { PredVal = 0; continue; @@ -935,9 +938,11 @@ bool JumpThreading::ThreadEdge(BasicBlock *BB, BasicBlock *PredBB, // Remap operands to patch up intra-block references. for (unsigned i = 0, e = New->getNumOperands(); i != e; ++i) - if (Instruction *Inst = dyn_cast<Instruction>(New->getOperand(i))) - if (Value *Remapped = ValueMapping[Inst]) - New->setOperand(i, Remapped); + if (Instruction *Inst = dyn_cast<Instruction>(New->getOperand(i))) { + DenseMap<Instruction*, Value*>::iterator I = ValueMapping.find(Inst); + if (I != ValueMapping.end()) + New->setOperand(i, I->second); + } } // We didn't copy the terminator from BB over to NewBB, because there is now @@ -953,9 +958,11 @@ bool JumpThreading::ThreadEdge(BasicBlock *BB, BasicBlock *PredBB, Value *IV = PN->getIncomingValueForBlock(BB); // Remap the value if necessary. 
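    // Note (annotation, not in the original commit): replacing
    // ValueMapping[Inst] with ValueMapping.find(Inst) here and in the
    // operand-remap loop above is not cosmetic -- DenseMap::operator[]
    // default-inserts a null entry on every miss, so the old code grew
    // (and could rehash) the map merely by querying it.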
- if (Instruction *Inst = dyn_cast<Instruction>(IV)) - if (Value *MappedIV = ValueMapping[Inst]) - IV = MappedIV; + if (Instruction *Inst = dyn_cast<Instruction>(IV)) { + DenseMap<Instruction*, Value*>::iterator I = ValueMapping.find(Inst); + if (I != ValueMapping.end()) + IV = I->second; + } PN->addIncoming(IV, NewBB); } diff --git a/lib/Transforms/Scalar/LICM.cpp b/lib/Transforms/Scalar/LICM.cpp index 1021469..d6daeca 100644 --- a/lib/Transforms/Scalar/LICM.cpp +++ b/lib/Transforms/Scalar/LICM.cpp @@ -36,6 +36,7 @@ #include "llvm/Constants.h" #include "llvm/DerivedTypes.h" #include "llvm/Instructions.h" +#include "llvm/LLVMContext.h" #include "llvm/Target/TargetData.h" #include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/LoopPass.h" @@ -482,7 +483,7 @@ void LICM::sink(Instruction &I) { // Instruction is not used, just delete it. CurAST->deleteValue(&I); if (!I.use_empty()) // If I has users in unreachable blocks, eliminate. - I.replaceAllUsesWith(UndefValue::get(I.getType())); + I.replaceAllUsesWith(Context->getUndef(I.getType())); I.eraseFromParent(); } else { // Move the instruction to the start of the exit block, after any PHI @@ -496,7 +497,7 @@ void LICM::sink(Instruction &I) { // The instruction is actually dead if there ARE NO exit blocks. CurAST->deleteValue(&I); if (!I.use_empty()) // If I has users in unreachable blocks, eliminate. - I.replaceAllUsesWith(UndefValue::get(I.getType())); + I.replaceAllUsesWith(Context->getUndef(I.getType())); I.eraseFromParent(); } else { // Otherwise, if we have multiple exits, use the PromoteMem2Reg function to diff --git a/lib/Transforms/Scalar/LoopIndexSplit.cpp b/lib/Transforms/Scalar/LoopIndexSplit.cpp index 6f7a7f8..38e3a8b 100644 --- a/lib/Transforms/Scalar/LoopIndexSplit.cpp +++ b/lib/Transforms/Scalar/LoopIndexSplit.cpp @@ -54,6 +54,7 @@ #include "llvm/Transforms/Scalar.h" #include "llvm/IntrinsicInst.h" +#include "llvm/LLVMContext.h" #include "llvm/Analysis/LoopPass.h" #include "llvm/Analysis/ScalarEvolution.h" #include "llvm/Analysis/Dominators.h" @@ -258,6 +259,9 @@ bool LoopIndexSplit::runOnLoop(Loop *IncomingLoop, LPPassManager &LPM_Ref) { IVExitValue = ExitCondition->getOperand(0); if (!L->isLoopInvariant(IVExitValue)) return false; + if (!IVBasedValues.count( + ExitCondition->getOperand(IVExitValue == ExitCondition->getOperand(0)))) + return false; // If start value is more then exit value where induction variable // increments by 1 then we are potentially dealing with an infinite loop. @@ -289,14 +293,16 @@ static bool isUsedOutsideLoop(Value *V, Loop *L) { } // Return V+1 -static Value *getPlusOne(Value *V, bool Sign, Instruction *InsertPt) { - Constant *One = ConstantInt::get(V->getType(), 1, Sign); +static Value *getPlusOne(Value *V, bool Sign, Instruction *InsertPt, + LLVMContext* Context) { + Constant *One = Context->getConstantInt(V->getType(), 1, Sign); return BinaryOperator::CreateAdd(V, One, "lsp", InsertPt); } // Return V-1 -static Value *getMinusOne(Value *V, bool Sign, Instruction *InsertPt) { - Constant *One = ConstantInt::get(V->getType(), 1, Sign); +static Value *getMinusOne(Value *V, bool Sign, Instruction *InsertPt, + LLVMContext* Context) { + Constant *One = Context->getConstantInt(V->getType(), 1, Sign); return BinaryOperator::CreateSub(V, One, "lsp", InsertPt); } @@ -491,16 +497,16 @@ bool LoopIndexSplit::restrictLoopBound(ICmpInst &Op) { if (Value *V = IVisLT(Op)) { // Restrict upper bound. 
if (IVisLE(*ExitCondition)) - V = getMinusOne(V, Sign, PHTerm); + V = getMinusOne(V, Sign, PHTerm, Context); NUB = getMin(V, IVExitValue, Sign, PHTerm); } else if (Value *V = IVisLE(Op)) { // Restrict upper bound. if (IVisLT(*ExitCondition)) - V = getPlusOne(V, Sign, PHTerm); + V = getPlusOne(V, Sign, PHTerm, Context); NUB = getMin(V, IVExitValue, Sign, PHTerm); } else if (Value *V = IVisGT(Op)) { // Restrict lower bound. - V = getPlusOne(V, Sign, PHTerm); + V = getPlusOne(V, Sign, PHTerm, Context); NLB = getMax(V, IVStartValue, Sign, PHTerm); } else if (Value *V = IVisGE(Op)) // Restrict lower bound. @@ -961,18 +967,18 @@ bool LoopIndexSplit::splitLoop() { /* Do nothing */ } else if (IVisLE(*SplitCondition)) { - AEV = getPlusOne(SplitValue, Sign, PHTerm); - BSV = getPlusOne(SplitValue, Sign, PHTerm); + AEV = getPlusOne(SplitValue, Sign, PHTerm, Context); + BSV = getPlusOne(SplitValue, Sign, PHTerm, Context); } else { assert (0 && "Unexpected split condition!"); } } else if (IVisLE(*ExitCondition)) { if (IVisLT(*SplitCondition)) { - AEV = getMinusOne(SplitValue, Sign, PHTerm); + AEV = getMinusOne(SplitValue, Sign, PHTerm, Context); } else if (IVisLE(*SplitCondition)) { - BSV = getPlusOne(SplitValue, Sign, PHTerm); + BSV = getPlusOne(SplitValue, Sign, PHTerm, Context); } else { assert (0 && "Unexpected split condition!"); } diff --git a/lib/Transforms/Scalar/LoopRotation.cpp b/lib/Transforms/Scalar/LoopRotation.cpp index 7a24b35..1f7892a 100644 --- a/lib/Transforms/Scalar/LoopRotation.cpp +++ b/lib/Transforms/Scalar/LoopRotation.cpp @@ -352,10 +352,9 @@ bool LoopRotate::rotateLoop(Loop *Lp, LPPassManager &LPM) { // Removing incoming branch from loop preheader to original header. // Now original header is inside the loop. - for (BasicBlock::iterator I = OrigHeader->begin(), E = OrigHeader->end(); - I != E; ++I) - if (PHINode *PN = dyn_cast<PHINode>(I)) - PN->removeIncomingValue(OrigPreHeader); + for (BasicBlock::iterator I = OrigHeader->begin(); + (PN = dyn_cast<PHINode>(I)); ++I) + PN->removeIncomingValue(OrigPreHeader); // Make NewHeader as the new header for the loop. L->moveToHeader(NewHeader); @@ -452,13 +451,10 @@ void LoopRotate::preserveCanonicalLoopForm(LPPassManager &LPM) { "Unexpected original pre-header terminator"); OrigPH_BI->setSuccessor(1, NewPreHeader); } - - for (BasicBlock::iterator I = NewHeader->begin(), E = NewHeader->end(); - I != E; ++I) { - PHINode *PN = dyn_cast<PHINode>(I); - if (!PN) - break; + PHINode *PN; + for (BasicBlock::iterator I = NewHeader->begin(); + (PN = dyn_cast<PHINode>(I)); ++I) { int index = PN->getBasicBlockIndex(OrigPreHeader); assert(index != -1 && "Expected incoming value from Original PreHeader"); PN->setIncomingBlock(index, NewPreHeader); @@ -545,11 +541,10 @@ void LoopRotate::preserveCanonicalLoopForm(LPPassManager &LPM) { BasicBlock *NExit = SplitEdge(L->getLoopLatch(), Exit, this); // Preserve LCSSA. 
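The rewritten loops in LoopRotation all use one idiom, which recurs again just below for the exit block: since the verifier guarantees a block's PHI nodes precede every other instruction, a dyn_cast in the loop condition stops the walk at the first non-PHI, and the terminator guarantees the iterator never runs off the block. (Note that the first hunk's "(PN = ...)" relies on a PHINode *PN declared earlier in rotateLoop; the second hunk declares its own.) A self-contained sketch with a hypothetical helper, which also sidesteps the one hazard of the idiom -- if the loop body may erase the current PHI, advance the iterator first:

#include "llvm/BasicBlock.h"
#include "llvm/Instructions.h"
#include "llvm/ADT/SmallVector.h"
using namespace llvm;

// Collect the PHI nodes that open BB; dyn_cast yields null at the
// first non-PHI instruction, ending the loop without an end iterator.
static void collectLeadingPHIs(BasicBlock *BB,
                               SmallVectorImpl<PHINode*> &PHIs) {
  PHINode *PN;
  for (BasicBlock::iterator I = BB->begin();
       (PN = dyn_cast<PHINode>(I)); ++I)
    PHIs.push_back(PN);
}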
- BasicBlock::iterator I = Exit->begin(), E = Exit->end(); - PHINode *PN = NULL; - for (; (PN = dyn_cast<PHINode>(I)); ++I) { + for (BasicBlock::iterator I = Exit->begin(); + (PN = dyn_cast<PHINode>(I)); ++I) { unsigned N = PN->getNumIncomingValues(); - for (unsigned index = 0; index < N; ++index) + for (unsigned index = 0; index != N; ++index) if (PN->getIncomingBlock(index) == NExit) { PHINode *NewPN = PHINode::Create(PN->getType(), PN->getName(), NExit->begin()); diff --git a/lib/Transforms/Scalar/LoopStrengthReduce.cpp b/lib/Transforms/Scalar/LoopStrengthReduce.cpp index a877c4e..046fed3 100644 --- a/lib/Transforms/Scalar/LoopStrengthReduce.cpp +++ b/lib/Transforms/Scalar/LoopStrengthReduce.cpp @@ -24,6 +24,7 @@ #include "llvm/Constants.h" #include "llvm/Instructions.h" #include "llvm/IntrinsicInst.h" +#include "llvm/LLVMContext.h" #include "llvm/Type.h" #include "llvm/DerivedTypes.h" #include "llvm/Analysis/Dominators.h" @@ -1575,7 +1576,7 @@ void LoopStrengthReduce::StrengthReduceStridedIVUsers(const SCEV* const &Stride, BasicBlock *LatchBlock = L->getLoopLatch(); Instruction *IVIncInsertPt = LatchBlock->getTerminator(); - Value *CommonBaseV = Constant::getNullValue(ReplacedTy); + Value *CommonBaseV = Context->getNullValue(ReplacedTy); const SCEV* RewriteFactor = SE->getIntegerSCEV(0, ReplacedTy); IVExpr ReuseIV(SE->getIntegerSCEV(0, Type::Int32Ty), @@ -1941,7 +1942,7 @@ ICmpInst *LoopStrengthReduce::ChangeCompareStride(Loop *L, ICmpInst *Cond, NewCmpTy = NewCmpLHS->getType(); NewTyBits = SE->getTypeSizeInBits(NewCmpTy); - const Type *NewCmpIntTy = IntegerType::get(NewTyBits); + const Type *NewCmpIntTy = Context->getIntegerType(NewTyBits); if (RequiresTypeConversion(NewCmpTy, CmpTy)) { // Check if it is possible to rewrite it using // an iv / stride of a smaller integer type. @@ -1986,10 +1987,10 @@ ICmpInst *LoopStrengthReduce::ChangeCompareStride(Loop *L, ICmpInst *Cond, NewStride = &IU->StrideOrder[i]; if (!isa<PointerType>(NewCmpTy)) - NewCmpRHS = ConstantInt::get(NewCmpTy, NewCmpVal); + NewCmpRHS = Context->getConstantInt(NewCmpTy, NewCmpVal); else { - Constant *CI = ConstantInt::get(NewCmpIntTy, NewCmpVal); - NewCmpRHS = ConstantExpr::getIntToPtr(CI, NewCmpTy); + Constant *CI = Context->getConstantInt(NewCmpIntTy, NewCmpVal); + NewCmpRHS = Context->getConstantExprIntToPtr(CI, NewCmpTy); } NewOffset = TyBits == NewTyBits ? SE->getMulExpr(CondUse->getOffset(), @@ -2233,7 +2234,7 @@ void LoopStrengthReduce::OptimizeShadowIV(Loop *L) { ConstantInt *Init = dyn_cast<ConstantInt>(PH->getIncomingValue(Entry)); if (!Init) continue; - Constant *NewInit = ConstantFP::get(DestTy, Init->getZExtValue()); + Constant *NewInit = Context->getConstantFP(DestTy, Init->getZExtValue()); BinaryOperator *Incr = dyn_cast<BinaryOperator>(PH->getIncomingValue(Latch)); @@ -2257,7 +2258,7 @@ void LoopStrengthReduce::OptimizeShadowIV(Loop *L) { PHINode *NewPH = PHINode::Create(DestTy, "IV.S.", PH); /* create new increment. '++d' in above example. */ - Constant *CFP = ConstantFP::get(DestTy, C->getZExtValue()); + Constant *CFP = Context->getConstantFP(DestTy, C->getZExtValue()); BinaryOperator *NewIncr = BinaryOperator::Create(Incr->getOpcode() == Instruction::Add ? 
Instruction::FAdd : Instruction::FSub, @@ -2496,7 +2497,7 @@ void LoopStrengthReduce::OptimizeLoopCountIV(Loop *L) { Value *startVal = phi->getIncomingValue(inBlock); Value *endVal = Cond->getOperand(1); // FIXME check for case where both are constant - Constant* Zero = ConstantInt::get(Cond->getOperand(1)->getType(), 0); + Constant* Zero = Context->getConstantInt(Cond->getOperand(1)->getType(), 0); BinaryOperator *NewStartVal = BinaryOperator::Create(Instruction::Sub, endVal, startVal, "tmp", PreInsertPt); diff --git a/lib/Transforms/Scalar/LoopUnswitch.cpp b/lib/Transforms/Scalar/LoopUnswitch.cpp index e3e881f..de5eedf 100644 --- a/lib/Transforms/Scalar/LoopUnswitch.cpp +++ b/lib/Transforms/Scalar/LoopUnswitch.cpp @@ -32,6 +32,7 @@ #include "llvm/DerivedTypes.h" #include "llvm/Function.h" #include "llvm/Instructions.h" +#include "llvm/LLVMContext.h" #include "llvm/Analysis/ConstantFolding.h" #include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/LoopPass.h" @@ -230,7 +231,7 @@ bool LoopUnswitch::processCurrentLoop() { Value *LoopCond = FindLIVLoopCondition(BI->getCondition(), currentLoop, Changed); if (LoopCond && UnswitchIfProfitable(LoopCond, - ConstantInt::getTrue())) { + Context->getConstantIntTrue())) { ++NumBranches; return true; } @@ -260,7 +261,7 @@ bool LoopUnswitch::processCurrentLoop() { Value *LoopCond = FindLIVLoopCondition(SI->getCondition(), currentLoop, Changed); if (LoopCond && UnswitchIfProfitable(LoopCond, - ConstantInt::getTrue())) { + Context->getConstantIntTrue())) { ++NumSelects; return true; } @@ -348,10 +349,10 @@ bool LoopUnswitch::IsTrivialUnswitchCondition(Value *Cond, Constant **Val, // this. if ((LoopExitBB = isTrivialLoopExitBlock(currentLoop, BI->getSuccessor(0)))) { - if (Val) *Val = ConstantInt::getTrue(); + if (Val) *Val = Context->getConstantIntTrue(); } else if ((LoopExitBB = isTrivialLoopExitBlock(currentLoop, BI->getSuccessor(1)))) { - if (Val) *Val = ConstantInt::getFalse(); + if (Val) *Val = Context->getConstantIntFalse(); } } else if (SwitchInst *SI = dyn_cast<SwitchInst>(HeaderTerm)) { // If this isn't a switch on Cond, we can't handle it. @@ -507,7 +508,7 @@ void LoopUnswitch::EmitPreheaderBranchOnCondition(Value *LIC, Constant *Val, Value *BranchVal = LIC; if (!isa<ConstantInt>(Val) || Val->getType() != Type::Int1Ty) BranchVal = new ICmpInst(ICmpInst::ICMP_EQ, LIC, Val, "tmp", InsertPt); - else if (Val != ConstantInt::getTrue()) + else if (Val != Context->getConstantIntTrue()) // We want to enter the new loop when the condition is true. std::swap(TrueDest, FalseDest); @@ -815,7 +816,7 @@ void LoopUnswitch::RemoveBlockIfDead(BasicBlock *BB, // Anything that uses the instructions in this basic block should have their // uses replaced with undefs. 
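For context on the unswitching calls above: EmitPreheaderBranchOnCondition reduces "enter the cloned loop when LIC has the constant value Val" to a plain conditional branch. When Val is an i1 constant the code branches on LIC directly, swapping the destinations if Val is false; for any other constant it materializes an icmp eq first. A condensed sketch of that logic (helper name ours; the real routine also updates analyses):

#include "llvm/Instructions.h"
#include "llvm/LLVMContext.h"
#include <algorithm>
using namespace llvm;

// Branch to TrueDest when LIC == Val, otherwise to FalseDest.
static void emitBranchOnCondition(Value *LIC, Constant *Val,
                                  BasicBlock *TrueDest, BasicBlock *FalseDest,
                                  Instruction *InsertPt, LLVMContext *Context) {
  Value *BranchVal = LIC;
  if (!isa<ConstantInt>(Val) || Val->getType() != Type::Int1Ty)
    BranchVal = new ICmpInst(ICmpInst::ICMP_EQ, LIC, Val, "tmp", InsertPt);
  else if (Val != Context->getConstantIntTrue())
    std::swap(TrueDest, FalseDest);  // enter the loop when LIC is false
  BranchInst::Create(TrueDest, FalseDest, BranchVal, InsertPt);
}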
if (!I->use_empty()) - I->replaceAllUsesWith(UndefValue::get(I->getType())); + I->replaceAllUsesWith(Context->getUndef(I->getType())); } // If this is the edge to the header block for a loop, remove the loop and @@ -904,7 +905,7 @@ void LoopUnswitch::RewriteLoopBodyWithConditionConstant(Loop *L, Value *LIC, if (IsEqual) Replacement = Val; else - Replacement = ConstantInt::get(Type::Int1Ty, + Replacement = Context->getConstantInt(Type::Int1Ty, !cast<ConstantInt>(Val)->getZExtValue()); for (unsigned i = 0, e = Users.size(); i != e; ++i) @@ -944,7 +945,7 @@ void LoopUnswitch::RewriteLoopBodyWithConditionConstant(Loop *L, Value *LIC, Instruction* OldTerm = Old->getTerminator(); BranchInst::Create(Split, SISucc, - ConstantInt::getTrue(), OldTerm); + Context->getConstantIntTrue(), OldTerm); LPM->deleteSimpleAnalysisValue(Old->getTerminator(), L); Old->getTerminator()->eraseFromParent(); diff --git a/lib/Transforms/Scalar/MemCpyOptimizer.cpp b/lib/Transforms/Scalar/MemCpyOptimizer.cpp index 5cf0518..3c7a5ab 100644 --- a/lib/Transforms/Scalar/MemCpyOptimizer.cpp +++ b/lib/Transforms/Scalar/MemCpyOptimizer.cpp @@ -16,6 +16,7 @@ #include "llvm/Transforms/Scalar.h" #include "llvm/IntrinsicInst.h" #include "llvm/Instructions.h" +#include "llvm/LLVMContext.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/Dominators.h" @@ -35,7 +36,7 @@ STATISTIC(NumMemSetInfer, "Number of memsets inferred"); /// true for all i8 values obviously, but is also true for i32 0, i32 -1, /// i16 0xF0F0, double 0.0 etc. If the value can't be handled with a repeated /// byte store (e.g. i16 0x1234), return null. -static Value *isBytewiseValue(Value *V) { +static Value *isBytewiseValue(Value *V, LLVMContext* Context) { // All byte-wide stores are splatable, even of arbitrary variables. if (V->getType() == Type::Int8Ty) return V; @@ -43,9 +44,9 @@ static Value *isBytewiseValue(Value *V) { // corresponding integer value is "byteable". An important case is 0.0. if (ConstantFP *CFP = dyn_cast<ConstantFP>(V)) { if (CFP->getType() == Type::FloatTy) - V = ConstantExpr::getBitCast(CFP, Type::Int32Ty); + V = Context->getConstantExprBitCast(CFP, Type::Int32Ty); if (CFP->getType() == Type::DoubleTy) - V = ConstantExpr::getBitCast(CFP, Type::Int64Ty); + V = Context->getConstantExprBitCast(CFP, Type::Int64Ty); // Don't handle long double formats, which have strange constraints. } @@ -68,7 +69,7 @@ static Value *isBytewiseValue(Value *V) { if (Val != Val2) return 0; } - return ConstantInt::get(Val); + return Context->getConstantInt(Val); } } @@ -345,7 +346,7 @@ bool MemCpyOpt::processStore(StoreInst *SI, BasicBlock::iterator& BBI) { // Ensure that the value being stored is something that can be memset'able a // byte at a time like "0" or "-1" or any width, as well as things like // 0xA0A0A0A0 and 0.0. - Value *ByteVal = isBytewiseValue(SI->getOperand(0)); + Value *ByteVal = isBytewiseValue(SI->getOperand(0), Context); if (!ByteVal) return false; @@ -384,7 +385,7 @@ bool MemCpyOpt::processStore(StoreInst *SI, BasicBlock::iterator& BBI) { if (NextStore->isVolatile()) break; // Check to see if this stored value is of the same byte-splattable value. - if (ByteVal != isBytewiseValue(NextStore->getOperand(0))) + if (ByteVal != isBytewiseValue(NextStore->getOperand(0), Context)) break; // Check to see if this store is to a constant offset from the start ptr. 
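isBytewiseValue above is the gatekeeper for memset formation: i8 stores trivially qualify, floats are bitcast to the same-width integer first, and a wider integer qualifies only when all of its bytes match. A plain-integer model of that final check, simplified to widths of at most 64 bits (the pass itself uses APInt):

#include <stdint.h>

// True if the low NumBytes bytes of V are identical, i.e. V can be
// produced by a repeated one-byte store: i32 0, i32 -1 and i16 0xF0F0
// qualify, i16 0x1234 does not.
static bool isByteSplat(uint64_t V, unsigned NumBytes) {
  uint64_t Byte = V & 0xFF;
  for (unsigned i = 1; i != NumBytes; ++i)
    if (((V >> (8 * i)) & 0xFF) != Byte)
      return false;
  return true;
}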
@@ -438,15 +439,15 @@ bool MemCpyOpt::processStore(StoreInst *SI, BasicBlock::iterator& BBI) { StartPtr = Range.StartPtr; // Cast the start ptr to be i8* as memset requires. - const Type *i8Ptr = PointerType::getUnqual(Type::Int8Ty); + const Type *i8Ptr = Context->getPointerTypeUnqual(Type::Int8Ty); if (StartPtr->getType() != i8Ptr) StartPtr = new BitCastInst(StartPtr, i8Ptr, StartPtr->getNameStart(), InsertPt); Value *Ops[] = { StartPtr, ByteVal, // Start, value - ConstantInt::get(Type::Int64Ty, Range.End-Range.Start), // size - ConstantInt::get(Type::Int32Ty, Range.Alignment) // align + Context->getConstantInt(Type::Int64Ty, Range.End-Range.Start), // size + Context->getConstantInt(Type::Int32Ty, Range.Alignment) // align }; Value *C = CallInst::Create(MemSetF, Ops, Ops+4, "", InsertPt); DEBUG(cerr << "Replace stores:\n"; diff --git a/lib/Transforms/Scalar/Reassociate.cpp b/lib/Transforms/Scalar/Reassociate.cpp index 293cf92..fa60a9d 100644 --- a/lib/Transforms/Scalar/Reassociate.cpp +++ b/lib/Transforms/Scalar/Reassociate.cpp @@ -27,6 +27,7 @@ #include "llvm/Function.h" #include "llvm/Instructions.h" #include "llvm/IntrinsicInst.h" +#include "llvm/LLVMContext.h" #include "llvm/Pass.h" #include "llvm/Assembly/Writer.h" #include "llvm/Support/CFG.h" @@ -198,8 +199,9 @@ static BinaryOperator *isReassociableOp(Value *V, unsigned Opcode) { /// LowerNegateToMultiply - Replace 0-X with X*-1. /// static Instruction *LowerNegateToMultiply(Instruction *Neg, - std::map<AssertingVH<>, unsigned> &ValueRankMap) { - Constant *Cst = ConstantInt::getAllOnesValue(Neg->getType()); + std::map<AssertingVH<>, unsigned> &ValueRankMap, + LLVMContext* Context) { + Constant *Cst = Context->getConstantIntAllOnesValue(Neg->getType()); Instruction *Res = BinaryOperator::CreateMul(Neg->getOperand(1), Cst, "",Neg); ValueRankMap.erase(Neg); @@ -263,11 +265,13 @@ void Reassociate::LinearizeExprTree(BinaryOperator *I, // transform them into multiplies by -1 so they can be reassociated. if (I->getOpcode() == Instruction::Mul) { if (!LHSBO && LHS->hasOneUse() && BinaryOperator::isNeg(LHS)) { - LHS = LowerNegateToMultiply(cast<Instruction>(LHS), ValueRankMap); + LHS = LowerNegateToMultiply(cast<Instruction>(LHS), + ValueRankMap, Context); LHSBO = isReassociableOp(LHS, Opcode); } if (!RHSBO && RHS->hasOneUse() && BinaryOperator::isNeg(RHS)) { - RHS = LowerNegateToMultiply(cast<Instruction>(RHS), ValueRankMap); + RHS = LowerNegateToMultiply(cast<Instruction>(RHS), + ValueRankMap, Context); RHSBO = isReassociableOp(RHS, Opcode); } } @@ -280,8 +284,8 @@ void Reassociate::LinearizeExprTree(BinaryOperator *I, Ops.push_back(ValueEntry(getRank(RHS), RHS)); // Clear the leaves out. - I->setOperand(0, UndefValue::get(I->getType())); - I->setOperand(1, UndefValue::get(I->getType())); + I->setOperand(0, Context->getUndef(I->getType())); + I->setOperand(1, Context->getUndef(I->getType())); return; } else { // Turn X+(Y+Z) -> (Y+Z)+X @@ -316,7 +320,7 @@ void Reassociate::LinearizeExprTree(BinaryOperator *I, Ops.push_back(ValueEntry(getRank(RHS), RHS)); // Clear the RHS leaf out. - I->setOperand(1, UndefValue::get(I->getType())); + I->setOperand(1, Context->getUndef(I->getType())); } // RewriteExprTree - Now that the operands for this expression tree are @@ -453,15 +457,17 @@ static Instruction *BreakUpSubtract(Instruction *Sub, /// by one, change this into a multiply by a constant to assist with further /// reassociation. 
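The helper that follows turns X << C into X * (1 << C) so shifts can participate in multiply trees; the multiplier is produced by constant-folding a shift of 1 (getConstantExprShl) rather than being computed in the compiler, which keeps it correct at any bit width. The identity it relies on, as a one-line model:

#include <stdint.h>

// (X << C) == X * (1 << C) for in-range C, e.g. X << 4 == X * 16.
static uint32_t shlAsMul(uint32_t X, unsigned C) {
  return X * (1u << C);   // same value as X << C for C < 32
}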
static Instruction *ConvertShiftToMul(Instruction *Shl, - std::map<AssertingVH<>, unsigned> &ValueRankMap) { + std::map<AssertingVH<>, unsigned> &ValueRankMap, + LLVMContext* Context) { // If an operand of this shift is a reassociable multiply, or if the shift // is used by a reassociable multiply or add, turn into a multiply. if (isReassociableOp(Shl->getOperand(0), Instruction::Mul) || (Shl->hasOneUse() && (isReassociableOp(Shl->use_back(), Instruction::Mul) || isReassociableOp(Shl->use_back(), Instruction::Add)))) { - Constant *MulCst = ConstantInt::get(Shl->getType(), 1); - MulCst = ConstantExpr::getShl(MulCst, cast<Constant>(Shl->getOperand(1))); + Constant *MulCst = Context->getConstantInt(Shl->getType(), 1); + MulCst = + Context->getConstantExprShl(MulCst, cast<Constant>(Shl->getOperand(1))); Instruction *Mul = BinaryOperator::CreateMul(Shl->getOperand(0), MulCst, "", Shl); @@ -561,7 +567,7 @@ Value *Reassociate::OptimizeExpression(BinaryOperator *I, if (Constant *V1 = dyn_cast<Constant>(Ops[Ops.size()-2].Op)) if (Constant *V2 = dyn_cast<Constant>(Ops.back().Op)) { Ops.pop_back(); - Ops.back().Op = ConstantExpr::get(Opcode, V1, V2); + Ops.back().Op = Context->getConstantExpr(Opcode, V1, V2); return OptimizeExpression(I, Ops); } @@ -617,10 +623,10 @@ Value *Reassociate::OptimizeExpression(BinaryOperator *I, if (FoundX != i) { if (Opcode == Instruction::And) { // ...&X&~X = 0 ++NumAnnihil; - return Constant::getNullValue(X->getType()); + return Context->getNullValue(X->getType()); } else if (Opcode == Instruction::Or) { // ...|X|~X = -1 ++NumAnnihil; - return ConstantInt::getAllOnesValue(X->getType()); + return Context->getConstantIntAllOnesValue(X->getType()); } } } @@ -639,7 +645,7 @@ Value *Reassociate::OptimizeExpression(BinaryOperator *I, assert(Opcode == Instruction::Xor); if (e == 2) { ++NumAnnihil; - return Constant::getNullValue(Ops[0].Op->getType()); + return Context->getNullValue(Ops[0].Op->getType()); } // ... X^X -> ... Ops.erase(Ops.begin()+i, Ops.begin()+i+2); @@ -664,7 +670,7 @@ Value *Reassociate::OptimizeExpression(BinaryOperator *I, // Remove X and -X from the operand list. 
if (Ops.size() == 2) { ++NumAnnihil; - return Constant::getNullValue(X->getType()); + return Context->getNullValue(X->getType()); } else { Ops.erase(Ops.begin()+i); if (i < FoundX) @@ -779,7 +785,7 @@ void Reassociate::ReassociateBB(BasicBlock *BB) { Instruction *BI = BBI++; if (BI->getOpcode() == Instruction::Shl && isa<ConstantInt>(BI->getOperand(1))) - if (Instruction *NI = ConvertShiftToMul(BI, ValueRankMap)) { + if (Instruction *NI = ConvertShiftToMul(BI, ValueRankMap, Context)) { MadeChange = true; BI = NI; } @@ -801,7 +807,7 @@ void Reassociate::ReassociateBB(BasicBlock *BB) { if (isReassociableOp(BI->getOperand(1), Instruction::Mul) && (!BI->hasOneUse() || !isReassociableOp(BI->use_back(), Instruction::Mul))) { - BI = LowerNegateToMultiply(BI, ValueRankMap); + BI = LowerNegateToMultiply(BI, ValueRankMap, Context); MadeChange = true; } } diff --git a/lib/Transforms/Scalar/Reg2Mem.cpp b/lib/Transforms/Scalar/Reg2Mem.cpp index 46b2952..ac95d25 100644 --- a/lib/Transforms/Scalar/Reg2Mem.cpp +++ b/lib/Transforms/Scalar/Reg2Mem.cpp @@ -21,6 +21,7 @@ #include "llvm/Transforms/Utils/Local.h" #include "llvm/Pass.h" #include "llvm/Function.h" +#include "llvm/LLVMContext.h" #include "llvm/Module.h" #include "llvm/BasicBlock.h" #include "llvm/Instructions.h" @@ -68,7 +69,7 @@ namespace { CastInst *AllocaInsertionPoint = CastInst::Create(Instruction::BitCast, - Constant::getNullValue(Type::Int32Ty), Type::Int32Ty, + Context->getNullValue(Type::Int32Ty), Type::Int32Ty, "reg2mem alloca point", I); // Find the escaped instructions. But don't create stack slots for diff --git a/lib/Transforms/Scalar/SCCP.cpp b/lib/Transforms/Scalar/SCCP.cpp index 3deee54..f0bc127 100644 --- a/lib/Transforms/Scalar/SCCP.cpp +++ b/lib/Transforms/Scalar/SCCP.cpp @@ -27,6 +27,7 @@ #include "llvm/Constants.h" #include "llvm/DerivedTypes.h" #include "llvm/Instructions.h" +#include "llvm/LLVMContext.h" #include "llvm/Pass.h" #include "llvm/Analysis/ConstantFolding.h" #include "llvm/Analysis/ValueTracking.h" @@ -138,6 +139,7 @@ public: /// Constant Propagation. /// class SCCPSolver : public InstVisitor<SCCPSolver> { + LLVMContext* Context; DenseSet<BasicBlock*> BBExecutable;// The basic blocks that are executable std::map<Value*, LatticeVal> ValueState; // The state each value is in. @@ -177,6 +179,7 @@ class SCCPSolver : public InstVisitor<SCCPSolver> { typedef std::pair<BasicBlock*, BasicBlock*> Edge; DenseSet<Edge> KnownFeasibleEdges; public: + void setContext(LLVMContext* C) { Context = C; } /// MarkBlockExecutable - This method can be used by clients to mark all of /// the blocks that are known to be intrinsically live in the processed unit. 
@@ -437,7 +440,7 @@ void SCCPSolver::getFeasibleSuccessors(TerminatorInst &TI, Succs[0] = Succs[1] = true; } else if (BCValue.isConstant()) { // Constant condition variables mean the branch can only go a single way - Succs[BCValue.getConstant() == ConstantInt::getFalse()] = true; + Succs[BCValue.getConstant() == Context->getConstantIntFalse()] = true; } } } else if (isa<InvokeInst>(&TI)) { @@ -482,7 +485,7 @@ bool SCCPSolver::isEdgeFeasible(BasicBlock *From, BasicBlock *To) { // Constant condition variables mean the branch can only go a single way return BI->getSuccessor(BCValue.getConstant() == - ConstantInt::getFalse()) == To; + Context->getConstantIntFalse()) == To; } return false; } @@ -663,7 +666,7 @@ void SCCPSolver::visitCastInst(CastInst &I) { if (VState.isOverdefined()) // Inherit overdefinedness of operand markOverdefined(&I); else if (VState.isConstant()) // Propagate constant value - markConstant(&I, ConstantExpr::getCast(I.getOpcode(), + markConstant(&I, Context->getConstantExprCast(I.getOpcode(), VState.getConstant(), I.getType())); } @@ -806,11 +809,12 @@ void SCCPSolver::visitBinaryOperator(Instruction &I) { if (NonOverdefVal->isUndefined()) { // Could annihilate value. if (I.getOpcode() == Instruction::And) - markConstant(IV, &I, Constant::getNullValue(I.getType())); + markConstant(IV, &I, Context->getNullValue(I.getType())); else if (const VectorType *PT = dyn_cast<VectorType>(I.getType())) - markConstant(IV, &I, ConstantVector::getAllOnesValue(PT)); + markConstant(IV, &I, Context->getConstantVectorAllOnesValue(PT)); else - markConstant(IV, &I, ConstantInt::getAllOnesValue(I.getType())); + markConstant(IV, &I, + Context->getConstantIntAllOnesValue(I.getType())); return; } else { if (I.getOpcode() == Instruction::And) { @@ -854,7 +858,8 @@ void SCCPSolver::visitBinaryOperator(Instruction &I) { Result.markOverdefined(); break; // Cannot fold this operation over the PHI nodes! } else if (In1.isConstant() && In2.isConstant()) { - Constant *V = ConstantExpr::get(I.getOpcode(), In1.getConstant(), + Constant *V = + Context->getConstantExpr(I.getOpcode(), In1.getConstant(), In2.getConstant()); if (Result.isUndefined()) Result.markConstant(V); @@ -902,7 +907,8 @@ void SCCPSolver::visitBinaryOperator(Instruction &I) { markOverdefined(IV, &I); } else if (V1State.isConstant() && V2State.isConstant()) { - markConstant(IV, &I, ConstantExpr::get(I.getOpcode(), V1State.getConstant(), + markConstant(IV, &I, + Context->getConstantExpr(I.getOpcode(), V1State.getConstant(), V2State.getConstant())); } } @@ -939,7 +945,7 @@ void SCCPSolver::visitCmpInst(CmpInst &I) { Result.markOverdefined(); break; // Cannot fold this operation over the PHI nodes! } else if (In1.isConstant() && In2.isConstant()) { - Constant *V = ConstantExpr::getCompare(I.getPredicate(), + Constant *V = Context->getConstantExprCompare(I.getPredicate(), In1.getConstant(), In2.getConstant()); if (Result.isUndefined()) @@ -988,7 +994,7 @@ void SCCPSolver::visitCmpInst(CmpInst &I) { markOverdefined(IV, &I); } else if (V1State.isConstant() && V2State.isConstant()) { - markConstant(IV, &I, ConstantExpr::getCompare(I.getPredicate(), + markConstant(IV, &I, Context->getConstantExprCompare(I.getPredicate(), V1State.getConstant(), V2State.getConstant())); } @@ -1090,7 +1096,7 @@ void SCCPSolver::visitGetElementPtrInst(GetElementPtrInst &I) { Constant *Ptr = Operands[0]; Operands.erase(Operands.begin()); // Erase the pointer from idx list... 
- markConstant(IV, &I, ConstantExpr::getGetElementPtr(Ptr, &Operands[0], + markConstant(IV, &I, Context->getConstantExprGetElementPtr(Ptr, &Operands[0], Operands.size())); } @@ -1124,7 +1130,7 @@ void SCCPSolver::visitLoadInst(LoadInst &I) { if (isa<ConstantPointerNull>(Ptr) && cast<PointerType>(Ptr->getType())->getAddressSpace() == 0) { // load null -> null - markConstant(IV, &I, Constant::getNullValue(I.getType())); + markConstant(IV, &I, Context->getNullValue(I.getType())); return; } @@ -1365,21 +1371,22 @@ bool SCCPSolver::ResolvedUndefsIn(Function &F) { // to be handled here, because we don't know whether the top part is 1's // or 0's. assert(Op0LV.isUndefined()); - markForcedConstant(LV, I, Constant::getNullValue(ITy)); + markForcedConstant(LV, I, Context->getNullValue(ITy)); return true; case Instruction::Mul: case Instruction::And: // undef * X -> 0. X could be zero. // undef & X -> 0. X could be zero. - markForcedConstant(LV, I, Constant::getNullValue(ITy)); + markForcedConstant(LV, I, Context->getNullValue(ITy)); return true; case Instruction::Or: // undef | X -> -1. X could be -1. if (const VectorType *PTy = dyn_cast<VectorType>(ITy)) - markForcedConstant(LV, I, ConstantVector::getAllOnesValue(PTy)); + markForcedConstant(LV, I, + Context->getConstantVectorAllOnesValue(PTy)); else - markForcedConstant(LV, I, ConstantInt::getAllOnesValue(ITy)); + markForcedConstant(LV, I, Context->getConstantIntAllOnesValue(ITy)); return true; case Instruction::SDiv: @@ -1392,7 +1399,7 @@ bool SCCPSolver::ResolvedUndefsIn(Function &F) { // undef / X -> 0. X could be maxint. // undef % X -> 0. X could be 1. - markForcedConstant(LV, I, Constant::getNullValue(ITy)); + markForcedConstant(LV, I, Context->getNullValue(ITy)); return true; case Instruction::AShr: @@ -1413,7 +1420,7 @@ bool SCCPSolver::ResolvedUndefsIn(Function &F) { // X >> undef -> 0. X could be 0. // X << undef -> 0. X could be 0. - markForcedConstant(LV, I, Constant::getNullValue(ITy)); + markForcedConstant(LV, I, Context->getNullValue(ITy)); return true; case Instruction::Select: // undef ? X : Y -> X or Y. There could be commonality between X/Y. @@ -1476,7 +1483,7 @@ bool SCCPSolver::ResolvedUndefsIn(Function &F) { // as undef, then further analysis could think the undef went another way // leading to an inconsistent set of conclusions. if (BranchInst *BI = dyn_cast<BranchInst>(TI)) { - BI->setCondition(ConstantInt::getFalse()); + BI->setCondition(Context->getConstantIntFalse()); } else { SwitchInst *SI = cast<SwitchInst>(TI); SI->setCondition(SI->getCaseValue(1)); @@ -1526,6 +1533,7 @@ FunctionPass *llvm::createSCCPPass() { bool SCCP::runOnFunction(Function &F) { DOUT << "SCCP on function '" << F.getNameStart() << "'\n"; SCCPSolver Solver; + Solver.setContext(Context); // Mark the first block of the function as being executable. Solver.MarkBlockExecutable(F.begin()); @@ -1565,7 +1573,7 @@ bool SCCP::runOnFunction(Function &F) { Instruction *I = Insts.back(); Insts.pop_back(); if (!I->use_empty()) - I->replaceAllUsesWith(UndefValue::get(I->getType())); + I->replaceAllUsesWith(Context->getUndef(I->getType())); BB->getInstList().erase(I); MadeChanges = true; ++NumInstRemoved; @@ -1585,7 +1593,7 @@ bool SCCP::runOnFunction(Function &F) { continue; Constant *Const = IV.isConstant() - ? IV.getConstant() : UndefValue::get(Inst->getType()); + ? IV.getConstant() : Context->getUndef(Inst->getType()); DOUT << " Constant: " << *Const << " = " << *Inst; // Replaces all of the uses of a variable with uses of the constant. 
@@ -1701,7 +1709,7 @@ bool IPSCCP::runOnModule(Module &M) { LatticeVal &IV = Values[AI]; if (IV.isConstant() || IV.isUndefined()) { Constant *CST = IV.isConstant() ? - IV.getConstant() : UndefValue::get(AI->getType()); + IV.getConstant() : Context->getUndef(AI->getType()); DOUT << "*** Arg " << *AI << " = " << *CST <<"\n"; // Replaces all of the uses of a variable with uses of the @@ -1726,7 +1734,7 @@ bool IPSCCP::runOnModule(Module &M) { Instruction *I = Insts.back(); Insts.pop_back(); if (!I->use_empty()) - I->replaceAllUsesWith(UndefValue::get(I->getType())); + I->replaceAllUsesWith(Context->getUndef(I->getType())); BB->getInstList().erase(I); MadeChanges = true; ++IPNumInstRemoved; @@ -1738,7 +1746,7 @@ bool IPSCCP::runOnModule(Module &M) { TI->getSuccessor(i)->removePredecessor(BB); } if (!TI->use_empty()) - TI->replaceAllUsesWith(UndefValue::get(TI->getType())); + TI->replaceAllUsesWith(Context->getUndef(TI->getType())); BB->getInstList().erase(TI); if (&*BB != &F->front()) @@ -1757,7 +1765,7 @@ bool IPSCCP::runOnModule(Module &M) { continue; Constant *Const = IV.isConstant() - ? IV.getConstant() : UndefValue::get(Inst->getType()); + ? IV.getConstant() : Context->getUndef(Inst->getType()); DOUT << " Constant: " << *Const << " = " << *Inst; // Replaces all of the uses of a variable with uses of the @@ -1831,7 +1839,7 @@ bool IPSCCP::runOnModule(Module &M) { for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB) if (ReturnInst *RI = dyn_cast<ReturnInst>(BB->getTerminator())) if (!isa<UndefValue>(RI->getOperand(0))) - RI->setOperand(0, UndefValue::get(F->getReturnType())); + RI->setOperand(0, Context->getUndef(F->getReturnType())); } // If we infered constant or undef values for globals variables, we can delete diff --git a/lib/Transforms/Scalar/ScalarReplAggregates.cpp b/lib/Transforms/Scalar/ScalarReplAggregates.cpp index d89790c..109fb90 100644 --- a/lib/Transforms/Scalar/ScalarReplAggregates.cpp +++ b/lib/Transforms/Scalar/ScalarReplAggregates.cpp @@ -27,6 +27,7 @@ #include "llvm/GlobalVariable.h" #include "llvm/Instructions.h" #include "llvm/IntrinsicInst.h" +#include "llvm/LLVMContext.h" #include "llvm/Pass.h" #include "llvm/Analysis/Dominators.h" #include "llvm/Target/TargetData.h" @@ -240,7 +241,8 @@ bool SROA::performScalarRepl(Function &F) { DOUT << "Found alloca equal to global: " << *AI; DOUT << " memcpy = " << *TheCopy; Constant *TheSrc = cast<Constant>(TheCopy->getOperand(2)); - AI->replaceAllUsesWith(ConstantExpr::getBitCast(TheSrc, AI->getType())); + AI->replaceAllUsesWith( + Context->getConstantExprBitCast(TheSrc, AI->getType())); TheCopy->eraseFromParent(); // Don't mutate the global. AI->eraseFromParent(); ++NumGlobals; @@ -305,7 +307,7 @@ bool SROA::performScalarRepl(Function &F) { DOUT << "CONVERT TO SCALAR INTEGER: " << *AI << "\n"; // Create and insert the integer alloca. 
- const Type *NewTy = IntegerType::get(AllocaSize*8); + const Type *NewTy = Context->getIntegerType(AllocaSize*8); NewAI = new AllocaInst(NewTy, 0, "", AI->getParent()->begin()); ConvertUsesToScalar(AI, NewAI, 0); } @@ -369,7 +371,7 @@ void SROA::DoScalarReplacement(AllocationInst *AI, // %insert = insertvalue { i32, i32 } %insert.0, i32 %load.1, 1 // (Also works for arrays instead of structs) if (LoadInst *LI = dyn_cast<LoadInst>(User)) { - Value *Insert = UndefValue::get(LI->getType()); + Value *Insert = Context->getUndef(LI->getType()); for (unsigned i = 0, e = ElementAllocas.size(); i != e; ++i) { Value *Load = new LoadInst(ElementAllocas[i], "load", LI); Insert = InsertValueInst::Create(Insert, Load, i, "insert", LI); @@ -416,7 +418,7 @@ void SROA::DoScalarReplacement(AllocationInst *AI, // expanded itself once the worklist is rerun. // SmallVector<Value*, 8> NewArgs; - NewArgs.push_back(Constant::getNullValue(Type::Int32Ty)); + NewArgs.push_back(Context->getNullValue(Type::Int32Ty)); NewArgs.append(GEPI->op_begin()+3, GEPI->op_end()); RepValue = GetElementPtrInst::Create(AllocaToUse, NewArgs.begin(), NewArgs.end(), "", GEPI); @@ -529,7 +531,7 @@ void SROA::isSafeUseOfAllocation(Instruction *User, AllocationInst *AI, // The GEP is not safe to transform if not of the form "GEP <ptr>, 0, <cst>". if (I == E || - I.getOperand() != Constant::getNullValue(I.getOperand()->getType())) { + I.getOperand() != Context->getNullValue(I.getOperand()->getType())) { return MarkUnsafe(Info); } @@ -762,7 +764,7 @@ void SROA::RewriteMemIntrinUserOfAlloca(MemIntrinsic *MI, Instruction *BCInst, const Type *BytePtrTy = MI->getRawDest()->getType(); bool SROADest = MI->getRawDest() == BCInst; - Constant *Zero = Constant::getNullValue(Type::Int32Ty); + Constant *Zero = Context->getNullValue(Type::Int32Ty); for (unsigned i = 0, e = NewElts.size(); i != e; ++i) { // If this is a memcpy/memmove, emit a GEP of the other element address. @@ -770,7 +772,7 @@ void SROA::RewriteMemIntrinUserOfAlloca(MemIntrinsic *MI, Instruction *BCInst, unsigned OtherEltAlign = MemAlignment; if (OtherPtr) { - Value *Idx[2] = { Zero, ConstantInt::get(Type::Int32Ty, i) }; + Value *Idx[2] = { Zero, Context->getConstantInt(Type::Int32Ty, i) }; OtherElt = GetElementPtrInst::Create(OtherPtr, Idx, Idx + 2, OtherPtr->getNameStr()+"."+utostr(i), MI); @@ -817,7 +819,7 @@ void SROA::RewriteMemIntrinUserOfAlloca(MemIntrinsic *MI, Instruction *BCInst, Constant *StoreVal; if (ConstantInt *CI = dyn_cast<ConstantInt>(MI->getOperand(2))) { if (CI->isZero()) { - StoreVal = Constant::getNullValue(EltTy); // 0.0, null, 0, <0,0> + StoreVal = Context->getNullValue(EltTy); // 0.0, null, 0, <0,0> } else { // If EltTy is a vector type, get the element type. const Type *ValTy = EltTy->getScalarType(); @@ -833,18 +835,18 @@ void SROA::RewriteMemIntrinUserOfAlloca(MemIntrinsic *MI, Instruction *BCInst, } // Convert the integer value to the appropriate type. - StoreVal = ConstantInt::get(TotalVal); + StoreVal = Context->getConstantInt(TotalVal); if (isa<PointerType>(ValTy)) - StoreVal = ConstantExpr::getIntToPtr(StoreVal, ValTy); + StoreVal = Context->getConstantExprIntToPtr(StoreVal, ValTy); else if (ValTy->isFloatingPoint()) - StoreVal = ConstantExpr::getBitCast(StoreVal, ValTy); + StoreVal = Context->getConstantExprBitCast(StoreVal, ValTy); assert(StoreVal->getType() == ValTy && "Type mismatch!"); // If the requested value was a vector constant, create it. 
if (EltTy != ValTy) { unsigned NumElts = cast<VectorType>(ValTy)->getNumElements(); SmallVector<Constant*, 16> Elts(NumElts, StoreVal); - StoreVal = ConstantVector::get(&Elts[0], NumElts); + StoreVal = Context->getConstantVector(&Elts[0], NumElts); } } new StoreInst(StoreVal, EltPtr, MI); @@ -870,15 +872,15 @@ void SROA::RewriteMemIntrinUserOfAlloca(MemIntrinsic *MI, Instruction *BCInst, Value *Ops[] = { SROADest ? EltPtr : OtherElt, // Dest ptr SROADest ? OtherElt : EltPtr, // Src ptr - ConstantInt::get(MI->getOperand(3)->getType(), EltSize), // Size - ConstantInt::get(Type::Int32Ty, OtherEltAlign) // Align + Context->getConstantInt(MI->getOperand(3)->getType(), EltSize), // Size + Context->getConstantInt(Type::Int32Ty, OtherEltAlign) // Align }; CallInst::Create(TheFn, Ops, Ops + 4, "", MI); } else { assert(isa<MemSetInst>(MI)); Value *Ops[] = { EltPtr, MI->getOperand(2), // Dest, Value, - ConstantInt::get(MI->getOperand(3)->getType(), EltSize), // Size + Context->getConstantInt(MI->getOperand(3)->getType(), EltSize), // Size Zero // Align }; CallInst::Create(TheFn, Ops, Ops + 4, "", MI); @@ -907,7 +909,8 @@ void SROA::RewriteStoreUserOfWholeAlloca(StoreInst *SI, return; // Handle tail padding by extending the operand if (TD->getTypeSizeInBits(SrcVal->getType()) != AllocaSizeBits) - SrcVal = new ZExtInst(SrcVal, IntegerType::get(AllocaSizeBits), "", SI); + SrcVal = new ZExtInst(SrcVal, + Context->getIntegerType(AllocaSizeBits), "", SI); DOUT << "PROMOTING STORE TO WHOLE ALLOCA: " << *AI << *SI; @@ -926,7 +929,7 @@ void SROA::RewriteStoreUserOfWholeAlloca(StoreInst *SI, Value *EltVal = SrcVal; if (Shift) { - Value *ShiftVal = ConstantInt::get(EltVal->getType(), Shift); + Value *ShiftVal = Context->getConstantInt(EltVal->getType(), Shift); EltVal = BinaryOperator::CreateLShr(EltVal, ShiftVal, "sroa.store.elt", SI); } @@ -938,7 +941,8 @@ void SROA::RewriteStoreUserOfWholeAlloca(StoreInst *SI, if (FieldSizeBits == 0) continue; if (FieldSizeBits != AllocaSizeBits) - EltVal = new TruncInst(EltVal, IntegerType::get(FieldSizeBits), "", SI); + EltVal = new TruncInst(EltVal, + Context->getIntegerType(FieldSizeBits), "", SI); Value *DestField = NewElts[i]; if (EltVal->getType() == FieldTy) { // Storing to an integer field of this size, just do it. @@ -948,7 +952,7 @@ void SROA::RewriteStoreUserOfWholeAlloca(StoreInst *SI, } else { // Otherwise, bitcast the dest pointer (for aggregates). DestField = new BitCastInst(DestField, - PointerType::getUnqual(EltVal->getType()), + Context->getPointerTypeUnqual(EltVal->getType()), "", SI); } new StoreInst(EltVal, DestField, SI); @@ -973,14 +977,15 @@ void SROA::RewriteStoreUserOfWholeAlloca(StoreInst *SI, Value *EltVal = SrcVal; if (Shift) { - Value *ShiftVal = ConstantInt::get(EltVal->getType(), Shift); + Value *ShiftVal = Context->getConstantInt(EltVal->getType(), Shift); EltVal = BinaryOperator::CreateLShr(EltVal, ShiftVal, "sroa.store.elt", SI); } // Truncate down to an integer of the right size. if (ElementSizeBits != AllocaSizeBits) - EltVal = new TruncInst(EltVal, IntegerType::get(ElementSizeBits),"",SI); + EltVal = new TruncInst(EltVal, + Context->getIntegerType(ElementSizeBits),"",SI); Value *DestField = NewElts[i]; if (EltVal->getType() == ArrayEltTy) { // Storing to an integer field of this size, just do it. @@ -990,7 +995,7 @@ void SROA::RewriteStoreUserOfWholeAlloca(StoreInst *SI, } else { // Otherwise, bitcast the dest pointer (for aggregates). 
DestField = new BitCastInst(DestField, - PointerType::getUnqual(EltVal->getType()), + Context->getPointerTypeUnqual(EltVal->getType()), "", SI); } new StoreInst(EltVal, DestField, SI); @@ -1034,7 +1039,8 @@ void SROA::RewriteLoadUserOfWholeAlloca(LoadInst *LI, AllocationInst *AI, ArrayEltBitOffset = TD->getTypeAllocSizeInBits(ArrayEltTy); } - Value *ResultVal = Constant::getNullValue(IntegerType::get(AllocaSizeBits)); + Value *ResultVal = + Context->getNullValue(Context->getIntegerType(AllocaSizeBits)); for (unsigned i = 0, e = NewElts.size(); i != e; ++i) { // Load the value from the alloca. If the NewElt is an aggregate, cast @@ -1047,10 +1053,11 @@ void SROA::RewriteLoadUserOfWholeAlloca(LoadInst *LI, AllocationInst *AI, // Ignore zero sized fields like {}, they obviously contain no data. if (FieldSizeBits == 0) continue; - const IntegerType *FieldIntTy = IntegerType::get(FieldSizeBits); + const IntegerType *FieldIntTy = Context->getIntegerType(FieldSizeBits); if (!isa<IntegerType>(FieldTy) && !FieldTy->isFloatingPoint() && !isa<VectorType>(FieldTy)) - SrcField = new BitCastInst(SrcField, PointerType::getUnqual(FieldIntTy), + SrcField = new BitCastInst(SrcField, + Context->getPointerTypeUnqual(FieldIntTy), "", LI); SrcField = new LoadInst(SrcField, "sroa.load.elt", LI); @@ -1075,7 +1082,7 @@ void SROA::RewriteLoadUserOfWholeAlloca(LoadInst *LI, AllocationInst *AI, Shift = AllocaSizeBits-Shift-FieldIntTy->getBitWidth(); if (Shift) { - Value *ShiftVal = ConstantInt::get(SrcField->getType(), Shift); + Value *ShiftVal = Context->getConstantInt(SrcField->getType(), Shift); SrcField = BinaryOperator::CreateShl(SrcField, ShiftVal, "", LI); } @@ -1179,7 +1186,7 @@ void SROA::CleanupGEP(GetElementPtrInst *GEPI) { return; if (NumElements == 1) { - GEPI->setOperand(2, Constant::getNullValue(Type::Int32Ty)); + GEPI->setOperand(2, Context->getNullValue(Type::Int32Ty)); return; } @@ -1187,16 +1194,16 @@ void SROA::CleanupGEP(GetElementPtrInst *GEPI) { // All users of the GEP must be loads. At each use of the GEP, insert // two loads of the appropriate indexed GEP and select between them. Value *IsOne = new ICmpInst(ICmpInst::ICMP_NE, I.getOperand(), - Constant::getNullValue(I.getOperand()->getType()), + Context->getNullValue(I.getOperand()->getType()), "isone", GEPI); // Insert the new GEP instructions, which are properly indexed. SmallVector<Value*, 8> Indices(GEPI->op_begin()+1, GEPI->op_end()); - Indices[1] = Constant::getNullValue(Type::Int32Ty); + Indices[1] = Context->getNullValue(Type::Int32Ty); Value *ZeroIdx = GetElementPtrInst::Create(GEPI->getOperand(0), Indices.begin(), Indices.end(), GEPI->getName()+".0", GEPI); - Indices[1] = ConstantInt::get(Type::Int32Ty, 1); + Indices[1] = Context->getConstantInt(Type::Int32Ty, 1); Value *OneIdx = GetElementPtrInst::Create(GEPI->getOperand(0), Indices.begin(), Indices.end(), @@ -1253,7 +1260,8 @@ void SROA::CleanupAllocaUsers(AllocationInst *AI) { /// large) integer type with extract and insert operations where the loads /// and stores would mutate the memory. static void MergeInType(const Type *In, uint64_t Offset, const Type *&VecTy, - unsigned AllocaSize, const TargetData &TD) { + unsigned AllocaSize, const TargetData &TD, + LLVMContext* Context) { // If this could be contributing to a vector, analyze it. if (VecTy != Type::VoidTy) { // either null or a vector type. 
@@ -1281,7 +1289,7 @@ static void MergeInType(const Type *In, uint64_t Offset, const Type *&VecTy, cast<VectorType>(VecTy)->getElementType() ->getPrimitiveSizeInBits()/8 == EltSize)) { if (VecTy == 0) - VecTy = VectorType::get(In, AllocaSize/EltSize); + VecTy = Context->getVectorType(In, AllocaSize/EltSize); return; } } @@ -1312,7 +1320,7 @@ bool SROA::CanConvertToScalar(Value *V, bool &IsNotTrivial, const Type *&VecTy, // Don't break volatile loads. if (LI->isVolatile()) return false; - MergeInType(LI->getType(), Offset, VecTy, AllocaSize, *TD); + MergeInType(LI->getType(), Offset, VecTy, AllocaSize, *TD, Context); SawVec |= isa<VectorType>(LI->getType()); continue; } @@ -1320,7 +1328,8 @@ bool SROA::CanConvertToScalar(Value *V, bool &IsNotTrivial, const Type *&VecTy, if (StoreInst *SI = dyn_cast<StoreInst>(User)) { // Storing the pointer, not into the value? if (SI->getOperand(0) == V || SI->isVolatile()) return 0; - MergeInType(SI->getOperand(0)->getType(), Offset, VecTy, AllocaSize, *TD); + MergeInType(SI->getOperand(0)->getType(), Offset, + VecTy, AllocaSize, *TD, Context); SawVec |= isa<VectorType>(SI->getOperand(0)->getType()); continue; } @@ -1449,8 +1458,8 @@ void SROA::ConvertUsesToScalar(Value *Ptr, AllocaInst *NewAI, uint64_t Offset) { APVal |= APVal << 8; Value *Old = Builder.CreateLoad(NewAI, (NewAI->getName()+".in").c_str()); - Value *New = ConvertScalar_InsertValue(ConstantInt::get(APVal), Old, - Offset, Builder); + Value *New = ConvertScalar_InsertValue(Context->getConstantInt(APVal), + Old, Offset, Builder); Builder.CreateStore(New, NewAI); } MSI->eraseFromParent(); @@ -1537,7 +1546,7 @@ Value *SROA::ConvertScalar_ExtractValue(Value *FromVal, const Type *ToType, } // Return the element extracted out of it. Value *V = Builder.CreateExtractElement(FromVal, - ConstantInt::get(Type::Int32Ty,Elt), + Context->getConstantInt(Type::Int32Ty,Elt), "tmp"); if (V->getType() != ToType) V = Builder.CreateBitCast(V, ToType, "tmp"); @@ -1548,7 +1557,7 @@ Value *SROA::ConvertScalar_ExtractValue(Value *FromVal, const Type *ToType, // use insertvalue's to form the FCA. if (const StructType *ST = dyn_cast<StructType>(ToType)) { const StructLayout &Layout = *TD->getStructLayout(ST); - Value *Res = UndefValue::get(ST); + Value *Res = Context->getUndef(ST); for (unsigned i = 0, e = ST->getNumElements(); i != e; ++i) { Value *Elt = ConvertScalar_ExtractValue(FromVal, ST->getElementType(i), Offset+Layout.getElementOffsetInBits(i), @@ -1560,7 +1569,7 @@ Value *SROA::ConvertScalar_ExtractValue(Value *FromVal, const Type *ToType, if (const ArrayType *AT = dyn_cast<ArrayType>(ToType)) { uint64_t EltSize = TD->getTypeAllocSizeInBits(AT->getElementType()); - Value *Res = UndefValue::get(AT); + Value *Res = Context->getUndef(AT); for (unsigned i = 0, e = AT->getNumElements(); i != e; ++i) { Value *Elt = ConvertScalar_ExtractValue(FromVal, AT->getElementType(), Offset+i*EltSize, Builder); @@ -1589,18 +1598,22 @@ Value *SROA::ConvertScalar_ExtractValue(Value *FromVal, const Type *ToType, // We do this to support (f.e.) loads off the end of a structure where // only some bits are used. 
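One detail from ConvertUsesToScalar above: to turn a memset into a store of the scalarized integer, the byte is replicated across the value with the "APVal |= APVal << 8" loop. Each iteration extends the splat by one more byte, so it handles any byte count, not just powers of two. The same loop over a plain integer:

#include <stdint.h>

// Replicate byte B across NumBytes bytes (NumBytes <= 8), as SROA
// does when widening a memset value for an integer alloca.
static uint64_t splatByte(uint8_t B, unsigned NumBytes) {
  uint64_t V = B;
  for (unsigned i = 1; i != NumBytes; ++i)
    V |= V << 8;
  return V;
}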
if (ShAmt > 0 && (unsigned)ShAmt < NTy->getBitWidth()) - FromVal = Builder.CreateLShr(FromVal, ConstantInt::get(FromVal->getType(), + FromVal = Builder.CreateLShr(FromVal, + Context->getConstantInt(FromVal->getType(), ShAmt), "tmp"); else if (ShAmt < 0 && (unsigned)-ShAmt < NTy->getBitWidth()) - FromVal = Builder.CreateShl(FromVal, ConstantInt::get(FromVal->getType(), + FromVal = Builder.CreateShl(FromVal, + Context->getConstantInt(FromVal->getType(), -ShAmt), "tmp"); // Finally, unconditionally truncate the integer to the right width. unsigned LIBitWidth = TD->getTypeSizeInBits(ToType); if (LIBitWidth < NTy->getBitWidth()) - FromVal = Builder.CreateTrunc(FromVal, IntegerType::get(LIBitWidth), "tmp"); + FromVal = + Builder.CreateTrunc(FromVal, Context->getIntegerType(LIBitWidth), "tmp"); else if (LIBitWidth > NTy->getBitWidth()) - FromVal = Builder.CreateZExt(FromVal, IntegerType::get(LIBitWidth), "tmp"); + FromVal = + Builder.CreateZExt(FromVal, Context->getIntegerType(LIBitWidth), "tmp"); // If the result is an integer, this is a trunc or bitcast. if (isa<IntegerType>(ToType)) { @@ -1651,7 +1664,7 @@ Value *SROA::ConvertScalar_InsertValue(Value *SV, Value *Old, SV = Builder.CreateBitCast(SV, VTy->getElementType(), "tmp"); SV = Builder.CreateInsertElement(Old, SV, - ConstantInt::get(Type::Int32Ty, Elt), + Context->getConstantInt(Type::Int32Ty, Elt), "tmp"); return SV; } @@ -1684,7 +1697,7 @@ Value *SROA::ConvertScalar_InsertValue(Value *SV, Value *Old, unsigned SrcStoreWidth = TD->getTypeStoreSizeInBits(SV->getType()); unsigned DestStoreWidth = TD->getTypeStoreSizeInBits(AllocaType); if (SV->getType()->isFloatingPoint() || isa<VectorType>(SV->getType())) - SV = Builder.CreateBitCast(SV, IntegerType::get(SrcWidth), "tmp"); + SV = Builder.CreateBitCast(SV, Context->getIntegerType(SrcWidth), "tmp"); else if (isa<PointerType>(SV->getType())) SV = Builder.CreatePtrToInt(SV, TD->getIntPtrType(), "tmp"); @@ -1719,10 +1732,12 @@ Value *SROA::ConvertScalar_InsertValue(Value *SV, Value *Old, // only some bits in the structure are set. APInt Mask(APInt::getLowBitsSet(DestWidth, SrcWidth)); if (ShAmt > 0 && (unsigned)ShAmt < DestWidth) { - SV = Builder.CreateShl(SV, ConstantInt::get(SV->getType(), ShAmt), "tmp"); + SV = Builder.CreateShl(SV, Context->getConstantInt(SV->getType(), + ShAmt), "tmp"); Mask <<= ShAmt; } else if (ShAmt < 0 && (unsigned)-ShAmt < DestWidth) { - SV = Builder.CreateLShr(SV, ConstantInt::get(SV->getType(), -ShAmt), "tmp"); + SV = Builder.CreateLShr(SV, Context->getConstantInt(SV->getType(), + -ShAmt), "tmp"); Mask = Mask.lshr(-ShAmt); } @@ -1730,7 +1745,7 @@ Value *SROA::ConvertScalar_InsertValue(Value *SV, Value *Old, // in the new bits. 
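The merge that follows is the read-modify-write at the heart of ConvertScalar_InsertValue: shift the incoming bits into position, build a mask for that window, clear the window in the old value, and OR the new bits in. The same computation on a 64-bit integer (a model only; the pass emits IR and uses APInt masks):

#include <stdint.h>

// Insert the low SrcWidth bits of Src into Dst at bit offset Shift,
// leaving all other bits of Dst intact. Assumes SrcWidth < 64 and
// SrcWidth + Shift <= 64.
static uint64_t insertBits(uint64_t Dst, uint64_t Src,
                           unsigned SrcWidth, unsigned Shift) {
  uint64_t Mask = ((1ULL << SrcWidth) - 1) << Shift;
  return (Dst & ~Mask) | ((Src << Shift) & Mask);
}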
if (SrcWidth != DestWidth) { assert(DestWidth > SrcWidth); - Old = Builder.CreateAnd(Old, ConstantInt::get(~Mask), "mask"); + Old = Builder.CreateAnd(Old, Context->getConstantInt(~Mask), "mask"); SV = Builder.CreateOr(Old, SV, "ins"); } return SV; diff --git a/lib/Transforms/Scalar/SimplifyCFGPass.cpp b/lib/Transforms/Scalar/SimplifyCFGPass.cpp index 5a85a04..b8bce80 100644 --- a/lib/Transforms/Scalar/SimplifyCFGPass.cpp +++ b/lib/Transforms/Scalar/SimplifyCFGPass.cpp @@ -26,6 +26,7 @@ #include "llvm/Transforms/Utils/Local.h" #include "llvm/Constants.h" #include "llvm/Instructions.h" +#include "llvm/LLVMContext.h" #include "llvm/Module.h" #include "llvm/Attributes.h" #include "llvm/Support/CFG.h" @@ -57,7 +58,7 @@ FunctionPass *llvm::createCFGSimplificationPass() { /// ChangeToUnreachable - Insert an unreachable instruction before the specified /// instruction, making it and the rest of the code in the block dead. -static void ChangeToUnreachable(Instruction *I) { +static void ChangeToUnreachable(Instruction *I, LLVMContext* Context) { BasicBlock *BB = I->getParent(); // Loop over all of the successors, removing BB's entry from any PHI // nodes. @@ -70,7 +71,7 @@ static void ChangeToUnreachable(Instruction *I) { BasicBlock::iterator BBI = I, BBE = BB->end(); while (BBI != BBE) { if (!BBI->use_empty()) - BBI->replaceAllUsesWith(UndefValue::get(BBI->getType())); + BBI->replaceAllUsesWith(Context->getUndef(BBI->getType())); BB->getInstList().erase(BBI++); } } @@ -95,7 +96,8 @@ static void ChangeToCall(InvokeInst *II) { } static bool MarkAliveBlocks(BasicBlock *BB, - SmallPtrSet<BasicBlock*, 128> &Reachable) { + SmallPtrSet<BasicBlock*, 128> &Reachable, + LLVMContext* Context) { SmallVector<BasicBlock*, 128> Worklist; Worklist.push_back(BB); @@ -118,7 +120,7 @@ static bool MarkAliveBlocks(BasicBlock *BB, // though. ++BBI; if (!isa<UnreachableInst>(BBI)) { - ChangeToUnreachable(BBI); + ChangeToUnreachable(BBI, Context); Changed = true; } break; @@ -131,7 +133,7 @@ static bool MarkAliveBlocks(BasicBlock *BB, if (isa<UndefValue>(Ptr) || (isa<ConstantPointerNull>(Ptr) && cast<PointerType>(Ptr->getType())->getAddressSpace() == 0)) { - ChangeToUnreachable(SI); + ChangeToUnreachable(SI, Context); Changed = true; break; } @@ -157,7 +159,7 @@ static bool MarkAliveBlocks(BasicBlock *BB, /// otherwise. static bool RemoveUnreachableBlocksFromFn(Function &F) { SmallPtrSet<BasicBlock*, 128> Reachable; - bool Changed = MarkAliveBlocks(F.begin(), Reachable); + bool Changed = MarkAliveBlocks(F.begin(), Reachable, F.getContext()); // If there are unreachable blocks in the CFG... 
if (Reachable.size() == F.size()) diff --git a/lib/Transforms/Scalar/SimplifyLibCalls.cpp b/lib/Transforms/Scalar/SimplifyLibCalls.cpp index bbcb792..ec48469 100644 --- a/lib/Transforms/Scalar/SimplifyLibCalls.cpp +++ b/lib/Transforms/Scalar/SimplifyLibCalls.cpp @@ -20,6 +20,7 @@ #define DEBUG_TYPE "simplify-libcalls" #include "llvm/Transforms/Scalar.h" #include "llvm/Intrinsics.h" +#include "llvm/LLVMContext.h" #include "llvm/Module.h" #include "llvm/Pass.h" #include "llvm/Support/IRBuilder.h" @@ -47,6 +48,7 @@ class VISIBILITY_HIDDEN LibCallOptimization { protected: Function *Caller; const TargetData *TD; + LLVMContext* Context; public: LibCallOptimization() { } virtual ~LibCallOptimization() {} @@ -62,6 +64,8 @@ public: Value *OptimizeCall(CallInst *CI, const TargetData &TD, IRBuilder<> &B) { Caller = CI->getParent()->getParent(); this->TD = &TD; + if (CI->getCalledFunction()) + Context = CI->getCalledFunction()->getContext(); return CallOptimizer(CI->getCalledFunction(), CI, B); } @@ -119,7 +123,8 @@ public: /// CastToCStr - Return V if it is an i8*, otherwise cast it to i8*. Value *LibCallOptimization::CastToCStr(Value *V, IRBuilder<> &B) { - return B.CreateBitCast(V, PointerType::getUnqual(Type::Int8Ty), "cstr"); + return + B.CreateBitCast(V, Context->getPointerTypeUnqual(Type::Int8Ty), "cstr"); } /// EmitStrLen - Emit a call to the strlen function to the builder, for the @@ -133,7 +138,7 @@ Value *LibCallOptimization::EmitStrLen(Value *Ptr, IRBuilder<> &B) { Constant *StrLen =M->getOrInsertFunction("strlen", AttrListPtr::get(AWI, 2), TD->getIntPtrType(), - PointerType::getUnqual(Type::Int8Ty), + Context->getPointerTypeUnqual(Type::Int8Ty), NULL); CallInst *CI = B.CreateCall(StrLen, CastToCStr(Ptr, B), "strlen"); if (const Function *F = dyn_cast<Function>(StrLen->stripPointerCasts())) @@ -152,7 +157,7 @@ Value *LibCallOptimization::EmitMemCpy(Value *Dst, Value *Src, Value *Len, Tys[0] = Len->getType(); Value *MemCpy = Intrinsic::getDeclaration(M, IID, Tys, 1); return B.CreateCall4(MemCpy, CastToCStr(Dst, B), CastToCStr(Src, B), Len, - ConstantInt::get(Type::Int32Ty, Align)); + Context->getConstantInt(Type::Int32Ty, Align)); } /// EmitMemChr - Emit a call to the memchr function. 
This assumes that Ptr is @@ -164,8 +169,8 @@ Value *LibCallOptimization::EmitMemChr(Value *Ptr, Value *Val, AWI = AttributeWithIndex::get(~0u, Attribute::ReadOnly | Attribute::NoUnwind); Value *MemChr = M->getOrInsertFunction("memchr", AttrListPtr::get(&AWI, 1), - PointerType::getUnqual(Type::Int8Ty), - PointerType::getUnqual(Type::Int8Ty), + Context->getPointerTypeUnqual(Type::Int8Ty), + Context->getPointerTypeUnqual(Type::Int8Ty), Type::Int32Ty, TD->getIntPtrType(), NULL); CallInst *CI = B.CreateCall3(MemChr, CastToCStr(Ptr, B), Val, Len, "memchr"); @@ -188,8 +193,8 @@ Value *LibCallOptimization::EmitMemCmp(Value *Ptr1, Value *Ptr2, Value *MemCmp = M->getOrInsertFunction("memcmp", AttrListPtr::get(AWI, 3), Type::Int32Ty, - PointerType::getUnqual(Type::Int8Ty), - PointerType::getUnqual(Type::Int8Ty), + Context->getPointerTypeUnqual(Type::Int8Ty), + Context->getPointerTypeUnqual(Type::Int8Ty), TD->getIntPtrType(), NULL); CallInst *CI = B.CreateCall3(MemCmp, CastToCStr(Ptr1, B), CastToCStr(Ptr2, B), Len, "memcmp"); @@ -208,7 +213,7 @@ Value *LibCallOptimization::EmitMemSet(Value *Dst, Value *Val, const Type *Tys[1]; Tys[0] = Len->getType(); Value *MemSet = Intrinsic::getDeclaration(M, IID, Tys, 1); - Value *Align = ConstantInt::get(Type::Int32Ty, 1); + Value *Align = Context->getConstantInt(Type::Int32Ty, 1); return B.CreateCall4(MemSet, CastToCStr(Dst, B), Val, Len, Align); } @@ -267,7 +272,7 @@ void LibCallOptimization::EmitPutS(Value *Str, IRBuilder<> &B) { Value *PutS = M->getOrInsertFunction("puts", AttrListPtr::get(AWI, 2), Type::Int32Ty, - PointerType::getUnqual(Type::Int8Ty), + Context->getPointerTypeUnqual(Type::Int8Ty), NULL); CallInst *CI = B.CreateCall(PutS, CastToCStr(Str, B), "puts"); if (const Function *F = dyn_cast<Function>(PutS->stripPointerCasts())) @@ -307,11 +312,11 @@ void LibCallOptimization::EmitFPutS(Value *Str, Value *File, IRBuilder<> &B) { Constant *F; if (isa<PointerType>(File->getType())) F = M->getOrInsertFunction("fputs", AttrListPtr::get(AWI, 3), Type::Int32Ty, - PointerType::getUnqual(Type::Int8Ty), + Context->getPointerTypeUnqual(Type::Int8Ty), File->getType(), NULL); else F = M->getOrInsertFunction("fputs", Type::Int32Ty, - PointerType::getUnqual(Type::Int8Ty), + Context->getPointerTypeUnqual(Type::Int8Ty), File->getType(), NULL); CallInst *CI = B.CreateCall2(F, CastToCStr(Str, B), File, "fputs"); @@ -332,16 +337,16 @@ void LibCallOptimization::EmitFWrite(Value *Ptr, Value *Size, Value *File, if (isa<PointerType>(File->getType())) F = M->getOrInsertFunction("fwrite", AttrListPtr::get(AWI, 3), TD->getIntPtrType(), - PointerType::getUnqual(Type::Int8Ty), + Context->getPointerTypeUnqual(Type::Int8Ty), TD->getIntPtrType(), TD->getIntPtrType(), File->getType(), NULL); else F = M->getOrInsertFunction("fwrite", TD->getIntPtrType(), - PointerType::getUnqual(Type::Int8Ty), + Context->getPointerTypeUnqual(Type::Int8Ty), TD->getIntPtrType(), TD->getIntPtrType(), File->getType(), NULL); CallInst *CI = B.CreateCall4(F, CastToCStr(Ptr, B), Size, - ConstantInt::get(TD->getIntPtrType(), 1), File); + Context->getConstantInt(TD->getIntPtrType(), 1), File); if (const Function *Fn = dyn_cast<Function>(F->stripPointerCasts())) CI->setCallingConv(Fn->getCallingConv()); @@ -540,7 +545,7 @@ struct VISIBILITY_HIDDEN StrCatOpt : public LibCallOptimization { // Verify the "strcat" function prototype. 
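Two conventions run through all of these simplifiers. First, nothing is assumed about the callee: each optimizer re-checks the prototype against the genuine libc signature, since a module is free to declare strcat with arbitrary types. Second, the emitters declare what they need with getOrInsertFunction and copy the calling convention only when the returned constant really is a Function -- a conflicting prior declaration makes it a bitcast instead. A condensed emitter following both rules (hypothetical helper):

#include "llvm/DerivedTypes.h"
#include "llvm/Instructions.h"
#include "llvm/Module.h"
#include "llvm/Support/IRBuilder.h"
using namespace llvm;

// Emit a call to the unary libcall Name : RetTy(ArgTy).
static Value *emitUnaryLibCall(Module *M, const char *Name, const Type *RetTy,
                               const Type *ArgTy, Value *Arg, IRBuilder<> &B) {
  Constant *F = M->getOrInsertFunction(Name, RetTy, ArgTy, NULL);
  CallInst *CI = B.CreateCall(F, Arg, Name);
  // If an existing declaration mismatched, F is a bitcast, not a Function.
  if (const Function *Fn = dyn_cast<Function>(F->stripPointerCasts()))
    CI->setCallingConv(Fn->getCallingConv());
  return CI;
}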
const FunctionType *FT = Callee->getFunctionType(); if (FT->getNumParams() != 2 || - FT->getReturnType() != PointerType::getUnqual(Type::Int8Ty) || + FT->getReturnType() != Context->getPointerTypeUnqual(Type::Int8Ty) || FT->getParamType(0) != FT->getReturnType() || FT->getParamType(1) != FT->getReturnType()) return 0; @@ -574,7 +579,8 @@ struct VISIBILITY_HIDDEN StrCatOpt : public LibCallOptimization { // We have enough information to now generate the memcpy call to do the // concatenation for us. Make a memcpy to copy the nul byte with align = 1. - EmitMemCpy(CpyDst, Src, ConstantInt::get(TD->getIntPtrType(), Len+1), 1, B); + EmitMemCpy(CpyDst, Src, + Context->getConstantInt(TD->getIntPtrType(), Len+1), 1, B); } }; @@ -586,7 +592,7 @@ struct VISIBILITY_HIDDEN StrNCatOpt : public StrCatOpt { // Verify the "strncat" function prototype. const FunctionType *FT = Callee->getFunctionType(); if (FT->getNumParams() != 3 || - FT->getReturnType() != PointerType::getUnqual(Type::Int8Ty) || + FT->getReturnType() != Context->getPointerTypeUnqual(Type::Int8Ty) || FT->getParamType(0) != FT->getReturnType() || FT->getParamType(1) != FT->getReturnType() || !isa<IntegerType>(FT->getParamType(2))) @@ -631,7 +637,7 @@ struct VISIBILITY_HIDDEN StrChrOpt : public LibCallOptimization { // Verify the "strchr" function prototype. const FunctionType *FT = Callee->getFunctionType(); if (FT->getNumParams() != 2 || - FT->getReturnType() != PointerType::getUnqual(Type::Int8Ty) || + FT->getReturnType() != Context->getPointerTypeUnqual(Type::Int8Ty) || FT->getParamType(0) != FT->getReturnType()) return 0; @@ -646,7 +652,7 @@ struct VISIBILITY_HIDDEN StrChrOpt : public LibCallOptimization { return 0; return EmitMemChr(SrcStr, CI->getOperand(2), // include nul. - ConstantInt::get(TD->getIntPtrType(), Len), B); + Context->getConstantInt(TD->getIntPtrType(), Len), B); } // Otherwise, the character is a constant, see if the first argument is @@ -663,7 +669,7 @@ struct VISIBILITY_HIDDEN StrChrOpt : public LibCallOptimization { uint64_t i = 0; while (1) { if (i == Str.size()) // Didn't find the char. strchr returns null. - return Constant::getNullValue(CI->getType()); + return Context->getNullValue(CI->getType()); // Did we find our match? 
if (Str[i] == CharValue) break; @@ -671,7 +677,7 @@ struct VISIBILITY_HIDDEN StrChrOpt : public LibCallOptimization { } // strchr(s+n,c) -> gep(s+n+i,c) - Value *Idx = ConstantInt::get(Type::Int64Ty, i); + Value *Idx = Context->getConstantInt(Type::Int64Ty, i); return B.CreateGEP(SrcStr, Idx, "strchr"); } }; @@ -685,12 +691,12 @@ struct VISIBILITY_HIDDEN StrCmpOpt : public LibCallOptimization { const FunctionType *FT = Callee->getFunctionType(); if (FT->getNumParams() != 2 || FT->getReturnType() != Type::Int32Ty || FT->getParamType(0) != FT->getParamType(1) || - FT->getParamType(0) != PointerType::getUnqual(Type::Int8Ty)) + FT->getParamType(0) != Context->getPointerTypeUnqual(Type::Int8Ty)) return 0; Value *Str1P = CI->getOperand(1), *Str2P = CI->getOperand(2); if (Str1P == Str2P) // strcmp(x,x) -> 0 - return ConstantInt::get(CI->getType(), 0); + return Context->getConstantInt(CI->getType(), 0); std::string Str1, Str2; bool HasStr1 = GetConstantStringInfo(Str1P, Str1); @@ -704,14 +710,15 @@ struct VISIBILITY_HIDDEN StrCmpOpt : public LibCallOptimization { // strcmp(x, y) -> cnst (if both x and y are constant strings) if (HasStr1 && HasStr2) - return ConstantInt::get(CI->getType(), strcmp(Str1.c_str(),Str2.c_str())); + return Context->getConstantInt(CI->getType(), + strcmp(Str1.c_str(),Str2.c_str())); // strcmp(P, "x") -> memcmp(P, "x", 2) uint64_t Len1 = GetStringLength(Str1P); uint64_t Len2 = GetStringLength(Str2P); if (Len1 && Len2) { return EmitMemCmp(Str1P, Str2P, - ConstantInt::get(TD->getIntPtrType(), + Context->getConstantInt(TD->getIntPtrType(), std::min(Len1, Len2)), B); } @@ -728,13 +735,13 @@ struct VISIBILITY_HIDDEN StrNCmpOpt : public LibCallOptimization { const FunctionType *FT = Callee->getFunctionType(); if (FT->getNumParams() != 3 || FT->getReturnType() != Type::Int32Ty || FT->getParamType(0) != FT->getParamType(1) || - FT->getParamType(0) != PointerType::getUnqual(Type::Int8Ty) || + FT->getParamType(0) != Context->getPointerTypeUnqual(Type::Int8Ty) || !isa<IntegerType>(FT->getParamType(2))) return 0; Value *Str1P = CI->getOperand(1), *Str2P = CI->getOperand(2); if (Str1P == Str2P) // strncmp(x,x,n) -> 0 - return ConstantInt::get(CI->getType(), 0); + return Context->getConstantInt(CI->getType(), 0); // Get the length argument if it is constant. 
uint64_t Length; @@ -744,7 +751,7 @@ struct VISIBILITY_HIDDEN StrNCmpOpt : public LibCallOptimization { return 0; if (Length == 0) // strncmp(x,y,0) -> 0 - return ConstantInt::get(CI->getType(), 0); + return Context->getConstantInt(CI->getType(), 0); std::string Str1, Str2; bool HasStr1 = GetConstantStringInfo(Str1P, Str1); @@ -758,7 +765,7 @@ struct VISIBILITY_HIDDEN StrNCmpOpt : public LibCallOptimization { // strncmp(x, y) -> cnst (if both x and y are constant strings) if (HasStr1 && HasStr2) - return ConstantInt::get(CI->getType(), + return Context->getConstantInt(CI->getType(), strncmp(Str1.c_str(), Str2.c_str(), Length)); return 0; } @@ -774,7 +781,7 @@ struct VISIBILITY_HIDDEN StrCpyOpt : public LibCallOptimization { const FunctionType *FT = Callee->getFunctionType(); if (FT->getNumParams() != 2 || FT->getReturnType() != FT->getParamType(0) || FT->getParamType(0) != FT->getParamType(1) || - FT->getParamType(0) != PointerType::getUnqual(Type::Int8Ty)) + FT->getParamType(0) != Context->getPointerTypeUnqual(Type::Int8Ty)) return 0; Value *Dst = CI->getOperand(1), *Src = CI->getOperand(2); @@ -787,7 +794,8 @@ struct VISIBILITY_HIDDEN StrCpyOpt : public LibCallOptimization { // We have enough information to now generate the memcpy call to do the // concatenation for us. Make a memcpy to copy the nul byte with align = 1. - EmitMemCpy(Dst, Src, ConstantInt::get(TD->getIntPtrType(), Len), 1, B); + EmitMemCpy(Dst, Src, + Context->getConstantInt(TD->getIntPtrType(), Len), 1, B); return Dst; } }; @@ -800,7 +808,7 @@ struct VISIBILITY_HIDDEN StrNCpyOpt : public LibCallOptimization { const FunctionType *FT = Callee->getFunctionType(); if (FT->getNumParams() != 3 || FT->getReturnType() != FT->getParamType(0) || FT->getParamType(0) != FT->getParamType(1) || - FT->getParamType(0) != PointerType::getUnqual(Type::Int8Ty) || + FT->getParamType(0) != Context->getPointerTypeUnqual(Type::Int8Ty) || !isa<IntegerType>(FT->getParamType(2))) return 0; @@ -815,7 +823,7 @@ struct VISIBILITY_HIDDEN StrNCpyOpt : public LibCallOptimization { if (SrcLen == 0) { // strncpy(x, "", y) -> memset(x, '\0', y, 1) - EmitMemSet(Dst, ConstantInt::get(Type::Int8Ty, '\0'), LenOp, B); + EmitMemSet(Dst, Context->getConstantInt(Type::Int8Ty, '\0'), LenOp, B); return Dst; } @@ -831,7 +839,8 @@ struct VISIBILITY_HIDDEN StrNCpyOpt : public LibCallOptimization { if (Len > SrcLen+1) return 0; // strncpy(x, s, c) -> memcpy(x, s, c, 1) [s and c are constant] - EmitMemCpy(Dst, Src, ConstantInt::get(TD->getIntPtrType(), Len), 1, B); + EmitMemCpy(Dst, Src, + Context->getConstantInt(TD->getIntPtrType(), Len), 1, B); return Dst; } @@ -844,7 +853,7 @@ struct VISIBILITY_HIDDEN StrLenOpt : public LibCallOptimization { virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) { const FunctionType *FT = Callee->getFunctionType(); if (FT->getNumParams() != 1 || - FT->getParamType(0) != PointerType::getUnqual(Type::Int8Ty) || + FT->getParamType(0) != Context->getPointerTypeUnqual(Type::Int8Ty) || !isa<IntegerType>(FT->getReturnType())) return 0; @@ -852,7 +861,7 @@ struct VISIBILITY_HIDDEN StrLenOpt : public LibCallOptimization { // Constant folding: strlen("xyz") -> 3 if (uint64_t Len = GetStringLength(Src)) - return ConstantInt::get(CI->getType(), Len-1); + return Context->getConstantInt(CI->getType(), Len-1); // Handle strlen(p) != 0. 
if (!IsOnlyUsedInZeroEqualityComparison(CI)) return 0; @@ -899,7 +908,7 @@ struct VISIBILITY_HIDDEN MemCmpOpt : public LibCallOptimization { Value *LHS = CI->getOperand(1), *RHS = CI->getOperand(2); if (LHS == RHS) // memcmp(s,s,x) -> 0 - return Constant::getNullValue(CI->getType()); + return Context->getNullValue(CI->getType()); // Make sure we have a constant length. ConstantInt *LenC = dyn_cast<ConstantInt>(CI->getOperand(3)); @@ -907,7 +916,7 @@ struct VISIBILITY_HIDDEN MemCmpOpt : public LibCallOptimization { uint64_t Len = LenC->getZExtValue(); if (Len == 0) // memcmp(s1,s2,0) -> 0 - return Constant::getNullValue(CI->getType()); + return Context->getNullValue(CI->getType()); if (Len == 1) { // memcmp(S1,S2,1) -> *LHS - *RHS Value *LHSV = B.CreateLoad(CastToCStr(LHS, B), "lhsv"); @@ -918,7 +927,7 @@ struct VISIBILITY_HIDDEN MemCmpOpt : public LibCallOptimization { // memcmp(S1,S2,2) != 0 -> (*(short*)LHS ^ *(short*)RHS) != 0 // memcmp(S1,S2,4) != 0 -> (*(int*)LHS ^ *(int*)RHS) != 0 if ((Len == 2 || Len == 4) && IsOnlyUsedInZeroEqualityComparison(CI)) { - const Type *PTy = PointerType::getUnqual(Len == 2 ? + const Type *PTy = Context->getPointerTypeUnqual(Len == 2 ? Type::Int16Ty : Type::Int32Ty); LHS = B.CreateBitCast(LHS, PTy, "tmp"); RHS = B.CreateBitCast(RHS, PTy, "tmp"); @@ -971,7 +980,7 @@ struct VISIBILITY_HIDDEN MemMoveOpt : public LibCallOptimization { Value *Dst = CastToCStr(CI->getOperand(1), B); Value *Src = CastToCStr(CI->getOperand(2), B); Value *Size = CI->getOperand(3); - Value *Align = ConstantInt::get(Type::Int32Ty, 1); + Value *Align = Context->getConstantInt(Type::Int32Ty, 1); B.CreateCall4(MemMove, Dst, Src, Size, Align); return CI->getOperand(1); } @@ -1025,7 +1034,7 @@ struct VISIBILITY_HIDDEN PowOpt : public LibCallOptimization { if (Op2C == 0) return 0; if (Op2C->getValueAPF().isZero()) // pow(x, 0.0) -> 1.0 - return ConstantFP::get(CI->getType(), 1.0); + return Context->getConstantFP(CI->getType(), 1.0); if (Op2C->isExactlyValue(0.5)) { // FIXME: This is not safe for -0.0 and -inf. This can only be done when @@ -1045,7 +1054,8 @@ struct VISIBILITY_HIDDEN PowOpt : public LibCallOptimization { if (Op2C->isExactlyValue(2.0)) // pow(x, 2.0) -> x*x return B.CreateFMul(Op1, Op1, "pow2"); if (Op2C->isExactlyValue(-1.0)) // pow(x, -1.0) -> 1.0/x - return B.CreateFDiv(ConstantFP::get(CI->getType(), 1.0), Op1, "powrecip"); + return B.CreateFDiv(Context->getConstantFP(CI->getType(), 1.0), + Op1, "powrecip"); return 0; } }; @@ -1083,9 +1093,9 @@ struct VISIBILITY_HIDDEN Exp2Opt : public LibCallOptimization { else Name = "ldexpl"; - Constant *One = ConstantFP::get(APFloat(1.0f)); + Constant *One = Context->getConstantFP(APFloat(1.0f)); if (Op->getType() != Type::FloatTy) - One = ConstantExpr::getFPExtend(One, Op->getType()); + One = Context->getConstantExprFPExtend(One, Op->getType()); Module *M = Caller->getParent(); Value *Callee = M->getOrInsertFunction(Name, Op->getType(), @@ -1143,8 +1153,8 @@ struct VISIBILITY_HIDDEN FFSOpt : public LibCallOptimization { // Constant fold. if (ConstantInt *CI = dyn_cast<ConstantInt>(Op)) { if (CI->getValue() == 0) // ffs(0) -> 0. 
- return Constant::getNullValue(CI->getType()); - return ConstantInt::get(Type::Int32Ty, // ffs(c) -> cttz(c)+1 + return Context->getNullValue(CI->getType()); + return Context->getConstantInt(Type::Int32Ty, // ffs(c) -> cttz(c)+1 CI->getValue().countTrailingZeros()+1); } @@ -1153,11 +1163,11 @@ struct VISIBILITY_HIDDEN FFSOpt : public LibCallOptimization { Value *F = Intrinsic::getDeclaration(Callee->getParent(), Intrinsic::cttz, &ArgType, 1); Value *V = B.CreateCall(F, Op, "cttz"); - V = B.CreateAdd(V, ConstantInt::get(V->getType(), 1), "tmp"); + V = B.CreateAdd(V, Context->getConstantInt(V->getType(), 1), "tmp"); V = B.CreateIntCast(V, Type::Int32Ty, false, "tmp"); - Value *Cond = B.CreateICmpNE(Op, Constant::getNullValue(ArgType), "tmp"); - return B.CreateSelect(Cond, V, ConstantInt::get(Type::Int32Ty, 0)); + Value *Cond = B.CreateICmpNE(Op, Context->getNullValue(ArgType), "tmp"); + return B.CreateSelect(Cond, V, Context->getConstantInt(Type::Int32Ty, 0)); } }; @@ -1174,8 +1184,10 @@ struct VISIBILITY_HIDDEN IsDigitOpt : public LibCallOptimization { // isdigit(c) -> (c-'0') <u 10 Value *Op = CI->getOperand(1); - Op = B.CreateSub(Op, ConstantInt::get(Type::Int32Ty, '0'), "isdigittmp"); - Op = B.CreateICmpULT(Op, ConstantInt::get(Type::Int32Ty, 10), "isdigit"); + Op = B.CreateSub(Op, Context->getConstantInt(Type::Int32Ty, '0'), + "isdigittmp"); + Op = B.CreateICmpULT(Op, Context->getConstantInt(Type::Int32Ty, 10), + "isdigit"); return B.CreateZExt(Op, CI->getType()); } }; @@ -1193,7 +1205,8 @@ struct VISIBILITY_HIDDEN IsAsciiOpt : public LibCallOptimization { // isascii(c) -> c <u 128 Value *Op = CI->getOperand(1); - Op = B.CreateICmpULT(Op, ConstantInt::get(Type::Int32Ty, 128), "isascii"); + Op = B.CreateICmpULT(Op, Context->getConstantInt(Type::Int32Ty, 128), + "isascii"); return B.CreateZExt(Op, CI->getType()); } }; @@ -1211,7 +1224,8 @@ struct VISIBILITY_HIDDEN AbsOpt : public LibCallOptimization { // abs(x) -> x >s -1 ? x : -x Value *Op = CI->getOperand(1); - Value *Pos = B.CreateICmpSGT(Op,ConstantInt::getAllOnesValue(Op->getType()), + Value *Pos = B.CreateICmpSGT(Op, + Context->getConstantIntAllOnesValue(Op->getType()), "ispos"); Value *Neg = B.CreateNeg(Op, "neg"); return B.CreateSelect(Pos, Op, Neg); @@ -1231,7 +1245,8 @@ struct VISIBILITY_HIDDEN ToAsciiOpt : public LibCallOptimization { return 0; // isascii(c) -> c & 0x7f - return B.CreateAnd(CI->getOperand(1), ConstantInt::get(CI->getType(),0x7F)); + return B.CreateAnd(CI->getOperand(1), + Context->getConstantInt(CI->getType(),0x7F)); } }; @@ -1258,12 +1273,14 @@ struct VISIBILITY_HIDDEN PrintFOpt : public LibCallOptimization { // Empty format string -> noop. if (FormatStr.empty()) // Tolerate printf's declared void. - return CI->use_empty() ? (Value*)CI : ConstantInt::get(CI->getType(), 0); + return CI->use_empty() ? (Value*)CI : + Context->getConstantInt(CI->getType(), 0); // printf("x") -> putchar('x'), even for '%'. if (FormatStr.size() == 1) { - EmitPutChar(ConstantInt::get(Type::Int32Ty, FormatStr[0]), B); - return CI->use_empty() ? (Value*)CI : ConstantInt::get(CI->getType(), 1); + EmitPutChar(Context->getConstantInt(Type::Int32Ty, FormatStr[0]), B); + return CI->use_empty() ? (Value*)CI : + Context->getConstantInt(CI->getType(), 1); } // printf("foo\n") --> puts("foo") @@ -1272,12 +1289,12 @@ struct VISIBILITY_HIDDEN PrintFOpt : public LibCallOptimization { // Create a string literal with no \n on it. We expect the constant merge // pass to be run after this pass, to merge duplicate strings. 
FormatStr.erase(FormatStr.end()-1); - Constant *C = ConstantArray::get(FormatStr, true); + Constant *C = Context->getConstantArray(FormatStr, true); C = new GlobalVariable(C->getType(), true,GlobalVariable::InternalLinkage, C, "str", Callee->getParent()); EmitPutS(C, B); return CI->use_empty() ? (Value*)CI : - ConstantInt::get(CI->getType(), FormatStr.size()+1); + Context->getConstantInt(CI->getType(), FormatStr.size()+1); } // Optimize specific format strings. @@ -1285,7 +1302,8 @@ struct VISIBILITY_HIDDEN PrintFOpt : public LibCallOptimization { if (FormatStr == "%c" && CI->getNumOperands() > 2 && isa<IntegerType>(CI->getOperand(2)->getType())) { EmitPutChar(CI->getOperand(2), B); - return CI->use_empty() ? (Value*)CI : ConstantInt::get(CI->getType(), 1); + return CI->use_empty() ? (Value*)CI : + Context->getConstantInt(CI->getType(), 1); } // printf("%s\n", str) --> puts(str) @@ -1326,8 +1344,8 @@ struct VISIBILITY_HIDDEN SPrintFOpt : public LibCallOptimization { // sprintf(str, fmt) -> llvm.memcpy(str, fmt, strlen(fmt)+1, 1) EmitMemCpy(CI->getOperand(1), CI->getOperand(2), // Copy the nul byte. - ConstantInt::get(TD->getIntPtrType(), FormatStr.size()+1),1,B); - return ConstantInt::get(CI->getType(), FormatStr.size()); + Context->getConstantInt(TD->getIntPtrType(), FormatStr.size()+1),1,B); + return Context->getConstantInt(CI->getType(), FormatStr.size()); } // The remaining optimizations require the format string to be "%s" or "%c" @@ -1342,10 +1360,10 @@ struct VISIBILITY_HIDDEN SPrintFOpt : public LibCallOptimization { Value *V = B.CreateTrunc(CI->getOperand(3), Type::Int8Ty, "char"); Value *Ptr = CastToCStr(CI->getOperand(1), B); B.CreateStore(V, Ptr); - Ptr = B.CreateGEP(Ptr, ConstantInt::get(Type::Int32Ty, 1), "nul"); - B.CreateStore(Constant::getNullValue(Type::Int8Ty), Ptr); + Ptr = B.CreateGEP(Ptr, Context->getConstantInt(Type::Int32Ty, 1), "nul"); + B.CreateStore(Context->getNullValue(Type::Int8Ty), Ptr); - return ConstantInt::get(CI->getType(), 1); + return Context->getConstantInt(CI->getType(), 1); } if (FormatStr[1] == 's') { @@ -1353,7 +1371,8 @@ struct VISIBILITY_HIDDEN SPrintFOpt : public LibCallOptimization { if (!isa<PointerType>(CI->getOperand(3)->getType())) return 0; Value *Len = EmitStrLen(CI->getOperand(3), B); - Value *IncLen = B.CreateAdd(Len, ConstantInt::get(Len->getType(), 1), + Value *IncLen = B.CreateAdd(Len, + Context->getConstantInt(Len->getType(), 1), "leninc"); EmitMemCpy(CI->getOperand(1), CI->getOperand(3), IncLen, 1, B); @@ -1386,13 +1405,13 @@ struct VISIBILITY_HIDDEN FWriteOpt : public LibCallOptimization { // If this is writing zero records, remove the call (it's a noop). if (Bytes == 0) - return ConstantInt::get(CI->getType(), 0); + return Context->getConstantInt(CI->getType(), 0); // If this is writing one byte, turn it into fputc. if (Bytes == 1) { // fwrite(S,1,1,F) -> fputc(S[0],F) Value *Char = B.CreateLoad(CastToCStr(CI->getOperand(1), B), "char"); EmitFPutC(Char, CI->getOperand(4), B); - return ConstantInt::get(CI->getType(), 1); + return Context->getConstantInt(CI->getType(), 1); } return 0; @@ -1414,7 +1433,8 @@ struct VISIBILITY_HIDDEN FPutsOpt : public LibCallOptimization { // fputs(s,F) --> fwrite(s,1,strlen(s),F) uint64_t Len = GetStringLength(CI->getOperand(1)); if (!Len) return 0; - EmitFWrite(CI->getOperand(1), ConstantInt::get(TD->getIntPtrType(), Len-1), + EmitFWrite(CI->getOperand(1), + Context->getConstantInt(TD->getIntPtrType(), Len-1), CI->getOperand(2), B); return CI; // Known to have no uses (see above). 
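The change running through every SimplifyLibCalls hunk above is mechanical: constant and type creation moves from static factory methods to accessors on an LLVMContext. A minimal sketch of the before/after shape (Context and TD are the members used by the surrounding code; the value 42 is hypothetical):

    // Before: static factories backed by implicitly global uniquing tables.
    Value *Len = ConstantInt::get(TD->getIntPtrType(), 42);
    const Type *BytePtr = PointerType::getUnqual(Type::Int8Ty);

    // After: the same requests routed through an explicit LLVMContext, which
    // for now simply forwards to the old factories (see LLVMContext.cpp
    // later in this diff).
    Value *Len2 = Context->getConstantInt(TD->getIntPtrType(), 42);
    const Type *BytePtr2 = Context->getPointerTypeUnqual(Type::Int8Ty);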
} @@ -1443,10 +1463,10 @@ struct VISIBILITY_HIDDEN FPrintFOpt : public LibCallOptimization { if (FormatStr[i] == '%') // Could handle %% -> % if we cared. return 0; // We found a format specifier. - EmitFWrite(CI->getOperand(2), ConstantInt::get(TD->getIntPtrType(), + EmitFWrite(CI->getOperand(2), Context->getConstantInt(TD->getIntPtrType(), FormatStr.size()), CI->getOperand(1), B); - return ConstantInt::get(CI->getType(), FormatStr.size()); + return Context->getConstantInt(CI->getType(), FormatStr.size()); } // The remaining optimizations require the format string to be "%s" or "%c" @@ -1459,7 +1479,7 @@ struct VISIBILITY_HIDDEN FPrintFOpt : public LibCallOptimization { // fprintf(F, "%c", chr) --> *(i8*)dst = chr if (!isa<IntegerType>(CI->getOperand(3)->getType())) return 0; EmitFPutC(CI->getOperand(3), CI->getOperand(1), B); - return ConstantInt::get(CI->getType(), 1); + return Context->getConstantInt(CI->getType(), 1); } if (FormatStr[1] == 's') { diff --git a/lib/Transforms/Scalar/TailDuplication.cpp b/lib/Transforms/Scalar/TailDuplication.cpp index 99a7dee..c037ee9 100644 --- a/lib/Transforms/Scalar/TailDuplication.cpp +++ b/lib/Transforms/Scalar/TailDuplication.cpp @@ -317,9 +317,12 @@ void TailDup::eliminateUnconditionalBranch(BranchInst *Branch) { // BI = Branch; ++BI; // Get an iterator to the first new instruction for (; BI != SourceBlock->end(); ++BI) - for (unsigned i = 0, e = BI->getNumOperands(); i != e; ++i) - if (Value *Remapped = ValueMapping[BI->getOperand(i)]) - BI->setOperand(i, Remapped); + for (unsigned i = 0, e = BI->getNumOperands(); i != e; ++i) { + std::map<Value*, Value*>::const_iterator I = + ValueMapping.find(BI->getOperand(i)); + if (I != ValueMapping.end()) + BI->setOperand(i, I->second); + } // Next we check to see if any of the successors of DestBlock had PHI nodes. // If so, we need to add entries to the PHI nodes for SourceBlock now. @@ -333,8 +336,9 @@ void TailDup::eliminateUnconditionalBranch(BranchInst *Branch) { Value *IV = PN->getIncomingValueForBlock(DestBlock); // Remap the value if necessary... - if (Value *MappedIV = ValueMapping[IV]) - IV = MappedIV; + std::map<Value*, Value*>::const_iterator I = ValueMapping.find(IV); + if (I != ValueMapping.end()) + IV = I->second; PN->addIncoming(IV, SourceBlock); } } diff --git a/lib/Transforms/Utils/CMakeLists.txt b/lib/Transforms/Utils/CMakeLists.txt index d68bf02..10cae5c 100644 --- a/lib/Transforms/Utils/CMakeLists.txt +++ b/lib/Transforms/Utils/CMakeLists.txt @@ -20,6 +20,7 @@ add_llvm_library(LLVMTransformUtils Mem2Reg.cpp PromoteMemoryToRegister.cpp SimplifyCFG.cpp + SSI.cpp UnifyFunctionExitNodes.cpp UnrollLoop.cpp ValueMapper.cpp diff --git a/lib/Transforms/Utils/CloneModule.cpp b/lib/Transforms/Utils/CloneModule.cpp index 337fa8a..82f5b93 100644 --- a/lib/Transforms/Utils/CloneModule.cpp +++ b/lib/Transforms/Utils/CloneModule.cpp @@ -35,7 +35,7 @@ Module *llvm::CloneModule(const Module *M) { Module *llvm::CloneModule(const Module *M, DenseMap<const Value*, Value*> &ValueMap) { // First off, we need to create the new module... 
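The TailDuplication hunk above swaps std::map::operator[] for find(). The distinction matters because operator[] default-constructs a value on a miss, so every failed lookup silently inserted a null entry into ValueMapping. A sketch of the difference (a hypothetical helper, not part of the patch):

    #include "llvm/Value.h"
    #include <map>
    using namespace llvm;

    static Value *remap(std::map<Value*, Value*> &ValueMapping, Value *Op) {
      // The old shape, ValueMapping[Op], would insert {Op, 0} on a miss and
      // return the null pointer, growing the map as a side effect.
      // find() is a pure lookup with no side effects:
      std::map<Value*, Value*>::const_iterator I = ValueMapping.find(Op);
      return I != ValueMapping.end() ? I->second : Op;
    }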
- Module *New = new Module(M->getModuleIdentifier()); + Module *New = new Module(M->getModuleIdentifier(), M->getContext()); New->setDataLayout(M->getDataLayout()); New->setTargetTriple(M->getTargetTriple()); New->setModuleInlineAsm(M->getModuleInlineAsm()); diff --git a/lib/Transforms/Utils/LoopSimplify.cpp b/lib/Transforms/Utils/LoopSimplify.cpp index 03d273d..d6b167f 100644 --- a/lib/Transforms/Utils/LoopSimplify.cpp +++ b/lib/Transforms/Utils/LoopSimplify.cpp @@ -42,6 +42,7 @@ #include "llvm/Analysis/Dominators.h" #include "llvm/Analysis/LoopInfo.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" +#include "llvm/Transforms/Utils/Local.h" #include "llvm/Support/CFG.h" #include "llvm/Support/Compiler.h" #include "llvm/ADT/SetOperations.h" @@ -258,6 +259,79 @@ ReprocessLoop: PN->eraseFromParent(); } + // If this loop has multiple exits and the exits all go to the same + // block, attempt to merge the exits. This helps several passes, such + // as LoopRotation, which do not support loops with multiple exits. + // SimplifyCFG also does this (and this code uses the same utility + // function), however this code is loop-aware, whereas SimplifyCFG is + // not. That gives it the advantage of being able to hoist + // loop-invariant instructions out of the way to open up more + // opportunities, and the disadvantage of having the responsibility + // to preserve dominator information. + if (ExitBlocks.size() > 1 && L->getUniqueExitBlock()) { + SmallVector<BasicBlock*, 8> ExitingBlocks; + L->getExitingBlocks(ExitingBlocks); + for (unsigned i = 0, e = ExitingBlocks.size(); i != e; ++i) { + BasicBlock *ExitingBlock = ExitingBlocks[i]; + if (!ExitingBlock->getSinglePredecessor()) continue; + BranchInst *BI = dyn_cast<BranchInst>(ExitingBlock->getTerminator()); + if (!BI || !BI->isConditional()) continue; + CmpInst *CI = dyn_cast<CmpInst>(BI->getCondition()); + if (!CI || CI->getParent() != ExitingBlock) continue; + + // Attempt to hoist out all instructions except for the + // comparison and the branch. + bool AllInvariant = true; + for (BasicBlock::iterator I = ExitingBlock->begin(); &*I != BI; ) { + Instruction *Inst = I++; + if (Inst == CI) + continue; + if (Inst->isTrapping()) { + AllInvariant = false; + break; + } + for (unsigned j = 0, f = Inst->getNumOperands(); j != f; ++j) + if (!L->isLoopInvariant(Inst->getOperand(j))) { + AllInvariant = false; + break; + } + if (!AllInvariant) + break; + // Hoist. + Inst->moveBefore(L->getLoopPreheader()->getTerminator()); + } + if (!AllInvariant) continue; + + // The block has now been cleared of all instructions except for + // a comparison and a conditional branch. SimplifyCFG may be able + // to fold it now. + if (!FoldBranchToCommonDest(BI)) continue; + + // Success. The block is now dead, so remove it from the loop, + // update the dominator tree and dominance frontier, and delete it.
+ assert(pred_begin(ExitingBlock) == pred_end(ExitingBlock)); + Changed = true; + LI->removeBlock(ExitingBlock); + + DominanceFrontier *DF = getAnalysisIfAvailable<DominanceFrontier>(); + DomTreeNode *Node = DT->getNode(ExitingBlock); + const std::vector<DomTreeNodeBase<BasicBlock> *> &Children = + Node->getChildren(); + for (unsigned k = 0, g = Children.size(); k != g; ++k) { + DT->changeImmediateDominator(Children[k], Node->getIDom()); + if (DF) DF->changeImmediateDominator(Children[k]->getBlock(), + Node->getIDom()->getBlock(), + DT); + } + DT->eraseNode(ExitingBlock); + if (DF) DF->removeBlock(ExitingBlock); + + BI->getSuccessor(0)->removePredecessor(ExitingBlock); + BI->getSuccessor(1)->removePredecessor(ExitingBlock); + ExitingBlock->eraseFromParent(); + } + } + return Changed; } diff --git a/lib/Transforms/Utils/LowerAllocations.cpp b/lib/Transforms/Utils/LowerAllocations.cpp index 9af47f5..74e7028 100644 --- a/lib/Transforms/Utils/LowerAllocations.cpp +++ b/lib/Transforms/Utils/LowerAllocations.cpp @@ -89,7 +89,7 @@ bool LowerAllocations::doInitialization(Module &M) { const Type *BPTy = PointerType::getUnqual(Type::Int8Ty); // Prototype malloc as "char* malloc(...)", because we don't know in // doInitialization whether size_t is int or long. - FunctionType *FT = FunctionType::get(BPTy, std::vector<const Type*>(), true); + FunctionType *FT = FunctionType::get(BPTy, true); MallocFunc = M.getOrInsertFunction("malloc", FT); FreeFunc = M.getOrInsertFunction("free" , Type::VoidTy, BPTy, (Type *)0); return true; diff --git a/lib/Transforms/Utils/SSI.cpp b/lib/Transforms/Utils/SSI.cpp new file mode 100644 index 0000000..4c4dd37 --- /dev/null +++ b/lib/Transforms/Utils/SSI.cpp @@ -0,0 +1,390 @@ +//===------------------- SSI.cpp - Creates SSI Representation -------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This pass converts a list of variables to the Static Single Information +// form. This is a program representation described by Scott Ananian in his +// Master's thesis: "The Static Single Information Form" (1999). +// We are building an on-demand representation, that is, we do not convert +// every single variable in the target function to SSI form. Rather, we receive +// a list of target variables that must be converted. We also do not +// completely convert a target variable to the SSI format. Instead, we only +// change the variable at the points where new information can be attached +// to its live range, that is, at branch points. +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "ssi" + +#include "llvm/Transforms/Scalar.h" +#include "llvm/Transforms/Utils/SSI.h" +#include "llvm/Analysis/Dominators.h" + +using namespace llvm; + +static const std::string SSI_PHI = "SSI_phi"; +static const std::string SSI_SIG = "SSI_sigma"; + +static const unsigned UNSIGNED_INFINITE = ~0U; + +void SSI::getAnalysisUsage(AnalysisUsage &AU) const { + AU.addRequired<DominanceFrontier>(); + AU.addRequired<DominatorTree>(); + AU.setPreservesAll(); + +} + +bool SSI::runOnFunction(Function &F) { + DT_ = &getAnalysis<DominatorTree>(); + return false; +} + +/// This method creates the SSI representation for the list of values +/// received. It will only create the SSI representation if a value is used +/// to decide a branch.
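Before the rest of SSI.cpp, one piece of terminology: a "sigma" in this pass is represented as a single-operand PHINode placed at the top of a branch successor, so that the renamed value can carry the information the branch proves. A minimal sketch of what insertSigmaFunctions below builds (X, BB, and Succ are hypothetical names for the value, the branching block, and one successor):

    // BB ends in a conditional branch whose condition compares X.
    // In each successor Succ that has a unique predecessor:
    PHINode *Sigma = PHINode::Create(X->getType(), SSI_SIG, Succ->begin());
    Sigma->addIncoming(X, BB); // exactly one incoming value: the sigma operand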
Repeated values are created only once. +/// +void SSI::createSSI(SmallVectorImpl<Instruction *> &value) { + init(value); + + for (unsigned i = 0; i < num_values; ++i) { + if (created.insert(value[i])) { + needConstruction[i] = true; + } + } + insertSigmaFunctions(value); + + // Test if there is a need to transform to SSI + if (needConstruction.any()) { + insertPhiFunctions(value); + renameInit(value); + rename(DT_->getRoot()); + fixPhis(); + } + + clean(); +} + +/// Insert sigma functions (a sigma function is a phi function with one +/// operand) +/// +void SSI::insertSigmaFunctions(SmallVectorImpl<Instruction *> &value) { + for (unsigned i = 0; i < num_values; ++i) { + if (!needConstruction[i]) + continue; + + bool need = false; + for (Value::use_iterator begin = value[i]->use_begin(), end = + value[i]->use_end(); begin != end; ++begin) { + // Test if the Use of the Value is in a comparison instruction + CmpInst *CI = dyn_cast<CmpInst>(begin); + if (CI && isUsedInTerminator(CI)) { + // Basic Block of the Instruction + BasicBlock *BB = CI->getParent(); + // Last Instruction of the Basic Block + const TerminatorInst *TI = BB->getTerminator(); + + for (unsigned j = 0, e = TI->getNumSuccessors(); j < e; ++j) { + // Next Basic Block + BasicBlock *BB_next = TI->getSuccessor(j); + if (BB_next != BB && + BB_next->getUniquePredecessor() != NULL && + dominateAny(BB_next, value[i])) { + PHINode *PN = PHINode::Create( + value[i]->getType(), SSI_SIG, BB_next->begin()); + PN->addIncoming(value[i], BB); + sigmas.insert(std::make_pair(PN, i)); + created.insert(PN); + need = true; + defsites[i].push_back(BB_next); + } + } + } + } + needConstruction[i] = need; + } +} + +/// Insert phi functions when necessary +/// +void SSI::insertPhiFunctions(SmallVectorImpl<Instruction *> &value) { + DominanceFrontier *DF = &getAnalysis<DominanceFrontier>(); + for (unsigned i = 0; i < num_values; ++i) { + // Test if there were any sigmas for this variable + if (needConstruction[i]) { + + SmallPtrSet<BasicBlock *, 1> BB_visited; + + // Insert phi functions if there is any sigma function + while (!defsites[i].empty()) { + + BasicBlock *BB = defsites[i].back(); + + defsites[i].pop_back(); + DominanceFrontier::iterator DF_BB = DF->find(BB); + + // Iterate through the dominance frontier of BB + for (std::set<BasicBlock *>::iterator DF_BB_begin = + DF_BB->second.begin(), DF_BB_end = DF_BB->second.end(); + DF_BB_begin != DF_BB_end; ++DF_BB_begin) { + BasicBlock *BB_dominated = *DF_BB_begin; + + // Test if we have not yet visited this node and if the + // original definition dominates this node + if (BB_visited.insert(BB_dominated) && + DT_->properlyDominates(value_original[i], BB_dominated) && + dominateAny(BB_dominated, value[i])) { + PHINode *PN = PHINode::Create( + value[i]->getType(), SSI_PHI, BB_dominated->begin()); + phis.insert(std::make_pair(PN, i)); + created.insert(PN); + + defsites[i].push_back(BB_dominated); + } + } + } + BB_visited.clear(); + } + } +} + +/// Some initialization for the renaming phase +/// +void SSI::renameInit(SmallVectorImpl<Instruction *> &value) { + value_stack.resize(num_values); + for (unsigned i = 0; i < num_values; ++i) { + value_stack[i].push_back(value[i]); + } +} + +/// Renames all variables in the specified BasicBlock. +/// Only variables that need to be renamed will be. +/// +void SSI::rename(BasicBlock *BB) { + BitVector *defined = new BitVector(num_values, false); + + // Iterate through instructions and make appropriate renaming.
+ // For SSI_PHI (b = PHI()), store b at value_stack as a new + // definition of the variable it represents. + // For SSI_SIG (b = PHI(a)), substitute a with the current + // value of a, present in the value_stack. + // Then store b in the value_stack as the new definition of a. + // For all other instructions (b = OP(a, c, d, ...)), we need to substitute + // all operands with their current values, present in value_stack. + for (BasicBlock::iterator begin = BB->begin(), end = BB->end(); + begin != end; ++begin) { + Instruction *I = begin; + if (PHINode *PN = dyn_cast<PHINode>(I)) { // Treat PHI functions + int position; + + // Treat SSI_PHI + if ((position = getPositionPhi(PN)) != -1) { + value_stack[position].push_back(PN); + (*defined)[position] = true; + } + + // Treat SSI_SIG + else if ((position = getPositionSigma(PN)) != -1) { + substituteUse(I); + value_stack[position].push_back(PN); + (*defined)[position] = true; + } + + // Treat all other PHI functions + else { + substituteUse(I); + } + } + + // Treat all other functions + else { + substituteUse(I); + } + } + + // This loop iterates over all BasicBlocks that are successors of the current + // BasicBlock. For each SSI_PHI instruction found, insert an operand. + // This operand is the current value in value_stack for the variable + // at "position", and the BasicBlock this operand represents is the current + // BasicBlock. + for (succ_iterator SI = succ_begin(BB), SE = succ_end(BB); SI != SE; ++SI) { + BasicBlock *BB_succ = *SI; + + for (BasicBlock::iterator begin = BB_succ->begin(), + notPhi = BB_succ->getFirstNonPHI(); begin != *notPhi; ++begin) { + Instruction *I = begin; + PHINode *PN; + int position; + if ((PN = dyn_cast<PHINode>(I)) && ((position + = getPositionPhi(PN)) != -1)) { + PN->addIncoming(value_stack[position].back(), BB); + } + } + } + + // This loop calls rename on all children of this block. Here, children + // refers to successor blocks in the dominance tree. + DomTreeNode *DTN = DT_->getNode(BB); + for (DomTreeNode::iterator begin = DTN->begin(), end = DTN->end(); + begin != end; ++begin) { + DomTreeNodeBase<BasicBlock> *DTN_children = *begin; + BasicBlock *BB_children = DTN_children->getBlock(); + rename(BB_children); + } + + // Now we remove all inserted definitions of a variable from the top of + // the stack leaving the previous one as the top. + if (defined->any()) { + for (unsigned i = 0; i < num_values; ++i) { + if ((*defined)[i]) { + value_stack[i].pop_back(); + } + } + } +} + +/// Substitute any use in this instruction for the last definition of +/// the variable +/// +void SSI::substituteUse(Instruction *I) { + for (unsigned i = 0, e = I->getNumOperands(); i < e; ++i) { + Value *operand = I->getOperand(i); + for (unsigned j = 0; j < num_values; ++j) { + if (operand == value_stack[j].front() && + I != value_stack[j].back()) { + PHINode *PN_I = dyn_cast<PHINode>(I); + PHINode *PN_vs = dyn_cast<PHINode>(value_stack[j].back()); + + // If a phi created in a BasicBlock is used as an operand of another + // created in the same BasicBlock, this step marks this second phi, + // to fix this issue later. It cannot be fixed now, because the + // operands of the first phi are not final yet. + if (PN_I && PN_vs && + value_stack[j].back()->getParent() == I->getParent()) { + + phisToFix.insert(PN_I); + } + + I->setOperand(i, value_stack[j].back()); + break; + } + } + } +} + +/// Test if the BasicBlock BB dominates any use or definition of value.
+/// +bool SSI::dominateAny(BasicBlock *BB, Instruction *value) { + for (Value::use_iterator begin = value->use_begin(), + end = value->use_end(); begin != end; ++begin) { + Instruction *I = cast<Instruction>(*begin); + BasicBlock *BB_father = I->getParent(); + if (DT_->dominates(BB, BB_father)) { + return true; + } + } + return false; +} + +/// When there is a phi node that is created in a BasicBlock and it is used +/// as an operand of another phi function in the same BasicBlock, +/// LLVM treats this as an error. So, for the second phi, call the first phi +/// P and the BasicBlock it comes in through B. P will then be replaced by +/// the value it has for BasicBlock B. +/// +void SSI::fixPhis() { + for (SmallPtrSet<PHINode *, 1>::iterator begin = phisToFix.begin(), + end = phisToFix.end(); begin != end; ++begin) { + PHINode *PN = *begin; + for (unsigned i = 0, e = PN->getNumIncomingValues(); i < e; ++i) { + PHINode *PN_father; + if ((PN_father = dyn_cast<PHINode>(PN->getIncomingValue(i))) && + PN->getParent() == PN_father->getParent()) { + BasicBlock *BB = PN->getIncomingBlock(i); + int pos = PN_father->getBasicBlockIndex(BB); + PN->setIncomingValue(i, PN_father->getIncomingValue(pos)); + } + } + } +} + +/// Return which variable (position on the vector of variables) this phi +/// represents on the phis list. +/// +unsigned SSI::getPositionPhi(PHINode *PN) { + DenseMap<PHINode *, unsigned>::iterator val = phis.find(PN); + if (val == phis.end()) + return UNSIGNED_INFINITE; + else + return val->second; +} + +/// Return which variable (position on the vector of variables) this phi +/// represents on the sigmas list. +/// +unsigned SSI::getPositionSigma(PHINode *PN) { + DenseMap<PHINode *, unsigned>::iterator val = sigmas.find(PN); + if (val == sigmas.end()) + return UNSIGNED_INFINITE; + else + return val->second; +} + +/// Return true if the comparison instruction is an operand +/// of the terminator instruction of its basic block. +/// +unsigned SSI::isUsedInTerminator(CmpInst *CI) { + TerminatorInst *TI = CI->getParent()->getTerminator(); + if (TI->getNumOperands() == 0) { + return false; + } else if (CI == TI->getOperand(0)) { + return true; + } else { + return false; + } +} + +/// Initializes the internal state used to create the SSI representation +/// +void SSI::init(SmallVectorImpl<Instruction *> &value) { + num_values = value.size(); + needConstruction.resize(num_values, false); + + value_original.resize(num_values); + defsites.resize(num_values); + + for (unsigned i = 0; i < num_values; ++i) { + value_original[i] = value[i]->getParent(); + defsites[i].push_back(value_original[i]); + } +} + +/// Clean up all resources used by this SSI construction +/// +void SSI::clean() { + for (unsigned i = 0; i < num_values; ++i) { + defsites[i].clear(); + if (i < value_stack.size()) + value_stack[i].clear(); + } + + phis.clear(); + sigmas.clear(); + phisToFix.clear(); + + defsites.clear(); + value_stack.clear(); + value_original.clear(); + needConstruction.clear(); +} + +/// createSSIPass - The public interface to this file...
+/// +FunctionPass *llvm::createSSIPass() { return new SSI(); } + +char SSI::ID = 0; +static RegisterPass<SSI> X("ssi", "Static Single Information Construction"); + diff --git a/lib/Transforms/Utils/SimplifyCFG.cpp b/lib/Transforms/Utils/SimplifyCFG.cpp index ee0f6a6..58d4d5a 100644 --- a/lib/Transforms/Utils/SimplifyCFG.cpp +++ b/lib/Transforms/Utils/SimplifyCFG.cpp @@ -1492,7 +1492,7 @@ static bool SimplifyCondBranchToTwoReturns(BranchInst *BI) { /// and if a predecessor branches to us and one of our successors, fold the /// setcc into the predecessor and use logical operations to pick the right /// destination. -static bool FoldBranchToCommonDest(BranchInst *BI) { +bool llvm::FoldBranchToCommonDest(BranchInst *BI) { BasicBlock *BB = BI->getParent(); Instruction *Cond = dyn_cast<Instruction>(BI->getCondition()); if (Cond == 0) return false; diff --git a/lib/VMCore/AsmWriter.cpp b/lib/VMCore/AsmWriter.cpp index 73b1ed6..cbf7070 100644 --- a/lib/VMCore/AsmWriter.cpp +++ b/lib/VMCore/AsmWriter.cpp @@ -35,6 +35,7 @@ #include "llvm/Support/raw_ostream.h" #include <algorithm> #include <cctype> +#include <map> using namespace llvm; // Make virtual table appear in this compilation unit. @@ -945,25 +946,6 @@ static void WriteConstantInt(raw_ostream &Out, const Constant *CV, return; } - if (const MDNode *N = dyn_cast<MDNode>(CV)) { - Out << "!{"; - for (MDNode::const_elem_iterator I = N->elem_begin(), E = N->elem_end(); - I != E;) { - if (!*I) { - Out << "null"; - } else { - TypePrinter.print((*I)->getType(), Out); - Out << ' '; - WriteAsOperandInternal(Out, *I, TypePrinter, Machine); - } - - if (++I != E) - Out << ", "; - } - Out << "}"; - return; - } - if (const ConstantExpr *CE = dyn_cast<ConstantExpr>(CV)) { Out << CE->getOpcodeName(); if (CE->isCompare()) @@ -1092,10 +1074,14 @@ class AssemblyWriter { TypePrinting TypePrinter; AssemblyAnnotationWriter *AnnotationWriter; std::vector<const Type*> NumberedTypes; + + // Each MDNode is assigned a unique MetadataIDNo. + std::map<const MDNode *, unsigned> MDNodes; + unsigned MetadataIDNo; public: inline AssemblyWriter(raw_ostream &o, SlotTracker &Mac, const Module *M, AssemblyAnnotationWriter *AAW) - : Out(o), Machine(Mac), TheModule(M), AnnotationWriter(AAW) { + : Out(o), Machine(Mac), TheModule(M), AnnotationWriter(AAW), MetadataIDNo(0) { AddModuleTypesToPrinter(TypePrinter, NumberedTypes, M); } @@ -1117,6 +1103,7 @@ public: void writeOperand(const Value *Op, bool PrintType); void writeParamOperand(const Value *Operand, Attributes Attrs); + void printMDNode(const MDNode *Node, bool StandAlone); const Module* getModule() { return TheModule; } @@ -1264,6 +1251,28 @@ static void PrintVisibility(GlobalValue::VisibilityTypes Vis, } void AssemblyWriter::printGlobal(const GlobalVariable *GV) { + if (GV->hasInitializer()) + // If GV is initialized using Metadata then separate out metadata + // operands used by the initializer. Note that MDNodes are not cyclic. + if (MDNode *N = dyn_cast<MDNode>(GV->getInitializer())) { + SmallVector<const MDNode *, 4> WorkList; + // Collect MDNodes used by the initializer. + for (MDNode::const_elem_iterator I = N->elem_begin(), E = N->elem_end(); + I != E; ++I) { + const Value *TV = *I; + if (TV) + if (const MDNode *NN = dyn_cast<MDNode>(TV)) + WorkList.push_back(NN); + } + + // Print MDNodes used by the initializer.
+ while (!WorkList.empty()) { + const MDNode *N = WorkList.back(); WorkList.pop_back(); + printMDNode(N, true); + Out << '\n'; + } + } + if (GV->hasName()) { PrintLLVMName(Out, GV); Out << " = "; @@ -1283,7 +1292,10 @@ void AssemblyWriter::printGlobal(const GlobalVariable *GV) { if (GV->hasInitializer()) { Out << ' '; - writeOperand(GV->getInitializer(), false); + if (MDNode *N = dyn_cast<MDNode>(GV->getInitializer())) + printMDNode(N, false); + else + writeOperand(GV->getInitializer(), false); } if (GV->hasSection()) @@ -1295,6 +1307,47 @@ void AssemblyWriter::printGlobal(const GlobalVariable *GV) { Out << '\n'; } +void AssemblyWriter::printMDNode(const MDNode *Node, + bool StandAlone) { + std::map<const MDNode *, unsigned>::iterator MI = MDNodes.find(Node); + // If this node has already been printed then just refer to it by its + // Metadata id number. + if (MI != MDNodes.end()) { + if (!StandAlone) + Out << "!" << MI->second; + return; + } + + if (StandAlone) { + // Print standalone MDNode. + // !42 = !{ ... } + Out << "!" << MetadataIDNo << " = "; + Out << "constant metadata "; + } + + Out << "!{"; + for (MDNode::const_elem_iterator I = Node->elem_begin(), E = Node->elem_end(); + I != E;) { + const Value *TV = *I; + if (!TV) + Out << "null"; + else if (const MDNode *N = dyn_cast<MDNode>(TV)) { + TypePrinter.print(N->getType(), Out); + Out << ' '; + printMDNode(N, StandAlone); + } + else if (!*I) + Out << "null"; + else + writeOperand(*I, true); + if (++I != E) + Out << ", "; + } + Out << "}"; + + MDNodes[Node] = MetadataIDNo++; +} + void AssemblyWriter::printAlias(const GlobalAlias *GA) { // Don't crash when dumping partially built GA if (!GA->hasName()) @@ -1852,6 +1905,14 @@ void Value::print(raw_ostream &OS, AssemblyAnnotationWriter *AAW) const { SlotTracker SlotTable(GV->getParent()); AssemblyWriter W(OS, SlotTable, GV->getParent(), AAW); W.write(GV); + } else if (const MDNode *N = dyn_cast<MDNode>(this)) { + TypePrinting TypePrinter; + TypePrinter.print(N->getType(), OS); + OS << ' '; + // FIXME: Do we need a slot tracker for metadata?
+ SlotTracker SlotTable((const Function *)NULL); + AssemblyWriter W(OS, SlotTable, NULL, AAW); + W.printMDNode(N, false); } else if (const Constant *C = dyn_cast<Constant>(this)) { TypePrinting TypePrinter; TypePrinter.print(C->getType(), OS); diff --git a/lib/VMCore/CMakeLists.txt b/lib/VMCore/CMakeLists.txt index d78e093..c9cdce4 100644 --- a/lib/VMCore/CMakeLists.txt +++ b/lib/VMCore/CMakeLists.txt @@ -14,6 +14,7 @@ add_llvm_library(LLVMCore Instructions.cpp IntrinsicInst.cpp LeakDetector.cpp + LLVMContext.cpp Mangler.cpp Module.cpp ModuleProvider.cpp diff --git a/lib/VMCore/Core.cpp b/lib/VMCore/Core.cpp index f85dbe7..6eb1889 100644 --- a/lib/VMCore/Core.cpp +++ b/lib/VMCore/Core.cpp @@ -18,6 +18,7 @@ #include "llvm/DerivedTypes.h" #include "llvm/GlobalVariable.h" #include "llvm/GlobalAlias.h" +#include "llvm/LLVMContext.h" #include "llvm/TypeSymbolTable.h" #include "llvm/ModuleProvider.h" #include "llvm/InlineAsm.h" @@ -38,10 +39,30 @@ void LLVMDisposeMessage(char *Message) { } +/*===-- Operations on contexts --------------------------------------------===*/ + +LLVMContextRef LLVMContextCreate() { + return wrap(new LLVMContext()); +} + +LLVMContextRef LLVMGetGlobalContext() { + return wrap(&getGlobalContext()); +} + +void LLVMContextDispose(LLVMContextRef C) { + delete unwrap(C); +} + + /*===-- Operations on modules ---------------------------------------------===*/ LLVMModuleRef LLVMModuleCreateWithName(const char *ModuleID) { - return wrap(new Module(ModuleID)); + return wrap(new Module(ModuleID, getGlobalContext())); +} + +LLVMModuleRef LLVMModuleCreateWithNameInContext(const char *ModuleID, + LLVMContextRef C) { + return wrap(new Module(ModuleID, *unwrap(C))); } void LLVMDisposeModule(LLVMModuleRef M) { diff --git a/lib/VMCore/Function.cpp b/lib/VMCore/Function.cpp index 0450566..eeade05 100644 --- a/lib/VMCore/Function.cpp +++ b/lib/VMCore/Function.cpp @@ -114,6 +114,12 @@ void Argument::removeAttr(Attributes attr) { // Helper Methods in Function //===----------------------------------------------------------------------===// +LLVMContext* Function::getContext() { + Module* M = getParent(); + if (M) return &M->getContext(); + return 0; +} + const FunctionType *Function::getFunctionType() const { return cast<FunctionType>(getType()->getElementType()); } diff --git a/lib/VMCore/LLVMContext.cpp b/lib/VMCore/LLVMContext.cpp new file mode 100644 index 0000000..fe2cb7b --- /dev/null +++ b/lib/VMCore/LLVMContext.cpp @@ -0,0 +1,489 @@ +//===-- LLVMContext.cpp - Implement LLVMContext -----------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements LLVMContext, as a wrapper around the opaque +// class LLVMContextImpl. 
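A quick sketch of how a client drives the context entry points added to the C API in Core.cpp above (module name hypothetical, error handling omitted):

    #include "llvm-c/Core.h"

    LLVMContextRef Ctx = LLVMContextCreate();
    LLVMModuleRef M = LLVMModuleCreateWithNameInContext("example", Ctx);
    /* ... build up the module ... */
    LLVMDisposeModule(M);
    LLVMContextDispose(Ctx);

Clients that do not need isolation can keep calling LLVMModuleCreateWithName, which now constructs the module in the global context.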
+// +//===----------------------------------------------------------------------===// + +#include "llvm/LLVMContext.h" +#include "llvm/Constants.h" +#include "llvm/DerivedTypes.h" +#include "llvm/MDNode.h" +#include "llvm/Support/ManagedStatic.h" +#include "LLVMContextImpl.h" + +using namespace llvm; + +static ManagedStatic<LLVMContext> GlobalContext; + +LLVMContext& llvm::getGlobalContext() { + return *GlobalContext; +} + +LLVMContext::LLVMContext() : pImpl(new LLVMContextImpl()) { } +LLVMContext::~LLVMContext() { delete pImpl; } + +// Constant accessors +Constant* LLVMContext::getNullValue(const Type* Ty) { + return Constant::getNullValue(Ty); +} + +Constant* LLVMContext::getAllOnesValue(const Type* Ty) { + return Constant::getAllOnesValue(Ty); +} + +// UndefValue accessors. +UndefValue* LLVMContext::getUndef(const Type* Ty) { + return UndefValue::get(Ty); +} + +// ConstantInt accessors. +ConstantInt* LLVMContext::getConstantIntTrue() { + return ConstantInt::getTrue(); +} + +ConstantInt* LLVMContext::getConstantIntFalse() { + return ConstantInt::getFalse(); +} + +Constant* LLVMContext::getConstantInt(const Type* Ty, uint64_t V, + bool isSigned) { + return ConstantInt::get(Ty, V, isSigned); +} + + +ConstantInt* LLVMContext::getConstantInt(const IntegerType* Ty, uint64_t V, + bool isSigned) { + return ConstantInt::get(Ty, V, isSigned); +} + +ConstantInt* LLVMContext::getConstantIntSigned(const IntegerType* Ty, + int64_t V) { + return ConstantInt::getSigned(Ty, V); +} + +ConstantInt* LLVMContext::getConstantInt(const APInt& V) { + return ConstantInt::get(V); +} + +Constant* LLVMContext::getConstantInt(const Type* Ty, const APInt& V) { + return ConstantInt::get(Ty, V); +} + +ConstantInt* LLVMContext::getConstantIntAllOnesValue(const Type* Ty) { + return ConstantInt::getAllOnesValue(Ty); +} + + +// ConstantPointerNull accessors. +ConstantPointerNull* LLVMContext::getConstantPointerNull(const PointerType* T) { + return ConstantPointerNull::get(T); +} + + +// ConstantStruct accessors. +Constant* LLVMContext::getConstantStruct(const StructType* T, + const std::vector<Constant*>& V) { + return ConstantStruct::get(T, V); +} + +Constant* LLVMContext::getConstantStruct(const std::vector<Constant*>& V, + bool Packed) { + return ConstantStruct::get(V, Packed); +} + +Constant* LLVMContext::getConstantStruct(Constant* const *Vals, + unsigned NumVals, bool Packed) { + return ConstantStruct::get(Vals, NumVals, Packed); +} + + +// ConstantAggregateZero accessors. +ConstantAggregateZero* LLVMContext::getConstantAggregateZero(const Type* Ty) { + return ConstantAggregateZero::get(Ty); +} + + +// ConstantArray accessors. +Constant* LLVMContext::getConstantArray(const ArrayType* T, + const std::vector<Constant*>& V) { + return ConstantArray::get(T, V); +} + +Constant* LLVMContext::getConstantArray(const ArrayType* T, + Constant* const* Vals, + unsigned NumVals) { + return ConstantArray::get(T, Vals, NumVals); +} + +Constant* LLVMContext::getConstantArray(const std::string& Initializer, + bool AddNull) { + return ConstantArray::get(Initializer, AddNull); +} + + +// ConstantExpr accessors. 
+Constant* LLVMContext::getConstantExpr(unsigned Opcode, Constant* C1, + Constant* C2) { + return ConstantExpr::get(Opcode, C1, C2); +} + +Constant* LLVMContext::getConstantExprTrunc(Constant* C, const Type* Ty) { + return ConstantExpr::getTrunc(C, Ty); +} + +Constant* LLVMContext::getConstantExprSExt(Constant* C, const Type* Ty) { + return ConstantExpr::getSExt(C, Ty); +} + +Constant* LLVMContext::getConstantExprZExt(Constant* C, const Type* Ty) { + return ConstantExpr::getZExt(C, Ty); +} + +Constant* LLVMContext::getConstantExprFPTrunc(Constant* C, const Type* Ty) { + return ConstantExpr::getFPTrunc(C, Ty); +} + +Constant* LLVMContext::getConstantExprFPExtend(Constant* C, const Type* Ty) { + return ConstantExpr::getFPExtend(C, Ty); +} + +Constant* LLVMContext::getConstantExprUIToFP(Constant* C, const Type* Ty) { + return ConstantExpr::getUIToFP(C, Ty); +} + +Constant* LLVMContext::getConstantExprSIToFP(Constant* C, const Type* Ty) { + return ConstantExpr::getSIToFP(C, Ty); +} + +Constant* LLVMContext::getConstantExprFPToUI(Constant* C, const Type* Ty) { + return ConstantExpr::getFPToUI(C, Ty); +} + +Constant* LLVMContext::getConstantExprFPToSI(Constant* C, const Type* Ty) { + return ConstantExpr::getFPToSI(C, Ty); +} + +Constant* LLVMContext::getConstantExprPtrToInt(Constant* C, const Type* Ty) { + return ConstantExpr::getPtrToInt(C, Ty); +} + +Constant* LLVMContext::getConstantExprIntToPtr(Constant* C, const Type* Ty) { + return ConstantExpr::getIntToPtr(C, Ty); +} + +Constant* LLVMContext::getConstantExprBitCast(Constant* C, const Type* Ty) { + return ConstantExpr::getBitCast(C, Ty); +} + +Constant* LLVMContext::getConstantExprCast(unsigned ops, Constant* C, + const Type* Ty) { + return ConstantExpr::getCast(ops, C, Ty); +} + +Constant* LLVMContext::getConstantExprZExtOrBitCast(Constant* C, + const Type* Ty) { + return ConstantExpr::getZExtOrBitCast(C, Ty); +} + +Constant* LLVMContext::getConstantExprSExtOrBitCast(Constant* C, + const Type* Ty) { + return ConstantExpr::getSExtOrBitCast(C, Ty); +} + +Constant* LLVMContext::getConstantExprTruncOrBitCast(Constant* C, + const Type* Ty) { + return ConstantExpr::getTruncOrBitCast(C, Ty); +} + +Constant* LLVMContext::getConstantExprPointerCast(Constant* C, const Type* Ty) { + return ConstantExpr::getPointerCast(C, Ty); +} + +Constant* LLVMContext::getConstantExprIntegerCast(Constant* C, const Type* Ty, + bool isSigned) { + return ConstantExpr::getIntegerCast(C, Ty, isSigned); +} + +Constant* LLVMContext::getConstantExprFPCast(Constant* C, const Type* Ty) { + return ConstantExpr::getFPCast(C, Ty); +} + +Constant* LLVMContext::getConstantExprSelect(Constant* C, Constant* V1, + Constant* V2) { + return ConstantExpr::getSelect(C, V1, V2); +} + +Constant* LLVMContext::getConstantExprAlignOf(const Type* Ty) { + return ConstantExpr::getAlignOf(Ty); +} + +Constant* LLVMContext::getConstantExprCompare(unsigned short pred, + Constant* C1, Constant* C2) { + return ConstantExpr::getCompare(pred, C1, C2); +} + +Constant* LLVMContext::getConstantExprNeg(Constant* C) { + return ConstantExpr::getNeg(C); +} + +Constant* LLVMContext::getConstantExprFNeg(Constant* C) { + return ConstantExpr::getFNeg(C); +} + +Constant* LLVMContext::getConstantExprNot(Constant* C) { + return ConstantExpr::getNot(C); +} + +Constant* LLVMContext::getConstantExprAdd(Constant* C1, Constant* C2) { + return ConstantExpr::getAdd(C1, C2); +} + +Constant* LLVMContext::getConstantExprFAdd(Constant* C1, Constant* C2) { + return ConstantExpr::getFAdd(C1, C2); +} + +Constant* 
LLVMContext::getConstantExprSub(Constant* C1, Constant* C2) { + return ConstantExpr::getSub(C1, C2); +} + +Constant* LLVMContext::getConstantExprFSub(Constant* C1, Constant* C2) { + return ConstantExpr::getFSub(C1, C2); +} + +Constant* LLVMContext::getConstantExprMul(Constant* C1, Constant* C2) { + return ConstantExpr::getMul(C1, C2); +} + +Constant* LLVMContext::getConstantExprFMul(Constant* C1, Constant* C2) { + return ConstantExpr::getFMul(C1, C2); +} + +Constant* LLVMContext::getConstantExprUDiv(Constant* C1, Constant* C2) { + return ConstantExpr::getUDiv(C1, C2); +} + +Constant* LLVMContext::getConstantExprSDiv(Constant* C1, Constant* C2) { + return ConstantExpr::getSDiv(C1, C2); +} + +Constant* LLVMContext::getConstantExprFDiv(Constant* C1, Constant* C2) { + return ConstantExpr::getFDiv(C1, C2); +} + +Constant* LLVMContext::getConstantExprURem(Constant* C1, Constant* C2) { + return ConstantExpr::getURem(C1, C2); +} + +Constant* LLVMContext::getConstantExprSRem(Constant* C1, Constant* C2) { + return ConstantExpr::getSRem(C1, C2); +} + +Constant* LLVMContext::getConstantExprFRem(Constant* C1, Constant* C2) { + return ConstantExpr::getFRem(C1, C2); +} + +Constant* LLVMContext::getConstantExprAnd(Constant* C1, Constant* C2) { + return ConstantExpr::getAnd(C1, C2); +} + +Constant* LLVMContext::getConstantExprOr(Constant* C1, Constant* C2) { + return ConstantExpr::getOr(C1, C2); +} + +Constant* LLVMContext::getConstantExprXor(Constant* C1, Constant* C2) { + return ConstantExpr::getXor(C1, C2); +} + +Constant* LLVMContext::getConstantExprICmp(unsigned short pred, Constant* LHS, + Constant* RHS) { + return ConstantExpr::getICmp(pred, LHS, RHS); +} + +Constant* LLVMContext::getConstantExprFCmp(unsigned short pred, Constant* LHS, + Constant* RHS) { + return ConstantExpr::getFCmp(pred, LHS, RHS); +} + +Constant* LLVMContext::getConstantExprVICmp(unsigned short pred, Constant* LHS, + Constant* RHS) { + return ConstantExpr::getVICmp(pred, LHS, RHS); +} + +Constant* LLVMContext::getConstantExprVFCmp(unsigned short pred, Constant* LHS, + Constant* RHS) { + return ConstantExpr::getVFCmp(pred, LHS, RHS); +} + +Constant* LLVMContext::getConstantExprShl(Constant* C1, Constant* C2) { + return ConstantExpr::getShl(C1, C2); +} + +Constant* LLVMContext::getConstantExprLShr(Constant* C1, Constant* C2) { + return ConstantExpr::getLShr(C1, C2); +} + +Constant* LLVMContext::getConstantExprAShr(Constant* C1, Constant* C2) { + return ConstantExpr::getAShr(C1, C2); +} + +Constant* LLVMContext::getConstantExprGetElementPtr(Constant* C, + Constant* const* IdxList, + unsigned NumIdx) { + return ConstantExpr::getGetElementPtr(C, IdxList, NumIdx); +} + +Constant* LLVMContext::getConstantExprGetElementPtr(Constant* C, + Value* const* IdxList, + unsigned NumIdx) { + return ConstantExpr::getGetElementPtr(C, IdxList, NumIdx); +} + +Constant* LLVMContext::getConstantExprExtractElement(Constant* Vec, + Constant* Idx) { + return ConstantExpr::getExtractElement(Vec, Idx); +} + +Constant* LLVMContext::getConstantExprInsertElement(Constant* Vec, + Constant* Elt, + Constant* Idx) { + return ConstantExpr::getInsertElement(Vec, Elt, Idx); +} + +Constant* LLVMContext::getConstantExprShuffleVector(Constant* V1, Constant* V2, + Constant* Mask) { + return ConstantExpr::getShuffleVector(V1, V2, Mask); +} + +Constant* LLVMContext::getConstantExprExtractValue(Constant* Agg, + const unsigned* IdxList, + unsigned NumIdx) { + return ConstantExpr::getExtractValue(Agg, IdxList, NumIdx); +} + +Constant* 
LLVMContext::getConstantExprInsertValue(Constant* Agg, Constant* Val, + const unsigned* IdxList, + unsigned NumIdx) { + return ConstantExpr::getInsertValue(Agg, Val, IdxList, NumIdx); +} + +Constant* LLVMContext::getZeroValueForNegation(const Type* Ty) { + return ConstantExpr::getZeroValueForNegationExpr(Ty); +} + + +// ConstantFP accessors. +ConstantFP* LLVMContext::getConstantFP(const APFloat& V) { + return ConstantFP::get(V); +} + +Constant* LLVMContext::getConstantFP(const Type* Ty, double V) { + return ConstantFP::get(Ty, V); +} + +ConstantFP* LLVMContext::getConstantFPNegativeZero(const Type* Ty) { + return ConstantFP::getNegativeZero(Ty); +} + + +// ConstantVector accessors. +Constant* LLVMContext::getConstantVector(const VectorType* T, + const std::vector<Constant*>& V) { + return ConstantVector::get(T, V); +} + +Constant* LLVMContext::getConstantVector(const std::vector<Constant*>& V) { + return ConstantVector::get(V); +} + +Constant* LLVMContext::getConstantVector(Constant* const* Vals, + unsigned NumVals) { + return ConstantVector::get(Vals, NumVals); +} + +ConstantVector* LLVMContext::getConstantVectorAllOnesValue( + const VectorType* Ty) { + return ConstantVector::getAllOnesValue(Ty); +} + +// MDNode accessors +MDNode* LLVMContext::getMDNode(Value* const* Vals, unsigned NumVals) { + return MDNode::get(Vals, NumVals); +} + +// MDString accessors +MDString* LLVMContext::getMDString(const char *StrBegin, const char *StrEnd) { + return MDString::get(StrBegin, StrEnd); +} + +MDString* LLVMContext::getMDString(const std::string &Str) { + return MDString::get(Str); +} + +// FunctionType accessors +FunctionType* LLVMContext::getFunctionType(const Type* Result, + const std::vector<const Type*>& Params, + bool isVarArg) { + return FunctionType::get(Result, Params, isVarArg); +} + +// IntegerType accessors +const IntegerType* LLVMContext::getIntegerType(unsigned NumBits) { + return IntegerType::get(NumBits); +} + +// OpaqueType accessors +OpaqueType* LLVMContext::getOpaqueType() { + return OpaqueType::get(); +} + +// StructType accessors +StructType* LLVMContext::getStructType(bool isPacked) { + return StructType::get(isPacked); +} + +StructType* LLVMContext::getStructType(const std::vector<const Type*>& Params, + bool isPacked) { + return StructType::get(Params, isPacked); +} + +// ArrayType accessors +ArrayType* LLVMContext::getArrayType(const Type* ElementType, + uint64_t NumElements) { + return ArrayType::get(ElementType, NumElements); +} + +// PointerType accessors +PointerType* LLVMContext::getPointerType(const Type* ElementType, + unsigned AddressSpace) { + return PointerType::get(ElementType, AddressSpace); +} + +PointerType* LLVMContext::getPointerTypeUnqual(const Type* ElementType) { + return PointerType::getUnqual(ElementType); +} + +// VectorType accessors +VectorType* LLVMContext::getVectorType(const Type* ElementType, + unsigned NumElements) { + return VectorType::get(ElementType, NumElements); +} + +VectorType* LLVMContext::getVectorTypeInteger(const VectorType* VTy) { + return VectorType::getInteger(VTy); +} + +VectorType* LLVMContext::getVectorTypeExtendedElement(const VectorType* VTy) { + return VectorType::getExtendedElementVectorType(VTy); +} + +VectorType* LLVMContext::getVectorTypeTruncatedElement(const VectorType* VTy) { + return VectorType::getTruncatedElementVectorType(VTy); +} diff --git a/lib/VMCore/LLVMContextImpl.h b/lib/VMCore/LLVMContextImpl.h new file mode 100644 index 0000000..4e089fb --- /dev/null +++ b/lib/VMCore/LLVMContextImpl.h @@ -0,0 +1,25 @@ 
diff --git a/lib/VMCore/LLVMContextImpl.h b/lib/VMCore/LLVMContextImpl.h
new file mode 100644
index 0000000..4e089fb
--- /dev/null
+++ b/lib/VMCore/LLVMContextImpl.h
@@ -0,0 +1,25 @@
+//===-- llvm/LLVMContextImpl.h - LLVMContext Implementation ----*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares LLVMContextImpl, the opaque implementation
+// of LLVMContext.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LLVMCONTEXT_IMPL_H
+#define LLVM_LLVMCONTEXT_IMPL_H
+
+namespace llvm {
+class LLVMContextImpl {
+
+};
+
+}
+
+#endif
diff --git a/lib/VMCore/Module.cpp b/lib/VMCore/Module.cpp
index a598005..f057e81 100644
--- a/lib/VMCore/Module.cpp
+++ b/lib/VMCore/Module.cpp
@@ -15,6 +15,7 @@
 #include "llvm/InstrTypes.h"
 #include "llvm/Constants.h"
 #include "llvm/DerivedTypes.h"
+#include "llvm/LLVMContext.h"
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/StringExtras.h"
 #include "llvm/Support/LeakDetector.h"
@@ -54,8 +55,8 @@ template class SymbolTableListTraits<GlobalAlias, Module>;
 // Primitive Module methods.
 //
 
-Module::Module(const std::string &MID)
-  : ModuleID(MID), DataLayout("") {
+Module::Module(const std::string &MID, LLVMContext& C)
+  : Context(C), ModuleID(MID), DataLayout("") {
   ValSymTab = new ValueSymbolTable();
   TypeSymTab = new TypeSymbolTable();
 }
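
The constructor change means a Module can no longer be created without naming its context. A sketch of what a caller now looks like; this is hypothetical and assumes LLVMContext is directly constructible at this point in the API's evolution (much contemporaneous code used a single shared context instead):

#include "llvm/LLVMContext.h"
#include "llvm/Module.h"

int main() {
  // The context must outlive every module constructed against it.
  llvm::LLVMContext Context;
  llvm::Module *M = new llvm::Module("example", Context);
  // ... build IR into M ...
  delete M;   // destroying the module leaves the context intact
  return 0;
}
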
diff --git a/lib/VMCore/PassManager.cpp b/lib/VMCore/PassManager.cpp
index 86cf10e..46f1243 100644
--- a/lib/VMCore/PassManager.cpp
+++ b/lib/VMCore/PassManager.cpp
@@ -165,11 +165,13 @@ namespace llvm {
 class FunctionPassManagerImpl : public Pass,
                                 public PMDataManager,
                                 public PMTopLevelManager {
+private:
+  bool wasRun;
 public:
   static char ID;
   explicit FunctionPassManagerImpl(int Depth) :
     Pass(&ID), PMDataManager(Depth),
-    PMTopLevelManager(TLM_Function) { }
+    PMTopLevelManager(TLM_Function), wasRun(false) { }
 
   /// add - Add a pass to the queue of passes to run.  This passes ownership of
   /// the Pass to the PassManager.  When the PassManager is destroyed, the pass
@@ -179,6 +181,10 @@ public:
     schedulePass(P);
   }
 
+  // Prepare for running an on-the-fly pass, freeing memory if needed
+  // from a previous run.
+  void releaseMemoryOnTheFly();
+
   /// run - Execute all of the passes scheduled for execution.  Keep track of
   /// whether any of the passes modifies the module, and if so, return true.
   bool run(Function &F);
@@ -272,8 +278,10 @@ public:
     for (unsigned Index = 0; Index < getNumContainedPasses(); ++Index) {
       ModulePass *MP = getContainedPass(Index);
       MP->dumpPassStructure(Offset + 1);
-      if (FunctionPassManagerImpl *FPP = OnTheFlyManagers[MP])
-        FPP->dumpPassStructure(Offset + 2);
+      std::map<Pass *, FunctionPassManagerImpl *>::const_iterator I =
+        OnTheFlyManagers.find(MP);
+      if (I != OnTheFlyManagers.end())
+        I->second->dumpPassStructure(Offset + 2);
       dumpLastUses(MP, Offset+1);
     }
   }
@@ -1290,6 +1298,18 @@ void FPPassManager::cleanup() {
  }
 }
 
+void FunctionPassManagerImpl::releaseMemoryOnTheFly() {
+  if (!wasRun)
+    return;
+  for (unsigned Index = 0; Index < getNumContainedManagers(); ++Index) {
+    FPPassManager *FPPM = getContainedManager(Index);
+    for (unsigned Index = 0; Index < FPPM->getNumContainedPasses(); ++Index) {
+      FPPM->getContainedPass(Index)->releaseMemory();
+    }
+  }
+  wasRun = false;
+}
+
 // Execute all the passes managed by this top level manager.
 // Return true if any function is modified by a pass.
 bool FunctionPassManagerImpl::run(Function &F) {
@@ -1306,6 +1326,7 @@ bool FunctionPassManagerImpl::run(Function &F) {
   for (unsigned Index = 0; Index < getNumContainedManagers(); ++Index)
     getContainedManager(Index)->cleanup();
 
+  wasRun = true;
   return Changed;
 }
 
@@ -1404,6 +1425,14 @@ bool MPPassManager::runOnModule(Module &M) {
   bool Changed = false;
 
+  // Initialize on-the-fly passes
+  for (std::map<Pass *, FunctionPassManagerImpl *>::iterator
+       I = OnTheFlyManagers.begin(), E = OnTheFlyManagers.end();
+       I != E; ++I) {
+    FunctionPassManagerImpl *FPP = I->second;
+    Changed |= FPP->doInitialization(M);
+  }
+
   for (unsigned Index = 0; Index < getNumContainedPasses(); ++Index) {
     ModulePass *MP = getContainedPass(Index);
 
@@ -1430,6 +1459,17 @@ MPPassManager::runOnModule(Module &M) {
     recordAvailableAnalysis(MP);
     removeDeadPasses(MP, M.getModuleIdentifier().c_str(), ON_MODULE_MSG);
   }
+
+  // Finalize on-the-fly passes
+  for (std::map<Pass *, FunctionPassManagerImpl *>::iterator
+       I = OnTheFlyManagers.begin(), E = OnTheFlyManagers.end();
+       I != E; ++I) {
+    FunctionPassManagerImpl *FPP = I->second;
+    // We don't know when an on-the-fly pass is run for the last time,
+    // so we need to releaseMemory / finalize here.
+    FPP->releaseMemoryOnTheFly();
+    Changed |= FPP->doFinalization(M);
+  }
   return Changed;
 }
 
@@ -1466,6 +1506,7 @@ Pass* MPPassManager::getOnTheFlyPass(Pass *MP, const PassInfo *PI, Function &F){
   FunctionPassManagerImpl *FPP = OnTheFlyManagers[MP];
   assert(FPP && "Unable to find on the fly pass");
 
+  FPP->releaseMemoryOnTheFly();
   FPP->run(F);
   return (dynamic_cast<PMTopLevelManager *>(FPP))->findAnalysisPass(PI);
 }
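
The wasRun flag implements a release-before-reuse protocol: analysis results cached by an on-the-fly function pass for one function must be dropped before the manager runs on another. A standalone sketch of the same pattern, using invented Pass and OnTheFlyManager types rather than the LLVM classes:

#include <vector>

// Minimal sketch of the release-before-reuse protocol above
// (hypothetical types, not the LLVM classes).
struct Pass {
  virtual void run() = 0;
  virtual void releaseMemory() = 0;   // drop cached analysis results
  virtual ~Pass() {}
};

class OnTheFlyManager {
  std::vector<Pass *> Passes;
  bool wasRun = false;                // set only after a completed run()
public:
  void releaseMemoryOnTheFly() {
    if (!wasRun) return;              // nothing cached yet
    for (Pass *P : Passes) P->releaseMemory();
    wasRun = false;
  }
  void run() {
    releaseMemoryOnTheFly();          // stale results from the previous
                                      // function must not leak into this one
    for (Pass *P : Passes) P->run();
    wasRun = true;
  }
};

Note that the patch also calls releaseMemoryOnTheFly once more at module finalization, since the manager cannot know which run was the last.
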
diff --git a/lib/VMCore/Type.cpp b/lib/VMCore/Type.cpp
index 5df7f12..40d7517 100644
--- a/lib/VMCore/Type.cpp
+++ b/lib/VMCore/Type.cpp
@@ -43,8 +43,8 @@ AbstractTypeUser::~AbstractTypeUser() {}
 // Type Class Implementation
 //===----------------------------------------------------------------------===//
 
-// Reader/writer lock used for guarding access to the type maps.
-static ManagedStatic<sys::SmartRWMutex<true> > TypeMapLock;
+// Lock used for guarding access to the type maps.
+static ManagedStatic<sys::SmartMutex<true> > TypeMapLock;
 
 // Recursive lock used for guarding access to AbstractTypeUsers.
 // NOTE: The true template parameter means this will no-op when we're not in
@@ -1006,23 +1006,13 @@ const IntegerType *IntegerType::get(unsigned NumBits) {
 
   // First, see if the type is already in the table, for which
   // a reader lock suffices.
-  TypeMapLock->reader_acquire();
+  sys::SmartScopedLock<true> L(&*TypeMapLock);
   ITy = IntegerTypes->get(IVT);
-  TypeMapLock->reader_release();
 
   if (!ITy) {
-    // OK, not in the table, get a writer lock.
-    sys::SmartScopedWriter<true> Writer(&*TypeMapLock);
-    ITy = IntegerTypes->get(IVT);
-
-    // We need to _recheck_ the table in case someone
-    // put it in between when we released the reader lock
-    // and when we gained the writer lock!
-    if (!ITy) {
-      // Value not found.  Derive a new type!
-      ITy = new IntegerType(NumBits);
-      IntegerTypes->add(IVT, ITy);
-    }
+    // Value not found.  Derive a new type!
+    ITy = new IntegerType(NumBits);
+    IntegerTypes->add(IVT, ITy);
   }
 #ifdef DEBUG_MERGE_TYPES
   DOUT << "Derived new type: " << *ITy << "\n";
@@ -1089,23 +1079,14 @@ FunctionType *FunctionType::get(const Type *ReturnType,
   FunctionValType VT(ReturnType, Params, isVarArg);
   FunctionType *FT = 0;
 
-  TypeMapLock->reader_acquire();
+  sys::SmartScopedLock<true> L(&*TypeMapLock);
   FT = FunctionTypes->get(VT);
-  TypeMapLock->reader_release();
 
   if (!FT) {
-    sys::SmartScopedWriter<true> Writer(&*TypeMapLock);
-
-    // Have to check again here, because it might have
-    // been inserted between when we release the reader
-    // lock and when we acquired the writer lock.
-    FT = FunctionTypes->get(VT);
-    if (!FT) {
-      FT = (FunctionType*) operator new(sizeof(FunctionType) +
-                                      sizeof(PATypeHandle)*(Params.size()+1));
-      new (FT) FunctionType(ReturnType, Params, isVarArg);
-      FunctionTypes->add(VT, FT);
-    }
+    FT = (FunctionType*) operator new(sizeof(FunctionType) +
+                                    sizeof(PATypeHandle)*(Params.size()+1));
+    new (FT) FunctionType(ReturnType, Params, isVarArg);
+    FunctionTypes->add(VT, FT);
   }
 
 #ifdef DEBUG_MERGE_TYPES
@@ -1148,19 +1129,12 @@ ArrayType *ArrayType::get(const Type *ElementType, uint64_t NumElements) {
   ArrayValType AVT(ElementType, NumElements);
   ArrayType *AT = 0;
 
-  TypeMapLock->reader_acquire();
+  sys::SmartScopedLock<true> L(&*TypeMapLock);
   AT = ArrayTypes->get(AVT);
-  TypeMapLock->reader_release();
-
+
   if (!AT) {
-    sys::SmartScopedWriter<true> Writer(&*TypeMapLock);
-
-    // Recheck.  Might have changed between release and acquire.
-    AT = ArrayTypes->get(AVT);
-    if (!AT) {
-      // Value not found.  Derive a new type!
-      ArrayTypes->add(AVT, AT = new ArrayType(ElementType, NumElements));
-    }
+    // Value not found.  Derive a new type!
+    ArrayTypes->add(AVT, AT = new ArrayType(ElementType, NumElements));
   }
 #ifdef DEBUG_MERGE_TYPES
   DOUT << "Derived new type: " << *AT << "\n";
@@ -1214,17 +1188,11 @@ VectorType *VectorType::get(const Type *ElementType, unsigned NumElements) {
   VectorValType PVT(ElementType, NumElements);
   VectorType *PT = 0;
 
-  TypeMapLock->reader_acquire();
+  sys::SmartScopedLock<true> L(&*TypeMapLock);
   PT = VectorTypes->get(PVT);
-  TypeMapLock->reader_release();
 
   if (!PT) {
-    sys::SmartScopedWriter<true> Writer(&*TypeMapLock);
-    PT = VectorTypes->get(PVT);
-    // Recheck.  Might have changed between release and acquire.
-    if (!PT) {
-      VectorTypes->add(PVT, PT = new VectorType(ElementType, NumElements));
-    }
+    VectorTypes->add(PVT, PT = new VectorType(ElementType, NumElements));
   }
 #ifdef DEBUG_MERGE_TYPES
   DOUT << "Derived new type: " << *PT << "\n";
@@ -1282,21 +1250,15 @@ StructType *StructType::get(const std::vector<const Type*> &ETypes,
   StructValType STV(ETypes, isPacked);
   StructType *ST = 0;
 
-  TypeMapLock->reader_acquire();
+  sys::SmartScopedLock<true> L(&*TypeMapLock);
   ST = StructTypes->get(STV);
-  TypeMapLock->reader_release();
 
   if (!ST) {
-    sys::SmartScopedWriter<true> Writer(&*TypeMapLock);
-    ST = StructTypes->get(STV);
-    // Recheck.  Might have changed between release and acquire.
-    if (!ST) {
-      // Value not found.  Derive a new type!
-      ST = (StructType*) operator new(sizeof(StructType) +
-                                      sizeof(PATypeHandle) * ETypes.size());
-      new (ST) StructType(ETypes, isPacked);
-      StructTypes->add(STV, ST);
-    }
+    // Value not found.  Derive a new type!
+    ST = (StructType*) operator new(sizeof(StructType) +
+                                    sizeof(PATypeHandle) * ETypes.size());
+    new (ST) StructType(ETypes, isPacked);
+    StructTypes->add(STV, ST);
   }
 #ifdef DEBUG_MERGE_TYPES
   DOUT << "Derived new type: " << *ST << "\n";
@@ -1367,18 +1329,12 @@ PointerType *PointerType::get(const Type *ValueType, unsigned AddressSpace) {
 
   PointerType *PT = 0;
 
-  TypeMapLock->reader_acquire();
+  sys::SmartScopedLock<true> L(&*TypeMapLock);
   PT = PointerTypes->get(PVT);
-  TypeMapLock->reader_release();
 
   if (!PT) {
-    sys::SmartScopedWriter<true> Writer(&*TypeMapLock);
-    PT = PointerTypes->get(PVT);
-    // Recheck.  Might have changed between release and acquire.
-    if (!PT) {
-      // Value not found.  Derive a new type!
-      PointerTypes->add(PVT, PT = new PointerType(ValueType, AddressSpace));
-    }
+    // Value not found.  Derive a new type!
+    PointerTypes->add(PVT, PT = new PointerType(ValueType, AddressSpace));
   }
 #ifdef DEBUG_MERGE_TYPES
   DOUT << "Derived new type: " << *PT << "\n";
@@ -1532,7 +1488,7 @@ void DerivedType::unlockedRefineAbstractTypeTo(const Type *NewType) {
 void DerivedType::refineAbstractTypeTo(const Type *NewType) {
   // All recursive calls will go through unlockedRefineAbstractTypeTo,
   // to avoid deadlock problems.
-  sys::SmartScopedWriter<true> Writer(&*TypeMapLock);
+  sys::SmartScopedLock<true> L(&*TypeMapLock);
   unlockedRefineAbstractTypeTo(NewType);
 }
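
What gets deleted throughout Type.cpp is double-checked locking: take a reader lock for the lookup, drop it, take a writer lock, and re-check before inserting. The replacement holds one mutex across both lookup and insert, trading reader concurrency for simplicity and closing the re-check window. A generic sketch of the resulting shape, using standard C++ primitives rather than the sys:: wrappers; UniqueTable and its assumption that T is constructible from Key are invented for illustration:

#include <map>
#include <mutex>

// Generic uniquing table, sketching the simplification made above:
// one plain mutex held across both the lookup and the insertion,
// instead of reader lock, release, writer lock, and re-check.
template <typename Key, typename T>
class UniqueTable {
  std::map<Key, T *> Entries;
  std::mutex Lock;
public:
  T *getOrCreate(const Key &K) {
    std::lock_guard<std::mutex> Guard(Lock);  // covers lookup AND insert
    typename std::map<Key, T *>::iterator I = Entries.find(K);
    if (I != Entries.end())
      return I->second;                       // already uniqued
    T *New = new T(K);                        // not found: derive a new one
    Entries[K] = New;
    return New;
  }
};

With a single lock there is no window between lookup and insert for another thread to slip in, so no re-check is needed; the cost is that concurrent readers now serialize on the same mutex.
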
diff --git a/lib/VMCore/ValueTypes.cpp b/lib/VMCore/ValueTypes.cpp
index fe4af05..2d207ee 100644
--- a/lib/VMCore/ValueTypes.cpp
+++ b/lib/VMCore/ValueTypes.cpp
@@ -54,6 +54,10 @@ bool MVT::isExtended128BitVector() const {
   return isExtendedVector() && getSizeInBits() == 128;
 }
 
+bool MVT::isExtended256BitVector() const {
+  return isExtendedVector() && getSizeInBits() == 256;
+}
+
 MVT MVT::getExtendedVectorElementType() const {
   assert(isExtended() && "Type is not extended!");
   return MVT::getMVT(cast<VectorType>(LLVMTy)->getElementType());
@@ -101,20 +105,26 @@ std::string MVT::getMVTString() const {
   case MVT::Flag:    return "flag";
   case MVT::v2i8:    return "v2i8";
   case MVT::v4i8:    return "v4i8";
-  case MVT::v2i16:   return "v2i16";
   case MVT::v8i8:    return "v8i8";
-  case MVT::v4i16:   return "v4i16";
-  case MVT::v2i32:   return "v2i32";
-  case MVT::v1i64:   return "v1i64";
   case MVT::v16i8:   return "v16i8";
+  case MVT::v32i8:   return "v32i8";
+  case MVT::v2i16:   return "v2i16";
+  case MVT::v4i16:   return "v4i16";
   case MVT::v8i16:   return "v8i16";
+  case MVT::v16i16:  return "v16i16";
+  case MVT::v2i32:   return "v2i32";
+  case MVT::v3i32:   return "v3i32";
   case MVT::v4i32:   return "v4i32";
+  case MVT::v8i32:   return "v8i32";
+  case MVT::v1i64:   return "v1i64";
   case MVT::v2i64:   return "v2i64";
+  case MVT::v4i64:   return "v4i64";
   case MVT::v2f32:   return "v2f32";
+  case MVT::v3f32:   return "v3f32";
   case MVT::v4f32:   return "v4f32";
+  case MVT::v8f32:   return "v8f32";
   case MVT::v2f64:   return "v2f64";
-  case MVT::v3i32:   return "v3i32";
-  case MVT::v3f32:   return "v3f32";
+  case MVT::v4f64:   return "v4f64";
   }
 }
@@ -140,21 +150,27 @@ const Type *MVT::getTypeForMVT() const {
   case MVT::ppcf128:  return Type::PPC_FP128Ty;
   case MVT::v2i8:     return VectorType::get(Type::Int8Ty, 2);
   case MVT::v4i8:     return VectorType::get(Type::Int8Ty, 4);
-  case MVT::v2i16:    return VectorType::get(Type::Int16Ty, 2);
   case MVT::v8i8:     return VectorType::get(Type::Int8Ty, 8);
+  case MVT::v16i8:    return VectorType::get(Type::Int8Ty, 16);
+  case MVT::v32i8:    return VectorType::get(Type::Int8Ty, 32);
+  case MVT::v2i16:    return VectorType::get(Type::Int16Ty, 2);
   case MVT::v4i16:    return VectorType::get(Type::Int16Ty, 4);
+  case MVT::v8i16:    return VectorType::get(Type::Int16Ty, 8);
+  case MVT::v16i16:   return VectorType::get(Type::Int16Ty, 16);
   case MVT::v2i32:    return VectorType::get(Type::Int32Ty, 2);
-  case MVT::v1i64:    return VectorType::get(Type::Int64Ty, 1);
-  case MVT::v16i8:    return VectorType::get(Type::Int8Ty, 16);
-  case MVT::v8i16:    return VectorType::get(Type::Int16Ty, 8);
+  case MVT::v3i32:    return VectorType::get(Type::Int32Ty, 3);
   case MVT::v4i32:    return VectorType::get(Type::Int32Ty, 4);
+  case MVT::v8i32:    return VectorType::get(Type::Int32Ty, 8);
+  case MVT::v1i64:    return VectorType::get(Type::Int64Ty, 1);
   case MVT::v2i64:    return VectorType::get(Type::Int64Ty, 2);
+  case MVT::v4i64:    return VectorType::get(Type::Int64Ty, 4);
   case MVT::v2f32:    return VectorType::get(Type::FloatTy, 2);
+  case MVT::v3f32:    return VectorType::get(Type::FloatTy, 3);
   case MVT::v4f32:    return VectorType::get(Type::FloatTy, 4);
+  case MVT::v8f32:    return VectorType::get(Type::FloatTy, 8);
   case MVT::v2f64:    return VectorType::get(Type::DoubleTy, 2);
-  case MVT::v3i32:    return VectorType::get(Type::Int32Ty, 3);
-  case MVT::v3f32:    return VectorType::get(Type::FloatTy, 3);
-  }
+  case MVT::v4f64:    return VectorType::get(Type::DoubleTy, 4);
+  }
 }
 
 /// getMVT - Return the value type corresponding to the specified type.  This
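
The new simple value types extend MVT coverage to 256-bit vectors. A small sketch of what the additions imply, using only the accessors visible in this diff; checkV8f32 is an invented helper and the assertions are illustrative:

#include <cassert>
#include "llvm/CodeGen/ValueTypes.h"
#include "llvm/DerivedTypes.h"
using namespace llvm;

// v8f32: eight 32-bit floats, one of the newly added 256-bit types.
static void checkV8f32() {
  MVT VT = MVT::v8f32;
  assert(VT.getSizeInBits() == 256 && "8 x f32 occupies 256 bits");
  const Type *Ty = VT.getTypeForMVT();  // VectorType::get(Type::FloatTy, 8)
  assert(cast<VectorType>(Ty)->getNumElements() == 8);
  (void)Ty;
}
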